mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-24 07:20:23 +07:00
f38e87e8c8
This patch fixes the following crash, seen when a MIC reset is invoked in the RESET_FAILED state, which is caused by device_del being called a second time on an already deleted device:

  [<ffffffff813b2295>] device_del+0x45/0x1d0
  [<ffffffff813b243e>] device_unregister+0x1e/0x60
  [<ffffffffa040f1c2>] scif_unregister_device+0x12/0x20 [scif_bus]
  [<ffffffffa042f75a>] cosm_stop+0xaa/0xe0 [mic_cosm]
  [<ffffffffa042f844>] cosm_reset_trigger_work+0x14/0x20 [mic_cosm]

The fix relies on the observation that, because cosm_reset changes the state to MIC_RESETTING, cosm_stop needs the previous state (the one in effect before the change to MIC_RESETTING) to decide whether hw_ops->stop had previously been issued. That state is now provided in a new cosm_device member, cdev->prev_state.

Reviewed-by: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
394 lines
9.6 KiB
C
394 lines
9.6 KiB
C
/*
|
|
* Intel MIC Platform Software Stack (MPSS)
|
|
*
|
|
* Copyright(c) 2015 Intel Corporation.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License, version 2, as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* The full GNU General Public License is included in this distribution in
|
|
* the file called "COPYING".
|
|
*
|
|
* Intel MIC Coprocessor State Management (COSM) Driver
|
|
*
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/idr.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/cred.h>
|
|
#include "cosm_main.h"
|
|
|
|
static const char cosm_driver_name[] = "mic";
|
|
|
|
/* COSM ID allocator */
|
|
static struct ida g_cosm_ida;
|
|
/* Class of MIC devices for sysfs accessibility. */
|
|
static struct class *g_cosm_class;
|
|
/* Number of MIC devices */
|
|
static atomic_t g_num_dev;
|
|
|
|
/**
|
|
* cosm_hw_reset - Issue a HW reset for the MIC device
|
|
* @cdev: pointer to cosm_device instance
|
|
*/
|
|
static void cosm_hw_reset(struct cosm_device *cdev, bool force)
|
|
{
|
|
int i;
|
|
|
|
#define MIC_RESET_TO (45)
|
|
if (force && cdev->hw_ops->force_reset)
|
|
cdev->hw_ops->force_reset(cdev);
|
|
else
|
|
cdev->hw_ops->reset(cdev);
|
|
|
|
for (i = 0; i < MIC_RESET_TO; i++) {
|
|
if (cdev->hw_ops->ready(cdev)) {
|
|
cosm_set_state(cdev, MIC_READY);
|
|
return;
|
|
}
|
|
/*
|
|
* Resets typically take 10s of seconds to complete.
|
|
* Since an MMIO read is required to check if the
|
|
* firmware is ready or not, a 1 second delay works nicely.
|
|
*/
|
|
msleep(1000);
|
|
}
|
|
cosm_set_state(cdev, MIC_RESET_FAILED);
|
|
}
|
|
|
|
/**
|
|
* cosm_start - Start the MIC
|
|
* @cdev: pointer to cosm_device instance
|
|
*
|
|
* This function prepares an MIC for boot and initiates boot.
|
|
* RETURNS: An appropriate -ERRNO error value on error, or 0 for success.
|
|
*/
|
|
int cosm_start(struct cosm_device *cdev)
|
|
{
|
|
const struct cred *orig_cred;
|
|
struct cred *override_cred;
|
|
int rc;
|
|
|
|
mutex_lock(&cdev->cosm_mutex);
|
|
if (!cdev->bootmode) {
|
|
dev_err(&cdev->dev, "%s %d bootmode not set\n",
|
|
__func__, __LINE__);
|
|
rc = -EINVAL;
|
|
goto unlock_ret;
|
|
}
|
|
retry:
|
|
if (cdev->state != MIC_READY) {
|
|
dev_err(&cdev->dev, "%s %d MIC state not READY\n",
|
|
__func__, __LINE__);
|
|
rc = -EINVAL;
|
|
goto unlock_ret;
|
|
}
|
|
if (!cdev->hw_ops->ready(cdev)) {
|
|
cosm_hw_reset(cdev, false);
|
|
/*
|
|
* The state will either be MIC_READY if the reset succeeded
|
|
* or MIC_RESET_FAILED if the firmware reset failed.
|
|
*/
|
|
goto retry;
|
|
}
|
|
|
|
/*
|
|
* Set credentials to root to allow non-root user to download initramsfs
|
|
* with 600 permissions
|
|
*/
|
|
override_cred = prepare_creds();
|
|
if (!override_cred) {
|
|
dev_err(&cdev->dev, "%s %d prepare_creds failed\n",
|
|
__func__, __LINE__);
|
|
rc = -ENOMEM;
|
|
goto unlock_ret;
|
|
}
|
|
override_cred->fsuid = GLOBAL_ROOT_UID;
|
|
orig_cred = override_creds(override_cred);
|
|
|
|
rc = cdev->hw_ops->start(cdev, cdev->index);
|
|
|
|
revert_creds(orig_cred);
|
|
put_cred(override_cred);
|
|
if (rc)
|
|
goto unlock_ret;
|
|
|
|
/*
|
|
* If linux is being booted, card is treated 'online' only
|
|
* when the scif interface in the card is up. If anything else
|
|
* is booted, we set card to 'online' immediately.
|
|
*/
|
|
if (!strcmp(cdev->bootmode, "linux"))
|
|
cosm_set_state(cdev, MIC_BOOTING);
|
|
else
|
|
cosm_set_state(cdev, MIC_ONLINE);
|
|
unlock_ret:
|
|
mutex_unlock(&cdev->cosm_mutex);
|
|
if (rc)
|
|
dev_err(&cdev->dev, "cosm_start failed rc %d\n", rc);
|
|
return rc;
|
|
}
|
|
|
|
/**
|
|
* cosm_stop - Prepare the MIC for reset and trigger reset
|
|
* @cdev: pointer to cosm_device instance
|
|
* @force: force a MIC to reset even if it is already reset and ready.
|
|
*
|
|
* RETURNS: None
|
|
*/
|
|
void cosm_stop(struct cosm_device *cdev, bool force)
|
|
{
|
|
mutex_lock(&cdev->cosm_mutex);
|
|
if (cdev->state != MIC_READY || force) {
|
|
/*
|
|
* Don't call hw_ops if they have been called previously.
|
|
* stop(..) calls device_unregister and will crash the system if
|
|
* called multiple times.
|
|
*/
|
|
u8 state = cdev->state == MIC_RESETTING ?
|
|
cdev->prev_state : cdev->state;
|
|
bool call_hw_ops = state != MIC_RESET_FAILED &&
|
|
state != MIC_READY;
|
|
|
|
if (cdev->state != MIC_RESETTING)
|
|
cosm_set_state(cdev, MIC_RESETTING);
|
|
cdev->heartbeat_watchdog_enable = false;
|
|
if (call_hw_ops)
|
|
cdev->hw_ops->stop(cdev, force);
|
|
cosm_hw_reset(cdev, force);
|
|
cosm_set_shutdown_status(cdev, MIC_NOP);
|
|
if (call_hw_ops && cdev->hw_ops->post_reset)
|
|
cdev->hw_ops->post_reset(cdev, cdev->state);
|
|
}
|
|
mutex_unlock(&cdev->cosm_mutex);
|
|
flush_work(&cdev->scif_work);
|
|
}
|
|
|
|
/**
|
|
* cosm_reset_trigger_work - Trigger MIC reset
|
|
* @work: The work structure
|
|
*
|
|
* This work is scheduled whenever the host wants to reset the MIC.
|
|
*/
|
|
static void cosm_reset_trigger_work(struct work_struct *work)
|
|
{
|
|
struct cosm_device *cdev = container_of(work, struct cosm_device,
|
|
reset_trigger_work);
|
|
cosm_stop(cdev, false);
|
|
}
|
|
|
|
/**
|
|
* cosm_reset - Schedule MIC reset
|
|
* @cdev: pointer to cosm_device instance
|
|
*
|
|
* RETURNS: An -EINVAL if the card is already READY or 0 for success.
|
|
*/
|
|
int cosm_reset(struct cosm_device *cdev)
|
|
{
|
|
int rc = 0;
|
|
|
|
mutex_lock(&cdev->cosm_mutex);
|
|
if (cdev->state != MIC_READY) {
|
|
if (cdev->state != MIC_RESETTING) {
|
|
cdev->prev_state = cdev->state;
|
|
cosm_set_state(cdev, MIC_RESETTING);
|
|
schedule_work(&cdev->reset_trigger_work);
|
|
}
|
|
} else {
|
|
dev_err(&cdev->dev, "%s %d MIC is READY\n", __func__, __LINE__);
|
|
rc = -EINVAL;
|
|
}
|
|
mutex_unlock(&cdev->cosm_mutex);
|
|
return rc;
|
|
}
|
|
|
|
/**
|
|
* cosm_shutdown - Initiate MIC shutdown.
|
|
* @cdev: pointer to cosm_device instance
|
|
*
|
|
* RETURNS: None
|
|
*/
|
|
int cosm_shutdown(struct cosm_device *cdev)
|
|
{
|
|
struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN };
|
|
int rc = 0;
|
|
|
|
mutex_lock(&cdev->cosm_mutex);
|
|
if (cdev->state != MIC_ONLINE) {
|
|
rc = -EINVAL;
|
|
dev_err(&cdev->dev, "%s %d skipping shutdown in state: %s\n",
|
|
__func__, __LINE__, cosm_state_string[cdev->state]);
|
|
goto err;
|
|
}
|
|
|
|
if (!cdev->epd) {
|
|
rc = -ENOTCONN;
|
|
dev_err(&cdev->dev, "%s %d scif endpoint not connected rc %d\n",
|
|
__func__, __LINE__, rc);
|
|
goto err;
|
|
}
|
|
|
|
rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
|
|
if (rc < 0) {
|
|
dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
|
|
__func__, __LINE__, rc);
|
|
goto err;
|
|
}
|
|
cdev->heartbeat_watchdog_enable = false;
|
|
cosm_set_state(cdev, MIC_SHUTTING_DOWN);
|
|
rc = 0;
|
|
err:
|
|
mutex_unlock(&cdev->cosm_mutex);
|
|
return rc;
|
|
}
|
|
|
|
static int cosm_driver_probe(struct cosm_device *cdev)
|
|
{
|
|
int rc;
|
|
|
|
/* Initialize SCIF server at first probe */
|
|
if (atomic_add_return(1, &g_num_dev) == 1) {
|
|
rc = cosm_scif_init();
|
|
if (rc)
|
|
goto scif_exit;
|
|
}
|
|
mutex_init(&cdev->cosm_mutex);
|
|
INIT_WORK(&cdev->reset_trigger_work, cosm_reset_trigger_work);
|
|
INIT_WORK(&cdev->scif_work, cosm_scif_work);
|
|
cdev->sysfs_heartbeat_enable = true;
|
|
cosm_sysfs_init(cdev);
|
|
cdev->sdev = device_create_with_groups(g_cosm_class, cdev->dev.parent,
|
|
MKDEV(0, cdev->index), cdev, cdev->attr_group,
|
|
"mic%d", cdev->index);
|
|
if (IS_ERR(cdev->sdev)) {
|
|
rc = PTR_ERR(cdev->sdev);
|
|
dev_err(&cdev->dev, "device_create_with_groups failed rc %d\n",
|
|
rc);
|
|
goto scif_exit;
|
|
}
|
|
|
|
cdev->state_sysfs = sysfs_get_dirent(cdev->sdev->kobj.sd,
|
|
"state");
|
|
if (!cdev->state_sysfs) {
|
|
rc = -ENODEV;
|
|
dev_err(&cdev->dev, "sysfs_get_dirent failed rc %d\n", rc);
|
|
goto destroy_device;
|
|
}
|
|
cosm_create_debug_dir(cdev);
|
|
return 0;
|
|
destroy_device:
|
|
device_destroy(g_cosm_class, MKDEV(0, cdev->index));
|
|
scif_exit:
|
|
if (atomic_dec_and_test(&g_num_dev))
|
|
cosm_scif_exit();
|
|
return rc;
|
|
}
|
|
|
|
static void cosm_driver_remove(struct cosm_device *cdev)
|
|
{
|
|
cosm_delete_debug_dir(cdev);
|
|
sysfs_put(cdev->state_sysfs);
|
|
device_destroy(g_cosm_class, MKDEV(0, cdev->index));
|
|
flush_work(&cdev->reset_trigger_work);
|
|
cosm_stop(cdev, false);
|
|
if (atomic_dec_and_test(&g_num_dev))
|
|
cosm_scif_exit();
|
|
|
|
/* These sysfs entries might have allocated */
|
|
kfree(cdev->cmdline);
|
|
kfree(cdev->firmware);
|
|
kfree(cdev->ramdisk);
|
|
kfree(cdev->bootmode);
|
|
}
|
|
|
|
static int cosm_suspend(struct device *dev)
|
|
{
|
|
struct cosm_device *cdev = dev_to_cosm(dev);
|
|
|
|
mutex_lock(&cdev->cosm_mutex);
|
|
switch (cdev->state) {
|
|
/**
|
|
* Suspend/freeze hooks in userspace have already shutdown the card.
|
|
* Card should be 'ready' in most cases. It is however possible that
|
|
* some userspace application initiated a boot. In those cases, we
|
|
* simply reset the card.
|
|
*/
|
|
case MIC_ONLINE:
|
|
case MIC_BOOTING:
|
|
case MIC_SHUTTING_DOWN:
|
|
mutex_unlock(&cdev->cosm_mutex);
|
|
cosm_stop(cdev, false);
|
|
break;
|
|
default:
|
|
mutex_unlock(&cdev->cosm_mutex);
|
|
break;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static const struct dev_pm_ops cosm_pm_ops = {
|
|
.suspend = cosm_suspend,
|
|
.freeze = cosm_suspend
|
|
};
|
|
|
|
static struct cosm_driver cosm_driver = {
|
|
.driver = {
|
|
.name = KBUILD_MODNAME,
|
|
.owner = THIS_MODULE,
|
|
.pm = &cosm_pm_ops,
|
|
},
|
|
.probe = cosm_driver_probe,
|
|
.remove = cosm_driver_remove
|
|
};
|
|
|
|
/*
 * cosm_init - Module entry point.
 *
 * Sets up debugfs, creates the MIC device class, initializes the ID
 * allocator and registers the COSM driver. Everything set up so far is
 * undone on failure.
 *
 * Returns 0 on success or a negative error code on failure.
 */
static int __init cosm_init(void)
{
	int ret;

	cosm_init_debugfs();

	g_cosm_class = class_create(THIS_MODULE, cosm_driver_name);
	if (IS_ERR(g_cosm_class)) {
		ret = PTR_ERR(g_cosm_class);
		pr_err("class_create failed ret %d\n", ret);
		goto err_debugfs;
	}

	ida_init(&g_cosm_ida);
	ret = cosm_register_driver(&cosm_driver);
	if (!ret)
		return 0;

	pr_err("cosm_register_driver failed ret %d\n", ret);
	ida_destroy(&g_cosm_ida);
	class_destroy(g_cosm_class);
err_debugfs:
	cosm_exit_debugfs();
	return ret;
}
|
|
|
|
/*
 * cosm_exit - Module exit point.
 *
 * Unregisters the COSM driver, then releases the ID allocator, the device
 * class and debugfs.
 */
static void __exit cosm_exit(void)
{
	cosm_unregister_driver(&cosm_driver);
	ida_destroy(&g_cosm_ida);
	class_destroy(g_cosm_class);
	cosm_exit_debugfs();
}
|
|
|
|
module_init(cosm_init);
|
|
module_exit(cosm_exit);
|
|
|
|
MODULE_AUTHOR("Intel Corporation");
|
|
MODULE_DESCRIPTION("Intel(R) MIC Coprocessor State Management (COSM) Driver");
|
|
MODULE_LICENSE("GPL v2");
|