mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
7b42d97e99
A device still participates in error recovery even if it doesn't have the error callbacks. Always provide the status for user event watchers. Signed-off-by: Keith Busch <keith.busch@intel.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Reviewed-by: Sinan Kaya <okaya@kernel.org>
248 lines
6.4 KiB
C
248 lines
6.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* This file implements the error recovery as a core part of PCIe error
|
|
* reporting. When a PCIe error is delivered, an error message will be
|
|
* collected and printed to console, then, an error recovery procedure
|
|
* will be executed by following the PCI error recovery rules.
|
|
*
|
|
* Copyright (C) 2006 Intel Corp.
|
|
* Tom Long Nguyen (tom.l.nguyen@intel.com)
|
|
* Zhang Yanmin (yanmin.zhang@intel.com)
|
|
*/
|
|
|
|
#include <linux/pci.h>
|
|
#include <linux/module.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/aer.h>
|
|
#include "portdrv.h"
|
|
#include "../pci.h"
|
|
|
|
static pci_ers_result_t merge_result(enum pci_ers_result orig,
|
|
enum pci_ers_result new)
|
|
{
|
|
if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
|
|
return PCI_ERS_RESULT_NO_AER_DRIVER;
|
|
|
|
if (new == PCI_ERS_RESULT_NONE)
|
|
return orig;
|
|
|
|
switch (orig) {
|
|
case PCI_ERS_RESULT_CAN_RECOVER:
|
|
case PCI_ERS_RESULT_RECOVERED:
|
|
orig = new;
|
|
break;
|
|
case PCI_ERS_RESULT_DISCONNECT:
|
|
if (new == PCI_ERS_RESULT_NEED_RESET)
|
|
orig = PCI_ERS_RESULT_NEED_RESET;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return orig;
|
|
}
|
|
|
|
static int report_error_detected(struct pci_dev *dev,
|
|
enum pci_channel_state state,
|
|
enum pci_ers_result *result)
|
|
{
|
|
pci_ers_result_t vote;
|
|
const struct pci_error_handlers *err_handler;
|
|
|
|
device_lock(&dev->dev);
|
|
dev->error_state = state;
|
|
|
|
if (!dev->driver ||
|
|
!dev->driver->err_handler ||
|
|
!dev->driver->err_handler->error_detected) {
|
|
/*
|
|
* If any device in the subtree does not have an error_detected
|
|
* callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
|
|
* error callbacks of "any" device in the subtree, and will
|
|
* exit in the disconnected error state.
|
|
*/
|
|
if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
|
|
vote = PCI_ERS_RESULT_NO_AER_DRIVER;
|
|
else
|
|
vote = PCI_ERS_RESULT_NONE;
|
|
} else {
|
|
err_handler = dev->driver->err_handler;
|
|
vote = err_handler->error_detected(dev, state);
|
|
}
|
|
pci_uevent_ers(dev, vote);
|
|
*result = merge_result(*result, vote);
|
|
device_unlock(&dev->dev);
|
|
return 0;
|
|
}
|
|
|
|
static int report_frozen_detected(struct pci_dev *dev, void *data)
|
|
{
|
|
return report_error_detected(dev, pci_channel_io_frozen, data);
|
|
}
|
|
|
|
static int report_normal_detected(struct pci_dev *dev, void *data)
|
|
{
|
|
return report_error_detected(dev, pci_channel_io_normal, data);
|
|
}
|
|
|
|
static int report_mmio_enabled(struct pci_dev *dev, void *data)
|
|
{
|
|
pci_ers_result_t vote, *result = data;
|
|
const struct pci_error_handlers *err_handler;
|
|
|
|
device_lock(&dev->dev);
|
|
if (!dev->driver ||
|
|
!dev->driver->err_handler ||
|
|
!dev->driver->err_handler->mmio_enabled)
|
|
goto out;
|
|
|
|
err_handler = dev->driver->err_handler;
|
|
vote = err_handler->mmio_enabled(dev);
|
|
*result = merge_result(*result, vote);
|
|
out:
|
|
device_unlock(&dev->dev);
|
|
return 0;
|
|
}
|
|
|
|
static int report_slot_reset(struct pci_dev *dev, void *data)
|
|
{
|
|
pci_ers_result_t vote, *result = data;
|
|
const struct pci_error_handlers *err_handler;
|
|
|
|
device_lock(&dev->dev);
|
|
if (!dev->driver ||
|
|
!dev->driver->err_handler ||
|
|
!dev->driver->err_handler->slot_reset)
|
|
goto out;
|
|
|
|
err_handler = dev->driver->err_handler;
|
|
vote = err_handler->slot_reset(dev);
|
|
*result = merge_result(*result, vote);
|
|
out:
|
|
device_unlock(&dev->dev);
|
|
return 0;
|
|
}
|
|
|
|
static int report_resume(struct pci_dev *dev, void *data)
|
|
{
|
|
const struct pci_error_handlers *err_handler;
|
|
|
|
device_lock(&dev->dev);
|
|
dev->error_state = pci_channel_io_normal;
|
|
|
|
if (!dev->driver ||
|
|
!dev->driver->err_handler ||
|
|
!dev->driver->err_handler->resume)
|
|
goto out;
|
|
|
|
err_handler = dev->driver->err_handler;
|
|
err_handler->resume(dev);
|
|
out:
|
|
pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
|
|
device_unlock(&dev->dev);
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* default_reset_link - default reset function
|
|
* @dev: pointer to pci_dev data structure
|
|
*
|
|
* Invoked when performing link reset on a Downstream Port or a
|
|
* Root Port with no aer driver.
|
|
*/
|
|
static pci_ers_result_t default_reset_link(struct pci_dev *dev)
|
|
{
|
|
int rc;
|
|
|
|
rc = pci_bus_error_reset(dev);
|
|
pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
|
|
return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
|
|
}
|
|
|
|
static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
|
|
{
|
|
pci_ers_result_t status;
|
|
struct pcie_port_service_driver *driver = NULL;
|
|
|
|
driver = pcie_port_find_service(dev, service);
|
|
if (driver && driver->reset_link) {
|
|
status = driver->reset_link(dev);
|
|
} else if (dev->has_secondary_link) {
|
|
status = default_reset_link(dev);
|
|
} else {
|
|
pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
|
|
pci_name(dev));
|
|
return PCI_ERS_RESULT_DISCONNECT;
|
|
}
|
|
|
|
if (status != PCI_ERS_RESULT_RECOVERED) {
|
|
pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
|
|
pci_name(dev));
|
|
return PCI_ERS_RESULT_DISCONNECT;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
|
|
u32 service)
|
|
{
|
|
pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
|
|
struct pci_bus *bus;
|
|
|
|
/*
|
|
* Error recovery runs on all subordinates of the first downstream port.
|
|
* If the downstream port detected the error, it is cleared at the end.
|
|
*/
|
|
if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
|
|
pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM))
|
|
dev = dev->bus->self;
|
|
bus = dev->subordinate;
|
|
|
|
pci_dbg(dev, "broadcast error_detected message\n");
|
|
if (state == pci_channel_io_frozen)
|
|
pci_walk_bus(bus, report_frozen_detected, &status);
|
|
else
|
|
pci_walk_bus(bus, report_normal_detected, &status);
|
|
|
|
if (state == pci_channel_io_frozen &&
|
|
reset_link(dev, service) != PCI_ERS_RESULT_RECOVERED)
|
|
goto failed;
|
|
|
|
if (status == PCI_ERS_RESULT_CAN_RECOVER) {
|
|
status = PCI_ERS_RESULT_RECOVERED;
|
|
pci_dbg(dev, "broadcast mmio_enabled message\n");
|
|
pci_walk_bus(bus, report_mmio_enabled, &status);
|
|
}
|
|
|
|
if (status == PCI_ERS_RESULT_NEED_RESET) {
|
|
/*
|
|
* TODO: Should call platform-specific
|
|
* functions to reset slot before calling
|
|
* drivers' slot_reset callbacks?
|
|
*/
|
|
status = PCI_ERS_RESULT_RECOVERED;
|
|
pci_dbg(dev, "broadcast slot_reset message\n");
|
|
pci_walk_bus(bus, report_slot_reset, &status);
|
|
}
|
|
|
|
if (status != PCI_ERS_RESULT_RECOVERED)
|
|
goto failed;
|
|
|
|
pci_dbg(dev, "broadcast resume message\n");
|
|
pci_walk_bus(bus, report_resume, &status);
|
|
|
|
pci_aer_clear_device_status(dev);
|
|
pci_cleanup_aer_uncorrect_error_status(dev);
|
|
pci_info(dev, "AER: Device recovery successful\n");
|
|
return;
|
|
|
|
failed:
|
|
pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
|
|
|
|
/* TODO: Should kernel panic here? */
|
|
pci_info(dev, "AER: Device recovery failed\n");
|
|
}
|