linux_dsm_epyc7002/drivers/pci/hotplug/pciehp_ctrl.c
Mika Westerberg 87d0f2a553 PCI: pciehp: Prevent deadlock on disconnect
This addresses deadlocks in these common cases in hierarchies containing
two switches:

  - All involved ports are runtime suspended and they are unplugged. This
    can happen easily if the drivers involved automatically enable runtime
    PM (xHCI for example does that).

  - System is suspended (e.g., closing the lid on a laptop) with a dock +
    something else connected, and the dock is unplugged while suspended.

These cases lead to the following deadlock:

  INFO: task irq/126-pciehp:198 blocked for more than 120 seconds.
  irq/126-pciehp  D    0   198      2 0x80000000
  Call Trace:
   schedule+0x2c/0x80
   schedule_timeout+0x246/0x350
   wait_for_completion+0xb7/0x140
   kthread_stop+0x49/0x110
   free_irq+0x32/0x70
   pcie_shutdown_notification+0x2f/0x50
   pciehp_remove+0x27/0x50
   pcie_port_remove_service+0x36/0x50
   device_release_driver+0x12/0x20
   bus_remove_device+0xec/0x160
   device_del+0x13b/0x350
   device_unregister+0x1a/0x60
   remove_iter+0x1e/0x30
   device_for_each_child+0x56/0x90
   pcie_port_device_remove+0x22/0x40
   pcie_portdrv_remove+0x20/0x60
   pci_device_remove+0x3e/0xc0
   device_release_driver_internal+0x18c/0x250
   device_release_driver+0x12/0x20
   pci_stop_bus_device+0x6f/0x90
   pci_stop_bus_device+0x31/0x90
   pci_stop_and_remove_bus_device+0x12/0x20
   pciehp_unconfigure_device+0x88/0x140
   pciehp_disable_slot+0x6a/0x110
   pciehp_handle_presence_or_link_change+0x263/0x400
   pciehp_ist+0x1c9/0x1d0
   irq_thread_fn+0x24/0x60
   irq_thread+0xeb/0x190
   kthread+0x120/0x140

  INFO: task irq/190-pciehp:2288 blocked for more than 120 seconds.
  irq/190-pciehp  D    0  2288      2 0x80000000
  Call Trace:
   __schedule+0x2a2/0x880
   schedule+0x2c/0x80
   schedule_preempt_disabled+0xe/0x10
   mutex_lock+0x2c/0x30
   pci_lock_rescan_remove+0x15/0x20
   pciehp_unconfigure_device+0x4d/0x140
   pciehp_disable_slot+0x6a/0x110
   pciehp_handle_presence_or_link_change+0x263/0x400
   pciehp_ist+0x1c9/0x1d0
   irq_thread_fn+0x24/0x60
   irq_thread+0xeb/0x190
   kthread+0x120/0x140

What happens here is that the whole hierarchy is runtime resumed and the
parent PCIe downstream port, which got the hot-remove event, starts
removing devices below it, taking pci_lock_rescan_remove() lock. When the
child PCIe port is runtime resumed it calls pciehp_check_presence() which
ends up calling pciehp_card_present() and pciehp_check_link_active().  Both
of these use pcie_capability_read_word(), which notices that the underlying
device is already gone and returns PCIBIOS_DEVICE_NOT_FOUND with the
capability value set to 0. When pciehp gets this value it thinks that its
child device is also hot-removed and schedules its IRQ thread to handle the
event.

The deadlock happens when the child's IRQ thread runs and tries to acquire
pci_lock_rescan_remove() which is already taken by the parent and the
parent waits for the child's IRQ thread to finish.

Prevent this from happening by checking the return value of
pcie_capability_read_word() and if it is PCIBIOS_DEVICE_NOT_FOUND stop
performing any hot-removal activities.

[bhelgaas: add common scenarios to commit log]
Link: https://lore.kernel.org/r/20191029170022.57528-2-mika.westerberg@linux.intel.com
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2019-11-12 17:17:42 -06:00

434 lines
11 KiB
C

// SPDX-License-Identifier: GPL-2.0+
/*
* PCI Express Hot Plug Controller Driver
*
* Copyright (C) 1995,2001 Compaq Computer Corporation
* Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
* Copyright (C) 2001 IBM Corp.
* Copyright (C) 2003-2004 Intel Corporation
*
* All rights reserved.
*
* Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
*
*/
#define dev_fmt(fmt) "pciehp: " fmt
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/pm_runtime.h>
#include <linux/pci.h>
#include "pciehp.h"
/* The following routines constitute the bulk of the
hotplug controller logic
*/
#define SAFE_REMOVAL true
#define SURPRISE_REMOVAL false
static void set_slot_off(struct controller *ctrl)
{
/*
* Turn off slot, turn on attention indicator, turn off power
* indicator
*/
if (POWER_CTRL(ctrl)) {
pciehp_power_off_slot(ctrl);
/*
* After turning power off, we must wait for at least 1 second
* before taking any action that relies on power having been
* removed from the slot/adapter.
*/
msleep(1000);
}
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
PCI_EXP_SLTCTL_ATTN_IND_ON);
}
/**
* board_added - Called after a board has been added to the system.
* @ctrl: PCIe hotplug controller where board is added
*
* Turns power on for the board.
* Configures board.
*/
static int board_added(struct controller *ctrl)
{
int retval = 0;
struct pci_bus *parent = ctrl->pcie->port->subordinate;
if (POWER_CTRL(ctrl)) {
/* Power on slot */
retval = pciehp_power_on_slot(ctrl);
if (retval)
return retval;
}
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
INDICATOR_NOOP);
/* Check link training status */
retval = pciehp_check_link_status(ctrl);
if (retval) {
ctrl_err(ctrl, "Failed to check link status\n");
goto err_exit;
}
/* Check for a power fault */
if (ctrl->power_fault_detected || pciehp_query_power_fault(ctrl)) {
ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl));
retval = -EIO;
goto err_exit;
}
retval = pciehp_configure_device(ctrl);
if (retval) {
if (retval != -EEXIST) {
ctrl_err(ctrl, "Cannot add device at %04x:%02x:00\n",
pci_domain_nr(parent), parent->number);
goto err_exit;
}
}
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
PCI_EXP_SLTCTL_ATTN_IND_OFF);
return 0;
err_exit:
set_slot_off(ctrl);
return retval;
}
/**
* remove_board - Turn off slot and Power Indicator
* @ctrl: PCIe hotplug controller where board is being removed
* @safe_removal: whether the board is safely removed (versus surprise removed)
*/
static void remove_board(struct controller *ctrl, bool safe_removal)
{
pciehp_unconfigure_device(ctrl, safe_removal);
if (POWER_CTRL(ctrl)) {
pciehp_power_off_slot(ctrl);
/*
* After turning power off, we must wait for at least 1 second
* before taking any action that relies on power having been
* removed from the slot/adapter.
*/
msleep(1000);
/* Ignore link or presence changes caused by power off */
atomic_and(~(PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC),
&ctrl->pending_events);
}
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
INDICATOR_NOOP);
}
static int pciehp_enable_slot(struct controller *ctrl);
static int pciehp_disable_slot(struct controller *ctrl, bool safe_removal);
void pciehp_request(struct controller *ctrl, int action)
{
atomic_or(action, &ctrl->pending_events);
if (!pciehp_poll_mode)
irq_wake_thread(ctrl->pcie->irq, ctrl);
}
void pciehp_queue_pushbutton_work(struct work_struct *work)
{
struct controller *ctrl = container_of(work, struct controller,
button_work.work);
mutex_lock(&ctrl->state_lock);
switch (ctrl->state) {
case BLINKINGOFF_STATE:
pciehp_request(ctrl, DISABLE_SLOT);
break;
case BLINKINGON_STATE:
pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
break;
default:
break;
}
mutex_unlock(&ctrl->state_lock);
}
void pciehp_handle_button_press(struct controller *ctrl)
{
mutex_lock(&ctrl->state_lock);
switch (ctrl->state) {
case OFF_STATE:
case ON_STATE:
if (ctrl->state == ON_STATE) {
ctrl->state = BLINKINGOFF_STATE;
ctrl_info(ctrl, "Slot(%s): Powering off due to button press\n",
slot_name(ctrl));
} else {
ctrl->state = BLINKINGON_STATE;
ctrl_info(ctrl, "Slot(%s) Powering on due to button press\n",
slot_name(ctrl));
}
/* blink power indicator and turn off attention */
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
PCI_EXP_SLTCTL_ATTN_IND_OFF);
schedule_delayed_work(&ctrl->button_work, 5 * HZ);
break;
case BLINKINGOFF_STATE:
case BLINKINGON_STATE:
/*
* Cancel if we are still blinking; this means that we
* press the attention again before the 5 sec. limit
* expires to cancel hot-add or hot-remove
*/
ctrl_info(ctrl, "Slot(%s): Button cancel\n", slot_name(ctrl));
cancel_delayed_work(&ctrl->button_work);
if (ctrl->state == BLINKINGOFF_STATE) {
ctrl->state = ON_STATE;
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
PCI_EXP_SLTCTL_ATTN_IND_OFF);
} else {
ctrl->state = OFF_STATE;
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
PCI_EXP_SLTCTL_ATTN_IND_OFF);
}
ctrl_info(ctrl, "Slot(%s): Action canceled due to button press\n",
slot_name(ctrl));
break;
default:
ctrl_err(ctrl, "Slot(%s): Ignoring invalid state %#x\n",
slot_name(ctrl), ctrl->state);
break;
}
mutex_unlock(&ctrl->state_lock);
}
void pciehp_handle_disable_request(struct controller *ctrl)
{
mutex_lock(&ctrl->state_lock);
switch (ctrl->state) {
case BLINKINGON_STATE:
case BLINKINGOFF_STATE:
cancel_delayed_work(&ctrl->button_work);
break;
}
ctrl->state = POWEROFF_STATE;
mutex_unlock(&ctrl->state_lock);
ctrl->request_result = pciehp_disable_slot(ctrl, SAFE_REMOVAL);
}
void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
{
int present, link_active;
/*
* If the slot is on and presence or link has changed, turn it off.
* Even if it's occupied again, we cannot assume the card is the same.
*/
mutex_lock(&ctrl->state_lock);
switch (ctrl->state) {
case BLINKINGOFF_STATE:
cancel_delayed_work(&ctrl->button_work);
/* fall through */
case ON_STATE:
ctrl->state = POWEROFF_STATE;
mutex_unlock(&ctrl->state_lock);
if (events & PCI_EXP_SLTSTA_DLLSC)
ctrl_info(ctrl, "Slot(%s): Link Down\n",
slot_name(ctrl));
if (events & PCI_EXP_SLTSTA_PDC)
ctrl_info(ctrl, "Slot(%s): Card not present\n",
slot_name(ctrl));
pciehp_disable_slot(ctrl, SURPRISE_REMOVAL);
break;
default:
mutex_unlock(&ctrl->state_lock);
break;
}
/* Turn the slot on if it's occupied or link is up */
mutex_lock(&ctrl->state_lock);
present = pciehp_card_present(ctrl);
link_active = pciehp_check_link_active(ctrl);
if (present <= 0 && link_active <= 0) {
mutex_unlock(&ctrl->state_lock);
return;
}
switch (ctrl->state) {
case BLINKINGON_STATE:
cancel_delayed_work(&ctrl->button_work);
/* fall through */
case OFF_STATE:
ctrl->state = POWERON_STATE;
mutex_unlock(&ctrl->state_lock);
if (present)
ctrl_info(ctrl, "Slot(%s): Card present\n",
slot_name(ctrl));
if (link_active)
ctrl_info(ctrl, "Slot(%s): Link Up\n",
slot_name(ctrl));
ctrl->request_result = pciehp_enable_slot(ctrl);
break;
default:
mutex_unlock(&ctrl->state_lock);
break;
}
}
static int __pciehp_enable_slot(struct controller *ctrl)
{
u8 getstatus = 0;
if (MRL_SENS(ctrl)) {
pciehp_get_latch_status(ctrl, &getstatus);
if (getstatus) {
ctrl_info(ctrl, "Slot(%s): Latch open\n",
slot_name(ctrl));
return -ENODEV;
}
}
if (POWER_CTRL(ctrl)) {
pciehp_get_power_status(ctrl, &getstatus);
if (getstatus) {
ctrl_info(ctrl, "Slot(%s): Already enabled\n",
slot_name(ctrl));
return 0;
}
}
return board_added(ctrl);
}
static int pciehp_enable_slot(struct controller *ctrl)
{
int ret;
pm_runtime_get_sync(&ctrl->pcie->port->dev);
ret = __pciehp_enable_slot(ctrl);
if (ret && ATTN_BUTTN(ctrl))
/* may be blinking */
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
INDICATOR_NOOP);
pm_runtime_put(&ctrl->pcie->port->dev);
mutex_lock(&ctrl->state_lock);
ctrl->state = ret ? OFF_STATE : ON_STATE;
mutex_unlock(&ctrl->state_lock);
return ret;
}
static int __pciehp_disable_slot(struct controller *ctrl, bool safe_removal)
{
u8 getstatus = 0;
if (POWER_CTRL(ctrl)) {
pciehp_get_power_status(ctrl, &getstatus);
if (!getstatus) {
ctrl_info(ctrl, "Slot(%s): Already disabled\n",
slot_name(ctrl));
return -EINVAL;
}
}
remove_board(ctrl, safe_removal);
return 0;
}
static int pciehp_disable_slot(struct controller *ctrl, bool safe_removal)
{
int ret;
pm_runtime_get_sync(&ctrl->pcie->port->dev);
ret = __pciehp_disable_slot(ctrl, safe_removal);
pm_runtime_put(&ctrl->pcie->port->dev);
mutex_lock(&ctrl->state_lock);
ctrl->state = OFF_STATE;
mutex_unlock(&ctrl->state_lock);
return ret;
}
int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot)
{
struct controller *ctrl = to_ctrl(hotplug_slot);
mutex_lock(&ctrl->state_lock);
switch (ctrl->state) {
case BLINKINGON_STATE:
case OFF_STATE:
mutex_unlock(&ctrl->state_lock);
/*
* The IRQ thread becomes a no-op if the user pulls out the
* card before the thread wakes up, so initialize to -ENODEV.
*/
ctrl->request_result = -ENODEV;
pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
wait_event(ctrl->requester,
!atomic_read(&ctrl->pending_events) &&
!ctrl->ist_running);
return ctrl->request_result;
case POWERON_STATE:
ctrl_info(ctrl, "Slot(%s): Already in powering on state\n",
slot_name(ctrl));
break;
case BLINKINGOFF_STATE:
case ON_STATE:
case POWEROFF_STATE:
ctrl_info(ctrl, "Slot(%s): Already enabled\n",
slot_name(ctrl));
break;
default:
ctrl_err(ctrl, "Slot(%s): Invalid state %#x\n",
slot_name(ctrl), ctrl->state);
break;
}
mutex_unlock(&ctrl->state_lock);
return -ENODEV;
}
int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot)
{
struct controller *ctrl = to_ctrl(hotplug_slot);
mutex_lock(&ctrl->state_lock);
switch (ctrl->state) {
case BLINKINGOFF_STATE:
case ON_STATE:
mutex_unlock(&ctrl->state_lock);
pciehp_request(ctrl, DISABLE_SLOT);
wait_event(ctrl->requester,
!atomic_read(&ctrl->pending_events) &&
!ctrl->ist_running);
return ctrl->request_result;
case POWEROFF_STATE:
ctrl_info(ctrl, "Slot(%s): Already in powering off state\n",
slot_name(ctrl));
break;
case BLINKINGON_STATE:
case OFF_STATE:
case POWERON_STATE:
ctrl_info(ctrl, "Slot(%s): Already disabled\n",
slot_name(ctrl));
break;
default:
ctrl_err(ctrl, "Slot(%s): Invalid state %#x\n",
slot_name(ctrl), ctrl->state);
break;
}
mutex_unlock(&ctrl->state_lock);
return -ENODEV;
}