linux_dsm_epyc7002/drivers/pci/hotplug/pciehp_hpc.c
Mika Westerberg 87d0f2a553 PCI: pciehp: Prevent deadlock on disconnect
This addresses deadlocks in these common cases in hierarchies containing
two switches:

  - All involved ports are runtime suspended and they are unplugged. This
    can happen easily if the drivers involved automatically enable runtime
    PM (xHCI for example does that).

  - System is suspended (e.g., closing the lid on a laptop) with a dock +
    something else connected, and the dock is unplugged while suspended.

These cases lead to the following deadlock:

  INFO: task irq/126-pciehp:198 blocked for more than 120 seconds.
  irq/126-pciehp  D    0   198      2 0x80000000
  Call Trace:
   schedule+0x2c/0x80
   schedule_timeout+0x246/0x350
   wait_for_completion+0xb7/0x140
   kthread_stop+0x49/0x110
   free_irq+0x32/0x70
   pcie_shutdown_notification+0x2f/0x50
   pciehp_remove+0x27/0x50
   pcie_port_remove_service+0x36/0x50
   device_release_driver+0x12/0x20
   bus_remove_device+0xec/0x160
   device_del+0x13b/0x350
   device_unregister+0x1a/0x60
   remove_iter+0x1e/0x30
   device_for_each_child+0x56/0x90
   pcie_port_device_remove+0x22/0x40
   pcie_portdrv_remove+0x20/0x60
   pci_device_remove+0x3e/0xc0
   device_release_driver_internal+0x18c/0x250
   device_release_driver+0x12/0x20
   pci_stop_bus_device+0x6f/0x90
   pci_stop_bus_device+0x31/0x90
   pci_stop_and_remove_bus_device+0x12/0x20
   pciehp_unconfigure_device+0x88/0x140
   pciehp_disable_slot+0x6a/0x110
   pciehp_handle_presence_or_link_change+0x263/0x400
   pciehp_ist+0x1c9/0x1d0
   irq_thread_fn+0x24/0x60
   irq_thread+0xeb/0x190
   kthread+0x120/0x140

  INFO: task irq/190-pciehp:2288 blocked for more than 120 seconds.
  irq/190-pciehp  D    0  2288      2 0x80000000
  Call Trace:
   __schedule+0x2a2/0x880
   schedule+0x2c/0x80
   schedule_preempt_disabled+0xe/0x10
   mutex_lock+0x2c/0x30
   pci_lock_rescan_remove+0x15/0x20
   pciehp_unconfigure_device+0x4d/0x140
   pciehp_disable_slot+0x6a/0x110
   pciehp_handle_presence_or_link_change+0x263/0x400
   pciehp_ist+0x1c9/0x1d0
   irq_thread_fn+0x24/0x60
   irq_thread+0xeb/0x190
   kthread+0x120/0x140

What happens here is that the whole hierarchy is runtime resumed and the
parent PCIe downstream port, which got the hot-remove event, starts
removing devices below it, taking pci_lock_rescan_remove() lock. When the
child PCIe port is runtime resumed it calls pciehp_check_presence() which
ends up calling pciehp_card_present() and pciehp_check_link_active().  Both
of these use pcie_capability_read_word(), which notices that the underlying
device is already gone and returns PCIBIOS_DEVICE_NOT_FOUND with the
capability value set to 0. When pciehp gets this value it thinks that its
child device is also hot-removed and schedules its IRQ thread to handle the
event.

The deadlock happens when the child's IRQ thread runs and tries to acquire
pci_lock_rescan_remove() which is already taken by the parent and the
parent waits for the child's IRQ thread to finish.

Prevent this from happening by checking the return value of
pcie_capability_read_word() and if it is PCIBIOS_DEVICE_NOT_FOUND stop
performing any hot-removal activities.

[bhelgaas: add common scenarios to commit log]
Link: https://lore.kernel.org/r/20191029170022.57528-2-mika.westerberg@linux.intel.com
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2019-11-12 17:17:42 -06:00

949 lines
26 KiB
C

// SPDX-License-Identifier: GPL-2.0+
/*
* PCI Express PCI Hot Plug Driver
*
* Copyright (C) 1995,2001 Compaq Computer Corporation
* Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
* Copyright (C) 2001 IBM Corp.
* Copyright (C) 2003-2004 Intel Corporation
*
* All rights reserved.
*
* Send feedback to <greg@kroah.com>,<kristen.c.accardi@intel.com>
*/
#define dev_fmt(fmt) "pciehp: " fmt
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include "../pci.h"
#include "pciehp.h"
static inline struct pci_dev *ctrl_dev(struct controller *ctrl)
{
return ctrl->pcie->port;
}
static irqreturn_t pciehp_isr(int irq, void *dev_id);
static irqreturn_t pciehp_ist(int irq, void *dev_id);
static int pciehp_poll(void *data);
static inline int pciehp_request_irq(struct controller *ctrl)
{
int retval, irq = ctrl->pcie->irq;
if (pciehp_poll_mode) {
ctrl->poll_thread = kthread_run(&pciehp_poll, ctrl,
"pciehp_poll-%s",
slot_name(ctrl));
return PTR_ERR_OR_ZERO(ctrl->poll_thread);
}
/* Installs the interrupt handler */
retval = request_threaded_irq(irq, pciehp_isr, pciehp_ist,
IRQF_SHARED, "pciehp", ctrl);
if (retval)
ctrl_err(ctrl, "Cannot get irq %d for the hotplug controller\n",
irq);
return retval;
}
static inline void pciehp_free_irq(struct controller *ctrl)
{
if (pciehp_poll_mode)
kthread_stop(ctrl->poll_thread);
else
free_irq(ctrl->pcie->irq, ctrl);
}
static int pcie_poll_cmd(struct controller *ctrl, int timeout)
{
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 slot_status;
do {
pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
if (slot_status == (u16) ~0) {
ctrl_info(ctrl, "%s: no response from device\n",
__func__);
return 0;
}
if (slot_status & PCI_EXP_SLTSTA_CC) {
pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_CC);
return 1;
}
msleep(10);
timeout -= 10;
} while (timeout >= 0);
return 0; /* timeout */
}
static void pcie_wait_cmd(struct controller *ctrl)
{
unsigned int msecs = pciehp_poll_mode ? 2500 : 1000;
unsigned long duration = msecs_to_jiffies(msecs);
unsigned long cmd_timeout = ctrl->cmd_started + duration;
unsigned long now, timeout;
int rc;
/*
* If the controller does not generate notifications for command
* completions, we never need to wait between writes.
*/
if (NO_CMD_CMPL(ctrl))
return;
if (!ctrl->cmd_busy)
return;
/*
* Even if the command has already timed out, we want to call
* pcie_poll_cmd() so it can clear PCI_EXP_SLTSTA_CC.
*/
now = jiffies;
if (time_before_eq(cmd_timeout, now))
timeout = 1;
else
timeout = cmd_timeout - now;
if (ctrl->slot_ctrl & PCI_EXP_SLTCTL_HPIE &&
ctrl->slot_ctrl & PCI_EXP_SLTCTL_CCIE)
rc = wait_event_timeout(ctrl->queue, !ctrl->cmd_busy, timeout);
else
rc = pcie_poll_cmd(ctrl, jiffies_to_msecs(timeout));
if (!rc)
ctrl_info(ctrl, "Timeout on hotplug command %#06x (issued %u msec ago)\n",
ctrl->slot_ctrl,
jiffies_to_msecs(jiffies - ctrl->cmd_started));
}
#define CC_ERRATUM_MASK (PCI_EXP_SLTCTL_PCC | \
PCI_EXP_SLTCTL_PIC | \
PCI_EXP_SLTCTL_AIC | \
PCI_EXP_SLTCTL_EIC)
static void pcie_do_write_cmd(struct controller *ctrl, u16 cmd,
u16 mask, bool wait)
{
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 slot_ctrl_orig, slot_ctrl;
mutex_lock(&ctrl->ctrl_lock);
/*
* Always wait for any previous command that might still be in progress
*/
pcie_wait_cmd(ctrl);
pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
if (slot_ctrl == (u16) ~0) {
ctrl_info(ctrl, "%s: no response from device\n", __func__);
goto out;
}
slot_ctrl_orig = slot_ctrl;
slot_ctrl &= ~mask;
slot_ctrl |= (cmd & mask);
ctrl->cmd_busy = 1;
smp_mb();
ctrl->slot_ctrl = slot_ctrl;
pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, slot_ctrl);
ctrl->cmd_started = jiffies;
/*
* Controllers with the Intel CF118 and similar errata advertise
* Command Completed support, but they only set Command Completed
* if we change the "Control" bits for power, power indicator,
* attention indicator, or interlock. If we only change the
* "Enable" bits, they never set the Command Completed bit.
*/
if (pdev->broken_cmd_compl &&
(slot_ctrl_orig & CC_ERRATUM_MASK) == (slot_ctrl & CC_ERRATUM_MASK))
ctrl->cmd_busy = 0;
/*
* Optionally wait for the hardware to be ready for a new command,
* indicating completion of the above issued command.
*/
if (wait)
pcie_wait_cmd(ctrl);
out:
mutex_unlock(&ctrl->ctrl_lock);
}
/**
* pcie_write_cmd - Issue controller command
* @ctrl: controller to which the command is issued
* @cmd: command value written to slot control register
* @mask: bitmask of slot control register to be modified
*/
static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
{
pcie_do_write_cmd(ctrl, cmd, mask, true);
}
/* Same as above without waiting for the hardware to latch */
static void pcie_write_cmd_nowait(struct controller *ctrl, u16 cmd, u16 mask)
{
pcie_do_write_cmd(ctrl, cmd, mask, false);
}
/**
* pciehp_check_link_active() - Is the link active
* @ctrl: PCIe hotplug controller
*
* Check whether the downstream link is currently active. Note it is
* possible that the card is removed immediately after this so the
* caller may need to take it into account.
*
* If the hotplug controller itself is not available anymore returns
* %-ENODEV.
*/
int pciehp_check_link_active(struct controller *ctrl)
{
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 lnk_status;
int ret;
ret = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
if (ret == PCIBIOS_DEVICE_NOT_FOUND || lnk_status == (u16)~0)
return -ENODEV;
ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
return ret;
}
static bool pci_bus_check_dev(struct pci_bus *bus, int devfn)
{
u32 l;
int count = 0;
int delay = 1000, step = 20;
bool found = false;
do {
found = pci_bus_read_dev_vendor_id(bus, devfn, &l, 0);
count++;
if (found)
break;
msleep(step);
delay -= step;
} while (delay > 0);
if (count > 1)
pr_debug("pci %04x:%02x:%02x.%d id reading try %d times with interval %d ms to get %08x\n",
pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
PCI_FUNC(devfn), count, step, l);
return found;
}
int pciehp_check_link_status(struct controller *ctrl)
{
struct pci_dev *pdev = ctrl_dev(ctrl);
bool found;
u16 lnk_status;
if (!pcie_wait_for_link(pdev, true))
return -1;
found = pci_bus_check_dev(ctrl->pcie->port->subordinate,
PCI_DEVFN(0, 0));
/* ignore link or presence changes up to this point */
if (found)
atomic_and(~(PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC),
&ctrl->pending_events);
pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
if ((lnk_status & PCI_EXP_LNKSTA_LT) ||
!(lnk_status & PCI_EXP_LNKSTA_NLW)) {
ctrl_err(ctrl, "link training error: status %#06x\n",
lnk_status);
return -1;
}
pcie_update_link_speed(ctrl->pcie->port->subordinate, lnk_status);
if (!found)
return -1;
return 0;
}
static int __pciehp_link_set(struct controller *ctrl, bool enable)
{
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 lnk_ctrl;
pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &lnk_ctrl);
if (enable)
lnk_ctrl &= ~PCI_EXP_LNKCTL_LD;
else
lnk_ctrl |= PCI_EXP_LNKCTL_LD;
pcie_capability_write_word(pdev, PCI_EXP_LNKCTL, lnk_ctrl);
ctrl_dbg(ctrl, "%s: lnk_ctrl = %x\n", __func__, lnk_ctrl);
return 0;
}
static int pciehp_link_enable(struct controller *ctrl)
{
return __pciehp_link_set(ctrl, true);
}
int pciehp_get_raw_indicator_status(struct hotplug_slot *hotplug_slot,
u8 *status)
{
struct controller *ctrl = to_ctrl(hotplug_slot);
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 slot_ctrl;
pci_config_pm_runtime_get(pdev);
pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
pci_config_pm_runtime_put(pdev);
*status = (slot_ctrl & (PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC)) >> 6;
return 0;
}
int pciehp_get_attention_status(struct hotplug_slot *hotplug_slot, u8 *status)
{
struct controller *ctrl = to_ctrl(hotplug_slot);
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 slot_ctrl;
pci_config_pm_runtime_get(pdev);
pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
pci_config_pm_runtime_put(pdev);
ctrl_dbg(ctrl, "%s: SLOTCTRL %x, value read %x\n", __func__,
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_ctrl);
switch (slot_ctrl & PCI_EXP_SLTCTL_AIC) {
case PCI_EXP_SLTCTL_ATTN_IND_ON:
*status = 1; /* On */
break;
case PCI_EXP_SLTCTL_ATTN_IND_BLINK:
*status = 2; /* Blink */
break;
case PCI_EXP_SLTCTL_ATTN_IND_OFF:
*status = 0; /* Off */
break;
default:
*status = 0xFF;
break;
}
return 0;
}
void pciehp_get_power_status(struct controller *ctrl, u8 *status)
{
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 slot_ctrl;
pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
ctrl_dbg(ctrl, "%s: SLOTCTRL %x value read %x\n", __func__,
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_ctrl);
switch (slot_ctrl & PCI_EXP_SLTCTL_PCC) {
case PCI_EXP_SLTCTL_PWR_ON:
*status = 1; /* On */
break;
case PCI_EXP_SLTCTL_PWR_OFF:
*status = 0; /* Off */
break;
default:
*status = 0xFF;
break;
}
}
void pciehp_get_latch_status(struct controller *ctrl, u8 *status)
{
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 slot_status;
pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
*status = !!(slot_status & PCI_EXP_SLTSTA_MRLSS);
}
/**
* pciehp_card_present() - Is the card present
* @ctrl: PCIe hotplug controller
*
* Function checks whether the card is currently present in the slot and
* in that case returns true. Note it is possible that the card is
* removed immediately after the check so the caller may need to take
* this into account.
*
* It the hotplug controller itself is not available anymore returns
* %-ENODEV.
*/
int pciehp_card_present(struct controller *ctrl)
{
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 slot_status;
int ret;
ret = pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
if (ret == PCIBIOS_DEVICE_NOT_FOUND || slot_status == (u16)~0)
return -ENODEV;
return !!(slot_status & PCI_EXP_SLTSTA_PDS);
}
/**
* pciehp_card_present_or_link_active() - whether given slot is occupied
* @ctrl: PCIe hotplug controller
*
* Unlike pciehp_card_present(), which determines presence solely from the
* Presence Detect State bit, this helper also returns true if the Link Active
* bit is set. This is a concession to broken hotplug ports which hardwire
* Presence Detect State to zero, such as Wilocity's [1ae9:0200].
*
* Returns: %1 if the slot is occupied and %0 if it is not. If the hotplug
* port is not present anymore returns %-ENODEV.
*/
int pciehp_card_present_or_link_active(struct controller *ctrl)
{
int ret;
ret = pciehp_card_present(ctrl);
if (ret)
return ret;
return pciehp_check_link_active(ctrl);
}
int pciehp_query_power_fault(struct controller *ctrl)
{
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 slot_status;
pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
return !!(slot_status & PCI_EXP_SLTSTA_PFD);
}
int pciehp_set_raw_indicator_status(struct hotplug_slot *hotplug_slot,
u8 status)
{
struct controller *ctrl = to_ctrl(hotplug_slot);
struct pci_dev *pdev = ctrl_dev(ctrl);
pci_config_pm_runtime_get(pdev);
pcie_write_cmd_nowait(ctrl, status << 6,
PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC);
pci_config_pm_runtime_put(pdev);
return 0;
}
/**
* pciehp_set_indicators() - set attention indicator, power indicator, or both
* @ctrl: PCIe hotplug controller
* @pwr: one of:
* PCI_EXP_SLTCTL_PWR_IND_ON
* PCI_EXP_SLTCTL_PWR_IND_BLINK
* PCI_EXP_SLTCTL_PWR_IND_OFF
* @attn: one of:
* PCI_EXP_SLTCTL_ATTN_IND_ON
* PCI_EXP_SLTCTL_ATTN_IND_BLINK
* PCI_EXP_SLTCTL_ATTN_IND_OFF
*
* Either @pwr or @attn can also be INDICATOR_NOOP to leave that indicator
* unchanged.
*/
void pciehp_set_indicators(struct controller *ctrl, int pwr, int attn)
{
u16 cmd = 0, mask = 0;
if (PWR_LED(ctrl) && pwr != INDICATOR_NOOP) {
cmd |= (pwr & PCI_EXP_SLTCTL_PIC);
mask |= PCI_EXP_SLTCTL_PIC;
}
if (ATTN_LED(ctrl) && attn != INDICATOR_NOOP) {
cmd |= (attn & PCI_EXP_SLTCTL_AIC);
mask |= PCI_EXP_SLTCTL_AIC;
}
if (cmd) {
pcie_write_cmd_nowait(ctrl, cmd, mask);
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd);
}
}
int pciehp_power_on_slot(struct controller *ctrl)
{
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 slot_status;
int retval;
/* Clear power-fault bit from previous power failures */
pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
if (slot_status & PCI_EXP_SLTSTA_PFD)
pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_PFD);
ctrl->power_fault_detected = 0;
pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_ON, PCI_EXP_SLTCTL_PCC);
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
PCI_EXP_SLTCTL_PWR_ON);
retval = pciehp_link_enable(ctrl);
if (retval)
ctrl_err(ctrl, "%s: Can not enable the link!\n", __func__);
return retval;
}
void pciehp_power_off_slot(struct controller *ctrl)
{
pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_OFF, PCI_EXP_SLTCTL_PCC);
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
PCI_EXP_SLTCTL_PWR_OFF);
}
static irqreturn_t pciehp_isr(int irq, void *dev_id)
{
struct controller *ctrl = (struct controller *)dev_id;
struct pci_dev *pdev = ctrl_dev(ctrl);
struct device *parent = pdev->dev.parent;
u16 status, events;
/*
* Interrupts only occur in D3hot or shallower and only if enabled
* in the Slot Control register (PCIe r4.0, sec 6.7.3.4).
*/
if (pdev->current_state == PCI_D3cold ||
(!(ctrl->slot_ctrl & PCI_EXP_SLTCTL_HPIE) && !pciehp_poll_mode))
return IRQ_NONE;
/*
* Keep the port accessible by holding a runtime PM ref on its parent.
* Defer resume of the parent to the IRQ thread if it's suspended.
* Mask the interrupt until then.
*/
if (parent) {
pm_runtime_get_noresume(parent);
if (!pm_runtime_active(parent)) {
pm_runtime_put(parent);
disable_irq_nosync(irq);
atomic_or(RERUN_ISR, &ctrl->pending_events);
return IRQ_WAKE_THREAD;
}
}
pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &status);
if (status == (u16) ~0) {
ctrl_info(ctrl, "%s: no response from device\n", __func__);
if (parent)
pm_runtime_put(parent);
return IRQ_NONE;
}
/*
* Slot Status contains plain status bits as well as event
* notification bits; right now we only want the event bits.
*/
events = status & (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC |
PCI_EXP_SLTSTA_DLLSC);
/*
* If we've already reported a power fault, don't report it again
* until we've done something to handle it.
*/
if (ctrl->power_fault_detected)
events &= ~PCI_EXP_SLTSTA_PFD;
if (!events) {
if (parent)
pm_runtime_put(parent);
return IRQ_NONE;
}
pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, events);
ctrl_dbg(ctrl, "pending interrupts %#06x from Slot Status\n", events);
if (parent)
pm_runtime_put(parent);
/*
* Command Completed notifications are not deferred to the
* IRQ thread because it may be waiting for their arrival.
*/
if (events & PCI_EXP_SLTSTA_CC) {
ctrl->cmd_busy = 0;
smp_mb();
wake_up(&ctrl->queue);
if (events == PCI_EXP_SLTSTA_CC)
return IRQ_HANDLED;
events &= ~PCI_EXP_SLTSTA_CC;
}
if (pdev->ignore_hotplug) {
ctrl_dbg(ctrl, "ignoring hotplug event %#06x\n", events);
return IRQ_HANDLED;
}
/* Save pending events for consumption by IRQ thread. */
atomic_or(events, &ctrl->pending_events);
return IRQ_WAKE_THREAD;
}
static irqreturn_t pciehp_ist(int irq, void *dev_id)
{
struct controller *ctrl = (struct controller *)dev_id;
struct pci_dev *pdev = ctrl_dev(ctrl);
irqreturn_t ret;
u32 events;
ctrl->ist_running = true;
pci_config_pm_runtime_get(pdev);
/* rerun pciehp_isr() if the port was inaccessible on interrupt */
if (atomic_fetch_and(~RERUN_ISR, &ctrl->pending_events) & RERUN_ISR) {
ret = pciehp_isr(irq, dev_id);
enable_irq(irq);
if (ret != IRQ_WAKE_THREAD) {
pci_config_pm_runtime_put(pdev);
return ret;
}
}
synchronize_hardirq(irq);
events = atomic_xchg(&ctrl->pending_events, 0);
if (!events) {
pci_config_pm_runtime_put(pdev);
return IRQ_NONE;
}
/* Check Attention Button Pressed */
if (events & PCI_EXP_SLTSTA_ABP) {
ctrl_info(ctrl, "Slot(%s): Attention button pressed\n",
slot_name(ctrl));
pciehp_handle_button_press(ctrl);
}
/* Check Power Fault Detected */
if ((events & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
ctrl->power_fault_detected = 1;
ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl));
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
PCI_EXP_SLTCTL_ATTN_IND_ON);
}
/*
* Disable requests have higher priority than Presence Detect Changed
* or Data Link Layer State Changed events.
*/
down_read(&ctrl->reset_lock);
if (events & DISABLE_SLOT)
pciehp_handle_disable_request(ctrl);
else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC))
pciehp_handle_presence_or_link_change(ctrl, events);
up_read(&ctrl->reset_lock);
pci_config_pm_runtime_put(pdev);
ctrl->ist_running = false;
wake_up(&ctrl->requester);
return IRQ_HANDLED;
}
static int pciehp_poll(void *data)
{
struct controller *ctrl = data;
schedule_timeout_idle(10 * HZ); /* start with 10 sec delay */
while (!kthread_should_stop()) {
/* poll for interrupt events or user requests */
while (pciehp_isr(IRQ_NOTCONNECTED, ctrl) == IRQ_WAKE_THREAD ||
atomic_read(&ctrl->pending_events))
pciehp_ist(IRQ_NOTCONNECTED, ctrl);
if (pciehp_poll_time <= 0 || pciehp_poll_time > 60)
pciehp_poll_time = 2; /* clamp to sane value */
schedule_timeout_idle(pciehp_poll_time * HZ);
}
return 0;
}
static void pcie_enable_notification(struct controller *ctrl)
{
u16 cmd, mask;
/*
* TBD: Power fault detected software notification support.
*
* Power fault detected software notification is not enabled
* now, because it caused power fault detected interrupt storm
* on some machines. On those machines, power fault detected
* bit in the slot status register was set again immediately
* when it is cleared in the interrupt service routine, and
* next power fault detected interrupt was notified again.
*/
/*
* Always enable link events: thus link-up and link-down shall
* always be treated as hotplug and unplug respectively. Enable
* presence detect only if Attention Button is not present.
*/
cmd = PCI_EXP_SLTCTL_DLLSCE;
if (ATTN_BUTTN(ctrl))
cmd |= PCI_EXP_SLTCTL_ABPE;
else
cmd |= PCI_EXP_SLTCTL_PDCE;
if (!pciehp_poll_mode)
cmd |= PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE;
mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE |
PCI_EXP_SLTCTL_PFDE |
PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE |
PCI_EXP_SLTCTL_DLLSCE);
pcie_write_cmd_nowait(ctrl, cmd, mask);
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd);
}
static void pcie_disable_notification(struct controller *ctrl)
{
u16 mask;
mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE |
PCI_EXP_SLTCTL_MRLSCE | PCI_EXP_SLTCTL_PFDE |
PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE |
PCI_EXP_SLTCTL_DLLSCE);
pcie_write_cmd(ctrl, 0, mask);
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, 0);
}
void pcie_clear_hotplug_events(struct controller *ctrl)
{
pcie_capability_write_word(ctrl_dev(ctrl), PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
}
void pcie_enable_interrupt(struct controller *ctrl)
{
u16 mask;
mask = PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_DLLSCE;
pcie_write_cmd(ctrl, mask, mask);
}
void pcie_disable_interrupt(struct controller *ctrl)
{
u16 mask;
/*
* Mask hot-plug interrupt to prevent it triggering immediately
* when the link goes inactive (we still get PME when any of the
* enabled events is detected). Same goes with Link Layer State
* changed event which generates PME immediately when the link goes
* inactive so mask it as well.
*/
mask = PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_DLLSCE;
pcie_write_cmd(ctrl, 0, mask);
}
/*
* pciehp has a 1:1 bus:slot relationship so we ultimately want a secondary
* bus reset of the bridge, but at the same time we want to ensure that it is
* not seen as a hot-unplug, followed by the hot-plug of the device. Thus,
* disable link state notification and presence detection change notification
* momentarily, if we see that they could interfere. Also, clear any spurious
* events after.
*/
int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, int probe)
{
struct controller *ctrl = to_ctrl(hotplug_slot);
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 stat_mask = 0, ctrl_mask = 0;
int rc;
if (probe)
return 0;
down_write(&ctrl->reset_lock);
if (!ATTN_BUTTN(ctrl)) {
ctrl_mask |= PCI_EXP_SLTCTL_PDCE;
stat_mask |= PCI_EXP_SLTSTA_PDC;
}
ctrl_mask |= PCI_EXP_SLTCTL_DLLSCE;
stat_mask |= PCI_EXP_SLTSTA_DLLSC;
pcie_write_cmd(ctrl, 0, ctrl_mask);
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, 0);
rc = pci_bridge_secondary_bus_reset(ctrl->pcie->port);
pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, stat_mask);
pcie_write_cmd_nowait(ctrl, ctrl_mask, ctrl_mask);
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, ctrl_mask);
up_write(&ctrl->reset_lock);
return rc;
}
int pcie_init_notification(struct controller *ctrl)
{
if (pciehp_request_irq(ctrl))
return -1;
pcie_enable_notification(ctrl);
ctrl->notification_enabled = 1;
return 0;
}
void pcie_shutdown_notification(struct controller *ctrl)
{
if (ctrl->notification_enabled) {
pcie_disable_notification(ctrl);
pciehp_free_irq(ctrl);
ctrl->notification_enabled = 0;
}
}
static inline void dbg_ctrl(struct controller *ctrl)
{
struct pci_dev *pdev = ctrl->pcie->port;
u16 reg16;
ctrl_dbg(ctrl, "Slot Capabilities : 0x%08x\n", ctrl->slot_cap);
pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &reg16);
ctrl_dbg(ctrl, "Slot Status : 0x%04x\n", reg16);
pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &reg16);
ctrl_dbg(ctrl, "Slot Control : 0x%04x\n", reg16);
}
#define FLAG(x, y) (((x) & (y)) ? '+' : '-')
struct controller *pcie_init(struct pcie_device *dev)
{
struct controller *ctrl;
u32 slot_cap, link_cap;
u8 poweron;
struct pci_dev *pdev = dev->port;
struct pci_bus *subordinate = pdev->subordinate;
ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
if (!ctrl)
return NULL;
ctrl->pcie = dev;
pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &slot_cap);
if (pdev->hotplug_user_indicators)
slot_cap &= ~(PCI_EXP_SLTCAP_AIP | PCI_EXP_SLTCAP_PIP);
/*
* We assume no Thunderbolt controllers support Command Complete events,
* but some controllers falsely claim they do.
*/
if (pdev->is_thunderbolt)
slot_cap |= PCI_EXP_SLTCAP_NCCS;
ctrl->slot_cap = slot_cap;
mutex_init(&ctrl->ctrl_lock);
mutex_init(&ctrl->state_lock);
init_rwsem(&ctrl->reset_lock);
init_waitqueue_head(&ctrl->requester);
init_waitqueue_head(&ctrl->queue);
INIT_DELAYED_WORK(&ctrl->button_work, pciehp_queue_pushbutton_work);
dbg_ctrl(ctrl);
down_read(&pci_bus_sem);
ctrl->state = list_empty(&subordinate->devices) ? OFF_STATE : ON_STATE;
up_read(&pci_bus_sem);
/* Check if Data Link Layer Link Active Reporting is implemented */
pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &link_cap);
/* Clear all remaining event bits in Slot Status register. */
pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_CC |
PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC);
ctrl_info(ctrl, "Slot #%d AttnBtn%c PwrCtrl%c MRL%c AttnInd%c PwrInd%c HotPlug%c Surprise%c Interlock%c NoCompl%c LLActRep%c%s\n",
(slot_cap & PCI_EXP_SLTCAP_PSN) >> 19,
FLAG(slot_cap, PCI_EXP_SLTCAP_ABP),
FLAG(slot_cap, PCI_EXP_SLTCAP_PCP),
FLAG(slot_cap, PCI_EXP_SLTCAP_MRLSP),
FLAG(slot_cap, PCI_EXP_SLTCAP_AIP),
FLAG(slot_cap, PCI_EXP_SLTCAP_PIP),
FLAG(slot_cap, PCI_EXP_SLTCAP_HPC),
FLAG(slot_cap, PCI_EXP_SLTCAP_HPS),
FLAG(slot_cap, PCI_EXP_SLTCAP_EIP),
FLAG(slot_cap, PCI_EXP_SLTCAP_NCCS),
FLAG(link_cap, PCI_EXP_LNKCAP_DLLLARC),
pdev->broken_cmd_compl ? " (with Cmd Compl erratum)" : "");
/*
* If empty slot's power status is on, turn power off. The IRQ isn't
* requested yet, so avoid triggering a notification with this command.
*/
if (POWER_CTRL(ctrl)) {
pciehp_get_power_status(ctrl, &poweron);
if (!pciehp_card_present_or_link_active(ctrl) && poweron) {
pcie_disable_notification(ctrl);
pciehp_power_off_slot(ctrl);
}
}
return ctrl;
}
void pciehp_release_ctrl(struct controller *ctrl)
{
cancel_delayed_work_sync(&ctrl->button_work);
kfree(ctrl);
}
static void quirk_cmd_compl(struct pci_dev *pdev)
{
u32 slot_cap;
if (pci_is_pcie(pdev)) {
pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &slot_cap);
if (slot_cap & PCI_EXP_SLTCAP_HPC &&
!(slot_cap & PCI_EXP_SLTCAP_NCCS))
pdev->broken_cmd_compl = 1;
}
}
DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0400,
PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0401,
PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_HXT, 0x0401,
PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);