mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-23 07:29:21 +07:00
4bff674990
After commit bcdde7e221
(sysfs: make __sysfs_remove_dir() recursive)
I'm seeing traces analogous to the one below in Thunderbolt testing:
WARNING: CPU: 3 PID: 76 at /scratch/rafael/work/linux-pm/fs/sysfs/group.c:214 sysfs_remove_group+0x59/0xe0()
sysfs group ffffffff81c6c500 not found for kobject '0000:08'
Modules linked in: ...
CPU: 3 PID: 76 Comm: kworker/u16:7 Not tainted 3.13.0-rc1+ #76
Hardware name: Acer Aspire S5-391/Venus , BIOS V1.02 05/29/2012
Workqueue: kacpi_hotplug acpi_hotplug_work_fn
0000000000000009 ffff8801644b9ac8 ffffffff816b23bf 0000000000000007
ffff8801644b9b18 ffff8801644b9b08 ffffffff81046607 ffff88016925b800
0000000000000000 ffffffff81c6c500 ffff88016924f928 ffff88016924f800
Call Trace:
[<ffffffff816b23bf>] dump_stack+0x4e/0x71
[<ffffffff81046607>] warn_slowpath_common+0x87/0xb0
[<ffffffff810466d1>] warn_slowpath_fmt+0x41/0x50
[<ffffffff811e42ef>] ? sysfs_get_dirent_ns+0x6f/0x80
[<ffffffff811e5389>] sysfs_remove_group+0x59/0xe0
[<ffffffff8149f00b>] dpm_sysfs_remove+0x3b/0x50
[<ffffffff81495818>] device_del+0x58/0x1c0
[<ffffffff814959c8>] device_unregister+0x48/0x60
[<ffffffff813254fe>] pci_remove_bus+0x6e/0x80
[<ffffffff81325548>] pci_remove_bus_device+0x38/0x110
[<ffffffff8132555d>] pci_remove_bus_device+0x4d/0x110
[<ffffffff81325639>] pci_stop_and_remove_bus_device+0x19/0x20
[<ffffffff813418d0>] disable_slot+0x20/0xe0
[<ffffffff81341a38>] acpiphp_check_bridge+0xa8/0xd0
[<ffffffff813427ad>] hotplug_event+0x17d/0x220
[<ffffffff81342880>] hotplug_event_work+0x30/0x70
[<ffffffff8136d665>] acpi_hotplug_work_fn+0x18/0x24
[<ffffffff81061331>] process_one_work+0x261/0x450
[<ffffffff81061a7e>] worker_thread+0x21e/0x370
[<ffffffff81061860>] ? rescuer_thread+0x300/0x300
[<ffffffff81068342>] kthread+0xd2/0xe0
[<ffffffff81068270>] ? flush_kthread_worker+0x70/0x70
[<ffffffff816c19bc>] ret_from_fork+0x7c/0xb0
[<ffffffff81068270>] ? flush_kthread_worker+0x70/0x70
(Mika Westerberg sees them too in his tests).
Some investigation documented in kernel bug #65281 led me to the
conclusion that the source of the problem is the device_del() in
pci_stop_dev() as it now causes the sysfs directory of the device to be
removed recursively along with all of its subdirectories. That includes
the sysfs directory of the device's subordinate bus (dev->subordinate) and
its "power" group.
Consequently, when pci_remove_bus() is called for dev->subordinate in
pci_remove_bus_device(), it calls device_unregister(&bus->dev), but at this
point the sysfs directory of bus->dev doesn't exist any more and its
"power" group doesn't exist either. Thus, when dpm_sysfs_remove() called
from device_del() tries to remove that group, it triggers the above
warning.
That indicates a logical mistake in the design of
pci_stop_and_remove_bus_device(), which causes bus device objects to be
left behind their parents (bridge device objects) and can be fixed by
moving the device_del() from pci_stop_dev() into pci_destroy_dev(), so
pci_remove_bus() can be called for the device's subordinate bus before the
device itself is unregistered from the hierarchy. Still, the driver, if
any, should be detached from the device in pci_stop_dev(), so use
device_release_driver() directly from there.
References: https://bugzilla.kernel.org/show_bug.cgi?id=65281#c6
Reported-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
152 lines
3.3 KiB
C
152 lines
3.3 KiB
C
#include <linux/pci.h>
|
|
#include <linux/module.h>
|
|
#include <linux/pci-aspm.h>
|
|
#include "pci.h"
|
|
|
|
static void pci_free_resources(struct pci_dev *dev)
|
|
{
|
|
int i;
|
|
|
|
msi_remove_pci_irq_vectors(dev);
|
|
|
|
pci_cleanup_rom(dev);
|
|
for (i = 0; i < PCI_NUM_RESOURCES; i++) {
|
|
struct resource *res = dev->resource + i;
|
|
if (res->parent)
|
|
release_resource(res);
|
|
}
|
|
}
|
|
|
|
static void pci_stop_dev(struct pci_dev *dev)
|
|
{
|
|
pci_pme_active(dev, false);
|
|
|
|
if (dev->is_added) {
|
|
pci_proc_detach_device(dev);
|
|
pci_remove_sysfs_dev_files(dev);
|
|
device_release_driver(&dev->dev);
|
|
dev->is_added = 0;
|
|
}
|
|
|
|
if (dev->bus->self)
|
|
pcie_aspm_exit_link_state(dev);
|
|
}
|
|
|
|
static void pci_destroy_dev(struct pci_dev *dev)
|
|
{
|
|
device_del(&dev->dev);
|
|
|
|
down_write(&pci_bus_sem);
|
|
list_del(&dev->bus_list);
|
|
up_write(&pci_bus_sem);
|
|
|
|
pci_free_resources(dev);
|
|
put_device(&dev->dev);
|
|
}
|
|
|
|
void pci_remove_bus(struct pci_bus *bus)
|
|
{
|
|
pci_proc_detach_bus(bus);
|
|
|
|
down_write(&pci_bus_sem);
|
|
list_del(&bus->node);
|
|
pci_bus_release_busn_res(bus);
|
|
up_write(&pci_bus_sem);
|
|
pci_remove_legacy_files(bus);
|
|
pcibios_remove_bus(bus);
|
|
device_unregister(&bus->dev);
|
|
}
|
|
EXPORT_SYMBOL(pci_remove_bus);
|
|
|
|
static void pci_stop_bus_device(struct pci_dev *dev)
|
|
{
|
|
struct pci_bus *bus = dev->subordinate;
|
|
struct pci_dev *child, *tmp;
|
|
|
|
/*
|
|
* Stopping an SR-IOV PF device removes all the associated VFs,
|
|
* which will update the bus->devices list and confuse the
|
|
* iterator. Therefore, iterate in reverse so we remove the VFs
|
|
* first, then the PF.
|
|
*/
|
|
if (bus) {
|
|
list_for_each_entry_safe_reverse(child, tmp,
|
|
&bus->devices, bus_list)
|
|
pci_stop_bus_device(child);
|
|
}
|
|
|
|
pci_stop_dev(dev);
|
|
}
|
|
|
|
static void pci_remove_bus_device(struct pci_dev *dev)
|
|
{
|
|
struct pci_bus *bus = dev->subordinate;
|
|
struct pci_dev *child, *tmp;
|
|
|
|
if (bus) {
|
|
list_for_each_entry_safe(child, tmp,
|
|
&bus->devices, bus_list)
|
|
pci_remove_bus_device(child);
|
|
|
|
pci_remove_bus(bus);
|
|
dev->subordinate = NULL;
|
|
}
|
|
|
|
pci_destroy_dev(dev);
|
|
}
|
|
|
|
/**
 * pci_stop_and_remove_bus_device - stop, then remove, a PCI device and
 *				    everything below it
 * @dev: the device to remove
 *
 * Works in two passes over @dev and, if it is a bridge, over its
 * subordinate buses and their children, depth first.  The first pass
 * stops each device, which includes releasing its driver and removing
 * its /proc entry.  The second pass removes the subordinate buses and
 * deletes each device structure from the device lists.
 */
void pci_stop_and_remove_bus_device(struct pci_dev *dev)
{
	/* Pass 1: stop the whole subtree. */
	pci_stop_bus_device(dev);

	/* Pass 2: remove the whole subtree. */
	pci_remove_bus_device(dev);
}
EXPORT_SYMBOL(pci_stop_and_remove_bus_device);
|
|
|
|
void pci_stop_root_bus(struct pci_bus *bus)
|
|
{
|
|
struct pci_dev *child, *tmp;
|
|
struct pci_host_bridge *host_bridge;
|
|
|
|
if (!pci_is_root_bus(bus))
|
|
return;
|
|
|
|
host_bridge = to_pci_host_bridge(bus->bridge);
|
|
list_for_each_entry_safe_reverse(child, tmp,
|
|
&bus->devices, bus_list)
|
|
pci_stop_bus_device(child);
|
|
|
|
/* stop the host bridge */
|
|
device_del(&host_bridge->dev);
|
|
}
|
|
|
|
void pci_remove_root_bus(struct pci_bus *bus)
|
|
{
|
|
struct pci_dev *child, *tmp;
|
|
struct pci_host_bridge *host_bridge;
|
|
|
|
if (!pci_is_root_bus(bus))
|
|
return;
|
|
|
|
host_bridge = to_pci_host_bridge(bus->bridge);
|
|
list_for_each_entry_safe(child, tmp,
|
|
&bus->devices, bus_list)
|
|
pci_remove_bus_device(child);
|
|
pci_remove_bus(bus);
|
|
host_bridge->bus = NULL;
|
|
|
|
/* remove the host bridge */
|
|
put_device(&host_bridge->dev);
|
|
}
|