mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-23 20:50:32 +07:00
7ae0567fd3
Fix the following kernel oops problem that happens when removing PCI bridge with pciehp loaded. It should also occur with other hotplug driver that is implemented as a bridge's driver. [ 459.997257] pciehp 0000:2f:04.0:pcie24: unloading service driver pciehp [ 459.997495] general protection fault: 0000 [#1] SMP [ 459.997737] last sysfs file: /sys/devices/pci0000:00/0000:00:04.0/0000:2e:00.0/0000:2f:04.0/remove [ 459.997964] CPU 4 [ 459.998129] Modules linked in: pciehp ipv6 autofs4 hidp rfcomm l2cap bluetooth sunrpc cpufreq_ondemand acpi_cpufreq dm_mirror dm_region_hash dm_log dm_multipath scsi_dh dm_mod sbs sbshc battery ac parport_pc lp parport mptspi mptscsih mptbase scsi_transport_spi e1000e sg sr_mod cdrom button serio_raw i2c_i801 i2c_core shpchp pcspkr ata_piix libata megaraid_sas sd_mod scsi_mod crc_t10dif ext3 jbd uhci_hcd ohci_hcd ehci_hcd [last unloaded: microcode] [ 459.998129] Pid: 56, comm: events/4 Not tainted 2.6.29-rc8-kk #1 PRIMERGY [ 459.998129] RIP: 0010:[<ffffffff803bf047>] [<ffffffff803bf047>] pci_slot_release+0x37/0x100 [ 459.998129] RSP: 0018:ffff88083b3bf9e0 EFLAGS: 00010246 [ 459.998129] RAX: ffff88083adc5158 RBX: ffff880836c1bc80 RCX: 6b6b6b6b6b6b6b6b [ 459.998129] RDX: 0000000000000000 RSI: ffffffff803a77f0 RDI: ffff880836c1bc48 [ 459.998129] RBP: ffff88083b3bfa00 R08: 0000000000000002 R09: 0000000000000000 [ 459.998129] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880836c1bc48 [ 459.998129] R13: ffff880836c1bc20 R14: ffff880836c1bc48 R15: ffff880836d1ec38 [ 459.998129] FS: 0000000000000000(0000) GS:ffff88083ccc3770(0000) knlGS:0000000000000000 [ 459.998129] CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b [ 459.998129] CR2: 00007f1562f1d558 CR3: 0000000838090000 CR4: 00000000000006e0 [ 459.998129] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 459.998129] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 459.998129] Process events/4 (pid: 56, threadinfo ffff88083b3be000, task 
ffff88083b3b3e40) [ 459.998129] Stack: [ 459.998129] ffff880836c1bc80 ffff880836c1bc48 ffffffff80793320 ffff88083b0d0960 [ 459.998129] ffff88083b3bfa30 ffffffff803a788a ffff880836c1bc80 ffffffff803a77f0 [ 459.998129] ffff880836c1bc20 ffff880836d1ec38 ffff88083b3bfa50 ffffffff803a8ce7 [ 459.998129] Call Trace: [ 459.998129] [<ffffffff803a788a>] kobject_release+0x9a/0x290 [ 459.998129] [<ffffffff803a77f0>] ? kobject_release+0x0/0x290 [ 459.998129] [<ffffffff803a8ce7>] kref_put+0x37/0x80 [ 459.998129] [<ffffffff803a76f7>] kobject_put+0x27/0x60 [ 459.998129] [<ffffffff803bebcc>] ? pci_destroy_slot+0x3c/0xc0 [ 459.998129] [<ffffffff803bebd5>] pci_destroy_slot+0x45/0xc0 [ 459.998129] [<ffffffff803c797d>] pci_hp_deregister+0x13d/0x210 [ 459.998129] [<ffffffffa031141d>] cleanup_slots+0x2d/0x80 [pciehp] [ 459.998129] [<ffffffffa0311735>] pciehp_remove+0x15/0x30 [pciehp] [ 459.998129] [<ffffffff803c4c99>] pcie_port_remove_service+0x69/0x90 [ 459.998129] [<ffffffff80441da9>] __device_release_driver+0x59/0x90 [ 459.998129] [<ffffffff80441edb>] device_release_driver+0x2b/0x40 [ 459.998129] [<ffffffff804419d6>] bus_remove_device+0xa6/0x120 [ 459.998129] [<ffffffff8043e46b>] device_del+0x12b/0x190 [ 459.998129] [<ffffffff803c4d90>] ? remove_iter+0x0/0x40 [ 459.998129] [<ffffffff8043e4f6>] device_unregister+0x26/0x70 [ 459.998129] [<ffffffff803c4dbf>] remove_iter+0x2f/0x40 [ 459.998129] [<ffffffff8043ddf3>] device_for_each_child+0x33/0x60 [ 459.998129] [<ffffffff8033ee30>] ? 
sysfs_schedule_callback_work+0x0/0x50 [ 459.998129] [<ffffffff803c4d30>] pcie_port_device_remove+0x30/0x80 [ 459.998129] [<ffffffff803c55a1>] pcie_portdrv_remove+0x11/0x20 [ 459.998129] [<ffffffff803bfeb2>] pci_device_remove+0x32/0x70 [ 459.998129] [<ffffffff80441da9>] __device_release_driver+0x59/0x90 [ 459.998129] [<ffffffff80441edb>] device_release_driver+0x2b/0x40 [ 459.998129] [<ffffffff804419d6>] bus_remove_device+0xa6/0x120 [ 459.998129] [<ffffffff8043e46b>] device_del+0x12b/0x190 [ 459.998129] [<ffffffff8043e4f6>] device_unregister+0x26/0x70 [ 459.998129] [<ffffffff803ba969>] pci_stop_dev+0x49/0x60 [ 459.998129] [<ffffffff803baab0>] pci_remove_bus_device+0x40/0xc0 [ 459.998129] [<ffffffff803c10d9>] remove_callback+0x29/0x40 [ 459.998129] [<ffffffff8033ee4f>] sysfs_schedule_callback_work+0x1f/0x50 [ 459.998129] [<ffffffff8025769a>] run_workqueue+0x15a/0x230 [ 459.998129] [<ffffffff80257648>] ? run_workqueue+0x108/0x230 [ 459.998129] [<ffffffff8025846f>] worker_thread+0x9f/0x100 [ 459.998129] [<ffffffff8025bce0>] ? autoremove_wake_function+0x0/0x40 [ 459.998129] [<ffffffff802583d0>] ? worker_thread+0x0/0x100 [ 459.998129] [<ffffffff8025b89d>] kthread+0x4d/0x80 [ 459.998129] [<ffffffff8020d4ba>] child_rip+0xa/0x20 [ 459.998129] [<ffffffff8020cebc>] ? restore_args+0x0/0x30 [ 459.998129] [<ffffffff8025b850>] ? kthread+0x0/0x80 [ 459.998129] [<ffffffff8020d4b0>] ? child_rip+0x0/0x20 [ 459.998129] Code: 56 49 89 fe 41 55 4c 8d 6f d8 41 54 53 74 09 f6 05 b8 05 c7 00 08 75 72 49 8b 45 00 48 8b 48 28 eb 05 66 90 48 89 f1 49 8b 45 00 <48> 8b 31 48 83 c0 28 0f 18 0e 48 39 c1 74 1c 8b 41 38 41 0f b6 [ 459.998129] RIP [<ffffffff803bf047>] pci_slot_release+0x37/0x100 [ 459.998129] RSP <ffff88083b3bf9e0> [ 460.018595] ---[ end trace 5a08d2095374aedc ]--- The pci_remove_bus_device() removes all buses and devices under the bridge, and then removes the bridge. 
So the remove() callback of the hotplug drivers implemented as a bridge's driver is executed after the struct pci_bus of the bridge's secondary bus is removed. The remove() callback of those drivers unregisters the slot using pci_destroy_slot(), and the slot's release callback refers to the struct pci_bus that was already freed. This is the cause of the kernel oops. This patch solves the problem by stopping bus drivers before removing the bridge and its child bus and devices. Acked-by: Alex Chiang <achiang@hp.com> Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
156 lines
3.6 KiB
C
156 lines
3.6 KiB
C
#include <linux/pci.h>
|
|
#include <linux/module.h>
|
|
#include <linux/pci-aspm.h>
|
|
#include "pci.h"
|
|
|
|
static void pci_free_resources(struct pci_dev *dev)
|
|
{
|
|
int i;
|
|
|
|
msi_remove_pci_irq_vectors(dev);
|
|
|
|
pci_cleanup_rom(dev);
|
|
for (i = 0; i < PCI_NUM_RESOURCES; i++) {
|
|
struct resource *res = dev->resource + i;
|
|
if (res->parent)
|
|
release_resource(res);
|
|
}
|
|
}
|
|
|
|
static void pci_stop_dev(struct pci_dev *dev)
|
|
{
|
|
if (dev->is_added) {
|
|
pci_proc_detach_device(dev);
|
|
pci_remove_sysfs_dev_files(dev);
|
|
device_unregister(&dev->dev);
|
|
dev->is_added = 0;
|
|
}
|
|
|
|
if (dev->bus->self)
|
|
pcie_aspm_exit_link_state(dev);
|
|
}
|
|
|
|
static void pci_destroy_dev(struct pci_dev *dev)
|
|
{
|
|
pci_stop_dev(dev);
|
|
|
|
/* Remove the device from the device lists, and prevent any further
|
|
* list accesses from this device */
|
|
down_write(&pci_bus_sem);
|
|
list_del(&dev->bus_list);
|
|
dev->bus_list.next = dev->bus_list.prev = NULL;
|
|
up_write(&pci_bus_sem);
|
|
|
|
pci_free_resources(dev);
|
|
pci_dev_put(dev);
|
|
}
|
|
|
|
/**
 * pci_remove_device_safe - remove a hotplug device that nobody is using
 * @dev: the device to remove
 *
 * Destroys the device (removing its structure from the device lists and
 * notifying userspace via /sbin/hotplug), but refuses to do so while
 * pci_dev_driver() reports a driver for it.
 *
 * Returns 0 on success, or -EBUSY if the device is in use by a driver.
 *
 * Currently compiled out by the "#if 0" below.
 */
#if 0
int pci_remove_device_safe(struct pci_dev *dev)
{
	if (!pci_dev_driver(dev)) {
		pci_destroy_dev(dev);
		return 0;
	}

	return -EBUSY;
}
#endif /* 0 */
|
|
|
|
void pci_remove_bus(struct pci_bus *pci_bus)
|
|
{
|
|
pci_proc_detach_bus(pci_bus);
|
|
|
|
down_write(&pci_bus_sem);
|
|
list_del(&pci_bus->node);
|
|
up_write(&pci_bus_sem);
|
|
if (!pci_bus->is_added)
|
|
return;
|
|
|
|
pci_remove_legacy_files(pci_bus);
|
|
device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity);
|
|
device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity);
|
|
device_unregister(&pci_bus->dev);
|
|
}
|
|
EXPORT_SYMBOL(pci_remove_bus);
|
|
|
|
/**
|
|
* pci_remove_bus_device - remove a PCI device and any children
|
|
* @dev: the device to remove
|
|
*
|
|
* Remove a PCI device from the device lists, informing the drivers
|
|
* that the device has been removed. We also remove any subordinate
|
|
* buses and children in a depth-first manner.
|
|
*
|
|
* For each device we remove, delete the device structure from the
|
|
* device lists, remove the /proc entry, and notify userspace
|
|
* (/sbin/hotplug).
|
|
*/
|
|
void pci_remove_bus_device(struct pci_dev *dev)
|
|
{
|
|
pci_stop_bus_device(dev);
|
|
if (dev->subordinate) {
|
|
struct pci_bus *b = dev->subordinate;
|
|
|
|
pci_remove_behind_bridge(dev);
|
|
pci_remove_bus(b);
|
|
dev->subordinate = NULL;
|
|
}
|
|
|
|
pci_destroy_dev(dev);
|
|
}
|
|
|
|
/**
|
|
* pci_remove_behind_bridge - remove all devices behind a PCI bridge
|
|
* @dev: PCI bridge device
|
|
*
|
|
* Remove all devices on the bus, except for the parent bridge.
|
|
* This also removes any child buses, and any devices they may
|
|
* contain in a depth-first manner.
|
|
*/
|
|
void pci_remove_behind_bridge(struct pci_dev *dev)
|
|
{
|
|
struct list_head *l, *n;
|
|
|
|
if (dev->subordinate)
|
|
list_for_each_safe(l, n, &dev->subordinate->devices)
|
|
pci_remove_bus_device(pci_dev_b(l));
|
|
}
|
|
|
|
static void pci_stop_bus_devices(struct pci_bus *bus)
|
|
{
|
|
struct list_head *l, *n;
|
|
|
|
list_for_each_safe(l, n, &bus->devices) {
|
|
struct pci_dev *dev = pci_dev_b(l);
|
|
pci_stop_bus_device(dev);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* pci_stop_bus_device - stop a PCI device and any children
|
|
* @dev: the device to stop
|
|
*
|
|
* Stop a PCI device (detach the driver, remove from the global list
|
|
* and so on). This also stop any subordinate buses and children in a
|
|
* depth-first manner.
|
|
*/
|
|
void pci_stop_bus_device(struct pci_dev *dev)
|
|
{
|
|
if (dev->subordinate)
|
|
pci_stop_bus_devices(dev->subordinate);
|
|
|
|
pci_stop_dev(dev);
|
|
}
|
|
|
|
EXPORT_SYMBOL(pci_remove_bus_device);
|
|
EXPORT_SYMBOL(pci_remove_behind_bridge);
|
|
EXPORT_SYMBOL_GPL(pci_stop_bus_device);
|