mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-18 15:46:09 +07:00
44bda4b7d2
When a PCI device is detected, pdev->is_added is set to 1 and proc and sysfs entries are created. When the device is removed, pdev->is_added is checked for one and then device is detached with clearing of proc and sys entries and at end, pdev->is_added is set to 0. is_added and is_busmaster are bit fields in pci_dev structure sharing same memory location. A strange issue was observed with multiple removal and rescan of a PCIe NVMe device using sysfs commands where is_added flag was observed as zero instead of one while removing device and proc,sys entries are not cleared. This causes issue in later device addition with warning message "proc_dir_entry" already registered. Debugging revealed a race condition between the PCI core setting the is_added bit in pci_bus_add_device() and the NVMe driver reset work-queue setting the is_busmaster bit in pci_set_master(). As these fields are not handled atomically, that clears the is_added bit. Move the is_added bit to a separate private flag variable and use atomic functions to set and retrieve the device addition state. This avoids the race because is_added no longer shares a memory location with is_busmaster. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200283 Signed-off-by: Hari Vyas <hari.vyas@broadcom.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Reviewed-by: Lukas Wunner <lukas@wunner.de> Acked-by: Michael Ellerman <mpe@ellerman.id.au>
429 lines
10 KiB
C
429 lines
10 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* From setup-res.c, by:
|
|
* Dave Rusling (david.rusling@reo.mts.dec.com)
|
|
* David Mosberger (davidm@cs.arizona.edu)
|
|
* David Miller (davem@redhat.com)
|
|
* Ivan Kokshaysky (ink@jurassic.park.msu.ru)
|
|
*/
|
|
#include <linux/module.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/ioport.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/slab.h>
|
|
|
|
#include "pci.h"
|
|
|
|
void pci_add_resource_offset(struct list_head *resources, struct resource *res,
|
|
resource_size_t offset)
|
|
{
|
|
struct resource_entry *entry;
|
|
|
|
entry = resource_list_create_entry(res, 0);
|
|
if (!entry) {
|
|
printk(KERN_ERR "PCI: can't add host bridge window %pR\n", res);
|
|
return;
|
|
}
|
|
|
|
entry->offset = offset;
|
|
resource_list_add_tail(entry, resources);
|
|
}
|
|
EXPORT_SYMBOL(pci_add_resource_offset);
|
|
|
|
void pci_add_resource(struct list_head *resources, struct resource *res)
|
|
{
|
|
pci_add_resource_offset(resources, res, 0);
|
|
}
|
|
EXPORT_SYMBOL(pci_add_resource);
|
|
|
|
void pci_free_resource_list(struct list_head *resources)
|
|
{
|
|
resource_list_free(resources);
|
|
}
|
|
EXPORT_SYMBOL(pci_free_resource_list);
|
|
|
|
void pci_bus_add_resource(struct pci_bus *bus, struct resource *res,
|
|
unsigned int flags)
|
|
{
|
|
struct pci_bus_resource *bus_res;
|
|
|
|
bus_res = kzalloc(sizeof(struct pci_bus_resource), GFP_KERNEL);
|
|
if (!bus_res) {
|
|
dev_err(&bus->dev, "can't add %pR resource\n", res);
|
|
return;
|
|
}
|
|
|
|
bus_res->res = res;
|
|
bus_res->flags = flags;
|
|
list_add_tail(&bus_res->list, &bus->resources);
|
|
}
|
|
|
|
struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n)
|
|
{
|
|
struct pci_bus_resource *bus_res;
|
|
|
|
if (n < PCI_BRIDGE_RESOURCE_NUM)
|
|
return bus->resource[n];
|
|
|
|
n -= PCI_BRIDGE_RESOURCE_NUM;
|
|
list_for_each_entry(bus_res, &bus->resources, list) {
|
|
if (n-- == 0)
|
|
return bus_res->res;
|
|
}
|
|
return NULL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(pci_bus_resource_n);
|
|
|
|
void pci_bus_remove_resources(struct pci_bus *bus)
|
|
{
|
|
int i;
|
|
struct pci_bus_resource *bus_res, *tmp;
|
|
|
|
for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
|
|
bus->resource[i] = NULL;
|
|
|
|
list_for_each_entry_safe(bus_res, tmp, &bus->resources, list) {
|
|
list_del(&bus_res->list);
|
|
kfree(bus_res);
|
|
}
|
|
}
|
|
|
|
int devm_request_pci_bus_resources(struct device *dev,
|
|
struct list_head *resources)
|
|
{
|
|
struct resource_entry *win;
|
|
struct resource *parent, *res;
|
|
int err;
|
|
|
|
resource_list_for_each_entry(win, resources) {
|
|
res = win->res;
|
|
switch (resource_type(res)) {
|
|
case IORESOURCE_IO:
|
|
parent = &ioport_resource;
|
|
break;
|
|
case IORESOURCE_MEM:
|
|
parent = &iomem_resource;
|
|
break;
|
|
default:
|
|
continue;
|
|
}
|
|
|
|
err = devm_request_resource(dev, parent, res);
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(devm_request_pci_bus_resources);
|
|
|
|
static struct pci_bus_region pci_32_bit = {0, 0xffffffffULL};
|
|
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
|
|
static struct pci_bus_region pci_64_bit = {0,
|
|
(pci_bus_addr_t) 0xffffffffffffffffULL};
|
|
static struct pci_bus_region pci_high = {(pci_bus_addr_t) 0x100000000ULL,
|
|
(pci_bus_addr_t) 0xffffffffffffffffULL};
|
|
#endif
|
|
|
|
/*
|
|
* @res contains CPU addresses. Clip it so the corresponding bus addresses
|
|
* on @bus are entirely within @region. This is used to control the bus
|
|
* addresses of resources we allocate, e.g., we may need a resource that
|
|
* can be mapped by a 32-bit BAR.
|
|
*/
|
|
static void pci_clip_resource_to_region(struct pci_bus *bus,
|
|
struct resource *res,
|
|
struct pci_bus_region *region)
|
|
{
|
|
struct pci_bus_region r;
|
|
|
|
pcibios_resource_to_bus(bus, &r, res);
|
|
if (r.start < region->start)
|
|
r.start = region->start;
|
|
if (r.end > region->end)
|
|
r.end = region->end;
|
|
|
|
if (r.end < r.start)
|
|
res->end = res->start - 1;
|
|
else
|
|
pcibios_bus_to_resource(bus, res, &r);
|
|
}
|
|
|
|
static int pci_bus_alloc_from_region(struct pci_bus *bus, struct resource *res,
|
|
resource_size_t size, resource_size_t align,
|
|
resource_size_t min, unsigned long type_mask,
|
|
resource_size_t (*alignf)(void *,
|
|
const struct resource *,
|
|
resource_size_t,
|
|
resource_size_t),
|
|
void *alignf_data,
|
|
struct pci_bus_region *region)
|
|
{
|
|
int i, ret;
|
|
struct resource *r, avail;
|
|
resource_size_t max;
|
|
|
|
type_mask |= IORESOURCE_TYPE_BITS;
|
|
|
|
pci_bus_for_each_resource(bus, r, i) {
|
|
resource_size_t min_used = min;
|
|
|
|
if (!r)
|
|
continue;
|
|
|
|
/* type_mask must match */
|
|
if ((res->flags ^ r->flags) & type_mask)
|
|
continue;
|
|
|
|
/* We cannot allocate a non-prefetching resource
|
|
from a pre-fetching area */
|
|
if ((r->flags & IORESOURCE_PREFETCH) &&
|
|
!(res->flags & IORESOURCE_PREFETCH))
|
|
continue;
|
|
|
|
avail = *r;
|
|
pci_clip_resource_to_region(bus, &avail, region);
|
|
|
|
/*
|
|
* "min" is typically PCIBIOS_MIN_IO or PCIBIOS_MIN_MEM to
|
|
* protect badly documented motherboard resources, but if
|
|
* this is an already-configured bridge window, its start
|
|
* overrides "min".
|
|
*/
|
|
if (avail.start)
|
|
min_used = avail.start;
|
|
|
|
max = avail.end;
|
|
|
|
/* Ok, try it out.. */
|
|
ret = allocate_resource(r, res, size, min_used, max,
|
|
align, alignf, alignf_data);
|
|
if (ret == 0)
|
|
return 0;
|
|
}
|
|
return -ENOMEM;
|
|
}
|
|
|
|
/**
|
|
* pci_bus_alloc_resource - allocate a resource from a parent bus
|
|
* @bus: PCI bus
|
|
* @res: resource to allocate
|
|
* @size: size of resource to allocate
|
|
* @align: alignment of resource to allocate
|
|
* @min: minimum /proc/iomem address to allocate
|
|
* @type_mask: IORESOURCE_* type flags
|
|
* @alignf: resource alignment function
|
|
* @alignf_data: data argument for resource alignment function
|
|
*
|
|
* Given the PCI bus a device resides on, the size, minimum address,
|
|
* alignment and type, try to find an acceptable resource allocation
|
|
* for a specific device resource.
|
|
*/
|
|
int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
|
|
resource_size_t size, resource_size_t align,
|
|
resource_size_t min, unsigned long type_mask,
|
|
resource_size_t (*alignf)(void *,
|
|
const struct resource *,
|
|
resource_size_t,
|
|
resource_size_t),
|
|
void *alignf_data)
|
|
{
|
|
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
|
|
int rc;
|
|
|
|
if (res->flags & IORESOURCE_MEM_64) {
|
|
rc = pci_bus_alloc_from_region(bus, res, size, align, min,
|
|
type_mask, alignf, alignf_data,
|
|
&pci_high);
|
|
if (rc == 0)
|
|
return 0;
|
|
|
|
return pci_bus_alloc_from_region(bus, res, size, align, min,
|
|
type_mask, alignf, alignf_data,
|
|
&pci_64_bit);
|
|
}
|
|
#endif
|
|
|
|
return pci_bus_alloc_from_region(bus, res, size, align, min,
|
|
type_mask, alignf, alignf_data,
|
|
&pci_32_bit);
|
|
}
|
|
EXPORT_SYMBOL(pci_bus_alloc_resource);
|
|
|
|
/*
|
|
* The @idx resource of @dev should be a PCI-PCI bridge window. If this
|
|
* resource fits inside a window of an upstream bridge, do nothing. If it
|
|
* overlaps an upstream window but extends outside it, clip the resource so
|
|
* it fits completely inside.
|
|
*/
|
|
bool pci_bus_clip_resource(struct pci_dev *dev, int idx)
|
|
{
|
|
struct pci_bus *bus = dev->bus;
|
|
struct resource *res = &dev->resource[idx];
|
|
struct resource orig_res = *res;
|
|
struct resource *r;
|
|
int i;
|
|
|
|
pci_bus_for_each_resource(bus, r, i) {
|
|
resource_size_t start, end;
|
|
|
|
if (!r)
|
|
continue;
|
|
|
|
if (resource_type(res) != resource_type(r))
|
|
continue;
|
|
|
|
start = max(r->start, res->start);
|
|
end = min(r->end, res->end);
|
|
|
|
if (start > end)
|
|
continue; /* no overlap */
|
|
|
|
if (res->start == start && res->end == end)
|
|
return false; /* no change */
|
|
|
|
res->start = start;
|
|
res->end = end;
|
|
res->flags &= ~IORESOURCE_UNSET;
|
|
orig_res.flags &= ~IORESOURCE_UNSET;
|
|
pci_printk(KERN_DEBUG, dev, "%pR clipped to %pR\n",
|
|
&orig_res, res);
|
|
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
void __weak pcibios_resource_survey_bus(struct pci_bus *bus) { }
|
|
|
|
void __weak pcibios_bus_add_device(struct pci_dev *pdev) { }
|
|
|
|
/**
|
|
* pci_bus_add_device - start driver for a single device
|
|
* @dev: device to add
|
|
*
|
|
* This adds add sysfs entries and start device drivers
|
|
*/
|
|
void pci_bus_add_device(struct pci_dev *dev)
|
|
{
|
|
int retval;
|
|
|
|
/*
|
|
* Can not put in pci_device_add yet because resources
|
|
* are not assigned yet for some devices.
|
|
*/
|
|
pcibios_bus_add_device(dev);
|
|
pci_fixup_device(pci_fixup_final, dev);
|
|
pci_create_sysfs_dev_files(dev);
|
|
pci_proc_attach_device(dev);
|
|
pci_bridge_d3_update(dev);
|
|
|
|
dev->match_driver = true;
|
|
retval = device_attach(&dev->dev);
|
|
if (retval < 0 && retval != -EPROBE_DEFER) {
|
|
pci_warn(dev, "device attach failed (%d)\n", retval);
|
|
pci_proc_detach_device(dev);
|
|
pci_remove_sysfs_dev_files(dev);
|
|
return;
|
|
}
|
|
|
|
pci_dev_assign_added(dev, true);
|
|
}
|
|
EXPORT_SYMBOL_GPL(pci_bus_add_device);
|
|
|
|
/**
|
|
* pci_bus_add_devices - start driver for PCI devices
|
|
* @bus: bus to check for new devices
|
|
*
|
|
* Start driver for PCI devices and add some sysfs entries.
|
|
*/
|
|
void pci_bus_add_devices(const struct pci_bus *bus)
|
|
{
|
|
struct pci_dev *dev;
|
|
struct pci_bus *child;
|
|
|
|
list_for_each_entry(dev, &bus->devices, bus_list) {
|
|
/* Skip already-added devices */
|
|
if (pci_dev_is_added(dev))
|
|
continue;
|
|
pci_bus_add_device(dev);
|
|
}
|
|
|
|
list_for_each_entry(dev, &bus->devices, bus_list) {
|
|
/* Skip if device attach failed */
|
|
if (!pci_dev_is_added(dev))
|
|
continue;
|
|
child = dev->subordinate;
|
|
if (child)
|
|
pci_bus_add_devices(child);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(pci_bus_add_devices);
|
|
|
|
/** pci_walk_bus - walk devices on/under bus, calling callback.
|
|
* @top bus whose devices should be walked
|
|
* @cb callback to be called for each device found
|
|
* @userdata arbitrary pointer to be passed to callback.
|
|
*
|
|
* Walk the given bus, including any bridged devices
|
|
* on buses under this bus. Call the provided callback
|
|
* on each device found.
|
|
*
|
|
* We check the return of @cb each time. If it returns anything
|
|
* other than 0, we break out.
|
|
*
|
|
*/
|
|
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
|
|
void *userdata)
|
|
{
|
|
struct pci_dev *dev;
|
|
struct pci_bus *bus;
|
|
struct list_head *next;
|
|
int retval;
|
|
|
|
bus = top;
|
|
down_read(&pci_bus_sem);
|
|
next = top->devices.next;
|
|
for (;;) {
|
|
if (next == &bus->devices) {
|
|
/* end of this bus, go up or finish */
|
|
if (bus == top)
|
|
break;
|
|
next = bus->self->bus_list.next;
|
|
bus = bus->self->bus;
|
|
continue;
|
|
}
|
|
dev = list_entry(next, struct pci_dev, bus_list);
|
|
if (dev->subordinate) {
|
|
/* this is a pci-pci bridge, do its devices next */
|
|
next = dev->subordinate->devices.next;
|
|
bus = dev->subordinate;
|
|
} else
|
|
next = dev->bus_list.next;
|
|
|
|
retval = cb(dev, userdata);
|
|
if (retval)
|
|
break;
|
|
}
|
|
up_read(&pci_bus_sem);
|
|
}
|
|
EXPORT_SYMBOL_GPL(pci_walk_bus);
|
|
|
|
struct pci_bus *pci_bus_get(struct pci_bus *bus)
|
|
{
|
|
if (bus)
|
|
get_device(&bus->dev);
|
|
return bus;
|
|
}
|
|
EXPORT_SYMBOL(pci_bus_get);
|
|
|
|
void pci_bus_put(struct pci_bus *bus)
|
|
{
|
|
if (bus)
|
|
put_device(&bus->dev);
|
|
}
|
|
EXPORT_SYMBOL(pci_bus_put);
|