mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-22 21:21:02 +07:00
d2b0f6f77e
The issue was detected in a somewhat complicated test case where we have multiple hierarchical PEs, as shown in the following figure:

    +-----------------+
    | PE#3     p2p#0  |
    |          p2p#1  |
    +-----------------+
            |
    +-----------------+
    | PE#4     pdev#0 |
    |          pdev#1 |
    +-----------------+

PE#4 (which has 2 PCI devices) is the child of PE#3, which has 2 p2p bridges. We accidentally hit a less-known scenario: PE#4 was removed permanently from the system because of a permanent failure (e.g. exceeding the max allowed failure times in the last hour), then we detected EEH errors on PE#3 and tried to recover it. However, the eeh_dev instances for pdev#0/1 were not detached from PE#4, which was still connected to PE#3. All of that was because we rely on the count-based pcibios_release_device(), which isn't reliable enough. When doing recovery for PE#3, we still applied hotplug on PE#4 and pdev#0/1, which are not valid any more. Eventually, we ran into a kernel crash.

The patch fixes the above issue from two aspects. For unplug, we simply skip those permanently removed PEs, whose state is (EEH_PE_STATE_ISOLATED && !EEH_PE_STATE_RECOVERING) and whose frozen count should be greater than EEH_MAX_ALLOWED_FREEZES. For plug, we mark all permanently removed EEH devices with EEH_DEV_REMOVED and return 0xFF's on reads of their PCI config space so that the PCI core will omit them.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
85 lines
2.6 KiB
C
85 lines
2.6 KiB
C
/*
|
|
* (c) 2001 PPC 64 Team, IBM Corp
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
#ifndef _ASM_POWERPC_PPC_PCI_H
|
|
#define _ASM_POWERPC_PPC_PCI_H
|
|
#ifdef __KERNEL__
|
|
|
|
#ifdef CONFIG_PCI
|
|
|
|
#include <linux/pci.h>
|
|
#include <asm/pci-bridge.h>
|
|
|
|
extern unsigned long isa_io_base;
|
|
|
|
extern void pci_setup_phb_io(struct pci_controller *hose, int primary);
|
|
extern void pci_setup_phb_io_dynamic(struct pci_controller *hose, int primary);
|
|
|
|
|
|
extern struct list_head hose_list;
|
|
|
|
extern void find_and_init_phbs(void);
|
|
|
|
extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */
|
|
|
|
/*
 * Bus Unit ID (BUID) helpers: a BUID is a 64-bit value; these macros split
 * it into its high and low 32-bit halves.
 */
|
|
/* High 32 bits of the 64-bit BUID. */
#define BUID_HI(buid) upper_32_bits(buid)
|
|
/* Low 32 bits of the 64-bit BUID. */
#define BUID_LO(buid) lower_32_bits(buid)
|
|
|
|
/* PCI device_node operations */
|
|
/* Forward declaration: this header only uses pointers to struct device_node. */
struct device_node;
|
|
/*
 * Callback type accepted by traverse_pci_devices(): it receives a device
 * node and the caller-supplied opaque data pointer and returns a void pointer.
 */
typedef void *(*traverse_func)(struct device_node *me, void *data);
|
|
/*
 * Declared here, defined elsewhere. Takes a starting device node, a
 * traverse_func callback and an opaque data pointer.
 * NOTE(review): traversal order and the meaning of the returned pointer are
 * not visible from this header -- confirm against the implementation.
 */
void *traverse_pci_devices(struct device_node *start, traverse_func pre,
|
|
void *data);
|
|
|
|
extern void pci_devs_phb_init(void);
|
|
extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
|
|
|
|
/* From rtas_pci.h */
|
|
extern void init_pci_config_tokens (void);
|
|
extern unsigned long get_phb_buid (struct device_node *);
|
|
extern int rtas_setup_phb(struct pci_controller *phb);
|
|
|
|
#ifdef CONFIG_EEH
|
|
|
|
void eeh_addr_cache_insert_dev(struct pci_dev *dev);
|
|
void eeh_addr_cache_rmv_dev(struct pci_dev *dev);
|
|
struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr);
|
|
void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
|
|
int eeh_pci_enable(struct eeh_pe *pe, int function);
|
|
int eeh_reset_pe(struct eeh_pe *);
|
|
void eeh_save_bars(struct eeh_dev *edev);
|
|
int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
|
|
int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
|
|
void eeh_pe_state_mark(struct eeh_pe *pe, int state);
|
|
void eeh_pe_state_clear(struct eeh_pe *pe, int state);
|
|
void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
|
|
|
|
void eeh_sysfs_add_device(struct pci_dev *pdev);
|
|
void eeh_sysfs_remove_device(struct pci_dev *pdev);
|
|
|
|
/*
 * Name string for @pdev: whatever pci_name() returns for a non-NULL device,
 * or the literal "<null>" when @pdev is NULL.
 */
static inline const char *eeh_pci_name(struct pci_dev *pdev)
{
	if (!pdev)
		return "<null>";

	return pci_name(pdev);
}
|
|
|
|
static inline const char *eeh_driver_name(struct pci_dev *pdev)
|
|
{
|
|
return (pdev && pdev->driver) ? pdev->driver->name : "<null>";
|
|
}
|
|
|
|
#endif /* CONFIG_EEH */
|
|
|
|
#else /* CONFIG_PCI */
|
|
/* Empty stand-in for find_and_init_phbs() when CONFIG_PCI is not set. */
static inline void find_and_init_phbs(void)
{
}
|
|
/* Empty stand-in for init_pci_config_tokens() when CONFIG_PCI is not set. */
static inline void init_pci_config_tokens(void)
{
}
|
|
#endif /* !CONFIG_PCI */
|
|
|
|
#endif /* __KERNEL__ */
|
|
#endif /* _ASM_POWERPC_PPC_PCI_H */
|