linux_dsm_epyc7002/arch/powerpc/include/asm/drmem.h
Nathan Fontenot b2d3b5ee66 powerpc/pseries: Track LMB nid instead of using device tree
When removing memory we need to remove the memory from the node
it was added to instead of looking up the node it should be in
in the device tree.

During testing we have seen scenarios where the affinity for a
LMB changes due to a partition migration or PRRN event. In these
cases the node the LMB exists in may not match the node the device
tree indicates it belongs in. This can lead to a system crash
when trying to DLPAR remove the LMB after a migration or PRRN
event. The current code looks up the node in the device tree to
remove the LMB from, the crash occurs when we try to offline this
node and it does not have any data, i.e. node_data[nid] == NULL.

36:mon> e
cpu 0x36: Vector: 300 (Data Access) at [c0000001828b7810]
    pc: c00000000036d08c: try_offline_node+0x2c/0x1b0
    lr: c0000000003a14ec: remove_memory+0xbc/0x110
    sp: c0000001828b7a90
   msr: 800000000280b033
   dar: 9a28
 dsisr: 40000000
  current = 0xc0000006329c4c80
  paca    = 0xc000000007a55200   softe: 0        irq_happened: 0x01
    pid   = 76926, comm = kworker/u320:3

36:mon> t
[link register   ] c0000000003a14ec remove_memory+0xbc/0x110
[c0000001828b7a90] c00000000006a1cc arch_remove_memory+0x9c/0xd0 (unreliable)
[c0000001828b7ad0] c0000000003a14e0 remove_memory+0xb0/0x110
[c0000001828b7b20] c0000000000c7db4 dlpar_remove_lmb+0x94/0x160
[c0000001828b7b60] c0000000000c8ef8 dlpar_memory+0x7e8/0xd10
[c0000001828b7bf0] c0000000000bf828 handle_dlpar_errorlog+0xf8/0x160
[c0000001828b7c60] c0000000000bf8cc pseries_hp_work_fn+0x3c/0xa0
[c0000001828b7c90] c000000000128cd8 process_one_work+0x298/0x5a0
[c0000001828b7d20] c000000000129068 worker_thread+0x88/0x620
[c0000001828b7dc0] c00000000013223c kthread+0x1ac/0x1c0
[c0000001828b7e30] c00000000000b45c ret_from_kernel_thread+0x5c/0x80

To resolve this we need to track the node a LMB belongs to when
it is added to the system so we can remove it from that node instead
of the node that the device tree indicates it should belong to.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2019-04-29 22:27:16 +10:00

129 lines
3.0 KiB
C

/*
* drmem.h: Power specific logical memory block representation
*
* Copyright 2017 IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_LMB_H
#define _ASM_POWERPC_LMB_H
struct drmem_lmb {
u64 base_addr;
u32 drc_index;
u32 aa_index;
u32 flags;
#ifdef CONFIG_MEMORY_HOTPLUG
int nid;
#endif
};
struct drmem_lmb_info {
struct drmem_lmb *lmbs;
int n_lmbs;
u32 lmb_size;
};
extern struct drmem_lmb_info *drmem_info;
#define for_each_drmem_lmb_in_range(lmb, start, end) \
for ((lmb) = (start); (lmb) <= (end); (lmb)++)
#define for_each_drmem_lmb(lmb) \
for_each_drmem_lmb_in_range((lmb), \
&drmem_info->lmbs[0], \
&drmem_info->lmbs[drmem_info->n_lmbs - 1])
/*
* The of_drconf_cell_v1 struct defines the layout of the LMB data
* specified in the ibm,dynamic-memory device tree property.
* The property itself is a 32-bit value specifying the number of
* LMBs followed by an array of of_drconf_cell_v1 entries, one
* per LMB.
*/
struct of_drconf_cell_v1 {
__be64 base_addr;
__be32 drc_index;
__be32 reserved;
__be32 aa_index;
__be32 flags;
};
/*
* Version 2 of the ibm,dynamic-memory property is defined as a
* 32-bit value specifying the number of LMB sets followed by an
* array of of_drconf_cell_v2 entries, one per LMB set.
*/
struct of_drconf_cell_v2 {
u32 seq_lmbs;
u64 base_addr;
u32 drc_index;
u32 aa_index;
u32 flags;
} __packed;
#define DRCONF_MEM_ASSIGNED 0x00000008
#define DRCONF_MEM_AI_INVALID 0x00000040
#define DRCONF_MEM_RESERVED 0x00000080
static inline u32 drmem_lmb_size(void)
{
return drmem_info->lmb_size;
}
#define DRMEM_LMB_RESERVED 0x80000000
static inline void drmem_mark_lmb_reserved(struct drmem_lmb *lmb)
{
lmb->flags |= DRMEM_LMB_RESERVED;
}
static inline void drmem_remove_lmb_reservation(struct drmem_lmb *lmb)
{
lmb->flags &= ~DRMEM_LMB_RESERVED;
}
static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb)
{
return lmb->flags & DRMEM_LMB_RESERVED;
}
u64 drmem_lmb_memory_max(void);
void __init walk_drmem_lmbs(struct device_node *dn,
void (*func)(struct drmem_lmb *, const __be32 **));
int drmem_update_dt(void);
#ifdef CONFIG_PPC_PSERIES
void __init walk_drmem_lmbs_early(unsigned long node,
void (*func)(struct drmem_lmb *, const __be32 **));
#endif
static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
{
lmb->aa_index = 0xffffffff;
}
#ifdef CONFIG_MEMORY_HOTPLUG
static inline void lmb_set_nid(struct drmem_lmb *lmb)
{
lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
}
static inline void lmb_clear_nid(struct drmem_lmb *lmb)
{
lmb->nid = -1;
}
#else
static inline void lmb_set_nid(struct drmem_lmb *lmb)
{
}
static inline void lmb_clear_nid(struct drmem_lmb *lmb)
{
}
#endif
#endif /* _ASM_POWERPC_LMB_H */