mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-25 09:20:50 +07:00
5fb7dc37dc
The per cpu data section contains two types of data: one set which is exclusively accessed by the local cpu, and another set which is per cpu but also shared by remote cpus. In the current kernel, these two sets are not clearly separated out. This can potentially cause the same data cacheline to be shared between the two sets of data, which will result in unnecessary bouncing of the cacheline between cpus. One way to fix the problem is to cacheline align the remotely accessed per cpu data, both at the beginning and at the end. Because of the padding at both ends, this will likely cause some memory wastage, and the interface to achieve this is not clean. This patch: Moves the remotely accessed per cpu data (which is currently marked as ____cacheline_aligned_in_smp) into a different section, where all the data elements are cacheline aligned. As such, this cleanly differentiates the local-only data from the remotely accessed data. Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Acked-by: Suresh Siddha <suresh.b.siddha@intel.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Christoph Lameter <clameter@sgi.com> Cc: <linux-arch@vger.kernel.org> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Andi Kleen <ak@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
82 lines
2.5 KiB
C
82 lines
2.5 KiB
C
#ifndef __ARCH_S390_PERCPU__
|
|
#define __ARCH_S390_PERCPU__
|
|
|
|
#include <linux/compiler.h>
|
|
#include <asm/lowcore.h>
|
|
|
|
/*
 * Marker macro defined without a value.
 * NOTE(review): presumably tested by common per-cpu code to detect which
 * per-cpu implementation is in use - confirm against its users.
 */
#define __GENERIC_PER_CPU
|
|
|
|
/*
 * s390 uses its own implementation for per cpu data: the offset of
 * the cpu local data area is cached in the cpu's lowcore memory.
 * For 64 bit module code s390 forces the use of a GOT slot for the
 * address of the per cpu variable. This is needed because the module
 * may be more than 4G above the per cpu area.
 */
|
|
#if defined(__s390x__) && defined(MODULE)
|
|
|
|
/*
 * __reloc_hide(var, offset) - lvalue for per_cpu__<var> displaced by @offset
 * (variant for 64 bit module code).
 *
 * @var:    bare per cpu variable name, without the per_cpu__ prefix
 * @offset: byte offset added to the variable's address
 *
 * larl loads the address of the GOT entry for per_cpu__<var> into __ptr; the
 * variable's address is then read from that slot, @offset is added, and the
 * sum is cast to a pointer to the variable's type and dereferenced.
 *
 * The simple_identifier_<var> declaration is never called: pasting @var into
 * an identifier only compiles when @var is a single plain identifier.
 *
 * __asm__/__typeof__ are the alternate keyword spellings, so the macro also
 * expands in translation units built in a strict ISO C mode.
 */
#define __reloc_hide(var, offset) (*({					\
	extern int simple_identifier_##var(void);			\
	unsigned long *__ptr;						\
	__asm__ ("larl %0,per_cpu__"#var"@GOTENT"			\
		 : "=a" (__ptr) : "X" (per_cpu__##var));		\
	(__typeof__(&per_cpu__##var))((*__ptr) + (offset)); }))
|
|
|
|
#else
|
|
|
|
/*
 * __reloc_hide(var, offset) - lvalue for per_cpu__<var> displaced by @offset
 * (variant for everything except 64 bit module code).
 *
 * @var:    bare per cpu variable name, without the per_cpu__ prefix
 * @offset: byte offset added to the variable's address
 *
 * The empty asm statement passes &per_cpu__<var> through a register
 * unchanged ("0" ties the input to output operand 0), so the compiler cannot
 * see where __ptr came from. @offset is added to it and the sum is cast to a
 * pointer to the variable's type and dereferenced.
 *
 * The simple_identifier_<var> declaration is never called: pasting @var into
 * an identifier only compiles when @var is a single plain identifier.
 *
 * __asm__/__typeof__ are the alternate keyword spellings, so the macro also
 * expands in translation units built in a strict ISO C mode.
 */
#define __reloc_hide(var, offset) (*({					\
	extern int simple_identifier_##var(void);			\
	unsigned long __ptr;						\
	__asm__ ("" : "=a" (__ptr) : "0" (&per_cpu__##var));		\
	(__typeof__(&per_cpu__##var))(__ptr + (offset)); }))
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
/*
 * Per cpu offset table, indexed by cpu number. per_cpu() adds the selected
 * entry to the address of a per cpu variable.
 */
extern unsigned long __per_cpu_offset[NR_CPUS];
|
|
|
|
/*
 * DEFINE_PER_CPU(type, name) - define the per cpu variable per_cpu__<name>.
 *
 * @type: type of the variable
 * @name: variable name without the per_cpu__ prefix
 *
 * The variable is placed in the ".data.percpu" section. The type is wrapped
 * in __typeof__() to separate it from the declarator, so that array types
 * such as (int[3], foo) work.
 */
#define DEFINE_PER_CPU(type, name)				\
	__attribute__((__section__(".data.percpu")))		\
	__typeof__(type) per_cpu__##name
|
|
|
|
/*
 * DEFINE_PER_CPU_SHARED_ALIGNED(type, name) - define the per cpu variable
 * per_cpu__<name> for data that is also accessed by remote cpus.
 *
 * @type: type of the variable
 * @name: variable name without the per_cpu__ prefix
 *
 * Same as DEFINE_PER_CPU() except that the variable goes into the separate
 * ".data.percpu.shared_aligned" section and is additionally marked
 * ____cacheline_aligned_in_smp.
 */
#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)			\
	__attribute__((__section__(".data.percpu.shared_aligned")))	\
	__typeof__(type) per_cpu__##name				\
	____cacheline_aligned_in_smp
|
|
|
|
/*
 * SMP accessors for per cpu variables. Each takes the variable name without
 * the per_cpu__ prefix and yields an lvalue via __reloc_hide().
 *
 * __get_cpu_var(var)     - instance selected by S390_lowcore.percpu_offset
 * __raw_get_cpu_var(var) - same expansion as __get_cpu_var() here
 * per_cpu(var, cpu)      - instance selected by __per_cpu_offset[cpu]
 * per_cpu_offset(x)      - the raw __per_cpu_offset[] entry for cpu x
 */
#define __get_cpu_var(var) __reloc_hide(var, S390_lowcore.percpu_offset)
#define __raw_get_cpu_var(var) __reloc_hide(var, S390_lowcore.percpu_offset)
#define per_cpu(var, cpu) __reloc_hide(var, __per_cpu_offset[cpu])
#define per_cpu_offset(x) (__per_cpu_offset[x])
|
|
|
|
/*
 * percpu_modcopy(pcpudst, src, size) - copy an image into every cpu's area.
 *
 * @pcpudst: destination base address; each cpu's __per_cpu_offset[] entry is
 *           added to it
 * @src:     source buffer
 * @size:    number of bytes to copy per cpu
 *
 * For each possible cpu, @size bytes are copied from @src to @pcpudst plus
 * that cpu's offset. This is a macro to avoid #include hell: the header does
 * not have to pull in the declarations of for_each_possible_cpu() and
 * memcpy() itself; they must be visible where the macro is used.
 */
#define percpu_modcopy(pcpudst, src, size)			\
do {								\
	unsigned int __i;					\
	for_each_possible_cpu(__i)				\
		memcpy((pcpudst)+__per_cpu_offset[__i],		\
		       (src), (size));				\
} while (0)
|
|
|
|
#else /* ! SMP */
|
|
|
|
/*
 * Uniprocessor definitions of per cpu variables.
 *
 * DEFINE_PER_CPU(type, name) defines per_cpu__<name> as an ordinary variable
 * with no section attribute. The type is wrapped in __typeof__() so that
 * array types such as (int[3], foo) work.
 *
 * DEFINE_PER_CPU_SHARED_ALIGNED(type, name) is the same as DEFINE_PER_CPU()
 * in this configuration.
 */
#define DEFINE_PER_CPU(type, name)				\
	__typeof__(type) per_cpu__##name
#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		\
	DEFINE_PER_CPU(type, name)
|
|
|
|
/*
 * Uniprocessor accessors for per cpu variables. Each takes the variable name
 * without the per_cpu__ prefix and expands to __reloc_hide() with offset 0.
 * The cpu argument of per_cpu() is not used in this configuration.
 */
#define __get_cpu_var(var) __reloc_hide(var, 0)
#define __raw_get_cpu_var(var) __reloc_hide(var, 0)
#define per_cpu(var, cpu) __reloc_hide(var, 0)
|
|
|
|
#endif /* SMP */
|
|
|
|
/*
 * DECLARE_PER_CPU(type, name) - extern declaration of the per cpu variable
 * per_cpu__<name>. The type is wrapped in __typeof__(), so array types are
 * accepted.
 */
#define DECLARE_PER_CPU(type, name)				\
	extern __typeof__(type) per_cpu__##name
|
|
|
|
/*
 * Export a per cpu variable. @var is the name without the per_cpu__ prefix;
 * the prefixed symbol per_cpu__<var> is passed to EXPORT_SYMBOL() or
 * EXPORT_SYMBOL_GPL() respectively.
 */
#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
|
|
|
|
#endif /* __ARCH_S390_PERCPU__ */
|