mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-27 15:42:33 +07:00
464d1a78fb
Convert the PDA code to use %fs rather than %gs as the segment for per-processor data. This is because some processors show a small but measurable performance gain for reloading a NULL segment selector (as %fs generally is in user-space) versus a non-NULL one (as %gs generally is). On modern processors the difference is very small, perhaps undetectable. Some old AMD "K6 3D+" processors are noticeably slower when %fs is used rather than %gs; I have no idea why this might be, but I think they're sufficiently rare that it doesn't matter much. This patch also fixes the math emulator, which had not been adjusted to match the changed struct pt_regs. [frederik.deweerdt@gmail.com: fixit with gdb] [mingo@elte.hu: Fix KVM too] Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Signed-off-by: Andi Kleen <ak@suse.de> Cc: Ian Campbell <Ian.Campbell@XenSource.com> Acked-by: Ingo Molnar <mingo@elte.hu> Acked-by: Zachary Amsden <zach@vmware.com> Cc: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: Frederik Deweerdt <frederik.deweerdt@gmail.com> Signed-off-by: Andrew Morton <akpm@osdl.org>
101 lines
2.8 KiB
C
101 lines
2.8 KiB
C
/*
   Per-processor Data Areas
   Jeremy Fitzhardinge <jeremy@goop.org> 2006
   Based on asm-x86_64/pda.h by Andi Kleen.
 */
|
|
#ifndef _I386_PDA_H
|
|
#define _I386_PDA_H
|
|
|
|
#include <linux/stddef.h>
|
|
#include <linux/types.h>
|
|
|
|
/*
 * Layout of a per-processor data area (PDA).
 *
 * The accessor macros in this header address these fields as
 * %fs:<offset of the field within this structure>, so the layout here
 * is what pda_offset() measures against.
 */
struct i386_pda
{
	/* Pointer to self; pda_addr() reads it to form field addresses. */
	struct i386_pda *_pda;

	int cpu_number;
	struct task_struct *pcurrent;	/* current process */
	struct pt_regs *irq_regs;
};
|
|
|
|
/* Array of pointers to PDAs, indexed through cpu_pda(); only declared here. */
extern struct i386_pda *_cpu_pda[];
|
|
|
|
/* The PDA pointer held in slot i of _cpu_pda[] (presumably i is a CPU
   number -- confirm against callers). */
#define cpu_pda(i) (_cpu_pda[i])
|
|
|
|
#define pda_offset(field) offsetof(struct i386_pda, field)
|
|
|
|
/* Referenced from the default case of the size switches in pda_to_op()
   and pda_from_op(), i.e. for fields that are not 1, 2 or 4 bytes wide.
   NOTE(review): no definition is visible in this header; presumably it is
   left undefined so that such a use fails at link time -- confirm. */
extern void __bad_pda_field(void);
|
|
|
|
/* This variable is never instantiated.  It is only used as a stand-in
   for the real per-cpu PDA memory, so that gcc can understand what
   memory operations the inline asms below are performing.  This
   eliminates the need to make the asms volatile or have memory
   clobbers, so gcc can readily analyse them. */
extern struct i386_pda _proxy_pda;
|
|
|
|
/*
 * Apply instruction "op" (a string such as "mov" or "add") with source
 * operand "val" to "field" of the PDA addressed through %fs.
 *
 *   op    - instruction mnemonic without size suffix; the b/w/l suffix
 *           is chosen from sizeof the field.
 *   field - member name of struct i386_pda.
 *   val   - value expression; it is cast to the field's type.
 *
 * The never-executed "if (0)" assignment makes gcc check that val can be
 * assigned to the field's type.  _proxy_pda.field is passed as a "+m"
 * operand only so gcc knows the field is read and written; the address
 * actually used by the instruction is %fs:pda_offset(field).
 *
 * val is parenthesized before the cast so that a compound argument such
 * as "a + b" is converted as a whole.  The byte-sized case uses the "q"
 * register class because only %eax/%ebx/%ecx/%edx have 8-bit forms on
 * i386.
 *
 * Fields whose size is not 1, 2 or 4 bytes end up calling
 * __bad_pda_field().
 */
#define pda_to_op(op,field,val)						\
	do {								\
		typedef typeof(_proxy_pda.field) T__;			\
		if (0) { T__ tmp__; tmp__ = (val); }			\
		switch (sizeof(_proxy_pda.field)) {			\
		case 1:							\
			asm(op "b %1,%%fs:%c2"				\
			    : "+m" (_proxy_pda.field)			\
			    : "qi" ((T__)(val)),			\
			      "i" (pda_offset(field)));			\
			break;						\
		case 2:							\
			asm(op "w %1,%%fs:%c2"				\
			    : "+m" (_proxy_pda.field)			\
			    : "ri" ((T__)(val)),			\
			      "i" (pda_offset(field)));			\
			break;						\
		case 4:							\
			asm(op "l %1,%%fs:%c2"				\
			    : "+m" (_proxy_pda.field)			\
			    : "ri" ((T__)(val)),			\
			      "i" (pda_offset(field)));			\
			break;						\
		default:						\
			__bad_pda_field();				\
		}							\
	} while (0)
|
|
|
|
/*
 * Evaluate to the result of instruction "op" (e.g. "mov") reading
 * "field" of the PDA addressed through %fs into a register.
 *
 *   op    - instruction mnemonic without size suffix; the b/w/l suffix
 *           is chosen from sizeof the field.
 *   field - member name of struct i386_pda.
 *
 * The value of the statement expression has the field's type.
 * _proxy_pda.field is passed as an "m" input only so gcc knows which
 * memory the asm reads; the address actually used is
 * %fs:pda_offset(field).
 *
 * The byte-sized case uses the "q" register class for the destination
 * because only %eax/%ebx/%ecx/%edx have 8-bit forms on i386.
 *
 * Fields whose size is not 1, 2 or 4 bytes end up calling
 * __bad_pda_field().
 */
#define pda_from_op(op,field)						\
	({								\
		typeof(_proxy_pda.field) ret__;				\
		switch (sizeof(_proxy_pda.field)) {			\
		case 1:							\
			asm(op "b %%fs:%c1,%0"				\
			    : "=q" (ret__)				\
			    : "i" (pda_offset(field)),			\
			      "m" (_proxy_pda.field));			\
			break;						\
		case 2:							\
			asm(op "w %%fs:%c1,%0"				\
			    : "=r" (ret__)				\
			    : "i" (pda_offset(field)),			\
			      "m" (_proxy_pda.field));			\
			break;						\
		case 4:							\
			asm(op "l %%fs:%c1,%0"				\
			    : "=r" (ret__)				\
			    : "i" (pda_offset(field)),			\
			      "m" (_proxy_pda.field));			\
			break;						\
		default:						\
			__bad_pda_field();				\
		}							\
		ret__;							\
	})
|
|
|
|
/* Return a pointer to a pda field: the PDA's self pointer (_pda), read
   via read_pda(), plus the field's byte offset, typed as a pointer to
   the field. */
#define pda_addr(field)							\
	((typeof(_proxy_pda.field) *)((unsigned char *)read_pda(_pda) +	\
				      pda_offset(field)))
|
|
|
|
/* Accessors for a PDA field, each a single instruction through %fs:
   read_pda() evaluates to the field's value; write_pda() stores val;
   add_pda(), sub_pda() and or_pda() apply val to the field in place. */
#define read_pda(field) pda_from_op("mov",field)
#define write_pda(field,val) pda_to_op("mov",field,val)
#define add_pda(field,val) pda_to_op("add",field,val)
#define sub_pda(field,val) pda_to_op("sub",field,val)
#define or_pda(field,val) pda_to_op("or",field,val)
|
|
|
|
#endif /* _I386_PDA_H */
|