mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-24 22:50:57 +07:00
x86: Separate out entry text section
Put x86 entry code into a separate link section: .entry.text.

Separating the entry text section seems to have performance benefits, caused by more efficient instruction cache usage.

Running hackbench with perf stat --repeat showed that the change compresses the icache footprint. The icache load miss rate went down by about 15%:

  before patch:
    19417627 L1-icache-load-misses ( +- 0.147% )

  after patch:
    16490788 L1-icache-load-misses ( +- 0.180% )

The motivation of the patch was to fix a particular kprobes bug that relates to the entry text section; the performance advantage was discovered accidentally.

Whole perf output follows:

- results for current tip tree:

  Performance counter stats for './hackbench/hackbench 10' (500 runs):

    19417627 L1-icache-load-misses ( +- 0.147% )
    2676914223 instructions # 0.497 IPC ( +- 0.079% )
    5389516026 cycles ( +- 0.144% )

    0.206267711 seconds time elapsed ( +- 0.138% )

- results for current tip tree with the patch applied:

  Performance counter stats for './hackbench/hackbench 10' (500 runs):

    16490788 L1-icache-load-misses ( +- 0.180% )
    2717734941 instructions # 0.502 IPC ( +- 0.079% )
    5414756975 cycles ( +- 0.148% )

    0.206747566 seconds time elapsed ( +- 0.137% )

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: masami.hiramatsu.pt@hitachi.com
Cc: ananth@in.ibm.com
Cc: davem@davemloft.net
Cc: 2nddept-manager@sdl.hitachi.co.jp
LKML-Reference: <20110307181039.GB15197@jolsa.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
86cb2ec7b2
commit
ea7145477a
@ -25,6 +25,8 @@
|
|||||||
#define sysretl_audit ia32_ret_from_sys_call
|
#define sysretl_audit ia32_ret_from_sys_call
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
.section .entry.text, "ax"
|
||||||
|
|
||||||
#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
|
#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
|
||||||
|
|
||||||
.macro IA32_ARG_FIXUP noebp=0
|
.macro IA32_ARG_FIXUP noebp=0
|
||||||
|
@ -65,6 +65,8 @@
|
|||||||
#define sysexit_audit syscall_exit_work
|
#define sysexit_audit syscall_exit_work
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
.section .entry.text, "ax"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We use macros for low-level operations which need to be overridden
|
* We use macros for low-level operations which need to be overridden
|
||||||
* for paravirtualization. The following will never clobber any registers:
|
* for paravirtualization. The following will never clobber any registers:
|
||||||
@ -788,7 +790,7 @@ ENDPROC(ptregs_clone)
|
|||||||
*/
|
*/
|
||||||
.section .init.rodata,"a"
|
.section .init.rodata,"a"
|
||||||
ENTRY(interrupt)
|
ENTRY(interrupt)
|
||||||
.text
|
.section .entry.text, "ax"
|
||||||
.p2align 5
|
.p2align 5
|
||||||
.p2align CONFIG_X86_L1_CACHE_SHIFT
|
.p2align CONFIG_X86_L1_CACHE_SHIFT
|
||||||
ENTRY(irq_entries_start)
|
ENTRY(irq_entries_start)
|
||||||
@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR
|
|||||||
.endif
|
.endif
|
||||||
.previous
|
.previous
|
||||||
.long 1b
|
.long 1b
|
||||||
.text
|
.section .entry.text, "ax"
|
||||||
vector=vector+1
|
vector=vector+1
|
||||||
.endif
|
.endif
|
||||||
.endr
|
.endr
|
||||||
|
@ -61,6 +61,8 @@
|
|||||||
#define __AUDIT_ARCH_LE 0x40000000
|
#define __AUDIT_ARCH_LE 0x40000000
|
||||||
|
|
||||||
.code64
|
.code64
|
||||||
|
.section .entry.text, "ax"
|
||||||
|
|
||||||
#ifdef CONFIG_FUNCTION_TRACER
|
#ifdef CONFIG_FUNCTION_TRACER
|
||||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||||
ENTRY(mcount)
|
ENTRY(mcount)
|
||||||
@ -744,7 +746,7 @@ END(stub_rt_sigreturn)
|
|||||||
*/
|
*/
|
||||||
.section .init.rodata,"a"
|
.section .init.rodata,"a"
|
||||||
ENTRY(interrupt)
|
ENTRY(interrupt)
|
||||||
.text
|
.section .entry.text
|
||||||
.p2align 5
|
.p2align 5
|
||||||
.p2align CONFIG_X86_L1_CACHE_SHIFT
|
.p2align CONFIG_X86_L1_CACHE_SHIFT
|
||||||
ENTRY(irq_entries_start)
|
ENTRY(irq_entries_start)
|
||||||
@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR
|
|||||||
.endif
|
.endif
|
||||||
.previous
|
.previous
|
||||||
.quad 1b
|
.quad 1b
|
||||||
.text
|
.section .entry.text
|
||||||
vector=vector+1
|
vector=vector+1
|
||||||
.endif
|
.endif
|
||||||
.endr
|
.endr
|
||||||
|
@ -105,6 +105,7 @@ SECTIONS
|
|||||||
SCHED_TEXT
|
SCHED_TEXT
|
||||||
LOCK_TEXT
|
LOCK_TEXT
|
||||||
KPROBES_TEXT
|
KPROBES_TEXT
|
||||||
|
ENTRY_TEXT
|
||||||
IRQENTRY_TEXT
|
IRQENTRY_TEXT
|
||||||
*(.fixup)
|
*(.fixup)
|
||||||
*(.gnu.warning)
|
*(.gnu.warning)
|
||||||
|
@ -11,6 +11,7 @@ extern char _sinittext[], _einittext[];
|
|||||||
extern char _end[];
|
extern char _end[];
|
||||||
extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
|
extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
|
||||||
extern char __kprobes_text_start[], __kprobes_text_end[];
|
extern char __kprobes_text_start[], __kprobes_text_end[];
|
||||||
|
extern char __entry_text_start[], __entry_text_end[];
|
||||||
extern char __initdata_begin[], __initdata_end[];
|
extern char __initdata_begin[], __initdata_end[];
|
||||||
extern char __start_rodata[], __end_rodata[];
|
extern char __start_rodata[], __end_rodata[];
|
||||||
|
|
||||||
|
@ -424,6 +424,12 @@
|
|||||||
*(.kprobes.text) \
|
*(.kprobes.text) \
|
||||||
VMLINUX_SYMBOL(__kprobes_text_end) = .;
|
VMLINUX_SYMBOL(__kprobes_text_end) = .;
|
||||||
|
|
||||||
|
#define ENTRY_TEXT \
|
||||||
|
ALIGN_FUNCTION(); \
|
||||||
|
VMLINUX_SYMBOL(__entry_text_start) = .; \
|
||||||
|
*(.entry.text) \
|
||||||
|
VMLINUX_SYMBOL(__entry_text_end) = .;
|
||||||
|
|
||||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||||
#define IRQENTRY_TEXT \
|
#define IRQENTRY_TEXT \
|
||||||
ALIGN_FUNCTION(); \
|
ALIGN_FUNCTION(); \
|
||||||
|
Loading…
Reference in New Issue
Block a user