mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-27 12:55:04 +07:00
e354d7dc81
current may be cached by the compiler, so remove the volatile asm restriction. This results in better generated code: as well as being smaller and having fewer dependent loads, it can avoid store-hit-load flushes like this one that shows up in irq_exit(): preempt_count_sub(HARDIRQ_OFFSET); if (!in_interrupt() && ...) Which ends up as: ((struct thread_info *)current)->preempt_count -= HARDIRQ_OFFSET; if (((struct thread_info *)current)->preempt_count ... Evaluating current twice presently means it has to be loaded twice, and here gcc happens to pick a different register each time, then preempt_count is accessed via that base register: 1058: ld r10,2392(r13) <-- current 105c: lwz r9,0(r10) <-- preempt_count 1060: addis r9,r9,-1 1064: stw r9,0(r10) <-- preempt_count 1068: ld r9,2392(r13) <-- current 106c: lwz r9,0(r9) <-- preempt_count 1070: rlwinm. r9,r9,0,11,23 1074: bne 1090 <irq_exit+0x60> This can frustrate store-hit-load detection heuristics and cause flushes. Allowing the compiler to cache current in a register with this patch results in the same base register being used for all accesses, which is more likely to be detected as an alias: 1058: ld r31,2392(r13) ... 1070: lwz r9,0(r31) 1074: addis r9,r9,-1 1078: stw r9,0(r31) 107c: lwz r9,0(r31) 1080: rlwinm. r9,r9,0,11,23 1084: bne 10a0 <irq_exit+0x60> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20190612140317.24490-1-npiggin@gmail.com
39 lines
680 B
C
39 lines
680 B
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
#ifndef _ASM_POWERPC_CURRENT_H
|
|
#define _ASM_POWERPC_CURRENT_H
|
|
#ifdef __KERNEL__
|
|
|
|
/*
|
|
*/
|
|
|
|
struct task_struct;
|
|
|
|
#ifdef __powerpc64__
|
|
#include <linux/stddef.h>
|
|
#include <asm/paca.h>
|
|
|
|
static inline struct task_struct *get_current(void)
|
|
{
|
|
struct task_struct *task;
|
|
|
|
/* get_current can be cached by the compiler, so no volatile */
|
|
asm ("ld %0,%1(13)"
|
|
: "=r" (task)
|
|
: "i" (offsetof(struct paca_struct, __current)));
|
|
|
|
return task;
|
|
}
|
|
/* `current' expands to a call to the inline function get_current(). */
#define current get_current()
|
|
|
|
#else
|
|
|
|
/*
 * We keep `current' in r2 for speed.
 *
 * This branch is used when __powerpc64__ is not defined: `current' is
 * declared as a register variable bound to r2 instead of being a macro
 * that calls get_current().
 */
|
|
register struct task_struct *current asm ("r2");
|
|
|
|
#endif
|
|
|
|
#endif /* __KERNEL__ */
|
|
#endif /* _ASM_POWERPC_CURRENT_H */
|