mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-26 15:50:57 +07:00
Merge branch 'cmpxchg64' of git://git.linaro.org/people/nico/linux into devel-stable
This commit is contained in:
commit
2ff0720933
267
Documentation/arm/kernel_user_helpers.txt
Normal file
267
Documentation/arm/kernel_user_helpers.txt
Normal file
@ -0,0 +1,267 @@
|
||||
Kernel-provided User Helpers
|
||||
============================
|
||||
|
||||
These are segment of kernel provided user code reachable from user space
|
||||
at a fixed address in kernel memory. This is used to provide user space
|
||||
with some operations which require kernel help because of unimplemented
|
||||
native feature and/or instructions in many ARM CPUs. The idea is for this
|
||||
code to be executed directly in user mode for best efficiency but which is
|
||||
too intimate with the kernel counter part to be left to user libraries.
|
||||
In fact this code might even differ from one CPU to another depending on
|
||||
the available instruction set, or whether it is a SMP systems. In other
|
||||
words, the kernel reserves the right to change this code as needed without
|
||||
warning. Only the entry points and their results as documented here are
|
||||
guaranteed to be stable.
|
||||
|
||||
This is different from (but doesn't preclude) a full blown VDSO
|
||||
implementation, however a VDSO would prevent some assembly tricks with
|
||||
constants that allows for efficient branching to those code segments. And
|
||||
since those code segments only use a few cycles before returning to user
|
||||
code, the overhead of a VDSO indirect far call would add a measurable
|
||||
overhead to such minimalistic operations.
|
||||
|
||||
User space is expected to bypass those helpers and implement those things
|
||||
inline (either in the code emitted directly by the compiler, or part of
|
||||
the implementation of a library call) when optimizing for a recent enough
|
||||
processor that has the necessary native support, but only if resulting
|
||||
binaries are already to be incompatible with earlier ARM processors due to
|
||||
useage of similar native instructions for other things. In other words
|
||||
don't make binaries unable to run on earlier processors just for the sake
|
||||
of not using these kernel helpers if your compiled code is not going to
|
||||
use new instructions for other purpose.
|
||||
|
||||
New helpers may be added over time, so an older kernel may be missing some
|
||||
helpers present in a newer kernel. For this reason, programs must check
|
||||
the value of __kuser_helper_version (see below) before assuming that it is
|
||||
safe to call any particular helper. This check should ideally be
|
||||
performed only once at process startup time, and execution aborted early
|
||||
if the required helpers are not provided by the kernel version that
|
||||
process is running on.
|
||||
|
||||
kuser_helper_version
|
||||
--------------------
|
||||
|
||||
Location: 0xffff0ffc
|
||||
|
||||
Reference declaration:
|
||||
|
||||
extern int32_t __kuser_helper_version;
|
||||
|
||||
Definition:
|
||||
|
||||
This field contains the number of helpers being implemented by the
|
||||
running kernel. User space may read this to determine the availability
|
||||
of a particular helper.
|
||||
|
||||
Usage example:
|
||||
|
||||
#define __kuser_helper_version (*(int32_t *)0xffff0ffc)
|
||||
|
||||
void check_kuser_version(void)
|
||||
{
|
||||
if (__kuser_helper_version < 2) {
|
||||
fprintf(stderr, "can't do atomic operations, kernel too old\n");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
Notes:
|
||||
|
||||
User space may assume that the value of this field never changes
|
||||
during the lifetime of any single process. This means that this
|
||||
field can be read once during the initialisation of a library or
|
||||
startup phase of a program.
|
||||
|
||||
kuser_get_tls
|
||||
-------------
|
||||
|
||||
Location: 0xffff0fe0
|
||||
|
||||
Reference prototype:
|
||||
|
||||
void * __kuser_get_tls(void);
|
||||
|
||||
Input:
|
||||
|
||||
lr = return address
|
||||
|
||||
Output:
|
||||
|
||||
r0 = TLS value
|
||||
|
||||
Clobbered registers:
|
||||
|
||||
none
|
||||
|
||||
Definition:
|
||||
|
||||
Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
|
||||
|
||||
Usage example:
|
||||
|
||||
typedef void * (__kuser_get_tls_t)(void);
|
||||
#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
|
||||
|
||||
void foo()
|
||||
{
|
||||
void *tls = __kuser_get_tls();
|
||||
printf("TLS = %p\n", tls);
|
||||
}
|
||||
|
||||
Notes:
|
||||
|
||||
- Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).
|
||||
|
||||
kuser_cmpxchg
|
||||
-------------
|
||||
|
||||
Location: 0xffff0fc0
|
||||
|
||||
Reference prototype:
|
||||
|
||||
int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
|
||||
|
||||
Input:
|
||||
|
||||
r0 = oldval
|
||||
r1 = newval
|
||||
r2 = ptr
|
||||
lr = return address
|
||||
|
||||
Output:
|
||||
|
||||
r0 = success code (zero or non-zero)
|
||||
C flag = set if r0 == 0, clear if r0 != 0
|
||||
|
||||
Clobbered registers:
|
||||
|
||||
r3, ip, flags
|
||||
|
||||
Definition:
|
||||
|
||||
Atomically store newval in *ptr only if *ptr is equal to oldval.
|
||||
Return zero if *ptr was changed or non-zero if no exchange happened.
|
||||
The C flag is also set if *ptr was changed to allow for assembly
|
||||
optimization in the calling code.
|
||||
|
||||
Usage example:
|
||||
|
||||
typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
|
||||
#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
|
||||
|
||||
int atomic_add(volatile int *ptr, int val)
|
||||
{
|
||||
int old, new;
|
||||
|
||||
do {
|
||||
old = *ptr;
|
||||
new = old + val;
|
||||
} while(__kuser_cmpxchg(old, new, ptr));
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
Notes:
|
||||
|
||||
- This routine already includes memory barriers as needed.
|
||||
|
||||
- Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).
|
||||
|
||||
kuser_memory_barrier
|
||||
--------------------
|
||||
|
||||
Location: 0xffff0fa0
|
||||
|
||||
Reference prototype:
|
||||
|
||||
void __kuser_memory_barrier(void);
|
||||
|
||||
Input:
|
||||
|
||||
lr = return address
|
||||
|
||||
Output:
|
||||
|
||||
none
|
||||
|
||||
Clobbered registers:
|
||||
|
||||
none
|
||||
|
||||
Definition:
|
||||
|
||||
Apply any needed memory barrier to preserve consistency with data modified
|
||||
manually and __kuser_cmpxchg usage.
|
||||
|
||||
Usage example:
|
||||
|
||||
typedef void (__kuser_dmb_t)(void);
|
||||
#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
|
||||
|
||||
Notes:
|
||||
|
||||
- Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).
|
||||
|
||||
kuser_cmpxchg64
|
||||
---------------
|
||||
|
||||
Location: 0xffff0f60
|
||||
|
||||
Reference prototype:
|
||||
|
||||
int __kuser_cmpxchg64(const int64_t *oldval,
|
||||
const int64_t *newval,
|
||||
volatile int64_t *ptr);
|
||||
|
||||
Input:
|
||||
|
||||
r0 = pointer to oldval
|
||||
r1 = pointer to newval
|
||||
r2 = pointer to target value
|
||||
lr = return address
|
||||
|
||||
Output:
|
||||
|
||||
r0 = success code (zero or non-zero)
|
||||
C flag = set if r0 == 0, clear if r0 != 0
|
||||
|
||||
Clobbered registers:
|
||||
|
||||
r3, lr, flags
|
||||
|
||||
Definition:
|
||||
|
||||
Atomically store the 64-bit value pointed by *newval in *ptr only if *ptr
|
||||
is equal to the 64-bit value pointed by *oldval. Return zero if *ptr was
|
||||
changed or non-zero if no exchange happened.
|
||||
|
||||
The C flag is also set if *ptr was changed to allow for assembly
|
||||
optimization in the calling code.
|
||||
|
||||
Usage example:
|
||||
|
||||
typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
|
||||
const int64_t *newval,
|
||||
volatile int64_t *ptr);
|
||||
#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
|
||||
|
||||
int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
|
||||
{
|
||||
int64_t old, new;
|
||||
|
||||
do {
|
||||
old = *ptr;
|
||||
new = old + val;
|
||||
} while(__kuser_cmpxchg64(&old, &new, ptr));
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
Notes:
|
||||
|
||||
- This routine already includes memory barriers as needed.
|
||||
|
||||
- Due to the length of this sequence, this spans 2 conventional kuser
|
||||
"slots", therefore 0xffff0f80 is not used as a valid entry point.
|
||||
|
||||
- Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).
|
@ -383,7 +383,7 @@ ENDPROC(__pabt_svc)
|
||||
.endm
|
||||
|
||||
.macro kuser_cmpxchg_check
|
||||
#if __LINUX_ARM_ARCH__ < 6 && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
|
||||
#if !defined(CONFIG_CPU_32v6K) && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
|
||||
#ifndef CONFIG_MMU
|
||||
#warning "NPTL on non MMU needs fixing"
|
||||
#else
|
||||
@ -392,7 +392,7 @@ ENDPROC(__pabt_svc)
|
||||
@ perform a quick test inline since it should be false
|
||||
@ 99.9999% of the time. The rest is done out of line.
|
||||
cmp r2, #TASK_SIZE
|
||||
blhs kuser_cmpxchg_fixup
|
||||
blhs kuser_cmpxchg64_fixup
|
||||
#endif
|
||||
#endif
|
||||
.endm
|
||||
@ -758,31 +758,12 @@ ENDPROC(__switch_to)
|
||||
/*
|
||||
* User helpers.
|
||||
*
|
||||
* These are segment of kernel provided user code reachable from user space
|
||||
* at a fixed address in kernel memory. This is used to provide user space
|
||||
* with some operations which require kernel help because of unimplemented
|
||||
* native feature and/or instructions in many ARM CPUs. The idea is for
|
||||
* this code to be executed directly in user mode for best efficiency but
|
||||
* which is too intimate with the kernel counter part to be left to user
|
||||
* libraries. In fact this code might even differ from one CPU to another
|
||||
* depending on the available instruction set and restrictions like on
|
||||
* SMP systems. In other words, the kernel reserves the right to change
|
||||
* this code as needed without warning. Only the entry points and their
|
||||
* results are guaranteed to be stable.
|
||||
*
|
||||
* Each segment is 32-byte aligned and will be moved to the top of the high
|
||||
* vector page. New segments (if ever needed) must be added in front of
|
||||
* existing ones. This mechanism should be used only for things that are
|
||||
* really small and justified, and not be abused freely.
|
||||
*
|
||||
* User space is expected to implement those things inline when optimizing
|
||||
* for a processor that has the necessary native support, but only if such
|
||||
* resulting binaries are already to be incompatible with earlier ARM
|
||||
* processors due to the use of unsupported instructions other than what
|
||||
* is provided here. In other words don't make binaries unable to run on
|
||||
* earlier processors just for the sake of not using these kernel helpers
|
||||
* if your compiled code is not going to use the new instructions for other
|
||||
* purpose.
|
||||
* See Documentation/arm/kernel_user_helpers.txt for formal definitions.
|
||||
*/
|
||||
THUMB( .arm )
|
||||
|
||||
@ -799,97 +780,104 @@ ENDPROC(__switch_to)
|
||||
__kuser_helper_start:
|
||||
|
||||
/*
|
||||
* Reference prototype:
|
||||
*
|
||||
* void __kernel_memory_barrier(void)
|
||||
*
|
||||
* Input:
|
||||
*
|
||||
* lr = return address
|
||||
*
|
||||
* Output:
|
||||
*
|
||||
* none
|
||||
*
|
||||
* Clobbered:
|
||||
*
|
||||
* none
|
||||
*
|
||||
* Definition and user space usage example:
|
||||
*
|
||||
* typedef void (__kernel_dmb_t)(void);
|
||||
* #define __kernel_dmb (*(__kernel_dmb_t *)0xffff0fa0)
|
||||
*
|
||||
* Apply any needed memory barrier to preserve consistency with data modified
|
||||
* manually and __kuser_cmpxchg usage.
|
||||
*
|
||||
* This could be used as follows:
|
||||
*
|
||||
* #define __kernel_dmb() \
|
||||
* asm volatile ( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #95" \
|
||||
* : : : "r0", "lr","cc" )
|
||||
* Due to the length of some sequences, __kuser_cmpxchg64 spans 2 regular
|
||||
* kuser "slots", therefore 0xffff0f80 is not used as a valid entry point.
|
||||
*/
|
||||
|
||||
__kuser_cmpxchg64: @ 0xffff0f60
|
||||
|
||||
#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
|
||||
|
||||
/*
|
||||
* Poor you. No fast solution possible...
|
||||
* The kernel itself must perform the operation.
|
||||
* A special ghost syscall is used for that (see traps.c).
|
||||
*/
|
||||
stmfd sp!, {r7, lr}
|
||||
ldr r7, 1f @ it's 20 bits
|
||||
swi __ARM_NR_cmpxchg64
|
||||
ldmfd sp!, {r7, pc}
|
||||
1: .word __ARM_NR_cmpxchg64
|
||||
|
||||
#elif defined(CONFIG_CPU_32v6K)
|
||||
|
||||
stmfd sp!, {r4, r5, r6, r7}
|
||||
ldrd r4, r5, [r0] @ load old val
|
||||
ldrd r6, r7, [r1] @ load new val
|
||||
smp_dmb arm
|
||||
1: ldrexd r0, r1, [r2] @ load current val
|
||||
eors r3, r0, r4 @ compare with oldval (1)
|
||||
eoreqs r3, r1, r5 @ compare with oldval (2)
|
||||
strexdeq r3, r6, r7, [r2] @ store newval if eq
|
||||
teqeq r3, #1 @ success?
|
||||
beq 1b @ if no then retry
|
||||
smp_dmb arm
|
||||
rsbs r0, r3, #0 @ set returned val and C flag
|
||||
ldmfd sp!, {r4, r5, r6, r7}
|
||||
bx lr
|
||||
|
||||
#elif !defined(CONFIG_SMP)
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
|
||||
/*
|
||||
* The only thing that can break atomicity in this cmpxchg64
|
||||
* implementation is either an IRQ or a data abort exception
|
||||
* causing another process/thread to be scheduled in the middle of
|
||||
* the critical sequence. The same strategy as for cmpxchg is used.
|
||||
*/
|
||||
stmfd sp!, {r4, r5, r6, lr}
|
||||
ldmia r0, {r4, r5} @ load old val
|
||||
ldmia r1, {r6, lr} @ load new val
|
||||
1: ldmia r2, {r0, r1} @ load current val
|
||||
eors r3, r0, r4 @ compare with oldval (1)
|
||||
eoreqs r3, r1, r5 @ compare with oldval (2)
|
||||
2: stmeqia r2, {r6, lr} @ store newval if eq
|
||||
rsbs r0, r3, #0 @ set return val and C flag
|
||||
ldmfd sp!, {r4, r5, r6, pc}
|
||||
|
||||
.text
|
||||
kuser_cmpxchg64_fixup:
|
||||
@ Called from kuser_cmpxchg_fixup.
|
||||
@ r2 = address of interrupted insn (must be preserved).
|
||||
@ sp = saved regs. r7 and r8 are clobbered.
|
||||
@ 1b = first critical insn, 2b = last critical insn.
|
||||
@ If r2 >= 1b and r2 <= 2b then saved pc_usr is set to 1b.
|
||||
mov r7, #0xffff0fff
|
||||
sub r7, r7, #(0xffff0fff - (0xffff0f60 + (1b - __kuser_cmpxchg64)))
|
||||
subs r8, r2, r7
|
||||
rsbcss r8, r8, #(2b - 1b)
|
||||
strcs r7, [sp, #S_PC]
|
||||
#if __LINUX_ARM_ARCH__ < 6
|
||||
bcc kuser_cmpxchg32_fixup
|
||||
#endif
|
||||
mov pc, lr
|
||||
.previous
|
||||
|
||||
#else
|
||||
#warning "NPTL on non MMU needs fixing"
|
||||
mov r0, #-1
|
||||
adds r0, r0, #0
|
||||
usr_ret lr
|
||||
#endif
|
||||
|
||||
#else
|
||||
#error "incoherent kernel configuration"
|
||||
#endif
|
||||
|
||||
/* pad to next slot */
|
||||
.rept (16 - (. - __kuser_cmpxchg64)/4)
|
||||
.word 0
|
||||
.endr
|
||||
|
||||
.align 5
|
||||
|
||||
__kuser_memory_barrier: @ 0xffff0fa0
|
||||
smp_dmb arm
|
||||
usr_ret lr
|
||||
|
||||
.align 5
|
||||
|
||||
/*
|
||||
* Reference prototype:
|
||||
*
|
||||
* int __kernel_cmpxchg(int oldval, int newval, int *ptr)
|
||||
*
|
||||
* Input:
|
||||
*
|
||||
* r0 = oldval
|
||||
* r1 = newval
|
||||
* r2 = ptr
|
||||
* lr = return address
|
||||
*
|
||||
* Output:
|
||||
*
|
||||
* r0 = returned value (zero or non-zero)
|
||||
* C flag = set if r0 == 0, clear if r0 != 0
|
||||
*
|
||||
* Clobbered:
|
||||
*
|
||||
* r3, ip, flags
|
||||
*
|
||||
* Definition and user space usage example:
|
||||
*
|
||||
* typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
|
||||
* #define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
|
||||
*
|
||||
* Atomically store newval in *ptr if *ptr is equal to oldval for user space.
|
||||
* Return zero if *ptr was changed or non-zero if no exchange happened.
|
||||
* The C flag is also set if *ptr was changed to allow for assembly
|
||||
* optimization in the calling code.
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
* - This routine already includes memory barriers as needed.
|
||||
*
|
||||
* For example, a user space atomic_add implementation could look like this:
|
||||
*
|
||||
* #define atomic_add(ptr, val) \
|
||||
* ({ register unsigned int *__ptr asm("r2") = (ptr); \
|
||||
* register unsigned int __result asm("r1"); \
|
||||
* asm volatile ( \
|
||||
* "1: @ atomic_add\n\t" \
|
||||
* "ldr r0, [r2]\n\t" \
|
||||
* "mov r3, #0xffff0fff\n\t" \
|
||||
* "add lr, pc, #4\n\t" \
|
||||
* "add r1, r0, %2\n\t" \
|
||||
* "add pc, r3, #(0xffff0fc0 - 0xffff0fff)\n\t" \
|
||||
* "bcc 1b" \
|
||||
* : "=&r" (__result) \
|
||||
* : "r" (__ptr), "rIL" (val) \
|
||||
* : "r0","r3","ip","lr","cc","memory" ); \
|
||||
* __result; })
|
||||
*/
|
||||
|
||||
__kuser_cmpxchg: @ 0xffff0fc0
|
||||
|
||||
#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
|
||||
@ -925,7 +913,7 @@ __kuser_cmpxchg: @ 0xffff0fc0
|
||||
usr_ret lr
|
||||
|
||||
.text
|
||||
kuser_cmpxchg_fixup:
|
||||
kuser_cmpxchg32_fixup:
|
||||
@ Called from kuser_cmpxchg_check macro.
|
||||
@ r2 = address of interrupted insn (must be preserved).
|
||||
@ sp = saved regs. r7 and r8 are clobbered.
|
||||
@ -963,39 +951,6 @@ kuser_cmpxchg_fixup:
|
||||
|
||||
.align 5
|
||||
|
||||
/*
|
||||
* Reference prototype:
|
||||
*
|
||||
* int __kernel_get_tls(void)
|
||||
*
|
||||
* Input:
|
||||
*
|
||||
* lr = return address
|
||||
*
|
||||
* Output:
|
||||
*
|
||||
* r0 = TLS value
|
||||
*
|
||||
* Clobbered:
|
||||
*
|
||||
* none
|
||||
*
|
||||
* Definition and user space usage example:
|
||||
*
|
||||
* typedef int (__kernel_get_tls_t)(void);
|
||||
* #define __kernel_get_tls (*(__kernel_get_tls_t *)0xffff0fe0)
|
||||
*
|
||||
* Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
|
||||
*
|
||||
* This could be used as follows:
|
||||
*
|
||||
* #define __kernel_get_tls() \
|
||||
* ({ register unsigned int __val asm("r0"); \
|
||||
* asm( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #31" \
|
||||
* : "=r" (__val) : : "lr","cc" ); \
|
||||
* __val; })
|
||||
*/
|
||||
|
||||
__kuser_get_tls: @ 0xffff0fe0
|
||||
ldr r0, [pc, #(16 - 8)] @ read TLS, set in kuser_get_tls_init
|
||||
usr_ret lr
|
||||
@ -1004,19 +959,6 @@ __kuser_get_tls: @ 0xffff0fe0
|
||||
.word 0 @ 0xffff0ff0 software TLS value, then
|
||||
.endr @ pad up to __kuser_helper_version
|
||||
|
||||
/*
|
||||
* Reference declaration:
|
||||
*
|
||||
* extern unsigned int __kernel_helper_version;
|
||||
*
|
||||
* Definition and user space usage example:
|
||||
*
|
||||
* #define __kernel_helper_version (*(unsigned int *)0xffff0ffc)
|
||||
*
|
||||
* User space may read this to determine the curent number of helpers
|
||||
* available.
|
||||
*/
|
||||
|
||||
__kuser_helper_version: @ 0xffff0ffc
|
||||
.word ((__kuser_helper_end - __kuser_helper_start) >> 5)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user