linux_dsm_epyc7002/arch/mips/include/asm/futex.h
Huacai Chen e02e07e312
MIPS: Loongson: Introduce and use loongson_llsc_mb()
On the Loongson-2G/2H/3A/3B there is a hardware flaw that ll/sc and
lld/scd is very weak ordering. We should add sync instructions "before
each ll/lld" and "at the branch-target between ll/sc" to workaround.
Otherwise, this flaw will cause deadlock occasionally (e.g. when doing
heavy load test with LTP).

Below is the explaination of CPU designer:

"For Loongson 3 family, when a memory access instruction (load, store,
or prefetch)'s executing occurs between the execution of LL and SC, the
success or failure of SC is not predictable. Although programmer would
not insert memory access instructions between LL and SC, the memory
instructions before LL in program-order, may dynamically executed
between the execution of LL/SC, so a memory fence (SYNC) is needed
before LL/LLD to avoid this situation.

Since Loongson-3A R2 (3A2000), we have improved our hardware design to
handle this case. But we later deduce a rarely circumstance that some
speculatively executed memory instructions due to branch misprediction
between LL/SC still fall into the above case, so a memory fence (SYNC)
at branch-target (if its target is not between LL/SC) is needed for
Loongson 3A1000, 3B1500, 3A2000 and 3A3000.

Our processor is continually evolving and we aim to to remove all these
workaround-SYNCs around LL/SC for new-come processor."

Here is an example:

Both cpu1 and cpu2 simutaneously run atomic_add by 1 on same atomic var,
this bug cause both 'sc' run by two cpus (in atomic_add) succeed at same
time('sc' return 1), and the variable is only *added by 1*, sometimes,
which is wrong and unacceptable(it should be added by 2).

Why disable fix-loongson3-llsc in compiler?
Because compiler fix will cause problems in kernel's __ex_table section.

This patch fix all the cases in kernel, but:

+. the fix at the end of futex_atomic_cmpxchg_inatomic is for branch-target
of 'bne', there other cases which smp_mb__before_llsc() and smp_llsc_mb() fix
the ll and branch-target coincidently such as atomic_sub_if_positive/
cmpxchg/xchg, just like this one.

+. Loongson 3 does support CONFIG_EDAC_ATOMIC_SCRUB, so no need to touch
edac.h

+. local_ops and cmpxchg_local should not be affected by this bug since
only the owner can write.

+. mips_atomic_set for syscall.c is deprecated and rarely used, just let
it go

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Signed-off-by: Huang Pei <huangpei@loongson.cn>
[paul.burton@mips.com:
  - Simplify the addition of -mno-fix-loongson3-llsc to cflags, and add
    a comment describing why it's there.
  - Make loongson_llsc_mb() a no-op when
    CONFIG_CPU_LOONGSON3_WORKAROUNDS=n, rather than a compiler memory
    barrier.
  - Add a comment describing the bug & how loongson_llsc_mb() helps
    in asm/barrier.h.]
Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: ambrosehua@gmail.com
Cc: Steven J . Hill <Steven.Hill@cavium.com>
Cc: linux-mips@linux-mips.org
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: Li Xuefeng <lixuefeng@loongson.cn>
Cc: Xu Chenghua <xuchenghua@loongson.cn>
2019-02-04 10:53:34 -08:00

207 lines
5.0 KiB
C

/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (c) 2006 Ralf Baechle (ralf@linux-mips.org)
*/
#ifndef _ASM_FUTEX_H
#define _ASM_FUTEX_H
#ifdef __KERNEL__
#include <linux/futex.h>
#include <linux/uaccess.h>
#include <asm/asm-eva.h>
#include <asm/barrier.h>
#include <asm/compiler.h>
#include <asm/errno.h>
#include <asm/war.h>
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
{ \
if (cpu_has_llsc && R10000_LLSC_WAR) { \
__asm__ __volatile__( \
" .set push \n" \
" .set noat \n" \
" .set push \n" \
" .set arch=r4000 \n" \
"1: ll %1, %4 # __futex_atomic_op \n" \
" .set pop \n" \
" " insn " \n" \
" .set arch=r4000 \n" \
"2: sc $1, %2 \n" \
" beqzl $1, 1b \n" \
__WEAK_LLSC_MB \
"3: \n" \
" .insn \n" \
" .set pop \n" \
" .section .fixup,\"ax\" \n" \
"4: li %0, %6 \n" \
" j 3b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
" "__UA_ADDR "\t1b, 4b \n" \
" "__UA_ADDR "\t2b, 4b \n" \
" .previous \n" \
: "=r" (ret), "=&r" (oldval), \
"=" GCC_OFF_SMALL_ASM() (*uaddr) \
: "0" (0), GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oparg), \
"i" (-EFAULT) \
: "memory"); \
} else if (cpu_has_llsc) { \
loongson_llsc_mb(); \
__asm__ __volatile__( \
" .set push \n" \
" .set noat \n" \
" .set push \n" \
" .set "MIPS_ISA_ARCH_LEVEL" \n" \
"1: "user_ll("%1", "%4")" # __futex_atomic_op\n" \
" .set pop \n" \
" " insn " \n" \
" .set "MIPS_ISA_ARCH_LEVEL" \n" \
"2: "user_sc("$1", "%2")" \n" \
" beqz $1, 1b \n" \
__WEAK_LLSC_MB \
"3: \n" \
" .insn \n" \
" .set pop \n" \
" .section .fixup,\"ax\" \n" \
"4: li %0, %6 \n" \
" j 3b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
" "__UA_ADDR "\t1b, 4b \n" \
" "__UA_ADDR "\t2b, 4b \n" \
" .previous \n" \
: "=r" (ret), "=&r" (oldval), \
"=" GCC_OFF_SMALL_ASM() (*uaddr) \
: "0" (0), GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oparg), \
"i" (-EFAULT) \
: "memory"); \
} else \
ret = -ENOSYS; \
}
static inline int
arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
int oldval = 0, ret;
pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op("move $1, %z5", ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_ADD:
__futex_atomic_op("addu $1, %1, %z5",
ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_OR:
__futex_atomic_op("or $1, %1, %z5",
ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_ANDN:
__futex_atomic_op("and $1, %1, %z5",
ret, oldval, uaddr, ~oparg);
break;
case FUTEX_OP_XOR:
__futex_atomic_op("xor $1, %1, %z5",
ret, oldval, uaddr, oparg);
break;
default:
ret = -ENOSYS;
}
pagefault_enable();
if (!ret)
*oval = oldval;
return ret;
}
static inline int
futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval)
{
int ret = 0;
u32 val;
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
if (cpu_has_llsc && R10000_LLSC_WAR) {
__asm__ __volatile__(
"# futex_atomic_cmpxchg_inatomic \n"
" .set push \n"
" .set noat \n"
" .set push \n"
" .set arch=r4000 \n"
"1: ll %1, %3 \n"
" bne %1, %z4, 3f \n"
" .set pop \n"
" move $1, %z5 \n"
" .set arch=r4000 \n"
"2: sc $1, %2 \n"
" beqzl $1, 1b \n"
__WEAK_LLSC_MB
"3: \n"
" .insn \n"
" .set pop \n"
" .section .fixup,\"ax\" \n"
"4: li %0, %6 \n"
" j 3b \n"
" .previous \n"
" .section __ex_table,\"a\" \n"
" "__UA_ADDR "\t1b, 4b \n"
" "__UA_ADDR "\t2b, 4b \n"
" .previous \n"
: "+r" (ret), "=&r" (val), "=" GCC_OFF_SMALL_ASM() (*uaddr)
: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
"i" (-EFAULT)
: "memory");
} else if (cpu_has_llsc) {
loongson_llsc_mb();
__asm__ __volatile__(
"# futex_atomic_cmpxchg_inatomic \n"
" .set push \n"
" .set noat \n"
" .set push \n"
" .set "MIPS_ISA_ARCH_LEVEL" \n"
"1: "user_ll("%1", "%3")" \n"
" bne %1, %z4, 3f \n"
" .set pop \n"
" move $1, %z5 \n"
" .set "MIPS_ISA_ARCH_LEVEL" \n"
"2: "user_sc("$1", "%2")" \n"
" beqz $1, 1b \n"
__WEAK_LLSC_MB
"3: \n"
" .insn \n"
" .set pop \n"
" .section .fixup,\"ax\" \n"
"4: li %0, %6 \n"
" j 3b \n"
" .previous \n"
" .section __ex_table,\"a\" \n"
" "__UA_ADDR "\t1b, 4b \n"
" "__UA_ADDR "\t2b, 4b \n"
" .previous \n"
: "+r" (ret), "=&r" (val), "=" GCC_OFF_SMALL_ASM() (*uaddr)
: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
"i" (-EFAULT)
: "memory");
loongson_llsc_mb();
} else
return -ENOSYS;
*uval = val;
return ret;
}
#endif
#endif /* _ASM_FUTEX_H */