linux_dsm_epyc7002/arch/mips/lib/memset.S

329 lines
7.6 KiB
ArmAsm
Raw Normal View History

/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 1998, 1999, 2000 by Ralf Baechle
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
* Copyright (C) 2007 by Maciej W. Rozycki
* Copyright (C) 2011, 2012 MIPS Technologies, Inc.
*/
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>
#if LONGSIZE == 4
#define LONG_S_L swl
#define LONG_S_R swr
#else
#define LONG_S_L sdl
#define LONG_S_R sdr
#endif
#ifdef CONFIG_CPU_MICROMIPS
#define STORSIZE (LONGSIZE * 2)
#define STORMASK (STORSIZE - 1)
#define FILL64RG t8
#define FILLPTRG t7
#undef LONG_S
#define LONG_S LONG_SP
#else
#define STORSIZE LONGSIZE
#define STORMASK LONGMASK
#define FILL64RG a1
#define FILLPTRG t0
#endif
#define LEGACY_MODE 1
#define EVA_MODE 2
/*
* No need to protect it with EVA #ifdefery. The generated block of code
* will never be assembled if EVA is not enabled.
*/
#define __EVAFY(insn, reg, addr) __BUILD_EVA_INSN(insn##e, reg, addr)
#define ___BUILD_EVA_INSN(insn, reg, addr) __EVAFY(insn, reg, addr)
#define EX(insn,reg,addr,handler) \
.if \mode == LEGACY_MODE; \
9: insn reg, addr; \
.else; \
9: ___BUILD_EVA_INSN(insn, reg, addr); \
.endif; \
.section __ex_table,"a"; \
PTR 9b, handler; \
.previous
.macro f_fill64 dst, offset, val, fixup, mode
EX(LONG_S, \val, (\offset + 0 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 1 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 2 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 3 * STORSIZE)(\dst), \fixup)
#if ((defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4)) || !defined(CONFIG_CPU_MICROMIPS))
EX(LONG_S, \val, (\offset + 4 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 5 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 6 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 7 * STORSIZE)(\dst), \fixup)
#endif
#if (!defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4))
EX(LONG_S, \val, (\offset + 8 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 9 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 10 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 11 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 12 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 13 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 14 * STORSIZE)(\dst), \fixup)
EX(LONG_S, \val, (\offset + 15 * STORSIZE)(\dst), \fixup)
#endif
.endm
.align 5
/*
* Macro to generate the __bzero{,_user} symbol
* Arguments:
* mode: LEGACY_MODE or EVA_MODE
*/
.macro __BUILD_BZERO mode
/* Initialize __memset if this is the first time we call this macro */
.ifnotdef __memset
.set __memset, 1
.hidden __memset /* Make sure it does not leak */
.endif
sltiu t0, a2, STORSIZE /* very small region? */
.set noreorder
bnez t0, .Lsmall_memset\@
andi t0, a0, STORMASK /* aligned? */
.set reorder
#ifdef CONFIG_CPU_MICROMIPS
move t8, a1 /* used by 'swp' instruction */
move t9, a1
#endif
.set noreorder
[MIPS] R4000/R4400 daddiu erratum workaround This complements the generic R4000/R4400 errata workaround code and adds bits for the daddiu problem. In most places it just modifies handwritten assembly code so that the assembler is allowed to use a temporary register as daddiu may now be treated as a macro that expands to a sequence of li and daddu. It is the AT register or, where AT is unavailable or used explicitly for another purpose, an explicitly-named register is selected, using the .set at=<reg> feature added recently to gas. This feature is only used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if the workaround remains disabled, the required version of binutils stays unchanged. Similarly, daddiu instructions put in branch delay slots in noreorder fragments are now taken out of them and the assembler is allowed to reorder them itself as possible (which it does making the whole idea of scheduling them into delay slots manually questionable). Also in the very few places where such a simple conversion was not possible, a handcoded longer sequence is implemented. Other than that there are changes to code responsible for building the TLB fault and page clear/copy handlers to avoid daddiu as appropriate. These are only effective if the erratum is verified to be present at the run time. Finally there is a trivial update to __delay(), because it uses daddiu in a branch delay slot. Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2007-10-23 18:43:25 +07:00
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
beqz t0, 1f
PTR_SUBU t0, STORSIZE /* alignment in bytes */
[MIPS] R4000/R4400 daddiu erratum workaround This complements the generic R4000/R4400 errata workaround code and adds bits for the daddiu problem. In most places it just modifies handwritten assembly code so that the assembler is allowed to use a temporary register as daddiu may now be treated as a macro that expands to a sequence of li and daddu. It is the AT register or, where AT is unavailable or used explicitly for another purpose, an explicitly-named register is selected, using the .set at=<reg> feature added recently to gas. This feature is only used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if the workaround remains disabled, the required version of binutils stays unchanged. Similarly, daddiu instructions put in branch delay slots in noreorder fragments are now taken out of them and the assembler is allowed to reorder them itself as possible (which it does making the whole idea of scheduling them into delay slots manually questionable). Also in the very few places where such a simple conversion was not possible, a handcoded longer sequence is implemented. Other than that there are changes to code responsible for building the TLB fault and page clear/copy handlers to avoid daddiu as appropriate. These are only effective if the erratum is verified to be present at the run time. Finally there is a trivial update to __delay(), because it uses daddiu in a branch delay slot. Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2007-10-23 18:43:25 +07:00
#else
.set noat
li AT, STORSIZE
[MIPS] R4000/R4400 daddiu erratum workaround This complements the generic R4000/R4400 errata workaround code and adds bits for the daddiu problem. In most places it just modifies handwritten assembly code so that the assembler is allowed to use a temporary register as daddiu may now be treated as a macro that expands to a sequence of li and daddu. It is the AT register or, where AT is unavailable or used explicitly for another purpose, an explicitly-named register is selected, using the .set at=<reg> feature added recently to gas. This feature is only used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if the workaround remains disabled, the required version of binutils stays unchanged. Similarly, daddiu instructions put in branch delay slots in noreorder fragments are now taken out of them and the assembler is allowed to reorder them itself as possible (which it does making the whole idea of scheduling them into delay slots manually questionable). Also in the very few places where such a simple conversion was not possible, a handcoded longer sequence is implemented. Other than that there are changes to code responsible for building the TLB fault and page clear/copy handlers to avoid daddiu as appropriate. These are only effective if the erratum is verified to be present at the run time. Finally there is a trivial update to __delay(), because it uses daddiu in a branch delay slot. Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2007-10-23 18:43:25 +07:00
beqz t0, 1f
PTR_SUBU t0, AT /* alignment in bytes */
[MIPS] R4000/R4400 daddiu erratum workaround This complements the generic R4000/R4400 errata workaround code and adds bits for the daddiu problem. In most places it just modifies handwritten assembly code so that the assembler is allowed to use a temporary register as daddiu may now be treated as a macro that expands to a sequence of li and daddu. It is the AT register or, where AT is unavailable or used explicitly for another purpose, an explicitly-named register is selected, using the .set at=<reg> feature added recently to gas. This feature is only used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if the workaround remains disabled, the required version of binutils stays unchanged. Similarly, daddiu instructions put in branch delay slots in noreorder fragments are now taken out of them and the assembler is allowed to reorder them itself as possible (which it does making the whole idea of scheduling them into delay slots manually questionable). Also in the very few places where such a simple conversion was not possible, a handcoded longer sequence is implemented. Other than that there are changes to code responsible for building the TLB fault and page clear/copy handlers to avoid daddiu as appropriate. These are only effective if the erratum is verified to be present at the run time. Finally there is a trivial update to __delay(), because it uses daddiu in a branch delay slot. Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2007-10-23 18:43:25 +07:00
.set at
#endif
.set reorder
MIPS: make CPU_HAS_LOAD_STORE_LR opt-out CPU_HAS_LOAD_STORE_LR was introduced in 932afdeec18b ("MIPS: Add Kconfig variable for CPUs with unaligned load/store instructions") to make code in kernel/unaligned.c and lib/mem{cpy,set}.S more intuitive and give a possibility to easily add new CPUs without these instruction sets in future. Hovewer, this variant is not optimal for mainly two reasons: * For now, we have 20+ CPUs with such instructions and only two (MIPS R6) without. It will obviously be more effective and straightforward to have an option for these two rather than for the rest. * You can easily miss the fact that you need to select this option when adding a new CPU, while all processors lacking these sets are well-known, so the probability of missing something is way much lower. We can address both points by turning CPU_HAS_LOAD_STORE_LR into opt-out CPU_NO_LOAD_STORE_LR. This also makes MIPS root Kconfig more clear and understandable. Signed-off-by: Alexander Lobakin <alobakin@dlink.ru> Signed-off-by: Paul Burton <paulburton@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> Cc: Will Deacon <will@kernel.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Allison Randal <allison@lohutok.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org
2020-01-22 17:58:50 +07:00
#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
R10KCBARRIER(0(ra))
#ifdef __MIPSEB__
EX(LONG_S_L, a1, (a0), .Lfirst_fixup\@) /* make word/dword aligned */
#else
EX(LONG_S_R, a1, (a0), .Lfirst_fixup\@) /* make word/dword aligned */
#endif
PTR_SUBU a0, t0 /* long align ptr */
PTR_ADDU a2, t0 /* correct size */
MIPS: make CPU_HAS_LOAD_STORE_LR opt-out CPU_HAS_LOAD_STORE_LR was introduced in 932afdeec18b ("MIPS: Add Kconfig variable for CPUs with unaligned load/store instructions") to make code in kernel/unaligned.c and lib/mem{cpy,set}.S more intuitive and give a possibility to easily add new CPUs without these instruction sets in future. Hovewer, this variant is not optimal for mainly two reasons: * For now, we have 20+ CPUs with such instructions and only two (MIPS R6) without. It will obviously be more effective and straightforward to have an option for these two rather than for the rest. * You can easily miss the fact that you need to select this option when adding a new CPU, while all processors lacking these sets are well-known, so the probability of missing something is way much lower. We can address both points by turning CPU_HAS_LOAD_STORE_LR into opt-out CPU_NO_LOAD_STORE_LR. This also makes MIPS root Kconfig more clear and understandable. Signed-off-by: Alexander Lobakin <alobakin@dlink.ru> Signed-off-by: Paul Burton <paulburton@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> Cc: Will Deacon <will@kernel.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Allison Randal <allison@lohutok.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org
2020-01-22 17:58:50 +07:00
#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
#define STORE_BYTE(N) \
EX(sb, a1, N(a0), .Lbyte_fixup\@); \
.set noreorder; \
beqz t0, 0f; \
PTR_ADDU t0, 1; \
.set reorder;
PTR_ADDU a2, t0 /* correct size */
PTR_ADDU t0, 1
STORE_BYTE(0)
STORE_BYTE(1)
#if LONGSIZE == 4
EX(sb, a1, 2(a0), .Lbyte_fixup\@)
#else
STORE_BYTE(2)
STORE_BYTE(3)
STORE_BYTE(4)
STORE_BYTE(5)
EX(sb, a1, 6(a0), .Lbyte_fixup\@)
#endif
0:
ori a0, STORMASK
xori a0, STORMASK
PTR_ADDIU a0, STORSIZE
MIPS: make CPU_HAS_LOAD_STORE_LR opt-out CPU_HAS_LOAD_STORE_LR was introduced in 932afdeec18b ("MIPS: Add Kconfig variable for CPUs with unaligned load/store instructions") to make code in kernel/unaligned.c and lib/mem{cpy,set}.S more intuitive and give a possibility to easily add new CPUs without these instruction sets in future. Hovewer, this variant is not optimal for mainly two reasons: * For now, we have 20+ CPUs with such instructions and only two (MIPS R6) without. It will obviously be more effective and straightforward to have an option for these two rather than for the rest. * You can easily miss the fact that you need to select this option when adding a new CPU, while all processors lacking these sets are well-known, so the probability of missing something is way much lower. We can address both points by turning CPU_HAS_LOAD_STORE_LR into opt-out CPU_NO_LOAD_STORE_LR. This also makes MIPS root Kconfig more clear and understandable. Signed-off-by: Alexander Lobakin <alobakin@dlink.ru> Signed-off-by: Paul Burton <paulburton@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> Cc: Will Deacon <will@kernel.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Allison Randal <allison@lohutok.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org
2020-01-22 17:58:50 +07:00
#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
1: ori t1, a2, 0x3f /* # of full blocks */
xori t1, 0x3f
andi t0, a2, 0x40-STORSIZE
beqz t1, .Lmemset_partial\@ /* no block to fill */
PTR_ADDU t1, a0 /* end address */
1: PTR_ADDIU a0, 64
R10KCBARRIER(0(ra))
f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode
bne t1, a0, 1b
.Lmemset_partial\@:
R10KCBARRIER(0(ra))
PTR_LA t1, 2f /* where to start */
#ifdef CONFIG_CPU_MICROMIPS
LONG_SRL t7, t0, 1
#endif
#if LONGSIZE == 4
PTR_SUBU t1, FILLPTRG
#else
.set noat
LONG_SRL AT, FILLPTRG, 1
PTR_SUBU t1, AT
[MIPS] R4000/R4400 daddiu erratum workaround This complements the generic R4000/R4400 errata workaround code and adds bits for the daddiu problem. In most places it just modifies handwritten assembly code so that the assembler is allowed to use a temporary register as daddiu may now be treated as a macro that expands to a sequence of li and daddu. It is the AT register or, where AT is unavailable or used explicitly for another purpose, an explicitly-named register is selected, using the .set at=<reg> feature added recently to gas. This feature is only used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if the workaround remains disabled, the required version of binutils stays unchanged. Similarly, daddiu instructions put in branch delay slots in noreorder fragments are now taken out of them and the assembler is allowed to reorder them itself as possible (which it does making the whole idea of scheduling them into delay slots manually questionable). Also in the very few places where such a simple conversion was not possible, a handcoded longer sequence is implemented. Other than that there are changes to code responsible for building the TLB fault and page clear/copy handlers to avoid daddiu as appropriate. These are only effective if the erratum is verified to be present at the run time. Finally there is a trivial update to __delay(), because it uses daddiu in a branch delay slot. Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2007-10-23 18:43:25 +07:00
.set at
#endif
PTR_ADDU a0, t0 /* dest ptr */
jr t1
/* ... but first do longs ... */
f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode
2: andi a2, STORMASK /* At most one long to go */
.set noreorder
beqz a2, 1f
MIPS: make CPU_HAS_LOAD_STORE_LR opt-out CPU_HAS_LOAD_STORE_LR was introduced in 932afdeec18b ("MIPS: Add Kconfig variable for CPUs with unaligned load/store instructions") to make code in kernel/unaligned.c and lib/mem{cpy,set}.S more intuitive and give a possibility to easily add new CPUs without these instruction sets in future. Hovewer, this variant is not optimal for mainly two reasons: * For now, we have 20+ CPUs with such instructions and only two (MIPS R6) without. It will obviously be more effective and straightforward to have an option for these two rather than for the rest. * You can easily miss the fact that you need to select this option when adding a new CPU, while all processors lacking these sets are well-known, so the probability of missing something is way much lower. We can address both points by turning CPU_HAS_LOAD_STORE_LR into opt-out CPU_NO_LOAD_STORE_LR. This also makes MIPS root Kconfig more clear and understandable. Signed-off-by: Alexander Lobakin <alobakin@dlink.ru> Signed-off-by: Paul Burton <paulburton@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> Cc: Will Deacon <will@kernel.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Allison Randal <allison@lohutok.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org
2020-01-22 17:58:50 +07:00
#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
PTR_ADDU a0, a2 /* What's left */
.set reorder
R10KCBARRIER(0(ra))
#ifdef __MIPSEB__
EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
#else
EX(LONG_S_L, a1, -1(a0), .Llast_fixup\@)
#endif
MIPS: make CPU_HAS_LOAD_STORE_LR opt-out CPU_HAS_LOAD_STORE_LR was introduced in 932afdeec18b ("MIPS: Add Kconfig variable for CPUs with unaligned load/store instructions") to make code in kernel/unaligned.c and lib/mem{cpy,set}.S more intuitive and give a possibility to easily add new CPUs without these instruction sets in future. Hovewer, this variant is not optimal for mainly two reasons: * For now, we have 20+ CPUs with such instructions and only two (MIPS R6) without. It will obviously be more effective and straightforward to have an option for these two rather than for the rest. * You can easily miss the fact that you need to select this option when adding a new CPU, while all processors lacking these sets are well-known, so the probability of missing something is way much lower. We can address both points by turning CPU_HAS_LOAD_STORE_LR into opt-out CPU_NO_LOAD_STORE_LR. This also makes MIPS root Kconfig more clear and understandable. Signed-off-by: Alexander Lobakin <alobakin@dlink.ru> Signed-off-by: Paul Burton <paulburton@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> Cc: Will Deacon <will@kernel.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Allison Randal <allison@lohutok.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org
2020-01-22 17:58:50 +07:00
#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
PTR_SUBU t0, $0, a2
.set reorder
MIPS: memset.S: Fix byte_fixup for MIPSr6 The __clear_user function is defined to return the number of bytes that could not be cleared. From the underlying memset / bzero implementation this means setting register a2 to that number on return. Currently if a page fault is triggered within the MIPSr6 version of setting of initial unaligned bytes, the value loaded into a2 on return is meaningless. During the MIPSr6 version of the initial unaligned bytes block, register a2 contains the number of bytes to be set beyond the initial unaligned bytes. The t0 register is initally set to the number of unaligned bytes - STORSIZE, effectively a negative version of the number of unaligned bytes. This is then incremented before each byte is saved. The label .Lbyte_fixup\@ is jumped to on page fault. Currently the value in a2 is incorrectly replaced by 0 - t0 + 1, effectively the number of unaligned bytes remaining. This leads to the failures being reported by the following test code: static int __init test_clear_user(void) { int j, k; pr_info("\n\n\nTesting clear_user\n"); for (j = 0; j < 512; j++) { if ((k = clear_user(NULL+3, j)) != j) { pr_err("clear_user (NULL %d) returned %d\n", j, k); } } return 0; } late_initcall(test_clear_user); Which reports: [ 3.965439] Testing clear_user [ 3.973169] clear_user (NULL 8) returned 6 [ 3.976782] clear_user (NULL 9) returned 6 [ 3.980390] clear_user (NULL 10) returned 6 [ 3.984052] clear_user (NULL 11) returned 6 [ 3.987524] clear_user (NULL 12) returned 6 Fix this by subtracting t0 from a2 (rather than $0), effectivey giving: unset_bytes = (#bytes - (#unaligned bytes)) - (-#unaligned bytes remaining + 1) + 1 a2 = a2 - t0 + 1 This fixes the value returned from __clear user when the number of bytes to set is > LONGSIZE and the address is invalid and unaligned. Unfortunately, this breaks the fixup handling for unaligned bytes after the final long, where register a2 still contains the number of bytes remaining to be set and the t0 register is to 0 - the number of unaligned bytes remaining. Because t0 is now is now subtracted from a2 rather than 0, the number of bytes unset is reported incorrectly: static int __init test_clear_user(void) { char *test; int j, k; pr_info("\n\n\nTesting clear_user\n"); test = vmalloc(PAGE_SIZE); for (j = 256; j < 512; j++) { if ((k = clear_user(test + PAGE_SIZE - 254, j)) != j - 254) { pr_err("clear_user (%px %d) returned %d\n", test + PAGE_SIZE - 254, j, k); } } return 0; } late_initcall(test_clear_user); [ 3.976775] clear_user (c00000000000df02 256) returned 4 [ 3.981957] clear_user (c00000000000df02 257) returned 6 [ 3.986425] clear_user (c00000000000df02 258) returned 8 [ 3.990850] clear_user (c00000000000df02 259) returned 10 [ 3.995332] clear_user (c00000000000df02 260) returned 12 [ 3.999815] clear_user (c00000000000df02 261) returned 14 Fix this by ensuring that a2 is set to 0 during the set of final unaligned bytes. Signed-off-by: Matt Redfearn <matt.redfearn@mips.com> Signed-off-by: Paul Burton <paul.burton@mips.com> Fixes: 8c56208aff77 ("MIPS: lib: memset: Add MIPS R6 support") Patchwork: https://patchwork.linux-mips.org/patch/19338/ Cc: James Hogan <jhogan@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v4.0+
2018-05-23 20:39:58 +07:00
move a2, zero /* No remaining longs */
PTR_ADDIU t0, 1
STORE_BYTE(0)
STORE_BYTE(1)
#if LONGSIZE == 4
EX(sb, a1, 2(a0), .Lbyte_fixup\@)
#else
STORE_BYTE(2)
STORE_BYTE(3)
STORE_BYTE(4)
STORE_BYTE(5)
EX(sb, a1, 6(a0), .Lbyte_fixup\@)
#endif
0:
MIPS: make CPU_HAS_LOAD_STORE_LR opt-out CPU_HAS_LOAD_STORE_LR was introduced in 932afdeec18b ("MIPS: Add Kconfig variable for CPUs with unaligned load/store instructions") to make code in kernel/unaligned.c and lib/mem{cpy,set}.S more intuitive and give a possibility to easily add new CPUs without these instruction sets in future. Hovewer, this variant is not optimal for mainly two reasons: * For now, we have 20+ CPUs with such instructions and only two (MIPS R6) without. It will obviously be more effective and straightforward to have an option for these two rather than for the rest. * You can easily miss the fact that you need to select this option when adding a new CPU, while all processors lacking these sets are well-known, so the probability of missing something is way much lower. We can address both points by turning CPU_HAS_LOAD_STORE_LR into opt-out CPU_NO_LOAD_STORE_LR. This also makes MIPS root Kconfig more clear and understandable. Signed-off-by: Alexander Lobakin <alobakin@dlink.ru> Signed-off-by: Paul Burton <paulburton@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> Cc: Will Deacon <will@kernel.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Allison Randal <allison@lohutok.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org
2020-01-22 17:58:50 +07:00
#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
1: move a2, zero
jr ra
.Lsmall_memset\@:
PTR_ADDU t1, a0, a2
beqz a2, 2f
1: PTR_ADDIU a0, 1 /* fill bytewise */
R10KCBARRIER(0(ra))
.set noreorder
bne t1, a0, 1b
EX(sb, a1, -1(a0), .Lsmall_fixup\@)
.set reorder
2: move a2, zero
jr ra /* done */
.if __memset == 1
END(memset)
.set __memset, 0
.hidden __memset
.endif
MIPS: make CPU_HAS_LOAD_STORE_LR opt-out CPU_HAS_LOAD_STORE_LR was introduced in 932afdeec18b ("MIPS: Add Kconfig variable for CPUs with unaligned load/store instructions") to make code in kernel/unaligned.c and lib/mem{cpy,set}.S more intuitive and give a possibility to easily add new CPUs without these instruction sets in future. Hovewer, this variant is not optimal for mainly two reasons: * For now, we have 20+ CPUs with such instructions and only two (MIPS R6) without. It will obviously be more effective and straightforward to have an option for these two rather than for the rest. * You can easily miss the fact that you need to select this option when adding a new CPU, while all processors lacking these sets are well-known, so the probability of missing something is way much lower. We can address both points by turning CPU_HAS_LOAD_STORE_LR into opt-out CPU_NO_LOAD_STORE_LR. This also makes MIPS root Kconfig more clear and understandable. Signed-off-by: Alexander Lobakin <alobakin@dlink.ru> Signed-off-by: Paul Burton <paulburton@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> Cc: Will Deacon <will@kernel.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Allison Randal <allison@lohutok.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org
2020-01-22 17:58:50 +07:00
#ifdef CONFIG_CPU_NO_LOAD_STORE_LR
.Lbyte_fixup\@:
/*
* unset_bytes = (#bytes - (#unaligned bytes)) - (-#unaligned bytes remaining + 1) + 1
* a2 = a2 - t0 + 1
*/
MIPS: memset.S: Fix byte_fixup for MIPSr6 The __clear_user function is defined to return the number of bytes that could not be cleared. From the underlying memset / bzero implementation this means setting register a2 to that number on return. Currently if a page fault is triggered within the MIPSr6 version of setting of initial unaligned bytes, the value loaded into a2 on return is meaningless. During the MIPSr6 version of the initial unaligned bytes block, register a2 contains the number of bytes to be set beyond the initial unaligned bytes. The t0 register is initally set to the number of unaligned bytes - STORSIZE, effectively a negative version of the number of unaligned bytes. This is then incremented before each byte is saved. The label .Lbyte_fixup\@ is jumped to on page fault. Currently the value in a2 is incorrectly replaced by 0 - t0 + 1, effectively the number of unaligned bytes remaining. This leads to the failures being reported by the following test code: static int __init test_clear_user(void) { int j, k; pr_info("\n\n\nTesting clear_user\n"); for (j = 0; j < 512; j++) { if ((k = clear_user(NULL+3, j)) != j) { pr_err("clear_user (NULL %d) returned %d\n", j, k); } } return 0; } late_initcall(test_clear_user); Which reports: [ 3.965439] Testing clear_user [ 3.973169] clear_user (NULL 8) returned 6 [ 3.976782] clear_user (NULL 9) returned 6 [ 3.980390] clear_user (NULL 10) returned 6 [ 3.984052] clear_user (NULL 11) returned 6 [ 3.987524] clear_user (NULL 12) returned 6 Fix this by subtracting t0 from a2 (rather than $0), effectivey giving: unset_bytes = (#bytes - (#unaligned bytes)) - (-#unaligned bytes remaining + 1) + 1 a2 = a2 - t0 + 1 This fixes the value returned from __clear user when the number of bytes to set is > LONGSIZE and the address is invalid and unaligned. Unfortunately, this breaks the fixup handling for unaligned bytes after the final long, where register a2 still contains the number of bytes remaining to be set and the t0 register is to 0 - the number of unaligned bytes remaining. Because t0 is now is now subtracted from a2 rather than 0, the number of bytes unset is reported incorrectly: static int __init test_clear_user(void) { char *test; int j, k; pr_info("\n\n\nTesting clear_user\n"); test = vmalloc(PAGE_SIZE); for (j = 256; j < 512; j++) { if ((k = clear_user(test + PAGE_SIZE - 254, j)) != j - 254) { pr_err("clear_user (%px %d) returned %d\n", test + PAGE_SIZE - 254, j, k); } } return 0; } late_initcall(test_clear_user); [ 3.976775] clear_user (c00000000000df02 256) returned 4 [ 3.981957] clear_user (c00000000000df02 257) returned 6 [ 3.986425] clear_user (c00000000000df02 258) returned 8 [ 3.990850] clear_user (c00000000000df02 259) returned 10 [ 3.995332] clear_user (c00000000000df02 260) returned 12 [ 3.999815] clear_user (c00000000000df02 261) returned 14 Fix this by ensuring that a2 is set to 0 during the set of final unaligned bytes. Signed-off-by: Matt Redfearn <matt.redfearn@mips.com> Signed-off-by: Paul Burton <paul.burton@mips.com> Fixes: 8c56208aff77 ("MIPS: lib: memset: Add MIPS R6 support") Patchwork: https://patchwork.linux-mips.org/patch/19338/ Cc: James Hogan <jhogan@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v4.0+
2018-05-23 20:39:58 +07:00
PTR_SUBU a2, t0
PTR_ADDIU a2, 1
jr ra
MIPS: make CPU_HAS_LOAD_STORE_LR opt-out CPU_HAS_LOAD_STORE_LR was introduced in 932afdeec18b ("MIPS: Add Kconfig variable for CPUs with unaligned load/store instructions") to make code in kernel/unaligned.c and lib/mem{cpy,set}.S more intuitive and give a possibility to easily add new CPUs without these instruction sets in future. Hovewer, this variant is not optimal for mainly two reasons: * For now, we have 20+ CPUs with such instructions and only two (MIPS R6) without. It will obviously be more effective and straightforward to have an option for these two rather than for the rest. * You can easily miss the fact that you need to select this option when adding a new CPU, while all processors lacking these sets are well-known, so the probability of missing something is way much lower. We can address both points by turning CPU_HAS_LOAD_STORE_LR into opt-out CPU_NO_LOAD_STORE_LR. This also makes MIPS root Kconfig more clear and understandable. Signed-off-by: Alexander Lobakin <alobakin@dlink.ru> Signed-off-by: Paul Burton <paulburton@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> Cc: Will Deacon <will@kernel.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Allison Randal <allison@lohutok.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org
2020-01-22 17:58:50 +07:00
#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
.Lfirst_fixup\@:
/* unset_bytes already in a2 */
jr ra
.Lfwd_fixup\@:
/*
* unset_bytes = partial_start_addr + #bytes - fault_addr
* a2 = t1 + (a2 & 3f) - $28->task->BUADDR
*/
PTR_L t0, TI_TASK($28)
andi a2, 0x3f
LONG_L t0, THREAD_BUADDR(t0)
LONG_ADDU a2, t1
LONG_SUBU a2, t0
jr ra
.Lpartial_fixup\@:
/*
* unset_bytes = partial_end_addr + #bytes - fault_addr
* a2 = a0 + (a2 & STORMASK) - $28->task->BUADDR
*/
PTR_L t0, TI_TASK($28)
andi a2, STORMASK
LONG_L t0, THREAD_BUADDR(t0)
LONG_ADDU a2, a0
LONG_SUBU a2, t0
jr ra
.Llast_fixup\@:
/* unset_bytes already in a2 */
jr ra
.Lsmall_fixup\@:
/*
* unset_bytes = end_addr - current_addr + 1
* a2 = t1 - a0 + 1
*/
PTR_SUBU a2, t1, a0
PTR_ADDIU a2, 1
jr ra
.endm
/*
* memset(void *s, int c, size_t n)
*
* a0: start of area to clear
* a1: char to fill with
* a2: size of area to clear
*/
LEAF(memset)
EXPORT_SYMBOL(memset)
move v0, a0 /* result */
beqz a1, 1f
andi a1, 0xff /* spread fillword */
LONG_SLL t1, a1, 8
or a1, t1
LONG_SLL t1, a1, 16
#if LONGSIZE == 8
or a1, t1
LONG_SLL t1, a1, 32
#endif
or a1, t1
1:
#ifndef CONFIG_EVA
FEXPORT(__bzero)
EXPORT_SYMBOL(__bzero)
#else
FEXPORT(__bzero_kernel)
EXPORT_SYMBOL(__bzero_kernel)
#endif
__BUILD_BZERO LEGACY_MODE
#ifdef CONFIG_EVA
LEAF(__bzero)
EXPORT_SYMBOL(__bzero)
__BUILD_BZERO EVA_MODE
END(__bzero)
#endif