linux_dsm_epyc7002/arch/parisc/kernel/entry.S

2422 lines
57 KiB
ArmAsm
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Linux/PA-RISC Project (http://www.parisc-linux.org/)
*
* kernel entry points (interruptions, system call wrappers)
* Copyright (C) 1999,2000 Philipp Rumpf
* Copyright (C) 1999 SuSE GmbH Nuernberg
* Copyright (C) 2000 Hewlett-Packard (John Marvin)
* Copyright (C) 1999 Hewlett-Packard (Frank Rowand)
*/
#include <asm/asm-offsets.h>
/* we have the following possibilities to act on an interruption:
* - handle in assembly and use shadowed registers only
* - save registers to kernel stack and handle in assembly or C */
#include <asm/psw.h>
#include <asm/cache.h> /* for L1_CACHE_SHIFT */
#include <asm/assembly.h> /* for LDREG/STREG defines */
#include <asm/pgtable.h>
#include <asm/signal.h>
#include <asm/unistd.h>
#include <asm/ldcw.h>
#include <asm/traps.h>
#include <asm/thread_info.h>
#include <asm/alternative.h>
#include <linux/linkage.h>
#ifdef CONFIG_64BIT
.level 2.0w
#else
.level 2.0
#endif
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
.import pa_tlb_lock,data
.macro load_pa_tlb_lock reg
mfctl %cr25,\reg
addil L%(PAGE_SIZE << (PGD_ALLOC_ORDER - 1)),\reg
.endm
/* space_to_prot macro creates a prot id from a space id */
#if (SPACEID_SHIFT) == 0
.macro space_to_prot spc prot
depd,z \spc,62,31,\prot
.endm
#else
.macro space_to_prot spc prot
extrd,u \spc,(64 - (SPACEID_SHIFT)),32,\prot
.endm
#endif
/* Switch to virtual mapping, trashing only %r1 */
.macro virt_map
/* pcxt_ssm_bug */
rsm PSW_SM_I, %r0 /* barrier for "Relied upon Translation */
mtsp %r0, %sr4
mtsp %r0, %sr5
mtsp %r0, %sr6
tovirt_r1 %r29
load32 KERNEL_PSW, %r1
rsm PSW_SM_QUIET,%r0 /* second "heavy weight" ctl op */
mtctl %r0, %cr17 /* Clear IIASQ tail */
mtctl %r0, %cr17 /* Clear IIASQ head */
mtctl %r1, %ipsw
load32 4f, %r1
mtctl %r1, %cr18 /* Set IIAOQ tail */
ldo 4(%r1), %r1
mtctl %r1, %cr18 /* Set IIAOQ head */
rfir
nop
4:
.endm
/*
* The "get_stack" macros are responsible for determining the
* kernel stack value.
*
* If sr7 == 0
* Already using a kernel stack, so call the
* get_stack_use_r30 macro to push a pt_regs structure
* on the stack, and store registers there.
* else
* Need to set up a kernel stack, so call the
* get_stack_use_cr30 macro to set up a pointer
* to the pt_regs structure contained within the
* task pointer pointed to by cr30. Set the stack
* pointer to point to the end of the task structure.
*
* Note that we use shadowed registers for temps until
* we can save %r26 and %r29. %r26 is used to preserve
* %r8 (a shadowed register) which temporarily contained
* either the fault type ("code") or the eirr. We need
* to use a non-shadowed register to carry the value over
* the rfir in virt_map. We use %r26 since this value winds
* up being passed as the argument to either do_cpu_irq_mask
* or handle_interruption. %r29 is used to hold a pointer
* the register save area, and once again, it needs to
* be a non-shadowed register so that it survives the rfir.
*
* N.B. TASK_SZ_ALGN and PT_SZ_ALGN include space for a stack frame.
*/
.macro get_stack_use_cr30
/* we save the registers in the task struct */
copy %r30, %r17
mfctl %cr30, %r1
ldo THREAD_SZ_ALGN(%r1), %r30
mtsp %r0,%sr7
mtsp %r16,%sr3
tophys %r1,%r9
LDREG TI_TASK(%r9), %r1 /* thread_info -> task_struct */
tophys %r1,%r9
ldo TASK_REGS(%r9),%r9
STREG %r17,PT_GR30(%r9)
STREG %r29,PT_GR29(%r9)
STREG %r26,PT_GR26(%r9)
STREG %r16,PT_SR7(%r9)
copy %r9,%r29
.endm
.macro get_stack_use_r30
/* we put a struct pt_regs on the stack and save the registers there */
tophys %r30,%r9
copy %r30,%r1
ldo PT_SZ_ALGN(%r30),%r30
STREG %r1,PT_GR30(%r9)
STREG %r29,PT_GR29(%r9)
STREG %r26,PT_GR26(%r9)
STREG %r16,PT_SR7(%r9)
copy %r9,%r29
.endm
.macro rest_stack
LDREG PT_GR1(%r29), %r1
LDREG PT_GR30(%r29),%r30
LDREG PT_GR29(%r29),%r29
.endm
/* default interruption handler
* (calls traps.c:handle_interruption) */
.macro def code
b intr_save
ldi \code, %r8
.align 32
.endm
/* Interrupt interruption handler
* (calls irq.c:do_cpu_irq_mask) */
.macro extint code
b intr_extint
mfsp %sr7,%r16
.align 32
.endm
.import os_hpmc, code
/* HPMC handler */
.macro hpmc code
nop /* must be a NOP, will be patched later */
load32 PA(os_hpmc), %r3
bv,n 0(%r3)
nop
.word 0 /* checksum (will be patched) */
parisc: Fix address in HPMC IVA Helge noticed that the address of the os_hpmc handler was not being correctly calculated in the hpmc macro. As a result, PDCE_CHECK would fail to call os_hpmc: <Cpu2> e800009802e00000 0000000000000000 CC_ERR_CHECK_HPMC <Cpu2> 37000f7302e00000 8040004000000000 CC_ERR_CPU_CHECK_SUMMARY <Cpu2> f600105e02e00000 fffffff0f0c00000 CC_MC_HPMC_MONARCH_SELECTED <Cpu2> 140003b202e00000 000000000000000b CC_ERR_HPMC_STATE_ENTRY <Cpu2> 5600100b02e00000 00000000000001a0 CC_MC_OS_HPMC_LEN_ERR <Cpu2> 5600106402e00000 fffffff0f0438e70 CC_MC_BR_TO_OS_HPMC_FAILED <Cpu2> e800009802e00000 0000000000000000 CC_ERR_CHECK_HPMC <Cpu2> 37000f7302e00000 8040004000000000 CC_ERR_CPU_CHECK_SUMMARY <Cpu2> 4000109f02e00000 0000000000000000 CC_MC_HPMC_INITIATED <Cpu2> 4000101902e00000 0000000000000000 CC_MC_MULTIPLE_HPMCS <Cpu2> 030010d502e00000 0000000000000000 CC_CPU_STOP The address problem can be seen by dumping the fault vector: 0000000040159000 <fault_vector_20>: 40159000: 63 6f 77 73 stb r15,-2447(dp) 40159004: 20 63 61 6e ldil L%b747000,r3 40159008: 20 66 6c 79 ldil L%-1c3b3000,r3 ... 40159020: 08 00 02 40 nop 40159024: 20 6e 60 02 ldil L%15d000,r3 40159028: 34 63 00 00 ldo 0(r3),r3 4015902c: e8 60 c0 02 bv,n r0(r3) 40159030: 08 00 02 40 nop 40159034: 00 00 00 00 break 0,0 40159038: c0 00 70 00 bb,*< r0,sar,40159840 <fault_vector_20+0x840> 4015903c: 00 00 00 00 break 0,0 Location 40159038 should contain the physical address of os_hpmc: 000000004015d000 <os_hpmc>: 4015d000: 08 1a 02 43 copy r26,r3 4015d004: 01 c0 08 a4 mfctl iva,r4 4015d008: 48 85 00 68 ldw 34(r4),r5 This patch moves the address setup into initialize_ivt to resolve the above problem. I tested the change by dumping the HPMC entry after setup: 0000000040209020: 8000240 0000000040209024: 206a2004 0000000040209028: 34630ac0 000000004020902c: e860c002 0000000040209030: 8000240 0000000040209034: 1bdddce6 0000000040209038: 15d000 000000004020903c: 1a0 Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: <stable@vger.kernel.org> Signed-off-by: Helge Deller <deller@gmx.de>
2018-10-07 00:11:30 +07:00
.word 0 /* address of handler */
.word 0 /* length of handler */
.endm
/*
* Performance Note: Instructions will be moved up into
* this part of the code later on, once we are sure
* that the tlb miss handlers are close to final form.
*/
/* Register definitions for tlb miss handler macros */
va = r8 /* virtual address for which the trap occurred */
spc = r24 /* space for which the trap occurred */
#ifndef CONFIG_64BIT
/*
* itlb miss interruption handler (parisc 1.1 - 32 bit)
*/
.macro itlb_11 code
mfctl %pcsq, spc
b itlb_miss_11
mfctl %pcoq, va
.align 32
.endm
#endif
/*
* itlb miss interruption handler (parisc 2.0)
*/
.macro itlb_20 code
mfctl %pcsq, spc
#ifdef CONFIG_64BIT
b itlb_miss_20w
#else
b itlb_miss_20
#endif
mfctl %pcoq, va
.align 32
.endm
#ifndef CONFIG_64BIT
/*
* naitlb miss interruption handler (parisc 1.1 - 32 bit)
*/
.macro naitlb_11 code
mfctl %isr,spc
b naitlb_miss_11
mfctl %ior,va
.align 32
.endm
#endif
/*
* naitlb miss interruption handler (parisc 2.0)
*/
.macro naitlb_20 code
mfctl %isr,spc
#ifdef CONFIG_64BIT
b naitlb_miss_20w
#else
b naitlb_miss_20
#endif
mfctl %ior,va
.align 32
.endm
#ifndef CONFIG_64BIT
/*
* dtlb miss interruption handler (parisc 1.1 - 32 bit)
*/
.macro dtlb_11 code
mfctl %isr, spc
b dtlb_miss_11
mfctl %ior, va
.align 32
.endm
#endif
/*
* dtlb miss interruption handler (parisc 2.0)
*/
.macro dtlb_20 code
mfctl %isr, spc
#ifdef CONFIG_64BIT
b dtlb_miss_20w
#else
b dtlb_miss_20
#endif
mfctl %ior, va
.align 32
.endm
#ifndef CONFIG_64BIT
/* nadtlb miss interruption handler (parisc 1.1 - 32 bit) */
.macro nadtlb_11 code
mfctl %isr,spc
b nadtlb_miss_11
mfctl %ior,va
.align 32
.endm
#endif
/* nadtlb miss interruption handler (parisc 2.0) */
.macro nadtlb_20 code
mfctl %isr,spc
#ifdef CONFIG_64BIT
b nadtlb_miss_20w
#else
b nadtlb_miss_20
#endif
mfctl %ior,va
.align 32
.endm
#ifndef CONFIG_64BIT
/*
* dirty bit trap interruption handler (parisc 1.1 - 32 bit)
*/
.macro dbit_11 code
mfctl %isr,spc
b dbit_trap_11
mfctl %ior,va
.align 32
.endm
#endif
/*
* dirty bit trap interruption handler (parisc 2.0)
*/
.macro dbit_20 code
mfctl %isr,spc
#ifdef CONFIG_64BIT
b dbit_trap_20w
#else
b dbit_trap_20
#endif
mfctl %ior,va
.align 32
.endm
/* In LP64, the space contains part of the upper 32 bits of the
* fault. We have to extract this and place it in the va,
* zeroing the corresponding bits in the space register */
.macro space_adjust spc,va,tmp
#ifdef CONFIG_64BIT
extrd,u \spc,63,SPACEID_SHIFT,\tmp
depd %r0,63,SPACEID_SHIFT,\spc
depd \tmp,31,SPACEID_SHIFT,\va
#endif
.endm
.import swapper_pg_dir,code
/* Get the pgd. For faults on space zero (kernel space), this
* is simply swapper_pg_dir. For user space faults, the
* pgd is stored in %cr25 */
.macro get_pgd spc,reg
ldil L%PA(swapper_pg_dir),\reg
ldo R%PA(swapper_pg_dir)(\reg),\reg
or,COND(=) %r0,\spc,%r0
mfctl %cr25,\reg
.endm
/*
space_check(spc,tmp,fault)
spc - The space we saw the fault with.
tmp - The place to store the current space.
fault - Function to call on failure.
Only allow faults on different spaces from the
currently active one if we're the kernel
*/
.macro space_check spc,tmp,fault
mfsp %sr7,\tmp
/* check against %r0 which is same value as LINUX_GATEWAY_SPACE */
or,COND(<>) %r0,\spc,%r0 /* user may execute gateway page
* as kernel, so defeat the space
* check if it is */
copy \spc,\tmp
or,COND(=) %r0,\tmp,%r0 /* nullify if executing as kernel */
cmpb,COND(<>),n \tmp,\spc,\fault
.endm
/* Look up a PTE in a 2-Level scheme (faulting at each
* level if the entry isn't present
*
* NOTE: we use ldw even for LP64, since the short pointers
* can address up to 1TB
*/
.macro L2_ptep pmd,pte,index,va,fault
#if CONFIG_PGTABLE_LEVELS == 3
extru \va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index
#else
# if defined(CONFIG_64BIT)
extrd,u \va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
#else
# if PAGE_SIZE > 4096
extru \va,31-ASM_PGDIR_SHIFT,32-ASM_PGDIR_SHIFT,\index
# else
extru \va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
# endif
# endif
#endif
dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */
copy %r0,\pte
ldw,s \index(\pmd),\pmd
bb,>=,n \pmd,_PxD_PRESENT_BIT,\fault
dep %r0,31,PxD_FLAG_SHIFT,\pmd /* clear flags */
SHLREG \pmd,PxD_VALUE_SHIFT,\pmd
extru \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */
.endm
/* Look up PTE in a 3-Level scheme.
*
* Here we implement a Hybrid L2/L3 scheme: we allocate the
* first pmd adjacent to the pgd. This means that we can
* subtract a constant offset to get to it. The pmd and pgd
* sizes are arranged so that a single pmd covers 4GB (giving
* a full LP64 process access to 8TB) so our lookups are
* effectively L2 for the first 4GB of the kernel (i.e. for
* all ILP32 processes and all the kernel for machines with
* under 4GB of memory) */
.macro L3_ptep pgd,pte,index,va,fault
#if CONFIG_PGTABLE_LEVELS == 3 /* we might have a 2-Level scheme, e.g. with 16kb page size */
extrd,u \va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
extrd,u,*= \va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
ldw,s \index(\pgd),\pgd
extrd,u,*= \va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
bb,>=,n \pgd,_PxD_PRESENT_BIT,\fault
extrd,u,*= \va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
shld \pgd,PxD_VALUE_SHIFT,\index
extrd,u,*= \va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
copy \index,\pgd
extrd,u,*<> \va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
ldo ASM_PGD_PMD_OFFSET(\pgd),\pgd
#endif
L2_ptep \pgd,\pte,\index,\va,\fault
.endm
/* Acquire pa_tlb_lock lock and check page is present. */
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
.macro tlb_lock spc,ptp,pte,tmp,tmp1,fault
#ifdef CONFIG_SMP
98: cmpib,COND(=),n 0,\spc,2f
load_pa_tlb_lock \tmp
1: LDCW 0(\tmp),\tmp1
cmpib,COND(=) 0,\tmp1,1b
nop
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
LDREG 0(\ptp),\pte
bb,<,n \pte,_PAGE_PRESENT_BIT,3f
LDCW 0(\tmp),\tmp1
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
b \fault
stw \spc,0(\tmp)
99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
#endif
2: LDREG 0(\ptp),\pte
bb,>=,n \pte,_PAGE_PRESENT_BIT,\fault
3:
.endm
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
/* Release pa_tlb_lock lock without reloading lock address. */
.macro tlb_unlock0 spc,tmp,tmp1
#ifdef CONFIG_SMP
98: or,COND(=) %r0,\spc,%r0
LDCW 0(\tmp),\tmp1
or,COND(=) %r0,\spc,%r0
stw \spc,0(\tmp)
99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
#endif
.endm
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
/* Release pa_tlb_lock lock. */
.macro tlb_unlock1 spc,tmp,tmp1
#ifdef CONFIG_SMP
98: load_pa_tlb_lock \tmp
99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
tlb_unlock0 \spc,\tmp,\tmp1
#endif
.endm
/* Set the _PAGE_ACCESSED bit of the PTE. Be clever and
* don't needlessly dirty the cache line if it was already set */
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
.macro update_accessed ptp,pte,tmp,tmp1
ldi _PAGE_ACCESSED,\tmp1
or \tmp1,\pte,\tmp
and,COND(<>) \tmp1,\pte,%r0
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
STREG \tmp,0(\ptp)
.endm
/* Set the dirty bit (and accessed bit). No need to be
* clever, this is only used from the dirty fault */
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
.macro update_dirty ptp,pte,tmp
ldi _PAGE_ACCESSED|_PAGE_DIRTY,\tmp
or \tmp,\pte,\pte
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
STREG \pte,0(\ptp)
.endm
/* We have (depending on the page size):
* - 38 to 52-bit Physical Page Number
* - 12 to 26-bit page offset
*/
/* bitshift difference between a PFN (based on kernel's PAGE_SIZE)
* to a CPU TLB 4k PFN (4k => 12 bits to shift) */
#define PAGE_ADD_SHIFT (PAGE_SHIFT-12)
#define PAGE_ADD_HUGE_SHIFT (REAL_HPAGE_SHIFT-12)
/* Drop prot bits and convert to page addr for iitlbt and idtlbt */
.macro convert_for_tlb_insert20 pte,tmp
#ifdef CONFIG_HUGETLB_PAGE
copy \pte,\tmp
extrd,u \tmp,(63-ASM_PFN_PTE_SHIFT)+(63-58)+PAGE_ADD_SHIFT,\
64-PAGE_SHIFT-PAGE_ADD_SHIFT,\pte
depdi _PAGE_SIZE_ENCODING_DEFAULT,63,\
(63-58)+PAGE_ADD_SHIFT,\pte
extrd,u,*= \tmp,_PAGE_HPAGE_BIT+32,1,%r0
depdi _HUGE_PAGE_SIZE_ENCODING_DEFAULT,63,\
(63-58)+PAGE_ADD_HUGE_SHIFT,\pte
#else /* Huge pages disabled */
extrd,u \pte,(63-ASM_PFN_PTE_SHIFT)+(63-58)+PAGE_ADD_SHIFT,\
64-PAGE_SHIFT-PAGE_ADD_SHIFT,\pte
depdi _PAGE_SIZE_ENCODING_DEFAULT,63,\
(63-58)+PAGE_ADD_SHIFT,\pte
#endif
.endm
/* Convert the pte and prot to tlb insertion values. How
* this happens is quite subtle, read below */
.macro make_insert_tlb spc,pte,prot,tmp
space_to_prot \spc \prot /* create prot id from space */
/* The following is the real subtlety. This is depositing
* T <-> _PAGE_REFTRAP
* D <-> _PAGE_DIRTY
* B <-> _PAGE_DMB (memory break)
*
* Then incredible subtlety: The access rights are
* _PAGE_GATEWAY, _PAGE_EXEC and _PAGE_WRITE
* See 3-14 of the parisc 2.0 manual
*
* Finally, _PAGE_READ goes in the top bit of PL1 (so we
* trigger an access rights trap in user space if the user
* tries to read an unreadable page */
depd \pte,8,7,\prot
/* PAGE_USER indicates the page can be read with user privileges,
* so deposit X1|11 to PL1|PL2 (remember the upper bit of PL1
* contains _PAGE_READ) */
extrd,u,*= \pte,_PAGE_USER_BIT+32,1,%r0
depdi 7,11,3,\prot
/* If we're a gateway page, drop PL2 back to zero for promotion
* to kernel privilege (so we can execute the page as kernel).
* Any privilege promotion page always denys read and write */
extrd,u,*= \pte,_PAGE_GATEWAY_BIT+32,1,%r0
depd %r0,11,2,\prot /* If Gateway, Set PL2 to 0 */
/* Enforce uncacheable pages.
* This should ONLY be use for MMIO on PA 2.0 machines.
* Memory/DMA is cache coherent on all PA2.0 machines we support
* (that means T-class is NOT supported) and the memory controllers
* on most of those machines only handles cache transactions.
*/
extrd,u,*= \pte,_PAGE_NO_CACHE_BIT+32,1,%r0
depdi 1,12,1,\prot
/* Drop prot bits and convert to page addr for iitlbt and idtlbt */
convert_for_tlb_insert20 \pte \tmp
.endm
/* Identical macro to make_insert_tlb above, except it
* makes the tlb entry for the differently formatted pa11
* insertion instructions */
.macro make_insert_tlb_11 spc,pte,prot
zdep \spc,30,15,\prot
dep \pte,8,7,\prot
extru,= \pte,_PAGE_NO_CACHE_BIT,1,%r0
depi 1,12,1,\prot
extru,= \pte,_PAGE_USER_BIT,1,%r0
depi 7,11,3,\prot /* Set for user space (1 rsvd for read) */
extru,= \pte,_PAGE_GATEWAY_BIT,1,%r0
depi 0,11,2,\prot /* If Gateway, Set PL2 to 0 */
/* Get rid of prot bits and convert to page addr for iitlba */
depi 0,31,ASM_PFN_PTE_SHIFT,\pte
SHRREG \pte,(ASM_PFN_PTE_SHIFT-(31-26)),\pte
.endm
/* This is for ILP32 PA2.0 only. The TLB insertion needs
* to extend into I/O space if the address is 0xfXXXXXXX
* so we extend the f's into the top word of the pte in
* this case */
.macro f_extend pte,tmp
extrd,s \pte,42,4,\tmp
addi,<> 1,\tmp,%r0
extrd,s \pte,63,25,\pte
.endm
/* The alias region is an 8MB aligned 16MB to do clear and
* copy user pages at addresses congruent with the user
* virtual address.
*
* To use the alias page, you set %r26 up with the to TLB
* entry (identifying the physical page) and %r23 up with
* the from tlb entry (or nothing if only a to entry---for
* clear_user_page_asm) */
.macro do_alias spc,tmp,tmp1,va,pte,prot,fault,patype
cmpib,COND(<>),n 0,\spc,\fault
ldil L%(TMPALIAS_MAP_START),\tmp
#if defined(CONFIG_64BIT) && (TMPALIAS_MAP_START >= 0x80000000)
/* on LP64, ldi will sign extend into the upper 32 bits,
* which is behaviour we don't want */
depdi 0,31,32,\tmp
#endif
copy \va,\tmp1
depi 0,31,23,\tmp1
cmpb,COND(<>),n \tmp,\tmp1,\fault
mfctl %cr19,\tmp /* iir */
/* get the opcode (first six bits) into \tmp */
extrw,u \tmp,5,6,\tmp
/*
* Only setting the T bit prevents data cache movein
* Setting access rights to zero prevents instruction cache movein
*
* Note subtlety here: _PAGE_GATEWAY, _PAGE_EXEC and _PAGE_WRITE go
* to type field and _PAGE_READ goes to top bit of PL1
*/
ldi (_PAGE_REFTRAP|_PAGE_READ|_PAGE_WRITE),\prot
/*
* so if the opcode is one (i.e. this is a memory management
* instruction) nullify the next load so \prot is only T.
* Otherwise this is a normal data operation
*/
cmpiclr,= 0x01,\tmp,%r0
ldi (_PAGE_DIRTY|_PAGE_READ|_PAGE_WRITE),\prot
.ifc \patype,20
depd,z \prot,8,7,\prot
.else
.ifc \patype,11
depw,z \prot,8,7,\prot
.else
.error "undefined PA type to do_alias"
.endif
.endif
/*
* OK, it is in the temp alias region, check whether "from" or "to".
* Check "subtle" note in pacache.S re: r23/r26.
*/
#ifdef CONFIG_64BIT
extrd,u,*= \va,41,1,%r0
#else
extrw,u,= \va,9,1,%r0
#endif
or,COND(tr) %r23,%r0,\pte
or %r26,%r0,\pte
.endm
/*
* Fault_vectors are architecturally required to be aligned on a 2K
* boundary
*/
.section .text.hot
.align 2048
ENTRY(fault_vector_20)
/* First vector is invalid (0) */
.ascii "cows can fly"
.byte 0
.align 32
hpmc 1
def 2
def 3
extint 4
def 5
itlb_20 PARISC_ITLB_TRAP
def 7
def 8
def 9
def 10
def 11
def 12
def 13
def 14
dtlb_20 15
naitlb_20 16
nadtlb_20 17
def 18
def 19
dbit_20 20
def 21
def 22
def 23
def 24
def 25
def 26
def 27
def 28
def 29
def 30
def 31
END(fault_vector_20)
#ifndef CONFIG_64BIT
.align 2048
ENTRY(fault_vector_11)
/* First vector is invalid (0) */
.ascii "cows can fly"
.byte 0
.align 32
hpmc 1
def 2
def 3
extint 4
def 5
itlb_11 PARISC_ITLB_TRAP
def 7
def 8
def 9
def 10
def 11
def 12
def 13
def 14
dtlb_11 15
naitlb_11 16
nadtlb_11 17
def 18
def 19
dbit_11 20
def 21
def 22
def 23
def 24
def 25
def 26
def 27
def 28
def 29
def 30
def 31
END(fault_vector_11)
#endif
/* Fault vector is separately protected and *must* be on its own page */
.align PAGE_SIZE
.import handle_interruption,code
.import do_cpu_irq_mask,code
/*
* Child Returns here
*
* copy_thread moved args into task save area.
*/
ENTRY(ret_from_kernel_thread)
/* Call schedule_tail first though */
BL schedule_tail, %r2
nop
LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30), %r1
LDREG TASK_PT_GR25(%r1), %r26
#ifdef CONFIG_64BIT
LDREG TASK_PT_GR27(%r1), %r27
#endif
LDREG TASK_PT_GR26(%r1), %r1
ble 0(%sr7, %r1)
copy %r31, %r2
b finish_child_return
nop
END(ret_from_kernel_thread)
/*
* struct task_struct *_switch_to(struct task_struct *prev,
* struct task_struct *next)
*
* switch kernel stacks and return prev */
ENTRY_CFI(_switch_to)
STREG %r2, -RP_OFFSET(%r30)
callee_save_float
callee_save
load32 _switch_to_ret, %r2
STREG %r2, TASK_PT_KPC(%r26)
LDREG TASK_PT_KPC(%r25), %r2
STREG %r30, TASK_PT_KSP(%r26)
LDREG TASK_PT_KSP(%r25), %r30
LDREG TASK_THREAD_INFO(%r25), %r25
bv %r0(%r2)
mtctl %r25,%cr30
ENTRY(_switch_to_ret)
mtctl %r0, %cr0 /* Needed for single stepping */
callee_rest
callee_rest_float
LDREG -RP_OFFSET(%r30), %r2
bv %r0(%r2)
copy %r26, %r28
ENDPROC_CFI(_switch_to)
/*
* Common rfi return path for interruptions, kernel execve, and
* sys_rt_sigreturn (sometimes). The sys_rt_sigreturn syscall will
* return via this path if the signal was received when the process
* was running; if the process was blocked on a syscall then the
* normal syscall_exit path is used. All syscalls for traced
* proceses exit via intr_restore.
*
* XXX If any syscalls that change a processes space id ever exit
* this way, then we will need to copy %sr3 in to PT_SR[3..7], and
* adjust IASQ[0..1].
*
*/
.align PAGE_SIZE
ENTRY_CFI(syscall_exit_rfi)
mfctl %cr30,%r16
LDREG TI_TASK(%r16), %r16 /* thread_info -> task_struct */
ldo TASK_REGS(%r16),%r16
/* Force iaoq to userspace, as the user has had access to our current
* context via sigcontext. Also Filter the PSW for the same reason.
*/
LDREG PT_IAOQ0(%r16),%r19
depi 3,31,2,%r19
STREG %r19,PT_IAOQ0(%r16)
LDREG PT_IAOQ1(%r16),%r19
depi 3,31,2,%r19
STREG %r19,PT_IAOQ1(%r16)
LDREG PT_PSW(%r16),%r19
load32 USER_PSW_MASK,%r1
#ifdef CONFIG_64BIT
load32 USER_PSW_HI_MASK,%r20
depd %r20,31,32,%r1
#endif
and %r19,%r1,%r19 /* Mask out bits that user shouldn't play with */
load32 USER_PSW,%r1
or %r19,%r1,%r19 /* Make sure default USER_PSW bits are set */
STREG %r19,PT_PSW(%r16)
/*
* If we aren't being traced, we never saved space registers
* (we don't store them in the sigcontext), so set them
* to "proper" values now (otherwise we'll wind up restoring
* whatever was last stored in the task structure, which might
* be inconsistent if an interrupt occurred while on the gateway
* page). Note that we may be "trashing" values the user put in
* them, but we don't support the user changing them.
*/
STREG %r0,PT_SR2(%r16)
mfsp %sr3,%r19
STREG %r19,PT_SR0(%r16)
STREG %r19,PT_SR1(%r16)
STREG %r19,PT_SR3(%r16)
STREG %r19,PT_SR4(%r16)
STREG %r19,PT_SR5(%r16)
STREG %r19,PT_SR6(%r16)
STREG %r19,PT_SR7(%r16)
ENTRY(intr_return)
/* check for reschedule */
mfctl %cr30,%r1
LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */
bb,<,n %r19,31-TIF_NEED_RESCHED,intr_do_resched /* forward */
.import do_notify_resume,code
intr_check_sig:
/* As above */
mfctl %cr30,%r1
LDREG TI_FLAGS(%r1),%r19
ldi (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME), %r20
and,COND(<>) %r19, %r20, %r0
b,n intr_restore /* skip past if we've nothing to do */
/* This check is critical to having LWS
* working. The IASQ is zero on the gateway
* page and we cannot deliver any signals until
* we get off the gateway page.
*
* Only do signals if we are returning to user space
*/
LDREG PT_IASQ0(%r16), %r20
cmpib,COND(=),n LINUX_GATEWAY_SPACE, %r20, intr_restore /* backward */
LDREG PT_IASQ1(%r16), %r20
cmpib,COND(=),n LINUX_GATEWAY_SPACE, %r20, intr_restore /* backward */
Revert "parisc: Re-enable interrupts early" This reverts commit 5c38602d83e584047906b41b162ababd4db4106d. Interrupts can't be enabled early because the register saves are done on the thread stack prior to switching to the IRQ stack. This caused stack overflows and the thread stack needed increasing to 32k. Even then, stack overflows still occasionally occurred. Background: Even with a 32 kB thread stack, I have seen instances where the thread stack overflowed on the mx3210 buildd. Detection of stack overflow only occurs when we have an external interrupt. When an external interrupt occurs, we switch to the thread stack if we are not already on a kernel stack. Then, registers and specials are saved to the kernel stack. The bug occurs in intr_return where interrupts are reenabled prior to returning from the interrupt. This was done incase we need to schedule or deliver signals. However, it introduces the possibility that multiple external interrupts may occur on the thread stack and cause a stack overflow. These might not be detected and cause the kernel to misbehave in random ways. This patch changes the code back to only reenable interrupts when we are going to schedule or deliver signals. As a result, we generally return from an interrupt before reenabling interrupts. This minimizes the growth of the thread stack. Fixes: 5c38602d83e5 ("parisc: Re-enable interrupts early") Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: <stable@vger.kernel.org> # v4.10+ Signed-off-by: Helge Deller <deller@gmx.de>
2017-11-14 07:35:33 +07:00
/* NOTE: We need to enable interrupts if we have to deliver
* signals. We used to do this earlier but it caused kernel
* stack overflows. */
ssm PSW_SM_I, %r0
copy %r0, %r25 /* long in_syscall = 0 */
#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
#endif
BL do_notify_resume,%r2
copy %r16, %r26 /* struct pt_regs *regs */
b,n intr_check_sig
intr_restore:
copy %r16,%r29
ldo PT_FR31(%r29),%r1
rest_fp %r1
rest_general %r29
/* inverse of virt_map */
pcxt_ssm_bug
rsm PSW_SM_QUIET,%r0 /* prepare for rfi */
tophys_r1 %r29
/* Restore space id's and special cr's from PT_REGS
* structure pointed to by r29
*/
rest_specials %r29
/* IMPORTANT: rest_stack restores r29 last (we are using it)!
* It also restores r1 and r30.
*/
rest_stack
rfi
nop
#ifndef CONFIG_PREEMPTION
# define intr_do_preempt intr_restore
#endif /* !CONFIG_PREEMPTION */
.import schedule,code
intr_do_resched:
/* Only call schedule on return to userspace. If we're returning
* to kernel space, we may schedule if CONFIG_PREEMPTION, otherwise
* we jump back to intr_restore.
*/
LDREG PT_IASQ0(%r16), %r20
cmpib,COND(=) 0, %r20, intr_do_preempt
nop
LDREG PT_IASQ1(%r16), %r20
cmpib,COND(=) 0, %r20, intr_do_preempt
nop
Revert "parisc: Re-enable interrupts early" This reverts commit 5c38602d83e584047906b41b162ababd4db4106d. Interrupts can't be enabled early because the register saves are done on the thread stack prior to switching to the IRQ stack. This caused stack overflows and the thread stack needed increasing to 32k. Even then, stack overflows still occasionally occurred. Background: Even with a 32 kB thread stack, I have seen instances where the thread stack overflowed on the mx3210 buildd. Detection of stack overflow only occurs when we have an external interrupt. When an external interrupt occurs, we switch to the thread stack if we are not already on a kernel stack. Then, registers and specials are saved to the kernel stack. The bug occurs in intr_return where interrupts are reenabled prior to returning from the interrupt. This was done incase we need to schedule or deliver signals. However, it introduces the possibility that multiple external interrupts may occur on the thread stack and cause a stack overflow. These might not be detected and cause the kernel to misbehave in random ways. This patch changes the code back to only reenable interrupts when we are going to schedule or deliver signals. As a result, we generally return from an interrupt before reenabling interrupts. This minimizes the growth of the thread stack. Fixes: 5c38602d83e5 ("parisc: Re-enable interrupts early") Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: <stable@vger.kernel.org> # v4.10+ Signed-off-by: Helge Deller <deller@gmx.de>
2017-11-14 07:35:33 +07:00
/* NOTE: We need to enable interrupts if we schedule. We used
* to do this earlier but it caused kernel stack overflows. */
ssm PSW_SM_I, %r0
#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
#endif
ldil L%intr_check_sig, %r2
#ifndef CONFIG_64BIT
b schedule
#else
load32 schedule, %r20
bv %r0(%r20)
#endif
ldo R%intr_check_sig(%r2), %r2
/* preempt the current task on returning to kernel
* mode from an interrupt, iff need_resched is set,
* and preempt_count is 0. otherwise, we continue on
* our merry way back to the current running task.
*/
#ifdef CONFIG_PREEMPTION
.import preempt_schedule_irq,code
intr_do_preempt:
rsm PSW_SM_I, %r0 /* disable interrupts */
/* current_thread_info()->preempt_count */
mfctl %cr30, %r1
LDREG TI_PRE_COUNT(%r1), %r19
cmpib,COND(<>) 0, %r19, intr_restore /* if preempt_count > 0 */
nop /* prev insn branched backwards */
/* check if we interrupted a critical path */
LDREG PT_PSW(%r16), %r20
bb,<,n %r20, 31 - PSW_SM_I, intr_restore
nop
BL preempt_schedule_irq, %r2
nop
b,n intr_restore /* ssm PSW_SM_I done by intr_restore */
#endif /* CONFIG_PREEMPTION */
/*
* External interrupts.
*/
intr_extint:
cmpib,COND(=),n 0,%r16,1f
get_stack_use_cr30
b,n 2f
1:
get_stack_use_r30
2:
save_specials %r29
virt_map
save_general %r29
ldo PT_FR0(%r29), %r24
save_fp %r24
loadgp
copy %r29, %r26 /* arg0 is pt_regs */
copy %r29, %r16 /* save pt_regs */
ldil L%intr_return, %r2
#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
#endif
b do_cpu_irq_mask
ldo R%intr_return(%r2), %r2 /* return to intr_return, not here */
ENDPROC_CFI(syscall_exit_rfi)
/* Generic interruptions (illegal insn, unaligned, page fault, etc) */
ENTRY_CFI(intr_save) /* for os_hpmc */
mfsp %sr7,%r16
cmpib,COND(=),n 0,%r16,1f
get_stack_use_cr30
b 2f
copy %r8,%r26
1:
get_stack_use_r30
copy %r8,%r26
2:
save_specials %r29
/* If this trap is a itlb miss, skip saving/adjusting isr/ior */
cmpib,COND(=),n PARISC_ITLB_TRAP,%r26,skip_save_ior
mfctl %isr, %r16
nop /* serialize mfctl on PA 2.0 to avoid 4 cycle penalty */
mfctl %ior, %r17
#ifdef CONFIG_64BIT
/*
* If the interrupted code was running with W bit off (32 bit),
* clear the b bits (bits 0 & 1) in the ior.
* save_specials left ipsw value in r8 for us to test.
*/
extrd,u,*<> %r8,PSW_W_BIT,1,%r0
depdi 0,1,2,%r17
/* adjust isr/ior: get high bits from isr and deposit in ior */
space_adjust %r16,%r17,%r1
#endif
STREG %r16, PT_ISR(%r29)
STREG %r17, PT_IOR(%r29)
#if 0 && defined(CONFIG_64BIT)
/* Revisit when we have 64-bit code above 4Gb */
b,n intr_save2
skip_save_ior:
/* We have a itlb miss, and when executing code above 4 Gb on ILP64, we
* need to adjust iasq/iaoq here in the same way we adjusted isr/ior
* above.
*/
extrd,u,* %r8,PSW_W_BIT,1,%r1
cmpib,COND(=),n 1,%r1,intr_save2
LDREG PT_IASQ0(%r29), %r16
LDREG PT_IAOQ0(%r29), %r17
/* adjust iasq/iaoq */
space_adjust %r16,%r17,%r1
STREG %r16, PT_IASQ0(%r29)
STREG %r17, PT_IAOQ0(%r29)
#else
skip_save_ior:
#endif
intr_save2:
virt_map
save_general %r29
ldo PT_FR0(%r29), %r25
save_fp %r25
loadgp
copy %r29, %r25 /* arg1 is pt_regs */
#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
#endif
ldil L%intr_check_sig, %r2
copy %r25, %r16 /* save pt_regs */
b handle_interruption
ldo R%intr_check_sig(%r2), %r2
ENDPROC_CFI(intr_save)
/*
* Note for all tlb miss handlers:
*
* cr24 contains a pointer to the kernel address space
* page directory.
*
* cr25 contains a pointer to the current user address
* space page directory.
*
* sr3 will contain the space id of the user address space
* of the current running thread while that thread is
* running in the kernel.
*/
/*
* register number allocations. Note that these are all
* in the shadowed registers
*/
t0 = r1 /* temporary register 0 */
va = r8 /* virtual address for which the trap occurred */
t1 = r9 /* temporary register 1 */
pte = r16 /* pte/phys page # */
prot = r17 /* prot bits */
spc = r24 /* space for which the trap occurred */
ptp = r25 /* page directory/page table pointer */
#ifdef CONFIG_64BIT
dtlb_miss_20w:
space_adjust spc,va,t0
get_pgd spc,ptp
space_check spc,t0,dtlb_fault
L3_ptep ptp,pte,t0,va,dtlb_check_alias_20w
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20w
update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot,t1
idtlbt pte,prot
tlb_unlock1 spc,t0,t1
rfir
nop
dtlb_check_alias_20w:
do_alias spc,t0,t1,va,pte,prot,dtlb_fault,20
idtlbt pte,prot
rfir
nop
nadtlb_miss_20w:
space_adjust spc,va,t0
get_pgd spc,ptp
space_check spc,t0,nadtlb_fault
L3_ptep ptp,pte,t0,va,nadtlb_check_alias_20w
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20w
update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot,t1
idtlbt pte,prot
tlb_unlock1 spc,t0,t1
rfir
nop
nadtlb_check_alias_20w:
do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate,20
idtlbt pte,prot
rfir
nop
#else
dtlb_miss_11:
get_pgd spc,ptp
space_check spc,t0,dtlb_fault
L2_ptep ptp,pte,t0,va,dtlb_check_alias_11
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_11
update_accessed ptp,pte,t0,t1
make_insert_tlb_11 spc,pte,prot
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */
mtsp spc,%sr1
idtlba pte,(%sr1,va)
idtlbp prot,(%sr1,va)
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
mtsp t1, %sr1 /* Restore sr1 */
tlb_unlock1 spc,t0,t1
rfir
nop
dtlb_check_alias_11:
do_alias spc,t0,t1,va,pte,prot,dtlb_fault,11
idtlba pte,(va)
idtlbp prot,(va)
rfir
nop
nadtlb_miss_11:
get_pgd spc,ptp
space_check spc,t0,nadtlb_fault
L2_ptep ptp,pte,t0,va,nadtlb_check_alias_11
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_11
update_accessed ptp,pte,t0,t1
make_insert_tlb_11 spc,pte,prot
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */
mtsp spc,%sr1
idtlba pte,(%sr1,va)
idtlbp prot,(%sr1,va)
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
mtsp t1, %sr1 /* Restore sr1 */
tlb_unlock1 spc,t0,t1
rfir
nop
nadtlb_check_alias_11:
do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate,11
idtlba pte,(va)
idtlbp prot,(va)
rfir
nop
dtlb_miss_20:
space_adjust spc,va,t0
get_pgd spc,ptp
space_check spc,t0,dtlb_fault
L2_ptep ptp,pte,t0,va,dtlb_check_alias_20
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20
update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot,t1
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
f_extend pte,t1
idtlbt pte,prot
tlb_unlock1 spc,t0,t1
rfir
nop
dtlb_check_alias_20:
do_alias spc,t0,t1,va,pte,prot,dtlb_fault,20
idtlbt pte,prot
rfir
nop
nadtlb_miss_20:
get_pgd spc,ptp
space_check spc,t0,nadtlb_fault
L2_ptep ptp,pte,t0,va,nadtlb_check_alias_20
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20
update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot,t1
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
f_extend pte,t1
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
idtlbt pte,prot
tlb_unlock1 spc,t0,t1
rfir
nop
nadtlb_check_alias_20:
do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate,20
idtlbt pte,prot
rfir
nop
#endif
nadtlb_emulate:
/*
* Non access misses can be caused by fdc,fic,pdc,lpa,probe and
* probei instructions. We don't want to fault for these
* instructions (not only does it not make sense, it can cause
* deadlocks, since some flushes are done with the mmap
* semaphore held). If the translation doesn't exist, we can't
* insert a translation, so have to emulate the side effects
* of the instruction. Since we don't insert a translation
* we can get a lot of faults during a flush loop, so it makes
* sense to try to do it here with minimum overhead. We only
* emulate fdc,fic,pdc,probew,prober instructions whose base
* and index registers are not shadowed. We defer everything
* else to the "slow" path.
*/
mfctl %cr19,%r9 /* Get iir */
/* PA 2.0 Arch Ref. Book pg 382 has a good description of the insn bits.
Checks for fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw */
/* Checks for fdc,fdce,pdc,"fic,4f" only */
ldi 0x280,%r16
and %r9,%r16,%r17
cmpb,<>,n %r16,%r17,nadtlb_probe_check
bb,>=,n %r9,26,nadtlb_nullify /* m bit not set, just nullify */
BL get_register,%r25
extrw,u %r9,15,5,%r8 /* Get index register # */
cmpib,COND(=),n -1,%r1,nadtlb_fault /* have to use slow path */
copy %r1,%r24
BL get_register,%r25
extrw,u %r9,10,5,%r8 /* Get base register # */
cmpib,COND(=),n -1,%r1,nadtlb_fault /* have to use slow path */
BL set_register,%r25
add,l %r1,%r24,%r1 /* doesn't affect c/b bits */
nadtlb_nullify:
mfctl %ipsw,%r8
ldil L%PSW_N,%r9
or %r8,%r9,%r8 /* Set PSW_N */
mtctl %r8,%ipsw
rfir
nop
/*
When there is no translation for the probe address then we
must nullify the insn and return zero in the target register.
This will indicate to the calling code that it does not have
write/read privileges to this address.
This should technically work for prober and probew in PA 1.1,
and also probe,r and probe,w in PA 2.0
WARNING: USE ONLY NON-SHADOW REGISTERS WITH PROBE INSN!
THE SLOW-PATH EMULATION HAS NOT BEEN WRITTEN YET.
*/
nadtlb_probe_check:
ldi 0x80,%r16
and %r9,%r16,%r17
cmpb,<>,n %r16,%r17,nadtlb_fault /* Must be probe,[rw]*/
BL get_register,%r25 /* Find the target register */
extrw,u %r9,31,5,%r8 /* Get target register */
cmpib,COND(=),n -1,%r1,nadtlb_fault /* have to use slow path */
BL set_register,%r25
copy %r0,%r1 /* Write zero to target register */
b nadtlb_nullify /* Nullify return insn */
nop
#ifdef CONFIG_64BIT
itlb_miss_20w:
/*
* I miss is a little different, since we allow users to fault
* on the gateway page which is in the kernel address space.
*/
space_adjust spc,va,t0
get_pgd spc,ptp
space_check spc,t0,itlb_fault
L3_ptep ptp,pte,t0,va,itlb_fault
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,itlb_fault
update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot,t1
iitlbt pte,prot
tlb_unlock1 spc,t0,t1
rfir
nop
naitlb_miss_20w:
/*
* I miss is a little different, since we allow users to fault
* on the gateway page which is in the kernel address space.
*/
space_adjust spc,va,t0
get_pgd spc,ptp
space_check spc,t0,naitlb_fault
L3_ptep ptp,pte,t0,va,naitlb_check_alias_20w
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20w
update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot,t1
iitlbt pte,prot
tlb_unlock1 spc,t0,t1
rfir
nop
naitlb_check_alias_20w:
do_alias spc,t0,t1,va,pte,prot,naitlb_fault,20
iitlbt pte,prot
rfir
nop
#else
itlb_miss_11:
get_pgd spc,ptp
space_check spc,t0,itlb_fault
L2_ptep ptp,pte,t0,va,itlb_fault
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,itlb_fault
update_accessed ptp,pte,t0,t1
make_insert_tlb_11 spc,pte,prot
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */
mtsp spc,%sr1
iitlba pte,(%sr1,va)
iitlbp prot,(%sr1,va)
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
mtsp t1, %sr1 /* Restore sr1 */
tlb_unlock1 spc,t0,t1
rfir
nop
naitlb_miss_11:
get_pgd spc,ptp
space_check spc,t0,naitlb_fault
L2_ptep ptp,pte,t0,va,naitlb_check_alias_11
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_11
update_accessed ptp,pte,t0,t1
make_insert_tlb_11 spc,pte,prot
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */
mtsp spc,%sr1
iitlba pte,(%sr1,va)
iitlbp prot,(%sr1,va)
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
mtsp t1, %sr1 /* Restore sr1 */
tlb_unlock1 spc,t0,t1
rfir
nop
naitlb_check_alias_11:
do_alias spc,t0,t1,va,pte,prot,itlb_fault,11
iitlba pte,(%sr0, va)
iitlbp prot,(%sr0, va)
rfir
nop
itlb_miss_20:
get_pgd spc,ptp
space_check spc,t0,itlb_fault
L2_ptep ptp,pte,t0,va,itlb_fault
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,itlb_fault
update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot,t1
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
f_extend pte,t1
iitlbt pte,prot
tlb_unlock1 spc,t0,t1
rfir
nop
naitlb_miss_20:
get_pgd spc,ptp
space_check spc,t0,naitlb_fault
L2_ptep ptp,pte,t0,va,naitlb_check_alias_20
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20
update_accessed ptp,pte,t0,t1
make_insert_tlb spc,pte,prot,t1
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
f_extend pte,t1
iitlbt pte,prot
tlb_unlock1 spc,t0,t1
rfir
nop
naitlb_check_alias_20:
do_alias spc,t0,t1,va,pte,prot,naitlb_fault,20
iitlbt pte,prot
rfir
nop
#endif
#ifdef CONFIG_64BIT
dbit_trap_20w:
space_adjust spc,va,t0
get_pgd spc,ptp
space_check spc,t0,dbit_fault
L3_ptep ptp,pte,t0,va,dbit_fault
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,dbit_fault
update_dirty ptp,pte,t1
make_insert_tlb spc,pte,prot,t1
idtlbt pte,prot
tlb_unlock0 spc,t0,t1
rfir
nop
#else
dbit_trap_11:
get_pgd spc,ptp
space_check spc,t0,dbit_fault
L2_ptep ptp,pte,t0,va,dbit_fault
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,dbit_fault
update_dirty ptp,pte,t1
make_insert_tlb_11 spc,pte,prot
mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */
mtsp spc,%sr1
idtlba pte,(%sr1,va)
idtlbp prot,(%sr1,va)
mtsp t1, %sr1 /* Restore sr1 */
tlb_unlock0 spc,t0,t1
rfir
nop
dbit_trap_20:
get_pgd spc,ptp
space_check spc,t0,dbit_fault
L2_ptep ptp,pte,t0,va,dbit_fault
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
tlb_lock spc,ptp,pte,t0,t1,dbit_fault
update_dirty ptp,pte,t1
make_insert_tlb spc,pte,prot,t1
f_extend pte,t1
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin <dave.anglin@bell.net> Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller <deller@gmx.de>
2015-07-02 04:18:37 +07:00
idtlbt pte,prot
tlb_unlock0 spc,t0,t1
rfir
nop
#endif
.import handle_interruption,code
kernel_bad_space:
b intr_save
ldi 31,%r8 /* Use an unused code */
dbit_fault:
b intr_save
ldi 20,%r8
itlb_fault:
b intr_save
ldi PARISC_ITLB_TRAP,%r8
nadtlb_fault:
b intr_save
ldi 17,%r8
naitlb_fault:
b intr_save
ldi 16,%r8
dtlb_fault:
b intr_save
ldi 15,%r8
/* Register saving semantics for system calls:
%r1 clobbered by system call macro in userspace
%r2 saved in PT_REGS by gateway page
%r3 - %r18 preserved by C code (saved by signal code)
%r19 - %r20 saved in PT_REGS by gateway page
%r21 - %r22 non-standard syscall args
stored in kernel stack by gateway page
%r23 - %r26 arg3-arg0, saved in PT_REGS by gateway page
%r27 - %r30 saved in PT_REGS by gateway page
%r31 syscall return pointer
*/
/* Floating point registers (FIXME: what do we do with these?)
%fr0 - %fr3 status/exception, not preserved
%fr4 - %fr7 arguments
%fr8 - %fr11 not preserved by C code
%fr12 - %fr21 preserved by C code
%fr22 - %fr31 not preserved by C code
*/
.macro reg_save regs
STREG %r3, PT_GR3(\regs)
STREG %r4, PT_GR4(\regs)
STREG %r5, PT_GR5(\regs)
STREG %r6, PT_GR6(\regs)
STREG %r7, PT_GR7(\regs)
STREG %r8, PT_GR8(\regs)
STREG %r9, PT_GR9(\regs)
STREG %r10,PT_GR10(\regs)
STREG %r11,PT_GR11(\regs)
STREG %r12,PT_GR12(\regs)
STREG %r13,PT_GR13(\regs)
STREG %r14,PT_GR14(\regs)
STREG %r15,PT_GR15(\regs)
STREG %r16,PT_GR16(\regs)
STREG %r17,PT_GR17(\regs)
STREG %r18,PT_GR18(\regs)
.endm
.macro reg_restore regs
LDREG PT_GR3(\regs), %r3
LDREG PT_GR4(\regs), %r4
LDREG PT_GR5(\regs), %r5
LDREG PT_GR6(\regs), %r6
LDREG PT_GR7(\regs), %r7
LDREG PT_GR8(\regs), %r8
LDREG PT_GR9(\regs), %r9
LDREG PT_GR10(\regs),%r10
LDREG PT_GR11(\regs),%r11
LDREG PT_GR12(\regs),%r12
LDREG PT_GR13(\regs),%r13
LDREG PT_GR14(\regs),%r14
LDREG PT_GR15(\regs),%r15
LDREG PT_GR16(\regs),%r16
LDREG PT_GR17(\regs),%r17
LDREG PT_GR18(\regs),%r18
.endm
.macro fork_like name
ENTRY_CFI(sys_\name\()_wrapper)
LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30), %r1
ldo TASK_REGS(%r1),%r1
reg_save %r1
mfctl %cr27, %r28
ldil L%sys_\name, %r31
be R%sys_\name(%sr4,%r31)
STREG %r28, PT_CR27(%r1)
ENDPROC_CFI(sys_\name\()_wrapper)
.endm
fork_like clone
fork_like clone3
fork_like fork
fork_like vfork
/* Set the return value for the child */
ENTRY(child_return)
BL schedule_tail, %r2
nop
finish_child_return:
LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30), %r1
ldo TASK_REGS(%r1),%r1 /* get pt regs */
LDREG PT_CR27(%r1), %r3
mtctl %r3, %cr27
reg_restore %r1
b syscall_exit
copy %r0,%r28
END(child_return)
ENTRY_CFI(sys_rt_sigreturn_wrapper)
LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r26
ldo TASK_REGS(%r26),%r26 /* get pt regs */
/* Don't save regs, we are going to restore them from sigcontext. */
STREG %r2, -RP_OFFSET(%r30)
#ifdef CONFIG_64BIT
ldo FRAME_SIZE(%r30), %r30
BL sys_rt_sigreturn,%r2
ldo -16(%r30),%r29 /* Reference param save area */
#else
BL sys_rt_sigreturn,%r2
ldo FRAME_SIZE(%r30), %r30
#endif
ldo -FRAME_SIZE(%r30), %r30
LDREG -RP_OFFSET(%r30), %r2
/* FIXME: I think we need to restore a few more things here. */
LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1
ldo TASK_REGS(%r1),%r1 /* get pt regs */
reg_restore %r1
/* If the signal was received while the process was blocked on a
* syscall, then r2 will take us to syscall_exit; otherwise r2 will
* take us to syscall_exit_rfi and on to intr_return.
*/
bv %r0(%r2)
LDREG PT_GR28(%r1),%r28 /* reload original r28 for syscall_exit */
ENDPROC_CFI(sys_rt_sigreturn_wrapper)
ENTRY(syscall_exit)
/* NOTE: Not all syscalls exit this way. rt_sigreturn will exit
* via syscall_exit_rfi if the signal was received while the process
* was running.
*/
/* save return value now */
mfctl %cr30, %r1
LDREG TI_TASK(%r1),%r1
STREG %r28,TASK_PT_GR28(%r1)
/* Seems to me that dp could be wrong here, if the syscall involved
* calling a module, and nothing got round to restoring dp on return.
*/
loadgp
syscall_check_resched:
/* check for reschedule */
LDREG TI_FLAGS-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r19 /* long */
bb,<,n %r19, 31-TIF_NEED_RESCHED, syscall_do_resched /* forward */
.import do_signal,code
syscall_check_sig:
LDREG TI_FLAGS-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r19
ldi (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME), %r26
and,COND(<>) %r19, %r26, %r0
b,n syscall_restore /* skip past if we've nothing to do */
syscall_do_signal:
/* Save callee-save registers (for sigcontext).
* FIXME: After this point the process structure should be
* consistent with all the relevant state of the process
* before the syscall. We need to verify this.
*/
LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1
ldo TASK_REGS(%r1), %r26 /* struct pt_regs *regs */
reg_save %r26
#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
#endif
BL do_notify_resume,%r2
ldi 1, %r25 /* long in_syscall = 1 */
LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1
ldo TASK_REGS(%r1), %r20 /* reload pt_regs */
reg_restore %r20
b,n syscall_check_sig
syscall_restore:
LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1
/* Are we being ptraced? */
ldw TASK_FLAGS(%r1),%r19
ldi _TIF_SYSCALL_TRACE_MASK,%r2
and,COND(=) %r19,%r2,%r0
b,n syscall_restore_rfi
ldo TASK_PT_FR31(%r1),%r19 /* reload fpregs */
rest_fp %r19
LDREG TASK_PT_SAR(%r1),%r19 /* restore SAR */
mtsar %r19
LDREG TASK_PT_GR2(%r1),%r2 /* restore user rp */
LDREG TASK_PT_GR19(%r1),%r19
LDREG TASK_PT_GR20(%r1),%r20
LDREG TASK_PT_GR21(%r1),%r21
LDREG TASK_PT_GR22(%r1),%r22
LDREG TASK_PT_GR23(%r1),%r23
LDREG TASK_PT_GR24(%r1),%r24
LDREG TASK_PT_GR25(%r1),%r25
LDREG TASK_PT_GR26(%r1),%r26
LDREG TASK_PT_GR27(%r1),%r27 /* restore user dp */
LDREG TASK_PT_GR28(%r1),%r28 /* syscall return value */
LDREG TASK_PT_GR29(%r1),%r29
LDREG TASK_PT_GR31(%r1),%r31 /* restore syscall rp */
/* NOTE: We use rsm/ssm pair to make this operation atomic */
LDREG TASK_PT_GR30(%r1),%r1 /* Get user sp */
rsm PSW_SM_I, %r0
copy %r1,%r30 /* Restore user sp */
mfsp %sr3,%r1 /* Get user space id */
mtsp %r1,%sr7 /* Restore sr7 */
ssm PSW_SM_I, %r0
/* Set sr2 to zero for userspace syscalls to work. */
mtsp %r0,%sr2
mtsp %r1,%sr4 /* Restore sr4 */
mtsp %r1,%sr5 /* Restore sr5 */
mtsp %r1,%sr6 /* Restore sr6 */
depi 3,31,2,%r31 /* ensure return to user mode. */
#ifdef CONFIG_64BIT
/* decide whether to reset the wide mode bit
*
* For a syscall, the W bit is stored in the lowest bit
* of sp. Extract it and reset W if it is zero */
extrd,u,*<> %r30,63,1,%r1
rsm PSW_SM_W, %r0
/* now reset the lowest bit of sp if it was set */
xor %r30,%r1,%r30
#endif
be,n 0(%sr3,%r31) /* return to user space */
/* We have to return via an RFI, so that PSW T and R bits can be set
* appropriately.
* This sets up pt_regs so we can return via intr_restore, which is not
* the most efficient way of doing things, but it works.
*/
syscall_restore_rfi:
ldo -1(%r0),%r2 /* Set recovery cntr to -1 */
mtctl %r2,%cr0 /* for immediate trap */
LDREG TASK_PT_PSW(%r1),%r2 /* Get old PSW */
ldi 0x0b,%r20 /* Create new PSW */
depi -1,13,1,%r20 /* C, Q, D, and I bits */
/* The values of SINGLESTEP_BIT and BLOCKSTEP_BIT are
* set in thread_info.h and converted to PA bitmap
* numbers in asm-offsets.c */
/* if ((%r19.SINGLESTEP_BIT)) { %r20.27=1} */
extru,= %r19,TIF_SINGLESTEP_PA_BIT,1,%r0
depi -1,27,1,%r20 /* R bit */
/* if ((%r19.BLOCKSTEP_BIT)) { %r20.7=1} */
extru,= %r19,TIF_BLOCKSTEP_PA_BIT,1,%r0
depi -1,7,1,%r20 /* T bit */
STREG %r20,TASK_PT_PSW(%r1)
/* Always store space registers, since sr3 can be changed (e.g. fork) */
mfsp %sr3,%r25
STREG %r25,TASK_PT_SR3(%r1)
STREG %r25,TASK_PT_SR4(%r1)
STREG %r25,TASK_PT_SR5(%r1)
STREG %r25,TASK_PT_SR6(%r1)
STREG %r25,TASK_PT_SR7(%r1)
STREG %r25,TASK_PT_IASQ0(%r1)
STREG %r25,TASK_PT_IASQ1(%r1)
/* XXX W bit??? */
/* Now if old D bit is clear, it means we didn't save all registers
* on syscall entry, so do that now. This only happens on TRACEME
* calls, or if someone attached to us while we were on a syscall.
* We could make this more efficient by not saving r3-r18, but
* then we wouldn't be able to use the common intr_restore path.
* It is only for traced processes anyway, so performance is not
* an issue.
*/
bb,< %r2,30,pt_regs_ok /* Branch if D set */
ldo TASK_REGS(%r1),%r25
reg_save %r25 /* Save r3 to r18 */
/* Save the current sr */
mfsp %sr0,%r2
STREG %r2,TASK_PT_SR0(%r1)
/* Save the scratch sr */
mfsp %sr1,%r2
STREG %r2,TASK_PT_SR1(%r1)
/* sr2 should be set to zero for userspace syscalls */
STREG %r0,TASK_PT_SR2(%r1)
LDREG TASK_PT_GR31(%r1),%r2
depi 3,31,2,%r2 /* ensure return to user mode. */
STREG %r2,TASK_PT_IAOQ0(%r1)
ldo 4(%r2),%r2
STREG %r2,TASK_PT_IAOQ1(%r1)
b intr_restore
copy %r25,%r16
pt_regs_ok:
LDREG TASK_PT_IAOQ0(%r1),%r2
depi 3,31,2,%r2 /* ensure return to user mode. */
STREG %r2,TASK_PT_IAOQ0(%r1)
LDREG TASK_PT_IAOQ1(%r1),%r2
depi 3,31,2,%r2
STREG %r2,TASK_PT_IAOQ1(%r1)
b intr_restore
copy %r25,%r16
syscall_do_resched:
load32 syscall_check_resched,%r2 /* if resched, we start over again */
load32 schedule,%r19
bv %r0(%r19) /* jumps to schedule() */
#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
#else
nop
#endif
END(syscall_exit)
#ifdef CONFIG_FUNCTION_TRACER
.import ftrace_function_trampoline,code
.align L1_CACHE_BYTES
ENTRY_CFI(mcount, caller)
_mcount:
.export _mcount,data
/*
* The 64bit mcount() function pointer needs 4 dwords, of which the
* first two are free. We optimize it here and put 2 instructions for
* calling mcount(), and 2 instructions for ftrace_stub(). That way we
* have all on one L1 cacheline.
*/
ldi 0, %arg3
b ftrace_function_trampoline
copy %r3, %arg2 /* caller original %sp */
ftrace_stub:
.globl ftrace_stub
.type ftrace_stub, @function
#ifdef CONFIG_64BIT
bve (%rp)
#else
bv %r0(%rp)
#endif
nop
#ifdef CONFIG_64BIT
.dword mcount
.dword 0 /* code in head.S puts value of global gp here */
#endif
ENDPROC_CFI(mcount)
parisc: add dynamic ftrace This patch implements dynamic ftrace for PA-RISC. The required mcount call sequences can get pretty long, so instead of patching the whole call sequence out of the functions, we are using -fpatchable-function-entry from gcc. This puts a configurable amount of NOPS before/at the start of the function. Taking do_sys_open() as example, which would look like this when the call is patched out: 1036b248: 08 00 02 40 nop 1036b24c: 08 00 02 40 nop 1036b250: 08 00 02 40 nop 1036b254: 08 00 02 40 nop 1036b258 <do_sys_open>: 1036b258: 08 00 02 40 nop 1036b25c: 08 03 02 41 copy r3,r1 1036b260: 6b c2 3f d9 stw rp,-14(sp) 1036b264: 08 1e 02 43 copy sp,r3 1036b268: 6f c1 01 00 stw,ma r1,80(sp) When ftrace gets enabled for this function the kernel will patch these NOPs to: 1036b248: 10 19 57 20 <address of ftrace> 1036b24c: 6f c1 00 80 stw,ma r1,40(sp) 1036b250: 48 21 3f d1 ldw -18(r1),r1 1036b254: e8 20 c0 02 bv,n r0(r1) 1036b258 <do_sys_open>: 1036b258: e8 3f 1f df b,l,n .-c,r1 1036b25c: 08 03 02 41 copy r3,r1 1036b260: 6b c2 3f d9 stw rp,-14(sp) 1036b264: 08 1e 02 43 copy sp,r3 1036b268: 6f c1 01 00 stw,ma r1,80(sp) So the first NOP in do_sys_open() will be patched to jump backwards into some minimal trampoline code which pushes a stackframe, saves r1 which holds the return address, loads the address of the real ftrace function, and branches to that location. For 64 Bit things are getting a bit more complicated (and longer) because we must make sure that the address of ftrace location is 8 byte aligned, and the offset passed to ldd for fetching the address is 8 byte aligned as well. Note that gcc has a bug which misplaces the function label, and needs a patch to make dynamic ftrace work. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90751 for details. Signed-off-by: Sven Schnelle <svens@stackframe.org> Signed-off-by: Helge Deller <deller@gmx.de>
2019-06-06 03:32:22 +07:00
#ifdef CONFIG_DYNAMIC_FTRACE
#ifdef CONFIG_64BIT
#define FTRACE_FRAME_SIZE (2*FRAME_SIZE)
#else
#define FTRACE_FRAME_SIZE FRAME_SIZE
#endif
ENTRY_CFI(ftrace_caller, caller,frame=FTRACE_FRAME_SIZE,CALLS,SAVE_RP,SAVE_SP)
ftrace_caller:
.global ftrace_caller
STREG %r3, -FTRACE_FRAME_SIZE+1*REG_SZ(%sp)
ldo -FTRACE_FRAME_SIZE(%sp), %r3
STREG %rp, -RP_OFFSET(%r3)
/* Offset 0 is already allocated for %r1 */
STREG %r23, 2*REG_SZ(%r3)
STREG %r24, 3*REG_SZ(%r3)
STREG %r25, 4*REG_SZ(%r3)
STREG %r26, 5*REG_SZ(%r3)
STREG %r28, 6*REG_SZ(%r3)
STREG %r29, 7*REG_SZ(%r3)
#ifdef CONFIG_64BIT
STREG %r19, 8*REG_SZ(%r3)
STREG %r20, 9*REG_SZ(%r3)
STREG %r21, 10*REG_SZ(%r3)
STREG %r22, 11*REG_SZ(%r3)
STREG %r27, 12*REG_SZ(%r3)
STREG %r31, 13*REG_SZ(%r3)
loadgp
ldo -16(%sp),%r29
#endif
LDREG 0(%r3), %r25
copy %rp, %r26
ldo -8(%r25), %r25
ldi 0, %r23 /* no pt_regs */
parisc: add dynamic ftrace This patch implements dynamic ftrace for PA-RISC. The required mcount call sequences can get pretty long, so instead of patching the whole call sequence out of the functions, we are using -fpatchable-function-entry from gcc. This puts a configurable amount of NOPS before/at the start of the function. Taking do_sys_open() as example, which would look like this when the call is patched out: 1036b248: 08 00 02 40 nop 1036b24c: 08 00 02 40 nop 1036b250: 08 00 02 40 nop 1036b254: 08 00 02 40 nop 1036b258 <do_sys_open>: 1036b258: 08 00 02 40 nop 1036b25c: 08 03 02 41 copy r3,r1 1036b260: 6b c2 3f d9 stw rp,-14(sp) 1036b264: 08 1e 02 43 copy sp,r3 1036b268: 6f c1 01 00 stw,ma r1,80(sp) When ftrace gets enabled for this function the kernel will patch these NOPs to: 1036b248: 10 19 57 20 <address of ftrace> 1036b24c: 6f c1 00 80 stw,ma r1,40(sp) 1036b250: 48 21 3f d1 ldw -18(r1),r1 1036b254: e8 20 c0 02 bv,n r0(r1) 1036b258 <do_sys_open>: 1036b258: e8 3f 1f df b,l,n .-c,r1 1036b25c: 08 03 02 41 copy r3,r1 1036b260: 6b c2 3f d9 stw rp,-14(sp) 1036b264: 08 1e 02 43 copy sp,r3 1036b268: 6f c1 01 00 stw,ma r1,80(sp) So the first NOP in do_sys_open() will be patched to jump backwards into some minimal trampoline code which pushes a stackframe, saves r1 which holds the return address, loads the address of the real ftrace function, and branches to that location. For 64 Bit things are getting a bit more complicated (and longer) because we must make sure that the address of ftrace location is 8 byte aligned, and the offset passed to ldd for fetching the address is 8 byte aligned as well. Note that gcc has a bug which misplaces the function label, and needs a patch to make dynamic ftrace work. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90751 for details. Signed-off-by: Sven Schnelle <svens@stackframe.org> Signed-off-by: Helge Deller <deller@gmx.de>
2019-06-06 03:32:22 +07:00
b,l ftrace_function_trampoline, %rp
copy %r3, %r24
LDREG -RP_OFFSET(%r3), %rp
LDREG 2*REG_SZ(%r3), %r23
LDREG 3*REG_SZ(%r3), %r24
LDREG 4*REG_SZ(%r3), %r25
LDREG 5*REG_SZ(%r3), %r26
LDREG 6*REG_SZ(%r3), %r28
LDREG 7*REG_SZ(%r3), %r29
#ifdef CONFIG_64BIT
LDREG 8*REG_SZ(%r3), %r19
LDREG 9*REG_SZ(%r3), %r20
LDREG 10*REG_SZ(%r3), %r21
LDREG 11*REG_SZ(%r3), %r22
LDREG 12*REG_SZ(%r3), %r27
LDREG 13*REG_SZ(%r3), %r31
#endif
LDREG 1*REG_SZ(%r3), %r3
LDREGM -FTRACE_FRAME_SIZE(%sp), %r1
/* Adjust return point to jump back to beginning of traced function */
ldo -4(%r1), %r1
bv,n (%r1)
ENDPROC_CFI(ftrace_caller)
#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS
ENTRY_CFI(ftrace_regs_caller,caller,frame=FTRACE_FRAME_SIZE+PT_SZ_ALGN,
CALLS,SAVE_RP,SAVE_SP)
ftrace_regs_caller:
.global ftrace_regs_caller
ldo -FTRACE_FRAME_SIZE(%sp), %r1
STREG %rp, -RP_OFFSET(%r1)
copy %sp, %r1
ldo PT_SZ_ALGN(%sp), %sp
STREG %rp, PT_GR2(%r1)
STREG %r3, PT_GR3(%r1)
STREG %r4, PT_GR4(%r1)
STREG %r5, PT_GR5(%r1)
STREG %r6, PT_GR6(%r1)
STREG %r7, PT_GR7(%r1)
STREG %r8, PT_GR8(%r1)
STREG %r9, PT_GR9(%r1)
STREG %r10, PT_GR10(%r1)
STREG %r11, PT_GR11(%r1)
STREG %r12, PT_GR12(%r1)
STREG %r13, PT_GR13(%r1)
STREG %r14, PT_GR14(%r1)
STREG %r15, PT_GR15(%r1)
STREG %r16, PT_GR16(%r1)
STREG %r17, PT_GR17(%r1)
STREG %r18, PT_GR18(%r1)
STREG %r19, PT_GR19(%r1)
STREG %r20, PT_GR20(%r1)
STREG %r21, PT_GR21(%r1)
STREG %r22, PT_GR22(%r1)
STREG %r23, PT_GR23(%r1)
STREG %r24, PT_GR24(%r1)
STREG %r25, PT_GR25(%r1)
STREG %r26, PT_GR26(%r1)
STREG %r27, PT_GR27(%r1)
STREG %r28, PT_GR28(%r1)
STREG %r29, PT_GR29(%r1)
STREG %r30, PT_GR30(%r1)
STREG %r31, PT_GR31(%r1)
mfctl %cr11, %r26
STREG %r26, PT_SAR(%r1)
copy %rp, %r26
LDREG -FTRACE_FRAME_SIZE-PT_SZ_ALGN(%sp), %r25
ldo -8(%r25), %r25
ldo -FTRACE_FRAME_SIZE(%r1), %arg2
b,l ftrace_function_trampoline, %rp
copy %r1, %arg3 /* struct pt_regs */
ldo -PT_SZ_ALGN(%sp), %r1
LDREG PT_SAR(%r1), %rp
mtctl %rp, %cr11
LDREG PT_GR2(%r1), %rp
LDREG PT_GR3(%r1), %r3
LDREG PT_GR4(%r1), %r4
LDREG PT_GR5(%r1), %r5
LDREG PT_GR6(%r1), %r6
LDREG PT_GR7(%r1), %r7
LDREG PT_GR8(%r1), %r8
LDREG PT_GR9(%r1), %r9
LDREG PT_GR10(%r1),%r10
LDREG PT_GR11(%r1),%r11
LDREG PT_GR12(%r1),%r12
LDREG PT_GR13(%r1),%r13
LDREG PT_GR14(%r1),%r14
LDREG PT_GR15(%r1),%r15
LDREG PT_GR16(%r1),%r16
LDREG PT_GR17(%r1),%r17
LDREG PT_GR18(%r1),%r18
LDREG PT_GR19(%r1),%r19
LDREG PT_GR20(%r1),%r20
LDREG PT_GR21(%r1),%r21
LDREG PT_GR22(%r1),%r22
LDREG PT_GR23(%r1),%r23
LDREG PT_GR24(%r1),%r24
LDREG PT_GR25(%r1),%r25
LDREG PT_GR26(%r1),%r26
LDREG PT_GR27(%r1),%r27
LDREG PT_GR28(%r1),%r28
LDREG PT_GR29(%r1),%r29
LDREG PT_GR30(%r1),%r30
LDREG PT_GR31(%r1),%r31
ldo -PT_SZ_ALGN(%sp), %sp
LDREGM -FTRACE_FRAME_SIZE(%sp), %r1
/* Adjust return point to jump back to beginning of traced function */
ldo -4(%r1), %r1
bv,n (%r1)
ENDPROC_CFI(ftrace_regs_caller)
#endif
parisc: add dynamic ftrace This patch implements dynamic ftrace for PA-RISC. The required mcount call sequences can get pretty long, so instead of patching the whole call sequence out of the functions, we are using -fpatchable-function-entry from gcc. This puts a configurable amount of NOPS before/at the start of the function. Taking do_sys_open() as example, which would look like this when the call is patched out: 1036b248: 08 00 02 40 nop 1036b24c: 08 00 02 40 nop 1036b250: 08 00 02 40 nop 1036b254: 08 00 02 40 nop 1036b258 <do_sys_open>: 1036b258: 08 00 02 40 nop 1036b25c: 08 03 02 41 copy r3,r1 1036b260: 6b c2 3f d9 stw rp,-14(sp) 1036b264: 08 1e 02 43 copy sp,r3 1036b268: 6f c1 01 00 stw,ma r1,80(sp) When ftrace gets enabled for this function the kernel will patch these NOPs to: 1036b248: 10 19 57 20 <address of ftrace> 1036b24c: 6f c1 00 80 stw,ma r1,40(sp) 1036b250: 48 21 3f d1 ldw -18(r1),r1 1036b254: e8 20 c0 02 bv,n r0(r1) 1036b258 <do_sys_open>: 1036b258: e8 3f 1f df b,l,n .-c,r1 1036b25c: 08 03 02 41 copy r3,r1 1036b260: 6b c2 3f d9 stw rp,-14(sp) 1036b264: 08 1e 02 43 copy sp,r3 1036b268: 6f c1 01 00 stw,ma r1,80(sp) So the first NOP in do_sys_open() will be patched to jump backwards into some minimal trampoline code which pushes a stackframe, saves r1 which holds the return address, loads the address of the real ftrace function, and branches to that location. For 64 Bit things are getting a bit more complicated (and longer) because we must make sure that the address of ftrace location is 8 byte aligned, and the offset passed to ldd for fetching the address is 8 byte aligned as well. Note that gcc has a bug which misplaces the function label, and needs a patch to make dynamic ftrace work. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90751 for details. Signed-off-by: Sven Schnelle <svens@stackframe.org> Signed-off-by: Helge Deller <deller@gmx.de>
2019-06-06 03:32:22 +07:00
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.align 8
ENTRY_CFI(return_to_handler, caller,frame=FRAME_SIZE)
.export parisc_return_to_handler,data
parisc_return_to_handler:
copy %r3,%r1
STREG %r0,-RP_OFFSET(%sp) /* store 0 as %rp */
copy %sp,%r3
STREGM %r1,FRAME_SIZE(%sp)
STREG %ret0,8(%r3)
STREG %ret1,16(%r3)
#ifdef CONFIG_64BIT
loadgp
#endif
/* call ftrace_return_to_handler(0) */
.import ftrace_return_to_handler,code
load32 ftrace_return_to_handler,%ret0
load32 .Lftrace_ret,%r2
#ifdef CONFIG_64BIT
ldo -16(%sp),%ret1 /* Reference param save area */
bve (%ret0)
#else
bv %r0(%ret0)
#endif
ldi 0,%r26
.Lftrace_ret:
copy %ret0,%rp
/* restore original return values */
LDREG 8(%r3),%ret0
LDREG 16(%r3),%ret1
/* return from function */
#ifdef CONFIG_64BIT
bve (%rp)
#else
bv %r0(%rp)
#endif
LDREGM -FRAME_SIZE(%sp),%r3
ENDPROC_CFI(return_to_handler)
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_IRQSTACKS
/* void call_on_stack(unsigned long param1, void *func,
unsigned long new_stack) */
ENTRY_CFI(call_on_stack, FRAME=2*FRAME_SIZE,CALLS,SAVE_RP,SAVE_SP)
ENTRY(_call_on_stack)
copy %sp, %r1
/* Regarding the HPPA calling conventions for function pointers,
we assume the PIC register is not changed across call. For
CONFIG_64BIT, the argument pointer is left to point at the
argument region allocated for the call to call_on_stack. */
/* Switch to new stack. We allocate two frames. */
ldo 2*FRAME_SIZE(%arg2), %sp
# ifdef CONFIG_64BIT
/* Save previous stack pointer and return pointer in frame marker */
STREG %rp, -FRAME_SIZE-RP_OFFSET(%sp)
/* Calls always use function descriptor */
LDREG 16(%arg1), %arg1
bve,l (%arg1), %rp
STREG %r1, -FRAME_SIZE-REG_SZ(%sp)
LDREG -FRAME_SIZE-RP_OFFSET(%sp), %rp
bve (%rp)
LDREG -FRAME_SIZE-REG_SZ(%sp), %sp
# else
/* Save previous stack pointer and return pointer in frame marker */
STREG %r1, -FRAME_SIZE-REG_SZ(%sp)
STREG %rp, -FRAME_SIZE-RP_OFFSET(%sp)
/* Calls use function descriptor if PLABEL bit is set */
bb,>=,n %arg1, 30, 1f
depwi 0,31,2, %arg1
LDREG 0(%arg1), %arg1
1:
be,l 0(%sr4,%arg1), %sr0, %r31
copy %r31, %rp
LDREG -FRAME_SIZE-RP_OFFSET(%sp), %rp
bv (%rp)
LDREG -FRAME_SIZE-REG_SZ(%sp), %sp
# endif /* CONFIG_64BIT */
ENDPROC_CFI(call_on_stack)
#endif /* CONFIG_IRQSTACKS */
ENTRY_CFI(get_register)
/*
* get_register is used by the non access tlb miss handlers to
* copy the value of the general register specified in r8 into
* r1. This routine can't be used for shadowed registers, since
* the rfir will restore the original value. So, for the shadowed
* registers we put a -1 into r1 to indicate that the register
* should not be used (the register being copied could also have
* a -1 in it, but that is OK, it just means that we will have
* to use the slow path instead).
*/
blr %r8,%r0
nop
bv %r0(%r25) /* r0 */
copy %r0,%r1
bv %r0(%r25) /* r1 - shadowed */
ldi -1,%r1
bv %r0(%r25) /* r2 */
copy %r2,%r1
bv %r0(%r25) /* r3 */
copy %r3,%r1
bv %r0(%r25) /* r4 */
copy %r4,%r1
bv %r0(%r25) /* r5 */
copy %r5,%r1
bv %r0(%r25) /* r6 */
copy %r6,%r1
bv %r0(%r25) /* r7 */
copy %r7,%r1
bv %r0(%r25) /* r8 - shadowed */
ldi -1,%r1
bv %r0(%r25) /* r9 - shadowed */
ldi -1,%r1
bv %r0(%r25) /* r10 */
copy %r10,%r1
bv %r0(%r25) /* r11 */
copy %r11,%r1
bv %r0(%r25) /* r12 */
copy %r12,%r1
bv %r0(%r25) /* r13 */
copy %r13,%r1
bv %r0(%r25) /* r14 */
copy %r14,%r1
bv %r0(%r25) /* r15 */
copy %r15,%r1
bv %r0(%r25) /* r16 - shadowed */
ldi -1,%r1
bv %r0(%r25) /* r17 - shadowed */
ldi -1,%r1
bv %r0(%r25) /* r18 */
copy %r18,%r1
bv %r0(%r25) /* r19 */
copy %r19,%r1
bv %r0(%r25) /* r20 */
copy %r20,%r1
bv %r0(%r25) /* r21 */
copy %r21,%r1
bv %r0(%r25) /* r22 */
copy %r22,%r1
bv %r0(%r25) /* r23 */
copy %r23,%r1
bv %r0(%r25) /* r24 - shadowed */
ldi -1,%r1
bv %r0(%r25) /* r25 - shadowed */
ldi -1,%r1
bv %r0(%r25) /* r26 */
copy %r26,%r1
bv %r0(%r25) /* r27 */
copy %r27,%r1
bv %r0(%r25) /* r28 */
copy %r28,%r1
bv %r0(%r25) /* r29 */
copy %r29,%r1
bv %r0(%r25) /* r30 */
copy %r30,%r1
bv %r0(%r25) /* r31 */
copy %r31,%r1
ENDPROC_CFI(get_register)
ENTRY_CFI(set_register)
/*
* set_register is used by the non access tlb miss handlers to
* copy the value of r1 into the general register specified in
* r8.
*/
blr %r8,%r0
nop
bv %r0(%r25) /* r0 (silly, but it is a place holder) */
copy %r1,%r0
bv %r0(%r25) /* r1 */
copy %r1,%r1
bv %r0(%r25) /* r2 */
copy %r1,%r2
bv %r0(%r25) /* r3 */
copy %r1,%r3
bv %r0(%r25) /* r4 */
copy %r1,%r4
bv %r0(%r25) /* r5 */
copy %r1,%r5
bv %r0(%r25) /* r6 */
copy %r1,%r6
bv %r0(%r25) /* r7 */
copy %r1,%r7
bv %r0(%r25) /* r8 */
copy %r1,%r8
bv %r0(%r25) /* r9 */
copy %r1,%r9
bv %r0(%r25) /* r10 */
copy %r1,%r10
bv %r0(%r25) /* r11 */
copy %r1,%r11
bv %r0(%r25) /* r12 */
copy %r1,%r12
bv %r0(%r25) /* r13 */
copy %r1,%r13
bv %r0(%r25) /* r14 */
copy %r1,%r14
bv %r0(%r25) /* r15 */
copy %r1,%r15
bv %r0(%r25) /* r16 */
copy %r1,%r16
bv %r0(%r25) /* r17 */
copy %r1,%r17
bv %r0(%r25) /* r18 */
copy %r1,%r18
bv %r0(%r25) /* r19 */
copy %r1,%r19
bv %r0(%r25) /* r20 */
copy %r1,%r20
bv %r0(%r25) /* r21 */
copy %r1,%r21
bv %r0(%r25) /* r22 */
copy %r1,%r22
bv %r0(%r25) /* r23 */
copy %r1,%r23
bv %r0(%r25) /* r24 */
copy %r1,%r24
bv %r0(%r25) /* r25 */
copy %r1,%r25
bv %r0(%r25) /* r26 */
copy %r1,%r26
bv %r0(%r25) /* r27 */
copy %r1,%r27
bv %r0(%r25) /* r28 */
copy %r1,%r28
bv %r0(%r25) /* r29 */
copy %r1,%r29
bv %r0(%r25) /* r30 */
copy %r1,%r30
bv %r0(%r25) /* r31 */
copy %r1,%r31
ENDPROC_CFI(set_register)