linux_dsm_epyc7002/arch/sparc/mm/ultra.S
David S. Miller a74ad5e660 sparc64: Handle extremely large kernel TLB range flushes more gracefully.
When the vmalloc area gets fragmented, and because the firmware
mapping area sits between where modules live and the vmalloc area, we
can sometimes receive requests for enormous kernel TLB range flushes.

When this happens, the CPU just spins flushing billions of pages, which
triggers the NMI watchdog and other problems.

We took care of this on the TSB side by doing a linear scan of the
entire TSB once we pass a certain threshold.

Do something similar for the TLB flush, however we are limited by
the TLB flush facilities provided by the different chip variants.

First of all we use a (mostly arbitrary) cut-off of 256K, which is
32 base pages (8K each).  This can be tuned in the future.
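
As an illustration only, here is a minimal C-style sketch of that
cut-off decision.  All of the names below are hypothetical stand-ins,
not the kernel's real C interface; the actual check is a single
srlx/brnz pair in each flush_tlb_kernel_range variant in this file:

    /* Illustrative sketch; names are made up. */
    #define HUGE_TLB_FLUSH_CUTOFF  (256UL * 1024)  /* 32 pages at the 8K base page size */

    static void example_flush_whole_kernel_tlb(void);                    /* chip-specific huge-range path */
    static void example_flush_tlb_pages(unsigned long, unsigned long);   /* per-page demap loop */

    static void example_flush_tlb_kernel_range(unsigned long start, unsigned long end)
    {
            if (end - start >= HUGE_TLB_FLUSH_CUTOFF)
                    example_flush_whole_kernel_tlb();
            else
                    example_flush_tlb_pages(start, end);
    }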

The huge range code path for each chip works as follows:

1) On spitfire we flush all non-locked TLB entries using diagnostic
   accesses.

2) On cheetah we use the "flush all" TLB flush.

3) On sun4v/hypervisor we do a TLB context flush on context 0, which
   unlike previous chips does not remove "permanent" or locked
   entries.

We could probably do something better on spitfire, such as limiting
the flush to kernel TLB entries or even doing range comparisons.
However, that probably isn't worth it since those chips are old and
their TLBs only have 64 entries.

Reported-by: James Clarke <jrtc27@jrtc27.com>
Tested-by: James Clarke <jrtc27@jrtc27.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-10-27 09:11:05 -07:00

/*
* ultra.S: Don't expand these all over the place...
*
* Copyright (C) 1997, 2000, 2008 David S. Miller (davem@davemloft.net)
*/
#include <asm/asi.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/spitfire.h>
#include <asm/mmu_context.h>
#include <asm/mmu.h>
#include <asm/pil.h>
#include <asm/head.h>
#include <asm/thread_info.h>
#include <asm/cacheflush.h>
#include <asm/hypervisor.h>
#include <asm/cpudata.h>
/* Basically, most of the Spitfire vs. Cheetah madness
* has to do with the fact that Cheetah does not support
* IMMU flushes out of the secondary context. Someone needs
* to throw a south lake birthday party for the folks
* in Microelectronics who refused to fix this shit.
*/
/* This file is meant to be read efficiently by the CPU, not humans.
* Try not to screw this up for anyone...
*/
.text
.align 32
.globl __flush_tlb_mm
__flush_tlb_mm: /* 19 insns */
/* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
ldxa [%o1] ASI_DMMU, %g2
cmp %g2, %o0
bne,pn %icc, __spitfire_flush_tlb_mm_slow
mov 0x50, %g3
stxa %g0, [%g3] ASI_DMMU_DEMAP
stxa %g0, [%g3] ASI_IMMU_DEMAP
sethi %hi(KERNBASE), %g3
flush %g3
retl
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
.align 32
.globl __flush_tlb_page
__flush_tlb_page: /* 22 insns */
/* %o0 = context, %o1 = vaddr */
rdpr %pstate, %g7
andn %g7, PSTATE_IE, %g2
wrpr %g2, %pstate
mov SECONDARY_CONTEXT, %o4
ldxa [%o4] ASI_DMMU, %g2
stxa %o0, [%o4] ASI_DMMU
andcc %o1, 1, %g0
andn %o1, 1, %o3
be,pn %icc, 1f
or %o3, 0x10, %o3
stxa %g0, [%o3] ASI_IMMU_DEMAP
1: stxa %g0, [%o3] ASI_DMMU_DEMAP
membar #Sync
stxa %g2, [%o4] ASI_DMMU
sethi %hi(KERNBASE), %o4
flush %o4
retl
wrpr %g7, 0x0, %pstate
nop
nop
nop
nop
.align 32
.globl __flush_tlb_pending
__flush_tlb_pending: /* 27 insns */
/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
rdpr %pstate, %g7
sllx %o1, 3, %o1
andn %g7, PSTATE_IE, %g2
wrpr %g2, %pstate
mov SECONDARY_CONTEXT, %o4
ldxa [%o4] ASI_DMMU, %g2
stxa %o0, [%o4] ASI_DMMU
1: sub %o1, (1 << 3), %o1
ldx [%o2 + %o1], %o3
andcc %o3, 1, %g0
andn %o3, 1, %o3
be,pn %icc, 2f
or %o3, 0x10, %o3
stxa %g0, [%o3] ASI_IMMU_DEMAP
2: stxa %g0, [%o3] ASI_DMMU_DEMAP
membar #Sync
brnz,pt %o1, 1b
nop
stxa %g2, [%o4] ASI_DMMU
sethi %hi(KERNBASE), %o4
flush %o4
retl
wrpr %g7, 0x0, %pstate
nop
nop
nop
nop
.align 32
.globl __flush_tlb_kernel_range
__flush_tlb_kernel_range: /* 31 insns */
/* %o0=start, %o1=end */
cmp %o0, %o1
be,pn %xcc, 2f
sub %o1, %o0, %o3
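/* If the range is 256K or more, punt to the full-TLB flush below. */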
srlx %o3, 18, %o4
brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow
sethi %hi(PAGE_SIZE), %o4
sub %o3, %o4, %o3
or %o0, 0x20, %o0 ! Nucleus
1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP
membar #Sync
brnz,pt %o3, 1b
sub %o3, %o4, %o3
2: sethi %hi(KERNBASE), %o3
flush %o3
retl
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
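/* Huge range on spitfire: walk all 64 I-TLB and D-TLB entries via
* diagnostic accesses and invalidate every entry that is not locked
* (_PAGE_L_4U).
*/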
__spitfire_flush_tlb_kernel_range_slow:
mov 63 * 8, %o4
1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3
andcc %o3, 0x40, %g0 /* _PAGE_L_4U */
bne,pn %xcc, 2f
mov TLB_TAG_ACCESS, %o3
stxa %g0, [%o3] ASI_IMMU
stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS
membar #Sync
2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3
andcc %o3, 0x40, %g0
bne,pn %xcc, 2f
mov TLB_TAG_ACCESS, %o3
stxa %g0, [%o3] ASI_DMMU
stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS
membar #Sync
2: sub %o4, 8, %o4
brgez,pt %o4, 1b
nop
retl
nop
__spitfire_flush_tlb_mm_slow:
rdpr %pstate, %g1
wrpr %g1, PSTATE_IE, %pstate
stxa %o0, [%o1] ASI_DMMU
stxa %g0, [%g3] ASI_DMMU_DEMAP
stxa %g0, [%g3] ASI_IMMU_DEMAP
flush %g6
stxa %g2, [%o1] ASI_DMMU
sethi %hi(KERNBASE), %o1
flush %o1
retl
wrpr %g1, 0, %pstate
/*
* The following code flushes one page_size worth.
*/
.section .kprobes.text, "ax"
.align 32
.globl __flush_icache_page
__flush_icache_page: /* %o0 = phys_page */
srlx %o0, PAGE_SHIFT, %o0
sethi %hi(PAGE_OFFSET), %g1
sllx %o0, PAGE_SHIFT, %o0
sethi %hi(PAGE_SIZE), %g2
ldx [%g1 + %lo(PAGE_OFFSET)], %g1
add %o0, %g1, %o0
1: subcc %g2, 32, %g2
bne,pt %icc, 1b
flush %o0 + %g2
retl
nop
#ifdef DCACHE_ALIASING_POSSIBLE
#if (PAGE_SHIFT != 13)
#error only page shift of 13 is supported by dcache flush
#endif
#define DTAG_MASK 0x3
/* This routine is Spitfire specific so the hardcoded
* D-cache size and line-size are OK.
*/
.align 64
.globl __flush_dcache_page
__flush_dcache_page: /* %o0=kaddr, %o1=flush_icache */
sethi %hi(PAGE_OFFSET), %g1
ldx [%g1 + %lo(PAGE_OFFSET)], %g1
sub %o0, %g1, %o0 ! physical address
srlx %o0, 11, %o0 ! make D-cache TAG
sethi %hi(1 << 14), %o2 ! D-cache size
sub %o2, (1 << 5), %o2 ! D-cache line size
1: ldxa [%o2] ASI_DCACHE_TAG, %o3 ! load D-cache TAG
andcc %o3, DTAG_MASK, %g0 ! Valid?
be,pn %xcc, 2f ! Nope, branch
andn %o3, DTAG_MASK, %o3 ! Clear valid bits
cmp %o3, %o0 ! TAG match?
bne,pt %xcc, 2f ! Nope, branch
nop
stxa %g0, [%o2] ASI_DCACHE_TAG ! Invalidate TAG
membar #Sync
2: brnz,pt %o2, 1b
sub %o2, (1 << 5), %o2 ! D-cache line size
/* The I-cache does not snoop local stores so we
* better flush that too when necessary.
*/
brnz,pt %o1, __flush_icache_page
sllx %o0, 11, %o0
retl
nop
#endif /* DCACHE_ALIASING_POSSIBLE */
.previous
/* Cheetah specific versions, patched at boot time. */
__cheetah_flush_tlb_mm: /* 19 insns */
rdpr %pstate, %g7
andn %g7, PSTATE_IE, %g2
wrpr %g2, 0x0, %pstate
wrpr %g0, 1, %tl
mov PRIMARY_CONTEXT, %o2
mov 0x40, %g3
ldxa [%o2] ASI_DMMU, %g2
srlx %g2, CTX_PGSZ1_NUC_SHIFT, %o1
sllx %o1, CTX_PGSZ1_NUC_SHIFT, %o1
or %o0, %o1, %o0 /* Preserve nucleus page size fields */
stxa %o0, [%o2] ASI_DMMU
stxa %g0, [%g3] ASI_DMMU_DEMAP
stxa %g0, [%g3] ASI_IMMU_DEMAP
stxa %g2, [%o2] ASI_DMMU
sethi %hi(KERNBASE), %o2
flush %o2
wrpr %g0, 0, %tl
retl
wrpr %g7, 0x0, %pstate
__cheetah_flush_tlb_page: /* 22 insns */
/* %o0 = context, %o1 = vaddr */
rdpr %pstate, %g7
andn %g7, PSTATE_IE, %g2
wrpr %g2, 0x0, %pstate
wrpr %g0, 1, %tl
mov PRIMARY_CONTEXT, %o4
ldxa [%o4] ASI_DMMU, %g2
srlx %g2, CTX_PGSZ1_NUC_SHIFT, %o3
sllx %o3, CTX_PGSZ1_NUC_SHIFT, %o3
or %o0, %o3, %o0 /* Preserve nucleus page size fields */
stxa %o0, [%o4] ASI_DMMU
andcc %o1, 1, %g0
be,pn %icc, 1f
andn %o1, 1, %o3
stxa %g0, [%o3] ASI_IMMU_DEMAP
1: stxa %g0, [%o3] ASI_DMMU_DEMAP
membar #Sync
stxa %g2, [%o4] ASI_DMMU
sethi %hi(KERNBASE), %o4
flush %o4
wrpr %g0, 0, %tl
retl
wrpr %g7, 0x0, %pstate
__cheetah_flush_tlb_pending: /* 27 insns */
/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
rdpr %pstate, %g7
sllx %o1, 3, %o1
andn %g7, PSTATE_IE, %g2
wrpr %g2, 0x0, %pstate
wrpr %g0, 1, %tl
mov PRIMARY_CONTEXT, %o4
ldxa [%o4] ASI_DMMU, %g2
srlx %g2, CTX_PGSZ1_NUC_SHIFT, %o3
sllx %o3, CTX_PGSZ1_NUC_SHIFT, %o3
or %o0, %o3, %o0 /* Preserve nucleus page size fields */
stxa %o0, [%o4] ASI_DMMU
1: sub %o1, (1 << 3), %o1
ldx [%o2 + %o1], %o3
andcc %o3, 1, %g0
be,pn %icc, 2f
andn %o3, 1, %o3
stxa %g0, [%o3] ASI_IMMU_DEMAP
2: stxa %g0, [%o3] ASI_DMMU_DEMAP
membar #Sync
brnz,pt %o1, 1b
nop
stxa %g2, [%o4] ASI_DMMU
sethi %hi(KERNBASE), %o4
flush %o4
wrpr %g0, 0, %tl
retl
wrpr %g7, 0x0, %pstate
__cheetah_flush_tlb_kernel_range: /* 31 insns */
/* %o0=start, %o1=end */
cmp %o0, %o1
be,pn %xcc, 2f
sub %o1, %o0, %o3
srlx %o3, 18, %o4
brnz,pn %o4, 3f
sethi %hi(PAGE_SIZE), %o4
sub %o3, %o4, %o3
or %o0, 0x20, %o0 ! Nucleus
1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP
membar #Sync
brnz,pt %o3, 1b
sub %o3, %o4, %o3
2: sethi %hi(KERNBASE), %o3
flush %o3
retl
nop
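/* Huge range on cheetah: use the "flush all" demap operation instead
* of demapping page by page.
*/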
3: mov 0x80, %o4
stxa %g0, [%o4] ASI_DMMU_DEMAP
membar #Sync
stxa %g0, [%o4] ASI_IMMU_DEMAP
membar #Sync
retl
nop
nop
nop
nop
nop
nop
nop
nop
#ifdef DCACHE_ALIASING_POSSIBLE
__cheetah_flush_dcache_page: /* 11 insns */
sethi %hi(PAGE_OFFSET), %g1
ldx [%g1 + %lo(PAGE_OFFSET)], %g1
sub %o0, %g1, %o0
sethi %hi(PAGE_SIZE), %o4
1: subcc %o4, (1 << 5), %o4
stxa %g0, [%o0 + %o4] ASI_DCACHE_INVALIDATE
membar #Sync
bne,pt %icc, 1b
nop
retl /* I-cache flush never needed on Cheetah, see callers. */
nop
#endif /* DCACHE_ALIASING_POSSIBLE */
/* Hypervisor specific versions, patched at boot time. */
__hypervisor_tlb_tl0_error:
save %sp, -192, %sp
mov %i0, %o0
call hypervisor_tlbop_error
mov %i1, %o1
ret
restore
__hypervisor_flush_tlb_mm: /* 19 insns */
mov %o0, %o2 /* ARG2: mmu context */
mov 0, %o0 /* ARG0: CPU lists unimplemented */
mov 0, %o1 /* ARG1: CPU lists unimplemented */
mov HV_MMU_ALL, %o3 /* ARG3: flags */
mov HV_FAST_MMU_DEMAP_CTX, %o5
ta HV_FAST_TRAP
brnz,pn %o0, 1f
mov HV_FAST_MMU_DEMAP_CTX, %o1
retl
nop
1: sethi %hi(__hypervisor_tlb_tl0_error), %o5
jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0
nop
nop
nop
nop
nop
nop
nop
__hypervisor_flush_tlb_page: /* 22 insns */
/* %o0 = context, %o1 = vaddr */
mov %o0, %g2
mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */
mov %g2, %o1 /* ARG1: mmu context */
mov HV_MMU_ALL, %o2 /* ARG2: flags */
srlx %o0, PAGE_SHIFT, %o0
sllx %o0, PAGE_SHIFT, %o0
ta HV_MMU_UNMAP_ADDR_TRAP
brnz,pn %o0, 1f
mov HV_MMU_UNMAP_ADDR_TRAP, %o1
retl
nop
1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
nop
nop
nop
nop
nop
nop
nop
nop
nop
__hypervisor_flush_tlb_pending: /* 27 insns */
/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
sllx %o1, 3, %g1
mov %o2, %g2
mov %o0, %g3
1: sub %g1, (1 << 3), %g1
ldx [%g2 + %g1], %o0 /* ARG0: vaddr + IMMU-bit */
mov %g3, %o1 /* ARG1: mmu context */
mov HV_MMU_ALL, %o2 /* ARG2: flags */
srlx %o0, PAGE_SHIFT, %o0
sllx %o0, PAGE_SHIFT, %o0
ta HV_MMU_UNMAP_ADDR_TRAP
brnz,pn %o0, 1f
mov HV_MMU_UNMAP_ADDR_TRAP, %o1
brnz,pt %g1, 1b
nop
retl
nop
1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
nop
nop
nop
nop
nop
nop
nop
nop
nop
__hypervisor_flush_tlb_kernel_range: /* 31 insns */
/* %o0=start, %o1=end */
cmp %o0, %o1
be,pn %xcc, 2f
sub %o1, %o0, %g2
srlx %g2, 18, %g3
brnz,pn %g3, 4f
mov %o0, %g1
sethi %hi(PAGE_SIZE), %g3
sub %g2, %g3, %g2
1: add %g1, %g2, %o0 /* ARG0: virtual address */
mov 0, %o1 /* ARG1: mmu context */
mov HV_MMU_ALL, %o2 /* ARG2: flags */
ta HV_MMU_UNMAP_ADDR_TRAP
brnz,pn %o0, 3f
mov HV_MMU_UNMAP_ADDR_TRAP, %o1
brnz,pt %g2, 1b
sub %g2, %g3, %g2
2: retl
nop
3: sethi %hi(__hypervisor_tlb_tl0_error), %o2
jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
nop
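/* Huge range on sun4v: have the hypervisor demap the whole nucleus
* context (context 0).  Unlike the flush-all on older chips this does
* not remove "permanent" or locked entries.
*/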
4: mov 0, %o0 /* ARG0: CPU lists unimplemented */
mov 0, %o1 /* ARG1: CPU lists unimplemented */
mov 0, %o2 /* ARG2: mmu context == nucleus */
mov HV_MMU_ALL, %o3 /* ARG3: flags */
mov HV_FAST_MMU_DEMAP_CTX, %o5
ta HV_FAST_TRAP
brnz,pn %o0, 3b
mov HV_FAST_MMU_DEMAP_CTX, %o1
retl
nop
#ifdef DCACHE_ALIASING_POSSIBLE
/* XXX Niagara and friends have an 8K cache, so no aliasing is
* XXX possible, but nothing explicit in the Hypervisor API
* XXX guarantees this.
*/
__hypervisor_flush_dcache_page: /* 2 insns */
retl
nop
#endif
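/* tlb_patch_one: copy %o2 instruction words from %o1 over the routine
* at %o0, flushing the I-cache after each store.  Used at boot to
* install the cheetah/hypervisor variants of the routines above.
*/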
tlb_patch_one:
1: lduw [%o1], %g1
stw %g1, [%o0]
flush %o0
subcc %o2, 1, %o2
add %o1, 4, %o1
bne,pt %icc, 1b
add %o0, 4, %o0
retl
nop
#ifdef CONFIG_SMP
/* These are all called by the slaves of a cross call, at
* trap level 1, with interrupts fully disabled.
*
* Register usage:
* %g5 mm->context (all tlb flushes)
* %g1 address arg 1 (tlb page and range flushes)
* %g7 address arg 2 (tlb range flush only)
*
* %g6 scratch 1
* %g2 scratch 2
* %g3 scratch 3
* %g4 scratch 4
*/
.align 32
.globl xcall_flush_tlb_mm
xcall_flush_tlb_mm: /* 24 insns */
mov PRIMARY_CONTEXT, %g2
ldxa [%g2] ASI_DMMU, %g3
srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4
sllx %g4, CTX_PGSZ1_NUC_SHIFT, %g4
or %g5, %g4, %g5 /* Preserve nucleus page size fields */
stxa %g5, [%g2] ASI_DMMU
mov 0x40, %g4
stxa %g0, [%g4] ASI_DMMU_DEMAP
stxa %g0, [%g4] ASI_IMMU_DEMAP
stxa %g3, [%g2] ASI_DMMU
retry
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
.globl xcall_flush_tlb_page
xcall_flush_tlb_page: /* 20 insns */
/* %g5=context, %g1=vaddr */
mov PRIMARY_CONTEXT, %g4
ldxa [%g4] ASI_DMMU, %g2
srlx %g2, CTX_PGSZ1_NUC_SHIFT, %g4
sllx %g4, CTX_PGSZ1_NUC_SHIFT, %g4
or %g5, %g4, %g5
mov PRIMARY_CONTEXT, %g4
stxa %g5, [%g4] ASI_DMMU
andcc %g1, 0x1, %g0
be,pn %icc, 2f
andn %g1, 0x1, %g5
stxa %g0, [%g5] ASI_IMMU_DEMAP
2: stxa %g0, [%g5] ASI_DMMU_DEMAP
membar #Sync
stxa %g2, [%g4] ASI_DMMU
retry
nop
nop
nop
nop
nop
.globl xcall_flush_tlb_kernel_range
xcall_flush_tlb_kernel_range: /* 44 insns */
sethi %hi(PAGE_SIZE - 1), %g2
or %g2, %lo(PAGE_SIZE - 1), %g2
andn %g1, %g2, %g1
andn %g7, %g2, %g7
sub %g7, %g1, %g3
srlx %g3, 18, %g2
brnz,pn %g2, 2f
add %g2, 1, %g2
sub %g3, %g2, %g3
or %g1, 0x20, %g1 ! Nucleus
1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP
membar #Sync
brnz,pt %g3, 1b
sub %g3, %g2, %g3
retry
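/* Huge range (cross call): scan all 64 TLB entries via diagnostic
* access, skipping locked (_PAGE_L_4U) entries, as in
* __spitfire_flush_tlb_kernel_range_slow above.
*/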
2: mov 63 * 8, %g1
1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2
andcc %g2, 0x40, %g0 /* _PAGE_L_4U */
bne,pn %xcc, 2f
mov TLB_TAG_ACCESS, %g2
stxa %g0, [%g2] ASI_IMMU
stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS
membar #Sync
2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2
andcc %g2, 0x40, %g0
bne,pn %xcc, 2f
mov TLB_TAG_ACCESS, %g2
stxa %g0, [%g2] ASI_DMMU
stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS
membar #Sync
2: sub %g1, 8, %g1
brgez,pt %g1, 1b
nop
retry
nop
nop
nop
nop
nop
nop
nop
nop
nop
/* This runs in a very controlled environment, so we do
* not need to worry about BH races etc.
*/
.globl xcall_sync_tick
xcall_sync_tick:
661: rdpr %pstate, %g2
wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate
.section .sun4v_2insn_patch, "ax"
.word 661b
nop
nop
.previous
rdpr %pil, %g2
wrpr %g0, PIL_NORMAL_MAX, %pil
sethi %hi(109f), %g7
b,pt %xcc, etrap_irq
109: or %g7, %lo(109b), %g7
#ifdef CONFIG_TRACE_IRQFLAGS
call trace_hardirqs_off
nop
#endif
call smp_synchronize_tick_client
nop
b rtrap_xcall
ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
.globl xcall_fetch_glob_regs
xcall_fetch_glob_regs:
sethi %hi(global_cpu_snapshot), %g1
or %g1, %lo(global_cpu_snapshot), %g1
__GET_CPUID(%g2)
sllx %g2, 6, %g3
add %g1, %g3, %g1
rdpr %tstate, %g7
stx %g7, [%g1 + GR_SNAP_TSTATE]
rdpr %tpc, %g7
stx %g7, [%g1 + GR_SNAP_TPC]
rdpr %tnpc, %g7
stx %g7, [%g1 + GR_SNAP_TNPC]
stx %o7, [%g1 + GR_SNAP_O7]
stx %i7, [%g1 + GR_SNAP_I7]
/* Don't try this at home kids... */
rdpr %cwp, %g3
sub %g3, 1, %g7
wrpr %g7, %cwp
mov %i7, %g7
wrpr %g3, %cwp
stx %g7, [%g1 + GR_SNAP_RPC]
sethi %hi(trap_block), %g7
or %g7, %lo(trap_block), %g7
sllx %g2, TRAP_BLOCK_SZ_SHIFT, %g2
add %g7, %g2, %g7
ldx [%g7 + TRAP_PER_CPU_THREAD], %g3
stx %g3, [%g1 + GR_SNAP_THREAD]
retry
.globl xcall_fetch_glob_pmu
xcall_fetch_glob_pmu:
sethi %hi(global_cpu_snapshot), %g1
or %g1, %lo(global_cpu_snapshot), %g1
__GET_CPUID(%g2)
sllx %g2, 6, %g3
add %g1, %g3, %g1
rd %pic, %g7
stx %g7, [%g1 + (4 * 8)]
rd %pcr, %g7
stx %g7, [%g1 + (0 * 8)]
retry
.globl xcall_fetch_glob_pmu_n4
xcall_fetch_glob_pmu_n4:
sethi %hi(global_cpu_snapshot), %g1
or %g1, %lo(global_cpu_snapshot), %g1
__GET_CPUID(%g2)
sllx %g2, 6, %g3
add %g1, %g3, %g1
ldxa [%g0] ASI_PIC, %g7
stx %g7, [%g1 + (4 * 8)]
mov 0x08, %g3
ldxa [%g3] ASI_PIC, %g7
stx %g7, [%g1 + (5 * 8)]
mov 0x10, %g3
ldxa [%g3] ASI_PIC, %g7
stx %g7, [%g1 + (6 * 8)]
mov 0x18, %g3
ldxa [%g3] ASI_PIC, %g7
stx %g7, [%g1 + (7 * 8)]
mov %o0, %g2
mov %o1, %g3
mov %o5, %g7
mov HV_FAST_VT_GET_PERFREG, %o5
mov 3, %o0
ta HV_FAST_TRAP
stx %o1, [%g1 + (3 * 8)]
mov HV_FAST_VT_GET_PERFREG, %o5
mov 2, %o0
ta HV_FAST_TRAP
stx %o1, [%g1 + (2 * 8)]
mov HV_FAST_VT_GET_PERFREG, %o5
mov 1, %o0
ta HV_FAST_TRAP
stx %o1, [%g1 + (1 * 8)]
mov HV_FAST_VT_GET_PERFREG, %o5
mov 0, %o0
ta HV_FAST_TRAP
stx %o1, [%g1 + (0 * 8)]
mov %g2, %o0
mov %g3, %o1
mov %g7, %o5
retry
__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */
sethi %hi(PAGE_SIZE - 1), %g2
or %g2, %lo(PAGE_SIZE - 1), %g2
andn %g1, %g2, %g1
andn %g7, %g2, %g7
sub %g7, %g1, %g3
srlx %g3, 18, %g2
brnz,pn %g2, 2f
add %g2, 1, %g2
sub %g3, %g2, %g3
or %g1, 0x20, %g1 ! Nucleus
1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP
membar #Sync
brnz,pt %g3, 1b
sub %g3, %g2, %g3
retry
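/* Huge range (cross call): a single "flush all" demap clears the
* whole TLB at once.
*/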
2: mov 0x80, %g2
stxa %g0, [%g2] ASI_DMMU_DEMAP
membar #Sync
stxa %g0, [%g2] ASI_IMMU_DEMAP
membar #Sync
retry
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
#ifdef DCACHE_ALIASING_POSSIBLE
.align 32
.globl xcall_flush_dcache_page_cheetah
xcall_flush_dcache_page_cheetah: /* %g1 == physical page address */
sethi %hi(PAGE_SIZE), %g3
1: subcc %g3, (1 << 5), %g3
stxa %g0, [%g1 + %g3] ASI_DCACHE_INVALIDATE
membar #Sync
bne,pt %icc, 1b
nop
retry
nop
#endif /* DCACHE_ALIASING_POSSIBLE */
.globl xcall_flush_dcache_page_spitfire
xcall_flush_dcache_page_spitfire: /* %g1 == physical page address
%g7 == kernel page virtual address
%g5 == (page->mapping != NULL) */
#ifdef DCACHE_ALIASING_POSSIBLE
srlx %g1, (13 - 2), %g1 ! Form tag comparator
sethi %hi(L1DCACHE_SIZE), %g3 ! D$ size == 16K
sub %g3, (1 << 5), %g3 ! D$ linesize == 32
1: ldxa [%g3] ASI_DCACHE_TAG, %g2
andcc %g2, 0x3, %g0
be,pn %xcc, 2f
andn %g2, 0x3, %g2
cmp %g2, %g1
bne,pt %xcc, 2f
nop
stxa %g0, [%g3] ASI_DCACHE_TAG
membar #Sync
2: cmp %g3, 0
bne,pt %xcc, 1b
sub %g3, (1 << 5), %g3
brz,pn %g5, 2f
#endif /* DCACHE_ALIASING_POSSIBLE */
sethi %hi(PAGE_SIZE), %g3
1: flush %g7
subcc %g3, (1 << 5), %g3
bne,pt %icc, 1b
add %g7, (1 << 5), %g7
2: retry
nop
nop
/* %g5: error
* %g6: tlb op
*/
__hypervisor_tlb_xcall_error:
mov %g5, %g4
mov %g6, %g5
ba,pt %xcc, etrap
rd %pc, %g7
mov %l4, %o0
call hypervisor_tlbop_error_xcall
mov %l5, %o1
ba,a,pt %xcc, rtrap
.globl __hypervisor_xcall_flush_tlb_mm
__hypervisor_xcall_flush_tlb_mm: /* 24 insns */
/* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */
mov %o0, %g2
mov %o1, %g3
mov %o2, %g4
mov %o3, %g1
mov %o5, %g7
clr %o0 /* ARG0: CPU lists unimplemented */
clr %o1 /* ARG1: CPU lists unimplemented */
mov %g5, %o2 /* ARG2: mmu context */
mov HV_MMU_ALL, %o3 /* ARG3: flags */
mov HV_FAST_MMU_DEMAP_CTX, %o5
ta HV_FAST_TRAP
mov HV_FAST_MMU_DEMAP_CTX, %g6
brnz,pn %o0, 1f
mov %o0, %g5
mov %g2, %o0
mov %g3, %o1
mov %g4, %o2
mov %g1, %o3
mov %g7, %o5
membar #Sync
retry
1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
nop
.globl __hypervisor_xcall_flush_tlb_page
__hypervisor_xcall_flush_tlb_page: /* 20 insns */
/* %g5=ctx, %g1=vaddr */
mov %o0, %g2
mov %o1, %g3
mov %o2, %g4
mov %g1, %o0 /* ARG0: virtual address */
mov %g5, %o1 /* ARG1: mmu context */
mov HV_MMU_ALL, %o2 /* ARG2: flags */
srlx %o0, PAGE_SHIFT, %o0
sllx %o0, PAGE_SHIFT, %o0
ta HV_MMU_UNMAP_ADDR_TRAP
mov HV_MMU_UNMAP_ADDR_TRAP, %g6
brnz,a,pn %o0, 1f
mov %o0, %g5
mov %g2, %o0
mov %g3, %o1
mov %g4, %o2
membar #Sync
retry
1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
nop
.globl __hypervisor_xcall_flush_tlb_kernel_range
__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */
/* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */
sethi %hi(PAGE_SIZE - 1), %g2
or %g2, %lo(PAGE_SIZE - 1), %g2
andn %g1, %g2, %g1
andn %g7, %g2, %g7
sub %g7, %g1, %g3
srlx %g3, 18, %g7
add %g2, 1, %g2
sub %g3, %g2, %g3
mov %o0, %g2
mov %o1, %g4
brnz,pn %g7, 2f
mov %o2, %g7
1: add %g1, %g3, %o0 /* ARG0: virtual address */
mov 0, %o1 /* ARG1: mmu context */
mov HV_MMU_ALL, %o2 /* ARG2: flags */
ta HV_MMU_UNMAP_ADDR_TRAP
mov HV_MMU_UNMAP_ADDR_TRAP, %g6
brnz,pn %o0, 1f
mov %o0, %g5
sethi %hi(PAGE_SIZE), %o2
brnz,pt %g3, 1b
sub %g3, %o2, %g3
5: mov %g2, %o0
mov %g4, %o1
mov %g7, %o2
membar #Sync
retry
1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
nop
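/* Huge range (cross call): demap all of context 0 (nucleus) via the
* hypervisor.
*/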
2: mov %o3, %g1
mov %o5, %g3
mov 0, %o0 /* ARG0: CPU lists unimplemented */
mov 0, %o1 /* ARG1: CPU lists unimplemented */
mov 0, %o2 /* ARG2: mmu context == nucleus */
mov HV_MMU_ALL, %o3 /* ARG3: flags */
mov HV_FAST_MMU_DEMAP_CTX, %o5
ta HV_FAST_TRAP
mov %g1, %o3
brz,pt %o0, 5b
mov %g3, %o5
mov HV_FAST_MMU_DEMAP_CTX, %g6
ba,pt %xcc, 1b
clr %g5
/* These just get rescheduled to PIL vectors. */
.globl xcall_call_function
xcall_call_function:
wr %g0, (1 << PIL_SMP_CALL_FUNC), %set_softint
retry
.globl xcall_call_function_single
xcall_call_function_single:
wr %g0, (1 << PIL_SMP_CALL_FUNC_SNGL), %set_softint
retry
.globl xcall_receive_signal
xcall_receive_signal:
wr %g0, (1 << PIL_SMP_RECEIVE_SIGNAL), %set_softint
retry
.globl xcall_capture
xcall_capture:
wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint
retry
.globl xcall_new_mmu_context_version
xcall_new_mmu_context_version:
wr %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint
retry
#ifdef CONFIG_KGDB
.globl xcall_kgdb_capture
xcall_kgdb_capture:
wr %g0, (1 << PIL_KGDB_CAPTURE), %set_softint
retry
#endif
#endif /* CONFIG_SMP */
.globl cheetah_patch_cachetlbops
cheetah_patch_cachetlbops:
save %sp, -128, %sp
sethi %hi(__flush_tlb_mm), %o0
or %o0, %lo(__flush_tlb_mm), %o0
sethi %hi(__cheetah_flush_tlb_mm), %o1
or %o1, %lo(__cheetah_flush_tlb_mm), %o1
call tlb_patch_one
mov 19, %o2
sethi %hi(__flush_tlb_page), %o0
or %o0, %lo(__flush_tlb_page), %o0
sethi %hi(__cheetah_flush_tlb_page), %o1
or %o1, %lo(__cheetah_flush_tlb_page), %o1
call tlb_patch_one
mov 22, %o2
sethi %hi(__flush_tlb_pending), %o0
or %o0, %lo(__flush_tlb_pending), %o0
sethi %hi(__cheetah_flush_tlb_pending), %o1
or %o1, %lo(__cheetah_flush_tlb_pending), %o1
call tlb_patch_one
mov 27, %o2
sethi %hi(__flush_tlb_kernel_range), %o0
or %o0, %lo(__flush_tlb_kernel_range), %o0
sethi %hi(__cheetah_flush_tlb_kernel_range), %o1
or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1
call tlb_patch_one
mov 31, %o2
#ifdef DCACHE_ALIASING_POSSIBLE
sethi %hi(__flush_dcache_page), %o0
or %o0, %lo(__flush_dcache_page), %o0
sethi %hi(__cheetah_flush_dcache_page), %o1
or %o1, %lo(__cheetah_flush_dcache_page), %o1
call tlb_patch_one
mov 11, %o2
#endif /* DCACHE_ALIASING_POSSIBLE */
#ifdef CONFIG_SMP
sethi %hi(xcall_flush_tlb_kernel_range), %o0
or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1
or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1
call tlb_patch_one
mov 44, %o2
#endif /* CONFIG_SMP */
ret
restore
.globl hypervisor_patch_cachetlbops
hypervisor_patch_cachetlbops:
save %sp, -128, %sp
sethi %hi(__flush_tlb_mm), %o0
or %o0, %lo(__flush_tlb_mm), %o0
sethi %hi(__hypervisor_flush_tlb_mm), %o1
or %o1, %lo(__hypervisor_flush_tlb_mm), %o1
call tlb_patch_one
mov 19, %o2
sethi %hi(__flush_tlb_page), %o0
or %o0, %lo(__flush_tlb_page), %o0
sethi %hi(__hypervisor_flush_tlb_page), %o1
or %o1, %lo(__hypervisor_flush_tlb_page), %o1
call tlb_patch_one
mov 22, %o2
sethi %hi(__flush_tlb_pending), %o0
or %o0, %lo(__flush_tlb_pending), %o0
sethi %hi(__hypervisor_flush_tlb_pending), %o1
or %o1, %lo(__hypervisor_flush_tlb_pending), %o1
call tlb_patch_one
mov 27, %o2
sethi %hi(__flush_tlb_kernel_range), %o0
or %o0, %lo(__flush_tlb_kernel_range), %o0
sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1
or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
call tlb_patch_one
mov 31, %o2
#ifdef DCACHE_ALIASING_POSSIBLE
sethi %hi(__flush_dcache_page), %o0
or %o0, %lo(__flush_dcache_page), %o0
sethi %hi(__hypervisor_flush_dcache_page), %o1
or %o1, %lo(__hypervisor_flush_dcache_page), %o1
call tlb_patch_one
mov 2, %o2
#endif /* DCACHE_ALIASING_POSSIBLE */
#ifdef CONFIG_SMP
sethi %hi(xcall_flush_tlb_mm), %o0
or %o0, %lo(xcall_flush_tlb_mm), %o0
sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1
or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
call tlb_patch_one
mov 24, %o2
sethi %hi(xcall_flush_tlb_page), %o0
or %o0, %lo(xcall_flush_tlb_page), %o0
sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1
or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1
call tlb_patch_one
mov 20, %o2
sethi %hi(xcall_flush_tlb_kernel_range), %o0
or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
call tlb_patch_one
mov 44, %o2
#endif /* CONFIG_SMP */
ret
restore