linux_dsm_epyc7002/arch/arm/mm/proc-macros.S

378 lines
8.7 KiB
ArmAsm
Raw Normal View History

/*
* We need constants.h for:
* VMA_VM_MM
* VMA_VM_FLAGS
* VM_EXEC
*/
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#ifdef CONFIG_CPU_V7M
#include <asm/v7m.h>
#endif
/*
* vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
*/
.macro vma_vm_mm, rd, rn
ldr \rd, [\rn, #VMA_VM_MM]
.endm
/*
* vma_vm_flags - get vma->vm_flags
*/
.macro vma_vm_flags, rd, rn
ldr \rd, [\rn, #VMA_VM_FLAGS]
.endm
.macro tsk_mm, rd, rn
ldr \rd, [\rn, #TI_TASK]
ldr \rd, [\rd, #TSK_ACTIVE_MM]
.endm
/*
* act_mm - get current->active_mm
*/
.macro act_mm, rd
bic \rd, sp, #8128
bic \rd, \rd, #63
ldr \rd, [\rd, #TI_TASK]
ldr \rd, [\rd, #TSK_ACTIVE_MM]
.endm
/*
* mmid - get context id from mm pointer (mm->context.id)
* note, this field is 64bit, so in big-endian the two words are swapped too.
*/
.macro mmid, rd, rn
#ifdef __ARMEB__
ldr \rd, [\rn, #MM_CONTEXT_ID + 4 ]
#else
ldr \rd, [\rn, #MM_CONTEXT_ID]
#endif
.endm
/*
* mask_asid - mask the ASID from the context ID
*/
.macro asid, rd, rn
and \rd, \rn, #255
.endm
.macro crval, clear, mmuset, ucset
#ifdef CONFIG_MMU
.word \clear
.word \mmuset
#else
.word \clear
.word \ucset
#endif
.endm
/*
* dcache_line_size - get the minimum D-cache line size from the CTR register
* on ARMv7.
*/
.macro dcache_line_size, reg, tmp
#ifdef CONFIG_CPU_V7M
movw \tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
movt \tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
ldr \tmp, [\tmp]
#else
mrc p15, 0, \tmp, c0, c0, 1 @ read ctr
#endif
lsr \tmp, \tmp, #16
and \tmp, \tmp, #0xf @ cache line size encoding
mov \reg, #4 @ bytes per word
mov \reg, \reg, lsl \tmp @ actual cache line size
.endm
/*
* icache_line_size - get the minimum I-cache line size from the CTR register
* on ARMv7.
*/
.macro icache_line_size, reg, tmp
#ifdef CONFIG_CPU_V7M
movw \tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
movt \tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
ldr \tmp, [\tmp]
#else
mrc p15, 0, \tmp, c0, c0, 1 @ read ctr
#endif
and \tmp, \tmp, #0xf @ cache line size encoding
mov \reg, #4 @ bytes per word
mov \reg, \reg, lsl \tmp @ actual cache line size
.endm
/*
* Sanity check the PTE configuration for the code below - which makes
* certain assumptions about how these bits are laid out.
*/
#ifdef CONFIG_MMU
#if L_PTE_SHARED != PTE_EXT_SHARED
#error PTE shared bit mismatch
#endif
#if !defined (CONFIG_ARM_LPAE) && \
(L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\
L_PTE_PRESENT) > L_PTE_SHARED
#error Invalid Linux PTE bit settings
#endif
#endif /* CONFIG_MMU */
/*
* The ARMv6 and ARMv7 set_pte_ext translation function.
*
* Permission translation:
* YUWD APX AP1 AP0 SVC User
* 0xxx 0 0 0 no acc no acc
* 100x 1 0 1 r/o no acc
* 10x0 1 0 1 r/o no acc
* 1011 0 0 1 r/w no acc
* 110x 1 1 1 r/o r/o
* 11x0 1 1 1 r/o r/o
ARM: 7954/1: mm: remove remaining domain support from ARMv6 CPU_32v6 currently selects CPU_USE_DOMAINS if CPU_V6 and MMU. This is because ARM 1136 r0pX CPUs lack the v6k extensions, and therefore do not have hardware thread registers. The lack of these registers requires the kernel to update the vectors page at each context switch in order to write a new TLS pointer. This write must be done via the userspace mapping, since aliasing caches can lead to expensive flushing when using kmap. Finally, this requires the vectors page to be mapped r/w for kernel and r/o for user, which has implications for things like put_user which must trigger CoW appropriately when targetting user pages. The upshot of all this is that a v6/v7 kernel makes use of domains to segregate kernel and user memory accesses. This has the nasty side-effect of making device mappings executable, which has been observed to cause subtle bugs on recent cores (e.g. Cortex-A15 performing a speculative instruction fetch from the GIC and acking an interrupt in the process). This patch solves this problem by removing the remaining domain support from ARMv6. A new memory type is added specifically for the vectors page which allows that page (and only that page) to be mapped as user r/o, kernel r/w. All other user r/o pages are mapped also as kernel r/o. Patch co-developed with Russell King. Cc: <stable@vger.kernel.org> Signed-off-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-02-08 01:12:27 +07:00
* 1111 0 1 1 r/w r/w
*/
.macro armv6_mt_table pfx
\pfx\()_mt_table:
.long 0x00 @ L_PTE_MT_UNCACHED
.long PTE_EXT_TEX(1) @ L_PTE_MT_BUFFERABLE
.long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH
.long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK
.long PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED
.long 0x00 @ unused
.long 0x00 @ L_PTE_MT_MINICACHE (not present)
.long PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC
.long 0x00 @ unused
.long PTE_EXT_TEX(1) @ L_PTE_MT_DEV_WC
.long 0x00 @ unused
.long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED
.long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED
.long 0x00 @ unused
.long 0x00 @ unused
ARM: 7954/1: mm: remove remaining domain support from ARMv6 CPU_32v6 currently selects CPU_USE_DOMAINS if CPU_V6 and MMU. This is because ARM 1136 r0pX CPUs lack the v6k extensions, and therefore do not have hardware thread registers. The lack of these registers requires the kernel to update the vectors page at each context switch in order to write a new TLS pointer. This write must be done via the userspace mapping, since aliasing caches can lead to expensive flushing when using kmap. Finally, this requires the vectors page to be mapped r/w for kernel and r/o for user, which has implications for things like put_user which must trigger CoW appropriately when targetting user pages. The upshot of all this is that a v6/v7 kernel makes use of domains to segregate kernel and user memory accesses. This has the nasty side-effect of making device mappings executable, which has been observed to cause subtle bugs on recent cores (e.g. Cortex-A15 performing a speculative instruction fetch from the GIC and acking an interrupt in the process). This patch solves this problem by removing the remaining domain support from ARMv6. A new memory type is added specifically for the vectors page which allows that page (and only that page) to be mapped as user r/o, kernel r/w. All other user r/o pages are mapped also as kernel r/o. Patch co-developed with Russell King. Cc: <stable@vger.kernel.org> Signed-off-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-02-08 01:12:27 +07:00
.long PTE_CACHEABLE | PTE_BUFFERABLE | PTE_EXT_APX @ L_PTE_MT_VECTORS
.endm
.macro armv6_set_pte_ext pfx
str r1, [r0], #2048 @ linux version
bic r3, r1, #0x000003fc
bic r3, r3, #PTE_TYPE_MASK
orr r3, r3, r2
orr r3, r3, #PTE_EXT_AP0 | 2
adr ip, \pfx\()_mt_table
and r2, r1, #L_PTE_MT_MASK
ldr r2, [ip, r2]
eor r1, r1, #L_PTE_DIRTY
tst r1, #L_PTE_DIRTY|L_PTE_RDONLY
orrne r3, r3, #PTE_EXT_APX
tst r1, #L_PTE_USER
orrne r3, r3, #PTE_EXT_AP1
tstne r3, #PTE_EXT_APX
ARM: 7954/1: mm: remove remaining domain support from ARMv6 CPU_32v6 currently selects CPU_USE_DOMAINS if CPU_V6 and MMU. This is because ARM 1136 r0pX CPUs lack the v6k extensions, and therefore do not have hardware thread registers. The lack of these registers requires the kernel to update the vectors page at each context switch in order to write a new TLS pointer. This write must be done via the userspace mapping, since aliasing caches can lead to expensive flushing when using kmap. Finally, this requires the vectors page to be mapped r/w for kernel and r/o for user, which has implications for things like put_user which must trigger CoW appropriately when targetting user pages. The upshot of all this is that a v6/v7 kernel makes use of domains to segregate kernel and user memory accesses. This has the nasty side-effect of making device mappings executable, which has been observed to cause subtle bugs on recent cores (e.g. Cortex-A15 performing a speculative instruction fetch from the GIC and acking an interrupt in the process). This patch solves this problem by removing the remaining domain support from ARMv6. A new memory type is added specifically for the vectors page which allows that page (and only that page) to be mapped as user r/o, kernel r/w. All other user r/o pages are mapped also as kernel r/o. Patch co-developed with Russell King. Cc: <stable@vger.kernel.org> Signed-off-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-02-08 01:12:27 +07:00
@ user read-only -> kernel read-only
bicne r3, r3, #PTE_EXT_AP0
tst r1, #L_PTE_XN
orrne r3, r3, #PTE_EXT_XN
ARM: 7954/1: mm: remove remaining domain support from ARMv6 CPU_32v6 currently selects CPU_USE_DOMAINS if CPU_V6 and MMU. This is because ARM 1136 r0pX CPUs lack the v6k extensions, and therefore do not have hardware thread registers. The lack of these registers requires the kernel to update the vectors page at each context switch in order to write a new TLS pointer. This write must be done via the userspace mapping, since aliasing caches can lead to expensive flushing when using kmap. Finally, this requires the vectors page to be mapped r/w for kernel and r/o for user, which has implications for things like put_user which must trigger CoW appropriately when targetting user pages. The upshot of all this is that a v6/v7 kernel makes use of domains to segregate kernel and user memory accesses. This has the nasty side-effect of making device mappings executable, which has been observed to cause subtle bugs on recent cores (e.g. Cortex-A15 performing a speculative instruction fetch from the GIC and acking an interrupt in the process). This patch solves this problem by removing the remaining domain support from ARMv6. A new memory type is added specifically for the vectors page which allows that page (and only that page) to be mapped as user r/o, kernel r/w. All other user r/o pages are mapped also as kernel r/o. Patch co-developed with Russell King. Cc: <stable@vger.kernel.org> Signed-off-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-02-08 01:12:27 +07:00
eor r3, r3, r2
tst r1, #L_PTE_YOUNG
tstne r1, #L_PTE_PRESENT
moveq r3, #0
tstne r1, #L_PTE_NONE
movne r3, #0
str r3, [r0]
mcr p15, 0, r0, c7, c10, 1 @ flush_pte
.endm
/*
* The ARMv3, ARMv4 and ARMv5 set_pte_ext translation function,
* covering most CPUs except Xscale and Xscale 3.
*
* Permission translation:
* YUWD AP SVC User
* 0xxx 0x00 no acc no acc
* 100x 0x00 r/o no acc
* 10x0 0x00 r/o no acc
* 1011 0x55 r/w no acc
* 110x 0xaa r/w r/o
* 11x0 0xaa r/w r/o
* 1111 0xff r/w r/w
*/
.macro armv3_set_pte_ext wc_disable=1
str r1, [r0], #2048 @ linux version
eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY
bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits
bic r2, r2, #PTE_TYPE_MASK
orr r2, r2, #PTE_TYPE_SMALL
tst r3, #L_PTE_USER @ user?
orrne r2, r2, #PTE_SMALL_AP_URO_SRW
tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty?
orreq r2, r2, #PTE_SMALL_AP_UNO_SRW
tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young?
movne r2, #0
.if \wc_disable
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
tst r2, #PTE_CACHEABLE
bicne r2, r2, #PTE_BUFFERABLE
#endif
.endif
str r2, [r0] @ hardware version
.endm
/*
* Xscale set_pte_ext translation, split into two halves to cope
* with work-arounds. r3 must be preserved by code between these
* two macros.
*
* Permission translation:
* YUWD AP SVC User
* 0xxx 00 no acc no acc
* 100x 00 r/o no acc
* 10x0 00 r/o no acc
* 1011 01 r/w no acc
* 110x 10 r/w r/o
* 11x0 10 r/w r/o
* 1111 11 r/w r/w
*/
.macro xscale_set_pte_ext_prologue
str r1, [r0] @ linux version
eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY
bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits
orr r2, r2, #PTE_TYPE_EXT @ extended page
tst r3, #L_PTE_USER @ user?
orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w
tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty?
orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w
@ combined with user -> user r/w
.endm
.macro xscale_set_pte_ext_epilogue
tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young?
movne r2, #0 @ no -> fault
str r2, [r0, #2048]! @ hardware version
mov ip, #0
mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line
mcr p15, 0, ip, c7, c10, 4 @ data write barrier
.endm
.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0
.type \name\()_processor_functions, #object
.align 2
ENTRY(\name\()_processor_functions)
.word \dabort
.word \pabort
.word cpu_\name\()_proc_init
.word cpu_\name\()_proc_fin
.word cpu_\name\()_reset
.word cpu_\name\()_do_idle
.word cpu_\name\()_dcache_clean_area
.word cpu_\name\()_switch_mm
.if \nommu
.word 0
.else
.word cpu_\name\()_set_pte_ext
.endif
.if \suspend
.word cpu_\name\()_suspend_size
ARM: mm: Fix ifdef around cpu_*_do_[suspend, resume] ops Ifdef around cpu_\name\()_do_suspend and cpu_\name\()_do_resume ops in proc-macros.S should check for CONFIG_ARM_CPU_SUSPEND and not CONFIG_PM_SLEEP. Fix it. [ Please note that cpu_v7_do_[suspend,resume] code in proc-v7.S already correctly checks for CONFIG_ARM_CPU_SUSPEND, same is true for functions for other architectures. ] This fix is needed for decoupling suspend/resume and advanced cpuidle support on Exynos platform (next patch fixes build for config with CONFIG_PM_SLEEP=n and CONFIG_ARM_EXYNOS_CPUIDLE=y). If this fix is not present then the following OOPS happens on the first attempt to go into advanced cpuidle mode (AFTR): [ 22.244143] Unable to handle kernel NULL pointer dereference at virtual address 00000000 [ 22.250759] pgd = c0004000 [ 22.253445] [00000000] *pgd=00000000 [ 22.257012] Internal error: Oops: 80000007 [#1] PREEMPT SMP ARM [ 22.262906] Modules linked in: [ 22.265949] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.16.0-next-20140811-dirty #730 [ 22.273757] task: c05dce68 ti: c05d2000 task.ti: c05d2000 [ 22.279139] PC is at 0x0 [ 22.281661] LR is at __cpu_suspend_save+0x4c/0xa8 [ 22.286344] pc : [<00000000>] lr : [<c00125e0>] psr: a0000093 [ 22.286344] sp : c05d3ef4 ip : c05da414 fp : 00000001 [ 22.297799] r10: c05da414 r9 : c0609cb0 r8 : 0000000f [ 22.303008] r7 : c05da444 r6 : 00000038 r5 : ea802c00 r4 : c05d3f14 [ 22.309517] r3 : 00000000 r2 : c05d3f4c r1 : 00000038 r0 : c05d3f20 [ 22.316029] Flags: NzCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment kernel [ 22.323406] Control: 10c5387d Table: 69d5404a DAC: 00000015 [ 22.329135] Process swapper/0 (pid: 0, stack limit = 0xc05d2240) [ 22.335124] Stack: (0xc05d3ef4 to 0xc05d4000) [ 22.339466] 3ee0: ea802c00 00000038 c05d3f4c [ 22.347626] 3f00: 00000000 00000007 c00123bc 00000000 c001d468 6a888000 c05d3f4c 80000000 [ 22.355785] 3f20: 00000007 c003d3a0 0000193d eaf9dde4 eaf9dde4 c02ef0c8 c000969c fffffffe [ 22.363944] 3f40: 00000000 c0037b54 eaf9dbb8 e9d1a380 00000000 c001d468 c0609cb0 00000000 [ 22.372103] 3f60: c0609cb0 c061649e 00000001 c001250c eaf9dbb8 00000001 c0609cb0 c001d618 [ 22.380262] 3f80: c001d5d0 c02ef56c 2d9d2e1e 00000005 eaf9dbb8 c02edcc4 2d9d2e1e 00000005 [ 22.388421] 3fa0: c040446c c05da4ec c040446c eaf9dbb8 c05cfbb0 c004c580 c05dce68 c05b3ae8 [ 22.396580] 3fc0: 00000000 c058bb24 ffffffff ffffffff c058b5e4 00000000 00000000 c05b3ae8 [ 22.404740] 3fe0: c0616994 c05da47c c05b3ae4 c05ddeec 4000406a 40008074 00000000 00000000 [ 22.412909] [<c00125e0>] (__cpu_suspend_save) from [<c00123bc>] (__cpu_suspend+0x5c/0x70) [ 22.421074] [<c00123bc>] (__cpu_suspend) from [<c05d3f4c>] (init_thread_union+0x1f4c/0x2000) [ 22.429479] Code: bad PC value [ 22.432518] ---[ end trace fb90ebf4217d0ad9 ]--- [ 22.437116] Kernel panic - not syncing: Attempted to kill the idle task! [ 22.443800] Rebooting in 5 seconds.. This patch has been tested on Exynos4210 based Origen board. Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com> Acked-by: Kyungmin Park <kyungmin.park@samsung.com> Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
2014-09-24 00:18:32 +07:00
#ifdef CONFIG_ARM_CPU_SUSPEND
.word cpu_\name\()_do_suspend
.word cpu_\name\()_do_resume
#else
.word 0
.word 0
#endif
.else
.word 0
.word 0
.word 0
.endif
.size \name\()_processor_functions, . - \name\()_processor_functions
.endm
.macro define_cache_functions name:req
.align 2
.type \name\()_cache_fns, #object
ENTRY(\name\()_cache_fns)
.long \name\()_flush_icache_all
.long \name\()_flush_kern_cache_all
ARM: mm: implement LoUIS API for cache maintenance ops ARM v7 architecture introduced the concept of cache levels and related control registers. New processors like A7 and A15 embed an L2 unified cache controller that becomes part of the cache level hierarchy. Some operations in the kernel like cpu_suspend and __cpu_disable do not require a flush of the entire cache hierarchy to DRAM but just the cache levels belonging to the Level of Unification Inner Shareable (LoUIS), which in most of ARM v7 systems correspond to L1. The current cache flushing API used in cpu_suspend and __cpu_disable, flush_cache_all(), ends up flushing the whole cache hierarchy since for v7 it cleans and invalidates all cache levels up to Level of Coherency (LoC) which cripples system performance when used in hot paths like hotplug and cpuidle. Therefore a new kernel cache maintenance API must be added to cope with latest ARM system requirements. This patch adds flush_cache_louis() to the ARM kernel cache maintenance API. This function cleans and invalidates all data cache levels up to the Level of Unification Inner Shareable (LoUIS) and invalidates the instruction cache for processors that support it (> v7). This patch also creates an alias of the cache LoUIS function to flush_kern_all for all processor versions prior to v7, so that the current cache flushing behaviour is unchanged for those processors. v7 cache maintenance code implements a cache LoUIS function that cleans and invalidates the D-cache up to LoUIS and invalidates the I-cache, according to the new API. Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Reviewed-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> Tested-by: Shawn Guo <shawn.guo@linaro.org>
2012-09-06 20:05:13 +07:00
.long \name\()_flush_kern_cache_louis
.long \name\()_flush_user_cache_all
.long \name\()_flush_user_cache_range
.long \name\()_coherent_kern_range
.long \name\()_coherent_user_range
.long \name\()_flush_kern_dcache_area
.long \name\()_dma_map_area
.long \name\()_dma_unmap_area
.long \name\()_dma_flush_range
.size \name\()_cache_fns, . - \name\()_cache_fns
.endm
.macro define_tlb_functions name:req, flags_up:req, flags_smp
.type \name\()_tlb_fns, #object
ENTRY(\name\()_tlb_fns)
.long \name\()_flush_user_tlb_range
.long \name\()_flush_kern_tlb_range
.ifnb \flags_smp
ALT_SMP(.long \flags_smp )
ALT_UP(.long \flags_up )
.else
.long \flags_up
.endif
.size \name\()_tlb_fns, . - \name\()_tlb_fns
.endm
.macro globl_equ x, y
.globl \x
.equ \x, \y
.endm
.macro initfn, func, base
.long \func - \base
.endm
/*
* Macro to calculate the log2 size for the protection region
* registers. This calculates rd = log2(size) - 1. tmp must
* not be the same register as rd.
*/
.macro pr_sz, rd, size, tmp
mov \tmp, \size, lsr #12
mov \rd, #11
1: movs \tmp, \tmp, lsr #1
addne \rd, \rd, #1
bne 1b
.endm
/*
* Macro to generate a protection region register value
* given a pre-masked address, size, and enable bit.
* Corrupts size.
*/
.macro pr_val, dest, addr, size, enable
pr_sz \dest, \size, \size @ calculate log2(size) - 1
orr \dest, \addr, \dest, lsl #1 @ mask in the region size
orr \dest, \dest, \enable
.endm