linux_dsm_epyc7002/arch/arm/mm/proc-xscale.S

663 lines
18 KiB
ArmAsm
Raw Normal View History

/*
* linux/arch/arm/mm/proc-xscale.S
*
* Author: Nicolas Pitre
* Created: November 2000
* Copyright: (C) 2000, 2001 MontaVista Software Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* MMU functions for the Intel XScale CPUs
*
* 2001 Aug 21:
* some contributions by Brett Gaines <brett.w.gaines@intel.com>
* Copyright 2001 by Intel Corp.
*
* 2001 Sep 08:
* Completely revisited, many important fixes
* Nicolas Pitre <nico@fluxnic.net>
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
#include <asm/pgtable.h>
#include <asm/pgtable-hwdef.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include "proc-macros.S"
/*
* This is the maximum size of an area which will be flushed. If the area
* is larger than this, then we flush the whole cache
*/
#define MAX_AREA_SIZE 32768
/*
* the cache line size of the I and D cache
*/
#define CACHELINESIZE 32
/*
* the size of the data cache
*/
#define CACHESIZE 32768
/*
* Virtual address used to allocate the cache when flushed
*
* This must be an address range which is _never_ used. It should
* apparently have a mapping in the corresponding page table for
* compatibility with future CPUs that _could_ require it. For instance we
* don't care.
*
* This must be aligned on a 2*CACHESIZE boundary. The code selects one of
* the 2 areas in alternance each time the clean_d_cache macro is used.
* Without this the XScale core exhibits cache eviction problems and no one
* knows why.
*
* Reminder: the vector table is located at 0xffff0000-0xffff0fff.
*/
#define CLEAN_ADDR 0xfffe0000
/*
* This macro is used to wait for a CP15 write and is needed
* when we have to ensure that the last operation to the co-pro
* was completed before continuing with operation.
*/
.macro cpwait, rd
mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15
mov \rd, \rd @ wait for completion
sub pc, pc, #4 @ flush instruction pipeline
.endm
.macro cpwait_ret, lr, rd
mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15
sub pc, \lr, \rd, LSR #32 @ wait for completion and
@ flush instruction pipeline
.endm
/*
* This macro cleans the entire dcache using line allocate.
* The main loop has been unrolled to reduce loop overhead.
* rd and rs are two scratch registers.
*/
.macro clean_d_cache, rd, rs
ldr \rs, =clean_addr
ldr \rd, [\rs]
eor \rd, \rd, #CACHESIZE
str \rd, [\rs]
add \rs, \rd, #CACHESIZE
1: mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line
add \rd, \rd, #CACHELINESIZE
mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line
add \rd, \rd, #CACHELINESIZE
mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line
add \rd, \rd, #CACHELINESIZE
mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line
add \rd, \rd, #CACHELINESIZE
teq \rd, \rs
bne 1b
.endm
.data
clean_addr: .word CLEAN_ADDR
.text
/*
* cpu_xscale_proc_init()
*
* Nothing too exciting at the moment
*/
ENTRY(cpu_xscale_proc_init)
@ enable write buffer coalescing. Some bootloader disable it
mrc p15, 0, r1, c1, c0, 1
bic r1, r1, #1
mcr p15, 0, r1, c1, c0, 1
mov pc, lr
/*
* cpu_xscale_proc_fin()
*/
ENTRY(cpu_xscale_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1800 @ ...IZ...........
bic r0, r0, #0x0006 @ .............CA.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
mov pc, lr
/*
* cpu_xscale_reset(loc)
*
* Perform a soft reset of the system. Put the CPU into the
* same state as it would be if it had been reset, and branch
* to what would be the reset vector.
*
* loc: location to jump to for soft reset
*
* Beware PXA270 erratum E7.
*/
.align 5
.pushsection .idmap.text, "ax"
ENTRY(cpu_xscale_reset)
mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
msr cpsr_c, r1 @ reset CPSR
mcr p15, 0, r1, c10, c4, 1 @ unlock I-TLB
mcr p15, 0, r1, c8, c5, 0 @ invalidate I-TLB
mrc p15, 0, r1, c1, c0, 0 @ ctrl register
bic r1, r1, #0x0086 @ ........B....CA.
bic r1, r1, #0x3900 @ ..VIZ..S........
sub pc, pc, #4 @ flush pipeline
@ *** cache line aligned ***
mcr p15, 0, r1, c1, c0, 0 @ ctrl register
bic r1, r1, #0x0001 @ ...............M
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches & BTB
mcr p15, 0, r1, c1, c0, 0 @ ctrl register
@ CAUTION: MMU turned off from this point. We count on the pipeline
@ already containing those two last instructions to survive.
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mov pc, r0
ENDPROC(cpu_xscale_reset)
.popsection
/*
* cpu_xscale_do_idle()
*
* Cause the processor to idle
*
* For now we do nothing but go to idle mode for every case
*
* XScale supports clock switching, but using idle mode support
* allows external hardware to react to system state changes.
*/
.align 5
ENTRY(cpu_xscale_do_idle)
mov r0, #1
mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE
mov pc, lr
/* ================================= CACHE ================================ */
ARM: 6466/1: implement flush_icache_all for the rest of the CPUs Commit 81d11955bf0 ("ARM: 6405/1: Handle __flush_icache_all for CONFIG_SMP_ON_UP") added a new function to struct cpu_cache_fns: flush_icache_all(). It also implemented this for v6 and v7 but not for v5 and backwards. Without the function pointer in place, we will be calling wrong cache functions. For example with ep93xx we get following: Unable to handle kernel paging request at virtual address ee070f38 pgd = c0004000 [ee070f38] *pgd=00000000 Internal error: Oops: 80000005 [#1] PREEMPT last sysfs file: Modules linked in: CPU: 0 Not tainted (2.6.36+ #1) PC is at 0xee070f38 LR is at __dma_alloc+0x11c/0x2d0 pc : [<ee070f38>] lr : [<c0032c8c>] psr: 60000013 sp : c581bde0 ip : 00000000 fp : c0472000 r10: c0472000 r9 : 000000d0 r8 : 00020000 r7 : 0001ffff r6 : 00000000 r5 : c0472400 r4 : c5980000 r3 : c03ab7e0 r2 : 00000000 r1 : c59a0000 r0 : c5980000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel Control: c000717f Table: c0004000 DAC: 00000017 Process swapper (pid: 1, stack limit = 0xc581a270) [<c0032c8c>] (__dma_alloc+0x11c/0x2d0) [<c0032e5c>] (dma_alloc_writecombine+0x1c/0x24) [<c0204148>] (ep93xx_pcm_preallocate_dma_buffer+0x44/0x60) [<c02041c0>] (ep93xx_pcm_new+0x5c/0x88) [<c01ff188>] (snd_soc_instantiate_cards+0x8a8/0xbc0) [<c01ff59c>] (soc_probe+0xfc/0x134) [<c01adafc>] (platform_drv_probe+0x18/0x1c) [<c01acca4>] (driver_probe_device+0xb0/0x16c) [<c01ac284>] (bus_for_each_drv+0x48/0x84) [<c01ace90>] (device_attach+0x50/0x68) [<c01ac0f8>] (bus_probe_device+0x24/0x44) [<c01aad7c>] (device_add+0x2fc/0x44c) [<c01adfa8>] (platform_device_add+0x104/0x15c) [<c0015eb8>] (simone_init+0x60/0x94) [<c0021410>] (do_one_initcall+0xd0/0x1a4) __dma_alloc() calls (inlined) __dma_alloc_buffer() which ends up calling dmac_flush_range(). Now since the entries in the arm920_cache_fns are shifted by one, we jump into address 0xee070f38 which is actually next instruction after the arm920_cache_fns structure. So implement flush_icache_all() for the rest of the supported CPUs using a generic 'invalidate I cache' instruction. Signed-off-by: Mika Westerberg <mika.westerberg@iki.fi> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2010-10-28 17:27:40 +07:00
/*
* flush_icache_all()
*
* Unconditionally clean and invalidate the entire icache.
*/
ENTRY(xscale_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mov pc, lr
ENDPROC(xscale_flush_icache_all)
/*
* flush_user_cache_all()
*
* Invalidate all cache entries in a particular address
* space.
*/
ENTRY(xscale_flush_user_cache_all)
/* FALLTHROUGH */
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
ENTRY(xscale_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
clean_d_cache r0, r1
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB
mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
/*
* flush_user_cache_range(start, end, vm_flags)
*
* Invalidate a range of cache entries in the specified
* address space.
*
* - start - start address (may not be aligned)
* - end - end address (exclusive, may not be aligned)
* - vma - vma_area_struct describing address space
*/
.align 5
ENTRY(xscale_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #MAX_AREA_SIZE
bhs __flush_whole_cache
1: tst r2, #VM_EXEC
mcrne p15, 0, r0, c7, c5, 1 @ Invalidate I cache line
mcr p15, 0, r0, c7, c10, 1 @ Clean D cache line
mcr p15, 0, r0, c7, c6, 1 @ Invalidate D cache line
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB
mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
/*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*
* Note: single I-cache line invalidation isn't used here since
* it also trashes the mini I-cache used by JTAG debuggers.
*/
ENTRY(xscale_coherent_kern_range)
bic r0, r0, #CACHELINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
/*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start. If you have non-snooping
* Harvard caches, you need to implement this function.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(xscale_coherent_user_range)
bic r0, r0, #CACHELINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c5, 1 @ Invalidate I cache entry
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c5, 6 @ Invalidate BTB
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
* - addr - kernel address
* - size - region size
*/
ENTRY(xscale_flush_kern_dcache_area)
add r1, r0, r1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
/*
* dma_inv_range(start, end)
*
* Invalidate (discard) the specified virtual address range.
* May not write back any entries. If 'start' or 'end'
* are not cache line aligned, those lines must be written
* back.
*
* - start - virtual start address
* - end - virtual end address
*/
xscale_dma_inv_range:
tst r0, #CACHELINESIZE - 1
bic r0, r0, #CACHELINESIZE - 1
mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
tst r1, #CACHELINESIZE - 1
mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
/*
* dma_clean_range(start, end)
*
* Clean the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
xscale_dma_clean_range:
bic r0, r0, #CACHELINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
/*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
*
* - start - virtual start address
* - end - virtual end address
*/
ENTRY(xscale_dma_flush_range)
bic r0, r0, #CACHELINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHELINESIZE
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
mov pc, lr
/*
* dma_map_area(start, size, dir)
* - start - kernel virtual start address
* - size - size of region
* - dir - DMA direction
*/
ENTRY(xscale_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq xscale_dma_clean_range
bcs xscale_dma_inv_range
b xscale_dma_flush_range
ENDPROC(xscale_dma_map_area)
/*
* dma_map_area(start, size, dir)
* - start - kernel virtual start address
* - size - size of region
* - dir - DMA direction
*/
ENTRY(xscale_80200_A0_A1_dma_map_area)
add r1, r1, r0
teq r2, #DMA_TO_DEVICE
beq xscale_dma_clean_range
b xscale_dma_flush_range
ENDPROC(xscale_80200_A0_A1_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
* - start - kernel virtual start address
* - size - size of region
* - dir - DMA direction
*/
ENTRY(xscale_dma_unmap_area)
mov pc, lr
ENDPROC(xscale_dma_unmap_area)
ARM: mm: implement LoUIS API for cache maintenance ops ARM v7 architecture introduced the concept of cache levels and related control registers. New processors like A7 and A15 embed an L2 unified cache controller that becomes part of the cache level hierarchy. Some operations in the kernel like cpu_suspend and __cpu_disable do not require a flush of the entire cache hierarchy to DRAM but just the cache levels belonging to the Level of Unification Inner Shareable (LoUIS), which in most of ARM v7 systems correspond to L1. The current cache flushing API used in cpu_suspend and __cpu_disable, flush_cache_all(), ends up flushing the whole cache hierarchy since for v7 it cleans and invalidates all cache levels up to Level of Coherency (LoC) which cripples system performance when used in hot paths like hotplug and cpuidle. Therefore a new kernel cache maintenance API must be added to cope with latest ARM system requirements. This patch adds flush_cache_louis() to the ARM kernel cache maintenance API. This function cleans and invalidates all data cache levels up to the Level of Unification Inner Shareable (LoUIS) and invalidates the instruction cache for processors that support it (> v7). This patch also creates an alias of the cache LoUIS function to flush_kern_all for all processor versions prior to v7, so that the current cache flushing behaviour is unchanged for those processors. v7 cache maintenance code implements a cache LoUIS function that cleans and invalidates the D-cache up to LoUIS and invalidates the I-cache, according to the new API. Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Reviewed-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> Tested-by: Shawn Guo <shawn.guo@linaro.org>
2012-09-06 20:05:13 +07:00
.globl xscale_flush_kern_cache_louis
.equ xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions xscale
/*
* On stepping A0/A1 of the 80200, invalidating D-cache by line doesn't
* clear the dirty bits, which means that if we invalidate a dirty line,
* the dirty data can still be written back to external memory later on.
*
* The recommended workaround is to always do a clean D-cache line before
* doing an invalidate D-cache line, so on the affected processors,
* dma_inv_range() is implemented as dma_flush_range().
*
* See erratum #25 of "Intel 80200 Processor Specification Update",
* revision January 22, 2003, available at:
* http://www.intel.com/design/iio/specupdt/273415.htm
*/
.macro a0_alias basename
.globl xscale_80200_A0_A1_\basename
.type xscale_80200_A0_A1_\basename , %function
.equ xscale_80200_A0_A1_\basename , xscale_\basename
.endm
/*
* Most of the cache functions are unchanged for these processor revisions.
* Export suitable alias symbols for the unchanged functions:
*/
a0_alias flush_icache_all
a0_alias flush_user_cache_all
a0_alias flush_kern_cache_all
a0_alias flush_kern_cache_louis
a0_alias flush_user_cache_range
a0_alias coherent_kern_range
a0_alias coherent_user_range
a0_alias flush_kern_dcache_area
a0_alias dma_flush_range
a0_alias dma_unmap_area
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions xscale_80200_A0_A1
ENTRY(cpu_xscale_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHELINESIZE
subs r1, r1, #CACHELINESIZE
bhi 1b
mov pc, lr
/* =============================== PageTable ============================== */
/*
* cpu_xscale_switch_mm(pgd)
*
* Set the translation base pointer to be as described by pgd.
*
* pgd: new page tables
*/
.align 5
ENTRY(cpu_xscale_switch_mm)
clean_d_cache r1, r2
mcr p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB
mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
cpwait_ret lr, ip
/*
* cpu_xscale_set_pte_ext(ptep, pte, ext)
*
* Set a PTE and flush it out
*
* Errata 40: must set memory to write-through for user read-only pages.
*/
cpu_xscale_mt_table:
.long 0x00 @ L_PTE_MT_UNCACHED
.long PTE_BUFFERABLE @ L_PTE_MT_BUFFERABLE
.long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH
.long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK
.long PTE_EXT_TEX(1) | PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED
.long 0x00 @ unused
.long PTE_EXT_TEX(1) | PTE_CACHEABLE @ L_PTE_MT_MINICACHE
.long PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC
.long 0x00 @ unused
.long PTE_BUFFERABLE @ L_PTE_MT_DEV_WC
.long 0x00 @ unused
.long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED
.long 0x00 @ L_PTE_MT_DEV_NONSHARED
.long 0x00 @ unused
.long 0x00 @ unused
.long 0x00 @ unused
.align 5
ENTRY(cpu_xscale_set_pte_ext)
xscale_set_pte_ext_prologue
@
@ Erratum 40: must set memory to write-through for user read-only pages
@
and ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_RDONLY) & ~(4 << 2)
teq ip, #L_PTE_MT_WRITEBACK | L_PTE_USER | L_PTE_RDONLY
moveq r1, #L_PTE_MT_WRITETHROUGH
and r1, r1, #L_PTE_MT_MASK
adr ip, cpu_xscale_mt_table
ldr ip, [ip, r1]
bic r2, r2, #0x0c
orr r2, r2, ip
xscale_set_pte_ext_epilogue
mov pc, lr
.ltorg
.align
.globl cpu_xscale_suspend_size
.equ cpu_xscale_suspend_size, 4 * 6
#ifdef CONFIG_ARM_CPU_SUSPEND
ENTRY(cpu_xscale_do_suspend)
stmfd sp!, {r4 - r9, lr}
mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode
mrc p15, 0, r5, c15, c1, 0 @ CP access reg
mrc p15, 0, r6, c13, c0, 0 @ PID
mrc p15, 0, r7, c3, c0, 0 @ domain ID
mrc p15, 0, r8, c1, c1, 0 @ auxiliary control reg
mrc p15, 0, r9, c1, c0, 0 @ control reg
bic r4, r4, #2 @ clear frequency change bit
stmia r0, {r4 - r9} @ store cp regs
ldmfd sp!, {r4 - r9, pc}
ENDPROC(cpu_xscale_do_suspend)
ENTRY(cpu_xscale_do_resume)
ldmia r0, {r4 - r9} @ load cp regs
mov ip, #0
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB
mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode.
mcr p15, 0, r5, c15, c1, 0 @ CP access reg
mcr p15, 0, r6, c13, c0, 0 @ PID
mcr p15, 0, r7, c3, c0, 0 @ domain ID
mcr p15, 0, r1, c2, c0, 0 @ translation table base addr
mcr p15, 0, r8, c1, c1, 0 @ auxiliary control reg
mov r0, r9 @ control register
b cpu_resume_mmu
ENDPROC(cpu_xscale_do_resume)
#endif
__CPUINIT
.type __xscale_setup, #function
__xscale_setup:
mcr p15, 0, ip, c7, c7, 0 @ invalidate I, D caches & BTB
mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I, D TLBs
[ARM] 3881/4: xscale: clean up cp0/cp1 handling XScale cores either have a DSP coprocessor (which contains a single 40 bit accumulator register), or an iWMMXt coprocessor (which contains eight 64 bit registers.) Because of the small amount of state in the DSP coprocessor, access to the DSP coprocessor (CP0) is always enabled, and DSP context switching is done unconditionally on every task switch. Access to the iWMMXt coprocessor (CP0/CP1) is enabled only when an iWMMXt instruction is first issued, and iWMMXt context switching is done lazily. CONFIG_IWMMXT is supposed to mean 'the cpu we will be running on will have iWMMXt support', but boards are supposed to select this config symbol by hand, and at least one pxa27x board doesn't get this right, so on that board, proc-xscale.S will incorrectly assume that we have a DSP coprocessor, enable CP0 on boot, and we will then only save the first iWMMXt register (wR0) on context switches, which is Bad. This patch redefines CONFIG_IWMMXT as 'the cpu we will be running on might have iWMMXt support, and we will enable iWMMXt context switching if it does.' This means that with this patch, running a CONFIG_IWMMXT=n kernel on an iWMMXt-capable CPU will no longer potentially corrupt iWMMXt state over context switches, and running a CONFIG_IWMMXT=y kernel on a non-iWMMXt capable CPU will still do DSP context save/restore. These changes should make iWMMXt work on PXA3xx, and as a side effect, enable proper acc0 save/restore on non-iWMMXt capable xsc3 cores such as IOP13xx and IXP23xx (which will not have CONFIG_CPU_XSCALE defined), as well as setting and using HWCAP_IWMMXT properly. Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org> Acked-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2006-12-04 00:51:14 +07:00
mov r0, #1 << 6 @ cp6 for IOP3xx and Bulverde
orr r0, r0, #1 << 13 @ Its undefined whether this
mcr p15, 0, r0, c15, c1, 0 @ affects USR or SVC modes
adr r5, xscale_crval
ldmia r5, {r5, r6}
mrc p15, 0, r0, c1, c0, 0 @ get control register
bic r0, r0, r5
orr r0, r0, r6
mov pc, lr
.size __xscale_setup, . - __xscale_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* ..11 1.01 .... .101
*
*/
.type xscale_crval, #object
xscale_crval:
crval clear=0x00003b07, mmuset=0x00003905, ucset=0x00001900
__INITDATA
@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
define_processor_functions xscale, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1
.section ".rodata"
string cpu_arch_name, "armv5te"
string cpu_elf_name, "v5"
string cpu_80200_A0_A1_name, "XScale-80200 A0/A1"
string cpu_80200_name, "XScale-80200"
string cpu_80219_name, "XScale-80219"
string cpu_8032x_name, "XScale-IOP8032x Family"
string cpu_8033x_name, "XScale-IOP8033x Family"
string cpu_pxa250_name, "XScale-PXA250"
string cpu_pxa210_name, "XScale-PXA210"
string cpu_ixp42x_name, "XScale-IXP42x Family"
string cpu_ixp43x_name, "XScale-IXP43x Family"
string cpu_ixp46x_name, "XScale-IXP46x Family"
string cpu_ixp2400_name, "XScale-IXP2400"
string cpu_ixp2800_name, "XScale-IXP2800"
string cpu_pxa255_name, "XScale-PXA255"
string cpu_pxa270_name, "XScale-PXA270"
.align
.section ".proc.info.init", #alloc, #execinstr
.macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache
.type __\name\()_proc_info,#object
__\name\()_proc_info:
.long \cpu_val
.long \cpu_mask
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
.long PMD_TYPE_SECT | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __xscale_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
.long \cpu_name
.long xscale_processor_functions
.long v4wbi_tlb_fns
.long xscale_mc_user_fns
.ifb \cache
.long xscale_cache_fns
.else
.long \cache
.endif
.size __\name\()_proc_info, . - __\name\()_proc_info
.endm
xscale_proc_info 80200_A0_A1, 0x69052000, 0xfffffffe, cpu_80200_name, \
cache=xscale_80200_A0_A1_cache_fns
xscale_proc_info 80200, 0x69052000, 0xfffffff0, cpu_80200_name
xscale_proc_info 80219, 0x69052e20, 0xffffffe0, cpu_80219_name
xscale_proc_info 8032x, 0x69052420, 0xfffff7e0, cpu_8032x_name
xscale_proc_info 8033x, 0x69054010, 0xfffffd30, cpu_8033x_name
xscale_proc_info pxa250, 0x69052100, 0xfffff7f0, cpu_pxa250_name
xscale_proc_info pxa210, 0x69052120, 0xfffff3f0, cpu_pxa210_name
xscale_proc_info ixp2400, 0x69054190, 0xfffffff0, cpu_ixp2400_name
xscale_proc_info ixp2800, 0x690541a0, 0xfffffff0, cpu_ixp2800_name
xscale_proc_info ixp42x, 0x690541c0, 0xffffffc0, cpu_ixp42x_name
xscale_proc_info ixp43x, 0x69054040, 0xfffffff0, cpu_ixp43x_name
xscale_proc_info ixp46x, 0x69054200, 0xffffff00, cpu_ixp46x_name
xscale_proc_info pxa255, 0x69052d00, 0xfffffff0, cpu_pxa255_name
xscale_proc_info pxa270, 0x69054110, 0xfffffff0, cpu_pxa270_name