linux_dsm_epyc7002/arch/arm/mach-mvebu/coherency_ll.S
Russell King 6ebbf2ce43 ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+
ARMv6 and greater introduced a new instruction ("bx") which can be used
to return from function calls.  Recent CPUs perform better when the
"bx lr" instruction is used rather than the "mov pc, lr" instruction,
and this sequence is strongly recommended to be used by the ARM
architecture manual (section A.4.1.1).

We provide a new macro "ret" with all its variants for the condition
code which will resolve to the appropriate instruction.

Rather than doing this piecemeal, and miss some instances, change all
the "mov pc" instances to use the new macro, with the exception of
the "movs" instruction and the kprobes code.  This allows us to detect
the "mov pc, lr" case and fix it up - and also gives us the possibility
of deploying this for other registers depending on the CPU selection.

Reported-by: Will Deacon <will.deacon@arm.com>
Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1
Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S
Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood
Tested-by: Shawn Guo <shawn.guo@freescale.com>
Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs
Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385
Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci
Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp
Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen
Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M
Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-07-18 12:29:04 +01:00

150 lines
3.9 KiB
ArmAsm

/*
* Coherency fabric: low level functions
*
* Copyright (C) 2012 Marvell
*
* Gregory CLEMENT <gregory.clement@free-electrons.com>
*
* This file is licensed under the terms of the GNU General Public
* License version 2. This program is licensed "as is" without any
* warranty of any kind, whether express or implied.
*
* This file implements the assembly function to add a CPU to the
* coherency fabric. This function is called by each of the secondary
* CPUs during their early boot in an SMP kernel, this why this
* function have to callable from assembly. It can also be called by a
* primary CPU from C code during its boot.
*/
#include <linux/linkage.h>
#define ARMADA_XP_CFB_CTL_REG_OFFSET 0x0
#define ARMADA_XP_CFB_CFG_REG_OFFSET 0x4
#include <asm/assembler.h>
#include <asm/cp15.h>
.text
/* Returns the coherency base address in r1 (r0 is untouched) */
ENTRY(ll_get_coherency_base)
mrc p15, 0, r1, c1, c0, 0
tst r1, #CR_M @ Check MMU bit enabled
bne 1f
/*
* MMU is disabled, use the physical address of the coherency
* base address.
*/
adr r1, 3f
ldr r3, [r1]
ldr r1, [r1, r3]
b 2f
1:
/*
* MMU is enabled, use the virtual address of the coherency
* base address.
*/
ldr r1, =coherency_base
ldr r1, [r1]
2:
ret lr
ENDPROC(ll_get_coherency_base)
/*
* Returns the coherency CPU mask in r3 (r0 is untouched). This
* coherency CPU mask can be used with the coherency fabric
* configuration and control registers. Note that the mask is already
* endian-swapped as appropriate so that the calling functions do not
* have to care about endianness issues while accessing the coherency
* fabric registers
*/
ENTRY(ll_get_coherency_cpumask)
mrc 15, 0, r3, cr0, cr0, 5
and r3, r3, #15
mov r2, #(1 << 24)
lsl r3, r2, r3
ARM_BE8(rev r3, r3)
ret lr
ENDPROC(ll_get_coherency_cpumask)
/*
* ll_add_cpu_to_smp_group(), ll_enable_coherency() and
* ll_disable_coherency() use the strex/ldrex instructions while the
* MMU can be disabled. The Armada XP SoC has an exclusive monitor
* that tracks transactions to Device and/or SO memory and thanks to
* that, exclusive transactions are functional even when the MMU is
* disabled.
*/
ENTRY(ll_add_cpu_to_smp_group)
/*
* As r0 is not modified by ll_get_coherency_base() and
* ll_get_coherency_cpumask(), we use it to temporarly save lr
* and avoid it being modified by the branch and link
* calls. This function is used very early in the secondary
* CPU boot, and no stack is available at this point.
*/
mov r0, lr
bl ll_get_coherency_base
bl ll_get_coherency_cpumask
mov lr, r0
add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET
1:
ldrex r2, [r0]
orr r2, r2, r3
strex r1, r2, [r0]
cmp r1, #0
bne 1b
ret lr
ENDPROC(ll_add_cpu_to_smp_group)
ENTRY(ll_enable_coherency)
/*
* As r0 is not modified by ll_get_coherency_base() and
* ll_get_coherency_cpumask(), we use it to temporarly save lr
* and avoid it being modified by the branch and link
* calls. This function is used very early in the secondary
* CPU boot, and no stack is available at this point.
*/
mov r0, lr
bl ll_get_coherency_base
bl ll_get_coherency_cpumask
mov lr, r0
add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET
1:
ldrex r2, [r0]
orr r2, r2, r3
strex r1, r2, [r0]
cmp r1, #0
bne 1b
dsb
mov r0, #0
ret lr
ENDPROC(ll_enable_coherency)
ENTRY(ll_disable_coherency)
/*
* As r0 is not modified by ll_get_coherency_base() and
* ll_get_coherency_cpumask(), we use it to temporarly save lr
* and avoid it being modified by the branch and link
* calls. This function is used very early in the secondary
* CPU boot, and no stack is available at this point.
*/
mov r0, lr
bl ll_get_coherency_base
bl ll_get_coherency_cpumask
mov lr, r0
add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET
1:
ldrex r2, [r0]
bic r2, r2, r3
strex r1, r2, [r0]
cmp r1, #0
bne 1b
dsb
ret lr
ENDPROC(ll_disable_coherency)
.align 2
3:
.long coherency_phys_base - .