linux_dsm_epyc7002/arch/arm/mach-mvebu/coherency_ll.S

167 lines
4.3 KiB
ArmAsm
Raw Normal View History

/*
* Coherency fabric: low level functions
*
* Copyright (C) 2012 Marvell
*
* Gregory CLEMENT <gregory.clement@free-electrons.com>
*
* This file is licensed under the terms of the GNU General Public
* License version 2. This program is licensed "as is" without any
* warranty of any kind, whether express or implied.
*
* This file implements the assembly function to add a CPU to the
* coherency fabric. This function is called by each of the secondary
* CPUs during their early boot in an SMP kernel, this why this
* function have to callable from assembly. It can also be called by a
* primary CPU from C code during its boot.
*/
#include <linux/linkage.h>
#define ARMADA_XP_CFB_CTL_REG_OFFSET 0x0
#define ARMADA_XP_CFB_CFG_REG_OFFSET 0x4
#include <asm/assembler.h>
#include <asm/cp15.h>
.text
/*
* Returns the coherency base address in r1 (r0 is untouched), or 0 if
* the coherency fabric is not enabled.
*/
ENTRY(ll_get_coherency_base)
mrc p15, 0, r1, c1, c0, 0
tst r1, #CR_M @ Check MMU bit enabled
bne 1f
/*
* MMU is disabled, use the physical address of the coherency
* base address. However, if the coherency fabric isn't mapped
* (i.e its virtual address is zero), it means coherency is
* not enabled, so we return 0.
*/
ldr r1, =coherency_base
cmp r1, #0
beq 2f
adr r1, 3f
ldr r3, [r1]
ldr r1, [r1, r3]
b 2f
1:
/*
* MMU is enabled, use the virtual address of the coherency
* base address.
*/
ldr r1, =coherency_base
ldr r1, [r1]
2:
ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+ ARMv6 and greater introduced a new instruction ("bx") which can be used to return from function calls. Recent CPUs perform better when the "bx lr" instruction is used rather than the "mov pc, lr" instruction, and this sequence is strongly recommended to be used by the ARM architecture manual (section A.4.1.1). We provide a new macro "ret" with all its variants for the condition code which will resolve to the appropriate instruction. Rather than doing this piecemeal, and miss some instances, change all the "mov pc" instances to use the new macro, with the exception of the "movs" instruction and the kprobes code. This allows us to detect the "mov pc, lr" case and fix it up - and also gives us the possibility of deploying this for other registers depending on the CPU selection. Reported-by: Will Deacon <will.deacon@arm.com> Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1 Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood Tested-by: Shawn Guo <shawn.guo@freescale.com> Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385 Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-06-30 22:29:12 +07:00
ret lr
ENDPROC(ll_get_coherency_base)
/*
* Returns the coherency CPU mask in r3 (r0 is untouched). This
* coherency CPU mask can be used with the coherency fabric
* configuration and control registers. Note that the mask is already
* endian-swapped as appropriate so that the calling functions do not
* have to care about endianness issues while accessing the coherency
* fabric registers
*/
ENTRY(ll_get_coherency_cpumask)
mrc p15, 0, r3, cr0, cr0, 5
and r3, r3, #15
mov r2, #(1 << 24)
lsl r3, r2, r3
ARM_BE8(rev r3, r3)
ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+ ARMv6 and greater introduced a new instruction ("bx") which can be used to return from function calls. Recent CPUs perform better when the "bx lr" instruction is used rather than the "mov pc, lr" instruction, and this sequence is strongly recommended to be used by the ARM architecture manual (section A.4.1.1). We provide a new macro "ret" with all its variants for the condition code which will resolve to the appropriate instruction. Rather than doing this piecemeal, and miss some instances, change all the "mov pc" instances to use the new macro, with the exception of the "movs" instruction and the kprobes code. This allows us to detect the "mov pc, lr" case and fix it up - and also gives us the possibility of deploying this for other registers depending on the CPU selection. Reported-by: Will Deacon <will.deacon@arm.com> Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1 Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood Tested-by: Shawn Guo <shawn.guo@freescale.com> Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385 Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-06-30 22:29:12 +07:00
ret lr
ENDPROC(ll_get_coherency_cpumask)
/*
* ll_add_cpu_to_smp_group(), ll_enable_coherency() and
* ll_disable_coherency() use the strex/ldrex instructions while the
* MMU can be disabled. The Armada XP SoC has an exclusive monitor
* that tracks transactions to Device and/or SO memory and thanks to
* that, exclusive transactions are functional even when the MMU is
* disabled.
*/
ENTRY(ll_add_cpu_to_smp_group)
/*
* As r0 is not modified by ll_get_coherency_base() and
* ll_get_coherency_cpumask(), we use it to temporarly save lr
* and avoid it being modified by the branch and link
* calls. This function is used very early in the secondary
* CPU boot, and no stack is available at this point.
*/
mov r0, lr
bl ll_get_coherency_base
/* Bail out if the coherency is not enabled */
cmp r1, #0
reteq r0
bl ll_get_coherency_cpumask
mov lr, r0
add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET
1:
ldrex r2, [r0]
orr r2, r2, r3
strex r1, r2, [r0]
cmp r1, #0
bne 1b
ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+ ARMv6 and greater introduced a new instruction ("bx") which can be used to return from function calls. Recent CPUs perform better when the "bx lr" instruction is used rather than the "mov pc, lr" instruction, and this sequence is strongly recommended to be used by the ARM architecture manual (section A.4.1.1). We provide a new macro "ret" with all its variants for the condition code which will resolve to the appropriate instruction. Rather than doing this piecemeal, and miss some instances, change all the "mov pc" instances to use the new macro, with the exception of the "movs" instruction and the kprobes code. This allows us to detect the "mov pc, lr" case and fix it up - and also gives us the possibility of deploying this for other registers depending on the CPU selection. Reported-by: Will Deacon <will.deacon@arm.com> Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1 Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood Tested-by: Shawn Guo <shawn.guo@freescale.com> Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385 Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-06-30 22:29:12 +07:00
ret lr
ENDPROC(ll_add_cpu_to_smp_group)
ENTRY(ll_enable_coherency)
/*
* As r0 is not modified by ll_get_coherency_base() and
* ll_get_coherency_cpumask(), we use it to temporarly save lr
* and avoid it being modified by the branch and link
* calls. This function is used very early in the secondary
* CPU boot, and no stack is available at this point.
*/
mov r0, lr
bl ll_get_coherency_base
/* Bail out if the coherency is not enabled */
cmp r1, #0
reteq r0
bl ll_get_coherency_cpumask
mov lr, r0
add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET
1:
ldrex r2, [r0]
orr r2, r2, r3
strex r1, r2, [r0]
cmp r1, #0
bne 1b
dsb
mov r0, #0
ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+ ARMv6 and greater introduced a new instruction ("bx") which can be used to return from function calls. Recent CPUs perform better when the "bx lr" instruction is used rather than the "mov pc, lr" instruction, and this sequence is strongly recommended to be used by the ARM architecture manual (section A.4.1.1). We provide a new macro "ret" with all its variants for the condition code which will resolve to the appropriate instruction. Rather than doing this piecemeal, and miss some instances, change all the "mov pc" instances to use the new macro, with the exception of the "movs" instruction and the kprobes code. This allows us to detect the "mov pc, lr" case and fix it up - and also gives us the possibility of deploying this for other registers depending on the CPU selection. Reported-by: Will Deacon <will.deacon@arm.com> Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1 Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood Tested-by: Shawn Guo <shawn.guo@freescale.com> Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385 Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-06-30 22:29:12 +07:00
ret lr
ENDPROC(ll_enable_coherency)
ENTRY(ll_disable_coherency)
/*
* As r0 is not modified by ll_get_coherency_base() and
* ll_get_coherency_cpumask(), we use it to temporarly save lr
* and avoid it being modified by the branch and link
* calls. This function is used very early in the secondary
* CPU boot, and no stack is available at this point.
*/
mov r0, lr
bl ll_get_coherency_base
/* Bail out if the coherency is not enabled */
cmp r1, #0
reteq r0
bl ll_get_coherency_cpumask
mov lr, r0
add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET
1:
ldrex r2, [r0]
bic r2, r2, r3
strex r1, r2, [r0]
cmp r1, #0
bne 1b
dsb
ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+ ARMv6 and greater introduced a new instruction ("bx") which can be used to return from function calls. Recent CPUs perform better when the "bx lr" instruction is used rather than the "mov pc, lr" instruction, and this sequence is strongly recommended to be used by the ARM architecture manual (section A.4.1.1). We provide a new macro "ret" with all its variants for the condition code which will resolve to the appropriate instruction. Rather than doing this piecemeal, and miss some instances, change all the "mov pc" instances to use the new macro, with the exception of the "movs" instruction and the kprobes code. This allows us to detect the "mov pc, lr" case and fix it up - and also gives us the possibility of deploying this for other registers depending on the CPU selection. Reported-by: Will Deacon <will.deacon@arm.com> Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1 Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood Tested-by: Shawn Guo <shawn.guo@freescale.com> Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385 Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-06-30 22:29:12 +07:00
ret lr
ENDPROC(ll_disable_coherency)
.align 2
3:
.long coherency_phys_base - .