mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
6ebbf2ce43
ARMv6 and greater introduced a new instruction ("bx") which can be used to return from function calls. Recent CPUs perform better when the "bx lr" instruction is used rather than the "mov pc, lr" instruction, and this sequence is strongly recommended to be used by the ARM architecture manual (section A.4.1.1). We provide a new macro "ret" with all its variants for the condition code which will resolve to the appropriate instruction. Rather than doing this piecemeal, and miss some instances, change all the "mov pc" instances to use the new macro, with the exception of the "movs" instruction and the kprobes code. This allows us to detect the "mov pc, lr" case and fix it up - and also gives us the possibility of deploying this for other registers depending on the CPU selection. Reported-by: Will Deacon <will.deacon@arm.com> Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1 Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood Tested-by: Shawn Guo <shawn.guo@freescale.com> Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385 Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
388 lines
10 KiB
ArmAsm
388 lines
10 KiB
ArmAsm
/*
|
|
* OMAP44xx sleep code.
|
|
*
|
|
* Copyright (C) 2011 Texas Instruments, Inc.
|
|
* Santosh Shilimkar <santosh.shilimkar@ti.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
#include <asm/smp_scu.h>
|
|
#include <asm/memory.h>
|
|
#include <asm/hardware/cache-l2x0.h>
|
|
|
|
#include "omap-secure.h"
|
|
|
|
#include "common.h"
|
|
#include "omap44xx.h"
|
|
#include "omap4-sar-layout.h"
|
|
|
|
#if defined(CONFIG_SMP) && defined(CONFIG_PM)
|
|
|
|
/*
 * DO_SMC - issue a Secure Monitor Call (smc #0) with a data
 * synchronization barrier on either side of it.
 *
 * The macro itself takes no arguments; callers in this file load the
 * service index into r12 (and any arguments into r0-r3/r6) before
 * invoking it.
 */
	.macro	DO_SMC
	dsb				@ barrier before the monitor call
	smc	#0
	dsb				@ barrier after the monitor call
	.endm
|
|
|
|
/*
 * Parameter block handed (via r3) to the PPA service invoked at
 * ppa_actrl_retry in omap4_cpu_resume. It is referenced with "adr", so it
 * must stay within adr range of that code.
 * Presumably the single word is a parameter count of zero - TODO confirm
 * against the PPA API description.
 */
ppa_zero_params:
	.word	0
|
|
|
|
/*
 * Parameter block handed (via r3) to the OMAP4_PPA_L2_POR_INDEX service in
 * omap4_cpu_resume. It is referenced with "adr", so it must stay within adr
 * range of that code.
 * Presumably word 0 is the parameter count - TODO confirm against the PPA
 * API description.
 */
ppa_por_params:
	.word	1
	.word	0			@ overwritten at resume with the saved
					@ L2X0 prefetch control value
|
|
|
|
#ifdef CONFIG_ARCH_OMAP4
|
|
|
|
/*
 * =============================
 * == CPU suspend finisher ==
 * =============================
 *
 * void omap4_finish_suspend(unsigned long cpu_state)
 *
 * This function code saves the CPU context and performs the CPU
 * power down sequence. Calling WFI effectively changes the CPU
 * power domains states to the desired target power state.
 *
 * @cpu_state : contains context save state (r0)
 *	0 - No context lost
 *	1 - CPUx L1 and logic lost: MPUSS CSWR
 *	2 - CPUx L1 and logic lost + GIC lost: MPUSS OSWR
 *	3 - CPUx L1 and logic lost + GIC + L2 lost: MPUSS OFF
 * @return: This function never returns for CPU OFF and DORMANT power states.
 * Post WFI, CPU transitions to DORMANT or OFF power state and on wake-up
 * from this follows a full CPU reset path via ROM code to CPU restore code.
 * The restore function pointer is stored at CPUx_WAKEUP_NS_PA_ADDR_OFFSET.
 * It returns to the caller for CPU INACTIVE and ON power states or in case
 * CPU failed to transition to targeted OFF/DORMANT state.
 *
 * omap4_finish_suspend() calls v7_flush_dcache_all() which doesn't save
 * stack frame and it expects the caller to take care of it. Hence the entire
 * stack frame is saved to avoid possible stack corruption.
 *
 * Register usage inside the function:
 *	r8  - SAR RAM base returned by omap4_get_sar_ram_base
 *	r9  - device type word read from SAR RAM (0x1 selects the HS paths)
 *	r12 - secure monitor service index for DO_SMC
 */
ENTRY(omap4_finish_suspend)
	stmfd	sp!, {r4-r12, lr}
	cmp	r0, #0x0
	beq	do_WFI			@ No lowpower state, jump to WFI

	/*
	 * Flush all data from the L1 data cache before disabling
	 * SCTLR.C bit.
	 */
	bl	omap4_get_sar_ram_base
	ldr	r9, [r0, #OMAP_TYPE_OFFSET]
	cmp	r9, #0x1		@ Check for HS device
	bne	skip_secure_l1_clean
	mov	r0, #SCU_PM_NORMAL
	mov	r1, #0xFF		@ clean secure L1
	stmfd	sp!, {r4-r12, lr}	@ keep registers safe across the SMC
	ldr	r12, =OMAP4_MON_SCU_PWR_INDEX
	DO_SMC
	ldmfd	sp!, {r4-r12, lr}
skip_secure_l1_clean:
	bl	v7_flush_dcache_all

	/*
	 * Clear the SCTLR.C bit to prevent further data cache
	 * allocation. Clearing SCTLR.C would make all the data accesses
	 * strongly ordered and would not hit the cache.
	 */
	mrc	p15, 0, r0, c1, c0, 0
	bic	r0, r0, #(1 << 2)	@ Disable the C bit
	mcr	p15, 0, r0, c1, c0, 0
	isb

	/*
	 * Invalidate L1 data cache. Even though only invalidate is
	 * necessary exported flush API is used here. Doing clean
	 * on already clean cache would be almost NOP.
	 */
	bl	v7_flush_dcache_all

	/*
	 * Switch the CPU from Symmetric Multiprocessing (SMP) mode
	 * to Asymmetric Multiprocessing (AMP) mode by programming
	 * the SCU power status to DORMANT or OFF mode.
	 * This enables the CPU to be taken out of coherency by
	 * preventing the CPU from receiving cache, TLB, or BTB
	 * maintenance operations broadcast by other CPUs in the cluster.
	 */
	bl	omap4_get_sar_ram_base
	mov	r8, r0
	ldr	r9, [r8, #OMAP_TYPE_OFFSET]
	cmp	r9, #0x1		@ Check for HS device
	bne	scu_gp_set
	mrc	p15, 0, r0, c0, c0, 5	@ Read MPIDR
	ands	r0, r0, #0x0f		@ Z set when the low MPIDR bits are 0
	ldreq	r0, [r8, #SCU_OFFSET0]
	ldrne	r0, [r8, #SCU_OFFSET1]
	mov	r1, #0x00
	stmfd	sp!, {r4-r12, lr}	@ keep registers safe across the SMC
	ldr	r12, =OMAP4_MON_SCU_PWR_INDEX
	DO_SMC
	ldmfd	sp!, {r4-r12, lr}
	b	skip_scu_gp_set
scu_gp_set:
	/*
	 * Fetch the SCU base first and only then load the power mode into
	 * r1: omap4_get_scu_base is reached with a plain "bl", and under the
	 * ARM procedure call standard a callee may clobber r0-r3 and r12.
	 * r8 (SAR RAM base) is callee-saved and therefore still valid after
	 * the call. This matches the ordering used at scu_gp_clear below.
	 */
	bl	omap4_get_scu_base	@ r0 = SCU base
	mrc	p15, 0, r2, c0, c0, 5	@ Read MPIDR
	ands	r2, r2, #0x0f		@ Z set when the low MPIDR bits are 0
	ldreq	r1, [r8, #SCU_OFFSET0]
	ldrne	r1, [r8, #SCU_OFFSET1]
	bl	scu_power_mode		@ scu_power_mode(r0 = base, r1 = mode)
skip_scu_gp_set:
	mrc	p15, 0, r0, c1, c1, 2	@ Read NSACR data
	tst	r0, #(1 << 18)
	mrcne	p15, 0, r0, c1, c0, 1
	bicne	r0, r0, #(1 << 6)	@ Disable SMP bit
	mcrne	p15, 0, r0, c1, c0, 1
	isb
	dsb
#ifdef CONFIG_CACHE_L2X0
	/*
	 * Clean and invalidate the L2 cache.
	 * Common cache-l2x0.c functions can't be used here since it
	 * uses spinlocks. We are out of coherency here with data cache
	 * disabled. The spinlock implementation uses exclusive load/store
	 * instruction which can fail without data cache being enabled.
	 * OMAP4 hardware doesn't support exclusive monitor which can
	 * overcome exclusive access issue. Because of this, CPU can
	 * lead to deadlock.
	 */
	bl	omap4_get_sar_ram_base
	mov	r8, r0
	mrc	p15, 0, r5, c0, c0, 5	@ Read MPIDR
	ands	r5, r5, #0x0f
	ldreq	r0, [r8, #L2X0_SAVE_OFFSET0]	@ Retrieve L2 state from SAR
	ldrne	r0, [r8, #L2X0_SAVE_OFFSET1]	@ memory.
	cmp	r0, #3
	bne	do_WFI			@ L2 is only flushed for saved state 3
#ifdef CONFIG_PL310_ERRATA_727915
	mov	r0, #0x03
	mov	r12, #OMAP4_MON_L2X0_DBG_CTRL_INDEX
	DO_SMC
#endif
	bl	omap4_get_l2cache_base
	mov	r2, r0
	ldr	r0, =0xffff
	str	r0, [r2, #L2X0_CLEAN_INV_WAY]
wait:
	/* Poll until the low 16 bits of CLEAN_INV_WAY read back as zero. */
	ldr	r0, [r2, #L2X0_CLEAN_INV_WAY]
	ldr	r1, =0xffff
	ands	r0, r0, r1
	bne	wait
#ifdef CONFIG_PL310_ERRATA_727915
	mov	r0, #0x00
	mov	r12, #OMAP4_MON_L2X0_DBG_CTRL_INDEX
	DO_SMC
#endif
l2x_sync:
	bl	omap4_get_l2cache_base
	mov	r2, r0
	mov	r0, #0x0
	str	r0, [r2, #L2X0_CACHE_SYNC]
sync:
	/* Poll until bit 0 of CACHE_SYNC reads back as zero. */
	ldr	r0, [r2, #L2X0_CACHE_SYNC]
	ands	r0, r0, #0x1
	bne	sync
#endif

do_WFI:
	bl	omap_do_wfi

	/*
	 * CPU is here when it failed to enter OFF/DORMANT or
	 * no low power state was attempted.
	 */
	mrc	p15, 0, r0, c1, c0, 0
	tst	r0, #(1 << 2)		@ Check C bit enabled?
	orreq	r0, r0, #(1 << 2)	@ Enable the C bit
	mcreq	p15, 0, r0, c1, c0, 0
	isb

	/*
	 * Ensure the CPU power state is set to NORMAL in
	 * SCU power state so that CPU is back in coherency.
	 * In non-coherent mode CPU can lock-up and lead to
	 * system deadlock.
	 */
	mrc	p15, 0, r0, c1, c0, 1
	tst	r0, #(1 << 6)		@ Check SMP bit enabled?
	orreq	r0, r0, #(1 << 6)
	mcreq	p15, 0, r0, c1, c0, 1
	isb
	bl	omap4_get_sar_ram_base
	mov	r8, r0
	ldr	r9, [r8, #OMAP_TYPE_OFFSET]
	cmp	r9, #0x1		@ Check for HS device
	bne	scu_gp_clear
	mov	r0, #SCU_PM_NORMAL
	mov	r1, #0x00
	stmfd	sp!, {r4-r12, lr}	@ keep registers safe across the SMC
	ldr	r12, =OMAP4_MON_SCU_PWR_INDEX
	DO_SMC
	ldmfd	sp!, {r4-r12, lr}
	b	skip_scu_gp_clear
scu_gp_clear:
	bl	omap4_get_scu_base
	mov	r1, #SCU_PM_NORMAL
	bl	scu_power_mode
skip_scu_gp_clear:
	isb
	dsb
	ldmfd	sp!, {r4-r12, pc}
ENDPROC(omap4_finish_suspend)
|
|
|
|
/*
 * ============================
 * == CPU resume entry point ==
 * ============================
 *
 * void omap4_cpu_resume(void)
 *
 * ROM code jumps to this function while waking up from CPU
 * OFF or DORMANT state. Physical address of the function is
 * stored in the SAR RAM while entering to OFF or DORMANT mode.
 * The restore function pointer is stored at CPUx_WAKEUP_NS_PA_ADDR_OFFSET.
 *
 * The function does not return: it ends by branching to cpu_resume.
 */
ENTRY(omap4_cpu_resume)
	/*
	 * Configure ACTRL and enable NS SMP bit access on CPU1 on HS device.
	 * OMAP44XX EMU/HS devices - CPU0 SMP bit access is enabled in PPA
	 * init and for CPU1, a secure PPA API provided. CPU0 must be ON
	 * while executing NS_SMP API on CPU1 and PPA version must be 1.4.0+.
	 * OMAP443X GP devices- SMP bit isn't accessible.
	 * OMAP446X GP devices - SMP bit access is enabled on both CPUs.
	 */
	ldr	r8, =OMAP44XX_SAR_RAM_BASE
	ldr	r9, [r8, #OMAP_TYPE_OFFSET]
	cmp	r9, #0x1			@ Skip if GP device
	bne	skip_ns_smp_enable
	mrc	p15, 0, r0, c0, c0, 5		@ Read MPIDR
	ands	r0, r0, #0x0f
	beq	skip_ns_smp_enable		@ Low MPIDR bits zero: skip
ppa_actrl_retry:
	/* Invoke the PPA service and repeat it until it reports success. */
	mov	r0, #OMAP4_PPA_CPU_ACTRL_SMP_INDEX
	adr	r3, ppa_zero_params		@ Pointer to parameters
	mov	r1, #0x0			@ Process ID
	mov	r2, #0x4			@ Flag
	mov	r6, #0xff
	mov	r12, #0x00			@ Secure Service ID
	DO_SMC
	cmp	r0, #0x0			@ API returns 0 on success.
	bne	ppa_actrl_retry
enable_smp_bit:
	mrc	p15, 0, r0, c1, c0, 1
	tst	r0, #(1 << 6)			@ Check SMP bit enabled?
	orreq	r0, r0, #(1 << 6)
	mcreq	p15, 0, r0, c1, c0, 1
	isb
skip_ns_smp_enable:
#ifdef CONFIG_CACHE_L2X0
	/*
	 * Restore the L2 AUXCTRL and enable the L2 cache.
	 * OMAP4_MON_L2X0_AUXCTRL_INDEX =  Program the L2X0 AUXCTRL
	 * OMAP4_MON_L2X0_CTRL_INDEX =  Enable the L2 using L2X0 CTRL
	 * register r0 contains value to be programmed.
	 * L2 cache is already invalidated by ROM code as part
	 * of MPUSS OFF wakeup path.
	 */
	ldr	r2, =OMAP44XX_L2CACHE_BASE
	ldr	r0, [r2, #L2X0_CTRL]
	and	r0, r0, #0x0f
	cmp	r0, #1
	beq	skip_l2en			@ Skip if already enabled
	ldr	r3, =OMAP44XX_SAR_RAM_BASE
	ldr	r1, [r3, #OMAP_TYPE_OFFSET]
	cmp	r1, #0x1			@ Check for HS device
	bne	set_gp_por
	ldr	r0, =OMAP4_PPA_L2_POR_INDEX
	ldr	r1, =OMAP44XX_SAR_RAM_BASE
	ldr	r4, [r1, #L2X0_PREFETCH_CTRL_OFFSET]
	adr	r3, ppa_por_params
	/*
	 * Patch the saved prefetch control value into the second word of
	 * the parameter block.
	 * NOTE(review): ppa_por_params is defined next to the code in this
	 * file, so this store lands in the same section as the
	 * instructions - confirm that section is writable at this point.
	 */
	str	r4, [r3, #0x04]
	mov	r1, #0x0			@ Process ID
	mov	r2, #0x4			@ Flag
	mov	r6, #0xff
	mov	r12, #0x00			@ Secure Service ID
	DO_SMC
	b	set_aux_ctrl
set_gp_por:
	ldr	r1, =OMAP44XX_SAR_RAM_BASE
	ldr	r0, [r1, #L2X0_PREFETCH_CTRL_OFFSET]
	ldr	r12, =OMAP4_MON_L2X0_PREFETCH_INDEX	@ Setup L2 PREFETCH
	DO_SMC
set_aux_ctrl:
	ldr	r1, =OMAP44XX_SAR_RAM_BASE
	ldr	r0, [r1, #L2X0_AUXCTRL_OFFSET]
	ldr	r12, =OMAP4_MON_L2X0_AUXCTRL_INDEX	@ Setup L2 AUXCTRL
	DO_SMC
	mov	r0, #0x1
	ldr	r12, =OMAP4_MON_L2X0_CTRL_INDEX		@ Enable L2 cache
	DO_SMC
skip_l2en:
#endif

	b	cpu_resume			@ Jump to generic resume
ENDPROC(omap4_cpu_resume)
|
|
#endif /* CONFIG_ARCH_OMAP4 */
|
|
|
|
#endif /* defined(CONFIG_SMP) && defined(CONFIG_PM) */
|
|
|
|
#ifndef CONFIG_OMAP4_ERRATA_I688
/*
 * omap_bus_sync - stub used when CONFIG_OMAP4_ERRATA_I688 is not set.
 *
 * Returns to the caller immediately without touching any register.
 * omap_do_wfi calls it unconditionally, so the symbol has to exist;
 * presumably a real implementation is supplied elsewhere when the
 * erratum workaround is configured - TODO confirm.
 */
ENTRY(omap_bus_sync)
	ret	lr
ENDPROC(omap_bus_sync)
#endif /* !CONFIG_OMAP4_ERRATA_I688 */
|
|
|
|
/*
 * omap_do_wfi - drain outstanding bus writes, synchronize and execute WFI.
 *
 * Takes no arguments. Saves lr on the stack because it calls
 * omap_bus_sync, and returns to the caller once execution continues
 * past the WFI.
 */
ENTRY(omap_do_wfi)
	stmfd	sp!, {lr}

	/* Drain interconnect write buffers. */
	bl	omap_bus_sync

	/*
	 * Execute an ISB instruction to ensure that all of the
	 * CP15 register changes have been committed.
	 */
	isb

	/*
	 * Execute a barrier instruction to ensure that all cache,
	 * TLB and branch predictor maintenance operations issued
	 * by any CPU in the cluster have completed.
	 */
	dsb
	dmb

	/*
	 * Execute a WFI instruction and wait until the
	 * STANDBYWFI output is asserted to indicate that the
	 * CPU is in idle and low power state. CPU can speculatively
	 * prefetch the instructions so add NOPs after WFI. Sixteen
	 * NOPs as per Cortex-A9 pipeline.
	 */
	wfi					@ Wait For Interrupt
	.rept	16
	nop
	.endr

	ldmfd	sp!, {pc}
ENDPROC(omap_do_wfi)
|