linux_dsm_epyc7002/arch/arm64/mm/cache.S

247 lines
5.6 KiB
ArmAsm
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Cache maintenance
*
* Copyright (C) 2001 Deep Blue Solutions Ltd.
* Copyright (C) 2012 ARM Ltd.
*/
#include <linux/errno.h>
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/cpufeature.h>
#include <asm/alternative.h>
#include <asm/asm-uaccess.h>
/*
* flush_icache_range(start,end)
*
* Ensure that the I and D caches are coherent within specified region.
* This is typically used when code has been written to a memory region,
* and will be executed.
*
* - start - virtual start address of region
* - end - virtual end address of region
*/
SYM_FUNC_START(__flush_icache_range)
/* FALLTHROUGH */
/*
* __flush_cache_user_range(start,end)
*
* Ensure that the I and D caches are coherent within specified region.
* This is typically used when code has been written to a memory region,
* and will be executed.
*
* - start - virtual start address of region
* - end - virtual end address of region
*/
SYM_FUNC_START(__flush_cache_user_range)
uaccess_ttbr0_enable x2, x3, x4
arm64: Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC The DCache clean & ICache invalidation requirements for instructions to be data coherence are discoverable through new fields in CTR_EL0. The following two control bits DIC and IDC were defined for this purpose. No need to perform point of unification cache maintenance operations from software on systems where CPU caches are transparent. This patch optimize the three functions __flush_cache_user_range(), clean_dcache_area_pou() and invalidate_icache_range() if the hardware reports CTR_EL0.IDC and/or CTR_EL0.IDC. Basically it skips the two instructions 'DC CVAU' and 'IC IVAU', and the associated loop logic in order to avoid the unnecessary overhead. CTR_EL0.DIC: Instruction cache invalidation requirements for instruction to data coherence. The meaning of this bit[29]. 0: Instruction cache invalidation to the point of unification is required for instruction to data coherence. 1: Instruction cache cleaning to the point of unification is not required for instruction to data coherence. CTR_EL0.IDC: Data cache clean requirements for instruction to data coherence. The meaning of this bit[28]. 0: Data cache clean to the point of unification is required for instruction to data coherence, unless CLIDR_EL1.LoC == 0b000 or (CLIDR_EL1.LoUIS == 0b000 && CLIDR_EL1.LoUU == 0b000). 1: Data cache clean to the point of unification is not required for instruction to data coherence. Co-authored-by: Philip Elcan <pelcan@codeaurora.org> Reviewed-by: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org> Signed-off-by: Will Deacon <will.deacon@arm.com>
2018-03-07 22:00:08 +07:00
alternative_if ARM64_HAS_CACHE_IDC
dsb ishst
b 7f
alternative_else_nop_endif
dcache_line_size x2, x3
sub x3, x2, #1
bic x4, x0, x3
1:
user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
add x4, x4, x2
cmp x4, x1
b.lo 1b
dsb ish
arm64: Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC The DCache clean & ICache invalidation requirements for instructions to be data coherence are discoverable through new fields in CTR_EL0. The following two control bits DIC and IDC were defined for this purpose. No need to perform point of unification cache maintenance operations from software on systems where CPU caches are transparent. This patch optimize the three functions __flush_cache_user_range(), clean_dcache_area_pou() and invalidate_icache_range() if the hardware reports CTR_EL0.IDC and/or CTR_EL0.IDC. Basically it skips the two instructions 'DC CVAU' and 'IC IVAU', and the associated loop logic in order to avoid the unnecessary overhead. CTR_EL0.DIC: Instruction cache invalidation requirements for instruction to data coherence. The meaning of this bit[29]. 0: Instruction cache invalidation to the point of unification is required for instruction to data coherence. 1: Instruction cache cleaning to the point of unification is not required for instruction to data coherence. CTR_EL0.IDC: Data cache clean requirements for instruction to data coherence. The meaning of this bit[28]. 0: Data cache clean to the point of unification is required for instruction to data coherence, unless CLIDR_EL1.LoC == 0b000 or (CLIDR_EL1.LoUIS == 0b000 && CLIDR_EL1.LoUU == 0b000). 1: Data cache clean to the point of unification is not required for instruction to data coherence. Co-authored-by: Philip Elcan <pelcan@codeaurora.org> Reviewed-by: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org> Signed-off-by: Will Deacon <will.deacon@arm.com>
2018-03-07 22:00:08 +07:00
7:
alternative_if ARM64_HAS_CACHE_DIC
isb
b 8f
alternative_else_nop_endif
invalidate_icache_by_line x0, x1, x2, x3, 9f
arm64: Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC The DCache clean & ICache invalidation requirements for instructions to be data coherence are discoverable through new fields in CTR_EL0. The following two control bits DIC and IDC were defined for this purpose. No need to perform point of unification cache maintenance operations from software on systems where CPU caches are transparent. This patch optimize the three functions __flush_cache_user_range(), clean_dcache_area_pou() and invalidate_icache_range() if the hardware reports CTR_EL0.IDC and/or CTR_EL0.IDC. Basically it skips the two instructions 'DC CVAU' and 'IC IVAU', and the associated loop logic in order to avoid the unnecessary overhead. CTR_EL0.DIC: Instruction cache invalidation requirements for instruction to data coherence. The meaning of this bit[29]. 0: Instruction cache invalidation to the point of unification is required for instruction to data coherence. 1: Instruction cache cleaning to the point of unification is not required for instruction to data coherence. CTR_EL0.IDC: Data cache clean requirements for instruction to data coherence. The meaning of this bit[28]. 0: Data cache clean to the point of unification is required for instruction to data coherence, unless CLIDR_EL1.LoC == 0b000 or (CLIDR_EL1.LoUIS == 0b000 && CLIDR_EL1.LoUU == 0b000). 1: Data cache clean to the point of unification is not required for instruction to data coherence. Co-authored-by: Philip Elcan <pelcan@codeaurora.org> Reviewed-by: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org> Signed-off-by: Will Deacon <will.deacon@arm.com>
2018-03-07 22:00:08 +07:00
8: mov x0, #0
1:
uaccess_ttbr0_disable x1, x2
ret
9:
mov x0, #-EFAULT
b 1b
SYM_FUNC_END(__flush_icache_range)
SYM_FUNC_END(__flush_cache_user_range)
/*
* invalidate_icache_range(start,end)
*
* Ensure that the I cache is invalid within specified region.
*
* - start - virtual start address of region
* - end - virtual end address of region
*/
SYM_FUNC_START(invalidate_icache_range)
arm64: Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC The DCache clean & ICache invalidation requirements for instructions to be data coherence are discoverable through new fields in CTR_EL0. The following two control bits DIC and IDC were defined for this purpose. No need to perform point of unification cache maintenance operations from software on systems where CPU caches are transparent. This patch optimize the three functions __flush_cache_user_range(), clean_dcache_area_pou() and invalidate_icache_range() if the hardware reports CTR_EL0.IDC and/or CTR_EL0.IDC. Basically it skips the two instructions 'DC CVAU' and 'IC IVAU', and the associated loop logic in order to avoid the unnecessary overhead. CTR_EL0.DIC: Instruction cache invalidation requirements for instruction to data coherence. The meaning of this bit[29]. 0: Instruction cache invalidation to the point of unification is required for instruction to data coherence. 1: Instruction cache cleaning to the point of unification is not required for instruction to data coherence. CTR_EL0.IDC: Data cache clean requirements for instruction to data coherence. The meaning of this bit[28]. 0: Data cache clean to the point of unification is required for instruction to data coherence, unless CLIDR_EL1.LoC == 0b000 or (CLIDR_EL1.LoUIS == 0b000 && CLIDR_EL1.LoUU == 0b000). 1: Data cache clean to the point of unification is not required for instruction to data coherence. Co-authored-by: Philip Elcan <pelcan@codeaurora.org> Reviewed-by: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org> Signed-off-by: Will Deacon <will.deacon@arm.com>
2018-03-07 22:00:08 +07:00
alternative_if ARM64_HAS_CACHE_DIC
mov x0, xzr
isb
ret
alternative_else_nop_endif
uaccess_ttbr0_enable x2, x3, x4
invalidate_icache_by_line x0, x1, x2, x3, 2f
mov x0, xzr
1:
uaccess_ttbr0_disable x1, x2
ret
2:
mov x0, #-EFAULT
b 1b
SYM_FUNC_END(invalidate_icache_range)
/*
* __flush_dcache_area(kaddr, size)
*
* Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
* are cleaned and invalidated to the PoC.
*
* - kaddr - kernel address
* - size - size in question
*/
SYM_FUNC_START_PI(__flush_dcache_area)
dcache_by_line_op civac, sy, x0, x1, x2, x3
ret
SYM_FUNC_END_PI(__flush_dcache_area)
/*
* __clean_dcache_area_pou(kaddr, size)
*
* Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
* are cleaned to the PoU.
*
* - kaddr - kernel address
* - size - size in question
*/
SYM_FUNC_START(__clean_dcache_area_pou)
arm64: Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC The DCache clean & ICache invalidation requirements for instructions to be data coherence are discoverable through new fields in CTR_EL0. The following two control bits DIC and IDC were defined for this purpose. No need to perform point of unification cache maintenance operations from software on systems where CPU caches are transparent. This patch optimize the three functions __flush_cache_user_range(), clean_dcache_area_pou() and invalidate_icache_range() if the hardware reports CTR_EL0.IDC and/or CTR_EL0.IDC. Basically it skips the two instructions 'DC CVAU' and 'IC IVAU', and the associated loop logic in order to avoid the unnecessary overhead. CTR_EL0.DIC: Instruction cache invalidation requirements for instruction to data coherence. The meaning of this bit[29]. 0: Instruction cache invalidation to the point of unification is required for instruction to data coherence. 1: Instruction cache cleaning to the point of unification is not required for instruction to data coherence. CTR_EL0.IDC: Data cache clean requirements for instruction to data coherence. The meaning of this bit[28]. 0: Data cache clean to the point of unification is required for instruction to data coherence, unless CLIDR_EL1.LoC == 0b000 or (CLIDR_EL1.LoUIS == 0b000 && CLIDR_EL1.LoUU == 0b000). 1: Data cache clean to the point of unification is not required for instruction to data coherence. Co-authored-by: Philip Elcan <pelcan@codeaurora.org> Reviewed-by: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org> Signed-off-by: Will Deacon <will.deacon@arm.com>
2018-03-07 22:00:08 +07:00
alternative_if ARM64_HAS_CACHE_IDC
dsb ishst
ret
alternative_else_nop_endif
dcache_by_line_op cvau, ish, x0, x1, x2, x3
ret
SYM_FUNC_END(__clean_dcache_area_pou)
/*
* __inval_dcache_area(kaddr, size)
*
* Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
* are invalidated. Any partial lines at the ends of the interval are
* also cleaned to PoC to prevent data loss.
*
* - kaddr - kernel address
* - size - size in question
*/
SYM_FUNC_START_LOCAL(__dma_inv_area)
SYM_FUNC_START_PI(__inval_dcache_area)
/* FALLTHROUGH */
/*
* __dma_inv_area(start, size)
* - start - virtual start address of region
* - size - size in question
*/
add x1, x1, x0
dcache_line_size x2, x3
sub x3, x2, #1
tst x1, x3 // end cache line aligned?
bic x1, x1, x3
b.eq 1f
dc civac, x1 // clean & invalidate D / U line
1: tst x0, x3 // start cache line aligned?
bic x0, x0, x3
b.eq 2f
dc civac, x0 // clean & invalidate D / U line
b 3f
2: dc ivac, x0 // invalidate D / U line
3: add x0, x0, x2
cmp x0, x1
b.lo 2b
dsb sy
ret
SYM_FUNC_END_PI(__inval_dcache_area)
SYM_FUNC_END(__dma_inv_area)
/*
* __clean_dcache_area_poc(kaddr, size)
*
* Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
* are cleaned to the PoC.
*
* - kaddr - kernel address
* - size - size in question
*/
SYM_FUNC_START_LOCAL(__dma_clean_area)
SYM_FUNC_START_PI(__clean_dcache_area_poc)
/* FALLTHROUGH */
/*
* __dma_clean_area(start, size)
* - start - virtual start address of region
* - size - size in question
*/
dcache_by_line_op cvac, sy, x0, x1, x2, x3
ret
SYM_FUNC_END_PI(__clean_dcache_area_poc)
SYM_FUNC_END(__dma_clean_area)
/*
* __clean_dcache_area_pop(kaddr, size)
*
* Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
* are cleaned to the PoP.
*
* - kaddr - kernel address
* - size - size in question
*/
SYM_FUNC_START_PI(__clean_dcache_area_pop)
alternative_if_not ARM64_HAS_DCPOP
b __clean_dcache_area_poc
alternative_else_nop_endif
dcache_by_line_op cvap, sy, x0, x1, x2, x3
ret
SYM_FUNC_END_PI(__clean_dcache_area_pop)
/*
* __dma_flush_area(start, size)
*
* clean & invalidate D / U line
*
* - start - virtual start address of region
* - size - size in question
*/
SYM_FUNC_START_PI(__dma_flush_area)
dcache_by_line_op civac, sy, x0, x1, x2, x3
ret
SYM_FUNC_END_PI(__dma_flush_area)
/*
* __dma_map_area(start, size, dir)
* - start - kernel virtual start address
* - size - size of region
* - dir - DMA direction
*/
SYM_FUNC_START_PI(__dma_map_area)
cmp w2, #DMA_FROM_DEVICE
b.eq __dma_inv_area
b __dma_clean_area
SYM_FUNC_END_PI(__dma_map_area)
/*
* __dma_unmap_area(start, size, dir)
* - start - kernel virtual start address
* - size - size of region
* - dir - DMA direction
*/
SYM_FUNC_START_PI(__dma_unmap_area)
cmp w2, #DMA_TO_DEVICE
b.ne __dma_inv_area
ret
SYM_FUNC_END_PI(__dma_unmap_area)