Merge branch 'for-next/tlbi' into for-next/core

* for-next/tlbi:
  : Support for TTL (translation table level) hint in the TLB operations
  arm64: tlb: Use the TLBI RANGE feature in arm64
  arm64: enable tlbi range instructions
  arm64: tlb: Detect the ARMv8.4 TLBI RANGE feature
  arm64: tlb: don't set the ttl value in flush_tlb_page_nosync
  arm64: Shift the __tlbi_level() indentation left
  arm64: tlb: Set the TTL field in flush_*_tlb_range
  arm64: tlb: Set the TTL field in flush_tlb_range
  tlb: mmu_gather: add tlb_flush_*_range APIs
  arm64: Add tlbi_user_level TLB invalidation helper
  arm64: Add level-hinted TLB invalidation helper
  arm64: Document SW reserved PTE/PMD bits in Stage-2 descriptors
  arm64: Detect the ARMv8.4 TTL feature
This commit is contained in:
Catalin Marinas 2020-07-31 18:09:50 +01:00
commit 18aa3bd58b
12 changed files with 305 additions and 33 deletions

View File

@ -1601,6 +1601,20 @@ config ARM64_AMU_EXTN
correctly reflect reality. Most commonly, the value read will be 0,
indicating that the counter is not enabled.
config AS_HAS_ARMV8_4
def_bool $(cc-option,-Wa$(comma)-march=armv8.4-a)
config ARM64_TLB_RANGE
bool "Enable support for tlbi range feature"
default y
depends on AS_HAS_ARMV8_4
help
ARMv8.4-TLBI provides TLBI invalidation instruction that apply to a
range of input addresses.
The feature introduces new assembly instructions, and they were
support when binutils >= 2.30.
endmenu
menu "ARMv8.5 architectural features"

View File

@ -82,11 +82,18 @@ endif
# compiler to generate them and consequently to break the single image contract
# we pass it only to the assembler. This option is utilized only in case of non
# integrated assemblers.
ifneq ($(CONFIG_AS_HAS_ARMV8_4), y)
branch-prot-flags-$(CONFIG_AS_HAS_PAC) += -Wa,-march=armv8.3-a
endif
endif
KBUILD_CFLAGS += $(branch-prot-flags-y)
ifeq ($(CONFIG_AS_HAS_ARMV8_4), y)
# make sure to pass the newest target architecture to -march.
KBUILD_CFLAGS += -Wa,-march=armv8.4-a
endif
ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
KBUILD_CFLAGS += -ffixed-x18
endif

View File

@ -62,7 +62,9 @@
#define ARM64_HAS_GENERIC_AUTH 52
#define ARM64_HAS_32BIT_EL1 53
#define ARM64_BTI 54
#define ARM64_HAS_ARMv8_4_TTL 55
#define ARM64_HAS_TLB_RANGE 56
#define ARM64_NCAPS 55
#define ARM64_NCAPS 57
#endif /* __ASM_CPUCAPS_H */

View File

@ -692,6 +692,12 @@ static inline bool system_supports_bti(void)
return IS_ENABLED(CONFIG_ARM64_BTI) && cpus_have_const_cap(ARM64_BTI);
}
static inline bool system_supports_tlb_range(void)
{
return IS_ENABLED(CONFIG_ARM64_TLB_RANGE) &&
cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
}
#define ARM64_BP_HARDEN_UNKNOWN -1
#define ARM64_BP_HARDEN_WA_NEEDED 0
#define ARM64_BP_HARDEN_NOT_REQUIRED 1

View File

@ -178,10 +178,12 @@
#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */
#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
#define PTE_S2_XN (_AT(pteval_t, 2) << 53) /* XN[1:0] */
#define PTE_S2_SW_RESVD (_AT(pteval_t, 15) << 55) /* Reserved for SW */
#define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */
#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
#define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */
#define PMD_S2_SW_RESVD (_AT(pmdval_t, 15) << 55) /* Reserved for SW */
#define PUD_S2_RDONLY (_AT(pudval_t, 1) << 6) /* HAP[2:1] */
#define PUD_S2_RDWR (_AT(pudval_t, 3) << 6) /* HAP[2:1] */

View File

@ -40,6 +40,16 @@ extern void __pmd_error(const char *file, int line, unsigned long val);
extern void __pud_error(const char *file, int line, unsigned long val);
extern void __pgd_error(const char *file, int line, unsigned long val);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
/* Set stride and tlb_level in flush_*_tlb_range */
#define flush_pmd_tlb_range(vma, addr, end) \
__flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2)
#define flush_pud_tlb_range(vma, addr, end) \
__flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..

View File

@ -256,4 +256,13 @@ stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
return (boundary - 1 < end - 1) ? boundary : end;
}
/*
* Level values for the ARMv8.4-TTL extension, mapping PUD/PMD/PTE and
* the architectural page-table level.
*/
#define S2_NO_LEVEL_HINT 0
#define S2_PUD_LEVEL 1
#define S2_PMD_LEVEL 2
#define S2_PTE_LEVEL 3
#endif /* __ARM64_S2_PGTABLE_H_ */

View File

@ -617,6 +617,9 @@
#define ID_AA64ISAR0_SHA1_SHIFT 8
#define ID_AA64ISAR0_AES_SHIFT 4
#define ID_AA64ISAR0_TLB_RANGE_NI 0x0
#define ID_AA64ISAR0_TLB_RANGE 0x2
/* id_aa64isar1 */
#define ID_AA64ISAR1_I8MM_SHIFT 52
#define ID_AA64ISAR1_DGH_SHIFT 48

View File

@ -21,11 +21,37 @@ static void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
/*
* get the tlbi levels in arm64. Default value is 0 if more than one
* of cleared_* is set or neither is set.
* Arm64 doesn't support p4ds now.
*/
static inline int tlb_get_level(struct mmu_gather *tlb)
{
if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
tlb->cleared_puds ||
tlb->cleared_p4ds))
return 3;
if (tlb->cleared_pmds && !(tlb->cleared_ptes ||
tlb->cleared_puds ||
tlb->cleared_p4ds))
return 2;
if (tlb->cleared_puds && !(tlb->cleared_ptes ||
tlb->cleared_pmds ||
tlb->cleared_p4ds))
return 1;
return 0;
}
static inline void tlb_flush(struct mmu_gather *tlb)
{
struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
bool last_level = !tlb->freed_tables;
unsigned long stride = tlb_get_unmap_size(tlb);
int tlb_level = tlb_get_level(tlb);
/*
* If we're tearing down the address space then we only care about
@ -38,7 +64,8 @@ static inline void tlb_flush(struct mmu_gather *tlb)
return;
}
__flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level);
__flush_tlb_range(&vma, tlb->start, tlb->end, stride,
last_level, tlb_level);
}
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,

View File

@ -10,6 +10,7 @@
#ifndef __ASSEMBLY__
#include <linux/bitfield.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <asm/cputype.h>
@ -59,6 +60,102 @@
__ta; \
})
/*
* Get translation granule of the system, which is decided by
* PAGE_SIZE. Used by TTL.
* - 4KB : 1
* - 16KB : 2
* - 64KB : 3
*/
#define TLBI_TTL_TG_4K 1
#define TLBI_TTL_TG_16K 2
#define TLBI_TTL_TG_64K 3
static inline unsigned long get_trans_granule(void)
{
switch (PAGE_SIZE) {
case SZ_4K:
return TLBI_TTL_TG_4K;
case SZ_16K:
return TLBI_TTL_TG_16K;
case SZ_64K:
return TLBI_TTL_TG_64K;
default:
return 0;
}
}
/*
* Level-based TLBI operations.
*
* When ARMv8.4-TTL exists, TLBI operations take an additional hint for
* the level at which the invalidation must take place. If the level is
* wrong, no invalidation may take place. In the case where the level
* cannot be easily determined, a 0 value for the level parameter will
* perform a non-hinted invalidation.
*
* For Stage-2 invalidation, use the level values provided to that effect
* in asm/stage2_pgtable.h.
*/
#define TLBI_TTL_MASK GENMASK_ULL(47, 44)
#define __tlbi_level(op, addr, level) do { \
u64 arg = addr; \
\
if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) && \
level) { \
u64 ttl = level & 3; \
ttl |= get_trans_granule() << 2; \
arg &= ~TLBI_TTL_MASK; \
arg |= FIELD_PREP(TLBI_TTL_MASK, ttl); \
} \
\
__tlbi(op, arg); \
} while(0)
#define __tlbi_user_level(op, arg, level) do { \
if (arm64_kernel_unmapped_at_el0()) \
__tlbi_level(op, (arg | USER_ASID_FLAG), level); \
} while (0)
/*
* This macro creates a properly formatted VA operand for the TLB RANGE.
* The value bit assignments are:
*
* +----------+------+-------+-------+-------+----------------------+
* | ASID | TG | SCALE | NUM | TTL | BADDR |
* +-----------------+-------+-------+-------+----------------------+
* |63 48|47 46|45 44|43 39|38 37|36 0|
*
* The address range is determined by below formula:
* [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
*
*/
#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \
({ \
unsigned long __ta = (addr) >> PAGE_SHIFT; \
__ta &= GENMASK_ULL(36, 0); \
__ta |= (unsigned long)(ttl) << 37; \
__ta |= (unsigned long)(num) << 39; \
__ta |= (unsigned long)(scale) << 44; \
__ta |= get_trans_granule() << 46; \
__ta |= (unsigned long)(asid) << 48; \
__ta; \
})
/* These macros are used by the TLBI RANGE feature. */
#define __TLBI_RANGE_PAGES(num, scale) \
((unsigned long)((num) + 1) << (5 * (scale) + 1))
#define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3)
/*
* Generate 'num' values from -1 to 30 with -1 rejected by the
* __flush_tlb_range() loop below.
*/
#define TLBI_RANGE_MASK GENMASK_ULL(4, 0)
#define __TLBI_RANGE_NUM(pages, scale) \
((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
/*
* TLB Invalidation
* ================
@ -179,34 +276,83 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
unsigned long stride, bool last_level)
unsigned long stride, bool last_level,
int tlb_level)
{
int num = 0;
int scale = 0;
unsigned long asid = ASID(vma->vm_mm);
unsigned long addr;
unsigned long pages;
start = round_down(start, stride);
end = round_up(end, stride);
pages = (end - start) >> PAGE_SHIFT;
if ((end - start) >= (MAX_TLBI_OPS * stride)) {
/*
* When not uses TLB range ops, we can handle up to
* (MAX_TLBI_OPS - 1) pages;
* When uses TLB range ops, we can handle up to
* (MAX_TLBI_RANGE_PAGES - 1) pages.
*/
if ((!system_supports_tlb_range() &&
(end - start) >= (MAX_TLBI_OPS * stride)) ||
pages >= MAX_TLBI_RANGE_PAGES) {
flush_tlb_mm(vma->vm_mm);
return;
}
/* Convert the stride into units of 4k */
stride >>= 12;
start = __TLBI_VADDR(start, asid);
end = __TLBI_VADDR(end, asid);
dsb(ishst);
for (addr = start; addr < end; addr += stride) {
if (last_level) {
__tlbi(vale1is, addr);
__tlbi_user(vale1is, addr);
} else {
__tlbi(vae1is, addr);
__tlbi_user(vae1is, addr);
/*
* When the CPU does not support TLB range operations, flush the TLB
* entries one by one at the granularity of 'stride'. If the the TLB
* range ops are supported, then:
*
* 1. If 'pages' is odd, flush the first page through non-range
* operations;
*
* 2. For remaining pages: the minimum range granularity is decided
* by 'scale', so multiple range TLBI operations may be required.
* Start from scale = 0, flush the corresponding number of pages
* ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
* until no pages left.
*
* Note that certain ranges can be represented by either num = 31 and
* scale or num = 0 and scale + 1. The loop below favours the latter
* since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
*/
while (pages > 0) {
if (!system_supports_tlb_range() ||
pages % 2 == 1) {
addr = __TLBI_VADDR(start, asid);
if (last_level) {
__tlbi_level(vale1is, addr, tlb_level);
__tlbi_user_level(vale1is, addr, tlb_level);
} else {
__tlbi_level(vae1is, addr, tlb_level);
__tlbi_user_level(vae1is, addr, tlb_level);
}
start += stride;
pages -= stride >> PAGE_SHIFT;
continue;
}
num = __TLBI_RANGE_NUM(pages, scale);
if (num >= 0) {
addr = __TLBI_VADDR_RANGE(start, asid, scale,
num, tlb_level);
if (last_level) {
__tlbi(rvale1is, addr);
__tlbi_user(rvale1is, addr);
} else {
__tlbi(rvae1is, addr);
__tlbi_user(rvae1is, addr);
}
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
pages -= __TLBI_RANGE_PAGES(num, scale);
}
scale++;
}
dsb(ish);
}
@ -217,8 +363,9 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
/*
* We cannot use leaf-only invalidation here, since we may be invalidating
* table entries as part of collapsing hugepages or moving page tables.
* Set the tlb_level to 0 because we can not get enough information here.
*/
__flush_tlb_range(vma, start, end, PAGE_SIZE, false);
__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
}
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)

View File

@ -1925,6 +1925,26 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.cpu_enable = cpu_has_fwb,
},
{
.desc = "ARMv8.4 Translation Table Level",
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.capability = ARM64_HAS_ARMv8_4_TTL,
.sys_reg = SYS_ID_AA64MMFR2_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64MMFR2_TTL_SHIFT,
.min_field_value = 1,
.matches = has_cpuid_feature,
},
{
.desc = "TLB range maintenance instructions",
.capability = ARM64_HAS_TLB_RANGE,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR0_EL1,
.field_pos = ID_AA64ISAR0_TLB_SHIFT,
.sign = FTR_UNSIGNED,
.min_field_value = ID_AA64ISAR0_TLB_RANGE,
},
#ifdef CONFIG_ARM64_HW_AFDBM
{
/*

View File

@ -512,6 +512,38 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
}
#endif
/*
* tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end,
* and set corresponding cleared_*.
*/
static inline void tlb_flush_pte_range(struct mmu_gather *tlb,
unsigned long address, unsigned long size)
{
__tlb_adjust_range(tlb, address, size);
tlb->cleared_ptes = 1;
}
static inline void tlb_flush_pmd_range(struct mmu_gather *tlb,
unsigned long address, unsigned long size)
{
__tlb_adjust_range(tlb, address, size);
tlb->cleared_pmds = 1;
}
static inline void tlb_flush_pud_range(struct mmu_gather *tlb,
unsigned long address, unsigned long size)
{
__tlb_adjust_range(tlb, address, size);
tlb->cleared_puds = 1;
}
static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
unsigned long address, unsigned long size)
{
__tlb_adjust_range(tlb, address, size);
tlb->cleared_p4ds = 1;
}
#ifndef __tlb_remove_tlb_entry
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#endif
@ -525,19 +557,17 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
*/
#define tlb_remove_tlb_entry(tlb, ptep, address) \
do { \
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
tlb->cleared_ptes = 1; \
tlb_flush_pte_range(tlb, address, PAGE_SIZE); \
__tlb_remove_tlb_entry(tlb, ptep, address); \
} while (0)
#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
do { \
unsigned long _sz = huge_page_size(h); \
__tlb_adjust_range(tlb, address, _sz); \
if (_sz == PMD_SIZE) \
tlb->cleared_pmds = 1; \
tlb_flush_pmd_range(tlb, address, _sz); \
else if (_sz == PUD_SIZE) \
tlb->cleared_puds = 1; \
tlb_flush_pud_range(tlb, address, _sz); \
__tlb_remove_tlb_entry(tlb, ptep, address); \
} while (0)
@ -551,8 +581,7 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \
do { \
__tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE); \
tlb->cleared_pmds = 1; \
tlb_flush_pmd_range(tlb, address, HPAGE_PMD_SIZE); \
__tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \
} while (0)
@ -566,8 +595,7 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
#define tlb_remove_pud_tlb_entry(tlb, pudp, address) \
do { \
__tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \
tlb->cleared_puds = 1; \
tlb_flush_pud_range(tlb, address, HPAGE_PUD_SIZE); \
__tlb_remove_pud_tlb_entry(tlb, pudp, address); \
} while (0)
@ -592,9 +620,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
#ifndef pte_free_tlb
#define pte_free_tlb(tlb, ptep, address) \
do { \
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
tlb_flush_pmd_range(tlb, address, PAGE_SIZE); \
tlb->freed_tables = 1; \
tlb->cleared_pmds = 1; \
__pte_free_tlb(tlb, ptep, address); \
} while (0)
#endif
@ -602,9 +629,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
#ifndef pmd_free_tlb
#define pmd_free_tlb(tlb, pmdp, address) \
do { \
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
tlb_flush_pud_range(tlb, address, PAGE_SIZE); \
tlb->freed_tables = 1; \
tlb->cleared_puds = 1; \
__pmd_free_tlb(tlb, pmdp, address); \
} while (0)
#endif
@ -612,9 +638,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
#ifndef pud_free_tlb
#define pud_free_tlb(tlb, pudp, address) \
do { \
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
tlb_flush_p4d_range(tlb, address, PAGE_SIZE); \
tlb->freed_tables = 1; \
tlb->cleared_p4ds = 1; \
__pud_free_tlb(tlb, pudp, address); \
} while (0)
#endif