From db751e309ff05461a0c8e114b1238d7a69cc1f18 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 16 Mar 2020 16:50:43 +0000 Subject: [PATCH 001/148] ELF: UAPI and Kconfig additions for ELF program properties Pull the basic ELF definitions relating to the NT_GNU_PROPERTY_TYPE_0 note from Yu-Cheng Yu's earlier x86 shstk series. Signed-off-by: Mark Brown Signed-off-by: Dave Martin Signed-off-by: Yu-cheng Yu Reviewed-by: Catalin Marinas Reviewed-by: Kees Cook Signed-off-by: Catalin Marinas --- fs/Kconfig.binfmt | 3 +++ include/linux/elf.h | 12 ++++++++++++ include/uapi/linux/elf.h | 1 + 3 files changed, 16 insertions(+) diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 62dc4f577ba1..d2cfe0729a73 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -36,6 +36,9 @@ config COMPAT_BINFMT_ELF config ARCH_BINFMT_ELF_STATE bool +config ARCH_USE_GNU_PROPERTY + bool + config BINFMT_ELF_FDPIC bool "Kernel support for FDPIC ELF binaries" default y if !BINFMT_ELF diff --git a/include/linux/elf.h b/include/linux/elf.h index e3649b3e970e..f7b24c5fcfb6 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -2,6 +2,7 @@ #ifndef _LINUX_ELF_H #define _LINUX_ELF_H +#include #include #include @@ -56,4 +57,15 @@ static inline int elf_coredump_extra_notes_write(struct coredump_params *cprm) { extern int elf_coredump_extra_notes_size(void); extern int elf_coredump_extra_notes_write(struct coredump_params *cprm); #endif + +/* + * NT_GNU_PROPERTY_TYPE_0 header: + * Keep this internal until/unless there is an agreed UAPI definition. + * pr_type values (GNU_PROPERTY_*) are public and defined in the UAPI header. + */ +struct gnu_property { + u32 pr_type; + u32 pr_datasz; +}; + #endif /* _LINUX_ELF_H */ diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 34c02e4290fe..c37731407074 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -36,6 +36,7 @@ typedef __s64 Elf64_Sxword; #define PT_LOPROC 0x70000000 #define PT_HIPROC 0x7fffffff #define PT_GNU_EH_FRAME 0x6474e550 +#define PT_GNU_PROPERTY 0x6474e553 #define PT_GNU_STACK (PT_LOOS + 0x474e551) From 00e19ceec80b03a43f626f891fcc53e57919f1b3 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 16 Mar 2020 16:50:44 +0000 Subject: [PATCH 002/148] ELF: Add ELF program property parsing support ELF program properties will be needed for detecting whether to enable optional architecture or ABI features for a new ELF process. For now, there are no generic properties that we care about, so do nothing unless CONFIG_ARCH_USE_GNU_PROPERTY=y. Otherwise, the presence of properties using the PT_PROGRAM_PROPERTY phdrs entry (if any), and notify each property to the arch code. For now, the added code is not used. Signed-off-by: Mark Brown Signed-off-by: Dave Martin Reviewed-by: Kees Cook Signed-off-by: Catalin Marinas --- fs/binfmt_elf.c | 127 +++++++++++++++++++++++++++++++++++++++ fs/compat_binfmt_elf.c | 4 ++ include/linux/elf.h | 19 ++++++ include/uapi/linux/elf.h | 4 ++ 4 files changed, 154 insertions(+) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f4713ea76e82..1fb67e506b68 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -39,12 +39,18 @@ #include #include #include +#include +#include #include #include #include #include #include +#ifndef ELF_COMPAT +#define ELF_COMPAT 0 +#endif + #ifndef user_long_t #define user_long_t long #endif @@ -681,6 +687,111 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, * libraries. There is no binary dependent code anywhere else. 
*/ +static int parse_elf_property(const char *data, size_t *off, size_t datasz, + struct arch_elf_state *arch, + bool have_prev_type, u32 *prev_type) +{ + size_t o, step; + const struct gnu_property *pr; + int ret; + + if (*off == datasz) + return -ENOENT; + + if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN)) + return -EIO; + o = *off; + datasz -= *off; + + if (datasz < sizeof(*pr)) + return -ENOEXEC; + pr = (const struct gnu_property *)(data + o); + o += sizeof(*pr); + datasz -= sizeof(*pr); + + if (pr->pr_datasz > datasz) + return -ENOEXEC; + + WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN); + step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN); + if (step > datasz) + return -ENOEXEC; + + /* Properties are supposed to be unique and sorted on pr_type: */ + if (have_prev_type && pr->pr_type <= *prev_type) + return -ENOEXEC; + *prev_type = pr->pr_type; + + ret = arch_parse_elf_property(pr->pr_type, data + o, + pr->pr_datasz, ELF_COMPAT, arch); + if (ret) + return ret; + + *off = o + step; + return 0; +} + +#define NOTE_DATA_SZ SZ_1K +#define GNU_PROPERTY_TYPE_0_NAME "GNU" +#define NOTE_NAME_SZ (sizeof(GNU_PROPERTY_TYPE_0_NAME)) + +static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr, + struct arch_elf_state *arch) +{ + union { + struct elf_note nhdr; + char data[NOTE_DATA_SZ]; + } note; + loff_t pos; + ssize_t n; + size_t off, datasz; + int ret; + bool have_prev_type; + u32 prev_type; + + if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr) + return 0; + + /* load_elf_binary() shouldn't call us unless this is true... */ + if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY)) + return -ENOEXEC; + + /* If the properties are crazy large, that's too bad (for now): */ + if (phdr->p_filesz > sizeof(note)) + return -ENOEXEC; + + pos = phdr->p_offset; + n = kernel_read(f, ¬e, phdr->p_filesz, &pos); + + BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ); + if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ) + return -EIO; + + if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 || + note.nhdr.n_namesz != NOTE_NAME_SZ || + strncmp(note.data + sizeof(note.nhdr), + GNU_PROPERTY_TYPE_0_NAME, n - sizeof(note.nhdr))) + return -ENOEXEC; + + off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ, + ELF_GNU_PROPERTY_ALIGN); + if (off > n) + return -ENOEXEC; + + if (note.nhdr.n_descsz > n - off) + return -ENOEXEC; + datasz = off + note.nhdr.n_descsz; + + have_prev_type = false; + do { + ret = parse_elf_property(note.data, &off, datasz, arch, + have_prev_type, &prev_type); + have_prev_type = true; + } while (!ret); + + return ret == -ENOENT ? 
0 : ret; +} + static int load_elf_binary(struct linux_binprm *bprm) { struct file *interpreter = NULL; /* to shut gcc up */ @@ -688,6 +799,7 @@ static int load_elf_binary(struct linux_binprm *bprm) int load_addr_set = 0; unsigned long error; struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL; + struct elf_phdr *elf_property_phdata = NULL; unsigned long elf_bss, elf_brk; int bss_prot = 0; int retval, i; @@ -733,6 +845,11 @@ static int load_elf_binary(struct linux_binprm *bprm) for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) { char *elf_interpreter; + if (elf_ppnt->p_type == PT_GNU_PROPERTY) { + elf_property_phdata = elf_ppnt; + continue; + } + if (elf_ppnt->p_type != PT_INTERP) continue; @@ -820,9 +937,14 @@ static int load_elf_binary(struct linux_binprm *bprm) goto out_free_dentry; /* Pass PT_LOPROC..PT_HIPROC headers to arch code */ + elf_property_phdata = NULL; elf_ppnt = interp_elf_phdata; for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++) switch (elf_ppnt->p_type) { + case PT_GNU_PROPERTY: + elf_property_phdata = elf_ppnt; + break; + case PT_LOPROC ... PT_HIPROC: retval = arch_elf_pt_proc(&loc->interp_elf_ex, elf_ppnt, interpreter, @@ -833,6 +955,11 @@ static int load_elf_binary(struct linux_binprm *bprm) } } + retval = parse_elf_properties(interpreter ?: bprm->file, + elf_property_phdata, &arch_state); + if (retval) + goto out_free_dentry; + /* * Allow arch code to reject the ELF at this point, whilst it's * still possible to return an error to the code that invoked diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index aaad4ca1217e..13a087bc816b 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -17,6 +17,8 @@ #include #include +#define ELF_COMPAT 1 + /* * Rename the basic ELF layout types to refer to the 32-bit class of files. */ @@ -28,11 +30,13 @@ #undef elf_shdr #undef elf_note #undef elf_addr_t +#undef ELF_GNU_PROPERTY_ALIGN #define elfhdr elf32_hdr #define elf_phdr elf32_phdr #define elf_shdr elf32_shdr #define elf_note elf32_note #define elf_addr_t Elf32_Addr +#define ELF_GNU_PROPERTY_ALIGN ELF32_GNU_PROPERTY_ALIGN /* * Some data types as stored in coredump. 
diff --git a/include/linux/elf.h b/include/linux/elf.h index f7b24c5fcfb6..db5113479f5e 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -22,6 +22,9 @@ SET_PERSONALITY(ex) #endif +#define ELF32_GNU_PROPERTY_ALIGN 4 +#define ELF64_GNU_PROPERTY_ALIGN 8 + #if ELF_CLASS == ELFCLASS32 extern Elf32_Dyn _DYNAMIC []; @@ -32,6 +35,7 @@ extern Elf32_Dyn _DYNAMIC []; #define elf_addr_t Elf32_Off #define Elf_Half Elf32_Half #define Elf_Word Elf32_Word +#define ELF_GNU_PROPERTY_ALIGN ELF32_GNU_PROPERTY_ALIGN #else @@ -43,6 +47,7 @@ extern Elf64_Dyn _DYNAMIC []; #define elf_addr_t Elf64_Off #define Elf_Half Elf64_Half #define Elf_Word Elf64_Word +#define ELF_GNU_PROPERTY_ALIGN ELF64_GNU_PROPERTY_ALIGN #endif @@ -68,4 +73,18 @@ struct gnu_property { u32 pr_datasz; }; +struct arch_elf_state; + +#ifndef CONFIG_ARCH_USE_GNU_PROPERTY +static inline int arch_parse_elf_property(u32 type, const void *data, + size_t datasz, bool compat, + struct arch_elf_state *arch) +{ + return 0; +} +#else +extern int arch_parse_elf_property(u32 type, const void *data, size_t datasz, + bool compat, struct arch_elf_state *arch); +#endif + #endif /* _LINUX_ELF_H */ diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index c37731407074..20900f4496b7 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -368,6 +368,7 @@ typedef struct elf64_shdr { * Notes used in ET_CORE. Architectures export some of the arch register sets * using the corresponding note types via the PTRACE_GETREGSET and * PTRACE_SETREGSET requests. + * The note name for all these is "LINUX". */ #define NT_PRSTATUS 1 #define NT_PRFPREG 2 @@ -430,6 +431,9 @@ typedef struct elf64_shdr { #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */ #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ +/* Note types with note name "GNU" */ +#define NT_GNU_PROPERTY_TYPE_0 5 + /* Note header in a PT_NOTE section */ typedef struct elf32_note { Elf32_Word n_namesz; /* Name size */ From 8ef8f360cf30be12382f89ff48a57fbbd9b31c14 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 16 Mar 2020 16:50:45 +0000 Subject: [PATCH 003/148] arm64: Basic Branch Target Identification support This patch adds the bare minimum required to expose the ARMv8.5 Branch Target Identification feature to userspace. By itself, this does _not_ automatically enable BTI for any initial executable pages mapped by execve(). This will come later, but for now it should be possible to enable BTI manually on those pages by using mprotect() from within the target process. Other arches already using the generic mman.h are already using 0x10 for arch-specific prot flags, so we use that for PROT_BTI here. For consistency, signal handler entry points in BTI guarded pages are required to be annotated as such, just like any other function. This blocks a relatively minor attack vector, but comforming userspace will have the annotations anyway, so we may as well enforce them. 
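For illustration, opting an existing executable mapping into BTI enforcement is just an mprotect() call with the new flag. The sketch below is not part of this patch: it assumes a page-aligned code region and the PROT_BTI value (0x10) defined by this series, and falls back to plain PROT_EXEC where the kernel or CPU rejects the flag:

  #include <stddef.h>
  #include <sys/mman.h>

  #ifndef PROT_BTI
  #define PROT_BTI 0x10		/* arm64 value introduced by this series */
  #endif

  /* Mark an already-mapped, page-aligned code region as BTI guarded. */
  static int enable_bti(void *addr, size_t len)
  {
  	if (mprotect(addr, len, PROT_READ | PROT_EXEC | PROT_BTI) == 0)
  		return 0;

  	/* Older kernel or non-BTI CPU: PROT_BTI is rejected, retry without it. */
  	return mprotect(addr, len, PROT_READ | PROT_EXEC);
  }

On hardware or kernels without BTI the flag is simply refused, so a conforming runtime can use this retry pattern unconditionally without behaving differently on older systems.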
Signed-off-by: Mark Brown Signed-off-by: Dave Martin Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- Documentation/arm64/cpu-feature-registers.rst | 2 + Documentation/arm64/elf_hwcaps.rst | 5 +++ arch/arm64/include/asm/cpucaps.h | 3 +- arch/arm64/include/asm/cpufeature.h | 6 +++ arch/arm64/include/asm/esr.h | 2 +- arch/arm64/include/asm/exception.h | 1 + arch/arm64/include/asm/hwcap.h | 1 + arch/arm64/include/asm/mman.h | 37 +++++++++++++++++++ arch/arm64/include/asm/pgtable-hwdef.h | 1 + arch/arm64/include/asm/pgtable.h | 2 +- arch/arm64/include/asm/ptrace.h | 1 + arch/arm64/include/asm/sysreg.h | 4 ++ arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/include/uapi/asm/mman.h | 9 +++++ arch/arm64/include/uapi/asm/ptrace.h | 9 +++++ arch/arm64/kernel/cpufeature.c | 33 +++++++++++++++++ arch/arm64/kernel/cpuinfo.c | 1 + arch/arm64/kernel/entry-common.c | 11 ++++++ arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/signal.c | 16 ++++++++ arch/arm64/kernel/syscall.c | 18 +++++++++ arch/arm64/kernel/traps.c | 8 ++++ include/linux/mm.h | 3 ++ 23 files changed, 172 insertions(+), 4 deletions(-) create mode 100644 arch/arm64/include/asm/mman.h create mode 100644 arch/arm64/include/uapi/asm/mman.h diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index 41937a8091aa..314fa5bc2655 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -176,6 +176,8 @@ infrastructure: +------------------------------+---------+---------+ | SSBS | [7-4] | y | +------------------------------+---------+---------+ + | BT | [3-0] | y | + +------------------------------+---------+---------+ 4) MIDR_EL1 - Main ID Register diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 7dfb97dfe416..84a9fd2d41b4 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -236,6 +236,11 @@ HWCAP2_RNG Functionality implied by ID_AA64ISAR0_EL1.RNDR == 0b0001. +HWCAP2_BTI + + Functionality implied by ID_AA64PFR0_EL1.BT == 0b0001. + + 4. 
Unused AT_HWCAP bits ----------------------- diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 865e0253fc1e..58e776c22aab 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -58,7 +58,8 @@ #define ARM64_WORKAROUND_SPECULATIVE_AT_NVHE 48 #define ARM64_HAS_E0PD 49 #define ARM64_HAS_RNG 50 +#define ARM64_BTI 51 -#define ARM64_NCAPS 51 +#define ARM64_NCAPS 52 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 92ef9539874a..e3ebcc59e83b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -613,6 +613,12 @@ static inline bool system_has_prio_mask_debugging(void) system_uses_irq_prio_masking(); } +static inline bool system_supports_bti(void) +{ + return IS_ENABLED(CONFIG_ARM64_BTI) && + cpus_have_const_cap(ARM64_BTI); +} + static inline bool system_capabilities_finalized(void) { return static_branch_likely(&arm64_const_caps_ready); diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index cb29253ae86b..390b8ba67830 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -22,7 +22,7 @@ #define ESR_ELx_EC_PAC (0x09) /* EL2 and above */ /* Unallocated EC: 0x0A - 0x0B */ #define ESR_ELx_EC_CP14_64 (0x0C) -/* Unallocated EC: 0x0d */ +#define ESR_ELx_EC_BTI (0x0D) #define ESR_ELx_EC_ILL (0x0E) /* Unallocated EC: 0x0F - 0x10 */ #define ESR_ELx_EC_SVC32 (0x11) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 7a6e81ca23a8..7577a754d443 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -34,6 +34,7 @@ static inline u32 disr_to_esr(u64 disr) asmlinkage void enter_from_user_mode(void); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); +void do_bti(struct pt_regs *regs); asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr); void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, struct pt_regs *regs); diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 0f00265248b5..d683bcbf1e7c 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -94,6 +94,7 @@ #define KERNEL_HWCAP_BF16 __khwcap2_feature(BF16) #define KERNEL_HWCAP_DGH __khwcap2_feature(DGH) #define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) +#define KERNEL_HWCAP_BTI __khwcap2_feature(BTI) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h new file mode 100644 index 000000000000..081ec8de9ea6 --- /dev/null +++ b/arch/arm64/include/asm/mman.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include +#include +#include + +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, + unsigned long pkey __always_unused) +{ + if (system_supports_bti() && (prot & PROT_BTI)) + return VM_ARM64_BTI; + + return 0; +} +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) + +static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) +{ + return (vm_flags & VM_ARM64_BTI) ? 
__pgprot(PTE_GP) : __pgprot(0); +} +#define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) + +static inline bool arch_validate_prot(unsigned long prot, + unsigned long addr __always_unused) +{ + unsigned long supported = PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM; + + if (system_supports_bti()) + supported |= PROT_BTI; + + return (prot & ~supported) == 0; +} +#define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr) + +#endif /* ! __ASM_MMAN_H__ */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 6bf5e650da78..167f1d1d48aa 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -151,6 +151,7 @@ #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ #define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */ #define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */ +#define PTE_GP (_AT(pteval_t, 1) << 50) /* BTI guarded */ #define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */ #define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 538c85e62f86..4fbf516d8cb2 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -660,7 +660,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID | PTE_WRITE; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) pte = pte_mkdirty(pte); diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index bf57308fcd63..2172ec7594ba 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -35,6 +35,7 @@ #define GIC_PRIO_PSR_I_SET (1 << 4) /* Additional SPSR bits not exposed in the UABI */ + #define PSR_IL_BIT (1 << 20) /* AArch32-specific ptrace requests */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b91570ff9db1..db08ceb4cc9a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -514,6 +514,8 @@ #endif /* SCTLR_EL1 specific flags. 
*/ +#define SCTLR_EL1_BT1 (BIT(36)) +#define SCTLR_EL1_BT0 (BIT(35)) #define SCTLR_EL1_UCI (BIT(26)) #define SCTLR_EL1_E0E (BIT(24)) #define SCTLR_EL1_SPAN (BIT(23)) @@ -620,10 +622,12 @@ /* id_aa64pfr1 */ #define ID_AA64PFR1_SSBS_SHIFT 4 +#define ID_AA64PFR1_BT_SHIFT 0 #define ID_AA64PFR1_SSBS_PSTATE_NI 0 #define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 #define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 +#define ID_AA64PFR1_BT_BTI 0x1 /* id_aa64zfr0 */ #define ID_AA64ZFR0_F64MM_SHIFT 56 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 7752d93bb50f..2d6ba1c2592e 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -73,5 +73,6 @@ #define HWCAP2_BF16 (1 << 14) #define HWCAP2_DGH (1 << 15) #define HWCAP2_RNG (1 << 16) +#define HWCAP2_BTI (1 << 17) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/mman.h b/arch/arm64/include/uapi/asm/mman.h new file mode 100644 index 000000000000..6fdd71eb644f --- /dev/null +++ b/arch/arm64/include/uapi/asm/mman.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__ASM_MMAN_H +#define _UAPI__ASM_MMAN_H + +#include + +#define PROT_BTI 0x10 /* BTI guarded page */ + +#endif /* ! _UAPI__ASM_MMAN_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index d1bb5b69f1ce..42cbe34d95ce 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -46,6 +46,7 @@ #define PSR_I_BIT 0x00000080 #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 +#define PSR_BTYPE_MASK 0x00000c00 #define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 @@ -55,6 +56,8 @@ #define PSR_Z_BIT 0x40000000 #define PSR_N_BIT 0x80000000 +#define PSR_BTYPE_SHIFT 10 + /* * Groups of PSR bits */ @@ -63,6 +66,12 @@ #define PSR_x 0x0000ff00 /* Extension */ #define PSR_c 0x000000ff /* Control */ +/* Convenience names for the values of PSTATE.BTYPE */ +#define PSR_BTYPE_NONE (0b00 << PSR_BTYPE_SHIFT) +#define PSR_BTYPE_JC (0b01 << PSR_BTYPE_SHIFT) +#define PSR_BTYPE_C (0b10 << PSR_BTYPE_SHIFT) +#define PSR_BTYPE_J (0b11 << PSR_BTYPE_SHIFT) + /* syscall emulation path in ptrace */ #define PTRACE_SYSEMU 31 #define PTRACE_SYSEMU_SINGLESTEP 32 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0b6715625cf6..e6d31776e49b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -179,6 +179,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -1347,6 +1349,21 @@ static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, } #endif +#ifdef CONFIG_ARM64_BTI +static void bti_enable(const struct arm64_cpu_capabilities *__unused) +{ + /* + * Use of X16/X17 for tail-calls and trampolines that jump to + * function entry points using BR is a requirement for + * marking binaries with GNU_PROPERTY_AARCH64_FEATURE_1_BTI. 
+ * So, be strict and forbid other BRs using other registers to + * jump onto a PACIxSP instruction: + */ + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_BT0 | SCTLR_EL1_BT1); + isb(); +} +#endif /* CONFIG_ARM64_BTI */ + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -1671,6 +1688,19 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .min_field_value = 1, }, +#endif +#ifdef CONFIG_ARM64_BTI + { + .desc = "Branch Target Identification", + .capability = ARM64_BTI, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .cpu_enable = bti_enable, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_BT_SHIFT, + .min_field_value = ID_AA64PFR1_BT_BTI, + .sign = FTR_UNSIGNED, + }, #endif {}, }; @@ -1781,6 +1811,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), #endif HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), +#ifdef CONFIG_ARM64_BTI + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI), +#endif #ifdef CONFIG_ARM64_PTR_AUTH HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA), HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 86136075ae41..5e47e93b5dc1 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -92,6 +92,7 @@ static const char *const hwcap_str[] = { "bf16", "dgh", "rng", + "bti", NULL }; diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index fde59981445c..55ec0627f5a7 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -188,6 +188,14 @@ static void notrace el0_undef(struct pt_regs *regs) } NOKPROBE_SYMBOL(el0_undef); +static void notrace el0_bti(struct pt_regs *regs) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_bti(regs); +} +NOKPROBE_SYMBOL(el0_bti); + static void notrace el0_inv(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); @@ -255,6 +263,9 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_UNKNOWN: el0_undef(regs); break; + case ESR_ELx_EC_BTI: + el0_bti(regs); + break; case ESR_ELx_EC_BREAKPT_LOW: case ESR_ELx_EC_SOFTSTP_LOW: case ESR_ELx_EC_WATCHPT_LOW: diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index cd6e5fa48b9c..fd8ac7cf68e7 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1874,7 +1874,7 @@ void syscall_trace_exit(struct pt_regs *regs) */ #define SPSR_EL1_AARCH64_RES0_BITS \ (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ - GENMASK_ULL(20, 13) | GENMASK_ULL(11, 10) | GENMASK_ULL(5, 5)) + GENMASK_ULL(20, 13) | GENMASK_ULL(5, 5)) #define SPSR_EL1_AARCH32_RES0_BITS \ (GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20)) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 339882db5a91..801d56cdf701 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -732,6 +732,22 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, regs->regs[29] = (unsigned long)&user->next_frame->fp; regs->pc = (unsigned long)ka->sa.sa_handler; + /* + * Signal 
delivery is a (wacky) indirect function call in + * userspace, so simulate the same setting of BTYPE as a BLR + * . + * Signal delivery to a location in a PROT_BTI guarded page + * that is not a function entry point will now trigger a + * SIGILL in userspace. + * + * If the signal handler entry point is not in a PROT_BTI + * guarded page, this is harmless. + */ + if (system_supports_bti()) { + regs->pstate &= ~PSR_BTYPE_MASK; + regs->pstate |= PSR_BTYPE_C; + } + if (ka->sa.sa_flags & SA_RESTORER) sigtramp = ka->sa.sa_restorer; else diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index a12c0c88d345..5f5b868292f5 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -98,6 +98,24 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, regs->orig_x0 = regs->regs[0]; regs->syscallno = scno; + /* + * BTI note: + * The architecture does not guarantee that SPSR.BTYPE is zero + * on taking an SVC, so we could return to userspace with a + * non-zero BTYPE after the syscall. + * + * This shouldn't matter except when userspace is explicitly + * doing something stupid, such as setting PROT_BTI on a page + * that lacks conforming BTI/PACIxSP instructions, falling + * through from one executable page to another with differing + * PROT_BTI, or messing with BTYPE via ptrace: in such cases, + * userspace should not be surprised if a SIGILL occurs on + * syscall return. + * + * So, don't touch regs->pstate & PSR_BTYPE_MASK here. + * (Similarly for HVC and SMC elsewhere.) + */ + cortex_a76_erratum_1463225_svc_handler(); local_daif_restore(DAIF_PROCCTX); user_exit(); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index cf402be5c573..b8c714dda851 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -411,6 +411,13 @@ void do_undefinstr(struct pt_regs *regs) } NOKPROBE_SYMBOL(do_undefinstr); +void do_bti(struct pt_regs *regs) +{ + BUG_ON(!user_mode(regs)); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); +} +NOKPROBE_SYMBOL(do_bti); + #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ res = -EFAULT; \ @@ -753,6 +760,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_CP10_ID] = "CP10 MRC/VMRS", [ESR_ELx_EC_PAC] = "PAC", [ESR_ELx_EC_CP14_64] = "CP14 MCRR/MRRC", + [ESR_ELx_EC_BTI] = "BTI", [ESR_ELx_EC_ILL] = "PSTATE.IL", [ESR_ELx_EC_SVC32] = "SVC (AArch32)", [ESR_ELx_EC_HVC32] = "HVC (AArch32)", diff --git a/include/linux/mm.h b/include/linux/mm.h index 52269e56c514..9e5fce1b2099 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -324,6 +324,9 @@ extern unsigned int kobjsize(const void *objp); #elif defined(CONFIG_SPARC64) # define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */ # define VM_ARCH_CLEAR VM_SPARC_ADI +#elif defined(CONFIG_ARM64) +# define VM_ARM64_BTI VM_ARCH_1 /* BTI guarded page, a.k.a. GP bit */ +# define VM_ARCH_CLEAR VM_ARM64_BTI #elif !defined(CONFIG_MMU) # define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */ #endif From fe0f67660ee9c99408be5261ae045f8b41953b05 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 16 Mar 2020 16:50:46 +0000 Subject: [PATCH 004/148] elf: Allow arch to tweak initial mmap prot flags An arch may want to tweak the mmap prot flags for an ELFexecutable's initial mappings. For example, arm64 is going to need to add PROT_BTI for executable pages in an ELF process whose executable is marked as using Branch Target Identification (an ARMv8.5-A control flow integrity feature). 
So that this can be done in a generic way, add a hook arch_elf_adjust_prot() to modify the prot flags as desired: arches can select CONFIG_HAVE_ELF_PROT and implement their own backend where necessary. By default, leave the prot flags unchanged. Signed-off-by: Mark Brown Signed-off-by: Dave Martin Reviewed-by: Catalin Marinas Reviewed-by: Kees Cook Signed-off-by: Catalin Marinas --- fs/Kconfig.binfmt | 3 +++ fs/binfmt_elf.c | 18 ++++++++++++------ include/linux/elf.h | 12 ++++++++++++ 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index d2cfe0729a73..2358368319b8 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -36,6 +36,9 @@ config COMPAT_BINFMT_ELF config ARCH_BINFMT_ELF_STATE bool +config ARCH_HAVE_ELF_PROT + bool + config ARCH_USE_GNU_PROPERTY bool diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 1fb67e506b68..cceb29d6ef1d 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -544,7 +544,8 @@ static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp, #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */ -static inline int make_prot(u32 p_flags) +static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state, + bool has_interp, bool is_interp) { int prot = 0; @@ -554,7 +555,8 @@ static inline int make_prot(u32 p_flags) prot |= PROT_WRITE; if (p_flags & PF_X) prot |= PROT_EXEC; - return prot; + + return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp); } /* This is much more generalized than the library routine read function, @@ -564,7 +566,8 @@ static inline int make_prot(u32 p_flags) static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, struct file *interpreter, - unsigned long no_base, struct elf_phdr *interp_elf_phdata) + unsigned long no_base, struct elf_phdr *interp_elf_phdata, + struct arch_elf_state *arch_state) { struct elf_phdr *eppnt; unsigned long load_addr = 0; @@ -596,7 +599,8 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { if (eppnt->p_type == PT_LOAD) { int elf_type = MAP_PRIVATE | MAP_DENYWRITE; - int elf_prot = make_prot(eppnt->p_flags); + int elf_prot = make_prot(eppnt->p_flags, arch_state, + true, true); unsigned long vaddr = 0; unsigned long k, map_addr; @@ -1041,7 +1045,8 @@ static int load_elf_binary(struct linux_binprm *bprm) } } - elf_prot = make_prot(elf_ppnt->p_flags); + elf_prot = make_prot(elf_ppnt->p_flags, &arch_state, + !!interpreter, false); elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE; @@ -1184,7 +1189,8 @@ static int load_elf_binary(struct linux_binprm *bprm) if (interpreter) { elf_entry = load_elf_interp(&loc->interp_elf_ex, interpreter, - load_bias, interp_elf_phdata); + load_bias, interp_elf_phdata, + &arch_state); if (!IS_ERR((void *)elf_entry)) { /* * load_elf_interp() returns relocation diff --git a/include/linux/elf.h b/include/linux/elf.h index db5113479f5e..5d5b0321da0b 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -87,4 +87,16 @@ extern int arch_parse_elf_property(u32 type, const void *data, size_t datasz, bool compat, struct arch_elf_state *arch); #endif +#ifdef CONFIG_ARCH_HAVE_ELF_PROT +int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state, + bool has_interp, bool is_interp); +#else +static inline int arch_elf_adjust_prot(int prot, + const struct arch_elf_state *state, + bool has_interp, bool is_interp) +{ + return prot; +} +#endif + #endif /* _LINUX_ELF_H */ From ab7876a98a2160092133de4c648e94b18bc3f139 Mon Sep 17 
00:00:00 2001 From: Dave Martin Date: Mon, 16 Mar 2020 16:50:47 +0000 Subject: [PATCH 005/148] arm64: elf: Enable BTI at exec based on ELF program properties For BTI protection to be as comprehensive as possible, it is desirable to have BTI enabled from process startup. If this is not done, the process must use mprotect() to enable BTI for each of its executable mappings, but this is painful to do in the libc startup code. It's simpler and more sound to have the kernel do it instead. To this end, detect BTI support in the executable (or ELF interpreter, as appropriate), via the NT_GNU_PROGRAM_PROPERTY_TYPE_0 note, and tweak the initial prot flags for the process' executable pages to include PROT_BTI as appropriate. Signed-off-by: Mark Brown Signed-off-by: Dave Martin Reviewed-by: Kees Cook Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 3 +++ arch/arm64/include/asm/elf.h | 50 ++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/process.c | 19 ++++++++++++++ include/uapi/linux/elf.h | 6 +++++ 4 files changed, 78 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0b30e884e088..8a15bc68dadd 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -9,6 +9,7 @@ config ARM64 select ACPI_MCFG if (ACPI && PCI) select ACPI_SPCR_TABLE if ACPI select ACPI_PPTT if ACPI + select ARCH_BINFMT_ELF_STATE select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED @@ -33,6 +34,7 @@ config ARM64 select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAVE_ELF_PROT select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK if !PREEMPTION select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION @@ -62,6 +64,7 @@ config ARM64 select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION select ARCH_KEEP_MEMBLOCK select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_GNU_PROPERTY if BINFMT_ELF select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_SUPPORTS_MEMORY_FAILURE diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index b618017205a3..4f00d50585a4 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -114,7 +114,11 @@ #ifndef __ASSEMBLY__ +#include #include +#include +#include +#include #include /* for signal_minsigstksz, used by ARCH_DLINFO */ typedef unsigned long elf_greg_t; @@ -224,6 +228,52 @@ extern int aarch32_setup_additional_pages(struct linux_binprm *bprm, #endif /* CONFIG_COMPAT */ +struct arch_elf_state { + int flags; +}; + +#define ARM64_ELF_BTI (1 << 0) + +#define INIT_ARCH_ELF_STATE { \ + .flags = 0, \ +} + +static inline int arch_parse_elf_property(u32 type, const void *data, + size_t datasz, bool compat, + struct arch_elf_state *arch) +{ + /* No known properties for AArch32 yet */ + if (IS_ENABLED(CONFIG_COMPAT) && compat) + return 0; + + if (type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) { + const u32 *p = data; + + if (datasz != sizeof(*p)) + return -ENOEXEC; + + if (system_supports_bti() && + (*p & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) + arch->flags |= ARM64_ELF_BTI; + } + + return 0; +} + +static inline int arch_elf_pt_proc(void *ehdr, void *phdr, + struct file *f, bool is_interp, + struct arch_elf_state *state) +{ + return 0; +} + +static inline int arch_check_elf(void *ehdr, bool has_interp, + void *interp_ehdr, + struct arch_elf_state *state) +{ + return 0; +} + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c 
index 00626057a384..b8e3faa8d406 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -18,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -654,3 +656,20 @@ asmlinkage void __sched arm64_preempt_schedule_irq(void) if (system_capabilities_finalized()) preempt_schedule_irq(); } + +#ifdef CONFIG_BINFMT_ELF +int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state, + bool has_interp, bool is_interp) +{ + if (is_interp != has_interp) + return prot; + + if (!(state->flags & ARM64_ELF_BTI)) + return prot; + + if (prot & PROT_EXEC) + prot |= PROT_BTI; + + return prot; +} +#endif diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 20900f4496b7..c6dd0215482e 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -448,4 +448,10 @@ typedef struct elf64_note { Elf64_Word n_type; /* Content type */ } Elf64_Nhdr; +/* .note.gnu.property types for EM_AARCH64: */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + +/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) + #endif /* _UAPI_LINUX_ELF_H */ From ec94a46ee7ac999b0f10f7772c40aed3f604831b Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 16 Mar 2020 16:50:48 +0000 Subject: [PATCH 006/148] arm64: BTI: Decode BYTPE bits when printing PSTATE The current code to print PSTATE symbolically when generating backtraces etc., does not include the BYTPE field used by Branch Target Identification. So, decode BYTPE and print it too. In the interests of human-readability, print the classes of BTI matched. The symbolic notation, BYTPE (PSTATE[11:10]) and permitted classes of subsequent instruction are: -- (BTYPE=0b00): any insn jc (BTYPE=0b01): BTI jc, BTI j, BTI c, PACIxSP -c (BYTPE=0b10): BTI jc, BTI c, PACIxSP j- (BTYPE=0b11): BTI jc, BTI j Signed-off-by: Mark Brown Signed-off-by: Dave Martin Reviewed-by: Kees Cook Signed-off-by: Catalin Marinas --- arch/arm64/kernel/process.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index b8e3faa8d406..24af13d7bde6 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -211,6 +211,15 @@ void machine_restart(char *cmd) while (1); } +#define bstr(suffix, str) [PSR_BTYPE_ ## suffix >> PSR_BTYPE_SHIFT] = str +static const char *const btypes[] = { + bstr(NONE, "--"), + bstr( JC, "jc"), + bstr( C, "-c"), + bstr( J , "j-") +}; +#undef bstr + static void print_pstate(struct pt_regs *regs) { u64 pstate = regs->pstate; @@ -229,7 +238,10 @@ static void print_pstate(struct pt_regs *regs) pstate & PSR_AA32_I_BIT ? 'I' : 'i', pstate & PSR_AA32_F_BIT ? 'F' : 'f'); } else { - printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO)\n", + const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >> + PSR_BTYPE_SHIFT]; + + printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO BTYPE=%s)\n", pstate, pstate & PSR_N_BIT ? 'N' : 'n', pstate & PSR_Z_BIT ? 'Z' : 'z', @@ -240,7 +252,8 @@ static void print_pstate(struct pt_regs *regs) pstate & PSR_I_BIT ? 'I' : 'i', pstate & PSR_F_BIT ? 'F' : 'f', pstate & PSR_PAN_BIT ? '+' : '-', - pstate & PSR_UAO_BIT ? '+' : '-'); + pstate & PSR_UAO_BIT ? 
'+' : '-', + btype_str); } } From 172a797661d95873c4af528c497cb5e1dfa8b91f Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 16 Mar 2020 16:50:49 +0000 Subject: [PATCH 007/148] arm64: unify native/compat instruction skipping Skipping of an instruction on AArch32 works a bit differently from AArch64, mainly due to the different CPSR/PSTATE semantics. Currently arm64_skip_faulting_instruction() is only suitable for AArch64, and arm64_compat_skip_faulting_instruction() handles the IT state machine but is local to traps.c. Since manual instruction skipping implies a trap, it's a relatively slow path. So, make arm64_skip_faulting_instruction() handle both compat and native, and get rid of the arm64_compat_skip_faulting_instruction() special case. Signed-off-by: Mark Brown Signed-off-by: Dave Martin Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b8c714dda851..bc9f4292bfc3 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -272,6 +272,8 @@ void arm64_notify_die(const char *str, struct pt_regs *regs, } } +static void advance_itstate(struct pt_regs *regs); + void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size) { regs->pc += size; @@ -282,6 +284,9 @@ void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size) */ if (user_mode(regs)) user_fastforward_single_step(current); + + if (regs->pstate & PSR_MODE32_BIT) + advance_itstate(regs); } static LIST_HEAD(undef_hook); @@ -644,19 +649,12 @@ static void advance_itstate(struct pt_regs *regs) compat_set_it_state(regs, it); } -static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs, - unsigned int sz) -{ - advance_itstate(regs); - arm64_skip_faulting_instruction(regs, sz); -} - static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) { int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT; pt_regs_write_reg(regs, reg, arch_timer_get_rate()); - arm64_compat_skip_faulting_instruction(regs, 4); + arm64_skip_faulting_instruction(regs, 4); } static const struct sys64_hook cp15_32_hooks[] = { @@ -676,7 +674,7 @@ static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) pt_regs_write_reg(regs, rt, lower_32_bits(val)); pt_regs_write_reg(regs, rt2, upper_32_bits(val)); - arm64_compat_skip_faulting_instruction(regs, 4); + arm64_skip_faulting_instruction(regs, 4); } static const struct sys64_hook cp15_64_hooks[] = { @@ -697,7 +695,7 @@ void do_cp15instr(unsigned int esr, struct pt_regs *regs) * There is no T16 variant of a CP access, so we * always advance PC by 4 bytes. */ - arm64_compat_skip_faulting_instruction(regs, 4); + arm64_skip_faulting_instruction(regs, 4); return; } From d2c2ee4cc33bea814eb6739f14d8cb6a4b5265bb Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 16 Mar 2020 16:50:50 +0000 Subject: [PATCH 008/148] arm64: traps: Shuffle code to eliminate forward declarations Hoist the IT state handling code earlier in traps.c, to avoid accumulating forward declarations. No functional change. 
Signed-off-by: Mark Brown Signed-off-by: Dave Martin Reviewed-by: Kees Cook Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 107 ++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index bc9f4292bfc3..3c986c8ca204 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -272,7 +272,60 @@ void arm64_notify_die(const char *str, struct pt_regs *regs, } } -static void advance_itstate(struct pt_regs *regs); +#ifdef CONFIG_COMPAT +#define PSTATE_IT_1_0_SHIFT 25 +#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT) +#define PSTATE_IT_7_2_SHIFT 10 +#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT) + +static u32 compat_get_it_state(struct pt_regs *regs) +{ + u32 it, pstate = regs->pstate; + + it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT; + it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2; + + return it; +} + +static void compat_set_it_state(struct pt_regs *regs, u32 it) +{ + u32 pstate_it; + + pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK; + pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK; + + regs->pstate &= ~PSR_AA32_IT_MASK; + regs->pstate |= pstate_it; +} + +static void advance_itstate(struct pt_regs *regs) +{ + u32 it; + + /* ARM mode */ + if (!(regs->pstate & PSR_AA32_T_BIT) || + !(regs->pstate & PSR_AA32_IT_MASK)) + return; + + it = compat_get_it_state(regs); + + /* + * If this is the last instruction of the block, wipe the IT + * state. Otherwise advance it. + */ + if (!(it & 7)) + it = 0; + else + it = (it & 0xe0) | ((it << 1) & 0x1f); + + compat_set_it_state(regs, it); +} +#else +static void advance_itstate(struct pt_regs *regs) +{ +} +#endif void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size) { @@ -285,7 +338,7 @@ void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size) if (user_mode(regs)) user_fastforward_single_step(current); - if (regs->pstate & PSR_MODE32_BIT) + if (compat_user_mode(regs)) advance_itstate(regs); } @@ -578,34 +631,7 @@ static const struct sys64_hook sys64_hooks[] = { {}, }; - #ifdef CONFIG_COMPAT -#define PSTATE_IT_1_0_SHIFT 25 -#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT) -#define PSTATE_IT_7_2_SHIFT 10 -#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT) - -static u32 compat_get_it_state(struct pt_regs *regs) -{ - u32 it, pstate = regs->pstate; - - it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT; - it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2; - - return it; -} - -static void compat_set_it_state(struct pt_regs *regs, u32 it) -{ - u32 pstate_it; - - pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK; - pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK; - - regs->pstate &= ~PSR_AA32_IT_MASK; - regs->pstate |= pstate_it; -} - static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) { int cond; @@ -626,29 +652,6 @@ static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) return aarch32_opcode_cond_checks[cond](regs->pstate); } -static void advance_itstate(struct pt_regs *regs) -{ - u32 it; - - /* ARM mode */ - if (!(regs->pstate & PSR_AA32_T_BIT) || - !(regs->pstate & PSR_AA32_IT_MASK)) - return; - - it = compat_get_it_state(regs); - - /* - * If this is the last instruction of the block, wipe the IT - * state. Otherwise advance it. 
- */ - if (!(it & 7)) - it = 0; - else - it = (it & 0xe0) | ((it << 1) & 0x1f); - - compat_set_it_state(regs, it); -} - static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) { int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT; From 0537c4cd71e3c729c278c82f5b088460fb66fc33 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 16 Mar 2020 16:50:51 +0000 Subject: [PATCH 009/148] arm64: BTI: Reset BTYPE when skipping emulated instructions Since normal execution of any non-branch instruction resets the PSTATE BTYPE field to 0, so do the same thing when emulating a trapped instruction. Branches don't trap directly, so we should never need to assign a non-zero value to BTYPE here. Signed-off-by: Mark Brown Signed-off-by: Dave Martin Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 3c986c8ca204..10d6451b2776 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -340,6 +340,8 @@ void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size) if (compat_user_mode(regs)) advance_itstate(regs); + else + regs->pstate &= ~PSR_BTYPE_MASK; } static LIST_HEAD(undef_hook); From 30685d789c48f27f97f6ecde6185b606a6c7abf6 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 16 Mar 2020 16:50:52 +0000 Subject: [PATCH 010/148] KVM: arm64: BTI: Reset BTYPE when skipping emulated instructions Since normal execution of any non-branch instruction resets the PSTATE BTYPE field to 0, so do the same thing when emulating a trapped instruction. Branches don't trap directly, so we should never need to assign a non-zero value to BTYPE here. Signed-off-by: Mark Brown Signed-off-by: Dave Martin Acked-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_emulate.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 688c63412cc2..dee51c1dcb93 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -506,10 +506,12 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) { - if (vcpu_mode_is_32bit(vcpu)) + if (vcpu_mode_is_32bit(vcpu)) { kvm_skip_instr32(vcpu, is_wide_instr); - else + } else { *vcpu_pc(vcpu) += 4; + *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK; + } /* advance the singlestep state machine */ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; From de48bb369242fe17022fab3a2addc9c6dacad43f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 16 Mar 2020 16:50:53 +0000 Subject: [PATCH 011/148] arm64: mm: Display guarded pages in ptdump v8.5-BTI introduces the GP field in stage 1 translation tables which indicates that blocks and pages with it set are guarded pages for which branch target identification checks should be performed. Decode this when dumping the page tables to aid debugging. 
Signed-off-by: Mark Brown Reviewed-by: Kees Cook Signed-off-by: Catalin Marinas --- arch/arm64/mm/dump.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 860c00ec8bd3..78163b7a7dde 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -145,6 +145,11 @@ static const struct prot_bits pte_bits[] = { .val = PTE_UXN, .set = "UXN", .clear = " ", + }, { + .mask = PTE_GP, + .val = PTE_GP, + .set = "GP", + .clear = " ", }, { .mask = PTE_ATTRINDX_MASK, .val = PTE_ATTRINDX(MT_DEVICE_nGnRnE), From 424037b77519d1537872442ba144dda1464988d7 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Mon, 16 Mar 2020 16:50:54 +0000 Subject: [PATCH 012/148] mm: smaps: Report arm64 guarded pages in smaps The arm64 Branch Target Identification support is activated by marking executable pages as guarded pages. Report pages mapped this way in smaps to aid diagnostics. Signed-off-by: Mark Brown Signed-off-by: Daniel Kiss Reviewed-by: Kees Cook Signed-off-by: Catalin Marinas --- Documentation/filesystems/proc.txt | 1 + fs/proc/task_mmu.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 99ca040e3f90..ed5465d0f435 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -519,6 +519,7 @@ manner. The codes are the following: hg - huge page advise flag nh - no-huge page advise flag mg - mergable advise flag + bt - arm64 BTI guarded page Note that there is no guarantee that every flag and associated mnemonic will be present in all further kernel releases. Things get changed, the flags may diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3ba9ae83bff5..1e3409c484d1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -668,6 +668,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_ARCH_1)] = "ar", [ilog2(VM_WIPEONFORK)] = "wf", [ilog2(VM_DONTDUMP)] = "dd", +#ifdef CONFIG_ARM64_BTI + [ilog2(VM_ARM64_BTI)] = "bt", +#endif #ifdef CONFIG_MEM_SOFT_DIRTY [ilog2(VM_SOFTDIRTY)] = "sd", #endif From 383499f8863ea8ce869ba62cec91d9f709c9459b Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 16 Mar 2020 16:50:55 +0000 Subject: [PATCH 013/148] arm64: BTI: Add Kconfig entry for userspace BTI Now that the code for userspace BTI support is in the kernel add the Kconfig entry so that it can be built and used. [Split out of "arm64: Basic Branch Target Identification support" -- broonie] Signed-off-by: Mark Brown Signed-off-by: Dave Martin Reviewed-by: Kees Cook Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8a15bc68dadd..d65d226a77ec 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1522,6 +1522,28 @@ endmenu menu "ARMv8.5 architectural features" +config ARM64_BTI + bool "Branch Target Identification support" + default y + help + Branch Target Identification (part of the ARMv8.5 Extensions) + provides a mechanism to limit the set of locations to which computed + branch instructions such as BR or BLR can jump. + + To make use of BTI on CPUs that support it, say Y. + + BTI is intended to provide complementary protection to other control + flow integrity protection mechanisms, such as the Pointer + authentication mechanism provided as part of the ARMv8.3 Extensions. 
+ For this reason, it does not make sense to enable this option without + also enabling support for pointer authentication. Thus, when + enabling this option you should also select ARM64_PTR_AUTH=y. + + Userspace binaries must also be specifically compiled to make use of + this mechanism. If you say N here or the hardware does not support + BTI, such binaries can still run, but you get no additional + enforcement of branch destinations. + config ARM64_E0PD bool "Enable support for E0PD" default y From bf7f15c585d5b9b843e662aa78f9fc71037db968 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 18 Mar 2020 08:28:31 +0000 Subject: [PATCH 014/148] arm64: elf: Fix allnoconfig kernel build with !ARCH_USE_GNU_PROPERTY Commit ab7876a98a21 ("arm64: elf: Enable BTI at exec based on ELF program properties") introduced the conditional selection of ARCH_USE_GNU_PROPERTY if BINFMT_ELF is enabled. With allnoconfig, this option is no longer selected and the arm64 arch_parse_elf_property() function clashes with the generic dummy implementation. Link: http://lkml.kernel.org/r/20200318082830.GA31312@willie-the-truck Fixes: ab7876a98a21 ("arm64: elf: Enable BTI at exec based on ELF program properties") Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index d65d226a77ec..53c77711f752 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -64,7 +64,7 @@ config ARM64 select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION select ARCH_KEEP_MEMBLOCK select ARCH_USE_CMPXCHG_LOCKREF - select ARCH_USE_GNU_PROPERTY if BINFMT_ELF + select ARCH_USE_GNU_PROPERTY select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_SUPPORTS_MEMORY_FAILURE From 5d1b631c773ffbbadcbb3176a2ae0ea9d1c114c7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 23 Mar 2020 17:01:19 +0000 Subject: [PATCH 015/148] arm64: bti: Document behaviour for dynamically linked binaries For dynamically linked binaries the interpreter is responsible for setting PROT_BTI on everything except itself. The dynamic linker needs to be aware of PROT_BTI, for example in order to avoid dropping that when marking executable pages read only after doing relocations, and doing everything in userspace ensures that we don't get any issues due to divergences in behaviour between the kernel and dynamic linker within a single executable. Add a comment indicating that this is intentional to the code to help people trying to understand what's going on. Signed-off-by: Mark Brown Signed-off-by: Catalin Marinas --- arch/arm64/kernel/process.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 24af13d7bde6..127aee478433 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -674,6 +674,11 @@ asmlinkage void __sched arm64_preempt_schedule_irq(void) int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state, bool has_interp, bool is_interp) { + /* + * For dynamically linked executables the interpreter is + * responsible for setting PROT_BTI on everything except + * itself. + */ if (is_interp != has_interp) return prot; From 593309423cbad0fab659a685834416cf12d8f581 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 4 Apr 2020 01:33:05 +0200 Subject: [PATCH 016/148] x86/32: Remove CONFIG_DOUBLEFAULT Make the doublefault exception handler unconditional on 32-bit. 
Yes, it is important to be able to catch #DF exceptions instead of silent reboots. Yes, the code size increase is worth every byte. And one less CONFIG symbol is just the cherry on top. No functional changes. Signed-off-by: Borislav Petkov Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200404083646.8897-1-bp@alien8.de --- arch/x86/Kconfig.debug | 9 --------- arch/x86/entry/entry_32.S | 2 -- arch/x86/include/asm/doublefault.h | 2 +- arch/x86/include/asm/traps.h | 2 -- arch/x86/kernel/Makefile | 4 +--- arch/x86/kernel/dumpstack_32.c | 4 ---- arch/x86/kernel/traps.c | 2 -- arch/x86/mm/cpu_entry_area.c | 4 +--- tools/testing/selftests/wireguard/qemu/debug.config | 1 - 9 files changed, 3 insertions(+), 27 deletions(-) diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 2e74690b028a..f909d3ce36e6 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -99,15 +99,6 @@ config DEBUG_WX If in doubt, say "Y". -config DOUBLEFAULT - default y - bool "Enable doublefault exception handler" if EXPERT && X86_32 - ---help--- - This option allows trapping of rare doublefault exceptions that - would otherwise cause a system to silently reboot. Disabling this - option saves about 4k and might cause you much additional grey - hair. - config DEBUG_TLBFLUSH bool "Set upper limit of TLB entries to flush one-by-one" depends on DEBUG_KERNEL diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index b67bae7091d7..5c9c7eee6325 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1536,7 +1536,6 @@ SYM_CODE_START(debug) jmp common_exception SYM_CODE_END(debug) -#ifdef CONFIG_DOUBLEFAULT SYM_CODE_START(double_fault) 1: /* @@ -1576,7 +1575,6 @@ SYM_CODE_START(double_fault) hlt jmp 1b SYM_CODE_END(double_fault) -#endif /* * NMI is doubly nasty. 
It can happen on the first instruction of diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h index af9a14ac8962..54a6e4a2e132 100644 --- a/arch/x86/include/asm/doublefault.h +++ b/arch/x86/include/asm/doublefault.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_DOUBLEFAULT_H #define _ASM_X86_DOUBLEFAULT_H -#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT) +#ifdef CONFIG_X86_32 extern void doublefault_init_cpu_tss(void); #else static inline void doublefault_init_cpu_tss(void) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index c26a7e1d8a2c..70bd0f356e5d 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -69,9 +69,7 @@ dotraplinkage void do_overflow(struct pt_regs *regs, long error_code); dotraplinkage void do_bounds(struct pt_regs *regs, long error_code); dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code); dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code); -#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT) dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); -#endif dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code); dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code); dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index ba89cabe5fcf..2a7c3afa62e2 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -102,9 +102,7 @@ obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-y += kprobes/ obj-$(CONFIG_MODULES) += module.o -ifeq ($(CONFIG_X86_32),y) -obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o -endif +obj-$(CONFIG_X86_32) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 8e3a8fedfa4d..722fd712e1cf 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -87,7 +87,6 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) static bool in_doublefault_stack(unsigned long *stack, struct stack_info *info) { -#ifdef CONFIG_DOUBLEFAULT struct cpu_entry_area *cea = get_cpu_entry_area(raw_smp_processor_id()); struct doublefault_stack *ss = &cea->doublefault_stack; @@ -103,9 +102,6 @@ static bool in_doublefault_stack(unsigned long *stack, struct stack_info *info) info->next_sp = (unsigned long *)this_cpu_read(cpu_tss_rw.x86_tss.sp); return true; -#else - return false; -#endif } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d54cffdc7cac..e85561fc0dc8 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -326,7 +326,6 @@ __visible void __noreturn handle_stack_overflow(const char *message, } #endif -#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT) /* * Runs on an IST stack for x86_64 and on a special task stack for x86_32. 
* @@ -450,7 +449,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign die("double fault", regs, error_code); panic("Machine halted."); } -#endif dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 56f9189bbadb..5199d8a1daf1 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -17,7 +17,7 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks); #endif -#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT) +#ifdef CONFIG_X86_32 DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack); #endif @@ -114,12 +114,10 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu) #else static inline void percpu_setup_exception_stacks(unsigned int cpu) { -#ifdef CONFIG_DOUBLEFAULT struct cpu_entry_area *cea = get_cpu_entry_area(cpu); cea_map_percpu_pages(&cea->doublefault_stack, &per_cpu(doublefault_stack, cpu), 1, PAGE_KERNEL); -#endif } #endif diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index 5909e7ef2a5c..807fa7dc60b8 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -58,7 +58,6 @@ CONFIG_RCU_EQS_DEBUG=y CONFIG_USER_STACKTRACE_SUPPORT=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y -CONFIG_DOUBLEFAULT=y CONFIG_X86_DEBUG_FPU=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_DEBUG_PAGEALLOC=y From 2ce0d7f9766f0e49bb54f149c77bae89464932fb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 16 Apr 2020 19:24:02 +0100 Subject: [PATCH 017/148] x86/asm: Provide a Kconfig symbol for disabling old assembly annotations As x86 was converted to use the modern SYM_ annotations for assembly, ifdefs were added to remove the generic definitions of the old style annotations on x86. Rather than collect a list of architectures in the ifdefs as more architectures are converted over, provide a Kconfig symbol for this and update x86 to use it. 
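To make the pattern concrete, here is a condensed before/after sketch of the guard in the generic linkage header (illustrative only, not the full set of annotations):

    /* Before: a hard-coded list of converted architectures. */
    #ifndef CONFIG_X86
    #ifndef ENTRY
    /* deprecated, use SYM_FUNC_START */
    #define ENTRY(name) \
            SYM_FUNC_START(name)
    #endif
    #endif /* CONFIG_X86 */

    /* After: any architecture that has finished converting selects the symbol. */
    #ifndef CONFIG_ARCH_USE_SYM_ANNOTATIONS
    #ifndef ENTRY
    #define ENTRY(name) \
            SYM_FUNC_START(name)
    #endif
    #endif /* CONFIG_ARCH_USE_SYM_ANNOTATIONS */

With this, no per-architecture list needs to be maintained in the header itself; a converted architecture just selects ARCH_USE_SYM_ANNOTATIONS from its Kconfig.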
Signed-off-by: Mark Brown Signed-off-by: Borislav Petkov Acked-by: Jiri Slaby Link: https://lkml.kernel.org/r/20200416182402.6206-1-broonie@kernel.org --- arch/x86/Kconfig | 1 + include/linux/linkage.h | 8 ++++---- lib/Kconfig | 3 +++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1d6104ea8af0..e3d22edfd70a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -91,6 +91,7 @@ config X86 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_USE_SYM_ANNOTATIONS select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH select ARCH_WANT_DEFAULT_BPF_JIT if X86_64 select ARCH_WANTS_DYNAMIC_TASK_STRUCT diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 9280209d1f62..d796ec20d114 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -105,7 +105,7 @@ /* === DEPRECATED annotations === */ -#ifndef CONFIG_X86 +#ifndef CONFIG_ARCH_USE_SYM_ANNOTATIONS #ifndef GLOBAL /* deprecated, use SYM_DATA*, SYM_ENTRY, or similar */ #define GLOBAL(name) \ @@ -118,10 +118,10 @@ #define ENTRY(name) \ SYM_FUNC_START(name) #endif -#endif /* CONFIG_X86 */ +#endif /* CONFIG_ARCH_USE_SYM_ANNOTATIONS */ #endif /* LINKER_SCRIPT */ -#ifndef CONFIG_X86 +#ifndef CONFIG_ARCH_USE_SYM_ANNOTATIONS #ifndef WEAK /* deprecated, use SYM_FUNC_START_WEAK* */ #define WEAK(name) \ @@ -143,7 +143,7 @@ #define ENDPROC(name) \ SYM_FUNC_END(name) #endif -#endif /* CONFIG_X86 */ +#endif /* CONFIG_ARCH_USE_SYM_ANNOTATIONS */ /* === generic annotations === */ diff --git a/lib/Kconfig b/lib/Kconfig index 5d53f9609c25..e831e1f01767 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -80,6 +80,9 @@ config ARCH_USE_CMPXCHG_LOCKREF config ARCH_HAS_FAST_MULTIPLIER bool +config ARCH_USE_SYM_ANNOTATIONS + bool + config INDIRECT_PIO bool "Access I/O in non-MMIO mode" depends on ARM64 From d0055da5266acd316b8cb22b6086b8f9ac2d72cb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 23 Apr 2020 11:16:05 +0100 Subject: [PATCH 018/148] arm64: remove ptrauth_keys_install_kernel sync arg The 'sync' argument to ptrauth_keys_install_kernel macro is somewhat opaque at callsites, so instead lets have regular and _nosync variants of the macro to make this a little more obvious. 
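As a purely illustrative C rendering of the same naming idea (the real code is an assembly macro; __install_kernel_keys below is a hypothetical helper, not an existing function):

    static inline void ptrauth_keys_install_kernel_nosync(struct task_struct *tsk)
    {
            __install_kernel_keys(tsk);     /* hypothetical: program the APIA key */
    }

    static inline void ptrauth_keys_install_kernel(struct task_struct *tsk)
    {
            __install_kernel_keys(tsk);
            isb();                          /* synchronize so the new keys take effect */
    }

Spelling the synchronization out in the macro name avoids a bare '1' or '0' argument whose meaning is not obvious at the call site.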
Signed-off-by: Mark Rutland Cc: Amit Daniel Kachhap Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20200423101606.37601-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/asm_pointer_auth.h | 21 ++++++++++++++++----- arch/arm64/kernel/entry.S | 4 ++-- arch/arm64/mm/proc.S | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h index ce2a8486992b..c85540a911d3 100644 --- a/arch/arm64/include/asm/asm_pointer_auth.h +++ b/arch/arm64/include/asm/asm_pointer_auth.h @@ -39,16 +39,24 @@ alternative_if ARM64_HAS_GENERIC_AUTH alternative_else_nop_endif .endm - .macro ptrauth_keys_install_kernel tsk, sync, tmp1, tmp2, tmp3 -alternative_if ARM64_HAS_ADDRESS_AUTH + .macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 mov \tmp1, #THREAD_KEYS_KERNEL add \tmp1, \tsk, \tmp1 ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_KERNEL_KEY_APIA] msr_s SYS_APIAKEYLO_EL1, \tmp2 msr_s SYS_APIAKEYHI_EL1, \tmp3 - .if \sync == 1 + .endm + + .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 +alternative_if ARM64_HAS_ADDRESS_AUTH + __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3 +alternative_else_nop_endif + .endm + + .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3 +alternative_if ARM64_HAS_ADDRESS_AUTH + __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3 isb - .endif alternative_else_nop_endif .endm @@ -57,7 +65,10 @@ alternative_else_nop_endif .macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3 .endm - .macro ptrauth_keys_install_kernel tsk, sync, tmp1, tmp2, tmp3 + .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 + .endm + + .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3 .endm #endif /* CONFIG_ARM64_PTR_AUTH */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ddcde093c433..a9265fef914f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -178,7 +178,7 @@ alternative_cb_end apply_ssbd 1, x22, x23 - ptrauth_keys_install_kernel tsk, 1, x20, x22, x23 + ptrauth_keys_install_kernel tsk, x20, x22, x23 .else add x21, sp, #S_FRAME_SIZE get_current_task tsk @@ -900,7 +900,7 @@ SYM_FUNC_START(cpu_switch_to) ldr lr, [x8] mov sp, x9 msr sp_el0, x1 - ptrauth_keys_install_kernel x1, 1, x8, x9, x10 + ptrauth_keys_install_kernel x1, x8, x9, x10 ret SYM_FUNC_END(cpu_switch_to) NOKPROBE(cpu_switch_to) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 197a9ba2d5ea..0401342741e6 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -139,7 +139,7 @@ alternative_if ARM64_HAS_RAS_EXTN msr_s SYS_DISR_EL1, xzr alternative_else_nop_endif - ptrauth_keys_install_kernel x14, 0, x1, x2, x3 + ptrauth_keys_install_kernel_nosync x14, x1, x2, x3 isb ret SYM_FUNC_END(cpu_do_resume) From 62a679cb2825488387f458c16dff32be41eb3d32 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 23 Apr 2020 11:16:06 +0100 Subject: [PATCH 019/148] arm64: simplify ptrauth initialization Currently __cpu_setup conditionally initializes the address authentication keys and enables them in SCTLR_EL1, doing so differently for the primary CPU and secondary CPUs, and skipping this work for CPUs returning from an idle state. For the latter case, cpu_do_resume restores the keys and SCTLR_EL1 value after the MMU has been enabled. This flow is rather difficult to follow, so instead let's move the primary and secondary CPU initialization into their respective boot paths. 
By following the example of cpu_do_resume and doing so once the MMU is enabled, we can always initialize the keys from the values in thread_struct, and avoid the machinery necessary to pass the keys in secondary_data or open-coding initialization for the boot CPU. This means we perform an additional RMW of SCTLR_EL1, but we already do this in the cpu_do_resume path, and for other features in cpufeature.c, so this isn't a major concern in a bringup path. Note that even while the enable bits are clear, the key registers are accessible. As this now renders the argument to __cpu_setup redundant, let's also remove that entirely. Future extensions can follow a similar approach to initialize values that differ for primary/secondary CPUs. Signed-off-by: Mark Rutland Tested-by: Amit Daniel Kachhap Reviewed-by: Amit Daniel Kachhap Cc: Amit Daniel Kachhap Cc: Catalin Marinas Cc: James Morse Cc: Suzuki K Poulose Cc: Will Deacon Link: https://lore.kernel.org/r/20200423101606.37601-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/asm_pointer_auth.h | 22 ++++++++++++ arch/arm64/include/asm/smp.h | 11 ------ arch/arm64/kernel/asm-offsets.c | 3 -- arch/arm64/kernel/head.S | 12 +++++-- arch/arm64/kernel/sleep.S | 1 - arch/arm64/kernel/smp.c | 8 ----- arch/arm64/mm/proc.S | 44 ----------------------- 7 files changed, 32 insertions(+), 69 deletions(-) diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h index c85540a911d3..52dead2a8640 100644 --- a/arch/arm64/include/asm/asm_pointer_auth.h +++ b/arch/arm64/include/asm/asm_pointer_auth.h @@ -60,6 +60,28 @@ alternative_if ARM64_HAS_ADDRESS_AUTH alternative_else_nop_endif .endm + .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3 + mrs \tmp1, id_aa64isar1_el1 + ubfx \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8 + cbz \tmp1, .Lno_addr_auth\@ + mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ + SCTLR_ELx_ENDA | SCTLR_ELx_ENDB) + mrs \tmp2, sctlr_el1 + orr \tmp2, \tmp2, \tmp1 + msr sctlr_el1, \tmp2 + __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3 + isb +.Lno_addr_auth\@: + .endm + + .macro ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3 +alternative_if_not ARM64_HAS_ADDRESS_AUTH + b .Lno_addr_auth\@ +alternative_else_nop_endif + __ptrauth_keys_init_cpu \tsk, \tmp1, \tmp2, \tmp3 +.Lno_addr_auth\@: + .endm + #else /* CONFIG_ARM64_PTR_AUTH */ .macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3 diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 40d5ba029615..ea268d88b6f7 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -23,14 +23,6 @@ #define CPU_STUCK_REASON_52_BIT_VA (UL(1) << CPU_STUCK_REASON_SHIFT) #define CPU_STUCK_REASON_NO_GRAN (UL(2) << CPU_STUCK_REASON_SHIFT) -/* Possible options for __cpu_setup */ -/* Option to setup primary cpu */ -#define ARM64_CPU_BOOT_PRIMARY (1) -/* Option to setup secondary cpus */ -#define ARM64_CPU_BOOT_SECONDARY (2) -/* Option to setup cpus for different cpu run time services */ -#define ARM64_CPU_RUNTIME (3) - #ifndef __ASSEMBLY__ #include @@ -96,9 +88,6 @@ asmlinkage void secondary_start_kernel(void); struct secondary_data { void *stack; struct task_struct *task; -#ifdef CONFIG_ARM64_PTR_AUTH - struct ptrauth_keys_kernel ptrauth_key; -#endif long status; }; diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 9981a0a5a87f..890ddd67b8bf 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -92,9 +92,6 @@ int main(void) 
BLANK(); DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); -#ifdef CONFIG_ARM64_PTR_AUTH - DEFINE(CPU_BOOT_PTRAUTH_KEY, offsetof(struct secondary_data, ptrauth_key)); -#endif BLANK(); #ifdef CONFIG_KVM_ARM_HOST DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 57a91032b4c2..dc2b441d4ed8 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -118,7 +119,6 @@ SYM_CODE_START(stext) * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ - mov x0, #ARM64_CPU_BOOT_PRIMARY bl __cpu_setup // initialise processor b __primary_switch SYM_CODE_END(stext) @@ -417,6 +417,10 @@ SYM_FUNC_START_LOCAL(__primary_switched) adr_l x5, init_task msr sp_el0, x5 // Save thread_info +#ifdef CONFIG_ARM64_PTR_AUTH + __ptrauth_keys_init_cpu x5, x6, x7, x8 +#endif + adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address isb @@ -717,7 +721,6 @@ SYM_FUNC_START_LOCAL(secondary_startup) * Common entry point for secondary CPUs. */ bl __cpu_secondary_check52bitva - mov x0, #ARM64_CPU_BOOT_SECONDARY bl __cpu_setup // initialise processor adrp x1, swapper_pg_dir bl __enable_mmu @@ -739,6 +742,11 @@ SYM_FUNC_START_LOCAL(__secondary_switched) msr sp_el0, x2 mov x29, #0 mov x30, #0 + +#ifdef CONFIG_ARM64_PTR_AUTH + ptrauth_keys_init_cpu x2, x3, x4, x5 +#endif + b secondary_start_kernel SYM_FUNC_END(__secondary_switched) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 7b2f2e650c44..56b1fe9bfaa0 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -100,7 +100,6 @@ ENDPROC(__cpu_suspend_enter) .pushsection ".idmap.text", "awx" ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly - mov x0, #ARM64_CPU_RUNTIME bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ adrp x1, swapper_pg_dir diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 061f60fe452f..d6d337d036f0 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -114,10 +114,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) */ secondary_data.task = idle; secondary_data.stack = task_stack_page(idle) + THREAD_SIZE; -#if defined(CONFIG_ARM64_PTR_AUTH) - secondary_data.ptrauth_key.apia.lo = idle->thread.keys_kernel.apia.lo; - secondary_data.ptrauth_key.apia.hi = idle->thread.keys_kernel.apia.hi; -#endif update_cpu_boot_status(CPU_MMU_OFF); __flush_dcache_area(&secondary_data, sizeof(secondary_data)); @@ -140,10 +136,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_crit("CPU%u: failed to come online\n", cpu); secondary_data.task = NULL; secondary_data.stack = NULL; -#if defined(CONFIG_ARM64_PTR_AUTH) - secondary_data.ptrauth_key.apia.lo = 0; - secondary_data.ptrauth_key.apia.hi = 0; -#endif __flush_dcache_area(&secondary_data, sizeof(secondary_data)); status = READ_ONCE(secondary_data.status); if (status == CPU_MMU_OFF) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 0401342741e6..7d3bf1afba0c 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -386,8 +386,6 @@ SYM_FUNC_END(idmap_kpti_install_ng_mappings) * * Initialise the processor for turning the MMU on. * - * Input: - * x0 with a flag ARM64_CPU_BOOT_PRIMARY/ARM64_CPU_BOOT_SECONDARY/ARM64_CPU_RUNTIME. 
* Output: * Return in x0 the value of the SCTLR_EL1 register. */ @@ -446,51 +444,9 @@ SYM_FUNC_START(__cpu_setup) 1: #endif /* CONFIG_ARM64_HW_AFDBM */ msr tcr_el1, x10 - mov x1, x0 /* * Prepare SCTLR */ mov_q x0, SCTLR_EL1_SET - -#ifdef CONFIG_ARM64_PTR_AUTH - /* No ptrauth setup for run time cpus */ - cmp x1, #ARM64_CPU_RUNTIME - b.eq 3f - - /* Check if the CPU supports ptrauth */ - mrs x2, id_aa64isar1_el1 - ubfx x2, x2, #ID_AA64ISAR1_APA_SHIFT, #8 - cbz x2, 3f - - /* - * The primary cpu keys are reset here and can be - * re-initialised with some proper values later. - */ - msr_s SYS_APIAKEYLO_EL1, xzr - msr_s SYS_APIAKEYHI_EL1, xzr - - /* Just enable ptrauth for primary cpu */ - cmp x1, #ARM64_CPU_BOOT_PRIMARY - b.eq 2f - - /* if !system_supports_address_auth() then skip enable */ -alternative_if_not ARM64_HAS_ADDRESS_AUTH - b 3f -alternative_else_nop_endif - - /* Install ptrauth key for secondary cpus */ - adr_l x2, secondary_data - ldr x3, [x2, #CPU_BOOT_TASK] // get secondary_data.task - cbz x3, 2f // check for slow booting cpus - ldp x3, x4, [x2, #CPU_BOOT_PTRAUTH_KEY] - msr_s SYS_APIAKEYLO_EL1, x3 - msr_s SYS_APIAKEYHI_EL1, x4 - -2: /* Enable ptrauth instructions */ - ldr x2, =SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ - SCTLR_ELx_ENDA | SCTLR_ELx_ENDB - orr x0, x0, x2 -3: -#endif ret // return to head.S SYM_FUNC_END(__cpu_setup) From 348a625deef13d7f8537b9704d29d05cafdd8e72 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 26 Mar 2020 18:14:23 +0100 Subject: [PATCH 020/148] arm64: rename stext to primary_entry For historical reasons, the primary entry routine living somewhere in the inittext section is called stext(), which is confusing, given that there is also a section marker called _stext which lives at a fixed offset in the image (either 64 or 4096 bytes, depending on whether CONFIG_EFI is enabled) Let's rename stext to primary_entry(), which is a better description and reflects the secondary_entry() routine that already exists for SMP boot. Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200326171423.3080-1-ardb@kernel.org Reviwed-by: Mark Brown Signed-off-by: Will Deacon --- arch/arm64/kernel/efi-entry.S | 2 +- arch/arm64/kernel/head.S | 19 +++++++++---------- arch/arm64/kernel/image-vars.h | 2 +- arch/arm64/kernel/vmlinux.lds.S | 4 ++-- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 1a03618df0df..303642975a93 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -19,7 +19,7 @@ SYM_CODE_START(efi_enter_kernel) * point stored in x0. Save those values in registers which are * callee preserved. */ - ldr w2, =stext_offset + ldr w2, =primary_entry_offset add x19, x0, x2 // relocated Image entrypoint mov x20, x1 // DTB address diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 57a91032b4c2..01aa238c7b10 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -70,9 +70,9 @@ _head: * its opcode forms the magic "MZ" signature required by UEFI. */ add x13, x18, #0x16 - b stext + b primary_entry #else - b stext // branch to kernel start, magic + b primary_entry // branch to kernel start, magic .long 0 // reserved #endif le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian @@ -98,14 +98,13 @@ pe_header: * primary lowlevel boot path: * * Register Scope Purpose - * x21 stext() .. start_kernel() FDT pointer passed at boot in x0 - * x23 stext() .. 
start_kernel() physical misalignment/KASLR offset - * x28 __create_page_tables() callee preserved temp register - * x19/x20 __primary_switch() callee preserved temp registers - * x24 __primary_switch() .. relocate_kernel() - * current RELR displacement + * x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0 + * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset + * x28 __create_page_tables() callee preserved temp register + * x19/x20 __primary_switch() callee preserved temp registers + * x24 __primary_switch() .. relocate_kernel() current RELR displacement */ -SYM_CODE_START(stext) +SYM_CODE_START(primary_entry) bl preserve_boot_args bl el2_setup // Drop to EL1, w0=cpu_boot_mode adrp x23, __PHYS_OFFSET @@ -121,7 +120,7 @@ SYM_CODE_START(stext) mov x0, #ARM64_CPU_BOOT_PRIMARY bl __cpu_setup // initialise processor b __primary_switch -SYM_CODE_END(stext) +SYM_CODE_END(primary_entry) /* * Preserve the arguments passed by the bootloader in x0 .. x3 diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 7f06ad93fc95..be0a63ffed23 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -13,7 +13,7 @@ #ifdef CONFIG_EFI __efistub_kernel_size = _edata - _text; -__efistub_stext_offset = stext - _text; +__efistub_primary_entry_offset = primary_entry - _text; /* diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 497f9675071d..8bd825233580 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -72,8 +72,8 @@ jiffies = jiffies_64; /* * The size of the PE/COFF section that covers the kernel image, which - * runs from stext to _edata, must be a round multiple of the PE/COFF - * FileAlignment, which we set to its minimum value of 0x200. 'stext' + * runs from _stext to _edata, must be a round multiple of the PE/COFF + * FileAlignment, which we set to its minimum value of 0x200. '_stext' * itself is 4 KB aligned, so padding out _edata to a 0x200 aligned * boundary should be sufficient. */ From 99ee28d99607d15c6b88c4a9b9fb4a9f0ebf598c Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Mon, 30 Mar 2020 17:38:01 +0000 Subject: [PATCH 021/148] arm64: kexec_file: Avoid temp buffer for RNG seed After using get_random_bytes(), you want to wipe the buffer afterward so the seed remains secret. In this case, we can eliminate the temporary buffer entirely. fdt_setprop_placeholder() returns a pointer to the property value buffer, allowing us to put the random data directly in there without using a temporary buffer at all. Faster and less stack all in one. 
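A minimal sketch of the resulting flow (error handling trimmed, not the verbatim kernel code):

    void *rng_seed;
    int ret;

    /* Reserve space for the property and get a pointer to its value buffer. */
    ret = fdt_setprop_placeholder(dtb, off, FDT_PROP_RNG_SEED,
                                  RNG_SEED_SIZE, &rng_seed);
    if (ret)
            goto out;

    /* Fill the property in place: no temporary copy to wipe afterwards. */
    get_random_bytes(rng_seed, RNG_SEED_SIZE);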
Signed-off-by: George Spelvin Acked-by: Will Deacon Cc: Hsin-Yi Wang Cc: Catalin Marinas Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20200330173801.GA9199@SDF.ORG Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec_file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index b40c3b0def92..e5cbf91aadfe 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -138,12 +138,12 @@ static int setup_dtb(struct kimage *image, /* add rng-seed */ if (rng_is_initialized()) { - u8 rng_seed[RNG_SEED_SIZE]; - get_random_bytes(rng_seed, RNG_SEED_SIZE); - ret = fdt_setprop(dtb, off, FDT_PROP_RNG_SEED, rng_seed, - RNG_SEED_SIZE); + void *rng_seed; + ret = fdt_setprop_placeholder(dtb, off, FDT_PROP_RNG_SEED, + RNG_SEED_SIZE, &rng_seed); if (ret) goto out; + get_random_bytes(rng_seed, RNG_SEED_SIZE); } else { pr_notice("RNG is not initialised: omitting \"%s\" property\n", FDT_PROP_RNG_SEED); From 4cf234943dcfa16e33daa3429fabc71d166dfb14 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 15 Apr 2020 14:30:49 +0200 Subject: [PATCH 022/148] arm64: drop GZFLAGS definition and export Drop the definition and export of GZFLAGS, which was never referenced on arm64, and whose last recorded use in the ARM port (on which arm64 was based original) was removed by patch commit 5e89d379edb5ae08b57f39dd8d91697275245cbf [*] Author: Russell King Date: Wed Oct 16 14:32:17 2002 +0100 [ARM] Convert ARM makefiles to new kbuild (Sam Ravnborg, Kai, rmk) [*] git commit ID based on Thomas Gleixner's historical GIT repository at git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200415123049.25504-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 85e4149cc5d5..5861cb0f48fd 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -12,7 +12,6 @@ LDFLAGS_vmlinux :=--no-undefined -X CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) -GZFLAGS :=-9 ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour @@ -131,7 +130,7 @@ KBUILD_CFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) KBUILD_CPPFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) KBUILD_AFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) -export TEXT_OFFSET GZFLAGS +export TEXT_OFFSET core-y += arch/arm64/ libs-y := arch/arm64/lib/ $(libs-y) From 1eae811da6f44a7a614ce5d33537e279ca7984fc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 16 Apr 2020 15:27:30 +0200 Subject: [PATCH 023/148] arm64/kernel: vmlinux.lds: drop redundant discard/keep macros ARM_EXIT_KEEP and ARM_EXIT_DISCARD are always defined in the same way, so we don't really need them in the first place. 
Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200416132730.25290-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/vmlinux.lds.S | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 8bd825233580..be8b28551c24 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -17,10 +17,6 @@ #include "image.h" -/* .exit.text needed in case of alternative patching */ -#define ARM_EXIT_KEEP(x) x -#define ARM_EXIT_DISCARD(x) - OUTPUT_ARCH(aarch64) ENTRY(_text) @@ -95,8 +91,6 @@ SECTIONS * order of matching. */ /DISCARD/ : { - ARM_EXIT_DISCARD(EXIT_TEXT) - ARM_EXIT_DISCARD(EXIT_DATA) EXIT_CALL *(.discard) *(.discard.*) @@ -161,7 +155,7 @@ SECTIONS __exittext_begin = .; .exit.text : { - ARM_EXIT_KEEP(EXIT_TEXT) + EXIT_TEXT } __exittext_end = .; @@ -188,7 +182,7 @@ SECTIONS *(.init.rodata.* .init.bss) /* from the EFI stub */ } .exit.data : { - ARM_EXIT_KEEP(EXIT_DATA) + EXIT_DATA } PERCPU_SECTION(L1_CACHE_BYTES) From 0dd2334fd5b99e610ceccba0e5263e6969207880 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Sat, 18 Apr 2020 16:19:09 +0800 Subject: [PATCH 024/148] arm64: entry: remove unneeded semicolon in el1_sync_handler() Fix the following coccicheck warning: arch/arm64/kernel/entry-common.c:97:2-3: Unneeded semicolon Reported-by: Hulk Robot Signed-off-by: Jason Yan Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200418081909.41471-1-yanaijie@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index c839b5bf1904..bed09a866c2f 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -94,7 +94,7 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) break; default: el1_inv(regs, esr); - }; + } } NOKPROBE_SYMBOL(el1_sync_handler); From 2eaf63ba84dc2fa4cecd717e917ea882be08069b Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Thu, 23 Apr 2020 14:33:26 +0800 Subject: [PATCH 025/148] arm64: smp: Make cpus_stuck_in_kernel static Fix the following sparse warning: arch/arm64/kernel/smp.c:68:5: warning: symbol 'cpus_stuck_in_kernel' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Zou Wei Link: https://lore.kernel.org/r/1587623606-96698-1-git-send-email-zou_wei@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 061f60fe452f..1d06af462b93 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -65,7 +65,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number); */ struct secondary_data secondary_data; /* Number of CPUs which aren't online, but looping in kernel text. */ -int cpus_stuck_in_kernel; +static int cpus_stuck_in_kernel; enum ipi_msg_type { IPI_RESCHEDULE, From 68ecabd0e680a4ceaf950ae189a55d4730d10c64 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 28 Apr 2020 09:46:55 +1000 Subject: [PATCH 026/148] arm64/mm: Use phys_to_page() to access pgtable memory The macros {pgd, pud, pmd}_page() retrieves the page struct of the corresponding page frame, which is reserved as page table. There is already a macro (phys_to_page), defined in memory.h as below, to convert the physical address to the page struct. Also, the header file (memory.h) has been included by pgtable.h. 
#define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys))) So it's reasonable to use the macro in pgtable.h. Signed-off-by: Gavin Shan Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20200427234655.111847-1-gshan@redhat.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 538c85e62f86..8c20e2bd6287 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -508,7 +508,7 @@ static inline void pte_unmap(pte_t *pte) { } #define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) #define pte_clear_fixmap() clear_fixmap(FIX_PTE) -#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(__pmd_to_phys(pmd))) +#define pmd_page(pmd) phys_to_page(__pmd_to_phys(pmd)) /* use ONLY for statically allocated translation tables */ #define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr)))) @@ -566,7 +566,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) #define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr)) #define pmd_clear_fixmap() clear_fixmap(FIX_PMD) -#define pud_page(pud) pfn_to_page(__phys_to_pfn(__pud_to_phys(pud))) +#define pud_page(pud) phys_to_page(__pud_to_phys(pud)) /* use ONLY for statically allocated translation tables */ #define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr)))) @@ -624,7 +624,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd) #define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr)) #define pud_clear_fixmap() clear_fixmap(FIX_PUD) -#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd))) +#define pgd_page(pgd) phys_to_page(__pgd_to_phys(pgd)) /* use ONLY for statically allocated translation tables */ #define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr)))) From 9d3f888135505dda7d5539a5756440d298f08173 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Tue, 21 Apr 2020 15:29:15 +0100 Subject: [PATCH 027/148] arm64: cpufeature: Relax check for IESB support We don't care if IESB is supported or not as we always set SCTLR_ELx.IESB and, if it works, that's really great. Relax the ID_AA64MMFR2.IESB cpufeature check so that we don't warn and taint if it's mismatched. 
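The entire change is in how the field is described in the feature table; schematically (before/after, illustrative):

    /* Before: a mismatch triggers a "SANITY CHECK" warning and taints the kernel. */
    ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),

    /* After: a mismatch is still sanitised to the safe (lower) value, but silently. */
    ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),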
[will: rewrote commit message] Signed-off-by: Sai Prakash Ranjan Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20200421142922.18950-2-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9fac745aa7bb..63df28e6a425 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -247,7 +247,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_UAO_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CNP_SHIFT, 4, 0), From 0113340e6e83f8710b216f72b826499fc0151c29 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 21 Apr 2020 15:29:16 +0100 Subject: [PATCH 028/148] arm64: cpufeature: Spell out register fields for ID_ISAR4 and ID_PFR1 In preparation for runtime updates to the strictness of some AArch32 features, spell out the register fields for ID_ISAR4 and ID_PFR1 to make things clearer to read. Note that this isn't functionally necessary, as the feature arrays themselves are not modified dynamically and remain 'const'. Tested-by: Sai Prakash Ranjan Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20200421142922.18950-3-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 17 +++++++++++++++++ arch/arm64/kernel/cpufeature.c | 28 ++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c4ac0ac25a00..d7181972d28d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -752,6 +752,15 @@ #define ID_DFR0_PERFMON_8_1 0x4 +#define ID_ISAR4_SWP_FRAC_SHIFT 28 +#define ID_ISAR4_PSR_M_SHIFT 24 +#define ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT 20 +#define ID_ISAR4_BARRIER_SHIFT 16 +#define ID_ISAR4_SMC_SHIFT 12 +#define ID_ISAR4_WRITEBACK_SHIFT 8 +#define ID_ISAR4_WITHSHIFTS_SHIFT 4 +#define ID_ISAR4_UNPRIV_SHIFT 0 + #define ID_ISAR5_RDM_SHIFT 24 #define ID_ISAR5_CRC32_SHIFT 16 #define ID_ISAR5_SHA2_SHIFT 12 @@ -785,6 +794,14 @@ #define MVFR1_FPDNAN_SHIFT 4 #define MVFR1_FPFTZ_SHIFT 0 +#define ID_PFR1_GIC_SHIFT 28 +#define ID_PFR1_VIRT_FRAC_SHIFT 24 +#define ID_PFR1_SEC_FRAC_SHIFT 20 +#define ID_PFR1_GENTIMER_SHIFT 16 +#define ID_PFR1_VIRTUALIZATION_SHIFT 12 +#define ID_PFR1_MPROGMOD_SHIFT 8 +#define ID_PFR1_SECURITY_SHIFT 4 +#define ID_PFR1_PROGMOD_SHIFT 0 #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN64_SHIFT 24 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 63df28e6a425..b143f8bc6c52 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -332,6 +332,18 @@ static const struct arm64_ftr_bits ftr_id_mmfr4[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_isar4[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 
ID_ISAR4_SWP_FRAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_PSR_M_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_BARRIER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SMC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_WRITEBACK_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_WITHSHIFTS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_UNPRIV_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_isar6[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_BF16_SHIFT, 4, 0), @@ -351,6 +363,18 @@ static const struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_pfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_GIC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_VIRT_FRAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_SEC_FRAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_GENTIMER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_VIRTUALIZATION_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_MPROGMOD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_SECURITY_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_PROGMOD_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_dfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ @@ -411,7 +435,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 1 */ ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0), - ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_id_pfr1), ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0), ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0), ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits), @@ -423,7 +447,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits), - ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_id_isar4), ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5), ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4), ARM64_FTR_REG(SYS_ID_ISAR6_EL1, ftr_id_isar6), From 540f76d12c662d3da2ebdf0086ee289123fcd120 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 21 Apr 2020 15:29:17 +0100 Subject: [PATCH 029/148] arm64: cpufeature: Add CPU capability for AArch32 EL1 support Although we emit a "SANITY CHECK" warning and taint the kernel if we detect a CPU mismatch for AArch32 support at EL1, we still online the CPU with disastrous consequences for any running 32-bit VMs. Introduce a capability for AArch32 support at EL1 so that late onlining of incompatible CPUs is forbidden. 
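Users such as KVM can then test the capability instead of open-coding a read of the sanitised register; roughly (sketch only):

    if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features) &&
        !cpus_have_const_cap(ARM64_HAS_32BIT_EL1))
            return -EINVAL; /* no 32-bit EL1 on this system: refuse the vCPU */

Because this is a system-wide capability, a late CPU lacking 32-bit EL1 support is refused at onlining time rather than silently breaking running 32-bit guests.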
Tested-by: Sai Prakash Ranjan Reviewed-by: Suzuki K Poulose Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20200421142922.18950-4-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kernel/cpufeature.c | 12 ++++++++++++ arch/arm64/kvm/reset.c | 12 ++---------- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 8eb5a088ae65..c54c674e6c21 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -61,7 +61,8 @@ #define ARM64_HAS_AMU_EXTN 51 #define ARM64_HAS_ADDRESS_AUTH 52 #define ARM64_HAS_GENERIC_AUTH 53 +#define ARM64_HAS_32BIT_EL1 54 -#define ARM64_NCAPS 54 +#define ARM64_NCAPS 55 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d7181972d28d..c4e896bf77f3 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -655,6 +655,7 @@ #define ID_AA64PFR0_ASIMD_NI 0xf #define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 #define ID_AA64PFR0_EL1_64BIT_ONLY 0x1 +#define ID_AA64PFR0_EL1_32BIT_64BIT 0x2 #define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b143f8bc6c52..838fe5cc8d7e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1535,6 +1535,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR0_EL0_SHIFT, .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, }, +#ifdef CONFIG_KVM + { + .desc = "32-bit EL1 Support", + .capability = ARM64_HAS_32BIT_EL1, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR0_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR0_EL1_SHIFT, + .min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT, + }, +#endif { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 30b7ea680f66..102e5c4e01a0 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -46,14 +46,6 @@ static const struct kvm_regs default_regs_reset32 = { PSR_AA32_I_BIT | PSR_AA32_F_BIT), }; -static bool cpu_has_32bit_el1(void) -{ - u64 pfr0; - - pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); - return !!(pfr0 & 0x20); -} - /** * kvm_arch_vm_ioctl_check_extension * @@ -66,7 +58,7 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) switch (ext) { case KVM_CAP_ARM_EL1_32BIT: - r = cpu_has_32bit_el1(); + r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1); break; case KVM_CAP_GUEST_DEBUG_HW_BPS: r = get_num_brps(); @@ -288,7 +280,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) switch (vcpu->arch.target) { default: if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { - if (!cpu_has_32bit_el1()) + if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) goto out; cpu_reset = &default_regs_reset32; } else { From 13dc4d836179444f0ca90188cfccd23f9cd9ff05 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 21 Apr 2020 15:29:18 +0100 Subject: [PATCH 030/148] arm64: cpufeature: Remove redundant call to id_aa64pfr0_32bit_el0() There's no need to call id_aa64pfr0_32bit_el0() twice because the sanitised value of ID_AA64PFR0_EL1 has already been updated for the CPU being onlined. Remove the redundant function call. 
Tested-by: Sai Prakash Ranjan Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20200421142922.18950-5-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 838fe5cc8d7e..7dfcdd9e75c1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -792,9 +792,7 @@ void update_cpu_features(int cpu, * If we have AArch32, we care about 32-bit features for compat. * If the system doesn't support AArch32, don't update them. */ - if (id_aa64pfr0_32bit_el0(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) && - id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { - + if (id_aa64pfr0_32bit_el0(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) { taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, info->reg_id_dfr0, boot->reg_id_dfr0); taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, From 1efcfe79d97dff9d436748a13a329f64cda6e67e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 21 Apr 2020 15:29:19 +0100 Subject: [PATCH 031/148] arm64: cpufeature: Factor out checking of AArch32 features update_cpu_features() is pretty large, so split out the checking of the AArch32 features into a separate function and call it after checking the AArch64 features. Tested-by: Sai Prakash Ranjan Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20200421142922.18950-6-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 112 +++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 47 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7dfcdd9e75c1..6892b2440676 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -715,6 +715,65 @@ static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot) return 1; } +static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info, + struct cpuinfo_arm64 *boot) +{ + int taint = 0; + u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + /* + * If we don't have AArch32 at all then skip the checks entirely + * as the register values may be UNKNOWN and we're not going to be + * using them for anything. + */ + if (!id_aa64pfr0_32bit_el0(pfr0)) + return taint; + + taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, + info->reg_id_dfr0, boot->reg_id_dfr0); + taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, + info->reg_id_isar0, boot->reg_id_isar0); + taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, + info->reg_id_isar1, boot->reg_id_isar1); + taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu, + info->reg_id_isar2, boot->reg_id_isar2); + taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu, + info->reg_id_isar3, boot->reg_id_isar3); + taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu, + info->reg_id_isar4, boot->reg_id_isar4); + taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, + info->reg_id_isar5, boot->reg_id_isar5); + taint |= check_update_ftr_reg(SYS_ID_ISAR6_EL1, cpu, + info->reg_id_isar6, boot->reg_id_isar6); + + /* + * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and + * ACTLR formats could differ across CPUs and therefore would have to + * be trapped for virtualization anyway. 
+ */ + taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu, + info->reg_id_mmfr0, boot->reg_id_mmfr0); + taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu, + info->reg_id_mmfr1, boot->reg_id_mmfr1); + taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu, + info->reg_id_mmfr2, boot->reg_id_mmfr2); + taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, + info->reg_id_mmfr3, boot->reg_id_mmfr3); + taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, + info->reg_id_pfr0, boot->reg_id_pfr0); + taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, + info->reg_id_pfr1, boot->reg_id_pfr1); + taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, + info->reg_mvfr0, boot->reg_mvfr0); + taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, + info->reg_mvfr1, boot->reg_mvfr1); + taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu, + info->reg_mvfr2, boot->reg_mvfr2); + + return taint; +} + + /* * Update system wide CPU feature registers with the values from a * non-boot CPU. Also performs SANITY checks to make sure that there @@ -788,53 +847,6 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu, info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0); - /* - * If we have AArch32, we care about 32-bit features for compat. - * If the system doesn't support AArch32, don't update them. - */ - if (id_aa64pfr0_32bit_el0(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) { - taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, - info->reg_id_dfr0, boot->reg_id_dfr0); - taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, - info->reg_id_isar0, boot->reg_id_isar0); - taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, - info->reg_id_isar1, boot->reg_id_isar1); - taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu, - info->reg_id_isar2, boot->reg_id_isar2); - taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu, - info->reg_id_isar3, boot->reg_id_isar3); - taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu, - info->reg_id_isar4, boot->reg_id_isar4); - taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, - info->reg_id_isar5, boot->reg_id_isar5); - taint |= check_update_ftr_reg(SYS_ID_ISAR6_EL1, cpu, - info->reg_id_isar6, boot->reg_id_isar6); - - /* - * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and - * ACTLR formats could differ across CPUs and therefore would have to - * be trapped for virtualization anyway. - */ - taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu, - info->reg_id_mmfr0, boot->reg_id_mmfr0); - taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu, - info->reg_id_mmfr1, boot->reg_id_mmfr1); - taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu, - info->reg_id_mmfr2, boot->reg_id_mmfr2); - taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, - info->reg_id_mmfr3, boot->reg_id_mmfr3); - taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, - info->reg_id_pfr0, boot->reg_id_pfr0); - taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, - info->reg_id_pfr1, boot->reg_id_pfr1); - taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, - info->reg_mvfr0, boot->reg_mvfr0); - taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, - info->reg_mvfr1, boot->reg_mvfr1); - taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu, - info->reg_mvfr2, boot->reg_mvfr2); - } - if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu, info->reg_zcr, boot->reg_zcr); @@ -845,6 +857,12 @@ void update_cpu_features(int cpu, sve_update_vq_map(); } + /* + * This relies on a sanitised view of the AArch64 ID registers + * (e.g. SYS_ID_AA64PFR0_EL1), so we call it last. 
+ */ + taint |= update_32bit_cpu_features(cpu, info, boot); + /* * Mismatched CPU features are a recipe for disaster. Don't even * pretend to support them. From eab2f92607461fc7fa9dba599772a4b214fd9d1a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 21 Apr 2020 15:29:20 +0100 Subject: [PATCH 032/148] arm64: cpufeature: Relax AArch32 system checks if EL1 is 64-bit only If AArch32 is not supported at EL1, the AArch32 feature register fields no longer advertise support for some system features: * ISAR4.SMC * PFR1.{Virt_frac, Sec_frac, Virtualization, Security, ProgMod} In which case, we don't need to emit "SANITY CHECK" failures for all of them. Add logic to relax the strictness of individual feature register fields at runtime and use this for the fields above if 32-bit EL1 is not supported. Tested-by: Sai Prakash Ranjan Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20200421142922.18950-7-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 7 ++++++ arch/arm64/kernel/cpufeature.c | 33 ++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index afe08251ff95..f5c4672e498b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -551,6 +551,13 @@ static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1; } +static inline bool id_aa64pfr0_32bit_el1(u64 pfr0) +{ + u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SHIFT); + + return val == ID_AA64PFR0_EL1_32BIT_64BIT; +} + static inline bool id_aa64pfr0_32bit_el0(u64 pfr0) { u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6892b2440676..7e0dbe2a2f2d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -715,6 +715,25 @@ static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot) return 1; } +static void relax_cpu_ftr_reg(u32 sys_id, int field) +{ + const struct arm64_ftr_bits *ftrp; + struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id); + + if (WARN_ON(!regp)) + return; + + for (ftrp = regp->ftr_bits; ftrp->width; ftrp++) { + if (ftrp->shift == field) { + regp->strict_mask &= ~arm64_ftr_mask(ftrp); + break; + } + } + + /* Bogus field? */ + WARN_ON(!ftrp->width); +} + static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info, struct cpuinfo_arm64 *boot) { @@ -729,6 +748,19 @@ static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info, if (!id_aa64pfr0_32bit_el0(pfr0)) return taint; + /* + * If we don't have AArch32 at EL1, then relax the strictness of + * EL1-dependent register fields to avoid spurious sanity check fails. 
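Schematically, the 32-bit feature update path now clears the strict mask for those fields before running the usual checks (sketch mirroring the hunk below):

    if (!id_aa64pfr0_32bit_el1(pfr0)) {
            /* No AArch32 at EL1: these fields may legitimately differ. */
            relax_cpu_ftr_reg(SYS_ID_ISAR4_EL1, ID_ISAR4_SMC_SHIFT);
            relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRT_FRAC_SHIFT);
            relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_SEC_FRAC_SHIFT);
            /* ...likewise for Virtualization, Security and ProgMod... */
    }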
+ */ + if (!id_aa64pfr0_32bit_el1(pfr0)) { + relax_cpu_ftr_reg(SYS_ID_ISAR4_EL1, ID_ISAR4_SMC_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRT_FRAC_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_SEC_FRAC_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRTUALIZATION_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_SECURITY_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_PROGMOD_SHIFT); + } + taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, info->reg_id_dfr0, boot->reg_id_dfr0); taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, @@ -773,7 +805,6 @@ static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info, return taint; } - /* * Update system wide CPU feature registers with the values from a * non-boot CPU. Also performs SANITY checks to make sure that there From 98448cdfe7060dd5491bfbd3f7214ffe1395d58e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 21 Apr 2020 15:29:21 +0100 Subject: [PATCH 033/148] arm64: cpufeature: Relax checks for AArch32 support at EL[0-2] We don't need to be quite as strict about mismatched AArch32 support, which is good because the friendly hardware folks have been busy mismatching this to their hearts' content. * We don't care about EL2 or EL3 (there are silly comments concerning the latter, so remove those) * EL1 support is gated by the ARM64_HAS_32BIT_EL1 capability and handled gracefully when a mismatch occurs * EL0 support is gated by the ARM64_HAS_32BIT_EL0 capability and handled gracefully when a mismatch occurs Relax the AArch32 checks to FTR_NONSTRICT. Tested-by: Sai Prakash Ranjan Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20200421142922.18950-8-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7e0dbe2a2f2d..d63653d7c5d0 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -172,11 +172,10 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), - /* Linux doesn't care about the EL3 */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), ARM64_FTR_END, }; @@ -867,9 +866,6 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu, info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2); - /* - * EL3 is not our concern. 
- */ taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu, From a2a69963524dc4aebba047732dce5d256a70bb3d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 21 Apr 2020 15:29:22 +0100 Subject: [PATCH 034/148] arm64: cpufeature: Add an overview comment for the cpufeature framework Now that Suzuki isn't within throwing distance, I thought I'd better add a rough overview comment to cpufeature.c so that it doesn't take me days to remember how it works next time. Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20200421142922.18950-9-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 50 ++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d63653d7c5d0..c1d44d127baa 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -3,6 +3,56 @@ * Contains CPU feature definitions * * Copyright (C) 2015 ARM Ltd. + * + * A note for the weary kernel hacker: the code here is confusing and hard to + * follow! That's partly because it's solving a nasty problem, but also because + * there's a little bit of over-abstraction that tends to obscure what's going + * on behind a maze of helper functions and macros. + * + * The basic problem is that hardware folks have started gluing together CPUs + * with distinct architectural features; in some cases even creating SoCs where + * user-visible instructions are available only on a subset of the available + * cores. We try to address this by snapshotting the feature registers of the + * boot CPU and comparing these with the feature registers of each secondary + * CPU when bringing them up. If there is a mismatch, then we update the + * snapshot state to indicate the lowest-common denominator of the feature, + * known as the "safe" value. This snapshot state can be queried to view the + * "sanitised" value of a feature register. + * + * The sanitised register values are used to decide which capabilities we + * have in the system. These may be in the form of traditional "hwcaps" + * advertised to userspace or internal "cpucaps" which are used to configure + * things like alternative patching and static keys. While a feature mismatch + * may result in a TAINT_CPU_OUT_OF_SPEC kernel taint, a capability mismatch + * may prevent a CPU from being onlined at all. + * + * Some implementation details worth remembering: + * + * - Mismatched features are *always* sanitised to a "safe" value, which + * usually indicates that the feature is not supported. + * + * - A mismatched feature marked with FTR_STRICT will cause a "SANITY CHECK" + * warning when onlining an offending CPU and the kernel will be tainted + * with TAINT_CPU_OUT_OF_SPEC. + * + * - Features marked as FTR_VISIBLE have their sanitised value visible to + * userspace. FTR_VISIBLE features in registers that are only visible + * to EL0 by trapping *must* have a corresponding HWCAP so that late + * onlining of CPUs cannot lead to features disappearing at runtime. + * + * - A "feature" is typically a 4-bit register field. A "capability" is the + * high-level description derived from the sanitised field value. + * + * - Read the Arm ARM (DDI 0487F.a) section D13.1.3 ("Principles of the ID + * scheme for fields in ID registers") to understand when feature fields + * may be signed or unsigned (FTR_SIGNED and FTR_UNSIGNED accordingly). 
+ * + * - KVM exposes its own view of the feature registers to guest operating + * systems regardless of FTR_VISIBLE. This is typically driven from the + * sanitised register values to allow virtual CPUs to be migrated between + * arbitrary physical CPUs, but some features not present on the host are + * also advertised and emulated. Look at sys_reg_descs[] for the gory + * details. */ #define pr_fmt(fmt) "CPU features: " fmt From e24e03aa00f0248a716ec7859c03f0034bb42fb2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Apr 2020 10:36:58 +0100 Subject: [PATCH 035/148] arm64: docs: Mandate that the I-cache doesn't hold stale kernel text Although we require that the loaded kernel Image has been cleaned to the PoC, we neglect to spell out the state of the I-cache. Although this should be reasonably obvious, it doesn't hurt to be explicit. Require that the I-cache doesn't hold any stale entries for the kernel Image at boot. Acked-by: Mark Rutland Acked-by: Catalin Marinas Cc: Mark Rutland Cc: Catalin Marinas Link: https://lore.kernel.org/r/20200423093658.10602-1-will@kernel.org Signed-off-by: Will Deacon --- Documentation/arm64/booting.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index a3f1a47b6f1c..d063c05d5fb0 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -173,7 +173,8 @@ Before jumping into the kernel, the following conditions must be met: - Caches, MMUs The MMU must be off. - Instruction cache may be on or off. + The instruction cache may be on or off, and must not hold any stale + entries corresponding to the loaded kernel image. The address range corresponding to the loaded kernel image must be cleaned to the PoC. In the presence of a system cache or other coherent masters with caches enabled, this will typically require From 30218da5974ceb29c913e53296577526cb742a7e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 14 Apr 2020 19:28:43 +0100 Subject: [PATCH 036/148] arm64: lib: Consistently enable crc32 extension Currently most of the assembly files that use architecture extensions enable them using the .arch directive but crc32.S uses .cpu instead. Move that over to .arch for consistency. Signed-off-by: Mark Brown Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20200414182843.31664-1-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/lib/crc32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S index 243e107e9896..0f9e10ecda23 100644 --- a/arch/arm64/lib/crc32.S +++ b/arch/arm64/lib/crc32.S @@ -9,7 +9,7 @@ #include #include - .cpu generic+crc + .arch armv8-a+crc .macro __crc32, c cmp x2, #16 From cfa7ede20f133cc81cef01dc3a516dda3a9721ee Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 15 Apr 2020 10:29:22 +0200 Subject: [PATCH 037/148] arm64: set TEXT_OFFSET to 0x0 in preparation for removing it entirely TEXT_OFFSET on arm64 is a historical artifact from the early days of the arm64 port where the boot protocol was basically 'copy this image to the base of memory + 512k', giving us 512 KB of guaranteed BSS space to put the swapper page tables. When the arm64 Image header was added in v3.10, it already carried the actual value of TEXT_OFFSET, to allow the bootloader to discover it dynamically rather than hardcode it to 512 KB. Today, this memory window is not used for any particular purpose, and it is simply handed to the page allocator at boot. 
The only reason it still exists is because of the 512k misalignment it causes with respect to the 2 MB aligned virtual base address of the kernel, which affects the virtual addresses of all statically allocated objects in the kernel image. However, with the introduction of KASLR in v4.6, we added the concept of relocatable kernels, which rewrite all absolute symbol references at boot anyway, and so the placement of such kernels in the physical address space is irrelevant, provided that the minimum segment alignment is honoured (64 KB in most cases, 128 KB for 64k pages kernels with vmap'ed stacks enabled). This makes 0x0 and 512 KB equally suitable values for TEXT_OFFSET on the off chance that we are dealing with boot loaders that ignore the value passed via the header entirely. Considering that the distros as well as Android ship KASLR-capable kernels today, and the fact that TEXT_OFFSET was discoverable from the Image header from the very beginning, let's change this value to 0x0, in preparation for removing it entirely at a later date. Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20200415082922.32709-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 5861cb0f48fd..e48867c335d2 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -117,7 +117,7 @@ TEXT_OFFSET := $(shell awk "BEGIN {srand(); printf \"0x%06x\n\", \ int(2 * 1024 * 1024 / (2 ^ $(CONFIG_ARM64_PAGE_SHIFT)) * \ rand()) * (2 ^ $(CONFIG_ARM64_PAGE_SHIFT))}") else -TEXT_OFFSET := 0x00080000 +TEXT_OFFSET := 0x0 endif ifeq ($(CONFIG_KASAN_SW_TAGS), y) From 9d2d75ede59bc1edd8561f2ee9d4702a5ea0ae30 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 28 Apr 2020 09:57:00 +1000 Subject: [PATCH 038/148] arm64/kernel: Fix range on invalidating dcache for boot page tables Prior to commit 8eb7e28d4c642c31 ("arm64/mm: move runtime pgds to rodata"), idmap_pgd_dir, tramp_pg_dir, reserved_ttbr0, swapper_pg_dir, and init_pg_dir were contiguous at the end of the kernel image. The maintenance at the end of __create_page_tables assumed these were contiguous, and affected everything from the start of idmap_pg_dir to the end of init_pg_dir. That commit moved all but init_pg_dir into the .rodata section, with other data placed between idmap_pg_dir and init_pg_dir, but did not update the maintenance. Hence the maintenance is performed on much more data than necessary (but as the bootloader previously made this clean to the PoC there is no functional problem). As we only alter idmap_pg_dir, and init_pg_dir, we only need to perform maintenance for these. As the other dirs are in .rodata, the bootloader will have initialised them as expected and cleaned them to the PoC. The kernel will initialize them as necessary after enabling the MMU. This patch reworks the maintenance to only cover the idmap_pg_dir and init_pg_dir to avoid this unnecessary work. 
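To make the range arithmetic above concrete, here is a small standalone C model (not part of the patch; all addresses are invented) contrasting the old single sweep from idmap_pg_dir to the end of init_pg_dir with the per-region maintenance this change performs:

#include <stdio.h>

struct region { const char *name; unsigned long start, end; };

int main(void)
{
	/* Hypothetical layout: unrelated .rodata now sits between the two. */
	struct region idmap    = { "idmap_pg_dir",      0x1000,  0x4000  };
	struct region other    = { "unrelated .rodata", 0x4000,  0x20000 };
	struct region init_dir = { "init_pg_dir",       0x20000, 0x26000 };

	unsigned long old_sweep = init_dir.end - idmap.start;
	unsigned long new_sweep = (idmap.end - idmap.start) +
				  (init_dir.end - init_dir.start);

	printf("single sweep: %#lx bytes (needlessly covers %#lx bytes of %s)\n",
	       old_sweep, other.end - other.start, other.name);
	printf("per-region:   %#lx bytes\n", new_sweep);
	return 0;
}
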
Signed-off-by: Gavin Shan Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20200427235700.112220-1-gshan@redhat.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 1 + arch/arm64/kernel/head.S | 12 +++++++++--- arch/arm64/kernel/vmlinux.lds.S | 1 + 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 8c20e2bd6287..5caff09c6a3a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -457,6 +457,7 @@ extern pgd_t init_pg_dir[PTRS_PER_PGD]; extern pgd_t init_pg_end[]; extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t idmap_pg_end[]; extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 01aa238c7b10..a46caf2fd35e 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -393,13 +393,19 @@ SYM_FUNC_START_LOCAL(__create_page_tables) /* * Since the page tables have been populated with non-cacheable - * accesses (MMU disabled), invalidate the idmap and swapper page - * tables again to remove any speculatively loaded cache lines. + * accesses (MMU disabled), invalidate those tables again to + * remove any speculatively loaded cache lines. */ + dmb sy + adrp x0, idmap_pg_dir + adrp x1, idmap_pg_end + sub x1, x1, x0 + bl __inval_dcache_area + + adrp x0, init_pg_dir adrp x1, init_pg_end sub x1, x1, x0 - dmb sy bl __inval_dcache_area ret x28 diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index be8b28551c24..81cb501b84f3 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -133,6 +133,7 @@ SECTIONS idmap_pg_dir = .; . += IDMAP_DIR_SIZE; + idmap_pg_end = .; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 tramp_pg_dir = .; From f4be140fa33f9c9ba36374a52e1c317f0ba17089 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Wed, 15 Apr 2020 18:57:46 +0800 Subject: [PATCH 039/148] KVM: arm64: Drop PTE_S2_MEMATTR_MASK The only user of PTE_S2_MEMATTR_MASK macro had been removed since commit a501e32430d4 ("arm64: Clean up the default pgprot setting"). It has been about six years and no one has used it again. Let's drop it. Signed-off-by: Zenghui Yu Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20200415105746.314-1-yuzenghui@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-hwdef.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 6bf5e650da78..99315bdca0e6 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -190,7 +190,6 @@ * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ #define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2) -#define PTE_S2_MEMATTR_MASK (_AT(pteval_t, 0xf) << 2) /* * EL2/HYP PTE/PMD definitions From 9b5aaec441d3c76d6255049e99c446f14c89a284 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 14 Apr 2020 15:47:06 +0200 Subject: [PATCH 040/148] arm64: drop duplicate definitions of ID_AA64MMFR0_TGRAN constants A bunch of ID_AA64MMFR0_TGRAN_* constant definitions appear twice in arch/arm64/include/asm/sysreg.h, so drop the duplicates. 
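As a minimal aside (standalone C, not from the kernel tree), this is why the duplicated block never triggered a diagnostic: the preprocessor silently accepts a re-definition whose replacement list is identical, so two copies of the same constants can coexist unnoticed:

#include <stdio.h>

#define TGRAN4_SHIFT	28
#define TGRAN4_SHIFT	28	/* benign duplicate: identical replacement list */

int main(void)
{
	printf("TGRAN4_SHIFT = %d\n", TGRAN4_SHIFT);
	return 0;
}
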
Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Acked-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20200414134706.8435-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c4e896bf77f3..2dd3f4ca9780 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -804,17 +804,6 @@ #define ID_PFR1_SECURITY_SHIFT 4 #define ID_PFR1_PROGMOD_SHIFT 0 -#define ID_AA64MMFR0_TGRAN4_SHIFT 28 -#define ID_AA64MMFR0_TGRAN64_SHIFT 24 -#define ID_AA64MMFR0_TGRAN16_SHIFT 20 - -#define ID_AA64MMFR0_TGRAN4_NI 0xf -#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 -#define ID_AA64MMFR0_TGRAN64_NI 0xf -#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 -#define ID_AA64MMFR0_TGRAN16_NI 0x0 -#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 - #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT #define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED From 86b8783701246a22a734824674cc3f87a5ed9f13 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 28 Apr 2020 16:08:54 +0100 Subject: [PATCH 041/148] arm64: vdso: Add '-Bsymbolic' to ldflags Commit 28b1a824a4f44 ("arm64: vdso: Substitute gettimeofday() with C implementation") introduced an unused 'VDSO_LDFLAGS' variable to the vdso Makefile, suggesting that we should be passing '-Bsymbolic' to the linker, as we do when linking the compat vDSO. Although it's not strictly necessary to pass this flag, it would be required if we were to add any internal references to the exported symbols. It's also consistent with how we link the compat vdso so, since there's no real downside from passing it, add '-Bsymbolic' to the ldflags for the native vDSO. Fixes: 28b1a824a4f44 ("arm64: vdso: Substitute gettimeofday() with C implementation") Reported-by: Geoff Levand Signed-off-by: Vincenzo Frascino Cc: Will Deacon Cc: Catalin Marinas Cc: Ard Biesheuvel Link: https://lore.kernel.org/r/20200428150854.33130-1-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index dd2514bb1511..abf61c96edbc 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -17,14 +17,16 @@ obj-vdso := vgettimeofday.o note.o sigreturn.o targets := $(obj-vdso) vdso.so vdso.so.dbg obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) +# -Bsymbolic has been added for consistency with arm, the compat vDSO and +# potential future proofing if we end up with internal calls to the exported +# routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so +# preparation in build-time C")). 
ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ - --build-id -n -T + -Bsymbolic --build-id -n -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -VDSO_LDFLAGS := -Bsymbolic - CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os KBUILD_CFLAGS += $(DISABLE_LTO) KASAN_SANITIZE := n From 76085aff29f585139a37a10ea0a7daa63f70872c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Apr 2020 17:55:18 +0200 Subject: [PATCH 042/148] efi/libstub/arm64: align PE/COFF sections to segment alignment The arm64 kernel's segment alignment is fixed at 64 KB for any page size, and relocatable kernels are able to fix up any misalignment of the kernel image with respect to the 2 MB section alignment that is mandated by the arm64 boot protocol. Let's increase the PE/COFF section alignment to the same value, so that kernels loaded by the UEFI PE/COFF loader are guaranteed to end up at an address that doesn't require any reallocation to be done if the kernel is relocatable. Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20200413155521.24698-6-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/efi-header.S | 2 +- arch/arm64/kernel/vmlinux.lds.S | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S index 914999ccaf8a..6f58998ef647 100644 --- a/arch/arm64/kernel/efi-header.S +++ b/arch/arm64/kernel/efi-header.S @@ -32,7 +32,7 @@ optional_header: extra_header_fields: .quad 0 // ImageBase - .long SZ_4K // SectionAlignment + .long SEGMENT_ALIGN // SectionAlignment .long PECOFF_FILE_ALIGNMENT // FileAlignment .short 0 // MajorOperatingSystemVersion .short 0 // MinorOperatingSystemVersion diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 81cb501b84f3..3be632177631 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -170,7 +170,7 @@ SECTIONS *(.altinstr_replacement) } - . = ALIGN(PAGE_SIZE); + . = ALIGN(SEGMENT_ALIGN); __inittext_end = .; __initdata_begin = .; @@ -241,6 +241,7 @@ SECTIONS . += INIT_DIR_SIZE; init_pg_end = .; + . = ALIGN(SEGMENT_ALIGN); __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; From 7fb89e1d44cb6aec342e5cca6ed6371d818a428c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Apr 2020 11:27:53 +0200 Subject: [PATCH 043/148] ACPI/IORT: take _DMA methods into account for named components Where IORT nodes for named components can describe simple DMA limits expressed as the number of address bits a device can drive, _DMA methods in AML can express more complex topologies, involving DMA translation in particular. Currently, we only take this _DMA method into account if it appears on a ACPI device node describing a PCIe root complex, but it is perfectly acceptable to use them for named components as well, so let's ensure we take them into account in those cases too. Note that such named components are expected to reside under a pseudo-bus node such as the ACPI0004 container device, which should be providing the _DMA method as well as a _CRS (as mandated by the ACPI spec). This is not enforced by the code however. 
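For illustration only, here is a small userspace sketch of the lookup order after this change; the helper names and bit widths below are invented stand-ins rather than the kernel's APIs. The _DMA-derived range is tried first for every device, and only a "no such method" result falls back to the per-node IORT limit:

#include <stdio.h>
#include <errno.h>

/* Invented stand-ins for the real helpers. */
static int dma_range_from_dma_method(int *bits)
{
	(void)bits;
	return -ENODEV;		/* pretend the firmware provides no _DMA */
}

static int rc_dma_limit(int *bits) { *bits = 32; return 0; }	/* PCIe root complex node */
static int nc_dma_limit(int *bits) { *bits = 40; return 0; }	/* named component node   */

static int resolve_dma_bits(int is_pci, int *bits)
{
	int ret = dma_range_from_dma_method(bits);

	if (ret == -ENODEV)	/* no _DMA: fall back to the per-type limit */
		ret = is_pci ? rc_dma_limit(bits) : nc_dma_limit(bits);
	return ret;
}

int main(void)
{
	int bits = 0;

	if (!resolve_dma_bits(0, &bits))
		printf("named component can drive %d address bits\n", bits);
	return 0;
}
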
Reported-by: Andrei Warkentin Signed-off-by: Ard Biesheuvel Acked-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20200420092753.9819-1-ardb@kernel.org Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 7d04424189df..051b2ce03070 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1148,13 +1148,10 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) else size = 1ULL << 32; - if (dev_is_pci(dev)) { - ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size); - if (ret == -ENODEV) - ret = rc_dma_get_range(dev, &size); - } else { - ret = nc_dma_get_range(dev, &size); - } + ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size); + if (ret == -ENODEV) + ret = dev_is_pci(dev) ? rc_dma_get_range(dev, &size) + : nc_dma_get_range(dev, &size); if (!ret) { /* From 74fc72e77dc5c8033d1b47d2c8a7229b4b83a746 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 28 Apr 2020 17:49:18 +0100 Subject: [PATCH 044/148] arm64: vdso: remove aarch32_vdso_pages[] The aarch32_vdso_pages[] array is unnecessarily confusing. We only ever use the C_VECTORS and C_SIGPAGE slots, and the other slots are unused despite having corresponding mappings (sharing pages with the AArch64 vDSO). Let's make this clearer by using separate variables for the vectors page and the sigreturn page. A subsequent patch will clean up the C_* naming and conflation of pages with mappings. Note that since both the vectors page and sig page are single pages, and the mapping is a single page long, their pages array do not need to be NULL-terminated (and this was not the case with the existing code for the sig page as it was the last entry in the aarch32_vdso_pages array). There should be no functional change as a result of this patch. 
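A minimal standalone sketch of the NULL-termination point above (illustrative C, not kernel code): when the consumer walks exactly npages entries, a pointer to a single element already serves as a one-entry pages array and needs no terminator.

#include <stdio.h>

struct page { int id; };

/* Walks exactly npages entries; it never looks for a NULL terminator. */
static void map_pages(struct page **pages, unsigned long npages)
{
	for (unsigned long i = 0; i < npages; i++)
		printf("mapping page %d\n", pages[i]->id);
}

int main(void)
{
	static struct page vectors = { .id = 0 };
	struct page *vectors_page = &vectors;

	map_pages(&vectors_page, 1);	/* one-page mapping */
	return 0;
}
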
Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Vincenzo Frascino Cc: Will Deacon Link: https://lore.kernel.org/r/20200428164921.41641-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 033a48f30dbb..263bc6084c71 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -206,11 +206,16 @@ static int aarch32_vdso_mremap(const struct vm_special_mapping *sm, #define C_SIGPAGE 1 #define C_PAGES (C_SIGPAGE + 1) #endif /* CONFIG_COMPAT_VDSO */ -static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init; + +static struct page *aarch32_vectors_page __ro_after_init; +#ifndef CONFIG_COMPAT_VDSO +static struct page *aarch32_sig_page __ro_after_init; +#endif + static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = { { .name = "[vectors]", /* ABI */ - .pages = &aarch32_vdso_pages[C_VECTORS], + .pages = &aarch32_vectors_page, }, #ifdef CONFIG_COMPAT_VDSO { @@ -223,7 +228,7 @@ static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = { #else { .name = "[sigpage]", /* ABI */ - .pages = &aarch32_vdso_pages[C_SIGPAGE], + .pages = &aarch32_sig_page, }, #endif /* CONFIG_COMPAT_VDSO */ }; @@ -243,8 +248,8 @@ static int aarch32_alloc_kuser_vdso_page(void) memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start, kuser_sz); - aarch32_vdso_pages[C_VECTORS] = virt_to_page(vdso_page); - flush_dcache_page(aarch32_vdso_pages[C_VECTORS]); + aarch32_vectors_page = virt_to_page(vdso_page); + flush_dcache_page(aarch32_vectors_page); return 0; } @@ -275,8 +280,8 @@ static int __aarch32_alloc_vdso_pages(void) return -ENOMEM; memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz); - aarch32_vdso_pages[C_SIGPAGE] = virt_to_page(sigpage); - flush_dcache_page(aarch32_vdso_pages[C_SIGPAGE]); + aarch32_sig_page = virt_to_page(sigpage); + flush_dcache_page(aarch32_sig_page); ret = aarch32_alloc_kuser_vdso_page(); if (ret)
From 3ee16ff3437ca5388d8b60a122fde94f896f50d3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 28 Apr 2020 17:49:19 +0100 Subject: [PATCH 045/148] arm64: vdso: simplify arch_vdso_type ifdeffery
Currently we have some ifdeffery to determine the number of elements in enum arch_vdso_type as VDSO_TYPES, rather than the usual pattern of having the enum define this: | enum foo_type { | FOO_TYPE_A, | FOO_TYPE_B, | #ifdef CONFIG_C | FOO_TYPE_C, | #endif | NR_FOO_TYPES | } ... however, given we only use this number to size the vdso_lookup[] array, this is redundant anyway as the compiler can automatically size the array to fit all defined elements. So let's remove VDSO_TYPES to simplify the code. At the same time, let's use designated initializers for the array elements so that these are guaranteed to be at the expected indices, regardless of how we modify the structure. For clarity, the redundant explicit initialization of the enum elements is dropped. There should be no functional change as a result of this patch. 
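As a standalone illustration of the pattern being adopted (not the kernel code itself; all names below are invented), designated initializers let the compiler size the array while keeping each entry tied to its enum value, so no NR_*-style terminator is needed just for sizing:

#include <stdio.h>

enum vdso_kind {
	VDSO_NATIVE,
#ifdef WANT_COMPAT		/* stands in for CONFIG_COMPAT_VDSO */
	VDSO_COMPAT,
#endif
};

static const char *vdso_name[] = {
	[VDSO_NATIVE] = "vdso",
#ifdef WANT_COMPAT
	[VDSO_COMPAT] = "vdso32",
#endif
};

int main(void)
{
	size_t n = sizeof(vdso_name) / sizeof(vdso_name[0]);

	printf("%zu entr%s\n", n, n == 1 ? "y" : "ies");
	return 0;
}
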
Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Vincenzo Frascino Cc: Will Deacon Link: https://lore.kernel.org/r/20200428164921.41641-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 263bc6084c71..b4b01ac30112 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -35,16 +35,11 @@ extern char vdso32_start[], vdso32_end[]; /* vdso_lookup arch_index */ enum arch_vdso_type { - ARM64_VDSO = 0, + ARM64_VDSO, #ifdef CONFIG_COMPAT_VDSO - ARM64_VDSO32 = 1, + ARM64_VDSO32, #endif /* CONFIG_COMPAT_VDSO */ }; -#ifdef CONFIG_COMPAT_VDSO -#define VDSO_TYPES (ARM64_VDSO32 + 1) -#else -#define VDSO_TYPES (ARM64_VDSO + 1) -#endif /* CONFIG_COMPAT_VDSO */ struct __vdso_abi { const char *name; @@ -57,14 +52,14 @@ struct __vdso_abi { struct vm_special_mapping *cm; }; -static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = { - { +static struct __vdso_abi vdso_lookup[] __ro_after_init = { + [ARM64_VDSO] = { .name = "vdso", .vdso_code_start = vdso_start, .vdso_code_end = vdso_end, }, #ifdef CONFIG_COMPAT_VDSO - { + [ARM64_VDSO32] = { .name = "vdso32", .vdso_code_start = vdso32_start, .vdso_code_end = vdso32_end, From d3418f3839b667842eba8688ca8ebe84eca158db Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 28 Apr 2020 17:49:20 +0100 Subject: [PATCH 046/148] arm64: vdso: use consistent 'abi' nomenclature The current code doesn't use a consistent naming scheme for structures, enums, or variables, making it harder than necessary to determine the relationship between these. Let's make this easier by consistently using 'vdso_abi' nomenclature. The 'vdso_lookup' array is renamed to 'vdso_info' to describe what it contains rather than how it is consumed. There should be no functional change as a result of this patch. 
Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Vincenzo Frascino Cc: Will Deacon Link: https://lore.kernel.org/r/20200428164921.41641-4-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 69 ++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index b4b01ac30112..89ef61686362 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -33,15 +33,14 @@ extern char vdso_start[], vdso_end[]; extern char vdso32_start[], vdso32_end[]; #endif /* CONFIG_COMPAT_VDSO */ -/* vdso_lookup arch_index */ -enum arch_vdso_type { - ARM64_VDSO, +enum vdso_abi { + VDSO_ABI_AA64, #ifdef CONFIG_COMPAT_VDSO - ARM64_VDSO32, + VDSO_ABI_AA32, #endif /* CONFIG_COMPAT_VDSO */ }; -struct __vdso_abi { +struct vdso_abi_info { const char *name; const char *vdso_code_start; const char *vdso_code_end; @@ -52,14 +51,14 @@ struct __vdso_abi { struct vm_special_mapping *cm; }; -static struct __vdso_abi vdso_lookup[] __ro_after_init = { - [ARM64_VDSO] = { +static struct vdso_abi_info vdso_info[] __ro_after_init = { + [VDSO_ABI_AA64] = { .name = "vdso", .vdso_code_start = vdso_start, .vdso_code_end = vdso_end, }, #ifdef CONFIG_COMPAT_VDSO - [ARM64_VDSO32] = { + [VDSO_ABI_AA32] = { .name = "vdso32", .vdso_code_start = vdso32_start, .vdso_code_end = vdso32_end, @@ -76,13 +75,13 @@ static union { } vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = vdso_data_store.data; -static int __vdso_remap(enum arch_vdso_type arch_index, +static int __vdso_remap(enum vdso_abi abi, const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { unsigned long new_size = new_vma->vm_end - new_vma->vm_start; - unsigned long vdso_size = vdso_lookup[arch_index].vdso_code_end - - vdso_lookup[arch_index].vdso_code_start; + unsigned long vdso_size = vdso_info[abi].vdso_code_end - + vdso_info[abi].vdso_code_start; if (vdso_size != new_size) return -EINVAL; @@ -92,24 +91,24 @@ static int __vdso_remap(enum arch_vdso_type arch_index, return 0; } -static int __vdso_init(enum arch_vdso_type arch_index) +static int __vdso_init(enum vdso_abi abi) { int i; struct page **vdso_pagelist; unsigned long pfn; - if (memcmp(vdso_lookup[arch_index].vdso_code_start, "\177ELF", 4)) { + if (memcmp(vdso_info[abi].vdso_code_start, "\177ELF", 4)) { pr_err("vDSO is not a valid ELF object!\n"); return -EINVAL; } - vdso_lookup[arch_index].vdso_pages = ( - vdso_lookup[arch_index].vdso_code_end - - vdso_lookup[arch_index].vdso_code_start) >> + vdso_info[abi].vdso_pages = ( + vdso_info[abi].vdso_code_end - + vdso_info[abi].vdso_code_start) >> PAGE_SHIFT; /* Allocate the vDSO pagelist, plus a page for the data. */ - vdso_pagelist = kcalloc(vdso_lookup[arch_index].vdso_pages + 1, + vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); if (vdso_pagelist == NULL) @@ -120,18 +119,18 @@ static int __vdso_init(enum arch_vdso_type arch_index) /* Grab the vDSO code pages. 
*/ - pfn = sym_to_pfn(vdso_lookup[arch_index].vdso_code_start); + pfn = sym_to_pfn(vdso_info[abi].vdso_code_start); - for (i = 0; i < vdso_lookup[arch_index].vdso_pages; i++) + for (i = 0; i < vdso_info[abi].vdso_pages; i++) vdso_pagelist[i + 1] = pfn_to_page(pfn + i); - vdso_lookup[arch_index].dm->pages = &vdso_pagelist[0]; - vdso_lookup[arch_index].cm->pages = &vdso_pagelist[1]; + vdso_info[abi].dm->pages = &vdso_pagelist[0]; + vdso_info[abi].cm->pages = &vdso_pagelist[1]; return 0; } -static int __setup_additional_pages(enum arch_vdso_type arch_index, +static int __setup_additional_pages(enum vdso_abi abi, struct mm_struct *mm, struct linux_binprm *bprm, int uses_interp) @@ -139,7 +138,7 @@ static int __setup_additional_pages(enum arch_vdso_type arch_index, unsigned long vdso_base, vdso_text_len, vdso_mapping_len; void *ret; - vdso_text_len = vdso_lookup[arch_index].vdso_pages << PAGE_SHIFT; + vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ vdso_mapping_len = vdso_text_len + PAGE_SIZE; @@ -151,7 +150,7 @@ static int __setup_additional_pages(enum arch_vdso_type arch_index, ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, VM_READ|VM_MAYREAD, - vdso_lookup[arch_index].dm); + vdso_info[abi].dm); if (IS_ERR(ret)) goto up_fail; @@ -160,7 +159,7 @@ static int __setup_additional_pages(enum arch_vdso_type arch_index, ret = _install_special_mapping(mm, vdso_base, vdso_text_len, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_lookup[arch_index].cm); + vdso_info[abi].cm); if (IS_ERR(ret)) goto up_fail; @@ -179,7 +178,7 @@ static int __setup_additional_pages(enum arch_vdso_type arch_index, static int aarch32_vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { - return __vdso_remap(ARM64_VDSO32, sm, new_vma); + return __vdso_remap(VDSO_ABI_AA32, sm, new_vma); } #endif /* CONFIG_COMPAT_VDSO */ @@ -253,10 +252,10 @@ static int __aarch32_alloc_vdso_pages(void) { int ret; - vdso_lookup[ARM64_VDSO32].dm = &aarch32_vdso_spec[C_VVAR]; - vdso_lookup[ARM64_VDSO32].cm = &aarch32_vdso_spec[C_VDSO]; + vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_spec[C_VVAR]; + vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_spec[C_VDSO]; - ret = __vdso_init(ARM64_VDSO32); + ret = __vdso_init(VDSO_ABI_AA32); if (ret) return ret; @@ -354,7 +353,7 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) goto out; #ifdef CONFIG_COMPAT_VDSO - ret = __setup_additional_pages(ARM64_VDSO32, + ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm, uses_interp); @@ -371,7 +370,7 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { - return __vdso_remap(ARM64_VDSO, sm, new_vma); + return __vdso_remap(VDSO_ABI_AA64, sm, new_vma); } /* @@ -394,10 +393,10 @@ static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = { static int __init vdso_init(void) { - vdso_lookup[ARM64_VDSO].dm = &vdso_spec[A_VVAR]; - vdso_lookup[ARM64_VDSO].cm = &vdso_spec[A_VDSO]; + vdso_info[VDSO_ABI_AA64].dm = &vdso_spec[A_VVAR]; + vdso_info[VDSO_ABI_AA64].cm = &vdso_spec[A_VDSO]; - return __vdso_init(ARM64_VDSO); + return __vdso_init(VDSO_ABI_AA64); } arch_initcall(vdso_init); @@ -410,7 +409,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, if (down_write_killable(&mm->mmap_sem)) return -EINTR; - ret = __setup_additional_pages(ARM64_VDSO, + ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp); 
From 1d09094aa6205545cf895bc6965664a5f16af99a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 28 Apr 2020 17:49:21 +0100 Subject: [PATCH 047/148] arm64: vdso: use consistent 'map' nomenclature The current code doesn't use a consistent naming scheme for structures, enums, or variables, making it harder than necessary to determine the relationship between these. Let's make this easier by consistently using 'map' nomenclature for mappings created in userspace, minimizing redundant comments, and using designated array initializers to tie indices to their respective elements. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Vincenzo Frascino Cc: Will Deacon Link: https://lore.kernel.org/r/20200428164921.41641-5-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 64 ++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 89ef61686362..f3eea5e20a41 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -182,45 +182,36 @@ static int aarch32_vdso_mremap(const struct vm_special_mapping *sm, } #endif /* CONFIG_COMPAT_VDSO */ -/* - * aarch32_vdso_pages: - * 0 - kuser helpers - * 1 - sigreturn code - * or (CONFIG_COMPAT_VDSO): - * 0 - kuser helpers - * 1 - vdso data - * 2 - vdso code - */ -#define C_VECTORS 0 +enum aarch32_map { + AA32_MAP_VECTORS, /* kuser helpers */ #ifdef CONFIG_COMPAT_VDSO -#define C_VVAR 1 -#define C_VDSO 2 -#define C_PAGES (C_VDSO + 1) + AA32_MAP_VVAR, + AA32_MAP_VDSO, #else -#define C_SIGPAGE 1 -#define C_PAGES (C_SIGPAGE + 1) -#endif /* CONFIG_COMPAT_VDSO */ + AA32_MAP_SIGPAGE +#endif +}; static struct page *aarch32_vectors_page __ro_after_init; #ifndef CONFIG_COMPAT_VDSO static struct page *aarch32_sig_page __ro_after_init; #endif -static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = { - { +static struct vm_special_mapping aarch32_vdso_maps[] = { + [AA32_MAP_VECTORS] = { .name = "[vectors]", /* ABI */ .pages = &aarch32_vectors_page, }, #ifdef CONFIG_COMPAT_VDSO - { + [AA32_MAP_VVAR] = { .name = "[vvar]", }, - { + [AA32_MAP_VDSO] = { .name = "[vdso]", .mremap = aarch32_vdso_mremap, }, #else - { + [AA32_MAP_SIGPAGE] = { .name = "[sigpage]", /* ABI */ .pages = &aarch32_sig_page, }, @@ -252,8 +243,8 @@ static int __aarch32_alloc_vdso_pages(void) { int ret; - vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_spec[C_VVAR]; - vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_spec[C_VDSO]; + vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR]; + vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO]; ret = __vdso_init(VDSO_ABI_AA32); if (ret) @@ -305,7 +296,7 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm) ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC, - &aarch32_vdso_spec[C_VECTORS]); + &aarch32_vdso_maps[AA32_MAP_VECTORS]); return PTR_ERR_OR_ZERO(ret); } @@ -329,7 +320,7 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm) ret = _install_special_mapping(mm, addr, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, - &aarch32_vdso_spec[C_SIGPAGE]); + &aarch32_vdso_maps[AA32_MAP_SIGPAGE]); if (IS_ERR(ret)) goto out; @@ -373,19 +364,16 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return __vdso_remap(VDSO_ABI_AA64, sm, new_vma); } -/* - * aarch64_vdso_pages: - * 0 - vvar - * 1 - vdso - */ -#define A_VVAR 0 -#define A_VDSO 1 
-#define A_PAGES (A_VDSO + 1) -static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = { - { +enum aarch64_map { + AA64_MAP_VVAR, + AA64_MAP_VDSO, +}; + +static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = { + [AA64_MAP_VVAR] = { .name = "[vvar]", }, - { + [AA64_MAP_VDSO] = { .name = "[vdso]", .mremap = vdso_mremap, }, @@ -393,8 +381,8 @@ static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = { static int __init vdso_init(void) { - vdso_info[VDSO_ABI_AA64].dm = &vdso_spec[A_VVAR]; - vdso_info[VDSO_ABI_AA64].cm = &vdso_spec[A_VDSO]; + vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR]; + vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO]; return __vdso_init(VDSO_ABI_AA64); } From 184dbc152e398d7136dabd59ed3f5c5521935712 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 29 Apr 2020 14:19:21 -0500 Subject: [PATCH 048/148] arm64: silicon-errata.rst: Sort the Cortex-A55 entries The Arm silicon errata list is mostly sorted by CPU name with the exception of Cortex-A55, so let's sort it before adding more entries. Signed-off-by: Rob Herring Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20200429191921.32484-1-robh@kernel.org Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 2c08c628febd..936cf2a59ca4 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -64,6 +64,10 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #843419 | ARM64_ERRATUM_843419 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A55 | #1530923 | ARM64_ERRATUM_1530923 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A57 | #852523 | N/A | @@ -78,8 +82,6 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 | +----------------+-----------------+-----------------+-----------------------------+ -| ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 | -+----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A76 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A76 | #1165522 | ARM64_ERRATUM_1165522 | @@ -88,8 +90,6 @@ stable kernels. 
+----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A76 | #1463225 | ARM64_ERRATUM_1463225 | +----------------+-----------------+-----------------+-----------------------------+ -| ARM | Cortex-A55 | #1530923 | ARM64_ERRATUM_1530923 | -+----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | From ada66f1837594f38bc2db4f98c4c6589ecc8a7f6 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 29 Apr 2020 19:37:02 +0100 Subject: [PATCH 049/148] arm64: Reorder the macro arguments in the copy routines The current argument order is obviously buggy (memcpy.S): macro strb1 ptr, regB, val strb \ptr, [\regB], \val endm However, it cancels out as the calling sites in copy_template.S pass the address as the regB argument. Mechanically reorder the arguments to match the instruction mnemonics. There is no difference in objdump before and after this patch. Signed-off-by: Catalin Marinas Link: https://lore.kernel.org/r/20200429183702.28445-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/lib/copy_from_user.S | 32 ++++++++++++++++---------------- arch/arm64/lib/copy_in_user.S | 32 ++++++++++++++++---------------- arch/arm64/lib/copy_to_user.S | 32 ++++++++++++++++---------------- arch/arm64/lib/memcpy.S | 32 ++++++++++++++++---------------- 4 files changed, 64 insertions(+), 64 deletions(-) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 8e25e89ad01f..0f8a3a9e3795 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -20,36 +20,36 @@ * x0 - bytes not copied */ - .macro ldrb1 ptr, regB, val - uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val + .macro ldrb1 reg, ptr, val + uao_user_alternative 9998f, ldrb, ldtrb, \reg, \ptr, \val .endm - .macro strb1 ptr, regB, val - strb \ptr, [\regB], \val + .macro strb1 reg, ptr, val + strb \reg, [\ptr], \val .endm - .macro ldrh1 ptr, regB, val - uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val + .macro ldrh1 reg, ptr, val + uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val .endm - .macro strh1 ptr, regB, val - strh \ptr, [\regB], \val + .macro strh1 reg, ptr, val + strh \reg, [\ptr], \val .endm - .macro ldr1 ptr, regB, val - uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val + .macro ldr1 reg, ptr, val + uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val .endm - .macro str1 ptr, regB, val - str \ptr, [\regB], \val + .macro str1 reg, ptr, val + str \reg, [\ptr], \val .endm - .macro ldp1 ptr, regB, regC, val - uao_ldp 9998f, \ptr, \regB, \regC, \val + .macro ldp1 reg1, reg2, ptr, val + uao_ldp 9998f, \reg1, \reg2, \ptr, \val .endm - .macro stp1 ptr, regB, regC, val - stp \ptr, \regB, [\regC], \val + .macro stp1 reg1, reg2, ptr, val + stp \reg1, \reg2, [\ptr], \val .endm end .req x5 diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 667139013ed1..80e37ada0ee1 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -21,36 +21,36 @@ * Returns: * x0 - bytes not copied */ - .macro ldrb1 ptr, regB, val - uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val + .macro ldrb1 reg, ptr, val + uao_user_alternative 9998f, ldrb, ldtrb, \reg, \ptr, \val .endm - .macro strb1 ptr, regB, val - uao_user_alternative 9998f, strb, sttrb, \ptr, 
\regB, \val + .macro strb1 reg, ptr, val + uao_user_alternative 9998f, strb, sttrb, \reg, \ptr, \val .endm - .macro ldrh1 ptr, regB, val - uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val + .macro ldrh1 reg, ptr, val + uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val .endm - .macro strh1 ptr, regB, val - uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val + .macro strh1 reg, ptr, val + uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val .endm - .macro ldr1 ptr, regB, val - uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val + .macro ldr1 reg, ptr, val + uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val .endm - .macro str1 ptr, regB, val - uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val + .macro str1 reg, ptr, val + uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val .endm - .macro ldp1 ptr, regB, regC, val - uao_ldp 9998f, \ptr, \regB, \regC, \val + .macro ldp1 reg1, reg2, ptr, val + uao_ldp 9998f, \reg1, \reg2, \ptr, \val .endm - .macro stp1 ptr, regB, regC, val - uao_stp 9998f, \ptr, \regB, \regC, \val + .macro stp1 reg1, reg2, ptr, val + uao_stp 9998f, \reg1, \reg2, \ptr, \val .endm end .req x5 diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 1a104d0089f3..4ec59704b8f2 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -19,36 +19,36 @@ * Returns: * x0 - bytes not copied */ - .macro ldrb1 ptr, regB, val - ldrb \ptr, [\regB], \val + .macro ldrb1 reg, ptr, val + ldrb \reg, [\ptr], \val .endm - .macro strb1 ptr, regB, val - uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val + .macro strb1 reg, ptr, val + uao_user_alternative 9998f, strb, sttrb, \reg, \ptr, \val .endm - .macro ldrh1 ptr, regB, val - ldrh \ptr, [\regB], \val + .macro ldrh1 reg, ptr, val + ldrh \reg, [\ptr], \val .endm - .macro strh1 ptr, regB, val - uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val + .macro strh1 reg, ptr, val + uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val .endm - .macro ldr1 ptr, regB, val - ldr \ptr, [\regB], \val + .macro ldr1 reg, ptr, val + ldr \reg, [\ptr], \val .endm - .macro str1 ptr, regB, val - uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val + .macro str1 reg, ptr, val + uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val .endm - .macro ldp1 ptr, regB, regC, val - ldp \ptr, \regB, [\regC], \val + .macro ldp1 reg1, reg2, ptr, val + ldp \reg1, \reg2, [\ptr], \val .endm - .macro stp1 ptr, regB, regC, val - uao_stp 9998f, \ptr, \regB, \regC, \val + .macro stp1 reg1, reg2, ptr, val + uao_stp 9998f, \reg1, \reg2, \ptr, \val .endm end .req x5 diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index 9f382adfa88a..e0bf83d556f2 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -24,36 +24,36 @@ * Returns: * x0 - dest */ - .macro ldrb1 ptr, regB, val - ldrb \ptr, [\regB], \val + .macro ldrb1 reg, ptr, val + ldrb \reg, [\ptr], \val .endm - .macro strb1 ptr, regB, val - strb \ptr, [\regB], \val + .macro strb1 reg, ptr, val + strb \reg, [\ptr], \val .endm - .macro ldrh1 ptr, regB, val - ldrh \ptr, [\regB], \val + .macro ldrh1 reg, ptr, val + ldrh \reg, [\ptr], \val .endm - .macro strh1 ptr, regB, val - strh \ptr, [\regB], \val + .macro strh1 reg, ptr, val + strh \reg, [\ptr], \val .endm - .macro ldr1 ptr, regB, val - ldr \ptr, [\regB], \val + .macro ldr1 reg, ptr, val + ldr \reg, [\ptr], \val .endm - .macro str1 ptr, regB, val - str \ptr, [\regB], \val + .macro str1 reg, ptr, val + str \reg, [\ptr], \val .endm - .macro ldp1 
ptr, regB, regC, val - ldp \ptr, \regB, [\regC], \val + .macro ldp1 reg1, reg2, ptr, val + ldp \reg1, \reg2, [\ptr], \val .endm - .macro stp1 ptr, regB, regC, val - stp \ptr, \regB, [\regC], \val + .macro stp1 reg1, reg2, ptr, val + stp \reg1, \reg2, [\ptr], \val .endm .weak memcpy From 037d9303a7e7bac622e299817c5dd288346db07c Mon Sep 17 00:00:00 2001 From: Guixiong Wei Date: Fri, 1 May 2020 06:18:58 +1400 Subject: [PATCH 050/148] arm: mm: use __pfn_to_section() to get mem_section Replace the open-coded '__nr_to_section(pfn_to_section_nr(pfn))' in pfn_valid() with a more concise call to '__pfn_to_section(pfn)'. No functional change. Signed-off-by: Guixiong Wei Link: https://lore.kernel.org/r/20200430161858.11379-1-guixiongwei@gmail.com Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index e42727e3568e..d2df416b840e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -272,7 +272,7 @@ int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; - if (!valid_section(__nr_to_section(pfn_to_section_nr(pfn)))) + if (!valid_section(__pfn_to_section(pfn))) return 0; #endif return memblock_is_map_memory(addr); From 51075e0cb759a736e60ab4f3a5fed8670dba5852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stelmach?= Date: Thu, 30 Apr 2020 18:31:41 +0200 Subject: [PATCH 051/148] arm64: kexec_file: print appropriate variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The value of kbuf->memsz may be different than kbuf->bufsz after calling kexec_add_buffer(). Hence both values should be logged. Fixes: 52b2a8af74360 ("arm64: kexec_file: load initrd and device-tree") Fixes: 3751e728cef29 ("arm64: kexec_file: add crash dump support") Signed-off-by: Łukasz Stelmach Cc: AKASHI Takahiro Cc: James Morse Cc: Bhupesh Sharma Link: https://lore.kernel.org/r/20200430163142.27282-2-l.stelmach@samsung.com Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec_file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index e5cbf91aadfe..522e6f517ec0 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -284,7 +284,7 @@ int load_other_segments(struct kimage *image, image->arch.elf_headers_sz = headers_sz; pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - image->arch.elf_headers_mem, headers_sz, headers_sz); + image->arch.elf_headers_mem, kbuf.bufsz, kbuf.memsz); } /* load initrd */ @@ -305,7 +305,7 @@ int load_other_segments(struct kimage *image, initrd_load_addr = kbuf.mem; pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - initrd_load_addr, initrd_len, initrd_len); + initrd_load_addr, kbuf.bufsz, kbuf.memsz); } /* load dtb */ @@ -332,7 +332,7 @@ int load_other_segments(struct kimage *image, image->arch.dtb_mem = kbuf.mem; pr_debug("Loaded dtb at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - kbuf.mem, dtb_len, dtb_len); + kbuf.mem, kbuf.bufsz, kbuf.memsz); return 0; From 5810f00ade49f4710a9e834db5a3ad786a76e880 Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Thu, 2 Apr 2020 19:59:40 +0800 Subject: [PATCH 052/148] drivers/perf: arm_dsu_pmu: Avoid duplicate printouts platform_get_irq() already screams on failure, so the redundant call to dev_err() can be removed. 
Signed-off-by: Tang Bin Link: https://lore.kernel.org/r/20200402115940.4928-1-tangbin@cmss.chinamobile.com Signed-off-by: Will Deacon --- drivers/perf/arm_dsu_pmu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index 70968c8c09d7..518d0603e24f 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -690,10 +690,8 @@ static int dsu_pmu_device_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_warn(&pdev->dev, "Failed to find IRQ\n"); + if (irq < 0) return -EINVAL; - } name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_%d", PMUNAME, atomic_inc_return(&pmu_idx)); From 1f0d97bb7082ead69acc899cd5dae9deb8b91577 Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Thu, 2 Apr 2020 20:03:30 +0800 Subject: [PATCH 053/148] drivers/perf: arm_spe_pmu: Avoid duplicate printouts platform_get_irq() already screams on failure, so the redundant call to dev_err() can be removed. Signed-off-by: Tang Bin Link: https://lore.kernel.org/r/20200402120330.19468-1-tangbin@cmss.chinamobile.com Signed-off-by: Will Deacon --- drivers/perf/arm_spe_pmu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index b72c04852599..58a710eb4e5a 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -1133,10 +1133,8 @@ static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu) struct platform_device *pdev = spe_pmu->pdev; int irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "failed to get IRQ (%d)\n", irq); + if (irq < 0) return -ENXIO; - } if (!irq_is_percpu(irq)) { dev_err(&pdev->dev, "expected PPI but got SPI (%d)\n", irq); From 88562f06ebf56587788783e5420f25fde3ca36c8 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 23 Apr 2020 20:05:30 +0800 Subject: [PATCH 054/148] drivers/perf: hisi: Fix typo in events attribute array Fix up one typo: wr_dr_64b -> wr_ddr_64b. Fixes: 2bab3cf9104c ("perf: hisi: Add support for HiSilicon SoC HHA PMU driver") Signed-off-by: Shaokun Zhang Cc: Will Deacon Cc: Mark Rutland Link: https://lore.kernel.org/r/1587643530-34357-1-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 6a1dd72d8abb..e5af9d7e6e14 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -283,7 +283,7 @@ static struct attribute *hisi_hha_pmu_events_attr[] = { HISI_PMU_EVENT_ATTR(rx_wbip, 0x05), HISI_PMU_EVENT_ATTR(rx_wtistash, 0x11), HISI_PMU_EVENT_ATTR(rd_ddr_64b, 0x1c), - HISI_PMU_EVENT_ATTR(wr_dr_64b, 0x1d), + HISI_PMU_EVENT_ATTR(wr_ddr_64b, 0x1d), HISI_PMU_EVENT_ATTR(rd_ddr_128b, 0x1e), HISI_PMU_EVENT_ATTR(wr_ddr_128b, 0x1f), HISI_PMU_EVENT_ATTR(spill_num, 0x20), From 6d3b29d07c3c55532e09d004a1466358c71affa7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 1 May 2020 18:10:13 +0200 Subject: [PATCH 055/148] Revert "ACPI/IORT: Fix 'Number of IDs' handling in iort_id_map()" This reverts commit 3c23b83a88d00383e1d498cfa515249aa2fe0238. 
Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20200501161014.5935-2-ardb@kernel.org Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 57 ++------------------------------------- 1 file changed, 2 insertions(+), 55 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 051b2ce03070..d2fb33a43652 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -299,59 +299,6 @@ static acpi_status iort_match_node_callback(struct acpi_iort_node *node, return status; } -struct iort_workaround_oem_info { - char oem_id[ACPI_OEM_ID_SIZE + 1]; - char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; - u32 oem_revision; -}; - -static bool apply_id_count_workaround; - -static struct iort_workaround_oem_info wa_info[] __initdata = { - { - .oem_id = "HISI ", - .oem_table_id = "HIP07 ", - .oem_revision = 0, - }, { - .oem_id = "HISI ", - .oem_table_id = "HIP08 ", - .oem_revision = 0, - } -}; - -static void __init -iort_check_id_count_workaround(struct acpi_table_header *tbl) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(wa_info); i++) { - if (!memcmp(wa_info[i].oem_id, tbl->oem_id, ACPI_OEM_ID_SIZE) && - !memcmp(wa_info[i].oem_table_id, tbl->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) && - wa_info[i].oem_revision == tbl->oem_revision) { - apply_id_count_workaround = true; - pr_warn(FW_BUG "ID count for ID mapping entry is wrong, applying workaround\n"); - break; - } - } -} - -static inline u32 iort_get_map_max(struct acpi_iort_id_mapping *map) -{ - u32 map_max = map->input_base + map->id_count; - - /* - * The IORT specification revision D (Section 3, table 4, page 9) says - * Number of IDs = The number of IDs in the range minus one, but the - * IORT code ignored the "minus one", and some firmware did that too, - * so apply a workaround here to keep compatible with both the spec - * compliant and non-spec compliant firmwares. - */ - if (apply_id_count_workaround) - map_max--; - - return map_max; -} - static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in, u32 *rid_out) { @@ -368,7 +315,8 @@ static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in, return -ENXIO; } - if (rid_in < map->input_base || rid_in > iort_get_map_max(map)) + if (rid_in < map->input_base || + (rid_in >= map->input_base + map->id_count)) return -ENXIO; *rid_out = map->output_base + (rid_in - map->input_base); @@ -1700,6 +1648,5 @@ void __init acpi_iort_init(void) return; } - iort_check_id_count_workaround(iort_table); iort_init_platform_devices(); } From 539979b6ec62f7bba40b0452b0574a0f4ec4fe4e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 1 May 2020 18:10:14 +0200 Subject: [PATCH 056/148] ACPI/IORT: work around num_ids ambiguity The ID mapping table structure of the IORT table describes the size of a range using a num_ids field carrying the number of IDs in the region minus one. This has been misinterpreted in the past in the parsing code, and firmware is known to have shipped where this results in an ambiguity, where regions that should be adjacent have an overlap of one value. So let's work around this by detecting this case specifically: when resolving an ID translation, allow one that matches right at the end of a multi-ID region to be superseded by a subsequent one. To prevent potential regressions on broken firmware that happened to work before, only take the subsequent match into account if it occurs at the start of a mapping region. 
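A simplified userspace model of the ambiguity and the supersede rule described above (illustrative only; it mirrors the shape of iort_id_map() but is not the kernel code). With id_count meaning "number of IDs minus one", firmware that used the plain "count" reading produces adjacent regions that share their boundary value, and a hit exactly at the end of one region may really belong to the start of the next:

#include <stdio.h>
#include <stdbool.h>

struct mapping { unsigned int input_base, id_count, output_base; };

/* Returns true on a match; *ambiguous is set when the hit lands exactly at
 * the end of a multi-ID region and may be superseded by a later region
 * whose input_base starts there. */
static bool map_id(const struct mapping *m, unsigned int in,
		   unsigned int *out, bool *ambiguous)
{
	if (in < m->input_base || in > m->input_base + m->id_count)
		return false;

	*out = m->output_base + (in - m->input_base);
	*ambiguous = m->id_count > 0 && in == m->input_base + m->id_count;
	return true;
}

int main(void)
{
	/* Two regions the firmware intended to be adjacent (16 IDs each). */
	struct mapping a = { .input_base = 0,  .id_count = 16, .output_base = 100 };
	struct mapping b = { .input_base = 16, .id_count = 16, .output_base = 200 };
	unsigned int out;
	bool ambiguous;

	if (map_id(&a, 16, &out, &ambiguous))
		printf("region A: out=%u%s\n", out,
		       ambiguous ? " (may be superseded by region B)" : "");
	if (map_id(&b, 16, &out, &ambiguous))
		printf("region B: out=%u\n", out);
	return 0;
}
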
Signed-off-by: Ard Biesheuvel Reviewed-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20200501161014.5935-3-ardb@kernel.org Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 40 +++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index d2fb33a43652..b011d25af676 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -300,7 +300,7 @@ static acpi_status iort_match_node_callback(struct acpi_iort_node *node, } static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in, - u32 *rid_out) + u32 *rid_out, bool check_overlap) { /* Single mapping does not care for input id */ if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) { @@ -316,10 +316,34 @@ static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in, } if (rid_in < map->input_base || - (rid_in >= map->input_base + map->id_count)) + (rid_in > map->input_base + map->id_count)) return -ENXIO; + if (check_overlap) { + /* + * We already found a mapping for this input ID at the end of + * another region. If it coincides with the start of this + * region, we assume the prior match was due to the off-by-1 + * issue mentioned below, and allow it to be superseded. + * Otherwise, things are *really* broken, and we just disregard + * duplicate matches entirely to retain compatibility. + */ + pr_err(FW_BUG "[map %p] conflicting mapping for input ID 0x%x\n", + map, rid_in); + if (rid_in != map->input_base) + return -ENXIO; + } + *rid_out = map->output_base + (rid_in - map->input_base); + + /* + * Due to confusion regarding the meaning of the id_count field (which + * carries the number of IDs *minus 1*), we may have to disregard this + * match if it is at the end of the range, and overlaps with the start + * of another one. + */ + if (map->id_count > 0 && rid_in == map->input_base + map->id_count) + return -EAGAIN; return 0; } @@ -404,7 +428,8 @@ static struct acpi_iort_node *iort_node_map_id(struct acpi_iort_node *node, /* Parse the ID mapping tree to find specified node type */ while (node) { struct acpi_iort_id_mapping *map; - int i, index; + int i, index, rc = 0; + u32 out_ref = 0, map_id = id; if (IORT_TYPE_MASK(node->type) & type_mask) { if (id_out) @@ -438,15 +463,18 @@ static struct acpi_iort_node *iort_node_map_id(struct acpi_iort_node *node, if (i == index) continue; - if (!iort_id_map(map, node->type, id, &id)) + rc = iort_id_map(map, node->type, map_id, &id, out_ref); + if (!rc) break; + if (rc == -EAGAIN) + out_ref = map->output_reference; } - if (i == node->mapping_count) + if (i == node->mapping_count && !out_ref) goto fail_map; node = ACPI_ADD_PTR(struct acpi_iort_node, iort_table, - map->output_reference); + rc ? out_ref : map->output_reference); } fail_map: From 06607c7e93f2a4799346baa262d4304681e4b528 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 1 May 2020 12:54:28 +0100 Subject: [PATCH 057/148] arm64: entry: Refactor and modernise annotation for ret_to_user As part of an effort to clarify and clean up the assembler annotations new macros have been introduced which annotate the start and end of blocks of code in assembler files. Currently ret_to_user has an out of line slow path work_pending placed above the main function which makes annotating the start and end of these blocks of code awkward. 
Since work_pending is only referenced from within ret_to_user try to make things a bit clearer by moving it after the current ret_to_user and then marking both ret_to_user and work_pending as part of a single ret_to_user code block. Signed-off-by: Mark Brown Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200501115430.37315-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ddcde093c433..73e2c8f5b728 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -727,21 +727,10 @@ el0_error_naked: b ret_to_user SYM_CODE_END(el0_error) -/* - * Ok, we need to do extra processing, enter the slow path. - */ -work_pending: - mov x0, sp // 'regs' - bl do_notify_resume -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on // enabled while in userspace -#endif - ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step - b finish_ret_to_user /* * "slow" syscall return path. */ -ret_to_user: +SYM_CODE_START_LOCAL(ret_to_user) disable_daif gic_prio_kentry_setup tmp=x3 ldr x1, [tsk, #TSK_TI_FLAGS] @@ -753,7 +742,19 @@ finish_ret_to_user: bl stackleak_erase #endif kernel_exit 0 -ENDPROC(ret_to_user) + +/* + * Ok, we need to do extra processing, enter the slow path. + */ +work_pending: + mov x0, sp // 'regs' + bl do_notify_resume +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_on // enabled while in userspace +#endif + ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step + b finish_ret_to_user +SYM_CODE_END(ret_to_user) .popsection // .entry.text From 0343a7e46362c3836ff27ceb749ee2b6f78aa982 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 1 May 2020 12:54:29 +0100 Subject: [PATCH 058/148] arm64: kernel: Convert to modern annotations for assembly functions In an effort to clarify and simplify the annotation of assembly functions in the kernel new macros have been introduced. These replace ENTRY and ENDPROC and also add a new annotation for static functions which previously had no ENTRY equivalent. Update the annotations in the core kernel code to the new macros. Signed-off-by: Mark Brown Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200501115430.37315-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu-reset.S | 4 +- arch/arm64/kernel/efi-rt-wrapper.S | 4 +- arch/arm64/kernel/entry-fpsimd.S | 20 ++++----- arch/arm64/kernel/hibernate-asm.S | 16 +++---- arch/arm64/kernel/hyp-stub.S | 20 ++++----- arch/arm64/kernel/probes/kprobes_trampoline.S | 4 +- arch/arm64/kernel/reloc_test_syms.S | 44 +++++++++---------- arch/arm64/kernel/relocate_kernel.S | 4 +- arch/arm64/kernel/sleep.S | 12 ++--- arch/arm64/kernel/smccc-call.S | 8 ++-- 10 files changed, 68 insertions(+), 68 deletions(-) diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 38087b4c0432..4a18055b2ff9 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -29,7 +29,7 @@ * branch to what would be the reset vector. It must be executed with the * flat identity mapping. */ -ENTRY(__cpu_soft_restart) +SYM_CODE_START(__cpu_soft_restart) /* Clear sctlr_el1 flags. 
*/ mrs x12, sctlr_el1 mov_q x13, SCTLR_ELx_FLAGS @@ -47,6 +47,6 @@ ENTRY(__cpu_soft_restart) mov x1, x3 // arg1 mov x2, x4 // arg2 br x8 -ENDPROC(__cpu_soft_restart) +SYM_CODE_END(__cpu_soft_restart) .popsection diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index 3fc71106cb2b..1192c4bb48df 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -5,7 +5,7 @@ #include -ENTRY(__efi_rt_asm_wrapper) +SYM_FUNC_START(__efi_rt_asm_wrapper) stp x29, x30, [sp, #-32]! mov x29, sp @@ -35,4 +35,4 @@ ENTRY(__efi_rt_asm_wrapper) b.ne 0f ret 0: b efi_handle_corrupted_x18 // tail call -ENDPROC(__efi_rt_asm_wrapper) +SYM_FUNC_END(__efi_rt_asm_wrapper) diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 0f24eae8f3cc..f880dd63ddc3 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -16,34 +16,34 @@ * * x0 - pointer to struct fpsimd_state */ -ENTRY(fpsimd_save_state) +SYM_FUNC_START(fpsimd_save_state) fpsimd_save x0, 8 ret -ENDPROC(fpsimd_save_state) +SYM_FUNC_END(fpsimd_save_state) /* * Load the FP registers. * * x0 - pointer to struct fpsimd_state */ -ENTRY(fpsimd_load_state) +SYM_FUNC_START(fpsimd_load_state) fpsimd_restore x0, 8 ret -ENDPROC(fpsimd_load_state) +SYM_FUNC_END(fpsimd_load_state) #ifdef CONFIG_ARM64_SVE -ENTRY(sve_save_state) +SYM_FUNC_START(sve_save_state) sve_save 0, x1, 2 ret -ENDPROC(sve_save_state) +SYM_FUNC_END(sve_save_state) -ENTRY(sve_load_state) +SYM_FUNC_START(sve_load_state) sve_load 0, x1, x2, 3, x4 ret -ENDPROC(sve_load_state) +SYM_FUNC_END(sve_load_state) -ENTRY(sve_get_vl) +SYM_FUNC_START(sve_get_vl) _sve_rdvl 0, 1 ret -ENDPROC(sve_get_vl) +SYM_FUNC_END(sve_get_vl) #endif /* CONFIG_ARM64_SVE */ diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 6532105b3e32..8ccca660034e 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -65,7 +65,7 @@ * x5: physical address of a zero page that remains zero after resume */ .pushsection ".hibernate_exit.text", "ax" -ENTRY(swsusp_arch_suspend_exit) +SYM_CODE_START(swsusp_arch_suspend_exit) /* * We execute from ttbr0, change ttbr1 to our copied linear map tables * with a break-before-make via the zero page @@ -110,7 +110,7 @@ ENTRY(swsusp_arch_suspend_exit) cbz x24, 3f /* Do we need to re-initialise EL2? */ hvc #0 3: ret -ENDPROC(swsusp_arch_suspend_exit) +SYM_CODE_END(swsusp_arch_suspend_exit) /* * Restore the hyp stub. @@ -119,15 +119,15 @@ ENDPROC(swsusp_arch_suspend_exit) * * x24: The physical address of __hyp_stub_vectors */ -el1_sync: +SYM_CODE_START_LOCAL(el1_sync) msr vbar_el2, x24 eret -ENDPROC(el1_sync) +SYM_CODE_END(el1_sync) .macro invalid_vector label -\label: +SYM_CODE_START_LOCAL(\label) b \label -ENDPROC(\label) +SYM_CODE_END(\label) .endm invalid_vector el2_sync_invalid @@ -141,7 +141,7 @@ ENDPROC(\label) /* el2 vectors - switch el2 here while we restore the memory image. 
*/ .align 11 -ENTRY(hibernate_el2_vectors) +SYM_CODE_START(hibernate_el2_vectors) ventry el2_sync_invalid // Synchronous EL2t ventry el2_irq_invalid // IRQ EL2t ventry el2_fiq_invalid // FIQ EL2t @@ -161,6 +161,6 @@ ENTRY(hibernate_el2_vectors) ventry el1_irq_invalid // IRQ 32-bit EL1 ventry el1_fiq_invalid // FIQ 32-bit EL1 ventry el1_error_invalid // Error 32-bit EL1 -END(hibernate_el2_vectors) +SYM_CODE_END(hibernate_el2_vectors) .popsection diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index e473ead806ed..160f5881a0b7 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -21,7 +21,7 @@ .align 11 -ENTRY(__hyp_stub_vectors) +SYM_CODE_START(__hyp_stub_vectors) ventry el2_sync_invalid // Synchronous EL2t ventry el2_irq_invalid // IRQ EL2t ventry el2_fiq_invalid // FIQ EL2t @@ -41,11 +41,11 @@ ENTRY(__hyp_stub_vectors) ventry el1_irq_invalid // IRQ 32-bit EL1 ventry el1_fiq_invalid // FIQ 32-bit EL1 ventry el1_error_invalid // Error 32-bit EL1 -ENDPROC(__hyp_stub_vectors) +SYM_CODE_END(__hyp_stub_vectors) .align 11 -el1_sync: +SYM_CODE_START_LOCAL(el1_sync) cmp x0, #HVC_SET_VECTORS b.ne 2f msr vbar_el2, x1 @@ -68,12 +68,12 @@ el1_sync: 9: mov x0, xzr eret -ENDPROC(el1_sync) +SYM_CODE_END(el1_sync) .macro invalid_vector label -\label: +SYM_CODE_START_LOCAL(\label) b \label -ENDPROC(\label) +SYM_CODE_END(\label) .endm invalid_vector el2_sync_invalid @@ -106,15 +106,15 @@ ENDPROC(\label) * initialisation entry point. */ -ENTRY(__hyp_set_vectors) +SYM_FUNC_START(__hyp_set_vectors) mov x1, x0 mov x0, #HVC_SET_VECTORS hvc #0 ret -ENDPROC(__hyp_set_vectors) +SYM_FUNC_END(__hyp_set_vectors) -ENTRY(__hyp_reset_vectors) +SYM_FUNC_START(__hyp_reset_vectors) mov x0, #HVC_RESET_VECTORS hvc #0 ret -ENDPROC(__hyp_reset_vectors) +SYM_FUNC_END(__hyp_reset_vectors) diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S index 45dce03aaeaf..890ca72c5a51 100644 --- a/arch/arm64/kernel/probes/kprobes_trampoline.S +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -61,7 +61,7 @@ ldp x28, x29, [sp, #S_X28] .endm -ENTRY(kretprobe_trampoline) +SYM_CODE_START(kretprobe_trampoline) sub sp, sp, #S_FRAME_SIZE save_all_base_regs @@ -79,4 +79,4 @@ ENTRY(kretprobe_trampoline) add sp, sp, #S_FRAME_SIZE ret -ENDPROC(kretprobe_trampoline) +SYM_CODE_END(kretprobe_trampoline) diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S index 16a34f188f26..c50f45fa29fa 100644 --- a/arch/arm64/kernel/reloc_test_syms.S +++ b/arch/arm64/kernel/reloc_test_syms.S @@ -5,81 +5,81 @@ #include -ENTRY(absolute_data64) +SYM_FUNC_START(absolute_data64) ldr x0, 0f ret 0: .quad sym64_abs -ENDPROC(absolute_data64) +SYM_FUNC_END(absolute_data64) -ENTRY(absolute_data32) +SYM_FUNC_START(absolute_data32) ldr w0, 0f ret 0: .long sym32_abs -ENDPROC(absolute_data32) +SYM_FUNC_END(absolute_data32) -ENTRY(absolute_data16) +SYM_FUNC_START(absolute_data16) adr x0, 0f ldrh w0, [x0] ret 0: .short sym16_abs, 0 -ENDPROC(absolute_data16) +SYM_FUNC_END(absolute_data16) -ENTRY(signed_movw) +SYM_FUNC_START(signed_movw) movz x0, #:abs_g2_s:sym64_abs movk x0, #:abs_g1_nc:sym64_abs movk x0, #:abs_g0_nc:sym64_abs ret -ENDPROC(signed_movw) +SYM_FUNC_END(signed_movw) -ENTRY(unsigned_movw) +SYM_FUNC_START(unsigned_movw) movz x0, #:abs_g3:sym64_abs movk x0, #:abs_g2_nc:sym64_abs movk x0, #:abs_g1_nc:sym64_abs movk x0, #:abs_g0_nc:sym64_abs ret -ENDPROC(unsigned_movw) +SYM_FUNC_END(unsigned_movw) .align 12 .space 0xff8 
-ENTRY(relative_adrp) +SYM_FUNC_START(relative_adrp) adrp x0, sym64_rel add x0, x0, #:lo12:sym64_rel ret -ENDPROC(relative_adrp) +SYM_FUNC_END(relative_adrp) .align 12 .space 0xffc -ENTRY(relative_adrp_far) +SYM_FUNC_START(relative_adrp_far) adrp x0, memstart_addr add x0, x0, #:lo12:memstart_addr ret -ENDPROC(relative_adrp_far) +SYM_FUNC_END(relative_adrp_far) -ENTRY(relative_adr) +SYM_FUNC_START(relative_adr) adr x0, sym64_rel ret -ENDPROC(relative_adr) +SYM_FUNC_END(relative_adr) -ENTRY(relative_data64) +SYM_FUNC_START(relative_data64) adr x1, 0f ldr x0, [x1] add x0, x0, x1 ret 0: .quad sym64_rel - . -ENDPROC(relative_data64) +SYM_FUNC_END(relative_data64) -ENTRY(relative_data32) +SYM_FUNC_START(relative_data32) adr x1, 0f ldr w0, [x1] add x0, x0, x1 ret 0: .long sym64_rel - . -ENDPROC(relative_data32) +SYM_FUNC_END(relative_data32) -ENTRY(relative_data16) +SYM_FUNC_START(relative_data16) adr x1, 0f ldrsh w0, [x1] add x0, x0, x1 ret 0: .short sym64_rel - ., 0 -ENDPROC(relative_data16) +SYM_FUNC_END(relative_data16) diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index c40ce496c78b..542d6edc6806 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -26,7 +26,7 @@ * control_code_page, a special page which has been set up to be preserved * during the copy operation. */ -ENTRY(arm64_relocate_new_kernel) +SYM_CODE_START(arm64_relocate_new_kernel) /* Setup the list loop variables. */ mov x18, x2 /* x18 = dtb address */ @@ -111,7 +111,7 @@ ENTRY(arm64_relocate_new_kernel) mov x3, xzr br x17 -ENDPROC(arm64_relocate_new_kernel) +SYM_CODE_END(arm64_relocate_new_kernel) .align 3 /* To keep the 64-bit values below naturally aligned. */ diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 7b2f2e650c44..c1bf43cfaa2b 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -62,7 +62,7 @@ * * x0 = struct sleep_stack_data area */ -ENTRY(__cpu_suspend_enter) +SYM_FUNC_START(__cpu_suspend_enter) stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS] stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16] stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32] @@ -95,10 +95,10 @@ ENTRY(__cpu_suspend_enter) ldp x29, lr, [sp], #16 mov x0, #1 ret -ENDPROC(__cpu_suspend_enter) +SYM_FUNC_END(__cpu_suspend_enter) .pushsection ".idmap.text", "awx" -ENTRY(cpu_resume) +SYM_CODE_START(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly mov x0, #ARM64_CPU_RUNTIME bl __cpu_setup @@ -107,11 +107,11 @@ ENTRY(cpu_resume) bl __enable_mmu ldr x8, =_cpu_resume br x8 -ENDPROC(cpu_resume) +SYM_CODE_END(cpu_resume) .ltorg .popsection -ENTRY(_cpu_resume) +SYM_FUNC_START(_cpu_resume) mrs x1, mpidr_el1 adr_l x8, mpidr_hash // x8 = struct mpidr_hash virt address @@ -147,4 +147,4 @@ ENTRY(_cpu_resume) ldp x29, lr, [x29] mov x0, #0 ret -ENDPROC(_cpu_resume) +SYM_FUNC_END(_cpu_resume) diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S index 54655273d1e0..1f93809528a4 100644 --- a/arch/arm64/kernel/smccc-call.S +++ b/arch/arm64/kernel/smccc-call.S @@ -30,9 +30,9 @@ * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, * struct arm_smccc_quirk *quirk) */ -ENTRY(__arm_smccc_smc) +SYM_FUNC_START(__arm_smccc_smc) SMCCC smc -ENDPROC(__arm_smccc_smc) +SYM_FUNC_END(__arm_smccc_smc) EXPORT_SYMBOL(__arm_smccc_smc) /* @@ -41,7 +41,7 @@ EXPORT_SYMBOL(__arm_smccc_smc) * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, * struct arm_smccc_quirk *quirk) */ -ENTRY(__arm_smccc_hvc) 
+SYM_FUNC_START(__arm_smccc_hvc) SMCCC hvc -ENDPROC(__arm_smccc_hvc) +SYM_FUNC_END(__arm_smccc_hvc) EXPORT_SYMBOL(__arm_smccc_hvc) From 50479d58eaa321f916fd916615710c634b8aafaf Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 1 May 2020 12:54:30 +0100 Subject: [PATCH 059/148] arm64: Disable old style assembly annotations Now that we have converted arm64 over to the new style SYM_ assembler annotations select ARCH_USE_SYM_ANNOTATIONS so the old macros aren't available and we don't regress. Signed-off-by: Mark Brown Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200501115430.37315-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 40fb05d96c60..dd007e9ce596 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -63,6 +63,7 @@ config ARM64 select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_USE_SYM_ANNOTATIONS select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) From 02ab1f5018c3ad0b8677e797b5d3333d2e3b7f20 Mon Sep 17 00:00:00 2001 From: Andrew Scull Date: Mon, 4 May 2020 10:48:58 +0100 Subject: [PATCH 060/148] arm64: Unify WORKAROUND_SPECULATIVE_AT_{NVHE,VHE} Errata 1165522, 1319367 and 1530923 each allow TLB entries to be allocated as a result of a speculative AT instruction. In order to avoid mandating VHE on certain affected CPUs, apply the workaround to both the nVHE and the VHE case for all affected CPUs. Signed-off-by: Andrew Scull Acked-by: Will Deacon CC: Marc Zyngier CC: James Morse CC: Suzuki K Poulose CC: Will Deacon CC: Steven Price Link: https://lore.kernel.org/r/20200504094858.108917-1-ascull@google.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 41 ++++++++++++++----------------- arch/arm64/include/asm/cpucaps.h | 15 ++++++----- arch/arm64/include/asm/kvm_host.h | 4 --- arch/arm64/include/asm/kvm_hyp.h | 2 +- arch/arm64/kernel/cpu_errata.c | 25 +++++++++---------- arch/arm64/kvm/hyp/switch.c | 6 ++--- arch/arm64/kvm/hyp/sysreg-sr.c | 6 +++-- arch/arm64/kvm/hyp/tlb.c | 11 +++++---- 8 files changed, 51 insertions(+), 59 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 40fb05d96c60..c0298e8f1a2d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -524,13 +524,13 @@ config ARM64_ERRATUM_1418040 If unsure, say Y. -config ARM64_WORKAROUND_SPECULATIVE_AT_VHE +config ARM64_WORKAROUND_SPECULATIVE_AT bool config ARM64_ERRATUM_1165522 - bool "Cortex-A76: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" + bool "Cortex-A76: 1165522: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" default y - select ARM64_WORKAROUND_SPECULATIVE_AT_VHE + select ARM64_WORKAROUND_SPECULATIVE_AT help This option adds a workaround for ARM Cortex-A76 erratum 1165522. @@ -540,10 +540,23 @@ config ARM64_ERRATUM_1165522 If unsure, say Y. 
-config ARM64_ERRATUM_1530923 - bool "Cortex-A55: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" +config ARM64_ERRATUM_1319367 + bool "Cortex-A57/A72: 1319537: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" default y - select ARM64_WORKAROUND_SPECULATIVE_AT_VHE + select ARM64_WORKAROUND_SPECULATIVE_AT + help + This option adds work arounds for ARM Cortex-A57 erratum 1319537 + and A72 erratum 1319367 + + Cortex-A57 and A72 cores could end-up with corrupted TLBs by + speculating an AT instruction during a guest context switch. + + If unsure, say Y. + +config ARM64_ERRATUM_1530923 + bool "Cortex-A55: 1530923: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" + default y + select ARM64_WORKAROUND_SPECULATIVE_AT help This option adds a workaround for ARM Cortex-A55 erratum 1530923. @@ -569,22 +582,6 @@ config ARM64_ERRATUM_1286807 invalidated has been observed by other observers. The workaround repeats the TLBI+DSB operation. -config ARM64_WORKAROUND_SPECULATIVE_AT_NVHE - bool - -config ARM64_ERRATUM_1319367 - bool "Cortex-A57/A72: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" - default y - select ARM64_WORKAROUND_SPECULATIVE_AT_NVHE - help - This option adds work arounds for ARM Cortex-A57 erratum 1319537 - and A72 erratum 1319367 - - Cortex-A57 and A72 cores could end-up with corrupted TLBs by - speculating an AT instruction during a guest context switch. - - If unsure, say Y. - config ARM64_ERRATUM_1463225 bool "Cortex-A76: Software Step might prevent interrupt recognition" default y diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 8eb5a088ae65..dc70883062ba 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -44,7 +44,7 @@ #define ARM64_SSBS 34 #define ARM64_WORKAROUND_1418040 35 #define ARM64_HAS_SB 36 -#define ARM64_WORKAROUND_SPECULATIVE_AT_VHE 37 +#define ARM64_WORKAROUND_SPECULATIVE_AT 37 #define ARM64_HAS_ADDRESS_AUTH_ARCH 38 #define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39 #define ARM64_HAS_GENERIC_AUTH_ARCH 40 @@ -55,13 +55,12 @@ #define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 #define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 #define ARM64_WORKAROUND_1542419 47 -#define ARM64_WORKAROUND_SPECULATIVE_AT_NVHE 48 -#define ARM64_HAS_E0PD 49 -#define ARM64_HAS_RNG 50 -#define ARM64_HAS_AMU_EXTN 51 -#define ARM64_HAS_ADDRESS_AUTH 52 -#define ARM64_HAS_GENERIC_AUTH 53 +#define ARM64_HAS_E0PD 48 +#define ARM64_HAS_RNG 49 +#define ARM64_HAS_AMU_EXTN 50 +#define ARM64_HAS_ADDRESS_AUTH 51 +#define ARM64_HAS_GENERIC_AUTH 52 -#define ARM64_NCAPS 54 +#define ARM64_NCAPS 53 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 32c8a675e5a4..d0e7d7934a1f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -573,10 +573,6 @@ static inline bool kvm_arch_requires_vhe(void) if (system_supports_sve()) return true; - /* Some implementations have defects that confine them to VHE */ - if (cpus_have_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) - return true; - return false; } diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index fe57f60f06a8..238d2e049694 100644 --- 
a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -102,7 +102,7 @@ static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm) * above before we can switch to the EL1/EL0 translation regime used by * the guest. */ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE)); + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); } #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index df56d2295d16..95006a791026 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -635,7 +635,7 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, return is_midr_in_range(midr, &range) && has_dic; } -#if defined(CONFIG_HARDEN_EL2_VECTORS) || defined(CONFIG_ARM64_ERRATUM_1319367) +#if defined(CONFIG_HARDEN_EL2_VECTORS) static const struct midr_range ca57_a72[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), @@ -757,12 +757,16 @@ static const struct arm64_cpu_capabilities erratum_843419_list[] = { }; #endif -#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT_VHE -static const struct midr_range erratum_speculative_at_vhe_list[] = { +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT +static const struct midr_range erratum_speculative_at_list[] = { #ifdef CONFIG_ARM64_ERRATUM_1165522 /* Cortex A76 r0p0 to r2p0 */ MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), #endif +#ifdef CONFIG_ARM64_ERRATUM_1319367 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), +#endif #ifdef CONFIG_ARM64_ERRATUM_1530923 /* Cortex A55 r0p0 to r2p0 */ MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 2, 0), @@ -897,11 +901,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_RANGE_LIST(erratum_1418040_list), }, #endif -#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT_VHE +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT { - .desc = "ARM errata 1165522, 1530923", - .capability = ARM64_WORKAROUND_SPECULATIVE_AT_VHE, - ERRATA_MIDR_RANGE_LIST(erratum_speculative_at_vhe_list), + .desc = "ARM errata 1165522, 1319367, 1530923", + .capability = ARM64_WORKAROUND_SPECULATIVE_AT, + ERRATA_MIDR_RANGE_LIST(erratum_speculative_at_list), }, #endif #ifdef CONFIG_ARM64_ERRATUM_1463225 @@ -934,13 +938,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .matches = has_neoverse_n1_erratum_1542419, .cpu_enable = cpu_enable_trap_ctr_access, }, -#endif -#ifdef CONFIG_ARM64_ERRATUM_1319367 - { - .desc = "ARM erratum 1319367", - .capability = ARM64_WORKAROUND_SPECULATIVE_AT_NVHE, - ERRATA_MIDR_RANGE_LIST(ca57_a72), - }, #endif { } diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 8a1e81a400e0..1336e6f0acdf 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -138,7 +138,7 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) write_sysreg(val, cptr_el2); - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; isb(); @@ -181,7 +181,7 @@ static void deactivate_traps_vhe(void) * above before we can switch to the EL2/EL0 translation regime used by * the host. 
*/ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE)); + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); write_sysreg(vectors, vbar_el1); @@ -192,7 +192,7 @@ static void __hyp_text __deactivate_traps_nvhe(void) { u64 mdcr_el2 = read_sysreg(mdcr_el2); - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; /* diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 75b1925763f1..3234a9dc149f 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -118,7 +118,8 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - if (!cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (has_vhe() || + !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); } else if (!ctxt->__hyp_running_vcpu) { @@ -149,7 +150,8 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE) && + if (!has_vhe() && + cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && ctxt->__hyp_running_vcpu) { /* * Must only be done for host registers, hence the context diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index ceaddbe4279f..d063a576d511 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -23,7 +23,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, local_irq_save(cxt->flags); - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { /* * For CPUs that are affected by ARM errata 1165522 or 1530923, * we cannot trust stage-1 to be in a correct state at that @@ -63,7 +63,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, struct tlb_inv_context *cxt) { - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; /* @@ -79,8 +79,9 @@ static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, isb(); } + /* __load_guest_stage2() includes an ISB for the workaround. 
*/ __load_guest_stage2(kvm); - isb(); + asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, @@ -103,7 +104,7 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); isb(); - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { /* Restore the registers to what they were */ write_sysreg_el1(cxt->tcr, SYS_TCR); write_sysreg_el1(cxt->sctlr, SYS_SCTLR); @@ -117,7 +118,7 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, { write_sysreg(0, vttbr_el2); - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { /* Ensure write of the host VMID */ isb(); /* Restore the host's TCR_EL1 */ From bd507ca2773b20f554a3e14f7aa9080e45ee399a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 4 May 2020 14:13:23 +0100 Subject: [PATCH 061/148] arm64: insn: Add constants for new HINT instruction decode Add constants for decoding newer instructions defined in the HINT space. Since we are now decoding both the op2 and CRm fields rename the enum as well; this is compatible with what the existing users are doing. Signed-off-by: Mark Brown Acked-by: Catalin Marinas Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200504131326.18290-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/insn.h | 28 ++++++++++++++++++++++++++-- arch/arm64/kernel/insn.c | 2 +- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index bb313dde58a4..575675145fe2 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -39,13 +39,37 @@ enum aarch64_insn_encoding_class { * system instructions */ }; -enum aarch64_insn_hint_op { +enum aarch64_insn_hint_cr_op { AARCH64_INSN_HINT_NOP = 0x0 << 5, AARCH64_INSN_HINT_YIELD = 0x1 << 5, AARCH64_INSN_HINT_WFE = 0x2 << 5, AARCH64_INSN_HINT_WFI = 0x3 << 5, AARCH64_INSN_HINT_SEV = 0x4 << 5, AARCH64_INSN_HINT_SEVL = 0x5 << 5, + + AARCH64_INSN_HINT_XPACLRI = 0x07 << 5, + AARCH64_INSN_HINT_PACIA_1716 = 0x08 << 5, + AARCH64_INSN_HINT_PACIB_1716 = 0x0A << 5, + AARCH64_INSN_HINT_AUTIA_1716 = 0x0C << 5, + AARCH64_INSN_HINT_AUTIB_1716 = 0x0E << 5, + AARCH64_INSN_HINT_PACIAZ = 0x18 << 5, + AARCH64_INSN_HINT_PACIASP = 0x19 << 5, + AARCH64_INSN_HINT_PACIBZ = 0x1A << 5, + AARCH64_INSN_HINT_PACIBSP = 0x1B << 5, + AARCH64_INSN_HINT_AUTIAZ = 0x1C << 5, + AARCH64_INSN_HINT_AUTIASP = 0x1D << 5, + AARCH64_INSN_HINT_AUTIBZ = 0x1E << 5, + AARCH64_INSN_HINT_AUTIBSP = 0x1F << 5, + + AARCH64_INSN_HINT_ESB = 0x10 << 5, + AARCH64_INSN_HINT_PSB = 0x11 << 5, + AARCH64_INSN_HINT_TSB = 0x12 << 5, + AARCH64_INSN_HINT_CSDB = 0x14 << 5, + + AARCH64_INSN_HINT_BTI = 0x20 << 5, + AARCH64_INSN_HINT_BTIC = 0x22 << 5, + AARCH64_INSN_HINT_BTIJ = 0x24 << 5, + AARCH64_INSN_HINT_BTIJC = 0x26 << 5, }; enum aarch64_insn_imm_type { @@ -370,7 +394,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, enum aarch64_insn_branch_type type); u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr, enum aarch64_insn_condition cond); -u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_op op); +u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op); u32 aarch64_insn_gen_nop(void); u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg, enum aarch64_insn_branch_type type); diff --git 
a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 4a9e773a177f..d63d9cd8b4a2 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -574,7 +574,7 @@ u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr, offset >> 2); } -u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op) +u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op) { return aarch64_insn_get_hint_value() | op; } From 07dcd9677c5d0f4735041885d3c3c348cc11b75c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 4 May 2020 14:13:24 +0100 Subject: [PATCH 062/148] arm64: insn: Provide a better name for aarch64_insn_is_nop() The current aarch64_insn_is_nop() has exactly one caller which uses it solely to identify if the instruction is a HINT that can safely be stepped, requiring us to list things that aren't NOPs and make things more confusing than they need to be. Rename the function to reflect the actual usage and make things more clear. Suggested-by: Mark Rutland Signed-off-by: Mark Brown Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200504131326.18290-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/insn.h | 2 +- arch/arm64/kernel/insn.c | 3 +-- arch/arm64/kernel/probes/decode-insn.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 575675145fe2..0bc46149e491 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -368,7 +368,7 @@ __AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) #undef __AARCH64_INSN_FUNCS -bool aarch64_insn_is_nop(u32 insn); +bool aarch64_insn_is_steppable_hint(u32 insn); bool aarch64_insn_is_branch_imm(u32 insn); static inline bool aarch64_insn_is_adr_adrp(u32 insn) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index d63d9cd8b4a2..0829bb5b45ec 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -51,8 +51,7 @@ enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn) return aarch64_insn_encoding_class[(insn >> 25) & 0xf]; } -/* NOP is an alias of HINT */ -bool __kprobes aarch64_insn_is_nop(u32 insn) +bool __kprobes aarch64_insn_is_steppable_hint(u32 insn) { if (!aarch64_insn_is_hint(insn)) return false; diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index b78fac9e546c..263d5fba4c8a 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -46,7 +46,7 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn) * except for the NOP case. */ if (aarch64_insn_is_hint(insn)) - return aarch64_insn_is_nop(insn); + return aarch64_insn_is_steppable_hint(insn); return true; } From c71052cc9e14db90d9a95fe0991ecc72556f1818 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 4 May 2020 14:13:25 +0100 Subject: [PATCH 063/148] arm64: insn: Don't assume unrecognized HINTs are skippable Currently the kernel assumes that any HINT which it does not explicitly recognise is skippable. This is not robust as new instructions may be added which need special handling, and in any case software should only be using explicit NOP instructions for deliberate NOPs. This has the effect of rendering PAC and BTI instructions unprobeable which means that probes can't be inserted on the first instruction of functions built with those features. 
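As an aside, the change amounts to moving from a deny-list to an allow-list when deciding whether a HINT may be single stepped. A minimal standalone sketch of that logic is shown below (illustrative only: the names HINT_NOP and hint_is_steppable() are placeholders, not the kernel's; the 0xFE0 mask selecting the CRm:op2 field is taken from the patch itself):

	#include <stdbool.h>
	#include <stdint.h>

	#define HINT_NOP	(0x0u << 5)	/* CRm:op2 value for NOP */

	/* Only explicitly recognised HINT encodings are treated as steppable. */
	static bool hint_is_steppable(uint32_t insn)
	{
		switch (insn & 0xFE0) {		/* CRm:op2 field of the HINT encoding */
		case HINT_NOP:
			return true;
		default:
			return false;		/* unrecognised HINTs are not assumed safe */
		}
	}
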
Signed-off-by: Mark Brown Acked-by: Catalin Marinas Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200504131326.18290-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/insn.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 0829bb5b45ec..15c3f0643e3b 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -57,14 +57,10 @@ bool __kprobes aarch64_insn_is_steppable_hint(u32 insn) return false; switch (insn & 0xFE0) { - case AARCH64_INSN_HINT_YIELD: - case AARCH64_INSN_HINT_WFE: - case AARCH64_INSN_HINT_WFI: - case AARCH64_INSN_HINT_SEV: - case AARCH64_INSN_HINT_SEVL: - return false; - default: + case AARCH64_INSN_HINT_NOP: return true; + default: + return false; } } From 47d67e4d19184ec9f8091c98ad4df0411baa8719 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 4 May 2020 14:13:26 +0100 Subject: [PATCH 064/148] arm64: insn: Report PAC and BTI instructions as skippable The PAC and BTI instructions can be safely skipped so report them as such, allowing them to be probed. Signed-off-by: Mark Brown Acked-by: Catalin Marinas Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200504131326.18290-5-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/insn.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 15c3f0643e3b..6439af794ec4 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -57,6 +57,23 @@ bool __kprobes aarch64_insn_is_steppable_hint(u32 insn) return false; switch (insn & 0xFE0) { + case AARCH64_INSN_HINT_XPACLRI: + case AARCH64_INSN_HINT_PACIA_1716: + case AARCH64_INSN_HINT_PACIB_1716: + case AARCH64_INSN_HINT_AUTIA_1716: + case AARCH64_INSN_HINT_AUTIB_1716: + case AARCH64_INSN_HINT_PACIAZ: + case AARCH64_INSN_HINT_PACIASP: + case AARCH64_INSN_HINT_PACIBZ: + case AARCH64_INSN_HINT_PACIBSP: + case AARCH64_INSN_HINT_AUTIAZ: + case AARCH64_INSN_HINT_AUTIASP: + case AARCH64_INSN_HINT_AUTIBZ: + case AARCH64_INSN_HINT_AUTIBSP: + case AARCH64_INSN_HINT_BTI: + case AARCH64_INSN_HINT_BTIC: + case AARCH64_INSN_HINT_BTIJ: + case AARCH64_INSN_HINT_BTIJC: case AARCH64_INSN_HINT_NOP: return true; default: From da7bad98eebb7bedcb37cf8c34fbb0de470c2b99 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 4 May 2020 17:59:37 +0530 Subject: [PATCH 065/148] arm64/cpuinfo: Move device_initcall() near cpuinfo_regs_init() This moves device_initcall() near cpuinfo_regs_init() making the calling sequence clear. Besides it is a standard practice to have device_initcall() (any __initcall for that matter) just after the function it actually calls. 
Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Brown Cc: Mark Rutland Cc: Suzuki Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Acked-by: Mark Rutland Link: https://lore.kernel.org/r/1588595377-4503-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpuinfo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 86136075ae41..a515d8f3639e 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -311,6 +311,8 @@ static int __init cpuinfo_regs_init(void) } return 0; } +device_initcall(cpuinfo_regs_init); + static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) { unsigned int cpu = smp_processor_id(); @@ -403,5 +405,3 @@ void __init cpuinfo_store_boot_cpu(void) boot_cpu_data = *info; init_cpu_features(&boot_cpu_data); } - -device_initcall(cpuinfo_regs_init); From caf2cd610dbbfb533c8b84255fbe77dca36229d0 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 22 Apr 2020 13:28:23 +0100 Subject: [PATCH 066/148] firmware: arm_sdei: Drop check for /firmware/ node and always register driver As with most of the drivers, let us register this driver unconditionally by dropping the checks for presence of firmware nodes(DT) or entries(ACPI). Further, as mentioned in the commit acafce48b07b ("firmware: arm_sdei: Fix DT platform device creation"), the core takes care of creation of platform device when the appropriate device node is found and probe is called accordingly. Let us check only for the presence of ACPI firmware entry before creating the platform device and flag warning if we fail. Signed-off-by: Sudeep Holla Reviewed-by: James Morse Cc: James Morse Link: https://lore.kernel.org/r/20200422122823.1390-1-sudeep.holla@arm.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 334c8be0c11f..5afd7409e6fa 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -1079,26 +1079,9 @@ static struct platform_driver sdei_driver = { .probe = sdei_probe, }; -static bool __init sdei_present_dt(void) -{ - struct device_node *np, *fw_np; - - fw_np = of_find_node_by_name(NULL, "firmware"); - if (!fw_np) - return false; - - np = of_find_matching_node(fw_np, sdei_of_match); - if (!np) - return false; - of_node_put(np); - - return true; -} - static bool __init sdei_present_acpi(void) { acpi_status status; - struct platform_device *pdev; struct acpi_table_header *sdei_table_header; if (acpi_disabled) @@ -1113,20 +1096,24 @@ static bool __init sdei_present_acpi(void) if (ACPI_FAILURE(status)) return false; - pdev = platform_device_register_simple(sdei_driver.driver.name, 0, NULL, - 0); - if (IS_ERR(pdev)) - return false; - return true; } static int __init sdei_init(void) { - if (sdei_present_dt() || sdei_present_acpi()) - platform_driver_register(&sdei_driver); + int ret = platform_driver_register(&sdei_driver); - return 0; + if (!ret && sdei_present_acpi()) { + struct platform_device *pdev; + + pdev = platform_device_register_simple(sdei_driver.driver.name, + 0, NULL, 0); + if (IS_ERR(pdev)) + pr_info("Failed to register ACPI:SDEI platform device %ld\n", + PTR_ERR(pdev)); + } + + return ret; } /* From ebcea694e69270f563e69d82bd9930905c954ef1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 16 
Apr 2020 13:56:57 +0200 Subject: [PATCH 067/148] arm64: Sort vendor-specific errata Sort configuration options for vendor-specific errata by vendor, to increase uniformity. Move ARM64_WORKAROUND_REPEAT_TLBI up, as it is also selected by ARM64_ERRATUM_1286807. Acked-by: Mark Rutland Acked-by: Arnd Bergmann Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 72 +++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 40fb05d96c60..8d33d7fed6d8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -553,6 +553,9 @@ config ARM64_ERRATUM_1530923 If unsure, say Y. +config ARM64_WORKAROUND_REPEAT_TLBI + bool + config ARM64_ERRATUM_1286807 bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation" default y @@ -694,6 +697,35 @@ config CAVIUM_TX2_ERRATUM_219 If unsure, say Y. +config FUJITSU_ERRATUM_010001 + bool "Fujitsu-A64FX erratum E#010001: Undefined fault may occur wrongly" + default y + help + This option adds a workaround for Fujitsu-A64FX erratum E#010001. + On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), memory + accesses may cause undefined fault (Data abort, DFSC=0b111111). + This fault occurs under a specific hardware condition when a + load/store instruction performs an address translation using: + case-1 TTBR0_EL1 with TCR_EL1.NFD0 == 1. + case-2 TTBR0_EL2 with TCR_EL2.NFD0 == 1. + case-3 TTBR1_EL1 with TCR_EL1.NFD1 == 1. + case-4 TTBR1_EL2 with TCR_EL2.NFD1 == 1. + + The workaround is to ensure these bits are clear in TCR_ELx. + The workaround only affects the Fujitsu-A64FX. + + If unsure, say Y. + +config HISILICON_ERRATUM_161600802 + bool "Hip07 161600802: Erroneous redistributor VLPI base" + default y + help + The HiSilicon Hip07 SoC uses the wrong redistributor base + when issued ITS commands such as VMOVP and VMAPP, and requires + a 128kB offset to be applied to the target address in this commands. + + If unsure, say Y. + config QCOM_FALKOR_ERRATUM_1003 bool "Falkor E1003: Incorrect translation due to ASID change" default y @@ -705,9 +737,6 @@ config QCOM_FALKOR_ERRATUM_1003 is unchanged. Work around the erratum by invalidating the walk cache entries for the trampoline before entering the kernel proper. -config ARM64_WORKAROUND_REPEAT_TLBI - bool - config QCOM_FALKOR_ERRATUM_1009 bool "Falkor E1009: Prematurely complete a DSB after a TLBI" default y @@ -729,25 +758,6 @@ config QCOM_QDF2400_ERRATUM_0065 If unsure, say Y. -config SOCIONEXT_SYNQUACER_PREITS - bool "Socionext Synquacer: Workaround for GICv3 pre-ITS" - default y - help - Socionext Synquacer SoCs implement a separate h/w block to generate - MSI doorbell writes with non-zero values for the device ID. - - If unsure, say Y. - -config HISILICON_ERRATUM_161600802 - bool "Hip07 161600802: Erroneous redistributor VLPI base" - default y - help - The HiSilicon Hip07 SoC uses the wrong redistributor base - when issued ITS commands such as VMOVP and VMAPP, and requires - a 128kB offset to be applied to the target address in this commands. - - If unsure, say Y. - config QCOM_FALKOR_ERRATUM_E1041 bool "Falkor E1041: Speculative instruction fetches might cause errant memory access" default y @@ -758,22 +768,12 @@ config QCOM_FALKOR_ERRATUM_E1041 If unsure, say Y. 
-config FUJITSU_ERRATUM_010001 - bool "Fujitsu-A64FX erratum E#010001: Undefined fault may occur wrongly" +config SOCIONEXT_SYNQUACER_PREITS + bool "Socionext Synquacer: Workaround for GICv3 pre-ITS" default y help - This option adds a workaround for Fujitsu-A64FX erratum E#010001. - On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), memory - accesses may cause undefined fault (Data abort, DFSC=0b111111). - This fault occurs under a specific hardware condition when a - load/store instruction performs an address translation using: - case-1 TTBR0_EL1 with TCR_EL1.NFD0 == 1. - case-2 TTBR0_EL2 with TCR_EL2.NFD0 == 1. - case-3 TTBR1_EL1 with TCR_EL1.NFD1 == 1. - case-4 TTBR1_EL2 with TCR_EL2.NFD1 == 1. - - The workaround is to ensure these bits are clear in TCR_ELx. - The workaround only affects the Fujitsu-A64FX. + Socionext Synquacer SoCs implement a separate h/w block to generate + MSI doorbell writes with non-zero values for the device ID. If unsure, say Y. From 433022b58e628418020155e1f6a3d59c06a474c0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 5 May 2020 11:45:21 +0100 Subject: [PATCH 068/148] arm64: cpufeature: Extend comment to describe absence of field info When a feature register field is omitted from the description of the register, the corresponding bits are treated as STRICT RES0, including for KVM guests. This is subtly different to declaring the field as HIDDEN/STRICT/EXACT/0, so update the comment to call this out. Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c1d44d127baa..9b05843d67af 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -53,6 +53,11 @@ * arbitrary physical CPUs, but some features not present on the host are * also advertised and emulated. Look at sys_reg_descs[] for the gory * details. + * + * - If the arm64_ftr_bits[] for a register has a missing field, then this + * field is treated as STRICT RES0, including for read_sanitised_ftr_reg(). + * This is stronger than FTR_HIDDEN and can be used to hide features from + * KVM guests. */ #define pr_fmt(fmt) "CPU features: " fmt From eeb2d87ece1219cff33e5107c2a6d57803354d15 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 5 May 2020 13:08:02 +0100 Subject: [PATCH 069/148] arm64: cpufeature: Group indexed system register definitions by name Some system registers contain an index in the name (e.g. ID_MMFR_EL1) and, while this index often follows the register encoding, newer additions to the architecture are necessarily tacked on the end. Sorting these registers by encoding therefore becomes a bit of a mess. Group the indexed system register definitions by name so that it's easier to read and will hopefully reduce the chance of us accidentally introducing duplicate definitions in the future. Acked-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 2dd3f4ca9780..194684301df0 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -105,6 +105,10 @@ #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) #define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) +/* + * System registers, organised loosely by encoding but grouped together + * where the architected name contains an index. e.g. ID_MMFR_EL1. 
+ */ #define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) #define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) #define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) @@ -140,6 +144,7 @@ #define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) #define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) #define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) +#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) #define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0) #define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1) @@ -147,7 +152,6 @@ #define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) #define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) #define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) -#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) #define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7) #define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) From ab8ad279ceac4fc78ae4dcf1a26326e05695e537 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Mon, 4 May 2020 18:05:18 +0100 Subject: [PATCH 070/148] arm64: cacheflush: Fix KGDB trap detection flush_icache_range() contains a bodge to avoid issuing IPIs when the kgdb trap handler is running because issuing IPIs is unsafe (and not needed) in this execution context. However the current test, based on kgdb_connected is flawed: it both over-matches and under-matches. The over match occurs because kgdb_connected is set when gdb attaches to the stub and remains set during normal running. This is relatively harmelss because in almost all cases irq_disabled() will be false. The under match is more serious. When kdb is used instead of kgdb to access the debugger then kgdb_connected is not set in all the places that the debug core updates sw breakpoints (and hence flushes the icache). This can lead to deadlock. Fix by replacing the ad-hoc check with the proper kgdb macro. This also allows us to drop the #ifdef wrapper. Fixes: 3b8c9f1cdfc5 ("arm64: IPI each CPU after invalidating the I-cache for kernel mappings") Signed-off-by: Daniel Thompson Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20200504170518.2959478-1-daniel.thompson@linaro.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index e6cca3d4acf7..ce50c1f1f1ea 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -79,7 +79,7 @@ static inline void flush_icache_range(unsigned long start, unsigned long end) * IPI all online CPUs so that they undergo a context synchronization * event and are forced to refetch the new instructions. */ -#ifdef CONFIG_KGDB + /* * KGDB performs cache maintenance with interrupts disabled, so we * will deadlock trying to IPI the secondary CPUs. In theory, we can @@ -89,9 +89,9 @@ static inline void flush_icache_range(unsigned long start, unsigned long end) * the patching operation, so we don't need extra IPIs here anyway. * In which case, add a KGDB-specific bodge and return early. */ - if (kgdb_connected && irqs_disabled()) + if (in_dbg_master()) return; -#endif + kick_all_cpus_sync(); } From 7e9f5e6629f62865e67b8a02a5b522dd9af890bd Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Thu, 7 May 2020 11:40:49 +0100 Subject: [PATCH 071/148] arm64: vdso: Add --eh-frame-hdr to ldflags LLVM's unwinder depends on the .eh_frame_hdr being present for unwinding. However, when compiling Linux with GCC, the section is not present in the vdso library object and when compiling with Clang, it is present, but it has zero length. 
With GCC the problem was not spotted because libgcc unwinder does not require the .eh_frame_hdr section to be present. Add --eh-frame-hdr to ldflags to correctly generate and populate the section for both GCC and LLVM. Fixes: 28b1a824a4f44 ("arm64: vdso: Substitute gettimeofday() with C implementation") Reported-by: Tamas Zsoldos Signed-off-by: Vincenzo Frascino Tested-by: Tamas Zsoldos Cc: Will Deacon Cc: Catalin Marinas Link: https://lore.kernel.org/r/20200507104049.47834-1-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index abf61c96edbc..95e9e444ca93 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -22,7 +22,7 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ - -Bsymbolic --build-id -n -T + -Bsymbolic --eh-frame-hdr --build-id -n -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING From 717b938e22f8dbf0c02d51cb8e74634fac9b57ef Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 6 May 2020 20:51:28 +0100 Subject: [PATCH 072/148] arm64: Document why we enable PAC support for leaf functions Document the fact that we enable pointer authentication protection for leaf functions since there is some narrow potential for ROP protection benefits and little overhead has been observed. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20200506195138.22086-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 85e4149cc5d5..921c8ee8552b 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -71,6 +71,9 @@ branch-prot-flags-y += $(call cc-option,-mbranch-protection=none) ifeq ($(CONFIG_ARM64_PTR_AUTH),y) branch-prot-flags-$(CONFIG_CC_HAS_SIGN_RETURN_ADDRESS) := -msign-return-address=all +# We enable additional protection for leaf functions as there is some +# narrow potential for ROP protection benefits and no substantial +# performance impact has been observed. branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET) := -mbranch-protection=pac-ret+leaf # -march=armv8.3-a enables the non-nops instructions for PAC, to avoid the # compiler to generate them and consequently to break the single image contract From 92e2294d870bc9e77592c2454f565c3bd6bb79ad Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 6 May 2020 20:51:29 +0100 Subject: [PATCH 073/148] arm64: bti: Support building kernel C code using BTI When running with BTI enabled we need to ask the compiler to enable generation of BTI landing pads beyond those generated as a result of pointer authentication instructions being landing pads. Since the two features are practically speaking unlikely to be used separately we will make kernel mode BTI depend on pointer authentication in order to simplify the Makefile. 
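To make the effect on ordinary C code concrete, a hedged sketch follows (illustrative names, not code from the tree): functions reached through pointers are called via indirect branches, and with BTI enforced such branches must land on a landing pad, which the -mbranch-protection option arranges at each function entry.

	static int default_handler(void)
	{
		return 0;
	}

	/*
	 * run_handler() reaches its argument via an indirect branch (BLR).
	 * With kernel mode BTI the compiler emits a landing pad (a BTI
	 * instruction, or a pointer authentication instruction that doubles
	 * as one) at the entry of default_handler() and of any function whose
	 * address may be passed in, so these indirect calls remain valid once
	 * kernel text is mapped with the GP bit set.
	 */
	int run_handler(int (*handler)(void))
	{
		return handler ? handler() : default_handler();
	}
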
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20200506195138.22086-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 921c8ee8552b..4780c86b86af 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -74,7 +74,11 @@ branch-prot-flags-$(CONFIG_CC_HAS_SIGN_RETURN_ADDRESS) := -msign-return-address= # We enable additional protection for leaf functions as there is some # narrow potential for ROP protection benefits and no substantial # performance impact has been observed. +ifeq ($(CONFIG_ARM64_BTI_KERNEL),y) +branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI) := -mbranch-protection=pac-ret+leaf+bti +else branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET) := -mbranch-protection=pac-ret+leaf +endif # -march=armv8.3-a enables the non-nops instructions for PAC, to avoid the # compiler to generate them and consequently to break the single image contract # we pass it only to the assembler. This option is utilized only in case of non From 714a8d02ca4da1479bf0b778fc1951dc88515c3d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 6 May 2020 20:51:30 +0100 Subject: [PATCH 074/148] arm64: asm: Override SYM_FUNC_START when building the kernel with BTI When the kernel is built for BTI override SYM_FUNC_START and related macros to add a BTI landing pad to the start of all global functions, ensuring that they are BTI safe. The ; at the end of the BTI_x macros is for the benefit of the macro-generated functions in xen-hypercall.S. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20200506195138.22086-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/linkage.h | 46 ++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index ebee3113a62f..b5a7998a6b2a 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -4,6 +4,52 @@ #define __ALIGN .align 2 #define __ALIGN_STR ".align 2" +#if defined(CONFIG_ARM64_BTI_KERNEL) && defined(__aarch64__) + +/* + * Since current versions of gas reject the BTI instruction unless we + * set the architecture version to v8.5 we use the hint instruction + * instead. + */ +#define BTI_C hint 34 ; +#define BTI_J hint 36 ; + +/* + * When using in-kernel BTI we need to ensure that assembly functions + * have suitable annotations. Override SYM_FUNC_START to insert a BTI + * landing pad at the start of everything. + */ +#define SYM_FUNC_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + BTI_C + +#define SYM_FUNC_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \ + BTI_C + +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ + BTI_C + +#define SYM_FUNC_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \ + BTI_C + +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \ + BTI_C + +#define SYM_FUNC_START_WEAK_NOALIGN(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ + BTI_C + +#define SYM_INNER_LABEL(name, linkage) \ + .type name SYM_T_NONE ASM_NL \ + SYM_ENTRY(name, linkage, SYM_A_NONE) \ + BTI_J + +#endif + /* * Annotate a function as position independent, i.e., safe to be called before * the kernel virtual mapping is activated. 
From c8027285e3660e3b76eb2fb75a32f1596064b5e4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 6 May 2020 20:51:31 +0100 Subject: [PATCH 075/148] arm64: Set GP bit in kernel page tables to enable BTI for the kernel Now that the kernel is built with BTI annotations enable the feature by setting the GP bit in the stage 1 translation tables. This is done based on the features supported by the boot CPU so that we do not need to rewrite the translation tables. In order to avoid potential issues on big.LITTLE systems when there are a mix of BTI and non-BTI capable CPUs in the system when we have enabled kernel mode BTI we change BTI to be a _STRICT_BOOT_CPU_FEATURE when we have kernel BTI. This will prevent any CPUs that don't support BTI being started if the boot CPU supports BTI rather than simply not using BTI as we do when supporting BTI only in userspace. The main concern is the possibility of BTYPE being preserved by a CPU that does not implement BTI when a thread is migrated to it resulting in an incorrect state which could generate an exception when the thread migrates back to a CPU that does support BTI. If we encounter practical systems which mix BTI and non-BTI CPUs we will need to revisit this implementation. Since we currently do not generate landing pads in the BPF JIT we only map the base kernel text in this way. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20200506195138.22086-5-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 3 +++ arch/arm64/kernel/cpufeature.c | 4 ++++ arch/arm64/mm/mmu.c | 24 ++++++++++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 1305e28225fc..310690332896 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -21,6 +21,7 @@ #ifndef __ASSEMBLY__ +#include #include extern bool arm64_use_ng_mappings; @@ -31,6 +32,8 @@ extern bool arm64_use_ng_mappings; #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) +#define PTE_MAYBE_GP (system_supports_bti() ? PTE_GP : 0) + #define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) #define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b234d6f71cba..d3cc247acf61 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1800,7 +1800,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Branch Target Identification", .capability = ARM64_BTI, +#ifdef CONFIG_ARM64_BTI_KERNEL + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, +#else .type = ARM64_CPUCAP_SYSTEM_FEATURE, +#endif .matches = has_cpuid_feature, .cpu_enable = bti_enable, .sys_reg = SYS_ID_AA64PFR1_EL1, diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index a374e4f51a62..c299b73dd5e4 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -609,6 +609,22 @@ static int __init map_entry_trampoline(void) core_initcall(map_entry_trampoline); #endif +/* + * Open coded check for BTI, only for use to determine configuration + * for early mappings for before the cpufeature code has run. 
+ */ +static bool arm64_early_this_cpu_has_bti(void) +{ + u64 pfr1; + + if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + return false; + + pfr1 = read_sysreg_s(SYS_ID_AA64PFR1_EL1); + return cpuid_feature_extract_unsigned_field(pfr1, + ID_AA64PFR1_BT_SHIFT); +} + /* * Create fine-grained mappings for the kernel. */ @@ -624,6 +640,14 @@ static void __init map_kernel(pgd_t *pgdp) */ pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; + /* + * If we have a CPU that supports BTI and a kernel built for + * BTI then mark the kernel executable text as guarded pages + * now so we don't have to rewrite the page tables later. + */ + if (arm64_early_this_cpu_has_bti()) + text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP); + /* * Only rodata will be remapped with different permissions later on, * all other segments are allowed to use contiguous mappings. From fa76cfe65c1d748ef418e930a4b631a03b28f04c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 6 May 2020 20:51:32 +0100 Subject: [PATCH 076/148] arm64: bpf: Annotate JITed code for BTI In order to extend the protection offered by BTI to all code executing in kernel mode we need to annotate JITed BPF code appropriately for BTI. To do this we need to add a landing pad to the start of each BPF function and also immediately after the function prologue if we are emitting a function which can be tail called. Jumps within BPF functions are all to immediate offsets and therefore do not require landing pads. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20200506195138.22086-6-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/net/bpf_jit.h | 8 ++++++++ arch/arm64/net/bpf_jit_comp.c | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index eb73f9f72c46..05b477709b5f 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -189,4 +189,12 @@ /* Rn & Rm; set condition flags */ #define A64_TST(sf, Rn, Rm) A64_ANDS(sf, A64_ZR, Rn, Rm) +/* HINTs */ +#define A64_HINT(x) aarch64_insn_gen_hint(x) + +/* BTI */ +#define A64_BTI_C A64_HINT(AARCH64_INSN_HINT_BTIC) +#define A64_BTI_J A64_HINT(AARCH64_INSN_HINT_BTIJ) +#define A64_BTI_JC A64_HINT(AARCH64_INSN_HINT_BTIJC) + #endif /* _BPF_JIT_H */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index cdc79de0c794..83fa475c6b42 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -171,7 +171,11 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) #define STACK_ALIGN(sz) (((sz) + 15) & ~15) /* Tail call offset to jump into */ +#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) +#define PROLOGUE_OFFSET 8 +#else #define PROLOGUE_OFFSET 7 +#endif static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) { @@ -208,6 +212,10 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) * */ + /* BTI landing pad */ + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + emit(A64_BTI_C, ctx); + /* Save FP and LR registers to stay align with ARM64 AAPCS */ emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); emit(A64_MOV(1, A64_FP, A64_SP), ctx); @@ -230,6 +238,10 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) cur_offset, PROLOGUE_OFFSET); return -1; } + + /* BTI landing pad for the tail call, done with a BR */ + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + emit(A64_BTI_J, ctx); } ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth); From 67d4a1cd09765db92581dbce8438305ea0e06004 Mon Sep 17 00:00:00 2001 
From: Mark Brown Date: Wed, 6 May 2020 20:51:33 +0100 Subject: [PATCH 077/148] arm64: mm: Mark executable text as guarded pages When the kernel is built for BTI and running on a system which supports BTI, make all executable text guarded pages to ensure that loadable module and JITed BPF code is protected by BTI. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20200506195138.22086-7-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/mm/pageattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 250c49008d73..bde08090b838 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -126,13 +126,13 @@ int set_memory_nx(unsigned long addr, int numpages) { return change_memory_common(addr, numpages, __pgprot(PTE_PXN), - __pgprot(0)); + __pgprot(PTE_MAYBE_GP)); } int set_memory_x(unsigned long addr, int numpages) { return change_memory_common(addr, numpages, - __pgprot(0), + __pgprot(PTE_MAYBE_GP), __pgprot(PTE_PXN)); } From 97fed779f2a68937d9590fbbe8ed31d6ebbce5a5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 6 May 2020 20:51:34 +0100 Subject: [PATCH 078/148] arm64: bti: Provide Kconfig for kernel mode BTI Now that all the code is in place, provide a Kconfig option allowing users to enable BTI for the kernel if their toolchain supports it, defaulting it on since this has security benefits. This is a separate configuration option since we currently don't support secondary CPUs that lack BTI if the boot CPU supports it. Code generation issues mean that current GCC 9 versions are not able to produce usable BTI binaries, so we disable support for building with GCC versions prior to 10; once a fix is backported to GCC 9 the dependencies will be updated. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20200506195138.22086-8-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6f199d8146d4..77d5fa96f9d0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1610,6 +1610,25 @@ config ARM64_BTI BTI, such binaries can still run, but you get no additional enforcement of branch destinations. +config ARM64_BTI_KERNEL + bool "Use Branch Target Identification for kernel" + default y + depends on ARM64_BTI + depends on ARM64_PTR_AUTH + depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI + depends on !CC_IS_GCC || GCC_VERSION >= 100000 + depends on !(CC_IS_CLANG && GCOV_KERNEL) + depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) + help + Build the kernel with Branch Target Identification annotations + and enable enforcement of this for kernel code. When this option + is enabled and the system supports BTI all kernel code including + modular code must have BTI enabled. + +config CC_HAS_BRANCH_PROT_PAC_RET_BTI + # GCC 9 or later, clang 8 or later + def_bool $(cc-option,-mbranch-protection=pac-ret+leaf+bti) + config ARM64_E0PD bool "Enable support for E0PD" default y From 3a9b136c998fc990cbde1a3cbc343050de704d6b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 6 May 2020 20:51:35 +0100 Subject: [PATCH 079/148] arm64: asm: Provide a mechanism for generating ELF note for BTI ELF files built for BTI should have a program property note section which identifies them as such.
The linker expects to find this note in all object files it is linking into a BTI annotated output; the compiler will ensure that this happens for C files, but for assembler files we need to do this in the source, so provide a macro which can be used for this purpose. To support likely future requirements for additional notes we split the definition of the flags to set for BTI code from the macro that creates the note itself. This is mainly for use in the vDSO, which should be a normal ELF shared library and should therefore include BTI annotations when built for BTI. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20200506195138.22086-9-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 50 ++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 0bff325117b4..54d181177656 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -736,4 +736,54 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .Lyield_out_\@ : .endm +/* + * This macro emits a program property note section identifying + * architecture features which require special handling, mainly for + * use in assembly files included in the VDSO. + */ + +#define NT_GNU_PROPERTY_TYPE_0 5 +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) +#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1) + +#ifdef CONFIG_ARM64_BTI_KERNEL +#define GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT \ + ((GNU_PROPERTY_AARCH64_FEATURE_1_BTI | \ + GNU_PROPERTY_AARCH64_FEATURE_1_PAC)) +#endif + +#ifdef GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT +.macro emit_aarch64_feature_1_and, feat=GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT + .pushsection .note.gnu.property, "a" + .align 3 + .long 2f - 1f + .long 6f - 3f + .long NT_GNU_PROPERTY_TYPE_0 +1: .string "GNU" +2: + .align 3 +3: .long GNU_PROPERTY_AARCH64_FEATURE_1_AND + .long 5f - 4f +4: + /* + * This is described with an array of char in the Linux API + * spec but the text and all other usage (including binutils, + * clang and GCC) treat this as a 32 bit value so no swizzling + * is required for big endian. + */ + .long \feat +5: + .align 3 +6: + .popsection +.endm + +#else +.macro emit_aarch64_feature_1_and, feat=0 +.endm + +#endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */ + #endif /* __ASM_ASSEMBLER_H */ From a6aadc28278a05f394c9353460ec74610ddf7ac6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 6 May 2020 20:51:36 +0100 Subject: [PATCH 080/148] arm64: vdso: Annotate for BTI Generate BTI annotations for all assembly files included in the 64 bit vDSO.
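For reference, the note these annotations produce (via the emit_aarch64_feature_1_and macro added in the previous patch) has the layout sketched below. This is an illustration only, written as a C struct with invented names rather than code from the series; the sizes follow from the .long/.align directives in the macro.

#include <stdint.h>

/* Byte layout of the emitted NT_GNU_PROPERTY_TYPE_0 note (illustrative). */
struct aarch64_feature_1_and_note {
	/* ELF note header */
	uint32_t n_namesz;	/* 4: sizeof("GNU") including the NUL */
	uint32_t n_descsz;	/* 16: size of the descriptor below */
	uint32_t n_type;	/* 5: NT_GNU_PROPERTY_TYPE_0 */
	char     n_name[4];	/* "GNU\0" */

	/* single property entry, 8-byte aligned as required for ELF64 */
	uint32_t pr_type;	/* 0xc0000000: GNU_PROPERTY_AARCH64_FEATURE_1_AND */
	uint32_t pr_datasz;	/* 4 */
	uint32_t pr_data;	/* BTI (bit 0) | PAC (bit 1) for kernel BTI builds */
	uint32_t pr_pad;	/* .align 3 padding, so the descriptor is 16 bytes */
};

On a BTI-enabled build, readelf -n on the resulting object should report the corresponding "AArch64 feature: BTI, PAC" property.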
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20200506195138.22086-10-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/note.S | 3 +++ arch/arm64/kernel/vdso/sigreturn.S | 3 +++ arch/arm64/kernel/vdso/vdso.S | 3 +++ 3 files changed, 9 insertions(+) diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S index 0ce6ec75a525..3d4e82290c80 100644 --- a/arch/arm64/kernel/vdso/note.S +++ b/arch/arm64/kernel/vdso/note.S @@ -12,9 +12,12 @@ #include #include #include +#include ELFNOTE_START(Linux, 0, "a") .long LINUX_VERSION_CODE ELFNOTE_END BUILD_SALT + +emit_aarch64_feature_1_and diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S index 12324863d5c2..3fb13b81f780 100644 --- a/arch/arm64/kernel/vdso/sigreturn.S +++ b/arch/arm64/kernel/vdso/sigreturn.S @@ -9,6 +9,7 @@ */ #include +#include #include .text @@ -24,3 +25,5 @@ SYM_FUNC_START(__kernel_rt_sigreturn) svc #0 .cfi_endproc SYM_FUNC_END(__kernel_rt_sigreturn) + +emit_aarch64_feature_1_and diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S index d1414fee5274..c4b1990bf2be 100644 --- a/arch/arm64/kernel/vdso/vdso.S +++ b/arch/arm64/kernel/vdso/vdso.S @@ -8,6 +8,7 @@ #include #include #include +#include #include .globl vdso_start, vdso_end @@ -19,3 +20,5 @@ vdso_start: vdso_end: .previous + +emit_aarch64_feature_1_and From 5e02a1887fce5108eb8ffe7957f66f3c4e0fded9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 6 May 2020 20:51:37 +0100 Subject: [PATCH 081/148] arm64: vdso: Force the vDSO to be linked as BTI when built for BTI When the kernel and hence vDSO are built with BTI enabled force the linker to link the vDSO as BTI. This will cause the linker to warn if any of the input files do not have the BTI annotation, ensuring that we don't silently fail to provide a vDSO that is built and annotated for BTI when the kernel is being built with BTI. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20200506195138.22086-11-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index dd2514bb1511..51ad1cce8133 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -17,8 +17,10 @@ obj-vdso := vgettimeofday.o note.o sigreturn.o targets := $(obj-vdso) vdso.so vdso.so.dbg obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) +btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti + ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ - --build-id -n -T + --build-id -n $(btildflags-y) -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING From bf740a905ffedda60d2dacbfa0c3aca81490fda1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 6 May 2020 20:51:38 +0100 Subject: [PATCH 082/148] arm64: vdso: Map the vDSO text with guarded pages when built for BTI The kernel is responsible for mapping the vDSO into userspace processes, including mapping the text section as executable. Handle the mapping of the vDSO for BTI similarly, mapping the text section as guarded pages so the BTI annotations in the vDSO become effective when they are present. This will mean that we can have BTI active for the vDSO in processes that do not otherwise support BTI. This should not be an issue for any expected use of the vDSO. 
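If it is useful to verify the result at runtime, a userspace sketch along the following lines can confirm that the vDSO text ended up as a guarded page. It is not part of this patch and assumes the "bt" VmFlags annotation for VM_ARM64_BTI in /proc/<pid>/smaps (added elsewhere in the BTI series) is available.

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

int main(void)
{
	FILE *f = fopen("/proc/self/smaps", "r");
	char line[256];
	bool in_vdso = false;

	if (!f)
		return 1;

	while (fgets(line, sizeof(line), f)) {
		if (strstr(line, "[vdso]"))
			in_vdso = true;
		/* VmFlags: is the last field printed for each mapping */
		if (in_vdso && !strncmp(line, "VmFlags:", 8)) {
			printf("vDSO BTI guarded page flag: %s\n",
			       strstr(line, " bt") ? "present" : "absent");
			in_vdso = false;
		}
	}

	fclose(f);
	return 0;
}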
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20200506195138.22086-12-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 033a48f30dbb..3b0289d5cccb 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -142,6 +142,7 @@ static int __setup_additional_pages(enum arch_vdso_type arch_index, int uses_interp) { unsigned long vdso_base, vdso_text_len, vdso_mapping_len; + unsigned long gp_flags = 0; void *ret; vdso_text_len = vdso_lookup[arch_index].vdso_pages << PAGE_SHIFT; @@ -160,10 +161,13 @@ static int __setup_additional_pages(enum arch_vdso_type arch_index, if (IS_ERR(ret)) goto up_fail; + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti()) + gp_flags = VM_ARM64_BTI; + vdso_base += PAGE_SIZE; mm->context.vdso = (void *)vdso_base; ret = _install_special_mapping(mm, vdso_base, vdso_text_len, - VM_READ|VM_EXEC| + VM_READ|VM_EXEC|gp_flags| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_lookup[arch_index].cm); if (IS_ERR(ret)) From 579d1b3faa3735e781ff74aac0afd598515dbc63 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Fri, 8 May 2020 11:15:44 -0700 Subject: [PATCH 083/148] arm64: insn: Fix two bugs in encoding 32-bit logical immediates This patch fixes two issues present in the current function for encoding arm64 logical immediates when using the 32-bit variants of instructions. First, the code does not correctly reject an all-ones 32-bit immediate, and returns an undefined instruction encoding. Second, the code incorrectly rejects some 32-bit immediates that are actually encodable as logical immediates. The root cause is that the code uses a default mask of 64-bit all-ones, even for 32-bit immediates. This causes an issue later on when the default mask is used to fill the top bits of the immediate with ones, shown here: /* * Pattern: 0..01..10..01..1 * * Fill the unused top bits with ones, and check if * the result is a valid immediate (all ones with a * contiguous ranges of zeroes). */ imm |= ~mask; if (!range_of_ones(~imm)) return AARCH64_BREAK_FAULT; To see the problem, consider an immediate of the form 0..01..10..01..1, where the upper 32 bits are zero, such as 0x80000001. The code checks if ~(imm | ~mask) contains a range of ones: the incorrect mask yields 1..10..01..10..0, which fails the check; the correct mask yields 0..01..10..0, which succeeds. The fix for both issues is to generate a correct mask based on the instruction immediate size, and use the mask to check for all-ones, all-zeroes, and values wider than the mask. Currently, arch/arm64/kvm/va_layout.c is the only user of this function, which uses 64-bit immediates and therefore won't trigger these bugs. We tested the new code against llvm-mc with all 1,302 encodable 32-bit logical immediates and all 5,334 encodable 64-bit logical immediates. 
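A standalone sketch of the corrected early checks, useful for experimenting with the 0x80000001 example outside the kernel. This mirrors only the pre-filter fixed by this patch, with GENMASK() expanded by hand; the full repeating-pattern search that follows in aarch64_encode_immediate() is not reproduced here.

#include <stdint.h>
#include <stdio.h>

/* Return 1 if imm survives the early checks for an esz-bit logical immediate. */
static int logimm_prefilter(uint64_t imm, unsigned int esz)
{
	uint64_t mask = (esz == 64) ? ~0ULL : (1ULL << esz) - 1;

	/* Can't encode full zeroes, full ones, or a value wider than the mask */
	if (!imm || imm == mask || (imm & ~mask))
		return 0;
	return 1;
}

int main(void)
{
	printf("%d\n", logimm_prefilter(0x80000001ULL, 32));  /* 1: now considered */
	printf("%d\n", logimm_prefilter(0xffffffffULL, 32));  /* 0: all ones, rejected */
	printf("%d\n", logimm_prefilter(0x100000001ULL, 32)); /* 0: wider than 32 bits */
	return 0;
}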
Fixes: ef3935eeebff ("arm64: insn: Add encoder for bitwise operations using literals") Suggested-by: Will Deacon Co-developed-by: Xi Wang Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20200508181547.24783-2-luke.r.nels@gmail.com Signed-off-by: Will Deacon --- arch/arm64/kernel/insn.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 4a9e773a177f..cc2f3d901c91 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -1535,16 +1535,10 @@ static u32 aarch64_encode_immediate(u64 imm, u32 insn) { unsigned int immr, imms, n, ones, ror, esz, tmp; - u64 mask = ~0UL; - - /* Can't encode full zeroes or full ones */ - if (!imm || !~imm) - return AARCH64_BREAK_FAULT; + u64 mask; switch (variant) { case AARCH64_INSN_VARIANT_32BIT: - if (upper_32_bits(imm)) - return AARCH64_BREAK_FAULT; esz = 32; break; case AARCH64_INSN_VARIANT_64BIT: @@ -1556,6 +1550,12 @@ static u32 aarch64_encode_immediate(u64 imm, return AARCH64_BREAK_FAULT; } + mask = GENMASK(esz - 1, 0); + + /* Can't encode full zeroes, full ones, or value wider than the mask */ + if (!imm || imm == mask || imm & ~mask) + return AARCH64_BREAK_FAULT; + /* * Inverse of Replicate(). Try to spot a repeating pattern * with a pow2 stride. From fd49591cb49b72abd1b665222a635ccb17df7923 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Fri, 8 May 2020 11:15:45 -0700 Subject: [PATCH 084/148] bpf, arm64: Optimize AND,OR,XOR,JSET BPF_K using arm64 logical immediates The current code for BPF_{AND,OR,XOR,JSET} BPF_K loads the immediate to a temporary register before use. This patch changes the code to avoid using a temporary register when the BPF immediate is encodable using an arm64 logical immediate instruction. If the encoding fails (due to the immediate not being encodable), it falls back to using a temporary register. Example of generated code for BPF_ALU32_IMM(BPF_AND, R0, 0x80000001): without optimization: 24: mov w10, #0x8000ffff 28: movk w10, #0x1 2c: and w7, w7, w10 with optimization: 24: and w7, w7, #0x80000001 Since the encoding process is quite complex, the JIT reuses existing functionality in arch/arm64/kernel/insn.c for encoding logical immediates rather than duplicate it in the JIT. Co-developed-by: Xi Wang Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/20200508181547.24783-3-luke.r.nels@gmail.com Signed-off-by: Will Deacon --- arch/arm64/net/bpf_jit.h | 14 +++++++++++++ arch/arm64/net/bpf_jit_comp.c | 37 +++++++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index eb73f9f72c46..f36a779949e6 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -189,4 +189,18 @@ /* Rn & Rm; set condition flags */ #define A64_TST(sf, Rn, Rm) A64_ANDS(sf, A64_ZR, Rn, Rm) +/* Logical (immediate) */ +#define A64_LOGIC_IMM(sf, Rd, Rn, imm, type) ({ \ + u64 imm64 = (sf) ? 
(u64)imm : (u64)(u32)imm; \ + aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_##type, \ + A64_VARIANT(sf), Rn, Rd, imm64); \ +}) +/* Rd = Rn OP imm */ +#define A64_AND_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, AND) +#define A64_ORR_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, ORR) +#define A64_EOR_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, EOR) +#define A64_ANDS_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, AND_SETFLAGS) +/* Rn & imm; set condition flags */ +#define A64_TST_I(sf, Rn, imm) A64_ANDS_I(sf, A64_ZR, Rn, imm) + #endif /* _BPF_JIT_H */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index cdc79de0c794..083e5d8a5e2c 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -356,6 +356,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, const bool isdw = BPF_SIZE(code) == BPF_DW; u8 jmp_cond, reg; s32 jmp_offset; + u32 a64_insn; #define check_imm(bits, imm) do { \ if ((((imm) > 0) && ((imm) >> (bits))) || \ @@ -488,18 +489,33 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, break; case BPF_ALU | BPF_AND | BPF_K: case BPF_ALU64 | BPF_AND | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_AND(is64, dst, dst, tmp), ctx); + a64_insn = A64_AND_I(is64, dst, dst, imm); + if (a64_insn != AARCH64_BREAK_FAULT) { + emit(a64_insn, ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_AND(is64, dst, dst, tmp), ctx); + } break; case BPF_ALU | BPF_OR | BPF_K: case BPF_ALU64 | BPF_OR | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_ORR(is64, dst, dst, tmp), ctx); + a64_insn = A64_ORR_I(is64, dst, dst, imm); + if (a64_insn != AARCH64_BREAK_FAULT) { + emit(a64_insn, ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_ORR(is64, dst, dst, tmp), ctx); + } break; case BPF_ALU | BPF_XOR | BPF_K: case BPF_ALU64 | BPF_XOR | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_EOR(is64, dst, dst, tmp), ctx); + a64_insn = A64_EOR_I(is64, dst, dst, imm); + if (a64_insn != AARCH64_BREAK_FAULT) { + emit(a64_insn, ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_EOR(is64, dst, dst, tmp), ctx); + } break; case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_K: @@ -628,8 +644,13 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP32 | BPF_JSET | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_TST(is64, dst, tmp), ctx); + a64_insn = A64_TST_I(is64, dst, imm); + if (a64_insn != AARCH64_BREAK_FAULT) { + emit(a64_insn, ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_TST(is64, dst, tmp), ctx); + } goto emit_cond_jmp; /* function call */ case BPF_JMP | BPF_CALL: From fd868f14818901821699988fdac680ebd80cd360 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Fri, 8 May 2020 11:15:46 -0700 Subject: [PATCH 085/148] bpf, arm64: Optimize ADD,SUB,JMP BPF_K using arm64 add/sub immediates The current code for BPF_{ADD,SUB} BPF_K loads the BPF immediate to a temporary register before performing the addition/subtraction. Similarly, BPF_JMP BPF_K cases load the immediate to a temporary register before comparison. This patch introduces optimizations that use arm64 immediate add, sub, cmn, or cmp instructions when the BPF immediate fits. If the immediate does not fit, it falls back to using a temporary register. 
Example of generated code for BPF_ALU64_IMM(BPF_ADD, R0, 2): without optimization: 24: mov x10, #0x2 28: add x7, x7, x10 with optimization: 24: add x7, x7, #0x2 The code could use A64_{ADD,SUB}_I directly and check if it returns AARCH64_BREAK_FAULT, similar to how logical immediates are handled. However, aarch64_insn_gen_add_sub_imm from insn.c prints error messages when the immediate does not fit, and it's simpler to check if the immediate fits ahead of time. Co-developed-by: Xi Wang Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/20200508181547.24783-4-luke.r.nels@gmail.com Signed-off-by: Will Deacon --- arch/arm64/net/bpf_jit.h | 8 ++++++++ arch/arm64/net/bpf_jit_comp.c | 36 +++++++++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index f36a779949e6..923ae7ff68c8 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -100,6 +100,14 @@ /* Rd = Rn OP imm12 */ #define A64_ADD_I(sf, Rd, Rn, imm12) A64_ADDSUB_IMM(sf, Rd, Rn, imm12, ADD) #define A64_SUB_I(sf, Rd, Rn, imm12) A64_ADDSUB_IMM(sf, Rd, Rn, imm12, SUB) +#define A64_ADDS_I(sf, Rd, Rn, imm12) \ + A64_ADDSUB_IMM(sf, Rd, Rn, imm12, ADD_SETFLAGS) +#define A64_SUBS_I(sf, Rd, Rn, imm12) \ + A64_ADDSUB_IMM(sf, Rd, Rn, imm12, SUB_SETFLAGS) +/* Rn + imm12; set condition flags */ +#define A64_CMN_I(sf, Rn, imm12) A64_ADDS_I(sf, A64_ZR, Rn, imm12) +/* Rn - imm12; set condition flags */ +#define A64_CMP_I(sf, Rn, imm12) A64_SUBS_I(sf, A64_ZR, Rn, imm12) /* Rd = Rn */ #define A64_MOV(sf, Rd, Rn) A64_ADD_I(sf, Rd, Rn, 0) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 083e5d8a5e2c..561a2fea9cdd 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -167,6 +167,12 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) return to - from; } +static bool is_addsub_imm(u32 imm) +{ + /* Either imm12 or shifted imm12. 
*/ + return !(imm & ~0xfff) || !(imm & ~0xfff000); +} + /* Stack must be multiples of 16B */ #define STACK_ALIGN(sz) (((sz) + 15) & ~15) @@ -479,13 +485,25 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, /* dst = dst OP imm */ case BPF_ALU | BPF_ADD | BPF_K: case BPF_ALU64 | BPF_ADD | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_ADD(is64, dst, dst, tmp), ctx); + if (is_addsub_imm(imm)) { + emit(A64_ADD_I(is64, dst, dst, imm), ctx); + } else if (is_addsub_imm(-imm)) { + emit(A64_SUB_I(is64, dst, dst, -imm), ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_ADD(is64, dst, dst, tmp), ctx); + } break; case BPF_ALU | BPF_SUB | BPF_K: case BPF_ALU64 | BPF_SUB | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_SUB(is64, dst, dst, tmp), ctx); + if (is_addsub_imm(imm)) { + emit(A64_SUB_I(is64, dst, dst, imm), ctx); + } else if (is_addsub_imm(-imm)) { + emit(A64_ADD_I(is64, dst, dst, -imm), ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_SUB(is64, dst, dst, tmp), ctx); + } break; case BPF_ALU | BPF_AND | BPF_K: case BPF_ALU64 | BPF_AND | BPF_K: @@ -639,8 +657,14 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, case BPF_JMP32 | BPF_JSLT | BPF_K: case BPF_JMP32 | BPF_JSGE | BPF_K: case BPF_JMP32 | BPF_JSLE | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_CMP(is64, dst, tmp), ctx); + if (is_addsub_imm(imm)) { + emit(A64_CMP_I(is64, dst, imm), ctx); + } else if (is_addsub_imm(-imm)) { + emit(A64_CMN_I(is64, dst, -imm), ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_CMP(is64, dst, tmp), ctx); + } goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP32 | BPF_JSET | BPF_K: From 203b1152d18c04823f73b843eb3b0ad646e4dbb2 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Mon, 11 May 2020 18:31:55 +0530 Subject: [PATCH 086/148] arm64/crash_core: Export KERNELPACMASK in vmcoreinfo Recently arm64 linux kernel added support for Armv8.3-A Pointer Authentication feature. If this feature is enabled in the kernel and the hardware supports address authentication then the return addresses are signed and stored in the stack to prevent ROP kind of attack. Kdump tool will now dump the kernel with signed lr values in the stack. Any user analysis tool for this kernel dump may need the kernel pac mask information in vmcoreinfo to generate the correct return address for stacktrace purpose as well as to resolve the symbol name. This patch is similar to commit ec6e822d1a22d0eef ("arm64: expose user PAC bit positions via ptrace") which exposes pac mask information via ptrace interfaces. The config guard ARM64_PTR_AUTH is removed from asm/compiler.h so macros like ptrauth_kernel_pac_mask can be used unguarded. This config protection is confusing as the pointer authentication feature may be missing at runtime even though this config is present.
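As an illustration of how a dump analysis tool might consume the exported value (this is not code from the patch; real tools such as crash or makedumpfile have their own implementations): kernel virtual addresses live in the upper half of the address space, so once the mask has been parsed from the "NUMBER(KERNELPACMASK)=" line in vmcoreinfo, the PAC bits of a saved LR can simply be forced back to ones.

#include <stdint.h>

/*
 * lr:   signed return address taken from a kernel stack in the dump
 * mask: value parsed from "NUMBER(KERNELPACMASK)=0x..." in vmcoreinfo
 */
static uint64_t arm64_strip_kernel_pac(uint64_t lr, uint64_t mask)
{
	/* Kernel VAs have their top bits set, so restore the masked bits to 1s. */
	return lr | mask;
}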
Signed-off-by: Amit Daniel Kachhap Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Link: https://lore.kernel.org/r/1589202116-18265-1-git-send-email-amit.kachhap@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/compiler.h | 4 ---- arch/arm64/kernel/crash_core.c | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h index eece20d2c55f..51a7ce87cdfe 100644 --- a/arch/arm64/include/asm/compiler.h +++ b/arch/arm64/include/asm/compiler.h @@ -2,8 +2,6 @@ #ifndef __ASM_COMPILER_H #define __ASM_COMPILER_H -#if defined(CONFIG_ARM64_PTR_AUTH) - /* * The EL0/EL1 pointer bits used by a pointer authentication code. * This is dependent on TBI0/TBI1 being enabled, or bits 63:56 would also apply. @@ -19,6 +17,4 @@ #define __builtin_return_address(val) \ (void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val))) -#endif /* CONFIG_ARM64_PTR_AUTH */ - #endif /* __ASM_COMPILER_H */ diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c index ca4c3e12d8c5..1f646b07e3e9 100644 --- a/arch/arm64/kernel/crash_core.c +++ b/arch/arm64/kernel/crash_core.c @@ -5,6 +5,7 @@ */ #include +#include #include void arch_crash_save_vmcoreinfo(void) @@ -16,4 +17,7 @@ void arch_crash_save_vmcoreinfo(void) vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", PHYS_OFFSET); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); + vmcoreinfo_append_str("NUMBER(KERNELPACMASK)=0x%llx\n", + system_supports_address_auth() ? + ptrauth_kernel_pac_mask() : 0); } From c0fc00ec6304a8e2e438ca1688b243815084dc50 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Mon, 11 May 2020 18:31:56 +0530 Subject: [PATCH 087/148] Documentation/vmcoreinfo: Add documentation for 'KERNELPACMASK' Add documentation for KERNELPACMASK variable being added to the vmcoreinfo. It indicates the PAC bits mask information of signed kernel pointers if Armv8.3-A Pointer Authentication feature is present. Signed-off-by: Amit Daniel Kachhap Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Dave Young Cc: Baoquan He Link: https://lore.kernel.org/r/1589202116-18265-2-git-send-email-amit.kachhap@arm.com Signed-off-by: Will Deacon --- Documentation/admin-guide/kdump/vmcoreinfo.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst index 007a6b86e0ee..e4ee8b2db604 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -393,6 +393,12 @@ KERNELOFFSET The kernel randomization offset. Used to compute the page offset. If KASLR is disabled, this value is zero. +KERNELPACMASK +------------- + +The mask to extract the Pointer Authentication Code from a kernel virtual +address. + arm === From 44cdc7b16e0a6a69a170cf98006aba1c1359ee3b Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Fri, 8 May 2020 11:56:38 +0800 Subject: [PATCH 088/148] ACPI: IORT: Add extra message "applying workaround" for off-by-1 issue As we already applied a workaround for the off-by-1 issue, it's good to add extra message "applying workaround" to make people less uneasy to see FW_BUG message in the boot log. 
Signed-off-by: Hanjun Guo Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/1588910198-8348-1-git-send-email-guohanjun@huawei.com Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index b011d25af676..6e445bc55537 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -332,6 +332,8 @@ static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in, map, rid_in); if (rid_in != map->input_base) return -ENXIO; + + pr_err(FW_BUG "applying workaround.\n"); } *rid_out = map->output_base + (rid_in - map->input_base); From 3a88d7c5c9448d24785ce862334bde2d5dc39aca Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 12 May 2020 12:45:40 +0100 Subject: [PATCH 089/148] arm64: kconfig: Update and comment GCC version check for kernel BTI Some versions of GCC are known to suffer from a BTI code generation bug, meaning that CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI cannot be solely used to determine whether or not we can compile the kernel with BTI enabled. Update the BTI Kconfig entry to refer to the relevant GCC bugzilla entry (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697) and update the check now that the fix has been merged into GCC release 10.1. Acked-by: Mark Brown Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 77d5fa96f9d0..e058854bcefc 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1616,7 +1616,8 @@ config ARM64_BTI_KERNEL depends on ARM64_BTI depends on ARM64_PTR_AUTH depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI - depends on !CC_IS_GCC || GCC_VERSION >= 100000 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697 + depends on !CC_IS_GCC || GCC_VERSION >= 100100 depends on !(CC_IS_CLANG && GCOV_KERNEL) depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) help From 357dd8a2aff25270971e11a37ddec807f37488e9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 12 May 2020 16:52:55 +0200 Subject: [PATCH 090/148] arm64: cpufeature: Add "or" to mitigations for multiple errata Several actions are not mitigations for a single erratum, but for multiple errata. However, printing a line like CPU features: detected: ARM errata 1165522, 1530923 may give the false impression that all listed errata have been detected. This can confuse the user, who may think his Cortex-A55 is suddenly affected by a Cortex-A76 erratum. Add "or" to all descriptions for mitigations for multiple errata, to make it clear that only one or more of the errata printed are applicable, and not necessarily all of them.
Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200512145255.5520-1-geert+renesas@glider.be Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index df56d2295d16..c8cb98a4fa6d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -774,7 +774,7 @@ static const struct midr_range erratum_speculative_at_vhe_list[] = { const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { - .desc = "ARM errata 826319, 827319, 824069, 819472", + .desc = "ARM errata 826319, 827319, 824069, or 819472", .capability = ARM64_WORKAROUND_CLEAN_CACHE, ERRATA_MIDR_RANGE_LIST(workaround_clean_cache), .cpu_enable = cpu_enable_cache_maint_trap, @@ -856,7 +856,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI { - .desc = "Qualcomm erratum 1009, ARM erratum 1286807", + .desc = "Qualcomm erratum 1009, or ARM erratum 1286807", .capability = ARM64_WORKAROUND_REPEAT_TLBI, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = cpucap_multi_entry_cap_matches, @@ -899,7 +899,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT_VHE { - .desc = "ARM errata 1165522, 1530923", + .desc = "ARM errata 1165522 or 1530923", .capability = ARM64_WORKAROUND_SPECULATIVE_AT_VHE, ERRATA_MIDR_RANGE_LIST(erratum_speculative_at_vhe_list), }, From e4e9f6dfeedc86afef2c3fa4102d274862fe2cf9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 12 May 2020 12:39:50 +0100 Subject: [PATCH 091/148] arm64: bti: Fix support for userspace only BTI When setting PTE_MAYBE_GP we check system_supports_bti() but this is true for systems where only CONFIG_ARM64_BTI is set, causing us to enable BTI on some kernel text. Add an extra check for the kernel mode option, using an ifdef due to line length. Fixes: c8027285e366 ("arm64: Set GP bit in kernel page tables to enable BTI for the kernel") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20200512113950.29996-1-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 310690332896..2e7e0f452301 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -32,7 +32,15 @@ extern bool arm64_use_ng_mappings; #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) +/* + * If we have userspace only BTI we don't want to mark kernel pages + * guarded even if the system does support BTI. + */ +#ifdef CONFIG_ARM64_BTI_KERNEL #define PTE_MAYBE_GP (system_supports_bti() ? PTE_GP : 0) +#else +#define PTE_MAYBE_GP 0 +#endif #define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) #define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) From d08b9f0ca6605e13dcb48f04e55a30545b3c71eb Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 27 Apr 2020 09:00:07 -0700 Subject: [PATCH 092/148] scs: Add support for Clang's Shadow Call Stack (SCS) This change adds generic support for Clang's Shadow Call Stack, which uses a shadow stack to protect return addresses from being overwritten by an attacker.
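As a rough model of what the instrumentation does (not code from this patch, and the function names below are invented): with -fsanitize=shadow-call-stack the compiler emits, for every non-leaf function, a push of the return address onto a separate stack addressed by a reserved register (x18 on arm64), and reloads it from there before returning.

static void callee(void) { }

void instrumented_example(void)
{
	/* prologue adds:  str x30, [x18], #8    (push LR to the shadow stack) */
	callee();
	/*
	 * epilogue adds:  ldr x30, [x18, #-8]!  (pop LR from the shadow stack)
	 * so the return target comes from the shadow stack rather than from a
	 * frame record an attacker may have overwritten on the regular stack.
	 */
}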
Details are available here: https://clang.llvm.org/docs/ShadowCallStack.html Note that security guarantees in the kernel differ from the ones documented for user space. The kernel must store addresses of shadow stacks in memory, which means an attacker capable of reading and writing arbitrary memory may be able to locate them and hijack control flow by modifying the stacks. Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Reviewed-by: Miguel Ojeda [will: Numerous cosmetic changes] Signed-off-by: Will Deacon --- Makefile | 6 +++ arch/Kconfig | 24 ++++++++++++ include/linux/compiler-clang.h | 4 ++ include/linux/compiler_types.h | 4 ++ include/linux/scs.h | 68 ++++++++++++++++++++++++++++++++++ init/init_task.c | 8 ++++ kernel/Makefile | 1 + kernel/fork.c | 9 +++++ kernel/sched/core.c | 2 + kernel/scs.c | 65 ++++++++++++++++++++++++++++++++ 10 files changed, 191 insertions(+) create mode 100644 include/linux/scs.h create mode 100644 kernel/scs.c diff --git a/Makefile b/Makefile index 679f302a8b8b..33dc0d0cdd08 100644 --- a/Makefile +++ b/Makefile @@ -866,6 +866,12 @@ ifdef CONFIG_LIVEPATCH KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone) endif +ifdef CONFIG_SHADOW_CALL_STACK +CC_FLAGS_SCS := -fsanitize=shadow-call-stack +KBUILD_CFLAGS += $(CC_FLAGS_SCS) +export CC_FLAGS_SCS +endif + # arch Makefile may override CC so keep this after arch Makefile is included NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) diff --git a/arch/Kconfig b/arch/Kconfig index 786a85d4ad40..334a3d9b19df 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -533,6 +533,30 @@ config STACKPROTECTOR_STRONG about 20% of all kernel functions, which increases the kernel code size by about 2%. +config ARCH_SUPPORTS_SHADOW_CALL_STACK + bool + help + An architecture should select this if it supports Clang's Shadow + Call Stack, has asm/scs.h, and implements runtime support for shadow + stack switching. + +config SHADOW_CALL_STACK + bool "Clang Shadow Call Stack" + depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK + help + This option enables Clang's Shadow Call Stack, which uses a + shadow stack to protect function return addresses from being + overwritten by an attacker. More information can be found in + Clang's documentation: + + https://clang.llvm.org/docs/ShadowCallStack.html + + Note that security guarantees in the kernel differ from the + ones documented for user space. The kernel must store addresses + of shadow stacks in memory, which means an attacker capable of + reading and writing arbitrary memory may be able to locate them + and hijack control flow by modifying the stacks. + config HAVE_ARCH_WITHIN_STACK_FRAMES bool help diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 333a6695a918..790c0c6b8552 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -42,3 +42,7 @@ * compilers, like ICC. */ #define barrier() __asm__ __volatile__("" : : : "memory") + +#if __has_feature(shadow_call_stack) +# define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) +#endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index e970f97a7fcb..97b62f47a80d 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -193,6 +193,10 @@ struct ftrace_likely_data { # define randomized_struct_fields_end #endif +#ifndef __noscs +# define __noscs +#endif + #ifndef asm_volatile_goto #define asm_volatile_goto(x...)
asm goto(x) #endif diff --git a/include/linux/scs.h b/include/linux/scs.h new file mode 100644 index 000000000000..3f3662621a27 --- /dev/null +++ b/include/linux/scs.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shadow Call Stack support. + * + * Copyright (C) 2019 Google LLC + */ + +#ifndef _LINUX_SCS_H +#define _LINUX_SCS_H + +#include +#include +#include +#include + +#ifdef CONFIG_SHADOW_CALL_STACK + +/* + * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit + * architecture) provided ~40% safety margin on stack usage while keeping + * memory allocation overhead reasonable. + */ +#define SCS_SIZE SZ_1K +#define GFP_SCS (GFP_KERNEL | __GFP_ZERO) + +/* An illegal pointer value to mark the end of the shadow stack. */ +#define SCS_END_MAGIC (0x5f6UL + POISON_POINTER_DELTA) + +#define task_scs(tsk) (task_thread_info(tsk)->scs_base) +#define task_scs_offset(tsk) (task_thread_info(tsk)->scs_offset) + +void scs_init(void); +int scs_prepare(struct task_struct *tsk, int node); +void scs_release(struct task_struct *tsk); + +static inline void scs_task_reset(struct task_struct *tsk) +{ + /* + * Reset the shadow stack to the base address in case the task + * is reused. + */ + task_scs_offset(tsk) = 0; +} + +static inline unsigned long *__scs_magic(void *s) +{ + return (unsigned long *)(s + SCS_SIZE) - 1; +} + +static inline bool scs_corrupted(struct task_struct *tsk) +{ + unsigned long *magic = __scs_magic(task_scs(tsk)); + + return (task_scs_offset(tsk) >= SCS_SIZE - 1 || + READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC); +} + +#else /* CONFIG_SHADOW_CALL_STACK */ + +static inline void scs_init(void) {} +static inline void scs_task_reset(struct task_struct *tsk) {} +static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; } +static inline bool scs_corrupted(struct task_struct *tsk) { return false; } +static inline void scs_release(struct task_struct *tsk) {} + +#endif /* CONFIG_SHADOW_CALL_STACK */ + +#endif /* _LINUX_SCS_H */ diff --git a/init/init_task.c b/init/init_task.c index bd403ed3e418..169e34066d35 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,13 @@ static struct sighand_struct init_sighand = { .signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh), }; +#ifdef CONFIG_SHADOW_CALL_STACK +unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] + __init_task_data = { + [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC +}; +#endif + /* * Set up the first task table, touch at your own risk!. 
Base=0, * limit=0x1fffff (=2MB) diff --git a/kernel/Makefile b/kernel/Makefile index 4cb4130ced32..c332eb9d4841 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -103,6 +103,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_BPF) += bpf/ +obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/fork.c b/kernel/fork.c index 8c700f881d92..f6339f9d232d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -94,6 +94,7 @@ #include #include #include +#include #include #include @@ -456,6 +457,8 @@ void put_task_stack(struct task_struct *tsk) void free_task(struct task_struct *tsk) { + scs_release(tsk); + #ifndef CONFIG_THREAD_INFO_IN_TASK /* * The task is finally done with both the stack and thread_info, @@ -840,6 +843,8 @@ void __init fork_init(void) NULL, free_vm_stack_cache); #endif + scs_init(); + lockdep_init_task(&init_task); uprobes_init(); } @@ -899,6 +904,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (err) goto free_stack; + err = scs_prepare(tsk, node); + if (err) + goto free_stack; + #ifdef CONFIG_SECCOMP /* * We must handle setting up seccomp filters once we're under diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9a2fbf98fd6f..934e03cfaec7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -6040,6 +6041,7 @@ void init_idle(struct task_struct *idle, int cpu) idle->se.exec_start = sched_clock(); idle->flags |= PF_IDLE; + scs_task_reset(idle); kasan_unpoison_task_stack(idle); #ifdef CONFIG_SMP diff --git a/kernel/scs.c b/kernel/scs.c new file mode 100644 index 000000000000..38f8f31c9451 --- /dev/null +++ b/kernel/scs.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Shadow Call Stack support. + * + * Copyright (C) 2019 Google LLC + */ + +#include +#include +#include +#include + +static struct kmem_cache *scs_cache; + +static void *scs_alloc(int node) +{ + void *s; + + s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node); + if (s) { + *__scs_magic(s) = SCS_END_MAGIC; + /* + * Poison the allocation to catch unintentional accesses to + * the shadow stack when KASAN is enabled. + */ + kasan_poison_object_data(scs_cache, s); + } + + return s; +} + +static void scs_free(void *s) +{ + kasan_unpoison_object_data(scs_cache, s); + kmem_cache_free(scs_cache, s); +} + +void __init scs_init(void) +{ + scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, 0, 0, NULL); +} + +int scs_prepare(struct task_struct *tsk, int node) +{ + void *s = scs_alloc(node); + + if (!s) + return -ENOMEM; + + task_scs(tsk) = s; + task_scs_offset(tsk) = 0; + + return 0; +} + +void scs_release(struct task_struct *tsk) +{ + void *s = task_scs(tsk); + + if (!s) + return; + + WARN(scs_corrupted(tsk), "corrupted shadow stack detected when freeing task\n"); + scs_free(s); +} From 628d06a48f57c36abdc2a024930212e654a501b7 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 27 Apr 2020 09:00:08 -0700 Subject: [PATCH 093/148] scs: Add page accounting for shadow call stack allocations This change adds accounting for the memory allocated for shadow stacks. 
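Once this is in place the cost is easy to observe from userspace; a quick check like the following (illustration only, not part of the patch) prints the new counter, which accounts SCS_SIZE (1 KiB) per live task in kilobytes per zone:

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/meminfo", "r");
	char line[128];

	if (!f)
		return 1;

	while (fgets(line, sizeof(line), f)) {
		/* printed only when CONFIG_SHADOW_CALL_STACK=y */
		if (!strncmp(line, "ShadowCallStack:", 16)) {
			fputs(line, stdout);
			break;
		}
	}

	fclose(f);
	return 0;
}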
Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Acked-by: Will Deacon Signed-off-by: Will Deacon --- drivers/base/node.c | 6 ++++++ fs/proc/meminfo.c | 4 ++++ include/linux/mmzone.h | 3 +++ kernel/scs.c | 15 +++++++++++++++ mm/page_alloc.c | 6 ++++++ mm/vmstat.c | 3 +++ 6 files changed, 37 insertions(+) diff --git a/drivers/base/node.c b/drivers/base/node.c index 10d7e818e118..50b8c0d43859 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev, "Node %d AnonPages: %8lu kB\n" "Node %d Shmem: %8lu kB\n" "Node %d KernelStack: %8lu kB\n" +#ifdef CONFIG_SHADOW_CALL_STACK + "Node %d ShadowCallStack:%8lu kB\n" +#endif "Node %d PageTables: %8lu kB\n" "Node %d NFS_Unstable: %8lu kB\n" "Node %d Bounce: %8lu kB\n" @@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(node_page_state(pgdat, NR_ANON_MAPPED)), nid, K(i.sharedram), nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB), +#ifdef CONFIG_SHADOW_CALL_STACK + nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB), +#endif nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)), nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)), nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 8c1f1bb1a5ce..09cd51c8d23d 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "SUnreclaim: ", sunreclaim); seq_printf(m, "KernelStack: %8lu kB\n", global_zone_page_state(NR_KERNEL_STACK_KB)); +#ifdef CONFIG_SHADOW_CALL_STACK + seq_printf(m, "ShadowCallStack:%8lu kB\n", + global_zone_page_state(NR_KERNEL_SCS_KB)); +#endif show_val_kb(m, "PageTables: ", global_zone_page_state(NR_PAGETABLE)); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1b9de7d220fb..acffc3bc6178 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -156,6 +156,9 @@ enum zone_stat_item { NR_MLOCK, /* mlock()ed pages found and moved off LRU */ NR_PAGETABLE, /* used for pagetables */ NR_KERNEL_STACK_KB, /* measured in KiB */ +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) + NR_KERNEL_SCS_KB, /* measured in KiB */ +#endif /* Second 128 byte cacheline */ NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) diff --git a/kernel/scs.c b/kernel/scs.c index 38f8f31c9451..6d2f983ac54e 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -6,8 +6,10 @@ */ #include +#include #include #include +#include #include static struct kmem_cache *scs_cache; @@ -40,6 +42,17 @@ void __init scs_init(void) scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, 0, 0, NULL); } +static struct page *__scs_page(struct task_struct *tsk) +{ + return virt_to_page(task_scs(tsk)); +} + +static void scs_account(struct task_struct *tsk, int account) +{ + mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_KB, + account * (SCS_SIZE / 1024)); +} + int scs_prepare(struct task_struct *tsk, int node) { void *s = scs_alloc(node); @@ -49,6 +62,7 @@ int scs_prepare(struct task_struct *tsk, int node) task_scs(tsk) = s; task_scs_offset(tsk) = 0; + scs_account(tsk, 1); return 0; } @@ -61,5 +75,6 @@ void scs_release(struct task_struct *tsk) return; WARN(scs_corrupted(tsk), "corrupted shadow stack detected when freeing task\n"); + scs_account(tsk, -1); scs_free(s); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 69827d4fa052..83743d7a6177 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5411,6 +5411,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) " 
managed:%lukB" " mlocked:%lukB" " kernel_stack:%lukB" +#ifdef CONFIG_SHADOW_CALL_STACK + " shadow_call_stack:%lukB" +#endif " pagetables:%lukB" " bounce:%lukB" " free_pcp:%lukB" @@ -5433,6 +5436,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) K(zone_managed_pages(zone)), K(zone_page_state(zone, NR_MLOCK)), zone_page_state(zone, NR_KERNEL_STACK_KB), +#ifdef CONFIG_SHADOW_CALL_STACK + zone_page_state(zone, NR_KERNEL_SCS_KB), +#endif K(zone_page_state(zone, NR_PAGETABLE)), K(zone_page_state(zone, NR_BOUNCE)), K(free_pcp), diff --git a/mm/vmstat.c b/mm/vmstat.c index 96d21a792b57..2435d2c24657 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1119,6 +1119,9 @@ const char * const vmstat_text[] = { "nr_mlock", "nr_page_table_pages", "nr_kernel_stack", +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) + "nr_shadow_call_stack", +#endif "nr_bounce", #if IS_ENABLED(CONFIG_ZSMALLOC) "nr_zspages", From 5bbaf9d1fcb9be696ee9a61636ab6803556c70f2 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 27 Apr 2020 09:00:09 -0700 Subject: [PATCH 094/148] scs: Add support for stack usage debugging Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When enabled, also prints out the highest shadow stack usage per process. Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Acked-by: Will Deacon [will: rewrote most of scs_check_usage()] Signed-off-by: Will Deacon --- kernel/scs.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/kernel/scs.c b/kernel/scs.c index 6d2f983ac54e..9389c28f0853 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -63,10 +63,37 @@ int scs_prepare(struct task_struct *tsk, int node) task_scs(tsk) = s; task_scs_offset(tsk) = 0; scs_account(tsk, 1); - return 0; } +static void scs_check_usage(struct task_struct *tsk) +{ + static unsigned long highest; + + unsigned long *p, prev, curr = highest, used = 0; + + if (!IS_ENABLED(CONFIG_DEBUG_STACK_USAGE)) + return; + + for (p = task_scs(tsk); p < __scs_magic(tsk); ++p) { + if (!READ_ONCE_NOCHECK(*p)) + break; + used++; + } + + while (used > curr) { + prev = cmpxchg_relaxed(&highest, curr, used); + + if (prev == curr) { + pr_info("%s (%d): highest shadow stack usage: %lu bytes\n", + tsk->comm, task_pid_nr(tsk), used); + break; + } + + curr = prev; + } +} + void scs_release(struct task_struct *tsk) { void *s = task_scs(tsk); @@ -75,6 +102,7 @@ void scs_release(struct task_struct *tsk) return; WARN(scs_corrupted(tsk), "corrupted shadow stack detected when freeing task\n"); + scs_check_usage(tsk); scs_account(tsk, -1); scs_free(s); } From ddc9863e9e9025e2fc0c8dba31cc060cd626606f Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 27 Apr 2020 09:00:10 -0700 Subject: [PATCH 095/148] scs: Disable when function graph tracing is enabled The graph tracer hooks returns by modifying frame records on the (regular) stack, but with SCS the return address is taken from the shadow stack, and the value in the frame record has no effect. As we don't currently have a mechanism to determine the corresponding slot on the shadow stack (and to pass this through the ftrace infrastructure), for now let's disable SCS when the graph tracer is enabled. With SCS the return address is taken from the shadow stack and the value in the frame record has no effect. The mcount based graph tracer hooks returns by modifying frame records on the (regular) stack, and thus is not compatible. 
The patchable-function-entry graph tracer used for DYNAMIC_FTRACE_WITH_REGS modifies the LR before it is saved to the shadow stack, and is compatible. Modifying the mcount based graph tracer to work with SCS would require a mechanism to determine the corresponding slot on the shadow stack (and to pass this through the ftrace infrastructure), and we expect that everyone will eventually move to the patchable-function-entry based graph tracer anyway, so for now let's disable SCS when the mcount-based graph tracer is enabled. SCS and patchable-function-entry are both supported from LLVM 10.x. Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/Kconfig b/arch/Kconfig index 334a3d9b19df..45dfca9a98d3 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -543,6 +543,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK config SHADOW_CALL_STACK bool "Clang Shadow Call Stack" depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK + depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER help This option enables Clang's Shadow Call Stack, which uses a shadow stack to protect function return addresses from being From da64e9d1f8c3dad6898dac6edb39a68d3aa9ce93 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 27 Apr 2020 09:00:11 -0700 Subject: [PATCH 096/148] arm64: Reserve register x18 from general allocation with SCS Reserve the x18 register from general allocation when SCS is enabled, because the compiler uses the register to store the current task's shadow stack pointer. Note that all external kernel modules must also be compiled with -ffixed-x18 if the kernel has SCS enabled. Signed-off-by: Sami Tolvanen Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Acked-by: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 85e4149cc5d5..409a6c1be8cc 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -81,6 +81,10 @@ endif KBUILD_CFLAGS += $(branch-prot-flags-y) +ifeq ($(CONFIG_SHADOW_CALL_STACK), y) +KBUILD_CFLAGS += -ffixed-x18 +endif + ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ From 6d37d81f449a103a8b43c5c972b5055b8936ef0e Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 27 Apr 2020 09:00:12 -0700 Subject: [PATCH 097/148] arm64: Preserve register x18 when CPU is suspended Don't lose the current task's shadow stack when the CPU is suspended. Signed-off-by: Sami Tolvanen Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/suspend.h | 2 +- arch/arm64/mm/proc.S | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 8939c87c4dce..0cde2f473971 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -2,7 +2,7 @@ #ifndef __ASM_SUSPEND_H #define __ASM_SUSPEND_H -#define NR_CTX_REGS 12 +#define NR_CTX_REGS 13 #define NR_CALLEE_SAVED_REGS 12 /* diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 197a9ba2d5ea..ed15be0f8103 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -58,6 +58,8 @@ * cpu_do_suspend - save CPU registers context * * x0: virtual address of context pointer + * + * This must be kept in sync with struct cpu_suspend_ctx in . 
*/ SYM_FUNC_START(cpu_do_suspend) mrs x2, tpidr_el0 @@ -82,6 +84,11 @@ alternative_endif stp x8, x9, [x0, #48] stp x10, x11, [x0, #64] stp x12, x13, [x0, #80] + /* + * Save x18 as it may be used as a platform register, e.g. by shadow + * call stack. + */ + str x18, [x0, #96] ret SYM_FUNC_END(cpu_do_suspend) @@ -98,6 +105,13 @@ SYM_FUNC_START(cpu_do_resume) ldp x9, x10, [x0, #48] ldp x11, x12, [x0, #64] ldp x13, x14, [x0, #80] + /* + * Restore x18, as it may be used as a platform register, and clear + * the buffer to minimize the risk of exposure when used for shadow + * call stack. + */ + ldr x18, [x0, #96] + str xzr, [x0, #96] msr tpidr_el0, x2 msr tpidrro_el0, x3 msr contextidr_el1, x4 From e73f02c6eb15729164b9dd5e19214b54446823ab Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 27 Apr 2020 09:00:13 -0700 Subject: [PATCH 098/148] arm64: efi: Restore register x18 if it was corrupted If we detect a corrupted x18, restore the register before jumping back to potentially SCS instrumented code. This is safe, because the wrapper is called with preemption disabled and a separate shadow stack is used for interrupt handling. Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Acked-by: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/efi-rt-wrapper.S | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index 3fc71106cb2b..6ca6c0dc11a1 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -34,5 +34,14 @@ ENTRY(__efi_rt_asm_wrapper) ldp x29, x30, [sp], #32 b.ne 0f ret -0: b efi_handle_corrupted_x18 // tail call +0: + /* + * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a + * shadow stack pointer, which we need to restore before returning to + * potentially instrumented code. This is safe because the wrapper is + * called with preemption disabled and a separate shadow stack is used + * for interrupts. + */ + mov x18, x2 + b efi_handle_corrupted_x18 // tail call ENDPROC(__efi_rt_asm_wrapper) From cde5dec89e5dee5a6de12cd99fdb24651ee03146 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 27 Apr 2020 09:00:14 -0700 Subject: [PATCH 099/148] arm64: vdso: Disable Shadow Call Stack Shadow stacks are only available in the kernel, so disable SCS instrumentation for the vDSO. Signed-off-by: Sami Tolvanen Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index dd2514bb1511..a87a4f11724e 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -25,7 +25,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING VDSO_LDFLAGS := -Bsymbolic -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) KBUILD_CFLAGS += $(DISABLE_LTO) KASAN_SANITIZE := n UBSAN_SANITIZE := n From 9654736891c3ac6a60b52ce70d33cf57cf95bff7 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 27 Apr 2020 09:00:15 -0700 Subject: [PATCH 100/148] arm64: Disable SCS for hypervisor code Disable SCS for code that runs at a different exception level by adding __noscs to __hyp_text. 
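For out-of-tree code with similar constraints the same opt-out can be applied directly; a minimal sketch follows (the helper below is hypothetical, not from the patch):

#include <linux/compiler.h>

void example_noscs_helper(void);	/* hypothetical declaration */

/*
 * __noscs expands to __attribute__((__no_sanitize__("shadow-call-stack")))
 * under Clang and to nothing otherwise, so no shadow call stack pushes or
 * pops are emitted for this function.
 */
void __noscs example_noscs_helper(void)
{
	/* e.g. code running at another exception level or before SCS setup */
}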
Suggested-by: James Morse Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Acked-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_hyp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index fe57f60f06a8..875b106c5d98 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -13,7 +13,7 @@ #include #include -#define __hyp_text __section(.hyp.text) notrace +#define __hyp_text __section(.hyp.text) notrace __noscs #define read_sysreg_elx(r,nvh,vh) \ ({ \ From 5287569a790d2546a06db07e391bf84b8bd6cf51 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 27 Apr 2020 09:00:16 -0700 Subject: [PATCH 101/148] arm64: Implement Shadow Call Stack This change implements shadow stack switching, initial SCS set-up, and interrupt shadow stacks for arm64. Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 5 +++ arch/arm64/include/asm/scs.h | 46 ++++++++++++++++++++++++++++ arch/arm64/include/asm/thread_info.h | 13 ++++++++ arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/asm-offsets.c | 4 +++ arch/arm64/kernel/entry.S | 24 +++++++++++++-- arch/arm64/kernel/head.S | 6 ++++ arch/arm64/kernel/process.c | 2 ++ arch/arm64/kernel/scs.c | 15 +++++++++ 9 files changed, 114 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/scs.h create mode 100644 arch/arm64/kernel/scs.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 40fb05d96c60..c380a16533f6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -64,6 +64,7 @@ config ARM64 select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_SUPPORTS_MEMORY_FAILURE + select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) select ARCH_SUPPORTS_NUMA_BALANCING @@ -1025,6 +1026,10 @@ config ARCH_HAS_CACHE_LINE_SIZE config ARCH_ENABLE_SPLIT_PMD_PTLOCK def_bool y if PGTABLE_LEVELS > 2 +# Supported by clang >= 7.0 +config CC_HAVE_SHADOW_CALL_STACK + def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18) + config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" ---help--- diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h new file mode 100644 index 000000000000..96549353b0cb --- /dev/null +++ b/arch/arm64/include/asm/scs.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SCS_H +#define _ASM_SCS_H + +#ifdef __ASSEMBLY__ + +#ifdef CONFIG_SHADOW_CALL_STACK + .macro scs_load tsk, tmp + ldp x18, \tmp, [\tsk, #TSK_TI_SCS_BASE] + add x18, x18, \tmp + .endm + + .macro scs_save tsk, tmp + ldr \tmp, [\tsk, #TSK_TI_SCS_BASE] + sub \tmp, x18, \tmp + str \tmp, [\tsk, #TSK_TI_SCS_OFFSET] + .endm +#else + .macro scs_load tsk, tmp + .endm + + .macro scs_save tsk, tmp + .endm +#endif /* CONFIG_SHADOW_CALL_STACK */ + +#else /* __ASSEMBLY__ */ + +#include + +#ifdef CONFIG_SHADOW_CALL_STACK + +static inline void scs_overflow_check(struct task_struct *tsk) +{ + if (unlikely(scs_corrupted(tsk))) + panic("corrupted shadow stack detected inside scheduler\n"); +} + +#else /* CONFIG_SHADOW_CALL_STACK */ + +static inline void scs_overflow_check(struct task_struct *tsk) {} + +#endif /* CONFIG_SHADOW_CALL_STACK */ + +#endif /* __ASSEMBLY __ */ + +#endif /* _ASM_SCS_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h 
index 512174a8e789..9df79c0a4c43 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -41,6 +41,10 @@ struct thread_info { #endif } preempt; }; +#ifdef CONFIG_SHADOW_CALL_STACK + void *scs_base; + unsigned long scs_offset; +#endif }; #define thread_saved_pc(tsk) \ @@ -100,11 +104,20 @@ void arch_release_task_struct(struct task_struct *tsk); _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ _TIF_SYSCALL_EMU) +#ifdef CONFIG_SHADOW_CALL_STACK +#define INIT_SCS \ + .scs_base = init_shadow_call_stack, \ + .scs_offset = 0, +#else +#define INIT_SCS +#endif + #define INIT_THREAD_INFO(tsk) \ { \ .flags = _TIF_FOREIGN_FPSTATE, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ + INIT_SCS \ } #endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 4e5b8ee31442..151f28521f1e 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_ARM64_SSBD) += ssbd.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o +obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o obj-y += vdso/ probes/ obj-$(CONFIG_COMPAT_VDSO) += vdso32/ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 9981a0a5a87f..d7934250b68c 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -33,6 +33,10 @@ int main(void) DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit)); #ifdef CONFIG_ARM64_SW_TTBR0_PAN DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); +#endif +#ifdef CONFIG_SHADOW_CALL_STACK + DEFINE(TSK_TI_SCS_BASE, offsetof(struct task_struct, thread_info.scs_base)); + DEFINE(TSK_TI_SCS_OFFSET, offsetof(struct task_struct, thread_info.scs_offset)); #endif DEFINE(TSK_STACK, offsetof(struct task_struct, stack)); #ifdef CONFIG_STACKPROTECTOR diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ddcde093c433..244268d5ae47 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -179,6 +180,8 @@ alternative_cb_end apply_ssbd 1, x22, x23 ptrauth_keys_install_kernel tsk, 1, x20, x22, x23 + + scs_load tsk, x20 .else add x21, sp, #S_FRAME_SIZE get_current_task tsk @@ -343,6 +346,8 @@ alternative_else_nop_endif msr cntkctl_el1, x1 4: #endif + scs_save tsk, x0 + /* No kernel C function calls after this as user keys are set. */ ptrauth_keys_install_user tsk, x0, x1, x2 @@ -388,6 +393,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 .macro irq_stack_entry mov x19, sp // preserve the original sp +#ifdef CONFIG_SHADOW_CALL_STACK + mov x24, x18 // preserve the original shadow stack +#endif /* * Compare sp with the base of the task stack. @@ -405,15 +413,25 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 /* switch to the irq stack */ mov sp, x26 + +#ifdef CONFIG_SHADOW_CALL_STACK + /* also switch to the irq shadow stack */ + adr_this_cpu x18, irq_shadow_call_stack, x26 +#endif + 9998: .endm /* - * x19 should be preserved between irq_stack_entry and - * irq_stack_exit. + * The callee-saved regs (x19-x29) should be preserved between + * irq_stack_entry and irq_stack_exit, but note that kernel_entry + * uses x20-x23 to store data for later use. 
*/ .macro irq_stack_exit mov sp, x19 +#ifdef CONFIG_SHADOW_CALL_STACK + mov x18, x24 +#endif .endm /* GPRs used by entry code */ @@ -901,6 +919,8 @@ SYM_FUNC_START(cpu_switch_to) mov sp, x9 msr sp_el0, x1 ptrauth_keys_install_kernel x1, 1, x8, x9, x10 + scs_save x0, x8 + scs_load x1, x8 ret SYM_FUNC_END(cpu_switch_to) NOKPROBE(cpu_switch_to) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 57a91032b4c2..2b01c19c5483 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -424,6 +425,10 @@ SYM_FUNC_START_LOCAL(__primary_switched) stp xzr, x30, [sp, #-16]! mov x29, sp +#ifdef CONFIG_SHADOW_CALL_STACK + adr_l x18, init_shadow_call_stack // Set shadow call stack +#endif + str_l x21, __fdt_pointer, x5 // Save FDT pointer ldr_l x4, kimage_vaddr // Save the offset between @@ -737,6 +742,7 @@ SYM_FUNC_START_LOCAL(__secondary_switched) ldr x2, [x0, #CPU_BOOT_TASK] cbz x2, __secondary_too_slow msr sp_el0, x2 + scs_load x2, x3 mov x29, #0 mov x30, #0 b secondary_start_kernel diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 56be4cbf771f..a35d3318492c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) @@ -515,6 +516,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, entry_task_switch(next); uao_thread_switch(next); ssbs_thread_switch(next); + scs_overflow_check(next); /* * Complete any pending TLB or cache maintenance on this CPU in case diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c new file mode 100644 index 000000000000..acc6741d1a40 --- /dev/null +++ b/arch/arm64/kernel/scs.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Shadow Call Stack support. + * + * Copyright (C) 2019 Google LLC + */ + +#include +#include + +/* Allocate a static per-CPU shadow stack */ +#define DEFINE_SCS(name) \ + DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \ + +DEFINE_SCS(irq_shadow_call_stack); From 439dc2a11727314cdc3ad0ad13c122d910dae411 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 27 Apr 2020 09:00:17 -0700 Subject: [PATCH 102/148] arm64: scs: Add shadow stacks for SDEI This change adds per-CPU shadow call stacks for the SDEI handler. Similarly to how the kernel stacks are handled, we add separate shadow stacks for normal and critical events. Signed-off-by: Sami Tolvanen Reviewed-by: James Morse Tested-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 14 +++++++++++++- arch/arm64/kernel/scs.c | 5 +++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 244268d5ae47..cb0516e6f963 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -1049,13 +1049,16 @@ SYM_CODE_START(__sdei_asm_handler) mov x19, x1 +#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK) + ldrb w4, [x19, #SDEI_EVENT_PRIORITY] +#endif + #ifdef CONFIG_VMAP_STACK /* * entry.S may have been using sp as a scratch register, find whether * this is a normal or critical event and switch to the appropriate * stack for this CPU. 
*/ - ldrb w4, [x19, #SDEI_EVENT_PRIORITY] cbnz w4, 1f ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6 b 2f @@ -1065,6 +1068,15 @@ SYM_CODE_START(__sdei_asm_handler) mov sp, x5 #endif +#ifdef CONFIG_SHADOW_CALL_STACK + /* Use a separate shadow call stack for normal and critical events */ + cbnz w4, 3f + adr_this_cpu dst=x18, sym=sdei_shadow_call_stack_normal, tmp=x6 + b 4f +3: adr_this_cpu dst=x18, sym=sdei_shadow_call_stack_critical, tmp=x6 +4: +#endif + /* * We may have interrupted userspace, or a guest, or exit-from or * return-to either of these. We can't trust sp_el0, restore it. diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c index acc6741d1a40..adc97f826fab 100644 --- a/arch/arm64/kernel/scs.c +++ b/arch/arm64/kernel/scs.c @@ -13,3 +13,8 @@ DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \ DEFINE_SCS(irq_shadow_call_stack); + +#ifdef CONFIG_ARM_SDE_INTERFACE +DEFINE_SCS(sdei_shadow_call_stack_normal); +DEFINE_SCS(sdei_shadow_call_stack_critical); +#endif From cc49c71d2abe99c1c2c9bedf0693ad2d3ee4a067 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 27 Apr 2020 09:00:18 -0700 Subject: [PATCH 103/148] efi/libstub: Disable Shadow Call Stack Shadow stacks are not available in the EFI stub, filter out SCS flags. Suggested-by: James Morse Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Acked-by: Ard Biesheuvel Signed-off-by: Will Deacon --- drivers/firmware/efi/libstub/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 094eabdecfe6..b52ae8c29560 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -32,6 +32,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \ $(call cc-option,-fno-stack-protector) \ -D__DISABLE_EXPORTS +# remove SCS flags from all objects in this directory +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) + GCOV_PROFILE := n KASAN_SANITIZE := n UBSAN_SANITIZE := n From 51189c7a7ed1b4ed4493e27275d466ff60406d3a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 May 2020 14:11:05 +0100 Subject: [PATCH 104/148] arm64: scs: Store absolute SCS stack pointer value in thread_info Storing the SCS information in thread_info as a {base,offset} pair introduces an additional load instruction on the ret-to-user path, since the SCS stack pointer in x18 has to be converted back to an offset by subtracting the base. Replace the offset with the absolute SCS stack pointer value instead and avoid the redundant load. 
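As a simplified illustration of the trade-off (editor's sketch with assumed types, not the patch itself): with a {base, offset} pair the exit path must reload the base and add the offset to rebuild x18, whereas storing the absolute pointer lets the entry code save and restore x18 with a single load or store.

  /* The field names follow the patch; everything else is illustrative. */
  struct scs_pair {
  	void		*scs_base;
  	unsigned long	scs_offset;	/* x18 == scs_base + scs_offset */
  };

  struct scs_absolute {
  	void		*scs_base;	/* still needed for alloc/free and overflow checks */
  	void		*scs_sp;	/* x18 saved/restored verbatim */
  };

  static inline void *scs_restore_pair(const struct scs_pair *ti)
  {
  	return (char *)ti->scs_base + ti->scs_offset;	/* extra load plus an add */
  }

  static inline void *scs_restore_absolute(const struct scs_absolute *ti)
  {
  	return ti->scs_sp;				/* a single load */
  }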
Tested-by: Sami Tolvanen Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/scs.h | 9 ++++----- arch/arm64/include/asm/thread_info.h | 4 ++-- arch/arm64/kernel/asm-offsets.c | 2 +- include/linux/scs.h | 8 ++++---- kernel/scs.c | 3 +-- 5 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index 96549353b0cb..6b8cf4352fe3 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -4,16 +4,15 @@ #ifdef __ASSEMBLY__ +#include + #ifdef CONFIG_SHADOW_CALL_STACK .macro scs_load tsk, tmp - ldp x18, \tmp, [\tsk, #TSK_TI_SCS_BASE] - add x18, x18, \tmp + ldr x18, [\tsk, #TSK_TI_SCS_SP] .endm .macro scs_save tsk, tmp - ldr \tmp, [\tsk, #TSK_TI_SCS_BASE] - sub \tmp, x18, \tmp - str \tmp, [\tsk, #TSK_TI_SCS_OFFSET] + str x18, [\tsk, #TSK_TI_SCS_SP] .endm #else .macro scs_load tsk, tmp diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 9df79c0a4c43..6ea8b6a26ae9 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -43,7 +43,7 @@ struct thread_info { }; #ifdef CONFIG_SHADOW_CALL_STACK void *scs_base; - unsigned long scs_offset; + void *scs_sp; #endif }; @@ -107,7 +107,7 @@ void arch_release_task_struct(struct task_struct *tsk); #ifdef CONFIG_SHADOW_CALL_STACK #define INIT_SCS \ .scs_base = init_shadow_call_stack, \ - .scs_offset = 0, + .scs_sp = init_shadow_call_stack, #else #define INIT_SCS #endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index d7934250b68c..a098a45f63d8 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -36,7 +36,7 @@ int main(void) #endif #ifdef CONFIG_SHADOW_CALL_STACK DEFINE(TSK_TI_SCS_BASE, offsetof(struct task_struct, thread_info.scs_base)); - DEFINE(TSK_TI_SCS_OFFSET, offsetof(struct task_struct, thread_info.scs_offset)); + DEFINE(TSK_TI_SCS_SP, offsetof(struct task_struct, thread_info.scs_sp)); #endif DEFINE(TSK_STACK, offsetof(struct task_struct, stack)); #ifdef CONFIG_STACKPROTECTOR diff --git a/include/linux/scs.h b/include/linux/scs.h index 3f3662621a27..0eb2485ef832 100644 --- a/include/linux/scs.h +++ b/include/linux/scs.h @@ -27,7 +27,7 @@ #define SCS_END_MAGIC (0x5f6UL + POISON_POINTER_DELTA) #define task_scs(tsk) (task_thread_info(tsk)->scs_base) -#define task_scs_offset(tsk) (task_thread_info(tsk)->scs_offset) +#define task_scs_sp(tsk) (task_thread_info(tsk)->scs_sp) void scs_init(void); int scs_prepare(struct task_struct *tsk, int node); @@ -39,7 +39,7 @@ static inline void scs_task_reset(struct task_struct *tsk) * Reset the shadow stack to the base address in case the task * is reused. 
*/ - task_scs_offset(tsk) = 0; + task_scs_sp(tsk) = task_scs(tsk); } static inline unsigned long *__scs_magic(void *s) @@ -50,9 +50,9 @@ static inline unsigned long *__scs_magic(void *s) static inline bool scs_corrupted(struct task_struct *tsk) { unsigned long *magic = __scs_magic(task_scs(tsk)); + unsigned long sz = task_scs_sp(tsk) - task_scs(tsk); - return (task_scs_offset(tsk) >= SCS_SIZE - 1 || - READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC); + return sz >= SCS_SIZE - 1 || READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC; } #else /* CONFIG_SHADOW_CALL_STACK */ diff --git a/kernel/scs.c b/kernel/scs.c index 9389c28f0853..5ff8663e4a67 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -60,8 +60,7 @@ int scs_prepare(struct task_struct *tsk, int node) if (!s) return -ENOMEM; - task_scs(tsk) = s; - task_scs_offset(tsk) = 0; + task_scs(tsk) = task_scs_sp(tsk) = s; scs_account(tsk, 1); return 0; } From bee348fab099b0f551caa874663e82a7f3bb64b3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 May 2020 14:43:11 +0100 Subject: [PATCH 105/148] scs: Move accounting into alloc/free functions There's no need to perform the shadow stack page accounting independently of the lifetime of the underlying allocation, so call the accounting code from the {alloc,free}() functions and simplify the code in the process. Tested-by: Sami Tolvanen Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- kernel/scs.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/kernel/scs.c b/kernel/scs.c index 5ff8663e4a67..aea841cd7586 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -14,25 +14,35 @@ static struct kmem_cache *scs_cache; +static void __scs_account(void *s, int account) +{ + struct page *scs_page = virt_to_page(s); + + mod_zone_page_state(page_zone(scs_page), NR_KERNEL_SCS_KB, + account * (SCS_SIZE / SZ_1K)); +} + static void *scs_alloc(int node) { - void *s; + void *s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node); - s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node); - if (s) { - *__scs_magic(s) = SCS_END_MAGIC; - /* - * Poison the allocation to catch unintentional accesses to - * the shadow stack when KASAN is enabled. - */ - kasan_poison_object_data(scs_cache, s); - } + if (!s) + return NULL; + *__scs_magic(s) = SCS_END_MAGIC; + + /* + * Poison the allocation to catch unintentional accesses to + * the shadow stack when KASAN is enabled. 
+ */ + kasan_poison_object_data(scs_cache, s); + __scs_account(s, 1); return s; } static void scs_free(void *s) { + __scs_account(s, -1); kasan_unpoison_object_data(scs_cache, s); kmem_cache_free(scs_cache, s); } @@ -42,17 +52,6 @@ void __init scs_init(void) scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, 0, 0, NULL); } -static struct page *__scs_page(struct task_struct *tsk) -{ - return virt_to_page(task_scs(tsk)); -} - -static void scs_account(struct task_struct *tsk, int account) -{ - mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_KB, - account * (SCS_SIZE / 1024)); -} - int scs_prepare(struct task_struct *tsk, int node) { void *s = scs_alloc(node); @@ -61,7 +60,6 @@ int scs_prepare(struct task_struct *tsk, int node) return -ENOMEM; task_scs(tsk) = task_scs_sp(tsk) = s; - scs_account(tsk, 1); return 0; } @@ -102,6 +100,5 @@ void scs_release(struct task_struct *tsk) WARN(scs_corrupted(tsk), "corrupted shadow stack detected when freeing task\n"); scs_check_usage(tsk); - scs_account(tsk, -1); scs_free(s); } From 711e8b0de0d63c70c825b473da01288b661a2386 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 May 2020 14:46:46 +0100 Subject: [PATCH 106/148] arm64: scs: Use 'scs_sp' register alias for x18 x18 holds the SCS stack pointer value, so introduce a register alias to make this easier to read in assembly code. Tested-by: Sami Tolvanen Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/scs.h | 6 ++++-- arch/arm64/kernel/entry.S | 10 +++++----- arch/arm64/kernel/head.S | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index 6b8cf4352fe3..d46efdd2060a 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -7,12 +7,14 @@ #include #ifdef CONFIG_SHADOW_CALL_STACK + scs_sp .req x18 + .macro scs_load tsk, tmp - ldr x18, [\tsk, #TSK_TI_SCS_SP] + ldr scs_sp, [\tsk, #TSK_TI_SCS_SP] .endm .macro scs_save tsk, tmp - str x18, [\tsk, #TSK_TI_SCS_SP] + str scs_sp, [\tsk, #TSK_TI_SCS_SP] .endm #else .macro scs_load tsk, tmp diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index cb0516e6f963..741faf0706f1 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -394,7 +394,7 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 .macro irq_stack_entry mov x19, sp // preserve the original sp #ifdef CONFIG_SHADOW_CALL_STACK - mov x24, x18 // preserve the original shadow stack + mov x24, scs_sp // preserve the original shadow stack #endif /* @@ -416,7 +416,7 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 #ifdef CONFIG_SHADOW_CALL_STACK /* also switch to the irq shadow stack */ - adr_this_cpu x18, irq_shadow_call_stack, x26 + adr_this_cpu scs_sp, irq_shadow_call_stack, x26 #endif 9998: @@ -430,7 +430,7 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 .macro irq_stack_exit mov sp, x19 #ifdef CONFIG_SHADOW_CALL_STACK - mov x18, x24 + mov scs_sp, x24 #endif .endm @@ -1071,9 +1071,9 @@ SYM_CODE_START(__sdei_asm_handler) #ifdef CONFIG_SHADOW_CALL_STACK /* Use a separate shadow call stack for normal and critical events */ cbnz w4, 3f - adr_this_cpu dst=x18, sym=sdei_shadow_call_stack_normal, tmp=x6 + adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_normal, tmp=x6 b 4f -3: adr_this_cpu dst=x18, sym=sdei_shadow_call_stack_critical, tmp=x6 +3: adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_critical, tmp=x6 4: #endif diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 
2b01c19c5483..1293baddfd20 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -426,7 +426,7 @@ SYM_FUNC_START_LOCAL(__primary_switched) mov x29, sp #ifdef CONFIG_SHADOW_CALL_STACK - adr_l x18, init_shadow_call_stack // Set shadow call stack + adr_l scs_sp, init_shadow_call_stack // Set shadow call stack #endif str_l x21, __fdt_pointer, x5 // Save FDT pointer From 88485be531f4aee841ddc53b56e2f6e6a338854d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 May 2020 14:56:05 +0100 Subject: [PATCH 107/148] scs: Move scs_overflow_check() out of architecture code There is nothing architecture-specific about scs_overflow_check() as it's just a trivial wrapper around scs_corrupted(). For parity with task_stack_end_corrupted(), rename scs_corrupted() to task_scs_end_corrupted() and call it from schedule_debug() when CONFIG_SCHED_STACK_END_CHECK_is enabled, which better reflects its purpose as a debug feature to catch inadvertent overflow of the SCS. Finally, remove the unused scs_overflow_check() function entirely. This has absolutely no impact on architectures that do not support SCS (currently arm64 only). Tested-by: Sami Tolvanen Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/scs.h | 18 ------------------ arch/arm64/kernel/process.c | 2 -- arch/arm64/kernel/scs.c | 2 +- include/linux/scs.h | 4 ++-- kernel/sched/core.c | 3 +++ kernel/scs.c | 3 ++- 6 files changed, 8 insertions(+), 24 deletions(-) diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index d46efdd2060a..eaa2cd92e4c1 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -24,24 +24,6 @@ .endm #endif /* CONFIG_SHADOW_CALL_STACK */ -#else /* __ASSEMBLY__ */ - -#include - -#ifdef CONFIG_SHADOW_CALL_STACK - -static inline void scs_overflow_check(struct task_struct *tsk) -{ - if (unlikely(scs_corrupted(tsk))) - panic("corrupted shadow stack detected inside scheduler\n"); -} - -#else /* CONFIG_SHADOW_CALL_STACK */ - -static inline void scs_overflow_check(struct task_struct *tsk) {} - -#endif /* CONFIG_SHADOW_CALL_STACK */ - #endif /* __ASSEMBLY __ */ #endif /* _ASM_SCS_H */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index a35d3318492c..56be4cbf771f 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -52,7 +52,6 @@ #include #include #include -#include #include #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) @@ -516,7 +515,6 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, entry_task_switch(next); uao_thread_switch(next); ssbs_thread_switch(next); - scs_overflow_check(next); /* * Complete any pending TLB or cache maintenance on this CPU in case diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c index adc97f826fab..955875dff9e1 100644 --- a/arch/arm64/kernel/scs.c +++ b/arch/arm64/kernel/scs.c @@ -6,7 +6,7 @@ */ #include -#include +#include /* Allocate a static per-CPU shadow stack */ #define DEFINE_SCS(name) \ diff --git a/include/linux/scs.h b/include/linux/scs.h index 0eb2485ef832..2fd3df50e93e 100644 --- a/include/linux/scs.h +++ b/include/linux/scs.h @@ -47,7 +47,7 @@ static inline unsigned long *__scs_magic(void *s) return (unsigned long *)(s + SCS_SIZE) - 1; } -static inline bool scs_corrupted(struct task_struct *tsk) +static inline bool task_scs_end_corrupted(struct task_struct *tsk) { unsigned long *magic = __scs_magic(task_scs(tsk)); unsigned long sz = task_scs_sp(tsk) - task_scs(tsk); @@ -60,8 +60,8 @@ 
static inline bool scs_corrupted(struct task_struct *tsk) static inline void scs_init(void) {} static inline void scs_task_reset(struct task_struct *tsk) {} static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; } -static inline bool scs_corrupted(struct task_struct *tsk) { return false; } static inline void scs_release(struct task_struct *tsk) {} +static inline bool task_scs_end_corrupted(struct task_struct *tsk) { return false; } #endif /* CONFIG_SHADOW_CALL_STACK */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 934e03cfaec7..a1d815a11b90 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3878,6 +3878,9 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt) #ifdef CONFIG_SCHED_STACK_END_CHECK if (task_stack_end_corrupted(prev)) panic("corrupted stack end detected inside scheduler\n"); + + if (task_scs_end_corrupted(prev)) + panic("corrupted shadow stack detected inside scheduler\n"); #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP diff --git a/kernel/scs.c b/kernel/scs.c index aea841cd7586..faf0ecd7b893 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -98,7 +98,8 @@ void scs_release(struct task_struct *tsk) if (!s) return; - WARN(scs_corrupted(tsk), "corrupted shadow stack detected when freeing task\n"); + WARN(task_scs_end_corrupted(tsk), + "corrupted shadow stack detected when freeing task\n"); scs_check_usage(tsk); scs_free(s); } From aa7a65ae5b8f459617e5ed1422301386e7f12274 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 May 2020 16:15:46 +0100 Subject: [PATCH 108/148] scs: Remove references to asm/scs.h from core code asm/scs.h is no longer needed by the core code, so remove a redundant header inclusion and update the stale Kconfig text. Tested-by: Sami Tolvanen Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/Kconfig | 4 ++-- kernel/scs.c | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 45dfca9a98d3..2e6f843d87c4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -537,8 +537,8 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK bool help An architecture should select this if it supports Clang's Shadow - Call Stack, has asm/scs.h, and implements runtime support for shadow - stack switching. + Call Stack and implements runtime support for shadow stack + switching. config SHADOW_CALL_STACK bool "Clang Shadow Call Stack" diff --git a/kernel/scs.c b/kernel/scs.c index faf0ecd7b893..222a7a9ad543 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -10,7 +10,6 @@ #include #include #include -#include static struct kmem_cache *scs_cache; From 871e100e432c651c9c46fb9c3184b4577e0de3ae Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 May 2020 16:17:12 +0100 Subject: [PATCH 109/148] scs: Move DEFINE_SCS macro into core code Defining static shadow call stacks is not architecture-specific, so move the DEFINE_SCS() macro into the core header file. 
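For context, an illustrative use of the relocated macro, mirroring the arm64 declarations elsewhere in this series: once DEFINE_SCS() lives in <linux/scs.h>, an architecture only needs something like

  #include <linux/scs.h>

  DEFINE_SCS(irq_shadow_call_stack);		/* per-CPU stack for IRQ entry */
  #ifdef CONFIG_ARM_SDE_INTERFACE
  DEFINE_SCS(sdei_shadow_call_stack_normal);	/* per-CPU SDEI normal events */
  DEFINE_SCS(sdei_shadow_call_stack_critical);	/* per-CPU SDEI critical events */
  #endif

with no arch-local copy of the per-CPU array definition.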
Tested-by: Sami Tolvanen Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/scs.c | 4 ---- include/linux/scs.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c index 955875dff9e1..e8f7ff45dd8f 100644 --- a/arch/arm64/kernel/scs.c +++ b/arch/arm64/kernel/scs.c @@ -8,10 +8,6 @@ #include #include -/* Allocate a static per-CPU shadow stack */ -#define DEFINE_SCS(name) \ - DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \ - DEFINE_SCS(irq_shadow_call_stack); #ifdef CONFIG_ARM_SDE_INTERFACE diff --git a/include/linux/scs.h b/include/linux/scs.h index 2fd3df50e93e..6dec390cf154 100644 --- a/include/linux/scs.h +++ b/include/linux/scs.h @@ -26,6 +26,10 @@ /* An illegal pointer value to mark the end of the shadow stack. */ #define SCS_END_MAGIC (0x5f6UL + POISON_POINTER_DELTA) +/* Allocate a static per-CPU shadow stack */ +#define DEFINE_SCS(name) \ + DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \ + #define task_scs(tsk) (task_thread_info(tsk)->scs_base) #define task_scs_sp(tsk) (task_thread_info(tsk)->scs_sp) From 258c3d628fe9e7512d98a0000709773457c66ef1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 18 May 2020 14:01:01 +0100 Subject: [PATCH 110/148] arm64: entry-ftrace.S: Update comment to indicate that x18 is live The Shadow Call Stack pointer is held in x18, so update the ftrace entry comment to indicate that it cannot be safely clobbered. Reported-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-ftrace.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 833d48c9acb5..a338f40e64d3 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -23,8 +23,9 @@ * * ... where is either ftrace_caller or ftrace_regs_caller. * - * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are - * live, and x9-x18 are safe to clobber. + * Each instrumented function follows the AAPCS, so here x0-x8 and x18-x30 are + * live (x18 holds the Shadow Call Stack pointer), and x9-x17 are safe to + * clobber. * * We save the callsite's context into a pt_regs before invoking any ftrace * callbacks. So that we can get a sensible backtrace, we create a stack record From b322c65f8ca37396cfd7d4d0ac2f7f2dc08fa9eb Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 13 May 2020 16:06:37 -0700 Subject: [PATCH 111/148] arm64: Call debug_traps_init() from trap_init() to help early kgdb A new kgdb feature will soon land (kgdb_earlycon) that lets us run kgdb much earlier. In order for everything to work properly it's important that the break hook is setup by the time we process "kgdbwait". Right now the break hook is setup in debug_traps_init() and that's called from arch_initcall(). That's a bit too late since kgdb_earlycon really needs things to be setup by the time the system calls dbg_late_init(). We could fix this by adding call_break_hook() into early_brk64() and that works fine. However, it's a little ugly. Instead, let's just add a call to debug_traps_init() straight from trap_init(). There's already a documented dependency between trap_init() and debug_traps_init() and this makes the dependency more obvious rather than just relying on a comment. NOTE: this solution isn't early enough to let us select the "ARCH_HAS_EARLY_DEBUG" KConfig option that is introduced by the kgdb_earlycon patch series. 
That would only be set if we could do breakpoints when early params are parsed. This patch only enables "late early" breakpoints, AKA breakpoints when dbg_late_init() is called. It's expected that this should be fine for most people. It should also be noted that if you crash you can still end up in kgdb earlier than debug_traps_init(). Since you don't need breakpoints to debug a crash that's fine. Suggested-by: Will Deacon Signed-off-by: Douglas Anderson Acked-by: Will Deacon Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20200513160501.1.I0b5edf030cc6ebef6ab4829f8867cdaea42485d8@changeid Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 2 ++ arch/arm64/kernel/debug-monitors.c | 4 +--- arch/arm64/kernel/traps.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7619f473155f..e5ceea213e39 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -125,5 +125,7 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs) int aarch32_break_handler(struct pt_regs *regs); +void debug_traps_init(void); + #endif /* __ASSEMBLY */ #endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 48222a4760c2..15e80c876d46 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -376,15 +376,13 @@ int aarch32_break_handler(struct pt_regs *regs) } NOKPROBE_SYMBOL(aarch32_break_handler); -static int __init debug_traps_init(void) +void __init debug_traps_init(void) { hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, TRAP_TRACE, "single-step handler"); hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, TRAP_BRKPT, "ptrace BRK handler"); - return 0; } -arch_initcall(debug_traps_init); /* Re-enable single step for syscall restarting. */ void user_rewind_single_step(struct task_struct *task) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index cf402be5c573..8408e8670f2e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -1047,11 +1047,11 @@ int __init early_brk64(unsigned long addr, unsigned int esr, return bug_handler(regs, esr) != DBG_HOOK_HANDLED; } -/* This registration must happen early, before debug_traps_init(). */ void __init trap_init(void) { register_kernel_break_hook(&bug_break_hook); #ifdef CONFIG_KASAN_SW_TAGS register_kernel_break_hook(&kasan_break_hook); #endif + debug_traps_init(); } From bd4298c72b56d7faf0ee3671739f3a704a962d0f Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Fri, 8 May 2020 11:15:45 +0800 Subject: [PATCH 112/148] arm64: stacktrace: Factor out some common code into on_stack() There are some common codes for stack checking, so factors it out into the function on_stack(). No functional change. 
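As an illustration of the resulting pattern (editor's sketch; the stack type, base symbol and size below are hypothetical), each per-stack helper collapses to a one-line wrapper once the bounds check lives in on_stack():

  static inline bool on_example_stack(unsigned long sp, struct stack_info *info)
  {
  	unsigned long low  = (unsigned long)example_stack_base;		/* assumed base symbol */
  	unsigned long high = low + EXAMPLE_STACK_SIZE;			/* assumed size */

  	return on_stack(sp, low, high, STACK_TYPE_EXAMPLE, info);	/* assumed enum value */
  }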
Signed-off-by: Yunfeng Ye Link: https://lore.kernel.org/r/07b3b0e6-3f58-4fed-07ea-7d17b7508948@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/stacktrace.h | 40 ++++++++++------------------- arch/arm64/kernel/sdei.c | 28 ++------------------ 2 files changed, 16 insertions(+), 52 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 4d9b1f48dc39..5017b531a415 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -68,12 +68,10 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); DECLARE_PER_CPU(unsigned long *, irq_stack_ptr); -static inline bool on_irq_stack(unsigned long sp, +static inline bool on_stack(unsigned long sp, unsigned long low, + unsigned long high, enum stack_type type, struct stack_info *info) { - unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); - unsigned long high = low + IRQ_STACK_SIZE; - if (!low) return false; @@ -83,12 +81,20 @@ static inline bool on_irq_stack(unsigned long sp, if (info) { info->low = low; info->high = high; - info->type = STACK_TYPE_IRQ; + info->type = type; } - return true; } +static inline bool on_irq_stack(unsigned long sp, + struct stack_info *info) +{ + unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); + unsigned long high = low + IRQ_STACK_SIZE; + + return on_stack(sp, low, high, STACK_TYPE_IRQ, info); +} + static inline bool on_task_stack(const struct task_struct *tsk, unsigned long sp, struct stack_info *info) @@ -96,16 +102,7 @@ static inline bool on_task_stack(const struct task_struct *tsk, unsigned long low = (unsigned long)task_stack_page(tsk); unsigned long high = low + THREAD_SIZE; - if (sp < low || sp >= high) - return false; - - if (info) { - info->low = low; - info->high = high; - info->type = STACK_TYPE_TASK; - } - - return true; + return on_stack(sp, low, high, STACK_TYPE_TASK, info); } #ifdef CONFIG_VMAP_STACK @@ -117,16 +114,7 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack); unsigned long high = low + OVERFLOW_STACK_SIZE; - if (sp < low || sp >= high) - return false; - - if (info) { - info->low = low; - info->high = high; - info->type = STACK_TYPE_OVERFLOW; - } - - return true; + return on_stack(sp, low, high, STACK_TYPE_OVERFLOW, info); } #else static inline bool on_overflow_stack(unsigned long sp, diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index d6259dac62b6..3afed808b474 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -95,19 +95,7 @@ static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info) unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); unsigned long high = low + SDEI_STACK_SIZE; - if (!low) - return false; - - if (sp < low || sp >= high) - return false; - - if (info) { - info->low = low; - info->high = high; - info->type = STACK_TYPE_SDEI_NORMAL; - } - - return true; + return on_stack(sp, low, high, STACK_TYPE_SDEI_NORMAL, info); } static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) @@ -115,19 +103,7 @@ static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); unsigned long high = low + SDEI_STACK_SIZE; - if (!low) - return false; - - if (sp < low || sp >= high) - return false; - - if (info) { - info->low = low; - info->high = high; - info->type = STACK_TYPE_SDEI_CRITICAL; - } - - return 
true; + return on_stack(sp, low, high, STACK_TYPE_SDEI_CRITICAL, info); } bool _on_sdei_stack(unsigned long sp, struct stack_info *info) From 5ec605108ff4901aedd62ee1bdd4250f2f7cf978 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Fri, 8 May 2020 12:05:52 +0800 Subject: [PATCH 113/148] ACPI: GTDT: Put GTDT table after parsing The mapped GTDT table needs to be released after the driver init. Signed-off-by: Hanjun Guo Link: https://lore.kernel.org/r/1588910753-18543-1-git-send-email-guohanjun@huawei.com Signed-off-by: Will Deacon --- drivers/acpi/arm64/gtdt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c index 01962c63a711..f2d0e5915dab 100644 --- a/drivers/acpi/arm64/gtdt.c +++ b/drivers/acpi/arm64/gtdt.c @@ -394,7 +394,7 @@ static int __init gtdt_sbsa_gwdt_init(void) */ ret = acpi_gtdt_init(table, &timer_count); if (ret || !timer_count) - return ret; + goto out_put_gtdt; for_each_platform_timer(platform_timer) { if (is_non_secure_watchdog(platform_timer)) { @@ -408,6 +408,8 @@ static int __init gtdt_sbsa_gwdt_init(void) if (gwdt_count) pr_info("found %d SBSA generic Watchdog(s).\n", gwdt_count); +out_put_gtdt: + acpi_put_table(table); return ret; } From 701dafe0670c736c0131328c0fd64c1190f0bb0c Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Fri, 8 May 2020 12:05:53 +0800 Subject: [PATCH 114/148] ACPI: IORT: Add comments for not calling acpi_put_table() The iort_table will be used at runtime after acpi_iort_init(), so add some comments to clarify this to make it less confusing. Signed-off-by: Hanjun Guo Link: https://lore.kernel.org/r/1588910753-18543-2-git-send-email-guohanjun@huawei.com Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 6e445bc55537..619a3e503346 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1667,6 +1667,10 @@ void __init acpi_iort_init(void) { acpi_status status; + /* iort_table will be used at runtime after the iort init, + * so we don't need to call acpi_put_table() to release + * the IORT table mapping. + */ status = acpi_get_table(ACPI_SIG_IORT, 0, &iort_table); if (ACPI_FAILURE(status)) { if (status != AE_NOT_FOUND) { From 97807325a02b41de2f641d98dda1041549a23cd8 Mon Sep 17 00:00:00 2001 From: Zhou Wang Date: Thu, 7 May 2020 10:58:25 +0800 Subject: [PATCH 115/148] drivers/perf: hisi: Permit modular builds of HiSilicon uncore drivers This patch lets HiSilicon uncore PMU driver can be built as modules. A common module and three specific uncore PMU driver modules will be built. Export necessary functions in hisi_uncore_pmu module, and change irq_set_affinity to irq_set_affinity_hint to pass compile. 
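As background (editor's sketch, simplified): helpers that the common hisi_uncore_pmu object provides to the tristate L3C/HHA/DDRC modules must be exported, and plain irq_set_affinity() is assumed not to be available to modules, which is why the driver switches to the exported irq_set_affinity_hint(). A minimal example of the export pattern, with a hypothetical helper name:

  #include <linux/module.h>

  int hisi_example_counter_valid(int idx, int num_counters)
  {
  	return idx >= 0 && idx < num_counters;
  }
  EXPORT_SYMBOL_GPL(hisi_example_counter_valid);	/* visible to the per-IP driver modules */

  MODULE_LICENSE("GPL v2");	/* required once the object can be loaded as a module */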
Signed-off-by: Zhou Wang Tested-by: Qi Liu Reviewed-by: Shaokun Zhang Link: https://lore.kernel.org/r/1588820305-174479-1-git-send-email-wangzhou1@hisilicon.com Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 9 ++------ drivers/perf/hisilicon/Kconfig | 7 ++++++ drivers/perf/hisilicon/Makefile | 3 ++- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 10 ++++---- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 10 ++++---- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 10 ++++---- drivers/perf/hisilicon/hisi_uncore_pmu.c | 23 +++++++++++++++++-- 7 files changed, 50 insertions(+), 22 deletions(-) create mode 100644 drivers/perf/hisilicon/Kconfig diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 09ae8a970880..a9261cf48293 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -79,13 +79,6 @@ config FSL_IMX8_DDR_PMU can give information about memory throughput and other related events. -config HISI_PMU - bool "HiSilicon SoC PMU" - depends on ARM64 && ACPI - help - Support for HiSilicon SoC uncore performance monitoring - unit (PMU), such as: L3C, HHA and DDRC. - config QCOM_L2_PMU bool "Qualcomm Technologies L2-cache PMU" depends on ARCH_QCOM && ARM64 && ACPI @@ -129,4 +122,6 @@ config ARM_SPE_PMU Extension, which provides periodic sampling of operations in the CPU pipeline and reports this via the perf AUX interface. +source "drivers/perf/hisilicon/Kconfig" + endmenu diff --git a/drivers/perf/hisilicon/Kconfig b/drivers/perf/hisilicon/Kconfig new file mode 100644 index 000000000000..c5d1b7019fff --- /dev/null +++ b/drivers/perf/hisilicon/Kconfig @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +config HISI_PMU + tristate "HiSilicon SoC PMU drivers" + depends on ARM64 && ACPI + help + Support for HiSilicon SoC L3 Cache performance monitor, Hydra Home + Agent performance monitor and DDR Controller performance monitor. 
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index c3a96ec2bf66..e8377061845f 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o +obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ + hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 453f1c6a16ca..15713faaa07e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -394,8 +394,9 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1); if (ret) { dev_err(ddrc_pmu->dev, "DDRC PMU register failed!\n"); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, - &ddrc_pmu->node); + cpuhp_state_remove_instance_nocalls( + CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, &ddrc_pmu->node); + irq_set_affinity_hint(ddrc_pmu->irq, NULL); } return ret; @@ -406,8 +407,9 @@ static int hisi_ddrc_pmu_remove(struct platform_device *pdev) struct hisi_pmu *ddrc_pmu = platform_get_drvdata(pdev); perf_pmu_unregister(&ddrc_pmu->pmu); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, - &ddrc_pmu->node); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, + &ddrc_pmu->node); + irq_set_affinity_hint(ddrc_pmu->irq, NULL); return 0; } diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index e5af9d7e6e14..dcc5600788a9 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -406,8 +406,9 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) ret = perf_pmu_register(&hha_pmu->pmu, name, -1); if (ret) { dev_err(hha_pmu->dev, "HHA PMU register failed!\n"); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, - &hha_pmu->node); + cpuhp_state_remove_instance_nocalls( + CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, &hha_pmu->node); + irq_set_affinity_hint(hha_pmu->irq, NULL); } return ret; @@ -418,8 +419,9 @@ static int hisi_hha_pmu_remove(struct platform_device *pdev) struct hisi_pmu *hha_pmu = platform_get_drvdata(pdev); perf_pmu_unregister(&hha_pmu->pmu); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, - &hha_pmu->node); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, + &hha_pmu->node); + irq_set_affinity_hint(hha_pmu->irq, NULL); return 0; } diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 1151e99b241c..8dd1278bec04 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -396,8 +396,9 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) ret = perf_pmu_register(&l3c_pmu->pmu, name, -1); if (ret) { dev_err(l3c_pmu->dev, "L3C PMU register failed!\n"); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, - &l3c_pmu->node); + cpuhp_state_remove_instance_nocalls( + CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, &l3c_pmu->node); + irq_set_affinity_hint(l3c_pmu->irq, NULL); } return ret; @@ -408,8 +409,9 @@ static int hisi_l3c_pmu_remove(struct platform_device *pdev) struct hisi_pmu *l3c_pmu = platform_get_drvdata(pdev); perf_pmu_unregister(&l3c_pmu->pmu); - 
cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, - &l3c_pmu->node); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, + &l3c_pmu->node); + irq_set_affinity_hint(l3c_pmu->irq, NULL); return 0; } diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 584de8f807cc..97aff877a4e7 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -35,6 +35,7 @@ ssize_t hisi_format_sysfs_show(struct device *dev, return sprintf(buf, "%s\n", (char *)eattr->var); } +EXPORT_SYMBOL_GPL(hisi_format_sysfs_show); /* * PMU event attributes @@ -48,6 +49,7 @@ ssize_t hisi_event_sysfs_show(struct device *dev, return sprintf(page, "config=0x%lx\n", (unsigned long)eattr->var); } +EXPORT_SYMBOL_GPL(hisi_event_sysfs_show); /* * sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show @@ -59,6 +61,7 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev, return sprintf(buf, "%d\n", hisi_pmu->on_cpu); } +EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show); static bool hisi_validate_event_group(struct perf_event *event) { @@ -97,6 +100,7 @@ int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx) { return idx >= 0 && idx < hisi_pmu->num_counters; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_counter_valid); int hisi_uncore_pmu_get_event_idx(struct perf_event *event) { @@ -113,6 +117,7 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event) return idx; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx); static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) { @@ -173,6 +178,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) return 0; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_init); /* * Set the counter to count the event that we're interested in, @@ -220,6 +226,7 @@ void hisi_uncore_pmu_set_event_period(struct perf_event *event) /* Write start value to the hardware event counter */ hisi_pmu->ops->write_counter(hisi_pmu, hwc, val); } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_set_event_period); void hisi_uncore_pmu_event_update(struct perf_event *event) { @@ -240,6 +247,7 @@ void hisi_uncore_pmu_event_update(struct perf_event *event) HISI_MAX_PERIOD(hisi_pmu->counter_bits); local64_add(delta, &event->count); } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_update); void hisi_uncore_pmu_start(struct perf_event *event, int flags) { @@ -262,6 +270,7 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags) hisi_uncore_pmu_enable_event(event); perf_event_update_userpage(event); } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_start); void hisi_uncore_pmu_stop(struct perf_event *event, int flags) { @@ -278,6 +287,7 @@ void hisi_uncore_pmu_stop(struct perf_event *event, int flags) hisi_uncore_pmu_event_update(event); hwc->state |= PERF_HES_UPTODATE; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_stop); int hisi_uncore_pmu_add(struct perf_event *event, int flags) { @@ -300,6 +310,7 @@ int hisi_uncore_pmu_add(struct perf_event *event, int flags) return 0; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_add); void hisi_uncore_pmu_del(struct perf_event *event, int flags) { @@ -311,12 +322,14 @@ void hisi_uncore_pmu_del(struct perf_event *event, int flags) perf_event_update_userpage(event); hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_del); void hisi_uncore_pmu_read(struct perf_event *event) { /* Read hardware counter and update the perf counter statistics */ hisi_uncore_pmu_event_update(event); } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read); void 
hisi_uncore_pmu_enable(struct pmu *pmu) { @@ -329,6 +342,7 @@ void hisi_uncore_pmu_enable(struct pmu *pmu) hisi_pmu->ops->start_counters(hisi_pmu); } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_enable); void hisi_uncore_pmu_disable(struct pmu *pmu) { @@ -336,6 +350,7 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) hisi_pmu->ops->stop_counters(hisi_pmu); } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_disable); /* @@ -414,10 +429,11 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) hisi_pmu->on_cpu = cpu; /* Overflow interrupt also should use the same CPU */ - WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); + WARN_ON(irq_set_affinity_hint(hisi_pmu->irq, cpumask_of(cpu))); return 0; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_online_cpu); int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { @@ -446,7 +462,10 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target); /* Use this CPU for event counting */ hisi_pmu->on_cpu = target; - WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target))); + WARN_ON(irq_set_affinity_hint(hisi_pmu->irq, cpumask_of(target))); return 0; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); + +MODULE_LICENSE("GPL v2"); From 10f6cd2af21bb44faab31a50ec3361d7649e5a39 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 22 Apr 2020 10:48:06 +0200 Subject: [PATCH 116/148] pmu/smmuv3: Clear IRQ affinity hint on device removal Currently when trying to remove the SMMUv3 PMU module we get a WARN_ON_ONCE from free_irq(), because the affinity hint set during probe hasn't been properly cleared. [ 238.878383] WARNING: CPU: 0 PID: 175 at kernel/irq/manage.c:1744 free_irq+0x324/0x358 ... [ 238.897263] Call trace: [ 238.897998] free_irq+0x324/0x358 [ 238.898792] devm_irq_release+0x18/0x28 [ 238.899189] release_nodes+0x1b0/0x228 [ 238.899984] devres_release_all+0x38/0x60 [ 238.900779] device_release_driver_internal+0x10c/0x1d0 [ 238.901574] driver_detach+0x50/0xe0 [ 238.902368] bus_remove_driver+0x5c/0xd8 [ 238.903448] driver_unregister+0x30/0x60 [ 238.903958] platform_driver_unregister+0x14/0x20 [ 238.905075] arm_smmu_pmu_exit+0x1c/0xecc [arm_smmuv3_pmu] [ 238.905547] __arm64_sys_delete_module+0x14c/0x260 [ 238.906342] el0_svc_common.constprop.0+0x74/0x178 [ 238.907355] do_el0_svc+0x24/0x90 [ 238.907932] el0_sync_handler+0x11c/0x198 [ 238.908979] el0_sync+0x158/0x180 Just like the other perf drivers, clear the affinity hint before releasing the device. 
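As a simplified statement of the rule being applied (editor's sketch, hypothetical function names): every irq_set_affinity_hint(irq, mask) set at probe or hotplug time needs a matching irq_set_affinity_hint(irq, NULL) before the interrupt is freed, otherwise the devm-managed free_irq() trips the warning shown above.

  #include <linux/interrupt.h>
  #include <linux/cpumask.h>

  static void example_pmu_bind_irq(int irq, unsigned int cpu)
  {
  	irq_set_affinity_hint(irq, cpumask_of(cpu));	/* set when the device is probed */
  }

  static void example_pmu_unbind_irq(int irq)
  {
  	irq_set_affinity_hint(irq, NULL);	/* must happen before free_irq() */
  }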
Fixes: 7d839b4b9e00 ("perf/smmuv3: Add arm64 smmuv3 pmu driver") Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20200422084805.237738-1-jean-philippe@linaro.org Signed-off-by: Will Deacon --- drivers/perf/arm_smmuv3_pmu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index f01a57e5a5f3..48e28ef93a70 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -814,7 +814,7 @@ static int smmu_pmu_probe(struct platform_device *pdev) if (err) { dev_err(dev, "Error %d registering hotplug, PMU @%pa\n", err, &res_0->start); - return err; + goto out_clear_affinity; } err = perf_pmu_register(&smmu_pmu->pmu, name, -1); @@ -833,6 +833,8 @@ static int smmu_pmu_probe(struct platform_device *pdev) out_unregister: cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node); +out_clear_affinity: + irq_set_affinity_hint(smmu_pmu->irq, NULL); return err; } @@ -842,6 +844,7 @@ static int smmu_pmu_remove(struct platform_device *pdev) perf_pmu_unregister(&smmu_pmu->pmu); cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node); + irq_set_affinity_hint(smmu_pmu->irq, NULL); return 0; } From 70e6352aefb18e6158f334af2e0291f133562b62 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Tue, 19 May 2020 19:21:06 +0100 Subject: [PATCH 117/148] firmware: arm_sdei: Put the SDEI table after using it The acpi_get_table() should be coupled with acpi_put_table() if the mapped table is not used for runtime after the initialization to release the table mapping, put the SDEI table after using it. Signed-off-by: Hanjun Guo Signed-off-by: James Morse Link: https://lore.kernel.org/linux-arm-kernel/1589021566-46373-1-git-send-email-guohanjun@huawei.com/ Link: https://lore.kernel.org/r/20200519182108.13693-2-james.morse@arm.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 5afd7409e6fa..a33b701ab2b0 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -1096,6 +1096,8 @@ static bool __init sdei_present_acpi(void) if (ACPI_FAILURE(status)) return false; + acpi_put_table(sdei_table_header); + return true; } From 82b2077afccd2a46ce15a43a50f2bfbf1b295de5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 May 2020 19:21:07 +0100 Subject: [PATCH 118/148] firmware: arm_sdei: remove unused interfaces The export symbols to register/unregister and enable/disable events aren't used upstream, remove them. 
[ dropped the parts of Christoph's patch that made the API static too ] Signed-off-by: Christoph Hellwig Signed-off-by: James Morse Link: https://lore.kernel.org/linux-arm-kernel/20200504164224.2842960-1-hch@lst.de/ Link: https://lore.kernel.org/r/20200519182108.13693-3-james.morse@arm.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index a33b701ab2b0..b12b99a19f66 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -429,7 +429,6 @@ int sdei_event_enable(u32 event_num) return err; } -EXPORT_SYMBOL(sdei_event_enable); static int sdei_api_event_disable(u32 event_num) { @@ -471,7 +470,6 @@ int sdei_event_disable(u32 event_num) return err; } -EXPORT_SYMBOL(sdei_event_disable); static int sdei_api_event_unregister(u32 event_num) { @@ -533,7 +531,6 @@ int sdei_event_unregister(u32 event_num) return err; } -EXPORT_SYMBOL(sdei_event_unregister); /* * unregister events, but don't destroy them as they are re-registered by @@ -643,7 +640,6 @@ int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg) return err; } -EXPORT_SYMBOL(sdei_event_register); static int sdei_reregister_event_llocked(struct sdei_event *event) { From 472de63b0b8383565e103f809f5df37d1c4390ab Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 19 May 2020 19:21:08 +0100 Subject: [PATCH 119/148] firmware: arm_sdei: Document the motivation behind these set_fs() calls The SDEI handler save/restores the addr_limit using set_fs(). It isn't very clear why. The reason is to mirror the arch code's entry assembly. The arch code does this because perf may access user-space, and inheriting the addr_limit may be a problem. Add a comment explaining why this is here. Suggested-by: Christoph Hellwig Signed-off-by: James Morse Link: https://bugs.chromium.org/p/project-zero/issues/detail?id=822 Link: https://lore.kernel.org/r/20200519182108.13693-4-james.morse@arm.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index b12b99a19f66..e7e36aab2386 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -1128,6 +1128,14 @@ int sdei_event_handler(struct pt_regs *regs, mm_segment_t orig_addr_limit; u32 event_num = arg->event_num; + /* + * Save restore 'fs'. + * The architecture's entry code save/restores 'fs' when taking an + * exception from the kernel. This ensures addr_limit isn't inherited + * if you interrupted something that allowed the uaccess routines to + * access kernel memory. + * Do the same here because this doesn't come via the same entry code. + */ orig_addr_limit = get_fs(); set_fs(USER_DS); From c73433fc630cda102f6527d4e5dfd289a9baec08 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 12 May 2020 07:27:27 +0530 Subject: [PATCH 120/148] arm64/cpufeature: Validate hypervisor capabilities during CPU hotplug This validates hypervisor capabilities like VMID width, IPA range for any hot plug CPU against system finalized values. KVM's view of the IPA space is used while allowing a given CPU to come up. While here, it factors out get_vmid_bits() for general use. 
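As a worked illustration of the VMID-width part of that check (editor's sketch; the field encoding is assumed from the architecture reference: ID_AA64MMFR1_EL1.VMIDBits occupies bits [7:4] and the value 0b0010 denotes 16-bit VMIDs):

  #include <linux/types.h>

  /* Rough equivalent of the new get_vmid_bits() helper. */
  static unsigned int example_vmid_bits(u64 mmfr1)
  {
  	unsigned int field = (mmfr1 >> 4) & 0xf;	/* VMIDBits field */

  	return (field == 2) ? 16 : 8;	/* 0b0010 means 16-bit VMIDs, other values default to 8 */
  }

A hot-plugged CPU that reports a narrower VMID width (or a smaller IPA range) than the system-wide sanitised value is parked via cpu_die_early(), as the hunk below shows.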
Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: Mark Rutland Cc: James Morse Cc: Suzuki K Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Suggested-by: Suzuki Poulose Signed-off-by: Anshuman Khandual Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/1589248647-22925-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 18 ++++++++++++++++ arch/arm64/include/asm/kvm_mmu.h | 2 +- arch/arm64/kernel/cpufeature.c | 32 +++++++++++++++++++++++++++++ arch/arm64/kvm/reset.c | 5 +++++ 4 files changed, 56 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f5c4672e498b..928814d35669 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -752,6 +752,24 @@ static inline bool cpu_has_hw_af(void) extern bool cpu_has_amu_feat(int cpu); #endif +static inline unsigned int get_vmid_bits(u64 mmfr1) +{ + int vmid_bits; + + vmid_bits = cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_VMIDBITS_SHIFT); + if (vmid_bits == ID_AA64MMFR1_VMIDBITS_16) + return 16; + + /* + * Return the default here even if any reserved + * value is fetched from the system register. + */ + return 8; +} + +u32 get_kvm_ipa_limit(void); + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 30b0e8d6b895..a7137e144b97 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -416,7 +416,7 @@ static inline unsigned int kvm_get_vmid_bits(void) { int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); - return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; + return get_vmid_bits(reg); } /* diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9b05843d67af..be8a634abdd4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2315,6 +2315,35 @@ static void verify_sve_features(void) /* Add checks on other ZCR bits here if necessary */ } +static void verify_hyp_capabilities(void) +{ + u64 safe_mmfr1, mmfr0, mmfr1; + int parange, ipa_max; + unsigned int safe_vmid_bits, vmid_bits; + + if (!IS_ENABLED(CONFIG_KVM) || !IS_ENABLED(CONFIG_KVM_ARM_HOST)) + return; + + safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); + mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + + /* Verify VMID bits */ + safe_vmid_bits = get_vmid_bits(safe_mmfr1); + vmid_bits = get_vmid_bits(mmfr1); + if (vmid_bits < safe_vmid_bits) { + pr_crit("CPU%d: VMID width mismatch\n", smp_processor_id()); + cpu_die_early(); + } + + /* Verify IPA range */ + parange = mmfr0 & 0x7; + ipa_max = id_aa64mmfr0_parange_to_phys_shift(parange); + if (ipa_max < get_kvm_ipa_limit()) { + pr_crit("CPU%d: IPA range mismatch\n", smp_processor_id()); + cpu_die_early(); + } +} /* * Run through the enabled system capabilities and enable() it on this CPU. 
@@ -2340,6 +2369,9 @@ static void verify_local_cpu_capabilities(void) if (system_supports_sve()) verify_sve_features(); + + if (is_hyp_mode_available()) + verify_hyp_capabilities(); } void check_local_cpu_capabilities(void) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 102e5c4e01a0..d4eb6612bb3c 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -332,6 +332,11 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) return ret; } +u32 get_kvm_ipa_limit(void) +{ + return kvm_ipa_limit; +} + void kvm_set_ipa_limit(void) { unsigned int ipa_max, pa_max, va_max, parange; From f73531f0257f6bac44a8c9d5c2f3a3ccaea3d1e9 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 13 May 2020 14:33:34 +0530 Subject: [PATCH 121/148] arm64/cpufeature: Drop open encodings while extracting parange Currently there are multiple instances of parange feature width mask open encodings while fetching it's value. Even the width mask value (0x7) itself is not accurate. It should be (0xf) per ID_AA64MMFR0_EL1.PARange[3:0] as in ARM ARM (0487F.a). Replace them with cpuid_feature_extract_unsigned_field() which can extract given standard feature (4 bits width i.e 0xf mask) field. Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: James Morse Cc: kvmarm@lists.cs.columbia.edu Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Acked-by: Marc Zyngier Acked-by: Will Deacon Link: https://lore.kernel.org/r/1589360614-1164-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 3 ++- arch/arm64/kvm/reset.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index be8a634abdd4..1c5bfe48d381 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2337,7 +2337,8 @@ static void verify_hyp_capabilities(void) } /* Verify IPA range */ - parange = mmfr0 & 0x7; + parange = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_PARANGE_SHIFT); ipa_max = id_aa64mmfr0_parange_to_phys_shift(parange); if (ipa_max < get_kvm_ipa_limit()) { pr_crit("CPU%d: IPA range mismatch\n", smp_processor_id()); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index d4eb6612bb3c..d8800ef4f42d 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -340,8 +340,11 @@ u32 get_kvm_ipa_limit(void) void kvm_set_ipa_limit(void) { unsigned int ipa_max, pa_max, va_max, parange; + u64 mmfr0; - parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7; + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + parange = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_PARANGE_SHIFT); pa_max = id_aa64mmfr0_parange_to_phys_shift(parange); /* Clamp the IPA limit to the PA size supported by the kernel */ @@ -387,7 +390,7 @@ void kvm_set_ipa_limit(void) */ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) { - u64 vtcr = VTCR_EL2_FLAGS; + u64 vtcr = VTCR_EL2_FLAGS, mmfr0; u32 parange, phys_shift; u8 lvls; @@ -403,7 +406,9 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) phys_shift = KVM_PHYS_SHIFT; } - parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7; + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + parange = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_PARANGE_SHIFT); if (parange > ID_AA64MMFR0_PARANGE_MAX) parange = ID_AA64MMFR0_PARANGE_MAX; vtcr |= parange << VTCR_EL2_PS_SHIFT; From 
50c8ab8d9fbf5b18d5162a797ca26568afc0af1a Mon Sep 17 00:00:00 2001 From: Tuan Phan Date: Wed, 20 May 2020 10:13:07 -0700 Subject: [PATCH 122/148] ACPI/IORT: Fix PMCG node single ID mapping handling An IORT PMCG node can have no ID mapping if its overflow interrupt is wire-based; therefore, the code that parses the PMCG node cannot assume the node will always have a single mapping present at index 0. Fix iort_get_id_mapping_index() by checking for an overflow interrupt and the mapping count. Fixes: 24e516049360 ("ACPI/IORT: Add support for PMCG") Signed-off-by: Tuan Phan Reviewed-by: Hanjun Guo Acked-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/1589994787-28637-1-git-send-email-tuanphan@os.amperecomputing.com Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 619a3e503346..9c40709c2f4e 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -388,6 +388,7 @@ static struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node, static int iort_get_id_mapping_index(struct acpi_iort_node *node) { struct acpi_iort_smmu_v3 *smmu; + struct acpi_iort_pmcg *pmcg; switch (node->type) { case ACPI_IORT_NODE_SMMU_V3: @@ -415,6 +416,10 @@ static int iort_get_id_mapping_index(struct acpi_iort_node *node) return smmu->id_mapping_index; case ACPI_IORT_NODE_PMCG: + pmcg = (struct acpi_iort_pmcg *)node->node_data; + if (pmcg->overflow_gsiv || node->mapping_count == 0) + return -EINVAL; + return 0; default: return -EINVAL; From e5bfb21d98b660580b8cdd8a442d12d38d655d11 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 18 May 2020 10:12:16 +0100 Subject: [PATCH 123/148] firmware: smccc: Add HAVE_ARM_SMCCC_DISCOVERY to identify SMCCC v1.1 and above SMCCC v1.0 lacked discoverability of version and features. To accelerate the adoption of a few mitigations and protect systems more rapidly from various vulnerabilities, PSCI v1.0 was updated to add an SMCCC discovery mechanism through the PSCI firmware implementation of PSCI_FEATURES(SMCCC_VERSION), which returns success on firmware compliant with SMCCC v1.1 and above. This in turn makes SMCCC v1.1 and above dependent on ARM_PSCI_FW for backward compatibility. Introduce a new hidden config option so that more features can be built on top of SMCCC v1.1 and above. While at it, also sort the psci entry alphabetically. Signed-off-by: Sudeep Holla Tested-by: Etienne Carriere Reviewed-by: Etienne Carriere Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200518091222.27467-2-sudeep.holla@arm.com Signed-off-by: Will Deacon --- drivers/firmware/Kconfig | 6 ++---- drivers/firmware/smccc/Kconfig | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) create mode 100644 drivers/firmware/smccc/Kconfig diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 8007d4aa76dc..4843e94713a4 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -295,15 +295,13 @@ config TURRIS_MOX_RWTM other manufacturing data and also utilize the Entropy Bit Generator for hardware random number generation.
-config HAVE_ARM_SMCCC - bool - -source "drivers/firmware/psci/Kconfig" source "drivers/firmware/broadcom/Kconfig" source "drivers/firmware/google/Kconfig" source "drivers/firmware/efi/Kconfig" source "drivers/firmware/imx/Kconfig" source "drivers/firmware/meson/Kconfig" +source "drivers/firmware/psci/Kconfig" +source "drivers/firmware/smccc/Kconfig" source "drivers/firmware/tegra/Kconfig" source "drivers/firmware/xilinx/Kconfig" diff --git a/drivers/firmware/smccc/Kconfig b/drivers/firmware/smccc/Kconfig new file mode 100644 index 000000000000..27b675d76235 --- /dev/null +++ b/drivers/firmware/smccc/Kconfig @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0-only +config HAVE_ARM_SMCCC + bool + help + Include support for the Secure Monitor Call (SMC) and Hypervisor + Call (HVC) instructions on Armv7 and above architectures. + +config HAVE_ARM_SMCCC_DISCOVERY + bool + depends on ARM_PSCI_FW + default y + help + SMCCC v1.0 lacked discoverability and hence PSCI v1.0 was updated + to add SMCCC discovery mechanism though the PSCI firmware + implementation of PSCI_FEATURES(SMCCC_VERSION) which returns + success on firmware compliant to SMCCC v1.1 and above. From 15c704ab6244ac95be54b2c05411b70501d50e8f Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 18 May 2020 10:12:17 +0100 Subject: [PATCH 124/148] firmware: smccc: Update link to latest SMCCC specification The current link actually points to the original revision A published in June 2013 and merely gets redirected to revision B published in November 2016. Update the link to point to the latest version so that it doesn't get stale anytime soon; at the moment the latest version is v1.2, published in March 2020 (i.e. DEN0028C). Signed-off-by: Sudeep Holla Tested-by: Etienne Carriere Reviewed-by: Steven Price Reviewed-by: Etienne Carriere Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200518091222.27467-3-sudeep.holla@arm.com Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 59494df0f55b..31b15db9685d 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -10,7 +10,9 @@ /* * This file provides common defines for ARM SMC Calling Convention as * specified in - * http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html + * https://developer.arm.com/docs/den0028/latest + * + * This code is up-to-date with version DEN 0028 B */ #define ARM_SMCCC_STD_CALL _AC(0,U) From 0441bfe7f00acaae7c4937ba0ca48ee1de9b709f Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 18 May 2020 10:12:18 +0100 Subject: [PATCH 125/148] firmware: smccc: Add the definition for SMCCCv1.2 version/error codes Add the definition for the SMCCC v1.2 version and the new error code it introduced. While at it, also add a note that ARM DEN 0070A is deprecated and is now merged into the main SMCCC specification (ARM DEN 0028C).
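[ Editor's note: a minimal, hypothetical sketch of how a caller might handle the new SMCCC_RET_INVALID_PARAMETER code; the queried workaround ID and the errno mapping are illustrative assumptions, not part of this patch. ]

static int probe_arch_workaround(void)
{
	struct arm_smccc_res res;

	/* Standard SMCCC v1.1 feature query via the registered conduit. */
	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
			     ARM_SMCCC_ARCH_WORKAROUND_1, &res);

	switch ((int)res.a0) {
	case SMCCC_RET_SUCCESS:
		return 0;
	case SMCCC_RET_INVALID_PARAMETER:	/* new with SMCCC v1.2 */
		return -EINVAL;
	case SMCCC_RET_NOT_SUPPORTED:
	default:
		return -EOPNOTSUPP;
	}
}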
Signed-off-by: Sudeep Holla Tested-by: Etienne Carriere Reviewed-by: Steven Price Reviewed-by: Etienne Carriere Link: https://lore.kernel.org/r/20200518091222.27467-4-sudeep.holla@arm.com Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 31b15db9685d..c3784ba8e2a4 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -12,7 +12,7 @@ * specified in * https://developer.arm.com/docs/den0028/latest * - * This code is up-to-date with version DEN 0028 B + * This code is up-to-date with version DEN 0028 C */ #define ARM_SMCCC_STD_CALL _AC(0,U) @@ -58,6 +58,7 @@ #define ARM_SMCCC_VERSION_1_0 0x10000 #define ARM_SMCCC_VERSION_1_1 0x10001 +#define ARM_SMCCC_VERSION_1_2 0x10002 #define ARM_SMCCC_VERSION_FUNC_ID \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ @@ -316,10 +317,14 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, */ #define arm_smccc_1_1_hvc(...) __arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__) -/* Return codes defined in ARM DEN 0070A */ +/* + * Return codes defined in ARM DEN 0070A + * ARM DEN 0070A is now merged/consolidated into ARM DEN 0028 C + */ #define SMCCC_RET_SUCCESS 0 #define SMCCC_RET_NOT_SUPPORTED -1 #define SMCCC_RET_NOT_REQUIRED -2 +#define SMCCC_RET_INVALID_PARAMETER -3 /* * Like arm_smccc_1_1* but always returns SMCCC_RET_NOT_SUPPORTED. From ad5a57dfe434b02ab28852703d7ad5510998ccef Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 18 May 2020 10:12:19 +0100 Subject: [PATCH 126/148] firmware: smccc: Drop smccc_version enum and use ARM_SMCCC_VERSION_1_x instead Instead of maintaining two sets of enums/macros for tracking the SMCCC version, drop the smccc_version enum and use ARM_SMCCC_VERSION_1_x directly. This is in preparation for dropping smccc_version from the PSCI code and maintaining it separately under drivers/firmware/smccc.
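[ Editor's note: an illustrative aside, not part of the patch. Plain integer comparisons work here because the ARM_SMCCC_VERSION_1_x macros encode the version as (major << 16) | minor, so newer versions always compare greater; the helpers below are hypothetical. ]

/* ARM_SMCCC_VERSION_1_0 = 0x10000, _1_1 = 0x10001, _1_2 = 0x10002 */
static inline unsigned int smccc_version_major(u32 ver)
{
	return ver >> 16;
}

static inline unsigned int smccc_version_minor(u32 ver)
{
	return ver & 0xffff;
}

/* e.g. ret >= ARM_SMCCC_VERSION_1_1 holds for both v1.1 and v1.2 firmware */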
Signed-off-by: Sudeep Holla Tested-by: Etienne Carriere Reviewed-by: Steven Price Reviewed-by: Etienne Carriere Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200518091222.27467-5-sudeep.holla@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/paravirt.c | 2 +- drivers/firmware/psci/psci.c | 8 ++++---- include/linux/psci.h | 7 +------ 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 1ef702b0be2d..295d66490584 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -120,7 +120,7 @@ static bool has_pv_steal_clock(void) struct arm_smccc_res res; /* To detect the presence of PV time support we require SMCCC 1.1+ */ - if (psci_ops.smccc_version < SMCCC_VERSION_1_1) + if (arm_smccc_1_1_get_conduit() == SMCCC_CONDUIT_NONE) return false; arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index 2937d44b5df4..6a56d7196697 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -54,12 +54,12 @@ bool psci_tos_resident_on(int cpu) struct psci_operations psci_ops = { .conduit = SMCCC_CONDUIT_NONE, - .smccc_version = SMCCC_VERSION_1_0, + .smccc_version = ARM_SMCCC_VERSION_1_0, }; enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void) { - if (psci_ops.smccc_version < SMCCC_VERSION_1_1) + if (psci_ops.smccc_version < ARM_SMCCC_VERSION_1_1) return SMCCC_CONDUIT_NONE; return psci_ops.conduit; @@ -411,8 +411,8 @@ static void __init psci_init_smccc(void) if (feature != PSCI_RET_NOT_SUPPORTED) { u32 ret; ret = invoke_psci_fn(ARM_SMCCC_VERSION_FUNC_ID, 0, 0, 0); - if (ret == ARM_SMCCC_VERSION_1_1) { - psci_ops.smccc_version = SMCCC_VERSION_1_1; + if (ret >= ARM_SMCCC_VERSION_1_1) { + psci_ops.smccc_version = ret; ver = ret; } } diff --git a/include/linux/psci.h b/include/linux/psci.h index a67712b73b6c..29bd0671e5bb 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -21,11 +21,6 @@ bool psci_power_state_is_valid(u32 state); int psci_set_osi_mode(void); bool psci_has_osi_support(void); -enum smccc_version { - SMCCC_VERSION_1_0, - SMCCC_VERSION_1_1, -}; - struct psci_operations { u32 (*get_version)(void); int (*cpu_suspend)(u32 state, unsigned long entry_point); @@ -36,7 +31,7 @@ struct psci_operations { unsigned long lowest_affinity_level); int (*migrate_info_type)(void); enum arm_smccc_conduit conduit; - enum smccc_version smccc_version; + u32 smccc_version; }; extern struct psci_operations psci_ops; From f2ae97062a48b114bcf8fb2e99574d9ed2c2cd1b Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 18 May 2020 10:12:20 +0100 Subject: [PATCH 127/148] firmware: smccc: Refactor SMCCC specific bits into separate file In order to add newer SMCCC v1.1+ functionality and to avoid cluttering PSCI firmware driver with SMCCC bits, let us move the SMCCC specific details under drivers/firmware/smccc/smccc.c We can also drop conduit and smccc_version from psci_operations structure as SMCCC was the sole user and now it maintains those. No functionality change in this patch though. 
Signed-off-by: Sudeep Holla Tested-by: Etienne Carriere Reviewed-by: Etienne Carriere Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200518091222.27467-6-sudeep.holla@arm.com Signed-off-by: Will Deacon --- MAINTAINERS | 9 +++++++++ drivers/firmware/Makefile | 3 ++- drivers/firmware/psci/psci.c | 20 +++++--------------- drivers/firmware/smccc/Makefile | 3 +++ drivers/firmware/smccc/smccc.c | 26 ++++++++++++++++++++++++++ include/linux/psci.h | 2 -- 6 files changed, 45 insertions(+), 18 deletions(-) create mode 100644 drivers/firmware/smccc/Makefile create mode 100644 drivers/firmware/smccc/smccc.c diff --git a/MAINTAINERS b/MAINTAINERS index 26f281d9f32a..cb993caf5cd2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15474,6 +15474,15 @@ M: Nicolas Pitre S: Odd Fixes F: drivers/net/ethernet/smsc/smc91x.* +SECURE MONITOR CALL(SMC) CALLING CONVENTION (SMCCC) +M: Mark Rutland +M: Lorenzo Pieralisi +M: Sudeep Holla +L: linux-arm-kernel@lists.infradead.org +S: Maintained +F: drivers/firmware/smccc/ +F: include/linux/arm-smccc.h + SMIA AND SMIA++ IMAGE SENSOR DRIVER M: Sakari Ailus L: linux-media@vger.kernel.org diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index e9fb838af4df..99510be9f5ed 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -23,12 +23,13 @@ obj-$(CONFIG_TRUSTED_FOUNDATIONS) += trusted_foundations.o obj-$(CONFIG_TURRIS_MOX_RWTM) += turris-mox-rwtm.o obj-$(CONFIG_ARM_SCMI_PROTOCOL) += arm_scmi/ -obj-y += psci/ obj-y += broadcom/ obj-y += meson/ obj-$(CONFIG_GOOGLE_FIRMWARE) += google/ obj-$(CONFIG_EFI) += efi/ obj-$(CONFIG_UEFI_CPER) += efi/ obj-y += imx/ +obj-y += psci/ +obj-y += smccc/ obj-y += tegra/ obj-y += xilinx/ diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index 6a56d7196697..1330a698a178 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -46,25 +46,14 @@ * require cooperation with a Trusted OS driver. 
*/ static int resident_cpu = -1; +struct psci_operations psci_ops; +static enum arm_smccc_conduit psci_conduit = SMCCC_CONDUIT_NONE; bool psci_tos_resident_on(int cpu) { return cpu == resident_cpu; } -struct psci_operations psci_ops = { - .conduit = SMCCC_CONDUIT_NONE, - .smccc_version = ARM_SMCCC_VERSION_1_0, -}; - -enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void) -{ - if (psci_ops.smccc_version < ARM_SMCCC_VERSION_1_1) - return SMCCC_CONDUIT_NONE; - - return psci_ops.conduit; -} - typedef unsigned long (psci_fn)(unsigned long, unsigned long, unsigned long, unsigned long); static psci_fn *invoke_psci_fn; @@ -90,6 +79,7 @@ static u32 psci_function_id[PSCI_FN_MAX]; static u32 psci_cpu_suspend_feature; static bool psci_system_reset2_supported; +void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit); static inline bool psci_has_ext_power_state(void) { @@ -242,7 +232,7 @@ static void set_conduit(enum arm_smccc_conduit conduit) WARN(1, "Unexpected PSCI conduit %d\n", conduit); } - psci_ops.conduit = conduit; + psci_conduit = conduit; } static int get_set_conduit_method(struct device_node *np) @@ -412,7 +402,7 @@ static void __init psci_init_smccc(void) u32 ret; ret = invoke_psci_fn(ARM_SMCCC_VERSION_FUNC_ID, 0, 0, 0); if (ret >= ARM_SMCCC_VERSION_1_1) { - psci_ops.smccc_version = ret; + arm_smccc_version_init(ret, psci_conduit); ver = ret; } } diff --git a/drivers/firmware/smccc/Makefile b/drivers/firmware/smccc/Makefile new file mode 100644 index 000000000000..6f369fe3f0b9 --- /dev/null +++ b/drivers/firmware/smccc/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +# +obj-$(CONFIG_HAVE_ARM_SMCCC_DISCOVERY) += smccc.o diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c new file mode 100644 index 000000000000..de92a4b9f8f6 --- /dev/null +++ b/drivers/firmware/smccc/smccc.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Arm Limited + */ + +#define pr_fmt(fmt) "smccc: " fmt + +#include +#include + +static u32 smccc_version = ARM_SMCCC_VERSION_1_0; +static enum arm_smccc_conduit smccc_conduit = SMCCC_CONDUIT_NONE; + +void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit) +{ + smccc_version = version; + smccc_conduit = conduit; +} + +enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void) +{ + if (smccc_version < ARM_SMCCC_VERSION_1_1) + return SMCCC_CONDUIT_NONE; + + return smccc_conduit; +} diff --git a/include/linux/psci.h b/include/linux/psci.h index 29bd0671e5bb..14ad9b9ebcd6 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -30,8 +30,6 @@ struct psci_operations { int (*affinity_info)(unsigned long target_affinity, unsigned long lowest_affinity_level); int (*migrate_info_type)(void); - enum arm_smccc_conduit conduit; - u32 smccc_version; }; extern struct psci_operations psci_ops; From a4fb17465182c9fc13104e4df04d050892055205 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 18 May 2020 10:12:21 +0100 Subject: [PATCH 128/148] firmware: smccc: Add function to fetch SMCCC version For backward compatibility reasons, PSCI maintains SMCCC version as SMCCC didn't provide ARM_SMCCC_VERSION_FUNC_ID until v1.1. PSCI initialises both the SMCCC version and conduit. Similar to the conduit, let us provide accessors to fetch the SMCCC version also so that other SMCCC v1.1+ features can use it. 
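[ Editor's note: a minimal usage sketch, not part of the patch; the v1.2 feature probe below is hypothetical. As the kernel-doc added here warns, a return value of SMCCC v1.0 does not imply a valid conduit, so callers check the conduit first. ]

static bool have_smccc_v1_2(void)
{
	/* SMCCC_CONDUIT_NONE means there is no firmware interface at all. */
	if (arm_smccc_1_1_get_conduit() == SMCCC_CONDUIT_NONE)
		return false;

	/* The version is cached by the SMCCC core during PSCI probing. */
	return arm_smccc_get_version() >= ARM_SMCCC_VERSION_1_2;
}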
Signed-off-by: Sudeep Holla Tested-by: Etienne Carriere Reviewed-by: Steven Price Reviewed-by: Etienne Carriere Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200518091222.27467-7-sudeep.holla@arm.com Signed-off-by: Will Deacon --- drivers/firmware/smccc/smccc.c | 5 +++++ include/linux/arm-smccc.h | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c index de92a4b9f8f6..4e80921ee212 100644 --- a/drivers/firmware/smccc/smccc.c +++ b/drivers/firmware/smccc/smccc.c @@ -24,3 +24,8 @@ enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void) return smccc_conduit; } + +u32 arm_smccc_get_version(void) +{ + return smccc_version; +} diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index c3784ba8e2a4..c491d210e3c3 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -100,6 +100,17 @@ enum arm_smccc_conduit { */ enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void); +/** + * arm_smccc_get_version() + * + * Returns the version to be used for SMCCCv1.1 or later. + * + * When SMCCCv1.1 or above is not present, returns SMCCCv1.0, but this + * does not imply the presence of firmware or a valid conduit. Caller + * handling SMCCCv1.0 must determine the conduit by other means. + */ +u32 arm_smccc_get_version(void); + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 From 9a964285572b5a3ea268bd744bb6837aecf09640 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 19 May 2020 12:38:33 +0100 Subject: [PATCH 129/148] arm64: vdso: Don't prefix sigreturn trampoline with a BTI C instruction For better or worse, GDB relies on the exact instruction sequence in the VDSO sigreturn trampoline in order to unwind from signals correctly. Commit c91db232da48 ("arm64: vdso: Convert to modern assembler annotations") unfortunately added a BTI C instruction to the start of __kernel_rt_sigreturn, which breaks this check. Thankfully, it's also not required, since the trampoline is called from a RET instruction when returning from the signal handler Remove the unnecessary BTI C instruction from __kernel_rt_sigreturn, and do the same for the 32-bit VDSO as well for good measure. Cc: Daniel Kiss Cc: Tamas Zsoldos Reviewed-by: Dave Martin Reviewed-by: Mark Brown Fixes: c91db232da48 ("arm64: vdso: Convert to modern assembler annotations") Signed-off-by: Will Deacon --- arch/arm64/include/asm/linkage.h | 6 +++--- arch/arm64/kernel/vdso/sigreturn.S | 11 +++++++++-- arch/arm64/kernel/vdso32/sigreturn.S | 16 ++++++++-------- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index b5a7998a6b2a..81fefd2a1d02 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -15,9 +15,9 @@ #define BTI_J hint 36 ; /* - * When using in-kernel BTI we need to ensure that assembly functions - * have suitable annotations. Override SYM_FUNC_START to insert a BTI - * landing pad at the start of everything. + * When using in-kernel BTI we need to ensure that PCS-conformant assembly + * functions have suitable annotations. Override SYM_FUNC_START to insert + * a BTI landing pad at the start of everything. 
*/ #define SYM_FUNC_START(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S index 3fb13b81f780..0c921130002a 100644 --- a/arch/arm64/kernel/vdso/sigreturn.S +++ b/arch/arm64/kernel/vdso/sigreturn.S @@ -15,7 +15,14 @@ .text nop -SYM_FUNC_START(__kernel_rt_sigreturn) +/* + * GDB relies on being able to identify the sigreturn instruction sequence to + * unwind from signal handlers. We cannot, therefore, use SYM_FUNC_START() + * here, as it will emit a BTI C instruction and break the unwinder. Thankfully, + * this function is only ever called from a RET and so omitting the landing pad + * is perfectly fine. + */ +SYM_CODE_START(__kernel_rt_sigreturn) .cfi_startproc .cfi_signal_frame .cfi_def_cfa x29, 0 @@ -24,6 +31,6 @@ SYM_FUNC_START(__kernel_rt_sigreturn) mov x8, #__NR_rt_sigreturn svc #0 .cfi_endproc -SYM_FUNC_END(__kernel_rt_sigreturn) +SYM_CODE_END(__kernel_rt_sigreturn) emit_aarch64_feature_1_and diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S index 620524969696..b36d4e2267a3 100644 --- a/arch/arm64/kernel/vdso32/sigreturn.S +++ b/arch/arm64/kernel/vdso32/sigreturn.S @@ -17,39 +17,39 @@ .save {r0-r15} .pad #COMPAT_SIGFRAME_REGS_OFFSET nop -SYM_FUNC_START(__kernel_sigreturn_arm) +SYM_CODE_START(__kernel_sigreturn_arm) mov r7, #__NR_compat_sigreturn svc #0 .fnend -SYM_FUNC_END(__kernel_sigreturn_arm) +SYM_CODE_END(__kernel_sigreturn_arm) .fnstart .save {r0-r15} .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET nop -SYM_FUNC_START(__kernel_rt_sigreturn_arm) +SYM_CODE_START(__kernel_rt_sigreturn_arm) mov r7, #__NR_compat_rt_sigreturn svc #0 .fnend -SYM_FUNC_END(__kernel_rt_sigreturn_arm) +SYM_CODE_END(__kernel_rt_sigreturn_arm) .thumb .fnstart .save {r0-r15} .pad #COMPAT_SIGFRAME_REGS_OFFSET nop -SYM_FUNC_START(__kernel_sigreturn_thumb) +SYM_CODE_START(__kernel_sigreturn_thumb) mov r7, #__NR_compat_sigreturn svc #0 .fnend -SYM_FUNC_END(__kernel_sigreturn_thumb) +SYM_CODE_END(__kernel_sigreturn_thumb) .fnstart .save {r0-r15} .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET nop -SYM_FUNC_START(__kernel_rt_sigreturn_thumb) +SYM_CODE_START(__kernel_rt_sigreturn_thumb) mov r7, #__NR_compat_rt_sigreturn svc #0 .fnend -SYM_FUNC_END(__kernel_rt_sigreturn_thumb) +SYM_CODE_END(__kernel_rt_sigreturn_thumb) From a4eb355a3fdad85d16e4b098e8d56bb28b812ce0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 19 May 2020 12:56:05 +0100 Subject: [PATCH 130/148] arm64: vdso: Fix CFI directives in sigreturn trampoline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Daniel reports that the .cfi_startproc is misplaced for the sigreturn trampoline, which causes LLVM's unwinder to misbehave: | I run into this with LLVM’s unwinder. | This combination was always broken. This prompted Dave to question our use of CFI directives more generally, and I ended up going down a rabbit hole trying to figure out how this very poorly documented stuff gets used. Move the CFI directives so that the "mysterious NOP" is included in the .cfi_{start,end}proc block and add a bunch of comments so that I can save myself another headache in future. 
Cc: Tamas Zsoldos Reported-by: Dave Martin Reported-by: Daniel Kiss Tested-by: Daniel Kiss Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/sigreturn.S | 44 +++++++++++++++++++++++----- arch/arm64/kernel/vdso32/sigreturn.S | 3 ++ 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S index 0c921130002a..620a3ef837b7 100644 --- a/arch/arm64/kernel/vdso/sigreturn.S +++ b/arch/arm64/kernel/vdso/sigreturn.S @@ -1,7 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Sigreturn trampoline for returning from a signal when the SA_RESTORER - * flag is not set. + * flag is not set. It serves primarily as a hall of shame for crappy + * unwinders and features an exciting but mysterious NOP instruction. + * + * It's also fragile as hell, so please think twice before changing anything + * in here. * * Copyright (C) 2012 ARM Limited * @@ -14,7 +18,38 @@ .text - nop +/* Ensure that the mysterious NOP can be associated with a function. */ + .cfi_startproc + +/* + * .cfi_signal_frame causes the corresponding Frame Description Entry in the + * .eh_frame section to be annotated as a signal frame. This allows DWARF + * unwinders (e.g. libstdc++) to implement _Unwind_GetIPInfo(), which permits + * unwinding out of the signal trampoline without the need for the mysterious + * NOP. + */ + .cfi_signal_frame + +/* + * Tell the unwinder where to locate the frame record linking back to the + * interrupted context. We don't provide unwind info for registers other + * than the frame pointer and the link register here; in practice, this + * is sufficient for unwinding in C/C++ based runtimes and the values in + * the sigcontext may have been modified by this point anyway. Debuggers + * already have baked-in strategies for attempting to unwind out of signals. + */ + .cfi_def_cfa x29, 0 + .cfi_offset x29, 0 * 8 + .cfi_offset x30, 1 * 8 + +/* + * This mysterious NOP is required for some unwinders (e.g. libc++) that + * unconditionally subtract one from the result of _Unwind_GetIP() in order to + * identify the calling function. + * Hack borrowed from arch/powerpc/kernel/vdso64/sigtramp.S. + */ + nop // Mysterious NOP + /* * GDB relies on being able to identify the sigreturn instruction sequence to * unwind from signal handlers. We cannot, therefore, use SYM_FUNC_START() @@ -23,11 +58,6 @@ * is perfectly fine. */ SYM_CODE_START(__kernel_rt_sigreturn) - .cfi_startproc - .cfi_signal_frame - .cfi_def_cfa x29, 0 - .cfi_offset x29, 0 * 8 - .cfi_offset x30, 1 * 8 mov x8, #__NR_rt_sigreturn svc #0 .cfi_endproc diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S index b36d4e2267a3..b0091064c3d6 100644 --- a/arch/arm64/kernel/vdso32/sigreturn.S +++ b/arch/arm64/kernel/vdso32/sigreturn.S @@ -3,6 +3,9 @@ * This file provides both A32 and T32 versions, in accordance with the * arm sigreturn code. * + * Please read the comments in arch/arm64/kernel/vdso/sigreturn.S to + * understand some of the craziness in here. 
+ * * Copyright (C) 2018 ARM Limited */ From 269fd61e15d785b9e20786672765400732dde8a0 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 21 May 2020 12:08:36 +0100 Subject: [PATCH 131/148] firmware: smccc: Fix missing prototype warning for arm_smccc_version_init Commit f2ae97062a48 ("firmware: smccc: Refactor SMCCC specific bits into separate file") introduced the following build warning: drivers/firmware/smccc/smccc.c:14:13: warning: no previous prototype for function 'arm_smccc_version_init' [-Wmissing-prototypes] void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit) ^~~~~~~~~~~~~~~~~~~~~~ Fix the same by adding the missing prototype in arm-smccc.h Reported-by: kbuild test robot Signed-off-by: Sudeep Holla Link: https://lore.kernel.org/r/20200521110836.57252-1-sudeep.holla@arm.com Signed-off-by: Will Deacon --- drivers/firmware/psci/psci.c | 1 - include/linux/arm-smccc.h | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index 1330a698a178..92013ecc2d9e 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -79,7 +79,6 @@ static u32 psci_function_id[PSCI_FN_MAX]; static u32 psci_cpu_suspend_feature; static bool psci_system_reset2_supported; -void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit); static inline bool psci_has_ext_power_state(void) { diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index c491d210e3c3..56d6a5c6e353 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -5,6 +5,7 @@ #ifndef __LINUX_ARM_SMCCC_H #define __LINUX_ARM_SMCCC_H +#include #include /* @@ -111,6 +112,8 @@ enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void); */ u32 arm_smccc_get_version(void); +void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit); + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 From 4fc92254bf86b19ac1ef81f61f4e690fccfcba4a Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 19 May 2020 19:54:43 +0200 Subject: [PATCH 132/148] arm64: mm: Add asid_gen_match() helper Add a macro to check if an ASID is from the current generation, since a subsequent patch will introduce a third user for this test. Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20200519175502.2504091-6-jean-philippe@linaro.org Signed-off-by: Will Deacon --- arch/arm64/mm/context.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 9b26f9a88724..d702d60e64da 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -92,6 +92,9 @@ static void set_reserved_asid_bits(void) bitmap_clear(asid_map, 0, NUM_USER_ASIDS); } +#define asid_gen_match(asid) \ + (!(((asid) ^ atomic64_read(&asid_generation)) >> asid_bits)) + static void flush_context(void) { int i; @@ -220,8 +223,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) * because atomic RmWs are totally ordered for a given location. 
*/ old_active_asid = atomic64_read(&per_cpu(active_asids, cpu)); - if (old_active_asid && - !((asid ^ atomic64_read(&asid_generation)) >> asid_bits) && + if (old_active_asid && asid_gen_match(asid) && atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu), old_active_asid, asid)) goto switch_mm_fastpath; @@ -229,7 +231,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) raw_spin_lock_irqsave(&cpu_asid_lock, flags); /* Check that our ASID belongs to the current generation. */ asid = atomic64_read(&mm->context.id); - if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) { + if (!asid_gen_match(asid)) { asid = new_context(mm); atomic64_set(&mm->context.id, asid); } From 2a5bc6c47bc3b1bcdab5bef7e74fbb74d17dc618 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 19 May 2020 15:10:38 +0530 Subject: [PATCH 133/148] arm64/cpufeature: Add explicit ftr_id_isar0[] for ID_ISAR0 register ID_ISAR0[31..28] bits are RES0 in ARMv8, Reserved/UNK in ARMv7. Currently these bits get exposed through generic_id_ftr32[] which is not desirable. Hence define an explicit ftr_id_isar0[] array for ID_ISAR0 register where those bits can be hidden. Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Suzuki K Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Mark Rutland Signed-off-by: Anshuman Khandual Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1589881254-10082-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 8 ++++++++ arch/arm64/kernel/cpufeature.c | 14 ++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 194684301df0..ea55fe5925c4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -766,6 +766,14 @@ #define ID_ISAR4_WITHSHIFTS_SHIFT 4 #define ID_ISAR4_UNPRIV_SHIFT 0 +#define ID_ISAR0_DIVIDE_SHIFT 24 +#define ID_ISAR0_DEBUG_SHIFT 20 +#define ID_ISAR0_COPROC_SHIFT 16 +#define ID_ISAR0_CMPBRANCH_SHIFT 12 +#define ID_ISAR0_BITFIELD_SHIFT 8 +#define ID_ISAR0_BITCOUNT_SHIFT 4 +#define ID_ISAR0_SWAP_SHIFT 0 + #define ID_ISAR5_RDM_SHIFT 24 #define ID_ISAR5_CRC32_SHIFT 16 #define ID_ISAR5_SHA2_SHIFT 12 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 1c5bfe48d381..420cd68d9ba4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -370,6 +370,16 @@ static const struct arm64_ftr_bits ftr_dczid[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_isar0[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_COPROC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_CMPBRANCH_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_BITFIELD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_BITCOUNT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_SWAP_SHIFT, 4, 0), + ARM64_FTR_END, +}; static const struct arm64_ftr_bits ftr_id_isar5[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_RDM_SHIFT, 4, 0), @@ -451,7 +461,7 @@ static const struct arm64_ftr_bits ftr_zcr[] = { * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 
4bit feature fields and a default safe value of * 0. Covers the following 32bit registers: - * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1] + * id_isar[1-4], id_mmfr[1-3], id_pfr1, mvfr[0-1] */ static const struct arm64_ftr_bits ftr_generic_32bits[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), @@ -497,7 +507,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits), /* Op1 = 0, CRn = 0, CRm = 2 */ - ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_id_isar0), ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits), From 1ed1b90a0594c8c9d31e8bb8be25a2b37717dc9e Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 19 May 2020 15:10:39 +0530 Subject: [PATCH 134/148] arm64/cpufeature: Drop TraceFilt feature exposure from ID_DFR0 register The ID_DFR0-based TraceFilt feature should not be exposed to guests, so drop it. Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: Mark Rutland Cc: James Morse Cc: Suzuki K Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Mark Rutland Signed-off-by: Anshuman Khandual Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1589881254-10082-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 420cd68d9ba4..b3fceca408e5 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -440,7 +440,7 @@ static const struct arm64_ftr_bits ftr_id_pfr1[] = { }; static const struct arm64_ftr_bits ftr_id_dfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), + /* [31:28] TraceFilt */ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), From e965bcb0625626c691677e9283cee354c5b2433a Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 19 May 2020 15:10:40 +0530 Subject: [PATCH 135/148] arm64/cpufeature: Make doublelock a signed feature in ID_AA64DFR0 The Double Lock feature can have the following possible values: 0b0000 (Double Lock implemented) and 0b1111 (Double Lock not implemented). In case of a conflict, the safe value should be 0b1111; hence this must be a signed feature instead. Also change FTR_EXACT to FTR_LOWER_SAFE. While here, fix the erroneous bit width value from 28 to 4.
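[ Editor's note: an illustrative arithmetic sketch, not part of the patch. With a signed 4-bit field, 0b1111 sign-extends to -1, so under FTR_LOWER_SAFE a conflict resolves to min(0, -1) = -1, i.e. "Double Lock not implemented", which is the safe choice. The helper below is hypothetical and only mirrors what the kernel's signed field extraction does. ]

static inline s64 extract_signed_4bit_field(u64 reg, unsigned int shift)
{
	/* Move the field's top bit to bit 63, then sign-extend back down. */
	return ((s64)(reg << (64 - shift - 4))) >> 60;
}

/*
 * For the ID_AA64DFR0_EL1 DoubleLock field at bits [39:36] (shift = 36):
 *   field 0b0000 ->  0 (Double Lock implemented)
 *   field 0b1111 -> -1 (Double Lock not implemented)
 */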
Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Suzuki K Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Suzuki K Poulose Signed-off-by: Anshuman Khandual Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1589881254-10082-4-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b3fceca408e5..ae88d2c820ee 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -343,7 +343,7 @@ static const struct arm64_ftr_bits ftr_id_mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 36, 28, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 36, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), From 16824085a7dd426408d4403284374da90671e749 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 19 May 2020 15:10:41 +0530 Subject: [PATCH 136/148] arm64/cpufeature: Introduce ID_PFR2 CPU register This adds basic building blocks required for ID_PFR2 CPU register which provides information about the AArch32 programmers model which must be interpreted along with ID_PFR0 and ID_PFR1 CPU registers. This is added per ARM DDI 0487F.a specification. Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: Mark Rutland Cc: James Morse Cc: Suzuki K Poulose Cc: kvmarm@lists.cs.columbia.edu Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Mark Rutland Signed-off-by: Anshuman Khandual Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1589881254-10082-5-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpu.h | 1 + arch/arm64/include/asm/sysreg.h | 4 ++++ arch/arm64/kernel/cpufeature.c | 11 +++++++++++ arch/arm64/kernel/cpuinfo.c | 1 + arch/arm64/kvm/sys_regs.c | 2 +- 5 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index b4a40535a3d8..464e828a994d 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -46,6 +46,7 @@ struct cpuinfo_arm64 { u32 reg_id_mmfr3; u32 reg_id_pfr0; u32 reg_id_pfr1; + u32 reg_id_pfr2; u32 reg_mvfr0; u32 reg_mvfr1; diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ea55fe5925c4..d0ea916b8528 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -138,6 +138,7 @@ #define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0) #define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) +#define SYS_ID_PFR2_EL1 sys_reg(3, 0, 0, 3, 4) #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) @@ -789,6 +790,9 @@ #define ID_ISAR6_DP_SHIFT 4 #define ID_ISAR6_JSCVT_SHIFT 0 +#define ID_PFR2_SSBS_SHIFT 4 +#define ID_PFR2_CSV3_SHIFT 0 + #define MVFR0_FPROUND_SHIFT 28 #define MVFR0_FPSHVEC_SHIFT 24 #define MVFR0_FPSQRT_SHIFT 20 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index ae88d2c820ee..b81b74d6dc20 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ 
-439,6 +439,12 @@ static const struct arm64_ftr_bits ftr_id_pfr1[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_pfr2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR2_SSBS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_CSV3_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_dfr0[] = { /* [31:28] TraceFilt */ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ @@ -520,6 +526,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2), + ARM64_FTR_REG(SYS_ID_PFR2_EL1, ftr_id_pfr2), /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), @@ -726,6 +733,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); + init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2); init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); @@ -859,6 +867,8 @@ static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info, info->reg_id_pfr0, boot->reg_id_pfr0); taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, info->reg_id_pfr1, boot->reg_id_pfr1); + taint |= check_update_ftr_reg(SYS_ID_PFR2_EL1, cpu, + info->reg_id_pfr2, boot->reg_id_pfr2); taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, info->reg_mvfr0, boot->reg_mvfr0); taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, @@ -986,6 +996,7 @@ static u64 __read_sysreg_by_encoding(u32 sys_id) switch (sys_id) { read_sysreg_case(SYS_ID_PFR0_EL1); read_sysreg_case(SYS_ID_PFR1_EL1); + read_sysreg_case(SYS_ID_PFR2_EL1); read_sysreg_case(SYS_ID_DFR0_EL1); read_sysreg_case(SYS_ID_MMFR0_EL1); read_sysreg_case(SYS_ID_MMFR1_EL1); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 86136075ae41..cb79b083f97f 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -375,6 +375,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); + info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1); info->reg_mvfr0 = read_cpuid(MVFR0_EL1); info->reg_mvfr1 = read_cpuid(MVFR1_EL1); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 51db934702b6..b784b156edb3 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1456,7 +1456,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_SANITISED(MVFR1_EL1), ID_SANITISED(MVFR2_EL1), ID_UNALLOCATED(3,3), - ID_UNALLOCATED(3,4), + ID_SANITISED(ID_PFR2_EL1), ID_UNALLOCATED(3,5), ID_UNALLOCATED(3,6), ID_UNALLOCATED(3,7), From dd35ec07045753adcc86f89d485e589d860e298f Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 19 May 2020 15:10:42 +0530 Subject: [PATCH 137/148] arm64/cpufeature: Introduce ID_DFR1 CPU register This adds basic building blocks required for ID_DFR1 CPU register which provides top level information about the debug system in AArch32 state. We hide the register from KVM guests, as we don't emulate the 'MTPMU' feature. This is added per ARM DDI 0487F.a specification. 
Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: Mark Rutland Cc: James Morse Cc: Suzuki K Poulose Cc: kvmarm@lists.cs.columbia.edu Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Will Deacon Reviewed-by : Suzuki K Poulose Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1589881254-10082-6-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpu.h | 1 + arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/kernel/cpufeature.c | 10 ++++++++++ arch/arm64/kernel/cpuinfo.c | 1 + arch/arm64/kvm/sys_regs.c | 2 +- 5 files changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 464e828a994d..d9a78bdec409 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -33,6 +33,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64zfr0; u32 reg_id_dfr0; + u32 reg_id_dfr1; u32 reg_id_isar0; u32 reg_id_isar1; u32 reg_id_isar2; diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d0ea916b8528..c1c97e08a799 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -140,6 +140,7 @@ #define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) #define SYS_ID_PFR2_EL1 sys_reg(3, 0, 0, 3, 4) #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) +#define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) #define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) @@ -767,6 +768,8 @@ #define ID_ISAR4_WITHSHIFTS_SHIFT 4 #define ID_ISAR4_UNPRIV_SHIFT 0 +#define ID_DFR1_MTPMU_SHIFT 0 + #define ID_ISAR0_DIVIDE_SHIFT 24 #define ID_ISAR0_DEBUG_SHIFT 20 #define ID_ISAR0_COPROC_SHIFT 16 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b81b74d6dc20..7a7ddbdc9c55 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -457,6 +457,11 @@ static const struct arm64_ftr_bits ftr_id_dfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_dfr1[] = { + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR1_MTPMU_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_zcr[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_SIZE, 0), /* LEN */ @@ -527,6 +532,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2), ARM64_FTR_REG(SYS_ID_PFR2_EL1, ftr_id_pfr2), + ARM64_FTR_REG(SYS_ID_DFR1_EL1, ftr_id_dfr1), /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), @@ -720,6 +726,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); + init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1); init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); @@ -835,6 +842,8 @@ static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info, taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, info->reg_id_dfr0, boot->reg_id_dfr0); + taint |= check_update_ftr_reg(SYS_ID_DFR1_EL1, cpu, + info->reg_id_dfr1, boot->reg_id_dfr1); taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, info->reg_id_isar0, boot->reg_id_isar0); taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, @@ -998,6 +1007,7 @@ 
static u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_PFR1_EL1); read_sysreg_case(SYS_ID_PFR2_EL1); read_sysreg_case(SYS_ID_DFR0_EL1); + read_sysreg_case(SYS_ID_DFR1_EL1); read_sysreg_case(SYS_ID_MMFR0_EL1); read_sysreg_case(SYS_ID_MMFR1_EL1); read_sysreg_case(SYS_ID_MMFR2_EL1); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index cb79b083f97f..50a281703d9d 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -362,6 +362,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) /* Update the 32bit ID registers only if AArch32 is implemented */ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); + info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1); info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b784b156edb3..0723cfbff7e9 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1457,7 +1457,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_SANITISED(MVFR2_EL1), ID_UNALLOCATED(3,3), ID_SANITISED(ID_PFR2_EL1), - ID_UNALLOCATED(3,5), + ID_HIDDEN(ID_DFR1_EL1), ID_UNALLOCATED(3,6), ID_UNALLOCATED(3,7), From 152accf8476f0447de57d9ffaf0c5ab578ed3d40 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 19 May 2020 15:10:43 +0530 Subject: [PATCH 138/148] arm64/cpufeature: Introduce ID_MMFR5 CPU register This adds basic building blocks required for ID_MMFR5 CPU register which provides information about the implemented memory model and memory management support in AArch32 state. This is added per ARM DDI 0487F.a specification. Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: Mark Rutland Cc: James Morse Cc: Suzuki K Poulose Cc: kvmarm@lists.cs.columbia.edu Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Will Deacon Signed-off-by: Anshuman Khandual Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1589881254-10082-7-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpu.h | 1 + arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/kernel/cpufeature.c | 10 ++++++++++ arch/arm64/kernel/cpuinfo.c | 1 + arch/arm64/kvm/sys_regs.c | 2 +- 5 files changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index d9a78bdec409..e1f5ef437671 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -45,6 +45,7 @@ struct cpuinfo_arm64 { u32 reg_id_mmfr1; u32 reg_id_mmfr2; u32 reg_id_mmfr3; + u32 reg_id_mmfr5; u32 reg_id_pfr0; u32 reg_id_pfr1; u32 reg_id_pfr2; diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c1c97e08a799..b7f549d09c1c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -147,6 +147,7 @@ #define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) #define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) #define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) +#define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) #define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0) #define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1) @@ -793,6 +794,8 @@ #define ID_ISAR6_DP_SHIFT 4 #define ID_ISAR6_JSCVT_SHIFT 0 +#define ID_MMFR5_ETS_SHIFT 0 + #define ID_PFR2_SSBS_SHIFT 4 #define ID_PFR2_CSV3_SHIFT 0 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7a7ddbdc9c55..a1cafa8cc0ad 
100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -408,6 +408,11 @@ static const struct arm64_ftr_bits ftr_id_isar4[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_mmfr5[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR5_ETS_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_isar6[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_BF16_SHIFT, 4, 0), @@ -533,6 +538,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2), ARM64_FTR_REG(SYS_ID_PFR2_EL1, ftr_id_pfr2), ARM64_FTR_REG(SYS_ID_DFR1_EL1, ftr_id_dfr1), + ARM64_FTR_REG(SYS_ID_MMFR5_EL1, ftr_id_mmfr5), /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), @@ -738,6 +744,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); + init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5); init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2); @@ -872,6 +879,8 @@ static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info, info->reg_id_mmfr2, boot->reg_id_mmfr2); taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, info->reg_id_mmfr3, boot->reg_id_mmfr3); + taint |= check_update_ftr_reg(SYS_ID_MMFR5_EL1, cpu, + info->reg_id_mmfr5, boot->reg_id_mmfr5); taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, info->reg_id_pfr0, boot->reg_id_pfr0); taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, @@ -1012,6 +1021,7 @@ static u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_MMFR1_EL1); read_sysreg_case(SYS_ID_MMFR2_EL1); read_sysreg_case(SYS_ID_MMFR3_EL1); + read_sysreg_case(SYS_ID_MMFR5_EL1); read_sysreg_case(SYS_ID_ISAR0_EL1); read_sysreg_case(SYS_ID_ISAR1_EL1); read_sysreg_case(SYS_ID_ISAR2_EL1); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 50a281703d9d..54579bf08f74 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -374,6 +374,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); + info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1); info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0723cfbff7e9..7d7a39b01135 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1458,7 +1458,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_UNALLOCATED(3,3), ID_SANITISED(ID_PFR2_EL1), ID_HIDDEN(ID_DFR1_EL1), - ID_UNALLOCATED(3,6), + ID_SANITISED(ID_MMFR5_EL1), ID_UNALLOCATED(3,7), /* AArch64 ID registers */ From 0ae43a99fe91399d9e5b2d0756e16e97b21cc187 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 19 May 2020 15:10:44 +0530 Subject: [PATCH 139/148] arm64/cpufeature: Add remaining feature bits in ID_PFR0 register Enable DIT and CSV2 feature bits in ID_PFR0 register as per ARM DDI 0487F.a specification. Except RAS and AMU, all other feature bits are now enabled. 
Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Suzuki K Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Mark Rutland Signed-off-by: Anshuman Khandual Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1589881254-10082-8-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/kernel/cpufeature.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b7f549d09c1c..02b1246e7dbf 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -796,6 +796,9 @@ #define ID_MMFR5_ETS_SHIFT 0 +#define ID_PFR0_DIT_SHIFT 24 +#define ID_PFR0_CSV2_SHIFT 16 + #define ID_PFR2_SSBS_SHIFT 4 #define ID_PFR2_CSV3_SHIFT 0 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a1cafa8cc0ad..976cb87b59be 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -425,6 +425,8 @@ static const struct arm64_ftr_bits ftr_id_isar6[] = { }; static const struct arm64_ftr_bits ftr_id_pfr0[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_DIT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR0_CSV2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* State3 */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), /* State2 */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* State1 */ From fcd6535322cccf21830031f389c302346f767c47 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 19 May 2020 15:10:45 +0530 Subject: [PATCH 140/148] arm64/cpufeature: Add remaining feature bits in ID_MMFR4 register Enable all remaining feature bits like EVT, CCIDX, LSM, HPDS, CnP, XNX, SpecSEI in ID_MMFR4 register per ARM DDI 0487F.a. 
Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Suzuki K Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Mark Rutland Signed-off-by: Anshuman Khandual Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1589881254-10082-9-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 8 ++++++++ arch/arm64/kernel/cpufeature.c | 13 +++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 02b1246e7dbf..0a0cbb3add89 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -794,6 +794,14 @@ #define ID_ISAR6_DP_SHIFT 4 #define ID_ISAR6_JSCVT_SHIFT 0 +#define ID_MMFR4_EVT_SHIFT 28 +#define ID_MMFR4_CCIDX_SHIFT 24 +#define ID_MMFR4_LSM_SHIFT 20 +#define ID_MMFR4_HPDS_SHIFT 16 +#define ID_MMFR4_CNP_SHIFT 12 +#define ID_MMFR4_XNX_SHIFT 8 +#define ID_MMFR4_SPECSEI_SHIFT 0 + #define ID_MMFR5_ETS_SHIFT 0 #define ID_PFR0_DIT_SHIFT 24 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 976cb87b59be..353912a5699a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -392,7 +392,20 @@ static const struct arm64_ftr_bits ftr_id_isar5[] = { }; static const struct arm64_ftr_bits ftr_id_mmfr4[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EVT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CCIDX_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_LSM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_HPDS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CNP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_XNX_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* ac2 */ + /* + * SpecSEI = 1 indicates that the PE might generate an SError on an + * external abort on a speculative read. It is safer to assume that an + * SError might be generated than that it will not be. Hence it has been + * classified as FTR_HIGHER_SAFE. + */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_MMFR4_SPECSEI_SHIFT, 4, 0), ARM64_FTR_END, }; From 7cd51a5a84d115cd49c43e90b083ca60873874e5 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 19 May 2020 15:10:46 +0530 Subject: [PATCH 141/148] arm64/cpufeature: Add remaining feature bits in ID_AA64ISAR0 register Enable the TLB feature bits in the ID_AA64ISAR0 register as per the ARM DDI 0487F.a specification.
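For reference, the TLB field sits in bits [59:56] of ID_AA64ISAR0_EL1. The decode below reflects my reading of DDI 0487F.a (0 = no extra TLB maintenance, 1 = outer-shareable TLBI, 2 = outer-shareable plus range TLBI); it is an illustrative, standalone snippet rather than anything defined by this patch.

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch only: decode ID_AA64ISAR0_EL1.TLB (bits [59:56]). The
 * encodings are my reading of DDI 0487F.a (FEAT_TLBIOS and
 * FEAT_TLBIRANGE); the sample register value is made up.
 */
static const char *decode_tlb(uint64_t isar0)
{
	switch ((isar0 >> 56) & 0xf) {
	case 0:	return "no outer-shareable/range TLBI";
	case 1:	return "outer-shareable TLBI (FEAT_TLBIOS)";
	case 2:	return "outer-shareable + range TLBI (FEAT_TLBIRANGE)";
	default: return "unknown";
	}
}

int main(void)
{
	uint64_t isar0 = 2ULL << 56;	/* example value only */

	printf("TLB: %s\n", decode_tlb(isar0));
	return 0;
}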
Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Suzuki K Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Will Deacon Signed-off-by: Anshuman Khandual Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1589881254-10082-10-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kernel/cpufeature.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 0a0cbb3add89..ea075cc08c8f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -601,6 +601,7 @@ /* id_aa64isar0 */ #define ID_AA64ISAR0_RNDR_SHIFT 60 +#define ID_AA64ISAR0_TLB_SHIFT 56 #define ID_AA64ISAR0_TS_SHIFT 52 #define ID_AA64ISAR0_FHM_SHIFT 48 #define ID_AA64ISAR0_DP_SHIFT 44 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 353912a5699a..41f6e9b26d18 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -179,6 +179,7 @@ static bool __system_matches_cap(unsigned int n); */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RNDR_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TLB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0), From 011e5f5bf529f8ec2988ef7667d1a52f83273c36 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 19 May 2020 15:10:47 +0530 Subject: [PATCH 142/148] arm64/cpufeature: Add remaining feature bits in ID_AA64PFR0 register Enable the MPAM and SEL2 feature bits in the ID_AA64PFR0 register as per the ARM DDI 0487F.a specification.
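The hunk below marks MPAM as a strict field and SEL2 as non-strict. The difference boils down to whether a mismatch between the boot CPU and a secondary is reported: only bits covered by the strict mask are compared. A simplified, standalone model of that check (invented names and values, not the kernel's code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Simplified model of STRICT vs NONSTRICT fields: only bits covered
 * by the strict mask may flag a mismatch between the boot CPU and a
 * secondary CPU.
 */
static bool strict_mismatch(uint64_t boot, uint64_t cpu, uint64_t strict_mask)
{
	return (boot & strict_mask) != (cpu & strict_mask);
}

int main(void)
{
	/* Pretend field A (bits [3:0]) is strict and field B (bits [7:4]) is not. */
	uint64_t strict_mask = 0x0f;

	printf("differ only in B -> mismatch? %d\n",
	       strict_mismatch(0x10, 0x20, strict_mask));	/* 0 */
	printf("differ in A      -> mismatch? %d\n",
	       strict_mismatch(0x01, 0x02, strict_mask));	/* 1 */
	return 0;
}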
Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Suzuki K Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Will Deacon Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1589881254-10082-11-git-send-email-anshuman.khandual@arm.com [will: Make SEL2 a NONSTRICT feature per Suzuki] Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 2 ++ arch/arm64/kernel/cpufeature.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ea075cc08c8f..638f6108860f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -645,6 +645,8 @@ #define ID_AA64PFR0_CSV2_SHIFT 56 #define ID_AA64PFR0_DIT_SHIFT 48 #define ID_AA64PFR0_AMU_SHIFT 44 +#define ID_AA64PFR0_MPAM_SHIFT 40 +#define ID_AA64PFR0_SEL2_SHIFT 36 #define ID_AA64PFR0_SVE_SHIFT 32 #define ID_AA64PFR0_RAS_SHIFT 28 #define ID_AA64PFR0_GIC_SHIFT 24 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 41f6e9b26d18..68744871a65d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -222,6 +222,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_AMU_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_MPAM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SEL2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0), From 14e270fa5c4cde8f6cc0240a2ab007b58a367f16 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 19 May 2020 15:10:48 +0530 Subject: [PATCH 143/148] arm64/cpufeature: Add remaining feature bits in ID_AA64PFR1 register Enable the remaining feature bits in the ID_AA64PFR1 register as per the ARM DDI 0487F.a specification.
Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Suzuki K Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Will Deacon Signed-off-by: Anshuman Khandual Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1589881254-10082-12-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 4 ++++ arch/arm64/kernel/cpufeature.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 638f6108860f..fa9d02ca4b25 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -670,7 +670,11 @@ #define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 /* id_aa64pfr1 */ +#define ID_AA64PFR1_MPAMFRAC_SHIFT 16 +#define ID_AA64PFR1_RASFRAC_SHIFT 12 +#define ID_AA64PFR1_MTE_SHIFT 8 #define ID_AA64PFR1_SSBS_SHIFT 4 +#define ID_AA64PFR1_BT_SHIFT 0 #define ID_AA64PFR1_SSBS_PSTATE_NI 0 #define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 68744871a65d..02f8e105cd68 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -238,6 +238,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), ARM64_FTR_END, }; From 858b8a8039d001d0bae23ceb7227b322b451fb07 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 19 May 2020 15:10:54 +0530 Subject: [PATCH 144/148] arm64/cpuinfo: Add ID_MMFR4_EL1 into the cpuinfo_arm64 context ID_MMFR4_EL1 has been missing in the CPU context (i.e cpuinfo_arm64). This just adds the register along with other required changes. 
Cc: Catalin Marinas Cc: Marc Zyngier Cc: Suzuki K Poulose Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Will Deacon Signed-off-by: Anshuman Khandual Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1589881254-10082-18-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpu.h | 1 + arch/arm64/kernel/cpufeature.c | 4 ++++ arch/arm64/kernel/cpuinfo.c | 1 + 3 files changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index e1f5ef437671..7faae6ff3ab4 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -45,6 +45,7 @@ struct cpuinfo_arm64 { u32 reg_id_mmfr1; u32 reg_id_mmfr2; u32 reg_id_mmfr3; + u32 reg_id_mmfr4; u32 reg_id_mmfr5; u32 reg_id_pfr0; u32 reg_id_pfr1; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 02f8e105cd68..ada9f6f9b0f6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -764,6 +764,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); + init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4); init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5); init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); @@ -899,6 +900,8 @@ static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info, info->reg_id_mmfr2, boot->reg_id_mmfr2); taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, info->reg_id_mmfr3, boot->reg_id_mmfr3); + taint |= check_update_ftr_reg(SYS_ID_MMFR4_EL1, cpu, + info->reg_id_mmfr4, boot->reg_id_mmfr4); taint |= check_update_ftr_reg(SYS_ID_MMFR5_EL1, cpu, info->reg_id_mmfr5, boot->reg_id_mmfr5); taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, @@ -1041,6 +1044,7 @@ static u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_MMFR1_EL1); read_sysreg_case(SYS_ID_MMFR2_EL1); read_sysreg_case(SYS_ID_MMFR3_EL1); + read_sysreg_case(SYS_ID_MMFR4_EL1); read_sysreg_case(SYS_ID_MMFR5_EL1); read_sysreg_case(SYS_ID_ISAR0_EL1); read_sysreg_case(SYS_ID_ISAR1_EL1); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 54579bf08f74..465ef72f061a 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -374,6 +374,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); + info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1); info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1); info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); From 09cda9a71350e61d8803058470697b95f3d3b4cb Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sat, 9 May 2020 17:34:30 +0800 Subject: [PATCH 145/148] ACPI/IORT: Remove the unused __get_pci_rid() Since commit bc8648d49a95 ("ACPI/IORT: Handle PCI aliases properly for IOMMUs"), __get_pci_rid() has become actually unused and can be removed. 
Signed-off-by: Zenghui Yu Acked-by: Lorenzo Pieralisi Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20200509093430.1983-1-yuzenghui@huawei.com Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 9c40709c2f4e..28a6b387e80e 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -772,15 +772,6 @@ void acpi_configure_pmsi_domain(struct device *dev) dev_set_msi_domain(dev, msi_domain); } -static int __maybe_unused __get_pci_rid(struct pci_dev *pdev, u16 alias, - void *data) -{ - u32 *rid = data; - - *rid = alias; - return 0; -} - #ifdef CONFIG_IOMMU_API static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev) { From 3577dd37c703e0d599e8c244917b8e369f38a45a Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 27 May 2020 15:34:36 +0530 Subject: [PATCH 146/148] arm64/cpufeature: Add get_arm64_ftr_reg_nowarn() There is no way to proceed when the requested register cannot be found in arm64_ftr_regs[]. Requesting a non-present register is an error as well. Hence let's just WARN_ON() when the search fails in get_arm64_ftr_reg(), rather than checking the return value and doing a BUG_ON() in individual callers. But there are also callers that don't error out when the register search fails. Add a new helper, get_arm64_ftr_reg_nowarn(), for such cases. Signed-off-by: Anshuman Khandual Reviewed-by: Catalin Marinas Cc: Catalin Marinas Cc: Will Deacon Cc: Suzuki K Poulose Cc: Mark Brown Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1590573876-19120-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 44 ++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index ada9f6f9b0f6..7437b8c19528 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -595,16 +595,16 @@ static int search_cmp_ftr_reg(const void *id, const void *regp) } /* - * get_arm64_ftr_reg - Lookup a feature register entry using its - * sys_reg() encoding. With the array arm64_ftr_regs sorted in the - * ascending order of sys_id , we use binary search to find a matching + * get_arm64_ftr_reg_nowarn - Looks up a feature register entry using + * its sys_reg() encoding. With the array arm64_ftr_regs sorted in the + * ascending order of sys_id, we use binary search to find a matching * entry. * * returns - Upon success, matching ftr_reg entry for id. * - NULL on failure. It is upto the caller to decide * the impact of a failure. */ -static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) +static struct arm64_ftr_reg *get_arm64_ftr_reg_nowarn(u32 sys_id) { const struct __ftr_reg_entry *ret; @@ -618,6 +618,27 @@ static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) return NULL; } +/* + * get_arm64_ftr_reg - Looks up a feature register entry using + * its sys_reg() encoding. This calls get_arm64_ftr_reg_nowarn(). + * + * returns - Upon success, matching ftr_reg entry for id. + * - NULL on failure but with a WARN_ON(). + */ +static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) +{ + struct arm64_ftr_reg *reg; + + reg = get_arm64_ftr_reg_nowarn(sys_id); + + /* + * Requesting a non-existent register is an error. Warn + * and let the caller handle it.
+ */ + WARN_ON(!reg); + return reg; +} + static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val) { @@ -679,7 +700,8 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) const struct arm64_ftr_bits *ftrp; struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg); - BUG_ON(!reg); + if (!reg) + return; for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { u64 ftr_mask = arm64_ftr_mask(ftrp); @@ -813,7 +835,9 @@ static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot) { struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id); - BUG_ON(!regp); + if (!regp) + return 0; + update_cpu_ftr_reg(regp, val); if ((boot & regp->strict_mask) == (val & regp->strict_mask)) return 0; @@ -827,7 +851,7 @@ static void relax_cpu_ftr_reg(u32 sys_id, int field) const struct arm64_ftr_bits *ftrp; struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id); - if (WARN_ON(!regp)) + if (!regp) return; for (ftrp = regp->ftr_bits; ftrp->width; ftrp++) { @@ -1020,8 +1044,8 @@ u64 read_sanitised_ftr_reg(u32 id) { struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id); - /* We shouldn't get a request for an unsupported register */ - BUG_ON(!regp); + if (!regp) + return 0; return regp->sys_val; } @@ -2626,7 +2650,7 @@ static int emulate_sys_reg(u32 id, u64 *valp) if (sys_reg_CRm(id) == 0) return emulate_id_reg(id, valp); - regp = get_arm64_ftr_reg(id); + regp = get_arm64_ftr_reg_nowarn(id); if (regp) *valp = arm64_ftr_reg_user_value(regp); else From b130a8f70cbbf907c399799bd1073a78763ca0e7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 28 May 2020 14:12:58 +0100 Subject: [PATCH 147/148] KVM: arm64: Check advertised Stage-2 page size capability With ARMv8.5-GTG, the hardware (or more likely a hypervisor) can advertise the supported Stage-2 page sizes. Let's check this at boot time. 
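The TGRAN*_2 fields added below use a three-way encoding, which is exactly what the switch in kvm_set_ipa_limit() handles: 0 means "fall back to the corresponding Stage-1 TGRAN field", 1 means "not supported at Stage-2", and 2 means "explicitly supported at Stage-2". A small, standalone decode sketch (plain C, invented names, not the kernel's code):

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of the ID_AA64MMFR0_EL1.TGRAN*_2 decode performed by the
 * hunk below: 0 = fall back to the Stage-1 TGRAN field, 1 = not
 * supported at Stage-2, 2 = advertised at Stage-2.
 */
static int stage2_granule_ok(uint64_t mmfr0, unsigned int tgran_2_shift)
{
	switch ((mmfr0 >> tgran_2_shift) & 0xf) {
	case 0:	return 1;	/* assume Stage-1 support carries over */
	case 2:	return 1;	/* explicitly advertised */
	case 1:
	default:
		return 0;	/* not usable at Stage-2 */
	}
}

int main(void)
{
	uint64_t mmfr0 = 2ULL << 40;	/* example: 4K advertised at Stage-2 */

	printf("4K at Stage-2: %d\n", stage2_granule_ok(mmfr0, 40));
	return 0;
}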
Reviewed-by: Suzuki K Poulose Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/kernel/cpufeature.c | 18 +++++++++++++++ arch/arm64/kvm/reset.c | 37 +++++++++++++++++++++++++++++-- virt/kvm/arm/arm.c | 4 +--- 5 files changed, 58 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 32c8a675e5a4..7dd8fefa6aec 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -670,7 +670,7 @@ static inline int kvm_arm_have_ssbd(void) void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); -void kvm_set_ipa_limit(void); +int kvm_set_ipa_limit(void); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index fa9d02ca4b25..efe368ee4996 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -703,6 +703,9 @@ #define ID_AA64ZFR0_SVEVER_SVE2 0x1 /* id_aa64mmfr0 */ +#define ID_AA64MMFR0_TGRAN4_2_SHIFT 40 +#define ID_AA64MMFR0_TGRAN64_2_SHIFT 36 +#define ID_AA64MMFR0_TGRAN16_2_SHIFT 32 #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN64_SHIFT 24 #define ID_AA64MMFR0_TGRAN16_SHIFT 20 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7437b8c19528..b3202a99e559 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -267,6 +267,24 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { + /* + * Page size not being supported at Stage-2 is not fatal. You + * just give up KVM if PAGE_SIZE isn't supported there. Go fix + * your favourite nesting hypervisor. + * + * There is a small corner case where the hypervisor explicitly + * advertises a given granule size at Stage-2 (value 2) on some + * vCPUs, and uses the fallback to Stage-1 (value 0) for other + * vCPUs. Although this is not forbidden by the architecture, it + * indicates that the hypervisor is being silly (or buggy). + * + * We make no effort to cope with this and pretend that if these + * fields are inconsistent across vCPUs, then it isn't worth + * trying to bring KVM up. + */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_2_SHIFT, 4, 1), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_2_SHIFT, 4, 1), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_2_SHIFT, 4, 1), /* * We already refuse to boot CPUs that don't support our configured * page size, so we can only detect mismatches for a page size other diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index d8800ef4f42d..70cd7bcca433 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -337,14 +337,45 @@ u32 get_kvm_ipa_limit(void) return kvm_ipa_limit; } -void kvm_set_ipa_limit(void) +int kvm_set_ipa_limit(void) { - unsigned int ipa_max, pa_max, va_max, parange; + unsigned int ipa_max, pa_max, va_max, parange, tgran_2; u64 mmfr0; mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); parange = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_PARANGE_SHIFT); + + /* + * Check with ARMv8.5-GTG that our PAGE_SIZE is supported at + * Stage-2. If not, things will stop very quickly. 
+ */ + switch (PAGE_SIZE) { + default: + case SZ_4K: + tgran_2 = ID_AA64MMFR0_TGRAN4_2_SHIFT; + break; + case SZ_16K: + tgran_2 = ID_AA64MMFR0_TGRAN16_2_SHIFT; + break; + case SZ_64K: + tgran_2 = ID_AA64MMFR0_TGRAN64_2_SHIFT; + break; + } + + switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) { + default: + case 1: + kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n"); + return -EINVAL; + case 0: + kvm_debug("PAGE_SIZE supported at Stage-2 (default)\n"); + break; + case 2: + kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n"); + break; + } + pa_max = id_aa64mmfr0_parange_to_phys_shift(parange); /* Clamp the IPA limit to the PA size supported by the kernel */ @@ -378,6 +409,8 @@ void kvm_set_ipa_limit(void) "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max); kvm_ipa_limit = ipa_max; kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit); + + return 0; } /* diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 48d0ec44ad77..53b3ba9173ba 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1387,9 +1387,7 @@ static inline void hyp_cpu_pm_exit(void) static int init_common_resources(void) { - kvm_set_ipa_limit(); - - return 0; + return kvm_set_ipa_limit(); } static int init_subsystems(void) From fe677be989146b8a8c0f26fe626c6567c4cd3837 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 28 May 2020 14:12:59 +0100 Subject: [PATCH 148/148] KVM: arm64: Move __load_guest_stage2 to kvm_mmu.h Having __load_guest_stage2 in kvm_hyp.h is quickly going to trigger a circular include problem. In order to avoid this, let's move it to kvm_mmu.h, where it will be a better fit anyway. In the process, drop the __hyp_text annotation, which doesn't help as the function is marked as __always_inline. Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_hyp.h | 18 ------------------ arch/arm64/include/asm/kvm_mmu.h | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 238d2e049694..dcb63bf94105 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #define __hyp_text __section(.hyp.text) notrace @@ -88,22 +87,5 @@ void deactivate_traps_vhe_put(void); u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); void __noreturn __hyp_do_panic(unsigned long, ...); -/* - * Must be called from hyp code running at EL2 with an updated VTTBR - * and interrupts disabled. - */ -static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm) -{ - write_sysreg(kvm->arch.vtcr, vtcr_el2); - write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); - - /* - * ARM errata 1165522 and 1530923 require the actual execution of the - * above before we can switch to the EL1/EL0 translation regime used by - * the guest. - */ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); -} - #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 30b0e8d6b895..1abe58bbbf13 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -604,5 +604,22 @@ static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; } +/* + * Must be called from hyp code running at EL2 with an updated VTTBR + * and interrupts disabled. 
+ */ +static __always_inline void __load_guest_stage2(struct kvm *kvm) +{ + write_sysreg(kvm->arch.vtcr, vtcr_el2); + write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); + + /* + * ARM errata 1165522 and 1530923 require the actual execution of the + * above before we can switch to the EL1/EL0 translation regime used by + * the guest. + */ + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */
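As a closing illustration of what __load_guest_stage2() actually writes, the VTTBR_EL2 value composed by kvm_get_vttbr() combines the Stage-2 table base address, the VMID, and the CnP bit. The standalone model below uses my reading of the VMID (bits [63:48]) and CnP (bit 0) positions and invented helper names and sample values; it is an illustration, not kernel code.

#include <stdint.h>
#include <stdio.h>

/*
 * Rough standalone model of the VTTBR_EL2 value written by
 * __load_guest_stage2(): Stage-2 table base, VMID in bits [63:48],
 * CnP in bit 0. Shifts are my reading of the architecture.
 */
#define VTTBR_VMID_SHIFT	48
#define VTTBR_CNP_BIT		(1ULL << 0)

static uint64_t make_vttbr(uint64_t pgd_phys, uint16_t vmid, int cnp)
{
	return pgd_phys | ((uint64_t)vmid << VTTBR_VMID_SHIFT) |
	       (cnp ? VTTBR_CNP_BIT : 0);
}

int main(void)
{
	printf("vttbr = 0x%016llx\n",
	       (unsigned long long)make_vttbr(0x80001000ULL, 5, 1));
	return 0;
}

In the real code, the ISB (or its errata-driven alternative) after the two system register writes is what guarantees the new VMID and VTCR take effect before the EL1/EL0 translation regime of the guest is used.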