2019-06-03 12:44:50 +07:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2012-03-05 18:49:33 +07:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2012 ARM Ltd.
|
|
|
|
*/
|
|
|
|
#ifndef __ASM_MODULE_H
|
|
|
|
#define __ASM_MODULE_H
|
|
|
|
|
|
|
|
#include <asm-generic/module.h>
|
|
|
|
|
|
|
|
#define MODULE_ARCH_VERMAGIC "aarch64"
|
|
|
|
|
2015-11-24 18:37:35 +07:00
|
|
|
#ifdef CONFIG_ARM64_MODULE_PLTS
|
arm64: module: split core and init PLT sections
The arm64 module PLT code allocates all PLT entries in a single core
section, since the overhead of having a separate init PLT section is
not justified by the small number of PLT entries usually required for
init code.
However, the core and init module regions are allocated independently,
and there is a corner case where the core region may be allocated from
the VMALLOC region if the dedicated module region is exhausted, but the
init region, being much smaller, can still be allocated from the module
region. This leads to relocation failures if the distance between those
regions exceeds 128 MB. (In fact, this corner case is highly unlikely to
occur on arm64, but the issue has been observed on ARM, whose module
region is much smaller).
So split the core and init PLT regions, and name the latter ".init.plt"
so it gets allocated along with (and sufficiently close to) the .init
sections that it serves. Also, given that init PLT entries may need to
be emitted for branches that target the core module, modify the logic
that disregards defined symbols to only disregard symbols that are
defined in the same section as the relocated branch instruction.
Since there may now be two PLT entries associated with each entry in
the symbol table, we can no longer hijack the symbol::st_size fields
to record the addresses of PLT entries as we emit them for zero-addend
relocations. So instead, perform an explicit comparison to check for
duplicate entries.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
|
|
|
struct mod_plt_sec {
|
2018-11-06 01:53:23 +07:00
|
|
|
int plt_shndx;
|
2015-11-24 18:37:35 +07:00
|
|
|
int plt_num_entries;
|
|
|
|
int plt_max_entries;
|
|
|
|
};
|
arm64: module: split core and init PLT sections
The arm64 module PLT code allocates all PLT entries in a single core
section, since the overhead of having a separate init PLT section is
not justified by the small number of PLT entries usually required for
init code.
However, the core and init module regions are allocated independently,
and there is a corner case where the core region may be allocated from
the VMALLOC region if the dedicated module region is exhausted, but the
init region, being much smaller, can still be allocated from the module
region. This leads to relocation failures if the distance between those
regions exceeds 128 MB. (In fact, this corner case is highly unlikely to
occur on arm64, but the issue has been observed on ARM, whose module
region is much smaller).
So split the core and init PLT regions, and name the latter ".init.plt"
so it gets allocated along with (and sufficiently close to) the .init
sections that it serves. Also, given that init PLT entries may need to
be emitted for branches that target the core module, modify the logic
that disregards defined symbols to only disregard symbols that are
defined in the same section as the relocated branch instruction.
Since there may now be two PLT entries associated with each entry in
the symbol table, we can no longer hijack the symbol::st_size fields
to record the addresses of PLT entries as we emit them for zero-addend
relocations. So instead, perform an explicit comparison to check for
duplicate entries.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
|
|
|
|
|
|
|
struct mod_arch_specific {
|
|
|
|
struct mod_plt_sec core;
|
|
|
|
struct mod_plt_sec init;
|
2017-06-07 00:00:22 +07:00
|
|
|
|
|
|
|
/* for CONFIG_DYNAMIC_FTRACE */
|
arm64: implement ftrace with regs
This patch implements FTRACE_WITH_REGS for arm64, which allows a traced
function's arguments (and some other registers) to be captured into a
struct pt_regs, allowing these to be inspected and/or modified. This is
a building block for live-patching, where a function's arguments may be
forwarded to another function. This is also necessary to enable ftrace
and in-kernel pointer authentication at the same time, as it allows the
LR value to be captured and adjusted prior to signing.
Using GCC's -fpatchable-function-entry=N option, we can have the
compiler insert a configurable number of NOPs between the function entry
point and the usual prologue. This also ensures functions are AAPCS
compliant (e.g. disabling inter-procedural register allocation).
For example, with -fpatchable-function-entry=2, GCC 8.1.0 compiles the
following:
| unsigned long bar(void);
|
| unsigned long foo(void)
| {
| return bar() + 1;
| }
... to:
| <foo>:
| nop
| nop
| stp x29, x30, [sp, #-16]!
| mov x29, sp
| bl 0 <bar>
| add x0, x0, #0x1
| ldp x29, x30, [sp], #16
| ret
This patch builds the kernel with -fpatchable-function-entry=2,
prefixing each function with two NOPs. To trace a function, we replace
these NOPs with a sequence that saves the LR into a GPR, then calls an
ftrace entry assembly function which saves this and other relevant
registers:
| mov x9, x30
| bl <ftrace-entry>
Since patchable functions are AAPCS compliant (and the kernel does not
use x18 as a platform register), x9-x18 can be safely clobbered in the
patched sequence and the ftrace entry code.
There are now two ftrace entry functions, ftrace_regs_entry (which saves
all GPRs), and ftrace_entry (which saves the bare minimum). A PLT is
allocated for each within modules.
Signed-off-by: Torsten Duwe <duwe@suse.de>
[Mark: rework asm, comments, PLTs, initialization, commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Torsten Duwe <duwe@suse.de>
Tested-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Tested-by: Torsten Duwe <duwe@suse.de>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Julien Thierry <jthierry@redhat.com>
Cc: Will Deacon <will@kernel.org>
2019-02-08 22:10:19 +07:00
|
|
|
struct plt_entry *ftrace_trampolines;
|
arm64: module: split core and init PLT sections
The arm64 module PLT code allocates all PLT entries in a single core
section, since the overhead of having a separate init PLT section is
not justified by the small number of PLT entries usually required for
init code.
However, the core and init module regions are allocated independently,
and there is a corner case where the core region may be allocated from
the VMALLOC region if the dedicated module region is exhausted, but the
init region, being much smaller, can still be allocated from the module
region. This leads to relocation failures if the distance between those
regions exceeds 128 MB. (In fact, this corner case is highly unlikely to
occur on arm64, but the issue has been observed on ARM, whose module
region is much smaller).
So split the core and init PLT regions, and name the latter ".init.plt"
so it gets allocated along with (and sufficiently close to) the .init
sections that it serves. Also, given that init PLT entries may need to
be emitted for branches that target the core module, modify the logic
that disregards defined symbols to only disregard symbols that are
defined in the same section as the relocated branch instruction.
Since there may now be two PLT entries associated with each entry in
the symbol table, we can no longer hijack the symbol::st_size fields
to record the addresses of PLT entries as we emit them for zero-addend
relocations. So instead, perform an explicit comparison to check for
duplicate entries.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
|
|
|
};
|
2015-11-24 18:37:35 +07:00
|
|
|
#endif
|
|
|
|
|
2018-11-06 01:53:23 +07:00
|
|
|
u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
|
|
|
|
void *loc, const Elf64_Rela *rela,
|
2015-11-24 18:37:35 +07:00
|
|
|
Elf64_Sym *sym);
|
|
|
|
|
2018-11-06 01:53:23 +07:00
|
|
|
u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
|
|
|
|
void *loc, u64 val);
|
arm64/kernel: don't ban ADRP to work around Cortex-A53 erratum #843419
Working around Cortex-A53 erratum #843419 involves special handling of
ADRP instructions that end up in the last two instruction slots of a
4k page, or whose output register gets overwritten without having been
read. (Note that the latter instruction sequence is never emitted by
a properly functioning compiler, which is why it is disregarded by the
handling of the same erratum in the bfd.ld linker which we rely on for
the core kernel)
Normally, this gets taken care of by the linker, which can spot such
sequences at final link time, and insert a veneer if the ADRP ends up
at a vulnerable offset. However, linux kernel modules are partially
linked ELF objects, and so there is no 'final link time' other than the
runtime loading of the module, at which time all the static relocations
are resolved.
For this reason, we have implemented the #843419 workaround for modules
by avoiding ADRP instructions altogether, by using the large C model,
and by passing -mpc-relative-literal-loads to recent versions of GCC
that may emit adrp/ldr pairs to perform literal loads. However, this
workaround forces us to keep literal data mixed with the instructions
in the executable .text segment, and literal data may inadvertently
turn into an exploitable speculative gadget depending on the relative
offsets of arbitrary symbols.
So let's reimplement this workaround in a way that allows us to switch
back to the small C model, and to drop the -mpc-relative-literal-loads
GCC switch, by patching affected ADRP instructions at runtime:
- ADRP instructions that do not appear at 4k relative offset 0xff8 or
0xffc are ignored
- ADRP instructions that are within 1 MB of their target symbol are
converted into ADR instructions
- remaining ADRP instructions are redirected via a veneer that performs
the load using an unaffected movn/movk sequence.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[will: tidied up ADRP -> ADR instruction patching.]
[will: use ULL suffix for 64-bit immediate]
Signed-off-by: Will Deacon <will.deacon@arm.com>
2018-03-07 00:15:33 +07:00
|
|
|
|
arm64: add support for kernel ASLR
This adds support for KASLR is implemented, based on entropy provided by
the bootloader in the /chosen/kaslr-seed DT property. Depending on the size
of the address space (VA_BITS) and the page size, the entropy in the
virtual displacement is up to 13 bits (16k/2 levels) and up to 25 bits (all
4 levels), with the sidenote that displacements that result in the kernel
image straddling a 1GB/32MB/512MB alignment boundary (for 4KB/16KB/64KB
granule kernels, respectively) are not allowed, and will be rounded up to
an acceptable value.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled, the module region is
randomized independently from the core kernel. This makes it less likely
that the location of core kernel data structures can be determined by an
adversary, but causes all function calls from modules into the core kernel
to be resolved via entries in the module PLTs.
If CONFIG_RANDOMIZE_MODULE_REGION_FULL is not enabled, the module region is
randomized by choosing a page aligned 128 MB region inside the interval
[_etext - 128 MB, _stext + 128 MB). This gives between 10 and 14 bits of
entropy (depending on page size), independently of the kernel randomization,
but still guarantees that modules are within the range of relative branch
and jump instructions (with the caveat that, since the module region is
shared with other uses of the vmalloc area, modules may need to be loaded
further away if the module region is exhausted)
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-01-26 20:12:01 +07:00
|
|
|
#ifdef CONFIG_RANDOMIZE_BASE
|
|
|
|
extern u64 module_alloc_base;
|
|
|
|
#else
|
|
|
|
#define module_alloc_base ((u64)_etext - MODULES_VSIZE)
|
|
|
|
#endif
|
|
|
|
|
2017-11-21 00:41:29 +07:00
|
|
|
struct plt_entry {
|
|
|
|
/*
|
|
|
|
* A program that conforms to the AArch64 Procedure Call Standard
|
|
|
|
* (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
|
|
|
|
* IP1 (x17) may be inserted at any branch instruction that is
|
|
|
|
* exposed to a relocation that supports long branches. Since that
|
|
|
|
* is exactly what we are dealing with here, we are free to use x16
|
|
|
|
* as a scratch register in the PLT veneers.
|
|
|
|
*/
|
2018-11-22 15:46:46 +07:00
|
|
|
__le32 adrp; /* adrp x16, .... */
|
|
|
|
__le32 add; /* add x16, x16, #0x.... */
|
2017-11-21 00:41:29 +07:00
|
|
|
__le32 br; /* br x16 */
|
|
|
|
};
|
|
|
|
|
2018-11-22 15:46:46 +07:00
|
|
|
static inline bool is_forbidden_offset_for_adrp(void *place)
|
2017-11-21 00:41:29 +07:00
|
|
|
{
|
2018-11-22 15:46:46 +07:00
|
|
|
return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
|
|
|
|
cpus_have_const_cap(ARM64_WORKAROUND_843419) &&
|
|
|
|
((u64)place & 0xfff) >= 0xff8;
|
2017-11-21 00:41:29 +07:00
|
|
|
}
|
|
|
|
|
2018-11-22 15:46:46 +07:00
|
|
|
struct plt_entry get_plt_entry(u64 dst, void *pc);
|
|
|
|
bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b);
|
2017-11-21 00:41:29 +07:00
|
|
|
|
2019-04-08 02:06:16 +07:00
|
|
|
static inline bool plt_entry_is_initialized(const struct plt_entry *e)
|
|
|
|
{
|
|
|
|
return e->adrp || e->add || e->br;
|
|
|
|
}
|
|
|
|
|
2012-03-05 18:49:33 +07:00
|
|
|
#endif /* __ASM_MODULE_H */
|