linux_dsm_epyc7002/arch/arm64/kernel/module-plts.c

198 lines
5.9 KiB
C
Raw Normal View History

/*
arm64: module: split core and init PLT sections The arm64 module PLT code allocates all PLT entries in a single core section, since the overhead of having a separate init PLT section is not justified by the small number of PLT entries usually required for init code. However, the core and init module regions are allocated independently, and there is a corner case where the core region may be allocated from the VMALLOC region if the dedicated module region is exhausted, but the init region, being much smaller, can still be allocated from the module region. This leads to relocation failures if the distance between those regions exceeds 128 MB. (In fact, this corner case is highly unlikely to occur on arm64, but the issue has been observed on ARM, whose module region is much smaller). So split the core and init PLT regions, and name the latter ".init.plt" so it gets allocated along with (and sufficiently close to) the .init sections that it serves. Also, given that init PLT entries may need to be emitted for branches that target the core module, modify the logic that disregards defined symbols to only disregard symbols that are defined in the same section as the relocated branch instruction. Since there may now be two PLT entries associated with each entry in the symbol table, we can no longer hijack the symbol::st_size fields to record the addresses of PLT entries as we emit them for zero-addend relocations. So instead, perform an explicit comparison to check for duplicate entries. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
* Copyright (C) 2014-2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/elf.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sort.h>
arm64: module: split core and init PLT sections The arm64 module PLT code allocates all PLT entries in a single core section, since the overhead of having a separate init PLT section is not justified by the small number of PLT entries usually required for init code. However, the core and init module regions are allocated independently, and there is a corner case where the core region may be allocated from the VMALLOC region if the dedicated module region is exhausted, but the init region, being much smaller, can still be allocated from the module region. This leads to relocation failures if the distance between those regions exceeds 128 MB. (In fact, this corner case is highly unlikely to occur on arm64, but the issue has been observed on ARM, whose module region is much smaller). So split the core and init PLT regions, and name the latter ".init.plt" so it gets allocated along with (and sufficiently close to) the .init sections that it serves. Also, given that init PLT entries may need to be emitted for branches that target the core module, modify the logic that disregards defined symbols to only disregard symbols that are defined in the same section as the relocated branch instruction. Since there may now be two PLT entries associated with each entry in the symbol table, we can no longer hijack the symbol::st_size fields to record the addresses of PLT entries as we emit them for zero-addend relocations. So instead, perform an explicit comparison to check for duplicate entries. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
static bool in_init(const struct module *mod, void *loc)
{
return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
}
u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
Elf64_Sym *sym)
{
arm64: module: split core and init PLT sections The arm64 module PLT code allocates all PLT entries in a single core section, since the overhead of having a separate init PLT section is not justified by the small number of PLT entries usually required for init code. However, the core and init module regions are allocated independently, and there is a corner case where the core region may be allocated from the VMALLOC region if the dedicated module region is exhausted, but the init region, being much smaller, can still be allocated from the module region. This leads to relocation failures if the distance between those regions exceeds 128 MB. (In fact, this corner case is highly unlikely to occur on arm64, but the issue has been observed on ARM, whose module region is much smaller). So split the core and init PLT regions, and name the latter ".init.plt" so it gets allocated along with (and sufficiently close to) the .init sections that it serves. Also, given that init PLT entries may need to be emitted for branches that target the core module, modify the logic that disregards defined symbols to only disregard symbols that are defined in the same section as the relocated branch instruction. Since there may now be two PLT entries associated with each entry in the symbol table, we can no longer hijack the symbol::st_size fields to record the addresses of PLT entries as we emit them for zero-addend relocations. So instead, perform an explicit comparison to check for duplicate entries. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
&mod->arch.init;
struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr;
int i = pltsec->plt_num_entries;
u64 val = sym->st_value + rela->r_addend;
plt[i] = get_plt_entry(val);
arm64: module: split core and init PLT sections The arm64 module PLT code allocates all PLT entries in a single core section, since the overhead of having a separate init PLT section is not justified by the small number of PLT entries usually required for init code. However, the core and init module regions are allocated independently, and there is a corner case where the core region may be allocated from the VMALLOC region if the dedicated module region is exhausted, but the init region, being much smaller, can still be allocated from the module region. This leads to relocation failures if the distance between those regions exceeds 128 MB. (In fact, this corner case is highly unlikely to occur on arm64, but the issue has been observed on ARM, whose module region is much smaller). So split the core and init PLT regions, and name the latter ".init.plt" so it gets allocated along with (and sufficiently close to) the .init sections that it serves. Also, given that init PLT entries may need to be emitted for branches that target the core module, modify the logic that disregards defined symbols to only disregard symbols that are defined in the same section as the relocated branch instruction. Since there may now be two PLT entries associated with each entry in the symbol table, we can no longer hijack the symbol::st_size fields to record the addresses of PLT entries as we emit them for zero-addend relocations. So instead, perform an explicit comparison to check for duplicate entries. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
/*
* Check if the entry we just created is a duplicate. Given that the
* relocations are sorted, this will be the last entry we allocated.
* (if one exists).
*/
if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))
arm64: module: split core and init PLT sections The arm64 module PLT code allocates all PLT entries in a single core section, since the overhead of having a separate init PLT section is not justified by the small number of PLT entries usually required for init code. However, the core and init module regions are allocated independently, and there is a corner case where the core region may be allocated from the VMALLOC region if the dedicated module region is exhausted, but the init region, being much smaller, can still be allocated from the module region. This leads to relocation failures if the distance between those regions exceeds 128 MB. (In fact, this corner case is highly unlikely to occur on arm64, but the issue has been observed on ARM, whose module region is much smaller). So split the core and init PLT regions, and name the latter ".init.plt" so it gets allocated along with (and sufficiently close to) the .init sections that it serves. Also, given that init PLT entries may need to be emitted for branches that target the core module, modify the logic that disregards defined symbols to only disregard symbols that are defined in the same section as the relocated branch instruction. Since there may now be two PLT entries associated with each entry in the symbol table, we can no longer hijack the symbol::st_size fields to record the addresses of PLT entries as we emit them for zero-addend relocations. So instead, perform an explicit comparison to check for duplicate entries. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
return (u64)&plt[i - 1];
pltsec->plt_num_entries++;
BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries);
return (u64)&plt[i];
}
#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b))
static int cmp_rela(const void *a, const void *b)
{
const Elf64_Rela *x = a, *y = b;
int i;
/* sort by type, symbol index and addend */
i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info));
if (i == 0)
i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info));
if (i == 0)
i = cmp_3way(x->r_addend, y->r_addend);
return i;
}
static bool duplicate_rel(const Elf64_Rela *rela, int num)
{
/*
* Entries are sorted by type, symbol index and addend. That means
* that, if a duplicate entry exists, it must be in the preceding
* slot.
*/
return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0;
}
arm64: module: split core and init PLT sections The arm64 module PLT code allocates all PLT entries in a single core section, since the overhead of having a separate init PLT section is not justified by the small number of PLT entries usually required for init code. However, the core and init module regions are allocated independently, and there is a corner case where the core region may be allocated from the VMALLOC region if the dedicated module region is exhausted, but the init region, being much smaller, can still be allocated from the module region. This leads to relocation failures if the distance between those regions exceeds 128 MB. (In fact, this corner case is highly unlikely to occur on arm64, but the issue has been observed on ARM, whose module region is much smaller). So split the core and init PLT regions, and name the latter ".init.plt" so it gets allocated along with (and sufficiently close to) the .init sections that it serves. Also, given that init PLT entries may need to be emitted for branches that target the core module, modify the logic that disregards defined symbols to only disregard symbols that are defined in the same section as the relocated branch instruction. Since there may now be two PLT entries associated with each entry in the symbol table, we can no longer hijack the symbol::st_size fields to record the addresses of PLT entries as we emit them for zero-addend relocations. So instead, perform an explicit comparison to check for duplicate entries. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
Elf64_Word dstidx)
{
unsigned int ret = 0;
Elf64_Sym *s;
int i;
for (i = 0; i < num; i++) {
switch (ELF64_R_TYPE(rela[i].r_info)) {
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
/*
* We only have to consider branch targets that resolve
arm64: module: split core and init PLT sections The arm64 module PLT code allocates all PLT entries in a single core section, since the overhead of having a separate init PLT section is not justified by the small number of PLT entries usually required for init code. However, the core and init module regions are allocated independently, and there is a corner case where the core region may be allocated from the VMALLOC region if the dedicated module region is exhausted, but the init region, being much smaller, can still be allocated from the module region. This leads to relocation failures if the distance between those regions exceeds 128 MB. (In fact, this corner case is highly unlikely to occur on arm64, but the issue has been observed on ARM, whose module region is much smaller). So split the core and init PLT regions, and name the latter ".init.plt" so it gets allocated along with (and sufficiently close to) the .init sections that it serves. Also, given that init PLT entries may need to be emitted for branches that target the core module, modify the logic that disregards defined symbols to only disregard symbols that are defined in the same section as the relocated branch instruction. Since there may now be two PLT entries associated with each entry in the symbol table, we can no longer hijack the symbol::st_size fields to record the addresses of PLT entries as we emit them for zero-addend relocations. So instead, perform an explicit comparison to check for duplicate entries. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
* to symbols that are defined in a different section.
* This is not simply a heuristic, it is a fundamental
* limitation, since there is no guaranteed way to emit
* PLT entries sufficiently close to the branch if the
* section size exceeds the range of a branch
* instruction. So ignore relocations against defined
* symbols if they live in the same section as the
* relocation target.
*/
s = syms + ELF64_R_SYM(rela[i].r_info);
arm64: module: split core and init PLT sections The arm64 module PLT code allocates all PLT entries in a single core section, since the overhead of having a separate init PLT section is not justified by the small number of PLT entries usually required for init code. However, the core and init module regions are allocated independently, and there is a corner case where the core region may be allocated from the VMALLOC region if the dedicated module region is exhausted, but the init region, being much smaller, can still be allocated from the module region. This leads to relocation failures if the distance between those regions exceeds 128 MB. (In fact, this corner case is highly unlikely to occur on arm64, but the issue has been observed on ARM, whose module region is much smaller). So split the core and init PLT regions, and name the latter ".init.plt" so it gets allocated along with (and sufficiently close to) the .init sections that it serves. Also, given that init PLT entries may need to be emitted for branches that target the core module, modify the logic that disregards defined symbols to only disregard symbols that are defined in the same section as the relocated branch instruction. Since there may now be two PLT entries associated with each entry in the symbol table, we can no longer hijack the symbol::st_size fields to record the addresses of PLT entries as we emit them for zero-addend relocations. So instead, perform an explicit comparison to check for duplicate entries. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
if (s->st_shndx == dstidx)
break;
/*
* Jump relocations with non-zero addends against
* undefined symbols are supported by the ELF spec, but
* do not occur in practice (e.g., 'jump n bytes past
* the entry point of undefined function symbol f').
* So we need to support them, but there is no need to
* take them into consideration when trying to optimize
* this code. So let's only check for duplicates when
* the addend is zero: this allows us to record the PLT
* entry address in the symbol table itself, rather than
* having to search the list for duplicates each time we
* emit one.
*/
if (rela[i].r_addend != 0 || !duplicate_rel(rela, i))
ret++;
break;
}
}
return ret;
}
int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
char *secstrings, struct module *mod)
{
arm64: module: split core and init PLT sections The arm64 module PLT code allocates all PLT entries in a single core section, since the overhead of having a separate init PLT section is not justified by the small number of PLT entries usually required for init code. However, the core and init module regions are allocated independently, and there is a corner case where the core region may be allocated from the VMALLOC region if the dedicated module region is exhausted, but the init region, being much smaller, can still be allocated from the module region. This leads to relocation failures if the distance between those regions exceeds 128 MB. (In fact, this corner case is highly unlikely to occur on arm64, but the issue has been observed on ARM, whose module region is much smaller). So split the core and init PLT regions, and name the latter ".init.plt" so it gets allocated along with (and sufficiently close to) the .init sections that it serves. Also, given that init PLT entries may need to be emitted for branches that target the core module, modify the logic that disregards defined symbols to only disregard symbols that are defined in the same section as the relocated branch instruction. Since there may now be two PLT entries associated with each entry in the symbol table, we can no longer hijack the symbol::st_size fields to record the addresses of PLT entries as we emit them for zero-addend relocations. So instead, perform an explicit comparison to check for duplicate entries. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
unsigned long core_plts = 0;
unsigned long init_plts = 0;
Elf64_Sym *syms = NULL;
Elf_Shdr *tramp = NULL;
int i;
/*
* Find the empty .plt section so we can expand it to store the PLT
* entries. Record the symtab address as well.
*/
for (i = 0; i < ehdr->e_shnum; i++) {
arm64: module: split core and init PLT sections The arm64 module PLT code allocates all PLT entries in a single core section, since the overhead of having a separate init PLT section is not justified by the small number of PLT entries usually required for init code. However, the core and init module regions are allocated independently, and there is a corner case where the core region may be allocated from the VMALLOC region if the dedicated module region is exhausted, but the init region, being much smaller, can still be allocated from the module region. This leads to relocation failures if the distance between those regions exceeds 128 MB. (In fact, this corner case is highly unlikely to occur on arm64, but the issue has been observed on ARM, whose module region is much smaller). So split the core and init PLT regions, and name the latter ".init.plt" so it gets allocated along with (and sufficiently close to) the .init sections that it serves. Also, given that init PLT entries may need to be emitted for branches that target the core module, modify the logic that disregards defined symbols to only disregard symbols that are defined in the same section as the relocated branch instruction. Since there may now be two PLT entries associated with each entry in the symbol table, we can no longer hijack the symbol::st_size fields to record the addresses of PLT entries as we emit them for zero-addend relocations. So instead, perform an explicit comparison to check for duplicate entries. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt"))
mod->arch.core.plt = sechdrs + i;
else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
mod->arch.init.plt = sechdrs + i;
else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
!strcmp(secstrings + sechdrs[i].sh_name,
".text.ftrace_trampoline"))
tramp = sechdrs + i;
else if (sechdrs[i].sh_type == SHT_SYMTAB)
syms = (Elf64_Sym *)sechdrs[i].sh_addr;
}
arm64: module: split core and init PLT sections The arm64 module PLT code allocates all PLT entries in a single core section, since the overhead of having a separate init PLT section is not justified by the small number of PLT entries usually required for init code. However, the core and init module regions are allocated independently, and there is a corner case where the core region may be allocated from the VMALLOC region if the dedicated module region is exhausted, but the init region, being much smaller, can still be allocated from the module region. This leads to relocation failures if the distance between those regions exceeds 128 MB. (In fact, this corner case is highly unlikely to occur on arm64, but the issue has been observed on ARM, whose module region is much smaller). So split the core and init PLT regions, and name the latter ".init.plt" so it gets allocated along with (and sufficiently close to) the .init sections that it serves. Also, given that init PLT entries may need to be emitted for branches that target the core module, modify the logic that disregards defined symbols to only disregard symbols that are defined in the same section as the relocated branch instruction. Since there may now be two PLT entries associated with each entry in the symbol table, we can no longer hijack the symbol::st_size fields to record the addresses of PLT entries as we emit them for zero-addend relocations. So instead, perform an explicit comparison to check for duplicate entries. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
if (!mod->arch.core.plt || !mod->arch.init.plt) {
pr_err("%s: module PLT section(s) missing\n", mod->name);
return -ENOEXEC;
}
if (!syms) {
pr_err("%s: module symtab section missing\n", mod->name);
return -ENOEXEC;
}
for (i = 0; i < ehdr->e_shnum; i++) {
Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
if (sechdrs[i].sh_type != SHT_RELA)
continue;
/* ignore relocations that operate on non-exec sections */
if (!(dstsec->sh_flags & SHF_EXECINSTR))
continue;
/* sort by type, symbol index and addend */
sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
arm64: module: split core and init PLT sections The arm64 module PLT code allocates all PLT entries in a single core section, since the overhead of having a separate init PLT section is not justified by the small number of PLT entries usually required for init code. However, the core and init module regions are allocated independently, and there is a corner case where the core region may be allocated from the VMALLOC region if the dedicated module region is exhausted, but the init region, being much smaller, can still be allocated from the module region. This leads to relocation failures if the distance between those regions exceeds 128 MB. (In fact, this corner case is highly unlikely to occur on arm64, but the issue has been observed on ARM, whose module region is much smaller). So split the core and init PLT regions, and name the latter ".init.plt" so it gets allocated along with (and sufficiently close to) the .init sections that it serves. Also, given that init PLT entries may need to be emitted for branches that target the core module, modify the logic that disregards defined symbols to only disregard symbols that are defined in the same section as the relocated branch instruction. Since there may now be two PLT entries associated with each entry in the symbol table, we can no longer hijack the symbol::st_size fields to record the addresses of PLT entries as we emit them for zero-addend relocations. So instead, perform an explicit comparison to check for duplicate entries. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0)
core_plts += count_plts(syms, rels, numrels,
sechdrs[i].sh_info);
else
init_plts += count_plts(syms, rels, numrels,
sechdrs[i].sh_info);
}
arm64: module: split core and init PLT sections The arm64 module PLT code allocates all PLT entries in a single core section, since the overhead of having a separate init PLT section is not justified by the small number of PLT entries usually required for init code. However, the core and init module regions are allocated independently, and there is a corner case where the core region may be allocated from the VMALLOC region if the dedicated module region is exhausted, but the init region, being much smaller, can still be allocated from the module region. This leads to relocation failures if the distance between those regions exceeds 128 MB. (In fact, this corner case is highly unlikely to occur on arm64, but the issue has been observed on ARM, whose module region is much smaller). So split the core and init PLT regions, and name the latter ".init.plt" so it gets allocated along with (and sufficiently close to) the .init sections that it serves. Also, given that init PLT entries may need to be emitted for branches that target the core module, modify the logic that disregards defined symbols to only disregard symbols that are defined in the same section as the relocated branch instruction. Since there may now be two PLT entries associated with each entry in the symbol table, we can no longer hijack the symbol::st_size fields to record the addresses of PLT entries as we emit them for zero-addend relocations. So instead, perform an explicit comparison to check for duplicate entries. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-02-22 05:12:57 +07:00
mod->arch.core.plt->sh_type = SHT_NOBITS;
mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES;
mod->arch.core.plt->sh_size = (core_plts + 1) * sizeof(struct plt_entry);
mod->arch.core.plt_num_entries = 0;
mod->arch.core.plt_max_entries = core_plts;
mod->arch.init.plt->sh_type = SHT_NOBITS;
mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES;
mod->arch.init.plt->sh_size = (init_plts + 1) * sizeof(struct plt_entry);
mod->arch.init.plt_num_entries = 0;
mod->arch.init.plt_max_entries = init_plts;
if (tramp) {
tramp->sh_type = SHT_NOBITS;
tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
tramp->sh_addralign = __alignof__(struct plt_entry);
tramp->sh_size = sizeof(struct plt_entry);
}
return 0;
}