linux_dsm_epyc7002/drivers/firmware/efi/libstub/arm32-stub.c
Kairui Song 220dd7699c x86, efi: Never relocate kernel below lowest acceptable address
Currently, kernel fails to boot on some HyperV VMs when using EFI.
And it's a potential issue on all x86 platforms.

It's caused by broken kernel relocation on EFI systems, when below three
conditions are met:

1. Kernel image is not loaded to the default address (LOAD_PHYSICAL_ADDR)
   by the loader.
2. There isn't enough room to contain the kernel, starting from the
   default load address (eg. something else occupied part the region).
3. In the memmap provided by EFI firmware, there is a memory region
   starts below LOAD_PHYSICAL_ADDR, and suitable for containing the
   kernel.

EFI stub will perform a kernel relocation when condition 1 is met. But
due to condition 2, EFI stub can't relocate kernel to the preferred
address, so it fallback to ask EFI firmware to alloc lowest usable memory
region, got the low region mentioned in condition 3, and relocated
kernel there.

It's incorrect to relocate the kernel below LOAD_PHYSICAL_ADDR. This
is the lowest acceptable kernel relocation address.

The first thing goes wrong is in arch/x86/boot/compressed/head_64.S.
Kernel decompression will force use LOAD_PHYSICAL_ADDR as the output
address if kernel is located below it. Then the relocation before
decompression, which move kernel to the end of the decompression buffer,
will overwrite other memory region, as there is no enough memory there.

To fix it, just don't let EFI stub relocate the kernel to any address
lower than lowest acceptable address.

[ ardb: introduce efi_low_alloc_above() to reduce the scope of the change ]

Signed-off-by: Kairui Song <kasong@redhat.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: https://lkml.kernel.org/r/20191029173755.27149-6-ardb@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2019-10-31 09:40:19 +01:00

256 lines
7.4 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2013 Linaro Ltd; <roy.franz@linaro.org>
*/
#include <linux/efi.h>
#include <asm/efi.h>
#include "efistub.h"
efi_status_t check_platform_features(efi_system_table_t *sys_table_arg)
{
int block;
/* non-LPAE kernels can run anywhere */
if (!IS_ENABLED(CONFIG_ARM_LPAE))
return EFI_SUCCESS;
/* LPAE kernels need compatible hardware */
block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0);
if (block < 5) {
pr_efi_err(sys_table_arg, "This LPAE kernel is not supported by your CPU\n");
return EFI_UNSUPPORTED;
}
return EFI_SUCCESS;
}
static efi_guid_t screen_info_guid = LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID;
struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg)
{
struct screen_info *si;
efi_status_t status;
/*
* Unlike on arm64, where we can directly fill out the screen_info
* structure from the stub, we need to allocate a buffer to hold
* its contents while we hand over to the kernel proper from the
* decompressor.
*/
status = efi_call_early(allocate_pool, EFI_RUNTIME_SERVICES_DATA,
sizeof(*si), (void **)&si);
if (status != EFI_SUCCESS)
return NULL;
status = efi_call_early(install_configuration_table,
&screen_info_guid, si);
if (status == EFI_SUCCESS)
return si;
efi_call_early(free_pool, si);
return NULL;
}
void free_screen_info(efi_system_table_t *sys_table_arg, struct screen_info *si)
{
if (!si)
return;
efi_call_early(install_configuration_table, &screen_info_guid, NULL);
efi_call_early(free_pool, si);
}
static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg,
unsigned long dram_base,
unsigned long *reserve_addr,
unsigned long *reserve_size)
{
efi_physical_addr_t alloc_addr;
efi_memory_desc_t *memory_map;
unsigned long nr_pages, map_size, desc_size, buff_size;
efi_status_t status;
unsigned long l;
struct efi_boot_memmap map = {
.map = &memory_map,
.map_size = &map_size,
.desc_size = &desc_size,
.desc_ver = NULL,
.key_ptr = NULL,
.buff_size = &buff_size,
};
/*
* Reserve memory for the uncompressed kernel image. This is
* all that prevents any future allocations from conflicting
* with the kernel. Since we can't tell from the compressed
* image how much DRAM the kernel actually uses (due to BSS
* size uncertainty) we allocate the maximum possible size.
* Do this very early, as prints can cause memory allocations
* that may conflict with this.
*/
alloc_addr = dram_base + MAX_UNCOMP_KERNEL_SIZE;
nr_pages = MAX_UNCOMP_KERNEL_SIZE / EFI_PAGE_SIZE;
status = efi_call_early(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS,
EFI_BOOT_SERVICES_DATA, nr_pages, &alloc_addr);
if (status == EFI_SUCCESS) {
if (alloc_addr == dram_base) {
*reserve_addr = alloc_addr;
*reserve_size = MAX_UNCOMP_KERNEL_SIZE;
return EFI_SUCCESS;
}
/*
* If we end up here, the allocation succeeded but starts below
* dram_base. This can only occur if the real base of DRAM is
* not a multiple of 128 MB, in which case dram_base will have
* been rounded up. Since this implies that a part of the region
* was already occupied, we need to fall through to the code
* below to ensure that the existing allocations don't conflict.
* For this reason, we use EFI_BOOT_SERVICES_DATA above and not
* EFI_LOADER_DATA, which we wouldn't able to distinguish from
* allocations that we want to disallow.
*/
}
/*
* If the allocation above failed, we may still be able to proceed:
* if the only allocations in the region are of types that will be
* released to the OS after ExitBootServices(), the decompressor can
* safely overwrite them.
*/
status = efi_get_memory_map(sys_table_arg, &map);
if (status != EFI_SUCCESS) {
pr_efi_err(sys_table_arg,
"reserve_kernel_base(): Unable to retrieve memory map.\n");
return status;
}
for (l = 0; l < map_size; l += desc_size) {
efi_memory_desc_t *desc;
u64 start, end;
desc = (void *)memory_map + l;
start = desc->phys_addr;
end = start + desc->num_pages * EFI_PAGE_SIZE;
/* Skip if entry does not intersect with region */
if (start >= dram_base + MAX_UNCOMP_KERNEL_SIZE ||
end <= dram_base)
continue;
switch (desc->type) {
case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA:
/* Ignore types that are released to the OS anyway */
continue;
case EFI_CONVENTIONAL_MEMORY:
/*
* Reserve the intersection between this entry and the
* region.
*/
start = max(start, (u64)dram_base);
end = min(end, (u64)dram_base + MAX_UNCOMP_KERNEL_SIZE);
status = efi_call_early(allocate_pages,
EFI_ALLOCATE_ADDRESS,
EFI_LOADER_DATA,
(end - start) / EFI_PAGE_SIZE,
&start);
if (status != EFI_SUCCESS) {
pr_efi_err(sys_table_arg,
"reserve_kernel_base(): alloc failed.\n");
goto out;
}
break;
case EFI_LOADER_CODE:
case EFI_LOADER_DATA:
/*
* These regions may be released and reallocated for
* another purpose (including EFI_RUNTIME_SERVICE_DATA)
* at any time during the execution of the OS loader,
* so we cannot consider them as safe.
*/
default:
/*
* Treat any other allocation in the region as unsafe */
status = EFI_OUT_OF_RESOURCES;
goto out;
}
}
status = EFI_SUCCESS;
out:
efi_call_early(free_pool, memory_map);
return status;
}
efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
unsigned long *image_addr,
unsigned long *image_size,
unsigned long *reserve_addr,
unsigned long *reserve_size,
unsigned long dram_base,
efi_loaded_image_t *image)
{
unsigned long kernel_base;
efi_status_t status;
/*
* Verify that the DRAM base address is compatible with the ARM
* boot protocol, which determines the base of DRAM by masking
* off the low 27 bits of the address at which the zImage is
* loaded. These assumptions are made by the decompressor,
* before any memory map is available.
*/
kernel_base = round_up(dram_base, SZ_128M);
/*
* Note that some platforms (notably, the Raspberry Pi 2) put
* spin-tables and other pieces of firmware at the base of RAM,
* abusing the fact that the window of TEXT_OFFSET bytes at the
* base of the kernel image is only partially used at the moment.
* (Up to 5 pages are used for the swapper page tables)
*/
kernel_base += TEXT_OFFSET - 5 * PAGE_SIZE;
status = reserve_kernel_base(sys_table, kernel_base, reserve_addr,
reserve_size);
if (status != EFI_SUCCESS) {
pr_efi_err(sys_table, "Unable to allocate memory for uncompressed kernel.\n");
return status;
}
/*
* Relocate the zImage, so that it appears in the lowest 128 MB
* memory window.
*/
*image_size = image->image_size;
status = efi_relocate_kernel(sys_table, image_addr, *image_size,
*image_size,
kernel_base + MAX_UNCOMP_KERNEL_SIZE, 0, 0);
if (status != EFI_SUCCESS) {
pr_efi_err(sys_table, "Failed to relocate kernel.\n");
efi_free(sys_table, *reserve_size, *reserve_addr);
*reserve_size = 0;
return status;
}
/*
* Check to see if we were able to allocate memory low enough
* in memory. The kernel determines the base of DRAM from the
* address at which the zImage is loaded.
*/
if (*image_addr + *image_size > dram_base + ZIMAGE_OFFSET_LIMIT) {
pr_efi_err(sys_table, "Failed to relocate kernel, no low memory available.\n");
efi_free(sys_table, *reserve_size, *reserve_addr);
*reserve_size = 0;
efi_free(sys_table, *image_size, *image_addr);
*image_size = 0;
return EFI_LOAD_ERROR;
}
return EFI_SUCCESS;
}