2016-06-24 00:54:48 +07:00
|
|
|
/*
|
|
|
|
* kexec for arm64
|
|
|
|
*
|
|
|
|
* Copyright (C) Linaro.
|
|
|
|
* Copyright (C) Huawei Futurewei Technologies.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*/
|
|
|
|
|
2017-04-03 09:24:36 +07:00
|
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <linux/irq.h>
|
|
|
|
#include <linux/kernel.h>
|
2016-06-24 00:54:48 +07:00
|
|
|
#include <linux/kexec.h>
|
2017-04-03 09:24:35 +07:00
|
|
|
#include <linux/page-flags.h>
|
2016-06-24 00:54:48 +07:00
|
|
|
#include <linux/smp.h>
|
|
|
|
|
|
|
|
#include <asm/cacheflush.h>
|
|
|
|
#include <asm/cpu_ops.h>
|
2017-11-02 19:12:34 +07:00
|
|
|
#include <asm/daifflags.h>
|
2017-04-03 09:24:37 +07:00
|
|
|
#include <asm/memory.h>
|
2017-04-03 09:24:34 +07:00
|
|
|
#include <asm/mmu.h>
|
2016-06-24 00:54:48 +07:00
|
|
|
#include <asm/mmu_context.h>
|
2017-04-03 09:24:34 +07:00
|
|
|
#include <asm/page.h>
|
2016-06-24 00:54:48 +07:00
|
|
|
|
|
|
|
#include "cpu-reset.h"
|
|
|
|
|
|
|
|
/* Global variables for the arm64_relocate_new_kernel routine. */
|
|
|
|
extern const unsigned char arm64_relocate_new_kernel[];
|
|
|
|
extern const unsigned long arm64_relocate_new_kernel_size;
|
|
|
|
|
2016-06-24 00:54:48 +07:00
|
|
|
/**
|
|
|
|
* kexec_image_info - For debugging output.
|
|
|
|
*/
|
|
|
|
#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
|
|
|
|
static void _kexec_image_info(const char *func, int line,
|
|
|
|
const struct kimage *kimage)
|
|
|
|
{
|
|
|
|
unsigned long i;
|
|
|
|
|
|
|
|
pr_debug("%s:%d:\n", func, line);
|
|
|
|
pr_debug(" kexec kimage info:\n");
|
|
|
|
pr_debug(" type: %d\n", kimage->type);
|
|
|
|
pr_debug(" start: %lx\n", kimage->start);
|
|
|
|
pr_debug(" head: %lx\n", kimage->head);
|
|
|
|
pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
|
|
|
|
|
|
|
|
for (i = 0; i < kimage->nr_segments; i++) {
|
|
|
|
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
|
|
|
|
i,
|
|
|
|
kimage->segment[i].mem,
|
|
|
|
kimage->segment[i].mem + kimage->segment[i].memsz,
|
|
|
|
kimage->segment[i].memsz,
|
|
|
|
kimage->segment[i].memsz / PAGE_SIZE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-06-24 00:54:48 +07:00
|
|
|
/*
 * machine_kexec_cleanup - intentionally does nothing.
 * @kimage: unused.
 *
 * The definition exists only so that the build does not fail.
 */
void machine_kexec_cleanup(struct kimage *kimage)
{
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* machine_kexec_prepare - Prepare for a kexec reboot.
|
|
|
|
*
|
|
|
|
* Called from the core kexec code when a kernel image is loaded.
|
|
|
|
* Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
|
|
|
|
* are stuck in the kernel. This avoids a panic once we hit machine_kexec().
|
|
|
|
*/
|
|
|
|
int machine_kexec_prepare(struct kimage *kimage)
|
|
|
|
{
|
2016-06-24 00:54:48 +07:00
|
|
|
kexec_image_info(kimage);
|
|
|
|
|
2016-06-24 00:54:48 +07:00
|
|
|
if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
|
|
|
|
pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
|
|
|
|
return -EBUSY;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* kexec_list_flush - Helper to flush the kimage list and source pages to PoC.
|
|
|
|
*/
|
|
|
|
static void kexec_list_flush(struct kimage *kimage)
|
|
|
|
{
|
|
|
|
kimage_entry_t *entry;
|
|
|
|
|
|
|
|
for (entry = &kimage->head; ; entry++) {
|
|
|
|
unsigned int flag;
|
|
|
|
void *addr;
|
|
|
|
|
|
|
|
/* flush the list entries. */
|
|
|
|
__flush_dcache_area(entry, sizeof(kimage_entry_t));
|
|
|
|
|
|
|
|
flag = *entry & IND_FLAGS;
|
|
|
|
if (flag == IND_DONE)
|
|
|
|
break;
|
|
|
|
|
|
|
|
addr = phys_to_virt(*entry & PAGE_MASK);
|
|
|
|
|
|
|
|
switch (flag) {
|
|
|
|
case IND_INDIRECTION:
|
|
|
|
/* Set entry point just before the new list page. */
|
|
|
|
entry = (kimage_entry_t *)addr - 1;
|
|
|
|
break;
|
|
|
|
case IND_SOURCE:
|
|
|
|
/* flush the source pages. */
|
|
|
|
__flush_dcache_area(addr, PAGE_SIZE);
|
|
|
|
break;
|
|
|
|
case IND_DESTINATION:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
BUG();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* kexec_segment_flush - Helper to flush the kimage segments to PoC.
|
|
|
|
*/
|
|
|
|
static void kexec_segment_flush(const struct kimage *kimage)
|
|
|
|
{
|
|
|
|
unsigned long i;
|
|
|
|
|
|
|
|
pr_debug("%s:\n", __func__);
|
|
|
|
|
|
|
|
for (i = 0; i < kimage->nr_segments; i++) {
|
|
|
|
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
|
|
|
|
i,
|
|
|
|
kimage->segment[i].mem,
|
|
|
|
kimage->segment[i].mem + kimage->segment[i].memsz,
|
|
|
|
kimage->segment[i].memsz,
|
|
|
|
kimage->segment[i].memsz / PAGE_SIZE);
|
|
|
|
|
|
|
|
__flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
|
|
|
|
kimage->segment[i].memsz);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* machine_kexec - Do the kexec reboot.
|
|
|
|
*
|
|
|
|
* Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
|
|
|
|
*/
|
|
|
|
void machine_kexec(struct kimage *kimage)
|
|
|
|
{
|
|
|
|
phys_addr_t reboot_code_buffer_phys;
|
|
|
|
void *reboot_code_buffer;
|
2017-04-03 09:24:36 +07:00
|
|
|
bool in_kexec_crash = (kimage == kexec_crash_image);
|
|
|
|
bool stuck_cpus = cpus_are_stuck_in_kernel();
|
2016-06-24 00:54:48 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* New cpus may have become stuck_in_kernel after we loaded the image.
|
|
|
|
*/
|
2017-04-03 09:24:36 +07:00
|
|
|
BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
|
|
|
|
WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
|
|
|
|
"Some CPUs may be stale, kdump will be unreliable.\n");
|
2016-06-24 00:54:48 +07:00
|
|
|
|
|
|
|
reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
|
|
|
|
reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
|
|
|
|
|
2016-06-24 00:54:48 +07:00
|
|
|
kexec_image_info(kimage);
|
|
|
|
|
|
|
|
pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__,
|
|
|
|
kimage->control_code_page);
|
|
|
|
pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__,
|
|
|
|
&reboot_code_buffer_phys);
|
|
|
|
pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__,
|
|
|
|
reboot_code_buffer);
|
|
|
|
pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__,
|
|
|
|
arm64_relocate_new_kernel);
|
|
|
|
pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n",
|
|
|
|
__func__, __LINE__, arm64_relocate_new_kernel_size,
|
|
|
|
arm64_relocate_new_kernel_size);
|
|
|
|
|
2016-06-24 00:54:48 +07:00
|
|
|
/*
|
|
|
|
* Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
|
|
|
|
* after the kernel is shut down.
|
|
|
|
*/
|
|
|
|
memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
|
|
|
|
arm64_relocate_new_kernel_size);
|
|
|
|
|
|
|
|
/* Flush the reboot_code_buffer in preparation for its execution. */
|
|
|
|
__flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
|
2018-07-30 22:29:21 +07:00
|
|
|
__flush_icache_range((uintptr_t)reboot_code_buffer,
|
2016-06-24 00:54:48 +07:00
|
|
|
arm64_relocate_new_kernel_size);
|
|
|
|
|
|
|
|
/* Flush the kimage list and its buffers. */
|
|
|
|
kexec_list_flush(kimage);
|
|
|
|
|
|
|
|
/* Flush the new image if already in place. */
|
2017-04-03 09:24:34 +07:00
|
|
|
if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
|
2016-06-24 00:54:48 +07:00
|
|
|
kexec_segment_flush(kimage);
|
|
|
|
|
|
|
|
pr_info("Bye!\n");
|
|
|
|
|
2017-11-02 19:12:34 +07:00
|
|
|
local_daif_mask();
|
2016-06-24 00:54:48 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* cpu_soft_restart will shutdown the MMU, disable data caches, then
|
|
|
|
* transfer control to the reboot_code_buffer which contains a copy of
|
|
|
|
* the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
|
|
|
|
* uses physical addressing to relocate the new image to its final
|
|
|
|
* position and transfers control to the image entry point when the
|
|
|
|
* relocation is complete.
|
|
|
|
*/
|
|
|
|
|
2018-07-02 20:17:53 +07:00
|
|
|
cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start, 0);
|
2016-06-24 00:54:48 +07:00
|
|
|
|
|
|
|
BUG(); /* Should never get here. */
|
|
|
|
}
|
|
|
|
|
2017-04-03 09:24:36 +07:00
|
|
|
static void machine_kexec_mask_interrupts(void)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
struct irq_desc *desc;
|
|
|
|
|
|
|
|
for_each_irq_desc(i, desc) {
|
|
|
|
struct irq_chip *chip;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
chip = irq_desc_get_chip(desc);
|
|
|
|
if (!chip)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* First try to remove the active state. If this
|
|
|
|
* fails, try to EOI the interrupt.
|
|
|
|
*/
|
|
|
|
ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
|
|
|
|
|
|
|
|
if (ret && irqd_irq_inprogress(&desc->irq_data) &&
|
|
|
|
chip->irq_eoi)
|
|
|
|
chip->irq_eoi(&desc->irq_data);
|
|
|
|
|
|
|
|
if (chip->irq_mask)
|
|
|
|
chip->irq_mask(&desc->irq_data);
|
|
|
|
|
|
|
|
if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
|
|
|
|
chip->irq_disable(&desc->irq_data);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * machine_crash_shutdown - shutdown non-crashing cpus and save registers
 * @regs: register state passed on to crash_save_cpu() for the crashing cpu.
 *
 * Runs with local interrupts disabled: stops the other cpus, saves this
 * cpu's state, then masks the interrupts via machine_kexec_mask_interrupts().
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
	int this_cpu;

	local_irq_disable();

	/* shutdown non-crashing cpus */
	crash_smp_send_stop();

	/* for crashing cpu */
	this_cpu = smp_processor_id();
	crash_save_cpu(regs, this_cpu);
	machine_kexec_mask_interrupts();

	pr_info("Starting crashdump kernel...\n");
}
|
2017-04-03 09:24:34 +07:00
|
|
|
|
|
|
|
void arch_kexec_protect_crashkres(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
kexec_segment_flush(kexec_crash_image);
|
|
|
|
|
|
|
|
for (i = 0; i < kexec_crash_image->nr_segments; i++)
|
|
|
|
set_memory_valid(
|
|
|
|
__phys_to_virt(kexec_crash_image->segment[i].mem),
|
|
|
|
kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void arch_kexec_unprotect_crashkres(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < kexec_crash_image->nr_segments; i++)
|
|
|
|
set_memory_valid(
|
|
|
|
__phys_to_virt(kexec_crash_image->segment[i].mem),
|
|
|
|
kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
|
|
|
|
}
|
2017-04-03 09:24:35 +07:00
|
|
|
|
|
|
|
#ifdef CONFIG_HIBERNATION
|
|
|
|
/*
|
|
|
|
* To preserve the crash dump kernel image, the relevant memory segments
|
|
|
|
* should be mapped again around the hibernation.
|
|
|
|
*/
|
|
|
|
void crash_prepare_suspend(void)
|
|
|
|
{
|
|
|
|
if (kexec_crash_image)
|
|
|
|
arch_kexec_unprotect_crashkres();
|
|
|
|
}
|
|
|
|
|
|
|
|
void crash_post_resume(void)
|
|
|
|
{
|
|
|
|
if (kexec_crash_image)
|
|
|
|
arch_kexec_protect_crashkres();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* crash_is_nosave
|
|
|
|
*
|
|
|
|
* Return true only if a page is part of reserved memory for crash dump kernel,
|
|
|
|
* but does not hold any data of loaded kernel image.
|
|
|
|
*
|
|
|
|
* Note that all the pages in crash dump kernel memory have been initially
|
|
|
|
* marked as Reserved in kexec_reserve_crashkres_pages().
|
|
|
|
*
|
|
|
|
* In hibernation, the pages which are Reserved and yet "nosave" are excluded
|
|
|
|
* from the hibernation image. crash_is_nosave() does this check for crash
|
|
|
|
* dump kernel and will reduce the total size of hibernation image.
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool crash_is_nosave(unsigned long pfn)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
phys_addr_t addr;
|
|
|
|
|
|
|
|
if (!crashk_res.end)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* in reserved memory? */
|
|
|
|
addr = __pfn_to_phys(pfn);
|
|
|
|
if ((addr < crashk_res.start) || (crashk_res.end < addr))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!kexec_crash_image)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/* not part of loaded kernel image? */
|
|
|
|
for (i = 0; i < kexec_crash_image->nr_segments; i++)
|
|
|
|
if (addr >= kexec_crash_image->segment[i].mem &&
|
|
|
|
addr < (kexec_crash_image->segment[i].mem +
|
|
|
|
kexec_crash_image->segment[i].memsz))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
|
|
|
|
{
|
|
|
|
unsigned long addr;
|
|
|
|
struct page *page;
|
|
|
|
|
|
|
|
for (addr = begin; addr < end; addr += PAGE_SIZE) {
|
|
|
|
page = phys_to_page(addr);
|
|
|
|
ClearPageReserved(page);
|
|
|
|
free_reserved_page(page);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif /* CONFIG_HIBERNATION */
|
2017-04-03 09:24:37 +07:00
|
|
|
|
|
|
|
void arch_crash_save_vmcoreinfo(void)
|
|
|
|
{
|
|
|
|
VMCOREINFO_NUMBER(VA_BITS);
|
|
|
|
/* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
|
|
|
|
vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n",
|
|
|
|
kimage_voffset);
|
|
|
|
vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
|
|
|
|
PHYS_OFFSET);
|
arm64, kaslr: export offset in VMCOREINFO ELF notes
Include KASLR offset in arm64 VMCOREINFO ELF notes to assist in
debugging. vmcore parsing in user-space already expects this value in
the notes and we are providing it for portability of those existing
tools with x86.
Ideally we would like core code to do this (so that way this
information won't be missed when an architecture adds KASLR support),
but mips has CONFIG_RANDOMIZE_BASE, and doesn't provide kaslr_offset(),
so I am not sure if this is needed for mips (and other such similar arch
cases in future). So, lets keep this architecture specific for now.
As an example of a user-space use-case, consider the
makedumpfile user-space utility which will need fixup to use this
KASLR offset to work with cases where we need to find a way to
translate symbol address from vmlinux to kernel run time address
in case of KASLR boot on arm64.
I have already submitted the makedumpfile user-space patch upstream
and the maintainer has suggested to wait for the kernel changes to be
included (see [0]).
I tested this on my qualcomm amberwing board both for KASLR and
non-KASLR boot cases:
Without this patch:
# cat > scrub.conf << EOF
[vmlinux]
erase jiffies
erase init_task.utime
for tsk in init_task.tasks.next within task_struct:tasks
erase tsk.utime
endfor
EOF
# makedumpfile --split -d 31 -x vmlinux --config scrub.conf vmcore dumpfile_{1,2,3}
readpage_elf: Attempt to read non-existent page at 0xffffa8a5bf180000.
readmem: type_addr: 1, addr:ffffa8a5bf180000, size:8
vaddr_to_paddr_arm64: Can't read pgd
readmem: Can't convert a virtual address(ffff0000092a542c) to physical
address.
readmem: type_addr: 0, addr:ffff0000092a542c, size:390
check_release: Can't get the address of system_utsname
After this patch check_release() is ok, and also we are able to erase
symbol from vmcore (I checked this with kernel 4.18.0-rc4+):
# makedumpfile --split -d 31 -x vmlinux --config scrub.conf vmcore dumpfile_{1,2,3}
The kernel version is not supported.
The makedumpfile operation may be incomplete.
Checking for memory holes : [100.0 %] \
Checking for memory holes : [100.0 %] |
Checking foExcluding unnecessary pages : [100.0 %]
\
Excluding unnecessary pages : [100.0 %] \
The dumpfiles are saved to dumpfile_1, dumpfile_2, and dumpfile_3.
makedumpfile Completed.
[0] https://www.spinics.net/lists/kexec/msg21195.html
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Acked-by: James Morse <james.morse@arm.com>
Signed-off-by: Bhupesh Sharma <bhsharma@redhat.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2018-07-30 13:24:43 +07:00
|
|
|
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
|
2017-04-03 09:24:37 +07:00
|
|
|
}
|