linux_dsm_epyc7002/arch/arm64/include/asm/smp.h
Hoeun Ryu a88ce63b64 arm64: kexec: have own crash_smp_send_stop() for crash dump for nonpanic cores
Commit 0ee5941 : (x86/panic: replace smp_send_stop() with kdump friendly
version in panic path) introduced crash_smp_send_stop() which is a weak
function and can be overridden by architecture codes to fix the side effect
caused by commit f06e515 : (kernel/panic.c: add "crash_kexec_post_
notifiers" option).

 ARM64 architecture uses the weak version function and the problem is that
the weak function simply calls smp_send_stop() which makes other CPUs
offline and takes away the chance to save crash information for nonpanic
CPUs in machine_crash_shutdown() when crash_kexec_post_notifiers kernel
option is enabled.

 Calling smp_send_crash_stop() in machine_crash_shutdown() is useless
because all nonpanic CPUs are already offline by smp_send_stop() in this
case and smp_send_crash_stop() only works against online CPUs.

 The result is that secondary CPUs registers are not saved by
crash_save_cpu() and the vmcore file misreports these CPUs as being
offline.

 crash_smp_send_stop() is implemented to fix this problem by replacing the
existing smp_send_crash_stop() and adding a check for multiple calling to
the function. The function (strong symbol version) saves crash information
for nonpanic CPUs and machine_crash_shutdown() tries to save crash
information for nonpanic CPUs only when crash_kexec_post_notifiers kernel
option is disabled.

* crash_kexec_post_notifiers : false

  panic()
    __crash_kexec()
      machine_crash_shutdown()
        crash_smp_send_stop()    <= save crash dump for nonpanic cores

* crash_kexec_post_notifiers : true

  panic()
    crash_smp_send_stop()        <= save crash dump for nonpanic cores
    __crash_kexec()
      machine_crash_shutdown()
        crash_smp_send_stop()    <= just return.

Signed-off-by: Hoeun Ryu <hoeun.ryu@gmail.com>
Reviewed-by: James Morse <james.morse@arm.com>
Tested-by: James Morse <james.morse@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-08-21 18:01:04 +01:00

157 lines
4.2 KiB
C

/*
* Copyright (C) 2012 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
/* Values for secondary_data.status */
#define CPU_MMU_OFF (-1)
#define CPU_BOOT_SUCCESS (0)
/* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */
#define CPU_KILL_ME (1)
/* The cpu couldn't die gracefully and is looping in the kernel */
#define CPU_STUCK_IN_KERNEL (2)
/* Fatal system error detected by secondary CPU, crash the system */
#define CPU_PANIC_KERNEL (3)
#ifndef __ASSEMBLY__
#include <asm/percpu.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/thread_info.h>
DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
/*
* We don't use this_cpu_read(cpu_number) as that has implicit writes to
* preempt_count, and associated (compiler) barriers, that we'd like to avoid
* the expense of. If we're preemptible, the value can be stale at use anyway.
* And we can't use this_cpu_ptr() either, as that winds up recursing back
* here under CONFIG_DEBUG_PREEMPT=y.
*/
#define raw_smp_processor_id() (*raw_cpu_ptr(&cpu_number))
struct seq_file;
/*
* generate IPI list text
*/
extern void show_ipi_list(struct seq_file *p, int prec);
/*
* Called from C code, this handles an IPI.
*/
extern void handle_IPI(int ipinr, struct pt_regs *regs);
/*
* Discover the set of possible CPUs and determine their
* SMP operations.
*/
extern void smp_init_cpus(void);
/*
* Provide a function to raise an IPI cross call on CPUs in callmap.
*/
extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int));
extern void (*__smp_cross_call)(const struct cpumask *, unsigned int);
/*
* Called from the secondary holding pen, this is the secondary CPU entry point.
*/
asmlinkage void secondary_start_kernel(void);
/*
* Initial data for bringing up a secondary CPU.
* @stack - sp for the secondary CPU
* @status - Result passed back from the secondary CPU to
* indicate failure.
*/
struct secondary_data {
void *stack;
struct task_struct *task;
long status;
};
extern struct secondary_data secondary_data;
extern long __early_cpu_boot_status;
extern void secondary_entry(void);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
#else
static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
{
BUILD_BUG();
}
#endif
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
extern void cpu_die(void);
extern void cpu_die_early(void);
static inline void cpu_park_loop(void)
{
for (;;) {
wfe();
wfi();
}
}
static inline void update_cpu_boot_status(int val)
{
WRITE_ONCE(secondary_data.status, val);
/* Ensure the visibility of the status update */
dsb(ishst);
}
/*
* The calling secondary CPU has detected serious configuration mismatch,
* which calls for a kernel panic. Update the boot status and park the calling
* CPU.
*/
static inline void cpu_panic_kernel(void)
{
update_cpu_boot_status(CPU_PANIC_KERNEL);
cpu_park_loop();
}
/*
* If a secondary CPU enters the kernel but fails to come online,
* (e.g. due to mismatched features), and cannot exit the kernel,
* we increment cpus_stuck_in_kernel and leave the CPU in a
* quiesecent loop within the kernel text. The memory containing
* this loop must not be re-used for anything else as the 'stuck'
* core is executing it.
*
* This function is used to inhibit features like kexec and hibernate.
*/
bool cpus_are_stuck_in_kernel(void);
extern void crash_smp_send_stop(void);
extern bool smp_crash_stop_failed(void);
#endif /* ifndef __ASSEMBLY__ */
#endif /* ifndef __ASM_SMP_H */