linux_dsm_epyc7002/arch/mips/kernel/process.c

817 lines
19 KiB
C
Raw Normal View History

/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 1994 - 1999, 2000 by Ralf Baechle and others.
* Copyright (C) 2005, 2006 by Ralf Baechle (ralf@linux-mips.org)
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
* Copyright (C) 2004 Thiemo Seufer
* Copyright (C) 2013 Imagination Technologies Ltd.
*/
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/tick.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/personality.h>
#include <linux/sys.h>
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/kallsyms.h>
#include <linux/random.h>
2015-01-08 19:17:37 +07:00
#include <linux/prctl.h>
#include <asm/asm.h>
#include <asm/bootinfo.h>
#include <asm/cpu.h>
MIPS: Use per-mm page to execute branch delay slot instructions In some cases the kernel needs to execute an instruction from the delay slot of an emulated branch instruction. These cases include: - Emulated floating point branch instructions (bc1[ft]l?) for systems which don't include an FPU, or upon which the kernel is run with the "nofpu" parameter. - MIPSr6 systems running binaries targeting older revisions of the architecture, which may include branch instructions whose encodings are no longer valid in MIPSr6. Executing instructions from such delay slots is done by writing the instruction to memory followed by a trap, as part of an "emuframe", and executing it. This avoids the requirement of an emulator for the entire MIPS instruction set. Prior to this patch such emuframes are written to the user stack and executed from there. This patch moves FP branch delay emuframes off of the user stack and into a per-mm page. Allocating a page per-mm leaves userland with access to only what it had access to previously, and compared to other solutions is relatively simple. When a thread requires a delay slot emulation, it is allocated a frame. A thread may only have one frame allocated at any one time, since it may only ever be executing one instruction at any one time. In order to ensure that we can free up allocated frame later, its index is recorded in struct thread_struct. In the typical case, after executing the delay slot instruction we'll execute a break instruction with the BRK_MEMU code. This traps back to the kernel & leads to a call to do_dsemulret which frees the allocated frame & moves the user PC back to the instruction that would have executed following the emulated branch. In some cases the delay slot instruction may be invalid, such as a branch, or may trigger an exception. In these cases the BRK_MEMU break instruction will not be hit. In order to ensure that frames are freed this patch introduces dsemul_thread_cleanup() and calls it to free any allocated frame upon thread exit. If the instruction generated an exception & leads to a signal being delivered to the thread, or indeed if a signal simply happens to be delivered to the thread whilst it is executing from the struct emuframe, then we need to take care to exit the frame appropriately. This is done by either rolling back the user PC to the branch or advancing it to the continuation PC prior to signal delivery, using dsemul_thread_rollback(). If this were not done then a sigreturn would return to the struct emuframe, and if that frame had meanwhile been used in response to an emulated branch instruction within the signal handler then we would execute the wrong user code. Whilst a user could theoretically place something like a compact branch to self in a delay slot and cause their thread to become stuck in an infinite loop with the frame never being deallocated, this would: - Only affect the users single process. - Be architecturally invalid since there would be a branch in the delay slot, which is forbidden. - Be extremely unlikely to happen by mistake, and provide a program with no more ability to harm the system than a simple infinite loop would. If a thread requires a delay slot emulation & no frame is available to it (ie. the process has enough other threads that all frames are currently in use) then the thread joins a waitqueue. It will sleep until a frame is freed by another thread in the process. Since we now know whether a thread has an allocated frame due to our tracking of its index, the cookie field of struct emuframe is removed as we can be more certain whether we have a valid frame. Since a thread may only ever have a single frame at any given time, the epc field of struct emuframe is also removed & the PC to continue from is instead stored in struct thread_struct. Together these changes simplify & shrink struct emuframe somewhat, allowing twice as many frames to fit into the page allocated for them. The primary benefit of this patch is that we are now free to mark the user stack non-executable where that is possible. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: Maciej Rozycki <maciej.rozycki@imgtec.com> Cc: Faraz Shahbazker <faraz.shahbazker@imgtec.com> Cc: Raghu Gandham <raghu.gandham@imgtec.com> Cc: Matthew Fortune <matthew.fortune@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13764/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-07-08 17:06:19 +07:00
#include <asm/dsemul.h>
#include <asm/dsp.h>
#include <asm/fpu.h>
#include <asm/irq.h>
#include <asm/msa.h>
#include <asm/pgtable.h>
#include <asm/mipsregs.h>
#include <asm/processor.h>
#include <asm/reg.h>
#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/elf.h>
#include <asm/isadep.h>
#include <asm/inst.h>
#include <asm/stacktrace.h>
#include <asm/irq_regs.h>
#ifdef CONFIG_HOTPLUG_CPU
void arch_cpu_idle_dead(void)
{
play_dead();
}
#endif
asmlinkage void ret_from_fork(void);
asmlinkage void ret_from_kernel_thread(void);
void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
{
unsigned long status;
/* New thread loses kernel privileges. */
status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_FR|KU_MASK);
status |= KU_USER;
regs->cp0_status = status;
lose_fpu(0);
clear_thread_flag(TIF_MSA_CTX_LIVE);
clear_used_math();
MIPS: Use per-mm page to execute branch delay slot instructions In some cases the kernel needs to execute an instruction from the delay slot of an emulated branch instruction. These cases include: - Emulated floating point branch instructions (bc1[ft]l?) for systems which don't include an FPU, or upon which the kernel is run with the "nofpu" parameter. - MIPSr6 systems running binaries targeting older revisions of the architecture, which may include branch instructions whose encodings are no longer valid in MIPSr6. Executing instructions from such delay slots is done by writing the instruction to memory followed by a trap, as part of an "emuframe", and executing it. This avoids the requirement of an emulator for the entire MIPS instruction set. Prior to this patch such emuframes are written to the user stack and executed from there. This patch moves FP branch delay emuframes off of the user stack and into a per-mm page. Allocating a page per-mm leaves userland with access to only what it had access to previously, and compared to other solutions is relatively simple. When a thread requires a delay slot emulation, it is allocated a frame. A thread may only have one frame allocated at any one time, since it may only ever be executing one instruction at any one time. In order to ensure that we can free up allocated frame later, its index is recorded in struct thread_struct. In the typical case, after executing the delay slot instruction we'll execute a break instruction with the BRK_MEMU code. This traps back to the kernel & leads to a call to do_dsemulret which frees the allocated frame & moves the user PC back to the instruction that would have executed following the emulated branch. In some cases the delay slot instruction may be invalid, such as a branch, or may trigger an exception. In these cases the BRK_MEMU break instruction will not be hit. In order to ensure that frames are freed this patch introduces dsemul_thread_cleanup() and calls it to free any allocated frame upon thread exit. If the instruction generated an exception & leads to a signal being delivered to the thread, or indeed if a signal simply happens to be delivered to the thread whilst it is executing from the struct emuframe, then we need to take care to exit the frame appropriately. This is done by either rolling back the user PC to the branch or advancing it to the continuation PC prior to signal delivery, using dsemul_thread_rollback(). If this were not done then a sigreturn would return to the struct emuframe, and if that frame had meanwhile been used in response to an emulated branch instruction within the signal handler then we would execute the wrong user code. Whilst a user could theoretically place something like a compact branch to self in a delay slot and cause their thread to become stuck in an infinite loop with the frame never being deallocated, this would: - Only affect the users single process. - Be architecturally invalid since there would be a branch in the delay slot, which is forbidden. - Be extremely unlikely to happen by mistake, and provide a program with no more ability to harm the system than a simple infinite loop would. If a thread requires a delay slot emulation & no frame is available to it (ie. the process has enough other threads that all frames are currently in use) then the thread joins a waitqueue. It will sleep until a frame is freed by another thread in the process. Since we now know whether a thread has an allocated frame due to our tracking of its index, the cookie field of struct emuframe is removed as we can be more certain whether we have a valid frame. Since a thread may only ever have a single frame at any given time, the epc field of struct emuframe is also removed & the PC to continue from is instead stored in struct thread_struct. Together these changes simplify & shrink struct emuframe somewhat, allowing twice as many frames to fit into the page allocated for them. The primary benefit of this patch is that we are now free to mark the user stack non-executable where that is possible. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: Maciej Rozycki <maciej.rozycki@imgtec.com> Cc: Faraz Shahbazker <faraz.shahbazker@imgtec.com> Cc: Raghu Gandham <raghu.gandham@imgtec.com> Cc: Matthew Fortune <matthew.fortune@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13764/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-07-08 17:06:19 +07:00
atomic_set(&current->thread.bd_emu_frame, BD_EMUFRAME_NONE);
init_dsp();
regs->cp0_epc = pc;
regs->regs[29] = sp;
}
MIPS: Use per-mm page to execute branch delay slot instructions In some cases the kernel needs to execute an instruction from the delay slot of an emulated branch instruction. These cases include: - Emulated floating point branch instructions (bc1[ft]l?) for systems which don't include an FPU, or upon which the kernel is run with the "nofpu" parameter. - MIPSr6 systems running binaries targeting older revisions of the architecture, which may include branch instructions whose encodings are no longer valid in MIPSr6. Executing instructions from such delay slots is done by writing the instruction to memory followed by a trap, as part of an "emuframe", and executing it. This avoids the requirement of an emulator for the entire MIPS instruction set. Prior to this patch such emuframes are written to the user stack and executed from there. This patch moves FP branch delay emuframes off of the user stack and into a per-mm page. Allocating a page per-mm leaves userland with access to only what it had access to previously, and compared to other solutions is relatively simple. When a thread requires a delay slot emulation, it is allocated a frame. A thread may only have one frame allocated at any one time, since it may only ever be executing one instruction at any one time. In order to ensure that we can free up allocated frame later, its index is recorded in struct thread_struct. In the typical case, after executing the delay slot instruction we'll execute a break instruction with the BRK_MEMU code. This traps back to the kernel & leads to a call to do_dsemulret which frees the allocated frame & moves the user PC back to the instruction that would have executed following the emulated branch. In some cases the delay slot instruction may be invalid, such as a branch, or may trigger an exception. In these cases the BRK_MEMU break instruction will not be hit. In order to ensure that frames are freed this patch introduces dsemul_thread_cleanup() and calls it to free any allocated frame upon thread exit. If the instruction generated an exception & leads to a signal being delivered to the thread, or indeed if a signal simply happens to be delivered to the thread whilst it is executing from the struct emuframe, then we need to take care to exit the frame appropriately. This is done by either rolling back the user PC to the branch or advancing it to the continuation PC prior to signal delivery, using dsemul_thread_rollback(). If this were not done then a sigreturn would return to the struct emuframe, and if that frame had meanwhile been used in response to an emulated branch instruction within the signal handler then we would execute the wrong user code. Whilst a user could theoretically place something like a compact branch to self in a delay slot and cause their thread to become stuck in an infinite loop with the frame never being deallocated, this would: - Only affect the users single process. - Be architecturally invalid since there would be a branch in the delay slot, which is forbidden. - Be extremely unlikely to happen by mistake, and provide a program with no more ability to harm the system than a simple infinite loop would. If a thread requires a delay slot emulation & no frame is available to it (ie. the process has enough other threads that all frames are currently in use) then the thread joins a waitqueue. It will sleep until a frame is freed by another thread in the process. Since we now know whether a thread has an allocated frame due to our tracking of its index, the cookie field of struct emuframe is removed as we can be more certain whether we have a valid frame. Since a thread may only ever have a single frame at any given time, the epc field of struct emuframe is also removed & the PC to continue from is instead stored in struct thread_struct. Together these changes simplify & shrink struct emuframe somewhat, allowing twice as many frames to fit into the page allocated for them. The primary benefit of this patch is that we are now free to mark the user stack non-executable where that is possible. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: Maciej Rozycki <maciej.rozycki@imgtec.com> Cc: Faraz Shahbazker <faraz.shahbazker@imgtec.com> Cc: Raghu Gandham <raghu.gandham@imgtec.com> Cc: Matthew Fortune <matthew.fortune@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13764/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-07-08 17:06:19 +07:00
void exit_thread(struct task_struct *tsk)
{
/*
* User threads may have allocated a delay slot emulation frame.
* If so, clean up that allocation.
*/
if (!(current->flags & PF_KTHREAD))
dsemul_thread_cleanup(tsk);
}
MIPS: fork: Fix MSA/FPU/DSP context duplication race There is a race in the MIPS fork code which allows the child to get a stale copy of parent MSA/FPU/DSP state that is active in hardware registers when the fork() is called. This is because copy_thread() saves the live register state into the child context only if the hardware is currently in use, apparently on the assumption that the hardware state cannot have been saved and disabled since the initial duplication of the task_struct. However preemption is certainly possible during this window. An example sequence of events is as follows: 1) The parent userland process puts important data into saved floating point registers ($f20-$f31), which are then dirty compared to the process' stored context. 2) The parent process calls fork() which does a clone system call. 3) In the kernel, do_fork() -> copy_process() -> dup_task_struct() -> arch_dup_task_struct() (which uses the weakly defined default implementation). This duplicates the parent process' task context, which includes a stale version of its FP context from when it was last saved, probably some time before (1). 4) At some point before copy_process() calls copy_thread(), such as when duplicating the memory map, the process is desceduled. Perhaps it is preempted asynchronously, or perhaps it sleeps while blocked on a mutex. The dirty FP state in the FP registers is saved to the parent process' context and the FPU is disabled. 5) When the process is rescheduled again it continues copying state until it gets to copy_thread(), which checks whether the FPU is in use, so that it can copy that dirty state to the child process' task context. Because of the deschedule however the FPU is not in use, so the child process' context is left with stale FP context from the last time the parent saved it (some time before (1)). 6) When the new child process is scheduled it reads the important data from the saved floating point register, and ends up doing a NULL pointer dereference as a result of the stale data. This use of saved floating point registers across function calls can be triggered fairly easily by explicitly using inline asm with a current (MIPS R2) compiler, but is far more likely to happen unintentionally with a MIPS R6 compiler where the FP registers are more likely to get used as scratch registers for storing non-fp data. It is easily fixed, in the same way that other architectures do it, by overriding the implementation of arch_dup_task_struct() to sync the dirty hardware state to the parent process' task context *prior* to duplicating it, rather than copying straight to the child process' task context in copy_thread(). Note, the FPU hardware is not disabled so the parent process may continue executing with the live register context, but now the child process is guaranteed to have an identical copy of it at that point. Signed-off-by: James Hogan <james.hogan@imgtec.com> Reported-by: Matthew Fortune <matthew.fortune@imgtec.com> Tested-by: Markos Chandras <markos.chandras@imgtec.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9075/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2015-01-19 17:30:54 +07:00
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
/*
* Save any process state which is live in hardware registers to the
* parent context prior to duplication. This prevents the new child
* state becoming stale if the parent is preempted before copy_thread()
* gets a chance to save the parent's live hardware registers to the
* child context.
*/
preempt_disable();
if (is_msa_enabled())
save_msa(current);
else if (is_fpu_owner())
_save_fp(current);
save_dsp(current);
preempt_enable();
*dst = *src;
return 0;
}
/*
* Copy architecture-specific thread state
*/
int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
unsigned long kthread_arg, struct task_struct *p, unsigned long tls)
{
struct thread_info *ti = task_thread_info(p);
struct pt_regs *childregs, *regs = current_pt_regs();
unsigned long childksp;
childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32;
/* set up new TSS. */
childregs = (struct pt_regs *) childksp - 1;
/* Put the stack after the struct pt_regs. */
childksp = (unsigned long) childregs;
p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
unsigned long status = p->thread.cp0_status;
memset(childregs, 0, sizeof(struct pt_regs));
ti->addr_limit = KERNEL_DS;
p->thread.reg16 = usp; /* fn */
p->thread.reg17 = kthread_arg;
p->thread.reg29 = childksp;
p->thread.reg31 = (unsigned long) ret_from_kernel_thread;
#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
status = (status & ~(ST0_KUP | ST0_IEP | ST0_IEC)) |
((status & (ST0_KUC | ST0_IEC)) << 2);
#else
status |= ST0_EXL;
#endif
childregs->cp0_status = status;
return 0;
}
/* user thread */
*childregs = *regs;
childregs->regs[7] = 0; /* Clear error flag */
childregs->regs[2] = 0; /* Child gets zero as return value */
if (usp)
childregs->regs[29] = usp;
ti->addr_limit = USER_DS;
p->thread.reg29 = (unsigned long) childregs;
p->thread.reg31 = (unsigned long) ret_from_fork;
/*
* New tasks lose permission to use the fpu. This accelerates context
* switching for most programs since they don't use the fpu.
*/
childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
clear_tsk_thread_flag(p, TIF_USEDFPU);
clear_tsk_thread_flag(p, TIF_USEDMSA);
clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE);
#ifdef CONFIG_MIPS_MT_FPAFF
clear_tsk_thread_flag(p, TIF_FPUBOUND);
#endif /* CONFIG_MIPS_MT_FPAFF */
MIPS: Use per-mm page to execute branch delay slot instructions In some cases the kernel needs to execute an instruction from the delay slot of an emulated branch instruction. These cases include: - Emulated floating point branch instructions (bc1[ft]l?) for systems which don't include an FPU, or upon which the kernel is run with the "nofpu" parameter. - MIPSr6 systems running binaries targeting older revisions of the architecture, which may include branch instructions whose encodings are no longer valid in MIPSr6. Executing instructions from such delay slots is done by writing the instruction to memory followed by a trap, as part of an "emuframe", and executing it. This avoids the requirement of an emulator for the entire MIPS instruction set. Prior to this patch such emuframes are written to the user stack and executed from there. This patch moves FP branch delay emuframes off of the user stack and into a per-mm page. Allocating a page per-mm leaves userland with access to only what it had access to previously, and compared to other solutions is relatively simple. When a thread requires a delay slot emulation, it is allocated a frame. A thread may only have one frame allocated at any one time, since it may only ever be executing one instruction at any one time. In order to ensure that we can free up allocated frame later, its index is recorded in struct thread_struct. In the typical case, after executing the delay slot instruction we'll execute a break instruction with the BRK_MEMU code. This traps back to the kernel & leads to a call to do_dsemulret which frees the allocated frame & moves the user PC back to the instruction that would have executed following the emulated branch. In some cases the delay slot instruction may be invalid, such as a branch, or may trigger an exception. In these cases the BRK_MEMU break instruction will not be hit. In order to ensure that frames are freed this patch introduces dsemul_thread_cleanup() and calls it to free any allocated frame upon thread exit. If the instruction generated an exception & leads to a signal being delivered to the thread, or indeed if a signal simply happens to be delivered to the thread whilst it is executing from the struct emuframe, then we need to take care to exit the frame appropriately. This is done by either rolling back the user PC to the branch or advancing it to the continuation PC prior to signal delivery, using dsemul_thread_rollback(). If this were not done then a sigreturn would return to the struct emuframe, and if that frame had meanwhile been used in response to an emulated branch instruction within the signal handler then we would execute the wrong user code. Whilst a user could theoretically place something like a compact branch to self in a delay slot and cause their thread to become stuck in an infinite loop with the frame never being deallocated, this would: - Only affect the users single process. - Be architecturally invalid since there would be a branch in the delay slot, which is forbidden. - Be extremely unlikely to happen by mistake, and provide a program with no more ability to harm the system than a simple infinite loop would. If a thread requires a delay slot emulation & no frame is available to it (ie. the process has enough other threads that all frames are currently in use) then the thread joins a waitqueue. It will sleep until a frame is freed by another thread in the process. Since we now know whether a thread has an allocated frame due to our tracking of its index, the cookie field of struct emuframe is removed as we can be more certain whether we have a valid frame. Since a thread may only ever have a single frame at any given time, the epc field of struct emuframe is also removed & the PC to continue from is instead stored in struct thread_struct. Together these changes simplify & shrink struct emuframe somewhat, allowing twice as many frames to fit into the page allocated for them. The primary benefit of this patch is that we are now free to mark the user stack non-executable where that is possible. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: Maciej Rozycki <maciej.rozycki@imgtec.com> Cc: Faraz Shahbazker <faraz.shahbazker@imgtec.com> Cc: Raghu Gandham <raghu.gandham@imgtec.com> Cc: Matthew Fortune <matthew.fortune@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13764/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-07-08 17:06:19 +07:00
atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE);
if (clone_flags & CLONE_SETTLS)
ti->tp_value = tls;
return 0;
}
#ifdef CONFIG_CC_STACKPROTECTOR
#include <linux/stackprotector.h>
unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
struct mips_frame_info {
void *func;
unsigned long func_size;
int frame_size;
int pc_offset;
};
#define J_TARGET(pc,target) \
(((unsigned long)(pc) & 0xf0000000) | ((target) << 2))
MIPS: Calculate microMIPS ra properly when unwinding the stack get_frame_info() calculates the offset of the return address within a stack frame simply by dividing a the bottom 16 bits of the instruction, treated as a signed integer, by the size of a long. Whilst this works for MIPS32 & MIPS64 ISAs where the sw or sd instructions are used, it's incorrect for microMIPS where encodings differ. The result is that we typically completely fail to unwind the stack on microMIPS. Fix this by adjusting is_ra_save_ins() to calculate the return address offset, and take into account the various different encodings there in the same place as we consider whether an instruction is storing the ra/$31 register. With this we are now able to unwind the stack for kernels targetting the microMIPS ISA, for example we can produce: Call Trace: [<80109e1f>] show_stack+0x63/0x7c [<8011ea17>] __warn+0x9b/0xac [<8011ea45>] warn_slowpath_fmt+0x1d/0x20 [<8013fe53>] register_console+0x43/0x314 [<8067c58d>] of_setup_earlycon+0x1dd/0x1ec [<8067f63f>] early_init_dt_scan_chosen_stdout+0xe7/0xf8 [<8066c115>] do_early_param+0x75/0xac [<801302f9>] parse_args+0x1dd/0x308 [<8066c459>] parse_early_options+0x25/0x28 [<8066c48b>] parse_early_param+0x2f/0x38 [<8066e8cf>] setup_arch+0x113/0x488 [<8066c4f3>] start_kernel+0x57/0x328 ---[ end trace 0000000000000000 ]--- Whereas previously we only produced: Call Trace: [<80109e1f>] show_stack+0x63/0x7c ---[ end trace 0000000000000000 ]--- Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: linux-mips@linux-mips.org Cc: <stable@vger.kernel.org> # v3.10+ Patchwork: https://patchwork.linux-mips.org/patch/14532/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-11-07 22:07:06 +07:00
static inline int is_ra_save_ins(union mips_instruction *ip, int *poff)
{
#ifdef CONFIG_CPU_MICROMIPS
/*
* swsp ra,offset
* swm16 reglist,offset(sp)
* swm32 reglist,offset(sp)
* sw32 ra,offset(sp)
* jradiussp - NOT SUPPORTED
*
* microMIPS is way more fun...
*/
if (mm_insn_16bit(ip->halfword[1])) {
MIPS: Calculate microMIPS ra properly when unwinding the stack get_frame_info() calculates the offset of the return address within a stack frame simply by dividing a the bottom 16 bits of the instruction, treated as a signed integer, by the size of a long. Whilst this works for MIPS32 & MIPS64 ISAs where the sw or sd instructions are used, it's incorrect for microMIPS where encodings differ. The result is that we typically completely fail to unwind the stack on microMIPS. Fix this by adjusting is_ra_save_ins() to calculate the return address offset, and take into account the various different encodings there in the same place as we consider whether an instruction is storing the ra/$31 register. With this we are now able to unwind the stack for kernels targetting the microMIPS ISA, for example we can produce: Call Trace: [<80109e1f>] show_stack+0x63/0x7c [<8011ea17>] __warn+0x9b/0xac [<8011ea45>] warn_slowpath_fmt+0x1d/0x20 [<8013fe53>] register_console+0x43/0x314 [<8067c58d>] of_setup_earlycon+0x1dd/0x1ec [<8067f63f>] early_init_dt_scan_chosen_stdout+0xe7/0xf8 [<8066c115>] do_early_param+0x75/0xac [<801302f9>] parse_args+0x1dd/0x308 [<8066c459>] parse_early_options+0x25/0x28 [<8066c48b>] parse_early_param+0x2f/0x38 [<8066e8cf>] setup_arch+0x113/0x488 [<8066c4f3>] start_kernel+0x57/0x328 ---[ end trace 0000000000000000 ]--- Whereas previously we only produced: Call Trace: [<80109e1f>] show_stack+0x63/0x7c ---[ end trace 0000000000000000 ]--- Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: linux-mips@linux-mips.org Cc: <stable@vger.kernel.org> # v3.10+ Patchwork: https://patchwork.linux-mips.org/patch/14532/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-11-07 22:07:06 +07:00
switch (ip->mm16_r5_format.opcode) {
case mm_swsp16_op:
if (ip->mm16_r5_format.rt != 31)
return 0;
*poff = ip->mm16_r5_format.simmediate;
*poff = (*poff << 2) / sizeof(ulong);
return 1;
case mm_pool16c_op:
switch (ip->mm16_m_format.func) {
case mm_swm16_op:
*poff = ip->mm16_m_format.imm;
*poff += 1 + ip->mm16_m_format.rlist;
*poff = (*poff << 2) / sizeof(ulong);
return 1;
default:
return 0;
}
default:
return 0;
}
}
MIPS: Calculate microMIPS ra properly when unwinding the stack get_frame_info() calculates the offset of the return address within a stack frame simply by dividing a the bottom 16 bits of the instruction, treated as a signed integer, by the size of a long. Whilst this works for MIPS32 & MIPS64 ISAs where the sw or sd instructions are used, it's incorrect for microMIPS where encodings differ. The result is that we typically completely fail to unwind the stack on microMIPS. Fix this by adjusting is_ra_save_ins() to calculate the return address offset, and take into account the various different encodings there in the same place as we consider whether an instruction is storing the ra/$31 register. With this we are now able to unwind the stack for kernels targetting the microMIPS ISA, for example we can produce: Call Trace: [<80109e1f>] show_stack+0x63/0x7c [<8011ea17>] __warn+0x9b/0xac [<8011ea45>] warn_slowpath_fmt+0x1d/0x20 [<8013fe53>] register_console+0x43/0x314 [<8067c58d>] of_setup_earlycon+0x1dd/0x1ec [<8067f63f>] early_init_dt_scan_chosen_stdout+0xe7/0xf8 [<8066c115>] do_early_param+0x75/0xac [<801302f9>] parse_args+0x1dd/0x308 [<8066c459>] parse_early_options+0x25/0x28 [<8066c48b>] parse_early_param+0x2f/0x38 [<8066e8cf>] setup_arch+0x113/0x488 [<8066c4f3>] start_kernel+0x57/0x328 ---[ end trace 0000000000000000 ]--- Whereas previously we only produced: Call Trace: [<80109e1f>] show_stack+0x63/0x7c ---[ end trace 0000000000000000 ]--- Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: linux-mips@linux-mips.org Cc: <stable@vger.kernel.org> # v3.10+ Patchwork: https://patchwork.linux-mips.org/patch/14532/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-11-07 22:07:06 +07:00
switch (ip->i_format.opcode) {
case mm_sw32_op:
if (ip->i_format.rs != 29)
return 0;
if (ip->i_format.rt != 31)
return 0;
*poff = ip->i_format.simmediate / sizeof(ulong);
return 1;
case mm_pool32b_op:
switch (ip->mm_m_format.func) {
case mm_swm32_func:
if (ip->mm_m_format.rd < 0x10)
return 0;
if (ip->mm_m_format.base != 29)
return 0;
*poff = ip->mm_m_format.simmediate;
*poff += (ip->mm_m_format.rd & 0xf) * sizeof(u32);
*poff /= sizeof(ulong);
return 1;
default:
return 0;
}
default:
return 0;
}
#else
/* sw / sd $ra, offset($sp) */
MIPS: Calculate microMIPS ra properly when unwinding the stack get_frame_info() calculates the offset of the return address within a stack frame simply by dividing a the bottom 16 bits of the instruction, treated as a signed integer, by the size of a long. Whilst this works for MIPS32 & MIPS64 ISAs where the sw or sd instructions are used, it's incorrect for microMIPS where encodings differ. The result is that we typically completely fail to unwind the stack on microMIPS. Fix this by adjusting is_ra_save_ins() to calculate the return address offset, and take into account the various different encodings there in the same place as we consider whether an instruction is storing the ra/$31 register. With this we are now able to unwind the stack for kernels targetting the microMIPS ISA, for example we can produce: Call Trace: [<80109e1f>] show_stack+0x63/0x7c [<8011ea17>] __warn+0x9b/0xac [<8011ea45>] warn_slowpath_fmt+0x1d/0x20 [<8013fe53>] register_console+0x43/0x314 [<8067c58d>] of_setup_earlycon+0x1dd/0x1ec [<8067f63f>] early_init_dt_scan_chosen_stdout+0xe7/0xf8 [<8066c115>] do_early_param+0x75/0xac [<801302f9>] parse_args+0x1dd/0x308 [<8066c459>] parse_early_options+0x25/0x28 [<8066c48b>] parse_early_param+0x2f/0x38 [<8066e8cf>] setup_arch+0x113/0x488 [<8066c4f3>] start_kernel+0x57/0x328 ---[ end trace 0000000000000000 ]--- Whereas previously we only produced: Call Trace: [<80109e1f>] show_stack+0x63/0x7c ---[ end trace 0000000000000000 ]--- Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: linux-mips@linux-mips.org Cc: <stable@vger.kernel.org> # v3.10+ Patchwork: https://patchwork.linux-mips.org/patch/14532/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-11-07 22:07:06 +07:00
if ((ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op) &&
ip->i_format.rs == 29 && ip->i_format.rt == 31) {
*poff = ip->i_format.simmediate / sizeof(ulong);
return 1;
}
return 0;
#endif
}
static inline int is_jump_ins(union mips_instruction *ip)
{
#ifdef CONFIG_CPU_MICROMIPS
/*
* jr16,jrc,jalr16,jalr16
* jal
* jalr/jr,jalr.hb/jr.hb,jalrs,jalrs.hb
* jraddiusp - NOT SUPPORTED
*
* microMIPS is kind of more fun...
*/
if (mm_insn_16bit(ip->halfword[1])) {
if ((ip->mm16_r5_format.opcode == mm_pool16c_op &&
(ip->mm16_r5_format.rt & mm_jr16_op) == mm_jr16_op))
return 1;
return 0;
}
if (ip->j_format.opcode == mm_j32_op)
return 1;
if (ip->j_format.opcode == mm_jal32_op)
return 1;
if (ip->r_format.opcode != mm_pool32a_op ||
ip->r_format.func != mm_pool32axf_op)
return 0;
return ((ip->u_format.uimmediate >> 6) & mm_jalr_op) == mm_jalr_op;
#else
if (ip->j_format.opcode == j_op)
return 1;
if (ip->j_format.opcode == jal_op)
return 1;
if (ip->r_format.opcode != spec_op)
return 0;
return ip->r_format.func == jalr_op || ip->r_format.func == jr_op;
#endif
}
static inline int is_sp_move_ins(union mips_instruction *ip)
{
#ifdef CONFIG_CPU_MICROMIPS
/*
* addiusp -imm
* addius5 sp,-imm
* addiu32 sp,sp,-imm
* jradiussp - NOT SUPPORTED
*
* microMIPS is not more fun...
*/
if (mm_insn_16bit(ip->halfword[1])) {
return (ip->mm16_r3_format.opcode == mm_pool16d_op &&
ip->mm16_r3_format.simmediate && mm_addiusp_func) ||
(ip->mm16_r5_format.opcode == mm_pool16d_op &&
ip->mm16_r5_format.rt == 29);
}
return ip->mm_i_format.opcode == mm_addiu32_op &&
ip->mm_i_format.rt == 29 && ip->mm_i_format.rs == 29;
#else
/* addiu/daddiu sp,sp,-imm */
if (ip->i_format.rs != 29 || ip->i_format.rt != 29)
return 0;
if (ip->i_format.opcode == addiu_op || ip->i_format.opcode == daddiu_op)
return 1;
#endif
return 0;
}
static int get_frame_info(struct mips_frame_info *info)
{
bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS);
union mips_instruction insn, *ip, *ip_end;
const unsigned int max_insns = 128;
MIPS: Handle non word sized instructions when examining frame Commit 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") added fairly broken support for handling 16bit microMIPS instructions in get_frame_info(). It adjusts the instruction pointer by 16bits in the case of a 16bit sp move instruction, but not any other 16bit instruction. Commit b6c7a324df37 ("MIPS: Fix get_frame_info() handling of microMIPS function size") goes some way to fixing get_frame_info() to iterate over microMIPS instuctions, but the instruction pointer is still manipulated using a postincrement, and is of union mips_instruction type. Since the union is sized to the largest member (a word), but microMIPS instructions are a mix of halfword and word sizes, the function does not always iterate correctly, ending up misaligned with the instruction stream and interpreting it incorrectly. Since the instruction modifying the stack pointer is usually the first in the function, that one is usually handled correctly. But the instruction which saves the return address to the sp is some variable number of instructions into the frame and is frequently missed due to not being on a word boundary, leading to incomplete walking of the stack. Fix this by incrementing the instruction pointer based on the size of the previously decoded instruction (& remove the hack introduced by commit 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") which adjusts the instruction pointer in the case of a 16bit sp move instruction, but not any other). Fixes: 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16953/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-08-08 19:22:30 +07:00
unsigned int last_insn_size = 0;
unsigned int i;
MIPS: Fix issues in backtraces I saw two problems when doing backtraces: The compiler was putting a "fast return" at the top of some functions, before it set up the frame. The backtrace code would stop when it saw a jump instruction, so it would never get to the stack frame setup and would thus misinterpret it. To fix this, don't look for jump instructions until the frame setup has been seen. The assembly code here is: ffffffff80b885a0 <serial8250_handle_irq>: ffffffff80b885a0: c8a00003 bbit0 a1,0x0,ffffffff80b885b0 <serial8250_handle_irq+0x10> ffffffff80b885a4: 0000102d move v0,zero ffffffff80b885a8: 03e00008 jr ra ffffffff80b885ac: 00000000 nop ffffffff80b885b0: 67bdffd0 daddiu sp,sp,-48 ffffffff80b885b4: ffb00008 sd s0,8(sp) The second problem was the compiler was putting the last instruction of the frame save in the delay slot of the jump instruction. If it saved the RA in there, the backtrace could would miss it and misinterpret the frame. To fix this, make sure to process the instruction after the first jump seen. The assembly code for this is: ffffffff80806fd0 <plat_irq_dispatch>: ffffffff80806fd0: 67bdffd0 daddiu sp,sp,-48 ffffffff80806fd4: ffb30020 sd s3,32(sp) ffffffff80806fd8: 24130018 li s3,24 ffffffff80806fdc: ffb20018 sd s2,24(sp) ffffffff80806fe0: 3c12811c lui s2,0x811c ffffffff80806fe4: ffb10010 sd s1,16(sp) ffffffff80806fe8: 3c11811c lui s1,0x811c ffffffff80806fec: ffb00008 sd s0,8(sp) ffffffff80806ff0: 3c10811c lui s0,0x811c ffffffff80806ff4: 08201c03 j ffffffff8080700c <plat_irq_dispa tch+0x3c> ffffffff80806ff8: ffbf0028 sd ra,40(sp) Signed-off-by: Corey Minyard <cminyard@mvista.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16992/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-08-11 01:27:37 +07:00
bool saw_jump = false;
info->pc_offset = -1;
info->frame_size = 0;
ip = (void *)msk_isa16_mode((ulong)info->func);
if (!ip)
goto err;
ip_end = (void *)ip + info->func_size;
MIPS: Handle non word sized instructions when examining frame Commit 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") added fairly broken support for handling 16bit microMIPS instructions in get_frame_info(). It adjusts the instruction pointer by 16bits in the case of a 16bit sp move instruction, but not any other 16bit instruction. Commit b6c7a324df37 ("MIPS: Fix get_frame_info() handling of microMIPS function size") goes some way to fixing get_frame_info() to iterate over microMIPS instuctions, but the instruction pointer is still manipulated using a postincrement, and is of union mips_instruction type. Since the union is sized to the largest member (a word), but microMIPS instructions are a mix of halfword and word sizes, the function does not always iterate correctly, ending up misaligned with the instruction stream and interpreting it incorrectly. Since the instruction modifying the stack pointer is usually the first in the function, that one is usually handled correctly. But the instruction which saves the return address to the sp is some variable number of instructions into the frame and is frequently missed due to not being on a word boundary, leading to incomplete walking of the stack. Fix this by incrementing the instruction pointer based on the size of the previously decoded instruction (& remove the hack introduced by commit 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") which adjusts the instruction pointer in the case of a 16bit sp move instruction, but not any other). Fixes: 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16953/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-08-08 19:22:30 +07:00
for (i = 0; i < max_insns && ip < ip_end; i++) {
ip = (void *)ip + last_insn_size;
if (is_mmips && mm_insn_16bit(ip->halfword[0])) {
insn.halfword[0] = 0;
insn.halfword[1] = ip->halfword[0];
MIPS: Handle non word sized instructions when examining frame Commit 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") added fairly broken support for handling 16bit microMIPS instructions in get_frame_info(). It adjusts the instruction pointer by 16bits in the case of a 16bit sp move instruction, but not any other 16bit instruction. Commit b6c7a324df37 ("MIPS: Fix get_frame_info() handling of microMIPS function size") goes some way to fixing get_frame_info() to iterate over microMIPS instuctions, but the instruction pointer is still manipulated using a postincrement, and is of union mips_instruction type. Since the union is sized to the largest member (a word), but microMIPS instructions are a mix of halfword and word sizes, the function does not always iterate correctly, ending up misaligned with the instruction stream and interpreting it incorrectly. Since the instruction modifying the stack pointer is usually the first in the function, that one is usually handled correctly. But the instruction which saves the return address to the sp is some variable number of instructions into the frame and is frequently missed due to not being on a word boundary, leading to incomplete walking of the stack. Fix this by incrementing the instruction pointer based on the size of the previously decoded instruction (& remove the hack introduced by commit 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") which adjusts the instruction pointer in the case of a 16bit sp move instruction, but not any other). Fixes: 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16953/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-08-08 19:22:30 +07:00
last_insn_size = 2;
} else if (is_mmips) {
insn.halfword[0] = ip->halfword[1];
insn.halfword[1] = ip->halfword[0];
MIPS: Handle non word sized instructions when examining frame Commit 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") added fairly broken support for handling 16bit microMIPS instructions in get_frame_info(). It adjusts the instruction pointer by 16bits in the case of a 16bit sp move instruction, but not any other 16bit instruction. Commit b6c7a324df37 ("MIPS: Fix get_frame_info() handling of microMIPS function size") goes some way to fixing get_frame_info() to iterate over microMIPS instuctions, but the instruction pointer is still manipulated using a postincrement, and is of union mips_instruction type. Since the union is sized to the largest member (a word), but microMIPS instructions are a mix of halfword and word sizes, the function does not always iterate correctly, ending up misaligned with the instruction stream and interpreting it incorrectly. Since the instruction modifying the stack pointer is usually the first in the function, that one is usually handled correctly. But the instruction which saves the return address to the sp is some variable number of instructions into the frame and is frequently missed due to not being on a word boundary, leading to incomplete walking of the stack. Fix this by incrementing the instruction pointer based on the size of the previously decoded instruction (& remove the hack introduced by commit 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") which adjusts the instruction pointer in the case of a 16bit sp move instruction, but not any other). Fixes: 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16953/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-08-08 19:22:30 +07:00
last_insn_size = 4;
} else {
insn.word = ip->word;
MIPS: Handle non word sized instructions when examining frame Commit 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") added fairly broken support for handling 16bit microMIPS instructions in get_frame_info(). It adjusts the instruction pointer by 16bits in the case of a 16bit sp move instruction, but not any other 16bit instruction. Commit b6c7a324df37 ("MIPS: Fix get_frame_info() handling of microMIPS function size") goes some way to fixing get_frame_info() to iterate over microMIPS instuctions, but the instruction pointer is still manipulated using a postincrement, and is of union mips_instruction type. Since the union is sized to the largest member (a word), but microMIPS instructions are a mix of halfword and word sizes, the function does not always iterate correctly, ending up misaligned with the instruction stream and interpreting it incorrectly. Since the instruction modifying the stack pointer is usually the first in the function, that one is usually handled correctly. But the instruction which saves the return address to the sp is some variable number of instructions into the frame and is frequently missed due to not being on a word boundary, leading to incomplete walking of the stack. Fix this by incrementing the instruction pointer based on the size of the previously decoded instruction (& remove the hack introduced by commit 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") which adjusts the instruction pointer in the case of a 16bit sp move instruction, but not any other). Fixes: 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16953/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-08-08 19:22:30 +07:00
last_insn_size = 4;
}
if (!info->frame_size) {
if (is_sp_move_ins(&insn))
{
#ifdef CONFIG_CPU_MICROMIPS
if (mm_insn_16bit(ip->halfword[0]))
{
unsigned short tmp;
if (ip->halfword[0] & mm_addiusp_func)
{
tmp = (((ip->halfword[0] >> 1) & 0x1ff) << 2);
info->frame_size = -(signed short)(tmp | ((tmp & 0x100) ? 0xfe00 : 0));
} else {
tmp = (ip->halfword[0] >> 1);
info->frame_size = -(signed short)(tmp & 0xf);
}
} else
#endif
info->frame_size = - ip->i_format.simmediate;
}
continue;
MIPS: Fix issues in backtraces I saw two problems when doing backtraces: The compiler was putting a "fast return" at the top of some functions, before it set up the frame. The backtrace code would stop when it saw a jump instruction, so it would never get to the stack frame setup and would thus misinterpret it. To fix this, don't look for jump instructions until the frame setup has been seen. The assembly code here is: ffffffff80b885a0 <serial8250_handle_irq>: ffffffff80b885a0: c8a00003 bbit0 a1,0x0,ffffffff80b885b0 <serial8250_handle_irq+0x10> ffffffff80b885a4: 0000102d move v0,zero ffffffff80b885a8: 03e00008 jr ra ffffffff80b885ac: 00000000 nop ffffffff80b885b0: 67bdffd0 daddiu sp,sp,-48 ffffffff80b885b4: ffb00008 sd s0,8(sp) The second problem was the compiler was putting the last instruction of the frame save in the delay slot of the jump instruction. If it saved the RA in there, the backtrace could would miss it and misinterpret the frame. To fix this, make sure to process the instruction after the first jump seen. The assembly code for this is: ffffffff80806fd0 <plat_irq_dispatch>: ffffffff80806fd0: 67bdffd0 daddiu sp,sp,-48 ffffffff80806fd4: ffb30020 sd s3,32(sp) ffffffff80806fd8: 24130018 li s3,24 ffffffff80806fdc: ffb20018 sd s2,24(sp) ffffffff80806fe0: 3c12811c lui s2,0x811c ffffffff80806fe4: ffb10010 sd s1,16(sp) ffffffff80806fe8: 3c11811c lui s1,0x811c ffffffff80806fec: ffb00008 sd s0,8(sp) ffffffff80806ff0: 3c10811c lui s0,0x811c ffffffff80806ff4: 08201c03 j ffffffff8080700c <plat_irq_dispa tch+0x3c> ffffffff80806ff8: ffbf0028 sd ra,40(sp) Signed-off-by: Corey Minyard <cminyard@mvista.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16992/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-08-11 01:27:37 +07:00
} else if (!saw_jump && is_jump_ins(ip)) {
/*
* If we see a jump instruction, we are finished
* with the frame save.
*
* Some functions can have a shortcut return at
* the beginning of the function, so don't start
* looking for jump instruction until we see the
* frame setup.
*
* The RA save instruction can get put into the
* delay slot of the jump instruction, so look
* at the next instruction, too.
*/
saw_jump = true;
continue;
}
MIPS: Calculate microMIPS ra properly when unwinding the stack get_frame_info() calculates the offset of the return address within a stack frame simply by dividing a the bottom 16 bits of the instruction, treated as a signed integer, by the size of a long. Whilst this works for MIPS32 & MIPS64 ISAs where the sw or sd instructions are used, it's incorrect for microMIPS where encodings differ. The result is that we typically completely fail to unwind the stack on microMIPS. Fix this by adjusting is_ra_save_ins() to calculate the return address offset, and take into account the various different encodings there in the same place as we consider whether an instruction is storing the ra/$31 register. With this we are now able to unwind the stack for kernels targetting the microMIPS ISA, for example we can produce: Call Trace: [<80109e1f>] show_stack+0x63/0x7c [<8011ea17>] __warn+0x9b/0xac [<8011ea45>] warn_slowpath_fmt+0x1d/0x20 [<8013fe53>] register_console+0x43/0x314 [<8067c58d>] of_setup_earlycon+0x1dd/0x1ec [<8067f63f>] early_init_dt_scan_chosen_stdout+0xe7/0xf8 [<8066c115>] do_early_param+0x75/0xac [<801302f9>] parse_args+0x1dd/0x308 [<8066c459>] parse_early_options+0x25/0x28 [<8066c48b>] parse_early_param+0x2f/0x38 [<8066e8cf>] setup_arch+0x113/0x488 [<8066c4f3>] start_kernel+0x57/0x328 ---[ end trace 0000000000000000 ]--- Whereas previously we only produced: Call Trace: [<80109e1f>] show_stack+0x63/0x7c ---[ end trace 0000000000000000 ]--- Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: linux-mips@linux-mips.org Cc: <stable@vger.kernel.org> # v3.10+ Patchwork: https://patchwork.linux-mips.org/patch/14532/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-11-07 22:07:06 +07:00
if (info->pc_offset == -1 &&
is_ra_save_ins(&insn, &info->pc_offset))
break;
MIPS: Fix issues in backtraces I saw two problems when doing backtraces: The compiler was putting a "fast return" at the top of some functions, before it set up the frame. The backtrace code would stop when it saw a jump instruction, so it would never get to the stack frame setup and would thus misinterpret it. To fix this, don't look for jump instructions until the frame setup has been seen. The assembly code here is: ffffffff80b885a0 <serial8250_handle_irq>: ffffffff80b885a0: c8a00003 bbit0 a1,0x0,ffffffff80b885b0 <serial8250_handle_irq+0x10> ffffffff80b885a4: 0000102d move v0,zero ffffffff80b885a8: 03e00008 jr ra ffffffff80b885ac: 00000000 nop ffffffff80b885b0: 67bdffd0 daddiu sp,sp,-48 ffffffff80b885b4: ffb00008 sd s0,8(sp) The second problem was the compiler was putting the last instruction of the frame save in the delay slot of the jump instruction. If it saved the RA in there, the backtrace could would miss it and misinterpret the frame. To fix this, make sure to process the instruction after the first jump seen. The assembly code for this is: ffffffff80806fd0 <plat_irq_dispatch>: ffffffff80806fd0: 67bdffd0 daddiu sp,sp,-48 ffffffff80806fd4: ffb30020 sd s3,32(sp) ffffffff80806fd8: 24130018 li s3,24 ffffffff80806fdc: ffb20018 sd s2,24(sp) ffffffff80806fe0: 3c12811c lui s2,0x811c ffffffff80806fe4: ffb10010 sd s1,16(sp) ffffffff80806fe8: 3c11811c lui s1,0x811c ffffffff80806fec: ffb00008 sd s0,8(sp) ffffffff80806ff0: 3c10811c lui s0,0x811c ffffffff80806ff4: 08201c03 j ffffffff8080700c <plat_irq_dispa tch+0x3c> ffffffff80806ff8: ffbf0028 sd ra,40(sp) Signed-off-by: Corey Minyard <cminyard@mvista.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16992/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-08-11 01:27:37 +07:00
if (saw_jump)
break;
}
if (info->frame_size && info->pc_offset >= 0) /* nested */
return 0;
if (info->pc_offset < 0) /* leaf */
return 1;
/* prologue seems bogus... */
err:
return -1;
}
static struct mips_frame_info schedule_mfi __read_mostly;
#ifdef CONFIG_KALLSYMS
static unsigned long get___schedule_addr(void)
{
return kallsyms_lookup_name("__schedule");
}
#else
static unsigned long get___schedule_addr(void)
{
union mips_instruction *ip = (void *)schedule;
int max_insns = 8;
int i;
for (i = 0; i < max_insns; i++, ip++) {
if (ip->j_format.opcode == j_op)
return J_TARGET(ip, ip->j_format.target);
}
return 0;
}
#endif
static int __init frame_info_init(void)
{
unsigned long size = 0;
#ifdef CONFIG_KALLSYMS
unsigned long ofs;
#endif
unsigned long addr;
addr = get___schedule_addr();
if (!addr)
addr = (unsigned long)schedule;
#ifdef CONFIG_KALLSYMS
kallsyms_lookup_size_offset(addr, &size, &ofs);
#endif
schedule_mfi.func = (void *)addr;
schedule_mfi.func_size = size;
get_frame_info(&schedule_mfi);
/*
* Without schedule() frame info, result given by
* thread_saved_pc() and get_wchan() are not reliable.
*/
if (schedule_mfi.pc_offset < 0)
printk("Can't analyze schedule() prologue at %p\n", schedule);
return 0;
}
arch_initcall(frame_info_init);
/*
* Return saved PC of a blocked thread.
*/
unsigned long thread_saved_pc(struct task_struct *tsk)
{
struct thread_struct *t = &tsk->thread;
/* New born processes are a special case */
if (t->reg31 == (unsigned long) ret_from_fork)
return t->reg31;
if (schedule_mfi.pc_offset < 0)
return 0;
return ((unsigned long *)t->reg29)[schedule_mfi.pc_offset];
}
#ifdef CONFIG_KALLSYMS
/* generic stack unwinding function */
unsigned long notrace unwind_stack_by_address(unsigned long stack_page,
unsigned long *sp,
unsigned long pc,
unsigned long *ra)
{
MIPS: IRQ Stack: Unwind IRQ stack onto task stack When the separate IRQ stack was introduced, stack unwinding only proceeded as far as the top of the IRQ stack, leading to kernel backtraces being less useful, lacking the trace of what was interrupted. Fix this by providing a means for the kernel to unwind the IRQ stack onto the interrupted task stack. The processor state is saved to the kernel task stack on interrupt. The IRQ_STACK_START macro reserves an unsigned long at the top of the IRQ stack where the interrupted task stack pointer can be saved. After the active stack is switched to the IRQ stack, save the interrupted tasks stack pointer to the reserved location. Fix the stack unwinding code to look for the frame being the top of the IRQ stack and if so get the next frame from the saved location. The existing test does not work with the separate stack since the ra is no longer pointed at ret_from_{irq,exception}. The test to stop unwinding the stack 32 bytes from the top of a stack must be modified to allow unwinding to continue up to the location of the saved task stack pointer when on the IRQ stack. The low / high marks of the stack are set depending on whether the sp is on an irq stack or not. Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: Masanari Iida <standby24x7@gmail.com> Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Paul Burton <paul.burton@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jason A. Donenfeld <jason@zx2c4.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15788/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-03-21 21:52:25 +07:00
unsigned long low, high, irq_stack_high;
struct mips_frame_info info;
unsigned long size, ofs;
MIPS: IRQ Stack: Unwind IRQ stack onto task stack When the separate IRQ stack was introduced, stack unwinding only proceeded as far as the top of the IRQ stack, leading to kernel backtraces being less useful, lacking the trace of what was interrupted. Fix this by providing a means for the kernel to unwind the IRQ stack onto the interrupted task stack. The processor state is saved to the kernel task stack on interrupt. The IRQ_STACK_START macro reserves an unsigned long at the top of the IRQ stack where the interrupted task stack pointer can be saved. After the active stack is switched to the IRQ stack, save the interrupted tasks stack pointer to the reserved location. Fix the stack unwinding code to look for the frame being the top of the IRQ stack and if so get the next frame from the saved location. The existing test does not work with the separate stack since the ra is no longer pointed at ret_from_{irq,exception}. The test to stop unwinding the stack 32 bytes from the top of a stack must be modified to allow unwinding to continue up to the location of the saved task stack pointer when on the IRQ stack. The low / high marks of the stack are set depending on whether the sp is on an irq stack or not. Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: Masanari Iida <standby24x7@gmail.com> Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Paul Burton <paul.burton@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jason A. Donenfeld <jason@zx2c4.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15788/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-03-21 21:52:25 +07:00
struct pt_regs *regs;
int leaf;
if (!stack_page)
return 0;
/*
MIPS: IRQ Stack: Unwind IRQ stack onto task stack When the separate IRQ stack was introduced, stack unwinding only proceeded as far as the top of the IRQ stack, leading to kernel backtraces being less useful, lacking the trace of what was interrupted. Fix this by providing a means for the kernel to unwind the IRQ stack onto the interrupted task stack. The processor state is saved to the kernel task stack on interrupt. The IRQ_STACK_START macro reserves an unsigned long at the top of the IRQ stack where the interrupted task stack pointer can be saved. After the active stack is switched to the IRQ stack, save the interrupted tasks stack pointer to the reserved location. Fix the stack unwinding code to look for the frame being the top of the IRQ stack and if so get the next frame from the saved location. The existing test does not work with the separate stack since the ra is no longer pointed at ret_from_{irq,exception}. The test to stop unwinding the stack 32 bytes from the top of a stack must be modified to allow unwinding to continue up to the location of the saved task stack pointer when on the IRQ stack. The low / high marks of the stack are set depending on whether the sp is on an irq stack or not. Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: Masanari Iida <standby24x7@gmail.com> Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Paul Burton <paul.burton@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jason A. Donenfeld <jason@zx2c4.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15788/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-03-21 21:52:25 +07:00
* IRQ stacks start at IRQ_STACK_START
* task stacks at THREAD_SIZE - 32
*/
MIPS: IRQ Stack: Unwind IRQ stack onto task stack When the separate IRQ stack was introduced, stack unwinding only proceeded as far as the top of the IRQ stack, leading to kernel backtraces being less useful, lacking the trace of what was interrupted. Fix this by providing a means for the kernel to unwind the IRQ stack onto the interrupted task stack. The processor state is saved to the kernel task stack on interrupt. The IRQ_STACK_START macro reserves an unsigned long at the top of the IRQ stack where the interrupted task stack pointer can be saved. After the active stack is switched to the IRQ stack, save the interrupted tasks stack pointer to the reserved location. Fix the stack unwinding code to look for the frame being the top of the IRQ stack and if so get the next frame from the saved location. The existing test does not work with the separate stack since the ra is no longer pointed at ret_from_{irq,exception}. The test to stop unwinding the stack 32 bytes from the top of a stack must be modified to allow unwinding to continue up to the location of the saved task stack pointer when on the IRQ stack. The low / high marks of the stack are set depending on whether the sp is on an irq stack or not. Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: Masanari Iida <standby24x7@gmail.com> Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Paul Burton <paul.burton@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jason A. Donenfeld <jason@zx2c4.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15788/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-03-21 21:52:25 +07:00
low = stack_page;
if (!preemptible() && on_irq_stack(raw_smp_processor_id(), *sp)) {
high = stack_page + IRQ_STACK_START;
irq_stack_high = high;
} else {
high = stack_page + THREAD_SIZE - 32;
irq_stack_high = 0;
}
/*
* If we reached the top of the interrupt stack, start unwinding
* the interrupted task stack.
*/
if (unlikely(*sp == irq_stack_high)) {
unsigned long task_sp = *(unsigned long *)*sp;
/*
* Check that the pointer saved in the IRQ stack head points to
* something within the stack of the current task
*/
if (!object_is_on_stack((void *)task_sp))
return 0;
/*
* Follow pointer to tasks kernel stack frame where interrupted
* state was saved.
*/
regs = (struct pt_regs *)task_sp;
pc = regs->cp0_epc;
if (!user_mode(regs) && __kernel_text_address(pc)) {
*sp = regs->regs[29];
*ra = regs->regs[31];
return pc;
}
return 0;
}
if (!kallsyms_lookup_size_offset(pc, &size, &ofs))
return 0;
/*
* Return ra if an exception occurred at the first instruction
*/
if (unlikely(ofs == 0)) {
pc = *ra;
*ra = 0;
return pc;
}
info.func = (void *)(pc - ofs);
info.func_size = ofs; /* analyze from start to ofs */
leaf = get_frame_info(&info);
if (leaf < 0)
return 0;
MIPS: IRQ Stack: Unwind IRQ stack onto task stack When the separate IRQ stack was introduced, stack unwinding only proceeded as far as the top of the IRQ stack, leading to kernel backtraces being less useful, lacking the trace of what was interrupted. Fix this by providing a means for the kernel to unwind the IRQ stack onto the interrupted task stack. The processor state is saved to the kernel task stack on interrupt. The IRQ_STACK_START macro reserves an unsigned long at the top of the IRQ stack where the interrupted task stack pointer can be saved. After the active stack is switched to the IRQ stack, save the interrupted tasks stack pointer to the reserved location. Fix the stack unwinding code to look for the frame being the top of the IRQ stack and if so get the next frame from the saved location. The existing test does not work with the separate stack since the ra is no longer pointed at ret_from_{irq,exception}. The test to stop unwinding the stack 32 bytes from the top of a stack must be modified to allow unwinding to continue up to the location of the saved task stack pointer when on the IRQ stack. The low / high marks of the stack are set depending on whether the sp is on an irq stack or not. Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: Masanari Iida <standby24x7@gmail.com> Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Paul Burton <paul.burton@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jason A. Donenfeld <jason@zx2c4.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15788/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2017-03-21 21:52:25 +07:00
if (*sp < low || *sp + info.frame_size > high)
return 0;
if (leaf)
/*
* For some extreme cases, get_frame_info() can
* consider wrongly a nested function as a leaf
* one. In that cases avoid to return always the
* same value.
*/
pc = pc != *ra ? *ra : 0;
else
pc = ((unsigned long *)(*sp))[info.pc_offset];
*sp += info.frame_size;
*ra = 0;
return __kernel_text_address(pc) ? pc : 0;
}
EXPORT_SYMBOL(unwind_stack_by_address);
/* used by show_backtrace() */
unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
unsigned long pc, unsigned long *ra)
{
unsigned long stack_page = 0;
int cpu;
for_each_possible_cpu(cpu) {
if (on_irq_stack(cpu, *sp)) {
stack_page = (unsigned long)irq_stack[cpu];
break;
}
}
if (!stack_page)
stack_page = (unsigned long)task_stack_page(task);
return unwind_stack_by_address(stack_page, sp, pc, ra);
}
#endif
/*
* get_wchan - a maintenance nightmare^W^Wpain in the ass ...
*/
unsigned long get_wchan(struct task_struct *task)
{
unsigned long pc = 0;
#ifdef CONFIG_KALLSYMS
unsigned long sp;
unsigned long ra = 0;
#endif
if (!task || task == current || task->state == TASK_RUNNING)
goto out;
if (!task_stack_page(task))
goto out;
pc = thread_saved_pc(task);
#ifdef CONFIG_KALLSYMS
sp = task->thread.reg29 + schedule_mfi.frame_size;
while (in_sched_functions(pc))
pc = unwind_stack(task, &sp, pc, &ra);
#endif
out:
return pc;
}
/*
* Don't forget that the stack pointer must be aligned on a 8 bytes
* boundary for 32-bits ABI and 16 bytes for 64-bits ABI.
*/
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
sp -= get_random_int() & ~PAGE_MASK;
return sp & ALMASK;
}
static void arch_dump_stack(void *info)
{
struct pt_regs *regs;
regs = get_irq_regs();
if (regs)
show_regs(regs);
dump_stack();
}
nmi_backtrace: add more trigger_*_cpu_backtrace() methods Patch series "improvements to the nmi_backtrace code" v9. This patch series modifies the trigger_xxx_backtrace() NMI-based remote backtracing code to make it more flexible, and makes a few small improvements along the way. The motivation comes from the task isolation code, where there are scenarios where we want to be able to diagnose a case where some cpu is about to interrupt a task-isolated cpu. It can be helpful to see both where the interrupting cpu is, and also an approximation of where the cpu that is being interrupted is. The nmi_backtrace framework allows us to discover the stack of the interrupted cpu. I've tested that the change works as desired on tile, and build-tested x86, arm, mips, and sparc64. For x86 I confirmed that the generic cpuidle stuff as well as the architecture-specific routines are in the new cpuidle section. For arm, mips, and sparc I just build-tested it and made sure the generic cpuidle routines were in the new cpuidle section, but I didn't attempt to figure out which the platform-specific idle routines might be. That might be more usefully done by someone with platform experience in follow-up patches. This patch (of 4): Currently you can only request a backtrace of either all cpus, or all cpus but yourself. It can also be helpful to request a remote backtrace of a single cpu, and since we want that, the logical extension is to support a cpumask as the underlying primitive. This change modifies the existing lib/nmi_backtrace.c code to take a cpumask as its basic primitive, and modifies the linux/nmi.h code to use the new "cpumask" method instead. The existing clients of nmi_backtrace (arm and x86) are converted to using the new cpumask approach in this change. The other users of the backtracing API (sparc64 and mips) are converted to use the cpumask approach rather than the all/allbutself approach. The mips code ignored the "include_self" boolean but with this change it will now also dump a local backtrace if requested. Link: http://lkml.kernel.org/r/1472487169-14923-2-git-send-email-cmetcalf@mellanox.com Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com> Tested-by: Daniel Thompson <daniel.thompson@linaro.org> [arm] Reviewed-by: Aaron Tomlin <atomlin@redhat.com> Reviewed-by: Petr Mladek <pmladek@suse.com> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: Russell King <linux@arm.linux.org.uk> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: David Miller <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-10-08 07:02:45 +07:00
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
nmi_backtrace: add more trigger_*_cpu_backtrace() methods Patch series "improvements to the nmi_backtrace code" v9. This patch series modifies the trigger_xxx_backtrace() NMI-based remote backtracing code to make it more flexible, and makes a few small improvements along the way. The motivation comes from the task isolation code, where there are scenarios where we want to be able to diagnose a case where some cpu is about to interrupt a task-isolated cpu. It can be helpful to see both where the interrupting cpu is, and also an approximation of where the cpu that is being interrupted is. The nmi_backtrace framework allows us to discover the stack of the interrupted cpu. I've tested that the change works as desired on tile, and build-tested x86, arm, mips, and sparc64. For x86 I confirmed that the generic cpuidle stuff as well as the architecture-specific routines are in the new cpuidle section. For arm, mips, and sparc I just build-tested it and made sure the generic cpuidle routines were in the new cpuidle section, but I didn't attempt to figure out which the platform-specific idle routines might be. That might be more usefully done by someone with platform experience in follow-up patches. This patch (of 4): Currently you can only request a backtrace of either all cpus, or all cpus but yourself. It can also be helpful to request a remote backtrace of a single cpu, and since we want that, the logical extension is to support a cpumask as the underlying primitive. This change modifies the existing lib/nmi_backtrace.c code to take a cpumask as its basic primitive, and modifies the linux/nmi.h code to use the new "cpumask" method instead. The existing clients of nmi_backtrace (arm and x86) are converted to using the new cpumask approach in this change. The other users of the backtracing API (sparc64 and mips) are converted to use the cpumask approach rather than the all/allbutself approach. The mips code ignored the "include_self" boolean but with this change it will now also dump a local backtrace if requested. Link: http://lkml.kernel.org/r/1472487169-14923-2-git-send-email-cmetcalf@mellanox.com Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com> Tested-by: Daniel Thompson <daniel.thompson@linaro.org> [arm] Reviewed-by: Aaron Tomlin <atomlin@redhat.com> Reviewed-by: Petr Mladek <pmladek@suse.com> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: Russell King <linux@arm.linux.org.uk> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: David Miller <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-10-08 07:02:45 +07:00
long this_cpu = get_cpu();
if (cpumask_test_cpu(this_cpu, mask) && !exclude_self)
dump_stack();
smp_call_function_many(mask, arch_dump_stack, NULL, 1);
put_cpu();
}
2015-01-08 19:17:37 +07:00
int mips_get_process_fp_mode(struct task_struct *task)
{
int value = 0;
if (!test_tsk_thread_flag(task, TIF_32BIT_FPREGS))
value |= PR_FP_MODE_FR;
if (test_tsk_thread_flag(task, TIF_HYBRID_FPREGS))
value |= PR_FP_MODE_FRE;
return value;
}
MIPS: Force CPUs to lose FP context during mode switches Commit 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") added support for the PR_SET_FP_MODE prctl, which allows a userland program to modify its FP mode at runtime. This is most notably required if dynamic linking leads to the FP mode requirement changing at runtime from that indicated in the initial executable's ELF header. In order to avoid overhead in the general FP context restore code, it aimed to have threads in the process become unable to enable the FPU during a mode switch & have the thread calling the prctl syscall wait for all other threads in the process to be context switched at least once. Once that happens we can know that no thread in the process whose mode will be switched has live FP context, and it's safe to perform the mode switch. However in the (rare) case of modeswitches occurring in multithreaded programs this can lead to indeterminate delays for the thread invoking the prctl syscall, and the code monitoring for those context switches was woefully inadequate for all but the simplest cases. Fix this by broadcasting an IPI if other CPUs may have live FP context for an affected thread, with a handler causing those CPUs to relinquish their FPU ownership. Threads will then be allowed to continue running but will stall on the wait_on_atomic_t in enable_restore_fp_context if they attempt to use FP again whilst the mode switch is still in progress. The end result is less fragile poking at scheduler context switch counts & a more expedient completion of the mode switch. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Reviewed-by: Maciej W. Rozycki <macro@imgtec.com> Cc: Adam Buchbinder <adam.buchbinder@gmail.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: stable <stable@vger.kernel.org> # v4.0+ Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13145/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-04-21 18:43:58 +07:00
static void prepare_for_fp_mode_switch(void *info)
{
struct mm_struct *mm = info;
if (current->mm == mm)
lose_fpu(1);
}
2015-01-08 19:17:37 +07:00
int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
{
const unsigned int known_bits = PR_FP_MODE_FR | PR_FP_MODE_FRE;
struct task_struct *t;
MIPS: Force CPUs to lose FP context during mode switches Commit 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") added support for the PR_SET_FP_MODE prctl, which allows a userland program to modify its FP mode at runtime. This is most notably required if dynamic linking leads to the FP mode requirement changing at runtime from that indicated in the initial executable's ELF header. In order to avoid overhead in the general FP context restore code, it aimed to have threads in the process become unable to enable the FPU during a mode switch & have the thread calling the prctl syscall wait for all other threads in the process to be context switched at least once. Once that happens we can know that no thread in the process whose mode will be switched has live FP context, and it's safe to perform the mode switch. However in the (rare) case of modeswitches occurring in multithreaded programs this can lead to indeterminate delays for the thread invoking the prctl syscall, and the code monitoring for those context switches was woefully inadequate for all but the simplest cases. Fix this by broadcasting an IPI if other CPUs may have live FP context for an affected thread, with a handler causing those CPUs to relinquish their FPU ownership. Threads will then be allowed to continue running but will stall on the wait_on_atomic_t in enable_restore_fp_context if they attempt to use FP again whilst the mode switch is still in progress. The end result is less fragile poking at scheduler context switch counts & a more expedient completion of the mode switch. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Reviewed-by: Maciej W. Rozycki <macro@imgtec.com> Cc: Adam Buchbinder <adam.buchbinder@gmail.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: stable <stable@vger.kernel.org> # v4.0+ Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13145/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-04-21 18:43:58 +07:00
int max_users;
2015-01-08 19:17:37 +07:00
/* Check the value is valid */
if (value & ~known_bits)
return -EOPNOTSUPP;
/* Avoid inadvertently triggering emulation */
if ((value & PR_FP_MODE_FR) && raw_cpu_has_fpu &&
!(raw_current_cpu_data.fpu_id & MIPS_FPIR_F64))
2015-01-08 19:17:37 +07:00
return -EOPNOTSUPP;
if ((value & PR_FP_MODE_FRE) && raw_cpu_has_fpu && !cpu_has_fre)
2015-01-08 19:17:37 +07:00
return -EOPNOTSUPP;
/* FR = 0 not supported in MIPS R6 */
if (!(value & PR_FP_MODE_FR) && raw_cpu_has_fpu && cpu_has_mips_r6)
return -EOPNOTSUPP;
/* Proceed with the mode switch */
preempt_disable();
2015-01-08 19:17:37 +07:00
/* Save FP & vector context, then disable FPU & MSA */
if (task->signal == current->signal)
lose_fpu(1);
/* Prevent any threads from obtaining live FP context */
atomic_set(&task->mm->context.fp_mode_switching, 1);
smp_mb__after_atomic();
/*
MIPS: Force CPUs to lose FP context during mode switches Commit 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") added support for the PR_SET_FP_MODE prctl, which allows a userland program to modify its FP mode at runtime. This is most notably required if dynamic linking leads to the FP mode requirement changing at runtime from that indicated in the initial executable's ELF header. In order to avoid overhead in the general FP context restore code, it aimed to have threads in the process become unable to enable the FPU during a mode switch & have the thread calling the prctl syscall wait for all other threads in the process to be context switched at least once. Once that happens we can know that no thread in the process whose mode will be switched has live FP context, and it's safe to perform the mode switch. However in the (rare) case of modeswitches occurring in multithreaded programs this can lead to indeterminate delays for the thread invoking the prctl syscall, and the code monitoring for those context switches was woefully inadequate for all but the simplest cases. Fix this by broadcasting an IPI if other CPUs may have live FP context for an affected thread, with a handler causing those CPUs to relinquish their FPU ownership. Threads will then be allowed to continue running but will stall on the wait_on_atomic_t in enable_restore_fp_context if they attempt to use FP again whilst the mode switch is still in progress. The end result is less fragile poking at scheduler context switch counts & a more expedient completion of the mode switch. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Reviewed-by: Maciej W. Rozycki <macro@imgtec.com> Cc: Adam Buchbinder <adam.buchbinder@gmail.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: stable <stable@vger.kernel.org> # v4.0+ Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13145/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-04-21 18:43:58 +07:00
* If there are multiple online CPUs then force any which are running
* threads in this process to lose their FPU context, which they can't
* regain until fp_mode_switching is cleared later.
2015-01-08 19:17:37 +07:00
*/
if (num_online_cpus() > 1) {
MIPS: Force CPUs to lose FP context during mode switches Commit 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") added support for the PR_SET_FP_MODE prctl, which allows a userland program to modify its FP mode at runtime. This is most notably required if dynamic linking leads to the FP mode requirement changing at runtime from that indicated in the initial executable's ELF header. In order to avoid overhead in the general FP context restore code, it aimed to have threads in the process become unable to enable the FPU during a mode switch & have the thread calling the prctl syscall wait for all other threads in the process to be context switched at least once. Once that happens we can know that no thread in the process whose mode will be switched has live FP context, and it's safe to perform the mode switch. However in the (rare) case of modeswitches occurring in multithreaded programs this can lead to indeterminate delays for the thread invoking the prctl syscall, and the code monitoring for those context switches was woefully inadequate for all but the simplest cases. Fix this by broadcasting an IPI if other CPUs may have live FP context for an affected thread, with a handler causing those CPUs to relinquish their FPU ownership. Threads will then be allowed to continue running but will stall on the wait_on_atomic_t in enable_restore_fp_context if they attempt to use FP again whilst the mode switch is still in progress. The end result is less fragile poking at scheduler context switch counts & a more expedient completion of the mode switch. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Reviewed-by: Maciej W. Rozycki <macro@imgtec.com> Cc: Adam Buchbinder <adam.buchbinder@gmail.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: stable <stable@vger.kernel.org> # v4.0+ Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13145/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-04-21 18:43:58 +07:00
/* No need to send an IPI for the local CPU */
max_users = (task->mm == current->mm) ? 1 : 0;
2015-01-08 19:17:37 +07:00
MIPS: Force CPUs to lose FP context during mode switches Commit 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") added support for the PR_SET_FP_MODE prctl, which allows a userland program to modify its FP mode at runtime. This is most notably required if dynamic linking leads to the FP mode requirement changing at runtime from that indicated in the initial executable's ELF header. In order to avoid overhead in the general FP context restore code, it aimed to have threads in the process become unable to enable the FPU during a mode switch & have the thread calling the prctl syscall wait for all other threads in the process to be context switched at least once. Once that happens we can know that no thread in the process whose mode will be switched has live FP context, and it's safe to perform the mode switch. However in the (rare) case of modeswitches occurring in multithreaded programs this can lead to indeterminate delays for the thread invoking the prctl syscall, and the code monitoring for those context switches was woefully inadequate for all but the simplest cases. Fix this by broadcasting an IPI if other CPUs may have live FP context for an affected thread, with a handler causing those CPUs to relinquish their FPU ownership. Threads will then be allowed to continue running but will stall on the wait_on_atomic_t in enable_restore_fp_context if they attempt to use FP again whilst the mode switch is still in progress. The end result is less fragile poking at scheduler context switch counts & a more expedient completion of the mode switch. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Reviewed-by: Maciej W. Rozycki <macro@imgtec.com> Cc: Adam Buchbinder <adam.buchbinder@gmail.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: stable <stable@vger.kernel.org> # v4.0+ Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13145/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-04-21 18:43:58 +07:00
if (atomic_read(&current->mm->mm_users) > max_users)
smp_call_function(prepare_for_fp_mode_switch,
(void *)current->mm, 1);
2015-01-08 19:17:37 +07:00
}
/*
* There are now no threads of the process with live FP context, so it
* is safe to proceed with the FP mode switch.
*/
for_each_thread(task, t) {
/* Update desired FP register width */
if (value & PR_FP_MODE_FR) {
clear_tsk_thread_flag(t, TIF_32BIT_FPREGS);
} else {
set_tsk_thread_flag(t, TIF_32BIT_FPREGS);
clear_tsk_thread_flag(t, TIF_MSA_CTX_LIVE);
}
/* Update desired FP single layout */
if (value & PR_FP_MODE_FRE)
set_tsk_thread_flag(t, TIF_HYBRID_FPREGS);
else
clear_tsk_thread_flag(t, TIF_HYBRID_FPREGS);
}
/* Allow threads to use FP again */
atomic_set(&task->mm->context.fp_mode_switching, 0);
preempt_enable();
2015-01-08 19:17:37 +07:00
return 0;
}
#if defined(CONFIG_32BIT) || defined(CONFIG_MIPS32_O32)
void mips_dump_regs32(u32 *uregs, const struct pt_regs *regs)
{
unsigned int i;
for (i = MIPS32_EF_R1; i <= MIPS32_EF_R31; i++) {
/* k0/k1 are copied as zero. */
if (i == MIPS32_EF_R26 || i == MIPS32_EF_R27)
uregs[i] = 0;
else
uregs[i] = regs->regs[i - MIPS32_EF_R0];
}
uregs[MIPS32_EF_LO] = regs->lo;
uregs[MIPS32_EF_HI] = regs->hi;
uregs[MIPS32_EF_CP0_EPC] = regs->cp0_epc;
uregs[MIPS32_EF_CP0_BADVADDR] = regs->cp0_badvaddr;
uregs[MIPS32_EF_CP0_STATUS] = regs->cp0_status;
uregs[MIPS32_EF_CP0_CAUSE] = regs->cp0_cause;
}
#endif /* CONFIG_32BIT || CONFIG_MIPS32_O32 */
#ifdef CONFIG_64BIT
void mips_dump_regs64(u64 *uregs, const struct pt_regs *regs)
{
unsigned int i;
for (i = MIPS64_EF_R1; i <= MIPS64_EF_R31; i++) {
/* k0/k1 are copied as zero. */
if (i == MIPS64_EF_R26 || i == MIPS64_EF_R27)
uregs[i] = 0;
else
uregs[i] = regs->regs[i - MIPS64_EF_R0];
}
uregs[MIPS64_EF_LO] = regs->lo;
uregs[MIPS64_EF_HI] = regs->hi;
uregs[MIPS64_EF_CP0_EPC] = regs->cp0_epc;
uregs[MIPS64_EF_CP0_BADVADDR] = regs->cp0_badvaddr;
uregs[MIPS64_EF_CP0_STATUS] = regs->cp0_status;
uregs[MIPS64_EF_CP0_CAUSE] = regs->cp0_cause;
}
#endif /* CONFIG_64BIT */