mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
2464cc4c34
After a treclaim, we expect to be in non-transactional state. If we
don't clear the current thread's MSR[TS] before we get preempted, then
tm_recheckpoint_new_task() will recheckpoint and we get rescheduled in
suspended transaction state.
When handling a signal caught in transactional state,
handle_rt_signal64() calls get_tm_stackpointer() that treclaims the
transaction using tm_reclaim_current() but without clearing the
thread's MSR[TS]. This can cause the TM Bad Thing exception below if
later we pagefault and get preempted trying to access the user's
sigframe, using __put_user(). Afterwards, when we are rescheduled back
into do_page_fault() (but now in suspended state since the thread's
MSR[TS] was not cleared), upon executing 'rfid' after completion of
the page fault handling, the exception is raised because a transition
from suspended to non-transactional state is invalid.
Unexpected TM Bad Thing exception at c00000000000de44 (msr 0x8000000302a03031) tm_scratch=800000010280b033
Oops: Unrecoverable exception, sig: 6 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
CPU: 25 PID: 15547 Comm: a.out Not tainted 5.4.0-rc2 #32
NIP: c00000000000de44 LR: c000000000034728 CTR: 0000000000000000
REGS: c00000003fe7bd70 TRAP: 0700 Not tainted (5.4.0-rc2)
MSR: 8000000302a03031 <SF,VEC,VSX,FP,ME,IR,DR,LE,TM[SE]> CR: 44000884 XER: 00000000
CFAR: c00000000000dda4 IRQMASK: 0
PACATMSCRATCH: 800000010280b033
GPR00: c000000000034728 c000000f65a17c80 c000000001662800 00007fffacf3fd78
GPR04: 0000000000001000 0000000000001000 0000000000000000 c000000f611f8af0
GPR08: 0000000000000000 0000000078006001 0000000000000000 000c000000000000
GPR12: c000000f611f84b0 c00000003ffcb200 0000000000000000 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR20: 0000000000000000 0000000000000000 0000000000000000 c000000f611f8140
GPR24: 0000000000000000 00007fffacf3fd68 c000000f65a17d90 c000000f611f7800
GPR28: c000000f65a17e90 c000000f65a17e90 c000000001685e18 00007fffacf3f000
NIP [c00000000000de44] fast_exception_return+0xf4/0x1b0
LR [c000000000034728] handle_rt_signal64+0x78/0xc50
Call Trace:
[c000000f65a17c80] [c000000000034710] handle_rt_signal64+0x60/0xc50 (unreliable)
[c000000f65a17d30] [c000000000023640] do_notify_resume+0x330/0x460
[c000000f65a17e20] [c00000000000dcc4] ret_from_except_lite+0x70/0x74
Instruction dump:
7c4ff120 e8410170 7c5a03a6 38400000 f8410060 e8010070 e8410080 e8610088
60000000 60000000 e8810090 e8210078 <4c000024> 48000000 e8610178 88ed0989
---[ end trace 93094aa44b442f87 ]---
The simplified sequence of events that triggers the above exception is:
  ...                           # userspace in NON-TRANSACTIONAL state
  tbegin                        # userspace in TRANSACTIONAL state
  signal delivery               # kernelspace in SUSPENDED state
  handle_rt_signal64()
    get_tm_stackpointer()
      treclaim                  # kernelspace in NON-TRANSACTIONAL state
    __put_user()
      page fault happens. We will never get back here because of the TM Bad Thing exception.

  page fault handling kicks in and we voluntarily preempt ourselves
  do_page_fault()
    __schedule()
      __switch_to(other_task)

  our task is rescheduled and we recheckpoint because the thread's MSR[TS] was not cleared
  __switch_to(our_task)
    switch_to_tm()
      tm_recheckpoint_new_task()
        trechkpt                # kernelspace in SUSPENDED state

  The page fault handling resumes, but now we are in suspended transaction state
  do_page_fault() completes
  rfid     <----- trying to get back where the page fault happened (we were non-transactional back then)
  TM Bad Thing                  # illegal transition from suspended to non-transactional
This patch fixes that issue by clearing the current thread's MSR[TS]
just after treclaim in get_tm_stackpointer() so that we stay in
non-transactional state in case we are preempted. In order to make
treclaim and clearing the thread's MSR[TS] atomic from a preemption
perspective when CONFIG_PREEMPT is set, preempt_disable/enable() is
used. It's also necessary to save the previous value of the thread's
MSR before get_tm_stackpointer() is called so that it can be exposed
to the signal handler later in setup_tm_sigcontexts(), informing it of
the userspace MSR at the moment of the signal delivery.
Found with tm-signal-context-force-tm kernel selftest.
Fixes: 2b0a576d15 ("powerpc: Add new transactional memory state to the signal context")
Cc: stable@vger.kernel.org # v3.9
Signed-off-by: Gustavo Luiz Duarte <gustavold@linux.ibm.com>
Acked-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200211033831.11165-1-gustavold@linux.ibm.com
227 lines
6.2 KiB
C
227 lines
6.2 KiB
C
/*
 * Common signal handling code for both 32 and 64 bits
 *
 *    Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
 *    Extracted from signal_32.c and signal_64.c
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License.  See the file README.legal in the main directory of
 * this archive for more details.
 */
|
|
|
|
#include <linux/tracehook.h>
|
|
#include <linux/signal.h>
|
|
#include <linux/uprobes.h>
|
|
#include <linux/key.h>
|
|
#include <linux/context_tracking.h>
|
|
#include <linux/livepatch.h>
|
|
#include <linux/syscalls.h>
|
|
#include <asm/hw_breakpoint.h>
|
|
#include <linux/uaccess.h>
|
|
#include <asm/unistd.h>
|
|
#include <asm/debug.h>
|
|
#include <asm/tm.h>
|
|
|
|
#include "signal.h"
|
|
|
|
/* Log an error when sending an unhandled signal to a process. Controlled
 * through debug.exception-trace sysctl.
 */
|
|
|
|
int show_unhandled_signals = 1;
|
|
|
|
/*
|
|
* Allocate space for the signal frame
|
|
*/
|
|
void __user *get_sigframe(struct ksignal *ksig, unsigned long sp,
|
|
size_t frame_size, int is_32)
|
|
{
|
|
unsigned long oldsp, newsp;
|
|
|
|
/* Default to using normal stack */
|
|
oldsp = get_clean_sp(sp, is_32);
|
|
oldsp = sigsp(oldsp, ksig);
|
|
newsp = (oldsp - frame_size) & ~0xFUL;
|
|
|
|
/* Check access */
|
|
if (!access_ok((void __user *)newsp, oldsp - newsp))
|
|
return NULL;
|
|
|
|
return (void __user *)newsp;
|
|
}
|
|
|
|
static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
|
|
int has_handler)
|
|
{
|
|
unsigned long ret = regs->gpr[3];
|
|
int restart = 1;
|
|
|
|
/* syscall ? */
|
|
if (TRAP(regs) != 0x0C00)
|
|
return;
|
|
|
|
/* error signalled ? */
|
|
if (!(regs->ccr & 0x10000000))
|
|
return;
|
|
|
|
switch (ret) {
|
|
case ERESTART_RESTARTBLOCK:
|
|
case ERESTARTNOHAND:
|
|
/* ERESTARTNOHAND means that the syscall should only be
|
|
* restarted if there was no handler for the signal, and since
|
|
* we only get here if there is a handler, we dont restart.
|
|
*/
|
|
restart = !has_handler;
|
|
break;
|
|
case ERESTARTSYS:
|
|
/* ERESTARTSYS means to restart the syscall if there is no
|
|
* handler or the handler was registered with SA_RESTART
|
|
*/
|
|
restart = !has_handler || (ka->sa.sa_flags & SA_RESTART) != 0;
|
|
break;
|
|
case ERESTARTNOINTR:
|
|
/* ERESTARTNOINTR means that the syscall should be
|
|
* called again after the signal handler returns.
|
|
*/
|
|
break;
|
|
default:
|
|
return;
|
|
}
|
|
if (restart) {
|
|
if (ret == ERESTART_RESTARTBLOCK)
|
|
regs->gpr[0] = __NR_restart_syscall;
|
|
else
|
|
regs->gpr[3] = regs->orig_gpr3;
|
|
regs->nip -= 4;
|
|
regs->result = 0;
|
|
} else {
|
|
regs->result = -EINTR;
|
|
regs->gpr[3] = EINTR;
|
|
regs->ccr |= 0x10000000;
|
|
}
|
|
}
|
|
|
|
static void do_signal(struct task_struct *tsk)
|
|
{
|
|
sigset_t *oldset = sigmask_to_save();
|
|
struct ksignal ksig = { .sig = 0 };
|
|
int ret;
|
|
int is32 = is_32bit_task();
|
|
|
|
BUG_ON(tsk != current);
|
|
|
|
get_signal(&ksig);
|
|
|
|
/* Is there any syscall restart business here ? */
|
|
check_syscall_restart(tsk->thread.regs, &ksig.ka, ksig.sig > 0);
|
|
|
|
if (ksig.sig <= 0) {
|
|
/* No signal to deliver -- put the saved sigmask back */
|
|
restore_saved_sigmask();
|
|
tsk->thread.regs->trap = 0;
|
|
return; /* no signals delivered */
|
|
}
|
|
|
|
#ifndef CONFIG_PPC_ADV_DEBUG_REGS
|
|
/*
|
|
* Reenable the DABR before delivering the signal to
|
|
* user space. The DABR will have been cleared if it
|
|
* triggered inside the kernel.
|
|
*/
|
|
if (tsk->thread.hw_brk.address && tsk->thread.hw_brk.type)
|
|
__set_breakpoint(&tsk->thread.hw_brk);
|
|
#endif
|
|
/* Re-enable the breakpoints for the signal stack */
|
|
thread_change_pc(tsk, tsk->thread.regs);
|
|
|
|
rseq_signal_deliver(&ksig, tsk->thread.regs);
|
|
|
|
if (is32) {
|
|
if (ksig.ka.sa.sa_flags & SA_SIGINFO)
|
|
ret = handle_rt_signal32(&ksig, oldset, tsk);
|
|
else
|
|
ret = handle_signal32(&ksig, oldset, tsk);
|
|
} else {
|
|
ret = handle_rt_signal64(&ksig, oldset, tsk);
|
|
}
|
|
|
|
tsk->thread.regs->trap = 0;
|
|
signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP));
|
|
}
|
|
|
|
void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
|
|
{
|
|
user_exit();
|
|
|
|
/* Check valid addr_limit, TIF check is done there */
|
|
addr_limit_user_check();
|
|
|
|
if (thread_info_flags & _TIF_UPROBE)
|
|
uprobe_notify_resume(regs);
|
|
|
|
if (thread_info_flags & _TIF_PATCH_PENDING)
|
|
klp_update_patch_state(current);
|
|
|
|
if (thread_info_flags & _TIF_SIGPENDING) {
|
|
BUG_ON(regs != current->thread.regs);
|
|
do_signal(current);
|
|
}
|
|
|
|
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
|
|
clear_thread_flag(TIF_NOTIFY_RESUME);
|
|
tracehook_notify_resume(regs);
|
|
rseq_handle_notify_resume(NULL, regs);
|
|
}
|
|
|
|
user_enter();
|
|
}
|
|
|
|
unsigned long get_tm_stackpointer(struct task_struct *tsk)
|
|
{
|
|
/* When in an active transaction that takes a signal, we need to be
|
|
* careful with the stack. It's possible that the stack has moved back
|
|
* up after the tbegin. The obvious case here is when the tbegin is
|
|
* called inside a function that returns before a tend. In this case,
|
|
* the stack is part of the checkpointed transactional memory state.
|
|
* If we write over this non transactionally or in suspend, we are in
|
|
* trouble because if we get a tm abort, the program counter and stack
|
|
* pointer will be back at the tbegin but our in memory stack won't be
|
|
* valid anymore.
|
|
*
|
|
* To avoid this, when taking a signal in an active transaction, we
|
|
* need to use the stack pointer from the checkpointed state, rather
|
|
* than the speculated state. This ensures that the signal context
|
|
* (written tm suspended) will be written below the stack required for
|
|
* the rollback. The transaction is aborted because of the treclaim,
|
|
* so any memory written between the tbegin and the signal will be
|
|
* rolled back anyway.
|
|
*
|
|
* For signals taken in non-TM or suspended mode, we use the
|
|
* normal/non-checkpointed stack pointer.
|
|
*/
|
|
|
|
unsigned long ret = tsk->thread.regs->gpr[1];
|
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
|
BUG_ON(tsk != current);
|
|
|
|
if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) {
|
|
preempt_disable();
|
|
tm_reclaim_current(TM_CAUSE_SIGNAL);
|
|
if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr))
|
|
ret = tsk->thread.ckpt_regs.gpr[1];
|
|
|
|
/*
|
|
* If we treclaim, we must clear the current thread's TM bits
|
|
* before re-enabling preemption. Otherwise we might be
|
|
* preempted and have the live MSR[TS] changed behind our back
|
|
* (tm_recheckpoint_new_task() would recheckpoint). Besides, we
|
|
* enter the signal handler in non-transactional state.
|
|
*/
|
|
tsk->thread.regs->msr &= ~MSR_TS_MASK;
|
|
preempt_enable();
|
|
}
|
|
#endif
|
|
return ret;
|
|
}
|