linux_dsm_epyc7002/arch/sparc/kernel/ptrace_64.c

1202 lines
28 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/* ptrace.c: Sparc process tracing support.
*
* Copyright (C) 1996, 2008 David S. Miller (davem@davemloft.net)
* Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
*
* Based upon code written by Ross Biro, Linus Torvalds, Bob Manson,
* and David Mosberger.
*
* Added Linux support -miguel (weird, eh?, the original code was meant
* to emulate SunOS).
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/smp.h>
#include <linux/security.h>
#include <linux/seccomp.h>
#include <linux/audit.h>
#include <linux/signal.h>
#include <linux/regset.h>
#include <linux/tracehook.h>
#include <trace/syscall.h>
#include <linux/compat.h>
#include <linux/elf.h>
#include <linux/context_tracking.h>
#include <asm/asi.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
#include <asm/psrcompat.h>
#include <asm/visasm.h>
#include <asm/spitfire.h>
#include <asm/page.h>
#include <asm/cpudata.h>
#include <asm/cacheflush.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
#include "entry.h"
/* #define ALLOW_INIT_TRACING */
struct pt_regs_offset {
const char *name;
int offset;
};
#define REG_OFFSET_NAME(n, r) \
{.name = n, .offset = (PT_V9_##r)}
#define REG_OFFSET_END {.name = NULL, .offset = 0}
static const struct pt_regs_offset regoffset_table[] = {
REG_OFFSET_NAME("g0", G0),
REG_OFFSET_NAME("g1", G1),
REG_OFFSET_NAME("g2", G2),
REG_OFFSET_NAME("g3", G3),
REG_OFFSET_NAME("g4", G4),
REG_OFFSET_NAME("g5", G5),
REG_OFFSET_NAME("g6", G6),
REG_OFFSET_NAME("g7", G7),
REG_OFFSET_NAME("i0", I0),
REG_OFFSET_NAME("i1", I1),
REG_OFFSET_NAME("i2", I2),
REG_OFFSET_NAME("i3", I3),
REG_OFFSET_NAME("i4", I4),
REG_OFFSET_NAME("i5", I5),
REG_OFFSET_NAME("i6", I6),
REG_OFFSET_NAME("i7", I7),
REG_OFFSET_NAME("tstate", TSTATE),
REG_OFFSET_NAME("pc", TPC),
REG_OFFSET_NAME("npc", TNPC),
REG_OFFSET_NAME("y", Y),
REG_OFFSET_NAME("lr", I7),
REG_OFFSET_END,
};
/*
* Called by kernel/ptrace.c when detaching..
*
* Make sure single step bits etc are not set.
*/
void ptrace_disable(struct task_struct *child)
{
/* nothing to do */
}
/* To get the necessary page struct, access_process_vm() first calls
* get_user_pages(). This has done a flush_dcache_page() on the
* accessed page. Then our caller (copy_{to,from}_user_page()) did
* to memcpy to read/write the data from that page.
*
* Now, the only thing we have to do is:
* 1) flush the D-cache if it's possible than an illegal alias
* has been created
* 2) flush the I-cache if this is pre-cheetah and we did a write
*/
void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
unsigned long uaddr, void *kaddr,
unsigned long len, int write)
{
BUG_ON(len > PAGE_SIZE);
if (tlb_type == hypervisor)
return;
preempt_disable();
#ifdef DCACHE_ALIASING_POSSIBLE
/* If bit 13 of the kernel address we used to access the
* user page is the same as the virtual address that page
* is mapped to in the user's address space, we can skip the
* D-cache flush.
*/
if ((uaddr ^ (unsigned long) kaddr) & (1UL << 13)) {
unsigned long start = __pa(kaddr);
unsigned long end = start + len;
unsigned long dcache_line_size;
dcache_line_size = local_cpu_data().dcache_line_size;
if (tlb_type == spitfire) {
for (; start < end; start += dcache_line_size)
spitfire_put_dcache_tag(start & 0x3fe0, 0x0);
} else {
start &= ~(dcache_line_size - 1);
for (; start < end; start += dcache_line_size)
__asm__ __volatile__(
"stxa %%g0, [%0] %1\n\t"
"membar #Sync"
: /* no outputs */
: "r" (start),
"i" (ASI_DCACHE_INVALIDATE));
}
}
#endif
if (write && tlb_type == spitfire) {
unsigned long start = (unsigned long) kaddr;
unsigned long end = start + len;
unsigned long icache_line_size;
icache_line_size = local_cpu_data().icache_line_size;
for (; start < end; start += icache_line_size)
flushi(start);
}
preempt_enable();
}
EXPORT_SYMBOL_GPL(flush_ptrace_access);
static int get_from_target(struct task_struct *target, unsigned long uaddr,
void *kbuf, int len)
{
if (target == current) {
if (copy_from_user(kbuf, (void __user *) uaddr, len))
return -EFAULT;
} else {
int len2 = access_process_vm(target, uaddr, kbuf, len,
FOLL_FORCE);
if (len2 != len)
return -EFAULT;
}
return 0;
}
static int set_to_target(struct task_struct *target, unsigned long uaddr,
void *kbuf, int len)
{
if (target == current) {
if (copy_to_user((void __user *) uaddr, kbuf, len))
return -EFAULT;
} else {
int len2 = access_process_vm(target, uaddr, kbuf, len,
FOLL_FORCE | FOLL_WRITE);
if (len2 != len)
return -EFAULT;
}
return 0;
}
static int regwindow64_get(struct task_struct *target,
const struct pt_regs *regs,
struct reg_window *wbuf)
{
unsigned long rw_addr = regs->u_regs[UREG_I6];
sparc64: Make montmul/montsqr/mpmul usable in 32-bit threads. The Montgomery Multiply, Montgomery Square, and Multiple-Precision Multiply instructions work by loading a combination of the floating point and multiple register windows worth of integer registers with the inputs. These values are 64-bit. But for 32-bit userland processes we only save the low 32-bits of each integer register during a register spill. This is because the register window save area is in the user stack and has a fixed layout. Therefore, the only way to use these instruction in 32-bit mode is to perform the following sequence: 1) Load the top-32bits of a choosen integer register with a sentinel, say "-1". This will be in the outer-most register window. The idea is that we're trying to see if the outer-most register window gets spilled, and thus the 64-bit values were truncated. 2) Load all the inputs for the montmul/montsqr/mpmul instruction, down to the inner-most register window. 3) Execute the opcode. 4) Traverse back up to the outer-most register window. 5) Check the sentinel, if it's still "-1" store the results. Otherwise retry the entire sequence. This retry is extremely troublesome. If you're just unlucky and an interrupt or other trap happens, it'll push that outer-most window to the stack and clear the sentinel when we restore it. We could retry forever and never make forward progress if interrupts arrive at a fast enough rate (consider perf events as one example). So we have do limited retries and fallback to software which is extremely non-deterministic. Luckily it's very straightforward to provide a mechanism to let 32-bit applications use a 64-bit stack. Stacks in 64-bit mode are biased by 2047 bytes, which means that the lowest bit is set in the actual %sp register value. So if we see bit zero set in a 32-bit application's stack we treat it like a 64-bit stack. Runtime detection of such a facility is tricky, and cumbersome at best. For example, just trying to use a biased stack and seeing if it works is hard to recover from (the signal handler will need to use an alt stack, plus something along the lines of longjmp). Therefore, we add a system call to report a bitmask of arch specific features like this in a cheap and less hairy way. With help from Andy Polyakov. Signed-off-by: David S. Miller <davem@davemloft.net>
2012-10-27 05:18:37 +07:00
if (!test_thread_64bit_stack(rw_addr)) {
struct reg_window32 win32;
int i;
if (get_from_target(target, rw_addr, &win32, sizeof(win32)))
return -EFAULT;
for (i = 0; i < 8; i++)
wbuf->locals[i] = win32.locals[i];
for (i = 0; i < 8; i++)
wbuf->ins[i] = win32.ins[i];
} else {
rw_addr += STACK_BIAS;
if (get_from_target(target, rw_addr, wbuf, sizeof(*wbuf)))
return -EFAULT;
}
return 0;
}
static int regwindow64_set(struct task_struct *target,
const struct pt_regs *regs,
struct reg_window *wbuf)
{
unsigned long rw_addr = regs->u_regs[UREG_I6];
sparc64: Make montmul/montsqr/mpmul usable in 32-bit threads. The Montgomery Multiply, Montgomery Square, and Multiple-Precision Multiply instructions work by loading a combination of the floating point and multiple register windows worth of integer registers with the inputs. These values are 64-bit. But for 32-bit userland processes we only save the low 32-bits of each integer register during a register spill. This is because the register window save area is in the user stack and has a fixed layout. Therefore, the only way to use these instruction in 32-bit mode is to perform the following sequence: 1) Load the top-32bits of a choosen integer register with a sentinel, say "-1". This will be in the outer-most register window. The idea is that we're trying to see if the outer-most register window gets spilled, and thus the 64-bit values were truncated. 2) Load all the inputs for the montmul/montsqr/mpmul instruction, down to the inner-most register window. 3) Execute the opcode. 4) Traverse back up to the outer-most register window. 5) Check the sentinel, if it's still "-1" store the results. Otherwise retry the entire sequence. This retry is extremely troublesome. If you're just unlucky and an interrupt or other trap happens, it'll push that outer-most window to the stack and clear the sentinel when we restore it. We could retry forever and never make forward progress if interrupts arrive at a fast enough rate (consider perf events as one example). So we have do limited retries and fallback to software which is extremely non-deterministic. Luckily it's very straightforward to provide a mechanism to let 32-bit applications use a 64-bit stack. Stacks in 64-bit mode are biased by 2047 bytes, which means that the lowest bit is set in the actual %sp register value. So if we see bit zero set in a 32-bit application's stack we treat it like a 64-bit stack. Runtime detection of such a facility is tricky, and cumbersome at best. For example, just trying to use a biased stack and seeing if it works is hard to recover from (the signal handler will need to use an alt stack, plus something along the lines of longjmp). Therefore, we add a system call to report a bitmask of arch specific features like this in a cheap and less hairy way. With help from Andy Polyakov. Signed-off-by: David S. Miller <davem@davemloft.net>
2012-10-27 05:18:37 +07:00
if (!test_thread_64bit_stack(rw_addr)) {
struct reg_window32 win32;
int i;
for (i = 0; i < 8; i++)
win32.locals[i] = wbuf->locals[i];
for (i = 0; i < 8; i++)
win32.ins[i] = wbuf->ins[i];
if (set_to_target(target, rw_addr, &win32, sizeof(win32)))
return -EFAULT;
} else {
rw_addr += STACK_BIAS;
if (set_to_target(target, rw_addr, wbuf, sizeof(*wbuf)))
return -EFAULT;
}
return 0;
}
enum sparc_regset {
REGSET_GENERAL,
REGSET_FP,
};
static int genregs64_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
const struct pt_regs *regs = task_pt_regs(target);
int ret;
if (target == current)
flushw_user();
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
regs->u_regs,
0, 16 * sizeof(u64));
if (!ret && count && pos < (32 * sizeof(u64))) {
struct reg_window window;
if (regwindow64_get(target, regs, &window))
return -EFAULT;
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&window,
16 * sizeof(u64),
32 * sizeof(u64));
}
if (!ret) {
/* TSTATE, TPC, TNPC */
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&regs->tstate,
32 * sizeof(u64),
35 * sizeof(u64));
}
if (!ret) {
unsigned long y = regs->y;
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&y,
35 * sizeof(u64),
36 * sizeof(u64));
}
if (!ret) {
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
36 * sizeof(u64), -1);
}
return ret;
}
static int genregs64_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
struct pt_regs *regs = task_pt_regs(target);
int ret;
if (target == current)
flushw_user();
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
regs->u_regs,
0, 16 * sizeof(u64));
if (!ret && count && pos < (32 * sizeof(u64))) {
struct reg_window window;
if (regwindow64_get(target, regs, &window))
return -EFAULT;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&window,
16 * sizeof(u64),
32 * sizeof(u64));
if (!ret &&
regwindow64_set(target, regs, &window))
return -EFAULT;
}
if (!ret && count > 0) {
unsigned long tstate;
/* TSTATE */
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&tstate,
32 * sizeof(u64),
33 * sizeof(u64));
if (!ret) {
sparc: Fix debugger syscall restart interactions. So, forever, we've had this ptrace_signal_deliver implementation which tries to handle all of the nasties that can occur when the debugger looks at a process about to take a signal. It's meant to address all of these issues inside of the kernel so that the debugger need not be mindful of such things. Problem is, this doesn't work. The idea was that we should do the syscall restart business first, so that the debugger captures that state. Otherwise, if the debugger for example saves the child's state, makes the child execute something else, then restores the saved state, we won't handle the syscall restart properly because we lose the "we're in a syscall" state. The code here worked for most cases, but if the debugger actually passes the signal through to the child unaltered, it's possible that we would do a syscall restart when we shouldn't have. In particular this breaks the case of debugging a process under a gdb which is being debugged by yet another gdb. gdb uses sigsuspend to wait for SIGCHLD of the inferior, but if gdb itself is being debugged by a top-level gdb we get a ptrace_stop(). The top-level gdb does a PTRACE_CONT with SIGCHLD to let the inferior gdb see the signal. But ptrace_signal_deliver() assumed the debugger would cancel out the signal and therefore did a syscall restart, because the return error was ERESTARTNOHAND. Fix this by simply making ptrace_signal_deliver() a nop, and providing a way for the debugger to control system call restarting properly: 1) Report a "in syscall" software bit in regs->{tstate,psr}. It is set early on in trap entry to a system call and is fully visible to the debugger via ptrace() and regsets. 2) Test this bit right before doing a syscall restart. We have to do a final recheck right after get_signal_to_deliver() in case the debugger cleared the bit during ptrace_stop(). 3) Clear the bit in trap return so we don't accidently try to set that bit in the real register. As a result we also get a ptrace_{is,clear}_syscall() for sparc32 just like sparc64 has. M68K has this same exact bug, and is now the only other user of the ptrace_signal_deliver hook. It needs to be fixed in the same exact way as sparc. Signed-off-by: David S. Miller <davem@davemloft.net>
2008-05-11 16:07:19 +07:00
/* Only the condition codes and the "in syscall"
* state can be modified in the %tstate register.
*/
sparc: Fix debugger syscall restart interactions. So, forever, we've had this ptrace_signal_deliver implementation which tries to handle all of the nasties that can occur when the debugger looks at a process about to take a signal. It's meant to address all of these issues inside of the kernel so that the debugger need not be mindful of such things. Problem is, this doesn't work. The idea was that we should do the syscall restart business first, so that the debugger captures that state. Otherwise, if the debugger for example saves the child's state, makes the child execute something else, then restores the saved state, we won't handle the syscall restart properly because we lose the "we're in a syscall" state. The code here worked for most cases, but if the debugger actually passes the signal through to the child unaltered, it's possible that we would do a syscall restart when we shouldn't have. In particular this breaks the case of debugging a process under a gdb which is being debugged by yet another gdb. gdb uses sigsuspend to wait for SIGCHLD of the inferior, but if gdb itself is being debugged by a top-level gdb we get a ptrace_stop(). The top-level gdb does a PTRACE_CONT with SIGCHLD to let the inferior gdb see the signal. But ptrace_signal_deliver() assumed the debugger would cancel out the signal and therefore did a syscall restart, because the return error was ERESTARTNOHAND. Fix this by simply making ptrace_signal_deliver() a nop, and providing a way for the debugger to control system call restarting properly: 1) Report a "in syscall" software bit in regs->{tstate,psr}. It is set early on in trap entry to a system call and is fully visible to the debugger via ptrace() and regsets. 2) Test this bit right before doing a syscall restart. We have to do a final recheck right after get_signal_to_deliver() in case the debugger cleared the bit during ptrace_stop(). 3) Clear the bit in trap return so we don't accidently try to set that bit in the real register. As a result we also get a ptrace_{is,clear}_syscall() for sparc32 just like sparc64 has. M68K has this same exact bug, and is now the only other user of the ptrace_signal_deliver hook. It needs to be fixed in the same exact way as sparc. Signed-off-by: David S. Miller <davem@davemloft.net>
2008-05-11 16:07:19 +07:00
tstate &= (TSTATE_ICC | TSTATE_XCC | TSTATE_SYSCALL);
regs->tstate &= ~(TSTATE_ICC | TSTATE_XCC | TSTATE_SYSCALL);
regs->tstate |= tstate;
}
}
if (!ret) {
/* TPC, TNPC */
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&regs->tpc,
33 * sizeof(u64),
35 * sizeof(u64));
}
if (!ret) {
unsigned long y = regs->y;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&y,
35 * sizeof(u64),
36 * sizeof(u64));
if (!ret)
regs->y = y;
}
if (!ret)
ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
36 * sizeof(u64), -1);
return ret;
}
static int fpregs64_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
const unsigned long *fpregs = task_thread_info(target)->fpregs;
unsigned long fprs, fsr, gsr;
int ret;
if (target == current)
save_and_clear_fpu();
fprs = task_thread_info(target)->fpsaved[0];
if (fprs & FPRS_DL)
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
fpregs,
0, 16 * sizeof(u64));
else
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
0,
16 * sizeof(u64));
if (!ret) {
if (fprs & FPRS_DU)
ret = user_regset_copyout(&pos, &count,
&kbuf, &ubuf,
fpregs + 16,
16 * sizeof(u64),
32 * sizeof(u64));
else
ret = user_regset_copyout_zero(&pos, &count,
&kbuf, &ubuf,
16 * sizeof(u64),
32 * sizeof(u64));
}
if (fprs & FPRS_FEF) {
fsr = task_thread_info(target)->xfsr[0];
gsr = task_thread_info(target)->gsr[0];
} else {
fsr = gsr = 0;
}
if (!ret)
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&fsr,
32 * sizeof(u64),
33 * sizeof(u64));
if (!ret)
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&gsr,
33 * sizeof(u64),
34 * sizeof(u64));
if (!ret)
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&fprs,
34 * sizeof(u64),
35 * sizeof(u64));
if (!ret)
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
35 * sizeof(u64), -1);
return ret;
}
static int fpregs64_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
unsigned long *fpregs = task_thread_info(target)->fpregs;
unsigned long fprs;
int ret;
if (target == current)
save_and_clear_fpu();
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
fpregs,
0, 32 * sizeof(u64));
if (!ret)
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
task_thread_info(target)->xfsr,
32 * sizeof(u64),
33 * sizeof(u64));
if (!ret)
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
task_thread_info(target)->gsr,
33 * sizeof(u64),
34 * sizeof(u64));
fprs = task_thread_info(target)->fpsaved[0];
if (!ret && count > 0) {
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&fprs,
34 * sizeof(u64),
35 * sizeof(u64));
}
fprs |= (FPRS_FEF | FPRS_DL | FPRS_DU);
task_thread_info(target)->fpsaved[0] = fprs;
if (!ret)
ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
35 * sizeof(u64), -1);
return ret;
}
static const struct user_regset sparc64_regsets[] = {
/* Format is:
* G0 --> G7
* O0 --> O7
* L0 --> L7
* I0 --> I7
* TSTATE, TPC, TNPC, Y
*/
[REGSET_GENERAL] = {
.core_note_type = NT_PRSTATUS,
.n = 36,
.size = sizeof(u64), .align = sizeof(u64),
.get = genregs64_get, .set = genregs64_set
},
/* Format is:
* F0 --> F63
* FSR
* GSR
* FPRS
*/
[REGSET_FP] = {
.core_note_type = NT_PRFPREG,
.n = 35,
.size = sizeof(u64), .align = sizeof(u64),
.get = fpregs64_get, .set = fpregs64_set
},
};
static const struct user_regset_view user_sparc64_view = {
.name = "sparc64", .e_machine = EM_SPARCV9,
.regsets = sparc64_regsets, .n = ARRAY_SIZE(sparc64_regsets)
};
#ifdef CONFIG_COMPAT
static int genregs32_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
const struct pt_regs *regs = task_pt_regs(target);
compat_ulong_t __user *reg_window;
compat_ulong_t *k = kbuf;
compat_ulong_t __user *u = ubuf;
compat_ulong_t reg;
if (target == current)
flushw_user();
pos /= sizeof(reg);
count /= sizeof(reg);
if (kbuf) {
for (; count > 0 && pos < 16; count--)
*k++ = regs->u_regs[pos++];
reg_window = (compat_ulong_t __user *) regs->u_regs[UREG_I6];
reg_window -= 16;
if (target == current) {
for (; count > 0 && pos < 32; count--) {
if (get_user(*k++, &reg_window[pos++]))
return -EFAULT;
}
} else {
for (; count > 0 && pos < 32; count--) {
if (access_process_vm(target,
(unsigned long)
&reg_window[pos],
k, sizeof(*k),
FOLL_FORCE)
!= sizeof(*k))
return -EFAULT;
k++;
pos++;
}
}
} else {
for (; count > 0 && pos < 16; count--) {
if (put_user((compat_ulong_t) regs->u_regs[pos++], u++))
return -EFAULT;
}
reg_window = (compat_ulong_t __user *) regs->u_regs[UREG_I6];
reg_window -= 16;
if (target == current) {
for (; count > 0 && pos < 32; count--) {
if (get_user(reg, &reg_window[pos++]) ||
put_user(reg, u++))
return -EFAULT;
}
} else {
for (; count > 0 && pos < 32; count--) {
if (access_process_vm(target,
(unsigned long)
&reg_window[pos],
&reg, sizeof(reg),
FOLL_FORCE)
!= sizeof(reg))
return -EFAULT;
if (access_process_vm(target,
(unsigned long) u,
&reg, sizeof(reg),
FOLL_FORCE | FOLL_WRITE)
!= sizeof(reg))
return -EFAULT;
pos++;
u++;
}
}
}
while (count > 0) {
switch (pos) {
case 32: /* PSR */
reg = tstate_to_psr(regs->tstate);
break;
case 33: /* PC */
reg = regs->tpc;
break;
case 34: /* NPC */
reg = regs->tnpc;
break;
case 35: /* Y */
reg = regs->y;
break;
case 36: /* WIM */
case 37: /* TBR */
reg = 0;
break;
default:
goto finish;
}
if (kbuf)
*k++ = reg;
else if (put_user(reg, u++))
return -EFAULT;
pos++;
count--;
}
finish:
pos *= sizeof(reg);
count *= sizeof(reg);
return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
38 * sizeof(reg), -1);
}
static int genregs32_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
struct pt_regs *regs = task_pt_regs(target);
compat_ulong_t __user *reg_window;
const compat_ulong_t *k = kbuf;
const compat_ulong_t __user *u = ubuf;
compat_ulong_t reg;
if (target == current)
flushw_user();
pos /= sizeof(reg);
count /= sizeof(reg);
if (kbuf) {
for (; count > 0 && pos < 16; count--)
regs->u_regs[pos++] = *k++;
reg_window = (compat_ulong_t __user *) regs->u_regs[UREG_I6];
reg_window -= 16;
if (target == current) {
for (; count > 0 && pos < 32; count--) {
if (put_user(*k++, &reg_window[pos++]))
return -EFAULT;
}
} else {
for (; count > 0 && pos < 32; count--) {
if (access_process_vm(target,
(unsigned long)
&reg_window[pos],
(void *) k,
sizeof(*k),
FOLL_FORCE | FOLL_WRITE)
!= sizeof(*k))
return -EFAULT;
k++;
pos++;
}
}
} else {
for (; count > 0 && pos < 16; count--) {
if (get_user(reg, u++))
return -EFAULT;
regs->u_regs[pos++] = reg;
}
reg_window = (compat_ulong_t __user *) regs->u_regs[UREG_I6];
reg_window -= 16;
if (target == current) {
for (; count > 0 && pos < 32; count--) {
if (get_user(reg, u++) ||
put_user(reg, &reg_window[pos++]))
return -EFAULT;
}
} else {
for (; count > 0 && pos < 32; count--) {
if (access_process_vm(target,
(unsigned long)
u,
&reg, sizeof(reg),
FOLL_FORCE)
!= sizeof(reg))
return -EFAULT;
if (access_process_vm(target,
(unsigned long)
&reg_window[pos],
&reg, sizeof(reg),
FOLL_FORCE | FOLL_WRITE)
!= sizeof(reg))
return -EFAULT;
pos++;
u++;
}
}
}
while (count > 0) {
unsigned long tstate;
if (kbuf)
reg = *k++;
else if (get_user(reg, u++))
return -EFAULT;
switch (pos) {
case 32: /* PSR */
tstate = regs->tstate;
sparc: Fix debugger syscall restart interactions. So, forever, we've had this ptrace_signal_deliver implementation which tries to handle all of the nasties that can occur when the debugger looks at a process about to take a signal. It's meant to address all of these issues inside of the kernel so that the debugger need not be mindful of such things. Problem is, this doesn't work. The idea was that we should do the syscall restart business first, so that the debugger captures that state. Otherwise, if the debugger for example saves the child's state, makes the child execute something else, then restores the saved state, we won't handle the syscall restart properly because we lose the "we're in a syscall" state. The code here worked for most cases, but if the debugger actually passes the signal through to the child unaltered, it's possible that we would do a syscall restart when we shouldn't have. In particular this breaks the case of debugging a process under a gdb which is being debugged by yet another gdb. gdb uses sigsuspend to wait for SIGCHLD of the inferior, but if gdb itself is being debugged by a top-level gdb we get a ptrace_stop(). The top-level gdb does a PTRACE_CONT with SIGCHLD to let the inferior gdb see the signal. But ptrace_signal_deliver() assumed the debugger would cancel out the signal and therefore did a syscall restart, because the return error was ERESTARTNOHAND. Fix this by simply making ptrace_signal_deliver() a nop, and providing a way for the debugger to control system call restarting properly: 1) Report a "in syscall" software bit in regs->{tstate,psr}. It is set early on in trap entry to a system call and is fully visible to the debugger via ptrace() and regsets. 2) Test this bit right before doing a syscall restart. We have to do a final recheck right after get_signal_to_deliver() in case the debugger cleared the bit during ptrace_stop(). 3) Clear the bit in trap return so we don't accidently try to set that bit in the real register. As a result we also get a ptrace_{is,clear}_syscall() for sparc32 just like sparc64 has. M68K has this same exact bug, and is now the only other user of the ptrace_signal_deliver hook. It needs to be fixed in the same exact way as sparc. Signed-off-by: David S. Miller <davem@davemloft.net>
2008-05-11 16:07:19 +07:00
tstate &= ~(TSTATE_ICC | TSTATE_XCC | TSTATE_SYSCALL);
tstate |= psr_to_tstate_icc(reg);
sparc: Fix debugger syscall restart interactions. So, forever, we've had this ptrace_signal_deliver implementation which tries to handle all of the nasties that can occur when the debugger looks at a process about to take a signal. It's meant to address all of these issues inside of the kernel so that the debugger need not be mindful of such things. Problem is, this doesn't work. The idea was that we should do the syscall restart business first, so that the debugger captures that state. Otherwise, if the debugger for example saves the child's state, makes the child execute something else, then restores the saved state, we won't handle the syscall restart properly because we lose the "we're in a syscall" state. The code here worked for most cases, but if the debugger actually passes the signal through to the child unaltered, it's possible that we would do a syscall restart when we shouldn't have. In particular this breaks the case of debugging a process under a gdb which is being debugged by yet another gdb. gdb uses sigsuspend to wait for SIGCHLD of the inferior, but if gdb itself is being debugged by a top-level gdb we get a ptrace_stop(). The top-level gdb does a PTRACE_CONT with SIGCHLD to let the inferior gdb see the signal. But ptrace_signal_deliver() assumed the debugger would cancel out the signal and therefore did a syscall restart, because the return error was ERESTARTNOHAND. Fix this by simply making ptrace_signal_deliver() a nop, and providing a way for the debugger to control system call restarting properly: 1) Report a "in syscall" software bit in regs->{tstate,psr}. It is set early on in trap entry to a system call and is fully visible to the debugger via ptrace() and regsets. 2) Test this bit right before doing a syscall restart. We have to do a final recheck right after get_signal_to_deliver() in case the debugger cleared the bit during ptrace_stop(). 3) Clear the bit in trap return so we don't accidently try to set that bit in the real register. As a result we also get a ptrace_{is,clear}_syscall() for sparc32 just like sparc64 has. M68K has this same exact bug, and is now the only other user of the ptrace_signal_deliver hook. It needs to be fixed in the same exact way as sparc. Signed-off-by: David S. Miller <davem@davemloft.net>
2008-05-11 16:07:19 +07:00
if (reg & PSR_SYSCALL)
tstate |= TSTATE_SYSCALL;
regs->tstate = tstate;
break;
case 33: /* PC */
regs->tpc = reg;
break;
case 34: /* NPC */
regs->tnpc = reg;
break;
case 35: /* Y */
regs->y = reg;
break;
case 36: /* WIM */
case 37: /* TBR */
break;
default:
goto finish;
}
pos++;
count--;
}
finish:
pos *= sizeof(reg);
count *= sizeof(reg);
return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
38 * sizeof(reg), -1);
}
static int fpregs32_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
const unsigned long *fpregs = task_thread_info(target)->fpregs;
compat_ulong_t enabled;
unsigned long fprs;
compat_ulong_t fsr;
int ret = 0;
if (target == current)
save_and_clear_fpu();
fprs = task_thread_info(target)->fpsaved[0];
if (fprs & FPRS_FEF) {
fsr = task_thread_info(target)->xfsr[0];
enabled = 1;
} else {
fsr = 0;
enabled = 0;
}
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
fpregs,
0, 32 * sizeof(u32));
if (!ret)
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
32 * sizeof(u32),
33 * sizeof(u32));
if (!ret)
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&fsr,
33 * sizeof(u32),
34 * sizeof(u32));
if (!ret) {
compat_ulong_t val;
val = (enabled << 8) | (8 << 16);
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&val,
34 * sizeof(u32),
35 * sizeof(u32));
}
if (!ret)
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
35 * sizeof(u32), -1);
return ret;
}
static int fpregs32_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
unsigned long *fpregs = task_thread_info(target)->fpregs;
unsigned long fprs;
int ret;
if (target == current)
save_and_clear_fpu();
fprs = task_thread_info(target)->fpsaved[0];
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
fpregs,
0, 32 * sizeof(u32));
if (!ret)
user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
32 * sizeof(u32),
33 * sizeof(u32));
if (!ret && count > 0) {
compat_ulong_t fsr;
unsigned long val;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&fsr,
33 * sizeof(u32),
34 * sizeof(u32));
if (!ret) {
val = task_thread_info(target)->xfsr[0];
val &= 0xffffffff00000000UL;
val |= fsr;
task_thread_info(target)->xfsr[0] = val;
}
}
fprs |= (FPRS_FEF | FPRS_DL);
task_thread_info(target)->fpsaved[0] = fprs;
if (!ret)
ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
34 * sizeof(u32), -1);
return ret;
}
static const struct user_regset sparc32_regsets[] = {
/* Format is:
* G0 --> G7
* O0 --> O7
* L0 --> L7
* I0 --> I7
* PSR, PC, nPC, Y, WIM, TBR
*/
[REGSET_GENERAL] = {
.core_note_type = NT_PRSTATUS,
.n = 38,
.size = sizeof(u32), .align = sizeof(u32),
.get = genregs32_get, .set = genregs32_set
},
/* Format is:
* F0 --> F31
* empty 32-bit word
* FSR (32--bit word)
* FPU QUEUE COUNT (8-bit char)
* FPU QUEUE ENTRYSIZE (8-bit char)
* FPU ENABLED (8-bit char)
* empty 8-bit char
* FPU QUEUE (64 32-bit ints)
*/
[REGSET_FP] = {
.core_note_type = NT_PRFPREG,
.n = 99,
.size = sizeof(u32), .align = sizeof(u32),
.get = fpregs32_get, .set = fpregs32_set
},
};
static const struct user_regset_view user_sparc32_view = {
.name = "sparc", .e_machine = EM_SPARC,
.regsets = sparc32_regsets, .n = ARRAY_SIZE(sparc32_regsets)
};
#endif /* CONFIG_COMPAT */
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
#ifdef CONFIG_COMPAT
if (test_tsk_thread_flag(task, TIF_32BIT))
return &user_sparc32_view;
#endif
return &user_sparc64_view;
}
#ifdef CONFIG_COMPAT
struct compat_fps {
unsigned int regs[32];
unsigned int fsr;
unsigned int flags;
unsigned int extra;
unsigned int fpqd;
struct compat_fq {
unsigned int insnaddr;
unsigned int insn;
} fpq[16];
};
long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
compat_ulong_t caddr, compat_ulong_t cdata)
{
const struct user_regset_view *view = task_user_regset_view(current);
compat_ulong_t caddr2 = task_pt_regs(current)->u_regs[UREG_I4];
struct pt_regs32 __user *pregs;
struct compat_fps __user *fps;
unsigned long addr2 = caddr2;
unsigned long addr = caddr;
unsigned long data = cdata;
int ret;
pregs = (struct pt_regs32 __user *) addr;
fps = (struct compat_fps __user *) addr;
switch (request) {
case PTRACE_PEEKUSR:
ret = (addr != 0) ? -EIO : 0;
break;
case PTRACE_GETREGS:
ret = copy_regset_to_user(child, view, REGSET_GENERAL,
32 * sizeof(u32),
4 * sizeof(u32),
&pregs->psr);
if (!ret)
ret = copy_regset_to_user(child, view, REGSET_GENERAL,
1 * sizeof(u32),
15 * sizeof(u32),
&pregs->u_regs[0]);
break;
case PTRACE_SETREGS:
ret = copy_regset_from_user(child, view, REGSET_GENERAL,
32 * sizeof(u32),
4 * sizeof(u32),
&pregs->psr);
if (!ret)
ret = copy_regset_from_user(child, view, REGSET_GENERAL,
1 * sizeof(u32),
15 * sizeof(u32),
&pregs->u_regs[0]);
break;
case PTRACE_GETFPREGS:
ret = copy_regset_to_user(child, view, REGSET_FP,
0 * sizeof(u32),
32 * sizeof(u32),
&fps->regs[0]);
if (!ret)
ret = copy_regset_to_user(child, view, REGSET_FP,
33 * sizeof(u32),
1 * sizeof(u32),
&fps->fsr);
if (!ret) {
if (__put_user(0, &fps->flags) ||
__put_user(0, &fps->extra) ||
__put_user(0, &fps->fpqd) ||
clear_user(&fps->fpq[0], 32 * sizeof(unsigned int)))
ret = -EFAULT;
}
break;
case PTRACE_SETFPREGS:
ret = copy_regset_from_user(child, view, REGSET_FP,
0 * sizeof(u32),
32 * sizeof(u32),
&fps->regs[0]);
if (!ret)
ret = copy_regset_from_user(child, view, REGSET_FP,
33 * sizeof(u32),
1 * sizeof(u32),
&fps->fsr);
break;
case PTRACE_READTEXT:
case PTRACE_READDATA:
ret = ptrace_readdata(child, addr,
(char __user *)addr2, data);
if (ret == data)
ret = 0;
else if (ret >= 0)
ret = -EIO;
break;
case PTRACE_WRITETEXT:
case PTRACE_WRITEDATA:
ret = ptrace_writedata(child, (char __user *) addr2,
addr, data);
if (ret == data)
ret = 0;
else if (ret >= 0)
ret = -EIO;
break;
default:
if (request == PTRACE_SPARC_DETACH)
request = PTRACE_DETACH;
ret = compat_ptrace_request(child, request, addr, data);
break;
}
return ret;
}
#endif /* CONFIG_COMPAT */
struct fps {
unsigned int regs[64];
unsigned long fsr;
};
long arch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data)
{
const struct user_regset_view *view = task_user_regset_view(current);
unsigned long addr2 = task_pt_regs(current)->u_regs[UREG_I4];
struct pt_regs __user *pregs;
struct fps __user *fps;
void __user *addr2p;
int ret;
pregs = (struct pt_regs __user *) addr;
fps = (struct fps __user *) addr;
addr2p = (void __user *) addr2;
switch (request) {
case PTRACE_PEEKUSR:
ret = (addr != 0) ? -EIO : 0;
break;
case PTRACE_GETREGS64:
ret = copy_regset_to_user(child, view, REGSET_GENERAL,
1 * sizeof(u64),
15 * sizeof(u64),
&pregs->u_regs[0]);
if (!ret) {
/* XXX doesn't handle 'y' register correctly XXX */
ret = copy_regset_to_user(child, view, REGSET_GENERAL,
32 * sizeof(u64),
4 * sizeof(u64),
&pregs->tstate);
}
break;
case PTRACE_SETREGS64:
ret = copy_regset_from_user(child, view, REGSET_GENERAL,
1 * sizeof(u64),
15 * sizeof(u64),
&pregs->u_regs[0]);
if (!ret) {
/* XXX doesn't handle 'y' register correctly XXX */
ret = copy_regset_from_user(child, view, REGSET_GENERAL,
32 * sizeof(u64),
4 * sizeof(u64),
&pregs->tstate);
}
break;
case PTRACE_GETFPREGS64:
ret = copy_regset_to_user(child, view, REGSET_FP,
0 * sizeof(u64),
33 * sizeof(u64),
fps);
break;
case PTRACE_SETFPREGS64:
ret = copy_regset_from_user(child, view, REGSET_FP,
0 * sizeof(u64),
33 * sizeof(u64),
fps);
break;
case PTRACE_READTEXT:
case PTRACE_READDATA:
ret = ptrace_readdata(child, addr, addr2p, data);
if (ret == data)
ret = 0;
else if (ret >= 0)
ret = -EIO;
break;
case PTRACE_WRITETEXT:
case PTRACE_WRITEDATA:
ret = ptrace_writedata(child, addr2p, addr, data);
if (ret == data)
ret = 0;
else if (ret >= 0)
ret = -EIO;
break;
default:
if (request == PTRACE_SPARC_DETACH)
request = PTRACE_DETACH;
ret = ptrace_request(child, request, addr, data);
break;
}
return ret;
}
asmlinkage int syscall_trace_enter(struct pt_regs *regs)
{
int ret = 0;
/* do the secure computing check first */
secure_computing_strict(regs->u_regs[UREG_G1]);
if (test_thread_flag(TIF_NOHZ))
user_exit();
if (test_thread_flag(TIF_SYSCALL_TRACE))
ret = tracehook_report_syscall_entry(regs);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->u_regs[UREG_G1]);
audit_syscall_entry(regs->u_regs[UREG_G1], regs->u_regs[UREG_I0],
regs->u_regs[UREG_I1], regs->u_regs[UREG_I2],
regs->u_regs[UREG_I3]);
return ret;
}
asmlinkage void syscall_trace_leave(struct pt_regs *regs)
{
if (test_thread_flag(TIF_NOHZ))
user_exit();
Audit: push audit success and retcode into arch ptrace.h The audit system previously expected arches calling to audit_syscall_exit to supply as arguments if the syscall was a success and what the return code was. Audit also provides a helper AUDITSC_RESULT which was supposed to simplify things by converting from negative retcodes to an audit internal magic value stating success or failure. This helper was wrong and could indicate that a valid pointer returned to userspace was a failed syscall. The fix is to fix the layering foolishness. We now pass audit_syscall_exit a struct pt_reg and it in turns calls back into arch code to collect the return value and to determine if the syscall was a success or failure. We also define a generic is_syscall_success() macro which determines success/failure based on if the value is < -MAX_ERRNO. This works for arches like x86 which do not use a separate mechanism to indicate syscall failure. We make both the is_syscall_success() and regs_return_value() static inlines instead of macros. The reason is because the audit function must take a void* for the regs. (uml calls theirs struct uml_pt_regs instead of just struct pt_regs so audit_syscall_exit can't take a struct pt_regs). Since the audit function takes a void* we need to use static inlines to cast it back to the arch correct structure to dereference it. The other major change is that on some arches, like ia64, MIPS and ppc, we change regs_return_value() to give us the negative value on syscall failure. THE only other user of this macro, kretprobe_example.c, won't notice and it makes the value signed consistently for the audit functions across all archs. In arch/sh/kernel/ptrace_64.c I see that we were using regs[9] in the old audit code as the return value. But the ptrace_64.h code defined the macro regs_return_value() as regs[3]. I have no idea which one is correct, but this patch now uses the regs_return_value() function, so it now uses regs[3]. For powerpc we previously used regs->result but now use the regs_return_value() function which uses regs->gprs[3]. regs->gprs[3] is always positive so the regs_return_value(), much like ia64 makes it negative before calling the audit code when appropriate. Signed-off-by: Eric Paris <eparis@redhat.com> Acked-by: H. Peter Anvin <hpa@zytor.com> [for x86 portion] Acked-by: Tony Luck <tony.luck@intel.com> [for ia64] Acked-by: Richard Weinberger <richard@nod.at> [for uml] Acked-by: David S. Miller <davem@davemloft.net> [for sparc] Acked-by: Ralf Baechle <ralf@linux-mips.org> [for mips] Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> [for ppc]
2012-01-04 02:23:06 +07:00
audit_syscall_exit(regs);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_exit(regs, regs->u_regs[UREG_I0]);
if (test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, 0);
if (test_thread_flag(TIF_NOHZ))
user_enter();
}
/**
* regs_query_register_offset() - query register offset from its name
* @name: the name of a register
*
* regs_query_register_offset() returns the offset of a register in struct
* pt_regs from its name. If the name is invalid, this returns -EINVAL;
*/
int regs_query_register_offset(const char *name)
{
const struct pt_regs_offset *roff;
for (roff = regoffset_table; roff->name != NULL; roff++)
if (!strcmp(roff->name, name))
return roff->offset;
return -EINVAL;
}
/**
* regs_within_kernel_stack() - check the address in the stack
* @regs: pt_regs which contains kernel stack pointer.
* @addr: address which is checked.
*
* regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
* If @addr is within the kernel stack, it returns true. If not, returns false.
*/
static inline int regs_within_kernel_stack(struct pt_regs *regs,
unsigned long addr)
{
unsigned long ksp = kernel_stack_pointer(regs) + STACK_BIAS;
return ((addr & ~(THREAD_SIZE - 1)) ==
(ksp & ~(THREAD_SIZE - 1)));
}
/**
* regs_get_kernel_stack_nth() - get Nth entry of the stack
* @regs: pt_regs which contains kernel stack pointer.
* @n: stack entry number.
*
* regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
* is specified by @regs. If the @n th entry is NOT in the kernel stack,
* this returns 0.
*/
unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
{
unsigned long ksp = kernel_stack_pointer(regs) + STACK_BIAS;
unsigned long *addr = (unsigned long *)ksp;
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
return *addr;
else
return 0;
}