linux_dsm_epyc7002/arch/tile/kernel/traps.c
Eric W. Biederman cc731525f2 signal: Remove kernel interal si_code magic
struct siginfo is a union and the kernel since 2.4 has been hiding a union
tag in the high 16bits of si_code using the values:
__SI_KILL
__SI_TIMER
__SI_POLL
__SI_FAULT
__SI_CHLD
__SI_RT
__SI_MESGQ
__SI_SYS

While this looks plausible on the surface, in practice this situation has
not worked well.

- Injected positive signals are not copied to user space properly
  unless they have these magic high bits set.

- Injected positive signals are not reported properly by signalfd
  unless they have these magic high bits set.

- These kernel internal values leaked to userspace via ptrace_peek_siginfo

- It was possible to inject these kernel internal values and cause the
  kernel to misbehave.

- Kernel developers got confused and expected these kernel internal values
  in userspace in kernel self tests.

- Kernel developers got confused and set si_code to __SI_FAULT which
  is SI_USER in userspace which causes userspace to think an ordinary user
  sent the signal and that it was not kernel generated.

- The values make it impossible to reorganize the code to transform
  siginfo_copy_to_user into a plain copy_to_user.  As si_code must
  be massaged before being passed to userspace.

So remove these kernel internal si codes and make the kernel code simpler
and more maintainable.

To replace these kernel internal magic si_codes introduce the helper
function siginfo_layout, that takes a signal number and an si_code and
computes which union member of siginfo is being used.  Have
siginfo_layout return an enumeration so that gcc will have enough
information to warn if a switch statement does not handle all of union
members.

A couple of architectures have a messed up ABI that defines signal
specific duplications of SI_USER which causes more special cases in
siginfo_layout than I would like.  The good news is only problem
architectures pay the cost.

Update all of the code that used the previous magic __SI_ values to
use the new SIL_ values and to call siginfo_layout to get those
values.  Except where not all of the cases are handled remove the
defaults in the switch statements so that if a new case is missed in
the future the lack will show up at compile time.

Modify the code that copies siginfo si_code to userspace to just copy
the value and not cast si_code to a short first.  The high bits are no
longer used to hold a magic union member.

Fixup the siginfo header files to stop including the __SI_ values in
their constants and for the headers that were missing it to properly
update the number of si_codes for each signal type.

The fixes to the copy_siginfo_from_user32 implementations have the
interesting property that several of them previously should never have
worked, as the __SI_ values they depended upon were kernel internal.
With that dependency gone those implementations should work much
better.

The idea of not passing the __SI_ values out to userspace and then
not reinserting them has been tested with criu and criu worked without
changes.

Ref: 2.4.0-test1
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
2017-07-24 14:30:28 -05:00

420 lines
10 KiB
C

/*
* Copyright 2010 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <linux/hardirq.h>
#include <linux/nmi.h>
#include <asm/stack.h>
#include <asm/traps.h>
#include <asm/setup.h>
#include <arch/interrupts.h>
#include <arch/spr_def.h>
#include <arch/opcode.h>
/*
 * Trap initialization entry point.  Deliberately a no-op: as noted in the
 * body, the code is linked at .intrpt, so there is nothing to set up here.
 */
void __init trap_init(void)
{
/* Nothing needed here since we link code at .intrpt */
}
/*
 * Policy for unaligned data accesses, settable with "unaligned_fixup=" on
 * the kernel command line:
 *   > 0  fixups enabled (the default value is 1)
 *     0  fixups disabled
 *   < 0  fixups completely disabled
 */
int unaligned_fixup = 1;

/*
 * Parse the "unaligned_fixup=" boot argument into unaligned_fixup and log
 * the resulting mode.
 *
 * Returns 1 when the value was parsed, or 0 when kstrtoint() rejected it
 * (in which case nothing is logged).
 */
static int __init setup_unaligned_fixup(char *str)
{
	const char *mode;

	/*
	 * Say "=-1" to completely disable it.  If you just do "=0", we
	 * will still parse the instruction, then fire a SIGBUS with
	 * the correct address from inside the single_step code.
	 */
	if (kstrtoint(str, 0, &unaligned_fixup))
		return 0;

	if (unaligned_fixup < 0)
		mode = "completely disabled";
	else if (unaligned_fixup != 0)
		mode = "enabled";
	else
		mode = "disabled";

	pr_info("Fixups for unaligned data accesses are %s\n", mode);
	return 1;
}
__setup("unaligned_fixup=", setup_unaligned_fixup);
#if CHIP_HAS_TILE_DMA()
/* Set to 1 by the "nodma" boot option; consulted by retry_gpv(). */
static int dma_disabled;
/*
 * Handler for the "nodma" boot option: logs that user-space DMA is
 * disabled and sets dma_disabled.  The option string itself is not
 * examined.  Always returns 1.
 */
static int __init nodma(char *str)
{
pr_info("User-space DMA is disabled\n");
dma_disabled = 1;
return 1;
}
__setup("nodma", nodma);
/*
 * How to decode SPR_GPV_REASON.
 *
 * The three high bits are flags tested by retry_gpv(); the low 15 bits
 * are masked out with SPR_INDEX and shifted right by SPR_MPL_SHIFT to
 * obtain the MPL number.
 */
#define IRET_ERROR (1U << 31) /* bit 31: retry_gpv() never retries these */
#define MT_ERROR (1U << 30) /* bit 30 (presumably an mtspr fault - TODO confirm) */
#define MF_ERROR (1U << 29) /* bit 29 (presumably an mfspr fault - TODO confirm) */
#define SPR_INDEX ((1U << 15) - 1) /* mask selecting the low 15 bits */
#define SPR_MPL_SHIFT 9 /* starting bit position for MPL encoded in SPR */
/*
 * Decide whether a general protection violation was only the hardware
 * notifying the kernel of SPR use, in which case the relevant MPLs are
 * adjusted and the user instruction can simply be retried.
 *
 * gpv_reason: the GPV reason value to decode.
 *
 * Returns 1 if the faulting instruction should be retried, or 0 if the
 * GPV has to be handled as a genuine fault.
 */
static int retry_gpv(unsigned int gpv_reason)
{
	unsigned int spr;
	int level;

	/* An IRET error is never retried. */
	if (gpv_reason & IRET_ERROR)
		return 0;

	/* Anything else must carry at least one of the MT/MF flags. */
	BUG_ON(!(gpv_reason & (MT_ERROR | MF_ERROR)));

	spr = gpv_reason & SPR_INDEX;
	level = spr >> SPR_MPL_SHIFT;

	/* Only a DMA-notify MPL is retryable, and only if DMA is allowed. */
	if (level != INT_DMA_NOTIFY || dma_disabled)
		return 0;

	/* User is turning on DMA.  Allow it and retry. */
	printk(KERN_DEBUG "Process %d/%s is now enabled for DMA\n",
	       current->pid, current->comm);
	BUG_ON(current->thread.tile_dma_state.enabled);
	current->thread.tile_dma_state.enabled = 1;
	grant_dma_mpls();
	return 1;
}
#endif /* CHIP_HAS_TILE_DMA() */
/*
 * bpt_code is defined by the file-scope asm below rather than in C: it is
 * an object in the .rodata.bpt_code section, placed with ".align 8", that
 * holds a single "bpt" instruction.  special_ill() compares faulting
 * bundles against it.
 */
extern tile_bundle_bits bpt_code;
asm(".pushsection .rodata.bpt_code,\"a\";"
".align 8;"
"bpt_code: bpt;"
".size bpt_code,.-bpt_code;"
".popsection");
/*
 * Recognize the "special" illegal-instruction bundles that request a
 * particular signal: the breakpoint bundle, and the "raise" bundle that
 * carries a signal number and si_code in its X0 immediate.
 *
 * bundle: the faulting instruction bundle.
 * sigp:   receives the signal number on a match.
 * codep:  receives the si_code on a match.
 *
 * Returns 1 and fills in *sigp and *codep when the bundle is recognized;
 * returns 0, leaving both outputs untouched, otherwise.
 */
static int special_ill(tile_bundle_bits bundle, int *sigp, int *codep)
{
	int signo, si_code, limit;

	/* The breakpoint bundle always means SIGTRAP/TRAP_BRKPT. */
	if (bundle == bpt_code) {
		*sigp = SIGTRAP;
		*codep = TRAP_BRKPT;
		return 1;
	}

	/* If it's a "raise" bundle, then "ill" must be in pipe X1. */
#ifdef __tilegx__
	if ((bundle & TILEGX_BUNDLE_MODE_MASK) != 0 ||
	    get_Opcode_X1(bundle) != RRR_0_OPCODE_X1 ||
	    get_RRROpcodeExtension_X1(bundle) != UNARY_RRR_0_OPCODE_X1 ||
	    get_UnaryOpcodeExtension_X1(bundle) != ILL_UNARY_OPCODE_X1)
		return 0;
#else
	if ((bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) ||
	    get_Opcode_X1(bundle) != SHUN_0_OPCODE_X1 ||
	    get_UnShOpcodeExtension_X1(bundle) != UN_0_SHUN_0_OPCODE_X1 ||
	    get_UnOpcodeExtension_X1(bundle) != ILL_UN_0_SHUN_0_OPCODE_X1)
		return 0;
#endif

	/* Check that the magic distinguishers are set to mean "raise". */
	if (get_Dest_X1(bundle) != 29 || get_SrcA_X1(bundle) != 37)
		return 0;

	/* There must be an "addli zero, zero, VAL" in X0. */
	if (get_Opcode_X0(bundle) != ADDLI_OPCODE_X0 ||
	    get_Dest_X0(bundle) != TREG_ZERO ||
	    get_SrcA_X0(bundle) != TREG_ZERO)
		return 0;

	/*
	 * Validate the proposed signal number and si_code value.
	 * Note that we embed these in the static instruction itself
	 * so that we perturb the register state as little as possible
	 * at the time of the actual fault; it's unlikely you'd ever
	 * need to dynamically choose which kind of fault to raise
	 * from user space.
	 */

	/* Low six bits of the immediate: the signal number. */
	signo = get_Imm16_X0(bundle) & 0x3f;
	if (signo == SIGILL)
		limit = NSIGILL;
	else if (signo == SIGFPE)
		limit = NSIGFPE;
	else if (signo == SIGSEGV)
		limit = NSIGSEGV;
	else if (signo == SIGBUS)
		limit = NSIGBUS;
	else if (signo == SIGTRAP)
		limit = NSIGTRAP;
	else
		return 0;

	/* Next four bits: the si_code, which must lie in 1..limit. */
	si_code = (get_Imm16_X0(bundle) >> 6) & 0xf;
	if (si_code < 1 || si_code > limit)
		return 0;

	/* Make it the requested signal. */
	*sigp = signo;
	*codep = si_code;
	return 1;
}
/*
 * Human-readable names for the interrupt numbers handled by do_trap(),
 * indexed by INT_* value.  Entries not listed here are NULL, so callers
 * must check both the array bound and the entry before using a name.
 */
static const char *const int_name[] = {
[INT_MEM_ERROR] = "Memory error",
[INT_ILL] = "Illegal instruction",
[INT_GPV] = "General protection violation",
[INT_UDN_ACCESS] = "UDN access",
[INT_IDN_ACCESS] = "IDN access",
#if CHIP_HAS_SN()
[INT_SN_ACCESS] = "SN access",
#endif
[INT_SWINT_3] = "Software interrupt 3",
[INT_SWINT_2] = "Software interrupt 2",
[INT_SWINT_0] = "Software interrupt 0",
[INT_UNALIGN_DATA] = "Unaligned data",
[INT_DOUBLE_FAULT] = "Double fault",
#ifdef __tilegx__
/* Only present on tilegx builds. */
[INT_ILL_TRANS] = "Illegal virtual address",
#endif
};
/*
 * Handle a breakpoint bundle at the trapping PC by forwarding it to the
 * die notifier chain.
 *
 * Returns 1 if the bundle at the instruction pointer was a recognized
 * breakpoint and the notifiers were called, 0 otherwise.
 */
static int do_bpt(struct pt_regs *regs)
{
	unsigned long insn, subcode;
	const char *tag;
	int event;

	insn = *(unsigned long *)instruction_pointer(regs);

	/*
	 * bpt should be { bpt; nop }, which is 0x286a44ae51485000ULL.
	 * The unused low 12 bits are used to encode a sub-code.
	 */
	if ((insn & ~((1ULL << 12) - 1)) != TILE_BPT_BUNDLE)
		return 0;
	subcode = insn & ((1ULL << 12) - 1);

	/*
	 * Pick the notification for the kprobe handlers, if the
	 * instruction is likely to pertain to them.
	 */
	switch (subcode) {
	case 0:				/* breakpoint_insn */
		event = DIE_BREAK;
		tag = "debug";
		break;
	case DIE_COMPILED_BPT:		/* compiled_bpt */
		event = DIE_COMPILED_BPT;
		tag = "debug";
		break;
	case DIE_SSTEPBP:		/* breakpoint2_insn */
		event = DIE_SSTEPBP;
		tag = "single_step";
		break;
	default:
		return 0;
	}

	notify_die(event, tag, regs, insn, INT_ILL, SIGTRAP);
	return 1;
}
/*
 * Common handler for the traps listed in int_name[].
 *
 * regs:      register state at the time of the trap.
 * fault_num: the INT_* number of the interrupt that fired.
 * reason:    fault-specific detail: the GPV reason for INT_GPV, the
 *            faulting address for INT_ILL_TRANS, and the original fault
 *            information for INT_DOUBLE_FAULT.
 *
 * Kernel-mode traps are either consumed (breakpoints, exception fixups)
 * or reported and the current task is killed.  User-mode traps are turned
 * into a signal (SIGBUS, SIGILL, SIGSEGV, or whatever special_ill()
 * selects) that is forced on the current task.  An unrecognized
 * fault_num from user mode panics.
 */
void __kprobes do_trap(struct pt_regs *regs, int fault_num,
unsigned long reason)
{
siginfo_t info = { 0 };
int signo, code;
unsigned long address = 0;
tile_bundle_bits instr;
int is_kernel = !user_mode(regs);
/* Handle breakpoints, etc. */
if (is_kernel && fault_num == INT_ILL && do_bpt(regs))
return;
/* Re-enable interrupts, if they were previously enabled. */
if (!(regs->flags & PT_FLAGS_DISABLE_IRQ))
local_irq_enable();
/*
 * If it hits in kernel mode and we can't fix it up, just exit the
 * current process and hope for the best.
 */
if (is_kernel) {
const char *name;
char buf[100];
if (fixup_exception(regs)) /* ILL_TRANS or UNALIGN_DATA */
return;
/* Look up a printable name, guarding against gaps in int_name[]. */
if (fault_num >= 0 &&
fault_num < ARRAY_SIZE(int_name) &&
int_name[fault_num] != NULL)
name = int_name[fault_num];
else
name = "Unknown interrupt";
/* Build an optional suffix describing "reason"; empty otherwise. */
if (fault_num == INT_GPV)
snprintf(buf, sizeof(buf), "; GPV_REASON %#lx", reason);
#ifdef __tilegx__
else if (fault_num == INT_ILL_TRANS)
snprintf(buf, sizeof(buf), "; address %#lx", reason);
#endif
else
buf[0] = '\0';
pr_alert("Kernel took bad trap %d (%s) at PC %#lx%s\n",
fault_num, name, regs->pc, buf);
show_regs(regs);
do_exit(SIGKILL); /* FIXME: implement i386 die() */
}
/* User mode from here on: choose the signal, si_code and si_addr. */
switch (fault_num) {
case INT_MEM_ERROR:
signo = SIGBUS;
code = BUS_OBJERR;
break;
case INT_ILL:
/* Fetch the faulting bundle so special_ill() can inspect it. */
if (copy_from_user(&instr, (void __user *)regs->pc,
sizeof(instr))) {
pr_err("Unreadable instruction for INT_ILL: %#lx\n",
regs->pc);
do_exit(SIGKILL);
}
/* Not a breakpoint or "raise" bundle: plain illegal opcode. */
if (!special_ill(instr, &signo, &code)) {
signo = SIGILL;
code = ILL_ILLOPC;
}
address = regs->pc;
break;
case INT_GPV:
#if CHIP_HAS_TILE_DMA()
/* The GPV may just be a request to enable DMA; retry if so. */
if (retry_gpv(reason))
return;
#endif
/*FALLTHROUGH*/
case INT_UDN_ACCESS:
case INT_IDN_ACCESS:
#if CHIP_HAS_SN()
case INT_SN_ACCESS:
#endif
signo = SIGILL;
code = ILL_PRVREG;
address = regs->pc;
break;
case INT_SWINT_3:
case INT_SWINT_2:
case INT_SWINT_0:
signo = SIGILL;
code = ILL_ILLTRP;
address = regs->pc;
break;
case INT_UNALIGN_DATA:
#ifndef __tilegx__ /* Emulated support for single step debugging */
/*
 * Unless fixups are completely disabled (negative), hand the access
 * to single_step_once(), except when the PC is already at the task's
 * single-step buffer.
 */
if (unaligned_fixup >= 0) {
struct single_step_state *state =
current_thread_info()->step_state;
if (!state ||
(void __user *)(regs->pc) != state->buffer) {
single_step_once(regs);
return;
}
}
#endif
signo = SIGBUS;
code = BUS_ADRALN;
address = 0;
break;
case INT_DOUBLE_FAULT:
/*
 * For double fault, "reason" is actually passed as
 * SYSTEM_SAVE_K_2, the hypervisor's double-fault info, so
 * we can provide the original fault number rather than
 * the uninteresting "INT_DOUBLE_FAULT" so the user can
 * learn what actually struck while PL0 ICS was set.
 */
fault_num = reason;
signo = SIGILL;
code = ILL_DBLFLT;
address = regs->pc;
break;
#ifdef __tilegx__
case INT_ILL_TRANS: {
/* Avoid a hardware erratum with the return address stack. */
fill_ra_stack();
signo = SIGSEGV;
address = reason;
code = SEGV_MAPERR;
break;
}
#endif
default:
panic("Unexpected do_trap interrupt number %d", fault_num);
}
/* Fill in the siginfo and deliver the signal to the current task. */
info.si_signo = signo;
info.si_code = code;
info.si_addr = (void __user *)address;
/* si_trapno is only filled in for SIGILL. */
if (signo == SIGILL)
info.si_trapno = fault_num;
/* SIGTRAP is not reported as an unhandled signal. */
if (signo != SIGTRAP)
trace_unhandled_signal("trap", regs, address, signo);
force_sig_info(signo, &info, current);
}
/*
 * Non-maskable interrupt handler.
 *
 * regs:      register state at the time of the NMI.
 * fault_num: interrupt number; not used by this function.
 * reason:    NMI type code selecting the action to take.
 *
 * The dispatch is bracketed by nmi_enter()/nmi_exit().  When
 * arch_trigger_cpumask_backtrace is defined, TILE_NMI_DUMP_STACK produces
 * a backtrace of this CPU; any other reason panics.
 */
void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason)
{
	nmi_enter();
	switch (reason) {
#ifdef arch_trigger_cpumask_backtrace
	case TILE_NMI_DUMP_STACK:
		nmi_cpu_backtrace(regs);
		break;
#endif
	default:
		/* "reason" is unsigned long, so it is printed with %lu. */
		panic("Unexpected do_nmi type %lu", reason);
	}
	nmi_exit();
}
/* Deprecated function currently only used here. */
extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
/*
 * Handle a double fault taken by the kernel: dump the stack described by
 * the supplied pc/lr/sp/r52 values via _dump_stack(), log an emergency
 * message, and halt the machine.  All arguments are passed through to
 * _dump_stack() unchanged.
 */
void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
{
_dump_stack(dummy, pc, lr, sp, r52);
pr_emerg("Double fault: exiting\n");
machine_halt();
}