2006-01-08 16:01:31 +07:00
|
|
|
/*
|
2007-10-16 15:26:54 +07:00
|
|
|
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
2005-04-17 05:20:36 +07:00
|
|
|
* Licensed under the GPL
|
|
|
|
*/
|
|
|
|
|
2007-10-16 15:26:54 +07:00
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/hardirq.h>
|
|
|
|
#include <asm/current.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
|
|
#include <asm/tlbflush.h>
|
2007-05-07 04:51:07 +07:00
|
|
|
#include "arch.h"
|
2007-10-16 15:26:54 +07:00
|
|
|
#include "as-layout.h"
|
|
|
|
#include "kern_util.h"
|
2006-01-08 16:01:31 +07:00
|
|
|
#include "os.h"
|
2007-10-16 15:26:54 +07:00
|
|
|
#include "sysdep/sigcontext.h"
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2007-10-16 15:26:54 +07:00
|
|
|
/*
|
|
|
|
* Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
|
|
|
|
* segv().
|
|
|
|
*/
|
2006-07-10 18:45:13 +07:00
|
|
|
int handle_page_fault(unsigned long address, unsigned long ip,
|
2005-04-17 05:20:36 +07:00
|
|
|
int is_write, int is_user, int *code_out)
|
|
|
|
{
|
|
|
|
struct mm_struct *mm = current->mm;
|
|
|
|
struct vm_area_struct *vma;
|
|
|
|
pgd_t *pgd;
|
|
|
|
pud_t *pud;
|
|
|
|
pmd_t *pmd;
|
|
|
|
pte_t *pte;
|
|
|
|
int err = -EFAULT;
|
|
|
|
|
|
|
|
*code_out = SEGV_MAPERR;
|
2005-09-23 11:44:20 +07:00
|
|
|
|
2007-10-16 15:26:54 +07:00
|
|
|
/*
|
|
|
|
* If the fault was during atomic operation, don't take the fault, just
|
|
|
|
* fail.
|
|
|
|
*/
|
2005-09-23 11:44:20 +07:00
|
|
|
if (in_atomic())
|
|
|
|
goto out_nosemaphore;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
down_read(&mm->mmap_sem);
|
|
|
|
vma = find_vma(mm, address);
|
2007-10-16 15:26:54 +07:00
|
|
|
if (!vma)
|
2005-04-17 05:20:36 +07:00
|
|
|
goto out;
|
2007-10-16 15:26:54 +07:00
|
|
|
else if (vma->vm_start <= address)
|
2005-04-17 05:20:36 +07:00
|
|
|
goto good_area;
|
2007-10-16 15:26:54 +07:00
|
|
|
else if (!(vma->vm_flags & VM_GROWSDOWN))
|
2005-04-17 05:20:36 +07:00
|
|
|
goto out;
|
2007-10-16 15:26:54 +07:00
|
|
|
else if (is_user && !ARCH_IS_STACKGROW(address))
|
2005-04-17 05:20:36 +07:00
|
|
|
goto out;
|
2007-10-16 15:26:54 +07:00
|
|
|
else if (expand_stack(vma, address))
|
2005-04-17 05:20:36 +07:00
|
|
|
goto out;
|
|
|
|
|
2005-09-04 05:57:26 +07:00
|
|
|
good_area:
|
2005-04-17 05:20:36 +07:00
|
|
|
*code_out = SEGV_ACCERR;
|
2007-10-16 15:26:54 +07:00
|
|
|
if (is_write && !(vma->vm_flags & VM_WRITE))
|
2005-04-17 05:20:36 +07:00
|
|
|
goto out;
|
2005-05-21 03:59:08 +07:00
|
|
|
|
2005-09-11 00:44:57 +07:00
|
|
|
/* Don't require VM_READ|VM_EXEC for write faults! */
|
2007-10-16 15:26:54 +07:00
|
|
|
if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
|
2007-05-07 04:51:24 +07:00
|
|
|
goto out;
|
2005-05-21 03:59:08 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
do {
|
2007-07-19 15:47:05 +07:00
|
|
|
int fault;
|
2005-09-04 05:57:26 +07:00
|
|
|
survive:
|
2007-07-19 15:47:05 +07:00
|
|
|
fault = handle_mm_fault(mm, vma, address, is_write);
|
|
|
|
if (unlikely(fault & VM_FAULT_ERROR)) {
|
|
|
|
if (fault & VM_FAULT_OOM) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto out_of_memory;
|
|
|
|
} else if (fault & VM_FAULT_SIGBUS) {
|
|
|
|
err = -EACCES;
|
|
|
|
goto out;
|
|
|
|
}
|
2005-04-17 05:20:36 +07:00
|
|
|
BUG();
|
|
|
|
}
|
2007-07-19 15:47:05 +07:00
|
|
|
if (fault & VM_FAULT_MAJOR)
|
|
|
|
current->maj_flt++;
|
|
|
|
else
|
|
|
|
current->min_flt++;
|
|
|
|
|
2005-09-04 05:57:26 +07:00
|
|
|
pgd = pgd_offset(mm, address);
|
|
|
|
pud = pud_offset(pgd, address);
|
|
|
|
pmd = pmd_offset(pud, address);
|
|
|
|
pte = pte_offset_kernel(pmd, address);
|
2007-10-16 15:26:54 +07:00
|
|
|
} while (!pte_present(*pte));
|
2005-04-17 05:20:36 +07:00
|
|
|
err = 0;
|
2007-10-16 15:26:54 +07:00
|
|
|
/*
|
|
|
|
* The below warning was added in place of
|
[PATCH] uml: remove bogus WARN_ON, triggerable harmlessly on a page fault race
The below warning was added in place of pte_mkyoung(); if (is_write)
pte_mkdirty();
In fact, if the PTE is not marked young/dirty, our dirty/accessed bit
emulation would cause the TLB permission not to be changed, and so we'd loop,
and given we don't support preemption yet, we'd busy-hang here.
However, I've seen this warning trigger without crashes during a loop of
concurrent kernel builds, at random times (i.e. like a race condition), and I
realized that two concurrent faults on the same page, one on read and one on
write, can trigger it. The read fault gets serviced and the PTE gets marked
writable but clean (it's possible on a shared-writable mapping), while the
generic code sees the PTE was already installed and returns without action. In
this case, we'll see another fault and service it normally.
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Acked-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-11-14 07:07:04 +07:00
|
|
|
* pte_mkyoung(); if (is_write) pte_mkdirty();
|
|
|
|
* If it's triggered, we'd see normally a hang here (a clean pte is
|
|
|
|
* marked read-only to emulate the dirty bit).
|
|
|
|
* However, the generic code can mark a PTE writable but clean on a
|
|
|
|
* concurrent read fault, triggering this harmlessly. So comment it out.
|
|
|
|
*/
|
|
|
|
#if 0
|
2005-09-11 00:44:58 +07:00
|
|
|
WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
|
[PATCH] uml: remove bogus WARN_ON, triggerable harmlessly on a page fault race
The below warning was added in place of pte_mkyoung(); if (is_write)
pte_mkdirty();
In fact, if the PTE is not marked young/dirty, our dirty/accessed bit
emulation would cause the TLB permission not to be changed, and so we'd loop,
and given we don't support preemption yet, we'd busy-hang here.
However, I've seen this warning trigger without crashes during a loop of
concurrent kernel builds, at random times (i.e. like a race condition), and I
realized that two concurrent faults on the same page, one on read and one on
write, can trigger it. The read fault gets serviced and the PTE gets marked
writable but clean (it's possible on a shared-writable mapping), while the
generic code sees the PTE was already installed and returns without action. In
this case, we'll see another fault and service it normally.
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Acked-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-11-14 07:07:04 +07:00
|
|
|
#endif
|
2005-09-04 05:57:26 +07:00
|
|
|
flush_tlb_page(vma, address);
|
|
|
|
out:
|
2005-04-17 05:20:36 +07:00
|
|
|
up_read(&mm->mmap_sem);
|
2005-09-23 11:44:20 +07:00
|
|
|
out_nosemaphore:
|
2007-10-16 15:26:54 +07:00
|
|
|
return err;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We ran out of memory, or some other thing happened to us that made
|
|
|
|
* us unable to handle the page fault gracefully.
|
|
|
|
*/
|
|
|
|
out_of_memory:
|
2006-09-29 16:00:07 +07:00
|
|
|
if (is_init(current)) {
|
2005-04-17 05:20:36 +07:00
|
|
|
up_read(&mm->mmap_sem);
|
|
|
|
yield();
|
|
|
|
down_read(&mm->mmap_sem);
|
|
|
|
goto survive;
|
|
|
|
}
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2007-02-10 16:44:14 +07:00
|
|
|
static void bad_segv(struct faultinfo fi, unsigned long ip)
|
|
|
|
{
|
|
|
|
struct siginfo si;
|
|
|
|
|
|
|
|
si.si_signo = SIGSEGV;
|
|
|
|
si.si_code = SEGV_ACCERR;
|
|
|
|
si.si_addr = (void __user *) FAULT_ADDRESS(fi);
|
|
|
|
current->thread.arch.faultinfo = fi;
|
|
|
|
force_sig_info(SIGSEGV, &si, current);
|
|
|
|
}
|
|
|
|
|
2007-10-16 15:26:58 +07:00
|
|
|
static void segv_handler(int sig, struct uml_pt_regs *regs)
|
2006-01-08 16:01:32 +07:00
|
|
|
{
|
|
|
|
struct faultinfo * fi = UPT_FAULTINFO(regs);
|
|
|
|
|
2007-10-16 15:26:54 +07:00
|
|
|
if (UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)) {
|
2006-01-08 16:01:32 +07:00
|
|
|
bad_segv(*fi, UPT_IP(regs));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
|
|
|
|
}
|
|
|
|
|
[PATCH] uml: S390 preparation, abstract host page fault data
This patch removes the arch-specific fault/trap-infos from thread and
skas-regs.
It adds a new struct faultinfo, that is arch-specific defined in
sysdep/faultinfo.h.
The structure is inserted in thread.arch and thread.regs.skas and
thread.regs.tt
Now, segv and other trap-handlers can copy the contents from regs.X.faultinfo
to thread.arch.faultinfo with one simple assignment.
Also, the number of macros necessary is reduced to
FAULT_ADDRESS(struct faultinfo)
extracts the faulting address from faultinfo
FAULT_WRITE(struct faultinfo)
extracts the "is_write" flag
SEGV_IS_FIXABLE(struct faultinfo)
is true for the fixable segvs, i.e. (TRAP == 14)
on i386
UPT_FAULTINFO(regs)
result is (struct faultinfo *) to the faultinfo
in regs->skas.faultinfo
GET_FAULTINFO_FROM_SC(struct faultinfo, struct sigcontext *)
copies the relevant parts of the sigcontext to
struct faultinfo.
On SIGSEGV, call user_signal() instead of handle_segv(), if the architecture
provides the information needed in PTRACE_FAULTINFO, or if PTRACE_FAULTINFO is
missing, because segv-stub will provide the info.
The benefit of the change is, that in case of a non-fixable SIGSEGV, we can
give user processes a SIGSEGV, instead of possibly looping on pagefault
handling.
Since handle_segv() skipped arch_fixup() implicitly by passing ip==0 to segv(),
I changed segv() to call arch_fixup() only, if !is_user.
Signed-off-by: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
Signed-off-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-05-06 06:15:31 +07:00
|
|
|
/*
|
|
|
|
* We give a *copy* of the faultinfo in the regs to segv.
|
|
|
|
* This must be done, since nesting SEGVs could overwrite
|
|
|
|
* the info in the regs. A pointer to the info then would
|
|
|
|
* give us bad data!
|
|
|
|
*/
|
2007-05-07 04:51:24 +07:00
|
|
|
unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
|
2007-10-16 15:26:58 +07:00
|
|
|
struct uml_pt_regs *regs)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
struct siginfo si;
|
|
|
|
void *catcher;
|
|
|
|
int err;
|
2007-05-07 04:51:24 +07:00
|
|
|
int is_write = FAULT_WRITE(fi);
|
|
|
|
unsigned long address = FAULT_ADDRESS(fi);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2007-10-16 15:26:54 +07:00
|
|
|
if (!is_user && (address >= start_vm) && (address < end_vm)) {
|
2007-05-07 04:51:24 +07:00
|
|
|
flush_tlb_kernel_vm();
|
|
|
|
return 0;
|
|
|
|
}
|
2007-10-16 15:26:54 +07:00
|
|
|
else if (current->mm == NULL) {
|
2007-05-07 04:51:25 +07:00
|
|
|
show_regs(container_of(regs, struct pt_regs, regs));
|
2007-10-16 15:26:54 +07:00
|
|
|
panic("Segfault with no mm");
|
2007-05-07 04:51:25 +07:00
|
|
|
}
|
2005-09-23 11:44:16 +07:00
|
|
|
|
2005-10-01 01:58:59 +07:00
|
|
|
if (SEGV_IS_FIXABLE(&fi) || SEGV_MAYBE_FIXABLE(&fi))
|
2007-10-16 15:26:54 +07:00
|
|
|
err = handle_page_fault(address, ip, is_write, is_user,
|
|
|
|
&si.si_code);
|
2005-09-23 11:44:16 +07:00
|
|
|
else {
|
|
|
|
err = -EFAULT;
|
2007-10-16 15:26:54 +07:00
|
|
|
/*
|
|
|
|
* A thread accessed NULL, we get a fault, but CR2 is invalid.
|
|
|
|
* This code is used in __do_copy_from_user() of TT mode.
|
|
|
|
* XXX tt mode is gone, so maybe this isn't needed any more
|
|
|
|
*/
|
2005-09-23 11:44:16 +07:00
|
|
|
address = 0;
|
|
|
|
}
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
catcher = current->thread.fault_catcher;
|
2007-10-16 15:26:54 +07:00
|
|
|
if (!err)
|
2007-05-07 04:51:24 +07:00
|
|
|
return 0;
|
2007-10-16 15:26:54 +07:00
|
|
|
else if (catcher != NULL) {
|
2005-04-17 05:20:36 +07:00
|
|
|
current->thread.fault_addr = (void *) address;
|
|
|
|
do_longjmp(catcher, 1);
|
2006-07-10 18:45:13 +07:00
|
|
|
}
|
2007-10-16 15:26:54 +07:00
|
|
|
else if (current->thread.fault_addr != NULL)
|
2005-04-17 05:20:36 +07:00
|
|
|
panic("fault_addr set but no fault catcher");
|
2007-10-16 15:26:54 +07:00
|
|
|
else if (!is_user && arch_fixup(ip, regs))
|
2007-05-07 04:51:24 +07:00
|
|
|
return 0;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2007-10-16 15:26:54 +07:00
|
|
|
if (!is_user) {
|
2007-05-07 04:51:25 +07:00
|
|
|
show_regs(container_of(regs, struct pt_regs, regs));
|
2006-07-10 18:45:13 +07:00
|
|
|
panic("Kernel mode fault at addr 0x%lx, ip 0x%lx",
|
2005-04-17 05:20:36 +07:00
|
|
|
address, ip);
|
2007-05-07 04:51:25 +07:00
|
|
|
}
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2005-09-04 05:57:26 +07:00
|
|
|
if (err == -EACCES) {
|
2005-04-17 05:20:36 +07:00
|
|
|
si.si_signo = SIGBUS;
|
|
|
|
si.si_errno = 0;
|
|
|
|
si.si_code = BUS_ADRERR;
|
2006-03-31 17:30:15 +07:00
|
|
|
si.si_addr = (void __user *)address;
|
2007-05-07 04:51:24 +07:00
|
|
|
current->thread.arch.faultinfo = fi;
|
2005-04-17 05:20:36 +07:00
|
|
|
force_sig_info(SIGBUS, &si, current);
|
2005-09-04 05:57:26 +07:00
|
|
|
} else if (err == -ENOMEM) {
|
2007-10-16 15:26:54 +07:00
|
|
|
printk(KERN_INFO "VM: killing process %s\n", current->comm);
|
2005-04-17 05:20:36 +07:00
|
|
|
do_exit(SIGKILL);
|
2005-09-04 05:57:26 +07:00
|
|
|
} else {
|
|
|
|
BUG_ON(err != -EFAULT);
|
2005-04-17 05:20:36 +07:00
|
|
|
si.si_signo = SIGSEGV;
|
2006-03-31 17:30:15 +07:00
|
|
|
si.si_addr = (void __user *) address;
|
2007-05-07 04:51:24 +07:00
|
|
|
current->thread.arch.faultinfo = fi;
|
2005-04-17 05:20:36 +07:00
|
|
|
force_sig_info(SIGSEGV, &si, current);
|
|
|
|
}
|
2007-05-07 04:51:24 +07:00
|
|
|
return 0;
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
2007-10-16 15:26:58 +07:00
|
|
|
void relay_signal(int sig, struct uml_pt_regs *regs)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
2007-10-16 15:26:54 +07:00
|
|
|
if (arch_handle_signal(sig, regs))
|
2006-09-26 13:33:03 +07:00
|
|
|
return;
|
|
|
|
|
2007-10-16 15:26:54 +07:00
|
|
|
if (!UPT_IS_USER(regs)) {
|
|
|
|
if (sig == SIGBUS)
|
|
|
|
printk(KERN_ERR "Bus error - the host /dev/shm or /tmp "
|
|
|
|
"mount likely just ran out of space\n");
|
2005-04-17 05:20:36 +07:00
|
|
|
panic("Kernel mode signal %d", sig);
|
2006-09-26 13:33:03 +07:00
|
|
|
}
|
|
|
|
|
2007-05-07 04:51:24 +07:00
|
|
|
current->thread.arch.faultinfo = *UPT_FAULTINFO(regs);
|
2005-04-17 05:20:36 +07:00
|
|
|
force_sig(sig, current);
|
|
|
|
}
|
|
|
|
|
2007-10-16 15:26:58 +07:00
|
|
|
static void bus_handler(int sig, struct uml_pt_regs *regs)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
2007-10-16 15:26:54 +07:00
|
|
|
if (current->thread.fault_catcher != NULL)
|
2005-04-17 05:20:36 +07:00
|
|
|
do_longjmp(current->thread.fault_catcher, 1);
|
|
|
|
else relay_signal(sig, regs);
|
|
|
|
}
|
|
|
|
|
2007-10-16 15:26:58 +07:00
|
|
|
static void winch(int sig, struct uml_pt_regs *regs)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
do_IRQ(WINCH_IRQ, regs);
|
|
|
|
}
|
|
|
|
|
2006-09-27 15:50:37 +07:00
|
|
|
const struct kern_handlers handlinfo_kern = {
|
|
|
|
.relay_signal = relay_signal,
|
|
|
|
.winch = winch,
|
|
|
|
.bus_handler = bus_handler,
|
|
|
|
.page_fault = segv_handler,
|
|
|
|
.sigio_handler = sigio_handler,
|
|
|
|
.timer_handler = timer_handler
|
|
|
|
};
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Intentionally empty: this file has no trap setup to perform. */
void trap_init(void)
{
}
|