linux_dsm_epyc7002/arch/xtensa/mm/fault.c

243 lines
5.9 KiB
C
Raw Normal View History

// TODO VM_EXEC flag work-around, cache aliasing
/*
* arch/xtensa/mm/fault.c
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 2001 - 2005 Tensilica Inc.
*
* Chris Zankel <chris@zankel.net>
* Joe Taylor <joe@tensilica.com, joetylr@yahoo.com>
*/
#include <linux/mm.h>
#include <linux/module.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/hardirq.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/pgalloc.h>
unsigned long asid_cache = ASID_USER_FIRST;
void bad_page_fault(struct pt_regs*, unsigned long, int);
#undef DEBUG_PAGE_FAULT
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
* routines.
*
* Note: does not handle Miss and MultiHit.
*/
void do_page_fault(struct pt_regs *regs)
{
struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
unsigned int exccause = regs->exccause;
unsigned int address = regs->excvaddr;
siginfo_t info;
int is_write, is_exec;
mm: fault feedback #2 This patch completes Linus's wish that the fault return codes be made into bit flags, which I agree makes everything nicer. This requires requires all handle_mm_fault callers to be modified (possibly the modifications should go further and do things like fault accounting in handle_mm_fault -- however that would be for another patch). [akpm@linux-foundation.org: fix alpha build] [akpm@linux-foundation.org: fix s390 build] [akpm@linux-foundation.org: fix sparc build] [akpm@linux-foundation.org: fix sparc64 build] [akpm@linux-foundation.org: fix ia64 build] Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Ian Molton <spyro@f2s.com> Cc: Bryan Wu <bryan.wu@analog.com> Cc: Mikael Starvik <starvik@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Hirokazu Takata <takata@linux-m32r.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Greg Ungerer <gerg@uclinux.org> Cc: Matthew Wilcox <willy@debian.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Richard Curnow <rc@rc0.org.uk> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Jeff Dike <jdike@addtoit.com> Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp> Cc: Chris Zankel <chris@zankel.net> Acked-by: Kyle McMartin <kyle@mcmartin.ca> Acked-by: Haavard Skinnemoen <hskinnemoen@atmel.com> Acked-by: Ralf Baechle <ralf@linux-mips.org> Acked-by: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> [ Still apparently needs some ARM and PPC loving - Linus ] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-19 15:47:05 +07:00
int fault;
info.si_code = SEGV_MAPERR;
/* We fault-in kernel-space virtual memory on-demand. The
* 'reference' page table is init_mm.pgd.
*/
if (address >= TASK_SIZE && !user_mode(regs))
goto vmalloc_fault;
/* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
if (in_atomic() || !mm) {
bad_page_fault(regs, address, SIGSEGV);
return;
}
is_write = (exccause == EXCCAUSE_STORE_CACHE_ATTRIBUTE) ? 1 : 0;
is_exec = (exccause == EXCCAUSE_ITLB_PRIVILEGE ||
exccause == EXCCAUSE_ITLB_MISS ||
exccause == EXCCAUSE_FETCH_CACHE_ATTRIBUTE) ? 1 : 0;
#ifdef DEBUG_PAGE_FAULT
printk("[%s:%d:%08x:%d:%08x:%s%s]\n", current->comm, current->pid,
address, exccause, regs->pc, is_write? "w":"", is_exec? "x":"");
#endif
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
if (!vma)
goto bad_area;
if (vma->vm_start <= address)
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
if (expand_stack(vma, address))
goto bad_area;
/* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
good_area:
info.si_code = SEGV_ACCERR;
if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
} else if (is_exec) {
if (!(vma->vm_flags & VM_EXEC))
goto bad_area;
} else /* Allow read even from write-only pages. */
if (!(vma->vm_flags & (VM_READ | VM_WRITE)))
goto bad_area;
/* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
survive:
mm: fault feedback #2 This patch completes Linus's wish that the fault return codes be made into bit flags, which I agree makes everything nicer. This requires requires all handle_mm_fault callers to be modified (possibly the modifications should go further and do things like fault accounting in handle_mm_fault -- however that would be for another patch). [akpm@linux-foundation.org: fix alpha build] [akpm@linux-foundation.org: fix s390 build] [akpm@linux-foundation.org: fix sparc build] [akpm@linux-foundation.org: fix sparc64 build] [akpm@linux-foundation.org: fix ia64 build] Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Ian Molton <spyro@f2s.com> Cc: Bryan Wu <bryan.wu@analog.com> Cc: Mikael Starvik <starvik@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Hirokazu Takata <takata@linux-m32r.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Greg Ungerer <gerg@uclinux.org> Cc: Matthew Wilcox <willy@debian.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Richard Curnow <rc@rc0.org.uk> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Jeff Dike <jdike@addtoit.com> Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp> Cc: Chris Zankel <chris@zankel.net> Acked-by: Kyle McMartin <kyle@mcmartin.ca> Acked-by: Haavard Skinnemoen <hskinnemoen@atmel.com> Acked-by: Ralf Baechle <ralf@linux-mips.org> Acked-by: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> [ Still apparently needs some ARM and PPC loving - Linus ] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-19 15:47:05 +07:00
fault = handle_mm_fault(mm, vma, address, is_write);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
else if (fault & VM_FAULT_SIGBUS)
goto do_sigbus;
BUG();
}
mm: fault feedback #2 This patch completes Linus's wish that the fault return codes be made into bit flags, which I agree makes everything nicer. This requires requires all handle_mm_fault callers to be modified (possibly the modifications should go further and do things like fault accounting in handle_mm_fault -- however that would be for another patch). [akpm@linux-foundation.org: fix alpha build] [akpm@linux-foundation.org: fix s390 build] [akpm@linux-foundation.org: fix sparc build] [akpm@linux-foundation.org: fix sparc64 build] [akpm@linux-foundation.org: fix ia64 build] Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Ian Molton <spyro@f2s.com> Cc: Bryan Wu <bryan.wu@analog.com> Cc: Mikael Starvik <starvik@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Hirokazu Takata <takata@linux-m32r.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Greg Ungerer <gerg@uclinux.org> Cc: Matthew Wilcox <willy@debian.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Richard Curnow <rc@rc0.org.uk> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Jeff Dike <jdike@addtoit.com> Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp> Cc: Chris Zankel <chris@zankel.net> Acked-by: Kyle McMartin <kyle@mcmartin.ca> Acked-by: Haavard Skinnemoen <hskinnemoen@atmel.com> Acked-by: Ralf Baechle <ralf@linux-mips.org> Acked-by: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> [ Still apparently needs some ARM and PPC loving - Linus ] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-19 15:47:05 +07:00
if (fault & VM_FAULT_MAJOR)
current->maj_flt++;
else
current->min_flt++;
up_read(&mm->mmap_sem);
return;
/* Something tried to access memory that isn't in our memory map..
* Fix it, but check if it's kernel or user first..
*/
bad_area:
up_read(&mm->mmap_sem);
if (user_mode(regs)) {
current->thread.bad_vaddr = address;
current->thread.error_code = is_write;
info.si_signo = SIGSEGV;
info.si_errno = 0;
/* info.si_code has been set above */
info.si_addr = (void *) address;
force_sig_info(SIGSEGV, &info, current);
return;
}
bad_page_fault(regs, address, SIGSEGV);
return;
/* We ran out of memory, or some other thing happened to us that made
* us unable to handle the page fault gracefully.
*/
out_of_memory:
up_read(&mm->mmap_sem);
pid namespaces: define is_global_init() and is_container_init() is_init() is an ambiguous name for the pid==1 check. Split it into is_global_init() and is_container_init(). A cgroup init has it's tsk->pid == 1. A global init also has it's tsk->pid == 1 and it's active pid namespace is the init_pid_ns. But rather than check the active pid namespace, compare the task structure with 'init_pid_ns.child_reaper', which is initialized during boot to the /sbin/init process and never changes. Changelog: 2.6.22-rc4-mm2-pidns1: - Use 'init_pid_ns.child_reaper' to determine if a given task is the global init (/sbin/init) process. This would improve performance and remove dependence on the task_pid(). 2.6.21-mm2-pidns2: - [Sukadev Bhattiprolu] Changed is_container_init() calls in {powerpc, ppc,avr32}/traps.c for the _exception() call to is_global_init(). This way, we kill only the cgroup if the cgroup's init has a bug rather than force a kernel panic. [akpm@linux-foundation.org: fix comment] [sukadev@us.ibm.com: Use is_global_init() in arch/m32r/mm/fault.c] [bunk@stusta.de: kernel/pid.c: remove unused exports] [sukadev@us.ibm.com: Fix capability.c to work with threaded init] Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com> Acked-by: Pavel Emelianov <xemul@openvz.org> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Cedric Le Goater <clg@fr.ibm.com> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: Herbert Poetzel <herbert@13thfloor.at> Cc: Kirill Korotaev <dev@sw.ru> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-19 13:39:52 +07:00
if (is_global_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
}
printk("VM: killing process %s\n", current->comm);
if (user_mode(regs))
During VM oom condition, kill all threads in process group We have had complaints where a threaded application is left in a bad state after one of it's threads is killed when we hit a VM: out_of_memory condition. Killing just one of the process threads can leave the application in a bad state, whereas killing the entire process group would allow for the application to restart, or be otherwise handled, and makes it very obvious that something has gone wrong. This change allows the entire process group to be taken down, rather than just the one thread. Signed-off-by: Will Schmidt <will_schmidt@vnet.ibm.com> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Ian Molton <spyro@f2s.com> Cc: Haavard Skinnemoen <hskinnemoen@atmel.com> Cc: Mikael Starvik <starvik@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Andi Kleen <ak@suse.de> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Hirokazu Takata <takata@linux-m32r.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Kyle McMartin <kyle@mcmartin.ca> Cc: Matthew Wilcox <willy@debian.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Richard Curnow <rc@rc0.org.uk> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Chris Zankel <chris@zankel.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-16 15:24:18 +07:00
do_group_exit(SIGKILL);
bad_page_fault(regs, address, SIGKILL);
return;
do_sigbus:
up_read(&mm->mmap_sem);
/* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
current->thread.bad_vaddr = address;
info.si_code = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_ADRERR;
info.si_addr = (void *) address;
force_sig_info(SIGBUS, &info, current);
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
bad_page_fault(regs, address, SIGBUS);
vmalloc_fault:
{
/* Synchronize this task's top level page-table
* with the 'reference' page table.
*/
struct mm_struct *act_mm = current->active_mm;
int index = pgd_index(address);
pgd_t *pgd, *pgd_k;
pmd_t *pmd, *pmd_k;
pte_t *pte_k;
if (act_mm == NULL)
goto bad_page_fault;
pgd = act_mm->pgd + index;
pgd_k = init_mm.pgd + index;
if (!pgd_present(*pgd_k))
goto bad_page_fault;
pgd_val(*pgd) = pgd_val(*pgd_k);
pmd = pmd_offset(pgd, address);
pmd_k = pmd_offset(pgd_k, address);
if (!pmd_present(*pmd) || !pmd_present(*pmd_k))
goto bad_page_fault;
pmd_val(*pmd) = pmd_val(*pmd_k);
pte_k = pte_offset_kernel(pmd_k, address);
if (!pte_present(*pte_k))
goto bad_page_fault;
return;
}
bad_page_fault:
bad_page_fault(regs, address, SIGKILL);
return;
}
void
bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
{
extern void die(const char*, struct pt_regs*, long);
const struct exception_table_entry *entry;
/* Are we prepared to handle this kernel fault? */
if ((entry = search_exception_tables(regs->pc)) != NULL) {
#ifdef DEBUG_PAGE_FAULT
printk(KERN_DEBUG "%s: Exception at pc=%#010lx (%lx)\n",
current->comm, regs->pc, entry->fixup);
#endif
current->thread.bad_uaddr = address;
regs->pc = entry->fixup;
return;
}
/* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
printk(KERN_ALERT "Unable to handle kernel paging request at virtual "
"address %08lx\n pc = %08lx, ra = %08lx\n",
address, regs->pc, regs->areg[0]);
die("Oops", regs, sig);
do_exit(sig);
}