2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* PowerPC64 SLB support.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
|
2009-05-12 14:11:13 +07:00
|
|
|
* Based on earlier code written by:
|
2005-04-17 05:20:36 +07:00
|
|
|
* Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
|
|
|
|
* Copyright (c) 2001 Dave Engebretsen
|
|
|
|
* Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightening up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
#include <asm/asm-prototypes.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <asm/pgtable.h>
|
|
|
|
#include <asm/mmu.h>
|
|
|
|
#include <asm/mmu_context.h>
|
|
|
|
#include <asm/paca.h>
|
|
|
|
#include <asm/cputable.h>
|
2005-11-07 07:06:55 +07:00
|
|
|
#include <asm/cacheflush.h>
|
2006-08-07 13:19:19 +07:00
|
|
|
#include <asm/smp.h>
|
|
|
|
#include <linux/compiler.h>
|
2018-03-26 17:04:48 +07:00
|
|
|
#include <linux/context_tracking.h>
|
2017-02-04 06:16:44 +07:00
|
|
|
#include <linux/mm_types.h>
|
|
|
|
|
2007-10-30 02:24:19 +07:00
|
|
|
#include <asm/udbg.h>
|
2011-04-05 06:56:18 +07:00
|
|
|
#include <asm/code-patching.h>
|
2005-11-07 07:06:55 +07:00
|
|
|
|
2015-08-13 14:07:54 +07:00
|
|
|
/*
 * Slot numbers used for the kernel's fixed SLB entries. The same value
 * indexes the SLB shadow save area and is or'ed into the ESID word
 * built by mk_esid_data().
 */
enum slb_index {
	LINEAR_INDEX	= 0,	/* Kernel linear map (0xc000000000000000) */
	KSTACK_INDEX	= 1,	/* Kernel stack map */
};
|
2005-04-17 05:20:36 +07:00
|
|
|
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightening up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
/* Forward declaration; called from switch_slb() further down in this file. */
static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
[POWERPC] Bolt in SLB entry for kernel stack on secondary cpus
This fixes a regression reported by Kamalesh Bulabel where a POWER4
machine would crash because of an SLB miss at a point where the SLB
miss exception was unrecoverable. This regression is tracked at:
http://bugzilla.kernel.org/show_bug.cgi?id=10082
SLB misses at such points shouldn't happen because the kernel stack is
the only memory accessed other than things in the first segment of the
linear mapping (which is mapped at all times by entry 0 of the SLB).
The context switch code ensures that SLB entry 2 covers the kernel
stack, if it is not already covered by entry 0. None of entries 0
to 2 are ever replaced by the SLB miss handler.
Where this went wrong is that the context switch code assumes it
doesn't have to write to SLB entry 2 if the new kernel stack is in the
same segment as the old kernel stack, since entry 2 should already be
correct. However, when we start up a secondary cpu, it calls
slb_initialize, which doesn't set up entry 2. This is correct for
the boot cpu, where we will be using a stack in the kernel BSS at this
point (i.e. init_thread_union), but not necessarily for secondary
cpus, whose initial stack can be allocated anywhere. This doesn't
cause any immediate problem since the SLB miss handler will just
create an SLB entry somewhere else to cover the initial stack.
In fact it's possible for the cpu to go quite a long time without SLB
entry 2 being valid. Eventually, though, the entry created by the SLB
miss handler will get overwritten by some other entry, and if the next
access to the stack is at an unrecoverable point, we get the crash.
This fixes the problem by making slb_initialize create a suitable
entry for the kernel stack, if we are on a secondary cpu and the stack
isn't covered by SLB entry 0. This requires initializing the
get_paca()->kstack field earlier, so I do that in smp_create_idle
where the current field is initialized. This also abstracts a bit of
the computation that mk_esid_data in slb.c does so that it can be used
in slb_initialize.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-05-02 11:29:12 +07:00
|
|
|
/*
 * Select the ESID mask matching a segment size: ESID_MASK for
 * MMU_SEGSIZE_256M, ESID_MASK_1T for anything else.
 */
#define slb_esid_mask(ssize)						\
	(((ssize) == MMU_SEGSIZE_256M) ? ESID_MASK : ESID_MASK_1T)
|
|
|
|
|
2007-10-11 17:37:10 +07:00
|
|
|
static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
|
2015-08-13 14:07:54 +07:00
|
|
|
enum slb_index index)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
2015-08-13 14:07:54 +07:00
|
|
|
return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index;
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightening up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize,
|
2007-10-11 17:37:10 +07:00
|
|
|
unsigned long flags)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightenig up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
return (vsid << slb_vsid_shift(ssize)) | flags |
|
2007-10-11 17:37:10 +07:00
|
|
|
((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightening up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
/*
 * Build the VSID word of an SLB entry for a kernel effective address.
 * The VSID is looked up with get_kernel_vsid() and then encoded by
 * __mk_vsid_data() together with @ssize and @flags.
 */
static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
					 unsigned long flags)
{
	unsigned long vsid = get_kernel_vsid(ea, ssize);

	return __mk_vsid_data(vsid, ssize, flags);
}
|
|
|
|
|
2007-10-11 17:37:10 +07:00
|
|
|
static inline void slb_shadow_update(unsigned long ea, int ssize,
|
2007-08-03 08:55:39 +07:00
|
|
|
unsigned long flags,
|
2015-08-13 14:07:54 +07:00
|
|
|
enum slb_index index)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
2015-08-13 14:11:18 +07:00
|
|
|
struct slb_shadow *p = get_slb_shadow();
|
|
|
|
|
2006-08-07 13:19:19 +07:00
|
|
|
/*
|
|
|
|
* Clear the ESID first so the entry is not valid while we are
|
2007-08-24 13:58:37 +07:00
|
|
|
* updating it. No write barriers are needed here, provided
|
|
|
|
* we only update the current CPU's SLB shadow buffer.
|
2006-08-07 13:19:19 +07:00
|
|
|
*/
|
2018-05-30 17:31:22 +07:00
|
|
|
WRITE_ONCE(p->save_area[index].esid, 0);
|
|
|
|
WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags)));
|
|
|
|
WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index)));
|
2006-08-07 13:19:19 +07:00
|
|
|
}
|
|
|
|
|
2015-08-13 14:07:54 +07:00
|
|
|
static inline void slb_shadow_clear(enum slb_index index)
|
2006-08-07 13:19:19 +07:00
|
|
|
{
|
2018-08-23 11:56:08 +07:00
|
|
|
WRITE_ONCE(get_slb_shadow()->save_area[index].esid, cpu_to_be64(index));
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
2007-10-11 17:37:10 +07:00
|
|
|
static inline void create_shadowed_slbe(unsigned long ea, int ssize,
|
|
|
|
unsigned long flags,
|
2015-08-13 14:07:54 +07:00
|
|
|
enum slb_index index)
|
2007-08-25 10:14:28 +07:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Updating the shadow buffer before writing the SLB ensures
|
|
|
|
* we don't get a stale entry here if we get preempted by PHYP
|
|
|
|
* between these two statements.
|
|
|
|
*/
|
2015-08-13 14:07:54 +07:00
|
|
|
slb_shadow_update(ea, ssize, flags, index);
|
2007-08-25 10:14:28 +07:00
|
|
|
|
|
|
|
asm volatile("slbmte %0,%1" :
|
2007-10-11 17:37:10 +07:00
|
|
|
: "r" (mk_vsid_data(ea, ssize, flags)),
|
2015-08-13 14:07:54 +07:00
|
|
|
"r" (mk_esid_data(ea, ssize, index))
|
2007-08-25 10:14:28 +07:00
|
|
|
: "memory" );
|
|
|
|
}
|
|
|
|
|
2018-08-10 13:42:48 +07:00
|
|
|
/*
|
|
|
|
* Insert bolted entries into SLB (which may not be empty, so don't clear
|
|
|
|
* slb_cache_ptr).
|
|
|
|
*/
|
|
|
|
void __slb_restore_bolted_realmode(void)
|
|
|
|
{
|
|
|
|
struct slb_shadow *p = get_slb_shadow();
|
|
|
|
enum slb_index index;
|
|
|
|
|
|
|
|
/* No isync needed because realmode. */
|
|
|
|
for (index = 0; index < SLB_NUM_BOLTED; index++) {
|
|
|
|
asm volatile("slbmte %0,%1" :
|
|
|
|
: "r" (be64_to_cpu(p->save_area[index].vsid)),
|
|
|
|
"r" (be64_to_cpu(p->save_area[index].esid)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Insert the bolted entries into an empty SLB.
|
|
|
|
* This is not the same as rebolt because the bolted segments are not
|
|
|
|
* changed, just loaded from the shadow area.
|
|
|
|
*/
|
|
|
|
void slb_restore_bolted_realmode(void)
|
|
|
|
{
|
|
|
|
__slb_restore_bolted_realmode();
|
|
|
|
get_paca()->slb_cache_ptr = 0;
|
2018-09-14 22:30:53 +07:00
|
|
|
|
|
|
|
get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
|
|
|
|
get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
|
2018-08-10 13:42:48 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This flushes all SLB entries including 0, so it must be realmode.
|
|
|
|
*/
|
|
|
|
void slb_flush_all_realmode(void)
|
|
|
|
{
|
|
|
|
asm volatile("slbmte %0,%0; slbia" : : "r" (0));
|
|
|
|
}
|
|
|
|
|
2018-09-14 22:30:49 +07:00
|
|
|
void slb_flush_and_rebolt(void)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
/* If you change this make sure you change SLB_NUM_BOLTED
|
2014-05-15 19:38:03 +07:00
|
|
|
* and PR KVM appropriately too. */
|
2018-09-14 22:30:48 +07:00
|
|
|
unsigned long linear_llp, lflags;
|
2007-10-11 17:37:10 +07:00
|
|
|
unsigned long ksp_esid_data, ksp_vsid_data;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2018-09-14 22:30:49 +07:00
|
|
|
WARN_ON(!irqs_disabled());
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We can't take a PMU exception in the following code, so hard
|
|
|
|
* disable interrupts.
|
|
|
|
*/
|
|
|
|
hard_irq_disable();
|
|
|
|
|
2005-11-07 07:06:55 +07:00
|
|
|
linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
|
|
|
|
lflags = SLB_VSID_KERNEL | linear_llp;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2015-08-13 14:07:54 +07:00
|
|
|
ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, KSTACK_INDEX);
|
2007-10-11 17:37:10 +07:00
|
|
|
if ((ksp_esid_data & ~0xfffffffUL) <= PAGE_OFFSET) {
|
2005-04-17 05:20:36 +07:00
|
|
|
ksp_esid_data &= ~SLB_ESID_V;
|
2007-10-11 17:37:10 +07:00
|
|
|
ksp_vsid_data = 0;
|
2015-08-13 14:07:54 +07:00
|
|
|
slb_shadow_clear(KSTACK_INDEX);
|
2007-08-10 18:04:07 +07:00
|
|
|
} else {
|
|
|
|
/* Update stack entry; others don't change */
|
2015-08-13 14:07:54 +07:00
|
|
|
slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, KSTACK_INDEX);
|
2013-08-06 23:01:46 +07:00
|
|
|
ksp_vsid_data =
|
2015-08-13 14:07:54 +07:00
|
|
|
be64_to_cpu(get_slb_shadow()->save_area[KSTACK_INDEX].vsid);
|
2007-08-10 18:04:07 +07:00
|
|
|
}
|
2006-08-07 13:19:19 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* We need to do this all in asm, so we're sure we don't touch
|
|
|
|
* the stack between the slbia and rebolting it. */
|
|
|
|
asm volatile("isync\n"
|
|
|
|
"slbia\n"
|
2018-09-14 22:30:48 +07:00
|
|
|
/* Slot 1 - kernel stack */
|
2005-04-17 05:20:36 +07:00
|
|
|
"slbmte %0,%1\n"
|
|
|
|
"isync"
|
2018-09-14 22:30:48 +07:00
|
|
|
:: "r"(ksp_vsid_data),
|
2005-04-17 05:20:36 +07:00
|
|
|
"r"(ksp_esid_data)
|
|
|
|
: "memory");
|
|
|
|
|
powerpc: Allow perf_counters to access user memory at interrupt time
This provides a mechanism to allow the perf_counters code to access
user memory in a PMU interrupt routine. Such an access can cause
various kinds of interrupt: SLB miss, MMU hash table miss, segment
table miss, or TLB miss, depending on the processor. This commit
only deals with 64-bit classic/server processors, which use an MMU
hash table. 32-bit processors are already able to access user memory
at interrupt time. Since we don't soft-disable on 32-bit, we avoid
the possibility of reentering hash_page or the TLB miss handlers,
since they run with interrupts disabled.
On 64-bit processors, an SLB miss interrupt on a user address will
update the slb_cache and slb_cache_ptr fields in the paca. This is
OK except in the case where a PMU interrupt occurs in switch_slb,
which also accesses those fields. To prevent this, we hard-disable
interrupts in switch_slb. Interrupts are already soft-disabled at
this point, and will get hard-enabled when they get soft-enabled
later.
This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice,
and to make sure that it clears the slb_cache_ptr when called from
other callers than switch_slb, the existing routine is renamed to
__slb_flush_and_rebolt, which is called by switch_slb and the new
version of slb_flush_and_rebolt.
Similarly, switch_stab (used on POWER3 and RS64 processors) gets a
hard_irq_disable() to protect the per-cpu variables used there and
in ste_allocate.
If a MMU hashtable miss interrupt occurs, normally we would call
hash_page to look up the Linux PTE for the address and create a HPTE.
However, hash_page is fairly complex and takes some locks, so to
avoid the possibility of deadlock, we check the preemption count
to see if we are in a (pseudo-)NMI handler, and if so, we don't call
hash_page but instead treat it like a bad access that will get
reported up through the exception table mechanism. An interrupt
whose handler runs even though the interrupt occurred when
soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI
handler, which should use nmi_enter()/nmi_exit() rather than
irq_enter()/irq_exit().
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 12:17:54 +07:00
|
|
|
get_paca()->slb_cache_ptr = 0;
|
2018-09-14 22:30:53 +07:00
|
|
|
|
|
|
|
get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
|
|
|
|
get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
|
powerpc: Allow perf_counters to access user memory at interrupt time
This provides a mechanism to allow the perf_counters code to access
user memory in a PMU interrupt routine. Such an access can cause
various kinds of interrupt: SLB miss, MMU hash table miss, segment
table miss, or TLB miss, depending on the processor. This commit
only deals with 64-bit classic/server processors, which use an MMU
hash table. 32-bit processors are already able to access user memory
at interrupt time. Since we don't soft-disable on 32-bit, we avoid
the possibility of reentering hash_page or the TLB miss handlers,
since they run with interrupts disabled.
On 64-bit processors, an SLB miss interrupt on a user address will
update the slb_cache and slb_cache_ptr fields in the paca. This is
OK except in the case where a PMU interrupt occurs in switch_slb,
which also accesses those fields. To prevent this, we hard-disable
interrupts in switch_slb. Interrupts are already soft-disabled at
this point, and will get hard-enabled when they get soft-enabled
later.
This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice,
and to make sure that it clears the slb_cache_ptr when called from
other callers than switch_slb, the existing routine is renamed to
__slb_flush_and_rebolt, which is called by switch_slb and the new
version of slb_flush_and_rebolt.
Similarly, switch_stab (used on POWER3 and RS64 processors) gets a
hard_irq_disable() to protect the per-cpu variables used there and
in ste_allocate.
If a MMU hashtable miss interrupt occurs, normally we would call
hash_page to look up the Linux PTE for the address and create a HPTE.
However, hash_page is fairly complex and takes some locks, so to
avoid the possibility of deadlock, we check the preemption count
to see if we are in a (pseudo-)NMI handler, and if so, we don't call
hash_page but instead treat it like a bad access that will get
reported up through the exception table mechanism. An interrupt
whose handler runs even though the interrupt occurred when
soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI
handler, which should use nmi_enter()/nmi_exit() rather than
irq_enter()/irq_exit().
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 12:17:54 +07:00
|
|
|
}
|
|
|
|
|
2018-09-11 21:27:15 +07:00
|
|
|
void slb_save_contents(struct slb_entry *slb_ptr)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
unsigned long e, v;
|
|
|
|
|
|
|
|
/* Save slb_cache_ptr value. */
|
|
|
|
get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr;
|
|
|
|
|
|
|
|
if (!slb_ptr)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (i = 0; i < mmu_slb_size; i++) {
|
|
|
|
asm volatile("slbmfee %0,%1" : "=r" (e) : "r" (i));
|
|
|
|
asm volatile("slbmfev %0,%1" : "=r" (v) : "r" (i));
|
|
|
|
slb_ptr->esid = e;
|
|
|
|
slb_ptr->vsid = v;
|
|
|
|
slb_ptr++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void slb_dump_contents(struct slb_entry *slb_ptr)
|
|
|
|
{
|
|
|
|
int i, n;
|
|
|
|
unsigned long e, v;
|
|
|
|
unsigned long llp;
|
|
|
|
|
|
|
|
if (!slb_ptr)
|
|
|
|
return;
|
|
|
|
|
|
|
|
pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());
|
2018-09-14 22:30:53 +07:00
|
|
|
pr_err("Last SLB entry inserted at slot %u\n", get_paca()->stab_rr);
|
2018-09-11 21:27:15 +07:00
|
|
|
|
|
|
|
for (i = 0; i < mmu_slb_size; i++) {
|
|
|
|
e = slb_ptr->esid;
|
|
|
|
v = slb_ptr->vsid;
|
|
|
|
slb_ptr++;
|
|
|
|
|
|
|
|
if (!e && !v)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
pr_err("%02d %016lx %016lx\n", i, e, v);
|
|
|
|
|
|
|
|
if (!(e & SLB_ESID_V)) {
|
|
|
|
pr_err("\n");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
llp = v & SLB_VSID_LLP;
|
|
|
|
if (v & SLB_VSID_B_1T) {
|
|
|
|
pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n",
|
|
|
|
GET_ESID_1T(e),
|
|
|
|
(v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp);
|
|
|
|
} else {
|
|
|
|
pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n",
|
|
|
|
GET_ESID(e),
|
|
|
|
(v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pr_err("----------------------------------\n");
|
|
|
|
|
|
|
|
/* Dump slb cache entires as well. */
|
|
|
|
pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr);
|
|
|
|
pr_err("Valid SLB cache entries:\n");
|
|
|
|
n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES);
|
|
|
|
for (i = 0; i < n; i++)
|
|
|
|
pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
|
|
|
|
pr_err("Rest of SLB cache entries:\n");
|
|
|
|
for (i = n; i < SLB_CACHE_ENTRIES; i++)
|
|
|
|
pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
|
|
|
|
}
|
|
|
|
|
2007-08-03 08:55:39 +07:00
|
|
|
/*
 * Thin wrapper: flush the SLB and rebolt via slb_flush_and_rebolt().
 */
void slb_vmalloc_update(void)
{
	slb_flush_and_rebolt();
}
|
|
|
|
|
2007-10-31 01:59:33 +07:00
|
|
|
/* Helper function to compare esids. There are four cases to handle.
|
|
|
|
* 1. The system is not 1T segment size capable. Use the GET_ESID compare.
|
|
|
|
* 2. The system is 1T capable, both addresses are < 1T, use the GET_ESID compare.
|
|
|
|
* 3. The system is 1T capable, only one of the two addresses is > 1T. This is not a match.
|
|
|
|
* 4. The system is 1T capable, both addresses are > 1T, use the GET_ESID_1T macro to compare.
|
|
|
|
*/
|
|
|
|
static inline int esids_match(unsigned long addr1, unsigned long addr2)
|
|
|
|
{
|
|
|
|
int esid_1t_count;
|
|
|
|
|
|
|
|
/* System is not 1T segment size capable. */
|
2011-04-07 02:48:50 +07:00
|
|
|
if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
|
2007-10-31 01:59:33 +07:00
|
|
|
return (GET_ESID(addr1) == GET_ESID(addr2));
|
|
|
|
|
|
|
|
esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) +
|
|
|
|
((addr2 >> SID_SHIFT_1T) != 0));
|
|
|
|
|
|
|
|
/* both addresses are < 1T */
|
|
|
|
if (esid_1t_count == 0)
|
|
|
|
return (GET_ESID(addr1) == GET_ESID(addr2));
|
|
|
|
|
|
|
|
/* One address < 1T, the other > 1T. Not a match */
|
|
|
|
if (esid_1t_count == 1)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Both addresses are > 1T. */
|
|
|
|
return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2));
|
|
|
|
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Flush all user entries from the segment table of the current processor. */
|
|
|
|
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
|
|
|
|
{
|
|
|
|
unsigned long pc = KSTK_EIP(tsk);
|
|
|
|
unsigned long stack = KSTK_ESP(tsk);
|
2009-07-14 03:53:53 +07:00
|
|
|
unsigned long exec_base;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
powerpc: Allow perf_counters to access user memory at interrupt time
This provides a mechanism to allow the perf_counters code to access
user memory in a PMU interrupt routine. Such an access can cause
various kinds of interrupt: SLB miss, MMU hash table miss, segment
table miss, or TLB miss, depending on the processor. This commit
only deals with 64-bit classic/server processors, which use an MMU
hash table. 32-bit processors are already able to access user memory
at interrupt time. Since we don't soft-disable on 32-bit, we avoid
the possibility of reentering hash_page or the TLB miss handlers,
since they run with interrupts disabled.
On 64-bit processors, an SLB miss interrupt on a user address will
update the slb_cache and slb_cache_ptr fields in the paca. This is
OK except in the case where a PMU interrupt occurs in switch_slb,
which also accesses those fields. To prevent this, we hard-disable
interrupts in switch_slb. Interrupts are already soft-disabled at
this point, and will get hard-enabled when they get soft-enabled
later.
This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice,
and to make sure that it clears the slb_cache_ptr when called from
other callers than switch_slb, the existing routine is renamed to
__slb_flush_and_rebolt, which is called by switch_slb and the new
version of slb_flush_and_rebolt.
Similarly, switch_stab (used on POWER3 and RS64 processors) gets a
hard_irq_disable() to protect the per-cpu variables used there and
in ste_allocate.
If a MMU hashtable miss interrupt occurs, normally we would call
hash_page to look up the Linux PTE for the address and create a HPTE.
However, hash_page is fairly complex and takes some locks, so to
avoid the possibility of deadlock, we check the preemption count
to see if we are in a (pseudo-)NMI handler, and if so, we don't call
hash_page but instead treat it like a bad access that will get
reported up through the exception table mechanism. An interrupt
whose handler runs even though the interrupt occurred when
soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI
handler, which should use nmi_enter()/nmi_exit() rather than
irq_enter()/irq_exit().
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 12:17:54 +07:00
|
|
|
/*
|
|
|
|
* We need interrupts hard-disabled here, not just soft-disabled,
|
|
|
|
* so that a PMU interrupt can't occur, which might try to access
|
|
|
|
* user memory (to get a stack trace) and possibly cause an SLB miss
|
|
|
|
* which would update the slb_cache/slb_cache_ptr fields in the PACA.
|
|
|
|
*/
|
|
|
|
hard_irq_disable();
|
2018-09-14 22:30:50 +07:00
|
|
|
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
|
|
|
|
/*
|
|
|
|
* SLBIA IH=3 invalidates all Class=1 SLBEs and their
|
|
|
|
* associated lookaside structures, which matches what
|
|
|
|
* switch_slb wants. So ARCH_300 does not use the slb
|
|
|
|
* cache.
|
|
|
|
*/
|
|
|
|
asm volatile("isync ; " PPC_SLBIA(3)" ; isync");
|
|
|
|
} else {
|
|
|
|
unsigned long offset = get_paca()->slb_cache_ptr;
|
|
|
|
|
|
|
|
if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
|
|
|
|
offset <= SLB_CACHE_ENTRIES) {
|
|
|
|
unsigned long slbie_data = 0;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
asm volatile("isync" : : : "memory");
|
|
|
|
for (i = 0; i < offset; i++) {
|
|
|
|
/* EA */
|
|
|
|
slbie_data = (unsigned long)
|
|
|
|
get_paca()->slb_cache[i] << SID_SHIFT;
|
|
|
|
slbie_data |= user_segment_size(slbie_data)
|
|
|
|
<< SLBIE_SSIZE_SHIFT;
|
|
|
|
slbie_data |= SLBIE_C; /* user slbs have C=1 */
|
|
|
|
asm volatile("slbie %0" : : "r" (slbie_data));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Workaround POWER5 < DD2.1 issue */
|
|
|
|
if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
|
|
|
|
asm volatile("slbie %0" : : "r" (slbie_data));
|
|
|
|
|
|
|
|
asm volatile("isync" : : : "memory");
|
|
|
|
} else {
|
|
|
|
struct slb_shadow *p = get_slb_shadow();
|
|
|
|
unsigned long ksp_esid_data =
|
|
|
|
be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
|
|
|
|
unsigned long ksp_vsid_data =
|
|
|
|
be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
|
|
|
|
|
|
|
|
asm volatile("isync\n"
|
|
|
|
PPC_SLBIA(1) "\n"
|
|
|
|
"slbmte %0,%1\n"
|
|
|
|
"isync"
|
|
|
|
:: "r"(ksp_vsid_data),
|
|
|
|
"r"(ksp_esid_data));
|
2018-09-14 22:30:53 +07:00
|
|
|
|
|
|
|
get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
2018-09-14 22:30:50 +07:00
|
|
|
get_paca()->slb_cache_ptr = 0;
|
2018-09-14 22:30:46 +07:00
|
|
|
}
|
2018-09-14 22:30:53 +07:00
|
|
|
get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* preload some userspace segments into the SLB.
|
2009-07-14 03:53:53 +07:00
|
|
|
* Almost all 32 and 64bit PowerPC executables are linked at
|
|
|
|
* 0x10000000 so it makes sense to preload this segment.
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
2009-07-14 03:53:53 +07:00
|
|
|
exec_base = 0x10000000;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2009-07-14 03:53:52 +07:00
|
|
|
if (is_kernel_addr(pc) || is_kernel_addr(stack) ||
|
2009-07-14 03:53:53 +07:00
|
|
|
is_kernel_addr(exec_base))
|
2005-04-17 05:20:36 +07:00
|
|
|
return;
|
|
|
|
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightening up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
slb_allocate_user(mm, pc);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2009-07-14 03:53:52 +07:00
|
|
|
if (!esids_match(pc, stack))
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightening up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
slb_allocate_user(mm, stack);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2009-07-14 03:53:53 +07:00
|
|
|
if (!esids_match(pc, exec_base) &&
|
|
|
|
!esids_match(stack, exec_base))
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightening up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
slb_allocate_user(mm, exec_base);
|
2005-11-07 07:06:55 +07:00
|
|
|
}
|
|
|
|
|
2009-08-28 19:06:29 +07:00
|
|
|
/*
 * Store @size in mmu_slb_size.  alloc_slb_index() uses that value as the
 * upper bound when it walks SLB slots round-robin.
 */
void slb_set_size(u16 size)
{
	mmu_slb_size = size;
}
|
|
|
|
|
2018-09-14 22:30:52 +07:00
|
|
|
static void cpu_flush_slb(void *parm)
|
|
|
|
{
|
|
|
|
struct mm_struct *mm = parm;
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
if (mm != current->active_mm)
|
|
|
|
return;
|
|
|
|
|
|
|
|
local_irq_save(flags);
|
|
|
|
slb_flush_and_rebolt();
|
|
|
|
local_irq_restore(flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
void core_flush_all_slbs(struct mm_struct *mm)
|
|
|
|
{
|
|
|
|
on_each_cpu(cpu_flush_slb, mm, 1);
|
|
|
|
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
void slb_initialize(void)
|
|
|
|
{
|
2006-06-15 07:45:18 +07:00
|
|
|
unsigned long linear_llp, vmalloc_llp, io_llp;
|
2018-09-14 22:30:48 +07:00
|
|
|
unsigned long lflags;
|
2005-11-07 07:06:55 +07:00
|
|
|
static int slb_encoding_inited;
|
[POWERPC] vmemmap fixes to use smaller pages
This changes vmemmap to use a different region (region 0xf) of the
address space, and to configure the page size of that region
dynamically at boot.
The problem with the current approach of always using 16M pages is that
it's not well suited to machines that have small amounts of memory such
as small partitions on pseries, or PS3's.
In fact, on the PS3, failure to allocate the 16M page backing vmmemmap
tends to prevent hotplugging the HV's "additional" memory, thus limiting
the available memory even more, from my experience down to something
like 80M total, which makes it really not very useable.
The logic used by my match to choose the vmemmap page size is:
- If 16M pages are available and there's 1G or more RAM at boot,
use that size.
- Else if 64K pages are available, use that
- Else use 4K pages
I've tested on a POWER6 (16M pages) and on an iSeries POWER3 (4K pages)
and it seems to work fine.
Note that I intend to change the way we organize the kernel regions &
SLBs so the actual region will change from 0xf back to something else at
one point, as I simplify the SLB miss handler, but that will be for a
later patch.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-04-30 12:41:48 +07:00
|
|
|
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
|
|
|
unsigned long vmemmap_llp;
|
|
|
|
#endif
|
2005-11-07 07:06:55 +07:00
|
|
|
|
|
|
|
/* Prepare our SLB miss handler based on our page size */
|
|
|
|
linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
|
2006-06-15 07:45:18 +07:00
|
|
|
io_llp = mmu_psize_defs[mmu_io_psize].sllp;
|
|
|
|
vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
|
|
|
|
get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
|
[POWERPC] vmemmap fixes to use smaller pages
This changes vmemmap to use a different region (region 0xf) of the
address space, and to configure the page size of that region
dynamically at boot.
The problem with the current approach of always using 16M pages is that
it's not well suited to machines that have small amounts of memory such
as small partitions on pseries, or PS3's.
In fact, on the PS3, failure to allocate the 16M page backing vmmemmap
tends to prevent hotplugging the HV's "additional" memory, thus limiting
the available memory even more, from my experience down to something
like 80M total, which makes it really not very useable.
The logic used by my match to choose the vmemmap page size is:
- If 16M pages are available and there's 1G or more RAM at boot,
use that size.
- Else if 64K pages are available, use that
- Else use 4K pages
I've tested on a POWER6 (16M pages) and on an iSeries POWER3 (4K pages)
and it seems to work fine.
Note that I intend to change the way we organize the kernel regions &
SLBs so the actual region will change from 0xf back to something else at
one point, as I simplify the SLB miss handler, but that will be for a
later patch.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-04-30 12:41:48 +07:00
|
|
|
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
|
|
|
vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
|
|
|
|
#endif
|
2005-11-07 07:06:55 +07:00
|
|
|
if (!slb_encoding_inited) {
|
|
|
|
slb_encoding_inited = 1;
|
2009-06-18 01:13:51 +07:00
|
|
|
pr_devel("SLB: linear LLP = %04lx\n", linear_llp);
|
|
|
|
pr_devel("SLB: io LLP = %04lx\n", io_llp);
|
[POWERPC] vmemmap fixes to use smaller pages
This changes vmemmap to use a different region (region 0xf) of the
address space, and to configure the page size of that region
dynamically at boot.
The problem with the current approach of always using 16M pages is that
it's not well suited to machines that have small amounts of memory such
as small partitions on pseries, or PS3's.
In fact, on the PS3, failure to allocate the 16M page backing vmmemmap
tends to prevent hotplugging the HV's "additional" memory, thus limiting
the available memory even more, from my experience down to something
like 80M total, which makes it really not very useable.
The logic used by my match to choose the vmemmap page size is:
- If 16M pages are available and there's 1G or more RAM at boot,
use that size.
- Else if 64K pages are available, use that
- Else use 4K pages
I've tested on a POWER6 (16M pages) and on an iSeries POWER3 (4K pages)
and it seems to work fine.
Note that I intend to change the way we organize the kernel regions &
SLBs so the actual region will change from 0xf back to something else at
one point, as I simplify the SLB miss handler, but that will be for a
later patch.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-04-30 12:41:48 +07:00
|
|
|
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
2009-06-18 01:13:51 +07:00
|
|
|
pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
|
[POWERPC] vmemmap fixes to use smaller pages
This changes vmemmap to use a different region (region 0xf) of the
address space, and to configure the page size of that region
dynamically at boot.
The problem with the current approach of always using 16M pages is that
it's not well suited to machines that have small amounts of memory such
as small partitions on pseries, or PS3's.
In fact, on the PS3, failure to allocate the 16M page backing vmmemmap
tends to prevent hotplugging the HV's "additional" memory, thus limiting
the available memory even more, from my experience down to something
like 80M total, which makes it really not very useable.
The logic used by my match to choose the vmemmap page size is:
- If 16M pages are available and there's 1G or more RAM at boot,
use that size.
- Else if 64K pages are available, use that
- Else use 4K pages
I've tested on a POWER6 (16M pages) and on an iSeries POWER3 (4K pages)
and it seems to work fine.
Note that I intend to change the way we organize the kernel regions &
SLBs so the actual region will change from 0xf back to something else at
one point, as I simplify the SLB miss handler, but that will be for a
later patch.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-04-30 12:41:48 +07:00
|
|
|
#endif
|
2005-11-07 07:06:55 +07:00
|
|
|
}
|
|
|
|
|
2018-09-14 22:30:45 +07:00
|
|
|
get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
|
2018-09-14 22:30:53 +07:00
|
|
|
get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
|
|
|
|
get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
|
2006-11-14 08:57:38 +07:00
|
|
|
|
2005-11-07 07:06:55 +07:00
|
|
|
lflags = SLB_VSID_KERNEL | linear_llp;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2015-07-29 14:09:59 +07:00
|
|
|
/* Invalidate the entire SLB (even entry 0) & all the ERATS */
|
2007-08-25 10:14:28 +07:00
|
|
|
asm volatile("isync":::"memory");
|
|
|
|
asm volatile("slbmte %0,%0"::"r" (0) : "memory");
|
|
|
|
asm volatile("isync; slbia; isync":::"memory");
|
2015-08-13 14:07:54 +07:00
|
|
|
create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);
|
2007-08-25 10:14:28 +07:00
|
|
|
|
[POWERPC] Bolt in SLB entry for kernel stack on secondary cpus
This fixes a regression reported by Kamalesh Bulabel where a POWER4
machine would crash because of an SLB miss at a point where the SLB
miss exception was unrecoverable. This regression is tracked at:
http://bugzilla.kernel.org/show_bug.cgi?id=10082
SLB misses at such points shouldn't happen because the kernel stack is
the only memory accessed other than things in the first segment of the
linear mapping (which is mapped at all times by entry 0 of the SLB).
The context switch code ensures that SLB entry 2 covers the kernel
stack, if it is not already covered by entry 0. None of entries 0
to 2 are ever replaced by the SLB miss handler.
Where this went wrong is that the context switch code assumes it
doesn't have to write to SLB entry 2 if the new kernel stack is in the
same segment as the old kernel stack, since entry 2 should already be
correct. However, when we start up a secondary cpu, it calls
slb_initialize, which doesn't set up entry 2. This is correct for
the boot cpu, where we will be using a stack in the kernel BSS at this
point (i.e. init_thread_union), but not necessarily for secondary
cpus, whose initial stack can be allocated anywhere. This doesn't
cause any immediate problem since the SLB miss handler will just
create an SLB entry somewhere else to cover the initial stack.
In fact it's possible for the cpu to go quite a long time without SLB
entry 2 being valid. Eventually, though, the entry created by the SLB
miss handler will get overwritten by some other entry, and if the next
access to the stack is at an unrecoverable point, we get the crash.
This fixes the problem by making slb_initialize create a suitable
entry for the kernel stack, if we are on a secondary cpu and the stack
isn't covered by SLB entry 0. This requires initializing the
get_paca()->kstack field earlier, so I do that in smp_create_idle
where the current field is initialized. This also abstracts a bit of
the computation that mk_esid_data in slb.c does so that it can be used
in slb_initialize.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-05-02 11:29:12 +07:00
|
|
|
/* For the boot cpu, we're running on the stack in init_thread_union,
|
|
|
|
* which is in the first segment of the linear mapping, and also
|
|
|
|
* get_paca()->kstack hasn't been initialized yet.
|
|
|
|
* For secondary cpus, we need to bolt the kernel stack entry now.
|
|
|
|
*/
|
2015-08-13 14:07:54 +07:00
|
|
|
slb_shadow_clear(KSTACK_INDEX);
|
[POWERPC] Bolt in SLB entry for kernel stack on secondary cpus
This fixes a regression reported by Kamalesh Bulabel where a POWER4
machine would crash because of an SLB miss at a point where the SLB
miss exception was unrecoverable. This regression is tracked at:
http://bugzilla.kernel.org/show_bug.cgi?id=10082
SLB misses at such points shouldn't happen because the kernel stack is
the only memory accessed other than things in the first segment of the
linear mapping (which is mapped at all times by entry 0 of the SLB).
The context switch code ensures that SLB entry 2 covers the kernel
stack, if it is not already covered by entry 0. None of entries 0
to 2 are ever replaced by the SLB miss handler.
Where this went wrong is that the context switch code assumes it
doesn't have to write to SLB entry 2 if the new kernel stack is in the
same segment as the old kernel stack, since entry 2 should already be
correct. However, when we start up a secondary cpu, it calls
slb_initialize, which doesn't set up entry 2. This is correct for
the boot cpu, where we will be using a stack in the kernel BSS at this
point (i.e. init_thread_union), but not necessarily for secondary
cpus, whose initial stack can be allocated anywhere. This doesn't
cause any immediate problem since the SLB miss handler will just
create an SLB entry somewhere else to cover the initial stack.
In fact it's possible for the cpu to go quite a long time without SLB
entry 2 being valid. Eventually, though, the entry created by the SLB
miss handler will get overwritten by some other entry, and if the next
access to the stack is at an unrecoverable point, we get the crash.
This fixes the problem by making slb_initialize create a suitable
entry for the kernel stack, if we are on a secondary cpu and the stack
isn't covered by SLB entry 0. This requires initializing the
get_paca()->kstack field earlier, so I do that in smp_create_idle
where the current field is initialized. This also abstracts a bit of
the computation that mk_esid_data in slb.c does so that it can be used
in slb_initialize.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-05-02 11:29:12 +07:00
|
|
|
if (raw_smp_processor_id() != boot_cpuid &&
|
|
|
|
(get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
|
|
|
|
create_shadowed_slbe(get_paca()->kstack,
|
2015-08-13 14:07:54 +07:00
|
|
|
mmu_kernel_ssize, lflags, KSTACK_INDEX);
|
2008-01-15 13:29:33 +07:00
|
|
|
|
2007-08-25 10:14:28 +07:00
|
|
|
asm volatile("isync":::"memory");
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
2018-03-26 17:04:48 +07:00
|
|
|
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightenig up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
static void slb_cache_update(unsigned long esid_data)
|
2018-03-26 17:04:48 +07:00
|
|
|
{
|
|
|
|
int slb_cache_index;
|
|
|
|
|
2018-09-14 22:30:50 +07:00
|
|
|
if (cpu_has_feature(CPU_FTR_ARCH_300))
|
|
|
|
return; /* ISAv3.0B and later does not use slb_cache */
|
|
|
|
|
2018-03-26 17:04:48 +07:00
|
|
|
/*
|
|
|
|
* Now update slb cache entries
|
|
|
|
*/
|
|
|
|
slb_cache_index = get_paca()->slb_cache_ptr;
|
|
|
|
if (slb_cache_index < SLB_CACHE_ENTRIES) {
|
|
|
|
/*
|
|
|
|
* We have space in slb cache for optimized switch_slb().
|
|
|
|
* Top 36 bits from esid_data as per ISA
|
|
|
|
*/
|
|
|
|
get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28;
|
|
|
|
get_paca()->slb_cache_ptr++;
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Our cache is full and the current cache content strictly
|
|
|
|
* doesn't indicate the active SLB conents. Bump the ptr
|
|
|
|
* so that switch_slb() will ignore the cache.
|
|
|
|
*/
|
|
|
|
get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-14 22:30:53 +07:00
|
|
|
/*
 * Choose the SLB slot for a new entry and update the paca bookkeeping.
 *
 * @kernel: true when the slot will hold a kernel segment.
 *
 * While slb_used_bitmap still has a clear bit, the lowest clear bit is
 * taken.  Once all 32 bits are set, slots are recycled round-robin via
 * stab_rr, wrapping from mmu_slb_size - 1 back to SLB_NUM_BOLTED.
 *
 * Returns the chosen index; BUG()s if it would fall below SLB_NUM_BOLTED.
 */
static enum slb_index alloc_slb_index(bool kernel)
{
	enum slb_index index;

	/*
	 * The allocation bitmaps can become out of synch with the SLB
	 * when the _switch code does slbie when bolting a new stack
	 * segment and it must not be anywhere else in the SLB. This leaves
	 * a kernel allocated entry that is unused in the SLB. With very
	 * large systems or small segment sizes, the bitmaps could slowly
	 * fill with these entries. They will eventually be cleared out
	 * by the round robin allocator in that case, so it's probably not
	 * worth accounting for.
	 */

	/*
	 * SLBs beyond 32 entries are allocated with stab_rr only
	 * POWER7/8/9 have 32 SLB entries, this could be expanded if a
	 * future CPU has more.
	 */
	if (get_paca()->slb_used_bitmap == U32_MAX) {
		/* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
		index = get_paca()->stab_rr;
		index = (index < (mmu_slb_size - 1)) ? index + 1 : SLB_NUM_BOLTED;
		get_paca()->stab_rr = index;

		/* Only the first 32 slots are tracked in the kernel bitmap. */
		if (index < 32) {
			unsigned int slot_bit = 1U << index;

			if (kernel)
				get_paca()->slb_kern_bitmap |= slot_bit;
			else
				get_paca()->slb_kern_bitmap &= ~slot_bit;
		}
	} else {
		/* A free slot exists: claim the lowest clear bit. */
		index = ffz(get_paca()->slb_used_bitmap);
		get_paca()->slb_used_bitmap |= 1U << index;
		if (kernel)
			get_paca()->slb_kern_bitmap |= 1U << index;
	}

	BUG_ON(index < SLB_NUM_BOLTED);

	return index;
}
|
|
|
|
|
|
|
|
/*
 * Build an SLB entry for @ea and write it into the SLB.
 *
 * @ea:      effective address the entry must cover
 * @context: context id passed to get_vsid()
 * @flags:   flag bits passed to __mk_vsid_data()
 * @ssize:   segment size selector
 * @kernel:  true for a kernel entry; false entries are also recorded in
 *           the slb_cache via slb_cache_update()
 *
 * Returns 0 on success, or -EFAULT when get_vsid() yields 0.  The VSID is
 * looked up before a slot is allocated, so the error path does not consume
 * an SLB index.
 */
static long slb_insert_entry(unsigned long ea, unsigned long context,
				unsigned long flags, int ssize, bool kernel)
{
	unsigned long seg_vsid;
	unsigned long vsid_word, esid_word;
	enum slb_index slot;

	seg_vsid = get_vsid(context, ea, ssize);
	if (seg_vsid == 0)
		return -EFAULT;

	slot = alloc_slb_index(kernel);

	vsid_word = __mk_vsid_data(seg_vsid, ssize, flags);
	esid_word = mk_esid_data(ea, ssize, slot);

	/*
	 * No need for an isync before or after this slbmte. The exception
	 * we enter with and the rfid we exit with are context synchronizing.
	 *
	 * NOTE(review): an earlier comment here also said only user segments
	 * are handled, but @kernel selects kernel entries too - confirm the
	 * synchronisation argument holds for every caller.
	 */
	asm volatile("slbmte %0, %1" : : "r" (vsid_word), "r" (esid_word));

	if (kernel)
		return 0;

	slb_cache_update(esid_word);
	return 0;
}
|
|
|
|
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightening up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
static long slb_allocate_kernel(unsigned long ea, unsigned long id)
|
2018-03-26 17:04:48 +07:00
|
|
|
{
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightenig up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
unsigned long context;
|
|
|
|
unsigned long flags;
|
|
|
|
int ssize;
|
2018-03-26 17:04:48 +07:00
|
|
|
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightenig up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
|
|
|
|
return -EFAULT;
|
2018-03-26 17:04:48 +07:00
|
|
|
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightenig up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
if (id == KERNEL_REGION_ID) {
|
|
|
|
flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
|
|
|
|
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
|
|
|
} else if (id == VMEMMAP_REGION_ID) {
|
|
|
|
flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
|
|
|
|
#endif
|
|
|
|
} else if (id == VMALLOC_REGION_ID) {
|
|
|
|
if (ea < H_VMALLOC_END)
|
|
|
|
flags = get_paca()->vmalloc_sllp;
|
|
|
|
else
|
|
|
|
flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
|
|
|
|
} else {
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
ssize = MMU_SEGSIZE_1T;
|
|
|
|
if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
|
|
|
|
ssize = MMU_SEGSIZE_256M;
|
|
|
|
|
|
|
|
context = id - KERNEL_REGION_CONTEXT_OFFSET;
|
2018-03-26 17:04:48 +07:00
|
|
|
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightenig up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
return slb_insert_entry(ea, context, flags, ssize, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
|
|
|
|
{
|
|
|
|
unsigned long context;
|
|
|
|
unsigned long flags;
|
|
|
|
int bpsize;
|
|
|
|
int ssize;
|
2018-03-26 17:04:48 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* consider this as bad access if we take a SLB miss
|
|
|
|
* on an address above addr limit.
|
|
|
|
*/
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightenig up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
if (ea >= mm->context.slb_addr_limit)
|
|
|
|
return -EFAULT;
|
2018-03-26 17:04:48 +07:00
|
|
|
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightenig up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
context = get_ea_context(&mm->context, ea);
|
2018-03-26 17:04:48 +07:00
|
|
|
if (!context)
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightenig up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
return -EFAULT;
|
2018-03-26 17:04:48 +07:00
|
|
|
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightenig up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
if (unlikely(ea >= H_PGTABLE_RANGE)) {
|
|
|
|
WARN_ON(1);
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
2018-03-26 17:04:48 +07:00
|
|
|
|
powerpc/64s/hash: convert SLB miss handlers to C
This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.
This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.
Arbitrary kernel memory may not be accessed when handling kernel space
SLB misses, so care should be taken there. However user SLB misses can
access any kernel memory, which can be used to move some fields out of
the paca (in later patches).
User SLB misses could quite easily reconcile IRQs and set up a first
class kernel environment and exit via ret_from_except, however that
doesn't seem to be necessary at the moment, so we only do that if a
bad fault is encountered.
[ Credit to Aneesh for bug fixes, error checks, and improvements to bad
address handling, etc ]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Since RFC:
- Added MSR[RI] handling
- Fixed up a register loss bug exposed by irq tracing (Aneesh)
- Reject misses outside the defined kernel regions (Aneesh)
- Added several more sanity checks and error handling (Aneesh), we may
look at consolidating these tests and tightenig up the code but for
a first pass we decided it's better to check carefully.
Since v1:
- Fixed SLB cache corruption (Aneesh)
- Fixed untidy SLBE allocation "leak" in get_vsid error case
- Now survives some stress testing on real hardware
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-09-14 22:30:51 +07:00
|
|
|
ssize = user_segment_size(ea);
|
|
|
|
|
|
|
|
bpsize = get_slice_psize(mm, ea);
|
|
|
|
flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
|
|
|
|
|
|
|
|
return slb_insert_entry(ea, context, flags, ssize, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
long do_slb_fault(struct pt_regs *regs, unsigned long ea)
|
|
|
|
{
|
|
|
|
unsigned long id = REGION_ID(ea);
|
|
|
|
|
|
|
|
/* IRQs are not reconciled here, so can't check irqs_disabled */
|
|
|
|
VM_WARN_ON(mfmsr() & MSR_EE);
|
|
|
|
|
|
|
|
if (unlikely(!(regs->msr & MSR_RI)))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SLB kernel faults must be very careful not to touch anything
|
|
|
|
* that is not bolted. E.g., PACA and global variables are okay,
|
|
|
|
* mm->context stuff is not.
|
|
|
|
*
|
|
|
|
* SLB user faults can access all of kernel memory, but must be
|
|
|
|
* careful not to touch things like IRQ state because it is not
|
|
|
|
* "reconciled" here. The difficulty is that we must use
|
|
|
|
* fast_exception_return to return from kernel SLB faults without
|
|
|
|
* looking at possible non-bolted memory. We could test user vs
|
|
|
|
* kernel faults in the interrupt handler asm and do a full fault,
|
|
|
|
* reconcile, ret_from_except for user faults which would make them
|
|
|
|
* first class kernel code. But for performance it's probably nicer
|
|
|
|
* if they go via fast_exception_return too.
|
|
|
|
*/
|
|
|
|
if (id >= KERNEL_REGION_ID) {
|
|
|
|
return slb_allocate_kernel(ea, id);
|
|
|
|
} else {
|
|
|
|
struct mm_struct *mm = current->mm;
|
|
|
|
|
|
|
|
if (unlikely(!mm))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
return slb_allocate_user(mm, ea);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err)
|
|
|
|
{
|
|
|
|
if (err == -EFAULT) {
|
|
|
|
if (user_mode(regs))
|
|
|
|
_exception(SIGSEGV, regs, SEGV_BNDERR, ea);
|
|
|
|
else
|
|
|
|
bad_page_fault(regs, ea, SIGSEGV);
|
|
|
|
} else if (err == -EINVAL) {
|
|
|
|
unrecoverable_exception(regs);
|
|
|
|
} else {
|
|
|
|
BUG();
|
|
|
|
}
|
2018-03-26 17:04:48 +07:00
|
|
|
}
|