2005-11-19 16:17:32 +07:00
|
|
|
#ifndef _ASM_POWERPC_PGTABLE_H
|
|
|
|
#define _ASM_POWERPC_PGTABLE_H
|
2005-12-17 04:43:46 +07:00
|
|
|
#ifdef __KERNEL__
|
2005-11-19 16:17:32 +07:00
|
|
|
|
2007-06-13 11:52:56 +07:00
|
|
|
#ifndef __ASSEMBLY__
|
2013-11-18 16:28:13 +07:00
|
|
|
#include <linux/mmdebug.h>
|
2007-06-13 11:52:56 +07:00
|
|
|
#include <asm/processor.h> /* For TASK_SIZE */
|
|
|
|
#include <asm/mmu.h>
|
|
|
|
#include <asm/page.h>
|
powerpc/mm: Rework I$/D$ coherency (v3)
This patch reworks the way we do I and D cache coherency on PowerPC.
The "old" way was split in 3 different parts depending on the processor type:
- Hash with per-page exec support (64-bit and >= POWER4 only) does it
at hashing time, by preventing exec on unclean pages and cleaning pages
on exec faults.
- Everything without per-page exec support (32-bit hash, 8xx, and
64-bit < POWER4) does it for all page going to user space in update_mmu_cache().
- Embedded with per-page exec support does it from do_page_fault() on
exec faults, in a way similar to what the hash code does.
That leads to confusion, and bugs. For example, the method using update_mmu_cache()
is racy on SMP where another processor can see the new PTE and hash it in before
we have cleaned the cache, and then blow trying to execute. This is hard to hit but
I think it has bitten us in the past.
Also, it's inefficient for embedded where we always end up having to do at least
one more page fault.
This reworks the whole thing by moving the cache sync into two main call sites,
though we keep different behaviours depending on the HW capability. The call
sites are set_pte_at() which is now made out of line, and ptep_set_access_flags()
which joins the former in pgtable.c
The base idea for Embedded with per-page exec support, is that we now do the
flush at set_pte_at() time when coming from an exec fault, which allows us
to avoid the double fault problem completely (we can even improve the situation
more by implementing TLB preload in update_mmu_cache() but that's for later).
If for some reason we didn't do it there and we try to execute, we'll hit
the page fault, which will do a minor fault, which will hit ptep_set_access_flags()
to do things like update _PAGE_ACCESSED or _PAGE_DIRTY if needed, we just make
these guys also perform the I/D cache sync for exec faults now. This second path
is the catch all for things that weren't cleaned at set_pte_at() time.
For cpus without per-page exec support, we always do the sync at set_pte_at(),
thus guaranteeing that when the PTE is visible to other processors, the cache
is clean.
For the 64-bit hash with per-page exec support case, we keep the old mechanism
for now. I'll look into changing it later, once I've reworked a bit how we
use _PAGE_EXEC.
This is also a first step for adding _PAGE_EXEC support for embedded platforms
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2009-02-10 23:02:37 +07:00
|
|
|
|
2007-06-13 11:52:56 +07:00
|
|
|
struct mm_struct;
|
powerpc/mm: Rework I$/D$ coherency (v3)
This patch reworks the way we do I and D cache coherency on PowerPC.
The "old" way was split in 3 different parts depending on the processor type:
- Hash with per-page exec support (64-bit and >= POWER4 only) does it
at hashing time, by preventing exec on unclean pages and cleaning pages
on exec faults.
- Everything without per-page exec support (32-bit hash, 8xx, and
64-bit < POWER4) does it for all page going to user space in update_mmu_cache().
- Embedded with per-page exec support does it from do_page_fault() on
exec faults, in a way similar to what the hash code does.
That leads to confusion, and bugs. For example, the method using update_mmu_cache()
is racy on SMP where another processor can see the new PTE and hash it in before
we have cleaned the cache, and then blow trying to execute. This is hard to hit but
I think it has bitten us in the past.
Also, it's inefficient for embedded where we always end up having to do at least
one more page fault.
This reworks the whole thing by moving the cache sync into two main call sites,
though we keep different behaviours depending on the HW capability. The call
sites are set_pte_at() which is now made out of line, and ptep_set_access_flags()
which joins the former in pgtable.c
The base idea for Embedded with per-page exec support, is that we now do the
flush at set_pte_at() time when coming from an exec fault, which allows us
to avoid the double fault problem completely (we can even improve the situation
more by implementing TLB preload in update_mmu_cache() but that's for later).
If for some reason we didn't do it there and we try to execute, we'll hit
the page fault, which will do a minor fault, which will hit ptep_set_access_flags()
to do things like update _PAGE_ACCESSED or _PAGE_DIRTY if needed, we just make
these guys also perform the I/D cache sync for exec faults now. This second path
is the catch all for things that weren't cleaned at set_pte_at() time.
For cpus without per-page exec support, we always do the sync at set_pte_at(),
thus guaranteeing that when the PTE is visible to other processors, the cache
is clean.
For the 64-bit hash with per-page exec support case, we keep the old mechanism
for now. I'll look into changing it later, once I've reworked a bit how we
use _PAGE_EXEC.
This is also a first step for adding _PAGE_EXEC support for embedded platforms
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2009-02-10 23:02:37 +07:00
|
|
|
|
2007-06-13 11:52:56 +07:00
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
|
2007-04-30 13:30:56 +07:00
|
|
|
#if defined(CONFIG_PPC64)
|
|
|
|
# include <asm/pgtable-ppc64.h>
|
2005-11-19 16:17:32 +07:00
|
|
|
#else
|
2007-04-30 13:30:56 +07:00
|
|
|
# include <asm/pgtable-ppc32.h>
|
2005-08-05 16:39:06 +07:00
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2013-04-28 16:37:27 +07:00
|
|
|
/*
|
|
|
|
* We save the slot number & secondary bit in the second half of the
|
|
|
|
* PTE page. We use the 8 bytes per each pte entry.
|
|
|
|
*/
|
|
|
|
#define PTE_PAGE_HIDX_OFFSET (PTRS_PER_PTE * 8)
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#ifndef __ASSEMBLY__
|
2008-12-19 02:13:51 +07:00
|
|
|
|
2012-09-10 09:52:57 +07:00
|
|
|
#include <asm/tlbflush.h>
|
|
|
|
|
2009-03-20 02:34:09 +07:00
|
|
|
/* Generic accessors to PTE bits */
|
|
|
|
/*
 * pte_write - test whether _PAGE_RW is set in @pte.
 *
 * Returns 1 if the bit is set, 0 otherwise.  The result is normalised with
 * !! because the PTE value can be wider than int (PTEs can be unsigned long
 * long), and a plain narrowing conversion would silently drop a flag bit
 * that lives above bit 31.
 */
static inline int pte_write(pte_t pte)
{
	return !!(pte_val(pte) & _PAGE_RW);
}
|
|
|
|
/*
 * pte_dirty - test whether _PAGE_DIRTY is set in @pte.
 *
 * Returns 1 if the bit is set, 0 otherwise.  Normalised with !! so that the
 * conversion of a possibly 64-bit PTE value to int cannot lose the flag.
 */
static inline int pte_dirty(pte_t pte)
{
	return !!(pte_val(pte) & _PAGE_DIRTY);
}
|
|
|
|
/*
 * pte_young - test whether _PAGE_ACCESSED is set in @pte.
 *
 * Returns 1 if the bit is set, 0 otherwise.  Normalised with !! so that the
 * conversion of a possibly 64-bit PTE value to int cannot lose the flag.
 */
static inline int pte_young(pte_t pte)
{
	return !!(pte_val(pte) & _PAGE_ACCESSED);
}
|
|
|
|
/*
 * pte_file - test whether _PAGE_FILE is set in @pte.
 *
 * Returns 1 if the bit is set, 0 otherwise.  Normalised with !! so that the
 * conversion of a possibly 64-bit PTE value to int cannot lose the flag.
 */
static inline int pte_file(pte_t pte)
{
	return !!(pte_val(pte) & _PAGE_FILE);
}
|
|
|
|
/*
 * pte_special - test whether _PAGE_SPECIAL is set in @pte.
 *
 * Returns 1 if the bit is set, 0 otherwise.  Normalised with !! so that the
 * conversion of a possibly 64-bit PTE value to int cannot lose the flag.
 */
static inline int pte_special(pte_t pte)
{
	return !!(pte_val(pte) & _PAGE_SPECIAL);
}
|
|
|
|
/*
 * pte_none - report whether @pte is empty.
 *
 * Bits covered by _PTE_NONE_MASK are ignored; the entry counts as "none"
 * when every remaining bit is clear.  Returns 1 if empty, 0 otherwise.
 */
static inline int pte_none(pte_t pte)
{
	return !(pte_val(pte) & ~_PTE_NONE_MASK);
}
|
|
|
|
/*
 * pte_pgprot - extract the protection bits of @pte.
 *
 * Masks the PTE value with PAGE_PROT_BITS and wraps the result as a
 * pgprot_t.
 */
static inline pgprot_t pte_pgprot(pte_t pte)
{
	return __pgprot(PAGE_PROT_BITS & pte_val(pte));
}
|
|
|
|
|
2013-11-18 16:28:13 +07:00
|
|
|
#ifdef CONFIG_NUMA_BALANCING
|
|
|
|
|
|
|
|
/*
 * pte_present - test whether @pte is present (CONFIG_NUMA_BALANCING variant).
 *
 * With NUMA balancing an entry counts as present when either _PAGE_PRESENT
 * or _PAGE_NUMA is set.
 *
 * Returns 1 if present, 0 otherwise.  Normalised with !! so that the
 * conversion of a possibly 64-bit PTE value to int cannot lose a flag.
 */
static inline int pte_present(pte_t pte)
{
	return !!(pte_val(pte) & (_PAGE_PRESENT | _PAGE_NUMA));
}
|
|
|
|
|
2014-06-05 06:06:30 +07:00
|
|
|
#define pte_present_nonuma pte_present_nonuma
|
|
|
|
/*
 * pte_present_nonuma - test _PAGE_PRESENT only, ignoring _PAGE_NUMA.
 *
 * Returns 1 if _PAGE_PRESENT is set in @pte, 0 otherwise.  Normalised with
 * !! so that the conversion of a possibly 64-bit PTE value to int cannot
 * lose the flag.
 */
static inline int pte_present_nonuma(pte_t pte)
{
	return !!(pte_val(pte) & _PAGE_PRESENT);
}
|
|
|
|
|
2013-11-18 16:28:13 +07:00
|
|
|
#define pte_numa pte_numa
|
|
|
|
/*
 * pte_numa - test whether @pte is marked as a NUMA entry.
 *
 * True only when _PAGE_NUMA is set and _PAGE_PRESENT is clear.
 * Returns 1 for a NUMA entry, 0 otherwise.
 */
static inline int pte_numa(pte_t pte)
{
	/* A present entry is never a NUMA entry. */
	if (pte_val(pte) & _PAGE_PRESENT)
		return 0;

	return (pte_val(pte) & _PAGE_NUMA) == _PAGE_NUMA;
}
|
|
|
|
|
|
|
|
#define pte_mknonnuma pte_mknonnuma
|
|
|
|
/*
 * pte_mknonnuma - return @pte with its NUMA marking removed.
 *
 * Clears _PAGE_NUMA and sets _PAGE_PRESENT and _PAGE_ACCESSED.
 */
static inline pte_t pte_mknonnuma(pte_t pte)
{
	pte_val(pte) = (pte_val(pte) & ~_PAGE_NUMA) |
		       _PAGE_PRESENT | _PAGE_ACCESSED;
	return pte;
}
|
|
|
|
|
|
|
|
#define pte_mknuma pte_mknuma
|
|
|
|
/*
 * pte_mknuma - return @pte marked as a NUMA entry.
 *
 * _PAGE_NUMA must not be set on a non-present pte: that case trips
 * VM_BUG_ON() and the pte is handed back unmodified.  Otherwise _PAGE_NUMA
 * is set and the present bit is cleared, so that hash_page will return 1
 * and the access is collected as a NUMA fault.
 */
static inline pte_t pte_mknuma(pte_t pte)
{
	if (!pte_present(pte)) {
		VM_BUG_ON(1);
		return pte;
	}

	pte_val(pte) = (pte_val(pte) | _PAGE_NUMA) & ~_PAGE_PRESENT;
	return pte;
}
|
|
|
|
|
2014-02-12 10:43:38 +07:00
|
|
|
#define ptep_set_numa ptep_set_numa
|
|
|
|
static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
|
|
|
|
pte_t *ptep)
|
|
|
|
{
|
|
|
|
if ((pte_val(*ptep) & _PAGE_PRESENT) == 0)
|
|
|
|
VM_BUG_ON(1);
|
|
|
|
|
|
|
|
pte_update(mm, addr, ptep, _PAGE_PRESENT, _PAGE_NUMA, 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2013-11-18 16:28:13 +07:00
|
|
|
#define pmd_numa pmd_numa
|
|
|
|
/*
 * pmd_numa - PMD counterpart of pte_numa().
 *
 * Converts @pmd to a pte with pmd_pte() and applies the pte-level test.
 */
static inline int pmd_numa(pmd_t pmd)
{
	pte_t as_pte = pmd_pte(pmd);

	return pte_numa(as_pte);
}
|
|
|
|
|
2014-02-12 10:43:38 +07:00
|
|
|
#define pmdp_set_numa pmdp_set_numa
|
|
|
|
static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
|
|
|
|
pmd_t *pmdp)
|
|
|
|
{
|
|
|
|
if ((pmd_val(*pmdp) & _PAGE_PRESENT) == 0)
|
|
|
|
VM_BUG_ON(1);
|
|
|
|
|
|
|
|
pmd_hugepage_update(mm, addr, pmdp, _PAGE_PRESENT, _PAGE_NUMA);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2013-11-18 16:28:13 +07:00
|
|
|
#define pmd_mknonnuma pmd_mknonnuma
|
|
|
|
/*
 * pmd_mknonnuma - PMD counterpart of pte_mknonnuma().
 *
 * Round-trips @pmd through its pte representation to reuse the pte helper.
 */
static inline pmd_t pmd_mknonnuma(pmd_t pmd)
{
	pte_t cleared = pte_mknonnuma(pmd_pte(pmd));

	return pte_pmd(cleared);
}
|
|
|
|
|
|
|
|
#define pmd_mknuma pmd_mknuma
|
|
|
|
/*
 * pmd_mknuma - PMD counterpart of pte_mknuma().
 *
 * Round-trips @pmd through its pte representation to reuse the pte helper.
 */
static inline pmd_t pmd_mknuma(pmd_t pmd)
{
	pte_t marked = pte_mknuma(pmd_pte(pmd));

	return pte_pmd(marked);
}
|
|
|
|
|
|
|
|
# else
|
|
|
|
|
|
|
|
/*
 * pte_present - test whether @pte is present (variant used when
 * CONFIG_NUMA_BALANCING is not enabled).
 *
 * Returns 1 if _PAGE_PRESENT is set, 0 otherwise.  Normalised with !! so
 * that the conversion of a possibly 64-bit PTE value to int cannot lose the
 * flag.
 */
static inline int pte_present(pte_t pte)
{
	return !!(pte_val(pte) & _PAGE_PRESENT);
}
|
|
|
|
#endif /* CONFIG_NUMA_BALANCING */
|
|
|
|
|
2009-03-20 02:34:09 +07:00
|
|
|
/* Conversion functions: convert a page and protection to a page entry,
|
|
|
|
* and a page entry and page directory to the page they refer to.
|
|
|
|
*
|
|
|
|
* Even if PTEs can be unsigned long long, a PFN is always an unsigned
|
|
|
|
* long for now.
|
|
|
|
*/
|
|
|
|
/*
 * pfn_pte - build a PTE from a page frame number and protection bits.
 *
 * @pfn is widened to pte_basic_t before shifting by PTE_RPN_SHIFT so the
 * shift happens at full PTE width, then OR-ed with the raw value of
 * @pgprot.
 */
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
{
	pte_basic_t rpn = (pte_basic_t)pfn << PTE_RPN_SHIFT;

	return __pte(rpn | pgprot_val(pgprot));
}
|
|
|
|
/*
 * pte_pfn - extract the page frame number from @pte.
 *
 * Shifts the PTE value right by PTE_RPN_SHIFT; the result is returned as an
 * unsigned long.
 */
static inline unsigned long pte_pfn(pte_t pte)
{
	return (unsigned long)(pte_val(pte) >> PTE_RPN_SHIFT);
}
|
|
|
|
|
|
|
|
/* Keep these as macros to avoid include dependency mess */
|
|
|
|
#define pte_page(x) pfn_to_page(pte_pfn(x))
|
|
|
|
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
|
|
|
|
|
|
|
|
/* Generic modifiers for PTE bits */
|
|
|
|
/* pte_wrprotect - return @pte with _PAGE_RW and _PAGE_HWWRITE cleared. */
static inline pte_t pte_wrprotect(pte_t pte)
{
	return __pte(pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE));
}
|
|
|
|
/* pte_mkclean - return @pte with _PAGE_DIRTY and _PAGE_HWWRITE cleared. */
static inline pte_t pte_mkclean(pte_t pte)
{
	return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE));
}
|
|
|
|
/* pte_mkold - return @pte with _PAGE_ACCESSED cleared. */
static inline pte_t pte_mkold(pte_t pte)
{
	return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
}
|
|
|
|
/* pte_mkwrite - return @pte with _PAGE_RW set. */
static inline pte_t pte_mkwrite(pte_t pte)
{
	return __pte(pte_val(pte) | _PAGE_RW);
}
|
|
|
|
/* pte_mkdirty - return @pte with _PAGE_DIRTY set. */
static inline pte_t pte_mkdirty(pte_t pte)
{
	return __pte(pte_val(pte) | _PAGE_DIRTY);
}
|
|
|
|
/* pte_mkyoung - return @pte with _PAGE_ACCESSED set. */
static inline pte_t pte_mkyoung(pte_t pte)
{
	return __pte(pte_val(pte) | _PAGE_ACCESSED);
}
|
|
|
|
/* pte_mkspecial - return @pte with _PAGE_SPECIAL set. */
static inline pte_t pte_mkspecial(pte_t pte)
{
	return __pte(pte_val(pte) | _PAGE_SPECIAL);
}
|
|
|
|
/* pte_mkhuge - no-op here: @pte is returned unchanged. */
static inline pte_t pte_mkhuge(pte_t pte)
{
	return pte;
}
|
|
|
|
/*
 * pte_modify - apply new protection bits to @pte.
 *
 * Only the bits selected by _PAGE_CHG_MASK are kept from the old PTE; the
 * raw value of @newprot is OR-ed on top.
 */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
}
|
|
|
|
|
|
|
|
|
powerpc/mm: Rework I$/D$ coherency (v3)
This patch reworks the way we do I and D cache coherency on PowerPC.
The "old" way was split in 3 different parts depending on the processor type:
- Hash with per-page exec support (64-bit and >= POWER4 only) does it
at hashing time, by preventing exec on unclean pages and cleaning pages
on exec faults.
- Everything without per-page exec support (32-bit hash, 8xx, and
64-bit < POWER4) does it for all page going to user space in update_mmu_cache().
- Embedded with per-page exec support does it from do_page_fault() on
exec faults, in a way similar to what the hash code does.
That leads to confusion, and bugs. For example, the method using update_mmu_cache()
is racy on SMP where another processor can see the new PTE and hash it in before
we have cleaned the cache, and then blow trying to execute. This is hard to hit but
I think it has bitten us in the past.
Also, it's inefficient for embedded where we always end up having to do at least
one more page fault.
This reworks the whole thing by moving the cache sync into two main call sites,
though we keep different behaviours depending on the HW capability. The call
sites are set_pte_at() which is now made out of line, and ptep_set_access_flags()
which joins the former in pgtable.c
The base idea for Embedded with per-page exec support, is that we now do the
flush at set_pte_at() time when coming from an exec fault, which allows us
to avoid the double fault problem completely (we can even improve the situation
more by implementing TLB preload in update_mmu_cache() but that's for later).
If for some reason we didn't do it there and we try to execute, we'll hit
the page fault, which will do a minor fault, which will hit ptep_set_access_flags()
to do things like update _PAGE_ACCESSED or _PAGE_DIRTY if needed, we just make
these guys also perform the I/D cache sync for exec faults now. This second path
is the catch all for things that weren't cleaned at set_pte_at() time.
For cpus without per-page exec support, we always do the sync at set_pte_at(),
thus guaranteeing that when the PTE is visible to other processors, the cache
is clean.
For the 64-bit hash with per-page exec support case, we keep the old mechanism
for now. I'll look into changing it later, once I've reworked a bit how we
use _PAGE_EXEC.
This is also a first step for adding _PAGE_EXEC support for embedded platforms
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2009-02-10 23:02:37 +07:00
|
|
|
/* Insert a PTE, top-level function is out of line. It uses an inline
|
|
|
|
* low level function in the respective pgtable-* files
|
|
|
|
*/
|
|
|
|
extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
|
|
|
|
pte_t pte);
|
|
|
|
|
|
|
|
/* This low level function performs the actual PTE insertion
|
|
|
|
* Setting the PTE depends on the MMU type and other factors. It's
|
|
|
|
* an horrible mess that I'm not going to try to clean up now but
|
|
|
|
* I'm keeping it in one place rather than spread around
|
|
|
|
*/
|
|
|
|
/*
 * __set_pte_at - low-level store of @pte into the page-table slot @ptep.
 *
 * The way the store is performed is chosen at compile time from the MMU
 * configuration; each branch below carries its own explanation.  @percpu
 * is only examined by the first two branches, and @mm / @addr are only used
 * when an already-hashed entry has to be flushed in the second branch.
 */
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte, int percpu)
{
#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
	/*
	 * First case: 32-bit Hash MMU in SMP mode with 32-bit PTEs.
	 *
	 * The helper pte_update() is used because it does an atomic update,
	 * which is required since a concurrent invalidation can clear
	 * _PAGE_HASHPTE.  For a per-CPU PTE such as a kmap_atomic, a simple
	 * update preserving the hash bits is done instead (ie, same as the
	 * non-SMP case).
	 */
	if (percpu) {
		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) |
			      (pte_val(pte) & ~_PAGE_HASHPTE));
	} else {
		pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
	}

#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
	/*
	 * Second case: 32-bit with 64-bit PTE.
	 *
	 * Here we can just store, as long as the two halves are written in
	 * the right order with a barrier in between.  This is possible
	 * because the hash code takes care to pre-invalidate if the PTE was
	 * already hashed, which synchronizes us with any concurrent
	 * invalidation.  In the percpu case we also fall back to the simple
	 * update preserving the hash bits.
	 */
	if (percpu) {
		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) |
			      (pte_val(pte) & ~_PAGE_HASHPTE));
		return;
	}
#if _PAGE_HASHPTE != 0
	if (pte_val(*ptep) & _PAGE_HASHPTE)
		flush_hash_entry(mm, ptep, addr);
#endif
	/* Store one half, eieio barrier, then the other half at ptep + 4. */
	__asm__ __volatile__(
		"stw%U0%X0 %2,%0\n"
		"eieio\n"
		"stw%U0%X0 %L2,%1"
		: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
		: "r" (pte) : "memory");

#elif defined(CONFIG_PPC_STD_MMU_32)
	/*
	 * Third case: 32-bit hash table in UP mode.
	 *
	 * The _PAGE_HASHPTE bit must be preserved since the previous
	 * translation may not have been invalidated in the hash yet (that is
	 * done in a subsequent flush_tlb_xxx()), and so we need to keep track
	 * that this PTE needs invalidating.
	 */
	*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) |
		      (pte_val(pte) & ~_PAGE_HASHPTE));

#else
	/*
	 * Anything else just stores the PTE normally.  That covers all
	 * 64-bit cases, and 32-bit non-hash with 32-bit PTEs.
	 */
	*ptep = pte;
#endif
}
|
|
|
|
|
|
|
|
|
|
|
|
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
|
|
|
|
extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
|
|
|
|
pte_t *ptep, pte_t entry, int dirty);
|
|
|
|
|
2008-12-19 02:13:51 +07:00
|
|
|
/*
|
|
|
|
* Macro to mark a page protection value as "uncacheable".
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define _PAGE_CACHE_CTL (_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
|
|
|
|
_PAGE_WRITETHRU)
|
|
|
|
|
|
|
|
#define pgprot_noncached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
|
|
|
|
_PAGE_NO_CACHE | _PAGE_GUARDED))
|
|
|
|
|
|
|
|
#define pgprot_noncached_wc(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
|
|
|
|
_PAGE_NO_CACHE))
|
|
|
|
|
|
|
|
#define pgprot_cached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
|
|
|
|
_PAGE_COHERENT))
|
|
|
|
|
|
|
|
#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
|
|
|
|
_PAGE_COHERENT | _PAGE_WRITETHRU))
|
|
|
|
|
2011-10-27 09:58:45 +07:00
|
|
|
#define pgprot_cached_noncoherent(prot) \
|
|
|
|
(__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
|
|
|
|
|
2011-03-01 03:00:47 +07:00
|
|
|
#define pgprot_writecombine pgprot_noncached_wc
|
2008-12-19 02:13:51 +07:00
|
|
|
|
|
|
|
struct file;
|
|
|
|
extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
|
|
|
|
unsigned long size, pgprot_t vma_prot);
|
|
|
|
#define __HAVE_PHYS_MEM_ACCESS_PROT
|
|
|
|
|
2007-06-13 11:52:56 +07:00
|
|
|
/*
|
|
|
|
* ZERO_PAGE is a global shared page that is always zero: used
|
|
|
|
* for zero-mapped memory areas etc..
|
|
|
|
*/
|
|
|
|
extern unsigned long empty_zero_page[];
|
|
|
|
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
|
|
|
|
|
|
extern pgd_t swapper_pg_dir[];
|
|
|
|
|
|
|
|
extern void paging_init(void);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* kern_addr_valid is intended to indicate whether an address is a valid
|
|
|
|
* kernel address. Most 32-bit archs define it as always true (like this)
|
|
|
|
* but most 64-bit archs actually perform a test. What should we do here?
|
|
|
|
*/
|
|
|
|
#define kern_addr_valid(addr) (1)
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <asm-generic/pgtable.h>
|
2008-07-25 13:21:11 +07:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This gets called at the end of handling a page fault, when
|
|
|
|
* the kernel has put a new PTE into the page table for the process.
|
|
|
|
* We use it to ensure coherency between the i-cache and d-cache
|
|
|
|
* for the page which has just been mapped in.
|
|
|
|
* On machines which use an MMU hash table, we use this to put a
|
|
|
|
* corresponding HPTE into the hash table ahead of time, instead of
|
|
|
|
* waiting for the inevitable extra hash-table miss exception.
|
|
|
|
*/
|
MM: Pass a PTE pointer to update_mmu_cache() rather than the PTE itself
On VIVT ARM, when we have multiple shared mappings of the same file
in the same MM, we need to ensure that we have coherency across all
copies. We do this via make_coherent() by making the pages
uncacheable.
This used to work fine, until we allowed highmem with highpte - we
now have a page table which is mapped as required, and is not available
for modification via update_mmu_cache().
Ralf Baechle suggested getting rid of the PTE value passed to
update_mmu_cache():
On MIPS update_mmu_cache() calls __update_tlb() which walks pagetables
to construct a pointer to the pte again. Passing a pte_t * is much
more elegant. Maybe we might even replace the pte argument with the
pte_t?
Ben Herrenschmidt would also like the pte pointer for PowerPC:
Passing the ptep in there is exactly what I want. I want that
-instead- of the PTE value, because I have issue on some ppc cases,
for I$/D$ coherency, where set_pte_at() may decide to mask out the
_PAGE_EXEC.
So, pass in the mapped page table pointer into update_mmu_cache(), and
remove the PTE value, updating all implementations and call sites to
suit.
Includes a fix from Stephen Rothwell:
sparc: fix fallout from update_mmu_cache API change
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2009-12-18 23:40:18 +07:00
|
|
|
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
|
2008-07-25 13:21:11 +07:00
|
|
|
|
powerpc/mm: Allow more flexible layouts for hugepage pagetables
Currently each available hugepage size uses a slightly different
pagetable layout: that is, the bottom level table of pointers to
hugepages is a different size, and may branch off from the normal page
tables at a different level. Every hugepage aware path that needs to
walk the pagetables must therefore look up the hugepage size from the
slice info first, and work out the correct way to walk the pagetables
accordingly. Future hardware is likely to add more possible hugepage
sizes, more layout options and more mess.
This patch, therefore reworks the handling of hugepage pagetables to
reduce this complexity. In the new scheme, instead of having to
consult the slice mask, pagetable walking code can check a flag in the
PGD/PUD/PMD entries to see where to branch off to hugepage pagetables,
and the entry also contains the information (essentially hugepage
shift) necessary to then interpret that table without recourse to the
slice mask. This scheme can be extended neatly to handle multiple
levels of self-describing "special" hugepage pagetables, although for
now we assume only one level exists.
This approach means that only the pagetable allocation path needs to
know how the pagetables should be set out. All other (hugepage)
pagetable walking paths can just interpret the structure as they go.
There already was a flag bit in PGD/PUD/PMD entries for hugepage
directory pointers, but it was only used for debug. We alter that
flag bit to instead be a 0 in the MSB to indicate a hugepage pagetable
pointer (normally it would be 1 since the pointer lies in the linear
mapping). This means that asm pagetable walking can test for (and
punt on) hugepage pointers with the same test that checks for
unpopulated page directory entries (beq becomes bge), since hugepage
pointers will always be positive, and normal pointers always negative.
While we're at it, we get rid of the confusing (and grep defeating)
#defining of hugepte_shift to be the same thing as mmu_huge_psizes.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2009-10-27 02:24:31 +07:00
|
|
|
extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr,
|
|
|
|
unsigned long end, int write, struct page **pages, int *nr);
|
|
|
|
|
2013-04-28 16:37:30 +07:00
|
|
|
extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
|
|
|
|
unsigned long end, int write, struct page **pages, int *nr);
|
2013-06-20 16:00:15 +07:00
|
|
|
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
|
|
|
|
#define pmd_large(pmd) 0
|
|
|
|
#define has_transparent_hugepage() 0
|
|
|
|
#endif
|
2013-06-20 16:00:16 +07:00
|
|
|
pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
|
|
|
|
unsigned *shift);
|
2013-11-15 12:31:15 +07:00
|
|
|
|
|
|
|
/*
 * lookup_linux_ptep - find the Linux PTE mapping @hva in @pgdir.
 *
 * @pte_sizep is in/out: on entry it holds the mapping size the caller
 * requires; once a PTE has been found it is overwritten with the size
 * actually backing the address (1 << shift when the lookup reports a
 * non-zero shift, PAGE_SIZE otherwise).
 *
 * Returns the PTE pointer, or NULL when nothing is mapped or when the
 * mapping found is smaller than the size requested.  In the latter case
 * *pte_sizep has still been updated with the size found.
 */
static inline pte_t *lookup_linux_ptep(pgd_t *pgdir, unsigned long hva,
				       unsigned long *pte_sizep)
{
	unsigned long wanted = *pte_sizep;
	unsigned int shift;
	pte_t *ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);

	if (!ptep)
		return NULL;

	*pte_sizep = shift ? 1ul << shift : PAGE_SIZE;

	return (wanted > *pte_sizep) ? NULL : ptep;
}
|
2005-04-17 05:20:36 +07:00
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
|
2005-12-17 04:43:46 +07:00
|
|
|
#endif /* __KERNEL__ */
|
2005-11-19 16:17:32 +07:00
|
|
|
#endif /* _ASM_POWERPC_PGTABLE_H */
|