2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* This file is subject to the terms and conditions of the GNU General Public
|
|
|
|
* License. See the file "COPYING" in the main directory of this archive
|
|
|
|
* for more details.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2003 Ralf Baechle
|
|
|
|
*/
|
|
|
|
#ifndef _ASM_PGTABLE_H
|
|
|
|
#define _ASM_PGTABLE_H
|
|
|
|
|
2013-04-08 21:06:35 +07:00
|
|
|
#include <linux/mm_types.h>
|
2012-10-18 18:54:15 +07:00
|
|
|
#include <linux/mmzone.h>
|
2005-09-04 05:56:16 +07:00
|
|
|
#ifdef CONFIG_32BIT
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <asm/pgtable-32.h>
|
|
|
|
#endif
|
2005-09-04 05:56:16 +07:00
|
|
|
#ifdef CONFIG_64BIT
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <asm/pgtable-64.h>
|
|
|
|
#endif
|
|
|
|
|
2005-07-14 07:17:05 +07:00
|
|
|
#include <asm/io.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <asm/pgtable-bits.h>
|
|
|
|
|
2005-11-07 15:59:43 +07:00
|
|
|
struct mm_struct;
|
|
|
|
struct vm_area_struct;
|
|
|
|
|
2016-04-19 15:25:03 +07:00
|
|
|
/*
 * Page protection templates.
 *
 * Every entry sets _PAGE_PRESENT.  The cache attribute comes from the
 * run-time variable _page_cachable_default, except for PAGE_KERNEL_NC and
 * PAGE_KERNEL_UNCACHED, which hard-code _CACHE_CACHABLE_NONCOHERENT and
 * _CACHE_UNCACHED respectively.  The three PAGE_KERNEL* templates also set
 * _PAGE_GLOBAL.
 */
#define PAGE_NONE \
	__pgprot(_PAGE_PRESENT | _PAGE_NO_READ | _page_cachable_default)
#define PAGE_SHARED \
	__pgprot(_PAGE_PRESENT | _PAGE_WRITE | _page_cachable_default)
#define PAGE_COPY \
	__pgprot(_PAGE_PRESENT | _PAGE_NO_EXEC | _page_cachable_default)
#define PAGE_READONLY \
	__pgprot(_PAGE_PRESENT | _page_cachable_default)
#define PAGE_KERNEL \
	__pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
		 _PAGE_GLOBAL | _page_cachable_default)
#define PAGE_KERNEL_NC \
	__pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
		 _PAGE_GLOBAL | _CACHE_CACHABLE_NONCOHERENT)
#define PAGE_USERIO \
	__pgprot(_PAGE_PRESENT | _PAGE_WRITE | _page_cachable_default)
#define PAGE_KERNEL_UNCACHED \
	__pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
		 _PAGE_GLOBAL | _CACHE_UNCACHED)
|
|
|
|
|
|
|
|
/*
|
2010-02-11 06:12:47 +07:00
|
|
|
* If _PAGE_NO_EXEC is not defined, we can't do page protection for
|
|
|
|
* execute, and consider it to be the same as read. Also, write
|
|
|
|
* permissions imply read permissions. This is the closest we can get
|
|
|
|
* by reasonable means..
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
|
|
|
|
2007-09-19 06:58:24 +07:00
|
|
|
/*
|
|
|
|
* Dummy values to fill the table in mmap.c
|
|
|
|
* The real values will be generated at runtime
|
|
|
|
*/
|
|
|
|
/* Private-mapping placeholders; every one expands to __pgprot(0). */
#define __P000	__pgprot(0)
#define __P001	__pgprot(0)
#define __P010	__pgprot(0)
#define __P011	__pgprot(0)
#define __P100	__pgprot(0)
#define __P101	__pgprot(0)
#define __P110	__pgprot(0)
#define __P111	__pgprot(0)

/* Shared-mapping placeholders; every one expands to __pgprot(0). */
#define __S000	__pgprot(0)
#define __S001	__pgprot(0)
#define __S010	__pgprot(0)
#define __S011	__pgprot(0)
#define __S100	__pgprot(0)
#define __S101	__pgprot(0)
#define __S110	__pgprot(0)
#define __S111	__pgprot(0)
|
|
|
|
|
|
|
|
/*
 * Cache attribute bits that the PAGE_* protection macros in this header
 * OR into their value.  Only declared here; the definition lives elsewhere.
 */
extern unsigned long _page_cachable_default;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* ZERO_PAGE is a global shared page that is always zero; used
|
|
|
|
* for zero-mapped memory areas etc..
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Base address and mask used by ZERO_PAGE(); both are defined elsewhere. */
extern unsigned long empty_zero_page;
extern unsigned long zero_page_mask;

/*
 * Yield the struct page for the zero page selected by @vaddr: the bits of
 * @vaddr kept by zero_page_mask are added to empty_zero_page and the
 * resulting address is handed to virt_to_page().
 */
#define ZERO_PAGE(vaddr)						\
	(virt_to_page((void *)(empty_zero_page +			\
		(((unsigned long)(vaddr)) & zero_page_mask))))
#define __HAVE_COLOR_ZERO_PAGE
|
2009-09-22 07:03:34 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * Defined outside this header.
 * NOTE(review): presumably the boot-time paging setup entry point -- confirm
 * against the definition.
 */
extern void paging_init(void);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Conversion functions: convert a page and protection to a page entry,
|
|
|
|
* and a page entry and page directory to the page they refer to.
|
|
|
|
*/
|
2007-03-19 23:36:42 +07:00
|
|
|
/* Physical address obtained by treating the raw pmd value as a pointer. */
#define pmd_phys(pmd)		virt_to_phys((void *)pmd_val(pmd))

/* struct page for the page frame that pmd_phys() falls into. */
#define __pmd_page(pmd)		(pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT))

/* pmd_page() is only supplied here when transparent hugepages are off. */
#if !defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define pmd_page(pmd)		__pmd_page(pmd)
#endif /* !CONFIG_TRANSPARENT_HUGEPAGE */

/* The raw pmd value, unchanged. */
#define pmd_page_vaddr(pmd)	pmd_val(pmd)
|
2005-04-17 05:20:36 +07:00
|
|
|
|
MIPS: mm: Use the Hardware Page Table Walker if the core supports it
The Hardware Page Table Walker aims to speed up TLB refill exceptions
by handling them in the hardware level instead of having a software
TLB refill handler. However, a TLB refill exception can still be
thrown in certain cases, such as synchronous exceptions, or address
translation or memory errors during the HTW operation. As a result of
which, HTW must not be considered a complete replacement for the TLB
refill software handler, but rather a fast-path for it.
For HTW to work, the PWBase register must contain the task's page
global directory address so the HTW will kick in on TLB refill
exceptions.
Due to HTW being a separate engine embedded deep in the CPU pipeline,
we need to restart the HTW every time a PTE changes to avoid HTW
fetching an old entry from the page tables. It's also necessary to
restart the HTW on context switches to prevent it from fetching a
page from the previous process. Finally, since HTW is using the
entryhi register to write the translations to the TLB, it's necessary
to stop the HTW whenever the entryhi changes (eg for tlb probe
operations) and enable it back afterwards.
== Performance ==
The following trivial test was used to measure the performance of the
HTW. Using the same root filesystem, the following command was used
to measure the number of tlb refill handler executions with and
without (using 'nohtw' kernel parameter) HTW support. The kernel was
modified to use a scratch register as a counter for the TLB refill
exceptions.
find /usr -type f -exec ls -lh {} \;
HTW Enabled:
TLB refill exceptions: 12306
HTW Disabled:
TLB refill exceptions: 17805
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: Markos Chandras <markos.chandras@imgtec.com>
Patchwork: https://patchwork.linux-mips.org/patch/7336/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2014-07-14 18:47:09 +07:00
|
|
|
/*
 * htw_stop() - register one "stop" request for the hardware page table
 * walker on the current CPU.
 *
 * Does nothing when cpu_has_htw is false.  Otherwise, with local interrupts
 * disabled, raw_current_cpu_data.htw_seq is post-incremented.  If it was zero
 * before the increment, the bit at MIPS_PWCTL_PWEN_SHIFT is cleared in the
 * value read from PWCtl and written back, followed by
 * back_to_back_c0_hazard().
 */
#define htw_stop()							\
do {									\
	unsigned long htw_irqflags;					\
									\
	if (!cpu_has_htw)						\
		break;							\
									\
	local_irq_save(htw_irqflags);					\
	if (raw_current_cpu_data.htw_seq++ == 0) {			\
		write_c0_pwctl(read_c0_pwctl() &			\
			       ~(1 << MIPS_PWCTL_PWEN_SHIFT));		\
		back_to_back_c0_hazard();				\
	}								\
	local_irq_restore(htw_irqflags);				\
} while (0)
|
|
|
|
|
|
|
|
/*
 * htw_start() - drop one "stop" request for the hardware page table walker
 * on the current CPU.
 *
 * Does nothing when cpu_has_htw is false.  Otherwise, with local interrupts
 * disabled, raw_current_cpu_data.htw_seq is pre-decremented.  If the result
 * is zero, the bit at MIPS_PWCTL_PWEN_SHIFT is set in the value read from
 * PWCtl and written back, followed by back_to_back_c0_hazard().
 */
#define htw_start()							\
do {									\
	unsigned long htw_irqflags;					\
									\
	if (!cpu_has_htw)						\
		break;							\
									\
	local_irq_save(htw_irqflags);					\
	if (--raw_current_cpu_data.htw_seq == 0) {			\
		write_c0_pwctl(read_c0_pwctl() |			\
			       (1 << MIPS_PWCTL_PWEN_SHIFT));		\
		back_to_back_c0_hazard();				\
	}								\
	local_irq_restore(htw_irqflags);				\
} while (0)
|
|
|
|
|
MIPS: Sync icache & dcache in set_pte_at
It's possible for pages to become visible prior to update_mmu_cache
running if a thread within the same address space preempts the current
thread or runs simultaneously on another CPU. That is, the following
scenario is possible:
CPU0 CPU1
write to page
flush_dcache_page
flush_icache_page
set_pte_at
map page
update_mmu_cache
If CPU1 maps the page in between CPU0's set_pte_at, which marks it valid
& visible, and update_mmu_cache where the dcache flush occurs then CPU1's
icache will fill from stale data (unless it fills from the dcache, in
which case all is good, but most MIPS CPUs don't have this property).
Commit 4d46a67a3eb8 ("MIPS: Fix race condition in lazy cache flushing.")
attempted to fix that by performing the dcache flush in
flush_icache_page such that it occurs before the set_pte_at call makes
the page visible. However it has the problem that not all code that
writes to pages exposed to userland call flush_icache_page. There are
many callers of set_pte_at under mm/ and only 2 of them do call
flush_icache_page. Thus the race window between a page becoming visible
& being coherent between the icache & dcache remains open in some cases.
To illustrate some of the cases, a WARN was added to __update_cache with
this patch applied that triggered in cases where a page about to be
flushed from the dcache was not the last page provided to
flush_icache_page. That is, backtraces were obtained for cases in which
the race window is left open without this patch. The 2 standout examples
follow.
When forking a process:
[ 15.271842] [<80417630>] __update_cache+0xcc/0x188
[ 15.277274] [<80530394>] copy_page_range+0x56c/0x6ac
[ 15.282861] [<8042936c>] copy_process.part.54+0xd40/0x17ac
[ 15.289028] [<80429f80>] do_fork+0xe4/0x420
[ 15.293747] [<80413808>] handle_sys+0x128/0x14c
When exec'ing an ELF binary:
[ 14.445964] [<80417630>] __update_cache+0xcc/0x188
[ 14.451369] [<80538d88>] move_page_tables+0x414/0x498
[ 14.457075] [<8055d848>] setup_arg_pages+0x220/0x318
[ 14.462685] [<805b0f38>] load_elf_binary+0x530/0x12a0
[ 14.468374] [<8055ec3c>] search_binary_handler+0xbc/0x214
[ 14.474444] [<8055f6c0>] do_execveat_common+0x43c/0x67c
[ 14.480324] [<8055f938>] do_execve+0x38/0x44
[ 14.485137] [<80413808>] handle_sys+0x128/0x14c
These code paths write into a page, call flush_dcache_page then call
set_pte_at without flush_icache_page in between. The end result is that
the icache can become corrupted & userland processes may execute
unexpected or invalid code, typically resulting in a reserved
instruction exception, a trap or a segfault.
Fix this race condition fully by performing any cache maintenance
required to keep the icache & dcache in sync in set_pte_at, before the
page is made valid. This has the added bonus of ensuring the cache
maintenance always happens in one location, rather than being duplicated
in flush_icache_page & update_mmu_cache. It also matches the way other
architectures solve the same problem (see arm, ia64 & powerpc).
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reported-by: Ionela Voinescu <ionela.voinescu@imgtec.com>
Cc: Lars Persson <lars.persson@axis.com>
Fixes: 4d46a67a3eb8 ("MIPS: Fix race condition in lazy cache flushing.")
Cc: Steven J. Hill <sjhill@realitydiluted.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: stable <stable@vger.kernel.org> # v4.1+
Patchwork: https://patchwork.linux-mips.org/patch/12722/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-03-01 09:37:59 +07:00
|
|
|
/* Forward declaration: pte_clear() below calls this before it is defined. */
static inline void set_pte_at(struct mm_struct *mm,
			      unsigned long addr,
			      pte_t *ptep,
			      pte_t pteval);
|
|
|
|
|
2014-11-22 06:16:48 +07:00
|
|
|
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
|
[MIPS] Fix marking buddy of pte global for MIPS32 w/36-bit physical address
In case of CONFIG_64BIT_PHYS_ADDR, set_pte() and pte_clear() functions
only set _PAGE_GLOBAL bit in the pte_low field of the buddy PTEs,
forgetting to propagate it to pte_high. Thus, both pages might not
really be made global for the CPU (since it AND's the G-bit of the
odd / even PTEs together to decide whether they're global or not). Thus,
if only a single page is allocated via vmalloc() or ioremap(), it's not
really global for CPU (and it must be, since this is kernel mapping),
and thus its ASID is compared against the current process' one -- so,
we'll get into trouble sooner or later... Also, pte_none() will fail
on global pages because _PAGE_GLOBAL bit is set in both pte_low and
pte_high, and pte_val() will return u64 value consisting of those fields
concatenated.
Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2006-04-17 02:27:21 +07:00
|
|
|
|
2016-04-19 15:25:05 +07:00
|
|
|
/*
 * pte_none() for the split pte_low/pte_high layout: true when no bit other
 * than _PAGE_GLOBAL is set.  Without CONFIG_XPA both halves are examined;
 * with CONFIG_XPA only pte_high is.
 */
#ifndef CONFIG_XPA
# define pte_none(pte)	(!(((pte).pte_low | (pte).pte_high) & ~_PAGE_GLOBAL))
#else
# define pte_none(pte)	(!((pte).pte_high & ~_PAGE_GLOBAL))
#endif
|
|
|
|
|
[MIPS] Fix marking buddy of pte global for MIPS32 w/36-bit physical address
In case of CONFIG_64BIT_PHYS_ADDR, set_pte() and pte_clear() functions
only set _PAGE_GLOBAL bit in the pte_low field of the buddy PTEs,
forgetting to propagate it to pte_high. Thus, both pages might not
really be made global for the CPU (since it AND's the G-bit of the
odd / even PTEs together to decide whether they're global or not). Thus,
if only a single page is allocated via vmalloc() or ioremap(), it's not
really global for CPU (and it must be, since this is kernel mapping),
and thus its ASID is compared against the current process' one -- so,
we'll get into trouble sooner or later... Also, pte_none() will fail
on global pages because _PAGE_GLOBAL bit is set in both pte_low and
pte_high, and pte_val() will return u64 value consisting of those fields
concatenated.
Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2006-04-17 02:27:21 +07:00
|
|
|
/* Split-PTE layout: both flags are tested in the pte_low half. */
#define pte_present(pte)	((pte).pte_low & _PAGE_PRESENT)
#define pte_no_exec(pte)	((pte).pte_low & _PAGE_NO_EXEC)
|
[MIPS] Fix marking buddy of pte global for MIPS32 w/36-bit physical address
In case of CONFIG_64BIT_PHYS_ADDR, set_pte() and pte_clear() functions
only set _PAGE_GLOBAL bit in the pte_low field of the buddy PTEs,
forgetting to propagate it to pte_high. Thus, both pages might not
really be made global for the CPU (since it AND's the G-bit of the
odd / even PTEs together to decide whether they're global or not). Thus,
if only a single page is allocated via vmalloc() or ioremap(), it's not
really global for CPU (and it must be, since this is kernel mapping),
and thus its ASID is compared against the current process' one -- so,
we'll get into trouble sooner or later... Also, pte_none() will fail
on global pages because _PAGE_GLOBAL bit is set in both pte_low and
pte_high, and pte_val() will return u64 value consisting of those fields
concatenated.
Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2006-04-17 02:27:21 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * set_pte() - store @pte into *@ptep for the split pte_low/pte_high layout.
 *
 * pte_high is written first, then smp_wmb(), then pte_low.
 *
 * If the new entry has _PAGE_GLOBAL set (tested in pte_high with CONFIG_XPA,
 * in pte_low otherwise) and its buddy entry is pte_none(), the buddy is
 * marked global as well: pte_high always, pte_low only without CONFIG_XPA.
 * A buddy that is not none is left untouched (it should already be global).
 */
static inline void set_pte(pte_t *ptep, pte_t pte)
{
	pte_t *buddy;

	ptep->pte_high = pte.pte_high;
	smp_wmb();
	ptep->pte_low = pte.pte_low;

	/* Nothing more to do unless the new entry is global. */
#ifdef CONFIG_XPA
	if (!(pte.pte_high & _PAGE_GLOBAL))
		return;
#else
	if (!(pte.pte_low & _PAGE_GLOBAL))
		return;
#endif

	buddy = ptep_buddy(ptep);
	if (!pte_none(*buddy))
		return;

	if (!IS_ENABLED(CONFIG_XPA))
		buddy->pte_low |= _PAGE_GLOBAL;
	buddy->pte_high |= _PAGE_GLOBAL;
}
|
|
|
|
|
|
|
|
/*
 * pte_clear() - replace *@ptep with an empty entry, keeping the pair's
 * global status.
 *
 * The replacement starts as __pte(0).  If the buddy entry has _PAGE_GLOBAL
 * set, the replacement carries it too: in pte_high only with CONFIG_XPA, in
 * both pte_low and pte_high otherwise.  The store goes through set_pte_at()
 * and is bracketed by htw_stop()/htw_start().
 */
static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pte_t cleared = __pte(0);
	pte_t *buddy;

	htw_stop();

	buddy = ptep_buddy(ptep);
	if (IS_ENABLED(CONFIG_XPA)) {
		if (buddy->pte_high & _PAGE_GLOBAL)
			cleared.pte_high = _PAGE_GLOBAL;
	} else if (buddy->pte_low & _PAGE_GLOBAL) {
		cleared.pte_high = _PAGE_GLOBAL;
		cleared.pte_low = cleared.pte_high;
	}

	set_pte_at(mm, addr, ptep, cleared);

	htw_start();
}
|
|
|
|
#else
|
[MIPS] Fix marking buddy of pte global for MIPS32 w/36-bit physical address
In case of CONFIG_64BIT_PHYS_ADDR, set_pte() and pte_clear() functions
only set _PAGE_GLOBAL bit in the pte_low field of the buddy PTEs,
forgetting to propagate it to pte_high. Thus, both pages might not
really be made global for the CPU (since it AND's the G-bit of the
odd / even PTEs together to decide whether they're global or not). Thus,
if only a single page is allocated via vmalloc() or ioremap(), it's not
really global for CPU (and it must be, since this is kernel mapping),
and thus its ASID is compared against the current process' one -- so,
we'll get into trouble sooner or later... Also, pte_none() will fail
on global pages because _PAGE_GLOBAL bit is set in both pte_low and
pte_high, and pte_val() will return u64 value consisting of those fields
concatenated.
Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2006-04-17 02:27:21 +07:00
|
|
|
|
|
|
|
/* True when the PTE value has no bits set other than (possibly) _PAGE_GLOBAL. */
#define pte_none(pte) (!(pte_val(pte) & ~_PAGE_GLOBAL))
|
|
|
|
/* Non-zero when _PAGE_PRESENT is set in the PTE value. */
#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT)
|
MIPS: Sync icache & dcache in set_pte_at
It's possible for pages to become visible prior to update_mmu_cache
running if a thread within the same address space preempts the current
thread or runs simultaneously on another CPU. That is, the following
scenario is possible:
CPU0 CPU1
write to page
flush_dcache_page
flush_icache_page
set_pte_at
map page
update_mmu_cache
If CPU1 maps the page in between CPU0's set_pte_at, which marks it valid
& visible, and update_mmu_cache where the dcache flush occurs then CPU1s
icache will fill from stale data (unless it fills from the dcache, in
which case all is good, but most MIPS CPUs don't have this property).
Commit 4d46a67a3eb8 ("MIPS: Fix race condition in lazy cache flushing.")
attempted to fix that by performing the dcache flush in
flush_icache_page such that it occurs before the set_pte_at call makes
the page visible. However it has the problem that not all code that
writes to pages exposed to userland call flush_icache_page. There are
many callers of set_pte_at under mm/ and only 2 of them do call
flush_icache_page. Thus the race window between a page becoming visible
& being coherent between the icache & dcache remains open in some cases.
To illustrate some of the cases, a WARN was added to __update_cache with
this patch applied that triggered in cases where a page about to be
flushed from the dcache was not the last page provided to
flush_icache_page. That is, backtraces were obtained for cases in which
the race window is left open without this patch. The 2 standout examples
follow.
When forking a process:
[ 15.271842] [<80417630>] __update_cache+0xcc/0x188
[ 15.277274] [<80530394>] copy_page_range+0x56c/0x6ac
[ 15.282861] [<8042936c>] copy_process.part.54+0xd40/0x17ac
[ 15.289028] [<80429f80>] do_fork+0xe4/0x420
[ 15.293747] [<80413808>] handle_sys+0x128/0x14c
When exec'ing an ELF binary:
[ 14.445964] [<80417630>] __update_cache+0xcc/0x188
[ 14.451369] [<80538d88>] move_page_tables+0x414/0x498
[ 14.457075] [<8055d848>] setup_arg_pages+0x220/0x318
[ 14.462685] [<805b0f38>] load_elf_binary+0x530/0x12a0
[ 14.468374] [<8055ec3c>] search_binary_handler+0xbc/0x214
[ 14.474444] [<8055f6c0>] do_execveat_common+0x43c/0x67c
[ 14.480324] [<8055f938>] do_execve+0x38/0x44
[ 14.485137] [<80413808>] handle_sys+0x128/0x14c
These code paths write into a page, call flush_dcache_page then call
set_pte_at without flush_icache_page in between. The end result is that
the icache can become corrupted & userland processes may execute
unexpected or invalid code, typically resulting in a reserved
instruction exception, a trap or a segfault.
Fix this race condition fully by performing any cache maintenance
required to keep the icache & dcache in sync in set_pte_at, before the
page is made valid. This has the added bonus of ensuring the cache
maintenance always happens in one location, rather than being duplicated
in flush_icache_page & update_mmu_cache. It also matches the way other
architectures solve the same problem (see arm, ia64 & powerpc).
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reported-by: Ionela Voinescu <ionela.voinescu@imgtec.com>
Cc: Lars Persson <lars.persson@axis.com>
Fixes: 4d46a67a3eb8 ("MIPS: Fix race condition in lazy cache flushing.")
Cc: Steven J. Hill <sjhill@realitydiluted.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: stable <stable@vger.kernel.org> # v4.1+
Patchwork: https://patchwork.linux-mips.org/patch/12722/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-03-01 09:37:59 +07:00
|
|
|
/* Non-zero when _PAGE_NO_EXEC is set in the PTE value. */
#define pte_no_exec(pte) (pte_val(pte) & _PAGE_NO_EXEC)
|
[MIPS] Fix marking buddy of pte global for MIPS32 w/36-bit physical address
In case of CONFIG_64BIT_PHYS_ADDR, set_pte() and pte_clear() functions
only set _PAGE_GLOBAL bit in the pte_low field of the buddy PTEs,
forgetting to propagate it to pte_high. Thus, both pages might not
really be made global for the CPU (since it AND's the G-bit of the
odd / even PTEs together to decide whether they're global or not). Thus,
if only a single page is allocated via vmalloc() or ioremap(), it's not
really global for CPU (and it must be, since this is kernel mapping),
and thus its ASID is compared against the current process' one -- so,
we'll get into trouble sooner or later... Also, pte_none() will fail
on global pages because _PAGE_GLOBAL bit is set in both pte_low and
pte_high, and pte_val() will return u64 value consisting of those fields
concatenated.
Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2006-04-17 02:27:21 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* Certain architectures need to do special things when pte's
|
|
|
|
* within a page table are directly modified. Thus, the following
|
|
|
|
* hook is made available.
|
|
|
|
*/
|
|
|
|
static inline void set_pte(pte_t *ptep, pte_t pteval)
|
|
|
|
{
|
|
|
|
*ptep = pteval;
|
|
|
|
#if !defined(CONFIG_CPU_R3000) && !defined(CONFIG_CPU_TX39XX)
|
|
|
|
if (pte_val(pteval) & _PAGE_GLOBAL) {
|
|
|
|
pte_t *buddy = ptep_buddy(ptep);
|
|
|
|
/*
|
|
|
|
* Make sure the buddy is global too (if it's !none,
|
|
|
|
* it better already be global)
|
|
|
|
*/
|
2015-08-04 07:48:43 +07:00
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
/*
|
|
|
|
* For SMP, multiple CPUs can race, so we need to do
|
|
|
|
* this atomically.
|
|
|
|
*/
|
|
|
|
unsigned long page_global = _PAGE_GLOBAL;
|
|
|
|
unsigned long tmp;
|
|
|
|
|
2015-09-07 17:42:30 +07:00
|
|
|
if (kernel_uses_llsc && R10000_LLSC_WAR) {
|
|
|
|
__asm__ __volatile__ (
|
|
|
|
" .set arch=r4000 \n"
|
|
|
|
" .set push \n"
|
|
|
|
" .set noreorder \n"
|
|
|
|
"1:" __LL "%[tmp], %[buddy] \n"
|
|
|
|
" bnez %[tmp], 2f \n"
|
|
|
|
" or %[tmp], %[tmp], %[global] \n"
|
|
|
|
__SC "%[tmp], %[buddy] \n"
|
|
|
|
" beqzl %[tmp], 1b \n"
|
|
|
|
" nop \n"
|
|
|
|
"2: \n"
|
|
|
|
" .set pop \n"
|
|
|
|
" .set mips0 \n"
|
|
|
|
: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
|
2015-08-04 07:48:43 +07:00
|
|
|
: [global] "r" (page_global));
|
2015-09-07 17:42:30 +07:00
|
|
|
} else if (kernel_uses_llsc) {
|
|
|
|
__asm__ __volatile__ (
|
|
|
|
" .set "MIPS_ISA_ARCH_LEVEL" \n"
|
|
|
|
" .set push \n"
|
|
|
|
" .set noreorder \n"
|
|
|
|
"1:" __LL "%[tmp], %[buddy] \n"
|
|
|
|
" bnez %[tmp], 2f \n"
|
|
|
|
" or %[tmp], %[tmp], %[global] \n"
|
|
|
|
__SC "%[tmp], %[buddy] \n"
|
|
|
|
" beqz %[tmp], 1b \n"
|
|
|
|
" nop \n"
|
|
|
|
"2: \n"
|
|
|
|
" .set pop \n"
|
|
|
|
" .set mips0 \n"
|
|
|
|
: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
|
|
|
|
: [global] "r" (page_global));
|
|
|
|
}
|
2015-08-04 07:48:43 +07:00
|
|
|
#else /* !CONFIG_SMP */
|
2005-04-17 05:20:36 +07:00
|
|
|
if (pte_none(*buddy))
|
|
|
|
pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL;
|
2015-08-04 07:48:43 +07:00
|
|
|
#endif /* CONFIG_SMP */
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Clear the pte at @ptep via set_pte_at(), bracketed by
 * htw_stop()/htw_start().
 *
 * Except on R3000/TX39XX builds, the cleared entry keeps _PAGE_GLOBAL when
 * the buddy entry has it set, so the global status of the pair is kept.
 */
static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pte_t cleared = __pte(0);

	htw_stop();
#if !defined(CONFIG_CPU_R3000) && !defined(CONFIG_CPU_TX39XX)
	/* Preserve global status for the pair */
	if (pte_val(*ptep_buddy(ptep)) & _PAGE_GLOBAL)
		cleared = __pte(_PAGE_GLOBAL);
#endif
	set_pte_at(mm, addr, ptep, cleared);
	htw_start();
}
|
|
|
|
#endif
|
|
|
|
|
MIPS: Sync icache & dcache in set_pte_at
It's possible for pages to become visible prior to update_mmu_cache
running if a thread within the same address space preempts the current
thread or runs simultaneously on another CPU. That is, the following
scenario is possible:
CPU0 CPU1
write to page
flush_dcache_page
flush_icache_page
set_pte_at
map page
update_mmu_cache
If CPU1 maps the page in between CPU0's set_pte_at, which marks it valid
& visible, and update_mmu_cache where the dcache flush occurs then CPU1s
icache will fill from stale data (unless it fills from the dcache, in
which case all is good, but most MIPS CPUs don't have this property).
Commit 4d46a67a3eb8 ("MIPS: Fix race condition in lazy cache flushing.")
attempted to fix that by performing the dcache flush in
flush_icache_page such that it occurs before the set_pte_at call makes
the page visible. However it has the problem that not all code that
writes to pages exposed to userland call flush_icache_page. There are
many callers of set_pte_at under mm/ and only 2 of them do call
flush_icache_page. Thus the race window between a page becoming visible
& being coherent between the icache & dcache remains open in some cases.
To illustrate some of the cases, a WARN was added to __update_cache with
this patch applied that triggered in cases where a page about to be
flushed from the dcache was not the last page provided to
flush_icache_page. That is, backtraces were obtained for cases in which
the race window is left open without this patch. The 2 standout examples
follow.
When forking a process:
[ 15.271842] [<80417630>] __update_cache+0xcc/0x188
[ 15.277274] [<80530394>] copy_page_range+0x56c/0x6ac
[ 15.282861] [<8042936c>] copy_process.part.54+0xd40/0x17ac
[ 15.289028] [<80429f80>] do_fork+0xe4/0x420
[ 15.293747] [<80413808>] handle_sys+0x128/0x14c
When exec'ing an ELF binary:
[ 14.445964] [<80417630>] __update_cache+0xcc/0x188
[ 14.451369] [<80538d88>] move_page_tables+0x414/0x498
[ 14.457075] [<8055d848>] setup_arg_pages+0x220/0x318
[ 14.462685] [<805b0f38>] load_elf_binary+0x530/0x12a0
[ 14.468374] [<8055ec3c>] search_binary_handler+0xbc/0x214
[ 14.474444] [<8055f6c0>] do_execveat_common+0x43c/0x67c
[ 14.480324] [<8055f938>] do_execve+0x38/0x44
[ 14.485137] [<80413808>] handle_sys+0x128/0x14c
These code paths write into a page, call flush_dcache_page then call
set_pte_at without flush_icache_page in between. The end result is that
the icache can become corrupted & userland processes may execute
unexpected or invalid code, typically resulting in a reserved
instruction exception, a trap or a segfault.
Fix this race condition fully by performing any cache maintenance
required to keep the icache & dcache in sync in set_pte_at, before the
page is made valid. This has the added bonus of ensuring the cache
maintenance always happens in one location, rather than being duplicated
in flush_icache_page & update_mmu_cache. It also matches the way other
architectures solve the same problem (see arm, ia64 & powerpc).
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reported-by: Ionela Voinescu <ionela.voinescu@imgtec.com>
Cc: Lars Persson <lars.persson@axis.com>
Fixes: 4d46a67a3eb8 ("MIPS: Fix race condition in lazy cache flushing.")
Cc: Steven J. Hill <sjhill@realitydiluted.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: stable <stable@vger.kernel.org> # v4.1+
Patchwork: https://patchwork.linux-mips.org/patch/12722/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-03-01 09:37:59 +07:00
|
|
|
/*
 * Install @pteval at @ptep for virtual address @addr.
 *
 * __update_cache() is called first, before the entry is written, when the
 * new pte is present and the slot does not already hold a present pte for
 * the same pfn. Non-present values and same-pfn updates go straight to
 * set_pte().
 */
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
	pte_t *ptep, pte_t pteval)
{
	extern void __update_cache(unsigned long address, pte_t pte);

	if (pte_present(pteval) &&
	    !(pte_present(*ptep) && (pte_pfn(*ptep) == pte_pfn(pteval))))
		__update_cache(addr, pteval);

	set_pte(ptep, pteval);
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
2005-02-10 19:19:59 +07:00
|
|
|
* (pmds are folded into puds so this doesn't get actually called,
|
2005-04-17 05:20:36 +07:00
|
|
|
* but the define is needed for a generic inline function.)
|
|
|
|
*/
|
|
|
|
/* Store pmdval into *pmdptr; the do/while(0) wrapper makes it a single statement. */
#define set_pmd(pmdptr, pmdval) do { *(pmdptr) = (pmdval); } while(0)
|
2005-02-10 19:19:59 +07:00
|
|
|
|
2009-12-05 04:52:36 +07:00
|
|
|
#ifndef __PAGETABLE_PMD_FOLDED
|
2005-02-10 19:19:59 +07:00
|
|
|
/*
|
|
|
|
* (puds are folded into pgds so this doesn't get actually called,
|
|
|
|
* but the define is needed for a generic inline function.)
|
|
|
|
*/
|
|
|
|
/* Store pudval into *pudptr; the do/while(0) wrapper makes it a single statement. */
#define set_pud(pudptr, pudval) do { *(pudptr) = (pudval); } while(0)
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2007-08-01 21:25:28 +07:00
|
|
|
/* Index of the lowest set bit of sizeof(pgd_t), i.e. its log2 when the size is a power of two. */
#define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1)
|
|
|
|
/* Index of the lowest set bit of sizeof(pmd_t), i.e. its log2 when the size is a power of two. */
#define PMD_T_LOG2 (__builtin_ffs(sizeof(pmd_t)) - 1)
|
|
|
|
/* Index of the lowest set bit of sizeof(pte_t), i.e. its log2 when the size is a power of two. */
#define PTE_T_LOG2 (__builtin_ffs(sizeof(pte_t)) - 1)
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2007-08-13 18:44:41 +07:00
|
|
|
/*
|
|
|
|
* We used to declare this array with size but gcc 3.3 and older are not able
|
|
|
|
* to find that this expression is a constant, so the size is dropped.
|
|
|
|
*/
|
|
|
|
extern pgd_t swapper_pg_dir[];
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The following only work if pte_present() is true.
|
|
|
|
* Undefined behaviour if not..
|
|
|
|
*/
|
2014-11-22 06:16:48 +07:00
|
|
|
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
|
[MIPS] Fix marking buddy of pte global for MIPS32 w/36-bit physical address
In case of CONFIG_64BIT_PHYS_ADDR, set_pte() and pte_clear() functions
only set _PAGE_GLOBAL bit in the pte_low field of the buddy PTEs,
forgetting to propagate it to pte_high. Thus, both pages might not
really be made global for the CPU (since it AND's the G-bit of the
odd / even PTEs together to decide whether they're global or not). Thus,
if only a single page is allocated via vmalloc() or ioremap(), it's not
really global for CPU (and it must be, since this is kernel mapping),
and thus its ASID is compared against the current process' one -- so,
we'll get into trouble sooner or later... Also, pte_none() will fail
on global pages because _PAGE_GLOBAL bit is set in both pte_low and
pte_high, and pte_val() will return u64 value consisting of those fields
concatenated.
Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2006-04-17 02:27:21 +07:00
|
|
|
/* Non-zero when _PAGE_WRITE is set in the low word of the pte. */
static inline int pte_write(pte_t pte)
{
	return pte.pte_low & _PAGE_WRITE;
}
|
|
|
|
/* Non-zero when _PAGE_MODIFIED is set in the low word of the pte. */
static inline int pte_dirty(pte_t pte)
{
	return pte.pte_low & _PAGE_MODIFIED;
}
|
|
|
|
/* Non-zero when _PAGE_ACCESSED is set in the low word of the pte. */
static inline int pte_young(pte_t pte)
{
	return pte.pte_low & _PAGE_ACCESSED;
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * Return @pte with write permission removed.
 *
 * _PAGE_WRITE is cleared in pte_low and _PAGE_SILENT_WRITE in pte_high;
 * unless CONFIG_XPA is enabled, _PAGE_SILENT_WRITE is cleared in pte_low
 * as well.
 */
static inline pte_t pte_wrprotect(pte_t pte)
{
	if (IS_ENABLED(CONFIG_XPA))
		pte.pte_low &= ~_PAGE_WRITE;
	else
		pte.pte_low &= ~(_PAGE_WRITE | _PAGE_SILENT_WRITE);

	pte.pte_high &= ~_PAGE_SILENT_WRITE;
	return pte;
}
|
|
|
|
|
|
|
|
/*
 * Return @pte marked clean.
 *
 * _PAGE_MODIFIED is cleared in pte_low and _PAGE_SILENT_WRITE in pte_high;
 * unless CONFIG_XPA is enabled, _PAGE_SILENT_WRITE is cleared in pte_low
 * as well.
 */
static inline pte_t pte_mkclean(pte_t pte)
{
	if (IS_ENABLED(CONFIG_XPA))
		pte.pte_low &= ~_PAGE_MODIFIED;
	else
		pte.pte_low &= ~(_PAGE_MODIFIED | _PAGE_SILENT_WRITE);

	pte.pte_high &= ~_PAGE_SILENT_WRITE;
	return pte;
}
|
|
|
|
|
|
|
|
/*
 * Return @pte marked as not recently accessed.
 *
 * _PAGE_ACCESSED is cleared in pte_low and _PAGE_SILENT_READ in pte_high;
 * unless CONFIG_XPA is enabled, _PAGE_SILENT_READ is cleared in pte_low
 * as well.
 */
static inline pte_t pte_mkold(pte_t pte)
{
	if (IS_ENABLED(CONFIG_XPA))
		pte.pte_low &= ~_PAGE_ACCESSED;
	else
		pte.pte_low &= ~(_PAGE_ACCESSED | _PAGE_SILENT_READ);

	pte.pte_high &= ~_PAGE_SILENT_READ;
	return pte;
}
|
|
|
|
|
|
|
|
/*
 * Return @pte with _PAGE_WRITE set in pte_low.
 *
 * If the pte is also marked _PAGE_MODIFIED, _PAGE_SILENT_WRITE is set in
 * pte_high and, unless CONFIG_XPA is enabled, in pte_low too.
 */
static inline pte_t pte_mkwrite(pte_t pte)
{
	pte.pte_low |= _PAGE_WRITE;

	/* Not dirty yet: leave the silent-write bits alone. */
	if (!(pte.pte_low & _PAGE_MODIFIED))
		return pte;

	pte.pte_high |= _PAGE_SILENT_WRITE;
	if (!IS_ENABLED(CONFIG_XPA))
		pte.pte_low |= _PAGE_SILENT_WRITE;

	return pte;
}
|
|
|
|
|
|
|
|
/*
 * Return @pte with _PAGE_MODIFIED set in pte_low.
 *
 * If the pte is also marked _PAGE_WRITE, _PAGE_SILENT_WRITE is set in
 * pte_high and, unless CONFIG_XPA is enabled, in pte_low too.
 */
static inline pte_t pte_mkdirty(pte_t pte)
{
	pte.pte_low |= _PAGE_MODIFIED;

	/* Not writable: leave the silent-write bits alone. */
	if (!(pte.pte_low & _PAGE_WRITE))
		return pte;

	pte.pte_high |= _PAGE_SILENT_WRITE;
	if (!IS_ENABLED(CONFIG_XPA))
		pte.pte_low |= _PAGE_SILENT_WRITE;

	return pte;
}
|
|
|
|
|
|
|
|
/*
 * Return @pte with _PAGE_ACCESSED set in pte_low.
 *
 * Unless _PAGE_NO_READ is set in pte_low, _PAGE_SILENT_READ is also set in
 * pte_high and, unless CONFIG_XPA is enabled, in pte_low too.
 */
static inline pte_t pte_mkyoung(pte_t pte)
{
	pte.pte_low |= _PAGE_ACCESSED;

	/* Unreadable page: leave the silent-read bits alone. */
	if (pte.pte_low & _PAGE_NO_READ)
		return pte;

	pte.pte_high |= _PAGE_SILENT_READ;
	if (!IS_ENABLED(CONFIG_XPA))
		pte.pte_low |= _PAGE_SILENT_READ;

	return pte;
}
|
|
|
|
#else
|
|
|
|
/* Non-zero when _PAGE_WRITE is set in the pte value. */
static inline int pte_write(pte_t pte)
{
	return pte_val(pte) & _PAGE_WRITE;
}
|
|
|
|
/* Non-zero when _PAGE_MODIFIED is set in the pte value. */
static inline int pte_dirty(pte_t pte)
{
	return pte_val(pte) & _PAGE_MODIFIED;
}
|
|
|
|
/* Non-zero when _PAGE_ACCESSED is set in the pte value. */
static inline int pte_young(pte_t pte)
{
	return pte_val(pte) & _PAGE_ACCESSED;
}
|
|
|
|
|
|
|
|
/* Return @pte with _PAGE_WRITE and _PAGE_SILENT_WRITE cleared. */
static inline pte_t pte_wrprotect(pte_t pte)
{
	pte_val(pte) = pte_val(pte) & ~(_PAGE_WRITE | _PAGE_SILENT_WRITE);

	return pte;
}
|
|
|
|
|
|
|
|
/* Return @pte with _PAGE_MODIFIED and _PAGE_SILENT_WRITE cleared. */
static inline pte_t pte_mkclean(pte_t pte)
{
	pte_val(pte) = pte_val(pte) & ~(_PAGE_MODIFIED | _PAGE_SILENT_WRITE);

	return pte;
}
|
|
|
|
|
|
|
|
/* Return @pte with _PAGE_ACCESSED and _PAGE_SILENT_READ cleared. */
static inline pte_t pte_mkold(pte_t pte)
{
	pte_val(pte) = pte_val(pte) & ~(_PAGE_ACCESSED | _PAGE_SILENT_READ);

	return pte;
}
|
|
|
|
|
|
|
|
/*
 * Return @pte with _PAGE_WRITE set; _PAGE_SILENT_WRITE is added as well
 * when the pte is already marked _PAGE_MODIFIED.
 */
static inline pte_t pte_mkwrite(pte_t pte)
{
	pte_val(pte) |= _PAGE_WRITE;

	/* Not dirty yet: leave the silent-write bit alone. */
	if (!(pte_val(pte) & _PAGE_MODIFIED))
		return pte;

	pte_val(pte) |= _PAGE_SILENT_WRITE;
	return pte;
}
|
|
|
|
|
|
|
|
/*
 * Return @pte with _PAGE_MODIFIED set; _PAGE_SILENT_WRITE is added as well
 * when the pte is marked _PAGE_WRITE.
 */
static inline pte_t pte_mkdirty(pte_t pte)
{
	pte_val(pte) |= _PAGE_MODIFIED;

	/* Not writable: leave the silent-write bit alone. */
	if (!(pte_val(pte) & _PAGE_WRITE))
		return pte;

	pte_val(pte) |= _PAGE_SILENT_WRITE;
	return pte;
}
|
|
|
|
|
|
|
|
/*
 * Return @pte with _PAGE_ACCESSED set; _PAGE_SILENT_READ is added as well
 * unless the pte is marked _PAGE_NO_READ.
 */
static inline pte_t pte_mkyoung(pte_t pte)
{
	pte_val(pte) |= _PAGE_ACCESSED;

	/* Unreadable page: leave the silent-read bit alone. */
	if (pte_val(pte) & _PAGE_NO_READ)
		return pte;

	pte_val(pte) |= _PAGE_SILENT_READ;
	return pte;
}
|
2009-05-28 07:47:43 +07:00
|
|
|
|
2015-02-19 23:18:50 +07:00
|
|
|
#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
|
2009-05-28 07:47:43 +07:00
|
|
|
/* Non-zero when _PAGE_HUGE is set in the pte value. */
static inline int pte_huge(pte_t pte)
{
	return pte_val(pte) & _PAGE_HUGE;
}
|
|
|
|
|
|
|
|
/* Return @pte with _PAGE_HUGE set. */
static inline pte_t pte_mkhuge(pte_t pte)
{
	pte_val(pte) = pte_val(pte) | _PAGE_HUGE;

	return pte;
}
|
2015-02-19 23:18:50 +07:00
|
|
|
#endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */
|
2005-04-17 05:20:36 +07:00
|
|
|
#endif
|
mm: introduce pte_special pte bit
s390 for one, cannot implement VM_MIXEDMAP with pfn_valid, due to their memory
model (which is more dynamic than most). Instead, they had proposed to
implement it with an additional path through vm_normal_page(), using a bit in
the pte to determine whether or not the page should be refcounted:
vm_normal_page()
{
...
if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
if (vma->vm_flags & VM_MIXEDMAP) {
#ifdef s390
if (!mixedmap_refcount_pte(pte))
return NULL;
#else
if (!pfn_valid(pfn))
return NULL;
#endif
goto out;
}
...
}
This is fine, however if we are allowed to use a bit in the pte to determine
refcountedness, we can use that to _completely_ replace all the vma based
schemes. So instead of adding more cases to the already complex vma-based
scheme, we can have a clearly separate and simple pte-based scheme (and get
slightly better code generation in the process):
vm_normal_page()
{
#ifdef s390
if (!mixedmap_refcount_pte(pte))
return NULL;
return pte_page(pte);
#else
...
#endif
}
And finally, we may rather make this concept usable by any architecture rather
than making it s390 only, so implement a new type of pte state for this.
Unfortunately the old vma based code must stay, because some architectures may
not be able to spare pte bits. This makes vm_normal_page a little bit more
ugly than we would like, but the 2 cases are clearly separate.
So introduce a pte_special pte state, and use it in mm/memory.c. It is
currently a noop for all architectures, so this doesn't actually result in any
compiled code changes to mm/memory.o.
BTW:
I haven't put vm_normal_page() into arch code as-per an earlier suggestion.
The reason is that, regardless of where vm_normal_page is actually
implemented, the *abstraction* is still exactly the same. Also, while it
depends on whether the architecture has pte_special or not, that is the
only two possible cases, and it really isn't an arch specific function --
the role of the arch code should be to provide primitive functions and
accessors with which to build the core code; pte_special does that. We do
not want architectures to know or care about vm_normal_page itself, and
we definitely don't want them being able to invent something new there
out of sight of mm/ code. If we made vm_normal_page an arch function, then
we have to make vm_insert_mixed (next patch) an arch function too. So I
don't think moving it to arch code fundamentally improves any abstractions,
while it does practically make the code more difficult to follow, for both
mm and arch developers, and easier to misuse.
[akpm@linux-foundation.org: build fix]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Carsten Otte <cotte@de.ibm.com>
Cc: Jared Hulbert <jaredeh@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 16:13:00 +07:00
|
|
|
/*
 * No "special" PTE state is implemented here, so this reports 0 for
 * every PTE.
 */
static inline int pte_special(pte_t pte)
{
	return 0;
}
|
|
|
|
/* No "special" PTE state here: the entry is handed back unmodified. */
static inline pte_t pte_mkspecial(pte_t pte)
{
	return pte;
}
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
|
2013-01-22 18:59:30 +07:00
|
|
|
* Macro to make mark a page protection value as "uncacheable". Note
|
2005-04-17 05:20:36 +07:00
|
|
|
* that "protection" is really a misnomer here as the protection value
|
|
|
|
* contains the memory attribute bits, dirty bits, and various other
|
|
|
|
* bits as well.
|
|
|
|
*/
|
|
|
|
#define pgprot_noncached pgprot_noncached
|
|
|
|
|
|
|
|
static inline pgprot_t pgprot_noncached(pgprot_t _prot)
|
|
|
|
{
|
|
|
|
unsigned long prot = pgprot_val(_prot);
|
|
|
|
|
|
|
|
prot = (prot & ~_CACHE_MASK) | _CACHE_UNCACHED;
|
|
|
|
|
|
|
|
return __pgprot(prot);
|
|
|
|
}
|
|
|
|
|
2015-07-24 22:16:10 +07:00
|
|
|
#define pgprot_writecombine pgprot_writecombine
|
|
|
|
|
2014-07-18 16:51:33 +07:00
|
|
|
static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
|
|
|
|
{
|
|
|
|
unsigned long prot = pgprot_val(_prot);
|
|
|
|
|
|
|
|
/* cpu_data[0].writecombine is already shifted by _CACHE_SHIFT */
|
|
|
|
prot = (prot & ~_CACHE_MASK) | cpu_data[0].writecombine;
|
|
|
|
|
|
|
|
return __pgprot(prot);
|
|
|
|
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* Conversion functions: convert a page and protection to a page entry,
|
|
|
|
* and a page entry and page directory to the page they refer to.
|
|
|
|
*/
|
|
|
|
/* mk_pte(): convert page to its PFN with page_to_pfn() and pass it, with pgprot, to pfn_pte(). */
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
|
|
|
|
|
2016-04-19 15:25:05 +07:00
|
|
|
#if defined(CONFIG_XPA)
|
2005-04-17 05:20:36 +07:00
|
|
|
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
|
|
|
|
{
|
2015-02-27 07:16:38 +07:00
|
|
|
pte.pte_low &= (_PAGE_MODIFIED | _PAGE_ACCESSED | _PFNX_MASK);
|
2014-11-13 22:52:01 +07:00
|
|
|
pte.pte_high &= (_PFN_MASK | _CACHE_MASK);
|
2015-02-27 07:16:38 +07:00
|
|
|
pte.pte_low |= pgprot_val(newprot) & ~_PFNX_MASK;
|
MIPS: Fix possible corruption of cache mode by mprotect.
The following testcase may result in page table entries with an invalid
CCA field being generated:
static void *bindstack;
static int sysrqfd;
static void protect_low(int protect)
{
mprotect(bindstack, BINDSTACK_SIZE, protect);
}
static void sigbus_handler(int signal, siginfo_t * info, void *context)
{
void *addr = info->si_addr;
write(sysrqfd, "x", 1);
printf("sigbus, fault address %p (should not happen, but might)\n",
addr);
abort();
}
static void run_bind_test(void)
{
unsigned int *p = bindstack;
p[0] = 0xf001f001;
write(sysrqfd, "x", 1);
/* Set trap on access to p[0] */
protect_low(PROT_NONE);
write(sysrqfd, "x", 1);
/* Clear trap on access to p[0] */
protect_low(PROT_READ | PROT_WRITE | PROT_EXEC);
write(sysrqfd, "x", 1);
/* Check the contents of p[0] */
if (p[0] != 0xf001f001) {
write(sysrqfd, "x", 1);
/* Reached, but shouldn't be */
printf("badness, shouldn't happen but does\n");
abort();
}
}
int main(void)
{
struct sigaction sa;
sysrqfd = open("/proc/sysrq-trigger", O_WRONLY);
if (sigprocmask(SIG_BLOCK, NULL, &sa.sa_mask)) {
perror("sigprocmask");
return 0;
}
sa.sa_sigaction = sigbus_handler;
sa.sa_flags = SA_SIGINFO | SA_NODEFER | SA_RESTART;
if (sigaction(SIGBUS, &sa, NULL)) {
perror("sigaction");
return 0;
}
bindstack = mmap(NULL,
BINDSTACK_SIZE,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (bindstack == MAP_FAILED) {
perror("mmap bindstack");
return 0;
}
printf("bindstack: %p\n", bindstack);
run_bind_test();
printf("done\n");
return 0;
}
There are multiple ingredients for this:
1) PAGE_NONE is defined to _CACHE_CACHABLE_NONCOHERENT, which is CCA 3
on all platforms except SB1 where it's CCA 5.
2) _page_cachable_default must have bits set which are not set in
_CACHE_CACHABLE_NONCOHERENT.
3) Either the defective version of pte_modify for XPA or the standard
version must be in use. However pte_modify for the 36 bit address
space support is not affected.
In that case additional bits in the final CCA mode may generate an invalid
value for the CCA field. On the R10000 system where this was tracked
down for example a CCA 7 has been observed, which is Uncached Accelerated.
Fixed by:
1) Using the proper CCA mode for PAGE_NONE just like for all the other
PAGE_* pte/pmd bits.
2) Fix the two affected variants of pte_modify.
Further code inspection also shows the same issue to exist in pmd_modify
which would affect huge page systems.
Issue in pte_modify tracked down by Alastair Bridgewater, PAGE_NONE
and pmd_modify issue found by me.
The history of this goes back beyond Linus' git history. Chris Dearman's
commit 351336929ccf222ae38ff0cb7a8dd5fd5c6236a0 ("[MIPS] Allow setting of
the cache attribute at run time.") missed the opportunity to fix this
but it was originally introduced in lmo commit
d523832cf12007b3242e50bb77d0c9e63e0b6518 ("Missing from last commit.")
and 32cc38229ac7538f2346918a09e75413e8861f87 ("New configuration option
CONFIG_MIPS_UNCACHED.")
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Reported-by: Alastair Bridgewater <alastair.bridgewater@gmail.com>
2016-07-01 20:01:01 +07:00
|
|
|
pte.pte_high |= pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK);
|
2005-04-17 05:20:36 +07:00
|
|
|
return pte;
|
|
|
|
}
|
2016-04-19 15:25:05 +07:00
|
|
|
#elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
|
|
|
|
/*
 * pte_modify() for PTEs that are split into pte_low and pte_high: apply a
 * new protection value to an existing entry and return the result.
 */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
	/* Low word: keep the _PAGE_CHG_MASK bits and OR in the new protection. */
	pte.pte_low = (pte.pte_low & _PAGE_CHG_MASK) | pgprot_val(newprot);

	/*
	 * High word: keep the bits covered by _PFN_MASK and _CACHE_MASK and
	 * take every other bit from the new protection.
	 */
	pte.pte_high = (pte.pte_high & (_PFN_MASK | _CACHE_MASK)) |
		       (pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK));

	return pte;
}
|
2005-04-17 05:20:36 +07:00
|
|
|
#else
|
|
|
|
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
|
|
|
|
{
|
MIPS: Fix possible corruption of cache mode by mprotect.
The following testcase may result in page table entries with an invalid
CCA field being generated:
static void *bindstack;
static int sysrqfd;
static void protect_low(int protect)
{
mprotect(bindstack, BINDSTACK_SIZE, protect);
}
static void sigbus_handler(int signal, siginfo_t * info, void *context)
{
void *addr = info->si_addr;
write(sysrqfd, "x", 1);
printf("sigbus, fault address %p (should not happen, but might)\n",
addr);
abort();
}
static void run_bind_test(void)
{
unsigned int *p = bindstack;
p[0] = 0xf001f001;
write(sysrqfd, "x", 1);
/* Set trap on access to p[0] */
protect_low(PROT_NONE);
write(sysrqfd, "x", 1);
/* Clear trap on access to p[0] */
protect_low(PROT_READ | PROT_WRITE | PROT_EXEC);
write(sysrqfd, "x", 1);
/* Check the contents of p[0] */
if (p[0] != 0xf001f001) {
write(sysrqfd, "x", 1);
/* Reached, but shouldn't be */
printf("badness, shouldn't happen but does\n");
abort();
}
}
int main(void)
{
struct sigaction sa;
sysrqfd = open("/proc/sysrq-trigger", O_WRONLY);
if (sigprocmask(SIG_BLOCK, NULL, &sa.sa_mask)) {
perror("sigprocmask");
return 0;
}
sa.sa_sigaction = sigbus_handler;
sa.sa_flags = SA_SIGINFO | SA_NODEFER | SA_RESTART;
if (sigaction(SIGBUS, &sa, NULL)) {
perror("sigaction");
return 0;
}
bindstack = mmap(NULL,
BINDSTACK_SIZE,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (bindstack == MAP_FAILED) {
perror("mmap bindstack");
return 0;
}
printf("bindstack: %p\n", bindstack);
run_bind_test();
printf("done\n");
return 0;
}
There are multiple ingredients for this:
1) PAGE_NONE is defined to _CACHE_CACHABLE_NONCOHERENT, which is CCA 3
on all platforms except SB1 where it's CCA 5.
2) _page_cachable_default must have bits set which are not set in
_CACHE_CACHABLE_NONCOHERENT.
3) Either the defective version of pte_modify for XPA or the standard
version must be in use. However pte_modify for the 36 bit address
space support is not affected.
In that case additional bits in the final CCA mode may generate an invalid
value for the CCA field. On the R10000 system where this was tracked
down for example a CCA 7 has been observed, which is Uncached Accelerated.
Fixed by:
1) Using the proper CCA mode for PAGE_NONE just like for all the other
PAGE_* pte/pmd bits.
2) Fix the two affected variants of pte_modify.
Further code inspection also shows the same issue to exist in pmd_modify
which would affect huge page systems.
Issue in pte_modify tracked down by Alastair Bridgewater, PAGE_NONE
and pmd_modify issue found by me.
The history of this goes back beyond Linus' git history. Chris Dearman's
commit 351336929ccf222ae38ff0cb7a8dd5fd5c6236a0 ("[MIPS] Allow setting of
the cache attribute at run time.") missed the opportunity to fix this
but it was originally introduced in lmo commit
d523832cf12007b3242e50bb77d0c9e63e0b6518 ("Missing from last commit.")
and 32cc38229ac7538f2346918a09e75413e8861f87 ("New configuration option
CONFIG_MIPS_UNCACHED.")
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Reported-by: Alastair Bridgewater <alastair.bridgewater@gmail.com>
2016-07-01 20:01:01 +07:00
|
|
|
return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
|
|
|
|
(pgprot_val(newprot) & ~_PAGE_CHG_MASK));
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
extern void __update_tlb(struct vm_area_struct *vma, unsigned long address,
|
|
|
|
pte_t pte);
|
|
|
|
|
|
|
|
static inline void update_mmu_cache(struct vm_area_struct *vma,
|
MM: Pass a PTE pointer to update_mmu_cache() rather than the PTE itself
On VIVT ARM, when we have multiple shared mappings of the same file
in the same MM, we need to ensure that we have coherency across all
copies. We do this via make_coherent() by making the pages
uncacheable.
This used to work fine, until we allowed highmem with highpte - we
now have a page table which is mapped as required, and is not available
for modification via update_mmu_cache().
Ralf Baechle suggested getting rid of the PTE value passed to
update_mmu_cache():
On MIPS update_mmu_cache() calls __update_tlb() which walks pagetables
to construct a pointer to the pte again. Passing a pte_t * is much
more elegant. Maybe we might even replace the pte argument with the
pte_t?
Ben Herrenschmidt would also like the pte pointer for PowerPC:
Passing the ptep in there is exactly what I want. I want that
-instead- of the PTE value, because I have issue on some ppc cases,
for I$/D$ coherency, where set_pte_at() may decide to mask out the
_PAGE_EXEC.
So, pass in the mapped page table pointer into update_mmu_cache(), and
remove the PTE value, updating all implementations and call sites to
suit.
Includes a fix from Stephen Rothwell:
sparc: fix fallout from update_mmu_cache API change
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2009-12-18 23:40:18 +07:00
|
|
|
unsigned long address, pte_t *ptep)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
MM: Pass a PTE pointer to update_mmu_cache() rather than the PTE itself
On VIVT ARM, when we have multiple shared mappings of the same file
in the same MM, we need to ensure that we have coherency across all
copies. We do this via make_coherent() by making the pages
uncacheable.
This used to work fine, until we allowed highmem with highpte - we
now have a page table which is mapped as required, and is not available
for modification via update_mmu_cache().
Ralf Baechle suggested getting rid of the PTE value passed to
update_mmu_cache():
On MIPS update_mmu_cache() calls __update_tlb() which walks pagetables
to construct a pointer to the pte again. Passing a pte_t * is much
more elegant. Maybe we might even replace the pte argument with the
pte_t?
Ben Herrenschmidt would also like the pte pointer for PowerPC:
Passing the ptep in there is exactly what I want. I want that
-instead- of the PTE value, because I have issue on some ppc cases,
for I$/D$ coherency, where set_pte_at() may decide to mask out the
_PAGE_EXEC.
So, pass in the mapped page table pointer into update_mmu_cache(), and
remove the PTE value, updating all implementations and call sites to
suit.
Includes a fix from Stephen Rothwell:
sparc: fix fallout from update_mmu_cache API change
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2009-12-18 23:40:18 +07:00
|
|
|
pte_t pte = *ptep;
|
2005-04-17 05:20:36 +07:00
|
|
|
__update_tlb(vma, address, pte);
|
|
|
|
}
|
|
|
|
|
2012-10-18 18:54:15 +07:00
|
|
|
/*
 * PMD flavour of update_mmu_cache(): the entry that pmdp points at is read
 * through a pte_t pointer and handed to __update_tlb().
 */
static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp)
{
	__update_tlb(vma, address, *(pte_t *)pmdp);
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Expands to the constant 1; the addr argument is not evaluated. */
#define kern_addr_valid(addr) (1)
|
|
|
|
|
2014-11-22 06:16:48 +07:00
|
|
|
#ifdef CONFIG_PHYS_ADDR_T_64BIT
|
2005-04-17 05:20:36 +07:00
|
|
|
extern int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
			   unsigned long pfn, unsigned long size,
			   pgprot_t prot);

/*
 * io_remap_pfn_range() - remap_pfn_range() with a physical address fixup.
 * @vma:   user VMA to map into
 * @vaddr: starting user virtual address
 * @pfn:   page frame number of the first page to map
 * @size:  size of the mapping
 * @prot:  page protection for the mapping
 *
 * The physical address derived from @pfn is passed through
 * fixup_bigphys_addr() and the result is converted back to a page frame
 * number for remap_pfn_range().
 *
 * Returns whatever remap_pfn_range() returns.
 */
static inline int io_remap_pfn_range(struct vm_area_struct *vma,
				     unsigned long vaddr,
				     unsigned long pfn,
				     unsigned long size,
				     pgprot_t prot)
{
	/*
	 * Widen before shifting: pfn is an unsigned long, which can be
	 * narrower than phys_addr_t in this configuration, and shifting it
	 * as-is would drop the upper address bits before the fixup sees them.
	 */
	phys_addr_t phys_addr_high =
		fixup_bigphys_addr((phys_addr_t)pfn << PAGE_SHIFT, size);

	return remap_pfn_range(vma, vaddr, phys_addr_high >> PAGE_SHIFT,
			       size, prot);
}
#define io_remap_pfn_range io_remap_pfn_range
|
2005-04-17 05:20:36 +07:00
|
|
|
#endif
|
|
|
|
|
2012-10-18 18:54:15 +07:00
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
|
|
|
arch: fix has_transparent_hugepage()
I've just discovered that the useful-sounding has_transparent_hugepage()
is actually an architecture-dependent minefield: on some arches it only
builds if CONFIG_TRANSPARENT_HUGEPAGE=y, on others it's also there when
not, but on some of those (arm and arm64) it then gives the wrong
answer; and on mips alone it's marked __init, which would crash if
called later (but so far it has not been called later).
Straighten this out: make it available to all configs, with a sensible
default in asm-generic/pgtable.h, removing its definitions from those
arches (arc, arm, arm64, sparc, tile) which are served by the default,
adding #define has_transparent_hugepage has_transparent_hugepage to
those (mips, powerpc, s390, x86) which need to override the default at
runtime, and removing the __init from mips (but maybe that kind of code
should be avoided after init: set a static variable the first time it's
called).
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andres Lagar-Cavilla <andreslc@google.com>
Cc: Yang Shi <yang.shi@linaro.org>
Cc: Ning Qu <quning@gmail.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Vineet Gupta <vgupta@synopsys.com> [arch/arc]
Acked-by: Gerald Schaefer <gerald.schaefer@de.ibm.com> [arch/s390]
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-05-20 07:13:00 +07:00
|
|
|
/*
 * The self-referencing define marks has_transparent_hugepage() as provided
 * by the architecture rather than by the asm-generic default; the function
 * itself is defined outside this header.
 */
#define has_transparent_hugepage has_transparent_hugepage
extern int has_transparent_hugepage(void);
|
|
|
|
|
|
|
|
/* Returns 1 when _PAGE_HUGE is set in pmd, 0 otherwise. */
static inline int pmd_trans_huge(pmd_t pmd)
{
	return (pmd_val(pmd) & _PAGE_HUGE) != 0;
}
|
|
|
|
|
|
|
|
/* Returns pmd with _PAGE_HUGE set. */
static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
	pmd_val(pmd) = pmd_val(pmd) | _PAGE_HUGE;
	return pmd;
}
|
|
|
|
|
|
|
|
/* Defined outside this header. */
extern void set_pmd_at(struct mm_struct *mm,
		       unsigned long addr,
		       pmd_t *pmdp,
		       pmd_t pmd);
|
|
|
|
|
|
|
|
#define __HAVE_ARCH_PMD_WRITE
|
|
|
|
static inline int pmd_write(pmd_t pmd)
|
|
|
|
{
|
|
|
|
return !!(pmd_val(pmd) & _PAGE_WRITE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns pmd with both _PAGE_WRITE and _PAGE_SILENT_WRITE cleared. */
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
	pmd_val(pmd) = pmd_val(pmd) & ~(_PAGE_WRITE | _PAGE_SILENT_WRITE);
	return pmd;
}
|
|
|
|
|
|
|
|
/*
 * Returns pmd with _PAGE_WRITE set.  _PAGE_SILENT_WRITE is set as well,
 * but only when the entry already carries _PAGE_MODIFIED.
 */
static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
	pmd_val(pmd) = pmd_val(pmd) | _PAGE_WRITE;

	if ((pmd_val(pmd) & _PAGE_MODIFIED) != 0)
		pmd_val(pmd) = pmd_val(pmd) | _PAGE_SILENT_WRITE;

	return pmd;
}
|
|
|
|
|
|
|
|
/* Returns 1 when _PAGE_MODIFIED is set in pmd, 0 otherwise. */
static inline int pmd_dirty(pmd_t pmd)
{
	return (pmd_val(pmd) & _PAGE_MODIFIED) != 0;
}
|
|
|
|
|
|
|
|
/* Returns pmd with both _PAGE_MODIFIED and _PAGE_SILENT_WRITE cleared. */
static inline pmd_t pmd_mkclean(pmd_t pmd)
{
	pmd_val(pmd) = pmd_val(pmd) & ~(_PAGE_MODIFIED | _PAGE_SILENT_WRITE);
	return pmd;
}
|
|
|
|
|
|
|
|
/*
 * Returns pmd with _PAGE_MODIFIED set.  _PAGE_SILENT_WRITE is set as well,
 * but only when the entry already carries _PAGE_WRITE.
 */
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
	pmd_val(pmd) = pmd_val(pmd) | _PAGE_MODIFIED;

	if ((pmd_val(pmd) & _PAGE_WRITE) != 0)
		pmd_val(pmd) = pmd_val(pmd) | _PAGE_SILENT_WRITE;

	return pmd;
}
|
|
|
|
|
|
|
|
/* Returns 1 when _PAGE_ACCESSED is set in pmd, 0 otherwise. */
static inline int pmd_young(pmd_t pmd)
{
	return (pmd_val(pmd) & _PAGE_ACCESSED) != 0;
}
|
|
|
|
|
|
|
|
/* Returns pmd with both _PAGE_ACCESSED and _PAGE_SILENT_READ cleared. */
static inline pmd_t pmd_mkold(pmd_t pmd)
{
	pmd_val(pmd) = pmd_val(pmd) & ~(_PAGE_ACCESSED | _PAGE_SILENT_READ);
	return pmd;
}
|
|
|
|
|
|
|
|
/*
 * Returns pmd with _PAGE_ACCESSED set.  _PAGE_SILENT_READ is set as well,
 * unless the entry carries _PAGE_NO_READ.
 */
static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
	pmd_val(pmd) = pmd_val(pmd) | _PAGE_ACCESSED;

	if ((pmd_val(pmd) & _PAGE_NO_READ) == 0)
		pmd_val(pmd) = pmd_val(pmd) | _PAGE_SILENT_READ;

	return pmd;
}
|
|
|
|
|
|
|
|
/* Extern to avoid header file madness */
|
|
|
|
extern pmd_t mk_pmd(struct page *page, pgprot_t prot);
|
|
|
|
|
|
|
|
/* Returns the value of pmd shifted right by _PFN_SHIFT. */
static inline unsigned long pmd_pfn(pmd_t pmd)
{
	unsigned long pfn = pmd_val(pmd) >> _PFN_SHIFT;

	return pfn;
}
|
|
|
|
|
|
|
|
/*
 * Returns the struct page for a PMD entry.  A huge entry (pmd_trans_huge())
 * is resolved through pmd_pfn(); any other entry through pmd_phys().
 */
static inline struct page *pmd_page(pmd_t pmd)
{
	if (!pmd_trans_huge(pmd))
		return pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT);

	return pfn_to_page(pmd_pfn(pmd));
}
|
|
|
|
|
|
|
|
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
|
|
|
|
{
|
2016-06-17 05:50:31 +07:00
|
|
|
pmd_val(pmd) = (pmd_val(pmd) & (_PAGE_CHG_MASK | _PAGE_HUGE)) |
|
MIPS: Fix possible corruption of cache mode by mprotect.
The following testcase may result in page table entries with an invalid
CCA field being generated:
static void *bindstack;
static int sysrqfd;
static void protect_low(int protect)
{
mprotect(bindstack, BINDSTACK_SIZE, protect);
}
static void sigbus_handler(int signal, siginfo_t * info, void *context)
{
void *addr = info->si_addr;
write(sysrqfd, "x", 1);
printf("sigbus, fault address %p (should not happen, but might)\n",
addr);
abort();
}
static void run_bind_test(void)
{
unsigned int *p = bindstack;
p[0] = 0xf001f001;
write(sysrqfd, "x", 1);
/* Set trap on access to p[0] */
protect_low(PROT_NONE);
write(sysrqfd, "x", 1);
/* Clear trap on access to p[0] */
protect_low(PROT_READ | PROT_WRITE | PROT_EXEC);
write(sysrqfd, "x", 1);
/* Check the contents of p[0] */
if (p[0] != 0xf001f001) {
write(sysrqfd, "x", 1);
/* Reached, but shouldn't be */
printf("badness, shouldn't happen but does\n");
abort();
}
}
int main(void)
{
struct sigaction sa;
sysrqfd = open("/proc/sysrq-trigger", O_WRONLY);
if (sigprocmask(SIG_BLOCK, NULL, &sa.sa_mask)) {
perror("sigprocmask");
return 0;
}
sa.sa_sigaction = sigbus_handler;
sa.sa_flags = SA_SIGINFO | SA_NODEFER | SA_RESTART;
if (sigaction(SIGBUS, &sa, NULL)) {
perror("sigaction");
return 0;
}
bindstack = mmap(NULL,
BINDSTACK_SIZE,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (bindstack == MAP_FAILED) {
perror("mmap bindstack");
return 0;
}
printf("bindstack: %p\n", bindstack);
run_bind_test();
printf("done\n");
return 0;
}
There are multiple ingredients for this:
1) PAGE_NONE is defined to _CACHE_CACHABLE_NONCOHERENT, which is CCA 3
on all platforms except SB1 where it's CCA 5.
2) _page_cachable_default must have bits set which are not set in
_CACHE_CACHABLE_NONCOHERENT.
3) Either the defective version of pte_modify for XPA or the standard
version must be in use. However pte_modify for the 36 bit address
space support is not affected.
In that case additional bits in the final CCA mode may generate an invalid
value for the CCA field. On the R10000 system where this was tracked
down for example a CCA 7 has been observed, which is Uncached Accelerated.
Fixed by:
1) Using the proper CCA mode for PAGE_NONE just like for all the other
PAGE_* pte/pmd bits.
2) Fix the two affected variants of pte_modify.
Further code inspection also shows the same issue to exist in pmd_modify
which would affect huge page systems.
Issue in pte_modify tracked down by Alastair Bridgewater, PAGE_NONE
and pmd_modify issue found by me.
The history of this goes back beyond Linus' git history. Chris Dearman's
commit 351336929ccf222ae38ff0cb7a8dd5fd5c6236a0 ("[MIPS] Allow setting of
the cache attribute at run time.") missed the opportunity to fix this
but it was originally introduced in lmo commit
d523832cf12007b3242e50bb77d0c9e63e0b6518 ("Missing from last commit.")
and 32cc38229ac7538f2346918a09e75413e8861f87 ("New configuration option
CONFIG_MIPS_UNCACHED.")
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Reported-by: Alastair Bridgewater <alastair.bridgewater@gmail.com>
2016-07-01 20:01:01 +07:00
|
|
|
(pgprot_val(newprot) & ~_PAGE_CHG_MASK);
|
2012-10-18 18:54:15 +07:00
|
|
|
return pmd;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns pmd with _PAGE_PRESENT, _PAGE_VALID and _PAGE_DIRTY cleared. */
static inline pmd_t pmd_mknotpresent(pmd_t pmd)
{
	pmd_val(pmd) = pmd_val(pmd) &
		       ~(_PAGE_PRESENT | _PAGE_VALID | _PAGE_DIRTY);
	return pmd;
}
|
|
|
|
|
|
|
|
/*
|
2015-06-25 06:57:44 +07:00
|
|
|
* The generic version pmdp_huge_get_and_clear uses a version of pmd_clear() with a
|
2012-10-18 18:54:15 +07:00
|
|
|
* different prototype.
|
|
|
|
*/
|
2015-06-25 06:57:44 +07:00
|
|
|
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
|
|
|
|
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
|
|
|
|
unsigned long address, pmd_t *pmdp)
|
2012-10-18 18:54:15 +07:00
|
|
|
{
|
|
|
|
pmd_t old = *pmdp;
|
|
|
|
|
|
|
|
pmd_clear(pmdp);
|
|
|
|
|
|
|
|
return old;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <asm-generic/pgtable.h>
|
|
|
|
|
2009-11-11 12:59:23 +07:00
|
|
|
/*
 * Uncached accelerated TLB map for video memory access.
 *
 * Only declared when CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED is set; the
 * definition lives outside this header.
 */
#ifdef CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED
#define __HAVE_PHYS_MEM_ACCESS_PROT

struct file;
pgprot_t phys_mem_access_prot(struct file *file,
			      unsigned long pfn,
			      unsigned long size,
			      pgprot_t vma_prot);
#endif
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * We provide our own get_unmapped_area to cope with the virtual aliasing
 * constraints placed on us by the cache architecture.
 */
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
 * No page table caches to initialise: this expands to an empty statement.
 */
#define pgtable_cache_init() do { } while (0)
|
|
|
|
|
|
|
|
#endif /* _ASM_PGTABLE_H */
|