x86/mm/pat: Emulate PAT when it is disabled

In the case when PAT is disabled on the command line with
"nopat" or when virtualization doesn't support PAT (correctly) -
see

  9d34cfdf47 ("x86: Don't rely on VMWare emulating PAT MSR correctly")

- we emulate it using the PWT and PCD cache attribute bits. Get
rid of boot_pat_state while at it.

Based on a conglomerate patch from Toshi Kani.

Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Toshi Kani <toshi.kani@hp.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Elliott@hp.com
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: hch@lst.de
Cc: hmh@hmh.eng.br
Cc: konrad.wilk@oracle.com
Cc: linux-mm <linux-mm@kvack.org>
Cc: linux-nvdimm@lists.01.org
Cc: stefan.bader@canonical.com
Cc: yigal@plexistor.com
Link: http://lkml.kernel.org/r/1433436928-31903-3-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Borislav Petkov 2015-06-04 18:55:10 +02:00 committed by Ingo Molnar
parent 9dac629094
commit 9cd25aac1f
4 changed files with 60 additions and 34 deletions

View File

@ -6,7 +6,7 @@
bool pat_enabled(void); bool pat_enabled(void);
extern void pat_init(void); extern void pat_init(void);
void pat_init_cache_modes(void); void pat_init_cache_modes(u64);
extern int reserve_memtype(u64 start, u64 end, extern int reserve_memtype(u64 start, u64 end,
enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);

View File

@ -40,7 +40,7 @@
*/ */
uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
[_PAGE_CACHE_MODE_WB ] = 0 | 0 , [_PAGE_CACHE_MODE_WB ] = 0 | 0 ,
[_PAGE_CACHE_MODE_WC ] = _PAGE_PWT | 0 , [_PAGE_CACHE_MODE_WC ] = 0 | _PAGE_PCD,
[_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD, [_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD,
[_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD, [_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD,
[_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD, [_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD,
@ -50,11 +50,11 @@ EXPORT_SYMBOL(__cachemode2pte_tbl);
uint8_t __pte2cachemode_tbl[8] = { uint8_t __pte2cachemode_tbl[8] = {
[__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB, [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB,
[__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_WC, [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_UC_MINUS,
[__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS,
[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC, [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC,
[__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB, [__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
[__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
[__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
}; };

View File

@ -68,8 +68,6 @@ static int __init pat_debug_setup(char *str)
} }
__setup("debugpat", pat_debug_setup); __setup("debugpat", pat_debug_setup);
static u64 __read_mostly boot_pat_state;
#ifdef CONFIG_X86_PAT #ifdef CONFIG_X86_PAT
/* /*
* X86 PAT uses page flags WC and Uncached together to keep track of * X86 PAT uses page flags WC and Uncached together to keep track of
@ -177,14 +175,12 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
* configuration. * configuration.
* Using lower indices is preferred, so we start with highest index. * Using lower indices is preferred, so we start with highest index.
*/ */
void pat_init_cache_modes(void) void pat_init_cache_modes(u64 pat)
{ {
int i;
enum page_cache_mode cache; enum page_cache_mode cache;
char pat_msg[33]; char pat_msg[33];
u64 pat; int i;
rdmsrl(MSR_IA32_CR_PAT, pat);
pat_msg[32] = 0; pat_msg[32] = 0;
for (i = 7; i >= 0; i--) { for (i = 7; i >= 0; i--) {
cache = pat_get_cache_mode((pat >> (i * 8)) & 7, cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
@ -198,24 +194,33 @@ void pat_init_cache_modes(void)
static void pat_bsp_init(u64 pat) static void pat_bsp_init(u64 pat)
{ {
u64 tmp_pat;
if (!cpu_has_pat) { if (!cpu_has_pat) {
pat_disable("PAT not supported by CPU."); pat_disable("PAT not supported by CPU.");
return; return;
} }
rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); if (!pat_enabled())
if (!boot_pat_state) { goto done;
rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
if (!tmp_pat) {
pat_disable("PAT MSR is 0, disabled."); pat_disable("PAT MSR is 0, disabled.");
return; return;
} }
wrmsrl(MSR_IA32_CR_PAT, pat); wrmsrl(MSR_IA32_CR_PAT, pat);
pat_init_cache_modes(); done:
pat_init_cache_modes(pat);
} }
static void pat_ap_init(u64 pat) static void pat_ap_init(u64 pat)
{ {
if (!pat_enabled())
return;
if (!cpu_has_pat) { if (!cpu_has_pat) {
/* /*
* If this happens we are on a secondary CPU, but switched to * If this happens we are on a secondary CPU, but switched to
@ -231,12 +236,31 @@ void pat_init(void)
{ {
u64 pat; u64 pat;
if (!pat_enabled()) if (!pat_enabled()) {
return;
/* /*
* Set PWT to Write-Combining. All other bits stay the same: * No PAT. Emulate the PAT table that corresponds to the two
* cache bits, PWT (Write Through) and PCD (Cache Disable). This
* setup is the same as the BIOS default setup when the system
* has PAT but the "nopat" boot option has been specified. This
* emulated PAT table is used when MSR_IA32_CR_PAT returns 0.
* *
* PTE encoding used:
*
* PCD
* |PWT PAT
* || slot
* 00 0 WB : _PAGE_CACHE_MODE_WB
* 01 1 WT : _PAGE_CACHE_MODE_WT
* 10 2 UC-: _PAGE_CACHE_MODE_UC_MINUS
* 11 3 UC : _PAGE_CACHE_MODE_UC
*
* NOTE: When WC or WP is used, it is redirected to UC- per
* the default setup in __cachemode2pte_tbl[].
*/
pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
} else {
/*
* PTE encoding used in Linux: * PTE encoding used in Linux:
* PAT * PAT
* |PCD * |PCD
@ -250,6 +274,7 @@ void pat_init(void)
*/ */
pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
}
if (!boot_cpu_done) { if (!boot_cpu_done) {
pat_bsp_init(pat); pat_bsp_init(pat);

View File

@ -1467,6 +1467,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
{ {
struct physdev_set_iopl set_iopl; struct physdev_set_iopl set_iopl;
unsigned long initrd_start = 0; unsigned long initrd_start = 0;
u64 pat;
int rc; int rc;
if (!xen_start_info) if (!xen_start_info)
@ -1574,8 +1575,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
* Modify the cache mode translation tables to match Xen's PAT * Modify the cache mode translation tables to match Xen's PAT
* configuration. * configuration.
*/ */
rdmsrl(MSR_IA32_CR_PAT, pat);
pat_init_cache_modes(); pat_init_cache_modes(pat);
/* keep using Xen gdt for now; no urgent need to change it */ /* keep using Xen gdt for now; no urgent need to change it */