mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-24 18:30:54 +07:00
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar: - Extend the memmap= boot parameter syntax to allow the redeclaration and dropping of existing ranges, and to support all e820 range types (Jan H. Schönherr) - Improve the W+X boot time security checks to remove false positive warnings on Xen (Jan Beulich) - Support booting as Xen PVH guest (Juergen Gross) - Improved 5-level paging (LA57) support, in particular it's possible now to have a single kernel image for both 4-level and 5-level hardware (Kirill A. Shutemov) - AMD hardware RAM encryption support (SME/SEV) fixes (Tom Lendacky) - Preparatory commits for hardware-encrypted RAM support on Intel CPUs. (Kirill A. Shutemov) - Improved Intel-MID support (Andy Shevchenko) - Show EFI page tables in page_tables debug files (Andy Lutomirski) - ... plus misc fixes and smaller cleanups * 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (56 commits) x86/cpu/tme: Fix spelling: "configuation" -> "configuration" x86/boot: Fix SEV boot failure from change to __PHYSICAL_MASK_SHIFT x86/mm: Update comment in detect_tme() regarding x86_phys_bits x86/mm/32: Remove unused node_memmap_size_bytes() & CONFIG_NEED_NODE_MEMMAP_SIZE logic x86/mm: Remove pointless checks in vmalloc_fault x86/platform/intel-mid: Add special handling for ACPI HW reduced platforms ACPI, x86/boot: Introduce the ->reduced_hw_early_init() ACPI callback ACPI, x86/boot: Split out acpi_generic_reduce_hw_init() and export x86/pconfig: Provide defines and helper to run MKTME_KEY_PROG leaf x86/pconfig: Detect PCONFIG targets x86/tme: Detect if TME and MKTME is activated by BIOS x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G x86/boot/compressed/64: Use page table in trampoline memory x86/boot/compressed/64: Use stack from trampoline memory x86/boot/compressed/64: Make sure we have a 32-bit code segment x86/mm: Do not use paravirtualized calls in native_set_p4d() kdump, vmcoreinfo: Export pgtable_l5_enabled value x86/boot/compressed/64: Prepare new top-level page table for trampoline x86/boot/compressed/64: Set up trampoline memory x86/boot/compressed/64: Save and restore trampoline memory ...
This commit is contained in:
commit
d22fff8141
@ -2248,6 +2248,15 @@
|
||||
The memory region may be marked as e820 type 12 (0xc)
|
||||
and is NVDIMM or ADR memory.
|
||||
|
||||
memmap=<size>%<offset>-<oldtype>+<newtype>
|
||||
[KNL,ACPI] Convert memory within the specified region
|
||||
from <oldtype> to <newtype>. If "-<oldtype>" is left
|
||||
out, the whole region will be marked as <newtype>,
|
||||
even if previously unavailable. If "+<newtype>" is left
|
||||
out, matching memory will be removed. Types are
|
||||
specified as e820 types, e.g., 1 = RAM, 2 = reserved,
|
||||
3 = ACPI, 12 = PRAM.
|
||||
|
||||
memory_corruption_check=0/1 [X86]
|
||||
Some BIOSes seem to corrupt the first 64k of
|
||||
memory when doing things like suspend/resume.
|
||||
|
@ -20,12 +20,9 @@ Documentation/x86/x86_64/mm.txt
|
||||
|
||||
CONFIG_X86_5LEVEL=y enables the feature.
|
||||
|
||||
So far, a kernel compiled with the option enabled will be able to boot
|
||||
only on machines that supports the feature -- see for 'la57' flag in
|
||||
/proc/cpuinfo.
|
||||
|
||||
The plan is to implement boot-time switching between 4- and 5-level paging
|
||||
in the future.
|
||||
Kernel with CONFIG_X86_5LEVEL=y still able to boot on 4-level hardware.
|
||||
In this case additional page table level -- p4d -- will be folded at
|
||||
runtime.
|
||||
|
||||
== User-space and large virtual address space ==
|
||||
|
||||
|
@ -1461,6 +1461,8 @@ config X86_PAE
|
||||
|
||||
config X86_5LEVEL
|
||||
bool "Enable 5-level page tables support"
|
||||
select DYNAMIC_MEMORY_LAYOUT
|
||||
select SPARSEMEM_VMEMMAP
|
||||
depends on X86_64
|
||||
---help---
|
||||
5-level paging enables access to larger address space:
|
||||
@ -1469,8 +1471,8 @@ config X86_5LEVEL
|
||||
|
||||
It will be supported by future Intel CPUs.
|
||||
|
||||
Note: a kernel with this option enabled can only be booted
|
||||
on machines that support the feature.
|
||||
A kernel with the option enabled can be booted on machines that
|
||||
support 4- or 5-level paging.
|
||||
|
||||
See Documentation/x86/x86_64/5level-paging.txt for more
|
||||
information.
|
||||
@ -1595,10 +1597,6 @@ config ARCH_HAVE_MEMORY_PRESENT
|
||||
def_bool y
|
||||
depends on X86_32 && DISCONTIGMEM
|
||||
|
||||
config NEED_NODE_MEMMAP_SIZE
|
||||
def_bool y
|
||||
depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)
|
||||
|
||||
config ARCH_FLATMEM_ENABLE
|
||||
def_bool y
|
||||
depends on X86_32 && !NUMA
|
||||
@ -2174,10 +2172,17 @@ config PHYSICAL_ALIGN
|
||||
|
||||
Don't change this unless you know what you are doing.
|
||||
|
||||
config DYNAMIC_MEMORY_LAYOUT
|
||||
bool
|
||||
---help---
|
||||
This option makes base addresses of vmalloc and vmemmap as well as
|
||||
__PAGE_OFFSET movable during boot.
|
||||
|
||||
config RANDOMIZE_MEMORY
|
||||
bool "Randomize the kernel memory sections"
|
||||
depends on X86_64
|
||||
depends on RANDOMIZE_BASE
|
||||
select DYNAMIC_MEMORY_LAYOUT
|
||||
default RANDOMIZE_BASE
|
||||
---help---
|
||||
Randomizes the base virtual address of kernel memory sections
|
||||
|
@ -78,7 +78,7 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
|
||||
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
|
||||
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
|
||||
ifdef CONFIG_X86_64
|
||||
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
|
||||
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr_64.o
|
||||
vmlinux-objs-y += $(obj)/mem_encrypt.o
|
||||
vmlinux-objs-y += $(obj)/pgtable_64.o
|
||||
endif
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/bootparam.h>
|
||||
#include "pgtable.h"
|
||||
|
||||
/*
|
||||
* Locally defined symbols should be marked hidden:
|
||||
@ -304,55 +305,77 @@ ENTRY(startup_64)
|
||||
/* Set up the stack */
|
||||
leaq boot_stack_end(%rbx), %rsp
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
/*
|
||||
* Check if we need to enable 5-level paging.
|
||||
* RSI holds real mode data and need to be preserved across
|
||||
* a function call.
|
||||
*/
|
||||
pushq %rsi
|
||||
call l5_paging_required
|
||||
popq %rsi
|
||||
|
||||
/* If l5_paging_required() returned zero, we're done here. */
|
||||
cmpq $0, %rax
|
||||
je lvl5
|
||||
|
||||
/*
|
||||
* At this point we are in long mode with 4-level paging enabled,
|
||||
* but we want to enable 5-level paging.
|
||||
* but we might want to enable 5-level paging or vice versa.
|
||||
*
|
||||
* The problem is that we cannot do it directly. Setting LA57 in
|
||||
* long mode would trigger #GP. So we need to switch off long mode
|
||||
* first.
|
||||
* The problem is that we cannot do it directly. Setting or clearing
|
||||
* CR4.LA57 in long mode would trigger #GP. So we need to switch off
|
||||
* long mode and paging first.
|
||||
*
|
||||
* NOTE: This is not going to work if bootloader put us above 4G
|
||||
* limit.
|
||||
* We also need a trampoline in lower memory to switch over from
|
||||
* 4- to 5-level paging for cases when the bootloader puts the kernel
|
||||
* above 4G, but didn't enable 5-level paging for us.
|
||||
*
|
||||
* The first step is go into compatibility mode.
|
||||
* The same trampoline can be used to switch from 5- to 4-level paging
|
||||
* mode, like when starting 4-level paging kernel via kexec() when
|
||||
* original kernel worked in 5-level paging mode.
|
||||
*
|
||||
* For the trampoline, we need the top page table to reside in lower
|
||||
* memory as we don't have a way to load 64-bit values into CR3 in
|
||||
* 32-bit mode.
|
||||
*
|
||||
* We go though the trampoline even if we don't have to: if we're
|
||||
* already in a desired paging mode. This way the trampoline code gets
|
||||
* tested on every boot.
|
||||
*/
|
||||
|
||||
/* Clear additional page table */
|
||||
leaq lvl5_pgtable(%rbx), %rdi
|
||||
xorq %rax, %rax
|
||||
movq $(PAGE_SIZE/8), %rcx
|
||||
rep stosq
|
||||
/* Make sure we have GDT with 32-bit code segment */
|
||||
leaq gdt(%rip), %rax
|
||||
movq %rax, gdt64+2(%rip)
|
||||
lgdt gdt64(%rip)
|
||||
|
||||
/*
|
||||
* Setup current CR3 as the first and only entry in a new top level
|
||||
* page table.
|
||||
* paging_prepare() sets up the trampoline and checks if we need to
|
||||
* enable 5-level paging.
|
||||
*
|
||||
* Address of the trampoline is returned in RAX.
|
||||
* Non zero RDX on return means we need to enable 5-level paging.
|
||||
*
|
||||
* RSI holds real mode data and needs to be preserved across
|
||||
* this function call.
|
||||
*/
|
||||
movq %cr3, %rdi
|
||||
leaq 0x7 (%rdi), %rax
|
||||
movq %rax, lvl5_pgtable(%rbx)
|
||||
pushq %rsi
|
||||
call paging_prepare
|
||||
popq %rsi
|
||||
|
||||
/* Save the trampoline address in RCX */
|
||||
movq %rax, %rcx
|
||||
|
||||
/*
|
||||
* Load the address of trampoline_return() into RDI.
|
||||
* It will be used by the trampoline to return to the main code.
|
||||
*/
|
||||
leaq trampoline_return(%rip), %rdi
|
||||
|
||||
/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
|
||||
pushq $__KERNEL32_CS
|
||||
leaq compatible_mode(%rip), %rax
|
||||
leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
|
||||
pushq %rax
|
||||
lretq
|
||||
lvl5:
|
||||
#endif
|
||||
trampoline_return:
|
||||
/* Restore the stack, the 32-bit trampoline uses its own stack */
|
||||
leaq boot_stack_end(%rbx), %rsp
|
||||
|
||||
/*
|
||||
* cleanup_trampoline() would restore trampoline memory.
|
||||
*
|
||||
* RSI holds real mode data and needs to be preserved across
|
||||
* this function call.
|
||||
*/
|
||||
pushq %rsi
|
||||
call cleanup_trampoline
|
||||
popq %rsi
|
||||
|
||||
/* Zero EFLAGS */
|
||||
pushq $0
|
||||
@ -490,46 +513,82 @@ relocated:
|
||||
jmp *%rax
|
||||
|
||||
.code32
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
compatible_mode:
|
||||
/* Setup data and stack segments */
|
||||
/*
|
||||
* This is the 32-bit trampoline that will be copied over to low memory.
|
||||
*
|
||||
* RDI contains the return address (might be above 4G).
|
||||
* ECX contains the base address of the trampoline memory.
|
||||
* Non zero RDX on return means we need to enable 5-level paging.
|
||||
*/
|
||||
ENTRY(trampoline_32bit_src)
|
||||
/* Set up data and stack segments */
|
||||
movl $__KERNEL_DS, %eax
|
||||
movl %eax, %ds
|
||||
movl %eax, %ss
|
||||
|
||||
/* Set up new stack */
|
||||
leal TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
|
||||
|
||||
/* Disable paging */
|
||||
movl %cr0, %eax
|
||||
btrl $X86_CR0_PG_BIT, %eax
|
||||
movl %eax, %cr0
|
||||
|
||||
/* Point CR3 to 5-level paging */
|
||||
leal lvl5_pgtable(%ebx), %eax
|
||||
movl %eax, %cr3
|
||||
/* Check what paging mode we want to be in after the trampoline */
|
||||
cmpl $0, %edx
|
||||
jz 1f
|
||||
|
||||
/* Enable PAE and LA57 mode */
|
||||
/* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
|
||||
movl %cr4, %eax
|
||||
orl $(X86_CR4_PAE | X86_CR4_LA57), %eax
|
||||
testl $X86_CR4_LA57, %eax
|
||||
jnz 3f
|
||||
jmp 2f
|
||||
1:
|
||||
/* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
|
||||
movl %cr4, %eax
|
||||
testl $X86_CR4_LA57, %eax
|
||||
jz 3f
|
||||
2:
|
||||
/* Point CR3 to the trampoline's new top level page table */
|
||||
leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
|
||||
movl %eax, %cr3
|
||||
3:
|
||||
/* Enable PAE and LA57 (if required) paging modes */
|
||||
movl $X86_CR4_PAE, %eax
|
||||
cmpl $0, %edx
|
||||
jz 1f
|
||||
orl $X86_CR4_LA57, %eax
|
||||
1:
|
||||
movl %eax, %cr4
|
||||
|
||||
/* Calculate address we are running at */
|
||||
call 1f
|
||||
1: popl %edi
|
||||
subl $1b, %edi
|
||||
/* Calculate address of paging_enabled() once we are executing in the trampoline */
|
||||
leal paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
|
||||
|
||||
/* Prepare stack for far return to Long Mode */
|
||||
/* Prepare the stack for far return to Long Mode */
|
||||
pushl $__KERNEL_CS
|
||||
leal lvl5(%edi), %eax
|
||||
push %eax
|
||||
pushl %eax
|
||||
|
||||
/* Enable paging back */
|
||||
/* Enable paging again */
|
||||
movl $(X86_CR0_PG | X86_CR0_PE), %eax
|
||||
movl %eax, %cr0
|
||||
|
||||
lret
|
||||
#endif
|
||||
|
||||
.code64
|
||||
paging_enabled:
|
||||
/* Return from the trampoline */
|
||||
jmp *%rdi
|
||||
|
||||
/*
|
||||
* The trampoline code has a size limit.
|
||||
* Make sure we fail to compile if the trampoline code grows
|
||||
* beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
|
||||
*/
|
||||
.org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
|
||||
|
||||
.code32
|
||||
no_longmode:
|
||||
/* This isn't an x86-64 CPU so hang */
|
||||
/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
|
||||
1:
|
||||
hlt
|
||||
jmp 1b
|
||||
@ -537,6 +596,11 @@ no_longmode:
|
||||
#include "../../kernel/verify_cpu.S"
|
||||
|
||||
.data
|
||||
gdt64:
|
||||
.word gdt_end - gdt
|
||||
.long 0
|
||||
.word 0
|
||||
.quad 0
|
||||
gdt:
|
||||
.word gdt_end - gdt
|
||||
.long gdt
|
||||
@ -585,7 +649,3 @@ boot_stack_end:
|
||||
.balign 4096
|
||||
pgtable:
|
||||
.fill BOOT_PGT_SIZE, 1, 0
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
lvl5_pgtable:
|
||||
.fill PAGE_SIZE, 1, 0
|
||||
#endif
|
||||
|
@ -46,6 +46,12 @@
|
||||
#define STATIC
|
||||
#include <linux/decompress/mm.h>
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
unsigned int pgtable_l5_enabled __ro_after_init;
|
||||
unsigned int pgdir_shift __ro_after_init = 39;
|
||||
unsigned int ptrs_per_p4d __ro_after_init = 1;
|
||||
#endif
|
||||
|
||||
extern unsigned long get_cmd_line_ptr(void);
|
||||
|
||||
/* Simplified build-specific string for starting entropy. */
|
||||
@ -723,6 +729,14 @@ void choose_random_location(unsigned long input,
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
if (__read_cr4() & X86_CR4_LA57) {
|
||||
pgtable_l5_enabled = 1;
|
||||
pgdir_shift = 48;
|
||||
ptrs_per_p4d = 512;
|
||||
}
|
||||
#endif
|
||||
|
||||
boot_params->hdr.loadflags |= KASLR_FLAG;
|
||||
|
||||
/* Prepare to add new identity pagetables on demand. */
|
||||
|
@ -16,13 +16,6 @@
|
||||
#define __pa(x) ((unsigned long)(x))
|
||||
#define __va(x) ((void *)((unsigned long)(x)))
|
||||
|
||||
/*
|
||||
* The pgtable.h and mm/ident_map.c includes make use of the SME related
|
||||
* information which is not used in the compressed image support. Un-define
|
||||
* the SME support to avoid any compile and link errors.
|
||||
*/
|
||||
#undef CONFIG_AMD_MEM_ENCRYPT
|
||||
|
||||
/* No PAGE_TABLE_ISOLATION support needed either: */
|
||||
#undef CONFIG_PAGE_TABLE_ISOLATION
|
||||
|
||||
@ -85,13 +78,14 @@ static struct x86_mapping_info mapping_info;
|
||||
/* Locates and clears a region for a new top level page table. */
|
||||
void initialize_identity_maps(void)
|
||||
{
|
||||
unsigned long sev_me_mask = get_sev_encryption_mask();
|
||||
/* If running as an SEV guest, the encryption mask is required. */
|
||||
set_sev_encryption_mask();
|
||||
|
||||
/* Init mapping_info with run-time function/buffer pointers. */
|
||||
mapping_info.alloc_pgt_page = alloc_pgt_page;
|
||||
mapping_info.context = &pgt_data;
|
||||
mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sev_me_mask;
|
||||
mapping_info.kernpg_flag = _KERNPG_TABLE | sev_me_mask;
|
||||
mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
|
||||
mapping_info.kernpg_flag = _KERNPG_TABLE;
|
||||
|
||||
/*
|
||||
* It should be impossible for this not to already be true,
|
@ -88,9 +88,7 @@ ENTRY(get_sev_encryption_bit)
|
||||
ENDPROC(get_sev_encryption_bit)
|
||||
|
||||
.code64
|
||||
ENTRY(get_sev_encryption_mask)
|
||||
xor %rax, %rax
|
||||
|
||||
ENTRY(set_sev_encryption_mask)
|
||||
#ifdef CONFIG_AMD_MEM_ENCRYPT
|
||||
push %rbp
|
||||
push %rdx
|
||||
@ -101,9 +99,7 @@ ENTRY(get_sev_encryption_mask)
|
||||
testl %eax, %eax
|
||||
jz .Lno_sev_mask
|
||||
|
||||
xor %rdx, %rdx
|
||||
bts %rax, %rdx /* Create the encryption mask */
|
||||
mov %rdx, %rax /* ... and return it */
|
||||
bts %rax, sme_me_mask(%rip) /* Create the encryption mask */
|
||||
|
||||
.Lno_sev_mask:
|
||||
movq %rbp, %rsp /* Restore original stack pointer */
|
||||
@ -112,9 +108,16 @@ ENTRY(get_sev_encryption_mask)
|
||||
pop %rbp
|
||||
#endif
|
||||
|
||||
xor %rax, %rax
|
||||
ret
|
||||
ENDPROC(get_sev_encryption_mask)
|
||||
ENDPROC(set_sev_encryption_mask)
|
||||
|
||||
.data
|
||||
enc_bit:
|
||||
.int 0xffffffff
|
||||
|
||||
#ifdef CONFIG_AMD_MEM_ENCRYPT
|
||||
.balign 8
|
||||
GLOBAL(sme_me_mask)
|
||||
.quad 0
|
||||
#endif
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
#include "misc.h"
|
||||
#include "error.h"
|
||||
#include "pgtable.h"
|
||||
#include "../string.h"
|
||||
#include "../voffset.h"
|
||||
|
||||
@ -169,16 +170,6 @@ void __puthex(unsigned long value)
|
||||
}
|
||||
}
|
||||
|
||||
static bool l5_supported(void)
|
||||
{
|
||||
/* Check if leaf 7 is supported. */
|
||||
if (native_cpuid_eax(0) < 7)
|
||||
return 0;
|
||||
|
||||
/* Check if la57 is supported. */
|
||||
return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
|
||||
}
|
||||
|
||||
#if CONFIG_X86_NEED_RELOCS
|
||||
static void handle_relocations(void *output, unsigned long output_len,
|
||||
unsigned long virt_addr)
|
||||
@ -376,12 +367,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
|
||||
console_init();
|
||||
debug_putstr("early console in extract_kernel\n");
|
||||
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
|
||||
error("This linux kernel as configured requires 5-level paging\n"
|
||||
"This CPU does not support the required 'cr4.la57' feature\n"
|
||||
"Unable to boot - please use a kernel appropriate for your CPU\n");
|
||||
}
|
||||
|
||||
free_mem_ptr = heap; /* Heap */
|
||||
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
|
||||
|
||||
@ -392,6 +377,11 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
|
||||
debug_putaddr(output_len);
|
||||
debug_putaddr(kernel_total_size);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Report address of 32-bit trampoline */
|
||||
debug_putaddr(trampoline_32bit);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The memory hole needed for the kernel is the larger of either
|
||||
* the entire decompressed kernel plus relocation table, or the
|
||||
|
@ -12,6 +12,11 @@
|
||||
#undef CONFIG_PARAVIRT_SPINLOCKS
|
||||
#undef CONFIG_KASAN
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
/* cpu_feature_enabled() cannot be used that early */
|
||||
#define pgtable_l5_enabled __pgtable_l5_enabled
|
||||
#endif
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/screen_info.h>
|
||||
#include <linux/elf.h>
|
||||
@ -109,6 +114,6 @@ static inline void console_init(void)
|
||||
{ }
|
||||
#endif
|
||||
|
||||
unsigned long get_sev_encryption_mask(void);
|
||||
void set_sev_encryption_mask(void);
|
||||
|
||||
#endif
|
||||
|
20
arch/x86/boot/compressed/pgtable.h
Normal file
20
arch/x86/boot/compressed/pgtable.h
Normal file
@ -0,0 +1,20 @@
|
||||
#ifndef BOOT_COMPRESSED_PAGETABLE_H
|
||||
#define BOOT_COMPRESSED_PAGETABLE_H
|
||||
|
||||
#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE)
|
||||
|
||||
#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0
|
||||
|
||||
#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
|
||||
#define TRAMPOLINE_32BIT_CODE_SIZE 0x60
|
||||
|
||||
#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE
|
||||
|
||||
#ifndef __ASSEMBLER__
|
||||
|
||||
extern unsigned long *trampoline_32bit;
|
||||
|
||||
extern void trampoline_32bit_src(void *return_ptr);
|
||||
|
||||
#endif /* __ASSEMBLER__ */
|
||||
#endif /* BOOT_COMPRESSED_PAGETABLE_H */
|
@ -1,4 +1,6 @@
|
||||
#include <asm/processor.h>
|
||||
#include "pgtable.h"
|
||||
#include "../string.h"
|
||||
|
||||
/*
|
||||
* __force_order is used by special_insns.h asm code to force instruction
|
||||
@ -9,20 +11,144 @@
|
||||
*/
|
||||
unsigned long __force_order;
|
||||
|
||||
int l5_paging_required(void)
|
||||
#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
|
||||
#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
|
||||
|
||||
struct paging_config {
|
||||
unsigned long trampoline_start;
|
||||
unsigned long l5_required;
|
||||
};
|
||||
|
||||
/* Buffer to preserve trampoline memory */
|
||||
static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
|
||||
|
||||
/*
|
||||
* The page table is going to be used instead of page table in the trampoline
|
||||
* memory.
|
||||
*
|
||||
* It must not be in BSS as BSS is cleared after cleanup_trampoline().
|
||||
*/
|
||||
static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
|
||||
|
||||
/*
|
||||
* Trampoline address will be printed by extract_kernel() for debugging
|
||||
* purposes.
|
||||
*
|
||||
* Avoid putting the pointer into .bss as it will be cleared between
|
||||
* paging_prepare() and extract_kernel().
|
||||
*/
|
||||
unsigned long *trampoline_32bit __section(.data);
|
||||
|
||||
struct paging_config paging_prepare(void)
|
||||
{
|
||||
/* Check if leaf 7 is supported. */
|
||||
struct paging_config paging_config = {};
|
||||
unsigned long bios_start, ebda_start;
|
||||
|
||||
if (native_cpuid_eax(0) < 7)
|
||||
return 0;
|
||||
/*
|
||||
* Check if LA57 is desired and supported.
|
||||
*
|
||||
* There are two parts to the check:
|
||||
* - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
|
||||
* - if the machine supports 5-level paging:
|
||||
* + CPUID leaf 7 is supported
|
||||
* + the leaf has the feature bit set
|
||||
*
|
||||
* That's substitute for boot_cpu_has() in early boot code.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
|
||||
native_cpuid_eax(0) >= 7 &&
|
||||
(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
|
||||
paging_config.l5_required = 1;
|
||||
}
|
||||
|
||||
/* Check if la57 is supported. */
|
||||
if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
|
||||
return 0;
|
||||
/*
|
||||
* Find a suitable spot for the trampoline.
|
||||
* This code is based on reserve_bios_regions().
|
||||
*/
|
||||
|
||||
/* Check if 5-level paging has already been enabled. */
|
||||
if (native_read_cr4() & X86_CR4_LA57)
|
||||
return 0;
|
||||
ebda_start = *(unsigned short *)0x40e << 4;
|
||||
bios_start = *(unsigned short *)0x413 << 10;
|
||||
|
||||
return 1;
|
||||
if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
|
||||
bios_start = BIOS_START_MAX;
|
||||
|
||||
if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
|
||||
bios_start = ebda_start;
|
||||
|
||||
/* Place the trampoline just below the end of low memory, aligned to 4k */
|
||||
paging_config.trampoline_start = bios_start - TRAMPOLINE_32BIT_SIZE;
|
||||
paging_config.trampoline_start = round_down(paging_config.trampoline_start, PAGE_SIZE);
|
||||
|
||||
trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
|
||||
|
||||
/* Preserve trampoline memory */
|
||||
memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
|
||||
|
||||
/* Clear trampoline memory first */
|
||||
memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
|
||||
|
||||
/* Copy trampoline code in place */
|
||||
memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
|
||||
&trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
|
||||
|
||||
/*
|
||||
* The code below prepares page table in trampoline memory.
|
||||
*
|
||||
* The new page table will be used by trampoline code for switching
|
||||
* from 4- to 5-level paging or vice versa.
|
||||
*
|
||||
* If switching is not required, the page table is unused: trampoline
|
||||
* code wouldn't touch CR3.
|
||||
*/
|
||||
|
||||
/*
|
||||
* We are not going to use the page table in trampoline memory if we
|
||||
* are already in the desired paging mode.
|
||||
*/
|
||||
if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
|
||||
goto out;
|
||||
|
||||
if (paging_config.l5_required) {
|
||||
/*
|
||||
* For 4- to 5-level paging transition, set up current CR3 as
|
||||
* the first and the only entry in a new top-level page table.
|
||||
*/
|
||||
trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
|
||||
} else {
|
||||
unsigned long src;
|
||||
|
||||
/*
|
||||
* For 5- to 4-level paging transition, copy page table pointed
|
||||
* by first entry in the current top-level page table as our
|
||||
* new top-level page table.
|
||||
*
|
||||
* We cannot just point to the page table from trampoline as it
|
||||
* may be above 4G.
|
||||
*/
|
||||
src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
|
||||
memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
|
||||
(void *)src, PAGE_SIZE);
|
||||
}
|
||||
|
||||
out:
|
||||
return paging_config;
|
||||
}
|
||||
|
||||
void cleanup_trampoline(void)
|
||||
{
|
||||
void *trampoline_pgtable;
|
||||
|
||||
trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET;
|
||||
|
||||
/*
|
||||
* Move the top level page table out of trampoline memory,
|
||||
* if it's there.
|
||||
*/
|
||||
if ((void *)__native_read_cr3() == trampoline_pgtable) {
|
||||
memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE);
|
||||
native_write_cr3((unsigned long)top_pgtable);
|
||||
}
|
||||
|
||||
/* Restore trampoline memory */
|
||||
memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
|
||||
}
|
||||
|
@ -260,8 +260,13 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
|
||||
* Change top bits to match most significant bit (47th or 56th bit
|
||||
* depending on paging mode) in the address.
|
||||
*/
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
|
||||
"shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
|
||||
#else
|
||||
shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
|
||||
sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
|
||||
#endif
|
||||
|
||||
/* If this changed %rcx, it was not canonical */
|
||||
cmpq %rcx, %r11
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/mpspec.h>
|
||||
#include <asm/realmode.h>
|
||||
#include <asm/x86_init.h>
|
||||
|
||||
#ifdef CONFIG_ACPI_APEI
|
||||
# include <asm/pgtable_types.h>
|
||||
@ -133,6 +134,14 @@ static inline bool acpi_has_cpu_in_madt(void)
|
||||
return !!acpi_lapic;
|
||||
}
|
||||
|
||||
#define ACPI_HAVE_ARCH_GET_ROOT_POINTER
|
||||
static inline u64 acpi_arch_get_root_pointer(void)
|
||||
{
|
||||
return x86_init.acpi.get_root_pointer();
|
||||
}
|
||||
|
||||
void acpi_generic_reduced_hw_init(void);
|
||||
|
||||
#else /* !CONFIG_ACPI */
|
||||
|
||||
#define acpi_lapic 0
|
||||
@ -142,6 +151,8 @@ static inline void acpi_noirq_set(void) { }
|
||||
static inline void acpi_disable_pci(void) { }
|
||||
static inline void disable_acpi(void) { }
|
||||
|
||||
static inline void acpi_generic_reduced_hw_init(void) { }
|
||||
|
||||
#endif /* !CONFIG_ACPI */
|
||||
|
||||
#define ARCH_HAS_POWER_INIT 1
|
||||
|
65
arch/x86/include/asm/intel_pconfig.h
Normal file
65
arch/x86/include/asm/intel_pconfig.h
Normal file
@ -0,0 +1,65 @@
|
||||
#ifndef _ASM_X86_INTEL_PCONFIG_H
|
||||
#define _ASM_X86_INTEL_PCONFIG_H
|
||||
|
||||
#include <asm/asm.h>
|
||||
#include <asm/processor.h>
|
||||
|
||||
enum pconfig_target {
|
||||
INVALID_TARGET = 0,
|
||||
MKTME_TARGET = 1,
|
||||
PCONFIG_TARGET_NR
|
||||
};
|
||||
|
||||
int pconfig_target_supported(enum pconfig_target target);
|
||||
|
||||
enum pconfig_leaf {
|
||||
MKTME_KEY_PROGRAM = 0,
|
||||
PCONFIG_LEAF_INVALID,
|
||||
};
|
||||
|
||||
#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
|
||||
|
||||
/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */
|
||||
|
||||
/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */
|
||||
#define MKTME_KEYID_SET_KEY_DIRECT 0
|
||||
#define MKTME_KEYID_SET_KEY_RANDOM 1
|
||||
#define MKTME_KEYID_CLEAR_KEY 2
|
||||
#define MKTME_KEYID_NO_ENCRYPT 3
|
||||
|
||||
/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */
|
||||
#define MKTME_AES_XTS_128 (1 << 8)
|
||||
|
||||
/* Return codes from the PCONFIG MKTME_KEY_PROGRAM */
|
||||
#define MKTME_PROG_SUCCESS 0
|
||||
#define MKTME_INVALID_PROG_CMD 1
|
||||
#define MKTME_ENTROPY_ERROR 2
|
||||
#define MKTME_INVALID_KEYID 3
|
||||
#define MKTME_INVALID_ENC_ALG 4
|
||||
#define MKTME_DEVICE_BUSY 5
|
||||
|
||||
/* Hardware requires the structure to be 256 byte alinged. Otherwise #GP(0). */
|
||||
struct mktme_key_program {
|
||||
u16 keyid;
|
||||
u32 keyid_ctrl;
|
||||
u8 __rsvd[58];
|
||||
u8 key_field_1[64];
|
||||
u8 key_field_2[64];
|
||||
} __packed __aligned(256);
|
||||
|
||||
static inline int mktme_key_program(struct mktme_key_program *key_program)
|
||||
{
|
||||
unsigned long rax = MKTME_KEY_PROGRAM;
|
||||
|
||||
if (!pconfig_target_supported(MKTME_TARGET))
|
||||
return -ENXIO;
|
||||
|
||||
asm volatile(PCONFIG
|
||||
: "=a" (rax), "=b" (key_program)
|
||||
: "0" (rax), "1" (key_program)
|
||||
: "memory", "cc");
|
||||
|
||||
return rax;
|
||||
}
|
||||
|
||||
#endif /* _ASM_X86_INTEL_PCONFIG_H */
|
@ -5,10 +5,6 @@
|
||||
unsigned long kaslr_get_random_long(const char *purpose);
|
||||
|
||||
#ifdef CONFIG_RANDOMIZE_MEMORY
|
||||
extern unsigned long page_offset_base;
|
||||
extern unsigned long vmalloc_base;
|
||||
extern unsigned long vmemmap_base;
|
||||
|
||||
void kernel_randomize_memory(void);
|
||||
#else
|
||||
static inline void kernel_randomize_memory(void) { }
|
||||
|
@ -22,6 +22,7 @@
|
||||
#ifdef CONFIG_AMD_MEM_ENCRYPT
|
||||
|
||||
extern u64 sme_me_mask;
|
||||
extern bool sev_enabled;
|
||||
|
||||
void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
|
||||
unsigned long decrypted_kernel_vaddr,
|
||||
|
@ -11,6 +11,10 @@
|
||||
extern unsigned long max_pfn;
|
||||
extern unsigned long phys_base;
|
||||
|
||||
extern unsigned long page_offset_base;
|
||||
extern unsigned long vmalloc_base;
|
||||
extern unsigned long vmemmap_base;
|
||||
|
||||
static inline unsigned long __phys_addr_nodebug(unsigned long x)
|
||||
{
|
||||
unsigned long y = x - __START_KERNEL_map;
|
||||
|
@ -37,26 +37,24 @@
|
||||
* hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
|
||||
* what Xen requires.
|
||||
*/
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL)
|
||||
#else
|
||||
#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL)
|
||||
#endif
|
||||
#define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL)
|
||||
#define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL)
|
||||
|
||||
#ifdef CONFIG_RANDOMIZE_MEMORY
|
||||
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
|
||||
#define __PAGE_OFFSET page_offset_base
|
||||
#else
|
||||
#define __PAGE_OFFSET __PAGE_OFFSET_BASE
|
||||
#endif /* CONFIG_RANDOMIZE_MEMORY */
|
||||
#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4
|
||||
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
|
||||
|
||||
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
|
||||
|
||||
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
|
||||
#define __PHYSICAL_MASK_SHIFT 52
|
||||
#define __VIRTUAL_MASK_SHIFT 56
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled ? 56 : 47)
|
||||
#else
|
||||
#define __PHYSICAL_MASK_SHIFT 46
|
||||
#define __VIRTUAL_MASK_SHIFT 47
|
||||
#endif
|
||||
|
||||
|
@ -568,17 +568,22 @@ static inline p4dval_t p4d_val(p4d_t p4d)
|
||||
return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d);
|
||||
}
|
||||
|
||||
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
|
||||
static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
|
||||
{
|
||||
pgdval_t val = native_pgd_val(pgd);
|
||||
|
||||
PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, val);
|
||||
PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd));
|
||||
}
|
||||
|
||||
static inline void pgd_clear(pgd_t *pgdp)
|
||||
{
|
||||
set_pgd(pgdp, __pgd(0));
|
||||
}
|
||||
#define set_pgd(pgdp, pgdval) do { \
|
||||
if (pgtable_l5_enabled) \
|
||||
__set_pgd(pgdp, pgdval); \
|
||||
else \
|
||||
set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \
|
||||
} while (0)
|
||||
|
||||
#define pgd_clear(pgdp) do { \
|
||||
if (pgtable_l5_enabled) \
|
||||
set_pgd(pgdp, __pgd(0)); \
|
||||
} while (0)
|
||||
|
||||
#endif /* CONFIG_PGTABLE_LEVELS == 5 */
|
||||
|
||||
|
@ -167,6 +167,8 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
|
||||
#if CONFIG_PGTABLE_LEVELS > 4
|
||||
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
|
||||
{
|
||||
if (!pgtable_l5_enabled)
|
||||
return;
|
||||
paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
|
||||
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
|
||||
}
|
||||
@ -191,7 +193,8 @@ extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
|
||||
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
|
||||
unsigned long address)
|
||||
{
|
||||
___p4d_free_tlb(tlb, p4d);
|
||||
if (pgtable_l5_enabled)
|
||||
___p4d_free_tlb(tlb, p4d);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
|
||||
|
@ -44,5 +44,6 @@ typedef union {
|
||||
*/
|
||||
#define PTRS_PER_PTE 512
|
||||
|
||||
#define MAX_POSSIBLE_PHYSMEM_BITS 36
|
||||
|
||||
#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */
|
||||
|
@ -65,7 +65,7 @@ extern pmdval_t early_pmd_flags;
|
||||
|
||||
#ifndef __PAGETABLE_P4D_FOLDED
|
||||
#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
|
||||
#define pgd_clear(pgd) native_pgd_clear(pgd)
|
||||
#define pgd_clear(pgd) (pgtable_l5_enabled ? native_pgd_clear(pgd) : 0)
|
||||
#endif
|
||||
|
||||
#ifndef set_p4d
|
||||
@ -859,6 +859,8 @@ static inline unsigned long p4d_index(unsigned long address)
|
||||
#if CONFIG_PGTABLE_LEVELS > 4
|
||||
static inline int pgd_present(pgd_t pgd)
|
||||
{
|
||||
if (!pgtable_l5_enabled)
|
||||
return 1;
|
||||
return pgd_flags(pgd) & _PAGE_PRESENT;
|
||||
}
|
||||
|
||||
@ -876,6 +878,8 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
|
||||
/* to find an entry in a page-table-directory. */
|
||||
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
|
||||
{
|
||||
if (!pgtable_l5_enabled)
|
||||
return (p4d_t *)pgd;
|
||||
return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
|
||||
}
|
||||
|
||||
@ -883,6 +887,9 @@ static inline int pgd_bad(pgd_t pgd)
|
||||
{
|
||||
unsigned long ignore_flags = _PAGE_USER;
|
||||
|
||||
if (!pgtable_l5_enabled)
|
||||
return 0;
|
||||
|
||||
if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
|
||||
ignore_flags |= _PAGE_NX;
|
||||
|
||||
@ -891,6 +898,8 @@ static inline int pgd_bad(pgd_t pgd)
|
||||
|
||||
static inline int pgd_none(pgd_t pgd)
|
||||
{
|
||||
if (!pgtable_l5_enabled)
|
||||
return 0;
|
||||
/*
|
||||
* There is no need to do a workaround for the KNL stray
|
||||
* A/D bit erratum here. PGDs only point to page tables
|
||||
|
@ -34,6 +34,8 @@ static inline void check_pgt_cache(void) { }
|
||||
void paging_init(void);
|
||||
void sync_initial_page_table(void);
|
||||
|
||||
static inline int pgd_large(pgd_t pgd) { return 0; }
|
||||
|
||||
/*
|
||||
* Define this if things work differently on an i386 and an i486:
|
||||
* it will (on an i486) warn about kernel memory accesses that are
|
||||
|
@ -15,6 +15,8 @@
|
||||
# include <asm/pgtable-2level_types.h>
|
||||
#endif
|
||||
|
||||
#define pgtable_l5_enabled 0
|
||||
|
||||
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
|
||||
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
|
||||
|
||||
|
@ -218,29 +218,26 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
|
||||
|
||||
static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
|
||||
{
|
||||
#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
|
||||
p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
|
||||
#else
|
||||
*p4dp = p4d;
|
||||
#endif
|
||||
pgd_t pgd;
|
||||
|
||||
if (pgtable_l5_enabled || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
|
||||
*p4dp = p4d;
|
||||
return;
|
||||
}
|
||||
|
||||
pgd = native_make_pgd(native_p4d_val(p4d));
|
||||
pgd = pti_set_user_pgd((pgd_t *)p4dp, pgd);
|
||||
*p4dp = native_make_p4d(native_pgd_val(pgd));
|
||||
}
|
||||
|
||||
static inline void native_p4d_clear(p4d_t *p4d)
|
||||
{
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
native_set_p4d(p4d, native_make_p4d(0));
|
||||
#else
|
||||
native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)});
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
|
||||
{
|
||||
#ifdef CONFIG_PAGE_TABLE_ISOLATION
|
||||
*pgdp = pti_set_user_pgd(pgdp, pgd);
|
||||
#else
|
||||
*pgdp = pgd;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void native_pgd_clear(pgd_t *pgd)
|
||||
|
@ -20,6 +20,18 @@ typedef unsigned long pgprotval_t;
|
||||
|
||||
typedef struct { pteval_t pte; } pte_t;
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
extern unsigned int __pgtable_l5_enabled;
|
||||
#ifndef pgtable_l5_enabled
|
||||
#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57)
|
||||
#endif
|
||||
#else
|
||||
#define pgtable_l5_enabled 0
|
||||
#endif
|
||||
|
||||
extern unsigned int pgdir_shift;
|
||||
extern unsigned int ptrs_per_p4d;
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#define SHARED_KERNEL_PMD 0
|
||||
@ -29,24 +41,28 @@ typedef struct { pteval_t pte; } pte_t;
|
||||
/*
|
||||
* PGDIR_SHIFT determines what a top-level page table entry can map
|
||||
*/
|
||||
#define PGDIR_SHIFT 48
|
||||
#define PGDIR_SHIFT pgdir_shift
|
||||
#define PTRS_PER_PGD 512
|
||||
|
||||
/*
|
||||
* 4th level page in 5-level paging case
|
||||
*/
|
||||
#define P4D_SHIFT 39
|
||||
#define PTRS_PER_P4D 512
|
||||
#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
|
||||
#define P4D_MASK (~(P4D_SIZE - 1))
|
||||
#define P4D_SHIFT 39
|
||||
#define MAX_PTRS_PER_P4D 512
|
||||
#define PTRS_PER_P4D ptrs_per_p4d
|
||||
#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
|
||||
#define P4D_MASK (~(P4D_SIZE - 1))
|
||||
|
||||
#define MAX_POSSIBLE_PHYSMEM_BITS 52
|
||||
|
||||
#else /* CONFIG_X86_5LEVEL */
|
||||
|
||||
/*
|
||||
* PGDIR_SHIFT determines what a top-level page table entry can map
|
||||
*/
|
||||
#define PGDIR_SHIFT 39
|
||||
#define PTRS_PER_PGD 512
|
||||
#define PGDIR_SHIFT 39
|
||||
#define PTRS_PER_PGD 512
|
||||
#define MAX_PTRS_PER_P4D 1
|
||||
|
||||
#endif /* CONFIG_X86_5LEVEL */
|
||||
|
||||
@ -82,31 +98,33 @@ typedef struct { pteval_t pte; } pte_t;
|
||||
* range must not overlap with anything except the KASAN shadow area, which
|
||||
* is correct as KASAN disables KASLR.
|
||||
*/
|
||||
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
|
||||
#define MAXMEM (1UL << MAX_PHYSMEM_BITS)
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
# define VMALLOC_SIZE_TB _AC(12800, UL)
|
||||
# define __VMALLOC_BASE _AC(0xffa0000000000000, UL)
|
||||
# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
|
||||
# define LDT_PGD_ENTRY _AC(-112, UL)
|
||||
# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
|
||||
#else
|
||||
# define VMALLOC_SIZE_TB _AC(32, UL)
|
||||
# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
|
||||
# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
|
||||
# define LDT_PGD_ENTRY _AC(-3, UL)
|
||||
# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
|
||||
#endif
|
||||
#define LDT_PGD_ENTRY_L4 -3UL
|
||||
#define LDT_PGD_ENTRY_L5 -112UL
|
||||
#define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
|
||||
#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
|
||||
|
||||
#ifdef CONFIG_RANDOMIZE_MEMORY
|
||||
#define __VMALLOC_BASE_L4 0xffffc90000000000
|
||||
#define __VMALLOC_BASE_L5 0xffa0000000000000
|
||||
|
||||
#define VMALLOC_SIZE_TB_L4 32UL
|
||||
#define VMALLOC_SIZE_TB_L5 12800UL
|
||||
|
||||
#define __VMEMMAP_BASE_L4 0xffffea0000000000
|
||||
#define __VMEMMAP_BASE_L5 0xffd4000000000000
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
|
||||
# define VMALLOC_START vmalloc_base
|
||||
# define VMALLOC_SIZE_TB (pgtable_l5_enabled ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
|
||||
# define VMEMMAP_START vmemmap_base
|
||||
#else
|
||||
# define VMALLOC_START __VMALLOC_BASE
|
||||
# define VMEMMAP_START __VMEMMAP_BASE
|
||||
#endif /* CONFIG_RANDOMIZE_MEMORY */
|
||||
# define VMALLOC_START __VMALLOC_BASE_L4
|
||||
# define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4
|
||||
# define VMEMMAP_START __VMEMMAP_BASE_L4
|
||||
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
|
||||
|
||||
#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
|
||||
#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
|
||||
|
||||
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
|
||||
/* The module sections ends with the start of the fixmap */
|
||||
|
@ -53,12 +53,6 @@
|
||||
# define NEED_MOVBE 0
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31))
|
||||
#else
|
||||
# define NEED_LA57 0
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
/* Paravirtualized systems may not have PSE or PGE available */
|
||||
@ -104,7 +98,7 @@
|
||||
#define REQUIRED_MASK13 0
|
||||
#define REQUIRED_MASK14 0
|
||||
#define REQUIRED_MASK15 0
|
||||
#define REQUIRED_MASK16 (NEED_LA57)
|
||||
#define REQUIRED_MASK16 0
|
||||
#define REQUIRED_MASK17 0
|
||||
#define REQUIRED_MASK18 0
|
||||
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
|
||||
|
@ -27,13 +27,8 @@
|
||||
# endif
|
||||
#else /* CONFIG_X86_32 */
|
||||
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
|
||||
# ifdef CONFIG_X86_5LEVEL
|
||||
# define MAX_PHYSADDR_BITS 52
|
||||
# define MAX_PHYSMEM_BITS 52
|
||||
# else
|
||||
# define MAX_PHYSADDR_BITS 44
|
||||
# define MAX_PHYSMEM_BITS 46
|
||||
# endif
|
||||
# define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44)
|
||||
# define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46)
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_SPARSEMEM */
|
||||
|
@ -130,6 +130,16 @@ struct x86_hyper_init {
|
||||
void (*init_mem_mapping)(void);
|
||||
};
|
||||
|
||||
/**
|
||||
* struct x86_init_acpi - x86 ACPI init functions
|
||||
* @get_root_pointer: get RSDP address
|
||||
* @reduced_hw_early_init: hardware reduced platform early init
|
||||
*/
|
||||
struct x86_init_acpi {
|
||||
u64 (*get_root_pointer)(void);
|
||||
void (*reduced_hw_early_init)(void);
|
||||
};
|
||||
|
||||
/**
|
||||
* struct x86_init_ops - functions for platform specific setup
|
||||
*
|
||||
@ -144,6 +154,7 @@ struct x86_init_ops {
|
||||
struct x86_init_iommu iommu;
|
||||
struct x86_init_pci pci;
|
||||
struct x86_hyper_init hyper;
|
||||
struct x86_init_acpi acpi;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -1376,17 +1376,21 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
|
||||
*
|
||||
* We initialize the Hardware-reduced ACPI model here:
|
||||
*/
|
||||
void __init acpi_generic_reduced_hw_init(void)
|
||||
{
|
||||
/*
|
||||
* Override x86_init functions and bypass legacy PIC in
|
||||
* hardware reduced ACPI mode.
|
||||
*/
|
||||
x86_init.timers.timer_init = x86_init_noop;
|
||||
x86_init.irqs.pre_vector_init = x86_init_noop;
|
||||
legacy_pic = &null_legacy_pic;
|
||||
}
|
||||
|
||||
static void __init acpi_reduced_hw_init(void)
|
||||
{
|
||||
if (acpi_gbl_reduced_hardware) {
|
||||
/*
|
||||
* Override x86_init functions and bypass legacy pic
|
||||
* in Hardware-reduced ACPI mode
|
||||
*/
|
||||
x86_init.timers.timer_init = x86_init_noop;
|
||||
x86_init.irqs.pre_vector_init = x86_init_noop;
|
||||
legacy_pic = &null_legacy_pic;
|
||||
}
|
||||
if (acpi_gbl_reduced_hardware)
|
||||
x86_init.acpi.reduced_hw_early_init();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -28,7 +28,7 @@ obj-y += cpuid-deps.o
|
||||
obj-$(CONFIG_PROC_FS) += proc.o
|
||||
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
|
||||
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o
|
||||
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
|
||||
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
|
||||
obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
|
||||
|
@ -509,6 +509,90 @@ static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
|
||||
}
|
||||
}
|
||||
|
||||
#define MSR_IA32_TME_ACTIVATE 0x982
|
||||
|
||||
/* Helpers to access TME_ACTIVATE MSR */
|
||||
#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
|
||||
#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
|
||||
|
||||
#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
|
||||
#define TME_ACTIVATE_POLICY_AES_XTS_128 0
|
||||
|
||||
#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
|
||||
|
||||
#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
|
||||
#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
|
||||
|
||||
/* Values for mktme_status (SW only construct) */
|
||||
#define MKTME_ENABLED 0
|
||||
#define MKTME_DISABLED 1
|
||||
#define MKTME_UNINITIALIZED 2
|
||||
static int mktme_status = MKTME_UNINITIALIZED;
|
||||
|
||||
static void detect_tme(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 tme_activate, tme_policy, tme_crypto_algs;
|
||||
int keyid_bits = 0, nr_keyids = 0;
|
||||
static u64 tme_activate_cpu0 = 0;
|
||||
|
||||
rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
|
||||
|
||||
if (mktme_status != MKTME_UNINITIALIZED) {
|
||||
if (tme_activate != tme_activate_cpu0) {
|
||||
/* Broken BIOS? */
|
||||
pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
|
||||
pr_err_once("x86/tme: MKTME is not usable\n");
|
||||
mktme_status = MKTME_DISABLED;
|
||||
|
||||
/* Proceed. We may need to exclude bits from x86_phys_bits. */
|
||||
}
|
||||
} else {
|
||||
tme_activate_cpu0 = tme_activate;
|
||||
}
|
||||
|
||||
if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
|
||||
pr_info_once("x86/tme: not enabled by BIOS\n");
|
||||
mktme_status = MKTME_DISABLED;
|
||||
return;
|
||||
}
|
||||
|
||||
if (mktme_status != MKTME_UNINITIALIZED)
|
||||
goto detect_keyid_bits;
|
||||
|
||||
pr_info("x86/tme: enabled by BIOS\n");
|
||||
|
||||
tme_policy = TME_ACTIVATE_POLICY(tme_activate);
|
||||
if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
|
||||
pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
|
||||
|
||||
tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
|
||||
if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
|
||||
pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
|
||||
tme_crypto_algs);
|
||||
mktme_status = MKTME_DISABLED;
|
||||
}
|
||||
detect_keyid_bits:
|
||||
keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
|
||||
nr_keyids = (1UL << keyid_bits) - 1;
|
||||
if (nr_keyids) {
|
||||
pr_info_once("x86/mktme: enabled by BIOS\n");
|
||||
pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
|
||||
} else {
|
||||
pr_info_once("x86/mktme: disabled by BIOS\n");
|
||||
}
|
||||
|
||||
if (mktme_status == MKTME_UNINITIALIZED) {
|
||||
/* MKTME is usable */
|
||||
mktme_status = MKTME_ENABLED;
|
||||
}
|
||||
|
||||
/*
|
||||
* KeyID bits effectively lower the number of physical address
|
||||
* bits. Update cpuinfo_x86::x86_phys_bits accordingly.
|
||||
*/
|
||||
c->x86_phys_bits -= keyid_bits;
|
||||
}
|
||||
|
||||
static void init_intel_energy_perf(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 epb;
|
||||
@ -679,6 +763,9 @@ static void init_intel(struct cpuinfo_x86 *c)
|
||||
if (cpu_has(c, X86_FEATURE_VMX))
|
||||
detect_vmx_virtcap(c);
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_TME))
|
||||
detect_tme(c);
|
||||
|
||||
init_intel_energy_perf(c);
|
||||
|
||||
init_intel_misc_features(c);
|
||||
|
82
arch/x86/kernel/cpu/intel_pconfig.c
Normal file
82
arch/x86/kernel/cpu/intel_pconfig.c
Normal file
@ -0,0 +1,82 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Intel PCONFIG instruction support.
|
||||
*
|
||||
* Copyright (C) 2017 Intel Corporation
|
||||
*
|
||||
* Author:
|
||||
* Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
|
||||
*/
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/intel_pconfig.h>
|
||||
|
||||
#define PCONFIG_CPUID 0x1b
|
||||
|
||||
#define PCONFIG_CPUID_SUBLEAF_MASK ((1 << 12) - 1)
|
||||
|
||||
/* Subleaf type (EAX) for PCONFIG CPUID leaf (0x1B) */
|
||||
enum {
|
||||
PCONFIG_CPUID_SUBLEAF_INVALID = 0,
|
||||
PCONFIG_CPUID_SUBLEAF_TARGETID = 1,
|
||||
};
|
||||
|
||||
/* Bitmask of supported targets */
|
||||
static u64 targets_supported __read_mostly;
|
||||
|
||||
int pconfig_target_supported(enum pconfig_target target)
|
||||
{
|
||||
/*
|
||||
* We would need to re-think the implementation once we get > 64
|
||||
* PCONFIG targets. Spec allows up to 2^32 targets.
|
||||
*/
|
||||
BUILD_BUG_ON(PCONFIG_TARGET_NR >= 64);
|
||||
|
||||
if (WARN_ON_ONCE(target >= 64))
|
||||
return 0;
|
||||
return targets_supported & (1ULL << target);
|
||||
}
|
||||
|
||||
static int __init intel_pconfig_init(void)
|
||||
{
|
||||
int subleaf;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_PCONFIG))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Scan subleafs of PCONFIG CPUID leaf.
|
||||
*
|
||||
* Subleafs of the same type need not to be consecutive.
|
||||
*
|
||||
* Stop on the first invalid subleaf type. All subleafs after the first
|
||||
* invalid are invalid too.
|
||||
*/
|
||||
for (subleaf = 0; subleaf < INT_MAX; subleaf++) {
|
||||
struct cpuid_regs regs;
|
||||
|
||||
cpuid_count(PCONFIG_CPUID, subleaf,
|
||||
®s.eax, ®s.ebx, ®s.ecx, ®s.edx);
|
||||
|
||||
switch (regs.eax & PCONFIG_CPUID_SUBLEAF_MASK) {
|
||||
case PCONFIG_CPUID_SUBLEAF_INVALID:
|
||||
/* Stop on the first invalid subleaf */
|
||||
goto out;
|
||||
case PCONFIG_CPUID_SUBLEAF_TARGETID:
|
||||
/* Mark supported PCONFIG targets */
|
||||
if (regs.ebx < 64)
|
||||
targets_supported |= (1ULL << regs.ebx);
|
||||
if (regs.ecx < 64)
|
||||
targets_supported |= (1ULL << regs.ecx);
|
||||
if (regs.edx < 64)
|
||||
targets_supported |= (1ULL << regs.edx);
|
||||
break;
|
||||
default:
|
||||
/* Unknown CPUID.PCONFIG subleaf: ignore */
|
||||
break;
|
||||
}
|
||||
}
|
||||
out:
|
||||
return 0;
|
||||
}
|
||||
arch_initcall(intel_pconfig_init);
|
@ -1095,19 +1095,7 @@ static void mce_unmap_kpfn(unsigned long pfn)
|
||||
* a legal address.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Build time check to see if we have a spare virtual bit. Don't want
|
||||
* to leave this until run time because most developers don't have a
|
||||
* system that can exercise this code path. This will only become a
|
||||
* problem if/when we move beyond 5-level page tables.
|
||||
*
|
||||
* Hard code "9" here because cpp doesn't grok ilog2(PTRS_PER_PGD)
|
||||
*/
|
||||
#if PGDIR_SHIFT + 9 < 63
|
||||
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
|
||||
#else
|
||||
#error "no unused virtual bit available"
|
||||
#endif
|
||||
|
||||
if (set_memory_np(decoy_addr, 1))
|
||||
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
|
||||
@ -2357,6 +2345,12 @@ static __init int mcheck_init_device(void)
|
||||
{
|
||||
int err;
|
||||
|
||||
/*
|
||||
* Check if we have a spare virtual bit. This will only become
|
||||
* a problem if/when we move beyond 5-level page tables.
|
||||
*/
|
||||
MAYBE_BUILD_BUG_ON(__VIRTUAL_MASK_SHIFT >= 63);
|
||||
|
||||
if (!mce_available(&boot_cpu_data)) {
|
||||
err = -EIO;
|
||||
goto err_out;
|
||||
|
@ -924,6 +924,24 @@ static int __init parse_memmap_one(char *p)
|
||||
} else if (*p == '!') {
|
||||
start_at = memparse(p+1, &p);
|
||||
e820__range_add(start_at, mem_size, E820_TYPE_PRAM);
|
||||
} else if (*p == '%') {
|
||||
enum e820_type from = 0, to = 0;
|
||||
|
||||
start_at = memparse(p + 1, &p);
|
||||
if (*p == '-')
|
||||
from = simple_strtoull(p + 1, &p, 0);
|
||||
if (*p == '+')
|
||||
to = simple_strtoull(p + 1, &p, 0);
|
||||
if (*p != '\0')
|
||||
return -EINVAL;
|
||||
if (from && to)
|
||||
e820__range_update(start_at, mem_size, from, to);
|
||||
else if (to)
|
||||
e820__range_add(start_at, mem_size, to);
|
||||
else if (from)
|
||||
e820__range_remove(start_at, mem_size, from, 1);
|
||||
else
|
||||
e820__range_remove(start_at, mem_size, 0, 0);
|
||||
} else {
|
||||
e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1);
|
||||
}
|
||||
|
@ -32,6 +32,11 @@
|
||||
#include <asm/microcode.h>
|
||||
#include <asm/kasan.h>
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
#undef pgtable_l5_enabled
|
||||
#define pgtable_l5_enabled __pgtable_l5_enabled
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Manage page tables very early on.
|
||||
*/
|
||||
@ -39,6 +44,24 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
|
||||
static unsigned int __initdata next_early_pgt;
|
||||
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
unsigned int __pgtable_l5_enabled __ro_after_init;
|
||||
EXPORT_SYMBOL(__pgtable_l5_enabled);
|
||||
unsigned int pgdir_shift __ro_after_init = 39;
|
||||
EXPORT_SYMBOL(pgdir_shift);
|
||||
unsigned int ptrs_per_p4d __ro_after_init = 1;
|
||||
EXPORT_SYMBOL(ptrs_per_p4d);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
|
||||
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
|
||||
EXPORT_SYMBOL(page_offset_base);
|
||||
unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4;
|
||||
EXPORT_SYMBOL(vmalloc_base);
|
||||
unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
|
||||
EXPORT_SYMBOL(vmemmap_base);
|
||||
#endif
|
||||
|
||||
#define __head __section(.head.text)
|
||||
|
||||
static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
|
||||
@ -46,6 +69,41 @@ static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
|
||||
return ptr - (void *)_text + (void *)physaddr;
|
||||
}
|
||||
|
||||
static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr)
|
||||
{
|
||||
return fixup_pointer(ptr, physaddr);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr)
|
||||
{
|
||||
return fixup_pointer(ptr, physaddr);
|
||||
}
|
||||
|
||||
static bool __head check_la57_support(unsigned long physaddr)
|
||||
{
|
||||
if (native_cpuid_eax(0) < 7)
|
||||
return false;
|
||||
|
||||
if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
|
||||
return false;
|
||||
|
||||
*fixup_int(&pgtable_l5_enabled, physaddr) = 1;
|
||||
*fixup_int(&pgdir_shift, physaddr) = 48;
|
||||
*fixup_int(&ptrs_per_p4d, physaddr) = 512;
|
||||
*fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5;
|
||||
*fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5;
|
||||
*fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5;
|
||||
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
static bool __head check_la57_support(unsigned long physaddr)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
unsigned long __head __startup_64(unsigned long physaddr,
|
||||
struct boot_params *bp)
|
||||
{
|
||||
@ -55,9 +113,12 @@ unsigned long __head __startup_64(unsigned long physaddr,
|
||||
p4dval_t *p4d;
|
||||
pudval_t *pud;
|
||||
pmdval_t *pmd, pmd_entry;
|
||||
bool la57;
|
||||
int i;
|
||||
unsigned int *next_pgt_ptr;
|
||||
|
||||
la57 = check_la57_support(physaddr);
|
||||
|
||||
/* Is the address too large? */
|
||||
if (physaddr >> MAX_PHYSMEM_BITS)
|
||||
for (;;);
|
||||
@ -81,9 +142,14 @@ unsigned long __head __startup_64(unsigned long physaddr,
|
||||
/* Fixup the physical addresses in the page table */
|
||||
|
||||
pgd = fixup_pointer(&early_top_pgt, physaddr);
|
||||
pgd[pgd_index(__START_KERNEL_map)] += load_delta;
|
||||
p = pgd + pgd_index(__START_KERNEL_map);
|
||||
if (la57)
|
||||
*p = (unsigned long)level4_kernel_pgt;
|
||||
else
|
||||
*p = (unsigned long)level3_kernel_pgt;
|
||||
*p += _PAGE_TABLE_NOENC - __START_KERNEL_map + load_delta;
|
||||
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
if (la57) {
|
||||
p4d = fixup_pointer(&level4_kernel_pgt, physaddr);
|
||||
p4d[511] += load_delta;
|
||||
}
|
||||
@ -108,7 +174,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
|
||||
|
||||
pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
|
||||
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
if (la57) {
|
||||
p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
|
||||
|
||||
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
|
||||
@ -154,8 +220,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
|
||||
* Fixup phys_base - remove the memory encryption mask to obtain
|
||||
* the true physical address.
|
||||
*/
|
||||
p = fixup_pointer(&phys_base, physaddr);
|
||||
*p += load_delta - sme_get_me_mask();
|
||||
*fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask();
|
||||
|
||||
/* Encrypt the kernel and related (if SME is active) */
|
||||
sme_encrypt_kernel(bp);
|
||||
@ -206,7 +271,7 @@ int __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
|
||||
* critical -- __PAGE_OFFSET would point us back into the dynamic
|
||||
* range and we might end up looping forever...
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_X86_5LEVEL))
|
||||
if (!pgtable_l5_enabled)
|
||||
p4d_p = pgd_p;
|
||||
else if (pgd)
|
||||
p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
|
||||
@ -322,7 +387,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
|
||||
BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
|
||||
BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
|
||||
BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
|
||||
BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
|
||||
MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
|
||||
(__START_KERNEL & PGDIR_MASK)));
|
||||
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
|
||||
|
||||
|
@ -39,12 +39,12 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#define l4_index(x) (((x) >> 39) & 511)
|
||||
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
|
||||
|
||||
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
|
||||
PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
|
||||
PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
|
||||
#endif
|
||||
L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
|
||||
L4_START_KERNEL = l4_index(__START_KERNEL_map)
|
||||
|
||||
L3_START_KERNEL = pud_index(__START_KERNEL_map)
|
||||
|
||||
.text
|
||||
@ -125,7 +125,10 @@ ENTRY(secondary_startup_64)
|
||||
/* Enable PAE mode, PGE and LA57 */
|
||||
movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
testl $1, __pgtable_l5_enabled(%rip)
|
||||
jz 1f
|
||||
orl $X86_CR4_LA57, %ecx
|
||||
1:
|
||||
#endif
|
||||
movq %rcx, %cr4
|
||||
|
||||
@ -374,12 +377,7 @@ GLOBAL(name)
|
||||
|
||||
__INITDATA
|
||||
NEXT_PGD_PAGE(early_top_pgt)
|
||||
.fill 511,8,0
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
.quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
|
||||
#else
|
||||
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
|
||||
#endif
|
||||
.fill 512,8,0
|
||||
.fill PTI_USER_PGD_FILL,8,0
|
||||
|
||||
NEXT_PAGE(early_dynamic_pgts)
|
||||
@ -390,9 +388,9 @@ NEXT_PAGE(early_dynamic_pgts)
|
||||
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
|
||||
NEXT_PGD_PAGE(init_top_pgt)
|
||||
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
|
||||
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0
|
||||
.org init_top_pgt + L4_PAGE_OFFSET*8, 0
|
||||
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
|
||||
.org init_top_pgt + PGD_START_KERNEL*8, 0
|
||||
.org init_top_pgt + L4_START_KERNEL*8, 0
|
||||
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
|
||||
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
|
||||
.fill PTI_USER_PGD_FILL,8,0
|
||||
|
@ -350,6 +350,7 @@ void arch_crash_save_vmcoreinfo(void)
|
||||
{
|
||||
VMCOREINFO_NUMBER(phys_base);
|
||||
VMCOREINFO_SYMBOL(init_top_pgt);
|
||||
VMCOREINFO_NUMBER(pgtable_l5_enabled);
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
VMCOREINFO_SYMBOL(node_data);
|
||||
|
@ -189,9 +189,7 @@ struct ist_info ist_info;
|
||||
#endif
|
||||
|
||||
#else
|
||||
struct cpuinfo_x86 boot_cpu_data __read_mostly = {
|
||||
.x86_phys_bits = MAX_PHYSMEM_BITS,
|
||||
};
|
||||
struct cpuinfo_x86 boot_cpu_data __read_mostly;
|
||||
EXPORT_SYMBOL(boot_cpu_data);
|
||||
#endif
|
||||
|
||||
@ -851,6 +849,7 @@ void __init setup_arch(char **cmdline_p)
|
||||
__flush_tlb_all();
|
||||
#else
|
||||
printk(KERN_INFO "Command line: %s\n", boot_command_line);
|
||||
boot_cpu_data.x86_phys_bits = MAX_PHYSMEM_BITS;
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <linux/export.h>
|
||||
#include <linux/pci.h>
|
||||
|
||||
#include <asm/acpi.h>
|
||||
#include <asm/bios_ebda.h>
|
||||
#include <asm/paravirt.h>
|
||||
#include <asm/pci_x86.h>
|
||||
@ -26,10 +27,11 @@
|
||||
|
||||
void x86_init_noop(void) { }
|
||||
void __init x86_init_uint_noop(unsigned int unused) { }
|
||||
int __init iommu_init_noop(void) { return 0; }
|
||||
void iommu_shutdown_noop(void) { }
|
||||
bool __init bool_x86_init_noop(void) { return false; }
|
||||
void x86_op_int_noop(int cpu) { }
|
||||
static int __init iommu_init_noop(void) { return 0; }
|
||||
static void iommu_shutdown_noop(void) { }
|
||||
static bool __init bool_x86_init_noop(void) { return false; }
|
||||
static void x86_op_int_noop(int cpu) { }
|
||||
static u64 u64_x86_init_noop(void) { return 0; }
|
||||
|
||||
/*
|
||||
* The platform setup functions are preset with the default functions
|
||||
@ -91,6 +93,11 @@ struct x86_init_ops x86_init __initdata = {
|
||||
.x2apic_available = bool_x86_init_noop,
|
||||
.init_mem_mapping = x86_init_noop,
|
||||
},
|
||||
|
||||
.acpi = {
|
||||
.get_root_pointer = u64_x86_init_noop,
|
||||
.reduced_hw_early_init = acpi_generic_reduced_hw_init,
|
||||
},
|
||||
};
|
||||
|
||||
struct x86_cpuinit_ops x86_cpuinit = {
|
||||
|
@ -1,12 +1,15 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c
|
||||
KCOV_INSTRUMENT_tlb.o := n
|
||||
KCOV_INSTRUMENT_mem_encrypt.o := n
|
||||
# Kernel does not boot with instrumentation of tlb.c and mem_encrypt*.c
|
||||
KCOV_INSTRUMENT_tlb.o := n
|
||||
KCOV_INSTRUMENT_mem_encrypt.o := n
|
||||
KCOV_INSTRUMENT_mem_encrypt_identity.o := n
|
||||
|
||||
KASAN_SANITIZE_mem_encrypt.o := n
|
||||
KASAN_SANITIZE_mem_encrypt.o := n
|
||||
KASAN_SANITIZE_mem_encrypt_identity.o := n
|
||||
|
||||
ifdef CONFIG_FUNCTION_TRACER
|
||||
CFLAGS_REMOVE_mem_encrypt.o = -pg
|
||||
CFLAGS_REMOVE_mem_encrypt.o = -pg
|
||||
CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
|
||||
endif
|
||||
|
||||
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
|
||||
@ -16,6 +19,7 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
|
||||
nostackp := $(call cc-option, -fno-stack-protector)
|
||||
CFLAGS_physaddr.o := $(nostackp)
|
||||
CFLAGS_setup_nx.o := $(nostackp)
|
||||
CFLAGS_mem_encrypt_identity.o := $(nostackp)
|
||||
|
||||
CFLAGS_fault.o := -I$(src)/../include/asm/trace
|
||||
|
||||
@ -47,4 +51,5 @@ obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
|
||||
obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
|
||||
|
||||
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
|
||||
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o
|
||||
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
|
||||
|
@ -72,6 +72,31 @@ static const struct file_operations ptdump_curusr_fops = {
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
|
||||
extern pgd_t *efi_pgd;
|
||||
static struct dentry *pe_efi;
|
||||
|
||||
static int ptdump_show_efi(struct seq_file *m, void *v)
|
||||
{
|
||||
if (efi_pgd)
|
||||
ptdump_walk_pgd_level_debugfs(m, efi_pgd, false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ptdump_open_efi(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return single_open(filp, ptdump_show_efi, NULL);
|
||||
}
|
||||
|
||||
static const struct file_operations ptdump_efi_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = ptdump_open_efi,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
#endif
|
||||
|
||||
static struct dentry *dir, *pe_knl, *pe_curknl;
|
||||
|
||||
static int __init pt_dump_debug_init(void)
|
||||
@ -96,6 +121,13 @@ static int __init pt_dump_debug_init(void)
|
||||
if (!pe_curusr)
|
||||
goto err;
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
|
||||
pe_efi = debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops);
|
||||
if (!pe_efi)
|
||||
goto err;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
err:
|
||||
debugfs_remove_recursive(dir);
|
||||
|
@ -29,6 +29,7 @@
|
||||
struct pg_state {
|
||||
int level;
|
||||
pgprot_t current_prot;
|
||||
pgprotval_t effective_prot;
|
||||
unsigned long start_address;
|
||||
unsigned long current_address;
|
||||
const struct addr_marker *marker;
|
||||
@ -85,11 +86,15 @@ static struct addr_marker address_markers[] = {
|
||||
[VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
|
||||
[VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
|
||||
#ifdef CONFIG_KASAN
|
||||
[KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
|
||||
[KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
|
||||
/*
|
||||
* These fields get initialized with the (dynamic)
|
||||
* KASAN_SHADOW_{START,END} values in pt_dump_init().
|
||||
*/
|
||||
[KASAN_SHADOW_START_NR] = { 0UL, "KASAN shadow" },
|
||||
[KASAN_SHADOW_END_NR] = { 0UL, "KASAN shadow end" },
|
||||
#endif
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
[LDT_NR] = { LDT_BASE_ADDR, "LDT remap" },
|
||||
[LDT_NR] = { 0UL, "LDT remap" },
|
||||
#endif
|
||||
[CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
|
||||
#ifdef CONFIG_X86_ESPFIX64
|
||||
@ -231,9 +236,9 @@ static unsigned long normalize_addr(unsigned long u)
|
||||
* print what we collected so far.
|
||||
*/
|
||||
static void note_page(struct seq_file *m, struct pg_state *st,
|
||||
pgprot_t new_prot, int level)
|
||||
pgprot_t new_prot, pgprotval_t new_eff, int level)
|
||||
{
|
||||
pgprotval_t prot, cur;
|
||||
pgprotval_t prot, cur, eff;
|
||||
static const char units[] = "BKMGTPE";
|
||||
|
||||
/*
|
||||
@ -243,23 +248,24 @@ static void note_page(struct seq_file *m, struct pg_state *st,
|
||||
*/
|
||||
prot = pgprot_val(new_prot);
|
||||
cur = pgprot_val(st->current_prot);
|
||||
eff = st->effective_prot;
|
||||
|
||||
if (!st->level) {
|
||||
/* First entry */
|
||||
st->current_prot = new_prot;
|
||||
st->effective_prot = new_eff;
|
||||
st->level = level;
|
||||
st->marker = address_markers;
|
||||
st->lines = 0;
|
||||
pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
|
||||
st->marker->name);
|
||||
} else if (prot != cur || level != st->level ||
|
||||
} else if (prot != cur || new_eff != eff || level != st->level ||
|
||||
st->current_address >= st->marker[1].start_address) {
|
||||
const char *unit = units;
|
||||
unsigned long delta;
|
||||
int width = sizeof(unsigned long) * 2;
|
||||
pgprotval_t pr = pgprot_val(st->current_prot);
|
||||
|
||||
if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) {
|
||||
if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) {
|
||||
WARN_ONCE(1,
|
||||
"x86/mm: Found insecure W+X mapping at address %p/%pS\n",
|
||||
(void *)st->start_address,
|
||||
@ -313,21 +319,30 @@ static void note_page(struct seq_file *m, struct pg_state *st,
|
||||
|
||||
st->start_address = st->current_address;
|
||||
st->current_prot = new_prot;
|
||||
st->effective_prot = new_eff;
|
||||
st->level = level;
|
||||
}
|
||||
}
|
||||
|
||||
static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, unsigned long P)
|
||||
static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
|
||||
{
|
||||
return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
|
||||
((prot1 | prot2) & _PAGE_NX);
|
||||
}
|
||||
|
||||
static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
|
||||
pgprotval_t eff_in, unsigned long P)
|
||||
{
|
||||
int i;
|
||||
pte_t *start;
|
||||
pgprotval_t prot;
|
||||
pgprotval_t prot, eff;
|
||||
|
||||
start = (pte_t *)pmd_page_vaddr(addr);
|
||||
for (i = 0; i < PTRS_PER_PTE; i++) {
|
||||
prot = pte_flags(*start);
|
||||
eff = effective_prot(eff_in, prot);
|
||||
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
|
||||
note_page(m, st, __pgprot(prot), 5);
|
||||
note_page(m, st, __pgprot(prot), eff, 5);
|
||||
start++;
|
||||
}
|
||||
}
|
||||
@ -344,12 +359,10 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
|
||||
void *pt)
|
||||
{
|
||||
if (__pa(pt) == __pa(kasan_zero_pmd) ||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
__pa(pt) == __pa(kasan_zero_p4d) ||
|
||||
#endif
|
||||
(pgtable_l5_enabled && __pa(pt) == __pa(kasan_zero_p4d)) ||
|
||||
__pa(pt) == __pa(kasan_zero_pud)) {
|
||||
pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
|
||||
note_page(m, st, __pgprot(prot), 5);
|
||||
note_page(m, st, __pgprot(prot), 0, 5);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -364,42 +377,45 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
|
||||
|
||||
#if PTRS_PER_PMD > 1
|
||||
|
||||
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P)
|
||||
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
|
||||
pgprotval_t eff_in, unsigned long P)
|
||||
{
|
||||
int i;
|
||||
pmd_t *start, *pmd_start;
|
||||
pgprotval_t prot;
|
||||
pgprotval_t prot, eff;
|
||||
|
||||
pmd_start = start = (pmd_t *)pud_page_vaddr(addr);
|
||||
for (i = 0; i < PTRS_PER_PMD; i++) {
|
||||
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
|
||||
if (!pmd_none(*start)) {
|
||||
prot = pmd_flags(*start);
|
||||
eff = effective_prot(eff_in, prot);
|
||||
if (pmd_large(*start) || !pmd_present(*start)) {
|
||||
prot = pmd_flags(*start);
|
||||
note_page(m, st, __pgprot(prot), 4);
|
||||
note_page(m, st, __pgprot(prot), eff, 4);
|
||||
} else if (!kasan_page_table(m, st, pmd_start)) {
|
||||
walk_pte_level(m, st, *start,
|
||||
walk_pte_level(m, st, *start, eff,
|
||||
P + i * PMD_LEVEL_MULT);
|
||||
}
|
||||
} else
|
||||
note_page(m, st, __pgprot(0), 4);
|
||||
note_page(m, st, __pgprot(0), 0, 4);
|
||||
start++;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
|
||||
#define walk_pmd_level(m,s,a,e,p) walk_pte_level(m,s,__pmd(pud_val(a)),e,p)
|
||||
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
|
||||
#define pud_none(a) pmd_none(__pmd(pud_val(a)))
|
||||
#endif
|
||||
|
||||
#if PTRS_PER_PUD > 1
|
||||
|
||||
static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P)
|
||||
static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
|
||||
pgprotval_t eff_in, unsigned long P)
|
||||
{
|
||||
int i;
|
||||
pud_t *start, *pud_start;
|
||||
pgprotval_t prot;
|
||||
pgprotval_t prot, eff;
|
||||
pud_t *prev_pud = NULL;
|
||||
|
||||
pud_start = start = (pud_t *)p4d_page_vaddr(addr);
|
||||
@ -407,15 +423,16 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
|
||||
for (i = 0; i < PTRS_PER_PUD; i++) {
|
||||
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
|
||||
if (!pud_none(*start)) {
|
||||
prot = pud_flags(*start);
|
||||
eff = effective_prot(eff_in, prot);
|
||||
if (pud_large(*start) || !pud_present(*start)) {
|
||||
prot = pud_flags(*start);
|
||||
note_page(m, st, __pgprot(prot), 3);
|
||||
note_page(m, st, __pgprot(prot), eff, 3);
|
||||
} else if (!kasan_page_table(m, st, pud_start)) {
|
||||
walk_pmd_level(m, st, *start,
|
||||
walk_pmd_level(m, st, *start, eff,
|
||||
P + i * PUD_LEVEL_MULT);
|
||||
}
|
||||
} else
|
||||
note_page(m, st, __pgprot(0), 3);
|
||||
note_page(m, st, __pgprot(0), 0, 3);
|
||||
|
||||
prev_pud = start;
|
||||
start++;
|
||||
@ -423,43 +440,43 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
|
||||
}
|
||||
|
||||
#else
|
||||
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(p4d_val(a)),p)
|
||||
#define walk_pud_level(m,s,a,e,p) walk_pmd_level(m,s,__pud(p4d_val(a)),e,p)
|
||||
#define p4d_large(a) pud_large(__pud(p4d_val(a)))
|
||||
#define p4d_none(a) pud_none(__pud(p4d_val(a)))
|
||||
#endif
|
||||
|
||||
#if PTRS_PER_P4D > 1
|
||||
|
||||
static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
|
||||
static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
|
||||
pgprotval_t eff_in, unsigned long P)
|
||||
{
|
||||
int i;
|
||||
p4d_t *start, *p4d_start;
|
||||
pgprotval_t prot;
|
||||
pgprotval_t prot, eff;
|
||||
|
||||
if (PTRS_PER_P4D == 1)
|
||||
return walk_pud_level(m, st, __p4d(pgd_val(addr)), eff_in, P);
|
||||
|
||||
p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);
|
||||
|
||||
for (i = 0; i < PTRS_PER_P4D; i++) {
|
||||
st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT);
|
||||
if (!p4d_none(*start)) {
|
||||
prot = p4d_flags(*start);
|
||||
eff = effective_prot(eff_in, prot);
|
||||
if (p4d_large(*start) || !p4d_present(*start)) {
|
||||
prot = p4d_flags(*start);
|
||||
note_page(m, st, __pgprot(prot), 2);
|
||||
note_page(m, st, __pgprot(prot), eff, 2);
|
||||
} else if (!kasan_page_table(m, st, p4d_start)) {
|
||||
walk_pud_level(m, st, *start,
|
||||
walk_pud_level(m, st, *start, eff,
|
||||
P + i * P4D_LEVEL_MULT);
|
||||
}
|
||||
} else
|
||||
note_page(m, st, __pgprot(0), 2);
|
||||
note_page(m, st, __pgprot(0), 0, 2);
|
||||
|
||||
start++;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p)
|
||||
#define pgd_large(a) p4d_large(__p4d(pgd_val(a)))
|
||||
#define pgd_none(a) p4d_none(__p4d(pgd_val(a)))
|
||||
#endif
|
||||
#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
|
||||
#define pgd_none(a) (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
|
||||
|
||||
static inline bool is_hypervisor_range(int idx)
|
||||
{
|
||||
@ -483,7 +500,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
|
||||
#else
|
||||
pgd_t *start = swapper_pg_dir;
|
||||
#endif
|
||||
pgprotval_t prot;
|
||||
pgprotval_t prot, eff;
|
||||
int i;
|
||||
struct pg_state st = {};
|
||||
|
||||
@ -499,15 +516,20 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
|
||||
for (i = 0; i < PTRS_PER_PGD; i++) {
|
||||
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
|
||||
if (!pgd_none(*start) && !is_hypervisor_range(i)) {
|
||||
prot = pgd_flags(*start);
|
||||
#ifdef CONFIG_X86_PAE
|
||||
eff = _PAGE_USER | _PAGE_RW;
|
||||
#else
|
||||
eff = prot;
|
||||
#endif
|
||||
if (pgd_large(*start) || !pgd_present(*start)) {
|
||||
prot = pgd_flags(*start);
|
||||
note_page(m, &st, __pgprot(prot), 1);
|
||||
note_page(m, &st, __pgprot(prot), eff, 1);
|
||||
} else {
|
||||
walk_p4d_level(m, &st, *start,
|
||||
walk_p4d_level(m, &st, *start, eff,
|
||||
i * PGD_LEVEL_MULT);
|
||||
}
|
||||
} else
|
||||
note_page(m, &st, __pgprot(0), 1);
|
||||
note_page(m, &st, __pgprot(0), 0, 1);
|
||||
|
||||
cond_resched();
|
||||
start++;
|
||||
@ -515,7 +537,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
|
||||
|
||||
/* Flush out the last page */
|
||||
st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
|
||||
note_page(m, &st, __pgprot(0), 0);
|
||||
note_page(m, &st, __pgprot(0), 0, 0);
|
||||
if (!checkwx)
|
||||
return;
|
||||
if (st.wx_pages)
|
||||
@ -570,6 +592,13 @@ static int __init pt_dump_init(void)
|
||||
address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
|
||||
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
|
||||
address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
|
||||
#endif
|
||||
#ifdef CONFIG_KASAN
|
||||
address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
|
||||
address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
|
||||
#endif
|
||||
#endif
|
||||
#ifdef CONFIG_X86_32
|
||||
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
|
||||
|
@ -417,11 +417,11 @@ void vmalloc_sync_all(void)
|
||||
*/
|
||||
static noinline int vmalloc_fault(unsigned long address)
|
||||
{
|
||||
pgd_t *pgd, *pgd_ref;
|
||||
p4d_t *p4d, *p4d_ref;
|
||||
pud_t *pud, *pud_ref;
|
||||
pmd_t *pmd, *pmd_ref;
|
||||
pte_t *pte, *pte_ref;
|
||||
pgd_t *pgd, *pgd_k;
|
||||
p4d_t *p4d, *p4d_k;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
||||
/* Make sure we are in vmalloc area: */
|
||||
if (!(address >= VMALLOC_START && address < VMALLOC_END))
|
||||
@ -435,73 +435,51 @@ static noinline int vmalloc_fault(unsigned long address)
|
||||
* case just flush:
|
||||
*/
|
||||
pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address);
|
||||
pgd_ref = pgd_offset_k(address);
|
||||
if (pgd_none(*pgd_ref))
|
||||
pgd_k = pgd_offset_k(address);
|
||||
if (pgd_none(*pgd_k))
|
||||
return -1;
|
||||
|
||||
if (CONFIG_PGTABLE_LEVELS > 4) {
|
||||
if (pgtable_l5_enabled) {
|
||||
if (pgd_none(*pgd)) {
|
||||
set_pgd(pgd, *pgd_ref);
|
||||
set_pgd(pgd, *pgd_k);
|
||||
arch_flush_lazy_mmu_mode();
|
||||
} else {
|
||||
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
|
||||
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_k));
|
||||
}
|
||||
}
|
||||
|
||||
/* With 4-level paging, copying happens on the p4d level. */
|
||||
p4d = p4d_offset(pgd, address);
|
||||
p4d_ref = p4d_offset(pgd_ref, address);
|
||||
if (p4d_none(*p4d_ref))
|
||||
p4d_k = p4d_offset(pgd_k, address);
|
||||
if (p4d_none(*p4d_k))
|
||||
return -1;
|
||||
|
||||
if (p4d_none(*p4d) && CONFIG_PGTABLE_LEVELS == 4) {
|
||||
set_p4d(p4d, *p4d_ref);
|
||||
if (p4d_none(*p4d) && !pgtable_l5_enabled) {
|
||||
set_p4d(p4d, *p4d_k);
|
||||
arch_flush_lazy_mmu_mode();
|
||||
} else {
|
||||
BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_ref));
|
||||
BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_k));
|
||||
}
|
||||
|
||||
/*
|
||||
* Below here mismatches are bugs because these lower tables
|
||||
* are shared:
|
||||
*/
|
||||
BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4);
|
||||
|
||||
pud = pud_offset(p4d, address);
|
||||
pud_ref = pud_offset(p4d_ref, address);
|
||||
if (pud_none(*pud_ref))
|
||||
if (pud_none(*pud))
|
||||
return -1;
|
||||
|
||||
if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))
|
||||
BUG();
|
||||
|
||||
if (pud_large(*pud))
|
||||
return 0;
|
||||
|
||||
pmd = pmd_offset(pud, address);
|
||||
pmd_ref = pmd_offset(pud_ref, address);
|
||||
if (pmd_none(*pmd_ref))
|
||||
if (pmd_none(*pmd))
|
||||
return -1;
|
||||
|
||||
if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))
|
||||
BUG();
|
||||
|
||||
if (pmd_large(*pmd))
|
||||
return 0;
|
||||
|
||||
pte_ref = pte_offset_kernel(pmd_ref, address);
|
||||
if (!pte_present(*pte_ref))
|
||||
return -1;
|
||||
|
||||
pte = pte_offset_kernel(pmd, address);
|
||||
|
||||
/*
|
||||
* Don't use pte_page here, because the mappings can point
|
||||
* outside mem_map, and the NUMA hash lookup cannot handle
|
||||
* that:
|
||||
*/
|
||||
if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
|
||||
BUG();
|
||||
if (!pte_present(*pte))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -120,7 +120,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
|
||||
result = ident_p4d_init(info, p4d, addr, next);
|
||||
if (result)
|
||||
return result;
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
if (pgtable_l5_enabled) {
|
||||
set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
|
||||
} else {
|
||||
/*
|
||||
|
@ -88,12 +88,7 @@ static int __init nonx32_setup(char *str)
|
||||
}
|
||||
__setup("noexec32=", nonx32_setup);
|
||||
|
||||
/*
|
||||
* When memory was added make sure all the processes MM have
|
||||
* suitable PGD entries in the local PGD level page.
|
||||
*/
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
void sync_global_pgds(unsigned long start, unsigned long end)
|
||||
static void sync_global_pgds_l5(unsigned long start, unsigned long end)
|
||||
{
|
||||
unsigned long addr;
|
||||
|
||||
@ -129,8 +124,8 @@ void sync_global_pgds(unsigned long start, unsigned long end)
|
||||
spin_unlock(&pgd_lock);
|
||||
}
|
||||
}
|
||||
#else
|
||||
void sync_global_pgds(unsigned long start, unsigned long end)
|
||||
|
||||
static void sync_global_pgds_l4(unsigned long start, unsigned long end)
|
||||
{
|
||||
unsigned long addr;
|
||||
|
||||
@ -143,7 +138,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
|
||||
* With folded p4d, pgd_none() is always false, we need to
|
||||
* handle synchonization on p4d level.
|
||||
*/
|
||||
BUILD_BUG_ON(pgd_none(*pgd_ref));
|
||||
MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref));
|
||||
p4d_ref = p4d_offset(pgd_ref, addr);
|
||||
|
||||
if (p4d_none(*p4d_ref))
|
||||
@ -173,7 +168,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
|
||||
spin_unlock(&pgd_lock);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* When memory was added make sure all the processes MM have
|
||||
* suitable PGD entries in the local PGD level page.
|
||||
*/
|
||||
void sync_global_pgds(unsigned long start, unsigned long end)
|
||||
{
|
||||
if (pgtable_l5_enabled)
|
||||
sync_global_pgds_l5(start, end);
|
||||
else
|
||||
sync_global_pgds_l4(start, end);
|
||||
}
|
||||
|
||||
/*
|
||||
* NOTE: This function is marked __ref because it calls __init function
|
||||
@ -632,7 +638,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
|
||||
unsigned long vaddr = (unsigned long)__va(paddr);
|
||||
int i = p4d_index(vaddr);
|
||||
|
||||
if (!IS_ENABLED(CONFIG_X86_5LEVEL))
|
||||
if (!pgtable_l5_enabled)
|
||||
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
|
||||
|
||||
for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
|
||||
@ -712,7 +718,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
|
||||
page_size_mask);
|
||||
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL))
|
||||
if (pgtable_l5_enabled)
|
||||
pgd_populate(&init_mm, pgd, p4d);
|
||||
else
|
||||
p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
|
||||
@ -1089,7 +1095,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
|
||||
* 5-level case we should free them. This code will have to change
|
||||
* to adapt for boot-time switching between 4 and 5 level page tables.
|
||||
*/
|
||||
if (CONFIG_PGTABLE_LEVELS == 5)
|
||||
if (pgtable_l5_enabled)
|
||||
free_pud_table(pud_base, p4d);
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,12 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#define DISABLE_BRANCH_PROFILING
|
||||
#define pr_fmt(fmt) "kasan: " fmt
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
/* Too early to use cpu_feature_enabled() */
|
||||
#define pgtable_l5_enabled __pgtable_l5_enabled
|
||||
#endif
|
||||
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/kdebug.h>
|
||||
@ -19,7 +25,7 @@
|
||||
|
||||
extern struct range pfn_mapped[E820_MAX_ENTRIES];
|
||||
|
||||
static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
|
||||
static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
|
||||
|
||||
static __init void *early_alloc(size_t size, int nid, bool panic)
|
||||
{
|
||||
@ -176,10 +182,10 @@ static void __init clear_pgds(unsigned long start,
|
||||
* With folded p4d, pgd_clear() is nop, use p4d_clear()
|
||||
* instead.
|
||||
*/
|
||||
if (CONFIG_PGTABLE_LEVELS < 5)
|
||||
p4d_clear(p4d_offset(pgd, start));
|
||||
else
|
||||
if (pgtable_l5_enabled)
|
||||
pgd_clear(pgd);
|
||||
else
|
||||
p4d_clear(p4d_offset(pgd, start));
|
||||
}
|
||||
|
||||
pgd = pgd_offset_k(start);
|
||||
@ -191,7 +197,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
|
||||
{
|
||||
unsigned long p4d;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_X86_5LEVEL))
|
||||
if (!pgtable_l5_enabled)
|
||||
return (p4d_t *)pgd;
|
||||
|
||||
p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
|
||||
@ -272,7 +278,7 @@ void __init kasan_early_init(void)
|
||||
for (i = 0; i < PTRS_PER_PUD; i++)
|
||||
kasan_zero_pud[i] = __pud(pud_val);
|
||||
|
||||
for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
|
||||
for (i = 0; pgtable_l5_enabled && i < PTRS_PER_P4D; i++)
|
||||
kasan_zero_p4d[i] = __p4d(p4d_val);
|
||||
|
||||
kasan_map_early_shadow(early_top_pgt);
|
||||
@ -303,7 +309,7 @@ void __init kasan_init(void)
|
||||
* bunch of things like kernel code, modules, EFI mapping, etc.
|
||||
* We need to take extra steps to not overwrite them.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
if (pgtable_l5_enabled) {
|
||||
void *ptr;
|
||||
|
||||
ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
|
||||
|
@ -34,23 +34,12 @@
|
||||
#define TB_SHIFT 40
|
||||
|
||||
/*
|
||||
* Virtual address start and end range for randomization.
|
||||
*
|
||||
* The end address could depend on more configuration options to make the
|
||||
* highest amount of space for randomization available, but that's too hard
|
||||
* to keep straight and caused issues already.
|
||||
*/
|
||||
static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
|
||||
static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
|
||||
|
||||
/* Default values */
|
||||
unsigned long page_offset_base = __PAGE_OFFSET_BASE;
|
||||
EXPORT_SYMBOL(page_offset_base);
|
||||
unsigned long vmalloc_base = __VMALLOC_BASE;
|
||||
EXPORT_SYMBOL(vmalloc_base);
|
||||
unsigned long vmemmap_base = __VMEMMAP_BASE;
|
||||
EXPORT_SYMBOL(vmemmap_base);
|
||||
|
||||
/*
|
||||
* Memory regions randomized by KASLR (except modules that use a separate logic
|
||||
* earlier during boot). The list is ordered based on virtual addresses. This
|
||||
@ -60,8 +49,8 @@ static __initdata struct kaslr_memory_region {
|
||||
unsigned long *base;
|
||||
unsigned long size_tb;
|
||||
} kaslr_regions[] = {
|
||||
{ &page_offset_base, 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) /* Maximum */ },
|
||||
{ &vmalloc_base, VMALLOC_SIZE_TB },
|
||||
{ &page_offset_base, 0 },
|
||||
{ &vmalloc_base, 0 },
|
||||
{ &vmemmap_base, 1 },
|
||||
};
|
||||
|
||||
@ -84,11 +73,14 @@ static inline bool kaslr_memory_enabled(void)
|
||||
void __init kernel_randomize_memory(void)
|
||||
{
|
||||
size_t i;
|
||||
unsigned long vaddr = vaddr_start;
|
||||
unsigned long vaddr_start, vaddr;
|
||||
unsigned long rand, memory_tb;
|
||||
struct rnd_state rand_state;
|
||||
unsigned long remain_entropy;
|
||||
|
||||
vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
|
||||
vaddr = vaddr_start;
|
||||
|
||||
/*
|
||||
* These BUILD_BUG_ON checks ensure the memory layout is consistent
|
||||
* with the vaddr_start/vaddr_end variables. These checks are very
|
||||
@ -101,6 +93,9 @@ void __init kernel_randomize_memory(void)
|
||||
if (!kaslr_memory_enabled())
|
||||
return;
|
||||
|
||||
kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT);
|
||||
kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;
|
||||
|
||||
/*
|
||||
* Update Physical memory mapping to available and
|
||||
* add padding if needed (especially for memory hotplug support).
|
||||
@ -129,7 +124,7 @@ void __init kernel_randomize_memory(void)
|
||||
*/
|
||||
entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
|
||||
prandom_bytes_state(&rand_state, &rand, sizeof(rand));
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL))
|
||||
if (pgtable_l5_enabled)
|
||||
entropy = (rand % (entropy + 1)) & P4D_MASK;
|
||||
else
|
||||
entropy = (rand % (entropy + 1)) & PUD_MASK;
|
||||
@ -141,7 +136,7 @@ void __init kernel_randomize_memory(void)
|
||||
* randomization alignment.
|
||||
*/
|
||||
vaddr += get_padding(&kaslr_regions[i]);
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL))
|
||||
if (pgtable_l5_enabled)
|
||||
vaddr = round_up(vaddr + 1, P4D_SIZE);
|
||||
else
|
||||
vaddr = round_up(vaddr + 1, PUD_SIZE);
|
||||
@ -217,7 +212,7 @@ void __meminit init_trampoline(void)
|
||||
return;
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL))
|
||||
if (pgtable_l5_enabled)
|
||||
init_trampoline_p4d();
|
||||
else
|
||||
init_trampoline_pud();
|
||||
|
@ -25,17 +25,12 @@
|
||||
#include <asm/bootparam.h>
|
||||
#include <asm/set_memory.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/cmdline.h>
|
||||
|
||||
#include "mm_internal.h"
|
||||
|
||||
static char sme_cmdline_arg[] __initdata = "mem_encrypt";
|
||||
static char sme_cmdline_on[] __initdata = "on";
|
||||
static char sme_cmdline_off[] __initdata = "off";
|
||||
|
||||
/*
|
||||
* Since SME related variables are set early in the boot process they must
|
||||
* reside in the .data section so as not to be zeroed out when the .bss
|
||||
@ -46,7 +41,7 @@ EXPORT_SYMBOL(sme_me_mask);
|
||||
DEFINE_STATIC_KEY_FALSE(sev_enable_key);
|
||||
EXPORT_SYMBOL_GPL(sev_enable_key);
|
||||
|
||||
static bool sev_enabled __section(.data);
|
||||
bool sev_enabled __section(.data);
|
||||
|
||||
/* Buffer used for early in-place encryption by BSP, no locking needed */
|
||||
static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
|
||||
@ -463,574 +458,3 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
|
||||
/* Make the SWIOTLB buffer area decrypted */
|
||||
set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
struct sme_populate_pgd_data {
|
||||
void *pgtable_area;
|
||||
pgd_t *pgd;
|
||||
|
||||
pmdval_t pmd_flags;
|
||||
pteval_t pte_flags;
|
||||
unsigned long paddr;
|
||||
|
||||
unsigned long vaddr;
|
||||
unsigned long vaddr_end;
|
||||
};
|
||||
|
||||
static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
unsigned long pgd_start, pgd_end, pgd_size;
|
||||
pgd_t *pgd_p;
|
||||
|
||||
pgd_start = ppd->vaddr & PGDIR_MASK;
|
||||
pgd_end = ppd->vaddr_end & PGDIR_MASK;
|
||||
|
||||
pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
|
||||
|
||||
pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
|
||||
|
||||
memset(pgd_p, 0, pgd_size);
|
||||
}
|
||||
|
||||
#define PGD_FLAGS _KERNPG_TABLE_NOENC
|
||||
#define P4D_FLAGS _KERNPG_TABLE_NOENC
|
||||
#define PUD_FLAGS _KERNPG_TABLE_NOENC
|
||||
#define PMD_FLAGS _KERNPG_TABLE_NOENC
|
||||
|
||||
#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
|
||||
|
||||
#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
|
||||
#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
|
||||
(_PAGE_PAT | _PAGE_PWT))
|
||||
|
||||
#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
|
||||
|
||||
#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
|
||||
|
||||
#define PTE_FLAGS_DEC PTE_FLAGS
|
||||
#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
|
||||
(_PAGE_PAT | _PAGE_PWT))
|
||||
|
||||
#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
|
||||
|
||||
static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
pgd_t *pgd_p;
|
||||
p4d_t *p4d_p;
|
||||
pud_t *pud_p;
|
||||
pmd_t *pmd_p;
|
||||
|
||||
pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
|
||||
if (native_pgd_val(*pgd_p)) {
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL))
|
||||
p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
|
||||
else
|
||||
pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
|
||||
} else {
|
||||
pgd_t pgd;
|
||||
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
p4d_p = ppd->pgtable_area;
|
||||
memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
|
||||
ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
|
||||
|
||||
pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
|
||||
} else {
|
||||
pud_p = ppd->pgtable_area;
|
||||
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
|
||||
ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
|
||||
|
||||
pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
|
||||
}
|
||||
native_set_pgd(pgd_p, pgd);
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
p4d_p += p4d_index(ppd->vaddr);
|
||||
if (native_p4d_val(*p4d_p)) {
|
||||
pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
|
||||
} else {
|
||||
p4d_t p4d;
|
||||
|
||||
pud_p = ppd->pgtable_area;
|
||||
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
|
||||
ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
|
||||
|
||||
p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
|
||||
native_set_p4d(p4d_p, p4d);
|
||||
}
|
||||
}
|
||||
|
||||
pud_p += pud_index(ppd->vaddr);
|
||||
if (native_pud_val(*pud_p)) {
|
||||
if (native_pud_val(*pud_p) & _PAGE_PSE)
|
||||
return NULL;
|
||||
|
||||
pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
|
||||
} else {
|
||||
pud_t pud;
|
||||
|
||||
pmd_p = ppd->pgtable_area;
|
||||
memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
|
||||
ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
|
||||
|
||||
pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
|
||||
native_set_pud(pud_p, pud);
|
||||
}
|
||||
|
||||
return pmd_p;
|
||||
}
|
||||
|
||||
static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
pmd_t *pmd_p;
|
||||
|
||||
pmd_p = sme_prepare_pgd(ppd);
|
||||
if (!pmd_p)
|
||||
return;
|
||||
|
||||
pmd_p += pmd_index(ppd->vaddr);
|
||||
if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
|
||||
native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
|
||||
}
|
||||
|
||||
static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
pmd_t *pmd_p;
|
||||
pte_t *pte_p;
|
||||
|
||||
pmd_p = sme_prepare_pgd(ppd);
|
||||
if (!pmd_p)
|
||||
return;
|
||||
|
||||
pmd_p += pmd_index(ppd->vaddr);
|
||||
if (native_pmd_val(*pmd_p)) {
|
||||
if (native_pmd_val(*pmd_p) & _PAGE_PSE)
|
||||
return;
|
||||
|
||||
pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
|
||||
} else {
|
||||
pmd_t pmd;
|
||||
|
||||
pte_p = ppd->pgtable_area;
|
||||
memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
|
||||
ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
|
||||
|
||||
pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
|
||||
native_set_pmd(pmd_p, pmd);
|
||||
}
|
||||
|
||||
pte_p += pte_index(ppd->vaddr);
|
||||
if (!native_pte_val(*pte_p))
|
||||
native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
|
||||
}
|
||||
|
||||
static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
while (ppd->vaddr < ppd->vaddr_end) {
|
||||
sme_populate_pgd_large(ppd);
|
||||
|
||||
ppd->vaddr += PMD_PAGE_SIZE;
|
||||
ppd->paddr += PMD_PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
while (ppd->vaddr < ppd->vaddr_end) {
|
||||
sme_populate_pgd(ppd);
|
||||
|
||||
ppd->vaddr += PAGE_SIZE;
|
||||
ppd->paddr += PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
|
||||
pmdval_t pmd_flags, pteval_t pte_flags)
|
||||
{
|
||||
unsigned long vaddr_end;
|
||||
|
||||
ppd->pmd_flags = pmd_flags;
|
||||
ppd->pte_flags = pte_flags;
|
||||
|
||||
/* Save original end value since we modify the struct value */
|
||||
vaddr_end = ppd->vaddr_end;
|
||||
|
||||
/* If start is not 2MB aligned, create PTE entries */
|
||||
ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
|
||||
__sme_map_range_pte(ppd);
|
||||
|
||||
/* Create PMD entries */
|
||||
ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
|
||||
__sme_map_range_pmd(ppd);
|
||||
|
||||
/* If end is not 2MB aligned, create PTE entries */
|
||||
ppd->vaddr_end = vaddr_end;
|
||||
__sme_map_range_pte(ppd);
|
||||
}
|
||||
|
||||
static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
__sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
|
||||
}
|
||||
|
||||
static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
__sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
|
||||
}
|
||||
|
||||
static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
__sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
|
||||
}
|
||||
|
||||
static unsigned long __init sme_pgtable_calc(unsigned long len)
|
||||
{
|
||||
unsigned long p4d_size, pud_size, pmd_size, pte_size;
|
||||
unsigned long total;
|
||||
|
||||
/*
|
||||
* Perform a relatively simplistic calculation of the pagetable
|
||||
* entries that are needed. Those mappings will be covered mostly
|
||||
* by 2MB PMD entries so we can conservatively calculate the required
|
||||
* number of P4D, PUD and PMD structures needed to perform the
|
||||
* mappings. For mappings that are not 2MB aligned, PTE mappings
|
||||
* would be needed for the start and end portion of the address range
|
||||
* that fall outside of the 2MB alignment. This results in, at most,
|
||||
* two extra pages to hold PTE entries for each range that is mapped.
|
||||
* Incrementing the count for each covers the case where the addresses
|
||||
* cross entries.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
|
||||
p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
|
||||
pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
|
||||
pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
|
||||
} else {
|
||||
p4d_size = 0;
|
||||
pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
|
||||
pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
|
||||
}
|
||||
pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
|
||||
pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
|
||||
pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
|
||||
|
||||
total = p4d_size + pud_size + pmd_size + pte_size;
|
||||
|
||||
/*
|
||||
* Now calculate the added pagetable structures needed to populate
|
||||
* the new pagetables.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
|
||||
p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
|
||||
pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
|
||||
pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
|
||||
} else {
|
||||
p4d_size = 0;
|
||||
pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
|
||||
pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
|
||||
}
|
||||
pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
|
||||
pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
|
||||
|
||||
total += p4d_size + pud_size + pmd_size;
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp)
|
||||
{
|
||||
unsigned long workarea_start, workarea_end, workarea_len;
|
||||
unsigned long execute_start, execute_end, execute_len;
|
||||
unsigned long kernel_start, kernel_end, kernel_len;
|
||||
unsigned long initrd_start, initrd_end, initrd_len;
|
||||
struct sme_populate_pgd_data ppd;
|
||||
unsigned long pgtable_area_len;
|
||||
unsigned long decrypted_base;
|
||||
|
||||
if (!sme_active())
|
||||
return;
|
||||
|
||||
/*
|
||||
* Prepare for encrypting the kernel and initrd by building new
|
||||
* pagetables with the necessary attributes needed to encrypt the
|
||||
* kernel in place.
|
||||
*
|
||||
* One range of virtual addresses will map the memory occupied
|
||||
* by the kernel and initrd as encrypted.
|
||||
*
|
||||
* Another range of virtual addresses will map the memory occupied
|
||||
* by the kernel and initrd as decrypted and write-protected.
|
||||
*
|
||||
* The use of write-protect attribute will prevent any of the
|
||||
* memory from being cached.
|
||||
*/
|
||||
|
||||
/* Physical addresses gives us the identity mapped virtual addresses */
|
||||
kernel_start = __pa_symbol(_text);
|
||||
kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
|
||||
kernel_len = kernel_end - kernel_start;
|
||||
|
||||
initrd_start = 0;
|
||||
initrd_end = 0;
|
||||
initrd_len = 0;
|
||||
#ifdef CONFIG_BLK_DEV_INITRD
|
||||
initrd_len = (unsigned long)bp->hdr.ramdisk_size |
|
||||
((unsigned long)bp->ext_ramdisk_size << 32);
|
||||
if (initrd_len) {
|
||||
initrd_start = (unsigned long)bp->hdr.ramdisk_image |
|
||||
((unsigned long)bp->ext_ramdisk_image << 32);
|
||||
initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
|
||||
initrd_len = initrd_end - initrd_start;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Set the encryption workarea to be immediately after the kernel */
|
||||
workarea_start = kernel_end;
|
||||
|
||||
/*
|
||||
* Calculate required number of workarea bytes needed:
|
||||
* executable encryption area size:
|
||||
* stack page (PAGE_SIZE)
|
||||
* encryption routine page (PAGE_SIZE)
|
||||
* intermediate copy buffer (PMD_PAGE_SIZE)
|
||||
* pagetable structures for the encryption of the kernel
|
||||
* pagetable structures for workarea (in case not currently mapped)
|
||||
*/
|
||||
execute_start = workarea_start;
|
||||
execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
|
||||
execute_len = execute_end - execute_start;
|
||||
|
||||
/*
|
||||
* One PGD for both encrypted and decrypted mappings and a set of
|
||||
* PUDs and PMDs for each of the encrypted and decrypted mappings.
|
||||
*/
|
||||
pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
|
||||
pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
|
||||
if (initrd_len)
|
||||
pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
|
||||
|
||||
/* PUDs and PMDs needed in the current pagetables for the workarea */
|
||||
pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
|
||||
|
||||
/*
|
||||
* The total workarea includes the executable encryption area and
|
||||
* the pagetable area. The start of the workarea is already 2MB
|
||||
* aligned, align the end of the workarea on a 2MB boundary so that
|
||||
* we don't try to create/allocate PTE entries from the workarea
|
||||
* before it is mapped.
|
||||
*/
|
||||
workarea_len = execute_len + pgtable_area_len;
|
||||
workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* Set the address to the start of where newly created pagetable
|
||||
* structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
|
||||
* structures are created when the workarea is added to the current
|
||||
* pagetables and when the new encrypted and decrypted kernel
|
||||
* mappings are populated.
|
||||
*/
|
||||
ppd.pgtable_area = (void *)execute_end;
|
||||
|
||||
/*
|
||||
* Make sure the current pagetable structure has entries for
|
||||
* addressing the workarea.
|
||||
*/
|
||||
ppd.pgd = (pgd_t *)native_read_cr3_pa();
|
||||
ppd.paddr = workarea_start;
|
||||
ppd.vaddr = workarea_start;
|
||||
ppd.vaddr_end = workarea_end;
|
||||
sme_map_range_decrypted(&ppd);
|
||||
|
||||
/* Flush the TLB - no globals so cr3 is enough */
|
||||
native_write_cr3(__native_read_cr3());
|
||||
|
||||
/*
|
||||
* A new pagetable structure is being built to allow for the kernel
|
||||
* and initrd to be encrypted. It starts with an empty PGD that will
|
||||
* then be populated with new PUDs and PMDs as the encrypted and
|
||||
* decrypted kernel mappings are created.
|
||||
*/
|
||||
ppd.pgd = ppd.pgtable_area;
|
||||
memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
|
||||
ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
|
||||
|
||||
/*
|
||||
* A different PGD index/entry must be used to get different
|
||||
* pagetable entries for the decrypted mapping. Choose the next
|
||||
* PGD index and convert it to a virtual address to be used as
|
||||
* the base of the mapping.
|
||||
*/
|
||||
decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
|
||||
if (initrd_len) {
|
||||
unsigned long check_base;
|
||||
|
||||
check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
|
||||
decrypted_base = max(decrypted_base, check_base);
|
||||
}
|
||||
decrypted_base <<= PGDIR_SHIFT;
|
||||
|
||||
/* Add encrypted kernel (identity) mappings */
|
||||
ppd.paddr = kernel_start;
|
||||
ppd.vaddr = kernel_start;
|
||||
ppd.vaddr_end = kernel_end;
|
||||
sme_map_range_encrypted(&ppd);
|
||||
|
||||
/* Add decrypted, write-protected kernel (non-identity) mappings */
|
||||
ppd.paddr = kernel_start;
|
||||
ppd.vaddr = kernel_start + decrypted_base;
|
||||
ppd.vaddr_end = kernel_end + decrypted_base;
|
||||
sme_map_range_decrypted_wp(&ppd);
|
||||
|
||||
if (initrd_len) {
|
||||
/* Add encrypted initrd (identity) mappings */
|
||||
ppd.paddr = initrd_start;
|
||||
ppd.vaddr = initrd_start;
|
||||
ppd.vaddr_end = initrd_end;
|
||||
sme_map_range_encrypted(&ppd);
|
||||
/*
|
||||
* Add decrypted, write-protected initrd (non-identity) mappings
|
||||
*/
|
||||
ppd.paddr = initrd_start;
|
||||
ppd.vaddr = initrd_start + decrypted_base;
|
||||
ppd.vaddr_end = initrd_end + decrypted_base;
|
||||
sme_map_range_decrypted_wp(&ppd);
|
||||
}
|
||||
|
||||
/* Add decrypted workarea mappings to both kernel mappings */
|
||||
ppd.paddr = workarea_start;
|
||||
ppd.vaddr = workarea_start;
|
||||
ppd.vaddr_end = workarea_end;
|
||||
sme_map_range_decrypted(&ppd);
|
||||
|
||||
ppd.paddr = workarea_start;
|
||||
ppd.vaddr = workarea_start + decrypted_base;
|
||||
ppd.vaddr_end = workarea_end + decrypted_base;
|
||||
sme_map_range_decrypted(&ppd);
|
||||
|
||||
/* Perform the encryption */
|
||||
sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
|
||||
kernel_len, workarea_start, (unsigned long)ppd.pgd);
|
||||
|
||||
if (initrd_len)
|
||||
sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
|
||||
initrd_len, workarea_start,
|
||||
(unsigned long)ppd.pgd);
|
||||
|
||||
/*
|
||||
* At this point we are running encrypted. Remove the mappings for
|
||||
* the decrypted areas - all that is needed for this is to remove
|
||||
* the PGD entry/entries.
|
||||
*/
|
||||
ppd.vaddr = kernel_start + decrypted_base;
|
||||
ppd.vaddr_end = kernel_end + decrypted_base;
|
||||
sme_clear_pgd(&ppd);
|
||||
|
||||
if (initrd_len) {
|
||||
ppd.vaddr = initrd_start + decrypted_base;
|
||||
ppd.vaddr_end = initrd_end + decrypted_base;
|
||||
sme_clear_pgd(&ppd);
|
||||
}
|
||||
|
||||
ppd.vaddr = workarea_start + decrypted_base;
|
||||
ppd.vaddr_end = workarea_end + decrypted_base;
|
||||
sme_clear_pgd(&ppd);
|
||||
|
||||
/* Flush the TLB - no globals so cr3 is enough */
|
||||
native_write_cr3(__native_read_cr3());
|
||||
}
|
||||
|
||||
void __init __nostackprotector sme_enable(struct boot_params *bp)
|
||||
{
|
||||
const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
unsigned long feature_mask;
|
||||
bool active_by_default;
|
||||
unsigned long me_mask;
|
||||
char buffer[16];
|
||||
u64 msr;
|
||||
|
||||
/* Check for the SME/SEV support leaf */
|
||||
eax = 0x80000000;
|
||||
ecx = 0;
|
||||
native_cpuid(&eax, &ebx, &ecx, &edx);
|
||||
if (eax < 0x8000001f)
|
||||
return;
|
||||
|
||||
#define AMD_SME_BIT BIT(0)
|
||||
#define AMD_SEV_BIT BIT(1)
|
||||
/*
|
||||
* Set the feature mask (SME or SEV) based on whether we are
|
||||
* running under a hypervisor.
|
||||
*/
|
||||
eax = 1;
|
||||
ecx = 0;
|
||||
native_cpuid(&eax, &ebx, &ecx, &edx);
|
||||
feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT;
|
||||
|
||||
/*
|
||||
* Check for the SME/SEV feature:
|
||||
* CPUID Fn8000_001F[EAX]
|
||||
* - Bit 0 - Secure Memory Encryption support
|
||||
* - Bit 1 - Secure Encrypted Virtualization support
|
||||
* CPUID Fn8000_001F[EBX]
|
||||
* - Bits 5:0 - Pagetable bit position used to indicate encryption
|
||||
*/
|
||||
eax = 0x8000001f;
|
||||
ecx = 0;
|
||||
native_cpuid(&eax, &ebx, &ecx, &edx);
|
||||
if (!(eax & feature_mask))
|
||||
return;
|
||||
|
||||
me_mask = 1UL << (ebx & 0x3f);
|
||||
|
||||
/* Check if memory encryption is enabled */
|
||||
if (feature_mask == AMD_SME_BIT) {
|
||||
/* For SME, check the SYSCFG MSR */
|
||||
msr = __rdmsr(MSR_K8_SYSCFG);
|
||||
if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
|
||||
return;
|
||||
} else {
|
||||
/* For SEV, check the SEV MSR */
|
||||
msr = __rdmsr(MSR_AMD64_SEV);
|
||||
if (!(msr & MSR_AMD64_SEV_ENABLED))
|
||||
return;
|
||||
|
||||
/* SEV state cannot be controlled by a command line option */
|
||||
sme_me_mask = me_mask;
|
||||
sev_enabled = true;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fixups have not been applied to phys_base yet and we're running
|
||||
* identity mapped, so we must obtain the address to the SME command
|
||||
* line argument data using rip-relative addressing.
|
||||
*/
|
||||
asm ("lea sme_cmdline_arg(%%rip), %0"
|
||||
: "=r" (cmdline_arg)
|
||||
: "p" (sme_cmdline_arg));
|
||||
asm ("lea sme_cmdline_on(%%rip), %0"
|
||||
: "=r" (cmdline_on)
|
||||
: "p" (sme_cmdline_on));
|
||||
asm ("lea sme_cmdline_off(%%rip), %0"
|
||||
: "=r" (cmdline_off)
|
||||
: "p" (sme_cmdline_off));
|
||||
|
||||
if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
|
||||
active_by_default = true;
|
||||
else
|
||||
active_by_default = false;
|
||||
|
||||
cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
|
||||
((u64)bp->ext_cmd_line_ptr << 32));
|
||||
|
||||
cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
|
||||
|
||||
if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
|
||||
sme_me_mask = me_mask;
|
||||
else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
|
||||
sme_me_mask = 0;
|
||||
else
|
||||
sme_me_mask = active_by_default ? me_mask : 0;
|
||||
}
|
||||
|
564
arch/x86/mm/mem_encrypt_identity.c
Normal file
564
arch/x86/mm/mem_encrypt_identity.c
Normal file
@ -0,0 +1,564 @@
|
||||
/*
|
||||
* AMD Memory Encryption Support
|
||||
*
|
||||
* Copyright (C) 2016 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Author: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#define DISABLE_BRANCH_PROFILING
|
||||
|
||||
/*
|
||||
* Since we're dealing with identity mappings, physical and virtual
|
||||
* addresses are the same, so override these defines which are ultimately
|
||||
* used by the headers in misc.h.
|
||||
*/
|
||||
#define __pa(x) ((unsigned long)(x))
|
||||
#define __va(x) ((void *)((unsigned long)(x)))
|
||||
|
||||
/*
|
||||
* Special hack: we have to be careful, because no indirections are
|
||||
* allowed here, and paravirt_ops is a kind of one. As it will only run in
|
||||
* baremetal anyway, we just keep it from happening. (This list needs to
|
||||
* be extended when new paravirt and debugging variants are added.)
|
||||
*/
|
||||
#undef CONFIG_PARAVIRT
|
||||
#undef CONFIG_PARAVIRT_SPINLOCKS
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mem_encrypt.h>
|
||||
|
||||
#include <asm/setup.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/cmdline.h>
|
||||
|
||||
#include "mm_internal.h"
|
||||
|
||||
#define PGD_FLAGS _KERNPG_TABLE_NOENC
|
||||
#define P4D_FLAGS _KERNPG_TABLE_NOENC
|
||||
#define PUD_FLAGS _KERNPG_TABLE_NOENC
|
||||
#define PMD_FLAGS _KERNPG_TABLE_NOENC
|
||||
|
||||
#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
|
||||
|
||||
#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
|
||||
#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
|
||||
(_PAGE_PAT | _PAGE_PWT))
|
||||
|
||||
#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
|
||||
|
||||
#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
|
||||
|
||||
#define PTE_FLAGS_DEC PTE_FLAGS
|
||||
#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
|
||||
(_PAGE_PAT | _PAGE_PWT))
|
||||
|
||||
#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
|
||||
|
||||
struct sme_populate_pgd_data {
|
||||
void *pgtable_area;
|
||||
pgd_t *pgd;
|
||||
|
||||
pmdval_t pmd_flags;
|
||||
pteval_t pte_flags;
|
||||
unsigned long paddr;
|
||||
|
||||
unsigned long vaddr;
|
||||
unsigned long vaddr_end;
|
||||
};
|
||||
|
||||
static char sme_cmdline_arg[] __initdata = "mem_encrypt";
|
||||
static char sme_cmdline_on[] __initdata = "on";
|
||||
static char sme_cmdline_off[] __initdata = "off";
|
||||
|
||||
static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
unsigned long pgd_start, pgd_end, pgd_size;
|
||||
pgd_t *pgd_p;
|
||||
|
||||
pgd_start = ppd->vaddr & PGDIR_MASK;
|
||||
pgd_end = ppd->vaddr_end & PGDIR_MASK;
|
||||
|
||||
pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
|
||||
|
||||
pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
|
||||
|
||||
memset(pgd_p, 0, pgd_size);
|
||||
}
|
||||
|
||||
static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
|
||||
pgd = ppd->pgd + pgd_index(ppd->vaddr);
|
||||
if (pgd_none(*pgd)) {
|
||||
p4d = ppd->pgtable_area;
|
||||
memset(p4d, 0, sizeof(*p4d) * PTRS_PER_P4D);
|
||||
ppd->pgtable_area += sizeof(*p4d) * PTRS_PER_P4D;
|
||||
set_pgd(pgd, __pgd(PGD_FLAGS | __pa(p4d)));
|
||||
}
|
||||
|
||||
p4d = p4d_offset(pgd, ppd->vaddr);
|
||||
if (p4d_none(*p4d)) {
|
||||
pud = ppd->pgtable_area;
|
||||
memset(pud, 0, sizeof(*pud) * PTRS_PER_PUD);
|
||||
ppd->pgtable_area += sizeof(*pud) * PTRS_PER_PUD;
|
||||
set_p4d(p4d, __p4d(P4D_FLAGS | __pa(pud)));
|
||||
}
|
||||
|
||||
pud = pud_offset(p4d, ppd->vaddr);
|
||||
if (pud_none(*pud)) {
|
||||
pmd = ppd->pgtable_area;
|
||||
memset(pmd, 0, sizeof(*pmd) * PTRS_PER_PMD);
|
||||
ppd->pgtable_area += sizeof(*pmd) * PTRS_PER_PMD;
|
||||
set_pud(pud, __pud(PUD_FLAGS | __pa(pmd)));
|
||||
}
|
||||
|
||||
if (pud_large(*pud))
|
||||
return NULL;
|
||||
|
||||
return pud;
|
||||
}
|
||||
|
||||
static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
|
||||
pud = sme_prepare_pgd(ppd);
|
||||
if (!pud)
|
||||
return;
|
||||
|
||||
pmd = pmd_offset(pud, ppd->vaddr);
|
||||
if (pmd_large(*pmd))
|
||||
return;
|
||||
|
||||
set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags));
|
||||
}
|
||||
|
||||
static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
||||
pud = sme_prepare_pgd(ppd);
|
||||
if (!pud)
|
||||
return;
|
||||
|
||||
pmd = pmd_offset(pud, ppd->vaddr);
|
||||
if (pmd_none(*pmd)) {
|
||||
pte = ppd->pgtable_area;
|
||||
memset(pte, 0, sizeof(pte) * PTRS_PER_PTE);
|
||||
ppd->pgtable_area += sizeof(pte) * PTRS_PER_PTE;
|
||||
set_pmd(pmd, __pmd(PMD_FLAGS | __pa(pte)));
|
||||
}
|
||||
|
||||
if (pmd_large(*pmd))
|
||||
return;
|
||||
|
||||
pte = pte_offset_map(pmd, ppd->vaddr);
|
||||
if (pte_none(*pte))
|
||||
set_pte(pte, __pte(ppd->paddr | ppd->pte_flags));
|
||||
}
|
||||
|
||||
static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
while (ppd->vaddr < ppd->vaddr_end) {
|
||||
sme_populate_pgd_large(ppd);
|
||||
|
||||
ppd->vaddr += PMD_PAGE_SIZE;
|
||||
ppd->paddr += PMD_PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
while (ppd->vaddr < ppd->vaddr_end) {
|
||||
sme_populate_pgd(ppd);
|
||||
|
||||
ppd->vaddr += PAGE_SIZE;
|
||||
ppd->paddr += PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
|
||||
pmdval_t pmd_flags, pteval_t pte_flags)
|
||||
{
|
||||
unsigned long vaddr_end;
|
||||
|
||||
ppd->pmd_flags = pmd_flags;
|
||||
ppd->pte_flags = pte_flags;
|
||||
|
||||
/* Save original end value since we modify the struct value */
|
||||
vaddr_end = ppd->vaddr_end;
|
||||
|
||||
/* If start is not 2MB aligned, create PTE entries */
|
||||
ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
|
||||
__sme_map_range_pte(ppd);
|
||||
|
||||
/* Create PMD entries */
|
||||
ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
|
||||
__sme_map_range_pmd(ppd);
|
||||
|
||||
/* If end is not 2MB aligned, create PTE entries */
|
||||
ppd->vaddr_end = vaddr_end;
|
||||
__sme_map_range_pte(ppd);
|
||||
}
|
||||
|
||||
static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
__sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
|
||||
}
|
||||
|
||||
static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
__sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
|
||||
}
|
||||
|
||||
static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
|
||||
{
|
||||
__sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
|
||||
}
|
||||
|
||||
static unsigned long __init sme_pgtable_calc(unsigned long len)
|
||||
{
|
||||
unsigned long entries = 0, tables = 0;
|
||||
|
||||
/*
|
||||
* Perform a relatively simplistic calculation of the pagetable
|
||||
* entries that are needed. Those mappings will be covered mostly
|
||||
* by 2MB PMD entries so we can conservatively calculate the required
|
||||
* number of P4D, PUD and PMD structures needed to perform the
|
||||
* mappings. For mappings that are not 2MB aligned, PTE mappings
|
||||
* would be needed for the start and end portion of the address range
|
||||
* that fall outside of the 2MB alignment. This results in, at most,
|
||||
* two extra pages to hold PTE entries for each range that is mapped.
|
||||
* Incrementing the count for each covers the case where the addresses
|
||||
* cross entries.
|
||||
*/
|
||||
|
||||
/* PGDIR_SIZE is equal to P4D_SIZE on 4-level machine. */
|
||||
if (PTRS_PER_P4D > 1)
|
||||
entries += (DIV_ROUND_UP(len, PGDIR_SIZE) + 1) * sizeof(p4d_t) * PTRS_PER_P4D;
|
||||
entries += (DIV_ROUND_UP(len, P4D_SIZE) + 1) * sizeof(pud_t) * PTRS_PER_PUD;
|
||||
entries += (DIV_ROUND_UP(len, PUD_SIZE) + 1) * sizeof(pmd_t) * PTRS_PER_PMD;
|
||||
entries += 2 * sizeof(pte_t) * PTRS_PER_PTE;
|
||||
|
||||
/*
|
||||
* Now calculate the added pagetable structures needed to populate
|
||||
* the new pagetables.
|
||||
*/
|
||||
|
||||
if (PTRS_PER_P4D > 1)
|
||||
tables += DIV_ROUND_UP(entries, PGDIR_SIZE) * sizeof(p4d_t) * PTRS_PER_P4D;
|
||||
tables += DIV_ROUND_UP(entries, P4D_SIZE) * sizeof(pud_t) * PTRS_PER_PUD;
|
||||
tables += DIV_ROUND_UP(entries, PUD_SIZE) * sizeof(pmd_t) * PTRS_PER_PMD;
|
||||
|
||||
return entries + tables;
|
||||
}
|
||||
|
||||
void __init sme_encrypt_kernel(struct boot_params *bp)
|
||||
{
|
||||
unsigned long workarea_start, workarea_end, workarea_len;
|
||||
unsigned long execute_start, execute_end, execute_len;
|
||||
unsigned long kernel_start, kernel_end, kernel_len;
|
||||
unsigned long initrd_start, initrd_end, initrd_len;
|
||||
struct sme_populate_pgd_data ppd;
|
||||
unsigned long pgtable_area_len;
|
||||
unsigned long decrypted_base;
|
||||
|
||||
if (!sme_active())
|
||||
return;
|
||||
|
||||
/*
|
||||
* Prepare for encrypting the kernel and initrd by building new
|
||||
* pagetables with the necessary attributes needed to encrypt the
|
||||
* kernel in place.
|
||||
*
|
||||
* One range of virtual addresses will map the memory occupied
|
||||
* by the kernel and initrd as encrypted.
|
||||
*
|
||||
* Another range of virtual addresses will map the memory occupied
|
||||
* by the kernel and initrd as decrypted and write-protected.
|
||||
*
|
||||
* The use of write-protect attribute will prevent any of the
|
||||
* memory from being cached.
|
||||
*/
|
||||
|
||||
/* Physical addresses gives us the identity mapped virtual addresses */
|
||||
kernel_start = __pa_symbol(_text);
|
||||
kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
|
||||
kernel_len = kernel_end - kernel_start;
|
||||
|
||||
initrd_start = 0;
|
||||
initrd_end = 0;
|
||||
initrd_len = 0;
|
||||
#ifdef CONFIG_BLK_DEV_INITRD
|
||||
initrd_len = (unsigned long)bp->hdr.ramdisk_size |
|
||||
((unsigned long)bp->ext_ramdisk_size << 32);
|
||||
if (initrd_len) {
|
||||
initrd_start = (unsigned long)bp->hdr.ramdisk_image |
|
||||
((unsigned long)bp->ext_ramdisk_image << 32);
|
||||
initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
|
||||
initrd_len = initrd_end - initrd_start;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Set the encryption workarea to be immediately after the kernel */
|
||||
workarea_start = kernel_end;
|
||||
|
||||
/*
|
||||
* Calculate required number of workarea bytes needed:
|
||||
* executable encryption area size:
|
||||
* stack page (PAGE_SIZE)
|
||||
* encryption routine page (PAGE_SIZE)
|
||||
* intermediate copy buffer (PMD_PAGE_SIZE)
|
||||
* pagetable structures for the encryption of the kernel
|
||||
* pagetable structures for workarea (in case not currently mapped)
|
||||
*/
|
||||
execute_start = workarea_start;
|
||||
execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
|
||||
execute_len = execute_end - execute_start;
|
||||
|
||||
/*
|
||||
* One PGD for both encrypted and decrypted mappings and a set of
|
||||
* PUDs and PMDs for each of the encrypted and decrypted mappings.
|
||||
*/
|
||||
pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
|
||||
pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
|
||||
if (initrd_len)
|
||||
pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
|
||||
|
||||
/* PUDs and PMDs needed in the current pagetables for the workarea */
|
||||
pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
|
||||
|
||||
/*
|
||||
* The total workarea includes the executable encryption area and
|
||||
* the pagetable area. The start of the workarea is already 2MB
|
||||
* aligned, align the end of the workarea on a 2MB boundary so that
|
||||
* we don't try to create/allocate PTE entries from the workarea
|
||||
* before it is mapped.
|
||||
*/
|
||||
workarea_len = execute_len + pgtable_area_len;
|
||||
workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* Set the address to the start of where newly created pagetable
|
||||
* structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
|
||||
* structures are created when the workarea is added to the current
|
||||
* pagetables and when the new encrypted and decrypted kernel
|
||||
* mappings are populated.
|
||||
*/
|
||||
ppd.pgtable_area = (void *)execute_end;
|
||||
|
||||
/*
|
||||
* Make sure the current pagetable structure has entries for
|
||||
* addressing the workarea.
|
||||
*/
|
||||
ppd.pgd = (pgd_t *)native_read_cr3_pa();
|
||||
ppd.paddr = workarea_start;
|
||||
ppd.vaddr = workarea_start;
|
||||
ppd.vaddr_end = workarea_end;
|
||||
sme_map_range_decrypted(&ppd);
|
||||
|
||||
/* Flush the TLB - no globals so cr3 is enough */
|
||||
native_write_cr3(__native_read_cr3());
|
||||
|
||||
/*
|
||||
* A new pagetable structure is being built to allow for the kernel
|
||||
* and initrd to be encrypted. It starts with an empty PGD that will
|
||||
* then be populated with new PUDs and PMDs as the encrypted and
|
||||
* decrypted kernel mappings are created.
|
||||
*/
|
||||
ppd.pgd = ppd.pgtable_area;
|
||||
memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
|
||||
ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
|
||||
|
||||
/*
|
||||
* A different PGD index/entry must be used to get different
|
||||
* pagetable entries for the decrypted mapping. Choose the next
|
||||
* PGD index and convert it to a virtual address to be used as
|
||||
* the base of the mapping.
|
||||
*/
|
||||
decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
|
||||
if (initrd_len) {
|
||||
unsigned long check_base;
|
||||
|
||||
check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
|
||||
decrypted_base = max(decrypted_base, check_base);
|
||||
}
|
||||
decrypted_base <<= PGDIR_SHIFT;
|
||||
|
||||
/* Add encrypted kernel (identity) mappings */
|
||||
ppd.paddr = kernel_start;
|
||||
ppd.vaddr = kernel_start;
|
||||
ppd.vaddr_end = kernel_end;
|
||||
sme_map_range_encrypted(&ppd);
|
||||
|
||||
/* Add decrypted, write-protected kernel (non-identity) mappings */
|
||||
ppd.paddr = kernel_start;
|
||||
ppd.vaddr = kernel_start + decrypted_base;
|
||||
ppd.vaddr_end = kernel_end + decrypted_base;
|
||||
sme_map_range_decrypted_wp(&ppd);
|
||||
|
||||
if (initrd_len) {
|
||||
/* Add encrypted initrd (identity) mappings */
|
||||
ppd.paddr = initrd_start;
|
||||
ppd.vaddr = initrd_start;
|
||||
ppd.vaddr_end = initrd_end;
|
||||
sme_map_range_encrypted(&ppd);
|
||||
/*
|
||||
* Add decrypted, write-protected initrd (non-identity) mappings
|
||||
*/
|
||||
ppd.paddr = initrd_start;
|
||||
ppd.vaddr = initrd_start + decrypted_base;
|
||||
ppd.vaddr_end = initrd_end + decrypted_base;
|
||||
sme_map_range_decrypted_wp(&ppd);
|
||||
}
|
||||
|
||||
/* Add decrypted workarea mappings to both kernel mappings */
|
||||
ppd.paddr = workarea_start;
|
||||
ppd.vaddr = workarea_start;
|
||||
ppd.vaddr_end = workarea_end;
|
||||
sme_map_range_decrypted(&ppd);
|
||||
|
||||
ppd.paddr = workarea_start;
|
||||
ppd.vaddr = workarea_start + decrypted_base;
|
||||
ppd.vaddr_end = workarea_end + decrypted_base;
|
||||
sme_map_range_decrypted(&ppd);
|
||||
|
||||
/* Perform the encryption */
|
||||
sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
|
||||
kernel_len, workarea_start, (unsigned long)ppd.pgd);
|
||||
|
||||
if (initrd_len)
|
||||
sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
|
||||
initrd_len, workarea_start,
|
||||
(unsigned long)ppd.pgd);
|
||||
|
||||
/*
|
||||
* At this point we are running encrypted. Remove the mappings for
|
||||
* the decrypted areas - all that is needed for this is to remove
|
||||
* the PGD entry/entries.
|
||||
*/
|
||||
ppd.vaddr = kernel_start + decrypted_base;
|
||||
ppd.vaddr_end = kernel_end + decrypted_base;
|
||||
sme_clear_pgd(&ppd);
|
||||
|
||||
if (initrd_len) {
|
||||
ppd.vaddr = initrd_start + decrypted_base;
|
||||
ppd.vaddr_end = initrd_end + decrypted_base;
|
||||
sme_clear_pgd(&ppd);
|
||||
}
|
||||
|
||||
ppd.vaddr = workarea_start + decrypted_base;
|
||||
ppd.vaddr_end = workarea_end + decrypted_base;
|
||||
sme_clear_pgd(&ppd);
|
||||
|
||||
/* Flush the TLB - no globals so cr3 is enough */
|
||||
native_write_cr3(__native_read_cr3());
|
||||
}
|
||||
|
||||
void __init sme_enable(struct boot_params *bp)
|
||||
{
|
||||
const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
unsigned long feature_mask;
|
||||
bool active_by_default;
|
||||
unsigned long me_mask;
|
||||
char buffer[16];
|
||||
u64 msr;
|
||||
|
||||
/* Check for the SME/SEV support leaf */
|
||||
eax = 0x80000000;
|
||||
ecx = 0;
|
||||
native_cpuid(&eax, &ebx, &ecx, &edx);
|
||||
if (eax < 0x8000001f)
|
||||
return;
|
||||
|
||||
#define AMD_SME_BIT BIT(0)
|
||||
#define AMD_SEV_BIT BIT(1)
|
||||
/*
|
||||
* Set the feature mask (SME or SEV) based on whether we are
|
||||
* running under a hypervisor.
|
||||
*/
|
||||
eax = 1;
|
||||
ecx = 0;
|
||||
native_cpuid(&eax, &ebx, &ecx, &edx);
|
||||
feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT;
|
||||
|
||||
/*
|
||||
* Check for the SME/SEV feature:
|
||||
* CPUID Fn8000_001F[EAX]
|
||||
* - Bit 0 - Secure Memory Encryption support
|
||||
* - Bit 1 - Secure Encrypted Virtualization support
|
||||
* CPUID Fn8000_001F[EBX]
|
||||
* - Bits 5:0 - Pagetable bit position used to indicate encryption
|
||||
*/
|
||||
eax = 0x8000001f;
|
||||
ecx = 0;
|
||||
native_cpuid(&eax, &ebx, &ecx, &edx);
|
||||
if (!(eax & feature_mask))
|
||||
return;
|
||||
|
||||
me_mask = 1UL << (ebx & 0x3f);
|
||||
|
||||
/* Check if memory encryption is enabled */
|
||||
if (feature_mask == AMD_SME_BIT) {
|
||||
/* For SME, check the SYSCFG MSR */
|
||||
msr = __rdmsr(MSR_K8_SYSCFG);
|
||||
if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
|
||||
return;
|
||||
} else {
|
||||
/* For SEV, check the SEV MSR */
|
||||
msr = __rdmsr(MSR_AMD64_SEV);
|
||||
if (!(msr & MSR_AMD64_SEV_ENABLED))
|
||||
return;
|
||||
|
||||
/* SEV state cannot be controlled by a command line option */
|
||||
sme_me_mask = me_mask;
|
||||
sev_enabled = true;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fixups have not been applied to phys_base yet and we're running
|
||||
* identity mapped, so we must obtain the address to the SME command
|
||||
* line argument data using rip-relative addressing.
|
||||
*/
|
||||
asm ("lea sme_cmdline_arg(%%rip), %0"
|
||||
: "=r" (cmdline_arg)
|
||||
: "p" (sme_cmdline_arg));
|
||||
asm ("lea sme_cmdline_on(%%rip), %0"
|
||||
: "=r" (cmdline_on)
|
||||
: "p" (sme_cmdline_on));
|
||||
asm ("lea sme_cmdline_off(%%rip), %0"
|
||||
: "=r" (cmdline_off)
|
||||
: "p" (sme_cmdline_off));
|
||||
|
||||
if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
|
||||
active_by_default = true;
|
||||
else
|
||||
active_by_default = false;
|
||||
|
||||
cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
|
||||
((u64)bp->ext_cmd_line_ptr << 32));
|
||||
|
||||
cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
|
||||
|
||||
if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
|
||||
sme_me_mask = me_mask;
|
||||
else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
|
||||
sme_me_mask = 0;
|
||||
else
|
||||
sme_me_mask = active_by_default ? me_mask : 0;
|
||||
}
|
@ -60,17 +60,6 @@ void memory_present(int nid, unsigned long start, unsigned long end)
|
||||
}
|
||||
printk(KERN_CONT "\n");
|
||||
}
|
||||
|
||||
unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
|
||||
unsigned long end_pfn)
|
||||
{
|
||||
unsigned long nr_pages = end_pfn - start_pfn;
|
||||
|
||||
if (!nr_pages)
|
||||
return 0;
|
||||
|
||||
return (nr_pages + 1) * sizeof(struct page);
|
||||
}
|
||||
#endif
|
||||
|
||||
extern unsigned long highend_pfn, highstart_pfn;
|
||||
|
@ -157,7 +157,7 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
|
||||
unsigned long sp = current_stack_pointer;
|
||||
pgd_t *pgd = pgd_offset(mm, sp);
|
||||
|
||||
if (CONFIG_PGTABLE_LEVELS > 4) {
|
||||
if (pgtable_l5_enabled) {
|
||||
if (unlikely(pgd_none(*pgd))) {
|
||||
pgd_t *pgd_ref = pgd_offset_k(sp);
|
||||
|
||||
@ -613,7 +613,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
|
||||
{
|
||||
int cpu;
|
||||
|
||||
struct flush_tlb_info info = {
|
||||
struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
|
||||
.mm = mm,
|
||||
};
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/mc146818rtc.h>
|
||||
#include <linux/efi.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/reboot.h>
|
||||
@ -190,7 +191,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
|
||||
early_code_mapping_set_exec(0);
|
||||
}
|
||||
|
||||
static pgd_t *efi_pgd;
|
||||
pgd_t *efi_pgd;
|
||||
EXPORT_SYMBOL_GPL(efi_pgd);
|
||||
|
||||
/*
|
||||
* We need our own copy of the higher levels of the page tables
|
||||
@ -225,7 +227,7 @@ int __init efi_alloc_page_tables(void)
|
||||
|
||||
pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
|
||||
if (!pud) {
|
||||
if (CONFIG_PGTABLE_LEVELS > 4)
|
||||
if (pgtable_l5_enabled)
|
||||
free_page((unsigned long) pgd_page_vaddr(*pgd));
|
||||
free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
|
||||
return -ENOMEM;
|
||||
@ -255,8 +257,8 @@ void efi_sync_low_kernel_mappings(void)
|
||||
* only span a single PGD entry and that the entry also maps
|
||||
* other important kernel regions.
|
||||
*/
|
||||
BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
|
||||
BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
|
||||
MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
|
||||
MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
|
||||
(EFI_VA_END & PGDIR_MASK));
|
||||
|
||||
pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
|
||||
|
@ -199,6 +199,12 @@ void __init x86_intel_mid_early_setup(void)
|
||||
|
||||
legacy_pic = &null_legacy_pic;
|
||||
|
||||
/*
|
||||
* Do nothing for now as everything needed done in
|
||||
* x86_intel_mid_early_setup() below.
|
||||
*/
|
||||
x86_init.acpi.reduced_hw_early_init = x86_init_noop;
|
||||
|
||||
pm_power_off = intel_mid_power_off;
|
||||
machine_ops.emergency_restart = intel_mid_reboot;
|
||||
|
||||
|
@ -50,7 +50,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
|
||||
{
|
||||
pmd_t *pmd;
|
||||
pud_t *pud;
|
||||
p4d_t *p4d;
|
||||
p4d_t *p4d = NULL;
|
||||
|
||||
/*
|
||||
* The new mapping only has to cover the page containing the image
|
||||
@ -66,7 +66,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
|
||||
* tables used by the image kernel.
|
||||
*/
|
||||
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
if (pgtable_l5_enabled) {
|
||||
p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
|
||||
if (!p4d)
|
||||
return -ENOMEM;
|
||||
@ -84,7 +84,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
|
||||
__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
|
||||
set_pud(pud + pud_index(restore_jump_address),
|
||||
__pud(__pa(pmd) | _KERNPG_TABLE));
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
if (p4d) {
|
||||
set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE));
|
||||
set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE));
|
||||
} else {
|
||||
|
@ -18,9 +18,6 @@ config XEN_PV
|
||||
bool "Xen PV guest support"
|
||||
default y
|
||||
depends on XEN
|
||||
# XEN_PV is not ready to work with 5-level paging.
|
||||
# Changes to hypervisor are also required.
|
||||
depends on !X86_5LEVEL
|
||||
select XEN_HAVE_PVMMU
|
||||
select XEN_HAVE_VPMU
|
||||
help
|
||||
@ -79,6 +76,4 @@ config XEN_DEBUG_FS
|
||||
config XEN_PVH
|
||||
bool "Support for running as a PVH guest"
|
||||
depends on XEN && XEN_PVHVM && ACPI
|
||||
# Pre-built page tables are not ready to handle 5-level paging.
|
||||
depends on !X86_5LEVEL
|
||||
def_bool n
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <asm/io_apic.h>
|
||||
#include <asm/hypervisor.h>
|
||||
#include <asm/e820/api.h>
|
||||
#include <asm/x86_init.h>
|
||||
|
||||
#include <asm/xen/interface.h>
|
||||
#include <asm/xen/hypercall.h>
|
||||
@ -16,15 +17,20 @@
|
||||
/*
|
||||
* PVH variables.
|
||||
*
|
||||
* xen_pvh and pvh_bootparams need to live in data segment since they
|
||||
* are used after startup_{32|64}, which clear .bss, are invoked.
|
||||
* xen_pvh pvh_bootparams and pvh_start_info need to live in data segment
|
||||
* since they are used after startup_{32|64}, which clear .bss, are invoked.
|
||||
*/
|
||||
bool xen_pvh __attribute__((section(".data"))) = 0;
|
||||
struct boot_params pvh_bootparams __attribute__((section(".data")));
|
||||
struct hvm_start_info pvh_start_info __attribute__((section(".data")));
|
||||
|
||||
struct hvm_start_info pvh_start_info;
|
||||
unsigned int pvh_start_info_sz = sizeof(pvh_start_info);
|
||||
|
||||
static u64 pvh_get_root_pointer(void)
|
||||
{
|
||||
return pvh_start_info.rsdp_paddr;
|
||||
}
|
||||
|
||||
static void __init init_pvh_bootparams(void)
|
||||
{
|
||||
struct xen_memory_map memmap;
|
||||
@ -71,6 +77,8 @@ static void __init init_pvh_bootparams(void)
|
||||
*/
|
||||
pvh_bootparams.hdr.version = 0x212;
|
||||
pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
|
||||
|
||||
x86_init.acpi.get_root_pointer = pvh_get_root_pointer;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -538,6 +538,22 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)
|
||||
|
||||
xen_mc_issue(PARAVIRT_LAZY_MMU);
|
||||
}
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS >= 5
|
||||
__visible p4dval_t xen_p4d_val(p4d_t p4d)
|
||||
{
|
||||
return pte_mfn_to_pfn(p4d.p4d);
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_p4d_val);
|
||||
|
||||
__visible p4d_t xen_make_p4d(p4dval_t p4d)
|
||||
{
|
||||
p4d = pte_pfn_to_mfn(p4d);
|
||||
|
||||
return native_make_p4d(p4d);
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d);
|
||||
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
|
||||
@ -2411,6 +2427,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
|
||||
|
||||
.alloc_pud = xen_alloc_pmd_init,
|
||||
.release_pud = xen_release_pmd_init,
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS >= 5
|
||||
.p4d_val = PV_CALLEE_SAVE(xen_p4d_val),
|
||||
.make_p4d = PV_CALLEE_SAVE(xen_make_p4d),
|
||||
#endif
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
.activate_mm = xen_activate_mm,
|
||||
|
@ -189,12 +189,15 @@ early_param("acpi_rsdp", setup_acpi_rsdp);
|
||||
|
||||
acpi_physical_address __init acpi_os_get_root_pointer(void)
|
||||
{
|
||||
acpi_physical_address pa = 0;
|
||||
acpi_physical_address pa;
|
||||
|
||||
#ifdef CONFIG_KEXEC
|
||||
if (acpi_rsdp)
|
||||
return acpi_rsdp;
|
||||
#endif
|
||||
pa = acpi_arch_get_root_pointer();
|
||||
if (pa)
|
||||
return pa;
|
||||
|
||||
if (efi_enabled(EFI_CONFIG_TABLES)) {
|
||||
if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
|
||||
|
@ -8,6 +8,7 @@
|
||||
#define P4D_SHIFT PGDIR_SHIFT
|
||||
#define P4D_SIZE PGDIR_SIZE
|
||||
#define P4D_MASK PGDIR_MASK
|
||||
#define MAX_PTRS_PER_P4D 1
|
||||
#define PTRS_PER_P4D 1
|
||||
|
||||
#define p4d_t pgd_t
|
||||
|
@ -8,10 +8,11 @@
|
||||
|
||||
typedef struct { pgd_t pgd; } p4d_t;
|
||||
|
||||
#define P4D_SHIFT PGDIR_SHIFT
|
||||
#define PTRS_PER_P4D 1
|
||||
#define P4D_SIZE (1UL << P4D_SHIFT)
|
||||
#define P4D_MASK (~(P4D_SIZE-1))
|
||||
#define P4D_SHIFT PGDIR_SHIFT
|
||||
#define MAX_PTRS_PER_P4D 1
|
||||
#define PTRS_PER_P4D 1
|
||||
#define P4D_SIZE (1UL << P4D_SHIFT)
|
||||
#define P4D_MASK (~(P4D_SIZE-1))
|
||||
|
||||
/*
|
||||
* The "pgd_xxx()" functions here are trivial for a folded two-level
|
||||
|
@ -623,6 +623,13 @@ bool acpi_gtdt_c3stop(int type);
|
||||
int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count);
|
||||
#endif
|
||||
|
||||
#ifndef ACPI_HAVE_ARCH_GET_ROOT_POINTER
|
||||
static inline u64 acpi_arch_get_root_pointer(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#else /* !CONFIG_ACPI */
|
||||
|
||||
#define acpi_disabled 1
|
||||
|
@ -18,7 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE];
|
||||
extern pte_t kasan_zero_pte[PTRS_PER_PTE];
|
||||
extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
|
||||
extern pud_t kasan_zero_pud[PTRS_PER_PUD];
|
||||
extern p4d_t kasan_zero_p4d[PTRS_PER_P4D];
|
||||
extern p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D];
|
||||
|
||||
void kasan_populate_zero_shadow(const void *shadow_start,
|
||||
const void *shadow_end);
|
||||
|
@ -816,10 +816,6 @@ int local_memory_node(int node_id);
|
||||
static inline int local_memory_node(int node_id) { return node_id; };
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE
|
||||
unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
|
||||
*/
|
||||
@ -1289,7 +1285,6 @@ struct mminit_pfnnid_cache {
|
||||
#endif
|
||||
|
||||
void memory_present(int nid, unsigned long start, unsigned long end);
|
||||
unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
|
||||
|
||||
/*
|
||||
* If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
|
||||
|
@ -31,7 +31,7 @@
|
||||
unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 4
|
||||
p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss;
|
||||
p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss;
|
||||
#endif
|
||||
#if CONFIG_PGTABLE_LEVELS > 3
|
||||
pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
|
||||
|
22
mm/sparse.c
22
mm/sparse.c
@ -235,28 +235,6 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Only used by the i386 NUMA architecures, but relatively
|
||||
* generic code.
|
||||
*/
|
||||
unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
|
||||
unsigned long end_pfn)
|
||||
{
|
||||
unsigned long pfn;
|
||||
unsigned long nr_pages = 0;
|
||||
|
||||
mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
|
||||
for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
|
||||
if (nid != early_pfn_to_nid(pfn))
|
||||
continue;
|
||||
|
||||
if (pfn_present(pfn))
|
||||
nr_pages += PAGES_PER_SECTION;
|
||||
}
|
||||
|
||||
return nr_pages * sizeof(struct page);
|
||||
}
|
||||
|
||||
/*
|
||||
* Subtle, we encode the real pfn into the mem_map such that
|
||||
* the identity pfn - section_mem_map will return the actual
|
||||
|
@ -84,18 +84,19 @@
|
||||
* This is made more complicated by various memory models and PAE.
|
||||
*/
|
||||
|
||||
#ifndef MAX_PHYSMEM_BITS
|
||||
#ifdef CONFIG_HIGHMEM64G
|
||||
#define MAX_PHYSMEM_BITS 36
|
||||
#else /* !CONFIG_HIGHMEM64G */
|
||||
#ifndef MAX_POSSIBLE_PHYSMEM_BITS
|
||||
#ifdef MAX_PHYSMEM_BITS
|
||||
#define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS
|
||||
#else
|
||||
/*
|
||||
* If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
|
||||
* be PAGE_SHIFT
|
||||
*/
|
||||
#define MAX_PHYSMEM_BITS BITS_PER_LONG
|
||||
#define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG
|
||||
#endif
|
||||
#endif
|
||||
#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
|
||||
|
||||
#define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
|
||||
|
||||
/*
|
||||
* Memory for allocating for handle keeps object position by
|
||||
|
Loading…
Reference in New Issue
Block a user