linux_dsm_epyc7002/arch/riscv/kernel/head.S
Anup Patel 671f9a3e2e RISC-V: Setup initial page tables in two stages
Currently, the setup_vm() does initial page table setup in one-shot
very early before enabling MMU. Due to this, the setup_vm() has to map
all possible kernel virtual addresses since it does not know size and
location of RAM. This means we have kernel mappings for non-existent
RAM and any buggy driver (or kernel) code doing out-of-bound access
to RAM will not fault and cause underterministic behaviour.

Further, the setup_vm() creates PMD mappings (i.e. 2M mappings) for
RV64 systems. This means for PAGE_OFFSET=0xffffffe000000000 (i.e.
MAXPHYSMEM_128GB=y), the setup_vm() will require 129 pages (i.e.
516 KB) of memory for initial page tables which is never freed. The
memory required for initial page tables will further increase if
we chose a lower value of PAGE_OFFSET (e.g. 0xffffff0000000000)

This patch implements two-staged initial page table setup, as follows:
1. Early (i.e. setup_vm()): This stage maps kernel image and DTB in
a early page table (i.e. early_pg_dir). The early_pg_dir will be used
only by boot HART so it can be freed as-part of init memory free-up.
2. Final (i.e. setup_vm_final()): This stage maps all possible RAM
banks in the final page table (i.e. swapper_pg_dir). The boot HART
will start using swapper_pg_dir at the end of setup_vm_final(). All
non-boot HARTs directly use the swapper_pg_dir created by boot HART.

We have following advantages with this new approach:
1. Kernel mappings for non-existent RAM don't exists anymore.
2. Memory consumed by initial page tables is now indpendent of the
chosen PAGE_OFFSET.
3. Memory consumed by initial page tables on RV64 system is 2 pages
(i.e. 8 KB) which has significantly reduced and these pages will be
freed as-part of the init memory free-up.

The patch also provides a foundation for implementing strict kernel
mappings where we protect kernel text and rodata using PTE permissions.

Suggested-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Anup Patel <anup.patel@wdc.com>
[paul.walmsley@sifive.com: updated to apply; fixed a checkpatch warning]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
2019-07-09 09:08:04 -07:00

167 lines
3.5 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 Regents of the University of California
*/
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/asm.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/csr.h>
__INIT
ENTRY(_start)
/* Mask all interrupts */
csrw CSR_SIE, zero
csrw CSR_SIP, zero
/* Load the global pointer */
.option push
.option norelax
la gp, __global_pointer$
.option pop
/*
* Disable FPU to detect illegal usage of
* floating point in kernel space
*/
li t0, SR_FS
csrc sstatus, t0
/* Pick one hart to run the main boot sequence */
la a3, hart_lottery
li a2, 1
amoadd.w a3, a2, (a3)
bnez a3, .Lsecondary_start
/* Clear BSS for flat non-ELF images */
la a3, __bss_start
la a4, __bss_stop
ble a4, a3, clear_bss_done
clear_bss:
REG_S zero, (a3)
add a3, a3, RISCV_SZPTR
blt a3, a4, clear_bss
clear_bss_done:
/* Save hart ID and DTB physical address */
mv s0, a0
mv s1, a1
la a2, boot_cpu_hartid
REG_S a0, (a2)
/* Initialize page tables and relocate to virtual addresses */
la sp, init_thread_union + THREAD_SIZE
mv a0, s1
call setup_vm
la a0, early_pg_dir
call relocate
/* Restore C environment */
la tp, init_task
sw zero, TASK_TI_CPU(tp)
la sp, init_thread_union + THREAD_SIZE
/* Start the kernel */
call parse_dtb
tail start_kernel
relocate:
/* Relocate return address */
li a1, PAGE_OFFSET
la a2, _start
sub a1, a1, a2
add ra, ra, a1
/* Point stvec to virtual address of intruction after satp write */
la a2, 1f
add a2, a2, a1
csrw CSR_STVEC, a2
/* Compute satp for kernel page tables, but don't load it yet */
srl a2, a0, PAGE_SHIFT
li a1, SATP_MODE
or a2, a2, a1
/*
* Load trampoline page directory, which will cause us to trap to
* stvec if VA != PA, or simply fall through if VA == PA. We need a
* full fence here because setup_vm() just wrote these PTEs and we need
* to ensure the new translations are in use.
*/
la a0, trampoline_pg_dir
srl a0, a0, PAGE_SHIFT
or a0, a0, a1
sfence.vma
csrw CSR_SATP, a0
.align 2
1:
/* Set trap vector to spin forever to help debug */
la a0, .Lsecondary_park
csrw CSR_STVEC, a0
/* Reload the global pointer */
.option push
.option norelax
la gp, __global_pointer$
.option pop
/*
* Switch to kernel page tables. A full fence is necessary in order to
* avoid using the trampoline translations, which are only correct for
* the first superpage. Fetching the fence is guarnteed to work
* because that first superpage is translated the same way.
*/
csrw CSR_SATP, a2
sfence.vma
ret
.Lsecondary_start:
#ifdef CONFIG_SMP
li a1, CONFIG_NR_CPUS
bgeu a0, a1, .Lsecondary_park
/* Set trap vector to spin forever to help debug */
la a3, .Lsecondary_park
csrw CSR_STVEC, a3
slli a3, a0, LGREG
la a1, __cpu_up_stack_pointer
la a2, __cpu_up_task_pointer
add a1, a3, a1
add a2, a3, a2
/*
* This hart didn't win the lottery, so we wait for the winning hart to
* get far enough along the boot process that it should continue.
*/
.Lwait_for_cpu_up:
/* FIXME: We should WFI to save some energy here. */
REG_L sp, (a1)
REG_L tp, (a2)
beqz sp, .Lwait_for_cpu_up
beqz tp, .Lwait_for_cpu_up
fence
/* Enable virtual memory and relocate to virtual address */
la a0, swapper_pg_dir
call relocate
tail smp_callin
#endif
.align 2
.Lsecondary_park:
/* We lack SMP support or have too many harts, so park this hart */
wfi
j .Lsecondary_park
END(_start)
__PAGE_ALIGNED_BSS
/* Empty zero page */
.balign PAGE_SIZE