linux_dsm_epyc7002/arch/s390/include/asm/page.h
Gerald Schaefer 5f490a520b s390/mm: fix dynamic pagetable upgrade for hugetlbfs
Commit ee71d16d22 ("s390/mm: make TASK_SIZE independent from the number
of page table levels") changed the logic of TASK_SIZE and also removed the
arch_mmap_check() implementation for s390. This combination has a subtle
effect on how get_unmapped_area() for hugetlbfs pages works. It is now
possible that a user process establishes a hugetlbfs mapping at an address
above 4 TB, without triggering a dynamic pagetable upgrade from 3 to 4
levels.

This is because hugetlbfs mappings will not use mm->get_unmapped_area, but
rather file->f_op->get_unmapped_area, which currently is the generic
implementation of hugetlb_get_unmapped_area() that does not know about s390
dynamic pagetable upgrades, but with the new definition of TASK_SIZE, it
will now allow mappings above 4 TB.

Subsequent access to such a mapped address above 4 TB will result in a page
fault loop, because the CPU cannot translate such a large address with 3
pagetable levels. The fault handler will try to map in a hugepage at the
address, but due to the folded pagetable logic it will end up with creating
entries in the 3 level pagetable, possibly overwriting existing mappings,
and then it all repeats when the access is retried.

Apart from the page fault loop, this can have various nasty effects, e.g.
kernel panic from one of the BUG_ON() checks in memory management code,
or even data loss if an existing mapping gets overwritten.

Fix this by implementing HAVE_ARCH_HUGETLB_UNMAPPED_AREA support for s390,
providing an s390 version for hugetlb_get_unmapped_area() with pagetable
upgrade support similar to arch_get_unmapped_area(), which will then be
used instead of the generic version.

Fixes: ee71d16d22 ("s390/mm: make TASK_SIZE independent from the number of page table levels")
Cc: <stable@vger.kernel.org> # 4.12+
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2020-01-30 13:07:54 +01:00

186 lines
5.1 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
/*
* S390 version
* Copyright IBM Corp. 1999, 2000
* Author(s): Hartmut Penner (hp@de.ibm.com)
*/
#ifndef _S390_PAGE_H
#define _S390_PAGE_H
#include <linux/const.h>
#include <asm/types.h>
#define _PAGE_SHIFT 12
#define _PAGE_SIZE (_AC(1, UL) << _PAGE_SHIFT)
#define _PAGE_MASK (~(_PAGE_SIZE - 1))
/* PAGE_SHIFT determines the page size */
#define PAGE_SHIFT _PAGE_SHIFT
#define PAGE_SIZE _PAGE_SIZE
#define PAGE_MASK _PAGE_MASK
#define PAGE_DEFAULT_ACC 0
#define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
#define HPAGE_SHIFT 20
#define HPAGE_SIZE (1UL << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define HUGE_MAX_HSTATE 2
#define ARCH_HAS_SETCLEAR_HUGE_PTE
#define ARCH_HAS_HUGE_PTE_TYPE
#define ARCH_HAS_PREPARE_HUGEPAGE
#define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#include <asm/setup.h>
#ifndef __ASSEMBLY__
void __storage_key_init_range(unsigned long start, unsigned long end);
static inline void storage_key_init_range(unsigned long start, unsigned long end)
{
if (PAGE_DEFAULT_KEY)
__storage_key_init_range(start, end);
}
#define clear_page(page) memset((page), 0, PAGE_SIZE)
/*
* copy_page uses the mvcl instruction with 0xb0 padding byte in order to
* bypass caches when copying a page. Especially when copying huge pages
* this keeps L1 and L2 data caches alive.
*/
static inline void copy_page(void *to, void *from)
{
register void *reg2 asm ("2") = to;
register unsigned long reg3 asm ("3") = 0x1000;
register void *reg4 asm ("4") = from;
register unsigned long reg5 asm ("5") = 0xb0001000;
asm volatile(
" mvcl 2,4"
: "+d" (reg2), "+d" (reg3), "+d" (reg4), "+d" (reg5)
: : "memory", "cc");
}
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
* These are used to make use of C type-checking..
*/
typedef struct { unsigned long pgprot; } pgprot_t;
typedef struct { unsigned long pgste; } pgste_t;
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pud; } pud_t;
typedef struct { unsigned long p4d; } p4d_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef pte_t *pgtable_t;
#define pgprot_val(x) ((x).pgprot)
#define pgste_val(x) ((x).pgste)
#define pte_val(x) ((x).pte)
#define pmd_val(x) ((x).pmd)
#define pud_val(x) ((x).pud)
#define p4d_val(x) ((x).p4d)
#define pgd_val(x) ((x).pgd)
#define __pgste(x) ((pgste_t) { (x) } )
#define __pte(x) ((pte_t) { (x) } )
#define __pmd(x) ((pmd_t) { (x) } )
#define __pud(x) ((pud_t) { (x) } )
#define __p4d(x) ((p4d_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
static inline void page_set_storage_key(unsigned long addr,
unsigned char skey, int mapped)
{
if (!mapped)
asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0"
: : "d" (skey), "a" (addr));
else
asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
}
static inline unsigned char page_get_storage_key(unsigned long addr)
{
unsigned char skey;
asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr));
return skey;
}
static inline int page_reset_referenced(unsigned long addr)
{
int cc;
asm volatile(
" rrbe 0,%1\n"
" ipm %0\n"
" srl %0,28\n"
: "=d" (cc) : "a" (addr) : "cc");
return cc;
}
/* Bits int the storage key */
#define _PAGE_CHANGED 0x02 /* HW changed bit */
#define _PAGE_REFERENCED 0x04 /* HW referenced bit */
#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */
#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */
struct page;
void arch_free_page(struct page *page, int order);
void arch_alloc_page(struct page *page, int order);
void arch_set_page_dat(struct page *page, int order);
void arch_set_page_nodat(struct page *page, int order);
int arch_test_page_nodat(struct page *page);
void arch_set_page_states(int make_stable);
static inline int devmem_is_allowed(unsigned long pfn)
{
return 0;
}
#define HAVE_ARCH_FREE_PAGE
#define HAVE_ARCH_ALLOC_PAGE
#endif /* !__ASSEMBLY__ */
#define __PAGE_OFFSET 0x0UL
#define PAGE_OFFSET 0x0UL
#define __pa(x) ((unsigned long)(x))
#define __va(x) ((void *)(unsigned long)(x))
#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT)
#define pfn_to_kaddr(pfn) pfn_to_virt(pfn)
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define page_to_virt(page) pfn_to_virt(page_to_pfn(page))
#define phys_to_pfn(kaddr) ((kaddr) >> PAGE_SHIFT)
#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
#define phys_to_page(kaddr) pfn_to_page(phys_to_pfn(kaddr))
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
#endif /* _S390_PAGE_H */