mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-13 01:26:32 +07:00
e35735b9a5
Unroll clear_page 8 times. A simple microbenchmark which allocates and frees a zeroed page: for (i = 0; i < iterations; i++) { unsigned long p = __get_free_page(GFP_KERNEL | __GFP_ZERO); free_page(p); } improves 20% on POWER8. This assumes cacheline sizes won't grow beyond 512 bytes or page sizes won't drop below 1kB, which is unlikely, but we could add a runtime check during early init if it makes people nervous. Michael found that some versions of gcc produce quite bad code (all multiplies), so we give gcc a hand by using shifts and adds. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
188 lines
5.0 KiB
C
188 lines
5.0 KiB
C
#ifndef _ASM_POWERPC_PAGE_64_H
|
|
#define _ASM_POWERPC_PAGE_64_H
|
|
|
|
/*
|
|
* Copyright (C) 2001 PPC64 Team, IBM Corp
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
/*
|
|
* We always define HW_PAGE_SHIFT to 12 as use of 64K pages remains Linux
|
|
* specific, every notion of page number shared with the firmware, TCEs,
|
|
* iommu, etc... still uses a page size of 4K.
|
|
*/
|
|
/* log2 of the hardware page size: 1 << 12 = 4K */
#define HW_PAGE_SHIFT	12
/* Hardware page size in bytes */
#define HW_PAGE_SIZE	(ASM_CONST(1) << HW_PAGE_SHIFT)
/* Mask that clears the offset-within-hardware-page bits of an address */
#define HW_PAGE_MASK	(~(HW_PAGE_SIZE-1))
|
|
|
|
/*
|
|
* PAGE_FACTOR is the difference, in bits, between PAGE_SHIFT and
|
|
* HW_PAGE_SHIFT, that is 4K pages.
|
|
*/
|
|
#define PAGE_FACTOR (PAGE_SHIFT - HW_PAGE_SHIFT)
|
|
|
|
/* Segment size; normal 256M segments */
|
|
/* log2 of the segment size: 1 << 28 = 256M */
#define SID_SHIFT	28
/* 36-bit mask applied to the segment number after shifting */
#define SID_MASK	ASM_CONST(0xfffffffff)
/* In-place mask selecting address bits 28 and above */
#define ESID_MASK	0xfffffffff0000000UL
/* Extract the 256M segment number of address x */
#define GET_ESID(x)	(((x) >> SID_SHIFT) & SID_MASK)
|
|
|
|
/* 1T segments */
|
|
/* log2 of the 1T segment size: 1 << 40 */
#define SID_SHIFT_1T	40
/* 24-bit mask applied to the 1T segment number after shifting */
#define SID_MASK_1T	0xffffffUL
/* In-place mask selecting address bits 40 and above */
#define ESID_MASK_1T	0xffffff0000000000UL
/* Extract the 1T segment number of address x */
#define GET_ESID_1T(x)	(((x) >> SID_SHIFT_1T) & SID_MASK_1T)
|
|
|
|
#ifndef __ASSEMBLY__
|
|
#include <asm/cache.h>
|
|
|
|
typedef unsigned long pte_basic_t;
|
|
|
|
static inline void clear_page(void *addr)
|
|
{
|
|
unsigned long iterations;
|
|
unsigned long onex, twox, fourx, eightx;
|
|
|
|
iterations = ppc64_caches.dlines_per_page / 8;
|
|
|
|
/*
|
|
* Some verisions of gcc use multiply instructions to
|
|
* calculate the offsets so lets give it a hand to
|
|
* do better.
|
|
*/
|
|
onex = ppc64_caches.dline_size;
|
|
twox = onex << 1;
|
|
fourx = onex << 2;
|
|
eightx = onex << 3;
|
|
|
|
asm volatile(
|
|
"mtctr %1 # clear_page\n\
|
|
.balign 16\n\
|
|
1: dcbz 0,%0\n\
|
|
dcbz %3,%0\n\
|
|
dcbz %4,%0\n\
|
|
dcbz %5,%0\n\
|
|
dcbz %6,%0\n\
|
|
dcbz %7,%0\n\
|
|
dcbz %8,%0\n\
|
|
dcbz %9,%0\n\
|
|
add %0,%0,%10\n\
|
|
bdnz+ 1b"
|
|
: "=&r" (addr)
|
|
: "r" (iterations), "0" (addr), "b" (onex), "b" (twox),
|
|
"b" (twox+onex), "b" (fourx), "b" (fourx+onex),
|
|
"b" (twox+fourx), "b" (eightx-onex), "r" (eightx)
|
|
: "ctr", "memory");
|
|
}
|
|
|
|
extern void copy_page(void *to, void *from);
|
|
|
|
/* Log 2 of page table size */
|
|
extern u64 ppc64_pft_size;
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#ifdef CONFIG_PPC_MM_SLICES
|
|
|
|
/* log2 of the low slice size (1 << 28 = 256M) and high slice size (1 << 40 = 1T) */
#define SLICE_LOW_SHIFT		28
#define SLICE_HIGH_SHIFT	40

/* Upper bound of the low-slice range: 4G */
#define SLICE_LOW_TOP		(0x100000000ul)
/* Number of low slices: 4G / 256M = 16 */
#define SLICE_NUM_LOW		(SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
/* Number of high slices: PGTABLE_RANGE divided into 1T units */
#define SLICE_NUM_HIGH		(PGTABLE_RANGE >> SLICE_HIGH_SHIFT)

/* Slice index of an address, in low-slice and high-slice units respectively */
#define GET_LOW_SLICE_INDEX(addr)	((addr) >> SLICE_LOW_SHIFT)
#define GET_HIGH_SLICE_INDEX(addr)	((addr) >> SLICE_HIGH_SHIFT)
|
|
|
|
/*
|
|
* 1 bit per slice and we have one slice per 1TB
|
|
* Right now we support only 64TB.
|
|
* If we change this, we will have to change the type
|
|
* of high_slices
|
|
*/
|
|
#define SLICE_MASK_SIZE 8
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
struct slice_mask {
|
|
u16 low_slices;
|
|
u64 high_slices;
|
|
};
|
|
|
|
struct mm_struct;
|
|
|
|
/*
 * Slice interface, defined outside this header.  Only the signatures are
 * visible here; see the implementation for the exact semantics.
 */

/* Returns an address for a request of @len bytes using page size index @psize. */
extern unsigned long slice_get_unmapped_area(unsigned long addr,
					     unsigned long len,
					     unsigned long flags,
					     unsigned int psize,
					     int topdown);

/* Returns the page size index that applies to @addr in @mm. */
extern unsigned int get_slice_psize(struct mm_struct *mm,
				    unsigned long addr);

/* Set the page size index for @mm as a whole, or for [start, start + len). */
extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long len, unsigned int psize);
|
|
|
|
#define slice_mm_new_context(mm) ((mm)->context.id == MMU_NO_CONTEXT)
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
#else
|
|
#define slice_init()
|
|
#ifdef CONFIG_PPC_STD_MMU_64
|
|
#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
|
|
/*
 * Without slices there is one user page size per mm: record it and
 * recompute the matching SLB encoding from the page size table.
 */
#define slice_set_user_psize(mm, psize)					\
do {									\
	(mm)->context.user_psize = (psize);				\
	(mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
} while (0)
|
|
#else /* CONFIG_PPC_STD_MMU_64 */
|
|
#ifdef CONFIG_PPC_64K_PAGES
|
|
#define get_slice_psize(mm, addr) MMU_PAGE_64K
|
|
#else /* CONFIG_PPC_64K_PAGES */
|
|
#define get_slice_psize(mm, addr) MMU_PAGE_4K
|
|
#endif /* !CONFIG_PPC_64K_PAGES */
|
|
#define slice_set_user_psize(mm, psize) do { BUG(); } while(0)
|
|
#endif /* !CONFIG_PPC_STD_MMU_64 */
|
|
|
|
/*
 * Without slices a page size cannot be set per range: @start and @len are
 * ignored and the page size is applied to the whole mm.
 */
#define slice_set_range_psize(mm, start, len, psize)	\
	slice_set_user_psize((mm), (psize))
|
|
#define slice_mm_new_context(mm) 1
|
|
#endif /* CONFIG_PPC_MM_SLICES */
|
|
|
|
#ifdef CONFIG_HUGETLB_PAGE
|
|
|
|
#ifdef CONFIG_PPC_MM_SLICES
|
|
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
|
|
#endif
|
|
|
|
#endif /* CONFIG_HUGETLB_PAGE */
|
|
|
|
/* Default data mapping flags, selected by whether the current task is 32-bit. */
#define VM_DATA_DEFAULT_FLAGS \
	(is_32bit_task() ? \
	 VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
|
|
|
|
/*
|
|
* This is the default if a program doesn't have a PT_GNU_STACK
|
|
* program header entry. The PPC64 ELF ABI has a non executable stack
|
|
* by default, so in the absence of a PT_GNU_STACK program header
|
|
* we turn execute permission off.
|
|
*/
|
|
/* 32-bit tasks: stack is readable, writable and executable by default. */
#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \
				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)

/* 64-bit tasks: VM_EXEC is left out, but VM_MAYEXEC is still set. */
#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \
				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)

/* Default stack flags, selected by whether the current task is 32-bit. */
#define VM_STACK_DEFAULT_FLAGS \
	(is_32bit_task() ? \
	 VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)
|
|
|
|
#include <asm-generic/getorder.h>
|
|
|
|
#endif /* _ASM_POWERPC_PAGE_64_H */
|