linux_dsm_epyc7002/arch/sparc/mm/hugetlbpage.c
Nitin Gupta 7bc3777ca1 sparc64: Trim page tables for 8M hugepages
For PMD aligned (8M) hugepages, we currently allocate
all four page table levels which is wasteful. We now
allocate till PMD level only which saves memory usage
from page tables.

Also, when freeing page table for 8M hugepage backed region,
make sure we don't try to access non-existent PTE level.

Orabug: 22630259

Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-07-29 10:49:16 -07:00

303 lines
6.7 KiB
C

/*
* SPARC64 Huge TLB page support.
*
* Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
*/
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
/* Slightly simplified from the non-hugepage variant because by
* definition we don't have to worry about any page coloring stuff
*/
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
unsigned long addr,
unsigned long len,
unsigned long pgoff,
unsigned long flags)
{
unsigned long task_size = TASK_SIZE;
struct vm_unmapped_area_info info;
if (test_thread_flag(TIF_32BIT))
task_size = STACK_TOP32;
info.flags = 0;
info.length = len;
info.low_limit = TASK_UNMAPPED_BASE;
info.high_limit = min(task_size, VA_EXCLUDE_START);
info.align_mask = PAGE_MASK & ~HPAGE_MASK;
info.align_offset = 0;
addr = vm_unmapped_area(&info);
if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
VM_BUG_ON(addr != -ENOMEM);
info.low_limit = VA_EXCLUDE_END;
info.high_limit = task_size;
addr = vm_unmapped_area(&info);
}
return addr;
}
static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
const unsigned long len,
const unsigned long pgoff,
const unsigned long flags)
{
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
/* This should only ever run for 32-bit processes. */
BUG_ON(!test_thread_flag(TIF_32BIT));
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = PAGE_SIZE;
info.high_limit = mm->mmap_base;
info.align_mask = PAGE_MASK & ~HPAGE_MASK;
info.align_offset = 0;
addr = vm_unmapped_area(&info);
/*
* A failed mmap() very likely causes application failure,
* so fall back to the bottom-up function here. This scenario
* can happen with large stack limits and large mmap()
* allocations.
*/
if (addr & ~PAGE_MASK) {
VM_BUG_ON(addr != -ENOMEM);
info.flags = 0;
info.low_limit = TASK_UNMAPPED_BASE;
info.high_limit = STACK_TOP32;
addr = vm_unmapped_area(&info);
}
return addr;
}
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long task_size = TASK_SIZE;
if (test_thread_flag(TIF_32BIT))
task_size = STACK_TOP32;
if (len & ~HPAGE_MASK)
return -EINVAL;
if (len > task_size)
return -ENOMEM;
if (flags & MAP_FIXED) {
if (prepare_hugepage_range(file, addr, len))
return -EINVAL;
return addr;
}
if (addr) {
addr = ALIGN(addr, HPAGE_SIZE);
vma = find_vma(mm, addr);
if (task_size - len >= addr &&
(!vma || addr + len <= vma->vm_start))
return addr;
}
if (mm->get_unmapped_area == arch_get_unmapped_area)
return hugetlb_get_unmapped_area_bottomup(file, addr, len,
pgoff, flags);
else
return hugetlb_get_unmapped_area_topdown(file, addr, len,
pgoff, flags);
}
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
if (pud)
pte = (pte_t *)pmd_alloc(mm, pud, addr);
return pte;
}
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
if (!pgd_none(*pgd)) {
pud = pud_offset(pgd, addr);
if (!pud_none(*pud))
pte = (pte_t *)pmd_offset(pud, addr);
}
return pte;
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t entry)
{
pte_t orig;
if (!pte_present(*ptep) && pte_present(entry))
mm->context.hugetlb_pte_count++;
addr &= HPAGE_MASK;
orig = *ptep;
*ptep = entry;
/* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
maybe_tlb_batch_add(mm, addr, ptep, orig, 0);
maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0);
}
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
pte_t entry;
entry = *ptep;
if (pte_present(entry))
mm->context.hugetlb_pte_count--;
addr &= HPAGE_MASK;
*ptep = __pte(0UL);
/* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
maybe_tlb_batch_add(mm, addr, ptep, entry, 0);
maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0);
return entry;
}
int pmd_huge(pmd_t pmd)
{
return !pmd_none(pmd) &&
(pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID;
}
int pud_huge(pud_t pud)
{
return 0;
}
static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
unsigned long addr)
{
pgtable_t token = pmd_pgtable(*pmd);
pmd_clear(pmd);
pte_free_tlb(tlb, token, addr);
atomic_long_dec(&tlb->mm->nr_ptes);
}
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
unsigned long addr, unsigned long end,
unsigned long floor, unsigned long ceiling)
{
pmd_t *pmd;
unsigned long next;
unsigned long start;
start = addr;
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
if (pmd_none(*pmd))
continue;
if (is_hugetlb_pmd(*pmd))
pmd_clear(pmd);
else
hugetlb_free_pte_range(tlb, pmd, addr);
} while (pmd++, addr = next, addr != end);
start &= PUD_MASK;
if (start < floor)
return;
if (ceiling) {
ceiling &= PUD_MASK;
if (!ceiling)
return;
}
if (end - 1 > ceiling - 1)
return;
pmd = pmd_offset(pud, start);
pud_clear(pud);
pmd_free_tlb(tlb, pmd, start);
mm_dec_nr_pmds(tlb->mm);
}
static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
unsigned long addr, unsigned long end,
unsigned long floor, unsigned long ceiling)
{
pud_t *pud;
unsigned long next;
unsigned long start;
start = addr;
pud = pud_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud))
continue;
hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
ceiling);
} while (pud++, addr = next, addr != end);
start &= PGDIR_MASK;
if (start < floor)
return;
if (ceiling) {
ceiling &= PGDIR_MASK;
if (!ceiling)
return;
}
if (end - 1 > ceiling - 1)
return;
pud = pud_offset(pgd, start);
pgd_clear(pgd);
pud_free_tlb(tlb, pud, start);
}
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
unsigned long addr, unsigned long end,
unsigned long floor, unsigned long ceiling)
{
pgd_t *pgd;
unsigned long next;
pgd = pgd_offset(tlb->mm, addr);
do {
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
} while (pgd++, addr = next, addr != end);
}