// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/hugetlb.h>
#include <linux/bitops.h>
#include <linux/mmu_notifier.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

/**
 * struct apply_as - Closure structure for apply_as_range
 * @base: struct pfn_range_apply we derive from
 * @start: Address of first modified pte
 * @end: Address of last modified pte + 1
 * @total: Total number of modified ptes
 * @vma: Pointer to the struct vm_area_struct we're currently operating on
 */
struct apply_as {
	struct pfn_range_apply base;
	unsigned long start;
	unsigned long end;
	unsigned long total;
	struct vm_area_struct *vma;
};

/**
 * apply_pt_wrprotect - Leaf pte callback to write-protect a pte
 * @pte: Pointer to the pte
 * @token: Page table token, see apply_to_pfn_range()
 * @addr: The virtual page address
 * @closure: Pointer to a struct pfn_range_apply embedded in a
 * struct apply_as
 *
 * The function write-protects a pte and records the range in
 * virtual address space of touched ptes for efficient range TLB flushes.
 *
 * Return: Always zero.
 */
static int apply_pt_wrprotect(pte_t *pte, pgtable_t token,
			      unsigned long addr,
			      struct pfn_range_apply *closure)
{
	struct apply_as *aas = container_of(closure, typeof(*aas), base);
	pte_t ptent = *pte;

	if (pte_write(ptent)) {
		pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte);

		ptent = pte_wrprotect(old_pte);
		ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent);
		aas->total++;
		aas->start = min(aas->start, addr);
		aas->end = max(aas->end, addr + PAGE_SIZE);
	}

	return 0;
}

/**
 * struct apply_as_clean - Closure structure for apply_as_clean
 * @base: struct apply_as we derive from
 * @bitmap_pgoff: Address_space page offset of the first bit in @bitmap
 * @bitmap: Bitmap with one bit for each page offset in the address_space range
 * covered.
 * @start: Address_space page offset of first modified pte relative
 * to @bitmap_pgoff
 * @end: Address_space page offset of last modified pte + 1, relative
 * to @bitmap_pgoff
 */
struct apply_as_clean {
	struct apply_as base;
	pgoff_t bitmap_pgoff;
	unsigned long *bitmap;
	pgoff_t start;
	pgoff_t end;
};

/**
 * apply_pt_clean - Leaf pte callback to clean a pte
 * @pte: Pointer to the pte
 * @token: Page table token, see apply_to_pfn_range()
 * @addr: The virtual page address
 * @closure: Pointer to a struct pfn_range_apply embedded in a
 * struct apply_as_clean
 *
 * The function cleans a pte and records the range in
 * virtual address space of touched ptes for efficient TLB flushes.
 * It also records dirty ptes in a bitmap representing page offsets
 * in the address_space, as well as the first and last of the bits
 * touched.
 *
 * Return: Always zero.
 */
static int apply_pt_clean(pte_t *pte, pgtable_t token,
			  unsigned long addr,
			  struct pfn_range_apply *closure)
{
	struct apply_as *aas = container_of(closure, typeof(*aas), base);
	struct apply_as_clean *clean = container_of(aas, typeof(*clean), base);
	pte_t ptent = *pte;

	if (pte_dirty(ptent)) {
		/*
		 * Translate the virtual address to an address_space page
		 * offset, made relative to @bitmap_pgoff.
		 */
		pgoff_t pgoff = ((addr - aas->vma->vm_start) >> PAGE_SHIFT) +
			aas->vma->vm_pgoff - clean->bitmap_pgoff;
		pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte);

		ptent = pte_mkclean(old_pte);
		ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent);

		aas->total++;
		aas->start = min(aas->start, addr);
		aas->end = max(aas->end, addr + PAGE_SIZE);

		__set_bit(pgoff, clean->bitmap);
		clean->start = min(clean->start, pgoff);
		clean->end = max(clean->end, pgoff + 1);
	}

	return 0;
}

/**
 * apply_as_range - Apply a pte callback to all PTEs pointing into a range
 * of an address_space.
 * @mapping: Pointer to the struct address_space
 * @aas: Closure structure
 * @first_index: First page offset in the address_space
 * @nr: Number of incremental page offsets to cover
 *
 * Return: Number of ptes touched. Note that this number might be larger
 * than @nr if there are overlapping vmas.
 */
static unsigned long apply_as_range(struct address_space *mapping,
				    struct apply_as *aas,
				    pgoff_t first_index, pgoff_t nr)
{
	struct vm_area_struct *vma;
	pgoff_t vba, vea, cba, cea;
	unsigned long start_addr, end_addr;
	struct mmu_notifier_range range;

	i_mmap_lock_read(mapping);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
				  first_index + nr - 1) {
		unsigned long vm_flags = READ_ONCE(vma->vm_flags);

		/*
		 * We can only do advisory flag tests below, since we can't
		 * require the vma's mmap_sem to be held to protect the flags.
		 * Therefore, callers that strictly depend on specific mmap
		 * flags to remain constant throughout the operation must
		 * either ensure those flags are immutable for all relevant
		 * vmas or can't use this function. Fixing this properly would
		 * require the vma::vm_flags to be protected by a separate
		 * lock taken after the i_mmap_lock.
		 */

		/* Skip non-applicable VMAs */
		if ((vm_flags & (VM_SHARED | VM_WRITE)) !=
		    (VM_SHARED | VM_WRITE))
			continue;

		/* Warn on and skip VMAs whose flags indicate illegal usage */
		if (WARN_ON((vm_flags & (VM_HUGETLB | VM_IO)) != VM_IO))
			continue;

		/* Clip to the vma */
		vba = vma->vm_pgoff;
		vea = vba + vma_pages(vma);
		cba = first_index;
		cba = max(cba, vba);
		cea = first_index + nr;
		cea = min(cea, vea);

		/* Translate to virtual address */
		start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
		end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
		if (start_addr >= end_addr)
			continue;

		aas->base.mm = vma->vm_mm;
		aas->vma = vma;
		/*
		 * Start with an inverted range so that the min()/max()
		 * updates in the leaf callbacks narrow it down to the
		 * ptes actually touched.
		 */
		aas->start = end_addr;
		aas->end = start_addr;

		mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0,
					vma, vma->vm_mm, start_addr, end_addr);
		mmu_notifier_invalidate_range_start(&range);

		/* Needed when we only change protection? */
		flush_cache_range(vma, start_addr, end_addr);

		/*
		 * We're not using tlb_gather_mmu() since typically
		 * only a small subrange of PTEs are affected.
		 */
		inc_tlb_flush_pending(vma->vm_mm);

		/* Should not error since aas->base.alloc == 0 */
		WARN_ON(apply_to_pfn_range(&aas->base, start_addr,
					   end_addr - start_addr));
		if (aas->end > aas->start)
			flush_tlb_range(vma, aas->start, aas->end);

		mmu_notifier_invalidate_range_end(&range);
		dec_tlb_flush_pending(vma->vm_mm);
	}
	i_mmap_unlock_read(mapping);

	return aas->total;
}
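
/*
 * A note on extending this walker (illustration only, not part of the
 * original file): other pte manipulations can be layered on
 * apply_as_range() the same way apply_as_wrprotect() below does: embed
 * struct apply_as (or a closure derived from it) and point .ptefn at a
 * new leaf callback. A minimal hypothetical sketch, where
 * apply_pt_mkold() would be a made-up callback clearing the young bit
 * with pte_mkold():
 *
 *	static unsigned long apply_as_mkold(struct address_space *mapping,
 *					    pgoff_t first_index, pgoff_t nr)
 *	{
 *		struct apply_as aas = {
 *			.base = {
 *				.alloc = 0,
 *				.ptefn = apply_pt_mkold,
 *			},
 *			.total = 0,
 *		};
 *
 *		return apply_as_range(mapping, &aas, first_index, nr);
 *	}
 */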

/**
 * apply_as_wrprotect - Write-protect all ptes in an address_space range
 * @mapping: The address_space we want to write protect
 * @first_index: The first page offset in the range
 * @nr: Number of incremental page offsets to cover
 *
 * WARNING: This function should only be used for address spaces whose
 * vmas are marked VM_IO and that do not contain huge pages.
 * To avoid interference with COW'd pages, vmas not marked VM_SHARED are
 * simply skipped.
 *
 * Return: The number of ptes actually write-protected. Note that
 * already write-protected ptes are not counted.
 */
unsigned long apply_as_wrprotect(struct address_space *mapping,
				 pgoff_t first_index, pgoff_t nr)
{
	struct apply_as aas = {
		.base = {
			.alloc = 0,
			.ptefn = apply_pt_wrprotect,
		},
		.total = 0,
	};

	return apply_as_range(mapping, &aas, first_index, nr);
}
EXPORT_SYMBOL_GPL(apply_as_wrprotect);
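
/*
 * Usage sketch (hypothetical, for illustration only): a driver that
 * wants to catch the next write to a VM_IO mapping can write-protect
 * the range and observe the resulting faults in its page_mkwrite() or
 * pfn_mkwrite() handler. "my_mapping", "my_pgoff" and "my_npages" are
 * made-up names:
 *
 *	unsigned long nmodified;
 *
 *	nmodified = apply_as_wrprotect(my_mapping, my_pgoff, my_npages);
 *	// nmodified ptes were writable and are now write-protected
 */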

/**
 * apply_as_clean - Clean all ptes in an address_space range
 * @mapping: The address_space we want to clean
 * @first_index: The first page offset in the range
 * @nr: Number of incremental page offsets to cover
 * @bitmap_pgoff: The page offset of the first bit in @bitmap
 * @bitmap: Pointer to a bitmap of at least @nr bits. The bitmap needs to
 * cover the whole range @first_index..@first_index + @nr.
 * @start: Pointer to the bit number of the first set bit in @bitmap.
 * Should point to a value >= *@end on entry if no bits are set. The value
 * is updated as new bits are set by the function.
 * @end: Pointer to the bit number after the last set bit in @bitmap.
 * Should point to a value <= *@start on entry if no bits are set. The
 * value is updated as new bits are set by the function.
 *
 * Note: When this function returns there is no guarantee that a CPU has
 * not already dirtied new ptes. However it will not clean any ptes not
 * reported in the bitmap.
 *
 * If a caller needs to make sure all dirty ptes are picked up and none
 * additional are added, it first needs to write-protect the address-space
 * range and make sure new writers are blocked in page_mkwrite() or
 * pfn_mkwrite(). Then, after a TLB flush following the write-protection,
 * it can pick up all dirty bits.
 *
 * WARNING: This function should only be used for address spaces whose
 * vmas are marked VM_IO and that do not contain huge pages.
 * To avoid interference with COW'd pages, vmas not marked VM_SHARED are
 * simply skipped.
 *
 * Return: The number of dirty ptes actually cleaned.
 */
unsigned long apply_as_clean(struct address_space *mapping,
			     pgoff_t first_index, pgoff_t nr,
			     pgoff_t bitmap_pgoff,
			     unsigned long *bitmap,
			     pgoff_t *start,
			     pgoff_t *end)
{
	bool none_set = (*start >= *end);
	struct apply_as_clean clean = {
		.base = {
			.base = {
				.alloc = 0,
				.ptefn = apply_pt_clean,
			},
			.total = 0,
		},
		.bitmap_pgoff = bitmap_pgoff,
		.bitmap = bitmap,
		.start = none_set ? nr : *start,
		.end = none_set ? 0 : *end,
	};
	unsigned long ret = apply_as_range(mapping, &clean.base, first_index,
					   nr);

	*start = clean.start;
	*end = clean.end;
	return ret;
}
EXPORT_SYMBOL_GPL(apply_as_clean);
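
/*
 * Usage sketch (hypothetical, for illustration only): picking up pages
 * dirtied through a VM_IO mapping, following the protocol described in
 * the kerneldoc above: write-protect first so that new writers fault,
 * then clean and walk the bitmap. "mapping", "first" and "num_pages"
 * are made-up names and flush_one_page() is a made-up driver hook:
 *
 *	pgoff_t start = num_pages, end = 0;
 *	unsigned long i;
 *	unsigned long *bitmap = bitmap_zalloc(num_pages, GFP_KERNEL);
 *
 *	if (!bitmap)
 *		return -ENOMEM;
 *	apply_as_wrprotect(mapping, first, num_pages);
 *	apply_as_clean(mapping, first, num_pages, first, bitmap,
 *		       &start, &end);
 *	for_each_set_bit(i, bitmap, num_pages)
 *		flush_one_page(first + i);
 *	bitmap_free(bitmap);
 */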