mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
15b244a88e
We are adding support for DMA memory pre-registration to be used in conjunction with VFIO. The idea is that the userspace which is going to run a guest may want to pre-register a user space memory region so it all gets pinned once and never goes away. Having this done, a hypervisor will not have to pin/unpin pages on every DMA map/unmap request. This is going to help with multiple pinning of the same memory. Another use of it is in-kernel real mode (mmu off) acceleration of DMA requests where real time translation of guest physical to host physical addresses is non-trivial and may fail as linux ptes may be temporarily invalid. Also, having cached host physical addresses (compared to just pinning at the start and then walking the page table again on every H_PUT_TCE), we can be sure that the addresses which we put into TCE table are the ones we already pinned. This adds a list of memory regions to mm_context_t. Each region consists of a header and a list of physical addresses. This adds API to: 1. register/unregister memory regions; 2. do final cleanup (which puts all pre-registered pages); 3. do userspace to physical address translation; 4. manage usage counters; multiple registration of the same memory is allowed (once per container). This implements 2 counters per registered memory region: - @mapped: incremented on every DMA mapping; decremented on unmapping; initialized to 1 when a region is just registered; once it becomes zero, no more mappings allowe; - @used: incremented on every "register" ioctl; decremented on "unregister"; unregistration is allowed for DMA mapped regions unless it is the very last reference. For the very last reference this checks that the region is still mapped and returns -EBUSY so the userspace gets to know that memory is still pinned and unregistration needs to be retried; @used remains 1. Host physical addresses are stored in vmalloc'ed array. In order to access these in the real mode (mmu off), there is a real_vmalloc_addr() helper. In-kernel acceleration patchset will move it from KVM to MMU code. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
153 lines
3.4 KiB
C
153 lines
3.4 KiB
C
/*
|
|
* MMU context allocation for 64-bit kernels.
|
|
*
|
|
* Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*
|
|
*/
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/string.h>
|
|
#include <linux/types.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/idr.h>
|
|
#include <linux/export.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/slab.h>
|
|
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/pgalloc.h>
|
|
|
|
#include "icswx.h"
|
|
|
|
static DEFINE_SPINLOCK(mmu_context_lock);
|
|
static DEFINE_IDA(mmu_context_ida);
|
|
|
|
int __init_new_context(void)
|
|
{
|
|
int index;
|
|
int err;
|
|
|
|
again:
|
|
if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
|
|
return -ENOMEM;
|
|
|
|
spin_lock(&mmu_context_lock);
|
|
err = ida_get_new_above(&mmu_context_ida, 1, &index);
|
|
spin_unlock(&mmu_context_lock);
|
|
|
|
if (err == -EAGAIN)
|
|
goto again;
|
|
else if (err)
|
|
return err;
|
|
|
|
if (index > MAX_USER_CONTEXT) {
|
|
spin_lock(&mmu_context_lock);
|
|
ida_remove(&mmu_context_ida, index);
|
|
spin_unlock(&mmu_context_lock);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
return index;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__init_new_context);
|
|
|
|
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
|
|
{
|
|
int index;
|
|
|
|
index = __init_new_context();
|
|
if (index < 0)
|
|
return index;
|
|
|
|
/* The old code would re-promote on fork, we don't do that
|
|
* when using slices as it could cause problem promoting slices
|
|
* that have been forced down to 4K
|
|
*/
|
|
if (slice_mm_new_context(mm))
|
|
slice_set_user_psize(mm, mmu_virtual_psize);
|
|
subpage_prot_init_new_context(mm);
|
|
mm->context.id = index;
|
|
#ifdef CONFIG_PPC_ICSWX
|
|
mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
|
|
if (!mm->context.cop_lockp) {
|
|
__destroy_context(index);
|
|
subpage_prot_free(mm);
|
|
mm->context.id = MMU_NO_CONTEXT;
|
|
return -ENOMEM;
|
|
}
|
|
spin_lock_init(mm->context.cop_lockp);
|
|
#endif /* CONFIG_PPC_ICSWX */
|
|
|
|
#ifdef CONFIG_PPC_64K_PAGES
|
|
mm->context.pte_frag = NULL;
|
|
#endif
|
|
#ifdef CONFIG_SPAPR_TCE_IOMMU
|
|
mm_iommu_init(&mm->context);
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
void __destroy_context(int context_id)
|
|
{
|
|
spin_lock(&mmu_context_lock);
|
|
ida_remove(&mmu_context_ida, context_id);
|
|
spin_unlock(&mmu_context_lock);
|
|
}
|
|
EXPORT_SYMBOL_GPL(__destroy_context);
|
|
|
|
#ifdef CONFIG_PPC_64K_PAGES
|
|
static void destroy_pagetable_page(struct mm_struct *mm)
|
|
{
|
|
int count;
|
|
void *pte_frag;
|
|
struct page *page;
|
|
|
|
pte_frag = mm->context.pte_frag;
|
|
if (!pte_frag)
|
|
return;
|
|
|
|
page = virt_to_page(pte_frag);
|
|
/* drop all the pending references */
|
|
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
|
|
/* We allow PTE_FRAG_NR fragments from a PTE page */
|
|
count = atomic_sub_return(PTE_FRAG_NR - count, &page->_count);
|
|
if (!count) {
|
|
pgtable_page_dtor(page);
|
|
free_hot_cold_page(page, 0);
|
|
}
|
|
}
|
|
|
|
#else
|
|
static inline void destroy_pagetable_page(struct mm_struct *mm)
|
|
{
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
|
|
void destroy_context(struct mm_struct *mm)
|
|
{
|
|
#ifdef CONFIG_SPAPR_TCE_IOMMU
|
|
mm_iommu_cleanup(&mm->context);
|
|
#endif
|
|
|
|
#ifdef CONFIG_PPC_ICSWX
|
|
drop_cop(mm->context.acop, mm);
|
|
kfree(mm->context.cop_lockp);
|
|
mm->context.cop_lockp = NULL;
|
|
#endif /* CONFIG_PPC_ICSWX */
|
|
|
|
destroy_pagetable_page(mm);
|
|
__destroy_context(mm->context.id);
|
|
subpage_prot_free(mm);
|
|
mm->context.id = MMU_NO_CONTEXT;
|
|
}
|