linux_dsm_epyc7002/drivers/gpu/drm/ttm/ttm_bo_util.c

854 lines
20 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/**************************************************************************
*
* Copyright (c) 2007-2009 VMware, Inc., Palo Alto, CA., USA
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
*/
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/drm_vma_manager.h>
#include <linux/io.h>
#include <linux/highmem.h>
#include <linux/wait.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/dma-resv.h>
struct ttm_transfer_obj {
struct ttm_buffer_object base;
struct ttm_buffer_object *bo;
};
void ttm_bo_free_old_node(struct ttm_buffer_object *bo)
{
ttm_bo_mem_put(bo, &bo->mem);
}
int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
{
struct ttm_tt *ttm = bo->ttm;
struct ttm_mem_reg *old_mem = &bo->mem;
int ret;
if (old_mem->mem_type != TTM_PL_SYSTEM) {
ret = ttm_bo_wait(bo, ctx->interruptible, ctx->no_wait_gpu);
if (unlikely(ret != 0)) {
if (ret != -ERESTARTSYS)
pr_err("Failed to expire sync object before unbinding TTM\n");
return ret;
}
ttm_tt_unbind(ttm);
ttm_bo_free_old_node(bo);
ttm_flag_masked(&old_mem->placement, TTM_PL_FLAG_SYSTEM,
TTM_PL_MASK_MEM);
old_mem->mem_type = TTM_PL_SYSTEM;
}
ret = ttm_tt_set_placement_caching(ttm, new_mem->placement);
if (unlikely(ret != 0))
return ret;
if (new_mem->mem_type != TTM_PL_SYSTEM) {
ret = ttm_tt_bind(ttm, new_mem, ctx);
if (unlikely(ret != 0))
return ret;
}
*old_mem = *new_mem;
new_mem->mm_node = NULL;
return 0;
}
EXPORT_SYMBOL(ttm_bo_move_ttm);
int ttm_mem_io_lock(struct ttm_mem_type_manager *man, bool interruptible)
{
if (likely(man->io_reserve_fastpath))
return 0;
if (interruptible)
return mutex_lock_interruptible(&man->io_reserve_mutex);
mutex_lock(&man->io_reserve_mutex);
return 0;
}
void ttm_mem_io_unlock(struct ttm_mem_type_manager *man)
{
if (likely(man->io_reserve_fastpath))
return;
mutex_unlock(&man->io_reserve_mutex);
}
static int ttm_mem_io_evict(struct ttm_mem_type_manager *man)
{
struct ttm_buffer_object *bo;
if (!man->use_io_reserve_lru || list_empty(&man->io_reserve_lru))
return -EAGAIN;
bo = list_first_entry(&man->io_reserve_lru,
struct ttm_buffer_object,
io_reserve_lru);
list_del_init(&bo->io_reserve_lru);
ttm_bo_unmap_virtual_locked(bo);
return 0;
}
int ttm_mem_io_reserve(struct ttm_bo_device *bdev,
struct ttm_mem_reg *mem)
{
struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
int ret = 0;
if (!bdev->driver->io_mem_reserve)
return 0;
if (likely(man->io_reserve_fastpath))
return bdev->driver->io_mem_reserve(bdev, mem);
if (bdev->driver->io_mem_reserve &&
mem->bus.io_reserved_count++ == 0) {
retry:
ret = bdev->driver->io_mem_reserve(bdev, mem);
if (ret == -EAGAIN) {
ret = ttm_mem_io_evict(man);
if (ret == 0)
goto retry;
}
}
return ret;
}
void ttm_mem_io_free(struct ttm_bo_device *bdev,
struct ttm_mem_reg *mem)
{
struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
if (likely(man->io_reserve_fastpath))
return;
if (bdev->driver->io_mem_reserve &&
--mem->bus.io_reserved_count == 0 &&
bdev->driver->io_mem_free)
bdev->driver->io_mem_free(bdev, mem);
}
int ttm_mem_io_reserve_vm(struct ttm_buffer_object *bo)
{
struct ttm_mem_reg *mem = &bo->mem;
int ret;
if (!mem->bus.io_reserved_vm) {
struct ttm_mem_type_manager *man =
&bo->bdev->man[mem->mem_type];
ret = ttm_mem_io_reserve(bo->bdev, mem);
if (unlikely(ret != 0))
return ret;
mem->bus.io_reserved_vm = true;
if (man->use_io_reserve_lru)
list_add_tail(&bo->io_reserve_lru,
&man->io_reserve_lru);
}
return 0;
}
void ttm_mem_io_free_vm(struct ttm_buffer_object *bo)
{
struct ttm_mem_reg *mem = &bo->mem;
if (mem->bus.io_reserved_vm) {
mem->bus.io_reserved_vm = false;
list_del_init(&bo->io_reserve_lru);
ttm_mem_io_free(bo->bdev, mem);
}
}
static int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
void **virtual)
{
struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
int ret;
void *addr;
*virtual = NULL;
(void) ttm_mem_io_lock(man, false);
ret = ttm_mem_io_reserve(bdev, mem);
ttm_mem_io_unlock(man);
if (ret || !mem->bus.is_iomem)
return ret;
if (mem->bus.addr) {
addr = mem->bus.addr;
} else {
if (mem->placement & TTM_PL_FLAG_WC)
addr = ioremap_wc(mem->bus.base + mem->bus.offset, mem->bus.size);
else
addr = ioremap(mem->bus.base + mem->bus.offset, mem->bus.size);
if (!addr) {
(void) ttm_mem_io_lock(man, false);
ttm_mem_io_free(bdev, mem);
ttm_mem_io_unlock(man);
return -ENOMEM;
}
}
*virtual = addr;
return 0;
}
static void ttm_mem_reg_iounmap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
void *virtual)
{
struct ttm_mem_type_manager *man;
man = &bdev->man[mem->mem_type];
if (virtual && mem->bus.addr == NULL)
iounmap(virtual);
(void) ttm_mem_io_lock(man, false);
ttm_mem_io_free(bdev, mem);
ttm_mem_io_unlock(man);
}
static int ttm_copy_io_page(void *dst, void *src, unsigned long page)
{
uint32_t *dstP =
(uint32_t *) ((unsigned long)dst + (page << PAGE_SHIFT));
uint32_t *srcP =
(uint32_t *) ((unsigned long)src + (page << PAGE_SHIFT));
int i;
for (i = 0; i < PAGE_SIZE / sizeof(uint32_t); ++i)
iowrite32(ioread32(srcP++), dstP++);
return 0;
}
#ifdef CONFIG_X86
#define __ttm_kmap_atomic_prot(__page, __prot) kmap_atomic_prot(__page, __prot)
#define __ttm_kunmap_atomic(__addr) kunmap_atomic(__addr)
#else
#define __ttm_kmap_atomic_prot(__page, __prot) vmap(&__page, 1, 0, __prot)
#define __ttm_kunmap_atomic(__addr) vunmap(__addr)
#endif
/**
* ttm_kmap_atomic_prot - Efficient kernel map of a single page with
* specified page protection.
*
* @page: The page to map.
* @prot: The page protection.
*
* This function maps a TTM page using the kmap_atomic api if available,
* otherwise falls back to vmap. The user must make sure that the
* specified page does not have an aliased mapping with a different caching
* policy unless the architecture explicitly allows it. Also mapping and
* unmapping using this api must be correctly nested. Unmapping should
* occur in the reverse order of mapping.
*/
void *ttm_kmap_atomic_prot(struct page *page, pgprot_t prot)
{
if (pgprot_val(prot) == pgprot_val(PAGE_KERNEL))
return kmap_atomic(page);
else
return __ttm_kmap_atomic_prot(page, prot);
}
EXPORT_SYMBOL(ttm_kmap_atomic_prot);
/**
* ttm_kunmap_atomic_prot - Unmap a page that was mapped using
* ttm_kmap_atomic_prot.
*
* @addr: The virtual address from the map.
* @prot: The page protection.
*/
void ttm_kunmap_atomic_prot(void *addr, pgprot_t prot)
{
if (pgprot_val(prot) == pgprot_val(PAGE_KERNEL))
kunmap_atomic(addr);
else
__ttm_kunmap_atomic(addr);
}
EXPORT_SYMBOL(ttm_kunmap_atomic_prot);
static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src,
unsigned long page,
pgprot_t prot)
{
struct page *d = ttm->pages[page];
void *dst;
if (!d)
return -ENOMEM;
src = (void *)((unsigned long)src + (page << PAGE_SHIFT));
dst = ttm_kmap_atomic_prot(d, prot);
if (!dst)
return -ENOMEM;
memcpy_fromio(dst, src, PAGE_SIZE);
ttm_kunmap_atomic_prot(dst, prot);
return 0;
}
static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst,
unsigned long page,
pgprot_t prot)
{
struct page *s = ttm->pages[page];
void *src;
if (!s)
return -ENOMEM;
dst = (void *)((unsigned long)dst + (page << PAGE_SHIFT));
src = ttm_kmap_atomic_prot(s, prot);
if (!src)
return -ENOMEM;
memcpy_toio(dst, src, PAGE_SIZE);
ttm_kunmap_atomic_prot(src, prot);
return 0;
}
int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
{
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_mem_type_manager *man = &bdev->man[new_mem->mem_type];
struct ttm_tt *ttm = bo->ttm;
struct ttm_mem_reg *old_mem = &bo->mem;
struct ttm_mem_reg old_copy = *old_mem;
void *old_iomap;
void *new_iomap;
int ret;
unsigned long i;
unsigned long page;
unsigned long add = 0;
int dir;
ret = ttm_bo_wait(bo, ctx->interruptible, ctx->no_wait_gpu);
if (ret)
return ret;
ret = ttm_mem_reg_ioremap(bdev, old_mem, &old_iomap);
if (ret)
return ret;
ret = ttm_mem_reg_ioremap(bdev, new_mem, &new_iomap);
if (ret)
goto out;
/*
* Single TTM move. NOP.
*/
if (old_iomap == NULL && new_iomap == NULL)
goto out2;
/*
* Don't move nonexistent data. Clear destination instead.
*/
if (old_iomap == NULL &&
(ttm == NULL || (ttm->state == tt_unpopulated &&
!(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)))) {
memset_io(new_iomap, 0, new_mem->num_pages*PAGE_SIZE);
goto out2;
}
/*
* TTM might be null for moves within the same region.
*/
if (ttm) {
ret = ttm_tt_populate(ttm, ctx);
if (ret)
goto out1;
}
add = 0;
dir = 1;
if ((old_mem->mem_type == new_mem->mem_type) &&
(new_mem->start < old_mem->start + old_mem->size)) {
dir = -1;
add = new_mem->num_pages - 1;
}
for (i = 0; i < new_mem->num_pages; ++i) {
page = i * dir + add;
if (old_iomap == NULL) {
pgprot_t prot = ttm_io_prot(old_mem->placement,
PAGE_KERNEL);
ret = ttm_copy_ttm_io_page(ttm, new_iomap, page,
prot);
} else if (new_iomap == NULL) {
pgprot_t prot = ttm_io_prot(new_mem->placement,
PAGE_KERNEL);
ret = ttm_copy_io_ttm_page(ttm, old_iomap, page,
prot);
} else {
ret = ttm_copy_io_page(new_iomap, old_iomap, page);
}
if (ret)
goto out1;
}
mb();
out2:
old_copy = *old_mem;
*old_mem = *new_mem;
new_mem->mm_node = NULL;
if (man->flags & TTM_MEMTYPE_FLAG_FIXED) {
ttm_tt_destroy(ttm);
bo->ttm = NULL;
}
out1:
ttm_mem_reg_iounmap(bdev, old_mem, new_iomap);
out:
ttm_mem_reg_iounmap(bdev, &old_copy, old_iomap);
/*
* On error, keep the mm node!
*/
if (!ret)
ttm_bo_mem_put(bo, &old_copy);
return ret;
}
EXPORT_SYMBOL(ttm_bo_move_memcpy);
static void ttm_transfered_destroy(struct ttm_buffer_object *bo)
{
struct ttm_transfer_obj *fbo;
fbo = container_of(bo, struct ttm_transfer_obj, base);
ttm_bo_put(fbo->bo);
kfree(fbo);
}
/**
* ttm_buffer_object_transfer
*
* @bo: A pointer to a struct ttm_buffer_object.
* @new_obj: A pointer to a pointer to a newly created ttm_buffer_object,
* holding the data of @bo with the old placement.
*
* This is a utility function that may be called after an accelerated move
* has been scheduled. A new buffer object is created as a placeholder for
* the old data while it's being copied. When that buffer object is idle,
* it can be destroyed, releasing the space of the old placement.
* Returns:
* !0: Failure.
*/
static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
struct ttm_buffer_object **new_obj)
{
struct ttm_transfer_obj *fbo;
int ret;
fbo = kmalloc(sizeof(*fbo), GFP_KERNEL);
if (!fbo)
return -ENOMEM;
fbo->base = *bo;
fbo->base.mem.placement |= TTM_PL_FLAG_NO_EVICT;
ttm_bo_get(bo);
fbo->bo = bo;
/**
* Fix up members that we shouldn't copy directly:
* TODO: Explicit member copy would probably be better here.
*/
atomic_inc(&ttm_bo_glob.bo_count);
INIT_LIST_HEAD(&fbo->base.ddestroy);
INIT_LIST_HEAD(&fbo->base.lru);
INIT_LIST_HEAD(&fbo->base.swap);
INIT_LIST_HEAD(&fbo->base.io_reserve_lru);
fbo->base.moving = NULL;
drm_vma_node_reset(&fbo->base.base.vma_node);
kref_init(&fbo->base.kref);
fbo->base.destroy = &ttm_transfered_destroy;
fbo->base.acc_size = 0;
if (bo->type != ttm_bo_type_sg)
fbo->base.base.resv = &fbo->base.base._resv;
dma_resv_init(&fbo->base.base._resv);
fbo->base.base.dev = NULL;
ret = dma_resv_trylock(&fbo->base.base._resv);
WARN_ON(!ret);
*new_obj = &fbo->base;
return 0;
}
pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp)
{
/* Cached mappings need no adjustment */
if (caching_flags & TTM_PL_FLAG_CACHED)
return tmp;
#if defined(__i386__) || defined(__x86_64__)
if (caching_flags & TTM_PL_FLAG_WC)
tmp = pgprot_writecombine(tmp);
else if (boot_cpu_data.x86 > 3)
tmp = pgprot_noncached(tmp);
#endif
#if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \
drm: Permit video-buffers writecombine mapping for MIPS Since commit 4b050ba7a66c ("MIPS: pgtable.h: Implement the pgprot_writecombine function for MIPS") and commit c4687b15a848 ("MIPS: Fix definition of pgprot_writecombine()") write-combine vma mapping is available to be used by kernel subsystems for MIPS. In particular the uncached accelerated attribute is requested to be set by ioremap_wc() method and by generic PCI memory pages/ranges mapping methods. The same is done by the drm_io_prot()/ttm_io_prot() functions in case if write-combine flag is set for vma's passed for mapping. But for some reason the pgprot_writecombine() method calling is ifdefed to be a platform-specific with MIPS system being marked as lacking of one. At the very least it doesn't reflect the current MIPS platform implementation. So in order to improve the DRM subsystem performance on MIPS with UCA mapping enabled, we need to have pgprot_writecombine() called for buffers, which need store operations being combined. In case if particular MIPS chip doesn't support the UCA attribute, the mapping will fall back to noncached. Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Paul Burton <paul.burton@mips.com> Cc: James Hogan <jhogan@kernel.org> Acked-by: Christian König <christian.koenig@amd.com> Reviewed-by: Paul Burton <paul.burton@mips.com> Signed-off-by: Vadim V. Vlasov <vadim.vlasov@t-platforms.ru> Signed-off-by: Serge Semin <fancer.lancer@gmail.com> Signed-off-by: Sean Paul <seanpaul@chromium.org> Link: https://patchwork.freedesktop.org/patch/msgid/20190423123122.32573-1-fancer.lancer@gmail.com
2019-04-23 19:31:22 +07:00
defined(__powerpc__) || defined(__mips__)
if (caching_flags & TTM_PL_FLAG_WC)
tmp = pgprot_writecombine(tmp);
else
tmp = pgprot_noncached(tmp);
#endif
drm: Permit video-buffers writecombine mapping for MIPS Since commit 4b050ba7a66c ("MIPS: pgtable.h: Implement the pgprot_writecombine function for MIPS") and commit c4687b15a848 ("MIPS: Fix definition of pgprot_writecombine()") write-combine vma mapping is available to be used by kernel subsystems for MIPS. In particular the uncached accelerated attribute is requested to be set by ioremap_wc() method and by generic PCI memory pages/ranges mapping methods. The same is done by the drm_io_prot()/ttm_io_prot() functions in case if write-combine flag is set for vma's passed for mapping. But for some reason the pgprot_writecombine() method calling is ifdefed to be a platform-specific with MIPS system being marked as lacking of one. At the very least it doesn't reflect the current MIPS platform implementation. So in order to improve the DRM subsystem performance on MIPS with UCA mapping enabled, we need to have pgprot_writecombine() called for buffers, which need store operations being combined. In case if particular MIPS chip doesn't support the UCA attribute, the mapping will fall back to noncached. Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Paul Burton <paul.burton@mips.com> Cc: James Hogan <jhogan@kernel.org> Acked-by: Christian König <christian.koenig@amd.com> Reviewed-by: Paul Burton <paul.burton@mips.com> Signed-off-by: Vadim V. Vlasov <vadim.vlasov@t-platforms.ru> Signed-off-by: Serge Semin <fancer.lancer@gmail.com> Signed-off-by: Sean Paul <seanpaul@chromium.org> Link: https://patchwork.freedesktop.org/patch/msgid/20190423123122.32573-1-fancer.lancer@gmail.com
2019-04-23 19:31:22 +07:00
#if defined(__sparc__)
tmp = pgprot_noncached(tmp);
#endif
return tmp;
}
EXPORT_SYMBOL(ttm_io_prot);
static int ttm_bo_ioremap(struct ttm_buffer_object *bo,
unsigned long offset,
unsigned long size,
struct ttm_bo_kmap_obj *map)
{
struct ttm_mem_reg *mem = &bo->mem;
if (bo->mem.bus.addr) {
map->bo_kmap_type = ttm_bo_map_premapped;
map->virtual = (void *)(((u8 *)bo->mem.bus.addr) + offset);
} else {
map->bo_kmap_type = ttm_bo_map_iomap;
if (mem->placement & TTM_PL_FLAG_WC)
map->virtual = ioremap_wc(bo->mem.bus.base + bo->mem.bus.offset + offset,
size);
else
map->virtual = ioremap(bo->mem.bus.base + bo->mem.bus.offset + offset,
size);
}
return (!map->virtual) ? -ENOMEM : 0;
}
static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo,
unsigned long start_page,
unsigned long num_pages,
struct ttm_bo_kmap_obj *map)
{
struct ttm_mem_reg *mem = &bo->mem;
struct ttm_operation_ctx ctx = {
.interruptible = false,
.no_wait_gpu = false
};
struct ttm_tt *ttm = bo->ttm;
pgprot_t prot;
int ret;
BUG_ON(!ttm);
ret = ttm_tt_populate(ttm, &ctx);
if (ret)
return ret;
if (num_pages == 1 && (mem->placement & TTM_PL_FLAG_CACHED)) {
/*
* We're mapping a single page, and the desired
* page protection is consistent with the bo.
*/
map->bo_kmap_type = ttm_bo_map_kmap;
map->page = ttm->pages[start_page];
map->virtual = kmap(map->page);
} else {
/*
* We need to use vmap to get the desired page protection
* or to make the buffer object look contiguous.
*/
prot = ttm_io_prot(mem->placement, PAGE_KERNEL);
map->bo_kmap_type = ttm_bo_map_vmap;
map->virtual = vmap(ttm->pages + start_page, num_pages,
0, prot);
}
return (!map->virtual) ? -ENOMEM : 0;
}
int ttm_bo_kmap(struct ttm_buffer_object *bo,
unsigned long start_page, unsigned long num_pages,
struct ttm_bo_kmap_obj *map)
{
struct ttm_mem_type_manager *man =
&bo->bdev->man[bo->mem.mem_type];
unsigned long offset, size;
int ret;
map->virtual = NULL;
map->bo = bo;
if (num_pages > bo->num_pages)
return -EINVAL;
if (start_page > bo->num_pages)
return -EINVAL;
(void) ttm_mem_io_lock(man, false);
ret = ttm_mem_io_reserve(bo->bdev, &bo->mem);
ttm_mem_io_unlock(man);
if (ret)
return ret;
if (!bo->mem.bus.is_iomem) {
return ttm_bo_kmap_ttm(bo, start_page, num_pages, map);
} else {
offset = start_page << PAGE_SHIFT;
size = num_pages << PAGE_SHIFT;
return ttm_bo_ioremap(bo, offset, size, map);
}
}
EXPORT_SYMBOL(ttm_bo_kmap);
void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map)
{
struct ttm_buffer_object *bo = map->bo;
struct ttm_mem_type_manager *man =
&bo->bdev->man[bo->mem.mem_type];
if (!map->virtual)
return;
switch (map->bo_kmap_type) {
case ttm_bo_map_iomap:
iounmap(map->virtual);
break;
case ttm_bo_map_vmap:
vunmap(map->virtual);
break;
case ttm_bo_map_kmap:
kunmap(map->page);
break;
case ttm_bo_map_premapped:
break;
default:
BUG();
}
(void) ttm_mem_io_lock(man, false);
ttm_mem_io_free(map->bo->bdev, &map->bo->mem);
ttm_mem_io_unlock(man);
map->virtual = NULL;
map->page = NULL;
}
EXPORT_SYMBOL(ttm_bo_kunmap);
int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
dma-buf: Rename struct fence to dma_fence I plan to usurp the short name of struct fence for a core kernel struct, and so I need to rename the specialised fence/timeline for DMA operations to make room. A consensus was reached in https://lists.freedesktop.org/archives/dri-devel/2016-July/113083.html that making clear this fence applies to DMA operations was a good thing. Since then the patch has grown a bit as usage increases, so hopefully it remains a good thing! (v2...: rebase, rerun spatch) v3: Compile on msm, spotted a manual fixup that I broke. v4: Try again for msm, sorry Daniel coccinelle script: @@ @@ - struct fence + struct dma_fence @@ @@ - struct fence_ops + struct dma_fence_ops @@ @@ - struct fence_cb + struct dma_fence_cb @@ @@ - struct fence_array + struct dma_fence_array @@ @@ - enum fence_flag_bits + enum dma_fence_flag_bits @@ @@ ( - fence_init + dma_fence_init | - fence_release + dma_fence_release | - fence_free + dma_fence_free | - fence_get + dma_fence_get | - fence_get_rcu + dma_fence_get_rcu | - fence_put + dma_fence_put | - fence_signal + dma_fence_signal | - fence_signal_locked + dma_fence_signal_locked | - fence_default_wait + dma_fence_default_wait | - fence_add_callback + dma_fence_add_callback | - fence_remove_callback + dma_fence_remove_callback | - fence_enable_sw_signaling + dma_fence_enable_sw_signaling | - fence_is_signaled_locked + dma_fence_is_signaled_locked | - fence_is_signaled + dma_fence_is_signaled | - fence_is_later + dma_fence_is_later | - fence_later + dma_fence_later | - fence_wait_timeout + dma_fence_wait_timeout | - fence_wait_any_timeout + dma_fence_wait_any_timeout | - fence_wait + dma_fence_wait | - fence_context_alloc + dma_fence_context_alloc | - fence_array_create + dma_fence_array_create | - to_fence_array + to_dma_fence_array | - fence_is_array + dma_fence_is_array | - trace_fence_emit + trace_dma_fence_emit | - FENCE_TRACE + DMA_FENCE_TRACE | - FENCE_WARN + DMA_FENCE_WARN | - FENCE_ERR + DMA_FENCE_ERR ) ( ... ) Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk> Acked-by: Sumit Semwal <sumit.semwal@linaro.org> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161025120045.28839-1-chris@chris-wilson.co.uk
2016-10-25 19:00:45 +07:00
struct dma_fence *fence,
bool evict,
struct ttm_mem_reg *new_mem)
{
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_mem_type_manager *man = &bdev->man[new_mem->mem_type];
struct ttm_mem_reg *old_mem = &bo->mem;
int ret;
struct ttm_buffer_object *ghost_obj;
dma_resv_add_excl_fence(bo->base.resv, fence);
if (evict) {
ret = ttm_bo_wait(bo, false, false);
if (ret)
return ret;
if (man->flags & TTM_MEMTYPE_FLAG_FIXED) {
ttm_tt_destroy(bo->ttm);
bo->ttm = NULL;
}
ttm_bo_free_old_node(bo);
} else {
/**
* This should help pipeline ordinary buffer moves.
*
* Hang old buffer memory on a new buffer object,
* and leave it to be released when the GPU
* operation has completed.
*/
dma-buf: Rename struct fence to dma_fence I plan to usurp the short name of struct fence for a core kernel struct, and so I need to rename the specialised fence/timeline for DMA operations to make room. A consensus was reached in https://lists.freedesktop.org/archives/dri-devel/2016-July/113083.html that making clear this fence applies to DMA operations was a good thing. Since then the patch has grown a bit as usage increases, so hopefully it remains a good thing! (v2...: rebase, rerun spatch) v3: Compile on msm, spotted a manual fixup that I broke. v4: Try again for msm, sorry Daniel coccinelle script: @@ @@ - struct fence + struct dma_fence @@ @@ - struct fence_ops + struct dma_fence_ops @@ @@ - struct fence_cb + struct dma_fence_cb @@ @@ - struct fence_array + struct dma_fence_array @@ @@ - enum fence_flag_bits + enum dma_fence_flag_bits @@ @@ ( - fence_init + dma_fence_init | - fence_release + dma_fence_release | - fence_free + dma_fence_free | - fence_get + dma_fence_get | - fence_get_rcu + dma_fence_get_rcu | - fence_put + dma_fence_put | - fence_signal + dma_fence_signal | - fence_signal_locked + dma_fence_signal_locked | - fence_default_wait + dma_fence_default_wait | - fence_add_callback + dma_fence_add_callback | - fence_remove_callback + dma_fence_remove_callback | - fence_enable_sw_signaling + dma_fence_enable_sw_signaling | - fence_is_signaled_locked + dma_fence_is_signaled_locked | - fence_is_signaled + dma_fence_is_signaled | - fence_is_later + dma_fence_is_later | - fence_later + dma_fence_later | - fence_wait_timeout + dma_fence_wait_timeout | - fence_wait_any_timeout + dma_fence_wait_any_timeout | - fence_wait + dma_fence_wait | - fence_context_alloc + dma_fence_context_alloc | - fence_array_create + dma_fence_array_create | - to_fence_array + to_dma_fence_array | - fence_is_array + dma_fence_is_array | - trace_fence_emit + trace_dma_fence_emit | - FENCE_TRACE + DMA_FENCE_TRACE | - FENCE_WARN + DMA_FENCE_WARN | - FENCE_ERR + DMA_FENCE_ERR ) ( ... ) Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk> Acked-by: Sumit Semwal <sumit.semwal@linaro.org> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161025120045.28839-1-chris@chris-wilson.co.uk
2016-10-25 19:00:45 +07:00
dma_fence_put(bo->moving);
bo->moving = dma_fence_get(fence);
ret = ttm_buffer_object_transfer(bo, &ghost_obj);
if (ret)
return ret;
dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
/**
* If we're not moving to fixed memory, the TTM object
* needs to stay alive. Otherwhise hang it on the ghost
* bo to be unbound and destroyed.
*/
if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED))
ghost_obj->ttm = NULL;
else
bo->ttm = NULL;
dma_resv_unlock(&ghost_obj->base._resv);
ttm_bo_put(ghost_obj);
}
*old_mem = *new_mem;
new_mem->mm_node = NULL;
return 0;
}
EXPORT_SYMBOL(ttm_bo_move_accel_cleanup);
int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
dma-buf: Rename struct fence to dma_fence I plan to usurp the short name of struct fence for a core kernel struct, and so I need to rename the specialised fence/timeline for DMA operations to make room. A consensus was reached in https://lists.freedesktop.org/archives/dri-devel/2016-July/113083.html that making clear this fence applies to DMA operations was a good thing. Since then the patch has grown a bit as usage increases, so hopefully it remains a good thing! (v2...: rebase, rerun spatch) v3: Compile on msm, spotted a manual fixup that I broke. v4: Try again for msm, sorry Daniel coccinelle script: @@ @@ - struct fence + struct dma_fence @@ @@ - struct fence_ops + struct dma_fence_ops @@ @@ - struct fence_cb + struct dma_fence_cb @@ @@ - struct fence_array + struct dma_fence_array @@ @@ - enum fence_flag_bits + enum dma_fence_flag_bits @@ @@ ( - fence_init + dma_fence_init | - fence_release + dma_fence_release | - fence_free + dma_fence_free | - fence_get + dma_fence_get | - fence_get_rcu + dma_fence_get_rcu | - fence_put + dma_fence_put | - fence_signal + dma_fence_signal | - fence_signal_locked + dma_fence_signal_locked | - fence_default_wait + dma_fence_default_wait | - fence_add_callback + dma_fence_add_callback | - fence_remove_callback + dma_fence_remove_callback | - fence_enable_sw_signaling + dma_fence_enable_sw_signaling | - fence_is_signaled_locked + dma_fence_is_signaled_locked | - fence_is_signaled + dma_fence_is_signaled | - fence_is_later + dma_fence_is_later | - fence_later + dma_fence_later | - fence_wait_timeout + dma_fence_wait_timeout | - fence_wait_any_timeout + dma_fence_wait_any_timeout | - fence_wait + dma_fence_wait | - fence_context_alloc + dma_fence_context_alloc | - fence_array_create + dma_fence_array_create | - to_fence_array + to_dma_fence_array | - fence_is_array + dma_fence_is_array | - trace_fence_emit + trace_dma_fence_emit | - FENCE_TRACE + DMA_FENCE_TRACE | - FENCE_WARN + DMA_FENCE_WARN | - FENCE_ERR + DMA_FENCE_ERR ) ( ... ) Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk> Acked-by: Sumit Semwal <sumit.semwal@linaro.org> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161025120045.28839-1-chris@chris-wilson.co.uk
2016-10-25 19:00:45 +07:00
struct dma_fence *fence, bool evict,
struct ttm_mem_reg *new_mem)
{
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_mem_reg *old_mem = &bo->mem;
struct ttm_mem_type_manager *from = &bdev->man[old_mem->mem_type];
struct ttm_mem_type_manager *to = &bdev->man[new_mem->mem_type];
int ret;
dma_resv_add_excl_fence(bo->base.resv, fence);
if (!evict) {
struct ttm_buffer_object *ghost_obj;
/**
* This should help pipeline ordinary buffer moves.
*
* Hang old buffer memory on a new buffer object,
* and leave it to be released when the GPU
* operation has completed.
*/
dma-buf: Rename struct fence to dma_fence I plan to usurp the short name of struct fence for a core kernel struct, and so I need to rename the specialised fence/timeline for DMA operations to make room. A consensus was reached in https://lists.freedesktop.org/archives/dri-devel/2016-July/113083.html that making clear this fence applies to DMA operations was a good thing. Since then the patch has grown a bit as usage increases, so hopefully it remains a good thing! (v2...: rebase, rerun spatch) v3: Compile on msm, spotted a manual fixup that I broke. v4: Try again for msm, sorry Daniel coccinelle script: @@ @@ - struct fence + struct dma_fence @@ @@ - struct fence_ops + struct dma_fence_ops @@ @@ - struct fence_cb + struct dma_fence_cb @@ @@ - struct fence_array + struct dma_fence_array @@ @@ - enum fence_flag_bits + enum dma_fence_flag_bits @@ @@ ( - fence_init + dma_fence_init | - fence_release + dma_fence_release | - fence_free + dma_fence_free | - fence_get + dma_fence_get | - fence_get_rcu + dma_fence_get_rcu | - fence_put + dma_fence_put | - fence_signal + dma_fence_signal | - fence_signal_locked + dma_fence_signal_locked | - fence_default_wait + dma_fence_default_wait | - fence_add_callback + dma_fence_add_callback | - fence_remove_callback + dma_fence_remove_callback | - fence_enable_sw_signaling + dma_fence_enable_sw_signaling | - fence_is_signaled_locked + dma_fence_is_signaled_locked | - fence_is_signaled + dma_fence_is_signaled | - fence_is_later + dma_fence_is_later | - fence_later + dma_fence_later | - fence_wait_timeout + dma_fence_wait_timeout | - fence_wait_any_timeout + dma_fence_wait_any_timeout | - fence_wait + dma_fence_wait | - fence_context_alloc + dma_fence_context_alloc | - fence_array_create + dma_fence_array_create | - to_fence_array + to_dma_fence_array | - fence_is_array + dma_fence_is_array | - trace_fence_emit + trace_dma_fence_emit | - FENCE_TRACE + DMA_FENCE_TRACE | - FENCE_WARN + DMA_FENCE_WARN | - FENCE_ERR + DMA_FENCE_ERR ) ( ... ) Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk> Acked-by: Sumit Semwal <sumit.semwal@linaro.org> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161025120045.28839-1-chris@chris-wilson.co.uk
2016-10-25 19:00:45 +07:00
dma_fence_put(bo->moving);
bo->moving = dma_fence_get(fence);
ret = ttm_buffer_object_transfer(bo, &ghost_obj);
if (ret)
return ret;
dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
/**
* If we're not moving to fixed memory, the TTM object
* needs to stay alive. Otherwhise hang it on the ghost
* bo to be unbound and destroyed.
*/
if (!(to->flags & TTM_MEMTYPE_FLAG_FIXED))
ghost_obj->ttm = NULL;
else
bo->ttm = NULL;
dma_resv_unlock(&ghost_obj->base._resv);
ttm_bo_put(ghost_obj);
} else if (from->flags & TTM_MEMTYPE_FLAG_FIXED) {
/**
* BO doesn't have a TTM we need to bind/unbind. Just remember
* this eviction and free up the allocation
*/
spin_lock(&from->move_lock);
dma-buf: Rename struct fence to dma_fence I plan to usurp the short name of struct fence for a core kernel struct, and so I need to rename the specialised fence/timeline for DMA operations to make room. A consensus was reached in https://lists.freedesktop.org/archives/dri-devel/2016-July/113083.html that making clear this fence applies to DMA operations was a good thing. Since then the patch has grown a bit as usage increases, so hopefully it remains a good thing! (v2...: rebase, rerun spatch) v3: Compile on msm, spotted a manual fixup that I broke. v4: Try again for msm, sorry Daniel coccinelle script: @@ @@ - struct fence + struct dma_fence @@ @@ - struct fence_ops + struct dma_fence_ops @@ @@ - struct fence_cb + struct dma_fence_cb @@ @@ - struct fence_array + struct dma_fence_array @@ @@ - enum fence_flag_bits + enum dma_fence_flag_bits @@ @@ ( - fence_init + dma_fence_init | - fence_release + dma_fence_release | - fence_free + dma_fence_free | - fence_get + dma_fence_get | - fence_get_rcu + dma_fence_get_rcu | - fence_put + dma_fence_put | - fence_signal + dma_fence_signal | - fence_signal_locked + dma_fence_signal_locked | - fence_default_wait + dma_fence_default_wait | - fence_add_callback + dma_fence_add_callback | - fence_remove_callback + dma_fence_remove_callback | - fence_enable_sw_signaling + dma_fence_enable_sw_signaling | - fence_is_signaled_locked + dma_fence_is_signaled_locked | - fence_is_signaled + dma_fence_is_signaled | - fence_is_later + dma_fence_is_later | - fence_later + dma_fence_later | - fence_wait_timeout + dma_fence_wait_timeout | - fence_wait_any_timeout + dma_fence_wait_any_timeout | - fence_wait + dma_fence_wait | - fence_context_alloc + dma_fence_context_alloc | - fence_array_create + dma_fence_array_create | - to_fence_array + to_dma_fence_array | - fence_is_array + dma_fence_is_array | - trace_fence_emit + trace_dma_fence_emit | - FENCE_TRACE + DMA_FENCE_TRACE | - FENCE_WARN + DMA_FENCE_WARN | - FENCE_ERR + DMA_FENCE_ERR ) ( ... ) Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk> Acked-by: Sumit Semwal <sumit.semwal@linaro.org> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161025120045.28839-1-chris@chris-wilson.co.uk
2016-10-25 19:00:45 +07:00
if (!from->move || dma_fence_is_later(fence, from->move)) {
dma_fence_put(from->move);
from->move = dma_fence_get(fence);
}
spin_unlock(&from->move_lock);
ttm_bo_free_old_node(bo);
dma-buf: Rename struct fence to dma_fence I plan to usurp the short name of struct fence for a core kernel struct, and so I need to rename the specialised fence/timeline for DMA operations to make room. A consensus was reached in https://lists.freedesktop.org/archives/dri-devel/2016-July/113083.html that making clear this fence applies to DMA operations was a good thing. Since then the patch has grown a bit as usage increases, so hopefully it remains a good thing! (v2...: rebase, rerun spatch) v3: Compile on msm, spotted a manual fixup that I broke. v4: Try again for msm, sorry Daniel coccinelle script: @@ @@ - struct fence + struct dma_fence @@ @@ - struct fence_ops + struct dma_fence_ops @@ @@ - struct fence_cb + struct dma_fence_cb @@ @@ - struct fence_array + struct dma_fence_array @@ @@ - enum fence_flag_bits + enum dma_fence_flag_bits @@ @@ ( - fence_init + dma_fence_init | - fence_release + dma_fence_release | - fence_free + dma_fence_free | - fence_get + dma_fence_get | - fence_get_rcu + dma_fence_get_rcu | - fence_put + dma_fence_put | - fence_signal + dma_fence_signal | - fence_signal_locked + dma_fence_signal_locked | - fence_default_wait + dma_fence_default_wait | - fence_add_callback + dma_fence_add_callback | - fence_remove_callback + dma_fence_remove_callback | - fence_enable_sw_signaling + dma_fence_enable_sw_signaling | - fence_is_signaled_locked + dma_fence_is_signaled_locked | - fence_is_signaled + dma_fence_is_signaled | - fence_is_later + dma_fence_is_later | - fence_later + dma_fence_later | - fence_wait_timeout + dma_fence_wait_timeout | - fence_wait_any_timeout + dma_fence_wait_any_timeout | - fence_wait + dma_fence_wait | - fence_context_alloc + dma_fence_context_alloc | - fence_array_create + dma_fence_array_create | - to_fence_array + to_dma_fence_array | - fence_is_array + dma_fence_is_array | - trace_fence_emit + trace_dma_fence_emit | - FENCE_TRACE + DMA_FENCE_TRACE | - FENCE_WARN + DMA_FENCE_WARN | - FENCE_ERR + DMA_FENCE_ERR ) ( ... ) Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk> Acked-by: Sumit Semwal <sumit.semwal@linaro.org> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161025120045.28839-1-chris@chris-wilson.co.uk
2016-10-25 19:00:45 +07:00
dma_fence_put(bo->moving);
bo->moving = dma_fence_get(fence);
} else {
/**
* Last resort, wait for the move to be completed.
*
* Should never happen in pratice.
*/
ret = ttm_bo_wait(bo, false, false);
if (ret)
return ret;
if (to->flags & TTM_MEMTYPE_FLAG_FIXED) {
ttm_tt_destroy(bo->ttm);
bo->ttm = NULL;
}
ttm_bo_free_old_node(bo);
}
*old_mem = *new_mem;
new_mem->mm_node = NULL;
return 0;
}
EXPORT_SYMBOL(ttm_bo_pipeline_move);
int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
{
struct ttm_buffer_object *ghost;
int ret;
ret = ttm_buffer_object_transfer(bo, &ghost);
if (ret)
return ret;
ret = dma_resv_copy_fences(&ghost->base._resv, bo->base.resv);
/* Last resort, wait for the BO to be idle when we are OOM */
if (ret)
ttm_bo_wait(bo, false, false);
memset(&bo->mem, 0, sizeof(bo->mem));
bo->mem.mem_type = TTM_PL_SYSTEM;
bo->ttm = NULL;
dma_resv_unlock(&ghost->base._resv);
ttm_bo_put(ghost);
return 0;
}