diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 83588e9840f8..a7850bbffbe0 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -90,6 +90,7 @@ obj-y += gem/ gem-y += \ gem/i915_gem_busy.o \ gem/i915_gem_clflush.o \ + gem/i915_gem_client_blt.o \ gem/i915_gem_context.o \ gem/i915_gem_dmabuf.o \ gem/i915_gem_domain.o \ @@ -97,6 +98,7 @@ gem-y += \ gem/i915_gem_fence.o \ gem/i915_gem_internal.o \ gem/i915_gem_object.o \ + gem/i915_gem_object_blt.o \ gem/i915_gem_mman.o \ gem/i915_gem_pages.o \ gem/i915_gem_phys.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c new file mode 100644 index 000000000000..4899ca1dd76c --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ +#include "i915_gem_client_blt.h" + +#include "i915_gem_object_blt.h" +#include "intel_drv.h" + +/* + * Pairs a vma with the object, sg_table and page sizes that the proxy vma + * ops below hand back to that vma. + */ +struct i915_sleeve { + struct i915_vma *vma; + struct drm_i915_gem_object *obj; + struct sg_table *pages; + struct i915_page_sizes page_sizes; +}; + +/* Point the vma at the pages and page sizes stored in its sleeve (vma->private). */ +static int vma_set_pages(struct i915_vma *vma) +{ + struct i915_sleeve *sleeve = vma->private; + + vma->pages = sleeve->pages; + vma->page_sizes = sleeve->page_sizes; + + return 0; +} + +/* Only forget the pages pointer; nothing is released here. */ +static void vma_clear_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + vma->pages = NULL; +} + +/* Forward the bind to the address space's own vma_ops. */ +static int vma_bind(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + return vma->vm->vma_ops.bind_vma(vma, cache_level, flags); +} + +/* Forward the unbind to the address space's own vma_ops. */ +static void vma_unbind(struct i915_vma *vma) +{ + vma->vm->vma_ops.unbind_vma(vma); +} + +/* vma ops installed by create_sleeve() on the vma it looks up. */ +static const struct i915_vma_ops proxy_vma_ops = { + .set_pages = vma_set_pages, + .clear_pages = vma_clear_pages, + .bind_vma = vma_bind, + .unbind_vma = vma_unbind, +}; + +/* + * Allocate a sleeve for @obj in @vm: look up a vma instance, install + * proxy_vma_ops on it and record the caller's @pages and @page_sizes. + * Returns the sleeve or an ERR_PTR(). + */ +static struct i915_sleeve *create_sleeve(struct i915_address_space *vm, + struct drm_i915_gem_object *obj, + struct sg_table 
*pages, + struct i915_page_sizes *page_sizes) +{ + struct i915_sleeve *sleeve; + struct i915_vma *vma; + int err; + + sleeve = kzalloc(sizeof(*sleeve), GFP_KERNEL); + if (!sleeve) + return ERR_PTR(-ENOMEM); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_free; + } + + /* Route this vma's page and bind callbacks through the proxy ops. */ + vma->private = sleeve; + vma->ops = &proxy_vma_ops; + + sleeve->vma = vma; + /* This object reference is dropped again in destroy_sleeve(). */ + sleeve->obj = i915_gem_object_get(obj); + sleeve->pages = pages; + sleeve->page_sizes = *page_sizes; + + return sleeve; + +err_free: + kfree(sleeve); + return ERR_PTR(err); +} + +/* Drop the object reference held by the sleeve and free the sleeve. */ +static void destroy_sleeve(struct i915_sleeve *sleeve) +{ + i915_gem_object_put(sleeve->obj); + kfree(sleeve); +} + +/* + * State for one deferred fill: the fence handed out to waiters (dma), the + * callback/sw-fence/work items that drive it, and the fill parameters. + */ +struct clear_pages_work { + struct dma_fence dma; + struct dma_fence_cb cb; + struct i915_sw_fence wait; + struct work_struct work; + struct irq_work irq_work; + struct i915_sleeve *sleeve; + struct intel_context *ce; + u32 value; +}; + +static const char *clear_pages_work_driver_name(struct dma_fence *fence) +{ + return DRIVER_NAME; +} + +static const char *clear_pages_work_timeline_name(struct dma_fence *fence) +{ + return "clear"; +} + +/* dma_fence release hook: tear down the sleeve and sw fence, then free the work. */ +static void clear_pages_work_release(struct dma_fence *fence) +{ + struct clear_pages_work *w = container_of(fence, typeof(*w), dma); + + destroy_sleeve(w->sleeve); + + i915_sw_fence_fini(&w->wait); + + /* The fence must sit at offset 0: &w->dma is what dma_fence_free() is given. */ + BUILD_BUG_ON(offsetof(typeof(*w), dma)); + dma_fence_free(&w->dma); +} + +static const struct dma_fence_ops clear_pages_work_ops = { + .get_driver_name = clear_pages_work_driver_name, + .get_timeline_name = clear_pages_work_timeline_name, + .release = clear_pages_work_release, +}; + +/* irq_work callback: signal the work's fence and drop one reference to it. */ +static void clear_pages_signal_irq_worker(struct irq_work *work) +{ + struct clear_pages_work *w = container_of(work, typeof(*w), irq_work); + + dma_fence_signal(&w->dma); + dma_fence_put(&w->dma); +} + +/* + * Fence callback: copy any error from the signalled fence into w->dma and + * defer the actual signalling of w->dma to irq_work. + */ +static void clear_pages_dma_fence_cb(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct clear_pages_work *w = container_of(cb, typeof(*w), cb); + + if 
(fence->error) + dma_fence_set_error(&w->dma, fence->error); + + /* + * Push the signalling of the fence into yet another worker to avoid + * the nightmare locking around the fence spinlock. + */ + irq_work_queue(&w->irq_work); +} + +/* + * Work item scheduled from clear_pages_work_notify(): flush the CPU cache + * if the object is marked cache_dirty, pin the sleeve's vma and submit a + * request on w->ce that fills the vma with w->value. If no request could + * be created, w->dma is signalled here with the error instead. + */ +static void clear_pages_worker(struct work_struct *work) +{ + struct clear_pages_work *w = container_of(work, typeof(*w), work); + struct drm_i915_private *i915 = w->ce->gem_context->i915; + struct drm_i915_gem_object *obj = w->sleeve->obj; + struct i915_vma *vma = w->sleeve->vma; + struct i915_request *rq; + int err = w->dma.error; + + /* An error was already recorded on the fence: skip straight to signalling. */ + if (unlikely(err)) + goto out_signal; + + if (obj->cache_dirty) { + obj->write_domain = 0; + if (i915_gem_object_has_struct_page(obj)) + drm_clflush_sg(w->sleeve->pages); + obj->cache_dirty = false; + } + + /* XXX: we need to kill this */ + mutex_lock(&i915->drm.struct_mutex); + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (unlikely(err)) + goto out_unlock; + + rq = i915_request_create(w->ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unpin; + } + + /* There's no way the fence has signalled */ + if (dma_fence_add_callback(&rq->fence, &w->cb, + clear_pages_dma_fence_cb)) + GEM_BUG_ON(1); + + if (w->ce->engine->emit_init_breadcrumb) { + err = w->ce->engine->emit_init_breadcrumb(rq); + if (unlikely(err)) + goto out_request; + } + + /* XXX: more feverish nightmares await */ + i915_vma_lock(vma); + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); + if (err) + goto out_request; + + err = intel_emit_vma_fill_blt(rq, vma, w->value); +out_request: + /* + * The request exists and carries our callback, so it is still + * submitted; err is cleared so out_signal does not signal w->dma a + * second time (presumably the skip error reaches it via the callback). + */ + if (unlikely(err)) { + i915_request_skip(rq, err); + err = 0; + } + + i915_request_add(rq); +out_unpin: + i915_vma_unpin(vma); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); +out_signal: + /* No request was submitted on this path, so signal the fence here. */ + if (unlikely(err)) { + dma_fence_set_error(&w->dma, err); + dma_fence_signal(&w->dma); + dma_fence_put(&w->dma); + } +} + +/* + * sw-fence notify hook for w->wait: schedule the worker on FENCE_COMPLETE + * and drop a reference on w->dma on FENCE_FREE. + */ +static int __i915_sw_fence_call +clear_pages_work_notify(struct i915_sw_fence *fence, + enum 
i915_sw_fence_notify state) +{ + struct clear_pages_work *w = container_of(fence, typeof(*w), wait); + + switch (state) { + case FENCE_COMPLETE: + schedule_work(&w->work); + break; + + case FENCE_FREE: + dma_fence_put(&w->dma); + break; + } + + return NOTIFY_DONE; +} + +/* Spinlock handed to dma_fence_init() for every clear_pages_work fence. */ +static DEFINE_SPINLOCK(fence_lock); + +/* + * Schedule a deferred fill of @pages with @value using context @ce. + * + * A clear_pages_work is allocated and made to wait on @obj's reservation + * object; when that wait completes, clear_pages_worker() is scheduled. If + * setting up the wait succeeded, the work's fence is installed as the + * exclusive fence of @obj->resv; otherwise the error is recorded on the + * fence and returned. + * + * Returns 0 on success or a negative error code. + */ +/* XXX: better name please */ +int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, + struct intel_context *ce, + struct sg_table *pages, + struct i915_page_sizes *page_sizes, + u32 value) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_gem_context *ctx = ce->gem_context; + struct i915_address_space *vm; + struct clear_pages_work *work; + struct i915_sleeve *sleeve; + int err; + + /* Use the context's ppgtt when it has one, otherwise the global GTT. */ + vm = ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; + + sleeve = create_sleeve(vm, obj, pages, page_sizes); + if (IS_ERR(sleeve)) + return PTR_ERR(sleeve); + + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) { + destroy_sleeve(sleeve); + return -ENOMEM; + } + + work->value = value; + work->sleeve = sleeve; + work->ce = ce; + + INIT_WORK(&work->work, clear_pages_worker); + + init_irq_work(&work->irq_work, clear_pages_signal_irq_worker); + + dma_fence_init(&work->dma, + &clear_pages_work_ops, + &fence_lock, + i915->mm.unordered_timeline, + 0); + i915_sw_fence_init(&work->wait, clear_pages_work_notify); + + i915_gem_object_lock(obj); + err = i915_sw_fence_await_reservation(&work->wait, + obj->resv, NULL, + true, I915_FENCE_TIMEOUT, + I915_FENCE_GFP); + if (err < 0) { + dma_fence_set_error(&work->dma, err); + } else { + reservation_object_add_excl_fence(obj->resv, &work->dma); + err = 0; + } + i915_gem_object_unlock(obj); + + /* The work is committed even on error; the worker checks dma.error first. */ + dma_fence_get(&work->dma); + i915_sw_fence_commit(&work->wait); + + return err; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_gem_client_blt.c" +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h new file mode 100644 index 
000000000000..3dbd28c22ff5 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ +#ifndef __I915_GEM_CLIENT_BLT_H__ +#define __I915_GEM_CLIENT_BLT_H__ + +#include <linux/types.h> + +struct drm_i915_gem_object; +struct i915_page_sizes; +struct intel_context; +struct sg_table; + +int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, + struct intel_context *ce, + struct sg_table *pages, + struct i915_page_sizes *page_sizes, + u32 value); + +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c new file mode 100644 index 000000000000..fc8ee7ef3d69 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "i915_gem_object_blt.h" + +#include "i915_gem_clflush.h" +#include "intel_drv.h" + +/* + * Emit the XY_COLOR_BLT command that fills @vma with @value into @rq. + * + * Eight dwords are reserved in the ring. On gen8+ the vma start address is + * written as lower and upper 32-bit halves; older gens write a single + * address dword. The unused tail is padded with MI_NOOP. + * + * Returns 0, or the error from intel_ring_begin(). + */ +int intel_emit_vma_fill_blt(struct i915_request *rq, + struct i915_vma *vma, + u32 value) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 8); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (INTEL_GEN(rq->i915) >= 8) { + *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; + *cs++ = 0; + *cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = lower_32_bits(vma->node.start); + *cs++ = upper_32_bits(vma->node.start); + *cs++ = value; + *cs++ = MI_NOOP; + } else { + *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; + *cs++ = 0; + *cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = vma->node.start; + *cs++ = value; + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + } + + intel_ring_advance(rq, cs); + + return 0; +} + +/* + * Pin @obj into the address space used by @ce's context, flush the CPU + * cache if required, then build and submit a request on @ce that fills the + * object with @value. + * + * Returns 0 on success or a negative error code. + */ +int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, + struct intel_context *ce, + u32 value) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_gem_context *ctx = ce->gem_context; + struct i915_address_space *vm; + struct i915_request *rq; + struct i915_vma *vma; + int err; + + /* XXX: ce->vm please */ + vm = ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (unlikely(err)) + return err; + + /* Flush when cache_dirty is set and the matching cache_coherent bit is not. */ + if (obj->cache_dirty & ~obj->cache_coherent) { + i915_gem_object_lock(obj); + i915_gem_clflush_object(obj, 0); + i915_gem_object_unlock(obj); + } + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unpin; + } + + err = i915_request_await_object(rq, obj, true); + if (unlikely(err)) + goto out_request; + + if (ce->engine->emit_init_breadcrumb) { + err = ce->engine->emit_init_breadcrumb(rq); + if (unlikely(err)) + goto out_request; + } + + i915_vma_lock(vma); + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); + if (unlikely(err)) + goto out_request; + + err = intel_emit_vma_fill_blt(rq, vma, value); +out_request: + /* Once created the request is always added; on error it is marked skipped first. */ + if (unlikely(err)) + i915_request_skip(rq, err); + + i915_request_add(rq); +out_unpin: + i915_vma_unpin(vma); + return err; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_gem_object_blt.c" +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h new file mode 100644 index 000000000000..7ec7de6ac0c0 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_OBJECT_BLT_H__ +#define __I915_GEM_OBJECT_BLT_H__ + +#include <linux/types.h> + +struct drm_i915_gem_object; +struct intel_context; +struct i915_request; +struct i915_vma; + +int intel_emit_vma_fill_blt(struct i915_request *rq, + struct i915_vma *vma, + u32 value); + +int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, + struct intel_context 
*ce, + u32 value); + +#endif diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c new file mode 100644 index 000000000000..f3a5eb807c1c --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "i915_selftest.h" + +#include "selftests/igt_flush_test.h" +#include "selftests/mock_drm.h" +#include "mock_context.h" + +/* + * Live selftest: until the IGT timeout expires, create an internal object + * of random size, pre-fill it through a CPU mapping with a pattern other + * than the target value, schedule a fill via + * i915_gem_schedule_fill_pages_blt(), wait for it and verify that every + * dword reads back as the target value. + */ +static int igt_client_fill(void *arg) +{ + struct intel_context *ce = arg; + struct drm_i915_private *i915 = ce->gem_context->i915; + struct drm_i915_gem_object *obj; + struct rnd_state prng; + IGT_TIMEOUT(end); + u32 *vaddr; + int err = 0; + + prandom_seed_state(&prng, i915_selftest.random_seed); + + do { + u32 sz = prandom_u32_state(&prng) % SZ_32M; + u32 val = prandom_u32_state(&prng); + u32 i; + + /* NOTE(review): sz stays 0 when the PRNG value is a multiple of SZ_32M; confirm create_internal() copes. */ + sz = round_up(sz, PAGE_SIZE); + + pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val); + + obj = i915_gem_object_create_internal(i915, sz); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_flush; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_put; + } + + /* + * XXX: The goal is to move this to get_pages, so try to dirty the + * CPU cache first to check that we do the required clflush + * before scheduling the blt for !llc platforms. This matches + * some version of reality where at get_pages the pages + * themselves may not yet be coherent with the GPU (swap-in). If + * we are missing the flush then we should see the stale cache + * values after we do the set_to_cpu_domain and pick it up as a + * test failure. 
+ */ + memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32)); + + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) + obj->cache_dirty = true; + + err = i915_gem_schedule_fill_pages_blt(obj, ce, obj->mm.pages, + &obj->mm.page_sizes, + val); + if (err) + goto err_unpin; + + /* + * XXX: For now do the wait without the object resv lock to + * ensure we don't deadlock. + */ + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + goto err_unpin; + + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_cpu_domain(obj, false); + i915_gem_object_unlock(obj); + if (err) + goto err_unpin; + + /* Every dword must now hold val rather than the pre-fill pattern. */ + for (i = 0; i < obj->base.size / sizeof(u32); ++i) { + if (vaddr[i] != val) { + pr_err("vaddr[%u]=%x, expected=%x\n", i, + vaddr[i], val); + err = -EINVAL; + goto err_unpin; + } + } + + i915_gem_object_unpin_map(obj); + i915_gem_object_put(obj); + } while (!time_after(jiffies, end)); + + /* Success: the last object is already released, only the flush remains. */ + goto err_flush; + +err_unpin: + i915_gem_object_unpin_map(obj); +err_put: + i915_gem_object_put(obj); +err_flush: + mutex_lock(&i915->drm.struct_mutex); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + mutex_unlock(&i915->drm.struct_mutex); + + /* Running out of memory is not reported as a test failure. */ + if (err == -ENOMEM) + err = 0; + + return err; +} + +/* + * Selftest entry point: runs igt_client_fill on the BCS0 engine's kernel + * context; returns 0 without testing if the GPU is terminally wedged or + * there is no BCS0 engine. + */ +int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_client_fill), + }; + + if (i915_terminally_wedged(i915)) + return 0; + + if (!HAS_ENGINE(i915, BCS0)) + return 0; + + return i915_subtests(tests, i915->engine[BCS0]->kernel_context); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c new file mode 100644 index 000000000000..8de568d2c792 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "i915_selftest.h" + +#include 
"selftests/igt_flush_test.h" +#include "selftests/mock_drm.h" +#include "mock_context.h" + +/* + * Live selftest: until the IGT timeout expires, create an internal object + * of random size, pre-fill it through a CPU mapping with a pattern other + * than the target value, fill it with i915_gem_object_fill_blt() and verify + * that every dword reads back as the target value. + */ +static int igt_fill_blt(void *arg) +{ + struct intel_context *ce = arg; + struct drm_i915_private *i915 = ce->gem_context->i915; + struct drm_i915_gem_object *obj; + struct rnd_state prng; + IGT_TIMEOUT(end); + u32 *vaddr; + int err = 0; + + prandom_seed_state(&prng, i915_selftest.random_seed); + + do { + u32 sz = prandom_u32_state(&prng) % SZ_32M; + u32 val = prandom_u32_state(&prng); + u32 i; + + /* NOTE(review): sz stays 0 when the PRNG value is a multiple of SZ_32M; confirm create_internal() copes. */ + sz = round_up(sz, PAGE_SIZE); + + pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val); + + obj = i915_gem_object_create_internal(i915, sz); + if (IS_ERR(obj)) { + /* The error is encoded in obj; vaddr has not been assigned for this object. */ + err = PTR_ERR(obj); + goto err_flush; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_put; + } + + /* + * Make sure the potentially async clflush does its job, if + * required. + */ + memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32)); + + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) + obj->cache_dirty = true; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_object_fill_blt(obj, ce, val); + mutex_unlock(&i915->drm.struct_mutex); + if (err) + goto err_unpin; + + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_cpu_domain(obj, false); + i915_gem_object_unlock(obj); + if (err) + goto err_unpin; + + /* Every dword must now hold val rather than the pre-fill pattern. */ + for (i = 0; i < obj->base.size / sizeof(u32); ++i) { + if (vaddr[i] != val) { + pr_err("vaddr[%u]=%x, expected=%x\n", i, + vaddr[i], val); + err = -EINVAL; + goto err_unpin; + } + } + + i915_gem_object_unpin_map(obj); + i915_gem_object_put(obj); + } while (!time_after(jiffies, end)); + + /* Success: the last object is already released, only the flush remains. */ + goto err_flush; + +err_unpin: + i915_gem_object_unpin_map(obj); +err_put: + i915_gem_object_put(obj); +err_flush: + mutex_lock(&i915->drm.struct_mutex); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + mutex_unlock(&i915->drm.struct_mutex); + + /* Running out of memory is not reported as a test failure. */ + if (err == -ENOMEM) + err = 0; + + return err; +} + +int 
i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_fill_blt), + }; + + /* Return 0 without testing if the GPU is terminally wedged or has no BCS0 engine. */ + if (i915_terminally_wedged(i915)) + return 0; + + if (!HAS_ENGINE(i915, BCS0)) + return 0; + + /* The subtests receive the BCS0 engine's kernel context as their argument. */ + return i915_subtests(tests, i915->engine[BCS0]->kernel_context); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index a34ece53a771..eec31e36aca7 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -180,6 +180,7 @@ #define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) #define COLOR_BLT_CMD (2<<29 | 0x40<<22 | (5-2)) +#define XY_COLOR_BLT_CMD (2 << 29 | 0x50 << 22) #define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4) #define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) #define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5) diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 9bda36a598b3..d5dc4427d664 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -25,6 +25,8 @@ selftest(gem, i915_gem_live_selftests) selftest(evict, i915_gem_evict_live_selftests) selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(contexts, i915_gem_context_live_selftests) +selftest(blt, i915_gem_object_blt_live_selftests) +selftest(client, i915_gem_client_blt_live_selftests) selftest(reset, intel_reset_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) selftest(execlists, intel_execlists_live_selftests)