mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
drm/i915: use a separate context for gpu relocs
The CS pre-parser can pre-fetch commands across memory sync points and starting from gen12 it is able to pre-fetch across BB_START and BB_END boundaries as well, so when we emit gpu relocs the pre-parser might fetch the target location of the reloc before the memory write lands. The parser can't pre-fetch across the ctx switch, so we use a separate context to guarantee that the memory is synchronized before the parser can get to it. Note that there is no risk of the CS doing a lite restore from the reloc context to the user context, even if the two have the same hw_id, because since gen11 the CS also checks the LRCA when deciding if it can lite-restore. v2: limit new context to gen12+, release in eb_destroy, add a comment in emit_fini_breadcrumb (Chris). Suggested-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190827185805.21799-1-daniele.ceraolospurio@intel.com
This commit is contained in:
parent
45e9c829eb
commit
8a9a982767
@ -252,6 +252,7 @@ struct i915_execbuffer {
|
|||||||
bool has_fence : 1;
|
bool has_fence : 1;
|
||||||
bool needs_unfenced : 1;
|
bool needs_unfenced : 1;
|
||||||
|
|
||||||
|
struct intel_context *ce;
|
||||||
struct i915_request *rq;
|
struct i915_request *rq;
|
||||||
u32 *rq_cmd;
|
u32 *rq_cmd;
|
||||||
unsigned int rq_size;
|
unsigned int rq_size;
|
||||||
@ -880,6 +881,9 @@ static void eb_destroy(const struct i915_execbuffer *eb)
|
|||||||
{
|
{
|
||||||
GEM_BUG_ON(eb->reloc_cache.rq);
|
GEM_BUG_ON(eb->reloc_cache.rq);
|
||||||
|
|
||||||
|
if (eb->reloc_cache.ce)
|
||||||
|
intel_context_put(eb->reloc_cache.ce);
|
||||||
|
|
||||||
if (eb->lut_size > 0)
|
if (eb->lut_size > 0)
|
||||||
kfree(eb->buckets);
|
kfree(eb->buckets);
|
||||||
}
|
}
|
||||||
@ -903,6 +907,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
|
|||||||
cache->has_fence = cache->gen < 4;
|
cache->has_fence = cache->gen < 4;
|
||||||
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
|
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
|
||||||
cache->node.allocated = false;
|
cache->node.allocated = false;
|
||||||
|
cache->ce = NULL;
|
||||||
cache->rq = NULL;
|
cache->rq = NULL;
|
||||||
cache->rq_size = 0;
|
cache->rq_size = 0;
|
||||||
}
|
}
|
||||||
@ -1168,7 +1173,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
|
|||||||
if (err)
|
if (err)
|
||||||
goto err_unmap;
|
goto err_unmap;
|
||||||
|
|
||||||
rq = i915_request_create(eb->context);
|
rq = intel_context_create_request(cache->ce);
|
||||||
if (IS_ERR(rq)) {
|
if (IS_ERR(rq)) {
|
||||||
err = PTR_ERR(rq);
|
err = PTR_ERR(rq);
|
||||||
goto err_unpin;
|
goto err_unpin;
|
||||||
@ -1239,6 +1244,29 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
|
|||||||
if (!intel_engine_can_store_dword(eb->engine))
|
if (!intel_engine_can_store_dword(eb->engine))
|
||||||
return ERR_PTR(-ENODEV);
|
return ERR_PTR(-ENODEV);
|
||||||
|
|
||||||
|
if (!cache->ce) {
|
||||||
|
struct intel_context *ce;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The CS pre-parser can pre-fetch commands across
|
||||||
|
* memory sync points and starting gen12 it is able to
|
||||||
|
* pre-fetch across BB_START and BB_END boundaries
|
||||||
|
* (within the same context). We therefore use a
|
||||||
|
* separate context gen12+ to guarantee that the reloc
|
||||||
|
* writes land before the parser gets to the target
|
||||||
|
* memory location.
|
||||||
|
*/
|
||||||
|
if (cache->gen >= 12)
|
||||||
|
ce = intel_context_create(eb->context->gem_context,
|
||||||
|
eb->engine);
|
||||||
|
else
|
||||||
|
ce = intel_context_get(eb->context);
|
||||||
|
if (IS_ERR(ce))
|
||||||
|
return ERR_CAST(ce);
|
||||||
|
|
||||||
|
cache->ce = ce;
|
||||||
|
}
|
||||||
|
|
||||||
err = __reloc_gpu_alloc(eb, vma, len);
|
err = __reloc_gpu_alloc(eb, vma, len);
|
||||||
if (unlikely(err))
|
if (unlikely(err))
|
||||||
return ERR_PTR(err);
|
return ERR_PTR(err);
|
||||||
|
@ -2931,6 +2931,24 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
|
|||||||
return gen8_emit_fini_breadcrumb_footer(request, cs);
|
return gen8_emit_fini_breadcrumb_footer(request, cs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note that the CS instruction pre-parser will not stall on the breadcrumb
|
||||||
|
* flush and will continue pre-fetching the instructions after it before the
|
||||||
|
* memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
|
||||||
|
* BB_START/END instructions, so, even though we might pre-fetch the pre-amble
|
||||||
|
* of the next request before the memory has been flushed, we're guaranteed that
|
||||||
|
* we won't access the batch itself too early.
|
||||||
|
* However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
|
||||||
|
* so, if the current request is modifying an instruction in the next request on
|
||||||
|
* the same intel_context, we might pre-fetch and then execute the pre-update
|
||||||
|
* instruction. To avoid this, the users of self-modifying code should either
|
||||||
|
* disable the parser around the code emitting the memory writes, via a new flag
|
||||||
|
* added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
|
||||||
|
* the in-kernel use-cases we've opted to use a separate context, see
|
||||||
|
* reloc_gpu() as an example.
|
||||||
|
* All the above applies only to the instructions themselves. Non-inline data
|
||||||
|
* used by the instructions is not pre-fetched.
|
||||||
|
*/
|
||||||
static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
|
static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
|
||||||
u32 *cs)
|
u32 *cs)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user