drm/i915: Track the context's seqno in its own timeline HWSP

Now that we have allocated ourselves a cacheline to store a breadcrumb,
we can emit a write from the GPU into the timeline's HWSP of the
per-context seqno as we complete each request. This drops the mirroring
of the per-engine HWSP and allows each context to operate independently.
We do not need to unwind the per-context timeline, and so requests are
always consistent with the timeline breadcrumb, greatly simplifying the
completion checks as we no longer need to be concerned about the
global_seqno changing mid check.

One complication though is that we have to be wary that the request may
outlive the HWSP and so avoid touching the potentially danging pointer
after we have retired the fence. We also have to guard our access of the
HWSP with RCU, the release of the obj->mm.pages should already be RCU-safe.

At this point, we are emitting both per-context and global seqno and
still using the single per-engine execution timeline for resolving
interrupts.

v2: s/fake_complete/mark_complete/

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-5-chris@chris-wilson.co.uk
This commit is contained in:
Chris Wilson 2019-01-28 18:18:11 +00:00
parent 8ba306a6a3
commit 5013eb8cd6
10 changed files with 139 additions and 60 deletions

View File

@ -2890,7 +2890,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
*/
spin_lock_irqsave(&engine->timeline.lock, flags);
list_for_each_entry(request, &engine->timeline.requests, link) {
if (__i915_request_completed(request, request->global_seqno))
if (i915_request_completed(request))
continue;
active = request;

View File

@ -199,6 +199,7 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
spin_unlock(&engine->timeline.lock);
spin_lock(&rq->lock);
i915_request_mark_complete(rq);
if (!i915_request_signaled(rq))
dma_fence_signal_locked(&rq->fence);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
@ -621,7 +622,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
rq->ring = ce->ring;
rq->timeline = ce->ring->timeline;
GEM_BUG_ON(rq->timeline == &engine->timeline);
rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX];
rq->hwsp_seqno = rq->timeline->hwsp_seqno;
spin_lock_init(&rq->lock);
dma_fence_init(&rq->fence,

View File

@ -289,6 +289,7 @@ long i915_request_wait(struct i915_request *rq,
static inline bool i915_request_signaled(const struct i915_request *rq)
{
/* The request may live longer than its HWSP, so check flags first! */
return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);
}
@ -340,32 +341,23 @@ static inline u32 hwsp_seqno(const struct i915_request *rq)
*/
static inline bool i915_request_started(const struct i915_request *rq)
{
u32 seqno;
if (i915_request_signaled(rq))
return true;
seqno = i915_request_global_seqno(rq);
if (!seqno) /* not yet submitted to HW */
return false;
return i915_seqno_passed(hwsp_seqno(rq), seqno - 1);
}
static inline bool
__i915_request_completed(const struct i915_request *rq, u32 seqno)
{
GEM_BUG_ON(!seqno);
return i915_seqno_passed(hwsp_seqno(rq), seqno) &&
seqno == i915_request_global_seqno(rq);
return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
}
static inline bool i915_request_completed(const struct i915_request *rq)
{
u32 seqno;
if (i915_request_signaled(rq))
return true;
seqno = i915_request_global_seqno(rq);
if (!seqno)
return false;
return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
}
return __i915_request_completed(rq, seqno);
static inline void i915_request_mark_complete(struct i915_request *rq)
{
rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */
}
void i915_retire_requests(struct drm_i915_private *i915);

View File

@ -760,6 +760,7 @@ static void nop_submit_request(struct i915_request *request)
spin_lock_irqsave(&request->engine->timeline.lock, flags);
__i915_request_submit(request);
i915_request_mark_complete(request);
intel_engine_write_global_seqno(request->engine, request->global_seqno);
spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
}

View File

@ -270,6 +270,10 @@ int i915_timeline_pin(struct i915_timeline *tl)
if (err)
goto unpin;
tl->hwsp_offset =
i915_ggtt_offset(tl->hwsp_ggtt) +
offset_in_page(tl->hwsp_offset);
return 0;
unpin:

View File

@ -660,10 +660,16 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
frame->rq.ring = &frame->ring;
frame->rq.timeline = &frame->timeline;
dw = i915_timeline_pin(&frame->timeline);
if (dw < 0)
goto out_timeline;
dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs;
i915_timeline_fini(&frame->timeline);
i915_timeline_unpin(&frame->timeline);
out_timeline:
i915_timeline_fini(&frame->timeline);
out_frame:
kfree(frame);
return dw;
@ -1426,9 +1432,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
char hdr[80];
snprintf(hdr, sizeof(hdr),
"\t\tELSP[%d] count=%d, ring->start=%08x, rq: ",
"\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x}, rq: ",
idx, count,
i915_ggtt_offset(rq->ring->vma));
i915_ggtt_offset(rq->ring->vma),
rq->timeline->hwsp_offset);
print_request(m, rq, hdr);
} else {
drm_printf(m, "\t\tELSP[%d] idle\n", idx);
@ -1538,6 +1545,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
rq->ring->emit);
drm_printf(m, "\t\tring->space: 0x%08x\n",
rq->ring->space);
drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
rq->timeline->hwsp_offset);
print_request_ring(m, rq);
}

View File

@ -832,10 +832,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
list_for_each_entry(rq, &engine->timeline.requests, link) {
GEM_BUG_ON(!rq->global_seqno);
if (i915_request_signaled(rq))
continue;
if (!i915_request_signaled(rq))
dma_fence_set_error(&rq->fence, -EIO);
dma_fence_set_error(&rq->fence, -EIO);
i915_request_mark_complete(rq);
}
/* Flush the queued requests to the timeline list (for retiring). */
@ -845,9 +845,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
priolist_for_each_request_consume(rq, rn, p, i) {
list_del_init(&rq->sched.link);
dma_fence_set_error(&rq->fence, -EIO);
__i915_request_submit(rq);
dma_fence_set_error(&rq->fence, -EIO);
i915_request_mark_complete(rq);
}
rb_erase_cached(&p->node, &execlists->queue);
@ -2044,10 +2044,17 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
cs = gen8_emit_ggtt_write(cs, request->global_seqno,
cs = gen8_emit_ggtt_write(cs,
request->fence.seqno,
request->timeline->hwsp_offset);
cs = gen8_emit_ggtt_write(cs,
request->global_seqno,
intel_hws_seqno_address(request->engine));
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail);
@ -2056,18 +2063,20 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
/* We're using qword write, seqno should be aligned to 8 bytes. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
cs = gen8_emit_ggtt_write_rcs(cs,
request->global_seqno,
intel_hws_seqno_address(request->engine),
request->fence.seqno,
request->timeline->hwsp_offset,
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL);
cs = gen8_emit_ggtt_write_rcs(cs,
request->global_seqno,
intel_hws_seqno_address(request->engine),
PIPE_CONTROL_CS_STALL);
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

View File

@ -326,6 +326,11 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_CS_STALL);
*cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
*cs++ = rq->fence.seqno;
*cs++ = GFX_OP_PIPE_CONTROL(4);
*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
*cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
*cs++ = rq->global_seqno;
@ -427,6 +432,13 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_CS_STALL);
*cs++ = rq->timeline->hwsp_offset;
*cs++ = rq->fence.seqno;
*cs++ = GFX_OP_PIPE_CONTROL(4);
*cs++ = (PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_CS_STALL);
*cs++ = intel_hws_seqno_address(rq->engine);
*cs++ = rq->global_seqno;
@ -441,10 +453,19 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = rq->fence.seqno;
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = rq->global_seqno;
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
@ -457,14 +478,21 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
int i;
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = rq->fence.seqno;
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = rq->global_seqno;
for (i = 0; i < GEN7_XCS_WA; i++) {
*cs++ = MI_STORE_DWORD_INDEX;
*cs++ = I915_GEM_HWS_INDEX_ADDR;
*cs++ = rq->global_seqno;
*cs++ = I915_GEM_HWS_SEQNO_ADDR;
*cs++ = rq->fence.seqno;
}
*cs++ = MI_FLUSH_DW;
@ -472,7 +500,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
*cs++ = 0;
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
@ -738,7 +765,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled)
rq = NULL;
spin_lock_irqsave(&tl->lock, flags);
list_for_each_entry(pos, &tl->requests, link) {
if (!__i915_request_completed(pos, pos->global_seqno)) {
if (!i915_request_completed(pos)) {
rq = pos;
break;
}
@ -880,10 +907,10 @@ static void cancel_requests(struct intel_engine_cs *engine)
list_for_each_entry(request, &engine->timeline.requests, link) {
GEM_BUG_ON(!request->global_seqno);
if (i915_request_signaled(request))
continue;
if (!i915_request_signaled(request))
dma_fence_set_error(&request->fence, -EIO);
dma_fence_set_error(&request->fence, -EIO);
i915_request_mark_complete(request);
}
intel_write_status_page(engine,
@ -907,14 +934,20 @@ static void i9xx_submit_request(struct i915_request *request)
static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH;
*cs++ = MI_STORE_DWORD_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR;
*cs++ = rq->fence.seqno;
*cs++ = MI_STORE_DWORD_INDEX;
*cs++ = I915_GEM_HWS_INDEX_ADDR;
*cs++ = rq->global_seqno;
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
@ -927,8 +960,15 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
int i;
GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH;
*cs++ = MI_STORE_DWORD_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR;
*cs++ = rq->fence.seqno;
BUILD_BUG_ON(GEN5_WA_STORES < 1);
for (i = 0; i < GEN5_WA_STORES; i++) {
*cs++ = MI_STORE_DWORD_INDEX;
@ -937,6 +977,7 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
}
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
@ -1169,6 +1210,10 @@ int intel_ring_pin(struct intel_ring *ring)
GEM_BUG_ON(ring->vaddr);
ret = i915_timeline_pin(ring->timeline);
if (ret)
return ret;
flags = PIN_GLOBAL;
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
@ -1185,28 +1230,32 @@ int intel_ring_pin(struct intel_ring *ring)
else
ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
if (unlikely(ret))
return ret;
goto unpin_timeline;
}
ret = i915_vma_pin(vma, 0, 0, flags);
if (unlikely(ret))
return ret;
goto unpin_timeline;
if (i915_vma_is_map_and_fenceable(vma))
addr = (void __force *)i915_vma_pin_iomap(vma);
else
addr = i915_gem_object_pin_map(vma->obj, map);
if (IS_ERR(addr))
goto err;
if (IS_ERR(addr)) {
ret = PTR_ERR(addr);
goto unpin_ring;
}
vma->obj->pin_global++;
ring->vaddr = addr;
return 0;
err:
unpin_ring:
i915_vma_unpin(vma);
return PTR_ERR(addr);
unpin_timeline:
i915_timeline_unpin(ring->timeline);
return ret;
}
void intel_ring_reset(struct intel_ring *ring, u32 tail)
@ -1235,6 +1284,8 @@ void intel_ring_unpin(struct intel_ring *ring)
ring->vma->obj->pin_global--;
i915_vma_unpin(ring->vma);
i915_timeline_unpin(ring->timeline);
}
static struct i915_vma *

View File

@ -440,11 +440,6 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
return 0;
}
static u32 hwsp_address(const struct i915_timeline *tl)
{
return i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset;
}
static struct i915_request *
tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
@ -463,7 +458,7 @@ tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value)
if (IS_ERR(rq))
goto out_unpin;
err = emit_ggtt_store_dw(rq, hwsp_address(tl), value);
err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
i915_request_add(rq);
if (err)
rq = ERR_PTR(err);

View File

@ -30,6 +30,17 @@ struct mock_ring {
struct i915_timeline timeline;
};
static void mock_timeline_pin(struct i915_timeline *tl)
{
tl->pin_count++;
}
static void mock_timeline_unpin(struct i915_timeline *tl)
{
GEM_BUG_ON(!tl->pin_count);
tl->pin_count--;
}
static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
{
const unsigned long sz = PAGE_SIZE / 2;
@ -76,6 +87,8 @@ static void advance(struct mock_request *request)
{
list_del_init(&request->link);
mock_seqno_advance(request->base.engine, request->base.global_seqno);
i915_request_mark_complete(&request->base);
GEM_BUG_ON(!i915_request_completed(&request->base));
}
static void hw_delay_complete(struct timer_list *t)
@ -108,6 +121,7 @@ static void hw_delay_complete(struct timer_list *t)
static void mock_context_unpin(struct intel_context *ce)
{
mock_timeline_unpin(ce->ring->timeline);
i915_gem_context_put(ce->gem_context);
}
@ -129,6 +143,7 @@ mock_context_pin(struct intel_engine_cs *engine,
struct i915_gem_context *ctx)
{
struct intel_context *ce = to_intel_context(ctx, engine);
int err = -ENOMEM;
if (ce->pin_count++)
return ce;
@ -139,13 +154,15 @@ mock_context_pin(struct intel_engine_cs *engine,
goto err;
}
mock_timeline_pin(ce->ring->timeline);
ce->ops = &mock_context_ops;
i915_gem_context_get(ctx);
return ce;
err:
ce->pin_count = 0;
return ERR_PTR(-ENOMEM);
return ERR_PTR(err);
}
static int mock_request_alloc(struct i915_request *request)