2019-05-23 02:31:55 +07:00
|
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
/*
|
|
|
|
* Copyright © 2018 Intel Corporation
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "i915_selftest.h"
|
|
|
|
#include "selftests/igt_reset.h"
|
2019-05-23 02:31:56 +07:00
|
|
|
#include "selftests/igt_atomic.h"
|
2019-05-23 02:31:55 +07:00
|
|
|
|
|
|
|
static int igt_global_reset(void *arg)
|
|
|
|
{
|
2019-07-13 02:29:53 +07:00
|
|
|
struct intel_gt *gt = arg;
|
2019-05-23 02:31:55 +07:00
|
|
|
unsigned int reset_count;
|
2019-07-13 02:29:53 +07:00
|
|
|
intel_wakeref_t wakeref;
|
2019-05-23 02:31:55 +07:00
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
/* Check that we can issue a global GPU reset */
|
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
igt_global_reset_lock(gt);
|
2019-10-07 22:45:31 +07:00
|
|
|
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
|
2019-05-23 02:31:55 +07:00
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
reset_count = i915_reset_count(>->i915->gpu_error);
|
2019-05-23 02:31:55 +07:00
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
intel_gt_reset(gt, ALL_ENGINES, NULL);
|
2019-05-23 02:31:55 +07:00
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
if (i915_reset_count(>->i915->gpu_error) == reset_count) {
|
2019-05-23 02:31:55 +07:00
|
|
|
pr_err("No GPU reset recorded!\n");
|
|
|
|
err = -EINVAL;
|
|
|
|
}
|
|
|
|
|
2019-10-07 22:45:31 +07:00
|
|
|
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
|
2019-07-13 02:29:53 +07:00
|
|
|
igt_global_reset_unlock(gt);
|
2019-05-23 02:31:55 +07:00
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
if (intel_gt_is_wedged(gt))
|
2019-05-23 02:31:55 +07:00
|
|
|
err = -EIO;
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int igt_wedged_reset(void *arg)
|
|
|
|
{
|
2019-07-13 02:29:53 +07:00
|
|
|
struct intel_gt *gt = arg;
|
2019-05-23 02:31:55 +07:00
|
|
|
intel_wakeref_t wakeref;
|
|
|
|
|
|
|
|
/* Check that we can recover a wedged device with a GPU reset */
|
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
igt_global_reset_lock(gt);
|
2019-10-07 22:45:31 +07:00
|
|
|
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
|
2019-05-23 02:31:55 +07:00
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
intel_gt_set_wedged(gt);
|
2019-05-23 02:31:55 +07:00
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
GEM_BUG_ON(!intel_gt_is_wedged(gt));
|
|
|
|
intel_gt_reset(gt, ALL_ENGINES, NULL);
|
2019-05-23 02:31:55 +07:00
|
|
|
|
2019-10-07 22:45:31 +07:00
|
|
|
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
|
2019-07-13 02:29:53 +07:00
|
|
|
igt_global_reset_unlock(gt);
|
2019-05-23 02:31:55 +07:00
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
return intel_gt_is_wedged(gt) ? -EIO : 0;
|
2019-05-23 02:31:55 +07:00
|
|
|
}
|
|
|
|
|
2019-05-23 02:31:56 +07:00
|
|
|
static int igt_atomic_reset(void *arg)
|
|
|
|
{
|
2019-07-13 02:29:53 +07:00
|
|
|
struct intel_gt *gt = arg;
|
2019-05-23 02:31:56 +07:00
|
|
|
const typeof(*igt_atomic_phases) *p;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
/* Check that the resets are usable from atomic context */
|
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
intel_gt_pm_get(gt);
|
|
|
|
igt_global_reset_lock(gt);
|
2019-05-23 02:31:56 +07:00
|
|
|
|
|
|
|
/* Flush any requests before we get started and check basics */
|
2019-07-13 02:29:53 +07:00
|
|
|
if (!igt_force_reset(gt))
|
2019-05-23 02:31:56 +07:00
|
|
|
goto unlock;
|
|
|
|
|
|
|
|
for (p = igt_atomic_phases; p->name; p++) {
|
2019-06-26 22:45:48 +07:00
|
|
|
intel_engine_mask_t awake;
|
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
GEM_TRACE("__intel_gt_reset under %s\n", p->name);
|
2019-05-23 02:31:56 +07:00
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
awake = reset_prepare(gt);
|
2019-06-26 20:44:33 +07:00
|
|
|
p->critical_section_begin();
|
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
err = __intel_gt_reset(gt, ALL_ENGINES);
|
2019-06-26 20:44:33 +07:00
|
|
|
|
2019-05-23 02:31:56 +07:00
|
|
|
p->critical_section_end();
|
2019-07-13 02:29:53 +07:00
|
|
|
reset_finish(gt, awake);
|
2019-05-23 02:31:56 +07:00
|
|
|
|
|
|
|
if (err) {
|
2019-07-13 02:29:53 +07:00
|
|
|
pr_err("__intel_gt_reset failed under %s\n", p->name);
|
2019-05-23 02:31:56 +07:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* As we poke around the guts, do a full reset before continuing. */
|
2019-07-13 02:29:53 +07:00
|
|
|
igt_force_reset(gt);
|
2019-05-23 02:31:56 +07:00
|
|
|
|
|
|
|
unlock:
|
2019-07-13 02:29:53 +07:00
|
|
|
igt_global_reset_unlock(gt);
|
|
|
|
intel_gt_pm_put(gt);
|
2019-05-23 02:31:56 +07:00
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2019-06-26 20:44:33 +07:00
|
|
|
static int igt_atomic_engine_reset(void *arg)
|
|
|
|
{
|
2019-07-13 02:29:53 +07:00
|
|
|
struct intel_gt *gt = arg;
|
2019-06-26 20:44:33 +07:00
|
|
|
const typeof(*igt_atomic_phases) *p;
|
|
|
|
struct intel_engine_cs *engine;
|
|
|
|
enum intel_engine_id id;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
/* Check that the resets are usable from atomic context */
|
|
|
|
|
2019-09-28 04:17:47 +07:00
|
|
|
if (!intel_has_reset_engine(gt))
|
2019-06-26 20:44:33 +07:00
|
|
|
return 0;
|
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
if (USES_GUC_SUBMISSION(gt->i915))
|
2019-06-26 20:44:33 +07:00
|
|
|
return 0;
|
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
intel_gt_pm_get(gt);
|
|
|
|
igt_global_reset_lock(gt);
|
2019-06-26 20:44:33 +07:00
|
|
|
|
|
|
|
/* Flush any requests before we get started and check basics */
|
2019-07-13 02:29:53 +07:00
|
|
|
if (!igt_force_reset(gt))
|
2019-06-26 20:44:33 +07:00
|
|
|
goto out_unlock;
|
|
|
|
|
2019-10-17 16:45:00 +07:00
|
|
|
for_each_engine(engine, gt, id) {
|
drm/i915/selftests: Flush interrupts before disabling tasklets
When setting up the system to perform the atomic reset, we need to
serialise with any ongoing interrupt tasklet or else:
<0> [472.951428] i915_sel-4442 0d..1 466527056us : __i915_request_submit: rcs0 fence 11659:2, current 0
<0> [472.951554] i915_sel-4442 0d..1 466527059us : __execlists_submission_tasklet: rcs0: queue_priority_hint:-2147483648, submit:yes
<0> [472.951681] i915_sel-4442 0d..1 466527061us : trace_ports: rcs0: submit { 11659:2, 0:0 }
<0> [472.951805] i915_sel-4442 0.... 466527114us : __igt_atomic_reset_engine: i915_reset_engine(rcs0:active) under hardirq
<0> [472.951932] i915_sel-4442 0d... 466527115us : intel_engine_reset: rcs0 flags=11d
<0> [472.952056] i915_sel-4442 0d... 466527117us : execlists_reset_prepare: rcs0: depth<-1
<0> [472.952179] i915_sel-4442 0d... 466527119us : intel_engine_stop_cs: rcs0
<0> [472.952305] <idle>-0 1..s1 466527119us : process_csb: rcs0 cs-irq head=3, tail=4
<0> [472.952431] i915_sel-4442 0d... 466527122us : __intel_gt_reset: engine_mask=1
<0> [472.952557] <idle>-0 1..s1 466527124us : process_csb: rcs0 csb[4]: status=0x00000001:0x00000000
<0> [472.952683] <idle>-0 1..s1 466527130us : trace_ports: rcs0: promote { 11659:2*, 0:0 }
<0> [472.952808] i915_sel-4442 0d... 466527131us : execlists_reset: rcs0
<0> [472.952933] i915_sel-4442 0d..1 466527133us : process_csb: rcs0 cs-irq head=3, tail=4
<0> [472.953059] i915_sel-4442 0d..1 466527134us : process_csb: rcs0 csb[4]: status=0x00000001:0x00000000
<0> [472.953185] i915_sel-4442 0d..1 466527136us : trace_ports: rcs0: preempted { 11659:2*, 0:0 }
<0> [472.953310] i915_sel-4442 0d..1 466527150us : assert_pending_valid: Nothing pending for promotion!
<0> [472.953436] i915_sel-4442 0d..1 466527158us : process_csb: process_csb:1930 GEM_BUG_ON(!assert_pending_valid(execlists, "promote"))
We have the same CSB events being seen by process_csb() on two different
processors. One being issued by the reset in the test, the other by the
interrupt; this scenario is supposed to be prevented by flushing the
interrupt tasklet with tasklet_disable() before we enter the atomic
reset.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112069
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191023232443.17450-1-chris@chris-wilson.co.uk
2019-10-24 06:24:43 +07:00
|
|
|
tasklet_disable(&engine->execlists.tasklet);
|
2019-06-26 20:44:33 +07:00
|
|
|
intel_engine_pm_get(engine);
|
|
|
|
|
|
|
|
for (p = igt_atomic_phases; p->name; p++) {
|
2019-07-13 02:29:53 +07:00
|
|
|
GEM_TRACE("intel_engine_reset(%s) under %s\n",
|
2019-06-26 20:44:33 +07:00
|
|
|
engine->name, p->name);
|
|
|
|
|
|
|
|
p->critical_section_begin();
|
2019-07-13 02:29:53 +07:00
|
|
|
err = intel_engine_reset(engine, NULL);
|
2019-06-26 20:44:33 +07:00
|
|
|
p->critical_section_end();
|
|
|
|
|
|
|
|
if (err) {
|
2019-07-13 02:29:53 +07:00
|
|
|
pr_err("intel_engine_reset(%s) failed under %s\n",
|
2019-06-26 20:44:33 +07:00
|
|
|
engine->name, p->name);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
intel_engine_pm_put(engine);
|
|
|
|
tasklet_enable(&engine->execlists.tasklet);
|
|
|
|
if (err)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* As we poke around the guts, do a full reset before continuing. */
|
2019-07-13 02:29:53 +07:00
|
|
|
igt_force_reset(gt);
|
2019-06-26 20:44:33 +07:00
|
|
|
|
|
|
|
out_unlock:
|
2019-07-13 02:29:53 +07:00
|
|
|
igt_global_reset_unlock(gt);
|
|
|
|
intel_gt_pm_put(gt);
|
2019-06-26 20:44:33 +07:00
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2019-05-23 02:31:55 +07:00
|
|
|
int intel_reset_live_selftests(struct drm_i915_private *i915)
|
|
|
|
{
|
|
|
|
static const struct i915_subtest tests[] = {
|
|
|
|
SUBTEST(igt_global_reset), /* attempt to recover GPU first */
|
|
|
|
SUBTEST(igt_wedged_reset),
|
2019-05-23 02:31:56 +07:00
|
|
|
SUBTEST(igt_atomic_reset),
|
2019-06-26 20:44:33 +07:00
|
|
|
SUBTEST(igt_atomic_engine_reset),
|
2019-05-23 02:31:55 +07:00
|
|
|
};
|
2019-07-13 02:29:53 +07:00
|
|
|
struct intel_gt *gt = &i915->gt;
|
2019-05-23 02:31:55 +07:00
|
|
|
|
2019-09-28 04:17:47 +07:00
|
|
|
if (!intel_has_gpu_reset(gt))
|
2019-05-23 02:31:55 +07:00
|
|
|
return 0;
|
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
if (intel_gt_is_wedged(gt))
|
2019-05-23 02:31:55 +07:00
|
|
|
return -EIO; /* we're long past hope of a successful reset */
|
|
|
|
|
2019-07-13 02:29:53 +07:00
|
|
|
return intel_gt_live_subtests(tests, gt);
|
2019-05-23 02:31:55 +07:00
|
|
|
}
|