mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-26 00:39:18 +07:00
93100fdeb4
When setting up the system to perform the atomic reset, we need to serialise with any ongoing interrupt tasklet or else: <0> [472.951428] i915_sel-4442 0d..1 466527056us : __i915_request_submit: rcs0 fence 11659:2, current 0 <0> [472.951554] i915_sel-4442 0d..1 466527059us : __execlists_submission_tasklet: rcs0: queue_priority_hint:-2147483648, submit:yes <0> [472.951681] i915_sel-4442 0d..1 466527061us : trace_ports: rcs0: submit { 11659:2, 0:0 } <0> [472.951805] i915_sel-4442 0.... 466527114us : __igt_atomic_reset_engine: i915_reset_engine(rcs0:active) under hardirq <0> [472.951932] i915_sel-4442 0d... 466527115us : intel_engine_reset: rcs0 flags=11d <0> [472.952056] i915_sel-4442 0d... 466527117us : execlists_reset_prepare: rcs0: depth<-1 <0> [472.952179] i915_sel-4442 0d... 466527119us : intel_engine_stop_cs: rcs0 <0> [472.952305] <idle>-0 1..s1 466527119us : process_csb: rcs0 cs-irq head=3, tail=4 <0> [472.952431] i915_sel-4442 0d... 466527122us : __intel_gt_reset: engine_mask=1 <0> [472.952557] <idle>-0 1..s1 466527124us : process_csb: rcs0 csb[4]: status=0x00000001:0x00000000 <0> [472.952683] <idle>-0 1..s1 466527130us : trace_ports: rcs0: promote { 11659:2*, 0:0 } <0> [472.952808] i915_sel-4442 0d... 466527131us : execlists_reset: rcs0 <0> [472.952933] i915_sel-4442 0d..1 466527133us : process_csb: rcs0 cs-irq head=3, tail=4 <0> [472.953059] i915_sel-4442 0d..1 466527134us : process_csb: rcs0 csb[4]: status=0x00000001:0x00000000 <0> [472.953185] i915_sel-4442 0d..1 466527136us : trace_ports: rcs0: preempted { 11659:2*, 0:0 } <0> [472.953310] i915_sel-4442 0d..1 466527150us : assert_pending_valid: Nothing pending for promotion! <0> [472.953436] i915_sel-4442 0d..1 466527158us : process_csb: process_csb:1930 GEM_BUG_ON(!assert_pending_valid(execlists, "promote")) We have the same CSB events being seen by process_csb() on two different processors. 
One being issued by the reset in the test, the other by the interrupt; this scenario is supposed to be prevented by flushing the interrupt tasklet with tasklet_disable() before we enter the atomic reset.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112069
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191023232443.17450-1-chris@chris-wilson.co.uk
181 lines
3.8 KiB
C
181 lines
3.8 KiB
C
// SPDX-License-Identifier: MIT
|
|
/*
|
|
* Copyright © 2018 Intel Corporation
|
|
*/
|
|
|
|
#include "i915_selftest.h"
|
|
#include "selftests/igt_reset.h"
|
|
#include "selftests/igt_atomic.h"
|
|
|
|
static int igt_global_reset(void *arg)
|
|
{
|
|
struct intel_gt *gt = arg;
|
|
unsigned int reset_count;
|
|
intel_wakeref_t wakeref;
|
|
int err = 0;
|
|
|
|
/* Check that we can issue a global GPU reset */
|
|
|
|
igt_global_reset_lock(gt);
|
|
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
|
|
|
|
reset_count = i915_reset_count(>->i915->gpu_error);
|
|
|
|
intel_gt_reset(gt, ALL_ENGINES, NULL);
|
|
|
|
if (i915_reset_count(>->i915->gpu_error) == reset_count) {
|
|
pr_err("No GPU reset recorded!\n");
|
|
err = -EINVAL;
|
|
}
|
|
|
|
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
|
|
igt_global_reset_unlock(gt);
|
|
|
|
if (intel_gt_is_wedged(gt))
|
|
err = -EIO;
|
|
|
|
return err;
|
|
}
|
|
|
|
static int igt_wedged_reset(void *arg)
|
|
{
|
|
struct intel_gt *gt = arg;
|
|
intel_wakeref_t wakeref;
|
|
|
|
/* Check that we can recover a wedged device with a GPU reset */
|
|
|
|
igt_global_reset_lock(gt);
|
|
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
|
|
|
|
intel_gt_set_wedged(gt);
|
|
|
|
GEM_BUG_ON(!intel_gt_is_wedged(gt));
|
|
intel_gt_reset(gt, ALL_ENGINES, NULL);
|
|
|
|
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
|
|
igt_global_reset_unlock(gt);
|
|
|
|
return intel_gt_is_wedged(gt) ? -EIO : 0;
|
|
}
|
|
|
|
static int igt_atomic_reset(void *arg)
|
|
{
|
|
struct intel_gt *gt = arg;
|
|
const typeof(*igt_atomic_phases) *p;
|
|
int err = 0;
|
|
|
|
/* Check that the resets are usable from atomic context */
|
|
|
|
intel_gt_pm_get(gt);
|
|
igt_global_reset_lock(gt);
|
|
|
|
/* Flush any requests before we get started and check basics */
|
|
if (!igt_force_reset(gt))
|
|
goto unlock;
|
|
|
|
for (p = igt_atomic_phases; p->name; p++) {
|
|
intel_engine_mask_t awake;
|
|
|
|
GEM_TRACE("__intel_gt_reset under %s\n", p->name);
|
|
|
|
awake = reset_prepare(gt);
|
|
p->critical_section_begin();
|
|
|
|
err = __intel_gt_reset(gt, ALL_ENGINES);
|
|
|
|
p->critical_section_end();
|
|
reset_finish(gt, awake);
|
|
|
|
if (err) {
|
|
pr_err("__intel_gt_reset failed under %s\n", p->name);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* As we poke around the guts, do a full reset before continuing. */
|
|
igt_force_reset(gt);
|
|
|
|
unlock:
|
|
igt_global_reset_unlock(gt);
|
|
intel_gt_pm_put(gt);
|
|
|
|
return err;
|
|
}
|
|
|
|
static int igt_atomic_engine_reset(void *arg)
|
|
{
|
|
struct intel_gt *gt = arg;
|
|
const typeof(*igt_atomic_phases) *p;
|
|
struct intel_engine_cs *engine;
|
|
enum intel_engine_id id;
|
|
int err = 0;
|
|
|
|
/* Check that the resets are usable from atomic context */
|
|
|
|
if (!intel_has_reset_engine(gt))
|
|
return 0;
|
|
|
|
if (USES_GUC_SUBMISSION(gt->i915))
|
|
return 0;
|
|
|
|
intel_gt_pm_get(gt);
|
|
igt_global_reset_lock(gt);
|
|
|
|
/* Flush any requests before we get started and check basics */
|
|
if (!igt_force_reset(gt))
|
|
goto out_unlock;
|
|
|
|
for_each_engine(engine, gt, id) {
|
|
tasklet_disable(&engine->execlists.tasklet);
|
|
intel_engine_pm_get(engine);
|
|
|
|
for (p = igt_atomic_phases; p->name; p++) {
|
|
GEM_TRACE("intel_engine_reset(%s) under %s\n",
|
|
engine->name, p->name);
|
|
|
|
p->critical_section_begin();
|
|
err = intel_engine_reset(engine, NULL);
|
|
p->critical_section_end();
|
|
|
|
if (err) {
|
|
pr_err("intel_engine_reset(%s) failed under %s\n",
|
|
engine->name, p->name);
|
|
break;
|
|
}
|
|
}
|
|
|
|
intel_engine_pm_put(engine);
|
|
tasklet_enable(&engine->execlists.tasklet);
|
|
if (err)
|
|
break;
|
|
}
|
|
|
|
/* As we poke around the guts, do a full reset before continuing. */
|
|
igt_force_reset(gt);
|
|
|
|
out_unlock:
|
|
igt_global_reset_unlock(gt);
|
|
intel_gt_pm_put(gt);
|
|
|
|
return err;
|
|
}
|
|
|
|
int intel_reset_live_selftests(struct drm_i915_private *i915)
|
|
{
|
|
static const struct i915_subtest tests[] = {
|
|
SUBTEST(igt_global_reset), /* attempt to recover GPU first */
|
|
SUBTEST(igt_wedged_reset),
|
|
SUBTEST(igt_atomic_reset),
|
|
SUBTEST(igt_atomic_engine_reset),
|
|
};
|
|
struct intel_gt *gt = &i915->gt;
|
|
|
|
if (!intel_has_gpu_reset(gt))
|
|
return 0;
|
|
|
|
if (intel_gt_is_wedged(gt))
|
|
return -EIO; /* we're long past hope of a successful reset */
|
|
|
|
return intel_gt_live_subtests(tests, gt);
|
|
}
|