linux_dsm_epyc7002/drivers/gpu/drm/i915/gt/selftest_reset.c
Chris Wilson 4b9bb9728c drm/i915: Only recover active engines
If we issue a reset to a currently idle engine, leave it idle
afterwards. This is useful to excise a linkage between reset and the
shrinker. When waking the engine, we need to pin the default context
image which we use for overwriting a guilty context -- if the engine is
idle we do not need this pinned image! However, this pinning means that
waking the engine acquires FS_RECLAIM, and so may trigger the
shrinker. The shrinker itself may need to wait upon the GPU to unbind
an object and so may require the services of reset; ergo we should avoid
the engine wake-up path.

The danger in skipping the recovery for idle engines is that we leave the
engine with no context defined, which may interfere with the operation of
the power context on some older platforms. In practice, we should only
be resetting an active GPU, but it is something to look out for on
Ironlake (if memory serves).
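
The shape of the fix, in a minimal sketch (paraphrased from the
surrounding series rather than quoted from the diff; it relies on the
intel_engine_pm_get_if_awake() helper and engine->mask from the same
series, and elides the rest of the per-engine preparation work):

/*
 * Sketch only: take a pm reference on engines that are already awake,
 * so reset never wakes an idle engine and never pins the default
 * context image (which would acquire FS_RECLAIM and could recurse
 * into the shrinker).
 */
static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
{
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        intel_engine_mask_t awake = 0;

        for_each_engine(engine, gt->i915, id) {
                if (intel_engine_pm_get_if_awake(engine))
                        awake |= engine->mask; /* recover only these */
        }

        return awake;
}

Only the engines marked in the returned mask are recovered, and their
pm references are dropped again in reset_finish(); the selftests below
exercise exactly this reset_prepare()/reset_finish() pairing.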

Fixes: 79ffac8599 ("drm/i915: Invert the GEM wakeref hierarchy")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190626154549.10066-2-chris@chris-wilson.co.uk
(cherry picked from commit 18398904ca)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
2019-08-01 13:22:00 +03:00


// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */

#include "i915_selftest.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_atomic.h"
static int igt_global_reset(void *arg)
{
        struct intel_gt *gt = arg;
        unsigned int reset_count;
        intel_wakeref_t wakeref;
        int err = 0;

        /* Check that we can issue a global GPU reset */

        igt_global_reset_lock(gt);
        wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);

        reset_count = i915_reset_count(&gt->i915->gpu_error);

        intel_gt_reset(gt, ALL_ENGINES, NULL);

        if (i915_reset_count(&gt->i915->gpu_error) == reset_count) {
                pr_err("No GPU reset recorded!\n");
                err = -EINVAL;
        }

        intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
        igt_global_reset_unlock(gt);

        if (intel_gt_is_wedged(gt))
                err = -EIO;

        return err;
}
static int igt_wedged_reset(void *arg)
{
        struct intel_gt *gt = arg;
        intel_wakeref_t wakeref;

        /* Check that we can recover a wedged device with a GPU reset */

        igt_global_reset_lock(gt);
        wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);

        intel_gt_set_wedged(gt);

        GEM_BUG_ON(!intel_gt_is_wedged(gt));
        intel_gt_reset(gt, ALL_ENGINES, NULL);

        intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
        igt_global_reset_unlock(gt);

        return intel_gt_is_wedged(gt) ? -EIO : 0;
}
static int igt_atomic_reset(void *arg)
{
        struct intel_gt *gt = arg;
        const typeof(*igt_atomic_phases) *p;
        int err = 0;

        /* Check that the resets are usable from atomic context */

        intel_gt_pm_get(gt);
        igt_global_reset_lock(gt);

        /* Flush any requests before we get started and check basics */
        if (!igt_force_reset(gt))
                goto unlock;

        for (p = igt_atomic_phases; p->name; p++) {
                intel_engine_mask_t awake;

                GEM_TRACE("__intel_gt_reset under %s\n", p->name);

                awake = reset_prepare(gt);
                p->critical_section_begin();

                err = __intel_gt_reset(gt, ALL_ENGINES);

                p->critical_section_end();
                reset_finish(gt, awake);

                if (err) {
                        pr_err("__intel_gt_reset failed under %s\n", p->name);
                        break;
                }
        }

        /* As we poke around the guts, do a full reset before continuing. */
        igt_force_reset(gt);

unlock:
        igt_global_reset_unlock(gt);
        intel_gt_pm_put(gt);

        return err;
}
static int igt_atomic_engine_reset(void *arg)
{
        struct intel_gt *gt = arg;
        const typeof(*igt_atomic_phases) *p;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err = 0;

        /* Check that the resets are usable from atomic context */

        if (!intel_has_reset_engine(gt->i915))
                return 0;

        if (USES_GUC_SUBMISSION(gt->i915))
                return 0;

        intel_gt_pm_get(gt);
        igt_global_reset_lock(gt);

        /* Flush any requests before we get started and check basics */
        if (!igt_force_reset(gt))
                goto out_unlock;

        for_each_engine(engine, gt->i915, id) {
                /*
                 * Keep the execlists submission tasklet from running
                 * concurrently while we reset the engine from within
                 * the atomic phases.
                 */
                tasklet_disable_nosync(&engine->execlists.tasklet);
                intel_engine_pm_get(engine);

                for (p = igt_atomic_phases; p->name; p++) {
                        GEM_TRACE("intel_engine_reset(%s) under %s\n",
                                  engine->name, p->name);

                        p->critical_section_begin();
                        err = intel_engine_reset(engine, NULL);
                        p->critical_section_end();

                        if (err) {
                                pr_err("intel_engine_reset(%s) failed under %s\n",
                                       engine->name, p->name);
                                break;
                        }
                }

                intel_engine_pm_put(engine);
                tasklet_enable(&engine->execlists.tasklet);
                if (err)
                        break;
        }

        /* As we poke around the guts, do a full reset before continuing. */
        igt_force_reset(gt);

out_unlock:
        igt_global_reset_unlock(gt);
        intel_gt_pm_put(gt);

        return err;
}
int intel_reset_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_global_reset), /* attempt to recover GPU first */
                SUBTEST(igt_wedged_reset),
                SUBTEST(igt_atomic_reset),
                SUBTEST(igt_atomic_engine_reset),
        };
        struct intel_gt *gt = &i915->gt;

        if (!intel_has_gpu_reset(gt->i915))
                return 0;

        if (intel_gt_is_wedged(gt))
                return -EIO; /* we're long past hope of a successful reset */

        return intel_gt_live_subtests(tests, gt);
}
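
A build/run note: like the other i915 selftests, this file is not
compiled standalone but is textually included at the bottom of the
driver source it exercises (gt/intel_reset.c), which is how it can call
static helpers such as reset_prepare() and reset_finish() directly:

/* At the bottom of gt/intel_reset.c (the usual i915 selftest pattern): */
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_reset.c"
#endif

The live subtests are then run by loading the module with the
i915.live_selftests module parameter set, typically driven by IGT's
i915_selftest test runner.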