linux_dsm_epyc7002/drivers/gpu/drm/i915/i915_reset.h
Chris Wilson 2caffbf117 drm/i915: Revoke mmaps and prevent access to fence registers across reset
Previously, we were able to rely on the recursive properties of
struct_mutex to allow us to serialise revoking mmaps and reacquiring the
FENCE registers with them being clobbered over a global device reset.
I then proceeded to throw out the baby with the bath water in order to
pursue a struct_mutex-less reset.

Perusing LWN for alternative strategies, the dilemma on how to serialise
access to a global resource on one side was answered by
https://lwn.net/Articles/202847/ -- Sleepable RCU:

    1  int readside(void) {
    2      int idx;
    3      rcu_read_lock();
    4	   if (nomoresrcu) {
    5          rcu_read_unlock();
    6	       return -EINVAL;
    7      }
    8	   idx = srcu_read_lock(&ss);
    9	   rcu_read_unlock();
    10	   /* SRCU read-side critical section. */
    11	   srcu_read_unlock(&ss, idx);
    12	   return 0;
    13 }
    14
    15 void cleanup(void)
    16 {
    17     nomoresrcu = 1;
    18     synchronize_rcu();
    19     synchronize_srcu(&ss);
    20     cleanup_srcu_struct(&ss);
    21 }

No more worrying about stop_machine, just an uber-complex mutex,
optimised for reads, with the overhead pushed to the rare reset path.

However, we do run the risk of a deadlock as we allocate underneath the
SRCU read lock, and the allocation may require a GPU reset, causing a
dependency cycle via the in-flight requests. We resolve that by declaring
the driver wedged and cancelling all in-flight rendering.

v2: Use expedited rcu barriers to match our earlier timing
characteristics.
v3: Try to annotate locking contexts for sparse
v4: Reduce selftest lock duration to avoid a reset deadlock with fences
v5: s/srcu/reset_backoff_srcu/
v6: Remove more stale comments

Testcase: igt/gem_mmap_gtt/hang
Fixes: eb8d0f5af4 ("drm/i915: Remove GPU reset dependence on struct_mutex")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190208153708.20023-2-chris@chris-wilson.co.uk
2019-02-08 16:47:32 +00:00

64 lines
1.7 KiB
C

/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2008-2018 Intel Corporation
*/
#ifndef I915_RESET_H
#define I915_RESET_H
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/srcu.h>
struct drm_i915_private;
struct intel_engine_cs;
struct intel_guc;
__printf(4, 5)
void i915_handle_error(struct drm_i915_private *i915,
u32 engine_mask,
unsigned long flags,
const char *fmt, ...);
#define I915_ERROR_CAPTURE BIT(0)
void i915_clear_error_registers(struct drm_i915_private *i915);
void i915_reset(struct drm_i915_private *i915,
unsigned int stalled_mask,
const char *reason);
int i915_reset_engine(struct intel_engine_cs *engine,
const char *reason);
void i915_reset_request(struct i915_request *rq, bool guilty);
bool i915_reset_flush(struct drm_i915_private *i915);
int __must_check i915_reset_trylock(struct drm_i915_private *i915);
void i915_reset_unlock(struct drm_i915_private *i915, int tag);
bool intel_has_gpu_reset(struct drm_i915_private *i915);
bool intel_has_reset_engine(struct drm_i915_private *i915);
int intel_gpu_reset(struct drm_i915_private *i915, u32 engine_mask);
int intel_reset_guc(struct drm_i915_private *i915);
struct i915_wedge_me {
struct delayed_work work;
struct drm_i915_private *i915;
const char *name;
};
void __i915_init_wedge(struct i915_wedge_me *w,
struct drm_i915_private *i915,
long timeout,
const char *name);
void __i915_fini_wedge(struct i915_wedge_me *w);
#define i915_wedge_on_timeout(W, DEV, TIMEOUT) \
for (__i915_init_wedge((W), (DEV), (TIMEOUT), __func__); \
(W)->i915; \
__i915_fini_wedge((W)))
#endif /* I915_RESET_H */