mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-15 12:07:14 +07:00
3a7a92aba8
If the preempted context takes too long to relinquish control, e.g. it is stuck inside a shader with arbitration disabled, evict that context with an engine reset. This ensures that preemptions are reasonably responsive, providing a tighter QoS for the more important context at the cost of flagging unresponsive contexts more frequently (i.e. instead of using an ~10s hangcheck, we now evict at ~100ms). The challenge lies in picking a timeout that can be reasonably serviced by HW for typical workloads, balancing the existing clients against the needs for responsiveness. Note that coupled with timeslicing, this will lead to rapid GPU "hang" detection with multiple active contexts vying for GPU time. The forced preemption mechanism can be compiled out with ./scripts/config --set-val DRM_I915_PREEMPT_TIMEOUT 0 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20191023133108.21401-2-chris@chris-wilson.co.uk
108 lines
2.5 KiB
C
108 lines
2.5 KiB
C
// SPDX-License-Identifier: MIT
|
|
/*
|
|
* Copyright © 2019 Intel Corporation
|
|
*/
|
|
|
|
#include <drm/drm_drv.h>
|
|
|
|
#include "i915_drv.h"
|
|
#include "i915_utils.h"
|
|
|
|
#define FDO_BUG_URL "https://bugs.freedesktop.org/enter_bug.cgi?product=DRI"
|
|
#define FDO_BUG_MSG "Please file a bug at " FDO_BUG_URL " against DRM/Intel " \
|
|
"providing the dmesg log by booting with drm.debug=0xf"
|
|
|
|
void
|
|
__i915_printk(struct drm_i915_private *dev_priv, const char *level,
|
|
const char *fmt, ...)
|
|
{
|
|
static bool shown_bug_once;
|
|
struct device *kdev = dev_priv->drm.dev;
|
|
bool is_error = level[1] <= KERN_ERR[1];
|
|
bool is_debug = level[1] == KERN_DEBUG[1];
|
|
struct va_format vaf;
|
|
va_list args;
|
|
|
|
if (is_debug && !(drm_debug & DRM_UT_DRIVER))
|
|
return;
|
|
|
|
va_start(args, fmt);
|
|
|
|
vaf.fmt = fmt;
|
|
vaf.va = &args;
|
|
|
|
if (is_error)
|
|
dev_printk(level, kdev, "%pV", &vaf);
|
|
else
|
|
dev_printk(level, kdev, "[" DRM_NAME ":%ps] %pV",
|
|
__builtin_return_address(0), &vaf);
|
|
|
|
va_end(args);
|
|
|
|
if (is_error && !shown_bug_once) {
|
|
/*
|
|
* Ask the user to file a bug report for the error, except
|
|
* if they may have caused the bug by fiddling with unsafe
|
|
* module parameters.
|
|
*/
|
|
if (!test_taint(TAINT_USER))
|
|
dev_notice(kdev, "%s", FDO_BUG_MSG);
|
|
shown_bug_once = true;
|
|
}
|
|
}
|
|
|
|
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
|
|
static unsigned int i915_probe_fail_count;
|
|
|
|
int __i915_inject_load_error(struct drm_i915_private *i915, int err,
|
|
const char *func, int line)
|
|
{
|
|
if (i915_probe_fail_count >= i915_modparams.inject_load_failure)
|
|
return 0;
|
|
|
|
if (++i915_probe_fail_count < i915_modparams.inject_load_failure)
|
|
return 0;
|
|
|
|
__i915_printk(i915, KERN_INFO,
|
|
"Injecting failure %d at checkpoint %u [%s:%d]\n",
|
|
err, i915_modparams.inject_load_failure, func, line);
|
|
i915_modparams.inject_load_failure = 0;
|
|
return err;
|
|
}
|
|
|
|
bool i915_error_injected(void)
|
|
{
|
|
return i915_probe_fail_count && !i915_modparams.inject_load_failure;
|
|
}
|
|
|
|
#endif
|
|
|
|
void cancel_timer(struct timer_list *t)
|
|
{
|
|
if (!READ_ONCE(t->expires))
|
|
return;
|
|
|
|
del_timer(t);
|
|
WRITE_ONCE(t->expires, 0);
|
|
}
|
|
|
|
void set_timer_ms(struct timer_list *t, unsigned long timeout)
|
|
{
|
|
if (!timeout) {
|
|
cancel_timer(t);
|
|
return;
|
|
}
|
|
|
|
timeout = msecs_to_jiffies_timeout(timeout);
|
|
|
|
/*
|
|
* Paranoia to make sure the compiler computes the timeout before
|
|
* loading 'jiffies' as jiffies is volatile and may be updated in
|
|
* the background by a timer tick. All to reduce the complexity
|
|
* of the addition and reduce the risk of losing a jiffie.
|
|
*/
|
|
barrier();
|
|
|
|
mod_timer(t, jiffies + timeout);
|
|
}
|