mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-18 09:36:11 +07:00
e886196469
Having introduced per-context seqno, we now have a means to identify progress across the system without fear of rollback as befell the global_seqno. That is, we can program a MI_SEMAPHORE_WAIT operation in advance of submission safe in the knowledge that our target seqno and address are stable. However, since we are telling the GPU to busy-spin on the target address until it matches the signaling seqno, we only want to do so when we are sure that busy-spin will be completed quickly. To achieve this we only submit the request to HW once the signaler is itself executing (modulo preemption causing us to wait longer), and we only do so for default and above priority requests (so that idle priority tasks never themselves hog the GPU waiting for others). As might be reasonably expected, HW semaphores excel in inter-engine synchronisation microbenchmarks (where the 3x reduced latency / increased throughput more than offset the power cost of spinning on a second ring) and have significant improvement (can be up to ~10%, most see no change) for single clients that utilize multiple engines (typically media players and transcoders), without regressing multiple clients that can saturate the system or changing the power envelope dramatically. v3: Drop the older NEQ branch, now we pin the signaler's HWSP anyway. v4: Tell the world and include it as part of scheduler caps. Testcase: igt/gem_exec_whisper Testcase: igt/benchmarks/gem_wsim Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190301170901.8340-3-chris@chris-wilson.co.uk
101 lines
2.7 KiB
C
101 lines
2.7 KiB
C
/*
|
|
* SPDX-License-Identifier: MIT
|
|
*
|
|
* i915_sw_fence.h - library routines for N:M synchronisation points
|
|
*
|
|
* Copyright (C) 2016 Intel Corporation
|
|
*/
|
|
|
|
#ifndef _I915_SW_FENCE_H_
|
|
#define _I915_SW_FENCE_H_
|
|
|
|
#include <linux/gfp.h>
|
|
#include <linux/kref.h>
|
|
#include <linux/notifier.h> /* for NOTIFY_DONE */
|
|
#include <linux/wait.h>
|
|
|
|
struct completion;
|
|
struct dma_fence;
|
|
struct dma_fence_ops;
|
|
struct reservation_object;
|
|
|
|
struct i915_sw_fence {
|
|
wait_queue_head_t wait;
|
|
unsigned long flags;
|
|
atomic_t pending;
|
|
};
|
|
|
|
/*
 * Bit numbers for the two low flag bits, and the mask that excludes them.
 * NOTE(review): presumably applied to i915_sw_fence.flags -- confirm.
 */
#define I915_SW_FENCE_CHECKED_BIT	0 /* used internally for DAG checking */
#define I915_SW_FENCE_PRIVATE_BIT	1 /* available for use by owner */
#define I915_SW_FENCE_MASK		(~3) /* every bit except bits 0 and 1 */
|
|
|
|
/*
 * States handed to an i915_sw_fence_notify_t callback.
 * The enumerators keep their implicit values: FENCE_COMPLETE is 0 and
 * FENCE_FREE is 1.
 */
enum i915_sw_fence_notify {
	FENCE_COMPLETE,
	FENCE_FREE
};
|
|
|
|
/*
 * Callback type supplied to i915_sw_fence_init(). It receives the fence and
 * the state being reported, and returns an int.
 * NOTE(review): the header includes <linux/notifier.h> "for NOTIFY_DONE", so
 * the return value is presumably a NOTIFY_* code -- confirm.
 */
typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *,
				      enum i915_sw_fence_notify state);

/*
 * Attribute for functions used as i915_sw_fence_notify_t callbacks.
 * NOTE(review): the 4-byte alignment presumably keeps the two low address bits
 * clear so they can be reused as flag bits (cf. I915_SW_FENCE_MASK) -- confirm
 * against i915_sw_fence.c.
 */
#define __i915_sw_fence_call __aligned(4)
|
|
|
|
void __i915_sw_fence_init(struct i915_sw_fence *fence,
|
|
i915_sw_fence_notify_t fn,
|
|
const char *name,
|
|
struct lock_class_key *key);
|
|
/*
 * i915_sw_fence_init - initialise @fence with notification callback @fn.
 *
 * With CONFIG_LOCKDEP every expansion site declares its own static
 * lock_class_key and passes it, together with the stringified @fence
 * expression as the name, to __i915_sw_fence_init(). Without lockdep both the
 * name and the key are NULL.
 */
#ifdef CONFIG_LOCKDEP
#define i915_sw_fence_init(fence, fn)				\
do {								\
	static struct lock_class_key __key;			\
								\
	__i915_sw_fence_init((fence), (fn), #fence, &__key);	\
} while (0)
#else
#define i915_sw_fence_init(fence, fn) \
	__i915_sw_fence_init((fence), (fn), NULL, NULL)
#endif
|
|
|
|
/*
 * i915_sw_fence_fini - tear-down hook for a fence.
 *
 * An out-of-line implementation exists only when
 * CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS is set. Otherwise this is an empty
 * inline, so calls to it do nothing.
 */
#ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS
void i915_sw_fence_fini(struct i915_sw_fence *fence);
#else
static inline void i915_sw_fence_fini(struct i915_sw_fence *fence) {}
#endif
|
|
|
|
/*
 * NOTE(review): implemented outside this header. Presumably ends the setup
 * phase started by i915_sw_fence_init() so that the fence may signal once its
 * awaited dependencies have completed -- confirm against i915_sw_fence.c.
 */
void i915_sw_fence_commit(struct i915_sw_fence *fence);
|
|
|
|
int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
|
|
struct i915_sw_fence *after,
|
|
wait_queue_entry_t *wq);
|
|
int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
|
|
struct i915_sw_fence *after,
|
|
gfp_t gfp);
|
|
int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
|
|
struct dma_fence *dma,
|
|
unsigned long timeout,
|
|
gfp_t gfp);
|
|
int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
|
|
struct reservation_object *resv,
|
|
const struct dma_fence_ops *exclude,
|
|
bool write,
|
|
unsigned long timeout,
|
|
gfp_t gfp);
|
|
|
|
/*
 * Both functions are implemented outside this header.
 * NOTE(review): presumably i915_sw_fence_await() adds one outstanding
 * dependency to @fence's pending count and i915_sw_fence_complete() retires
 * one -- confirm against i915_sw_fence.c.
 */
void i915_sw_fence_await(struct i915_sw_fence *fence);
void i915_sw_fence_complete(struct i915_sw_fence *fence);
|
|
|
|
static inline bool i915_sw_fence_signaled(const struct i915_sw_fence *fence)
|
|
{
|
|
return atomic_read(&fence->pending) <= 0;
|
|
}
|
|
|
|
static inline bool i915_sw_fence_done(const struct i915_sw_fence *fence)
|
|
{
|
|
return atomic_read(&fence->pending) < 0;
|
|
}
|
|
|
|
static inline void i915_sw_fence_wait(struct i915_sw_fence *fence)
|
|
{
|
|
wait_event(fence->wait, i915_sw_fence_done(fence));
|
|
}
|
|
|
|
#endif /* _I915_SW_FENCE_H_ */
|