mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-27 11:05:07 +07:00
6faf5916e6
The writing is on the wall for the existence of a single execution queue along each engine, and as a consequence we will not be able to track dependencies along the HW queue itself, i.e. we will not be able to use HW semaphores on gen7 as they use a global set of registers (and unlike gen8+ we can not effectively target memory to keep per-context seqno and dependencies). On the positive side, when we implement request reordering for gen7 we also can not presume a simple execution queue and would also require removing the current semaphore generation code. So this brings us another step closer to request reordering for ringbuffer submission! The negative side is that using interrupts to drive inter-engine synchronisation is much slower (4us -> 15us to do a nop on each of the 3 engines on ivb). This is much better than it was at the time of introducing the HW semaphores and, equally importantly, userspace has weaned itself off intermixing dependent BLT/RENDER operations (the prime culprit was glyph rendering in UXA). So while we regress the microbenchmarks, it should not impact the user. References: https://bugs.freedesktop.org/show_bug.cgi?id=108888 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181228140736.32606-2-chris@chris-wilson.co.uk
140 lines
4.2 KiB
C
140 lines
4.2 KiB
C
/*
|
|
* Copyright © 2016 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*
|
|
*/
|
|
|
|
#ifndef I915_TIMELINE_H
|
|
#define I915_TIMELINE_H
|
|
|
|
#include <linux/list.h>
|
|
#include <linux/kref.h>
|
|
|
|
#include "i915_request.h"
|
|
#include "i915_syncmap.h"
|
|
#include "i915_utils.h"
|
|
|
|
struct i915_timeline {
|
|
u64 fence_context;
|
|
u32 seqno;
|
|
|
|
spinlock_t lock;
|
|
#define TIMELINE_CLIENT 0 /* default subclass */
|
|
#define TIMELINE_ENGINE 1
|
|
|
|
/**
|
|
* List of breadcrumbs associated with GPU requests currently
|
|
* outstanding.
|
|
*/
|
|
struct list_head requests;
|
|
|
|
/* Contains an RCU guarded pointer to the last request. No reference is
|
|
* held to the request, users must carefully acquire a reference to
|
|
* the request using i915_gem_active_get_request_rcu(), or hold the
|
|
* struct_mutex.
|
|
*/
|
|
struct i915_gem_active last_request;
|
|
|
|
/**
|
|
* We track the most recent seqno that we wait on in every context so
|
|
* that we only have to emit a new await and dependency on a more
|
|
* recent sync point. As the contexts may be executed out-of-order, we
|
|
* have to track each individually and can not rely on an absolute
|
|
* global_seqno. When we know that all tracked fences are completed
|
|
* (i.e. when the driver is idle), we know that the syncmap is
|
|
* redundant and we can discard it without loss of generality.
|
|
*/
|
|
struct i915_syncmap *sync;
|
|
|
|
struct list_head link;
|
|
const char *name;
|
|
|
|
struct kref kref;
|
|
};
|
|
|
|
/*
 * Set-up and tear-down for a timeline whose storage the caller provides
 * (@tl). The implementations live outside this header.
 *
 * NOTE(review): @name is presumably stored in i915_timeline.name without
 * being copied - confirm before passing a short-lived string.
 */
void
i915_timeline_init(struct drm_i915_private *i915,
		   struct i915_timeline *tl,
		   const char *name);

void
i915_timeline_fini(struct i915_timeline *tl);
|
|
|
|
static inline void
|
|
i915_timeline_set_subclass(struct i915_timeline *timeline,
|
|
unsigned int subclass)
|
|
{
|
|
lockdep_set_subclass(&timeline->lock, subclass);
|
|
|
|
/*
|
|
* Due to an interesting quirk in lockdep's internal debug tracking,
|
|
* after setting a subclass we must ensure the lock is used. Otherwise,
|
|
* nr_unused_locks is incremented once too often.
|
|
*/
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
|
local_irq_disable();
|
|
lock_map_acquire(&timeline->lock.dep_map);
|
|
lock_map_release(&timeline->lock.dep_map);
|
|
local_irq_enable();
|
|
#endif
|
|
}
|
|
|
|
/*
 * Returns a pointer to a timeline for @i915 labelled @name; implemented
 * outside this header.
 *
 * NOTE(review): presumably allocates the timeline and reports failure as
 * an ERR_PTR() rather than NULL - confirm against the definition.
 */
struct i915_timeline *i915_timeline_create(struct drm_i915_private *i915,
					   const char *name);
|
|
|
|
static inline struct i915_timeline *
|
|
i915_timeline_get(struct i915_timeline *timeline)
|
|
{
|
|
kref_get(&timeline->kref);
|
|
return timeline;
|
|
}
|
|
|
|
void __i915_timeline_free(struct kref *kref);
|
|
static inline void i915_timeline_put(struct i915_timeline *timeline)
|
|
{
|
|
kref_put(&timeline->kref, __i915_timeline_free);
|
|
}
|
|
|
|
static inline int __i915_timeline_sync_set(struct i915_timeline *tl,
|
|
u64 context, u32 seqno)
|
|
{
|
|
return i915_syncmap_set(&tl->sync, context, seqno);
|
|
}
|
|
|
|
static inline int i915_timeline_sync_set(struct i915_timeline *tl,
|
|
const struct dma_fence *fence)
|
|
{
|
|
return __i915_timeline_sync_set(tl, fence->context, fence->seqno);
|
|
}
|
|
|
|
static inline bool __i915_timeline_sync_is_later(struct i915_timeline *tl,
|
|
u64 context, u32 seqno)
|
|
{
|
|
return i915_syncmap_is_later(&tl->sync, context, seqno);
|
|
}
|
|
|
|
static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
|
|
const struct dma_fence *fence)
|
|
{
|
|
return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno);
|
|
}
|
|
|
|
/*
 * Implemented outside this header.
 * NOTE(review): presumably called when the driver goes idle to discard
 * the per-timeline syncmaps (see the comment on i915_timeline.sync) -
 * confirm against the definition.
 */
void
i915_timelines_park(struct drm_i915_private *i915);
|
|
|
|
#endif
|