/* * SPDX-License-Identifier: MIT * * Copyright © 2019 Intel Corporation */ #include #include "gt/intel_engine_pm.h" #include "i915_drv.h" #include "i915_active.h" #include "i915_globals.h" #define BKL(ref) (&(ref)->i915->drm.struct_mutex) /* * Active refs memory management * * To be more economical with memory, we reap all the i915_active trees as * they idle (when we know the active requests are inactive) and allocate the * nodes from a local slab cache to hopefully reduce the fragmentation. */ static struct i915_global_active { struct i915_global base; struct kmem_cache *slab_cache; } global; struct active_node { struct i915_active_request base; struct i915_active *ref; struct rb_node node; u64 timeline; }; #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS) static void *active_debug_hint(void *addr) { struct i915_active *ref = addr; return (void *)ref->retire ?: (void *)ref; } static struct debug_obj_descr active_debug_desc = { .name = "i915_active", .debug_hint = active_debug_hint, }; static void debug_active_init(struct i915_active *ref) { debug_object_init(ref, &active_debug_desc); } static void debug_active_activate(struct i915_active *ref) { debug_object_activate(ref, &active_debug_desc); } static void debug_active_deactivate(struct i915_active *ref) { debug_object_deactivate(ref, &active_debug_desc); } static void debug_active_fini(struct i915_active *ref) { debug_object_free(ref, &active_debug_desc); } static void debug_active_assert(struct i915_active *ref) { debug_object_assert_init(ref, &active_debug_desc); } #else static inline void debug_active_init(struct i915_active *ref) { } static inline void debug_active_activate(struct i915_active *ref) { } static inline void debug_active_deactivate(struct i915_active *ref) { } static inline void debug_active_fini(struct i915_active *ref) { } static inline void debug_active_assert(struct i915_active *ref) { } #endif static void __active_park(struct i915_active *ref) { struct active_node *it, *n; rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { GEM_BUG_ON(i915_active_request_isset(&it->base)); kmem_cache_free(global.slab_cache, it); } ref->tree = RB_ROOT; } static void __active_retire(struct i915_active *ref) { GEM_BUG_ON(!ref->count); if (--ref->count) return; debug_active_deactivate(ref); /* return the unused nodes to our slabcache */ __active_park(ref); ref->retire(ref); } static void node_retire(struct i915_active_request *base, struct i915_request *rq) { __active_retire(container_of(base, struct active_node, base)->ref); } static void last_retire(struct i915_active_request *base, struct i915_request *rq) { __active_retire(container_of(base, struct i915_active, last)); } static struct i915_active_request * active_instance(struct i915_active *ref, u64 idx) { struct active_node *node; struct rb_node **p, *parent; struct i915_request *old; /* * We track the most recently used timeline to skip a rbtree search * for the common case, under typical loads we never need the rbtree * at all. We can reuse the last slot if it is empty, that is * after the previous activity has been retired, or if it matches the * current timeline. * * Note that we allow the timeline to be active simultaneously in * the rbtree and the last cache. We do this to avoid having * to search and replace the rbtree element for a new timeline, with * the cost being that we must be aware that the ref may be retired * twice for the same timeline (as the older rbtree element will be * retired before the new request added to last). */ old = i915_active_request_raw(&ref->last, BKL(ref)); if (!old || old->fence.context == idx) goto out; /* Move the currently active fence into the rbtree */ idx = old->fence.context; parent = NULL; p = &ref->tree.rb_node; while (*p) { parent = *p; node = rb_entry(parent, struct active_node, node); if (node->timeline == idx) goto replace; if (node->timeline < idx) p = &parent->rb_right; else p = &parent->rb_left; } node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); /* kmalloc may retire the ref->last (thanks shrinker)! */ if (unlikely(!i915_active_request_raw(&ref->last, BKL(ref)))) { kmem_cache_free(global.slab_cache, node); goto out; } if (unlikely(!node)) return ERR_PTR(-ENOMEM); i915_active_request_init(&node->base, NULL, node_retire); node->ref = ref; node->timeline = idx; rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &ref->tree); replace: /* * Overwrite the previous active slot in the rbtree with last, * leaving last zeroed. If the previous slot is still active, * we must be careful as we now only expect to receive one retire * callback not two, and so much undo the active counting for the * overwritten slot. */ if (i915_active_request_isset(&node->base)) { /* Retire ourselves from the old rq->active_list */ __list_del_entry(&node->base.link); ref->count--; GEM_BUG_ON(!ref->count); } GEM_BUG_ON(list_empty(&ref->last.link)); list_replace_init(&ref->last.link, &node->base.link); node->base.request = fetch_and_zero(&ref->last.request); out: return &ref->last; } void i915_active_init(struct drm_i915_private *i915, struct i915_active *ref, void (*retire)(struct i915_active *ref)) { debug_active_init(ref); ref->i915 = i915; ref->retire = retire; ref->tree = RB_ROOT; i915_active_request_init(&ref->last, NULL, last_retire); init_llist_head(&ref->barriers); ref->count = 0; } int i915_active_ref(struct i915_active *ref, u64 timeline, struct i915_request *rq) { struct i915_active_request *active; int err = 0; /* Prevent reaping in case we malloc/wait while building the tree */ i915_active_acquire(ref); active = active_instance(ref, timeline); if (IS_ERR(active)) { err = PTR_ERR(active); goto out; } if (!i915_active_request_isset(active)) ref->count++; __i915_active_request_set(active, rq); GEM_BUG_ON(!ref->count); out: i915_active_release(ref); return err; } bool i915_active_acquire(struct i915_active *ref) { debug_active_assert(ref); lockdep_assert_held(BKL(ref)); if (ref->count++) return false; debug_active_activate(ref); return true; } void i915_active_release(struct i915_active *ref) { debug_active_assert(ref); lockdep_assert_held(BKL(ref)); __active_retire(ref); } int i915_active_wait(struct i915_active *ref) { struct active_node *it, *n; int ret = 0; if (i915_active_acquire(ref)) goto out_release; ret = i915_active_request_retire(&ref->last, BKL(ref)); if (ret) goto out_release; rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { ret = i915_active_request_retire(&it->base, BKL(ref)); if (ret) break; } out_release: i915_active_release(ref); return ret; } int i915_request_await_active_request(struct i915_request *rq, struct i915_active_request *active) { struct i915_request *barrier = i915_active_request_raw(active, &rq->i915->drm.struct_mutex); return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0; } int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) { struct active_node *it, *n; int err = 0; /* await allocates and so we need to avoid hitting the shrinker */ if (i915_active_acquire(ref)) goto out; /* was idle */ err = i915_request_await_active_request(rq, &ref->last); if (err) goto out; rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { err = i915_request_await_active_request(rq, &it->base); if (err) goto out; } out: i915_active_release(ref); return err; } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) void i915_active_fini(struct i915_active *ref) { debug_active_fini(ref); GEM_BUG_ON(i915_active_request_isset(&ref->last)); GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); GEM_BUG_ON(ref->count); } #endif int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; struct llist_node *pos, *next; unsigned long tmp; int err; GEM_BUG_ON(!engine->mask); for_each_engine_masked(engine, i915, engine->mask, tmp) { struct intel_context *kctx = engine->kernel_context; struct active_node *node; node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); if (unlikely(!node)) { err = -ENOMEM; goto unwind; } i915_active_request_init(&node->base, (void *)engine, node_retire); node->timeline = kctx->ring->timeline->fence_context; node->ref = ref; ref->count++; intel_engine_pm_get(engine); llist_add((struct llist_node *)&node->base.link, &ref->barriers); } return 0; unwind: llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) { struct active_node *node; node = container_of((struct list_head *)pos, typeof(*node), base.link); engine = (void *)rcu_access_pointer(node->base.request); intel_engine_pm_put(engine); kmem_cache_free(global.slab_cache, node); } return err; } void i915_active_acquire_barrier(struct i915_active *ref) { struct llist_node *pos, *next; i915_active_acquire(ref); llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) { struct intel_engine_cs *engine; struct active_node *node; struct rb_node **p, *parent; node = container_of((struct list_head *)pos, typeof(*node), base.link); engine = (void *)rcu_access_pointer(node->base.request); RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN)); parent = NULL; p = &ref->tree.rb_node; while (*p) { parent = *p; if (rb_entry(parent, struct active_node, node)->timeline < node->timeline) p = &parent->rb_right; else p = &parent->rb_left; } rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &ref->tree); llist_add((struct llist_node *)&node->base.link, &engine->barrier_tasks); intel_engine_pm_put(engine); } i915_active_release(ref); } void i915_request_add_barriers(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct llist_node *node, *next; llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) list_add_tail((struct list_head *)node, &rq->active_list); } int i915_active_request_set(struct i915_active_request *active, struct i915_request *rq) { int err; /* Must maintain ordering wrt previous active requests */ err = i915_request_await_active_request(rq, active); if (err) return err; __i915_active_request_set(active, rq); return 0; } void i915_active_retire_noop(struct i915_active_request *active, struct i915_request *request) { /* Space left intentionally blank */ } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_active.c" #endif static void i915_global_active_shrink(void) { kmem_cache_shrink(global.slab_cache); } static void i915_global_active_exit(void) { kmem_cache_destroy(global.slab_cache); } static struct i915_global_active global = { { .shrink = i915_global_active_shrink, .exit = i915_global_active_exit, } }; int __init i915_global_active_init(void) { global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); if (!global.slab_cache) return -ENOMEM; i915_global_register(&global.base); return 0; }