mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-27 18:24:03 +07:00
da23379f15
In order to avoid the malloc inside i915_globals_park() occurring underneath a lock connected to the shrinker (thus causing circular lockdeps warnings), move the rcu_worker to a global. <4> [39.085073] ====================================================== <4> [39.085273] WARNING: possible circular locking dependency detected <4> [39.085552] 5.1.0-rc3-CI-Trybot_4088+ #1 Tainted: G U <4> [39.085752] ------------------------------------------------------ <4> [39.085949] kswapd0/32 is trying to acquire lock: <4> [39.086121] 00000000004b5f91 (wakeref#3){+.+.}, at: intel_engine_pm_put+0x1b/0x40 [i915] <4> [39.086493] but task is already holding lock: <4> [39.086682] 00000000dd009a9a (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x30 <4> [39.086910] which lock already depends on the new lock. <4> [39.087139] the existing dependency chain (in reverse order) is: <4> [39.087356] -> #2 (fs_reclaim){+.+.}: <4> [39.087604] fs_reclaim_acquire.part.24+0x24/0x30 <4> [39.087785] kmem_cache_alloc_trace+0x2a/0x290 <4> [39.087998] i915_globals_park+0x22/0xa0 [i915] <4> [39.088478] idle_work_handler+0x1df/0x220 [i915] <4> [39.089016] process_one_work+0x245/0x610 <4> [39.089447] worker_thread+0x37/0x380 <4> [39.089956] kthread+0x119/0x130 <4> [39.090374] ret_from_fork+0x3a/0x50 <4> [39.090868] -> #1 (wakeref#4){+.+.}: <4> [39.091569] __mutex_lock+0x8c/0x960 <4> [39.092054] atomic_dec_and_mutex_lock+0x33/0x50 <4> [39.092521] intel_gt_pm_put+0x1b/0x40 [i915] <4> [39.093047] intel_engine_park+0xeb/0x1d0 [i915] <4> [39.093514] __intel_wakeref_put_once+0x10/0x30 [i915] <4> [39.094062] i915_request_retire+0x477/0xaf0 [i915] <4> [39.094547] ring_retire_requests+0x86/0x160 [i915] <4> [39.095110] i915_retire_requests+0x58/0xc0 [i915] <4> [39.095587] i915_gem_wait_for_idle.part.22+0xb2/0xf0 [i915] <4> [39.096142] switch_to_kernel_context_sync+0x2a/0x70 [i915] <4> [39.096633] i915_gem_init+0x59c/0x9c0 [i915] <4> [39.097174] i915_driver_load+0xd96/0x1880 [i915] <4> [39.097640] 
i915_pci_probe+0x29/0xa0 [i915] <4> [39.098145] pci_device_probe+0xa1/0x120 <4> [39.098607] really_probe+0xf3/0x3e0 <4> [39.099031] driver_probe_device+0x10a/0x120 <4> [39.099599] device_driver_attach+0x4b/0x50 <4> [39.100033] __driver_attach+0x97/0x130 <4> [39.100525] bus_for_each_dev+0x74/0xc0 <4> [39.100954] bus_add_driver+0x13f/0x210 <4> [39.101441] driver_register+0x56/0xe0 <4> [39.101891] do_one_initcall+0x58/0x2e0 <4> [39.102319] do_init_module+0x56/0x1ea <4> [39.102805] load_module+0x2701/0x29e0 <4> [39.103231] __se_sys_finit_module+0xd3/0xf0 <4> [39.103727] do_syscall_64+0x55/0x190 <4> [39.104153] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4> [39.104736] -> #0 (wakeref#3){+.+.}: <4> [39.105437] lock_acquire+0xa6/0x1c0 <4> [39.105923] __mutex_lock+0x8c/0x960 <4> [39.106345] atomic_dec_and_mutex_lock+0x33/0x50 <4> [39.106897] intel_engine_pm_put+0x1b/0x40 [i915] <4> [39.107375] i915_request_retire+0x477/0xaf0 [i915] <4> [39.107930] ring_retire_requests+0x86/0x160 [i915] <4> [39.108412] i915_retire_requests+0x58/0xc0 [i915] <4> [39.108934] i915_gem_shrink+0xd8/0x5b0 [i915] <4> [39.109431] i915_gem_shrinker_scan+0x59/0x130 [i915] <4> [39.109884] do_shrink_slab+0x131/0x3e0 <4> [39.110380] shrink_slab+0x228/0x2c0 <4> [39.110810] shrink_node+0x177/0x460 <4> [39.111317] balance_pgdat+0x239/0x580 <4> [39.111743] kswapd+0x186/0x570 <4> [39.112221] kthread+0x119/0x130 <4> [39.112641] ret_from_fork+0x3a/0x50 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190408091728.20207-3-chris@chris-wilson.co.uk
126 lines
2.7 KiB
C
126 lines
2.7 KiB
C
/*
|
|
* SPDX-License-Identifier: MIT
|
|
*
|
|
* Copyright © 2019 Intel Corporation
|
|
*/
|
|
|
|
#include <linux/slab.h>
|
|
#include <linux/workqueue.h>
|
|
|
|
#include "i915_active.h"
|
|
#include "i915_gem_context.h"
|
|
#include "i915_gem_object.h"
|
|
#include "i915_globals.h"
|
|
#include "i915_request.h"
|
|
#include "i915_scheduler.h"
|
|
#include "i915_vma.h"
|
|
|
|
static LIST_HEAD(globals);
|
|
|
|
static atomic_t active;
|
|
static atomic_t epoch;
|
|
static struct park_work {
|
|
struct rcu_work work;
|
|
int epoch;
|
|
} park;
|
|
|
|
static void i915_globals_shrink(void)
|
|
{
|
|
struct i915_global *global;
|
|
|
|
/*
|
|
* kmem_cache_shrink() discards empty slabs and reorders partially
|
|
* filled slabs to prioritise allocating from the mostly full slabs,
|
|
* with the aim of reducing fragmentation.
|
|
*/
|
|
list_for_each_entry(global, &globals, link)
|
|
global->shrink();
|
|
}
|
|
|
|
static void __i915_globals_park(struct work_struct *work)
|
|
{
|
|
/* Confirm nothing woke up in the last grace period */
|
|
if (park.epoch == atomic_read(&epoch))
|
|
i915_globals_shrink();
|
|
}
|
|
|
|
/*
 * i915_global_register - append a descriptor to the list of globals
 * @global: descriptor to add; must provide both shrink() and exit()
 *
 * Descriptors are added at the tail, so the list keeps registration order.
 */
void __init i915_global_register(struct i915_global *global)
{
	/* Both callbacks are later invoked without a NULL check. */
	GEM_BUG_ON(!global->shrink);
	GEM_BUG_ON(!global->exit);

	list_add_tail(&global->link, &globals);
}
|
|
|
|
static void __i915_globals_cleanup(void)
|
|
{
|
|
struct i915_global *global, *next;
|
|
|
|
list_for_each_entry_safe_reverse(global, next, &globals, link)
|
|
global->exit();
|
|
}
|
|
|
|
static __initconst int (* const initfn[])(void) = {
|
|
i915_global_active_init,
|
|
i915_global_context_init,
|
|
i915_global_gem_context_init,
|
|
i915_global_objects_init,
|
|
i915_global_request_init,
|
|
i915_global_scheduler_init,
|
|
i915_global_vma_init,
|
|
};
|
|
|
|
/*
 * i915_globals_init - run every constructor listed in initfn[]
 *
 * The constructors are called in table order. On the first failure, every
 * global registered so far is torn down again and the error is passed back.
 * Once all have succeeded, the deferred-shrink work item is initialised.
 *
 * Return: 0 on success, otherwise the error code of the failing constructor.
 */
int __init i915_globals_init(void)
{
	int err;
	int i;

	for (i = 0; i < ARRAY_SIZE(initfn); i++) {
		err = initfn[i]();
		if (err)
			goto err_cleanup;
	}

	INIT_RCU_WORK(&park.work, __i915_globals_park);
	return 0;

err_cleanup:
	__i915_globals_cleanup();
	return err;
}
|
|
|
|
void i915_globals_park(void)
|
|
{
|
|
/*
|
|
* Defer shrinking the global slab caches (and other work) until
|
|
* after a RCU grace period has completed with no activity. This
|
|
* is to try and reduce the latency impact on the consumers caused
|
|
* by us shrinking the caches the same time as they are trying to
|
|
* allocate, with the assumption being that if we idle long enough
|
|
* for an RCU grace period to elapse since the last use, it is likely
|
|
* to be longer until we need the caches again.
|
|
*/
|
|
if (!atomic_dec_and_test(&active))
|
|
return;
|
|
|
|
park.epoch = atomic_inc_return(&epoch);
|
|
queue_rcu_work(system_wq, &park.work);
|
|
}
|
|
|
|
void i915_globals_unpark(void)
|
|
{
|
|
atomic_inc(&epoch);
|
|
atomic_inc(&active);
|
|
}
|
|
|
|
/*
 * i915_globals_exit - tear down all registered globals
 *
 * Neutralises and flushes any outstanding deferred shrink, calls exit() on
 * every global in reverse registration order, then waits for pending RCU
 * callbacks to complete.
 */
void __exit i915_globals_exit(void)
{
	/*
	 * Flush any residual park work. Bumping the epoch first means a
	 * still-pending worker sees a stale park.epoch and skips the shrink.
	 */
	atomic_inc(&epoch);
	flush_rcu_work(&park.work);

	__i915_globals_cleanup();

	/* Make sure our DESTROY_BY_RCU slabs really have been destroyed. */
	rcu_barrier();
}
|