mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
3b7b314053
Commit bf5eb3de38
("slub: separate out sysfs_slab_release() from sysfs_slab_remove()") made slub sysfs file removals synchronous to kmem_cache shutdown. Unfortunately, this created a possible ABBA deadlock between slab_mutex and sysfs draining mechanism triggering the following lockdep warning. ====================================================== [ INFO: possible circular locking dependency detected ] 4.10.0-test+ #48 Not tainted ------------------------------------------------------- rmmod/1211 is trying to acquire lock: (s_active#120){++++.+}, at: [<ffffffff81308073>] kernfs_remove+0x23/0x40 but task is already holding lock: (slab_mutex){+.+.+.}, at: [<ffffffff8120f691>] kmem_cache_destroy+0x41/0x2d0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (slab_mutex){+.+.+.}: lock_acquire+0xf6/0x1f0 __mutex_lock+0x75/0x950 mutex_lock_nested+0x1b/0x20 slab_attr_store+0x75/0xd0 sysfs_kf_write+0x45/0x60 kernfs_fop_write+0x13c/0x1c0 __vfs_write+0x28/0x120 vfs_write+0xc8/0x1e0 SyS_write+0x49/0xa0 entry_SYSCALL_64_fastpath+0x1f/0xc2 -> #0 (s_active#120){++++.+}: __lock_acquire+0x10ed/0x1260 lock_acquire+0xf6/0x1f0 __kernfs_remove+0x254/0x320 kernfs_remove+0x23/0x40 sysfs_remove_dir+0x51/0x80 kobject_del+0x18/0x50 __kmem_cache_shutdown+0x3e6/0x460 kmem_cache_destroy+0x1fb/0x2d0 kvm_exit+0x2d/0x80 [kvm] vmx_exit+0x19/0xa1b [kvm_intel] SyS_delete_module+0x198/0x1f0 entry_SYSCALL_64_fastpath+0x1f/0xc2 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(slab_mutex); lock(s_active#120); lock(slab_mutex); lock(s_active#120); *** DEADLOCK *** 2 locks held by rmmod/1211: #0: (cpu_hotplug.dep_map){++++++}, at: [<ffffffff810a7877>] get_online_cpus+0x37/0x80 #1: (slab_mutex){+.+.+.}, at: [<ffffffff8120f691>] kmem_cache_destroy+0x41/0x2d0 stack backtrace: CPU: 3 PID: 1211 Comm: rmmod Not tainted 4.10.0-test+ #48 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012 Call 
Trace: print_circular_bug+0x1be/0x210 __lock_acquire+0x10ed/0x1260 lock_acquire+0xf6/0x1f0 __kernfs_remove+0x254/0x320 kernfs_remove+0x23/0x40 sysfs_remove_dir+0x51/0x80 kobject_del+0x18/0x50 __kmem_cache_shutdown+0x3e6/0x460 kmem_cache_destroy+0x1fb/0x2d0 kvm_exit+0x2d/0x80 [kvm] vmx_exit+0x19/0xa1b [kvm_intel] SyS_delete_module+0x198/0x1f0 ? SyS_delete_module+0x5/0x1f0 entry_SYSCALL_64_fastpath+0x1f/0xc2 It'd be the cleanest to deal with the issue by removing sysfs files without holding slab_mutex before the rest of shutdown; however, given the current code structure, it is pretty difficult to do so. This patch punts sysfs file removal to a work item. Before commitbf5eb3de38
, the removal was punted to an RCU delayed work item which is executed after release. Now, we're punting to a different work item on shutdown, which still maintains the goal of removing the sysfs files earlier when destroying kmem_caches. Link: http://lkml.kernel.org/r/20170620204512.GI21326@htj.duckdns.org Fixes: bf5eb3de38
("slub: separate out sysfs_slab_release() from sysfs_slab_remove()") Signed-off-by: Tejun Heo <tj@kernel.org> Reported-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
141 lines
4.5 KiB
C
141 lines
4.5 KiB
C
#ifndef _LINUX_SLUB_DEF_H
|
|
#define _LINUX_SLUB_DEF_H
|
|
|
|
/*
 * SLUB : A Slab allocator without object queues.
 *
 * (C) 2007 SGI, Christoph Lameter
 */
|
|
#include <linux/kobject.h>
|
|
|
|
/*
 * SLUB event counters.
 *
 * Enumerators are numbered implicitly from zero.  NR_SLUB_STAT_ITEMS must
 * remain the last entry so that it equals the number of counters; it sizes
 * the stat[] array in struct kmem_cache_cpu (CONFIG_SLUB_STATS only).
 */
enum stat_item {
	ALLOC_FASTPATH,		/* Allocation from cpu slab */
	ALLOC_SLOWPATH,		/* Allocation by getting a new cpu slab */
	FREE_FASTPATH,		/* Free to cpu slab */
	FREE_SLOWPATH,		/* Freeing not to cpu slab */
	FREE_FROZEN,		/* Freeing to frozen slab */
	FREE_ADD_PARTIAL,	/* Freeing moves slab to partial list */
	FREE_REMOVE_PARTIAL,	/* Freeing removes last object */
	ALLOC_FROM_PARTIAL,	/* Cpu slab acquired from node partial list */
	ALLOC_SLAB,		/* Cpu slab acquired from page allocator */
	ALLOC_REFILL,		/* Refill cpu slab from slab freelist */
	ALLOC_NODE_MISMATCH,	/* Switching cpu slab */
	FREE_SLAB,		/* Slab freed to the page allocator */
	CPUSLAB_FLUSH,		/* Abandoning of the cpu slab */
	DEACTIVATE_FULL,	/* Cpu slab was full when deactivated */
	DEACTIVATE_EMPTY,	/* Cpu slab was empty when deactivated */
	DEACTIVATE_TO_HEAD,	/* Cpu slab was moved to the head of partials */
	DEACTIVATE_TO_TAIL,	/* Cpu slab was moved to the tail of partials */
	DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
	DEACTIVATE_BYPASS,	/* Implicit deactivation */
	ORDER_FALLBACK,		/* Number of times fallback was necessary */
	CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
	CMPXCHG_DOUBLE_FAIL,	/* Number of times that cmpxchg double did not match */
	CPU_PARTIAL_ALLOC,	/* Used cpu partial on alloc */
	CPU_PARTIAL_FREE,	/* Refill cpu partial on free */
	CPU_PARTIAL_NODE,	/* Refill cpu partial from node partial */
	CPU_PARTIAL_DRAIN,	/* Drain cpu partial to node partial */
	NR_SLUB_STAT_ITEMS	/* Number of counters; keep last */
};
|
|
|
|
/*
 * Per-cpu slab state.  struct kmem_cache refers to this through its
 * __percpu cpu_slab pointer.
 */
struct kmem_cache_cpu {
	void **freelist;	/* Pointer to next available object */
	unsigned long tid;	/* Globally unique transaction id */
	struct page *page;	/* The slab from which we are allocating */
	struct page *partial;	/* Partially allocated frozen slabs */
#ifdef CONFIG_SLUB_STATS
	/* One counter per enum stat_item value */
	unsigned int stat[NR_SLUB_STAT_ITEMS];
#endif
};
|
|
|
|
/*
 * Word size structure that can be atomically updated or read and that
 * contains both the order and the number of objects that a slab of the
 * given order would contain.
 *
 * Both values are packed into the single word 'x'; the encoding is not
 * defined in this header.
 */
struct kmem_cache_order_objects {
	unsigned long x;
};
|
|
|
|
/*
|
|
* Slab cache management.
|
|
*/
|
|
struct kmem_cache {
|
|
struct kmem_cache_cpu __percpu *cpu_slab;
|
|
/* Used for retriving partial slabs etc */
|
|
unsigned long flags;
|
|
unsigned long min_partial;
|
|
int size; /* The size of an object including meta data */
|
|
int object_size; /* The size of an object without meta data */
|
|
int offset; /* Free pointer offset. */
|
|
int cpu_partial; /* Number of per cpu partial objects to keep around */
|
|
struct kmem_cache_order_objects oo;
|
|
|
|
/* Allocation and freeing of slabs */
|
|
struct kmem_cache_order_objects max;
|
|
struct kmem_cache_order_objects min;
|
|
gfp_t allocflags; /* gfp flags to use on each alloc */
|
|
int refcount; /* Refcount for slab cache destroy */
|
|
void (*ctor)(void *);
|
|
int inuse; /* Offset to metadata */
|
|
int align; /* Alignment */
|
|
int reserved; /* Reserved bytes at the end of slabs */
|
|
const char *name; /* Name (only for display!) */
|
|
struct list_head list; /* List of slab caches */
|
|
int red_left_pad; /* Left redzone padding size */
|
|
#ifdef CONFIG_SYSFS
|
|
struct kobject kobj; /* For sysfs */
|
|
struct work_struct kobj_remove_work;
|
|
#endif
|
|
#ifdef CONFIG_MEMCG
|
|
struct memcg_cache_params memcg_params;
|
|
int max_attr_size; /* for propagation, maximum size of a stored attr */
|
|
#ifdef CONFIG_SYSFS
|
|
struct kset *memcg_kset;
|
|
#endif
|
|
#endif
|
|
|
|
#ifdef CONFIG_NUMA
|
|
/*
|
|
* Defragmentation by allocating from a remote node.
|
|
*/
|
|
int remote_node_defrag_ratio;
|
|
#endif
|
|
|
|
#ifdef CONFIG_SLAB_FREELIST_RANDOM
|
|
unsigned int *random_seq;
|
|
#endif
|
|
|
|
#ifdef CONFIG_KASAN
|
|
struct kasan_cache kasan_info;
|
|
#endif
|
|
|
|
struct kmem_cache_node *node[MAX_NUMNODES];
|
|
};
|
|
|
|
/*
 * sysfs_slab_release() has an out-of-line implementation only when
 * CONFIG_SYSFS is enabled (SLAB_SUPPORTS_SYSFS is defined in that case).
 * Otherwise it is an empty inline stub, so callers need no #ifdef.
 */
#ifdef CONFIG_SYSFS
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_release(struct kmem_cache *s);
#else
static inline void sysfs_slab_release(struct kmem_cache *s)
{
}
#endif
|
|
|
|
void object_err(struct kmem_cache *s, struct page *page,
|
|
u8 *object, char *reason);
|
|
|
|
void *fixup_red_left(struct kmem_cache *s, void *p);
|
|
|
|
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
|
|
void *x) {
|
|
void *object = x - (x - page_address(page)) % cache->size;
|
|
void *last_object = page_address(page) +
|
|
(page->objects - 1) * cache->size;
|
|
void *result = (unlikely(object > last_object)) ? last_object : object;
|
|
|
|
result = fixup_red_left(cache, result);
|
|
return result;
|
|
}
|
|
|
|
#endif /* _LINUX_SLUB_DEF_H */
|