mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-13 07:56:47 +07:00
ecca47ce82
d911d98748
("kernfs: make kernfs_notify() trigger inotify events
too") added fsnotify triggering to kernfs_notify() which requires a
sleepable context. There are already existing users of
kernfs_notify() which invoke it from an atomic context and in general
it's silly to require a sleepable context for triggering a
notification.
The following is an invalid context bug triggered by md invoking
sysfs_notify() from IO completion path.
BUG: sleeping function called from invalid context at kernel/locking/mutex.c:586
in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/1
2 locks held by swapper/1/0:
#0: (&(&vblk->vq_lock)->rlock){-.-...}, at: [<ffffffffa0039042>] virtblk_done+0x42/0xe0 [virtio_blk]
#1: (&(&bitmap->counts.lock)->rlock){-.....}, at: [<ffffffff81633718>] bitmap_endwrite+0x68/0x240
irq event stamp: 33518
hardirqs last enabled at (33515): [<ffffffff8102544f>] default_idle+0x1f/0x230
hardirqs last disabled at (33516): [<ffffffff818122ed>] common_interrupt+0x6d/0x72
softirqs last enabled at (33518): [<ffffffff810a1272>] _local_bh_enable+0x22/0x50
softirqs last disabled at (33517): [<ffffffff810a29e0>] irq_enter+0x60/0x80
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.16.0-0.rc2.git2.1.fc21.x86_64 #1
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
0000000000000000 f90db13964f4ee05 ffff88007d403b80 ffffffff81807b4c
0000000000000000 ffff88007d403ba8 ffffffff810d4f14 0000000000000000
0000000000441800 ffff880078fa1780 ffff88007d403c38 ffffffff8180caf2
Call Trace:
<IRQ> [<ffffffff81807b4c>] dump_stack+0x4d/0x66
[<ffffffff810d4f14>] __might_sleep+0x184/0x240
[<ffffffff8180caf2>] mutex_lock_nested+0x42/0x440
[<ffffffff812d76a0>] kernfs_notify+0x90/0x150
[<ffffffff8163377c>] bitmap_endwrite+0xcc/0x240
[<ffffffffa00de863>] close_write+0x93/0xb0 [raid1]
[<ffffffffa00df029>] r1_bio_write_done+0x29/0x50 [raid1]
[<ffffffffa00e0474>] raid1_end_write_request+0xe4/0x260 [raid1]
[<ffffffff813acb8b>] bio_endio+0x6b/0xa0
[<ffffffff813b46c4>] blk_update_request+0x94/0x420
[<ffffffff813bf0ea>] blk_mq_end_io+0x1a/0x70
[<ffffffffa00392c2>] virtblk_request_done+0x32/0x80 [virtio_blk]
[<ffffffff813c0648>] __blk_mq_complete_request+0x88/0x120
[<ffffffff813c070a>] blk_mq_complete_request+0x2a/0x30
[<ffffffffa0039066>] virtblk_done+0x66/0xe0 [virtio_blk]
[<ffffffffa002535a>] vring_interrupt+0x3a/0xa0 [virtio_ring]
[<ffffffff81116177>] handle_irq_event_percpu+0x77/0x340
[<ffffffff8111647d>] handle_irq_event+0x3d/0x60
[<ffffffff81119436>] handle_edge_irq+0x66/0x130
[<ffffffff8101c3e4>] handle_irq+0x84/0x150
[<ffffffff818146ad>] do_IRQ+0x4d/0xe0
[<ffffffff818122f2>] common_interrupt+0x72/0x72
<EOI> [<ffffffff8105f706>] ? native_safe_halt+0x6/0x10
[<ffffffff81025454>] default_idle+0x24/0x230
[<ffffffff81025f9f>] arch_cpu_idle+0xf/0x20
[<ffffffff810f5adc>] cpu_startup_entry+0x37c/0x7b0
[<ffffffff8104df1b>] start_secondary+0x25b/0x300
This patch fixes it by punting the notification delivery through a
work item. This ends up adding an extra pointer to kernfs_elem_attr
enlarging kernfs_node by a pointer, which is not ideal but not a very
big deal either. If this turns out to be an actual issue, we can move
kernfs_elem_attr->size to kernfs_node->iattr later.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Josh Boyer <jwboyer@fedoraproject.org>
Cc: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
466 lines
14 KiB
C
466 lines
14 KiB
C
/*
 * kernfs.h - pseudo filesystem decoupled from vfs locking
 *
 * This file is released under the GPLv2.
 */
|
|
|
|
#ifndef __LINUX_KERNFS_H
|
|
#define __LINUX_KERNFS_H
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/err.h>
|
|
#include <linux/list.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/idr.h>
|
|
#include <linux/lockdep.h>
|
|
#include <linux/rbtree.h>
|
|
#include <linux/atomic.h>
|
|
#include <linux/wait.h>
|
|
|
|
/* Opaque types that this header only refers to through pointers. */
struct file;
struct dentry;
struct iattr;
struct seq_file;
struct vm_area_struct;
struct super_block;
struct file_system_type;

struct kernfs_open_node;
struct kernfs_iattrs;
|
|
|
|
/*
 * Type of a kernfs_node.  kernfs_type() obtains it by masking
 * kernfs_node->flags with KERNFS_TYPE_MASK, so every value must fit in
 * that mask.
 */
enum kernfs_node_type {
	KERNFS_DIR		= 0x0001,
	KERNFS_FILE		= 0x0002,
	KERNFS_LINK		= 0x0004,
};
|
|
|
|
/*
 * Split of kernfs_node->flags: the low nibble carries the node type,
 * everything above it carries node flag bits.  The complement is
 * parenthesised so the macro expands as a single operand.
 */
#define KERNFS_TYPE_MASK	0x000f
#define KERNFS_FLAG_MASK	(~KERNFS_TYPE_MASK)
|
|
|
|
/*
 * Flag bits kept in kernfs_node->flags.  All values lie above the low
 * nibble that holds the node type.
 */
enum kernfs_node_flag {
	KERNFS_ACTIVATED	= 0x0010,
	KERNFS_NS		= 0x0020,	/* set by kernfs_enable_ns() */
	KERNFS_HAS_SEQ_SHOW	= 0x0040,
	KERNFS_HAS_MMAP		= 0x0080,
	KERNFS_LOCKDEP		= 0x0100,
	KERNFS_STATIC_NAME	= 0x0200,
	KERNFS_SUICIDAL		= 0x0400,
	KERNFS_SUICIDED		= 0x0800,
};
|
|
|
|
/* @flags for kernfs_create_root() */
enum kernfs_root_flag {
	/*
	 * kernfs_nodes are created in the deactivated state and invisible.
	 * They require explicit kernfs_activate() to become visible.  This
	 * can be used to make related nodes become visible atomically
	 * after all nodes are created successfully.
	 */
	KERNFS_ROOT_CREATE_DEACTIVATED		= 0x0001,

	/*
	 * For regular files, if the opener has CAP_DAC_OVERRIDE, open(2)
	 * succeeds regardless of the RW permissions.  sysfs had an extra
	 * layer of enforcement where open(2) fails with -EACCES regardless
	 * of CAP_DAC_OVERRIDE if the permission doesn't have the
	 * respective read or write access at all (none of S_IRUGO or
	 * S_IWUGO) or the respective operation isn't implemented.  The
	 * following flag enables that behavior.
	 */
	KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK	= 0x0002,
};
|
|
|
|
/* type-specific structures for kernfs_node union members */
|
|
struct kernfs_elem_dir {
|
|
unsigned long subdirs;
|
|
/* children rbtree starts here and goes through kn->rb */
|
|
struct rb_root children;
|
|
|
|
/*
|
|
* The kernfs hierarchy this directory belongs to. This fits
|
|
* better directly in kernfs_node but is here to save space.
|
|
*/
|
|
struct kernfs_root *root;
|
|
};
|
|
|
|
/* kernfs_node union member used by link nodes: the node the link refers to */
struct kernfs_elem_symlink {
	struct kernfs_node	*target_kn;
};
|
|
|
|
struct kernfs_elem_attr {
|
|
const struct kernfs_ops *ops;
|
|
struct kernfs_open_node *open;
|
|
loff_t size;
|
|
struct kernfs_node *notify_next; /* for kernfs_notify() */
|
|
};
|
|
|
|
/*
|
|
* kernfs_node - the building block of kernfs hierarchy. Each and every
|
|
* kernfs node is represented by single kernfs_node. Most fields are
|
|
* private to kernfs and shouldn't be accessed directly by kernfs users.
|
|
*
|
|
* As long as s_count reference is held, the kernfs_node itself is
|
|
* accessible. Dereferencing elem or any other outer entity requires
|
|
* active reference.
|
|
*/
|
|
struct kernfs_node {
|
|
atomic_t count;
|
|
atomic_t active;
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
|
struct lockdep_map dep_map;
|
|
#endif
|
|
/*
|
|
* Use kernfs_get_parent() and kernfs_name/path() instead of
|
|
* accessing the following two fields directly. If the node is
|
|
* never moved to a different parent, it is safe to access the
|
|
* parent directly.
|
|
*/
|
|
struct kernfs_node *parent;
|
|
const char *name;
|
|
|
|
struct rb_node rb;
|
|
|
|
const void *ns; /* namespace tag */
|
|
unsigned int hash; /* ns + name hash */
|
|
union {
|
|
struct kernfs_elem_dir dir;
|
|
struct kernfs_elem_symlink symlink;
|
|
struct kernfs_elem_attr attr;
|
|
};
|
|
|
|
void *priv;
|
|
|
|
unsigned short flags;
|
|
umode_t mode;
|
|
unsigned int ino;
|
|
struct kernfs_iattrs *iattr;
|
|
};
|
|
|
|
/*
|
|
* kernfs_syscall_ops may be specified on kernfs_create_root() to support
|
|
* syscalls. These optional callbacks are invoked on the matching syscalls
|
|
* and can perform any kernfs operations which don't necessarily have to be
|
|
* the exact operation requested. An active reference is held for each
|
|
* kernfs_node parameter.
|
|
*/
|
|
struct kernfs_syscall_ops {
|
|
int (*remount_fs)(struct kernfs_root *root, int *flags, char *data);
|
|
int (*show_options)(struct seq_file *sf, struct kernfs_root *root);
|
|
|
|
int (*mkdir)(struct kernfs_node *parent, const char *name,
|
|
umode_t mode);
|
|
int (*rmdir)(struct kernfs_node *kn);
|
|
int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent,
|
|
const char *new_name);
|
|
};
|
|
|
|
struct kernfs_root {
|
|
/* published fields */
|
|
struct kernfs_node *kn;
|
|
unsigned int flags; /* KERNFS_ROOT_* flags */
|
|
|
|
/* private fields, do not use outside kernfs proper */
|
|
struct ida ino_ida;
|
|
struct kernfs_syscall_ops *syscall_ops;
|
|
|
|
/* list of kernfs_super_info of this root, protected by kernfs_mutex */
|
|
struct list_head supers;
|
|
|
|
wait_queue_head_t deactivate_waitq;
|
|
};
|
|
|
|
struct kernfs_open_file {
|
|
/* published fields */
|
|
struct kernfs_node *kn;
|
|
struct file *file;
|
|
void *priv;
|
|
|
|
/* private fields, do not use outside kernfs proper */
|
|
struct mutex mutex;
|
|
int event;
|
|
struct list_head list;
|
|
|
|
size_t atomic_write_len;
|
|
bool mmapped;
|
|
const struct vm_operations_struct *vm_ops;
|
|
};
|
|
|
|
struct kernfs_ops {
|
|
/*
|
|
* Read is handled by either seq_file or raw_read().
|
|
*
|
|
* If seq_show() is present, seq_file path is active. Other seq
|
|
* operations are optional and if not implemented, the behavior is
|
|
* equivalent to single_open(). @sf->private points to the
|
|
* associated kernfs_open_file.
|
|
*
|
|
* read() is bounced through kernel buffer and a read larger than
|
|
* PAGE_SIZE results in partial operation of PAGE_SIZE.
|
|
*/
|
|
int (*seq_show)(struct seq_file *sf, void *v);
|
|
|
|
void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
|
|
void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
|
|
void (*seq_stop)(struct seq_file *sf, void *v);
|
|
|
|
ssize_t (*read)(struct kernfs_open_file *of, char *buf, size_t bytes,
|
|
loff_t off);
|
|
|
|
/*
|
|
* write() is bounced through kernel buffer. If atomic_write_len
|
|
* is not set, a write larger than PAGE_SIZE results in partial
|
|
* operations of PAGE_SIZE chunks. If atomic_write_len is set,
|
|
* writes upto the specified size are executed atomically but
|
|
* larger ones are rejected with -E2BIG.
|
|
*/
|
|
size_t atomic_write_len;
|
|
ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes,
|
|
loff_t off);
|
|
|
|
int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma);
|
|
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
|
struct lock_class_key lockdep_key;
|
|
#endif
|
|
};
|
|
|
|
#ifdef CONFIG_KERNFS
|
|
|
|
static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
|
|
{
|
|
return kn->flags & KERNFS_TYPE_MASK;
|
|
}
|
|
|
|
/**
|
|
* kernfs_enable_ns - enable namespace under a directory
|
|
* @kn: directory of interest, should be empty
|
|
*
|
|
* This is to be called right after @kn is created to enable namespace
|
|
* under it. All children of @kn must have non-NULL namespace tags and
|
|
* only the ones which match the super_block's tag will be visible.
|
|
*/
|
|
static inline void kernfs_enable_ns(struct kernfs_node *kn)
|
|
{
|
|
WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR);
|
|
WARN_ON_ONCE(!RB_EMPTY_ROOT(&kn->dir.children));
|
|
kn->flags |= KERNFS_NS;
|
|
}
|
|
|
|
/**
|
|
* kernfs_ns_enabled - test whether namespace is enabled
|
|
* @kn: the node to test
|
|
*
|
|
* Test whether namespace filtering is enabled for the children of @ns.
|
|
*/
|
|
static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
|
|
{
|
|
return kn->flags & KERNFS_NS;
|
|
}
|
|
|
|
int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen);
|
|
char * __must_check kernfs_path(struct kernfs_node *kn, char *buf,
|
|
size_t buflen);
|
|
void pr_cont_kernfs_name(struct kernfs_node *kn);
|
|
void pr_cont_kernfs_path(struct kernfs_node *kn);
|
|
struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn);
|
|
struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
|
|
const char *name, const void *ns);
|
|
void kernfs_get(struct kernfs_node *kn);
|
|
void kernfs_put(struct kernfs_node *kn);
|
|
|
|
struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry);
|
|
struct kernfs_root *kernfs_root_from_sb(struct super_block *sb);
|
|
|
|
struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
|
|
unsigned int flags, void *priv);
|
|
void kernfs_destroy_root(struct kernfs_root *root);
|
|
|
|
struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
|
|
const char *name, umode_t mode,
|
|
void *priv, const void *ns);
|
|
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
|
|
const char *name,
|
|
umode_t mode, loff_t size,
|
|
const struct kernfs_ops *ops,
|
|
void *priv, const void *ns,
|
|
bool name_is_static,
|
|
struct lock_class_key *key);
|
|
struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
|
|
const char *name,
|
|
struct kernfs_node *target);
|
|
void kernfs_activate(struct kernfs_node *kn);
|
|
void kernfs_remove(struct kernfs_node *kn);
|
|
void kernfs_break_active_protection(struct kernfs_node *kn);
|
|
void kernfs_unbreak_active_protection(struct kernfs_node *kn);
|
|
bool kernfs_remove_self(struct kernfs_node *kn);
|
|
int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
|
|
const void *ns);
|
|
int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
|
|
const char *new_name, const void *new_ns);
|
|
int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
|
|
void kernfs_notify(struct kernfs_node *kn);
|
|
|
|
const void *kernfs_super_ns(struct super_block *sb);
|
|
struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
|
|
struct kernfs_root *root, unsigned long magic,
|
|
bool *new_sb_created, const void *ns);
|
|
void kernfs_kill_sb(struct super_block *sb);
|
|
|
|
void kernfs_init(void);
|
|
|
|
#else /* CONFIG_KERNFS */
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; always returns 0. */
static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
{ return 0; }	/* whatever */
|
|
|
|
/* No-op stub used when CONFIG_KERNFS is not defined. */
static inline void kernfs_enable_ns(struct kernfs_node *kn)
{
}
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; always returns false. */
static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
{ return false; }
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; always returns -ENOSYS. */
static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
{ return -ENOSYS; }
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; always returns NULL. */
static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf,
					      size_t buflen)
{ return NULL; }
|
|
|
|
/* No-op stub used when CONFIG_KERNFS is not defined. */
static inline void pr_cont_kernfs_name(struct kernfs_node *kn)
{
}
|
|
/* No-op stub used when CONFIG_KERNFS is not defined. */
static inline void pr_cont_kernfs_path(struct kernfs_node *kn)
{
}
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; always returns NULL. */
static inline struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
{ return NULL; }
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; always returns NULL. */
static inline struct kernfs_node *
kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name,
		       const void *ns)
{ return NULL; }
|
|
|
|
/* No-op stub used when CONFIG_KERNFS is not defined. */
static inline void kernfs_get(struct kernfs_node *kn)
{
}
|
|
/* No-op stub used when CONFIG_KERNFS is not defined. */
static inline void kernfs_put(struct kernfs_node *kn)
{
}
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; always returns NULL. */
static inline struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
{ return NULL; }
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; always returns NULL. */
static inline struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
{ return NULL; }
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; returns ERR_PTR(-ENOSYS). */
static inline struct kernfs_root *
kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags,
		   void *priv)
{ return ERR_PTR(-ENOSYS); }
|
|
|
|
/* No-op stub used when CONFIG_KERNFS is not defined. */
static inline void kernfs_destroy_root(struct kernfs_root *root)
{
}
|
|
|
|
static inline struct kernfs_node *
|
|
kernfs_create_dir_ns(struct kernfs_node *parent, const char *name,
|
|
umode_t mode, void *priv, const void *ns)
|
|
{ return ERR_PTR(-ENOSYS); }
|
|
|
|
static inline struct kernfs_node *
|
|
__kernfs_create_file(struct kernfs_node *parent, const char *name,
|
|
umode_t mode, loff_t size, const struct kernfs_ops *ops,
|
|
void *priv, const void *ns, bool name_is_static,
|
|
struct lock_class_key *key)
|
|
{ return ERR_PTR(-ENOSYS); }
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; returns ERR_PTR(-ENOSYS). */
static inline struct kernfs_node *
kernfs_create_link(struct kernfs_node *parent, const char *name,
		   struct kernfs_node *target)
{ return ERR_PTR(-ENOSYS); }
|
|
|
|
/* No-op stub used when CONFIG_KERNFS is not defined. */
static inline void kernfs_activate(struct kernfs_node *kn)
{
}
|
|
|
|
/* No-op stub used when CONFIG_KERNFS is not defined. */
static inline void kernfs_remove(struct kernfs_node *kn)
{
}
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; always returns false. */
static inline bool kernfs_remove_self(struct kernfs_node *kn)
{ return false; }
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; always returns -ENOSYS. */
static inline int kernfs_remove_by_name_ns(struct kernfs_node *kn,
					   const char *name, const void *ns)
{ return -ENOSYS; }
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; always returns -ENOSYS. */
static inline int kernfs_rename_ns(struct kernfs_node *kn,
				   struct kernfs_node *new_parent,
				   const char *new_name, const void *new_ns)
{ return -ENOSYS; }
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; always returns -ENOSYS. */
static inline int kernfs_setattr(struct kernfs_node *kn,
				 const struct iattr *iattr)
{ return -ENOSYS; }
|
|
|
|
/* No-op stub used when CONFIG_KERNFS is not defined. */
static inline void kernfs_notify(struct kernfs_node *kn)
{
}
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; always returns NULL. */
static inline const void *kernfs_super_ns(struct super_block *sb)
{ return NULL; }
|
|
|
|
/* Stub used when CONFIG_KERNFS is not defined; returns ERR_PTR(-ENOSYS). */
static inline struct dentry *
kernfs_mount_ns(struct file_system_type *fs_type, int flags,
		struct kernfs_root *root, unsigned long magic,
		bool *new_sb_created, const void *ns)
{ return ERR_PTR(-ENOSYS); }
|
|
|
|
/* No-op stub used when CONFIG_KERNFS is not defined. */
static inline void kernfs_kill_sb(struct super_block *sb)
{
}
|
|
|
|
/* No-op stub used when CONFIG_KERNFS is not defined. */
static inline void kernfs_init(void)
{
}
|
|
|
|
#endif /* CONFIG_KERNFS */
|
|
|
|
/*
 * kernfs_find_and_get - kernfs_find_and_get_ns() with a NULL namespace tag
 * @kn: node passed as the parent to search under
 * @name: name to look up
 *
 * Returns whatever kernfs_find_and_get_ns() returns.
 */
static inline struct kernfs_node *
kernfs_find_and_get(struct kernfs_node *kn, const char *name)
{
	return kernfs_find_and_get_ns(kn, name, NULL);
}
|
|
|
|
static inline struct kernfs_node *
|
|
kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode,
|
|
void *priv)
|
|
{
|
|
return kernfs_create_dir_ns(parent, name, mode, priv, NULL);
|
|
}
|
|
|
|
static inline struct kernfs_node *
|
|
kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
|
|
umode_t mode, loff_t size, const struct kernfs_ops *ops,
|
|
void *priv, const void *ns)
|
|
{
|
|
struct lock_class_key *key = NULL;
|
|
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
|
key = (struct lock_class_key *)&ops->lockdep_key;
|
|
#endif
|
|
return __kernfs_create_file(parent, name, mode, size, ops, priv, ns,
|
|
false, key);
|
|
}
|
|
|
|
static inline struct kernfs_node *
|
|
kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode,
|
|
loff_t size, const struct kernfs_ops *ops, void *priv)
|
|
{
|
|
return kernfs_create_file_ns(parent, name, mode, size, ops, priv, NULL);
|
|
}
|
|
|
|
/*
 * kernfs_remove_by_name - kernfs_remove_by_name_ns() with a NULL namespace tag
 *
 * Returns whatever kernfs_remove_by_name_ns() returns.
 */
static inline int kernfs_remove_by_name(struct kernfs_node *parent,
					const char *name)
{
	return kernfs_remove_by_name_ns(parent, name, NULL);
}
|
|
|
|
/*
 * kernfs_rename - kernfs_rename_ns() with a NULL new namespace tag
 *
 * Returns whatever kernfs_rename_ns() returns.
 */
static inline int kernfs_rename(struct kernfs_node *kn,
				struct kernfs_node *new_parent,
				const char *new_name)
{
	return kernfs_rename_ns(kn, new_parent, new_name, NULL);
}
|
|
|
|
/*
 * kernfs_mount - kernfs_mount_ns() with a NULL namespace tag
 *
 * All other arguments are forwarded unchanged and the result of
 * kernfs_mount_ns() is returned as is.
 */
static inline struct dentry *
kernfs_mount(struct file_system_type *fs_type, int flags,
	     struct kernfs_root *root, unsigned long magic,
	     bool *new_sb_created)
{
	return kernfs_mount_ns(fs_type, flags, root,
			       magic, new_sb_created, NULL);
}
|
|
|
|
#endif /* __LINUX_KERNFS_H */
|