linux_dsm_epyc7002/fs/notify/fanotify/fanotify.c
Lino Sanfilippo 03a1cec1f1 fanotify: dont merge permission events
Boyd Yang reported a problem for the case that multiple threads of the same
thread group are waiting for a reponse for a permission event.
In this case it is possible that some of the threads are never woken up, even
if the response for the event has been received
(see http://marc.info/?l=linux-kernel&m=131822913806350&w=2).

The reason is that we are currently merging permission events if they belong to
the same thread group. But we are not prepared to wake up more than one waiter
for each event. We do

wait_event(group->fanotify_data.access_waitq, event->response ||
			atomic_read(&group->fanotify_data.bypass_perm));
and after that
  event->response = 0;

which is the reason that even if we woke up all waiters for the same event
some of them may see event->response being already set 0 again, then go back to
sleep and block forever.

With this patch we avoid that more than one thread is waiting for a response
by not merging permission events for the same thread group any more.

Reported-by: Boyd Yang <boyd.yang@gmail.com>
Signed-off-by: Lino Sanfilippo <LinoSanfilipp@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
2012-12-11 13:44:37 -05:00

235 lines
6.3 KiB
C

#include <linux/fanotify.h>
#include <linux/fdtable.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kernel.h> /* UINT_MAX */
#include <linux/mount.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/wait.h>
static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
{
pr_debug("%s: old=%p new=%p\n", __func__, old, new);
if (old->to_tell == new->to_tell &&
old->data_type == new->data_type &&
old->tgid == new->tgid) {
switch (old->data_type) {
case (FSNOTIFY_EVENT_PATH):
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
/* dont merge two permission events */
if ((old->mask & FAN_ALL_PERM_EVENTS) &&
(new->mask & FAN_ALL_PERM_EVENTS))
return false;
#endif
if ((old->path.mnt == new->path.mnt) &&
(old->path.dentry == new->path.dentry))
return true;
case (FSNOTIFY_EVENT_NONE):
return true;
default:
BUG();
};
}
return false;
}
/* and the list better be locked by something too! */
static struct fsnotify_event *fanotify_merge(struct list_head *list,
struct fsnotify_event *event)
{
struct fsnotify_event_holder *test_holder;
struct fsnotify_event *test_event = NULL;
struct fsnotify_event *new_event;
pr_debug("%s: list=%p event=%p\n", __func__, list, event);
list_for_each_entry_reverse(test_holder, list, event_list) {
if (should_merge(test_holder->event, event)) {
test_event = test_holder->event;
break;
}
}
if (!test_event)
return NULL;
fsnotify_get_event(test_event);
/* if they are exactly the same we are done */
if (test_event->mask == event->mask)
return test_event;
/*
* if the refcnt == 2 this is the only queue
* for this event and so we can update the mask
* in place.
*/
if (atomic_read(&test_event->refcnt) == 2) {
test_event->mask |= event->mask;
return test_event;
}
new_event = fsnotify_clone_event(test_event);
/* done with test_event */
fsnotify_put_event(test_event);
/* couldn't allocate memory, merge was not possible */
if (unlikely(!new_event))
return ERR_PTR(-ENOMEM);
/* build new event and replace it on the list */
new_event->mask = (test_event->mask | event->mask);
fsnotify_replace_event(test_holder, new_event);
/* we hold a reference on new_event from clone_event */
return new_event;
}
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
static int fanotify_get_response_from_access(struct fsnotify_group *group,
struct fsnotify_event *event)
{
int ret;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
wait_event(group->fanotify_data.access_waitq, event->response ||
atomic_read(&group->fanotify_data.bypass_perm));
if (!event->response) /* bypass_perm set */
return 0;
/* userspace responded, convert to something usable */
spin_lock(&event->lock);
switch (event->response) {
case FAN_ALLOW:
ret = 0;
break;
case FAN_DENY:
default:
ret = -EPERM;
}
event->response = 0;
spin_unlock(&event->lock);
pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
group, event, ret);
return ret;
}
#endif
static int fanotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *fanotify_mark,
struct fsnotify_event *event)
{
int ret = 0;
struct fsnotify_event *notify_event = NULL;
BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
if (IS_ERR(notify_event))
return PTR_ERR(notify_event);
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (event->mask & FAN_ALL_PERM_EVENTS) {
/* if we merged we need to wait on the new event */
if (notify_event)
event = notify_event;
ret = fanotify_get_response_from_access(group, event);
}
#endif
if (notify_event)
fsnotify_put_event(notify_event);
return ret;
}
static bool fanotify_should_send_event(struct fsnotify_group *group,
struct inode *to_tell,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmnt_mark,
__u32 event_mask, void *data, int data_type)
{
__u32 marks_mask, marks_ignored_mask;
struct path *path = data;
pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
"mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
inode_mark, vfsmnt_mark, event_mask, data, data_type);
/* if we don't have enough info to send an event to userspace say no */
if (data_type != FSNOTIFY_EVENT_PATH)
return false;
/* sorry, fanotify only gives a damn about files and dirs */
if (!S_ISREG(path->dentry->d_inode->i_mode) &&
!S_ISDIR(path->dentry->d_inode->i_mode))
return false;
if (inode_mark && vfsmnt_mark) {
marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
} else if (inode_mark) {
/*
* if the event is for a child and this inode doesn't care about
* events on the child, don't send it!
*/
if ((event_mask & FS_EVENT_ON_CHILD) &&
!(inode_mark->mask & FS_EVENT_ON_CHILD))
return false;
marks_mask = inode_mark->mask;
marks_ignored_mask = inode_mark->ignored_mask;
} else if (vfsmnt_mark) {
marks_mask = vfsmnt_mark->mask;
marks_ignored_mask = vfsmnt_mark->ignored_mask;
} else {
BUG();
}
if (S_ISDIR(path->dentry->d_inode->i_mode) &&
(marks_ignored_mask & FS_ISDIR))
return false;
if (event_mask & marks_mask & ~marks_ignored_mask)
return true;
return false;
}
static void fanotify_free_group_priv(struct fsnotify_group *group)
{
struct user_struct *user;
user = group->fanotify_data.user;
atomic_dec(&user->fanotify_listeners);
free_uid(user);
}
const struct fsnotify_ops fanotify_fsnotify_ops = {
.handle_event = fanotify_handle_event,
.should_send_event = fanotify_should_send_event,
.free_group_priv = fanotify_free_group_priv,
.free_event_priv = NULL,
.freeing_mark = NULL,
};