2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* Directory notifications for Linux.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2000,2001,2002 Stephen Rothwell
|
|
|
|
*
|
2009-05-22 04:01:33 +07:00
|
|
|
* Copyright (C) 2009 Eric Paris <Red Hat Inc>
|
|
|
|
* dnotify was largly rewritten to use the new fsnotify infrastructure
|
|
|
|
*
|
2005-04-17 05:20:36 +07:00
|
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
|
|
* under the terms of the GNU General Public License as published by the
|
|
|
|
* Free Software Foundation; either version 2, or (at your option) any
|
|
|
|
* later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful, but
|
|
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
|
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/sched.h>
|
2017-07-17 10:05:57 +07:00
|
|
|
#include <linux/sched/signal.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <linux/dnotify.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/spinlock.h>
|
|
|
|
#include <linux/slab.h>
|
2008-04-24 18:44:08 +07:00
|
|
|
#include <linux/fdtable.h>
|
2009-05-22 04:01:33 +07:00
|
|
|
#include <linux/fsnotify_backend.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
|
/*
 * Global on/off switch for dnotify; checked in fcntl_dirnotify() before a
 * watch may be added.  Presumably exposed to userspace as a sysctl —
 * confirm against the sysctl table that references it.
 */
int dir_notify_enable __read_mostly = 1;

/* Slab cache for per-watch struct dnotify_struct entries. */
static struct kmem_cache *dnotify_struct_cache __read_mostly;
/* Slab cache for per-inode struct dnotify_mark wrappers. */
static struct kmem_cache *dnotify_mark_cache __read_mostly;
/* The single fsnotify group through which all dnotify events are delivered. */
static struct fsnotify_group *dnotify_group __read_mostly;
|
|
|
|
|
|
|
|
/*
 * dnotify will attach one of these to each inode (i_fsnotify_marks) which
 * is being watched by dnotify. If multiple userspace applications are watching
 * the same directory with dnotify their information is chained in dn
 */
struct dnotify_mark {
	/* Embedded fsnotify mark; dnotify code recovers the containing
	 * dnotify_mark via container_of() on this member. */
	struct fsnotify_mark fsn_mark;
	/* Singly-linked list (dn_next) of per-watcher dnotify_structs,
	 * protected by fsn_mark.lock. */
	struct dnotify_struct *dn;
};
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2009-05-22 04:01:33 +07:00
|
|
|
/*
|
|
|
|
* When a process starts or stops watching an inode the set of events which
|
|
|
|
* dnotify cares about for that inode may change. This function runs the
|
|
|
|
* list of everything receiving dnotify events about this directory and calculates
|
|
|
|
* the set of all those events. After it updates what dnotify is interested in
|
|
|
|
* it calls the fsnotify function so it can update the set of all events relevant
|
|
|
|
* to this inode.
|
|
|
|
*/
|
2009-12-18 09:24:24 +07:00
|
|
|
static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
2016-12-21 22:03:59 +07:00
|
|
|
__u32 new_mask = 0;
|
2005-04-17 05:20:36 +07:00
|
|
|
struct dnotify_struct *dn;
|
2009-12-18 09:24:24 +07:00
|
|
|
struct dnotify_mark *dn_mark = container_of(fsn_mark,
|
|
|
|
struct dnotify_mark,
|
|
|
|
fsn_mark);
|
2009-05-22 04:01:33 +07:00
|
|
|
|
2009-12-18 09:24:24 +07:00
|
|
|
assert_spin_locked(&fsn_mark->lock);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2009-12-18 09:24:24 +07:00
|
|
|
for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next)
|
2009-05-22 04:01:33 +07:00
|
|
|
new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
|
2016-12-21 22:03:59 +07:00
|
|
|
if (fsn_mark->mask == new_mask)
|
2009-05-22 04:01:33 +07:00
|
|
|
return;
|
2016-12-21 22:03:59 +07:00
|
|
|
fsn_mark->mask = new_mask;
|
2009-05-22 04:01:33 +07:00
|
|
|
|
2017-03-15 15:16:27 +07:00
|
|
|
fsnotify_recalc_mask(fsn_mark->connector);
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
2009-05-22 04:01:33 +07:00
|
|
|
/*
 * Main fsnotify call where events are delivered to dnotify.
 * Find the dnotify mark on the relevant inode, run the list of dnotify structs
 * on that mark and determine which of them has expressed interest in receiving
 * events of this type. When found send the correct process and signal and
 * destroy the dnotify struct if it was not registered to receive multiple
 * events.
 */
static int dnotify_handle_event(struct fsnotify_group *group,
				struct inode *inode,
				u32 mask, const void *data, int data_type,
				const struct qstr *file_name, u32 cookie,
				struct fsnotify_iter_info *iter_info)
{
	struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
	struct dnotify_mark *dn_mark;
	struct dnotify_struct *dn;
	struct dnotify_struct **prev;
	struct fown_struct *fown;
	/* Mask off FS_EVENT_ON_CHILD so events on children don't spuriously
	 * match every watch on the parent directory. */
	__u32 test_mask = mask & ~FS_EVENT_ON_CHILD;

	/* not a dir, dnotify doesn't care */
	if (!S_ISDIR(inode->i_mode))
		return 0;

	/* dnotify never sets vfsmount marks; seeing one here is a bug. */
	if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info)))
		return 0;

	dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);

	spin_lock(&inode_mark->lock);
	/* Walk the watcher list with a pointer-to-pointer so a matched,
	 * non-multishot entry can be unlinked in place. */
	prev = &dn_mark->dn;
	while ((dn = *prev) != NULL) {
		if ((dn->dn_mask & test_mask) == 0) {
			prev = &dn->dn_next;
			continue;
		}
		/* Deliver the watcher's chosen signal (F_SETSIG) with the
		 * watched fd as payload. */
		fown = &dn->dn_filp->f_owner;
		send_sigio(fown, dn->dn_fd, POLL_MSG);
		if (dn->dn_mask & FS_DN_MULTISHOT)
			prev = &dn->dn_next;
		else {
			/* One-shot watch: unlink, free, and shrink the
			 * inode's interest mask accordingly. */
			*prev = dn->dn_next;
			kmem_cache_free(dnotify_struct_cache, dn);
			dnotify_recalc_inode_mask(inode_mark);
		}
	}

	spin_unlock(&inode_mark->lock);

	return 0;
}
|
|
|
|
|
2009-12-18 09:24:24 +07:00
|
|
|
static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
|
2009-05-22 04:01:33 +07:00
|
|
|
{
|
2009-12-18 09:24:24 +07:00
|
|
|
struct dnotify_mark *dn_mark = container_of(fsn_mark,
|
|
|
|
struct dnotify_mark,
|
|
|
|
fsn_mark);
|
2009-05-22 04:01:33 +07:00
|
|
|
|
2009-12-18 09:24:24 +07:00
|
|
|
BUG_ON(dn_mark->dn);
|
2009-05-22 04:01:33 +07:00
|
|
|
|
2009-12-18 09:24:24 +07:00
|
|
|
kmem_cache_free(dnotify_mark_cache, dn_mark);
|
2009-05-22 04:01:33 +07:00
|
|
|
}
|
|
|
|
|
/* Callbacks dnotify registers with the fsnotify backend. */
static const struct fsnotify_ops dnotify_fsnotify_ops = {
	.handle_event = dnotify_handle_event,
	.free_mark = dnotify_free_mark,
};
|
|
|
|
|
|
|
|
/*
 * Called every time a file is closed.  Looks first for a dnotify mark on the
 * inode.  If one is found run all of the ->dn structures attached to that
 * mark for one relevant to this process closing the file and remove that
 * dnotify_struct.  If that was the last dnotify_struct also remove the
 * fsnotify_mark.
 */
void dnotify_flush(struct file *filp, fl_owner_t id)
{
	struct fsnotify_mark *fsn_mark;
	struct dnotify_mark *dn_mark;
	struct dnotify_struct *dn;
	struct dnotify_struct **prev;
	struct inode *inode;
	bool free = false;

	inode = file_inode(filp);
	/* dnotify marks only exist on directories. */
	if (!S_ISDIR(inode->i_mode))
		return;

	/* Takes a reference on the mark; dropped via fsnotify_put_mark(). */
	fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group);
	if (!fsn_mark)
		return;
	dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);

	/* Lock order: group mark_mutex first, then the mark's spinlock. */
	mutex_lock(&dnotify_group->mark_mutex);

	spin_lock(&fsn_mark->lock);
	/* Find and unlink the entry belonging to this owner/filp pair. */
	prev = &dn_mark->dn;
	while ((dn = *prev) != NULL) {
		if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
			*prev = dn->dn_next;
			kmem_cache_free(dnotify_struct_cache, dn);
			dnotify_recalc_inode_mask(fsn_mark);
			break;
		}
		prev = &dn->dn_next;
	}

	spin_unlock(&fsn_mark->lock);

	/* nothing else could have found us thanks to the dnotify_groups
	   mark_mutex */
	if (dn_mark->dn == NULL) {
		/* Last watcher gone: detach under the mutex, free after. */
		fsnotify_detach_mark(fsn_mark);
		free = true;
	}

	mutex_unlock(&dnotify_group->mark_mutex);

	if (free)
		fsnotify_free_mark(fsn_mark);
	/* Drop the reference from fsnotify_find_mark(). */
	fsnotify_put_mark(fsn_mark);
}
|
|
|
|
|
|
|
|
/* this conversion is done only at watch creation */
|
|
|
|
static __u32 convert_arg(unsigned long arg)
|
|
|
|
{
|
|
|
|
__u32 new_mask = FS_EVENT_ON_CHILD;
|
|
|
|
|
|
|
|
if (arg & DN_MULTISHOT)
|
|
|
|
new_mask |= FS_DN_MULTISHOT;
|
|
|
|
if (arg & DN_DELETE)
|
|
|
|
new_mask |= (FS_DELETE | FS_MOVED_FROM);
|
|
|
|
if (arg & DN_MODIFY)
|
|
|
|
new_mask |= FS_MODIFY;
|
|
|
|
if (arg & DN_ACCESS)
|
|
|
|
new_mask |= FS_ACCESS;
|
|
|
|
if (arg & DN_ATTRIB)
|
|
|
|
new_mask |= FS_ATTRIB;
|
|
|
|
if (arg & DN_RENAME)
|
|
|
|
new_mask |= FS_DN_RENAME;
|
|
|
|
if (arg & DN_CREATE)
|
|
|
|
new_mask |= (FS_CREATE | FS_MOVED_TO);
|
|
|
|
|
|
|
|
return new_mask;
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
2009-05-22 04:01:33 +07:00
|
|
|
/*
|
|
|
|
* If multiple processes watch the same inode with dnotify there is only one
|
2009-12-18 09:24:24 +07:00
|
|
|
* dnotify mark in inode->i_fsnotify_marks but we chain a dnotify_struct
|
2009-05-22 04:01:33 +07:00
|
|
|
* onto that mark. This function either attaches the new dnotify_struct onto
|
|
|
|
* that list, or it |= the mask onto an existing dnofiy_struct.
|
|
|
|
*/
|
2009-12-18 09:24:24 +07:00
|
|
|
static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
|
2009-05-22 04:01:33 +07:00
|
|
|
fl_owner_t id, int fd, struct file *filp, __u32 mask)
|
|
|
|
{
|
|
|
|
struct dnotify_struct *odn;
|
|
|
|
|
2009-12-18 09:24:24 +07:00
|
|
|
odn = dn_mark->dn;
|
2009-05-22 04:01:33 +07:00
|
|
|
while (odn != NULL) {
|
|
|
|
/* adding more events to existing dnofiy_struct? */
|
|
|
|
if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
|
|
|
|
odn->dn_fd = fd;
|
|
|
|
odn->dn_mask |= mask;
|
|
|
|
return -EEXIST;
|
|
|
|
}
|
|
|
|
odn = odn->dn_next;
|
|
|
|
}
|
|
|
|
|
|
|
|
dn->dn_mask = mask;
|
|
|
|
dn->dn_fd = fd;
|
|
|
|
dn->dn_filp = filp;
|
|
|
|
dn->dn_owner = id;
|
2009-12-18 09:24:24 +07:00
|
|
|
dn->dn_next = dn_mark->dn;
|
|
|
|
dn_mark->dn = dn;
|
2009-05-22 04:01:33 +07:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * When a process calls fcntl to attach a dnotify watch to a directory it ends
 * up here. Allocate both a mark for fsnotify to add and a dnotify_struct to be
 * attached to the fsnotify_mark.
 */
int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
{
	struct dnotify_mark *new_dn_mark, *dn_mark;
	struct fsnotify_mark *new_fsn_mark, *fsn_mark;
	struct dnotify_struct *dn;
	struct inode *inode;
	fl_owner_t id = current->files;
	struct file *f;
	int destroy = 0, error = 0;
	__u32 mask;

	/* we use these to tell if we need to kfree */
	new_fsn_mark = NULL;
	dn = NULL;

	if (!dir_notify_enable) {
		error = -EINVAL;
		goto out_err;
	}

	/* a 0 mask means we are explicitly removing the watch */
	if ((arg & ~DN_MULTISHOT) == 0) {
		dnotify_flush(filp, id);
		error = 0;
		goto out_err;
	}

	/* dnotify only works on directories */
	inode = file_inode(filp);
	if (!S_ISDIR(inode->i_mode)) {
		error = -ENOTDIR;
		goto out_err;
	}

	/* expect most fcntl to add new rather than augment old */
	dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
	if (!dn) {
		error = -ENOMEM;
		goto out_err;
	}

	/* new fsnotify mark, we expect most fcntl calls to add a new mark */
	new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
	if (!new_dn_mark) {
		error = -ENOMEM;
		goto out_err;
	}

	/* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
	mask = convert_arg(arg);

	/* set up the new_fsn_mark and new_dn_mark */
	new_fsn_mark = &new_dn_mark->fsn_mark;
	fsnotify_init_mark(new_fsn_mark, dnotify_group);
	new_fsn_mark->mask = mask;
	new_dn_mark->dn = NULL;

	/* this is needed to prevent the fcntl/close race described below */
	mutex_lock(&dnotify_group->mark_mutex);

	/* add the new_fsn_mark or find an old one. */
	fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group);
	if (fsn_mark) {
		/* Reuse the existing mark; our freshly allocated one will be
		 * released on the out_err path. */
		dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
		spin_lock(&fsn_mark->lock);
	} else {
		error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0);
		if (error) {
			mutex_unlock(&dnotify_group->mark_mutex);
			goto out_err;
		}
		spin_lock(&new_fsn_mark->lock);
		fsn_mark = new_fsn_mark;
		dn_mark = new_dn_mark;
		/* we used new_fsn_mark, so don't free it */
		new_fsn_mark = NULL;
	}

	/* Re-check that the fd still refers to our file under RCU. */
	rcu_read_lock();
	f = fcheck(fd);
	rcu_read_unlock();

	/* if (f != filp) means that we lost a race and another task/thread
	 * actually closed the fd we are still playing with before we grabbed
	 * the dnotify_groups mark_mutex and fsn_mark->lock.  Since closing the
	 * fd is the only time we clean up the marks we need to get our mark
	 * off the list. */
	if (f != filp) {
		/* if we added ourselves, shoot ourselves, it's possible that
		 * the flush actually did shoot this fsn_mark.  That's fine too
		 * since multiple calls to destroy_mark is perfectly safe, if
		 * we found a dn_mark already attached to the inode, just sod
		 * off silently as the flush at close time dealt with it.
		 */
		if (dn_mark == new_dn_mark)
			destroy = 1;
		error = 0;
		goto out;
	}

	/* Direct signals for events on this watch to the calling thread group. */
	__f_setown(filp, task_pid(current), PIDTYPE_TGID, 0);

	error = attach_dn(dn, dn_mark, id, fd, filp, mask);
	/* !error means that we attached the dn to the dn_mark, so don't free it */
	if (!error)
		dn = NULL;
	/* -EEXIST means that we didn't add this new dn and used an old one.
	 * that isn't an error (and the unused dn should be freed) */
	else if (error == -EEXIST)
		error = 0;

	dnotify_recalc_inode_mask(fsn_mark);
out:
	spin_unlock(&fsn_mark->lock);

	if (destroy)
		fsnotify_detach_mark(fsn_mark);
	mutex_unlock(&dnotify_group->mark_mutex);
	/* fsnotify_free_mark() must run outside the mark_mutex. */
	if (destroy)
		fsnotify_free_mark(fsn_mark);
	fsnotify_put_mark(fsn_mark);
out_err:
	if (new_fsn_mark)
		fsnotify_put_mark(new_fsn_mark);
	if (dn)
		kmem_cache_free(dnotify_struct_cache, dn);
	return error;
}
|
|
|
|
|
|
|
|
/*
 * Boot-time setup: create the two dnotify slab caches and the single
 * fsnotify group.  SLAB_ACCOUNT charges allocations to the watcher's memcg;
 * SLAB_PANIC and the explicit panic() make failure here fatal, since dnotify
 * cannot operate without them.
 */
static int __init dnotify_init(void)
{
	dnotify_struct_cache = KMEM_CACHE(dnotify_struct,
					  SLAB_PANIC|SLAB_ACCOUNT);
	dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);

	dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
	if (IS_ERR(dnotify_group))
		panic("unable to allocate fsnotify group for dnotify\n");
	return 0;
}

module_init(dnotify_init)
|