From a24cd490739586a7d2da3549a1844e1d7c4f4fc4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2018 23:50:31 -0400 Subject: [PATCH 1/6] hypfs_kill_super(): deal with failed allocations hypfs_fill_super() might fail to allocate sbi; hypfs_kill_super() should not oops on that. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- arch/s390/hypfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 43bbe63e2992..06b513d192b9 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -320,7 +320,7 @@ static void hypfs_kill_super(struct super_block *sb) if (sb->s_root) hypfs_delete_tree(sb->s_root); - if (sb_info->update_file) + if (sb_info && sb_info->update_file) hypfs_remove(sb_info->update_file); kfree(sb->s_fs_info); sb->s_fs_info = NULL; From c66b23c2840446a82c389e4cb1a12eb2a71fa2e4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2018 23:56:44 -0400 Subject: [PATCH 2/6] jffs2_kill_sb(): deal with failed allocations jffs2_fill_super() might fail to allocate jffs2_sb_info; jffs2_kill_sb() must survive that. Cc: stable@kernel.org Signed-off-by: Al Viro --- fs/jffs2/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index f60dee7faf03..87bdf0f4cba1 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -342,7 +342,7 @@ static void jffs2_put_super (struct super_block *sb) static void jffs2_kill_sb(struct super_block *sb) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - if (!sb_rdonly(sb)) + if (c && !sb_rdonly(sb)) jffs2_stop_garbage_collect_thread(c); kill_mtd_super(sb); kfree(c); From 659038428cb43a66e3eff71e2c845c9de3611a98 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 3 Apr 2018 00:13:17 -0400 Subject: [PATCH 3/6] orangefs_kill_sb(): deal with allocation failures orangefs_fill_sb() might've failed to allocate ORANGEFS_SB(s); don't oops in that case. 
Cc: stable@kernel.org Signed-off-by: Al Viro --- fs/orangefs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 3ae5fdba0225..10796d3fe27d 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -579,6 +579,11 @@ void orangefs_kill_sb(struct super_block *sb) /* provided sb cleanup */ kill_anon_super(sb); + if (!ORANGEFS_SB(sb)) { + mutex_lock(&orangefs_request_mutex); + mutex_unlock(&orangefs_request_mutex); + return; + } /* * issue the unmount to userspace to tell it to remove the * dynamic mount info it has for this superblock From 4a3877c4cedd95543f8726b0a98743ed8db0c0fb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 3 Apr 2018 01:15:46 -0400 Subject: [PATCH 4/6] rpc_pipefs: fix double-dput() if we ever hit rpc_gssd_dummy_depopulate() dentry passed to it has refcount equal to 1. __rpc_rmpipe() drops it and dput() done after that hits an already freed dentry. Cc: stable@kernel.org Signed-off-by: Al Viro --- net/sunrpc/rpc_pipe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 0f08934b2cea..c81ef5e6c981 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1375,6 +1375,7 @@ rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry) struct dentry *clnt_dir = pipe_dentry->d_parent; struct dentry *gssd_dir = clnt_dir->d_parent; + dget(pipe_dentry); __rpc_rmpipe(d_inode(clnt_dir), pipe_dentry); __rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1); __rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1); From 8e04944f0ea8b838399049bdcda920ab36ae3b04 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 4 Apr 2018 19:53:07 +0900 Subject: [PATCH 5/6] mm,vmscan: Allow preallocating memory for register_shrinker(). syzbot is catching so many bugs triggered by commit 9ee332d99e4d5a97 ("sget(): handle failures of register_shrinker()"). 
That commit expected that calling kill_sb() from deactivate_locked_super() without successful fill_super() is safe, but the reality was different; some callers assign attributes which are needed for kill_sb() after sget() succeeds. For example, [1] is a report where sb->s_mode (which seems to be either FMODE_READ | FMODE_EXCL | FMODE_WRITE or FMODE_READ | FMODE_EXCL) is not assigned unless sget() succeeds. But it is not worth complicating sget() so that the register_shrinker() failure path can safely call kill_block_super() via kill_sb(). Making alloc_super() fail if memory allocation for register_shrinker() failed is much simpler. Let's avoid calling deactivate_locked_super() from sget_userns() by preallocating memory for the shrinker and making register_shrinker() in sget_userns() never fail. [1] https://syzkaller.appspot.com/bug?id=588996a25a2587be2e3a54e8646728fb9cae44e7 Signed-off-by: Tetsuo Handa Reported-by: syzbot Cc: Al Viro Cc: Michal Hocko Signed-off-by: Al Viro --- fs/super.c | 9 ++++----- include/linux/shrinker.h | 7 +++++-- mm/vmscan.c | 21 ++++++++++++++++++++- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/fs/super.c b/fs/super.c index 5fa9a8d8d865..122c402049a2 100644 --- a/fs/super.c +++ b/fs/super.c @@ -167,6 +167,7 @@ static void destroy_unused_super(struct super_block *s) security_sb_free(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); + free_prealloced_shrinker(&s->s_shrink); /* no delays needed */ destroy_super_work(&s->destroy_work); } @@ -252,6 +253,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_shrink.count_objects = super_cache_count; s->s_shrink.batch = 1024; s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE; + if (prealloc_shrinker(&s->s_shrink)) + goto fail; return s; fail: @@ -518,11 +521,7 @@ struct super_block *sget_userns(struct file_system_type *type, hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); get_filesystem(type); - err = 
register_shrinker(&s->s_shrink); - if (err) { - deactivate_locked_super(s); - s = ERR_PTR(err); - } + register_shrinker_prepared(&s->s_shrink); return s; } diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 388ff2936a87..6794490f25b2 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -75,6 +75,9 @@ struct shrinker { #define SHRINKER_NUMA_AWARE (1 << 0) #define SHRINKER_MEMCG_AWARE (1 << 1) -extern int register_shrinker(struct shrinker *); -extern void unregister_shrinker(struct shrinker *); +extern int prealloc_shrinker(struct shrinker *shrinker); +extern void register_shrinker_prepared(struct shrinker *shrinker); +extern int register_shrinker(struct shrinker *shrinker); +extern void unregister_shrinker(struct shrinker *shrinker); +extern void free_prealloced_shrinker(struct shrinker *shrinker); #endif diff --git a/mm/vmscan.c b/mm/vmscan.c index 8b920ce3ae02..9b697323a88c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -303,7 +303,7 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone /* * Add a shrinker callback to be called from the vm. 
*/ -int register_shrinker(struct shrinker *shrinker) +int prealloc_shrinker(struct shrinker *shrinker) { size_t size = sizeof(*shrinker->nr_deferred); @@ -313,10 +313,29 @@ int register_shrinker(struct shrinker *shrinker) shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); if (!shrinker->nr_deferred) return -ENOMEM; + return 0; +} +void free_prealloced_shrinker(struct shrinker *shrinker) +{ + kfree(shrinker->nr_deferred); + shrinker->nr_deferred = NULL; +} + +void register_shrinker_prepared(struct shrinker *shrinker) +{ down_write(&shrinker_rwsem); list_add_tail(&shrinker->list, &shrinker_list); up_write(&shrinker_rwsem); +} + +int register_shrinker(struct shrinker *shrinker) +{ + int err = prealloc_shrinker(shrinker); + + if (err) + return err; + register_shrinker_prepared(shrinker); return 0; } EXPORT_SYMBOL(register_shrinker); From 16a34adb9392b2fe4195267475ab5b472e55292c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 19 Apr 2018 22:03:08 -0400 Subject: [PATCH 6/6] Don't leak MNT_INTERNAL away from internal mounts We want it only for the stuff created by SB_KERNMOUNT mounts, *not* for their copies. As it is, creating a deep stack of bindings of /proc/*/ns/* somewhere in a new namespace and exiting yields a stack overflow. 
Cc: stable@kernel.org Reported-by: Alexander Aring Bisected-by: Kirill Tkhai Tested-by: Kirill Tkhai Tested-by: Alexander Aring Signed-off-by: Al Viro --- fs/namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index e398f32d7541..8634d565b858 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1089,7 +1089,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, goto out_free; } - mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED); + mnt->mnt.mnt_flags = old->mnt.mnt_flags; + mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); /* Don't allow unprivileged users to change mount flags */ if (flag & CL_UNPRIVILEGED) { mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;