From c04011fe8cbd80af1be6e12b53193bf3846750d7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 08:47:43 +0200 Subject: [PATCH 01/50] fs: add a vfs_fchown helper Add a helper for struct file based chown operations. To be used by the initramfs code soon. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- fs/open.c | 29 +++++++++++++++++------------ include/linux/fs.h | 2 ++ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/fs/open.c b/fs/open.c index 6cd48a61cda3..103c66309bee 100644 --- a/fs/open.c +++ b/fs/open.c @@ -740,23 +740,28 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group AT_SYMLINK_NOFOLLOW); } +int vfs_fchown(struct file *file, uid_t user, gid_t group) +{ + int error; + + error = mnt_want_write_file(file); + if (error) + return error; + audit_file(file); + error = chown_common(&file->f_path, user, group); + mnt_drop_write_file(file); + return error; +} + int ksys_fchown(unsigned int fd, uid_t user, gid_t group) { struct fd f = fdget(fd); int error = -EBADF; - if (!f.file) - goto out; - - error = mnt_want_write_file(f.file); - if (error) - goto out_fput; - audit_file(f.file); - error = chown_common(&f.file->f_path, user, group); - mnt_drop_write_file(f.file); -out_fput: - fdput(f); -out: + if (f.file) { + error = vfs_fchown(f.file, user, group); + fdput(f); + } return error; } diff --git a/include/linux/fs.h b/include/linux/fs.h index f5abba86107d..0ddd64ca0b45 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1744,6 +1744,8 @@ int vfs_mkobj(struct dentry *, umode_t, int (*f)(struct dentry *, umode_t, void *), void *); +int vfs_fchown(struct file *file, uid_t user, gid_t group); + extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT From 9e96c8c0e94eea2f69a9705f5d0f51928ea26c17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 08:55:05 +0200 Subject: [PATCH 02/50] fs: add a vfs_fchmod helper Add a helper for struct file based chmode operations. To be used by the initramfs code soon. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- fs/open.c | 9 +++++++-- include/linux/fs.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/open.c b/fs/open.c index 103c66309bee..75166f071d28 100644 --- a/fs/open.c +++ b/fs/open.c @@ -602,14 +602,19 @@ static int chmod_common(const struct path *path, umode_t mode) return error; } +int vfs_fchmod(struct file *file, umode_t mode) +{ + audit_file(file); + return chmod_common(&file->f_path, mode); +} + int ksys_fchmod(unsigned int fd, umode_t mode) { struct fd f = fdget(fd); int err = -EBADF; if (f.file) { - audit_file(f.file); - err = chmod_common(&f.file->f_path, mode); + err = vfs_fchmod(f.file, mode); fdput(f); } return err; diff --git a/include/linux/fs.h b/include/linux/fs.h index 0ddd64ca0b45..635086726f20 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1745,6 +1745,7 @@ int vfs_mkobj(struct dentry *, umode_t, void *); int vfs_fchown(struct file *file, uid_t user, gid_t group); +int vfs_fchmod(struct file *file, umode_t mode); extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); From 881627f353754dd05a8f3e1c1941f0dbbf47a170 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 6 Jun 2020 10:49:35 +0200 Subject: [PATCH 03/50] init: remove the bstat helper The only caller of the bstat function becomes cleaner and simpler when open coding the function. Signed-off-by: Christoph Hellwig Reviewed-by: NeilBrown Acked-by: Song Liu Acked-by: Linus Torvalds --- init/do_mounts.h | 10 ---------- init/do_mounts_md.c | 8 ++++---- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/init/do_mounts.h b/init/do_mounts.h index 0bb0806de4ce..7513d1c14d13 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -20,16 +20,6 @@ static inline int create_dev(char *name, dev_t dev) return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev)); } -static inline u32 bstat(char *name) -{ - struct kstat stat; - if (vfs_stat(name, &stat) != 0) - return 0; - if (!S_ISBLK(stat.mode)) - return 0; - return stat.rdev; -} - #ifdef CONFIG_BLK_DEV_RAM int __init rd_load_disk(int n); diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index b84031528dd4..359363e85ccd 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c @@ -138,9 +138,9 @@ static void __init md_setup_drive(void) dev = MKDEV(MD_MAJOR, minor); create_dev(name, dev); for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) { + struct kstat stat; char *p; char comp_name[64]; - u32 rdev; p = strchr(devname, ','); if (p) @@ -150,9 +150,9 @@ static void __init md_setup_drive(void) if (strncmp(devname, "/dev/", 5) == 0) devname += 5; snprintf(comp_name, 63, "/dev/%s", devname); - rdev = bstat(comp_name); - if (rdev) - dev = new_decode_dev(rdev); + if (vfs_stat(comp_name, &stat) == 0 && + S_ISBLK(stat.mode)) + dev = new_decode_dev(stat.rdev); if (!dev) { printk(KERN_WARNING "md: Unknown device name: %s\n", devname); break; From 4f5b246b37e024955c0fcca0c7f5952089052d1d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 7 Jun 2020 16:18:59 +0200 Subject: [PATCH 04/50] md: move the early init autodetect code to drivers/md/ Just like the NFS and CIFS root code this better lives with the driver it is tightly integrated with. Signed-off-by: Christoph Hellwig Acked-by: Song Liu Acked-by: Linus Torvalds --- drivers/md/Makefile | 3 +++ init/do_mounts_md.c => drivers/md/md-autodetect.c | 15 +++++++++++++-- include/linux/raid/detect.h | 8 ++++++++ init/Makefile | 1 - init/do_mounts.c | 1 + init/do_mounts.h | 10 ---------- 6 files changed, 25 insertions(+), 13 deletions(-) rename init/do_mounts_md.c => drivers/md/md-autodetect.c (96%) diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 31840f95cd40..6d3e234dc46a 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -43,6 +43,9 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o obj-$(CONFIG_MD_CLUSTER) += md-cluster.o obj-$(CONFIG_BCACHE) += bcache/ obj-$(CONFIG_BLK_DEV_MD) += md-mod.o +ifeq ($(CONFIG_BLK_DEV_MD),y) +obj-y += md-autodetect.o +endif obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o obj-$(CONFIG_DM_UNSTRIPED) += dm-unstripe.o diff --git a/init/do_mounts_md.c b/drivers/md/md-autodetect.c similarity index 96% rename from init/do_mounts_md.c rename to drivers/md/md-autodetect.c index 359363e85ccd..fe806f7b9759 100644 --- a/init/do_mounts_md.c +++ b/drivers/md/md-autodetect.c @@ -1,10 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include #include +#include #include #include -#include "do_mounts.h" - /* * When md (and any require personalities) are compiled into the kernel * (not a module), arrays can be assembles are boot time using with AUTODETECT @@ -114,6 +119,12 @@ static int __init md_setup(char *str) return 1; } +static inline int create_dev(char *name, dev_t dev) +{ + ksys_unlink(name); + return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev)); +} + static void __init md_setup_drive(void) { int minor, i, ent, partitioned; diff --git a/include/linux/raid/detect.h b/include/linux/raid/detect.h index 37dd3f40cd31..1f029a71c3ef 100644 --- a/include/linux/raid/detect.h +++ b/include/linux/raid/detect.h @@ -1,3 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ void md_autodetect_dev(dev_t dev); + +#ifdef CONFIG_BLK_DEV_MD +void md_run_setup(void); +#else +static inline void md_run_setup(void) +{ +} +#endif diff --git a/init/Makefile b/init/Makefile index 57499b1ff471..6bc37f64b361 100644 --- a/init/Makefile +++ b/init/Makefile @@ -18,7 +18,6 @@ obj-y += init_task.o mounts-y := do_mounts.o mounts-$(CONFIG_BLK_DEV_RAM) += do_mounts_rd.o mounts-$(CONFIG_BLK_DEV_INITRD) += do_mounts_initrd.o -mounts-$(CONFIG_BLK_DEV_MD) += do_mounts_md.o # dependencies on generated files need to be listed explicitly $(obj)/version.o: include/generated/compile.h diff --git a/init/do_mounts.c b/init/do_mounts.c index 29d326b6c29d..1a4dfa17fb28 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "do_mounts.h" diff --git a/init/do_mounts.h b/init/do_mounts.h index 7513d1c14d13..50d6c8941e15 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -41,13 +41,3 @@ bool __init initrd_load(void); static inline bool initrd_load(void) { return false; } #endif - -#ifdef CONFIG_BLK_DEV_MD - -void md_run_setup(void); - -#else - -static inline void md_run_setup(void) {} - -#endif From d82fa81c2364a464ae585eed523496aa00909c17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 6 Jun 2020 15:00:24 +0200 Subject: [PATCH 05/50] md: replace the RAID_AUTORUN ioctl with a direct function call Instead of using a spcial RAID_AUTORUN ioctl that only exists for non-modular builds and is only called from the early init code, just call the actual function directly. Signed-off-by: Christoph Hellwig Reviewed-by: NeilBrown Acked-by: Song Liu Acked-by: Linus Torvalds --- drivers/md/md-autodetect.c | 10 ++-------- drivers/md/md.c | 14 +------------- drivers/md/md.h | 3 +++ 3 files changed, 6 insertions(+), 21 deletions(-) diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c index fe806f7b9759..0eb746211ed5 100644 --- a/drivers/md/md-autodetect.c +++ b/drivers/md/md-autodetect.c @@ -9,6 +9,7 @@ #include #include #include +#include "md.h" /* * When md (and any require personalities) are compiled into the kernel @@ -285,8 +286,6 @@ __setup("md=", md_setup); static void __init autodetect_raid(void) { - int fd; - /* * Since we don't want to detect and use half a raid array, we need to * wait for the known devices to complete their probing @@ -295,12 +294,7 @@ static void __init autodetect_raid(void) printk(KERN_INFO "md: If you don't use raid, use raid=noautodetect\n"); wait_for_device_probe(); - - fd = ksys_open("/dev/md0", 0, 0); - if (fd >= 0) { - ksys_ioctl(fd, RAID_AUTORUN, raid_autopart); - ksys_close(fd); - } + md_autostart_arrays(raid_autopart); } void __init md_run_setup(void) diff --git a/drivers/md/md.c b/drivers/md/md.c index f567f536b529..6e9a48da4748 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -68,10 +68,6 @@ #include "md-bitmap.h" #include "md-cluster.h" -#ifndef MODULE -static void autostart_arrays(int part); -#endif - /* pers_list is a list of registered personalities protected * by pers_lock. * pers_lock does extra service to protect accesses to @@ -7421,7 +7417,6 @@ static inline bool md_ioctl_valid(unsigned int cmd) case GET_DISK_INFO: case HOT_ADD_DISK: case HOT_REMOVE_DISK: - case RAID_AUTORUN: case RAID_VERSION: case RESTART_ARRAY_RW: case RUN_ARRAY: @@ -7467,13 +7462,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, case RAID_VERSION: err = get_version(argp); goto out; - -#ifndef MODULE - case RAID_AUTORUN: - err = 0; - autostart_arrays(arg); - goto out; -#endif default:; } @@ -9721,7 +9709,7 @@ void md_autodetect_dev(dev_t dev) } } -static void autostart_arrays(int part) +void md_autostart_arrays(int part) { struct md_rdev *rdev; struct detected_devices_node *node_detected_dev; diff --git a/drivers/md/md.h b/drivers/md/md.h index 612814d07d35..37315a3f28e9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -800,4 +800,7 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio !bio->bi_disk->queue->limits.max_write_zeroes_sectors) mddev->queue->limits.max_write_zeroes_sectors = 0; } + +void md_autostart_arrays(int part); + #endif /* _MD_MD_H */ From a1d6bc018911390274e3904bdd28240cd96ddc54 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 6 Jun 2020 14:56:41 +0200 Subject: [PATCH 06/50] md: remove the autoscan partition re-read devfs is long gone, and autoscan works just fine without this these days. Signed-off-by: Christoph Hellwig Reviewed-by: NeilBrown Acked-by: Song Liu Acked-by: Linus Torvalds --- drivers/md/md-autodetect.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c index 0eb746211ed5..6bc9b734eee6 100644 --- a/drivers/md/md-autodetect.c +++ b/drivers/md/md-autodetect.c @@ -240,16 +240,6 @@ static void __init md_setup_drive(void) err = ksys_ioctl(fd, RUN_ARRAY, 0); if (err) printk(KERN_WARNING "md: starting md%d failed\n", minor); - else { - /* reread the partition table. - * I (neilb) and not sure why this is needed, but I cannot - * boot a kernel with devfs compiled in from partitioned md - * array without it - */ - ksys_close(fd); - fd = ksys_open(name, 0, 0); - ksys_ioctl(fd, BLKRRPART, 0); - } ksys_close(fd); } } From 1a6a050620e496abf42749a2e1d3882645cc053f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 7 Jun 2020 16:33:01 +0200 Subject: [PATCH 07/50] md: remove the kernel version of md_u.h mdp_major can just move to drivers/md/md.h. Signed-off-by: Christoph Hellwig Acked-by: Song Liu Acked-by: Linus Torvalds --- drivers/md/md.h | 1 + include/linux/raid/md_u.h | 13 ------------- 2 files changed, 1 insertion(+), 13 deletions(-) delete mode 100644 include/linux/raid/md_u.h diff --git a/drivers/md/md.h b/drivers/md/md.h index 37315a3f28e9..6f8fff77ce10 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -801,6 +801,7 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio mddev->queue->limits.max_write_zeroes_sectors = 0; } +extern int mdp_major; void md_autostart_arrays(int part); #endif /* _MD_MD_H */ diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h deleted file mode 100644 index 8dfec085a20e..000000000000 --- a/include/linux/raid/md_u.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - md_u.h : user <=> kernel API between Linux raidtools and RAID drivers - Copyright (C) 1998 Ingo Molnar - -*/ -#ifndef _MD_U_H -#define _MD_U_H - -#include - -extern int mdp_major; -#endif From d1100488c3ce11b853785cf15ee4c19bfab4c842 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 7 Jun 2020 16:56:05 +0200 Subject: [PATCH 08/50] md: simplify md_setup_drive Move the loop over the possible arrays into the caller to remove a level of indentation for the whole function. Signed-off-by: Christoph Hellwig Reviewed-by: NeilBrown Acked-by: Song Liu Acked-by: Linus Torvalds --- drivers/md/md-autodetect.c | 215 ++++++++++++++++++------------------- 1 file changed, 107 insertions(+), 108 deletions(-) diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c index 6bc9b734eee6..a43a8f158058 100644 --- a/drivers/md/md-autodetect.c +++ b/drivers/md/md-autodetect.c @@ -27,7 +27,7 @@ static int __initdata raid_noautodetect=1; #endif static int __initdata raid_autopart; -static struct { +static struct md_setup_args { int minor; int partitioned; int level; @@ -126,122 +126,117 @@ static inline int create_dev(char *name, dev_t dev) return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev)); } -static void __init md_setup_drive(void) +static void __init md_setup_drive(struct md_setup_args *args) { - int minor, i, ent, partitioned; + int minor, i, partitioned; dev_t dev; dev_t devices[MD_SB_DISKS+1]; + int fd; + int err = 0; + char *devname; + mdu_disk_info_t dinfo; + char name[16]; - for (ent = 0; ent < md_setup_ents ; ent++) { - int fd; - int err = 0; - char *devname; - mdu_disk_info_t dinfo; - char name[16]; + minor = args->minor; + partitioned = args->partitioned; + devname = args->device_names; - minor = md_setup_args[ent].minor; - partitioned = md_setup_args[ent].partitioned; - devname = md_setup_args[ent].device_names; + sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor); + if (partitioned) + dev = MKDEV(mdp_major, minor << MdpMinorShift); + else + dev = MKDEV(MD_MAJOR, minor); + create_dev(name, dev); + for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) { + struct kstat stat; + char *p; + char comp_name[64]; - sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor); - if (partitioned) - dev = MKDEV(mdp_major, minor << MdpMinorShift); - else - dev = MKDEV(MD_MAJOR, minor); - create_dev(name, dev); - for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) { - struct kstat stat; - char *p; - char comp_name[64]; + p = strchr(devname, ','); + if (p) + *p++ = 0; - p = strchr(devname, ','); - if (p) - *p++ = 0; - - dev = name_to_dev_t(devname); - if (strncmp(devname, "/dev/", 5) == 0) - devname += 5; - snprintf(comp_name, 63, "/dev/%s", devname); - if (vfs_stat(comp_name, &stat) == 0 && - S_ISBLK(stat.mode)) - dev = new_decode_dev(stat.rdev); - if (!dev) { - printk(KERN_WARNING "md: Unknown device name: %s\n", devname); - break; - } - - devices[i] = dev; - - devname = p; - } - devices[i] = 0; - - if (!i) - continue; - - printk(KERN_INFO "md: Loading md%s%d: %s\n", - partitioned ? "_d" : "", minor, - md_setup_args[ent].device_names); - - fd = ksys_open(name, 0, 0); - if (fd < 0) { - printk(KERN_ERR "md: open failed - cannot start " - "array %s\n", name); - continue; - } - if (ksys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) { - printk(KERN_WARNING - "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n", - minor); - ksys_close(fd); - continue; + dev = name_to_dev_t(devname); + if (strncmp(devname, "/dev/", 5) == 0) + devname += 5; + snprintf(comp_name, 63, "/dev/%s", devname); + if (vfs_stat(comp_name, &stat) == 0 && S_ISBLK(stat.mode)) + dev = new_decode_dev(stat.rdev); + if (!dev) { + printk(KERN_WARNING "md: Unknown device name: %s\n", devname); + break; } - if (md_setup_args[ent].level != LEVEL_NONE) { - /* non-persistent */ - mdu_array_info_t ainfo; - ainfo.level = md_setup_args[ent].level; - ainfo.size = 0; - ainfo.nr_disks =0; - ainfo.raid_disks =0; - while (devices[ainfo.raid_disks]) - ainfo.raid_disks++; - ainfo.md_minor =minor; - ainfo.not_persistent = 1; - - ainfo.state = (1 << MD_SB_CLEAN); - ainfo.layout = 0; - ainfo.chunk_size = md_setup_args[ent].chunk; - err = ksys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo); - for (i = 0; !err && i <= MD_SB_DISKS; i++) { - dev = devices[i]; - if (!dev) - break; - dinfo.number = i; - dinfo.raid_disk = i; - dinfo.state = (1<device_names); + + fd = ksys_open(name, 0, 0); + if (fd < 0) { + printk(KERN_ERR "md: open failed - cannot start " + "array %s\n", name); + return; + } + if (ksys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) { + printk(KERN_WARNING + "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n", + minor); + ksys_close(fd); + return; + } + + if (args->level != LEVEL_NONE) { + /* non-persistent */ + mdu_array_info_t ainfo; + ainfo.level = args->level; + ainfo.size = 0; + ainfo.nr_disks =0; + ainfo.raid_disks =0; + while (devices[ainfo.raid_disks]) + ainfo.raid_disks++; + ainfo.md_minor =minor; + ainfo.not_persistent = 1; + + ainfo.state = (1 << MD_SB_CLEAN); + ainfo.layout = 0; + ainfo.chunk_size = args->chunk; + err = ksys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo); + for (i = 0; !err && i <= MD_SB_DISKS; i++) { + dev = devices[i]; + if (!dev) + break; + dinfo.number = i; + dinfo.raid_disk = i; + dinfo.state = (1< Date: Sun, 7 Jun 2020 17:31:19 +0200 Subject: [PATCH 09/50] md: rewrite md_setup_drive to avoid ioctls md_setup_drive knows it works with md devices, so it is rather pointless to open a file descriptor and issue ioctls. Just call directly into the relevant low-level md routines after getting a handle to the device using blkdev_get_by_dev instead. Signed-off-by: Christoph Hellwig Reviewed-by: NeilBrown Acked-by: Song Liu Acked-by: Linus Torvalds --- drivers/md/md-autodetect.c | 134 +++++++++++++++++-------------------- drivers/md/md.c | 24 +++---- drivers/md/md.h | 8 +++ 3 files changed, 81 insertions(+), 85 deletions(-) diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c index a43a8f158058..14b6e86814c0 100644 --- a/drivers/md/md-autodetect.c +++ b/drivers/md/md-autodetect.c @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -120,37 +119,29 @@ static int __init md_setup(char *str) return 1; } -static inline int create_dev(char *name, dev_t dev) -{ - ksys_unlink(name); - return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev)); -} - static void __init md_setup_drive(struct md_setup_args *args) { - int minor, i, partitioned; - dev_t dev; - dev_t devices[MD_SB_DISKS+1]; - int fd; - int err = 0; - char *devname; - mdu_disk_info_t dinfo; + char *devname = args->device_names; + dev_t devices[MD_SB_DISKS + 1], mdev; + struct mdu_array_info_s ainfo = { }; + struct block_device *bdev; + struct mddev *mddev; + int err = 0, i; char name[16]; - minor = args->minor; - partitioned = args->partitioned; - devname = args->device_names; + if (args->partitioned) { + mdev = MKDEV(mdp_major, args->minor << MdpMinorShift); + sprintf(name, "md_d%d", args->minor); + } else { + mdev = MKDEV(MD_MAJOR, args->minor); + sprintf(name, "md%d", args->minor); + } - sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor); - if (partitioned) - dev = MKDEV(mdp_major, minor << MdpMinorShift); - else - dev = MKDEV(MD_MAJOR, minor); - create_dev(name, dev); for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) { struct kstat stat; char *p; char comp_name[64]; + dev_t dev; p = strchr(devname, ','); if (p) @@ -163,7 +154,7 @@ static void __init md_setup_drive(struct md_setup_args *args) if (vfs_stat(comp_name, &stat) == 0 && S_ISBLK(stat.mode)) dev = new_decode_dev(stat.rdev); if (!dev) { - printk(KERN_WARNING "md: Unknown device name: %s\n", devname); + pr_warn("md: Unknown device name: %s\n", devname); break; } @@ -175,68 +166,71 @@ static void __init md_setup_drive(struct md_setup_args *args) if (!i) return; - printk(KERN_INFO "md: Loading md%s%d: %s\n", - partitioned ? "_d" : "", minor, - args->device_names); + pr_info("md: Loading %s: %s\n", name, args->device_names); - fd = ksys_open(name, 0, 0); - if (fd < 0) { - printk(KERN_ERR "md: open failed - cannot start " - "array %s\n", name); + bdev = blkdev_get_by_dev(mdev, FMODE_READ, NULL); + if (IS_ERR(bdev)) { + pr_err("md: open failed - cannot start array %s\n", name); return; } - if (ksys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) { - printk(KERN_WARNING - "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n", - minor); - ksys_close(fd); - return; + + err = -EIO; + if (WARN(bdev->bd_disk->fops != &md_fops, + "Opening block device %x resulted in non-md device\n", + mdev)) + goto out_blkdev_put; + + mddev = bdev->bd_disk->private_data; + + err = mddev_lock(mddev); + if (err) { + pr_err("md: failed to lock array %s\n", name); + goto out_blkdev_put; + } + + if (!list_empty(&mddev->disks) || mddev->raid_disks) { + pr_warn("md: Ignoring %s, already autodetected. (Use raid=noautodetect)\n", + name); + goto out_unlock; } if (args->level != LEVEL_NONE) { /* non-persistent */ - mdu_array_info_t ainfo; ainfo.level = args->level; - ainfo.size = 0; - ainfo.nr_disks =0; - ainfo.raid_disks =0; + ainfo.md_minor = args->minor; + ainfo.not_persistent = 1; + ainfo.state = (1 << MD_SB_CLEAN); + ainfo.chunk_size = args->chunk; while (devices[ainfo.raid_disks]) ainfo.raid_disks++; - ainfo.md_minor =minor; - ainfo.not_persistent = 1; + } - ainfo.state = (1 << MD_SB_CLEAN); - ainfo.layout = 0; - ainfo.chunk_size = args->chunk; - err = ksys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo); - for (i = 0; !err && i <= MD_SB_DISKS; i++) { - dev = devices[i]; - if (!dev) - break; + err = md_set_array_info(mddev, &ainfo); + + for (i = 0; i <= MD_SB_DISKS && devices[i]; i++) { + struct mdu_disk_info_s dinfo = { + .major = MAJOR(devices[i]), + .minor = MINOR(devices[i]), + }; + + if (args->level != LEVEL_NONE) { dinfo.number = i; dinfo.raid_disk = i; - dinfo.state = (1<major_version != 0) { @@ -7055,7 +7052,7 @@ static int set_bitmap_file(struct mddev *mddev, int fd) } /* - * set_array_info is used two different ways + * md_set_array_info is used two different ways * The original usage is when creating a new array. * In this usage, raid_disks is > 0 and it together with * level, size, not_persistent,layout,chunksize determine the @@ -7067,9 +7064,8 @@ static int set_bitmap_file(struct mddev *mddev, int fd) * The minor and patch _version numbers are also kept incase the * super_block handler wishes to interpret them. */ -static int set_array_info(struct mddev *mddev, mdu_array_info_t *info) +int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info) { - if (info->raid_disks == 0) { /* just setting version number for superblock loading */ if (info->major_version < 0 || @@ -7560,7 +7556,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -EBUSY; goto unlock; } - err = set_array_info(mddev, &info); + err = md_set_array_info(mddev, &info); if (err) { pr_warn("md: couldn't set array info. %d\n", err); goto unlock; @@ -7614,7 +7610,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, /* Need to clear read-only for this */ break; else - err = add_new_disk(mddev, &info); + err = md_add_new_disk(mddev, &info); goto unlock; } break; @@ -7682,7 +7678,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, if (copy_from_user(&info, argp, sizeof(info))) err = -EFAULT; else - err = add_new_disk(mddev, &info); + err = md_add_new_disk(mddev, &info); goto unlock; } @@ -7808,7 +7804,7 @@ static int md_revalidate(struct gendisk *disk) mddev->changed = 0; return 0; } -static const struct block_device_operations md_fops = +const struct block_device_operations md_fops = { .owner = THIS_MODULE, .open = md_open, diff --git a/drivers/md/md.h b/drivers/md/md.h index 6f8fff77ce10..ada3106cb1dd 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -801,7 +801,15 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio mddev->queue->limits.max_write_zeroes_sectors = 0; } +struct mdu_array_info_s; +struct mdu_disk_info_s; + extern int mdp_major; void md_autostart_arrays(int part); +int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info); +int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info); +int do_md_run(struct mddev *mddev); + +extern const struct block_device_operations md_fops; #endif /* _MD_MD_H */ From c8376994c86c4eb02b9a1032cd3a8d44c911d671 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 4 Jun 2020 10:23:14 +0200 Subject: [PATCH 10/50] initrd: remove support for multiple floppies Remove the special handling for multiple floppies in the initrd code. No one should be using floppies for booting these days. (famous last words..) Includes a spelling fix from Colin Ian King . Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/arm/kernel/atags_parse.c | 2 - arch/sh/kernel/setup.c | 2 - arch/sparc/kernel/setup_32.c | 2 - arch/sparc/kernel/setup_64.c | 2 - arch/x86/kernel/setup.c | 2 - include/linux/initrd.h | 6 --- init/do_mounts.c | 69 ++++------------------------------- init/do_mounts.h | 1 - init/do_mounts_rd.c | 20 +++------- 9 files changed, 12 insertions(+), 94 deletions(-) diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c index ce02f92f4ab2..6c12d9fe694e 100644 --- a/arch/arm/kernel/atags_parse.c +++ b/arch/arm/kernel/atags_parse.c @@ -91,8 +91,6 @@ __tagtable(ATAG_VIDEOTEXT, parse_tag_videotext); static int __init parse_tag_ramdisk(const struct tag *tag) { rd_image_start = tag->u.ramdisk.start; - rd_doload = (tag->u.ramdisk.flags & 1) == 0; - rd_prompt = (tag->u.ramdisk.flags & 2) == 0; if (tag->u.ramdisk.size) rd_size = tag->u.ramdisk.size; diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 67f5a3b44c2e..4144be650d41 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -290,8 +290,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_BLK_DEV_RAM rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); #endif if (!MOUNT_ROOT_RDONLY) diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 6d07b85b9e24..eea43a1aef1b 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -353,8 +353,6 @@ void __init setup_arch(char **cmdline_p) ROOT_DEV = old_decode_dev(root_dev); #ifdef CONFIG_BLK_DEV_RAM rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0); #endif prom_setsync(prom_sync_me); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index f765fda871eb..d87244197d5c 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -659,8 +659,6 @@ void __init setup_arch(char **cmdline_p) ROOT_DEV = old_decode_dev(root_dev); #ifdef CONFIG_BLK_DEV_RAM rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0); #endif task_thread_info(&init_task)->kregs = &fake_swapper_regs; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a3767e74c758..b9a68d8e06d8 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -870,8 +870,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_BLK_DEV_RAM rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); #endif #ifdef CONFIG_EFI if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, diff --git a/include/linux/initrd.h b/include/linux/initrd.h index aa5914355728..8db6f8c8030b 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -2,12 +2,6 @@ #define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */ -/* 1 = load ramdisk, 0 = don't load */ -extern int rd_doload; - -/* 1 = prompt for ramdisk, 0 = don't prompt */ -extern int rd_prompt; - /* starting block # of image */ extern int rd_image_start; diff --git a/init/do_mounts.c b/init/do_mounts.c index 1a4dfa17fb28..a7f22cf58c7e 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -28,8 +28,6 @@ #include "do_mounts.h" -int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */ - int root_mountflags = MS_RDONLY | MS_SILENT; static char * __initdata root_device_name; static char __initdata saved_root_name[64]; @@ -39,7 +37,7 @@ dev_t ROOT_DEV; static int __init load_ramdisk(char *str) { - rd_doload = simple_strtol(str,NULL,0) & 3; + pr_warn("ignoring the deprecated load_ramdisk= option\n"); return 1; } __setup("load_ramdisk=", load_ramdisk); @@ -553,66 +551,20 @@ static int __init mount_cifs_root(void) } #endif -#if defined(CONFIG_BLK_DEV_RAM) || defined(CONFIG_BLK_DEV_FD) -void __init change_floppy(char *fmt, ...) -{ - struct termios termios; - char buf[80]; - char c; - int fd; - va_list args; - va_start(args, fmt); - vsprintf(buf, fmt, args); - va_end(args); - fd = ksys_open("/dev/root", O_RDWR | O_NDELAY, 0); - if (fd >= 0) { - ksys_ioctl(fd, FDEJECT, 0); - ksys_close(fd); - } - printk(KERN_NOTICE "VFS: Insert %s and press ENTER\n", buf); - fd = ksys_open("/dev/console", O_RDWR, 0); - if (fd >= 0) { - ksys_ioctl(fd, TCGETS, (long)&termios); - termios.c_lflag &= ~ICANON; - ksys_ioctl(fd, TCSETSF, (long)&termios); - ksys_read(fd, &c, 1); - termios.c_lflag |= ICANON; - ksys_ioctl(fd, TCSETSF, (long)&termios); - ksys_close(fd); - } -} -#endif - void __init mount_root(void) { #ifdef CONFIG_ROOT_NFS if (ROOT_DEV == Root_NFS) { - if (mount_nfs_root()) - return; - - printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n"); - ROOT_DEV = Root_FD0; + if (!mount_nfs_root()) + printk(KERN_ERR "VFS: Unable to mount root fs via NFS.\n"); + return; } #endif #ifdef CONFIG_CIFS_ROOT if (ROOT_DEV == Root_CIFS) { - if (mount_cifs_root()) - return; - - printk(KERN_ERR "VFS: Unable to mount root fs via SMB, trying floppy.\n"); - ROOT_DEV = Root_FD0; - } -#endif -#ifdef CONFIG_BLK_DEV_FD - if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) { - /* rd_doload is 2 for a dual initrd/ramload setup */ - if (rd_doload==2) { - if (rd_load_disk(1)) { - ROOT_DEV = Root_RAM1; - root_device_name = NULL; - } - } else - change_floppy("root floppy"); + if (!mount_cifs_root()) + printk(KERN_ERR "VFS: Unable to mount root fs via SMB.\n"); + return; } #endif #ifdef CONFIG_BLOCK @@ -631,8 +583,6 @@ void __init mount_root(void) */ void __init prepare_namespace(void) { - int is_floppy; - if (root_delay) { printk(KERN_INFO "Waiting %d sec before mounting root device...\n", root_delay); @@ -675,11 +625,6 @@ void __init prepare_namespace(void) async_synchronize_full(); } - is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR; - - if (is_floppy && rd_doload && rd_load_disk(0)) - ROOT_DEV = Root_RAM0; - mount_root(); out: devtmpfs_mount(); diff --git a/init/do_mounts.h b/init/do_mounts.h index 50d6c8941e15..c855b3f0e06d 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -9,7 +9,6 @@ #include #include -void change_floppy(char *fmt, ...); void mount_block_root(char *name, int flags); void mount_root(void); extern int root_mountflags; diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 32fb049d18f9..0a5c3ebee61d 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -15,11 +15,9 @@ #include -int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */ - static int __init prompt_ramdisk(char *str) { - rd_prompt = simple_strtol(str,NULL,0) & 1; + pr_warn("ignoring the deprecated prompt_ramdisk= option\n"); return 1; } __setup("prompt_ramdisk=", prompt_ramdisk); @@ -178,7 +176,7 @@ int __init rd_load_image(char *from) int res = 0; int in_fd, out_fd; unsigned long rd_blocks, devblocks; - int nblocks, i, disk; + int nblocks, i; char *buf = NULL; unsigned short rotate = 0; decompress_fn decompressor = NULL; @@ -243,21 +241,15 @@ int __init rd_load_image(char *from) printk(KERN_NOTICE "RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... ", nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : ""); - for (i = 0, disk = 1; i < nblocks; i++) { + for (i = 0; i < nblocks; i++) { if (i && (i % devblocks == 0)) { - pr_cont("done disk #%d.\n", disk++); + pr_cont("done disk #1.\n"); rotate = 0; if (ksys_close(in_fd)) { printk("Error closing the disk.\n"); goto noclose_input; } - change_floppy("disk #%d", disk); - in_fd = ksys_open(from, O_RDONLY, 0); - if (in_fd < 0) { - printk("Error opening disk.\n"); - goto noclose_input; - } - printk("Loading disk #%d... ", disk); + break; } ksys_read(in_fd, buf, BLOCK_SIZE); ksys_write(out_fd, buf, BLOCK_SIZE); @@ -284,8 +276,6 @@ int __init rd_load_image(char *from) int __init rd_load_disk(int n) { - if (rd_prompt) - change_floppy("root floppy disk to be loaded into RAM disk"); create_dev("/dev/root", ROOT_DEV); create_dev("/dev/ram", MKDEV(RAMDISK_MAJOR, n)); return rd_load_image("/dev/root"); From 899ac10cc0bdaaee3b908e390bf94d2f1ba5d512 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 7 Jun 2020 17:40:30 +0200 Subject: [PATCH 11/50] initrd: remove the BLKFLSBUF call in handle_initrd BLKFLSBUF used to be overloaded for the ramdisk driver to free the whole ramdisk, which was completely different behavior compared to all other drivers. But this magic overload got removed in commit ff26956875c2 ("brd: remove support for BLKFLSBUF"), so this call is entirely pointless now. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- init/do_mounts_initrd.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index d72beda824aa..e4f88e9e1c08 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -115,21 +115,12 @@ static void __init handle_initrd(void) if (!error) printk("okay\n"); else { - int fd = ksys_open("/dev/root.old", O_RDWR, 0); if (error == -ENOENT) printk("/initrd does not exist. Ignored.\n"); else printk("failed\n"); printk(KERN_NOTICE "Unmounting old root\n"); ksys_umount("/old", MNT_DETACH); - printk(KERN_NOTICE "Trying to free ramdisk memory ... "); - if (fd < 0) { - error = fd; - } else { - error = ksys_ioctl(fd, BLKFLSBUF, 0); - ksys_close(fd); - } - printk(!error ? "okay\n" : "failed\n"); } } From bef173299613404f55b11180d9a865861637f31d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 6 Jun 2020 14:49:58 +0200 Subject: [PATCH 12/50] initrd: switch initrd loading to struct file based APIs There is no good reason to mess with file descriptors from in-kernel code, switch the initrd loading to struct file based read and writes instead. Also Pass an explicit offset instead of ->f_pos, and to make that easier, use file scope file structs and offsets everywhere except for identify_ramdisk_image instead of the current strange mix. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- fs/read_write.c | 2 +- include/linux/syscalls.h | 1 - init/do_mounts_rd.c | 79 ++++++++++++++++++++-------------------- 3 files changed, 40 insertions(+), 42 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 4fb797822567..5db58b8c78d0 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -301,7 +301,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence) } EXPORT_SYMBOL(vfs_llseek); -off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence) +static off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence) { off_t retval; struct fd f = fdget_pos(fd); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b951a87da987..10843a6adb77 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1246,7 +1246,6 @@ int ksys_fchown(unsigned int fd, uid_t user, gid_t group); int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, unsigned int count); int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); -off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence); ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); void ksys_sync(void); int ksys_unshare(unsigned long unshare_flags); diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 0a5c3ebee61d..d4255c10432a 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -14,6 +14,8 @@ #include +static struct file *in_file, *out_file; +static loff_t in_pos, out_pos; static int __init prompt_ramdisk(char *str) { @@ -31,7 +33,7 @@ static int __init ramdisk_start_setup(char *str) } __setup("ramdisk_start=", ramdisk_start_setup); -static int __init crd_load(int in_fd, int out_fd, decompress_fn deco); +static int __init crd_load(decompress_fn deco); /* * This routine tries to find a RAM disk image to load, and returns the @@ -53,7 +55,8 @@ static int __init crd_load(int in_fd, int out_fd, decompress_fn deco); * lz4 */ static int __init -identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) +identify_ramdisk_image(struct file *file, loff_t pos, + decompress_fn *decompressor) { const int size = 512; struct minix_super_block *minixsb; @@ -64,6 +67,7 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) unsigned char *buf; const char *compress_name; unsigned long n; + int start_block = rd_image_start; buf = kmalloc(size, GFP_KERNEL); if (!buf) @@ -78,8 +82,8 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) /* * Read block 0 to test for compressed kernel */ - ksys_lseek(fd, start_block * BLOCK_SIZE, 0); - ksys_read(fd, buf, size); + pos = start_block * BLOCK_SIZE; + kernel_read(file, buf, size, &pos); *decompressor = decompress_method(buf, size, &compress_name); if (compress_name) { @@ -124,8 +128,8 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) /* * Read 512 bytes further to check if cramfs is padded */ - ksys_lseek(fd, start_block * BLOCK_SIZE + 0x200, 0); - ksys_read(fd, buf, size); + pos = start_block * BLOCK_SIZE + 0x200; + kernel_read(file, buf, size, &pos); if (cramfsb->magic == CRAMFS_MAGIC) { printk(KERN_NOTICE @@ -138,8 +142,8 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) /* * Read block 1 to test for minix and ext2 superblock */ - ksys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0); - ksys_read(fd, buf, size); + pos = (start_block + 1) * BLOCK_SIZE; + kernel_read(file, buf, size, &pos); /* Try minix */ if (minixsb->s_magic == MINIX_SUPER_MAGIC || @@ -166,15 +170,22 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) start_block); done: - ksys_lseek(fd, start_block * BLOCK_SIZE, 0); kfree(buf); return nblocks; } +static unsigned long nr_blocks(struct file *file) +{ + struct inode *inode = file->f_mapping->host; + + if (!S_ISBLK(inode->i_mode)) + return 0; + return i_size_read(inode) >> 10; +} + int __init rd_load_image(char *from) { int res = 0; - int in_fd, out_fd; unsigned long rd_blocks, devblocks; int nblocks, i; char *buf = NULL; @@ -184,20 +195,21 @@ int __init rd_load_image(char *from) char rotator[4] = { '|' , '/' , '-' , '\\' }; #endif - out_fd = ksys_open("/dev/ram", O_RDWR, 0); - if (out_fd < 0) + out_file = filp_open("/dev/ram", O_RDWR, 0); + if (IS_ERR(out_file)) goto out; - in_fd = ksys_open(from, O_RDONLY, 0); - if (in_fd < 0) + in_file = filp_open(from, O_RDONLY, 0); + if (IS_ERR(in_file)) goto noclose_input; - nblocks = identify_ramdisk_image(in_fd, rd_image_start, &decompressor); + in_pos = rd_image_start * BLOCK_SIZE; + nblocks = identify_ramdisk_image(in_file, in_pos, &decompressor); if (nblocks < 0) goto done; if (nblocks == 0) { - if (crd_load(in_fd, out_fd, decompressor) == 0) + if (crd_load(decompressor) == 0) goto successful_load; goto done; } @@ -206,11 +218,7 @@ int __init rd_load_image(char *from) * NOTE NOTE: nblocks is not actually blocks but * the number of kibibytes of data to load into a ramdisk. */ - if (ksys_ioctl(out_fd, BLKGETSIZE, (unsigned long)&rd_blocks) < 0) - rd_blocks = 0; - else - rd_blocks >>= 1; - + rd_blocks = nr_blocks(out_file); if (nblocks > rd_blocks) { printk("RAMDISK: image too big! (%dKiB/%ldKiB)\n", nblocks, rd_blocks); @@ -220,13 +228,10 @@ int __init rd_load_image(char *from) /* * OK, time to copy in the data */ - if (ksys_ioctl(in_fd, BLKGETSIZE, (unsigned long)&devblocks) < 0) - devblocks = 0; - else - devblocks >>= 1; - if (strcmp(from, "/initrd.image") == 0) devblocks = nblocks; + else + devblocks = nr_blocks(in_file); if (devblocks == 0) { printk(KERN_ERR "RAMDISK: could not determine device size\n"); @@ -245,14 +250,11 @@ int __init rd_load_image(char *from) if (i && (i % devblocks == 0)) { pr_cont("done disk #1.\n"); rotate = 0; - if (ksys_close(in_fd)) { - printk("Error closing the disk.\n"); - goto noclose_input; - } + fput(in_file); break; } - ksys_read(in_fd, buf, BLOCK_SIZE); - ksys_write(out_fd, buf, BLOCK_SIZE); + kernel_read(in_file, buf, BLOCK_SIZE, &in_pos); + kernel_write(out_file, buf, BLOCK_SIZE, &out_pos); #if !defined(CONFIG_S390) if (!(i % 16)) { pr_cont("%c\b", rotator[rotate & 0x3]); @@ -265,9 +267,9 @@ int __init rd_load_image(char *from) successful_load: res = 1; done: - ksys_close(in_fd); + fput(in_file); noclose_input: - ksys_close(out_fd); + fput(out_file); out: kfree(buf); ksys_unlink("/dev/ram"); @@ -283,11 +285,10 @@ int __init rd_load_disk(int n) static int exit_code; static int decompress_error; -static int crd_infd, crd_outfd; static long __init compr_fill(void *buf, unsigned long len) { - long r = ksys_read(crd_infd, buf, len); + long r = kernel_read(in_file, buf, len, &in_pos); if (r < 0) printk(KERN_ERR "RAMDISK: error while reading compressed data"); else if (r == 0) @@ -297,7 +298,7 @@ static long __init compr_fill(void *buf, unsigned long len) static long __init compr_flush(void *window, unsigned long outcnt) { - long written = ksys_write(crd_outfd, window, outcnt); + long written = kernel_write(out_file, window, outcnt, &out_pos); if (written != outcnt) { if (decompress_error == 0) printk(KERN_ERR @@ -316,11 +317,9 @@ static void __init error(char *x) decompress_error = 1; } -static int __init crd_load(int in_fd, int out_fd, decompress_fn deco) +static int __init crd_load(decompress_fn deco) { int result; - crd_infd = in_fd; - crd_outfd = out_fd; if (!deco) { pr_emerg("Invalid ramdisk decompression routine. " From f0ea68f1390ca11aae9e08a042c6dba5bc48eea2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 6 Jun 2020 15:54:39 +0200 Subject: [PATCH 13/50] initrd: mark init_linuxrc as __init Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- init/do_mounts_initrd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index e4f88e9e1c08..57ad5b2da8f5 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -56,7 +56,7 @@ static int __init early_initrd(char *p) } early_param("initrd", early_initrd); -static int init_linuxrc(struct subprocess_info *info, struct cred *new) +static int __init init_linuxrc(struct subprocess_info *info, struct cred *new) { ksys_unshare(CLONE_FS | CLONE_FILES); console_on_rootfs(); From 9acc17baf1fd6369cf969b1130bcf8c4b616ecfb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Jul 2020 18:18:54 +0200 Subject: [PATCH 14/50] initrd: mark initrd support as deprecated The classic initial ramdisk has been replaced by the much more flexible and efficient initramfs a long time. Warn about it being removed soon. Includes a spelling fix from Colin Ian King . Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- init/do_mounts_initrd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 57ad5b2da8f5..e08669187d63 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -75,6 +75,8 @@ static void __init handle_initrd(void) extern char *envp_init[]; int error; + pr_warn("using deprecated initrd support, will be removed in 2021.\n"); + real_root_dev = new_encode_dev(ROOT_DEV); create_dev("/dev/root.old", Root_RAM0); /* mount initrd on rootfs' /root */ From 9ab6b718491b1a3c3eeb52f32a428dc0cbf5bb80 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 6 Jun 2020 13:51:52 +0200 Subject: [PATCH 15/50] initramfs: remove the populate_initrd_image and clean_rootfs stubs If initrd support is not enable just print the warning directly instead of hiding the fact that we just failed behind two stub functions. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- init/initramfs.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/init/initramfs.c b/init/initramfs.c index 7a38012e1af7..d10404625c31 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -618,13 +618,7 @@ static void __init clean_rootfs(void) ksys_close(fd); kfree(buf); } -#else -static inline void clean_rootfs(void) -{ -} -#endif /* CONFIG_BLK_DEV_RAM */ -#ifdef CONFIG_BLK_DEV_RAM static void __init populate_initrd_image(char *err) { ssize_t written; @@ -644,11 +638,6 @@ static void __init populate_initrd_image(char *err) written, initrd_end - initrd_start); ksys_close(fd); } -#else -static void __init populate_initrd_image(char *err) -{ - printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); -} #endif /* CONFIG_BLK_DEV_RAM */ static int __init populate_rootfs(void) @@ -668,8 +657,12 @@ static int __init populate_rootfs(void) err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start); if (err) { +#ifdef CONFIG_BLK_DEV_RAM clean_rootfs(); populate_initrd_image(err); +#else + printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); +#endif } done: From b2a74d5f9d446ae33a381ed7ba5f283f09bed726 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 6 Jun 2020 13:59:54 +0200 Subject: [PATCH 16/50] initramfs: remove clean_rootfs There is no point in trying to clean up after unpacking the initramfs failed, as it should never get past the magic number check. In addition the current code only removes file that are direct children of the root entry, which wasn't complete anyway Fixes: df52092f3c97 ("fastboot: remove duplicate unpack_to_rootfs()") Reported-by: Marek Szyprowski Signed-off-by: Christoph Hellwig --- init/initramfs.c | 49 +----------------------------------------------- 1 file changed, 1 insertion(+), 48 deletions(-) diff --git a/init/initramfs.c b/init/initramfs.c index d10404625c31..1bc977ebfe31 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -11,6 +11,7 @@ #include #include #include +#include static ssize_t __init xwrite(int fd, const char *p, size_t count) { @@ -572,53 +573,6 @@ static inline bool kexec_free_initrd(void) #endif /* CONFIG_KEXEC_CORE */ #ifdef CONFIG_BLK_DEV_RAM -#define BUF_SIZE 1024 -static void __init clean_rootfs(void) -{ - int fd; - void *buf; - struct linux_dirent64 *dirp; - int num; - - fd = ksys_open("/", O_RDONLY, 0); - WARN_ON(fd < 0); - if (fd < 0) - return; - buf = kzalloc(BUF_SIZE, GFP_KERNEL); - WARN_ON(!buf); - if (!buf) { - ksys_close(fd); - return; - } - - dirp = buf; - num = ksys_getdents64(fd, dirp, BUF_SIZE); - while (num > 0) { - while (num > 0) { - struct kstat st; - int ret; - - ret = vfs_lstat(dirp->d_name, &st); - WARN_ON_ONCE(ret); - if (!ret) { - if (S_ISDIR(st.mode)) - ksys_rmdir(dirp->d_name); - else - ksys_unlink(dirp->d_name); - } - - num -= dirp->d_reclen; - dirp = (void *)dirp + dirp->d_reclen; - } - dirp = buf; - memset(buf, 0, BUF_SIZE); - num = ksys_getdents64(fd, dirp, BUF_SIZE); - } - - ksys_close(fd); - kfree(buf); -} - static void __init populate_initrd_image(char *err) { ssize_t written; @@ -658,7 +612,6 @@ static int __init populate_rootfs(void) err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start); if (err) { #ifdef CONFIG_BLK_DEV_RAM - clean_rootfs(); populate_initrd_image(err); #else printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); From bf6419e4d5440c6d414a320506c5488857a5b001 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 08:56:19 +0200 Subject: [PATCH 17/50] initramfs: switch initramfs unpacking to struct file based APIs There is no good reason to mess with file descriptors from in-kernel code, switch the initramfs unpacking to struct file based write instead. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- init/initramfs.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/init/initramfs.c b/init/initramfs.c index 1bc977ebfe31..35c26668d463 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -13,13 +13,14 @@ #include #include -static ssize_t __init xwrite(int fd, const char *p, size_t count) +static ssize_t __init xwrite(struct file *file, const char *p, size_t count, + loff_t *pos) { ssize_t out = 0; /* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */ while (count) { - ssize_t rv = ksys_write(fd, p, count); + ssize_t rv = kernel_write(file, p, count, pos); if (rv < 0) { if (rv == -EINTR || rv == -EAGAIN) @@ -317,7 +318,8 @@ static int __init maybe_link(void) return 0; } -static __initdata int wfd; +static __initdata struct file *wfile; +static __initdata loff_t wfile_pos; static int __init do_name(void) { @@ -334,16 +336,17 @@ static int __init do_name(void) int openflags = O_WRONLY|O_CREAT; if (ml != 1) openflags |= O_TRUNC; - wfd = ksys_open(collected, openflags, mode); + wfile = filp_open(collected, openflags, mode); + if (IS_ERR(wfile)) + return 0; + wfile_pos = 0; - if (wfd >= 0) { - ksys_fchown(wfd, uid, gid); - ksys_fchmod(wfd, mode); - if (body_len) - ksys_ftruncate(wfd, body_len); - vcollected = kstrdup(collected, GFP_KERNEL); - state = CopyFile; - } + vfs_fchown(wfile, uid, gid); + vfs_fchmod(wfile, mode); + if (body_len) + vfs_truncate(&wfile->f_path, body_len); + vcollected = kstrdup(collected, GFP_KERNEL); + state = CopyFile; } } else if (S_ISDIR(mode)) { ksys_mkdir(collected, mode); @@ -365,16 +368,16 @@ static int __init do_name(void) static int __init do_copy(void) { if (byte_count >= body_len) { - if (xwrite(wfd, victim, body_len) != body_len) + if (xwrite(wfile, victim, body_len, &wfile_pos) != body_len) error("write error"); - ksys_close(wfd); + fput(wfile); do_utime(vcollected, mtime); kfree(vcollected); eat(body_len); state = SkipIt; return 0; } else { - if (xwrite(wfd, victim, byte_count) != byte_count) + if (xwrite(wfile, victim, byte_count, &wfile_pos) != byte_count) error("write error"); body_len -= byte_count; eat(byte_count); @@ -576,21 +579,23 @@ static inline bool kexec_free_initrd(void) static void __init populate_initrd_image(char *err) { ssize_t written; - int fd; + struct file *file; + loff_t pos = 0; unpack_to_rootfs(__initramfs_start, __initramfs_size); printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n", err); - fd = ksys_open("/initrd.image", O_WRONLY | O_CREAT, 0700); - if (fd < 0) + file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700); + if (IS_ERR(file)) return; - written = xwrite(fd, (char *)initrd_start, initrd_end - initrd_start); + written = xwrite(file, (char *)initrd_start, initrd_end - initrd_start, + &pos); if (written != initrd_end - initrd_start) pr_err("/initrd.image: incomplete write (%zd != %ld)\n", written, initrd_end - initrd_start); - ksys_close(fd); + fput(file); } #endif /* CONFIG_BLK_DEV_RAM */ From 8f740636d9049a3f9665a0ec87a00822ec1120b4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 7 Jun 2020 17:44:28 +0200 Subject: [PATCH 18/50] init: open code setting up stdin/stdout/stderr Don't rely on the implicit set_fs(KERNEL_DS) for ksys_open to work, but instead open a struct file for /dev/console and then install it as FD 0/1/2 manually. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- init/main.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/init/main.c b/init/main.c index 0ead83e86b5a..db0621dfbb04 100644 --- a/init/main.c +++ b/init/main.c @@ -1457,15 +1457,19 @@ static int __ref kernel_init(void *unused) "See Linux Documentation/admin-guide/init.rst for guidance."); } +/* Open /dev/console, for stdin/stdout/stderr, this should never fail */ void console_on_rootfs(void) { - /* Open the /dev/console as stdin, this should never fail */ - if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) - pr_err("Warning: unable to open an initial console.\n"); + struct file *file = filp_open("/dev/console", O_RDWR, 0); - /* create stdout/stderr */ - (void) ksys_dup(0); - (void) ksys_dup(0); + if (IS_ERR(file)) { + pr_err("Warning: unable to open an initial console.\n"); + return; + } + get_file_rcu_many(file, 2); + fd_install(get_unused_fd_flags(0), file); + fd_install(get_unused_fd_flags(0), file); + fd_install(get_unused_fd_flags(0), file); } static noinline void __init kernel_init_freeable(void) From fb2da16cd70a5140acdd7a102e5cd3b697c3404f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 09:02:07 +0200 Subject: [PATCH 19/50] fs: remove ksys_getdents64 Just open code it in the only caller. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- fs/readdir.c | 11 ++--------- include/linux/syscalls.h | 2 -- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/readdir.c b/fs/readdir.c index a49f07c11cfb..19434b3c982c 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -348,8 +348,8 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen, return -EFAULT; } -int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, - unsigned int count) +SYSCALL_DEFINE3(getdents64, unsigned int, fd, + struct linux_dirent64 __user *, dirent, unsigned int, count) { struct fd f; struct getdents_callback64 buf = { @@ -380,13 +380,6 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, return error; } - -SYSCALL_DEFINE3(getdents64, unsigned int, fd, - struct linux_dirent64 __user *, dirent, unsigned int, count) -{ - return ksys_getdents64(fd, dirent, count); -} - #ifdef CONFIG_COMPAT struct compat_old_linux_dirent { compat_ulong_t d_ino; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 10843a6adb77..a998651629c7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1243,8 +1243,6 @@ ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); int ksys_fchmod(unsigned int fd, umode_t mode); int ksys_fchown(unsigned int fd, uid_t user, gid_t group); -int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, - unsigned int count); int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); void ksys_sync(void); From 166e07c37c6417c9713666268fc0eb89a9ce48b9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 6 Jun 2020 15:03:21 +0200 Subject: [PATCH 20/50] fs: remove ksys_open Just open code it in the two callers. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- fs/open.c | 11 ++++++++--- include/linux/syscalls.h | 11 ----------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/fs/open.c b/fs/open.c index 75166f071d28..ab3671af8a97 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1208,7 +1208,9 @@ long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) { - return ksys_open(filename, flags, mode); + if (force_o_largefile()) + flags |= O_LARGEFILE; + return do_sys_open(AT_FDCWD, filename, flags, mode); } SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, @@ -1270,9 +1272,12 @@ COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, fla */ SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode) { - return ksys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); -} + int flags = O_CREAT | O_WRONLY | O_TRUNC; + if (force_o_largefile()) + flags |= O_LARGEFILE; + return do_sys_open(AT_FDCWD, pathname, flags, mode); +} #endif /* diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a998651629c7..363baaadf8e1 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1374,17 +1374,6 @@ static inline int ksys_close(unsigned int fd) return __close_fd(current->files, fd); } -extern long do_sys_open(int dfd, const char __user *filename, int flags, - umode_t mode); - -static inline long ksys_open(const char __user *filename, int flags, - umode_t mode) -{ - if (force_o_largefile()) - flags |= O_LARGEFILE; - return do_sys_open(AT_FDCWD, filename, flags, mode); -} - extern long do_sys_truncate(const char __user *pathname, loff_t length); static inline long ksys_truncate(const char __user *pathname, loff_t length) From bc1cd99a9ad7e2c768e7ea92ae9c6ad4a4e0f7f7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 08:58:49 +0200 Subject: [PATCH 21/50] fs: remove ksys_dup Fold it into the only remaining caller. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- fs/file.c | 7 +------ include/linux/syscalls.h | 1 - 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/file.c b/fs/file.c index abb8b7081d7a..85b7993165dd 100644 --- a/fs/file.c +++ b/fs/file.c @@ -985,7 +985,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) return ksys_dup3(oldfd, newfd, 0); } -int ksys_dup(unsigned int fildes) +SYSCALL_DEFINE1(dup, unsigned int, fildes) { int ret = -EBADF; struct file *file = fget_raw(fildes); @@ -1000,11 +1000,6 @@ int ksys_dup(unsigned int fildes) return ret; } -SYSCALL_DEFINE1(dup, unsigned int, fildes) -{ - return ksys_dup(fildes); -} - int f_dupfd(unsigned int from, struct file *file, unsigned flags) { int err; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 363baaadf8e1..b6d900574762 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1237,7 +1237,6 @@ asmlinkage long sys_ni_syscall(void); */ int ksys_umount(char __user *name, int flags); -int ksys_dup(unsigned int fildes); int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); From b25ba7c3c9acdc4cf69f5bd69989819cabfc4e3b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 08:59:57 +0200 Subject: [PATCH 22/50] fs: remove ksys_fchmod Fold it into the only remaining caller. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- fs/open.c | 7 +------ include/linux/syscalls.h | 1 - 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/open.c b/fs/open.c index ab3671af8a97..b316dd6a86a8 100644 --- a/fs/open.c +++ b/fs/open.c @@ -608,7 +608,7 @@ int vfs_fchmod(struct file *file, umode_t mode) return chmod_common(&file->f_path, mode); } -int ksys_fchmod(unsigned int fd, umode_t mode) +SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) { struct fd f = fdget(fd); int err = -EBADF; @@ -620,11 +620,6 @@ int ksys_fchmod(unsigned int fd, umode_t mode) return err; } -SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) -{ - return ksys_fchmod(fd, mode); -} - int do_fchmodat(int dfd, const char __user *filename, umode_t mode) { struct path path; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b6d900574762..39ff738997a1 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1240,7 +1240,6 @@ int ksys_umount(char __user *name, int flags); int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); -int ksys_fchmod(unsigned int fd, umode_t mode); int ksys_fchown(unsigned int fd, uid_t user, gid_t group); int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); From 863b67e15177a7cd0c27b3e36e42fe7907dec9bd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 09:00:51 +0200 Subject: [PATCH 23/50] fs: remove ksys_ioctl Fold it into the only remaining caller. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- fs/ioctl.c | 7 +------ include/linux/syscalls.h | 1 - 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index d69786d1dd91..4e6cc0a7d69c 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -736,7 +736,7 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd, return -ENOIOCTLCMD; } -int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct fd f = fdget(fd); int error; @@ -757,11 +757,6 @@ int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) return error; } -SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) -{ - return ksys_ioctl(fd, cmd, arg); -} - #ifdef CONFIG_COMPAT /** * compat_ptr_ioctl - generic implementation of .compat_ioctl file operation diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 39ff738997a1..5b0f1fca4cfb 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1241,7 +1241,6 @@ int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); int ksys_fchown(unsigned int fd, uid_t user, gid_t group); -int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); void ksys_sync(void); int ksys_unshare(unsigned long unshare_flags); From 9d4b74aee80436d067d79a6c32ebb544b0ae5460 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 7 Jun 2020 18:26:37 +0200 Subject: [PATCH 24/50] fs: refactor do_utimes Split out one helper each for path vs fd based operations. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- fs/utimes.c | 100 ++++++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 46 deletions(-) diff --git a/fs/utimes.c b/fs/utimes.c index b7b927502d6e..c667517b6eb1 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -70,6 +70,57 @@ static int utimes_common(const struct path *path, struct timespec64 *times) return error; } +static int do_utimes_path(int dfd, const char __user *filename, + struct timespec64 *times, int flags) +{ + struct path path; + int lookup_flags = 0, error; + + if (times && + (!nsec_valid(times[0].tv_nsec) || !nsec_valid(times[1].tv_nsec))) + return -EINVAL; + if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) + return -EINVAL; + + if (!(flags & AT_SYMLINK_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + if (flags & AT_EMPTY_PATH) + lookup_flags |= LOOKUP_EMPTY; + +retry: + error = user_path_at(dfd, filename, lookup_flags, &path); + if (error) + return error; + + error = utimes_common(&path, times); + path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } + + return error; +} + +static int do_utimes_fd(int fd, struct timespec64 *times, int flags) +{ + struct fd f; + int error; + + if (times && + (!nsec_valid(times[0].tv_nsec) || !nsec_valid(times[1].tv_nsec))) + return -EINVAL; + if (flags) + return -EINVAL; + + f = fdget(fd); + if (!f.file) + return -EBADF; + error = utimes_common(&f.file->f_path, times); + fdput(f); + return error; +} + /* * do_utimes - change times on filename or file descriptor * @dfd: open file descriptor, -1 or AT_FDCWD @@ -88,52 +139,9 @@ static int utimes_common(const struct path *path, struct timespec64 *times) long do_utimes(int dfd, const char __user *filename, struct timespec64 *times, int flags) { - int error = -EINVAL; - - if (times && (!nsec_valid(times[0].tv_nsec) || - !nsec_valid(times[1].tv_nsec))) { - goto out; - } - - if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) - goto out; - - if (filename == NULL && dfd != AT_FDCWD) { - struct fd f; - - if (flags) - goto out; - - f = fdget(dfd); - error = -EBADF; - if (!f.file) - goto out; - - error = utimes_common(&f.file->f_path, times); - fdput(f); - } else { - struct path path; - int lookup_flags = 0; - - if (!(flags & AT_SYMLINK_NOFOLLOW)) - lookup_flags |= LOOKUP_FOLLOW; - if (flags & AT_EMPTY_PATH) - lookup_flags |= LOOKUP_EMPTY; -retry: - error = user_path_at(dfd, filename, lookup_flags, &path); - if (error) - goto out; - - error = utimes_common(&path, times); - path_put(&path); - if (retry_estale(error, lookup_flags)) { - lookup_flags |= LOOKUP_REVAL; - goto retry; - } - } - -out: - return error; + if (filename == NULL && dfd != AT_FDCWD) + return do_utimes_fd(dfd, times, flags); + return do_utimes_path(dfd, filename, times, flags); } SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename, From 27eb11c9632c66754ea7f67512b3516f998ed213 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Jul 2020 08:15:36 +0200 Subject: [PATCH 25/50] fs: move timespec validation into utimes_common Consolidate the validation of the timespec from the two callers into utimes_common. That means it is done a little later (e.g. after the path lookup), but I can't find anything that requires a specific order of processing the errors. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- fs/utimes.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/utimes.c b/fs/utimes.c index c667517b6eb1..bfd86e81c590 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -23,14 +23,19 @@ static int utimes_common(const struct path *path, struct timespec64 *times) struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; + if (times) { + if (!nsec_valid(times[0].tv_nsec) || + !nsec_valid(times[1].tv_nsec)) + return -EINVAL; + if (times[0].tv_nsec == UTIME_NOW && + times[1].tv_nsec == UTIME_NOW) + times = NULL; + } + error = mnt_want_write(path->mnt); if (error) goto out; - if (times && times[0].tv_nsec == UTIME_NOW && - times[1].tv_nsec == UTIME_NOW) - times = NULL; - newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (times) { if (times[0].tv_nsec == UTIME_OMIT) @@ -76,9 +81,6 @@ static int do_utimes_path(int dfd, const char __user *filename, struct path path; int lookup_flags = 0, error; - if (times && - (!nsec_valid(times[0].tv_nsec) || !nsec_valid(times[1].tv_nsec))) - return -EINVAL; if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) return -EINVAL; @@ -107,9 +109,6 @@ static int do_utimes_fd(int fd, struct timespec64 *times, int flags) struct fd f; int error; - if (times && - (!nsec_valid(times[0].tv_nsec) || !nsec_valid(times[1].tv_nsec))) - return -EINVAL; if (flags) return -EINVAL; From fd5ad30c782351ab4d4a15941fc61e743a1bd66c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Jul 2020 08:19:55 +0200 Subject: [PATCH 26/50] fs: expose utimes_common Rename utimes_common to vfs_utimes and make it available outside of utimes.c. This will be used by the initramfs unpacking code. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- fs/utimes.c | 6 +++--- include/linux/fs.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/utimes.c b/fs/utimes.c index bfd86e81c590..fd3cc4226224 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -16,7 +16,7 @@ static bool nsec_valid(long nsec) return nsec >= 0 && nsec <= 999999999; } -static int utimes_common(const struct path *path, struct timespec64 *times) +int vfs_utimes(const struct path *path, struct timespec64 *times) { int error; struct iattr newattrs; @@ -94,7 +94,7 @@ static int do_utimes_path(int dfd, const char __user *filename, if (error) return error; - error = utimes_common(&path, times); + error = vfs_utimes(&path, times); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; @@ -115,7 +115,7 @@ static int do_utimes_fd(int fd, struct timespec64 *times, int flags) f = fdget(fd); if (!f.file) return -EBADF; - error = utimes_common(&f.file->f_path, times); + error = vfs_utimes(&f.file->f_path, times); fdput(f); return error; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 635086726f20..a1d2685a4878 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1746,6 +1746,7 @@ int vfs_mkobj(struct dentry *, umode_t, int vfs_fchown(struct file *file, uid_t user, gid_t group); int vfs_fchmod(struct file *file, umode_t mode); +int vfs_utimes(const struct path *path, struct timespec64 *times); extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); From 38b082236e77d403fed23ac2d30d570598744ec3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jul 2020 08:19:00 +0200 Subject: [PATCH 27/50] initramfs: use vfs_utimes in do_copy Don't bother saving away the pathname and just use the new struct path based utimes helper instead. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- init/initramfs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/init/initramfs.c b/init/initramfs.c index 35c26668d463..9820fca4d4e3 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -202,7 +202,6 @@ static inline void __init eat(unsigned n) byte_count -= n; } -static __initdata char *vcollected; static __initdata char *collected; static long remains __initdata; static __initdata char *collect; @@ -345,7 +344,6 @@ static int __init do_name(void) vfs_fchmod(wfile, mode); if (body_len) vfs_truncate(&wfile->f_path, body_len); - vcollected = kstrdup(collected, GFP_KERNEL); state = CopyFile; } } else if (S_ISDIR(mode)) { @@ -368,11 +366,15 @@ static int __init do_name(void) static int __init do_copy(void) { if (byte_count >= body_len) { + struct timespec64 t[2] = { }; if (xwrite(wfile, victim, body_len, &wfile_pos) != body_len) error("write error"); + + t[0].tv_sec = mtime; + t[1].tv_sec = mtime; + vfs_utimes(&wfile->f_path, t); + fput(wfile); - do_utime(vcollected, mtime); - kfree(vcollected); eat(body_len); state = SkipIt; return 0; From a1e6aaa3743a3daa98d0f9275df8243eef8bb666 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 4 Jun 2020 09:41:08 +0200 Subject: [PATCH 28/50] fs: refactor do_mount Factor out a path_mount helper that takes a struct path * instead of the actual file name. This will allow to convert the init and devtmpfs code to properly mount based on a kernel pointer instead of relying on the implicit set_fs(KERNEL_DS) during early init. Signed-off-by: Christoph Hellwig --- fs/namespace.c | 67 ++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index f30ed401cc6d..6f8234f74bed 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3115,12 +3115,11 @@ char *copy_mount_string(const void __user *data) * Therefore, if this magic number is present, it carries no information * and must be discarded. */ -long do_mount(const char *dev_name, const char __user *dir_name, +static int path_mount(const char *dev_name, struct path *path, const char *type_page, unsigned long flags, void *data_page) { - struct path path; unsigned int mnt_flags = 0, sb_flags; - int retval = 0; + int ret; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) @@ -3133,19 +3132,13 @@ long do_mount(const char *dev_name, const char __user *dir_name, if (flags & MS_NOUSER) return -EINVAL; - /* ... and get the mountpoint */ - retval = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path); - if (retval) - return retval; - - retval = security_sb_mount(dev_name, &path, - type_page, flags, data_page); - if (!retval && !may_mount()) - retval = -EPERM; - if (!retval && (flags & SB_MANDLOCK) && !may_mandlock()) - retval = -EPERM; - if (retval) - goto dput_out; + ret = security_sb_mount(dev_name, path, type_page, flags, data_page); + if (ret) + return ret; + if (!may_mount()) + return -EPERM; + if ((flags & SB_MANDLOCK) && !may_mandlock()) + return -EPERM; /* Default to relatime unless overriden */ if (!(flags & MS_NOATIME)) @@ -3172,7 +3165,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME | MS_STRICTATIME)) == 0)) { mnt_flags &= ~MNT_ATIME_MASK; - mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK; + mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK; } sb_flags = flags & (SB_RDONLY | @@ -3185,22 +3178,32 @@ long do_mount(const char *dev_name, const char __user *dir_name, SB_I_VERSION); if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND)) - retval = do_reconfigure_mnt(&path, mnt_flags); - else if (flags & MS_REMOUNT) - retval = do_remount(&path, flags, sb_flags, mnt_flags, - data_page); - else if (flags & MS_BIND) - retval = do_loopback(&path, dev_name, flags & MS_REC); - else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) - retval = do_change_type(&path, flags); - else if (flags & MS_MOVE) - retval = do_move_mount_old(&path, dev_name); - else - retval = do_new_mount(&path, type_page, sb_flags, mnt_flags, - dev_name, data_page); -dput_out: + return do_reconfigure_mnt(path, mnt_flags); + if (flags & MS_REMOUNT) + return do_remount(path, flags, sb_flags, mnt_flags, data_page); + if (flags & MS_BIND) + return do_loopback(path, dev_name, flags & MS_REC); + if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) + return do_change_type(path, flags); + if (flags & MS_MOVE) + return do_move_mount_old(path, dev_name); + + return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name, + data_page); +} + +long do_mount(const char *dev_name, const char __user *dir_name, + const char *type_page, unsigned long flags, void *data_page) +{ + struct path path; + int ret; + + ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path); + if (ret) + return ret; + ret = path_mount(dev_name, &path, type_page, flags, data_page); path_put(&path); - return retval; + return ret; } static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns) From 41525f56e2564c2feff4fb2824823900efb3a39f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 21 Jul 2020 10:54:34 +0200 Subject: [PATCH 29/50] fs: refactor ksys_umount Factor out a path_umount helper that takes a struct path * instead of the actual file name. This will allow to convert the init and devtmpfs code to properly mount based on a kernel pointer instead of relying on the implicit set_fs(KERNEL_DS) during early init. Signed-off-by: Christoph Hellwig --- fs/namespace.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 6f8234f74bed..43834b59eff6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1706,36 +1706,19 @@ static inline bool may_mandlock(void) } #endif -/* - * Now umount can handle mount points as well as block devices. - * This is important for filesystems which use unnamed block devices. - * - * We now support a flag for forced unmount like the other 'big iron' - * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD - */ - -int ksys_umount(char __user *name, int flags) +static int path_umount(struct path *path, int flags) { - struct path path; struct mount *mnt; int retval; - int lookup_flags = LOOKUP_MOUNTPOINT; if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) return -EINVAL; - if (!may_mount()) return -EPERM; - if (!(flags & UMOUNT_NOFOLLOW)) - lookup_flags |= LOOKUP_FOLLOW; - - retval = user_path_at(AT_FDCWD, name, lookup_flags, &path); - if (retval) - goto out; - mnt = real_mount(path.mnt); + mnt = real_mount(path->mnt); retval = -EINVAL; - if (path.dentry != path.mnt->mnt_root) + if (path->dentry != path->mnt->mnt_root) goto dput_and_out; if (!check_mnt(mnt)) goto dput_and_out; @@ -1748,12 +1731,25 @@ int ksys_umount(char __user *name, int flags) retval = do_umount(mnt, flags); dput_and_out: /* we mustn't call path_put() as that would clear mnt_expiry_mark */ - dput(path.dentry); + dput(path->dentry); mntput_no_expire(mnt); -out: return retval; } +int ksys_umount(char __user *name, int flags) +{ + int lookup_flags = LOOKUP_MOUNTPOINT; + struct path path; + int ret; + + if (!(flags & UMOUNT_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + ret = user_path_at(AT_FDCWD, name, lookup_flags, &path); + if (ret) + return ret; + return path_umount(&path, flags); +} + SYSCALL_DEFINE2(umount, char __user *, name, int, flags) { return ksys_umount(name, flags); From e24ab0ef689de43649327f54cd1088f3dad25bb3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 21 Jul 2020 10:48:15 +0200 Subject: [PATCH 30/50] fs: push the getname from do_rmdir into the callers This mirrors do_unlinkat and will make life a little easier for the init code to reuse the whole function with a kernel filename. Signed-off-by: Christoph Hellwig --- fs/internal.h | 2 +- fs/namei.c | 10 ++++------ include/linux/syscalls.h | 4 ++-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index 9b863a7bd708..e903d5aae139 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -65,7 +65,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, long do_mknodat(int dfd, const char __user *filename, umode_t mode, unsigned int dev); long do_mkdirat(int dfd, const char __user *pathname, umode_t mode); -long do_rmdir(int dfd, const char __user *pathname); +long do_rmdir(int dfd, struct filename *name); long do_unlinkat(int dfd, struct filename *name); long do_symlinkat(const char __user *oldname, int newdfd, const char __user *newname); diff --git a/fs/namei.c b/fs/namei.c index 72d4219c93ac..d75a6039ae39 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3720,17 +3720,16 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) } EXPORT_SYMBOL(vfs_rmdir); -long do_rmdir(int dfd, const char __user *pathname) +long do_rmdir(int dfd, struct filename *name) { int error = 0; - struct filename *name; struct dentry *dentry; struct path path; struct qstr last; int type; unsigned int lookup_flags = 0; retry: - name = filename_parentat(dfd, getname(pathname), lookup_flags, + name = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (IS_ERR(name)) return PTR_ERR(name); @@ -3781,7 +3780,7 @@ long do_rmdir(int dfd, const char __user *pathname) SYSCALL_DEFINE1(rmdir, const char __user *, pathname) { - return do_rmdir(AT_FDCWD, pathname); + return do_rmdir(AT_FDCWD, getname(pathname)); } /** @@ -3926,8 +3925,7 @@ SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag) return -EINVAL; if (flag & AT_REMOVEDIR) - return do_rmdir(dfd, pathname); - + return do_rmdir(dfd, getname(pathname)); return do_unlinkat(dfd, getname(pathname)); } diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 5b0f1fca4cfb..e43816198e60 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1281,11 +1281,11 @@ static inline long ksys_unlink(const char __user *pathname) return do_unlinkat(AT_FDCWD, getname(pathname)); } -extern long do_rmdir(int dfd, const char __user *pathname); +long do_rmdir(int dfd, struct filename *name); static inline long ksys_rmdir(const char __user *pathname) { - return do_rmdir(AT_FDCWD, pathname); + return do_rmdir(AT_FDCWD, getname(pathname)); } extern long do_mkdirat(int dfd, const char __user *pathname, umode_t mode); From bcbacc4909f1b93242ba2894d0282c151c9d0e68 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Jul 2020 10:48:25 +0200 Subject: [PATCH 31/50] devtmpfs: refactor devtmpfsd() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the main worker loop into a separate function. This allows devtmpfsd_setup to be marked __init, which will allows us to call __init routines for the setup work. devtmpfѕ itself needs a __ref marker for that to work, and a comment explaining why it works. Signed-off-by: Christoph Hellwig --- drivers/base/devtmpfs.c | 56 ++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index c9017e0584c0..d697634bc0d4 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -378,30 +378,8 @@ static int handle(const char *name, umode_t mode, kuid_t uid, kgid_t gid, return handle_remove(name, dev); } -static int devtmpfs_setup(void *p) +static void __noreturn devtmpfs_work_loop(void) { - int err; - - err = ksys_unshare(CLONE_NEWNS); - if (err) - goto out; - err = do_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL); - if (err) - goto out; - ksys_chdir("/.."); /* will traverse into overmounted root */ - ksys_chroot("."); -out: - *(int *)p = err; - complete(&setup_done); - return err; -} - -static int devtmpfsd(void *p) -{ - int err = devtmpfs_setup(p); - - if (err) - return err; while (1) { spin_lock(&req_lock); while (requests) { @@ -421,6 +399,38 @@ static int devtmpfsd(void *p) spin_unlock(&req_lock); schedule(); } +} + +static int __init devtmpfs_setup(void *p) +{ + int err; + + err = ksys_unshare(CLONE_NEWNS); + if (err) + goto out; + err = do_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL); + if (err) + goto out; + ksys_chdir("/.."); /* will traverse into overmounted root */ + ksys_chroot("."); +out: + *(int *)p = err; + complete(&setup_done); + return err; +} + +/* + * The __ref is because devtmpfs_setup needs to be __init for the routines it + * calls. That call is done while devtmpfs_init, which is marked __init, + * synchronously waits for it to complete. + */ +static int __ref devtmpfsd(void *p) +{ + int err = devtmpfs_setup(p); + + if (err) + return err; + devtmpfs_work_loop(); return 0; } From 916db733deca5dc1e14ea8e3acfc121600a9f462 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 7 Jun 2020 09:06:29 +0200 Subject: [PATCH 32/50] init: initialize ramdisk_execute_command at compile time Set ramdisk_execute_command to "/init" at compile time. The command line can still override it, but this saves a few instructions and removes a NULL check. Signed-off-by: Christoph Hellwig --- init/main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/init/main.c b/init/main.c index db0621dfbb04..c2c9143db967 100644 --- a/init/main.c +++ b/init/main.c @@ -154,7 +154,7 @@ static bool initargs_found; #endif static char *execute_command; -static char *ramdisk_execute_command; +static char *ramdisk_execute_command = "/init"; /* * Used to generate warnings if static_key manipulation functions are used @@ -1514,10 +1514,6 @@ static noinline void __init kernel_init_freeable(void) * check if there is an early userspace init. If yes, let it do all * the work */ - - if (!ramdisk_execute_command) - ramdisk_execute_command = "/init"; - if (ksys_access((const char __user *) ramdisk_execute_command, 0) != 0) { ramdisk_execute_command = NULL; From a94b5214487b626a148c6aab86809fbeb8fcd6b1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jul 2020 18:23:21 +0200 Subject: [PATCH 33/50] init: mark console_on_rootfs as __init This helper is only used for the early init code. Signed-off-by: Christoph Hellwig --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index c2c9143db967..47698427b15f 100644 --- a/init/main.c +++ b/init/main.c @@ -1458,7 +1458,7 @@ static int __ref kernel_init(void *unused) } /* Open /dev/console, for stdin/stdout/stderr, this should never fail */ -void console_on_rootfs(void) +void __init console_on_rootfs(void) { struct file *file = filp_open("/dev/console", O_RDWR, 0); From 09cbcec07b578c04ab4ab0e31940c20126f79c4b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 21 Jul 2020 12:20:16 +0200 Subject: [PATCH 34/50] init: mark create_dev as __init This helper is only used for the early init code. Signed-off-by: Christoph Hellwig --- init/do_mounts.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/do_mounts.h b/init/do_mounts.h index c855b3f0e06d..021e2f60223e 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -13,7 +13,7 @@ void mount_block_root(char *name, int flags); void mount_root(void); extern int root_mountflags; -static inline int create_dev(char *name, dev_t dev) +static inline __init int create_dev(char *name, dev_t dev) { ksys_unlink(name); return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev)); From c60166f04283ffba7b88b45d824bbfb2bfccee24 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 21 Jul 2020 11:12:08 +0200 Subject: [PATCH 35/50] init: add an init_mount helper Like do_mount, but takes a kernel pointer for the destination path. Switch over the mounts in the init code and devtmpfs to it, which just happen to work due to the implicit set_fs(KERNEL_DS) during early init right now. Signed-off-by: Christoph Hellwig --- drivers/base/devtmpfs.c | 5 +++-- fs/Makefile | 2 +- fs/init.c | 25 +++++++++++++++++++++++++ fs/internal.h | 4 ++++ fs/namespace.c | 2 +- include/linux/init_syscalls.h | 4 ++++ init/do_mounts.c | 8 ++++---- init/do_mounts.h | 1 + init/do_mounts_initrd.c | 6 +++--- 9 files changed, 46 insertions(+), 11 deletions(-) create mode 100644 fs/init.c create mode 100644 include/linux/init_syscalls.h diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index d697634bc0d4..32af6cb987b4 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "base.h" @@ -359,7 +360,7 @@ int __init devtmpfs_mount(void) if (!thread) return 0; - err = do_mount("devtmpfs", "dev", "devtmpfs", MS_SILENT, NULL); + err = init_mount("devtmpfs", "dev", "devtmpfs", MS_SILENT, NULL); if (err) printk(KERN_INFO "devtmpfs: error mounting %i\n", err); else @@ -408,7 +409,7 @@ static int __init devtmpfs_setup(void *p) err = ksys_unshare(CLONE_NEWNS); if (err) goto out; - err = do_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL); + err = init_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL); if (err) goto out; ksys_chdir("/.."); /* will traverse into overmounted root */ diff --git a/fs/Makefile b/fs/Makefile index 2ce5112b02c8..1c7b0e3f6daa 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -13,7 +13,7 @@ obj-y := open.o read_write.o file_table.o super.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o splice.o sync.o utimes.o d_path.o \ stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ - fs_types.o fs_context.o fs_parser.o fsopen.o + fs_types.o fs_context.o fs_parser.o fsopen.o init.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o block_dev.o direct-io.o mpage.o diff --git a/fs/init.c b/fs/init.c new file mode 100644 index 000000000000..c6eb724e1c7b --- /dev/null +++ b/fs/init.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Routines that mimic syscalls, but don't use the user address space or file + * descriptors. Only for init/ and related early init code. + */ +#include +#include +#include +#include +#include +#include "internal.h" + +int __init init_mount(const char *dev_name, const char *dir_name, + const char *type_page, unsigned long flags, void *data_page) +{ + struct path path; + int ret; + + ret = kern_path(dir_name, LOOKUP_FOLLOW, &path); + if (ret) + return ret; + ret = path_mount(dev_name, &path, type_page, flags, data_page); + path_put(&path); + return ret; +} diff --git a/fs/internal.h b/fs/internal.h index e903d5aae139..72ea0b6f7435 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -89,6 +89,10 @@ extern int __mnt_want_write_file(struct file *); extern void __mnt_drop_write_file(struct file *); extern void dissolve_on_fput(struct vfsmount *); + +int path_mount(const char *dev_name, struct path *path, + const char *type_page, unsigned long flags, void *data_page); + /* * fs_struct.c */ diff --git a/fs/namespace.c b/fs/namespace.c index 43834b59eff6..2c4d75920974 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3111,7 +3111,7 @@ char *copy_mount_string(const void __user *data) * Therefore, if this magic number is present, it carries no information * and must be discarded. */ -static int path_mount(const char *dev_name, struct path *path, +int path_mount(const char *dev_name, struct path *path, const char *type_page, unsigned long flags, void *data_page) { unsigned int mnt_flags = 0, sb_flags; diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h new file mode 100644 index 000000000000..af9ea88a60e0 --- /dev/null +++ b/include/linux/init_syscalls.h @@ -0,0 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +int __init init_mount(const char *dev_name, const char *dir_name, + const char *type_page, unsigned long flags, void *data_page); diff --git a/init/do_mounts.c b/init/do_mounts.c index a7f22cf58c7e..83db87b6e5d1 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -395,16 +395,16 @@ static int __init do_mount_root(const char *name, const char *fs, int ret; if (data) { - /* do_mount() requires a full page as fifth argument */ + /* init_mount() requires a full page as fifth argument */ p = alloc_page(GFP_KERNEL); if (!p) return -ENOMEM; data_page = page_address(p); - /* zero-pad. do_mount() will make sure it's terminated */ + /* zero-pad. init_mount() will make sure it's terminated */ strncpy(data_page, data, PAGE_SIZE); } - ret = do_mount(name, "/root", fs, flags, data_page); + ret = init_mount(name, "/root", fs, flags, data_page); if (ret) goto out; @@ -628,7 +628,7 @@ void __init prepare_namespace(void) mount_root(); out: devtmpfs_mount(); - do_mount(".", "/", NULL, MS_MOVE, NULL); + init_mount(".", "/", NULL, MS_MOVE, NULL); ksys_chroot("."); } diff --git a/init/do_mounts.h b/init/do_mounts.h index 021e2f60223e..20e7fec8cb49 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -8,6 +8,7 @@ #include #include #include +#include void mount_block_root(char *name, int flags); void mount_root(void); diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index e08669187d63..1f9336209ad9 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -62,7 +62,7 @@ static int __init init_linuxrc(struct subprocess_info *info, struct cred *new) console_on_rootfs(); /* move initrd over / and chdir/chroot in initrd root */ ksys_chdir("/root"); - do_mount(".", "/", NULL, MS_MOVE, NULL); + init_mount(".", "/", NULL, MS_MOVE, NULL); ksys_chroot("."); ksys_setsid(); return 0; @@ -99,7 +99,7 @@ static void __init handle_initrd(void) current->flags &= ~PF_FREEZER_SKIP; /* move initrd to rootfs' /old */ - do_mount("..", ".", NULL, MS_MOVE, NULL); + init_mount("..", ".", NULL, MS_MOVE, NULL); /* switch root and cwd back to / of rootfs */ ksys_chroot(".."); @@ -113,7 +113,7 @@ static void __init handle_initrd(void) mount_root(); printk(KERN_NOTICE "Trying to move old root to /initrd ... "); - error = do_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL); + error = init_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL); if (!error) printk("okay\n"); else { From 09267defa36aaff6ff829bd2fc8b043ec151cc3e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:23:08 +0200 Subject: [PATCH 36/50] init: add an init_umount helper Like ksys_umount, but takes a kernel pointer for the destination path. Switch over the umount in the init code, which just happen to work due to the implicit set_fs(KERNEL_DS) during early init right now. Signed-off-by: Christoph Hellwig --- fs/init.c | 14 ++++++++++++++ fs/internal.h | 1 + fs/namespace.c | 4 ++-- include/linux/init_syscalls.h | 1 + include/linux/syscalls.h | 1 - init/do_mounts_initrd.c | 2 +- 6 files changed, 19 insertions(+), 4 deletions(-) diff --git a/fs/init.c b/fs/init.c index c6eb724e1c7b..9c8e31fdb048 100644 --- a/fs/init.c +++ b/fs/init.c @@ -23,3 +23,17 @@ int __init init_mount(const char *dev_name, const char *dir_name, path_put(&path); return ret; } + +int __init init_umount(const char *name, int flags) +{ + int lookup_flags = LOOKUP_MOUNTPOINT; + struct path path; + int ret; + + if (!(flags & UMOUNT_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + ret = kern_path(name, lookup_flags, &path); + if (ret) + return ret; + return path_umount(&path, flags); +} diff --git a/fs/internal.h b/fs/internal.h index 72ea0b6f7435..491d1e63809b 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -92,6 +92,7 @@ extern void dissolve_on_fput(struct vfsmount *); int path_mount(const char *dev_name, struct path *path, const char *type_page, unsigned long flags, void *data_page); +int path_umount(struct path *path, int flags); /* * fs_struct.c diff --git a/fs/namespace.c b/fs/namespace.c index 2c4d75920974..a7301790abb2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1706,7 +1706,7 @@ static inline bool may_mandlock(void) } #endif -static int path_umount(struct path *path, int flags) +int path_umount(struct path *path, int flags) { struct mount *mnt; int retval; @@ -1736,7 +1736,7 @@ static int path_umount(struct path *path, int flags) return retval; } -int ksys_umount(char __user *name, int flags) +static int ksys_umount(char __user *name, int flags) { int lookup_flags = LOOKUP_MOUNTPOINT; struct path path; diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index af9ea88a60e0..a5a2e7f19916 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -2,3 +2,4 @@ int __init init_mount(const char *dev_name, const char *dir_name, const char *type_page, unsigned long flags, void *data_page); +int __init init_umount(const char *name, int flags); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e43816198e60..1a4f5d8ee704 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1236,7 +1236,6 @@ asmlinkage long sys_ni_syscall(void); * the ksys_xyzyyz() functions prototyped below. */ -int ksys_umount(char __user *name, int flags); int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 1f9336209ad9..6b020a069902 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -122,7 +122,7 @@ static void __init handle_initrd(void) else printk("failed\n"); printk(KERN_NOTICE "Unmounting old root\n"); - ksys_umount("/old", MNT_DETACH); + init_umount("/old", MNT_DETACH); } } From 8fb9f73e5a539ab3aa4785f30fb52c65fa98600c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:23:40 +0200 Subject: [PATCH 37/50] init: add an init_unlink helper Add a simple helper to unlink with a kernel space file name and switch the early init code over to it. Remove the now unused ksys_unlink. Signed-off-by: Christoph Hellwig --- fs/init.c | 5 +++++ include/linux/init_syscalls.h | 1 + include/linux/syscalls.h | 7 ------- init/do_mounts.h | 2 +- init/do_mounts_initrd.c | 4 ++-- init/do_mounts_rd.c | 2 +- init/initramfs.c | 3 ++- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/init.c b/fs/init.c index 9c8e31fdb048..507ffbb5d146 100644 --- a/fs/init.c +++ b/fs/init.c @@ -37,3 +37,8 @@ int __init init_umount(const char *name, int flags) return ret; return path_umount(&path, flags); } + +int __init init_unlink(const char *pathname) +{ + return do_unlinkat(AT_FDCWD, getname_kernel(pathname)); +} diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index a5a2e7f19916..00d597249549 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -3,3 +3,4 @@ int __init init_mount(const char *dev_name, const char *dir_name, const char *type_page, unsigned long flags, void *data_page); int __init init_umount(const char *name, int flags); +int __init init_unlink(const char *pathname); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 1a4f5d8ee704..26f9738e5ab8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1273,13 +1273,6 @@ int compat_ksys_ipc(u32 call, int first, int second, * The following kernel syscall equivalents are just wrappers to fs-internal * functions. Therefore, provide stubs to be inlined at the callsites. */ -extern long do_unlinkat(int dfd, struct filename *name); - -static inline long ksys_unlink(const char __user *pathname) -{ - return do_unlinkat(AT_FDCWD, getname(pathname)); -} - long do_rmdir(int dfd, struct filename *name); static inline long ksys_rmdir(const char __user *pathname) diff --git a/init/do_mounts.h b/init/do_mounts.h index 20e7fec8cb49..104d8431725a 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -16,7 +16,7 @@ extern int root_mountflags; static inline __init int create_dev(char *name, dev_t dev) { - ksys_unlink(name); + init_unlink(name); return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev)); } diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 6b020a069902..8b44dd017842 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -137,11 +137,11 @@ bool __init initrd_load(void) * mounted in the normal path. */ if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) { - ksys_unlink("/initrd.image"); + init_unlink("/initrd.image"); handle_initrd(); return true; } } - ksys_unlink("/initrd.image"); + init_unlink("/initrd.image"); return false; } diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index d4255c10432a..ac021ae6e6fa 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -272,7 +272,7 @@ int __init rd_load_image(char *from) fput(out_file); out: kfree(buf); - ksys_unlink("/dev/ram"); + init_unlink("/dev/ram"); return res; } diff --git a/init/initramfs.c b/init/initramfs.c index 9820fca4d4e3..eb58cee6dadb 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -12,6 +12,7 @@ #include #include #include +#include static ssize_t __init xwrite(struct file *file, const char *p, size_t count, loff_t *pos) @@ -301,7 +302,7 @@ static void __init clean_path(char *path, umode_t fmode) if (S_ISDIR(st.mode)) ksys_rmdir(path); else - ksys_unlink(path); + init_unlink(path); } } From 20cce026c3e0972017b9cb4a7cccfb8cacf187d5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Jul 2020 11:11:45 +0200 Subject: [PATCH 38/50] init: add an init_rmdir helper Add a simple helper to rmdir with a kernel space file name and switch the early init code over to it. Remove the now unused ksys_rmdir. Signed-off-by: Christoph Hellwig --- fs/init.c | 5 +++++ include/linux/init_syscalls.h | 1 + include/linux/syscalls.h | 7 ------- init/initramfs.c | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/init.c b/fs/init.c index 507ffbb5d146..eabd9ed2b510 100644 --- a/fs/init.c +++ b/fs/init.c @@ -42,3 +42,8 @@ int __init init_unlink(const char *pathname) { return do_unlinkat(AT_FDCWD, getname_kernel(pathname)); } + +int __init init_rmdir(const char *pathname) +{ + return do_rmdir(AT_FDCWD, getname_kernel(pathname)); +} diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index 00d597249549..abf3af563c0b 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -4,3 +4,4 @@ int __init init_mount(const char *dev_name, const char *dir_name, const char *type_page, unsigned long flags, void *data_page); int __init init_umount(const char *name, int flags); int __init init_unlink(const char *pathname); +int __init init_rmdir(const char *pathname); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 26f9738e5ab8..a7b14258d245 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1273,13 +1273,6 @@ int compat_ksys_ipc(u32 call, int first, int second, * The following kernel syscall equivalents are just wrappers to fs-internal * functions. Therefore, provide stubs to be inlined at the callsites. */ -long do_rmdir(int dfd, struct filename *name); - -static inline long ksys_rmdir(const char __user *pathname) -{ - return do_rmdir(AT_FDCWD, getname(pathname)); -} - extern long do_mkdirat(int dfd, const char __user *pathname, umode_t mode); static inline long ksys_mkdir(const char __user *pathname, umode_t mode) diff --git a/init/initramfs.c b/init/initramfs.c index eb58cee6dadb..1a9159bf452f 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -300,7 +300,7 @@ static void __init clean_path(char *path, umode_t fmode) if (!vfs_lstat(path, &st) && (st.mode ^ fmode) & S_IFMT) { if (S_ISDIR(st.mode)) - ksys_rmdir(path); + init_rmdir(path); else init_unlink(path); } From db63f1e315384590b979f8f74abd1b5363b69894 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Jul 2020 11:25:21 +0200 Subject: [PATCH 39/50] init: add an init_chdir helper Add a simple helper to chdir with a kernel space file name and switch the early init code over to it. Remove the now unused ksys_chdir. Signed-off-by: Christoph Hellwig --- drivers/base/devtmpfs.c | 2 +- fs/init.c | 16 ++++++++++++++++ fs/open.c | 7 +------ include/linux/init_syscalls.h | 1 + include/linux/syscalls.h | 1 - init/do_mounts.c | 2 +- init/do_mounts_initrd.c | 8 ++++---- 7 files changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 32af6cb987b4..e48aaba3166b 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -412,7 +412,7 @@ static int __init devtmpfs_setup(void *p) err = init_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL); if (err) goto out; - ksys_chdir("/.."); /* will traverse into overmounted root */ + init_chdir("/.."); /* will traverse into overmounted root */ ksys_chroot("."); out: *(int *)p = err; diff --git a/fs/init.c b/fs/init.c index eabd9ed2b510..64d4e12eba93 100644 --- a/fs/init.c +++ b/fs/init.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "internal.h" @@ -38,6 +39,21 @@ int __init init_umount(const char *name, int flags) return path_umount(&path, flags); } +int __init init_chdir(const char *filename) +{ + struct path path; + int error; + + error = kern_path(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); + if (error) + return error; + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); + if (!error) + set_fs_pwd(current->fs, &path); + path_put(&path); + return error; +} + int __init init_unlink(const char *pathname) { return do_unlinkat(AT_FDCWD, getname_kernel(pathname)); diff --git a/fs/open.c b/fs/open.c index b316dd6a86a8..723e0ac89893 100644 --- a/fs/open.c +++ b/fs/open.c @@ -482,7 +482,7 @@ SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) return do_faccessat(AT_FDCWD, filename, mode, 0); } -int ksys_chdir(const char __user *filename) +SYSCALL_DEFINE1(chdir, const char __user *, filename) { struct path path; int error; @@ -508,11 +508,6 @@ int ksys_chdir(const char __user *filename) return error; } -SYSCALL_DEFINE1(chdir, const char __user *, filename) -{ - return ksys_chdir(filename); -} - SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct fd f = fdget_raw(fd); diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index abf3af563c0b..1e845910ae56 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -3,5 +3,6 @@ int __init init_mount(const char *dev_name, const char *dir_name, const char *type_page, unsigned long flags, void *data_page); int __init init_umount(const char *name, int flags); +int __init init_chdir(const char *filename); int __init init_unlink(const char *pathname); int __init init_rmdir(const char *pathname); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a7b14258d245..31fa67fb9894 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1238,7 +1238,6 @@ asmlinkage long sys_ni_syscall(void); int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); -int ksys_chdir(const char __user *filename); int ksys_fchown(unsigned int fd, uid_t user, gid_t group); ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); void ksys_sync(void); diff --git a/init/do_mounts.c b/init/do_mounts.c index 83db87b6e5d1..a7581c6e85f2 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -408,7 +408,7 @@ static int __init do_mount_root(const char *name, const char *fs, if (ret) goto out; - ksys_chdir("/root"); + init_chdir("/root"); s = current->fs->pwd.dentry->d_sb; ROOT_DEV = s->s_dev; printk(KERN_INFO diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 8b44dd017842..04627fd22a92 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -61,7 +61,7 @@ static int __init init_linuxrc(struct subprocess_info *info, struct cred *new) ksys_unshare(CLONE_FS | CLONE_FILES); console_on_rootfs(); /* move initrd over / and chdir/chroot in initrd root */ - ksys_chdir("/root"); + init_chdir("/root"); init_mount(".", "/", NULL, MS_MOVE, NULL); ksys_chroot("."); ksys_setsid(); @@ -82,7 +82,7 @@ static void __init handle_initrd(void) /* mount initrd on rootfs' /root */ mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY); ksys_mkdir("/old", 0700); - ksys_chdir("/old"); + init_chdir("/old"); /* * In case that a resume from disk is carried out by linuxrc or one of @@ -104,11 +104,11 @@ static void __init handle_initrd(void) ksys_chroot(".."); if (new_decode_dev(real_root_dev) == Root_RAM0) { - ksys_chdir("/old"); + init_chdir("/old"); return; } - ksys_chdir("/"); + init_chdir("/"); ROOT_DEV = new_decode_dev(real_root_dev); mount_root(); From 4b7ca5014cbef51cdb99fd644eae4f3773747a05 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Jul 2020 11:26:13 +0200 Subject: [PATCH 40/50] init: add an init_chroot helper Add a simple helper to chroot with a kernel space file name and switch the early init code over to it. Remove the now unused ksys_chroot. Signed-off-by: Christoph Hellwig --- drivers/base/devtmpfs.c | 2 +- fs/init.c | 24 ++++++++++++++++++++++++ fs/open.c | 7 +------ include/linux/init_syscalls.h | 1 + include/linux/syscalls.h | 2 -- init/do_mounts.c | 2 +- init/do_mounts_initrd.c | 4 ++-- 7 files changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index e48aaba3166b..eac184e6d657 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -413,7 +413,7 @@ static int __init devtmpfs_setup(void *p) if (err) goto out; init_chdir("/.."); /* will traverse into overmounted root */ - ksys_chroot("."); + init_chroot("."); out: *(int *)p = err; complete(&setup_done); diff --git a/fs/init.c b/fs/init.c index 64d4e12eba93..2c78f24814dd 100644 --- a/fs/init.c +++ b/fs/init.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "internal.h" int __init init_mount(const char *dev_name, const char *dir_name, @@ -54,6 +55,29 @@ int __init init_chdir(const char *filename) return error; } +int __init init_chroot(const char *filename) +{ + struct path path; + int error; + + error = kern_path(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); + if (error) + return error; + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); + if (error) + goto dput_and_out; + error = -EPERM; + if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT)) + goto dput_and_out; + error = security_path_chroot(&path); + if (error) + goto dput_and_out; + set_fs_root(current->fs, &path); +dput_and_out: + path_put(&path); + return error; +} + int __init init_unlink(const char *pathname) { return do_unlinkat(AT_FDCWD, getname_kernel(pathname)); diff --git a/fs/open.c b/fs/open.c index 723e0ac89893..f62f4752bb43 100644 --- a/fs/open.c +++ b/fs/open.c @@ -530,7 +530,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) return error; } -int ksys_chroot(const char __user *filename) +SYSCALL_DEFINE1(chroot, const char __user *, filename) { struct path path; int error; @@ -563,11 +563,6 @@ int ksys_chroot(const char __user *filename) return error; } -SYSCALL_DEFINE1(chroot, const char __user *, filename) -{ - return ksys_chroot(filename); -} - static int chmod_common(const struct path *path, umode_t mode) { struct inode *inode = path->dentry->d_inode; diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index 1e845910ae56..e07099a14b91 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -4,5 +4,6 @@ int __init init_mount(const char *dev_name, const char *dir_name, const char *type_page, unsigned long flags, void *data_page); int __init init_umount(const char *name, int flags); int __init init_chdir(const char *filename); +int __init init_chroot(const char *filename); int __init init_unlink(const char *pathname); int __init init_rmdir(const char *pathname); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 31fa67fb9894..e89d62e944dc 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1235,8 +1235,6 @@ asmlinkage long sys_ni_syscall(void); * Instead, use one of the functions which work equivalently, such as * the ksys_xyzyyz() functions prototyped below. */ - -int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_fchown(unsigned int fd, uid_t user, gid_t group); ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); diff --git a/init/do_mounts.c b/init/do_mounts.c index a7581c6e85f2..b5f9604d0c98 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -629,7 +629,7 @@ void __init prepare_namespace(void) out: devtmpfs_mount(); init_mount(".", "/", NULL, MS_MOVE, NULL); - ksys_chroot("."); + init_chroot("."); } static bool is_tmpfs; diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 04627fd22a92..a6b447b191db 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -63,7 +63,7 @@ static int __init init_linuxrc(struct subprocess_info *info, struct cred *new) /* move initrd over / and chdir/chroot in initrd root */ init_chdir("/root"); init_mount(".", "/", NULL, MS_MOVE, NULL); - ksys_chroot("."); + init_chroot("."); ksys_setsid(); return 0; } @@ -101,7 +101,7 @@ static void __init handle_initrd(void) /* move initrd to rootfs' /old */ init_mount("..", ".", NULL, MS_MOVE, NULL); /* switch root and cwd back to / of rootfs */ - ksys_chroot(".."); + init_chroot(".."); if (new_decode_dev(real_root_dev) == Root_RAM0) { init_chdir("/old"); From b873498f99c77e7b5be3aa5ffe9ca67437232fe0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Jul 2020 11:13:26 +0200 Subject: [PATCH 41/50] init: add an init_chown helper Add a simple helper to chown with a kernel space file name and switch the early init code over to it. Signed-off-by: Christoph Hellwig --- fs/init.c | 18 ++++++++++++++++++ fs/internal.h | 2 +- fs/open.c | 2 +- include/linux/init_syscalls.h | 1 + init/initramfs.c | 6 +++--- 5 files changed, 24 insertions(+), 5 deletions(-) diff --git a/fs/init.c b/fs/init.c index 2c78f24814dd..edd024465595 100644 --- a/fs/init.c +++ b/fs/init.c @@ -78,6 +78,24 @@ int __init init_chroot(const char *filename) return error; } +int __init init_chown(const char *filename, uid_t user, gid_t group, int flags) +{ + int lookup_flags = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; + struct path path; + int error; + + error = kern_path(filename, lookup_flags, &path); + if (error) + return error; + error = mnt_want_write(path.mnt); + if (!error) { + error = chown_common(&path, user, group); + mnt_drop_write(path.mnt); + } + path_put(&path); + return error; +} + int __init init_unlink(const char *pathname) { return do_unlinkat(AT_FDCWD, getname_kernel(pathname)); diff --git a/fs/internal.h b/fs/internal.h index 491d1e63809b..e81b9e23c3ea 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -134,7 +134,7 @@ long do_sys_ftruncate(unsigned int fd, loff_t length, int small); int do_fchmodat(int dfd, const char __user *filename, umode_t mode); int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); - +int chown_common(const struct path *path, uid_t user, gid_t group); extern int vfs_open(const struct path *, struct file *); /* diff --git a/fs/open.c b/fs/open.c index f62f4752bb43..49960a1248f1 100644 --- a/fs/open.c +++ b/fs/open.c @@ -639,7 +639,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) return do_fchmodat(AT_FDCWD, filename, mode); } -static int chown_common(const struct path *path, uid_t user, gid_t group) +int chown_common(const struct path *path, uid_t user, gid_t group) { struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index e07099a14b91..0da59d76133e 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -5,5 +5,6 @@ int __init init_mount(const char *dev_name, const char *dir_name, int __init init_umount(const char *name, int flags); int __init init_chdir(const char *filename); int __init init_chroot(const char *filename); +int __init init_chown(const char *filename, uid_t user, gid_t group, int flags); int __init init_unlink(const char *pathname); int __init init_rmdir(const char *pathname); diff --git a/init/initramfs.c b/init/initramfs.c index 1a9159bf452f..358dcd93cb9d 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -349,14 +349,14 @@ static int __init do_name(void) } } else if (S_ISDIR(mode)) { ksys_mkdir(collected, mode); - ksys_chown(collected, uid, gid); + init_chown(collected, uid, gid, 0); ksys_chmod(collected, mode); dir_add(collected, mtime); } else if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { if (maybe_link() == 0) { ksys_mknod(collected, mode, rdev); - ksys_chown(collected, uid, gid); + init_chown(collected, uid, gid, 0); ksys_chmod(collected, mode); do_utime(collected, mtime); } @@ -393,7 +393,7 @@ static int __init do_symlink(void) collected[N_ALIGN(name_len) + body_len] = '\0'; clean_path(collected, 0); ksys_symlink(collected + N_ALIGN(name_len), collected); - ksys_lchown(collected, uid, gid); + init_chown(collected, uid, gid, AT_SYMLINK_NOFOLLOW); do_utime(collected, mtime); state = SkipIt; next_state = Reset; From 1097742efc643ffc667c5c6684635b2663145a7d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Jul 2020 11:41:02 +0200 Subject: [PATCH 42/50] init: add an init_chmod helper Add a simple helper to chmod with a kernel space file name and switch the early init code over to it. Signed-off-by: Christoph Hellwig --- fs/init.c | 13 +++++++++++++ fs/internal.h | 2 +- fs/open.c | 4 ++-- include/linux/init_syscalls.h | 1 + include/linux/syscalls.h | 7 ------- init/initramfs.c | 4 ++-- 6 files changed, 19 insertions(+), 12 deletions(-) diff --git a/fs/init.c b/fs/init.c index edd024465595..a66032d128b6 100644 --- a/fs/init.c +++ b/fs/init.c @@ -96,6 +96,19 @@ int __init init_chown(const char *filename, uid_t user, gid_t group, int flags) return error; } +int __init init_chmod(const char *filename, umode_t mode) +{ + struct path path; + int error; + + error = kern_path(filename, LOOKUP_FOLLOW, &path); + if (error) + return error; + error = chmod_common(&path, mode); + path_put(&path); + return error; +} + int __init init_unlink(const char *pathname) { return do_unlinkat(AT_FDCWD, getname_kernel(pathname)); diff --git a/fs/internal.h b/fs/internal.h index e81b9e23c3ea..6d82681c7d83 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -131,7 +131,7 @@ extern struct open_how build_open_how(int flags, umode_t mode); extern int build_open_flags(const struct open_how *how, struct open_flags *op); long do_sys_ftruncate(unsigned int fd, loff_t length, int small); -int do_fchmodat(int dfd, const char __user *filename, umode_t mode); +int chmod_common(const struct path *path, umode_t mode); int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); int chown_common(const struct path *path, uid_t user, gid_t group); diff --git a/fs/open.c b/fs/open.c index 49960a1248f1..7ba89eae46c5 100644 --- a/fs/open.c +++ b/fs/open.c @@ -563,7 +563,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) return error; } -static int chmod_common(const struct path *path, umode_t mode) +int chmod_common(const struct path *path, umode_t mode) { struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; @@ -610,7 +610,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) return err; } -int do_fchmodat(int dfd, const char __user *filename, umode_t mode) +static int do_fchmodat(int dfd, const char __user *filename, umode_t mode) { struct path path; int error; diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index 0da59d76133e..2b1b4dc58682 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -6,5 +6,6 @@ int __init init_umount(const char *name, int flags); int __init init_chdir(const char *filename); int __init init_chroot(const char *filename); int __init init_chown(const char *filename, uid_t user, gid_t group, int flags); +int __init init_chmod(const char *filename, umode_t mode); int __init init_unlink(const char *pathname); int __init init_rmdir(const char *pathname); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e89d62e944dc..8b71fa321ca2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1304,13 +1304,6 @@ static inline long ksys_link(const char __user *oldname, return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); } -extern int do_fchmodat(int dfd, const char __user *filename, umode_t mode); - -static inline int ksys_chmod(const char __user *filename, umode_t mode) -{ - return do_fchmodat(AT_FDCWD, filename, mode); -} - long do_faccessat(int dfd, const char __user *filename, int mode, int flags); static inline long ksys_access(const char __user *filename, int mode) diff --git a/init/initramfs.c b/init/initramfs.c index 358dcd93cb9d..efa9fb70038d 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -350,14 +350,14 @@ static int __init do_name(void) } else if (S_ISDIR(mode)) { ksys_mkdir(collected, mode); init_chown(collected, uid, gid, 0); - ksys_chmod(collected, mode); + init_chmod(collected, mode); dir_add(collected, mtime); } else if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { if (maybe_link() == 0) { ksys_mknod(collected, mode, rdev); init_chown(collected, uid, gid, 0); - ksys_chmod(collected, mode); + init_chmod(collected, mode); do_utime(collected, mtime); } } From eb9d7d390e51108b4c6a9a7993ed9be92548c8f7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Jul 2020 11:14:02 +0200 Subject: [PATCH 43/50] init: add an init_eaccess helper Add a simple helper to check if a file exists based on kernel space file name and switch the early init code over to it. Note that this theoretically changes behavior as it always is based on the effective permissions. But during early init that doesn't make a difference. Signed-off-by: Christoph Hellwig --- fs/init.c | 13 +++++++++++++ fs/open.c | 2 +- include/linux/init_syscalls.h | 1 + include/linux/syscalls.h | 7 ------- init/main.c | 4 ++-- 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/init.c b/fs/init.c index a66032d128b6..6d9af40d2897 100644 --- a/fs/init.c +++ b/fs/init.c @@ -109,6 +109,19 @@ int __init init_chmod(const char *filename, umode_t mode) return error; } +int __init init_eaccess(const char *filename) +{ + struct path path; + int error; + + error = kern_path(filename, LOOKUP_FOLLOW, &path); + if (error) + return error; + error = inode_permission(d_inode(path.dentry), MAY_ACCESS); + path_put(&path); + return error; +} + int __init init_unlink(const char *pathname) { return do_unlinkat(AT_FDCWD, getname_kernel(pathname)); diff --git a/fs/open.c b/fs/open.c index 7ba89eae46c5..aafecd1f7ba1 100644 --- a/fs/open.c +++ b/fs/open.c @@ -394,7 +394,7 @@ static const struct cred *access_override_creds(void) return old_cred; } -long do_faccessat(int dfd, const char __user *filename, int mode, int flags) +static long do_faccessat(int dfd, const char __user *filename, int mode, int flags) { struct path path; struct inode *inode; diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index 2b1b4dc58682..7031c0934bee 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -7,5 +7,6 @@ int __init init_chdir(const char *filename); int __init init_chroot(const char *filename); int __init init_chown(const char *filename, uid_t user, gid_t group, int flags); int __init init_chmod(const char *filename, umode_t mode); +int __init init_eaccess(const char *filename); int __init init_unlink(const char *pathname); int __init init_rmdir(const char *pathname); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 8b71fa321ca2..a2779638e414 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1304,13 +1304,6 @@ static inline long ksys_link(const char __user *oldname, return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); } -long do_faccessat(int dfd, const char __user *filename, int mode, int flags); - -static inline long ksys_access(const char __user *filename, int mode) -{ - return do_faccessat(AT_FDCWD, filename, mode, 0); -} - extern int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); diff --git a/init/main.c b/init/main.c index 47698427b15f..1c710d3e1d46 100644 --- a/init/main.c +++ b/init/main.c @@ -96,6 +96,7 @@ #include #include #include +#include #include #include @@ -1514,8 +1515,7 @@ static noinline void __init kernel_init_freeable(void) * check if there is an early userspace init. If yes, let it do all * the work */ - if (ksys_access((const char __user *) - ramdisk_execute_command, 0) != 0) { + if (init_eaccess(ramdisk_execute_command) != 0) { ramdisk_execute_command = NULL; prepare_namespace(); } From 812931d693da58cc24d2bb8dec01c2b4a7f4668f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Jul 2020 11:14:19 +0200 Subject: [PATCH 44/50] init: add an init_link helper Add a simple helper to link with a kernel space file name and switch the early init code over to it. Remove the now unused ksys_link. Signed-off-by: Christoph Hellwig --- fs/init.c | 33 +++++++++++++++++++++++++++++++++ fs/internal.h | 3 +-- fs/namei.c | 4 ++-- include/linux/init_syscalls.h | 1 + include/linux/syscalls.h | 9 --------- init/initramfs.c | 2 +- 6 files changed, 38 insertions(+), 14 deletions(-) diff --git a/fs/init.c b/fs/init.c index 6d9af40d2897..5db9d9f74868 100644 --- a/fs/init.c +++ b/fs/init.c @@ -122,6 +122,39 @@ int __init init_eaccess(const char *filename) return error; } +int __init init_link(const char *oldname, const char *newname) +{ + struct dentry *new_dentry; + struct path old_path, new_path; + int error; + + error = kern_path(oldname, 0, &old_path); + if (error) + return error; + + new_dentry = kern_path_create(AT_FDCWD, newname, &new_path, 0); + error = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) + goto out; + + error = -EXDEV; + if (old_path.mnt != new_path.mnt) + goto out_dput; + error = may_linkat(&old_path); + if (unlikely(error)) + goto out_dput; + error = security_path_link(old_path.dentry, &new_path, new_dentry); + if (error) + goto out_dput; + error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, + NULL); +out_dput: + done_path_create(&new_path, new_dentry); +out: + path_put(&old_path); + return error; +} + int __init init_unlink(const char *pathname) { return do_unlinkat(AT_FDCWD, getname_kernel(pathname)); diff --git a/fs/internal.h b/fs/internal.h index 6d82681c7d83..58451b033d26 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -69,8 +69,7 @@ long do_rmdir(int dfd, struct filename *name); long do_unlinkat(int dfd, struct filename *name); long do_symlinkat(const char __user *oldname, int newdfd, const char __user *newname); -int do_linkat(int olddfd, const char __user *oldname, int newdfd, - const char __user *newname, int flags); +int may_linkat(struct path *link); /* * namespace.c diff --git a/fs/namei.c b/fs/namei.c index d75a6039ae39..13de64c6be76 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1024,7 +1024,7 @@ static bool safe_hardlink_source(struct inode *inode) * * Returns 0 if successful, -ve on error. */ -static int may_linkat(struct path *link) +int may_linkat(struct path *link) { struct inode *inode = link->dentry->d_inode; @@ -4086,7 +4086,7 @@ EXPORT_SYMBOL(vfs_link); * with linux 2.0, and to avoid hard-linking to directories * and other special files. --ADM */ -int do_linkat(int olddfd, const char __user *oldname, int newdfd, +static int do_linkat(int olddfd, const char __user *oldname, int newdfd, const char __user *newname, int flags) { struct dentry *new_dentry; diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index 7031c0934bee..5ca15a5b55b7 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -8,5 +8,6 @@ int __init init_chroot(const char *filename); int __init init_chown(const char *filename, uid_t user, gid_t group, int flags); int __init init_chmod(const char *filename, umode_t mode); int __init init_eaccess(const char *filename); +int __init init_link(const char *oldname, const char *newname); int __init init_unlink(const char *pathname); int __init init_rmdir(const char *pathname); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a2779638e414..4b18b91ce465 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1295,15 +1295,6 @@ static inline long ksys_mknod(const char __user *filename, umode_t mode, return do_mknodat(AT_FDCWD, filename, mode, dev); } -extern int do_linkat(int olddfd, const char __user *oldname, int newdfd, - const char __user *newname, int flags); - -static inline long ksys_link(const char __user *oldname, - const char __user *newname) -{ - return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); -} - extern int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); diff --git a/init/initramfs.c b/init/initramfs.c index efa9fb70038d..516a66900d7a 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -312,7 +312,7 @@ static int __init maybe_link(void) char *old = find_link(major, minor, ino, mode, collected); if (old) { clean_path(collected, 0); - return (ksys_link(old, collected) < 0) ? -1 : 1; + return (init_link(old, collected) < 0) ? -1 : 1; } } return 0; From cd3acb6a79349f346714ab3d26d203a0c6ca5ab0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Jul 2020 11:14:36 +0200 Subject: [PATCH 45/50] init: add an init_symlink helper Add a simple helper to symlink with a kernel space file name and switch the early init code over to it. Remove the now unused ksys_symlink. Signed-off-by: Christoph Hellwig --- fs/init.c | 16 ++++++++++++++++ fs/internal.h | 2 -- fs/namei.c | 2 +- include/linux/init_syscalls.h | 1 + include/linux/syscalls.h | 9 --------- init/initramfs.c | 2 +- 6 files changed, 19 insertions(+), 13 deletions(-) diff --git a/fs/init.c b/fs/init.c index 5db9d9f74868..09ef2b58d48c 100644 --- a/fs/init.c +++ b/fs/init.c @@ -155,6 +155,22 @@ int __init init_link(const char *oldname, const char *newname) return error; } +int __init init_symlink(const char *oldname, const char *newname) +{ + struct dentry *dentry; + struct path path; + int error; + + dentry = kern_path_create(AT_FDCWD, newname, &path, 0); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + error = security_path_symlink(&path, dentry, oldname); + if (!error) + error = vfs_symlink(path.dentry->d_inode, dentry, oldname); + done_path_create(&path, dentry); + return error; +} + int __init init_unlink(const char *pathname) { return do_unlinkat(AT_FDCWD, getname_kernel(pathname)); diff --git a/fs/internal.h b/fs/internal.h index 58451b033d26..40b50a222d7a 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -67,8 +67,6 @@ long do_mknodat(int dfd, const char __user *filename, umode_t mode, long do_mkdirat(int dfd, const char __user *pathname, umode_t mode); long do_rmdir(int dfd, struct filename *name); long do_unlinkat(int dfd, struct filename *name); -long do_symlinkat(const char __user *oldname, int newdfd, - const char __user *newname); int may_linkat(struct path *link); /* diff --git a/fs/namei.c b/fs/namei.c index 13de64c6be76..2f6fa53eb3da 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3955,7 +3955,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) } EXPORT_SYMBOL(vfs_symlink); -long do_symlinkat(const char __user *oldname, int newdfd, +static long do_symlinkat(const char __user *oldname, int newdfd, const char __user *newname) { int error; diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index 5ca15a5b55b7..125f55ae3f80 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -9,5 +9,6 @@ int __init init_chown(const char *filename, uid_t user, gid_t group, int flags); int __init init_chmod(const char *filename, umode_t mode); int __init init_eaccess(const char *filename); int __init init_link(const char *oldname, const char *newname); +int __init init_symlink(const char *oldname, const char *newname); int __init init_unlink(const char *pathname); int __init init_rmdir(const char *pathname); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4b18b91ce465..7cdc0d749a04 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1277,15 +1277,6 @@ static inline long ksys_mkdir(const char __user *pathname, umode_t mode) return do_mkdirat(AT_FDCWD, pathname, mode); } -extern long do_symlinkat(const char __user *oldname, int newdfd, - const char __user *newname); - -static inline long ksys_symlink(const char __user *oldname, - const char __user *newname) -{ - return do_symlinkat(oldname, AT_FDCWD, newname); -} - extern long do_mknodat(int dfd, const char __user *filename, umode_t mode, unsigned int dev); diff --git a/init/initramfs.c b/init/initramfs.c index 516a66900d7a..c91fc9a51d2a 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -392,7 +392,7 @@ static int __init do_symlink(void) { collected[N_ALIGN(name_len) + body_len] = '\0'; clean_path(collected, 0); - ksys_symlink(collected + N_ALIGN(name_len), collected); + init_symlink(collected + N_ALIGN(name_len), collected); init_chown(collected, uid, gid, AT_SYMLINK_NOFOLLOW); do_utime(collected, mtime); state = SkipIt; From 83ff98c3e9cd2b82b4289e185f2ce7d635a9cbd3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Jul 2020 11:14:59 +0200 Subject: [PATCH 46/50] init: add an init_mkdir helper Add a simple helper to mkdir with a kernel space file name and switch the early init code over to it. Remove the now unused ksys_mkdir. Signed-off-by: Christoph Hellwig --- fs/init.c | 18 ++++++++++++++++++ fs/internal.h | 1 - fs/namei.c | 2 +- include/linux/init_syscalls.h | 1 + include/linux/syscalls.h | 7 ------- init/do_mounts_initrd.c | 2 +- init/initramfs.c | 2 +- init/noinitramfs.c | 5 +++-- 8 files changed, 25 insertions(+), 13 deletions(-) diff --git a/fs/init.c b/fs/init.c index 09ef2b58d48c..127033d08426 100644 --- a/fs/init.c +++ b/fs/init.c @@ -176,6 +176,24 @@ int __init init_unlink(const char *pathname) return do_unlinkat(AT_FDCWD, getname_kernel(pathname)); } +int __init init_mkdir(const char *pathname, umode_t mode) +{ + struct dentry *dentry; + struct path path; + int error; + + dentry = kern_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + if (!IS_POSIXACL(path.dentry->d_inode)) + mode &= ~current_umask(); + error = security_path_mkdir(&path, dentry, mode); + if (!error) + error = vfs_mkdir(path.dentry->d_inode, dentry, mode); + done_path_create(&path, dentry); + return error; +} + int __init init_rmdir(const char *pathname) { return do_rmdir(AT_FDCWD, getname_kernel(pathname)); diff --git a/fs/internal.h b/fs/internal.h index 40b50a222d7a..4741e591e923 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -64,7 +64,6 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); long do_mknodat(int dfd, const char __user *filename, umode_t mode, unsigned int dev); -long do_mkdirat(int dfd, const char __user *pathname, umode_t mode); long do_rmdir(int dfd, struct filename *name); long do_unlinkat(int dfd, struct filename *name); int may_linkat(struct path *link); diff --git a/fs/namei.c b/fs/namei.c index 2f6fa53eb3da..d6b25dd32f4d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3645,7 +3645,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } EXPORT_SYMBOL(vfs_mkdir); -long do_mkdirat(int dfd, const char __user *pathname, umode_t mode) +static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode) { struct dentry *dentry; struct path path; diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index 125f55ae3f80..d808985231f8 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -11,4 +11,5 @@ int __init init_eaccess(const char *filename); int __init init_link(const char *oldname, const char *newname); int __init init_symlink(const char *oldname, const char *newname); int __init init_unlink(const char *pathname); +int __init init_mkdir(const char *pathname, umode_t mode); int __init init_rmdir(const char *pathname); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7cdc0d749a04..5ef77a91382a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1270,13 +1270,6 @@ int compat_ksys_ipc(u32 call, int first, int second, * The following kernel syscall equivalents are just wrappers to fs-internal * functions. Therefore, provide stubs to be inlined at the callsites. */ -extern long do_mkdirat(int dfd, const char __user *pathname, umode_t mode); - -static inline long ksys_mkdir(const char __user *pathname, umode_t mode) -{ - return do_mkdirat(AT_FDCWD, pathname, mode); -} - extern long do_mknodat(int dfd, const char __user *filename, umode_t mode, unsigned int dev); diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index a6b447b191db..3f5ac81913dd 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -81,7 +81,7 @@ static void __init handle_initrd(void) create_dev("/dev/root.old", Root_RAM0); /* mount initrd on rootfs' /root */ mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY); - ksys_mkdir("/old", 0700); + init_mkdir("/old", 0700); init_chdir("/old"); /* diff --git a/init/initramfs.c b/init/initramfs.c index c91fc9a51d2a..0489eb65b3b8 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -348,7 +348,7 @@ static int __init do_name(void) state = CopyFile; } } else if (S_ISDIR(mode)) { - ksys_mkdir(collected, mode); + init_mkdir(collected, mode); init_chown(collected, uid, gid, 0); init_chmod(collected, mode); dir_add(collected, mtime); diff --git a/init/noinitramfs.c b/init/noinitramfs.c index fa9cdfa7101d..94cc4df74b11 100644 --- a/init/noinitramfs.c +++ b/init/noinitramfs.c @@ -9,6 +9,7 @@ #include #include #include +#include /* * Create a simple rootfs that is similar to the default initramfs @@ -17,7 +18,7 @@ static int __init default_rootfs(void) { int err; - err = ksys_mkdir((const char __user __force *) "/dev", 0755); + err = init_mkdir("/dev", 0755); if (err < 0) goto out; @@ -27,7 +28,7 @@ static int __init default_rootfs(void) if (err < 0) goto out; - err = ksys_mkdir((const char __user __force *) "/root", 0700); + err = init_mkdir("/root", 0700); if (err < 0) goto out; From 5fee64fcde0770c41e926ff981022eaa512d8980 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Jul 2020 11:41:20 +0200 Subject: [PATCH 47/50] init: add an init_mknod helper Add a simple helper to mknod with a kernel space file name and switch the early init code over to it. Remove the now unused ksys_mknod. Signed-off-by: Christoph Hellwig --- fs/init.c | 25 +++++++++++++++++++++++++ fs/internal.h | 2 -- fs/namei.c | 2 +- include/linux/init_syscalls.h | 1 + include/linux/syscalls.h | 9 --------- init/do_mounts.h | 2 +- init/initramfs.c | 2 +- init/noinitramfs.c | 3 +-- 8 files changed, 30 insertions(+), 16 deletions(-) diff --git a/fs/init.c b/fs/init.c index 127033d08426..145fb31b7a5f 100644 --- a/fs/init.c +++ b/fs/init.c @@ -122,6 +122,31 @@ int __init init_eaccess(const char *filename) return error; } +int __init init_mknod(const char *filename, umode_t mode, unsigned int dev) +{ + struct dentry *dentry; + struct path path; + int error; + + if (S_ISFIFO(mode) || S_ISSOCK(mode)) + dev = 0; + else if (!(S_ISBLK(mode) || S_ISCHR(mode))) + return -EINVAL; + + dentry = kern_path_create(AT_FDCWD, filename, &path, 0); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + if (!IS_POSIXACL(path.dentry->d_inode)) + mode &= ~current_umask(); + error = security_path_mknod(&path, dentry, mode, dev); + if (!error) + error = vfs_mknod(path.dentry->d_inode, dentry, mode, + new_decode_dev(dev)); + done_path_create(&path, dentry); + return error; +} + int __init init_link(const char *oldname, const char *newname) { struct dentry *new_dentry; diff --git a/fs/internal.h b/fs/internal.h index 4741e591e923..07e145b2f88c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -62,8 +62,6 @@ extern int filename_lookup(int dfd, struct filename *name, unsigned flags, struct path *path, struct path *root); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); -long do_mknodat(int dfd, const char __user *filename, umode_t mode, - unsigned int dev); long do_rmdir(int dfd, struct filename *name); long do_unlinkat(int dfd, struct filename *name); int may_linkat(struct path *link); diff --git a/fs/namei.c b/fs/namei.c index d6b25dd32f4d..fde8fe086c09 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3564,7 +3564,7 @@ static int may_mknod(umode_t mode) } } -long do_mknodat(int dfd, const char __user *filename, umode_t mode, +static long do_mknodat(int dfd, const char __user *filename, umode_t mode, unsigned int dev) { struct dentry *dentry; diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index d808985231f8..fa1fe7a87779 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -8,6 +8,7 @@ int __init init_chroot(const char *filename); int __init init_chown(const char *filename, uid_t user, gid_t group, int flags); int __init init_chmod(const char *filename, umode_t mode); int __init init_eaccess(const char *filename); +int __init init_mknod(const char *filename, umode_t mode, unsigned int dev); int __init init_link(const char *oldname, const char *newname); int __init init_symlink(const char *oldname, const char *newname); int __init init_unlink(const char *pathname); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 5ef77a91382a..63046c5e9fc5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1270,15 +1270,6 @@ int compat_ksys_ipc(u32 call, int first, int second, * The following kernel syscall equivalents are just wrappers to fs-internal * functions. Therefore, provide stubs to be inlined at the callsites. */ -extern long do_mknodat(int dfd, const char __user *filename, umode_t mode, - unsigned int dev); - -static inline long ksys_mknod(const char __user *filename, umode_t mode, - unsigned int dev) -{ - return do_mknodat(AT_FDCWD, filename, mode, dev); -} - extern int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); diff --git a/init/do_mounts.h b/init/do_mounts.h index 104d8431725a..7a29ac3e427b 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -17,7 +17,7 @@ extern int root_mountflags; static inline __init int create_dev(char *name, dev_t dev) { init_unlink(name); - return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev)); + return init_mknod(name, S_IFBLK | 0600, new_encode_dev(dev)); } #ifdef CONFIG_BLK_DEV_RAM diff --git a/init/initramfs.c b/init/initramfs.c index 0489eb65b3b8..425addaf7c69 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -355,7 +355,7 @@ static int __init do_name(void) } else if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { if (maybe_link() == 0) { - ksys_mknod(collected, mode, rdev); + init_mknod(collected, mode, rdev); init_chown(collected, uid, gid, 0); init_chmod(collected, mode); do_utime(collected, mtime); diff --git a/init/noinitramfs.c b/init/noinitramfs.c index 94cc4df74b11..3d62b07f3bb9 100644 --- a/init/noinitramfs.c +++ b/init/noinitramfs.c @@ -22,8 +22,7 @@ static int __init default_rootfs(void) if (err < 0) goto out; - err = ksys_mknod((const char __user __force *) "/dev/console", - S_IFCHR | S_IRUSR | S_IWUSR, + err = init_mknod("/dev/console", S_IFCHR | S_IRUSR | S_IWUSR, new_encode_dev(MKDEV(5, 1))); if (err < 0) goto out; From 716308a5331bf907b819f9db8dc942b19568f925 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Jul 2020 11:15:40 +0200 Subject: [PATCH 48/50] init: add an init_stat helper Add a simple helper to stat with a kernel space file name and switch the early init code over to it. Signed-off-by: Christoph Hellwig --- drivers/md/md-autodetect.c | 3 ++- fs/init.c | 15 +++++++++++++++ include/linux/init_syscalls.h | 1 + init/initramfs.c | 3 ++- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c index 14b6e86814c0..6bbec89976a7 100644 --- a/drivers/md/md-autodetect.c +++ b/drivers/md/md-autodetect.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -151,7 +152,7 @@ static void __init md_setup_drive(struct md_setup_args *args) if (strncmp(devname, "/dev/", 5) == 0) devname += 5; snprintf(comp_name, 63, "/dev/%s", devname); - if (vfs_stat(comp_name, &stat) == 0 && S_ISBLK(stat.mode)) + if (init_stat(comp_name, &stat, 0) == 0 && S_ISBLK(stat.mode)) dev = new_decode_dev(stat.rdev); if (!dev) { pr_warn("md: Unknown device name: %s\n", devname); diff --git a/fs/init.c b/fs/init.c index 145fb31b7a5f..51646ba38099 100644 --- a/fs/init.c +++ b/fs/init.c @@ -122,6 +122,21 @@ int __init init_eaccess(const char *filename) return error; } +int __init init_stat(const char *filename, struct kstat *stat, int flags) +{ + int lookup_flags = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; + struct path path; + int error; + + error = kern_path(filename, lookup_flags, &path); + if (error) + return error; + error = vfs_getattr(&path, stat, STATX_BASIC_STATS, + flags | AT_NO_AUTOMOUNT); + path_put(&path); + return error; +} + int __init init_mknod(const char *filename, umode_t mode, unsigned int dev) { struct dentry *dentry; diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index fa1fe7a87779..b2fda50daca6 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -8,6 +8,7 @@ int __init init_chroot(const char *filename); int __init init_chown(const char *filename, uid_t user, gid_t group, int flags); int __init init_chmod(const char *filename, umode_t mode); int __init init_eaccess(const char *filename); +int __init init_stat(const char *filename, struct kstat *stat, int flags); int __init init_mknod(const char *filename, umode_t mode, unsigned int dev); int __init init_link(const char *oldname, const char *newname); int __init init_symlink(const char *oldname, const char *newname); diff --git a/init/initramfs.c b/init/initramfs.c index 425addaf7c69..744e111baba4 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -298,7 +298,8 @@ static void __init clean_path(char *path, umode_t fmode) { struct kstat st; - if (!vfs_lstat(path, &st) && (st.mode ^ fmode) & S_IFMT) { + if (init_stat(path, &st, AT_SYMLINK_NOFOLLOW) && + (st.mode ^ fmode) & S_IFMT) { if (S_ISDIR(st.mode)) init_rmdir(path); else From 235e57935bf328c4cce371ffc4dd1d8fab4885cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 21 Jul 2020 16:05:31 +0200 Subject: [PATCH 49/50] init: add an init_utimes helper Add a simple helper to set timestamps with a kernel space file name and switch the early init code over to it. Signed-off-by: Christoph Hellwig --- fs/init.c | 13 +++++++++++++ include/linux/init_syscalls.h | 1 + init/initramfs.c | 3 +-- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/init.c b/fs/init.c index 51646ba38099..db5c48a85644 100644 --- a/fs/init.c +++ b/fs/init.c @@ -238,3 +238,16 @@ int __init init_rmdir(const char *pathname) { return do_rmdir(AT_FDCWD, getname_kernel(pathname)); } + +int __init init_utimes(char *filename, struct timespec64 *ts) +{ + struct path path; + int error; + + error = kern_path(filename, 0, &path); + if (error) + return error; + error = vfs_utimes(&path, ts); + path_put(&path); + return error; +} diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index b2fda50daca6..3654b525ac0b 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -15,3 +15,4 @@ int __init init_symlink(const char *oldname, const char *newname); int __init init_unlink(const char *pathname); int __init init_mkdir(const char *pathname, umode_t mode); int __init init_rmdir(const char *pathname); +int __init init_utimes(char *filename, struct timespec64 *ts); diff --git a/init/initramfs.c b/init/initramfs.c index 744e111baba4..e6dbfb767057 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -111,8 +111,7 @@ static long __init do_utime(char *filename, time64_t mtime) t[0].tv_nsec = 0; t[1].tv_sec = mtime; t[1].tv_nsec = 0; - - return do_utimes(AT_FDCWD, filename, t, AT_SYMLINK_NOFOLLOW); + return init_utimes(filename, t); } static __initdata LIST_HEAD(dir_list); From f073531070d24bbb82cb2658952d949f4851024b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jul 2020 17:49:47 +0200 Subject: [PATCH 50/50] init: add an init_dup helper Add a simple helper to grab a reference to a file and install it at the next available fd, and switch the early init code over to it. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/init.c | 12 ++++++++++++ include/linux/init_syscalls.h | 1 + init/main.c | 8 ++++---- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/fs/init.c b/fs/init.c index db5c48a85644..e9c320a48cf1 100644 --- a/fs/init.c +++ b/fs/init.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -251,3 +252,14 @@ int __init init_utimes(char *filename, struct timespec64 *ts) path_put(&path); return error; } + +int __init init_dup(struct file *file) +{ + int fd; + + fd = get_unused_fd_flags(0); + if (fd < 0) + return fd; + fd_install(fd, get_file(file)); + return 0; +} diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index 3654b525ac0b..92045d18cbfc 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -16,3 +16,4 @@ int __init init_unlink(const char *pathname); int __init init_mkdir(const char *pathname, umode_t mode); int __init init_rmdir(const char *pathname); int __init init_utimes(char *filename, struct timespec64 *ts); +int __init init_dup(struct file *file); diff --git a/init/main.c b/init/main.c index 1c710d3e1d46..9dae9c4f806b 100644 --- a/init/main.c +++ b/init/main.c @@ -1467,10 +1467,10 @@ void __init console_on_rootfs(void) pr_err("Warning: unable to open an initial console.\n"); return; } - get_file_rcu_many(file, 2); - fd_install(get_unused_fd_flags(0), file); - fd_install(get_unused_fd_flags(0), file); - fd_install(get_unused_fd_flags(0), file); + init_dup(file); + init_dup(file); + init_dup(file); + fput(file); } static noinline void __init kernel_init_freeable(void)