From 92b66d2cdd7a4f6f6aa31be5f16a3f0c88902690 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 6 Jul 2018 23:57:02 +0200 Subject: [PATCH 1/4] vfs: limit size of dedupe Suggested-by: Darrick J. Wong Signed-off-by: Miklos Szeredi --- fs/read_write.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/read_write.c b/fs/read_write.c index 153f8f690490..f43bb12b4759 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -2003,6 +2003,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) if (off + len > i_size_read(src)) return -EINVAL; + /* Arbitrary 1G limit on a single dedupe request, can be raised. */ + len = min_t(u64, len, 1 << 30); + /* pre-format output fields to sane values */ for (i = 0; i < count; i++) { same->info[i].bytes_deduped = 0ULL; From 5740c99e9d30b81fcc478797e7215c61e241f44e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 6 Jul 2018 23:57:03 +0200 Subject: [PATCH 2/4] vfs: dedupe: return int Signed-off-by: Miklos Szeredi --- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/ioctl.c | 10 +++------- fs/ocfs2/file.c | 17 ++++++----------- fs/read_write.c | 4 ++-- fs/xfs/xfs_file.c | 19 ++----------------- include/linux/fs.h | 2 +- 6 files changed, 16 insertions(+), 40 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 118346aceea9..1c7c13334423 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3247,8 +3247,8 @@ void btrfs_get_block_group_info(struct list_head *groups_list, struct btrfs_ioctl_space_info *space); void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_balance_args *bargs); -ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, - struct file *dst_file, u64 dst_loff); +int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, + struct file *dst_file, u64 dst_loff); /* file.c */ int __init btrfs_auto_defrag_init(void); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c2837a32d689..94dc8e6c44ce 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3600,13 +3600,12 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, return ret; } -ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, - struct file *dst_file, u64 dst_loff) +int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, + struct file *dst_file, u64 dst_loff) { struct inode *src = file_inode(src_file); struct inode *dst = file_inode(dst_file); u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; - ssize_t res; if (WARN_ON_ONCE(bs < PAGE_SIZE)) { /* @@ -3617,10 +3616,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, return -EINVAL; } - res = btrfs_extent_same(src, loff, olen, dst, dst_loff); - if (res) - return res; - return olen; + return btrfs_extent_same(src, loff, olen, dst, dst_loff); } static int clone_finish_inode_update(struct btrfs_trans_handle *trans, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 255f758af03a..f96f018463f7 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2537,19 +2537,14 @@ static int ocfs2_file_clone_range(struct file *file_in, len, false); } -static ssize_t ocfs2_file_dedupe_range(struct file *src_file, - u64 loff, - u64 len, - struct file *dst_file, - u64 dst_loff) +static int ocfs2_file_dedupe_range(struct file *src_file, + u64 loff, + u64 len, + struct file *dst_file, + u64 dst_loff) { - int error; - - error = ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff, + return ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff, len, true); - if (error) - return error; - return len; } const struct inode_operations ocfs2_file_iops = { diff --git a/fs/read_write.c b/fs/read_write.c index f43bb12b4759..fa64e51ef4cf 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1976,7 +1976,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) u16 count = same->dest_count; struct file *dst_file; loff_t dst_off; - ssize_t deduped; + int deduped; if (!(file->f_mode & FMODE_READ)) return -EINVAL; @@ -2056,7 +2056,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) else if (deduped < 0) info->status = deduped; else - info->bytes_deduped += deduped; + info->bytes_deduped += len; } next_file: diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a3e7767a5715..547ef7e8aec1 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -933,7 +933,7 @@ xfs_file_clone_range( len, false); } -STATIC ssize_t +STATIC int xfs_file_dedupe_range( struct file *src_file, u64 loff, @@ -941,23 +941,8 @@ xfs_file_dedupe_range( struct file *dst_file, u64 dst_loff) { - struct inode *srci = file_inode(src_file); - u64 max_dedupe; - int error; - - /* - * Since we have to read all these pages in to compare them, cut - * it off at MAX_RW_COUNT/2 rounded down to the nearest block. - * That means we won't do more than MAX_RW_COUNT IO per request. - */ - max_dedupe = (MAX_RW_COUNT >> 1) & ~(i_blocksize(srci) - 1); - if (len > max_dedupe) - len = max_dedupe; - error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff, + return xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff, len, true); - if (error) - return error; - return len; } STATIC int diff --git a/include/linux/fs.h b/include/linux/fs.h index 5c91108846db..b81c4b7e339f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1749,7 +1749,7 @@ struct file_operations { loff_t, size_t, unsigned int); int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64); - ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *, + int (*dedupe_file_range)(struct file *, u64, u64, struct file *, u64); } __randomize_layout; From 87eb5eb2423213ac0e7315ce5d275f1ff80e0263 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 6 Jul 2018 23:57:03 +0200 Subject: [PATCH 3/4] vfs: dedupe: rationalize args Clean up f_op->dedupe_file_range() interface. 1) Use loff_t for offsets and length instead of u64 2) Order the arguments the same way as {copy|clone}_file_range(). Signed-off-by: Miklos Szeredi Reviewed-by: Darrick J. Wong --- fs/btrfs/ctree.h | 5 +++-- fs/btrfs/ioctl.c | 7 ++++--- fs/ocfs2/file.c | 12 ++++++------ fs/read_write.c | 4 ++-- fs/xfs/xfs_file.c | 12 ++++++------ include/linux/fs.h | 2 +- 6 files changed, 22 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1c7c13334423..d9d924017dfb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3247,8 +3247,9 @@ void btrfs_get_block_group_info(struct list_head *groups_list, struct btrfs_ioctl_space_info *space); void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_balance_args *bargs); -int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, - struct file *dst_file, u64 dst_loff); +int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff, + struct file *dst_file, loff_t dst_loff, + u64 olen); /* file.c */ int __init btrfs_auto_defrag_init(void); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 94dc8e6c44ce..755c9a306321 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3600,8 +3600,9 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, return ret; } -int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, - struct file *dst_file, u64 dst_loff) +int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff, + struct file *dst_file, loff_t dst_loff, + u64 olen) { struct inode *src = file_inode(src_file); struct inode *dst = file_inode(dst_file); @@ -3616,7 +3617,7 @@ int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, return -EINVAL; } - return btrfs_extent_same(src, loff, olen, dst, dst_loff); + return btrfs_extent_same(src, src_loff, olen, dst, dst_loff); } static int clone_finish_inode_update(struct btrfs_trans_handle *trans, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index f96f018463f7..9fa35cb6f6e0 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2537,13 +2537,13 @@ static int ocfs2_file_clone_range(struct file *file_in, len, false); } -static int ocfs2_file_dedupe_range(struct file *src_file, - u64 loff, - u64 len, - struct file *dst_file, - u64 dst_loff) +static int ocfs2_file_dedupe_range(struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + u64 len) { - return ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff, + return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, len, true); } diff --git a/fs/read_write.c b/fs/read_write.c index fa64e51ef4cf..c31794f92c2c 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -2049,8 +2049,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) info->status = -EINVAL; } else { deduped = dst_file->f_op->dedupe_file_range(file, off, - len, dst_file, - info->dest_offset); + dst_file, + info->dest_offset, len); if (deduped == -EBADE) info->status = FILE_DEDUPE_RANGE_DIFFERS; else if (deduped < 0) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 547ef7e8aec1..0f40ba54d83f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -935,13 +935,13 @@ xfs_file_clone_range( STATIC int xfs_file_dedupe_range( - struct file *src_file, - u64 loff, - u64 len, - struct file *dst_file, - u64 dst_loff) + struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + u64 len) { - return xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff, + return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, len, true); } diff --git a/include/linux/fs.h b/include/linux/fs.h index b81c4b7e339f..a8fee2f44981 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1749,7 +1749,7 @@ struct file_operations { loff_t, size_t, unsigned int); int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64); - int (*dedupe_file_range)(struct file *, u64, u64, struct file *, + int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64); } __randomize_layout; From 1b4f42a1e33fec999e94802df13dbd7521315742 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 6 Jul 2018 23:57:03 +0200 Subject: [PATCH 4/4] vfs: dedupe: extract helper for a single dedup Extract vfs_dedupe_file_range_one() helper to deal with a single dedup request. Signed-off-by: Miklos Szeredi Reviewed-by: Christoph Hellwig --- fs/read_write.c | 89 +++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 40 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index c31794f92c2c..cce4ebac34a8 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1964,6 +1964,44 @@ int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, } EXPORT_SYMBOL(vfs_dedupe_file_range_compare); +static int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, + struct file *dst_file, loff_t dst_pos, + u64 len) +{ + s64 ret; + + ret = mnt_want_write_file(dst_file); + if (ret) + return ret; + + ret = clone_verify_area(dst_file, dst_pos, len, true); + if (ret < 0) + goto out_drop_write; + + ret = -EINVAL; + if (!(capable(CAP_SYS_ADMIN) || (dst_file->f_mode & FMODE_WRITE))) + goto out_drop_write; + + ret = -EXDEV; + if (src_file->f_path.mnt != dst_file->f_path.mnt) + goto out_drop_write; + + ret = -EISDIR; + if (S_ISDIR(file_inode(dst_file)->i_mode)) + goto out_drop_write; + + ret = -EINVAL; + if (!dst_file->f_op->dedupe_file_range) + goto out_drop_write; + + ret = dst_file->f_op->dedupe_file_range(src_file, src_pos, + dst_file, dst_pos, len); +out_drop_write: + mnt_drop_write_file(dst_file); + + return ret; +} + int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) { struct file_dedupe_range_info *info; @@ -1972,10 +2010,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) u64 len; int i; int ret; - bool is_admin = capable(CAP_SYS_ADMIN); u16 count = same->dest_count; - struct file *dst_file; - loff_t dst_off; int deduped; if (!(file->f_mode & FMODE_READ)) @@ -2013,54 +2048,28 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) } for (i = 0, info = same->info; i < count; i++, info++) { - struct inode *dst; struct fd dst_fd = fdget(info->dest_fd); + struct file *dst_file = dst_fd.file; - dst_file = dst_fd.file; if (!dst_file) { info->status = -EBADF; goto next_loop; } - dst = file_inode(dst_file); - - ret = mnt_want_write_file(dst_file); - if (ret) { - info->status = ret; - goto next_fdput; - } - - dst_off = info->dest_offset; - ret = clone_verify_area(dst_file, dst_off, len, true); - if (ret < 0) { - info->status = ret; - goto next_file; - } - ret = 0; if (info->reserved) { info->status = -EINVAL; - } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { - info->status = -EINVAL; - } else if (file->f_path.mnt != dst_file->f_path.mnt) { - info->status = -EXDEV; - } else if (S_ISDIR(dst->i_mode)) { - info->status = -EISDIR; - } else if (dst_file->f_op->dedupe_file_range == NULL) { - info->status = -EINVAL; - } else { - deduped = dst_file->f_op->dedupe_file_range(file, off, - dst_file, - info->dest_offset, len); - if (deduped == -EBADE) - info->status = FILE_DEDUPE_RANGE_DIFFERS; - else if (deduped < 0) - info->status = deduped; - else - info->bytes_deduped += len; + goto next_fdput; } -next_file: - mnt_drop_write_file(dst_file); + deduped = vfs_dedupe_file_range_one(file, off, dst_file, + info->dest_offset, len); + if (deduped == -EBADE) + info->status = FILE_DEDUPE_RANGE_DIFFERS; + else if (deduped < 0) + info->status = deduped; + else + info->bytes_deduped = len; + next_fdput: fdput(dst_fd); next_loop: