Merge branch 'iomap-for-next' into mb/dio

2024-12-16 22:46:42 +07:00 · 2019-11-05 11:31:32 -05:00 · 2019-11-05 11:31:32 -05:00 · f21bdbba0a
commit f21bdbba0a
parent 0d0a60c92f a901004214
26 changed files with 1086 additions and 915 deletions
--- a/fs/dax.c
+++ b/fs/dax.c
@ -1090,7 +1090,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
 static loff_t
 dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
-		struct iomap *iomap)
+		struct iomap *iomap, struct iomap *srcmap)
 {
 	struct block_device *bdev = iomap->bdev;
 	struct dax_device *dax_dev = iomap->dax_dev;
@ -1247,7 +1247,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	struct inode *inode = mapping->host;
 	unsigned long vaddr = vmf->address;
 	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
-	struct iomap iomap = { 0 };
+	struct iomap iomap = { .type = IOMAP_HOLE };
 	struct iomap srcmap = { .type = IOMAP_HOLE };
 	unsigned flags = IOMAP_FAULT;
 	int error, major = 0;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
@ -1292,7 +1293,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	 * the file system block size to be equal the page size, which means
 	 * that we never have to deal with more than a single extent here.
 	 */
-	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
+	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap, &srcmap);
 	if (iomap_errp)
 		*iomap_errp = error;
 	if (error) {
@ -1471,7 +1472,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
 	struct inode *inode = mapping->host;
 	vm_fault_t result = VM_FAULT_FALLBACK;
-	struct iomap iomap = { 0 };
+	struct iomap iomap = { .type = IOMAP_HOLE };
 	struct iomap srcmap = { .type = IOMAP_HOLE };
 	pgoff_t max_pgoff;
 	void *entry;
 	loff_t pos;
@ -1546,7 +1548,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	 * to look up our filesystem block.
 	 */
 	pos = (loff_t)xas.xa_index << PAGE_SHIFT;
-	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
+	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap,
 			&srcmap);
 	if (error)
 		goto unlock_entry;
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@ -801,7 +801,7 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
 #ifdef CONFIG_FS_DAX
 static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
-		unsigned flags, struct iomap *iomap)
+		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
 {
 	unsigned int blkbits = inode->i_blkbits;
 	unsigned long first_block = offset >> blkbits;
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@ -3449,7 +3449,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
 }
 static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
-			    unsigned flags, struct iomap *iomap)
+		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	unsigned int blkbits = inode->i_blkbits;
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@ -1149,7 +1149,8 @@ static inline bool gfs2_iomap_need_write_lock(unsigned flags)
 }
 static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
-			    unsigned flags, struct iomap *iomap)
+			    unsigned flags, struct iomap *iomap,
 			    struct iomap *srcmap)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct metapath mp = { .mp_aheight = 1, };
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@ -732,7 +732,8 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to)
 	if (ret)
 		goto out_uninit;
-	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL);
+	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
 			   is_sync_kiocb(iocb));
 	gfs2_glock_dq(&gh);
 out_uninit:
@ -767,7 +768,8 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
 	if (offset + len > i_size_read(&ip->i_inode))
 		goto out;
-	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL);
+	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
 			   is_sync_kiocb(iocb));
 out:
 	gfs2_glock_dq(&gh);
--- a/fs/iomap/Makefile
+++ b/fs/iomap/Makefile
@ -3,13 +3,15 @@
 # Copyright (c) 2019 Oracle.
 # All Rights Reserved.
 #
 ccflags-y += -I $(srctree)/$(src)		# needed for trace events
 obj-$(CONFIG_FS_IOMAP)		+= iomap.o
-iomap-y				+= \
+iomap-y				+= trace.o \
-					apply.o \
+				   apply.o \
-					buffered-io.o \
+				   buffered-io.o \
-					direct-io.o \
+				   direct-io.o \
-					fiemap.o \
+				   fiemap.o \
-					seek.o
+				   seek.o
 iomap-$(CONFIG_SWAP)		+= swapfile.o
--- a/fs/iomap/apply.c
+++ b/fs/iomap/apply.c
@ -23,8 +23,10 @@ loff_t
 iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
 		const struct iomap_ops *ops, void *data, iomap_actor_t actor)
 {
-	struct iomap iomap = { 0 };
+	struct iomap iomap = { .type = IOMAP_HOLE };
 	struct iomap srcmap = { .type = IOMAP_HOLE };
 	loff_t written = 0, ret;
 	u64 end;
 	/*
 	 * Need to map a range from start position for length bytes. This can
@ -38,7 +40,7 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
 	 * expose transient stale data. If the reserve fails, we can safely
 	 * back out at this point as there is nothing to undo.
 	 */
-	ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
+	ret = ops->iomap_begin(inode, pos, length, flags, &iomap, &srcmap);
 	if (ret)
 		return ret;
 	if (WARN_ON(iomap.offset > pos))
@ -50,15 +52,26 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
 	 * Cut down the length to the one actually provided by the filesystem,
 	 * as it might not be able to give us the whole size that we requested.
 	 */
-	if (iomap.offset + iomap.length < pos + length)
+	end = iomap.offset + iomap.length;
-		length = iomap.offset + iomap.length - pos;
+	if (srcmap.type != IOMAP_HOLE)
 		end = min(end, srcmap.offset + srcmap.length);
 	if (pos + length > end)
 		length = end - pos;
 	/*
-	 * Now that we have guaranteed that the space allocation will succeed.
+	 * Now that we have guaranteed that the space allocation will succeed,
 	 * we can do the copy-in page by page without having to worry about
 	 * failures exposing transient data.
 	 *
 	 * To support COW operations, we read in data for partial blocks from
 	 * the srcmap if the file system filled it in.  In that case the
 	 * length needs to be limited to the earlier of the ends of the iomaps.
 	 * If the file system did not provide a srcmap we pass in the normal
 	 * iomap into the actors so that they don't need to have special
 	 * handling for the two cases.
 	 */
-	written = actor(inode, pos, length, data, &iomap);
+	written = actor(inode, pos, length, data, &iomap,
 			srcmap.type != IOMAP_HOLE ? &srcmap : &iomap);
 	/*
 	 * Now the data has been copied, commit the range we've copied.  This
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
 * Copyright (C) 2010 Red Hat, Inc.
- * Copyright (c) 2016-2018 Christoph Hellwig.
+ * Copyright (C) 2016-2019 Christoph Hellwig.
 */
 #include <linux/module.h>
 #include <linux/compiler.h>
@ -12,13 +12,34 @@
 #include <linux/buffer_head.h>
 #include <linux/dax.h>
 #include <linux/writeback.h>
 #include <linux/list_sort.h>
 #include <linux/swap.h>
 #include <linux/bio.h>
 #include <linux/sched/signal.h>
 #include <linux/migrate.h>
 #include "trace.h"
 #include "../internal.h"
 /*
 * Structure allocated for each page when block size < PAGE_SIZE to track
 * sub-page uptodate status and I/O completions.
 *
 * It is reached from a page through to_iomap_page().
 */
 struct iomap_page {
 	/* NOTE(review): presumably counts in-flight reads; not used in the code visible here - confirm */
 	atomic_t		read_count;
 	/*
 	 * Bumped by iomap_add_to_ioend() as parts of the page are queued for
 	 * writeback; iomap_finish_page_writeback() ends page writeback once
 	 * it drops to zero.
 	 */
 	atomic_t		write_count;
 	/* one bit per block of the page; iomap_writepage_map() skips clear bits */
 	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
 };
 /*
 * Return the iomap_page stored in @page's private data, or NULL when
 * page_has_private() reports that the page carries none.
 */
 static inline struct iomap_page *to_iomap_page(struct page *page)
 {
 	return page_has_private(page) ?
 		(struct iomap_page *)page_private(page) : NULL;
 }
 /* bio_set that iomap_alloc_ioend() allocates each ioend's inline bio from */
 static struct bio_set iomap_ioend_bioset;
 static struct iomap_page *
 iomap_page_create(struct inode *inode, struct page *page)
 {
@ -203,9 +224,17 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
 	SetPageUptodate(page);
 }
 /*
 * Decide whether the block at @pos has to be zero-filled rather than read.
 *
 * Returns true when @iomap is not of type IOMAP_MAPPED, when it carries
 * IOMAP_F_NEW, or when @pos lies at or beyond the current inode size.
 */
 static inline bool iomap_block_needs_zeroing(struct inode *inode,
 		struct iomap *iomap, loff_t pos)
 {
 	if (iomap->type != IOMAP_MAPPED)
 		return true;
 	if (iomap->flags & IOMAP_F_NEW)
 		return true;
 	return pos >= i_size_read(inode);
 }
 static loff_t
 iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
-		struct iomap *iomap)
+		struct iomap *iomap, struct iomap *srcmap)
 {
 	struct iomap_readpage_ctx *ctx = data;
 	struct page *page = ctx->cur_page;
@ -226,7 +255,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	if (plen == 0)
 		goto done;
-	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
+	if (iomap_block_needs_zeroing(inode, iomap, pos)) {
 		zero_user(page, poff, plen);
 		iomap_set_range_uptodate(page, poff, plen);
 		goto done;
@ -293,6 +322,8 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
 	unsigned poff;
 	loff_t ret;
 	trace_iomap_readpage(page->mapping->host, 1);
 	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
 		ret = iomap_apply(inode, page_offset(page) + poff,
 				PAGE_SIZE - poff, 0, ops, &ctx,
@ -351,7 +382,7 @@ iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
 static loff_t
 iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap)
+		void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	struct iomap_readpage_ctx *ctx = data;
 	loff_t done, ret;
@ -371,7 +402,7 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
 			ctx->cur_page_in_bio = false;
 		}
 		ret = iomap_readpage_actor(inode, pos + done, length - done,
-				ctx, iomap);
+				ctx, iomap, srcmap);
 	}
 	return done;
@ -389,6 +420,8 @@ iomap_readpages(struct address_space *mapping, struct list_head *pages,
 	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
 	loff_t length = last - pos + PAGE_SIZE, ret = 0;
 	trace_iomap_readpages(mapping->host, nr_pages);
 	while (length > 0) {
 		ret = iomap_apply(mapping->host, pos, length, 0, ops,
 				&ctx, iomap_readpages_actor);
@ -455,6 +488,8 @@ EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
 int
 iomap_releasepage(struct page *page, gfp_t gfp_mask)
 {
 	trace_iomap_releasepage(page->mapping->host, page, 0, 0);
 	/*
 	 * mm accommodates an old ext3 case where clean pages might not have had
 	 * the dirty bit cleared. Thus, it can send actual dirty pages to
@ -470,6 +505,8 @@ EXPORT_SYMBOL_GPL(iomap_releasepage);
 void
 iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len)
 {
 	trace_iomap_invalidatepage(page->mapping->host, page, offset, len);
 	/*
 	 * If we are invalidating the entire page, clear the dirty state from it
 	 * and release it to avoid unnecessary buildup of the LRU.
@ -511,6 +548,10 @@ iomap_migrate_page(struct address_space *mapping, struct page *newpage,
 EXPORT_SYMBOL_GPL(iomap_migrate_page);
 #endif /* CONFIG_MIGRATION */
 /*
 * Flags for the "flags" argument of iomap_write_begin() and
 * __iomap_write_begin().
 */
 enum {
 	/*
 	 * Set by iomap_unshare_actor().  With it, __iomap_write_begin() does
 	 * not skip blocks that lie fully inside or outside the written range,
 	 * and fails with -EIO if a block would need zeroing.
 	 */
 	IOMAP_WRITE_F_UNSHARE		= (1 << 0),
 };
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
@ -525,19 +566,12 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 }
 static int
-iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
+iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff,
-		unsigned poff, unsigned plen, unsigned from, unsigned to,
+		unsigned plen, struct iomap *iomap)
 		struct iomap *iomap)
 {
 	struct bio_vec bvec;
 	struct bio bio;
 	if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
 		zero_user_segments(page, poff, from, to, poff + plen);
 		iomap_set_range_uptodate(page, poff, plen);
 		return 0;
 	}
 	bio_init(&bio, &bvec, 1);
 	bio.bi_opf = REQ_OP_READ;
 	bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
@ -547,15 +581,15 @@ iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
 }
 static int
-__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
+__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
-		struct page *page, struct iomap *iomap)
+		struct page *page, struct iomap *srcmap)
 {
 	struct iomap_page *iop = iomap_page_create(inode, page);
 	loff_t block_size = i_blocksize(inode);
 	loff_t block_start = pos & ~(block_size - 1);
 	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
 	unsigned from = offset_in_page(pos), to = from + len, poff, plen;
-	int status = 0;
+	int status;
 	if (PageUptodate(page))
 		return 0;
@ -566,29 +600,39 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
 		if (plen == 0)
 			break;
-		if ((from > poff && from < poff + plen) ||
+		if (!(flags & IOMAP_WRITE_F_UNSHARE) &&
-		    (to > poff && to < poff + plen)) {
+		    (from <= poff || from >= poff + plen) &&
-			status = iomap_read_page_sync(inode, block_start, page,
+		    (to <= poff || to >= poff + plen))
-					poff, plen, from, to, iomap);
+			continue;
-			if (status)
+
-				break;
+		if (iomap_block_needs_zeroing(inode, srcmap, block_start)) {
 			if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE))
 				return -EIO;
 			zero_user_segments(page, poff, from, to, poff + plen);
 			iomap_set_range_uptodate(page, poff, plen);
 			continue;
 		}
 		status = iomap_read_page_sync(block_start, page, poff, plen,
 				srcmap);
 		if (status)
 			return status;
 	} while ((block_start += plen) < block_end);
-	return status;
+	return 0;
 }
 static int
 iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
-		struct page **pagep, struct iomap *iomap)
+		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
 {
 	const struct iomap_page_ops *page_ops = iomap->page_ops;
 	pgoff_t index = pos >> PAGE_SHIFT;
 	struct page *page;
 	int status = 0;
 	BUG_ON(pos + len > iomap->offset + iomap->length);
 	if (srcmap != iomap)
 		BUG_ON(pos + len > srcmap->offset + srcmap->length);
 	if (fatal_signal_pending(current))
 		return -EINTR;
@ -599,18 +643,20 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 			return status;
 	}
-	page = grab_cache_page_write_begin(inode->i_mapping, index, flags);
+	page = grab_cache_page_write_begin(inode->i_mapping, pos >> PAGE_SHIFT,
 			AOP_FLAG_NOFS);
 	if (!page) {
 		status = -ENOMEM;
 		goto out_no_page;
 	}
-	if (iomap->type == IOMAP_INLINE)
+	if (srcmap->type == IOMAP_INLINE)
-		iomap_read_inline_data(inode, page, iomap);
+		iomap_read_inline_data(inode, page, srcmap);
 	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
-		status = __block_write_begin_int(page, pos, len, NULL, iomap);
+		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
 	else
-		status = __iomap_write_begin(inode, pos, len, page, iomap);
+		status = __iomap_write_begin(inode, pos, len, flags, page,
 				srcmap);
 	if (unlikely(status))
 		goto out_unlock;
@ -656,7 +702,7 @@ EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
 static int
 __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
-		unsigned copied, struct page *page, struct iomap *iomap)
+		unsigned copied, struct page *page)
 {
 	flush_dcache_page(page);
@ -696,20 +742,20 @@ iomap_write_end_inline(struct inode *inode, struct page *page,
 }
 static int
-iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
+iomap_write_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied,
-		unsigned copied, struct page *page, struct iomap *iomap)
+		struct page *page, struct iomap *iomap, struct iomap *srcmap)
 {
 	const struct iomap_page_ops *page_ops = iomap->page_ops;
 	loff_t old_size = inode->i_size;
 	int ret;
-	if (iomap->type == IOMAP_INLINE) {
+	if (srcmap->type == IOMAP_INLINE) {
 		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
-	} else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+	} else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
 		ret = block_write_end(NULL, inode->i_mapping, pos, len, copied,
 				page, NULL);
 	} else {
-		ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
+		ret = __iomap_write_end(inode, pos, len, copied, page);
 	}
 	/*
@ -736,12 +782,11 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 static loff_t
 iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
-		struct iomap *iomap)
+		struct iomap *iomap, struct iomap *srcmap)
 {
 	struct iov_iter *i = data;
 	long status = 0;
 	ssize_t written = 0;
 	unsigned int flags = AOP_FLAG_NOFS;
 	do {
 		struct page *page;
@ -771,8 +816,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 			break;
 		}
-		status = iomap_write_begin(inode, pos, bytes, flags, &page,
+		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap,
-				iomap);
+				srcmap);
 		if (unlikely(status))
 			break;
@ -783,8 +828,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		flush_dcache_page(page);
-		status = iomap_write_end(inode, pos, bytes, copied, page,
+		status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
-				iomap);
+				srcmap);
 		if (unlikely(status < 0))
 			break;
 		copied = status;
@ -835,50 +880,32 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter,
 }
 EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
 /*
 * Read the page cache page covering byte @offset of @inode.
 *
 * Returns the page on success.  An error pointer from read_mapping_page()
 * is passed through unchanged; a page that comes back not uptodate is
 * released and reported as ERR_PTR(-EIO).
 */
 static struct page *
 __iomap_read_page(struct inode *inode, loff_t offset)
 {
 	struct page *page;
 	page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
 	if (!IS_ERR(page) && !PageUptodate(page)) {
 		put_page(page);
 		page = ERR_PTR(-EIO);
 	}
 	return page;
 }
 static loff_t
-iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
-		struct iomap *iomap)
+		struct iomap *iomap, struct iomap *srcmap)
 {
 	long status = 0;
 	ssize_t written = 0;
 	/* don't bother with blocks that are not shared to start with */
 	if (!(iomap->flags & IOMAP_F_SHARED))
 		return length;
 	/* don't bother with holes or unwritten extents */
 	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
 		return length;
 	do {
-		struct page *page, *rpage;
+		unsigned long offset = offset_in_page(pos);
-		unsigned long offset;	/* Offset into pagecache page */
+		unsigned long bytes = min_t(loff_t, PAGE_SIZE - offset, length);
-		unsigned long bytes;	/* Bytes to write to page */
+		struct page *page;
 		offset = offset_in_page(pos);
 		bytes = min_t(loff_t, PAGE_SIZE - offset, length);
 		rpage = __iomap_read_page(inode, pos);
 		if (IS_ERR(rpage))
 			return PTR_ERR(rpage);
 		status = iomap_write_begin(inode, pos, bytes,
-					   AOP_FLAG_NOFS, &page, iomap);
+				IOMAP_WRITE_F_UNSHARE, &page, iomap, srcmap);
 		put_page(rpage);
 		if (unlikely(status))
 			return status;
-		WARN_ON_ONCE(!PageUptodate(page));
+		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
-
+				srcmap);
 		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
 		if (unlikely(status <= 0)) {
 			if (WARN_ON_ONCE(status == 0))
 				return -EIO;
@ -898,14 +925,14 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 }
 int
-iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
+iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops)
 {
 	loff_t ret;
 	while (len) {
 		ret = iomap_apply(inode, pos, len, IOMAP_WRITE, ops, NULL,
-				iomap_dirty_actor);
+				iomap_unshare_actor);
 		if (ret <= 0)
 			return ret;
 		pos += ret;
@ -914,23 +941,22 @@ iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
 	return 0;
 }
-EXPORT_SYMBOL_GPL(iomap_file_dirty);
+EXPORT_SYMBOL_GPL(iomap_file_unshare);
 static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
-		unsigned bytes, struct iomap *iomap)
+		unsigned bytes, struct iomap *iomap, struct iomap *srcmap)
 {
 	struct page *page;
 	int status;
-	status = iomap_write_begin(inode, pos, bytes, AOP_FLAG_NOFS, &page,
+	status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
 				   iomap);
 	if (status)
 		return status;
 	zero_user(page, offset, bytes);
 	mark_page_accessed(page);
-	return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
+	return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
 }
 static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
@ -942,14 +968,14 @@ static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
 static loff_t
 iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
-		void *data, struct iomap *iomap)
+		void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	bool *did_zero = data;
 	loff_t written = 0;
 	int status;
 	/* already zeroed?  we're done. */
-	if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
+	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
 		return count;
 	do {
@ -961,7 +987,8 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
 		if (IS_DAX(inode))
 			status = iomap_dax_zero(pos, offset, bytes, iomap);
 		else
-			status = iomap_zero(inode, pos, offset, bytes, iomap);
+			status = iomap_zero(inode, pos, offset, bytes, iomap,
 					srcmap);
 		if (status < 0)
 			return status;
@ -1011,7 +1038,7 @@ EXPORT_SYMBOL_GPL(iomap_truncate_page);
 static loff_t
 iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap)
+		void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	struct page *page = data;
 	int ret;
@ -1071,3 +1098,551 @@ vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
 	return block_page_mkwrite_return(ret);
 }
 EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
 /*
 * Account for one completed writeback I/O on @page.
 *
 * A non-zero @error marks the page with SetPageError() and records -EIO on
 * the inode's mapping.  Page writeback is ended right away when the page has
 * no iomap_page, otherwise only once its write_count drops to zero.
 */
 static void
 iomap_finish_page_writeback(struct inode *inode, struct page *page,
 		int error)
 {
 	struct iomap_page *iop = to_iomap_page(page);
 	if (error) {
 		SetPageError(page);
 		mapping_set_error(inode->i_mapping, -EIO);
 	}
 	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE && !iop);
 	WARN_ON_ONCE(iop && atomic_read(&iop->write_count) <= 0);
 	/* other I/Os against this page are still outstanding */
 	if (iop && !atomic_dec_and_test(&iop->write_count))
 		return;
 	end_page_writeback(page);
 }
 /*
 * We're now finished for good with this ioend structure.  Update the page
 * state, release holds on bios, and finally free up memory.  Do not use the
 * ioend after this.
 */
 static void
 iomap_finish_ioend(struct iomap_ioend *ioend, int error)
 {
 	struct inode *inode = ioend->io_inode;
 	struct bio *bio = &ioend->io_inline_bio;
 	struct bio *last = ioend->io_bio, *next;
 	u64 start = bio->bi_iter.bi_sector;
 	bool quiet = bio_flagged(bio, BIO_QUIET);
 	for (bio = &ioend->io_inline_bio; bio; bio = next) {
 		struct bio_vec *bv;
 		struct bvec_iter_all iter_all;
 		/*
 		 * For the last bio, bi_private points to the ioend, so we
 		 * need to explicitly end the iteration here.
 		 */
 		if (bio == last)
 			next = NULL;
 		else
 			next = bio->bi_private;
 		/* walk each page on bio, ending page IO on them */
 		bio_for_each_segment_all(bv, bio, iter_all)
 			iomap_finish_page_writeback(inode, bv->bv_page, error);
 		bio_put(bio);
 	}
 	if (unlikely(error && !quiet)) {
 		printk_ratelimited(KERN_ERR
 "%s: writeback error on inode %lu, offset %lld, sector %llu",
 			inode->i_sb->s_id, inode->i_ino, ioend->io_offset,
 			start);
 	}
 }
 /*
 * Finish @ioend and every ioend linked on its io_list, passing the same
 * @error to each.
 *
 * The linked ioends are first moved onto a private list head so that @ioend
 * itself can be finished before the rest are walked.
 */
 void
 iomap_finish_ioends(struct iomap_ioend *ioend, int error)
 {
 	struct list_head tmp;
 	list_replace_init(&ioend->io_list, &tmp);
 	iomap_finish_ioend(ioend, error);
 	while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
 			io_list))) {
 		list_del_init(&ioend->io_list);
 		iomap_finish_ioend(ioend, error);
 	}
 }
 EXPORT_SYMBOL_GPL(iomap_finish_ioends);
 /*
 * We can merge two adjacent ioends if they have the same set of work to do:
 * equal bio status, matching IOMAP_F_SHARED state, matching "is unwritten"
 * state, and @next starting exactly where @ioend ends.
 */
 static bool
 iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
 {
 	return ioend->io_bio->bi_status == next->io_bio->bi_status &&
 	       !!(ioend->io_flags & IOMAP_F_SHARED) ==
 			!!(next->io_flags & IOMAP_F_SHARED) &&
 	       (ioend->io_type == IOMAP_UNWRITTEN) ==
 			(next->io_type == IOMAP_UNWRITTEN) &&
 	       ioend->io_offset + ioend->io_size == next->io_offset;
 }
 /*
 * Pull ioends off the front of @more_ioends and merge them into @ioend for
 * as long as iomap_ioend_can_merge() allows it.
 *
 * Each merged ioend is moved onto @ioend's io_list and its size is added to
 * @ioend.  @merge_private, if given, is called for merged ioends that have
 * io_private set.
 */
 void
 iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends,
 		void (*merge_private)(struct iomap_ioend *ioend,
 				struct iomap_ioend *next))
 {
 	struct iomap_ioend *cand;
 	INIT_LIST_HEAD(&ioend->io_list);
 	for (;;) {
 		cand = list_first_entry_or_null(more_ioends,
 				struct iomap_ioend, io_list);
 		if (!cand || !iomap_ioend_can_merge(ioend, cand))
 			break;
 		list_move_tail(&cand->io_list, &ioend->io_list);
 		ioend->io_size += cand->io_size;
 		if (merge_private && cand->io_private)
 			merge_private(ioend, cand);
 	}
 }
 EXPORT_SYMBOL_GPL(iomap_ioend_try_merge);
 /*
 * list_sort() comparison callback: order ioends by ascending io_offset.
 * Returns -1, 0 or 1.
 */
 static int
 iomap_ioend_compare(void *priv, struct list_head *a, struct list_head *b)
 {
 	struct iomap_ioend *lhs = container_of(a, struct iomap_ioend, io_list);
 	struct iomap_ioend *rhs = container_of(b, struct iomap_ioend, io_list);
 	return (lhs->io_offset > rhs->io_offset) -
 	       (lhs->io_offset < rhs->io_offset);
 }
 /*
 * Sort @ioend_list in place by ascending io_offset, using
 * iomap_ioend_compare() as the list_sort() comparison callback.
 */
 void
 iomap_sort_ioends(struct list_head *ioend_list)
 {
 	list_sort(NULL, ioend_list, iomap_ioend_compare);
 }
 EXPORT_SYMBOL_GPL(iomap_sort_ioends);
 static void iomap_writepage_end_bio(struct bio *bio)
 {
 	struct iomap_ioend *ioend = bio->bi_private;
 	iomap_finish_ioend(ioend, blk_status_to_errno(bio->bi_status));
 }
 /*
 * Submit the final bio for an ioend.
 *
 * If @error is non-zero, it means that we have a situation where some part of
 * the submission process has failed after we have marked pages for writeback
 * and unlocked them.  In this situation, we need to fail the bio instead of
 * submitting it.  This typically only happens on a filesystem shutdown.
 *
 * The optional ->prepare_ioend hook may replace @error.  Returns 0 when the
 * bio was submitted, otherwise the error the ioend was failed with.
 */
 static int
 iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend,
 		int error)
 {
 	ioend->io_bio->bi_private = ioend;
 	ioend->io_bio->bi_end_io = iomap_writepage_end_bio;
 	if (wpc->ops->prepare_ioend)
 		error = wpc->ops->prepare_ioend(ioend, error);
 	if (!error) {
 		submit_bio(ioend->io_bio);
 		return 0;
 	}
 	/*
 	 * If we are failing the IO now, just mark the ioend with an error and
 	 * finish it.  This will run IO completion immediately as there is only
 	 * one reference to the ioend at this point in time.
 	 */
 	ioend->io_bio->bi_status = errno_to_blk_status(error);
 	bio_endio(ioend->io_bio);
 	return error;
 }
 /*
 * Allocate and initialise a new ioend starting at file @offset / disk
 * @sector.
 *
 * The bio comes from iomap_ioend_bioset and the ioend is the structure
 * around it (io_inline_bio).  Type and flags are copied from the mapping
 * cached in @wpc; the ioend starts out with size 0 and no private data.
 */
 static struct iomap_ioend *
 iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
 		loff_t offset, sector_t sector, struct writeback_control *wbc)
 {
 	struct bio *bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES,
 			&iomap_ioend_bioset);
 	struct iomap_ioend *ioend =
 		container_of(bio, struct iomap_ioend, io_inline_bio);
 	/* set up the write bio */
 	bio_set_dev(bio, wpc->iomap.bdev);
 	bio->bi_iter.bi_sector = sector;
 	bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
 	bio->bi_write_hint = inode->i_write_hint;
 	wbc_init_bio(wbc, bio);
 	/* set up the ioend wrapped around it */
 	ioend->io_inode = inode;
 	ioend->io_offset = offset;
 	ioend->io_size = 0;
 	ioend->io_type = wpc->iomap.type;
 	ioend->io_flags = wpc->iomap.flags;
 	ioend->io_private = NULL;
 	ioend->io_bio = bio;
 	INIT_LIST_HEAD(&ioend->io_list);
 	return ioend;
 }
 /*
 * Allocate a new bio, and chain the old bio to the new one.
 *
 * Note that we have to perform the chaining in this unintuitive order
 * so that the bi_private linkage is set up in the right direction for the
 * traversal in iomap_finish_ioend().
 *
 * @prev is submitted before returning; the new bio is returned and starts
 * at the sector where @prev ends.
 */
 static struct bio *
 iomap_chain_bio(struct bio *prev)
 {
 	struct bio *new;
 	new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
 	bio_copy_dev(new, prev);/* also copies over blkcg information */
 	/* continue on disk right after the bio being replaced */
 	new->bi_iter.bi_sector = bio_end_sector(prev);
 	new->bi_opf = prev->bi_opf;
 	new->bi_write_hint = prev->bi_write_hint;
 	bio_chain(prev, new);
 	bio_get(prev);		/* for iomap_finish_ioend */
 	submit_bio(prev);
 	return new;
 }
 /*
 * Check whether a block at file @offset / disk @sector can be appended to
 * the ioend cached in @wpc.
 *
 * That requires the cached mapping and the ioend to agree on IOMAP_F_SHARED
 * and on type, and the block to be contiguous with the ioend both in the
 * file and on disk.
 */
 static bool
 iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
 		sector_t sector)
 {
 	return (wpc->iomap.flags & IOMAP_F_SHARED) ==
 			(wpc->ioend->io_flags & IOMAP_F_SHARED) &&
 	       wpc->iomap.type == wpc->ioend->io_type &&
 	       offset == wpc->ioend->io_offset + wpc->ioend->io_size &&
 	       sector == bio_end_sector(wpc->ioend->io_bio);
 }
 /*
 * Test to see if we have an existing ioend structure that we could append to
 * first, otherwise finish off the current ioend and start another.
 *
 * Adds one block (i_blocksize() bytes) of @page at file @offset to the ioend
 * cached in @wpc.  An ioend that is replaced is queued on @iolist for the
 * caller to submit.
 */
 static void
 iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
 		struct iomap_page *iop, struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct list_head *iolist)
 {
 	sector_t sector = iomap_sector(&wpc->iomap, offset);
 	unsigned len = i_blocksize(inode);
 	unsigned poff = offset & (PAGE_SIZE - 1);
 	bool merged, same_page = false;
 	/* no cached ioend, or this block does not continue it: start a new one */
 	if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, offset, sector)) {
 		if (wpc->ioend)
 			list_add(&wpc->ioend->io_list, iolist);
 		wpc->ioend = iomap_alloc_ioend(inode, wpc, offset, sector, wbc);
 	}
 	merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
 			&same_page);
 	/* write_count is dropped again in iomap_finish_page_writeback() */
 	if (iop && !same_page)
 		atomic_inc(&iop->write_count);
 	if (!merged) {
 		/* no room left in the bio: chain a fresh one and add there */
 		if (bio_full(wpc->ioend->io_bio, len)) {
 			wpc->ioend->io_bio =
 				iomap_chain_bio(wpc->ioend->io_bio);
 		}
 		bio_add_page(wpc->ioend->io_bio, page, len, poff);
 	}
 	wpc->ioend->io_size += len;
 	wbc_account_cgroup_owner(wbc, page, len);
 }
 /*
 * We implement an immediate ioend submission policy here to avoid needing to
 * chain multiple ioends and hence nest mempool allocations which can violate
 * forward progress guarantees we need to provide. The current ioend we are
 * adding blocks to is cached on the writepage context, and if the new block
 * does not append to the cached ioend it will create a new ioend and cache that
 * instead.
 *
 * If a new ioend is created and cached, the old ioend is returned and queued
 * locally for submission once the entire page is processed or an error has been
 * detected.  While ioends are submitted immediately after they are completed,
 * batching optimisations are provided by higher level block plugging.
 *
 * At the end of a writeback pass, there will be a cached ioend remaining on the
 * writepage context that the caller will need to submit.
 *
 * Blocks of @page at file offsets at or beyond @end_offset are not written.
 * Returns 0 or a negative errno; the result is also recorded on the page's
 * mapping.  The page is unlocked on every path.
 */
 static int
 iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct inode *inode,
 		struct page *page, u64 end_offset)
 {
 	struct iomap_page *iop = to_iomap_page(page);
 	struct iomap_ioend *ioend, *next;
 	unsigned len = i_blocksize(inode);
 	u64 file_offset; /* file offset of page */
 	int error = 0, count = 0, i;
 	LIST_HEAD(submit_list);
 	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE && !iop);
 	WARN_ON_ONCE(iop && atomic_read(&iop->write_count) != 0);
 	/*
 	 * Walk through the page to find areas to write back. If we run off the
 	 * end of the current map or find the current map invalid, grab a new
 	 * one.
 	 */
 	for (i = 0, file_offset = page_offset(page);
 	     i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset;
 	     i++, file_offset += len) {
 		/* blocks that are not uptodate are not written back */
 		if (iop && !test_bit(i, iop->uptodate))
 			continue;
 		error = wpc->ops->map_blocks(wpc, inode, file_offset);
 		if (error)
 			break;
 		if (WARN_ON_ONCE(wpc->iomap.type == IOMAP_INLINE))
 			continue;
 		if (wpc->iomap.type == IOMAP_HOLE)
 			continue;
 		iomap_add_to_ioend(inode, file_offset, page, iop, wpc, wbc,
 				 &submit_list);
 		/* count is the number of blocks of this page added to an ioend */
 		count++;
 	}
 	WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list));
 	WARN_ON_ONCE(!PageLocked(page));
 	WARN_ON_ONCE(PageWriteback(page));
 	/*
 	 * We cannot cancel the ioend directly here on error.  We may have
 	 * already set other pages under writeback and hence we have to run I/O
 	 * completion to mark the error state of the pages under writeback
 	 * appropriately.
 	 */
 	if (unlikely(error)) {
 		if (!count) {
 			/*
 			 * If the current page hasn't been added to ioend, it
 			 * won't be affected by I/O completions and we must
 			 * discard and unlock it right here.
 			 */
 			if (wpc->ops->discard_page)
 				wpc->ops->discard_page(page);
 			ClearPageUptodate(page);
 			unlock_page(page);
 			goto done;
 		}
 		/*
 		 * If the page was not fully cleaned, we need to ensure that the
 		 * higher layers come back to it correctly.  That means we need
 		 * to keep the page dirty, and for WB_SYNC_ALL writeback we need
 		 * to ensure the PAGECACHE_TAG_TOWRITE index mark is not removed
 		 * so another attempt to write this page in this writeback sweep
 		 * will be made.
 		 */
 		set_page_writeback_keepwrite(page);
 	} else {
 		clear_page_dirty_for_io(page);
 		set_page_writeback(page);
 	}
 	unlock_page(page);
 	/*
 	 * Preserve the original error if there was one, otherwise catch
 	 * submission errors here and propagate into subsequent ioend
 	 * submissions.
 	 */
 	list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
 		int error2;
 		list_del_init(&ioend->io_list);
 		error2 = iomap_submit_ioend(wpc, ioend, error);
 		if (error2 && !error)
 			error = error2;
 	}
 	/*
 	 * We can end up here with no error and nothing to write only if we race
 	 * with a partial page truncate on a sub-page block sized filesystem.
 	 */
 	if (!count)
 		end_page_writeback(page);
 done:
 	mapping_set_error(page->mapping, error);
 	return error;
 }
 /*
 * Write out a dirty page.
 *
 * For delalloc space on the page we need to allocate space and flush it.
 * For unwritten space on the page we need to start the conversion to
 * regular allocated space.
 *
 * @data is the struct iomap_writepage_ctx for this writeback pass.  The page
 * is redirtied and unlocked without being written (returning 0) when we are
 * called from an unexpected reclaim context or when the page lies entirely
 * beyond i_size.  Otherwise the range up to EOF is handed to
 * iomap_writepage_map() and its result is returned.
 */
 static int
 iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
 {
 	struct iomap_writepage_ctx *wpc = data;
 	struct inode *inode = page->mapping->host;
 	pgoff_t end_index;
 	u64 end_offset;
 	loff_t offset;
 	trace_iomap_writepage(inode, page, 0, 0);
 	/*
 	 * Refuse to write the page out if we are called from reclaim context.
 	 *
 	 * This avoids stack overflows when called from deeply used stacks in
 	 * random callers for direct reclaim or memcg reclaim.  We explicitly
 	 * allow reclaim from kswapd as the stack usage there is relatively low.
 	 *
 	 * This should never happen except in the case of a VM regression so
 	 * warn about it.
 	 */
 	if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
 			PF_MEMALLOC))
 		goto redirty;
 	/*
 	 * Given that we do not allow direct reclaim to call us, we should
 	 * never be called in a recursive filesystem reclaim context.
 	 */
 	if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
 		goto redirty;
 	/*
 	 * Is this page beyond the end of the file?
 	 *
 	 * The page index is less than the end_index, adjust the end_offset
 	 * to the highest offset that this page should represent.
 	 * -----------------------------------------------------
 	 * |			file mapping	       | <EOF> |
 	 * -----------------------------------------------------
 	 * | Page ... | Page N-2 | Page N-1 |  Page N  |       |
 	 * ^--------------------------------^----------|--------
 	 * |     desired writeback range    |      see else    |
 	 * ---------------------------------^------------------|
 	 */
 	offset = i_size_read(inode);
 	end_index = offset >> PAGE_SHIFT;
 	if (page->index < end_index)
 		end_offset = (loff_t)(page->index + 1) << PAGE_SHIFT;
 	else {
 		/*
 		 * Check whether the page to write out is beyond or straddles
 		 * i_size or not.
 		 * -------------------------------------------------------
 		 * |		file mapping		        | <EOF>  |
 		 * -------------------------------------------------------
 		 * | Page ... | Page N-2 | Page N-1 |  Page N   | Beyond |
 		 * ^--------------------------------^-----------|---------
 		 * |				    |      Straddles     |
 		 * ---------------------------------^-----------|--------|
 		 */
 		unsigned offset_into_page = offset & (PAGE_SIZE - 1);
 		/*
 		 * Skip the page if it is fully outside i_size, e.g. due to a
 		 * truncate operation that is in progress. We must redirty the
 		 * page so that reclaim stops reclaiming it. Otherwise
 		 * iomap_vm_releasepage() is called on it and gets confused.
 		 *
 		 * Note that end_index is an unsigned long, so on a 32-bit
 		 * system it would overflow if the given offset is greater than
 		 * 16TB and we checked whether the page is fully outside i_size
 		 * via "if (page->index >= end_index + 1)": "end_index + 1"
 		 * would evaluate to 0.  The page would then be redirtied and
 		 * written out repeatedly, resulting in an infinite loop, and
 		 * the user program that performs this operation would hang.
 		 * Instead, we detect this situation by checking whether the
 		 * page to write is totally beyond i_size or whether its offset
 		 * is exactly equal to EOF.
 		 */
 		if (page->index > end_index ||
 		    (page->index == end_index && offset_into_page == 0))
 			goto redirty;
 		/*
 		 * The page straddles i_size.  It must be zeroed out on each
 		 * and every writepage invocation because it may be mmapped.
 		 * "A file is mapped in multiples of the page size.  For a file
 		 * that is not a multiple of the page size, the remaining
 		 * memory is zeroed when mapped, and writes to that region are
 		 * not written out to the file."
 		 */
 		zero_user_segment(page, offset_into_page, PAGE_SIZE);
 		/* Adjust the end_offset to the end of file */
 		end_offset = offset;
 	}
 	return iomap_writepage_map(wpc, wbc, inode, page, end_offset);
 redirty:
 	/* Leave the page dirty for a later pass and drop the page lock. */
 	redirty_page_for_writepage(wbc, page);
 	unlock_page(page);
 	return 0;
 }
 int
 iomap_writepage(struct page *page, struct writeback_control *wbc,
 		struct iomap_writepage_ctx *wpc,
 		const struct iomap_writeback_ops *ops)
 {
 	int ret;
 	wpc->ops = ops;
 	ret = iomap_do_writepage(page, wbc, wpc);
 	if (!wpc->ioend)
 		return ret;
 	return iomap_submit_ioend(wpc, wpc->ioend, ret);
 }
 EXPORT_SYMBOL_GPL(iomap_writepage);
 int
 iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
 		struct iomap_writepage_ctx *wpc,
 		const struct iomap_writeback_ops *ops)
 {
 	int			ret;
 	wpc->ops = ops;
 	ret = write_cache_pages(mapping, wbc, iomap_do_writepage, wpc);
 	if (!wpc->ioend)
 		return ret;
 	return iomap_submit_ioend(wpc, wpc->ioend, ret);
 }
 EXPORT_SYMBOL_GPL(iomap_writepages);
 /*
 * Initialise iomap_ioend_bioset.  The front pad is the offset of the inline
 * bio inside struct iomap_ioend.
 */
 static int __init iomap_init(void)
 {
 	unsigned int pool_size = 4 * (PAGE_SIZE / SECTOR_SIZE);
 	unsigned int front_pad = offsetof(struct iomap_ioend, io_inline_bio);
 	return bioset_init(&iomap_ioend_bioset, pool_size, front_pad,
 			   BIOSET_NEED_BVECS);
 }
 fs_initcall(iomap_init);
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@ -358,7 +358,7 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
 static loff_t
 iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap)
+		void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	struct iomap_dio *dio = data;
@ -392,7 +392,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 */
 ssize_t
 iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
-		const struct iomap_ops *ops, const struct iomap_dio_ops *dops)
+		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		bool wait_for_completion)
 {
 	struct address_space *mapping = iocb->ki_filp->f_mapping;
 	struct inode *inode = file_inode(iocb->ki_filp);
@ -400,7 +401,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	loff_t pos = iocb->ki_pos, start = pos;
 	loff_t end = iocb->ki_pos + count - 1, ret = 0;
 	unsigned int flags = IOMAP_DIRECT;
 	bool wait_for_completion = is_sync_kiocb(iocb);
 	struct blk_plug plug;
 	struct iomap_dio *dio;
@ -409,6 +409,9 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	if (!count)
 		return 0;
 	if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion))
 		return -EIO;
 	dio = kmalloc(sizeof(*dio), GFP_KERNEL);
 	if (!dio)
 		return -ENOMEM;
@ -430,7 +433,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		if (pos >= dio->i_size)
 			goto out_free_dio;
-		if (iter_is_iovec(iter) && iov_iter_rw(iter) == READ)
+		if (iter_is_iovec(iter))
 			dio->flags |= IOMAP_DIO_DIRTY;
 	} else {
 		flags |= IOMAP_WRITE;
--- a/fs/iomap/fiemap.c
+++ b/fs/iomap/fiemap.c
@ -44,7 +44,7 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
 static loff_t
 iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
-		struct iomap *iomap)
+		struct iomap *iomap, struct iomap *srcmap)
 {
 	struct fiemap_ctx *ctx = data;
 	loff_t ret = length;
@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(iomap_fiemap);
 static loff_t
 iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap)
+		void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	sector_t *bno = data, addr;
--- a/fs/iomap/seek.c
+++ b/fs/iomap/seek.c
@ -119,7 +119,7 @@ page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
 static loff_t
 iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
-		      void *data, struct iomap *iomap)
+		      void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	switch (iomap->type) {
 	case IOMAP_UNWRITTEN:
@ -165,7 +165,7 @@ EXPORT_SYMBOL_GPL(iomap_seek_hole);
 static loff_t
 iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
-		      void *data, struct iomap *iomap)
+		      void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	switch (iomap->type) {
 	case IOMAP_HOLE:
--- a/fs/iomap/swapfile.c
+++ b/fs/iomap/swapfile.c
@ -76,7 +76,8 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
 * distinction between written and unwritten extents.
 */
 static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
-		loff_t count, void *data, struct iomap *iomap)
+		loff_t count, void *data, struct iomap *iomap,
 		struct iomap *srcmap)
 {
 	struct iomap_swapfile_info *isi = data;
 	int error;
--- a/fs/iomap/trace.c
+++ b/fs/iomap/trace.c
@ -0,0 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
 * Copyright (c) 2019 Christoph Hellwig
 */
 #include <linux/iomap.h>
 /*
 * We include this last to have the helpers above available for the trace
 * event implementations.
 */
 #define CREATE_TRACE_POINTS
 #include "trace.h"
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@ -0,0 +1,88 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
 * Copyright (c) 2009-2019 Christoph Hellwig
 *
 * NOTE: none of these tracepoints shall be considered a stable kernel ABI
 * as they can change at any time.
 */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM iomap
 #if !defined(_IOMAP_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _IOMAP_TRACE_H
 #include <linux/tracepoint.h>
 struct inode;
 /*
 * Event class for the read-side tracepoints: records the device and inode
 * number of @inode together with the caller-supplied @nr_pages.
 */
 DECLARE_EVENT_CLASS(iomap_readpage_class,
 	TP_PROTO(struct inode *inode, int nr_pages),
 	TP_ARGS(inode, nr_pages),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(u64, ino)
 		__field(int, nr_pages)
 	),
 	TP_fast_assign(
 		__entry->dev = inode->i_sb->s_dev;
 		__entry->ino = inode->i_ino;
 		__entry->nr_pages = nr_pages;
 	),
 	TP_printk("dev %d:%d ino 0x%llx nr_pages %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->nr_pages)
 )
 /* Define an event named @name that uses iomap_readpage_class. */
 #define DEFINE_READPAGE_EVENT(name)		\
 DEFINE_EVENT(iomap_readpage_class, name,	\
 	TP_PROTO(struct inode *inode, int nr_pages), \
 	TP_ARGS(inode, nr_pages))
 DEFINE_READPAGE_EVENT(iomap_readpage);
 DEFINE_READPAGE_EVENT(iomap_readpages);
 /*
 * Event class for the per-page tracepoints.  Records the device and inode
 * number of @inode, the byte offset of @page in the file, the current inode
 * size and the caller-supplied @off/@len pair.
 *
 * page_offset() yields a byte offset, so "pgoff" is stored as loff_t like
 * "size"; an unsigned long field would truncate it on 32-bit kernels.
 */
 DECLARE_EVENT_CLASS(iomap_page_class,
 	TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
 		 unsigned int len),
 	TP_ARGS(inode, page, off, len),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(u64, ino)
 		__field(loff_t, pgoff)
 		__field(loff_t, size)
 		__field(unsigned long, offset)
 		__field(unsigned int, length)
 	),
 	TP_fast_assign(
 		__entry->dev = inode->i_sb->s_dev;
 		__entry->ino = inode->i_ino;
 		__entry->pgoff = page_offset(page);
 		__entry->size = i_size_read(inode);
 		__entry->offset = off;
 		__entry->length = len;
 	),
 	TP_printk("dev %d:%d ino 0x%llx pgoff 0x%llx size 0x%llx offset %lx "
 		  "length %x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->pgoff,
 		  __entry->size,
 		  __entry->offset,
 		  __entry->length)
 )
 /* Define an event named @name that uses iomap_page_class. */
 #define DEFINE_PAGE_EVENT(name)		\
 DEFINE_EVENT(iomap_page_class, name,	\
 	TP_PROTO(struct inode *inode, struct page *page, unsigned long off, \
 		 unsigned int len),	\
 	TP_ARGS(inode, page, off, len))
 DEFINE_PAGE_EVENT(iomap_writepage);
 DEFINE_PAGE_EVENT(iomap_releasepage);
 DEFINE_PAGE_EVENT(iomap_invalidatepage);
 #endif /* _IOMAP_TRACE_H */
 #undef TRACE_INCLUDE_PATH
 #define TRACE_INCLUDE_PATH .
 #define TRACE_INCLUDE_FILE trace
 #include <trace/define_trace.h>
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@ -34,6 +34,7 @@
 #include "xfs_ag_resv.h"
 #include "xfs_refcount.h"
 #include "xfs_icache.h"
 #include "xfs_iomap.h"
 kmem_zone_t		*xfs_bmap_free_item_zone;
@ -4456,16 +4457,21 @@ int
 xfs_bmapi_convert_delalloc(
 	struct xfs_inode	*ip,
 	int			whichfork,
-	xfs_fileoff_t		offset_fsb,
+	xfs_off_t		offset,
-	struct xfs_bmbt_irec	*imap,
+	struct iomap		*iomap,
 	unsigned int		*seq)
 {
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 	struct xfs_mount	*mp = ip->i_mount;
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	struct xfs_bmalloca	bma = { NULL };
 	u16			flags = 0;
 	struct xfs_trans	*tp;
 	int			error;
 	if (whichfork == XFS_COW_FORK)
 		flags |= IOMAP_F_SHARED;
 	/*
 	 * Space for the extent and indirect blocks was reserved when the
 	 * delalloc extent was created so there's no need to do so here.
@ -4495,7 +4501,7 @@ xfs_bmapi_convert_delalloc(
 	 * the extent.  Just return the real extent at this offset.
 	 */
 	if (!isnullstartblock(bma.got.br_startblock)) {
-		*imap = bma.got;
+		xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
 		*seq = READ_ONCE(ifp->if_seq);
 		goto out_trans_cancel;
 	}
@ -4528,7 +4534,7 @@ xfs_bmapi_convert_delalloc(
 	XFS_STATS_INC(mp, xs_xstrat_quick);
 	ASSERT(!isnullstartblock(bma.got.br_startblock));
-	*imap = bma.got;
+	xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
 	*seq = READ_ONCE(ifp->if_seq);
 	if (whichfork == XFS_COW_FORK)
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@ -228,8 +228,7 @@ int	xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
 		struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
 		int eof);
 int	xfs_bmapi_convert_delalloc(struct xfs_inode *ip, int whichfork,
-		xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap,
+		xfs_off_t offset, struct iomap *iomap, unsigned int *seq);
 		unsigned int *seq);
 int	xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp,
 		struct xfs_inode *ip, int whichfork,
 		struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp,
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@ -6,23 +6,6 @@
 #ifndef __XFS_AOPS_H__
 #define __XFS_AOPS_H__
 extern struct bio_set xfs_ioend_bioset;
 /*
 * Structure for buffered I/O completions.
 */
 struct xfs_ioend {
 	struct list_head	io_list;	/* next ioend in chain */
 	int			io_fork;	/* inode fork written back */
 	xfs_exntst_t		io_state;	/* extent state */
 	struct inode		*io_inode;	/* file being written to */
 	size_t			io_size;	/* size of the extent */
 	xfs_off_t		io_offset;	/* offset in the file */
 	struct xfs_trans	*io_append_trans;/* xact. for size update */
 	struct bio		*io_bio;	/* bio being built */
 	struct bio		io_inline_bio;	/* MUST BE LAST! */
 };
 extern const struct address_space_operations xfs_address_space_operations;
 extern const struct address_space_operations xfs_dax_aops;
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@ -188,7 +188,7 @@ xfs_file_dio_aio_read(
 	file_accessed(iocb->ki_filp);
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-	ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
+	ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL, is_sync_kiocb(iocb));
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 	return ret;
@ -547,15 +547,12 @@ xfs_file_dio_aio_write(
 	}
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
 	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops);
 	/*
-	 * If unaligned, this is the only IO in-flight. If it has not yet
+	 * If unaligned, this is the only IO in-flight. Wait on it before we
-	 * completed, wait on it before we release the iolock to prevent
+	 * release the iolock to prevent subsequent overlapping IO.
 	 * subsequent overlapping IO.
 	 */
-	if (ret == -EIOCBQUEUED && unaligned_io)
+	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops,
-		inode_dio_wait(inode);
+			   is_sync_kiocb(iocb) || unaligned_io);
 out:
 	xfs_iunlock(ip, iolock);
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@ -54,7 +54,7 @@ xfs_bmbt_to_iomap(
 	struct xfs_inode	*ip,
 	struct iomap		*iomap,
 	struct xfs_bmbt_irec	*imap,
-	bool			shared)
+	u16			flags)
 {
 	struct xfs_mount	*mp = ip->i_mount;
@ -79,12 +79,11 @@ xfs_bmbt_to_iomap(
 	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
 	iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
 	iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip));
 	iomap->flags = flags;
 	if (xfs_ipincount(ip) &&
 	    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
 		iomap->flags |= IOMAP_F_DIRTY;
 	if (shared)
 		iomap->flags |= IOMAP_F_SHARED;
 	return 0;
 }
@ -540,6 +539,7 @@ xfs_file_iomap_begin_delay(
 	struct xfs_iext_cursor	icur, ccur;
 	xfs_fsblock_t		prealloc_blocks = 0;
 	bool			eof = false, cow_eof = false, shared = false;
 	u16			iomap_flags = 0;
 	int			whichfork = XFS_DATA_FORK;
 	int			error = 0;
@ -707,22 +707,28 @@ xfs_file_iomap_begin_delay(
 	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
 	 * them out if the write happens to fail.
 	 */
-	iomap->flags |= IOMAP_F_NEW;
+	if (whichfork == XFS_DATA_FORK) {
-	trace_xfs_iomap_alloc(ip, offset, count, whichfork,
+		iomap_flags |= IOMAP_F_NEW;
-			whichfork == XFS_DATA_FORK ? &imap : &cmap);
+		trace_xfs_iomap_alloc(ip, offset, count, whichfork, &imap);
 	} else {
 		trace_xfs_iomap_alloc(ip, offset, count, whichfork, &cmap);
 	}
 done:
 	if (whichfork == XFS_COW_FORK) {
 		if (imap.br_startoff > offset_fsb) {
 			xfs_trim_extent(&cmap, offset_fsb,
 					imap.br_startoff - offset_fsb);
-			error = xfs_bmbt_to_iomap(ip, iomap, &cmap, true);
+			error = xfs_bmbt_to_iomap(ip, iomap, &cmap,
 					IOMAP_F_SHARED);
 			goto out_unlock;
 		}
 		/* ensure we only report blocks we have a reservation for */
 		xfs_trim_extent(&imap, cmap.br_startoff, cmap.br_blockcount);
 		shared = true;
 	}
-	error = xfs_bmbt_to_iomap(ip, iomap, &imap, shared);
+	if (shared)
 		iomap_flags |= IOMAP_F_SHARED;
 	error = xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
 out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
@ -922,7 +928,8 @@ xfs_file_iomap_begin(
 	loff_t			offset,
 	loff_t			length,
 	unsigned		flags,
-	struct iomap		*iomap)
+	struct iomap		*iomap,
 	struct iomap		*srcmap)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@ -930,6 +937,7 @@ xfs_file_iomap_begin(
 	xfs_fileoff_t		offset_fsb, end_fsb;
 	int			nimaps = 1, error = 0;
 	bool			shared = false;
 	u16			iomap_flags = 0;
 	unsigned		lockmode;
 	if (XFS_FORCED_SHUTDOWN(mp))
@ -1045,11 +1053,20 @@ xfs_file_iomap_begin(
 	if (error)
 		return error;
-	iomap->flags |= IOMAP_F_NEW;
+	iomap_flags |= IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
 out_finish:
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, shared);
+	/*
 	 * Writes that span EOF might trigger an IO size update on completion,
 	 * so consider them to be dirty for the purposes of O_DSYNC even if
 	 * there is no other metadata changes pending or have been made here.
 	 */
 	if ((flags & IOMAP_WRITE) && offset + length > i_size_read(inode))
 		iomap_flags |= IOMAP_F_DIRTY;
 	if (shared)
 		iomap_flags |= IOMAP_F_SHARED;
 	return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
 out_found:
 	ASSERT(nimaps);
@ -1145,7 +1162,8 @@ xfs_seek_iomap_begin(
 	loff_t			offset,
 	loff_t			length,
 	unsigned		flags,
-	struct iomap		*iomap)
+	struct iomap		*iomap,
 	struct iomap		*srcmap)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@ -1193,7 +1211,7 @@ xfs_seek_iomap_begin(
 		if (data_fsb < cow_fsb + cmap.br_blockcount)
 			end_fsb = min(end_fsb, data_fsb);
 		xfs_trim_extent(&cmap, offset_fsb, end_fsb);
-		error = xfs_bmbt_to_iomap(ip, iomap, &cmap, true);
+		error = xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
 		/*
 		 * This is a COW extent, so we must probe the page cache
 		 * because there could be dirty page cache being backed
@ -1215,7 +1233,7 @@ xfs_seek_iomap_begin(
 	imap.br_state = XFS_EXT_NORM;
 done:
 	xfs_trim_extent(&imap, offset_fsb, end_fsb);
-	error = xfs_bmbt_to_iomap(ip, iomap, &imap, false);
+	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
 out_unlock:
 	xfs_iunlock(ip, lockmode);
 	return error;
@ -1231,7 +1249,8 @@ xfs_xattr_iomap_begin(
 	loff_t			offset,
 	loff_t			length,
 	unsigned		flags,
-	struct iomap		*iomap)
+	struct iomap		*iomap,
 	struct iomap		*srcmap)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@ -1261,7 +1280,7 @@ xfs_xattr_iomap_begin(
 	if (error)
 		return error;
 	ASSERT(nimaps);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, false);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
 }
 const struct iomap_ops xfs_xattr_iomap_ops = {
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@ -16,7 +16,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 int xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
-		struct xfs_bmbt_irec *, bool shared);
+		struct xfs_bmbt_irec *, u16);
 xfs_extlen_t xfs_eof_alignment(struct xfs_inode *ip, xfs_extlen_t extsize);
 static inline xfs_filblks_t
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@ -178,7 +178,7 @@ xfs_fs_map_blocks(
 	}
 	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-	error = xfs_bmbt_to_iomap(ip, iomap, &imap, false);
+	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
 	*device_generation = mp->m_generation;
 	return error;
 out_unlock:
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@ -1442,7 +1442,7 @@ xfs_reflink_dirty_extents(
 			flen = XFS_FSB_TO_B(mp, rlen);
 			if (fpos + flen > isize)
 				flen = isize - fpos;
-			error = iomap_file_dirty(VFS_I(ip), fpos, flen,
+			error = iomap_file_unshare(VFS_I(ip), fpos, flen,
 					&xfs_iomap_ops);
 			xfs_ilock(ip, XFS_ILOCK_EXCL);
 			if (error)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@ -40,7 +40,6 @@
 #include <linux/parser.h>
 static const struct super_operations xfs_super_operations;
 struct bio_set xfs_ioend_bioset;
 static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
 #ifdef DEBUG
@ -1853,15 +1852,10 @@ MODULE_ALIAS_FS("xfs");
 STATIC int __init
 xfs_init_zones(void)
 {
 	if (bioset_init(&xfs_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
 			offsetof(struct xfs_ioend, io_inline_bio),
 			BIOSET_NEED_BVECS))
 		goto out;
 	xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
 						"xfs_log_ticket");
 	if (!xfs_log_ticket_zone)
-		goto out_free_ioend_bioset;
+		goto out;
 	xfs_bmap_free_item_zone = kmem_zone_init(
 			sizeof(struct xfs_extent_free_item),
@ -1996,8 +1990,6 @@ xfs_init_zones(void)
 	kmem_zone_destroy(xfs_bmap_free_item_zone);
 out_destroy_log_ticket_zone:
 	kmem_zone_destroy(xfs_log_ticket_zone);
 out_free_ioend_bioset:
 	bioset_exit(&xfs_ioend_bioset);
 out:
 	return -ENOMEM;
 }
@ -2028,7 +2020,6 @@ xfs_destroy_zones(void)
 	kmem_zone_destroy(xfs_btree_cur_zone);
 	kmem_zone_destroy(xfs_bmap_free_item_zone);
 	kmem_zone_destroy(xfs_log_ticket_zone);
 	bioset_exit(&xfs_ioend_bioset);
 }
 STATIC int __init
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@ -1158,71 +1158,6 @@ DEFINE_RW_EVENT(xfs_file_buffered_write);
 DEFINE_RW_EVENT(xfs_file_direct_write);
 DEFINE_RW_EVENT(xfs_file_dax_write);
 DECLARE_EVENT_CLASS(xfs_page_class,
 	TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
 		 unsigned int len),
 	TP_ARGS(inode, page, off, len),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
 		__field(pgoff_t, pgoff)
 		__field(loff_t, size)
 		__field(unsigned long, offset)
 		__field(unsigned int, length)
 	),
 	TP_fast_assign(
 		__entry->dev = inode->i_sb->s_dev;
 		__entry->ino = XFS_I(inode)->i_ino;
 		__entry->pgoff = page_offset(page);
 		__entry->size = i_size_read(inode);
 		__entry->offset = off;
 		__entry->length = len;
 	),
 	TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
 		  "length %x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->pgoff,
 		  __entry->size,
 		  __entry->offset,
 		  __entry->length)
 )
 #define DEFINE_PAGE_EVENT(name)		\
 DEFINE_EVENT(xfs_page_class, name,	\
 	TP_PROTO(struct inode *inode, struct page *page, unsigned long off, \
 		 unsigned int len),	\
 	TP_ARGS(inode, page, off, len))
 DEFINE_PAGE_EVENT(xfs_writepage);
 DEFINE_PAGE_EVENT(xfs_releasepage);
 DEFINE_PAGE_EVENT(xfs_invalidatepage);
 DECLARE_EVENT_CLASS(xfs_readpage_class,
 	TP_PROTO(struct inode *inode, int nr_pages),
 	TP_ARGS(inode, nr_pages),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
 		__field(int, nr_pages)
 	),
 	TP_fast_assign(
 		__entry->dev = inode->i_sb->s_dev;
 		__entry->ino = inode->i_ino;
 		__entry->nr_pages = nr_pages;
 	),
 	TP_printk("dev %d:%d ino 0x%llx nr_pages %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->nr_pages)
 )
 #define DEFINE_READPAGE_EVENT(name)		\
 DEFINE_EVENT(xfs_readpage_class, name,	\
 	TP_PROTO(struct inode *inode, int nr_pages), \
 	TP_ARGS(inode, nr_pages))
 DEFINE_READPAGE_EVENT(xfs_vm_readpage);
 DEFINE_READPAGE_EVENT(xfs_vm_readpages);
 DECLARE_EVENT_CLASS(xfs_imap_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
 		 int whichfork, struct xfs_bmbt_irec *irec),
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@ -4,6 +4,7 @@
 #include <linux/atomic.h>
 #include <linux/bitmap.h>
 #include <linux/blk_types.h>
 #include <linux/mm.h>
 #include <linux/types.h>
 #include <linux/mm_types.h>
@ -12,6 +13,7 @@
 struct address_space;
 struct fiemap_extent_info;
 struct inode;
 struct iomap_writepage_ctx;
 struct iov_iter;
 struct kiocb;
 struct page;
@ -21,28 +23,45 @@ struct vm_fault;
 /*
 * Types of block ranges for iomap mappings:
 */
-#define IOMAP_HOLE	0x01	/* no blocks allocated, need allocation */
+#define IOMAP_HOLE	0	/* no blocks allocated, need allocation */
-#define IOMAP_DELALLOC	0x02	/* delayed allocation blocks */
+#define IOMAP_DELALLOC	1	/* delayed allocation blocks */
-#define IOMAP_MAPPED	0x03	/* blocks allocated at @addr */
+#define IOMAP_MAPPED	2	/* blocks allocated at @addr */
-#define IOMAP_UNWRITTEN	0x04	/* blocks allocated at @addr in unwritten state */
+#define IOMAP_UNWRITTEN	3	/* blocks allocated at @addr in unwritten state */
-#define IOMAP_INLINE	0x05	/* data inline in the inode */
+#define IOMAP_INLINE	4	/* data inline in the inode */
 /*
- * Flags for all iomap mappings:
+ * Flags reported by the file system from iomap_begin:
 *
 * IOMAP_F_NEW indicates that the blocks have been newly allocated and need
 * zeroing for areas that no data is copied to.
 *
 * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access
 * written data and requires fdatasync to commit them to persistent storage.
 * This needs to take into account metadata changes that *may* be made at IO
 * completion, such as file size updates from direct IO.
 *
 * IOMAP_F_SHARED indicates that the blocks are shared, and will need to be
 * unshared as part of a write.
 *
 * IOMAP_F_MERGED indicates that the iomap contains the merge of multiple block
 * mappings.
 *
 * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of
 * buffer heads for this mapping.
 */
-#define IOMAP_F_NEW		0x01	/* blocks have been newly allocated */
+#define IOMAP_F_NEW		0x01
-#define IOMAP_F_DIRTY		0x02	/* uncommitted metadata */
+#define IOMAP_F_DIRTY		0x02
-#define IOMAP_F_BUFFER_HEAD	0x04	/* file system requires buffer heads */
+#define IOMAP_F_SHARED		0x04
-#define IOMAP_F_SIZE_CHANGED	0x08	/* file size has changed */
+#define IOMAP_F_MERGED		0x08
 #define IOMAP_F_BUFFER_HEAD	0x10
 /*
- * Flags that only need to be reported for IOMAP_REPORT requests:
+ * Flags set by the core iomap code during operations:
 *
 * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size
 * has changed as the result of this write operation.
 */
-#define IOMAP_F_MERGED		0x10	/* contains multiple blocks/extents */
+#define IOMAP_F_SIZE_CHANGED	0x100
 #define IOMAP_F_SHARED		0x20	/* block shared with another file */
 /*
 * Flags from 0x1000 up are for file system specific usage:
@ -110,7 +129,8 @@ struct iomap_ops {
 	 * The actual length is returned in iomap->length.
 	 */
 	int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length,
-			unsigned flags, struct iomap *iomap);
+			unsigned flags, struct iomap *iomap,
 			struct iomap *srcmap);
 	/*
 	 * Commit and/or unreserve space previously allocated using iomap_begin.
@ -126,29 +146,12 @@ struct iomap_ops {
 * Main iomap iterator function.
 */
 typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
-		void *data, struct iomap *iomap);
+		void *data, struct iomap *iomap, struct iomap *srcmap);
 loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
 		unsigned flags, const struct iomap_ops *ops, void *data,
 		iomap_actor_t actor);
 /*
 * Structure allocate for each page when block size < PAGE_SIZE to track
 * sub-page uptodate status and I/O completions.
 */
 struct iomap_page {
 	atomic_t		read_count;
 	atomic_t		write_count;
 	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
 };
 static inline struct iomap_page *to_iomap_page(struct page *page)
 {
 	if (page_has_private(page))
 		return (struct iomap_page *)page_private(page);
 	return NULL;
 }
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
 int iomap_readpage(struct page *page, const struct iomap_ops *ops);
@ -166,7 +169,7 @@ int iomap_migrate_page(struct address_space *mapping, struct page *newpage,
 #else
 #define iomap_migrate_page NULL
 #endif
-int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
+int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
 		bool *did_zero, const struct iomap_ops *ops);
@ -183,6 +186,63 @@ loff_t iomap_seek_data(struct inode *inode, loff_t offset,
 sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
 		const struct iomap_ops *ops);
 /*
 * Structure for writeback I/O completions.
 */
 struct iomap_ioend {
 	struct list_head	io_list;	/* next ioend in chain */
 	/* NOTE(review): presumably the IOMAP_* mapping type -- confirm */
 	u16			io_type;
 	u16			io_flags;	/* IOMAP_F_* */
 	struct inode		*io_inode;	/* file being written to */
 	size_t			io_size;	/* size of the extent */
 	loff_t			io_offset;	/* offset in the file */
 	void			*io_private;	/* file system private data */
 	struct bio		*io_bio;	/* bio being built */
 	/*
 	 * Embedded by value, unlike io_bio above which is a pointer. Do not
 	 * add members after it.
 	 * NOTE(review): the reason for the ordering constraint is not visible
 	 * here; presumably the bio is followed by trailing storage -- confirm.
 	 */
 	struct bio		io_inline_bio;	/* MUST BE LAST! */
 };
 /*
  * Callbacks a file system supplies for writeback. Only ->map_blocks is
  * required; the other two are optional.
  * NOTE(review): the int-returning callbacks presumably return 0 or a
  * negative errno -- confirm against the callers.
  */
 struct iomap_writeback_ops {
 	/*
 	 * Required, maps the blocks so that writeback can be performed on
 	 * the range starting at offset.
 	 */
 	int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
 				loff_t offset);
 	/*
 	 * Optional, allows the file systems to perform actions just before
 	 * submitting the bio and/or override the bio end_io handler for complex
 	 * operations like copy on write extent manipulation or unwritten extent
 	 * conversions.
 	 */
 	int (*prepare_ioend)(struct iomap_ioend *ioend, int status);
 	/*
 	 * Optional, allows the file system to discard state on a page where
 	 * we failed to submit any I/O.
 	 */
 	void (*discard_page)(struct page *page);
 };
 /*
  * Writeback context. A pointer to it is taken by iomap_writepage(),
  * iomap_writepages() and the ->map_blocks callback.
  */
 struct iomap_writepage_ctx {
 	/* NOTE(review): presumably the mapping filled in by ->map_blocks -- confirm */
 	struct iomap		iomap;
 	/* NOTE(review): presumably the ioend currently being built -- confirm */
 	struct iomap_ioend	*ioend;
 	/* file system supplied writeback callbacks */
 	const struct iomap_writeback_ops *ops;
 };
 /*
  * Writeback entry points and ioend helpers.
  *
  * iomap_ioend_try_merge() takes a caller-supplied merge_private callback
  * that is handed two ioends. iomap_writepage() and iomap_writepages() both
  * take a writeback context and the file system's iomap_writeback_ops.
  */
 void iomap_finish_ioends(struct iomap_ioend *ioend, int error);
 void iomap_ioend_try_merge(struct iomap_ioend *ioend,
 		struct list_head *more_ioends,
 		void (*merge_private)(struct iomap_ioend *ioend,
 				struct iomap_ioend *next));
 void iomap_sort_ioends(struct list_head *ioend_list);
 int iomap_writepage(struct page *page, struct writeback_control *wbc,
 		struct iomap_writepage_ctx *wpc,
 		const struct iomap_writeback_ops *ops);
 int iomap_writepages(struct address_space *mapping,
 		struct writeback_control *wbc, struct iomap_writepage_ctx *wpc,
 		const struct iomap_writeback_ops *ops);
 /*
 * Flags for direct I/O ->end_io:
 */
@ -195,7 +255,8 @@ struct iomap_dio_ops {
 };
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
-		const struct iomap_ops *ops, const struct iomap_dio_ops *dops);
+		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		bool wait_for_completion);
 int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
 #ifdef CONFIG_SWAP