Merge branch 'for-linus-1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs update from Al Viro: "Part one: - struct filename-related cleanups - saner iov_iter_init() replacements (and switching the syscalls to use of those) - ntfs switch to ->write_iter() (Anton) - aio cleanups and splitting iocb into common and async parts (Christoph) - assorted fixes (me, bfields, Andrew Elble) There's a lot more, including the completion of switchover to ->{read,write}_iter(), d_inode/d_backing_inode annotations, f_flags race fixes, etc, but that goes after #for-davem merge. David has pulled it, and once it's in I'll send the next vfs pull request" * 'for-linus-1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (35 commits) sg_start_req(): use import_iovec() sg_start_req(): make sure that there's not too many elements in iovec blk_rq_map_user(): use import_single_range() sg_io(): use import_iovec() process_vm_access: switch to {compat_,}import_iovec() switch keyctl_instantiate_key_common() to iov_iter switch {compat_,}do_readv_writev() to {compat_,}import_iovec() aio_setup_vectored_rw(): switch to {compat_,}import_iovec() vmsplice_to_user(): switch to import_iovec() kill aio_setup_single_vector() aio: simplify arguments of aio_setup_..._rw() aio: lift iov_iter_init() into aio_setup_..._rw() lift iov_iter into {compat_,}do_readv_writev() NFS: fix BUG() crash in notify_change() with patch to chown_common() dcache: return -ESTALE not -EBUSY on distributed fs race NTFS: Version 2.1.32 - Update file write from aio_write to write_iter. VFS: Add iov_iter_fault_in_multipages_readable() drop bogus check in file_open_root() switch security_inode_getattr() to struct path * constify tomoyo_realpath_from_path() ...
2024-11-24 04:20:53 +07:00 · 2015-04-14 15:31:03 -07:00 · 2015-04-14 15:31:03 -07:00 · ca2ec32658
commit ca2ec32658
parent 6c8a53c9e6 fdc81f45e9
103 changed files with 838 additions and 1117 deletions
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@ -155,8 +155,6 @@ int copy_thread(unsigned long clone_flags,
 */
 void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp)
 {
-	set_fs(USER_DS); /* user space */
-
 	regs->sp = usp;
 	regs->ret = pc;

--- a/arch/c6x/kernel/process.c
+++ b/arch/c6x/kernel/process.c
@ -101,7 +101,6 @@ void start_thread(struct pt_regs *regs, unsigned int pc, unsigned long usp)
 	 */
 	usp -= 8;

-	set_fs(USER_DS);
 	regs->pc  = pc;
 	regs->sp  = usp;
 	regs->tsr |= 0x40; /* set user mode */
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@ -176,8 +176,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set)
 	struct sigframe __user *frame;
 	int rsig, sig = ksig->sig;

-	set_fs(USER_DS);
-
 	frame = get_sigframe(ksig, sizeof(*frame));

 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
@ -257,8 +255,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set)
 	struct rt_sigframe __user *frame;
 	int rsig, sig = ksig->sig;

-	set_fs(USER_DS);
-
 	frame = get_sigframe(ksig, sizeof(*frame));

 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@ -37,8 +37,6 @@
 */
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 {
-	/* Set to run with user-mode data segmentation */
-	set_fs(USER_DS);
 	/* We want to zero all data-containing registers. Is this overkill? */
 	memset(regs, 0, sizeof(*regs));
 	/* We might want to also zero all Processor registers here */
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@ -214,8 +214,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	regs->r2 = (unsigned long)&frame->uc;
 	regs->bpc = (unsigned long)ksig->ka.sa.sa_handler;

-	set_fs(USER_DS);
-
 #if DEBUG_SIG
 	printk("SIG deliver (%s:%d): sp=%p pc=%p\n",
 		current->comm, current->pid, frame, regs->pc);
--- a/arch/metag/include/asm/processor.h
+++ b/arch/metag/include/asm/processor.h
@ -111,7 +111,6 @@ struct thread_struct {
 */
 #define start_thread(regs, pc, usp) do {				   \
 	unsigned int *argc = (unsigned int *) bprm->exec;		   \
-	set_fs(USER_DS);						   \
 	current->thread.int_depth = 1;					   \
 	/* Force this process down to user land */			   \
 	regs->ctx.SaveMask = TBICTX_PRIV_BIT;				   \
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@ -236,8 +236,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	/* Offset to handle microblaze rtid r14, 0 */
 	regs->pc = (unsigned long)ksig->ka.sa.sa_handler;

-	set_fs(USER_DS);
-
 #ifdef DEBUG_SIG
 	pr_info("SIG deliver (%s:%d): sp=%p pc=%08lx\n",
 		current->comm, current->pid, frame, regs->pc);
--- a/arch/nios2/kernel/process.c
+++ b/arch/nios2/kernel/process.c
@ -94,7 +94,6 @@ void show_regs(struct pt_regs *regs)

 void flush_thread(void)
 {
-	set_fs(USER_DS);
 }

 int copy_thread(unsigned long clone_flags,
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@ -197,7 +197,6 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 {
 	unsigned long sr = mfspr(SPR_SR) & ~SPR_SR_SM;

-	set_fs(USER_DS);
 	memset(regs, 0, sizeof(struct pt_regs));

 	regs->pc = pc;
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@ -21,7 +21,7 @@
 #include <linux/module.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <asm/ebcdic.h>
 #include "hypfs.h"

--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@ -329,8 +329,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
 	if (err)
 		return -EFAULT;

-	set_fs(USER_DS);
-
 	pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
 		 current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);

@ -408,8 +406,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	if (err)
 		return -EFAULT;

-	set_fs(USER_DS);
-
 	pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
 		 current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);

--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@ -457,8 +457,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs

 	regs->pc = neff_sign_extend((unsigned long)ksig->ka.sa.sa_handler);

-	set_fs(USER_DS);
-
 	/* Broken %016Lx */
 	pr_debug("SIG deliver (#%d,%s:%d): sp=%p pc=%08Lx%08Lx link=%08Lx%08Lx\n",
 		 signal, current->comm, current->pid, frame,
@ -547,8 +545,6 @@ static int setup_rt_frame(struct ksignal *kig, sigset_t *set,
 	regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->uc.uc_mcontext;
 	regs->pc = neff_sign_extend((unsigned long)ksig->ka.sa.sa_handler);

-	set_fs(USER_DS);
-
 	pr_debug("SIG deliver (#%d,%s:%d): sp=%p pc=%08Lx%08Lx link=%08Lx%08Lx\n",
 		 signal, current->comm, current->pid, frame,
 		 regs->pc >> 32, regs->pc & 0xffffffff,
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@ -405,11 +405,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
 	regs->areg[8] = (unsigned long) &frame->uc;
 	regs->threadptr = tp;

-	/* Set access mode to USER_DS.  Nomenclature is outdated, but
-	 * functionality is used in uaccess.h
-	 */
-	set_fs(USER_DS);
-
 #if DEBUG_SIG
 	printk("SIG rt deliver (%s:%d): signal=%d sp=%p pc=%08x\n",
 		current->comm, current->pid, signal, frame, regs->pc);
--- a/block/blk-map.c
+++ b/block/blk-map.c
@ -124,10 +124,10 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
 {
 	struct iovec iov;
 	struct iov_iter i;
+	int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i);

-	iov.iov_base = ubuf;
-	iov.iov_len = len;
-	iov_iter_init(&i, rq_data_dir(rq), &iov, 1, len);
+	if (unlikely(ret < 0))
+		return ret;

 	return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
 }
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@ -335,16 +335,14 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 		struct iov_iter i;
 		struct iovec *iov = NULL;

-		ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count,
-					    0, NULL, &iov);
-		if (ret < 0) {
-			kfree(iov);
+		ret = import_iovec(rq_data_dir(rq),
+				   hdr->dxferp, hdr->iovec_count,
+				   0, &iov, &i);
+		if (ret < 0)
 			goto out_free_cdb;
-		}

 		/* SG_IO howto says that the shorter of the two wins */
-		iov_iter_init(&i, rq_data_dir(rq), iov, hdr->iovec_count,
-			      min_t(unsigned, ret, hdr->dxfer_len));
+		iov_iter_truncate(&i, hdr->dxfer_len);

 		ret = blk_rq_map_user_iov(q, rq, NULL, &i, GFP_KERNEL);
 		kfree(iov);
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@ -26,7 +26,7 @@
 #include <linux/pfn.h>
 #include <linux/export.h>
 #include <linux/io.h>
-#include <linux/aio.h>
+#include <linux/uio.h>

 #include <linux/uaccess.h>

--- a/drivers/char/tile-srom.c
+++ b/drivers/char/tile-srom.c
@ -27,7 +27,6 @@
 #include <linux/types.h>	/* size_t */
 #include <linux/proc_fs.h>
 #include <linux/fcntl.h>	/* O_ACCMODE */
-#include <linux/aio.h>
 #include <linux/pagemap.h>
 #include <linux/hugetlb.h>
 #include <linux/uaccess.h>
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@ -40,7 +40,6 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/io.h>
-#include <linux/aio.h>
 #include <linux/jiffies.h>
 #include <linux/cpu.h>
 #include <asm/pgtable.h>
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@ -39,7 +39,6 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/io.h>
-#include <linux/aio.h>
 #include <linux/jiffies.h>
 #include <asm/pgtable.h>
 #include <linux/delay.h>
--- a/drivers/misc/mei/amthif.c
+++ b/drivers/misc/mei/amthif.c
@ -19,7 +19,6 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/ioctl.h>
 #include <linux/cdev.h>
 #include <linux/list.h>
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@ -22,7 +22,6 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/poll.h>
 #include <linux/init.h>
 #include <linux/ioctl.h>
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@ -21,7 +21,6 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/pci.h>
 #include <linux/poll.h>
 #include <linux/ioctl.h>
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@ -33,7 +33,6 @@ static int sg_version_num = 30536;	/* 2 digits for each component */
 #include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/mm.h>
-#include <linux/aio.h>
 #include <linux/errno.h>
 #include <linux/mtio.h>
 #include <linux/ioctl.h>
@ -51,6 +50,7 @@ static int sg_version_num = 30536;	/* 2 digits for each component */
 #include <linux/mutex.h>
 #include <linux/atomic.h>
 #include <linux/ratelimit.h>
+#include <linux/uio.h>

 #include "scsi.h"
 #include <scsi/scsi_dbg.h>
@ -1745,17 +1745,14 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
 	}

 	if (iov_count) {
-		int size = sizeof(struct iovec) * iov_count;
-		struct iovec *iov;
+		struct iovec *iov = NULL;
 		struct iov_iter i;

-		iov = memdup_user(hp->dxferp, size);
-		if (IS_ERR(iov))
-			return PTR_ERR(iov);
+		res = import_iovec(rw, hp->dxferp, iov_count, 0, &iov, &i);
+		if (res < 0)
+			return res;

-		iov_iter_init(&i, rw, iov, iov_count,
-			      min_t(size_t, hp->dxfer_len,
-				    iov_length(iov, iov_count)));
+		iov_iter_truncate(&i, hp->dxfer_len);

 		res = blk_rq_map_user_iov(q, rq, md, &i, GFP_ATOMIC);
 		kfree(iov);
--- a/drivers/staging/unisys/include/timskmod.h
+++ b/drivers/staging/unisys/include/timskmod.h
@ -46,7 +46,6 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@ -23,6 +23,7 @@
 #include <linux/export.h>
 #include <linux/hid.h>
 #include <linux/module.h>
+#include <linux/uio.h>
 #include <asm/unaligned.h>

 #include <linux/usb/composite.h>
@ -655,9 +656,10 @@ static void ffs_user_copy_worker(struct work_struct *work)
 		unuse_mm(io_data->mm);
 	}

-	aio_complete(io_data->kiocb, ret, ret);
+	io_data->kiocb->ki_complete(io_data->kiocb, ret, ret);

-	if (io_data->ffs->ffs_eventfd && !io_data->kiocb->ki_eventfd)
+	if (io_data->ffs->ffs_eventfd &&
+	    !(io_data->kiocb->ki_flags & IOCB_EVENTFD))
 		eventfd_signal(io_data->ffs->ffs_eventfd, 1);

 	usb_ep_free_request(io_data->ep, io_data->req);
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@ -26,6 +26,7 @@
 #include <linux/poll.h>
 #include <linux/mmu_context.h>
 #include <linux/aio.h>
+#include <linux/uio.h>

 #include <linux/device.h>
 #include <linux/moduleparam.h>
@ -469,7 +470,7 @@ static void ep_user_copy_worker(struct work_struct *work)
 		ret = -EFAULT;

 	/* completing the iocb can drop the ctx and mm, don't touch mm after */
-	aio_complete(iocb, ret, ret);
+	iocb->ki_complete(iocb, ret, ret);

 	kfree(priv->buf);
 	kfree(priv->to_free);
@ -497,7 +498,8 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
 		kfree(priv);
 		iocb->private = NULL;
 		/* aio_complete() reports bytes-transferred _and_ faults */
-		aio_complete(iocb, req->actual ? req->actual : req->status,
+
+		iocb->ki_complete(iocb, req->actual ? req->actual : req->status,
 				req->status);
 	} else {
 		/* ep_copy_to_user() won't report both; we hide some faults */
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@ -33,7 +33,7 @@
 #include <linux/pagemap.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>

--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@ -12,7 +12,7 @@
 *  affs regular file handling primitives
 */

-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "affs.h"

 static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@ -14,7 +14,6 @@
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
-#include <linux/aio.h>
 #include "internal.h"

 static int afs_write_back_from_locked_page(struct afs_writeback *wb,
--- a/fs/aio.c
+++ b/fs/aio.c
@ -151,6 +151,38 @@ struct kioctx {
 	unsigned		id;
 };

+/*
+ * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
+ * cancelled or completed (this makes a certain amount of sense because
+ * successful cancellation - io_cancel() - does deliver the completion to
+ * userspace).
+ *
+ * And since most things don't implement kiocb cancellation and we'd really like
+ * kiocb completion to be lockless when possible, we use ki_cancel to
+ * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
+ * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
+ */
+#define KIOCB_CANCELLED		((void *) (~0ULL))
+
+struct aio_kiocb {
+	struct kiocb		common;
+
+	struct kioctx		*ki_ctx;
+	kiocb_cancel_fn		*ki_cancel;
+
+	struct iocb __user	*ki_user_iocb;	/* user's aiocb */
+	__u64			ki_user_data;	/* user's data for completion */
+
+	struct list_head	ki_list;	/* the aio core uses this
+						 * for cancellation */
+
+	/*
+	 * If the aio_resfd field of the userspace iocb is not zero,
+	 * this is the underlying eventfd context to deliver events to.
+	 */
+	struct eventfd_ctx	*ki_eventfd;
+};
+
 /*------ sysctl variables----*/
 static DEFINE_SPINLOCK(aio_nr_lock);
 unsigned long aio_nr;		/* current system wide number of aio requests */
@ -220,7 +252,7 @@ static int __init aio_setup(void)
 	if (IS_ERR(aio_mnt))
 		panic("Failed to create aio fs mount.");

-	kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+	kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);

 	pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
@ -484,8 +516,9 @@ static int aio_setup_ring(struct kioctx *ctx)
 #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
 #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)

-void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
+void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
 {
+	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
 	struct kioctx *ctx = req->ki_ctx;
 	unsigned long flags;

@ -500,7 +533,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);

-static int kiocb_cancel(struct kiocb *kiocb)
+static int kiocb_cancel(struct aio_kiocb *kiocb)
 {
 	kiocb_cancel_fn *old, *cancel;

@ -518,7 +551,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
 		cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
 	} while (cancel != old);

-	return cancel(kiocb);
+	return cancel(&kiocb->common);
 }

 static void free_ioctx(struct work_struct *work)
@ -554,13 +587,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 static void free_ioctx_users(struct percpu_ref *ref)
 {
 	struct kioctx *ctx = container_of(ref, struct kioctx, users);
-	struct kiocb *req;
+	struct aio_kiocb *req;

 	spin_lock_irq(&ctx->ctx_lock);

 	while (!list_empty(&ctx->active_reqs)) {
 		req = list_first_entry(&ctx->active_reqs,
-				       struct kiocb, ki_list);
+				       struct aio_kiocb, ki_list);

 		list_del_init(&req->ki_list);
 		kiocb_cancel(req);
@ -786,22 +819,6 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 	return 0;
 }

-/* wait_on_sync_kiocb:
- *	Waits on the given sync kiocb to complete.
- */
-ssize_t wait_on_sync_kiocb(struct kiocb *req)
-{
-	while (!req->ki_ctx) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (req->ki_ctx)
-			break;
-		io_schedule();
-	}
-	__set_current_state(TASK_RUNNING);
-	return req->ki_user_data;
-}
-EXPORT_SYMBOL(wait_on_sync_kiocb);
-
 /*
 * exit_aio: called when the last user of mm goes away.  At this point, there is
 * no way for any new requests to be submited or any of the io_* syscalls to be
@ -956,9 +973,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
 *	Allocate a slot for an aio request.
 * Returns NULL if no requests are free.
 */
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
 {
-	struct kiocb *req;
+	struct aio_kiocb *req;

 	if (!get_reqs_available(ctx)) {
 		user_refill_reqs_available(ctx);
@ -979,10 +996,10 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
 	return NULL;
 }

-static void kiocb_free(struct kiocb *req)
+static void kiocb_free(struct aio_kiocb *req)
 {
-	if (req->ki_filp)
-		fput(req->ki_filp);
+	if (req->common.ki_filp)
+		fput(req->common.ki_filp);
 	if (req->ki_eventfd != NULL)
 		eventfd_ctx_put(req->ki_eventfd);
 	kmem_cache_free(kiocb_cachep, req);
@ -1018,8 +1035,9 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 /* aio_complete
 *	Called when the io request on the given iocb is complete.
 */
-void aio_complete(struct kiocb *iocb, long res, long res2)
+static void aio_complete(struct kiocb *kiocb, long res, long res2)
 {
+	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
@ -1033,13 +1051,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	 *    ref, no other paths have a way to get another ref
 	 *  - the sync task helpfully left a reference to itself in the iocb
 	 */
-	if (is_sync_kiocb(iocb)) {
-		iocb->ki_user_data = res;
-		smp_wmb();
-		iocb->ki_ctx = ERR_PTR(-EXDEV);
-		wake_up_process(iocb->ki_obj.tsk);
-		return;
-	}
+	BUG_ON(is_sync_kiocb(kiocb));

 	if (iocb->ki_list.next) {
 		unsigned long flags;
@ -1065,7 +1077,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 	event = ev_page + pos % AIO_EVENTS_PER_PAGE;

-	event->obj = (u64)(unsigned long)iocb->ki_obj.user;
+	event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
 	event->data = iocb->ki_user_data;
 	event->res = res;
 	event->res2 = res2;
@ -1074,7 +1086,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);

 	pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
-		 ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
+		 ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
 		 res, res2);

 	/* after flagging the request as done, we
@ -1121,7 +1133,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)

 	percpu_ref_put(&ctx->reqs);
 }
-EXPORT_SYMBOL(aio_complete);

 /* aio_read_events_ring
 *	Pull an event off of the ioctx's event ring.  Returns the number of
@ -1349,46 +1360,19 @@ typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
 			    unsigned long, loff_t);
 typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);

-static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
-				     int rw, char __user *buf,
-				     unsigned long *nr_segs,
-				     struct iovec **iovec,
-				     bool compat)
+static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
+				 struct iovec **iovec,
+				 bool compat,
+				 struct iov_iter *iter)
 {
-	ssize_t ret;
-
-	*nr_segs = kiocb->ki_nbytes;
-
 #ifdef CONFIG_COMPAT
 	if (compat)
-		ret = compat_rw_copy_check_uvector(rw,
+		return compat_import_iovec(rw,
 				(struct compat_iovec __user *)buf,
-				*nr_segs, UIO_FASTIOV, *iovec, iovec);
-	else
+				len, UIO_FASTIOV, iovec, iter);
 #endif
-		ret = rw_copy_check_uvector(rw,
-				(struct iovec __user *)buf,
-				*nr_segs, UIO_FASTIOV, *iovec, iovec);
-	if (ret < 0)
-		return ret;
-
-	/* ki_nbytes now reflect bytes instead of segs */
-	kiocb->ki_nbytes = ret;
-	return 0;
-}
-
-static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
-				       int rw, char __user *buf,
-				       unsigned long *nr_segs,
-				       struct iovec *iovec)
-{
-	if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
-		return -EFAULT;
-
-	iovec->iov_base = buf;
-	iovec->iov_len = kiocb->ki_nbytes;
-	*nr_segs = 1;
-	return 0;
+	return import_iovec(rw, (struct iovec __user *)buf,
+				len, UIO_FASTIOV, iovec, iter);
 }

 /*
@ -1396,11 +1380,10 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
 *	Performs the initial checks and io submission.
 */
 static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
-			    char __user *buf, bool compat)
+			    char __user *buf, size_t len, bool compat)
 {
 	struct file *file = req->ki_filp;
 	ssize_t ret;
-	unsigned long nr_segs;
 	int rw;
 	fmode_t mode;
 	aio_rw_op *rw_op;
@ -1431,21 +1414,22 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
 		if (!rw_op && !iter_op)
 			return -EINVAL;

-		ret = (opcode == IOCB_CMD_PREADV ||
-		       opcode == IOCB_CMD_PWRITEV)
-			? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
-						&iovec, compat)
-			: aio_setup_single_vector(req, rw, buf, &nr_segs,
-						  iovec);
+		if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
+			ret = aio_setup_vectored_rw(rw, buf, len,
+						&iovec, compat, &iter);
+		else {
+			ret = import_single_range(rw, buf, len, iovec, &iter);
+			iovec = NULL;
+		}
 		if (!ret)
-			ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+			ret = rw_verify_area(rw, file, &req->ki_pos,
+					     iov_iter_count(&iter));
 		if (ret < 0) {
-			if (iovec != inline_vecs)
-				kfree(iovec);
+			kfree(iovec);
 			return ret;
 		}

-		req->ki_nbytes = ret;
+		len = ret;

 		/* XXX: move/kill - rw_verify_area()? */
 		/* This matches the pread()/pwrite() logic */
@ -1458,14 +1442,14 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
 			file_start_write(file);

 		if (iter_op) {
-			iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
 			ret = iter_op(req, &iter);
 		} else {
-			ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+			ret = rw_op(req, iter.iov, iter.nr_segs, req->ki_pos);
 		}

 		if (rw == WRITE)
 			file_end_write(file);
+		kfree(iovec);
 		break;

 	case IOCB_CMD_FDSYNC:
@ -1487,9 +1471,6 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
 		return -EINVAL;
 	}

-	if (iovec != inline_vecs)
-		kfree(iovec);
-
 	if (ret != -EIOCBQUEUED) {
 		/*
 		 * There's no easy way to restart the syscall since other AIO's
@ -1508,7 +1489,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			 struct iocb *iocb, bool compat)
 {
-	struct kiocb *req;
+	struct aio_kiocb *req;
 	ssize_t ret;

 	/* enforce forwards compatibility on users */
@ -1531,11 +1512,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	if (unlikely(!req))
 		return -EAGAIN;

-	req->ki_filp = fget(iocb->aio_fildes);
-	if (unlikely(!req->ki_filp)) {
+	req->common.ki_filp = fget(iocb->aio_fildes);
+	if (unlikely(!req->common.ki_filp)) {
 		ret = -EBADF;
 		goto out_put_req;
 	}
+	req->common.ki_pos = iocb->aio_offset;
+	req->common.ki_complete = aio_complete;
+	req->common.ki_flags = 0;

 	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
 		/*
@ -1550,6 +1534,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			req->ki_eventfd = NULL;
 			goto out_put_req;
 		}
+
+		req->common.ki_flags |= IOCB_EVENTFD;
 	}

 	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@ -1558,13 +1544,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		goto out_put_req;
 	}

-	req->ki_obj.user = user_iocb;
+	req->ki_user_iocb = user_iocb;
 	req->ki_user_data = iocb->aio_data;
-	req->ki_pos = iocb->aio_offset;
-	req->ki_nbytes = iocb->aio_nbytes;

-	ret = aio_run_iocb(req, iocb->aio_lio_opcode,
+	ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
 			   (char __user *)(unsigned long)iocb->aio_buf,
+			   iocb->aio_nbytes,
 			   compat);
 	if (ret)
 		goto out_put_req;
@ -1651,10 +1636,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 /* lookup_kiocb
 *	Finds a given iocb for cancellation.
 */
-static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
-				  u32 key)
+static struct aio_kiocb *
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
 {
-	struct list_head *pos;
+	struct aio_kiocb *kiocb;

 	assert_spin_locked(&ctx->ctx_lock);

@ -1662,9 +1647,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
 		return NULL;

 	/* TODO: use a hash or array, this sucks. */
-	list_for_each(pos, &ctx->active_reqs) {
-		struct kiocb *kiocb = list_kiocb(pos);
-		if (kiocb->ki_obj.user == iocb)
+	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+		if (kiocb->ki_user_iocb == iocb)
 			return kiocb;
 	}
 	return NULL;
@ -1684,7 +1668,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 		struct io_event __user *, result)
 {
 	struct kioctx *ctx;
-	struct kiocb *kiocb;
+	struct aio_kiocb *kiocb;
 	u32 key;
 	int ret;

--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@ -15,6 +15,7 @@
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
 #include <linux/writeback.h>
+#include <linux/uio.h>
 #include <asm/uaccess.h>
 #include "bfs.h"

--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@ -27,7 +27,6 @@
 #include <linux/namei.h>
 #include <linux/log2.h>
 #include <linux/cleancache.h>
-#include <linux/aio.h>
 #include <asm/uaccess.h>
 #include "internal.h"

--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@ -24,7 +24,6 @@
 #include <linux/string.h>
 #include <linux/backing-dev.h>
 #include <linux/mpage.h>
-#include <linux/aio.h>
 #include <linux/falloc.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
@ -32,6 +31,7 @@
 #include <linux/compat.h>
 #include <linux/slab.h>
 #include <linux/btrfs.h>
+#include <linux/uio.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@ -32,7 +32,6 @@
 #include <linux/writeback.h>
 #include <linux/statfs.h>
 #include <linux/compat.h>
-#include <linux/aio.h>
 #include <linux/bit_spinlock.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
@ -43,6 +42,7 @@
 #include <linux/btrfs.h>
 #include <linux/blkdev.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/uio.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@ -7,7 +7,6 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
 #include <linux/falloc.h>

 #include "super.h"
@ -808,7 +807,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *filp = iocb->ki_filp;
 	struct ceph_file_info *fi = filp->private_data;
-	size_t len = iocb->ki_nbytes;
+	size_t len = iov_iter_count(to);
 	struct inode *inode = file_inode(filp);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct page *pinned_page = NULL;
--- a/fs/dcache.c
+++ b/fs/dcache.c
@ -2690,7 +2690,7 @@ static int __d_unalias(struct inode *inode,
 		struct dentry *dentry, struct dentry *alias)
 {
 	struct mutex *m1 = NULL, *m2 = NULL;
-	int ret = -EBUSY;
+	int ret = -ESTALE;

 	/* If alias and dentry share a parent, then no extra locks required */
 	if (alias->d_parent == dentry->d_parent)
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@ -37,7 +37,6 @@
 #include <linux/uio.h>
 #include <linux/atomic.h>
 #include <linux/prefetch.h>
-#include <linux/aio.h>

 /*
 * How many user pages to map in one call to get_user_pages().  This determines
@ -265,7 +264,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
 				ret = err;
 		}

-		aio_complete(dio->iocb, ret, 0);
+		dio->iocb->ki_complete(dio->iocb, ret, 0);
 	}

 	kmem_cache_free(dio_cache, dio);
@ -1056,7 +1055,7 @@ static inline int drop_refcount(struct dio *dio)
 	 * operation.  AIO can if it was a broken operation described above or
 	 * in fact if all the bios race to complete before we get here.  In
 	 * that case dio_complete() translates the EIOCBQUEUED into the proper
-	 * return code that the caller will hand to aio_complete().
+	 * return code that the caller will hand to ->complete().
 	 *
 	 * This is managed by the bio_lock instead of being an atomic_t so that
 	 * completion paths can drop their ref and use the remaining count to
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@ -31,7 +31,6 @@
 #include <linux/security.h>
 #include <linux/compat.h>
 #include <linux/fs_stack.h>
-#include <linux/aio.h>
 #include "ecryptfs_kernel.h"

 /**
@ -52,12 +51,6 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
 	struct file *file = iocb->ki_filp;

 	rc = generic_file_read_iter(iocb, to);
-	/*
-	 * Even though this is a async interface, we need to wait
-	 * for IO to finish to update atime
-	 */
-	if (-EIOCBQUEUED == rc)
-		rc = wait_on_sync_kiocb(iocb);
 	if (rc >= 0) {
 		path = ecryptfs_dentry_to_lower_path(file->f_path.dentry);
 		touch_atime(path);
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@ -31,7 +31,7 @@
 #include <linux/mpage.h>
 #include <linux/fiemap.h>
 #include <linux/namei.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "ext2.h"
 #include "acl.h"
 #include "xattr.h"
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@ -27,7 +27,7 @@
 #include <linux/writeback.h>
 #include <linux/mpage.h>
 #include <linux/namei.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "ext3.h"
 #include "xattr.h"
 #include "acl.h"
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@ -23,9 +23,9 @@
 #include <linux/jbd2.h>
 #include <linux/mount.h>
 #include <linux/path.h>
-#include <linux/aio.h>
 #include <linux/quotaops.h>
 #include <linux/pagevec.h>
+#include <linux/uio.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@ -20,9 +20,9 @@
 *	(sct@redhat.com), 1993, 1998
 */

-#include <linux/aio.h>
 #include "ext4_jbd2.h"
 #include "truncate.h"
+#include <linux/uio.h>

 #include <trace/events/ext4.h>

--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@ -37,7 +37,6 @@
 #include <linux/printk.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
-#include <linux/aio.h>
 #include <linux/bitops.h>

 #include "ext4_jbd2.h"
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@ -18,7 +18,6 @@
 #include <linux/pagevec.h>
 #include <linux/mpage.h>
 #include <linux/namei.h>
-#include <linux/aio.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
 #include <linux/workqueue.h>
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@ -12,12 +12,12 @@
 #include <linux/f2fs_fs.h>
 #include <linux/buffer_head.h>
 #include <linux/mpage.h>
-#include <linux/aio.h>
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/prefetch.h>
+#include <linux/uio.h>

 #include "f2fs.h"
 #include "node.h"
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@ -19,7 +19,6 @@
 #include <linux/mpage.h>
 #include <linux/buffer_head.h>
 #include <linux/mount.h>
-#include <linux/aio.h>
 #include <linux/vfs.h>
 #include <linux/parser.h>
 #include <linux/uio.h>
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@ -38,7 +38,6 @@
 #include <linux/device.h>
 #include <linux/file.h>
 #include <linux/fs.h>
-#include <linux/aio.h>
 #include <linux/kdev_t.h>
 #include <linux/kthread.h>
 #include <linux/list.h>
@ -48,6 +47,7 @@
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/module.h>
+#include <linux/uio.h>

 #include "fuse_i.h"

--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@ -19,7 +19,6 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/swap.h>
 #include <linux/splice.h>
-#include <linux/aio.h>

 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS("devname:fuse");
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@ -15,8 +15,8 @@
 #include <linux/module.h>
 #include <linux/compat.h>
 #include <linux/swap.h>
-#include <linux/aio.h>
 #include <linux/falloc.h>
+#include <linux/uio.h>

 static const struct file_operations fuse_direct_io_file_operations;

@ -528,6 +528,17 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
 	}
 }

+static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
+{
+	if (io->err)
+		return io->err;
+
+	if (io->bytes >= 0 && io->write)
+		return -EIO;
+
+	return io->bytes < 0 ? io->size : io->bytes;
+}
+
 /**
 * In case of short read, the caller sets 'pos' to the position of
 * actual end of fuse request in IO request. Otherwise, if bytes_requested
@ -546,6 +557,7 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
 */
 static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 {
+	bool is_sync = is_sync_kiocb(io->iocb);
 	int left;

 	spin_lock(&io->lock);
@ -555,30 +567,24 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 		io->bytes = pos;

 	left = --io->reqs;
+	if (!left && is_sync)
+		complete(io->done);
 	spin_unlock(&io->lock);

-	if (!left) {
-		long res;
+	if (!left && !is_sync) {
+		ssize_t res = fuse_get_res_by_io(io);

-		if (io->err)
-			res = io->err;
-		else if (io->bytes >= 0 && io->write)
-			res = -EIO;
-		else {
-			res = io->bytes < 0 ? io->size : io->bytes;
+		if (res >= 0) {
+			struct inode *inode = file_inode(io->iocb->ki_filp);
+			struct fuse_conn *fc = get_fuse_conn(inode);
+			struct fuse_inode *fi = get_fuse_inode(inode);

-			if (!is_sync_kiocb(io->iocb)) {
-				struct inode *inode = file_inode(io->iocb->ki_filp);
-				struct fuse_conn *fc = get_fuse_conn(inode);
-				struct fuse_inode *fi = get_fuse_inode(inode);
-
-				spin_lock(&fc->lock);
-				fi->attr_version = ++fc->attr_version;
-				spin_unlock(&fc->lock);
-			}
+			spin_lock(&fc->lock);
+			fi->attr_version = ++fc->attr_version;
+			spin_unlock(&fc->lock);
 		}

-		aio_complete(io->iocb, res, 0);
+		io->iocb->ki_complete(io->iocb, res, 0);
 		kfree(io);
 	}
 }
@ -2801,6 +2807,7 @@ static ssize_t
 fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 			loff_t offset)
 {
+	DECLARE_COMPLETION_ONSTACK(wait);
 	ssize_t ret = 0;
 	struct file *file = iocb->ki_filp;
 	struct fuse_file *ff = file->private_data;
@ -2852,6 +2859,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 	if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE)
 		io->async = false;

+	if (io->async && is_sync_kiocb(iocb))
+		io->done = &wait;
+
 	if (rw == WRITE)
 		ret = __fuse_direct_write(io, iter, &pos);
 	else
@ -2864,11 +2874,12 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 		if (!is_sync_kiocb(iocb))
 			return -EIOCBQUEUED;

-		ret = wait_on_sync_kiocb(iocb);
-	} else {
-		kfree(io);
+		wait_for_completion(&wait);
+		ret = fuse_get_res_by_io(io);
 	}

+	kfree(io);
+
 	if (rw == WRITE) {
 		if (ret > 0)
 			fuse_write_update_size(inode, pos);
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@ -263,6 +263,7 @@ struct fuse_io_priv {
 	int err;
 	struct kiocb *iocb;
 	struct file *file;
+	struct completion *done;
 };

 /**
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@ -20,7 +20,7 @@
 #include <linux/swap.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/backing-dev.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <trace/events/writeback.h>

 #include "gfs2.h"
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@ -25,7 +25,6 @@
 #include <asm/uaccess.h>
 #include <linux/dlm.h>
 #include <linux/dlm_plock.h>
-#include <linux/aio.h>
 #include <linux/delay.h>

 #include "gfs2.h"
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@ -14,7 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>

 #include "hfs_fs.h"
 #include "btree.h"
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@ -14,7 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>

 #include "hfsplus_fs.h"
 #include "hfsplus_raw.h"
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@ -22,8 +22,8 @@
 #include <linux/buffer_head.h>
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
+#include <linux/uio.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
 #include "jfs_incore.h"
 #include "jfs_inode.h"
 #include "jfs_filsys.h"
--- a/fs/namei.c
+++ b/fs/namei.c
@ -119,15 +119,14 @@
 * PATH_MAX includes the nul terminator --RR.
 */

-#define EMBEDDED_NAME_MAX	(PATH_MAX - sizeof(struct filename))
+#define EMBEDDED_NAME_MAX	(PATH_MAX - offsetof(struct filename, iname))

 struct filename *
 getname_flags(const char __user *filename, int flags, int *empty)
 {
-	struct filename *result, *err;
-	int len;
-	long max;
+	struct filename *result;
 	char *kname;
+	int len;

 	result = audit_reusename(filename);
 	if (result)
@ -136,22 +135,18 @@ getname_flags(const char __user *filename, int flags, int *empty)
 	result = __getname();
 	if (unlikely(!result))
 		return ERR_PTR(-ENOMEM);
-	result->refcnt = 1;

 	/*
 	 * First, try to embed the struct filename inside the names_cache
 	 * allocation
 	 */
-	kname = (char *)result + sizeof(*result);
+	kname = (char *)result->iname;
 	result->name = kname;
-	result->separate = false;
-	max = EMBEDDED_NAME_MAX;

-recopy:
-	len = strncpy_from_user(kname, filename, max);
+	len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
 	if (unlikely(len < 0)) {
-		err = ERR_PTR(len);
-		goto error;
+		__putname(result);
+		return ERR_PTR(len);
 	}

 	/*
@ -160,43 +155,49 @@ getname_flags(const char __user *filename, int flags, int *empty)
 	 * names_cache allocation for the pathname, and re-do the copy from
 	 * userland.
 	 */
-	if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) {
+	if (unlikely(len == EMBEDDED_NAME_MAX)) {
+		const size_t size = offsetof(struct filename, iname[1]);
 		kname = (char *)result;

-		result = kzalloc(sizeof(*result), GFP_KERNEL);
-		if (!result) {
-			err = ERR_PTR(-ENOMEM);
-			result = (struct filename *)kname;
-			goto error;
+		/*
+		 * size is chosen that way we to guarantee that
+		 * result->iname[0] is within the same object and that
+		 * kname can't be equal to result->iname, no matter what.
+		 */
+		result = kzalloc(size, GFP_KERNEL);
+		if (unlikely(!result)) {
+			__putname(kname);
+			return ERR_PTR(-ENOMEM);
 		}
 		result->name = kname;
-		result->separate = true;
-		result->refcnt = 1;
-		max = PATH_MAX;
-		goto recopy;
+		len = strncpy_from_user(kname, filename, PATH_MAX);
+		if (unlikely(len < 0)) {
+			__putname(kname);
+			kfree(result);
+			return ERR_PTR(len);
+		}
+		if (unlikely(len == PATH_MAX)) {
+			__putname(kname);
+			kfree(result);
+			return ERR_PTR(-ENAMETOOLONG);
+		}
 	}

+	result->refcnt = 1;
 	/* The empty path is special. */
 	if (unlikely(!len)) {
 		if (empty)
 			*empty = 1;
-		err = ERR_PTR(-ENOENT);
-		if (!(flags & LOOKUP_EMPTY))
-			goto error;
+		if (!(flags & LOOKUP_EMPTY)) {
+			putname(result);
+			return ERR_PTR(-ENOENT);
+		}
 	}

-	err = ERR_PTR(-ENAMETOOLONG);
-	if (unlikely(len >= PATH_MAX))
-		goto error;
-
 	result->uptr = filename;
 	result->aname = NULL;
 	audit_getname(result);
 	return result;
-
-error:
-	putname(result);
-	return err;
 }

 struct filename *
@ -216,8 +217,7 @@ getname_kernel(const char * filename)
 		return ERR_PTR(-ENOMEM);

 	if (len <= EMBEDDED_NAME_MAX) {
-		result->name = (char *)(result) + sizeof(*result);
-		result->separate = false;
+		result->name = (char *)result->iname;
 	} else if (len <= PATH_MAX) {
 		struct filename *tmp;

@ -227,7 +227,6 @@ getname_kernel(const char * filename)
 			return ERR_PTR(-ENOMEM);
 		}
 		tmp->name = (char *)result;
-		tmp->separate = true;
 		result = tmp;
 	} else {
 		__putname(result);
@ -249,7 +248,7 @@ void putname(struct filename *name)
 	if (--name->refcnt > 0)
 		return;

-	if (name->separate) {
+	if (name->name != name->iname) {
 		__putname(name->name);
 		kfree(name);
 	} else
@ -1851,10 +1850,11 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 	return err;
 }

-static int path_init(int dfd, const char *name, unsigned int flags,
+static int path_init(int dfd, const struct filename *name, unsigned int flags,
 		     struct nameidata *nd)
 {
 	int retval = 0;
+	const char *s = name->name;

 	nd->last_type = LAST_ROOT; /* if there are only slashes... */
 	nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
@ -1863,7 +1863,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 	if (flags & LOOKUP_ROOT) {
 		struct dentry *root = nd->root.dentry;
 		struct inode *inode = root->d_inode;
-		if (*name) {
+		if (*s) {
 			if (!d_can_lookup(root))
 				return -ENOTDIR;
 			retval = inode_permission(inode, MAY_EXEC);
@ -1885,7 +1885,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 	nd->root.mnt = NULL;

 	nd->m_seq = read_seqbegin(&mount_lock);
-	if (*name=='/') {
+	if (*s == '/') {
 		if (flags & LOOKUP_RCU) {
 			rcu_read_lock();
 			nd->seq = set_root_rcu(nd);
@ -1919,7 +1919,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,

 		dentry = f.file->f_path.dentry;

-		if (*name) {
+		if (*s) {
 			if (!d_can_lookup(dentry)) {
 				fdput(f);
 				return -ENOTDIR;
@ -1949,7 +1949,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 	return -ECHILD;
 done:
 	current->total_link_count = 0;
-	return link_path_walk(name, nd);
+	return link_path_walk(s, nd);
 }

 static void path_cleanup(struct nameidata *nd)
@ -1972,7 +1972,7 @@ static inline int lookup_last(struct nameidata *nd, struct path *path)
 }

 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-static int path_lookupat(int dfd, const char *name,
+static int path_lookupat(int dfd, const struct filename *name,
 				unsigned int flags, struct nameidata *nd)
 {
 	struct path path;
@ -2027,31 +2027,17 @@ static int path_lookupat(int dfd, const char *name,
 static int filename_lookup(int dfd, struct filename *name,
 				unsigned int flags, struct nameidata *nd)
 {
-	int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd);
+	int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
 	if (unlikely(retval == -ECHILD))
-		retval = path_lookupat(dfd, name->name, flags, nd);
+		retval = path_lookupat(dfd, name, flags, nd);
 	if (unlikely(retval == -ESTALE))
-		retval = path_lookupat(dfd, name->name,
-						flags | LOOKUP_REVAL, nd);
+		retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);

 	if (likely(!retval))
 		audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT);
 	return retval;
 }

-static int do_path_lookup(int dfd, const char *name,
-				unsigned int flags, struct nameidata *nd)
-{
-	struct filename *filename = getname_kernel(name);
-	int retval = PTR_ERR(filename);
-
-	if (!IS_ERR(filename)) {
-		retval = filename_lookup(dfd, filename, flags, nd);
-		putname(filename);
-	}
-	return retval;
-}
-
 /* does lookup, returns the object with parent locked */
 struct dentry *kern_path_locked(const char *name, struct path *path)
 {
@ -2089,9 +2075,15 @@ struct dentry *kern_path_locked(const char *name, struct path *path)
 int kern_path(const char *name, unsigned int flags, struct path *path)
 {
 	struct nameidata nd;
-	int res = do_path_lookup(AT_FDCWD, name, flags, &nd);
-	if (!res)
-		*path = nd.path;
+	struct filename *filename = getname_kernel(name);
+	int res = PTR_ERR(filename);
+
+	if (!IS_ERR(filename)) {
+		res = filename_lookup(AT_FDCWD, filename, flags, &nd);
+		putname(filename);
+		if (!res)
+			*path = nd.path;
+	}
 	return res;
 }
 EXPORT_SYMBOL(kern_path);
@ -2108,15 +2100,22 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
 		    const char *name, unsigned int flags,
 		    struct path *path)
 {
-	struct nameidata nd;
-	int err;
-	nd.root.dentry = dentry;
-	nd.root.mnt = mnt;
+	struct filename *filename = getname_kernel(name);
+	int err = PTR_ERR(filename);
+
 	BUG_ON(flags & LOOKUP_PARENT);
-	/* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
-	err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd);
-	if (!err)
-		*path = nd.path;
+
+	/* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */
+	if (!IS_ERR(filename)) {
+		struct nameidata nd;
+		nd.root.dentry = dentry;
+		nd.root.mnt = mnt;
+		err = filename_lookup(AT_FDCWD, filename,
+				      flags | LOOKUP_ROOT, &nd);
+		if (!err)
+			*path = nd.path;
+		putname(filename);
+	}
 	return err;
 }
 EXPORT_SYMBOL(vfs_path_lookup);
@ -2138,9 +2137,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
 * @len:	maximum length @len should be interpreted to
 *
 * Note that this routine is purely a helper for filesystem usage and should
- * not be called by generic code.  Also note that by using this function the
- * nameidata argument is passed to the filesystem methods and a filesystem
- * using this helper needs to be prepared for that.
+ * not be called by generic code.
 */
 struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
 {
@ -2341,7 +2338,8 @@ mountpoint_last(struct nameidata *nd, struct path *path)
 * Returns 0 and "path" will be valid on success; Returns error otherwise.
 */
 static int
-path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
+path_mountpoint(int dfd, const struct filename *name, struct path *path,
+		unsigned int flags)
 {
 	struct nameidata nd;
 	int err;
@ -2370,20 +2368,20 @@ path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags
 }

 static int
-filename_mountpoint(int dfd, struct filename *s, struct path *path,
+filename_mountpoint(int dfd, struct filename *name, struct path *path,
 			unsigned int flags)
 {
 	int error;
-	if (IS_ERR(s))
-		return PTR_ERR(s);
-	error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU);
+	if (IS_ERR(name))
+		return PTR_ERR(name);
+	error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU);
 	if (unlikely(error == -ECHILD))
-		error = path_mountpoint(dfd, s->name, path, flags);
+		error = path_mountpoint(dfd, name, path, flags);
 	if (unlikely(error == -ESTALE))
-		error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL);
+		error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL);
 	if (likely(!error))
-		audit_inode(s, path->dentry, 0);
-	putname(s);
+		audit_inode(name, path->dentry, 0);
+	putname(name);
 	return error;
 }

@ -3156,7 +3154,7 @@ static int do_tmpfile(int dfd, struct filename *pathname,
 	static const struct qstr name = QSTR_INIT("/", 1);
 	struct dentry *dentry, *child;
 	struct inode *dir;
-	int error = path_lookupat(dfd, pathname->name,
+	int error = path_lookupat(dfd, pathname,
 				  flags | LOOKUP_DIRECTORY, nd);
 	if (unlikely(error))
 		return error;
@ -3229,7 +3227,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
 		goto out;
 	}

-	error = path_init(dfd, pathname->name, flags, nd);
+	error = path_init(dfd, pathname, flags, nd);
 	if (unlikely(error))
 		goto out;

--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@ -265,7 +265,7 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t

 	return -EINVAL;
 #else
-	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
+	VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);

 	if (rw == READ)
 		return nfs_file_direct_read(iocb, iter, pos);
@ -393,7 +393,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
 		long res = (long) dreq->error;
 		if (!res)
 			res = (long) dreq->count;
-		aio_complete(dreq->iocb, res, 0);
+		dreq->iocb->ki_complete(dreq->iocb, res, 0);
 	}

 	complete_all(&dreq->completion);
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@ -26,7 +26,6 @@
 #include <linux/nfs_mount.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
-#include <linux/aio.h>
 #include <linux/gfp.h>
 #include <linux/swap.h>

--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@ -26,7 +26,7 @@
 #include <linux/mpage.h>
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "nilfs.h"
 #include "btnode.h"
 #include "segment.h"
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@ -8,7 +8,7 @@ ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \

 ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o

-ccflags-y := -DNTFS_VERSION=\"2.1.31\"
+ccflags-y := -DNTFS_VERSION=\"2.1.32\"
 ccflags-$(CONFIG_NTFS_DEBUG)	+= -DDEBUG
 ccflags-$(CONFIG_NTFS_RW)	+= -DNTFS_RW

--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@ -1,7 +1,7 @@
 /*
 * file.c - NTFS kernel file operations.  Part of the Linux-NTFS project.
 *
- * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
+ * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
 *
 * This program/include file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published
@ -28,7 +28,6 @@
 #include <linux/swap.h>
 #include <linux/uio.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>

 #include <asm/page.h>
 #include <asm/uaccess.h>
@ -329,62 +328,168 @@ static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size)
 	return err;
 }

-/**
- * ntfs_fault_in_pages_readable -
- *
- * Fault a number of userspace pages into pagetables.
- *
- * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes
- * with more than two userspace pages as well as handling the single page case
- * elegantly.
- *
- * If you find this difficult to understand, then think of the while loop being
- * the following code, except that we do without the integer variable ret:
- *
- *	do {
- *		ret = __get_user(c, uaddr);
- *		uaddr += PAGE_SIZE;
- *	} while (!ret && uaddr < end);
- *
- * Note, the final __get_user() may well run out-of-bounds of the user buffer,
- * but _not_ out-of-bounds of the page the user buffer belongs to, and since
- * this is only a read and not a write, and since it is still in the same page,
- * it should not matter and this makes the code much simpler.
- */
-static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
-		int bytes)
+static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos,
+		size_t *count)
 {
-	const char __user *end;
-	volatile char c;
+	loff_t pos;
+	s64 end, ll;
+	ssize_t err;
+	unsigned long flags;
+	struct inode *vi = file_inode(file);
+	ntfs_inode *base_ni, *ni = NTFS_I(vi);
+	ntfs_volume *vol = ni->vol;

-	/* Set @end to the first byte outside the last page we care about. */
-	end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes);
-
-	while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end))
-		;
-}
-
-/**
- * ntfs_fault_in_pages_readable_iovec -
- *
- * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs.
- */
-static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
-		size_t iov_ofs, int bytes)
-{
-	do {
-		const char __user *buf;
-		unsigned len;
-
-		buf = iov->iov_base + iov_ofs;
-		len = iov->iov_len - iov_ofs;
-		if (len > bytes)
-			len = bytes;
-		ntfs_fault_in_pages_readable(buf, len);
-		bytes -= len;
-		iov++;
-		iov_ofs = 0;
-	} while (bytes);
+	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
+			"0x%llx, count 0x%lx.", vi->i_ino,
+			(unsigned)le32_to_cpu(ni->type),
+			(unsigned long long)*ppos, (unsigned long)*count);
+	/* We can write back this queue in page reclaim. */
+	current->backing_dev_info = inode_to_bdi(vi);
+	err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode));
+	if (unlikely(err))
+		goto out;
+	/*
+	 * All checks have passed.  Before we start doing any writing we want
+	 * to abort any totally illegal writes.
+	 */
+	BUG_ON(NInoMstProtected(ni));
+	BUG_ON(ni->type != AT_DATA);
+	/* If file is encrypted, deny access, just like NT4. */
+	if (NInoEncrypted(ni)) {
+		/* Only $DATA attributes can be encrypted. */
+		/*
+		 * Reminder for later: Encrypted files are _always_
+		 * non-resident so that the content can always be encrypted.
+		 */
+		ntfs_debug("Denying write access to encrypted file.");
+		err = -EACCES;
+		goto out;
+	}
+	if (NInoCompressed(ni)) {
+		/* Only unnamed $DATA attribute can be compressed. */
+		BUG_ON(ni->name_len);
+		/*
+		 * Reminder for later: If resident, the data is not actually
+		 * compressed.  Only on the switch to non-resident does
+		 * compression kick in.  This is in contrast to encrypted files
+		 * (see above).
+		 */
+		ntfs_error(vi->i_sb, "Writing to compressed files is not "
+				"implemented yet.  Sorry.");
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+	if (*count == 0)
+		goto out;
+	base_ni = ni;
+	if (NInoAttr(ni))
+		base_ni = ni->ext.base_ntfs_ino;
+	err = file_remove_suid(file);
+	if (unlikely(err))
+		goto out;
+	/*
+	 * Our ->update_time method always succeeds thus file_update_time()
+	 * cannot fail either so there is no need to check the return code.
+	 */
+	file_update_time(file);
+	pos = *ppos;
+	/* The first byte after the last cluster being written to. */
+	end = (pos + *count + vol->cluster_size_mask) &
+			~(u64)vol->cluster_size_mask;
+	/*
+	 * If the write goes beyond the allocated size, extend the allocation
+	 * to cover the whole of the write, rounded up to the nearest cluster.
+	 */
+	read_lock_irqsave(&ni->size_lock, flags);
+	ll = ni->allocated_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (end > ll) {
+		/*
+		 * Extend the allocation without changing the data size.
+		 *
+		 * Note we ensure the allocation is big enough to at least
+		 * write some data but we do not require the allocation to be
+		 * complete, i.e. it may be partial.
+		 */
+		ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
+		if (likely(ll >= 0)) {
+			BUG_ON(pos >= ll);
+			/* If the extension was partial truncate the write. */
+			if (end > ll) {
+				ntfs_debug("Truncating write to inode 0x%lx, "
+						"attribute type 0x%x, because "
+						"the allocation was only "
+						"partially extended.",
+						vi->i_ino, (unsigned)
+						le32_to_cpu(ni->type));
+				*count = ll - pos;
+			}
+		} else {
+			err = ll;
+			read_lock_irqsave(&ni->size_lock, flags);
+			ll = ni->allocated_size;
+			read_unlock_irqrestore(&ni->size_lock, flags);
+			/* Perform a partial write if possible or fail. */
+			if (pos < ll) {
+				ntfs_debug("Truncating write to inode 0x%lx "
+						"attribute type 0x%x, because "
+						"extending the allocation "
+						"failed (error %d).",
+						vi->i_ino, (unsigned)
+						le32_to_cpu(ni->type),
+						(int)-err);
+				*count = ll - pos;
+			} else {
+				if (err != -ENOSPC)
+					ntfs_error(vi->i_sb, "Cannot perform "
+							"write to inode "
+							"0x%lx, attribute "
+							"type 0x%x, because "
+							"extending the "
+							"allocation failed "
+							"(error %ld).",
+							vi->i_ino, (unsigned)
+							le32_to_cpu(ni->type),
+							(long)-err);
+				else
+					ntfs_debug("Cannot perform write to "
+							"inode 0x%lx, "
+							"attribute type 0x%x, "
+							"because there is not "
+							"space left.",
+							vi->i_ino, (unsigned)
+							le32_to_cpu(ni->type));
+				goto out;
+			}
+		}
+	}
+	/*
+	 * If the write starts beyond the initialized size, extend it up to the
+	 * beginning of the write and initialize all non-sparse space between
+	 * the old initialized size and the new one.  This automatically also
+	 * increments the vfs inode->i_size to keep it above or equal to the
+	 * initialized_size.
+	 */
+	read_lock_irqsave(&ni->size_lock, flags);
+	ll = ni->initialized_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (pos > ll) {
+		/*
+		 * Wait for ongoing direct i/o to complete before proceeding.
+		 * New direct i/o cannot start as we hold i_mutex.
+		 */
+		inode_dio_wait(vi);
+		err = ntfs_attr_extend_initialized(ni, pos);
+		if (unlikely(err < 0))
+			ntfs_error(vi->i_sb, "Cannot perform write to inode "
+					"0x%lx, attribute type 0x%x, because "
+					"extending the initialized size "
+					"failed (error %d).", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type),
+					(int)-err);
+	}
+out:
+	return err;
 }

 /**
@ -421,8 +526,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
 					goto err_out;
 				}
 			}
-			err = add_to_page_cache_lru(*cached_page, mapping, index,
-					GFP_KERNEL);
+			err = add_to_page_cache_lru(*cached_page, mapping,
+					index, GFP_KERNEL);
 			if (unlikely(err)) {
 				if (err == -EEXIST)
 					continue;
@ -1268,180 +1373,6 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
 	return err;
 }

-/*
- * Copy as much as we can into the pages and return the number of bytes which
- * were successfully copied.  If a fault is encountered then clear the pages
- * out to (ofs + bytes) and return the number of bytes which were copied.
- */
-static inline size_t ntfs_copy_from_user(struct page **pages,
-		unsigned nr_pages, unsigned ofs, const char __user *buf,
-		size_t bytes)
-{
-	struct page **last_page = pages + nr_pages;
-	char *addr;
-	size_t total = 0;
-	unsigned len;
-	int left;
-
-	do {
-		len = PAGE_CACHE_SIZE - ofs;
-		if (len > bytes)
-			len = bytes;
-		addr = kmap_atomic(*pages);
-		left = __copy_from_user_inatomic(addr + ofs, buf, len);
-		kunmap_atomic(addr);
-		if (unlikely(left)) {
-			/* Do it the slow way. */
-			addr = kmap(*pages);
-			left = __copy_from_user(addr + ofs, buf, len);
-			kunmap(*pages);
-			if (unlikely(left))
-				goto err_out;
-		}
-		total += len;
-		bytes -= len;
-		if (!bytes)
-			break;
-		buf += len;
-		ofs = 0;
-	} while (++pages < last_page);
-out:
-	return total;
-err_out:
-	total += len - left;
-	/* Zero the rest of the target like __copy_from_user(). */
-	while (++pages < last_page) {
-		bytes -= len;
-		if (!bytes)
-			break;
-		len = PAGE_CACHE_SIZE;
-		if (len > bytes)
-			len = bytes;
-		zero_user(*pages, 0, len);
-	}
-	goto out;
-}
-
-static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr,
-		const struct iovec *iov, size_t iov_ofs, size_t bytes)
-{
-	size_t total = 0;
-
-	while (1) {
-		const char __user *buf = iov->iov_base + iov_ofs;
-		unsigned len;
-		size_t left;
-
-		len = iov->iov_len - iov_ofs;
-		if (len > bytes)
-			len = bytes;
-		left = __copy_from_user_inatomic(vaddr, buf, len);
-		total += len;
-		bytes -= len;
-		vaddr += len;
-		if (unlikely(left)) {
-			total -= left;
-			break;
-		}
-		if (!bytes)
-			break;
-		iov++;
-		iov_ofs = 0;
-	}
-	return total;
-}
-
-static inline void ntfs_set_next_iovec(const struct iovec **iovp,
-		size_t *iov_ofsp, size_t bytes)
-{
-	const struct iovec *iov = *iovp;
-	size_t iov_ofs = *iov_ofsp;
-
-	while (bytes) {
-		unsigned len;
-
-		len = iov->iov_len - iov_ofs;
-		if (len > bytes)
-			len = bytes;
-		bytes -= len;
-		iov_ofs += len;
-		if (iov->iov_len == iov_ofs) {
-			iov++;
-			iov_ofs = 0;
-		}
-	}
-	*iovp = iov;
-	*iov_ofsp = iov_ofs;
-}
-
-/*
- * This has the same side-effects and return value as ntfs_copy_from_user().
- * The difference is that on a fault we need to memset the remainder of the
- * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
- * single-segment behaviour.
- *
- * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when
- * atomic and when not atomic.  This is ok because it calls
- * __copy_from_user_inatomic() and it is ok to call this when non-atomic.  In
- * fact, the only difference between __copy_from_user_inatomic() and
- * __copy_from_user() is that the latter calls might_sleep() and the former
- * should not zero the tail of the buffer on error.  And on many architectures
- * __copy_from_user_inatomic() is just defined to __copy_from_user() so it
- * makes no difference at all on those architectures.
- */
-static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
-		unsigned nr_pages, unsigned ofs, const struct iovec **iov,
-		size_t *iov_ofs, size_t bytes)
-{
-	struct page **last_page = pages + nr_pages;
-	char *addr;
-	size_t copied, len, total = 0;
-
-	do {
-		len = PAGE_CACHE_SIZE - ofs;
-		if (len > bytes)
-			len = bytes;
-		addr = kmap_atomic(*pages);
-		copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
-				*iov, *iov_ofs, len);
-		kunmap_atomic(addr);
-		if (unlikely(copied != len)) {
-			/* Do it the slow way. */
-			addr = kmap(*pages);
-			copied = __ntfs_copy_from_user_iovec_inatomic(addr +
-					ofs, *iov, *iov_ofs, len);
-			if (unlikely(copied != len))
-				goto err_out;
-			kunmap(*pages);
-		}
-		total += len;
-		ntfs_set_next_iovec(iov, iov_ofs, len);
-		bytes -= len;
-		if (!bytes)
-			break;
-		ofs = 0;
-	} while (++pages < last_page);
-out:
-	return total;
-err_out:
-	BUG_ON(copied > len);
-	/* Zero the rest of the target like __copy_from_user(). */
-	memset(addr + ofs + copied, 0, len - copied);
-	kunmap(*pages);
-	total += copied;
-	ntfs_set_next_iovec(iov, iov_ofs, copied);
-	while (++pages < last_page) {
-		bytes -= len;
-		if (!bytes)
-			break;
-		len = PAGE_CACHE_SIZE;
-		if (len > bytes)
-			len = bytes;
-		zero_user(*pages, 0, len);
-	}
-	goto out;
-}
-
 static inline void ntfs_flush_dcache_pages(struct page **pages,
 		unsigned nr_pages)
 {
@ -1762,86 +1693,83 @@ static int ntfs_commit_pages_after_write(struct page **pages,
 	return err;
 }

-static void ntfs_write_failed(struct address_space *mapping, loff_t to)
+/*
+ * Copy as much as we can into the pages and return the number of bytes which
+ * were successfully copied.  If a fault is encountered then clear the pages
+ * out to (ofs + bytes) and return the number of bytes which were copied.
+ */
+static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
+		unsigned ofs, struct iov_iter *i, size_t bytes)
 {
-	struct inode *inode = mapping->host;
+	struct page **last_page = pages + nr_pages;
+	size_t total = 0;
+	struct iov_iter data = *i;
+	unsigned len, copied;

-	if (to > inode->i_size) {
-		truncate_pagecache(inode, inode->i_size);
-		ntfs_truncate_vfs(inode);
-	}
+	do {
+		len = PAGE_CACHE_SIZE - ofs;
+		if (len > bytes)
+			len = bytes;
+		copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
+				len);
+		total += copied;
+		bytes -= copied;
+		if (!bytes)
+			break;
+		iov_iter_advance(&data, copied);
+		if (copied < len)
+			goto err;
+		ofs = 0;
+	} while (++pages < last_page);
+out:
+	return total;
+err:
+	/* Zero the rest of the target like __copy_from_user(). */
+	len = PAGE_CACHE_SIZE - copied;
+	do {
+		if (len > bytes)
+			len = bytes;
+		zero_user(*pages, copied, len);
+		bytes -= len;
+		copied = 0;
+		len = PAGE_CACHE_SIZE;
+	} while (++pages < last_page);
+	goto out;
 }

 /**
- * ntfs_file_buffered_write -
- *
- * Locking: The vfs is holding ->i_mutex on the inode.
+ * ntfs_perform_write - perform buffered write to a file
+ * @file:	file to write to
+ * @i:		iov_iter with data to write
+ * @pos:	byte offset in file at which to begin writing to
 */
-static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
-		const struct iovec *iov, unsigned long nr_segs,
-		loff_t pos, loff_t *ppos, size_t count)
+static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
+		loff_t pos)
 {
-	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *vi = mapping->host;
 	ntfs_inode *ni = NTFS_I(vi);
 	ntfs_volume *vol = ni->vol;
 	struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
 	struct page *cached_page = NULL;
-	char __user *buf = NULL;
-	s64 end, ll;
 	VCN last_vcn;
 	LCN lcn;
-	unsigned long flags;
-	size_t bytes, iov_ofs = 0;	/* Offset in the current iovec. */
-	ssize_t status, written;
+	size_t bytes;
+	ssize_t status, written = 0;
 	unsigned nr_pages;
-	int err;

-	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
-			"pos 0x%llx, count 0x%lx.",
-			vi->i_ino, (unsigned)le32_to_cpu(ni->type),
-			(unsigned long long)pos, (unsigned long)count);
-	if (unlikely(!count))
-		return 0;
-	BUG_ON(NInoMstProtected(ni));
-	/*
-	 * If the attribute is not an index root and it is encrypted or
-	 * compressed, we cannot write to it yet.  Note we need to check for
-	 * AT_INDEX_ALLOCATION since this is the type of both directory and
-	 * index inodes.
-	 */
-	if (ni->type != AT_INDEX_ALLOCATION) {
-		/* If file is encrypted, deny access, just like NT4. */
-		if (NInoEncrypted(ni)) {
-			/*
-			 * Reminder for later: Encrypted files are _always_
-			 * non-resident so that the content can always be
-			 * encrypted.
-			 */
-			ntfs_debug("Denying write access to encrypted file.");
-			return -EACCES;
-		}
-		if (NInoCompressed(ni)) {
-			/* Only unnamed $DATA attribute can be compressed. */
-			BUG_ON(ni->type != AT_DATA);
-			BUG_ON(ni->name_len);
-			/*
-			 * Reminder for later: If resident, the data is not
-			 * actually compressed.  Only on the switch to non-
-			 * resident does compression kick in.  This is in
-			 * contrast to encrypted files (see above).
-			 */
-			ntfs_error(vi->i_sb, "Writing to compressed files is "
-					"not implemented yet.  Sorry.");
-			return -EOPNOTSUPP;
-		}
-	}
+	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
+			"0x%llx, count 0x%lx.", vi->i_ino,
+			(unsigned)le32_to_cpu(ni->type),
+			(unsigned long long)pos,
+			(unsigned long)iov_iter_count(i));
 	/*
 	 * If a previous ntfs_truncate() failed, repeat it and abort if it
 	 * fails again.
 	 */
 	if (unlikely(NInoTruncateFailed(ni))) {
+		int err;
+
 		inode_dio_wait(vi);
 		err = ntfs_truncate(vi);
 		if (err || NInoTruncateFailed(ni)) {
@ -1855,81 +1783,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
 			return err;
 		}
 	}
-	/* The first byte after the write. */
-	end = pos + count;
-	/*
-	 * If the write goes beyond the allocated size, extend the allocation
-	 * to cover the whole of the write, rounded up to the nearest cluster.
-	 */
-	read_lock_irqsave(&ni->size_lock, flags);
-	ll = ni->allocated_size;
-	read_unlock_irqrestore(&ni->size_lock, flags);
-	if (end > ll) {
-		/* Extend the allocation without changing the data size. */
-		ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
-		if (likely(ll >= 0)) {
-			BUG_ON(pos >= ll);
-			/* If the extension was partial truncate the write. */
-			if (end > ll) {
-				ntfs_debug("Truncating write to inode 0x%lx, "
-						"attribute type 0x%x, because "
-						"the allocation was only "
-						"partially extended.",
-						vi->i_ino, (unsigned)
-						le32_to_cpu(ni->type));
-				end = ll;
-				count = ll - pos;
-			}
-		} else {
-			err = ll;
-			read_lock_irqsave(&ni->size_lock, flags);
-			ll = ni->allocated_size;
-			read_unlock_irqrestore(&ni->size_lock, flags);
-			/* Perform a partial write if possible or fail. */
-			if (pos < ll) {
-				ntfs_debug("Truncating write to inode 0x%lx, "
-						"attribute type 0x%x, because "
-						"extending the allocation "
-						"failed (error code %i).",
-						vi->i_ino, (unsigned)
-						le32_to_cpu(ni->type), err);
-				end = ll;
-				count = ll - pos;
-			} else {
-				ntfs_error(vol->sb, "Cannot perform write to "
-						"inode 0x%lx, attribute type "
-						"0x%x, because extending the "
-						"allocation failed (error "
-						"code %i).", vi->i_ino,
-						(unsigned)
-						le32_to_cpu(ni->type), err);
-				return err;
-			}
-		}
-	}
-	written = 0;
-	/*
-	 * If the write starts beyond the initialized size, extend it up to the
-	 * beginning of the write and initialize all non-sparse space between
-	 * the old initialized size and the new one.  This automatically also
-	 * increments the vfs inode->i_size to keep it above or equal to the
-	 * initialized_size.
-	 */
-	read_lock_irqsave(&ni->size_lock, flags);
-	ll = ni->initialized_size;
-	read_unlock_irqrestore(&ni->size_lock, flags);
-	if (pos > ll) {
-		err = ntfs_attr_extend_initialized(ni, pos);
-		if (err < 0) {
-			ntfs_error(vol->sb, "Cannot perform write to inode "
-					"0x%lx, attribute type 0x%x, because "
-					"extending the initialized size "
-					"failed (error code %i).", vi->i_ino,
-					(unsigned)le32_to_cpu(ni->type), err);
-			status = err;
-			goto err_out;
-		}
-	}
 	/*
 	 * Determine the number of pages per cluster for non-resident
 	 * attributes.
@ -1937,10 +1790,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
 	nr_pages = 1;
 	if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
 		nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
-	/* Finally, perform the actual write. */
 	last_vcn = -1;
-	if (likely(nr_segs == 1))
-		buf = iov->iov_base;
 	do {
 		VCN vcn;
 		pgoff_t idx, start_idx;
@ -1965,10 +1815,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
 						vol->cluster_size_bits, false);
 				up_read(&ni->runlist.lock);
 				if (unlikely(lcn < LCN_HOLE)) {
-					status = -EIO;
 					if (lcn == LCN_ENOMEM)
 						status = -ENOMEM;
-					else
+					else {
+						status = -EIO;
 						ntfs_error(vol->sb, "Cannot "
 							"perform write to "
 							"inode 0x%lx, "
@ -1977,6 +1827,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
 							"is corrupt.",
 							vi->i_ino, (unsigned)
 							le32_to_cpu(ni->type));
+					}
 					break;
 				}
 				if (lcn == LCN_HOLE) {
@ -1989,8 +1840,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
 				}
 			}
 		}
-		if (bytes > count)
-			bytes = count;
+		if (bytes > iov_iter_count(i))
+			bytes = iov_iter_count(i);
+again:
 		/*
 		 * Bring in the user page(s) that we will copy from _first_.
 		 * Otherwise there is a nasty deadlock on copying from the same
@ -1999,10 +1851,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
 		 * pages being swapped out between us bringing them into memory
 		 * and doing the actual copying.
 		 */
-		if (likely(nr_segs == 1))
-			ntfs_fault_in_pages_readable(buf, bytes);
-		else
-			ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes);
+		if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) {
+			status = -EFAULT;
+			break;
+		}
 		/* Get and lock @do_pages starting at index @start_idx. */
 		status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
 				pages, &cached_page);
@ -2018,56 +1870,57 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
 			status = ntfs_prepare_pages_for_non_resident_write(
 					pages, do_pages, pos, bytes);
 			if (unlikely(status)) {
-				loff_t i_size;
-
 				do {
 					unlock_page(pages[--do_pages]);
 					page_cache_release(pages[do_pages]);
 				} while (do_pages);
-				/*
-				 * The write preparation may have instantiated
-				 * allocated space outside i_size.  Trim this
-				 * off again.  We can ignore any errors in this
-				 * case as we will just be waisting a bit of
-				 * allocated space, which is not a disaster.
-				 */
-				i_size = i_size_read(vi);
-				if (pos + bytes > i_size) {
-					ntfs_write_failed(mapping, pos + bytes);
-				}
 				break;
 			}
 		}
 		u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
-		if (likely(nr_segs == 1)) {
-			copied = ntfs_copy_from_user(pages + u, do_pages - u,
-					ofs, buf, bytes);
-			buf += copied;
-		} else
-			copied = ntfs_copy_from_user_iovec(pages + u,
-					do_pages - u, ofs, &iov, &iov_ofs,
-					bytes);
+		copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
+					i, bytes);
 		ntfs_flush_dcache_pages(pages + u, do_pages - u);
-		status = ntfs_commit_pages_after_write(pages, do_pages, pos,
-				bytes);
-		if (likely(!status)) {
-			written += copied;
-			count -= copied;
-			pos += copied;
-			if (unlikely(copied != bytes))
-				status = -EFAULT;
+		status = 0;
+		if (likely(copied == bytes)) {
+			status = ntfs_commit_pages_after_write(pages, do_pages,
+					pos, bytes);
+			if (!status)
+				status = bytes;
 		}
 		do {
 			unlock_page(pages[--do_pages]);
 			page_cache_release(pages[do_pages]);
 		} while (do_pages);
-		if (unlikely(status))
+		if (unlikely(status < 0))
 			break;
-		balance_dirty_pages_ratelimited(mapping);
+		copied = status;
 		cond_resched();
-	} while (count);
-err_out:
-	*ppos = pos;
+		if (unlikely(!copied)) {
+			size_t sc;
+
+			/*
+			 * We failed to copy anything.  Fall back to single
+			 * segment length write.
+			 *
+			 * This is needed to avoid possible livelock in the
+			 * case that all segments in the iov cannot be copied
+			 * at once without a pagefault.
+			 */
+			sc = iov_iter_single_seg_count(i);
+			if (bytes > sc)
+				bytes = sc;
+			goto again;
+		}
+		iov_iter_advance(i, copied);
+		pos += copied;
+		written += copied;
+		balance_dirty_pages_ratelimited(mapping);
+		if (fatal_signal_pending(current)) {
+			status = -EINTR;
+			break;
+		}
+	} while (iov_iter_count(i));
 	if (cached_page)
 		page_cache_release(cached_page);
 	ntfs_debug("Done.  Returning %s (written 0x%lx, status %li).",
@ -2077,59 +1930,56 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
 }

 /**
- * ntfs_file_aio_write_nolock -
+ * ntfs_file_write_iter_nolock - write data to a file
+ * @iocb:	IO state structure (file, offset, etc.)
+ * @from:	iov_iter with data to write
+ *
+ * Basically the same as __generic_file_write_iter() except that it ends
+ * up calling ntfs_perform_write() instead of generic_perform_write() and that
+ * O_DIRECT is not implemented.
 */
-static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
-		const struct iovec *iov, unsigned long nr_segs, loff_t *ppos)
+static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb,
+		struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
-	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	loff_t pos;
-	size_t count;		/* after file limit checks */
-	ssize_t written, err;
+	loff_t pos = iocb->ki_pos;
+	ssize_t written = 0;
+	ssize_t err;
+	size_t count = iov_iter_count(from);

-	count = iov_length(iov, nr_segs);
-	pos = *ppos;
-	/* We can write back this queue in page reclaim. */
-	current->backing_dev_info = inode_to_bdi(inode);
-	written = 0;
-	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
-	if (err)
-		goto out;
-	if (!count)
-		goto out;
-	err = file_remove_suid(file);
-	if (err)
-		goto out;
-	err = file_update_time(file);
-	if (err)
-		goto out;
-	written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
-			count);
-out:
+	err = ntfs_prepare_file_for_write(file, &pos, &count);
+	if (count && !err) {
+		iov_iter_truncate(from, count);
+		written = ntfs_perform_write(file, from, pos);
+		if (likely(written >= 0))
+			iocb->ki_pos = pos + written;
+	}
 	current->backing_dev_info = NULL;
 	return written ? written : err;
 }

 /**
- * ntfs_file_aio_write -
+ * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock()
+ * @iocb:	IO state structure
+ * @from:	iov_iter with data to write
+ *
+ * Basically the same as generic_file_write_iter() except that it ends up
+ * calling ntfs_file_write_iter_nolock() instead of
+ * __generic_file_write_iter().
 */
-static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
+static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
-	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
+	struct inode *vi = file_inode(file);
 	ssize_t ret;

-	BUG_ON(iocb->ki_pos != pos);
-
-	mutex_lock(&inode->i_mutex);
-	ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
-	mutex_unlock(&inode->i_mutex);
+	mutex_lock(&vi->i_mutex);
+	ret = ntfs_file_write_iter_nolock(iocb, from);
+	mutex_unlock(&vi->i_mutex);
 	if (ret > 0) {
-		int err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+		ssize_t err;
+
+		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
 		if (err < 0)
 			ret = err;
 	}
@ -2197,37 +2047,17 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
 #endif /* NTFS_RW */

 const struct file_operations ntfs_file_ops = {
-	.llseek		= generic_file_llseek,	 /* Seek inside file. */
-	.read		= new_sync_read,	 /* Read from file. */
-	.read_iter	= generic_file_read_iter, /* Async read from file. */
+	.llseek		= generic_file_llseek,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
 #ifdef NTFS_RW
-	.write		= do_sync_write,	 /* Write to file. */
-	.aio_write	= ntfs_file_aio_write,	 /* Async write to file. */
-	/*.release	= ,*/			 /* Last file is closed.  See
-						    fs/ext2/file.c::
-						    ext2_release_file() for
-						    how to use this to discard
-						    preallocated space for
-						    write opened files. */
-	.fsync		= ntfs_file_fsync,	 /* Sync a file to disk. */
-	/*.aio_fsync	= ,*/			 /* Sync all outstanding async
-						    i/o operations on a
-						    kiocb. */
+	.write		= new_sync_write,
+	.write_iter	= ntfs_file_write_iter,
+	.fsync		= ntfs_file_fsync,
 #endif /* NTFS_RW */
-	/*.ioctl	= ,*/			 /* Perform function on the
-						    mounted filesystem. */
-	.mmap		= generic_file_mmap,	 /* Mmap file. */
-	.open		= ntfs_file_open,	 /* Open file. */
-	.splice_read	= generic_file_splice_read /* Zero-copy data send with
-						    the data source being on
-						    the ntfs partition.  We do
-						    not need to care about the
-						    data destination. */
-	/*.sendpage	= ,*/			 /* Zero-copy data send with
-						    the data destination being
-						    on the ntfs partition.  We
-						    do not need to care about
-						    the data source. */
+	.mmap		= generic_file_mmap,
+	.open		= ntfs_file_open,
+	.splice_read	= generic_file_splice_read,
 };

 const struct inode_operations ntfs_file_inode_ops = {
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@ -28,7 +28,6 @@
 #include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/log2.h>
-#include <linux/aio.h>

 #include "aops.h"
 #include "attrib.h"
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@ -29,6 +29,7 @@
 #include <linux/mpage.h>
 #include <linux/quotaops.h>
 #include <linux/blkdev.h>
+#include <linux/uio.h>

 #include <cluster/masklog.h>

--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@ -22,7 +22,7 @@
 #ifndef OCFS2_AOPS_H
 #define OCFS2_AOPS_H

-#include <linux/aio.h>
+#include <linux/fs.h>

 handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
 							 struct page *page,
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@ -2280,7 +2280,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
 		file->f_path.dentry->d_name.name,
 		(unsigned int)from->nr_segs);	/* GRRRRR */

-	if (iocb->ki_nbytes == 0)
+	if (count == 0)
 		return 0;

 	appending = file->f_flags & O_APPEND ? 1 : 0;
@ -2330,8 +2330,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
 	}

 	can_do_direct = direct_io;
-	ret = ocfs2_prepare_inode_for_write(file, ppos,
-					    iocb->ki_nbytes, appending,
+	ret = ocfs2_prepare_inode_for_write(file, ppos, count, appending,
 					    &can_do_direct, &has_refcount);
 	if (ret < 0) {
 		mlog_errno(ret);
@ -2339,8 +2338,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
 	}

 	if (direct_io && !is_sync_kiocb(iocb))
-		unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes,
-						      *ppos);
+		unaligned_dio = ocfs2_is_io_unaligned(inode, count, *ppos);

 	/*
 	 * We can't complete the direct I/O as requested, fall back to
--- a/fs/open.c
+++ b/fs/open.c
@ -570,6 +570,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
 	uid = make_kuid(current_user_ns(), user);
 	gid = make_kgid(current_user_ns(), group);

+retry_deleg:
 	newattrs.ia_valid =  ATTR_CTIME;
 	if (user != (uid_t) -1) {
 		if (!uid_valid(uid))
@ -586,7 +587,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
 	if (!S_ISDIR(inode->i_mode))
 		newattrs.ia_valid |=
 			ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
-retry_deleg:
 	mutex_lock(&inode->i_mutex);
 	error = security_path_chown(path, uid, gid);
 	if (!error)
@ -988,9 +988,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
 		return ERR_PTR(err);
 	if (flags & O_CREAT)
 		return ERR_PTR(-EINVAL);
-	if (!filename && (flags & O_DIRECTORY))
-		if (!dentry->d_inode->i_op->lookup)
-			return ERR_PTR(-ENOTDIR);
 	return do_file_open_root(dentry, mnt, filename, &op);
 }
 EXPORT_SYMBOL(file_open_root);
--- a/fs/pipe.c
+++ b/fs/pipe.c
@ -21,7 +21,6 @@
 #include <linux/audit.h>
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>

 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
--- a/fs/read_write.c
+++ b/fs/read_write.c
@ -9,7 +9,6 @@
 #include <linux/fcntl.h>
 #include <linux/file.h>
 #include <linux/uio.h>
-#include <linux/aio.h>
 #include <linux/fsnotify.h>
 #include <linux/security.h>
 #include <linux/export.h>
@ -343,13 +342,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)

 	init_sync_kiocb(&kiocb, file);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = iov_iter_count(iter);

 	iter->type |= READ;
 	ret = file->f_op->read_iter(&kiocb, iter);
-	if (ret == -EIOCBQUEUED)
-		ret = wait_on_sync_kiocb(&kiocb);
-
+	BUG_ON(ret == -EIOCBQUEUED);
 	if (ret > 0)
 		*ppos = kiocb.ki_pos;
 	return ret;
@ -366,13 +362,10 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)

 	init_sync_kiocb(&kiocb, file);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = iov_iter_count(iter);

 	iter->type |= WRITE;
 	ret = file->f_op->write_iter(&kiocb, iter);
-	if (ret == -EIOCBQUEUED)
-		ret = wait_on_sync_kiocb(&kiocb);
-
+	BUG_ON(ret == -EIOCBQUEUED);
 	if (ret > 0)
 		*ppos = kiocb.ki_pos;
 	return ret;
@ -426,11 +419,9 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp

 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;

 	ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&kiocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
@ -446,12 +437,10 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p

 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;
 	iov_iter_init(&iter, READ, &iov, 1, len);

 	ret = filp->f_op->read_iter(&kiocb, &iter);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&kiocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
@ -510,11 +499,9 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof

 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;

 	ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&kiocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
@ -530,12 +517,10 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo

 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;
 	iov_iter_init(&iter, WRITE, &iov, 1, len);

 	ret = filp->f_op->write_iter(&kiocb, &iter);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&kiocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
@ -710,60 +695,47 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 }
 EXPORT_SYMBOL(iov_shorten);

-static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov,
-		unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn)
+static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
+		loff_t *ppos, iter_fn_t fn)
 {
 	struct kiocb kiocb;
-	struct iov_iter iter;
 	ssize_t ret;

 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;

-	iov_iter_init(&iter, rw, iov, nr_segs, len);
-	ret = fn(&kiocb, &iter);
-	if (ret == -EIOCBQUEUED)
-		ret = wait_on_sync_kiocb(&kiocb);
+	ret = fn(&kiocb, iter);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }

-static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
-		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
+static ssize_t do_sync_readv_writev(struct file *filp, struct iov_iter *iter,
+		loff_t *ppos, iov_fn_t fn)
 {
 	struct kiocb kiocb;
 	ssize_t ret;

 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;

-	ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
-	if (ret == -EIOCBQUEUED)
-		ret = wait_on_sync_kiocb(&kiocb);
+	ret = fn(&kiocb, iter->iov, iter->nr_segs, kiocb.ki_pos);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }

 /* Do it by hand, with file-ops */
-static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
-		unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
+static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
+		loff_t *ppos, io_fn_t fn)
 {
-	struct iovec *vector = iov;
 	ssize_t ret = 0;

-	while (nr_segs > 0) {
-		void __user *base;
-		size_t len;
+	while (iov_iter_count(iter)) {
+		struct iovec iovec = iov_iter_iovec(iter);
 		ssize_t nr;

-		base = vector->iov_base;
-		len = vector->iov_len;
-		vector++;
-		nr_segs--;
-
-		nr = fn(filp, base, len, ppos);
+		nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);

 		if (nr < 0) {
 			if (!ret)
@ -771,8 +743,9 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
 			break;
 		}
 		ret += nr;
-		if (nr != len)
+		if (nr != iovec.iov_len)
 			break;
+		iov_iter_advance(iter, nr);
 	}

 	return ret;
@ -863,17 +836,20 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	size_t tot_len;
 	struct iovec iovstack[UIO_FASTIOV];
 	struct iovec *iov = iovstack;
+	struct iov_iter iter;
 	ssize_t ret;
 	io_fn_t fn;
 	iov_fn_t fnv;
 	iter_fn_t iter_fn;

-	ret = rw_copy_check_uvector(type, uvector, nr_segs,
-				    ARRAY_SIZE(iovstack), iovstack, &iov);
-	if (ret <= 0)
-		goto out;
+	ret = import_iovec(type, uvector, nr_segs,
+			   ARRAY_SIZE(iovstack), &iov, &iter);
+	if (ret < 0)
+		return ret;

-	tot_len = ret;
+	tot_len = iov_iter_count(&iter);
+	if (!tot_len)
+		goto out;
 	ret = rw_verify_area(type, file, pos, tot_len);
 	if (ret < 0)
 		goto out;
@ -891,20 +867,17 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	}

 	if (iter_fn)
-		ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
-						pos, iter_fn);
+		ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
 	else if (fnv)
-		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
-						pos, fnv);
+		ret = do_sync_readv_writev(file, &iter, pos, fnv);
 	else
-		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+		ret = do_loop_readv_writev(file, &iter, pos, fn);

 	if (type != READ)
 		file_end_write(file);

 out:
-	if (iov != iovstack)
-		kfree(iov);
+	kfree(iov);
 	if ((ret + (type == READ)) > 0) {
 		if (type == READ)
 			fsnotify_access(file);
@ -1043,17 +1016,20 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 	compat_ssize_t tot_len;
 	struct iovec iovstack[UIO_FASTIOV];
 	struct iovec *iov = iovstack;
+	struct iov_iter iter;
 	ssize_t ret;
 	io_fn_t fn;
 	iov_fn_t fnv;
 	iter_fn_t iter_fn;

-	ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
-					       UIO_FASTIOV, iovstack, &iov);
-	if (ret <= 0)
-		goto out;
+	ret = compat_import_iovec(type, uvector, nr_segs,
+				  UIO_FASTIOV, &iov, &iter);
+	if (ret < 0)
+		return ret;

-	tot_len = ret;
+	tot_len = iov_iter_count(&iter);
+	if (!tot_len)
+		goto out;
 	ret = rw_verify_area(type, file, pos, tot_len);
 	if (ret < 0)
 		goto out;
@ -1071,20 +1047,17 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 	}

 	if (iter_fn)
-		ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
-						pos, iter_fn);
+		ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
 	else if (fnv)
-		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
-						pos, fnv);
+		ret = do_sync_readv_writev(file, &iter, pos, fnv);
 	else
-		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+		ret = do_loop_readv_writev(file, &iter, pos, fn);

 	if (type != READ)
 		file_end_write(file);

 out:
-	if (iov != iovstack)
-		kfree(iov);
+	kfree(iov);
 	if ((ret + (type == READ)) > 0) {
 		if (type == READ)
 			fsnotify_access(file);
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@ -18,7 +18,7 @@
 #include <linux/writeback.h>
 #include <linux/quotaops.h>
 #include <linux/swap.h>
-#include <linux/aio.h>
+#include <linux/uio.h>

 int reiserfs_commit_write(struct file *f, struct page *page,
 			  unsigned from, unsigned to);
--- a/fs/splice.c
+++ b/fs/splice.c
@ -32,7 +32,6 @@
 #include <linux/gfp.h>
 #include <linux/socket.h>
 #include <linux/compat.h>
-#include <linux/aio.h>
 #include "internal.h"

 /*
@ -1534,34 +1533,29 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
 	struct iovec iovstack[UIO_FASTIOV];
 	struct iovec *iov = iovstack;
 	struct iov_iter iter;
-	ssize_t count;

 	pipe = get_pipe_info(file);
 	if (!pipe)
 		return -EBADF;

-	ret = rw_copy_check_uvector(READ, uiov, nr_segs,
-				    ARRAY_SIZE(iovstack), iovstack, &iov);
-	if (ret <= 0)
-		goto out;
-
-	count = ret;
-	iov_iter_init(&iter, READ, iov, nr_segs, count);
+	ret = import_iovec(READ, uiov, nr_segs,
+			   ARRAY_SIZE(iovstack), &iov, &iter);
+	if (ret < 0)
+		return ret;

+	sd.total_len = iov_iter_count(&iter);
 	sd.len = 0;
-	sd.total_len = count;
 	sd.flags = flags;
 	sd.u.data = &iter;
 	sd.pos = 0;

-	pipe_lock(pipe);
-	ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
-	pipe_unlock(pipe);
-
-out:
-	if (iov != iovstack)
-		kfree(iov);
+	if (sd.total_len) {
+		pipe_lock(pipe);
+		ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
+		pipe_unlock(pipe);
+	}

+	kfree(iov);
 	return ret;
 }

--- a/fs/stat.c
+++ b/fs/stat.c
@ -66,7 +66,7 @@ int vfs_getattr(struct path *path, struct kstat *stat)
 {
 	int retval;

-	retval = security_inode_getattr(path->mnt, path->dentry);
+	retval = security_inode_getattr(path);
 	if (retval)
 		return retval;
 	return vfs_getattr_nosec(path, stat);
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@ -50,7 +50,6 @@
 */

 #include "ubifs.h"
-#include <linux/aio.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/slab.h>
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@ -34,7 +34,7 @@
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
-#include <linux/aio.h>
+#include <linux/uio.h>

 #include "udf_i.h"
 #include "udf_sb.h"
@ -122,7 +122,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	int err, pos;
-	size_t count = iocb->ki_nbytes;
+	size_t count = iov_iter_count(from);
 	struct udf_inode_info *iinfo = UDF_I(inode);

 	mutex_lock(&inode->i_mutex);
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@ -38,7 +38,7 @@
 #include <linux/slab.h>
 #include <linux/crc-itu-t.h>
 #include <linux/mpage.h>
-#include <linux/aio.h>
+#include <linux/uio.h>

 #include "udf_i.h"
 #include "udf_sb.h"
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@ -31,7 +31,6 @@
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
 #include "xfs_bmap_btree.h"
-#include <linux/aio.h>
 #include <linux/gfp.h>
 #include <linux/mpage.h>
 #include <linux/pagevec.h>
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@ -38,7 +38,6 @@
 #include "xfs_icache.h"
 #include "xfs_pnfs.h"

-#include <linux/aio.h>
 #include <linux/dcache.h>
 #include <linux/falloc.h>
 #include <linux/pagevec.h>
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@ -1,86 +1,23 @@
 #ifndef __LINUX__AIO_H
 #define __LINUX__AIO_H

-#include <linux/list.h>
-#include <linux/workqueue.h>
 #include <linux/aio_abi.h>
-#include <linux/uio.h>
-#include <linux/rcupdate.h>
-
-#include <linux/atomic.h>

 struct kioctx;
 struct kiocb;
+struct mm_struct;

 #define KIOCB_KEY		0

-/*
- * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
- * cancelled or completed (this makes a certain amount of sense because
- * successful cancellation - io_cancel() - does deliver the completion to
- * userspace).
- *
- * And since most things don't implement kiocb cancellation and we'd really like
- * kiocb completion to be lockless when possible, we use ki_cancel to
- * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
- * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
- */
-#define KIOCB_CANCELLED		((void *) (~0ULL))
-
 typedef int (kiocb_cancel_fn)(struct kiocb *);

-struct kiocb {
-	struct file		*ki_filp;
-	struct kioctx		*ki_ctx;	/* NULL for sync ops */
-	kiocb_cancel_fn		*ki_cancel;
-	void			*private;
-
-	union {
-		void __user		*user;
-		struct task_struct	*tsk;
-	} ki_obj;
-
-	__u64			ki_user_data;	/* user's data for completion */
-	loff_t			ki_pos;
-	size_t			ki_nbytes;	/* copy of iocb->aio_nbytes */
-
-	struct list_head	ki_list;	/* the aio core uses this
-						 * for cancellation */
-
-	/*
-	 * If the aio_resfd field of the userspace iocb is not zero,
-	 * this is the underlying eventfd context to deliver events to.
-	 */
-	struct eventfd_ctx	*ki_eventfd;
-};
-
-static inline bool is_sync_kiocb(struct kiocb *kiocb)
-{
-	return kiocb->ki_ctx == NULL;
-}
-
-static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
-{
-	*kiocb = (struct kiocb) {
-			.ki_ctx = NULL,
-			.ki_filp = filp,
-			.ki_obj.tsk = current,
-		};
-}
-
 /* prototypes */
 #ifdef CONFIG_AIO
-extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb);
-extern void aio_complete(struct kiocb *iocb, long res, long res2);
-struct mm_struct;
 extern void exit_aio(struct mm_struct *mm);
 extern long do_io_submit(aio_context_t ctx_id, long nr,
 			 struct iocb __user *__user *iocbpp, bool compat);
 void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
 #else
-static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; }
-static inline void aio_complete(struct kiocb *iocb, long res, long res2) { }
-struct mm_struct;
 static inline void exit_aio(struct mm_struct *mm) { }
 static inline long do_io_submit(aio_context_t ctx_id, long nr,
 				struct iocb __user * __user *iocbpp,
@ -89,11 +26,6 @@ static inline void kiocb_set_cancel_fn(struct kiocb *req,
 				       kiocb_cancel_fn *cancel) { }
 #endif /* CONFIG_AIO */

-static inline struct kiocb *list_kiocb(struct list_head *h)
-{
-	return list_entry(h, struct kiocb, ki_list);
-}
-
 /* for sysctl: */
 extern unsigned long aio_nr;
 extern unsigned long aio_max_nr;
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@ -314,6 +314,28 @@ struct page;
 struct address_space;
 struct writeback_control;

+#define IOCB_EVENTFD		(1 << 0)
+
+struct kiocb {
+	struct file		*ki_filp;
+	loff_t			ki_pos;
+	void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
+	void			*private;
+	int			ki_flags;
+};
+
+static inline bool is_sync_kiocb(struct kiocb *kiocb)
+{
+	return kiocb->ki_complete == NULL;
+}
+
+static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
+{
+	*kiocb = (struct kiocb) {
+		.ki_filp = filp,
+	};
+}
+
 /*
 * "descriptor" for what we're up to with a read.
 * This allows us to use the same read code yet
@ -2145,7 +2167,7 @@ struct filename {
 	const __user char	*uptr;	/* original userland pointer */
 	struct audit_names	*aname;
 	int			refcnt;
-	bool			separate; /* should "name" be freed? */
+	const char		iname[];
 };

 extern long vfs_truncate(struct path *, loff_t);
--- a/include/linux/security.h
+++ b/include/linux/security.h
@ -1556,7 +1556,7 @@ struct security_operations {
 	int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd);
 	int (*inode_permission) (struct inode *inode, int mask);
 	int (*inode_setattr)	(struct dentry *dentry, struct iattr *attr);
-	int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry);
+	int (*inode_getattr) (const struct path *path);
 	int (*inode_setxattr) (struct dentry *dentry, const char *name,
 			       const void *value, size_t size, int flags);
 	void (*inode_post_setxattr) (struct dentry *dentry, const char *name,
@ -1843,7 +1843,7 @@ int security_inode_readlink(struct dentry *dentry);
 int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
 int security_inode_permission(struct inode *inode, int mask);
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
-int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry);
+int security_inode_getattr(const struct path *path);
 int security_inode_setxattr(struct dentry *dentry, const char *name,
 			    const void *value, size_t size, int flags);
 void security_inode_post_setxattr(struct dentry *dentry, const char *name,
@ -2259,8 +2259,7 @@ static inline int security_inode_setattr(struct dentry *dentry,
 	return 0;
 }

-static inline int security_inode_getattr(struct vfsmount *mnt,
-					  struct dentry *dentry)
+static inline int security_inode_getattr(const struct path *path)
 {
 	return 0;
 }
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@ -76,6 +76,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
+int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
@ -139,4 +140,18 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
 size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);

+int import_iovec(int type, const struct iovec __user * uvector,
+		 unsigned nr_segs, unsigned fast_segs,
+		 struct iovec **iov, struct iov_iter *i);
+
+#ifdef CONFIG_COMPAT
+struct compat_iovec;
+int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
+		 unsigned nr_segs, unsigned fast_segs,
+		 struct iovec **iov, struct iov_iter *i);
+#endif
+
+int import_single_range(int type, void __user *buf, size_t len,
+		 struct iovec *iov, struct iov_iter *i);
+
 #endif
--- a/include/net/sock.h
+++ b/include/net/sock.h
@ -57,7 +57,6 @@
 #include <linux/page_counter.h>
 #include <linux/memcontrol.h>
 #include <linux/static_key.h>
-#include <linux/aio.h>
 #include <linux/sched.h>

 #include <linux/filter.h>
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@ -32,7 +32,6 @@
 #include <linux/security.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
-#include <linux/aio.h>
 #include <linux/syscalls.h>
 #include <linux/kexec.h>
 #include <linux/kdb.h>
@ -46,6 +45,7 @@
 #include <linux/irq_work.h>
 #include <linux/utsname.h>
 #include <linux/ctype.h>
+#include <linux/uio.h>

 #include <asm/uaccess.h>

@ -521,7 +521,7 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
 	int i;
 	int level = default_message_loglevel;
 	int facility = 1;	/* LOG_USER */
-	size_t len = iocb->ki_nbytes;
+	size_t len = iov_iter_count(from);
 	ssize_t ret = len;

 	if (len > LOG_LINE_MAX)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@ -19,6 +19,7 @@
 */

 #include <linux/module.h>
+#include <linux/aio.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@ -317,6 +317,32 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 }
 EXPORT_SYMBOL(iov_iter_fault_in_readable);

+/*
+ * Fault in one or more iovecs of the given iov_iter, to a maximum length of
+ * bytes.  For each iovec, fault in each page that constitutes the iovec.
+ *
+ * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
+ * because it is an invalid address).
+ */
+int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
+{
+	size_t skip = i->iov_offset;
+	const struct iovec *iov;
+	int err;
+	struct iovec v;
+
+	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
+		iterate_iovec(i, bytes, v, iov, skip, ({
+			err = fault_in_multipages_readable(v.iov_base,
+					v.iov_len);
+			if (unlikely(err))
+			return err;
+		0;}))
+	}
+	return 0;
+}
+EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable);
+
 void iov_iter_init(struct iov_iter *i, int direction,
 			const struct iovec *iov, unsigned long nr_segs,
 			size_t count)
@ -766,3 +792,60 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 				   flags);
 }
 EXPORT_SYMBOL(dup_iter);
+
+int import_iovec(int type, const struct iovec __user * uvector,
+		 unsigned nr_segs, unsigned fast_segs,
+		 struct iovec **iov, struct iov_iter *i)
+{
+	ssize_t n;
+	struct iovec *p;
+	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
+				  *iov, &p);
+	if (n < 0) {
+		if (p != *iov)
+			kfree(p);
+		*iov = NULL;
+		return n;
+	}
+	iov_iter_init(i, type, p, nr_segs, n);
+	*iov = p == *iov ? NULL : p;
+	return 0;
+}
+EXPORT_SYMBOL(import_iovec);
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
+		 unsigned nr_segs, unsigned fast_segs,
+		 struct iovec **iov, struct iov_iter *i)
+{
+	ssize_t n;
+	struct iovec *p;
+	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
+				  *iov, &p);
+	if (n < 0) {
+		if (p != *iov)
+			kfree(p);
+		*iov = NULL;
+		return n;
+	}
+	iov_iter_init(i, type, p, nr_segs, n);
+	*iov = p == *iov ? NULL : p;
+	return 0;
+}
+#endif
+
+int import_single_range(int rw, void __user *buf, size_t len,
+		 struct iovec *iov, struct iov_iter *i)
+{
+	if (len > MAX_RW_COUNT)
+		len = MAX_RW_COUNT;
+	if (unlikely(!access_ok(!rw, buf, len)))
+		return -EFAULT;
+
+	iov->iov_base = buf;
+	iov->iov_len = len;
+	iov_iter_init(i, rw, iov, 1, len);
+	return 0;
+}
--- a/mm/filemap.c
+++ b/mm/filemap.c
@ -13,7 +13,6 @@
 #include <linux/compiler.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
-#include <linux/aio.h>
 #include <linux/capability.h>
 #include <linux/kernel_stat.h>
 #include <linux/gfp.h>
--- a/mm/page_io.c
+++ b/mm/page_io.c
@ -20,8 +20,8 @@
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
 #include <linux/frontswap.h>
-#include <linux/aio.h>
 #include <linux/blkdev.h>
+#include <linux/uio.h>
 #include <asm/pgtable.h>

 static struct bio *get_swap_bio(gfp_t gfp_flags,
@ -274,7 +274,6 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
-		kiocb.ki_nbytes = PAGE_SIZE;

 		set_page_writeback(page);
 		unlock_page(page);
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@ -257,22 +257,18 @@ static ssize_t process_vm_rw(pid_t pid,
 	struct iovec *iov_r = iovstack_r;
 	struct iov_iter iter;
 	ssize_t rc;
+	int dir = vm_write ? WRITE : READ;

 	if (flags != 0)
 		return -EINVAL;

 	/* Check iovecs */
-	if (vm_write)
-		rc = rw_copy_check_uvector(WRITE, lvec, liovcnt, UIO_FASTIOV,
-					   iovstack_l, &iov_l);
-	else
-		rc = rw_copy_check_uvector(READ, lvec, liovcnt, UIO_FASTIOV,
-					   iovstack_l, &iov_l);
-	if (rc <= 0)
+	rc = import_iovec(dir, lvec, liovcnt, UIO_FASTIOV, &iov_l, &iter);
+	if (rc < 0)
+		return rc;
+	if (!iov_iter_count(&iter))
 		goto free_iovecs;

-	iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc);
-
 	rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
 				   iovstack_r, &iov_r);
 	if (rc <= 0)
@ -283,8 +279,7 @@ static ssize_t process_vm_rw(pid_t pid,
 free_iovecs:
 	if (iov_r != iovstack_r)
 		kfree(iov_r);
-	if (iov_l != iovstack_l)
-		kfree(iov_l);
+	kfree(iov_l);

 	return rc;
 }
@ -320,21 +315,16 @@ compat_process_vm_rw(compat_pid_t pid,
 	struct iovec *iov_r = iovstack_r;
 	struct iov_iter iter;
 	ssize_t rc = -EFAULT;
+	int dir = vm_write ? WRITE : READ;

 	if (flags != 0)
 		return -EINVAL;

-	if (vm_write)
-		rc = compat_rw_copy_check_uvector(WRITE, lvec, liovcnt,
-						  UIO_FASTIOV, iovstack_l,
-						  &iov_l);
-	else
-		rc = compat_rw_copy_check_uvector(READ, lvec, liovcnt,
-						  UIO_FASTIOV, iovstack_l,
-						  &iov_l);
-	if (rc <= 0)
+	rc = compat_import_iovec(dir, lvec, liovcnt, UIO_FASTIOV, &iov_l, &iter);
+	if (rc < 0)
+		return rc;
+	if (!iov_iter_count(&iter))
 		goto free_iovecs;
-	iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc);
 	rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
 					  UIO_FASTIOV, iovstack_r,
 					  &iov_r);
@ -346,8 +336,7 @@ compat_process_vm_rw(compat_pid_t pid,
 free_iovecs:
 	if (iov_r != iovstack_r)
 		kfree(iov_r);
-	if (iov_l != iovstack_l)
-		kfree(iov_l);
+	kfree(iov_l);
 	return rc;
 }

--- a/mm/shmem.c
+++ b/mm/shmem.c
@ -31,7 +31,7 @@
 #include <linux/mm.h>
 #include <linux/export.h>
 #include <linux/swap.h>
-#include <linux/aio.h>
+#include <linux/uio.h>

 static struct vfsmount *shm_mnt;

--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@ -46,7 +46,6 @@
 #include <linux/stddef.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
-#include <linux/aio.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
--- a/net/socket.c
+++ b/net/socket.c
@ -633,8 +633,7 @@ static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 	init_sync_kiocb(&iocb, NULL);
 	ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) :
 		      __sock_sendmsg(&iocb, sock, msg, size);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&iocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	return ret;
 }

@ -766,8 +765,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg,

 	init_sync_kiocb(&iocb, NULL);
 	ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&iocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	return ret;
 }
 EXPORT_SYMBOL(sock_recvmsg);
@ -780,8 +778,7 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,

 	init_sync_kiocb(&iocb, NULL);
 	ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&iocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	return ret;
 }

@ -858,11 +855,11 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	if (iocb->ki_pos != 0)
 		return -ESPIPE;

-	if (iocb->ki_nbytes == 0)	/* Match SYS5 behaviour */
+	if (!iov_iter_count(to))	/* Match SYS5 behaviour */
 		return 0;

 	res = __sock_recvmsg(iocb, sock, &msg,
-			     iocb->ki_nbytes, msg.msg_flags);
+			     iov_iter_count(to), msg.msg_flags);
 	*to = msg.msg_iter;
 	return res;
 }
@ -883,7 +880,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (sock->type == SOCK_SEQPACKET)
 		msg.msg_flags |= MSG_EOR;

-	res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes);
+	res = __sock_sendmsg(iocb, sock, &msg, iov_iter_count(from));
 	*from = msg.msg_iter;
 	return res;
 }
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@ -364,12 +364,12 @@ static int apparmor_path_chown(struct path *path, kuid_t uid, kgid_t gid)
 	return common_perm(OP_CHOWN, path, AA_MAY_CHOWN, &cond);
 }

-static int apparmor_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+static int apparmor_inode_getattr(const struct path *path)
 {
-	if (!mediated_filesystem(dentry))
+	if (!mediated_filesystem(path->dentry))
 		return 0;

-	return common_perm_mnt_dentry(OP_GETATTR, mnt, dentry,
+	return common_perm_mnt_dentry(OP_GETATTR, path->mnt, path->dentry,
 				      AA_MAY_META_READ);
 }

--- a/security/capability.c
+++ b/security/capability.c
@ -225,7 +225,7 @@ static int cap_inode_setattr(struct dentry *dentry, struct iattr *iattr)
 	return 0;
 }

-static int cap_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+static int cap_inode_getattr(const struct path *path)
 {
 	return 0;
 }
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@ -31,30 +31,21 @@ static long compat_keyctl_instantiate_key_iov(
 	key_serial_t ringid)
 {
 	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+	struct iov_iter from;
 	long ret;

-	if (!_payload_iov || !ioc)
-		goto no_payload;
+	if (!_payload_iov)
+		ioc = 0;

-	ret = compat_rw_copy_check_uvector(WRITE, _payload_iov, ioc,
-					   ARRAY_SIZE(iovstack),
-					   iovstack, &iov);
+	ret = compat_import_iovec(WRITE, _payload_iov, ioc,
+				  ARRAY_SIZE(iovstack), &iov,
+				  &from);
 	if (ret < 0)
-		goto err;
-	if (ret == 0)
-		goto no_payload_free;
+		return ret;

-	ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid);
-err:
-	if (iov != iovstack)
-		kfree(iov);
+	ret = keyctl_instantiate_key_common(id, &from, ringid);
+	kfree(iov);
 	return ret;
-
-no_payload_free:
-	if (iov != iovstack)
-		kfree(iov);
-no_payload:
-	return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
 }

 /*
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@ -243,9 +243,10 @@ extern long keyctl_instantiate_key_iov(key_serial_t,
 				       unsigned, key_serial_t);
 extern long keyctl_invalidate_key(key_serial_t);

+struct iov_iter;
 extern long keyctl_instantiate_key_common(key_serial_t,
-					  const struct iovec *,
-					  unsigned, size_t, key_serial_t);
+					  struct iov_iter *,
+					  key_serial_t);
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 extern long keyctl_get_persistent(uid_t, key_serial_t);
 extern unsigned persistent_keyring_expiry;
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@ -997,21 +997,6 @@ static int keyctl_change_reqkey_auth(struct key *key)
 	return commit_creds(new);
 }

-/*
- * Copy the iovec data from userspace
- */
-static long copy_from_user_iovec(void *buffer, const struct iovec *iov,
-				 unsigned ioc)
-{
-	for (; ioc > 0; ioc--) {
-		if (copy_from_user(buffer, iov->iov_base, iov->iov_len) != 0)
-			return -EFAULT;
-		buffer += iov->iov_len;
-		iov++;
-	}
-	return 0;
-}
-
 /*
 * Instantiate a key with the specified payload and link the key into the
 * destination keyring if one is given.
@ -1022,20 +1007,21 @@ static long copy_from_user_iovec(void *buffer, const struct iovec *iov,
 * If successful, 0 will be returned.
 */
 long keyctl_instantiate_key_common(key_serial_t id,
-				   const struct iovec *payload_iov,
-				   unsigned ioc,
-				   size_t plen,
+				   struct iov_iter *from,
 				   key_serial_t ringid)
 {
 	const struct cred *cred = current_cred();
 	struct request_key_auth *rka;
 	struct key *instkey, *dest_keyring;
+	size_t plen = from ? iov_iter_count(from) : 0;
 	void *payload;
 	long ret;
-	bool vm = false;

 	kenter("%d,,%zu,%d", id, plen, ringid);

+	if (!plen)
+		from = NULL;
+
 	ret = -EINVAL;
 	if (plen > 1024 * 1024 - 1)
 		goto error;
@ -1054,20 +1040,19 @@ long keyctl_instantiate_key_common(key_serial_t id,
 	/* pull the payload in if one was supplied */
 	payload = NULL;

-	if (payload_iov) {
+	if (from) {
 		ret = -ENOMEM;
 		payload = kmalloc(plen, GFP_KERNEL);
 		if (!payload) {
 			if (plen <= PAGE_SIZE)
 				goto error;
-			vm = true;
 			payload = vmalloc(plen);
 			if (!payload)
 				goto error;
 		}

-		ret = copy_from_user_iovec(payload, payload_iov, ioc);
-		if (ret < 0)
+		ret = -EFAULT;
+		if (copy_from_iter(payload, plen, from) != plen)
 			goto error2;
 	}

@ -1089,10 +1074,7 @@ long keyctl_instantiate_key_common(key_serial_t id,
 		keyctl_change_reqkey_auth(NULL);

 error2:
-	if (!vm)
-		kfree(payload);
-	else
-		vfree(payload);
+	kvfree(payload);
 error:
 	return ret;
 }
@ -1112,15 +1094,19 @@ long keyctl_instantiate_key(key_serial_t id,
 			    key_serial_t ringid)
 {
 	if (_payload && plen) {
-		struct iovec iov[1] = {
-			[0].iov_base = (void __user *)_payload,
-			[0].iov_len  = plen
-		};
+		struct iovec iov;
+		struct iov_iter from;
+		int ret;

-		return keyctl_instantiate_key_common(id, iov, 1, plen, ringid);
+		ret = import_single_range(WRITE, (void __user *)_payload, plen,
+					  &iov, &from);
+		if (unlikely(ret))
+			return ret;
+
+		return keyctl_instantiate_key_common(id, &from, ringid);
 	}

-	return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
+	return keyctl_instantiate_key_common(id, NULL, ringid);
 }

 /*
@ -1138,29 +1124,19 @@ long keyctl_instantiate_key_iov(key_serial_t id,
 				key_serial_t ringid)
 {
 	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+	struct iov_iter from;
 	long ret;

-	if (!_payload_iov || !ioc)
-		goto no_payload;
+	if (!_payload_iov)
+		ioc = 0;

-	ret = rw_copy_check_uvector(WRITE, _payload_iov, ioc,
-				    ARRAY_SIZE(iovstack), iovstack, &iov);
+	ret = import_iovec(WRITE, _payload_iov, ioc,
+				    ARRAY_SIZE(iovstack), &iov, &from);
 	if (ret < 0)
-		goto err;
-	if (ret == 0)
-		goto no_payload_free;
-
-	ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid);
-err:
-	if (iov != iovstack)
-		kfree(iov);
+		return ret;
+	ret = keyctl_instantiate_key_common(id, &from, ringid);
+	kfree(iov);
 	return ret;
-
-no_payload_free:
-	if (iov != iovstack)
-		kfree(iov);
-no_payload:
-	return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
 }

 /*
--- a/security/security.c
+++ b/security/security.c
@ -608,11 +608,11 @@ int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
 }
 EXPORT_SYMBOL_GPL(security_inode_setattr);

-int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+int security_inode_getattr(const struct path *path)
 {
-	if (unlikely(IS_PRIVATE(dentry->d_inode)))
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
 		return 0;
-	return security_ops->inode_getattr(mnt, dentry);
+	return security_ops->inode_getattr(path);
 }

 int security_inode_setxattr(struct dentry *dentry, const char *name,
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@ -1623,7 +1623,7 @@ static inline int dentry_has_perm(const struct cred *cred,
   the path to help the auditing code to more easily generate the
   pathname if needed. */
 static inline int path_has_perm(const struct cred *cred,
-				struct path *path,
+				const struct path *path,
 				u32 av)
 {
 	struct inode *inode = path->dentry->d_inode;
@ -2954,15 +2954,9 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
 	return dentry_has_perm(cred, dentry, av);
 }

-static int selinux_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+static int selinux_inode_getattr(const struct path *path)
 {
-	const struct cred *cred = current_cred();
-	struct path path;
-
-	path.dentry = dentry;
-	path.mnt = mnt;
-
-	return path_has_perm(cred, &path, FILE__GETATTR);
+	return path_has_perm(current_cred(), path, FILE__GETATTR);
 }

 static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name)
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@ -1034,19 +1034,16 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
 *
 * Returns 0 if access is permitted, an error code otherwise
 */
-static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+static int smack_inode_getattr(const struct path *path)
 {
 	struct smk_audit_info ad;
-	struct path path;
+	struct inode *inode = path->dentry->d_inode;
 	int rc;

-	path.dentry = dentry;
-	path.mnt = mnt;
-
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
-	smk_ad_setfield_u_fs_path(&ad, path);
-	rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad);
-	rc = smk_bu_inode(dentry->d_inode, MAY_READ, rc);
+	smk_ad_setfield_u_fs_path(&ad, *path);
+	rc = smk_curacc(smk_of_inode(inode), MAY_READ, &ad);
+	rc = smk_bu_inode(inode, MAY_READ, rc);
 	return rc;
 }

--- a/security/tomoyo/common.h
+++ b/security/tomoyo/common.h
@ -945,7 +945,7 @@ char *tomoyo_encode2(const char *str, int str_len);
 char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt,
 		      va_list args);
 char *tomoyo_read_token(struct tomoyo_acl_param *param);
-char *tomoyo_realpath_from_path(struct path *path);
+char *tomoyo_realpath_from_path(const struct path *path);
 char *tomoyo_realpath_nofollow(const char *pathname);
 const char *tomoyo_get_exe(void);
 const char *tomoyo_yesno(const unsigned int value);
@ -978,7 +978,7 @@ int tomoyo_path2_perm(const u8 operation, struct path *path1,
 		      struct path *path2);
 int tomoyo_path_number_perm(const u8 operation, struct path *path,
 			    unsigned long number);
-int tomoyo_path_perm(const u8 operation, struct path *path,
+int tomoyo_path_perm(const u8 operation, const struct path *path,
 		     const char *target);
 unsigned int tomoyo_poll_control(struct file *file, poll_table *wait);
 unsigned int tomoyo_poll_log(struct file *file, poll_table *wait);
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@ -145,7 +145,7 @@ static void tomoyo_add_slash(struct tomoyo_path_info *buf)
 *
 * Returns true on success, false otherwise.
 */
-static bool tomoyo_get_realpath(struct tomoyo_path_info *buf, struct path *path)
+static bool tomoyo_get_realpath(struct tomoyo_path_info *buf, const struct path *path)
 {
 	buf->name = tomoyo_realpath_from_path(path);
 	if (buf->name) {
@ -782,7 +782,7 @@ int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
 *
 * Returns 0 on success, negative value otherwise.
 */
-int tomoyo_path_perm(const u8 operation, struct path *path, const char *target)
+int tomoyo_path_perm(const u8 operation, const struct path *path, const char *target)
 {
 	struct tomoyo_request_info r;
 	struct tomoyo_obj_info obj = {
--- a/Show More
+++ b/Show More