linux_dsm_epyc7002/drivers/block/loop.h
Ming Lei bc07c10a36 block: loop: support DIO & AIO
There are at least 3 advantages to use direct I/O and AIO on
read/write loop's backing file:

1) double cache can be avoided, then memory usage gets
decreased a lot

2) not like user space direct I/O, there isn't cost of
pinning pages

3) avoid context switch for obtaining good throughput
- in buffered file read, random I/O top throughput is often obtained
only if they are submitted concurrently from lots of tasks; but for
sequential I/O, most of times they can be hit from page cache, so
concurrent submissions often introduce unnecessary context switch
and can't improve throughput much. There was such discussion[1]
to use non-blocking I/O to improve the problem for application.
- with direct I/O and AIO, concurrent submissions can be
avoided and random read throughput can't be affected meantime

xfstests(-g auto, ext4) is basically passed when running with
direct I/O(aio), one exception is generic/232, but it failed in
loop buffered I/O(4.2-rc6-next-20150814) too.

Follows the fio test result for performance purpose:
	4 jobs fio test inside ext4 file system over loop block

1) How to run
	- KVM: 4 VCPUs, 2G RAM
	- linux kernel: 4.2-rc6-next-20150814(base) with the patchset
	- the loop block is over one image on SSD.
	- linux psync, 4 jobs, size 1500M, ext4 over loop block
	- test result: IOPS from fio output

2) Throughput(IOPS) becomes a bit better with direct I/O(aio)
        -------------------------------------------------------------
        test cases          |randread   |read   |randwrite  |write  |
        -------------------------------------------------------------
        base                |8015       |113811 |67442      |106978
        -------------------------------------------------------------
        base+loop aio       |8136       |125040 |67811      |111376
        -------------------------------------------------------------

- somehow, it should be caused by more page cache avaiable for
application or one extra page copy is avoided in case of direct I/O

3) context switch
        - context switch decreased by ~50% with loop direct I/O(aio)
	compared with loop buffered I/O(4.2-rc6-next-20150814)

4) memory usage from /proc/meminfo
        -------------------------------------------------------------
                                   | Buffers       | Cached
        -------------------------------------------------------------
        base                       | > 760MB       | ~950MB
        -------------------------------------------------------------
        base+loop direct I/O(aio)  | < 5MB         | ~1.6GB
        -------------------------------------------------------------

- so there are much more page caches available for application with
direct I/O

[1] https://lwn.net/Articles/612483/

Signed-off-by: Ming Lei <ming.lei@canonical.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
2015-09-23 11:01:16 -06:00

94 lines
2.3 KiB
C

/*
* loop.h
*
* Written by Theodore Ts'o, 3/29/93.
*
* Copyright 1993 by Theodore Ts'o. Redistribution of this file is
* permitted under the GNU General Public License.
*/
#ifndef _LINUX_LOOP_H
#define _LINUX_LOOP_H
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/kthread.h>
#include <uapi/linux/loop.h>
/* Possible states of device */
enum {
Lo_unbound,
Lo_bound,
Lo_rundown,
};
struct loop_func_table;
struct loop_device {
int lo_number;
atomic_t lo_refcnt;
loff_t lo_offset;
loff_t lo_sizelimit;
int lo_flags;
int (*transfer)(struct loop_device *, int cmd,
struct page *raw_page, unsigned raw_off,
struct page *loop_page, unsigned loop_off,
int size, sector_t real_block);
char lo_file_name[LO_NAME_SIZE];
char lo_crypt_name[LO_NAME_SIZE];
char lo_encrypt_key[LO_KEY_SIZE];
int lo_encrypt_key_size;
struct loop_func_table *lo_encryption;
__u32 lo_init[2];
kuid_t lo_key_owner; /* Who set the key */
int (*ioctl)(struct loop_device *, int cmd,
unsigned long arg);
struct file * lo_backing_file;
struct block_device *lo_device;
unsigned lo_blocksize;
void *key_data;
gfp_t old_gfp_mask;
spinlock_t lo_lock;
int lo_state;
struct mutex lo_ctl_mutex;
struct kthread_worker worker;
struct task_struct *worker_task;
bool use_dio;
struct request_queue *lo_queue;
struct blk_mq_tag_set tag_set;
struct gendisk *lo_disk;
};
struct loop_cmd {
struct kthread_work work;
struct request *rq;
struct list_head list;
bool use_aio; /* use AIO interface to handle I/O */
struct kiocb iocb;
};
/* Support for loadable transfer modules */
struct loop_func_table {
int number; /* filter type */
int (*transfer)(struct loop_device *lo, int cmd,
struct page *raw_page, unsigned raw_off,
struct page *loop_page, unsigned loop_off,
int size, sector_t real_block);
int (*init)(struct loop_device *, const struct loop_info64 *);
/* release is called from loop_unregister_transfer or clr_fd */
int (*release)(struct loop_device *);
int (*ioctl)(struct loop_device *, int cmd, unsigned long arg);
struct module *owner;
};
int loop_register_transfer(struct loop_func_table *funcs);
int loop_unregister_transfer(int number);
#endif