linux_dsm_epyc7002/include/linux/elevator.h
Tahsin Erdogan 72ef799b3f block: do not merge requests without consulting with io scheduler
Before merging a bio into an existing request, io scheduler is called to
get its approval first. However, the requests that come from a plug
flush may get merged by block layer without consulting with io
scheduler.

In case of CFQ, this can cause fairness problems. For instance, if a
request gets merged into a low weight cgroup's request, high weight cgroup
now will depend on low weight cgroup to get scheduled. If high weigt cgroup
needs that io request to complete before submitting more requests, then it
will also lose its timeslice.

Following script demonstrates the problem. Group g1 has a low weight, g2
and g3 have equal high weights but g2's requests are adjacent to g1's
requests so they are subject to merging. Due to these merges, g2 gets
poor disk time allocation.

cat > cfq-merge-repro.sh << "EOF"
#!/bin/bash
set -e

IO_ROOT=/mnt-cgroup/io

mkdir -p $IO_ROOT

if ! mount | grep -qw $IO_ROOT; then
  mount -t cgroup none -oblkio $IO_ROOT
fi

cd $IO_ROOT

for i in g1 g2 g3; do
  if [ -d $i ]; then
    rmdir $i
  fi
done

mkdir g1 && echo 10 > g1/blkio.weight
mkdir g2 && echo 495 > g2/blkio.weight
mkdir g3 && echo 495 > g3/blkio.weight

RUNTIME=10

(echo $BASHPID > g1/cgroup.procs &&
 fio --readonly --name name1 --filename /dev/sdb \
     --rw read --size 64k --bs 64k --time_based \
     --runtime=$RUNTIME --offset=0k &> /dev/null)&

(echo $BASHPID > g2/cgroup.procs &&
 fio --readonly --name name1 --filename /dev/sdb \
     --rw read --size 64k --bs 64k --time_based \
     --runtime=$RUNTIME --offset=64k &> /dev/null)&

(echo $BASHPID > g3/cgroup.procs &&
 fio --readonly --name name1 --filename /dev/sdb \
     --rw read --size 64k --bs 64k --time_based \
     --runtime=$RUNTIME --offset=256k &> /dev/null)&

sleep $((RUNTIME+1))

for i in g1 g2 g3; do
  echo ---- $i ----
  cat $i/blkio.time
done

EOF
# ./cfq-merge-repro.sh
---- g1 ----
8:16 162
---- g2 ----
8:16 165
---- g3 ----
8:16 686

After applying the patch:

# ./cfq-merge-repro.sh
---- g1 ----
8:16 90
---- g2 ----
8:16 445
---- g3 ----
8:16 471

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2016-07-20 21:35:12 -06:00

220 lines
7.1 KiB
C

#ifndef _LINUX_ELEVATOR_H
#define _LINUX_ELEVATOR_H
#include <linux/percpu.h>
#include <linux/hashtable.h>
#ifdef CONFIG_BLOCK
struct io_cq;
struct elevator_type;
typedef int (elevator_merge_fn) (struct request_queue *, struct request **,
struct bio *);
typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *);
typedef void (elevator_merged_fn) (struct request_queue *, struct request *, int);
typedef int (elevator_allow_bio_merge_fn) (struct request_queue *,
struct request *, struct bio *);
typedef int (elevator_allow_rq_merge_fn) (struct request_queue *,
struct request *, struct request *);
typedef void (elevator_bio_merged_fn) (struct request_queue *,
struct request *, struct bio *);
typedef int (elevator_dispatch_fn) (struct request_queue *, int);
typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
typedef int (elevator_may_queue_fn) (struct request_queue *, int, int);
typedef void (elevator_init_icq_fn) (struct io_cq *);
typedef void (elevator_exit_icq_fn) (struct io_cq *);
typedef int (elevator_set_req_fn) (struct request_queue *, struct request *,
struct bio *, gfp_t);
typedef void (elevator_put_req_fn) (struct request *);
typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
typedef int (elevator_init_fn) (struct request_queue *,
struct elevator_type *e);
typedef void (elevator_exit_fn) (struct elevator_queue *);
typedef void (elevator_registered_fn) (struct request_queue *);
struct elevator_ops
{
elevator_merge_fn *elevator_merge_fn;
elevator_merged_fn *elevator_merged_fn;
elevator_merge_req_fn *elevator_merge_req_fn;
elevator_allow_bio_merge_fn *elevator_allow_bio_merge_fn;
elevator_allow_rq_merge_fn *elevator_allow_rq_merge_fn;
elevator_bio_merged_fn *elevator_bio_merged_fn;
elevator_dispatch_fn *elevator_dispatch_fn;
elevator_add_req_fn *elevator_add_req_fn;
elevator_activate_req_fn *elevator_activate_req_fn;
elevator_deactivate_req_fn *elevator_deactivate_req_fn;
elevator_completed_req_fn *elevator_completed_req_fn;
elevator_request_list_fn *elevator_former_req_fn;
elevator_request_list_fn *elevator_latter_req_fn;
elevator_init_icq_fn *elevator_init_icq_fn; /* see iocontext.h */
elevator_exit_icq_fn *elevator_exit_icq_fn; /* ditto */
elevator_set_req_fn *elevator_set_req_fn;
elevator_put_req_fn *elevator_put_req_fn;
elevator_may_queue_fn *elevator_may_queue_fn;
elevator_init_fn *elevator_init_fn;
elevator_exit_fn *elevator_exit_fn;
elevator_registered_fn *elevator_registered_fn;
};
#define ELV_NAME_MAX (16)
struct elv_fs_entry {
struct attribute attr;
ssize_t (*show)(struct elevator_queue *, char *);
ssize_t (*store)(struct elevator_queue *, const char *, size_t);
};
/*
* identifies an elevator type, such as AS or deadline
*/
struct elevator_type
{
/* managed by elevator core */
struct kmem_cache *icq_cache;
/* fields provided by elevator implementation */
struct elevator_ops ops;
size_t icq_size; /* see iocontext.h */
size_t icq_align; /* ditto */
struct elv_fs_entry *elevator_attrs;
char elevator_name[ELV_NAME_MAX];
struct module *elevator_owner;
/* managed by elevator core */
char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */
struct list_head list;
};
#define ELV_HASH_BITS 6
/*
* each queue has an elevator_queue associated with it
*/
struct elevator_queue
{
struct elevator_type *type;
void *elevator_data;
struct kobject kobj;
struct mutex sysfs_lock;
unsigned int registered:1;
DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
};
/*
* block elevator interface
*/
extern void elv_dispatch_sort(struct request_queue *, struct request *);
extern void elv_dispatch_add_tail(struct request_queue *, struct request *);
extern void elv_add_request(struct request_queue *, struct request *, int);
extern void __elv_add_request(struct request_queue *, struct request *, int);
extern int elv_merge(struct request_queue *, struct request **, struct bio *);
extern void elv_merge_requests(struct request_queue *, struct request *,
struct request *);
extern void elv_merged_request(struct request_queue *, struct request *, int);
extern void elv_bio_merged(struct request_queue *q, struct request *,
struct bio *);
extern void elv_requeue_request(struct request_queue *, struct request *);
extern struct request *elv_former_request(struct request_queue *, struct request *);
extern struct request *elv_latter_request(struct request_queue *, struct request *);
extern int elv_register_queue(struct request_queue *q);
extern void elv_unregister_queue(struct request_queue *q);
extern int elv_may_queue(struct request_queue *, int, int);
extern void elv_completed_request(struct request_queue *, struct request *);
extern int elv_set_request(struct request_queue *q, struct request *rq,
struct bio *bio, gfp_t gfp_mask);
extern void elv_put_request(struct request_queue *, struct request *);
extern void elv_drain_elevator(struct request_queue *);
/*
* io scheduler registration
*/
extern void __init load_default_elevator_module(void);
extern int elv_register(struct elevator_type *);
extern void elv_unregister(struct elevator_type *);
/*
* io scheduler sysfs switching
*/
extern ssize_t elv_iosched_show(struct request_queue *, char *);
extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
extern int elevator_init(struct request_queue *, char *);
extern void elevator_exit(struct elevator_queue *);
extern int elevator_change(struct request_queue *, const char *);
extern bool elv_bio_merge_ok(struct request *, struct bio *);
extern struct elevator_queue *elevator_alloc(struct request_queue *,
struct elevator_type *);
/*
* Helper functions.
*/
extern struct request *elv_rb_former_request(struct request_queue *, struct request *);
extern struct request *elv_rb_latter_request(struct request_queue *, struct request *);
/*
* rb support functions.
*/
extern void elv_rb_add(struct rb_root *, struct request *);
extern void elv_rb_del(struct rb_root *, struct request *);
extern struct request *elv_rb_find(struct rb_root *, sector_t);
/*
* Return values from elevator merger
*/
#define ELEVATOR_NO_MERGE 0
#define ELEVATOR_FRONT_MERGE 1
#define ELEVATOR_BACK_MERGE 2
/*
* Insertion selection
*/
#define ELEVATOR_INSERT_FRONT 1
#define ELEVATOR_INSERT_BACK 2
#define ELEVATOR_INSERT_SORT 3
#define ELEVATOR_INSERT_REQUEUE 4
#define ELEVATOR_INSERT_FLUSH 5
#define ELEVATOR_INSERT_SORT_MERGE 6
/*
* return values from elevator_may_queue_fn
*/
enum {
ELV_MQUEUE_MAY,
ELV_MQUEUE_NO,
ELV_MQUEUE_MUST,
};
#define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq))
#define rb_entry_rq(node) rb_entry((node), struct request, rb_node)
#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
#define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist)
#else /* CONFIG_BLOCK */
static inline void load_default_elevator_module(void) { }
#endif /* CONFIG_BLOCK */
#endif