diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 9d6f04103f01..4838c2b16f2c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -26,7 +26,16 @@ static int cfq_slice_async = HZ / 25;
 static const int cfq_slice_async_rq = 2;
 static int cfq_slice_idle = HZ / 125;
 
+/*
+ * grace period before allowing idle class to get disk access
+ */
 #define CFQ_IDLE_GRACE		(HZ / 10)
+
+/*
+ * below this threshold, we consider thinktime immediate
+ */
+#define CFQ_MIN_TT		(2)
+
 #define CFQ_SLICE_SCALE		(5)
 
 #define CFQ_KEY_ASYNC		(0)
@@ -69,10 +78,9 @@ struct cfq_data {
 	/*
 	 * rr list of queues with requests and the count of them
 	 */
-	struct list_head rr_list[CFQ_PRIO_LISTS];
+	struct rb_root service_tree;
 	struct list_head cur_rr;
 	struct list_head idle_rr;
-	unsigned long cur_rr_tick;
 	unsigned int busy_queues;
 
 	/*
@@ -91,8 +99,6 @@ struct cfq_data {
 
 	struct cfq_queue *active_queue;
 	struct cfq_io_context *active_cic;
-	int cur_prio, cur_end_prio;
-	unsigned long prio_time;
 	unsigned int dispatch_slice;
 
 	struct timer_list idle_class_timer;
@@ -131,8 +137,10 @@ struct cfq_queue {
 	unsigned int key;
 	/* member of the rr/busy/cur/idle cfqd list */
 	struct list_head cfq_list;
-	/* in what tick we were last serviced */
-	unsigned long rr_tick;
+	/* service_tree member */
+	struct rb_node rb_node;
+	/* service_tree key */
+	unsigned long rb_key;
 	/* sorted list of pending requests */
 	struct rb_root sort_list;
 	/* if fifo isn't expired, next request to serve */
@@ -147,8 +155,6 @@ struct cfq_queue {
 	struct list_head fifo;
 
 	unsigned long slice_end;
-	unsigned long service_last;
-	unsigned long slice_start;
 	long slice_resid;
 
 	/* number of requests that are on the dispatch list or inside driver */
@@ -240,30 +246,26 @@ static inline pid_t cfq_queue_pid(struct task_struct *task, int rw, int is_sync)
  * if a queue is marked sync and has sync io queued. A sync queue with async
  * io only, should not get full sync slice length.
  */
+static inline int cfq_prio_slice(struct cfq_data *cfqd, int sync,
+				 unsigned short prio)
+{
+	const int base_slice = cfqd->cfq_slice[sync];
+
+	WARN_ON(prio >= IOPRIO_BE_NR);
+
+	return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio));
+}
+
 static inline int
 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)];
-
-	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
-
-	return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio));
+	return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
 }
 
 static inline void
 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
-	cfqq->slice_end += cfqq->slice_resid;
-
-	/*
-	 * Don't carry over residual for more than one slice, we only want
-	 * to slightly correct the fairness. Carrying over forever would
-	 * easily introduce oscillations.
-	 */
-	cfqq->slice_resid = 0;
-
-	cfqq->slice_start = jiffies;
 }
 
 /*
@@ -403,33 +405,50 @@ cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	return cfq_choose_req(cfqd, next, prev);
 }
 
-/*
- * This function finds out where to insert a BE queue in the service hierarchy
- */
-static void cfq_resort_be_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-				int preempted)
+static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
+				      struct cfq_queue *cfqq)
 {
-	if (!cfq_cfqq_sync(cfqq))
-		list_add_tail(&cfqq->cfq_list, &cfqd->rr_list[cfqq->ioprio]);
-	else {
-		struct list_head *n = &cfqd->rr_list[cfqq->ioprio];
+	/*
+	 * just an approximation, should be ok.
+	 */
+	return ((cfqd->busy_queues - 1) * cfq_prio_slice(cfqd, 1, 0));
+}
+
+static void cfq_service_tree_add(struct cfq_data *cfqd,
+				    struct cfq_queue *cfqq)
+{
+	struct rb_node **p = &cfqd->service_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct cfq_queue *__cfqq;
+	unsigned long rb_key;
+
+	rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
+	rb_key += cfqq->slice_resid;
+	cfqq->slice_resid = 0;
 
+	if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
 		/*
-		 * sort by last service, but don't cross a new or async
-		 * queue. we don't cross a new queue because it hasn't
-		 * been service before, and we don't cross an async
-		 * queue because it gets added to the end on expire.
+		 * same position, nothing more to do
 		 */
-		while ((n = n->prev) != &cfqd->rr_list[cfqq->ioprio]) {
-			struct cfq_queue *__c = list_entry_cfqq(n);
+		if (rb_key == cfqq->rb_key)
+			return;
 
-			if (!cfq_cfqq_sync(__c) || !__c->service_last)
-				break;
-			if (time_before(__c->service_last, cfqq->service_last))
-				break;
-		}
-		list_add(&cfqq->cfq_list, n);
+		rb_erase(&cfqq->rb_node, &cfqd->service_tree);
 	}
+
+	while (*p) {
+		parent = *p;
+		__cfqq = rb_entry(parent, struct cfq_queue, rb_node);
+
+		if (rb_key < __cfqq->rb_key)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	cfqq->rb_key = rb_key;
+	rb_link_node(&cfqq->rb_node, parent, p);
+	rb_insert_color(&cfqq->rb_node, &cfqd->service_tree);
 }
 
 static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
@@ -443,7 +462,7 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 	if (!cfq_cfqq_on_rr(cfqq))
 		return;
 
-	list_del(&cfqq->cfq_list);
+	list_del_init(&cfqq->cfq_list);
 
 	if (cfq_class_rt(cfqq)) {
 		/*
@@ -465,7 +484,7 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 		/*
 		 * So we get here, ergo the queue is a regular best-effort queue
 		 */
-		cfq_resort_be_queue(cfqd, cfqq, preempted);
+		cfq_service_tree_add(cfqd, cfqq);
 	}
 }
 
@@ -490,6 +509,11 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	cfq_clear_cfqq_on_rr(cfqq);
 	list_del_init(&cfqq->cfq_list);
 
+	if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
+		rb_erase(&cfqq->rb_node, &cfqd->service_tree);
+		RB_CLEAR_NODE(&cfqq->rb_node);
+	}
+
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
 }
@@ -684,7 +708,6 @@ __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 		cfq_clear_cfqq_fifo_expire(cfqq);
 		cfq_mark_cfqq_slice_new(cfqq);
 		cfq_clear_cfqq_queue_new(cfqq);
-		cfqq->rr_tick = cfqd->cur_rr_tick;
 	}
 
 	cfqd->active_queue = cfqq;
@@ -732,114 +755,19 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted,
 		__cfq_slice_expired(cfqd, cfqq, preempted, timed_out);
 }
 
-/*
- * 0
- * 0,1
- * 0,1,2
- * 0,1,2,3
- * 0,1,2,3,4
- * 0,1,2,3,4,5
- * 0,1,2,3,4,5,6
- * 0,1,2,3,4,5,6,7
- */
-static int cfq_get_next_prio_level(struct cfq_data *cfqd)
-{
-	int prio, wrap;
-
-	prio = -1;
-	wrap = 0;
-	do {
-		int p;
-
-		for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) {
-			if (!list_empty(&cfqd->rr_list[p])) {
-				prio = p;
-				break;
-			}
-		}
-
-		if (prio != -1)
-			break;
-		cfqd->cur_prio = 0;
-		if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
-			cfqd->cur_end_prio = 0;
-			if (wrap)
-				break;
-			wrap = 1;
-		}
-	} while (1);
-
-	if (unlikely(prio == -1))
-		return -1;
-
-	BUG_ON(prio >= CFQ_PRIO_LISTS);
-
-	list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr);
-
-	cfqd->cur_prio = prio + 1;
-	if (cfqd->cur_prio > cfqd->cur_end_prio) {
-		cfqd->cur_end_prio = cfqd->cur_prio;
-		cfqd->cur_prio = 0;
-	}
-	if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
-		cfqd->cur_prio = 0;
-		cfqd->cur_end_prio = 0;
-	}
-
-	cfqd->cur_rr_tick++;
-	cfqd->prio_time = jiffies;
-	return prio;
-}
-
-static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
-					  struct request *rq)
-{
-	if (rq->sector >= cfqd->last_position)
-		return rq->sector - cfqd->last_position;
-	else
-		return cfqd->last_position - rq->sector;
-}
-
-static struct cfq_queue *cfq_get_best_queue(struct cfq_data *cfqd)
-{
-	struct cfq_queue *cfqq = NULL, *__cfqq;
-	sector_t best = -1, first = -1, dist;
-
-	list_for_each_entry(__cfqq, &cfqd->cur_rr, cfq_list) {
-		if (!__cfqq->next_rq || !cfq_cfqq_sync(__cfqq))
-			continue;
-
-		dist = cfq_dist_from_last(cfqd, __cfqq->next_rq);
-		if (first == -1)
-			first = dist;
-		if (dist < best) {
-			best = dist;
-			cfqq = __cfqq;
-		}
-	}
-
-	/*
-	 * Only async queue(s) available, grab first entry. Do the same
-	 * if the difference between the first and best isn't more than
-	 * twice, to obey fairness.
-	 */
-	if (!cfqq || (best && first != best && ((first / best) < 4)))
-		cfqq = list_entry_cfqq(cfqd->cur_rr.next);
-
-	return cfqq;
-}
-
 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
 {
 	struct cfq_queue *cfqq = NULL;
 
-	if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) {
+	if (!list_empty(&cfqd->cur_rr)) {
 		/*
-		 * if current list is non-empty, grab first entry. if it is
-		 * empty, get next prio level and grab first entry then if any
-		 * are spliced
+		 * if current list is non-empty, grab first entry.
 		 */
-		cfqq = cfq_get_best_queue(cfqd);
+		cfqq = list_entry_cfqq(cfqd->cur_rr.next);
+	} else if (!RB_EMPTY_ROOT(&cfqd->service_tree)) {
+		struct rb_node *n = rb_first(&cfqd->service_tree);
+
+		cfqq = rb_entry(n, struct cfq_queue, rb_node);
 	} else if (!list_empty(&cfqd->idle_rr)) {
 		/*
 		 * if we have idle queues and no rt or be queues had pending
@@ -861,27 +789,20 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
 {
 	struct cfq_queue *cfqq;
 
-	do {
-		long prio;
-
-		cfqq = cfq_get_next_queue(cfqd);
-		if (!cfqq)
-			break;
-
-		prio = cfq_prio_to_slice(cfqd, cfqq);
-		if (cfqq->slice_resid > -prio)
-			break;
-
-		cfqq->slice_resid += prio;
-		list_del_init(&cfqq->cfq_list);
-		list_add_tail(&cfqq->cfq_list, &cfqd->rr_list[cfqq->ioprio]);
-		cfqq = NULL;
-	} while (1);
-
+	cfqq = cfq_get_next_queue(cfqd);
 	__cfq_set_active_queue(cfqd, cfqq);
 	return cfqq;
 }
 
+static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
+					  struct request *rq)
+{
+	if (rq->sector >= cfqd->last_position)
+		return rq->sector - cfqd->last_position;
+	else
+		return cfqd->last_position - rq->sector;
+}
+
 static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
 {
 	struct cfq_io_context *cic = cfqd->active_cic;
@@ -892,45 +813,15 @@ static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
 	return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean;
 }
 
-static struct cfq_queue *__cfq_close_cooperator(struct cfq_data *cfqd,
-						struct cfq_queue *cur_cfqq,
-						struct list_head *list)
+static int cfq_close_cooperator(struct cfq_data *cfq_data,
+				struct cfq_queue *cfqq)
 {
-	struct cfq_queue *cfqq;
-
-	list_for_each_entry(cfqq, list, cfq_list) {
-		if (cfqq == cur_cfqq || !cfq_cfqq_sync(cfqq))
-			continue;
-
-		BUG_ON(!cfqq->next_rq);
-
-		if (cfq_rq_close(cfqd, cfqq->next_rq))
-			return cfqq;
-	}
-
-	return NULL;
-}
-
-static int cfq_close_cooperator(struct cfq_data *cfqd,
-				struct cfq_queue *cur_cfqq)
-{
-	struct cfq_queue *cfqq;
-
-	if (!cfqd->busy_queues)
-		return 0;
-
 	/*
-	 * check cur_rr and same-prio rr_list for candidates
+	 * We should notice if some of the queues are cooperating, eg
+	 * working closely on the same area of the disk. In that case,
+	 * we can group them together and don't waste time idling.
 	 */
-	cfqq = __cfq_close_cooperator(cfqd, cur_cfqq, &cfqd->cur_rr);
-	if (cfqq)
-		return 1;
-
-	cfqq = __cfq_close_cooperator(cfqd, cur_cfqq, &cfqd->rr_list[cur_cfqq->ioprio]);
-	if (cfqq && (cfqq->rr_tick == cfqd->cur_rr_tick))
-		cfqq = NULL;
-
-	return cfqq != NULL;
+	return 0;
 }
 
 #define CIC_SEEKY(cic)	((cic)->seek_mean > (8 * 1024))
@@ -974,7 +865,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	 */
 	sl = cfqd->cfq_slice_idle;
 	if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
-		sl = min(sl, msecs_to_jiffies(2));
+		sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
 
 	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
 }
@@ -1115,31 +1006,41 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	return dispatched;
 }
 
-static int
-cfq_forced_dispatch_cfqqs(struct list_head *list)
+static inline int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
+{
+	int dispatched = 0;
+
+	while (cfqq->next_rq) {
+		cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
+		dispatched++;
+	}
+
+	BUG_ON(!list_empty(&cfqq->fifo));
+	return dispatched;
+}
+
+static int cfq_forced_dispatch_cfqqs(struct list_head *list)
 {
 	struct cfq_queue *cfqq, *next;
 	int dispatched;
 
 	dispatched = 0;
-	list_for_each_entry_safe(cfqq, next, list, cfq_list) {
-		while (cfqq->next_rq) {
-			cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
-			dispatched++;
-		}
-		BUG_ON(!list_empty(&cfqq->fifo));
-	}
+	list_for_each_entry_safe(cfqq, next, list, cfq_list)
+		dispatched += __cfq_forced_dispatch_cfqq(cfqq);
 
 	return dispatched;
 }
 
-static int
-cfq_forced_dispatch(struct cfq_data *cfqd)
+static int cfq_forced_dispatch(struct cfq_data *cfqd)
 {
-	int i, dispatched = 0;
+	int dispatched = 0;
+	struct rb_node *n;
 
-	for (i = 0; i < CFQ_PRIO_LISTS; i++)
-		dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]);
+	while ((n = rb_first(&cfqd->service_tree)) != NULL) {
+		struct cfq_queue *cfqq = rb_entry(n, struct cfq_queue, rb_node);
+
+		dispatched += __cfq_forced_dispatch_cfqq(cfqq);
+	}
 
 	dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr);
 	dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr);
@@ -1151,8 +1052,7 @@ cfq_forced_dispatch(struct cfq_data *cfqd)
 	return dispatched;
 }
 
-static int
-cfq_dispatch_requests(request_queue_t *q, int force)
+static int cfq_dispatch_requests(request_queue_t *q, int force)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq;
@@ -1222,7 +1122,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	/*
 	 * it's on the empty list and still hashed
 	 */
-	list_del(&cfqq->cfq_list);
 	hlist_del(&cfqq->cfq_hash);
 	kmem_cache_free(cfq_pool, cfqq);
 }
@@ -1391,8 +1290,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq)
 	 */
 	cfqq->org_ioprio = cfqq->ioprio;
 	cfqq->org_ioprio_class = cfqq->ioprio_class;
-
-	cfq_resort_rr_list(cfqq, 0);
 	cfq_clear_cfqq_prio_changed(cfqq);
 }
 
@@ -1478,6 +1375,7 @@ cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk,
 
 		INIT_HLIST_NODE(&cfqq->cfq_hash);
 		INIT_LIST_HEAD(&cfqq->cfq_list);
+		RB_CLEAR_NODE(&cfqq->rb_node);
 		INIT_LIST_HEAD(&cfqq->fifo);
 
 		cfqq->key = key;
@@ -1752,7 +1650,8 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	 * so we know that it will be selected next.
 	 */
 	BUG_ON(!cfq_cfqq_on_rr(cfqq));
-	list_move(&cfqq->cfq_list, &cfqd->cur_rr);
+	list_del_init(&cfqq->cfq_list);
+	list_add(&cfqq->cfq_list, &cfqd->cur_rr);
 
 	cfqq->slice_end = 0;
 	cfq_mark_cfqq_slice_new(cfqq);
@@ -1828,13 +1727,10 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
 	WARN_ON(!cfqq->dispatched);
 	cfqd->rq_in_driver--;
 	cfqq->dispatched--;
-	cfqq->service_last = now;
 
 	if (!cfq_class_idle(cfqq))
 		cfqd->last_end_request = now;
 
-	cfq_resort_rr_list(cfqq, 0);
-
 	if (sync)
 		RQ_CIC(rq)->last_end_request = now;
 
@@ -1863,9 +1759,6 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
  */
 static void cfq_prio_boost(struct cfq_queue *cfqq)
 {
-	const int ioprio_class = cfqq->ioprio_class;
-	const int ioprio = cfqq->ioprio;
-
	if (has_fs_excl()) {
 		/*
 		 * boost idle prio on transactions that would lock out other
@@ -1884,12 +1777,6 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
 		if (cfqq->ioprio != cfqq->org_ioprio)
 			cfqq->ioprio = cfqq->org_ioprio;
 	}
-
-	/*
-	 * refile between round-robin lists if we moved the priority class
-	 */
-	if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio))
-		cfq_resort_rr_list(cfqq, 0);
 }
 
 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
@@ -2127,9 +2014,7 @@ static void *cfq_init_queue(request_queue_t *q)
 
 	memset(cfqd, 0, sizeof(*cfqd));
 
-	for (i = 0; i < CFQ_PRIO_LISTS; i++)
-		INIT_LIST_HEAD(&cfqd->rr_list[i]);
-
+	cfqd->service_tree = RB_ROOT;
 	INIT_LIST_HEAD(&cfqd->cur_rr);
 	INIT_LIST_HEAD(&cfqd->idle_rr);
 	INIT_LIST_HEAD(&cfqd->cic_list);
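
For illustration, the ordering that cfq_service_tree_add() establishes can be
sketched in standalone C. This is only a model of the keying scheme, not the
kernel code: each queue gets rb_key = per-queue slice offset + current time
(plus any slice residual), and the scheduler always serves the queue with the
smallest key, i.e. the leftmost tree node. A plain unbalanced BST stands in
for the kernel's red-black tree, and the names queue, tree_add, tree_first,
slice_offset and mock_jiffies are invented for this sketch.

#include <stdio.h>

struct queue {
	unsigned long rb_key;		/* service order key */
	int id;
	struct queue *left, *right;
};

static unsigned long mock_jiffies;	/* stand-in for the kernel's jiffies */

/* mirrors cfq_slice_offset(): a rough, busy_queues-scaled offset */
static unsigned long slice_offset(unsigned int busy_queues,
				  unsigned long base_slice)
{
	return (busy_queues - 1) * base_slice;
}

/* insert by key; equivalent in spirit to the rb_link_node() walk */
static struct queue *tree_add(struct queue *root, struct queue *q)
{
	if (!root)
		return q;
	if (q->rb_key < root->rb_key)
		root->left = tree_add(root->left, q);
	else
		root->right = tree_add(root->right, q);
	return root;
}

/* equivalent of rb_first(): the leftmost node has the smallest key */
static struct queue *tree_first(struct queue *root)
{
	while (root && root->left)
		root = root->left;
	return root;
}

int main(void)
{
	static struct queue q[3];	/* zero-initialized */
	struct queue *root = NULL;
	unsigned int busy = 3;

	for (int i = 0; i < 3; i++) {
		q[i].id = i;
		/* rb_key = slice offset + jiffies, as in the patch */
		q[i].rb_key = slice_offset(busy - i, 100) + mock_jiffies;
		root = tree_add(root, &q[i]);
	}

	/* cfq_get_next_queue() always picks the leftmost queue */
	printf("next queue: %d\n", tree_first(root)->id);
	return 0;
}

Run as-is, this prints "next queue: 2", the queue with the smallest computed
offset; in the patch the same effect comes from rb_first() on the service
tree, with the rb-tree keeping the walk balanced as queues are re-added on
each service round.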