blkcg: factor out blkio_group creation

Currently both blk-throttle and cfq-iosched implement their own
blkio_group creation code in throtl_get_tg() and cfq_get_cfqg().  This
patch factors out the common code into blkg_lookup_create(), which
returns ERR_PTR value so that transitional failures due to queue
bypass can be distinguished from other failures.

* New blkio_policy_ops methods blkio_alloc_group_fn() and
  blkio_link_group_fn() are added.  Both are transitional and will be
  removed once the blkg management code is fully moved into
  blk-cgroup.c.

* blkio_alloc_group_fn() allocates policy-specific blkg which is
  usually a larger data structure with blkg as the first entry and
  initializes it.  Note that initialization of blkg proper, including
  percpu stats, is the responsibility of blk-cgroup proper.

  Note that default config (weight, bps...) initialization is done
  from this method; otherwise, we end up violating locking order
  between blkcg and q locks via blkcg_get_CONF() functions.

* blkio_link_group_fn() is called under queue_lock and is responsible
  for linking the blkg to the queue.  The blkcg side is handled by
  blk-cgroup proper.

* The common blkg creation function is named blkg_lookup_create() and
  blkiocg_lookup_group() is renamed to blkg_lookup() for consistency.
  Also, throtl / cfq related functions are similarly [re]named for
  consistency.

This simplifies blkcg policy implementations and enables further
cleanup.

-v2: Vivek noticed that blkg_lookup_create() incorrectly tested
     blk_queue_dead() instead of blk_queue_bypass(), leading a user of
     the function to end up creating a new blkg on a bypassing queue.
     This is a bug introduced while relocating bypass patches before
     this one.  Fixed.

-v3: ERR_PTR patch folded into this one.  @for_root added to
     blkg_lookup_create() to allow creating root group on a bypassed
     queue during elevator switch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Tejun Heo 2012-03-05 13:15:06 -08:00 committed by Jens Axboe
parent f51b802c17
commit cd1604fab4
5 changed files with 195 additions and 250 deletions

View File

@ -465,38 +465,93 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
} }
EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
/* struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
* This function allocates the per cpu stats for blkio_group. Should be called struct request_queue *q,
* from sleepable context as alloc_per_cpu() requires that. enum blkio_policy_id plid,
*/ bool for_root)
int blkio_alloc_blkg_stats(struct blkio_group *blkg) __releases(q->queue_lock) __acquires(q->queue_lock)
{ {
/* Allocate memory for per cpu stats */ struct blkio_policy_type *pol = blkio_policy[plid];
blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); struct blkio_group *blkg, *new_blkg;
if (!blkg->stats_cpu)
return -ENOMEM;
return 0;
}
EXPORT_SYMBOL_GPL(blkio_alloc_blkg_stats);
void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, WARN_ON_ONCE(!rcu_read_lock_held());
struct blkio_group *blkg, struct request_queue *q, dev_t dev, lockdep_assert_held(q->queue_lock);
enum blkio_policy_id plid)
{
unsigned long flags;
spin_lock_irqsave(&blkcg->lock, flags); /*
spin_lock_init(&blkg->stats_lock); * This could be the first entry point of blkcg implementation and
rcu_assign_pointer(blkg->q, q); * we shouldn't allow anything to go through for a bypassing queue.
blkg->blkcg_id = css_id(&blkcg->css); * The following can be removed if blkg lookup is guaranteed to
* fail on a bypassing queue.
*/
if (unlikely(blk_queue_bypass(q)) && !for_root)
return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
blkg = blkg_lookup(blkcg, q, plid);
if (blkg)
return blkg;
if (!css_tryget(&blkcg->css))
return ERR_PTR(-EINVAL);
/*
* Allocate and initialize.
*
* FIXME: The following is broken. Percpu memory allocation
* requires %GFP_KERNEL context and can't be performed from IO
* path. Allocation here should inherently be atomic and the
* following lock dancing can be removed once the broken percpu
* allocation is fixed.
*/
spin_unlock_irq(q->queue_lock);
rcu_read_unlock();
new_blkg = pol->ops.blkio_alloc_group_fn(q, blkcg);
if (new_blkg) {
new_blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
spin_lock_init(&new_blkg->stats_lock);
rcu_assign_pointer(new_blkg->q, q);
new_blkg->blkcg_id = css_id(&blkcg->css);
new_blkg->plid = plid;
cgroup_path(blkcg->css.cgroup, new_blkg->path,
sizeof(new_blkg->path));
}
rcu_read_lock();
spin_lock_irq(q->queue_lock);
css_put(&blkcg->css);
/* did bypass get turned on inbetween? */
if (unlikely(blk_queue_bypass(q)) && !for_root) {
blkg = ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
goto out;
}
/* did someone beat us to it? */
blkg = blkg_lookup(blkcg, q, plid);
if (unlikely(blkg))
goto out;
/* did alloc fail? */
if (unlikely(!new_blkg || !new_blkg->stats_cpu)) {
blkg = ERR_PTR(-ENOMEM);
goto out;
}
/* insert */
spin_lock(&blkcg->lock);
swap(blkg, new_blkg);
hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
blkg->plid = plid; pol->ops.blkio_link_group_fn(q, blkg);
spin_unlock_irqrestore(&blkcg->lock, flags); spin_unlock(&blkcg->lock);
/* Need to take css reference ? */ out:
cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); if (new_blkg) {
blkg->dev = dev; free_percpu(new_blkg->stats_cpu);
kfree(new_blkg);
}
return blkg;
} }
EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group); EXPORT_SYMBOL_GPL(blkg_lookup_create);
static void __blkiocg_del_blkio_group(struct blkio_group *blkg) static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
{ {
@ -533,9 +588,9 @@ int blkiocg_del_blkio_group(struct blkio_group *blkg)
EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group); EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);
/* called under rcu_read_lock(). */ /* called under rcu_read_lock(). */
struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
struct request_queue *q, struct request_queue *q,
enum blkio_policy_id plid) enum blkio_policy_id plid)
{ {
struct blkio_group *blkg; struct blkio_group *blkg;
struct hlist_node *n; struct hlist_node *n;
@ -545,7 +600,7 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
return blkg; return blkg;
return NULL; return NULL;
} }
EXPORT_SYMBOL_GPL(blkiocg_lookup_group); EXPORT_SYMBOL_GPL(blkg_lookup);
void blkg_destroy_all(struct request_queue *q) void blkg_destroy_all(struct request_queue *q)
{ {

View File

@ -204,6 +204,10 @@ extern unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg,
extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg,
dev_t dev); dev_t dev);
typedef struct blkio_group *(blkio_alloc_group_fn)(struct request_queue *q,
struct blkio_cgroup *blkcg);
typedef void (blkio_link_group_fn)(struct request_queue *q,
struct blkio_group *blkg);
typedef void (blkio_unlink_group_fn)(struct request_queue *q, typedef void (blkio_unlink_group_fn)(struct request_queue *q,
struct blkio_group *blkg); struct blkio_group *blkg);
typedef bool (blkio_clear_queue_fn)(struct request_queue *q); typedef bool (blkio_clear_queue_fn)(struct request_queue *q);
@ -219,6 +223,8 @@ typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q,
struct blkio_group *blkg, unsigned int write_iops); struct blkio_group *blkg, unsigned int write_iops);
struct blkio_policy_ops { struct blkio_policy_ops {
blkio_alloc_group_fn *blkio_alloc_group_fn;
blkio_link_group_fn *blkio_link_group_fn;
blkio_unlink_group_fn *blkio_unlink_group_fn; blkio_unlink_group_fn *blkio_unlink_group_fn;
blkio_clear_queue_fn *blkio_clear_queue_fn; blkio_clear_queue_fn *blkio_clear_queue_fn;
blkio_update_group_weight_fn *blkio_update_group_weight_fn; blkio_update_group_weight_fn *blkio_update_group_weight_fn;
@ -307,14 +313,14 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {}
extern struct blkio_cgroup blkio_root_cgroup; extern struct blkio_cgroup blkio_root_cgroup;
extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk); extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
struct blkio_group *blkg, struct request_queue *q, dev_t dev,
enum blkio_policy_id plid);
extern int blkio_alloc_blkg_stats(struct blkio_group *blkg);
extern int blkiocg_del_blkio_group(struct blkio_group *blkg); extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
struct request_queue *q, struct request_queue *q,
enum blkio_policy_id plid); enum blkio_policy_id plid);
struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
struct request_queue *q,
enum blkio_policy_id plid,
bool for_root);
void blkiocg_update_timeslice_used(struct blkio_group *blkg, void blkiocg_update_timeslice_used(struct blkio_group *blkg,
unsigned long time, unsigned long time,
unsigned long unaccounted_time); unsigned long unaccounted_time);
@ -335,17 +341,11 @@ cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
static inline struct blkio_cgroup * static inline struct blkio_cgroup *
task_blkio_cgroup(struct task_struct *tsk) { return NULL; } task_blkio_cgroup(struct task_struct *tsk) { return NULL; }
static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
struct blkio_group *blkg, void *key, dev_t dev,
enum blkio_policy_id plid) {}
static inline int blkio_alloc_blkg_stats(struct blkio_group *blkg) { return 0; }
static inline int static inline int
blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
static inline struct blkio_group * static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } void *key) { return NULL; }
static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
unsigned long time, unsigned long time,
unsigned long unaccounted_time) unsigned long unaccounted_time)

View File

@ -181,17 +181,25 @@ static void throtl_put_tg(struct throtl_grp *tg)
call_rcu(&tg->rcu_head, throtl_free_tg); call_rcu(&tg->rcu_head, throtl_free_tg);
} }
static void throtl_init_group(struct throtl_grp *tg) static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q,
struct blkio_cgroup *blkcg)
{ {
struct throtl_grp *tg;
tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, q->node);
if (!tg)
return NULL;
INIT_HLIST_NODE(&tg->tg_node); INIT_HLIST_NODE(&tg->tg_node);
RB_CLEAR_NODE(&tg->rb_node); RB_CLEAR_NODE(&tg->rb_node);
bio_list_init(&tg->bio_lists[0]); bio_list_init(&tg->bio_lists[0]);
bio_list_init(&tg->bio_lists[1]); bio_list_init(&tg->bio_lists[1]);
tg->limits_changed = false; tg->limits_changed = false;
/* Practically unlimited BW */ tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
tg->bps[0] = tg->bps[1] = -1; tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
tg->iops[0] = tg->iops[1] = -1; tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
/* /*
* Take the initial reference that will be released on destroy * Take the initial reference that will be released on destroy
@ -200,14 +208,8 @@ static void throtl_init_group(struct throtl_grp *tg)
* exit or cgroup deletion path depending on who is exiting first. * exit or cgroup deletion path depending on who is exiting first.
*/ */
atomic_set(&tg->ref, 1); atomic_set(&tg->ref, 1);
}
/* Should be called with rcu read lock held (needed for blkcg) */ return &tg->blkg;
static void
throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg)
{
hlist_add_head(&tg->tg_node, &td->tg_list);
td->nr_undestroyed_grps++;
} }
static void static void
@ -246,119 +248,62 @@ throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg)
spin_unlock_irq(td->queue->queue_lock); spin_unlock_irq(td->queue->queue_lock);
} }
static void throtl_init_add_tg_lists(struct throtl_data *td, static void throtl_link_blkio_group(struct request_queue *q,
struct throtl_grp *tg, struct blkio_cgroup *blkcg) struct blkio_group *blkg)
{ {
struct throtl_data *td = q->td;
struct throtl_grp *tg = tg_of_blkg(blkg);
__throtl_tg_fill_dev_details(td, tg); __throtl_tg_fill_dev_details(td, tg);
/* Add group onto cgroup list */ hlist_add_head(&tg->tg_node, &td->tg_list);
blkiocg_add_blkio_group(blkcg, &tg->blkg, td->queue, td->nr_undestroyed_grps++;
tg->blkg.dev, BLKIO_POLICY_THROTL);
tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
throtl_add_group_to_td_list(td, tg);
}
/* Should be called without queue lock and outside of rcu period */
static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td)
{
struct throtl_grp *tg = NULL;
int ret;
tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
if (!tg)
return NULL;
ret = blkio_alloc_blkg_stats(&tg->blkg);
if (ret) {
kfree(tg);
return NULL;
}
throtl_init_group(tg);
return tg;
} }
static struct static struct
throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
{ {
struct throtl_grp *tg = NULL; struct throtl_grp *tg = NULL;
/* /*
* This is the common case when there are no blkio cgroups. * This is the common case when there are no blkio cgroups.
* Avoid lookup in this case * Avoid lookup in this case
*/ */
if (blkcg == &blkio_root_cgroup) if (blkcg == &blkio_root_cgroup)
tg = td->root_tg; tg = td->root_tg;
else else
tg = tg_of_blkg(blkiocg_lookup_group(blkcg, td->queue, tg = tg_of_blkg(blkg_lookup(blkcg, td->queue,
BLKIO_POLICY_THROTL)); BLKIO_POLICY_THROTL));
__throtl_tg_fill_dev_details(td, tg); __throtl_tg_fill_dev_details(td, tg);
return tg; return tg;
} }
static struct throtl_grp *throtl_get_tg(struct throtl_data *td, static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
struct blkio_cgroup *blkcg) struct blkio_cgroup *blkcg)
{ {
struct throtl_grp *tg = NULL, *__tg = NULL;
struct request_queue *q = td->queue; struct request_queue *q = td->queue;
struct throtl_grp *tg = NULL;
/* no throttling for dead queue */
if (unlikely(blk_queue_bypass(q)))
return NULL;
tg = throtl_find_tg(td, blkcg);
if (tg)
return tg;
if (!css_tryget(&blkcg->css))
return NULL;
/* /*
* Need to allocate a group. Allocation of group also needs allocation * This is the common case when there are no blkio cgroups.
* of per cpu stats which in-turn takes a mutex() and can block. Hence * Avoid lookup in this case
* we need to drop rcu lock and queue_lock before we call alloc.
*/ */
spin_unlock_irq(q->queue_lock); if (blkcg == &blkio_root_cgroup) {
rcu_read_unlock();
tg = throtl_alloc_tg(td);
/* Group allocated and queue is still alive. take the lock */
rcu_read_lock();
spin_lock_irq(q->queue_lock);
css_put(&blkcg->css);
/* Make sure @q is still alive */
if (unlikely(blk_queue_bypass(q))) {
kfree(tg);
return NULL;
}
/*
* If some other thread already allocated the group while we were
* not holding queue lock, free up the group
*/
__tg = throtl_find_tg(td, blkcg);
if (__tg) {
kfree(tg);
return __tg;
}
/* Group allocation failed. Account the IO to root group */
if (!tg) {
tg = td->root_tg; tg = td->root_tg;
return tg; } else {
struct blkio_group *blkg;
blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false);
/* if %NULL and @q is alive, fall back to root_tg */
if (!IS_ERR(blkg))
tg = tg_of_blkg(blkg);
else if (!blk_queue_dead(q))
tg = td->root_tg;
} }
throtl_init_add_tg_lists(td, tg, blkcg); __throtl_tg_fill_dev_details(td, tg);
return tg; return tg;
} }
@ -1107,6 +1052,8 @@ static void throtl_shutdown_wq(struct request_queue *q)
static struct blkio_policy_type blkio_policy_throtl = { static struct blkio_policy_type blkio_policy_throtl = {
.ops = { .ops = {
.blkio_alloc_group_fn = throtl_alloc_blkio_group,
.blkio_link_group_fn = throtl_link_blkio_group,
.blkio_unlink_group_fn = throtl_unlink_blkio_group, .blkio_unlink_group_fn = throtl_unlink_blkio_group,
.blkio_clear_queue_fn = throtl_clear_queue, .blkio_clear_queue_fn = throtl_clear_queue,
.blkio_update_group_read_bps_fn = .blkio_update_group_read_bps_fn =
@ -1141,7 +1088,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
*/ */
rcu_read_lock(); rcu_read_lock();
blkcg = task_blkio_cgroup(current); blkcg = task_blkio_cgroup(current);
tg = throtl_find_tg(td, blkcg); tg = throtl_lookup_tg(td, blkcg);
if (tg) { if (tg) {
throtl_tg_fill_dev_details(td, tg); throtl_tg_fill_dev_details(td, tg);
@ -1157,7 +1104,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
* IO group * IO group
*/ */
spin_lock_irq(q->queue_lock); spin_lock_irq(q->queue_lock);
tg = throtl_get_tg(td, blkcg); tg = throtl_lookup_create_tg(td, blkcg);
if (unlikely(!tg)) if (unlikely(!tg))
goto out_unlock; goto out_unlock;
@ -1252,6 +1199,7 @@ void blk_throtl_drain(struct request_queue *q)
int blk_throtl_init(struct request_queue *q) int blk_throtl_init(struct request_queue *q)
{ {
struct throtl_data *td; struct throtl_data *td;
struct blkio_group *blkg;
td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
if (!td) if (!td)
@ -1262,13 +1210,17 @@ int blk_throtl_init(struct request_queue *q)
td->limits_changed = false; td->limits_changed = false;
INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work); INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
/* alloc and Init root group. */ q->td = td;
td->queue = q; td->queue = q;
/* alloc and init root group. */
rcu_read_lock(); rcu_read_lock();
spin_lock_irq(q->queue_lock); spin_lock_irq(q->queue_lock);
td->root_tg = throtl_get_tg(td, &blkio_root_cgroup); blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL,
true);
if (!IS_ERR(blkg))
td->root_tg = tg_of_blkg(blkg);
spin_unlock_irq(q->queue_lock); spin_unlock_irq(q->queue_lock);
rcu_read_unlock(); rcu_read_unlock();
@ -1277,9 +1229,6 @@ int blk_throtl_init(struct request_queue *q)
kfree(td); kfree(td);
return -ENOMEM; return -ENOMEM;
} }
/* Attach throtl data to request queue */
q->td = td;
return 0; return 0;
} }

View File

@ -1048,10 +1048,12 @@ static void cfq_update_blkio_group_weight(struct request_queue *q,
cfqg->needs_update = true; cfqg->needs_update = true;
} }
static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd, static void cfq_link_blkio_group(struct request_queue *q,
struct cfq_group *cfqg, struct blkio_cgroup *blkcg) struct blkio_group *blkg)
{ {
struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info; struct cfq_data *cfqd = q->elevator->elevator_data;
struct backing_dev_info *bdi = &q->backing_dev_info;
struct cfq_group *cfqg = cfqg_of_blkg(blkg);
unsigned int major, minor; unsigned int major, minor;
/* /*
@ -1062,34 +1064,26 @@ static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
*/ */
if (bdi->dev) { if (bdi->dev) {
sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, blkg->dev = MKDEV(major, minor);
cfqd->queue, MKDEV(major, minor)); }
} else
cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
cfqd->queue, 0);
cfqd->nr_blkcg_linked_grps++; cfqd->nr_blkcg_linked_grps++;
cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
/* Add group on cfqd list */ /* Add group on cfqd list */
hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
} }
/* static struct blkio_group *cfq_alloc_blkio_group(struct request_queue *q,
* Should be called from sleepable context. No request queue lock as per struct blkio_cgroup *blkcg)
* cpu stats are allocated dynamically and alloc_percpu needs to be called
* from sleepable context.
*/
static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
{ {
struct cfq_group *cfqg; struct cfq_group *cfqg;
int ret;
cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node); cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, q->node);
if (!cfqg) if (!cfqg)
return NULL; return NULL;
cfq_init_cfqg_base(cfqg); cfq_init_cfqg_base(cfqg);
cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
/* /*
* Take the initial reference that will be released on destroy * Take the initial reference that will be released on destroy
@ -1099,90 +1093,38 @@ static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
*/ */
cfqg->ref = 1; cfqg->ref = 1;
ret = blkio_alloc_blkg_stats(&cfqg->blkg); return &cfqg->blkg;
if (ret) {
kfree(cfqg);
return NULL;
}
return cfqg;
}
static struct cfq_group *
cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
{
struct cfq_group *cfqg = NULL;
struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
unsigned int major, minor;
/*
* This is the common case when there are no blkio cgroups.
* Avoid lookup in this case
*/
if (blkcg == &blkio_root_cgroup)
cfqg = cfqd->root_group;
else
cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, cfqd->queue,
BLKIO_POLICY_PROP));
if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
cfqg->blkg.dev = MKDEV(major, minor);
}
return cfqg;
} }
/* /*
* Search for the cfq group current task belongs to. request_queue lock must * Search for the cfq group current task belongs to. request_queue lock must
* be held. * be held.
*/ */
static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
struct blkio_cgroup *blkcg) struct blkio_cgroup *blkcg)
{ {
struct cfq_group *cfqg = NULL, *__cfqg = NULL;
struct request_queue *q = cfqd->queue; struct request_queue *q = cfqd->queue;
struct backing_dev_info *bdi = &q->backing_dev_info;
struct cfq_group *cfqg = NULL;
cfqg = cfq_find_cfqg(cfqd, blkcg); /* avoid lookup for the common case where there's no blkio cgroup */
if (cfqg) if (blkcg == &blkio_root_cgroup) {
return cfqg; cfqg = cfqd->root_group;
} else {
struct blkio_group *blkg;
if (!css_tryget(&blkcg->css)) blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false);
return NULL; if (!IS_ERR(blkg))
cfqg = cfqg_of_blkg(blkg);
/*
* Need to allocate a group. Allocation of group also needs allocation
* of per cpu stats which in-turn takes a mutex() and can block. Hence
* we need to drop rcu lock and queue_lock before we call alloc.
*
* Not taking any queue reference here and assuming that queue is
* around by the time we return. CFQ queue allocation code does
* the same. It might be racy though.
*/
rcu_read_unlock();
spin_unlock_irq(q->queue_lock);
cfqg = cfq_alloc_cfqg(cfqd);
spin_lock_irq(q->queue_lock);
rcu_read_lock();
css_put(&blkcg->css);
/*
* If some other thread already allocated the group while we were
* not holding queue lock, free up the group
*/
__cfqg = cfq_find_cfqg(cfqd, blkcg);
if (__cfqg) {
kfree(cfqg);
return __cfqg;
} }
if (!cfqg) if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
cfqg = cfqd->root_group; unsigned int major, minor;
sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
cfqg->blkg.dev = MKDEV(major, minor);
}
cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
return cfqg; return cfqg;
} }
@ -1294,8 +1236,8 @@ static bool cfq_clear_queue(struct request_queue *q)
} }
#else /* GROUP_IOSCHED */ #else /* GROUP_IOSCHED */
static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
struct blkio_cgroup *blkcg) struct blkio_cgroup *blkcg)
{ {
return cfqd->root_group; return cfqd->root_group;
} }
@ -2887,7 +2829,8 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
blkcg = task_blkio_cgroup(current); blkcg = task_blkio_cgroup(current);
cfqg = cfq_get_cfqg(cfqd, blkcg); cfqg = cfq_lookup_create_cfqg(cfqd, blkcg);
cic = cfq_cic_lookup(cfqd, ioc); cic = cfq_cic_lookup(cfqd, ioc);
/* cic always exists here */ /* cic always exists here */
cfqq = cic_to_cfqq(cic, is_sync); cfqq = cic_to_cfqq(cic, is_sync);
@ -3694,6 +3637,7 @@ static void cfq_exit_queue(struct elevator_queue *e)
static int cfq_init_queue(struct request_queue *q) static int cfq_init_queue(struct request_queue *q)
{ {
struct cfq_data *cfqd; struct cfq_data *cfqd;
struct blkio_group *blkg __maybe_unused;
int i; int i;
cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
@ -3711,7 +3655,10 @@ static int cfq_init_queue(struct request_queue *q)
rcu_read_lock(); rcu_read_lock();
spin_lock_irq(q->queue_lock); spin_lock_irq(q->queue_lock);
cfqd->root_group = cfq_get_cfqg(cfqd, &blkio_root_cgroup); blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_PROP,
true);
if (!IS_ERR(blkg))
cfqd->root_group = cfqg_of_blkg(blkg);
spin_unlock_irq(q->queue_lock); spin_unlock_irq(q->queue_lock);
rcu_read_unlock(); rcu_read_unlock();
@ -3897,6 +3844,8 @@ static struct elevator_type iosched_cfq = {
#ifdef CONFIG_CFQ_GROUP_IOSCHED #ifdef CONFIG_CFQ_GROUP_IOSCHED
static struct blkio_policy_type blkio_policy_cfq = { static struct blkio_policy_type blkio_policy_cfq = {
.ops = { .ops = {
.blkio_alloc_group_fn = cfq_alloc_blkio_group,
.blkio_link_group_fn = cfq_link_blkio_group,
.blkio_unlink_group_fn = cfq_unlink_blkio_group, .blkio_unlink_group_fn = cfq_unlink_blkio_group,
.blkio_clear_queue_fn = cfq_clear_queue, .blkio_clear_queue_fn = cfq_clear_queue,
.blkio_update_group_weight_fn = cfq_update_blkio_group_weight, .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,

View File

@ -67,12 +67,6 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg,
direction, sync); direction, sync);
} }
static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
struct blkio_group *blkg, struct request_queue *q, dev_t dev)
{
blkiocg_add_blkio_group(blkcg, blkg, q, dev, BLKIO_POLICY_PROP);
}
static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
{ {
return blkiocg_del_blkio_group(blkg); return blkiocg_del_blkio_group(blkg);
@ -105,8 +99,6 @@ static inline void cfq_blkiocg_update_dispatch_stats(struct blkio_group *blkg,
uint64_t bytes, bool direction, bool sync) {} uint64_t bytes, bool direction, bool sync) {}
static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) {} static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) {}
static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
struct blkio_group *blkg, struct request_queue *q, dev_t dev) {}
static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
{ {
return 0; return 0;