mirror of
https://github.com/followmsi/android_kernel_google_msm.git
synced 2024-11-06 23:17:41 +00:00
/*
 * BFQ: CGROUPS support.
 *
 * Based on ideas and code from CFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
 *
 * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
 * file.
 */

#ifdef CONFIG_CGROUP_BFQIO
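
/*
 * Statically allocated state for the root cgroup of the bfqio
 * controller: bfqio_create() below returns it whenever the cgroup
 * being created has no parent, so the root group never goes through
 * a dynamic allocation that could fail.
 */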
static struct bfqio_cgroup bfqio_root_cgroup = {
	.weight = BFQ_DEFAULT_GRP_WEIGHT,
	.ioprio = BFQ_DEFAULT_GRP_IOPRIO,
	.ioprio_class = BFQ_DEFAULT_GRP_CLASS,
};

static inline void bfq_init_entity(struct bfq_entity *entity,
				   struct bfq_group *bfqg)
{
	entity->weight = entity->new_weight;
	entity->orig_weight = entity->new_weight;
	entity->ioprio = entity->new_ioprio;
	entity->ioprio_class = entity->new_ioprio_class;
	entity->parent = bfqg->my_entity;
	entity->sched_data = &bfqg->sched_data;
}

static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id),
			    struct bfqio_cgroup, css);
}

/*
 * Search the hash table (for now just a list) of bgrp for the
 * bfq_group associated with bfqd. Must be called under rcu_read_lock().
 */
static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp,
					    struct bfq_data *bfqd)
{
	struct bfq_group *bfqg;
	struct hlist_node *n;
	void *key;

	hlist_for_each_entry_rcu(bfqg, n, &bgrp->group_data, group_node) {
		key = rcu_dereference(bfqg->bfqd);
		if (key == bfqd)
			return bfqg;
	}

	return NULL;
}

static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp,
					 struct bfq_group *bfqg)
{
	struct bfq_entity *entity = &bfqg->entity;

	/*
	 * If the weight of the entity has never been set via the cgroup
	 * interface, then bgrp->weight == 0. In this case we initialize
	 * the weight from the current ioprio value. Otherwise, the group
	 * weight, if set, has priority over the ioprio value.
	 */
	if (bgrp->weight == 0) {
		entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio);
		entity->new_ioprio = bgrp->ioprio;
	} else {
		if (bgrp->weight < BFQ_MIN_WEIGHT ||
		    bgrp->weight > BFQ_MAX_WEIGHT) {
			printk(KERN_CRIT "bfq_group_init_entity: "
					 "bgrp->weight %d\n", bgrp->weight);
			BUG();
		}
		entity->new_weight = bgrp->weight;
		entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight);
	}
	entity->orig_weight = entity->weight = entity->new_weight;
	entity->ioprio = entity->new_ioprio;
	entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class;
	entity->my_sched_data = &bfqg->sched_data;
	bfqg->active_entities = 0;
}
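
/*
 * A group can thus be configured either through its weight or through
 * its ioprio: bfq_ioprio_to_weight() and bfq_weight_to_ioprio()
 * (defined elsewhere in BFQ) translate between the two
 * representations, and a non-zero weight set by the user always wins
 * over the ioprio-derived value.
 */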

static inline void bfq_group_set_parent(struct bfq_group *bfqg,
					struct bfq_group *parent)
{
	struct bfq_entity *entity;

	BUG_ON(parent == NULL);
	BUG_ON(bfqg == NULL);

	entity = &bfqg->entity;
	entity->parent = parent->my_entity;
	entity->sched_data = &parent->sched_data;
}

/**
 * bfq_group_chain_alloc - allocate a chain of groups.
 * @bfqd: queue descriptor.
 * @cgroup: the leaf cgroup this chain starts from.
 *
 * Allocate a chain of groups starting from the one belonging to
 * @cgroup up to the root cgroup. Stop if a cgroup on the chain
 * to the root already has an allocated group on @bfqd.
 */
static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd,
					       struct cgroup *cgroup)
{
	struct bfqio_cgroup *bgrp;
	struct bfq_group *bfqg, *prev = NULL, *leaf = NULL;

	for (; cgroup != NULL; cgroup = cgroup->parent) {
		bgrp = cgroup_to_bfqio(cgroup);

		bfqg = bfqio_lookup_group(bgrp, bfqd);
		if (bfqg != NULL) {
			/*
			 * All the cgroups in the path from there to the
			 * root must have a bfq_group for bfqd, so we don't
			 * need any more allocations.
			 */
			break;
		}

		bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC);
		if (bfqg == NULL)
			goto cleanup;

		bfq_group_init_entity(bgrp, bfqg);
		bfqg->my_entity = &bfqg->entity;

		if (leaf == NULL) {
			leaf = bfqg;
			prev = leaf;
		} else {
			bfq_group_set_parent(prev, bfqg);
			/*
			 * Build a list of allocated nodes using the bfqd
			 * field, which is still unused and is initialized
			 * only after the node is connected.
			 */
			prev->bfqd = bfqg;
			prev = bfqg;
		}
	}

	return leaf;

cleanup:
	while (leaf != NULL) {
		prev = leaf;
		leaf = leaf->bfqd;
		kfree(prev);
	}

	return NULL;
}
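
/*
 * Note on the chain representation: the nodes allocated above are kept
 * together by temporarily storing, in each node's bfqd pointer, the
 * address of the next node towards the root; bfq_group_chain_link()
 * below walks this list, replaces each temporary pointer with the real
 * bfqd and only then publishes the node, so a partially built chain is
 * never visible to lookups.
 */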

/**
 * bfq_group_chain_link - link an allocated group chain to a cgroup
 *                        hierarchy.
 * @bfqd: the queue descriptor.
 * @cgroup: the leaf cgroup to start from.
 * @leaf: the leaf group (to be associated to @cgroup).
 *
 * Try to link a chain of groups to a cgroup hierarchy, connecting the
 * nodes bottom-up, so we can be sure that when we find a cgroup in the
 * hierarchy that already has a group associated to @bfqd all the nodes
 * in the path to the root cgroup have one too.
 *
 * On locking: the queue lock protects the hierarchy (there is a hierarchy
 * per device) while the bfqio_cgroup lock protects the list of groups
 * belonging to the same cgroup.
 */
static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup,
				 struct bfq_group *leaf)
{
	struct bfqio_cgroup *bgrp;
	struct bfq_group *bfqg, *next, *prev = NULL;
	unsigned long flags;

	assert_spin_locked(bfqd->queue->queue_lock);

	for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) {
		bgrp = cgroup_to_bfqio(cgroup);
		next = leaf->bfqd;

		bfqg = bfqio_lookup_group(bgrp, bfqd);
		BUG_ON(bfqg != NULL);

		spin_lock_irqsave(&bgrp->lock, flags);

		rcu_assign_pointer(leaf->bfqd, bfqd);
		hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data);
		hlist_add_head(&leaf->bfqd_node, &bfqd->group_list);

		spin_unlock_irqrestore(&bgrp->lock, flags);

		prev = leaf;
		leaf = next;
	}

	BUG_ON(cgroup == NULL && leaf != NULL);
	if (cgroup != NULL && prev != NULL) {
		bgrp = cgroup_to_bfqio(cgroup);
		bfqg = bfqio_lookup_group(bgrp, bfqd);
		bfq_group_set_parent(prev, bfqg);
	}
}

/**
 * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup.
 * @bfqd: queue descriptor.
 * @cgroup: cgroup being searched for.
 *
 * Return a group associated to @bfqd in @cgroup, allocating one if
 * necessary. When a group is returned all the cgroups in the path
 * to the root have a group associated to @bfqd.
 *
 * If the allocation fails, return the root group: this breaks guarantees
 * but is a safe fallback. If this loss becomes a problem it can be
 * mitigated using the equivalent weight (given by the product of the
 * weights of the groups in the path from @cgroup to the root) in the
 * root scheduler.
 *
 * We allocate all the missing nodes in the path from the leaf cgroup
 * to the root and we connect the nodes only after all the allocations
 * have been successful.
 */
static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
					      struct cgroup *cgroup)
{
	struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup);
	struct bfq_group *bfqg;

	bfqg = bfqio_lookup_group(bgrp, bfqd);
	if (bfqg != NULL)
		return bfqg;

	bfqg = bfq_group_chain_alloc(bfqd, cgroup);
	if (bfqg != NULL)
		bfq_group_chain_link(bfqd, cgroup, bfqg);
	else
		bfqg = bfqd->root_group;

	return bfqg;
}
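
/*
 * Typical use of bfq_find_alloc_group(): the callers below
 * (__bfq_bic_change_cgroup(), reached via bfq_bic_update_cgroup())
 * invoke it with the queue lock held and under rcu_read_lock(),
 * passing the cgroup of the task that issued the I/O, and then
 * re-parent the task's sync/async queues to the group returned.
 */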

/**
 * bfq_bfqq_move - migrate @bfqq to @bfqg.
 * @bfqd: queue descriptor.
 * @bfqq: the queue to move.
 * @entity: @bfqq's entity.
 * @bfqg: the group to move to.
 *
 * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
 * it on the new one. Avoid putting the entity on the old group idle tree.
 *
 * Must be called under the queue lock; the cgroup owning @bfqg must
 * not disappear (for now this just means that we are called under
 * rcu_read_lock()).
 */
static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
			  struct bfq_entity *entity, struct bfq_group *bfqg)
{
	int busy, resume;

	busy = bfq_bfqq_busy(bfqq);
	resume = !RB_EMPTY_ROOT(&bfqq->sort_list);

	BUG_ON(resume && !entity->on_st);
	BUG_ON(busy && !resume && entity->on_st &&
	       bfqq != bfqd->in_service_queue);

	if (busy) {
		BUG_ON(atomic_read(&bfqq->ref) < 2);

		if (!resume)
			bfq_del_bfqq_busy(bfqd, bfqq, 0);
		else
			bfq_deactivate_bfqq(bfqd, bfqq, 0);
	} else if (entity->on_st)
		bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);

	/*
	 * Here we use a reference to bfqg. We don't need a refcounter
	 * as the cgroup reference will not be dropped, so that its
	 * destroy() callback will not be invoked.
	 */
	entity->parent = bfqg->my_entity;
	entity->sched_data = &bfqg->sched_data;

	if (busy && resume)
		bfq_activate_bfqq(bfqd, bfqq);

	if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver)
		bfq_schedule_dispatch(bfqd);
}

/**
 * __bfq_bic_change_cgroup - move @bic to @cgroup.
 * @bfqd: the queue descriptor.
 * @bic: the bic to move.
 * @cgroup: the cgroup to move to.
 *
 * Move bic to cgroup, assuming that bfqd->queue is locked; the caller
 * has to make sure that the reference to cgroup is valid across the call.
 *
 * NOTE: an alternative approach might have been to store the current
 * cgroup in bfqq and to get a reference to it, reducing the lookup
 * time here, at the price of slightly more complex code.
 */
static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
						 struct bfq_io_cq *bic,
						 struct cgroup *cgroup)
{
	struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
	struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
	struct bfq_entity *entity;
	struct bfq_group *bfqg;
	struct bfqio_cgroup *bgrp;

	bgrp = cgroup_to_bfqio(cgroup);

	bfqg = bfq_find_alloc_group(bfqd, cgroup);
	if (async_bfqq != NULL) {
		entity = &async_bfqq->entity;

		if (entity->sched_data != &bfqg->sched_data) {
			bic_set_bfqq(bic, NULL, 0);
			bfq_log_bfqq(bfqd, async_bfqq,
				     "bic_change_group: %p %d",
				     async_bfqq, atomic_read(&async_bfqq->ref));
			bfq_put_queue(async_bfqq);
		}
	}

	if (sync_bfqq != NULL) {
		entity = &sync_bfqq->entity;
		if (entity->sched_data != &bfqg->sched_data)
			bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
	}

	return bfqg;
}

/**
 * bfq_bic_change_cgroup - move @bic to @cgroup.
 * @bic: the bic being migrated.
 * @cgroup: the destination cgroup.
 *
 * When the task owning @bic is moved to @cgroup, @bic is immediately
 * moved into its new parent group.
 */
static void bfq_bic_change_cgroup(struct bfq_io_cq *bic,
				  struct cgroup *cgroup)
{
	struct bfq_data *bfqd;
	unsigned long uninitialized_var(flags);

	bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
				   &flags);
	if (bfqd != NULL) {
		__bfq_bic_change_cgroup(bfqd, bic, cgroup);
		bfq_put_bfqd_unlock(bfqd, &flags);
	}
}

/**
 * bfq_bic_update_cgroup - update the cgroup of @bic.
 * @bic: the @bic to update.
 *
 * Make sure that @bic is enqueued in the cgroup of the current task.
 * We need this in addition to moving bics during the cgroup attach
 * phase because the task owning @bic could be at its first disk
 * access or we may end up in the root cgroup as the result of a
 * memory allocation failure and here we try to move to the right
 * group.
 *
 * Must be called under the queue lock. It is safe to use the returned
 * value even after the rcu_read_unlock() as the migration/destruction
 * paths act under the queue lock too. IOW it is impossible to race with
 * group migration/destruction and end up with an invalid group as:
 *   a) here cgroup has not yet been destroyed, nor its destroy callback
 *      has started execution, as current holds a reference to it,
 *   b) if it is destroyed after rcu_read_unlock() [after current is
 *      migrated to a different cgroup] its attach() callback will have
 *      taken care of removing all the references to the old cgroup data.
 */
static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic)
{
	struct bfq_data *bfqd = bic_to_bfqd(bic);
	struct bfq_group *bfqg;
	struct cgroup *cgroup;

	BUG_ON(bfqd == NULL);

	rcu_read_lock();
	cgroup = task_cgroup(current, bfqio_subsys_id);
	bfqg = __bfq_bic_change_cgroup(bfqd, bic, cgroup);
	rcu_read_unlock();

	return bfqg;
}

/**
 * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
 * @st: the service tree being flushed.
 */
static inline void bfq_flush_idle_tree(struct bfq_service_tree *st)
{
	struct bfq_entity *entity = st->first_idle;

	for (; entity != NULL; entity = st->first_idle)
		__bfq_deactivate_entity(entity, 0);
}

/**
 * bfq_reparent_leaf_entity - move leaf entity to the root_group.
 * @bfqd: the device data structure with the root group.
 * @entity: the entity to move.
 */
static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
					    struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	BUG_ON(bfqq == NULL);
	bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
	return;
}

/**
 * bfq_reparent_active_entities - move to the root group all active
 *                                entities.
 * @bfqd: the device data structure with the root group.
 * @bfqg: the group to move from.
 * @st: the service tree with the entities.
 *
 * Needs queue_lock to be taken and reference to be valid over the call.
 */
static inline void bfq_reparent_active_entities(struct bfq_data *bfqd,
						struct bfq_group *bfqg,
						struct bfq_service_tree *st)
{
	struct rb_root *active = &st->active;
	struct bfq_entity *entity = NULL;

	if (!RB_EMPTY_ROOT(&st->active))
		entity = bfq_entity_of(rb_first(active));

	for (; entity != NULL; entity = bfq_entity_of(rb_first(active)))
		bfq_reparent_leaf_entity(bfqd, entity);

	if (bfqg->sched_data.in_service_entity != NULL)
		bfq_reparent_leaf_entity(bfqd,
					 bfqg->sched_data.in_service_entity);

	return;
}

/**
 * bfq_destroy_group - destroy @bfqg.
 * @bgrp: the bfqio_cgroup containing @bfqg.
 * @bfqg: the group being destroyed.
 *
 * Destroy @bfqg, making sure that it is not referenced from its parent.
 */
static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg)
{
	struct bfq_data *bfqd;
	struct bfq_service_tree *st;
	struct bfq_entity *entity = bfqg->my_entity;
	unsigned long uninitialized_var(flags);
	int i;

	hlist_del(&bfqg->group_node);

	/*
	 * Empty all service_trees belonging to this group before
	 * deactivating the group itself.
	 */
	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
		st = bfqg->sched_data.service_tree + i;

		/*
		 * The idle tree may still contain bfq_queues belonging
		 * to exited tasks because they never migrated to a different
		 * cgroup from the one being destroyed now. No one else
		 * can access them so it's safe to act without any lock.
		 */
		bfq_flush_idle_tree(st);

		/*
		 * It may happen that some queues are still active
		 * (busy) upon group destruction (if the corresponding
		 * processes have been forced to terminate). We move
		 * all the leaf entities corresponding to these queues
		 * to the root_group.
		 * Also, it may happen that the group has an entity
		 * in service, which is disconnected from the active
		 * tree: it must be moved, too.
		 * There is no need to put the sync queues, as the
		 * scheduler has taken no reference.
		 */
		bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
		if (bfqd != NULL) {
			bfq_reparent_active_entities(bfqd, bfqg, st);
			bfq_put_bfqd_unlock(bfqd, &flags);
		}
		BUG_ON(!RB_EMPTY_ROOT(&st->active));
		BUG_ON(!RB_EMPTY_ROOT(&st->idle));
	}
	BUG_ON(bfqg->sched_data.next_in_service != NULL);
	BUG_ON(bfqg->sched_data.in_service_entity != NULL);

	/*
	 * We may race with device destruction, take extra care when
	 * dereferencing bfqg->bfqd.
	 */
	bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
	if (bfqd != NULL) {
		hlist_del(&bfqg->bfqd_node);
		__bfq_deactivate_entity(entity, 0);
		bfq_put_async_queues(bfqd, bfqg);
		bfq_put_bfqd_unlock(bfqd, &flags);
	}
	BUG_ON(entity->tree != NULL);

	/*
	 * No need to defer the kfree() to the end of the RCU grace
	 * period: we are called from the destroy() callback of our
	 * cgroup, so we can be sure that no one is a) still using
	 * this cgroup or b) doing lookups in it.
	 */
	kfree(bfqg);
}

static void bfq_end_wr_async(struct bfq_data *bfqd)
{
	struct hlist_node *pos, *n;
	struct bfq_group *bfqg;

	hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node)
		bfq_end_wr_async_queues(bfqd, bfqg);
	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

/**
 * bfq_disconnect_groups - disconnect @bfqd from all its groups.
 * @bfqd: the device descriptor being exited.
 *
 * When the device exits we just make sure that no lookup can return
 * the now unused group structures. They will be deallocated on cgroup
 * destruction.
 */
static void bfq_disconnect_groups(struct bfq_data *bfqd)
{
	struct hlist_node *pos, *n;
	struct bfq_group *bfqg;

	bfq_log(bfqd, "disconnect_groups beginning");
	hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) {
		hlist_del(&bfqg->bfqd_node);

		__bfq_deactivate_entity(bfqg->my_entity, 0);

		/*
		 * Don't remove from the group hash, just set an
		 * invalid key. No lookups can race with the
		 * assignment as bfqd is being destroyed; this
		 * implies also that new elements cannot be added
		 * to the list.
		 */
		rcu_assign_pointer(bfqg->bfqd, NULL);

		bfq_log(bfqd, "disconnect_groups: put async for group %p",
			bfqg);
		bfq_put_async_queues(bfqd, bfqg);
	}
}

static inline void bfq_free_root_group(struct bfq_data *bfqd)
{
	struct bfqio_cgroup *bgrp = &bfqio_root_cgroup;
	struct bfq_group *bfqg = bfqd->root_group;

	bfq_put_async_queues(bfqd, bfqg);

	spin_lock_irq(&bgrp->lock);
	hlist_del_rcu(&bfqg->group_node);
	spin_unlock_irq(&bgrp->lock);

	/*
	 * No need to synchronize_rcu() here: since the device is gone
	 * there cannot be any read-side access to its root_group.
	 */
	kfree(bfqg);
}

static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
{
	struct bfq_group *bfqg;
	struct bfqio_cgroup *bgrp;
	int i;

	bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node);
	if (bfqg == NULL)
		return NULL;

	bfqg->entity.parent = NULL;
	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
		bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;

	bgrp = &bfqio_root_cgroup;
	spin_lock_irq(&bgrp->lock);
	rcu_assign_pointer(bfqg->bfqd, bfqd);
	hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data);
	spin_unlock_irq(&bgrp->lock);

	return bfqg;
}

#define SHOW_FUNCTION(__VAR)	\
static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup,	\
				       struct cftype *cftype)	\
{	\
	struct bfqio_cgroup *bgrp;	\
	u64 ret;	\
	\
	if (!cgroup_lock_live_group(cgroup))	\
		return -ENODEV;	\
	\
	bgrp = cgroup_to_bfqio(cgroup);	\
	spin_lock_irq(&bgrp->lock);	\
	ret = bgrp->__VAR;	\
	spin_unlock_irq(&bgrp->lock);	\
	\
	cgroup_unlock();	\
	\
	return ret;	\
}

SHOW_FUNCTION(weight);
SHOW_FUNCTION(ioprio);
SHOW_FUNCTION(ioprio_class);
#undef SHOW_FUNCTION
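
/*
 * The expansions above generate bfqio_cgroup_weight_read(),
 * bfqio_cgroup_ioprio_read() and bfqio_cgroup_ioprio_class_read(),
 * which are wired into the bfqio_files[] table below as the read_u64
 * handlers of the corresponding cgroup files.
 */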

#define STORE_FUNCTION(__VAR, __MIN, __MAX)	\
static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup,	\
					struct cftype *cftype,	\
					u64 val)	\
{	\
	struct bfqio_cgroup *bgrp;	\
	struct bfq_group *bfqg;	\
	struct hlist_node *n;	\
	\
	if (val < (__MIN) || val > (__MAX))	\
		return -EINVAL;	\
	\
	if (!cgroup_lock_live_group(cgroup))	\
		return -ENODEV;	\
	\
	bgrp = cgroup_to_bfqio(cgroup);	\
	\
	spin_lock_irq(&bgrp->lock);	\
	bgrp->__VAR = (unsigned short)val;	\
	hlist_for_each_entry(bfqg, n, &bgrp->group_data, group_node) {	\
		/*	\
		 * Setting the ioprio_changed flag of the entity	\
		 * to 1 with new_##__VAR == ##__VAR would re-set	\
		 * the value of the weight to its ioprio mapping.	\
		 * Set the flag only if necessary.	\
		 */	\
		if ((unsigned short)val != bfqg->entity.new_##__VAR) {	\
			bfqg->entity.new_##__VAR = (unsigned short)val;	\
			/*	\
			 * Make sure that the above new value has been	\
			 * stored in bfqg->entity.new_##__VAR before	\
			 * setting the ioprio_changed flag. In fact,	\
			 * this flag may be read asynchronously (in	\
			 * critical sections protected by a different	\
			 * lock than that held here), and finding this	\
			 * flag set may cause the execution of the code	\
			 * for updating parameters whose value may	\
			 * depend also on bfqg->entity.new_##__VAR (in	\
			 * __bfq_entity_update_weight_prio).	\
			 * This barrier makes sure that the new value	\
			 * of bfqg->entity.new_##__VAR is correctly	\
			 * seen in that code.	\
			 */	\
			smp_wmb();	\
			bfqg->entity.ioprio_changed = 1;	\
		}	\
	}	\
	spin_unlock_irq(&bgrp->lock);	\
	\
	cgroup_unlock();	\
	\
	return 0;	\
}

STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT);
STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1);
STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE);
#undef STORE_FUNCTION
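
/*
 * The writers generated above (bfqio_cgroup_weight_write() and
 * friends) validate their input against the ranges passed to
 * STORE_FUNCTION: weight must lie in [BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT],
 * ioprio in [0, IOPRIO_BE_NR - 1] and ioprio_class in
 * [IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE]; anything else is rejected
 * with -EINVAL before any group is touched.
 */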

static struct cftype bfqio_files[] = {
	{
		.name = "weight",
		.read_u64 = bfqio_cgroup_weight_read,
		.write_u64 = bfqio_cgroup_weight_write,
	},
	{
		.name = "ioprio",
		.read_u64 = bfqio_cgroup_ioprio_read,
		.write_u64 = bfqio_cgroup_ioprio_write,
	},
	{
		.name = "ioprio_class",
		.read_u64 = bfqio_cgroup_ioprio_class_read,
		.write_u64 = bfqio_cgroup_ioprio_class_write,
	},
};
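
/*
 * From userspace these entries typically show up, prefixed with the
 * controller name, in every directory of a hierarchy that has the
 * bfqio controller mounted. A minimal sketch, assuming the controller
 * is mounted at /sys/fs/cgroup/bfqio and "grp" is an example group:
 *
 *	mkdir /sys/fs/cgroup/bfqio/grp
 *	echo 100 > /sys/fs/cgroup/bfqio/grp/bfqio.weight
 *	echo 2 > /sys/fs/cgroup/bfqio/grp/bfqio.ioprio_class
 */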

static int bfqio_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	return cgroup_add_files(cgroup, subsys, bfqio_files,
				ARRAY_SIZE(bfqio_files));
}

static struct cgroup_subsys_state *bfqio_create(struct cgroup *cgroup)
{
	struct bfqio_cgroup *bgrp;

	if (cgroup->parent != NULL) {
		bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL);
		if (bgrp == NULL)
			return ERR_PTR(-ENOMEM);
	} else
		bgrp = &bfqio_root_cgroup;

	spin_lock_init(&bgrp->lock);
	INIT_HLIST_HEAD(&bgrp->group_data);
	bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO;
	bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS;

	return &bgrp->css;
}

/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main bic/bfqq data structures. For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc; the drawback of this
 * behavior is that a group containing a task that forked using CLONE_IO
 * will not be destroyed until the tasks sharing the ioc die.
 */
static int bfqio_can_attach(struct cgroup *cgroup, struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct io_context *ioc;
	int ret = 0;

	cgroup_taskset_for_each(task, cgroup, tset) {
		/* task_lock() is needed to avoid races with exit_io_context() */
		task_lock(task);
		ioc = task->io_context;
		if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1)
			/*
			 * ioc == NULL means that the task is either too
			 * young or exiting: if it has still no ioc the
			 * ioc can't be shared, if the task is exiting the
			 * attach will fail anyway, no matter what we
			 * return here.
			 */
			ret = -EINVAL;
		task_unlock(task);
		if (ret)
			break;
	}

	return ret;
}

static void bfqio_attach(struct cgroup *cgroup, struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct io_context *ioc;
	struct io_cq *icq;
	struct hlist_node *n;

	/*
	 * IMPORTANT NOTE: The move of more than one process at a time to a
	 * new group has not yet been tested.
	 */
	cgroup_taskset_for_each(task, cgroup, tset) {
		ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
		if (ioc) {
			/*
			 * Handle cgroup change here.
			 */
			rcu_read_lock();
			hlist_for_each_entry_rcu(icq, n, &ioc->icq_list, ioc_node)
				if (!strncmp(
					icq->q->elevator->type->elevator_name,
					"bfq", ELV_NAME_MAX))
					bfq_bic_change_cgroup(icq_to_bic(icq),
							      cgroup);
			rcu_read_unlock();
			put_io_context(ioc);
		}
	}
}

static void bfqio_destroy(struct cgroup *cgroup)
{
	struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup);
	struct hlist_node *n, *tmp;
	struct bfq_group *bfqg;

	/*
	 * Since we are destroying the cgroup, there are no more tasks
	 * referencing it, and all the RCU grace periods that may have
	 * referenced it are ended (as the destruction of the parent
	 * cgroup is RCU-safe); bgrp->group_data will not be accessed by
	 * anything else and we don't need any synchronization.
	 */
	hlist_for_each_entry_safe(bfqg, n, tmp, &bgrp->group_data, group_node)
		bfq_destroy_group(bgrp, bfqg);

	BUG_ON(!hlist_empty(&bgrp->group_data));

	kfree(bgrp);
}

struct cgroup_subsys bfqio_subsys = {
	.name = "bfqio",
	.create = bfqio_create,
	.can_attach = bfqio_can_attach,
	.attach = bfqio_attach,
	.destroy = bfqio_destroy,
	.populate = bfqio_populate,
	.subsys_id = bfqio_subsys_id,
};
#else
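/*
 * When CONFIG_CGROUP_BFQIO is not set, the hierarchy collapses to the
 * single, per-device root_group: the stubs below keep the rest of BFQ
 * oblivious to the missing cgroup support, so every queue is simply
 * scheduled within bfqd->root_group.
 */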

static inline void bfq_init_entity(struct bfq_entity *entity,
				   struct bfq_group *bfqg)
{
	entity->weight = entity->new_weight;
	entity->orig_weight = entity->new_weight;
	entity->ioprio = entity->new_ioprio;
	entity->ioprio_class = entity->new_ioprio_class;
	entity->sched_data = &bfqg->sched_data;
}

static inline struct bfq_group *
bfq_bic_update_cgroup(struct bfq_io_cq *bic)
{
	struct bfq_data *bfqd = bic_to_bfqd(bic);
	return bfqd->root_group;
}

static inline void bfq_bfqq_move(struct bfq_data *bfqd,
				 struct bfq_queue *bfqq,
				 struct bfq_entity *entity,
				 struct bfq_group *bfqg)
{
}

static void bfq_end_wr_async(struct bfq_data *bfqd)
{
	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

static inline void bfq_disconnect_groups(struct bfq_data *bfqd)
{
	bfq_put_async_queues(bfqd, bfqd->root_group);
}

static inline void bfq_free_root_group(struct bfq_data *bfqd)
{
	kfree(bfqd->root_group);
}

static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
{
	struct bfq_group *bfqg;
	int i;

	bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
	if (bfqg == NULL)
		return NULL;

	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
		bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;

	return bfqg;
}
#endif