mirror of
https://github.com/followmsi/android_kernel_google_msm.git
synced 2024-11-06 23:17:41 +00:00
blk-mq: allow changing of queue depth through sysfs
For request_fn based devices, the block layer exports a 'nr_requests' file through sysfs to allow adjusting of queue depth on the fly. Currently this returns -EINVAL for blk-mq, since it's not wired up. Wire this up for blk-mq, so that it now also always dynamic adjustments of the allowed queue depth for any given block device managed by blk-mq. Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
parent
64b14519e5
commit
e3a2b3f931
8 changed files with 133 additions and 59 deletions
|
@ -848,6 +848,47 @@ static void freed_request(struct request_list *rl, unsigned int flags)
|
||||||
__freed_request(rl, sync ^ 1);
|
__freed_request(rl, sync ^ 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
|
||||||
|
{
|
||||||
|
struct request_list *rl;
|
||||||
|
|
||||||
|
spin_lock_irq(q->queue_lock);
|
||||||
|
q->nr_requests = nr;
|
||||||
|
blk_queue_congestion_threshold(q);
|
||||||
|
|
||||||
|
/* congestion isn't cgroup aware and follows root blkcg for now */
|
||||||
|
rl = &q->root_rl;
|
||||||
|
|
||||||
|
if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
|
||||||
|
blk_set_queue_congested(q, BLK_RW_SYNC);
|
||||||
|
else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
|
||||||
|
blk_clear_queue_congested(q, BLK_RW_SYNC);
|
||||||
|
|
||||||
|
if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
|
||||||
|
blk_set_queue_congested(q, BLK_RW_ASYNC);
|
||||||
|
else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
|
||||||
|
blk_clear_queue_congested(q, BLK_RW_ASYNC);
|
||||||
|
|
||||||
|
blk_queue_for_each_rl(rl, q) {
|
||||||
|
if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
|
||||||
|
blk_set_rl_full(rl, BLK_RW_SYNC);
|
||||||
|
} else {
|
||||||
|
blk_clear_rl_full(rl, BLK_RW_SYNC);
|
||||||
|
wake_up(&rl->wait[BLK_RW_SYNC]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
|
||||||
|
blk_set_rl_full(rl, BLK_RW_ASYNC);
|
||||||
|
} else {
|
||||||
|
blk_clear_rl_full(rl, BLK_RW_ASYNC);
|
||||||
|
wake_up(&rl->wait[BLK_RW_ASYNC]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
spin_unlock_irq(q->queue_lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Determine if elevator data should be initialized when allocating the
|
* Determine if elevator data should be initialized when allocating the
|
||||||
* request associated with @bio.
|
* request associated with @bio.
|
||||||
|
|
|
@ -57,23 +57,13 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If a previously busy queue goes inactive, potential waiters could now
|
* Wakeup all potentially sleeping on normal (non-reserved) tags
|
||||||
* be allowed to queue. Wake them up and check.
|
|
||||||
*/
|
*/
|
||||||
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
|
static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
|
||||||
{
|
{
|
||||||
struct blk_mq_tags *tags = hctx->tags;
|
|
||||||
struct blk_mq_bitmap_tags *bt;
|
struct blk_mq_bitmap_tags *bt;
|
||||||
int i, wake_index;
|
int i, wake_index;
|
||||||
|
|
||||||
if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
|
|
||||||
return;
|
|
||||||
|
|
||||||
atomic_dec(&tags->active_queues);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Will only throttle depth on non-reserved tags
|
|
||||||
*/
|
|
||||||
bt = &tags->bitmap_tags;
|
bt = &tags->bitmap_tags;
|
||||||
wake_index = bt->wake_index;
|
wake_index = bt->wake_index;
|
||||||
for (i = 0; i < BT_WAIT_QUEUES; i++) {
|
for (i = 0; i < BT_WAIT_QUEUES; i++) {
|
||||||
|
@ -86,6 +76,22 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If a previously busy queue goes inactive, potential waiters could now
|
||||||
|
* be allowed to queue. Wake them up and check.
|
||||||
|
*/
|
||||||
|
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
|
||||||
|
{
|
||||||
|
struct blk_mq_tags *tags = hctx->tags;
|
||||||
|
|
||||||
|
if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
|
||||||
|
return;
|
||||||
|
|
||||||
|
atomic_dec(&tags->active_queues);
|
||||||
|
|
||||||
|
blk_mq_tag_wakeup_all(tags);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For shared tag users, we track the number of currently active users
|
* For shared tag users, we track the number of currently active users
|
||||||
* and attempt to provide a fair share of the tag depth for each of them.
|
* and attempt to provide a fair share of the tag depth for each of them.
|
||||||
|
@ -408,6 +414,28 @@ static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt)
|
||||||
return bt->depth - used;
|
return bt->depth - used;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void bt_update_count(struct blk_mq_bitmap_tags *bt,
|
||||||
|
unsigned int depth)
|
||||||
|
{
|
||||||
|
unsigned int tags_per_word = 1U << bt->bits_per_word;
|
||||||
|
unsigned int map_depth = depth;
|
||||||
|
|
||||||
|
if (depth) {
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < bt->map_nr; i++) {
|
||||||
|
bt->map[i].depth = min(map_depth, tags_per_word);
|
||||||
|
map_depth -= bt->map[i].depth;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bt->wake_cnt = BT_WAIT_BATCH;
|
||||||
|
if (bt->wake_cnt > depth / 4)
|
||||||
|
bt->wake_cnt = max(1U, depth / 4);
|
||||||
|
|
||||||
|
bt->depth = depth;
|
||||||
|
}
|
||||||
|
|
||||||
static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
|
static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
|
||||||
int node, bool reserved)
|
int node, bool reserved)
|
||||||
{
|
{
|
||||||
|
@ -420,7 +448,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
|
||||||
* condition.
|
* condition.
|
||||||
*/
|
*/
|
||||||
if (depth) {
|
if (depth) {
|
||||||
unsigned int nr, i, map_depth, tags_per_word;
|
unsigned int nr, tags_per_word;
|
||||||
|
|
||||||
tags_per_word = (1 << bt->bits_per_word);
|
tags_per_word = (1 << bt->bits_per_word);
|
||||||
|
|
||||||
|
@ -444,11 +472,6 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
bt->map_nr = nr;
|
bt->map_nr = nr;
|
||||||
map_depth = depth;
|
|
||||||
for (i = 0; i < nr; i++) {
|
|
||||||
bt->map[i].depth = min(map_depth, tags_per_word);
|
|
||||||
map_depth -= tags_per_word;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
|
bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
|
||||||
|
@ -460,11 +483,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
|
||||||
for (i = 0; i < BT_WAIT_QUEUES; i++)
|
for (i = 0; i < BT_WAIT_QUEUES; i++)
|
||||||
init_waitqueue_head(&bt->bs[i].wait);
|
init_waitqueue_head(&bt->bs[i].wait);
|
||||||
|
|
||||||
bt->wake_cnt = BT_WAIT_BATCH;
|
bt_update_count(bt, depth);
|
||||||
if (bt->wake_cnt > depth / 4)
|
|
||||||
bt->wake_cnt = max(1U, depth / 4);
|
|
||||||
|
|
||||||
bt->depth = depth;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -525,6 +544,21 @@ void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag)
|
||||||
*tag = prandom_u32() % depth;
|
*tag = prandom_u32() % depth;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
|
||||||
|
{
|
||||||
|
tdepth -= tags->nr_reserved_tags;
|
||||||
|
if (tdepth > tags->nr_tags)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Don't need (or can't) update reserved tags here, they remain
|
||||||
|
* static and should never need resizing.
|
||||||
|
*/
|
||||||
|
bt_update_count(&tags->bitmap_tags, tdepth);
|
||||||
|
blk_mq_tag_wakeup_all(tags);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
|
ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
|
||||||
{
|
{
|
||||||
char *orig_page = page;
|
char *orig_page = page;
|
||||||
|
|
|
@ -55,6 +55,7 @@ extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data
|
||||||
extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
|
extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
|
||||||
extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
|
extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
|
||||||
extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
|
extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
|
||||||
|
extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
BLK_MQ_TAG_CACHE_MIN = 1,
|
BLK_MQ_TAG_CACHE_MIN = 1,
|
||||||
|
|
|
@ -1789,6 +1789,28 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(blk_mq_free_tag_set);
|
EXPORT_SYMBOL(blk_mq_free_tag_set);
|
||||||
|
|
||||||
|
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
|
||||||
|
{
|
||||||
|
struct blk_mq_tag_set *set = q->tag_set;
|
||||||
|
struct blk_mq_hw_ctx *hctx;
|
||||||
|
int i, ret;
|
||||||
|
|
||||||
|
if (!set || nr > set->queue_depth)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
queue_for_each_hw_ctx(q, hctx, i) {
|
||||||
|
ret = blk_mq_tag_update_depth(hctx->tags, nr);
|
||||||
|
if (ret)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ret)
|
||||||
|
q->nr_requests = nr;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
void blk_mq_disable_hotplug(void)
|
void blk_mq_disable_hotplug(void)
|
||||||
{
|
{
|
||||||
mutex_lock(&all_q_mutex);
|
mutex_lock(&all_q_mutex);
|
||||||
|
|
|
@ -32,6 +32,7 @@ void blk_mq_drain_queue(struct request_queue *q);
|
||||||
void blk_mq_free_queue(struct request_queue *q);
|
void blk_mq_free_queue(struct request_queue *q);
|
||||||
void blk_mq_clone_flush_request(struct request *flush_rq,
|
void blk_mq_clone_flush_request(struct request *flush_rq,
|
||||||
struct request *orig_rq);
|
struct request *orig_rq);
|
||||||
|
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* CPU hotplug helpers
|
* CPU hotplug helpers
|
||||||
|
|
|
@ -48,11 +48,10 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page)
|
||||||
static ssize_t
|
static ssize_t
|
||||||
queue_requests_store(struct request_queue *q, const char *page, size_t count)
|
queue_requests_store(struct request_queue *q, const char *page, size_t count)
|
||||||
{
|
{
|
||||||
struct request_list *rl;
|
|
||||||
unsigned long nr;
|
unsigned long nr;
|
||||||
int ret;
|
int ret, err;
|
||||||
|
|
||||||
if (!q->request_fn)
|
if (!q->request_fn && !q->mq_ops)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
ret = queue_var_store(&nr, page, count);
|
ret = queue_var_store(&nr, page, count);
|
||||||
|
@ -62,40 +61,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
|
||||||
if (nr < BLKDEV_MIN_RQ)
|
if (nr < BLKDEV_MIN_RQ)
|
||||||
nr = BLKDEV_MIN_RQ;
|
nr = BLKDEV_MIN_RQ;
|
||||||
|
|
||||||
spin_lock_irq(q->queue_lock);
|
if (q->request_fn)
|
||||||
q->nr_requests = nr;
|
err = blk_update_nr_requests(q, nr);
|
||||||
blk_queue_congestion_threshold(q);
|
else
|
||||||
|
err = blk_mq_update_nr_requests(q, nr);
|
||||||
|
|
||||||
/* congestion isn't cgroup aware and follows root blkcg for now */
|
if (err)
|
||||||
rl = &q->root_rl;
|
return err;
|
||||||
|
|
||||||
if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
|
|
||||||
blk_set_queue_congested(q, BLK_RW_SYNC);
|
|
||||||
else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
|
|
||||||
blk_clear_queue_congested(q, BLK_RW_SYNC);
|
|
||||||
|
|
||||||
if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
|
|
||||||
blk_set_queue_congested(q, BLK_RW_ASYNC);
|
|
||||||
else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
|
|
||||||
blk_clear_queue_congested(q, BLK_RW_ASYNC);
|
|
||||||
|
|
||||||
blk_queue_for_each_rl(rl, q) {
|
|
||||||
if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
|
|
||||||
blk_set_rl_full(rl, BLK_RW_SYNC);
|
|
||||||
} else {
|
|
||||||
blk_clear_rl_full(rl, BLK_RW_SYNC);
|
|
||||||
wake_up(&rl->wait[BLK_RW_SYNC]);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
|
|
||||||
blk_set_rl_full(rl, BLK_RW_ASYNC);
|
|
||||||
} else {
|
|
||||||
blk_clear_rl_full(rl, BLK_RW_ASYNC);
|
|
||||||
wake_up(&rl->wait[BLK_RW_ASYNC]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
spin_unlock_irq(q->queue_lock);
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -188,6 +188,8 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
|
||||||
return q->nr_congestion_off;
|
return q->nr_congestion_off;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern int blk_update_nr_requests(struct request_queue *, unsigned int);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Contribute to IO statistics IFF:
|
* Contribute to IO statistics IFF:
|
||||||
*
|
*
|
||||||
|
|
|
@ -63,7 +63,7 @@ struct blk_mq_hw_ctx {
|
||||||
struct blk_mq_tag_set {
|
struct blk_mq_tag_set {
|
||||||
struct blk_mq_ops *ops;
|
struct blk_mq_ops *ops;
|
||||||
unsigned int nr_hw_queues;
|
unsigned int nr_hw_queues;
|
||||||
unsigned int queue_depth;
|
unsigned int queue_depth; /* max hw supported */
|
||||||
unsigned int reserved_tags;
|
unsigned int reserved_tags;
|
||||||
unsigned int cmd_size; /* per-request extra data */
|
unsigned int cmd_size; /* per-request extra data */
|
||||||
int numa_node;
|
int numa_node;
|
||||||
|
|
Loading…
Reference in a new issue