diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 5b675b226f64..014984d199e7 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -573,6 +573,57 @@ static inline unsigned int bfq_wr_duration(struct bfq_data *bfqd)
 	return dur;
 }
 
+static inline unsigned
+bfq_bfqq_cooperations(struct bfq_queue *bfqq)
+{
+	return bfqq->bic ? bfqq->bic->cooperations : 0;
+}
+
+static inline void
+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+{
+	if (bic->saved_idle_window)
+		bfq_mark_bfqq_idle_window(bfqq);
+	else
+		bfq_clear_bfqq_idle_window(bfqq);
+	if (bic->saved_IO_bound)
+		bfq_mark_bfqq_IO_bound(bfqq);
+	else
+		bfq_clear_bfqq_IO_bound(bfqq);
+	/* Assuming that the flag in_large_burst is already correctly set */
+	if (bic->wr_time_left && bfqq->bfqd->low_latency &&
+	    !bfq_bfqq_in_large_burst(bfqq) &&
+	    bic->cooperations < bfqq->bfqd->bfq_coop_thresh) {
+		/*
+		 * Start a weight raising period with the duration given by
+		 * the wr_time_left snapshot.
+		 */
+		if (bfq_bfqq_busy(bfqq))
+			bfqq->bfqd->wr_busy_queues++;
+		bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff;
+		bfqq->wr_cur_max_time = bic->wr_time_left;
+		bfqq->last_wr_start_finish = jiffies;
+		bfqq->entity.ioprio_changed = 1;
+	}
+	/*
+	 * Clear wr_time_left to prevent bfq_bfqq_save_state() from
+	 * getting confused about the queue's need for a weight-raising
+	 * period.
+	 */
+	bic->wr_time_left = 0;
+}
+
+/* Must be called with the queue_lock held. */
+static int bfqq_process_refs(struct bfq_queue *bfqq)
+{
+	int process_refs, io_refs;
+
+	io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
+	process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
+	BUG_ON(process_refs < 0);
+	return process_refs;
+}
+
 /* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */
 static inline void bfq_reset_burst_list(struct bfq_data *bfqd,
 					struct bfq_queue *bfqq)
@@ -817,7 +868,7 @@ static void bfq_add_request(struct request *rq)
 		bfq_rq_pos_tree_add(bfqd, bfqq);
 
 	if (!bfq_bfqq_busy(bfqq)) {
-		bool soft_rt,
+		bool soft_rt, coop_or_in_burst,
 		     idle_for_long_time = time_is_before_jiffies(
 						bfqq->budget_timeout +
 						bfqd->bfq_wr_min_idle_time);
@@ -841,11 +892,12 @@ static void bfq_add_request(struct request *rq)
 				bfqd->last_ins_in_burst = jiffies;
 		}
 
+		coop_or_in_burst = bfq_bfqq_in_large_burst(bfqq) ||
+			bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh;
 		soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
-			!bfq_bfqq_in_large_burst(bfqq) &&
+			!coop_or_in_burst &&
 			time_is_before_jiffies(bfqq->soft_rt_next_start);
-		interactive = !bfq_bfqq_in_large_burst(bfqq) &&
-			      idle_for_long_time;
+		interactive = !coop_or_in_burst && idle_for_long_time;
 		entity->budget = max_t(unsigned long, bfqq->max_budget,
 				       bfq_serv_to_charge(next_rq, bfqq));
 
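A note on the helpers introduced above: bfqq_process_refs() derives the number of processes still owning a queue by subtracting, from the raw refcount, the references held by in-flight requests and the one held while the queue sits on a service tree. The following throwaway userspace model (toy struct and values, not the kernel's bfq_queue) makes the invariant explicit:

/*
 * Minimal userspace model of the reference accounting behind
 * bfqq_process_refs(): total refs = process refs + one ref per
 * allocated request + one ref while on a service tree.
 * Names mirror the patch; the struct is a toy, not the kernel one.
 */
#include <assert.h>
#include <stdio.h>

struct toy_bfqq {
	int ref;           /* models atomic_read(&bfqq->ref) */
	int allocated[2];  /* READ/WRITE in-flight request refs */
	int on_st;         /* 1 while queued on a service tree */
};

static int toy_process_refs(const struct toy_bfqq *q)
{
	int io_refs = q->allocated[0] + q->allocated[1];

	return q->ref - io_refs - q->on_st;
}

int main(void)
{
	/* Two processes share the queue, three requests in flight. */
	struct toy_bfqq q = { .ref = 2 + 3 + 1,
			      .allocated = { 2, 1 }, .on_st = 1 };

	assert(toy_process_refs(&q) == 2);

	/* One process exits and drops its reference. */
	q.ref--;
	printf("process refs now %d\n", toy_process_refs(&q));
	return 0;
}

Any mismatch trips the BUG_ON in the real helper, and the merge paths below bail out early whenever either side's process references drop to zero.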
@@ -864,11 +916,20 @@ static void bfq_add_request(struct request *rq)
 		if (!bfqd->low_latency)
 			goto add_bfqq_busy;
 
+		if (bfq_bfqq_just_split(bfqq))
+			goto set_ioprio_changed;
+
 		/*
-		 * If the queue is not being boosted and has been idle
-		 * for enough time, start a weight-raising period
+		 * If the queue:
+		 * - is not being boosted,
+		 * - has been idle for enough time,
+		 * - is not a sync queue or is linked to a bfq_io_cq (it is
+		 *   shared "by nature" or it is not shared and its
+		 *   requests have not been redirected to a shared queue)
+		 * start a weight-raising period.
 		 */
-		if (old_wr_coeff == 1 && (interactive || soft_rt)) {
+		if (old_wr_coeff == 1 && (interactive || soft_rt) &&
+		    (!bfq_bfqq_sync(bfqq) || bfqq->bic != NULL)) {
 			bfqq->wr_coeff = bfqd->bfq_wr_coeff;
 			if (interactive)
 				bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
@@ -882,7 +943,7 @@ static void bfq_add_request(struct request *rq)
 		} else if (old_wr_coeff > 1) {
 			if (interactive)
 				bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
-			else if (bfq_bfqq_in_large_burst(bfqq) ||
+			else if (coop_or_in_burst ||
 				 (bfqq->wr_cur_max_time ==
 				  bfqd->bfq_wr_rt_max_time &&
 				  !soft_rt)) {
@@ -901,18 +962,18 @@ static void bfq_add_request(struct request *rq)
 			/*
 			 *
 			 * The remaining weight-raising time is lower
-			 * than bfqd->bfq_wr_rt_max_time, which
-			 * means that the application is enjoying
-			 * weight raising either because deemed soft-
-			 * rt in the near past, or because deemed
-			 * interactive a long ago. In both cases,
-			 * resetting now the current remaining weight-
-			 * raising time for the application to the
-			 * weight-raising duration for soft rt
-			 * applications would not cause any latency
-			 * increase for the application (as the new
-			 * duration would be higher than the remaining
-			 * time).
+			 * than bfqd->bfq_wr_rt_max_time, which means
+			 * that the application is enjoying weight
+			 * raising either because deemed soft-rt in
+			 * the near past, or because deemed interactive
+			 * long ago.
+			 * In both cases, resetting now the current
+			 * remaining weight-raising time for the
+			 * application to the weight-raising duration
+			 * for soft rt applications would not cause any
+			 * latency increase for the application (as the
+			 * new duration would be higher than the
+			 * remaining time).
 			 *
 			 * In addition, the application is now meeting
 			 * the requirements for being deemed soft rt.
@@ -947,6 +1008,7 @@ static void bfq_add_request(struct request *rq)
 					bfqd->bfq_wr_rt_max_time;
 			}
 		}
+set_ioprio_changed:
 		if (old_wr_coeff != bfqq->wr_coeff)
 			entity->ioprio_changed = 1;
 add_bfqq_busy:
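The idle_for_long_time and soft_rt tests in the hunks above lean on time_is_before_jiffies(), whose wrap-safety comes from comparing timestamps via signed subtraction rather than a plain less-than. A compact userspace sketch of that idea, simplified from the jiffies macros and assuming a 32-bit counter for the demonstration:

/*
 * Sketch of the wrap-safe ordering behind time_is_before_jiffies();
 * not the kernel macros themselves, just their core trick.
 */
#include <assert.h>
#include <stdint.h>

/* Mirrors time_after(a, b): true if a is later than b, wrap included. */
static int toy_time_after(uint32_t a, uint32_t b)
{
	return (int32_t)(b - a) < 0;
}

int main(void)
{
	uint32_t near_wrap = UINT32_MAX - 10;
	uint32_t later = near_wrap + 20;	/* wraps around to 9 */

	/* A plain comparison misorders the two timestamps... */
	assert(later < near_wrap);
	/* ...while the signed-difference test still gets it right. */
	assert(toy_time_after(later, near_wrap));
	return 0;
}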
@@ -1167,12 +1229,367 @@ static void bfq_end_wr(struct bfq_data *bfqd)
 	spin_unlock_irq(bfqd->queue->queue_lock);
 }
 
+static inline sector_t bfq_io_struct_pos(void *io_struct, bool request)
+{
+	if (request)
+		return blk_rq_pos(io_struct);
+	else
+		return ((struct bio *)io_struct)->bi_sector;
+}
+
+static inline sector_t bfq_dist_from(sector_t pos1,
+				     sector_t pos2)
+{
+	if (pos1 >= pos2)
+		return pos1 - pos2;
+	else
+		return pos2 - pos1;
+}
+
+static inline int bfq_rq_close_to_sector(void *io_struct, bool request,
+					 sector_t sector)
+{
+	return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <=
+	       BFQQ_SEEK_THR;
+}
+
+static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector)
+{
+	struct rb_root *root = &bfqd->rq_pos_tree;
+	struct rb_node *parent, *node;
+	struct bfq_queue *__bfqq;
+
+	if (RB_EMPTY_ROOT(root))
+		return NULL;
+
+	/*
+	 * First, if we find a request starting at the end of the last
+	 * request, choose it.
+	 */
+	__bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL);
+	if (__bfqq != NULL)
+		return __bfqq;
+
+	/*
+	 * If the exact sector wasn't found, the parent of the NULL leaf
+	 * will contain the closest sector (rq_pos_tree sorted by
+	 * next_request position).
+	 */
+	__bfqq = rb_entry(parent, struct bfq_queue, pos_node);
+	if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
+		return __bfqq;
+
+	if (blk_rq_pos(__bfqq->next_rq) < sector)
+		node = rb_next(&__bfqq->pos_node);
+	else
+		node = rb_prev(&__bfqq->pos_node);
+	if (node == NULL)
+		return NULL;
+
+	__bfqq = rb_entry(node, struct bfq_queue, pos_node);
+	if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
+		return __bfqq;
+
+	return NULL;
+}
+
+/*
+ * bfqd - obvious
+ * cur_bfqq - passed in so that we don't decide that the current queue
+ *            is closely cooperating with itself
+ * sector - used as a reference point to search for a close queue
+ */
+static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
+					      struct bfq_queue *cur_bfqq,
+					      sector_t sector)
+{
+	struct bfq_queue *bfqq;
+
+	if (bfq_class_idle(cur_bfqq))
+		return NULL;
+	if (!bfq_bfqq_sync(cur_bfqq))
+		return NULL;
+	if (BFQQ_SEEKY(cur_bfqq))
+		return NULL;
+
+	/* If device has only one backlogged bfq_queue, don't search. */
+	if (bfqd->busy_queues == 1)
+		return NULL;
+
+	/*
+	 * We should notice if some of the queues are cooperating, e.g.
+	 * working closely on the same area of the disk. In that case,
+	 * we can group them together and don't waste time idling.
+	 */
+	bfqq = bfqq_close(bfqd, sector);
+	if (bfqq == NULL || bfqq == cur_bfqq)
+		return NULL;
+
+	/*
+	 * Do not merge queues from different bfq_groups.
+	 */
+	if (bfqq->entity.parent != cur_bfqq->entity.parent)
+		return NULL;
+
+	/*
+	 * It only makes sense to merge sync queues.
+	 */
+	if (!bfq_bfqq_sync(bfqq))
+		return NULL;
+	if (BFQQ_SEEKY(bfqq))
+		return NULL;
+
+	/*
+	 * Do not merge queues of different priority classes.
+	 */
+	if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq))
+		return NULL;
+
+	return bfqq;
+}
+
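bfqq_close() above implements a simple policy: take an exact hit in the rq_pos_tree if there is one, otherwise consider the two neighbours of the insertion point and accept a queue only if its next request lies within BFQQ_SEEK_THR of the target sector. The same policy in a self-contained userspace sketch, with a sorted array standing in for the rb-tree and a made-up threshold value:

/*
 * Userspace illustration of the bfqq_close() lookup policy: exact hit,
 * else check both neighbours of the insertion point against a seek
 * threshold. Toy data structure; not the kernel's rq_pos_tree.
 */
#include <stdio.h>

#define TOY_SEEK_THR 64

static unsigned long long toy_dist(unsigned long long a,
				   unsigned long long b)
{
	return a > b ? a - b : b - a;
}

/* Returns the index of a "close enough" position, or -1. */
static int toy_close(const unsigned long long *pos, int n,
		     unsigned long long sector)
{
	int i;

	for (i = 0; i < n && pos[i] < sector; i++)
		;				/* find insertion point */
	if (i < n && pos[i] == sector)
		return i;			/* exact hit */
	if (i < n && toy_dist(pos[i], sector) <= TOY_SEEK_THR)
		return i;			/* successor close enough */
	if (i > 0 && toy_dist(pos[i - 1], sector) <= TOY_SEEK_THR)
		return i - 1;			/* predecessor close enough */
	return -1;
}

int main(void)
{
	unsigned long long next_rq_pos[] = { 100, 5000, 5040, 90000 };

	printf("%d\n", toy_close(next_rq_pos, 4, 5010));	/* 2 */
	printf("%d\n", toy_close(next_rq_pos, 4, 40000));	/* -1 */
	return 0;
}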
+static struct bfq_queue *
+bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+{
+	int process_refs, new_process_refs;
+	struct bfq_queue *__bfqq;
+
+	/*
+	 * If there are no process references on the new_bfqq, then it is
+	 * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
+	 * may have dropped their last reference (not just their last process
+	 * reference).
+	 */
+	if (!bfqq_process_refs(new_bfqq))
+		return NULL;
+
+	/* Avoid a circular list and skip interim queue merges. */
+	while ((__bfqq = new_bfqq->new_bfqq)) {
+		if (__bfqq == bfqq)
+			return NULL;
+		new_bfqq = __bfqq;
+	}
+
+	process_refs = bfqq_process_refs(bfqq);
+	new_process_refs = bfqq_process_refs(new_bfqq);
+	/*
+	 * If the process for the bfqq has gone away, there is no
+	 * sense in merging the queues.
+	 */
+	if (process_refs == 0 || new_process_refs == 0)
+		return NULL;
+
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
+		     new_bfqq->pid);
+
+	/*
+	 * Merging is just a redirection: the requests of the process
+	 * owning one of the two queues are redirected to the other queue.
+	 * The latter queue, in its turn, is set as shared if this is the
+	 * first time that the requests of some process are redirected to
+	 * it.
+	 *
+	 * We redirect bfqq to new_bfqq and not the opposite, because we
+	 * are in the context of the process owning bfqq, hence we have
+	 * the io_cq of this process. So we can immediately configure this
+	 * io_cq to redirect the requests of the process to new_bfqq.
+	 *
+	 * NOTE: even if new_bfqq coincides with the in-service queue, the
+	 * io_cq of new_bfqq is not available, because, if the in-service
+	 * queue is shared, bfqd->in_service_bic may not point to the
+	 * io_cq of the in-service queue.
+	 * Redirecting the requests of the process owning bfqq to the
+	 * currently in-service queue is in any case the best option, as
+	 * we feed the in-service queue with new requests close to the
+	 * last request served and, by doing so, hopefully increase the
+	 * throughput.
+	 */
+	bfqq->new_bfqq = new_bfqq;
+	atomic_add(process_refs, &new_bfqq->ref);
+	return new_bfqq;
+}
+
+/*
+ * Attempt to schedule a merge of bfqq with the currently in-service queue
+ * or with a close queue among the scheduled queues.
+ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue
+ * structure otherwise.
+ *
+ * The OOM queue is not allowed to participate in cooperation: in fact,
+ * since the requests temporarily redirected to the OOM queue could be
+ * redirected again to dedicated queues at any time, the state needed to
+ * correctly handle merging with the OOM queue would be quite complex and
+ * expensive to maintain. Besides, in such a critical condition as an
+ * out-of-memory state, the benefits of queue merging may be of little
+ * relevance, or even negligible.
+ */
+static struct bfq_queue *
+bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+		     void *io_struct, bool request)
+{
+	struct bfq_queue *in_service_bfqq, *new_bfqq;
+
+	if (bfqq->new_bfqq)
+		return bfqq->new_bfqq;
+
+	if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
+		return NULL;
+
+	in_service_bfqq = bfqd->in_service_queue;
+
+	if (in_service_bfqq == NULL || in_service_bfqq == bfqq ||
+	    !bfqd->in_service_bic ||
+	    unlikely(in_service_bfqq == &bfqd->oom_bfqq))
+		goto check_scheduled;
+
+	if (bfq_class_idle(in_service_bfqq) || bfq_class_idle(bfqq))
+		goto check_scheduled;
+
+	if (bfq_class_rt(in_service_bfqq) != bfq_class_rt(bfqq))
+		goto check_scheduled;
+
+	if (in_service_bfqq->entity.parent != bfqq->entity.parent)
+		goto check_scheduled;
+
+	if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
+	    bfq_bfqq_sync(in_service_bfqq) && bfq_bfqq_sync(bfqq)) {
+		new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
+		if (new_bfqq != NULL)
+			return new_bfqq; /* Merge with in-service queue */
+	}
+
+	/*
+	 * Check whether there is a cooperator among currently scheduled
+	 * queues. The only thing we need is that the bio/request is not
+	 * NULL, as we need it to establish whether a cooperator exists.
+	 */
+check_scheduled:
+	new_bfqq = bfq_close_cooperator(bfqd, bfqq,
+					bfq_io_struct_pos(io_struct, request));
+	if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq))
+		return bfq_setup_merge(bfqq, new_bfqq);
+
+	return NULL;
+}
+
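The chain walk at the top of bfq_setup_merge() deserves a second look: pending merges form a list through ->new_bfqq, and following it blindly could loop back to the starting queue. A toy reproduction of the walk and the cycle check (toy structs only, not the kernel types):

/*
 * Toy version of the ->new_bfqq chain walk in bfq_setup_merge(): follow
 * pending merge links to the final target, refusing a cycle.
 */
#include <assert.h>
#include <stddef.h>

struct toy_q {
	struct toy_q *new_q;	/* models bfqq->new_bfqq */
};

static struct toy_q *toy_merge_target(struct toy_q *q, struct toy_q *cand)
{
	struct toy_q *next;

	while ((next = cand->new_q) != NULL) {
		if (next == q)
			return NULL;	/* would create a cycle */
		cand = next;
	}
	return cand;
}

int main(void)
{
	struct toy_q a = { 0 }, b = { 0 }, c = { 0 };

	b.new_q = &c;		/* b already scheduled to merge into c */
	assert(toy_merge_target(&a, &b) == &c);

	c.new_q = &a;		/* c, in turn, points back to a... */
	assert(toy_merge_target(&a, &b) == NULL);	/* ...cycle refused */
	return 0;
}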
+static inline void
+bfq_bfqq_save_state(struct bfq_queue *bfqq)
+{
+	/*
+	 * If bfqq->bic == NULL, the queue is already shared or its requests
+	 * have already been redirected to a shared queue; both idle window
+	 * and weight raising state have already been saved. Do nothing.
+	 */
+	if (bfqq->bic == NULL)
+		return;
+	if (bfqq->bic->wr_time_left)
+		/*
+		 * This is the queue of a just-started process, and would
+		 * deserve weight raising: we set wr_time_left to the full
+		 * weight-raising duration to trigger weight-raising when
+		 * and if the queue is split and the first request of the
+		 * queue is enqueued.
+		 */
+		bfqq->bic->wr_time_left = bfq_wr_duration(bfqq->bfqd);
+	else if (bfqq->wr_coeff > 1) {
+		unsigned long wr_duration =
+			jiffies - bfqq->last_wr_start_finish;
+		/*
+		 * It may happen that a queue's weight raising period lasts
+		 * longer than its wr_cur_max_time, as weight raising is
+		 * handled only when a request is enqueued or dispatched (it
+		 * does not use any timer). If the weight raising period has
+		 * already ended, don't save it.
+		 */
+		if (bfqq->wr_cur_max_time <= wr_duration)
+			bfqq->bic->wr_time_left = 0;
+		else
+			bfqq->bic->wr_time_left =
+				bfqq->wr_cur_max_time - wr_duration;
+		/*
+		 * The bfq_queue is becoming shared or the requests of the
+		 * process owning the queue are being redirected to a shared
+		 * queue. Stop the weight raising period of the queue, as in
+		 * both cases it should not be owned by an interactive or
+		 * soft real-time application.
+		 */
+		bfq_bfqq_end_wr(bfqq);
+	} else
+		bfqq->bic->wr_time_left = 0;
+	bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
+	bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
+	bfqq->bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
+	bfqq->bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
+	bfqq->bic->cooperations++;
+	bfqq->bic->failed_cooperations = 0;
+}
+
+static inline void
+bfq_get_bic_reference(struct bfq_queue *bfqq)
+{
+	/*
+	 * If bfqq->bic has a non-NULL value, the bic to which it belongs
+	 * is about to begin using a shared bfq_queue.
+	 */
+	if (bfqq->bic)
+		atomic_long_inc(&bfqq->bic->icq.ioc->refcount);
+}
+
+static void
+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+		struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+{
+	bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
+		     (long unsigned)new_bfqq->pid);
+	/* Save weight raising and idle window of the merged queues */
+	bfq_bfqq_save_state(bfqq);
+	bfq_bfqq_save_state(new_bfqq);
+	if (bfq_bfqq_IO_bound(bfqq))
+		bfq_mark_bfqq_IO_bound(new_bfqq);
+	bfq_clear_bfqq_IO_bound(bfqq);
+	/*
+	 * Grab a reference to the bic, to prevent it from being destroyed
+	 * before being possibly touched by a bfq_split_bfqq().
+	 */
+	bfq_get_bic_reference(bfqq);
+	bfq_get_bic_reference(new_bfqq);
+	/*
+	 * Merge queues (that is, let bic redirect its requests to new_bfqq)
+	 */
+	bic_set_bfqq(bic, new_bfqq, 1);
+	bfq_mark_bfqq_coop(new_bfqq);
+	/*
+	 * new_bfqq now belongs to at least two bics (it is a shared queue):
+	 * set new_bfqq->bic to NULL. bfqq either:
+	 * - does not belong to any bic any more, and hence bfqq->bic must
+	 *   be set to NULL, or
+	 * - is a queue whose owning bics have already been redirected to a
+	 *   different queue, hence the queue is destined to not belong to
+	 *   any bic soon and bfqq->bic is already NULL (therefore the next
+	 *   assignment causes no harm).
+	 */
+	new_bfqq->bic = NULL;
+	bfqq->bic = NULL;
+	bfq_put_queue(bfqq);
+}
+
+static inline void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq)
+{
+	struct bfq_io_cq *bic = bfqq->bic;
+	struct bfq_data *bfqd = bfqq->bfqd;
+
+	if (bic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) {
+		bic->failed_cooperations++;
+		if (bic->failed_cooperations >= bfqd->bfq_failed_cooperations)
+			bic->cooperations = 0;
+	}
+}
+
 static int bfq_allow_merge(struct request_queue *q, struct request *rq,
 			   struct bio *bio)
 {
 	struct bfq_data *bfqd = q->elevator->elevator_data;
 	struct bfq_io_cq *bic;
-	struct bfq_queue *bfqq;
+	struct bfq_queue *bfqq, *new_bfqq;
 
 	/*
 	 * Disallow merge of a sync bio into an async request.
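The save/resume pair (bfq_bfqq_save_state() above, bfq_bfqq_resume_state() in the first hunk) effectively freezes the weight-raising countdown across a merge and restarts it on a split. The arithmetic round-trips as follows; plain integers stand in for jiffies in this hedged userspace sketch:

/*
 * Userspace round-trip of the weight-raising snapshot: on merge, store
 * the time left in the current period; on split, restart a period of
 * exactly that length. Toy struct, not the kernel state.
 */
#include <assert.h>

struct toy_wr {
	unsigned long wr_cur_max_time;		/* period length */
	unsigned long last_wr_start_finish;	/* period start time */
	unsigned long saved_time_left;		/* models bic->wr_time_left */
};

static void toy_save(struct toy_wr *w, unsigned long now)
{
	unsigned long elapsed = now - w->last_wr_start_finish;

	w->saved_time_left = w->wr_cur_max_time <= elapsed ?
		0 : w->wr_cur_max_time - elapsed;
}

static void toy_resume(struct toy_wr *w, unsigned long now)
{
	w->wr_cur_max_time = w->saved_time_left;
	w->last_wr_start_finish = now;
	w->saved_time_left = 0;
}

int main(void)
{
	struct toy_wr w = { .wr_cur_max_time = 6000,
			    .last_wr_start_finish = 1000 };

	toy_save(&w, 3500);		/* merged 2500 ticks into the period */
	assert(w.saved_time_left == 3500);

	toy_resume(&w, 9000);		/* split much later */
	assert(w.wr_cur_max_time == 3500);
	assert(w.last_wr_start_finish == 9000);
	return 0;
}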
@@ -1190,6 +1607,24 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
 		return 0;
 
 	bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
+	/*
+	 * We take advantage of this function to perform an early merge
+	 * of the queues of possible cooperating processes.
+	 */
+	if (bfqq != NULL) {
+		new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false);
+		if (new_bfqq != NULL) {
+			bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
+			/*
+			 * If we get here, the bio will be queued in the
+			 * shared queue, i.e., new_bfqq, so use new_bfqq
+			 * to decide whether bio and rq can be merged.
+			 */
+			bfqq = new_bfqq;
+		} else
+			bfq_bfqq_increase_failed_cooperations(bfqq);
+	}
+
 	return bfqq == RQ_BFQQ(rq);
 }
 
@@ -1214,135 +1649,14 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
 /*
  * Get and set a new queue for service.
  */
-static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd,
-						  struct bfq_queue *bfqq)
+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
 {
-	if (!bfqq)
-		bfqq = bfq_get_next_queue(bfqd);
-	else
-		bfq_get_next_queue_forced(bfqd, bfqq);
+	struct bfq_queue *bfqq = bfq_get_next_queue(bfqd);
 
 	__bfq_set_in_service_queue(bfqd, bfqq);
 	return bfqq;
 }
 
-static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd,
-					  struct request *rq)
-{
-	if (blk_rq_pos(rq) >= bfqd->last_position)
-		return blk_rq_pos(rq) - bfqd->last_position;
-	else
-		return bfqd->last_position - blk_rq_pos(rq);
-}
-
-/*
- * Return true if bfqq has no request pending and rq is close enough to
- * bfqd->last_position, or if rq is closer to bfqd->last_position than
- * bfqq->next_rq
- */
-static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq)
-{
-	return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR;
-}
-
-static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
-{
-	struct rb_root *root = &bfqd->rq_pos_tree;
-	struct rb_node *parent, *node;
-	struct bfq_queue *__bfqq;
-	sector_t sector = bfqd->last_position;
-
-	if (RB_EMPTY_ROOT(root))
-		return NULL;
-
-	/*
-	 * First, if we find a request starting at the end of the last
-	 * request, choose it.
-	 */
-	__bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL);
-	if (__bfqq != NULL)
-		return __bfqq;
-
-	/*
-	 * If the exact sector wasn't found, the parent of the NULL leaf
-	 * will contain the closest sector (rq_pos_tree sorted by
-	 * next_request position).
-	 */
-	__bfqq = rb_entry(parent, struct bfq_queue, pos_node);
-	if (bfq_rq_close(bfqd, __bfqq->next_rq))
-		return __bfqq;
-
-	if (blk_rq_pos(__bfqq->next_rq) < sector)
-		node = rb_next(&__bfqq->pos_node);
-	else
-		node = rb_prev(&__bfqq->pos_node);
-	if (node == NULL)
-		return NULL;
-
-	__bfqq = rb_entry(node, struct bfq_queue, pos_node);
-	if (bfq_rq_close(bfqd, __bfqq->next_rq))
-		return __bfqq;
-
-	return NULL;
-}
-
-/*
- * bfqd - obvious
- * cur_bfqq - passed in so that we don't decide that the current queue
- *            is closely cooperating with itself.
- *
- * We are assuming that cur_bfqq has dispatched at least one request,
- * and that bfqd->last_position reflects a position on the disk associated
- * with the I/O issued by cur_bfqq.
- */
-static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
-					      struct bfq_queue *cur_bfqq)
-{
-	struct bfq_queue *bfqq;
-
-	if (bfq_class_idle(cur_bfqq))
-		return NULL;
-	if (!bfq_bfqq_sync(cur_bfqq))
-		return NULL;
-	if (BFQQ_SEEKY(cur_bfqq))
-		return NULL;
-
-	/* If device has only one backlogged bfq_queue, don't search. */
-	if (bfqd->busy_queues == 1)
-		return NULL;
-
-	/*
-	 * We should notice if some of the queues are cooperating, e.g.
-	 * working closely on the same area of the disk. In that case,
-	 * we can group them together and don't waste time idling.
-	 */
-	bfqq = bfqq_close(bfqd);
-	if (bfqq == NULL || bfqq == cur_bfqq)
-		return NULL;
-
-	/*
-	 * Do not merge queues from different bfq_groups.
-	 */
-	if (bfqq->entity.parent != cur_bfqq->entity.parent)
-		return NULL;
-
-	/*
-	 * It only makes sense to merge sync queues.
-	 */
-	if (!bfq_bfqq_sync(bfqq))
-		return NULL;
-	if (BFQQ_SEEKY(bfqq))
-		return NULL;
-
-	/*
-	 * Do not merge queues of different priority classes.
-	 */
-	if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq))
-		return NULL;
-
-	return bfqq;
-}
-
 /*
  * If enough samples have been computed, return the current max budget
  * stored in bfqd, which is dynamically updated according to the
@@ -1488,61 +1802,6 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
 	return rq;
 }
 
-/* Must be called with the queue_lock held. */
-static int bfqq_process_refs(struct bfq_queue *bfqq)
-{
-	int process_refs, io_refs;
-
-	io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
-	process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
-	BUG_ON(process_refs < 0);
-	return process_refs;
-}
-
-static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
-{
-	int process_refs, new_process_refs;
-	struct bfq_queue *__bfqq;
-
-	/*
-	 * If there are no process references on the new_bfqq, then it is
-	 * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
-	 * may have dropped their last reference (not just their last process
-	 * reference).
-	 */
-	if (!bfqq_process_refs(new_bfqq))
-		return;
-
-	/* Avoid a circular list and skip interim queue merges. */
-	while ((__bfqq = new_bfqq->new_bfqq)) {
-		if (__bfqq == bfqq)
-			return;
-		new_bfqq = __bfqq;
-	}
-
-	process_refs = bfqq_process_refs(bfqq);
-	new_process_refs = bfqq_process_refs(new_bfqq);
-	/*
-	 * If the process for the bfqq has gone away, there is no
-	 * sense in merging the queues.
-	 */
-	if (process_refs == 0 || new_process_refs == 0)
-		return;
-
-	/*
-	 * Merge in the direction of the lesser amount of work.
-	 */
-	if (new_process_refs >= process_refs) {
-		bfqq->new_bfqq = new_bfqq;
-		atomic_add(process_refs, &new_bfqq->ref);
-	} else {
-		new_bfqq->new_bfqq = bfqq;
-		atomic_add(new_process_refs, &bfqq->ref);
-	}
-	bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
-		     new_bfqq->pid);
-}
-
 static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq)
 {
 	struct bfq_entity *entity = &bfqq->entity;
@@ -2269,7 +2528,7 @@ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
  */
 static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
 {
-	struct bfq_queue *bfqq, *new_bfqq = NULL;
+	struct bfq_queue *bfqq;
 	struct request *next_rq;
 	enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
 
@@ -2279,17 +2538,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
 
 	bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue");
 
-	/*
-	 * If another queue has a request waiting within our mean seek
-	 * distance, let it run. The expire code will check for close
-	 * cooperators and put the close queue at the front of the
-	 * service tree. If possible, merge the expiring queue with the
-	 * new bfqq.
-	 */
-	new_bfqq = bfq_close_cooperator(bfqd, bfqq);
-	if (new_bfqq != NULL && bfqq->new_bfqq == NULL)
-		bfq_setup_merge(bfqq, new_bfqq);
-
 	if (bfq_may_expire_for_budg_timeout(bfqq) &&
 	    !timer_pending(&bfqd->idle_slice_timer) &&
 	    !bfq_bfqq_must_idle(bfqq))
@@ -2328,10 +2576,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
 				bfq_clear_bfqq_wait_request(bfqq);
 				del_timer(&bfqd->idle_slice_timer);
 			}
-			if (new_bfqq == NULL)
-				goto keep_queue;
-			else
-				goto expire;
+			goto keep_queue;
 		}
 	}
 
@@ -2340,40 +2585,30 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
 	 * for a new request, or has requests waiting for a completion and
 	 * may idle after their completion, then keep it anyway.
 	 */
-	if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) ||
-	    (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq)))) {
+	if (timer_pending(&bfqd->idle_slice_timer) ||
+	    (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) {
 		bfqq = NULL;
 		goto keep_queue;
-	} else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) {
-		/*
-		 * Expiring the queue because there is a close cooperator,
-		 * cancel timer.
-		 */
-		bfq_clear_bfqq_wait_request(bfqq);
-		del_timer(&bfqd->idle_slice_timer);
 	}
 
 	reason = BFQ_BFQQ_NO_MORE_REQUESTS;
expire:
 	bfq_bfqq_expire(bfqd, bfqq, 0, reason);
new_queue:
-	bfqq = bfq_set_in_service_queue(bfqd, new_bfqq);
+	bfqq = bfq_set_in_service_queue(bfqd);
 	bfq_log(bfqd, "select_queue: new queue %d returned",
 		bfqq != NULL ? bfqq->pid : 0);
keep_queue:
 	return bfqq;
 }
 
-static void bfq_update_wr_data(struct bfq_data *bfqd,
-			       struct bfq_queue *bfqq)
+static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 {
-	if (bfqq->wr_coeff > 1) { /* queue is being boosted */
-		struct bfq_entity *entity = &bfqq->entity;
-
+	struct bfq_entity *entity = &bfqq->entity;
+	if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */
 		bfq_log_bfqq(bfqd, bfqq,
 			"raising period dur %u/%u msec, old coeff %u, w %d(%d)",
-			jiffies_to_msecs(jiffies -
-				bfqq->last_wr_start_finish),
+			jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
 			jiffies_to_msecs(bfqq->wr_cur_max_time),
 			bfqq->wr_coeff,
 			bfqq->entity.weight, bfqq->entity.orig_weight);
@@ -2382,12 +2617,16 @@ static void bfq_update_wr_data(struct bfq_data *bfqd,
 			entity->orig_weight * bfqq->wr_coeff);
 		if (entity->ioprio_changed)
 			bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
+
 		/*
 		 * If the queue was activated in a burst, or
 		 * too much time has elapsed from the beginning
-		 * of this weight-raising, then end weight raising.
+		 * of this weight-raising period, or the queue has
+		 * exceeded the acceptable number of cooperations,
+		 * then end weight raising.
 		 */
 		if (bfq_bfqq_in_large_burst(bfqq) ||
+		    bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh ||
 		    time_is_before_jiffies(bfqq->last_wr_start_finish +
 					   bfqq->wr_cur_max_time)) {
 			bfqq->last_wr_start_finish = jiffies;
@@ -2396,11 +2635,13 @@ static void bfq_update_wr_data(struct bfq_data *bfqd,
 				bfqq->last_wr_start_finish,
 				jiffies_to_msecs(bfqq->wr_cur_max_time));
 			bfq_bfqq_end_wr(bfqq);
-			__bfq_entity_update_weight_prio(
-				bfq_entity_service_tree(entity),
-				entity);
 		}
 	}
+	/* Update weight both if it must be raised and if it must be lowered */
+	if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
+		__bfq_entity_update_weight_prio(
+			bfq_entity_service_tree(entity),
+			entity);
 }
 
 /*
@@ -2647,6 +2888,25 @@ static inline void bfq_init_icq(struct io_cq *icq)
 	struct bfq_io_cq *bic = icq_to_bic(icq);
 
 	bic->ttime.last_end_request = jiffies;
+	/*
+	 * A newly created bic indicates that the process has just
+	 * started doing I/O, and is probably mapping into memory its
+	 * executable and libraries: it definitely needs weight raising.
+	 * There is however the possibility that the process performs,
+	 * for a while, I/O close to some other process. EQM intercepts
+	 * this behavior and may merge the queue corresponding to the
+	 * process with some other queue, BEFORE the weight of the queue
+	 * is raised. Merged queues are not weight-raised (they are assumed
+	 * to belong to processes that benefit only from high throughput).
+	 * If the merge is basically the consequence of an accident, then
+	 * the queue will be split soon and will get back its old weight.
+	 * It is then important to write down somewhere that this queue
+	 * does need weight raising, even if it did not make it to get its
+	 * weight raised before being merged. To this purpose, we overload
+	 * the field wr_time_left and assign 1 to it, to mark the queue
+	 * as needing weight raising.
+	 */
+	bic->wr_time_left = 1;
 }
 
 static void bfq_exit_icq(struct io_cq *icq)
@@ -2660,6 +2920,13 @@ static void bfq_exit_icq(struct io_cq *icq)
 	}
 
 	if (bic->bfqq[BLK_RW_SYNC]) {
+		/*
+		 * If the bic is using a shared queue, put the reference
+		 * taken on the io_context when the bic started using a
+		 * shared bfq_queue.
+		 */
+		if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC]))
+			put_io_context(icq->ioc);
 		bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
 		bic->bfqq[BLK_RW_SYNC] = NULL;
 	}
@@ -2952,6 +3219,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
 	if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
 		return;
 
+	/* Idle window just restored, statistics are meaningless. */
+	if (bfq_bfqq_just_split(bfqq))
+		return;
+
 	enable_idle = bfq_bfqq_idle_window(bfqq);
 
 	if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
@@ -2999,6 +3270,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
 	    !BFQQ_SEEKY(bfqq))
 		bfq_update_idle_window(bfqd, bfqq, bic);
+	bfq_clear_bfqq_just_split(bfqq);
 
 	bfq_log_bfqq(bfqd, bfqq,
 		     "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
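The new tail of bfq_update_wr_data() is worth spelling out: the entity's weight is touched exactly when its raised/not-raised state disagrees with wr_coeff, so a single test covers both the raise and the lower direction. A quick truth-table check of that condition:

/*
 * Standalone check of the update condition added at the end of
 * bfq_update_wr_data(): fire exactly when the weight's state and
 * wr_coeff disagree, in either direction.
 */
#include <stdio.h>

static int toy_needs_update(int weight, int orig_weight, int wr_coeff)
{
	return (weight > orig_weight) != (wr_coeff > 1);
}

int main(void)
{
	/* weight, orig_weight, wr_coeff -> update? */
	printf("%d\n", toy_needs_update(100, 100, 20));  /* 1: must raise */
	printf("%d\n", toy_needs_update(2000, 100, 1));  /* 1: must lower */
	printf("%d\n", toy_needs_update(2000, 100, 20)); /* 0: consistent */
	printf("%d\n", toy_needs_update(100, 100, 1));   /* 0: consistent */
	return 0;
}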
@@ -3059,12 +3331,47 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 static void bfq_insert_request(struct request_queue *q, struct request *rq)
 {
 	struct bfq_data *bfqd = q->elevator->elevator_data;
-	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+	struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq;
 
 	assert_spin_locked(bfqd->queue->queue_lock);
+
+	/*
+	 * An unplug may trigger a requeue of a request from the device
+	 * driver: make sure we are in process context while trying to
+	 * merge two bfq_queues.
+	 */
+	if (!in_interrupt()) {
+		new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
+		if (new_bfqq != NULL) {
+			if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
+				new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
+			/*
+			 * Release the request's reference to the old bfqq
+			 * and make sure one is taken to the shared queue.
+			 */
+			new_bfqq->allocated[rq_data_dir(rq)]++;
+			bfqq->allocated[rq_data_dir(rq)]--;
+			atomic_inc(&new_bfqq->ref);
+			bfq_put_queue(bfqq);
+			if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
+				bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
+						bfqq, new_bfqq);
+			rq->elv.priv[1] = new_bfqq;
+			bfqq = new_bfqq;
+		} else
+			bfq_bfqq_increase_failed_cooperations(bfqq);
+	}
+
 	bfq_add_request(rq);
+
+	/*
+	 * Here a newly-created bfq_queue has already started a weight-raising
+	 * period: clear wr_time_left to prevent bfq_bfqq_save_state() from
+	 * assigning it a full weight-raising period. See the detailed
+	 * comments about this field in bfq_init_icq().
+	 */
+	if (bfqq->bic != NULL)
+		bfqq->bic->wr_time_left = 0;
+
 	rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]);
 	list_add_tail(&rq->queuelist, &bfqq->fifo);
@@ -3226,18 +3533,6 @@ static void bfq_put_request(struct request *rq)
 	}
 }
 
-static struct bfq_queue *
-bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
-		struct bfq_queue *bfqq)
-{
-	bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
-		     (long unsigned)bfqq->new_bfqq->pid);
-	bic_set_bfqq(bic, bfqq->new_bfqq, 1);
-	bfq_mark_bfqq_coop(bfqq->new_bfqq);
-	bfq_put_queue(bfqq);
-	return bic_to_bfqq(bic, 1);
-}
-
 /*
  * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
  * was the last process referring to said bfqq.
@@ -3246,6 +3541,9 @@ static struct bfq_queue *
 bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
 {
 	bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
+
+	put_io_context(bic->icq.ioc);
+
 	if (bfqq_process_refs(bfqq) == 1) {
 		bfqq->pid = current->pid;
 		bfq_clear_bfqq_coop(bfqq);
@@ -3274,6 +3572,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
 	struct bfq_queue *bfqq;
 	struct bfq_group *bfqg;
 	unsigned long flags;
+	bool split = false;
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
@@ -3291,25 +3590,26 @@ new_queue:
 	if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) {
 		bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
 		bic_set_bfqq(bic, bfqq, is_sync);
+		if (split && is_sync) {
+			if ((bic->was_in_burst_list && bfqd->large_burst) ||
+			    bic->saved_in_large_burst)
+				bfq_mark_bfqq_in_large_burst(bfqq);
+			else {
+				bfq_clear_bfqq_in_large_burst(bfqq);
+				if (bic->was_in_burst_list)
+					hlist_add_head(&bfqq->burst_list_node,
+						       &bfqd->burst_list);
+			}
+		}
 	} else {
-		/*
-		 * If the queue was seeky for too long, break it apart.
-		 */
+		/* If the queue was seeky for too long, break it apart. */
 		if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
 			bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
 			bfqq = bfq_split_bfqq(bic, bfqq);
+			split = true;
 			if (!bfqq)
 				goto new_queue;
 		}
-
-		/*
-		 * Check to see if this queue is scheduled to merge with
-		 * another closely cooperating queue. The merging of queues
-		 * happens here as it must be done in process context.
-		 * The reference on new_bfqq was taken in merge_bfqqs.
-		 */
-		if (bfqq->new_bfqq != NULL)
-			bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq);
 	}
 
 	bfqq->allocated[rw]++;
@@ -3320,6 +3620,26 @@ new_queue:
 	rq->elv.priv[0] = bic;
 	rq->elv.priv[1] = bfqq;
 
+	/*
+	 * If a bfq_queue has only one process reference, it is owned
+	 * by only one bfq_io_cq: we can set the bic field of the
+	 * bfq_queue to the address of that structure. Also, if the
+	 * queue has just been split, mark a flag so that the
+	 * information is available to the other scheduler hooks.
+	 */
+	if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) {
+		bfqq->bic = bic;
+		if (split) {
+			bfq_mark_bfqq_just_split(bfqq);
+			/*
+			 * If the queue has just been split from a shared
+			 * queue, restore the idle window and the possible
+			 * weight raising period.
+			 */
+			bfq_bfqq_resume_state(bfqq, bic);
+		}
+	}
+
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
 	return 0;
diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index c3430993ec94..d0890c6d4c11 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -1085,34 +1085,6 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
 	return bfqq;
 }
 
-/*
- * Forced extraction of the given queue.
- */
-static void bfq_get_next_queue_forced(struct bfq_data *bfqd,
-				      struct bfq_queue *bfqq)
-{
-	struct bfq_entity *entity;
-	struct bfq_sched_data *sd;
-
-	BUG_ON(bfqd->in_service_queue != NULL);
-
-	entity = &bfqq->entity;
-	/*
-	 * Bubble up extraction/update from the leaf to the root.
-	 */
-	for_each_entity(entity) {
-		sd = entity->sched_data;
-		bfq_update_budget(entity);
-		bfq_update_vtime(bfq_entity_service_tree(entity));
-		bfq_active_extract(bfq_entity_service_tree(entity), entity);
-		sd->in_service_entity = entity;
-		sd->next_in_service = NULL;
-		entity->service = 0;
-	}
-
-	return;
-}
-
 static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
 {
 	if (bfqd->in_service_bic != NULL) {
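The burst-state restore in bfq_set_request() (the split && is_sync branch above) reduces to a small decision function: a queue recreated after a split is flagged as belonging to a large burst if it was in one when its state was saved, or if it sat on the burst list and the burst has grown large in the meantime. This standalone check mirrors that branch with plain booleans:

/*
 * Userspace mirror of the large-burst decision made when a split queue
 * is recreated in bfq_set_request(). Booleans only; no kernel state.
 */
#include <assert.h>
#include <stdbool.h>

static bool toy_in_large_burst(bool was_in_burst_list, bool large_burst_now,
			       bool saved_in_large_burst)
{
	return (was_in_burst_list && large_burst_now) || saved_in_large_burst;
}

int main(void)
{
	/* Saved while in a large burst: stays flagged. */
	assert(toy_in_large_burst(false, false, true));
	/* On the burst list, and the burst turned large after the merge. */
	assert(toy_in_large_burst(true, true, false));
	/* Plain queue: no flag; it would be re-added to the burst list. */
	assert(!toy_in_large_burst(false, true, false));
	return 0;
}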
diff --git a/block/bfq.h b/block/bfq.h
index 619bcdd1ec15..aeb9bb4bbce7 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -218,18 +218,21 @@ struct bfq_group;
  *                    idle @bfq_queue with no outstanding requests, then
  *                    the task associated with the queue it is deemed as
  *                    soft real-time (see the comments to the function
- *                    bfq_bfqq_softrt_next_start()).
+ *                    bfq_bfqq_softrt_next_start())
  * @last_idle_bklogged: time of the last transition of the @bfq_queue from
  *                      idle to backlogged
  * @service_from_backlogged: cumulative service received from the @bfq_queue
  *                           since the last transition from idle to
  *                           backlogged
+ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the
+ *       queue is shared
  *
- * A bfq_queue is a leaf request queue; it can be associated with an io_context
- * or more, if it is async or shared between cooperating processes. @cgroup
- * holds a reference to the cgroup, to be sure that it does not disappear while
- * a bfqq still references it (mostly to avoid races between request issuing and
- * task migration followed by cgroup destruction).
+ * A bfq_queue is a leaf request queue; it can be associated with one
+ * io_context or more, if it is async or shared between cooperating
+ * processes. @cgroup holds a reference to the cgroup, to be sure that it
+ * does not disappear while a bfqq still references it (mostly to avoid
+ * races between request issuing and task migration followed by cgroup
+ * destruction).
 * All the fields are protected by the queue lock of the containing bfqd.
 */
struct bfq_queue {
@@ -269,6 +272,7 @@ struct bfq_queue {
 	unsigned int requests_within_timer;
 
 	pid_t pid;
+	struct bfq_io_cq *bic;
 
 	/* weight-raising fields */
 	unsigned long wr_cur_max_time;
@@ -298,12 +302,42 @@ struct bfq_ttime {
 * @icq: associated io_cq structure
 * @bfqq: array of two process queues, the sync and the async
 * @ttime: associated @bfq_ttime struct
+ * @wr_time_left: snapshot of the time left before weight raising ends
+ *                for the sync queue associated to this process; this
+ *                snapshot is taken to remember this value while the weight
+ *                raising is suspended because the queue is merged with a
+ *                shared queue, and is used to set @wr_cur_max_time
+ *                when the queue is split from the shared queue and its
+ *                weight is raised again
+ * @saved_idle_window: same purpose as the previous field for the idle
+ *                     window
+ * @saved_IO_bound: same purpose as the previous two fields for the I/O
+ *                  bound classification of a queue
+ * @saved_in_large_burst: same purpose as the previous fields for the
+ *                        value of the field keeping the queue's belonging
+ *                        to a large burst
+ * @was_in_burst_list: true if the queue belonged to a burst list
+ *                     before its merge with another cooperating queue
+ * @cooperations: counter of consecutive successful queue merges undergone
+ *                by any of the process' @bfq_queues
+ * @failed_cooperations: counter of consecutive failed queue merges of any
+ *                       of the process' @bfq_queues
 */
struct bfq_io_cq {
 	struct io_cq icq; /* must be the first member */
 	struct bfq_queue *bfqq[2];
 	struct bfq_ttime ttime;
 	int ioprio;
+
+	unsigned int wr_time_left;
+	bool saved_idle_window;
+	bool saved_IO_bound;
+
+	bool saved_in_large_burst;
+	bool was_in_burst_list;
+
+	unsigned int cooperations;
+	unsigned int failed_cooperations;
};

enum bfq_device_speed {
@@ -536,7 +570,7 @@ enum bfqq_state_flags {
 	BFQ_BFQQ_FLAG_idle_window,	/* slice idling enabled */
 	BFQ_BFQQ_FLAG_sync,		/* synchronous queue */
 	BFQ_BFQQ_FLAG_budget_new,	/* no completion with this budget */
-	BFQ_BFQQ_FLAG_IO_bound,		/*
+	BFQ_BFQQ_FLAG_IO_bound,		/*
 					 * bfqq has timed-out at least once
 					 * having consumed at most 2/10 of
 					 * its budget
@@ -549,12 +583,13 @@ enum bfqq_state_flags {
 					 * bfqq has proved to be slow and
 					 * seeky until budget timeout
 					 */
-	BFQ_BFQQ_FLAG_softrt_update,	/*
+	BFQ_BFQQ_FLAG_softrt_update,	/*
 					 * may need softrt-next-start
 					 * update
 					 */
 	BFQ_BFQQ_FLAG_coop,		/* bfqq is shared */
-	BFQ_BFQQ_FLAG_split_coop,	/* shared bfqq will be splitted */
+	BFQ_BFQQ_FLAG_split_coop,	/* shared bfqq will be split */
+	BFQ_BFQQ_FLAG_just_split,	/* queue has just been split */
};

#define BFQ_BFQQ_FNS(name)						\
@@ -583,6 +618,7 @@ BFQ_BFQQ_FNS(in_large_burst);
BFQ_BFQQ_FNS(constantly_seeky);
BFQ_BFQQ_FNS(coop);
BFQ_BFQQ_FNS(split_coop);
+BFQ_BFQQ_FNS(just_split);
BFQ_BFQQ_FNS(softrt_update);
#undef BFQ_BFQQ_FNS
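Finally, the cooperations/failed_cooperations pair documented above drives the EQM back-off: once a process' queues have merged bfq_coop_thresh times they are treated as purely throughput-bound, and a long enough run of failed merge attempts resets the counter. A userspace model of that policy, with arbitrary stand-ins for the bfqd->bfq_coop_thresh and bfqd->bfq_failed_cooperations tunables:

/*
 * Toy model of the cooperations/failed_cooperations counters; threshold
 * values are made up for the demonstration.
 */
#include <stdio.h>

#define TOY_COOP_THRESH		2
#define TOY_FAILED_COOPS	3

struct toy_bic {
	unsigned int cooperations;
	unsigned int failed_cooperations;
};

static int toy_coop_capped(const struct toy_bic *bic)
{
	return bic->cooperations >= TOY_COOP_THRESH;
}

/* Mirrors bfq_bfqq_increase_failed_cooperations(). */
static void toy_failed_merge(struct toy_bic *bic)
{
	if (toy_coop_capped(bic)) {
		bic->failed_cooperations++;
		if (bic->failed_cooperations >= TOY_FAILED_COOPS)
			bic->cooperations = 0;
	}
}

int main(void)
{
	struct toy_bic bic = { .cooperations = 2 };
	int i;

	printf("capped: %d\n", toy_coop_capped(&bic));	/* 1 */
	for (i = 0; i < TOY_FAILED_COOPS; i++)
		toy_failed_merge(&bic);
	printf("capped: %d\n", toy_coop_capped(&bic));	/* 0: reset */
	return 0;
}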