* [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements
@ 2021-01-25 19:02 Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 1/6] block, bfq: replace mechanism for evaluating I/O intensity Paolo Valente
` (6 more replies)
0 siblings, 7 replies; 8+ messages in thread
From: Paolo Valente @ 2021-01-25 19:02 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-block, linux-kernel, Paolo Valente
Hi,
here's batch 2/3.
Thanks,
Paolo
Paolo Valente (6):
block, bfq: replace mechanism for evaluating I/O intensity
block, bfq: re-evaluate convenience of I/O plugging on rq arrivals
block, bfq: fix switch back from soft-rt weitgh-raising
block, bfq: save also weight-raised service on queue merging
block, bfq: save also injection state on queue merging
block, bfq: make waker-queue detection more robust
block/bfq-iosched.c | 328 ++++++++++++++++++++++++++------------------
block/bfq-iosched.h | 29 ++--
2 files changed, 214 insertions(+), 143 deletions(-)
--
2.20.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH BUGFIX/IMPROVEMENT 1/6] block, bfq: replace mechanism for evaluating I/O intensity
2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
@ 2021-01-25 19:02 ` Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 2/6] block, bfq: re-evaluate convenience of I/O plugging on rq arrivals Paolo Valente
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Paolo Valente @ 2021-01-25 19:02 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-block, linux-kernel, Paolo Valente, Jan Kara
Some BFQ mechanisms make their decisions on a bfq_queue basing also on
whether the bfq_queue is I/O bound. In this respect, the current logic
for evaluating whether a bfq_queue is I/O bound is rather rough. This
commits replaces this logic with a more effective one.
The new logic measures the percentage of time during which a bfq_queue
is active, and marks the bfq_queue as I/O bound if the latter if this
percentage is above a fixed threshold.
Tested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
block/bfq-iosched.c | 63 +++++++++++++++++++++++++++++++--------------
block/bfq-iosched.h | 16 ++++++------
2 files changed, 52 insertions(+), 27 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index c045613ce927..db393f5d70ba 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1026,6 +1026,8 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
bfqq->entity.new_weight = bic->saved_weight;
bfqq->ttime = bic->saved_ttime;
+ bfqq->io_start_time = bic->saved_io_start_time;
+ bfqq->tot_idle_time = bic->saved_tot_idle_time;
bfqq->wr_coeff = bic->saved_wr_coeff;
bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
@@ -1721,17 +1723,6 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
bfq_clear_bfqq_just_created(bfqq);
-
- if (!bfq_bfqq_IO_bound(bfqq)) {
- if (arrived_in_time) {
- bfqq->requests_within_timer++;
- if (bfqq->requests_within_timer >=
- bfqd->bfq_requests_within_timer)
- bfq_mark_bfqq_IO_bound(bfqq);
- } else
- bfqq->requests_within_timer = 0;
- }
-
if (bfqd->low_latency) {
if (unlikely(time_is_after_jiffies(bfqq->split_time)))
/* wraparound */
@@ -1865,6 +1856,36 @@ static void bfq_reset_inject_limit(struct bfq_data *bfqd,
bfqq->decrease_time_jif = jiffies;
}
+static void bfq_update_io_intensity(struct bfq_queue *bfqq, u64 now_ns)
+{
+ u64 tot_io_time = now_ns - bfqq->io_start_time;
+
+ if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfqq->dispatched == 0)
+ bfqq->tot_idle_time +=
+ now_ns - bfqq->ttime.last_end_request;
+
+ if (unlikely(bfq_bfqq_just_created(bfqq)))
+ return;
+
+ /*
+ * Must be busy for at least about 80% of the time to be
+ * considered I/O bound.
+ */
+ if (bfqq->tot_idle_time * 5 > tot_io_time)
+ bfq_clear_bfqq_IO_bound(bfqq);
+ else
+ bfq_mark_bfqq_IO_bound(bfqq);
+
+ /*
+ * Keep an observation window of at most 200 ms in the past
+ * from now.
+ */
+ if (tot_io_time > 200 * NSEC_PER_MSEC) {
+ bfqq->io_start_time = now_ns - (tot_io_time>>1);
+ bfqq->tot_idle_time >>= 1;
+ }
+}
+
static void bfq_add_request(struct request *rq)
{
struct bfq_queue *bfqq = RQ_BFQQ(rq);
@@ -1872,6 +1893,7 @@ static void bfq_add_request(struct request *rq)
struct request *next_rq, *prev;
unsigned int old_wr_coeff = bfqq->wr_coeff;
bool interactive = false;
+ u64 now_ns = ktime_get_ns();
bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
bfqq->queued[rq_is_sync(rq)]++;
@@ -1934,7 +1956,7 @@ static void bfq_add_request(struct request *rq)
*/
if (bfqd->last_completed_rq_bfqq &&
!bfq_bfqq_has_short_ttime(bfqq) &&
- ktime_get_ns() - bfqd->last_completion <
+ now_ns - bfqd->last_completion <
4 * NSEC_PER_MSEC) {
if (bfqd->last_completed_rq_bfqq != bfqq &&
bfqd->last_completed_rq_bfqq !=
@@ -2051,6 +2073,9 @@ static void bfq_add_request(struct request *rq)
}
}
+ if (bfq_bfqq_sync(bfqq))
+ bfq_update_io_intensity(bfqq, now_ns);
+
elv_rb_add(&bfqq->sort_list, rq);
/*
@@ -2712,6 +2737,8 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
bic->saved_ttime = bfqq->ttime;
bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq);
bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
+ bic->saved_io_start_time = bfqq->io_start_time;
+ bic->saved_tot_idle_time = bfqq->tot_idle_time;
bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
if (unlikely(bfq_bfqq_just_created(bfqq) &&
@@ -3979,10 +4006,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
bfq_bfqq_budget_left(bfqq) >= entity->budget / 3)))
bfq_bfqq_charge_time(bfqd, bfqq, delta);
- if (reason == BFQQE_TOO_IDLE &&
- entity->service <= 2 * entity->budget / 10)
- bfq_clear_bfqq_IO_bound(bfqq);
-
if (bfqd->low_latency && bfqq->wr_coeff == 1)
bfqq->last_wr_start_finish = jiffies;
@@ -5088,6 +5111,8 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
struct bfq_io_cq *bic, pid_t pid, int is_sync)
{
+ u64 now_ns = ktime_get_ns();
+
RB_CLEAR_NODE(&bfqq->entity.rb_node);
INIT_LIST_HEAD(&bfqq->fifo);
INIT_HLIST_NODE(&bfqq->burst_list_node);
@@ -5115,7 +5140,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_clear_bfqq_sync(bfqq);
/* set end request to minus infinity from now */
- bfqq->ttime.last_end_request = ktime_get_ns() + 1;
+ bfqq->ttime.last_end_request = now_ns + 1;
+
+ bfqq->io_start_time = now_ns;
bfq_mark_bfqq_IO_bound(bfqq);
@@ -6529,8 +6556,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
bfqd->bfq_slice_idle = bfq_slice_idle;
bfqd->bfq_timeout = bfq_timeout;
- bfqd->bfq_requests_within_timer = 120;
-
bfqd->bfq_large_burst_thresh = 8;
bfqd->bfq_burst_interval = msecs_to_jiffies(180);
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 703895224562..c913b06016b3 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -291,6 +291,11 @@ struct bfq_queue {
/* associated @bfq_ttime struct */
struct bfq_ttime ttime;
+ /* when bfqq started to do I/O within the last observation window */
+ u64 io_start_time;
+ /* how long bfqq has remained empty during the last observ. window */
+ u64 tot_idle_time;
+
/* bit vector: a 1 for each seeky requests in history */
u32 seek_history;
@@ -407,6 +412,9 @@ struct bfq_io_cq {
*/
bool saved_IO_bound;
+ u64 saved_io_start_time;
+ u64 saved_tot_idle_time;
+
/*
* Same purpose as the previous fields for the value of the
* field keeping the queue's belonging to a large burst
@@ -641,14 +649,6 @@ struct bfq_data {
*/
unsigned int bfq_timeout;
- /*
- * Number of consecutive requests that must be issued within
- * the idle time slice to set again idling to a queue which
- * was marked as non-I/O-bound (see the definition of the
- * IO_bound flag for further details).
- */
- unsigned int bfq_requests_within_timer;
-
/*
* Force device idling whenever needed to provide accurate
* service guarantees, without caring about throughput
--
2.20.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH BUGFIX/IMPROVEMENT 2/6] block, bfq: re-evaluate convenience of I/O plugging on rq arrivals
2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 1/6] block, bfq: replace mechanism for evaluating I/O intensity Paolo Valente
@ 2021-01-25 19:02 ` Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 3/6] block, bfq: fix switch back from soft-rt weitgh-raising Paolo Valente
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Paolo Valente @ 2021-01-25 19:02 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-block, linux-kernel, Paolo Valente, Jan Kara
Upon an I/O-dispatch attempt, BFQ may detect that it was better to
plug I/O dispatch, and to wait for a new request to arrive for the
currently in-service queue. But the arrival of a new request for an
empty bfq_queue, and thus the switch from idle to busy of the
bfq_queue, may cause the scenario to change, and make plugging no
longer needed for service guarantees, or more convenient for
throughput. In this case, keeping I/O-dispatch plugged would certainly
lower throughput.
To address this issue, this commit makes such a check, and stops
plugging I/O if it is better to stop plugging I/O.
Tested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
block/bfq-iosched.c | 24 +++++++++++++++++++-----
1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index db393f5d70ba..6a02a12ff553 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1649,6 +1649,8 @@ static bool bfq_bfqq_higher_class_or_weight(struct bfq_queue *bfqq,
return bfqq_weight > in_serv_weight;
}
+static bool bfq_better_to_idle(struct bfq_queue *bfqq);
+
static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
struct bfq_queue *bfqq,
int old_wr_coeff,
@@ -1750,10 +1752,10 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
bfq_add_bfqq_busy(bfqd, bfqq);
/*
- * Expire in-service queue only if preemption may be needed
- * for guarantees. In particular, we care only about two
- * cases. The first is that bfqq has to recover a service
- * hole, as explained in the comments on
+ * Expire in-service queue if preemption may be needed for
+ * guarantees or throughput. As for guarantees, we care
+ * explicitly about two cases. The first is that bfqq has to
+ * recover a service hole, as explained in the comments on
* bfq_bfqq_update_budg_for_activation(), i.e., that
* bfqq_wants_to_preempt is true. However, if bfqq does not
* carry time-critical I/O, then bfqq's bandwidth is less
@@ -1780,11 +1782,23 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
* timestamps of the in-service queue would need to be
* updated, and this operation is quite costly (see the
* comments on bfq_bfqq_update_budg_for_activation()).
+ *
+ * As for throughput, we ask bfq_better_to_idle() whether we
+ * still need to plug I/O dispatching. If bfq_better_to_idle()
+ * says no, then plugging is not needed any longer, either to
+ * boost throughput or to perserve service guarantees. Then
+ * the best option is to stop plugging I/O, as not doing so
+ * would certainly lower throughput. We may end up in this
+ * case if: (1) upon a dispatch attempt, we detected that it
+ * was better to plug I/O dispatch, and to wait for a new
+ * request to arrive for the currently in-service queue, but
+ * (2) this switch of bfqq to busy changes the scenario.
*/
if (bfqd->in_service_queue &&
((bfqq_wants_to_preempt &&
bfqq->wr_coeff >= bfqd->in_service_queue->wr_coeff) ||
- bfq_bfqq_higher_class_or_weight(bfqq, bfqd->in_service_queue)) &&
+ bfq_bfqq_higher_class_or_weight(bfqq, bfqd->in_service_queue) ||
+ !bfq_better_to_idle(bfqd->in_service_queue)) &&
next_queue_may_preempt(bfqd))
bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
false, BFQQE_PREEMPTED);
--
2.20.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH BUGFIX/IMPROVEMENT 3/6] block, bfq: fix switch back from soft-rt weitgh-raising
2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 1/6] block, bfq: replace mechanism for evaluating I/O intensity Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 2/6] block, bfq: re-evaluate convenience of I/O plugging on rq arrivals Paolo Valente
@ 2021-01-25 19:02 ` Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 4/6] block, bfq: save also weight-raised service on queue merging Paolo Valente
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Paolo Valente @ 2021-01-25 19:02 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-block, linux-kernel, Paolo Valente, Jan Kara
A bfq_queue may happen to be deemed as soft real-time while it is
still enjoying interactive weight-raising. If this happens because of
a false positive, then the bfq_queue is likely to loose its soft
real-time status soon. Upon losing such a status, the bfq_queue must
get back its interactive weight-raising, if its interactive period is
not over yet. But this case is not handled. This commit corrects this
error.
Tested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
block/bfq-iosched.c | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 6a02a12ff553..9e5242b2788a 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5293,8 +5293,26 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (bfqq->wr_coeff > 1 &&
bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
- BFQQ_TOTALLY_SEEKY(bfqq))
- bfq_bfqq_end_wr(bfqq);
+ BFQQ_TOTALLY_SEEKY(bfqq)) {
+ if (time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt +
+ bfq_wr_duration(bfqd))) {
+ /*
+ * In soft_rt weight raising with the
+ * interactive-weight-raising period
+ * elapsed (so no switch back to
+ * interactive weight raising).
+ */
+ bfq_bfqq_end_wr(bfqq);
+ } else { /*
+ * stopping soft_rt weight raising
+ * while still in interactive period,
+ * switch back to interactive weight
+ * raising
+ */
+ switch_back_to_interactive_wr(bfqq, bfqd);
+ bfqq->entity.prio_changed = 1;
+ }
+ }
}
static void bfq_update_has_short_ttime(struct bfq_data *bfqd,
--
2.20.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH BUGFIX/IMPROVEMENT 4/6] block, bfq: save also weight-raised service on queue merging
2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
` (2 preceding siblings ...)
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 3/6] block, bfq: fix switch back from soft-rt weitgh-raising Paolo Valente
@ 2021-01-25 19:02 ` Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 5/6] block, bfq: save also injection state " Paolo Valente
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Paolo Valente @ 2021-01-25 19:02 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-block, linux-kernel, Paolo Valente, Jan Kara
To prevent weight-raising information from being lost on bfq_queue merging,
also the amount of service that a bfq_queue receives must be saved and
restored when the bfq_queue is merged and split, respectively.
Tested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
block/bfq-iosched.c | 2 ++
block/bfq-iosched.h | 1 +
2 files changed, 3 insertions(+)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 9e5242b2788a..56ad6067d41d 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1029,6 +1029,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
bfqq->io_start_time = bic->saved_io_start_time;
bfqq->tot_idle_time = bic->saved_tot_idle_time;
bfqq->wr_coeff = bic->saved_wr_coeff;
+ bfqq->service_from_wr = bic->saved_service_from_wr;
bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time;
@@ -2775,6 +2776,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
bic->saved_wr_coeff = bfqq->wr_coeff;
bic->saved_wr_start_at_switch_to_srt =
bfqq->wr_start_at_switch_to_srt;
+ bic->saved_service_from_wr = bfqq->service_from_wr;
bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
}
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index c913b06016b3..d15299d59f89 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -440,6 +440,7 @@ struct bfq_io_cq {
*/
unsigned long saved_wr_coeff;
unsigned long saved_last_wr_start_finish;
+ unsigned long saved_service_from_wr;
unsigned long saved_wr_start_at_switch_to_srt;
unsigned int saved_wr_cur_max_time;
struct bfq_ttime saved_ttime;
--
2.20.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH BUGFIX/IMPROVEMENT 5/6] block, bfq: save also injection state on queue merging
2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
` (3 preceding siblings ...)
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 4/6] block, bfq: save also weight-raised service on queue merging Paolo Valente
@ 2021-01-25 19:02 ` Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 6/6] block, bfq: make waker-queue detection more robust Paolo Valente
2021-01-25 21:18 ` [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Jens Axboe
6 siblings, 0 replies; 8+ messages in thread
From: Paolo Valente @ 2021-01-25 19:02 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-block, linux-kernel, Paolo Valente, Jan Kara
To prevent injection information from being lost on bfq_queue merging,
also the amount of service that a bfq_queue receives must be saved and
restored when the bfq_queue is merged and split, respectively.
Tested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
block/bfq-iosched.c | 8 ++++++++
block/bfq-iosched.h | 5 +++++
2 files changed, 13 insertions(+)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 56ad6067d41d..e56ee60df014 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1024,6 +1024,10 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
else
bfq_clear_bfqq_IO_bound(bfqq);
+ bfqq->last_serv_time_ns = bic->saved_last_serv_time_ns;
+ bfqq->inject_limit = bic->saved_inject_limit;
+ bfqq->decrease_time_jif = bic->saved_decrease_time_jif;
+
bfqq->entity.new_weight = bic->saved_weight;
bfqq->ttime = bic->saved_ttime;
bfqq->io_start_time = bic->saved_io_start_time;
@@ -2748,6 +2752,10 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
if (!bic)
return;
+ bic->saved_last_serv_time_ns = bfqq->last_serv_time_ns;
+ bic->saved_inject_limit = bfqq->inject_limit;
+ bic->saved_decrease_time_jif = bfqq->decrease_time_jif;
+
bic->saved_weight = bfqq->entity.orig_weight;
bic->saved_ttime = bfqq->ttime;
bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq);
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index d15299d59f89..3f350fa3c5fd 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -444,6 +444,11 @@ struct bfq_io_cq {
unsigned long saved_wr_start_at_switch_to_srt;
unsigned int saved_wr_cur_max_time;
struct bfq_ttime saved_ttime;
+
+ /* Save also injection state */
+ u64 saved_last_serv_time_ns;
+ unsigned int saved_inject_limit;
+ unsigned long saved_decrease_time_jif;
};
/**
--
2.20.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH BUGFIX/IMPROVEMENT 6/6] block, bfq: make waker-queue detection more robust
2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
` (4 preceding siblings ...)
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 5/6] block, bfq: save also injection state " Paolo Valente
@ 2021-01-25 19:02 ` Paolo Valente
2021-01-25 21:18 ` [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Jens Axboe
6 siblings, 0 replies; 8+ messages in thread
From: Paolo Valente @ 2021-01-25 19:02 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-block, linux-kernel, Paolo Valente, Jan Kara
In the presence of many parallel I/O flows, the detection of waker
bfq_queues suffers from false positives. This commits addresses this
issue by making the filtering of actual wakers more selective. In more
detail, a candidate waker must be found to meet waker requirements
three times before being promoted to actual waker.
Tested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
block/bfq-iosched.c | 211 +++++++++++++++++++++-----------------------
block/bfq-iosched.h | 7 +-
2 files changed, 108 insertions(+), 110 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index e56ee60df014..445cef9c0bb9 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -158,7 +158,6 @@ BFQ_BFQQ_FNS(in_large_burst);
BFQ_BFQQ_FNS(coop);
BFQ_BFQQ_FNS(split_coop);
BFQ_BFQQ_FNS(softrt_update);
-BFQ_BFQQ_FNS(has_waker);
#undef BFQ_BFQQ_FNS \
/* Expiration time of sync (0) and async (1) requests, in ns. */
@@ -1905,6 +1904,107 @@ static void bfq_update_io_intensity(struct bfq_queue *bfqq, u64 now_ns)
}
}
+/*
+ * Detect whether bfqq's I/O seems synchronized with that of some
+ * other queue, i.e., whether bfqq, after remaining empty, happens to
+ * receive new I/O only right after some I/O request of the other
+ * queue has been completed. We call waker queue the other queue, and
+ * we assume, for simplicity, that bfqq may have at most one waker
+ * queue.
+ *
+ * A remarkable throughput boost can be reached by unconditionally
+ * injecting the I/O of the waker queue, every time a new
+ * bfq_dispatch_request happens to be invoked while I/O is being
+ * plugged for bfqq. In addition to boosting throughput, this
+ * unblocks bfqq's I/O, thereby improving bandwidth and latency for
+ * bfqq. Note that these same results may be achieved with the general
+ * injection mechanism, but less effectively. For details on this
+ * aspect, see the comments on the choice of the queue for injection
+ * in bfq_select_queue().
+ *
+ * Turning back to the detection of a waker queue, a queue Q is deemed
+ * as a waker queue for bfqq if, for three consecutive times, bfqq
+ * happens to become non empty right after a request of Q has been
+ * completed. In particular, on the first time, Q is tentatively set
+ * as a candidate waker queue, while on the third consecutive time
+ * that Q is detected, the field waker_bfqq is set to Q, to confirm
+ * that Q is a waker queue for bfqq. These detection steps are
+ * performed only if bfqq has a long think time, so as to make it more
+ * likely that bfqq's I/O is actually being blocked by a
+ * synchronization. This last filter, plus the above three-times
+ * requirement, make false positives less likely.
+ *
+ * NOTE
+ *
+ * The sooner a waker queue is detected, the sooner throughput can be
+ * boosted by injecting I/O from the waker queue. Fortunately,
+ * detection is likely to be actually fast, for the following
+ * reasons. While blocked by synchronization, bfqq has a long think
+ * time. This implies that bfqq's inject limit is at least equal to 1
+ * (see the comments in bfq_update_inject_limit()). So, thanks to
+ * injection, the waker queue is likely to be served during the very
+ * first I/O-plugging time interval for bfqq. This triggers the first
+ * step of the detection mechanism. Thanks again to injection, the
+ * candidate waker queue is then likely to be confirmed no later than
+ * during the next I/O-plugging interval for bfqq.
+ *
+ * ISSUE
+ *
+ * On queue merging all waker information is lost.
+ */
+void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq, u64 now_ns)
+{
+ if (!bfqd->last_completed_rq_bfqq ||
+ bfqd->last_completed_rq_bfqq == bfqq ||
+ bfq_bfqq_has_short_ttime(bfqq) ||
+ now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
+ bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
+ return;
+
+ if (bfqd->last_completed_rq_bfqq !=
+ bfqq->tentative_waker_bfqq) {
+ /*
+ * First synchronization detected with a
+ * candidate waker queue, or with a different
+ * candidate waker queue from the current one.
+ */
+ bfqq->tentative_waker_bfqq =
+ bfqd->last_completed_rq_bfqq;
+ bfqq->num_waker_detections = 1;
+ } else /* Same tentative waker queue detected again */
+ bfqq->num_waker_detections++;
+
+ if (bfqq->num_waker_detections == 3) {
+ bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq;
+ bfqq->tentative_waker_bfqq = NULL;
+
+ /*
+ * If the waker queue disappears, then
+ * bfqq->waker_bfqq must be reset. To
+ * this goal, we maintain in each
+ * waker queue a list, woken_list, of
+ * all the queues that reference the
+ * waker queue through their
+ * waker_bfqq pointer. When the waker
+ * queue exits, the waker_bfqq pointer
+ * of all the queues in the woken_list
+ * is reset.
+ *
+ * In addition, if bfqq is already in
+ * the woken_list of a waker queue,
+ * then, before being inserted into
+ * the woken_list of a new waker
+ * queue, bfqq must be removed from
+ * the woken_list of the old waker
+ * queue.
+ */
+ if (!hlist_unhashed(&bfqq->woken_list_node))
+ hlist_del_init(&bfqq->woken_list_node);
+ hlist_add_head(&bfqq->woken_list_node,
+ &bfqd->last_completed_rq_bfqq->woken_list);
+ }
+}
+
static void bfq_add_request(struct request *rq)
{
struct bfq_queue *bfqq = RQ_BFQQ(rq);
@@ -1919,111 +2019,7 @@ static void bfq_add_request(struct request *rq)
bfqd->queued++;
if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
- /*
- * Detect whether bfqq's I/O seems synchronized with
- * that of some other queue, i.e., whether bfqq, after
- * remaining empty, happens to receive new I/O only
- * right after some I/O request of the other queue has
- * been completed. We call waker queue the other
- * queue, and we assume, for simplicity, that bfqq may
- * have at most one waker queue.
- *
- * A remarkable throughput boost can be reached by
- * unconditionally injecting the I/O of the waker
- * queue, every time a new bfq_dispatch_request
- * happens to be invoked while I/O is being plugged
- * for bfqq. In addition to boosting throughput, this
- * unblocks bfqq's I/O, thereby improving bandwidth
- * and latency for bfqq. Note that these same results
- * may be achieved with the general injection
- * mechanism, but less effectively. For details on
- * this aspect, see the comments on the choice of the
- * queue for injection in bfq_select_queue().
- *
- * Turning back to the detection of a waker queue, a
- * queue Q is deemed as a waker queue for bfqq if, for
- * two consecutive times, bfqq happens to become non
- * empty right after a request of Q has been
- * completed. In particular, on the first time, Q is
- * tentatively set as a candidate waker queue, while
- * on the second time, the flag
- * bfq_bfqq_has_waker(bfqq) is set to confirm that Q
- * is a waker queue for bfqq. These detection steps
- * are performed only if bfqq has a long think time,
- * so as to make it more likely that bfqq's I/O is
- * actually being blocked by a synchronization. This
- * last filter, plus the above two-times requirement,
- * make false positives less likely.
- *
- * NOTE
- *
- * The sooner a waker queue is detected, the sooner
- * throughput can be boosted by injecting I/O from the
- * waker queue. Fortunately, detection is likely to be
- * actually fast, for the following reasons. While
- * blocked by synchronization, bfqq has a long think
- * time. This implies that bfqq's inject limit is at
- * least equal to 1 (see the comments in
- * bfq_update_inject_limit()). So, thanks to
- * injection, the waker queue is likely to be served
- * during the very first I/O-plugging time interval
- * for bfqq. This triggers the first step of the
- * detection mechanism. Thanks again to injection, the
- * candidate waker queue is then likely to be
- * confirmed no later than during the next
- * I/O-plugging interval for bfqq.
- */
- if (bfqd->last_completed_rq_bfqq &&
- !bfq_bfqq_has_short_ttime(bfqq) &&
- now_ns - bfqd->last_completion <
- 4 * NSEC_PER_MSEC) {
- if (bfqd->last_completed_rq_bfqq != bfqq &&
- bfqd->last_completed_rq_bfqq !=
- bfqq->waker_bfqq) {
- /*
- * First synchronization detected with
- * a candidate waker queue, or with a
- * different candidate waker queue
- * from the current one.
- */
- bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq;
-
- /*
- * If the waker queue disappears, then
- * bfqq->waker_bfqq must be reset. To
- * this goal, we maintain in each
- * waker queue a list, woken_list, of
- * all the queues that reference the
- * waker queue through their
- * waker_bfqq pointer. When the waker
- * queue exits, the waker_bfqq pointer
- * of all the queues in the woken_list
- * is reset.
- *
- * In addition, if bfqq is already in
- * the woken_list of a waker queue,
- * then, before being inserted into
- * the woken_list of a new waker
- * queue, bfqq must be removed from
- * the woken_list of the old waker
- * queue.
- */
- if (!hlist_unhashed(&bfqq->woken_list_node))
- hlist_del_init(&bfqq->woken_list_node);
- hlist_add_head(&bfqq->woken_list_node,
- &bfqd->last_completed_rq_bfqq->woken_list);
-
- bfq_clear_bfqq_has_waker(bfqq);
- } else if (bfqd->last_completed_rq_bfqq ==
- bfqq->waker_bfqq &&
- !bfq_bfqq_has_waker(bfqq)) {
- /*
- * synchronization with waker_bfqq
- * seen for the second time
- */
- bfq_mark_bfqq_has_waker(bfqq);
- }
- }
+ bfq_check_waker(bfqd, bfqq, now_ns);
/*
* Periodically reset inject limit, to make sure that
@@ -4569,7 +4565,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
bfq_serv_to_charge(async_bfqq->next_rq, async_bfqq) <=
bfq_bfqq_budget_left(async_bfqq))
bfqq = bfqq->bic->bfqq[0];
- else if (bfq_bfqq_has_waker(bfqq) &&
+ else if (bfqq->waker_bfqq &&
bfq_bfqq_busy(bfqq->waker_bfqq) &&
bfqq->waker_bfqq->next_rq &&
bfq_serv_to_charge(bfqq->waker_bfqq->next_rq,
@@ -4976,7 +4972,6 @@ void bfq_put_queue(struct bfq_queue *bfqq)
hlist_for_each_entry_safe(item, n, &bfqq->woken_list,
woken_list_node) {
item->waker_bfqq = NULL;
- bfq_clear_bfqq_has_waker(item);
hlist_del_init(&item->woken_list_node);
}
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 3f350fa3c5fd..b8e793c34ff1 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -376,6 +376,11 @@ struct bfq_queue {
* bfq_select_queue().
*/
struct bfq_queue *waker_bfqq;
+ /* pointer to the curr. tentative waker queue, see bfq_check_waker() */
+ struct bfq_queue *tentative_waker_bfqq;
+ /* number of times the same tentative waker has been detected */
+ unsigned int num_waker_detections;
+
/* node for woken_list, see below */
struct hlist_node woken_list_node;
/*
@@ -776,7 +781,6 @@ enum bfqq_state_flags {
*/
BFQQF_coop, /* bfqq is shared */
BFQQF_split_coop, /* shared bfqq will be split */
- BFQQF_has_waker /* bfqq has a waker queue */
};
#define BFQ_BFQQ_FNS(name) \
@@ -796,7 +800,6 @@ BFQ_BFQQ_FNS(in_large_burst);
BFQ_BFQQ_FNS(coop);
BFQ_BFQQ_FNS(split_coop);
BFQ_BFQQ_FNS(softrt_update);
-BFQ_BFQQ_FNS(has_waker);
#undef BFQ_BFQQ_FNS
/* Expiration reasons. */
--
2.20.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements
2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
` (5 preceding siblings ...)
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 6/6] block, bfq: make waker-queue detection more robust Paolo Valente
@ 2021-01-25 21:18 ` Jens Axboe
6 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2021-01-25 21:18 UTC (permalink / raw)
To: Paolo Valente; +Cc: linux-block, linux-kernel
On 1/25/21 12:02 PM, Paolo Valente wrote:
> Hi,
> here's batch 2/3.
>
> Thanks,
> Paolo
>
> Paolo Valente (6):
> block, bfq: replace mechanism for evaluating I/O intensity
> block, bfq: re-evaluate convenience of I/O plugging on rq arrivals
> block, bfq: fix switch back from soft-rt weitgh-raising
> block, bfq: save also weight-raised service on queue merging
> block, bfq: save also injection state on queue merging
> block, bfq: make waker-queue detection more robust
>
> block/bfq-iosched.c | 328 ++++++++++++++++++++++++++------------------
> block/bfq-iosched.h | 29 ++--
> 2 files changed, 214 insertions(+), 143 deletions(-)
Applied, thanks.
--
Jens Axboe
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2021-01-26 12:43 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 1/6] block, bfq: replace mechanism for evaluating I/O intensity Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 2/6] block, bfq: re-evaluate convenience of I/O plugging on rq arrivals Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 3/6] block, bfq: fix switch back from soft-rt weitgh-raising Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 4/6] block, bfq: save also weight-raised service on queue merging Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 5/6] block, bfq: save also injection state " Paolo Valente
2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 6/6] block, bfq: make waker-queue detection more robust Paolo Valente
2021-01-25 21:18 ` [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Jens Axboe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).