* [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements
@ 2021-01-25 19:02 Paolo Valente
  2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 1/6] block, bfq: replace mechanism for evaluating I/O intensity Paolo Valente
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Paolo Valente @ 2021-01-25 19:02 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel, Paolo Valente

Hi,
here's batch 2/3.

Thanks,
Paolo

Paolo Valente (6):
  block, bfq: replace mechanism for evaluating I/O intensity
  block, bfq: re-evaluate convenience of I/O plugging on rq arrivals
  block, bfq: fix switch back from soft-rt weight-raising
  block, bfq: save also weight-raised service on queue merging
  block, bfq: save also injection state on queue merging
  block, bfq: make waker-queue detection more robust

 block/bfq-iosched.c | 328 ++++++++++++++++++++++++++------------------
 block/bfq-iosched.h |  29 ++--
 2 files changed, 214 insertions(+), 143 deletions(-)

--
2.20.1


* [PATCH BUGFIX/IMPROVEMENT 1/6] block, bfq: replace mechanism for evaluating I/O intensity
  2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
@ 2021-01-25 19:02 ` Paolo Valente
  2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 2/6] block, bfq: re-evaluate convenience of I/O plugging on rq arrivals Paolo Valente
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Paolo Valente @ 2021-01-25 19:02 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel, Paolo Valente, Jan Kara

Some BFQ mechanisms make their decisions about a bfq_queue based also
on whether the bfq_queue is I/O bound. In this respect, the current
logic for evaluating whether a bfq_queue is I/O bound is rather rough.
This commit replaces this logic with a more effective one.

The new logic measures the percentage of time during which a bfq_queue
is active, and marks the bfq_queue as I/O bound if this percentage is
above a fixed threshold.
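
For reference, the threshold check is simple arithmetic: the queue is
considered I/O bound as long as its accumulated idle time is at most
20% of the elapsed observation window, i.e., as long as it has been
busy for at least ~80% of the time. A stand-alone sketch of that test
(illustrative only; the function and parameter names are made up, and
in the patch the two quantities correspond to bfqq->tot_idle_time and
now_ns - bfqq->io_start_time):

	#include <stdbool.h>
	#include <stdint.h>

	/* Illustration of the I/O-bound test, not part of the patch. */
	static bool io_bound_sketch(uint64_t idle_ns, uint64_t window_ns)
	{
		return idle_ns * 5 <= window_ns; /* idle <= 20% of window */
	}

The window itself is prevented from growing without bound: once it
exceeds 200 ms, its start point and the accumulated idle time are both
halved, so the measurement tracks only the recent past.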

Tested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 63 +++++++++++++++++++++++++++++++--------------
 block/bfq-iosched.h | 16 ++++++------
 2 files changed, 52 insertions(+), 27 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index c045613ce927..db393f5d70ba 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1026,6 +1026,8 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
 
 	bfqq->entity.new_weight = bic->saved_weight;
 	bfqq->ttime = bic->saved_ttime;
+	bfqq->io_start_time = bic->saved_io_start_time;
+	bfqq->tot_idle_time = bic->saved_tot_idle_time;
 	bfqq->wr_coeff = bic->saved_wr_coeff;
 	bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
 	bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
@@ -1721,17 +1723,6 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 
 	bfq_clear_bfqq_just_created(bfqq);
 
-
-	if (!bfq_bfqq_IO_bound(bfqq)) {
-		if (arrived_in_time) {
-			bfqq->requests_within_timer++;
-			if (bfqq->requests_within_timer >=
-			    bfqd->bfq_requests_within_timer)
-				bfq_mark_bfqq_IO_bound(bfqq);
-		} else
-			bfqq->requests_within_timer = 0;
-	}
-
 	if (bfqd->low_latency) {
 		if (unlikely(time_is_after_jiffies(bfqq->split_time)))
 			/* wraparound */
@@ -1865,6 +1856,36 @@ static void bfq_reset_inject_limit(struct bfq_data *bfqd,
 	bfqq->decrease_time_jif = jiffies;
 }
 
+static void bfq_update_io_intensity(struct bfq_queue *bfqq, u64 now_ns)
+{
+	u64 tot_io_time = now_ns - bfqq->io_start_time;
+
+	if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfqq->dispatched == 0)
+		bfqq->tot_idle_time +=
+			now_ns - bfqq->ttime.last_end_request;
+
+	if (unlikely(bfq_bfqq_just_created(bfqq)))
+		return;
+
+	/*
+	 * Must be busy for at least about 80% of the time to be
+	 * considered I/O bound.
+	 */
+	if (bfqq->tot_idle_time * 5 > tot_io_time)
+		bfq_clear_bfqq_IO_bound(bfqq);
+	else
+		bfq_mark_bfqq_IO_bound(bfqq);
+
+	/*
+	 * Keep an observation window of at most 200 ms in the past
+	 * from now.
+	 */
+	if (tot_io_time > 200 * NSEC_PER_MSEC) {
+		bfqq->io_start_time = now_ns - (tot_io_time>>1);
+		bfqq->tot_idle_time >>= 1;
+	}
+}
+
 static void bfq_add_request(struct request *rq)
 {
 	struct bfq_queue *bfqq = RQ_BFQQ(rq);
@@ -1872,6 +1893,7 @@ static void bfq_add_request(struct request *rq)
 	struct request *next_rq, *prev;
 	unsigned int old_wr_coeff = bfqq->wr_coeff;
 	bool interactive = false;
+	u64 now_ns = ktime_get_ns();
 
 	bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
 	bfqq->queued[rq_is_sync(rq)]++;
@@ -1934,7 +1956,7 @@ static void bfq_add_request(struct request *rq)
 		 */
 		if (bfqd->last_completed_rq_bfqq &&
 		    !bfq_bfqq_has_short_ttime(bfqq) &&
-		    ktime_get_ns() - bfqd->last_completion <
+		    now_ns - bfqd->last_completion <
 		    4 * NSEC_PER_MSEC) {
 			if (bfqd->last_completed_rq_bfqq != bfqq &&
 			    bfqd->last_completed_rq_bfqq !=
@@ -2051,6 +2073,9 @@ static void bfq_add_request(struct request *rq)
 		}
 	}
 
+	if (bfq_bfqq_sync(bfqq))
+		bfq_update_io_intensity(bfqq, now_ns);
+
 	elv_rb_add(&bfqq->sort_list, rq);
 
 	/*
@@ -2712,6 +2737,8 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
 	bic->saved_ttime = bfqq->ttime;
 	bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq);
 	bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
+	bic->saved_io_start_time = bfqq->io_start_time;
+	bic->saved_tot_idle_time = bfqq->tot_idle_time;
 	bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
 	bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
 	if (unlikely(bfq_bfqq_just_created(bfqq) &&
@@ -3979,10 +4006,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
 	      bfq_bfqq_budget_left(bfqq) >=  entity->budget / 3)))
 		bfq_bfqq_charge_time(bfqd, bfqq, delta);
 
-	if (reason == BFQQE_TOO_IDLE &&
-	    entity->service <= 2 * entity->budget / 10)
-		bfq_clear_bfqq_IO_bound(bfqq);
-
 	if (bfqd->low_latency && bfqq->wr_coeff == 1)
 		bfqq->last_wr_start_finish = jiffies;
 
@@ -5088,6 +5111,8 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
 static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 			  struct bfq_io_cq *bic, pid_t pid, int is_sync)
 {
+	u64 now_ns = ktime_get_ns();
+
 	RB_CLEAR_NODE(&bfqq->entity.rb_node);
 	INIT_LIST_HEAD(&bfqq->fifo);
 	INIT_HLIST_NODE(&bfqq->burst_list_node);
@@ -5115,7 +5140,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		bfq_clear_bfqq_sync(bfqq);
 
 	/* set end request to minus infinity from now */
-	bfqq->ttime.last_end_request = ktime_get_ns() + 1;
+	bfqq->ttime.last_end_request = now_ns + 1;
+
+	bfqq->io_start_time = now_ns;
 
 	bfq_mark_bfqq_IO_bound(bfqq);
 
@@ -6529,8 +6556,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 	bfqd->bfq_slice_idle = bfq_slice_idle;
 	bfqd->bfq_timeout = bfq_timeout;
 
-	bfqd->bfq_requests_within_timer = 120;
-
 	bfqd->bfq_large_burst_thresh = 8;
 	bfqd->bfq_burst_interval = msecs_to_jiffies(180);
 
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 703895224562..c913b06016b3 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -291,6 +291,11 @@ struct bfq_queue {
 	/* associated @bfq_ttime struct */
 	struct bfq_ttime ttime;
 
+	/* when bfqq started to do I/O within the last observation window */
+	u64 io_start_time;
+	/* how long bfqq has remained empty during the last observ. window */
+	u64 tot_idle_time;
+
 	/* bit vector: a 1 for each seeky requests in history */
 	u32 seek_history;
 
@@ -407,6 +412,9 @@ struct bfq_io_cq {
 	 */
 	bool saved_IO_bound;
 
+	u64 saved_io_start_time;
+	u64 saved_tot_idle_time;
+
 	/*
 	 * Same purpose as the previous fields for the value of the
 	 * field keeping the queue's belonging to a large burst
@@ -641,14 +649,6 @@ struct bfq_data {
 	 */
 	unsigned int bfq_timeout;
 
-	/*
-	 * Number of consecutive requests that must be issued within
-	 * the idle time slice to set again idling to a queue which
-	 * was marked as non-I/O-bound (see the definition of the
-	 * IO_bound flag for further details).
-	 */
-	unsigned int bfq_requests_within_timer;
-
 	/*
 	 * Force device idling whenever needed to provide accurate
 	 * service guarantees, without caring about throughput
-- 
2.20.1



* [PATCH BUGFIX/IMPROVEMENT 2/6] block, bfq: re-evaluate convenience of I/O plugging on rq arrivals
  2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
  2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 1/6] block, bfq: replace mechanism for evaluating I/O intensity Paolo Valente
@ 2021-01-25 19:02 ` Paolo Valente
  2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 3/6] block, bfq: fix switch back from soft-rt weight-raising Paolo Valente
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Paolo Valente @ 2021-01-25 19:02 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel, Paolo Valente, Jan Kara

Upon an I/O-dispatch attempt, BFQ may detect that it is better to plug
I/O dispatch, and to wait for a new request to arrive for the
currently in-service queue. But the arrival of a new request for an
empty bfq_queue, and thus the switch of that bfq_queue from idle to
busy, may change the scenario: plugging may no longer be needed for
service guarantees, nor convenient for throughput. In this case,
keeping I/O-dispatch plugged would certainly lower throughput.

To address this issue, this commit re-evaluates, upon such a request
arrival, whether plugging the in-service queue is still convenient,
and expires that queue (thereby stopping the plugging) if it is not.
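
The net effect on the expiration check in
bfq_bfqq_handle_idle_busy_switch() is the condition below (reproduced
from the hunk that follows, with the long in-code comment omitted);
the !bfq_better_to_idle() disjunct is the part added by this commit:

	if (bfqd->in_service_queue &&
	    ((bfqq_wants_to_preempt &&
	      bfqq->wr_coeff >= bfqd->in_service_queue->wr_coeff) ||
	     bfq_bfqq_higher_class_or_weight(bfqq, bfqd->in_service_queue) ||
	     !bfq_better_to_idle(bfqd->in_service_queue)) &&
	    next_queue_may_preempt(bfqd))
		bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
				false, BFQQE_PREEMPTED);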

Tested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index db393f5d70ba..6a02a12ff553 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1649,6 +1649,8 @@ static bool bfq_bfqq_higher_class_or_weight(struct bfq_queue *bfqq,
 	return bfqq_weight > in_serv_weight;
 }
 
+static bool bfq_better_to_idle(struct bfq_queue *bfqq);
+
 static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 					     struct bfq_queue *bfqq,
 					     int old_wr_coeff,
@@ -1750,10 +1752,10 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 	bfq_add_bfqq_busy(bfqd, bfqq);
 
 	/*
-	 * Expire in-service queue only if preemption may be needed
-	 * for guarantees. In particular, we care only about two
-	 * cases. The first is that bfqq has to recover a service
-	 * hole, as explained in the comments on
+	 * Expire in-service queue if preemption may be needed for
+	 * guarantees or throughput. As for guarantees, we care
+	 * explicitly about two cases. The first is that bfqq has to
+	 * recover a service hole, as explained in the comments on
 	 * bfq_bfqq_update_budg_for_activation(), i.e., that
 	 * bfqq_wants_to_preempt is true. However, if bfqq does not
 	 * carry time-critical I/O, then bfqq's bandwidth is less
@@ -1780,11 +1782,23 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 	 * timestamps of the in-service queue would need to be
 	 * updated, and this operation is quite costly (see the
 	 * comments on bfq_bfqq_update_budg_for_activation()).
+	 *
+	 * As for throughput, we ask bfq_better_to_idle() whether we
+	 * still need to plug I/O dispatching. If bfq_better_to_idle()
+	 * says no, then plugging is not needed any longer, either to
+	 * boost throughput or to preserve service guarantees. Then
+	 * the best option is to stop plugging I/O, as not doing so
+	 * would certainly lower throughput. We may end up in this
+	 * case if: (1) upon a dispatch attempt, we detected that it
+	 * was better to plug I/O dispatch, and to wait for a new
+	 * request to arrive for the currently in-service queue, but
+	 * (2) this switch of bfqq to busy changes the scenario.
 	 */
 	if (bfqd->in_service_queue &&
 	    ((bfqq_wants_to_preempt &&
 	      bfqq->wr_coeff >= bfqd->in_service_queue->wr_coeff) ||
-	     bfq_bfqq_higher_class_or_weight(bfqq, bfqd->in_service_queue)) &&
+	     bfq_bfqq_higher_class_or_weight(bfqq, bfqd->in_service_queue) ||
+	     !bfq_better_to_idle(bfqd->in_service_queue)) &&
 	    next_queue_may_preempt(bfqd))
 		bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
 				false, BFQQE_PREEMPTED);
-- 
2.20.1



* [PATCH BUGFIX/IMPROVEMENT 3/6] block, bfq: fix switch back from soft-rt weight-raising
  2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
  2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 1/6] block, bfq: replace mechanism for evaluating I/O intensity Paolo Valente
  2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 2/6] block, bfq: re-evaluate convenience of I/O plugging on rq arrivals Paolo Valente
@ 2021-01-25 19:02 ` Paolo Valente
  2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 4/6] block, bfq: save also weight-raised service on queue merging Paolo Valente
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Paolo Valente @ 2021-01-25 19:02 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel, Paolo Valente, Jan Kara

A bfq_queue may happen to be deemed soft real-time while it is still
enjoying interactive weight-raising. If this happens because of a
false positive, then the bfq_queue is likely to lose its soft
real-time status soon. Upon losing this status, the bfq_queue must get
back its interactive weight-raising, if its interactive period is not
over yet. But this case is not handled. This commit adds the missing
handling.
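
Condensed, the new logic in bfq_update_io_seektime() is the branch
below (a simplified view of the hunk that follows, with the in-code
comments shortened):

	if (bfqq->wr_coeff > 1 &&
	    bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
	    BFQQ_TOTALLY_SEEKY(bfqq)) {
		if (time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt +
					   bfq_wr_duration(bfqd)))
			/* interactive period already elapsed: end wr */
			bfq_bfqq_end_wr(bfqq);
		else {
			/* still in the interactive period: resume it */
			switch_back_to_interactive_wr(bfqq, bfqd);
			bfqq->entity.prio_changed = 1;
		}
	}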

Tested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 6a02a12ff553..9e5242b2788a 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5293,8 +5293,26 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 
 	if (bfqq->wr_coeff > 1 &&
 	    bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
-	    BFQQ_TOTALLY_SEEKY(bfqq))
-		bfq_bfqq_end_wr(bfqq);
+	    BFQQ_TOTALLY_SEEKY(bfqq)) {
+		if (time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt +
+					   bfq_wr_duration(bfqd))) {
+			/*
+			 * In soft_rt weight raising with the
+			 * interactive-weight-raising period
+			 * elapsed (so no switch back to
+			 * interactive weight raising).
+			 */
+			bfq_bfqq_end_wr(bfqq);
+		} else { /*
+			  * stopping soft_rt weight raising
+			  * while still in interactive period,
+			  * switch back to interactive weight
+			  * raising
+			  */
+			switch_back_to_interactive_wr(bfqq, bfqd);
+			bfqq->entity.prio_changed = 1;
+		}
+	}
 }
 
 static void bfq_update_has_short_ttime(struct bfq_data *bfqd,
-- 
2.20.1



* [PATCH BUGFIX/IMPROVEMENT 4/6] block, bfq: save also weight-raised service on queue merging
  2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
                   ` (2 preceding siblings ...)
  2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 3/6] block, bfq: fix switch back from soft-rt weight-raising Paolo Valente
@ 2021-01-25 19:02 ` Paolo Valente
  2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 5/6] block, bfq: save also injection state " Paolo Valente
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Paolo Valente @ 2021-01-25 19:02 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel, Paolo Valente, Jan Kara

To prevent weight-raising information from being lost on bfq_queue
merging, the amount of service that a bfq_queue has received while
weight-raised (service_from_wr) must also be saved when the bfq_queue
is merged, and restored when it is later split.

Tested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 2 ++
 block/bfq-iosched.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 9e5242b2788a..56ad6067d41d 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1029,6 +1029,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
 	bfqq->io_start_time = bic->saved_io_start_time;
 	bfqq->tot_idle_time = bic->saved_tot_idle_time;
 	bfqq->wr_coeff = bic->saved_wr_coeff;
+	bfqq->service_from_wr = bic->saved_service_from_wr;
 	bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
 	bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
 	bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time;
@@ -2775,6 +2776,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
 		bic->saved_wr_coeff = bfqq->wr_coeff;
 		bic->saved_wr_start_at_switch_to_srt =
 			bfqq->wr_start_at_switch_to_srt;
+		bic->saved_service_from_wr = bfqq->service_from_wr;
 		bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
 		bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
 	}
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index c913b06016b3..d15299d59f89 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -440,6 +440,7 @@ struct bfq_io_cq {
 	 */
 	unsigned long saved_wr_coeff;
 	unsigned long saved_last_wr_start_finish;
+	unsigned long saved_service_from_wr;
 	unsigned long saved_wr_start_at_switch_to_srt;
 	unsigned int saved_wr_cur_max_time;
 	struct bfq_ttime saved_ttime;
-- 
2.20.1



* [PATCH BUGFIX/IMPROVEMENT 5/6] block, bfq: save also injection state on queue merging
  2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
                   ` (3 preceding siblings ...)
  2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 4/6] block, bfq: save also weight-raised service on queue merging Paolo Valente
@ 2021-01-25 19:02 ` Paolo Valente
  2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 6/6] block, bfq: make waker-queue detection more robust Paolo Valente
  2021-01-25 21:18 ` [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Jens Axboe
  6 siblings, 0 replies; 8+ messages in thread
From: Paolo Valente @ 2021-01-25 19:02 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel, Paolo Valente, Jan Kara

To prevent injection information from being lost on bfq_queue merging,
the injection state of a bfq_queue (its last measured request service
time, its current injection limit, and the time of the last limit
update) must also be saved when the bfq_queue is merged, and restored
when it is later split.
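
Concretely, the save/restore pair added by this commit (field names as
in the hunks below) boils down to:

	/* on merging, in bfq_bfqq_save_state() */
	bic->saved_last_serv_time_ns = bfqq->last_serv_time_ns;
	bic->saved_inject_limit = bfqq->inject_limit;
	bic->saved_decrease_time_jif = bfqq->decrease_time_jif;

	/* on a later split, in bfq_bfqq_resume_state() */
	bfqq->last_serv_time_ns = bic->saved_last_serv_time_ns;
	bfqq->inject_limit = bic->saved_inject_limit;
	bfqq->decrease_time_jif = bic->saved_decrease_time_jif;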

Tested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 8 ++++++++
 block/bfq-iosched.h | 5 +++++
 2 files changed, 13 insertions(+)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 56ad6067d41d..e56ee60df014 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1024,6 +1024,10 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
 	else
 		bfq_clear_bfqq_IO_bound(bfqq);
 
+	bfqq->last_serv_time_ns = bic->saved_last_serv_time_ns;
+	bfqq->inject_limit = bic->saved_inject_limit;
+	bfqq->decrease_time_jif = bic->saved_decrease_time_jif;
+
 	bfqq->entity.new_weight = bic->saved_weight;
 	bfqq->ttime = bic->saved_ttime;
 	bfqq->io_start_time = bic->saved_io_start_time;
@@ -2748,6 +2752,10 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
 	if (!bic)
 		return;
 
+	bic->saved_last_serv_time_ns = bfqq->last_serv_time_ns;
+	bic->saved_inject_limit = bfqq->inject_limit;
+	bic->saved_decrease_time_jif = bfqq->decrease_time_jif;
+
 	bic->saved_weight = bfqq->entity.orig_weight;
 	bic->saved_ttime = bfqq->ttime;
 	bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq);
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index d15299d59f89..3f350fa3c5fd 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -444,6 +444,11 @@ struct bfq_io_cq {
 	unsigned long saved_wr_start_at_switch_to_srt;
 	unsigned int saved_wr_cur_max_time;
 	struct bfq_ttime saved_ttime;
+
+	/* Save also injection state */
+	u64 saved_last_serv_time_ns;
+	unsigned int saved_inject_limit;
+	unsigned long saved_decrease_time_jif;
 };
 
 /**
-- 
2.20.1



* [PATCH BUGFIX/IMPROVEMENT 6/6] block, bfq: make waker-queue detection more robust
  2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
                   ` (4 preceding siblings ...)
  2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 5/6] block, bfq: save also injection state " Paolo Valente
@ 2021-01-25 19:02 ` Paolo Valente
  2021-01-25 21:18 ` [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Jens Axboe
  6 siblings, 0 replies; 8+ messages in thread
From: Paolo Valente @ 2021-01-25 19:02 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel, Paolo Valente, Jan Kara

In the presence of many parallel I/O flows, the detection of waker
bfq_queues suffers from false positives. This commit addresses this
issue by making the filtering of actual wakers more selective: a
candidate waker must now be found to meet waker requirements three
consecutive times before being promoted to an actual waker.
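
The promotion logic added in bfq_check_waker() reduces to a small
counter-based state machine; a simplified view (the early-exit filters
and the woken_list bookkeeping of the full function below are
omitted):

	if (bfqd->last_completed_rq_bfqq != bfqq->tentative_waker_bfqq) {
		/* first detection, or a different candidate: restart */
		bfqq->tentative_waker_bfqq = bfqd->last_completed_rq_bfqq;
		bfqq->num_waker_detections = 1;
	} else /* same tentative waker detected again */
		bfqq->num_waker_detections++;

	if (bfqq->num_waker_detections == 3) {
		/* third consecutive detection: promote to actual waker */
		bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq;
		bfqq->tentative_waker_bfqq = NULL;
	}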

Tested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 211 +++++++++++++++++++++-----------------------
 block/bfq-iosched.h |   7 +-
 2 files changed, 108 insertions(+), 110 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index e56ee60df014..445cef9c0bb9 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -158,7 +158,6 @@ BFQ_BFQQ_FNS(in_large_burst);
 BFQ_BFQQ_FNS(coop);
 BFQ_BFQQ_FNS(split_coop);
 BFQ_BFQQ_FNS(softrt_update);
-BFQ_BFQQ_FNS(has_waker);
 #undef BFQ_BFQQ_FNS						\
 
 /* Expiration time of sync (0) and async (1) requests, in ns. */
@@ -1905,6 +1904,107 @@ static void bfq_update_io_intensity(struct bfq_queue *bfqq, u64 now_ns)
 	}
 }
 
+/*
+ * Detect whether bfqq's I/O seems synchronized with that of some
+ * other queue, i.e., whether bfqq, after remaining empty, happens to
+ * receive new I/O only right after some I/O request of the other
+ * queue has been completed. We call waker queue the other queue, and
+ * we assume, for simplicity, that bfqq may have at most one waker
+ * queue.
+ *
+ * A remarkable throughput boost can be reached by unconditionally
+ * injecting the I/O of the waker queue, every time a new
+ * bfq_dispatch_request happens to be invoked while I/O is being
+ * plugged for bfqq.  In addition to boosting throughput, this
+ * unblocks bfqq's I/O, thereby improving bandwidth and latency for
+ * bfqq. Note that these same results may be achieved with the general
+ * injection mechanism, but less effectively. For details on this
+ * aspect, see the comments on the choice of the queue for injection
+ * in bfq_select_queue().
+ *
+ * Turning back to the detection of a waker queue, a queue Q is deemed
+ * as a waker queue for bfqq if, for three consecutive times, bfqq
+ * happens to become non empty right after a request of Q has been
+ * completed. In particular, on the first time, Q is tentatively set
+ * as a candidate waker queue, while on the third consecutive time
+ * that Q is detected, the field waker_bfqq is set to Q, to confirm
+ * that Q is a waker queue for bfqq. These detection steps are
+ * performed only if bfqq has a long think time, so as to make it more
+ * likely that bfqq's I/O is actually being blocked by a
+ * synchronization. This last filter, plus the above three-times
+ * requirement, make false positives less likely.
+ *
+ * NOTE
+ *
+ * The sooner a waker queue is detected, the sooner throughput can be
+ * boosted by injecting I/O from the waker queue. Fortunately,
+ * detection is likely to be actually fast, for the following
+ * reasons. While blocked by synchronization, bfqq has a long think
+ * time. This implies that bfqq's inject limit is at least equal to 1
+ * (see the comments in bfq_update_inject_limit()). So, thanks to
+ * injection, the waker queue is likely to be served during the very
+ * first I/O-plugging time interval for bfqq. This triggers the first
+ * step of the detection mechanism. Thanks again to injection, the
+ * candidate waker queue is then likely to be confirmed no later than
+ * during the next I/O-plugging interval for bfqq.
+ *
+ * ISSUE
+ *
+ * On queue merging all waker information is lost.
+ */
+void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq, u64 now_ns)
+{
+	if (!bfqd->last_completed_rq_bfqq ||
+	    bfqd->last_completed_rq_bfqq == bfqq ||
+	    bfq_bfqq_has_short_ttime(bfqq) ||
+	    now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
+	    bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
+		return;
+
+	if (bfqd->last_completed_rq_bfqq !=
+	    bfqq->tentative_waker_bfqq) {
+		/*
+		 * First synchronization detected with a
+		 * candidate waker queue, or with a different
+		 * candidate waker queue from the current one.
+		 */
+		bfqq->tentative_waker_bfqq =
+			bfqd->last_completed_rq_bfqq;
+		bfqq->num_waker_detections = 1;
+	} else /* Same tentative waker queue detected again */
+		bfqq->num_waker_detections++;
+
+	if (bfqq->num_waker_detections == 3) {
+		bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq;
+		bfqq->tentative_waker_bfqq = NULL;
+
+		/*
+		 * If the waker queue disappears, then
+		 * bfqq->waker_bfqq must be reset. To
+		 * this goal, we maintain in each
+		 * waker queue a list, woken_list, of
+		 * all the queues that reference the
+		 * waker queue through their
+		 * waker_bfqq pointer. When the waker
+		 * queue exits, the waker_bfqq pointer
+		 * of all the queues in the woken_list
+		 * is reset.
+		 *
+		 * In addition, if bfqq is already in
+		 * the woken_list of a waker queue,
+		 * then, before being inserted into
+		 * the woken_list of a new waker
+		 * queue, bfqq must be removed from
+		 * the woken_list of the old waker
+		 * queue.
+		 */
+		if (!hlist_unhashed(&bfqq->woken_list_node))
+			hlist_del_init(&bfqq->woken_list_node);
+		hlist_add_head(&bfqq->woken_list_node,
+			       &bfqd->last_completed_rq_bfqq->woken_list);
+	}
+}
+
 static void bfq_add_request(struct request *rq)
 {
 	struct bfq_queue *bfqq = RQ_BFQQ(rq);
@@ -1919,111 +2019,7 @@ static void bfq_add_request(struct request *rq)
 	bfqd->queued++;
 
 	if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
-		/*
-		 * Detect whether bfqq's I/O seems synchronized with
-		 * that of some other queue, i.e., whether bfqq, after
-		 * remaining empty, happens to receive new I/O only
-		 * right after some I/O request of the other queue has
-		 * been completed. We call waker queue the other
-		 * queue, and we assume, for simplicity, that bfqq may
-		 * have at most one waker queue.
-		 *
-		 * A remarkable throughput boost can be reached by
-		 * unconditionally injecting the I/O of the waker
-		 * queue, every time a new bfq_dispatch_request
-		 * happens to be invoked while I/O is being plugged
-		 * for bfqq.  In addition to boosting throughput, this
-		 * unblocks bfqq's I/O, thereby improving bandwidth
-		 * and latency for bfqq. Note that these same results
-		 * may be achieved with the general injection
-		 * mechanism, but less effectively. For details on
-		 * this aspect, see the comments on the choice of the
-		 * queue for injection in bfq_select_queue().
-		 *
-		 * Turning back to the detection of a waker queue, a
-		 * queue Q is deemed as a waker queue for bfqq if, for
-		 * two consecutive times, bfqq happens to become non
-		 * empty right after a request of Q has been
-		 * completed. In particular, on the first time, Q is
-		 * tentatively set as a candidate waker queue, while
-		 * on the second time, the flag
-		 * bfq_bfqq_has_waker(bfqq) is set to confirm that Q
-		 * is a waker queue for bfqq. These detection steps
-		 * are performed only if bfqq has a long think time,
-		 * so as to make it more likely that bfqq's I/O is
-		 * actually being blocked by a synchronization. This
-		 * last filter, plus the above two-times requirement,
-		 * make false positives less likely.
-		 *
-		 * NOTE
-		 *
-		 * The sooner a waker queue is detected, the sooner
-		 * throughput can be boosted by injecting I/O from the
-		 * waker queue. Fortunately, detection is likely to be
-		 * actually fast, for the following reasons. While
-		 * blocked by synchronization, bfqq has a long think
-		 * time. This implies that bfqq's inject limit is at
-		 * least equal to 1 (see the comments in
-		 * bfq_update_inject_limit()). So, thanks to
-		 * injection, the waker queue is likely to be served
-		 * during the very first I/O-plugging time interval
-		 * for bfqq. This triggers the first step of the
-		 * detection mechanism. Thanks again to injection, the
-		 * candidate waker queue is then likely to be
-		 * confirmed no later than during the next
-		 * I/O-plugging interval for bfqq.
-		 */
-		if (bfqd->last_completed_rq_bfqq &&
-		    !bfq_bfqq_has_short_ttime(bfqq) &&
-		    now_ns - bfqd->last_completion <
-		    4 * NSEC_PER_MSEC) {
-			if (bfqd->last_completed_rq_bfqq != bfqq &&
-			    bfqd->last_completed_rq_bfqq !=
-			    bfqq->waker_bfqq) {
-				/*
-				 * First synchronization detected with
-				 * a candidate waker queue, or with a
-				 * different candidate waker queue
-				 * from the current one.
-				 */
-				bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq;
-
-				/*
-				 * If the waker queue disappears, then
-				 * bfqq->waker_bfqq must be reset. To
-				 * this goal, we maintain in each
-				 * waker queue a list, woken_list, of
-				 * all the queues that reference the
-				 * waker queue through their
-				 * waker_bfqq pointer. When the waker
-				 * queue exits, the waker_bfqq pointer
-				 * of all the queues in the woken_list
-				 * is reset.
-				 *
-				 * In addition, if bfqq is already in
-				 * the woken_list of a waker queue,
-				 * then, before being inserted into
-				 * the woken_list of a new waker
-				 * queue, bfqq must be removed from
-				 * the woken_list of the old waker
-				 * queue.
-				 */
-				if (!hlist_unhashed(&bfqq->woken_list_node))
-					hlist_del_init(&bfqq->woken_list_node);
-				hlist_add_head(&bfqq->woken_list_node,
-				    &bfqd->last_completed_rq_bfqq->woken_list);
-
-				bfq_clear_bfqq_has_waker(bfqq);
-			} else if (bfqd->last_completed_rq_bfqq ==
-				   bfqq->waker_bfqq &&
-				   !bfq_bfqq_has_waker(bfqq)) {
-				/*
-				 * synchronization with waker_bfqq
-				 * seen for the second time
-				 */
-				bfq_mark_bfqq_has_waker(bfqq);
-			}
-		}
+		bfq_check_waker(bfqd, bfqq, now_ns);
 
 		/*
 		 * Periodically reset inject limit, to make sure that
@@ -4569,7 +4565,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
 		    bfq_serv_to_charge(async_bfqq->next_rq, async_bfqq) <=
 		    bfq_bfqq_budget_left(async_bfqq))
 			bfqq = bfqq->bic->bfqq[0];
-		else if (bfq_bfqq_has_waker(bfqq) &&
+		else if (bfqq->waker_bfqq &&
 			   bfq_bfqq_busy(bfqq->waker_bfqq) &&
 			   bfqq->waker_bfqq->next_rq &&
 			   bfq_serv_to_charge(bfqq->waker_bfqq->next_rq,
@@ -4976,7 +4972,6 @@ void bfq_put_queue(struct bfq_queue *bfqq)
 	hlist_for_each_entry_safe(item, n, &bfqq->woken_list,
 				  woken_list_node) {
 		item->waker_bfqq = NULL;
-		bfq_clear_bfqq_has_waker(item);
 		hlist_del_init(&item->woken_list_node);
 	}
 
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 3f350fa3c5fd..b8e793c34ff1 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -376,6 +376,11 @@ struct bfq_queue {
 	 * bfq_select_queue().
 	 */
 	struct bfq_queue *waker_bfqq;
+	/* pointer to the curr. tentative waker queue, see bfq_check_waker() */
+	struct bfq_queue *tentative_waker_bfqq;
+	/* number of times the same tentative waker has been detected */
+	unsigned int num_waker_detections;
+
 	/* node for woken_list, see below */
 	struct hlist_node woken_list_node;
 	/*
@@ -776,7 +781,6 @@ enum bfqq_state_flags {
 				 */
 	BFQQF_coop,		/* bfqq is shared */
 	BFQQF_split_coop,	/* shared bfqq will be split */
-	BFQQF_has_waker		/* bfqq has a waker queue */
 };
 
 #define BFQ_BFQQ_FNS(name)						\
@@ -796,7 +800,6 @@ BFQ_BFQQ_FNS(in_large_burst);
 BFQ_BFQQ_FNS(coop);
 BFQ_BFQQ_FNS(split_coop);
 BFQ_BFQQ_FNS(softrt_update);
-BFQ_BFQQ_FNS(has_waker);
 #undef BFQ_BFQQ_FNS
 
 /* Expiration reasons. */
-- 
2.20.1



* Re: [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements
  2021-01-25 19:02 [PATCH BUGFIX/IMPROVEMENT 0/6] block, bfq: second batch of fixes and improvements Paolo Valente
                   ` (5 preceding siblings ...)
  2021-01-25 19:02 ` [PATCH BUGFIX/IMPROVEMENT 6/6] block, bfq: make waker-queue detection more robust Paolo Valente
@ 2021-01-25 21:18 ` Jens Axboe
  6 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2021-01-25 21:18 UTC (permalink / raw)
  To: Paolo Valente; +Cc: linux-block, linux-kernel

On 1/25/21 12:02 PM, Paolo Valente wrote:
> Hi,
> here's batch 2/3.
> 
> Thanks,
> Paolo
> 
> Paolo Valente (6):
>   block, bfq: replace mechanism for evaluating I/O intensity
>   block, bfq: re-evaluate convenience of I/O plugging on rq arrivals
>   block, bfq: fix switch back from soft-rt weight-raising
>   block, bfq: save also weight-raised service on queue merging
>   block, bfq: save also injection state on queue merging
>   block, bfq: make waker-queue detection more robust
> 
>  block/bfq-iosched.c | 328 ++++++++++++++++++++++++++------------------
>  block/bfq-iosched.h |  29 ++--
>  2 files changed, 214 insertions(+), 143 deletions(-)

Applied, thanks.

-- 
Jens Axboe


