From: Tejun Heo <tj@kernel.org>
To: axboe@kernel.dk
Cc: linux-block@vger.kernel.org, cgroups@vger.kernel.org,
	linux-kernel@vger.kernel.org, kernel-team@fb.com, newella@fb.com,
	Tejun Heo <tj@kernel.org>
Subject: [PATCH 20/27] blk-iocost: revamp in-period donation snapbacks
Date: Tue,  1 Sep 2020 14:52:50 -0400	[thread overview]
Message-ID: <20200901185257.645114-21-tj@kernel.org> (raw)
In-Reply-To: <20200901185257.645114-1-tj@kernel.org>

When the margin drops below the minimum on a donating iocg, the donation is
immediately canceled in full. The current behavior has a couple of
shortcomings.

* It's abrupt. A small temporary budget deficit can lead to a wide swing in
  weight allocation and a large surplus.

* It's open-coded in the issue path but not implemented for the merge path,
  so a series of merges at a low inuse can make the iocg incur debt and
  stall incorrectly (the removed logic is quoted below).
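
For reference, the removed issue-path logic (see the ioc_rqos_throttle()
hunk below) amounted to the following full reset, with no equivalent on the
merge path:

	/* pre-patch, quoted from the hunk removed below: any sub-minimum
	 * margin cancels the donation in full */
	if (hw_inuse < hw_active &&
	    time_after_eq64(vtime + ioc->margins.min, now.vnow)) {
		spin_lock_irq(&ioc->lock);
		propagate_weights(iocg, iocg->weight, iocg->weight);
		spin_unlock_irq(&ioc->lock);
		current_hweight(iocg, &hw_active, &hw_inuse);
	}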

This patch reimplements in-period donation snapbacks so that

* inuse adjustment and cost calculation are factored into
  adjust_inuse_and_calc_cost(), which is called from both the issue and
  merge paths.

* Snapbacks are more gradual, occurring in quarter steps of the active
  weight.

* A snapback triggers only if the margin falls below the low threshold and
  is worse than the margin recorded at the time of the last adjustment
  (a standalone sketch of this logic follows the list).

* To support the above, __propagate_weights() now stores the current margin
  in iocg->saved_margin. The iocg->last_inuse update is moved into
  __propagate_weights() as well for consistency.

* Full snapback is guaranteed when there are waiters.

* With precise donation and gradual snapbacks, inuse adjustments are now a
  lot more effective and the value of scaling inuse on weight changes isn't
  clear. Inuse scaling is therefore removed from weight_updated().
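
The following standalone sketch illustrates the new stepping logic. It is an
illustration only: cost_at_inuse() is a hypothetical flat-hierarchy stand-in
for current_hweight() + abs_cost_to_cost(), and locking, hierarchy
propagation and the waiter guarantee are omitted.

	#include <stdint.h>

	#define INUSE_ADJ_STEP_PCT	25

	struct iocg_sketch {
		uint32_t active;	/* active weight */
		uint32_t inuse;		/* possibly donated-down weight */
		int64_t saved_margin;	/* margin at the last adjustment */
	};

	/* hypothetical stand-in: with a flat hierarchy, halving inuse
	 * doubles the charged cost */
	static uint64_t cost_at_inuse(uint64_t abs_cost,
				      const struct iocg_sketch *iocg)
	{
		return abs_cost * iocg->active / iocg->inuse;
	}

	static uint64_t adjust_inuse_and_calc_cost_sketch(
		struct iocg_sketch *iocg, int64_t vnow, int64_t vtime,
		uint64_t abs_cost, int64_t margin_low)
	{
		/* quarter of the active weight, rounded up */
		uint32_t adj_step =
			(iocg->active * INUSE_ADJ_STEP_PCT + 99) / 100;
		uint64_t cost = cost_at_inuse(abs_cost, iocg);
		int64_t margin = vnow - vtime - (int64_t)cost;

		/*
		 * Snap back only if the margin is below the low threshold
		 * AND has deteriorated since the previous adjustment.
		 */
		if (margin >= iocg->saved_margin || margin >= margin_low ||
		    iocg->inuse == iocg->active)
			return cost;

		/* bump inuse in quarter steps until @abs_cost fits */
		do {
			uint32_t new_inuse = iocg->inuse + adj_step;

			iocg->inuse = new_inuse < iocg->active ? new_inuse
							       : iocg->active;
			iocg->saved_margin = vnow - vtime;
			cost = cost_at_inuse(abs_cost, iocg);
		} while (vtime + (int64_t)cost > vnow &&
			 iocg->inuse != iocg->active);

		return cost;
	}

With active == 1000 and inuse == 100, for example, adj_step is 250 and at
most four iterations restore inuse to active. The real function below
additionally guarantees a full snapback whenever an iocg has waiters (the
retry_lock block added to ioc_rqos_throttle()).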

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 block/blk-iocost.c | 133 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 96 insertions(+), 37 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 694f1487208a..d09b4011449c 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -226,6 +226,8 @@ enum {
 	MARGIN_TARGET_PCT	= 50,
 	MARGIN_MAX_PCT		= 100,
 
+	INUSE_ADJ_STEP_PCT	= 25,
+
 	/* Have some play in timer operations */
 	TIMER_SLACK_PCT		= 1,
 
@@ -443,12 +445,17 @@ struct ioc_gq {
 	 *
 	 * `last_inuse` remembers `inuse` while an iocg is idle to persist
 	 * surplus adjustments.
+	 *
+	 * `inuse` may be adjusted dynamically during period. `saved_*` are used
+	 * to determine and track adjustments.
 	 */
 	u32				cfg_weight;
 	u32				weight;
 	u32				active;
 	u32				inuse;
+
 	u32				last_inuse;
+	s64				saved_margin;
 
 	sector_t			cursor;		/* to detect randio */
 
@@ -934,9 +941,11 @@ static void ioc_start_period(struct ioc *ioc, struct ioc_now *now)
 
 /*
  * Update @iocg's `active` and `inuse` to @active and @inuse, update level
- * weight sums and propagate upwards accordingly.
+ * weight sums and propagate upwards accordingly. If @save, the current margin
+ * is saved to be used as reference for later inuse in-period adjustments.
  */
-static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse)
+static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
+				bool save, struct ioc_now *now)
 {
 	struct ioc *ioc = iocg->ioc;
 	int lvl;
@@ -945,6 +954,10 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse)
 
 	inuse = clamp_t(u32, inuse, 1, active);
 
+	iocg->last_inuse = iocg->inuse;
+	if (save)
+		iocg->saved_margin = now->vnow - atomic64_read(&iocg->vtime);
+
 	if (active == iocg->active && inuse == iocg->inuse)
 		return;
 
@@ -996,9 +1009,10 @@ static void commit_weights(struct ioc *ioc)
 	}
 }
 
-static void propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse)
+static void propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
+			      bool save, struct ioc_now *now)
 {
-	__propagate_weights(iocg, active, inuse);
+	__propagate_weights(iocg, active, inuse, save, now);
 	commit_weights(iocg->ioc);
 }
 
@@ -1082,7 +1096,7 @@ static u32 current_hweight_max(struct ioc_gq *iocg)
 	return max_t(u32, hwm, 1);
 }
 
-static void weight_updated(struct ioc_gq *iocg)
+static void weight_updated(struct ioc_gq *iocg, struct ioc_now *now)
 {
 	struct ioc *ioc = iocg->ioc;
 	struct blkcg_gq *blkg = iocg_to_blkg(iocg);
@@ -1093,9 +1107,7 @@ static void weight_updated(struct ioc_gq *iocg)
 
 	weight = iocg->cfg_weight ?: iocc->dfl_weight;
 	if (weight != iocg->weight && iocg->active)
-		propagate_weights(iocg, weight,
-				  DIV64_U64_ROUND_UP((u64)iocg->inuse * weight,
-						     iocg->weight));
+		propagate_weights(iocg, weight, iocg->inuse, true, now);
 	iocg->weight = weight;
 }
 
@@ -1165,8 +1177,9 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
 	 */
 	iocg->hweight_gen = atomic_read(&ioc->hweight_gen) - 1;
 	list_add(&iocg->active_list, &ioc->active_iocgs);
+
 	propagate_weights(iocg, iocg->weight,
-			  iocg->last_inuse ?: iocg->weight);
+			  iocg->last_inuse ?: iocg->weight, true, now);
 
 	TRACE_IOCG_PATH(iocg_activate, iocg, now,
 			last_period, cur_period, vtime);
@@ -1789,7 +1802,7 @@ static void transfer_surpluses(struct list_head *surpluses, struct ioc_now *now)
 		inuse = DIV64_U64_ROUND_UP(
 			parent->child_adjusted_sum * iocg->hweight_after_donation,
 			parent->hweight_inuse);
-		__propagate_weights(iocg, iocg->active, inuse);
+		__propagate_weights(iocg, iocg->active, inuse, true, now);
 	}
 
 	/* walk list should be dissolved after use */
@@ -1844,8 +1857,7 @@ static void ioc_timer_fn(struct timer_list *timer)
 			iocg_kick_waitq(iocg, true, &now);
 		} else if (iocg_is_idle(iocg)) {
 			/* no waiter and idle, deactivate */
-			iocg->last_inuse = iocg->inuse;
-			__propagate_weights(iocg, 0, 0);
+			__propagate_weights(iocg, 0, 0, false, &now);
 			list_del_init(&iocg->active_list);
 		}
 
@@ -1925,7 +1937,7 @@ static void ioc_timer_fn(struct timer_list *timer)
 				list_add(&iocg->surplus_list, &surpluses);
 			} else {
 				__propagate_weights(iocg, iocg->active,
-						    iocg->active);
+						    iocg->active, true, &now);
 				nr_shortages++;
 			}
 		} else {
@@ -2055,6 +2067,50 @@ static void ioc_timer_fn(struct timer_list *timer)
 	spin_unlock_irq(&ioc->lock);
 }
 
+static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime,
+				      u64 abs_cost, struct ioc_now *now)
+{
+	struct ioc *ioc = iocg->ioc;
+	struct ioc_margins *margins = &ioc->margins;
+	u32 adj_step = DIV_ROUND_UP(iocg->active * INUSE_ADJ_STEP_PCT, 100);
+	u32 hwi;
+	s64 margin;
+	u64 cost, new_inuse;
+
+	current_hweight(iocg, NULL, &hwi);
+	cost = abs_cost_to_cost(abs_cost, hwi);
+	margin = now->vnow - vtime - cost;
+
+	/*
+	 * We only increase inuse during period and do so iff the margin has
+	 * deteriorated since the previous adjustment.
+	 */
+	if (margin >= iocg->saved_margin || margin >= margins->low ||
+	    iocg->inuse == iocg->active)
+		return cost;
+
+	spin_lock_irq(&ioc->lock);
+
+	/* we own inuse only when @iocg is in the normal active state */
+	if (list_empty(&iocg->active_list)) {
+		spin_unlock_irq(&ioc->lock);
+		return cost;
+	}
+
+	/* bump up inuse till @abs_cost fits in the existing budget */
+	new_inuse = iocg->inuse;
+	do {
+		new_inuse = new_inuse + adj_step;
+		propagate_weights(iocg, iocg->active, new_inuse, true, now);
+		current_hweight(iocg, NULL, &hwi);
+		cost = abs_cost_to_cost(abs_cost, hwi);
+	} while (time_after64(vtime + cost, now->vnow) &&
+		 iocg->inuse != iocg->active);
+
+	spin_unlock_irq(&ioc->lock);
+	return cost;
+}
+
 static void calc_vtime_cost_builtin(struct bio *bio, struct ioc_gq *iocg,
 				    bool is_merge, u64 *costp)
 {
@@ -2136,7 +2192,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
 	struct ioc_gq *iocg = blkg_to_iocg(blkg);
 	struct ioc_now now;
 	struct iocg_wait wait;
-	u32 hw_active, hw_inuse;
 	u64 abs_cost, cost, vtime;
 	bool use_debt, ioc_locked;
 	unsigned long flags;
@@ -2154,21 +2209,8 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
 		return;
 
 	iocg->cursor = bio_end_sector(bio);
-
 	vtime = atomic64_read(&iocg->vtime);
-	current_hweight(iocg, &hw_active, &hw_inuse);
-
-	if (hw_inuse < hw_active &&
-	    time_after_eq64(vtime + ioc->margins.min, now.vnow)) {
-		TRACE_IOCG_PATH(inuse_reset, iocg, &now,
-				iocg->inuse, iocg->weight, hw_inuse, hw_active);
-		spin_lock_irq(&ioc->lock);
-		propagate_weights(iocg, iocg->weight, iocg->weight);
-		spin_unlock_irq(&ioc->lock);
-		current_hweight(iocg, &hw_active, &hw_inuse);
-	}
-
-	cost = abs_cost_to_cost(abs_cost, hw_inuse);
+	cost = adjust_inuse_and_calc_cost(iocg, vtime, abs_cost, &now);
 
 	/*
 	 * If no one's waiting and within budget, issue right away.  The
@@ -2190,7 +2232,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
 	 */
 	use_debt = bio_issue_as_root_blkg(bio) || fatal_signal_pending(current);
 	ioc_locked = use_debt || READ_ONCE(iocg->abs_vdebt);
-
+retry_lock:
 	iocg_lock(iocg, ioc_locked, &flags);
 
 	/*
@@ -2232,6 +2274,17 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
 		return;
 	}
 
+	/* guarantee that iocgs w/ waiters have maximum inuse */
+	if (iocg->inuse != iocg->active) {
+		if (!ioc_locked) {
+			iocg_unlock(iocg, false, &flags);
+			ioc_locked = true;
+			goto retry_lock;
+		}
+		propagate_weights(iocg, iocg->active, iocg->active, true,
+				  &now);
+	}
+
 	/*
 	 * Append self to the waitq and schedule the wakeup timer if we're
 	 * the first waiter.  The timer duration is calculated based on the
@@ -2274,8 +2327,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
 	struct ioc *ioc = iocg->ioc;
 	sector_t bio_end = bio_end_sector(bio);
 	struct ioc_now now;
-	u32 hw_inuse;
-	u64 abs_cost, cost;
+	u64 vtime, abs_cost, cost;
 	unsigned long flags;
 
 	/* bypass if disabled or for root cgroup */
@@ -2287,8 +2339,9 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
 		return;
 
 	ioc_now(ioc, &now);
-	current_hweight(iocg, NULL, &hw_inuse);
-	cost = abs_cost_to_cost(abs_cost, hw_inuse);
+
+	vtime = atomic64_read(&iocg->vtime);
+	cost = adjust_inuse_and_calc_cost(iocg, vtime, abs_cost, &now);
 
 	/* update cursor if backmerging into the request at the cursor */
 	if (blk_rq_pos(rq) < bio_end &&
@@ -2530,7 +2583,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
 	}
 
 	spin_lock_irqsave(&ioc->lock, flags);
-	weight_updated(iocg);
+	weight_updated(iocg, &now);
 	spin_unlock_irqrestore(&ioc->lock, flags);
 }
 
@@ -2544,7 +2597,10 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
 		spin_lock_irqsave(&ioc->lock, flags);
 
 		if (!list_empty(&iocg->active_list)) {
-			propagate_weights(iocg, 0, 0);
+			struct ioc_now now;
+
+			ioc_now(ioc, &now);
+			propagate_weights(iocg, 0, 0, false, &now);
 			list_del_init(&iocg->active_list);
 		}
 
@@ -2612,6 +2668,7 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
 	struct blkcg *blkcg = css_to_blkcg(of_css(of));
 	struct ioc_cgrp *iocc = blkcg_to_iocc(blkcg);
 	struct blkg_conf_ctx ctx;
+	struct ioc_now now;
 	struct ioc_gq *iocg;
 	u32 v;
 	int ret;
@@ -2632,7 +2689,8 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
 
 			if (iocg) {
 				spin_lock_irq(&iocg->ioc->lock);
-				weight_updated(iocg);
+				ioc_now(iocg->ioc, &now);
+				weight_updated(iocg, &now);
 				spin_unlock_irq(&iocg->ioc->lock);
 			}
 		}
@@ -2658,7 +2716,8 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
 
 	spin_lock(&iocg->ioc->lock);
 	iocg->cfg_weight = v * WEIGHT_ONE;
-	weight_updated(iocg);
+	ioc_now(iocg->ioc, &now);
+	weight_updated(iocg, &now);
 	spin_unlock(&iocg->ioc->lock);
 
 	blkg_conf_finish(&ctx);
-- 
2.26.2


Thread overview: 42+ messages
2020-09-01 18:52 [PATCHSET for-5.10/block] blk-iocost: iocost: improve donation, debt and excess handling Tejun Heo
2020-09-01 18:52 ` [PATCH 01/27] blk-iocost: ioc_pd_free() shouldn't assume irq disabled Tejun Heo
2020-09-01 18:52 ` [PATCH 02/27] blk-stat: make q->stats->lock irqsafe Tejun Heo
2020-09-01 18:52 ` [PATCH 03/27] blk-iocost: use local[64]_t for percpu stat Tejun Heo
2020-11-20 21:51   ` Stafford Horne
2020-11-20 22:13     ` Tejun Heo
2020-09-01 18:52 ` [PATCH 04/27] blk-iocost: rename propagate_active_weights() to propagate_weights() Tejun Heo
2020-09-01 18:52 ` [PATCH 05/27] blk-iocost: clamp inuse and skip noops in __propagate_weights() Tejun Heo
2020-09-01 18:52 ` [PATCH 06/27] blk-iocost: move iocg_kick_delay() above iocg_kick_waitq() Tejun Heo
2020-09-01 18:52 ` [PATCH 07/27] blk-iocost: make iocg_kick_waitq() call iocg_kick_delay() after paying debt Tejun Heo
2020-09-01 18:52 ` [PATCH 08/27] blk-iocost: s/HWEIGHT_WHOLE/WEIGHT_ONE/g Tejun Heo
2020-09-01 18:52 ` [PATCH 09/27] blk-iocost: use WEIGHT_ONE based fixed point number for weights Tejun Heo
2020-09-01 18:52 ` [PATCH 10/27] blk-iocost: make ioc_now->now and ioc->period_at 64bit Tejun Heo
2020-09-01 18:52 ` [PATCH 11/27] blk-iocost: streamline vtime margin and timer slack handling Tejun Heo
2020-09-01 18:52 ` [PATCH 12/27] blk-iocost: grab ioc->lock for debt handling Tejun Heo
2020-09-01 18:52 ` [PATCH 13/27] blk-iocost: add absolute usage stat Tejun Heo
2020-09-01 18:52 ` [PATCH 14/27] blk-iocost: calculate iocg->usages[] from iocg->local_stat.usage_us Tejun Heo
2020-09-01 18:52 ` [PATCH 15/27] blk-iocost: replace iocg->has_surplus with ->surplus_list Tejun Heo
2020-09-01 18:52 ` [PATCH 16/27] blk-iocost: decouple vrate adjustment from surplus transfers Tejun Heo
2020-09-01 18:52 ` [PATCH 17/27] blk-iocost: restructure surplus donation logic Tejun Heo
2020-09-01 18:52 ` [PATCH 18/27] blk-iocost: implement Andy's method for donation weight updates Tejun Heo
2020-09-01 18:52 ` [PATCH 19/27] blk-iocost: revamp donation amount determination Tejun Heo
2020-09-01 18:52 ` [PATCH 20/27] blk-iocost: revamp in-period donation snapbacks Tejun Heo [this message]
2020-09-01 18:52 ` [PATCH 21/27] blk-iocost: revamp debt handling Tejun Heo
2020-09-01 18:52 ` [PATCH 22/27] blk-iocost: implement delay adjustment hysteresis Tejun Heo
2020-09-01 18:52 ` [PATCH 23/27] blk-iocost: halve debts if device stays idle Tejun Heo
2020-09-01 18:52 ` [PATCH 24/27] blk-iocost: implement vtime loss compensation Tejun Heo
2020-09-01 18:52 ` [PATCH 25/27] blk-iocost: restore inuse update tracepoints Tejun Heo
2020-09-01 18:52 ` [PATCH 26/27] blk-iocost: add three debug stat - cost.wait, indebt and indelay Tejun Heo
2020-09-01 18:52 ` [PATCH 27/27] blk-iocost: update iocost_monitor.py Tejun Heo
2020-09-01 22:57 ` [PATCHSET for-5.10/block] blk-iocost: iocost: improve donation, debt and excess handling Jens Axboe
