* [PATCH 1/2] sched/fair: use reweight_entity to reweight tasks
@ 2017-08-03 15:13 josef
  2017-08-03 15:13 ` [PATCH 2/2] sched/fair: calculate runnable_weight slightly differently josef
  0 siblings, 1 reply; 3+ messages in thread
From: josef @ 2017-08-03 15:13 UTC (permalink / raw)
  To: riel, kernel-team, mingo, peterz, linux-kernel, tj; +Cc: Josef Bacik

From: Josef Bacik <jbacik@fb.com>

reweight_task() only accounts for the load average change in the cfs_rq, but
doesn't account for the runnable load average change.  We need to do
everything reweight_entity() does, and then simply set our inv_weight
appropriately.
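
For context, a minimal userspace sketch (not kernel code, numbers made up) of
what inv_weight is for: it is roughly 2^32 / weight, precomputed so the
scheduler can scale runtimes by a weight ratio (e.g. when converting execution
time into weighted vruntime) with a multiply and a shift instead of a divide.
This is the value reweight_task() copies from sched_prio_to_wmult[] once
reweight_entity() has updated the weight itself.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t weight = 1024;				/* nice-0 task weight */
	uint64_t inv_weight = 0xffffffffULL / weight;	/* ~ sched_prio_to_wmult[] entry */
	uint64_t delta_exec = 3000000;			/* ns of runtime to scale */

	/* roughly delta_exec * NICE_0_LOAD / weight, done as multiply + shift */
	uint64_t scaled = (delta_exec * 1024 * inv_weight) >> 32;

	printf("inv_weight=%llu scaled_delta=%llu\n",
	       (unsigned long long)inv_weight,
	       (unsigned long long)scaled);
	return 0;
}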

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 kernel/sched/fair.c | 31 +++++++++++--------------------
 1 file changed, 11 insertions(+), 20 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0cff1b6..c336534 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2809,26 +2809,6 @@ __sub_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum);
 }
 
-void reweight_task(struct task_struct *p, int prio)
-{
-	struct sched_entity *se = &p->se;
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-	struct load_weight *load = &p->se.load;
-
-	u32 divider = LOAD_AVG_MAX - 1024 + se->avg.period_contrib;
-
-	__sub_load_avg(cfs_rq, se);
-
-	load->weight = scale_load(sched_prio_to_weight[prio]);
-	load->inv_weight = sched_prio_to_wmult[prio];
-
-	se->avg.load_avg = div_u64(se_weight(se) * se->avg.load_sum, divider);
-	se->avg.runnable_load_avg =
-		div_u64(se_runnable(se) * se->avg.runnable_load_sum, divider);
-
-	__add_load_avg(cfs_rq, se);
-}
-
 static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 			    unsigned long weight, unsigned long runnable)
 {
@@ -2858,6 +2838,17 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 	}
 }
 
+void reweight_task(struct task_struct *p, int prio)
+{
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	struct load_weight *load = &se->load;
+	unsigned long weight = scale_load(sched_prio_to_weight[prio]);
+
+	reweight_entity(cfs_rq, se, weight, weight);
+	load->inv_weight = sched_prio_to_wmult[prio];
+}
+
 static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
 
 /*
-- 
2.7.4


* [PATCH 2/2] sched/fair: calculate runnable_weight slightly differently
  2017-08-03 15:13 [PATCH 1/2] sched/fair: use reweight_entity to reweight tasks josef
@ 2017-08-03 15:13 ` josef
  2017-09-29 20:14   ` [tip:sched/core] sched/fair: Calculate " tip-bot for Josef Bacik
  0 siblings, 1 reply; 3+ messages in thread
From: josef @ 2017-08-03 15:13 UTC (permalink / raw)
  To: riel, kernel-team, mingo, peterz, linux-kernel, tj; +Cc: Josef Bacik

From: Josef Bacik <jbacik@fb.com>

Our runnable_weight currently looks like this

runnable_weight = shares * runnable_load_avg / load_avg

The goal is to scale the runnable weight of the group based on its
runnable_load_avg to load_avg ratio.  The problem is that this biases us
towards tasks that never go to sleep: tasks that do sleep have their
runnable_load_avg decayed pretty hard, which drastically reduces the runnable
weight of groups with interactive tasks.  To solve this imbalance we tweak the
calculation slightly, so that in the ideal case it is still the above, but in
the interactive case it is

runnable_weight = shares * runnable_weight / load_weight

which will make the weight distribution fairer between interactive and
non-interactive groups.
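
To make the effect concrete, here is a toy userspace sketch (not kernel code,
the numbers are made up, and MAX2() stands in for the kernel's max()) comparing
the current calculation with the proposed one for a group whose interactive
tasks just woke up, so runnable_load_avg has decayed while the weight of the
newly enqueued tasks has not:

#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
	long shares = 1024;
	long runnable_load_avg = 100;	/* decayed while the tasks slept */
	long load_avg = 900;
	long runnable_weight = 1024;	/* weight of the tasks now enqueued */
	long load_weight = 1024;

	/* current: shares * runnable_load_avg / load_avg */
	long cur = shares * runnable_load_avg / load_avg;

	/* proposed: use the max of the average and the instantaneous
	 * weight on both sides of the ratio */
	long new = shares * MAX2(runnable_load_avg, runnable_weight)
			  / MAX2(load_avg, load_weight);

	printf("current=%ld proposed=%ld\n", cur, new);	/* 113 vs 1024 */
	return 0;
}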

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 kernel/sched/fair.c | 45 +++++++++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c336534..29db62c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2687,7 +2687,7 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  *
  * hence icky!
  */
-static long calc_cfs_shares(struct cfs_rq *cfs_rq)
+static long calc_group_shares(struct cfs_rq *cfs_rq)
 {
 	long tg_weight, tg_shares, load, shares;
 	struct task_group *tg = cfs_rq->tg;
@@ -2852,6 +2852,36 @@ void reweight_task(struct task_struct *p, int prio)
 static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
 
 /*
+ * The runnable shares of this group are calculated as follows:
+ *
+ *          max(cfs_rq->avg.runnable_load_avg, cfs_rq->runnable_weight)
+ * shares * ------------------------------------------------------------
+ *               max(cfs_rq->avg.load_avg, cfs_rq->load.weight)
+ *
+ * We do this to keep the shares in line with expected load on the cfs_rq.
+ * Consider a cfs_rq on which several tasks wake up for the first time: its
+ * runnable_load_avg is not going to be representative of the load this
+ * cfs_rq will now experience, which will bias us against this cfs_rq.
+ * The weight on the cfs_rq is the immediate effect of having new tasks
+ * enqueued onto it and should be used to calculate the new runnable shares.
+ * At the same time we need the actual load_avg to be the lower bound for the
+ * calculation, to handle the case where our weight drops quickly when
+ * entities are dequeued.
+ */
+static long calc_group_runnable(struct cfs_rq *cfs_rq, long shares)
+{
+	long load_avg = max(cfs_rq->avg.load_avg,
+			    scale_load_down(cfs_rq->load.weight));
+	long runnable = max(cfs_rq->avg.runnable_load_avg,
+			    scale_load_down(cfs_rq->runnable_weight));
+
+	runnable *= shares;
+	if (load_avg)
+		runnable /= load_avg;
+	return clamp_t(long, runnable, MIN_SHARES, shares);
+}
+
+/*
  * Recomputes the group entity based on the current state of its group
  * runqueue.
  */
@@ -2872,18 +2902,9 @@ static void update_cfs_group(struct sched_entity *se)
 	if (likely(se->load.weight == shares))
 		return;
 #else
-	shares = calc_cfs_shares(gcfs_rq);
+	shares = calc_group_shares(gcfs_rq);
 #endif
-	/*
-	 * The hierarchical runnable load metric is the proportional part
-	 * of this group's runnable_load_avg / load_avg.
-	 *
-	 * Note: we need to deal with very sporadic 'runnable > load' cases
-	 * due to numerical instability.
-	 */
-	runnable = shares * gcfs_rq->avg.runnable_load_avg;
-	if (runnable)
-		runnable /= max(gcfs_rq->avg.load_avg, gcfs_rq->avg.runnable_load_avg);
+	runnable = calc_group_runnable(gcfs_rq, shares);
 
 	reweight_entity(cfs_rq_of(se), se, shares, runnable);
 }
-- 
2.7.4


* [tip:sched/core] sched/fair: Calculate runnable_weight slightly differently
  2017-08-03 15:13 ` [PATCH 2/2] sched/fair: calculate runnable_weight slightly differently josef
@ 2017-09-29 20:14   ` tip-bot for Josef Bacik
  0 siblings, 0 replies; 3+ messages in thread
From: tip-bot for Josef Bacik @ 2017-09-29 20:14 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, mingo, hpa, tglx, peterz, jbacik, torvalds

Commit-ID:  2c8e4dce7963d2bae02db95fce2691365630685c
Gitweb:     https://git.kernel.org/tip/2c8e4dce7963d2bae02db95fce2691365630685c
Author:     Josef Bacik <jbacik@fb.com>
AuthorDate: Thu, 3 Aug 2017 11:13:39 -0400
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 29 Sep 2017 19:35:17 +0200

sched/fair: Calculate runnable_weight slightly differently

Our runnable_weight currently looks like this

runnable_weight = shares * runnable_load_avg / load_avg

The goal is to scale the runnable weight of the group based on its
runnable_load_avg to load_avg ratio.  The problem is that this biases us
towards tasks that never go to sleep: tasks that do sleep have their
runnable_load_avg decayed pretty hard, which drastically reduces the runnable
weight of groups with interactive tasks.  To solve this imbalance we tweak the
calculation slightly, so that in the ideal case it is still the above, but in
the interactive case it is

runnable_weight = shares * runnable_weight / load_weight

which will make the weight distribution fairer between interactive and
non-interactive groups.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kernel-team@fb.com
Cc: linux-kernel@vger.kernel.org
Cc: riel@redhat.com
Cc: tj@kernel.org
Link: http://lkml.kernel.org/r/1501773219-18774-2-git-send-email-jbacik@fb.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 45 +++++++++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 67c3964..a62098e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2883,7 +2883,7 @@ void reweight_task(struct task_struct *p, int prio)
  *
  * hence icky!
  */
-static long calc_cfs_shares(struct cfs_rq *cfs_rq)
+static long calc_group_shares(struct cfs_rq *cfs_rq)
 {
 	long tg_weight, tg_shares, load, shares;
 	struct task_group *tg = cfs_rq->tg;
@@ -2920,6 +2920,36 @@ static long calc_cfs_shares(struct cfs_rq *cfs_rq)
 	 */
 	return clamp_t(long, shares, MIN_SHARES, tg_shares);
 }
+
+/*
+ * The runnable shares of this group are calculated as follows:
+ *
+ *          max(cfs_rq->avg.runnable_load_avg, cfs_rq->runnable_weight)
+ * shares * ------------------------------------------------------------
+ *               max(cfs_rq->avg.load_avg, cfs_rq->load.weight)
+ *
+ * We do this to keep the shares in line with expected load on the cfs_rq.
+ * Consider a cfs_rq on which several tasks wake up for the first time: its
+ * runnable_load_avg is not going to be representative of the load this
+ * cfs_rq will now experience, which will bias us against this cfs_rq.
+ * The weight on the cfs_rq is the immediate effect of having new tasks
+ * enqueued onto it and should be used to calculate the new runnable shares.
+ * At the same time we need the actual load_avg to be the lower bound for the
+ * calculation, to handle the case where our weight drops quickly when
+ * entities are dequeued.
+ */
+static long calc_group_runnable(struct cfs_rq *cfs_rq, long shares)
+{
+	long load_avg = max(cfs_rq->avg.load_avg,
+			    scale_load_down(cfs_rq->load.weight));
+	long runnable = max(cfs_rq->avg.runnable_load_avg,
+			    scale_load_down(cfs_rq->runnable_weight));
+
+	runnable *= shares;
+	if (load_avg)
+		runnable /= load_avg;
+	return clamp_t(long, runnable, MIN_SHARES, shares);
+}
 # endif /* CONFIG_SMP */
 
 static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
@@ -2945,17 +2975,8 @@ static void update_cfs_group(struct sched_entity *se)
 	if (likely(se->load.weight == shares))
 		return;
 #else
-	shares = calc_cfs_shares(gcfs_rq);
-	/*
-	 * The hierarchical runnable load metric is the proportional part
-	 * of this group's runnable_load_avg / load_avg.
-	 *
-	 * Note: we need to deal with very sporadic 'runnable > load' cases
-	 * due to numerical instability.
-	 */
-	runnable = shares * gcfs_rq->avg.runnable_load_avg;
-	if (runnable)
-		runnable /= max(gcfs_rq->avg.load_avg, gcfs_rq->avg.runnable_load_avg);
+	shares   = calc_group_shares(gcfs_rq);
+	runnable = calc_group_runnable(gcfs_rq, shares);
 #endif
 
 	reweight_entity(cfs_rq_of(se), se, shares, runnable);


Thread overview: 3+ messages
2017-08-03 15:13 [PATCH 1/2] sched/fair: use reweight_entity to reweight tasks josef
2017-08-03 15:13 ` [PATCH 2/2] sched/fair: calculate runnable_weight slightly differently josef
2017-09-29 20:14   ` [tip:sched/core] sched/fair: Calculate " tip-bot for Josef Bacik
