linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "tip-bot2 for Vincent Guittot" <tip-bot2@linutronix.de>
To: linux-tip-commits@vger.kernel.org
Cc: Vincent Guittot <vincent.guittot@linaro.org>,
	Mel Gorman <mgorman@techsingularity.net>,
	Ingo Molnar <mingo@kernel.org>,
	"Dietmar Eggemann" <dietmar.eggemann@arm.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Juri Lelli <juri.lelli@redhat.com>,
	Valentin Schneider <valentin.schneider@arm.com>,
	Phil Auld <pauld@redhat.com>, Hillf Danton <hdanton@sina.com>,
	x86 <x86@kernel.org>, LKML <linux-kernel@vger.kernel.org>
Subject: [tip: sched/core] sched/numa: Replace runnable_load_avg by load_avg
Date: Mon, 24 Feb 2020 15:20:32 -0000	[thread overview]
Message-ID: <158255763273.28353.4606148712630409487.tip-bot2@tip-bot2> (raw)
In-Reply-To: <20200224095223.13361-6-mgorman@techsingularity.net>

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     6499b1b2dd1b8d404a16b9fbbf1af6b9b3c1d83d
Gitweb:        https://git.kernel.org/tip/6499b1b2dd1b8d404a16b9fbbf1af6b9b3c1d83d
Author:        Vincent Guittot <vincent.guittot@linaro.org>
AuthorDate:    Mon, 24 Feb 2020 09:52:15 
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Mon, 24 Feb 2020 11:36:34 +01:00

sched/numa: Replace runnable_load_avg by load_avg

Similarly to what has been done for the normal load balancer, we can
replace runnable_load_avg by load_avg in numa load balancing and track the
other statistics like the utilization and the number of running tasks to
get to better view of the current state of a node.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: "Dietmar Eggemann <dietmar.eggemann@arm.com>"
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Phil Auld <pauld@redhat.com>
Cc: Hillf Danton <hdanton@sina.com>
Link: https://lore.kernel.org/r/20200224095223.13361-6-mgorman@techsingularity.net
---
 kernel/sched/fair.c | 102 +++++++++++++++++++++++++++++--------------
 1 file changed, 70 insertions(+), 32 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a6c7f8b..bc3d651 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1473,38 +1473,35 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
 	       group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
 }
 
-static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq);
-
-static unsigned long cpu_runnable_load(struct rq *rq)
-{
-	return cfs_rq_runnable_load_avg(&rq->cfs);
-}
+/*
+ * 'numa_type' describes the node at the moment of load balancing.
+ */
+enum numa_type {
+	/* The node has spare capacity that can be used to run more tasks.  */
+	node_has_spare = 0,
+	/*
+	 * The node is fully used and the tasks don't compete for more CPU
+	 * cycles. Nevertheless, some tasks might wait before running.
+	 */
+	node_fully_busy,
+	/*
+	 * The node is overloaded and can't provide expected CPU cycles to all
+	 * tasks.
+	 */
+	node_overloaded
+};
 
 /* Cached statistics for all CPUs within a node */
 struct numa_stats {
 	unsigned long load;
-
+	unsigned long util;
 	/* Total compute capacity of CPUs on a node */
 	unsigned long compute_capacity;
+	unsigned int nr_running;
+	unsigned int weight;
+	enum numa_type node_type;
 };
 
-/*
- * XXX borrowed from update_sg_lb_stats
- */
-static void update_numa_stats(struct numa_stats *ns, int nid)
-{
-	int cpu;
-
-	memset(ns, 0, sizeof(*ns));
-	for_each_cpu(cpu, cpumask_of_node(nid)) {
-		struct rq *rq = cpu_rq(cpu);
-
-		ns->load += cpu_runnable_load(rq);
-		ns->compute_capacity += capacity_of(cpu);
-	}
-
-}
-
 struct task_numa_env {
 	struct task_struct *p;
 
@@ -1521,6 +1518,47 @@ struct task_numa_env {
 	int best_cpu;
 };
 
+static unsigned long cpu_load(struct rq *rq);
+static unsigned long cpu_util(int cpu);
+
+static inline enum
+numa_type numa_classify(unsigned int imbalance_pct,
+			 struct numa_stats *ns)
+{
+	if ((ns->nr_running > ns->weight) &&
+	    ((ns->compute_capacity * 100) < (ns->util * imbalance_pct)))
+		return node_overloaded;
+
+	if ((ns->nr_running < ns->weight) ||
+	    ((ns->compute_capacity * 100) > (ns->util * imbalance_pct)))
+		return node_has_spare;
+
+	return node_fully_busy;
+}
+
+/*
+ * XXX borrowed from update_sg_lb_stats
+ */
+static void update_numa_stats(struct task_numa_env *env,
+			      struct numa_stats *ns, int nid)
+{
+	int cpu;
+
+	memset(ns, 0, sizeof(*ns));
+	for_each_cpu(cpu, cpumask_of_node(nid)) {
+		struct rq *rq = cpu_rq(cpu);
+
+		ns->load += cpu_load(rq);
+		ns->util += cpu_util(cpu);
+		ns->nr_running += rq->cfs.h_nr_running;
+		ns->compute_capacity += capacity_of(cpu);
+	}
+
+	ns->weight = cpumask_weight(cpumask_of_node(nid));
+
+	ns->node_type = numa_classify(env->imbalance_pct, ns);
+}
+
 static void task_numa_assign(struct task_numa_env *env,
 			     struct task_struct *p, long imp)
 {
@@ -1556,6 +1594,11 @@ static bool load_too_imbalanced(long src_load, long dst_load,
 	long orig_src_load, orig_dst_load;
 	long src_capacity, dst_capacity;
 
+
+	/* If dst node has spare capacity, there is no real load imbalance */
+	if (env->dst_stats.node_type == node_has_spare)
+		return false;
+
 	/*
 	 * The load is corrected for the CPU capacity available on each node.
 	 *
@@ -1788,10 +1831,10 @@ static int task_numa_migrate(struct task_struct *p)
 	dist = env.dist = node_distance(env.src_nid, env.dst_nid);
 	taskweight = task_weight(p, env.src_nid, dist);
 	groupweight = group_weight(p, env.src_nid, dist);
-	update_numa_stats(&env.src_stats, env.src_nid);
+	update_numa_stats(&env, &env.src_stats, env.src_nid);
 	taskimp = task_weight(p, env.dst_nid, dist) - taskweight;
 	groupimp = group_weight(p, env.dst_nid, dist) - groupweight;
-	update_numa_stats(&env.dst_stats, env.dst_nid);
+	update_numa_stats(&env, &env.dst_stats, env.dst_nid);
 
 	/* Try to find a spot on the preferred nid. */
 	task_numa_find_cpu(&env, taskimp, groupimp);
@@ -1824,7 +1867,7 @@ static int task_numa_migrate(struct task_struct *p)
 
 			env.dist = dist;
 			env.dst_nid = nid;
-			update_numa_stats(&env.dst_stats, env.dst_nid);
+			update_numa_stats(&env, &env.dst_stats, env.dst_nid);
 			task_numa_find_cpu(&env, taskimp, groupimp);
 		}
 	}
@@ -3686,11 +3729,6 @@ static void remove_entity_load_avg(struct sched_entity *se)
 	raw_spin_unlock_irqrestore(&cfs_rq->removed.lock, flags);
 }
 
-static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq)
-{
-	return cfs_rq->avg.runnable_load_avg;
-}
-
 static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq)
 {
 	return cfs_rq->avg.load_avg;

  reply	other threads:[~2020-02-24 15:20 UTC|newest]

Thread overview: 83+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-24  9:52 [PATCH 00/13] Reconcile NUMA balancing decisions with the load balancer v6 Mel Gorman
2020-02-24  9:52 ` [PATCH 01/13] sched/fair: Allow a per-CPU kthread waking a task to stack on the same CPU, to fix XFS performance regression Mel Gorman
2020-02-24  9:52 ` [PATCH 02/13] sched/numa: Trace when no candidate CPU was found on the preferred node Mel Gorman
2020-02-24 15:20   ` [tip: sched/core] " tip-bot2 for Mel Gorman
2020-02-24  9:52 ` [PATCH 03/13] sched/numa: Distinguish between the different task_numa_migrate failure cases Mel Gorman
2020-02-24 15:20   ` [tip: sched/core] sched/numa: Distinguish between the different task_numa_migrate() " tip-bot2 for Mel Gorman
2020-02-24  9:52 ` [PATCH 04/13] sched/fair: Reorder enqueue/dequeue_task_fair path Mel Gorman
2020-02-24 15:20   ` [tip: sched/core] " tip-bot2 for Vincent Guittot
2020-02-24  9:52 ` [PATCH 05/13] sched/numa: Replace runnable_load_avg by load_avg Mel Gorman
2020-02-24 15:20   ` tip-bot2 for Vincent Guittot [this message]
2020-02-24  9:52 ` [PATCH 06/13] sched/numa: Use similar logic to the load balancer for moving between domains with spare capacity Mel Gorman
2020-02-24 15:20   ` [tip: sched/core] " tip-bot2 for Mel Gorman
2020-02-24  9:52 ` [PATCH 07/13] sched/pelt: Remove unused runnable load average Mel Gorman
2020-02-24 15:20   ` [tip: sched/core] " tip-bot2 for Vincent Guittot
2020-02-24  9:52 ` [PATCH 08/13] sched/pelt: Add a new runnable average signal Mel Gorman
2020-02-24 15:20   ` [tip: sched/core] " tip-bot2 for Vincent Guittot
2020-02-24 16:01     ` Valentin Schneider
2020-02-24 16:34       ` Mel Gorman
2020-02-25  8:23       ` Vincent Guittot
2020-02-24  9:52 ` [PATCH 09/13] sched/fair: Take into account runnable_avg to classify group Mel Gorman
2020-02-24 15:20   ` [tip: sched/core] " tip-bot2 for Vincent Guittot
2020-02-24  9:52 ` [PATCH 10/13] sched/numa: Prefer using an idle cpu as a migration target instead of comparing tasks Mel Gorman
2020-02-24 15:20   ` [tip: sched/core] sched/numa: Prefer using an idle CPU " tip-bot2 for Mel Gorman
2020-02-24  9:52 ` [PATCH 11/13] sched/numa: Find an alternative idle CPU if the CPU is part of an active NUMA balance Mel Gorman
2020-02-24 15:20   ` [tip: sched/core] " tip-bot2 for Mel Gorman
2020-02-24  9:52 ` [PATCH 12/13] sched/numa: Bias swapping tasks based on their preferred node Mel Gorman
2020-02-24 15:20   ` [tip: sched/core] " tip-bot2 for Mel Gorman
2020-02-24  9:52 ` [PATCH 13/13] sched/numa: Stop an exhastive search if a reasonable swap candidate or idle CPU is found Mel Gorman
2020-02-24 15:20   ` [tip: sched/core] " tip-bot2 for Mel Gorman
2020-02-24 15:16 ` [PATCH 00/13] Reconcile NUMA balancing decisions with the load balancer v6 Ingo Molnar
2020-02-25 11:59   ` Mel Gorman
2020-02-25 13:28     ` Vincent Guittot
2020-02-25 14:24       ` Mel Gorman
2020-02-25 14:53         ` Vincent Guittot
2020-02-27  9:09         ` Ingo Molnar
2020-03-09 19:12 ` Phil Auld
2020-03-09 20:36   ` Mel Gorman
2020-03-12  9:54     ` Mel Gorman
2020-03-12 12:17       ` Jirka Hladky
     [not found]       ` <CAE4VaGA4q4_qfC5qe3zaLRfiJhvMaSb2WADgOcQeTwmPvNat+A@mail.gmail.com>
2020-03-12 15:56         ` Mel Gorman
2020-03-12 17:06           ` Jirka Hladky
     [not found]           ` <CAE4VaGD8DUEi6JnKd8vrqUL_8HZXnNyHMoK2D+1-F5wo+5Z53Q@mail.gmail.com>
2020-03-12 21:47             ` Mel Gorman
2020-03-12 22:24               ` Jirka Hladky
2020-03-20 15:08                 ` Jirka Hladky
     [not found]                 ` <CAE4VaGC09OfU2zXeq2yp_N0zXMbTku5ETz0KEocGi-RSiKXv-w@mail.gmail.com>
2020-03-20 15:22                   ` Mel Gorman
2020-03-20 15:33                     ` Jirka Hladky
     [not found]                     ` <CAE4VaGBGbTT8dqNyLWAwuiqL8E+3p1_SqP6XTTV71wNZMjc9Zg@mail.gmail.com>
2020-03-20 16:38                       ` Mel Gorman
2020-03-20 17:21                         ` Jirka Hladky
2020-05-07 15:24                         ` Jirka Hladky
2020-05-07 15:54                           ` Mel Gorman
2020-05-07 16:29                             ` Jirka Hladky
2020-05-07 17:49                               ` Phil Auld
     [not found]                                 ` <20200508034741.13036-1-hdanton@sina.com>
2020-05-18 14:52                                   ` Jirka Hladky
     [not found]                                     ` <20200519043154.10876-1-hdanton@sina.com>
2020-05-20 13:58                                       ` Jirka Hladky
2020-05-20 16:01                                         ` Jirka Hladky
2020-05-21 11:06                                         ` Mel Gorman
     [not found]                                         ` <20200521140931.15232-1-hdanton@sina.com>
2020-05-21 16:04                                           ` Mel Gorman
     [not found]                                           ` <20200522010950.3336-1-hdanton@sina.com>
2020-05-22 11:05                                             ` Mel Gorman
2020-05-08  9:22                               ` Mel Gorman
2020-05-08 11:05                                 ` Jirka Hladky
     [not found]                                 ` <CAE4VaGC_v6On-YvqdTwAWu3Mq4ofiV0pLov-QpV+QHr_SJr+Rw@mail.gmail.com>
2020-05-13 14:57                                   ` Jirka Hladky
2020-05-13 15:30                                     ` Mel Gorman
2020-05-13 16:20                                       ` Jirka Hladky
2020-05-14  9:50                                         ` Mel Gorman
     [not found]                                           ` <CAE4VaGCGUFOAZ+YHDnmeJ95o4W0j04Yb7EWnf8a43caUQs_WuQ@mail.gmail.com>
2020-05-14 10:08                                             ` Mel Gorman
2020-05-14 10:22                                               ` Jirka Hladky
2020-05-14 11:50                                                 ` Mel Gorman
2020-05-14 13:34                                                   ` Jirka Hladky
2020-05-14 15:31                                       ` Peter Zijlstra
2020-05-15  8:47                                         ` Mel Gorman
2020-05-15 11:17                                           ` Peter Zijlstra
2020-05-15 13:03                                             ` Mel Gorman
2020-05-15 13:12                                               ` Peter Zijlstra
2020-05-15 13:28                                                 ` Peter Zijlstra
2020-05-15 14:24                                             ` Peter Zijlstra
2020-05-21 10:38                                               ` Mel Gorman
2020-05-21 11:41                                                 ` Peter Zijlstra
2020-05-22 13:28                                                   ` Mel Gorman
2020-05-22 14:38                                                     ` Peter Zijlstra
2020-05-15 11:28                                           ` Peter Zijlstra
2020-05-15 12:22                                             ` Mel Gorman
2020-05-15 12:51                                               ` Peter Zijlstra
2020-05-15 14:43                                       ` Jirka Hladky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=158255763273.28353.4606148712630409487.tip-bot2@tip-bot2 \
    --to=tip-bot2@linutronix.de \
    --cc=a.p.zijlstra@chello.nl \
    --cc=dietmar.eggemann@arm.com \
    --cc=hdanton@sina.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=mgorman@techsingularity.net \
    --cc=mingo@kernel.org \
    --cc=pauld@redhat.com \
    --cc=valentin.schneider@arm.com \
    --cc=vincent.guittot@linaro.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).