LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Lauro Ramos Venancio <lvenanci@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: lwang@redhat.com, riel@redhat.com, Mike Galbraith <efault@gmx.de>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@kernel.org>,
	Lauro Ramos Venancio <lvenanci@redhat.com>
Subject: [RFC 3/3] sched/topology: Different sched groups must not have the same balance cpu
Date: Thu, 13 Apr 2017 10:56:09 -0300
Message-ID: <1492091769-19879-4-git-send-email-lvenanci@redhat.com> (raw)
In-Reply-To: <1492091769-19879-1-git-send-email-lvenanci@redhat.com>

Currently, the group balance cpu is the group's first CPU. But with
overlapping groups, two different groups can have the same first CPU.

This patch uses the group mask to mark all the CPUs that have a
particular group as its main sched group. The group balance cpu is the
first group CPU that is also in the mask.

Signed-off-by: Lauro Ramos Venancio <lvenanci@redhat.com>
---
 kernel/sched/topology.c | 76 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 62 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index d0302ad..7920bbb 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -477,27 +477,31 @@ enum s_alloc {
 };
 
 /*
- * Build an iteration mask that can exclude certain CPUs from the upwards
- * domain traversal.
+ * An overlap sched group may not be present in all CPUs that compose the
+ * group. So build the mask, marking all the group CPUs where it is present.
  *
  * Asymmetric node setups can result in situations where the domain tree is of
  * unequal depth, make sure to skip domains that already cover the entire
  * range.
- *
- * In that case build_sched_domains() will have terminated the iteration early
- * and our sibling sd spans will be empty. Domains should always include the
- * CPU they're built on, so check that.
  */
 static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
 {
-	const struct cpumask *span = sched_domain_span(sd);
+	const struct cpumask *sg_span = sched_group_cpus(sg);
 	struct sd_data *sdd = sd->private;
 	struct sched_domain *sibling;
 	int i;
 
-	for_each_cpu(i, span) {
+	for_each_cpu(i, sg_span) {
 		sibling = *per_cpu_ptr(sdd->sd, i);
-		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+
+		/*
+		 * Asymmetric node setups: skip domains that are already
+		 * done.
+		 */
+		if (!sibling->groups)
+			continue;
+
+		if (!cpumask_equal(sg_span, sched_group_cpus(sibling->groups)))
 			continue;
 
 		cpumask_set_cpu(i, sched_group_mask(sg));
@@ -513,6 +517,28 @@ int group_balance_cpu(struct sched_group *sg)
 	return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
 }
 
+/*
+ * Find the group balance cpu when the group mask is not available yet.
+ */
+static int find_group_balance_cpu(struct sched_domain *sd,
+				  struct sched_group *sg)
+{
+	const struct cpumask *sg_span = sched_group_cpus(sg);
+	struct sd_data *sdd = sd->private;
+	struct sched_domain *sibling;
+	int i;
+
+	for_each_cpu(i, sg_span) {
+		sibling = *per_cpu_ptr(sdd->sd, i);
+		if (cpumask_equal(sg_span, sched_group_cpus(sibling->groups)))
+			return i;
+	}
+
+	WARN(1, "group balance cpu not found.");
+	return 0;
+}
+
+
 static struct sched_group *
 build_group_from_child_sched_domain(struct sched_domain *sd, int cpu)
 {
@@ -554,6 +580,19 @@ static void init_overlap_sched_group(struct sched_domain *sd,
 	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
 }
 
+static void init_overlap_sched_groups(struct sched_domain *sd)
+{
+	struct sched_group *sg = sd->groups;
+	int cpu;
+
+	do {
+		cpu = find_group_balance_cpu(sd, sg);
+		init_overlap_sched_group(sd, sg, cpu);
+
+		sg = sg->next;
+	} while (sg != sd->groups);
+}
+
 static int
 build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 {
@@ -568,8 +607,6 @@ static void init_overlap_sched_group(struct sched_domain *sd,
 	if (!sg)
 		return -ENOMEM;
 
-	init_overlap_sched_group(sd, sg, cpu);
-
 	sd->groups = sg;
 	last = sg;
 	sg->next = sg;
@@ -584,7 +621,12 @@ static void init_overlap_sched_group(struct sched_domain *sd,
 
 		sibling = *per_cpu_ptr(sdd->sd, i);
 
-		/* See the comment near build_group_mask(). */
+		/*
+		 * In Asymmetric node setups, build_sched_domains() will have
+		 * terminated the iteration early and our sibling sd spans will
+		 * be empty. Domains should always include the CPU they're
+		 * built on, so check that.
+		 */
 		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
 			continue;
 
@@ -595,8 +637,6 @@ static void init_overlap_sched_group(struct sched_domain *sd,
 		sg_span = sched_group_cpus(sg);
 		cpumask_or(covered, covered, sg_span);
 
-		init_overlap_sched_group(sd, sg, i);
-
 		last->next = sg;
 		last = sg;
 		sg->next = sd->groups;
@@ -1449,6 +1489,14 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 		}
 	}
 
+	/* Init overlap groups */
+	for_each_cpu(i, cpu_map) {
+		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+			if (sd->flags & SD_OVERLAP)
+				init_overlap_sched_groups(sd);
+		}
+	}
+
 	/* Calculate CPU capacity for physical packages and nodes */
 	for (i = nr_cpumask_bits-1; i >= 0; i--) {
 		if (!cpumask_test_cpu(i, cpu_map))
-- 
1.8.3.1

  parent reply index

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-13 13:56 [RFC 0/3] sched/topology: fix sched groups on NUMA machines with mesh topology Lauro Ramos Venancio
2017-04-13 13:56 ` [RFC 1/3] sched/topology: Refactor function build_overlap_sched_groups() Lauro Ramos Venancio
2017-04-13 14:50   ` Rik van Riel
2017-05-15  9:02   ` [tip:sched/core] " tip-bot for Lauro Ramos Venancio
2017-04-13 13:56 ` [RFC 2/3] sched/topology: fix sched groups on NUMA machines with mesh topology Lauro Ramos Venancio
2017-04-13 15:16   ` Rik van Riel
2017-04-13 15:48   ` Peter Zijlstra
2017-04-13 20:21     ` Lauro Venancio
2017-04-13 21:06       ` Lauro Venancio
2017-04-13 23:38         ` Rik van Riel
2017-04-14 10:48           ` Peter Zijlstra
2017-04-14 11:38   ` Peter Zijlstra
2017-04-14 12:20     ` Peter Zijlstra
2017-05-15  9:03       ` [tip:sched/core] sched/fair, cpumask: Export for_each_cpu_wrap() tip-bot for Peter Zijlstra
2017-05-17 10:53         ` hackbench vs select_idle_sibling; was: " Peter Zijlstra
2017-05-17 12:46           ` Matt Fleming
2017-05-17 14:49           ` Chris Mason
2017-05-19 15:00           ` Matt Fleming
2017-06-05 13:00             ` Matt Fleming
2017-06-06  9:21               ` Peter Zijlstra
2017-06-09 17:52                 ` Chris Mason
2017-06-08  9:22           ` [tip:sched/core] sched/core: Implement new approach to scale select_idle_cpu() tip-bot for Peter Zijlstra
2017-04-14 16:58     ` [RFC 2/3] sched/topology: fix sched groups on NUMA machines with mesh topology Peter Zijlstra
2017-04-17 14:40       ` Lauro Venancio
2017-04-13 13:56 ` Lauro Ramos Venancio [this message]
2017-04-13 15:27   ` [RFC 3/3] sched/topology: Different sched groups must not have the same balance cpu Rik van Riel
2017-04-14 16:49   ` Peter Zijlstra
2017-04-17 15:34     ` Lauro Venancio
2017-04-18 12:32       ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1492091769-19879-4-git-send-email-lvenanci@redhat.com \
    --to=lvenanci@redhat.com \
    --cc=efault@gmx.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lwang@redhat.com \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git