LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Lauro Ramos Venancio <lvenanci@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: lwang@redhat.com, riel@redhat.com, Mike Galbraith <efault@gmx.de>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@kernel.org>,
	Lauro Ramos Venancio <lvenanci@redhat.com>
Subject: [RFC 3/3] sched/topology: Different sched groups must not have the same balance cpu
Date: Thu, 13 Apr 2017 10:56:09 -0300
Message-ID: <1492091769-19879-4-git-send-email-lvenanci@redhat.com> (raw)
In-Reply-To: <1492091769-19879-1-git-send-email-lvenanci@redhat.com>

Currently, the group balance cpu is the group's first CPU. But with
overlapping groups, two different groups can have the same first CPU.

This patch uses the group mask to mark all the CPUs that have a
particular group as its main sched group. The group balance cpu is the
first group CPU that is also in the mask.

Signed-off-by: Lauro Ramos Venancio <lvenanci@redhat.com>
---
 kernel/sched/topology.c | 76 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 62 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index d0302ad..7920bbb 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -477,27 +477,31 @@ enum s_alloc {
 };
 
 /*
- * Build an iteration mask that can exclude certain CPUs from the upwards
- * domain traversal.
+ * An overlap sched group may not be present in all CPUs that compose the
+ * group. So build the mask, marking all the group CPUs where it is present.
  *
  * Asymmetric node setups can result in situations where the domain tree is of
  * unequal depth, make sure to skip domains that already cover the entire
  * range.
- *
- * In that case build_sched_domains() will have terminated the iteration early
- * and our sibling sd spans will be empty. Domains should always include the
- * CPU they're built on, so check that.
  */
 static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
 {
-	const struct cpumask *span = sched_domain_span(sd);
+	const struct cpumask *sg_span = sched_group_cpus(sg);
 	struct sd_data *sdd = sd->private;
 	struct sched_domain *sibling;
 	int i;
 
-	for_each_cpu(i, span) {
+	for_each_cpu(i, sg_span) {
 		sibling = *per_cpu_ptr(sdd->sd, i);
-		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+
+		/*
+		 * Asymmetric node setups: skip domains that are already
+		 * done.
+		 */
+		if (!sibling->groups)
+			continue;
+
+		if (!cpumask_equal(sg_span, sched_group_cpus(sibling->groups)))
 			continue;
 
 		cpumask_set_cpu(i, sched_group_mask(sg));
@@ -513,6 +517,28 @@ int group_balance_cpu(struct sched_group *sg)
 	return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
 }
 
+/*
+ * Find the group balance cpu when the group mask is not available yet.
+ */
+static int find_group_balance_cpu(struct sched_domain *sd,
+				  struct sched_group *sg)
+{
+	const struct cpumask *sg_span = sched_group_cpus(sg);
+	struct sd_data *sdd = sd->private;
+	struct sched_domain *sibling;
+	int i;
+
+	for_each_cpu(i, sg_span) {
+		sibling = *per_cpu_ptr(sdd->sd, i);
+		if (cpumask_equal(sg_span, sched_group_cpus(sibling->groups)))
+			return i;
+	}
+
+	WARN(1, "group balance cpu not found.");
+	return 0;
+}
+
+
 static struct sched_group *
 build_group_from_child_sched_domain(struct sched_domain *sd, int cpu)
 {
@@ -554,6 +580,19 @@ static void init_overlap_sched_group(struct sched_domain *sd,
 	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
 }
 
+static void init_overlap_sched_groups(struct sched_domain *sd)
+{
+	struct sched_group *sg = sd->groups;
+	int cpu;
+
+	do {
+		cpu = find_group_balance_cpu(sd, sg);
+		init_overlap_sched_group(sd, sg, cpu);
+
+		sg = sg->next;
+	} while (sg != sd->groups);
+}
+
 static int
 build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 {
@@ -568,8 +607,6 @@ static void init_overlap_sched_group(struct sched_domain *sd,
 	if (!sg)
 		return -ENOMEM;
 
-	init_overlap_sched_group(sd, sg, cpu);
-
 	sd->groups = sg;
 	last = sg;
 	sg->next = sg;
@@ -584,7 +621,12 @@ static void init_overlap_sched_group(struct sched_domain *sd,
 
 		sibling = *per_cpu_ptr(sdd->sd, i);
 
-		/* See the comment near build_group_mask(). */
+		/*
+		 * In Asymmetric node setups, build_sched_domains() will have
+		 * terminated the iteration early and our sibling sd spans will
+		 * be empty. Domains should always include the CPU they're
+		 * built on, so check that.
+		 */
 		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
 			continue;
 
@@ -595,8 +637,6 @@ static void init_overlap_sched_group(struct sched_domain *sd,
 		sg_span = sched_group_cpus(sg);
 		cpumask_or(covered, covered, sg_span);
 
-		init_overlap_sched_group(sd, sg, i);
-
 		last->next = sg;
 		last = sg;
 		sg->next = sd->groups;
@@ -1449,6 +1489,14 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 		}
 	}
 
+	/* Init overlap groups */
+	for_each_cpu(i, cpu_map) {
+		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+			if (sd->flags & SD_OVERLAP)
+				init_overlap_sched_groups(sd);
+		}
+	}
+
 	/* Calculate CPU capacity for physical packages and nodes */
 	for (i = nr_cpumask_bits-1; i >= 0; i--) {
 		if (!cpumask_test_cpu(i, cpu_map))
-- 
1.8.3.1

  parent reply index

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-13 13:56 [RFC 0/3] sched/topology: fix sched groups on NUMA machines with mesh topology Lauro Ramos Venancio
2017-04-13 13:56 ` [RFC 1/3] sched/topology: Refactor function build_overlap_sched_groups() Lauro Ramos Venancio
2017-04-13 14:50   ` Rik van Riel
2017-05-15  9:02   ` [tip:sched/core] " tip-bot for Lauro Ramos Venancio
2017-04-13 13:56 ` [RFC 2/3] sched/topology: fix sched groups on NUMA machines with mesh topology Lauro Ramos Venancio
2017-04-13 15:16   ` Rik van Riel
2017-04-13 15:48   ` Peter Zijlstra
2017-04-13 20:21     ` Lauro Venancio
2017-04-13 21:06       ` Lauro Venancio
2017-04-13 23:38         ` Rik van Riel
2017-04-14 10:48           ` Peter Zijlstra
2017-04-14 11:38   ` Peter Zijlstra
2017-04-14 12:20     ` Peter Zijlstra
2017-05-15  9:03       ` [tip:sched/core] sched/fair, cpumask: Export for_each_cpu_wrap() tip-bot for Peter Zijlstra
2017-05-17 10:53         ` hackbench vs select_idle_sibling; was: " Peter Zijlstra
2017-05-17 12:46           ` Matt Fleming
2017-05-17 14:49           ` Chris Mason
2017-05-19 15:00           ` Matt Fleming
2017-06-05 13:00             ` Matt Fleming
2017-06-06  9:21               ` Peter Zijlstra
2017-06-09 17:52                 ` Chris Mason
2017-06-08  9:22           ` [tip:sched/core] sched/core: Implement new approach to scale select_idle_cpu() tip-bot for Peter Zijlstra
2017-04-14 16:58     ` [RFC 2/3] sched/topology: fix sched groups on NUMA machines with mesh topology Peter Zijlstra
2017-04-17 14:40       ` Lauro Venancio
2017-04-13 13:56 ` Lauro Ramos Venancio [this message]
2017-04-13 15:27   ` [RFC 3/3] sched/topology: Different sched groups must not have the same balance cpu Rik van Riel
2017-04-14 16:49   ` Peter Zijlstra
2017-04-17 15:34     ` Lauro Venancio
2017-04-18 12:32       ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1492091769-19879-4-git-send-email-lvenanci@redhat.com \
    --to=lvenanci@redhat.com \
    --cc=efault@gmx.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lwang@redhat.com \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git