All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings
@ 2017-03-02  0:49 Oliver O'Halloran
  2017-03-02  0:49 ` [PATCH 2/5] powerpc/smp: add set_cpus_related() Oliver O'Halloran
                   ` (5 more replies)
  0 siblings, 6 replies; 20+ messages in thread
From: Oliver O'Halloran @ 2017-03-02  0:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

To determine which logical CPUs are on the same core the kernel uses the
ibm,chip-id property from the device tree node associated with that cpu.
The lookup for this information is currently open coded in both
traverse_core_siblings() and traverse_siblings_chip_id(). This patch
replaces these manual lookups with the existing cpu_to_chip_id() function.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/kernel/smp.c | 39 +++++++++++++--------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 893bd7f79be6..dfe0e1d9cd06 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -613,19 +613,11 @@ EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
 
 static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
 {
-	const struct cpumask *mask;
-	struct device_node *np;
-	int i, plen;
-	const __be32 *prop;
+	const struct cpumask *mask = add ? cpu_online_mask : cpu_present_mask;
+	int i;
 
-	mask = add ? cpu_online_mask : cpu_present_mask;
 	for_each_cpu(i, mask) {
-		np = of_get_cpu_node(i, NULL);
-		if (!np)
-			continue;
-		prop = of_get_property(np, "ibm,chip-id", &plen);
-		if (prop && plen == sizeof(int) &&
-		    of_read_number(prop, 1) == chipid) {
+		if (cpu_to_chip_id(i) == chipid) {
 			if (add) {
 				cpumask_set_cpu(cpu, cpu_core_mask(i));
 				cpumask_set_cpu(i, cpu_core_mask(cpu));
@@ -634,7 +626,6 @@ static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
 				cpumask_clear_cpu(i, cpu_core_mask(cpu));
 			}
 		}
-		of_node_put(np);
 	}
 }
 
@@ -664,23 +655,19 @@ static void traverse_core_siblings(int cpu, bool add)
 {
 	struct device_node *l2_cache, *np;
 	const struct cpumask *mask;
-	int i, chip, plen;
-	const __be32 *prop;
+	int chip_id;
+	int i;
 
-	/* First see if we have ibm,chip-id properties in cpu nodes */
-	np = of_get_cpu_node(cpu, NULL);
-	if (np) {
-		chip = -1;
-		prop = of_get_property(np, "ibm,chip-id", &plen);
-		if (prop && plen == sizeof(int))
-			chip = of_read_number(prop, 1);
-		of_node_put(np);
-		if (chip >= 0) {
-			traverse_siblings_chip_id(cpu, add, chip);
-			return;
-		}
+	/* threads that share a chip-id are considered siblings (same die) */
+	chip_id = cpu_to_chip_id(cpu);
+
+	if (chip_id >= 0) {
+		traverse_siblings_chip_id(cpu, add, chip_id);
+		return;
 	}
 
+	/* if the chip-id fails then threads which share L2 cache are */
+
 	l2_cache = cpu_to_l2cache(cpu);
 	mask = add ? cpu_online_mask : cpu_present_mask;
 	for_each_cpu(i, mask) {
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/5] powerpc/smp: add set_cpus_related()
  2017-03-02  0:49 [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings Oliver O'Halloran
@ 2017-03-02  0:49 ` Oliver O'Halloran
  2017-03-15 11:18   ` Michael Ellerman
  2017-03-02  0:49 ` [PATCH 3/5] powerpc/smp: Add update_cpu_masks() Oliver O'Halloran
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Oliver O'Halloran @ 2017-03-02  0:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

Add a helper function for updating the per-cpu core and sibling thread
cpumasks. This helper just sets (or clears) the relevant bit in the
cpumasks of each CPU. This is open-coded in several places inside the
mask setup code so moving it into a separate function is a sensible
cleanup.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/kernel/smp.c | 61 ++++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index dfe0e1d9cd06..1c531887ca51 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -377,6 +377,25 @@ static void smp_store_cpu_info(int id)
 #endif
 }
 
+/*
+ * Relationships between CPUs are maintained in a set of per-cpu cpumasks. We
+ * need to ensure that they are kept consistant between CPUs when they are
+ * changed.
+ *
+ * This is slightly tricky since the core mask must be a strict superset of
+ * the sibling mask.
+ */
+static void set_cpus_related(int i, int j, bool related, struct cpumask *(*relation_fn)(int))
+{
+	if (related) {
+		cpumask_set_cpu(i, relation_fn(j));
+		cpumask_set_cpu(j, relation_fn(i));
+	} else {
+		cpumask_clear_cpu(i, relation_fn(j));
+		cpumask_clear_cpu(j, relation_fn(i));
+	}
+}
+
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned int cpu;
@@ -616,17 +635,9 @@ static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
 	const struct cpumask *mask = add ? cpu_online_mask : cpu_present_mask;
 	int i;
 
-	for_each_cpu(i, mask) {
-		if (cpu_to_chip_id(i) == chipid) {
-			if (add) {
-				cpumask_set_cpu(cpu, cpu_core_mask(i));
-				cpumask_set_cpu(i, cpu_core_mask(cpu));
-			} else {
-				cpumask_clear_cpu(cpu, cpu_core_mask(i));
-				cpumask_clear_cpu(i, cpu_core_mask(cpu));
-			}
-		}
-	}
+	for_each_cpu(i, mask)
+		if (cpu_to_chip_id(i) == chipid)
+			set_cpus_related(cpu, i, add, cpu_core_mask);
 }
 
 /* Must be called when no change can occur to cpu_present_mask,
@@ -666,23 +677,17 @@ static void traverse_core_siblings(int cpu, bool add)
 		return;
 	}
 
-	/* if the chip-id fails then threads which share L2 cache are */
-
+	/* if the chip-id fails then group siblings by the L2 cache */
 	l2_cache = cpu_to_l2cache(cpu);
 	mask = add ? cpu_online_mask : cpu_present_mask;
 	for_each_cpu(i, mask) {
 		np = cpu_to_l2cache(i);
 		if (!np)
 			continue;
-		if (np == l2_cache) {
-			if (add) {
-				cpumask_set_cpu(cpu, cpu_core_mask(i));
-				cpumask_set_cpu(i, cpu_core_mask(cpu));
-			} else {
-				cpumask_clear_cpu(cpu, cpu_core_mask(i));
-				cpumask_clear_cpu(i, cpu_core_mask(cpu));
-			}
-		}
+
+		if (np == l2_cache)
+			set_cpus_related(cpu, i, add, cpu_core_mask);
+
 		of_node_put(np);
 	}
 	of_node_put(l2_cache);
@@ -720,15 +725,13 @@ void start_secondary(void *unused)
 	for (i = 0; i < threads_per_core; i++) {
 		if (cpu_is_offline(base + i) && (cpu != base + i))
 			continue;
-		cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
-		cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
+		set_cpus_related(cpu, base + i, true, cpu_sibling_mask);
 
 		/* cpu_core_map should be a superset of
 		 * cpu_sibling_map even if we don't have cache
 		 * information, so update the former here, too.
 		 */
-		cpumask_set_cpu(cpu, cpu_core_mask(base + i));
-		cpumask_set_cpu(base + i, cpu_core_mask(cpu));
+		set_cpus_related(cpu, base + i, true, cpu_core_mask);
 	}
 	traverse_core_siblings(cpu, true);
 
@@ -818,10 +821,8 @@ int __cpu_disable(void)
 	/* Update sibling maps */
 	base = cpu_first_thread_sibling(cpu);
 	for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
-		cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
-		cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
-		cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
-		cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
+		set_cpus_related(cpu, base + i, false, cpu_sibling_mask);
+		set_cpus_related(cpu, base + i, false, cpu_core_mask);
 	}
 	traverse_core_siblings(cpu, false);
 
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 3/5] powerpc/smp: Add update_cpu_masks()
  2017-03-02  0:49 [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings Oliver O'Halloran
  2017-03-02  0:49 ` [PATCH 2/5] powerpc/smp: add set_cpus_related() Oliver O'Halloran
@ 2017-03-02  0:49 ` Oliver O'Halloran
  2017-03-15 11:18   ` Michael Ellerman
  2017-03-02  0:49 ` [PATCH 4/5] powerpc/smp: add cpu_cache_mask Oliver O'Halloran
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Oliver O'Halloran @ 2017-03-02  0:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

When adding and removing a CPU from the system the per-cpu masks that
are used by the scheduler to construct scheduler domains need to be updated
to account for the cpu entering or exiting the system. Currently this
logic is open-coded for the thread sibling mask and shared for the core
mask. This patch moves all the logic for rebuilding these masks into a
single function and simplifies the logic which determines which CPUs are
within a "core".

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/kernel/smp.c | 90 ++++++++++++++++++++++++++++-------------------
 1 file changed, 54 insertions(+), 36 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 1c531887ca51..3922cace927e 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -630,14 +630,20 @@ int cpu_first_thread_of_core(int core)
 }
 EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
 
-static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
+static bool update_core_mask_by_chip_id(int cpu, bool add)
 {
 	const struct cpumask *mask = add ? cpu_online_mask : cpu_present_mask;
+	int chipid = cpu_to_chip_id(cpu);
 	int i;
 
+	if (chipid == -1)
+		return false;
+
 	for_each_cpu(i, mask)
 		if (cpu_to_chip_id(i) == chipid)
 			set_cpus_related(cpu, i, add, cpu_core_mask);
+
+	return true;
 }
 
 /* Must be called when no change can occur to cpu_present_mask,
@@ -662,42 +668,72 @@ static struct device_node *cpu_to_l2cache(int cpu)
 	return cache;
 }
 
-static void traverse_core_siblings(int cpu, bool add)
+static bool update_core_mask_by_l2(int cpu, bool onlining)
 {
+	const struct cpumask *mask = onlining ? cpu_online_mask : cpu_present_mask;
 	struct device_node *l2_cache, *np;
-	const struct cpumask *mask;
-	int chip_id;
 	int i;
 
-	/* threads that share a chip-id are considered siblings (same die) */
-	chip_id = cpu_to_chip_id(cpu);
-
-	if (chip_id >= 0) {
-		traverse_siblings_chip_id(cpu, add, chip_id);
-		return;
-	}
-
-	/* if the chip-id fails then group siblings by the L2 cache */
 	l2_cache = cpu_to_l2cache(cpu);
-	mask = add ? cpu_online_mask : cpu_present_mask;
+	if (l2_cache == NULL)
+		return false;
+
 	for_each_cpu(i, mask) {
 		np = cpu_to_l2cache(i);
 		if (!np)
 			continue;
 
 		if (np == l2_cache)
-			set_cpus_related(cpu, i, add, cpu_core_mask);
+			set_cpus_related(cpu, i, onlining, cpu_core_mask);
 
 		of_node_put(np);
 	}
 	of_node_put(l2_cache);
+
+	return true;
+}
+
+static void update_thread_mask(int cpu, bool onlining)
+{
+	int base = cpu_first_thread_sibling(cpu);
+	int i;
+
+	pr_info("CPUDEBUG: onlining cpu %d, base %d, thread_per_core %d",
+		cpu, base, threads_per_core);
+
+	for (i = 0; i < threads_per_core; i++) {
+		/* Threads are onlined one by one. By the final time this
+		 * function is called for the core the sibling mask for each
+		 * thread will be complete, but we need to ensure that offline
+		 * threads aren't touched before they run start_secondary() */
+		if (onlining && cpu_is_offline(base + i) && (cpu != base + i))
+			continue;
+
+		set_cpus_related(cpu, base + i, onlining, cpu_sibling_mask);
+	}
+}
+
+static void update_cpu_masks(int cpu, bool onlining)
+{
+	int i;
+
+	update_thread_mask(cpu, onlining);
+
+	if (update_core_mask_by_chip_id(cpu, onlining))
+		return;
+
+	if (update_core_mask_by_l2(cpu, onlining))
+		return;
+
+	/* if all else fails duplicate the sibling mask */
+	for_each_cpu(i, cpu_sibling_mask(cpu))
+		set_cpus_related(cpu, i, onlining, cpu_core_mask);
 }
 
 /* Activate a secondary processor. */
 void start_secondary(void *unused)
 {
 	unsigned int cpu = smp_processor_id();
-	int i, base;
 
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
@@ -721,19 +757,7 @@ void start_secondary(void *unused)
 	vdso_getcpu_init();
 #endif
 	/* Update sibling maps */
-	base = cpu_first_thread_sibling(cpu);
-	for (i = 0; i < threads_per_core; i++) {
-		if (cpu_is_offline(base + i) && (cpu != base + i))
-			continue;
-		set_cpus_related(cpu, base + i, true, cpu_sibling_mask);
-
-		/* cpu_core_map should be a superset of
-		 * cpu_sibling_map even if we don't have cache
-		 * information, so update the former here, too.
-		 */
-		set_cpus_related(cpu, base + i, true, cpu_core_mask);
-	}
-	traverse_core_siblings(cpu, true);
+	update_cpu_masks(cpu, true);
 
 	set_numa_node(numa_cpu_lookup_table[cpu]);
 	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
@@ -808,7 +832,6 @@ void __init smp_cpus_done(unsigned int max_cpus)
 int __cpu_disable(void)
 {
 	int cpu = smp_processor_id();
-	int base, i;
 	int err;
 
 	if (!smp_ops->cpu_disable)
@@ -819,12 +842,7 @@ int __cpu_disable(void)
 		return err;
 
 	/* Update sibling maps */
-	base = cpu_first_thread_sibling(cpu);
-	for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
-		set_cpus_related(cpu, base + i, false, cpu_sibling_mask);
-		set_cpus_related(cpu, base + i, false, cpu_core_mask);
-	}
-	traverse_core_siblings(cpu, false);
+	update_cpu_masks(cpu, false);
 
 	return 0;
 }
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 4/5] powerpc/smp: add cpu_cache_mask
  2017-03-02  0:49 [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings Oliver O'Halloran
  2017-03-02  0:49 ` [PATCH 2/5] powerpc/smp: add set_cpus_related() Oliver O'Halloran
  2017-03-02  0:49 ` [PATCH 3/5] powerpc/smp: Add update_cpu_masks() Oliver O'Halloran
@ 2017-03-02  0:49 ` Oliver O'Halloran
  2017-03-15 11:26   ` Michael Ellerman
  2017-03-02  0:49 ` [PATCH 5/5] powerpc/smp: Add Power9 scheduler topology Oliver O'Halloran
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Oliver O'Halloran @ 2017-03-02  0:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

Traditionally we have only ever tracked which CPUs are in the same core
(cpu_sibling_mask) and on the same die (cpu_core_mask). For Power9 we
need to be aware of which CPUs share cache with each other so this patch
adds cpu_cache_mask and the underlying cpu_cache_map variable to track
this.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/include/asm/smp.h | 6 ++++++
 arch/powerpc/kernel/smp.c      | 5 +++++
 2 files changed, 11 insertions(+)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 32db16d2e7ad..a7fc3a105d61 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -94,6 +94,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys)
 #endif
 
 DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DECLARE_PER_CPU(cpumask_var_t, cpu_cache_map);
 DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
 
 static inline struct cpumask *cpu_sibling_mask(int cpu)
@@ -106,6 +107,11 @@ static inline struct cpumask *cpu_core_mask(int cpu)
 	return per_cpu(cpu_core_map, cpu);
 }
 
+static inline struct cpumask *cpu_cache_mask(int cpu)
+{
+	return per_cpu(cpu_cache_map, cpu);
+}
+
 extern int cpu_to_core_id(int cpu);
 
 /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 3922cace927e..5571f30ff72d 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -72,9 +72,11 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
 struct thread_info *secondary_ti;
 
 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_cache_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
 
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_cache_map);
 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 
 /* SMP operations for this machine */
@@ -415,6 +417,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	for_each_possible_cpu(cpu) {
 		zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
 					GFP_KERNEL, cpu_to_node(cpu));
+		zalloc_cpumask_var_node(&per_cpu(cpu_cache_map, cpu),
+					GFP_KERNEL, cpu_to_node(cpu));
 		zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
 					GFP_KERNEL, cpu_to_node(cpu));
 		/*
@@ -428,6 +432,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	}
 
 	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
+	cpumask_set_cpu(boot_cpuid, cpu_cache_mask(boot_cpuid));
 	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
 
 	if (smp_ops && smp_ops->probe)
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 5/5] powerpc/smp: Add Power9 scheduler topology
  2017-03-02  0:49 [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings Oliver O'Halloran
                   ` (2 preceding siblings ...)
  2017-03-02  0:49 ` [PATCH 4/5] powerpc/smp: add cpu_cache_mask Oliver O'Halloran
@ 2017-03-02  0:49 ` Oliver O'Halloran
  2017-03-02 10:25   ` Balbir Singh
  2017-03-15 11:30   ` Michael Ellerman
  2017-03-02  3:44 ` [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings Balbir Singh
  2017-03-15 11:18 ` Michael Ellerman
  5 siblings, 2 replies; 20+ messages in thread
From: Oliver O'Halloran @ 2017-03-02  0:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran

In previous generations of Power processors each core had a private L2
cache. The Power9 processor has a slightly different architecture where
the L2 cache is shared among pairs of cores rather than being completely
private.

Making the scheduler aware of this cache sharing allows the scheduler to
make more intelligent migration decisions. When one core in the pair is
overloaded, tasks can be migrated to its paired core to improve throughput
without the cache-refilling penalty typically associated with task
migration.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/kernel/smp.c | 44 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5571f30ff72d..5e1811b24415 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -673,7 +673,7 @@ static struct device_node *cpu_to_l2cache(int cpu)
 	return cache;
 }
 
-static bool update_core_mask_by_l2(int cpu, bool onlining)
+static bool update_mask_by_l2(int cpu, bool onlining, struct cpumask *(*mask_fn)(int))
 {
 	const struct cpumask *mask = onlining ? cpu_online_mask : cpu_present_mask;
 	struct device_node *l2_cache, *np;
@@ -689,7 +689,7 @@ static bool update_core_mask_by_l2(int cpu, bool onlining)
 			continue;
 
 		if (np == l2_cache)
-			set_cpus_related(cpu, i, onlining, cpu_core_mask);
+			set_cpus_related(cpu, i, onlining, mask_fn);
 
 		of_node_put(np);
 	}
@@ -724,10 +724,17 @@ static void update_cpu_masks(int cpu, bool onlining)
 
 	update_thread_mask(cpu, onlining);
 
+	/* we need the l2 cache mask for the power9 scheduler topology */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		update_mask_by_l2(cpu, onlining, cpu_cache_mask);
+
+	/* now build the core mask */
+	set_cpus_related(cpu, cpu, onlining, cpu_core_mask);
+
 	if (update_core_mask_by_chip_id(cpu, onlining))
 		return;
 
-	if (update_core_mask_by_l2(cpu, onlining))
+	if (update_mask_by_l2(cpu, onlining, cpu_core_mask))
 		return;
 
 	/* if all else fails duplicate the sibling mask */
@@ -805,6 +812,32 @@ static struct sched_domain_topology_level powerpc_topology[] = {
 	{ NULL, },
 };
 
+
+/* P9 has a slightly odd architecture where two, four thread cores share an L2
+ * cache. For highly threaded workloads it makes sense to try and keep tasks
+ * inside the pair for better cache utilisation so the scheduler needs to be
+ * aware of this. */
+static int powerpc_shared_cache_flags(void)
+{
+	return SD_SHARE_PKG_RESOURCES | SD_PREFER_SIBLING;
+}
+
+/* this is kind of gross, but passing cpu_cache_mask directly
+ * causes the build to fail due to incompatible pointer types */
+static inline const struct cpumask *cpu_cache_mask_c(int cpu)
+{
+	return cpu_cache_mask(cpu);
+}
+
+static struct sched_domain_topology_level power9_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+	{ cpu_cache_mask_c, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	cpumask_var_t old_mask;
@@ -829,7 +862,10 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 	dump_numa_cpu_topology();
 
-	set_sched_topology(powerpc_topology);
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		set_sched_topology(power9_topology);
+	else
+		set_sched_topology(powerpc_topology);
 
 }
 
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings
  2017-03-02  0:49 [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings Oliver O'Halloran
                   ` (3 preceding siblings ...)
  2017-03-02  0:49 ` [PATCH 5/5] powerpc/smp: Add Power9 scheduler topology Oliver O'Halloran
@ 2017-03-02  3:44 ` Balbir Singh
  2017-03-15 11:18 ` Michael Ellerman
  5 siblings, 0 replies; 20+ messages in thread
From: Balbir Singh @ 2017-03-02  3:44 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev

On Thu, Mar 02, 2017 at 11:49:16AM +1100, Oliver O'Halloran wrote:
> To determine which logical CPUs are on the same core the kernel uses the
> ibm,chipid property from the device tree node associated with that cpu.
> The lookup for this this information is currently open coded in both
> traverse_siblings() and traverse_siblings_chip_id(). This patch replaces
> these manual lookups with the existing cpu_to_chip_id() function.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---

Looks like a nice cleanup

Balbir

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 5/5] powerpc/smp: Add Power9 scheduler topology
  2017-03-02  0:49 ` [PATCH 5/5] powerpc/smp: Add Power9 scheduler topology Oliver O'Halloran
@ 2017-03-02 10:25   ` Balbir Singh
  2017-03-15 11:33     ` Michael Ellerman
  2017-03-15 11:30   ` Michael Ellerman
  1 sibling, 1 reply; 20+ messages in thread
From: Balbir Singh @ 2017-03-02 10:25 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev



On 02/03/17 11:49, Oliver O'Halloran wrote:
> In previous generations of Power processors each core had a private L2
> cache. The Power9 processor has a slightly different architecture where
> the L2 cache is shared among pairs of cores rather than being completely
> private.
> 
> Making the scheduler aware of this cache sharing allows the scheduler to
> make more intelligent migration decisions. When one core in the pair is
> overloaded tasks can be migrated to its paired core to improve throughput
> without cache-refilling penality typically associated with task
> migration.

Could you please describe the changes to sched_domains w.r.t before and
after for P9.

I think most of the changes make sense, but some data to back them up
including any results you have to show that these patches help would be
nice

Balbir Singh.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings
  2017-03-02  0:49 [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings Oliver O'Halloran
                   ` (4 preceding siblings ...)
  2017-03-02  3:44 ` [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings Balbir Singh
@ 2017-03-15 11:18 ` Michael Ellerman
  2017-03-23  1:09   ` Oliver O'Halloran
  5 siblings, 1 reply; 20+ messages in thread
From: Michael Ellerman @ 2017-03-15 11:18 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev; +Cc: Oliver O'Halloran

Oliver O'Halloran <oohall@gmail.com> writes:

> To determine which logical CPUs are on the same core the kernel uses the
> ibm,chipid property from the device tree node associated with that cpu.
> The lookup for this this information is currently open coded in both
> traverse_siblings() and traverse_siblings_chip_id(). This patch replaces
> these manual lookups with the existing cpu_to_chip_id() function.

Some minor nits.

cpu_to_chip_id() actually searches recursively up the parents until it
finds a ibm,chip-id, so it's not a 1:1 replacement for the existing
logic, but it's probably still an OK conversion. It's still worth
mentioning in the change log though.

> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 893bd7f79be6..dfe0e1d9cd06 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -664,23 +655,19 @@ static void traverse_core_siblings(int cpu, bool add)
>  {
>  	struct device_node *l2_cache, *np;
>  	const struct cpumask *mask;
> -	int i, chip, plen;
> -	const __be32 *prop;
> +	int chip_id;
> +	int i;
>  
> -	/* First see if we have ibm,chip-id properties in cpu nodes */
> -	np = of_get_cpu_node(cpu, NULL);
> -	if (np) {
> -		chip = -1;
> -		prop = of_get_property(np, "ibm,chip-id", &plen);
> -		if (prop && plen == sizeof(int))
> -			chip = of_read_number(prop, 1);
> -		of_node_put(np);
> -		if (chip >= 0) {
> -			traverse_siblings_chip_id(cpu, add, chip);
> -			return;
> -		}
> +	/* threads that share a chip-id are considered siblings (same die) */

You might know it means the "same die", but AFAIK there's no actual
definition for what the chip-id means, so let's not write comments that
might be wrong in future. Just saying they're considered siblings is
sufficient.

Also "Threads" :)

cheers

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/5] powerpc/smp: add set_cpus_related()
  2017-03-02  0:49 ` [PATCH 2/5] powerpc/smp: add set_cpus_related() Oliver O'Halloran
@ 2017-03-15 11:18   ` Michael Ellerman
  2017-03-23  1:27     ` Oliver O'Halloran
  0 siblings, 1 reply; 20+ messages in thread
From: Michael Ellerman @ 2017-03-15 11:18 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev; +Cc: Oliver O'Halloran

Oliver O'Halloran <oohall@gmail.com> writes:
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index dfe0e1d9cd06..1c531887ca51 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -377,6 +377,25 @@ static void smp_store_cpu_info(int id)
>  #endif
>  }
>  
> +/*
> + * Relationships between CPUs are maintained in a set of per-cpu cpumasks. We
> + * need to ensure that they are kept consistant between CPUs when they are
> + * changed.
> + *
> + * This is slightly tricky since the core mask must be a strict superset of
> + * the sibling mask.
> + */
> +static void set_cpus_related(int i, int j, bool related, struct cpumask *(*relation_fn)(int))
> +{
> +	if (related) {
> +		cpumask_set_cpu(i, relation_fn(j));
> +		cpumask_set_cpu(j, relation_fn(i));
> +	} else {
> +		cpumask_clear_cpu(i, relation_fn(j));
> +		cpumask_clear_cpu(j, relation_fn(i));
> +	}
> +}

I think you pushed the abstraction one notch too far on this one, or
perhaps not far enough.

We end up with a function called "set" that might clear, depending on a
bool you pass. Which is hard to parse, eg:

	set_cpus_related(cpu, base + i, false, cpu_sibling_mask);

And I know there's two places where we pass an existing bool "add", but
there's four where we pass true or false.

If we want to push it in that direction I think we should just pass the
set/clear routine instead of the flag, so:

	do_cpus_related(cpu, base + i, cpumask_clear_cpu, cpu_sibling_mask);

But that might be overdoing it.

So I think we should just do:

static void set_cpus_related(int i, int j, struct cpumask *(*mask_func)(int))
{
        cpumask_set_cpu(i, mask_func(j));
        cpumask_set_cpu(j, mask_func(i));
}

static void clear_cpus_related(int i, int j, struct cpumask *(*mask_func)(int))
{
        cpumask_clear_cpu(i, mask_func(j));
        cpumask_clear_cpu(j, mask_func(i));
}


So the cases with add become:

	if (add)
		set_cpus_related(cpu, i, cpu_core_mask(i));
	else
		clear_cpus_related(cpu, i, cpu_core_mask(i));

Which is not as pretty but more explicit.

And the other cases look much better, eg:

	clear_cpus_related(cpu, base + i, cpu_sibling_mask);

??

cheers

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/5] powerpc/smp: Add update_cpu_masks()
  2017-03-02  0:49 ` [PATCH 3/5] powerpc/smp: Add update_cpu_masks() Oliver O'Halloran
@ 2017-03-15 11:18   ` Michael Ellerman
  0 siblings, 0 replies; 20+ messages in thread
From: Michael Ellerman @ 2017-03-15 11:18 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev; +Cc: Oliver O'Halloran

Oliver O'Halloran <oohall@gmail.com> writes:

> When adding and removing a CPU from the system the per-cpu masks that
> are used by the scheduler to construct scheduler domains need to be updated
> to account for the cpu entering or exiting the system. Currently logic this
> is open-coded for the thread sibling mask and shared for the core mask.

"logic this is"

> This patch moves all the logic for rebuilding these masks into a single
> function and simplifies the logic which determines which CPUs are within
> a "core".

The diff is hard to read but I think it's OK. Other than ...

> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 1c531887ca51..3922cace927e 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -630,14 +630,20 @@ int cpu_first_thread_of_core(int core)
>  }
>  EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
>  
> -static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
> +static bool update_core_mask_by_chip_id(int cpu, bool add)
                                                         ^
                                                         call it
                                                         onlining like below

>  {
>  	const struct cpumask *mask = add ? cpu_online_mask : cpu_present_mask;
> +	int chipid = cpu_to_chip_id(cpu);
>  	int i;
>  
> +	if (chipid == -1)
> +		return false;
> +
>  	for_each_cpu(i, mask)
>  		if (cpu_to_chip_id(i) == chipid)
>  			set_cpus_related(cpu, i, add, cpu_core_mask);
> +
> +	return true;
>  }
>  
>  /* Must be called when no change can occur to cpu_present_mask,
> @@ -662,42 +668,72 @@ static struct device_node *cpu_to_l2cache(int cpu)
>  	return cache;
>  }
>  
> -static void traverse_core_siblings(int cpu, bool add)
> +static bool update_core_mask_by_l2(int cpu, bool onlining)
>  {
> +	const struct cpumask *mask = onlining ? cpu_online_mask : cpu_present_mask;
>  	struct device_node *l2_cache, *np;
> -	const struct cpumask *mask;
> -	int chip_id;
>  	int i;
>  
> -	/* threads that share a chip-id are considered siblings (same die) */
> -	chip_id = cpu_to_chip_id(cpu);
> -
> -	if (chip_id >= 0) {
> -		traverse_siblings_chip_id(cpu, add, chip_id);
> -		return;
> -	}
> -
> -	/* if the chip-id fails then group siblings by the L2 cache */
>  	l2_cache = cpu_to_l2cache(cpu);
> -	mask = add ? cpu_online_mask : cpu_present_mask;
> +	if (l2_cache == NULL)
> +		return false;
> +
>  	for_each_cpu(i, mask) {
>  		np = cpu_to_l2cache(i);
>  		if (!np)
>  			continue;
>  
>  		if (np == l2_cache)
> -			set_cpus_related(cpu, i, add, cpu_core_mask);
> +			set_cpus_related(cpu, i, onlining, cpu_core_mask);
>  
>  		of_node_put(np);
>  	}
>  	of_node_put(l2_cache);
> +
> +	return true;
> +}
> +
> +static void update_thread_mask(int cpu, bool onlining)
> +{
> +	int base = cpu_first_thread_sibling(cpu);
> +	int i;
> +
> +	pr_info("CPUDEBUG: onlining cpu %d, base %d, thread_per_core %d",
> +		cpu, base, threads_per_core);

That's too spammy for cpu hotplug. Make it pr_debug() at most.

cheers

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 4/5] powerpc/smp: add cpu_cache_mask
  2017-03-02  0:49 ` [PATCH 4/5] powerpc/smp: add cpu_cache_mask Oliver O'Halloran
@ 2017-03-15 11:26   ` Michael Ellerman
  2017-03-23  3:33     ` Oliver O'Halloran
  0 siblings, 1 reply; 20+ messages in thread
From: Michael Ellerman @ 2017-03-15 11:26 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev; +Cc: Oliver O'Halloran

Oliver O'Halloran <oohall@gmail.com> writes:

> Traditionally we have only ever tracked which CPUs are in the same core
> (cpu_sibling_mask) and on the same die (cpu_core_mask). For Power9 we
> need to be aware of which CPUs share cache with each other so this patch
> adds cpu_cache_mask and the underlying cpu_cache_map variable to track
> this.

But which cache?

Some CPUs on Power8 share L3, or L4.

I think just call it cpu_l2cache_map to make it explicit.

cheers

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 5/5] powerpc/smp: Add Power9 scheduler topology
  2017-03-02  0:49 ` [PATCH 5/5] powerpc/smp: Add Power9 scheduler topology Oliver O'Halloran
  2017-03-02 10:25   ` Balbir Singh
@ 2017-03-15 11:30   ` Michael Ellerman
  1 sibling, 0 replies; 20+ messages in thread
From: Michael Ellerman @ 2017-03-15 11:30 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev; +Cc: Oliver O'Halloran

Oliver O'Halloran <oohall@gmail.com> writes:

> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 5571f30ff72d..5e1811b24415 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -724,10 +724,17 @@ static void update_cpu_masks(int cpu, bool onlining)
>  
>  	update_thread_mask(cpu, onlining);
>  
> +	/* we need the l2 cache mask for the power9 scheduler topology */
> +	if (cpu_has_feature(CPU_FTR_ARCH_300))
> +		update_mask_by_l2(cpu, onlining, cpu_cache_mask);

I guess I missed something somewhere, why do we need to check the CPU
feature?

...
> @@ -829,7 +862,10 @@ void __init smp_cpus_done(unsigned int max_cpus)
>  
>  	dump_numa_cpu_topology();
>  
> -	set_sched_topology(powerpc_topology);
> +	if (cpu_has_feature(CPU_FTR_ARCH_300))
> +		set_sched_topology(power9_topology);
> +	else
> +		set_sched_topology(powerpc_topology);
  
Ideally this would just all come from the device tree. If we detect that
the L2 is shared then we tell the scheduler that, we shouldn't need to
know that P9 is special. It certainly doesn't say anywhere in ISA 3.00
that the L2 is shared.

cheers

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 5/5] powerpc/smp: Add Power9 scheduler topology
  2017-03-02 10:25   ` Balbir Singh
@ 2017-03-15 11:33     ` Michael Ellerman
  0 siblings, 0 replies; 20+ messages in thread
From: Michael Ellerman @ 2017-03-15 11:33 UTC (permalink / raw)
  To: Balbir Singh, Oliver O'Halloran, linuxppc-dev

Balbir Singh <bsingharora@gmail.com> writes:

> On 02/03/17 11:49, Oliver O'Halloran wrote:
>> In previous generations of Power processors each core had a private L2
>> cache. The Power9 processor has a slightly different architecture where
>> the L2 cache is shared among pairs of cores rather than being completely
>> private.
>> 
>> Making the scheduler aware of this cache sharing allows the scheduler to
>> make more intelligent migration decisions. When one core in the pair is
>> overloaded tasks can be migrated to its paired core to improve throughput
>> without cache-refilling penalty typically associated with task
>> migration.
>
> Could you please describe the changes to sched_domains w.r.t before and
> after for P9.

Yes please.

cheers

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings
  2017-03-15 11:18 ` Michael Ellerman
@ 2017-03-23  1:09   ` Oliver O'Halloran
  2017-03-28  3:03     ` Michael Ellerman
  0 siblings, 1 reply; 20+ messages in thread
From: Oliver O'Halloran @ 2017-03-23  1:09 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

On Wed, Mar 15, 2017 at 10:18 PM, Michael Ellerman <mpe@ellerman.id.au> wrote:
> Oliver O'Halloran <oohall@gmail.com> writes:
>
>> To determine which logical CPUs are on the same core the kernel uses the
>> ibm,chip-id property from the device tree node associated with that cpu.
>> The lookup for this information is currently open coded in both
>> traverse_siblings() and traverse_siblings_chip_id(). This patch replaces
>> these manual lookups with the existing cpu_to_chip_id() function.
>
> Some minor nits.
>
> cpu_to_chip_id() actually searches recursively up the parents until it
> finds a ibm,chip-id, so it's not a 1:1 replacement for the existing
> logic, but it's probably still an OK conversion. It's still worth
> mentioning in the change log though.

fair enough

>> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
>> index 893bd7f79be6..dfe0e1d9cd06 100644
>> --- a/arch/powerpc/kernel/smp.c
>> +++ b/arch/powerpc/kernel/smp.c
>> @@ -664,23 +655,19 @@ static void traverse_core_siblings(int cpu, bool add)
>>  {
>>       struct device_node *l2_cache, *np;
>>       const struct cpumask *mask;
>> -     int i, chip, plen;
>> -     const __be32 *prop;
>> +     int chip_id;
>> +     int i;
>>
>> -     /* First see if we have ibm,chip-id properties in cpu nodes */
>> -     np = of_get_cpu_node(cpu, NULL);
>> -     if (np) {
>> -             chip = -1;
>> -             prop = of_get_property(np, "ibm,chip-id", &plen);
>> -             if (prop && plen == sizeof(int))
>> -                     chip = of_read_number(prop, 1);
>> -             of_node_put(np);
>> -             if (chip >= 0) {
>> -                     traverse_siblings_chip_id(cpu, add, chip);
>> -                     return;
>> -             }
>> +     /* threads that share a chip-id are considered siblings (same die) */
>
> You might know it means the "same die", but AFAIK there's no actual
> definition for what the chip-id means, so let's not write comments that
> might be wrong in future. Just saying they're considered siblings is
> sufficient.
>
> Also "Threads" :)

The cpus masks are all built in terms of threads, so this is
technically correct even if it sounds stupid. Maybe "logical cpus"
would be better?

>
> cheers

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/5] powerpc/smp: add set_cpus_related()
  2017-03-15 11:18   ` Michael Ellerman
@ 2017-03-23  1:27     ` Oliver O'Halloran
  2017-03-28  1:15       ` Michael Ellerman
  0 siblings, 1 reply; 20+ messages in thread
From: Oliver O'Halloran @ 2017-03-23  1:27 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

On Wed, Mar 15, 2017 at 10:18 PM, Michael Ellerman <mpe@ellerman.id.au> wrote:
> Oliver O'Halloran <oohall@gmail.com> writes:
>> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
>> index dfe0e1d9cd06..1c531887ca51 100644
>> --- a/arch/powerpc/kernel/smp.c
>> +++ b/arch/powerpc/kernel/smp.c
>> @@ -377,6 +377,25 @@ static void smp_store_cpu_info(int id)
>>  #endif
>>  }
>>
>> +/*
>> + * Relationships between CPUs are maintained in a set of per-cpu cpumasks. We
>> + * need to ensure that they are kept consistent between CPUs when they are
>> + * changed.
>> + *
>> + * This is slightly tricky since the core mask must be a strict superset of
>> + * the sibling mask.
>> + */
>> +static void set_cpus_related(int i, int j, bool related, struct cpumask *(*relation_fn)(int))
>> +{
>> +     if (related) {
>> +             cpumask_set_cpu(i, relation_fn(j));
>> +             cpumask_set_cpu(j, relation_fn(i));
>> +     } else {
>> +             cpumask_clear_cpu(i, relation_fn(j));
>> +             cpumask_clear_cpu(j, relation_fn(i));
>> +     }
>> +}
>
> I think you pushed the abstraction one notch too far on this one, or
> perhaps not far enough.
>
> We end up with a function called "set" that might clear, depending on a
> bool you pass. Which is hard to parse, eg:
>
>         set_cpus_related(cpu, base + i, false, cpu_sibling_mask);
>
> And I know there's two places where we pass an existing bool "add", but
> there's four where we pass true or false.

I think you're looking at this patch. With the full series applied we
never pass a literal to set_cpus_related() directly:

[12:14 oliver ~/.../powerpc/kernel (p9-sched $%)]$ gg set_cpus_related
smp.c:391:static void set_cpus_related(int i, int j, bool related,
struct cpumask *(*relation_fn)(int))
smp.c:647:      set_cpus_related(cpu, cpu, add, cpu_core_mask);
smp.c:651:                      set_cpus_related(cpu, i, add, cpu_core_mask);
smp.c:685:      set_cpus_related(cpu, cpu, onlining, mask_fn);
smp.c:697:                      set_cpus_related(cpu, i, onlining, mask_fn);
smp.c:721:              set_cpus_related(cpu, base + i, onlining,
cpu_sibling_mask);
smp.c:736:      set_cpus_related(cpu, cpu, onlining, cpu_core_mask);
smp.c:746:              set_cpus_related(cpu, i, onlining, cpu_core_mask);

I agree that set_cpus_related() is probably a bad name,
make_cpus_related() maybe?

>
> If we want to push it in that direction I think we should just pass the
> set/clear routine instead of the flag, so:
>
>         do_cpus_related(cpu, base + i, cpumask_clear_cpu, cpu_sibling_mask);
>
> But that might be overdoing it.

I think this would be ok.

>
> So I think we should just do:
>
> static void set_cpus_related(int i, int j, struct cpumask *(*mask_func)(int))
> {
>         cpumask_set_cpu(i, mask_func(j));
>         cpumask_set_cpu(j, mask_func(i));
> }
>
> static void clear_cpus_related(int i, int j, struct cpumask *(*mask_func)(int))
> {
>         cpumask_clear_cpu(i, mask_func(j));
>         cpumask_clear_cpu(j, mask_func(i));
> }
>
>
> So the cases with add become:
>
>         if (add)
>                 set_cpus_related(cpu, i, cpu_core_mask(i));
>         else
>                 clear_cpus_related(cpu, i, cpu_core_mask(i));

Dunno, I was trying to get rid of this sort of thing since the logic
is duplicated in a lot of places. Seemed to me that it was just
pointlessly verbose rather than being helpfully explicit.

>
> Which is not as pretty but more explicit.
>
> And the other cases look much better, eg:
>
>         clear_cpus_related(cpu, base + i, cpu_sibling_mask);
>
> ??
>
> cheers

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 4/5] powerpc/smp: add cpu_cache_mask
  2017-03-15 11:26   ` Michael Ellerman
@ 2017-03-23  3:33     ` Oliver O'Halloran
  2017-03-28  1:05       ` Michael Ellerman
  0 siblings, 1 reply; 20+ messages in thread
From: Oliver O'Halloran @ 2017-03-23  3:33 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

On Wed, Mar 15, 2017 at 10:26 PM, Michael Ellerman <mpe@ellerman.id.au> wrote:
> Oliver O'Halloran <oohall@gmail.com> writes:
>
>> Traditionally we have only ever tracked which CPUs are in the same core
>> (cpu_sibling_mask) and on the same die (cpu_core_mask). For Power9 we
>> need to be aware of which CPUs share cache with each other so this patch
>> adds cpu_cache_mask and the underlying cpu_cache_map variable to track
>> this.
>
> But which cache?

I'm not sure it matters. All the scheduler really wants to know is
that migrating between cpus with a shared cache is cheaper than
migrating elsewhere.

> Some CPUs on Power8 share L3, or L4.

Eh... it's not really the same. The "L4" is part of the memory buffers
and its function is conceptually different to the processor caches.
The L3 on P8 is only shared when the core that owns it is offline (or
sleeping) so the scheduler doesn't really need to be aware of it. Even
if the scheduler was aware I don't think it can take advantage of it
without some terrible hacks.

>
> I think just call it cpu_l2cache_map to make it explicit.

I was being deliberately vague. I know it's only shared currently,
but it's possible we might have a (real) shared L3 in the future. The
latest high-end x86 chips have some L3 sharing across the entire
chip so you never know. I'm not particularly attached to the name
though, so i'll rename it if you really want.

Oliver

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 4/5] powerpc/smp: add cpu_cache_mask
  2017-03-23  3:33     ` Oliver O'Halloran
@ 2017-03-28  1:05       ` Michael Ellerman
  0 siblings, 0 replies; 20+ messages in thread
From: Michael Ellerman @ 2017-03-28  1:05 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev

Oliver O'Halloran <oohall@gmail.com> writes:

> On Wed, Mar 15, 2017 at 10:26 PM, Michael Ellerman <mpe@ellerman.id.au> wrote:
>> Oliver O'Halloran <oohall@gmail.com> writes:
>>
>>> Traditionally we have only ever tracked which CPUs are in the same core
>>> (cpu_sibling_mask) and on the same die (cpu_core_mask). For Power9 we
>>> need to be aware of which CPUs share cache with each other so this patch
>>> adds cpu_cache_mask and the underlying cpu_cache_map variable to track
>>> this.
>
>> Some CPUs on Power8 share L3, or L4.
>
> Eh... it's not really the same. The "L4" is part of the memory buffers
> and its function is conceptually different to the processor caches.
> The L3 on P8 is only shared when the core that owns it is offline (or
> sleeping) so the scheduler doesn't really need to be aware of it.

But that's exactly my point, this mask only tracks whether CPUs share an
L2, so it should be named as such.

>> I think just call it cpu_l2cache_map to make it explicit.
>
> I was being deliberately vague. I know it's only a shared currently,
> but it's possible we might have a (real) shared L3 in the future. The
> latest high-end x86 chips have some of l3 sharing across the entire
> chip so you never know.

Sure, but in that case we'd probably want a new mask to track which CPUs
share L3, because we'd probably also have CPUs sharing L2, and those
masks might not be equal.

But if I'm wrong we can just rename it *then*.

>I'm not particularly attached to the name though, so i'll rename it
>if you really want.

Yes please.

cheers

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/5] powerpc/smp: add set_cpus_related()
  2017-03-23  1:27     ` Oliver O'Halloran
@ 2017-03-28  1:15       ` Michael Ellerman
  0 siblings, 0 replies; 20+ messages in thread
From: Michael Ellerman @ 2017-03-28  1:15 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev

Oliver O'Halloran <oohall@gmail.com> writes:

> On Wed, Mar 15, 2017 at 10:18 PM, Michael Ellerman <mpe@ellerman.id.au> wrote:
>> Oliver O'Halloran <oohall@gmail.com> writes:
>>> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
>>> index dfe0e1d9cd06..1c531887ca51 100644
>>> --- a/arch/powerpc/kernel/smp.c
>>> +++ b/arch/powerpc/kernel/smp.c
>>> @@ -377,6 +377,25 @@ static void smp_store_cpu_info(int id)
>>>  #endif
>>>  }
>>>
>>> +/*
>>> + * Relationships between CPUs are maintained in a set of per-cpu cpumasks. We
>>> + * need to ensure that they are kept consistent between CPUs when they are
>>> + * changed.
>>> + *
>>> + * This is slightly tricky since the core mask must be a strict superset of
>>> + * the sibling mask.
>>> + */
>>> +static void set_cpus_related(int i, int j, bool related, struct cpumask *(*relation_fn)(int))
>>> +{
>>> +     if (related) {
>>> +             cpumask_set_cpu(i, relation_fn(j));
>>> +             cpumask_set_cpu(j, relation_fn(i));
>>> +     } else {
>>> +             cpumask_clear_cpu(i, relation_fn(j));
>>> +             cpumask_clear_cpu(j, relation_fn(i));
>>> +     }
>>> +}
>>
>> I think you pushed the abstraction one notch too far on this one, or
>> perhaps not far enough.
>>
>> We end up with a function called "set" that might clear, depending on a
>> bool you pass. Which is hard to parse, eg:
>>
>>         set_cpus_related(cpu, base + i, false, cpu_sibling_mask);
>>
>> And I know there's two places where we pass an existing bool "add", but
>> there's four where we pass true or false.
>
> I think you're looking at this patch.

Yeah I guess I was.

> With the full series applied we never pass a literal to
> set_cpus_related() directly:
>
> [12:14 oliver ~/.../powerpc/kernel (p9-sched $%)]$ gg set_cpus_related
> smp.c:391:static void set_cpus_related(int i, int j, bool related,
> struct cpumask *(*relation_fn)(int))
> smp.c:647:      set_cpus_related(cpu, cpu, add, cpu_core_mask);
> smp.c:651:                      set_cpus_related(cpu, i, add, cpu_core_mask);
> smp.c:685:      set_cpus_related(cpu, cpu, onlining, mask_fn);
> smp.c:697:                      set_cpus_related(cpu, i, onlining, mask_fn);
> smp.c:721:              set_cpus_related(cpu, base + i, onlining, cpu_sibling_mask);
> smp.c:736:      set_cpus_related(cpu, cpu, onlining, cpu_core_mask);
> smp.c:746:              set_cpus_related(cpu, i, onlining, cpu_core_mask);

Yeah I guess it's a bit better.

But it's still very non-obvious what each of those lines is doing.

> I agree that set_cpus_related() is probably a bad name,
> make_cpus_related() maybe?

Except in the clearing case it's unrelating them.

I think the fact that we can't think of a meaningful name is a sign it's
not a good API.

>> If we want to push it in that direction I think we should just pass the
>> set/clear routine instead of the flag, so:
>>
>>         do_cpus_related(cpu, base + i, cpumask_clear_cpu, cpu_sibling_mask);
>>
>> But that might be overdoing it.
>
> I think this would be ok.
>
>>
>> So I think we should just do:
>>
>> static void set_cpus_related(int i, int j, struct cpumask *(*mask_func)(int))
>> {
>>         cpumask_set_cpu(i, mask_func(j));
>>         cpumask_set_cpu(j, mask_func(i));
>> }
>>
>> static void clear_cpus_related(int i, int j, struct cpumask *(*mask_func)(int))
>> {
>>         cpumask_clear_cpu(i, mask_func(j));
>>         cpumask_clear_cpu(j, mask_func(i));
>> }
>>
>> So the cases with add become:
>>
>>         if (add)
>>                 set_cpus_related(cpu, i, cpu_core_mask(i));
>>         else
>>                 clear_cpus_related(cpu, i, cpu_core_mask(i));
>
> Dunno, I was trying to get rid of this sort of thing since the logic
> is duplicated in a lot of places. Seemed to me that it was just
> pointlessly verbose rather than being helpfully explicit.

It's annoyingly verbose perhaps, but I don't think pointlessly.

Your version passes two CPU numbers, a bool and a function to another
function, and based on the bool calls a third or fourth function and
passes the CPU numbers and result of the first function.

My version(s) above takes two cpu numbers and a mask, and calls one
function passing it the CPU numbers and the mask.

So it's clearly simpler and more explicit, and because the function
names are actually meaningful you can have some hope of determining what
they do without looking at the definition every time you see it.

cheers

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings
  2017-03-23  1:09   ` Oliver O'Halloran
@ 2017-03-28  3:03     ` Michael Ellerman
  2017-03-28  3:23       ` Oliver O'Halloran
  0 siblings, 1 reply; 20+ messages in thread
From: Michael Ellerman @ 2017-03-28  3:03 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev

Oliver O'Halloran <oohall@gmail.com> writes:
> On Wed, Mar 15, 2017 at 10:18 PM, Michael Ellerman <mpe@ellerman.id.au> wrote:
>> Oliver O'Halloran <oohall@gmail.com> writes:
>>> +     /* threads that share a chip-id are considered siblings (same die) */
>>
>> Also "Threads" :)
>
> The cpus masks are all built in terms of threads, so this is
> technically correct even if it sounds stupid. Maybe "logical cpus"
> would be better?

No I meant you need a capital "T" !

cheers

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings
  2017-03-28  3:03     ` Michael Ellerman
@ 2017-03-28  3:23       ` Oliver O'Halloran
  0 siblings, 0 replies; 20+ messages in thread
From: Oliver O'Halloran @ 2017-03-28  3:23 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

On Tue, Mar 28, 2017 at 2:03 PM, Michael Ellerman <mpe@ellerman.id.au> wrote:
> Oliver O'Halloran <oohall@gmail.com> writes:
>> On Wed, Mar 15, 2017 at 10:18 PM, Michael Ellerman <mpe@ellerman.id.au> wrote:
>>> Oliver O'Halloran <oohall@gmail.com> writes:
>>>> +     /* threads that share a chip-id are considered siblings (same die) */
>>>
>>> Also "Threads" :)
>>
>> The cpus masks are all built in terms of threads, so this is
>> technically correct even if it sounds stupid. Maybe "logical cpus"
>> would be better?
>
> No I meant you need a capital "T" !

capital letters are against my religion.

>
> cheers

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2017-03-28  3:23 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-02  0:49 [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings Oliver O'Halloran
2017-03-02  0:49 ` [PATCH 2/5] powerpc/smp: add set_cpus_related() Oliver O'Halloran
2017-03-15 11:18   ` Michael Ellerman
2017-03-23  1:27     ` Oliver O'Halloran
2017-03-28  1:15       ` Michael Ellerman
2017-03-02  0:49 ` [PATCH 3/5] powerpc/smp: Add update_cpu_masks() Oliver O'Halloran
2017-03-15 11:18   ` Michael Ellerman
2017-03-02  0:49 ` [PATCH 4/5] powerpc/smp: add cpu_cache_mask Oliver O'Halloran
2017-03-15 11:26   ` Michael Ellerman
2017-03-23  3:33     ` Oliver O'Halloran
2017-03-28  1:05       ` Michael Ellerman
2017-03-02  0:49 ` [PATCH 5/5] powerpc/smp: Add Power9 scheduler topology Oliver O'Halloran
2017-03-02 10:25   ` Balbir Singh
2017-03-15 11:33     ` Michael Ellerman
2017-03-15 11:30   ` Michael Ellerman
2017-03-02  3:44 ` [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings Balbir Singh
2017-03-15 11:18 ` Michael Ellerman
2017-03-23  1:09   ` Oliver O'Halloran
2017-03-28  3:03     ` Michael Ellerman
2017-03-28  3:23       ` Oliver O'Halloran

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.