All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH 1/2] ARM64: add cpu topology definition
@ 2013-07-27 10:42 ` Hanjun Guo
  0 siblings, 0 replies; 30+ messages in thread
From: Hanjun Guo @ 2013-07-27 10:42 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Russell King, Vincent Guittot, linux-arm-kernel, patches,
	linaro-kernel, linux-kernel, linaro-acpi, Al Stone,
	Graeme Gregory, Naresh Bhat, Tomasz Nowicki, Hanjun Guo

Power aware scheduling needs the cpu topology information to improve the
cpu scheduler decision making.

For ARM64, we can get the topology from the MPIDR register, which defines
the affinity of processors.

This patch is mainly based on arch/arm/kernel/topology.c written by
Vincent Guittot, and replaces the topology array with a per-cpu variable.

Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
---
 arch/arm64/Kconfig                |    9 +++
 arch/arm64/include/asm/cputype.h  |   11 ++++
 arch/arm64/include/asm/topology.h |   41 ++++++++++++
 arch/arm64/kernel/Makefile        |    1 +
 arch/arm64/kernel/smp.c           |    6 ++
 arch/arm64/kernel/topology.c      |  128 +++++++++++++++++++++++++++++++++++++
 6 files changed, 196 insertions(+)
 create mode 100644 arch/arm64/include/asm/topology.h
 create mode 100644 arch/arm64/kernel/topology.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9737e97..f0ce91b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -150,6 +150,15 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
+config ARM64_CPU_TOPOLOGY
+	bool "Support cpu topology definition"
+	depends on SMP && ARM64
+	default y
+	help
+	  Support ARM64 cpu topology definition. The MPIDR register defines
+	  affinity between processors which is then used to describe the cpu
+	  topology of an ARM64 System.
+
 config NR_CPUS
 	int "Maximum number of CPUs (2-32)"
 	range 2 32
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 5fe138e..68b55af 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -30,6 +30,17 @@
 
 #define MPIDR_HWID_BITMASK	0xff00ffffff
 
+#define MPIDR_SMP_BITMASK	(0x1 << 30)
+#define MPIDR_MT_BITMASK	(0x1 << 24)
+
+#define MPIDR_LEVEL_BITS	8
+#define MPIDR_LEVEL_MASK 	((1 << MPIDR_LEVEL_BITS) - 1)
+
+#define MPIDR_AFFINITY_LEVEL_0(mpidr) ((mpidr) & MPIDR_LEVEL_MASK)
+#define MPIDR_AFFINITY_LEVEL_1(mpidr) ((mpidr >> 8) & MPIDR_LEVEL_MASK)
+#define MPIDR_AFFINITY_LEVEL_2(mpidr) ((mpidr >> 16) & MPIDR_LEVEL_MASK)
+#define MPIDR_AFFINITY_LEVEL_3(mpidr) ((mpidr >> 32) & MPIDR_LEVEL_MASK)
+
 #define read_cpuid(reg) ({						\
 	u64 __val;							\
 	asm("mrs	%0, " reg : "=r" (__val));			\
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
new file mode 100644
index 0000000..8631808
--- /dev/null
+++ b/arch/arm64/include/asm/topology.h
@@ -0,0 +1,41 @@
+#ifndef _ASM_ARM64_TOPOLOGY_H
+#define _ASM_ARM64_TOPOLOGY_H
+
+#ifdef CONFIG_ARM64_CPU_TOPOLOGY
+
+#include <linux/cpumask.h>
+
+struct cputopo_arm64 {
+	int thread_id;
+	int core_id;
+	int socket_id;
+	cpumask_t thread_sibling;
+	cpumask_t core_sibling;
+};
+
+DECLARE_PER_CPU(struct cputopo_arm64, cpu_topology);
+
+#define cpu_topo(cpu) per_cpu(cpu_topology, cpu)
+
+#define topology_physical_package_id(cpu)	(cpu_topo(cpu).socket_id)
+#define topology_core_id(cpu)		(cpu_topo(cpu).core_id)
+#define topology_core_cpumask(cpu)	(&cpu_topo(cpu).core_sibling)
+#define topology_thread_cpumask(cpu)	(&cpu_topo(cpu).thread_sibling)
+
+#define mc_capable()	(cpu_topo(0).socket_id != -1)
+#define smt_capable()	(cpu_topo(0).thread_id != -1)
+
+void init_cpu_topology(void);
+void store_cpu_topology(unsigned int cpuid);
+const struct cpumask *cpu_coregroup_mask(int cpu);
+
+#else
+
+static inline void init_cpu_topology(void) { }
+static inline void store_cpu_topology(unsigned int cpuid) { }
+
+#endif
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_ARM64_TOPOLOGY_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7b4b564..a47c359 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP)			+= smp.o smp_spin_table.o smp_psci.o
 arm64-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o
 arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
 arm64-obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+arm64-obj-$(CONFIG_ARM64_CPU_TOPOLOGY)  += topology.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index fee5cce..197b1da 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -39,6 +39,7 @@
 #include <asm/atomic.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
+#include <asm/topology.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -215,6 +216,8 @@ asmlinkage void secondary_start_kernel(void)
 	local_irq_enable();
 	local_fiq_enable();
 
+	store_cpu_topology(cpu);
+
 	/*
 	 * OK, it's off to the idle thread for us
 	 */
@@ -387,6 +390,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	int cpu, err;
 	unsigned int ncores = num_possible_cpus();
 
+	init_cpu_topology();
+	store_cpu_topology(smp_processor_id());
+
 	/*
 	 * are we trying to boot more cores than exist?
 	 */
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
new file mode 100644
index 0000000..1eb0435
--- /dev/null
+++ b/arch/arm64/kernel/topology.c
@@ -0,0 +1,128 @@
+/*
+ * arch/arm64/kernel/topology.c
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ * Written by: Hanjun Guo
+ *
+ * based on arch/arm/kernel/topology.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+
+#include <asm/cputype.h>
+#include <asm/topology.h>
+
+DEFINE_PER_CPU(struct cputopo_arm64, cpu_topology);
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	return &cpu_topo(cpu).core_sibling;
+}
+
+void update_siblings_masks(unsigned int cpuid)
+{
+	struct cputopo_arm64 *topo, *cpuid_topo = &cpu_topo(cpuid);
+	int cpu;
+
+	/* update core and thread sibling masks */
+	for_each_possible_cpu(cpu) {
+		topo = &cpu_topo(cpu);
+
+		if (cpuid_topo->socket_id != topo->socket_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &topo->core_sibling);
+		if (cpu != cpuid)
+			cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+		if (cpuid_topo->core_id != topo->core_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &topo->thread_sibling);
+		if (cpu != cpuid)
+			cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+	}
+	smp_wmb();
+}
+
+/*
+ * store_cpu_topology is called at boot when only one cpu is running
+ * and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
+ * which prevents simultaneous write access to cpu_topology array
+ */
+void store_cpu_topology(unsigned int cpuid)
+{
+	struct cputopo_arm64 *cpuid_topo = &cpu_topo(cpuid);
+	u64 mpidr;
+
+	/* If the cpu topology has been already set, just return */
+	if (cpuid_topo->core_id != -1)
+		return;
+
+	mpidr = read_cpuid_mpidr();
+
+	/* create cpu topology mapping */
+	if (!(mpidr & MPIDR_SMP_BITMASK)) {
+		/*
+		 * This is a multiprocessor system
+		 * multiprocessor format & multiprocessor mode field are set
+		 */
+
+		if (mpidr & MPIDR_MT_BITMASK) {
+			/* core performance interdependency */
+			cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
+			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
+			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
+		} else {
+			/* largely independent cores */
+			cpuid_topo->thread_id = -1;
+			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
+			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
+		}
+	} else {
+		/*
+		 * This is an uniprocessor system
+		 * we are in multiprocessor format but uniprocessor system
+		 * or in the old uniprocessor format
+		 */
+		cpuid_topo->thread_id = -1;
+		cpuid_topo->core_id = 0;
+		cpuid_topo->socket_id = -1;
+	}
+
+	update_siblings_masks(cpuid);
+
+	printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr 0x%llx\n",
+		cpuid, cpu_topo(cpuid).thread_id,
+		cpu_topo(cpuid).core_id,
+		cpu_topo(cpuid).socket_id, mpidr);
+}
+
+/*
+ * init_cpu_topology is called at boot when only one cpu is running
+ * which prevent simultaneous write access to cpu_topology array
+ */
+void __init init_cpu_topology(void)
+{
+	unsigned int cpu;
+
+	/* init core mask */
+	for_each_possible_cpu(cpu) {
+		struct cputopo_arm64 *topo = &cpu_topo(cpu);
+
+		topo->thread_id = -1;
+		topo->core_id =  -1;
+		topo->socket_id = -1;
+		cpumask_clear(&topo->core_sibling);
+		cpumask_clear(&topo->thread_sibling);
+	}
+	smp_wmb();
+}
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [RFC][PATCH 1/2] ARM64: add cpu topology definition
@ 2013-07-27 10:42 ` Hanjun Guo
  0 siblings, 0 replies; 30+ messages in thread
From: Hanjun Guo @ 2013-07-27 10:42 UTC (permalink / raw)
  To: linux-arm-kernel

Power aware scheduling needs the cpu topology information to improve the
cpu scheduler decision making.

For ARM64, we can get the topology from the MPIDR register, which defines
the affinity of processors.

This patch is mainly based on arch/arm/kernel/topology.c written by
Vincent Guittot, and replaces the topology array with a per-cpu variable.

Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
---
 arch/arm64/Kconfig                |    9 +++
 arch/arm64/include/asm/cputype.h  |   11 ++++
 arch/arm64/include/asm/topology.h |   41 ++++++++++++
 arch/arm64/kernel/Makefile        |    1 +
 arch/arm64/kernel/smp.c           |    6 ++
 arch/arm64/kernel/topology.c      |  128 +++++++++++++++++++++++++++++++++++++
 6 files changed, 196 insertions(+)
 create mode 100644 arch/arm64/include/asm/topology.h
 create mode 100644 arch/arm64/kernel/topology.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9737e97..f0ce91b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -150,6 +150,15 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
+config ARM64_CPU_TOPOLOGY
+	bool "Support cpu topology definition"
+	depends on SMP && ARM64
+	default y
+	help
+	  Support ARM64 cpu topology definition. The MPIDR register defines
+	  affinity between processors which is then used to describe the cpu
+	  topology of an ARM64 System.
+
 config NR_CPUS
 	int "Maximum number of CPUs (2-32)"
 	range 2 32
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 5fe138e..68b55af 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -30,6 +30,17 @@
 
 #define MPIDR_HWID_BITMASK	0xff00ffffff
 
+#define MPIDR_SMP_BITMASK	(0x1 << 30)
+#define MPIDR_MT_BITMASK	(0x1 << 24)
+
+#define MPIDR_LEVEL_BITS	8
+#define MPIDR_LEVEL_MASK 	((1 << MPIDR_LEVEL_BITS) - 1)
+
+#define MPIDR_AFFINITY_LEVEL_0(mpidr) ((mpidr) & MPIDR_LEVEL_MASK)
+#define MPIDR_AFFINITY_LEVEL_1(mpidr) ((mpidr >> 8) & MPIDR_LEVEL_MASK)
+#define MPIDR_AFFINITY_LEVEL_2(mpidr) ((mpidr >> 16) & MPIDR_LEVEL_MASK)
+#define MPIDR_AFFINITY_LEVEL_3(mpidr) ((mpidr >> 32) & MPIDR_LEVEL_MASK)
+
 #define read_cpuid(reg) ({						\
 	u64 __val;							\
 	asm("mrs	%0, " reg : "=r" (__val));			\
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
new file mode 100644
index 0000000..8631808
--- /dev/null
+++ b/arch/arm64/include/asm/topology.h
@@ -0,0 +1,41 @@
+#ifndef _ASM_ARM64_TOPOLOGY_H
+#define _ASM_ARM64_TOPOLOGY_H
+
+#ifdef CONFIG_ARM64_CPU_TOPOLOGY
+
+#include <linux/cpumask.h>
+
+struct cputopo_arm64 {
+	int thread_id;
+	int core_id;
+	int socket_id;
+	cpumask_t thread_sibling;
+	cpumask_t core_sibling;
+};
+
+DECLARE_PER_CPU(struct cputopo_arm64, cpu_topology);
+
+#define cpu_topo(cpu) per_cpu(cpu_topology, cpu)
+
+#define topology_physical_package_id(cpu)	(cpu_topo(cpu).socket_id)
+#define topology_core_id(cpu)		(cpu_topo(cpu).core_id)
+#define topology_core_cpumask(cpu)	(&cpu_topo(cpu).core_sibling)
+#define topology_thread_cpumask(cpu)	(&cpu_topo(cpu).thread_sibling)
+
+#define mc_capable()	(cpu_topo(0).socket_id != -1)
+#define smt_capable()	(cpu_topo(0).thread_id != -1)
+
+void init_cpu_topology(void);
+void store_cpu_topology(unsigned int cpuid);
+const struct cpumask *cpu_coregroup_mask(int cpu);
+
+#else
+
+static inline void init_cpu_topology(void) { }
+static inline void store_cpu_topology(unsigned int cpuid) { }
+
+#endif
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_ARM64_TOPOLOGY_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7b4b564..a47c359 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP)			+= smp.o smp_spin_table.o smp_psci.o
 arm64-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o
 arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
 arm64-obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+arm64-obj-$(CONFIG_ARM64_CPU_TOPOLOGY)  += topology.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index fee5cce..197b1da 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -39,6 +39,7 @@
 #include <asm/atomic.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
+#include <asm/topology.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -215,6 +216,8 @@ asmlinkage void secondary_start_kernel(void)
 	local_irq_enable();
 	local_fiq_enable();
 
+	store_cpu_topology(cpu);
+
 	/*
 	 * OK, it's off to the idle thread for us
 	 */
@@ -387,6 +390,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	int cpu, err;
 	unsigned int ncores = num_possible_cpus();
 
+	init_cpu_topology();
+	store_cpu_topology(smp_processor_id());
+
 	/*
 	 * are we trying to boot more cores than exist?
 	 */
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
new file mode 100644
index 0000000..1eb0435
--- /dev/null
+++ b/arch/arm64/kernel/topology.c
@@ -0,0 +1,128 @@
+/*
+ * arch/arm64/kernel/topology.c
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ * Written by: Hanjun Guo
+ *
+ * based on arch/arm/kernel/topology.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+
+#include <asm/cputype.h>
+#include <asm/topology.h>
+
+DEFINE_PER_CPU(struct cputopo_arm64, cpu_topology);
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	return &cpu_topo(cpu).core_sibling;
+}
+
+void update_siblings_masks(unsigned int cpuid)
+{
+	struct cputopo_arm64 *topo, *cpuid_topo = &cpu_topo(cpuid);
+	int cpu;
+
+	/* update core and thread sibling masks */
+	for_each_possible_cpu(cpu) {
+		topo = &cpu_topo(cpu);
+
+		if (cpuid_topo->socket_id != topo->socket_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &topo->core_sibling);
+		if (cpu != cpuid)
+			cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+		if (cpuid_topo->core_id != topo->core_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &topo->thread_sibling);
+		if (cpu != cpuid)
+			cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+	}
+	smp_wmb();
+}
+
+/*
+ * store_cpu_topology is called at boot when only one cpu is running
+ * and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
+ * which prevents simultaneous write access to cpu_topology array
+ */
+void store_cpu_topology(unsigned int cpuid)
+{
+	struct cputopo_arm64 *cpuid_topo = &cpu_topo(cpuid);
+	u64 mpidr;
+
+	/* If the cpu topology has been already set, just return */
+	if (cpuid_topo->core_id != -1)
+		return;
+
+	mpidr = read_cpuid_mpidr();
+
+	/* create cpu topology mapping */
+	if (!(mpidr & MPIDR_SMP_BITMASK)) {
+		/*
+		 * This is a multiprocessor system
+		 * multiprocessor format & multiprocessor mode field are set
+		 */
+
+		if (mpidr & MPIDR_MT_BITMASK) {
+			/* core performance interdependency */
+			cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
+			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
+			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
+		} else {
+			/* largely independent cores */
+			cpuid_topo->thread_id = -1;
+			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
+			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
+		}
+	} else {
+		/*
+		 * This is an uniprocessor system
+		 * we are in multiprocessor format but uniprocessor system
+		 * or in the old uniprocessor format
+		 */
+		cpuid_topo->thread_id = -1;
+		cpuid_topo->core_id = 0;
+		cpuid_topo->socket_id = -1;
+	}
+
+	update_siblings_masks(cpuid);
+
+	printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr 0x%llx\n",
+		cpuid, cpu_topo(cpuid).thread_id,
+		cpu_topo(cpuid).core_id,
+		cpu_topo(cpuid).socket_id, mpidr);
+}
+
+/*
+ * init_cpu_topology is called at boot when only one cpu is running
+ * which prevent simultaneous write access to cpu_topology array
+ */
+void __init init_cpu_topology(void)
+{
+	unsigned int cpu;
+
+	/* init core mask */
+	for_each_possible_cpu(cpu) {
+		struct cputopo_arm64 *topo = &cpu_topo(cpu);
+
+		topo->thread_id = -1;
+		topo->core_id =  -1;
+		topo->socket_id = -1;
+		cpumask_clear(&topo->core_sibling);
+		cpumask_clear(&topo->thread_sibling);
+	}
+	smp_wmb();
+}
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [RFC][PATCH 2/2] ARM64: introduce cluster id and make a difference between socket id
  2013-07-27 10:42 ` Hanjun Guo
@ 2013-07-27 10:42   ` Hanjun Guo
  -1 siblings, 0 replies; 30+ messages in thread
From: Hanjun Guo @ 2013-07-27 10:42 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon
  Cc: Russell King, Vincent Guittot, linux-arm-kernel, patches,
	linaro-kernel, linux-kernel, linaro-acpi, Al Stone,
	Graeme Gregory, Naresh Bhat, Tomasz Nowicki, Hanjun Guo

In the cpu topology information, we define topology_physical_package_id()
as the cpu socket id, which means that the socket id is the identification
for a physical processor, not for a cluster in a cpu die.

On ARM64 platforms, multiple clusters in a cpu die will be normal; here is
an example with 2 cores in a cluster and 2 clusters in a socket:

|--------------------------------------|
|                socket                |
|                                      |
| |---------------|  |---------------| |
| |    cluster    |  |    cluster    | |
| |               |  |               | |
| | |----| |----| |  | |----| |----| | |
| | |core| |core| |  | |core| |core| | |
| | |----| |----| |  | |----| |----| | |
| |               |  |               | |
| |---------------|  |---------------| |
|                                      |
|--------------------------------------|

ARM64 extended the MPIDR to 64 bits and introduced another affinity level.
We can use this affinity level for the socket id and use the third highest
affinity level for the cluster id, which makes the socket id behave in its
original way.

Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
---
 arch/arm64/include/asm/topology.h |    1 +
 arch/arm64/kernel/topology.c      |    8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 8631808..ff68ecc 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -8,6 +8,7 @@
 struct cputopo_arm64 {
 	int thread_id;
 	int core_id;
+	int cluster_id;
 	int socket_id;
 	cpumask_t thread_sibling;
 	cpumask_t core_sibling;
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 1eb0435..6d1e5a6 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -80,12 +80,14 @@ void store_cpu_topology(unsigned int cpuid)
 			/* core performance interdependency */
 			cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
 			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
-			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
+			cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
+			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_3(mpidr);
 		} else {
 			/* largely independent cores */
 			cpuid_topo->thread_id = -1;
 			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
-			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
+			cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
+			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
 		}
 	} else {
 		/*
@@ -95,6 +97,7 @@ void store_cpu_topology(unsigned int cpuid)
 		 */
 		cpuid_topo->thread_id = -1;
 		cpuid_topo->core_id = 0;
+		cpuid_topo->cluster_id = -1;
 		cpuid_topo->socket_id = -1;
 	}
 
@@ -120,6 +123,7 @@ void __init init_cpu_topology(void)
 
 		topo->thread_id = -1;
 		topo->core_id =  -1;
+		topo->cluster_id = -1;
 		topo->socket_id = -1;
 		cpumask_clear(&topo->core_sibling);
 		cpumask_clear(&topo->thread_sibling);
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [RFC][PATCH 2/2] ARM64: introduce cluster id and make a difference between socket id
@ 2013-07-27 10:42   ` Hanjun Guo
  0 siblings, 0 replies; 30+ messages in thread
From: Hanjun Guo @ 2013-07-27 10:42 UTC (permalink / raw)
  To: linux-arm-kernel

In the cpu topology information, we define topology_physical_package_id()
as the cpu socket id, which means that the socket id is the identification
for a physical processor, not for a cluster in a cpu die.

On ARM64 platforms, multiple clusters in a cpu die will be normal; here is
an example with 2 cores in a cluster and 2 clusters in a socket:

|--------------------------------------|
|                socket                |
|                                      |
| |---------------|  |---------------| |
| |    cluster    |  |    cluster    | |
| |               |  |               | |
| | |----| |----| |  | |----| |----| | |
| | |core| |core| |  | |core| |core| | |
| | |----| |----| |  | |----| |----| | |
| |               |  |               | |
| |---------------|  |---------------| |
|                                      |
|--------------------------------------|

ARM64 extended the MPIDR to 64 bits and introduced another affinity level.
We can use this affinity level for the socket id and use the third highest
affinity level for the cluster id, which makes the socket id behave in its
original way.

Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
---
 arch/arm64/include/asm/topology.h |    1 +
 arch/arm64/kernel/topology.c      |    8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 8631808..ff68ecc 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -8,6 +8,7 @@
 struct cputopo_arm64 {
 	int thread_id;
 	int core_id;
+	int cluster_id;
 	int socket_id;
 	cpumask_t thread_sibling;
 	cpumask_t core_sibling;
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 1eb0435..6d1e5a6 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -80,12 +80,14 @@ void store_cpu_topology(unsigned int cpuid)
 			/* core performance interdependency */
 			cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
 			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
-			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
+			cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
+			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_3(mpidr);
 		} else {
 			/* largely independent cores */
 			cpuid_topo->thread_id = -1;
 			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
-			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
+			cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
+			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
 		}
 	} else {
 		/*
@@ -95,6 +97,7 @@ void store_cpu_topology(unsigned int cpuid)
 		 */
 		cpuid_topo->thread_id = -1;
 		cpuid_topo->core_id = 0;
+		cpuid_topo->cluster_id = -1;
 		cpuid_topo->socket_id = -1;
 	}
 
@@ -120,6 +123,7 @@ void __init init_cpu_topology(void)
 
 		topo->thread_id = -1;
 		topo->core_id =  -1;
+		topo->cluster_id = -1;
 		topo->socket_id = -1;
 		cpumask_clear(&topo->core_sibling);
 		cpumask_clear(&topo->thread_sibling);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [RFC][PATCH 2/2] ARM64: introduce cluster id and make a difference between socket id
  2013-07-27 10:42   ` Hanjun Guo
@ 2013-07-29  9:38     ` Vincent Guittot
  -1 siblings, 0 replies; 30+ messages in thread
From: Vincent Guittot @ 2013-07-29  9:38 UTC (permalink / raw)
  To: Hanjun Guo
  Cc: Catalin Marinas, Will Deacon, Russell King, LAK, Patch Tracking,
	linaro-kernel, linux-kernel, linaro-acpi, Al Stone,
	Graeme Gregory, Naresh Bhat, Tomasz Nowicki

On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
> In the cpu topology information, we define topology_physical_package_id()
> as cpu socket id, which means that the socket id is the idenfication for
> physical processor, not for a cluster in a cpu die.
>
> On ARM64 platform, multi cluster in a cpu die will be normal, here is a
> example with 2 cores in a cluster and 2 cluster in a socket:
>
> |--------------------------------------|
> |                socket                |
> |                                      |
> | |---------------|  |---------------| |
> | |    cluster    |  |    cluster    | |
> | |               |  |               | |
> | | |----| |----| |  | |----| |----| | |
> | | |core| |core| |  | |core| |core| | |
> | | |----| |----| |  | |----| |----| | |
> | |               |  |               | |
> | |---------------|  |---------------| |
> |                                      |
> |--------------------------------------|
>
> ARM64 extended the MPIDR into 64 bit and introduce another affinity level,
> we can use this affinity level for socket id and use the third highest level
> affinity for cluster id, which  make the socket id behavior in its original
> way.
>
> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
> ---
>  arch/arm64/include/asm/topology.h |    1 +
>  arch/arm64/kernel/topology.c      |    8 ++++++--
>  2 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
> index 8631808..ff68ecc 100644
> --- a/arch/arm64/include/asm/topology.h
> +++ b/arch/arm64/include/asm/topology.h
> @@ -8,6 +8,7 @@
>  struct cputopo_arm64 {
>         int thread_id;
>         int core_id;
> +       int cluster_id;
>         int socket_id;
>         cpumask_t thread_sibling;
>         cpumask_t core_sibling;
> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
> index 1eb0435..6d1e5a6 100644
> --- a/arch/arm64/kernel/topology.c
> +++ b/arch/arm64/kernel/topology.c
> @@ -80,12 +80,14 @@ void store_cpu_topology(unsigned int cpuid)
>                         /* core performance interdependency */
>                         cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
>                         cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
> -                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
> +                       cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
> +                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_3(mpidr);

socket_id is currently used by update_siblings_masks to update the
core_sibling mask. This mask defines which CPUs share their cache and,
AFAICT, the caches are shared at the cluster level, so cluster_id should
be used instead of socket_id.

Have you got more information about the goal of this new level_3 ?

Vincent

>                 } else {
>                         /* largely independent cores */
>                         cpuid_topo->thread_id = -1;
>                         cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
> -                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
> +                       cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
> +                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
>                 }
>         } else {
>                 /*
> @@ -95,6 +97,7 @@ void store_cpu_topology(unsigned int cpuid)
>                  */
>                 cpuid_topo->thread_id = -1;
>                 cpuid_topo->core_id = 0;
> +               cpuid_topo->cluster_id = -1;
>                 cpuid_topo->socket_id = -1;
>         }
>
> @@ -120,6 +123,7 @@ void __init init_cpu_topology(void)
>
>                 topo->thread_id = -1;
>                 topo->core_id =  -1;
> +               topo->cluster_id = -1;
>                 topo->socket_id = -1;
>                 cpumask_clear(&topo->core_sibling);
>                 cpumask_clear(&topo->thread_sibling);
> --
> 1.7.9.5
>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [RFC][PATCH 2/2] ARM64: introduce cluster id and make a difference between socket id
@ 2013-07-29  9:38     ` Vincent Guittot
  0 siblings, 0 replies; 30+ messages in thread
From: Vincent Guittot @ 2013-07-29  9:38 UTC (permalink / raw)
  To: linux-arm-kernel

On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
> In the cpu topology information, we define topology_physical_package_id()
> as cpu socket id, which means that the socket id is the idenfication for
> physical processor, not for a cluster in a cpu die.
>
> On ARM64 platform, multi cluster in a cpu die will be normal, here is a
> example with 2 cores in a cluster and 2 cluster in a socket:
>
> |--------------------------------------|
> |                socket                |
> |                                      |
> | |---------------|  |---------------| |
> | |    cluster    |  |    cluster    | |
> | |               |  |               | |
> | | |----| |----| |  | |----| |----| | |
> | | |core| |core| |  | |core| |core| | |
> | | |----| |----| |  | |----| |----| | |
> | |               |  |               | |
> | |---------------|  |---------------| |
> |                                      |
> |--------------------------------------|
>
> ARM64 extended the MPIDR into 64 bit and introduce another affinity level,
> we can use this affinity level for socket id and use the third highest level
> affinity for cluster id, which  make the socket id behavior in its original
> way.
>
> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
> ---
>  arch/arm64/include/asm/topology.h |    1 +
>  arch/arm64/kernel/topology.c      |    8 ++++++--
>  2 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
> index 8631808..ff68ecc 100644
> --- a/arch/arm64/include/asm/topology.h
> +++ b/arch/arm64/include/asm/topology.h
> @@ -8,6 +8,7 @@
>  struct cputopo_arm64 {
>         int thread_id;
>         int core_id;
> +       int cluster_id;
>         int socket_id;
>         cpumask_t thread_sibling;
>         cpumask_t core_sibling;
> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
> index 1eb0435..6d1e5a6 100644
> --- a/arch/arm64/kernel/topology.c
> +++ b/arch/arm64/kernel/topology.c
> @@ -80,12 +80,14 @@ void store_cpu_topology(unsigned int cpuid)
>                         /* core performance interdependency */
>                         cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
>                         cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
> -                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
> +                       cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
> +                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_3(mpidr);

socket_id is currently used by update_siblings_masks to update the
core_sibling mask. This mask defines which CPUs share their cache and,
AFAICT, the caches are shared at the cluster level, so cluster_id should
be used instead of socket_id.

Have you got more information about the goal of this new level_3 ?

Vincent

>                 } else {
>                         /* largely independent cores */
>                         cpuid_topo->thread_id = -1;
>                         cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
> -                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
> +                       cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
> +                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
>                 }
>         } else {
>                 /*
> @@ -95,6 +97,7 @@ void store_cpu_topology(unsigned int cpuid)
>                  */
>                 cpuid_topo->thread_id = -1;
>                 cpuid_topo->core_id = 0;
> +               cpuid_topo->cluster_id = -1;
>                 cpuid_topo->socket_id = -1;
>         }
>
> @@ -120,6 +123,7 @@ void __init init_cpu_topology(void)
>
>                 topo->thread_id = -1;
>                 topo->core_id =  -1;
> +               topo->cluster_id = -1;
>                 topo->socket_id = -1;
>                 cpumask_clear(&topo->core_sibling);
>                 cpumask_clear(&topo->thread_sibling);
> --
> 1.7.9.5
>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [RFC][PATCH 1/2] ARM64: add cpu topology definition
  2013-07-27 10:42 ` Hanjun Guo
@ 2013-07-29  9:46   ` Vincent Guittot
  -1 siblings, 0 replies; 30+ messages in thread
From: Vincent Guittot @ 2013-07-29  9:46 UTC (permalink / raw)
  To: Hanjun Guo
  Cc: Catalin Marinas, Will Deacon, Russell King, LAK, Patch Tracking,
	linaro-kernel, linux-kernel, linaro-acpi, Al Stone,
	Graeme Gregory, Naresh Bhat, Tomasz Nowicki

On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
> Power aware scheduling needs the cpu topology information to improve the
> cpu scheduler decision making.

It's not only power-aware scheduling. The scheduler already uses
topology and cache sharing when CONFIG_SCHED_MC and/or
CONFIG_SCHED_SMT are enabled. So you should also add these configs for
arm64 so the scheduler can use it.

Vincent

>
> For ARM64, we can get the topology from the MPIDR register which defines the
> the affinity of processors.
>
> This patch is mainly based on arch/arm/kernel/topology.c written by
> Vincent Guittot, and replaced the topology array with per cpu variable.
>
> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
> ---
>  arch/arm64/Kconfig                |    9 +++
>  arch/arm64/include/asm/cputype.h  |   11 ++++
>  arch/arm64/include/asm/topology.h |   41 ++++++++++++
>  arch/arm64/kernel/Makefile        |    1 +
>  arch/arm64/kernel/smp.c           |    6 ++
>  arch/arm64/kernel/topology.c      |  128 +++++++++++++++++++++++++++++++++++++
>  6 files changed, 196 insertions(+)
>  create mode 100644 arch/arm64/include/asm/topology.h
>  create mode 100644 arch/arm64/kernel/topology.c
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 9737e97..f0ce91b 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -150,6 +150,15 @@ config SMP
>
>           If you don't know what to do here, say N.
>
> +config ARM64_CPU_TOPOLOGY
> +       bool "Support cpu topology definition"
> +       depends on SMP && ARM64
> +       default y
> +       help
> +         Support ARM64 cpu topology definition. The MPIDR register defines
> +         affinity between processors which is then used to describe the cpu
> +         topology of an ARM64 System.
> +
>  config NR_CPUS
>         int "Maximum number of CPUs (2-32)"
>         range 2 32
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index 5fe138e..68b55af 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -30,6 +30,17 @@
>
>  #define MPIDR_HWID_BITMASK     0xff00ffffff
>
> +#define MPIDR_SMP_BITMASK      (0x1 << 30)
> +#define MPIDR_MT_BITMASK       (0x1 << 24)
> +
> +#define MPIDR_LEVEL_BITS       8
> +#define MPIDR_LEVEL_MASK       ((1 << MPIDR_LEVEL_BITS) - 1)
> +
> +#define MPIDR_AFFINITY_LEVEL_0(mpidr) ((mpidr) & MPIDR_LEVEL_MASK)
> +#define MPIDR_AFFINITY_LEVEL_1(mpidr) ((mpidr >> 8) & MPIDR_LEVEL_MASK)
> +#define MPIDR_AFFINITY_LEVEL_2(mpidr) ((mpidr >> 16) & MPIDR_LEVEL_MASK)
> +#define MPIDR_AFFINITY_LEVEL_3(mpidr) ((mpidr >> 32) & MPIDR_LEVEL_MASK)
> +
>  #define read_cpuid(reg) ({                                             \
>         u64 __val;                                                      \
>         asm("mrs        %0, " reg : "=r" (__val));                      \
> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
> new file mode 100644
> index 0000000..8631808
> --- /dev/null
> +++ b/arch/arm64/include/asm/topology.h
> @@ -0,0 +1,41 @@
> +#ifndef _ASM_ARM64_TOPOLOGY_H
> +#define _ASM_ARM64_TOPOLOGY_H
> +
> +#ifdef CONFIG_ARM64_CPU_TOPOLOGY
> +
> +#include <linux/cpumask.h>
> +
> +struct cputopo_arm64 {
> +       int thread_id;
> +       int core_id;
> +       int socket_id;
> +       cpumask_t thread_sibling;
> +       cpumask_t core_sibling;
> +};
> +
> +DECLARE_PER_CPU(struct cputopo_arm64, cpu_topology);
> +
> +#define cpu_topo(cpu) per_cpu(cpu_topology, cpu)
> +
> +#define topology_physical_package_id(cpu)      (cpu_topo(cpu).socket_id)
> +#define topology_core_id(cpu)          (cpu_topo(cpu).core_id)
> +#define topology_core_cpumask(cpu)     (&cpu_topo(cpu).core_sibling)
> +#define topology_thread_cpumask(cpu)   (&cpu_topo(cpu).thread_sibling)
> +
> +#define mc_capable()   (cpu_topo(0).socket_id != -1)
> +#define smt_capable()  (cpu_topo(0).thread_id != -1)
> +
> +void init_cpu_topology(void);
> +void store_cpu_topology(unsigned int cpuid);
> +const struct cpumask *cpu_coregroup_mask(int cpu);
> +
> +#else
> +
> +static inline void init_cpu_topology(void) { }
> +static inline void store_cpu_topology(unsigned int cpuid) { }
> +
> +#endif
> +
> +#include <asm-generic/topology.h>
> +
> +#endif /* _ASM_ARM64_TOPOLOGY_H */
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index 7b4b564..a47c359 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP)                       += smp.o smp_spin_table.o smp_psci.o
>  arm64-obj-$(CONFIG_HW_PERF_EVENTS)     += perf_event.o
>  arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
>  arm64-obj-$(CONFIG_EARLY_PRINTK)       += early_printk.o
> +arm64-obj-$(CONFIG_ARM64_CPU_TOPOLOGY)  += topology.o
>
>  obj-y                                  += $(arm64-obj-y) vdso/
>  obj-m                                  += $(arm64-obj-m)
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index fee5cce..197b1da 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -39,6 +39,7 @@
>  #include <asm/atomic.h>
>  #include <asm/cacheflush.h>
>  #include <asm/cputype.h>
> +#include <asm/topology.h>
>  #include <asm/mmu_context.h>
>  #include <asm/pgtable.h>
>  #include <asm/pgalloc.h>
> @@ -215,6 +216,8 @@ asmlinkage void secondary_start_kernel(void)
>         local_irq_enable();
>         local_fiq_enable();
>
> +       store_cpu_topology(cpu);
> +
>         /*
>          * OK, it's off to the idle thread for us
>          */
> @@ -387,6 +390,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
>         int cpu, err;
>         unsigned int ncores = num_possible_cpus();
>
> +       init_cpu_topology();
> +       store_cpu_topology(smp_processor_id());
> +
>         /*
>          * are we trying to boot more cores than exist?
>          */
> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
> new file mode 100644
> index 0000000..1eb0435
> --- /dev/null
> +++ b/arch/arm64/kernel/topology.c
> @@ -0,0 +1,128 @@
> +/*
> + * arch/arm64/kernel/topology.c
> + *
> + * Copyright (C) 2013 Linaro Limited.
> + * Written by: Hanjun Guo
> + *
> + * based on arch/arm/kernel/topology.c
> + *
> + * This file is subject to the terms and conditions of the GNU General Public
> + * License.  See the file "COPYING" in the main directory of this archive
> + * for more details.
> + */
> +
> +#include <linux/cpu.h>
> +#include <linux/cpumask.h>
> +#include <linux/init.h>
> +#include <linux/percpu.h>
> +#include <linux/sched.h>
> +
> +#include <asm/cputype.h>
> +#include <asm/topology.h>
> +
> +DEFINE_PER_CPU(struct cputopo_arm64, cpu_topology);
> +
> +const struct cpumask *cpu_coregroup_mask(int cpu)
> +{
> +       return &cpu_topo(cpu).core_sibling;
> +}
> +
> +void update_siblings_masks(unsigned int cpuid)
> +{
> +       struct cputopo_arm64 *topo, *cpuid_topo = &cpu_topo(cpuid);
> +       int cpu;
> +
> +       /* update core and thread sibling masks */
> +       for_each_possible_cpu(cpu) {
> +               topo = &cpu_topo(cpu);
> +
> +               if (cpuid_topo->socket_id != topo->socket_id)
> +                       continue;
> +
> +               cpumask_set_cpu(cpuid, &topo->core_sibling);
> +               if (cpu != cpuid)
> +                       cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
> +
> +               if (cpuid_topo->core_id != topo->core_id)
> +                       continue;
> +
> +               cpumask_set_cpu(cpuid, &topo->thread_sibling);
> +               if (cpu != cpuid)
> +                       cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
> +       }
> +       smp_wmb();
> +}
> +
> +/*
> + * store_cpu_topology is called at boot when only one cpu is running
> + * and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
> + * which prevents simultaneous write access to cpu_topology array
> + */
> +void store_cpu_topology(unsigned int cpuid)
> +{
> +       struct cputopo_arm64 *cpuid_topo = &cpu_topo(cpuid);
> +       u64 mpidr;
> +
> +       /* If the cpu topology has been already set, just return */
> +       if (cpuid_topo->core_id != -1)
> +               return;
> +
> +       mpidr = read_cpuid_mpidr();
> +
> +       /* create cpu topology mapping */
> +       if (!(mpidr & MPIDR_SMP_BITMASK)) {
> +               /*
> +                * This is a multiprocessor system
> +                * multiprocessor format & multiprocessor mode field are set
> +                */
> +
> +               if (mpidr & MPIDR_MT_BITMASK) {
> +                       /* core performance interdependency */
> +                       cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
> +                       cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
> +                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
> +               } else {
> +                       /* largely independent cores */
> +                       cpuid_topo->thread_id = -1;
> +                       cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
> +                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
> +               }
> +       } else {
> +               /*
> +                * This is an uniprocessor system
> +                * we are in multiprocessor format but uniprocessor system
> +                * or in the old uniprocessor format
> +                */
> +               cpuid_topo->thread_id = -1;
> +               cpuid_topo->core_id = 0;
> +               cpuid_topo->socket_id = -1;
> +       }
> +
> +       update_siblings_masks(cpuid);
> +
> +       printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr 0x%llx\n",
> +               cpuid, cpu_topo(cpuid).thread_id,
> +               cpu_topo(cpuid).core_id,
> +               cpu_topo(cpuid).socket_id, mpidr);
> +}
> +
> +/*
> + * init_cpu_topology is called at boot when only one cpu is running
> + * which prevent simultaneous write access to cpu_topology array
> + */
> +void __init init_cpu_topology(void)
> +{
> +       unsigned int cpu;
> +
> +       /* init core mask */
> +       for_each_possible_cpu(cpu) {
> +               struct cputopo_arm64 *topo = &cpu_topo(cpu);
> +
> +               topo->thread_id = -1;
> +               topo->core_id =  -1;
> +               topo->socket_id = -1;
> +               cpumask_clear(&topo->core_sibling);
> +               cpumask_clear(&topo->thread_sibling);
> +       }
> +       smp_wmb();
> +}
> --
> 1.7.9.5
>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [RFC][PATCH 1/2] ARM64: add cpu topology definition
@ 2013-07-29  9:46   ` Vincent Guittot
  0 siblings, 0 replies; 30+ messages in thread
From: Vincent Guittot @ 2013-07-29  9:46 UTC (permalink / raw)
  To: linux-arm-kernel

On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
> Power aware scheduling needs the cpu topology information to improve the
> cpu scheduler decision making.

It's not only power-aware scheduling. The scheduler already uses
topology and cache sharing when CONFIG_SCHED_MC and/or
CONFIG_SCHED_SMT are enabled. So you should also add these configs for
arm64 so the scheduler can use it.

Vincent

>
> For ARM64, we can get the topology from the MPIDR register which defines the
> the affinity of processors.
>
> This patch is mainly based on arch/arm/kernel/topology.c written by
> Vincent Guittot, and replaced the topology array with per cpu variable.
>
> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
> ---
>  arch/arm64/Kconfig                |    9 +++
>  arch/arm64/include/asm/cputype.h  |   11 ++++
>  arch/arm64/include/asm/topology.h |   41 ++++++++++++
>  arch/arm64/kernel/Makefile        |    1 +
>  arch/arm64/kernel/smp.c           |    6 ++
>  arch/arm64/kernel/topology.c      |  128 +++++++++++++++++++++++++++++++++++++
>  6 files changed, 196 insertions(+)
>  create mode 100644 arch/arm64/include/asm/topology.h
>  create mode 100644 arch/arm64/kernel/topology.c
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 9737e97..f0ce91b 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -150,6 +150,15 @@ config SMP
>
>           If you don't know what to do here, say N.
>
> +config ARM64_CPU_TOPOLOGY
> +       bool "Support cpu topology definition"
> +       depends on SMP && ARM64
> +       default y
> +       help
> +         Support ARM64 cpu topology definition. The MPIDR register defines
> +         affinity between processors which is then used to describe the cpu
> +         topology of an ARM64 System.
> +
>  config NR_CPUS
>         int "Maximum number of CPUs (2-32)"
>         range 2 32
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index 5fe138e..68b55af 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -30,6 +30,17 @@
>
>  #define MPIDR_HWID_BITMASK     0xff00ffffff
>
> +#define MPIDR_SMP_BITMASK      (0x1 << 30)
> +#define MPIDR_MT_BITMASK       (0x1 << 24)
> +
> +#define MPIDR_LEVEL_BITS       8
> +#define MPIDR_LEVEL_MASK       ((1 << MPIDR_LEVEL_BITS) - 1)
> +
> +#define MPIDR_AFFINITY_LEVEL_0(mpidr) ((mpidr) & MPIDR_LEVEL_MASK)
> +#define MPIDR_AFFINITY_LEVEL_1(mpidr) ((mpidr >> 8) & MPIDR_LEVEL_MASK)
> +#define MPIDR_AFFINITY_LEVEL_2(mpidr) ((mpidr >> 16) & MPIDR_LEVEL_MASK)
> +#define MPIDR_AFFINITY_LEVEL_3(mpidr) ((mpidr >> 32) & MPIDR_LEVEL_MASK)
> +
>  #define read_cpuid(reg) ({                                             \
>         u64 __val;                                                      \
>         asm("mrs        %0, " reg : "=r" (__val));                      \
> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
> new file mode 100644
> index 0000000..8631808
> --- /dev/null
> +++ b/arch/arm64/include/asm/topology.h
> @@ -0,0 +1,41 @@
> +#ifndef _ASM_ARM64_TOPOLOGY_H
> +#define _ASM_ARM64_TOPOLOGY_H
> +
> +#ifdef CONFIG_ARM64_CPU_TOPOLOGY
> +
> +#include <linux/cpumask.h>
> +
> +struct cputopo_arm64 {
> +       int thread_id;
> +       int core_id;
> +       int socket_id;
> +       cpumask_t thread_sibling;
> +       cpumask_t core_sibling;
> +};
> +
> +DECLARE_PER_CPU(struct cputopo_arm64, cpu_topology);
> +
> +#define cpu_topo(cpu) per_cpu(cpu_topology, cpu)
> +
> +#define topology_physical_package_id(cpu)      (cpu_topo(cpu).socket_id)
> +#define topology_core_id(cpu)          (cpu_topo(cpu).core_id)
> +#define topology_core_cpumask(cpu)     (&cpu_topo(cpu).core_sibling)
> +#define topology_thread_cpumask(cpu)   (&cpu_topo(cpu).thread_sibling)
> +
> +#define mc_capable()   (cpu_topo(0).socket_id != -1)
> +#define smt_capable()  (cpu_topo(0).thread_id != -1)
> +
> +void init_cpu_topology(void);
> +void store_cpu_topology(unsigned int cpuid);
> +const struct cpumask *cpu_coregroup_mask(int cpu);
> +
> +#else
> +
> +static inline void init_cpu_topology(void) { }
> +static inline void store_cpu_topology(unsigned int cpuid) { }
> +
> +#endif
> +
> +#include <asm-generic/topology.h>
> +
> +#endif /* _ASM_ARM64_TOPOLOGY_H */
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index 7b4b564..a47c359 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP)                       += smp.o smp_spin_table.o smp_psci.o
>  arm64-obj-$(CONFIG_HW_PERF_EVENTS)     += perf_event.o
>  arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
>  arm64-obj-$(CONFIG_EARLY_PRINTK)       += early_printk.o
> +arm64-obj-$(CONFIG_ARM64_CPU_TOPOLOGY)  += topology.o
>
>  obj-y                                  += $(arm64-obj-y) vdso/
>  obj-m                                  += $(arm64-obj-m)
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index fee5cce..197b1da 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -39,6 +39,7 @@
>  #include <asm/atomic.h>
>  #include <asm/cacheflush.h>
>  #include <asm/cputype.h>
> +#include <asm/topology.h>
>  #include <asm/mmu_context.h>
>  #include <asm/pgtable.h>
>  #include <asm/pgalloc.h>
> @@ -215,6 +216,8 @@ asmlinkage void secondary_start_kernel(void)
>         local_irq_enable();
>         local_fiq_enable();
>
> +       store_cpu_topology(cpu);
> +
>         /*
>          * OK, it's off to the idle thread for us
>          */
> @@ -387,6 +390,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
>         int cpu, err;
>         unsigned int ncores = num_possible_cpus();
>
> +       init_cpu_topology();
> +       store_cpu_topology(smp_processor_id());
> +
>         /*
>          * are we trying to boot more cores than exist?
>          */
> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
> new file mode 100644
> index 0000000..1eb0435
> --- /dev/null
> +++ b/arch/arm64/kernel/topology.c
> @@ -0,0 +1,128 @@
> +/*
> + * arch/arm64/kernel/topology.c
> + *
> + * Copyright (C) 2013 Linaro Limited.
> + * Written by: Hanjun Guo
> + *
> + * based on arch/arm/kernel/topology.c
> + *
> + * This file is subject to the terms and conditions of the GNU General Public
> + * License.  See the file "COPYING" in the main directory of this archive
> + * for more details.
> + */
> +
> +#include <linux/cpu.h>
> +#include <linux/cpumask.h>
> +#include <linux/init.h>
> +#include <linux/percpu.h>
> +#include <linux/sched.h>
> +
> +#include <asm/cputype.h>
> +#include <asm/topology.h>
> +
> +DEFINE_PER_CPU(struct cputopo_arm64, cpu_topology);
> +
> +const struct cpumask *cpu_coregroup_mask(int cpu)
> +{
> +       return &cpu_topo(cpu).core_sibling;
> +}
> +
> +void update_siblings_masks(unsigned int cpuid)
> +{
> +       struct cputopo_arm64 *topo, *cpuid_topo = &cpu_topo(cpuid);
> +       int cpu;
> +
> +       /* update core and thread sibling masks */
> +       for_each_possible_cpu(cpu) {
> +               topo = &cpu_topo(cpu);
> +
> +               if (cpuid_topo->socket_id != topo->socket_id)
> +                       continue;
> +
> +               cpumask_set_cpu(cpuid, &topo->core_sibling);
> +               if (cpu != cpuid)
> +                       cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
> +
> +               if (cpuid_topo->core_id != topo->core_id)
> +                       continue;
> +
> +               cpumask_set_cpu(cpuid, &topo->thread_sibling);
> +               if (cpu != cpuid)
> +                       cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
> +       }
> +       smp_wmb();
> +}
> +
> +/*
> + * store_cpu_topology is called at boot when only one cpu is running
> + * and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
> + * which prevents simultaneous write access to cpu_topology array
> + */
> +void store_cpu_topology(unsigned int cpuid)
> +{
> +       struct cputopo_arm64 *cpuid_topo = &cpu_topo(cpuid);
> +       u64 mpidr;
> +
> +       /* If the cpu topology has been already set, just return */
> +       if (cpuid_topo->core_id != -1)
> +               return;
> +
> +       mpidr = read_cpuid_mpidr();
> +
> +       /* create cpu topology mapping */
> +       if (!(mpidr & MPIDR_SMP_BITMASK)) {
> +               /*
> +                * This is a multiprocessor system
> +                * multiprocessor format & multiprocessor mode field are set
> +                */
> +
> +               if (mpidr & MPIDR_MT_BITMASK) {
> +                       /* core performance interdependency */
> +                       cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
> +                       cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
> +                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
> +               } else {
> +                       /* largely independent cores */
> +                       cpuid_topo->thread_id = -1;
> +                       cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
> +                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
> +               }
> +       } else {
> +               /*
> +                * This is an uniprocessor system
> +                * we are in multiprocessor format but uniprocessor system
> +                * or in the old uniprocessor format
> +                */
> +               cpuid_topo->thread_id = -1;
> +               cpuid_topo->core_id = 0;
> +               cpuid_topo->socket_id = -1;
> +       }
> +
> +       update_siblings_masks(cpuid);
> +
> +       printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr 0x%llx\n",
> +               cpuid, cpu_topo(cpuid).thread_id,
> +               cpu_topo(cpuid).core_id,
> +               cpu_topo(cpuid).socket_id, mpidr);
> +}
> +
> +/*
> + * init_cpu_topology is called at boot when only one cpu is running
> + * which prevent simultaneous write access to cpu_topology array
> + */
> +void __init init_cpu_topology(void)
> +{
> +       unsigned int cpu;
> +
> +       /* init core mask */
> +       for_each_possible_cpu(cpu) {
> +               struct cputopo_arm64 *topo = &cpu_topo(cpu);
> +
> +               topo->thread_id = -1;
> +               topo->core_id =  -1;
> +               topo->socket_id = -1;
> +               cpumask_clear(&topo->core_sibling);
> +               cpumask_clear(&topo->thread_sibling);
> +       }
> +       smp_wmb();
> +}
> --
> 1.7.9.5
>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [RFC][PATCH 1/2] ARM64: add cpu topology definition
  2013-07-29  9:46   ` Vincent Guittot
@ 2013-07-29  9:54     ` Will Deacon
  -1 siblings, 0 replies; 30+ messages in thread
From: Will Deacon @ 2013-07-29  9:54 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: Hanjun Guo, Catalin Marinas, Russell King, LAK, Patch Tracking,
	linaro-kernel, linux-kernel, linaro-acpi, Al Stone,
	Graeme Gregory, Naresh Bhat, Tomasz Nowicki, lorenzo.pieralisi

On Mon, Jul 29, 2013 at 10:46:06AM +0100, Vincent Guittot wrote:
> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
> > Power aware scheduling needs the cpu topology information to improve the
> > cpu scheduler decision making.
> 
> It's not only power aware scheduling. The scheduler already uses
> topology and cache sharing when  CONFIG_SCHED_MC and/or
> CONFIG_SCHED_SMT are enable. So you should also add these configs for
> arm64 so the scheduler can use it

... except that the architecture doesn't define what the AFF fields in MPIDR
really represent. Using them to make key scheduling decisions relating to
cache proximity seems pretty risky to me, especially given the track record
we've seen already on AArch32 silicon. It's a convenient register if it
contains the data we want it to contain, but we need to force ourselves to
come to terms with reality here and simply use it as an identifier for a
CPU.

Can't we just use the device-tree to represent this topological data for
arm64? Lorenzo has been working on bindings in this area.

Will

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [RFC][PATCH 1/2] ARM64: add cpu topology definition
@ 2013-07-29  9:54     ` Will Deacon
  0 siblings, 0 replies; 30+ messages in thread
From: Will Deacon @ 2013-07-29  9:54 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jul 29, 2013 at 10:46:06AM +0100, Vincent Guittot wrote:
> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
> > Power aware scheduling needs the cpu topology information to improve the
> > cpu scheduler decision making.
> 
> It's not only power aware scheduling. The scheduler already uses
> topology and cache sharing when  CONFIG_SCHED_MC and/or
> CONFIG_SCHED_SMT are enable. So you should also add these configs for
> arm64 so the scheduler can use it

... except that the architecture doesn't define what the AFF fields in MPIDR
really represent. Using them to make key scheduling decisions relating to
cache proximity seems pretty risky to me, especially given the track record
we've seen already on AArch32 silicon. It's a convenient register if it
contains the data we want it to contain, but we need to force ourselves to
come to terms with reality here and simply use it as an identifier for a
CPU.

Can't we just use the device-tree to represent this topological data for
arm64? Lorenzo has been working on bindings in this area.

Will

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [RFC][PATCH 1/2] ARM64: add cpu topology definition
  2013-07-29  9:46   ` Vincent Guittot
@ 2013-07-29 10:15     ` Sudeep KarkadaNagesha
  -1 siblings, 0 replies; 30+ messages in thread
From: Sudeep KarkadaNagesha @ 2013-07-29 10:15 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: Hanjun Guo, linaro-kernel, Graeme Gregory, Al Stone,
	Patch Tracking, Catalin Marinas, linaro-acpi, Will Deacon,
	linux-kernel, Tomasz Nowicki, Naresh Bhat, Russell King, LAK,
	Sudeep.KarkadaNagesha

On 29/07/13 10:46, Vincent Guittot wrote:
> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>> Power aware scheduling needs the cpu topology information to improve the
>> cpu scheduler decision making.
> 
> It's not only power aware scheduling. The scheduler already uses
> topology and cache sharing when  CONFIG_SCHED_MC and/or
> CONFIG_SCHED_SMT are enable. So you should also add these configs for
> arm64 so the scheduler can use it
> 
Just for my understanding: I thought power-aware scheduling using
SCHED_MC/SMT was removed. I see commit
8e7fbcbc22c12414bcc9dfdd683637f58fb32759 "sched: Remove stale power
aware scheduling remnants and dysfunctional knobs".
I may be missing something here.

Regards,
Sudeep


^ permalink raw reply	[flat|nested] 30+ messages in thread

* [RFC][PATCH 1/2] ARM64: add cpu topology definition
@ 2013-07-29 10:15     ` Sudeep KarkadaNagesha
  0 siblings, 0 replies; 30+ messages in thread
From: Sudeep KarkadaNagesha @ 2013-07-29 10:15 UTC (permalink / raw)
  To: linux-arm-kernel

On 29/07/13 10:46, Vincent Guittot wrote:
> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>> Power aware scheduling needs the cpu topology information to improve the
>> cpu scheduler decision making.
> 
> It's not only power aware scheduling. The scheduler already uses
> topology and cache sharing when  CONFIG_SCHED_MC and/or
> CONFIG_SCHED_SMT are enable. So you should also add these configs for
> arm64 so the scheduler can use it
> 
Just for my understanding: I thought power-aware scheduling using
SCHED_MC/SMT was removed. I see commit
8e7fbcbc22c12414bcc9dfdd683637f58fb32759 "sched: Remove stale power
aware scheduling remnants and dysfunctional knobs".
I may be missing something here.

Regards,
Sudeep

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [RFC][PATCH 1/2] ARM64: add cpu topology definition
  2013-07-29 10:15     ` Sudeep KarkadaNagesha
@ 2013-07-29 10:28       ` Vincent Guittot
  -1 siblings, 0 replies; 30+ messages in thread
From: Vincent Guittot @ 2013-07-29 10:28 UTC (permalink / raw)
  To: Sudeep KarkadaNagesha
  Cc: Hanjun Guo, linaro-kernel, Graeme Gregory, Al Stone,
	Patch Tracking, Catalin Marinas, linaro-acpi, Will Deacon,
	linux-kernel, Tomasz Nowicki, Naresh Bhat, Russell King, LAK

On 29 July 2013 12:15, Sudeep KarkadaNagesha
<Sudeep.KarkadaNagesha@arm.com> wrote:
> On 29/07/13 10:46, Vincent Guittot wrote:
>> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>>> Power aware scheduling needs the cpu topology information to improve the
>>> cpu scheduler decision making.
>>
>> It's not only power aware scheduling. The scheduler already uses
>> topology and cache sharing when  CONFIG_SCHED_MC and/or
>> CONFIG_SCHED_SMT are enable. So you should also add these configs for
>> arm64 so the scheduler can use it
>>
> Just for my knowledge, I thought power aware using SCHED_MC/SMT was
> removed. I see commit 8e7fbcbc22c12414bcc9dfdd683637f58fb32759 "sched:
> Remove stale power aware scheduling remnants and dysfunctional knobs"
> I may be missing something here.

It's a common mistake to confuse SCHED_MC itself with the powersaving
balance that was built on SCHED_MC. Only the powersaving policy has been
removed; SCHED_MC and SCHED_SMT are still in the scheduler and give a
performance improvement on 32-bit ARM.

>
> Regards,
> Sudeep
>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [RFC][PATCH 1/2] ARM64: add cpu topology definition
@ 2013-07-29 10:28       ` Vincent Guittot
  0 siblings, 0 replies; 30+ messages in thread
From: Vincent Guittot @ 2013-07-29 10:28 UTC (permalink / raw)
  To: linux-arm-kernel

On 29 July 2013 12:15, Sudeep KarkadaNagesha
<Sudeep.KarkadaNagesha@arm.com> wrote:
> On 29/07/13 10:46, Vincent Guittot wrote:
>> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>>> Power aware scheduling needs the cpu topology information to improve the
>>> cpu scheduler decision making.
>>
>> It's not only power aware scheduling. The scheduler already uses
>> topology and cache sharing when  CONFIG_SCHED_MC and/or
>> CONFIG_SCHED_SMT are enable. So you should also add these configs for
>> arm64 so the scheduler can use it
>>
> Just for my knowledge, I thought power aware using SCHED_MC/SMT was
> removed. I see commit 8e7fbcbc22c12414bcc9dfdd683637f58fb32759 "sched:
> Remove stale power aware scheduling remnants and dysfunctional knobs"
> I may be missing something here.

It's a common mistake to confuse SCHED_MC itself with the powersaving
balance that was built on SCHED_MC. Only the powersaving policy has been
removed; SCHED_MC and SCHED_SMT are still in the scheduler and give a
performance improvement on 32-bit ARM.

>
> Regards,
> Sudeep
>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [RFC][PATCH 1/2] ARM64: add cpu topology definition
  2013-07-29  9:54     ` Will Deacon
@ 2013-07-29 10:39       ` Vincent Guittot
  -1 siblings, 0 replies; 30+ messages in thread
From: Vincent Guittot @ 2013-07-29 10:39 UTC (permalink / raw)
  To: Will Deacon
  Cc: Hanjun Guo, Catalin Marinas, Russell King, LAK, Patch Tracking,
	linaro-kernel, linux-kernel, linaro-acpi, Al Stone,
	Graeme Gregory, Naresh Bhat, Tomasz Nowicki, Lorenzo Pieralisi

On 29 July 2013 11:54, Will Deacon <will.deacon@arm.com> wrote:
> On Mon, Jul 29, 2013 at 10:46:06AM +0100, Vincent Guittot wrote:
>> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>> > Power aware scheduling needs the cpu topology information to improve the
>> > cpu scheduler decision making.
>>
>> It's not only power aware scheduling. The scheduler already uses
>> topology and cache sharing when  CONFIG_SCHED_MC and/or
>> CONFIG_SCHED_SMT are enable. So you should also add these configs for
>> arm64 so the scheduler can use it
>
> ... except that the architecture doesn't define what the AFF fields in MPIDR
> really represent. Using them to make key scheduling decisions relating to

Do you mean that it's not defined in the arm64 ARM? AFAIK, there is a
good explanation in the arm32 ARM, and it's currently used with SCHED_MC
and SCHED_SMT.

> cache proximity seems pretty risky to me, especially given the track record
> we've seen already on AArch32 silicon. It's a convenient register if it
> contains the data we want it to contain, but we need to force ourselves to
> come to terms with reality here and simply use it as an identifier for a
> CPU.
>
> Can't we just use the device-tree to represent this topological data for
> arm64? Lorenzo has been working on bindings in this area.

I agree that we should probably use DT if we can't rely on MPIDR for arm64.

Vincent
>
> Will
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [RFC][PATCH 1/2] ARM64: add cpu topology definition
@ 2013-07-29 10:39       ` Vincent Guittot
  0 siblings, 0 replies; 30+ messages in thread
From: Vincent Guittot @ 2013-07-29 10:39 UTC (permalink / raw)
  To: linux-arm-kernel

On 29 July 2013 11:54, Will Deacon <will.deacon@arm.com> wrote:
> On Mon, Jul 29, 2013 at 10:46:06AM +0100, Vincent Guittot wrote:
>> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>> > Power aware scheduling needs the cpu topology information to improve the
>> > cpu scheduler decision making.
>>
>> It's not only power aware scheduling. The scheduler already uses
>> topology and cache sharing when  CONFIG_SCHED_MC and/or
>> CONFIG_SCHED_SMT are enable. So you should also add these configs for
>> arm64 so the scheduler can use it
>
> ... except that the architecture doesn't define what the AFF fields in MPIDR
> really represent. Using them to make key scheduling decisions relating to

Do you mean that it's not defined in the arm64 ARM? AFAIK, there is a good
explanation in the arm32 ARM, and it's currently used with SCHED_MC and
SCHED_SMT

> cache proximity seems pretty risky to me, especially given the track record
> we've seen already on AArch32 silicon. It's a convenient register if it
> contains the data we want it to contain, but we need to force ourselves to
> come to terms with reality here and simply use it as an identifier for a
> CPU.
>
> Can't we just use the device-tree to represent this topological data for
> arm64? Lorenzo has been working on bindings in this area.

I agree that we should probably use DT if we can't rely on MPIDR for arm64

Vincent
>
> Will
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [RFC][PATCH 1/2] ARM64: add cpu topology definition
  2013-07-29  9:54     ` Will Deacon
@ 2013-07-29 13:36       ` Dave Martin
  -1 siblings, 0 replies; 30+ messages in thread
From: Dave Martin @ 2013-07-29 13:36 UTC (permalink / raw)
  To: Will Deacon
  Cc: Vincent Guittot, linaro-kernel, Graeme Gregory, Al Stone,
	Patch Tracking, Catalin Marinas, linaro-acpi, linux-kernel,
	Tomasz Nowicki, Hanjun Guo, Naresh Bhat, Russell King, LAK

On Mon, Jul 29, 2013 at 10:54:01AM +0100, Will Deacon wrote:
> On Mon, Jul 29, 2013 at 10:46:06AM +0100, Vincent Guittot wrote:
> > On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
> > > Power aware scheduling needs the cpu topology information to improve the
> > > cpu scheduler decision making.
> > 
> > It's not only power aware scheduling. The scheduler already uses
> > topology and cache sharing when  CONFIG_SCHED_MC and/or
> > CONFIG_SCHED_SMT are enable. So you should also add these configs for
> > arm64 so the scheduler can use it
> 
> ... except that the architecture doesn't define what the AFF fields in MPIDR
> really represent. Using them to make key scheduling decisions relating to

In fact, the ARM Architecture doesn't place any requirements on MPIDRs to
force the aff fields to exist _at all_.  It's just a recommendation.
Instead, you have a 24 or 32-bit number which is unique per CPU, and which
is _probably_ assigned in a way resembling the aff fields.

> cache proximity seems pretty risky to me, especially given the track record
> we've seen already on AArch32 silicon. It's a convenient register if it
> contains the data we want it to contain, but we need to force ourselves to
> come to terms with reality here and simply use it as an identifier for a
> CPU.

+1

Also, we should align arm and arm64.  The problem is basically exactly
the same, and the solution needs to be the same.  struct cputopo_arm is
already being abused  -- for example, TC2 describes the A15 and A7
clusters on a single die as having different "socket_id" values, even
though this is obviously nonsense.  But there's no other way to describe
that system today.

> Can't we just use the device-tree to represent this topological data for
> arm64? Lorenzo has been working on bindings in this area.

This may become more important as we start to see things like asymmetric
topologies appearing (different numbers of nodes and different
interdependence characteristics in adjacent branches of the topology
etc.)

Cheers
---Dave

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [RFC][PATCH 1/2] ARM64: add cpu topology definition
@ 2013-07-29 13:36       ` Dave Martin
  0 siblings, 0 replies; 30+ messages in thread
From: Dave Martin @ 2013-07-29 13:36 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jul 29, 2013 at 10:54:01AM +0100, Will Deacon wrote:
> On Mon, Jul 29, 2013 at 10:46:06AM +0100, Vincent Guittot wrote:
> > On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
> > > Power aware scheduling needs the cpu topology information to improve the
> > > cpu scheduler decision making.
> > 
> > It's not only power aware scheduling. The scheduler already uses
> > topology and cache sharing when  CONFIG_SCHED_MC and/or
> > CONFIG_SCHED_SMT are enable. So you should also add these configs for
> > arm64 so the scheduler can use it
> 
> ... except that the architecture doesn't define what the AFF fields in MPIDR
> really represent. Using them to make key scheduling decisions relating to

In fact, the ARM Architecture doesn't place any requirements on MPIDRs to
force the aff fields to exist _at all_.  It's just a recommendation.
Instead, you have a 24 or 32-bit number which is unique per CPU, and which
is _probably_ assigned in a way resembling the aff fields.

> cache proximity seems pretty risky to me, especially given the track record
> we've seen already on AArch32 silicon. It's a convenient register if it
> contains the data we want it to contain, but we need to force ourselves to
> come to terms with reality here and simply use it as an identifier for a
> CPU.

+1

Also, we should align arm and arm64.  The problem is basically exactly
the same, and the solution needs to be the same.  struct cputopo_arm is
already being abused  -- for example, TC2 describes the A15 and A7
clusters on a single die as having different "socket_id" values, even
though this is obviously nonsense.  But there's no other way to describe
that system today.

> Can't we just use the device-tree to represent this topological data for
> arm64? Lorenzo has been working on bindings in this area.

This may become more important as we start to see things like asymmetric
topologies appearing (different numbers of nodes and different
interdependence characteristics in adjacent branches of the topology
etc.)

Cheers
---Dave

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [RFC][PATCH 1/2] ARM64: add cpu topology definition
  2013-07-29 13:36       ` Dave Martin
@ 2013-07-29 17:23         ` Lorenzo Pieralisi
  -1 siblings, 0 replies; 30+ messages in thread
From: Lorenzo Pieralisi @ 2013-07-29 17:23 UTC (permalink / raw)
  To: Dave Martin
  Cc: Will Deacon, Vincent Guittot, linaro-kernel, Graeme Gregory,
	Al Stone, Patch Tracking, Catalin Marinas, linaro-acpi,
	linux-kernel, Tomasz Nowicki, Hanjun Guo, Naresh Bhat,
	Russell King, LAK

On Mon, Jul 29, 2013 at 02:36:30PM +0100, Dave Martin wrote:
> On Mon, Jul 29, 2013 at 10:54:01AM +0100, Will Deacon wrote:
> > On Mon, Jul 29, 2013 at 10:46:06AM +0100, Vincent Guittot wrote:
> > > On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
> > > > Power aware scheduling needs the cpu topology information to improve the
> > > > cpu scheduler decision making.
> > > 
> > > It's not only power aware scheduling. The scheduler already uses
> > > topology and cache sharing when  CONFIG_SCHED_MC and/or
> > > CONFIG_SCHED_SMT are enable. So you should also add these configs for
> > > arm64 so the scheduler can use it
> > 
> > ... except that the architecture doesn't define what the AFF fields in MPIDR
> > really represent. Using them to make key scheduling decisions relating to
> 
> In fact, the ARM Architecture doesn't place any requirements on MPIDRs to
> force the aff fields to exist _at all_.  It's just a recommendation.
> Instead, you have a 24 or 32-bit number which is unique per CPU, and which
> is _probably_ assigned in a way resembling the aff fields.
> 
> > cache proximity seems pretty risky to me, especially given the track record
> > we've seen already on AArch32 silicon. It's a convenient register if it
> > contains the data we want it to contain, but we need to force ourselves to
> > come to terms with reality here and simply use it as an identifier for a
> > CPU.
> 
> +1
> 
> Also, we should align arm and arm64.  The problem is basically exactly
> the same, and the solution needs to be the same.  struct cputopo_arm is
> already being abused  -- for example, TC2 describes the A15 and A7
> clusters on a single die as having different "socket_id" values, even
> though this is obviously nonsense.  But there's no other way to describe
> that system today.
> 
> > Can't we just use the device-tree to represent this topological data for
> > arm64? Lorenzo has been working on bindings in this area.
> 
> This may become more important as we start to see things like asymmetric
> topologies appearing (different numbers of nodes and different
> interdependence characteristics in adjacent branches of the topology
> etc.)

Will and Dave summed up the existing issues with the MPIDR definition as it
relates to the topology description.

FYI, a link to the current topology bindings posted on DT-discuss and LAKML:

https://lists.ozlabs.org/pipermail/devicetree-discuss/2013-April/031725.html

I am waiting for the dust to settle on the DT bindings review discussions to
repost them and get them finalized.

Lorenzo


^ permalink raw reply	[flat|nested] 30+ messages in thread

* [RFC][PATCH 1/2] ARM64: add cpu topology definition
@ 2013-07-29 17:23         ` Lorenzo Pieralisi
  0 siblings, 0 replies; 30+ messages in thread
From: Lorenzo Pieralisi @ 2013-07-29 17:23 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jul 29, 2013 at 02:36:30PM +0100, Dave Martin wrote:
> On Mon, Jul 29, 2013 at 10:54:01AM +0100, Will Deacon wrote:
> > On Mon, Jul 29, 2013 at 10:46:06AM +0100, Vincent Guittot wrote:
> > > On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
> > > > Power aware scheduling needs the cpu topology information to improve the
> > > > cpu scheduler decision making.
> > > 
> > > It's not only power aware scheduling. The scheduler already uses
> > > topology and cache sharing when  CONFIG_SCHED_MC and/or
> > > CONFIG_SCHED_SMT are enable. So you should also add these configs for
> > > arm64 so the scheduler can use it
> > 
> > ... except that the architecture doesn't define what the AFF fields in MPIDR
> > really represent. Using them to make key scheduling decisions relating to
> 
> In fact, the ARM Architecture doesn't place any requirements on MPIDRs to
> force the aff fields to exist _at all_.  It's just a recommendation.
> Instead, you have a 24 or 32-bit number which is unique per CPU, and which
> is _probably_ assigned in a way resembling the aff fields.
> 
> > cache proximity seems pretty risky to me, especially given the track record
> > we've seen already on AArch32 silicon. It's a convenient register if it
> > contains the data we want it to contain, but we need to force ourselves to
> > come to terms with reality here and simply use it as an identifier for a
> > CPU.
> 
> +1
> 
> Also, we should align arm and arm64.  The problem is basically exactly
> the same, and the solution needs to be the same.  struct cputopo_arm is
> already being abused  -- for example, TC2 describes the A15 and A7
> clusters on a single die as having different "socket_id" values, even
> though this is obviously nonsense.  But there's no other way to describe
> that system today.
> 
> > Can't we just use the device-tree to represent this topological data for
> > arm64? Lorenzo has been working on bindings in this area.
> 
> This may become more important as we start to see things like asymmetric
> topologies appearing (different numbers of nodes and different
> interdependence characteristics in adjacent branches of the topology
> etc.)

Will and Dave summed up the existing issues with the MPIDR definition as it
relates to the topology description.

FYI, a link to the current topology bindings posted on DT-discuss and LAKML:

https://lists.ozlabs.org/pipermail/devicetree-discuss/2013-April/031725.html

I am waiting for the dust to settle on the DT bindings review discussions to
repost them and get them finalized.

Lorenzo

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [RFC][PATCH 2/2] ARM64: introduce cluster id and make a difference between socket id
  2013-07-29  9:38     ` Vincent Guittot
@ 2013-07-30  7:46       ` Hanjun Guo
  -1 siblings, 0 replies; 30+ messages in thread
From: Hanjun Guo @ 2013-07-30  7:46 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: Catalin Marinas, Will Deacon, Russell King, LAK, Patch Tracking,
	linaro-kernel, linux-kernel, linaro-acpi, Al Stone,
	Graeme Gregory, Naresh Bhat, Tomasz Nowicki

On 2013-7-29 17:38, Vincent Guittot wrote:
> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>> In the cpu topology information, we define topology_physical_package_id()
>> as cpu socket id, which means that the socket id is the idenfication for
>> physical processor, not for a cluster in a cpu die.
>>
>> On ARM64 platform, multi cluster in a cpu die will be normal, here is a
>> example with 2 cores in a cluster and 2 cluster in a socket:
>>
>> |--------------------------------------|
>> |                socket                |
>> |                                      |
>> | |---------------|  |---------------| |
>> | |    cluster    |  |    cluster    | |
>> | |               |  |               | |
>> | | |----| |----| |  | |----| |----| | |
>> | | |core| |core| |  | |core| |core| | |
>> | | |----| |----| |  | |----| |----| | |
>> | |               |  |               | |
>> | |---------------|  |---------------| |
>> |                                      |
>> |--------------------------------------|
>>
>> ARM64 extended the MPIDR into 64 bit and introduce another affinity level,
>> we can use this affinity level for socket id and use the third highest level
>> affinity for cluster id, which  make the socket id behavior in its original
>> way.
>>
>> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
>> ---
>>  arch/arm64/include/asm/topology.h |    1 +
>>  arch/arm64/kernel/topology.c      |    8 ++++++--
>>  2 files changed, 7 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
>> index 8631808..ff68ecc 100644
>> --- a/arch/arm64/include/asm/topology.h
>> +++ b/arch/arm64/include/asm/topology.h
>> @@ -8,6 +8,7 @@
>>  struct cputopo_arm64 {
>>         int thread_id;
>>         int core_id;
>> +       int cluster_id;
>>         int socket_id;
>>         cpumask_t thread_sibling;
>>         cpumask_t core_sibling;
>> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
>> index 1eb0435..6d1e5a6 100644
>> --- a/arch/arm64/kernel/topology.c
>> +++ b/arch/arm64/kernel/topology.c
>> @@ -80,12 +80,14 @@ void store_cpu_topology(unsigned int cpuid)
>>                         /* core performance interdependency */
>>                         cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
>>                         cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
>> -                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
>> +                       cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
>> +                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_3(mpidr);
> 
> socket_id is currently used by update_siblings_masks to update the
> core_sibling mask. This mask defines which CPUs share their cache and
> AFAICT, the cache are shared at the cluster level so cluster_id should
> be used instead socket_id.

On some architectures, cpu cores in a cluster share an L2 cache, and clusters
in the cpu die share an L3 cache, so I think we can distinguish between
socket id and cluster id.

> 
> Have you got more information about the goal of this new level_3 ?

Actually no. I think ARM should give some recommendations, as was done for ARMv7.

Thanks
Hanjun

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [RFC][PATCH 2/2] ARM64: introduce cluster id and make a difference between socket id
@ 2013-07-30  7:46       ` Hanjun Guo
  0 siblings, 0 replies; 30+ messages in thread
From: Hanjun Guo @ 2013-07-30  7:46 UTC (permalink / raw)
  To: linux-arm-kernel

On 2013-7-29 17:38, Vincent Guittot wrote:
> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>> In the cpu topology information, we define topology_physical_package_id()
>> as cpu socket id, which means that the socket id is the idenfication for
>> physical processor, not for a cluster in a cpu die.
>>
>> On ARM64 platform, multi cluster in a cpu die will be normal, here is a
>> example with 2 cores in a cluster and 2 cluster in a socket:
>>
>> |--------------------------------------|
>> |                socket                |
>> |                                      |
>> | |---------------|  |---------------| |
>> | |    cluster    |  |    cluster    | |
>> | |               |  |               | |
>> | | |----| |----| |  | |----| |----| | |
>> | | |core| |core| |  | |core| |core| | |
>> | | |----| |----| |  | |----| |----| | |
>> | |               |  |               | |
>> | |---------------|  |---------------| |
>> |                                      |
>> |--------------------------------------|
>>
>> ARM64 extended the MPIDR into 64 bit and introduce another affinity level,
>> we can use this affinity level for socket id and use the third highest level
>> affinity for cluster id, which  make the socket id behavior in its original
>> way.
>>
>> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
>> ---
>>  arch/arm64/include/asm/topology.h |    1 +
>>  arch/arm64/kernel/topology.c      |    8 ++++++--
>>  2 files changed, 7 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
>> index 8631808..ff68ecc 100644
>> --- a/arch/arm64/include/asm/topology.h
>> +++ b/arch/arm64/include/asm/topology.h
>> @@ -8,6 +8,7 @@
>>  struct cputopo_arm64 {
>>         int thread_id;
>>         int core_id;
>> +       int cluster_id;
>>         int socket_id;
>>         cpumask_t thread_sibling;
>>         cpumask_t core_sibling;
>> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
>> index 1eb0435..6d1e5a6 100644
>> --- a/arch/arm64/kernel/topology.c
>> +++ b/arch/arm64/kernel/topology.c
>> @@ -80,12 +80,14 @@ void store_cpu_topology(unsigned int cpuid)
>>                         /* core performance interdependency */
>>                         cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
>>                         cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
>> -                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
>> +                       cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
>> +                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_3(mpidr);
> 
> socket_id is currently used by update_siblings_masks to update the
> core_sibling mask. This mask defines which CPUs share their cache and
> AFAICT, the cache are shared at the cluster level so cluster_id should
> be used instead socket_id.

On some architectures, cpu cores in a cluster share an L2 cache, and clusters
in the cpu die share an L3 cache, so I think we can distinguish between
socket id and cluster id.

> 
> Have you got more information about the goal of this new level_3 ?

Actually no. I think ARM should give some recommendations, as was done for ARMv7.

Thanks
Hanjun

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [RFC][PATCH 1/2] ARM64: add cpu topology definition
  2013-07-29  9:46   ` Vincent Guittot
@ 2013-07-30  7:49     ` Hanjun Guo
  -1 siblings, 0 replies; 30+ messages in thread
From: Hanjun Guo @ 2013-07-30  7:49 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: Catalin Marinas, Will Deacon, Russell King, LAK, Patch Tracking,
	linaro-kernel, linux-kernel, linaro-acpi, Al Stone,
	Graeme Gregory, Naresh Bhat, Tomasz Nowicki

On 2013-7-29 17:46, Vincent Guittot wrote:
> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>> Power aware scheduling needs the cpu topology information to improve the
>> cpu scheduler decision making.
> 
> It's not only power aware scheduling. The scheduler already uses
> topology and cache sharing when  CONFIG_SCHED_MC and/or
> CONFIG_SCHED_SMT are enable. So you should also add these configs for
> arm64 so the scheduler can use it

Yes, you are right, thanks for the advice.

> 
> Vincent
> 
>>
>> For ARM64, we can get the topology from the MPIDR register which defines the
>> the affinity of processors.
>>
>> This patch is mainly based on arch/arm/kernel/topology.c written by
>> Vincent Guittot, and replaced the topology array with per cpu variable.
>>
>> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
>> ---
>>  arch/arm64/Kconfig                |    9 +++
>>  arch/arm64/include/asm/cputype.h  |   11 ++++
>>  arch/arm64/include/asm/topology.h |   41 ++++++++++++
>>  arch/arm64/kernel/Makefile        |    1 +
>>  arch/arm64/kernel/smp.c           |    6 ++
>>  arch/arm64/kernel/topology.c      |  128 +++++++++++++++++++++++++++++++++++++
>>  6 files changed, 196 insertions(+)
>>  create mode 100644 arch/arm64/include/asm/topology.h
>>  create mode 100644 arch/arm64/kernel/topology.c
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 9737e97..f0ce91b 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -150,6 +150,15 @@ config SMP
>>
>>           If you don't know what to do here, say N.
>>
>> +config ARM64_CPU_TOPOLOGY
>> +       bool "Support cpu topology definition"
>> +       depends on SMP && ARM64
>> +       default y
>> +       help
>> +         Support ARM64 cpu topology definition. The MPIDR register defines
>> +         affinity between processors which is then used to describe the cpu
>> +         topology of an ARM64 System.
>> +
>>  config NR_CPUS
>>         int "Maximum number of CPUs (2-32)"
>>         range 2 32
>> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
>> index 5fe138e..68b55af 100644
>> --- a/arch/arm64/include/asm/cputype.h
>> +++ b/arch/arm64/include/asm/cputype.h
>> @@ -30,6 +30,17 @@
>>
>>  #define MPIDR_HWID_BITMASK     0xff00ffffff
>>
>> +#define MPIDR_SMP_BITMASK      (0x1 << 30)
>> +#define MPIDR_MT_BITMASK       (0x1 << 24)
>> +
>> +#define MPIDR_LEVEL_BITS       8
>> +#define MPIDR_LEVEL_MASK       ((1 << MPIDR_LEVEL_BITS) - 1)
>> +
>> +#define MPIDR_AFFINITY_LEVEL_0(mpidr) ((mpidr) & MPIDR_LEVEL_MASK)
>> +#define MPIDR_AFFINITY_LEVEL_1(mpidr) ((mpidr >> 8) & MPIDR_LEVEL_MASK)
>> +#define MPIDR_AFFINITY_LEVEL_2(mpidr) ((mpidr >> 16) & MPIDR_LEVEL_MASK)
>> +#define MPIDR_AFFINITY_LEVEL_3(mpidr) ((mpidr >> 32) & MPIDR_LEVEL_MASK)
>> +
>>  #define read_cpuid(reg) ({                                             \
>>         u64 __val;                                                      \
>>         asm("mrs        %0, " reg : "=r" (__val));                      \
>> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
>> new file mode 100644
>> index 0000000..8631808
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/topology.h
>> @@ -0,0 +1,41 @@
>> +#ifndef _ASM_ARM64_TOPOLOGY_H
>> +#define _ASM_ARM64_TOPOLOGY_H
>> +
>> +#ifdef CONFIG_ARM64_CPU_TOPOLOGY
>> +
>> +#include <linux/cpumask.h>
>> +
>> +struct cputopo_arm64 {
>> +       int thread_id;
>> +       int core_id;
>> +       int socket_id;
>> +       cpumask_t thread_sibling;
>> +       cpumask_t core_sibling;
>> +};
>> +
>> +DECLARE_PER_CPU(struct cputopo_arm64, cpu_topology);
>> +
>> +#define cpu_topo(cpu) per_cpu(cpu_topology, cpu)
>> +
>> +#define topology_physical_package_id(cpu)      (cpu_topo(cpu).socket_id)
>> +#define topology_core_id(cpu)          (cpu_topo(cpu).core_id)
>> +#define topology_core_cpumask(cpu)     (&cpu_topo(cpu).core_sibling)
>> +#define topology_thread_cpumask(cpu)   (&cpu_topo(cpu).thread_sibling)
>> +
>> +#define mc_capable()   (cpu_topo(0).socket_id != -1)
>> +#define smt_capable()  (cpu_topo(0).thread_id != -1)
>> +
>> +void init_cpu_topology(void);
>> +void store_cpu_topology(unsigned int cpuid);
>> +const struct cpumask *cpu_coregroup_mask(int cpu);
>> +
>> +#else
>> +
>> +static inline void init_cpu_topology(void) { }
>> +static inline void store_cpu_topology(unsigned int cpuid) { }
>> +
>> +#endif
>> +
>> +#include <asm-generic/topology.h>
>> +
>> +#endif /* _ASM_ARM64_TOPOLOGY_H */
>> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
>> index 7b4b564..a47c359 100644
>> --- a/arch/arm64/kernel/Makefile
>> +++ b/arch/arm64/kernel/Makefile
>> @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP)                       += smp.o smp_spin_table.o smp_psci.o
>>  arm64-obj-$(CONFIG_HW_PERF_EVENTS)     += perf_event.o
>>  arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
>>  arm64-obj-$(CONFIG_EARLY_PRINTK)       += early_printk.o
>> +arm64-obj-$(CONFIG_ARM64_CPU_TOPOLOGY)  += topology.o
>>
>>  obj-y                                  += $(arm64-obj-y) vdso/
>>  obj-m                                  += $(arm64-obj-m)
>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>> index fee5cce..197b1da 100644
>> --- a/arch/arm64/kernel/smp.c
>> +++ b/arch/arm64/kernel/smp.c
>> @@ -39,6 +39,7 @@
>>  #include <asm/atomic.h>
>>  #include <asm/cacheflush.h>
>>  #include <asm/cputype.h>
>> +#include <asm/topology.h>
>>  #include <asm/mmu_context.h>
>>  #include <asm/pgtable.h>
>>  #include <asm/pgalloc.h>
>> @@ -215,6 +216,8 @@ asmlinkage void secondary_start_kernel(void)
>>         local_irq_enable();
>>         local_fiq_enable();
>>
>> +       store_cpu_topology(cpu);
>> +
>>         /*
>>          * OK, it's off to the idle thread for us
>>          */
>> @@ -387,6 +390,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
>>         int cpu, err;
>>         unsigned int ncores = num_possible_cpus();
>>
>> +       init_cpu_topology();
>> +       store_cpu_topology(smp_processor_id());
>> +
>>         /*
>>          * are we trying to boot more cores than exist?
>>          */
>> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
>> new file mode 100644
>> index 0000000..1eb0435
>> --- /dev/null
>> +++ b/arch/arm64/kernel/topology.c
>> @@ -0,0 +1,128 @@
>> +/*
>> + * arch/arm64/kernel/topology.c
>> + *
>> + * Copyright (C) 2013 Linaro Limited.
>> + * Written by: Hanjun Guo
>> + *
>> + * based on arch/arm/kernel/topology.c
>> + *
>> + * This file is subject to the terms and conditions of the GNU General Public
>> + * License.  See the file "COPYING" in the main directory of this archive
>> + * for more details.
>> + */
>> +
>> +#include <linux/cpu.h>
>> +#include <linux/cpumask.h>
>> +#include <linux/init.h>
>> +#include <linux/percpu.h>
>> +#include <linux/sched.h>
>> +
>> +#include <asm/cputype.h>
>> +#include <asm/topology.h>
>> +
>> +DEFINE_PER_CPU(struct cputopo_arm64, cpu_topology);
>> +
>> +const struct cpumask *cpu_coregroup_mask(int cpu)
>> +{
>> +       return &cpu_topo(cpu).core_sibling;
>> +}
>> +
>> +void update_siblings_masks(unsigned int cpuid)
>> +{
>> +       struct cputopo_arm64 *topo, *cpuid_topo = &cpu_topo(cpuid);
>> +       int cpu;
>> +
>> +       /* update core and thread sibling masks */
>> +       for_each_possible_cpu(cpu) {
>> +               topo = &cpu_topo(cpu);
>> +
>> +               if (cpuid_topo->socket_id != topo->socket_id)
>> +                       continue;
>> +
>> +               cpumask_set_cpu(cpuid, &topo->core_sibling);
>> +               if (cpu != cpuid)
>> +                       cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
>> +
>> +               if (cpuid_topo->core_id != topo->core_id)
>> +                       continue;
>> +
>> +               cpumask_set_cpu(cpuid, &topo->thread_sibling);
>> +               if (cpu != cpuid)
>> +                       cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
>> +       }
>> +       smp_wmb();
>> +}
>> +
>> +/*
>> + * store_cpu_topology is called at boot when only one cpu is running
>> + * and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
>> + * which prevents simultaneous write access to cpu_topology array
>> + */
>> +void store_cpu_topology(unsigned int cpuid)
>> +{
>> +       struct cputopo_arm64 *cpuid_topo = &cpu_topo(cpuid);
>> +       u64 mpidr;
>> +
>> +       /* If the cpu topology has been already set, just return */
>> +       if (cpuid_topo->core_id != -1)
>> +               return;
>> +
>> +       mpidr = read_cpuid_mpidr();
>> +
>> +       /* create cpu topology mapping */
>> +       if (!(mpidr & MPIDR_SMP_BITMASK)) {
>> +               /*
>> +                * This is a multiprocessor system
>> +                * multiprocessor format & multiprocessor mode field are set
>> +                */
>> +
>> +               if (mpidr & MPIDR_MT_BITMASK) {
>> +                       /* core performance interdependency */
>> +                       cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
>> +                       cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
>> +                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
>> +               } else {
>> +                       /* largely independent cores */
>> +                       cpuid_topo->thread_id = -1;
>> +                       cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
>> +                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
>> +               }
>> +       } else {
>> +               /*
>> +                * This is an uniprocessor system
>> +                * we are in multiprocessor format but uniprocessor system
>> +                * or in the old uniprocessor format
>> +                */
>> +               cpuid_topo->thread_id = -1;
>> +               cpuid_topo->core_id = 0;
>> +               cpuid_topo->socket_id = -1;
>> +       }
>> +
>> +       update_siblings_masks(cpuid);
>> +
>> +       printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr 0x%llx\n",
>> +               cpuid, cpu_topo(cpuid).thread_id,
>> +               cpu_topo(cpuid).core_id,
>> +               cpu_topo(cpuid).socket_id, mpidr);
>> +}
>> +
>> +/*
>> + * init_cpu_topology is called at boot when only one cpu is running
>> + * which prevent simultaneous write access to cpu_topology array
>> + */
>> +void __init init_cpu_topology(void)
>> +{
>> +       unsigned int cpu;
>> +
>> +       /* init core mask */
>> +       for_each_possible_cpu(cpu) {
>> +               struct cputopo_arm64 *topo = &cpu_topo(cpu);
>> +
>> +               topo->thread_id = -1;
>> +               topo->core_id =  -1;
>> +               topo->socket_id = -1;
>> +               cpumask_clear(&topo->core_sibling);
>> +               cpumask_clear(&topo->thread_sibling);
>> +       }
>> +       smp_wmb();
>> +}
>> --
>> 1.7.9.5
>>


^ permalink raw reply	[flat|nested] 30+ messages in thread

* [RFC][PATCH 1/2] ARM64: add cpu topology definition
@ 2013-07-30  7:49     ` Hanjun Guo
  0 siblings, 0 replies; 30+ messages in thread
From: Hanjun Guo @ 2013-07-30  7:49 UTC (permalink / raw)
  To: linux-arm-kernel

On 2013-7-29 17:46, Vincent Guittot wrote:
> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>> Power aware scheduling needs the cpu topology information to improve the
>> cpu scheduler decision making.
> 
> It's not only power aware scheduling. The scheduler already uses
> topology and cache sharing when CONFIG_SCHED_MC and/or
> CONFIG_SCHED_SMT are enabled. So you should also add these configs for
> arm64 so the scheduler can use it.

Yes, you are right. Thanks for the advice.

> 
> Vincent
> 
>>
>> For ARM64, we can get the topology from the MPIDR register which defines
>> the affinity of processors.
>>
>> This patch is mainly based on arch/arm/kernel/topology.c written by
>> Vincent Guittot, and replaced the topology array with per cpu variable.
>>
>> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
>> ---
>>  arch/arm64/Kconfig                |    9 +++
>>  arch/arm64/include/asm/cputype.h  |   11 ++++
>>  arch/arm64/include/asm/topology.h |   41 ++++++++++++
>>  arch/arm64/kernel/Makefile        |    1 +
>>  arch/arm64/kernel/smp.c           |    6 ++
>>  arch/arm64/kernel/topology.c      |  128 +++++++++++++++++++++++++++++++++++++
>>  6 files changed, 196 insertions(+)
>>  create mode 100644 arch/arm64/include/asm/topology.h
>>  create mode 100644 arch/arm64/kernel/topology.c
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 9737e97..f0ce91b 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -150,6 +150,15 @@ config SMP
>>
>>           If you don't know what to do here, say N.
>>
>> +config ARM64_CPU_TOPOLOGY
>> +       bool "Support cpu topology definition"
>> +       depends on SMP && ARM64
>> +       default y
>> +       help
>> +         Support ARM64 cpu topology definition. The MPIDR register defines
>> +         affinity between processors which is then used to describe the cpu
>> +         topology of an ARM64 System.
>> +
>>  config NR_CPUS
>>         int "Maximum number of CPUs (2-32)"
>>         range 2 32
>> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
>> index 5fe138e..68b55af 100644
>> --- a/arch/arm64/include/asm/cputype.h
>> +++ b/arch/arm64/include/asm/cputype.h
>> @@ -30,6 +30,17 @@
>>
>>  #define MPIDR_HWID_BITMASK     0xff00ffffff
>>
>> +#define MPIDR_SMP_BITMASK      (0x1 << 30)
>> +#define MPIDR_MT_BITMASK       (0x1 << 24)
>> +
>> +#define MPIDR_LEVEL_BITS       8
>> +#define MPIDR_LEVEL_MASK       ((1 << MPIDR_LEVEL_BITS) - 1)
>> +
>> +#define MPIDR_AFFINITY_LEVEL_0(mpidr) ((mpidr) & MPIDR_LEVEL_MASK)
>> +#define MPIDR_AFFINITY_LEVEL_1(mpidr) ((mpidr >> 8) & MPIDR_LEVEL_MASK)
>> +#define MPIDR_AFFINITY_LEVEL_2(mpidr) ((mpidr >> 16) & MPIDR_LEVEL_MASK)
>> +#define MPIDR_AFFINITY_LEVEL_3(mpidr) ((mpidr >> 32) & MPIDR_LEVEL_MASK)
>> +
>>  #define read_cpuid(reg) ({                                             \
>>         u64 __val;                                                      \
>>         asm("mrs        %0, " reg : "=r" (__val));                      \
>> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
>> new file mode 100644
>> index 0000000..8631808
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/topology.h
>> @@ -0,0 +1,41 @@
>> +#ifndef _ASM_ARM64_TOPOLOGY_H
>> +#define _ASM_ARM64_TOPOLOGY_H
>> +
>> +#ifdef CONFIG_ARM64_CPU_TOPOLOGY
>> +
>> +#include <linux/cpumask.h>
>> +
>> +struct cputopo_arm64 {
>> +       int thread_id;
>> +       int core_id;
>> +       int socket_id;
>> +       cpumask_t thread_sibling;
>> +       cpumask_t core_sibling;
>> +};
>> +
>> +DECLARE_PER_CPU(struct cputopo_arm64, cpu_topology);
>> +
>> +#define cpu_topo(cpu) per_cpu(cpu_topology, cpu)
>> +
>> +#define topology_physical_package_id(cpu)      (cpu_topo(cpu).socket_id)
>> +#define topology_core_id(cpu)          (cpu_topo(cpu).core_id)
>> +#define topology_core_cpumask(cpu)     (&cpu_topo(cpu).core_sibling)
>> +#define topology_thread_cpumask(cpu)   (&cpu_topo(cpu).thread_sibling)
>> +
>> +#define mc_capable()   (cpu_topo(0).socket_id != -1)
>> +#define smt_capable()  (cpu_topo(0).thread_id != -1)
>> +
>> +void init_cpu_topology(void);
>> +void store_cpu_topology(unsigned int cpuid);
>> +const struct cpumask *cpu_coregroup_mask(int cpu);
>> +
>> +#else
>> +
>> +static inline void init_cpu_topology(void) { }
>> +static inline void store_cpu_topology(unsigned int cpuid) { }
>> +
>> +#endif
>> +
>> +#include <asm-generic/topology.h>
>> +
>> +#endif /* _ASM_ARM64_TOPOLOGY_H */
>> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
>> index 7b4b564..a47c359 100644
>> --- a/arch/arm64/kernel/Makefile
>> +++ b/arch/arm64/kernel/Makefile
>> @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP)                       += smp.o smp_spin_table.o smp_psci.o
>>  arm64-obj-$(CONFIG_HW_PERF_EVENTS)     += perf_event.o
>>  arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
>>  arm64-obj-$(CONFIG_EARLY_PRINTK)       += early_printk.o
>> +arm64-obj-$(CONFIG_ARM64_CPU_TOPOLOGY)  += topology.o
>>
>>  obj-y                                  += $(arm64-obj-y) vdso/
>>  obj-m                                  += $(arm64-obj-m)
>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>> index fee5cce..197b1da 100644
>> --- a/arch/arm64/kernel/smp.c
>> +++ b/arch/arm64/kernel/smp.c
>> @@ -39,6 +39,7 @@
>>  #include <asm/atomic.h>
>>  #include <asm/cacheflush.h>
>>  #include <asm/cputype.h>
>> +#include <asm/topology.h>
>>  #include <asm/mmu_context.h>
>>  #include <asm/pgtable.h>
>>  #include <asm/pgalloc.h>
>> @@ -215,6 +216,8 @@ asmlinkage void secondary_start_kernel(void)
>>         local_irq_enable();
>>         local_fiq_enable();
>>
>> +       store_cpu_topology(cpu);
>> +
>>         /*
>>          * OK, it's off to the idle thread for us
>>          */
>> @@ -387,6 +390,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
>>         int cpu, err;
>>         unsigned int ncores = num_possible_cpus();
>>
>> +       init_cpu_topology();
>> +       store_cpu_topology(smp_processor_id());
>> +
>>         /*
>>          * are we trying to boot more cores than exist?
>>          */
>> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
>> new file mode 100644
>> index 0000000..1eb0435
>> --- /dev/null
>> +++ b/arch/arm64/kernel/topology.c
>> @@ -0,0 +1,128 @@
>> +/*
>> + * arch/arm64/kernel/topology.c
>> + *
>> + * Copyright (C) 2013 Linaro Limited.
>> + * Written by: Hanjun Guo
>> + *
>> + * based on arch/arm/kernel/topology.c
>> + *
>> + * This file is subject to the terms and conditions of the GNU General Public
>> + * License.  See the file "COPYING" in the main directory of this archive
>> + * for more details.
>> + */
>> +
>> +#include <linux/cpu.h>
>> +#include <linux/cpumask.h>
>> +#include <linux/init.h>
>> +#include <linux/percpu.h>
>> +#include <linux/sched.h>
>> +
>> +#include <asm/cputype.h>
>> +#include <asm/topology.h>
>> +
>> +DEFINE_PER_CPU(struct cputopo_arm64, cpu_topology);
>> +
>> +const struct cpumask *cpu_coregroup_mask(int cpu)
>> +{
>> +       return &cpu_topo(cpu).core_sibling;
>> +}
>> +
>> +void update_siblings_masks(unsigned int cpuid)
>> +{
>> +       struct cputopo_arm64 *topo, *cpuid_topo = &cpu_topo(cpuid);
>> +       int cpu;
>> +
>> +       /* update core and thread sibling masks */
>> +       for_each_possible_cpu(cpu) {
>> +               topo = &cpu_topo(cpu);
>> +
>> +               if (cpuid_topo->socket_id != topo->socket_id)
>> +                       continue;
>> +
>> +               cpumask_set_cpu(cpuid, &topo->core_sibling);
>> +               if (cpu != cpuid)
>> +                       cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
>> +
>> +               if (cpuid_topo->core_id != topo->core_id)
>> +                       continue;
>> +
>> +               cpumask_set_cpu(cpuid, &topo->thread_sibling);
>> +               if (cpu != cpuid)
>> +                       cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
>> +       }
>> +       smp_wmb();
>> +}
>> +
>> +/*
>> + * store_cpu_topology is called at boot when only one cpu is running
>> + * and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
>> + * which prevents simultaneous write access to cpu_topology array
>> + */
>> +void store_cpu_topology(unsigned int cpuid)
>> +{
>> +       struct cputopo_arm64 *cpuid_topo = &cpu_topo(cpuid);
>> +       u64 mpidr;
>> +
>> +       /* If the cpu topology has been already set, just return */
>> +       if (cpuid_topo->core_id != -1)
>> +               return;
>> +
>> +       mpidr = read_cpuid_mpidr();
>> +
>> +       /* create cpu topology mapping */
>> +       if (!(mpidr & MPIDR_SMP_BITMASK)) {
>> +               /*
>> +                * This is a multiprocessor system
>> +                * multiprocessor format & multiprocessor mode field are set
>> +                */
>> +
>> +               if (mpidr & MPIDR_MT_BITMASK) {
>> +                       /* core performance interdependency */
>> +                       cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
>> +                       cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
>> +                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_2(mpidr);
>> +               } else {
>> +                       /* largely independent cores */
>> +                       cpuid_topo->thread_id = -1;
>> +                       cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL_0(mpidr);
>> +                       cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL_1(mpidr);
>> +               }
>> +       } else {
>> +               /*
>> +                * This is an uniprocessor system
>> +                * we are in multiprocessor format but uniprocessor system
>> +                * or in the old uniprocessor format
>> +                */
>> +               cpuid_topo->thread_id = -1;
>> +               cpuid_topo->core_id = 0;
>> +               cpuid_topo->socket_id = -1;
>> +       }
>> +
>> +       update_siblings_masks(cpuid);
>> +
>> +       printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr 0x%llx\n",
>> +               cpuid, cpu_topo(cpuid).thread_id,
>> +               cpu_topo(cpuid).core_id,
>> +               cpu_topo(cpuid).socket_id, mpidr);
>> +}
>> +
>> +/*
>> + * init_cpu_topology is called at boot when only one cpu is running
>> + * which prevent simultaneous write access to cpu_topology array
>> + */
>> +void __init init_cpu_topology(void)
>> +{
>> +       unsigned int cpu;
>> +
>> +       /* init core mask */
>> +       for_each_possible_cpu(cpu) {
>> +               struct cputopo_arm64 *topo = &cpu_topo(cpu);
>> +
>> +               topo->thread_id = -1;
>> +               topo->core_id =  -1;
>> +               topo->socket_id = -1;
>> +               cpumask_clear(&topo->core_sibling);
>> +               cpumask_clear(&topo->thread_sibling);
>> +       }
>> +       smp_wmb();
>> +}
>> --
>> 1.7.9.5
>>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [RFC][PATCH 1/2] ARM64: add cpu topology definition
  2013-07-29 13:36       ` Dave Martin
@ 2013-07-30  8:09         ` Hanjun Guo
  -1 siblings, 0 replies; 30+ messages in thread
From: Hanjun Guo @ 2013-07-30  8:09 UTC (permalink / raw)
  To: Dave Martin
  Cc: Will Deacon, Vincent Guittot, linaro-kernel, Graeme Gregory,
	Al Stone, Patch Tracking, Catalin Marinas, linaro-acpi,
	linux-kernel, Tomasz Nowicki, Naresh Bhat, Russell King, LAK

On 2013-7-29 21:36, Dave Martin wrote:
> On Mon, Jul 29, 2013 at 10:54:01AM +0100, Will Deacon wrote:
>> On Mon, Jul 29, 2013 at 10:46:06AM +0100, Vincent Guittot wrote:
>>> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>>>> Power aware scheduling needs the cpu topology information to improve the
>>>> cpu scheduler decision making.
>>>
>>> It's not only power aware scheduling. The scheduler already uses
>>> topology and cache sharing when CONFIG_SCHED_MC and/or
>>> CONFIG_SCHED_SMT are enabled. So you should also add these configs for
>>> arm64 so the scheduler can use it.
>>
>> ... except that the architecture doesn't define what the AFF fields in MPIDR
>> really represent. Using them to make key scheduling decisions relating to
> 
> In fact, the ARM Architecture doesn't place any requirements on MPIDRs to
> force the aff fields to exist _at all_.  It's just a recommendation.
> Instead, you have a 24 or 32-bit number which is unique per CPU, and which
> is _probably_ assigned in a way resembling the aff fields.
> 
>> cache proximity seems pretty risky to me, especially given the track record
>> we've seen already on AArch32 silicon. It's a convenient register if it
>> contains the data we want it to contain, but we need to force ourselves to
>> come to terms with reality here and simply use it as an identifier for a
>> CPU.
> 
> +1
> 
> Also, we should align arm and arm64.  The problem is basically exactly
> the same, and the solution needs to be the same.  struct cputopo_arm is
> already being abused  -- for example, TC2 describes the A15 and A7
> clusters on a single die as having different "socket_id" values, even
> though this is obviously nonsense.  But there's no other way to describe
> that system today.
> 
>> Can't we just use the device-tree to represent this topological data for
>> arm64? Lorenzo has been working on bindings in this area.
> 
> This may become more important as we start to see things like asymmetric
> topologies appearing (different numbers of nodes and different
> interdependence characteristics in adjacent branches of the topology
> etc.)

Agreed.
I would like to mention that the ACPI Static Resource Affinity Table (SRAT)
stores topology information for all the processors and memory, describing
the physical locations of the processors and memory in the system. ACPI will
be another available solution for this.

Thanks
Hanjun

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [RFC][PATCH 1/2] ARM64: add cpu topology definition
@ 2013-07-30  8:09         ` Hanjun Guo
  0 siblings, 0 replies; 30+ messages in thread
From: Hanjun Guo @ 2013-07-30  8:09 UTC (permalink / raw)
  To: linux-arm-kernel

On 2013-7-29 21:36, Dave Martin wrote:
> On Mon, Jul 29, 2013 at 10:54:01AM +0100, Will Deacon wrote:
>> On Mon, Jul 29, 2013 at 10:46:06AM +0100, Vincent Guittot wrote:
>>> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>>>> Power aware scheduling needs the cpu topology information to improve the
>>>> cpu scheduler decision making.
>>>
>>> It's not only power aware scheduling. The scheduler already uses
>>> topology and cache sharing when CONFIG_SCHED_MC and/or
>>> CONFIG_SCHED_SMT are enabled. So you should also add these configs for
>>> arm64 so the scheduler can use it.
>>
>> ... except that the architecture doesn't define what the AFF fields in MPIDR
>> really represent. Using them to make key scheduling decisions relating to
> 
> In fact, the ARM Architecture doesn't place any requirements on MPIDRs to
> force the aff fields to exist _at all_.  It's just a recommendation.
> Instead, you have a 24 or 32-bit number which is unique per CPU, and which
> is _probably_ assigned in a way resembling the aff fields.
> 
>> cache proximity seems pretty risky to me, especially given the track record
>> we've seen already on AArch32 silicon. It's a convenient register if it
>> contains the data we want it to contain, but we need to force ourselves to
>> come to terms with reality here and simply use it as an identifier for a
>> CPU.
> 
> +1
> 
> Also, we should align arm and arm64.  The problem is basically exactly
> the same, and the solution needs to be the same.  struct cputopo_arm is
> already being abused  -- for example, TC2 describes the A15 and A7
> clusters on a single die as having different "socket_id" values, even
> though this is obviously nonsense.  But there's no other way to describe
> that system today.
> 
>> Can't we just use the device-tree to represent this topological data for
>> arm64? Lorenzo has been working on bindings in this area.
> 
> This may become more important as we start to see things like asymmetric
> topologies appearing (different numbers of nodes and different
> interdependence characteristics in adjacent branches of the topology
> etc.)

Agreed.
I would like to mention that the ACPI Static Resource Affinity Table (SRAT)
stores topology information for all the processors and memory, describing
the physical locations of the processors and memory in the system. ACPI will
be another available solution for this.

Thanks
Hanjun

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [RFC][PATCH 1/2] ARM64: add cpu topology definition
  2013-07-29  9:54     ` Will Deacon
@ 2013-08-14 11:27       ` Catalin Marinas
  -1 siblings, 0 replies; 30+ messages in thread
From: Catalin Marinas @ 2013-08-14 11:27 UTC (permalink / raw)
  To: Will Deacon
  Cc: Vincent Guittot, Hanjun Guo, Russell King, LAK, Patch Tracking,
	linaro-kernel, linux-kernel, linaro-acpi, Al Stone,
	Graeme Gregory, Naresh Bhat, Tomasz Nowicki, Lorenzo Pieralisi

On Mon, Jul 29, 2013 at 10:54:01AM +0100, Will Deacon wrote:
> On Mon, Jul 29, 2013 at 10:46:06AM +0100, Vincent Guittot wrote:
> > On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
> > > Power aware scheduling needs the cpu topology information to improve the
> > > cpu scheduler decision making.
> > 
> > It's not only power aware scheduling. The scheduler already uses
> > topology and cache sharing when CONFIG_SCHED_MC and/or
> > CONFIG_SCHED_SMT are enabled. So you should also add these configs for
> > arm64 so the scheduler can use it.
> 
> ... except that the architecture doesn't define what the AFF fields in MPIDR
> really represent. Using them to make key scheduling decisions relating to
> cache proximity seems pretty risky to me, especially given the track record
> we've seen already on AArch32 silicon. It's a convenient register if it
> contains the data we want it to contain, but we need to force ourselves to
> come to terms with reality here and simply use it as an identifier for a
> CPU.
> 
> Can't we just use the device-tree to represent this topological data for
> arm64? Lorenzo has been working on bindings in this area.

Catching up on email after holiday - I agree with Will here, we should
use DT for representing the topology (or ACPI) and not rely on the MPIDR
value.

-- 
Catalin

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [RFC][PATCH 1/2] ARM64: add cpu topology definition
@ 2013-08-14 11:27       ` Catalin Marinas
  0 siblings, 0 replies; 30+ messages in thread
From: Catalin Marinas @ 2013-08-14 11:27 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jul 29, 2013 at 10:54:01AM +0100, Will Deacon wrote:
> On Mon, Jul 29, 2013 at 10:46:06AM +0100, Vincent Guittot wrote:
> > On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
> > > Power aware scheduling needs the cpu topology information to improve the
> > > cpu scheduler decision making.
> > 
> > It's not only power aware scheduling. The scheduler already uses
> > topology and cache sharing when CONFIG_SCHED_MC and/or
> > CONFIG_SCHED_SMT are enabled. So you should also add these configs for
> > arm64 so the scheduler can use it.
> 
> ... except that the architecture doesn't define what the AFF fields in MPIDR
> really represent. Using them to make key scheduling decisions relating to
> cache proximity seems pretty risky to me, especially given the track record
> we've seen already on AArch32 silicon. It's a convenient register if it
> contains the data we want it to contain, but we need to force ourselves to
> come to terms with reality here and simply use it as an identifier for a
> CPU.
> 
> Can't we just use the device-tree to represent this topological data for
> arm64? Lorenzo has been working on bindings in this area.

Catching up on email after holiday - I agree with Will here, we should
use DT for representing the topology (or ACPI) and not rely on the MPIDR
value.

-- 
Catalin

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [RFC][PATCH 1/2] ARM64: add cpu topology definition
  2013-08-14 11:27       ` Catalin Marinas
@ 2013-08-15  1:00         ` Hanjun Guo
  -1 siblings, 0 replies; 30+ messages in thread
From: Hanjun Guo @ 2013-08-15  1:00 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: Will Deacon, Vincent Guittot, Russell King, LAK, Patch Tracking,
	linaro-kernel, linux-kernel, linaro-acpi, Al Stone,
	Graeme Gregory, Naresh Bhat, Tomasz Nowicki, Lorenzo Pieralisi

On 2013-8-14 19:27, Catalin Marinas wrote:
> On Mon, Jul 29, 2013 at 10:54:01AM +0100, Will Deacon wrote:
>> On Mon, Jul 29, 2013 at 10:46:06AM +0100, Vincent Guittot wrote:
>>> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>>>> Power aware scheduling needs the cpu topology information to improve the
>>>> cpu scheduler decision making.
>>>
>>> It's not only power aware scheduling. The scheduler already uses
>>> topology and cache sharing when CONFIG_SCHED_MC and/or
>>> CONFIG_SCHED_SMT are enabled. So you should also add these configs for
>>> arm64 so the scheduler can use it.
>>
>> ... except that the architecture doesn't define what the AFF fields in MPIDR
>> really represent. Using them to make key scheduling decisions relating to
>> cache proximity seems pretty risky to me, especially given the track record
>> we've seen already on AArch32 silicon. It's a convenient register if it
>> contains the data we want it to contain, but we need to force ourselves to
>> come to terms with reality here and simply use it as an identifier for a
>> CPU.
>>
>> Can't we just use the device-tree to represent this topological data for
>> arm64? Lorenzo has been working on bindings in this area.
> 
> Catching up on email after holiday - I agree with Will here, we should
> use DT for representing the topology (or ACPI) and not rely on the MPIDR
> value.
> 

OK, I'm working on the ACPI part now. Thanks for your comments.

Regards
Hanjun

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [RFC][PATCH 1/2] ARM64: add cpu topology definition
@ 2013-08-15  1:00         ` Hanjun Guo
  0 siblings, 0 replies; 30+ messages in thread
From: Hanjun Guo @ 2013-08-15  1:00 UTC (permalink / raw)
  To: linux-arm-kernel

On 2013-8-14 19:27, Catalin Marinas wrote:
> On Mon, Jul 29, 2013 at 10:54:01AM +0100, Will Deacon wrote:
>> On Mon, Jul 29, 2013 at 10:46:06AM +0100, Vincent Guittot wrote:
>>> On 27 July 2013 12:42, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>>>> Power aware scheduling needs the cpu topology information to improve the
>>>> cpu scheduler decision making.
>>>
>>> It's not only power aware scheduling. The scheduler already uses
>>> topology and cache sharing when CONFIG_SCHED_MC and/or
>>> CONFIG_SCHED_SMT are enabled. So you should also add these configs for
>>> arm64 so the scheduler can use it.
>>
>> ... except that the architecture doesn't define what the AFF fields in MPIDR
>> really represent. Using them to make key scheduling decisions relating to
>> cache proximity seems pretty risky to me, especially given the track record
>> we've seen already on AArch32 silicon. It's a convenient register if it
>> contains the data we want it to contain, but we need to force ourselves to
>> come to terms with reality here and simply use it as an identifier for a
>> CPU.
>>
>> Can't we just use the device-tree to represent this topological data for
>> arm64? Lorenzo has been working on bindings in this area.
> 
> Catching up on email after holiday - I agree with Will here, we should
> use DT for representing the topology (or ACPI) and not rely on the MPIDR
> value.
> 

OK, I'm working on the ACPI part now. Thanks for your comments.

Regards
Hanjun

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2013-08-15  1:02 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-27 10:42 [RFC][PATCH 1/2] ARM64: add cpu topology definition Hanjun Guo
2013-07-27 10:42 ` Hanjun Guo
2013-07-27 10:42 ` [RFC][PATCH 2/2] ARM64: introduce cluster id and make a difference between socket id Hanjun Guo
2013-07-27 10:42   ` Hanjun Guo
2013-07-29  9:38   ` Vincent Guittot
2013-07-29  9:38     ` Vincent Guittot
2013-07-30  7:46     ` Hanjun Guo
2013-07-30  7:46       ` Hanjun Guo
2013-07-29  9:46 ` [RFC][PATCH 1/2] ARM64: add cpu topology definition Vincent Guittot
2013-07-29  9:46   ` Vincent Guittot
2013-07-29  9:54   ` Will Deacon
2013-07-29  9:54     ` Will Deacon
2013-07-29 10:39     ` Vincent Guittot
2013-07-29 10:39       ` Vincent Guittot
2013-07-29 13:36     ` Dave Martin
2013-07-29 13:36       ` Dave Martin
2013-07-29 17:23       ` Lorenzo Pieralisi
2013-07-29 17:23         ` Lorenzo Pieralisi
2013-07-30  8:09       ` Hanjun Guo
2013-07-30  8:09         ` Hanjun Guo
2013-08-14 11:27     ` Catalin Marinas
2013-08-14 11:27       ` Catalin Marinas
2013-08-15  1:00       ` Hanjun Guo
2013-08-15  1:00         ` Hanjun Guo
2013-07-29 10:15   ` Sudeep KarkadaNagesha
2013-07-29 10:15     ` Sudeep KarkadaNagesha
2013-07-29 10:28     ` Vincent Guittot
2013-07-29 10:28       ` Vincent Guittot
2013-07-30  7:49   ` Hanjun Guo
2013-07-30  7:49     ` Hanjun Guo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.