All of lore.kernel.org
 help / color / mirror / Atom feed
From: Qing Wang <wangqing@vivo.com>
To: Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will@kernel.org>,
	Sudeep Holla <sudeep.holla@arm.com>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	"Rafael J. Wysocki" <rafael@kernel.org>,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org
Cc: Wang Qing <wangqing@vivo.com>
Subject: [PATCH 2/2] arm64: Add complex scheduler level for arm64
Date: Thu, 21 Apr 2022 07:55:58 -0700	[thread overview]
Message-ID: <1650552960-60165-3-git-send-email-wangqing@vivo.com> (raw)
In-Reply-To: <1650552960-60165-1-git-send-email-wangqing@vivo.com>

From: Wang Qing <wangqing@vivo.com>

The DSU-110 DynamIQ™ cluster supports blocks that are called complexes
which contain up to two cores of the same type and some shared logic.
Sharing some logic between the cores can make a complex area efficient.

This patch adds a complex level for complexes and automatically enables
load balancing among complexes. This directly benefits workloads that
need more resources, such as memory bandwidth and caches.

Testing was done on a Qualcomm SM8450 with the STREAM benchmark:
8-thread stream (2 little cores * 2 (complex) + 3 middle cores + 1 big core)
                stream                 stream
                w/o patch              w/ patch
MB/sec copy     37579.2 (   0.00%)    39127.3 (   4.12%)
MB/sec scale    38261.1 (   0.00%)    39195.4 (   2.44%)
MB/sec add      39497.0 (   0.00%)    41101.5 (   4.06%)
MB/sec triad    39885.6 (   0.00%)    40772.7 (   2.22%)

Signed-off-by: Wang Qing <wangqing@vivo.com>
---
 arch/arm64/Kconfig      | 13 +++++++++++
 arch/arm64/kernel/smp.c | 48 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index edbe035cb0e3..4063de8c6153 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1207,6 +1207,19 @@ config SCHED_CLUSTER
 	  by sharing mid-level caches, last-level cache tags or internal
 	  busses.
 
+config SCHED_COMPLEX
+	bool "Complex scheduler support"
+	help
+	  DSU supports blocks that are called complexes which contain up to
+	  two cores of the same type and some shared logic. Sharing some logic
+	  between the cores can make a complex area efficient.
+
+	  A complex can also be considered a shared cache group smaller
+	  than a cluster.
+
+	  Complex scheduler support improves the CPU scheduler's decision
+	  making when dealing with machines that have complexes of CPUs.
+
 config SCHED_SMT
 	bool "SMT scheduler support"
 	help
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 3b46041f2b97..526765112146 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -14,6 +14,7 @@
 #include <linux/sched/mm.h>
 #include <linux/sched/hotplug.h>
 #include <linux/sched/task_stack.h>
+#include <linux/sched/topology.h>
 #include <linux/interrupt.h>
 #include <linux/cache.h>
 #include <linux/profile.h>
@@ -57,6 +58,10 @@
 DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
 EXPORT_PER_CPU_SYMBOL(cpu_number);
 
+#ifdef SCHED_COMPLEX
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_complex_map);
+#endif
+
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
  * so we need some other way of telling a new secondary core
@@ -715,6 +720,47 @@ void __init smp_init_cpus(void)
 	}
 }
 
+#ifdef SCHED_COMPLEX
+static int arm64_complex_flags(void)
+{
+	return SD_SHARE_PKG_RESOURCES;
+}
+
+const struct cpumask *arm64_complex_mask(int cpu)
+{
+	const struct cpumask *core_mask = cpu_cpu_mask(cpu);
+
+	/* Find the smaller shared cache level than clustergroup and coregroup*/
+#ifdef CONFIG_SCHED_MC
+	core_mask = cpu_coregroup_mask(cpu);
+#endif
+#ifdef CONFIG_SCHED_CLUSTER
+	core_mask = cpu_clustergroup_mask(cpu);
+#endif
+
+	find_max_sub_sc(core_mask, cpu, &per_cpu(cpu_complex_map, cpu));
+
+	return &per_cpu(cpu_complex_map, cpu);
+}
+#endif
+
+static struct sched_domain_topology_level arm64_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_COMPLEX
+	{ arm64_complex_mask, arm64_complex_flags, SD_INIT_NAME(CPL) },
+#endif
+#ifdef CONFIG_SCHED_CLUSTER
+	{ cpu_clustergroup_mask, cpu_cluster_flags, SD_INIT_NAME(CLS) },
+#endif
+#ifdef CONFIG_SCHED_MC
+	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	const struct cpu_operations *ops;
@@ -723,9 +769,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	unsigned int this_cpu;
 
 	init_cpu_topology();
-
 	this_cpu = smp_processor_id();
 	store_cpu_topology(this_cpu);
+	set_sched_topology(arm64_topology);
 	numa_store_cpu_info(this_cpu);
 	numa_add_cpu(this_cpu);
 
-- 
2.27.0.windows.1


WARNING: multiple messages have this Message-ID (diff)
From: Qing Wang <wangqing@vivo.com>
To: Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will@kernel.org>,
	Sudeep Holla <sudeep.holla@arm.com>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	"Rafael J. Wysocki" <rafael@kernel.org>,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org
Cc: Wang Qing <wangqing@vivo.com>
Subject: [PATCH 2/2] arm64: Add complex scheduler level for arm64
Date: Thu, 21 Apr 2022 07:55:58 -0700	[thread overview]
Message-ID: <1650552960-60165-3-git-send-email-wangqing@vivo.com> (raw)
In-Reply-To: <1650552960-60165-1-git-send-email-wangqing@vivo.com>

From: Wang Qing <wangqing@vivo.com>

The DSU-110 DynamIQ™ cluster supports blocks that are called complexes
which contain up to two cores of the same type and some shared logic.
Sharing some logic between the cores can make a complex area efficient.

This patch adds a complex level for complexes and automatically enables
load balancing among complexes. This directly benefits workloads that
need more resources, such as memory bandwidth and caches.

Testing was done on a Qualcomm SM8450 with the STREAM benchmark:
8-thread stream (2 little cores * 2 (complex) + 3 middle cores + 1 big core)
                stream                 stream
                w/o patch              w/ patch
MB/sec copy     37579.2 (   0.00%)    39127.3 (   4.12%)
MB/sec scale    38261.1 (   0.00%)    39195.4 (   2.44%)
MB/sec add      39497.0 (   0.00%)    41101.5 (   4.06%)
MB/sec triad    39885.6 (   0.00%)    40772.7 (   2.22%)

Signed-off-by: Wang Qing <wangqing@vivo.com>
---
 arch/arm64/Kconfig      | 13 +++++++++++
 arch/arm64/kernel/smp.c | 48 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index edbe035cb0e3..4063de8c6153 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1207,6 +1207,19 @@ config SCHED_CLUSTER
 	  by sharing mid-level caches, last-level cache tags or internal
 	  busses.
 
+config SCHED_COMPLEX
+	bool "Complex scheduler support"
+	help
+	  DSU supports blocks that are called complexes which contain up to
+	  two cores of the same type and some shared logic. Sharing some logic
+	  between the cores can make a complex area efficient.
+
+	  A complex can also be considered a shared cache group smaller
+	  than a cluster.
+
+	  Complex scheduler support improves the CPU scheduler's decision
+	  making when dealing with machines that have complexes of CPUs.
+
 config SCHED_SMT
 	bool "SMT scheduler support"
 	help
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 3b46041f2b97..526765112146 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -14,6 +14,7 @@
 #include <linux/sched/mm.h>
 #include <linux/sched/hotplug.h>
 #include <linux/sched/task_stack.h>
+#include <linux/sched/topology.h>
 #include <linux/interrupt.h>
 #include <linux/cache.h>
 #include <linux/profile.h>
@@ -57,6 +58,10 @@
 DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
 EXPORT_PER_CPU_SYMBOL(cpu_number);
 
+#ifdef SCHED_COMPLEX
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_complex_map);
+#endif
+
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
  * so we need some other way of telling a new secondary core
@@ -715,6 +720,47 @@ void __init smp_init_cpus(void)
 	}
 }
 
+#ifdef SCHED_COMPLEX
+static int arm64_complex_flags(void)
+{
+	return SD_SHARE_PKG_RESOURCES;
+}
+
+const struct cpumask *arm64_complex_mask(int cpu)
+{
+	const struct cpumask *core_mask = cpu_cpu_mask(cpu);
+
+	/* Find the smaller shared cache level than clustergroup and coregroup*/
+#ifdef CONFIG_SCHED_MC
+	core_mask = cpu_coregroup_mask(cpu);
+#endif
+#ifdef CONFIG_SCHED_CLUSTER
+	core_mask = cpu_clustergroup_mask(cpu);
+#endif
+
+	find_max_sub_sc(core_mask, cpu, &per_cpu(cpu_complex_map, cpu));
+
+	return &per_cpu(cpu_complex_map, cpu);
+}
+#endif
+
+static struct sched_domain_topology_level arm64_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_COMPLEX
+	{ arm64_complex_mask, arm64_complex_flags, SD_INIT_NAME(CPL) },
+#endif
+#ifdef CONFIG_SCHED_CLUSTER
+	{ cpu_clustergroup_mask, cpu_cluster_flags, SD_INIT_NAME(CLS) },
+#endif
+#ifdef CONFIG_SCHED_MC
+	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	const struct cpu_operations *ops;
@@ -723,9 +769,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	unsigned int this_cpu;
 
 	init_cpu_topology();
-
 	this_cpu = smp_processor_id();
 	store_cpu_topology(this_cpu);
+	set_sched_topology(arm64_topology);
 	numa_store_cpu_info(this_cpu);
 	numa_add_cpu(this_cpu);
 
-- 
2.27.0.windows.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  parent reply	other threads:[~2022-04-21 14:56 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-21 14:55 [PATCH 0/2] Add complex scheduler level for arm64 Qing Wang
2022-04-21 14:55 ` Qing Wang
2022-04-21 14:55 ` [PATCH 1/2] arch_topology: support for describing cache topology from DT Qing Wang
2022-04-21 14:55   ` Qing Wang
2022-04-21 15:47   ` Greg Kroah-Hartman
2022-04-21 15:47     ` Greg Kroah-Hartman
2022-04-22  2:30   ` kernel test robot
2022-04-22  2:30     ` kernel test robot
2022-04-22  9:22   ` Sudeep Holla
2022-04-22  9:22     ` Sudeep Holla
2022-04-22  9:48     ` 王擎
2022-04-22  9:48       ` 王擎
2022-04-22  9:27   ` kernel test robot
2022-04-22  9:27     ` kernel test robot
2022-04-21 14:55 ` Qing Wang [this message]
2022-04-21 14:55   ` [PATCH 2/2] arm64: Add complex scheduler level for arm64 Qing Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1650552960-60165-3-git-send-email-wangqing@vivo.com \
    --to=wangqing@vivo.com \
    --cc=catalin.marinas@arm.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rafael@kernel.org \
    --cc=sudeep.holla@arm.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.