All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86/cacheinfo: Define per-CPU num_cache_leaves
@ 2023-03-14 23:16 Ricardo Neri
  2023-03-20 16:44 ` Dave Hansen
  0 siblings, 1 reply; 5+ messages in thread
From: Ricardo Neri @ 2023-03-14 23:16 UTC (permalink / raw)
  To: x86
  Cc: Ricardo Neri, linux-kernel, Ricardo Neri, Srinivas Pandruvada,
	Len Brown, Rafael J. Wysocki, Zhang Rui, Chen Yu, stable

Make num_cache_leaves a per-CPU variable. Otherwise, populate_cache_
leaves() fails on systems with asymmetric number of subleaves in CPUID
leaf 0x4. Intel Meteor Lake is an example of such a system.

Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Len Brown <len.brown@intel.com>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Chen Yu <yu.c.chen@intel.com>
Cc: stable@vger.kernel.org
Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
After this change, all CPUs will traverse CPUID leaf 0x4 when booted for
the first time. On systems with asymmetric cache topologies this is
useless work.

Creating a list of processor models that have asymmetric cache topologies
was considered. The burden of maintaining such list would outweigh the
performance benefit of skipping this extra step.
---
 arch/x86/kernel/cpu/cacheinfo.c | 48 ++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 4063e8991211..6ad51657c853 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -176,7 +176,18 @@ struct _cpuid4_info_regs {
 	struct amd_northbridge *nb;
 };
 
-static unsigned short num_cache_leaves;
+static DEFINE_PER_CPU(unsigned short, num_cache_leaves);
+
+static inline unsigned short get_num_cache_leaves(unsigned int cpu)
+{
+	return per_cpu(num_cache_leaves, cpu);
+}
+
+static inline void
+set_num_cache_leaves(unsigned short nr_leaves, unsigned int cpu)
+{
+	per_cpu(num_cache_leaves, cpu) = nr_leaves;
+}
 
 /* AMD doesn't have CPUID4. Emulate it here to report the same
    information to the user.  This makes some assumptions about the machine:
@@ -716,19 +727,21 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
 void init_amd_cacheinfo(struct cpuinfo_x86 *c)
 {
 
+	unsigned int cpu = c->cpu_index;
+
 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
-		num_cache_leaves = find_num_cache_leaves(c);
+		set_num_cache_leaves(find_num_cache_leaves(c), cpu);
 	} else if (c->extended_cpuid_level >= 0x80000006) {
 		if (cpuid_edx(0x80000006) & 0xf000)
-			num_cache_leaves = 4;
+			set_num_cache_leaves(4, cpu);
 		else
-			num_cache_leaves = 3;
+			set_num_cache_leaves(3, cpu);
 	}
 }
 
 void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
 {
-	num_cache_leaves = find_num_cache_leaves(c);
+	set_num_cache_leaves(find_num_cache_leaves(c), c->cpu_index);
 }
 
 void init_intel_cacheinfo(struct cpuinfo_x86 *c)
@@ -738,24 +751,21 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
-#ifdef CONFIG_SMP
 	unsigned int cpu = c->cpu_index;
-#endif
 
 	if (c->cpuid_level > 3) {
-		static int is_initialized;
-
-		if (is_initialized == 0) {
-			/* Init num_cache_leaves from boot CPU */
-			num_cache_leaves = find_num_cache_leaves(c);
-			is_initialized++;
-		}
+		/*
+		 * There should be at least one leaf. A non-zero value means
+		 * that the number of leaves has been initialized.
+		 */
+		if (!get_num_cache_leaves(cpu))
+			set_num_cache_leaves(find_num_cache_leaves(c), cpu);
 
 		/*
 		 * Whenever possible use cpuid(4), deterministic cache
 		 * parameters cpuid leaf to find the cache details
 		 */
-		for (i = 0; i < num_cache_leaves; i++) {
+		for (i = 0; i < get_num_cache_leaves(cpu); i++) {
 			struct _cpuid4_info_regs this_leaf = {};
 			int retval;
 
@@ -791,14 +801,14 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
 	 * trace cache
 	 */
-	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
+	if ((!get_num_cache_leaves(cpu) || c->x86 == 15) && c->cpuid_level > 1) {
 		/* supports eax=2  call */
 		int j, n;
 		unsigned int regs[4];
 		unsigned char *dp = (unsigned char *)regs;
 		int only_trace = 0;
 
-		if (num_cache_leaves != 0 && c->x86 == 15)
+		if (get_num_cache_leaves(cpu) && c->x86 == 15)
 			only_trace = 1;
 
 		/* Number of times to iterate */
@@ -1000,12 +1010,12 @@ int init_cache_level(unsigned int cpu)
 {
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
-	if (!num_cache_leaves)
+	if (!get_num_cache_leaves(cpu))
 		return -ENOENT;
 	if (!this_cpu_ci)
 		return -EINVAL;
 	this_cpu_ci->num_levels = 3;
-	this_cpu_ci->num_leaves = num_cache_leaves;
+	this_cpu_ci->num_leaves = get_num_cache_leaves(cpu);
 	return 0;
 }
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] x86/cacheinfo: Define per-CPU num_cache_leaves
  2023-03-14 23:16 [PATCH] x86/cacheinfo: Define per-CPU num_cache_leaves Ricardo Neri
@ 2023-03-20 16:44 ` Dave Hansen
  2023-03-24  1:07   ` Ricardo Neri
  0 siblings, 1 reply; 5+ messages in thread
From: Dave Hansen @ 2023-03-20 16:44 UTC (permalink / raw)
  To: Ricardo Neri, x86
  Cc: Ricardo Neri, linux-kernel, Srinivas Pandruvada, Len Brown,
	Rafael J. Wysocki, Zhang Rui, Chen Yu, stable

On 3/14/23 16:16, Ricardo Neri wrote:
> -static unsigned short num_cache_leaves;
> +static DEFINE_PER_CPU(unsigned short, num_cache_leaves);
> +
> +static inline unsigned short get_num_cache_leaves(unsigned int cpu)
> +{
> +	return per_cpu(num_cache_leaves, cpu);
> +}

I know it's in generic code, but we do already have this:

static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cacheinfo);

which has a num_leaves in it:

struct cpu_cacheinfo {
        struct cacheinfo *info_list;
        unsigned int num_levels;
        unsigned int num_leaves;
        bool cpu_map_populated;
};

That's currently _populated_ from the arch code that you are modifying.
Do we really need this data stored identically in two different per-cpu
locations?

I'd also love to hear some more background on "Intel Meteor Lake" and
_why_ it has an asymmetric cache topology.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] x86/cacheinfo: Define per-CPU num_cache_leaves
  2023-03-20 16:44 ` Dave Hansen
@ 2023-03-24  1:07   ` Ricardo Neri
  0 siblings, 0 replies; 5+ messages in thread
From: Ricardo Neri @ 2023-03-24  1:07 UTC (permalink / raw)
  To: Dave Hansen
  Cc: x86, Ricardo Neri, linux-kernel, Srinivas Pandruvada, Len Brown,
	Rafael J. Wysocki, Zhang Rui, Chen Yu, stable

On Mon, Mar 20, 2023 at 09:44:04AM -0700, Dave Hansen wrote:
> On 3/14/23 16:16, Ricardo Neri wrote:
> > -static unsigned short num_cache_leaves;
> > +static DEFINE_PER_CPU(unsigned short, num_cache_leaves);
> > +
> > +static inline unsigned short get_num_cache_leaves(unsigned int cpu)
> > +{
> > +	return per_cpu(num_cache_leaves, cpu);
> > +}

Thank you very much for your feedback Dave!

> 
> I know it's in generic code, but we do already have this:
> 
> static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cacheinfo);
> 
> which has a num_leaves in it:
> 
> struct cpu_cacheinfo {
>         struct cacheinfo *info_list;
>         unsigned int num_levels;
>         unsigned int num_leaves;
>         bool cpu_map_populated;
> };
> 
> That's currently _populated_ from the arch code that you are modifying.
> Do we really need this data stored identically in two different per-cpu
> locations?

That is a good observation. As you state, the ci_cpu_cacheinfo is already
initialized in the arch code. I can certainly modify the patch to make use
of it instead adding a new per-CPU variable.

> 
> I'd also love to hear some more background on "Intel Meteor Lake" and
> _why_ it has an asymmetric cache topology.

Meteor Lake has cores in more than one die. The cache to which these cores
are connected is different in each die. This is reflected in CPUID leaf 4.

Thanks and BR,
Ricardo

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] x86/cacheinfo: Define per-CPU num_cache_leaves
  2023-03-14 23:15 Ricardo Neri
@ 2023-03-14 23:23 ` Ricardo Neri
  0 siblings, 0 replies; 5+ messages in thread
From: Ricardo Neri @ 2023-03-14 23:23 UTC (permalink / raw)
  To: x86
  Cc: Ricardo Neri, Srinivas Pandruvada, Len Brown, Rafael J. Wysocki,
	Zhang Rui, Chen Yu, stable

On Tue, Mar 14, 2023 at 04:15:19PM -0700, Ricardo Neri wrote:
> Make num_cache_leaves a per-CPU variable. Otherwise, populate_cache_
> leaves() fails on systems with asymmetric number of subleaves in CPUID
> leaf 0x4. Intel Meteor Lake is an example of such a system.

Please disregard this patch sent privately by mistake. I resent the same
patch publicly. I apologize for the noise.

Thanks and BR,
Ricardo

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] x86/cacheinfo: Define per-CPU num_cache_leaves
@ 2023-03-14 23:15 Ricardo Neri
  2023-03-14 23:23 ` Ricardo Neri
  0 siblings, 1 reply; 5+ messages in thread
From: Ricardo Neri @ 2023-03-14 23:15 UTC (permalink / raw)
  To: x86
  Cc: Ricardo Neri, Ricardo Neri, Srinivas Pandruvada, Len Brown,
	Rafael J. Wysocki, Zhang Rui, Chen Yu, stable

Make num_cache_leaves a per-CPU variable. Otherwise, populate_cache_
leaves() fails on systems with asymmetric number of subleaves in CPUID
leaf 0x4. Intel Meteor Lake is an example of such a system.

Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Len Brown <len.brown@intel.com>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Chen Yu <yu.c.chen@intel.com>
Cc: stable@vger.kernel.org
Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
After this change, all CPUs will traverse CPUID leaf 0x4 when booted for
the first time. On systems with asymmetric cache topologies this is
useless work.

Creating a list of processor models that have asymmetric cache topologies
was considered. The burden of maintaining such list would outweigh the
performance benefit of skipping this extra step.
---
 arch/x86/kernel/cpu/cacheinfo.c | 48 ++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 4063e8991211..6ad51657c853 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -176,7 +176,18 @@ struct _cpuid4_info_regs {
 	struct amd_northbridge *nb;
 };
 
-static unsigned short num_cache_leaves;
+static DEFINE_PER_CPU(unsigned short, num_cache_leaves);
+
+static inline unsigned short get_num_cache_leaves(unsigned int cpu)
+{
+	return per_cpu(num_cache_leaves, cpu);
+}
+
+static inline void
+set_num_cache_leaves(unsigned short nr_leaves, unsigned int cpu)
+{
+	per_cpu(num_cache_leaves, cpu) = nr_leaves;
+}
 
 /* AMD doesn't have CPUID4. Emulate it here to report the same
    information to the user.  This makes some assumptions about the machine:
@@ -716,19 +727,21 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
 void init_amd_cacheinfo(struct cpuinfo_x86 *c)
 {
 
+	unsigned int cpu = c->cpu_index;
+
 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
-		num_cache_leaves = find_num_cache_leaves(c);
+		set_num_cache_leaves(find_num_cache_leaves(c), cpu);
 	} else if (c->extended_cpuid_level >= 0x80000006) {
 		if (cpuid_edx(0x80000006) & 0xf000)
-			num_cache_leaves = 4;
+			set_num_cache_leaves(4, cpu);
 		else
-			num_cache_leaves = 3;
+			set_num_cache_leaves(3, cpu);
 	}
 }
 
 void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
 {
-	num_cache_leaves = find_num_cache_leaves(c);
+	set_num_cache_leaves(find_num_cache_leaves(c), c->cpu_index);
 }
 
 void init_intel_cacheinfo(struct cpuinfo_x86 *c)
@@ -738,24 +751,21 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
-#ifdef CONFIG_SMP
 	unsigned int cpu = c->cpu_index;
-#endif
 
 	if (c->cpuid_level > 3) {
-		static int is_initialized;
-
-		if (is_initialized == 0) {
-			/* Init num_cache_leaves from boot CPU */
-			num_cache_leaves = find_num_cache_leaves(c);
-			is_initialized++;
-		}
+		/*
+		 * There should be at least one leaf. A non-zero value means
+		 * that the number of leaves has been initialized.
+		 */
+		if (!get_num_cache_leaves(cpu))
+			set_num_cache_leaves(find_num_cache_leaves(c), cpu);
 
 		/*
 		 * Whenever possible use cpuid(4), deterministic cache
 		 * parameters cpuid leaf to find the cache details
 		 */
-		for (i = 0; i < num_cache_leaves; i++) {
+		for (i = 0; i < get_num_cache_leaves(cpu); i++) {
 			struct _cpuid4_info_regs this_leaf = {};
 			int retval;
 
@@ -791,14 +801,14 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
 	 * trace cache
 	 */
-	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
+	if ((!get_num_cache_leaves(cpu) || c->x86 == 15) && c->cpuid_level > 1) {
 		/* supports eax=2  call */
 		int j, n;
 		unsigned int regs[4];
 		unsigned char *dp = (unsigned char *)regs;
 		int only_trace = 0;
 
-		if (num_cache_leaves != 0 && c->x86 == 15)
+		if (get_num_cache_leaves(cpu) && c->x86 == 15)
 			only_trace = 1;
 
 		/* Number of times to iterate */
@@ -1000,12 +1010,12 @@ int init_cache_level(unsigned int cpu)
 {
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
-	if (!num_cache_leaves)
+	if (!get_num_cache_leaves(cpu))
 		return -ENOENT;
 	if (!this_cpu_ci)
 		return -EINVAL;
 	this_cpu_ci->num_levels = 3;
-	this_cpu_ci->num_leaves = num_cache_leaves;
+	this_cpu_ci->num_leaves = get_num_cache_leaves(cpu);
 	return 0;
 }
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-03-24  0:57 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-14 23:16 [PATCH] x86/cacheinfo: Define per-CPU num_cache_leaves Ricardo Neri
2023-03-20 16:44 ` Dave Hansen
2023-03-24  1:07   ` Ricardo Neri
  -- strict thread matches above, loose matches on Subject: below --
2023-03-14 23:15 Ricardo Neri
2023-03-14 23:23 ` Ricardo Neri

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.