linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] arch_topology: support parsing cluster_id from DT
@ 2022-05-05  8:35 Qing Wang
  2022-05-10 17:21 ` Dietmar Eggemann
  0 siblings, 1 reply; 5+ messages in thread
From: Qing Wang @ 2022-05-05  8:35 UTC (permalink / raw)
  To: Sudeep Holla, Greg Kroah-Hartman, Rafael J. Wysocki, linux-kernel
  Cc: Wang Qing

From: Wang Qing <wangqing@vivo.com>

Use nested cluster structures in DT to support describing multi-level
cluster topologies.

Notice: the clusters described in DT are currently not physical
boundaries. Since changing "cluster" to "socket" is too involved and error
prone, this patch has no effect on one-level cluster topologies, but it
supports multi-level cluster topologies in order to support CLUSTER_SCHED.

Signed-off-by: Wang Qing <wangqing@vivo.com>
---
 drivers/base/arch_topology.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 1d6636ebaac5..f2ea8113d619 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -491,7 +491,7 @@ static int __init get_cpu_for_node(struct device_node *node)
 }
 
 static int __init parse_core(struct device_node *core, int package_id,
-			     int core_id)
+			     int cluster_id, int core_id)
 {
 	char name[20];
 	bool leaf = true;
@@ -507,6 +507,7 @@ static int __init parse_core(struct device_node *core, int package_id,
 			cpu = get_cpu_for_node(t);
 			if (cpu >= 0) {
 				cpu_topology[cpu].package_id = package_id;
+				cpu_topology[cpu].cluster_id = cluster_id;
 				cpu_topology[cpu].core_id = core_id;
 				cpu_topology[cpu].thread_id = i;
 			} else if (cpu != -ENODEV) {
@@ -528,6 +529,7 @@ static int __init parse_core(struct device_node *core, int package_id,
 		}
 
 		cpu_topology[cpu].package_id = package_id;
+		cpu_topology[cpu].cluster_id = cluster_id;
 		cpu_topology[cpu].core_id = core_id;
 	} else if (leaf && cpu != -ENODEV) {
 		pr_err("%pOF: Can't get CPU for leaf core\n", core);
@@ -544,13 +546,15 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
 	bool has_cores = false;
 	struct device_node *c;
 	static int package_id __initdata;
+	static int cluster_id __initdata;
 	int core_id = 0;
 	int i, ret;
 
 	/*
-	 * First check for child clusters; we currently ignore any
-	 * information about the nesting of clusters and present the
-	 * scheduler with a flat list of them.
+	 * nesting of clusters :
+	 * level 1:  package_id
+	 * level 2:  cluster_id
+	 * level 3+: ignore
 	 */
 	i = 0;
 	do {
@@ -559,6 +563,14 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
 		if (c) {
 			leaf = false;
 			ret = parse_cluster(c, depth + 1);
+			if (depth == 0) {
+				package_id++;
+				cluster_id = 0;
+			} else if (depth == 1)
+				cluster_id++;
+			else
+				pr_err("Ignore nested clusters with more than two levels!\n");
+
 			of_node_put(c);
 			if (ret != 0)
 				return ret;
@@ -582,7 +594,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
 			}
 
 			if (leaf) {
-				ret = parse_core(c, package_id, core_id++);
+				ret = parse_core(c, package_id, cluster_id, core_id++);
 			} else {
 				pr_err("%pOF: Non-leaf cluster with core %s\n",
 				       cluster, name);
@@ -599,9 +611,6 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
 	if (leaf && !has_cores)
 		pr_warn("%pOF: empty cluster\n", cluster);
 
-	if (leaf)
-		package_id++;
-
 	return 0;
 }
 
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] arch_topology: support parsing cluster_id from DT
  2022-05-05  8:35 [PATCH] arch_topology: support parsing cluster_id from DT Qing Wang
@ 2022-05-10 17:21 ` Dietmar Eggemann
  2022-05-11  9:38   ` 王擎
  0 siblings, 1 reply; 5+ messages in thread
From: Dietmar Eggemann @ 2022-05-10 17:21 UTC (permalink / raw)
  To: Qing Wang, Sudeep Holla, Greg Kroah-Hartman, Rafael J. Wysocki,
	linux-kernel

On 05/05/2022 10:35, Qing Wang wrote:
> From: Wang Qing <wangqing@vivo.com>
> 
> Use nested cluster structures in DT to support describing multi-level
> cluster topologies.
> 
> Notice: the clusters describing in DT currently are not physical
> boundaries, since changing "cluster" to "socket" is too involved and error
> prone, this patch will not have any effect on one-level cluster topo, but
> can support the mutil-level cluster topo to support CLUSTER_SCHED.
> 
> Signed-off-by: Wang Qing <wangqing@vivo.com>
> ---
>  drivers/base/arch_topology.c | 25 +++++++++++++++++--------
>  1 file changed, 17 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index 1d6636ebaac5..f2ea8113d619 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -491,7 +491,7 @@ static int __init get_cpu_for_node(struct device_node *node)
>  }
>  
>  static int __init parse_core(struct device_node *core, int package_id,
> -			     int core_id)
> +			     int cluster_id, int core_id)
>  {
>  	char name[20];
>  	bool leaf = true;
> @@ -507,6 +507,7 @@ static int __init parse_core(struct device_node *core, int package_id,
>  			cpu = get_cpu_for_node(t);
>  			if (cpu >= 0) {
>  				cpu_topology[cpu].package_id = package_id;
> +				cpu_topology[cpu].cluster_id = cluster_id;
>  				cpu_topology[cpu].core_id = core_id;
>  				cpu_topology[cpu].thread_id = i;
>  			} else if (cpu != -ENODEV) {
> @@ -528,6 +529,7 @@ static int __init parse_core(struct device_node *core, int package_id,
>  		}
>  
>  		cpu_topology[cpu].package_id = package_id;
> +		cpu_topology[cpu].cluster_id = cluster_id;
>  		cpu_topology[cpu].core_id = core_id;
>  	} else if (leaf && cpu != -ENODEV) {
>  		pr_err("%pOF: Can't get CPU for leaf core\n", core);
> @@ -544,13 +546,15 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>  	bool has_cores = false;
>  	struct device_node *c;
>  	static int package_id __initdata;
> +	static int cluster_id __initdata;

Starting with cluster_id = 0 breaks existing platforms like
./arch/arm64/boot/dts/arm/juno.dts. For them it still has to be set to -1.

You get e.g.:

# cat /sys/kernel/debug/sched/domains/cpu1/domain*/name
CLS
DIE

instead of:

# cat /sys/kernel/debug/sched/domains/cpu1/domain*/name
MC
DIE


>  	int core_id = 0;
>  	int i, ret;
>  
>  	/*
> -	 * First check for child clusters; we currently ignore any
> -	 * information about the nesting of clusters and present the
> -	 * scheduler with a flat list of them.
> +	 * nesting of clusters :
> +	 * level 1:  package_id
> +	 * level 2:  cluster_id
> +	 * level 3+: ignore
>  	 */
>  	i = 0;
>  	do {
> @@ -559,6 +563,14 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>  		if (c) {
>  			leaf = false;
>  			ret = parse_cluster(c, depth + 1);
> +			if (depth == 0) {
> +				package_id++;
> +				cluster_id = 0;

-                               cluster_id = 0;
+                               cluster_id = -1;

This would have to be cluster_id = -1, since 0 is a valid second-level
cluster ID. Otherwise you're not removing the CLS data from CPU4 to CPU7 in
the `Armv9 with L2 complexes` cpu-map example I used for testing:

                cpu-map {
                        cluster0 {
                                cluster0 {
                                        core0 {
                                                cpu = <&cpu0>;
                                        };
                                        core1 {
                                                cpu = <&cpu1>;
                                        };
                                };
                                cluster1 {
                                        core0 {
                                                cpu = <&cpu2>;
                                        };
                                        core1 {
                                                cpu = <&cpu3>;
                                        };
                                };
                        };
                        cluster1 {
                                core0 {
                                        cpu = <&cpu4>;
                                };
                                core1 {
                                        cpu = <&cpu5>;
                                };
                                core2 {
                                        cpu = <&cpu6>;
                                };
                        };
                        cluster2 {
                                core0 {
                                        cpu = <&cpu7>;
                                };
                        };
                };

> +			} else if (depth == 1)
> +				cluster_id++;
> +			else
> +				pr_err("Ignore nested clusters with more than two levels!\n");
> +
>  			of_node_put(c);
>  			if (ret != 0)
>  				return ret;
> @@ -582,7 +594,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>  			}
>  
>  			if (leaf) {
> -				ret = parse_core(c, package_id, core_id++);
> +				ret = parse_core(c, package_id, cluster_id, core_id++);
>  			} else {
>  				pr_err("%pOF: Non-leaf cluster with core %s\n",
>  				       cluster, name);
> @@ -599,9 +611,6 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>  	if (leaf && !has_cores)
>  		pr_warn("%pOF: empty cluster\n", cluster);
>  
> -	if (leaf)
> -		package_id++;
> -
>  	return 0;
>  }

Looks like you also need to adapt update_siblings_masks() to only set
cpu in &cpu_topo->thread_sibling and &cpuid_topo->thread_sibling when
`cpu_topo->thread_id != -1`.

@@ -723,11 +723,11 @@ void update_siblings_masks(unsigned int cpuid)
                cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
                cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);

-               if (cpuid_topo->core_id != cpu_topo->core_id)
-                       continue;
-
-               cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
-               cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+               if (cpu_topo->thread_id != -1 &&
+                   cpuid_topo->core_id == cpu_topo->core_id) {
+                       cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+                       cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] arch_topology: support parsing cluster_id from DT
  2022-05-10 17:21 ` Dietmar Eggemann
@ 2022-05-11  9:38   ` 王擎
  2022-05-12  9:52     ` Dietmar Eggemann
  0 siblings, 1 reply; 5+ messages in thread
From: 王擎 @ 2022-05-11  9:38 UTC (permalink / raw)
  To: Dietmar Eggemann, Sudeep Holla, Greg Kroah-Hartman,
	Rafael J. Wysocki, linux-kernel


>> From: Wang Qing <wangqing@vivo.com>
>> 
>> Use nested cluster structures in DT to support describing multi-level
>> cluster topologies.
>> 
>> Notice: the clusters describing in DT currently are not physical
>> boundaries, since changing "cluster" to "socket" is too involved and error
>> prone, this patch will not have any effect on one-level cluster topo, but
>> can support the mutil-level cluster topo to support CLUSTER_SCHED.
>> 
>> Signed-off-by: Wang Qing <wangqing@vivo.com>
>> ---
>>  drivers/base/arch_topology.c | 25 +++++++++++++++++--------
>>  1 file changed, 17 insertions(+), 8 deletions(-)
>> 
>> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
>> index 1d6636ebaac5..f2ea8113d619 100644
>> --- a/drivers/base/arch_topology.c
>> +++ b/drivers/base/arch_topology.c
>> @@ -491,7 +491,7 @@ static int __init get_cpu_for_node(struct device_node *node)
>>  }
>>  
>>  static int __init parse_core(struct device_node *core, int package_id,
>> -                          int core_id)
>> +                          int cluster_id, int core_id)
>>  {
>>        char name[20];
>>        bool leaf = true;
>> @@ -507,6 +507,7 @@ static int __init parse_core(struct device_node *core, int package_id,
>>                        cpu = get_cpu_for_node(t);
>>                        if (cpu >= 0) {
>>                                cpu_topology[cpu].package_id = package_id;
>> +                             cpu_topology[cpu].cluster_id = cluster_id;
>>                                cpu_topology[cpu].core_id = core_id;
>>                                cpu_topology[cpu].thread_id = i;
>>                        } else if (cpu != -ENODEV) {
>> @@ -528,6 +529,7 @@ static int __init parse_core(struct device_node *core, int package_id,
>>                }
>>  
>>                cpu_topology[cpu].package_id = package_id;
>> +             cpu_topology[cpu].cluster_id = cluster_id;
>>                cpu_topology[cpu].core_id = core_id;
>>        } else if (leaf && cpu != -ENODEV) {
>>                pr_err("%pOF: Can't get CPU for leaf core\n", core);
>> @@ -544,13 +546,15 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>>        bool has_cores = false;
>>        struct device_node *c;
>>        static int package_id __initdata;
>> +     static int cluster_id __initdata;
>
>Starting with cluster_id = 0 breaks existing platforms like
>./arch/arm64/boot/dts/arm/juno.dts. For them it has to be still set to -1.

Yes, I noticed this problem; please help review V2.

>
>You get e.g.:
>
># cat /sys/kernel/debug/sched/domains/cpu1/domain*/name
>CLS
>DIE
>
>instead of:
>
># cat /sys/kernel/debug/sched/domains/cpu1/domain*/name
>MC
>DIE
>
>
>>        int core_id = 0;
>>        int i, ret;
>>  
>>        /*
>> -      * First check for child clusters; we currently ignore any
>> -      * information about the nesting of clusters and present the
>> -      * scheduler with a flat list of them.
>> +      * nesting of clusters :
>> +      * level 1:  package_id
>> +      * level 2:  cluster_id
>> +      * level 3+: ignore
>>         */
>>        i = 0;
>>        do {
>> @@ -559,6 +563,14 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>>                if (c) {
>>                        leaf = false;
>>                        ret = parse_cluster(c, depth + 1);
>> +                     if (depth == 0) {
>> +                             package_id++;
>> +                             cluster_id = 0;
>
>-                               cluster_id = 0;
>+                               cluster_id = -1;

This modification is not enough.

>
>Would have to be cluster_id = -1. 0 is a valid 2. level cluster.
>Otherwise you're not removing the CLS data from CPU4 to CPU7 in the
>`Armv9 with L2 complexes` cpu-map example I used for testing:
>
>                cpu-map {
>                        cluster0 {
>                                cluster0 {
>                                        core0 {
>                                                cpu = <&cpu0>;
>                                        };
>                                        core1 {
>                                                cpu = <&cpu1>;
>                                        };
>                                };
>                                cluster1 {
>                                        core0 {
>                                                cpu = <&cpu2>;
>                                        };
>                                        core1 {
>                                                cpu = <&cpu3>;
>                                        };
>                                };
>                        };
>                        cluster1 {
>                                core0 {
>                                        cpu = <&cpu4>;
>                                };
>                                core1 {
>                                        cpu = <&cpu5>;
>                                };
>                                core2 {
>                                        cpu = <&cpu6>;
>                                };
>                        };
>                        cluster2 {
>                                core0 {
>                                        cpu = <&cpu7>;
>                                };
>                        };
>                };
>
>> +                     } else if (depth == 1)
>> +                             cluster_id++;
>> +                     else
>> +                             pr_err("Ignore nested clusters with more than two levels!\n");
>> +
>>                        of_node_put(c);
>>                        if (ret != 0)
>>                                return ret;
>> @@ -582,7 +594,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>>                        }
>>  
>>                        if (leaf) {
>> -                             ret = parse_core(c, package_id, core_id++);
>> +                             ret = parse_core(c, package_id, cluster_id, core_id++);
>>                        } else {
>>                                pr_err("%pOF: Non-leaf cluster with core %s\n",
>>                                       cluster, name);
>> @@ -599,9 +611,6 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
>>        if (leaf && !has_cores)
>>                pr_warn("%pOF: empty cluster\n", cluster);
>>  
>> -     if (leaf)
>> -             package_id++;
>> -
>>        return 0;
>>  }
>
>Looks like you also need to adapt update_siblings_masks() to only set
>cpu in &cpu_topo->thread_sibling and &cpuid_topo->thread_sibling when
>`cpu_topo->thread_id != -1`.
>
>@@ -723,11 +723,11 @@ void update_siblings_masks(unsigned int cpuid)
>                cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
>                cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
>
>-               if (cpuid_topo->core_id != cpu_topo->core_id)
>-                       continue;
>-
>-               cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
>-               cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
>+               if (cpu_topo->thread_id != -1 &&
>+                   cpuid_topo->core_id == cpu_topo->core_id) {
>+                       cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
>+                       cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
>+

This seems like another problem?

Thanks,
Qing

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] arch_topology: support parsing cluster_id from DT
  2022-05-11  9:38   ` 王擎
@ 2022-05-12  9:52     ` Dietmar Eggemann
  2022-05-12 12:52       ` 王擎
  0 siblings, 1 reply; 5+ messages in thread
From: Dietmar Eggemann @ 2022-05-12  9:52 UTC (permalink / raw)
  To: 王擎,
	Sudeep Holla, Greg Kroah-Hartman, Rafael J. Wysocki,
	linux-kernel

On 11/05/2022 11:38, 王擎 wrote:
> 
>>> From: Wang Qing <wangqing@vivo.com>

[...]

>> Would have to be cluster_id = -1. 0 is a valid 2. level cluster.
>> Otherwise you're not removing the CLS data from CPU4 to CPU7 in the
>> `Armv9 with L2 complexes` cpu-map example I used for testing:
>>
>>                 cpu-map {
>>                         cluster0 {
>>                                 cluster0 {
>>                                         core0 {
>>                                                 cpu = <&cpu0>;
>>                                         };
>>                                         core1 {
>>                                                 cpu = <&cpu1>;
>>                                         };
>>                                 };
>>                                 cluster1 {
>>                                         core0 {
>>                                                 cpu = <&cpu2>;
>>                                         };
>>                                         core1 {
>>                                                 cpu = <&cpu3>;
>>                                         };
>>                                 };
>>                         };
>>                         cluster1 {
>>                                 core0 {
>>                                         cpu = <&cpu4>;
>>                                 };
>>                                 core1 {
>>                                         cpu = <&cpu5>;
>>                                 };
>>                                 core2 {
>>                                         cpu = <&cpu6>;
>>                                 };
>>                         };
>>                         cluster2 {
>>                                 core0 {
>>                                         cpu = <&cpu7>;
>>                                 };
>>                         };
>>                 };

[...]

>> Looks like you also need to adapt update_siblings_masks() to only set
>> cpu in &cpu_topo->thread_sibling and &cpuid_topo->thread_sibling when
>> `cpu_topo->thread_id != -1`.
>>
>> @@ -723,11 +723,11 @@ void update_siblings_masks(unsigned int cpuid)
>>                 cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
>>                 cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
>>
>> -               if (cpuid_topo->core_id != cpu_topo->core_id)
>> -                       continue;
>> -
>> -               cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
>> -               cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
>> +               if (cpu_topo->thread_id != -1 &&
>> +                   cpuid_topo->core_id == cpu_topo->core_id) {
>> +                       cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
>> +                       cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
>> +
> 
> This seems like another problem?

I don't think so. If you run a system with the cpu-map mentioned above
you get:

# cat /sys/kernel/debug/sched/domains/cpu0/domain*/name
SMT        <-- !!!
CLS
MC

root@e125579:~# cat /proc/schedstat | awk '{print $1 " " $2 }' | grep ^[cd] | head -5
cpu0 0
domain0 05 <-- !!!
domain1 07 <-- !!!
domain2 0f
domain3 ff

So you get an unwanted SMT level for CPU0-3 and messed-up cpumasks
without this change.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] arch_topology: support parsing cluster_id from DT
  2022-05-12  9:52     ` Dietmar Eggemann
@ 2022-05-12 12:52       ` 王擎
  0 siblings, 0 replies; 5+ messages in thread
From: 王擎 @ 2022-05-12 12:52 UTC (permalink / raw)
  To: Dietmar Eggemann, Sudeep Holla, Greg Kroah-Hartman,
	Rafael J. Wysocki, linux-kernel


>> 
>>>> From: Wang Qing <wangqing@vivo.com>
>
>[...]
>
>>> Would have to be cluster_id = -1. 0 is a valid 2. level cluster.
>>> Otherwise you're not removing the CLS data from CPU4 to CPU7 in the
>>> `Armv9 with L2 complexes` cpu-map example I used for testing:
>>>
>>>                 cpu-map {
>>>                         cluster0 {
>>>                                 cluster0 {
>>>                                         core0 {
>>>                                                 cpu = <&cpu0>;
>>>                                         };
>>>                                         core1 {
>>>                                                 cpu = <&cpu1>;
>>>                                         };
>>>                                 };
>>>                                 cluster1 {
>>>                                         core0 {
>>>                                                 cpu = <&cpu2>;
>>>                                         };
>>>                                         core1 {
>>>                                                 cpu = <&cpu3>;
>>>                                         };
>>>                                 };
>>>                         };
>>>                         cluster1 {
>>>                                 core0 {
>>>                                         cpu = <&cpu4>;
>>>                                 };
>>>                                 core1 {
>>>                                         cpu = <&cpu5>;
>>>                                 };
>>>                                 core2 {
>>>                                         cpu = <&cpu6>;
>>>                                 };
>>>                         };
>>>                         cluster2 {
>>>                                 core0 {
>>>                                         cpu = <&cpu7>;
>>>                                 };
>>>                         };
>>>                 };
>
>[...]
>
>>> Looks like you also need to adapt update_siblings_masks() to only set
>>> cpu in &cpu_topo->thread_sibling and &cpuid_topo->thread_sibling when
>>> `cpu_topo->thread_id != -1`.
>>>
>>> @@ -723,11 +723,11 @@ void update_siblings_masks(unsigned int cpuid)
>>>                 cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
>>>                 cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
>>>
>>> -               if (cpuid_topo->core_id != cpu_topo->core_id)
>>> -                       continue;
>>> -
>>> -               cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
>>> -               cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
>>> +               if (cpu_topo->thread_id != -1 &&
>>> +                   cpuid_topo->core_id == cpu_topo->core_id) {
>>> +                       cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
>>> +                       cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
>>> +
>> 
>> This seems like another problem?
>
>I don't think so. If you run a system with the cpu-map mentioned above

That change is not enough: with the cpu-map below, CPU0, CPU1, CPU4 and CPU5 will be grouped together as SMT siblings:
cluster0 {
        cluster0 {
                core0 {
                        thread0 {
                                 cpu = <&CPU0>;
                                };
                        thread1 {
                                 cpu = <&CPU1>;
                                };
                      };
                };
                core1 {
                        thread0 {
                                 cpu = <&CPU2>;
                                };
                        thread1 {
                                 cpu = <&CPU3>;
                                };
                      };
                };
        cluster1 {
                core0 {
                        thread0 {
                                 cpu = <&CPU4>;
                                };
                        thread1 {
                                 cpu = <&CPU5>;
                                };
                      };
                };
                core1 {
                        thread0 {
                                 cpu = <&CPU6>;
                                };
                        thread1 {
                                 cpu = <&CPU7>;
                                };
                      };
                };
...
I will handle this.

Thanks,
Qing

>you get:
>
># cat /sys/kernel/debug/sched/domains/cpu0/domain*/name
>SMT        <-- !!!
>CLS
>MC
>
>root@e125579:~# cat /proc/schedstat | awk '{print $1 " " $2 }' | grep ^[cd] | head -5
>cpu0 0
>domain0 05 <-- !!!
>domain1 07 <-- !!!
>domain2 0f
>domain3 ff
>
>So you get an unwanted SMT level for CPU0-3 and messed-up cpumasks
>without this change.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-05-12 12:52 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-05  8:35 [PATCH] arch_topology: support parsing cluster_id from DT Qing Wang
2022-05-10 17:21 ` Dietmar Eggemann
2022-05-11  9:38   ` 王擎
2022-05-12  9:52     ` Dietmar Eggemann
2022-05-12 12:52       ` 王擎

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).