linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] cpufreq:powernv: Fix init_chip_info initialization in numa=off
@ 2021-07-26 17:07 Pratik R. Sampat
  2021-07-27  6:16 ` Gautham R Shenoy
  0 siblings, 1 reply; 5+ messages in thread
From: Pratik R. Sampat @ 2021-07-26 17:07 UTC (permalink / raw)
  To: mpe, rjw, linux-pm, linuxppc-dev, linux-kernel, stable, psampat,
	pratik.r.sampat

In the numa=off kernel command-line configuration init_chip_info() loops
around the number of chips and attempts to copy the cpumask of that node
which is NULL for all iterations after the first chip.

Hence, store the cpu mask for each chip instead of derving cpumask from
node while populating the "chips" struct array and copy that to the
chips[i].mask

Cc: stable@vger.kernel.org
Fixes: 053819e0bf84 ("cpufreq: powernv: Handle throttling due to Pmax capping at chip level")
Signed-off-by: Pratik R. Sampat <psampat@linux.ibm.com>
Reported-by: Shirisha Ganta <shirisha.ganta1@ibm.com>
---
 drivers/cpufreq/powernv-cpufreq.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 005600cef273..8ec10d9aed8f 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -1046,12 +1046,20 @@ static int init_chip_info(void)
 	unsigned int *chip;
 	unsigned int cpu, i;
 	unsigned int prev_chip_id = UINT_MAX;
+	cpumask_t *chip_cpu_mask;
 	int ret = 0;
 
 	chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL);
 	if (!chip)
 		return -ENOMEM;
 
+	/* Allocate a chip cpu mask large enough to fit mask for all chips */
+	chip_cpu_mask = kcalloc(32, sizeof(cpumask_t), GFP_KERNEL);
+	if (!chip_cpu_mask) {
+		ret = -ENOMEM;
+		goto free_and_return;
+	}
+
 	for_each_possible_cpu(cpu) {
 		unsigned int id = cpu_to_chip_id(cpu);
 
@@ -1059,22 +1067,25 @@ static int init_chip_info(void)
 			prev_chip_id = id;
 			chip[nr_chips++] = id;
 		}
+		cpumask_set_cpu(cpu, &chip_cpu_mask[nr_chips-1]);
 	}
 
 	chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
 	if (!chips) {
 		ret = -ENOMEM;
-		goto free_and_return;
+		goto out_chip_cpu_mask;
 	}
 
 	for (i = 0; i < nr_chips; i++) {
 		chips[i].id = chip[i];
-		cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i]));
+		cpumask_copy(&chips[i].mask, &chip_cpu_mask[i]);
 		INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
 		for_each_cpu(cpu, &chips[i].mask)
 			per_cpu(chip_info, cpu) =  &chips[i];
 	}
 
+out_chip_cpu_mask:
+	kfree(chip_cpu_mask);
 free_and_return:
 	kfree(chip);
 	return ret;
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] cpufreq:powernv: Fix init_chip_info initialization in numa=off
  2021-07-26 17:07 [PATCH] cpufreq:powernv: Fix init_chip_info initialization in numa=off Pratik R. Sampat
@ 2021-07-27  6:16 ` Gautham R Shenoy
  2021-07-27  6:49   ` Pratik Sampat
  0 siblings, 1 reply; 5+ messages in thread
From: Gautham R Shenoy @ 2021-07-27  6:16 UTC (permalink / raw)
  To: Pratik R. Sampat
  Cc: linux-pm, pratik.r.sampat, rjw, linux-kernel, stable, linuxppc-dev

On Mon, Jul 26, 2021 at 10:37:57PM +0530, Pratik R. Sampat wrote:
> In the numa=off kernel command-line configuration init_chip_info() loops
> around the number of chips and attempts to copy the cpumask of that node
> which is NULL for all iterations after the first chip.
> 
> Hence, store the cpu mask for each chip instead of derving cpumask from
> node while populating the "chips" struct array and copy that to the
> chips[i].mask
> 
> Cc: stable@vger.kernel.org
> Fixes: 053819e0bf84 ("cpufreq: powernv: Handle throttling due to Pmax capping at chip level")
> Signed-off-by: Pratik R. Sampat <psampat@linux.ibm.com>
> Reported-by: Shirisha Ganta <shirisha.ganta1@ibm.com>
> ---
>  drivers/cpufreq/powernv-cpufreq.c | 15 +++++++++++++--
>  1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
> index 005600cef273..8ec10d9aed8f 100644
> --- a/drivers/cpufreq/powernv-cpufreq.c
> +++ b/drivers/cpufreq/powernv-cpufreq.c
> @@ -1046,12 +1046,20 @@ static int init_chip_info(void)
>  	unsigned int *chip;
>  	unsigned int cpu, i;
>  	unsigned int prev_chip_id = UINT_MAX;
> +	cpumask_t *chip_cpu_mask;
>  	int ret = 0;
> 
>  	chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL);
>  	if (!chip)
>  		return -ENOMEM;
> 
> +	/* Allocate a chip cpu mask large enough to fit mask for all chips */
> +	chip_cpu_mask = kcalloc(32, sizeof(cpumask_t), GFP_KERNEL);

I suppose by 32 you mean the maximum number of chips possible. You
could use a #define for that.

Otherwise, the patch looks good to me.

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>



> +	if (!chip_cpu_mask) {
> +		ret = -ENOMEM;
> +		goto free_and_return;
> +	}
> +
>  	for_each_possible_cpu(cpu) {
>  		unsigned int id = cpu_to_chip_id(cpu);
> 
> @@ -1059,22 +1067,25 @@ static int init_chip_info(void)
>  			prev_chip_id = id;
>  			chip[nr_chips++] = id;
>  		}
> +		cpumask_set_cpu(cpu, &chip_cpu_mask[nr_chips-1]);
>  	}
> 
>  	chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
>  	if (!chips) {
>  		ret = -ENOMEM;
> -		goto free_and_return;
> +		goto out_chip_cpu_mask;
>  	}
> 
>  	for (i = 0; i < nr_chips; i++) {
>  		chips[i].id = chip[i];
> -		cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i]));
> +		cpumask_copy(&chips[i].mask, &chip_cpu_mask[i]);
>  		INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
>  		for_each_cpu(cpu, &chips[i].mask)
>  			per_cpu(chip_info, cpu) =  &chips[i];
>  	}
> 
> +out_chip_cpu_mask:
> +	kfree(chip_cpu_mask);
>  free_and_return:
>  	kfree(chip);
>  	return ret;
> -- 
> 2.31.1
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] cpufreq:powernv: Fix init_chip_info initialization in numa=off
  2021-07-27  6:16 ` Gautham R Shenoy
@ 2021-07-27  6:49   ` Pratik Sampat
  0 siblings, 0 replies; 5+ messages in thread
From: Pratik Sampat @ 2021-07-27  6:49 UTC (permalink / raw)
  To: ego; +Cc: linux-pm, pratik.r.sampat, rjw, linux-kernel, stable, linuxppc-dev



On 27/07/21 11:46 am, Gautham R Shenoy wrote:
> On Mon, Jul 26, 2021 at 10:37:57PM +0530, Pratik R. Sampat wrote:
>> In the numa=off kernel command-line configuration init_chip_info() loops
>> around the number of chips and attempts to copy the cpumask of that node
>> which is NULL for all iterations after the first chip.
>>
>> Hence, store the cpu mask for each chip instead of derving cpumask from
>> node while populating the "chips" struct array and copy that to the
>> chips[i].mask
>>
>> Cc: stable@vger.kernel.org
>> Fixes: 053819e0bf84 ("cpufreq: powernv: Handle throttling due to Pmax capping at chip level")
>> Signed-off-by: Pratik R. Sampat <psampat@linux.ibm.com>
>> Reported-by: Shirisha Ganta <shirisha.ganta1@ibm.com>
>> ---
>>   drivers/cpufreq/powernv-cpufreq.c | 15 +++++++++++++--
>>   1 file changed, 13 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
>> index 005600cef273..8ec10d9aed8f 100644
>> --- a/drivers/cpufreq/powernv-cpufreq.c
>> +++ b/drivers/cpufreq/powernv-cpufreq.c
>> @@ -1046,12 +1046,20 @@ static int init_chip_info(void)
>>   	unsigned int *chip;
>>   	unsigned int cpu, i;
>>   	unsigned int prev_chip_id = UINT_MAX;
>> +	cpumask_t *chip_cpu_mask;
>>   	int ret = 0;
>>
>>   	chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL);
>>   	if (!chip)
>>   		return -ENOMEM;
>>
>> +	/* Allocate a chip cpu mask large enough to fit mask for all chips */
>> +	chip_cpu_mask = kcalloc(32, sizeof(cpumask_t), GFP_KERNEL);
> I suppose by 32 you mean the maximum number of chips possible. You
> could use a #define for that.

ack, I could #define the constant.

> Otherwise, the patch looks good to me.
>
> Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
>
Thanks
Pratik

>
>> +	if (!chip_cpu_mask) {
>> +		ret = -ENOMEM;
>> +		goto free_and_return;
>> +	}
>> +
>>   	for_each_possible_cpu(cpu) {
>>   		unsigned int id = cpu_to_chip_id(cpu);
>>
>> @@ -1059,22 +1067,25 @@ static int init_chip_info(void)
>>   			prev_chip_id = id;
>>   			chip[nr_chips++] = id;
>>   		}
>> +		cpumask_set_cpu(cpu, &chip_cpu_mask[nr_chips-1]);
>>   	}
>>
>>   	chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
>>   	if (!chips) {
>>   		ret = -ENOMEM;
>> -		goto free_and_return;
>> +		goto out_chip_cpu_mask;
>>   	}
>>
>>   	for (i = 0; i < nr_chips; i++) {
>>   		chips[i].id = chip[i];
>> -		cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i]));
>> +		cpumask_copy(&chips[i].mask, &chip_cpu_mask[i]);
>>   		INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
>>   		for_each_cpu(cpu, &chips[i].mask)
>>   			per_cpu(chip_info, cpu) =  &chips[i];
>>   	}
>>
>> +out_chip_cpu_mask:
>> +	kfree(chip_cpu_mask);
>>   free_and_return:
>>   	kfree(chip);
>>   	return ret;
>> -- 
>> 2.31.1
>>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] cpufreq:powernv: Fix init_chip_info initialization in numa=off
  2021-06-15  5:09 Pratik R. Sampat
@ 2021-07-08  9:08 ` Gautham R Shenoy
  0 siblings, 0 replies; 5+ messages in thread
From: Gautham R Shenoy @ 2021-07-08  9:08 UTC (permalink / raw)
  To: Pratik R. Sampat
  Cc: pratik.r.sampat, linux-pm, rjw, linux-kernel, linuxppc-dev

Hello Pratik,

On Tue, Jun 15, 2021 at 10:39:49AM +0530, Pratik R. Sampat wrote:
> In the numa=off kernel command-line configuration init_chip_info() loops
> around the number of chips and attempts to copy the cpumask of that node
> which is NULL for all iterations after the first chip.

Thanks for taking a look into this. Indeed there is an issue here
because the code here assumes that node_mask as a proxy for the
chip_mask. This assumption breaks when run with numa=off, since there will only be a
single node, but multiple chips.


> 
> Hence adding a check to bail out after the first initialization if there
> is only one node.
> 
> Fixes: 053819e0bf84 ("cpufreq: powernv: Handle throttling due to Pmax capping at chip level")
> Signed-off-by: Pratik R. Sampat <psampat@linux.ibm.com>
> Reported-by: Shirisha Ganta <shirishaganta1@ibm.com>
> ---
>  drivers/cpufreq/powernv-cpufreq.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
> index e439b43c19eb..663f9c4b5e3a 100644
> --- a/drivers/cpufreq/powernv-cpufreq.c
> +++ b/drivers/cpufreq/powernv-cpufreq.c
> @@ -1078,6 +1078,8 @@ static int init_chip_info(void)
>  		INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
>  		for_each_cpu(cpu, &chips[i].mask)
>  			per_cpu(chip_info, cpu) =  &chips[i];
> +		if (num_possible_nodes() == 1)
> +			break;

With this we will only initialize the chip[0].throttle work function,
while for the rest of the chips chip[i].throttle will be
uninitialized. While we may be running in the numa=off mode, the fact
remains that those other chips do exist and they may experiencing
throttling, during which they will try to schedule work for chip[i] in
order to take corrective action, which will fail.

Hence a more correct approach may be to maintain a chip[i] mask
independent of the node mask.





>  	}
>  
>  free_and_return:
> -- 
> 2.30.2
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] cpufreq:powernv: Fix init_chip_info initialization in numa=off
@ 2021-06-15  5:09 Pratik R. Sampat
  2021-07-08  9:08 ` Gautham R Shenoy
  0 siblings, 1 reply; 5+ messages in thread
From: Pratik R. Sampat @ 2021-06-15  5:09 UTC (permalink / raw)
  To: mpe, rjw, linux-pm, linuxppc-dev, linux-kernel, psampat, pratik.r.sampat

In the numa=off kernel command-line configuration init_chip_info() loops
around the number of chips and attempts to copy the cpumask of that node
which is NULL for all iterations after the first chip.

Hence adding a check to bail out after the first initialization if there
is only one node.

Fixes: 053819e0bf84 ("cpufreq: powernv: Handle throttling due to Pmax capping at chip level")
Signed-off-by: Pratik R. Sampat <psampat@linux.ibm.com>
Reported-by: Shirisha Ganta <shirishaganta1@ibm.com>
---
 drivers/cpufreq/powernv-cpufreq.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index e439b43c19eb..663f9c4b5e3a 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -1078,6 +1078,8 @@ static int init_chip_info(void)
 		INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
 		for_each_cpu(cpu, &chips[i].mask)
 			per_cpu(chip_info, cpu) =  &chips[i];
+		if (num_possible_nodes() == 1)
+			break;
 	}
 
 free_and_return:
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-07-27  6:49 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-26 17:07 [PATCH] cpufreq:powernv: Fix init_chip_info initialization in numa=off Pratik R. Sampat
2021-07-27  6:16 ` Gautham R Shenoy
2021-07-27  6:49   ` Pratik Sampat
  -- strict thread matches above, loose matches on Subject: below --
2021-06-15  5:09 Pratik R. Sampat
2021-07-08  9:08 ` Gautham R Shenoy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).