All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 4/4] arm64: Dereference CPU operations indirectly
@ 2020-02-03 23:51 Gavin Shan
  2020-02-11 11:46 ` Lorenzo Pieralisi
  0 siblings, 1 reply; 3+ messages in thread
From: Gavin Shan @ 2020-02-03 23:51 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: mark.rutland, lorenzo.pieralisi, catalin.marinas, robin.murphy,
	sudeep.holla, will

The CPU operations are maintained through the array @cpu_ops[NR_CPUS].
2KB of memory is consumed when CONFIG_NR_CPUS is set to 256, which
seems like too much memory to use for this.

This introduces another array (@cpu_ops_indexes[DIV_ROUND_UP(NR_CPUS, 4)]),
in which the index into the CPU operations array is stored. With this, only
one byte needs to be shared by 4 CPUs -- 64 bytes for 256 CPUs -- to
dereference the CPU operations indirectly. Note this optimization relies
on the assumption that these CPU operations aren't dereferenced in a hot path.

Signed-off-by: Gavin Shan <gshan@redhat.com>
---
 arch/arm64/kernel/cpu_ops.c | 49 ++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index e133011f64b5..d9103d5c9c6f 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -4,7 +4,6 @@
  *
  * Copyright (C) 2013 ARM Ltd.
  */
-
 #include <linux/acpi.h>
 #include <linux/cache.h>
 #include <linux/errno.h>
@@ -20,39 +19,32 @@ extern const struct cpu_operations acpi_parking_protocol_ops;
 #endif
 extern const struct cpu_operations cpu_psci_ops;
 
-static const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
-
-static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
+/*
+ * Each element of the index array is shared by 4 CPUs. It means each
+ * CPU index uses 2 bits.
+ */
+static const struct cpu_operations *const cpu_ops[] = {
 	&smp_spin_table_ops,
-	&cpu_psci_ops,
-	NULL,
-};
-
-static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
 #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
 	&acpi_parking_protocol_ops,
 #endif
 	&cpu_psci_ops,
-	NULL,
 };
+static unsigned char cpu_ops_indexes[DIV_ROUND_UP(NR_CPUS, 4)] __ro_after_init;
 
-static const struct cpu_operations * __init cpu_get_ops(const char *name)
+static int __init get_cpu_ops_index(const char *name)
 {
-	const struct cpu_operations *const *ops;
-
-	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
-
-	while (*ops) {
-		if (!strcmp(name, (*ops)->name))
-			return *ops;
+	int index;
 
-		ops++;
+	for (index = 0; index < ARRAY_SIZE(cpu_ops); index++) {
+		if (!strcmp(cpu_ops[index]->name, name))
+			return index;
 	}
 
-	return NULL;
+	return -ERANGE;
 }
 
-static const char *__init cpu_read_enable_method(int cpu)
+static const char *__init get_cpu_method(int cpu)
 {
 	const char *enable_method;
 
@@ -98,21 +90,28 @@ static const char *__init cpu_read_enable_method(int cpu)
  */
 int __init init_cpu_ops(int cpu)
 {
-	const char *enable_method = cpu_read_enable_method(cpu);
+	const char *enable_method = get_cpu_method(cpu);
+	unsigned char *pindex = &cpu_ops_indexes[cpu / 4];
+	int index;
 
 	if (!enable_method)
 		return -ENODEV;
 
-	cpu_ops[cpu] = cpu_get_ops(enable_method);
-	if (!cpu_ops[cpu]) {
+	index = get_cpu_ops_index(enable_method);
+	if (index < 0) {
 		pr_warn("Unsupported enable-method: %s\n", enable_method);
 		return -EOPNOTSUPP;
 	}
 
+	*pindex &= ~(0x3 << (2 * (cpu % 4)));
+	*pindex |= ((index & 0x3) << (2 * (cpu % 4)));
+
 	return 0;
 }
 
 const struct cpu_operations *get_cpu_ops(int cpu)
 {
-	return cpu_ops[cpu];
+	int index = ((cpu_ops_indexes[cpu / 4] >> (2 * (cpu % 4))) & 0x3);
+
+	return cpu_ops[index];
 }
-- 
2.23.0


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2 4/4] arm64: Dereference CPU operations indirectly
  2020-02-03 23:51 [PATCH v2 4/4] arm64: Dereference CPU operations indirectly Gavin Shan
@ 2020-02-11 11:46 ` Lorenzo Pieralisi
  2020-02-11 23:29   ` Gavin Shan
  0 siblings, 1 reply; 3+ messages in thread
From: Lorenzo Pieralisi @ 2020-02-11 11:46 UTC (permalink / raw)
  To: Gavin Shan
  Cc: mark.rutland, catalin.marinas, robin.murphy, sudeep.holla, will,
	linux-arm-kernel

On Tue, Feb 04, 2020 at 10:51:07AM +1100, Gavin Shan wrote:
> One CPU operations is maintained through array @cpu_ops[NR_CPUS]. 2KB
> memory is consumed when CONFIG_NR_CPUS is set to 256. It seems too
> much memory has been used for this.
> 
> This introduces another array (@cpu_ops_index[NR_CPUS/4]), of which the
> index to CPU operations array is stored. With this, we just need one byte
> to be shared by 4 CPUs, 64 bytes for 256 CPUs, to dereference the CPU
> operations indirectly. Note this optimization has the assumption: these
> CPU operations aren't dereferenced in hot path.

Actually the enable method must be the same across cpus, which brings
your optimization down to 1 byte for whatever number of cpus (aka,
you need an index to the one and only CPU ops entry).

If a cpu has an enable method != from the first that has been detected
we should let the cpu ops read fail, that index must not/can not be
different on != cpus, really, if it is firmware is broken and it is
probably better to avoid booting a cpu rather than trying, I hardly
see how we can introduce a regression by adding this logic, TBC.

Please let me know if anyone spots something I have missed.

Thanks,
Lorenzo

> Signed-off-by: Gavin Shan <gshan@redhat.com>
> ---
>  arch/arm64/kernel/cpu_ops.c | 49 ++++++++++++++++++-------------------
>  1 file changed, 24 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
> index e133011f64b5..d9103d5c9c6f 100644
> --- a/arch/arm64/kernel/cpu_ops.c
> +++ b/arch/arm64/kernel/cpu_ops.c
> @@ -4,7 +4,6 @@
>   *
>   * Copyright (C) 2013 ARM Ltd.
>   */
> -
>  #include <linux/acpi.h>
>  #include <linux/cache.h>
>  #include <linux/errno.h>
> @@ -20,39 +19,32 @@ extern const struct cpu_operations acpi_parking_protocol_ops;
>  #endif
>  extern const struct cpu_operations cpu_psci_ops;
>  
> -static const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
> -
> -static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
> +/*
> + * Each element of the index array is shared by 4 CPUs. It means each
> + * CPU index uses 2 bits.
> + */
> +static const struct cpu_operations *const cpu_ops[] = {
>  	&smp_spin_table_ops,
> -	&cpu_psci_ops,
> -	NULL,
> -};
> -
> -static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
>  #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
>  	&acpi_parking_protocol_ops,
>  #endif
>  	&cpu_psci_ops,
> -	NULL,
>  };
> +static unsigned char cpu_ops_indexes[DIV_ROUND_UP(NR_CPUS, 4)] __ro_after_init;
>  
> -static const struct cpu_operations * __init cpu_get_ops(const char *name)
> +static int __init get_cpu_ops_index(const char *name)
>  {
> -	const struct cpu_operations *const *ops;
> -
> -	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
> -
> -	while (*ops) {
> -		if (!strcmp(name, (*ops)->name))
> -			return *ops;
> +	int index;
>  
> -		ops++;
> +	for (index = 0; index < ARRAY_SIZE(cpu_ops); index++) {
> +		if (!strcmp(cpu_ops[index]->name, name))
> +			return index;
>  	}
>  
> -	return NULL;
> +	return -ERANGE;
>  }
>  
> -static const char *__init cpu_read_enable_method(int cpu)
> +static const char *__init get_cpu_method(int cpu)
>  {
>  	const char *enable_method;
>  
> @@ -98,21 +90,28 @@ static const char *__init cpu_read_enable_method(int cpu)
>   */
>  int __init init_cpu_ops(int cpu)
>  {
> -	const char *enable_method = cpu_read_enable_method(cpu);
> +	const char *enable_method = get_cpu_method(cpu);
> +	unsigned char *pindex = &cpu_ops_indexes[cpu / 4];
> +	int index;
>  
>  	if (!enable_method)
>  		return -ENODEV;
>  
> -	cpu_ops[cpu] = cpu_get_ops(enable_method);
> -	if (!cpu_ops[cpu]) {
> +	index = get_cpu_ops_index(enable_method);
> +	if (index < 0) {
>  		pr_warn("Unsupported enable-method: %s\n", enable_method);
>  		return -EOPNOTSUPP;
>  	}
>  
> +	*pindex &= ~(0x3 << (2 * (cpu % 4)));
> +	*pindex |= ((index & 0x3) << (2 * (cpu % 4)));
> +
>  	return 0;
>  }
>  
>  const struct cpu_operations *get_cpu_ops(int cpu)
>  {
> -	return cpu_ops[cpu];
> +	int index = ((cpu_ops_indexes[cpu / 4] >> (2 * (cpu % 4))) & 0x3);
> +
> +	return cpu_ops[index];
>  }
> -- 
> 2.23.0
> 

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2 4/4] arm64: Dereference CPU operations indirectly
  2020-02-11 11:46 ` Lorenzo Pieralisi
@ 2020-02-11 23:29   ` Gavin Shan
  0 siblings, 0 replies; 3+ messages in thread
From: Gavin Shan @ 2020-02-11 23:29 UTC (permalink / raw)
  To: Lorenzo Pieralisi
  Cc: mark.rutland, catalin.marinas, robin.murphy, sudeep.holla, will,
	linux-arm-kernel

On 2/11/20 10:46 PM, Lorenzo Pieralisi wrote:
> On Tue, Feb 04, 2020 at 10:51:07AM +1100, Gavin Shan wrote:
>> One CPU operations is maintained through array @cpu_ops[NR_CPUS]. 2KB
>> memory is consumed when CONFIG_NR_CPUS is set to 256. It seems too
>> much memory has been used for this.
>>
>> This introduces another array (@cpu_ops_index[NR_CPUS/4]), of which the
>> index to CPU operations array is stored. With this, we just need one byte
>> to be shared by 4 CPUs, 64 bytes for 256 CPUs, to dereference the CPU
>> operations indirectly. Note this optimization has the assumption: these
>> CPU operations aren't dereferenced in hot path.
> 
> Actually the enable method must be the same across cpus, which brings
> your optimization down to 1 byte for whatever number of cpus (aka,
> you need an index to the one and only CPU ops entry).
> 
> If a cpu has an enable method != from the first that has been detected
> we should let the cpu ops read fail, that index must not/can not be
> different on != cpus, really, if it is firmware is broken and it is
> probably better to avoid booting a cpu rather than trying, I hardly
> see how we can introduce a regression by adding this logic, TBC.
> 
> Please let me know if anyone spots something I have missed.
> 

Lorenzo, thank you for the comments. Yes, we need only one byte
if there is only one valid set of CPU operations. It seems ok
for cpu_read_ops() to return an error if the selected CPU operation isn't
the one that has been previously chosen. In the error path, the CPU
won't be marked in the possible cpumask and no attempt will be made
to bring it up.

I'll change the code in v3 based on your comments.

Thanks,
Gavin


> 
>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>> ---
>>   arch/arm64/kernel/cpu_ops.c | 49 ++++++++++++++++++-------------------
>>   1 file changed, 24 insertions(+), 25 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
>> index e133011f64b5..d9103d5c9c6f 100644
>> --- a/arch/arm64/kernel/cpu_ops.c
>> +++ b/arch/arm64/kernel/cpu_ops.c
>> @@ -4,7 +4,6 @@
>>    *
>>    * Copyright (C) 2013 ARM Ltd.
>>    */
>> -
>>   #include <linux/acpi.h>
>>   #include <linux/cache.h>
>>   #include <linux/errno.h>
>> @@ -20,39 +19,32 @@ extern const struct cpu_operations acpi_parking_protocol_ops;
>>   #endif
>>   extern const struct cpu_operations cpu_psci_ops;
>>   
>> -static const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
>> -
>> -static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
>> +/*
>> + * Each element of the index array is shared by 4 CPUs. It means each
>> + * CPU index uses 2 bits.
>> + */
>> +static const struct cpu_operations *const cpu_ops[] = {
>>   	&smp_spin_table_ops,
>> -	&cpu_psci_ops,
>> -	NULL,
>> -};
>> -
>> -static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
>>   #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
>>   	&acpi_parking_protocol_ops,
>>   #endif
>>   	&cpu_psci_ops,
>> -	NULL,
>>   };
>> +static unsigned char cpu_ops_indexes[DIV_ROUND_UP(NR_CPUS, 4)] __ro_after_init;
>>   
>> -static const struct cpu_operations * __init cpu_get_ops(const char *name)
>> +static int __init get_cpu_ops_index(const char *name)
>>   {
>> -	const struct cpu_operations *const *ops;
>> -
>> -	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
>> -
>> -	while (*ops) {
>> -		if (!strcmp(name, (*ops)->name))
>> -			return *ops;
>> +	int index;
>>   
>> -		ops++;
>> +	for (index = 0; index < ARRAY_SIZE(cpu_ops); index++) {
>> +		if (!strcmp(cpu_ops[index]->name, name))
>> +			return index;
>>   	}
>>   
>> -	return NULL;
>> +	return -ERANGE;
>>   }
>>   
>> -static const char *__init cpu_read_enable_method(int cpu)
>> +static const char *__init get_cpu_method(int cpu)
>>   {
>>   	const char *enable_method;
>>   
>> @@ -98,21 +90,28 @@ static const char *__init cpu_read_enable_method(int cpu)
>>    */
>>   int __init init_cpu_ops(int cpu)
>>   {
>> -	const char *enable_method = cpu_read_enable_method(cpu);
>> +	const char *enable_method = get_cpu_method(cpu);
>> +	unsigned char *pindex = &cpu_ops_indexes[cpu / 4];
>> +	int index;
>>   
>>   	if (!enable_method)
>>   		return -ENODEV;
>>   
>> -	cpu_ops[cpu] = cpu_get_ops(enable_method);
>> -	if (!cpu_ops[cpu]) {
>> +	index = get_cpu_ops_index(enable_method);
>> +	if (index < 0) {
>>   		pr_warn("Unsupported enable-method: %s\n", enable_method);
>>   		return -EOPNOTSUPP;
>>   	}
>>   
>> +	*pindex &= ~(0x3 << (2 * (cpu % 4)));
>> +	*pindex |= ((index & 0x3) << (2 * (cpu % 4)));
>> +
>>   	return 0;
>>   }
>>   
>>   const struct cpu_operations *get_cpu_ops(int cpu)
>>   {
>> -	return cpu_ops[cpu];
>> +	int index = ((cpu_ops_indexes[cpu / 4] >> (2 * (cpu % 4))) & 0x3);
>> +
>> +	return cpu_ops[index];
>>   }
>> -- 
>> 2.23.0
>>
> 


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-02-11 23:29 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-03 23:51 [PATCH v2 4/4] arm64: Dereference CPU operations indirectly Gavin Shan
2020-02-11 11:46 ` Lorenzo Pieralisi
2020-02-11 23:29   ` Gavin Shan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.