* [PATCH 1/2] powerpc: remove cpu_online_cores_map function
@ 2021-11-05 3:50 Nicholas Piggin
2021-11-05 3:50 ` [PATCH 2/2] powerpc: select CPUMASK_OFFSTACK if NR_CPUS >= 8192 Nicholas Piggin
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Nicholas Piggin @ 2021-11-05 3:50 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
This function builds the cores online map with on-stack cpumasks which
can cause high stack usage with large NR_CPUS.
It is not used in any performance-sensitive paths, so instead just check
for the first thread sibling.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/cputhreads.h | 33 -----------------------
arch/powerpc/platforms/powernv/idle.c | 10 +++----
arch/powerpc/platforms/powernv/opal-imc.c | 6 ++---
3 files changed, 8 insertions(+), 41 deletions(-)
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index b167186aaee4..f26c430f3982 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -32,44 +32,11 @@ extern cpumask_t threads_core_mask;
#define threads_core_mask (*get_cpu_mask(0))
#endif
-/* cpu_thread_mask_to_cores - Return a cpumask of one per cores
- * hit by the argument
- *
- * @threads: a cpumask of online threads
- *
- * This function returns a cpumask which will have one online cpu's
- * bit set for each core that has at least one thread set in the argument.
- *
- * This can typically be used for things like IPI for tlb invalidations
- * since those need to be done only once per core/TLB
- */
-static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads)
-{
- cpumask_t tmp, res;
- int i, cpu;
-
- cpumask_clear(&res);
- for (i = 0; i < NR_CPUS; i += threads_per_core) {
- cpumask_shift_left(&tmp, &threads_core_mask, i);
- if (cpumask_intersects(threads, &tmp)) {
- cpu = cpumask_next_and(-1, &tmp, cpu_online_mask);
- if (cpu < nr_cpu_ids)
- cpumask_set_cpu(cpu, &res);
- }
- }
- return res;
-}
-
static inline int cpu_nr_cores(void)
{
return nr_cpu_ids >> threads_shift;
}
-static inline cpumask_t cpu_online_cores_map(void)
-{
- return cpu_thread_mask_to_cores(cpu_online_mask);
-}
-
#ifdef CONFIG_SMP
int cpu_core_index_of_thread(int cpu);
int cpu_first_thread_of_core(int core);
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index e3ffdc8e8567..70da314fd2d7 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -146,9 +146,13 @@ EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
static void pnv_fastsleep_workaround_apply(void *info)
{
+ int cpu = smp_processor_id();
int rc;
int *err = info;
+ if (cpu_first_thread_sibling(cpu) != cpu)
+ return;
+
rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
OPAL_CONFIG_IDLE_APPLY);
if (rc)
@@ -175,7 +179,6 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
struct device_attribute *attr, const char *buf,
size_t count)
{
- cpumask_t primary_thread_mask;
int err;
u8 val;
@@ -200,10 +203,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
power7_fastsleep_workaround_exit = false;
cpus_read_lock();
- primary_thread_mask = cpu_online_cores_map();
- on_each_cpu_mask(&primary_thread_mask,
- pnv_fastsleep_workaround_apply,
- &err, 1);
+ on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1);
cpus_read_unlock();
if (err) {
pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 05d3832019b9..3fea5da6d1b3 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -200,13 +200,13 @@ static void disable_nest_pmu_counters(void)
static void disable_core_pmu_counters(void)
{
- cpumask_t cores_map;
int cpu, rc;
cpus_read_lock();
/* Disable the IMC Core functions */
- cores_map = cpu_online_cores_map();
- for_each_cpu(cpu, &cores_map) {
+ for_each_online_cpu(cpu) {
+ if (cpu_first_thread_sibling(cpu) != cpu)
+ continue;
rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(cpu));
if (rc)
--
2.23.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] powerpc: select CPUMASK_OFFSTACK if NR_CPUS >= 8192
2021-11-05 3:50 [PATCH 1/2] powerpc: remove cpu_online_cores_map function Nicholas Piggin
@ 2021-11-05 3:50 ` Nicholas Piggin
2021-11-05 13:09 ` [PATCH 1/2] powerpc: remove cpu_online_cores_map function Sachin Sant
2021-12-07 13:26 ` Michael Ellerman
2 siblings, 0 replies; 4+ messages in thread
From: Nicholas Piggin @ 2021-11-05 3:50 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
Some core kernel code starts to exceed the 2048-byte stack size
warning threshold at NR_CPUS=8192, so select CPUMASK_OFFSTACK in that case.
x86 does something similar for very large NR_CPUS.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ba5b66189358..b8f6185d3998 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -163,6 +163,7 @@ config PPC
select BINFMT_ELF
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS
+ select CPUMASK_OFFSTACK if NR_CPUS >= 8192
select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
select DMA_OPS_BYPASS if PPC64
select DMA_OPS if PPC64
--
2.23.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 1/2] powerpc: remove cpu_online_cores_map function
2021-11-05 3:50 [PATCH 1/2] powerpc: remove cpu_online_cores_map function Nicholas Piggin
2021-11-05 3:50 ` [PATCH 2/2] powerpc: select CPUMASK_OFFSTACK if NR_CPUS >= 8192 Nicholas Piggin
@ 2021-11-05 13:09 ` Sachin Sant
2021-12-07 13:26 ` Michael Ellerman
2 siblings, 0 replies; 4+ messages in thread
From: Sachin Sant @ 2021-11-05 13:09 UTC (permalink / raw)
To: Nicholas Piggin; +Cc: linuxppc-dev
> On 05-Nov-2021, at 9:20 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
>
> This function builds the cores online map with on-stack cpumasks which
> can cause high stack usage with large NR_CPUS.
>
> It is not used in any performance sensitive paths, so instead just check
> for first thread sibling.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
Tested-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
Thanks
- Sachin
> arch/powerpc/include/asm/cputhreads.h | 33 -----------------------
> arch/powerpc/platforms/powernv/idle.c | 10 +++----
> arch/powerpc/platforms/powernv/opal-imc.c | 6 ++---
> 3 files changed, 8 insertions(+), 41 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
> index b167186aaee4..f26c430f3982 100644
> --- a/arch/powerpc/include/asm/cputhreads.h
> +++ b/arch/powerpc/include/asm/cputhreads.h
> @@ -32,44 +32,11 @@ extern cpumask_t threads_core_mask;
> #define threads_core_mask (*get_cpu_mask(0))
> #endif
>
> -/* cpu_thread_mask_to_cores - Return a cpumask of one per cores
> - * hit by the argument
> - *
> - * @threads: a cpumask of online threads
> - *
> - * This function returns a cpumask which will have one online cpu's
> - * bit set for each core that has at least one thread set in the argument.
> - *
> - * This can typically be used for things like IPI for tlb invalidations
> - * since those need to be done only once per core/TLB
> - */
> -static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads)
> -{
> - cpumask_t tmp, res;
> - int i, cpu;
> -
> - cpumask_clear(&res);
> - for (i = 0; i < NR_CPUS; i += threads_per_core) {
> - cpumask_shift_left(&tmp, &threads_core_mask, i);
> - if (cpumask_intersects(threads, &tmp)) {
> - cpu = cpumask_next_and(-1, &tmp, cpu_online_mask);
> - if (cpu < nr_cpu_ids)
> - cpumask_set_cpu(cpu, &res);
> - }
> - }
> - return res;
> -}
> -
> static inline int cpu_nr_cores(void)
> {
> return nr_cpu_ids >> threads_shift;
> }
>
> -static inline cpumask_t cpu_online_cores_map(void)
> -{
> - return cpu_thread_mask_to_cores(cpu_online_mask);
> -}
> -
> #ifdef CONFIG_SMP
> int cpu_core_index_of_thread(int cpu);
> int cpu_first_thread_of_core(int core);
> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
> index e3ffdc8e8567..70da314fd2d7 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -146,9 +146,13 @@ EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
> static void pnv_fastsleep_workaround_apply(void *info)
>
> {
> + int cpu = smp_processor_id();
> int rc;
> int *err = info;
>
> + if (cpu_first_thread_sibling(cpu) != cpu)
> + return;
> +
> rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
> OPAL_CONFIG_IDLE_APPLY);
> if (rc)
> @@ -175,7 +179,6 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
> struct device_attribute *attr, const char *buf,
> size_t count)
> {
> - cpumask_t primary_thread_mask;
> int err;
> u8 val;
>
> @@ -200,10 +203,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
> power7_fastsleep_workaround_exit = false;
>
> cpus_read_lock();
> - primary_thread_mask = cpu_online_cores_map();
> - on_each_cpu_mask(&primary_thread_mask,
> - pnv_fastsleep_workaround_apply,
> - &err, 1);
> + on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1);
> cpus_read_unlock();
> if (err) {
> pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
> diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
> index 05d3832019b9..3fea5da6d1b3 100644
> --- a/arch/powerpc/platforms/powernv/opal-imc.c
> +++ b/arch/powerpc/platforms/powernv/opal-imc.c
> @@ -200,13 +200,13 @@ static void disable_nest_pmu_counters(void)
>
> static void disable_core_pmu_counters(void)
> {
> - cpumask_t cores_map;
> int cpu, rc;
>
> cpus_read_lock();
> /* Disable the IMC Core functions */
> - cores_map = cpu_online_cores_map();
> - for_each_cpu(cpu, &cores_map) {
> + for_each_online_cpu(cpu) {
> + if (cpu_first_thread_sibling(cpu) != cpu)
> + continue;
> rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
> get_hard_smp_processor_id(cpu));
> if (rc)
> --
> 2.23.0
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 1/2] powerpc: remove cpu_online_cores_map function
2021-11-05 3:50 [PATCH 1/2] powerpc: remove cpu_online_cores_map function Nicholas Piggin
2021-11-05 3:50 ` [PATCH 2/2] powerpc: select CPUMASK_OFFSTACK if NR_CPUS >= 8192 Nicholas Piggin
2021-11-05 13:09 ` [PATCH 1/2] powerpc: remove cpu_online_cores_map function Sachin Sant
@ 2021-12-07 13:26 ` Michael Ellerman
2 siblings, 0 replies; 4+ messages in thread
From: Michael Ellerman @ 2021-12-07 13:26 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev
On Fri, 5 Nov 2021 13:50:41 +1000, Nicholas Piggin wrote:
> This function builds the cores online map with on-stack cpumasks which
> can cause high stack usage with large NR_CPUS.
>
> It is not used in any performance sensitive paths, so instead just check
> for first thread sibling.
>
>
> [...]
Applied to powerpc/next.
[1/2] powerpc: remove cpu_online_cores_map function
https://git.kernel.org/powerpc/c/b350111bf7b3f4a780d28c44f18f7c9fcbe6d11b
[2/2] powerpc: select CPUMASK_OFFSTACK if NR_CPUS >= 8192
https://git.kernel.org/powerpc/c/2eafc4748bc08c5b9b6ee0b5b65ad20b30f7d704
cheers
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-12-07 13:32 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-05 3:50 [PATCH 1/2] powerpc: remove cpu_online_cores_map function Nicholas Piggin
2021-11-05 3:50 ` [PATCH 2/2] powerpc: select CPUMASK_OFFSTACK if NR_CPUS >= 8192 Nicholas Piggin
2021-11-05 13:09 ` [PATCH 1/2] powerpc: remove cpu_online_cores_map function Sachin Sant
2021-12-07 13:26 ` Michael Ellerman
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.