sparclinux.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 0/6] Remove onstack cpumask var for sparc
@ 2024-04-23  8:30 Dawei Li
  2024-04-23  8:30 ` [PATCH v3 1/6] sparc/srmmu: Remove on-stack cpumask var Dawei Li
                   ` (5 more replies)
  0 siblings, 6 replies; 9+ messages in thread
From: Dawei Li @ 2024-04-23  8:30 UTC (permalink / raw)
  To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li

Hi,

This is v3 of the previous series removing on-stack cpumask variables
from the sparc arch.

Changes since v2:

- PATCH[1]:
  - Rename helper, cpumask_any_but_current -> any_other_mm_cpus. (Sam)
  - Add Reviewed-by from Sam.

- PATCH[2-4]:
  - Add Reviewed-by from Sam.

- PATCH[5]:
  - Remove __initdata annotation and related commit message. (Sam)

- PATCH[6]:
  - Change from ":?" to "if else" style. (Sam)
  - Core logic _unchanged_.

- PATCH[7]:
  - Removed due to potential conflicts with other pending series. (Sam)

v1:
https://lore.kernel.org/all/20240418104949.3606645-1-dawei.li@shingroup.cn/

v2:
https://lore.kernel.org/lkml/20240420051547.3681642-1-dawei.li@shingroup.cn/

Dawei Li (6):
  sparc/srmmu: Remove on-stack cpumask var
  sparc/irq: Remove on-stack cpumask var
  sparc/of: Remove on-stack cpumask var
  sparc/pci_msi: Remove on-stack cpumask var
  sparc/init: Remove on-stack cpumask var
  sparc/leon: Remove on-stack cpumask var

 arch/sparc/kernel/irq_64.c       | 10 +++-----
 arch/sparc/kernel/leon_kernel.c  |  7 +++---
 arch/sparc/kernel/of_device_64.c |  5 +---
 arch/sparc/kernel/pci_msi.c      |  5 +---
 arch/sparc/mm/init_64.c          |  2 +-
 arch/sparc/mm/srmmu.c            | 40 ++++++++++----------------------
 6 files changed, 21 insertions(+), 48 deletions(-)

Thanks,

    Dawei
-- 
2.27.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v3 1/6] sparc/srmmu: Remove on-stack cpumask var
  2024-04-23  8:30 [PATCH v3 0/6] Remove onstack cpumask var for sparc Dawei Li
@ 2024-04-23  8:30 ` Dawei Li
  2024-04-23  8:30 ` [PATCH v3 2/6] sparc/irq: " Dawei Li
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Dawei Li @ 2024-04-23  8:30 UTC (permalink / raw)
  To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li

In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.

Use cpumask_any_but() to avoid the need for a temporary cpumask on
the stack and simplify code.

Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Dawei Li <dawei.li@shingroup.cn>
---
 arch/sparc/mm/srmmu.c | 40 ++++++++++++----------------------------
 1 file changed, 12 insertions(+), 28 deletions(-)

diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index 852085ada368..9df51a62333d 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -1653,13 +1653,15 @@ static void smp_flush_tlb_all(void)
 	local_ops->tlb_all();
 }
 
+static bool any_other_mm_cpus(struct mm_struct *mm)
+{
+	return cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids;
+}
+
 static void smp_flush_cache_mm(struct mm_struct *mm)
 {
 	if (mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask;
-		cpumask_copy(&cpu_mask, mm_cpumask(mm));
-		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-		if (!cpumask_empty(&cpu_mask))
+		if (any_other_mm_cpus(mm))
 			xc1(local_ops->cache_mm, (unsigned long)mm);
 		local_ops->cache_mm(mm);
 	}
@@ -1668,10 +1670,7 @@ static void smp_flush_cache_mm(struct mm_struct *mm)
 static void smp_flush_tlb_mm(struct mm_struct *mm)
 {
 	if (mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask;
-		cpumask_copy(&cpu_mask, mm_cpumask(mm));
-		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-		if (!cpumask_empty(&cpu_mask)) {
+		if (any_other_mm_cpus(mm)) {
 			xc1(local_ops->tlb_mm, (unsigned long)mm);
 			if (atomic_read(&mm->mm_users) == 1 && current->active_mm == mm)
 				cpumask_copy(mm_cpumask(mm),
@@ -1688,10 +1687,7 @@ static void smp_flush_cache_range(struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 
 	if (mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask;
-		cpumask_copy(&cpu_mask, mm_cpumask(mm));
-		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-		if (!cpumask_empty(&cpu_mask))
+		if (any_other_mm_cpus(mm))
 			xc3(local_ops->cache_range, (unsigned long)vma, start,
 			    end);
 		local_ops->cache_range(vma, start, end);
@@ -1705,10 +1701,7 @@ static void smp_flush_tlb_range(struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 
 	if (mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask;
-		cpumask_copy(&cpu_mask, mm_cpumask(mm));
-		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-		if (!cpumask_empty(&cpu_mask))
+		if (any_other_mm_cpus(mm))
 			xc3(local_ops->tlb_range, (unsigned long)vma, start,
 			    end);
 		local_ops->tlb_range(vma, start, end);
@@ -1720,10 +1713,7 @@ static void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
 	struct mm_struct *mm = vma->vm_mm;
 
 	if (mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask;
-		cpumask_copy(&cpu_mask, mm_cpumask(mm));
-		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-		if (!cpumask_empty(&cpu_mask))
+		if (any_other_mm_cpus(mm))
 			xc2(local_ops->cache_page, (unsigned long)vma, page);
 		local_ops->cache_page(vma, page);
 	}
@@ -1734,10 +1724,7 @@ static void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 	struct mm_struct *mm = vma->vm_mm;
 
 	if (mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask;
-		cpumask_copy(&cpu_mask, mm_cpumask(mm));
-		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-		if (!cpumask_empty(&cpu_mask))
+		if (any_other_mm_cpus(mm))
 			xc2(local_ops->tlb_page, (unsigned long)vma, page);
 		local_ops->tlb_page(vma, page);
 	}
@@ -1759,10 +1746,7 @@ static void smp_flush_page_to_ram(unsigned long page)
 
 static void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
 {
-	cpumask_t cpu_mask;
-	cpumask_copy(&cpu_mask, mm_cpumask(mm));
-	cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-	if (!cpumask_empty(&cpu_mask))
+	if (any_other_mm_cpus(mm))
 		xc2(local_ops->sig_insns, (unsigned long)mm, insn_addr);
 	local_ops->sig_insns(mm, insn_addr);
 }
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v3 2/6] sparc/irq: Remove on-stack cpumask var
  2024-04-23  8:30 [PATCH v3 0/6] Remove onstack cpumask var for sparc Dawei Li
  2024-04-23  8:30 ` [PATCH v3 1/6] sparc/srmmu: Remove on-stack cpumask var Dawei Li
@ 2024-04-23  8:30 ` Dawei Li
  2024-04-23  8:30 ` [PATCH v3 3/6] sparc/of: " Dawei Li
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Dawei Li @ 2024-04-23  8:30 UTC (permalink / raw)
  To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li

In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.

- Both arguments of cpumask_equal() are read-only and never modified, so
  there is no need to allocate extra cpumask variables.

- Merge cpumask_and(), cpumask_first() and cpumask_empty() into
  cpumask_first_and().

Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Dawei Li <dawei.li@shingroup.cn>
---
 arch/sparc/kernel/irq_64.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 5280e325d4d6..01ee800efde3 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -349,17 +349,13 @@ static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
 #ifdef CONFIG_SMP
 static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity)
 {
-	cpumask_t mask;
 	int cpuid;
 
-	cpumask_copy(&mask, affinity);
-	if (cpumask_equal(&mask, cpu_online_mask)) {
+	if (cpumask_equal(affinity, cpu_online_mask)) {
 		cpuid = map_to_cpu(irq);
 	} else {
-		cpumask_t tmp;
-
-		cpumask_and(&tmp, cpu_online_mask, &mask);
-		cpuid = cpumask_empty(&tmp) ? map_to_cpu(irq) : cpumask_first(&tmp);
+		cpuid = cpumask_first_and(affinity, cpu_online_mask);
+		cpuid = cpuid < nr_cpu_ids ? cpuid : map_to_cpu(irq);
 	}
 
 	return cpuid;
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v3 3/6] sparc/of: Remove on-stack cpumask var
  2024-04-23  8:30 [PATCH v3 0/6] Remove onstack cpumask var for sparc Dawei Li
  2024-04-23  8:30 ` [PATCH v3 1/6] sparc/srmmu: Remove on-stack cpumask var Dawei Li
  2024-04-23  8:30 ` [PATCH v3 2/6] sparc/irq: " Dawei Li
@ 2024-04-23  8:30 ` Dawei Li
  2024-04-23  8:30 ` [PATCH v3 4/6] sparc/pci_msi: " Dawei Li
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Dawei Li @ 2024-04-23  8:30 UTC (permalink / raw)
  To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li

In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.

The @cpumask argument of irq_set_affinity() is read-only and never
modified, so drop the unneeded cpumask var.

Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Dawei Li <dawei.li@shingroup.cn>
---
 arch/sparc/kernel/of_device_64.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index c350c58c7f69..f98c2901f335 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -624,10 +624,7 @@ static unsigned int __init build_one_device_irq(struct platform_device *op,
 out:
 	nid = of_node_to_nid(dp);
 	if (nid != -1) {
-		cpumask_t numa_mask;
-
-		cpumask_copy(&numa_mask, cpumask_of_node(nid));
-		irq_set_affinity(irq, &numa_mask);
+		irq_set_affinity(irq, cpumask_of_node(nid));
 	}
 
 	return irq;
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v3 4/6] sparc/pci_msi: Remove on-stack cpumask var
  2024-04-23  8:30 [PATCH v3 0/6] Remove onstack cpumask var for sparc Dawei Li
                   ` (2 preceding siblings ...)
  2024-04-23  8:30 ` [PATCH v3 3/6] sparc/of: " Dawei Li
@ 2024-04-23  8:30 ` Dawei Li
  2024-04-23  8:30 ` [PATCH v3 5/6] sparc/init: " Dawei Li
  2024-04-23  8:30 ` [PATCH v3 6/6] sparc/leon: " Dawei Li
  5 siblings, 0 replies; 9+ messages in thread
From: Dawei Li @ 2024-04-23  8:30 UTC (permalink / raw)
  To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li

In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.

The @cpumask argument of irq_set_affinity() is read-only and never
modified, so drop the unneeded cpumask var.

Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Dawei Li <dawei.li@shingroup.cn>
---
 arch/sparc/kernel/pci_msi.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
index fc7402948b7b..acb2f83a1d5c 100644
--- a/arch/sparc/kernel/pci_msi.c
+++ b/arch/sparc/kernel/pci_msi.c
@@ -287,10 +287,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
 
 	nid = pbm->numa_node;
 	if (nid != -1) {
-		cpumask_t numa_mask;
-
-		cpumask_copy(&numa_mask, cpumask_of_node(nid));
-		irq_set_affinity(irq, &numa_mask);
+		irq_set_affinity(irq, cpumask_of_node(nid));
 	}
 	err = request_irq(irq, sparc64_msiq_interrupt, 0,
 			  "MSIQ",
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v3 5/6] sparc/init: Remove on-stack cpumask var
  2024-04-23  8:30 [PATCH v3 0/6] Remove onstack cpumask var for sparc Dawei Li
                   ` (3 preceding siblings ...)
  2024-04-23  8:30 ` [PATCH v3 4/6] sparc/pci_msi: " Dawei Li
@ 2024-04-23  8:30 ` Dawei Li
  2024-04-23 17:32   ` Sam Ravnborg
  2024-04-23  8:30 ` [PATCH v3 6/6] sparc/leon: " Dawei Li
  5 siblings, 1 reply; 9+ messages in thread
From: Dawei Li @ 2024-04-23  8:30 UTC (permalink / raw)
  To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li

In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.

Since the cpumask var resides in an __init function, which is free of
any concurrent access, it can safely be marked static to get rid of the
allocation on the stack.

Signed-off-by: Dawei Li <dawei.li@shingroup.cn>
---
 arch/sparc/mm/init_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 1ca9054d9b97..9edbf57a2c59 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1438,7 +1438,7 @@ static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp,
 static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
 					 int index)
 {
-	cpumask_t mask;
+	static cpumask_t mask;
 	int cpu;
 
 	numa_parse_mdesc_group_cpus(md, grp, &mask);
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v3 6/6] sparc/leon: Remove on-stack cpumask var
  2024-04-23  8:30 [PATCH v3 0/6] Remove onstack cpumask var for sparc Dawei Li
                   ` (4 preceding siblings ...)
  2024-04-23  8:30 ` [PATCH v3 5/6] sparc/init: " Dawei Li
@ 2024-04-23  8:30 ` Dawei Li
  2024-04-23 17:29   ` Sam Ravnborg
  5 siblings, 1 reply; 9+ messages in thread
From: Dawei Li @ 2024-04-23  8:30 UTC (permalink / raw)
  To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li

In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.

Use cpumask_subset() and cpumask_first_and() to avoid the need for a
temporary cpumask on the stack.

Signed-off-by: Dawei Li <dawei.li@shingroup.cn>
---
 arch/sparc/kernel/leon_kernel.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
index 4c61da491fee..a43cf794bb1e 100644
--- a/arch/sparc/kernel/leon_kernel.c
+++ b/arch/sparc/kernel/leon_kernel.c
@@ -106,13 +106,12 @@ unsigned long leon_get_irqmask(unsigned int irq)
 #ifdef CONFIG_SMP
 static int irq_choose_cpu(const struct cpumask *affinity)
 {
-	cpumask_t mask;
+	unsigned int cpu = cpumask_first_and(affinity, cpu_online_mask);
 
-	cpumask_and(&mask, cpu_online_mask, affinity);
-	if (cpumask_equal(&mask, cpu_online_mask) || cpumask_empty(&mask))
+	if (cpumask_subset(cpu_online_mask, affinity) || cpu >= nr_cpu_ids)
 		return boot_cpu_id;
 	else
-		return cpumask_first(&mask);
+		return cpu;
 }
 #else
 #define irq_choose_cpu(affinity) boot_cpu_id
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v3 6/6] sparc/leon: Remove on-stack cpumask var
  2024-04-23  8:30 ` [PATCH v3 6/6] sparc/leon: " Dawei Li
@ 2024-04-23 17:29   ` Sam Ravnborg
  0 siblings, 0 replies; 9+ messages in thread
From: Sam Ravnborg @ 2024-04-23 17:29 UTC (permalink / raw)
  To: Dawei Li; +Cc: davem, andreas, sparclinux, linux-kernel

On Tue, Apr 23, 2024 at 04:30:43PM +0800, Dawei Li wrote:
> In general it's preferable to avoid placing cpumasks on the stack, as
> for large values of NR_CPUS these can consume significant amounts of
> stack space and make stack overflows more likely.
> 
> Use cpumask_subset() and cpumask_first_and() to avoid the need for a
> temporary cpumask on the stack.
> 
> Signed-off-by: Dawei Li <dawei.li@shingroup.cn>
Looks fine.

Reviewed-by: Sam Ravnborg <sam@ravnborg.org>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3 5/6] sparc/init: Remove on-stack cpumask var
  2024-04-23  8:30 ` [PATCH v3 5/6] sparc/init: " Dawei Li
@ 2024-04-23 17:32   ` Sam Ravnborg
  0 siblings, 0 replies; 9+ messages in thread
From: Sam Ravnborg @ 2024-04-23 17:32 UTC (permalink / raw)
  To: Dawei Li; +Cc: davem, andreas, sparclinux, linux-kernel

Hi Dawei.

On Tue, Apr 23, 2024 at 04:30:42PM +0800, Dawei Li wrote:
> In general it's preferable to avoid placing cpumasks on the stack, as
> for large values of NR_CPUS these can consume significant amounts of
> stack space and make stack overflows more likely.
> 
> Since the cpumask var resides in __init function, which means it's free
> of any concurrent access, it can be safely marked with static to get
> rid of allocation on stack.
> 
> Signed-off-by: Dawei Li <dawei.li@shingroup.cn>

I am not convinced this patch is the right approach, and I am not sure
it is worth trying to fix it.
This patch adds complexity, where the other patches simplified code.
I recommend dropping this; we can revisit it if this turns out to be a
real problem.

	Sam

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-04-23 17:33 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-23  8:30 [PATCH v3 0/6] Remove onstack cpumask var for sparc Dawei Li
2024-04-23  8:30 ` [PATCH v3 1/6] sparc/srmmu: Remove on-stack cpumask var Dawei Li
2024-04-23  8:30 ` [PATCH v3 2/6] sparc/irq: " Dawei Li
2024-04-23  8:30 ` [PATCH v3 3/6] sparc/of: " Dawei Li
2024-04-23  8:30 ` [PATCH v3 4/6] sparc/pci_msi: " Dawei Li
2024-04-23  8:30 ` [PATCH v3 5/6] sparc/init: " Dawei Li
2024-04-23 17:32   ` Sam Ravnborg
2024-04-23  8:30 ` [PATCH v3 6/6] sparc/leon: " Dawei Li
2024-04-23 17:29   ` Sam Ravnborg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).