sparclinux.vger.kernel.org archive mirror
* [PATCH 0/5] Remove onstack cpumask var usage
@ 2024-04-18 10:49 Dawei Li
  2024-04-18 10:49 ` [PATCH 1/5] sparc/srmmu: Remove on-stack cpumask var Dawei Li
                   ` (5 more replies)
  0 siblings, 6 replies; 11+ messages in thread
From: Dawei Li @ 2024-04-18 10:49 UTC (permalink / raw)
  To: davem, andreas; +Cc: sparclinux, linux-kernel, Dawei Li

Hi,

This series aims at removing on-stack cpumask var usage for sparc arch.

Generally it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.
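
For illustration, a minimal sketch of the pattern being removed and the
general shape of the replacement, assuming the standard <linux/cpumask.h>
API; example_before(), example_after() and do_something() are made-up
names, and the concrete replacement differs per call site (see the
individual patches):

	/* Before: an NR_CPUS-bit mask lives on the stack
	 * (e.g. 512 bytes with NR_CPUS=4096).
	 */
	static void example_before(struct mm_struct *mm)
	{
		cpumask_t mask;

		cpumask_copy(&mask, mm_cpumask(mm));
		cpumask_clear_cpu(smp_processor_id(), &mask);
		if (!cpumask_empty(&mask))
			do_something(mm);
	}

	/* After: query the existing mask directly, no temporary copy. */
	static void example_after(struct mm_struct *mm)
	{
		if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
			do_something(mm);
	}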

Dawei Li (5):
  sparc/srmmu: Remove on-stack cpumask var
  sparc/irq: Remove on-stack cpumask var
  sparc/of: Remove on-stack cpumask var
  sparc/pci_msi: Remove on-stack cpumask var
  sparc: Remove on-stack cpumask var

 arch/sparc/kernel/irq_64.c       | 10 +++-----
 arch/sparc/kernel/of_device_64.c |  5 +---
 arch/sparc/kernel/pci_msi.c      |  5 +---
 arch/sparc/mm/init_64.c          |  2 +-
 arch/sparc/mm/srmmu.c            | 40 ++++++++++----------------------
 5 files changed, 18 insertions(+), 44 deletions(-)

Thanks,

    Dawei

-- 
2.27.0



* [PATCH 1/5] sparc/srmmu: Remove on-stack cpumask var
  2024-04-18 10:49 [PATCH 0/5] Remove onstack cpumask var usage Dawei Li
@ 2024-04-18 10:49 ` Dawei Li
  2024-04-18 10:49 ` [PATCH 2/5] sparc/irq: " Dawei Li
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 11+ messages in thread
From: Dawei Li @ 2024-04-18 10:49 UTC (permalink / raw)
  To: davem, andreas; +Cc: sparclinux, linux-kernel, Dawei Li

In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.

Use cpumask_any_but() to avoid the need for a temporary cpumask on
the stack.
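
For reference, cpumask_any_but(mask, cpu) (declared in <linux/cpumask.h>)
returns the id of some CPU set in @mask other than @cpu, or a value
>= nr_cpu_ids when no such CPU exists, so "is any other CPU interested?"
becomes a single call; a sketch of the resulting test:

	/* true if mm is in use on at least one CPU besides this one */
	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
		/* ... cross-call the other CPUs ... */;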

Signed-off-by: Dawei Li <dawei.li@shingroup.cn>
---
 arch/sparc/mm/srmmu.c | 40 ++++++++++++----------------------------
 1 file changed, 12 insertions(+), 28 deletions(-)

diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index 852085ada368..86fd20c878ae 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -1653,13 +1653,15 @@ static void smp_flush_tlb_all(void)
 	local_ops->tlb_all();
 }
 
+static bool cpumask_any_but_current(struct mm_struct *mm)
+{
+	return cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids;
+}
+
 static void smp_flush_cache_mm(struct mm_struct *mm)
 {
 	if (mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask;
-		cpumask_copy(&cpu_mask, mm_cpumask(mm));
-		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-		if (!cpumask_empty(&cpu_mask))
+		if (cpumask_any_but_current(mm))
 			xc1(local_ops->cache_mm, (unsigned long)mm);
 		local_ops->cache_mm(mm);
 	}
@@ -1668,10 +1670,7 @@ static void smp_flush_cache_mm(struct mm_struct *mm)
 static void smp_flush_tlb_mm(struct mm_struct *mm)
 {
 	if (mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask;
-		cpumask_copy(&cpu_mask, mm_cpumask(mm));
-		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-		if (!cpumask_empty(&cpu_mask)) {
+		if (cpumask_any_but_current(mm)) {
 			xc1(local_ops->tlb_mm, (unsigned long)mm);
 			if (atomic_read(&mm->mm_users) == 1 && current->active_mm == mm)
 				cpumask_copy(mm_cpumask(mm),
@@ -1688,10 +1687,7 @@ static void smp_flush_cache_range(struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 
 	if (mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask;
-		cpumask_copy(&cpu_mask, mm_cpumask(mm));
-		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-		if (!cpumask_empty(&cpu_mask))
+		if (cpumask_any_but_current(mm))
 			xc3(local_ops->cache_range, (unsigned long)vma, start,
 			    end);
 		local_ops->cache_range(vma, start, end);
@@ -1705,10 +1701,7 @@ static void smp_flush_tlb_range(struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 
 	if (mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask;
-		cpumask_copy(&cpu_mask, mm_cpumask(mm));
-		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-		if (!cpumask_empty(&cpu_mask))
+		if (cpumask_any_but_current(mm))
 			xc3(local_ops->tlb_range, (unsigned long)vma, start,
 			    end);
 		local_ops->tlb_range(vma, start, end);
@@ -1720,10 +1713,7 @@ static void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
 	struct mm_struct *mm = vma->vm_mm;
 
 	if (mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask;
-		cpumask_copy(&cpu_mask, mm_cpumask(mm));
-		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-		if (!cpumask_empty(&cpu_mask))
+		if (cpumask_any_but_current(mm))
 			xc2(local_ops->cache_page, (unsigned long)vma, page);
 		local_ops->cache_page(vma, page);
 	}
@@ -1734,10 +1724,7 @@ static void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 	struct mm_struct *mm = vma->vm_mm;
 
 	if (mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask;
-		cpumask_copy(&cpu_mask, mm_cpumask(mm));
-		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-		if (!cpumask_empty(&cpu_mask))
+		if (cpumask_any_but_current(mm))
 			xc2(local_ops->tlb_page, (unsigned long)vma, page);
 		local_ops->tlb_page(vma, page);
 	}
@@ -1759,10 +1746,7 @@ static void smp_flush_page_to_ram(unsigned long page)
 
 static void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
 {
-	cpumask_t cpu_mask;
-	cpumask_copy(&cpu_mask, mm_cpumask(mm));
-	cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-	if (!cpumask_empty(&cpu_mask))
+	if (cpumask_any_but_current(mm))
 		xc2(local_ops->sig_insns, (unsigned long)mm, insn_addr);
 	local_ops->sig_insns(mm, insn_addr);
 }
-- 
2.27.0



* [PATCH 2/5] sparc/irq: Remove on-stack cpumask var
  2024-04-18 10:49 [PATCH 0/5] Remove onstack cpumask var usage Dawei Li
  2024-04-18 10:49 ` [PATCH 1/5] sparc/srmmu: Remove on-stack cpumask var Dawei Li
@ 2024-04-18 10:49 ` Dawei Li
  2024-04-19  0:19   ` kernel test robot
  2024-04-18 10:49 ` [PATCH 3/5] sparc/of: " Dawei Li
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 11+ messages in thread
From: Dawei Li @ 2024-04-18 10:49 UTC (permalink / raw)
  To: davem, andreas; +Cc: sparclinux, linux-kernel, Dawei Li

In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.

- Both arguments of cpumask_equal() are constant and never modified, so
  there is no need to allocate an extra cpumask variable.

- Merge the cpumask_and(), cpumask_first() and cpumask_empty() sequence
  into a single cpumask_first_and() call (see the sketch below).
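
A minimal sketch of the cpumask_first_and() contract relied on here (the
fallback value is only a placeholder):

	unsigned int cpu = cpumask_first_and(affinity, cpu_online_mask);

	if (cpu >= nr_cpu_ids)		/* intersection is empty */
		cpu = some_fallback_cpu;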

Signed-off-by: Dawei Li <dawei.li@shingroup.cn>
---
 arch/sparc/kernel/irq_64.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 5280e325d4d6..bc96f1bcd2fc 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -349,17 +349,13 @@ static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
 #ifdef CONFIG_SMP
 static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity)
 {
-	cpumask_t mask;
 	int cpuid;
 
-	cpumask_copy(&mask, affinity);
-	if (cpumask_equal(&mask, cpu_online_mask)) {
+	if (cpumask_equal(affinity, cpu_online_mask)) {
 		cpuid = map_to_cpu(irq);
 	} else {
-		cpumask_t tmp;
-
-		cpumask_and(&tmp, cpu_online_mask, &mask);
-		cpuid = cpumask_empty(&tmp) ? map_to_cpu(irq) : cpumask_first(&tmp);
+		cpuid = cpumask_first_and(affinity, cpu_online_mask);
+		cpuid = cpuid < nr_cpu_ids ? : map_to_cpu(irq);
 	}
 
 	return cpuid;
-- 
2.27.0



* [PATCH 3/5] sparc/of: Remove on-stack cpumask var
  2024-04-18 10:49 [PATCH 0/5] Remove onstack cpumask var usage Dawei Li
  2024-04-18 10:49 ` [PATCH 1/5] sparc/srmmu: Remove on-stack cpumask var Dawei Li
  2024-04-18 10:49 ` [PATCH 2/5] sparc/irq: " Dawei Li
@ 2024-04-18 10:49 ` Dawei Li
  2024-04-18 10:49 ` [PATCH 4/5] sparc/pci_msi: " Dawei Li
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 11+ messages in thread
From: Dawei Li @ 2024-04-18 10:49 UTC (permalink / raw)
  To: davem, andreas; +Cc: sparclinux, linux-kernel, Dawei Li

In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.

The @cpumask argument of irq_set_affinity() is read-only and never
modified, so drop the unneeded on-stack cpumask copy.
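
For context, irq_set_affinity() takes the mask as a const pointer and the
genirq core copies it into the irq descriptor, so passing cpumask_of_node()
directly is fine; the prototype is approximately:

	int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);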

Signed-off-by: Dawei Li <dawei.li@shingroup.cn>
---
 arch/sparc/kernel/of_device_64.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index c350c58c7f69..f98c2901f335 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -624,10 +624,7 @@ static unsigned int __init build_one_device_irq(struct platform_device *op,
 out:
 	nid = of_node_to_nid(dp);
 	if (nid != -1) {
-		cpumask_t numa_mask;
-
-		cpumask_copy(&numa_mask, cpumask_of_node(nid));
-		irq_set_affinity(irq, &numa_mask);
+		irq_set_affinity(irq, cpumask_of_node(nid));
 	}
 
 	return irq;
-- 
2.27.0



* [PATCH 4/5] sparc/pci_msi: Remove on-stack cpumask var
  2024-04-18 10:49 [PATCH 0/5] Remove onstack cpumask var usage Dawei Li
                   ` (2 preceding siblings ...)
  2024-04-18 10:49 ` [PATCH 3/5] sparc/of: " Dawei Li
@ 2024-04-18 10:49 ` Dawei Li
  2024-04-18 10:49 ` [PATCH 5/5] sparc: " Dawei Li
  2024-04-19  5:13 ` [PATCH 0/5] Remove onstack cpumask var usage Sam Ravnborg
  5 siblings, 0 replies; 11+ messages in thread
From: Dawei Li @ 2024-04-18 10:49 UTC (permalink / raw)
  To: davem, andreas; +Cc: sparclinux, linux-kernel, Dawei Li

In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.

The @cpumask argument of irq_set_affinity() is read-only and never
modified, so drop the unneeded on-stack cpumask copy.

Signed-off-by: Dawei Li <dawei.li@shingroup.cn>
---
 arch/sparc/kernel/pci_msi.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
index fc7402948b7b..acb2f83a1d5c 100644
--- a/arch/sparc/kernel/pci_msi.c
+++ b/arch/sparc/kernel/pci_msi.c
@@ -287,10 +287,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
 
 	nid = pbm->numa_node;
 	if (nid != -1) {
-		cpumask_t numa_mask;
-
-		cpumask_copy(&numa_mask, cpumask_of_node(nid));
-		irq_set_affinity(irq, &numa_mask);
+		irq_set_affinity(irq, cpumask_of_node(nid));
 	}
 	err = request_irq(irq, sparc64_msiq_interrupt, 0,
 			  "MSIQ",
-- 
2.27.0



* [PATCH 5/5] sparc: Remove on-stack cpumask var
  2024-04-18 10:49 [PATCH 0/5] Remove onstack cpumask var usage Dawei Li
                   ` (3 preceding siblings ...)
  2024-04-18 10:49 ` [PATCH 4/5] sparc/pci_msi: " Dawei Li
@ 2024-04-18 10:49 ` Dawei Li
  2024-04-19  5:13 ` [PATCH 0/5] Remove onstack cpumask var usage Sam Ravnborg
  5 siblings, 0 replies; 11+ messages in thread
From: Dawei Li @ 2024-04-18 10:49 UTC (permalink / raw)
  To: davem, andreas; +Cc: sparclinux, linux-kernel, Dawei Li

In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.

Since the cpumask variable resides in an __init function, it is free of
any concurrent access and can safely be marked static to get rid of the
on-stack allocation.

While at it, mark it __initdata so it does not persistently consume
memory after boot.
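
Conceptually (a sketch, numa_example() is a made-up name), the mask turns
into a boot-time-only static object that is discarded together with the
rest of the init sections once boot completes:

	static int __init numa_example(void)
	{
		static cpumask_t mask __initdata;	/* off the stack, freed after init */

		/* ... use &mask during early boot only ... */
		return 0;
	}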

Signed-off-by: Dawei Li <dawei.li@shingroup.cn>
---
 arch/sparc/mm/init_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 1ca9054d9b97..088d9c103dcc 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1438,7 +1438,7 @@ static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp,
 static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
 					 int index)
 {
-	cpumask_t mask;
+	static cpumask_t mask __initdata;
 	int cpu;
 
 	numa_parse_mdesc_group_cpus(md, grp, &mask);
-- 
2.27.0



* Re: [PATCH 2/5] sparc/irq: Remove on-stack cpumask var
  2024-04-18 10:49 ` [PATCH 2/5] sparc/irq: " Dawei Li
@ 2024-04-19  0:19   ` kernel test robot
  0 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2024-04-19  0:19 UTC (permalink / raw)
  To: Dawei Li, davem, andreas
  Cc: oe-kbuild-all, sparclinux, linux-kernel, Dawei Li

Hi Dawei,

kernel test robot noticed the following build warnings:

[auto build test WARNING on v6.9-rc4]
[also build test WARNING on linus/master next-20240418]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Dawei-Li/sparc-srmmu-Remove-on-stack-cpumask-var/20240418-185348
base:   v6.9-rc4
patch link:    https://lore.kernel.org/r/20240418104949.3606645-3-dawei.li%40shingroup.cn
patch subject: [PATCH 2/5] sparc/irq: Remove on-stack cpumask var
config: sparc64-defconfig (https://download.01.org/0day-ci/archive/20240419/202404190826.Zi1J5nCx-lkp@intel.com/config)
compiler: sparc64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240419/202404190826.Zi1J5nCx-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202404190826.Zi1J5nCx-lkp@intel.com/

All warnings (new ones prefixed by >>):

   arch/sparc/kernel/irq_64.c: In function 'irq_choose_cpu':
>> arch/sparc/kernel/irq_64.c:358:46: warning: the omitted middle operand in '?:' will always be 'true', suggest explicit middle operand [-Wparentheses]
     358 |                 cpuid = cpuid < nr_cpu_ids ? : map_to_cpu(irq);
         |                                              ^


vim +358 arch/sparc/kernel/irq_64.c

   348	
   349	#ifdef CONFIG_SMP
   350	static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity)
   351	{
   352		int cpuid;
   353	
   354		if (cpumask_equal(affinity, cpu_online_mask)) {
   355			cpuid = map_to_cpu(irq);
   356		} else {
   357			cpuid = cpumask_first_and(affinity, cpu_online_mask);
 > 358			cpuid = cpuid < nr_cpu_ids ? : map_to_cpu(irq);
   359		}
   360	
   361		return cpuid;
   362	}
   363	#else
   364	#define irq_choose_cpu(irq, affinity)	\
   365		real_hard_smp_processor_id()
   366	#endif
   367	
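
For reference, "x ? : y" is the GNU omitted-middle-operand extension and
evaluates to x itself, here the boolean result of the comparison (always 1
when the branch is taken), rather than to cpuid, which is what the warning
points at. The presumably intended form is:

		cpuid = cpuid < nr_cpu_ids ? cpuid : map_to_cpu(irq);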

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


* Re: [PATCH 0/5] Remove onstack cpumask var usage
  2024-04-18 10:49 [PATCH 0/5] Remove onstack cpumask var usage Dawei Li
                   ` (4 preceding siblings ...)
  2024-04-18 10:49 ` [PATCH 5/5] sparc: " Dawei Li
@ 2024-04-19  5:13 ` Sam Ravnborg
  2024-04-19  9:26   ` Dawei Li
  5 siblings, 1 reply; 11+ messages in thread
From: Sam Ravnborg @ 2024-04-19  5:13 UTC (permalink / raw)
  To: Dawei Li; +Cc: davem, andreas, sparclinux, linux-kernel

Hi Dawei,

On Thu, Apr 18, 2024 at 06:49:44PM +0800, Dawei Li wrote:
> Hi,
> 
> This series aims at removing on-stack cpumask var usage for sparc arch.
> 
> Generally it's preferable to avoid placing cpumasks on the stack, as
> for large values of NR_CPUS these can consume significant amounts of
> stack space and make stack overflows more likely.

Took a quick look at the patches, looks good except the one the bot
already complained about.
A quick grep shows a few more cases where we have an on-stack cpumask
in sparc code.

kernel/ds.c:    cpumask_t mask;
kernel/leon_kernel.c:   cpumask_t mask;
kernel/leon_smp.c:static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1,
kernel/sun4d_smp.c:static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1,

Do you plan to look at the other on-stack users too?
It would be nice to see them all gone in one patch-set.

	Sam


* Re: [PATCH 0/5] Remove onstack cpumask var usage
  2024-04-19  5:13 ` [PATCH 0/5] Remove onstack cpumask var usage Sam Ravnborg
@ 2024-04-19  9:26   ` Dawei Li
  2024-04-19 20:33     ` Sam Ravnborg
  2024-04-19 22:13     ` Yury Norov
  0 siblings, 2 replies; 11+ messages in thread
From: Dawei Li @ 2024-04-19  9:26 UTC (permalink / raw)
  To: Sam Ravnborg; +Cc: davem, andreas, sparclinux, linux-kernel, yury.norov

Hi Sam,

Thanks for the review.

On Fri, Apr 19, 2024 at 07:13:50AM +0200, Sam Ravnborg wrote:
> Hi Dawei,
> 
> On Thu, Apr 18, 2024 at 06:49:44PM +0800, Dawei Li wrote:
> > Hi,
> > 
> > This series aims at removing on-stack cpumask var usage for sparc arch.
> > 
> > Generally it's preferable to avoid placing cpumasks on the stack, as
> > for large values of NR_CPUS these can consume significant amounts of
> > stack space and make stack overflows more likely.
> 
> Took a quick look at the patches, looks good except the one the bot
> already complained about.

I will fix this build warning when respinning.

> A quick grep shows a few more cases where we have an on-stack cpumask
> in sparc code.
> 
> kernel/ds.c:    cpumask_t mask;

About this case, it's kinda tricky:
- dr_cpu_data() returns void, so alloc_cpumask_var() is no go.

- No idea of the calling context of dr_cpu_data(). IIUC,
  dr_cpu_data()
  ->dr_cpu_configure()  
   ->kzalloc(resp_len, GFP_KERNEL)
  So I guess it's in process context?
  If the assumption above is OK, a simple but _ugly_ solution could be:

diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
index ffdc15588ac2..c9e4ebdccf49 100644
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -634,7 +634,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf,
        struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
        u32 *cpu_list = (u32 *) (tag + 1);
        u64 req_num = tag->req_num;
-       cpumask_t mask;
+       static DEFINE_MUTEX(mask_lock);
+       static cpumask_t mask;
        unsigned int i;
        int err;

@@ -651,6 +652,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf,

        purge_dups(cpu_list, tag->num_records);

+       mutex_lock(&mask_lock);
+
        cpumask_clear(&mask);
        for (i = 0; i < tag->num_records; i++) {
                if (cpu_list[i] == CPU_SENTINEL)
@@ -665,6 +668,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf,
        else
                err = dr_cpu_unconfigure(dp, cp, req_num, &mask);

+       mutex_unlock(&mask_lock);
+
        if (err)
                dr_cpu_send_error(dp, cp, data);
 }

How does it sound to you?

> kernel/leon_kernel.c:   cpumask_t mask;

It's in irqchip::irq_set_affinity(), which runs in atomic context (raw spinlock(s) held),
so dynamic allocation is not a good idea.

My proposal (*untested*) is somewhat complicated, as it introduces a new helper.

diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
index 4c61da491fee..6eced7acb8bc 100644
--- a/arch/sparc/kernel/leon_kernel.c
+++ b/arch/sparc/kernel/leon_kernel.c
@@ -104,15 +104,25 @@ unsigned long leon_get_irqmask(unsigned int irq)
 }

 #ifdef CONFIG_SMP
+
+static bool cpumask_include(const struct cpumask *srcp1, const struct cpumask *srcp2)
+{
+       unsigned int cpu;
+
+       for_each_cpu(cpu, srcp2) {
+               if (!cpumask_test_cpu(cpu, srcp1))
+                       return false;
+       }
+
+       return true;
+}
+
 static int irq_choose_cpu(const struct cpumask *affinity)
 {
-       cpumask_t mask;
+       unsigned int cpu = cpumask_first_and(affinity, cpu_online_mask);

-       cpumask_and(&mask, cpu_online_mask, affinity);
-       if (cpumask_equal(&mask, cpu_online_mask) || cpumask_empty(&mask))
-               return boot_cpu_id;
-       else
-               return cpumask_first(&mask);
+       return cpumask_include(affinity, cpu_online_mask) || cpu >= nr_cpu_ids ?
+              boot_cpu_id : cpu;
 }
 #else
 #define irq_choose_cpu(affinity) boot_cpu_id

Is it OK?

[cc Yury for bitmap API]

> kernel/leon_smp.c:static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1,
> kernel/sun4d_smp.c:static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1,

Actually I am aware of the existence of (at least some of) them, but so far I
have not found a _proper_ way of dealing with them (especially for the case of
ds.c).

Please lemme dig into it.

Thanks,

    Dawei

> 
> Do you plan to look at the other on-stack users too?
> It would be nice to see them all gone in one patch-set.
> 
> 	Sam
> 


* Re: [PATCH 0/5] Remove onstack cpumask var usage
  2024-04-19  9:26   ` Dawei Li
@ 2024-04-19 20:33     ` Sam Ravnborg
  2024-04-19 22:13     ` Yury Norov
  1 sibling, 0 replies; 11+ messages in thread
From: Sam Ravnborg @ 2024-04-19 20:33 UTC (permalink / raw)
  To: Dawei Li; +Cc: davem, andreas, sparclinux, linux-kernel, yury.norov

Hi Dawei,

> About this case, it's kinda tricky for:
> - dr_cpu_data() returns void, so alloc_cpumask_var() is no go.
> 
> - No idea of the calling context of dr_cpu_data(). IIUC,
>   dr_cpu_data()
>   ->dr_cpu_configure()  
>    ->kzalloc(resp_len, GFP_KERNEL)
>   So I guess it's in process context?
>   If the assumption above is OK, a simple but _ugly_ solution could be:
> 
> diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
> index ffdc15588ac2..c9e4ebdccf49 100644
> --- a/arch/sparc/kernel/ds.c
> +++ b/arch/sparc/kernel/ds.c
> @@ -634,7 +634,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf,
>         struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
>         u32 *cpu_list = (u32 *) (tag + 1);
>         u64 req_num = tag->req_num;
> -       cpumask_t mask;
> +       static DEFINE_MUTEX(mask_lock);
> +       static cpumask_t mask;
>         unsigned int i;
>         int err;
> 
> @@ -651,6 +652,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf,
> 
>         purge_dups(cpu_list, tag->num_records);
> 
> +       mutex_lock(&mask_lock);
> +
>         cpumask_clear(&mask);
>         for (i = 0; i < tag->num_records; i++) {
>                 if (cpu_list[i] == CPU_SENTINEL)
> @@ -665,6 +668,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf,
>         else
>                 err = dr_cpu_unconfigure(dp, cp, req_num, &mask);
> 
> +       mutex_unlock(&mask_lock);
> +
>         if (err)
>                 dr_cpu_send_error(dp, cp, data);
>  }
> 
> How does it sound to you?
This introduces too much complexity to solve a potential stack issue.
If an improvement is required, then we need a simpler solution.

> 
> > kernel/leon_kernel.c:   cpumask_t mask;
> 
> It's in irqchip::irq_set_affinity(), which is in atomic context(raw spinlock(s) held),
> so dynamic allocation is not a good idea.
> 
> My proposal(*untested*) is somewhat complicated for it introduces a new helper.
> 
> diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
> index 4c61da491fee..6eced7acb8bc 100644
> --- a/arch/sparc/kernel/leon_kernel.c
> +++ b/arch/sparc/kernel/leon_kernel.c
> @@ -104,15 +104,25 @@ unsigned long leon_get_irqmask(unsigned int irq)
>  }
> 
>  #ifdef CONFIG_SMP
> +
> +static bool cpumask_include(const struct cpumask *srcp1, const struct cpumask *srcp2)
> +{
> +       unsigned int cpu;
> +
> +       for_each_cpu(cpu, srcp2) {
> +               if (!cpumask_test_cpu(cpu, srcp1))
> +                       return false;
> +       }
> +
> +       return true;
> +}
> +
>  static int irq_choose_cpu(const struct cpumask *affinity)
>  {
> -       cpumask_t mask;
> +       unsigned int cpu = cpumask_first_and(affinity, cpu_online_mask);
> 
> -       cpumask_and(&mask, cpu_online_mask, affinity);
> -       if (cpumask_equal(&mask, cpu_online_mask) || cpumask_empty(&mask))
> -               return boot_cpu_id;
> -       else
> -               return cpumask_first(&mask);
> +       return cpumask_include(affinity, cpu_online_mask) || cpu >= nr_cpu_ids ?
> +              boot_cpu_id : cpu;
>  }

I think something like the following should do the trick.


	if (cpumask_equal(affinity, cpu_online_mask))
		return boot_cpu_id;

	cpuid = cpumask_first_and(affinity, cpu_online_mask);
	if (cpuid < nr_cpu_ids)
		return cpuid;
	else
		return boot_cpu_id;

If the passed affinity equals the online CPUs, then use the boot CPU.
Else, use the first online CPU in the affinity mask.
If none is found, use the boot CPU.



>  #else
>  #define irq_choose_cpu(affinity) boot_cpu_id
> 
> Is it OK?
> 
> [cc Yury for bitmap API]
> 
> > kernel/leon_smp.c:static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1,
> > kernel/sun4d_smp.c:static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1,
Looks simple, just pass a pointer and not by value.
> 
> Actually I am awared of existence of (at least some of) them, but so far I
> have not found a _proper_ way of dealing with them(especially for case of
> ds.c).
> 
> Please lemme dig into it.
Looking forward to the next iteration.

	Sam


* Re: [PATCH 0/5] Remove onstack cpumask var usage
  2024-04-19  9:26   ` Dawei Li
  2024-04-19 20:33     ` Sam Ravnborg
@ 2024-04-19 22:13     ` Yury Norov
  1 sibling, 0 replies; 11+ messages in thread
From: Yury Norov @ 2024-04-19 22:13 UTC (permalink / raw)
  To: Dawei Li; +Cc: Sam Ravnborg, davem, andreas, sparclinux, linux-kernel

On Fri, Apr 19, 2024 at 05:26:34PM +0800, Dawei Li wrote:
> Hi Sam,
> 
> Thanks for the review.
> 
> On Fri, Apr 19, 2024 at 07:13:50AM +0200, Sam Ravnborg wrote:
> > Hi Dawei,
> > 
> > On Thu, Apr 18, 2024 at 06:49:44PM +0800, Dawei Li wrote:
> > > Hi,
> > > 
> > > This series aims at removing on-stack cpumask var usage for sparc arch.
> > > 
> > > Generally it's preferable to avoid placing cpumasks on the stack, as
> > > for large values of NR_CPUS these can consume significant amounts of
> > > stack space and make stack overflows more likely.
> > 
> > Took a quick look at the patches, looks good except the one the bot
> > already complained about.
> 
> I will fix this building warning in respinning.
> 
> > A quick grep shows a few more cases where we have an on-stack cpumask
> > in sparc code.
> > 
> > kernel/ds.c:    cpumask_t mask;
> 
> About this case, it's kinda tricky for:
> - dr_cpu_data() returns void, so alloc_cpumask_var() is no go.
> 
> - No idea of the calling context of dr_cpu_data(). IIUC,
>   dr_cpu_data()
>   ->dr_cpu_configure()  
>    ->kzalloc(resp_len, GFP_KERNEL)
>   So I guess it's in process context?
>   If the assumption above is OK, a simple but _ugly_ solution could be:
> 
> diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
> index ffdc15588ac2..c9e4ebdccf49 100644
> --- a/arch/sparc/kernel/ds.c
> +++ b/arch/sparc/kernel/ds.c
> @@ -634,7 +634,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf,
>         struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
>         u32 *cpu_list = (u32 *) (tag + 1);
>         u64 req_num = tag->req_num;
> -       cpumask_t mask;
> +       static DEFINE_MUTEX(mask_lock);
> +       static cpumask_t mask;
>         unsigned int i;
>         int err;
> 
> @@ -651,6 +652,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf,
> 
>         purge_dups(cpu_list, tag->num_records);
> 
> +       mutex_lock(&mask_lock);
> +
>         cpumask_clear(&mask);
>         for (i = 0; i < tag->num_records; i++) {
>                 if (cpu_list[i] == CPU_SENTINEL)
> @@ -665,6 +668,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf,
>         else
>                 err = dr_cpu_unconfigure(dp, cp, req_num, &mask);
> 
> +       mutex_unlock(&mask_lock);
> +
>         if (err)
>                 dr_cpu_send_error(dp, cp, data);
>  }
> 
> How does it sound to you?
> 
> > kernel/leon_kernel.c:   cpumask_t mask;
> 
> It's in irqchip::irq_set_affinity(), which is in atomic context(raw spinlock(s) held),
> so dynamic allocation is not a good idea.
> 
> My proposal(*untested*) is somewhat complicated for it introduces a new helper.
> 
> diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
> index 4c61da491fee..6eced7acb8bc 100644
> --- a/arch/sparc/kernel/leon_kernel.c
> +++ b/arch/sparc/kernel/leon_kernel.c
> @@ -104,15 +104,25 @@ unsigned long leon_get_irqmask(unsigned int irq)
>  }
> 
>  #ifdef CONFIG_SMP
> +
> +static bool cpumask_include(const struct cpumask *srcp1, const struct cpumask *srcp2)

Don't steal other subsystems' prefixes.

> +{
> +       unsigned int cpu;
> +
> +       for_each_cpu(cpu, srcp2) {
> +               if (!cpumask_test_cpu(cpu, srcp1))
> +                       return false;
> +       }
> +
> +       return true;
> +}

We've got cpumask_subset() for this. 
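
With cpumask_subset(), the helper above becomes unnecessary and the whole
function could collapse to something like this (untested sketch):

	static int irq_choose_cpu(const struct cpumask *affinity)
	{
		unsigned int cpu;

		/* all online CPUs are allowed: keep the boot CPU */
		if (cpumask_subset(cpu_online_mask, affinity))
			return boot_cpu_id;

		cpu = cpumask_first_and(affinity, cpu_online_mask);
		return cpu < nr_cpu_ids ? cpu : boot_cpu_id;
	}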

>  static int irq_choose_cpu(const struct cpumask *affinity)
>  {
> -       cpumask_t mask;
> +       unsigned int cpu = cpumask_first_and(affinity, cpu_online_mask);
> 
> -       cpumask_and(&mask, cpu_online_mask, affinity);
> -       if (cpumask_equal(&mask, cpu_online_mask) || cpumask_empty(&mask))
> -               return boot_cpu_id;
> -       else
> -               return cpumask_first(&mask);
> +       return cpumask_include(affinity, cpu_online_mask) || cpu >= nr_cpu_ids ?
> +              boot_cpu_id : cpu;
>  }
>  #else
>  #define irq_choose_cpu(affinity) boot_cpu_id
> 
> Is it OK?
> 
> [cc Yury for bitmap API]
> 
> > kernel/leon_smp.c:static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1,
> > kernel/sun4d_smp.c:static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1,
> 
> Actually I am awared of existence of (at least some of) them, but so far I
> have not found a _proper_ way of dealing with them(especially for case of
> ds.c).
> 
> Please lemme dig into it.
> 
> Thanks,
> 
>     Dawei
> 
> > 
> > Do you plan to look at the other on-stack users too?
> > It would be nice to see them all gone in one patch-set.
> > 
> > 	Sam
> > 

