linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] [RFC] watchdog/softlockup: Fix softlockup_stop_all() hungtask bug
@ 2021-09-16 17:56 Jinhui Guo
  2021-09-16 19:44 ` Andrew Morton
  0 siblings, 1 reply; 3+ messages in thread
From: Jinhui Guo @ 2021-09-16 17:56 UTC (permalink / raw)
  To: akpm, pmladek, peterz, valentin.schneider; +Cc: linux-kernel, guojinhui

If NR_CPUS is equal to 1, a hung task is triggered. It can be
reproduced with the following commands:
	echo 0 > /proc/sys/kernel/watchdog
	echo 1 > /proc/sys/kernel/watchdog
The hungtask stack:
	__schedule
	schedule
	schedule_timeout
	__wait_for_common
	softlockup_stop_fn
	lockup_detector_reconfigure
	proc_watchdog_common
	proc_watchdog
	proc_sys_call_handler
	vfs_write
	ksys_write
The watchdog_allowed_mask is completely cleared when the
watchdog is disabled. But the for_each_cpu() macro ignores its
mask argument when NR_CPUS is equal to 1 and always iterates
over CPU 0, as if every mask had that bit set. As a result,
watchdog_allowed_mask has no effect at all.

Fixes: be45bf5395e0 ("watchdog/softlockup: Fix cpu_stop_queue_work() double-queue bug")

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Petr Mladek <pmladek@suse.com>
Signed-off-by: Jinhui Guo <guojinhui@huawei.com>
---
 include/linux/cpumask.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 5d4d07a9e1ed..1a35dbcc397d 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -175,10 +175,11 @@ static inline int cpumask_any_distribute(const struct cpumask *srcp)
 	return cpumask_first(srcp);
 }
 
+/* It should check cpumask in some special case, such as watchdog */
 #define for_each_cpu(cpu, mask)			\
-	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+	for ((cpu) = 0; (cpu) < 1 && test_bit(0, cpumask_bits(mask)); (cpu)++)
 #define for_each_cpu_not(cpu, mask)		\
-	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+	for ((cpu) = 0; (cpu) < 1 && !test_bit(0, cpumask_bits(mask)); (cpu)++)
 #define for_each_cpu_wrap(cpu, mask, start)	\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
 #define for_each_cpu_and(cpu, mask1, mask2)	\
-- 
2.12.3


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] [RFC] watchdog/softlockup: Fix softlockup_stop_all() hungtask bug
  2021-09-16 17:56 [PATCH] [RFC] watchdog/softlockup: Fix softlockup_stop_all() hungtask bug Jinhui Guo
@ 2021-09-16 19:44 ` Andrew Morton
  2021-09-22  2:49   ` JinHui GUO
  0 siblings, 1 reply; 3+ messages in thread
From: Andrew Morton @ 2021-09-16 19:44 UTC (permalink / raw)
  To: Jinhui Guo; +Cc: pmladek, peterz, valentin.schneider, linux-kernel

On Fri, 17 Sep 2021 01:56:50 +0800 Jinhui Guo <guojinhui@huawei.com> wrote:

> If NR_CPUS equal to 1, it would trigger hungtask, it can be
> triggered by follow command:
> 	echo 0 > /proc/sys/kernel/watchdog
> 	echo 1 > /proc/sys/kernel/watchdog
> The hungtask stack:
> 	__schedule
> 	schedule
> 	schedule_timeout
> 	__wait_for_common
> 	softlockup_stop_fn
> 	lockup_detector_reconfigure
> 	proc_watchdog_common
> 	proc_watchdog
> 	proc_sys_call_handler
> 	vfs_write
> 	ksys_write
> The watchdog_allowed_mask is completely cleared when the
> watchdog is disabled. But the macro for_each_cpu() assume
> all masks are "1" when macro NR_CPUS equal to 1. It makes
> watchdog_allowed_mask not work at all.
> 
> ...
>
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -175,10 +175,11 @@ static inline int cpumask_any_distribute(const struct cpumask *srcp)
>  	return cpumask_first(srcp);
>  }
>  
> +/* It should check cpumask in some special case, such as watchdog */
>  #define for_each_cpu(cpu, mask)			\
> -	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
> +	for ((cpu) = 0; (cpu) < 1 && test_bit(0, cpumask_bits(mask)); (cpu)++)
>  #define for_each_cpu_not(cpu, mask)		\
> -	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
> +	for ((cpu) = 0; (cpu) < 1 && !test_bit(0, cpumask_bits(mask)); (cpu)++)
>  #define for_each_cpu_wrap(cpu, mask, start)	\
>  	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
>  #define for_each_cpu_and(cpu, mask1, mask2)	\

x86_64 allnoconfig:

ld: arch/x86/kernel/cpu/cacheinfo.o: in function `populate_cache_leaves':
cacheinfo.c:(.text+0xa27): undefined reference to `cpu_llc_shared_map'
ld: cacheinfo.c:(.text+0xa49): undefined reference to `cpu_llc_shared_map'

Because the new for_each_cpu() now references `mask' and some code isn't
able to handle that change.  There are probably other instances of this
across all our architectures and configs.


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] [RFC] watchdog/softlockup: Fix softlockup_stop_all() hungtask bug
  2021-09-16 19:44 ` Andrew Morton
@ 2021-09-22  2:49   ` JinHui GUO
  0 siblings, 0 replies; 3+ messages in thread
From: JinHui GUO @ 2021-09-22  2:49 UTC (permalink / raw)
  To: akpm; +Cc: guojinhui, linux-kernel, peterz, pmladek, valentin.schneider

> x86_64 allnoconfig:

> ld: arch/x86/kernel/cpu/cacheinfo.o: in function `populate_cache_leaves':
> cacheinfo.c:(.text+0xa27): undefined reference to `cpu_llc_shared_map'
> ld: cacheinfo.c:(.text+0xa49): undefined reference to `cpu_llc_shared_map'

> Because the new for_each_cpu() now references `mask' and some code isn't
> able to handle that change.  There are probably other instances of this
> across all our architectures and configs.

There is another bug, in arch/x86/include/asm/smp.h. The per-cpu variable
cpu_llc_shared_map is defined in arch/x86/kernel/smpboot.c, but that file
is not compiled when CONFIG_SMP is not set.

declared in file arch/x86/include/asm/smp.h:
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

defined in file arch/x86/kernel/smpboot.c:
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

the stack:
cpu_llc_shared_map
cpu_llc_shared_mask
__cache_amd_cpumap_setup
__cache_cpumap_setup
populate_cache_leaves

CONFIG_SMP in makefile arch/x86/kernel/Makefile:
obj-$(CONFIG_SMP)               += smpboot.o

When CONFIG_SMP is not set, cpu_llc_shared_mask() is only used by
for_each_cpu() in arch/x86/kernel/cpu/cacheinfo.c:
./arch/x86/kernel/cpu/cacheinfo.c:889:          for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
./arch/x86/kernel/cpu/cacheinfo.c:894:                  for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
./arch/x86/include/asm/smp.h:22:static inline struct cpumask *cpu_llc_shared_mask(int cpu)

It can be fixed as follows:

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 630ff08532be..f5d3ca5696b3 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -21,7 +21,12 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);

 static inline struct cpumask *cpu_llc_shared_mask(int cpu)
 {
+#ifdef CONFIG_SMP
        return per_cpu(cpu_llc_shared_map, cpu);
+#else
+ /* cpu_llc_shared_map is not defined while !CONFIG_SMP */
+ return cpu_all_mask;
+#endif
 }

 DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-09-22  2:49 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-16 17:56 [PATCH] [RFC] watchdog/softlockup: Fix softlockup_stop_all() hungtask bug Jinhui Guo
2021-09-16 19:44 ` Andrew Morton
2021-09-22  2:49   ` JinHui GUO

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).