linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] rcu: Dump memory object info if callback function is invalid
@ 2022-11-11 10:28 Zhen Lei
  2022-11-11 11:54 ` Zhang, Qiang1
  2022-11-11 15:50 ` Elliott, Robert (Servers)
  0 siblings, 2 replies; 18+ messages in thread
From: Zhen Lei @ 2022-11-11 10:28 UTC (permalink / raw)
  To: Paul E . McKenney, Frederic Weisbecker, Neeraj Upadhyay,
	Josh Triplett, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Joel Fernandes, rcu, linux-kernel
  Cc: Zhen Lei

When a structure containing an RCU callback rhp is (incorrectly) freed
and reallocated after rhp is passed to call_rcu(), it is not unusual for
rhp->func to be set to NULL. This defeats the debugging prints used by
__call_rcu_common() in kernels built with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y,
which expect to identify the offending code using the identity of this
function.

And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things
are even worse, as can be seen from this splat:

Unable to handle kernel NULL pointer dereference at virtual address 0
... ...
PC is at 0x0
LR is at rcu_do_batch+0x1c0/0x3b8
... ...
 (rcu_do_batch) from (rcu_core+0x1d4/0x284)
 (rcu_core) from (__do_softirq+0x24c/0x344)
 (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
 (__irq_exit_rcu) from (irq_exit+0x8/0x10)
 (irq_exit) from (__handle_domain_irq+0x74/0x9c)
 (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
 (gic_handle_irq) from (__irq_svc+0x5c/0x94)
 (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
 (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
 (default_idle_call) from (do_idle+0xf8/0x150)
 (do_idle) from (cpu_startup_entry+0x18/0x20)
 (cpu_startup_entry) from (0xc01530)

This commit therefore adds calls to mem_dump_obj(rhp) to output some
information, for example:

  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256

This provides the rough size of the memory block and the offset of the
rcu_head structure, which as least provides at least a few clues to help
locate the problem. If the problem is reproducible, additional slab
debugging can be enabled, for example, CONFIG_DEBUG_SLAB=y, which can
provide significantly more information.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcu.h      | 7 +++++++
 kernel/rcu/srcutiny.c | 1 +
 kernel/rcu/srcutree.c | 1 +
 kernel/rcu/tasks.h    | 1 +
 kernel/rcu/tiny.c     | 1 +
 kernel/rcu/tree.c     | 1 +
 6 files changed, 12 insertions(+)

v1 --> v2:
1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
2. Paul E. McKenney helped me update the commit message, thanks.

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 65704cbc9df7b3d..32ab45fabf8eebf 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -10,6 +10,7 @@
 #ifndef __LINUX_RCU_H
 #define __LINUX_RCU_H
 
+#include <linux/mm.h>
 #include <trace/events/rcu.h>
 
 /*
@@ -211,6 +212,12 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
 }
 #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
+static inline void debug_rcu_head_callback(struct rcu_head *rhp)
+{
+	if (unlikely(!rhp->func))
+		mem_dump_obj(rhp);
+}
+
 extern int rcu_cpu_stall_suppress_at_boot;
 
 static inline bool rcu_stall_is_suppressed_at_boot(void)
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 33adafdad261389..5e7f336baa06ae0 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
 	while (lh) {
 		rhp = lh;
 		lh = lh->next;
+		debug_rcu_head_callback(rhp);
 		local_bh_disable();
 		rhp->func(rhp);
 		local_bh_enable();
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index ca4b5dcec675bac..294972e66b31863 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -1631,6 +1631,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
 	rhp = rcu_cblist_dequeue(&ready_cbs);
 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
 		debug_rcu_head_unqueue(rhp);
+		debug_rcu_head_callback(rhp);
 		local_bh_disable();
 		rhp->func(rhp);
 		local_bh_enable();
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index b0b885e071fa8dc..b7f8c67c586cdc4 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -478,6 +478,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
 	len = rcl.len;
 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = rcu_cblist_dequeue(&rcl)) {
+		debug_rcu_head_callback(rhp);
 		local_bh_disable();
 		rhp->func(rhp);
 		local_bh_enable();
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index bb8f7d270f01747..56e9a5d91d97ec5 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head *head)
 
 	trace_rcu_invoke_callback("", head);
 	f = head->func;
+	debug_rcu_head_callback(head);
 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
 	f(head);
 	rcu_lock_release(&rcu_callback_map);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 15aaff3203bf2d0..ed93ddb8203d42c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2088,6 +2088,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
 		trace_rcu_invoke_callback(rcu_state.name, rhp);
 
 		f = rhp->func;
+		debug_rcu_head_callback(rhp);
 		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
 		f(rhp);
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* RE: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-11 10:28 [PATCH v2] rcu: Dump memory object info if callback function is invalid Zhen Lei
@ 2022-11-11 11:54 ` Zhang, Qiang1
  2022-11-11 12:34   ` Leizhen (ThunderTown)
  2022-11-11 15:50 ` Elliott, Robert (Servers)
  1 sibling, 1 reply; 18+ messages in thread
From: Zhang, Qiang1 @ 2022-11-11 11:54 UTC (permalink / raw)
  To: Zhen Lei, Paul E . McKenney, Frederic Weisbecker,
	Neeraj Upadhyay, Josh Triplett, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Joel Fernandes, rcu,
	linux-kernel

>When a structure containing an RCU callback rhp is (incorrectly) freed
>and reallocated after rhp is passed to call_rcu(), it is not unusual for
>rhp->func to be set to NULL. This defeats the debugging prints used by
>__call_rcu_common() in kernels built with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y,
>which expect to identify the offending code using the identity of this
>function.
>
>And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things
>are even worse, as can be seen from this splat:
>
>Unable to handle kernel NULL pointer dereference at virtual address 0
>... ...
>PC is at 0x0
>LR is at rcu_do_batch+0x1c0/0x3b8
>... ...
> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
> (rcu_core) from (__do_softirq+0x24c/0x344)
> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
> (default_idle_call) from (do_idle+0xf8/0x150)
> (do_idle) from (cpu_startup_entry+0x18/0x20)
> (cpu_startup_entry) from (0xc01530)
>
>This commit therefore adds calls to mem_dump_obj(rhp) to output some
>information, for example:
>
>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
>
>This provides the rough size of the memory block and the offset of the
>rcu_head structure, which as least provides at least a few clues to help
>locate the problem. If the problem is reproducible, additional slab
>debugging can be enabled, for example, CONFIG_DEBUG_SLAB=y, which can
>provide significantly more information.
>
>Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
>Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
>---
> kernel/rcu/rcu.h      | 7 +++++++
> kernel/rcu/srcutiny.c | 1 +
> kernel/rcu/srcutree.c | 1 +
> kernel/rcu/tasks.h    | 1 +
> kernel/rcu/tiny.c     | 1 +
> kernel/rcu/tree.c     | 1 +
> 6 files changed, 12 insertions(+)
>
>v1 --> v2:
>1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
>2. Paul E. McKenney helped me update the commit message, thanks.
>

Hi, Zhen Lei

Maybe the following scenarios should be considered:

                CPU 0
tasks context
   spin_lock(&vmap_area_lock)
          Interrupt 
	 RCU softirq
	      rcu_do_batch
		mem_dump_obj
                                  vmalloc_dump_obj
                                       spin_lock(&vmap_area_lock)   <--  deadlock     

Thanks
Zqiang


>diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
>index 65704cbc9df7b3d..32ab45fabf8eebf 100644
>--- a/kernel/rcu/rcu.h
>+++ b/kernel/rcu/rcu.h
>@@ -10,6 +10,7 @@
> #ifndef __LINUX_RCU_H
> #define __LINUX_RCU_H
> 
>+#include <linux/mm.h>
> #include <trace/events/rcu.h>
> 
> /*
>@@ -211,6 +212,12 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
> }
> #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
> 
>+static inline void debug_rcu_head_callback(struct rcu_head *rhp)
>+{
>+	if (unlikely(!rhp->func))
>+		mem_dump_obj(rhp);
>+}
>+
> extern int rcu_cpu_stall_suppress_at_boot;
> 
> static inline bool rcu_stall_is_suppressed_at_boot(void)
>diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
>index 33adafdad261389..5e7f336baa06ae0 100644
>--- a/kernel/rcu/srcutiny.c
>+++ b/kernel/rcu/srcutiny.c
>@@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
> 	while (lh) {
> 		rhp = lh;
> 		lh = lh->next;
>+		debug_rcu_head_callback(rhp);
> 		local_bh_disable();
> 		rhp->func(rhp);
> 		local_bh_enable();
>diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
>index ca4b5dcec675bac..294972e66b31863 100644
>--- a/kernel/rcu/srcutree.c
>+++ b/kernel/rcu/srcutree.c
>@@ -1631,6 +1631,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
> 	rhp = rcu_cblist_dequeue(&ready_cbs);
> 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
> 		debug_rcu_head_unqueue(rhp);
>+		debug_rcu_head_callback(rhp);
> 		local_bh_disable();
> 		rhp->func(rhp);
> 		local_bh_enable();
>diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
>index b0b885e071fa8dc..b7f8c67c586cdc4 100644
>--- a/kernel/rcu/tasks.h
>+++ b/kernel/rcu/tasks.h
>@@ -478,6 +478,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
> 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
> 	len = rcl.len;
> 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = rcu_cblist_dequeue(&rcl)) {
>+		debug_rcu_head_callback(rhp);
> 		local_bh_disable();
> 		rhp->func(rhp);
> 		local_bh_enable();
>diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
>index bb8f7d270f01747..56e9a5d91d97ec5 100644
>--- a/kernel/rcu/tiny.c
>+++ b/kernel/rcu/tiny.c
>@@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head *head)
> 
> 	trace_rcu_invoke_callback("", head);
> 	f = head->func;
>+	debug_rcu_head_callback(head);
> 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
> 	f(head);
> 	rcu_lock_release(&rcu_callback_map);
>diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
>index 15aaff3203bf2d0..ed93ddb8203d42c 100644
>--- a/kernel/rcu/tree.c
>+++ b/kernel/rcu/tree.c
>@@ -2088,6 +2088,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
> 		trace_rcu_invoke_callback(rcu_state.name, rhp);
> 
> 		f = rhp->func;
>+		debug_rcu_head_callback(rhp);
> 		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
> 		f(rhp);
> 
>-- 
>2.25.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-11 11:54 ` Zhang, Qiang1
@ 2022-11-11 12:34   ` Leizhen (ThunderTown)
  2022-11-11 13:05     ` Zhang, Qiang1
  0 siblings, 1 reply; 18+ messages in thread
From: Leizhen (ThunderTown) @ 2022-11-11 12:34 UTC (permalink / raw)
  To: Zhang, Qiang1, Paul E . McKenney, Frederic Weisbecker,
	Neeraj Upadhyay, Josh Triplett, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Joel Fernandes, rcu,
	linux-kernel



On 2022/11/11 19:54, Zhang, Qiang1 wrote:
>> When a structure containing an RCU callback rhp is (incorrectly) freed
>> and reallocated after rhp is passed to call_rcu(), it is not unusual for
>> rhp->func to be set to NULL. This defeats the debugging prints used by
>> __call_rcu_common() in kernels built with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y,
>> which expect to identify the offending code using the identity of this
>> function.
>>
>> And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things
>> are even worse, as can be seen from this splat:
>>
>> Unable to handle kernel NULL pointer dereference at virtual address 0
>> ... ...
>> PC is at 0x0
>> LR is at rcu_do_batch+0x1c0/0x3b8
>> ... ...
>> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
>> (rcu_core) from (__do_softirq+0x24c/0x344)
>> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
>> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
>> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
>> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
>> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
>> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
>> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
>> (default_idle_call) from (do_idle+0xf8/0x150)
>> (do_idle) from (cpu_startup_entry+0x18/0x20)
>> (cpu_startup_entry) from (0xc01530)
>>
>> This commit therefore adds calls to mem_dump_obj(rhp) to output some
>> information, for example:
>>
>>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
>>
>> This provides the rough size of the memory block and the offset of the
>> rcu_head structure, which as least provides at least a few clues to help
>> locate the problem. If the problem is reproducible, additional slab
>> debugging can be enabled, for example, CONFIG_DEBUG_SLAB=y, which can
>> provide significantly more information.
>>
>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
>> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
>> ---
>> kernel/rcu/rcu.h      | 7 +++++++
>> kernel/rcu/srcutiny.c | 1 +
>> kernel/rcu/srcutree.c | 1 +
>> kernel/rcu/tasks.h    | 1 +
>> kernel/rcu/tiny.c     | 1 +
>> kernel/rcu/tree.c     | 1 +
>> 6 files changed, 12 insertions(+)
>>
>> v1 --> v2:
>> 1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
>> 2. Paul E. McKenney helped me update the commit message, thanks.
>>
> 
> Hi, Zhen Lei
> 
> Maybe the following scenarios should be considered:
> 
>                 CPU 0
> tasks context
>    spin_lock(&vmap_area_lock)
>           Interrupt 
> 	 RCU softirq
> 	      rcu_do_batch
> 		mem_dump_obj
>                                   vmalloc_dump_obj
>                                        spin_lock(&vmap_area_lock)   <--  deadlock     

Right, thanks. I just saw the robot's report. So this patch should be dropped.
I'll try to add an helper in mm, where I can check whether the lock has been
held, and dump the content of memory object.

> 
> Thanks
> Zqiang
> 
> 
>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
>> index 65704cbc9df7b3d..32ab45fabf8eebf 100644
>> --- a/kernel/rcu/rcu.h
>> +++ b/kernel/rcu/rcu.h
>> @@ -10,6 +10,7 @@
>> #ifndef __LINUX_RCU_H
>> #define __LINUX_RCU_H
>>
>> +#include <linux/mm.h>
>> #include <trace/events/rcu.h>
>>
>> /*
>> @@ -211,6 +212,12 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
>> }
>> #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
>>
>> +static inline void debug_rcu_head_callback(struct rcu_head *rhp)
>> +{
>> +	if (unlikely(!rhp->func))
>> +		mem_dump_obj(rhp);
>> +}
>> +
>> extern int rcu_cpu_stall_suppress_at_boot;
>>
>> static inline bool rcu_stall_is_suppressed_at_boot(void)
>> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
>> index 33adafdad261389..5e7f336baa06ae0 100644
>> --- a/kernel/rcu/srcutiny.c
>> +++ b/kernel/rcu/srcutiny.c
>> @@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
>> 	while (lh) {
>> 		rhp = lh;
>> 		lh = lh->next;
>> +		debug_rcu_head_callback(rhp);
>> 		local_bh_disable();
>> 		rhp->func(rhp);
>> 		local_bh_enable();
>> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
>> index ca4b5dcec675bac..294972e66b31863 100644
>> --- a/kernel/rcu/srcutree.c
>> +++ b/kernel/rcu/srcutree.c
>> @@ -1631,6 +1631,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
>> 	rhp = rcu_cblist_dequeue(&ready_cbs);
>> 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
>> 		debug_rcu_head_unqueue(rhp);
>> +		debug_rcu_head_callback(rhp);
>> 		local_bh_disable();
>> 		rhp->func(rhp);
>> 		local_bh_enable();
>> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
>> index b0b885e071fa8dc..b7f8c67c586cdc4 100644
>> --- a/kernel/rcu/tasks.h
>> +++ b/kernel/rcu/tasks.h
>> @@ -478,6 +478,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
>> 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
>> 	len = rcl.len;
>> 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = rcu_cblist_dequeue(&rcl)) {
>> +		debug_rcu_head_callback(rhp);
>> 		local_bh_disable();
>> 		rhp->func(rhp);
>> 		local_bh_enable();
>> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
>> index bb8f7d270f01747..56e9a5d91d97ec5 100644
>> --- a/kernel/rcu/tiny.c
>> +++ b/kernel/rcu/tiny.c
>> @@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head *head)
>>
>> 	trace_rcu_invoke_callback("", head);
>> 	f = head->func;
>> +	debug_rcu_head_callback(head);
>> 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
>> 	f(head);
>> 	rcu_lock_release(&rcu_callback_map);
>> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
>> index 15aaff3203bf2d0..ed93ddb8203d42c 100644
>> --- a/kernel/rcu/tree.c
>> +++ b/kernel/rcu/tree.c
>> @@ -2088,6 +2088,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
>> 		trace_rcu_invoke_callback(rcu_state.name, rhp);
>>
>> 		f = rhp->func;
>> +		debug_rcu_head_callback(rhp);
>> 		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
>> 		f(rhp);
>>
>> -- 
>> 2.25.1
> 
> .
> 

-- 
Regards,
  Zhen Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-11 12:34   ` Leizhen (ThunderTown)
@ 2022-11-11 13:05     ` Zhang, Qiang1
  2022-11-11 18:42       ` Paul E. McKenney
  0 siblings, 1 reply; 18+ messages in thread
From: Zhang, Qiang1 @ 2022-11-11 13:05 UTC (permalink / raw)
  To: Leizhen (ThunderTown),
	Paul E . McKenney, Frederic Weisbecker, Neeraj Upadhyay,
	Josh Triplett, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Joel Fernandes, rcu, linux-kernel

On 2022/11/11 19:54, Zhang, Qiang1 wrote:
>> When a structure containing an RCU callback rhp is (incorrectly) 
>> freed and reallocated after rhp is passed to call_rcu(), it is not 
>> unusual for
>> rhp->func to be set to NULL. This defeats the debugging prints used 
>> rhp->by
>> __call_rcu_common() in kernels built with 
>> CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the 
>> offending code using the identity of this function.
>>
>> And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things 
>> are even worse, as can be seen from this splat:
>>
>> Unable to handle kernel NULL pointer dereference at virtual address 0 
>> ... ...
>> PC is at 0x0
>> LR is at rcu_do_batch+0x1c0/0x3b8
>> ... ...
>> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
>> (rcu_core) from (__do_softirq+0x24c/0x344)
>> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
>> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
>> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
>> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
>> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
>> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
>> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
>> (default_idle_call) from (do_idle+0xf8/0x150)
>> (do_idle) from (cpu_startup_entry+0x18/0x20)
>> (cpu_startup_entry) from (0xc01530)
>>
>> This commit therefore adds calls to mem_dump_obj(rhp) to output some 
>> information, for example:
>>
>>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
>>
>> This provides the rough size of the memory block and the offset of 
>> the rcu_head structure, which as least provides at least a few clues 
>> to help locate the problem. If the problem is reproducible, 
>> additional slab debugging can be enabled, for example, 
>> CONFIG_DEBUG_SLAB=y, which can provide significantly more information.
>>
>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
>> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
>> ---
>> kernel/rcu/rcu.h      | 7 +++++++
>> kernel/rcu/srcutiny.c | 1 +
>> kernel/rcu/srcutree.c | 1 +
>> kernel/rcu/tasks.h    | 1 +
>> kernel/rcu/tiny.c     | 1 +
>> kernel/rcu/tree.c     | 1 +
>> 6 files changed, 12 insertions(+)
>>
>> v1 --> v2:
>> 1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
>> 2. Paul E. McKenney helped me update the commit message, thanks.
>>
> 
> Hi, Zhen Lei
> 
> Maybe the following scenarios should be considered:
> 
>                 CPU 0
> tasks context
>    spin_lock(&vmap_area_lock)
>           Interrupt 
> 	 RCU softirq
> 	      rcu_do_batch
> 		mem_dump_obj
>                                   vmalloc_dump_obj
>                                        spin_lock(&vmap_area_lock)   <--  deadlock     

>Right, thanks. I just saw the robot's report. So this patch should be dropped.
>I'll try to add an helper in mm, where I can check whether the lock has been held, and dump the content of memory object.

This is a workaround, or maybe try a modification like the following, 
of course, need to ask Paul's opinion.

diff --git a/mm/util.c b/mm/util.c
index 12984e76767e..86da0739fe5d 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1119,14 +1119,18 @@ void mem_dump_obj(void *object)
 {
        const char *type;

+       if (is_vmalloc_addr(object)) {
+               if (in_task() && vmalloc_dump_obj(object))
+                       return;
+               type = "vmalloc memory";
+               goto end;
+       }
+
        if (kmem_valid_obj(object)) {
                kmem_dump_obj(object);
                return;
        }

-       if (vmalloc_dump_obj(object))
-               return;
-
        if (virt_addr_valid(object))
                type = "non-slab/vmalloc memory";
        else if (object == NULL)
@@ -1135,7 +1139,7 @@ void mem_dump_obj(void *object)
                type = "zero-size pointer";
        else
                type = "non-paged memory";
-
+end:
        pr_cont(" %s\n", type);
 }
 EXPORT_SYMBOL_GPL(mem_dump_obj);

Thanks
Zqiang


> 
> Thanks
> Zqiang
> 
> 
>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 
>> 65704cbc9df7b3d..32ab45fabf8eebf 100644
>> --- a/kernel/rcu/rcu.h
>> +++ b/kernel/rcu/rcu.h
>> @@ -10,6 +10,7 @@
>> #ifndef __LINUX_RCU_H
>> #define __LINUX_RCU_H
>>
>> +#include <linux/mm.h>
>> #include <trace/events/rcu.h>
>>
>> /*
>> @@ -211,6 +212,12 @@ static inline void debug_rcu_head_unqueue(struct 
>> rcu_head *head) }
>> #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
>>
>> +static inline void debug_rcu_head_callback(struct rcu_head *rhp) {
>> +	if (unlikely(!rhp->func))
>> +		mem_dump_obj(rhp);
>> +}
>> +
>> extern int rcu_cpu_stall_suppress_at_boot;
>>
>> static inline bool rcu_stall_is_suppressed_at_boot(void)
>> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 
>> 33adafdad261389..5e7f336baa06ae0 100644
>> --- a/kernel/rcu/srcutiny.c
>> +++ b/kernel/rcu/srcutiny.c
>> @@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
>> 	while (lh) {
>> 		rhp = lh;
>> 		lh = lh->next;
>> +		debug_rcu_head_callback(rhp);
>> 		local_bh_disable();
>> 		rhp->func(rhp);
>> 		local_bh_enable();
>> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 
>> ca4b5dcec675bac..294972e66b31863 100644
>> --- a/kernel/rcu/srcutree.c
>> +++ b/kernel/rcu/srcutree.c
>> @@ -1631,6 +1631,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
>> 	rhp = rcu_cblist_dequeue(&ready_cbs);
>> 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
>> 		debug_rcu_head_unqueue(rhp);
>> +		debug_rcu_head_callback(rhp);
>> 		local_bh_disable();
>> 		rhp->func(rhp);
>> 		local_bh_enable();
>> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 
>> b0b885e071fa8dc..b7f8c67c586cdc4 100644
>> --- a/kernel/rcu/tasks.h
>> +++ b/kernel/rcu/tasks.h
>> @@ -478,6 +478,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
>> 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
>> 	len = rcl.len;
>> 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = 
>> rcu_cblist_dequeue(&rcl)) {
>> +		debug_rcu_head_callback(rhp);
>> 		local_bh_disable();
>> 		rhp->func(rhp);
>> 		local_bh_enable();
>> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 
>> bb8f7d270f01747..56e9a5d91d97ec5 100644
>> --- a/kernel/rcu/tiny.c
>> +++ b/kernel/rcu/tiny.c
>> @@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head 
>> *head)
>>
>> 	trace_rcu_invoke_callback("", head);
>> 	f = head->func;
>> +	debug_rcu_head_callback(head);
>> 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
>> 	f(head);
>> 	rcu_lock_release(&rcu_callback_map);
>> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 
>> 15aaff3203bf2d0..ed93ddb8203d42c 100644
>> --- a/kernel/rcu/tree.c
>> +++ b/kernel/rcu/tree.c
>> @@ -2088,6 +2088,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
>> 		trace_rcu_invoke_callback(rcu_state.name, rhp);
>>
>> 		f = rhp->func;
>> +		debug_rcu_head_callback(rhp);
>> 		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
>> 		f(rhp);
>>
>> --
>> 2.25.1
> 
> .
> 

--
Regards,
  Zhen Lei

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* RE: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-11 10:28 [PATCH v2] rcu: Dump memory object info if callback function is invalid Zhen Lei
  2022-11-11 11:54 ` Zhang, Qiang1
@ 2022-11-11 15:50 ` Elliott, Robert (Servers)
  2022-11-14  7:05   ` Leizhen (ThunderTown)
  1 sibling, 1 reply; 18+ messages in thread
From: Elliott, Robert (Servers) @ 2022-11-11 15:50 UTC (permalink / raw)
  To: Zhen Lei, Paul E . McKenney, Frederic Weisbecker,
	Neeraj Upadhyay, Josh Triplett, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Joel Fernandes, rcu,
	linux-kernel



> +static inline void debug_rcu_head_callback(struct rcu_head *rhp)
> +{
> +	if (unlikely(!rhp->func))
> +		mem_dump_obj(rhp);
> +}
> +

The mm/util.c definition of mem_dump_object() says:
 * This function uses pr_cont(), so that the caller is expected to have
 * printed out whatever preamble is appropriate.

so this needs to call pr_alert() or pr_err() before that to explain what
is being printed (with no \n), like these:

kernel/rcu/rcutorture.c: pr_alert("mem_dump_obj(%px):", &rhp);
kernel/rcu/rcutorture.c: mem_dump_obj(&rhp);
...
kernel/rcu/tree.c: pr_err("%s(): Double-freed CB %p->%pS()!!!  ", __func__, head, head->func);
kernel/rcu/tree.c: mem_dump_obj(head);



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-11 13:05     ` Zhang, Qiang1
@ 2022-11-11 18:42       ` Paul E. McKenney
  2022-11-12  2:32         ` Leizhen (ThunderTown)
  2022-11-14 18:22         ` Uladzislau Rezki
  0 siblings, 2 replies; 18+ messages in thread
From: Paul E. McKenney @ 2022-11-11 18:42 UTC (permalink / raw)
  To: Zhang, Qiang1
  Cc: Leizhen (ThunderTown),
	Frederic Weisbecker, Neeraj Upadhyay, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Joel Fernandes,
	rcu, linux-kernel

On Fri, Nov 11, 2022 at 01:05:56PM +0000, Zhang, Qiang1 wrote:
> On 2022/11/11 19:54, Zhang, Qiang1 wrote:
> >> When a structure containing an RCU callback rhp is (incorrectly) 
> >> freed and reallocated after rhp is passed to call_rcu(), it is not 
> >> unusual for
> >> rhp->func to be set to NULL. This defeats the debugging prints used 
> >> rhp->by
> >> __call_rcu_common() in kernels built with 
> >> CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the 
> >> offending code using the identity of this function.
> >>
> >> And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things 
> >> are even worse, as can be seen from this splat:
> >>
> >> Unable to handle kernel NULL pointer dereference at virtual address 0 
> >> ... ...
> >> PC is at 0x0
> >> LR is at rcu_do_batch+0x1c0/0x3b8
> >> ... ...
> >> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
> >> (rcu_core) from (__do_softirq+0x24c/0x344)
> >> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
> >> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
> >> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
> >> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
> >> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
> >> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
> >> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
> >> (default_idle_call) from (do_idle+0xf8/0x150)
> >> (do_idle) from (cpu_startup_entry+0x18/0x20)
> >> (cpu_startup_entry) from (0xc01530)
> >>
> >> This commit therefore adds calls to mem_dump_obj(rhp) to output some 
> >> information, for example:
> >>
> >>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
> >>
> >> This provides the rough size of the memory block and the offset of 
> >> the rcu_head structure, which as least provides at least a few clues 
> >> to help locate the problem. If the problem is reproducible, 
> >> additional slab debugging can be enabled, for example, 
> >> CONFIG_DEBUG_SLAB=y, which can provide significantly more information.
> >>
> >> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
> >> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> >> ---
> >> kernel/rcu/rcu.h      | 7 +++++++
> >> kernel/rcu/srcutiny.c | 1 +
> >> kernel/rcu/srcutree.c | 1 +
> >> kernel/rcu/tasks.h    | 1 +
> >> kernel/rcu/tiny.c     | 1 +
> >> kernel/rcu/tree.c     | 1 +
> >> 6 files changed, 12 insertions(+)
> >>
> >> v1 --> v2:
> >> 1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
> >> 2. Paul E. McKenney helped me update the commit message, thanks.
> >>
> > 
> > Hi, Zhen Lei
> > 
> > Maybe the following scenarios should be considered:
> > 
> >                 CPU 0
> > tasks context
> >    spin_lock(&vmap_area_lock)
> >           Interrupt 
> > 	 RCU softirq
> > 	      rcu_do_batch
> > 		mem_dump_obj
> >                                   vmalloc_dump_obj
> >                                        spin_lock(&vmap_area_lock)   <--  deadlock     
> 
> >Right, thanks. I just saw the robot's report. So this patch should be dropped.
> >I'll try to add an helper in mm, where I can check whether the lock has been held, and dump the content of memory object.
> 
> This is a workaround, or maybe try a modification like the following, 
> of course, need to ask Paul's opinion.

Another approach is to schedule a workqueue handler to do the
mem_dump_obj().  This would allow mem_dump_obj() to run in a clean
environment.

This would allow vmalloc_dump_obj() to be called unconditionally.

Other thoughts?

							Thanx, Paul

> diff --git a/mm/util.c b/mm/util.c
> index 12984e76767e..86da0739fe5d 100644
> --- a/mm/util.c
> +++ b/mm/util.c
> @@ -1119,14 +1119,18 @@ void mem_dump_obj(void *object)
>  {
>         const char *type;
> 
> +       if (is_vmalloc_addr(object)) {
> +               if (in_task() && vmalloc_dump_obj(object))
> +                       return;
> +               type = "vmalloc memory";
> +               goto end;
> +       }
> +
>         if (kmem_valid_obj(object)) {
>                 kmem_dump_obj(object);
>                 return;
>         }
> 
> -       if (vmalloc_dump_obj(object))
> -               return;
> -
>         if (virt_addr_valid(object))
>                 type = "non-slab/vmalloc memory";
>         else if (object == NULL)
> @@ -1135,7 +1139,7 @@ void mem_dump_obj(void *object)
>                 type = "zero-size pointer";
>         else
>                 type = "non-paged memory";
> -
> +end:
>         pr_cont(" %s\n", type);
>  }
>  EXPORT_SYMBOL_GPL(mem_dump_obj);
> 
> Thanks
> Zqiang
> 
> 
> > 
> > Thanks
> > Zqiang
> > 
> > 
> >> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 
> >> 65704cbc9df7b3d..32ab45fabf8eebf 100644
> >> --- a/kernel/rcu/rcu.h
> >> +++ b/kernel/rcu/rcu.h
> >> @@ -10,6 +10,7 @@
> >> #ifndef __LINUX_RCU_H
> >> #define __LINUX_RCU_H
> >>
> >> +#include <linux/mm.h>
> >> #include <trace/events/rcu.h>
> >>
> >> /*
> >> @@ -211,6 +212,12 @@ static inline void debug_rcu_head_unqueue(struct 
> >> rcu_head *head) }
> >> #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
> >>
> >> +static inline void debug_rcu_head_callback(struct rcu_head *rhp) {
> >> +	if (unlikely(!rhp->func))
> >> +		mem_dump_obj(rhp);
> >> +}
> >> +
> >> extern int rcu_cpu_stall_suppress_at_boot;
> >>
> >> static inline bool rcu_stall_is_suppressed_at_boot(void)
> >> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 
> >> 33adafdad261389..5e7f336baa06ae0 100644
> >> --- a/kernel/rcu/srcutiny.c
> >> +++ b/kernel/rcu/srcutiny.c
> >> @@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
> >> 	while (lh) {
> >> 		rhp = lh;
> >> 		lh = lh->next;
> >> +		debug_rcu_head_callback(rhp);
> >> 		local_bh_disable();
> >> 		rhp->func(rhp);
> >> 		local_bh_enable();
> >> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 
> >> ca4b5dcec675bac..294972e66b31863 100644
> >> --- a/kernel/rcu/srcutree.c
> >> +++ b/kernel/rcu/srcutree.c
> >> @@ -1631,6 +1631,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
> >> 	rhp = rcu_cblist_dequeue(&ready_cbs);
> >> 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
> >> 		debug_rcu_head_unqueue(rhp);
> >> +		debug_rcu_head_callback(rhp);
> >> 		local_bh_disable();
> >> 		rhp->func(rhp);
> >> 		local_bh_enable();
> >> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 
> >> b0b885e071fa8dc..b7f8c67c586cdc4 100644
> >> --- a/kernel/rcu/tasks.h
> >> +++ b/kernel/rcu/tasks.h
> >> @@ -478,6 +478,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
> >> 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
> >> 	len = rcl.len;
> >> 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = 
> >> rcu_cblist_dequeue(&rcl)) {
> >> +		debug_rcu_head_callback(rhp);
> >> 		local_bh_disable();
> >> 		rhp->func(rhp);
> >> 		local_bh_enable();
> >> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 
> >> bb8f7d270f01747..56e9a5d91d97ec5 100644
> >> --- a/kernel/rcu/tiny.c
> >> +++ b/kernel/rcu/tiny.c
> >> @@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head 
> >> *head)
> >>
> >> 	trace_rcu_invoke_callback("", head);
> >> 	f = head->func;
> >> +	debug_rcu_head_callback(head);
> >> 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
> >> 	f(head);
> >> 	rcu_lock_release(&rcu_callback_map);
> >> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 
> >> 15aaff3203bf2d0..ed93ddb8203d42c 100644
> >> --- a/kernel/rcu/tree.c
> >> +++ b/kernel/rcu/tree.c
> >> @@ -2088,6 +2088,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
> >> 		trace_rcu_invoke_callback(rcu_state.name, rhp);
> >>
> >> 		f = rhp->func;
> >> +		debug_rcu_head_callback(rhp);
> >> 		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
> >> 		f(rhp);
> >>
> >> --
> >> 2.25.1
> > 
> > .
> > 
> 
> --
> Regards,
>   Zhen Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-11 18:42       ` Paul E. McKenney
@ 2022-11-12  2:32         ` Leizhen (ThunderTown)
  2022-11-12  2:39           ` Leizhen (ThunderTown)
  2022-11-12  6:08           ` Paul E. McKenney
  2022-11-14 18:22         ` Uladzislau Rezki
  1 sibling, 2 replies; 18+ messages in thread
From: Leizhen (ThunderTown) @ 2022-11-12  2:32 UTC (permalink / raw)
  To: paulmck, Zhang, Qiang1
  Cc: Frederic Weisbecker, Neeraj Upadhyay, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Joel Fernandes,
	rcu, linux-kernel



On 2022/11/12 2:42, Paul E. McKenney wrote:
> On Fri, Nov 11, 2022 at 01:05:56PM +0000, Zhang, Qiang1 wrote:
>> On 2022/11/11 19:54, Zhang, Qiang1 wrote:
>>>> When a structure containing an RCU callback rhp is (incorrectly) 
>>>> freed and reallocated after rhp is passed to call_rcu(), it is not 
>>>> unusual for
>>>> rhp->func to be set to NULL. This defeats the debugging prints used 
>>>> rhp->by
>>>> __call_rcu_common() in kernels built with 
>>>> CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the 
>>>> offending code using the identity of this function.
>>>>
>>>> And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things 
>>>> are even worse, as can be seen from this splat:
>>>>
>>>> Unable to handle kernel NULL pointer dereference at virtual address 0 
>>>> ... ...
>>>> PC is at 0x0
>>>> LR is at rcu_do_batch+0x1c0/0x3b8
>>>> ... ...
>>>> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
>>>> (rcu_core) from (__do_softirq+0x24c/0x344)
>>>> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
>>>> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
>>>> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
>>>> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
>>>> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
>>>> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
>>>> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
>>>> (default_idle_call) from (do_idle+0xf8/0x150)
>>>> (do_idle) from (cpu_startup_entry+0x18/0x20)
>>>> (cpu_startup_entry) from (0xc01530)
>>>>
>>>> This commit therefore adds calls to mem_dump_obj(rhp) to output some 
>>>> information, for example:
>>>>
>>>>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
>>>>
>>>> This provides the rough size of the memory block and the offset of 
>>>> the rcu_head structure, which as least provides at least a few clues 
>>>> to help locate the problem. If the problem is reproducible, 
>>>> additional slab debugging can be enabled, for example, 
>>>> CONFIG_DEBUG_SLAB=y, which can provide significantly more information.
>>>>
>>>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
>>>> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
>>>> ---
>>>> kernel/rcu/rcu.h      | 7 +++++++
>>>> kernel/rcu/srcutiny.c | 1 +
>>>> kernel/rcu/srcutree.c | 1 +
>>>> kernel/rcu/tasks.h    | 1 +
>>>> kernel/rcu/tiny.c     | 1 +
>>>> kernel/rcu/tree.c     | 1 +
>>>> 6 files changed, 12 insertions(+)
>>>>
>>>> v1 --> v2:
>>>> 1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
>>>> 2. Paul E. McKenney helped me update the commit message, thanks.
>>>>
>>>
>>> Hi, Zhen Lei
>>>
>>> Maybe the following scenarios should be considered:
>>>
>>>                 CPU 0
>>> tasks context
>>>    spin_lock(&vmap_area_lock)
>>>           Interrupt 
>>> 	 RCU softirq
>>> 	      rcu_do_batch
>>> 		mem_dump_obj
>>>                                   vmalloc_dump_obj
>>>                                        spin_lock(&vmap_area_lock)   <--  deadlock     
>>
>>> Right, thanks. I just saw the robot's report. So this patch should be dropped.
>>> I'll try to add an helper in mm, where I can check whether the lock has been held, and dump the content of memory object.
>>
>> This is a workaround, or maybe try a modification like the following, 
>> of course, need to ask Paul's opinion.
> 
> Another approach is to schedule a workqueue handler to do the
> mem_dump_obj().  This would allow mem_dump_obj() to run in a clean
> environment.

It's about to panic, so no chance to schedule.

> 
> This would allow vmalloc_dump_obj() to be called unconditionally.
> 
> Other thoughts?

locked = spin_is_locked(&vmap_area_lock);
if (!locked)
    spin_lock(&vmap_area_lock)

Careful analysis is required, which may cause other problems.

Or in new function:
if (locked)
    return;
spin_lock(&vmap_area_lock);

If there is a chance to dump the data, dump the data. If there is no
chance to dump the data, do not dump the data. This is the fate of
debugging information.

> 
> 							Thanx, Paul
> 
>> diff --git a/mm/util.c b/mm/util.c
>> index 12984e76767e..86da0739fe5d 100644
>> --- a/mm/util.c
>> +++ b/mm/util.c
>> @@ -1119,14 +1119,18 @@ void mem_dump_obj(void *object)
>>  {
>>         const char *type;
>>
>> +       if (is_vmalloc_addr(object)) {
>> +               if (in_task() && vmalloc_dump_obj(object))
>> +                       return;
>> +               type = "vmalloc memory";
>> +               goto end;
>> +       }
>> +
>>         if (kmem_valid_obj(object)) {
>>                 kmem_dump_obj(object);
>>                 return;
>>         }
>>
>> -       if (vmalloc_dump_obj(object))
>> -               return;
>> -
>>         if (virt_addr_valid(object))
>>                 type = "non-slab/vmalloc memory";
>>         else if (object == NULL)
>> @@ -1135,7 +1139,7 @@ void mem_dump_obj(void *object)
>>                 type = "zero-size pointer";
>>         else
>>                 type = "non-paged memory";
>> -
>> +end:
>>         pr_cont(" %s\n", type);
>>  }
>>  EXPORT_SYMBOL_GPL(mem_dump_obj);
>>
>> Thanks
>> Zqiang
>>
>>
>>>
>>> Thanks
>>> Zqiang
>>>
>>>
>>>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 
>>>> 65704cbc9df7b3d..32ab45fabf8eebf 100644
>>>> --- a/kernel/rcu/rcu.h
>>>> +++ b/kernel/rcu/rcu.h
>>>> @@ -10,6 +10,7 @@
>>>> #ifndef __LINUX_RCU_H
>>>> #define __LINUX_RCU_H
>>>>
>>>> +#include <linux/mm.h>
>>>> #include <trace/events/rcu.h>
>>>>
>>>> /*
>>>> @@ -211,6 +212,12 @@ static inline void debug_rcu_head_unqueue(struct 
>>>> rcu_head *head) }
>>>> #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
>>>>
>>>> +static inline void debug_rcu_head_callback(struct rcu_head *rhp) {
>>>> +	if (unlikely(!rhp->func))
>>>> +		mem_dump_obj(rhp);
>>>> +}
>>>> +
>>>> extern int rcu_cpu_stall_suppress_at_boot;
>>>>
>>>> static inline bool rcu_stall_is_suppressed_at_boot(void)
>>>> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 
>>>> 33adafdad261389..5e7f336baa06ae0 100644
>>>> --- a/kernel/rcu/srcutiny.c
>>>> +++ b/kernel/rcu/srcutiny.c
>>>> @@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
>>>> 	while (lh) {
>>>> 		rhp = lh;
>>>> 		lh = lh->next;
>>>> +		debug_rcu_head_callback(rhp);
>>>> 		local_bh_disable();
>>>> 		rhp->func(rhp);
>>>> 		local_bh_enable();
>>>> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 
>>>> ca4b5dcec675bac..294972e66b31863 100644
>>>> --- a/kernel/rcu/srcutree.c
>>>> +++ b/kernel/rcu/srcutree.c
>>>> @@ -1631,6 +1631,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
>>>> 	rhp = rcu_cblist_dequeue(&ready_cbs);
>>>> 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
>>>> 		debug_rcu_head_unqueue(rhp);
>>>> +		debug_rcu_head_callback(rhp);
>>>> 		local_bh_disable();
>>>> 		rhp->func(rhp);
>>>> 		local_bh_enable();
>>>> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 
>>>> b0b885e071fa8dc..b7f8c67c586cdc4 100644
>>>> --- a/kernel/rcu/tasks.h
>>>> +++ b/kernel/rcu/tasks.h
>>>> @@ -478,6 +478,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
>>>> 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
>>>> 	len = rcl.len;
>>>> 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = 
>>>> rcu_cblist_dequeue(&rcl)) {
>>>> +		debug_rcu_head_callback(rhp);
>>>> 		local_bh_disable();
>>>> 		rhp->func(rhp);
>>>> 		local_bh_enable();
>>>> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 
>>>> bb8f7d270f01747..56e9a5d91d97ec5 100644
>>>> --- a/kernel/rcu/tiny.c
>>>> +++ b/kernel/rcu/tiny.c
>>>> @@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head 
>>>> *head)
>>>>
>>>> 	trace_rcu_invoke_callback("", head);
>>>> 	f = head->func;
>>>> +	debug_rcu_head_callback(head);
>>>> 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
>>>> 	f(head);
>>>> 	rcu_lock_release(&rcu_callback_map);
>>>> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 
>>>> 15aaff3203bf2d0..ed93ddb8203d42c 100644
>>>> --- a/kernel/rcu/tree.c
>>>> +++ b/kernel/rcu/tree.c
>>>> @@ -2088,6 +2088,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
>>>> 		trace_rcu_invoke_callback(rcu_state.name, rhp);
>>>>
>>>> 		f = rhp->func;
>>>> +		debug_rcu_head_callback(rhp);
>>>> 		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
>>>> 		f(rhp);
>>>>
>>>> --
>>>> 2.25.1
>>>
>>> .
>>>
>>
>> --
>> Regards,
>>   Zhen Lei
> .
> 

-- 
Regards,
  Zhen Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-12  2:32         ` Leizhen (ThunderTown)
@ 2022-11-12  2:39           ` Leizhen (ThunderTown)
  2022-11-12  6:09             ` Paul E. McKenney
  2022-11-12  6:08           ` Paul E. McKenney
  1 sibling, 1 reply; 18+ messages in thread
From: Leizhen (ThunderTown) @ 2022-11-12  2:39 UTC (permalink / raw)
  To: paulmck, Zhang, Qiang1
  Cc: Frederic Weisbecker, Neeraj Upadhyay, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Joel Fernandes,
	rcu, linux-kernel



On 2022/11/12 10:32, Leizhen (ThunderTown) wrote:
> 
> 
> On 2022/11/12 2:42, Paul E. McKenney wrote:
>> On Fri, Nov 11, 2022 at 01:05:56PM +0000, Zhang, Qiang1 wrote:
>>> On 2022/11/11 19:54, Zhang, Qiang1 wrote:
>>>>> When a structure containing an RCU callback rhp is (incorrectly) 
>>>>> freed and reallocated after rhp is passed to call_rcu(), it is not 
>>>>> unusual for
>>>>> rhp->func to be set to NULL. This defeats the debugging prints used 
>>>>> rhp->by
>>>>> __call_rcu_common() in kernels built with 
>>>>> CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the 
>>>>> offending code using the identity of this function.
>>>>>
>>>>> And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things 
>>>>> are even worse, as can be seen from this splat:
>>>>>
>>>>> Unable to handle kernel NULL pointer dereference at virtual address 0 
>>>>> ... ...
>>>>> PC is at 0x0
>>>>> LR is at rcu_do_batch+0x1c0/0x3b8
>>>>> ... ...
>>>>> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
>>>>> (rcu_core) from (__do_softirq+0x24c/0x344)
>>>>> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
>>>>> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
>>>>> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
>>>>> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
>>>>> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
>>>>> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
>>>>> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
>>>>> (default_idle_call) from (do_idle+0xf8/0x150)
>>>>> (do_idle) from (cpu_startup_entry+0x18/0x20)
>>>>> (cpu_startup_entry) from (0xc01530)
>>>>>
>>>>> This commit therefore adds calls to mem_dump_obj(rhp) to output some 
>>>>> information, for example:
>>>>>
>>>>>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
>>>>>
>>>>> This provides the rough size of the memory block and the offset of 
>>>>> the rcu_head structure, which as least provides at least a few clues 
>>>>> to help locate the problem. If the problem is reproducible, 
>>>>> additional slab debugging can be enabled, for example, 
>>>>> CONFIG_DEBUG_SLAB=y, which can provide significantly more information.
>>>>>
>>>>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
>>>>> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
>>>>> ---
>>>>> kernel/rcu/rcu.h      | 7 +++++++
>>>>> kernel/rcu/srcutiny.c | 1 +
>>>>> kernel/rcu/srcutree.c | 1 +
>>>>> kernel/rcu/tasks.h    | 1 +
>>>>> kernel/rcu/tiny.c     | 1 +
>>>>> kernel/rcu/tree.c     | 1 +
>>>>> 6 files changed, 12 insertions(+)
>>>>>
>>>>> v1 --> v2:
>>>>> 1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
>>>>> 2. Paul E. McKenney helped me update the commit message, thanks.
>>>>>
>>>>
>>>> Hi, Zhen Lei
>>>>
>>>> Maybe the following scenarios should be considered:
>>>>
>>>>                 CPU 0
>>>> tasks context
>>>>    spin_lock(&vmap_area_lock)
>>>>           Interrupt 
>>>> 	 RCU softirq
>>>> 	      rcu_do_batch
>>>> 		mem_dump_obj
>>>>                                   vmalloc_dump_obj
>>>>                                        spin_lock(&vmap_area_lock)   <--  deadlock     
>>>
>>>> Right, thanks. I just saw the robot's report. So this patch should be dropped.
>>>> I'll try to add an helper in mm, where I can check whether the lock has been held, and dump the content of memory object.
>>>
>>> This is a workaround, or maybe try a modification like the following, 
>>> of course, need to ask Paul's opinion.
>>
>> Another approach is to schedule a workqueue handler to do the
>> mem_dump_obj().  This would allow mem_dump_obj() to run in a clean
>> environment.
> 
> It's about to panic, so no chance to schedule.
> 
>>
>> This would allow vmalloc_dump_obj() to be called unconditionally.
>>
>> Other thoughts?
> 
> locked = spin_is_locked(&vmap_area_lock);
> if (!locked)
>     spin_lock(&vmap_area_lock)
> 
> Careful analysis is required, which may cause other problems.
> 
> Or in new function:

Oh, perhaps no new function is needed, mem_dump_obj() itself prints
debugging information. I will try a mm patch first.

> if (locked)
>     return;
> spin_lock(&vmap_area_lock);
> 
> If there is a chance to dump the data, dump the data. If there is no
> chance to dump the data, do not dump the data. This is the fate of
> debugging information.
> 
>>
>> 							Thanx, Paul
>>
>>> diff --git a/mm/util.c b/mm/util.c
>>> index 12984e76767e..86da0739fe5d 100644
>>> --- a/mm/util.c
>>> +++ b/mm/util.c
>>> @@ -1119,14 +1119,18 @@ void mem_dump_obj(void *object)
>>>  {
>>>         const char *type;
>>>
>>> +       if (is_vmalloc_addr(object)) {
>>> +               if (in_task() && vmalloc_dump_obj(object))
>>> +                       return;
>>> +               type = "vmalloc memory";
>>> +               goto end;
>>> +       }
>>> +
>>>         if (kmem_valid_obj(object)) {
>>>                 kmem_dump_obj(object);
>>>                 return;
>>>         }
>>>
>>> -       if (vmalloc_dump_obj(object))
>>> -               return;
>>> -
>>>         if (virt_addr_valid(object))
>>>                 type = "non-slab/vmalloc memory";
>>>         else if (object == NULL)
>>> @@ -1135,7 +1139,7 @@ void mem_dump_obj(void *object)
>>>                 type = "zero-size pointer";
>>>         else
>>>                 type = "non-paged memory";
>>> -
>>> +end:
>>>         pr_cont(" %s\n", type);
>>>  }
>>>  EXPORT_SYMBOL_GPL(mem_dump_obj);
>>>
>>> Thanks
>>> Zqiang
>>>
>>>
>>>>
>>>> Thanks
>>>> Zqiang
>>>>
>>>>
>>>>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 
>>>>> 65704cbc9df7b3d..32ab45fabf8eebf 100644
>>>>> --- a/kernel/rcu/rcu.h
>>>>> +++ b/kernel/rcu/rcu.h
>>>>> @@ -10,6 +10,7 @@
>>>>> #ifndef __LINUX_RCU_H
>>>>> #define __LINUX_RCU_H
>>>>>
>>>>> +#include <linux/mm.h>
>>>>> #include <trace/events/rcu.h>
>>>>>
>>>>> /*
>>>>> @@ -211,6 +212,12 @@ static inline void debug_rcu_head_unqueue(struct 
>>>>> rcu_head *head) }
>>>>> #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
>>>>>
>>>>> +static inline void debug_rcu_head_callback(struct rcu_head *rhp) {
>>>>> +	if (unlikely(!rhp->func))
>>>>> +		mem_dump_obj(rhp);
>>>>> +}
>>>>> +
>>>>> extern int rcu_cpu_stall_suppress_at_boot;
>>>>>
>>>>> static inline bool rcu_stall_is_suppressed_at_boot(void)
>>>>> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 
>>>>> 33adafdad261389..5e7f336baa06ae0 100644
>>>>> --- a/kernel/rcu/srcutiny.c
>>>>> +++ b/kernel/rcu/srcutiny.c
>>>>> @@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
>>>>> 	while (lh) {
>>>>> 		rhp = lh;
>>>>> 		lh = lh->next;
>>>>> +		debug_rcu_head_callback(rhp);
>>>>> 		local_bh_disable();
>>>>> 		rhp->func(rhp);
>>>>> 		local_bh_enable();
>>>>> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 
>>>>> ca4b5dcec675bac..294972e66b31863 100644
>>>>> --- a/kernel/rcu/srcutree.c
>>>>> +++ b/kernel/rcu/srcutree.c
>>>>> @@ -1631,6 +1631,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
>>>>> 	rhp = rcu_cblist_dequeue(&ready_cbs);
>>>>> 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
>>>>> 		debug_rcu_head_unqueue(rhp);
>>>>> +		debug_rcu_head_callback(rhp);
>>>>> 		local_bh_disable();
>>>>> 		rhp->func(rhp);
>>>>> 		local_bh_enable();
>>>>> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 
>>>>> b0b885e071fa8dc..b7f8c67c586cdc4 100644
>>>>> --- a/kernel/rcu/tasks.h
>>>>> +++ b/kernel/rcu/tasks.h
>>>>> @@ -478,6 +478,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
>>>>> 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
>>>>> 	len = rcl.len;
>>>>> 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = 
>>>>> rcu_cblist_dequeue(&rcl)) {
>>>>> +		debug_rcu_head_callback(rhp);
>>>>> 		local_bh_disable();
>>>>> 		rhp->func(rhp);
>>>>> 		local_bh_enable();
>>>>> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 
>>>>> bb8f7d270f01747..56e9a5d91d97ec5 100644
>>>>> --- a/kernel/rcu/tiny.c
>>>>> +++ b/kernel/rcu/tiny.c
>>>>> @@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head 
>>>>> *head)
>>>>>
>>>>> 	trace_rcu_invoke_callback("", head);
>>>>> 	f = head->func;
>>>>> +	debug_rcu_head_callback(head);
>>>>> 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
>>>>> 	f(head);
>>>>> 	rcu_lock_release(&rcu_callback_map);
>>>>> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 
>>>>> 15aaff3203bf2d0..ed93ddb8203d42c 100644
>>>>> --- a/kernel/rcu/tree.c
>>>>> +++ b/kernel/rcu/tree.c
>>>>> @@ -2088,6 +2088,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
>>>>> 		trace_rcu_invoke_callback(rcu_state.name, rhp);
>>>>>
>>>>> 		f = rhp->func;
>>>>> +		debug_rcu_head_callback(rhp);
>>>>> 		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
>>>>> 		f(rhp);
>>>>>
>>>>> --
>>>>> 2.25.1
>>>>
>>>> .
>>>>
>>>
>>> --
>>> Regards,
>>>   Zhen Lei
>> .
>>
> 

-- 
Regards,
  Zhen Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-12  2:32         ` Leizhen (ThunderTown)
  2022-11-12  2:39           ` Leizhen (ThunderTown)
@ 2022-11-12  6:08           ` Paul E. McKenney
  2022-11-14  7:18             ` Leizhen (ThunderTown)
  1 sibling, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2022-11-12  6:08 UTC (permalink / raw)
  To: Leizhen (ThunderTown)
  Cc: Zhang, Qiang1, Frederic Weisbecker, Neeraj Upadhyay,
	Josh Triplett, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Joel Fernandes, rcu, linux-kernel

On Sat, Nov 12, 2022 at 10:32:32AM +0800, Leizhen (ThunderTown) wrote:
> On 2022/11/12 2:42, Paul E. McKenney wrote:
> > On Fri, Nov 11, 2022 at 01:05:56PM +0000, Zhang, Qiang1 wrote:
> >> On 2022/11/11 19:54, Zhang, Qiang1 wrote:
> >>>> When a structure containing an RCU callback rhp is (incorrectly) 
> >>>> freed and reallocated after rhp is passed to call_rcu(), it is not 
> >>>> unusual for
> >>>> rhp->func to be set to NULL. This defeats the debugging prints used 
> >>>> rhp->by
> >>>> __call_rcu_common() in kernels built with 
> >>>> CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the 
> >>>> offending code using the identity of this function.
> >>>>
> >>>> And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things 
> >>>> are even worse, as can be seen from this splat:
> >>>>
> >>>> Unable to handle kernel NULL pointer dereference at virtual address 0 
> >>>> ... ...
> >>>> PC is at 0x0
> >>>> LR is at rcu_do_batch+0x1c0/0x3b8
> >>>> ... ...
> >>>> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
> >>>> (rcu_core) from (__do_softirq+0x24c/0x344)
> >>>> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
> >>>> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
> >>>> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
> >>>> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
> >>>> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
> >>>> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
> >>>> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
> >>>> (default_idle_call) from (do_idle+0xf8/0x150)
> >>>> (do_idle) from (cpu_startup_entry+0x18/0x20)
> >>>> (cpu_startup_entry) from (0xc01530)
> >>>>
> >>>> This commit therefore adds calls to mem_dump_obj(rhp) to output some 
> >>>> information, for example:
> >>>>
> >>>>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
> >>>>
> >>>> This provides the rough size of the memory block and the offset of 
> >>>> the rcu_head structure, which as least provides at least a few clues 
> >>>> to help locate the problem. If the problem is reproducible, 
> >>>> additional slab debugging can be enabled, for example, 
> >>>> CONFIG_DEBUG_SLAB=y, which can provide significantly more information.
> >>>>
> >>>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
> >>>> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> >>>> ---
> >>>> kernel/rcu/rcu.h      | 7 +++++++
> >>>> kernel/rcu/srcutiny.c | 1 +
> >>>> kernel/rcu/srcutree.c | 1 +
> >>>> kernel/rcu/tasks.h    | 1 +
> >>>> kernel/rcu/tiny.c     | 1 +
> >>>> kernel/rcu/tree.c     | 1 +
> >>>> 6 files changed, 12 insertions(+)
> >>>>
> >>>> v1 --> v2:
> >>>> 1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
> >>>> 2. Paul E. McKenney helped me update the commit message, thanks.
> >>>>
> >>>
> >>> Hi, Zhen Lei
> >>>
> >>> Maybe the following scenarios should be considered:
> >>>
> >>>                 CPU 0
> >>> tasks context
> >>>    spin_lock(&vmap_area_lock)
> >>>           Interrupt 
> >>> 	 RCU softirq
> >>> 	      rcu_do_batch
> >>> 		mem_dump_obj
> >>>                                   vmalloc_dump_obj
> >>>                                        spin_lock(&vmap_area_lock)   <--  deadlock     
> >>
> >>> Right, thanks. I just saw the robot's report. So this patch should be dropped.
> >>> I'll try to add an helper in mm, where I can check whether the lock has been held, and dump the content of memory object.
> >>
> >> This is a workaround, or maybe try a modification like the following, 
> >> of course, need to ask Paul's opinion.
> > 
> > Another approach is to schedule a workqueue handler to do the
> > mem_dump_obj().  This would allow mem_dump_obj() to run in a clean
> > environment.
> 
> It's about to panic, so no chance to schedule.

It won't panic if you drop the callback on the floor.

Though to your point, the ->next pointer is likely also trashed.  So you
could just drop the remainder of the callback list on the floor.  That
might provide a good (though not perfect) chance of getting decent output.

> > This would allow vmalloc_dump_obj() to be called unconditionally.
> > 
> > Other thoughts?
> 
> locked = spin_is_locked(&vmap_area_lock);
> if (!locked)
>     spin_lock(&vmap_area_lock)
> 
> Careful analysis is required, which may cause other problems.
> 
> Or in new function:
> if (locked)
>     return;
> spin_lock(&vmap_area_lock);
> 
> If there is a chance to dump the data, dump the data. If there is no
> chance to dump the data, do not dump the data. This is the fate of
> debugging information.

My concern is that there will be increasing numbers of special cases
over time.

							Thanx, Paul

> >> diff --git a/mm/util.c b/mm/util.c
> >> index 12984e76767e..86da0739fe5d 100644
> >> --- a/mm/util.c
> >> +++ b/mm/util.c
> >> @@ -1119,14 +1119,18 @@ void mem_dump_obj(void *object)
> >>  {
> >>         const char *type;
> >>
> >> +       if (is_vmalloc_addr(object)) {
> >> +               if (in_task() && vmalloc_dump_obj(object))
> >> +                       return;
> >> +               type = "vmalloc memory";
> >> +               goto end;
> >> +       }
> >> +
> >>         if (kmem_valid_obj(object)) {
> >>                 kmem_dump_obj(object);
> >>                 return;
> >>         }
> >>
> >> -       if (vmalloc_dump_obj(object))
> >> -               return;
> >> -
> >>         if (virt_addr_valid(object))
> >>                 type = "non-slab/vmalloc memory";
> >>         else if (object == NULL)
> >> @@ -1135,7 +1139,7 @@ void mem_dump_obj(void *object)
> >>                 type = "zero-size pointer";
> >>         else
> >>                 type = "non-paged memory";
> >> -
> >> +end:
> >>         pr_cont(" %s\n", type);
> >>  }
> >>  EXPORT_SYMBOL_GPL(mem_dump_obj);
> >>
> >> Thanks
> >> Zqiang
> >>
> >>
> >>>
> >>> Thanks
> >>> Zqiang
> >>>
> >>>
> >>>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 
> >>>> 65704cbc9df7b3d..32ab45fabf8eebf 100644
> >>>> --- a/kernel/rcu/rcu.h
> >>>> +++ b/kernel/rcu/rcu.h
> >>>> @@ -10,6 +10,7 @@
> >>>> #ifndef __LINUX_RCU_H
> >>>> #define __LINUX_RCU_H
> >>>>
> >>>> +#include <linux/mm.h>
> >>>> #include <trace/events/rcu.h>
> >>>>
> >>>> /*
> >>>> @@ -211,6 +212,12 @@ static inline void debug_rcu_head_unqueue(struct 
> >>>> rcu_head *head) }
> >>>> #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
> >>>>
> >>>> +static inline void debug_rcu_head_callback(struct rcu_head *rhp) {
> >>>> +	if (unlikely(!rhp->func))
> >>>> +		mem_dump_obj(rhp);
> >>>> +}
> >>>> +
> >>>> extern int rcu_cpu_stall_suppress_at_boot;
> >>>>
> >>>> static inline bool rcu_stall_is_suppressed_at_boot(void)
> >>>> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 
> >>>> 33adafdad261389..5e7f336baa06ae0 100644
> >>>> --- a/kernel/rcu/srcutiny.c
> >>>> +++ b/kernel/rcu/srcutiny.c
> >>>> @@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
> >>>> 	while (lh) {
> >>>> 		rhp = lh;
> >>>> 		lh = lh->next;
> >>>> +		debug_rcu_head_callback(rhp);
> >>>> 		local_bh_disable();
> >>>> 		rhp->func(rhp);
> >>>> 		local_bh_enable();
> >>>> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 
> >>>> ca4b5dcec675bac..294972e66b31863 100644
> >>>> --- a/kernel/rcu/srcutree.c
> >>>> +++ b/kernel/rcu/srcutree.c
> >>>> @@ -1631,6 +1631,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
> >>>> 	rhp = rcu_cblist_dequeue(&ready_cbs);
> >>>> 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
> >>>> 		debug_rcu_head_unqueue(rhp);
> >>>> +		debug_rcu_head_callback(rhp);
> >>>> 		local_bh_disable();
> >>>> 		rhp->func(rhp);
> >>>> 		local_bh_enable();
> >>>> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 
> >>>> b0b885e071fa8dc..b7f8c67c586cdc4 100644
> >>>> --- a/kernel/rcu/tasks.h
> >>>> +++ b/kernel/rcu/tasks.h
> >>>> @@ -478,6 +478,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
> >>>> 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
> >>>> 	len = rcl.len;
> >>>> 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = 
> >>>> rcu_cblist_dequeue(&rcl)) {
> >>>> +		debug_rcu_head_callback(rhp);
> >>>> 		local_bh_disable();
> >>>> 		rhp->func(rhp);
> >>>> 		local_bh_enable();
> >>>> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 
> >>>> bb8f7d270f01747..56e9a5d91d97ec5 100644
> >>>> --- a/kernel/rcu/tiny.c
> >>>> +++ b/kernel/rcu/tiny.c
> >>>> @@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head 
> >>>> *head)
> >>>>
> >>>> 	trace_rcu_invoke_callback("", head);
> >>>> 	f = head->func;
> >>>> +	debug_rcu_head_callback(head);
> >>>> 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
> >>>> 	f(head);
> >>>> 	rcu_lock_release(&rcu_callback_map);
> >>>> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 
> >>>> 15aaff3203bf2d0..ed93ddb8203d42c 100644
> >>>> --- a/kernel/rcu/tree.c
> >>>> +++ b/kernel/rcu/tree.c
> >>>> @@ -2088,6 +2088,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
> >>>> 		trace_rcu_invoke_callback(rcu_state.name, rhp);
> >>>>
> >>>> 		f = rhp->func;
> >>>> +		debug_rcu_head_callback(rhp);
> >>>> 		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
> >>>> 		f(rhp);
> >>>>
> >>>> --
> >>>> 2.25.1
> >>>
> >>> .
> >>>
> >>
> >> --
> >> Regards,
> >>   Zhen Lei
> > .
> > 
> 
> -- 
> Regards,
>   Zhen Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-12  2:39           ` Leizhen (ThunderTown)
@ 2022-11-12  6:09             ` Paul E. McKenney
  0 siblings, 0 replies; 18+ messages in thread
From: Paul E. McKenney @ 2022-11-12  6:09 UTC (permalink / raw)
  To: Leizhen (ThunderTown)
  Cc: Zhang, Qiang1, Frederic Weisbecker, Neeraj Upadhyay,
	Josh Triplett, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Joel Fernandes, rcu, linux-kernel

On Sat, Nov 12, 2022 at 10:39:55AM +0800, Leizhen (ThunderTown) wrote:
> On 2022/11/12 10:32, Leizhen (ThunderTown) wrote:
> > On 2022/11/12 2:42, Paul E. McKenney wrote:
> >> On Fri, Nov 11, 2022 at 01:05:56PM +0000, Zhang, Qiang1 wrote:
> >>> On 2022/11/11 19:54, Zhang, Qiang1 wrote:
> >>>>> When a structure containing an RCU callback rhp is (incorrectly) 
> >>>>> freed and reallocated after rhp is passed to call_rcu(), it is not 
> >>>>> unusual for
> >>>>> rhp->func to be set to NULL. This defeats the debugging prints used 
> >>>>> rhp->by
> >>>>> __call_rcu_common() in kernels built with 
> >>>>> CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the 
> >>>>> offending code using the identity of this function.
> >>>>>
> >>>>> And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things 
> >>>>> are even worse, as can be seen from this splat:
> >>>>>
> >>>>> Unable to handle kernel NULL pointer dereference at virtual address 0 
> >>>>> ... ...
> >>>>> PC is at 0x0
> >>>>> LR is at rcu_do_batch+0x1c0/0x3b8
> >>>>> ... ...
> >>>>> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
> >>>>> (rcu_core) from (__do_softirq+0x24c/0x344)
> >>>>> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
> >>>>> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
> >>>>> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
> >>>>> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
> >>>>> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
> >>>>> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
> >>>>> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
> >>>>> (default_idle_call) from (do_idle+0xf8/0x150)
> >>>>> (do_idle) from (cpu_startup_entry+0x18/0x20)
> >>>>> (cpu_startup_entry) from (0xc01530)
> >>>>>
> >>>>> This commit therefore adds calls to mem_dump_obj(rhp) to output some 
> >>>>> information, for example:
> >>>>>
> >>>>>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
> >>>>>
> >>>>> This provides the rough size of the memory block and the offset of 
> >>>>> the rcu_head structure, which as least provides at least a few clues 
> >>>>> to help locate the problem. If the problem is reproducible, 
> >>>>> additional slab debugging can be enabled, for example, 
> >>>>> CONFIG_DEBUG_SLAB=y, which can provide significantly more information.
> >>>>>
> >>>>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
> >>>>> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> >>>>> ---
> >>>>> kernel/rcu/rcu.h      | 7 +++++++
> >>>>> kernel/rcu/srcutiny.c | 1 +
> >>>>> kernel/rcu/srcutree.c | 1 +
> >>>>> kernel/rcu/tasks.h    | 1 +
> >>>>> kernel/rcu/tiny.c     | 1 +
> >>>>> kernel/rcu/tree.c     | 1 +
> >>>>> 6 files changed, 12 insertions(+)
> >>>>>
> >>>>> v1 --> v2:
> >>>>> 1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
> >>>>> 2. Paul E. McKenney helped me update the commit message, thanks.
> >>>>>
> >>>>
> >>>> Hi, Zhen Lei
> >>>>
> >>>> Maybe the following scenarios should be considered:
> >>>>
> >>>>                 CPU 0
> >>>> tasks context
> >>>>    spin_lock(&vmap_area_lock)
> >>>>           Interrupt 
> >>>> 	 RCU softirq
> >>>> 	      rcu_do_batch
> >>>> 		mem_dump_obj
> >>>>                                   vmalloc_dump_obj
> >>>>                                        spin_lock(&vmap_area_lock)   <--  deadlock     
> >>>
> >>>> Right, thanks. I just saw the robot's report. So this patch should be dropped.
> >>>> I'll try to add an helper in mm, where I can check whether the lock has been held, and dump the content of memory object.
> >>>
> >>> This is a workaround, or maybe try a modification like the following, 
> >>> of course, need to ask Paul's opinion.
> >>
> >> Another approach is to schedule a workqueue handler to do the
> >> mem_dump_obj().  This would allow mem_dump_obj() to run in a clean
> >> environment.
> > 
> > It's about to panic, so no chance to schedule.
> > 
> >>
> >> This would allow vmalloc_dump_obj() to be called unconditionally.
> >>
> >> Other thoughts?
> > 
> > locked = spin_is_locked(&vmap_area_lock);
> > if (!locked)
> >     spin_lock(&vmap_area_lock)
> > 
> > Careful analysis is required, which may cause other problems.
> > 
> > Or in new function:
> 
> Oh, perhaps no new function is needed, mem_dump_obj() itself prints
> debugging information. I will try a mm patch first.

That does sound very worth trying!

							Thanx, Paul

> > if (locked)
> >     return;
> > spin_lock(&vmap_area_lock);
> > 
> > If there is a chance to dump the data, dump the data. If there is no
> > chance to dump the data, do not dump the data. This is the fate of
> > debugging information.
> > 
> >>
> >> 							Thanx, Paul
> >>
> >>> diff --git a/mm/util.c b/mm/util.c
> >>> index 12984e76767e..86da0739fe5d 100644
> >>> --- a/mm/util.c
> >>> +++ b/mm/util.c
> >>> @@ -1119,14 +1119,18 @@ void mem_dump_obj(void *object)
> >>>  {
> >>>         const char *type;
> >>>
> >>> +       if (is_vmalloc_addr(object)) {
> >>> +               if (in_task() && vmalloc_dump_obj(object))
> >>> +                       return;
> >>> +               type = "vmalloc memory";
> >>> +               goto end;
> >>> +       }
> >>> +
> >>>         if (kmem_valid_obj(object)) {
> >>>                 kmem_dump_obj(object);
> >>>                 return;
> >>>         }
> >>>
> >>> -       if (vmalloc_dump_obj(object))
> >>> -               return;
> >>> -
> >>>         if (virt_addr_valid(object))
> >>>                 type = "non-slab/vmalloc memory";
> >>>         else if (object == NULL)
> >>> @@ -1135,7 +1139,7 @@ void mem_dump_obj(void *object)
> >>>                 type = "zero-size pointer";
> >>>         else
> >>>                 type = "non-paged memory";
> >>> -
> >>> +end:
> >>>         pr_cont(" %s\n", type);
> >>>  }
> >>>  EXPORT_SYMBOL_GPL(mem_dump_obj);
> >>>
> >>> Thanks
> >>> Zqiang
> >>>
> >>>
> >>>>
> >>>> Thanks
> >>>> Zqiang
> >>>>
> >>>>
> >>>>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 
> >>>>> 65704cbc9df7b3d..32ab45fabf8eebf 100644
> >>>>> --- a/kernel/rcu/rcu.h
> >>>>> +++ b/kernel/rcu/rcu.h
> >>>>> @@ -10,6 +10,7 @@
> >>>>> #ifndef __LINUX_RCU_H
> >>>>> #define __LINUX_RCU_H
> >>>>>
> >>>>> +#include <linux/mm.h>
> >>>>> #include <trace/events/rcu.h>
> >>>>>
> >>>>> /*
> >>>>> @@ -211,6 +212,12 @@ static inline void debug_rcu_head_unqueue(struct 
> >>>>> rcu_head *head) }
> >>>>> #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
> >>>>>
> >>>>> +static inline void debug_rcu_head_callback(struct rcu_head *rhp) {
> >>>>> +	if (unlikely(!rhp->func))
> >>>>> +		mem_dump_obj(rhp);
> >>>>> +}
> >>>>> +
> >>>>> extern int rcu_cpu_stall_suppress_at_boot;
> >>>>>
> >>>>> static inline bool rcu_stall_is_suppressed_at_boot(void)
> >>>>> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 
> >>>>> 33adafdad261389..5e7f336baa06ae0 100644
> >>>>> --- a/kernel/rcu/srcutiny.c
> >>>>> +++ b/kernel/rcu/srcutiny.c
> >>>>> @@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
> >>>>> 	while (lh) {
> >>>>> 		rhp = lh;
> >>>>> 		lh = lh->next;
> >>>>> +		debug_rcu_head_callback(rhp);
> >>>>> 		local_bh_disable();
> >>>>> 		rhp->func(rhp);
> >>>>> 		local_bh_enable();
> >>>>> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 
> >>>>> ca4b5dcec675bac..294972e66b31863 100644
> >>>>> --- a/kernel/rcu/srcutree.c
> >>>>> +++ b/kernel/rcu/srcutree.c
> >>>>> @@ -1631,6 +1631,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
> >>>>> 	rhp = rcu_cblist_dequeue(&ready_cbs);
> >>>>> 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
> >>>>> 		debug_rcu_head_unqueue(rhp);
> >>>>> +		debug_rcu_head_callback(rhp);
> >>>>> 		local_bh_disable();
> >>>>> 		rhp->func(rhp);
> >>>>> 		local_bh_enable();
> >>>>> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 
> >>>>> b0b885e071fa8dc..b7f8c67c586cdc4 100644
> >>>>> --- a/kernel/rcu/tasks.h
> >>>>> +++ b/kernel/rcu/tasks.h
> >>>>> @@ -478,6 +478,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
> >>>>> 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
> >>>>> 	len = rcl.len;
> >>>>> 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = 
> >>>>> rcu_cblist_dequeue(&rcl)) {
> >>>>> +		debug_rcu_head_callback(rhp);
> >>>>> 		local_bh_disable();
> >>>>> 		rhp->func(rhp);
> >>>>> 		local_bh_enable();
> >>>>> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 
> >>>>> bb8f7d270f01747..56e9a5d91d97ec5 100644
> >>>>> --- a/kernel/rcu/tiny.c
> >>>>> +++ b/kernel/rcu/tiny.c
> >>>>> @@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head 
> >>>>> *head)
> >>>>>
> >>>>> 	trace_rcu_invoke_callback("", head);
> >>>>> 	f = head->func;
> >>>>> +	debug_rcu_head_callback(head);
> >>>>> 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
> >>>>> 	f(head);
> >>>>> 	rcu_lock_release(&rcu_callback_map);
> >>>>> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 
> >>>>> 15aaff3203bf2d0..ed93ddb8203d42c 100644
> >>>>> --- a/kernel/rcu/tree.c
> >>>>> +++ b/kernel/rcu/tree.c
> >>>>> @@ -2088,6 +2088,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
> >>>>> 		trace_rcu_invoke_callback(rcu_state.name, rhp);
> >>>>>
> >>>>> 		f = rhp->func;
> >>>>> +		debug_rcu_head_callback(rhp);
> >>>>> 		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
> >>>>> 		f(rhp);
> >>>>>
> >>>>> --
> >>>>> 2.25.1
> >>>>
> >>>> .
> >>>>
> >>>
> >>> --
> >>> Regards,
> >>>   Zhen Lei
> >> .
> >>
> > 
> 
> -- 
> Regards,
>   Zhen Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-11 15:50 ` Elliott, Robert (Servers)
@ 2022-11-14  7:05   ` Leizhen (ThunderTown)
  0 siblings, 0 replies; 18+ messages in thread
From: Leizhen (ThunderTown) @ 2022-11-14  7:05 UTC (permalink / raw)
  To: Elliott, Robert (Servers),
	Paul E . McKenney, Frederic Weisbecker, Neeraj Upadhyay,
	Josh Triplett, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Joel Fernandes, rcu, linux-kernel



On 2022/11/11 23:50, Elliott, Robert (Servers) wrote:
> 
> 
>> +static inline void debug_rcu_head_callback(struct rcu_head *rhp)
>> +{
>> +	if (unlikely(!rhp->func))
>> +		mem_dump_obj(rhp);
>> +}
>> +
> 
> The mm/util.c definition of mem_dump_object() says:
>  * This function uses pr_cont(), so that the caller is expected to have
>  * printed out whatever preamble is appropriate.
> 
> so this needs to call pr_alert() or pr_err() before that to explain what
> is being printed (with no \n), like these:
> 
> kernel/rcu/rcutorture.c: pr_alert("mem_dump_obj(%px):", &rhp);
> kernel/rcu/rcutorture.c: mem_dump_obj(&rhp);
> ...
> kernel/rcu/tree.c: pr_err("%s(): Double-freed CB %p->%pS()!!!  ", __func__, head, head->func);
> kernel/rcu/tree.c: mem_dump_obj(head);

Yes, right, thanks.

> 
> 
> .
> 

-- 
Regards,
  Zhen Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-12  6:08           ` Paul E. McKenney
@ 2022-11-14  7:18             ` Leizhen (ThunderTown)
  2022-11-14 16:06               ` Paul E. McKenney
  0 siblings, 1 reply; 18+ messages in thread
From: Leizhen (ThunderTown) @ 2022-11-14  7:18 UTC (permalink / raw)
  To: paulmck
  Cc: Zhang, Qiang1, Frederic Weisbecker, Neeraj Upadhyay,
	Josh Triplett, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Joel Fernandes, rcu, linux-kernel



On 2022/11/12 14:08, Paul E. McKenney wrote:
> On Sat, Nov 12, 2022 at 10:32:32AM +0800, Leizhen (ThunderTown) wrote:
>> On 2022/11/12 2:42, Paul E. McKenney wrote:
>>> On Fri, Nov 11, 2022 at 01:05:56PM +0000, Zhang, Qiang1 wrote:
>>>> On 2022/11/11 19:54, Zhang, Qiang1 wrote:
>>>>>> When a structure containing an RCU callback rhp is (incorrectly) 
>>>>>> freed and reallocated after rhp is passed to call_rcu(), it is not 
>>>>>> unusual for
>>>>>> rhp->func to be set to NULL. This defeats the debugging prints used 
>>>>>> rhp->by
>>>>>> __call_rcu_common() in kernels built with 
>>>>>> CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the 
>>>>>> offending code using the identity of this function.
>>>>>>
>>>>>> And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things 
>>>>>> are even worse, as can be seen from this splat:
>>>>>>
>>>>>> Unable to handle kernel NULL pointer dereference at virtual address 0 
>>>>>> ... ...
>>>>>> PC is at 0x0
>>>>>> LR is at rcu_do_batch+0x1c0/0x3b8
>>>>>> ... ...
>>>>>> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
>>>>>> (rcu_core) from (__do_softirq+0x24c/0x344)
>>>>>> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
>>>>>> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
>>>>>> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
>>>>>> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
>>>>>> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
>>>>>> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
>>>>>> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
>>>>>> (default_idle_call) from (do_idle+0xf8/0x150)
>>>>>> (do_idle) from (cpu_startup_entry+0x18/0x20)
>>>>>> (cpu_startup_entry) from (0xc01530)
>>>>>>
>>>>>> This commit therefore adds calls to mem_dump_obj(rhp) to output some 
>>>>>> information, for example:
>>>>>>
>>>>>>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
>>>>>>
>>>>>> This provides the rough size of the memory block and the offset of 
>>>>>> the rcu_head structure, which as least provides at least a few clues 
>>>>>> to help locate the problem. If the problem is reproducible, 
>>>>>> additional slab debugging can be enabled, for example, 
>>>>>> CONFIG_DEBUG_SLAB=y, which can provide significantly more information.
>>>>>>
>>>>>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
>>>>>> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
>>>>>> ---
>>>>>> kernel/rcu/rcu.h      | 7 +++++++
>>>>>> kernel/rcu/srcutiny.c | 1 +
>>>>>> kernel/rcu/srcutree.c | 1 +
>>>>>> kernel/rcu/tasks.h    | 1 +
>>>>>> kernel/rcu/tiny.c     | 1 +
>>>>>> kernel/rcu/tree.c     | 1 +
>>>>>> 6 files changed, 12 insertions(+)
>>>>>>
>>>>>> v1 --> v2:
>>>>>> 1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
>>>>>> 2. Paul E. McKenney helped me update the commit message, thanks.
>>>>>>
>>>>>
>>>>> Hi, Zhen Lei
>>>>>
>>>>> Maybe the following scenarios should be considered:
>>>>>
>>>>>                 CPU 0
>>>>> tasks context
>>>>>    spin_lock(&vmap_area_lock)
>>>>>           Interrupt 
>>>>> 	 RCU softirq
>>>>> 	      rcu_do_batch
>>>>> 		mem_dump_obj
>>>>>                                   vmalloc_dump_obj
>>>>>                                        spin_lock(&vmap_area_lock)   <--  deadlock     
>>>>
>>>>> Right, thanks. I just saw the robot's report. So this patch should be dropped.
>>>>> I'll try to add an helper in mm, where I can check whether the lock has been held, and dump the content of memory object.
>>>>
>>>> This is a workaround, or maybe try a modification like the following, 
>>>> of course, need to ask Paul's opinion.
>>>
>>> Another approach is to schedule a workqueue handler to do the
>>> mem_dump_obj().  This would allow mem_dump_obj() to run in a clean
>>> environment.
>>
>> It's about to panic, so no chance to schedule.
> 
> It won't panic if you drop the callback on the floor.
> 
> Though to your point, the ->next pointer is likely also trashed.  So you
> could just drop the remainder of the callback list on the floor.  That
> might provide a good (though not perfect) chance of getting decent output.

OK, I think I understand what you mean.
if (!f)
	schedule_work(&work);
else
	f(rhp)

> 
>>> This would allow vmalloc_dump_obj() to be called unconditionally.
>>>
>>> Other thoughts?
>>
>> locked = spin_is_locked(&vmap_area_lock);
>> if (!locked)
>>     spin_lock(&vmap_area_lock)
>>
>> Careful analysis is required, which may cause other problems.
>>
>> Or in new function:
>> if (locked)
>>     return;
>> spin_lock(&vmap_area_lock);
>>
>> If there is a chance to dump the data, dump the data. If there is no
>> chance to dump the data, do not dump the data. This is the fate of
>> debugging information.
> 
> My concern is that there will be increasing numbers of special cases
> over time.

OK, I got it.

> 
> 							Thanx, Paul
> 
>>>> diff --git a/mm/util.c b/mm/util.c
>>>> index 12984e76767e..86da0739fe5d 100644
>>>> --- a/mm/util.c
>>>> +++ b/mm/util.c
>>>> @@ -1119,14 +1119,18 @@ void mem_dump_obj(void *object)
>>>>  {
>>>>         const char *type;
>>>>
>>>> +       if (is_vmalloc_addr(object)) {
>>>> +               if (in_task() && vmalloc_dump_obj(object))
>>>> +                       return;
>>>> +               type = "vmalloc memory";
>>>> +               goto end;
>>>> +       }
>>>> +
>>>>         if (kmem_valid_obj(object)) {
>>>>                 kmem_dump_obj(object);
>>>>                 return;
>>>>         }
>>>>
>>>> -       if (vmalloc_dump_obj(object))
>>>> -               return;
>>>> -
>>>>         if (virt_addr_valid(object))
>>>>                 type = "non-slab/vmalloc memory";
>>>>         else if (object == NULL)
>>>> @@ -1135,7 +1139,7 @@ void mem_dump_obj(void *object)
>>>>                 type = "zero-size pointer";
>>>>         else
>>>>                 type = "non-paged memory";
>>>> -
>>>> +end:
>>>>         pr_cont(" %s\n", type);
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(mem_dump_obj);
>>>>
>>>> Thanks
>>>> Zqiang
>>>>
>>>>
>>>>>
>>>>> Thanks
>>>>> Zqiang
>>>>>
>>>>>
>>>>>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 
>>>>>> 65704cbc9df7b3d..32ab45fabf8eebf 100644
>>>>>> --- a/kernel/rcu/rcu.h
>>>>>> +++ b/kernel/rcu/rcu.h
>>>>>> @@ -10,6 +10,7 @@
>>>>>> #ifndef __LINUX_RCU_H
>>>>>> #define __LINUX_RCU_H
>>>>>>
>>>>>> +#include <linux/mm.h>
>>>>>> #include <trace/events/rcu.h>
>>>>>>
>>>>>> /*
>>>>>> @@ -211,6 +212,12 @@ static inline void debug_rcu_head_unqueue(struct 
>>>>>> rcu_head *head) }
>>>>>> #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
>>>>>>
>>>>>> +static inline void debug_rcu_head_callback(struct rcu_head *rhp) {
>>>>>> +	if (unlikely(!rhp->func))
>>>>>> +		mem_dump_obj(rhp);
>>>>>> +}
>>>>>> +
>>>>>> extern int rcu_cpu_stall_suppress_at_boot;
>>>>>>
>>>>>> static inline bool rcu_stall_is_suppressed_at_boot(void)
>>>>>> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 
>>>>>> 33adafdad261389..5e7f336baa06ae0 100644
>>>>>> --- a/kernel/rcu/srcutiny.c
>>>>>> +++ b/kernel/rcu/srcutiny.c
>>>>>> @@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
>>>>>> 	while (lh) {
>>>>>> 		rhp = lh;
>>>>>> 		lh = lh->next;
>>>>>> +		debug_rcu_head_callback(rhp);
>>>>>> 		local_bh_disable();
>>>>>> 		rhp->func(rhp);
>>>>>> 		local_bh_enable();
>>>>>> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 
>>>>>> ca4b5dcec675bac..294972e66b31863 100644
>>>>>> --- a/kernel/rcu/srcutree.c
>>>>>> +++ b/kernel/rcu/srcutree.c
>>>>>> @@ -1631,6 +1631,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
>>>>>> 	rhp = rcu_cblist_dequeue(&ready_cbs);
>>>>>> 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
>>>>>> 		debug_rcu_head_unqueue(rhp);
>>>>>> +		debug_rcu_head_callback(rhp);
>>>>>> 		local_bh_disable();
>>>>>> 		rhp->func(rhp);
>>>>>> 		local_bh_enable();
>>>>>> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 
>>>>>> b0b885e071fa8dc..b7f8c67c586cdc4 100644
>>>>>> --- a/kernel/rcu/tasks.h
>>>>>> +++ b/kernel/rcu/tasks.h
>>>>>> @@ -478,6 +478,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
>>>>>> 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
>>>>>> 	len = rcl.len;
>>>>>> 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = 
>>>>>> rcu_cblist_dequeue(&rcl)) {
>>>>>> +		debug_rcu_head_callback(rhp);
>>>>>> 		local_bh_disable();
>>>>>> 		rhp->func(rhp);
>>>>>> 		local_bh_enable();
>>>>>> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 
>>>>>> bb8f7d270f01747..56e9a5d91d97ec5 100644
>>>>>> --- a/kernel/rcu/tiny.c
>>>>>> +++ b/kernel/rcu/tiny.c
>>>>>> @@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head 
>>>>>> *head)
>>>>>>
>>>>>> 	trace_rcu_invoke_callback("", head);
>>>>>> 	f = head->func;
>>>>>> +	debug_rcu_head_callback(head);
>>>>>> 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
>>>>>> 	f(head);
>>>>>> 	rcu_lock_release(&rcu_callback_map);
>>>>>> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 
>>>>>> 15aaff3203bf2d0..ed93ddb8203d42c 100644
>>>>>> --- a/kernel/rcu/tree.c
>>>>>> +++ b/kernel/rcu/tree.c
>>>>>> @@ -2088,6 +2088,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
>>>>>> 		trace_rcu_invoke_callback(rcu_state.name, rhp);
>>>>>>
>>>>>> 		f = rhp->func;
>>>>>> +		debug_rcu_head_callback(rhp);
>>>>>> 		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
>>>>>> 		f(rhp);
>>>>>>
>>>>>> --
>>>>>> 2.25.1
>>>>>
>>>>> .
>>>>>
>>>>
>>>> --
>>>> Regards,
>>>>   Zhen Lei
>>> .
>>>
>>
>> -- 
>> Regards,
>>   Zhen Lei
> .
> 

-- 
Regards,
  Zhen Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-14  7:18             ` Leizhen (ThunderTown)
@ 2022-11-14 16:06               ` Paul E. McKenney
  2022-11-16 14:43                 ` Leizhen (ThunderTown)
  0 siblings, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2022-11-14 16:06 UTC (permalink / raw)
  To: Leizhen (ThunderTown)
  Cc: Zhang, Qiang1, Frederic Weisbecker, Neeraj Upadhyay,
	Josh Triplett, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Joel Fernandes, rcu, linux-kernel

On Mon, Nov 14, 2022 at 03:18:10PM +0800, Leizhen (ThunderTown) wrote:
> On 2022/11/12 14:08, Paul E. McKenney wrote:
> > On Sat, Nov 12, 2022 at 10:32:32AM +0800, Leizhen (ThunderTown) wrote:
> >> On 2022/11/12 2:42, Paul E. McKenney wrote:
> >>> On Fri, Nov 11, 2022 at 01:05:56PM +0000, Zhang, Qiang1 wrote:
> >>>> On 2022/11/11 19:54, Zhang, Qiang1 wrote:
> >>>>>> When a structure containing an RCU callback rhp is (incorrectly) 
> >>>>>> freed and reallocated after rhp is passed to call_rcu(), it is not 
> >>>>>> unusual for
> >>>>>> rhp->func to be set to NULL. This defeats the debugging prints used 
> >>>>>> rhp->by
> >>>>>> __call_rcu_common() in kernels built with 
> >>>>>> CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the 
> >>>>>> offending code using the identity of this function.
> >>>>>>
> >>>>>> And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things 
> >>>>>> are even worse, as can be seen from this splat:
> >>>>>>
> >>>>>> Unable to handle kernel NULL pointer dereference at virtual address 0 
> >>>>>> ... ...
> >>>>>> PC is at 0x0
> >>>>>> LR is at rcu_do_batch+0x1c0/0x3b8
> >>>>>> ... ...
> >>>>>> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
> >>>>>> (rcu_core) from (__do_softirq+0x24c/0x344)
> >>>>>> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
> >>>>>> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
> >>>>>> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
> >>>>>> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
> >>>>>> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
> >>>>>> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
> >>>>>> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
> >>>>>> (default_idle_call) from (do_idle+0xf8/0x150)
> >>>>>> (do_idle) from (cpu_startup_entry+0x18/0x20)
> >>>>>> (cpu_startup_entry) from (0xc01530)
> >>>>>>
> >>>>>> This commit therefore adds calls to mem_dump_obj(rhp) to output some 
> >>>>>> information, for example:
> >>>>>>
> >>>>>>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
> >>>>>>
> >>>>>> This provides the rough size of the memory block and the offset of 
> >>>>>> the rcu_head structure, which as least provides at least a few clues 
> >>>>>> to help locate the problem. If the problem is reproducible, 
> >>>>>> additional slab debugging can be enabled, for example, 
> >>>>>> CONFIG_DEBUG_SLAB=y, which can provide significantly more information.
> >>>>>>
> >>>>>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
> >>>>>> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> >>>>>> ---
> >>>>>> kernel/rcu/rcu.h      | 7 +++++++
> >>>>>> kernel/rcu/srcutiny.c | 1 +
> >>>>>> kernel/rcu/srcutree.c | 1 +
> >>>>>> kernel/rcu/tasks.h    | 1 +
> >>>>>> kernel/rcu/tiny.c     | 1 +
> >>>>>> kernel/rcu/tree.c     | 1 +
> >>>>>> 6 files changed, 12 insertions(+)
> >>>>>>
> >>>>>> v1 --> v2:
> >>>>>> 1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
> >>>>>> 2. Paul E. McKenney helped me update the commit message, thanks.
> >>>>>>
> >>>>>
> >>>>> Hi, Zhen Lei
> >>>>>
> >>>>> Maybe the following scenarios should be considered:
> >>>>>
> >>>>>                 CPU 0
> >>>>> tasks context
> >>>>>    spin_lock(&vmap_area_lock)
> >>>>>           Interrupt 
> >>>>> 	 RCU softirq
> >>>>> 	      rcu_do_batch
> >>>>> 		mem_dump_obj
> >>>>>                                   vmalloc_dump_obj
> >>>>>                                        spin_lock(&vmap_area_lock)   <--  deadlock     
> >>>>
> >>>>> Right, thanks. I just saw the robot's report. So this patch should be dropped.
> >>>>> I'll try to add an helper in mm, where I can check whether the lock has been held, and dump the content of memory object.
> >>>>
> >>>> This is a workaround, or maybe try a modification like the following, 
> >>>> of course, need to ask Paul's opinion.
> >>>
> >>> Another approach is to schedule a workqueue handler to do the
> >>> mem_dump_obj().  This would allow mem_dump_obj() to run in a clean
> >>> environment.
> >>
> >> It's about to panic, so no chance to schedule.
> > 
> > It won't panic if you drop the callback on the floor.
> > 
> > Though to your point, the ->next pointer is likely also trashed.  So you
> > could just drop the remainder of the callback list on the floor.  That
> > might provide a good (though not perfect) chance of getting decent output.
> 
> OK, I think I understand what you mean.
> if (!f)
> 	schedule_work(&work);
> else
> 	f(rhp)

Yes, except that the "schedule_work()" also needs to be accompanied
by something that refuses to execute the rest of those callbacks.
This needs to break out of the loop (or return) and to adjust counts,
among other things.  This might be as easy as setting count to the
negative of the length of the "rcl" list, but does need some attention
to the code following the callback-invocation loop.

							Thanx, Paul

> >>> This would allow vmalloc_dump_obj() to be called unconditionally.
> >>>
> >>> Other thoughts?
> >>
> >> locked = spin_is_locked(&vmap_area_lock);
> >> if (!locked)
> >>     spin_lock(&vmap_area_lock)
> >>
> >> Careful analysis is required, which may cause other problems.
> >>
> >> Or in new function:
> >> if (locked)
> >>     return;
> >> spin_lock(&vmap_area_lock);
> >>
> >> If there is a chance to dump the data, dump the data. If there is no
> >> chance to dump the data, do not dump the data. This is the fate of
> >> debugging information.
> > 
> > My concern is that there will be increasing numbers of special cases
> > over time.
> 
> OK, I got it.
> 
> > 
> > 							Thanx, Paul
> > 
> >>>> diff --git a/mm/util.c b/mm/util.c
> >>>> index 12984e76767e..86da0739fe5d 100644
> >>>> --- a/mm/util.c
> >>>> +++ b/mm/util.c
> >>>> @@ -1119,14 +1119,18 @@ void mem_dump_obj(void *object)
> >>>>  {
> >>>>         const char *type;
> >>>>
> >>>> +       if (is_vmalloc_addr(object)) {
> >>>> +               if (in_task() && vmalloc_dump_obj(object))
> >>>> +                       return;
> >>>> +               type = "vmalloc memory";
> >>>> +               goto end;
> >>>> +       }
> >>>> +
> >>>>         if (kmem_valid_obj(object)) {
> >>>>                 kmem_dump_obj(object);
> >>>>                 return;
> >>>>         }
> >>>>
> >>>> -       if (vmalloc_dump_obj(object))
> >>>> -               return;
> >>>> -
> >>>>         if (virt_addr_valid(object))
> >>>>                 type = "non-slab/vmalloc memory";
> >>>>         else if (object == NULL)
> >>>> @@ -1135,7 +1139,7 @@ void mem_dump_obj(void *object)
> >>>>                 type = "zero-size pointer";
> >>>>         else
> >>>>                 type = "non-paged memory";
> >>>> -
> >>>> +end:
> >>>>         pr_cont(" %s\n", type);
> >>>>  }
> >>>>  EXPORT_SYMBOL_GPL(mem_dump_obj);
> >>>>
> >>>> Thanks
> >>>> Zqiang
> >>>>
> >>>>
> >>>>>
> >>>>> Thanks
> >>>>> Zqiang
> >>>>>
> >>>>>
> >>>>>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 
> >>>>>> 65704cbc9df7b3d..32ab45fabf8eebf 100644
> >>>>>> --- a/kernel/rcu/rcu.h
> >>>>>> +++ b/kernel/rcu/rcu.h
> >>>>>> @@ -10,6 +10,7 @@
> >>>>>> #ifndef __LINUX_RCU_H
> >>>>>> #define __LINUX_RCU_H
> >>>>>>
> >>>>>> +#include <linux/mm.h>
> >>>>>> #include <trace/events/rcu.h>
> >>>>>>
> >>>>>> /*
> >>>>>> @@ -211,6 +212,12 @@ static inline void debug_rcu_head_unqueue(struct 
> >>>>>> rcu_head *head) }
> >>>>>> #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
> >>>>>>
> >>>>>> +static inline void debug_rcu_head_callback(struct rcu_head *rhp) {
> >>>>>> +	if (unlikely(!rhp->func))
> >>>>>> +		mem_dump_obj(rhp);
> >>>>>> +}
> >>>>>> +
> >>>>>> extern int rcu_cpu_stall_suppress_at_boot;
> >>>>>>
> >>>>>> static inline bool rcu_stall_is_suppressed_at_boot(void)
> >>>>>> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 
> >>>>>> 33adafdad261389..5e7f336baa06ae0 100644
> >>>>>> --- a/kernel/rcu/srcutiny.c
> >>>>>> +++ b/kernel/rcu/srcutiny.c
> >>>>>> @@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
> >>>>>> 	while (lh) {
> >>>>>> 		rhp = lh;
> >>>>>> 		lh = lh->next;
> >>>>>> +		debug_rcu_head_callback(rhp);
> >>>>>> 		local_bh_disable();
> >>>>>> 		rhp->func(rhp);
> >>>>>> 		local_bh_enable();
> >>>>>> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 
> >>>>>> ca4b5dcec675bac..294972e66b31863 100644
> >>>>>> --- a/kernel/rcu/srcutree.c
> >>>>>> +++ b/kernel/rcu/srcutree.c
> >>>>>> @@ -1631,6 +1631,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
> >>>>>> 	rhp = rcu_cblist_dequeue(&ready_cbs);
> >>>>>> 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
> >>>>>> 		debug_rcu_head_unqueue(rhp);
> >>>>>> +		debug_rcu_head_callback(rhp);
> >>>>>> 		local_bh_disable();
> >>>>>> 		rhp->func(rhp);
> >>>>>> 		local_bh_enable();
> >>>>>> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 
> >>>>>> b0b885e071fa8dc..b7f8c67c586cdc4 100644
> >>>>>> --- a/kernel/rcu/tasks.h
> >>>>>> +++ b/kernel/rcu/tasks.h
> >>>>>> @@ -478,6 +478,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
> >>>>>> 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
> >>>>>> 	len = rcl.len;
> >>>>>> 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = 
> >>>>>> rcu_cblist_dequeue(&rcl)) {
> >>>>>> +		debug_rcu_head_callback(rhp);
> >>>>>> 		local_bh_disable();
> >>>>>> 		rhp->func(rhp);
> >>>>>> 		local_bh_enable();
> >>>>>> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 
> >>>>>> bb8f7d270f01747..56e9a5d91d97ec5 100644
> >>>>>> --- a/kernel/rcu/tiny.c
> >>>>>> +++ b/kernel/rcu/tiny.c
> >>>>>> @@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head 
> >>>>>> *head)
> >>>>>>
> >>>>>> 	trace_rcu_invoke_callback("", head);
> >>>>>> 	f = head->func;
> >>>>>> +	debug_rcu_head_callback(head);
> >>>>>> 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
> >>>>>> 	f(head);
> >>>>>> 	rcu_lock_release(&rcu_callback_map);
> >>>>>> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 
> >>>>>> 15aaff3203bf2d0..ed93ddb8203d42c 100644
> >>>>>> --- a/kernel/rcu/tree.c
> >>>>>> +++ b/kernel/rcu/tree.c
> >>>>>> @@ -2088,6 +2088,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
> >>>>>> 		trace_rcu_invoke_callback(rcu_state.name, rhp);
> >>>>>>
> >>>>>> 		f = rhp->func;
> >>>>>> +		debug_rcu_head_callback(rhp);
> >>>>>> 		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
> >>>>>> 		f(rhp);
> >>>>>>
> >>>>>> --
> >>>>>> 2.25.1
> >>>>>
> >>>>> .
> >>>>>
> >>>>
> >>>> --
> >>>> Regards,
> >>>>   Zhen Lei
> >>> .
> >>>
> >>
> >> -- 
> >> Regards,
> >>   Zhen Lei
> > .
> > 
> 
> -- 
> Regards,
>   Zhen Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-11 18:42       ` Paul E. McKenney
  2022-11-12  2:32         ` Leizhen (ThunderTown)
@ 2022-11-14 18:22         ` Uladzislau Rezki
  2022-11-17  1:29           ` Leizhen (ThunderTown)
  1 sibling, 1 reply; 18+ messages in thread
From: Uladzislau Rezki @ 2022-11-14 18:22 UTC (permalink / raw)
  To: Paul E. McKenney, Zhang, Qiang1, Zhen Lei
  Cc: Zhang, Qiang1, Leizhen (ThunderTown),
	Frederic Weisbecker, Neeraj Upadhyay, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Joel Fernandes,
	rcu, linux-kernel

> On Fri, Nov 11, 2022 at 01:05:56PM +0000, Zhang, Qiang1 wrote:
> > On 2022/11/11 19:54, Zhang, Qiang1 wrote:
> > >> When a structure containing an RCU callback rhp is (incorrectly) 
> > >> freed and reallocated after rhp is passed to call_rcu(), it is not 
> > >> unusual for
> > >> rhp->func to be set to NULL. This defeats the debugging prints used 
> > >> rhp->by
> > >> __call_rcu_common() in kernels built with 
> > >> CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the 
> > >> offending code using the identity of this function.
> > >>
> > >> And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things 
> > >> are even worse, as can be seen from this splat:
> > >>
> > >> Unable to handle kernel NULL pointer dereference at virtual address 0 
> > >> ... ...
> > >> PC is at 0x0
> > >> LR is at rcu_do_batch+0x1c0/0x3b8
> > >> ... ...
> > >> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
> > >> (rcu_core) from (__do_softirq+0x24c/0x344)
> > >> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
> > >> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
> > >> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
> > >> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
> > >> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
> > >> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
> > >> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
> > >> (default_idle_call) from (do_idle+0xf8/0x150)
> > >> (do_idle) from (cpu_startup_entry+0x18/0x20)
> > >> (cpu_startup_entry) from (0xc01530)
> > >>
> > >> This commit therefore adds calls to mem_dump_obj(rhp) to output some 
> > >> information, for example:
> > >>
> > >>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
> > >>
> > >> This provides the rough size of the memory block and the offset of 
> > >> the rcu_head structure, which as least provides at least a few clues 
> > >> to help locate the problem. If the problem is reproducible, 
> > >> additional slab debugging can be enabled, for example, 
> > >> CONFIG_DEBUG_SLAB=y, which can provide significantly more information.
> > >>
> > >> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
> > >> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > >> ---
> > >> kernel/rcu/rcu.h      | 7 +++++++
> > >> kernel/rcu/srcutiny.c | 1 +
> > >> kernel/rcu/srcutree.c | 1 +
> > >> kernel/rcu/tasks.h    | 1 +
> > >> kernel/rcu/tiny.c     | 1 +
> > >> kernel/rcu/tree.c     | 1 +
> > >> 6 files changed, 12 insertions(+)
> > >>
> > >> v1 --> v2:
> > >> 1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
> > >> 2. Paul E. McKenney helped me update the commit message, thanks.
> > >>
> > > 
> > > Hi, Zhen Lei
> > > 
> > > Maybe the following scenarios should be considered:
> > > 
> > >                 CPU 0
> > > tasks context
> > >    spin_lock(&vmap_area_lock)
> > >           Interrupt 
> > > 	 RCU softirq
> > > 	      rcu_do_batch
> > > 		mem_dump_obj
> > >                                   vmalloc_dump_obj
> > >                                        spin_lock(&vmap_area_lock)   <--  deadlock     
> > 
> > >Right, thanks. I just saw the robot's report. So this patch should be dropped.
> > >I'll try to add an helper in mm, where I can check whether the lock has been held, and dump the content of memory object.
> > 
> > This is a workaround, or maybe try a modification like the following, 
> > of course, need to ask Paul's opinion.
> 
> Another approach is to schedule a workqueue handler to do the
> mem_dump_obj().  This would allow mem_dump_obj() to run in a clean
> environment.
> 
> This would allow vmalloc_dump_obj() to be called unconditionally.
> 
> Other thoughts?
> 
<snip>
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ca71de7c9d77..956eb28f9c77 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2591,6 +2591,19 @@ struct vm_struct *find_vm_area(const void *addr)
 	return va->vm;
 }
 
+static struct vm_struct *
+find_vm_area_trylock(const void *addr)
+{
+	struct vmap_area *va = NULL;
+
+	if (spin_trylock(&vmap_area_lock)) {
+		va = __find_vmap_area((unsigned long)addr, &vmap_area_root);
+		spin_unlock(&vmap_area_lock);
+	}
+
+	return va ? va->vm : NULL;
+}
+
 /**
  * remove_vm_area - find and remove a continuous kernel virtual area
  * @addr:	    base address
@@ -4048,7 +4061,7 @@ bool vmalloc_dump_obj(void *object)
 	struct vm_struct *vm;
 	void *objp = (void *)PAGE_ALIGN((unsigned long)object);
 
-	vm = find_vm_area(objp);
+	vm = find_vm_area_trylock(objp);
 	if (!vm)
 		return false;
 	pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
<snip>

There is one issue though, it is not correct to reference vm->members
after a lock is dropped because of: use after free bugs. It is better
to embed the search and read out: nr_pages, addr, caller and drop the
lock.

--
Uladzislau Rezki

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-14 16:06               ` Paul E. McKenney
@ 2022-11-16 14:43                 ` Leizhen (ThunderTown)
  2022-11-16 19:57                   ` Paul E. McKenney
  0 siblings, 1 reply; 18+ messages in thread
From: Leizhen (ThunderTown) @ 2022-11-16 14:43 UTC (permalink / raw)
  To: paulmck
  Cc: Zhang, Qiang1, Frederic Weisbecker, Neeraj Upadhyay,
	Josh Triplett, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Joel Fernandes, rcu, linux-kernel



On 2022/11/15 0:06, Paul E. McKenney wrote:
> On Mon, Nov 14, 2022 at 03:18:10PM +0800, Leizhen (ThunderTown) wrote:
>> On 2022/11/12 14:08, Paul E. McKenney wrote:
>>> On Sat, Nov 12, 2022 at 10:32:32AM +0800, Leizhen (ThunderTown) wrote:
>>>> On 2022/11/12 2:42, Paul E. McKenney wrote:
>>>>> On Fri, Nov 11, 2022 at 01:05:56PM +0000, Zhang, Qiang1 wrote:
>>>>>> On 2022/11/11 19:54, Zhang, Qiang1 wrote:
>>>>>>>> When a structure containing an RCU callback rhp is (incorrectly) 
>>>>>>>> freed and reallocated after rhp is passed to call_rcu(), it is not 
>>>>>>>> unusual for
>>>>>>>> rhp->func to be set to NULL. This defeats the debugging prints used 
>>>>>>>> rhp->by
>>>>>>>> __call_rcu_common() in kernels built with 
>>>>>>>> CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the 
>>>>>>>> offending code using the identity of this function.
>>>>>>>>
>>>>>>>> And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things 
>>>>>>>> are even worse, as can be seen from this splat:
>>>>>>>>
>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address 0 
>>>>>>>> ... ...
>>>>>>>> PC is at 0x0
>>>>>>>> LR is at rcu_do_batch+0x1c0/0x3b8
>>>>>>>> ... ...
>>>>>>>> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
>>>>>>>> (rcu_core) from (__do_softirq+0x24c/0x344)
>>>>>>>> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
>>>>>>>> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
>>>>>>>> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
>>>>>>>> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
>>>>>>>> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
>>>>>>>> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
>>>>>>>> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
>>>>>>>> (default_idle_call) from (do_idle+0xf8/0x150)
>>>>>>>> (do_idle) from (cpu_startup_entry+0x18/0x20)
>>>>>>>> (cpu_startup_entry) from (0xc01530)
>>>>>>>>
>>>>>>>> This commit therefore adds calls to mem_dump_obj(rhp) to output some 
>>>>>>>> information, for example:
>>>>>>>>
>>>>>>>>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
>>>>>>>>
>>>>>>>> This provides the rough size of the memory block and the offset of 
>>>>>>>> the rcu_head structure, which as least provides at least a few clues 
>>>>>>>> to help locate the problem. If the problem is reproducible, 
>>>>>>>> additional slab debugging can be enabled, for example, 
>>>>>>>> CONFIG_DEBUG_SLAB=y, which can provide significantly more information.
>>>>>>>>
>>>>>>>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
>>>>>>>> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
>>>>>>>> ---
>>>>>>>> kernel/rcu/rcu.h      | 7 +++++++
>>>>>>>> kernel/rcu/srcutiny.c | 1 +
>>>>>>>> kernel/rcu/srcutree.c | 1 +
>>>>>>>> kernel/rcu/tasks.h    | 1 +
>>>>>>>> kernel/rcu/tiny.c     | 1 +
>>>>>>>> kernel/rcu/tree.c     | 1 +
>>>>>>>> 6 files changed, 12 insertions(+)
>>>>>>>>
>>>>>>>> v1 --> v2:
>>>>>>>> 1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
>>>>>>>> 2. Paul E. McKenney helped me update the commit message, thanks.
>>>>>>>>
>>>>>>>
>>>>>>> Hi, Zhen Lei
>>>>>>>
>>>>>>> Maybe the following scenarios should be considered:
>>>>>>>
>>>>>>>                 CPU 0
>>>>>>> tasks context
>>>>>>>    spin_lock(&vmap_area_lock)
>>>>>>>           Interrupt 
>>>>>>> 	 RCU softirq
>>>>>>> 	      rcu_do_batch
>>>>>>> 		mem_dump_obj
>>>>>>>                                   vmalloc_dump_obj
>>>>>>>                                        spin_lock(&vmap_area_lock)   <--  deadlock     
>>>>>>
>>>>>>> Right, thanks. I just saw the robot's report. So this patch should be dropped.
>>>>>>> I'll try to add an helper in mm, where I can check whether the lock has been held, and dump the content of memory object.
>>>>>>
>>>>>> This is a workaround, or maybe try a modification like the following, 
>>>>>> of course, need to ask Paul's opinion.
>>>>>
>>>>> Another approach is to schedule a workqueue handler to do the
>>>>> mem_dump_obj().  This would allow mem_dump_obj() to run in a clean
>>>>> environment.
>>>>
>>>> It's about to panic, so no chance to schedule.
>>>
>>> It won't panic if you drop the callback on the floor.
>>>
>>> Though to your point, the ->next pointer is likely also trashed.  So you
>>> could just drop the remainder of the callback list on the floor.  That
>>> might provide a good (though not perfect) chance of getting decent output.
>>
>> OK, I think I understand what you mean.
>> if (!f)
>> 	schedule_work(&work);
>> else
>> 	f(rhp)
> 
> Yes, except that the "schedule_work()" also needs to be accompanied
> by something that refuses to execute the rest of those callbacks.
> This needs to break out of the loop (or return) and to adjust counts,
> among other things.  This might be as easy as setting count to the
> negative of the length of the "rcl" list, but does need some attention
> to the code following the callback-invocation loop.

Yes, doing so would cause other problems. As you mentioned, the ->next
pointer is likely also trashed. Some nodes may need to be executed in
sequence. For such a weak debug function, it's not worth the risk, or
overly complicated thinking.

> 
> 							Thanx, Paul
> 
>>>>> This would allow vmalloc_dump_obj() to be called unconditionally.
>>>>>
>>>>> Other thoughts?
>>>>
>>>> locked = spin_is_locked(&vmap_area_lock);
>>>> if (!locked)
>>>>     spin_lock(&vmap_area_lock)
>>>>
>>>> Careful analysis is required, which may cause other problems.
>>>>
>>>> Or in new function:
>>>> if (locked)
>>>>     return;
>>>> spin_lock(&vmap_area_lock);
>>>>
>>>> If there is a chance to dump the data, dump the data. If there is no
>>>> chance to dump the data, do not dump the data. This is the fate of
>>>> debugging information.
>>>
>>> My concern is that there will be increasing numbers of special cases
>>> over time.

The memory modules are mature and stable, so your concerns may not be true.

>>
>> OK, I got it.
>>
>>>
>>> 							Thanx, Paul
>>>
>>>>>> diff --git a/mm/util.c b/mm/util.c
>>>>>> index 12984e76767e..86da0739fe5d 100644
>>>>>> --- a/mm/util.c
>>>>>> +++ b/mm/util.c
>>>>>> @@ -1119,14 +1119,18 @@ void mem_dump_obj(void *object)
>>>>>>  {
>>>>>>         const char *type;
>>>>>>
>>>>>> +       if (is_vmalloc_addr(object)) {
>>>>>> +               if (in_task() && vmalloc_dump_obj(object))
>>>>>> +                       return;
>>>>>> +               type = "vmalloc memory";
>>>>>> +               goto end;
>>>>>> +       }
>>>>>> +
>>>>>>         if (kmem_valid_obj(object)) {
>>>>>>                 kmem_dump_obj(object);
>>>>>>                 return;
>>>>>>         }
>>>>>>
>>>>>> -       if (vmalloc_dump_obj(object))
>>>>>> -               return;
>>>>>> -
>>>>>>         if (virt_addr_valid(object))
>>>>>>                 type = "non-slab/vmalloc memory";
>>>>>>         else if (object == NULL)
>>>>>> @@ -1135,7 +1139,7 @@ void mem_dump_obj(void *object)
>>>>>>                 type = "zero-size pointer";
>>>>>>         else
>>>>>>                 type = "non-paged memory";
>>>>>> -
>>>>>> +end:
>>>>>>         pr_cont(" %s\n", type);
>>>>>>  }
>>>>>>  EXPORT_SYMBOL_GPL(mem_dump_obj);
>>>>>>
>>>>>> Thanks
>>>>>> Zqiang
>>>>>>
>>>>>>
>>>>>>>
>>>>>>> Thanks
>>>>>>> Zqiang
>>>>>>>
>>>>>>>
>>>>>>>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 
>>>>>>>> 65704cbc9df7b3d..32ab45fabf8eebf 100644
>>>>>>>> --- a/kernel/rcu/rcu.h
>>>>>>>> +++ b/kernel/rcu/rcu.h
>>>>>>>> @@ -10,6 +10,7 @@
>>>>>>>> #ifndef __LINUX_RCU_H
>>>>>>>> #define __LINUX_RCU_H
>>>>>>>>
>>>>>>>> +#include <linux/mm.h>
>>>>>>>> #include <trace/events/rcu.h>
>>>>>>>>
>>>>>>>> /*
>>>>>>>> @@ -211,6 +212,12 @@ static inline void debug_rcu_head_unqueue(struct 
>>>>>>>> rcu_head *head) }
>>>>>>>> #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
>>>>>>>>
>>>>>>>> +static inline void debug_rcu_head_callback(struct rcu_head *rhp) {
>>>>>>>> +	if (unlikely(!rhp->func))
>>>>>>>> +		mem_dump_obj(rhp);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> extern int rcu_cpu_stall_suppress_at_boot;
>>>>>>>>
>>>>>>>> static inline bool rcu_stall_is_suppressed_at_boot(void)
>>>>>>>> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 
>>>>>>>> 33adafdad261389..5e7f336baa06ae0 100644
>>>>>>>> --- a/kernel/rcu/srcutiny.c
>>>>>>>> +++ b/kernel/rcu/srcutiny.c
>>>>>>>> @@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
>>>>>>>> 	while (lh) {
>>>>>>>> 		rhp = lh;
>>>>>>>> 		lh = lh->next;
>>>>>>>> +		debug_rcu_head_callback(rhp);
>>>>>>>> 		local_bh_disable();
>>>>>>>> 		rhp->func(rhp);
>>>>>>>> 		local_bh_enable();
>>>>>>>> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 
>>>>>>>> ca4b5dcec675bac..294972e66b31863 100644
>>>>>>>> --- a/kernel/rcu/srcutree.c
>>>>>>>> +++ b/kernel/rcu/srcutree.c
>>>>>>>> @@ -1631,6 +1631,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
>>>>>>>> 	rhp = rcu_cblist_dequeue(&ready_cbs);
>>>>>>>> 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
>>>>>>>> 		debug_rcu_head_unqueue(rhp);
>>>>>>>> +		debug_rcu_head_callback(rhp);
>>>>>>>> 		local_bh_disable();
>>>>>>>> 		rhp->func(rhp);
>>>>>>>> 		local_bh_enable();
>>>>>>>> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 
>>>>>>>> b0b885e071fa8dc..b7f8c67c586cdc4 100644
>>>>>>>> --- a/kernel/rcu/tasks.h
>>>>>>>> +++ b/kernel/rcu/tasks.h
>>>>>>>> @@ -478,6 +478,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
>>>>>>>> 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
>>>>>>>> 	len = rcl.len;
>>>>>>>> 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = 
>>>>>>>> rcu_cblist_dequeue(&rcl)) {
>>>>>>>> +		debug_rcu_head_callback(rhp);
>>>>>>>> 		local_bh_disable();
>>>>>>>> 		rhp->func(rhp);
>>>>>>>> 		local_bh_enable();
>>>>>>>> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 
>>>>>>>> bb8f7d270f01747..56e9a5d91d97ec5 100644
>>>>>>>> --- a/kernel/rcu/tiny.c
>>>>>>>> +++ b/kernel/rcu/tiny.c
>>>>>>>> @@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head 
>>>>>>>> *head)
>>>>>>>>
>>>>>>>> 	trace_rcu_invoke_callback("", head);
>>>>>>>> 	f = head->func;
>>>>>>>> +	debug_rcu_head_callback(head);
>>>>>>>> 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
>>>>>>>> 	f(head);
>>>>>>>> 	rcu_lock_release(&rcu_callback_map);
>>>>>>>> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 
>>>>>>>> 15aaff3203bf2d0..ed93ddb8203d42c 100644
>>>>>>>> --- a/kernel/rcu/tree.c
>>>>>>>> +++ b/kernel/rcu/tree.c
>>>>>>>> @@ -2088,6 +2088,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
>>>>>>>> 		trace_rcu_invoke_callback(rcu_state.name, rhp);
>>>>>>>>
>>>>>>>> 		f = rhp->func;
>>>>>>>> +		debug_rcu_head_callback(rhp);
>>>>>>>> 		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
>>>>>>>> 		f(rhp);
>>>>>>>>
>>>>>>>> --
>>>>>>>> 2.25.1
>>>>>>>
>>>>>>> .
>>>>>>>
>>>>>>
>>>>>> --
>>>>>> Regards,
>>>>>>   Zhen Lei
>>>>> .
>>>>>
>>>>
>>>> -- 
>>>> Regards,
>>>>   Zhen Lei
>>> .
>>>
>>
>> -- 
>> Regards,
>>   Zhen Lei
> .
> 

-- 
Regards,
  Zhen Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-16 14:43                 ` Leizhen (ThunderTown)
@ 2022-11-16 19:57                   ` Paul E. McKenney
  2022-11-17  0:09                     ` Zhang, Qiang1
  0 siblings, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2022-11-16 19:57 UTC (permalink / raw)
  To: Leizhen (ThunderTown)
  Cc: Zhang, Qiang1, Frederic Weisbecker, Neeraj Upadhyay,
	Josh Triplett, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Joel Fernandes, rcu, linux-kernel

On Wed, Nov 16, 2022 at 10:43:57PM +0800, Leizhen (ThunderTown) wrote:
> On 2022/11/15 0:06, Paul E. McKenney wrote:
> > On Mon, Nov 14, 2022 at 03:18:10PM +0800, Leizhen (ThunderTown) wrote:
> >> On 2022/11/12 14:08, Paul E. McKenney wrote:
> >>> On Sat, Nov 12, 2022 at 10:32:32AM +0800, Leizhen (ThunderTown) wrote:
> >>>> On 2022/11/12 2:42, Paul E. McKenney wrote:
> >>>>> On Fri, Nov 11, 2022 at 01:05:56PM +0000, Zhang, Qiang1 wrote:
> >>>>>> On 2022/11/11 19:54, Zhang, Qiang1 wrote:
> >>>>>>>> When a structure containing an RCU callback rhp is (incorrectly) 
> >>>>>>>> freed and reallocated after rhp is passed to call_rcu(), it is not 
> >>>>>>>> unusual for
> >>>>>>>> rhp->func to be set to NULL. This defeats the debugging prints used 
> >>>>>>>> rhp->by
> >>>>>>>> __call_rcu_common() in kernels built with 
> >>>>>>>> CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the 
> >>>>>>>> offending code using the identity of this function.
> >>>>>>>>
> >>>>>>>> And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things 
> >>>>>>>> are even worse, as can be seen from this splat:
> >>>>>>>>
> >>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address 0 
> >>>>>>>> ... ...
> >>>>>>>> PC is at 0x0
> >>>>>>>> LR is at rcu_do_batch+0x1c0/0x3b8
> >>>>>>>> ... ...
> >>>>>>>> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
> >>>>>>>> (rcu_core) from (__do_softirq+0x24c/0x344)
> >>>>>>>> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
> >>>>>>>> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
> >>>>>>>> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
> >>>>>>>> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
> >>>>>>>> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
> >>>>>>>> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
> >>>>>>>> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
> >>>>>>>> (default_idle_call) from (do_idle+0xf8/0x150)
> >>>>>>>> (do_idle) from (cpu_startup_entry+0x18/0x20)
> >>>>>>>> (cpu_startup_entry) from (0xc01530)
> >>>>>>>>
> >>>>>>>> This commit therefore adds calls to mem_dump_obj(rhp) to output some 
> >>>>>>>> information, for example:
> >>>>>>>>
> >>>>>>>>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
> >>>>>>>>
> >>>>>>>> This provides the rough size of the memory block and the offset of 
> >>>>>>>> the rcu_head structure, which as least provides at least a few clues 
> >>>>>>>> to help locate the problem. If the problem is reproducible, 
> >>>>>>>> additional slab debugging can be enabled, for example, 
> >>>>>>>> CONFIG_DEBUG_SLAB=y, which can provide significantly more information.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
> >>>>>>>> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> >>>>>>>> ---
> >>>>>>>> kernel/rcu/rcu.h      | 7 +++++++
> >>>>>>>> kernel/rcu/srcutiny.c | 1 +
> >>>>>>>> kernel/rcu/srcutree.c | 1 +
> >>>>>>>> kernel/rcu/tasks.h    | 1 +
> >>>>>>>> kernel/rcu/tiny.c     | 1 +
> >>>>>>>> kernel/rcu/tree.c     | 1 +
> >>>>>>>> 6 files changed, 12 insertions(+)
> >>>>>>>>
> >>>>>>>> v1 --> v2:
> >>>>>>>> 1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
> >>>>>>>> 2. Paul E. McKenney helped me update the commit message, thanks.
> >>>>>>>>
> >>>>>>>
> >>>>>>> Hi, Zhen Lei
> >>>>>>>
> >>>>>>> Maybe the following scenarios should be considered:
> >>>>>>>
> >>>>>>>                 CPU 0
> >>>>>>> tasks context
> >>>>>>>    spin_lock(&vmap_area_lock)
> >>>>>>>           Interrupt 
> >>>>>>> 	 RCU softirq
> >>>>>>> 	      rcu_do_batch
> >>>>>>> 		mem_dump_obj
> >>>>>>>                                   vmalloc_dump_obj
> >>>>>>>                                        spin_lock(&vmap_area_lock)   <--  deadlock     
> >>>>>>
> >>>>>>> Right, thanks. I just saw the robot's report. So this patch should be dropped.
> >>>>>>> I'll try to add an helper in mm, where I can check whether the lock has been held, and dump the content of memory object.
> >>>>>>
> >>>>>> This is a workaround, or maybe try a modification like the following, 
> >>>>>> of course, need to ask Paul's opinion.
> >>>>>
> >>>>> Another approach is to schedule a workqueue handler to do the
> >>>>> mem_dump_obj().  This would allow mem_dump_obj() to run in a clean
> >>>>> environment.
> >>>>
> >>>> It's about to panic, so no chance to schedule.
> >>>
> >>> It won't panic if you drop the callback on the floor.
> >>>
> >>> Though to your point, the ->next pointer is likely also trashed.  So you
> >>> could just drop the remainder of the callback list on the floor.  That
> >>> might provide a good (though not perfect) chance of getting decent output.
> >>
> >> OK, I think I understand what you mean.
> >> if (!f)
> >> 	schedule_work(&work);
> >> else
> >> 	f(rhp)
> > 
> > Yes, except that the "schedule_work()" also needs to be accompanied
> > by something that refuses to execute the rest of those callbacks.
> > This needs to break out of the loop (or return) and to adjust counts,
> > among other things.  This might be as easy as setting count to the
> > negative of the length of the "rcl" list, but does need some attention
> > to the code following the callback-invocation loop.
> 
> Yes, doing so would cause other problems. As you mentioned, the ->next
> pointer is likely also trashed. Some nodes may need to be executed in
> sequence. For such a weak debug function, it's not worth the risk, or
> overly complicated thinking.

Do we have similar deadlock issues with the calls to mem_dump_obj() in
the call_rcu() code path?  These are somewhat less concerning because
they are invoked under a Kconfig option that is (as far as I know)
rarely set, but still...

							Thanx, Paul

> >>>>> This would allow vmalloc_dump_obj() to be called unconditionally.
> >>>>>
> >>>>> Other thoughts?
> >>>>
> >>>> locked = spin_is_locked(&vmap_area_lock);
> >>>> if (!locked)
> >>>>     spin_lock(&vmap_area_lock)
> >>>>
> >>>> Careful analysis is required, which may cause other problems.
> >>>>
> >>>> Or in new function:
> >>>> if (locked)
> >>>>     return;
> >>>> spin_lock(&vmap_area_lock);
> >>>>
> >>>> If there is a chance to dump the data, dump the data. If there is no
> >>>> chance to dump the data, do not dump the data. This is the fate of
> >>>> debugging information.
> >>>
> >>> My concern is that there will be increasing numbers of special cases
> >>> over time.
> 
> The memory modules are mature and stable, so your concerns may not be true.
> 
> >>
> >> OK, I got it.
> >>
> >>>
> >>> 							Thanx, Paul
> >>>
> >>>>>> diff --git a/mm/util.c b/mm/util.c
> >>>>>> index 12984e76767e..86da0739fe5d 100644
> >>>>>> --- a/mm/util.c
> >>>>>> +++ b/mm/util.c
> >>>>>> @@ -1119,14 +1119,18 @@ void mem_dump_obj(void *object)
> >>>>>>  {
> >>>>>>         const char *type;
> >>>>>>
> >>>>>> +       if (is_vmalloc_addr(object)) {
> >>>>>> +               if (in_task() && vmalloc_dump_obj(object))
> >>>>>> +                       return;
> >>>>>> +               type = "vmalloc memory";
> >>>>>> +               goto end;
> >>>>>> +       }
> >>>>>> +
> >>>>>>         if (kmem_valid_obj(object)) {
> >>>>>>                 kmem_dump_obj(object);
> >>>>>>                 return;
> >>>>>>         }
> >>>>>>
> >>>>>> -       if (vmalloc_dump_obj(object))
> >>>>>> -               return;
> >>>>>> -
> >>>>>>         if (virt_addr_valid(object))
> >>>>>>                 type = "non-slab/vmalloc memory";
> >>>>>>         else if (object == NULL)
> >>>>>> @@ -1135,7 +1139,7 @@ void mem_dump_obj(void *object)
> >>>>>>                 type = "zero-size pointer";
> >>>>>>         else
> >>>>>>                 type = "non-paged memory";
> >>>>>> -
> >>>>>> +end:
> >>>>>>         pr_cont(" %s\n", type);
> >>>>>>  }
> >>>>>>  EXPORT_SYMBOL_GPL(mem_dump_obj);
> >>>>>>
> >>>>>> Thanks
> >>>>>> Zqiang
> >>>>>>
> >>>>>>
> >>>>>>>
> >>>>>>> Thanks
> >>>>>>> Zqiang
> >>>>>>>
> >>>>>>>
> >>>>>>>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 
> >>>>>>>> 65704cbc9df7b3d..32ab45fabf8eebf 100644
> >>>>>>>> --- a/kernel/rcu/rcu.h
> >>>>>>>> +++ b/kernel/rcu/rcu.h
> >>>>>>>> @@ -10,6 +10,7 @@
> >>>>>>>> #ifndef __LINUX_RCU_H
> >>>>>>>> #define __LINUX_RCU_H
> >>>>>>>>
> >>>>>>>> +#include <linux/mm.h>
> >>>>>>>> #include <trace/events/rcu.h>
> >>>>>>>>
> >>>>>>>> /*
> >>>>>>>> @@ -211,6 +212,12 @@ static inline void debug_rcu_head_unqueue(struct 
> >>>>>>>> rcu_head *head) }
> >>>>>>>> #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
> >>>>>>>>
> >>>>>>>> +static inline void debug_rcu_head_callback(struct rcu_head *rhp) {
> >>>>>>>> +	if (unlikely(!rhp->func))
> >>>>>>>> +		mem_dump_obj(rhp);
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> extern int rcu_cpu_stall_suppress_at_boot;
> >>>>>>>>
> >>>>>>>> static inline bool rcu_stall_is_suppressed_at_boot(void)
> >>>>>>>> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 
> >>>>>>>> 33adafdad261389..5e7f336baa06ae0 100644
> >>>>>>>> --- a/kernel/rcu/srcutiny.c
> >>>>>>>> +++ b/kernel/rcu/srcutiny.c
> >>>>>>>> @@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
> >>>>>>>> 	while (lh) {
> >>>>>>>> 		rhp = lh;
> >>>>>>>> 		lh = lh->next;
> >>>>>>>> +		debug_rcu_head_callback(rhp);
> >>>>>>>> 		local_bh_disable();
> >>>>>>>> 		rhp->func(rhp);
> >>>>>>>> 		local_bh_enable();
> >>>>>>>> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 
> >>>>>>>> ca4b5dcec675bac..294972e66b31863 100644
> >>>>>>>> --- a/kernel/rcu/srcutree.c
> >>>>>>>> +++ b/kernel/rcu/srcutree.c
> >>>>>>>> @@ -1631,6 +1631,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
> >>>>>>>> 	rhp = rcu_cblist_dequeue(&ready_cbs);
> >>>>>>>> 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
> >>>>>>>> 		debug_rcu_head_unqueue(rhp);
> >>>>>>>> +		debug_rcu_head_callback(rhp);
> >>>>>>>> 		local_bh_disable();
> >>>>>>>> 		rhp->func(rhp);
> >>>>>>>> 		local_bh_enable();
> >>>>>>>> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 
> >>>>>>>> b0b885e071fa8dc..b7f8c67c586cdc4 100644
> >>>>>>>> --- a/kernel/rcu/tasks.h
> >>>>>>>> +++ b/kernel/rcu/tasks.h
> >>>>>>>> @@ -478,6 +478,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
> >>>>>>>> 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
> >>>>>>>> 	len = rcl.len;
> >>>>>>>> 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = 
> >>>>>>>> rcu_cblist_dequeue(&rcl)) {
> >>>>>>>> +		debug_rcu_head_callback(rhp);
> >>>>>>>> 		local_bh_disable();
> >>>>>>>> 		rhp->func(rhp);
> >>>>>>>> 		local_bh_enable();
> >>>>>>>> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 
> >>>>>>>> bb8f7d270f01747..56e9a5d91d97ec5 100644
> >>>>>>>> --- a/kernel/rcu/tiny.c
> >>>>>>>> +++ b/kernel/rcu/tiny.c
> >>>>>>>> @@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head 
> >>>>>>>> *head)
> >>>>>>>>
> >>>>>>>> 	trace_rcu_invoke_callback("", head);
> >>>>>>>> 	f = head->func;
> >>>>>>>> +	debug_rcu_head_callback(head);
> >>>>>>>> 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
> >>>>>>>> 	f(head);
> >>>>>>>> 	rcu_lock_release(&rcu_callback_map);
> >>>>>>>> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 
> >>>>>>>> 15aaff3203bf2d0..ed93ddb8203d42c 100644
> >>>>>>>> --- a/kernel/rcu/tree.c
> >>>>>>>> +++ b/kernel/rcu/tree.c
> >>>>>>>> @@ -2088,6 +2088,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
> >>>>>>>> 		trace_rcu_invoke_callback(rcu_state.name, rhp);
> >>>>>>>>
> >>>>>>>> 		f = rhp->func;
> >>>>>>>> +		debug_rcu_head_callback(rhp);
> >>>>>>>> 		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
> >>>>>>>> 		f(rhp);
> >>>>>>>>
> >>>>>>>> --
> >>>>>>>> 2.25.1
> >>>>>>>
> >>>>>>> .
> >>>>>>>
> >>>>>>
> >>>>>> --
> >>>>>> Regards,
> >>>>>>   Zhen Lei
> >>>>> .
> >>>>>
> >>>>
> >>>> -- 
> >>>> Regards,
> >>>>   Zhen Lei
> >>> .
> >>>
> >>
> >> -- 
> >> Regards,
> >>   Zhen Lei
> > .
> > 
> 
> -- 
> Regards,
>   Zhen Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-16 19:57                   ` Paul E. McKenney
@ 2022-11-17  0:09                     ` Zhang, Qiang1
  0 siblings, 0 replies; 18+ messages in thread
From: Zhang, Qiang1 @ 2022-11-17  0:09 UTC (permalink / raw)
  To: paulmck, Leizhen (ThunderTown)
  Cc: Frederic Weisbecker, Neeraj Upadhyay, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Joel Fernandes,
	rcu, linux-kernel

On Wed, Nov 16, 2022 at 10:43:57PM +0800, Leizhen (ThunderTown) wrote:
> On 2022/11/15 0:06, Paul E. McKenney wrote:
> > On Mon, Nov 14, 2022 at 03:18:10PM +0800, Leizhen (ThunderTown) wrote:
> >> On 2022/11/12 14:08, Paul E. McKenney wrote:
> >>> On Sat, Nov 12, 2022 at 10:32:32AM +0800, Leizhen (ThunderTown) wrote:
> >>>> On 2022/11/12 2:42, Paul E. McKenney wrote:
> >>>>> On Fri, Nov 11, 2022 at 01:05:56PM +0000, Zhang, Qiang1 wrote:
> >>>>>> On 2022/11/11 19:54, Zhang, Qiang1 wrote:
> >>>>>>>> When a structure containing an RCU callback rhp is (incorrectly) 
> >>>>>>>> freed and reallocated after rhp is passed to call_rcu(), it is not 
> >>>>>>>> unusual for
> >>>>>>>> rhp->func to be set to NULL. This defeats the debugging prints used 
> >>>>>>>> rhp->by
> >>>>>>>> __call_rcu_common() in kernels built with 
> >>>>>>>> CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the 
> >>>>>>>> offending code using the identity of this function.
> >>>>>>>>
> >>>>>>>> And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things 
> >>>>>>>> are even worse, as can be seen from this splat:
> >>>>>>>>
> >>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address 0 
> >>>>>>>> ... ...
> >>>>>>>> PC is at 0x0
> >>>>>>>> LR is at rcu_do_batch+0x1c0/0x3b8
> >>>>>>>> ... ...
> >>>>>>>> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
> >>>>>>>> (rcu_core) from (__do_softirq+0x24c/0x344)
> >>>>>>>> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
> >>>>>>>> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
> >>>>>>>> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
> >>>>>>>> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
> >>>>>>>> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
> >>>>>>>> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
> >>>>>>>> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
> >>>>>>>> (default_idle_call) from (do_idle+0xf8/0x150)
> >>>>>>>> (do_idle) from (cpu_startup_entry+0x18/0x20)
> >>>>>>>> (cpu_startup_entry) from (0xc01530)
> >>>>>>>>
> >>>>>>>> This commit therefore adds calls to mem_dump_obj(rhp) to output some 
> >>>>>>>> information, for example:
> >>>>>>>>
> >>>>>>>>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
> >>>>>>>>
> >>>>>>>> This provides the rough size of the memory block and the offset of 
> >>>>>>>> the rcu_head structure, which as least provides at least a few clues 
> >>>>>>>> to help locate the problem. If the problem is reproducible, 
> >>>>>>>> additional slab debugging can be enabled, for example, 
> >>>>>>>> CONFIG_DEBUG_SLAB=y, which can provide significantly more information.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
> >>>>>>>> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> >>>>>>>> ---
> >>>>>>>> kernel/rcu/rcu.h      | 7 +++++++
> >>>>>>>> kernel/rcu/srcutiny.c | 1 +
> >>>>>>>> kernel/rcu/srcutree.c | 1 +
> >>>>>>>> kernel/rcu/tasks.h    | 1 +
> >>>>>>>> kernel/rcu/tiny.c     | 1 +
> >>>>>>>> kernel/rcu/tree.c     | 1 +
> >>>>>>>> 6 files changed, 12 insertions(+)
> >>>>>>>>
> >>>>>>>> v1 --> v2:
> >>>>>>>> 1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
> >>>>>>>> 2. Paul E. McKenney helped me update the commit message, thanks.
> >>>>>>>>
> >>>>>>>
> >>>>>>> Hi, Zhen Lei
> >>>>>>>
> >>>>>>> Maybe the following scenarios should be considered:
> >>>>>>>
> >>>>>>>                 CPU 0
> >>>>>>> tasks context
> >>>>>>>    spin_lock(&vmap_area_lock)
> >>>>>>>           Interrupt 
> >>>>>>> 	 RCU softirq
> >>>>>>> 	      rcu_do_batch
> >>>>>>> 		mem_dump_obj
> >>>>>>>                                   vmalloc_dump_obj
> >>>>>>>                                        spin_lock(&vmap_area_lock)   <--  deadlock     
> >>>>>>
> >>>>>>> Right, thanks. I just saw the robot's report. So this patch should be dropped.
> >>>>>>> I'll try to add an helper in mm, where I can check whether the lock has been held, and dump the content of memory object.
> >>>>>>
> >>>>>> This is a workaround, or maybe try a modification like the following, 
> >>>>>> of course, need to ask Paul's opinion.
> >>>>>
> >>>>> Another approach is to schedule a workqueue handler to do the
> >>>>> mem_dump_obj().  This would allow mem_dump_obj() to run in a clean
> >>>>> environment.
> >>>>
> >>>> It's about to panic, so no chance to schedule.
> >>>
> >>> It won't panic if you drop the callback on the floor.
> >>>
> >>> Though to your point, the ->next pointer is likely also trashed.  So you
> >>> could just drop the remainder of the callback list on the floor.  That
> >>> might provide a good (though not perfect) chance of getting decent output.
> >>
> >> OK, I think I understand what you mean.
> >> if (!f)
> >> 	schedule_work(&work);
> >> else
> >> 	f(rhp)
> > 
> > Yes, except that the "schedule_work()" also needs to be accompanied
> > by something that refuses to execute the rest of those callbacks.
> > This needs to break out of the loop (or return) and to adjust counts,
> > among other things.  This might be as easy as setting count to the
> > negative of the length of the "rcl" list, but does need some attention
> > to the code following the callback-invocation loop.
> 
> Yes, doing so would cause other problems. As you mentioned, the ->next
> pointer is likely also trashed. Some nodes may need to be executed in
> sequence. For such a weak debug function, it's not worth the risk, or
> overly complicated thinking.

>Do we have similar deadlock issues with the calls to mem_dump_obj() in
>the call_rcu() code path? 

I think it exists, and also consider PREEMPT_RT kernel ,  because the vmap_area_lock spinlock
convert to sleepable lock. I have sent a version of the patch.

Thanks
Zqiang

> These are somewhat less concerning because
>they are invoked under a Kconfig option that is (as far as I know)
>rarely set, but still...
>
>							Thanx, Paul

> >>>>> This would allow vmalloc_dump_obj() to be called unconditionally.
> >>>>>
> >>>>> Other thoughts?
> >>>>
> >>>> locked = spin_is_locked(&vmap_area_lock);
> >>>> if (!locked)
> >>>>     spin_lock(&vmap_area_lock)
> >>>>
> >>>> Careful analysis is required, which may cause other problems.
> >>>>
> >>>> Or in new function:
> >>>> if (locked)
> >>>>     return;
> >>>> spin_lock(&vmap_area_lock);
> >>>>
> >>>> If there is a chance to dump the data, dump the data. If there is no
> >>>> chance to dump the data, do not dump the data. This is the fate of
> >>>> debugging information.
> >>>
> >>> My concern is that there will be increasing numbers of special cases
> >>> over time.
> 
> The memory modules are mature and stable, so your concerns may not be true.
> 
> >>
> >> OK, I got it.
> >>
> >>>
> >>> 							Thanx, Paul
> >>>
> >>>>>> diff --git a/mm/util.c b/mm/util.c
> >>>>>> index 12984e76767e..86da0739fe5d 100644
> >>>>>> --- a/mm/util.c
> >>>>>> +++ b/mm/util.c
> >>>>>> @@ -1119,14 +1119,18 @@ void mem_dump_obj(void *object)
> >>>>>>  {
> >>>>>>         const char *type;
> >>>>>>
> >>>>>> +       if (is_vmalloc_addr(object)) {
> >>>>>> +               if (in_task() && vmalloc_dump_obj(object))
> >>>>>> +                       return;
> >>>>>> +               type = "vmalloc memory";
> >>>>>> +               goto end;
> >>>>>> +       }
> >>>>>> +
> >>>>>>         if (kmem_valid_obj(object)) {
> >>>>>>                 kmem_dump_obj(object);
> >>>>>>                 return;
> >>>>>>         }
> >>>>>>
> >>>>>> -       if (vmalloc_dump_obj(object))
> >>>>>> -               return;
> >>>>>> -
> >>>>>>         if (virt_addr_valid(object))
> >>>>>>                 type = "non-slab/vmalloc memory";
> >>>>>>         else if (object == NULL)
> >>>>>> @@ -1135,7 +1139,7 @@ void mem_dump_obj(void *object)
> >>>>>>                 type = "zero-size pointer";
> >>>>>>         else
> >>>>>>                 type = "non-paged memory";
> >>>>>> -
> >>>>>> +end:
> >>>>>>         pr_cont(" %s\n", type);
> >>>>>>  }
> >>>>>>  EXPORT_SYMBOL_GPL(mem_dump_obj);
> >>>>>>
> >>>>>> Thanks
> >>>>>> Zqiang
> >>>>>>
> >>>>>>
> >>>>>>>
> >>>>>>> Thanks
> >>>>>>> Zqiang
> >>>>>>>
> >>>>>>>
> >>>>>>>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 
> >>>>>>>> 65704cbc9df7b3d..32ab45fabf8eebf 100644
> >>>>>>>> --- a/kernel/rcu/rcu.h
> >>>>>>>> +++ b/kernel/rcu/rcu.h
> >>>>>>>> @@ -10,6 +10,7 @@
> >>>>>>>> #ifndef __LINUX_RCU_H
> >>>>>>>> #define __LINUX_RCU_H
> >>>>>>>>
> >>>>>>>> +#include <linux/mm.h>
> >>>>>>>> #include <trace/events/rcu.h>
> >>>>>>>>
> >>>>>>>> /*
> >>>>>>>> @@ -211,6 +212,12 @@ static inline void debug_rcu_head_unqueue(struct 
> >>>>>>>> rcu_head *head) }
> >>>>>>>> #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
> >>>>>>>>
> >>>>>>>> +static inline void debug_rcu_head_callback(struct rcu_head *rhp) {
> >>>>>>>> +	if (unlikely(!rhp->func))
> >>>>>>>> +		mem_dump_obj(rhp);
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> extern int rcu_cpu_stall_suppress_at_boot;
> >>>>>>>>
> >>>>>>>> static inline bool rcu_stall_is_suppressed_at_boot(void)
> >>>>>>>> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 
> >>>>>>>> 33adafdad261389..5e7f336baa06ae0 100644
> >>>>>>>> --- a/kernel/rcu/srcutiny.c
> >>>>>>>> +++ b/kernel/rcu/srcutiny.c
> >>>>>>>> @@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
> >>>>>>>> 	while (lh) {
> >>>>>>>> 		rhp = lh;
> >>>>>>>> 		lh = lh->next;
> >>>>>>>> +		debug_rcu_head_callback(rhp);
> >>>>>>>> 		local_bh_disable();
> >>>>>>>> 		rhp->func(rhp);
> >>>>>>>> 		local_bh_enable();
> >>>>>>>> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 
> >>>>>>>> ca4b5dcec675bac..294972e66b31863 100644
> >>>>>>>> --- a/kernel/rcu/srcutree.c
> >>>>>>>> +++ b/kernel/rcu/srcutree.c
> >>>>>>>> @@ -1631,6 +1631,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
> >>>>>>>> 	rhp = rcu_cblist_dequeue(&ready_cbs);
> >>>>>>>> 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
> >>>>>>>> 		debug_rcu_head_unqueue(rhp);
> >>>>>>>> +		debug_rcu_head_callback(rhp);
> >>>>>>>> 		local_bh_disable();
> >>>>>>>> 		rhp->func(rhp);
> >>>>>>>> 		local_bh_enable();
> >>>>>>>> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 
> >>>>>>>> b0b885e071fa8dc..b7f8c67c586cdc4 100644
> >>>>>>>> --- a/kernel/rcu/tasks.h
> >>>>>>>> +++ b/kernel/rcu/tasks.h
> >>>>>>>> @@ -478,6 +478,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
> >>>>>>>> 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
> >>>>>>>> 	len = rcl.len;
> >>>>>>>> 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = 
> >>>>>>>> rcu_cblist_dequeue(&rcl)) {
> >>>>>>>> +		debug_rcu_head_callback(rhp);
> >>>>>>>> 		local_bh_disable();
> >>>>>>>> 		rhp->func(rhp);
> >>>>>>>> 		local_bh_enable();
> >>>>>>>> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 
> >>>>>>>> bb8f7d270f01747..56e9a5d91d97ec5 100644
> >>>>>>>> --- a/kernel/rcu/tiny.c
> >>>>>>>> +++ b/kernel/rcu/tiny.c
> >>>>>>>> @@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head 
> >>>>>>>> *head)
> >>>>>>>>
> >>>>>>>> 	trace_rcu_invoke_callback("", head);
> >>>>>>>> 	f = head->func;
> >>>>>>>> +	debug_rcu_head_callback(head);
> >>>>>>>> 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
> >>>>>>>> 	f(head);
> >>>>>>>> 	rcu_lock_release(&rcu_callback_map);
> >>>>>>>> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 
> >>>>>>>> 15aaff3203bf2d0..ed93ddb8203d42c 100644
> >>>>>>>> --- a/kernel/rcu/tree.c
> >>>>>>>> +++ b/kernel/rcu/tree.c
> >>>>>>>> @@ -2088,6 +2088,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
> >>>>>>>> 		trace_rcu_invoke_callback(rcu_state.name, rhp);
> >>>>>>>>
> >>>>>>>> 		f = rhp->func;
> >>>>>>>> +		debug_rcu_head_callback(rhp);
> >>>>>>>> 		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
> >>>>>>>> 		f(rhp);
> >>>>>>>>
> >>>>>>>> --
> >>>>>>>> 2.25.1
> >>>>>>>
> >>>>>>> .
> >>>>>>>
> >>>>>>
> >>>>>> --
> >>>>>> Regards,
> >>>>>>   Zhen Lei
> >>>>> .
> >>>>>
> >>>>
> >>>> -- 
> >>>> Regards,
> >>>>   Zhen Lei
> >>> .
> >>>
> >>
> >> -- 
> >> Regards,
> >>   Zhen Lei
> > .
> > 
> 
> -- 
> Regards,
>   Zhen Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] rcu: Dump memory object info if callback function is invalid
  2022-11-14 18:22         ` Uladzislau Rezki
@ 2022-11-17  1:29           ` Leizhen (ThunderTown)
  0 siblings, 0 replies; 18+ messages in thread
From: Leizhen (ThunderTown) @ 2022-11-17  1:29 UTC (permalink / raw)
  To: Uladzislau Rezki, Paul E. McKenney, Zhang, Qiang1
  Cc: Frederic Weisbecker, Neeraj Upadhyay, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, Joel Fernandes,
	rcu, linux-kernel



On 2022/11/15 2:22, Uladzislau Rezki wrote:
>> On Fri, Nov 11, 2022 at 01:05:56PM +0000, Zhang, Qiang1 wrote:
>>> On 2022/11/11 19:54, Zhang, Qiang1 wrote:
>>>>> When a structure containing an RCU callback rhp is (incorrectly) 
>>>>> freed and reallocated after rhp is passed to call_rcu(), it is not 
>>>>> unusual for
>>>>> rhp->func to be set to NULL. This defeats the debugging prints used 
>>>>> rhp->by
>>>>> __call_rcu_common() in kernels built with 
>>>>> CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the 
>>>>> offending code using the identity of this function.
>>>>>
>>>>> And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things 
>>>>> are even worse, as can be seen from this splat:
>>>>>
>>>>> Unable to handle kernel NULL pointer dereference at virtual address 0 
>>>>> ... ...
>>>>> PC is at 0x0
>>>>> LR is at rcu_do_batch+0x1c0/0x3b8
>>>>> ... ...
>>>>> (rcu_do_batch) from (rcu_core+0x1d4/0x284)
>>>>> (rcu_core) from (__do_softirq+0x24c/0x344)
>>>>> (__do_softirq) from (__irq_exit_rcu+0x64/0x108)
>>>>> (__irq_exit_rcu) from (irq_exit+0x8/0x10)
>>>>> (irq_exit) from (__handle_domain_irq+0x74/0x9c)
>>>>> (__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
>>>>> (gic_handle_irq) from (__irq_svc+0x5c/0x94)
>>>>> (__irq_svc) from (arch_cpu_idle+0x20/0x3c)
>>>>> (arch_cpu_idle) from (default_idle_call+0x4c/0x78)
>>>>> (default_idle_call) from (do_idle+0xf8/0x150)
>>>>> (do_idle) from (cpu_startup_entry+0x18/0x20)
>>>>> (cpu_startup_entry) from (0xc01530)
>>>>>
>>>>> This commit therefore adds calls to mem_dump_obj(rhp) to output some 
>>>>> information, for example:
>>>>>
>>>>>  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
>>>>>
>>>>> This provides the rough size of the memory block and the offset of 
>>>>> the rcu_head structure, which as least provides at least a few clues 
>>>>> to help locate the problem. If the problem is reproducible, 
>>>>> additional slab debugging can be enabled, for example, 
>>>>> CONFIG_DEBUG_SLAB=y, which can provide significantly more information.
>>>>>
>>>>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
>>>>> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
>>>>> ---
>>>>> kernel/rcu/rcu.h      | 7 +++++++
>>>>> kernel/rcu/srcutiny.c | 1 +
>>>>> kernel/rcu/srcutree.c | 1 +
>>>>> kernel/rcu/tasks.h    | 1 +
>>>>> kernel/rcu/tiny.c     | 1 +
>>>>> kernel/rcu/tree.c     | 1 +
>>>>> 6 files changed, 12 insertions(+)
>>>>>
>>>>> v1 --> v2:
>>>>> 1. Remove condition "(unsigned long)rhp->func & 0x3", it have problems on x86.
>>>>> 2. Paul E. McKenney helped me update the commit message, thanks.
>>>>>
>>>>
>>>> Hi, Zhen Lei
>>>>
>>>> Maybe the following scenarios should be considered:
>>>>
>>>>                 CPU 0
>>>> tasks context
>>>>    spin_lock(&vmap_area_lock)
>>>>           Interrupt 
>>>> 	 RCU softirq
>>>> 	      rcu_do_batch
>>>> 		mem_dump_obj
>>>>                                   vmalloc_dump_obj
>>>>                                        spin_lock(&vmap_area_lock)   <--  deadlock     
>>>
>>>> Right, thanks. I just saw the robot's report. So this patch should be dropped.
>>>> I'll try to add an helper in mm, where I can check whether the lock has been held, and dump the content of memory object.
>>>
>>> This is a workaround, or maybe try a modification like the following, 
>>> of course, need to ask Paul's opinion.
>>
>> Another approach is to schedule a workqueue handler to do the
>> mem_dump_obj().  This would allow mem_dump_obj() to run in a clean
>> environment.
>>
>> This would allow vmalloc_dump_obj() to be called unconditionally.
>>
>> Other thoughts?
>>
> <snip>
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index ca71de7c9d77..956eb28f9c77 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -2591,6 +2591,19 @@ struct vm_struct *find_vm_area(const void *addr)
>  	return va->vm;
>  }
>  
> +static struct vm_struct *
> +find_vm_area_trylock(const void *addr)
> +{
> +	struct vmap_area *va = NULL;
> +
> +	if (spin_trylock(&vmap_area_lock)) {
> +		va = __find_vmap_area((unsigned long)addr, &vmap_area_root);
> +		spin_unlock(&vmap_area_lock);
> +	}
> +
> +	return va ? va->vm : NULL;
> +}

This method also has the problems mentioned by Andrew Morton.
https://lkml.org/lkml/2022/11/14/1238

> +
>  /**
>   * remove_vm_area - find and remove a continuous kernel virtual area
>   * @addr:	    base address
> @@ -4048,7 +4061,7 @@ bool vmalloc_dump_obj(void *object)
>  	struct vm_struct *vm;
>  	void *objp = (void *)PAGE_ALIGN((unsigned long)object);
>  
> -	vm = find_vm_area(objp);
> +	vm = find_vm_area_trylock(objp);
>  	if (!vm)
>  		return false;
>  	pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
> <snip>
> 
> There is one issue though, it is not correct to reference vm->members
> after a lock is dropped because of: use after free bugs. It is better
> to embed the search and read out: nr_pages, addr, caller and drop the
> lock.
> 
> --
> Uladzislau Rezki
> .
> 

-- 
Regards,
  Zhen Lei

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2022-11-17  1:30 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-11 10:28 [PATCH v2] rcu: Dump memory object info if callback function is invalid Zhen Lei
2022-11-11 11:54 ` Zhang, Qiang1
2022-11-11 12:34   ` Leizhen (ThunderTown)
2022-11-11 13:05     ` Zhang, Qiang1
2022-11-11 18:42       ` Paul E. McKenney
2022-11-12  2:32         ` Leizhen (ThunderTown)
2022-11-12  2:39           ` Leizhen (ThunderTown)
2022-11-12  6:09             ` Paul E. McKenney
2022-11-12  6:08           ` Paul E. McKenney
2022-11-14  7:18             ` Leizhen (ThunderTown)
2022-11-14 16:06               ` Paul E. McKenney
2022-11-16 14:43                 ` Leizhen (ThunderTown)
2022-11-16 19:57                   ` Paul E. McKenney
2022-11-17  0:09                     ` Zhang, Qiang1
2022-11-14 18:22         ` Uladzislau Rezki
2022-11-17  1:29           ` Leizhen (ThunderTown)
2022-11-11 15:50 ` Elliott, Robert (Servers)
2022-11-14  7:05   ` Leizhen (ThunderTown)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).