linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] rcu: Add per-CPU rcuc task info to RCU CPU stall warnings
@ 2022-01-25  2:47 Zqiang
  2022-01-25  7:04 ` Ammar Faizi
  0 siblings, 1 reply; 4+ messages in thread
From: Zqiang @ 2022-01-25  2:47 UTC (permalink / raw)
  To: paulmck; +Cc: linux-kernel, ammarfaizi2, qiang1.zhang

When the 'use_softirq' be set zero, all RCU_SOFTIRQ processing
be moved to per-CPU rcuc kthreads, if the rcuc kthreads is
being starved, quiescent state can not report in time. the
RCU stall may be triggered. this commit adds a stack trace of
this CPU and dump rcuc kthreads stack to help analyze what
prevents rcuc kthreads from running.

Suggested-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
Signed-off-by: Zqiang <qiang1.zhang@intel.com>
---
 v1->v2:
 Avoid print anything when CPU is offline or idle.
 v2->v3:
 Make code more compact.

 kernel/rcu/tree.c        |  3 +++
 kernel/rcu/tree.h        |  1 +
 kernel/rcu/tree_plugin.h |  3 +++
 kernel/rcu/tree_stall.h  | 35 +++++++++++++++++++++++++++++++++++
 4 files changed, 42 insertions(+)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a4c25a6283b0..e3fc31a0f546 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2850,10 +2850,12 @@ static void rcu_cpu_kthread(unsigned int cpu)
 {
 	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
 	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
+	unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity);
 	int spincnt;
 
 	trace_rcu_utilization(TPS("Start CPU kthread@rcu_run"));
 	for (spincnt = 0; spincnt < 10; spincnt++) {
+		WRITE_ONCE(*j, jiffies);
 		local_bh_disable();
 		*statusp = RCU_KTHREAD_RUNNING;
 		local_irq_disable();
@@ -2874,6 +2876,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
 	schedule_timeout_idle(2);
 	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
 	*statusp = RCU_KTHREAD_WAITING;
+	WRITE_ONCE(*j, jiffies);
 }
 
 static struct smp_hotplug_thread rcu_cpu_thread_spec = {
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 486fc901bd08..4e0fdebb62e8 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -237,6 +237,7 @@ struct rcu_data {
 					/* rcuc per-CPU kthread or NULL. */
 	unsigned int rcu_cpu_kthread_status;
 	char rcu_cpu_has_work;
+	unsigned long rcuc_activity;
 
 	/* 7) Diagnostic data, including RCU CPU stall warnings. */
 	unsigned int softirq_snap;	/* Snapshot of softirq activity. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index c5b45c2f68a1..327bbfd79cc6 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -996,12 +996,15 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
  */
 static void rcu_cpu_kthread_setup(unsigned int cpu)
 {
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 #ifdef CONFIG_RCU_BOOST
 	struct sched_param sp;
 
 	sp.sched_priority = kthread_prio;
 	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
 #endif /* #ifdef CONFIG_RCU_BOOST */
+
+	WRITE_ONCE(rdp->rcuc_activity, jiffies);
 }
 
 #ifdef CONFIG_RCU_BOOST
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 21bebf7c9030..0c5d8516516a 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -379,6 +379,15 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
 	return j > 2 * HZ;
 }
 
+static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp)
+{
+	unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity);
+
+	if (jp)
+		*jp = j;
+	return j > 2 * HZ;
+}
+
 /*
  * Print out diagnostic information for the specified stalled CPU.
  *
@@ -430,6 +439,29 @@ static void print_cpu_stall_info(int cpu)
 	       falsepositive ? " (false positive?)" : "");
 }
 
+static void rcuc_kthread_dump(struct rcu_data *rdp)
+{
+	int cpu;
+	unsigned long j;
+	struct task_struct *rcuc;
+
+	rcuc = rdp->rcu_cpu_kthread_task;
+	if (!rcuc)
+		return;
+
+	cpu = task_cpu(rcuc);
+	if (cpu_is_offline(cpu) || idle_cpu(cpu))
+		return;
+
+	if (!rcu_is_rcuc_kthread_starving(rdp, &j))
+		return;
+
+	pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
+	sched_show_task(rcuc);
+	if (!trigger_single_cpu_backtrace(cpu))
+		dump_cpu_task(cpu);
+}
+
 /* Complain about starvation of grace-period kthread.  */
 static void rcu_check_gp_kthread_starvation(void)
 {
@@ -601,6 +633,9 @@ static void print_cpu_stall(unsigned long gps)
 	rcu_check_gp_kthread_expired_fqs_timer();
 	rcu_check_gp_kthread_starvation();
 
+	if (!use_softirq)
+		rcuc_kthread_dump(rdp);
+
 	rcu_dump_cpu_stacks();
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] rcu: Add per-CPU rcuc task info to RCU CPU stall warnings
  2022-01-25  2:47 [PATCH v3] rcu: Add per-CPU rcuc task info to RCU CPU stall warnings Zqiang
@ 2022-01-25  7:04 ` Ammar Faizi
  2022-01-25 19:15   ` Paul E. McKenney
  0 siblings, 1 reply; 4+ messages in thread
From: Ammar Faizi @ 2022-01-25  7:04 UTC (permalink / raw)
  To: Zqiang, Paul E. McKenney; +Cc: linux-kernel

On 1/25/22 9:47 AM, Zqiang wrote:
> When the 'use_softirq' be set zero, all RCU_SOFTIRQ processing
> be moved to per-CPU rcuc kthreads, if the rcuc kthreads is
> being starved, quiescent state can not report in time. the
> RCU stall may be triggered. this commit adds a stack trace of
> this CPU and dump rcuc kthreads stack to help analyze what
> prevents rcuc kthreads from running.
> 
> Suggested-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
> Signed-off-by: Zqiang <qiang1.zhang@intel.com>
> ---

For https://lore.kernel.org/lkml/20220125024744.4186726-1-qiang1.zhang@intel.com/T/

```

Reviewed-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>

```
Thank you!
-- 
Ammar Faizi


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] rcu: Add per-CPU rcuc task info to RCU CPU stall warnings
  2022-01-25  7:04 ` Ammar Faizi
@ 2022-01-25 19:15   ` Paul E. McKenney
  2022-01-26  2:34     ` Zhang, Qiang1
  0 siblings, 1 reply; 4+ messages in thread
From: Paul E. McKenney @ 2022-01-25 19:15 UTC (permalink / raw)
  To: Ammar Faizi; +Cc: Zqiang, linux-kernel

On Tue, Jan 25, 2022 at 02:04:04PM +0700, Ammar Faizi wrote:
> On 1/25/22 9:47 AM, Zqiang wrote:
> > When the 'use_softirq' be set zero, all RCU_SOFTIRQ processing
> > be moved to per-CPU rcuc kthreads, if the rcuc kthreads is
> > being starved, quiescent state can not report in time. the
> > RCU stall may be triggered. this commit adds a stack trace of
> > this CPU and dump rcuc kthreads stack to help analyze what
> > prevents rcuc kthreads from running.
> > 
> > Suggested-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
> > Signed-off-by: Zqiang <qiang1.zhang@intel.com>
> > ---
> 
> For https://lore.kernel.org/lkml/20220125024744.4186726-1-qiang1.zhang@intel.com/T/
> 
> ```
> 
> Reviewed-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
> 
> ```

I queued this wordsmithed as shown below.  Please check to see if
I messed anything up.  And thank you both!

							Thanx, Paul

------------------------------------------------------------------------

commit 72bdc0a6f8cfb7cc56238dc850b7fe236e7371cb
Author: Zqiang <qiang1.zhang@intel.com>
Date:   Tue Jan 25 10:47:44 2022 +0800

    rcu: Add per-CPU rcuc task dumps to RCU CPU stall warnings
    
    When the rcutree.use_softirq kernel boot parameter is set to zero, all
    RCU_SOFTIRQ processing is carried out by the per-CPU rcuc kthreads.
    If these kthreads are being starved, quiescent states will not be
    reported, which in turn means that the grace period will not end, which
    can in turn trigger RCU CPU stall warnings.  This commit therefore dumps
    stack traces of stalled CPUs' rcuc kthreads, which can help identify
    what is preventing those kthreads from running.
    
    Suggested-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
    Reviewed-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
    Signed-off-by: Zqiang <qiang1.zhang@intel.com>
    Signed-off-by: Paul E. McKenney <paulmck@kernel.org>

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f071c49b37118..e6ad532cffe78 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2859,10 +2859,12 @@ static void rcu_cpu_kthread(unsigned int cpu)
 {
 	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
 	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
+	unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity);
 	int spincnt;
 
 	trace_rcu_utilization(TPS("Start CPU kthread@rcu_run"));
 	for (spincnt = 0; spincnt < 10; spincnt++) {
+		WRITE_ONCE(*j, jiffies);
 		local_bh_disable();
 		*statusp = RCU_KTHREAD_RUNNING;
 		local_irq_disable();
@@ -2883,6 +2885,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
 	schedule_timeout_idle(2);
 	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
 	*statusp = RCU_KTHREAD_WAITING;
+	WRITE_ONCE(*j, jiffies);
 }
 
 static struct smp_hotplug_thread rcu_cpu_thread_spec = {
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index a87f31b1701db..926673ebe355f 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -241,6 +241,7 @@ struct rcu_data {
 					/* rcuc per-CPU kthread or NULL. */
 	unsigned int rcu_cpu_kthread_status;
 	char rcu_cpu_has_work;
+	unsigned long rcuc_activity;
 
 	/* 7) Diagnostic data, including RCU CPU stall warnings. */
 	unsigned int softirq_snap;	/* Snapshot of softirq activity. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 4781aa338c7c1..6082dd23408f8 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -994,12 +994,15 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
  */
 static void rcu_cpu_kthread_setup(unsigned int cpu)
 {
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 #ifdef CONFIG_RCU_BOOST
 	struct sched_param sp;
 
 	sp.sched_priority = kthread_prio;
 	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
 #endif /* #ifdef CONFIG_RCU_BOOST */
+
+	WRITE_ONCE(rdp->rcuc_activity, jiffies);
 }
 
 #ifdef CONFIG_RCU_BOOST
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 21bebf7c9030b..0c5d8516516af 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -379,6 +379,15 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
 	return j > 2 * HZ;
 }
 
+static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp)
+{
+	unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity);
+
+	if (jp)
+		*jp = j;
+	return j > 2 * HZ;
+}
+
 /*
  * Print out diagnostic information for the specified stalled CPU.
  *
@@ -430,6 +439,29 @@ static void print_cpu_stall_info(int cpu)
 	       falsepositive ? " (false positive?)" : "");
 }
 
+static void rcuc_kthread_dump(struct rcu_data *rdp)
+{
+	int cpu;
+	unsigned long j;
+	struct task_struct *rcuc;
+
+	rcuc = rdp->rcu_cpu_kthread_task;
+	if (!rcuc)
+		return;
+
+	cpu = task_cpu(rcuc);
+	if (cpu_is_offline(cpu) || idle_cpu(cpu))
+		return;
+
+	if (!rcu_is_rcuc_kthread_starving(rdp, &j))
+		return;
+
+	pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
+	sched_show_task(rcuc);
+	if (!trigger_single_cpu_backtrace(cpu))
+		dump_cpu_task(cpu);
+}
+
 /* Complain about starvation of grace-period kthread.  */
 static void rcu_check_gp_kthread_starvation(void)
 {
@@ -601,6 +633,9 @@ static void print_cpu_stall(unsigned long gps)
 	rcu_check_gp_kthread_expired_fqs_timer();
 	rcu_check_gp_kthread_starvation();
 
+	if (!use_softirq)
+		rcuc_kthread_dump(rdp);
+
 	rcu_dump_cpu_stacks();
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* RE: [PATCH v3] rcu: Add per-CPU rcuc task info to RCU CPU stall warnings
  2022-01-25 19:15   ` Paul E. McKenney
@ 2022-01-26  2:34     ` Zhang, Qiang1
  0 siblings, 0 replies; 4+ messages in thread
From: Zhang, Qiang1 @ 2022-01-26  2:34 UTC (permalink / raw)
  To: paulmck, Ammar Faizi; +Cc: linux-kernel


On Tue, Jan 25, 2022 at 02:04:04PM +0700, Ammar Faizi wrote:
> On 1/25/22 9:47 AM, Zqiang wrote:
> > When the 'use_softirq' be set zero, all RCU_SOFTIRQ processing be 
> > moved to per-CPU rcuc kthreads, if the rcuc kthreads is being 
> > starved, quiescent state can not report in time. the RCU stall may 
> > be triggered. this commit adds a stack trace of this CPU and dump 
> > rcuc kthreads stack to help analyze what prevents rcuc kthreads from 
> > running.
> > 
> > Suggested-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
> > Signed-off-by: Zqiang <qiang1.zhang@intel.com>
> > ---
> 
> For 
> https://lore.kernel.org/lkml/20220125024744.4186726-1-qiang1.zhang@int
> el.com/T/
> 
> ```
> 
> Reviewed-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
> 
> ```

>I queued this wordsmithed as shown below.  Please check to see if I messed anything up.  And thank you both!
>
>							Thanx, Paul
>

Thanks Paul,  this description is clearer.


>------------------------------------------------------------------------
>
>commit 72bdc0a6f8cfb7cc56238dc850b7fe236e7371cb
>Author: Zqiang <qiang1.zhang@intel.com>
>Date:   Tue Jan 25 10:47:44 2022 +0800
>
>    rcu: Add per-CPU rcuc task dumps to RCU CPU stall warnings
>    
>    When the rcutree.use_softirq kernel boot parameter is set to zero, all
>    RCU_SOFTIRQ processing is carried out by the per-CPU rcuc kthreads.
>    If these kthreads are being starved, quiescent states will not be
>    reported, which in turn means that the grace period will not end, which
>    can in turn trigger RCU CPU stall warnings.  This commit therefore dumps
>    stack traces of stalled CPUs' rcuc kthreads, which can help identify
>    what is preventing those kthreads from running.
>    
>    Suggested-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
>    Reviewed-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
>    Signed-off-by: Zqiang <qiang1.zhang@intel.com>
>    Signed-off-by: Paul E. McKenney <paulmck@kernel.org>

>diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f071c49b37118..e6ad532cffe78 100644
>--- a/kernel/rcu/tree.c
>+++ b/kernel/rcu/tree.c
>@@ -2859,10 +2859,12 @@ static void rcu_cpu_kthread(unsigned int cpu)  {
> 	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> 	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
>+	unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity);
> 	int spincnt;
> 
> 	trace_rcu_utilization(TPS("Start CPU kthread@rcu_run"));
> 	for (spincnt = 0; spincnt < 10; spincnt++) {
>+		WRITE_ONCE(*j, jiffies);
> 		local_bh_disable();
> 		*statusp = RCU_KTHREAD_RUNNING;
> 		local_irq_disable();
>@@ -2883,6 +2885,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
> 	schedule_timeout_idle(2);
> 	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
>	*statusp = RCU_KTHREAD_WAITING;
>+	WRITE_ONCE(*j, jiffies);
> }
> 
> static struct smp_hotplug_thread rcu_cpu_thread_spec = { diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index a87f31b1701db..926673ebe355f 100644
>--- a/kernel/rcu/tree.h
>+++ b/kernel/rcu/tree.h
>@@ -241,6 +241,7 @@ struct rcu_data {
> 					/* rcuc per-CPU kthread or NULL. */
> 	unsigned int rcu_cpu_kthread_status;
> 	char rcu_cpu_has_work;
>+	unsigned long rcuc_activity;
> 
> 	/* 7) Diagnostic data, including RCU CPU stall warnings. */
> 	unsigned int softirq_snap;	/* Snapshot of softirq activity. */
>diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 4781aa338c7c1..6082dd23408f8 100644
>--- a/kernel/rcu/tree_plugin.h
>+++ b/kernel/rcu/tree_plugin.h
>@@ -994,12 +994,15 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
>  */
> static void rcu_cpu_kthread_setup(unsigned int cpu)  {
>+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
> #ifdef CONFIG_RCU_BOOST
> 	struct sched_param sp;
> 
> 	sp.sched_priority = kthread_prio;
> 	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);  #endif /* #ifdef CONFIG_RCU_BOOST */
>+
>+	WRITE_ONCE(rdp->rcuc_activity, jiffies);
> }
 >
> #ifdef CONFIG_RCU_BOOST
>diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 21bebf7c9030b..0c5d8516516af 100644
>--- a/kernel/rcu/tree_stall.h
>+++ b/kernel/rcu/tree_stall.h
>@@ -379,6 +379,15 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
> 	return j > 2 * HZ;
> }
> 
>+static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned 
>+long *jp) {
>+	unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity);
>+
>+	if (jp)
>+		*jp = j;
>+	return j > 2 * HZ;
>+}
>+
> /*
>  * Print out diagnostic information for the specified stalled CPU.
>  *
>@@ -430,6 +439,29 @@ static void print_cpu_stall_info(int cpu)
> 	       falsepositive ? " (false positive?)" : "");  }
> 
>+static void rcuc_kthread_dump(struct rcu_data *rdp) {
>+	int cpu;
>+	unsigned long j;
>+	struct task_struct *rcuc;
>+
>+	rcuc = rdp->rcu_cpu_kthread_task;
>+	if (!rcuc)
>+		return;
>+
>+	cpu = task_cpu(rcuc);
>+	if (cpu_is_offline(cpu) || idle_cpu(cpu))
>+		return;
>+
>+	if (!rcu_is_rcuc_kthread_starving(rdp, &j))
>+		return;
>+
>+	pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
>+	sched_show_task(rcuc);
>+	if (!trigger_single_cpu_backtrace(cpu))
>+		dump_cpu_task(cpu);
>+}
>+
> /* Complain about starvation of grace-period kthread.  */  static void rcu_check_gp_kthread_starvation(void)
> {
>@@ -601,6 +633,9 @@ static void print_cpu_stall(unsigned long gps)
>	rcu_check_gp_kthread_expired_fqs_timer();
> 	rcu_check_gp_kthread_starvation();
> 
>+	if (!use_softirq)
>+		rcuc_kthread_dump(rdp);
>+
> 	rcu_dump_cpu_stacks();
> 
> 	raw_spin_lock_irqsave_rcu_node(rnp, flags);

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-01-26  2:35 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-25  2:47 [PATCH v3] rcu: Add per-CPU rcuc task info to RCU CPU stall warnings Zqiang
2022-01-25  7:04 ` Ammar Faizi
2022-01-25 19:15   ` Paul E. McKenney
2022-01-26  2:34     ` Zhang, Qiang1

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).