linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Zhang, Qiang1" <qiang1.zhang@intel.com>
To: "paulmck@kernel.org" <paulmck@kernel.org>,
	Ammar Faizi <ammarfaizi2@gnuweeb.org>
Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: RE: [PATCH v3] rcu: Add per-CPU rcuc task info to RCU CPU stall warnings
Date: Wed, 26 Jan 2022 02:34:31 +0000	[thread overview]
Message-ID: <PH0PR11MB58801E3E64CA745CA99135ADDA209@PH0PR11MB5880.namprd11.prod.outlook.com> (raw)
In-Reply-To: <20220125191551.GR4285@paulmck-ThinkPad-P17-Gen-1>


On Tue, Jan 25, 2022 at 02:04:04PM +0700, Ammar Faizi wrote:
> On 1/25/22 9:47 AM, Zqiang wrote:
> > When the 'use_softirq' be set zero, all RCU_SOFTIRQ processing be 
> > moved to per-CPU rcuc kthreads, if the rcuc kthreads is being 
> > starved, quiescent state can not report in time. the RCU stall may 
> > be triggered. this commit adds a stack trace of this CPU and dump 
> > rcuc kthreads stack to help analyze what prevents rcuc kthreads from 
> > running.
> > 
> > Suggested-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
> > Signed-off-by: Zqiang <qiang1.zhang@intel.com>
> > ---
> 
> For https://lore.kernel.org/lkml/20220125024744.4186726-1-qiang1.zhang@intel.com/T/
> 
> ```
> 
> Reviewed-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
> 
> ```

>I queued this wordsmithed as shown below.  Please check to see if I messed anything up.  And thank you both!
>
>							Thanx, Paul
>

Thanks Paul,  this description is clearer.


>------------------------------------------------------------------------
>
>commit 72bdc0a6f8cfb7cc56238dc850b7fe236e7371cb
>Author: Zqiang <qiang1.zhang@intel.com>
>Date:   Tue Jan 25 10:47:44 2022 +0800
>
>    rcu: Add per-CPU rcuc task dumps to RCU CPU stall warnings
>    
>    When the rcutree.use_softirq kernel boot parameter is set to zero, all
>    RCU_SOFTIRQ processing is carried out by the per-CPU rcuc kthreads.
>    If these kthreads are being starved, quiescent states will not be
>    reported, which in turn means that the grace period will not end, which
>    can in turn trigger RCU CPU stall warnings.  This commit therefore dumps
>    stack traces of stalled CPUs' rcuc kthreads, which can help identify
>    what is preventing those kthreads from running.
>    
>    Suggested-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
>    Reviewed-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
>    Signed-off-by: Zqiang <qiang1.zhang@intel.com>
>    Signed-off-by: Paul E. McKenney <paulmck@kernel.org>

>diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
>index f071c49b37118..e6ad532cffe78 100644
>--- a/kernel/rcu/tree.c
>+++ b/kernel/rcu/tree.c
>@@ -2859,10 +2859,12 @@ static void rcu_cpu_kthread(unsigned int cpu)
> {
> 	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> 	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
>+	unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity);
> 	int spincnt;
> 
> 	trace_rcu_utilization(TPS("Start CPU kthread@rcu_run"));
> 	for (spincnt = 0; spincnt < 10; spincnt++) {
>+		WRITE_ONCE(*j, jiffies);
> 		local_bh_disable();
> 		*statusp = RCU_KTHREAD_RUNNING;
> 		local_irq_disable();
>@@ -2883,6 +2885,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
> 	schedule_timeout_idle(2);
> 	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> 	*statusp = RCU_KTHREAD_WAITING;
>+	WRITE_ONCE(*j, jiffies);
> }
> 
> static struct smp_hotplug_thread rcu_cpu_thread_spec = {
>diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
>index a87f31b1701db..926673ebe355f 100644
>--- a/kernel/rcu/tree.h
>+++ b/kernel/rcu/tree.h
>@@ -241,6 +241,7 @@ struct rcu_data {
> 					/* rcuc per-CPU kthread or NULL. */
> 	unsigned int rcu_cpu_kthread_status;
> 	char rcu_cpu_has_work;
>+	unsigned long rcuc_activity;
> 
> 	/* 7) Diagnostic data, including RCU CPU stall warnings. */
> 	unsigned int softirq_snap;	/* Snapshot of softirq activity. */
>diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
>index 4781aa338c7c1..6082dd23408f8 100644
>--- a/kernel/rcu/tree_plugin.h
>+++ b/kernel/rcu/tree_plugin.h
>@@ -994,12 +994,15 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
>  */
> static void rcu_cpu_kthread_setup(unsigned int cpu)
> {
>+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
> #ifdef CONFIG_RCU_BOOST
> 	struct sched_param sp;
> 
> 	sp.sched_priority = kthread_prio;
> 	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> #endif /* #ifdef CONFIG_RCU_BOOST */
>+
>+	WRITE_ONCE(rdp->rcuc_activity, jiffies);
> }
> 
> #ifdef CONFIG_RCU_BOOST
>diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
>index 21bebf7c9030b..0c5d8516516af 100644
>--- a/kernel/rcu/tree_stall.h
>+++ b/kernel/rcu/tree_stall.h
>@@ -379,6 +379,15 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
> 	return j > 2 * HZ;
> }
> 
>+static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp)
>+{
>+	unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity);
>+
>+	if (jp)
>+		*jp = j;
>+	return j > 2 * HZ;
>+}
>+
> /*
>  * Print out diagnostic information for the specified stalled CPU.
>  *
>@@ -430,6 +439,29 @@ static void print_cpu_stall_info(int cpu)
> 	       falsepositive ? " (false positive?)" : "");
> }
> 
>+static void rcuc_kthread_dump(struct rcu_data *rdp)
>+{
>+	int cpu;
>+	unsigned long j;
>+	struct task_struct *rcuc;
>+
>+	rcuc = rdp->rcu_cpu_kthread_task;
>+	if (!rcuc)
>+		return;
>+
>+	cpu = task_cpu(rcuc);
>+	if (cpu_is_offline(cpu) || idle_cpu(cpu))
>+		return;
>+
>+	if (!rcu_is_rcuc_kthread_starving(rdp, &j))
>+		return;
>+
>+	pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
>+	sched_show_task(rcuc);
>+	if (!trigger_single_cpu_backtrace(cpu))
>+		dump_cpu_task(cpu);
>+}
>+
> /* Complain about starvation of grace-period kthread.  */
> static void rcu_check_gp_kthread_starvation(void)
> {
>@@ -601,6 +633,9 @@ static void print_cpu_stall(unsigned long gps)
> 	rcu_check_gp_kthread_expired_fqs_timer();
> 	rcu_check_gp_kthread_starvation();
> 
>+	if (!use_softirq)
>+		rcuc_kthread_dump(rdp);
>+
> 	rcu_dump_cpu_stacks();
> 
> 	raw_spin_lock_irqsave_rcu_node(rnp, flags);

      reply	other threads:[~2022-01-26  2:35 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-25  2:47 [PATCH v3] rcu: Add per-CPU rcuc task info to RCU CPU stall warnings Zqiang
2022-01-25  7:04 ` Ammar Faizi
2022-01-25 19:15   ` Paul E. McKenney
2022-01-26  2:34     ` Zhang, Qiang1 [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=PH0PR11MB58801E3E64CA745CA99135ADDA209@PH0PR11MB5880.namprd11.prod.outlook.com \
    --to=qiang1.zhang@intel.com \
    --cc=ammarfaizi2@gnuweeb.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=paulmck@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).