linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [tip: core/rcu] rcu: Enable tick for nohz_full CPUs slow to provide expedited QS
@ 2020-01-25 10:42 tip-bot2 for Paul E. McKenney
  2020-01-25 13:14 ` Borislav Petkov
  0 siblings, 1 reply; 12+ messages in thread
From: tip-bot2 for Paul E. McKenney @ 2020-01-25 10:42 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Paul E. McKenney, x86, LKML

The following commit has been merged into the core/rcu branch of tip:

Commit-ID:     df1e849ae4559544ff00ff5052eefe2479750539
Gitweb:        https://git.kernel.org/tip/df1e849ae4559544ff00ff5052eefe2479750539
Author:        Paul E. McKenney <paulmck@kernel.org>
AuthorDate:    Wed, 27 Nov 2019 16:36:45 -08:00
Committer:     Paul E. McKenney <paulmck@kernel.org>
CommitterDate: Mon, 09 Dec 2019 12:32:59 -08:00

rcu: Enable tick for nohz_full CPUs slow to provide expedited QS

An expedited grace period can be stalled by a nohz_full CPU looping
in kernel context.  This possibility is currently handled by some
carefully crafted checks in rcu_read_unlock_special() that enlist help
from ksoftirqd when permitted by the scheduler.  However, it is exactly
these checks that require the scheduler avoid holding any of its rq or
pi locks across rcu_read_unlock() without also having held them across
the entire RCU read-side critical section.

It would therefore be very nice if expedited grace periods could
handle nohz_full CPUs looping in kernel context without such checks.
This commit therefore adds code to the expedited grace period's wait
and cleanup code that forces the scheduler-clock interrupt on for CPUs
that fail to quickly supply a quiescent state.  "Quickly" is currently
a hard-coded single-jiffy delay.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/tick.h  |  5 +++-
 kernel/rcu/tree.h     |  1 +-
 kernel/rcu/tree_exp.h | 52 ++++++++++++++++++++++++++++++++++++------
 3 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 7896f79..7340613 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -109,8 +109,10 @@ enum tick_dep_bits {
 	TICK_DEP_BIT_PERF_EVENTS	= 1,
 	TICK_DEP_BIT_SCHED		= 2,
 	TICK_DEP_BIT_CLOCK_UNSTABLE	= 3,
-	TICK_DEP_BIT_RCU		= 4
+	TICK_DEP_BIT_RCU		= 4,
+	TICK_DEP_BIT_RCU_EXP		= 5
 };
+#define TICK_DEP_BIT_MAX TICK_DEP_BIT_RCU_EXP
 
 #define TICK_DEP_MASK_NONE		0
 #define TICK_DEP_MASK_POSIX_TIMER	(1 << TICK_DEP_BIT_POSIX_TIMER)
@@ -118,6 +120,7 @@ enum tick_dep_bits {
 #define TICK_DEP_MASK_SCHED		(1 << TICK_DEP_BIT_SCHED)
 #define TICK_DEP_MASK_CLOCK_UNSTABLE	(1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
 #define TICK_DEP_MASK_RCU		(1 << TICK_DEP_BIT_RCU)
+#define TICK_DEP_MASK_RCU_EXP		(1 << TICK_DEP_BIT_RCU_EXP)
 
 #ifdef CONFIG_NO_HZ_COMMON
 extern bool tick_nohz_enabled;
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 055c317..f9253ed 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -182,6 +182,7 @@ struct rcu_data {
 	bool rcu_need_heavy_qs;		/* GP old, so heavy quiescent state! */
 	bool rcu_urgent_qs;		/* GP old need light quiescent state. */
 	bool rcu_forced_tick;		/* Forced tick to provide QS. */
+	bool rcu_forced_tick_exp;	/*   ... provide QS to expedited GP. */
 #ifdef CONFIG_RCU_FAST_NO_HZ
 	bool all_lazy;			/* All CPU's CBs lazy at idle start? */
 	unsigned long last_accelerate;	/* Last jiffy CBs were accelerated. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 081a179..30b2a02 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -230,7 +230,9 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_node *rnp, bool wake)
 static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
 				    unsigned long mask, bool wake)
 {
+	int cpu;
 	unsigned long flags;
+	struct rcu_data *rdp;
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	if (!(rnp->expmask & mask)) {
@@ -238,6 +240,13 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
 		return;
 	}
 	WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask);
+	for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
+		rdp = per_cpu_ptr(&rcu_data, cpu);
+		if (!IS_ENABLED(CONFIG_NO_HZ_FULL) || !rdp->rcu_forced_tick_exp)
+			continue;
+		rdp->rcu_forced_tick_exp = false;
+		tick_dep_clear_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
+	}
 	__rcu_report_exp_rnp(rnp, wake, flags); /* Releases rnp->lock. */
 }
 
@@ -450,6 +459,26 @@ static void sync_rcu_exp_select_cpus(void)
 }
 
 /*
+ * Wait for the expedited grace period to elapse, within time limit.
+ * If the time limit is exceeded without the grace period elapsing,
+ * return false, otherwise return true.
+ */
+static bool synchronize_rcu_expedited_wait_once(long tlimit)
+{
+	int t;
+	struct rcu_node *rnp_root = rcu_get_root();
+
+	t = swait_event_timeout_exclusive(rcu_state.expedited_wq,
+					  sync_rcu_exp_done_unlocked(rnp_root),
+					  tlimit);
+	// Workqueues should not be signaled.
+	if (t > 0 || sync_rcu_exp_done_unlocked(rnp_root))
+		return true;
+	WARN_ON(t < 0);  /* workqueues should not be signaled. */
+	return false;
+}
+
+/*
  * Wait for the expedited grace period to elapse, issuing any needed
  * RCU CPU stall warnings along the way.
  */
@@ -460,22 +489,31 @@ static void synchronize_rcu_expedited_wait(void)
 	unsigned long jiffies_start;
 	unsigned long mask;
 	int ndetected;
+	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 	struct rcu_node *rnp_root = rcu_get_root();
-	int ret;
 
 	trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
 	jiffies_stall = rcu_jiffies_till_stall_check();
 	jiffies_start = jiffies;
+	if (IS_ENABLED(CONFIG_NO_HZ_FULL)) {
+		if (synchronize_rcu_expedited_wait_once(1))
+			return;
+		rcu_for_each_leaf_node(rnp) {
+			for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+				rdp = per_cpu_ptr(&rcu_data, cpu);
+				if (rdp->rcu_forced_tick_exp)
+					continue;
+				rdp->rcu_forced_tick_exp = true;
+				tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
+			}
+		}
+		WARN_ON_ONCE(1);
+	}
 
 	for (;;) {
-		ret = swait_event_timeout_exclusive(
-				rcu_state.expedited_wq,
-				sync_rcu_exp_done_unlocked(rnp_root),
-				jiffies_stall);
-		if (ret > 0 || sync_rcu_exp_done_unlocked(rnp_root))
+		if (synchronize_rcu_expedited_wait_once(jiffies_stall))
 			return;
-		WARN_ON(ret < 0);  /* workqueues should not be signaled. */
 		if (rcu_cpu_stall_suppress)
 			continue;
 		panic_on_rcu_stall();

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [tip: core/rcu] rcu: Enable tick for nohz_full CPUs slow to provide expedited QS
  2020-01-25 10:42 [tip: core/rcu] rcu: Enable tick for nohz_full CPUs slow to provide expedited QS tip-bot2 for Paul E. McKenney
@ 2020-01-25 13:14 ` Borislav Petkov
  2020-01-25 16:10   ` Paul E. McKenney
  0 siblings, 1 reply; 12+ messages in thread
From: Borislav Petkov @ 2020-01-25 13:14 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: linux-kernel, linux-tip-commits, x86

Hi Paul,

On Sat, Jan 25, 2020 at 10:42:56AM -0000, tip-bot2 for Paul E. McKenney wrote:
> The following commit has been merged into the core/rcu branch of tip:
> 
> Commit-ID:     df1e849ae4559544ff00ff5052eefe2479750539
> Gitweb:        https://git.kernel.org/tip/df1e849ae4559544ff00ff5052eefe2479750539
> Author:        Paul E. McKenney <paulmck@kernel.org>
> AuthorDate:    Wed, 27 Nov 2019 16:36:45 -08:00
> Committer:     Paul E. McKenney <paulmck@kernel.org>
> CommitterDate: Mon, 09 Dec 2019 12:32:59 -08:00
> 
> rcu: Enable tick for nohz_full CPUs slow to provide expedited QS
> 
> An expedited grace period can be stalled by a nohz_full CPU looping
> in kernel context.  This possibility is currently handled by some
> carefully crafted checks in rcu_read_unlock_special() that enlist help
> from ksoftirqd when permitted by the scheduler.  However, it is exactly
> these checks that require the scheduler avoid holding any of its rq or
> pi locks across rcu_read_unlock() without also having held them across
> the entire RCU read-side critical section.
> 
> It would therefore be very nice if expedited grace periods could
> handle nohz_full CPUs looping in kernel context without such checks.
> This commit therefore adds code to the expedited grace period's wait
> and cleanup code that forces the scheduler-clock interrupt on for CPUs
> that fail to quickly supply a quiescent state.  "Quickly" is currently
> a hard-coded single-jiffy delay.
> 
> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> ---
>  include/linux/tick.h  |  5 +++-
>  kernel/rcu/tree.h     |  1 +-
>  kernel/rcu/tree_exp.h | 52 ++++++++++++++++++++++++++++++++++++------
>  3 files changed, 50 insertions(+), 8 deletions(-)

...

> @@ -460,22 +489,31 @@ static void synchronize_rcu_expedited_wait(void)
>  	unsigned long jiffies_start;
>  	unsigned long mask;
>  	int ndetected;
> +	struct rcu_data *rdp;
>  	struct rcu_node *rnp;
>  	struct rcu_node *rnp_root = rcu_get_root();
> -	int ret;
>  
>  	trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
>  	jiffies_stall = rcu_jiffies_till_stall_check();
>  	jiffies_start = jiffies;
> +	if (IS_ENABLED(CONFIG_NO_HZ_FULL)) {
> +		if (synchronize_rcu_expedited_wait_once(1))
> +			return;
> +		rcu_for_each_leaf_node(rnp) {
> +			for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
> +				rdp = per_cpu_ptr(&rcu_data, cpu);
> +				if (rdp->rcu_forced_tick_exp)
> +					continue;
> +				rdp->rcu_forced_tick_exp = true;
> +				tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
> +			}
> +		}
> +		WARN_ON_ONCE(1);

I'm hitting this on a big AMD box, CONFIG_NO_HZ_FULL=y.

Kernel is Linus master + tip/master from today.

...
[   23.094781] BTRFS info (device sdb2): disk space caching is enabled
[   23.094784] BTRFS info (device sdb2): has skinny extents
[   23.139134] BTRFS info (device sdb2): enabling ssd optimizations
[   23.395434] ------------[ cut here ]------------
[   23.402616] WARNING: CPU: 6 PID: 1921 at kernel/rcu/tree_exp.h:511 rcu_exp_wait_wake+0x11f/0x740
[   23.402616] Modules linked in: btrfs(E) libcrc32c(E) xor(E) hid_generic(E) usbhid(E) raid6_pq(E) sd_mod(E) crc32c_intel(E) ast(E) i2c_algo_bit(E) drm_vram_helper(E) drm_ttm_helper(E) ttm(E) ahci(E) drm_kms_helper(E) syscopyarea(E) libahci(E) sysfillrect(E) xhci_pci(E) sysimgblt(E) fb_sys_fops(E) xhci_hcd(E) libata(E) drm(E) usbcore(E) wmi(E) sg(E) dm_multipath(E) dm_mod(E) scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E) scsi_mod(E) efivarfs(E)
[   23.402630] CPU: 6 PID: 1921 Comm: kworker/6:2 Tainted: G            E     5.5.0-rc7+ #1
[   23.402632] Workqueue: rcu_gp wait_rcu_exp_gp
[   23.402634] RIP: 0010:rcu_exp_wait_wake+0x11f/0x740
[   23.402636] Code: 3b 45 6c 7e be 48 63 05 8b 30 50 01 49 81 c5 40 02 00 00 48 8d 04 c0 48 c1 e0 06 48 05 80 b2 67 b5 49 39 c5 0f 82 72 ff ff ff <0f> 0b 49 c7 c7 00 fd 02 00 bb 01 00 00 00 4c 89 e7 e8 ab e7 ff ff
[   23.402637] RSP: 0018:ffffbde38a8f3e50 EFLAGS: 00010246
[   23.402637] RAX: ffffffffb567d8c0 RBX: ffffffffb567d6c0 RCX: 0000000000000040
[   23.402638] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffb567d6c0
[   23.402638] RBP: 0000000000000000 R08: 0000000000000002 R09: 0000000000000001
[   23.402639] R10: ffffbde3802cfdc0 R11: 0000000000000271 R12: 0000000000003a98
[   23.402640] R13: ffffffffb567d8c0 R14: 00000000000000f0 R15: 000000000002fd00
[   23.402640] FS:  0000000000000000(0000) GS:ffffa016cd380000(0000) knlGS:0000000000000000
[   23.402641] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   23.402641] CR2: 00007fc9957c5180 CR3: 0000800bb3e0a000 CR4: 0000000000340ee0
[   23.402642] Call Trace:
[   23.402647]  ? sync_rcu_exp_select_cpus+0x219/0x3e0
[   23.402650]  process_one_work+0x20b/0x400
[   23.402652]  worker_thread+0x2d/0x3f0
[   23.402653]  ? process_one_work+0x400/0x400
[   23.402655]  kthread+0x10d/0x130
[   23.402656]  ? kthread_park+0x90/0x90
[   23.402660]  ret_from_fork+0x27/0x50
[   23.402663] ---[ end trace 85fb288edc35e984 ]---
[   24.322452] BTRFS info (device sdb2): disk space caching is enabled

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [tip: core/rcu] rcu: Enable tick for nohz_full CPUs slow to provide expedited QS
  2020-01-25 13:14 ` Borislav Petkov
@ 2020-01-25 16:10   ` Paul E. McKenney
  2020-01-25 17:54     ` Borislav Petkov
  0 siblings, 1 reply; 12+ messages in thread
From: Paul E. McKenney @ 2020-01-25 16:10 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: linux-kernel, linux-tip-commits, x86

On Sat, Jan 25, 2020 at 02:14:26PM +0100, Borislav Petkov wrote:
> Hi Paul,
> 
> On Sat, Jan 25, 2020 at 10:42:56AM -0000, tip-bot2 for Paul E. McKenney wrote:
> > The following commit has been merged into the core/rcu branch of tip:
> > 
> > Commit-ID:     df1e849ae4559544ff00ff5052eefe2479750539
> > Gitweb:        https://git.kernel.org/tip/df1e849ae4559544ff00ff5052eefe2479750539
> > Author:        Paul E. McKenney <paulmck@kernel.org>
> > AuthorDate:    Wed, 27 Nov 2019 16:36:45 -08:00
> > Committer:     Paul E. McKenney <paulmck@kernel.org>
> > CommitterDate: Mon, 09 Dec 2019 12:32:59 -08:00
> > 
> > rcu: Enable tick for nohz_full CPUs slow to provide expedited QS
> > 
> > An expedited grace period can be stalled by a nohz_full CPU looping
> > in kernel context.  This possibility is currently handled by some
> > carefully crafted checks in rcu_read_unlock_special() that enlist help
> > from ksoftirqd when permitted by the scheduler.  However, it is exactly
> > these checks that require the scheduler avoid holding any of its rq or
> > pi locks across rcu_read_unlock() without also having held them across
> > the entire RCU read-side critical section.
> > 
> > It would therefore be very nice if expedited grace periods could
> > handle nohz_full CPUs looping in kernel context without such checks.
> > This commit therefore adds code to the expedited grace period's wait
> > and cleanup code that forces the scheduler-clock interrupt on for CPUs
> > that fail to quickly supply a quiescent state.  "Quickly" is currently
> > a hard-coded single-jiffy delay.
> > 
> > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > ---
> >  include/linux/tick.h  |  5 +++-
> >  kernel/rcu/tree.h     |  1 +-
> >  kernel/rcu/tree_exp.h | 52 ++++++++++++++++++++++++++++++++++++------
> >  3 files changed, 50 insertions(+), 8 deletions(-)
> 
> ...
> 
> > @@ -460,22 +489,31 @@ static void synchronize_rcu_expedited_wait(void)
> >  	unsigned long jiffies_start;
> >  	unsigned long mask;
> >  	int ndetected;
> > +	struct rcu_data *rdp;
> >  	struct rcu_node *rnp;
> >  	struct rcu_node *rnp_root = rcu_get_root();
> > -	int ret;
> >  
> >  	trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
> >  	jiffies_stall = rcu_jiffies_till_stall_check();
> >  	jiffies_start = jiffies;
> > +	if (IS_ENABLED(CONFIG_NO_HZ_FULL)) {
> > +		if (synchronize_rcu_expedited_wait_once(1))
> > +			return;
> > +		rcu_for_each_leaf_node(rnp) {
> > +			for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
> > +				rdp = per_cpu_ptr(&rcu_data, cpu);
> > +				if (rdp->rcu_forced_tick_exp)
> > +					continue;
> > +				rdp->rcu_forced_tick_exp = true;
> > +				tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
> > +			}
> > +		}
> > +		WARN_ON_ONCE(1);
> 
> I'm hitting this on a big AMD box, CONFIG_NO_HZ_FULL=y.

How big?  (Seriously, given that the fix may depend on the number of CPUs.)

So the problem appears to be that some of the boot-time processing
is looping in the kernel, which is preventing the grace period from
completing.  One could argue that such code should be fixed, but on the
other hand, boot time is a bit special.  Later in -rcu's dev branch,
there are commits that forgive this boot-time misbehavior, but this is
a bit late in the process to dump all of those commits into -tip.

The RT guys might need the warning, and it was them that I was thinking
of when adding it.  But let's see what works for mainline first.  And
since your box was booting fine without the warning before, I bet that
it boots just fine with that warning removed.

So could you please try out the (untested) patch below?

If that works, I will re-introduce the warning with proper protection
for the merge window following this coming one.

							Thanx, Paul

> Kernel is Linus master + tip/master from today.
> ...
> [   23.094781] BTRFS info (device sdb2): disk space caching is enabled
> [   23.094784] BTRFS info (device sdb2): has skinny extents
> [   23.139134] BTRFS info (device sdb2): enabling ssd optimizations
> [   23.395434] ------------[ cut here ]------------
> [   23.402616] WARNING: CPU: 6 PID: 1921 at kernel/rcu/tree_exp.h:511 rcu_exp_wait_wake+0x11f/0x740
> [   23.402616] Modules linked in: btrfs(E) libcrc32c(E) xor(E) hid_generic(E) usbhid(E) raid6_pq(E) sd_mod(E) crc32c_intel(E) ast(E) i2c_algo_bit(E) drm_vram_helper(E) drm_ttm_helper(E) ttm(E) ahci(E) drm_kms_helper(E) syscopyarea(E) libahci(E) sysfillrect(E) xhci_pci(E) sysimgblt(E) fb_sys_fops(E) xhci_hcd(E) libata(E) drm(E) usbcore(E) wmi(E) sg(E) dm_multipath(E) dm_mod(E) scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E) scsi_mod(E) efivarfs(E)
> [   23.402630] CPU: 6 PID: 1921 Comm: kworker/6:2 Tainted: G            E     5.5.0-rc7+ #1
> [   23.402632] Workqueue: rcu_gp wait_rcu_exp_gp
> [   23.402634] RIP: 0010:rcu_exp_wait_wake+0x11f/0x740
> [   23.402636] Code: 3b 45 6c 7e be 48 63 05 8b 30 50 01 49 81 c5 40 02 00 00 48 8d 04 c0 48 c1 e0 06 48 05 80 b2 67 b5 49 39 c5 0f 82 72 ff ff ff <0f> 0b 49 c7 c7 00 fd 02 00 bb 01 00 00 00 4c 89 e7 e8 ab e7 ff ff
> [   23.402637] RSP: 0018:ffffbde38a8f3e50 EFLAGS: 00010246
> [   23.402637] RAX: ffffffffb567d8c0 RBX: ffffffffb567d6c0 RCX: 0000000000000040
> [   23.402638] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffb567d6c0
> [   23.402638] RBP: 0000000000000000 R08: 0000000000000002 R09: 0000000000000001
> [   23.402639] R10: ffffbde3802cfdc0 R11: 0000000000000271 R12: 0000000000003a98
> [   23.402640] R13: ffffffffb567d8c0 R14: 00000000000000f0 R15: 000000000002fd00
> [   23.402640] FS:  0000000000000000(0000) GS:ffffa016cd380000(0000) knlGS:0000000000000000
> [   23.402641] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   23.402641] CR2: 00007fc9957c5180 CR3: 0000800bb3e0a000 CR4: 0000000000340ee0
> [   23.402642] Call Trace:
> [   23.402647]  ? sync_rcu_exp_select_cpus+0x219/0x3e0
> [   23.402650]  process_one_work+0x20b/0x400
> [   23.402652]  worker_thread+0x2d/0x3f0
> [   23.402653]  ? process_one_work+0x400/0x400
> [   23.402655]  kthread+0x10d/0x130
> [   23.402656]  ? kthread_park+0x90/0x90
> [   23.402660]  ret_from_fork+0x27/0x50
> [   23.402663] ---[ end trace 85fb288edc35e984 ]---
> [   24.322452] BTRFS info (device sdb2): disk space caching is enabled
> 
> Thx.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

------------------------------------------------------------------------

diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6935a9e..dcbd757 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -508,7 +508,6 @@ static void synchronize_rcu_expedited_wait(void)
 				tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
 			}
 		}
-		WARN_ON_ONCE(1);
 	}
 
 	for (;;) {

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [tip: core/rcu] rcu: Enable tick for nohz_full CPUs slow to provide expedited QS
  2020-01-25 16:10   ` Paul E. McKenney
@ 2020-01-25 17:54     ` Borislav Petkov
  2020-01-25 19:48       ` Paul E. McKenney
  0 siblings, 1 reply; 12+ messages in thread
From: Borislav Petkov @ 2020-01-25 17:54 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: linux-kernel, linux-tip-commits, x86

On Sat, Jan 25, 2020 at 08:10:50AM -0800, Paul E. McKenney wrote:
> How big?  (Seriously, given that the fix may depend on the number of CPUs.)

[    7.660017] smp: Brought up 2 nodes, 256 CPUs

> So the problem appears to be that some of the boot-time processing
> is looping in the kernel, which is preventing the grace period from
> completing.  One could argue that such code should be fixed, but on the
> other hand, boot time is a bit special.  Later in -rcu's dev branch,
> there are commits that forgive this boot-time misbehavior, but this is
> a bit late in process to dump all of those commits into -tip.

Aha.

> The RT guys might need the warning, and it was them that I was thinking
> of when adding it. 

But "boot time is a bit special". Or do they care about deadlines during
boot too?

> But let's see what works for mainline first.  And
> since your box was booting fine without the warning before, I bet that
> it boots just fine with that warning removed.

Yes, it does.

> So could you please try out the (untested) patch below?

Warning's gone.

> If that works, I will re-introduce the warning with proper protection
> for the merge window following this coming one.

My big box is at your service if you need stuff tested later.

Thx Paul.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [tip: core/rcu] rcu: Enable tick for nohz_full CPUs slow to provide expedited QS
  2020-01-25 17:54     ` Borislav Petkov
@ 2020-01-25 19:48       ` Paul E. McKenney
  2020-01-25 20:08         ` Paul E. McKenney
                           ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Paul E. McKenney @ 2020-01-25 19:48 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: linux-kernel, linux-tip-commits, x86

On Sat, Jan 25, 2020 at 06:54:42PM +0100, Borislav Petkov wrote:
> On Sat, Jan 25, 2020 at 08:10:50AM -0800, Paul E. McKenney wrote:
> > How big?  (Seriously, given that the fix may depend on the number of CPUs.)
> 
> [    7.660017] smp: Brought up 2 nodes, 256 CPUs
> 
> > So the problem appears to be that some of the boot-time processing
> > is looping in the kernel, which is preventing the grace period from
> > completing.  One could argue that such code should be fixed, but on the
> > other hand, boot time is a bit special.  Later in -rcu's dev branch,
> > there are commits that forgive this boot-time misbehavior, but this is
> > a bit late in process to dump all of those commits into -tip.
> 
> Aha.
> 
> > The RT guys might need the warning, and it was them that I was thinking
> > of when adding it. 
> 
> But "boot time is a bit special". Or do they care about deadlines during
> boot too?

Maybe, but not that I know of.  If they do, this would be an excellent
time for them to let me know!

My guess is "no" because the real-time application would not yet be
running during boot.  On the other hand, if this issue is due not so much
to boot, but to (say) expensive filesystem operations on large systems,
that might be a different story.

Except that I would have hard questions to ask of someone doing expensive
filesystem operations while their deep-sub-millisecond real-time
application was running.  So even then, I doubt that they would care.

Again, if I am wrong about this, this would be an excellent time for
them to let me know.

> > But let's see what works for mainline first.  And
> > since your box was booting fine without the warning before, I bet that
> > it boots just fine with that warning removed.
> 
> Yes, it does.

Woo-hoo!!!

> > So could you please try out the (untested) patch below?
> 
> Warning's gone.

Very good.  I will get it properly prepared and tested, then send it
along to Ingo.

> > If that works, I will re-introduce the warning with proper protection
> > for the merge window following this coming one.
> 
> My big box is at your service if you need stuff tested later.

Thank you in advance!  I just might take you up on that!

In the meantime, one question...  Are you testing for realtime suitability
on your big box?  If so, to what extent?

> Thx Paul.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

Aside from habitually failing to trim emails, which of these was I
violating?  ;-)

							Thanx, Paul

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [tip: core/rcu] rcu: Enable tick for nohz_full CPUs slow to provide expedited QS
  2020-01-25 19:48       ` Paul E. McKenney
@ 2020-01-25 20:08         ` Paul E. McKenney
  2020-01-25 20:23           ` Borislav Petkov
  2020-01-25 20:19         ` Borislav Petkov
  2020-01-26  1:43         ` Paul E. McKenney
  2 siblings, 1 reply; 12+ messages in thread
From: Paul E. McKenney @ 2020-01-25 20:08 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: linux-kernel, linux-tip-commits, x86

On Sat, Jan 25, 2020 at 11:48:46AM -0800, Paul E. McKenney wrote:
> On Sat, Jan 25, 2020 at 06:54:42PM +0100, Borislav Petkov wrote:
> > On Sat, Jan 25, 2020 at 08:10:50AM -0800, Paul E. McKenney wrote:

[ . . . ]

> > > If that works, I will re-introduce the warning with proper protection
> > > for the merge window following this coming one.
> > 
> > My big box is at your service if you need stuff tested later.
> 
> Thank you in advance!  I just might take you up on that!

And I do have an alleged fix on branch dev of the -rcu tree:

	git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git

I am just now starting testing, so the probability of failure is
decidedly non-zero.  ;-)

							Thanx, Paul

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [tip: core/rcu] rcu: Enable tick for nohz_full CPUs slow to provide expedited QS
  2020-01-25 19:48       ` Paul E. McKenney
  2020-01-25 20:08         ` Paul E. McKenney
@ 2020-01-25 20:19         ` Borislav Petkov
  2020-01-26  1:43         ` Paul E. McKenney
  2 siblings, 0 replies; 12+ messages in thread
From: Borislav Petkov @ 2020-01-25 20:19 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: linux-kernel, linux-tip-commits, x86

On Sat, Jan 25, 2020 at 11:48:46AM -0800, Paul E. McKenney wrote:
> My guess is "no" because the real-time application would not yet be
> running during boot.  On the other hand, if this issue is due not so much
> to boot, but to (say) expensive filesystem operations on large systems,
> that might be a different story.

Possible - that warn happened around a btrfs init-something.

> Except that I would have hard questions to ask of someone doing expensive
> filesystem operations while their deep-sub-millisecond real-time
> application was running.  So even then, I doubt that they would care.
> 
> Again, if I am wrong about this, this would be an excellent time for
> them to let me know.

I can see the hint there. :-)

> In the meantime, one question...  Are you testing for realtime suitability
> on your big box?  If so, to what extent?

Nah, just boot-testing tip/master before the merge window opens. Got a
couple of boxes on which I throw tip/master from time to time and see
what breaks. I have caught a number of issues in past years, so it is a
useful exercise.

> Aside from habitually failing to trim emails, which of these was I
> violating?  ;-)

That's my mail signature. In the hope that people see that doc and stop
doing the same annoying things on LKML, it gets pasted in every mail of
mine. I didn't mean you or your mail.

:-)

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [tip: core/rcu] rcu: Enable tick for nohz_full CPUs slow to provide expedited QS
  2020-01-25 20:08         ` Paul E. McKenney
@ 2020-01-25 20:23           ` Borislav Petkov
  0 siblings, 0 replies; 12+ messages in thread
From: Borislav Petkov @ 2020-01-25 20:23 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: linux-kernel, linux-tip-commits, x86

On Sat, Jan 25, 2020 at 12:08:54PM -0800, Paul E. McKenney wrote:
> And I do have an alleged fix on branch dev of the -rcu tree:
> 
> 	git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
> 
> I am just now starting testing, so the probability of failure is
> decidedly non-zero.  ;-)

Lemme run it here too.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [tip: core/rcu] rcu: Enable tick for nohz_full CPUs slow to provide expedited QS
  2020-01-25 19:48       ` Paul E. McKenney
  2020-01-25 20:08         ` Paul E. McKenney
  2020-01-25 20:19         ` Borislav Petkov
@ 2020-01-26  1:43         ` Paul E. McKenney
  2020-01-26 11:25           ` Borislav Petkov
  2 siblings, 1 reply; 12+ messages in thread
From: Paul E. McKenney @ 2020-01-26  1:43 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: linux-kernel, linux-tip-commits, x86

On Sat, Jan 25, 2020 at 11:48:46AM -0800, Paul E. McKenney wrote:
> On Sat, Jan 25, 2020 at 06:54:42PM +0100, Borislav Petkov wrote:
> > On Sat, Jan 25, 2020 at 08:10:50AM -0800, Paul E. McKenney wrote:

[ . . . ]

> > > So could you please try out the (untested) patch below?
> > 
> > Warning's gone.
> 
> > Very good.  I will get it properly prepared and tested, then send it
> along to Ingo.

And it passes my rcutorture testing as well!  If it does fine with 0day
and -next, I will send a pull request Sunday evening, Pacific Time.
In the meantime, it is right here in -rcu:

59d8cc6b2e37 ("rcu: Forgive slow expedited grace periods at boot time")

							Thanx, Paul

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [tip: core/rcu] rcu: Enable tick for nohz_full CPUs slow to provide expedited QS
  2020-01-26  1:43         ` Paul E. McKenney
@ 2020-01-26 11:25           ` Borislav Petkov
  2020-01-26 15:28             ` Paul E. McKenney
  0 siblings, 1 reply; 12+ messages in thread
From: Borislav Petkov @ 2020-01-26 11:25 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: linux-kernel, linux-tip-commits, x86

On Sat, Jan 25, 2020 at 05:43:18PM -0800, Paul E. McKenney wrote:
> And it passes my rcutorture testing as well!  If it does fine with 0day
> and -next, I will send a pull request Sunday evening, Pacific Time.
> In the meantime, it is right here in -rcu:
> 
> 59d8cc6b2e37 ("rcu: Forgive slow expedited grace periods at boot time")

Yap, testing looks good here too.

Thx Paul.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [tip: core/rcu] rcu: Enable tick for nohz_full CPUs slow to provide expedited QS
  2020-01-26 11:25           ` Borislav Petkov
@ 2020-01-26 15:28             ` Paul E. McKenney
  2020-01-26 17:19               ` Borislav Petkov
  0 siblings, 1 reply; 12+ messages in thread
From: Paul E. McKenney @ 2020-01-26 15:28 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: linux-kernel, linux-tip-commits, x86

On Sun, Jan 26, 2020 at 12:25:40PM +0100, Borislav Petkov wrote:
> On Sat, Jan 25, 2020 at 05:43:18PM -0800, Paul E. McKenney wrote:
> > And it passes my rcutorture testing as well!  If it does fine with 0day
> > and -next, I will send a pull request Sunday evening, Pacific Time.
> > In the meantime, it is right here in -rcu:
> > 
> > 59d8cc6b2e37 ("rcu: Forgive slow expedited grace periods at boot time")
> 
> Yap, testing looks good here too.
> 
> Thx Paul.

And thank you for finding this and for the testing!

May I add your Tested-by?

							Thanx, Paul

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [tip: core/rcu] rcu: Enable tick for nohz_full CPUs slow to provide expedited QS
  2020-01-26 15:28             ` Paul E. McKenney
@ 2020-01-26 17:19               ` Borislav Petkov
  0 siblings, 0 replies; 12+ messages in thread
From: Borislav Petkov @ 2020-01-26 17:19 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: linux-kernel, linux-tip-commits, x86

On Sun, Jan 26, 2020 at 07:28:31AM -0800, Paul E. McKenney wrote:
> And thank you for finding this and for the testing!
> 
> May I add your Tested-by?

Sure, thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-01-26 17:20 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-25 10:42 [tip: core/rcu] rcu: Enable tick for nohz_full CPUs slow to provide expedited QS tip-bot2 for Paul E. McKenney
2020-01-25 13:14 ` Borislav Petkov
2020-01-25 16:10   ` Paul E. McKenney
2020-01-25 17:54     ` Borislav Petkov
2020-01-25 19:48       ` Paul E. McKenney
2020-01-25 20:08         ` Paul E. McKenney
2020-01-25 20:23           ` Borislav Petkov
2020-01-25 20:19         ` Borislav Petkov
2020-01-26  1:43         ` Paul E. McKenney
2020-01-26 11:25           ` Borislav Petkov
2020-01-26 15:28             ` Paul E. McKenney
2020-01-26 17:19               ` Borislav Petkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).