* RCU vs data_race()
@ 2021-06-18  8:24 Peter Zijlstra
  2021-06-18  8:59 ` Marco Elver
  0 siblings, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2021-06-18  8:24 UTC (permalink / raw)
  To: Paul McKenney, elver; +Cc: linux-kernel

Hi Paul,

Due to a merge conflict I had to look at some recent RCU code, and I saw
you went a little overboard with data_race(). How's something like the
below look to you?

The idea being that we fundamentally don't care about data races for
debug/error condition prints, so marking every single variable access is
just clutter.

---
diff --git a/include/linux/printk.h b/include/linux/printk.h
index f589b8b60806..8f21916c2fe2 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -315,7 +315,7 @@ extern int kptr_restrict;
  * generate the format string.
  */
 #define pr_emerg(fmt, ...) \
-	printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+	data_race(printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__))
 /**
  * pr_alert - Print an alert-level message
  * @fmt: format string
@@ -325,7 +325,7 @@ extern int kptr_restrict;
  * generate the format string.
  */
 #define pr_alert(fmt, ...) \
-	printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+	data_race(printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__))
 /**
  * pr_crit - Print a critical-level message
  * @fmt: format string
@@ -335,7 +335,7 @@ extern int kptr_restrict;
  * generate the format string.
  */
 #define pr_crit(fmt, ...) \
-	printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+	data_race(printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__))
 /**
  * pr_err - Print an error-level message
  * @fmt: format string
@@ -345,7 +345,7 @@ extern int kptr_restrict;
  * generate the format string.
  */
 #define pr_err(fmt, ...) \
-	printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+	data_race(printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__))
 /**
  * pr_warn - Print a warning-level message
  * @fmt: format string
@@ -355,7 +355,7 @@ extern int kptr_restrict;
  * to generate the format string.
  */
 #define pr_warn(fmt, ...) \
-	printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+	data_race(printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__))
 /**
  * pr_notice - Print a notice-level message
  * @fmt: format string
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 6833d8887181..8bb4ec3c7e6e 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -1353,8 +1353,10 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
 		struct srcu_data *sdp;
 
 		sdp = per_cpu_ptr(ssp->sda, cpu);
-		u0 = data_race(sdp->srcu_unlock_count[!idx]);
-		u1 = data_race(sdp->srcu_unlock_count[idx]);
+
+		data_race(
+		u0 = sdp->srcu_unlock_count[!idx];
+		u1 = sdp->srcu_unlock_count[idx];
 
 		/*
 		 * Make sure that a lock is always counted if the corresponding
@@ -1362,14 +1364,15 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
 		 */
 		smp_rmb();
 
-		l0 = data_race(sdp->srcu_lock_count[!idx]);
-		l1 = data_race(sdp->srcu_lock_count[idx]);
+		l0 = sdp->srcu_lock_count[!idx];
+		l1 = sdp->srcu_lock_count[idx];
 
 		c0 = l0 - u0;
 		c1 = l1 - u1;
 		pr_cont(" %d(%ld,%ld %c)",
 			cpu, c0, c1,
-			"C."[rcu_segcblist_empty(&sdp->srcu_cblist)]);
+			"C."[rcu_segcblist_empty(&sdp->srcu_cblist)]));
+
 		s0 += c0;
 		s1 += c1;
 	}
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 1cece5e9be9a..53080c3bede2 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -280,15 +280,15 @@ static void __init rcu_tasks_bootup_oddness(void)
 /* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
 static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
 {
+	data_race(
 	pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
 		rtp->kname,
-		tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
-		jiffies - data_race(rtp->gp_jiffies),
-		data_race(rtp->n_gps),
-		data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
-		".k"[!!data_race(rtp->kthread_ptr)],
-		".C"[!!data_race(rtp->cbs_head)],
-		s);
+		tasks_gp_state_getname(rtp), rtp->gp_state,
+		jiffies - rtp->gp_jiffies,
+		rtp->n_gps, rtp->n_ipis_fails, rtp->n_ipis,
+		".k"[!!rtp->kthread_ptr],
+		".C"[!!rtp->cbs_head],
+		s));
 }
 #endif // #ifndef CONFIG_TINY_RCU
 
@@ -1291,10 +1291,11 @@ void show_rcu_tasks_trace_gp_kthread(void)
 {
 	char buf[64];
 
+	data_race(
 	sprintf(buf, "N%d h:%lu/%lu/%lu", atomic_read(&trc_n_readers_need_end),
-		data_race(n_heavy_reader_ofl_updates),
-		data_race(n_heavy_reader_updates),
-		data_race(n_heavy_reader_attempts));
+		n_heavy_reader_ofl_updates,
+		n_heavy_reader_updates,
+		n_heavy_reader_attempts));
 	show_rcu_tasks_generic_gp_kthread(&rcu_tasks_trace, buf);
 }
 EXPORT_SYMBOL_GPL(show_rcu_tasks_trace_gp_kthread);
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 24065f1acb8b..1e392beabbba 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -448,7 +448,7 @@ static void print_cpu_stall_info(int cpu)
 	       rcu_dynticks_snap(rdp) & 0xfff,
 	       rdp->dynticks_nesting, rdp->dynticks_nmi_nesting,
 	       rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
-	       data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
+	       rcu_state.n_force_qs - rcu_state.n_force_qs_gpstart,
 	       fast_no_hz,
 	       falsepositive ? " (false positive?)" : "");
 }
@@ -465,10 +465,10 @@ static void rcu_check_gp_kthread_starvation(void)
 		pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
 		       rcu_state.name, j,
 		       (long)rcu_seq_current(&rcu_state.gp_seq),
-		       data_race(READ_ONCE(rcu_state.gp_flags)),
+		       READ_ONCE(rcu_state.gp_flags),
 		       gp_state_getname(rcu_state.gp_state),
-		       data_race(READ_ONCE(rcu_state.gp_state)),
-		       gpk ? data_race(READ_ONCE(gpk->state)) : ~0, cpu);
+		       READ_ONCE(rcu_state.gp_state),
+		       gpk ? READ_ONCE(gpk->state) : ~0, cpu);
 		if (gpk) {
 			pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name);
 			pr_err("RCU grace-period kthread stack dump:\n");
@@ -509,9 +509,9 @@ static void rcu_check_gp_kthread_expired_fqs_timer(void)
 		pr_err("%s kthread timer wakeup didn't happen for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx\n",
 		       rcu_state.name, (jiffies - jiffies_fqs),
 		       (long)rcu_seq_current(&rcu_state.gp_seq),
-		       data_race(rcu_state.gp_flags),
+		       rcu_state.gp_flags,
 		       gp_state_getname(RCU_GP_WAIT_FQS), RCU_GP_WAIT_FQS,
-		       data_race(READ_ONCE(gpk->state)));
+		       READ_ONCE(gpk->state));
 		pr_err("\tPossible timer handling issue on cpu=%d timer-softirq=%u\n",
 		       cpu, kstat_softirqs_cpu(TIMER_SOFTIRQ, cpu));
 	}
@@ -573,8 +573,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
 			gpa = data_race(READ_ONCE(rcu_state.gp_activity));
 			pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
 			       rcu_state.name, j - gpa, j, gpa,
-			       data_race(READ_ONCE(jiffies_till_next_fqs)),
-			       data_race(READ_ONCE(rcu_get_root()->qsmask)));
+			       READ_ONCE(jiffies_till_next_fqs),
+			       READ_ONCE(rcu_get_root()->qsmask));
 		}
 	}
 	/* Rewrite if needed in case of slow consoles. */
@@ -815,37 +815,43 @@ void show_rcu_gp_kthreads(void)
 	struct rcu_node *rnp;
 	struct task_struct *t = READ_ONCE(rcu_state.gp_kthread);
 
+	kcsan_disable_current();
+
 	j = jiffies;
-	ja = j - data_race(READ_ONCE(rcu_state.gp_activity));
-	jr = j - data_race(READ_ONCE(rcu_state.gp_req_activity));
-	js = j - data_race(READ_ONCE(rcu_state.gp_start));
-	jw = j - data_race(READ_ONCE(rcu_state.gp_wake_time));
+	ja = j - READ_ONCE(rcu_state.gp_activity);
+	jr = j - READ_ONCE(rcu_state.gp_req_activity);
+	js = j - READ_ONCE(rcu_state.gp_start);
+	jw = j - READ_ONCE(rcu_state.gp_wake_time);
 	pr_info("%s: wait state: %s(%d) ->state: %#lx ->rt_priority %u delta ->gp_start %lu ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_max %lu ->gp_flags %#x\n",
 		rcu_state.name, gp_state_getname(rcu_state.gp_state),
-		data_race(READ_ONCE(rcu_state.gp_state)),
-		t ? data_race(READ_ONCE(t->state)) : 0x1ffffL, t ? t->rt_priority : 0xffU,
-		js, ja, jr, jw, (long)data_race(READ_ONCE(rcu_state.gp_wake_seq)),
-		(long)data_race(READ_ONCE(rcu_state.gp_seq)),
-		(long)data_race(READ_ONCE(rcu_get_root()->gp_seq_needed)),
-		data_race(READ_ONCE(rcu_state.gp_max)),
-		data_race(READ_ONCE(rcu_state.gp_flags)));
+		READ_ONCE(rcu_state.gp_state),
+		t ? READ_ONCE(t->state) : 0x1ffffL, t ? t->rt_priority : 0xffU,
+		js, ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq),
+		(long)READ_ONCE(rcu_state.gp_seq),
+		(long)READ_ONCE(rcu_get_root()->gp_seq_needed),
+		READ_ONCE(rcu_state.gp_max),
+		READ_ONCE(rcu_state.gp_flags));
+
 	rcu_for_each_node_breadth_first(rnp) {
 		if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq), READ_ONCE(rnp->gp_seq_needed)) &&
-		    !data_race(READ_ONCE(rnp->qsmask)) && !data_race(READ_ONCE(rnp->boost_tasks)) &&
-		    !data_race(READ_ONCE(rnp->exp_tasks)) && !data_race(READ_ONCE(rnp->gp_tasks)))
+		    !READ_ONCE(rnp->qsmask) && !READ_ONCE(rnp->boost_tasks) &&
+		    !READ_ONCE(rnp->exp_tasks) && !READ_ONCE(rnp->gp_tasks))
 			continue;
+
 		pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld ->qsmask %#lx %c%c%c%c ->n_boosts %ld\n",
 			rnp->grplo, rnp->grphi,
-			(long)data_race(READ_ONCE(rnp->gp_seq)),
-			(long)data_race(READ_ONCE(rnp->gp_seq_needed)),
-			data_race(READ_ONCE(rnp->qsmask)),
-			".b"[!!data_race(READ_ONCE(rnp->boost_kthread_task))],
-			".B"[!!data_race(READ_ONCE(rnp->boost_tasks))],
-			".E"[!!data_race(READ_ONCE(rnp->exp_tasks))],
-			".G"[!!data_race(READ_ONCE(rnp->gp_tasks))],
-			data_race(READ_ONCE(rnp->n_boosts)));
+			(long)READ_ONCE(rnp->gp_seq),
+			(long)READ_ONCE(rnp->gp_seq_needed),
+			READ_ONCE(rnp->qsmask),
+			".b"[!!READ_ONCE(rnp->boost_kthread_task)],
+			".B"[!!READ_ONCE(rnp->boost_tasks)],
+			".E"[!!READ_ONCE(rnp->exp_tasks)],
+			".G"[!!READ_ONCE(rnp->gp_tasks)],
+			READ_ONCE(rnp->n_boosts));
+
 		if (!rcu_is_leaf_node(rnp))
 			continue;
+
 		for_each_leaf_node_possible_cpu(rnp, cpu) {
 			rdp = per_cpu_ptr(&rcu_data, cpu);
 			if (READ_ONCE(rdp->gpwrap) ||
@@ -853,17 +859,19 @@ void show_rcu_gp_kthreads(void)
 					 READ_ONCE(rdp->gp_seq_needed)))
 				continue;
 			pr_info("\tcpu %d ->gp_seq_needed %ld\n",
-				cpu, (long)data_race(READ_ONCE(rdp->gp_seq_needed)));
+				cpu, (long)READ_ONCE(rdp->gp_seq_needed));
 		}
 	}
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(&rcu_data, cpu);
-		cbs += data_race(READ_ONCE(rdp->n_cbs_invoked));
+		cbs += READ_ONCE(rdp->n_cbs_invoked);
 		if (rcu_segcblist_is_offloaded(&rdp->cblist))
 			show_rcu_nocb_state(rdp);
 	}
 	pr_info("RCU callbacks invoked since boot: %lu\n", cbs);
 	show_rcu_tasks_gp_kthreads();
+
+	kcsan_enable_current();
 }
 EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
 


* Re: RCU vs data_race()
  2021-06-18  8:24 RCU vs data_race() Peter Zijlstra
@ 2021-06-18  8:59 ` Marco Elver
  2021-06-18 11:26   ` Peter Zijlstra
  0 siblings, 1 reply; 14+ messages in thread
From: Marco Elver @ 2021-06-18  8:59 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Paul McKenney, linux-kernel

On Fri, Jun 18, 2021 at 10:24AM +0200, Peter Zijlstra wrote:
> Hi Paul,
> 
> Due to a merge conflict I had to look at some recent RCU code, and I saw
> you went a little overboard with data_race(). How's something like the
> below look to you?

I commented below. The main thing is just using the __no_kcsan function
attribute if it's only about accesses within the function (and not
also about called functions elsewhere).

Using the attribute also improves performance slightly (not that it
matters much in a KCSAN-enabled kernel) due to no instrumentation.
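
For concreteness, a minimal sketch of what that looks like (the struct
and function names are made up, not taken from the patch):

	/* __no_kcsan: KCSAN does not instrument accesses made directly in
	 * this function, so the plain loads below are never reported. */
	static void __no_kcsan show_foo_state(struct foo *f)
	{
		pr_info("foo: count=%lu flags=%#x\n", f->count, f->flags);
	}

Accesses performed by functions *called* from here are still checked.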

> The idea being that we fundamentally don't care about data races for
> debug/error condition prints, so marking every single variable access is
> just clutter.

Having data_race() around the pr_* helpers seems reasonable, if you
worry about future unnecessary markings that might pop up due to them.

> ---
> diff --git a/include/linux/printk.h b/include/linux/printk.h
> index f589b8b60806..8f21916c2fe2 100644
> --- a/include/linux/printk.h
> +++ b/include/linux/printk.h
> @@ -315,7 +315,7 @@ extern int kptr_restrict;
>   * generate the format string.
>   */
>  #define pr_emerg(fmt, ...) \
> -	printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
> +	data_race(printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__))
>  /**
>   * pr_alert - Print an alert-level message
>   * @fmt: format string
> @@ -325,7 +325,7 @@ extern int kptr_restrict;
>   * generate the format string.
>   */
>  #define pr_alert(fmt, ...) \
> -	printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
> +	data_race(printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__))
>  /**
>   * pr_crit - Print a critical-level message
>   * @fmt: format string
> @@ -335,7 +335,7 @@ extern int kptr_restrict;
>   * generate the format string.
>   */
>  #define pr_crit(fmt, ...) \
> -	printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
> +	data_race(printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__))
>  /**
>   * pr_err - Print an error-level message
>   * @fmt: format string
> @@ -345,7 +345,7 @@ extern int kptr_restrict;
>   * generate the format string.
>   */
>  #define pr_err(fmt, ...) \
> -	printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
> +	data_race(printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__))
>  /**
>   * pr_warn - Print a warning-level message
>   * @fmt: format string
> @@ -355,7 +355,7 @@ extern int kptr_restrict;
>   * to generate the format string.
>   */
>  #define pr_warn(fmt, ...) \
> -	printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
> +	data_race(printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__))
>  /**
>   * pr_notice - Print a notice-level message
>   * @fmt: format string
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index 6833d8887181..8bb4ec3c7e6e 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -1353,8 +1353,10 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)

Looks like this is the whole function, so this could just be:

	void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) __no_kcsan

	[Attribute can be placed in front or at end, but it seems less
	 intrusive if at the end.]

>  		struct srcu_data *sdp;
>  
>  		sdp = per_cpu_ptr(ssp->sda, cpu);
> -		u0 = data_race(sdp->srcu_unlock_count[!idx]);
> -		u1 = data_race(sdp->srcu_unlock_count[idx]);
> +
> +		data_race(

... and we'd no longer require this if using the attribute for the whole
function.

> +		u0 = sdp->srcu_unlock_count[!idx];
> +		u1 = sdp->srcu_unlock_count[idx];
>  
>  		/*
>  		 * Make sure that a lock is always counted if the corresponding
> @@ -1362,14 +1364,15 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
>  		 */
>  		smp_rmb();
>  
> -		l0 = data_race(sdp->srcu_lock_count[!idx]);
> -		l1 = data_race(sdp->srcu_lock_count[idx]);
> +		l0 = sdp->srcu_lock_count[!idx];
> +		l1 = sdp->srcu_lock_count[idx];
>  
>  		c0 = l0 - u0;
>  		c1 = l1 - u1;
>  		pr_cont(" %d(%ld,%ld %c)",
>  			cpu, c0, c1,
> -			"C."[rcu_segcblist_empty(&sdp->srcu_cblist)]);
> +			"C."[rcu_segcblist_empty(&sdp->srcu_cblist)]));
> +
>  		s0 += c0;
>  		s1 += c1;
>  	}
> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
> index 1cece5e9be9a..53080c3bede2 100644
> --- a/kernel/rcu/tasks.h
> +++ b/kernel/rcu/tasks.h
> @@ -280,15 +280,15 @@ static void __init rcu_tasks_bootup_oddness(void)
>  /* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
>  static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)

	static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s) __no_kcsan

... and so on below.

>  {
> +	data_race(
>  	pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
>  		rtp->kname,
> -		tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
> -		jiffies - data_race(rtp->gp_jiffies),
> -		data_race(rtp->n_gps),
> -		data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
> -		".k"[!!data_race(rtp->kthread_ptr)],
> -		".C"[!!data_race(rtp->cbs_head)],
> -		s);
> +		tasks_gp_state_getname(rtp), rtp->gp_state,
> +		jiffies - rtp->gp_jiffies,
> +		rtp->n_gps, rtp->n_ipis_fails, rtp->n_ipis,
> +		".k"[!!rtp->kthread_ptr],
> +		".C"[!!rtp->cbs_head],
> +		s));
>  }
>  #endif // #ifndef CONFIG_TINY_RCU
>  
> @@ -1291,10 +1291,11 @@ void show_rcu_tasks_trace_gp_kthread(void)
>  {
>  	char buf[64];
>  
> +	data_race(
>  	sprintf(buf, "N%d h:%lu/%lu/%lu", atomic_read(&trc_n_readers_need_end),
> -		data_race(n_heavy_reader_ofl_updates),
> -		data_race(n_heavy_reader_updates),
> -		data_race(n_heavy_reader_attempts));
> +		n_heavy_reader_ofl_updates,
> +		n_heavy_reader_updates,
> +		n_heavy_reader_attempts));
>  	show_rcu_tasks_generic_gp_kthread(&rcu_tasks_trace, buf);
>  }
>  EXPORT_SYMBOL_GPL(show_rcu_tasks_trace_gp_kthread);
> diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
> index 24065f1acb8b..1e392beabbba 100644
> --- a/kernel/rcu/tree_stall.h
> +++ b/kernel/rcu/tree_stall.h
> @@ -448,7 +448,7 @@ static void print_cpu_stall_info(int cpu)
>  	       rcu_dynticks_snap(rdp) & 0xfff,
>  	       rdp->dynticks_nesting, rdp->dynticks_nmi_nesting,
>  	       rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
> -	       data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
> +	       rcu_state.n_force_qs - rcu_state.n_force_qs_gpstart,
>  	       fast_no_hz,
>  	       falsepositive ? " (false positive?)" : "");
>  }
> @@ -465,10 +465,10 @@ static void rcu_check_gp_kthread_starvation(void)
>  		pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
>  		       rcu_state.name, j,
>  		       (long)rcu_seq_current(&rcu_state.gp_seq),
> -		       data_race(READ_ONCE(rcu_state.gp_flags)),
> +		       READ_ONCE(rcu_state.gp_flags),
>  		       gp_state_getname(rcu_state.gp_state),
> -		       data_race(READ_ONCE(rcu_state.gp_state)),
> -		       gpk ? data_race(READ_ONCE(gpk->state)) : ~0, cpu);
> +		       READ_ONCE(rcu_state.gp_state),
> +		       gpk ? READ_ONCE(gpk->state) : ~0, cpu);
>  		if (gpk) {
>  			pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name);
>  			pr_err("RCU grace-period kthread stack dump:\n");
> @@ -509,9 +509,9 @@ static void rcu_check_gp_kthread_expired_fqs_timer(void)
>  		pr_err("%s kthread timer wakeup didn't happen for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx\n",
>  		       rcu_state.name, (jiffies - jiffies_fqs),
>  		       (long)rcu_seq_current(&rcu_state.gp_seq),
> -		       data_race(rcu_state.gp_flags),
> +		       rcu_state.gp_flags,
>  		       gp_state_getname(RCU_GP_WAIT_FQS), RCU_GP_WAIT_FQS,
> -		       data_race(READ_ONCE(gpk->state)));
> +		       READ_ONCE(gpk->state));
>  		pr_err("\tPossible timer handling issue on cpu=%d timer-softirq=%u\n",
>  		       cpu, kstat_softirqs_cpu(TIMER_SOFTIRQ, cpu));
>  	}
> @@ -573,8 +573,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
>  			gpa = data_race(READ_ONCE(rcu_state.gp_activity));
>  			pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
>  			       rcu_state.name, j - gpa, j, gpa,
> -			       data_race(READ_ONCE(jiffies_till_next_fqs)),
> -			       data_race(READ_ONCE(rcu_get_root()->qsmask)));
> +			       READ_ONCE(jiffies_till_next_fqs),
> +			       READ_ONCE(rcu_get_root()->qsmask));
>  		}
>  	}
>  	/* Rewrite if needed in case of slow consoles. */
> @@ -815,37 +815,43 @@ void show_rcu_gp_kthreads(void)
>  	struct rcu_node *rnp;
>  	struct task_struct *t = READ_ONCE(rcu_state.gp_kthread);
>  
> +	kcsan_disable_current();

I guess the main difference is if there are calls that should also
ignore data races, in which case kcsan_disable_current() is required and
__no_kcsan doesn't work.
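
Roughly like this sketch (struct foo and the callee are hypothetical):

	static void show_foo_state_deep(struct foo *f)
	{
		/* Suppress KCSAN reporting for this task until re-enabled;
		 * unlike __no_kcsan this also covers the callees' accesses. */
		kcsan_disable_current();
		pr_info("foo: count=%lu\n", f->count);
		dump_foo_details(f);	/* plain accesses in here are ignored too */
		kcsan_enable_current();
	}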

> +
>  	j = jiffies;
> -	ja = j - data_race(READ_ONCE(rcu_state.gp_activity));
> -	jr = j - data_race(READ_ONCE(rcu_state.gp_req_activity));
> -	js = j - data_race(READ_ONCE(rcu_state.gp_start));
> -	jw = j - data_race(READ_ONCE(rcu_state.gp_wake_time));
> +	ja = j - READ_ONCE(rcu_state.gp_activity);
> +	jr = j - READ_ONCE(rcu_state.gp_req_activity);
> +	js = j - READ_ONCE(rcu_state.gp_start);
> +	jw = j - READ_ONCE(rcu_state.gp_wake_time);
>  	pr_info("%s: wait state: %s(%d) ->state: %#lx ->rt_priority %u delta ->gp_start %lu ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_max %lu ->gp_flags %#x\n",
>  		rcu_state.name, gp_state_getname(rcu_state.gp_state),
> -		data_race(READ_ONCE(rcu_state.gp_state)),
> -		t ? data_race(READ_ONCE(t->state)) : 0x1ffffL, t ? t->rt_priority : 0xffU,
> -		js, ja, jr, jw, (long)data_race(READ_ONCE(rcu_state.gp_wake_seq)),
> -		(long)data_race(READ_ONCE(rcu_state.gp_seq)),
> -		(long)data_race(READ_ONCE(rcu_get_root()->gp_seq_needed)),
> -		data_race(READ_ONCE(rcu_state.gp_max)),
> -		data_race(READ_ONCE(rcu_state.gp_flags)));
> +		READ_ONCE(rcu_state.gp_state),
> +		t ? READ_ONCE(t->state) : 0x1ffffL, t ? t->rt_priority : 0xffU,
> +		js, ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq),
> +		(long)READ_ONCE(rcu_state.gp_seq),
> +		(long)READ_ONCE(rcu_get_root()->gp_seq_needed),
> +		READ_ONCE(rcu_state.gp_max),
> +		READ_ONCE(rcu_state.gp_flags));
> +
>  	rcu_for_each_node_breadth_first(rnp) {
>  		if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq), READ_ONCE(rnp->gp_seq_needed)) &&
> -		    !data_race(READ_ONCE(rnp->qsmask)) && !data_race(READ_ONCE(rnp->boost_tasks)) &&
> -		    !data_race(READ_ONCE(rnp->exp_tasks)) && !data_race(READ_ONCE(rnp->gp_tasks)))
> +		    !READ_ONCE(rnp->qsmask) && !READ_ONCE(rnp->boost_tasks) &&
> +		    !READ_ONCE(rnp->exp_tasks) && !READ_ONCE(rnp->gp_tasks))
>  			continue;
> +
>  		pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld ->qsmask %#lx %c%c%c%c ->n_boosts %ld\n",
>  			rnp->grplo, rnp->grphi,
> -			(long)data_race(READ_ONCE(rnp->gp_seq)),
> -			(long)data_race(READ_ONCE(rnp->gp_seq_needed)),
> -			data_race(READ_ONCE(rnp->qsmask)),
> -			".b"[!!data_race(READ_ONCE(rnp->boost_kthread_task))],
> -			".B"[!!data_race(READ_ONCE(rnp->boost_tasks))],
> -			".E"[!!data_race(READ_ONCE(rnp->exp_tasks))],
> -			".G"[!!data_race(READ_ONCE(rnp->gp_tasks))],
> -			data_race(READ_ONCE(rnp->n_boosts)));
> +			(long)READ_ONCE(rnp->gp_seq),
> +			(long)READ_ONCE(rnp->gp_seq_needed),
> +			READ_ONCE(rnp->qsmask),
> +			".b"[!!READ_ONCE(rnp->boost_kthread_task)],
> +			".B"[!!READ_ONCE(rnp->boost_tasks)],
> +			".E"[!!READ_ONCE(rnp->exp_tasks)],
> +			".G"[!!READ_ONCE(rnp->gp_tasks)],
> +			READ_ONCE(rnp->n_boosts));
> +
>  		if (!rcu_is_leaf_node(rnp))
>  			continue;
> +
>  		for_each_leaf_node_possible_cpu(rnp, cpu) {
>  			rdp = per_cpu_ptr(&rcu_data, cpu);
>  			if (READ_ONCE(rdp->gpwrap) ||
> @@ -853,17 +859,19 @@ void show_rcu_gp_kthreads(void)
>  					 READ_ONCE(rdp->gp_seq_needed)))
>  				continue;
>  			pr_info("\tcpu %d ->gp_seq_needed %ld\n",
> -				cpu, (long)data_race(READ_ONCE(rdp->gp_seq_needed)));
> +				cpu, (long)READ_ONCE(rdp->gp_seq_needed));
>  		}
>  	}
>  	for_each_possible_cpu(cpu) {
>  		rdp = per_cpu_ptr(&rcu_data, cpu);
> -		cbs += data_race(READ_ONCE(rdp->n_cbs_invoked));
> +		cbs += READ_ONCE(rdp->n_cbs_invoked);
>  		if (rcu_segcblist_is_offloaded(&rdp->cblist))
>  			show_rcu_nocb_state(rdp);
>  	}
>  	pr_info("RCU callbacks invoked since boot: %lu\n", cbs);
>  	show_rcu_tasks_gp_kthreads();
> +
> +	kcsan_enable_current();
>  }
>  EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
>  


* Re: RCU vs data_race()
  2021-06-18  8:59 ` Marco Elver
@ 2021-06-18 11:26   ` Peter Zijlstra
  2021-06-18 20:48     ` Paul E. McKenney
  0 siblings, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2021-06-18 11:26 UTC (permalink / raw)
  To: Marco Elver; +Cc: Paul McKenney, linux-kernel

On Fri, Jun 18, 2021 at 10:59:26AM +0200, Marco Elver wrote:
> On Fri, Jun 18, 2021 at 10:24AM +0200, Peter Zijlstra wrote:
> > Hi Paul,
> > 
> > Due to a merge conflict I had to look at some recent RCU code, and I saw
> > you went a little overboard with data_race(). How's something like the
> > below look to you?
> 
> I commented below. The main thing is just using the __no_kcsan function
> attribute if it's only about accesses within the function (and not
> also about called functions elsewhere).
> 
> Using the attribute also improves performance slightly (not that it
> matters much in a KCSAN-enabled kernel) due to no instrumentation.

Aah yes, of course! Much better still.

> > The idea being that we fundamentally don't care about data races for
> > debug/error condition prints, so marking every single variable access is
> > just clutter.
> 
> Having data_race() around the pr_* helpers seems reasonable, if you
> worry about future unnecessary markings that might pop up due to them.

Right, so I did them on WARN and higher, figuring that those really only
happen when there's something wrong and we're way past caring about
data races. Paul has a few pr_info() users that are heavy on
data_race(), but your __no_kcsan attribute suggestion helps with that.




* Re: RCU vs data_race()
  2021-06-18 11:26   ` Peter Zijlstra
@ 2021-06-18 20:48     ` Paul E. McKenney
  2021-06-20 19:14       ` Peter Zijlstra
  0 siblings, 1 reply; 14+ messages in thread
From: Paul E. McKenney @ 2021-06-18 20:48 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Marco Elver, linux-kernel

On Fri, Jun 18, 2021 at 01:26:10PM +0200, Peter Zijlstra wrote:
> On Fri, Jun 18, 2021 at 10:59:26AM +0200, Marco Elver wrote:
> > On Fri, Jun 18, 2021 at 10:24AM +0200, Peter Zijlstra wrote:
> > > Hi Paul,
> > > 
> > > Due to a merge conflict I had to look at some recent RCU code, and I saw
> > > you went a little overboard with data_race(). How's something like the
> > > below look to you?
> > 
> > I commented below. The main thing is just using the __no_kcsan function
> > attribute if it's only about accesses within the function (and not
> > also about called functions elsewhere).
> > 
> > Using the attribute also improves performance slightly (not that it
> > matters much in a KCSAN-enabled kernel) due to no instrumentation.
> 
> Aah yes, of course! Much better still.
> 
> > > The idea being that we fundamentally don't care about data races for
> > > debug/error condition prints, so marking every single variable access is
> > > just clutter.
> > 
> > Having data_race() around the pr_* helpers seems reasonable, if you
> > worry about future unnecessary markings that might pop up due to them.
> 
> Right, so I did them on WARN and higher, figuring that those really only
> happen when there's something wrong and we're way past caring about
> data races. Paul has a few pr_info() users that are heavy on
> data_race(), but your __no_kcsan attribute suggestion helps with that.

But there can be cases where some mutex is supposed to be held across
updates to one of the fields to be printed, and that mutex is held across
the pr_*().  In that case, we -want- KCSAN to yell if there is a data
race involving that field.

So I am not at all a fan of this change.

But a similar technique might help elsewhere.  RCU uses strict
KCSAN settings (something about me not wanting minor code-generation
performance issues turning into full-fledged RCU concurrency bugs),
but invokes code that uses looser settings.  This means that RCU gets
"false-positive" KCSAN complaints on racing calls to (for example)
schedule_timeout_interruptible().

My thought is to create a rcu_schedule_timeout_interruptible(), for one
example, that suppresses KCSAN warnings under the assumption that they
will be caught by KCSAN runs on other parts of the kernel.  Among other
things, this would also allow them to be easily adjusted as appropriate.
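
Something like the following, perhaps (an untested sketch of the idea):

	static signed long rcu_schedule_timeout_interruptible(signed long timeout)
	{
		signed long ret;

		/* Hide KCSAN reports for anything this call does internally;
		 * such races are still caught by KCSAN runs covering the
		 * rest of the kernel. */
		kcsan_disable_current();
		ret = schedule_timeout_interruptible(timeout);
		kcsan_enable_current();
		return ret;
	}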

Thoughts?

							Thanx, Paul


* Re: RCU vs data_race()
  2021-06-18 20:48     ` Paul E. McKenney
@ 2021-06-20 19:14       ` Peter Zijlstra
  2021-06-20 21:01         ` Paul E. McKenney
  0 siblings, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2021-06-20 19:14 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: Marco Elver, linux-kernel

On Fri, Jun 18, 2021 at 01:48:00PM -0700, Paul E. McKenney wrote:
> On Fri, Jun 18, 2021 at 01:26:10PM +0200, Peter Zijlstra wrote:
> > On Fri, Jun 18, 2021 at 10:59:26AM +0200, Marco Elver wrote:
> > > On Fri, Jun 18, 2021 at 10:24AM +0200, Peter Zijlstra wrote:
> > > > Hi Paul,
> > > > 
> > > > Due to a merge conflict I had to look at some recent RCU code, and I saw
> > > > you went a little overboard with data_race(). How's something like the
> > > > below look to you?
> > > 
> > > I commented below. The main thing is just using the __no_kcsan function
> > > attribute if it's only about accesses within the function (and not
> > > also about called functions elsewhere).
> > > 
> > > Using the attribute also improves performance slightly (not that it
> > > matters much in a KCSAN-enabled kernel) due to no instrumentation.
> > 
> > Aah yes, of course! Much better still.
> > 
> > > > The idea being that we fundamentally don't care about data races for
> > > > debug/error condition prints, so marking every single variable access is
> > > > just clutter.
> > > 
> > > Having data_race() around the pr_* helpers seems reasonable, if you
> > > worry about future unnecessary markings that might pop up due to them.
> > 
> > Right, so I did them on WARN and higher, figuring that those really only
> > happen when there's something wrong and we're way past caring about
> > data races. Paul has a few pr_info() users that are heavy on
> > data_race(), but your __no_kcsan attribute suggestion helps with that.
> 
> But there can be cases where some mutex is supposed to be held across
> updates to one of the fields to be printed, and that mutex is held across
> the pr_*().  In that case, we -want- KCSAN to yell if there is a data
> race involving that field.

I don't buy that argument. pr_err() (or worse) is not supposed to
happen, ever. If it does, *that* is a far worse condition than any data
race possibly found by kcsan.

So the only way the pr_err() expression itself can lead to kcsan
determining a data-race is if something far worse triggered the pr_err()
itself.

> So I am not at all a fan of this change.
> 
> But a similar technique might help elsewhere.  RCU uses strict
> KCSAN settings (something about me not wanting minor code-generation
> > performance issues turning into full-fledged RCU concurrency bugs),
> but invokes code that uses looser settings.  This means that RCU gets
> "false-positive" KCSAN complaints on racing calls to (for example)
> schedule_timeout_interruptible().
> 
> My thought is to create a rcu_schedule_timeout_interruptible(), for one
> example, that suppresses KCSAN warnings under the assumption that they
> will be caught by KCSAN runs on other parts of the kernel.  Among other
> things, this would also allow them to be easily adjusted as appropriate.
> 
> Thoughts?

You've lost me on the schedule thing, what?

All I'm saying is that RCU is turning into an unreadable mess of
data_race(), and marking whole functions or whole statements that should
only ever happen on actual errors *anyway*, seems to significantly
reduce the clutter.




* Re: RCU vs data_race()
  2021-06-20 19:14       ` Peter Zijlstra
@ 2021-06-20 21:01         ` Paul E. McKenney
  2021-06-21  7:28           ` Peter Zijlstra
  0 siblings, 1 reply; 14+ messages in thread
From: Paul E. McKenney @ 2021-06-20 21:01 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Marco Elver, linux-kernel

On Sun, Jun 20, 2021 at 09:14:28PM +0200, Peter Zijlstra wrote:
> On Fri, Jun 18, 2021 at 01:48:00PM -0700, Paul E. McKenney wrote:
> > On Fri, Jun 18, 2021 at 01:26:10PM +0200, Peter Zijlstra wrote:
> > > On Fri, Jun 18, 2021 at 10:59:26AM +0200, Marco Elver wrote:
> > > > On Fri, Jun 18, 2021 at 10:24AM +0200, Peter Zijlstra wrote:
> > > > > Hi Paul,
> > > > > 
> > > > > Due to a merge conflict I had to look at some recent RCU code, and I saw
> > > > > you went a little overboard with data_race(). How's something like the
> > > > > below look to you?
> > > > 
> > > > I commented below. The main thing is just using the __no_kcsan function
> > > > attribute if it's only about accesses within the function (and not
> > > > also about called functions elsewhere).
> > > > 
> > > > Using the attribute also improves performance slightly (not that it
> > > > matters much in a KCSAN-enabled kernel) due to no instrumentation.
> > > 
> > > Aah yes, of course! Much better still.
> > > 
> > > > > The idea being that we fundamentally don't care about data races for
> > > > > debug/error condition prints, so marking every single variable access is
> > > > > just clutter.
> > > > 
> > > > Having data_race() around the pr_* helpers seems reasonable, if you
> > > > worry about future unnecessary markings that might pop up due to them.
> > > 
> > > Right, so I did them on WARN and higher, figuring that those really only
> > > happen when there's something wrong and we're way past caring about
> > > data races. Paul has a few pr_info() users that are heavy on
> > > data_race(), but your __no_kcsan attribute suggestion helps with that.
> > 
> > But there can be cases where some mutex is supposed to be held across
> > updates to one of the fields to be printed, and that mutex is held across
> > the pr_*().  In that case, we -want- KCSAN to yell if there is a data
> > race involving that field.
> 
> I don't buy that argument. pr_err() (or worse) is not supposed to
> happen, ever. If it does, *that* is a far worse condition than any data
> race possibly found by kcsan.
> 
> So the only way the pr_err() expression itself can lead to kcsan
> determining a data-race is if something far worse triggered the pr_err()
> itself.

Earlier, you said pr_warn().  Above, I said pr_*().  Now you say
pr_err().  But OK...

Let's take for example the pr_err() in __call_rcu(), that is, the
double-free diagnostic.  A KCSAN warning on the unmarked load from
head->func could give valuable information on the whereabouts of the
other code interfering with the callback.  Blanket disabling of KCSAN
across all pr_err() calls (let alone all pr_*() calls) would be the
opposite of helpful.
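
To illustrate the pattern, here is a simplified sketch (not the actual
__call_rcu() code, and the helper name is invented):

	if (unlikely(double_enqueue_detected(head))) {
		/* Deliberately plain load of head->func: if another CPU is
		 * concurrently writing this callback, KCSAN names that code. */
		pr_err("%s: double call_rcu(): %p invokes %pS\n",
		       __func__, head, head->func);
		return;
	}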

> > So I am not at all a fan of this change.
> > 
> > But a similar technique might help elsewhere.  RCU uses strict
> > KCSAN settings (something about me not wanting minor code-generation
> > performance issues turning into full-fledged RCU concurrency bugs),
> > but invokes code that uses looser settings.  This means that RCU gets
> > "false-positive" KCSAN complaints on racing calls to (for example)
> > schedule_timeout_interruptible().
> > 
> > My thought is to create a rcu_schedule_timeout_interruptible(), for one
> > example, that suppresses KCSAN warnings under the assumption that they
> > will be caught by KCSAN runs on other parts of the kernel.  Among other
> > things, this would also allow them to be easily adjusted as appropriate.
> > 
> > Thoughts?
> 
> You've lost me on the schedule thing, what?

The definition of schedule_timeout_interruptible() is in part of the
kernel that uses much looser KCSAN checking.  Thus there are some
KCSAN warnings from RCU involving schedule_timeout_interruptible().
But at least some of these warnings are for conflicting writes, which
many parts of the kernel suppress KCSAN warnings for.

So a wrapper for some functions could get to clean KCSAN for RCU
without me having to push not-yet-wanted markings into other parts
of the kernel.

> All I'm saying is that RCU is turning into an unreadable mess of
> data_race(), and marking whole functions or whole statements that should
> only ever happen on actual errors *anyway*, seems to significantly
> reduce the clutter.

Sorry, but no.  Between your eyes and my sanity, my sanity wins.

							Thanx, Paul


* Re: RCU vs data_race()
  2021-06-20 21:01         ` Paul E. McKenney
@ 2021-06-21  7:28           ` Peter Zijlstra
  2021-06-21 13:37             ` Paul E. McKenney
  0 siblings, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2021-06-21  7:28 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: Marco Elver, linux-kernel

On Sun, Jun 20, 2021 at 02:01:27PM -0700, Paul E. McKenney wrote:
> On Sun, Jun 20, 2021 at 09:14:28PM +0200, Peter Zijlstra wrote:

> > I don't buy that argument. pr_err() (or worse) is not supposed to
> > happen, ever. If it does, *that* is a far worse condition than any data
> > race possibly found by kcsan.
> > 
> > So the only way the pr_err() expression itself can lead to kcsan
> > determining a data-race is if something far worse triggered the pr_err()
> > itself.
> 
> Earlier, you said pr_warn().  Above, I said pr_*().  Now you say
> pr_err().  But OK...

Same thing... also Sundays aren't great for details, it seems :-)

> Let's take for example the pr_err() in __call_rcu(), that is, the
> double-free diagnostic.  A KCSAN warning on the unmarked load from
> head->func could give valuable information on the whereabouts of the
> other code interfering with the callback.  Blanket disabling of KCSAN
> across all pr_err() calls (let alone all pr_*() calls) would be the
> opposite of helpful.

I'm confused. That pr_err() should never happen in a correct program. If
it happens, fix it and any data race as a consequence of that pr_err()
no longer exists either.

I fundamentally don't see the relevance of a possible data race from a
statement that should never happen in a correct program to begin with.

Why do you think otherwise?

> > You've lost me on the schedule thing, what?
> 
> The definition of schedule_timeout_interruptible() is in part of the
> kernel that uses much looser KCSAN checking.  Thus there are some
> KCSAN warnings from RCU involving schedule_timeout_interruptible().
> But at least some of these warnings are for conflicting writes, which
> many parts of the kernel suppress KCSAN warnings for.

You've lost me again.. schedule_timeout_interruptible() doesn't do
writes to rcu state, does it? So how can there be problems?


* Re: RCU vs data_race()
  2021-06-21  7:28           ` Peter Zijlstra
@ 2021-06-21 13:37             ` Paul E. McKenney
  2021-07-06  8:00               ` Peter Zijlstra
  2021-07-06  8:37               ` Peter Zijlstra
  0 siblings, 2 replies; 14+ messages in thread
From: Paul E. McKenney @ 2021-06-21 13:37 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Marco Elver, linux-kernel

On Mon, Jun 21, 2021 at 09:28:02AM +0200, Peter Zijlstra wrote:
> On Sun, Jun 20, 2021 at 02:01:27PM -0700, Paul E. McKenney wrote:
> > On Sun, Jun 20, 2021 at 09:14:28PM +0200, Peter Zijlstra wrote:
> 
> > > I don't buy that argument. pr_err() (or worse) is not supposed to
> > > happen, ever. If it does, *that* is a far worse condition than any data
> > > race possibly found by kcsan.
> > > 
> > > So the only way the pr_err() expression itself can lead to kcsan
> > > determining a data-race is if something far worse triggered the pr_err()
> > > itself.
> > 
> > Earlier, you said pr_warn().  Above, I said pr_*().  Now you say
> > pr_err().  But OK...
> 
> Same thing... also Sundays aren't great for details, it seems :-)

I know that feeling!  ;-)

> > Let's take for example the pr_err() in __call_rcu(), that is, the
> > double-free diagnostic.  A KCSAN warning on the unmarked load from
> > head->func could give valuable information on the whereabouts of the
> > other code interfering with the callback.  Blanket disabling of KCSAN
> > across all pr_err() calls (let alone all pr_*() calls) would be the
> > opposite of helpful.
> 
> I'm confused. That pr_err() should never happen in a correct program. If
> it happens, fix it and any data race as a consequence of that pr_err()
> no longer exists either.
> 
> I fundamentally don't see the relevance of a possible data race from a
> statement that should never happen in a correct program to begin with.
> 
> Why do you think otherwise?

Because detection of that data race can provide valuable debugging help.

> > > You've lost me on the schedule thing, what?
> > 
> > The definition of schedule_timeout_interruptible() is in part of the
> > kernel that uses much looser KCSAN checking.  Thus there are some
> > KCSAN warnings from RCU involving schedule_timeout_interruptible().
> > But at least some of these warnings are for conflicting writes, which
> > many parts of the kernel suppress KCSAN warnings for.
> 
> You've lost me again.. schedule_timeout_interruptible() doesn't do
> writes to rcu state, does it? So how can there be problems?

Because the static inline functions end up in RCU's translation units,
and they do write to other state.  See for example the line marked with
the asterisk at the end of this email, which is apparently between
__run_timers() and rcu_torture_reader().  But gdb says that this is
really between this statement in __run_timers():

	base->running_timer = NULL;

And this statement in try_to_del_timer_sync():

	if (base->running_timer != timer)

Because of inline functions, running KCSAN on RCU can detect races in
other subsystems.  In this case, I could argue for a READ_ONCE() on the
"if" condition, but last I knew, the rules for timers are that C-language
writes do not participate in data races.  So maybe I should add that
READ_ONCE(), but running KCSAN on rcutorture would still complain.

Here is the stack leading to try_to_del_timer_sync():

	read to 0xffff95bbdf49c280 of 8 bytes by task 167 on cpu 0:
	 try_to_del_timer_sync+0x96/0x1b0
	 del_timer_sync+0xa8/0xe0
	 schedule_timeout+0xc9/0x140
	 schedule_timeout_interruptible+0x28/0x30
	 stutter_wait+0x18a/0x220
	 rcu_torture_reader+0xfc/0x330
	 kthread+0x1fa/0x210
	 ret_from_fork+0x22/0x30

So one option would be to add the READ_ONCE(), but put an RCU wrapper
around schedule_timeout_interruptible() to allow rcutorture to either
show all data races or only those directly related to RCU.  The reason for
wanting it to show only those directly related to RCU is that this allows
me to much more easily spot a new RCU-related data race.  The reason
for wanting it to (only sometimes!) show all data races is to help track
down any non-RCU bugs that might be afflicting my rcutorture runs.

Make sense?

							Thanx, Paul

------------------------------------------------------------------------

    129 BUG: KCSAN: data-race in prandom_bytes / prandom_u32
    123 BUG: KCSAN: data-race in __hrtimer_run_queues / hrtimer_active
     56 BUG: KCSAN: data-race in mutex_spin_on_owner+0x13d/0x260
     51 BUG: KCSAN: data-race in prandom_u32 / prandom_u32
     41 BUG: KCSAN: data-race in __run_timers / try_to_del_timer_sync
     37 BUG: KCSAN: data-race in tick_sched_timer / tick_sched_timer
     34 BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event
     26 BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_idle_stop_tick
*    18 BUG: KCSAN: data-race in __run_timers / rcu_torture_reader
     16 BUG: KCSAN: data-race in __hrtimer_run_queues / hrtimer_try_to_cancel
     16 BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_sched_timer
      6 BUG: KCSAN: data-race in cpumask_next+0x20/0x50
      6 BUG: KCSAN: data-race in __mod_timer / __run_timers
      6 BUG: KCSAN: data-race in run_timer_softirq / try_to_del_timer_sync
      6 BUG: KCSAN: data-race in rwsem_spin_on_owner+0x144/0x230
      6 BUG: KCSAN: data-race in tick_nohz_next_event / tick_sched_timer
      5 BUG: KCSAN: data-race in cpumask_next_and+0x25/0x60
      4 BUG: KCSAN: data-race in _find_next_bit+0x3f/0x120
      4 BUG: KCSAN: data-race in ps2_do_sendbyte / ps2_handle_ack
      4 BUG: KCSAN: data-race in rcu_preempt_deferred_qs_irqrestore / try_to_take_rt_mutex
      4 BUG: KCSAN: data-race in rcu_torture_reader / run_timer_softirq
      3 BUG: KCSAN: data-race in cpumask_next_and+0x27/0x70
      3 BUG: KCSAN: data-race in cpumask_next_and+0x30/0x60
      3 BUG: KCSAN: data-race in cpumask_next_wrap+0x4b/0xa0
      3 BUG: KCSAN: data-race in do_raw_write_lock / do_raw_write_unlock
      3 BUG: KCSAN: data-race in process_one_work / queue_work_on
      3 BUG: KCSAN: data-race in timer_clear_idle / trigger_dyntick_cpu
      2 BUG: KCSAN: data-race in do_raw_write_lock / do_raw_write_lock
      2 BUG: KCSAN: data-race in ktime_get_real_fast_ns+0x7d/0x130
      2 BUG: KCSAN: data-race in ktime_get_real_fast_ns+0xb5/0x130
      2 BUG: KCSAN: data-race in __mod_timer / run_timer_softirq
      2 BUG: KCSAN: data-race in queue_work_on / worker_thread
      1 BUG: KCSAN: data-race in bringup_cpu / cpuhp_should_run
      1 BUG: KCSAN: data-race in can_stop_idle_tick / tick_sched_timer
      1 BUG: KCSAN: data-race in internal_add_timer / update_process_times
      1 BUG: KCSAN: data-race in rcu_nocb_cb_kthread / rcu_nocb_gp_kthread
      1 BUG: KCSAN: data-race in rcu_preempt_deferred_qs_irqrestore / rt_mutex_unlock
      1 BUG: KCSAN: data-race in __ww_mutex_lock+0x1a3/0x1610


* Re: RCU vs data_race()
  2021-06-21 13:37             ` Paul E. McKenney
@ 2021-07-06  8:00               ` Peter Zijlstra
  2021-07-06  8:44                 ` Marco Elver
  2021-07-06  8:37               ` Peter Zijlstra
  1 sibling, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2021-07-06  8:00 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: Marco Elver, linux-kernel


Sorry for the late reply, thread got snowed under :/

On Mon, Jun 21, 2021 at 06:37:57AM -0700, Paul E. McKenney wrote:
> On Mon, Jun 21, 2021 at 09:28:02AM +0200, Peter Zijlstra wrote:
> > On Sun, Jun 20, 2021 at 02:01:27PM -0700, Paul E. McKenney wrote:
> > > On Sun, Jun 20, 2021 at 09:14:28PM +0200, Peter Zijlstra wrote:
> > 
> > > > I don't buy that argument. pr_err() (or worse) is not supposed to
> > > > happen, ever. If it does, *that* is a far worse condition than any data
> > > > race possibly found by kcsan.
> > > > 
> > > > So the only way the pr_err() expression itself can lead to kcsan
> > > > determining a data-race is if something far worse triggered the pr_err()
> > > > itself.
> > > 
> > > Earlier, you said pr_warn().  Above, I said pr_*().  Now you say
> > > pr_err().  But OK...
> > 
> > Same thing... also Sundays aren't great for details, it seems :-)
> 
> I know that feeling!  ;-)
> 
> > > Let's take for example the pr_err() in __call_rcu(), that is, the
> > > double-free diagnostic.  A KCSAN warning on the unmarked load from
> > > head->func could give valuable information on the whereabouts of the
> > > other code interfering with the callback.  Blanket disabling of KCSAN
> > > across all pr_err() calls (let alone all pr_*() calls) would be the
> > > opposite of helpful.
> > 
> > I'm confused. That pr_err() should never happen in a correct program. If
> > it happens, fix it and any data race as a consequence of that pr_err()
> > no longer exists either.
> > 
> > I fundamentally don't see the relevance of a possible data race from a
> > statement that should never happen in a correct program to begin with.
> > 
> > Why do you think otherwise?
> 
> Because detection of that data race can provide valuable debugging help.

In that case, would not an explicit: data_debug(addr) call (implemented
by KASAN/KCSAN/whoever), which would report whatever knowledge they have
about that address, be even more useful?

Then you don't need to hope that there's a data race of sorts in order
to obtain said information.

That is, if you want information, explicitly asking for it seems *much*
better than hoping.
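
Usage would be something like this (data_debug() is purely hypothetical,
as is the helper guarding it):

	if (unlikely(double_enqueue_detected(head))) {
		data_debug(head);	/* dump whatever KASAN/KCSAN know about 'head' */
		pr_err("double call_rcu() of %p\n", head);
		return;
	}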


* Re: RCU vs data_race()
  2021-06-21 13:37             ` Paul E. McKenney
  2021-07-06  8:00               ` Peter Zijlstra
@ 2021-07-06  8:37               ` Peter Zijlstra
  2021-07-06 14:16                 ` Paul E. McKenney
  1 sibling, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2021-07-06  8:37 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: Marco Elver, linux-kernel

On Mon, Jun 21, 2021 at 06:37:57AM -0700, Paul E. McKenney wrote:
> Because the static inline functions end up in RCU's translation units,
> and they do write to other state.  See for example the line marked with
> the asterisk at the end of this email, which is apparently between
> __run_timers() and rcu_torture_reader().  But gdb says that this is
> really between this statement in __run_timers():
> 
> 	base->running_timer = NULL;

(I'm finding that in expire_timers(), not in __run_timers(), and both
are in kernel/time/timer.c)

> And this statement in try_to_del_timer_sync():
> 
> 	if (base->running_timer != timer)
> 
> Because of inline functions, running KCSAN on RCU can detect races in
> other subsystems.  In this case, I could argue for a READ_ONCE() on the
> "if" condition, but last I knew, the rules for timers are that C-language
> writes do not participate in data races.  So maybe I should add that
> READ_ONCE(), but running KCSAN on rcutorture would still complain.

That code is correct as is, AFAICT, so READ_ONCE() is not the right
annotation.  Also, READ_ONCE() would not actively help the situation in
any case, and arguably make the code worse and more confusing.


What happens here is that we read base->running_timer while holding
base->lock. We set base->running_timer to !0 holding that same
base->lock from the timer IRQ context. We clear base->running_timer
*without* base->lock, from the timer IRQ context.

So yes, there's a race between the locked load of base->running_timer
and the timer IRQ on another CPU clearing it.

But since the comparison is an equality test and the only purpose of the
clear is to destroy that equality, any partial clear serves that
purpose.

Now, a partial clear could create a false positive match for another
timer, which in turn could make try_to_del_timer_sync() fail spuriously
I suppose, but any caller needs to be able to deal with failure of that
function, so no harm done there.


*HOWEVER* timer_curr_running() violates all that.. but that looks like
it's only ever used as a diagnostic, so that should be fine too.


Anyway, AFAIU the problem is on the WRITE side, so any READ_ONCE() will
not ever fix anything here. If we want to get rid of the possible
spurious fail in try_to_del_timer_sync(), we need to consistently
WRITE/READ_ONCE() all of base->running_timer; if we don't care, we need
a data_race() annotation somewhere, although I'm not currently seeing
that inlining you mention that's needed for this problem to manifest in
the first place.
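
For reference, the consistently-marked option would look roughly like
this (sketch only, from memory, not a proposed patch):

	/* timer softirq, clearing outside base->lock: */
	WRITE_ONCE(base->running_timer, NULL);

	/* try_to_del_timer_sync(), with base->lock held: */
	if (READ_ONCE(base->running_timer) != timer)
		ret = detach_if_pending(timer, base, true);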



* Re: RCU vs data_race()
  2021-07-06  8:00               ` Peter Zijlstra
@ 2021-07-06  8:44                 ` Marco Elver
  2021-07-06 10:33                   ` Peter Zijlstra
  0 siblings, 1 reply; 14+ messages in thread
From: Marco Elver @ 2021-07-06  8:44 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Paul E. McKenney, linux-kernel

On Tue, 6 Jul 2021 at 10:00, Peter Zijlstra <peterz@infradead.org> wrote:
[...]
> In that case, would not an explicit: data_debug(addr) call (implemented
> by KASAN/KCSAN/whoever), which would report whatever knowledge they have
> about that address, be even more useful?

KCSAN/KASAN report data-races/memory errors as soon as they encounter
them, but before they do, they cannot give you any more than that (metadata
if it exists, but I'm not sure it can be interpreted in any useful way
before an error occurs).

But maybe I misunderstood. Is data_debug() meant to not return
anything and instead just be a "fake access"?

For those, we already have various existing primitives:

-- KCSAN: ASSERT_EXCLUSIVE_{ACCESS,WRITER}()
(https://www.kernel.org/doc/html/latest/dev-tools/kcsan.html#race-detection-beyond-data-races)

-- All sanitizers (KASAN & KCSAN right now): <linux/instrumented.h>:
instrument_*() calls, for generic explicit instrumentation.

-- For sanitizer-specific checks, there's everything else in
<linux/{kasan,kcsan}-checks.h>, but instrumented.h should be
preferred.
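
To make the first of those concrete, a small sketch (struct foo and its
locking scheme are made up):

	static void foo_inc(struct foo *f)
	{
		lockdep_assert_held(&f->lock);
		/* A "fake" access: KCSAN reports any concurrent write to
		 * f->count, even though this line touches nothing itself;
		 * concurrent readers remain allowed. */
		ASSERT_EXCLUSIVE_WRITER(f->count);
		f->count++;
	}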

Thanks,
-- Marco


* Re: RCU vs data_race()
  2021-07-06  8:44                 ` Marco Elver
@ 2021-07-06 10:33                   ` Peter Zijlstra
  2021-07-06 14:03                     ` Paul E. McKenney
  0 siblings, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2021-07-06 10:33 UTC (permalink / raw)
  To: Marco Elver; +Cc: Paul E. McKenney, linux-kernel

On Tue, Jul 06, 2021 at 10:44:46AM +0200, Marco Elver wrote:
> On Tue, 6 Jul 2021 at 10:00, Peter Zijlstra <peterz@infradead.org> wrote:
> [...]
> > In that case, would not an explicit: data_debug(addr) call (implemented
> > by KASAN/KCSAN/whoever), which would report whatever knowledge they have
> > about that address, be even more useful?
> 
> KCSAN/KASAN report data-races/memory errors as soon as they encounter
> them, but before they do, cannot give you any more than that (metadata
> if it exists, but not sure it can be interpreted in any useful way
> before an error occurs).
> 
> But maybe I misunderstood. Is data_debug() meant to not return
> anything and instead just be a "fake access"?

Mostly just print any metadata that you might have, like who allocated
it, or which code touched it. I'm thinking KASAN/KCSAN need to keep
track of such stuff for when a violation is detected.

If I understand Paul right, and there's a fair chance I didn't, I think
the issue is that when RCU finds a double call_rcu() (or some other
fail), it has very little clue how we got there, and any additional
information might be useful.



* Re: RCU vs data_race()
  2021-07-06 10:33                   ` Peter Zijlstra
@ 2021-07-06 14:03                     ` Paul E. McKenney
  0 siblings, 0 replies; 14+ messages in thread
From: Paul E. McKenney @ 2021-07-06 14:03 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Marco Elver, linux-kernel

On Tue, Jul 06, 2021 at 12:33:04PM +0200, Peter Zijlstra wrote:
> On Tue, Jul 06, 2021 at 10:44:46AM +0200, Marco Elver wrote:
> > On Tue, 6 Jul 2021 at 10:00, Peter Zijlstra <peterz@infradead.org> wrote:
> > [...]
> > > In that case, would not an explicit: data_debug(addr) call (implemented
> > > by KASAN/KCSAN/whoever), which would report whatever knowledge they have
> > > about that address, be even more useful?
> > 
> > KCSAN/KASAN report data-races/memory errors as soon as they encounter
> > them, but before they do, cannot give you any more than that (metadata
> > if it exists, but not sure it can be interpreted in any useful way
> > before an error occurs).
> > 
> > But maybe I misunderstood. Is data_debug() meant to not return
> > anything and instead just be a "fake access"?
> 
> Mostly just print any meta data that you might have. Like who allocated
> it, or which code touched it. I'm thinking KASAN/KCSAN need to keep
> track of such stuff for when a violation is detected.
> 
> If I understand Paul right; and there's a fair chance I didn't; I tihnk
> the issue is that when RCU finds a double call_rcu() (or some other
> fail), it has very little clue how we got there, and any addition
> information might be useful.

If it is a current reference, we know that reference is relevant.
After all, if the structure is being passed to call_rcu(), then there
better not be something else referencing it right now.  But the
historical data you are (I think?) asking for might be completely
irrelevant due to its having happened too long ago.

							Thanx, Paul


* Re: RCU vs data_race()
  2021-07-06  8:37               ` Peter Zijlstra
@ 2021-07-06 14:16                 ` Paul E. McKenney
  0 siblings, 0 replies; 14+ messages in thread
From: Paul E. McKenney @ 2021-07-06 14:16 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Marco Elver, linux-kernel

On Tue, Jul 06, 2021 at 10:37:49AM +0200, Peter Zijlstra wrote:
> On Mon, Jun 21, 2021 at 06:37:57AM -0700, Paul E. McKenney wrote:
> > Because the static inline functions end up in RCU's translation units,
> > and they do write to other state.  See for example the line marked with
> > the asterisk at the end of this email, which is apparently between
> > __run_timers() and rcu_torture_reader().  But gdb says that this is
> > really between this statement in __run_timers():
> > 
> > 	base->running_timer = NULL;
> 
> (I'm finding that in expire_timers(), not in __run_timers(), and both
> are in kernel/time/timer.c)
> 
> > And this statement in try_to_del_timer_sync():
> > 
> > 	if (base->running_timer != timer)
> > 
> > Because of inline functions, running KCSAN on RCU can detect races in
> > other subsystems.  In this case, I could argue for a READ_ONCE() on the
> > "if" condition, but last I knew, the rules for timers are that C-language
> > writes do not participate in data races.  So maybe I should add that
> > READ_ONCE(), but running KCSAN on rcutorture would still complain.
> 
> That code is correct as is, AFAICT, so READ_ONCE() is not the right
> annotation.  Also, READ_ONCE() would not actively help the situation in
> any case, and arguably make the code worse and more confusing.
> 
> 
> What happens here is that we read base->running_timer while holding
> base->lock. We set base->running_timer to !0 holding that same
> base->lock from the timer IRQ context. We clear base->running_timer
> *without* base->lock, from the timer IRQ context.
> 
> So yes, there's a race between the locked load of base->running_timer
> and the timer IRQ on another CPU clearing it.
> 
> But since the comparison is an equality test and the only purpose of the
> clear is to destroy that equality, any partial clear serves that
> purpose.
> 
> Now, a partial clear could create a false positive match for another
> timer, which in turn could make try_to_del_timer_sync() fail spuriously
> I suppose, but any caller needs to be able to deal with failure of that
> function, so no harm done there.
> 
> 
> *HOWEVER* timer_curr_running() violates all that.. but that looks like
> it's only ever used as a diagnostic, so that should be fine too.

Maybe a data_race(), then?  Except that in current mainline (as opposed
to v5.13) timer_curr_running() seems to have been inlined.

> Anyway, AFAIU the problem is on the WRITE side, so any READ_ONCE() will
> not ever fix anything here. If we want to get rid of the possible
> spurious fail in try_to_del_timer_sync(), we need to consistently
> WRITE/READ_ONCE() all of base->running_timer; if we don't care, we need
> a data_race() annotation somewhere, although I'm not currently seeing
> that inlining you mention that's needed for this problem to manifest in
> the first place.

Coming back to my original question, is my best strategy for RCU
to create data_race()-wrapped variants of the schedule_timeout*()
functions?

							Thanx, Paul

