All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH -rcu 0/2] kcsan: Improvements to reporting
@ 2020-01-09 15:23 Marco Elver
  2020-01-09 15:23 ` [PATCH -rcu 1/2] kcsan: Show full access type in report Marco Elver
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Marco Elver @ 2020-01-09 15:23 UTC (permalink / raw)
  To: elver; +Cc: paulmck, andreyknvl, glider, dvyukov, kasan-dev, linux-kernel

Improvements to KCSAN data race reporting:
1. Show if access is marked (*_ONCE, atomic, etc.).
2. Rate limit reporting to avoid spamming console.

Marco Elver (2):
  kcsan: Show full access type in report
  kcsan: Rate-limit reporting per data races

 kernel/kcsan/core.c   |  15 +++--
 kernel/kcsan/kcsan.h  |   2 +-
 kernel/kcsan/report.c | 153 +++++++++++++++++++++++++++++++++++-------
 lib/Kconfig.kcsan     |  10 +++
 4 files changed, 148 insertions(+), 32 deletions(-)

-- 
2.25.0.rc1.283.g88dfdc4193-goog


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH -rcu 1/2] kcsan: Show full access type in report
  2020-01-09 15:23 [PATCH -rcu 0/2] kcsan: Improvements to reporting Marco Elver
@ 2020-01-09 15:23 ` Marco Elver
  2020-01-09 15:23 ` [PATCH -rcu 2/2] kcsan: Rate-limit reporting per data races Marco Elver
  2020-01-09 16:27 ` [PATCH -rcu 0/2] kcsan: Improvements to reporting Paul E. McKenney
  2 siblings, 0 replies; 11+ messages in thread
From: Marco Elver @ 2020-01-09 15:23 UTC (permalink / raw)
  To: elver; +Cc: paulmck, andreyknvl, glider, dvyukov, kasan-dev, linux-kernel

This change adds support for showing the complete access type in the
report. Currently the following access types can be shown:
  "read", "read (marked)", "write", "write (marked)".

Suggested-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Marco Elver <elver@google.com>
---
 kernel/kcsan/core.c   | 15 ++++++++-------
 kernel/kcsan/kcsan.h  |  2 +-
 kernel/kcsan/report.c | 43 ++++++++++++++++++++++++++++---------------
 3 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 4d4ab5c5dc53..87bf857c8893 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -255,7 +255,7 @@ static inline unsigned int get_delay(void)
 
 static noinline void kcsan_found_watchpoint(const volatile void *ptr,
 					    size_t size,
-					    bool is_write,
+					    int type,
 					    atomic_long_t *watchpoint,
 					    long encoded_watchpoint)
 {
@@ -276,7 +276,7 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
 	flags = user_access_save();
 
 	if (consumed) {
-		kcsan_report(ptr, size, is_write, true, raw_smp_processor_id(),
+		kcsan_report(ptr, size, type, true, raw_smp_processor_id(),
 			     KCSAN_REPORT_CONSUMED_WATCHPOINT);
 	} else {
 		/*
@@ -292,8 +292,9 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
 }
 
 static noinline void
-kcsan_setup_watchpoint(const volatile void *ptr, size_t size, bool is_write)
+kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
 {
+	const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0;
 	atomic_long_t *watchpoint;
 	union {
 		u8 _1;
@@ -415,13 +416,13 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, bool is_write)
 		 * No need to increment 'data_races' counter, as the racing
 		 * thread already did.
 		 */
-		kcsan_report(ptr, size, is_write, size > 8 || value_change,
+		kcsan_report(ptr, size, type, size > 8 || value_change,
 			     smp_processor_id(), KCSAN_REPORT_RACE_SIGNAL);
 	} else if (value_change) {
 		/* Inferring a race, since the value should not have changed. */
 		kcsan_counter_inc(KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN);
 		if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN))
-			kcsan_report(ptr, size, is_write, true,
+			kcsan_report(ptr, size, type, true,
 				     smp_processor_id(),
 				     KCSAN_REPORT_RACE_UNKNOWN_ORIGIN);
 	}
@@ -455,10 +456,10 @@ static __always_inline void check_access(const volatile void *ptr, size_t size,
 	 */
 
 	if (unlikely(watchpoint != NULL))
-		kcsan_found_watchpoint(ptr, size, is_write, watchpoint,
+		kcsan_found_watchpoint(ptr, size, type, watchpoint,
 				       encoded_watchpoint);
 	else if (unlikely(should_watch(ptr, type)))
-		kcsan_setup_watchpoint(ptr, size, is_write);
+		kcsan_setup_watchpoint(ptr, size, type);
 }
 
 /* === Public interface ===================================================== */
diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h
index d3b9a96ac8a4..8492da45494b 100644
--- a/kernel/kcsan/kcsan.h
+++ b/kernel/kcsan/kcsan.h
@@ -103,7 +103,7 @@ enum kcsan_report_type {
 /*
  * Print a race report from thread that encountered the race.
  */
-extern void kcsan_report(const volatile void *ptr, size_t size, bool is_write,
+extern void kcsan_report(const volatile void *ptr, size_t size, int access_type,
 			 bool value_change, int cpu_id, enum kcsan_report_type type);
 
 #endif /* _KERNEL_KCSAN_KCSAN_H */
diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
index 0eea05a3135b..9f503ca2ff7a 100644
--- a/kernel/kcsan/report.c
+++ b/kernel/kcsan/report.c
@@ -24,7 +24,7 @@
 static struct {
 	const volatile void	*ptr;
 	size_t			size;
-	bool			is_write;
+	int			access_type;
 	int			task_pid;
 	int			cpu_id;
 	unsigned long		stack_entries[NUM_STACK_ENTRIES];
@@ -41,8 +41,10 @@ static DEFINE_SPINLOCK(report_lock);
  * Special rules to skip reporting.
  */
 static bool
-skip_report(bool is_write, bool value_change, unsigned long top_frame)
+skip_report(int access_type, bool value_change, unsigned long top_frame)
 {
+	const bool is_write = (access_type & KCSAN_ACCESS_WRITE) != 0;
+
 	if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && is_write &&
 	    !value_change) {
 		/*
@@ -63,9 +65,20 @@ skip_report(bool is_write, bool value_change, unsigned long top_frame)
 	return kcsan_skip_report_debugfs(top_frame);
 }
 
-static inline const char *get_access_type(bool is_write)
+static const char *get_access_type(int type)
 {
-	return is_write ? "write" : "read";
+	switch (type) {
+	case 0:
+		return "read";
+	case KCSAN_ACCESS_ATOMIC:
+		return "read (marked)";
+	case KCSAN_ACCESS_WRITE:
+		return "write";
+	case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
+		return "write (marked)";
+	default:
+		BUG();
+	}
 }
 
 /* Return thread description: in task or interrupt. */
@@ -112,7 +125,7 @@ static int sym_strcmp(void *addr1, void *addr2)
 /*
  * Returns true if a report was generated, false otherwise.
  */
-static bool print_report(const volatile void *ptr, size_t size, bool is_write,
+static bool print_report(const volatile void *ptr, size_t size, int access_type,
 			 bool value_change, int cpu_id,
 			 enum kcsan_report_type type)
 {
@@ -124,7 +137,7 @@ static bool print_report(const volatile void *ptr, size_t size, bool is_write,
 	/*
 	 * Must check report filter rules before starting to print.
 	 */
-	if (skip_report(is_write, true, stack_entries[skipnr]))
+	if (skip_report(access_type, true, stack_entries[skipnr]))
 		return false;
 
 	if (type == KCSAN_REPORT_RACE_SIGNAL) {
@@ -132,7 +145,7 @@ static bool print_report(const volatile void *ptr, size_t size, bool is_write,
 						other_info.num_stack_entries);
 
 		/* @value_change is only known for the other thread */
-		if (skip_report(other_info.is_write, value_change,
+		if (skip_report(other_info.access_type, value_change,
 				other_info.stack_entries[other_skipnr]))
 			return false;
 	}
@@ -170,7 +183,7 @@ static bool print_report(const volatile void *ptr, size_t size, bool is_write,
 	switch (type) {
 	case KCSAN_REPORT_RACE_SIGNAL:
 		pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
-		       get_access_type(other_info.is_write), other_info.ptr,
+		       get_access_type(other_info.access_type), other_info.ptr,
 		       other_info.size, get_thread_desc(other_info.task_pid),
 		       other_info.cpu_id);
 
@@ -181,14 +194,14 @@ static bool print_report(const volatile void *ptr, size_t size, bool is_write,
 
 		pr_err("\n");
 		pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
-		       get_access_type(is_write), ptr, size,
+		       get_access_type(access_type), ptr, size,
 		       get_thread_desc(in_task() ? task_pid_nr(current) : -1),
 		       cpu_id);
 		break;
 
 	case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
 		pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n",
-		       get_access_type(is_write), ptr, size,
+		       get_access_type(access_type), ptr, size,
 		       get_thread_desc(in_task() ? task_pid_nr(current) : -1),
 		       cpu_id);
 		break;
@@ -223,7 +236,7 @@ static void release_report(unsigned long *flags, enum kcsan_report_type type)
  * required for the report type, simply acquires report_lock and returns true.
  */
 static bool prepare_report(unsigned long *flags, const volatile void *ptr,
-			   size_t size, bool is_write, int cpu_id,
+			   size_t size, int access_type, int cpu_id,
 			   enum kcsan_report_type type)
 {
 	if (type != KCSAN_REPORT_CONSUMED_WATCHPOINT &&
@@ -243,7 +256,7 @@ static bool prepare_report(unsigned long *flags, const volatile void *ptr,
 
 		other_info.ptr			= ptr;
 		other_info.size			= size;
-		other_info.is_write		= is_write;
+		other_info.access_type		= access_type;
 		other_info.task_pid		= in_task() ? task_pid_nr(current) : -1;
 		other_info.cpu_id		= cpu_id;
 		other_info.num_stack_entries	= stack_trace_save(other_info.stack_entries, NUM_STACK_ENTRIES, 1);
@@ -302,14 +315,14 @@ static bool prepare_report(unsigned long *flags, const volatile void *ptr,
 	goto retry;
 }
 
-void kcsan_report(const volatile void *ptr, size_t size, bool is_write,
+void kcsan_report(const volatile void *ptr, size_t size, int access_type,
 		  bool value_change, int cpu_id, enum kcsan_report_type type)
 {
 	unsigned long flags = 0;
 
 	kcsan_disable_current();
-	if (prepare_report(&flags, ptr, size, is_write, cpu_id, type)) {
-		if (print_report(ptr, size, is_write, value_change, cpu_id, type) && panic_on_warn)
+	if (prepare_report(&flags, ptr, size, access_type, cpu_id, type)) {
+		if (print_report(ptr, size, access_type, value_change, cpu_id, type) && panic_on_warn)
 			panic("panic_on_warn set ...\n");
 
 		release_report(&flags, type);
-- 
2.25.0.rc1.283.g88dfdc4193-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH -rcu 2/2] kcsan: Rate-limit reporting per data races
  2020-01-09 15:23 [PATCH -rcu 0/2] kcsan: Improvements to reporting Marco Elver
  2020-01-09 15:23 ` [PATCH -rcu 1/2] kcsan: Show full access type in report Marco Elver
@ 2020-01-09 15:23 ` Marco Elver
  2020-01-10 18:20   ` Marco Elver
  2020-01-09 16:27 ` [PATCH -rcu 0/2] kcsan: Improvements to reporting Paul E. McKenney
  2 siblings, 1 reply; 11+ messages in thread
From: Marco Elver @ 2020-01-09 15:23 UTC (permalink / raw)
  To: elver
  Cc: paulmck, andreyknvl, glider, dvyukov, kasan-dev, linux-kernel, Qian Cai

Adds support for rate limiting reports. This uses a time based rate
limit, that limits any given data race report to no more than one in a
fixed time window (default is 3 sec). This should prevent the console
from being spammed with data race reports, that would render the system
unusable.

The implementation assumes that unique data races and the rate at which
they occur is bounded, since we cannot store arbitrarily many past data
race report information: we use a fixed-size array to store the required
information. We cannot use kmalloc/krealloc and resize the list when
needed, as reporting is triggered by the instrumentation calls; to
permit using KCSAN on the allocators, we cannot (re-)allocate any memory
during report generation (data races in the allocators lead to
deadlock).

Reported-by: Qian Cai <cai@lca.pw>
Suggested-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Marco Elver <elver@google.com>
---
 kernel/kcsan/report.c | 112 ++++++++++++++++++++++++++++++++++++++----
 lib/Kconfig.kcsan     |  10 ++++
 2 files changed, 112 insertions(+), 10 deletions(-)

diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
index 9f503ca2ff7a..e324af7d14c9 100644
--- a/kernel/kcsan/report.c
+++ b/kernel/kcsan/report.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <linux/kernel.h>
+#include <linux/ktime.h>
 #include <linux/preempt.h>
 #include <linux/printk.h>
 #include <linux/sched.h>
@@ -31,12 +32,101 @@ static struct {
 	int			num_stack_entries;
 } other_info = { .ptr = NULL };
 
+/*
+ * Information about reported data races; used to rate limit reporting.
+ */
+struct report_time {
+	/*
+	 * The last time the data race was reported.
+	 */
+	ktime_t time;
+
+	/*
+	 * The frames of the 2 threads; if only 1 thread is known, one frame
+	 * will be 0.
+	 */
+	unsigned long frame1;
+	unsigned long frame2;
+};
+
+/*
+ * Since we also want to be able to debug allocators with KCSAN, to avoid
+ * deadlock, report_times cannot be dynamically resized with krealloc in
+ * rate_limit_report.
+ *
+ * Therefore, we use a fixed-size array, which at most will occupy a page. This
+ * still adequately rate limits reports, assuming that a) number of unique data
+ * races is not excessive, and b) occurrence of unique data races within the
+ * same time window is limited.
+ */
+#define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time))
+#define REPORT_TIMES_SIZE                                                      \
+	(CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ?                   \
+		 REPORT_TIMES_MAX :                                            \
+		 CONFIG_KCSAN_REPORT_ONCE_IN_MS)
+static struct report_time report_times[REPORT_TIMES_SIZE];
+
 /*
  * This spinlock protects reporting and other_info, since other_info is usually
  * required when reporting.
  */
 static DEFINE_SPINLOCK(report_lock);
 
+/*
+ * Checks if the data race identified by thread frames frame1 and frame2 has
+ * been reported since (now - KCSAN_REPORT_ONCE_IN_MS).
+ */
+static bool rate_limit_report(unsigned long frame1, unsigned long frame2)
+{
+	struct report_time *use_entry = &report_times[0];
+	ktime_t now;
+	ktime_t invalid_before;
+	int i;
+
+	BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0);
+
+	if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0)
+		return false;
+
+	now = ktime_get();
+	invalid_before = ktime_sub_ms(now, CONFIG_KCSAN_REPORT_ONCE_IN_MS);
+
+	/* Check if a matching data race report exists. */
+	for (i = 0; i < REPORT_TIMES_SIZE; ++i) {
+		struct report_time *rt = &report_times[i];
+
+		/*
+		 * Must always select an entry for use to store info as we
+		 * cannot resize report_times; at the end of the scan, use_entry
+		 * will be the oldest entry, which ideally also happened before
+		 * KCSAN_REPORT_ONCE_IN_MS ago.
+		 */
+		if (ktime_before(rt->time, use_entry->time))
+			use_entry = rt;
+
+		/*
+		 * Initially, no need to check any further as this entry as well
+		 * as following entries have never been used.
+		 */
+		if (rt->time == 0)
+			break;
+
+		/* Check if entry expired. */
+		if (ktime_before(rt->time, invalid_before))
+			continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */
+
+		/* Reported recently, check if data race matches. */
+		if ((rt->frame1 == frame1 && rt->frame2 == frame2) ||
+		    (rt->frame1 == frame2 && rt->frame2 == frame1))
+			return true;
+	}
+
+	use_entry->time = now;
+	use_entry->frame1 = frame1;
+	use_entry->frame2 = frame2;
+	return false;
+}
+
 /*
  * Special rules to skip reporting.
  */
@@ -132,7 +222,9 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type,
 	unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 };
 	int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1);
 	int skipnr = get_stack_skipnr(stack_entries, num_stack_entries);
-	int other_skipnr;
+	unsigned long this_frame = stack_entries[skipnr];
+	unsigned long other_frame = 0;
+	int other_skipnr = 0; /* silence uninit warnings */
 
 	/*
 	 * Must check report filter rules before starting to print.
@@ -143,34 +235,34 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type,
 	if (type == KCSAN_REPORT_RACE_SIGNAL) {
 		other_skipnr = get_stack_skipnr(other_info.stack_entries,
 						other_info.num_stack_entries);
+		other_frame = other_info.stack_entries[other_skipnr];
 
 		/* @value_change is only known for the other thread */
-		if (skip_report(other_info.access_type, value_change,
-				other_info.stack_entries[other_skipnr]))
+		if (skip_report(other_info.access_type, value_change, other_frame))
 			return false;
 	}
 
+	if (rate_limit_report(this_frame, other_frame))
+		return false;
+
 	/* Print report header. */
 	pr_err("==================================================================\n");
 	switch (type) {
 	case KCSAN_REPORT_RACE_SIGNAL: {
-		void *this_fn = (void *)stack_entries[skipnr];
-		void *other_fn = (void *)other_info.stack_entries[other_skipnr];
 		int cmp;
 
 		/*
 		 * Order functions lexographically for consistent bug titles.
 		 * Do not print offset of functions to keep title short.
 		 */
-		cmp = sym_strcmp(other_fn, this_fn);
+		cmp = sym_strcmp((void *)other_frame, (void *)this_frame);
 		pr_err("BUG: KCSAN: data-race in %ps / %ps\n",
-		       cmp < 0 ? other_fn : this_fn,
-		       cmp < 0 ? this_fn : other_fn);
+		       (void *)(cmp < 0 ? other_frame : this_frame),
+		       (void *)(cmp < 0 ? this_frame : other_frame));
 	} break;
 
 	case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
-		pr_err("BUG: KCSAN: data-race in %pS\n",
-		       (void *)stack_entries[skipnr]);
+		pr_err("BUG: KCSAN: data-race in %pS\n", (void *)this_frame);
 		break;
 
 	default:
diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan
index 3f78b1434375..3552990abcfe 100644
--- a/lib/Kconfig.kcsan
+++ b/lib/Kconfig.kcsan
@@ -81,6 +81,16 @@ config KCSAN_SKIP_WATCH_RANDOMIZE
 	  KCSAN_WATCH_SKIP. If false, the chosen value is always
 	  KCSAN_WATCH_SKIP.
 
+config KCSAN_REPORT_ONCE_IN_MS
+	int "Duration in milliseconds, in which any given data race is only reported once"
+	default 3000
+	help
+	  Any given data race is only reported once in the defined time window.
+	  Different data races may still generate reports within a duration
+	  that is smaller than the duration defined here. This allows rate
+	  limiting reporting to avoid flooding the console with reports.
+	  Setting this to 0 disables rate limiting.
+
 # Note that, while some of the below options could be turned into boot
 # parameters, to optimize for the common use-case, we avoid this because: (a)
 # it would impact performance (and we want to avoid static branch for all
-- 
2.25.0.rc1.283.g88dfdc4193-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH -rcu 0/2] kcsan: Improvements to reporting
  2020-01-09 15:23 [PATCH -rcu 0/2] kcsan: Improvements to reporting Marco Elver
  2020-01-09 15:23 ` [PATCH -rcu 1/2] kcsan: Show full access type in report Marco Elver
  2020-01-09 15:23 ` [PATCH -rcu 2/2] kcsan: Rate-limit reporting per data races Marco Elver
@ 2020-01-09 16:27 ` Paul E. McKenney
  2020-01-09 17:03   ` Marco Elver
  2 siblings, 1 reply; 11+ messages in thread
From: Paul E. McKenney @ 2020-01-09 16:27 UTC (permalink / raw)
  To: Marco Elver; +Cc: andreyknvl, glider, dvyukov, kasan-dev, linux-kernel

On Thu, Jan 09, 2020 at 04:23:20PM +0100, Marco Elver wrote:
> Improvements to KCSAN data race reporting:
> 1. Show if access is marked (*_ONCE, atomic, etc.).
> 2. Rate limit reporting to avoid spamming console.
> 
> Marco Elver (2):
>   kcsan: Show full access type in report
>   kcsan: Rate-limit reporting per data races

Queued and pushed, thank you!  I edited the commit logs a bit, so could
you please check to make sure that I didn't mess anything up?

At some point, boot-time-allocated per-CPU arrays might be needed to
avoid contention on large systems, but one step at a time.  ;-)

							Thanx, Paul

>  kernel/kcsan/core.c   |  15 +++--
>  kernel/kcsan/kcsan.h  |   2 +-
>  kernel/kcsan/report.c | 153 +++++++++++++++++++++++++++++++++++-------
>  lib/Kconfig.kcsan     |  10 +++
>  4 files changed, 148 insertions(+), 32 deletions(-)
> 
> -- 
> 2.25.0.rc1.283.g88dfdc4193-goog
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH -rcu 0/2] kcsan: Improvements to reporting
  2020-01-09 16:27 ` [PATCH -rcu 0/2] kcsan: Improvements to reporting Paul E. McKenney
@ 2020-01-09 17:03   ` Marco Elver
  2020-01-09 17:31     ` Paul E. McKenney
  0 siblings, 1 reply; 11+ messages in thread
From: Marco Elver @ 2020-01-09 17:03 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Andrey Konovalov, Alexander Potapenko, Dmitry Vyukov, kasan-dev, LKML

On Thu, 9 Jan 2020 at 17:27, Paul E. McKenney <paulmck@kernel.org> wrote:
>
> On Thu, Jan 09, 2020 at 04:23:20PM +0100, Marco Elver wrote:
> > Improvements to KCSAN data race reporting:
> > 1. Show if access is marked (*_ONCE, atomic, etc.).
> > 2. Rate limit reporting to avoid spamming console.
> >
> > Marco Elver (2):
> >   kcsan: Show full access type in report
> >   kcsan: Rate-limit reporting per data races
>
> Queued and pushed, thank you!  I edited the commit logs a bit, so could
> you please check to make sure that I didn't mess anything up?

Looks good to me, thank you.

> At some point, boot-time-allocated per-CPU arrays might be needed to
> avoid contention on large systems, but one step at a time.  ;-)

I certainly hope the rate of fixing/avoiding data races will not be
eclipsed by the rate at which new ones are introduced. :-)

Thanks,
-- Marco

>                                                         Thanx, Paul
>
> >  kernel/kcsan/core.c   |  15 +++--
> >  kernel/kcsan/kcsan.h  |   2 +-
> >  kernel/kcsan/report.c | 153 +++++++++++++++++++++++++++++++++++-------
> >  lib/Kconfig.kcsan     |  10 +++
> >  4 files changed, 148 insertions(+), 32 deletions(-)
> >
> > --
> > 2.25.0.rc1.283.g88dfdc4193-goog
> >
>
> --
> You received this message because you are subscribed to the Google Groups "kasan-dev" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+unsubscribe@googlegroups.com.
> To view this discussion on the web visit https://groups.google.com/d/msgid/kasan-dev/20200109162739.GS13449%40paulmck-ThinkPad-P72.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH -rcu 0/2] kcsan: Improvements to reporting
  2020-01-09 17:03   ` Marco Elver
@ 2020-01-09 17:31     ` Paul E. McKenney
  2020-01-09 17:42       ` Marco Elver
  0 siblings, 1 reply; 11+ messages in thread
From: Paul E. McKenney @ 2020-01-09 17:31 UTC (permalink / raw)
  To: Marco Elver
  Cc: Andrey Konovalov, Alexander Potapenko, Dmitry Vyukov, kasan-dev, LKML

On Thu, Jan 09, 2020 at 06:03:39PM +0100, Marco Elver wrote:
> On Thu, 9 Jan 2020 at 17:27, Paul E. McKenney <paulmck@kernel.org> wrote:
> >
> > On Thu, Jan 09, 2020 at 04:23:20PM +0100, Marco Elver wrote:
> > > Improvements to KCSAN data race reporting:
> > > 1. Show if access is marked (*_ONCE, atomic, etc.).
> > > 2. Rate limit reporting to avoid spamming console.
> > >
> > > Marco Elver (2):
> > >   kcsan: Show full access type in report
> > >   kcsan: Rate-limit reporting per data races
> >
> > Queued and pushed, thank you!  I edited the commit logs a bit, so could
> > you please check to make sure that I didn't mess anything up?
> 
> Looks good to me, thank you.
> 
> > At some point, boot-time-allocated per-CPU arrays might be needed to
> > avoid contention on large systems, but one step at a time.  ;-)
> 
> I certainly hope the rate of fixing/avoiding data races will not be
> eclipsed by the rate at which new ones are introduced. :-)

Me too!

However, on a large system, duplicate reports might happen quite
frequently, which might cause slowdowns given the single global
array.  Or maybe not -- I guess we will find out soon enough. ;-)

But I must confess that I am missing how concurrent access to the
report_times[] array is handled.  I would have expected that
rate_limit_report() would choose a random starting entry and
search circularly.  And I would expect that the code at the end
of that function would instead look something like this:

	if (ktime_before(oldtime, invalid_before) &&
	    cmpxchg(&use_entry->time, oldtime, now) == oldtime) {
		use_entry->frame1 = frame1;
		use_entry->frame2 = frame2;
	} else {
		// Too bad, next duplicate report won't be suppressed.
	}

Where "oldtime" is captured from the entry during the scan, and from the
first entry scanned.  This cmpxchg() approach is of course vulnerable
to the ->frame1 and ->frame2 assignments taking more than three seconds
(by default), but if that becomes a problem, a WARN_ON() could be added:

	if (ktime_before(oldtime, invalid_before) &&
	    cmpxchg(&use_entry->time, oldtime, now) == oldtime) {
		use_entry->frame1 = frame1;
		use_entry->frame2 = frame2;
		WARN_ON_ONCE(use_entry->time != now);
	} else {
		// Too bad, next duplicate report won't be suppressed.
	}

So what am I missing here?

							Thanx, Paul

> Thanks,
> -- Marco
> 
> >                                                         Thanx, Paul
> >
> > >  kernel/kcsan/core.c   |  15 +++--
> > >  kernel/kcsan/kcsan.h  |   2 +-
> > >  kernel/kcsan/report.c | 153 +++++++++++++++++++++++++++++++++++-------
> > >  lib/Kconfig.kcsan     |  10 +++
> > >  4 files changed, 148 insertions(+), 32 deletions(-)
> > >
> > > --
> > > 2.25.0.rc1.283.g88dfdc4193-goog
> > >
> >
> > --
> > You received this message because you are subscribed to the Google Groups "kasan-dev" group.
> > To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+unsubscribe@googlegroups.com.
> > To view this discussion on the web visit https://groups.google.com/d/msgid/kasan-dev/20200109162739.GS13449%40paulmck-ThinkPad-P72.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH -rcu 0/2] kcsan: Improvements to reporting
  2020-01-09 17:31     ` Paul E. McKenney
@ 2020-01-09 17:42       ` Marco Elver
  2020-01-09 20:46         ` Paul E. McKenney
  0 siblings, 1 reply; 11+ messages in thread
From: Marco Elver @ 2020-01-09 17:42 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Andrey Konovalov, Alexander Potapenko, Dmitry Vyukov, kasan-dev, LKML

On Thu, 9 Jan 2020 at 18:31, Paul E. McKenney <paulmck@kernel.org> wrote:
>
> On Thu, Jan 09, 2020 at 06:03:39PM +0100, Marco Elver wrote:
> > On Thu, 9 Jan 2020 at 17:27, Paul E. McKenney <paulmck@kernel.org> wrote:
> > >
> > > On Thu, Jan 09, 2020 at 04:23:20PM +0100, Marco Elver wrote:
> > > > Improvements to KCSAN data race reporting:
> > > > 1. Show if access is marked (*_ONCE, atomic, etc.).
> > > > 2. Rate limit reporting to avoid spamming console.
> > > >
> > > > Marco Elver (2):
> > > >   kcsan: Show full access type in report
> > > >   kcsan: Rate-limit reporting per data races
> > >
> > > Queued and pushed, thank you!  I edited the commit logs a bit, so could
> > > you please check to make sure that I didn't mess anything up?
> >
> > Looks good to me, thank you.
> >
> > > At some point, boot-time-allocated per-CPU arrays might be needed to
> > > avoid contention on large systems, but one step at a time.  ;-)
> >
> > I certainly hope the rate of fixing/avoiding data races will not be
> > eclipsed by the rate at which new ones are introduced. :-)
>
> Me too!
>
> However, on a large system, duplicate reports might happen quite
> frequently, which might cause slowdowns given the single global
> array.  Or maybe not -- I guess we will find out soon enough. ;-)
>
> But I must confess that I am missing how concurrent access to the
> report_times[] array is handled.  I would have expected that
> rate_limit_report() would choose a random starting entry and
> search circularly.  And I would expect that the code at the end
> of that function would instead look something like this:
>
>         if (ktime_before(oldtime, invalid_before) &&
>             cmpxchg(&use_entry->time, oldtime, now) == oldtime) {
>                 use_entry->frame1 = frame1;
>                 use_entry->frame2 = frame2;
>         } else {
>                 // Too bad, next duplicate report won't be suppressed.
>         }
>
> Where "oldtime" is captured from the entry during the scan, and from the
> first entry scanned.  This cmpxchg() approach is of course vulnerable
> to the ->frame1 and ->frame2 assignments taking more than three seconds
> (by default), but if that becomes a problem, a WARN_ON() could be added:
>
>         if (ktime_before(oldtime, invalid_before) &&
>             cmpxchg(&use_entry->time, oldtime, now) == oldtime) {
>                 use_entry->frame1 = frame1;
>                 use_entry->frame2 = frame2;
>                 WARN_ON_ONCE(use_entry->time != now);
>         } else {
>                 // Too bad, next duplicate report won't be suppressed.
>         }
>
> So what am I missing here?

Ah right, sorry, I should have clarified or commented in the code that
all of this is happening under 'report_lock' (taken in prepare_report,
held in print_report->rate_limit_report, released in release_report).
That also means that any optimization here won't matter until
report_lock is removed.

Thanks,
-- Marco

>                                                         Thanx, Paul
>
> > Thanks,
> > -- Marco
> >
> > >                                                         Thanx, Paul
> > >
> > > >  kernel/kcsan/core.c   |  15 +++--
> > > >  kernel/kcsan/kcsan.h  |   2 +-
> > > >  kernel/kcsan/report.c | 153 +++++++++++++++++++++++++++++++++++-------
> > > >  lib/Kconfig.kcsan     |  10 +++
> > > >  4 files changed, 148 insertions(+), 32 deletions(-)
> > > >
> > > > --
> > > > 2.25.0.rc1.283.g88dfdc4193-goog
> > > >
> > >
> > > --
> > > You received this message because you are subscribed to the Google Groups "kasan-dev" group.
> > > To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+unsubscribe@googlegroups.com.
> > > To view this discussion on the web visit https://groups.google.com/d/msgid/kasan-dev/20200109162739.GS13449%40paulmck-ThinkPad-P72.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH -rcu 0/2] kcsan: Improvements to reporting
  2020-01-09 17:42       ` Marco Elver
@ 2020-01-09 20:46         ` Paul E. McKenney
  0 siblings, 0 replies; 11+ messages in thread
From: Paul E. McKenney @ 2020-01-09 20:46 UTC (permalink / raw)
  To: Marco Elver
  Cc: Andrey Konovalov, Alexander Potapenko, Dmitry Vyukov, kasan-dev, LKML

On Thu, Jan 09, 2020 at 06:42:16PM +0100, Marco Elver wrote:
> On Thu, 9 Jan 2020 at 18:31, Paul E. McKenney <paulmck@kernel.org> wrote:
> >
> > On Thu, Jan 09, 2020 at 06:03:39PM +0100, Marco Elver wrote:
> > > On Thu, 9 Jan 2020 at 17:27, Paul E. McKenney <paulmck@kernel.org> wrote:
> > > >
> > > > On Thu, Jan 09, 2020 at 04:23:20PM +0100, Marco Elver wrote:
> > > > > Improvements to KCSAN data race reporting:
> > > > > 1. Show if access is marked (*_ONCE, atomic, etc.).
> > > > > 2. Rate limit reporting to avoid spamming console.
> > > > >
> > > > > Marco Elver (2):
> > > > >   kcsan: Show full access type in report
> > > > >   kcsan: Rate-limit reporting per data races
> > > >
> > > > Queued and pushed, thank you!  I edited the commit logs a bit, so could
> > > > you please check to make sure that I didn't mess anything up?
> > >
> > > Looks good to me, thank you.
> > >
> > > > At some point, boot-time-allocated per-CPU arrays might be needed to
> > > > avoid contention on large systems, but one step at a time.  ;-)
> > >
> > > I certainly hope the rate of fixing/avoiding data races will not be
> > > eclipsed by the rate at which new ones are introduced. :-)
> >
> > Me too!
> >
> > However, on a large system, duplicate reports might happen quite
> > frequently, which might cause slowdowns given the single global
> > array.  Or maybe not -- I guess we will find out soon enough. ;-)
> >
> > But I must confess that I am missing how concurrent access to the
> > report_times[] array is handled.  I would have expected that
> > rate_limit_report() would choose a random starting entry and
> > search circularly.  And I would expect that the code at the end
> > of that function would instead look something like this:
> >
> >         if (ktime_before(oldtime, invalid_before) &&
> >             cmpxchg(&use_entry->time, oldtime, now) == oldtime) {
> >                 use_entry->frame1 = frame1;
> >                 use_entry->frame2 = frame2;
> >         } else {
> >                 // Too bad, next duplicate report won't be suppressed.
> >         }
> >
> > Where "oldtime" is captured from the entry during the scan, and from the
> > first entry scanned.  This cmpxchg() approach is of course vulnerable
> > to the ->frame1 and ->frame2 assignments taking more than three seconds
> > (by default), but if that becomes a problem, a WARN_ON() could be added:
> >
> >         if (ktime_before(oldtime, invalid_before) &&
> >             cmpxchg(&use_entry->time, oldtime, now) == oldtime) {
> >                 use_entry->frame1 = frame1;
> >                 use_entry->frame2 = frame2;
> >                 WARN_ON_ONCE(use_entry->time != now);
> >         } else {
> >                 // Too bad, next duplicate report won't be suppressed.
> >         }
> >
> > So what am I missing here?
> 
> Ah right, sorry, I should have clarified or commented in the code that
> all of this is happening under 'report_lock' (taken in prepare_report,
> held in print_report->rate_limit_report, released in release_report).
> That also means that any optimization here won't matter until
> report_lock is removed.

Got it, thank you!  And yes, lock contention on report_lock might be
a problem on large systems.  But let's see how it goes.

							Thanx, Paul

> Thanks,
> -- Marco
> 
> >                                                         Thanx, Paul
> >
> > > Thanks,
> > > -- Marco
> > >
> > > >                                                         Thanx, Paul
> > > >
> > > > >  kernel/kcsan/core.c   |  15 +++--
> > > > >  kernel/kcsan/kcsan.h  |   2 +-
> > > > >  kernel/kcsan/report.c | 153 +++++++++++++++++++++++++++++++++++-------
> > > > >  lib/Kconfig.kcsan     |  10 +++
> > > > >  4 files changed, 148 insertions(+), 32 deletions(-)
> > > > >
> > > > > --
> > > > > 2.25.0.rc1.283.g88dfdc4193-goog
> > > > >
> > > >
> > > > --
> > > > You received this message because you are subscribed to the Google Groups "kasan-dev" group.
> > > > To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+unsubscribe@googlegroups.com.
> > > > To view this discussion on the web visit https://groups.google.com/d/msgid/kasan-dev/20200109162739.GS13449%40paulmck-ThinkPad-P72.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH -rcu 2/2] kcsan: Rate-limit reporting per data races
  2020-01-09 15:23 ` [PATCH -rcu 2/2] kcsan: Rate-limit reporting per data races Marco Elver
@ 2020-01-10 18:20   ` Marco Elver
  2020-01-10 18:54     ` Marco Elver
  0 siblings, 1 reply; 11+ messages in thread
From: Marco Elver @ 2020-01-10 18:20 UTC (permalink / raw)
  To: Marco Elver
  Cc: Paul E. McKenney, Andrey Konovalov, Alexander Potapenko,
	Dmitry Vyukov, kasan-dev, LKML, Qian Cai

On Thu, 9 Jan 2020 at 16:23, Marco Elver <elver@google.com> wrote:
>
> Adds support for rate limiting reports. This uses a time based rate
> limit, that limits any given data race report to no more than one in a
> fixed time window (default is 3 sec). This should prevent the console
> from being spammed with data race reports, that would render the system
> unusable.
>
> The implementation assumes that unique data races and the rate at which
> they occur is bounded, since we cannot store arbitrarily many past data
> race report information: we use a fixed-size array to store the required
> information. We cannot use kmalloc/krealloc and resize the list when
> needed, as reporting is triggered by the instrumentation calls; to
> permit using KCSAN on the allocators, we cannot (re-)allocate any memory
> during report generation (data races in the allocators lead to
> deadlock).
>
> Reported-by: Qian Cai <cai@lca.pw>
> Suggested-by: Paul E. McKenney <paulmck@kernel.org>
> Signed-off-by: Marco Elver <elver@google.com>
> ---
>  kernel/kcsan/report.c | 112 ++++++++++++++++++++++++++++++++++++++----
>  lib/Kconfig.kcsan     |  10 ++++
>  2 files changed, 112 insertions(+), 10 deletions(-)
>
> diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
> index 9f503ca2ff7a..e324af7d14c9 100644
> --- a/kernel/kcsan/report.c
> +++ b/kernel/kcsan/report.c
> @@ -1,6 +1,7 @@
>  // SPDX-License-Identifier: GPL-2.0
>
>  #include <linux/kernel.h>
> +#include <linux/ktime.h>
>  #include <linux/preempt.h>
>  #include <linux/printk.h>
>  #include <linux/sched.h>
> @@ -31,12 +32,101 @@ static struct {
>         int                     num_stack_entries;
>  } other_info = { .ptr = NULL };
>
> +/*
> + * Information about reported data races; used to rate limit reporting.
> + */
> +struct report_time {
> +       /*
> +        * The last time the data race was reported.
> +        */
> +       ktime_t time;
> +
> +       /*
> +        * The frames of the 2 threads; if only 1 thread is known, one frame
> +        * will be 0.
> +        */
> +       unsigned long frame1;
> +       unsigned long frame2;
> +};
> +
> +/*
> + * Since we also want to be able to debug allocators with KCSAN, to avoid
> + * deadlock, report_times cannot be dynamically resized with krealloc in
> + * rate_limit_report.
> + *
> + * Therefore, we use a fixed-size array, which at most will occupy a page. This
> + * still adequately rate limits reports, assuming that a) number of unique data
> + * races is not excessive, and b) occurrence of unique data races within the
> + * same time window is limited.
> + */
> +#define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time))
> +#define REPORT_TIMES_SIZE                                                      \
> +       (CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ?                   \
> +                REPORT_TIMES_MAX :                                            \
> +                CONFIG_KCSAN_REPORT_ONCE_IN_MS)
> +static struct report_time report_times[REPORT_TIMES_SIZE];
> +
>  /*
>   * This spinlock protects reporting and other_info, since other_info is usually
>   * required when reporting.
>   */
>  static DEFINE_SPINLOCK(report_lock);
>
> +/*
> + * Checks if the data race identified by thread frames frame1 and frame2 has
> + * been reported since (now - KCSAN_REPORT_ONCE_IN_MS).
> + */
> +static bool rate_limit_report(unsigned long frame1, unsigned long frame2)
> +{
> +       struct report_time *use_entry = &report_times[0];
> +       ktime_t now;
> +       ktime_t invalid_before;
> +       int i;
> +
> +       BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0);
> +
> +       if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0)
> +               return false;
> +
> +       now = ktime_get();
> +       invalid_before = ktime_sub_ms(now, CONFIG_KCSAN_REPORT_ONCE_IN_MS);

Been thinking about this a bit more, and wondering if we should just
use jiffies here?  Don't think we need the precision.

Thanks,
-- Marco

> +       /* Check if a matching data race report exists. */
> +       for (i = 0; i < REPORT_TIMES_SIZE; ++i) {
> +               struct report_time *rt = &report_times[i];
> +
> +               /*
> +                * Must always select an entry for use to store info as we
> +                * cannot resize report_times; at the end of the scan, use_entry
> +                * will be the oldest entry, which ideally also happened before
> +                * KCSAN_REPORT_ONCE_IN_MS ago.
> +                */
> +               if (ktime_before(rt->time, use_entry->time))
> +                       use_entry = rt;
> +
> +               /*
> +                * Initially, no need to check any further as this entry as well
> +                * as following entries have never been used.
> +                */
> +               if (rt->time == 0)
> +                       break;
> +
> +               /* Check if entry expired. */
> +               if (ktime_before(rt->time, invalid_before))
> +                       continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */
> +
> +               /* Reported recently, check if data race matches. */
> +               if ((rt->frame1 == frame1 && rt->frame2 == frame2) ||
> +                   (rt->frame1 == frame2 && rt->frame2 == frame1))
> +                       return true;
> +       }
> +
> +       use_entry->time = now;
> +       use_entry->frame1 = frame1;
> +       use_entry->frame2 = frame2;
> +       return false;
> +}
> +
>  /*
>   * Special rules to skip reporting.
>   */
> @@ -132,7 +222,9 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type,
>         unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 };
>         int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1);
>         int skipnr = get_stack_skipnr(stack_entries, num_stack_entries);
> -       int other_skipnr;
> +       unsigned long this_frame = stack_entries[skipnr];
> +       unsigned long other_frame = 0;
> +       int other_skipnr = 0; /* silence uninit warnings */
>
>         /*
>          * Must check report filter rules before starting to print.
> @@ -143,34 +235,34 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type,
>         if (type == KCSAN_REPORT_RACE_SIGNAL) {
>                 other_skipnr = get_stack_skipnr(other_info.stack_entries,
>                                                 other_info.num_stack_entries);
> +               other_frame = other_info.stack_entries[other_skipnr];
>
>                 /* @value_change is only known for the other thread */
> -               if (skip_report(other_info.access_type, value_change,
> -                               other_info.stack_entries[other_skipnr]))
> +               if (skip_report(other_info.access_type, value_change, other_frame))
>                         return false;
>         }
>
> +       if (rate_limit_report(this_frame, other_frame))
> +               return false;
> +
>         /* Print report header. */
>         pr_err("==================================================================\n");
>         switch (type) {
>         case KCSAN_REPORT_RACE_SIGNAL: {
> -               void *this_fn = (void *)stack_entries[skipnr];
> -               void *other_fn = (void *)other_info.stack_entries[other_skipnr];
>                 int cmp;
>
>                 /*
>                  * Order functions lexographically for consistent bug titles.
>                  * Do not print offset of functions to keep title short.
>                  */
> -               cmp = sym_strcmp(other_fn, this_fn);
> +               cmp = sym_strcmp((void *)other_frame, (void *)this_frame);
>                 pr_err("BUG: KCSAN: data-race in %ps / %ps\n",
> -                      cmp < 0 ? other_fn : this_fn,
> -                      cmp < 0 ? this_fn : other_fn);
> +                      (void *)(cmp < 0 ? other_frame : this_frame),
> +                      (void *)(cmp < 0 ? this_frame : other_frame));
>         } break;
>
>         case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
> -               pr_err("BUG: KCSAN: data-race in %pS\n",
> -                      (void *)stack_entries[skipnr]);
> +               pr_err("BUG: KCSAN: data-race in %pS\n", (void *)this_frame);
>                 break;
>
>         default:
> diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan
> index 3f78b1434375..3552990abcfe 100644
> --- a/lib/Kconfig.kcsan
> +++ b/lib/Kconfig.kcsan
> @@ -81,6 +81,16 @@ config KCSAN_SKIP_WATCH_RANDOMIZE
>           KCSAN_WATCH_SKIP. If false, the chosen value is always
>           KCSAN_WATCH_SKIP.
>
> +config KCSAN_REPORT_ONCE_IN_MS
> +       int "Duration in milliseconds, in which any given data race is only reported once"
> +       default 3000
> +       help
> +         Any given data race is only reported once in the defined time window.
> +         Different data races may still generate reports within a duration
> +         that is smaller than the duration defined here. This allows rate
> +         limiting reporting to avoid flooding the console with reports.
> +         Setting this to 0 disables rate limiting.
> +
>  # Note that, while some of the below options could be turned into boot
>  # parameters, to optimize for the common use-case, we avoid this because: (a)
>  # it would impact performance (and we want to avoid static branch for all
> --
> 2.25.0.rc1.283.g88dfdc4193-goog
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH -rcu 2/2] kcsan: Rate-limit reporting per data races
  2020-01-10 18:20   ` Marco Elver
@ 2020-01-10 18:54     ` Marco Elver
  2020-01-11  5:13       ` Paul E. McKenney
  0 siblings, 1 reply; 11+ messages in thread
From: Marco Elver @ 2020-01-10 18:54 UTC (permalink / raw)
  To: Marco Elver
  Cc: Paul E. McKenney, Andrey Konovalov, Alexander Potapenko,
	Dmitry Vyukov, kasan-dev, LKML, Qian Cai

On Fri, 10 Jan 2020 at 19:20, Marco Elver <elver@google.com> wrote:
>
> On Thu, 9 Jan 2020 at 16:23, Marco Elver <elver@google.com> wrote:
> >
> > Adds support for rate limiting reports. This uses a time based rate
> > limit, that limits any given data race report to no more than one in a
> > fixed time window (default is 3 sec). This should prevent the console
> > from being spammed with data race reports, that would render the system
> > unusable.
> >
> > The implementation assumes that unique data races and the rate at which
> > they occur is bounded, since we cannot store arbitrarily many past data
> > race report information: we use a fixed-size array to store the required
> > information. We cannot use kmalloc/krealloc and resize the list when
> > needed, as reporting is triggered by the instrumentation calls; to
> > permit using KCSAN on the allocators, we cannot (re-)allocate any memory
> > during report generation (data races in the allocators lead to
> > deadlock).
> >
> > Reported-by: Qian Cai <cai@lca.pw>
> > Suggested-by: Paul E. McKenney <paulmck@kernel.org>
> > Signed-off-by: Marco Elver <elver@google.com>
> > ---
> >  kernel/kcsan/report.c | 112 ++++++++++++++++++++++++++++++++++++++----
> >  lib/Kconfig.kcsan     |  10 ++++
> >  2 files changed, 112 insertions(+), 10 deletions(-)
> >
> > diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
> > index 9f503ca2ff7a..e324af7d14c9 100644
> > --- a/kernel/kcsan/report.c
> > +++ b/kernel/kcsan/report.c
> > @@ -1,6 +1,7 @@
> >  // SPDX-License-Identifier: GPL-2.0
> >
> >  #include <linux/kernel.h>
> > +#include <linux/ktime.h>
> >  #include <linux/preempt.h>
> >  #include <linux/printk.h>
> >  #include <linux/sched.h>
> > @@ -31,12 +32,101 @@ static struct {
> >         int                     num_stack_entries;
> >  } other_info = { .ptr = NULL };
> >
> > +/*
> > + * Information about reported data races; used to rate limit reporting.
> > + */
> > +struct report_time {
> > +       /*
> > +        * The last time the data race was reported.
> > +        */
> > +       ktime_t time;
> > +
> > +       /*
> > +        * The frames of the 2 threads; if only 1 thread is known, one frame
> > +        * will be 0.
> > +        */
> > +       unsigned long frame1;
> > +       unsigned long frame2;
> > +};
> > +
> > +/*
> > + * Since we also want to be able to debug allocators with KCSAN, to avoid
> > + * deadlock, report_times cannot be dynamically resized with krealloc in
> > + * rate_limit_report.
> > + *
> > + * Therefore, we use a fixed-size array, which at most will occupy a page. This
> > + * still adequately rate limits reports, assuming that a) number of unique data
> > + * races is not excessive, and b) occurrence of unique data races within the
> > + * same time window is limited.
> > + */
> > +#define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time))
> > +#define REPORT_TIMES_SIZE                                                      \
> > +       (CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ?                   \
> > +                REPORT_TIMES_MAX :                                            \
> > +                CONFIG_KCSAN_REPORT_ONCE_IN_MS)
> > +static struct report_time report_times[REPORT_TIMES_SIZE];
> > +
> >  /*
> >   * This spinlock protects reporting and other_info, since other_info is usually
> >   * required when reporting.
> >   */
> >  static DEFINE_SPINLOCK(report_lock);
> >
> > +/*
> > + * Checks if the data race identified by thread frames frame1 and frame2 has
> > + * been reported since (now - KCSAN_REPORT_ONCE_IN_MS).
> > + */
> > +static bool rate_limit_report(unsigned long frame1, unsigned long frame2)
> > +{
> > +       struct report_time *use_entry = &report_times[0];
> > +       ktime_t now;
> > +       ktime_t invalid_before;
> > +       int i;
> > +
> > +       BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0);
> > +
> > +       if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0)
> > +               return false;
> > +
> > +       now = ktime_get();
> > +       invalid_before = ktime_sub_ms(now, CONFIG_KCSAN_REPORT_ONCE_IN_MS);
>
> Been thinking about this a bit more, and wondering if we should just
> use jiffies here?  Don't think we need the precision.

Sent v2: http://lkml.kernel.org/r/20200110184834.192636-1-elver@google.com
I think it's also safer to use jiffies, as noted in the v2 patch.

Paul: sorry for sending v2, seeing you already had these in your tree.
Hope this is ok.

Thanks,
-- Marco

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH -rcu 2/2] kcsan: Rate-limit reporting per data races
  2020-01-10 18:54     ` Marco Elver
@ 2020-01-11  5:13       ` Paul E. McKenney
  0 siblings, 0 replies; 11+ messages in thread
From: Paul E. McKenney @ 2020-01-11  5:13 UTC (permalink / raw)
  To: Marco Elver
  Cc: Andrey Konovalov, Alexander Potapenko, Dmitry Vyukov, kasan-dev,
	LKML, Qian Cai

On Fri, Jan 10, 2020 at 07:54:09PM +0100, Marco Elver wrote:
> On Fri, 10 Jan 2020 at 19:20, Marco Elver <elver@google.com> wrote:
> >
> > On Thu, 9 Jan 2020 at 16:23, Marco Elver <elver@google.com> wrote:
> > >
> > > Adds support for rate limiting reports. This uses a time based rate
> > > limit, that limits any given data race report to no more than one in a
> > > fixed time window (default is 3 sec). This should prevent the console
> > > from being spammed with data race reports, that would render the system
> > > unusable.
> > >
> > > The implementation assumes that unique data races and the rate at which
> > > they occur is bounded, since we cannot store arbitrarily many past data
> > > race report information: we use a fixed-size array to store the required
> > > information. We cannot use kmalloc/krealloc and resize the list when
> > > needed, as reporting is triggered by the instrumentation calls; to
> > > permit using KCSAN on the allocators, we cannot (re-)allocate any memory
> > > during report generation (data races in the allocators lead to
> > > deadlock).
> > >
> > > Reported-by: Qian Cai <cai@lca.pw>
> > > Suggested-by: Paul E. McKenney <paulmck@kernel.org>
> > > Signed-off-by: Marco Elver <elver@google.com>
> > > ---
> > >  kernel/kcsan/report.c | 112 ++++++++++++++++++++++++++++++++++++++----
> > >  lib/Kconfig.kcsan     |  10 ++++
> > >  2 files changed, 112 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
> > > index 9f503ca2ff7a..e324af7d14c9 100644
> > > --- a/kernel/kcsan/report.c
> > > +++ b/kernel/kcsan/report.c
> > > @@ -1,6 +1,7 @@
> > >  // SPDX-License-Identifier: GPL-2.0
> > >
> > >  #include <linux/kernel.h>
> > > +#include <linux/ktime.h>
> > >  #include <linux/preempt.h>
> > >  #include <linux/printk.h>
> > >  #include <linux/sched.h>
> > > @@ -31,12 +32,101 @@ static struct {
> > >         int                     num_stack_entries;
> > >  } other_info = { .ptr = NULL };
> > >
> > > +/*
> > > + * Information about reported data races; used to rate limit reporting.
> > > + */
> > > +struct report_time {
> > > +       /*
> > > +        * The last time the data race was reported.
> > > +        */
> > > +       ktime_t time;
> > > +
> > > +       /*
> > > +        * The frames of the 2 threads; if only 1 thread is known, one frame
> > > +        * will be 0.
> > > +        */
> > > +       unsigned long frame1;
> > > +       unsigned long frame2;
> > > +};
> > > +
> > > +/*
> > > + * Since we also want to be able to debug allocators with KCSAN, to avoid
> > > + * deadlock, report_times cannot be dynamically resized with krealloc in
> > > + * rate_limit_report.
> > > + *
> > > + * Therefore, we use a fixed-size array, which at most will occupy a page. This
> > > + * still adequately rate limits reports, assuming that a) number of unique data
> > > + * races is not excessive, and b) occurrence of unique data races within the
> > > + * same time window is limited.
> > > + */
> > > +#define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time))
> > > +#define REPORT_TIMES_SIZE                                                      \
> > > +       (CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ?                   \
> > > +                REPORT_TIMES_MAX :                                            \
> > > +                CONFIG_KCSAN_REPORT_ONCE_IN_MS)
> > > +static struct report_time report_times[REPORT_TIMES_SIZE];
> > > +
> > >  /*
> > >   * This spinlock protects reporting and other_info, since other_info is usually
> > >   * required when reporting.
> > >   */
> > >  static DEFINE_SPINLOCK(report_lock);
> > >
> > > +/*
> > > + * Checks if the data race identified by thread frames frame1 and frame2 has
> > > + * been reported since (now - KCSAN_REPORT_ONCE_IN_MS).
> > > + */
> > > +static bool rate_limit_report(unsigned long frame1, unsigned long frame2)
> > > +{
> > > +       struct report_time *use_entry = &report_times[0];
> > > +       ktime_t now;
> > > +       ktime_t invalid_before;
> > > +       int i;
> > > +
> > > +       BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0);
> > > +
> > > +       if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0)
> > > +               return false;
> > > +
> > > +       now = ktime_get();
> > > +       invalid_before = ktime_sub_ms(now, CONFIG_KCSAN_REPORT_ONCE_IN_MS);
> >
> > Been thinking about this a bit more, and wondering if we should just
> > use jiffies here?  Don't think we need the precision.
> 
> Sent v2: http://lkml.kernel.org/r/20200110184834.192636-1-elver@google.com
> I think it's also safer to use jiffies, as noted in the v2 patch.
> 
> Paul: sorry for sending v2, seeing you already had these in your tree.
> Hope this is ok.

Not a problem!  Pulling in the replacements shortly.

							Thanx, Paul

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2020-01-11  5:13 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-09 15:23 [PATCH -rcu 0/2] kcsan: Improvements to reporting Marco Elver
2020-01-09 15:23 ` [PATCH -rcu 1/2] kcsan: Show full access type in report Marco Elver
2020-01-09 15:23 ` [PATCH -rcu 2/2] kcsan: Rate-limit reporting per data races Marco Elver
2020-01-10 18:20   ` Marco Elver
2020-01-10 18:54     ` Marco Elver
2020-01-11  5:13       ` Paul E. McKenney
2020-01-09 16:27 ` [PATCH -rcu 0/2] kcsan: Improvements to reporting Paul E. McKenney
2020-01-09 17:03   ` Marco Elver
2020-01-09 17:31     ` Paul E. McKenney
2020-01-09 17:42       ` Marco Elver
2020-01-09 20:46         ` Paul E. McKenney

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.