From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
To: Steven Rostedt <rostedt@goodmis.org>,
"Paul E. McKenney" <paulmck@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>,
Stefan Metzmacher <metze@samba.org>
Cc: linux-kernel@vger.kernel.org,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Subject: [RFC PATCH 2/3] Fix: tracepoint: static call function vs data state mismatch
Date: Tue, 27 Jul 2021 11:06:12 -0400 [thread overview]
Message-ID: <20210727150613.20886-3-mathieu.desnoyers@efficios.com> (raw)
In-Reply-To: <20210727150613.20886-1-mathieu.desnoyers@efficios.com>
On a 1->0->1 callbacks transition, there is an issue with the new
callback using the old callback's data.
Considering __DO_TRACE_CALL:
do { \
struct tracepoint_func *it_func_ptr; \
void *__data; \
it_func_ptr = \
rcu_dereference_raw((&__tracepoint_##name)->funcs); \
if (it_func_ptr) { \
__data = (it_func_ptr)->data; \
----> [ delayed here on one CPU (e.g. vcpu preempted by the host) ]
static_call(tp_func_##name)(__data, args); \
} \
} while (0)
It has loaded the tp->funcs of the old callback, so it will try to use the old
data. This can be fixed by adding a RCU sync anywhere in the 1->0->1
transition chain.
On a N->2->1 transition, we need an rcu-sync because you may have a
sequence of 3->2->1 (or 1->2->1) where the element 0 data is unchanged
between 2->1, but was changed from 3->2 (or from 1->2), which may be
observed by the static call. This can be fixed by adding an
unconditional RCU sync in transition 2->1.
A follow up fix will introduce a more lightweight scheme based on RCU
get_state and cond_sync.
[ Build tested only. ]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
kernel/tracepoint.c | 100 +++++++++++++++++++++++++++++++++++---------
1 file changed, 80 insertions(+), 20 deletions(-)
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 133b6454b287..a85e7dc8b490 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -15,6 +15,13 @@
#include <linux/sched/task.h>
#include <linux/static_key.h>
+enum tp_func_state {
+ TP_FUNC_0,
+ TP_FUNC_1,
+ TP_FUNC_2,
+ TP_FUNC_N,
+};
+
extern tracepoint_ptr_t __start___tracepoints_ptrs[];
extern tracepoint_ptr_t __stop___tracepoints_ptrs[];
@@ -246,26 +253,29 @@ static void *func_remove(struct tracepoint_func **funcs,
return old;
}
-static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs, bool sync)
+/*
+ * Count the number of functions (enum tp_func_state) in a tp_funcs array.
+ */
+static enum tp_func_state nr_func_state(const struct tracepoint_func *tp_funcs)
+{
+ if (!tp_funcs)
+ return TP_FUNC_0;
+ if (!tp_funcs[1].func)
+ return TP_FUNC_1;
+ if (!tp_funcs[2].func)
+ return TP_FUNC_2;
+ return TP_FUNC_N; /* 3 or more */
+}
+
+static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs)
{
void *func = tp->iterator;
/* Synthetic events do not have static call sites */
if (!tp->static_call_key)
return;
-
- if (!tp_funcs[1].func) {
+ if (nr_func_state(tp_funcs) == TP_FUNC_1)
func = tp_funcs[0].func;
- /*
- * If going from the iterator back to a single caller,
- * we need to synchronize with __DO_TRACE to make sure
- * that the data passed to the callback is the one that
- * belongs to that callback.
- */
- if (sync)
- tracepoint_synchronize_unregister();
- }
-
__static_call_update(tp->static_call_key, tp->static_call_tramp, func);
}
@@ -299,9 +309,31 @@ static int tracepoint_add_func(struct tracepoint *tp,
* a pointer to it. This array is referenced by __DO_TRACE from
* include/linux/tracepoint.h using rcu_dereference_sched().
*/
- tracepoint_update_call(tp, tp_funcs, false);
- rcu_assign_pointer(tp->funcs, tp_funcs);
- static_key_enable(&tp->key);
+ switch (nr_func_state(tp_funcs)) {
+ case TP_FUNC_1: /* 0->1 */
+ /* Set static call to first function */
+ tracepoint_update_call(tp, tp_funcs);
+ /* Both iterator and static call handle NULL tp->funcs */
+ rcu_assign_pointer(tp->funcs, tp_funcs);
+ static_key_enable(&tp->key);
+ break;
+ case TP_FUNC_2: /* 1->2 */
+ /* Set iterator static call */
+ tracepoint_update_call(tp, tp_funcs);
+ /*
+ * Iterator callback installed before updating tp->funcs.
+ * Requires ordering between RCU assign/dereference and
+ * static call update/call.
+ */
+ rcu_assign_pointer(tp->funcs, tp_funcs);
+ break;
+ case TP_FUNC_N: /* N->N+1 (N>1) */
+ rcu_assign_pointer(tp->funcs, tp_funcs);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
release_probes(old);
return 0;
@@ -328,17 +360,45 @@ static int tracepoint_remove_func(struct tracepoint *tp,
/* Failed allocating new tp_funcs, replaced func with stub */
return 0;
- if (!tp_funcs) {
+ switch (nr_func_state(tp_funcs)) {
+ case TP_FUNC_0: /* 1->0 */
/* Removed last function */
if (tp->unregfunc && static_key_enabled(&tp->key))
tp->unregfunc();
static_key_disable(&tp->key);
+ /* Set iterator static call */
+ tracepoint_update_call(tp, tp_funcs);
+ /* Both iterator and static call handle NULL tp->funcs */
+ rcu_assign_pointer(tp->funcs, NULL);
+ /*
+ * Make sure new func never uses old data after a 1->0->1
+ * transition sequence.
+ * Considering that transition 0->1 is the common case
+ * and don't have rcu-sync, issue rcu-sync after
+ * transition 1->0 to break that sequence by waiting for
+ * readers to be quiescent.
+ */
+ tracepoint_synchronize_unregister();
+ break;
+ case TP_FUNC_1: /* 2->1 */
rcu_assign_pointer(tp->funcs, tp_funcs);
- } else {
+ /*
+ * On 2->1 transition, RCU sync is needed before setting
+ * static call to first callback, because the observer
+ * may have loaded any prior tp->funcs after the last one
+ * associated with an rcu-sync.
+ */
+ tracepoint_synchronize_unregister();
+ /* Set static call to first function */
+ tracepoint_update_call(tp, tp_funcs);
+ break;
+ case TP_FUNC_N: /* N->N-1 (N>2) */
rcu_assign_pointer(tp->funcs, tp_funcs);
- tracepoint_update_call(tp, tp_funcs,
- tp_funcs[0].data != old[0].data);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
}
release_probes(old);
return 0;
--
2.20.1
next prev parent reply other threads:[~2021-07-27 15:06 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-27 15:06 [RFC PATCH 0/3] tracepoint static call fixes Mathieu Desnoyers
2021-07-27 15:06 ` [RFC PATCH 1/3] Fix: tracepoint: static call: compare data on transition from 2->1 callees Mathieu Desnoyers
2021-07-27 15:06 ` Mathieu Desnoyers [this message]
2021-07-27 15:06 ` [RFC PATCH 3/3] Fix: tracepoint: rcu get state and cond sync for static call updates Mathieu Desnoyers
2021-07-27 16:18 ` Paul E. McKenney
2021-07-27 19:13 ` [RFC PATCH v2] " Mathieu Desnoyers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210727150613.20886-3-mathieu.desnoyers@efficios.com \
--to=mathieu.desnoyers@efficios.com \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=metze@samba.org \
--cc=mingo@redhat.com \
--cc=paulmck@kernel.org \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).