bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf-next 0/2] bpf: fix use of trace_printk() in BPF
@ 2020-07-03 14:44 Alan Maguire
  2020-07-03 14:44 ` [PATCH bpf-next 1/2] bpf: use dedicated bpf_trace_printk event instead of trace_printk() Alan Maguire
  2020-07-03 14:44 ` [PATCH bpf-next 2/2] selftests/bpf: add selftests verifying bpf_trace_printk() behaviour Alan Maguire
  0 siblings, 2 replies; 7+ messages in thread
From: Alan Maguire @ 2020-07-03 14:44 UTC (permalink / raw)
  To: rostedt, mingo, ast, daniel
  Cc: kafai, songliubraving, yhs, andriin, john.fastabend, kpsingh,
	linux-kernel, netdev, bpf, Alan Maguire

Steven suggested a way to resolve the appearance of the warning banner
that appears as a result of using trace_printk() in BPF [1].
Applying the patch and testing reveals all works as expected; we
can call bpf_trace_printk() and see the trace messages in
/sys/kernel/debug/tracing/trace_pipe and no banner message appears.

Also add a test prog to verify basic bpf_trace_printk() helper behaviour.

Possible future work: ftrace supports trace instances, and one thing
that strikes me is that we could make use of these in BPF to separate
BPF program bpf_trace_printk() output from output of other tracing
activities.

I was thinking something like a sysctl net.core.bpf_trace_instance,
defaulting to an empty value signifying we use the root trace
instance.  This would preserve existing behaviour while giving a
way to separate BPF tracing output from other tracing output if wanted.

[1]  https://lore.kernel.org/r/20200628194334.6238b933@oasis.local.home

Alan Maguire (2):
  bpf: use dedicated bpf_trace_printk event instead of trace_printk()
  selftests/bpf: add selftests verifying bpf_trace_printk() behaviour

 kernel/trace/Makefile                              |  2 +
 kernel/trace/bpf_trace.c                           | 41 +++++++++++--
 kernel/trace/bpf_trace.h                           | 34 +++++++++++
 .../selftests/bpf/prog_tests/trace_printk.c        | 71 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/trace_printk.c   | 21 +++++++
 5 files changed, 165 insertions(+), 4 deletions(-)
 create mode 100644 kernel/trace/bpf_trace.h
 create mode 100644 tools/testing/selftests/bpf/prog_tests/trace_printk.c
 create mode 100644 tools/testing/selftests/bpf/progs/trace_printk.c

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH bpf-next 1/2] bpf: use dedicated bpf_trace_printk event instead of trace_printk()
  2020-07-03 14:44 [PATCH bpf-next 0/2] bpf: fix use of trace_printk() in BPF Alan Maguire
@ 2020-07-03 14:44 ` Alan Maguire
  2020-07-08  5:56   ` Andrii Nakryiko
  2020-07-09 21:41   ` Steven Rostedt
  2020-07-03 14:44 ` [PATCH bpf-next 2/2] selftests/bpf: add selftests verifying bpf_trace_printk() behaviour Alan Maguire
  1 sibling, 2 replies; 7+ messages in thread
From: Alan Maguire @ 2020-07-03 14:44 UTC (permalink / raw)
  To: rostedt, mingo, ast, daniel
  Cc: kafai, songliubraving, yhs, andriin, john.fastabend, kpsingh,
	linux-kernel, netdev, bpf, Alan Maguire

The bpf helper bpf_trace_printk() uses trace_printk() under the hood.
This leads to an alarming warning message originating from trace
buffer allocation which occurs the first time a program using
bpf_trace_printk() is loaded.

We can instead create a trace event for bpf_trace_printk() and enable
it in-kernel when/if we encounter a program using the
bpf_trace_printk() helper.  With this approach, trace_printk()
is not used directly and no warning message appears.

This work was started by Steven (see Link) and finished by Alan; added
Steven's Signed-off-by with his permission.

Link: https://lore.kernel.org/r/20200628194334.6238b933@oasis.local.home
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
---
 kernel/trace/Makefile    |  2 ++
 kernel/trace/bpf_trace.c | 41 +++++++++++++++++++++++++++++++++++++----
 kernel/trace/bpf_trace.h | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+), 4 deletions(-)
 create mode 100644 kernel/trace/bpf_trace.h

diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 6575bb0..aeba5ee 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -31,6 +31,8 @@ ifdef CONFIG_GCOV_PROFILE_FTRACE
 GCOV_PROFILE := y
 endif
 
+CFLAGS_bpf_trace.o := -I$(src)
+
 CFLAGS_trace_benchmark.o := -I$(src)
 CFLAGS_trace_events_filter.o := -I$(src)
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 1d874d8..cdbafc4 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2,6 +2,10 @@
 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
  * Copyright (c) 2016 Facebook
  */
+#define CREATE_TRACE_POINTS
+
+#include "bpf_trace.h"
+
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/slab.h>
@@ -11,6 +15,7 @@
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
 #include <linux/kprobes.h>
+#include <linux/spinlock.h>
 #include <linux/syscalls.h>
 #include <linux/error-injection.h>
 
@@ -374,6 +379,28 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
 	}
 }
 
+static DEFINE_SPINLOCK(trace_printk_lock);
+
+#define BPF_TRACE_PRINTK_SIZE   1024
+
+static inline int bpf_do_trace_printk(const char *fmt, ...)
+{
+	static char buf[BPF_TRACE_PRINTK_SIZE];
+	unsigned long flags;
+	va_list ap;
+	int ret;
+
+	spin_lock_irqsave(&trace_printk_lock, flags);
+	va_start(ap, fmt);
+	ret = vsnprintf(buf, BPF_TRACE_PRINTK_SIZE, fmt, ap);
+	va_end(ap);
+	if (ret > 0)
+		trace_bpf_trace_printk(buf);
+	spin_unlock_irqrestore(&trace_printk_lock, flags);
+
+	return ret;
+}
+
 /*
  * Only limited trace_printk() conversion specifiers allowed:
  * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
@@ -483,8 +510,7 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
  */
 #define __BPF_TP_EMIT()	__BPF_ARG3_TP()
 #define __BPF_TP(...)							\
-	__trace_printk(0 /* Fake ip */,					\
-		       fmt, ##__VA_ARGS__)
+	bpf_do_trace_printk(fmt, ##__VA_ARGS__)
 
 #define __BPF_ARG1_TP(...)						\
 	((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64))	\
@@ -518,13 +544,20 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
 	.arg2_type	= ARG_CONST_SIZE,
 };
 
+int bpf_trace_printk_enabled;
+
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 {
 	/*
 	 * this program might be calling bpf_trace_printk,
-	 * so allocate per-cpu printk buffers
+	 * so enable the associated bpf_trace/bpf_trace_printk event.
 	 */
-	trace_printk_init_buffers();
+	if (!bpf_trace_printk_enabled) {
+		if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
+			pr_warn_ratelimited("could not enable bpf_trace_printk events");
+		else
+			bpf_trace_printk_enabled = 1;
+	}
 
 	return &bpf_trace_printk_proto;
 }
diff --git a/kernel/trace/bpf_trace.h b/kernel/trace/bpf_trace.h
new file mode 100644
index 0000000..9acbc11
--- /dev/null
+++ b/kernel/trace/bpf_trace.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bpf_trace
+
+#if !defined(_TRACE_BPF_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_BPF_TRACE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(bpf_trace_printk,
+
+	TP_PROTO(const char *bpf_string),
+
+	TP_ARGS(bpf_string),
+
+	TP_STRUCT__entry(
+		__string(bpf_string, bpf_string)
+	),
+
+	TP_fast_assign(
+		__assign_str(bpf_string, bpf_string);
+	),
+
+	TP_printk("%s", __get_str(bpf_string))
+);
+
+#endif /* _TRACE_BPF_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE bpf_trace
+
+#include <trace/define_trace.h>
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH bpf-next 2/2] selftests/bpf: add selftests verifying bpf_trace_printk() behaviour
  2020-07-03 14:44 [PATCH bpf-next 0/2] bpf: fix use of trace_printk() in BPF Alan Maguire
  2020-07-03 14:44 ` [PATCH bpf-next 1/2] bpf: use dedicated bpf_trace_printk event instead of trace_printk() Alan Maguire
@ 2020-07-03 14:44 ` Alan Maguire
  2020-07-08  6:01   ` Andrii Nakryiko
  1 sibling, 1 reply; 7+ messages in thread
From: Alan Maguire @ 2020-07-03 14:44 UTC (permalink / raw)
  To: rostedt, mingo, ast, daniel
  Cc: kafai, songliubraving, yhs, andriin, john.fastabend, kpsingh,
	linux-kernel, netdev, bpf, Alan Maguire

Simple selftest that verifies bpf_trace_printk() returns a sensible
value and tracing messages appear.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
---
 .../selftests/bpf/prog_tests/trace_printk.c        | 71 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/trace_printk.c   | 21 +++++++
 2 files changed, 92 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/trace_printk.c
 create mode 100644 tools/testing/selftests/bpf/progs/trace_printk.c

diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
new file mode 100644
index 0000000..a850cba
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+
+#include "trace_printk.skel.h"
+
+#define TRACEBUF	"/sys/kernel/debug/tracing/trace_pipe"
+#define SEARCHMSG	"testing,testing"
+
+void test_trace_printk(void)
+{
+	int err, duration = 0, found = 0;
+	struct trace_printk *skel;
+	struct trace_printk__bss *bss;
+	char buf[1024];
+	int fd = -1;
+
+	skel = trace_printk__open();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+
+	err = trace_printk__load(skel);
+	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+		goto cleanup;
+
+	bss = skel->bss;
+
+	err = trace_printk__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	fd = open(TRACEBUF, O_RDONLY);
+	if (CHECK(fd < 0, "could not open trace buffer",
+		  "error %d opening %s", errno, TRACEBUF))
+		goto cleanup;
+
+	/* We do not want to wait forever if this test fails... */
+	fcntl(fd, F_SETFL, O_NONBLOCK);
+
+	/* wait for tracepoint to trigger */
+	sleep(1);
+	trace_printk__detach(skel);
+
+	if (CHECK(bss->trace_printk_ran == 0,
+		  "bpf_trace_printk never ran",
+		  "ran == %d", bss->trace_printk_ran))
+		goto cleanup;
+
+	if (CHECK(bss->trace_printk_ret <= 0,
+		  "bpf_trace_printk returned <= 0 value",
+		  "got %d", bss->trace_printk_ret))
+		goto cleanup;
+
+	/* verify our search string is in the trace buffer */
+	while (read(fd, buf, sizeof(buf)) >= 0) {
+		if (strstr(buf, SEARCHMSG) != NULL)
+			found++;
+	}
+
+	if (CHECK(!found, "message from bpf_trace_printk not found",
+		  "no instance of %s in %s", SEARCHMSG, TRACEBUF))
+		goto cleanup;
+
+	printf("ran %d times; last return value %d, with %d instances of msg\n",
+	       bss->trace_printk_ran, bss->trace_printk_ret, found);
+cleanup:
+	trace_printk__destroy(skel);
+	if (fd != -1)
+		close(fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c
new file mode 100644
index 0000000..8ff6d49
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trace_printk.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020, Oracle and/or its affiliates.
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int trace_printk_ret = 0;
+int trace_printk_ran = 0;
+
+SEC("tracepoint/sched/sched_switch")
+int sched_switch(void *ctx)
+{
+	static const char fmt[] = "testing,testing %d\n";
+
+	trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
+					    ++trace_printk_ran);
+	return 0;
+}
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: use dedicated bpf_trace_printk event instead of trace_printk()
  2020-07-03 14:44 ` [PATCH bpf-next 1/2] bpf: use dedicated bpf_trace_printk event instead of trace_printk() Alan Maguire
@ 2020-07-08  5:56   ` Andrii Nakryiko
  2020-07-09 16:04     ` Alan Maguire
  2020-07-09 21:41   ` Steven Rostedt
  1 sibling, 1 reply; 7+ messages in thread
From: Andrii Nakryiko @ 2020-07-08  5:56 UTC (permalink / raw)
  To: Alan Maguire
  Cc: Steven Rostedt, Ingo Molnar, Alexei Starovoitov, Daniel Borkmann,
	Martin Lau, Song Liu, Yonghong Song, Andrii Nakryiko,
	john fastabend, KP Singh, open list, Networking, bpf

On Fri, Jul 3, 2020 at 7:47 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> The bpf helper bpf_trace_printk() uses trace_printk() under the hood.
> This leads to an alarming warning message originating from trace
> buffer allocation which occurs the first time a program using
> bpf_trace_printk() is loaded.
>
> We can instead create a trace event for bpf_trace_printk() and enable
> it in-kernel when/if we encounter a program using the
> bpf_trace_printk() helper.  With this approach, trace_printk()
> is not used directly and no warning message appears.
>
> This work was started by Steven (see Link) and finished by Alan; added
> Steven's Signed-off-by with his permission.
>
> Link: https://lore.kernel.org/r/20200628194334.6238b933@oasis.local.home
> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> ---
>  kernel/trace/Makefile    |  2 ++
>  kernel/trace/bpf_trace.c | 41 +++++++++++++++++++++++++++++++++++++----
>  kernel/trace/bpf_trace.h | 34 ++++++++++++++++++++++++++++++++++
>  3 files changed, 73 insertions(+), 4 deletions(-)
>  create mode 100644 kernel/trace/bpf_trace.h
>

[...]

> +static DEFINE_SPINLOCK(trace_printk_lock);
> +
> +#define BPF_TRACE_PRINTK_SIZE   1024
> +
> +static inline int bpf_do_trace_printk(const char *fmt, ...)
> +{
> +       static char buf[BPF_TRACE_PRINTK_SIZE];
> +       unsigned long flags;
> +       va_list ap;
> +       int ret;
> +
> +       spin_lock_irqsave(&trace_printk_lock, flags);
> +       va_start(ap, fmt);
> +       ret = vsnprintf(buf, BPF_TRACE_PRINTK_SIZE, fmt, ap);
> +       va_end(ap);
> +       if (ret > 0)
> +               trace_bpf_trace_printk(buf);

Is there any reason to artificially limit the case of printing empty
string? It's kind of an awkward use case, for sure, but having
guarantee that every bpf_trace_printk() invocation triggers tracepoint
is a nice property, no?

> +       spin_unlock_irqrestore(&trace_printk_lock, flags);
> +
> +       return ret;
> +}
> +
>  /*
>   * Only limited trace_printk() conversion specifiers allowed:
>   * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
> @@ -483,8 +510,7 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
>   */
>  #define __BPF_TP_EMIT()        __BPF_ARG3_TP()
>  #define __BPF_TP(...)                                                  \
> -       __trace_printk(0 /* Fake ip */,                                 \
> -                      fmt, ##__VA_ARGS__)
> +       bpf_do_trace_printk(fmt, ##__VA_ARGS__)
>
>  #define __BPF_ARG1_TP(...)                                             \
>         ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64))        \
> @@ -518,13 +544,20 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
>         .arg2_type      = ARG_CONST_SIZE,
>  };
>
> +int bpf_trace_printk_enabled;

static?

> +
>  const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
>  {
>         /*
>          * this program might be calling bpf_trace_printk,
> -        * so allocate per-cpu printk buffers
> +        * so enable the associated bpf_trace/bpf_trace_printk event.
>          */
> -       trace_printk_init_buffers();
> +       if (!bpf_trace_printk_enabled) {
> +               if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))

just to double check, it's ok to simultaneously enable same event in
parallel, right?

> +                       pr_warn_ratelimited("could not enable bpf_trace_printk events");
> +               else
> +                       bpf_trace_printk_enabled = 1;
> +       }
>
>         return &bpf_trace_printk_proto;
>  }

[...]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next 2/2] selftests/bpf: add selftests verifying bpf_trace_printk() behaviour
  2020-07-03 14:44 ` [PATCH bpf-next 2/2] selftests/bpf: add selftests verifying bpf_trace_printk() behaviour Alan Maguire
@ 2020-07-08  6:01   ` Andrii Nakryiko
  0 siblings, 0 replies; 7+ messages in thread
From: Andrii Nakryiko @ 2020-07-08  6:01 UTC (permalink / raw)
  To: Alan Maguire
  Cc: Steven Rostedt, Ingo Molnar, Alexei Starovoitov, Daniel Borkmann,
	Martin Lau, Song Liu, Yonghong Song, Andrii Nakryiko,
	john fastabend, KP Singh, open list, Networking, bpf

On Fri, Jul 3, 2020 at 7:45 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> Simple selftest that verifies bpf_trace_printk() returns a sensible
> value and tracing messages appear.
>
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> ---
>  .../selftests/bpf/prog_tests/trace_printk.c        | 71 ++++++++++++++++++++++
>  tools/testing/selftests/bpf/progs/trace_printk.c   | 21 +++++++
>  2 files changed, 92 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/trace_printk.c
>  create mode 100644 tools/testing/selftests/bpf/progs/trace_printk.c
>

[...]

> +       fd = open(TRACEBUF, O_RDONLY);
> +       if (CHECK(fd < 0, "could not open trace buffer",
> +                 "error %d opening %s", errno, TRACEBUF))
> +               goto cleanup;
> +
> +       /* We do not want to wait forever if this test fails... */
> +       fcntl(fd, F_SETFL, O_NONBLOCK);
> +
> +       /* wait for tracepoint to trigger */
> +       sleep(1);

that's a long sleep, it's better to use tp/raw_syscalls/sys_enter
tracepoint to trigger BPF program and then just usleep(1)

> +       trace_printk__detach(skel);
> +
> +       if (CHECK(bss->trace_printk_ran == 0,
> +                 "bpf_trace_printk never ran",
> +                 "ran == %d", bss->trace_printk_ran))
> +               goto cleanup;
> +

[...]

> +
> +int trace_printk_ret = 0;
> +int trace_printk_ran = 0;
> +
> +SEC("tracepoint/sched/sched_switch")

see above, probably better to stick to something like
tp/raw_syscalls/sys_enter or raw_tp/sys_enter. Also, to not overwhelm
trace_pipe output, might want to filter by PID and emit messages for
test_prog's PID only.


> +int sched_switch(void *ctx)
> +{
> +       static const char fmt[] = "testing,testing %d\n";
> +
> +       trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
> +                                           ++trace_printk_ran);
> +       return 0;
> +}
> --
> 1.8.3.1
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: use dedicated bpf_trace_printk event instead of trace_printk()
  2020-07-08  5:56   ` Andrii Nakryiko
@ 2020-07-09 16:04     ` Alan Maguire
  0 siblings, 0 replies; 7+ messages in thread
From: Alan Maguire @ 2020-07-09 16:04 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Alan Maguire, Steven Rostedt, Ingo Molnar, Alexei Starovoitov,
	Daniel Borkmann, Martin Lau, Song Liu, Yonghong Song,
	Andrii Nakryiko, john fastabend, KP Singh, open list, Networking,
	bpf


On Tue, 7 Jul 2020, Andrii Nakryiko wrote:

> On Fri, Jul 3, 2020 at 7:47 AM Alan Maguire <alan.maguire@oracle.com> wrote:
> >
> > The bpf helper bpf_trace_printk() uses trace_printk() under the hood.
> > This leads to an alarming warning message originating from trace
> > buffer allocation which occurs the first time a program using
> > bpf_trace_printk() is loaded.
> >
> > We can instead create a trace event for bpf_trace_printk() and enable
> > it in-kernel when/if we encounter a program using the
> > bpf_trace_printk() helper.  With this approach, trace_printk()
> > is not used directly and no warning message appears.
> >
> > This work was started by Steven (see Link) and finished by Alan; added
> > Steven's Signed-off-by with his permission.
> >
> > Link: https://lore.kernel.org/r/20200628194334.6238b933@oasis.local.home
> > Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> > Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> > ---
> >  kernel/trace/Makefile    |  2 ++
> >  kernel/trace/bpf_trace.c | 41 +++++++++++++++++++++++++++++++++++++----
> >  kernel/trace/bpf_trace.h | 34 ++++++++++++++++++++++++++++++++++
> >  3 files changed, 73 insertions(+), 4 deletions(-)
> >  create mode 100644 kernel/trace/bpf_trace.h
> >
> 
> [...]
> 
> > +static DEFINE_SPINLOCK(trace_printk_lock);
> > +
> > +#define BPF_TRACE_PRINTK_SIZE   1024
> > +
> > +static inline int bpf_do_trace_printk(const char *fmt, ...)
> > +{
> > +       static char buf[BPF_TRACE_PRINTK_SIZE];
> > +       unsigned long flags;
> > +       va_list ap;
> > +       int ret;
> > +
> > +       spin_lock_irqsave(&trace_printk_lock, flags);
> > +       va_start(ap, fmt);
> > +       ret = vsnprintf(buf, BPF_TRACE_PRINTK_SIZE, fmt, ap);
> > +       va_end(ap);
> > +       if (ret > 0)
> > +               trace_bpf_trace_printk(buf);
> 
> Is there any reason to artificially limit the case of printing empty
> string? It's kind of an awkward use case, for sure, but having
> guarantee that every bpf_trace_printk() invocation triggers tracepoint
> is a nice property, no?
>

True enough; I'll modify the above to support empty string display also.
 
> > +       spin_unlock_irqrestore(&trace_printk_lock, flags);
> > +
> > +       return ret;
> > +}
> > +
> >  /*
> >   * Only limited trace_printk() conversion specifiers allowed:
> >   * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
> > @@ -483,8 +510,7 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
> >   */
> >  #define __BPF_TP_EMIT()        __BPF_ARG3_TP()
> >  #define __BPF_TP(...)                                                  \
> > -       __trace_printk(0 /* Fake ip */,                                 \
> > -                      fmt, ##__VA_ARGS__)
> > +       bpf_do_trace_printk(fmt, ##__VA_ARGS__)
> >
> >  #define __BPF_ARG1_TP(...)                                             \
> >         ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64))        \
> > @@ -518,13 +544,20 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
> >         .arg2_type      = ARG_CONST_SIZE,
> >  };
> >
> > +int bpf_trace_printk_enabled;
> 
> static?
> 

oops, will fix.

> > +
> >  const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
> >  {
> >         /*
> >          * this program might be calling bpf_trace_printk,
> > -        * so allocate per-cpu printk buffers
> > +        * so enable the associated bpf_trace/bpf_trace_printk event.
> >          */
> > -       trace_printk_init_buffers();
> > +       if (!bpf_trace_printk_enabled) {
> > +               if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
> 
> just to double check, it's ok to simultaneously enable same event in
> parallel, right?
>

From an ftrace perspective, it looks fine since the actual enable is 
mutex-protected. We could grab the trace_printk_lock here too I guess,
but I don't _think_ there's a need. 
 
Thanks for reviewing! I'll spin up a v2 with the above fixes shortly
plus I'll change to using tp/raw_syscalls/sys_enter in the test as you 
suggested.

Alan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: use dedicated bpf_trace_printk event instead of trace_printk()
  2020-07-03 14:44 ` [PATCH bpf-next 1/2] bpf: use dedicated bpf_trace_printk event instead of trace_printk() Alan Maguire
  2020-07-08  5:56   ` Andrii Nakryiko
@ 2020-07-09 21:41   ` Steven Rostedt
  1 sibling, 0 replies; 7+ messages in thread
From: Steven Rostedt @ 2020-07-09 21:41 UTC (permalink / raw)
  To: Alan Maguire
  Cc: mingo, ast, daniel, kafai, songliubraving, yhs, andriin,
	john.fastabend, kpsingh, linux-kernel, netdev, bpf

On Fri,  3 Jul 2020 15:44:27 +0100
Alan Maguire <alan.maguire@oracle.com> wrote:

> The bpf helper bpf_trace_printk() uses trace_printk() under the hood.
> This leads to an alarming warning message originating from trace
> buffer allocation which occurs the first time a program using
> bpf_trace_printk() is loaded.
> 
> We can instead create a trace event for bpf_trace_printk() and enable
> it in-kernel when/if we encounter a program using the
> bpf_trace_printk() helper.  With this approach, trace_printk()
> is not used directly and no warning message appears.
> 
> This work was started by Steven (see Link) and finished by Alan; added
> Steven's Signed-off-by with his permission.
> 
> Link: https://lore.kernel.org/r/20200628194334.6238b933@oasis.local.home
> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> ---
>  kernel/trace/Makefile    |  2 ++
>  kernel/trace/bpf_trace.c | 41 +++++++++++++++++++++++++++++++++++++----
>  kernel/trace/bpf_trace.h | 34 ++++++++++++++++++++++++++++++++++
>  3 files changed, 73 insertions(+), 4 deletions(-)
>  create mode 100644 kernel/trace/bpf_trace.h
> 
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index 6575bb0..aeba5ee 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -31,6 +31,8 @@ ifdef CONFIG_GCOV_PROFILE_FTRACE
>  GCOV_PROFILE := y
>  endif
>  
> +CFLAGS_bpf_trace.o := -I$(src)
> +
>  CFLAGS_trace_benchmark.o := -I$(src)
>  CFLAGS_trace_events_filter.o := -I$(src)
>  
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 1d874d8..cdbafc4 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -2,6 +2,10 @@
>  /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
>   * Copyright (c) 2016 Facebook
>   */
> +#define CREATE_TRACE_POINTS
> +
> +#include "bpf_trace.h"
> +

Note, it's probably best to include the above after all the other
headers. The CREATE_TRACE_POINTS might cause issues if a header has
other trace event headers included.

>  #include <linux/kernel.h>
>  #include <linux/types.h>
>  #include <linux/slab.h>
> @@ -11,6 +15,7 @@
>  #include <linux/uaccess.h>
>  #include <linux/ctype.h>
>  #include <linux/kprobes.h>
> +#include <linux/spinlock.h>
>  #include <linux/syscalls.h>
>  #include <linux/error-injection.h>
>  
> @@ -374,6 +379,28 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
>  	}
>  }
>  
> +static DEFINE_SPINLOCK(trace_printk_lock);

I wonder if we should make this a RAW_SPINLOCK(). That way it wont
cause any issues on PREEMPT_RT if a bpf trace event is called within
other raw spinlocks.

> +
> +#define BPF_TRACE_PRINTK_SIZE   1024
> +
> +static inline int bpf_do_trace_printk(const char *fmt, ...)
> +{
> +	static char buf[BPF_TRACE_PRINTK_SIZE];
> +	unsigned long flags;
> +	va_list ap;
> +	int ret;
> +
> +	spin_lock_irqsave(&trace_printk_lock, flags);
> +	va_start(ap, fmt);
> +	ret = vsnprintf(buf, BPF_TRACE_PRINTK_SIZE, fmt, ap);
> +	va_end(ap);
> +	if (ret > 0)
> +		trace_bpf_trace_printk(buf);
> +	spin_unlock_irqrestore(&trace_printk_lock, flags);
> +
> +	return ret;
> +}
> +
>  /*
>   * Only limited trace_printk() conversion specifiers allowed:
>   * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
> @@ -483,8 +510,7 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
>   */
>  #define __BPF_TP_EMIT()	__BPF_ARG3_TP()
>  #define __BPF_TP(...)							\
> -	__trace_printk(0 /* Fake ip */,					\
> -		       fmt, ##__VA_ARGS__)
> +	bpf_do_trace_printk(fmt, ##__VA_ARGS__)
>  
>  #define __BPF_ARG1_TP(...)						\
>  	((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64))	\
> @@ -518,13 +544,20 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
>  	.arg2_type	= ARG_CONST_SIZE,
>  };
>  
> +int bpf_trace_printk_enabled;
> +
>  const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
>  {
>  	/*
>  	 * this program might be calling bpf_trace_printk,
> -	 * so allocate per-cpu printk buffers
> +	 * so enable the associated bpf_trace/bpf_trace_printk event.
>  	 */
> -	trace_printk_init_buffers();
> +	if (!bpf_trace_printk_enabled) {
> +		if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))

If you just keep it enabled, it may be best to always call this. It
doesn't hurt if you call it when it is already enabled.

This is because if someone were to disable the bpf_trace_printk via
tracefs, and then load another bpf program with another
bpf_trace_printk in it, that program wont re-enable the
bpf_trace_printk trace event again do to this check.

The rest looks fine to me.

-- Steve


> +			pr_warn_ratelimited("could not enable bpf_trace_printk events");
> +		else
> +			bpf_trace_printk_enabled = 1;
> +	}
>  
>  	return &bpf_trace_printk_proto;
>  }
> diff --git a/kernel/trace/bpf_trace.h b/kernel/trace/bpf_trace.h
> new file mode 100644
> index 0000000..9acbc11
> --- /dev/null
> +++ b/kernel/trace/bpf_trace.h
> @@ -0,0 +1,34 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM bpf_trace
> +
> +#if !defined(_TRACE_BPF_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
> +
> +#define _TRACE_BPF_TRACE_H
> +
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT(bpf_trace_printk,
> +
> +	TP_PROTO(const char *bpf_string),
> +
> +	TP_ARGS(bpf_string),
> +
> +	TP_STRUCT__entry(
> +		__string(bpf_string, bpf_string)
> +	),
> +
> +	TP_fast_assign(
> +		__assign_str(bpf_string, bpf_string);
> +	),
> +
> +	TP_printk("%s", __get_str(bpf_string))
> +);
> +
> +#endif /* _TRACE_BPF_TRACE_H */
> +
> +#undef TRACE_INCLUDE_PATH
> +#define TRACE_INCLUDE_PATH .
> +#define TRACE_INCLUDE_FILE bpf_trace
> +
> +#include <trace/define_trace.h>


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-07-09 21:41 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-03 14:44 [PATCH bpf-next 0/2] bpf: fix use of trace_printk() in BPF Alan Maguire
2020-07-03 14:44 ` [PATCH bpf-next 1/2] bpf: use dedicated bpf_trace_printk event instead of trace_printk() Alan Maguire
2020-07-08  5:56   ` Andrii Nakryiko
2020-07-09 16:04     ` Alan Maguire
2020-07-09 21:41   ` Steven Rostedt
2020-07-03 14:44 ` [PATCH bpf-next 2/2] selftests/bpf: add selftests verifying bpf_trace_printk() behaviour Alan Maguire
2020-07-08  6:01   ` Andrii Nakryiko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).