bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] ftrace: Add support to keep some functions out of ftrace
@ 2022-07-22 11:08 Jiri Olsa
  2022-07-22 11:26 ` Steven Rostedt
  0 siblings, 1 reply; 58+ messages in thread
From: Jiri Olsa @ 2022-07-22 11:08 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Steven Rostedt, Ingo Molnar
  Cc: bpf, Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo

hi,
we recently hit bug where ftrace update raced with bpf_dispatcher_update
that calls directly bpf_arch_text_poke [1].

The bpf_dispatcher_update creates special trampoline and attaches it to
designated bpf_dispatcher_xdp_func function, which is run for xdp bpf
programs from several places.

After discussion with Alexei we'd rather keep this code update out of
ftrace, because it's already slow and had troubles with CI because of
that.

This patch is presenting the idea to allow some functions not to be
managed by ftrace by marking them with NOFTRACE_SYMBOL macro and
such symbols will not be added to ftrace_pages on the kernel start.

Please note it's RFC so I did not bother with some fast search for
is_noftrace_function function.

Perhaps we could use existing NOKPROBE_SYMBOL for this? but I'm not
sure you can (or want) to run function trace on such symbols.

thoughts? thanks
jirka


[1] https://lore.kernel.org/bpf/20220714082316.479181-1-jolsa@kernel.org/
---
 include/asm-generic/vmlinux.lds.h | 10 ++++++
 include/linux/bpf.h               |  2 ++
 include/linux/ftrace.h            | 10 ++++++
 kernel/trace/ftrace.c             | 53 +++++++++++++++++++++++++++++++
 4 files changed, 75 insertions(+)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 7515a465ec03..94c3cbe82ffd 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -210,6 +210,15 @@
 #define KPROBE_BLACKLIST()
 #endif
 
+#ifdef CONFIG_FTRACE
+#define NOFTRACE()	. = ALIGN(8);			      \
+			__start_noftrace = .;		      \
+			KEEP(*(_no_ftrace))		      \
+			__stop_noftrace = .;
+#else
+#define NOFTRACE()
+#endif
+
 #ifdef CONFIG_FUNCTION_ERROR_INJECTION
 #define ERROR_INJECT_WHITELIST()	STRUCT_ALIGN();			      \
 			__start_error_injection_whitelist = .;		      \
@@ -705,6 +714,7 @@
 	FTRACE_EVENTS()							\
 	TRACE_SYSCALLS()						\
 	KPROBE_BLACKLIST()						\
+	NOFTRACE()							\
 	ERROR_INJECT_WHITELIST()					\
 	MEM_DISCARD(init.rodata)					\
 	CLK_OF_TABLES()							\
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a5bf00649995..1330b84eb20f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -27,6 +27,7 @@
 #include <linux/bpfptr.h>
 #include <linux/btf.h>
 #include <linux/rcupdate_trace.h>
+#include <linux/ftrace.h>
 
 struct bpf_verifier_env;
 struct bpf_verifier_log;
@@ -919,6 +920,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
 		return bpf_func(ctx, insnsi);				\
 	}								\
 	EXPORT_SYMBOL(bpf_dispatcher_##name##_func);			\
+	NOFTRACE_SYMBOL(bpf_dispatcher_##name##_func);			\
 	struct bpf_dispatcher bpf_dispatcher_##name =			\
 		BPF_DISPATCHER_INIT(bpf_dispatcher_##name);
 #define DECLARE_BPF_DISPATCHER(name)					\
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 979f6bfa2c25..cde80cd57f2f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1141,4 +1141,14 @@ unsigned long arch_syscall_addr(int nr);
 
 #endif /* CONFIG_FTRACE_SYSCALLS */
 
+#ifdef CONFIG_FUNCTION_TRACER
+#define __NOFTRACE_SYMBOL(fname)				\
+static unsigned long __used					\
+	__section("_no_ftrace")					\
+	_noftrace_addr_##fname = (unsigned long)fname;
+#define NOFTRACE_SYMBOL(fname) __NOFTRACE_SYMBOL(fname)
+#else
+#define NOFTRACE_SYMBOL(fname)
+#endif /* CONFIG_FUNCTION_TRACER */
+
 #endif /* _LINUX_FTRACE_H */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 601ccf1b2f09..e0ebd71135b4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6575,6 +6575,51 @@ static void test_is_sorted(unsigned long *start, unsigned long count)
 }
 #endif
 
+struct noftrace_entry {
+	struct list_head list;
+	unsigned long addr;
+};
+
+static LIST_HEAD(noftrace);
+
+static int __init noftrace_add(unsigned long addr)
+{
+	struct noftrace_entry *ent;
+
+	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+	if (!ent)
+		return -ENOMEM;
+	ent->addr = addr;
+	INIT_LIST_HEAD(&ent->list);
+	list_add_tail(&ent->list, &noftrace);
+	return 0;
+}
+
+static int __init noftrace_init(void)
+{
+	extern unsigned long __start_noftrace[];
+	extern unsigned long __stop_noftrace[];
+	unsigned long *iter, entry;
+
+	for (iter = __start_noftrace; iter < __stop_noftrace; iter++) {
+		entry = (unsigned long) dereference_symbol_descriptor((void *)*iter);
+		if (noftrace_add(entry))
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+static bool is_noftrace_function(unsigned long addr)
+{
+	struct noftrace_entry *ent;
+
+	list_for_each_entry(ent, &noftrace, list) {
+		if (ent->addr == addr)
+			return true;
+	}
+	return false;
+}
+
 static int ftrace_process_locs(struct module *mod,
 			       unsigned long *start,
 			       unsigned long *end)
@@ -6646,6 +6691,9 @@ static int ftrace_process_locs(struct module *mod,
 		 */
 		if (!addr)
 			continue;
+		/* applies only for kernel for now */
+		if (!mod && is_noftrace_function(addr))
+			continue;
 
 		end_offset = (pg->index+1) * sizeof(pg->records[0]);
 		if (end_offset > PAGE_SIZE << pg->order) {
@@ -7300,6 +7348,11 @@ void __init ftrace_init(void)
 	pr_info("ftrace: allocating %ld entries in %ld pages\n",
 		count, count / ENTRIES_PER_PAGE + 1);
 
+	if (noftrace_init()) {
+		pr_warn("ftrace: failed to allocate noftrace list\n");
+		goto failed;
+	}
+
 	ret = ftrace_process_locs(NULL,
 				  __start_mcount_loc,
 				  __stop_mcount_loc);
-- 
2.35.3


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-22 11:08 [RFC] ftrace: Add support to keep some functions out of ftrace Jiri Olsa
@ 2022-07-22 11:26 ` Steven Rostedt
  2022-07-22 16:04   ` Alexei Starovoitov
  0 siblings, 1 reply; 58+ messages in thread
From: Steven Rostedt @ 2022-07-22 11:26 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo

On Fri, 22 Jul 2022 13:08:11 +0200
Jiri Olsa <jolsa@kernel.org> wrote:

> hi,
> we recently hit bug where ftrace update raced with bpf_dispatcher_update
> that calls directly bpf_arch_text_poke [1].
> 
> The bpf_dispatcher_update creates special trampoline and attaches it to
> designated bpf_dispatcher_xdp_func function, which is run for xdp bpf
> programs from several places.
> 
> After discussion with Alexei we'd rather keep this code update out of
> ftrace, because it's already slow and had troubles with CI because of
> that.
> 
> This patch is presenting the idea to allow some functions not to be
> managed by ftrace by marking them with NOFTRACE_SYMBOL macro and
> such symbols will not be added to ftrace_pages on the kernel start.

NACK on any generic way to hide mcount/fentry functions from ftrace.

There's a lot of infrastructure to see what functions are being
modified, as the user should know. (See tracefs/enabled_functions).

There's no need for a generic way to hide functions. Once that happens,
it will grow and then it will be more confusing to why some functions are
traced while others are not.

> 
> Please note it's RFC so I did not bother with some fast search for
> is_noftrace_function function.
> 
> Perhaps we could use existing NOKPROBE_SYMBOL for this? but I'm not
> sure you can (or want) to run function trace on such symbols.

I trace those functions all the time. Yes, I want to continue doing so.

-- Steve

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-22 11:26 ` Steven Rostedt
@ 2022-07-22 16:04   ` Alexei Starovoitov
  2022-07-22 16:08     ` Steven Rostedt
  0 siblings, 1 reply; 58+ messages in thread
From: Alexei Starovoitov @ 2022-07-22 16:04 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo

On Fri, Jul 22, 2022 at 4:26 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Fri, 22 Jul 2022 13:08:11 +0200
> Jiri Olsa <jolsa@kernel.org> wrote:
>
> > hi,
> > we recently hit bug where ftrace update raced with bpf_dispatcher_update
> > that calls directly bpf_arch_text_poke [1].
> >
> > The bpf_dispatcher_update creates special trampoline and attaches it to
> > designated bpf_dispatcher_xdp_func function, which is run for xdp bpf
> > programs from several places.
> >
> > After discussion with Alexei we'd rather keep this code update out of
> > ftrace, because it's already slow and had troubles with CI because of
> > that.
> >
> > This patch is presenting the idea to allow some functions not to be
> > managed by ftrace by marking them with NOFTRACE_SYMBOL macro and
> > such symbols will not be added to ftrace_pages on the kernel start.
>
> NACK on any generic way to hide mcount/fentry functions from ftrace.
>
> There's a lot of infrastructure to see what functions are being
> modified, as the user should know. (See tracefs/enabled_functions).
>
> There's no need for a generic way to hide functions. Once that happens,
> it will grow and then it will be more confusing to why some functions are
> traced while others are not.

Steven,

ftrace must not peek into bpf specific functions.
Currently ftrace is causing the kernel to crash.
What Jiri is proposing is to fix ftrace bug.
And you're saying nack? let ftrace be broken ?

If you don't like Jiri's approach please propose something else.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-22 16:04   ` Alexei Starovoitov
@ 2022-07-22 16:08     ` Steven Rostedt
  2022-07-22 16:25       ` Steven Rostedt
  2022-07-22 16:26       ` Jiri Olsa
  0 siblings, 2 replies; 58+ messages in thread
From: Steven Rostedt @ 2022-07-22 16:08 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo

On Fri, 22 Jul 2022 09:04:29 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> ftrace must not peek into bpf specific functions.
> Currently ftrace is causing the kernel to crash.
> What Jiri is proposing is to fix ftrace bug.
> And you're saying nack? let ftrace be broken ?
> 
> If you don't like Jiri's approach please propose something else.

So, why not mark it as notrace? That will prevent ftrace from looking at it.

-- Steve


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-22 16:08     ` Steven Rostedt
@ 2022-07-22 16:25       ` Steven Rostedt
  2022-07-22 16:53         ` Alexei Starovoitov
  2022-07-22 21:05         ` Jiri Olsa
  2022-07-22 16:26       ` Jiri Olsa
  1 sibling, 2 replies; 58+ messages in thread
From: Steven Rostedt @ 2022-07-22 16:25 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo

On Fri, 22 Jul 2022 12:08:54 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Fri, 22 Jul 2022 09:04:29 -0700
> Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> 
> > ftrace must not peek into bpf specific functions.
> > Currently ftrace is causing the kernel to crash.
> > What Jiri is proposing is to fix ftrace bug.
> > And you're saying nack? let ftrace be broken ?

Sounds like a BPF bug to me. Ftrace did nothing to cause this breakage. It
was something BPF must have done. What exactly is BPF doing to ftrace
locations anyway?

> > 
> > If you don't like Jiri's approach please propose something else.  
> 
> So, why not mark it as notrace? That will prevent ftrace from looking at it.
> 

And if for some strange reason you need the mcount/fentry on some internal
BPF infrastructure, the work around is to register two ftrace_ops() that
have filters to that function. In which case ftrace will force the call to
the ftrace iterator loop, and any more ops attached will simply be added to
that loop, and ftrace will no longer touch that location.

Then you can do whatever you want to it without fear of racing with ftrace.

But other than that, we don't need infrastructure to hide any mcount/fentry
locations from ftrace. Those were add *for* ftrace.

-- Steve

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-22 16:08     ` Steven Rostedt
  2022-07-22 16:25       ` Steven Rostedt
@ 2022-07-22 16:26       ` Jiri Olsa
  2022-07-22 16:37         ` Steven Rostedt
  1 sibling, 1 reply; 58+ messages in thread
From: Jiri Olsa @ 2022-07-22 16:26 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Alexei Starovoitov, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo

On Fri, Jul 22, 2022 at 12:08:54PM -0400, Steven Rostedt wrote:
> On Fri, 22 Jul 2022 09:04:29 -0700
> Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> 
> > ftrace must not peek into bpf specific functions.
> > Currently ftrace is causing the kernel to crash.
> > What Jiri is proposing is to fix ftrace bug.
> > And you're saying nack? let ftrace be broken ?
> > 
> > If you don't like Jiri's approach please propose something else.
> 
> So, why not mark it as notrace? That will prevent ftrace from looking at it.

there's still needs to be the instrument jump generated
in order to use bpf_arch_text_poke on that

jirka

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-22 16:26       ` Jiri Olsa
@ 2022-07-22 16:37         ` Steven Rostedt
  2022-07-22 16:54           ` Alexei Starovoitov
  0 siblings, 1 reply; 58+ messages in thread
From: Steven Rostedt @ 2022-07-22 16:37 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo

On Fri, 22 Jul 2022 18:26:09 +0200
Jiri Olsa <olsajiri@gmail.com> wrote:

> > So, why not mark it as notrace? That will prevent ftrace from looking at it.  
> 
> there's still needs to be the instrument jump generated
> in order to use bpf_arch_text_poke on that

Can you explain the background on this. Why is bpf doing text_poke on
function entries? What was the direct use case for?

-- Steve

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-22 16:25       ` Steven Rostedt
@ 2022-07-22 16:53         ` Alexei Starovoitov
  2022-07-22 17:14           ` Steven Rostedt
  2022-07-22 21:05         ` Jiri Olsa
  1 sibling, 1 reply; 58+ messages in thread
From: Alexei Starovoitov @ 2022-07-22 16:53 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo

On Fri, Jul 22, 2022 at 9:25 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Fri, 22 Jul 2022 12:08:54 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
>
> > On Fri, 22 Jul 2022 09:04:29 -0700
> > Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> >
> > > ftrace must not peek into bpf specific functions.
> > > Currently ftrace is causing the kernel to crash.
> > > What Jiri is proposing is to fix ftrace bug.
> > > And you're saying nack? let ftrace be broken ?
>
> Sounds like a BPF bug to me. Ftrace did nothing to cause this breakage. It
> was something BPF must have done. What exactly is BPF doing to ftrace
> locations anyway?

ftrace location?
fentry != ftrace.
nop5 in the beginning of the function or in the middle of it
doesn't mean that it's safe for ftrace to attach there.
In some cases bpf has custom calling convention
like it preserves %rax.
In other cases there will be multiple nop5 locations
through the function where special care needs to be taken
to attach.

> > >
> > > If you don't like Jiri's approach please propose something else.
> >
> > So, why not mark it as notrace? That will prevent ftrace from looking at it.
> >
>
> And if for some strange reason you need the mcount/fentry on some internal
> BPF infrastructure, the work around is to register two ftrace_ops() that
> have filters to that function. In which case ftrace will force the call to
> the ftrace iterator loop, and any more ops attached will simply be added to
> that loop, and ftrace will no longer touch that location.
>
> Then you can do whatever you want to it without fear of racing with ftrace.

Jiri,
that sounds like a workable solution.
wdyt?

> But other than that, we don't need infrastructure to hide any mcount/fentry
> locations from ftrace. Those were add *for* ftrace.

fentry != ftrace.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-22 16:37         ` Steven Rostedt
@ 2022-07-22 16:54           ` Alexei Starovoitov
  0 siblings, 0 replies; 58+ messages in thread
From: Alexei Starovoitov @ 2022-07-22 16:54 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo

On Fri, Jul 22, 2022 at 9:37 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Fri, 22 Jul 2022 18:26:09 +0200
> Jiri Olsa <olsajiri@gmail.com> wrote:
>
> > > So, why not mark it as notrace? That will prevent ftrace from looking at it.
> >
> > there's still needs to be the instrument jump generated
> > in order to use bpf_arch_text_poke on that
>
> Can you explain the background on this. Why is bpf doing text_poke on
> function entries? What was the direct use case for?

It's a bpf specific dispatcher.
Like static_call but more dynamic and for bpf progs.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-22 16:53         ` Alexei Starovoitov
@ 2022-07-22 17:14           ` Steven Rostedt
  0 siblings, 0 replies; 58+ messages in thread
From: Steven Rostedt @ 2022-07-22 17:14 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo

On Fri, 22 Jul 2022 09:53:32 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> >
> > Sounds like a BPF bug to me. Ftrace did nothing to cause this breakage. It
> > was something BPF must have done. What exactly is BPF doing to ftrace
> > locations anyway?  
> 
> ftrace location?
> fentry != ftrace.

It was the entire reason to add that. Basically, Yes, fentry == ftrace!
BPF can not steal it. Look at the history of it. Everything to do with
fentry being added to Linux was for ftrace, and ftrace was designed
specifically for fentry.

The ftrace infrastructure was designed around fentry/mcount. The rest of
the tracing infrastructure was called "ftrace" for the same reason people
call distros Linux (and why FSF hates that). Just because it was built
around the ftrace infrastructure, not the other way around. This is why I
renamed all the ftrace.h files in include/trace/ to trace_event.h. Because
calling it ftrace.h was a misnomer.

> nop5 in the beginning of the function or in the middle of it
> doesn't mean that it's safe for ftrace to attach there.

How did that nop5 get put there?

Before compilers added support for doing that at compile time (which they
added *FOR* ftrace), it was ftrace that converted the calls to
mcount/fentry to nops. In fact, the fentry trampoline exists in ftrace_64.S.


> In some cases bpf has custom calling convention
> like it preserves %rax.
> In other cases there will be multiple nop5 locations
> through the function where special care needs to be taken
> to attach.
> 
> > > >
> > > > If you don't like Jiri's approach please propose something else.  
> > >
> > > So, why not mark it as notrace? That will prevent ftrace from looking at it.
> > >  
> >
> > And if for some strange reason you need the mcount/fentry on some internal
> > BPF infrastructure, the work around is to register two ftrace_ops() that
> > have filters to that function. In which case ftrace will force the call to
> > the ftrace iterator loop, and any more ops attached will simply be added to
> > that loop, and ftrace will no longer touch that location.
> >
> > Then you can do whatever you want to it without fear of racing with ftrace.  
> 
> Jiri,
> that sounds like a workable solution.
> wdyt?
> 
> > But other than that, we don't need infrastructure to hide any mcount/fentry
> > locations from ftrace. Those were add *for* ftrace.  
> 
> fentry != ftrace.

I 100% disagree with the above statement.

  https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/kernel/ftrace_64.S#n134

-- Steve

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-22 16:25       ` Steven Rostedt
  2022-07-22 16:53         ` Alexei Starovoitov
@ 2022-07-22 21:05         ` Jiri Olsa
  2022-07-22 21:41           ` Steven Rostedt
  1 sibling, 1 reply; 58+ messages in thread
From: Jiri Olsa @ 2022-07-22 21:05 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Alexei Starovoitov, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo

On Fri, Jul 22, 2022 at 12:25:48PM -0400, Steven Rostedt wrote:
> On Fri, 22 Jul 2022 12:08:54 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > On Fri, 22 Jul 2022 09:04:29 -0700
> > Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> > 
> > > ftrace must not peek into bpf specific functions.
> > > Currently ftrace is causing the kernel to crash.
> > > What Jiri is proposing is to fix ftrace bug.
> > > And you're saying nack? let ftrace be broken ?
> 
> Sounds like a BPF bug to me. Ftrace did nothing to cause this breakage. It
> was something BPF must have done. What exactly is BPF doing to ftrace
> locations anyway?
> 
> > > 
> > > If you don't like Jiri's approach please propose something else.  
> > 
> > So, why not mark it as notrace? That will prevent ftrace from looking at it.
> > 
> 
> And if for some strange reason you need the mcount/fentry on some internal
> BPF infrastructure, the work around is to register two ftrace_ops() that
> have filters to that function. In which case ftrace will force the call to
> the ftrace iterator loop, and any more ops attached will simply be added to
> that loop, and ftrace will no longer touch that location.
> 
> Then you can do whatever you want to it without fear of racing with ftrace.

ok, I think we could use that, I'll check

> 
> But other than that, we don't need infrastructure to hide any mcount/fentry
> locations from ftrace. Those were add *for* ftrace.

I think I understand the fentry/ftrace equivalence you see, I remember
the perl mcount script ;-)

still I think we should be able to define function that has fentry
profile call and be able to manage it without ftrace

one other thought.. how about adding function that would allow to disable
function in ftrace, with existing FTRACE_FL_DISABLED or some new flag

that way ftrace still keeps track of it, but won't allow to use it in
ftrace infra

jirka

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-22 21:05         ` Jiri Olsa
@ 2022-07-22 21:41           ` Steven Rostedt
  2022-07-23  3:53             ` Steven Rostedt
  2022-07-23 21:39             ` Jiri Olsa
  0 siblings, 2 replies; 58+ messages in thread
From: Steven Rostedt @ 2022-07-22 21:41 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo

On Fri, 22 Jul 2022 23:05:19 +0200
Jiri Olsa <olsajiri@gmail.com> wrote:

> ok, I think we could use that, I'll check
> 
> > 
> > But other than that, we don't need infrastructure to hide any mcount/fentry
> > locations from ftrace. Those were add *for* ftrace.  
> 
> I think I understand the fentry/ftrace equivalence you see, I remember
> the perl mcount script ;-)

It's even more than that. We worked with the compiler folks to get fentry
for ftrace purposes (namely to speed it up, and not rely on frame
pointers, which mcount did). fentry never existed until then. Like I said.
fentry was created *for* ftrace. And currently it's x86 specific, as it
relies on the calling convention that a call does both, push the return
address onto the  stack, and jump to a function. The blr
(branch-link-register) method is more complex, which is where the
"patchable" work comes from.

> 
> still I think we should be able to define function that has fentry
> profile call and be able to manage it without ftrace
> 
> one other thought.. how about adding function that would allow to disable
> function in ftrace, with existing FTRACE_FL_DISABLED or some new flag
> 
> that way ftrace still keeps track of it, but won't allow to use it in
> ftrace infra

Another way is to remove it at compile time from the mcount_loc table, and
add it to your own table. I take it, this is for bpf infrastructure code
and not for any code that's in the day to day processing of the kernel,
right?

-- Steve

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-22 21:41           ` Steven Rostedt
@ 2022-07-23  3:53             ` Steven Rostedt
  2022-07-23  3:56               ` Steven Rostedt
  2022-07-23 21:39             ` Jiri Olsa
  1 sibling, 1 reply; 58+ messages in thread
From: Steven Rostedt @ 2022-07-23  3:53 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo

On Fri, 22 Jul 2022 17:41:20 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> > I think I understand the fentry/ftrace equivalence you see, I remember
> > the perl mcount script ;-)  
> 
> It's even more than that. We worked with the compiler folks to get fentry
> for ftrace purposes (namely to speed it up, and not rely on frame
> pointers, which mcount did). fentry never existed until then. Like I said.
> fentry was created *for* ftrace. And currently it's x86 specific, as it
> relies on the calling convention that a call does both, push the return
> address onto the  stack, and jump to a function. The blr
> (branch-link-register) method is more complex, which is where the
> "patchable" work comes from.

If you are interested in more details about the birth of fentry, here's
the email that started it all:

 https://lore.kernel.org/lkml/1258657614.22249.824.camel@gandalf.stny.rr.com/

-- Steve

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-23  3:53             ` Steven Rostedt
@ 2022-07-23  3:56               ` Steven Rostedt
  2022-07-25  7:00                 ` Jiri Olsa
  0 siblings, 1 reply; 58+ messages in thread
From: Steven Rostedt @ 2022-07-23  3:56 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo

On Fri, 22 Jul 2022 23:53:58 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> If you are interested in more details about the birth of fentry, here's
> the email that started it all:
> 
>  https://lore.kernel.org/lkml/1258657614.22249.824.camel@gandalf.stny.rr.com/

And I was the one to suggest the "fentry" name as well.

 https://lore.kernel.org/lkml/1258720459.22249.1018.camel@gandalf.stny.rr.com/

-- Steve

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-22 21:41           ` Steven Rostedt
  2022-07-23  3:53             ` Steven Rostedt
@ 2022-07-23 21:39             ` Jiri Olsa
  2022-08-12 21:18               ` Jiri Olsa
  1 sibling, 1 reply; 58+ messages in thread
From: Jiri Olsa @ 2022-07-23 21:39 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Jiri Olsa, Alexei Starovoitov, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Ingo Molnar, bpf,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo

On Fri, Jul 22, 2022 at 05:41:20PM -0400, Steven Rostedt wrote:
> On Fri, 22 Jul 2022 23:05:19 +0200
> Jiri Olsa <olsajiri@gmail.com> wrote:
> 
> > ok, I think we could use that, I'll check
> > 
> > > 
> > > But other than that, we don't need infrastructure to hide any mcount/fentry
> > > locations from ftrace. Those were add *for* ftrace.  
> > 
> > I think I understand the fentry/ftrace equivalence you see, I remember
> > the perl mcount script ;-)
> 
> It's even more than that. We worked with the compiler folks to get fentry
> for ftrace purposes (namely to speed it up, and not rely on frame
> pointers, which mcount did). fentry never existed until then. Like I said.
> fentry was created *for* ftrace. And currently it's x86 specific, as it
> relies on the calling convention that a call does both, push the return
> address onto the  stack, and jump to a function. The blr
> (branch-link-register) method is more complex, which is where the
> "patchable" work comes from.
> 
> > 
> > still I think we should be able to define function that has fentry
> > profile call and be able to manage it without ftrace
> > 
> > one other thought.. how about adding function that would allow to disable
> > function in ftrace, with existing FTRACE_FL_DISABLED or some new flag
> > 
> > that way ftrace still keeps track of it, but won't allow to use it in
> > ftrace infra
> 
> Another way is to remove it at compile time from the mcount_loc table, and
> add it to your own table. I take it, this is for bpf infrastructure code

hm, perhaps we could move it to separate object and switch off
-mrecord-mcount for it, I'll check

> and not for any code that's in the day to day processing of the kernel,
> right?

yes, it's bpf specific code

thanks,
jirka

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-23  3:56               ` Steven Rostedt
@ 2022-07-25  7:00                 ` Jiri Olsa
  0 siblings, 0 replies; 58+ messages in thread
From: Jiri Olsa @ 2022-07-25  7:00 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Jiri Olsa, Alexei Starovoitov, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Ingo Molnar, bpf,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo

On Fri, Jul 22, 2022 at 11:56:19PM -0400, Steven Rostedt wrote:
> On Fri, 22 Jul 2022 23:53:58 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > If you are interested in more details about the birth of fentry, here's
> > the email that started it all:
> > 
> >  https://lore.kernel.org/lkml/1258657614.22249.824.camel@gandalf.stny.rr.com/
> 
> And I was the one to suggest the "fentry" name as well.
> 
>  https://lore.kernel.org/lkml/1258720459.22249.1018.camel@gandalf.stny.rr.com/

ah, did not know that, thanks for sharing

jirka

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-07-23 21:39             ` Jiri Olsa
@ 2022-08-12 21:18               ` Jiri Olsa
  2022-08-12 21:50                 ` Steven Rostedt
  2022-08-13 19:02                 ` Steven Rostedt
  0 siblings, 2 replies; 58+ messages in thread
From: Jiri Olsa @ 2022-08-12 21:18 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Steven Rostedt, Alexei Starovoitov, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Ingo Molnar, bpf,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo

On Sat, Jul 23, 2022 at 11:39:27PM +0200, Jiri Olsa wrote:
> On Fri, Jul 22, 2022 at 05:41:20PM -0400, Steven Rostedt wrote:
> > On Fri, 22 Jul 2022 23:05:19 +0200
> > Jiri Olsa <olsajiri@gmail.com> wrote:
> > 
> > > ok, I think we could use that, I'll check
> > > 
> > > > 
> > > > But other than that, we don't need infrastructure to hide any mcount/fentry
> > > > locations from ftrace. Those were add *for* ftrace.  
> > > 
> > > I think I understand the fentry/ftrace equivalence you see, I remember
> > > the perl mcount script ;-)
> > 
> > It's even more than that. We worked with the compiler folks to get fentry
> > for ftrace purposes (namely to speed it up, and not rely on frame
> > pointers, which mcount did). fentry never existed until then. Like I said.
> > fentry was created *for* ftrace. And currently it's x86 specific, as it
> > relies on the calling convention that a call does both, push the return
> > address onto the  stack, and jump to a function. The blr
> > (branch-link-register) method is more complex, which is where the
> > "patchable" work comes from.
> > 
> > > 
> > > still I think we should be able to define function that has fentry
> > > profile call and be able to manage it without ftrace
> > > 
> > > one other thought.. how about adding function that would allow to disable
> > > function in ftrace, with existing FTRACE_FL_DISABLED or some new flag
> > > 
> > > that way ftrace still keeps track of it, but won't allow to use it in
> > > ftrace infra
> > 
> > Another way is to remove it at compile time from the mcount_loc table, and
> > add it to your own table. I take it, this is for bpf infrastructure code
> 
> hm, perhaps we could move it to separate object and switch off
> -mrecord-mcount for it, I'll check

the patch below moves the bpf function into sepatate object and switches
off the -mrecord-mcount for it.. so the function gets profile call
generated but it's not visible to ftrace

this seems to work, but it depends on -mrecord-mcount support in gcc and
it's x86 specific... other archs seems to use -fpatchable-function-entry,
which does not seem to have option to omit symbol from being collected
to the section

disabling specific ftrace symbol with FTRACE_FL_DISABLED flag seems to
be easir and generic solution.. I'll send RFC for that

jirka


---
diff --git a/net/core/Makefile b/net/core/Makefile
index e8ce3bd283a6..7d7ba2038879 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -12,10 +12,14 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 obj-y		     += dev.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
 			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
-			fib_notifier.o xdp.o flow_offload.o gro.o
+			fib_notifier.o xdp.o flow_offload.o gro.o \
+			dispatcher.o
 
 obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
 
+# remove dispatcher function from ftrace sight
+CFLAGS_REMOVE_dispatcher.o = -mrecord-mcount
+
 obj-y += net-sysfs.o
 obj-$(CONFIG_PAGE_POOL) += page_pool.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
diff --git a/net/core/dispatcher.c b/net/core/dispatcher.c
new file mode 100644
index 000000000000..7d2730b15de0
--- /dev/null
+++ b/net/core/dispatcher.c
@@ -0,0 +1,3 @@
+#include <linux/bpf.h>
+
+DEFINE_BPF_DISPATCHER(xdp)
diff --git a/net/core/filter.c b/net/core/filter.c
index 5669248aff25..23fe2c5dfe9d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -11333,8 +11333,6 @@ const struct bpf_verifier_ops sk_lookup_verifier_ops = {
 
 #endif /* CONFIG_INET */
 
-DEFINE_BPF_DISPATCHER(xdp)
-
 void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
 {
 	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-12 21:18               ` Jiri Olsa
@ 2022-08-12 21:50                 ` Steven Rostedt
  2022-08-13 19:02                 ` Steven Rostedt
  1 sibling, 0 replies; 58+ messages in thread
From: Steven Rostedt @ 2022-08-12 21:50 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo

On Fri, 12 Aug 2022 23:18:15 +0200
Jiri Olsa <olsajiri@gmail.com> wrote:

> the patch below moves the bpf function into sepatate object and switches
> off the -mrecord-mcount for it.. so the function gets profile call
> generated but it's not visible to ftrace
> 
> this seems to work, but it depends on -mrecord-mcount support in gcc and
> it's x86 specific... other archs seems to use -fpatchable-function-entry,
> which does not seem to have option to omit symbol from being collected
> to the section
> 
> disabling specific ftrace symbol with FTRACE_FL_DISABLED flag seems to
> be easir and generic solution.. I'll send RFC for that

No, please don't.

I could help you with recordmcount (which creates the .mcount_loc
locations when -mrecordmcount is not enabled) and remove it for you.

I still want this solution over the easy-way-out that can lead to half
of the kernel being hidden from ftrace.

This is the point that I made about fentry == ftrace. Because we did
the hard part to make fentry work. That included creating sections that
point to them.

All your patch needs is to tell the build not to run recordmcount on
the file. Remember that perl script I wrote? It (and the C version) is
what creates the mcount_loc location that you need to hide these files from.


-- Steve

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-12 21:18               ` Jiri Olsa
  2022-08-12 21:50                 ` Steven Rostedt
@ 2022-08-13 19:02                 ` Steven Rostedt
  2022-08-14 11:32                   ` Steven Rostedt
                                     ` (2 more replies)
  1 sibling, 3 replies; 58+ messages in thread
From: Steven Rostedt @ 2022-08-13 19:02 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf, Peter Zijlstra

On Fri, 12 Aug 2022 23:18:15 +0200
Jiri Olsa <olsajiri@gmail.com> wrote:

> the patch below moves the bpf function into sepatate object and switches
> off the -mrecord-mcount for it.. so the function gets profile call
> generated but it's not visible to ftrace
> 
> this seems to work, but it depends on -mrecord-mcount support in gcc and
> it's x86 specific... other archs seems to use -fpatchable-function-entry,
> which does not seem to have option to omit symbol from being collected
> to the section

As I stated. the __mcount_loc section was created by ftrace. It has
nothing to do with -fpatchable-function-entry. It's just that the archs
that use that, do not have a gcc that creates the __mcount_loc section.

> 
> disabling specific ftrace symbol with FTRACE_FL_DISABLED flag seems to
> be easir and generic solution.. I'll send RFC for that

It's not easier.

Here, I have a POC for you and some more history.

The recordmcount.pl Perl script was the first to create the
__mcount_loc section in all objects that ftrace needed to hook to. It
did an objdump, found the locations of the calls to mcount, created
another file that had a section __mcount_loc that referenced all those
locations. Compiled and relinked that back into the original object.

This was performed on all object files during the build, and had an
impact on build times. But this is when I also created and introduced
"make localmodconfig", which shrunk the build times for many people, so
nobody noticed the build time increase! :-)

Then John Reiser sent me a patch that created recordmcount.c that did
the same work but directly modified the ELF object files without having
to run scripts. This got rid of that horrible overhead from the scripts.

Then, the gcc folks decided to be helpful here as well and created the
--mrecord-mcount option that would create the __mcount_loc section for
us, where we no longer needed the recordmcount scripts/C program. But
is not available across the board.

Today, objtool has also got involved, and added an "--mcount" option
that will create the section too.

Below is a patch that extends yours by adding a NO_MCOUNT_FILES list,
that you add the object file to and it will prevent the other methods
from adding an mcount_loc location.

I'm adding the objtool folks to make sure this is fine with them.
Again, this is a proof of concept, but works. It may need to be cleaned
a bit before it is final.

-- Steve

Index: linux-trace.git/scripts/Makefile.build
===================================================================
--- linux-trace.git.orig/scripts/Makefile.build
+++ linux-trace.git/scripts/Makefile.build
@@ -206,8 +206,10 @@ sub_cmd_record_mcount = perl $(srctree)/
 	"$(if $(part-of-module),1,0)" "$(@)";
 recordmcount_source := $(srctree)/scripts/recordmcount.pl
 endif # BUILD_C_RECORDMCOUNT
-cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)),	\
+chk_sub_cmd_record_mcount = $(if $(filter $(shell basename $@),$(NO_MCOUNT_FILES)),, \
 	$(sub_cmd_record_mcount))
+cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)),	\
+	$(chk_sub_cmd_record_mcount))
 endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
 
 # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory
Index: linux-trace.git/scripts/Makefile.lib
===================================================================
--- linux-trace.git.orig/scripts/Makefile.lib
+++ linux-trace.git/scripts/Makefile.lib
@@ -233,7 +233,8 @@ objtool_args =								\
 	$(if $(CONFIG_HAVE_JUMP_LABEL_HACK), --hacks=jump_label)	\
 	$(if $(CONFIG_HAVE_NOINSTR_HACK), --hacks=noinstr)		\
 	$(if $(CONFIG_X86_KERNEL_IBT), --ibt)				\
-	$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount)		\
+	$(if $(filter $(shell basename $@),$(NO_MCOUNT_FILES)),,	\
+		$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount))	\
 	$(if $(CONFIG_UNWINDER_ORC), --orc)				\
 	$(if $(CONFIG_RETPOLINE), --retpoline)				\
 	$(if $(CONFIG_SLS), --sls)					\
Index: linux-trace.git/net/core/Makefile
===================================================================
--- linux-trace.git.orig/net/core/Makefile
+++ linux-trace.git/net/core/Makefile
@@ -11,10 +11,15 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.
 obj-y		     += dev.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
 			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
-			fib_notifier.o xdp.o flow_offload.o gro.o
+			fib_notifier.o xdp.o flow_offload.o gro.o \
+			dispatcher.o
 
 obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
 
+# remove dispatcher function from ftrace sight
+CFLAGS_REMOVE_dispatcher.o = -mrecord-mcount
+NO_MCOUNT_FILES += dispatcher.o
+
 obj-y += net-sysfs.o
 obj-$(CONFIG_PAGE_POOL) += page_pool.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
Index: linux-trace.git/net/core/dispatcher.c
===================================================================
--- /dev/null
+++ linux-trace.git/net/core/dispatcher.c
@@ -0,0 +1,3 @@
+#include <linux/bpf.h>
+
+DEFINE_BPF_DISPATCHER(xdp)
Index: linux-trace.git/net/core/filter.c
===================================================================
--- linux-trace.git.orig/net/core/filter.c
+++ linux-trace.git/net/core/filter.c
@@ -11162,8 +11162,6 @@ const struct bpf_verifier_ops sk_lookup_
 
 #endif /* CONFIG_INET */
 
-DEFINE_BPF_DISPATCHER(xdp)
-
 void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
 {
 	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-13 19:02                 ` Steven Rostedt
@ 2022-08-14 11:32                   ` Steven Rostedt
  2022-08-14 15:22                   ` Jiri Olsa
  2022-08-15  8:03                   ` Peter Zijlstra
  2 siblings, 0 replies; 58+ messages in thread
From: Steven Rostedt @ 2022-08-14 11:32 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf, Peter Zijlstra

On Sat, 13 Aug 2022 15:02:52 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> Index: linux-trace.git/scripts/Makefile.lib
> ===================================================================
> --- linux-trace.git.orig/scripts/Makefile.lib
> +++ linux-trace.git/scripts/Makefile.lib
> @@ -233,7 +233,8 @@ objtool_args =								\
>  	$(if $(CONFIG_HAVE_JUMP_LABEL_HACK), --hacks=jump_label)	\
>  	$(if $(CONFIG_HAVE_NOINSTR_HACK), --hacks=noinstr)		\
>  	$(if $(CONFIG_X86_KERNEL_IBT), --ibt)				\
> -	$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount)		\
> +	$(if $(filter $(shell basename $@),$(NO_MCOUNT_FILES)),,	\
> +		$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount))	\

I believe there's some security and other validations that objtool does
that requires it to know about the mcount locations.

If BPF is doing something unique, and modifying code as well (outside
the jump label and ftrace work), does objtool need to know about that too?

-- Steve


>  	$(if $(CONFIG_UNWINDER_ORC), --orc)				\
>  	$(if $(CONFIG_RETPOLINE), --retpoline)				\
>  	$(if $(CONFIG_SLS), --sls)					\

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-13 19:02                 ` Steven Rostedt
  2022-08-14 11:32                   ` Steven Rostedt
@ 2022-08-14 15:22                   ` Jiri Olsa
  2022-08-15  2:07                     ` Chen Zhongjin
  2022-08-15  8:03                   ` Peter Zijlstra
  2 siblings, 1 reply; 58+ messages in thread
From: Jiri Olsa @ 2022-08-14 15:22 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Jiri Olsa, Alexei Starovoitov, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Ingo Molnar, bpf,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo, LKML, Josh Poimboeuf,
	Peter Zijlstra

On Sat, Aug 13, 2022 at 03:02:52PM -0400, Steven Rostedt wrote:
> On Fri, 12 Aug 2022 23:18:15 +0200
> Jiri Olsa <olsajiri@gmail.com> wrote:
> 
> > the patch below moves the bpf function into sepatate object and switches
> > off the -mrecord-mcount for it.. so the function gets profile call
> > generated but it's not visible to ftrace
> > 
> > this seems to work, but it depends on -mrecord-mcount support in gcc and
> > it's x86 specific... other archs seems to use -fpatchable-function-entry,
> > which does not seem to have option to omit symbol from being collected
> > to the section
> 
> As I stated. the __mcount_loc section was created by ftrace. It has
> nothing to do with -fpatchable-function-entry. It's just that the archs
> that use that, do not have a gcc that creates the __mcount_loc section.
> 
> > 
> > disabling specific ftrace symbol with FTRACE_FL_DISABLED flag seems to
> > be easir and generic solution.. I'll send RFC for that
> 
> It's not easier.
> 
> Here, I have a POC for you and some more history.
> 
> The recordmcount.pl Perl script was the first to create the
> __mcount_loc section in all objects that ftrace needed to hook to. It
> did an objdump, found the locations of the calls to mcount, created
> another file that had a section __mcount_loc that referenced all those
> locations. Compiled and relinked that back into the original object.
> 
> This was performed on all object files during the build, and had an
> impact on build times. But this is when I also created and introduced
> "make localmodconfig", which shrunk the build times for many people, so
> nobody noticed the build time increase! :-)
> 
> Then John Reiser sent me a patch that created recordmcount.c that did
> the same work but directly modified the ELF object files without having
> to run scripts. This got rid of that horrible overhead from the scripts.
> 
> Then, the gcc folks decided to be helpful here as well and created the
> --mrecord-mcount option that would create the __mcount_loc section for
> us, where we no longer needed the recordmcount scripts/C program. But
> is not available across the board.
> 
> Today, objtool has also got involved, and added an "--mcount" option
> that will create the section too.

I overlooked that objtool is involved as well,
will check on that

> 
> Below is a patch that extends yours by adding a NO_MCOUNT_FILES list,
> that you add the object file to and it will prevent the other methods
> from adding an mcount_loc location.

thanks,
jirka

> 
> I'm adding the objtool folks to make sure this is fine with them.
> Again, this is a proof of concept, but works. It may need to be cleaned
> a bit before it is final.
> 
> -- Steve
> 
> Index: linux-trace.git/scripts/Makefile.build
> ===================================================================
> --- linux-trace.git.orig/scripts/Makefile.build
> +++ linux-trace.git/scripts/Makefile.build
> @@ -206,8 +206,10 @@ sub_cmd_record_mcount = perl $(srctree)/
>  	"$(if $(part-of-module),1,0)" "$(@)";
>  recordmcount_source := $(srctree)/scripts/recordmcount.pl
>  endif # BUILD_C_RECORDMCOUNT
> -cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)),	\
> +chk_sub_cmd_record_mcount = $(if $(filter $(shell basename $@),$(NO_MCOUNT_FILES)),, \
>  	$(sub_cmd_record_mcount))
> +cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)),	\
> +	$(chk_sub_cmd_record_mcount))
>  endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
>  
>  # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory
> Index: linux-trace.git/scripts/Makefile.lib
> ===================================================================
> --- linux-trace.git.orig/scripts/Makefile.lib
> +++ linux-trace.git/scripts/Makefile.lib
> @@ -233,7 +233,8 @@ objtool_args =								\
>  	$(if $(CONFIG_HAVE_JUMP_LABEL_HACK), --hacks=jump_label)	\
>  	$(if $(CONFIG_HAVE_NOINSTR_HACK), --hacks=noinstr)		\
>  	$(if $(CONFIG_X86_KERNEL_IBT), --ibt)				\
> -	$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount)		\
> +	$(if $(filter $(shell basename $@),$(NO_MCOUNT_FILES)),,	\
> +		$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount))	\
>  	$(if $(CONFIG_UNWINDER_ORC), --orc)				\
>  	$(if $(CONFIG_RETPOLINE), --retpoline)				\
>  	$(if $(CONFIG_SLS), --sls)					\
> Index: linux-trace.git/net/core/Makefile
> ===================================================================
> --- linux-trace.git.orig/net/core/Makefile
> +++ linux-trace.git/net/core/Makefile
> @@ -11,10 +11,15 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.
>  obj-y		     += dev.o dev_addr_lists.o dst.o netevent.o \
>  			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
>  			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
> -			fib_notifier.o xdp.o flow_offload.o gro.o
> +			fib_notifier.o xdp.o flow_offload.o gro.o \
> +			dispatcher.o
>  
>  obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
>  
> +# remove dispatcher function from ftrace sight
> +CFLAGS_REMOVE_dispatcher.o = -mrecord-mcount
> +NO_MCOUNT_FILES += dispatcher.o
> +
>  obj-y += net-sysfs.o
>  obj-$(CONFIG_PAGE_POOL) += page_pool.o
>  obj-$(CONFIG_PROC_FS) += net-procfs.o
> Index: linux-trace.git/net/core/dispatcher.c
> ===================================================================
> --- /dev/null
> +++ linux-trace.git/net/core/dispatcher.c
> @@ -0,0 +1,3 @@
> +#include <linux/bpf.h>
> +
> +DEFINE_BPF_DISPATCHER(xdp)
> Index: linux-trace.git/net/core/filter.c
> ===================================================================
> --- linux-trace.git.orig/net/core/filter.c
> +++ linux-trace.git/net/core/filter.c
> @@ -11162,8 +11162,6 @@ const struct bpf_verifier_ops sk_lookup_
>  
>  #endif /* CONFIG_INET */
>  
> -DEFINE_BPF_DISPATCHER(xdp)
> -
>  void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
>  {
>  	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
> 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-14 15:22                   ` Jiri Olsa
@ 2022-08-15  2:07                     ` Chen Zhongjin
  2022-08-15  8:04                       ` Jiri Olsa
  0 siblings, 1 reply; 58+ messages in thread
From: Chen Zhongjin @ 2022-08-15  2:07 UTC (permalink / raw)
  To: Jiri Olsa, Steven Rostedt
  Cc: Alexei Starovoitov, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf, Peter Zijlstra


On 2022/8/14 23:22, Jiri Olsa wrote:
> On Sat, Aug 13, 2022 at 03:02:52PM -0400, Steven Rostedt wrote:
>> On Fri, 12 Aug 2022 23:18:15 +0200
>> Jiri Olsa <olsajiri@gmail.com> wrote:
>>
>>> the patch below moves the bpf function into sepatate object and switches
>>> off the -mrecord-mcount for it.. so the function gets profile call
>>> generated but it's not visible to ftrace
>>>
>>> this seems to work, but it depends on -mrecord-mcount support in gcc and
>>> it's x86 specific... other archs seems to use -fpatchable-function-entry,
>>> which does not seem to have option to omit symbol from being collected
>>> to the section
>> As I stated. the __mcount_loc section was created by ftrace. It has
>> nothing to do with -fpatchable-function-entry. It's just that the archs
>> that use that, do not have a gcc that creates the __mcount_loc section.
>>
>>> disabling specific ftrace symbol with FTRACE_FL_DISABLED flag seems to
>>> be easir and generic solution.. I'll send RFC for that
>> It's not easier.
>>
>> Here, I have a POC for you and some more history.
>>
>> The recordmcount.pl Perl script was the first to create the
>> __mcount_loc section in all objects that ftrace needed to hook to. It
>> did an objdump, found the locations of the calls to mcount, created
>> another file that had a section __mcount_loc that referenced all those
>> locations. Compiled and relinked that back into the original object.
>>
>> This was performed on all object files during the build, and had an
>> impact on build times. But this is when I also created and introduced
>> "make localmodconfig", which shrunk the build times for many people, so
>> nobody noticed the build time increase! :-)
>>
>> Then John Reiser sent me a patch that created recordmcount.c that did
>> the same work but directly modified the ELF object files without having
>> to run scripts. This got rid of that horrible overhead from the scripts.
>>
>> Then, the gcc folks decided to be helpful here as well and created the
>> --mrecord-mcount option that would create the __mcount_loc section for
>> us, where we no longer needed the recordmcount scripts/C program. But
>> is not available across the board.
>>
>> Today, objtool has also got involved, and added an "--mcount" option
>> that will create the section too.
> I overlooked that objtool is involved as well,
> will check on that

objtool --mcount option only involves mcount_loc generation (see 
annotate_call_site) and other validation check call destination directly 
(see is_fentry_call).

Some simply removing --mcount option dose work for this.


Another question, it seems we can export and use DEFINE_BPF_DISPATCHER 
out of kernel, does that means we should add NO_MCOUNT_FILES for these 
single uages as well?

I dont think it can be made automatically. If ignored, this can be a 
trouble.


Best,

Chen

>> Below is a patch that extends yours by adding a NO_MCOUNT_FILES list,
>> that you add the object file to and it will prevent the other methods
>> from adding an mcount_loc location.
> thanks,
> jirka
>
>> I'm adding the objtool folks to make sure this is fine with them.
>> Again, this is a proof of concept, but works. It may need to be cleaned
>> a bit before it is final.
>>
>> -- Steve
>>
>> Index: linux-trace.git/scripts/Makefile.build
>> ===================================================================
>> --- linux-trace.git.orig/scripts/Makefile.build
>> +++ linux-trace.git/scripts/Makefile.build
>> @@ -206,8 +206,10 @@ sub_cmd_record_mcount = perl $(srctree)/
>>   	"$(if $(part-of-module),1,0)" "$(@)";
>>   recordmcount_source := $(srctree)/scripts/recordmcount.pl
>>   endif # BUILD_C_RECORDMCOUNT
>> -cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)),	\
>> +chk_sub_cmd_record_mcount = $(if $(filter $(shell basename $@),$(NO_MCOUNT_FILES)),, \
>>   	$(sub_cmd_record_mcount))
>> +cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)),	\
>> +	$(chk_sub_cmd_record_mcount))
>>   endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
>>   
>>   # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory
>> Index: linux-trace.git/scripts/Makefile.lib
>> ===================================================================
>> --- linux-trace.git.orig/scripts/Makefile.lib
>> +++ linux-trace.git/scripts/Makefile.lib
>> @@ -233,7 +233,8 @@ objtool_args =								\
>>   	$(if $(CONFIG_HAVE_JUMP_LABEL_HACK), --hacks=jump_label)	\
>>   	$(if $(CONFIG_HAVE_NOINSTR_HACK), --hacks=noinstr)		\
>>   	$(if $(CONFIG_X86_KERNEL_IBT), --ibt)				\
>> -	$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount)		\
>> +	$(if $(filter $(shell basename $@),$(NO_MCOUNT_FILES)),,	\
>> +		$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount))	\
>>   	$(if $(CONFIG_UNWINDER_ORC), --orc)				\
>>   	$(if $(CONFIG_RETPOLINE), --retpoline)				\
>>   	$(if $(CONFIG_SLS), --sls)					\
>> Index: linux-trace.git/net/core/Makefile
>> ===================================================================
>> --- linux-trace.git.orig/net/core/Makefile
>> +++ linux-trace.git/net/core/Makefile
>> @@ -11,10 +11,15 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.
>>   obj-y		     += dev.o dev_addr_lists.o dst.o netevent.o \
>>   			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
>>   			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
>> -			fib_notifier.o xdp.o flow_offload.o gro.o
>> +			fib_notifier.o xdp.o flow_offload.o gro.o \
>> +			dispatcher.o
>>   
>>   obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
>>   
>> +# remove dispatcher function from ftrace sight
>> +CFLAGS_REMOVE_dispatcher.o = -mrecord-mcount
>> +NO_MCOUNT_FILES += dispatcher.o
>> +
>>   obj-y += net-sysfs.o
>>   obj-$(CONFIG_PAGE_POOL) += page_pool.o
>>   obj-$(CONFIG_PROC_FS) += net-procfs.o
>> Index: linux-trace.git/net/core/dispatcher.c
>> ===================================================================
>> --- /dev/null
>> +++ linux-trace.git/net/core/dispatcher.c
>> @@ -0,0 +1,3 @@
>> +#include <linux/bpf.h>
>> +
>> +DEFINE_BPF_DISPATCHER(xdp)
>> Index: linux-trace.git/net/core/filter.c
>> ===================================================================
>> --- linux-trace.git.orig/net/core/filter.c
>> +++ linux-trace.git/net/core/filter.c
>> @@ -11162,8 +11162,6 @@ const struct bpf_verifier_ops sk_lookup_
>>   
>>   #endif /* CONFIG_INET */
>>   
>> -DEFINE_BPF_DISPATCHER(xdp)
>> -
>>   void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
>>   {
>>   	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
>>


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-13 19:02                 ` Steven Rostedt
  2022-08-14 11:32                   ` Steven Rostedt
  2022-08-14 15:22                   ` Jiri Olsa
@ 2022-08-15  8:03                   ` Peter Zijlstra
  2022-08-15  9:44                     ` Jiri Olsa
  2 siblings, 1 reply; 58+ messages in thread
From: Peter Zijlstra @ 2022-08-15  8:03 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Jiri Olsa, Alexei Starovoitov, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Ingo Molnar, bpf,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo, LKML, Josh Poimboeuf

On Sat, Aug 13, 2022 at 03:02:52PM -0400, Steven Rostedt wrote:
> On Fri, 12 Aug 2022 23:18:15 +0200
> Jiri Olsa <olsajiri@gmail.com> wrote:
> 
> > the patch below moves the bpf function into sepatate object and switches
> > off the -mrecord-mcount for it.. so the function gets profile call
> > generated but it's not visible to ftrace

Why ?!?

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15  2:07                     ` Chen Zhongjin
@ 2022-08-15  8:04                       ` Jiri Olsa
  2022-08-15 11:01                         ` Björn Töpel
  0 siblings, 1 reply; 58+ messages in thread
From: Jiri Olsa @ 2022-08-15  8:04 UTC (permalink / raw)
  To: Chen Zhongjin
  Cc: Jiri Olsa, Steven Rostedt, Alexei Starovoitov,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, LKML,
	Josh Poimboeuf, Peter Zijlstra, Björn Töpel

On Mon, Aug 15, 2022 at 10:07:54AM +0800, Chen Zhongjin wrote:
> 
> On 2022/8/14 23:22, Jiri Olsa wrote:
> > On Sat, Aug 13, 2022 at 03:02:52PM -0400, Steven Rostedt wrote:
> > > On Fri, 12 Aug 2022 23:18:15 +0200
> > > Jiri Olsa <olsajiri@gmail.com> wrote:
> > > 
> > > > the patch below moves the bpf function into sepatate object and switches
> > > > off the -mrecord-mcount for it.. so the function gets profile call
> > > > generated but it's not visible to ftrace
> > > > 
> > > > this seems to work, but it depends on -mrecord-mcount support in gcc and
> > > > it's x86 specific... other archs seems to use -fpatchable-function-entry,
> > > > which does not seem to have option to omit symbol from being collected
> > > > to the section
> > > As I stated. the __mcount_loc section was created by ftrace. It has
> > > nothing to do with -fpatchable-function-entry. It's just that the archs
> > > that use that, do not have a gcc that creates the __mcount_loc section.
> > > 
> > > > disabling specific ftrace symbol with FTRACE_FL_DISABLED flag seems to
> > > > be easir and generic solution.. I'll send RFC for that
> > > It's not easier.
> > > 
> > > Here, I have a POC for you and some more history.
> > > 
> > > The recordmcount.pl Perl script was the first to create the
> > > __mcount_loc section in all objects that ftrace needed to hook to. It
> > > did an objdump, found the locations of the calls to mcount, created
> > > another file that had a section __mcount_loc that referenced all those
> > > locations. Compiled and relinked that back into the original object.
> > > 
> > > This was performed on all object files during the build, and had an
> > > impact on build times. But this is when I also created and introduced
> > > "make localmodconfig", which shrunk the build times for many people, so
> > > nobody noticed the build time increase! :-)
> > > 
> > > Then John Reiser sent me a patch that created recordmcount.c that did
> > > the same work but directly modified the ELF object files without having
> > > to run scripts. This got rid of that horrible overhead from the scripts.
> > > 
> > > Then, the gcc folks decided to be helpful here as well and created the
> > > --mrecord-mcount option that would create the __mcount_loc section for
> > > us, where we no longer needed the recordmcount scripts/C program. But
> > > is not available across the board.
> > > 
> > > Today, objtool has also got involved, and added an "--mcount" option
> > > that will create the section too.
> > I overlooked that objtool is involved as well,
> > will check on that
> 
> objtool --mcount option only involves mcount_loc generation (see
> annotate_call_site) and other validation check call destination directly
> (see is_fentry_call).
> 
> Some simply removing --mcount option dose work for this.
> 
> 
> Another question, it seems we can export and use DEFINE_BPF_DISPATCHER out
> of kernel, does that means we should add NO_MCOUNT_FILES for these single
> uages as well?

yes, cc-ing Björn to make sure it's valid use case for dispatcher

jirka

> 
> I dont think it can be made automatically. If ignored, this can be a
> trouble.
> 
> 
> Best,
> 
> Chen
> 
> > > Below is a patch that extends yours by adding a NO_MCOUNT_FILES list,
> > > that you add the object file to and it will prevent the other methods
> > > from adding an mcount_loc location.
> > thanks,
> > jirka
> > 
> > > I'm adding the objtool folks to make sure this is fine with them.
> > > Again, this is a proof of concept, but works. It may need to be cleaned
> > > a bit before it is final.
> > > 
> > > -- Steve
> > > 
> > > Index: linux-trace.git/scripts/Makefile.build
> > > ===================================================================
> > > --- linux-trace.git.orig/scripts/Makefile.build
> > > +++ linux-trace.git/scripts/Makefile.build
> > > @@ -206,8 +206,10 @@ sub_cmd_record_mcount = perl $(srctree)/
> > >   	"$(if $(part-of-module),1,0)" "$(@)";
> > >   recordmcount_source := $(srctree)/scripts/recordmcount.pl
> > >   endif # BUILD_C_RECORDMCOUNT
> > > -cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)),	\
> > > +chk_sub_cmd_record_mcount = $(if $(filter $(shell basename $@),$(NO_MCOUNT_FILES)),, \
> > >   	$(sub_cmd_record_mcount))
> > > +cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)),	\
> > > +	$(chk_sub_cmd_record_mcount))
> > >   endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
> > >   # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory
> > > Index: linux-trace.git/scripts/Makefile.lib
> > > ===================================================================
> > > --- linux-trace.git.orig/scripts/Makefile.lib
> > > +++ linux-trace.git/scripts/Makefile.lib
> > > @@ -233,7 +233,8 @@ objtool_args =								\
> > >   	$(if $(CONFIG_HAVE_JUMP_LABEL_HACK), --hacks=jump_label)	\
> > >   	$(if $(CONFIG_HAVE_NOINSTR_HACK), --hacks=noinstr)		\
> > >   	$(if $(CONFIG_X86_KERNEL_IBT), --ibt)				\
> > > -	$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount)		\
> > > +	$(if $(filter $(shell basename $@),$(NO_MCOUNT_FILES)),,	\
> > > +		$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount))	\
> > >   	$(if $(CONFIG_UNWINDER_ORC), --orc)				\
> > >   	$(if $(CONFIG_RETPOLINE), --retpoline)				\
> > >   	$(if $(CONFIG_SLS), --sls)					\
> > > Index: linux-trace.git/net/core/Makefile
> > > ===================================================================
> > > --- linux-trace.git.orig/net/core/Makefile
> > > +++ linux-trace.git/net/core/Makefile
> > > @@ -11,10 +11,15 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.
> > >   obj-y		     += dev.o dev_addr_lists.o dst.o netevent.o \
> > >   			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
> > >   			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
> > > -			fib_notifier.o xdp.o flow_offload.o gro.o
> > > +			fib_notifier.o xdp.o flow_offload.o gro.o \
> > > +			dispatcher.o
> > >   obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
> > > +# remove dispatcher function from ftrace sight
> > > +CFLAGS_REMOVE_dispatcher.o = -mrecord-mcount
> > > +NO_MCOUNT_FILES += dispatcher.o
> > > +
> > >   obj-y += net-sysfs.o
> > >   obj-$(CONFIG_PAGE_POOL) += page_pool.o
> > >   obj-$(CONFIG_PROC_FS) += net-procfs.o
> > > Index: linux-trace.git/net/core/dispatcher.c
> > > ===================================================================
> > > --- /dev/null
> > > +++ linux-trace.git/net/core/dispatcher.c
> > > @@ -0,0 +1,3 @@
> > > +#include <linux/bpf.h>
> > > +
> > > +DEFINE_BPF_DISPATCHER(xdp)
> > > Index: linux-trace.git/net/core/filter.c
> > > ===================================================================
> > > --- linux-trace.git.orig/net/core/filter.c
> > > +++ linux-trace.git/net/core/filter.c
> > > @@ -11162,8 +11162,6 @@ const struct bpf_verifier_ops sk_lookup_
> > >   #endif /* CONFIG_INET */
> > > -DEFINE_BPF_DISPATCHER(xdp)
> > > -
> > >   void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
> > >   {
> > >   	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
> > > 
> 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15  8:03                   ` Peter Zijlstra
@ 2022-08-15  9:44                     ` Jiri Olsa
  2022-08-15 12:37                       ` Peter Zijlstra
  0 siblings, 1 reply; 58+ messages in thread
From: Jiri Olsa @ 2022-08-15  9:44 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Steven Rostedt, Jiri Olsa, Alexei Starovoitov,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, LKML,
	Josh Poimboeuf

On Mon, Aug 15, 2022 at 10:03:17AM +0200, Peter Zijlstra wrote:
> On Sat, Aug 13, 2022 at 03:02:52PM -0400, Steven Rostedt wrote:
> > On Fri, 12 Aug 2022 23:18:15 +0200
> > Jiri Olsa <olsajiri@gmail.com> wrote:
> > 
> > > the patch below moves the bpf function into sepatate object and switches
> > > off the -mrecord-mcount for it.. so the function gets profile call
> > > generated but it's not visible to ftrace
> 
> Why ?!?

there's bpf dispatcher code that updates bpf_dispatcher_xdp_func
function with bpf_arch_text_poke and that can race with ftrace update
if the function is traced

the idea to solve it is to 'mark' the function independent of ftrace,
and add a way to make the function invissible to ftrace but with the
profile code fentry call generated

jirka

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15  8:04                       ` Jiri Olsa
@ 2022-08-15 11:01                         ` Björn Töpel
  2022-08-15 11:29                           ` Jiri Olsa
  0 siblings, 1 reply; 58+ messages in thread
From: Björn Töpel @ 2022-08-15 11:01 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Chen Zhongjin, Steven Rostedt, Alexei Starovoitov,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, LKML,
	Josh Poimboeuf, Peter Zijlstra

On Mon, 15 Aug 2022 at 10:04, Jiri Olsa <olsajiri@gmail.com> wrote:
[...]
> > > >
> > > > Today, objtool has also got involved, and added an "--mcount" option
> > > > that will create the section too.
> > > I overlooked that objtool is involved as well,
> > > will check on that
> >
> > objtool --mcount option only involves mcount_loc generation (see
> > annotate_call_site) and other validation check call destination directly
> > (see is_fentry_call).
> >
> > Some simply removing --mcount option dose work for this.
> >
> >
> > Another question, it seems we can export and use DEFINE_BPF_DISPATCHER out
> > of kernel, does that means we should add NO_MCOUNT_FILES for these single
> > uages as well?
>
> yes, cc-ing Björn to make sure it's valid use case for dispatcher
>

Hmm, could you expand a bit on how this would work?

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 11:01                         ` Björn Töpel
@ 2022-08-15 11:29                           ` Jiri Olsa
  2022-08-15 12:19                             ` Björn Töpel
  0 siblings, 1 reply; 58+ messages in thread
From: Jiri Olsa @ 2022-08-15 11:29 UTC (permalink / raw)
  To: Björn Töpel
  Cc: Jiri Olsa, Chen Zhongjin, Steven Rostedt, Alexei Starovoitov,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, LKML,
	Josh Poimboeuf, Peter Zijlstra

On Mon, Aug 15, 2022 at 01:01:06PM +0200, Björn Töpel wrote:
> On Mon, 15 Aug 2022 at 10:04, Jiri Olsa <olsajiri@gmail.com> wrote:
> [...]
> > > > >
> > > > > Today, objtool has also got involved, and added an "--mcount" option
> > > > > that will create the section too.
> > > > I overlooked that objtool is involved as well,
> > > > will check on that
> > >
> > > objtool --mcount option only involves mcount_loc generation (see
> > > annotate_call_site) and other validation check call destination directly
> > > (see is_fentry_call).
> > >
> > > Some simply removing --mcount option dose work for this.
> > >
> > >
> > > Another question, it seems we can export and use DEFINE_BPF_DISPATCHER out
> > > of kernel, does that means we should add NO_MCOUNT_FILES for these single
> > > uages as well?
> >
> > yes, cc-ing Björn to make sure it's valid use case for dispatcher
> >
> 
> Hmm, could you expand a bit on how this would work?

the goal here is to remove bpf_dispatcher_<FUNC>_func functions from
ftrace, because it's updated by dispatcher code with bpf_arch_text_poke,
but it's also visible and attachable to ftrace.. and will cause problems
when these 2 updates will race

question was if DEFINE_BPF_DISPATCHER can be used in kernel module,
which would bring another realm of problems ;-)

jirka

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 11:29                           ` Jiri Olsa
@ 2022-08-15 12:19                             ` Björn Töpel
  2022-08-15 12:30                               ` Björn Töpel
  0 siblings, 1 reply; 58+ messages in thread
From: Björn Töpel @ 2022-08-15 12:19 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Chen Zhongjin, Steven Rostedt, Alexei Starovoitov,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, LKML,
	Josh Poimboeuf, Peter Zijlstra

On Mon, 15 Aug 2022 at 13:29, Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Mon, Aug 15, 2022 at 01:01:06PM +0200, Björn Töpel wrote:
> > On Mon, 15 Aug 2022 at 10:04, Jiri Olsa <olsajiri@gmail.com> wrote:
> > [...]
> > > > > >
> > > > > > Today, objtool has also got involved, and added an "--mcount" option
> > > > > > that will create the section too.
> > > > > I overlooked that objtool is involved as well,
> > > > > will check on that
> > > >
> > > > objtool --mcount option only involves mcount_loc generation (see
> > > > annotate_call_site) and other validation check call destination directly
> > > > (see is_fentry_call).
> > > >
> > > > Some simply removing --mcount option dose work for this.
> > > >
> > > >
> > > > Another question, it seems we can export and use DEFINE_BPF_DISPATCHER out
> > > > of kernel, does that means we should add NO_MCOUNT_FILES for these single
> > > > uages as well?
> > >
> > > yes, cc-ing Björn to make sure it's valid use case for dispatcher
> > >
> >
> > Hmm, could you expand a bit on how this would work?
>
> the goal here is to remove bpf_dispatcher_<FUNC>_func functions from
> ftrace, because it's updated by dispatcher code with bpf_arch_text_poke,
> but it's also visible and attachable to ftrace.. and will cause problems
> when these 2 updates will race
>
> question was if DEFINE_BPF_DISPATCHER can be used in kernel module,
> which would bring another realm of problems ;-)
>

Oh, now I follow. AFAIK there is only one flavor of BPF dispatcher in
use, and that's the XDP dispatcher, which does not reside in module
code, but is typically *called* by module code.

A module could define a BPF dispatcher, but it wouldn't be able to
update it, since bpf_arch_text_poke() does not support poking in
modules.


Björn

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 12:19                             ` Björn Töpel
@ 2022-08-15 12:30                               ` Björn Töpel
  0 siblings, 0 replies; 58+ messages in thread
From: Björn Töpel @ 2022-08-15 12:30 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Chen Zhongjin, Steven Rostedt, Alexei Starovoitov,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, LKML,
	Josh Poimboeuf, Peter Zijlstra

On Mon, 15 Aug 2022 at 14:19, Björn Töpel <bjorn@kernel.org> wrote:
>
> On Mon, 15 Aug 2022 at 13:29, Jiri Olsa <olsajiri@gmail.com> wrote:
> >
> > On Mon, Aug 15, 2022 at 01:01:06PM +0200, Björn Töpel wrote:
> > > On Mon, 15 Aug 2022 at 10:04, Jiri Olsa <olsajiri@gmail.com> wrote:
> > > [...]
> > > > > > >
> > > > > > > Today, objtool has also got involved, and added an "--mcount" option
> > > > > > > that will create the section too.
> > > > > > I overlooked that objtool is involved as well,
> > > > > > will check on that
> > > > >
> > > > > objtool --mcount option only involves mcount_loc generation (see
> > > > > annotate_call_site) and other validation check call destination directly
> > > > > (see is_fentry_call).
> > > > >
> > > > > Some simply removing --mcount option dose work for this.
> > > > >
> > > > >
> > > > > Another question, it seems we can export and use DEFINE_BPF_DISPATCHER out
> > > > > of kernel, does that means we should add NO_MCOUNT_FILES for these single
> > > > > uages as well?
> > > >
> > > > yes, cc-ing Björn to make sure it's valid use case for dispatcher
> > > >
> > >
> > > Hmm, could you expand a bit on how this would work?
> >
> > the goal here is to remove bpf_dispatcher_<FUNC>_func functions from
> > ftrace, because it's updated by dispatcher code with bpf_arch_text_poke,
> > but it's also visible and attachable to ftrace.. and will cause problems
> > when these 2 updates will race
> >
> > question was if DEFINE_BPF_DISPATCHER can be used in kernel module,
> > which would bring another realm of problems ;-)
> >
>
> Oh, now I follow. AFAIK there is only one flavor of BPF dispatcher in
> use, and that's the XDP dispatcher, which does not reside in module
> code, but is typically *called* by module code.
>

Some history why the EXPORT is required:
https://lore.kernel.org/bpf/CAADnVQ+eD-=FZrg8L+YcdCyAS+E30W=Z-ShtEXAXVFjmxV4usg@mail.gmail.com/

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15  9:44                     ` Jiri Olsa
@ 2022-08-15 12:37                       ` Peter Zijlstra
  2022-08-15 14:25                         ` Alexei Starovoitov
  0 siblings, 1 reply; 58+ messages in thread
From: Peter Zijlstra @ 2022-08-15 12:37 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Steven Rostedt, Alexei Starovoitov, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Ingo Molnar, bpf,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo, LKML, Josh Poimboeuf

On Mon, Aug 15, 2022 at 11:44:32AM +0200, Jiri Olsa wrote:
> On Mon, Aug 15, 2022 at 10:03:17AM +0200, Peter Zijlstra wrote:
> > On Sat, Aug 13, 2022 at 03:02:52PM -0400, Steven Rostedt wrote:
> > > On Fri, 12 Aug 2022 23:18:15 +0200
> > > Jiri Olsa <olsajiri@gmail.com> wrote:
> > > 
> > > > the patch below moves the bpf function into sepatate object and switches
> > > > off the -mrecord-mcount for it.. so the function gets profile call
> > > > generated but it's not visible to ftrace
> > 
> > Why ?!?
> 
> there's bpf dispatcher code that updates bpf_dispatcher_xdp_func
> function with bpf_arch_text_poke and that can race with ftrace update
> if the function is traced

I thought bpf_arch_text_poke() wasn't allowed to touch kernel code and
ftrace is in full control of it ?

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 12:37                       ` Peter Zijlstra
@ 2022-08-15 14:25                         ` Alexei Starovoitov
  2022-08-15 14:33                           ` Peter Zijlstra
  0 siblings, 1 reply; 58+ messages in thread
From: Alexei Starovoitov @ 2022-08-15 14:25 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jiri Olsa, Steven Rostedt, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf

On Mon, Aug 15, 2022 at 5:37 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Aug 15, 2022 at 11:44:32AM +0200, Jiri Olsa wrote:
> > On Mon, Aug 15, 2022 at 10:03:17AM +0200, Peter Zijlstra wrote:
> > > On Sat, Aug 13, 2022 at 03:02:52PM -0400, Steven Rostedt wrote:
> > > > On Fri, 12 Aug 2022 23:18:15 +0200
> > > > Jiri Olsa <olsajiri@gmail.com> wrote:
> > > >
> > > > > the patch below moves the bpf function into sepatate object and switches
> > > > > off the -mrecord-mcount for it.. so the function gets profile call
> > > > > generated but it's not visible to ftrace
> > >
> > > Why ?!?
> >
> > there's bpf dispatcher code that updates bpf_dispatcher_xdp_func
> > function with bpf_arch_text_poke and that can race with ftrace update
> > if the function is traced
>
> I thought bpf_arch_text_poke() wasn't allowed to touch kernel code and
> ftrace is in full control of it ?

ftrace is not in "full control" of nop5 and must not be.
Soon we will have nop5 in the middle of the function.
ftrace must not touch it.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 14:25                         ` Alexei Starovoitov
@ 2022-08-15 14:33                           ` Peter Zijlstra
  2022-08-15 14:45                             ` Alexei Starovoitov
  0 siblings, 1 reply; 58+ messages in thread
From: Peter Zijlstra @ 2022-08-15 14:33 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jiri Olsa, Steven Rostedt, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf

On Mon, Aug 15, 2022 at 07:25:24AM -0700, Alexei Starovoitov wrote:
> On Mon, Aug 15, 2022 at 5:37 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Mon, Aug 15, 2022 at 11:44:32AM +0200, Jiri Olsa wrote:
> > > On Mon, Aug 15, 2022 at 10:03:17AM +0200, Peter Zijlstra wrote:
> > > > On Sat, Aug 13, 2022 at 03:02:52PM -0400, Steven Rostedt wrote:
> > > > > On Fri, 12 Aug 2022 23:18:15 +0200
> > > > > Jiri Olsa <olsajiri@gmail.com> wrote:
> > > > >
> > > > > > the patch below moves the bpf function into sepatate object and switches
> > > > > > off the -mrecord-mcount for it.. so the function gets profile call
> > > > > > generated but it's not visible to ftrace
> > > >
> > > > Why ?!?
> > >
> > > there's bpf dispatcher code that updates bpf_dispatcher_xdp_func
> > > function with bpf_arch_text_poke and that can race with ftrace update
> > > if the function is traced
> >
> > I thought bpf_arch_text_poke() wasn't allowed to touch kernel code and
> > ftrace is in full control of it ?
> 
> ftrace is not in "full control" of nop5 and must not be.

It is in full control of the 'call __fentry__'. Absolute full NAK on you
trying to make it otherwise.

> Soon we will have nop5 in the middle of the function.
> ftrace must not touch it.

How are you generating that NOP and what for?


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 14:33                           ` Peter Zijlstra
@ 2022-08-15 14:45                             ` Alexei Starovoitov
  2022-08-15 15:02                               ` Peter Zijlstra
  0 siblings, 1 reply; 58+ messages in thread
From: Alexei Starovoitov @ 2022-08-15 14:45 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jiri Olsa, Steven Rostedt, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf

On Mon, Aug 15, 2022 at 7:33 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Aug 15, 2022 at 07:25:24AM -0700, Alexei Starovoitov wrote:
> > On Mon, Aug 15, 2022 at 5:37 AM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > On Mon, Aug 15, 2022 at 11:44:32AM +0200, Jiri Olsa wrote:
> > > > On Mon, Aug 15, 2022 at 10:03:17AM +0200, Peter Zijlstra wrote:
> > > > > On Sat, Aug 13, 2022 at 03:02:52PM -0400, Steven Rostedt wrote:
> > > > > > On Fri, 12 Aug 2022 23:18:15 +0200
> > > > > > Jiri Olsa <olsajiri@gmail.com> wrote:
> > > > > >
> > > > > > > the patch below moves the bpf function into sepatate object and switches
> > > > > > > off the -mrecord-mcount for it.. so the function gets profile call
> > > > > > > generated but it's not visible to ftrace
> > > > >
> > > > > Why ?!?
> > > >
> > > > there's bpf dispatcher code that updates bpf_dispatcher_xdp_func
> > > > function with bpf_arch_text_poke and that can race with ftrace update
> > > > if the function is traced
> > >
> > > I thought bpf_arch_text_poke() wasn't allowed to touch kernel code and
> > > ftrace is in full control of it ?
> >
> > ftrace is not in "full control" of nop5 and must not be.
>
> It is in full control of the 'call __fentry__'. Absolute full NAK on you
> trying to make it otherwise.

Don't mix 'call fentry' generated by the compiler with nop5 inserted
by macroses or JITs.

> > Soon we will have nop5 in the middle of the function.
> > ftrace must not touch it.
>
> How are you generating that NOP and what for?

We're generating nop5-s in JITed code to further
attach to. For example when one bpf prog is being replaced by another.
Currently it's in the func prologue only.
In the future it will be anywhere in the body.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 14:45                             ` Alexei Starovoitov
@ 2022-08-15 15:02                               ` Peter Zijlstra
  2022-08-15 15:17                                 ` Alexei Starovoitov
  0 siblings, 1 reply; 58+ messages in thread
From: Peter Zijlstra @ 2022-08-15 15:02 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jiri Olsa, Steven Rostedt, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf

On Mon, Aug 15, 2022 at 07:45:16AM -0700, Alexei Starovoitov wrote:
> On Mon, Aug 15, 2022 at 7:33 AM Peter Zijlstra <peterz@infradead.org> wrote:

> > It is in full control of the 'call __fentry__'. Absolute full NAK on you
> > trying to make it otherwise.
> 
> Don't mix 'call fentry' generated by the compiler with nop5 inserted
> by macroses or JITs.

Looking at:

 https://lore.kernel.org/all/20191211123017.13212-3-bjorn.topel@gmail.com/

this seems to want to prod at the __fentry__ sites.

> > > Soon we will have nop5 in the middle of the function.
> > > ftrace must not touch it.
> >
> > How are you generating that NOP and what for?
> 
> We're generating nop5-s in JITed code to further
> attach to.

Ftrace doesn't know about those; so how can it break that?

Likewise it doesn't know about the static_branch/static_call NOPs and
nothing is broken.

Ftrace only knows about the __fentry__ sites, and it *does* own them.
Are you saying ftrace is writing to a code location not a __fentry__
site?

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 15:02                               ` Peter Zijlstra
@ 2022-08-15 15:17                                 ` Alexei Starovoitov
  2022-08-15 15:28                                   ` Peter Zijlstra
  2022-08-15 15:32                                   ` Steven Rostedt
  0 siblings, 2 replies; 58+ messages in thread
From: Alexei Starovoitov @ 2022-08-15 15:17 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jiri Olsa, Steven Rostedt, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf

On Mon, Aug 15, 2022 at 8:02 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Aug 15, 2022 at 07:45:16AM -0700, Alexei Starovoitov wrote:
> > On Mon, Aug 15, 2022 at 7:33 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> > > It is in full control of the 'call __fentry__'. Absolute full NAK on you
> > > trying to make it otherwise.
> >
> > Don't mix 'call fentry' generated by the compiler with nop5 inserted
> > by macroses or JITs.
>
> Looking at:
>
>  https://lore.kernel.org/all/20191211123017.13212-3-bjorn.topel@gmail.com/
>
> this seems to want to prod at the __fentry__ sites.
>
> > > > Soon we will have nop5 in the middle of the function.
> > > > ftrace must not touch it.
> > >
> > > How are you generating that NOP and what for?
> >
> > We're generating nop5-s in JITed code to further
> > attach to.
>
> Ftrace doesn't know about those; so how can it break that?
>
> Likewise it doesn't know about the static_branch/static_call NOPs and
> nothing is broken.
>
> Ftrace only knows about the __fentry__ sites, and it *does* own them.
> Are you saying ftrace is writing to a code location not a __fentry__
> site?

Let's keep it in one thread:

> It wasn't long before. Yes it landed a few months prior to the
> static_call work, but the whole static_call thing was in progress for a
> long long time.
>
> Anyway, yes it is different. But it's still very much broken. You simply
> cannot step on __fentry__ sites like that.

Ask yourself: should static_call patching logic go through
ftrace infra ? No. Right?
static_call has nothing to do with ftrace (function tracing).
Same thing here. bpf dispatching logic is nothing to do with
function tracing.
In this case bpf_dispatcher_xdp_func is a placeholder written C.
If it was written in asm, fentry recording wouldn't have known about it.
And that's more or less what Jiri patch is doing.
It's hiding a fake function from ftrace, since it's not a function
and ftrace infra shouldn't show it tracing logs.
In other words it's a _notrace_ function with nop5.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 15:17                                 ` Alexei Starovoitov
@ 2022-08-15 15:28                                   ` Peter Zijlstra
  2022-08-15 15:35                                     ` Alexei Starovoitov
  2022-08-15 15:41                                     ` Peter Zijlstra
  2022-08-15 15:32                                   ` Steven Rostedt
  1 sibling, 2 replies; 58+ messages in thread
From: Peter Zijlstra @ 2022-08-15 15:28 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jiri Olsa, Steven Rostedt, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf

On Mon, Aug 15, 2022 at 08:17:42AM -0700, Alexei Starovoitov wrote:
> It's hiding a fake function from ftrace, since it's not a function
> and ftrace infra shouldn't show it tracing logs.
> In other words it's a _notrace_ function with nop5.

Then make it a notrace function with a nop5 in it. That isn't hard.

The whole problem is that it isn't a notrace function and you're abusing
a __fentry__ site.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 15:17                                 ` Alexei Starovoitov
  2022-08-15 15:28                                   ` Peter Zijlstra
@ 2022-08-15 15:32                                   ` Steven Rostedt
  1 sibling, 0 replies; 58+ messages in thread
From: Steven Rostedt @ 2022-08-15 15:32 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Peter Zijlstra, Jiri Olsa, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf

On Mon, 15 Aug 2022 08:17:42 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> Ask yourself: should static_call patching logic go through
> ftrace infra ? No. Right?

I agree that static_call (and jump_labels) are not part of the ftrace
infrastructure (but ftrace was a strong motivator for those).

> static_call has nothing to do with ftrace (function tracing).

Besides the motivation, I agree.

> Same thing here. bpf dispatching logic is nothing to do with
> function tracing.

But it used fentry, which is part of function tracing. Which is what I'm
against. And why it broke ftrace.

> In this case bpf_dispatcher_xdp_func is a placeholder written C.
> If it was written in asm, fentry recording wouldn't have known about it.

And I would not have had an issue with that approach (for ftrace that is).
But that brings up other concerns (see below).

> And that's more or less what Jiri patch is doing.
> It's hiding a fake function from ftrace, since it's not a function
> and ftrace infra shouldn't show it tracing logs.
> In other words it's a _notrace_ function with nop5.

On the ftrace side, I'm perfectly happy with Jiri's approach (the one I
help extend).

But dynamic code modification is something we need to take very seriously.
It's very similar to writing your own locking primitives (which Linus
always says "Don't do"). It's complex and easy to get wrong. The more
dynamic code modifications we have, the less secure the kernel is.

Here's the list of dynamic code modification infrastructures:

 ftrace
 kprobes
 jump_labels
 static_calls

We now have the bpf dispatcher.

The ftrace, kprobes, jump_labels and static_calls developers work together
to make sure that we are all in line, not breaking anything, and try to
consolidate when possible. We also review each others code.

The issue I have is that BPF is largely doing it alone, and not
communicating with the others. This gives me cause for concern on both a
robustness and security point of view.

-- Steve

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 15:28                                   ` Peter Zijlstra
@ 2022-08-15 15:35                                     ` Alexei Starovoitov
  2022-08-15 15:44                                       ` Steven Rostedt
  2022-08-15 15:48                                       ` Peter Zijlstra
  2022-08-15 15:41                                     ` Peter Zijlstra
  1 sibling, 2 replies; 58+ messages in thread
From: Alexei Starovoitov @ 2022-08-15 15:35 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jiri Olsa, Steven Rostedt, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf

On Mon, Aug 15, 2022 at 8:28 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Aug 15, 2022 at 08:17:42AM -0700, Alexei Starovoitov wrote:
> > It's hiding a fake function from ftrace, since it's not a function
> > and ftrace infra shouldn't show it tracing logs.
> > In other words it's a _notrace_ function with nop5.
>
> Then make it a notrace function with a nop5 in it. That isn't hard.

That's exactly what we're trying to do.
Jiri's patch is one way to achieve that.
What is your suggestion?
Move it from C to asm ?
Make it naked function with explicit inline asm?
What else?

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 15:28                                   ` Peter Zijlstra
  2022-08-15 15:35                                     ` Alexei Starovoitov
@ 2022-08-15 15:41                                     ` Peter Zijlstra
  2022-08-15 15:49                                       ` Alexei Starovoitov
  2022-08-18 20:27                                       ` Jiri Olsa
  1 sibling, 2 replies; 58+ messages in thread
From: Peter Zijlstra @ 2022-08-15 15:41 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jiri Olsa, Steven Rostedt, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf

On Mon, Aug 15, 2022 at 05:28:02PM +0200, Peter Zijlstra wrote:
> On Mon, Aug 15, 2022 at 08:17:42AM -0700, Alexei Starovoitov wrote:
> > It's hiding a fake function from ftrace, since it's not a function
> > and ftrace infra shouldn't show it tracing logs.
> > In other words it's a _notrace_ function with nop5.
> 
> Then make it a notrace function with a nop5 in it. That isn't hard.
> 
> The whole problem is that it isn't a notrace function and you're abusing
> a __fentry__ site.

https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?h=x86/fineibt&id=8d075bdf11193f1d276bf19fa56b4b8dfe24df9e

foo.c:

__attribute__((__no_instrument_function__))
__attribute__((patchable_function_entry(5)))
void my_func(void)
{
}

void my_foo(void)
{
}

gcc -c foo.c -pg -mfentry -mcmodel=kernel -fno-PIE -O2

foo.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <my_func>:
   0:   f3 0f 1e fa             endbr64 
   4:   90                      nop
   5:   90                      nop
   6:   90                      nop
   7:   90                      nop
   8:   90                      nop
   9:   c3                      ret    
   a:   66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

0000000000000010 <my_foo>:
  10:   f3 0f 1e fa             endbr64 
  14:   e8 00 00 00 00          call   19 <my_foo+0x9>  15: R_X86_64_PLT32      __fentry__-0x4
  19:   c3                      ret    


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 15:35                                     ` Alexei Starovoitov
@ 2022-08-15 15:44                                       ` Steven Rostedt
  2022-08-15 15:53                                         ` Alexei Starovoitov
  2022-08-15 15:48                                       ` Peter Zijlstra
  1 sibling, 1 reply; 58+ messages in thread
From: Steven Rostedt @ 2022-08-15 15:44 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Peter Zijlstra, Jiri Olsa, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf, Linus Torvalds, Andrew Morton,
	Christoph Hellwig

On Mon, 15 Aug 2022 08:35:53 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> > Then make it a notrace function with a nop5 in it. That isn't hard.  
> 
> That's exactly what we're trying to do.
> Jiri's patch is one way to achieve that.
> What is your suggestion?
> Move it from C to asm ?
> Make it naked function with explicit inline asm?
> What else?

The dispatcher is already in the kernel so it's too late to complain about
it. Jiri's patch (with my extensions) will hopefully fix the breakage BPF
did to ftrace.

My ask now is to be more inclusive when doing anything that deals with
modification of text, or other infrastructures. This "go it alone" approach
really needs to stop. Linux is an open source project and collaboration is
key. I know you don't care about others use cases (as you told me in that
BPF meeting last year), but any maintainer in the Linux kernel must care
about the use case of others or this will all fail.

-- Steve

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 15:35                                     ` Alexei Starovoitov
  2022-08-15 15:44                                       ` Steven Rostedt
@ 2022-08-15 15:48                                       ` Peter Zijlstra
  2022-08-16  6:56                                         ` Jiri Olsa
  1 sibling, 1 reply; 58+ messages in thread
From: Peter Zijlstra @ 2022-08-15 15:48 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jiri Olsa, Steven Rostedt, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf

On Mon, Aug 15, 2022 at 08:35:53AM -0700, Alexei Starovoitov wrote:
> On Mon, Aug 15, 2022 at 8:28 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Mon, Aug 15, 2022 at 08:17:42AM -0700, Alexei Starovoitov wrote:
> > > It's hiding a fake function from ftrace, since it's not a function
> > > and ftrace infra shouldn't show it tracing logs.
> > > In other words it's a _notrace_ function with nop5.
> >
> > Then make it a notrace function with a nop5 in it. That isn't hard.
> 
> That's exactly what we're trying to do.

All the while claiming ftrace is broken while it is not.

> Jiri's patch is one way to achieve that.

Fairly horrible way.

> What is your suggestion?

Mailed it already.

> Move it from C to asm ?

Would be much better than proposed IMO.

> Make it naked function with explicit inline asm?

Can be made to work but is iffy because the compiler can do horrible
things with placing the asm().

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 15:41                                     ` Peter Zijlstra
@ 2022-08-15 15:49                                       ` Alexei Starovoitov
  2022-08-15 16:08                                         ` Steven Rostedt
  2022-08-18 20:27                                       ` Jiri Olsa
  1 sibling, 1 reply; 58+ messages in thread
From: Alexei Starovoitov @ 2022-08-15 15:49 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jiri Olsa, Steven Rostedt, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf

On Mon, Aug 15, 2022 at 8:41 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Aug 15, 2022 at 05:28:02PM +0200, Peter Zijlstra wrote:
> > On Mon, Aug 15, 2022 at 08:17:42AM -0700, Alexei Starovoitov wrote:
> > > It's hiding a fake function from ftrace, since it's not a function
> > > and ftrace infra shouldn't show it tracing logs.
> > > In other words it's a _notrace_ function with nop5.
> >
> > Then make it a notrace function with a nop5 in it. That isn't hard.
> >
> > The whole problem is that it isn't a notrace function and you're abusing
> > a __fentry__ site.
>
> https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?h=x86/fineibt&id=8d075bdf11193f1d276bf19fa56b4b8dfe24df9e

Brand new stuff.
Awesome. That should fit perfectly.

> foo.c:
>
> __attribute__((__no_instrument_function__))
> __attribute__((patchable_function_entry(5)))

Interesting. Didn't know about this attribute.

> void my_func(void)
> {
> }
>
> void my_foo(void)
> {
> }

Great.
Jiri, could you please revise your patch with this approach?

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 15:44                                       ` Steven Rostedt
@ 2022-08-15 15:53                                         ` Alexei Starovoitov
  2022-08-15 16:13                                           ` Steven Rostedt
  0 siblings, 1 reply; 58+ messages in thread
From: Alexei Starovoitov @ 2022-08-15 15:53 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Peter Zijlstra, Jiri Olsa, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf, Linus Torvalds, Andrew Morton,
	Christoph Hellwig

On Mon, Aug 15, 2022 at 8:44 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Mon, 15 Aug 2022 08:35:53 -0700
> Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>
> > > Then make it a notrace function with a nop5 in it. That isn't hard.
> >
> > That's exactly what we're trying to do.
> > Jiri's patch is one way to achieve that.
> > What is your suggestion?
> > Move it from C to asm ?
> > Make it naked function with explicit inline asm?
> > What else?
>
> The dispatcher is already in the kernel so it's too late to complain about
> it. Jiri's patch (with my extensions) will hopefully fix the breakage BPF
> did to ftrace.
>
> My ask now is to be more inclusive when doing anything that deals with
> modification of text, or other infrastructures. This "go it alone" approach
> really needs to stop. Linux is an open source project and collaboration is
> key. I know you don't care about others use cases (as you told me in that
> BPF meeting last year), but any maintainer in the Linux kernel must care
> about the use case of others or this will all fail.

Please don't misrepresent. Not cool.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 15:49                                       ` Alexei Starovoitov
@ 2022-08-15 16:08                                         ` Steven Rostedt
  0 siblings, 0 replies; 58+ messages in thread
From: Steven Rostedt @ 2022-08-15 16:08 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Peter Zijlstra, Jiri Olsa, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf, Linus Torvalds, Thomas Gleixner

On Mon, 15 Aug 2022 08:49:11 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> > > The whole problem is that it isn't a notrace function and you're abusing
> > > a __fentry__ site.  
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?h=x86/fineibt&id=8d075bdf11193f1d276bf19fa56b4b8dfe24df9e  
> 
> Brand new stuff.
> Awesome. That should fit perfectly.
> 
> > foo.c:
> >
> > __attribute__((__no_instrument_function__))
> > __attribute__((patchable_function_entry(5)))  
> 
> Interesting. Didn't know about this attribute.
> 
> > void my_func(void)
> > {
> > }
> >
> > void my_foo(void)
> > {
> > }  
> 
> Great.
> Jiri, could you please revise your patch with this approach?

This is the exact result I was looking for. Something we can all agree to.

The point being, include others when developing code that is similar to
what other subsystems do. On the code modification front, please Cc the
ftrace, kprobe, static_call and jump_label maintainers, as we like to work
together. The BPF dispatcher modifications should be no different. There's
a lot of experience in this field throughout the kernel. Please utilize it.
If it wasn't for this thread, we would never had found out about this easy
solution.

-- Steve

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 15:53                                         ` Alexei Starovoitov
@ 2022-08-15 16:13                                           ` Steven Rostedt
  0 siblings, 0 replies; 58+ messages in thread
From: Steven Rostedt @ 2022-08-15 16:13 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Peter Zijlstra, Jiri Olsa, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf, Linus Torvalds, Andrew Morton,
	Christoph Hellwig

On Mon, 15 Aug 2022 08:53:05 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> > My ask now is to be more inclusive when doing anything that deals with
> > modification of text, or other infrastructures. This "go it alone" approach
> > really needs to stop. Linux is an open source project and collaboration is
> > key. I know you don't care about others use cases (as you told me in that
> > BPF meeting last year), but any maintainer in the Linux kernel must care
> > about the use case of others or this will all fail.  
> 
> Please don't misrepresent. Not cool.

Sorry. To quote exactly what you told me when you cut me off in that
meeting, "I don't care about your use cases". I guess you care about others
use cases, just not mine.

-- Steve

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 15:48                                       ` Peter Zijlstra
@ 2022-08-16  6:56                                         ` Jiri Olsa
  2022-08-17  9:29                                           ` Jiri Olsa
  0 siblings, 1 reply; 58+ messages in thread
From: Jiri Olsa @ 2022-08-16  6:56 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Alexei Starovoitov, Jiri Olsa, Steven Rostedt,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, LKML,
	Josh Poimboeuf

On Mon, Aug 15, 2022 at 05:48:44PM +0200, Peter Zijlstra wrote:
> On Mon, Aug 15, 2022 at 08:35:53AM -0700, Alexei Starovoitov wrote:
> > On Mon, Aug 15, 2022 at 8:28 AM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > On Mon, Aug 15, 2022 at 08:17:42AM -0700, Alexei Starovoitov wrote:
> > > > It's hiding a fake function from ftrace, since it's not a function
> > > > and ftrace infra shouldn't show it tracing logs.
> > > > In other words it's a _notrace_ function with nop5.
> > >
> > > Then make it a notrace function with a nop5 in it. That isn't hard.
> > 
> > That's exactly what we're trying to do.
> 
> All the while claiming ftrace is broken while it is not.
> 
> > Jiri's patch is one way to achieve that.
> 
> Fairly horrible way.
> 
> > What is your suggestion?
> 
> Mailed it already.
> 
> > Move it from C to asm ?
> 
> Would be much better than proposed IMO.

nice, that would be independent of the compiler atributes
and config checking..  will check on this one ;-)

thanks,
jirka

> 
> > Make it naked function with explicit inline asm?
> 
> Can be made to work but is iffy because the compiler can do horrible
> things with placing the asm().

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-16  6:56                                         ` Jiri Olsa
@ 2022-08-17  9:29                                           ` Jiri Olsa
  2022-08-17 16:57                                             ` Alexei Starovoitov
  0 siblings, 1 reply; 58+ messages in thread
From: Jiri Olsa @ 2022-08-17  9:29 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Peter Zijlstra, Alexei Starovoitov, Steven Rostedt,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, LKML,
	Josh Poimboeuf, Björn Töpel

On Tue, Aug 16, 2022 at 08:56:33AM +0200, Jiri Olsa wrote:
> On Mon, Aug 15, 2022 at 05:48:44PM +0200, Peter Zijlstra wrote:
> > On Mon, Aug 15, 2022 at 08:35:53AM -0700, Alexei Starovoitov wrote:
> > > On Mon, Aug 15, 2022 at 8:28 AM Peter Zijlstra <peterz@infradead.org> wrote:
> > > >
> > > > On Mon, Aug 15, 2022 at 08:17:42AM -0700, Alexei Starovoitov wrote:
> > > > > It's hiding a fake function from ftrace, since it's not a function
> > > > > and ftrace infra shouldn't show it tracing logs.
> > > > > In other words it's a _notrace_ function with nop5.
> > > >
> > > > Then make it a notrace function with a nop5 in it. That isn't hard.
> > > 
> > > That's exactly what we're trying to do.
> > 
> > All the while claiming ftrace is broken while it is not.
> > 
> > > Jiri's patch is one way to achieve that.
> > 
> > Fairly horrible way.
> > 
> > > What is your suggestion?
> > 
> > Mailed it already.
> > 
> > > Move it from C to asm ?
> > 
> > Would be much better than proposed IMO.
> 
> nice, that would be independent of the compiler atributes
> and config checking..  will check on this one ;-)

how about something like below?

dispatcher code is generated only for x86_64, so that will be covered
by the assembly version (free of ftrace table) other archs stay same

jirka


----
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
index 383c87300b0d..94964002eaae 100644
--- a/arch/x86/net/Makefile
+++ b/arch/x86/net/Makefile
@@ -7,4 +7,5 @@ ifeq ($(CONFIG_X86_32),y)
         obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
 else
         obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+        obj-$(CONFIG_BPF_JIT) += bpf_dispatcher.o
 endif
diff --git a/arch/x86/net/bpf_dispatcher.S b/arch/x86/net/bpf_dispatcher.S
new file mode 100644
index 000000000000..65790a1286e8
--- /dev/null
+++ b/arch/x86/net/bpf_dispatcher.S
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/nops.h>
+#include <asm/nospec-branch.h>
+
+	.text
+SYM_FUNC_START(bpf_dispatcher_xdp_func)
+	ASM_NOP5
+	JMP_NOSPEC rdx
+SYM_FUNC_END(bpf_dispatcher_xdp_func)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a627a02cf8ab..03b54c820b95 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -924,7 +924,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
 }
 
 #define DEFINE_BPF_DISPATCHER(name)					\
-	noinline __nocfi unsigned int bpf_dispatcher_##name##_func(	\
+	noinline __nocfi unsigned int __weak bpf_dispatcher_##name##_func(\
 		const void *ctx,					\
 		const struct bpf_insn *insnsi,				\
 		bpf_func_t bpf_func)					\

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-17  9:29                                           ` Jiri Olsa
@ 2022-08-17 16:57                                             ` Alexei Starovoitov
  2022-08-17 19:39                                               ` Jiri Olsa
  0 siblings, 1 reply; 58+ messages in thread
From: Alexei Starovoitov @ 2022-08-17 16:57 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Peter Zijlstra, Steven Rostedt, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Ingo Molnar, bpf,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo, LKML, Josh Poimboeuf,
	Björn Töpel

On Wed, Aug 17, 2022 at 2:29 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Tue, Aug 16, 2022 at 08:56:33AM +0200, Jiri Olsa wrote:
> > On Mon, Aug 15, 2022 at 05:48:44PM +0200, Peter Zijlstra wrote:
> > > On Mon, Aug 15, 2022 at 08:35:53AM -0700, Alexei Starovoitov wrote:
> > > > On Mon, Aug 15, 2022 at 8:28 AM Peter Zijlstra <peterz@infradead.org> wrote:
> > > > >
> > > > > On Mon, Aug 15, 2022 at 08:17:42AM -0700, Alexei Starovoitov wrote:
> > > > > > It's hiding a fake function from ftrace, since it's not a function
> > > > > > and ftrace infra shouldn't show it tracing logs.
> > > > > > In other words it's a _notrace_ function with nop5.
> > > > >
> > > > > Then make it a notrace function with a nop5 in it. That isn't hard.
> > > >
> > > > That's exactly what we're trying to do.
> > >
> > > All the while claiming ftrace is broken while it is not.
> > >
> > > > Jiri's patch is one way to achieve that.
> > >
> > > Fairly horrible way.
> > >
> > > > What is your suggestion?
> > >
> > > Mailed it already.
> > >
> > > > Move it from C to asm ?
> > >
> > > Would be much better than proposed IMO.
> >
> > nice, that would be independent of the compiler atributes
> > and config checking..  will check on this one ;-)
>
> how about something like below?
>
> dispatcher code is generated only for x86_64, so that will be covered
> by the assembly version (free of ftrace table) other archs stay same
>
> jirka
>
>
> ----
> diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
> index 383c87300b0d..94964002eaae 100644
> --- a/arch/x86/net/Makefile
> +++ b/arch/x86/net/Makefile
> @@ -7,4 +7,5 @@ ifeq ($(CONFIG_X86_32),y)
>          obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
>  else
>          obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
> +        obj-$(CONFIG_BPF_JIT) += bpf_dispatcher.o
>  endif
> diff --git a/arch/x86/net/bpf_dispatcher.S b/arch/x86/net/bpf_dispatcher.S
> new file mode 100644
> index 000000000000..65790a1286e8
> --- /dev/null
> +++ b/arch/x86/net/bpf_dispatcher.S
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#include <linux/linkage.h>
> +#include <asm/nops.h>
> +#include <asm/nospec-branch.h>
> +
> +       .text
> +SYM_FUNC_START(bpf_dispatcher_xdp_func)
> +       ASM_NOP5
> +       JMP_NOSPEC rdx
> +SYM_FUNC_END(bpf_dispatcher_xdp_func)

Wait. Why asm ? Did you try Peter's suggestion:
__attribute__((__no_instrument_function__))
__attribute__((patchable_function_entry(5)))

?

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-17 16:57                                             ` Alexei Starovoitov
@ 2022-08-17 19:39                                               ` Jiri Olsa
  0 siblings, 0 replies; 58+ messages in thread
From: Jiri Olsa @ 2022-08-17 19:39 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jiri Olsa, Peter Zijlstra, Steven Rostedt, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Ingo Molnar, bpf,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo, LKML, Josh Poimboeuf,
	Björn Töpel

On Wed, Aug 17, 2022 at 09:57:45AM -0700, Alexei Starovoitov wrote:
> On Wed, Aug 17, 2022 at 2:29 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> >
> > On Tue, Aug 16, 2022 at 08:56:33AM +0200, Jiri Olsa wrote:
> > > On Mon, Aug 15, 2022 at 05:48:44PM +0200, Peter Zijlstra wrote:
> > > > On Mon, Aug 15, 2022 at 08:35:53AM -0700, Alexei Starovoitov wrote:
> > > > > On Mon, Aug 15, 2022 at 8:28 AM Peter Zijlstra <peterz@infradead.org> wrote:
> > > > > >
> > > > > > On Mon, Aug 15, 2022 at 08:17:42AM -0700, Alexei Starovoitov wrote:
> > > > > > > It's hiding a fake function from ftrace, since it's not a function
> > > > > > > and ftrace infra shouldn't show it tracing logs.
> > > > > > > In other words it's a _notrace_ function with nop5.
> > > > > >
> > > > > > Then make it a notrace function with a nop5 in it. That isn't hard.
> > > > >
> > > > > That's exactly what we're trying to do.
> > > >
> > > > All the while claiming ftrace is broken while it is not.
> > > >
> > > > > Jiri's patch is one way to achieve that.
> > > >
> > > > Fairly horrible way.
> > > >
> > > > > What is your suggestion?
> > > >
> > > > Mailed it already.
> > > >
> > > > > Move it from C to asm ?
> > > >
> > > > Would be much better than proposed IMO.
> > >
> > > nice, that would be independent of the compiler atributes
> > > and config checking..  will check on this one ;-)
> >
> > how about something like below?
> >
> > dispatcher code is generated only for x86_64, so that will be covered
> > by the assembly version (free of ftrace table) other archs stay same
> >
> > jirka
> >
> >
> > ----
> > diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
> > index 383c87300b0d..94964002eaae 100644
> > --- a/arch/x86/net/Makefile
> > +++ b/arch/x86/net/Makefile
> > @@ -7,4 +7,5 @@ ifeq ($(CONFIG_X86_32),y)
> >          obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
> >  else
> >          obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
> > +        obj-$(CONFIG_BPF_JIT) += bpf_dispatcher.o
> >  endif
> > diff --git a/arch/x86/net/bpf_dispatcher.S b/arch/x86/net/bpf_dispatcher.S
> > new file mode 100644
> > index 000000000000..65790a1286e8
> > --- /dev/null
> > +++ b/arch/x86/net/bpf_dispatcher.S
> > @@ -0,0 +1,10 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#include <linux/linkage.h>
> > +#include <asm/nops.h>
> > +#include <asm/nospec-branch.h>
> > +
> > +       .text
> > +SYM_FUNC_START(bpf_dispatcher_xdp_func)
> > +       ASM_NOP5
> > +       JMP_NOSPEC rdx
> > +SYM_FUNC_END(bpf_dispatcher_xdp_func)
> 
> Wait. Why asm ? Did you try Peter's suggestion:
> __attribute__((__no_instrument_function__))
> __attribute__((patchable_function_entry(5)))

ah so this suggestion came in the other thread after the asm one.. ok, will check

jirka

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-15 15:41                                     ` Peter Zijlstra
  2022-08-15 15:49                                       ` Alexei Starovoitov
@ 2022-08-18 20:27                                       ` Jiri Olsa
  2022-08-18 20:50                                         ` Steven Rostedt
  1 sibling, 1 reply; 58+ messages in thread
From: Jiri Olsa @ 2022-08-18 20:27 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Alexei Starovoitov, Jiri Olsa, Steven Rostedt,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, LKML,
	Josh Poimboeuf

On Mon, Aug 15, 2022 at 05:41:27PM +0200, Peter Zijlstra wrote:
> On Mon, Aug 15, 2022 at 05:28:02PM +0200, Peter Zijlstra wrote:
> > On Mon, Aug 15, 2022 at 08:17:42AM -0700, Alexei Starovoitov wrote:
> > > It's hiding a fake function from ftrace, since it's not a function
> > > and ftrace infra shouldn't show it tracing logs.
> > > In other words it's a _notrace_ function with nop5.
> > 
> > Then make it a notrace function with a nop5 in it. That isn't hard.
> > 
> > The whole problem is that it isn't a notrace function and you're abusing
> > a __fentry__ site.
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?h=x86/fineibt&id=8d075bdf11193f1d276bf19fa56b4b8dfe24df9e
> 
> foo.c:
> 
> __attribute__((__no_instrument_function__))
> __attribute__((patchable_function_entry(5)))
> void my_func(void)
> {
> }
> 
> void my_foo(void)
> {
> }
> 
> gcc -c foo.c -pg -mfentry -mcmodel=kernel -fno-PIE -O2
> 
> foo.o:     file format elf64-x86-64
> 
> 
> Disassembly of section .text:
> 
> 0000000000000000 <my_func>:
>    0:   f3 0f 1e fa             endbr64 
>    4:   90                      nop
>    5:   90                      nop
>    6:   90                      nop
>    7:   90                      nop
>    8:   90                      nop
>    9:   c3                      ret    
>    a:   66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
> 
> 0000000000000010 <my_foo>:
>   10:   f3 0f 1e fa             endbr64 
>   14:   e8 00 00 00 00          call   19 <my_foo+0x9>  15: R_X86_64_PLT32      __fentry__-0x4
>   19:   c3                      ret    
> 

ok, so the problem with __attribute__((patchable_function_entry(5))) is that
it puts function address into __patchable_function_entries section, which is
one of ftrace locations source:

  #define MCOUNT_REC()    . = ALIGN(8);     \
    __start_mcount_loc = .;                 \
    KEEP(*(__mcount_loc))                   \
    KEEP(*(__patchable_function_entries))   \
    __stop_mcount_loc = .;                  \
   ...


it looks like __patchable_function_entries is used for other than x86 archs,
so we perhaps we could have x86 specific MCOUNT_REC macro just with
__mcount_loc section?

jirka

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-18 20:27                                       ` Jiri Olsa
@ 2022-08-18 20:50                                         ` Steven Rostedt
  2022-08-18 21:00                                           ` Alexei Starovoitov
  2022-08-18 21:14                                           ` Jiri Olsa
  0 siblings, 2 replies; 58+ messages in thread
From: Steven Rostedt @ 2022-08-18 20:50 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Peter Zijlstra, Alexei Starovoitov, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Ingo Molnar, bpf,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo, LKML, Josh Poimboeuf

On Thu, 18 Aug 2022 22:27:07 +0200
Jiri Olsa <olsajiri@gmail.com> wrote:

> ok, so the problem with __attribute__((patchable_function_entry(5))) is that
> it puts function address into __patchable_function_entries section, which is
> one of ftrace locations source:
> 
>   #define MCOUNT_REC()    . = ALIGN(8);     \
>     __start_mcount_loc = .;                 \
>     KEEP(*(__mcount_loc))                   \
>     KEEP(*(__patchable_function_entries))   \
>     __stop_mcount_loc = .;                  \
>    ...
> 
> 
> it looks like __patchable_function_entries is used for other than x86 archs,
> so we perhaps we could have x86 specific MCOUNT_REC macro just with
> __mcount_loc section?

So something like this:

#ifdef CONFIG_X86
# define NON_MCOUNT_PATCHABLE KEEP(*(__patchable_function_entries))
# define MCOUNT_PATCHABLE
#else
# define NON_MCOUNT_PATCHABLE
# define MCOUNT_PATCHABLE  KEEP(*(__patchable_function_entries))
#endif

  #define MCOUNT_REC()    . = ALIGN(8);     \
    __start_mcount_loc = .;                 \
    KEEP(*(__mcount_loc))                   \
    MCOUNT_PATCHABLE			    \
    __stop_mcount_loc = .;                  \
    NON_MCOUNT_PATCHABLE		    \
   ...

??

-- Steve

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-18 20:50                                         ` Steven Rostedt
@ 2022-08-18 21:00                                           ` Alexei Starovoitov
  2022-08-18 21:05                                             ` Steven Rostedt
  2022-08-18 21:32                                             ` Jiri Olsa
  2022-08-18 21:14                                           ` Jiri Olsa
  1 sibling, 2 replies; 58+ messages in thread
From: Alexei Starovoitov @ 2022-08-18 21:00 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Jiri Olsa, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf

On Thu, Aug 18, 2022 at 1:50 PM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Thu, 18 Aug 2022 22:27:07 +0200
> Jiri Olsa <olsajiri@gmail.com> wrote:
>
> > ok, so the problem with __attribute__((patchable_function_entry(5))) is that
> > it puts function address into __patchable_function_entries section, which is
> > one of ftrace locations source:
> >
> >   #define MCOUNT_REC()    . = ALIGN(8);     \
> >     __start_mcount_loc = .;                 \
> >     KEEP(*(__mcount_loc))                   \
> >     KEEP(*(__patchable_function_entries))   \
> >     __stop_mcount_loc = .;                  \
> >    ...
> >
> >
> > it looks like __patchable_function_entries is used for other than x86 archs,
> > so we perhaps we could have x86 specific MCOUNT_REC macro just with
> > __mcount_loc section?
>
> So something like this:
>
> #ifdef CONFIG_X86
> # define NON_MCOUNT_PATCHABLE KEEP(*(__patchable_function_entries))
> # define MCOUNT_PATCHABLE
> #else
> # define NON_MCOUNT_PATCHABLE
> # define MCOUNT_PATCHABLE  KEEP(*(__patchable_function_entries))
> #endif
>
>   #define MCOUNT_REC()    . = ALIGN(8);     \
>     __start_mcount_loc = .;                 \
>     KEEP(*(__mcount_loc))                   \
>     MCOUNT_PATCHABLE                        \
>     __stop_mcount_loc = .;                  \
>     NON_MCOUNT_PATCHABLE                    \
>    ...
>
> ??

That's what more or less Peter's patch is doing:
Here it is again for reference:
https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?id=8d075bdf11193f1d276bf19fa56b4b8dfe24df9e

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-18 21:00                                           ` Alexei Starovoitov
@ 2022-08-18 21:05                                             ` Steven Rostedt
  2022-08-18 21:32                                             ` Jiri Olsa
  1 sibling, 0 replies; 58+ messages in thread
From: Steven Rostedt @ 2022-08-18 21:05 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jiri Olsa, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Ingo Molnar, bpf, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, LKML, Josh Poimboeuf

On Thu, 18 Aug 2022 14:00:21 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> > #ifdef CONFIG_X86
> > # define NON_MCOUNT_PATCHABLE KEEP(*(__patchable_function_entries))
> > # define MCOUNT_PATCHABLE
> > #else
> > # define NON_MCOUNT_PATCHABLE
> > # define MCOUNT_PATCHABLE  KEEP(*(__patchable_function_entries))
> > #endif
> >
> >   #define MCOUNT_REC()    . = ALIGN(8);     \
> >     __start_mcount_loc = .;                 \
> >     KEEP(*(__mcount_loc))                   \
> >     MCOUNT_PATCHABLE                        \
> >     __stop_mcount_loc = .;                  \
> >     NON_MCOUNT_PATCHABLE                    \
> >    ...
> >
> > ??  
> 
> That's what more or less Peter's patch is doing:

Heh.

> Here it is again for reference:
> https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?id=8d075bdf11193f1d276bf19fa56b4b8dfe24df9e

Thanks, I missed seeing this.

-- Steve

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-18 20:50                                         ` Steven Rostedt
  2022-08-18 21:00                                           ` Alexei Starovoitov
@ 2022-08-18 21:14                                           ` Jiri Olsa
  1 sibling, 0 replies; 58+ messages in thread
From: Jiri Olsa @ 2022-08-18 21:14 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Jiri Olsa, Peter Zijlstra, Alexei Starovoitov,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, LKML,
	Josh Poimboeuf

On Thu, Aug 18, 2022 at 04:50:24PM -0400, Steven Rostedt wrote:
> On Thu, 18 Aug 2022 22:27:07 +0200
> Jiri Olsa <olsajiri@gmail.com> wrote:
> 
> > ok, so the problem with __attribute__((patchable_function_entry(5))) is that
> > it puts function address into __patchable_function_entries section, which is
> > one of ftrace locations source:
> > 
> >   #define MCOUNT_REC()    . = ALIGN(8);     \
> >     __start_mcount_loc = .;                 \
> >     KEEP(*(__mcount_loc))                   \
> >     KEEP(*(__patchable_function_entries))   \
> >     __stop_mcount_loc = .;                  \
> >    ...
> > 
> > 
> > it looks like __patchable_function_entries is used for other than x86 archs,
> > so we perhaps we could have x86 specific MCOUNT_REC macro just with
> > __mcount_loc section?
> 
> So something like this:
> 
> #ifdef CONFIG_X86
> # define NON_MCOUNT_PATCHABLE KEEP(*(__patchable_function_entries))
> # define MCOUNT_PATCHABLE
> #else
> # define NON_MCOUNT_PATCHABLE
> # define MCOUNT_PATCHABLE  KEEP(*(__patchable_function_entries))
> #endif
> 
>   #define MCOUNT_REC()    . = ALIGN(8);     \
>     __start_mcount_loc = .;                 \
>     KEEP(*(__mcount_loc))                   \
>     MCOUNT_PATCHABLE			    \
>     __stop_mcount_loc = .;                  \
>     NON_MCOUNT_PATCHABLE		    \
>    ...
> 

is there a reason to keep NON_MCOUNT_PATCHABLE section for x86?  otherwise LGTM

jirka

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-18 21:00                                           ` Alexei Starovoitov
  2022-08-18 21:05                                             ` Steven Rostedt
@ 2022-08-18 21:32                                             ` Jiri Olsa
  2022-08-19 11:45                                               ` Jiri Olsa
  1 sibling, 1 reply; 58+ messages in thread
From: Jiri Olsa @ 2022-08-18 21:32 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Steven Rostedt, Jiri Olsa, Peter Zijlstra, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Ingo Molnar, bpf,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo, LKML, Josh Poimboeuf

On Thu, Aug 18, 2022 at 02:00:21PM -0700, Alexei Starovoitov wrote:
> On Thu, Aug 18, 2022 at 1:50 PM Steven Rostedt <rostedt@goodmis.org> wrote:
> >
> > On Thu, 18 Aug 2022 22:27:07 +0200
> > Jiri Olsa <olsajiri@gmail.com> wrote:
> >
> > > ok, so the problem with __attribute__((patchable_function_entry(5))) is that
> > > it puts function address into __patchable_function_entries section, which is
> > > one of ftrace locations source:
> > >
> > >   #define MCOUNT_REC()    . = ALIGN(8);     \
> > >     __start_mcount_loc = .;                 \
> > >     KEEP(*(__mcount_loc))                   \
> > >     KEEP(*(__patchable_function_entries))   \
> > >     __stop_mcount_loc = .;                  \
> > >    ...
> > >
> > >
> > > it looks like __patchable_function_entries is used for other than x86 archs,
> > > so we perhaps we could have x86 specific MCOUNT_REC macro just with
> > > __mcount_loc section?
> >
> > So something like this:
> >
> > #ifdef CONFIG_X86
> > # define NON_MCOUNT_PATCHABLE KEEP(*(__patchable_function_entries))
> > # define MCOUNT_PATCHABLE
> > #else
> > # define NON_MCOUNT_PATCHABLE
> > # define MCOUNT_PATCHABLE  KEEP(*(__patchable_function_entries))
> > #endif
> >
> >   #define MCOUNT_REC()    . = ALIGN(8);     \
> >     __start_mcount_loc = .;                 \
> >     KEEP(*(__mcount_loc))                   \
> >     MCOUNT_PATCHABLE                        \
> >     __stop_mcount_loc = .;                  \
> >     NON_MCOUNT_PATCHABLE                    \
> >    ...
> >
> > ??
> 
> That's what more or less Peter's patch is doing:
> Here it is again for reference:
> https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?id=8d075bdf11193f1d276bf19fa56b4b8dfe24df9e

ah nice, and discards the __patchable_function_entries section, great

jirka

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-18 21:32                                             ` Jiri Olsa
@ 2022-08-19 11:45                                               ` Jiri Olsa
  2022-08-23 17:23                                                 ` Alexei Starovoitov
  0 siblings, 1 reply; 58+ messages in thread
From: Jiri Olsa @ 2022-08-19 11:45 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Alexei Starovoitov, Steven Rostedt, Peter Zijlstra,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Ingo Molnar, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, LKML,
	Josh Poimboeuf

On Thu, Aug 18, 2022 at 11:32:55PM +0200, Jiri Olsa wrote:
> On Thu, Aug 18, 2022 at 02:00:21PM -0700, Alexei Starovoitov wrote:
> > On Thu, Aug 18, 2022 at 1:50 PM Steven Rostedt <rostedt@goodmis.org> wrote:
> > >
> > > On Thu, 18 Aug 2022 22:27:07 +0200
> > > Jiri Olsa <olsajiri@gmail.com> wrote:
> > >
> > > > ok, so the problem with __attribute__((patchable_function_entry(5))) is that
> > > > it puts function address into __patchable_function_entries section, which is
> > > > one of ftrace locations source:
> > > >
> > > >   #define MCOUNT_REC()    . = ALIGN(8);     \
> > > >     __start_mcount_loc = .;                 \
> > > >     KEEP(*(__mcount_loc))                   \
> > > >     KEEP(*(__patchable_function_entries))   \
> > > >     __stop_mcount_loc = .;                  \
> > > >    ...
> > > >
> > > >
> > > > it looks like __patchable_function_entries is used for other than x86 archs,
> > > > so we perhaps we could have x86 specific MCOUNT_REC macro just with
> > > > __mcount_loc section?
> > >
> > > So something like this:
> > >
> > > #ifdef CONFIG_X86
> > > # define NON_MCOUNT_PATCHABLE KEEP(*(__patchable_function_entries))
> > > # define MCOUNT_PATCHABLE
> > > #else
> > > # define NON_MCOUNT_PATCHABLE
> > > # define MCOUNT_PATCHABLE  KEEP(*(__patchable_function_entries))
> > > #endif
> > >
> > >   #define MCOUNT_REC()    . = ALIGN(8);     \
> > >     __start_mcount_loc = .;                 \
> > >     KEEP(*(__mcount_loc))                   \
> > >     MCOUNT_PATCHABLE                        \
> > >     __stop_mcount_loc = .;                  \
> > >     NON_MCOUNT_PATCHABLE                    \
> > >    ...
> > >
> > > ??
> > 
> > That's what more or less Peter's patch is doing:
> > Here it is again for reference:
> > https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?id=8d075bdf11193f1d276bf19fa56b4b8dfe24df9e
> 
> ah nice, and discards the __patchable_function_entries section, great
> 

tested change below with Peter's change above and it seems to work,
once it get merged I'll send full patch

thanks,
jirka


---
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 39bd36359c1e..39b6807058e9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -924,6 +924,8 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
 }
 
 #define DEFINE_BPF_DISPATCHER(name)					\
+	__attribute__((__no_instrument_function__))			\
+	__attribute__((patchable_function_entry(5)))			\
 	noinline __nocfi unsigned int bpf_dispatcher_##name##_func(	\
 		const void *ctx,					\
 		const struct bpf_insn *insnsi,				\

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-19 11:45                                               ` Jiri Olsa
@ 2022-08-23 17:23                                                 ` Alexei Starovoitov
  2022-08-26  8:00                                                   ` Jiri Olsa
  0 siblings, 1 reply; 58+ messages in thread
From: Alexei Starovoitov @ 2022-08-23 17:23 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Steven Rostedt, Peter Zijlstra, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Ingo Molnar, bpf,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo, LKML, Josh Poimboeuf

On Fri, Aug 19, 2022 at 4:45 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Thu, Aug 18, 2022 at 11:32:55PM +0200, Jiri Olsa wrote:
> > On Thu, Aug 18, 2022 at 02:00:21PM -0700, Alexei Starovoitov wrote:
> > > On Thu, Aug 18, 2022 at 1:50 PM Steven Rostedt <rostedt@goodmis.org> wrote:
> > > >
> > > > On Thu, 18 Aug 2022 22:27:07 +0200
> > > > Jiri Olsa <olsajiri@gmail.com> wrote:
> > > >
> > > > > ok, so the problem with __attribute__((patchable_function_entry(5))) is that
> > > > > it puts function address into __patchable_function_entries section, which is
> > > > > one of ftrace locations source:
> > > > >
> > > > >   #define MCOUNT_REC()    . = ALIGN(8);     \
> > > > >     __start_mcount_loc = .;                 \
> > > > >     KEEP(*(__mcount_loc))                   \
> > > > >     KEEP(*(__patchable_function_entries))   \
> > > > >     __stop_mcount_loc = .;                  \
> > > > >    ...
> > > > >
> > > > >
> > > > > it looks like __patchable_function_entries is used for other than x86 archs,
> > > > > so we perhaps we could have x86 specific MCOUNT_REC macro just with
> > > > > __mcount_loc section?
> > > >
> > > > So something like this:
> > > >
> > > > #ifdef CONFIG_X86
> > > > # define NON_MCOUNT_PATCHABLE KEEP(*(__patchable_function_entries))
> > > > # define MCOUNT_PATCHABLE
> > > > #else
> > > > # define NON_MCOUNT_PATCHABLE
> > > > # define MCOUNT_PATCHABLE  KEEP(*(__patchable_function_entries))
> > > > #endif
> > > >
> > > >   #define MCOUNT_REC()    . = ALIGN(8);     \
> > > >     __start_mcount_loc = .;                 \
> > > >     KEEP(*(__mcount_loc))                   \
> > > >     MCOUNT_PATCHABLE                        \
> > > >     __stop_mcount_loc = .;                  \
> > > >     NON_MCOUNT_PATCHABLE                    \
> > > >    ...
> > > >
> > > > ??
> > >
> > > That's what more or less Peter's patch is doing:
> > > Here it is again for reference:
> > > https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?id=8d075bdf11193f1d276bf19fa56b4b8dfe24df9e
> >
> > ah nice, and discards the __patchable_function_entries section, great
> >
>
> tested change below with Peter's change above and it seems to work,
> once it get merged I'll send full patch

Peter,
what is the ETA to land your changes?
That particular commit is detached in your git tree.
Did you move it to a different branch?

Just trying to figure out the logistics to land Jiri's fix below.
We can take it into bpf-next, since it's harmless as-is,
but it won't have an effect until your change lands.
Sounds like they both will get in during the next merge window?

> thanks,
> jirka
>
>
> ---
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 39bd36359c1e..39b6807058e9 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -924,6 +924,8 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
>  }
>
>  #define DEFINE_BPF_DISPATCHER(name)                                    \
> +       __attribute__((__no_instrument_function__))                     \
> +       __attribute__((patchable_function_entry(5)))                    \
>         noinline __nocfi unsigned int bpf_dispatcher_##name##_func(     \
>                 const void *ctx,                                        \
>                 const struct bpf_insn *insnsi,                          \

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [RFC] ftrace: Add support to keep some functions out of ftrace
  2022-08-23 17:23                                                 ` Alexei Starovoitov
@ 2022-08-26  8:00                                                   ` Jiri Olsa
  0 siblings, 0 replies; 58+ messages in thread
From: Jiri Olsa @ 2022-08-26  8:00 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jiri Olsa, Steven Rostedt, Peter Zijlstra, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Ingo Molnar, bpf,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo, LKML, Josh Poimboeuf

On Tue, Aug 23, 2022 at 10:23:24AM -0700, Alexei Starovoitov wrote:
> On Fri, Aug 19, 2022 at 4:45 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> >
> > On Thu, Aug 18, 2022 at 11:32:55PM +0200, Jiri Olsa wrote:
> > > On Thu, Aug 18, 2022 at 02:00:21PM -0700, Alexei Starovoitov wrote:
> > > > On Thu, Aug 18, 2022 at 1:50 PM Steven Rostedt <rostedt@goodmis.org> wrote:
> > > > >
> > > > > On Thu, 18 Aug 2022 22:27:07 +0200
> > > > > Jiri Olsa <olsajiri@gmail.com> wrote:
> > > > >
> > > > > > ok, so the problem with __attribute__((patchable_function_entry(5))) is that
> > > > > > it puts function address into __patchable_function_entries section, which is
> > > > > > one of ftrace locations source:
> > > > > >
> > > > > >   #define MCOUNT_REC()    . = ALIGN(8);     \
> > > > > >     __start_mcount_loc = .;                 \
> > > > > >     KEEP(*(__mcount_loc))                   \
> > > > > >     KEEP(*(__patchable_function_entries))   \
> > > > > >     __stop_mcount_loc = .;                  \
> > > > > >    ...
> > > > > >
> > > > > >
> > > > > > it looks like __patchable_function_entries is used for other than x86 archs,
> > > > > > so we perhaps we could have x86 specific MCOUNT_REC macro just with
> > > > > > __mcount_loc section?
> > > > >
> > > > > So something like this:
> > > > >
> > > > > #ifdef CONFIG_X86
> > > > > # define NON_MCOUNT_PATCHABLE KEEP(*(__patchable_function_entries))
> > > > > # define MCOUNT_PATCHABLE
> > > > > #else
> > > > > # define NON_MCOUNT_PATCHABLE
> > > > > # define MCOUNT_PATCHABLE  KEEP(*(__patchable_function_entries))
> > > > > #endif
> > > > >
> > > > >   #define MCOUNT_REC()    . = ALIGN(8);     \
> > > > >     __start_mcount_loc = .;                 \
> > > > >     KEEP(*(__mcount_loc))                   \
> > > > >     MCOUNT_PATCHABLE                        \
> > > > >     __stop_mcount_loc = .;                  \
> > > > >     NON_MCOUNT_PATCHABLE                    \
> > > > >    ...
> > > > >
> > > > > ??
> > > >
> > > > That's what more or less Peter's patch is doing:
> > > > Here it is again for reference:
> > > > https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?id=8d075bdf11193f1d276bf19fa56b4b8dfe24df9e
> > >
> > > ah nice, and discards the __patchable_function_entries section, great
> > >
> >
> > tested change below with Peter's change above and it seems to work,
> > once it get merged I'll send full patch
> 
> Peter,
> what is the ETA to land your changes?
> That particular commit is detached in your git tree.
> Did you move it to a different branch?
> 
> Just trying to figure out the logistics to land Jiri's fix below.
> We can take it into bpf-next, since it's harmless as-is,
> but it won't have an effect until your change lands.
> Sounds like they both will get in during the next merge window?

I discussed with Peter and I'll send his change together with my fix

jirka

^ permalink raw reply	[flat|nested] 58+ messages in thread

end of thread, other threads:[~2022-08-26  8:01 UTC | newest]

Thread overview: 58+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-22 11:08 [RFC] ftrace: Add support to keep some functions out of ftrace Jiri Olsa
2022-07-22 11:26 ` Steven Rostedt
2022-07-22 16:04   ` Alexei Starovoitov
2022-07-22 16:08     ` Steven Rostedt
2022-07-22 16:25       ` Steven Rostedt
2022-07-22 16:53         ` Alexei Starovoitov
2022-07-22 17:14           ` Steven Rostedt
2022-07-22 21:05         ` Jiri Olsa
2022-07-22 21:41           ` Steven Rostedt
2022-07-23  3:53             ` Steven Rostedt
2022-07-23  3:56               ` Steven Rostedt
2022-07-25  7:00                 ` Jiri Olsa
2022-07-23 21:39             ` Jiri Olsa
2022-08-12 21:18               ` Jiri Olsa
2022-08-12 21:50                 ` Steven Rostedt
2022-08-13 19:02                 ` Steven Rostedt
2022-08-14 11:32                   ` Steven Rostedt
2022-08-14 15:22                   ` Jiri Olsa
2022-08-15  2:07                     ` Chen Zhongjin
2022-08-15  8:04                       ` Jiri Olsa
2022-08-15 11:01                         ` Björn Töpel
2022-08-15 11:29                           ` Jiri Olsa
2022-08-15 12:19                             ` Björn Töpel
2022-08-15 12:30                               ` Björn Töpel
2022-08-15  8:03                   ` Peter Zijlstra
2022-08-15  9:44                     ` Jiri Olsa
2022-08-15 12:37                       ` Peter Zijlstra
2022-08-15 14:25                         ` Alexei Starovoitov
2022-08-15 14:33                           ` Peter Zijlstra
2022-08-15 14:45                             ` Alexei Starovoitov
2022-08-15 15:02                               ` Peter Zijlstra
2022-08-15 15:17                                 ` Alexei Starovoitov
2022-08-15 15:28                                   ` Peter Zijlstra
2022-08-15 15:35                                     ` Alexei Starovoitov
2022-08-15 15:44                                       ` Steven Rostedt
2022-08-15 15:53                                         ` Alexei Starovoitov
2022-08-15 16:13                                           ` Steven Rostedt
2022-08-15 15:48                                       ` Peter Zijlstra
2022-08-16  6:56                                         ` Jiri Olsa
2022-08-17  9:29                                           ` Jiri Olsa
2022-08-17 16:57                                             ` Alexei Starovoitov
2022-08-17 19:39                                               ` Jiri Olsa
2022-08-15 15:41                                     ` Peter Zijlstra
2022-08-15 15:49                                       ` Alexei Starovoitov
2022-08-15 16:08                                         ` Steven Rostedt
2022-08-18 20:27                                       ` Jiri Olsa
2022-08-18 20:50                                         ` Steven Rostedt
2022-08-18 21:00                                           ` Alexei Starovoitov
2022-08-18 21:05                                             ` Steven Rostedt
2022-08-18 21:32                                             ` Jiri Olsa
2022-08-19 11:45                                               ` Jiri Olsa
2022-08-23 17:23                                                 ` Alexei Starovoitov
2022-08-26  8:00                                                   ` Jiri Olsa
2022-08-18 21:14                                           ` Jiri Olsa
2022-08-15 15:32                                   ` Steven Rostedt
2022-07-22 16:26       ` Jiri Olsa
2022-07-22 16:37         ` Steven Rostedt
2022-07-22 16:54           ` Alexei Starovoitov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).