From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
To: davem@davemloft.net
Cc: daniel@iogearbox.net, bpf@vger.kernel.org, kernel-team@fb.com
Subject: [PATCH v3 bpf-next 2/8] bpf: Compute program stats for sleepable programs
Date: Tue, 9 Feb 2021 11:48:50 -0800 [thread overview]
Message-ID: <20210209194856.24269-3-alexei.starovoitov@gmail.com> (raw)
In-Reply-To: <20210209194856.24269-1-alexei.starovoitov@gmail.com>
From: Alexei Starovoitov <ast@kernel.org>
In older non-RT kernels, migrate_disable() was the same as preempt_disable().
Since commit 74d862b682f5 ("sched: Make migrate_disable/enable() independent of RT"),
migrate_disable() has a real implementation on non-RT kernels as well and doesn't prevent sleeping.
Use it to efficiently compute execution stats for sleepable bpf programs.
migrate_disable() will also be used to enable per-cpu maps in sleepable programs
in future patches.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
---
arch/x86/net/bpf_jit_comp.c | 31 ++++++++++----------------
include/linux/bpf.h | 4 ++--
kernel/bpf/trampoline.c | 44 +++++++++++++++++++++++++------------
3 files changed, 44 insertions(+), 35 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a3dc3bd154ac..d11b9bcebbea 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1742,15 +1742,12 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
u8 *prog = *pprog;
int cnt = 0;
- if (p->aux->sleepable) {
- if (emit_call(&prog, __bpf_prog_enter_sleepable, prog))
+ if (emit_call(&prog,
+ p->aux->sleepable ? __bpf_prog_enter_sleepable :
+ __bpf_prog_enter, prog))
return -EINVAL;
- } else {
- if (emit_call(&prog, __bpf_prog_enter, prog))
- return -EINVAL;
- /* remember prog start time returned by __bpf_prog_enter */
- emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
- }
+ /* remember prog start time returned by __bpf_prog_enter */
+ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
/* arg1: lea rdi, [rbp - stack_size] */
EMIT4(0x48, 0x8D, 0x7D, -stack_size);
@@ -1770,18 +1767,14 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
if (mod_ret)
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
- if (p->aux->sleepable) {
- if (emit_call(&prog, __bpf_prog_exit_sleepable, prog))
+ /* arg1: mov rdi, progs[i] */
+ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
+ /* arg2: mov rsi, rbx <- start time in nsec */
+ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+ if (emit_call(&prog,
+ p->aux->sleepable ? __bpf_prog_exit_sleepable :
+ __bpf_prog_exit, prog))
return -EINVAL;
- } else {
- /* arg1: mov rdi, progs[i] */
- emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
- (u32) (long) p);
- /* arg2: mov rsi, rbx <- start time in nsec */
- emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
- if (emit_call(&prog, __bpf_prog_exit, prog))
- return -EINVAL;
- }
*pprog = prog;
return 0;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 026fa8873c5d..2fa48439ef31 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -563,8 +563,8 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
/* these two functions are called from generated trampoline */
u64 notrace __bpf_prog_enter(void);
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
-void notrace __bpf_prog_enter_sleepable(void);
-void notrace __bpf_prog_exit_sleepable(void);
+u64 notrace __bpf_prog_enter_sleepable(void);
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
struct bpf_ksym {
unsigned long start;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 5be3beeedd74..48eb021e1421 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -381,55 +381,71 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
mutex_unlock(&trampoline_mutex);
}
+#define NO_START_TIME 0
+static u64 notrace bpf_prog_start_time(void)
+{
+ u64 start = NO_START_TIME;
+
+ if (static_branch_unlikely(&bpf_stats_enabled_key))
+ start = sched_clock();
+ return start;
+}
+
/* The logic is similar to BPF_PROG_RUN, but with an explicit
* rcu_read_lock() and migrate_disable() which are required
* for the trampoline. The macro is split into
- * call _bpf_prog_enter
+ * call __bpf_prog_enter
* call prog->bpf_func
* call __bpf_prog_exit
*/
u64 notrace __bpf_prog_enter(void)
__acquires(RCU)
{
- u64 start = 0;
-
rcu_read_lock();
migrate_disable();
- if (static_branch_unlikely(&bpf_stats_enabled_key))
- start = sched_clock();
- return start;
+ return bpf_prog_start_time();
}
-void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
- __releases(RCU)
+static void notrace update_prog_stats(struct bpf_prog *prog,
+ u64 start)
{
struct bpf_prog_stats *stats;
if (static_branch_unlikely(&bpf_stats_enabled_key) &&
- /* static_key could be enabled in __bpf_prog_enter
- * and disabled in __bpf_prog_exit.
+ /* static_key could be enabled in __bpf_prog_enter*
+ * and disabled in __bpf_prog_exit*.
* And vice versa.
- * Hence check that 'start' is not zero.
+ * Hence check that 'start' is valid.
*/
- start) {
+ start > NO_START_TIME) {
stats = this_cpu_ptr(prog->stats);
u64_stats_update_begin(&stats->syncp);
stats->cnt++;
stats->nsecs += sched_clock() - start;
u64_stats_update_end(&stats->syncp);
}
+}
+
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
+ __releases(RCU)
+{
+ update_prog_stats(prog, start);
migrate_enable();
rcu_read_unlock();
}
-void notrace __bpf_prog_enter_sleepable(void)
+u64 notrace __bpf_prog_enter_sleepable(void)
{
rcu_read_lock_trace();
+ migrate_disable();
might_fault();
+ return bpf_prog_start_time();
}
-void notrace __bpf_prog_exit_sleepable(void)
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
{
+ update_prog_stats(prog, start);
+ migrate_enable();
rcu_read_unlock_trace();
}
--
2.24.1
next prev parent reply other threads:[~2021-02-09 21:01 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-02-09 19:48 [PATCH v3 bpf-next 0/8] bpf: Misc improvements Alexei Starovoitov
2021-02-09 19:48 ` [PATCH v3 bpf-next 1/8] bpf: Optimize program stats Alexei Starovoitov
2021-02-09 19:48 ` Alexei Starovoitov [this message]
2021-02-09 22:47 ` [PATCH v3 bpf-next 2/8] bpf: Compute program stats for sleepable programs KP Singh
2021-02-09 23:11 ` Alexei Starovoitov
2021-02-09 23:17 ` KP Singh
2021-02-09 19:48 ` [PATCH v3 bpf-next 3/8] bpf: Add per-program recursion prevention mechanism Alexei Starovoitov
2021-02-09 19:48 ` [PATCH v3 bpf-next 4/8] selftest/bpf: Add a recursion test Alexei Starovoitov
2021-02-09 19:48 ` [PATCH v3 bpf-next 5/8] bpf: Count the number of times recursion was prevented Alexei Starovoitov
2021-02-09 19:48 ` [PATCH v3 bpf-next 6/8] selftests/bpf: Improve recursion selftest Alexei Starovoitov
2021-02-09 19:48 ` [PATCH v3 bpf-next 7/8] bpf: Allows per-cpu maps and map-in-map in sleepable programs Alexei Starovoitov
2021-02-09 21:12 ` KP Singh
2021-02-09 22:31 ` Alexei Starovoitov
2021-02-09 22:43 ` KP Singh
2021-02-09 23:13 ` Alexei Starovoitov
2021-02-09 23:22 ` KP Singh
2021-02-09 19:48 ` [PATCH v3 bpf-next 8/8] selftests/bpf: Add a test for map-in-map and per-cpu maps in sleepable progs Alexei Starovoitov
2021-02-09 21:14 ` KP Singh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210209194856.24269-3-alexei.starovoitov@gmail.com \
--to=alexei.starovoitov@gmail.com \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=kernel-team@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).