bpf.vger.kernel.org archive mirror
* [PATCH v2 bpf-next 0/7] bpf: Misc improvements
@ 2021-02-06 17:03 Alexei Starovoitov
  2021-02-06 17:03 ` [PATCH v2 bpf-next 1/7] bpf: Optimize program stats Alexei Starovoitov
                   ` (6 more replies)
  0 siblings, 7 replies; 22+ messages in thread
From: Alexei Starovoitov @ 2021-02-06 17:03 UTC (permalink / raw)
  To: davem; +Cc: daniel, bpf, kernel-team

From: Alexei Starovoitov <ast@kernel.org>

Several improvements:
- optimize prog stats
- compute stats for sleepable progs
- prevent recursion in fentry/fexit and sleepable progs
- allow map-in-map and per-cpu maps in sleepable progs

Alexei Starovoitov (7):
  bpf: Optimize program stats
  bpf: Compute program stats for sleepable programs
  bpf: Add per-program recursion prevention mechanism
  selftest/bpf: Add a recursion test
  bpf: Count the number of times recursion was prevented
  bpf: Allows per-cpu maps and map-in-map in sleepable programs
  selftests/bpf: Add a test for map-in-map and per-cpu maps in sleepable
    progs

 arch/x86/net/bpf_jit_comp.c                   | 46 ++++++++-----
 include/linux/bpf.h                           | 16 ++---
 include/linux/filter.h                        | 12 +++-
 include/uapi/linux/bpf.h                      |  1 +
 kernel/bpf/core.c                             | 16 +++--
 kernel/bpf/hashtab.c                          |  4 +-
 kernel/bpf/syscall.c                          | 16 +++--
 kernel/bpf/trampoline.c                       | 59 +++++++++++++---
 kernel/bpf/verifier.c                         |  9 ++-
 tools/bpf/bpftool/prog.c                      |  5 ++
 tools/include/uapi/linux/bpf.h                |  1 +
 .../selftests/bpf/prog_tests/fexit_stress.c   |  2 +-
 .../selftests/bpf/prog_tests/recursion.c      | 33 +++++++++
 .../bpf/prog_tests/trampoline_count.c         |  4 +-
 tools/testing/selftests/bpf/progs/lsm.c       | 69 +++++++++++++++++++
 tools/testing/selftests/bpf/progs/recursion.c | 46 +++++++++++++
 16 files changed, 282 insertions(+), 57 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/recursion.c
 create mode 100644 tools/testing/selftests/bpf/progs/recursion.c

-- 
2.24.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v2 bpf-next 1/7] bpf: Optimize program stats
  2021-02-06 17:03 [PATCH v2 bpf-next 0/7] bpf: Misc improvements Alexei Starovoitov
@ 2021-02-06 17:03 ` Alexei Starovoitov
  2021-02-08 18:57   ` Andrii Nakryiko
  2021-02-08 21:28   ` Daniel Borkmann
  2021-02-06 17:03 ` [PATCH v2 bpf-next 2/7] bpf: Compute program stats for sleepable programs Alexei Starovoitov
                   ` (5 subsequent siblings)
  6 siblings, 2 replies; 22+ messages in thread
From: Alexei Starovoitov @ 2021-02-06 17:03 UTC (permalink / raw)
  To: davem; +Cc: daniel, bpf, kernel-team

From: Alexei Starovoitov <ast@kernel.org>

Move bpf_prog_stats from prog->aux into prog to avoid one extra load
in the critical path of program execution.
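
In other words, the stats update on the hot path goes from two dependent
loads to one (illustrative snippet, matching the hunks below):

	/* before: load prog->aux, then aux->stats */
	struct bpf_prog_stats *stats = this_cpu_ptr(prog->aux->stats);

	/* after: stats lives directly in struct bpf_prog */
	struct bpf_prog_stats *stats = this_cpu_ptr(prog->stats);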

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h     |  8 --------
 include/linux/filter.h  | 10 +++++++++-
 kernel/bpf/core.c       |  8 ++++----
 kernel/bpf/syscall.c    |  2 +-
 kernel/bpf/trampoline.c |  2 +-
 kernel/bpf/verifier.c   |  2 +-
 6 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 321966fc35db..026fa8873c5d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -14,7 +14,6 @@
 #include <linux/numa.h>
 #include <linux/mm_types.h>
 #include <linux/wait.h>
-#include <linux/u64_stats_sync.h>
 #include <linux/refcount.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
@@ -507,12 +506,6 @@ enum bpf_cgroup_storage_type {
  */
 #define MAX_BPF_FUNC_ARGS 12
 
-struct bpf_prog_stats {
-	u64 cnt;
-	u64 nsecs;
-	struct u64_stats_sync syncp;
-} __aligned(2 * sizeof(u64));
-
 struct btf_func_model {
 	u8 ret_size;
 	u8 nr_args;
@@ -845,7 +838,6 @@ struct bpf_prog_aux {
 	u32 linfo_idx;
 	u32 num_exentries;
 	struct exception_table_entry *extable;
-	struct bpf_prog_stats __percpu *stats;
 	union {
 		struct work_struct work;
 		struct rcu_head	rcu;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5b3137d7b690..c6592590a0b7 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/sockptr.h>
 #include <crypto/sha1.h>
+#include <linux/u64_stats_sync.h>
 
 #include <net/sch_generic.h>
 
@@ -539,6 +540,12 @@ struct bpf_binary_header {
 	u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
 };
 
+struct bpf_prog_stats {
+	u64 cnt;
+	u64 nsecs;
+	struct u64_stats_sync syncp;
+} __aligned(2 * sizeof(u64));
+
 struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	u16			jited:1,	/* Is our filter JIT'ed? */
@@ -559,6 +566,7 @@ struct bpf_prog {
 	u8			tag[BPF_TAG_SIZE];
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
+	struct bpf_prog_stats __percpu *stats;
 	unsigned int		(*bpf_func)(const void *ctx,
 					    const struct bpf_insn *insn);
 	/* Instructions for interpreter */
@@ -581,7 +589,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 		struct bpf_prog_stats *__stats;				\
 		u64 __start = sched_clock();				\
 		__ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);	\
-		__stats = this_cpu_ptr(prog->aux->stats);		\
+		__stats = this_cpu_ptr(prog->stats);			\
 		u64_stats_update_begin(&__stats->syncp);		\
 		__stats->cnt++;						\
 		__stats->nsecs += sched_clock() - __start;		\
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5bbd4884ff7a..fa3da4cda476 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -114,8 +114,8 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 	if (!prog)
 		return NULL;
 
-	prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
-	if (!prog->aux->stats) {
+	prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
+	if (!prog->stats) {
 		kfree(prog->aux);
 		vfree(prog);
 		return NULL;
@@ -124,7 +124,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 	for_each_possible_cpu(cpu) {
 		struct bpf_prog_stats *pstats;
 
-		pstats = per_cpu_ptr(prog->aux->stats, cpu);
+		pstats = per_cpu_ptr(prog->stats, cpu);
 		u64_stats_init(&pstats->syncp);
 	}
 	return prog;
@@ -249,7 +249,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
 	if (fp->aux) {
 		mutex_destroy(&fp->aux->used_maps_mutex);
 		mutex_destroy(&fp->aux->dst_mutex);
-		free_percpu(fp->aux->stats);
+		free_percpu(fp->stats);
 		kfree(fp->aux->poke_tab);
 		kfree(fp->aux);
 	}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e5999d86c76e..f7df56a704de 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1739,7 +1739,7 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog,
 		unsigned int start;
 		u64 tnsecs, tcnt;
 
-		st = per_cpu_ptr(prog->aux->stats, cpu);
+		st = per_cpu_ptr(prog->stats, cpu);
 		do {
 			start = u64_stats_fetch_begin_irq(&st->syncp);
 			tnsecs = st->nsecs;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 35c5887d82ff..5be3beeedd74 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -412,7 +412,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
 	     * Hence check that 'start' is not zero.
 	     */
 	    start) {
-		stats = this_cpu_ptr(prog->aux->stats);
+		stats = this_cpu_ptr(prog->stats);
 		u64_stats_update_begin(&stats->syncp);
 		stats->cnt++;
 		stats->nsecs += sched_clock() - start;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 15694246f854..4189edb41b73 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -10889,7 +10889,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		/* BPF_PROG_RUN doesn't call subprogs directly,
 		 * hence main prog stats include the runtime of subprogs.
 		 * subprogs don't have IDs and not reachable via prog_get_next_id
-		 * func[i]->aux->stats will never be accessed and stays NULL
+		 * func[i]->stats will never be accessed and stays NULL
 		 */
 		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
 		if (!func[i])
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v2 bpf-next 2/7] bpf: Compute program stats for sleepable programs
  2021-02-06 17:03 [PATCH v2 bpf-next 0/7] bpf: Misc improvements Alexei Starovoitov
  2021-02-06 17:03 ` [PATCH v2 bpf-next 1/7] bpf: Optimize program stats Alexei Starovoitov
@ 2021-02-06 17:03 ` Alexei Starovoitov
  2021-02-08 20:35   ` Andrii Nakryiko
  2021-02-06 17:03 ` [PATCH v2 bpf-next 3/7] bpf: Add per-program recursion prevention mechanism Alexei Starovoitov
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 22+ messages in thread
From: Alexei Starovoitov @ 2021-02-06 17:03 UTC (permalink / raw)
  To: davem; +Cc: daniel, bpf, kernel-team

From: Alexei Starovoitov <ast@kernel.org>

In older non-RT kernels migrate_disable() was the same as preempt_disable().
Since commit 74d862b682f5 ("sched: Make migrate_disable/enable() independent of RT")
migrate_disable() is real and doesn't prevent sleeping.
Use it to efficiently compute execution stats for sleepable bpf programs.
migrate_disable() will also be used to enable per-cpu maps in sleepable programs
in later patches.
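
With this change the trampoline-generated sequence for a sleepable program
mirrors the non-sleepable one. A rough C-level sketch of the emitted code
(not literal source):

	/* rcu_read_lock_trace() + migrate_disable(), returns start time */
	start = __bpf_prog_enter_sleepable();
	prog->bpf_func(ctx, prog->insnsi);
	/* update stats, then migrate_enable() + rcu_read_unlock_trace() */
	__bpf_prog_exit_sleepable(prog, start);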

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/x86/net/bpf_jit_comp.c | 31 ++++++++++++-------------------
 include/linux/bpf.h         |  4 ++--
 kernel/bpf/trampoline.c     | 27 +++++++++++++++++++++------
 3 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a3dc3bd154ac..d11b9bcebbea 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1742,15 +1742,12 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	u8 *prog = *pprog;
 	int cnt = 0;
 
-	if (p->aux->sleepable) {
-		if (emit_call(&prog, __bpf_prog_enter_sleepable, prog))
+	if (emit_call(&prog,
+		      p->aux->sleepable ? __bpf_prog_enter_sleepable :
+		      __bpf_prog_enter, prog))
 			return -EINVAL;
-	} else {
-		if (emit_call(&prog, __bpf_prog_enter, prog))
-			return -EINVAL;
-		/* remember prog start time returned by __bpf_prog_enter */
-		emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
-	}
+	/* remember prog start time returned by __bpf_prog_enter */
+	emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
 
 	/* arg1: lea rdi, [rbp - stack_size] */
 	EMIT4(0x48, 0x8D, 0x7D, -stack_size);
@@ -1770,18 +1767,14 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	if (mod_ret)
 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
 
-	if (p->aux->sleepable) {
-		if (emit_call(&prog, __bpf_prog_exit_sleepable, prog))
+	/* arg1: mov rdi, progs[i] */
+	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
+	/* arg2: mov rsi, rbx <- start time in nsec */
+	emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+	if (emit_call(&prog,
+		      p->aux->sleepable ? __bpf_prog_exit_sleepable :
+		      __bpf_prog_exit, prog))
 			return -EINVAL;
-	} else {
-		/* arg1: mov rdi, progs[i] */
-		emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
-			       (u32) (long) p);
-		/* arg2: mov rsi, rbx <- start time in nsec */
-		emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
-		if (emit_call(&prog, __bpf_prog_exit, prog))
-			return -EINVAL;
-	}
 
 	*pprog = prog;
 	return 0;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 026fa8873c5d..2fa48439ef31 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -563,8 +563,8 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
 /* these two functions are called from generated trampoline */
 u64 notrace __bpf_prog_enter(void);
 void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
-void notrace __bpf_prog_enter_sleepable(void);
-void notrace __bpf_prog_exit_sleepable(void);
+u64 notrace __bpf_prog_enter_sleepable(void);
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
 
 struct bpf_ksym {
 	unsigned long		 start;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 5be3beeedd74..b1f567514b7e 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -388,10 +388,11 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
  * call prog->bpf_func
  * call __bpf_prog_exit
  */
+#define NO_START_TIME 0
 u64 notrace __bpf_prog_enter(void)
 	__acquires(RCU)
 {
-	u64 start = 0;
+	u64 start = NO_START_TIME;
 
 	rcu_read_lock();
 	migrate_disable();
@@ -400,8 +401,8 @@ u64 notrace __bpf_prog_enter(void)
 	return start;
 }
 
-void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
-	__releases(RCU)
+static void notrace update_prog_stats(struct bpf_prog *prog,
+				      u64 start)
 {
 	struct bpf_prog_stats *stats;
 
@@ -411,25 +412,39 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
 	     * And vice versa.
 	     * Hence check that 'start' is not zero.
 	     */
-	    start) {
+	    start > NO_START_TIME) {
 		stats = this_cpu_ptr(prog->stats);
 		u64_stats_update_begin(&stats->syncp);
 		stats->cnt++;
 		stats->nsecs += sched_clock() - start;
 		u64_stats_update_end(&stats->syncp);
 	}
+}
+
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
+	__releases(RCU)
+{
+	update_prog_stats(prog, start);
 	migrate_enable();
 	rcu_read_unlock();
 }
 
-void notrace __bpf_prog_enter_sleepable(void)
+u64 notrace __bpf_prog_enter_sleepable(void)
 {
+	u64 start = NO_START_TIME;
+
 	rcu_read_lock_trace();
+	migrate_disable();
 	might_fault();
+	if (static_branch_unlikely(&bpf_stats_enabled_key))
+		start = sched_clock();
+	return start;
 }
 
-void notrace __bpf_prog_exit_sleepable(void)
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
 {
+	update_prog_stats(prog, start);
+	migrate_enable();
 	rcu_read_unlock_trace();
 }
 
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v2 bpf-next 3/7] bpf: Add per-program recursion prevention mechanism
  2021-02-06 17:03 [PATCH v2 bpf-next 0/7] bpf: Misc improvements Alexei Starovoitov
  2021-02-06 17:03 ` [PATCH v2 bpf-next 1/7] bpf: Optimize program stats Alexei Starovoitov
  2021-02-06 17:03 ` [PATCH v2 bpf-next 2/7] bpf: Compute program stats for sleepable programs Alexei Starovoitov
@ 2021-02-06 17:03 ` Alexei Starovoitov
  2021-02-08 20:51   ` Andrii Nakryiko
  2021-02-06 17:03 ` [PATCH v2 bpf-next 4/7] selftest/bpf: Add a recursion test Alexei Starovoitov
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 22+ messages in thread
From: Alexei Starovoitov @ 2021-02-06 17:03 UTC (permalink / raw)
  To: davem; +Cc: daniel, bpf, kernel-team

From: Alexei Starovoitov <ast@kernel.org>

Since both sleepable and non-sleepable programs execute under migrate_disable,
add a recursion prevention mechanism to both types of programs when they're
executed via the bpf trampoline.
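
The mechanism is a per-cpu, per-program counter (prog->active in the diff
below). Roughly:

	/* in __bpf_prog_enter*() */
	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1))
		return 0;	/* recursing on this cpu: trampoline skips bpf_func */

	/* in __bpf_prog_exit*() */
	__this_cpu_dec(*(prog->active));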

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/x86/net/bpf_jit_comp.c                   | 15 +++++++++++++
 include/linux/bpf.h                           |  6 ++---
 include/linux/filter.h                        |  1 +
 kernel/bpf/core.c                             |  8 +++++++
 kernel/bpf/trampoline.c                       | 22 ++++++++++++++-----
 .../selftests/bpf/prog_tests/fexit_stress.c   |  2 +-
 .../bpf/prog_tests/trampoline_count.c         |  4 ++--
 7 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index d11b9bcebbea..79e7a0ec1da5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1740,8 +1740,11 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 			   struct bpf_prog *p, int stack_size, bool mod_ret)
 {
 	u8 *prog = *pprog;
+	u8 *jmp_insn;
 	int cnt = 0;
 
+	/* arg1: mov rdi, progs[i] */
+	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
 	if (emit_call(&prog,
 		      p->aux->sleepable ? __bpf_prog_enter_sleepable :
 		      __bpf_prog_enter, prog))
@@ -1749,6 +1752,14 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	/* remember prog start time returned by __bpf_prog_enter */
 	emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
 
+	/* if (__bpf_prog_enter*(prog) == 0)
+	 *	goto skip_exec_of_prog;
+	 */
+	EMIT3(0x48, 0x85, 0xC0);  /* test rax,rax */
+	/* emit 2 nops that will be replaced with JE insn */
+	jmp_insn = prog;
+	emit_nops(&prog, 2);
+
 	/* arg1: lea rdi, [rbp - stack_size] */
 	EMIT4(0x48, 0x8D, 0x7D, -stack_size);
 	/* arg2: progs[i]->insnsi for interpreter */
@@ -1767,6 +1778,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	if (mod_ret)
 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
 
+	/* replace 2 nops with JE insn, since jmp target is known */
+	jmp_insn[0] = X86_JE;
+	jmp_insn[1] = prog - jmp_insn - 2;
+
 	/* arg1: mov rdi, progs[i] */
 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
 	/* arg2: mov rsi, rbx <- start time in nsec */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2fa48439ef31..6f019b06a2fd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -529,7 +529,7 @@ struct btf_func_model {
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
  * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
  */
-#define BPF_MAX_TRAMP_PROGS 40
+#define BPF_MAX_TRAMP_PROGS 38
 
 struct bpf_tramp_progs {
 	struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS];
@@ -561,9 +561,9 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
 				struct bpf_tramp_progs *tprogs,
 				void *orig_call);
 /* these two functions are called from generated trampoline */
-u64 notrace __bpf_prog_enter(void);
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
 void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
-u64 notrace __bpf_prog_enter_sleepable(void);
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
 void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
 
 struct bpf_ksym {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c6592590a0b7..d6c740eac056 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -567,6 +567,7 @@ struct bpf_prog {
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
 	struct bpf_prog_stats __percpu *stats;
+	int __percpu		*active;
 	unsigned int		(*bpf_func)(const void *ctx,
 					    const struct bpf_insn *insn);
 	/* Instructions for interpreter */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index fa3da4cda476..f4560dbe7f31 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -91,6 +91,12 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 		vfree(fp);
 		return NULL;
 	}
+	fp->active = alloc_percpu_gfp(int, GFP_KERNEL_ACCOUNT | gfp_extra_flags);
+	if (!fp->active) {
+		vfree(fp);
+		kfree(aux);
+		return NULL;
+	}
 
 	fp->pages = size / PAGE_SIZE;
 	fp->aux = aux;
@@ -116,6 +122,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 
 	prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
 	if (!prog->stats) {
+		free_percpu(prog->active);
 		kfree(prog->aux);
 		vfree(prog);
 		return NULL;
@@ -250,6 +257,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
 		mutex_destroy(&fp->aux->used_maps_mutex);
 		mutex_destroy(&fp->aux->dst_mutex);
 		free_percpu(fp->stats);
+		free_percpu(fp->active);
 		kfree(fp->aux->poke_tab);
 		kfree(fp->aux);
 	}
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index b1f567514b7e..226f613ab289 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -388,16 +388,21 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
  * call prog->bpf_func
  * call __bpf_prog_exit
  */
-#define NO_START_TIME 0
-u64 notrace __bpf_prog_enter(void)
+#define NO_START_TIME 1
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
 	__acquires(RCU)
 {
 	u64 start = NO_START_TIME;
 
 	rcu_read_lock();
 	migrate_disable();
-	if (static_branch_unlikely(&bpf_stats_enabled_key))
+	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1))
+		return 0;
+	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
 		start = sched_clock();
+		if (unlikely(!start))
+			start = NO_START_TIME;
+	}
 	return start;
 }
 
@@ -425,25 +430,32 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
 	__releases(RCU)
 {
 	update_prog_stats(prog, start);
+	__this_cpu_dec(*(prog->active));
 	migrate_enable();
 	rcu_read_unlock();
 }
 
-u64 notrace __bpf_prog_enter_sleepable(void)
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
 {
 	u64 start = NO_START_TIME;
 
 	rcu_read_lock_trace();
 	migrate_disable();
 	might_fault();
-	if (static_branch_unlikely(&bpf_stats_enabled_key))
+	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1))
+		return 0;
+	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
 		start = sched_clock();
+		if (unlikely(!start))
+			start = NO_START_TIME;
+	}
 	return start;
 }
 
 void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
 {
 	update_prog_stats(prog, start);
+	__this_cpu_dec(*(prog->active));
 	migrate_enable();
 	rcu_read_unlock_trace();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index 3b9dbf7433f0..4698b0d2de36 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -3,7 +3,7 @@
 #include <test_progs.h>
 
 /* x86-64 fits 55 JITed and 43 interpreted progs into half page */
-#define CNT 40
+#define CNT 38
 
 void test_fexit_stress(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index 781c8d11604b..f3022d934e2d 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -4,7 +4,7 @@
 #include <sys/prctl.h>
 #include <test_progs.h>
 
-#define MAX_TRAMP_PROGS 40
+#define MAX_TRAMP_PROGS 38
 
 struct inst {
 	struct bpf_object *obj;
@@ -52,7 +52,7 @@ void test_trampoline_count(void)
 	struct bpf_link *link;
 	char comm[16] = {};
 
-	/* attach 'allowed' 40 trampoline programs */
+	/* attach 'allowed' trampoline programs */
 	for (i = 0; i < MAX_TRAMP_PROGS; i++) {
 		obj = bpf_object__open_file(object, NULL);
 		if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v2 bpf-next 4/7] selftest/bpf: Add a recursion test
  2021-02-06 17:03 [PATCH v2 bpf-next 0/7] bpf: Misc improvements Alexei Starovoitov
                   ` (2 preceding siblings ...)
  2021-02-06 17:03 ` [PATCH v2 bpf-next 3/7] bpf: Add per-program recursion prevention mechanism Alexei Starovoitov
@ 2021-02-06 17:03 ` Alexei Starovoitov
  2021-02-08 20:54   ` Andrii Nakryiko
  2021-02-06 17:03 ` [PATCH v2 bpf-next 5/7] bpf: Count the number of times recursion was prevented Alexei Starovoitov
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 22+ messages in thread
From: Alexei Starovoitov @ 2021-02-06 17:03 UTC (permalink / raw)
  To: davem; +Cc: daniel, bpf, kernel-team

From: Alexei Starovoitov <ast@kernel.org>

Add a recursive non-sleepable fentry program as a test.
All attach points where sleepable progs can execute are non-recursive so far,
so the recursion protection mechanism for sleepable progs cannot be activated yet.
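
The test can be run on its own with the usual test_progs name filter, e.g.:

	./test_progs -t recursion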

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/recursion.c      | 33 +++++++++++++
 tools/testing/selftests/bpf/progs/recursion.c | 46 +++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/recursion.c
 create mode 100644 tools/testing/selftests/bpf/progs/recursion.c

diff --git a/tools/testing/selftests/bpf/prog_tests/recursion.c b/tools/testing/selftests/bpf/prog_tests/recursion.c
new file mode 100644
index 000000000000..16e8eab5a29d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/recursion.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "recursion.skel.h"
+
+void test_recursion(void)
+{
+	struct recursion *skel;
+	int key = 0;
+	int err;
+
+	skel = recursion__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		return;
+
+	err = recursion__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto out;
+
+	ASSERT_EQ(skel->bss->pass1, 0, "pass1 == 0");
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0);
+	ASSERT_EQ(skel->bss->pass1, 1, "pass1 == 1");
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0);
+	ASSERT_EQ(skel->bss->pass1, 2, "pass1 == 2");
+	
+	ASSERT_EQ(skel->bss->pass2, 0, "pass2 == 0");
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0);
+	ASSERT_EQ(skel->bss->pass2, 1, "pass2 == 1");
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0);
+	ASSERT_EQ(skel->bss->pass2, 2, "pass2 == 2");
+out:
+	recursion__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/recursion.c b/tools/testing/selftests/bpf/progs/recursion.c
new file mode 100644
index 000000000000..49f679375b9d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/recursion.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, long);
+} hash1 SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, long);
+} hash2 SEC(".maps");
+
+int pass1 = 0;
+int pass2 = 0;
+
+SEC("fentry/__htab_map_lookup_elem")
+int BPF_PROG(on_lookup, struct bpf_map *map)
+{
+	int key = 0;
+
+	if (map == (void *)&hash1) {
+		pass1++;
+		return 0;
+	}
+	if (map == (void *)&hash2) {
+		pass2++;
+		/* htab_map_gen_lookup() will inline below call
+		 * into direct call to __htab_map_lookup_elem()
+		 */
+		bpf_map_lookup_elem(&hash2, &key);
+		return 0;
+	}
+
+	return 0;
+}
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v2 bpf-next 5/7] bpf: Count the number of times recursion was prevented
  2021-02-06 17:03 [PATCH v2 bpf-next 0/7] bpf: Misc improvements Alexei Starovoitov
                   ` (3 preceding siblings ...)
  2021-02-06 17:03 ` [PATCH v2 bpf-next 4/7] selftest/bpf: Add a recursion test Alexei Starovoitov
@ 2021-02-06 17:03 ` Alexei Starovoitov
  2021-02-08 20:58   ` Andrii Nakryiko
  2021-02-06 17:03 ` [PATCH v2 bpf-next 6/7] bpf: Allows per-cpu maps and map-in-map in sleepable programs Alexei Starovoitov
  2021-02-06 17:03 ` [PATCH v2 bpf-next 7/7] selftests/bpf: Add a test for map-in-map and per-cpu maps in sleepable progs Alexei Starovoitov
  6 siblings, 1 reply; 22+ messages in thread
From: Alexei Starovoitov @ 2021-02-06 17:03 UTC (permalink / raw)
  To: davem; +Cc: daniel, bpf, kernel-team

From: Alexei Starovoitov <ast@kernel.org>

Add a per-program counter for the number of times the recursion prevention
mechanism was triggered and expose it via show_fdinfo and bpf_prog_info.
Teach bpftool to print it.
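
Example of what the new counter looks like in show_fdinfo and in bpftool's
plain output (values are made up; the format follows the strings below):

	# cat /proc/<pid>/fdinfo/<prog-fd>
	...
	run_time_ns:	316876154
	run_cnt:	719
	recursion_misses:	12

	# bpftool prog show id <id>
	... run_time_ns 316876154 run_cnt 719 recursion_misses 12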

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h         |  1 +
 include/uapi/linux/bpf.h       |  1 +
 kernel/bpf/syscall.c           | 14 ++++++++++----
 kernel/bpf/trampoline.c        | 18 ++++++++++++++++--
 tools/bpf/bpftool/prog.c       |  5 +++++
 tools/include/uapi/linux/bpf.h |  1 +
 6 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index d6c740eac056..4c9850b35744 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -543,6 +543,7 @@ struct bpf_binary_header {
 struct bpf_prog_stats {
 	u64 cnt;
 	u64 nsecs;
+	u64 misses;
 	struct u64_stats_sync syncp;
 } __aligned(2 * sizeof(u64));
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c001766adcbc..c547ad1ffe43 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4501,6 +4501,7 @@ struct bpf_prog_info {
 	__aligned_u64 prog_tags;
 	__u64 run_time_ns;
 	__u64 run_cnt;
+	__u64 recursion_misses;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f7df56a704de..c859bc46d06c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1731,25 +1731,28 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
 static void bpf_prog_get_stats(const struct bpf_prog *prog,
 			       struct bpf_prog_stats *stats)
 {
-	u64 nsecs = 0, cnt = 0;
+	u64 nsecs = 0, cnt = 0, misses = 0;
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
 		const struct bpf_prog_stats *st;
 		unsigned int start;
-		u64 tnsecs, tcnt;
+		u64 tnsecs, tcnt, tmisses;
 
 		st = per_cpu_ptr(prog->stats, cpu);
 		do {
 			start = u64_stats_fetch_begin_irq(&st->syncp);
 			tnsecs = st->nsecs;
 			tcnt = st->cnt;
+			tmisses = st->misses;
 		} while (u64_stats_fetch_retry_irq(&st->syncp, start));
 		nsecs += tnsecs;
 		cnt += tcnt;
+		misses += tmisses;
 	}
 	stats->nsecs = nsecs;
 	stats->cnt = cnt;
+	stats->misses = misses;
 }
 
 #ifdef CONFIG_PROC_FS
@@ -1768,14 +1771,16 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 		   "memlock:\t%llu\n"
 		   "prog_id:\t%u\n"
 		   "run_time_ns:\t%llu\n"
-		   "run_cnt:\t%llu\n",
+		   "run_cnt:\t%llu\n"
+		   "recursion_misses:\t%llu\n",
 		   prog->type,
 		   prog->jited,
 		   prog_tag,
 		   prog->pages * 1ULL << PAGE_SHIFT,
 		   prog->aux->id,
 		   stats.nsecs,
-		   stats.cnt);
+		   stats.cnt,
+		   stats.misses);
 }
 #endif
 
@@ -3438,6 +3443,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
 	bpf_prog_get_stats(prog, &stats);
 	info.run_time_ns = stats.nsecs;
 	info.run_cnt = stats.cnt;
+	info.recursion_misses = stats.misses;
 
 	if (!bpf_capable()) {
 		info.jited_prog_len = 0;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 226f613ab289..83b77883bd77 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -381,6 +381,16 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
 	mutex_unlock(&trampoline_mutex);
 }
 
+static void notrace inc_misses_counter(struct bpf_prog *prog)
+{
+	struct bpf_prog_stats *stats;
+
+	stats = this_cpu_ptr(prog->stats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->misses++;
+	u64_stats_update_end(&stats->syncp);
+}
+
 /* The logic is similar to BPF_PROG_RUN, but with an explicit
  * rcu_read_lock() and migrate_disable() which are required
  * for the trampoline. The macro is split into
@@ -396,8 +406,10 @@ u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
 
 	rcu_read_lock();
 	migrate_disable();
-	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1))
+	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
+		inc_misses_counter(prog);
 		return 0;
+	}
 	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
 		start = sched_clock();
 		if (unlikely(!start))
@@ -442,8 +454,10 @@ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
 	rcu_read_lock_trace();
 	migrate_disable();
 	might_fault();
-	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1))
+	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
+		inc_misses_counter(prog);
 		return 0;
+	}
 	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
 		start = sched_clock();
 		if (unlikely(!start))
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 1fe3ba255bad..2e1cd12589c5 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -368,6 +368,8 @@ static void print_prog_header_json(struct bpf_prog_info *info)
 		jsonw_uint_field(json_wtr, "run_time_ns", info->run_time_ns);
 		jsonw_uint_field(json_wtr, "run_cnt", info->run_cnt);
 	}
+	if (info->recursion_misses)
+		jsonw_uint_field(json_wtr, "recursion_misses", info->recursion_misses);
 }
 
 static void print_prog_json(struct bpf_prog_info *info, int fd)
@@ -446,6 +448,9 @@ static void print_prog_header_plain(struct bpf_prog_info *info)
 	if (info->run_time_ns)
 		printf(" run_time_ns %lld run_cnt %lld",
 		       info->run_time_ns, info->run_cnt);
+	if (info->recursion_misses)
+		printf(" recursion_misses %lld",
+		       info->recursion_misses);
 	printf("\n");
 }
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c001766adcbc..c547ad1ffe43 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4501,6 +4501,7 @@ struct bpf_prog_info {
 	__aligned_u64 prog_tags;
 	__u64 run_time_ns;
 	__u64 run_cnt;
+	__u64 recursion_misses;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v2 bpf-next 6/7] bpf: Allows per-cpu maps and map-in-map in sleepable programs
  2021-02-06 17:03 [PATCH v2 bpf-next 0/7] bpf: Misc improvements Alexei Starovoitov
                   ` (4 preceding siblings ...)
  2021-02-06 17:03 ` [PATCH v2 bpf-next 5/7] bpf: Count the number of times recursion was prevented Alexei Starovoitov
@ 2021-02-06 17:03 ` Alexei Starovoitov
  2021-02-08 21:00   ` Andrii Nakryiko
  2021-02-06 17:03 ` [PATCH v2 bpf-next 7/7] selftests/bpf: Add a test for map-in-map and per-cpu maps in sleepable progs Alexei Starovoitov
  6 siblings, 1 reply; 22+ messages in thread
From: Alexei Starovoitov @ 2021-02-06 17:03 UTC (permalink / raw)
  To: davem; +Cc: daniel, bpf, kernel-team

From: Alexei Starovoitov <ast@kernel.org>

Since sleepable programs now execute under migrate_disable,
per-cpu maps are safe to use in them.
Map-in-map has been ok to use in sleepable progs from the time sleepable
progs were introduced.

Note that non-preallocated maps are still not safe, since there is
no rcu_read_lock yet in sleepable programs and dynamically allocated
map elements rely on rcu protection. Sleepable programs
have rcu_read_lock_trace instead. That limitation will be addressed
in the future.
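
For clarity, "preallocated" means the map is defined without
BPF_F_NO_PREALLOC. Illustrative map definitions in the usual libbpf BTF
style (hypothetical names; the selftest in patch 7 has real examples):

	struct {
		__uint(type, BPF_MAP_TYPE_PERCPU_HASH);	/* preallocated by default: now allowed */
		__uint(max_entries, 1);
		__type(key, __u32);
		__type(value, __u64);
	} ok_map SEC(".maps");

	struct {
		__uint(type, BPF_MAP_TYPE_HASH);
		__uint(map_flags, BPF_F_NO_PREALLOC);	/* still rejected for sleepable progs */
		__uint(max_entries, 1);
		__type(key, __u32);
		__type(value, __u64);
	} no_prealloc_map SEC(".maps");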

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/hashtab.c  | 4 ++--
 kernel/bpf/verifier.c | 7 ++++++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index c1ac7f964bc9..d63912e73ad9 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1148,7 +1148,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 		/* unknown flags */
 		return -EINVAL;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
 	key_size = map->key_size;
 
@@ -1202,7 +1202,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 		/* unknown flags */
 		return -EINVAL;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
 	key_size = map->key_size;
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 4189edb41b73..9561f2af7710 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -10020,9 +10020,14 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 		case BPF_MAP_TYPE_HASH:
 		case BPF_MAP_TYPE_LRU_HASH:
 		case BPF_MAP_TYPE_ARRAY:
+		case BPF_MAP_TYPE_PERCPU_HASH:
+		case BPF_MAP_TYPE_PERCPU_ARRAY:
+		case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+		case BPF_MAP_TYPE_HASH_OF_MAPS:
 			if (!is_preallocated_map(map)) {
 				verbose(env,
-					"Sleepable programs can only use preallocated hash maps\n");
+					"Sleepable programs can only use preallocated maps\n");
 				return -EINVAL;
 			}
 			break;
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v2 bpf-next 7/7] selftests/bpf: Add a test for map-in-map and per-cpu maps in sleepable progs
  2021-02-06 17:03 [PATCH v2 bpf-next 0/7] bpf: Misc improvements Alexei Starovoitov
                   ` (5 preceding siblings ...)
  2021-02-06 17:03 ` [PATCH v2 bpf-next 6/7] bpf: Allows per-cpu maps and map-in-map in sleepable programs Alexei Starovoitov
@ 2021-02-06 17:03 ` Alexei Starovoitov
  2021-02-08 21:04   ` Andrii Nakryiko
  6 siblings, 1 reply; 22+ messages in thread
From: Alexei Starovoitov @ 2021-02-06 17:03 UTC (permalink / raw)
  To: davem; +Cc: daniel, bpf, kernel-team

From: Alexei Starovoitov <ast@kernel.org>

Add a basic test for map-in-map and per-cpu maps in sleepable programs.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/lsm.c | 69 +++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index ff4d343b94b5..33694ef8acfa 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -30,6 +30,53 @@ struct {
 	__type(value, __u64);
 } lru_hash SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} percpu_array SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} percpu_hash SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} lru_percpu_hash SEC(".maps");
+
+struct inner_map {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, __u64);
+} inner_map SEC(".maps");
+
+struct outer_arr {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	__array(values, struct inner_map);
+} outer_arr SEC(".maps") = {
+	.values = { [0] = &inner_map },
+};
+
+struct outer_hash {
+	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(int));
+	__array(values, struct inner_map);
+} outer_hash SEC(".maps") = {
+	.values = { [0] = &inner_map },
+};
+
 char _license[] SEC("license") = "GPL";
 
 int monitored_pid = 0;
@@ -61,6 +108,7 @@ SEC("lsm.s/bprm_committed_creds")
 int BPF_PROG(test_void_hook, struct linux_binprm *bprm)
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
+	struct inner_map *inner_map;
 	char args[64];
 	__u32 key = 0;
 	__u64 *value;
@@ -80,6 +128,27 @@ int BPF_PROG(test_void_hook, struct linux_binprm *bprm)
 	value = bpf_map_lookup_elem(&lru_hash, &key);
 	if (value)
 		*value = 0;
+	value = bpf_map_lookup_elem(&percpu_array, &key);
+	if (value)
+		*value = 0;
+	value = bpf_map_lookup_elem(&percpu_hash, &key);
+	if (value)
+		*value = 0;
+	value = bpf_map_lookup_elem(&lru_percpu_hash, &key);
+	if (value)
+		*value = 0;
+	inner_map = bpf_map_lookup_elem(&outer_arr, &key);
+	if (inner_map) {
+		value = bpf_map_lookup_elem(inner_map, &key);
+		if (value)
+			*value = 0;
+	}
+	inner_map = bpf_map_lookup_elem(&outer_hash, &key);
+	if (inner_map) {
+		value = bpf_map_lookup_elem(inner_map, &key);
+		if (value)
+			*value = 0;
+	}
 
 	return 0;
 }
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 bpf-next 1/7] bpf: Optimize program stats
  2021-02-06 17:03 ` [PATCH v2 bpf-next 1/7] bpf: Optimize program stats Alexei Starovoitov
@ 2021-02-08 18:57   ` Andrii Nakryiko
  2021-02-08 21:28   ` Daniel Borkmann
  1 sibling, 0 replies; 22+ messages in thread
From: Andrii Nakryiko @ 2021-02-08 18:57 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: David S. Miller, Daniel Borkmann, bpf, Kernel Team

On Sat, Feb 6, 2021 at 9:05 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> From: Alexei Starovoitov <ast@kernel.org>
>
> Move bpf_prog_stats from prog->aux into prog to avoid one extra load
> in the critical path of program execution.
>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---

Acked-by: Andrii Nakryiko <andrii@kernel.org>

>  include/linux/bpf.h     |  8 --------
>  include/linux/filter.h  | 10 +++++++++-
>  kernel/bpf/core.c       |  8 ++++----
>  kernel/bpf/syscall.c    |  2 +-
>  kernel/bpf/trampoline.c |  2 +-
>  kernel/bpf/verifier.c   |  2 +-
>  6 files changed, 16 insertions(+), 16 deletions(-)
>

[...]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 bpf-next 2/7] bpf: Compute program stats for sleepable programs
  2021-02-06 17:03 ` [PATCH v2 bpf-next 2/7] bpf: Compute program stats for sleepable programs Alexei Starovoitov
@ 2021-02-08 20:35   ` Andrii Nakryiko
  0 siblings, 0 replies; 22+ messages in thread
From: Andrii Nakryiko @ 2021-02-08 20:35 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: David S. Miller, Daniel Borkmann, bpf, Kernel Team

On Sat, Feb 6, 2021 at 9:05 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> From: Alexei Starovoitov <ast@kernel.org>
>
> In older non-RT kernels migrate_disable() was the same as preempt_disable().
> Since commit 74d862b682f5 ("sched: Make migrate_disable/enable() independent of RT")
> migrate_disable() is real and doesn't prevent sleeping.
> Use it to efficiently compute execution stats for sleepable bpf programs.
> migrate_disable() will also be used to enable per-cpu maps in sleepable programs
> in later patches.
>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---

LGTM (see comment about outdated comment).

Acked-by: Andrii Nakryiko <andrii@kernel.org>

>  arch/x86/net/bpf_jit_comp.c | 31 ++++++++++++-------------------
>  include/linux/bpf.h         |  4 ++--
>  kernel/bpf/trampoline.c     | 27 +++++++++++++++++++++------
>  3 files changed, 35 insertions(+), 27 deletions(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index a3dc3bd154ac..d11b9bcebbea 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -1742,15 +1742,12 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
>         u8 *prog = *pprog;
>         int cnt = 0;
>
> -       if (p->aux->sleepable) {
> -               if (emit_call(&prog, __bpf_prog_enter_sleepable, prog))
> +       if (emit_call(&prog,
> +                     p->aux->sleepable ? __bpf_prog_enter_sleepable :
> +                     __bpf_prog_enter, prog))
>                         return -EINVAL;
> -       } else {
> -               if (emit_call(&prog, __bpf_prog_enter, prog))
> -                       return -EINVAL;
> -               /* remember prog start time returned by __bpf_prog_enter */
> -               emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
> -       }
> +       /* remember prog start time returned by __bpf_prog_enter */
> +       emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
>
>         /* arg1: lea rdi, [rbp - stack_size] */
>         EMIT4(0x48, 0x8D, 0x7D, -stack_size);
> @@ -1770,18 +1767,14 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
>         if (mod_ret)
>                 emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
>
> -       if (p->aux->sleepable) {
> -               if (emit_call(&prog, __bpf_prog_exit_sleepable, prog))
> +       /* arg1: mov rdi, progs[i] */
> +       emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
> +       /* arg2: mov rsi, rbx <- start time in nsec */
> +       emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
> +       if (emit_call(&prog,
> +                     p->aux->sleepable ? __bpf_prog_exit_sleepable :
> +                     __bpf_prog_exit, prog))
>                         return -EINVAL;
> -       } else {
> -               /* arg1: mov rdi, progs[i] */
> -               emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
> -                              (u32) (long) p);
> -               /* arg2: mov rsi, rbx <- start time in nsec */
> -               emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
> -               if (emit_call(&prog, __bpf_prog_exit, prog))
> -                       return -EINVAL;
> -       }
>
>         *pprog = prog;
>         return 0;
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 026fa8873c5d..2fa48439ef31 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -563,8 +563,8 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
>  /* these two functions are called from generated trampoline */
>  u64 notrace __bpf_prog_enter(void);
>  void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
> -void notrace __bpf_prog_enter_sleepable(void);
> -void notrace __bpf_prog_exit_sleepable(void);
> +u64 notrace __bpf_prog_enter_sleepable(void);
> +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
>
>  struct bpf_ksym {
>         unsigned long            start;
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index 5be3beeedd74..b1f567514b7e 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -388,10 +388,11 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
>   * call prog->bpf_func
>   * call __bpf_prog_exit
>   */
> +#define NO_START_TIME 0
>  u64 notrace __bpf_prog_enter(void)
>         __acquires(RCU)
>  {
> -       u64 start = 0;
> +       u64 start = NO_START_TIME;
>
>         rcu_read_lock();
>         migrate_disable();
> @@ -400,8 +401,8 @@ u64 notrace __bpf_prog_enter(void)
>         return start;
>  }
>
> -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
> -       __releases(RCU)
> +static void notrace update_prog_stats(struct bpf_prog *prog,
> +                                     u64 start)
>  {
>         struct bpf_prog_stats *stats;
>
> @@ -411,25 +412,39 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
>              * And vice versa.
>              * Hence check that 'start' is not zero.

This comment still references __bpf_prog_enter and __bpf_prog_exit
(only). See for yourself if it needs to be updated.

>              */
> -           start) {
> +           start > NO_START_TIME) {
>                 stats = this_cpu_ptr(prog->stats);
>                 u64_stats_update_begin(&stats->syncp);
>                 stats->cnt++;
>                 stats->nsecs += sched_clock() - start;
>                 u64_stats_update_end(&stats->syncp);
>         }
> +}
> +
> +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
> +       __releases(RCU)
> +{
> +       update_prog_stats(prog, start);
>         migrate_enable();
>         rcu_read_unlock();
>  }
>
> -void notrace __bpf_prog_enter_sleepable(void)
> +u64 notrace __bpf_prog_enter_sleepable(void)
>  {
> +       u64 start = NO_START_TIME;
> +
>         rcu_read_lock_trace();
> +       migrate_disable();
>         might_fault();
> +       if (static_branch_unlikely(&bpf_stats_enabled_key))
> +               start = sched_clock();
> +       return start;
>  }
>
> -void notrace __bpf_prog_exit_sleepable(void)
> +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
>  {
> +       update_prog_stats(prog, start);
> +       migrate_enable();
>         rcu_read_unlock_trace();
>  }
>
> --
> 2.24.1
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 bpf-next 3/7] bpf: Add per-program recursion prevention mechanism
  2021-02-06 17:03 ` [PATCH v2 bpf-next 3/7] bpf: Add per-program recursion prevention mechanism Alexei Starovoitov
@ 2021-02-08 20:51   ` Andrii Nakryiko
  2021-02-09 19:06     ` Alexei Starovoitov
  0 siblings, 1 reply; 22+ messages in thread
From: Andrii Nakryiko @ 2021-02-08 20:51 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: David S. Miller, Daniel Borkmann, bpf, Kernel Team

On Sat, Feb 6, 2021 at 9:05 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> From: Alexei Starovoitov <ast@kernel.org>
>
> Since both sleepable and non-sleepable programs execute under migrate_disable,
> add a recursion prevention mechanism to both types of programs when they're
> executed via the bpf trampoline.
>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
>  arch/x86/net/bpf_jit_comp.c                   | 15 +++++++++++++
>  include/linux/bpf.h                           |  6 ++---
>  include/linux/filter.h                        |  1 +
>  kernel/bpf/core.c                             |  8 +++++++
>  kernel/bpf/trampoline.c                       | 22 ++++++++++++++-----
>  .../selftests/bpf/prog_tests/fexit_stress.c   |  2 +-
>  .../bpf/prog_tests/trampoline_count.c         |  4 ++--
>  7 files changed, 47 insertions(+), 11 deletions(-)
>

[...]

> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index b1f567514b7e..226f613ab289 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -388,16 +388,21 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
>   * call prog->bpf_func
>   * call __bpf_prog_exit
>   */
> -#define NO_START_TIME 0
> -u64 notrace __bpf_prog_enter(void)
> +#define NO_START_TIME 1
> +u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
>         __acquires(RCU)
>  {
>         u64 start = NO_START_TIME;
>
>         rcu_read_lock();
>         migrate_disable();
> -       if (static_branch_unlikely(&bpf_stats_enabled_key))
> +       if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1))
> +               return 0;
> +       if (static_branch_unlikely(&bpf_stats_enabled_key)) {
>                 start = sched_clock();
> +               if (unlikely(!start))
> +                       start = NO_START_TIME;
> +       }
>         return start;
>  }
>
> @@ -425,25 +430,32 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
>         __releases(RCU)
>  {
>         update_prog_stats(prog, start);
> +       __this_cpu_dec(*(prog->active));
>         migrate_enable();
>         rcu_read_unlock();
>  }
>
> -u64 notrace __bpf_prog_enter_sleepable(void)
> +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
>  {
>         u64 start = NO_START_TIME;
>
>         rcu_read_lock_trace();
>         migrate_disable();
>         might_fault();
> -       if (static_branch_unlikely(&bpf_stats_enabled_key))
> +       if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1))
> +               return 0;
> +       if (static_branch_unlikely(&bpf_stats_enabled_key)) {
>                 start = sched_clock();
> +               if (unlikely(!start))
> +                       start = NO_START_TIME;
> +       }
>         return start;


maybe extract this piece into a function, so that enter functions
would look like:

...
if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1))
         return 0;
return bpf_prog_start_time();

no need for u64 start initialization, more linear code, and no
duplication of logic?

Oh, and actually, given you have `start > NO_START_TIME` condition in
exit function, you don't need this `if (unlikely(!start))` bit at all,
because you are going to ignore both 0 and 1. So maybe no need for a
new function, but no need for extra if as well.
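
For concreteness, the suggested helper could look something like this
(just a sketch of the idea above, not something from the patch):

	static u64 notrace bpf_prog_start_time(void)
	{
		u64 start = NO_START_TIME;

		if (static_branch_unlikely(&bpf_stats_enabled_key))
			start = sched_clock();
		return start;
	}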

>  }
>
>  void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
>  {
>         update_prog_stats(prog, start);
> +       __this_cpu_dec(*(prog->active));
>         migrate_enable();
>         rcu_read_unlock_trace();
>  }
> diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
> index 3b9dbf7433f0..4698b0d2de36 100644
> --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
> +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
> @@ -3,7 +3,7 @@
>  #include <test_progs.h>
>
>  /* x86-64 fits 55 JITed and 43 interpreted progs into half page */

Probably the comment is a bit outdated now forcing you to decrease CNT?

> -#define CNT 40
> +#define CNT 38
>
>  void test_fexit_stress(void)
>  {
> diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
> index 781c8d11604b..f3022d934e2d 100644
> --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
> +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
> @@ -4,7 +4,7 @@
>  #include <sys/prctl.h>
>  #include <test_progs.h>
>
> -#define MAX_TRAMP_PROGS 40
> +#define MAX_TRAMP_PROGS 38
>
>  struct inst {
>         struct bpf_object *obj;
> @@ -52,7 +52,7 @@ void test_trampoline_count(void)
>         struct bpf_link *link;
>         char comm[16] = {};
>
> -       /* attach 'allowed' 40 trampoline programs */
> +       /* attach 'allowed' trampoline programs */
>         for (i = 0; i < MAX_TRAMP_PROGS; i++) {
>                 obj = bpf_object__open_file(object, NULL);
>                 if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
> --
> 2.24.1
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 bpf-next 4/7] selftest/bpf: Add a recursion test
  2021-02-06 17:03 ` [PATCH v2 bpf-next 4/7] selftest/bpf: Add a recursion test Alexei Starovoitov
@ 2021-02-08 20:54   ` Andrii Nakryiko
  0 siblings, 0 replies; 22+ messages in thread
From: Andrii Nakryiko @ 2021-02-08 20:54 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: David S. Miller, Daniel Borkmann, bpf, Kernel Team

On Sat, Feb 6, 2021 at 9:06 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> From: Alexei Starovoitov <ast@kernel.org>
>
> Add a recursive non-sleepable fentry program as a test.
> All attach points where sleepable progs can execute are non-recursive so far,
> so the recursion protection mechanism for sleepable progs cannot be activated yet.
>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---

LGTM.

Acked-by: Andrii Nakryiko <andrii@kernel.org>

>  .../selftests/bpf/prog_tests/recursion.c      | 33 +++++++++++++
>  tools/testing/selftests/bpf/progs/recursion.c | 46 +++++++++++++++++++
>  2 files changed, 79 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/recursion.c
>  create mode 100644 tools/testing/selftests/bpf/progs/recursion.c
>

[...]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 bpf-next 5/7] bpf: Count the number of times recursion was prevented
  2021-02-06 17:03 ` [PATCH v2 bpf-next 5/7] bpf: Count the number of times recursion was prevented Alexei Starovoitov
@ 2021-02-08 20:58   ` Andrii Nakryiko
  0 siblings, 0 replies; 22+ messages in thread
From: Andrii Nakryiko @ 2021-02-08 20:58 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: David S. Miller, Daniel Borkmann, bpf, Kernel Team

On Sat, Feb 6, 2021 at 9:06 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> From: Alexei Starovoitov <ast@kernel.org>
>
> Add a per-program counter for the number of times the recursion prevention
> mechanism was triggered and expose it via show_fdinfo and bpf_prog_info.
> Teach bpftool to print it.
>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---

Acked-by: Andrii Nakryiko <andrii@kernel.org>

>  include/linux/filter.h         |  1 +
>  include/uapi/linux/bpf.h       |  1 +
>  kernel/bpf/syscall.c           | 14 ++++++++++----
>  kernel/bpf/trampoline.c        | 18 ++++++++++++++++--
>  tools/bpf/bpftool/prog.c       |  5 +++++
>  tools/include/uapi/linux/bpf.h |  1 +
>  6 files changed, 34 insertions(+), 6 deletions(-)
>

[...]

>  static void print_prog_json(struct bpf_prog_info *info, int fd)
> @@ -446,6 +448,9 @@ static void print_prog_header_plain(struct bpf_prog_info *info)
>         if (info->run_time_ns)
>                 printf(" run_time_ns %lld run_cnt %lld",
>                        info->run_time_ns, info->run_cnt);
> +       if (info->recursion_misses)
> +               printf(" recursion_misses %lld",
> +                      info->recursion_misses);

no need for wrapping the line

>         printf("\n");
>  }
>
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index c001766adcbc..c547ad1ffe43 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -4501,6 +4501,7 @@ struct bpf_prog_info {
>         __aligned_u64 prog_tags;
>         __u64 run_time_ns;
>         __u64 run_cnt;
> +       __u64 recursion_misses;
>  } __attribute__((aligned(8)));
>
>  struct bpf_map_info {
> --
> 2.24.1
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 bpf-next 6/7] bpf: Allows per-cpu maps and map-in-map in sleepable programs
  2021-02-06 17:03 ` [PATCH v2 bpf-next 6/7] bpf: Allows per-cpu maps and map-in-map in sleepable programs Alexei Starovoitov
@ 2021-02-08 21:00   ` Andrii Nakryiko
  2021-02-08 21:01     ` Andrii Nakryiko
  0 siblings, 1 reply; 22+ messages in thread
From: Andrii Nakryiko @ 2021-02-08 21:00 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: David S. Miller, Daniel Borkmann, bpf, Kernel Team

On Sat, Feb 6, 2021 at 9:06 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> From: Alexei Starovoitov <ast@kernel.org>
>
> Since sleepable programs are now executing under migrate_disable
> the per-cpu maps are safe to use.
> The map-in-map was ok to use in sleepable progs from the time sleepable
> progs were introduced.
>
> Note that non-preallocated maps are still not safe, since there is
> no rcu_read_lock yet in sleepable programs and dynamically allocated
> map elements are relying on rcu protection. The sleepable programs
> have rcu_read_lock_trace instead. That limitation will be addressed
> in the future.
>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---

Great.

Acked-by: Andrii Nakryiko <andrii@kernel.org>

>  kernel/bpf/hashtab.c  | 4 ++--
>  kernel/bpf/verifier.c | 7 ++++++-
>  2 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index c1ac7f964bc9..d63912e73ad9 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -1148,7 +1148,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
>                 /* unknown flags */
>                 return -EINVAL;
>
> -       WARN_ON_ONCE(!rcu_read_lock_held());
> +       WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
>
>         key_size = map->key_size;
>
> @@ -1202,7 +1202,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
>                 /* unknown flags */
>                 return -EINVAL;
>
> -       WARN_ON_ONCE(!rcu_read_lock_held());
> +       WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
>
>         key_size = map->key_size;
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 4189edb41b73..9561f2af7710 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -10020,9 +10020,14 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
>                 case BPF_MAP_TYPE_HASH:
>                 case BPF_MAP_TYPE_LRU_HASH:
>                 case BPF_MAP_TYPE_ARRAY:
> +               case BPF_MAP_TYPE_PERCPU_HASH:
> +               case BPF_MAP_TYPE_PERCPU_ARRAY:
> +               case BPF_MAP_TYPE_LRU_PERCPU_HASH:
> +               case BPF_MAP_TYPE_ARRAY_OF_MAPS:
> +               case BPF_MAP_TYPE_HASH_OF_MAPS:
>                         if (!is_preallocated_map(map)) {
>                                 verbose(env,
> -                                       "Sleepable programs can only use preallocated hash maps\n");
> +                                       "Sleepable programs can only use preallocated maps\n");
>                                 return -EINVAL;
>                         }
>                         break;
> --
> 2.24.1
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 bpf-next 6/7] bpf: Allows per-cpu maps and map-in-map in sleepable programs
  2021-02-08 21:00   ` Andrii Nakryiko
@ 2021-02-08 21:01     ` Andrii Nakryiko
  2021-02-08 23:20       ` Alexei Starovoitov
  0 siblings, 1 reply; 22+ messages in thread
From: Andrii Nakryiko @ 2021-02-08 21:01 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: David S. Miller, Daniel Borkmann, bpf, Kernel Team

On Mon, Feb 8, 2021 at 1:00 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Sat, Feb 6, 2021 at 9:06 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > From: Alexei Starovoitov <ast@kernel.org>
> >
> > Since sleepable programs are now executing under migrate_disable
> > the per-cpu maps are safe to use.

Also made me wonder if PERF_EVENT_ARRAY map is usable in sleepable now?

> > The map-in-map was ok to use in sleepable progs from the time sleepable
> > progs were introduced.
> >
> > Note that non-preallocated maps are still not safe, since there is
> > no rcu_read_lock yet in sleepable programs and dynamically allocated
> > map elements are relying on rcu protection. The sleepable programs
> > have rcu_read_lock_trace instead. That limitation will be addressed
> > in the future.
> >
> > Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> > ---
>
> Great.
>
> Acked-by: Andrii Nakryiko <andrii@kernel.org>
>
> >  kernel/bpf/hashtab.c  | 4 ++--
> >  kernel/bpf/verifier.c | 7 ++++++-
> >  2 files changed, 8 insertions(+), 3 deletions(-)
> >
> > diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> > index c1ac7f964bc9..d63912e73ad9 100644
> > --- a/kernel/bpf/hashtab.c
> > +++ b/kernel/bpf/hashtab.c
> > @@ -1148,7 +1148,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
> >                 /* unknown flags */
> >                 return -EINVAL;
> >
> > -       WARN_ON_ONCE(!rcu_read_lock_held());
> > +       WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
> >
> >         key_size = map->key_size;
> >
> > @@ -1202,7 +1202,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
> >                 /* unknown flags */
> >                 return -EINVAL;
> >
> > -       WARN_ON_ONCE(!rcu_read_lock_held());
> > +       WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
> >
> >         key_size = map->key_size;
> >
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index 4189edb41b73..9561f2af7710 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -10020,9 +10020,14 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
> >                 case BPF_MAP_TYPE_HASH:
> >                 case BPF_MAP_TYPE_LRU_HASH:
> >                 case BPF_MAP_TYPE_ARRAY:
> > +               case BPF_MAP_TYPE_PERCPU_HASH:
> > +               case BPF_MAP_TYPE_PERCPU_ARRAY:
> > +               case BPF_MAP_TYPE_LRU_PERCPU_HASH:
> > +               case BPF_MAP_TYPE_ARRAY_OF_MAPS:
> > +               case BPF_MAP_TYPE_HASH_OF_MAPS:
> >                         if (!is_preallocated_map(map)) {
> >                                 verbose(env,
> > -                                       "Sleepable programs can only use preallocated hash maps\n");
> > +                                       "Sleepable programs can only use preallocated maps\n");
> >                                 return -EINVAL;
> >                         }
> >                         break;
> > --
> > 2.24.1
> >

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 bpf-next 7/7] selftests/bpf: Add a test for map-in-map and per-cpu maps in sleepable progs
  2021-02-06 17:03 ` [PATCH v2 bpf-next 7/7] selftests/bpf: Add a test for map-in-map and per-cpu maps in sleepable progs Alexei Starovoitov
@ 2021-02-08 21:04   ` Andrii Nakryiko
  0 siblings, 0 replies; 22+ messages in thread
From: Andrii Nakryiko @ 2021-02-08 21:04 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: David S. Miller, Daniel Borkmann, bpf, Kernel Team

On Sat, Feb 6, 2021 at 9:06 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> From: Alexei Starovoitov <ast@kernel.org>
>
> Add a basic test for map-in-map and per-cpu maps in sleepable programs.
>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---

LGTM.

Acked-by: Andrii Nakryiko <andrii@kernel.org>


>  tools/testing/selftests/bpf/progs/lsm.c | 69 +++++++++++++++++++++++++
>  1 file changed, 69 insertions(+)
>

[...]
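
The kind of sleepable program such a test exercises looks roughly like the
sketch below; the hook, map and function names here are illustrative and not
taken from the actual lsm.c selftest:

        #include "vmlinux.h"
        #include <bpf/bpf_helpers.h>
        #include <bpf/bpf_tracing.h>

        char LICENSE[] SEC("license") = "GPL";

        struct {
                __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
                __uint(max_entries, 1);
                __type(key, __u32);
                __type(value, __u64);
        } percpu_counter SEC(".maps");

        /* sleepable LSM hook; the per-cpu map access is safe here because
         * sleepable progs now run under migrate_disable (see patch 6/7)
         */
        SEC("lsm.s/file_open")
        int BPF_PROG(count_file_open, struct file *file)
        {
                __u32 key = 0;
                __u64 *val;

                val = bpf_map_lookup_elem(&percpu_counter, &key);
                if (val)
                        *val += 1;
                return 0;
        }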

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 bpf-next 1/7] bpf: Optimize program stats
  2021-02-06 17:03 ` [PATCH v2 bpf-next 1/7] bpf: Optimize program stats Alexei Starovoitov
  2021-02-08 18:57   ` Andrii Nakryiko
@ 2021-02-08 21:28   ` Daniel Borkmann
  2021-02-08 23:13     ` Alexei Starovoitov
  1 sibling, 1 reply; 22+ messages in thread
From: Daniel Borkmann @ 2021-02-08 21:28 UTC (permalink / raw)
  To: Alexei Starovoitov, davem; +Cc: bpf, kernel-team

On 2/6/21 6:03 PM, Alexei Starovoitov wrote:
> From: Alexei Starovoitov <ast@kernel.org>
> 
> Move bpf_prog_stats from prog->aux into prog to avoid one extra load
> in critical path of program execution.
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
>   include/linux/bpf.h     |  8 --------
>   include/linux/filter.h  | 10 +++++++++-
>   kernel/bpf/core.c       |  8 ++++----
>   kernel/bpf/syscall.c    |  2 +-
>   kernel/bpf/trampoline.c |  2 +-
>   kernel/bpf/verifier.c   |  2 +-
>   6 files changed, 16 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 321966fc35db..026fa8873c5d 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -14,7 +14,6 @@
>   #include <linux/numa.h>
>   #include <linux/mm_types.h>
>   #include <linux/wait.h>
> -#include <linux/u64_stats_sync.h>
>   #include <linux/refcount.h>
>   #include <linux/mutex.h>
>   #include <linux/module.h>
> @@ -507,12 +506,6 @@ enum bpf_cgroup_storage_type {
>    */
>   #define MAX_BPF_FUNC_ARGS 12
>   
> -struct bpf_prog_stats {
> -	u64 cnt;
> -	u64 nsecs;
> -	struct u64_stats_sync syncp;
> -} __aligned(2 * sizeof(u64));
> -
>   struct btf_func_model {
>   	u8 ret_size;
>   	u8 nr_args;
> @@ -845,7 +838,6 @@ struct bpf_prog_aux {
>   	u32 linfo_idx;
>   	u32 num_exentries;
>   	struct exception_table_entry *extable;
> -	struct bpf_prog_stats __percpu *stats;
>   	union {
>   		struct work_struct work;
>   		struct rcu_head	rcu;
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 5b3137d7b690..c6592590a0b7 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -22,6 +22,7 @@
>   #include <linux/vmalloc.h>
>   #include <linux/sockptr.h>
>   #include <crypto/sha1.h>
> +#include <linux/u64_stats_sync.h>
>   
>   #include <net/sch_generic.h>
>   
> @@ -539,6 +540,12 @@ struct bpf_binary_header {
>   	u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
>   };
>   
> +struct bpf_prog_stats {
> +	u64 cnt;
> +	u64 nsecs;
> +	struct u64_stats_sync syncp;
> +} __aligned(2 * sizeof(u64));
> +
>   struct bpf_prog {
>   	u16			pages;		/* Number of allocated pages */
>   	u16			jited:1,	/* Is our filter JIT'ed? */
> @@ -559,6 +566,7 @@ struct bpf_prog {
>   	u8			tag[BPF_TAG_SIZE];
>   	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
>   	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
> +	struct bpf_prog_stats __percpu *stats;
>   	unsigned int		(*bpf_func)(const void *ctx,
>   					    const struct bpf_insn *insn);

nit: could we move aux & orig_prog behind bpf_func while at it, just to avoid it slipping
into the next cacheline by accident when someone extends this again? (Or maybe a BUILD_BUG_ON
to enforce that it stays in the first cacheline could also be an option.)
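
For the latter option, something along these lines would do it; the exact
bound and where to put the check are only an illustration, not a concrete
proposal for this series:

        /* e.g. in bpf_prog_alloc(): keep the hot fields in the first cacheline */
        BUILD_BUG_ON(offsetofend(struct bpf_prog, bpf_func) > L1_CACHE_BYTES);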

>   	/* Instructions for interpreter */
> @@ -581,7 +589,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
>   		struct bpf_prog_stats *__stats;				\
>   		u64 __start = sched_clock();				\
>   		__ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);	\
> -		__stats = this_cpu_ptr(prog->aux->stats);		\
> +		__stats = this_cpu_ptr(prog->stats);			\
>   		u64_stats_update_begin(&__stats->syncp);		\
>   		__stats->cnt++;						\
>   		__stats->nsecs += sched_clock() - __start;		\
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 5bbd4884ff7a..fa3da4cda476 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -114,8 +114,8 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
>   	if (!prog)
>   		return NULL;
>   
> -	prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
> -	if (!prog->aux->stats) {
> +	prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
> +	if (!prog->stats) {
>   		kfree(prog->aux);
>   		vfree(prog);
>   		return NULL;
> @@ -124,7 +124,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
>   	for_each_possible_cpu(cpu) {
>   		struct bpf_prog_stats *pstats;
>   
> -		pstats = per_cpu_ptr(prog->aux->stats, cpu);
> +		pstats = per_cpu_ptr(prog->stats, cpu);
>   		u64_stats_init(&pstats->syncp);
>   	}
>   	return prog;
> @@ -249,7 +249,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
>   	if (fp->aux) {
>   		mutex_destroy(&fp->aux->used_maps_mutex);
>   		mutex_destroy(&fp->aux->dst_mutex);
> -		free_percpu(fp->aux->stats);
> +		free_percpu(fp->stats);

This doesn't look correct, stats is now /not/ tied to fp->aux anymore which this if block
is taking care of freeing. It needs to be moved outside so we don't leak fp->stats.

>   		kfree(fp->aux->poke_tab);
>   		kfree(fp->aux);
>   	}
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index e5999d86c76e..f7df56a704de 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -1739,7 +1739,7 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog,
>   		unsigned int start;
>   		u64 tnsecs, tcnt;
>   
> -		st = per_cpu_ptr(prog->aux->stats, cpu);
> +		st = per_cpu_ptr(prog->stats, cpu);
>   		do {
>   			start = u64_stats_fetch_begin_irq(&st->syncp);
>   			tnsecs = st->nsecs;
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index 35c5887d82ff..5be3beeedd74 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -412,7 +412,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
>   	     * Hence check that 'start' is not zero.
>   	     */
>   	    start) {
> -		stats = this_cpu_ptr(prog->aux->stats);
> +		stats = this_cpu_ptr(prog->stats);
>   		u64_stats_update_begin(&stats->syncp);
>   		stats->cnt++;
>   		stats->nsecs += sched_clock() - start;
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 15694246f854..4189edb41b73 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -10889,7 +10889,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
>   		/* BPF_PROG_RUN doesn't call subprogs directly,
>   		 * hence main prog stats include the runtime of subprogs.
>   		 * subprogs don't have IDs and not reachable via prog_get_next_id
> -		 * func[i]->aux->stats will never be accessed and stays NULL
> +		 * func[i]->stats will never be accessed and stays NULL
>   		 */
>   		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
>   		if (!func[i])
> 


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 bpf-next 1/7] bpf: Optimize program stats
  2021-02-08 21:28   ` Daniel Borkmann
@ 2021-02-08 23:13     ` Alexei Starovoitov
  2021-02-09  0:53       ` Daniel Borkmann
  0 siblings, 1 reply; 22+ messages in thread
From: Alexei Starovoitov @ 2021-02-08 23:13 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov, davem; +Cc: bpf, kernel-team

On 2/8/21 1:28 PM, Daniel Borkmann wrote:
> On 2/6/21 6:03 PM, Alexei Starovoitov wrote:
>> From: Alexei Starovoitov <ast@kernel.org>
>>
>> Move bpf_prog_stats from prog->aux into prog to avoid one extra load
>> in critical path of program execution.
>>
>> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
>> ---
>>   include/linux/bpf.h     |  8 --------
>>   include/linux/filter.h  | 10 +++++++++-
>>   kernel/bpf/core.c       |  8 ++++----
>>   kernel/bpf/syscall.c    |  2 +-
>>   kernel/bpf/trampoline.c |  2 +-
>>   kernel/bpf/verifier.c   |  2 +-
>>   6 files changed, 16 insertions(+), 16 deletions(-)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 321966fc35db..026fa8873c5d 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -14,7 +14,6 @@
>>   #include <linux/numa.h>
>>   #include <linux/mm_types.h>
>>   #include <linux/wait.h>
>> -#include <linux/u64_stats_sync.h>
>>   #include <linux/refcount.h>
>>   #include <linux/mutex.h>
>>   #include <linux/module.h>
>> @@ -507,12 +506,6 @@ enum bpf_cgroup_storage_type {
>>    */
>>   #define MAX_BPF_FUNC_ARGS 12
>> -struct bpf_prog_stats {
>> -    u64 cnt;
>> -    u64 nsecs;
>> -    struct u64_stats_sync syncp;
>> -} __aligned(2 * sizeof(u64));
>> -
>>   struct btf_func_model {
>>       u8 ret_size;
>>       u8 nr_args;
>> @@ -845,7 +838,6 @@ struct bpf_prog_aux {
>>       u32 linfo_idx;
>>       u32 num_exentries;
>>       struct exception_table_entry *extable;
>> -    struct bpf_prog_stats __percpu *stats;
>>       union {
>>           struct work_struct work;
>>           struct rcu_head    rcu;
>> diff --git a/include/linux/filter.h b/include/linux/filter.h
>> index 5b3137d7b690..c6592590a0b7 100644
>> --- a/include/linux/filter.h
>> +++ b/include/linux/filter.h
>> @@ -22,6 +22,7 @@
>>   #include <linux/vmalloc.h>
>>   #include <linux/sockptr.h>
>>   #include <crypto/sha1.h>
>> +#include <linux/u64_stats_sync.h>
>>   #include <net/sch_generic.h>
>> @@ -539,6 +540,12 @@ struct bpf_binary_header {
>>       u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
>>   };
>> +struct bpf_prog_stats {
>> +    u64 cnt;
>> +    u64 nsecs;
>> +    struct u64_stats_sync syncp;
>> +} __aligned(2 * sizeof(u64));
>> +
>>   struct bpf_prog {
>>       u16            pages;        /* Number of allocated pages */
>>       u16            jited:1,    /* Is our filter JIT'ed? */
>> @@ -559,6 +566,7 @@ struct bpf_prog {
>>       u8            tag[BPF_TAG_SIZE];
>>       struct bpf_prog_aux    *aux;        /* Auxiliary fields */
>>       struct sock_fprog_kern    *orig_prog;    /* Original BPF program */
>> +    struct bpf_prog_stats __percpu *stats;
>>       unsigned int        (*bpf_func)(const void *ctx,
>>                           const struct bpf_insn *insn);
> 
> nit: could we move aux & orig_prog behind bpf_func while at it, just to
> avoid it slipping
> into the next cacheline by accident when someone extends this again?

I don't understand what moving aux+orig_prog after bpf_func will do.
Currently it's this:
struct bpf_prog_aux *      aux;                  /*    32     8 */
struct sock_fprog_kern *   orig_prog;            /*    40     8 */
unsigned int               (*bpf_func)(const void  *, const struct bpf_insn  *); /*    48     8 */

With the stats and active pointers the bpf_func goes into the 2nd cacheline.
In the past the motivation for keeping bpf_func right next to insns was
the interpreter. Now everyone has JIT on. The interpreter
is often removed from .text too. So having insns and bpf_func in
the same cacheline is not important.
Whereas having bpf_func with stats and active could be important
if stats/active are also used in other places than fexit/fentry.
For this patch set the bpf_func location is irrelevant, since the
prog addr is hardcoded inside the bpf trampoline generated asm.
For the best speed only stats and active should be close to each other,
and they both will be in the 1st cacheline.

>> @@ -249,7 +249,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
>>       if (fp->aux) {
>>           mutex_destroy(&fp->aux->used_maps_mutex);
>>           mutex_destroy(&fp->aux->dst_mutex);
>> -        free_percpu(fp->aux->stats);
>> +        free_percpu(fp->stats);
> 
> This doesn't look correct, stats is now /not/ tied to fp->aux anymore 
> which this if block
> is taking care of freeing. It needs to be moved outside so we don't leak 
> fp->stats.

Great catch. thanks!
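
The fix presumably just hoists the free out of the if (fp->aux) block,
roughly like the sketch below (the actual v3 hunk may differ):

        void __bpf_prog_free(struct bpf_prog *fp)
        {
                if (fp->aux) {
                        mutex_destroy(&fp->aux->used_maps_mutex);
                        mutex_destroy(&fp->aux->dst_mutex);
                        kfree(fp->aux->poke_tab);
                        kfree(fp->aux);
                }
                free_percpu(fp->stats); /* now freed even when fp->aux is NULL */
                vfree(fp);
        }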

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 bpf-next 6/7] bpf: Allows per-cpu maps and map-in-map in sleepable programs
  2021-02-08 21:01     ` Andrii Nakryiko
@ 2021-02-08 23:20       ` Alexei Starovoitov
  0 siblings, 0 replies; 22+ messages in thread
From: Alexei Starovoitov @ 2021-02-08 23:20 UTC (permalink / raw)
  To: Andrii Nakryiko, Alexei Starovoitov
  Cc: David S. Miller, Daniel Borkmann, bpf, Kernel Team

On 2/8/21 1:01 PM, Andrii Nakryiko wrote:
> On Mon, Feb 8, 2021 at 1:00 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
>>
>> On Sat, Feb 6, 2021 at 9:06 AM Alexei Starovoitov
>> <alexei.starovoitov@gmail.com> wrote:
>>>
>>> From: Alexei Starovoitov <ast@kernel.org>
>>>
>>> Since sleepable programs are now executing under migrate_disable
>>> the per-cpu maps are safe to use.
> 
> Also made me wonder if PERF_EVENT_ARRAY map is usable in sleepable now?

Maybe. It would probably need an explicit preempt_disable in
__bpf_perf_event_output around the perf_event_output call.
It needs more analysis.
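
In other words, something of this shape. This is purely a sketch of the
idea being floated here, with the rest of __bpf_perf_event_output elided;
not a reviewed or tested change:

        /* tail of __bpf_perf_event_output() in kernel/trace/bpf_trace.c */
        int err;

        preempt_disable();
        err = perf_event_output(event, sd, regs);
        preempt_enable();
        return err;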

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 bpf-next 1/7] bpf: Optimize program stats
  2021-02-08 23:13     ` Alexei Starovoitov
@ 2021-02-09  0:53       ` Daniel Borkmann
  0 siblings, 0 replies; 22+ messages in thread
From: Daniel Borkmann @ 2021-02-09  0:53 UTC (permalink / raw)
  To: Alexei Starovoitov, Alexei Starovoitov, davem; +Cc: bpf, kernel-team

On 2/9/21 12:13 AM, Alexei Starovoitov wrote:
> On 2/8/21 1:28 PM, Daniel Borkmann wrote:
>> On 2/6/21 6:03 PM, Alexei Starovoitov wrote:
[...]
>>> @@ -539,6 +540,12 @@ struct bpf_binary_header {
>>>       u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
>>>   };
>>> +struct bpf_prog_stats {
>>> +    u64 cnt;
>>> +    u64 nsecs;
>>> +    struct u64_stats_sync syncp;
>>> +} __aligned(2 * sizeof(u64));
>>> +
>>>   struct bpf_prog {
>>>       u16            pages;        /* Number of allocated pages */
>>>       u16            jited:1,    /* Is our filter JIT'ed? */
>>> @@ -559,6 +566,7 @@ struct bpf_prog {
>>>       u8            tag[BPF_TAG_SIZE];
>>>       struct bpf_prog_aux    *aux;        /* Auxiliary fields */
>>>       struct sock_fprog_kern    *orig_prog;    /* Original BPF program */
>>> +    struct bpf_prog_stats __percpu *stats;
>>>       unsigned int        (*bpf_func)(const void *ctx,
>>>                           const struct bpf_insn *insn);
>>
>> nit: could we move aux & orig_prog behind bpf_func while at it, just to avoid it slipping
>> into the next cacheline by accident when someone extends this again?
> 
> I don't understand what moving aux+orig_prog after bpf_func will do.
> Currently it's this:
> struct bpf_prog_aux *      aux;                  /*    32     8 */
> struct sock_fprog_kern *   orig_prog;            /*    40     8 */
> unsigned int               (*bpf_func)(const void  *, const struct bpf_insn  *); /*    48     8 */
> 
> With the stats and active pointers the bpf_func goes into the 2nd cacheline.
> In the past the motivation for keeping bpf_func right next to insns was
> the interpreter. Now everyone has JIT on. The interpreter
> is often removed from .text too. So having insns and bpf_func in
> the same cacheline is not important.

Yeah, that's not what I meant; the interpreter case is less important.

> Whereas having bpf_func with stats and active could be important
> if stats/active are also used in other places than fexit/fentry.
> For this patch set the bpf_func location is irrelevant, since the
> prog addr is hardcoded inside the bpf trampoline generated asm.
> For the best speed only stats and active should be close to each other,
> and they both will be in the 1st cacheline.

I meant to say that it's zero effort to move aux/orig_prog behind
bpf_func, so stats/active/bpf_func can still be on the same cacheline. Yes,
it's potentially less important with the dispatcher going up to 64, but
still more relevant to the fast path than aux/orig_prog. Also for the
non-x86 case.
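
Concretely, the field order being suggested is roughly the following; other
members are elided, and the active pointer is the per-cpu recursion counter
from patch 3/7 (its exact type is taken on trust here). An illustration
only, not a literal hunk:

        struct bpf_prog {
                ...
                u8                      tag[BPF_TAG_SIZE];
                struct bpf_prog_stats __percpu *stats;
                int __percpu            *active;
                unsigned int            (*bpf_func)(const void *ctx,
                                                    const struct bpf_insn *insn);
                struct bpf_prog_aux     *aux;           /* Auxiliary fields */
                struct sock_fprog_kern  *orig_prog;     /* Original BPF program */
                /* Instructions for interpreter */
                ...
        };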

>>> @@ -249,7 +249,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
>>>       if (fp->aux) {
>>>           mutex_destroy(&fp->aux->used_maps_mutex);
>>>           mutex_destroy(&fp->aux->dst_mutex);
>>> -        free_percpu(fp->aux->stats);
>>> +        free_percpu(fp->stats);
>>
>> This doesn't look correct, stats is now /not/ tied to fp->aux anymore which this if block
>> is taking care of freeing. It needs to be moved outside so we don't leak fp->stats.
> 
> Great catch. thanks!


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 bpf-next 3/7] bpf: Add per-program recursion prevention mechanism
  2021-02-08 20:51   ` Andrii Nakryiko
@ 2021-02-09 19:06     ` Alexei Starovoitov
  2021-02-09 19:15       ` Andrii Nakryiko
  0 siblings, 1 reply; 22+ messages in thread
From: Alexei Starovoitov @ 2021-02-09 19:06 UTC (permalink / raw)
  To: Andrii Nakryiko, Alexei Starovoitov
  Cc: David S. Miller, Daniel Borkmann, bpf, Kernel Team

On 2/8/21 12:51 PM, Andrii Nakryiko wrote:
>>                  start = sched_clock();
>> +               if (unlikely(!start))
>> +                       start = NO_START_TIME;
>> +       }
>>          return start;
> 
> 
> Oh, and actually, given you have `start > NO_START_TIME` condition in
> exit function, you don't need this `if (unlikely(!start))` bit at all,
> because you are going to ignore both 0 and 1. So maybe no need for a
> new function, but no need for extra if as well.

This unlikely(!start) is needed for the very unlikely case when
sched_clock() returns 0. In that case the prog should still be executed.
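
Putting the quoted pieces together, the enter-side helper presumably ends up
looking roughly like this; the helper name and the static-key check are
reconstructed from the quoted hunk and the earlier patches, not copied from
the patch itself:

        static u64 notrace bpf_prog_start_time(void)
        {
                u64 start = NO_START_TIME;

                if (static_branch_unlikely(&bpf_stats_enabled_key)) {
                        start = sched_clock();
                        if (unlikely(!start))
                                start = NO_START_TIME;
                }
                return start;
        }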




^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 bpf-next 3/7] bpf: Add per-program recursion prevention mechanism
  2021-02-09 19:06     ` Alexei Starovoitov
@ 2021-02-09 19:15       ` Andrii Nakryiko
  0 siblings, 0 replies; 22+ messages in thread
From: Andrii Nakryiko @ 2021-02-09 19:15 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Alexei Starovoitov, David S. Miller, Daniel Borkmann, bpf, Kernel Team

On Tue, Feb 9, 2021 at 11:06 AM Alexei Starovoitov <ast@fb.com> wrote:
>
> On 2/8/21 12:51 PM, Andrii Nakryiko wrote:
> >>                  start = sched_clock();
> >> +               if (unlikely(!start))
> >> +                       start = NO_START_TIME;
> >> +       }
> >>          return start;
> >
> >
> > Oh, and actually, given you have `start > NO_START_TIME` condition in
> > exit function, you don't need this `if (unlikely(!start))` bit at all,
> > because you are going to ignore both 0 and 1. So maybe no need for a
> > new function, but no need for extra if as well.
>
> This unlikely(!start) is needed for the very unlikely case when
> sched_clock() returns 0. In that case the prog should still be executed.
>

oh, right, I forgot we now skip execution when start == 0. Then I
guess the point of a helper function stands.
>
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2021-02-09 19:19 UTC | newest]

Thread overview: 22+ messages
2021-02-06 17:03 [PATCH v2 bpf-next 0/7] bpf: Misc improvements Alexei Starovoitov
2021-02-06 17:03 ` [PATCH v2 bpf-next 1/7] bpf: Optimize program stats Alexei Starovoitov
2021-02-08 18:57   ` Andrii Nakryiko
2021-02-08 21:28   ` Daniel Borkmann
2021-02-08 23:13     ` Alexei Starovoitov
2021-02-09  0:53       ` Daniel Borkmann
2021-02-06 17:03 ` [PATCH v2 bpf-next 2/7] bpf: Compute program stats for sleepable programs Alexei Starovoitov
2021-02-08 20:35   ` Andrii Nakryiko
2021-02-06 17:03 ` [PATCH v2 bpf-next 3/7] bpf: Add per-program recursion prevention mechanism Alexei Starovoitov
2021-02-08 20:51   ` Andrii Nakryiko
2021-02-09 19:06     ` Alexei Starovoitov
2021-02-09 19:15       ` Andrii Nakryiko
2021-02-06 17:03 ` [PATCH v2 bpf-next 4/7] selftest/bpf: Add a recursion test Alexei Starovoitov
2021-02-08 20:54   ` Andrii Nakryiko
2021-02-06 17:03 ` [PATCH v2 bpf-next 5/7] bpf: Count the number of times recursion was prevented Alexei Starovoitov
2021-02-08 20:58   ` Andrii Nakryiko
2021-02-06 17:03 ` [PATCH v2 bpf-next 6/7] bpf: Allows per-cpu maps and map-in-map in sleepable programs Alexei Starovoitov
2021-02-08 21:00   ` Andrii Nakryiko
2021-02-08 21:01     ` Andrii Nakryiko
2021-02-08 23:20       ` Alexei Starovoitov
2021-02-06 17:03 ` [PATCH v2 bpf-next 7/7] selftests/bpf: Add a test for map-in-map and per-cpu maps in sleepable progs Alexei Starovoitov
2021-02-08 21:04   ` Andrii Nakryiko
