BPF Archive on lore.kernel.org
 help / Atom feed
* [PATCH v2 bpf-next 0/3] bpf: optimize explored_states
@ 2019-05-22  3:17 Alexei Starovoitov
  2019-05-22  3:17 ` [PATCH v2 bpf-next 1/3] bpf: cleanup explored_states Alexei Starovoitov
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Alexei Starovoitov @ 2019-05-22  3:17 UTC (permalink / raw)
  To: davem; +Cc: daniel, netdev, bpf, kernel-team

Convert explored_states array into hash table and use simple hash to
reduce verifier peak memory consumption for programs with bpf2bpf calls.
More details in patch 3.

v1->v2: fixed Jakub's small nit in patch 1

Alexei Starovoitov (3):
  bpf: cleanup explored_states
  bpf: split explored_states
  bpf: convert explored_states to hash table

 include/linux/bpf_verifier.h |  2 +
 kernel/bpf/verifier.c        | 77 ++++++++++++++++++++++--------------
 2 files changed, 50 insertions(+), 29 deletions(-)

-- 
2.20.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 bpf-next 1/3] bpf: cleanup explored_states
  2019-05-22  3:17 [PATCH v2 bpf-next 0/3] bpf: optimize explored_states Alexei Starovoitov
@ 2019-05-22  3:17 ` Alexei Starovoitov
  2019-05-22  3:17 ` [PATCH v2 bpf-next 2/3] bpf: split explored_states Alexei Starovoitov
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Alexei Starovoitov @ 2019-05-22  3:17 UTC (permalink / raw)
  To: davem; +Cc: daniel, netdev, bpf, kernel-team

clean up explored_states to prep for introduction of hashtable
No functional changes.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 95f9354495ad..f8a540b29c12 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5436,6 +5436,17 @@ enum {
 };
 
 #define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
+static struct bpf_verifier_state_list **explored_state(
+					struct bpf_verifier_env *env,
+					int idx)
+{
+	return &env->explored_states[idx];
+}
+
+static void init_explored_state(struct bpf_verifier_env *env, int idx)
+{
+	env->explored_states[idx] = STATE_LIST_MARK;
+}
 
 /* t, w, e - match pseudo-code above:
  * t - index of current instruction
@@ -5461,7 +5472,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
 
 	if (e == BRANCH)
 		/* mark branch target for state pruning */
-		env->explored_states[w] = STATE_LIST_MARK;
+		init_explored_state(env, w);
 
 	if (insn_state[w] == 0) {
 		/* tree-edge */
@@ -5529,9 +5540,9 @@ static int check_cfg(struct bpf_verifier_env *env)
 			else if (ret < 0)
 				goto err_free;
 			if (t + 1 < insn_cnt)
-				env->explored_states[t + 1] = STATE_LIST_MARK;
+				init_explored_state(env, t + 1);
 			if (insns[t].src_reg == BPF_PSEUDO_CALL) {
-				env->explored_states[t] = STATE_LIST_MARK;
+				init_explored_state(env, t);
 				ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
 				if (ret == 1)
 					goto peek_stack;
@@ -5554,10 +5565,10 @@ static int check_cfg(struct bpf_verifier_env *env)
 			 * after every call and jump
 			 */
 			if (t + 1 < insn_cnt)
-				env->explored_states[t + 1] = STATE_LIST_MARK;
+				init_explored_state(env, t + 1);
 		} else {
 			/* conditional jump with two edges */
-			env->explored_states[t] = STATE_LIST_MARK;
+			init_explored_state(env, t);
 			ret = push_insn(t, t + 1, FALLTHROUGH, env);
 			if (ret == 1)
 				goto peek_stack;
@@ -6005,7 +6016,7 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
 	struct bpf_verifier_state_list *sl;
 	int i;
 
-	sl = env->explored_states[insn];
+	sl = *explored_state(env, insn);
 	if (!sl)
 		return;
 
@@ -6364,7 +6375,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	struct bpf_verifier_state *cur = env->cur_state, *new;
 	int i, j, err, states_cnt = 0;
 
-	pprev = &env->explored_states[insn_idx];
+	pprev = explored_state(env, insn_idx);
 	sl = *pprev;
 
 	if (!sl)
@@ -6451,8 +6462,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 		kfree(new_sl);
 		return err;
 	}
-	new_sl->next = env->explored_states[insn_idx];
-	env->explored_states[insn_idx] = new_sl;
+	new_sl->next = *explored_state(env, insn_idx);
+	*explored_state(env, insn_idx) = new_sl;
 	/* connect new state to parentage chain. Current frame needs all
 	 * registers connected. Only r6 - r9 of the callers are alive (pushed
 	 * to the stack implicitly by JITs) so in callers' frames connect just
-- 
2.20.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 bpf-next 2/3] bpf: split explored_states
  2019-05-22  3:17 [PATCH v2 bpf-next 0/3] bpf: optimize explored_states Alexei Starovoitov
  2019-05-22  3:17 ` [PATCH v2 bpf-next 1/3] bpf: cleanup explored_states Alexei Starovoitov
@ 2019-05-22  3:17 ` Alexei Starovoitov
  2019-05-22  3:17 ` [PATCH v2 bpf-next 3/3] bpf: convert explored_states to hash table Alexei Starovoitov
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Alexei Starovoitov @ 2019-05-22  3:17 UTC (permalink / raw)
  To: davem; +Cc: daniel, netdev, bpf, kernel-team

split explored_states into prune_point boolean mark
and link list of explored states.
This removes STATE_LIST_MARK hack and allows marks to be separate from states.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |  1 +
 kernel/bpf/verifier.c        | 31 +++++++++++++------------------
 2 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 1305ccbd8fe6..02bba09a0ea1 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -233,6 +233,7 @@ struct bpf_insn_aux_data {
 	int sanitize_stack_off; /* stack slot to be cleared */
 	bool seen; /* this insn was processed by the verifier */
 	u8 alu_state; /* used in combination with alu_limit */
+	bool prune_point;
 	unsigned int orig_idx; /* original instruction index */
 };
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f8a540b29c12..acf186a8111e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5435,7 +5435,6 @@ enum {
 	BRANCH = 2,
 };
 
-#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
 static struct bpf_verifier_state_list **explored_state(
 					struct bpf_verifier_env *env,
 					int idx)
@@ -5445,7 +5444,7 @@ static struct bpf_verifier_state_list **explored_state(
 
 static void init_explored_state(struct bpf_verifier_env *env, int idx)
 {
-	env->explored_states[idx] = STATE_LIST_MARK;
+	env->insn_aux_data[idx].prune_point = true;
 }
 
 /* t, w, e - match pseudo-code above:
@@ -6017,10 +6016,7 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
 	int i;
 
 	sl = *explored_state(env, insn);
-	if (!sl)
-		return;
-
-	while (sl != STATE_LIST_MARK) {
+	while (sl) {
 		if (sl->state.curframe != cur->curframe)
 			goto next;
 		for (i = 0; i <= cur->curframe; i++)
@@ -6375,18 +6371,18 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	struct bpf_verifier_state *cur = env->cur_state, *new;
 	int i, j, err, states_cnt = 0;
 
-	pprev = explored_state(env, insn_idx);
-	sl = *pprev;
-
-	if (!sl)
+	if (!env->insn_aux_data[insn_idx].prune_point)
 		/* this 'insn_idx' instruction wasn't marked, so we will not
 		 * be doing state search here
 		 */
 		return 0;
 
+	pprev = explored_state(env, insn_idx);
+	sl = *pprev;
+
 	clean_live_states(env, insn_idx, cur);
 
-	while (sl != STATE_LIST_MARK) {
+	while (sl) {
 		if (states_equal(env, &sl->state, cur)) {
 			sl->hit_cnt++;
 			/* reached equivalent register/stack state,
@@ -8144,13 +8140,12 @@ static void free_states(struct bpf_verifier_env *env)
 	for (i = 0; i < env->prog->len; i++) {
 		sl = env->explored_states[i];
 
-		if (sl)
-			while (sl != STATE_LIST_MARK) {
-				sln = sl->next;
-				free_verifier_state(&sl->state, false);
-				kfree(sl);
-				sl = sln;
-			}
+		while (sl) {
+			sln = sl->next;
+			free_verifier_state(&sl->state, false);
+			kfree(sl);
+			sl = sln;
+		}
 	}
 
 	kvfree(env->explored_states);
-- 
2.20.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 bpf-next 3/3] bpf: convert explored_states to hash table
  2019-05-22  3:17 [PATCH v2 bpf-next 0/3] bpf: optimize explored_states Alexei Starovoitov
  2019-05-22  3:17 ` [PATCH v2 bpf-next 1/3] bpf: cleanup explored_states Alexei Starovoitov
  2019-05-22  3:17 ` [PATCH v2 bpf-next 2/3] bpf: split explored_states Alexei Starovoitov
@ 2019-05-22  3:17 ` Alexei Starovoitov
  2019-05-22  5:54 ` [PATCH v2 bpf-next 0/3] bpf: optimize explored_states Andrii Nakryiko
  2019-05-24  8:05 ` Daniel Borkmann
  4 siblings, 0 replies; 6+ messages in thread
From: Alexei Starovoitov @ 2019-05-22  3:17 UTC (permalink / raw)
  To: davem; +Cc: daniel, netdev, bpf, kernel-team

All prune points inside a callee bpf function most likely will have
different callsites. For example, if function foo() is called from
two callsites the half of explored states in all prune points in foo()
will be useless for subsequent walking of one of those callsites.
Fortunately explored_states pruning heuristics keeps the number of states
per prune point small, but walking these states is still a waste of cpu
time when the callsite of the current state is different from the callsite
of the explored state.

To improve pruning logic convert explored_states into hash table and
use simple insn_idx ^ callsite hash to select hash bucket.
This optimization has no effect on programs without bpf2bpf calls
and drastically improves programs with calls.
In the later case it reduces total memory consumption in 1M scale tests
by almost 3 times (peak_states drops from 5752 to 2016).

Care should be taken when comparing the states for equivalency.
Since the same hash bucket can now contain states with different indices
the insn_idx has to be part of verifier_state and compared.

Different hash table sizes and different hash functions were explored,
but the results were not significantly better vs this patch.
They can be improved in the future.

Hit/miss heuristic is not counting index miscompare as a miss.
Otherwise verifier stats become unstable when experimenting
with different hash functions.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |  1 +
 kernel/bpf/verifier.c        | 23 ++++++++++++++++++-----
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 02bba09a0ea1..405b502283c5 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -187,6 +187,7 @@ struct bpf_func_state {
 struct bpf_verifier_state {
 	/* call stack tracking */
 	struct bpf_func_state *frame[MAX_CALL_FRAMES];
+	u32 insn_idx;
 	u32 curframe;
 	u32 active_spin_lock;
 	bool speculative;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index acf186a8111e..e9ed9f2e3b9a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5435,11 +5435,19 @@ enum {
 	BRANCH = 2,
 };
 
+static u32 state_htab_size(struct bpf_verifier_env *env)
+{
+	return env->prog->len;
+}
+
 static struct bpf_verifier_state_list **explored_state(
 					struct bpf_verifier_env *env,
 					int idx)
 {
-	return &env->explored_states[idx];
+	struct bpf_verifier_state *cur = env->cur_state;
+	struct bpf_func_state *state = cur->frame[cur->curframe];
+
+	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
 }
 
 static void init_explored_state(struct bpf_verifier_env *env, int idx)
@@ -6017,7 +6025,8 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
 
 	sl = *explored_state(env, insn);
 	while (sl) {
-		if (sl->state.curframe != cur->curframe)
+		if (sl->state.insn_idx != insn ||
+		    sl->state.curframe != cur->curframe)
 			goto next;
 		for (i = 0; i <= cur->curframe; i++)
 			if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
@@ -6383,6 +6392,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	clean_live_states(env, insn_idx, cur);
 
 	while (sl) {
+		states_cnt++;
+		if (sl->state.insn_idx != insn_idx)
+			goto next;
 		if (states_equal(env, &sl->state, cur)) {
 			sl->hit_cnt++;
 			/* reached equivalent register/stack state,
@@ -6400,7 +6412,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 				return err;
 			return 1;
 		}
-		states_cnt++;
 		sl->miss_cnt++;
 		/* heuristic to determine whether this state is beneficial
 		 * to keep checking from state equivalence point of view.
@@ -6427,6 +6438,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 			sl = *pprev;
 			continue;
 		}
+next:
 		pprev = &sl->next;
 		sl = *pprev;
 	}
@@ -6458,6 +6470,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 		kfree(new_sl);
 		return err;
 	}
+	new->insn_idx = insn_idx;
 	new_sl->next = *explored_state(env, insn_idx);
 	*explored_state(env, insn_idx) = new_sl;
 	/* connect new state to parentage chain. Current frame needs all
@@ -8137,7 +8150,7 @@ static void free_states(struct bpf_verifier_env *env)
 	if (!env->explored_states)
 		return;
 
-	for (i = 0; i < env->prog->len; i++) {
+	for (i = 0; i < state_htab_size(env); i++) {
 		sl = env->explored_states[i];
 
 		while (sl) {
@@ -8245,7 +8258,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 			goto skip_full_check;
 	}
 
-	env->explored_states = kvcalloc(env->prog->len,
+	env->explored_states = kvcalloc(state_htab_size(env),
 				       sizeof(struct bpf_verifier_state_list *),
 				       GFP_USER);
 	ret = -ENOMEM;
-- 
2.20.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 bpf-next 0/3] bpf: optimize explored_states
  2019-05-22  3:17 [PATCH v2 bpf-next 0/3] bpf: optimize explored_states Alexei Starovoitov
                   ` (2 preceding siblings ...)
  2019-05-22  3:17 ` [PATCH v2 bpf-next 3/3] bpf: convert explored_states to hash table Alexei Starovoitov
@ 2019-05-22  5:54 ` Andrii Nakryiko
  2019-05-24  8:05 ` Daniel Borkmann
  4 siblings, 0 replies; 6+ messages in thread
From: Andrii Nakryiko @ 2019-05-22  5:54 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: davem, Daniel Borkmann, Networking, bpf, Kernel Team

On Tue, May 21, 2019 at 8:17 PM Alexei Starovoitov <ast@kernel.org> wrote:
>
> Convert explored_states array into hash table and use simple hash to
> reduce verifier peak memory consumption for programs with bpf2bpf calls.
> More details in patch 3.
>
> v1->v2: fixed Jakub's small nit in patch 1
>
> Alexei Starovoitov (3):
>   bpf: cleanup explored_states
>   bpf: split explored_states
>   bpf: convert explored_states to hash table
>
>  include/linux/bpf_verifier.h |  2 +
>  kernel/bpf/verifier.c        | 77 ++++++++++++++++++++++--------------
>  2 files changed, 50 insertions(+), 29 deletions(-)
>
> --
> 2.20.0
>

For the series:

Acked-by: Andrii Nakryiko <andriin@fb.com>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 bpf-next 0/3] bpf: optimize explored_states
  2019-05-22  3:17 [PATCH v2 bpf-next 0/3] bpf: optimize explored_states Alexei Starovoitov
                   ` (3 preceding siblings ...)
  2019-05-22  5:54 ` [PATCH v2 bpf-next 0/3] bpf: optimize explored_states Andrii Nakryiko
@ 2019-05-24  8:05 ` Daniel Borkmann
  4 siblings, 0 replies; 6+ messages in thread
From: Daniel Borkmann @ 2019-05-24  8:05 UTC (permalink / raw)
  To: Alexei Starovoitov, davem; +Cc: netdev, bpf, kernel-team

On 05/22/2019 05:17 AM, Alexei Starovoitov wrote:
> Convert explored_states array into hash table and use simple hash to
> reduce verifier peak memory consumption for programs with bpf2bpf calls.
> More details in patch 3.
> 
> v1->v2: fixed Jakub's small nit in patch 1
> 
> Alexei Starovoitov (3):
>   bpf: cleanup explored_states
>   bpf: split explored_states
>   bpf: convert explored_states to hash table
> 
>  include/linux/bpf_verifier.h |  2 +
>  kernel/bpf/verifier.c        | 77 ++++++++++++++++++++++--------------
>  2 files changed, 50 insertions(+), 29 deletions(-)
> 

Applied, thanks!

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, back to index

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-22  3:17 [PATCH v2 bpf-next 0/3] bpf: optimize explored_states Alexei Starovoitov
2019-05-22  3:17 ` [PATCH v2 bpf-next 1/3] bpf: cleanup explored_states Alexei Starovoitov
2019-05-22  3:17 ` [PATCH v2 bpf-next 2/3] bpf: split explored_states Alexei Starovoitov
2019-05-22  3:17 ` [PATCH v2 bpf-next 3/3] bpf: convert explored_states to hash table Alexei Starovoitov
2019-05-22  5:54 ` [PATCH v2 bpf-next 0/3] bpf: optimize explored_states Andrii Nakryiko
2019-05-24  8:05 ` Daniel Borkmann

BPF Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/bpf/0 bpf/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 bpf bpf/ https://lore.kernel.org/bpf \
		bpf@vger.kernel.org bpf@archiver.kernel.org
	public-inbox-index bpf


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.bpf


AGPL code for this site: git clone https://public-inbox.org/ public-inbox