* [PATCH] bpf: optimize constant blinding
@ 2019-06-12 11:32 Naveen N. Rao
  2019-06-12 14:47 ` Alexei Starovoitov
  2019-06-14  4:30 ` kbuild test robot
  0 siblings, 2 replies; 17+ messages in thread
From: Naveen N. Rao @ 2019-06-12 11:32 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann; +Cc: bpf, netdev, Michael Ellerman

Currently, for each BPF instruction that needs constant blinding, we
re-allocate the bpf program to account for its new size and adjust all
branches accordingly. This is inefficient and can lead to a soft lockup
with sufficiently large programs, such as the new verifier scalability
test (ld_dw: xor semi-random 64 bit imms, test 5 -- with
net.core.bpf_jit_harden=2).

Re-implement BPF constant blinding so that it is performed in a single
pass: do a dry run to count the additional instructions, allocate a bpf
program of the corresponding size, and adjust all branches at once.

Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
 kernel/bpf/core.c | 232 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 168 insertions(+), 64 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 33fb292f2e30..82338b5cd98d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -897,8 +897,16 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
 			      struct bpf_insn *to_buff)
 {
 	struct bpf_insn *to = to_buff;
-	u32 imm_rnd = get_random_int();
-	s16 off;
+	u32 imm_rnd = 0;
+
+	if (to_buff)
+		imm_rnd = get_random_int();
+
+#define COPY_BPF_INSN(insn)		do {				\
+						if (to_buff)		\
+							*(to) = insn;	\
+						(to)++;			\
+					} while (0)
 
 	BUILD_BUG_ON(BPF_REG_AX  + 1 != MAX_BPF_JIT_REG);
 	BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);
@@ -926,7 +934,7 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
 	if (from->imm == 0 &&
 	    (from->code == (BPF_ALU   | BPF_MOV | BPF_K) ||
 	     from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
-		*to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg);
+		COPY_BPF_INSN(BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg));
 		goto out;
 	}
 
@@ -940,9 +948,9 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
 	case BPF_ALU | BPF_MOV | BPF_K:
 	case BPF_ALU | BPF_DIV | BPF_K:
 	case BPF_ALU | BPF_MOD | BPF_K:
-		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
-		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
-		*to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX);
+		COPY_BPF_INSN(BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm));
+		COPY_BPF_INSN(BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd));
+		COPY_BPF_INSN(BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX));
 		break;
 
 	case BPF_ALU64 | BPF_ADD | BPF_K:
@@ -954,9 +962,9 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
 	case BPF_ALU64 | BPF_MOV | BPF_K:
 	case BPF_ALU64 | BPF_DIV | BPF_K:
 	case BPF_ALU64 | BPF_MOD | BPF_K:
-		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
-		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
-		*to++ = BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX);
+		COPY_BPF_INSN(BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm));
+		COPY_BPF_INSN(BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd));
+		COPY_BPF_INSN(BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX));
 		break;
 
 	case BPF_JMP | BPF_JEQ  | BPF_K:
@@ -970,13 +978,9 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
 	case BPF_JMP | BPF_JSGE | BPF_K:
 	case BPF_JMP | BPF_JSLE | BPF_K:
 	case BPF_JMP | BPF_JSET | BPF_K:
-		/* Accommodate for extra offset in case of a backjump. */
-		off = from->off;
-		if (off < 0)
-			off -= 2;
-		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
-		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
-		*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
+		COPY_BPF_INSN(BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm));
+		COPY_BPF_INSN(BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd));
+		COPY_BPF_INSN(BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, from->off));
 		break;
 
 	case BPF_JMP32 | BPF_JEQ  | BPF_K:
@@ -990,37 +994,36 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
 	case BPF_JMP32 | BPF_JSGE | BPF_K:
 	case BPF_JMP32 | BPF_JSLE | BPF_K:
 	case BPF_JMP32 | BPF_JSET | BPF_K:
-		/* Accommodate for extra offset in case of a backjump. */
-		off = from->off;
-		if (off < 0)
-			off -= 2;
-		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
-		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
-		*to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
-				      off);
+		COPY_BPF_INSN(BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm));
+		COPY_BPF_INSN(BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd));
+		COPY_BPF_INSN(BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
+				      from->off));
 		break;
 
 	case BPF_LD | BPF_IMM | BPF_DW:
-		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
-		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
-		*to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
-		*to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX);
+		COPY_BPF_INSN(BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm));
+		COPY_BPF_INSN(BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd));
+		COPY_BPF_INSN(BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32));
+		COPY_BPF_INSN(BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX));
 		break;
 	case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
-		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm);
-		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
-		*to++ = BPF_ALU64_REG(BPF_OR,  aux[0].dst_reg, BPF_REG_AX);
+		COPY_BPF_INSN(BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm));
+		COPY_BPF_INSN(BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd));
+		COPY_BPF_INSN(BPF_ALU64_REG(BPF_OR,  aux[0].dst_reg, BPF_REG_AX));
 		break;
 
 	case BPF_ST | BPF_MEM | BPF_DW:
 	case BPF_ST | BPF_MEM | BPF_W:
 	case BPF_ST | BPF_MEM | BPF_H:
 	case BPF_ST | BPF_MEM | BPF_B:
-		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
-		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
-		*to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off);
+		COPY_BPF_INSN(BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm));
+		COPY_BPF_INSN(BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd));
+		COPY_BPF_INSN(BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off));
 		break;
 	}
+
+#undef COPY_BPF_INSN
+
 out:
 	return to - to_buff;
 }
@@ -1065,57 +1068,158 @@ void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
 	bpf_prog_clone_free(fp_other);
 }
 
+static int bpf_jit_blind_adj_imm_off(struct bpf_insn *insn, int prog_index,
+		int clone_index, int blnd[])
+{
+	const s64 imm_min = S32_MIN, imm_max = S32_MAX;
+	const s32 off_min = S16_MIN, off_max = S16_MAX;
+	u8 code = insn->code;
+	s64 imm;
+	s32 off;
+
+	if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
+					BPF_OP(code) == BPF_EXIT)
+		return 0;
+
+	/* Adjust offset of jmps if we cross patch boundaries. */
+	if (BPF_OP(code) == BPF_CALL) {
+		if (insn->src_reg != BPF_PSEUDO_CALL)
+			return 0;
+		imm = blnd[prog_index + insn->imm + 1] - clone_index - 1;
+		if (imm < imm_min || imm > imm_max)
+			return -ERANGE;
+		insn->imm = imm;
+	} else {
+		off = blnd[prog_index + insn->off + 1] - clone_index - 1;
+		if (off < off_min || off > off_max)
+			return -ERANGE;
+		insn->off = off;
+	}
+
+	return 0;
+}
+
 struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
 {
+	int i, j, rewritten, new_len, insn_cnt, ret = 0;
 	struct bpf_insn insn_buff[16], aux[2];
 	struct bpf_prog *clone, *tmp;
-	int insn_delta, insn_cnt;
 	struct bpf_insn *insn;
-	int i, rewritten;
+	u32 *clone_index;
 
 	if (!bpf_jit_blinding_enabled(prog) || prog->blinded)
 		return prog;
 
-	clone = bpf_prog_clone_create(prog, GFP_USER);
-	if (!clone)
+	/* Dry run to figure out the final number of instructions */
+	clone_index = vmalloc(prog->len * sizeof(u32));
+	if (!clone_index)
 		return ERR_PTR(-ENOMEM);
 
-	insn_cnt = clone->len;
-	insn = clone->insnsi;
-
+	insn_cnt = prog->len;
+	insn = prog->insnsi;
+	rewritten = 0;
 	for (i = 0; i < insn_cnt; i++, insn++) {
-		/* We temporarily need to hold the original ld64 insn
-		 * so that we can still access the first part in the
-		 * second blinding run.
-		 */
-		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
-		    insn[1].code == 0)
-			memcpy(aux, insn, sizeof(aux));
+		clone_index[i] = bpf_jit_blind_insn(insn, 0, 0);
+		if (clone_index[i] > 1)
+			rewritten += clone_index[i] - 1;
+	}
 
-		rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
-		if (!rewritten)
-			continue;
+	if (rewritten) {
+		/* Needs new allocation, branch adjustment, et al... */
+		clone = bpf_prog_clone_create(prog, GFP_USER);
+		if (!clone) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		new_len = prog->len + rewritten;
+		tmp = bpf_prog_realloc(clone, bpf_prog_size(new_len), GFP_USER);
+		if (!tmp) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		clone = tmp;
+		clone->len = new_len;
+
+		/* rewrite instructions with constant blinding */
+		insn_cnt = prog->len;
+		insn = prog->insnsi;
+		for (i = 0, j = 0; i < insn_cnt; i++, j++, insn++) {
+			/* capture new instruction index in clone_index */
+			clone_index[i] = j;
 
-		tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
-		if (IS_ERR(tmp)) {
-			/* Patching may have repointed aux->prog during
-			 * realloc from the original one, so we need to
-			 * fix it up here on error.
+			/* We temporarily need to hold the original ld64 insn
+			 * so that we can still access the first part in the
+			 * second blinding run.
 			 */
-			bpf_jit_prog_release_other(prog, clone);
-			return tmp;
+			if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
+			    insn[1].code == 0)
+				memcpy(aux, insn, sizeof(aux));
+
+			rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
+			if (!rewritten) {
+				memcpy(clone->insnsi + j, insn,
+					sizeof(struct bpf_insn));
+			} else {
+				memcpy(clone->insnsi + j, insn_buff,
+					sizeof(struct bpf_insn) * rewritten);
+				j += rewritten - 1;
+			}
 		}
 
-		clone = tmp;
-		insn_delta = rewritten - 1;
+		/* adjust branches */
+		for (i = 0; i < insn_cnt; i++) {
+			int next_insn_idx = clone->len;
+
+			if (i < insn_cnt - 1)
+				next_insn_idx = clone_index[i + 1];
+
+			insn = clone->insnsi + clone_index[i];
+			for (j = clone_index[i]; j < next_insn_idx; j++, insn++) {
+				ret = bpf_jit_blind_adj_imm_off(insn, i, j, clone_index);
+				if (ret) {
+					goto err;
+				}
+			}
+		}
+
+		/* adjust linfo */
+		if (clone->aux->nr_linfo) {
+			struct bpf_line_info *linfo = clone->aux->linfo;
 
-		/* Walk new program and skip insns we just inserted. */
-		insn = clone->insnsi + i + insn_delta;
-		insn_cnt += insn_delta;
-		i        += insn_delta;
+			for (i = 0; i < clone->aux->nr_linfo; i++)
+				linfo[i].insn_off = clone_index[linfo[i].insn_off];
+		}
+	} else {
+		/* if prog length remains same, not much work to do */
+		clone = bpf_prog_clone_create(prog, GFP_USER);
+		if (!clone) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		insn_cnt = clone->len;
+		insn = clone->insnsi;
+
+		for (i = 0; i < insn_cnt; i++, insn++) {
+			if (clone_index[i]) {
+				bpf_jit_blind_insn(insn, aux, insn_buff);
+				memcpy(insn, insn_buff, sizeof(struct bpf_insn));
+			}
+		}
 	}
 
 	clone->blinded = 1;
+
+err:
+	vfree(clone_index);
+
+	if (ret) {
+		if (clone)
+			bpf_jit_prog_release_other(prog, clone);
+		return ERR_PTR(ret);
+	}
+
 	return clone;
 }
 #endif /* CONFIG_BPF_JIT */
-- 
2.21.0



* Re: [PATCH] bpf: optimize constant blinding
  2019-06-12 11:32 [PATCH] bpf: optimize constant blinding Naveen N. Rao
@ 2019-06-12 14:47 ` Alexei Starovoitov
  2019-06-12 15:04   ` Jiong Wang
  2019-06-14  4:30 ` kbuild test robot
  1 sibling, 1 reply; 17+ messages in thread
From: Alexei Starovoitov @ 2019-06-12 14:47 UTC (permalink / raw)
  To: Naveen N. Rao
  Cc: Daniel Borkmann, bpf, Network Development, Michael Ellerman,
	Jiong Wang, Jakub Kicinski

On Wed, Jun 12, 2019 at 4:32 AM Naveen N. Rao
<naveen.n.rao@linux.vnet.ibm.com> wrote:
>
> Currently, for constant blinding, we re-allocate the bpf program to
> account for its new size and adjust all branches to accommodate the
> same, for each BPF instruction that needs constant blinding. This is
> inefficient and can lead to soft lockup with sufficiently large
> programs, such as the new verifier scalability test (ld_dw: xor
> semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)

Slowdown you see is due to patch_insn right?
In such case I prefer to fix the scaling issue of patch_insn instead.
This specific fix for blinding only is not addressing the core of the problem.
Jiong,
how is the progress on fixing patch_insn?


* Re: [PATCH] bpf: optimize constant blinding
  2019-06-12 14:47 ` Alexei Starovoitov
@ 2019-06-12 15:04   ` Jiong Wang
  2019-06-12 15:25     ` Jiong Wang
  0 siblings, 1 reply; 17+ messages in thread
From: Jiong Wang @ 2019-06-12 15:04 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Naveen N. Rao, Daniel Borkmann, bpf, Network Development,
	Michael Ellerman, Jiong Wang, Jakub Kicinski


Alexei Starovoitov writes:

> On Wed, Jun 12, 2019 at 4:32 AM Naveen N. Rao
> <naveen.n.rao@linux.vnet.ibm.com> wrote:
>>
>> Currently, for constant blinding, we re-allocate the bpf program to
>> account for its new size and adjust all branches to accommodate the
>> same, for each BPF instruction that needs constant blinding. This is
>> inefficient and can lead to soft lockup with sufficiently large
>> programs, such as the new verifier scalability test (ld_dw: xor
>> semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)
>
> Slowdown you see is due to patch_insn right?
> In such case I prefer to fix the scaling issue of patch_insn instead.
> This specific fix for blinding only is not addressing the core of the problem.
> Jiong,
> how is the progress on fixing patch_insn?

I was actually about to reply to this email, as we have discussed exactly
the same issue with jit blinding here:

  https://www.spinics.net/lists/bpf/msg01836.html

And sorry for the slow progress on fixing patch_insn, please give me one
more week, I will try to send out a RFC for it.

Regards,
Jiong


* Re: [PATCH] bpf: optimize constant blinding
  2019-06-12 15:04   ` Jiong Wang
@ 2019-06-12 15:25     ` Jiong Wang
  2019-06-12 15:28       ` Alexei Starovoitov
  0 siblings, 1 reply; 17+ messages in thread
From: Jiong Wang @ 2019-06-12 15:25 UTC (permalink / raw)
  To: Jiong Wang
  Cc: Alexei Starovoitov, Naveen N. Rao, Daniel Borkmann, bpf,
	Network Development, Michael Ellerman, Jakub Kicinski


Jiong Wang writes:

> Alexei Starovoitov writes:
>
>> On Wed, Jun 12, 2019 at 4:32 AM Naveen N. Rao
>> <naveen.n.rao@linux.vnet.ibm.com> wrote:
>>>
>>> Currently, for constant blinding, we re-allocate the bpf program to
>>> account for its new size and adjust all branches to accommodate the
>>> same, for each BPF instruction that needs constant blinding. This is
>>> inefficient and can lead to soft lockup with sufficiently large
>>> programs, such as the new verifier scalability test (ld_dw: xor
>>> semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)
>>
>> Slowdown you see is due to patch_insn right?
>> In such case I prefer to fix the scaling issue of patch_insn instead.
>> This specific fix for blinding only is not addressing the core of the problem.
>> Jiong,
>> how is the progress on fixing patch_insn?

What I have done is digest your conversation with Edward, and I am slightly
inclined to the BB based approach, as it also exposes the inserted insns to
later passes in a natural way. I was then trying to find a way to create BB
info with little extra code based on the current verifier code, for example
as a side effect of check_subprogs, which is doing two insn traversals
already. (I had some such code before in the historical
wip/bpf-loop-detection branch, but feel it might still be too heavy for just
improving insn patching.)

>
> I actually was about to reply this email as we have discussed exactly the
> same issue on jit blinding here:
>
>   https://www.spinics.net/lists/bpf/msg01836.html
>
> And sorry for the slow progress on fixing patch_insn, please give me one
> more week, I will try to send out a RFC for it.
>
> Regards,
> Jiong



* Re: [PATCH] bpf: optimize constant blinding
  2019-06-12 15:25     ` Jiong Wang
@ 2019-06-12 15:28       ` Alexei Starovoitov
  2019-06-14 15:13         ` Jiong Wang
  0 siblings, 1 reply; 17+ messages in thread
From: Alexei Starovoitov @ 2019-06-12 15:28 UTC (permalink / raw)
  To: Jiong Wang
  Cc: Naveen N. Rao, Daniel Borkmann, bpf, Network Development,
	Michael Ellerman, Jakub Kicinski

On Wed, Jun 12, 2019 at 8:25 AM Jiong Wang <jiong.wang@netronome.com> wrote:
>
>
> Jiong Wang writes:
>
> > Alexei Starovoitov writes:
> >
> >> On Wed, Jun 12, 2019 at 4:32 AM Naveen N. Rao
> >> <naveen.n.rao@linux.vnet.ibm.com> wrote:
> >>>
> >>> Currently, for constant blinding, we re-allocate the bpf program to
> >>> account for its new size and adjust all branches to accommodate the
> >>> same, for each BPF instruction that needs constant blinding. This is
> >>> inefficient and can lead to soft lockup with sufficiently large
> >>> programs, such as the new verifier scalability test (ld_dw: xor
> >>> semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)
> >>
> >> Slowdown you see is due to patch_insn right?
> >> In such case I prefer to fix the scaling issue of patch_insn instead.
> >> This specific fix for blinding only is not addressing the core of the problem.
> >> Jiong,
> >> how is the progress on fixing patch_insn?
>
> And what I have done is I have digested your conversion with Edward, and is
> slightly incline to the BB based approach as it also exposes the inserted
> insn to later pass in a natural way, then was trying to find a way that
> could create BB info in little extra code based on current verifier code,
> for example as a side effect of check_subprogs which is doing two insn
> traversal already. (I had some such code before in the historical
> wip/bpf-loop-detection branch, but feel it might be still too heavy for
> just improving insn patching)

BB - basic block?
I'm not sure that was necessary.
The idea was that patching would add stuff to a linked list instead,
with a single pass at the end to linearize it.


* Re: [PATCH] bpf: optimize constant blinding
  2019-06-12 11:32 [PATCH] bpf: optimize constant blinding Naveen N. Rao
  2019-06-12 14:47 ` Alexei Starovoitov
@ 2019-06-14  4:30 ` kbuild test robot
  1 sibling, 0 replies; 17+ messages in thread
From: kbuild test robot @ 2019-06-14  4:30 UTC (permalink / raw)
  To: Naveen N. Rao
  Cc: kbuild-all, Alexei Starovoitov, Daniel Borkmann, bpf, netdev,
	Michael Ellerman

Hi "Naveen,

I love your patch! Perhaps something to improve:

[auto build test WARNING on bpf-next/master]
[also build test WARNING on v5.2-rc4 next-20190613]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Naveen-N-Rao/bpf-optimize-constant-blinding/20190614-023948
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.1-rc1-7-g2b96cd8-dirty
        make ARCH=x86_64 allmodconfig
        make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__'

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)

   kernel/bpf/core.c:210:49: sparse: sparse: arithmetics on pointers to functions
   include/linux/rbtree.h:120:9: sparse: sparse: incompatible types in comparison expression (different address spaces):
   include/linux/rbtree.h:120:9: sparse:    struct rb_node [noderef] <asn:4> *
   include/linux/rbtree.h:120:9: sparse:    struct rb_node *
   include/linux/rbtree.h:120:9: sparse: sparse: incompatible types in comparison expression (different address spaces):
   include/linux/rbtree.h:120:9: sparse:    struct rb_node [noderef] <asn:4> *
   include/linux/rbtree.h:120:9: sparse:    struct rb_node *
>> kernel/bpf/core.c:1122:59: sparse: sparse: Using plain integer as NULL pointer
   kernel/bpf/core.c:1122:62: sparse: sparse: Using plain integer as NULL pointer
   include/trace/events/xdp.h:28:1: sparse: sparse: Using plain integer as NULL pointer
   include/trace/events/xdp.h:53:1: sparse: sparse: Using plain integer as NULL pointer
   include/trace/events/xdp.h:111:1: sparse: sparse: Using plain integer as NULL pointer
   include/trace/events/xdp.h:126:1: sparse: sparse: Using plain integer as NULL pointer
   include/trace/events/xdp.h:161:1: sparse: sparse: Using plain integer as NULL pointer
   include/trace/events/xdp.h:196:1: sparse: sparse: Using plain integer as NULL pointer
   include/trace/events/xdp.h:231:1: sparse: sparse: Using plain integer as NULL pointer

vim +1122 kernel/bpf/core.c

  1101	
  1102	struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
  1103	{
  1104		int i, j, rewritten, new_len, insn_cnt, ret = 0;
  1105		struct bpf_insn insn_buff[16], aux[2];
  1106		struct bpf_prog *clone, *tmp;
  1107		struct bpf_insn *insn;
  1108		u32 *clone_index;
  1109	
  1110		if (!bpf_jit_blinding_enabled(prog) || prog->blinded)
  1111			return prog;
  1112	
  1113		/* Dry run to figure out the final number of instructions */
  1114		clone_index = vmalloc(prog->len * sizeof(u32));
  1115		if (!clone_index)
  1116			return ERR_PTR(-ENOMEM);
  1117	
  1118		insn_cnt = prog->len;
  1119		insn = prog->insnsi;
  1120		rewritten = 0;
  1121		for (i = 0; i < insn_cnt; i++, insn++) {
> 1122			clone_index[i] = bpf_jit_blind_insn(insn, 0, 0);
  1123			if (clone_index[i] > 1)
  1124				rewritten += clone_index[i] - 1;
  1125		}
  1126	
  1127		if (rewritten) {
  1128			/* Needs new allocation, branch adjustment, et al... */
  1129			clone = bpf_prog_clone_create(prog, GFP_USER);
  1130			if (!clone) {
  1131				ret = -ENOMEM;
  1132				goto err;
  1133			}
  1134	
  1135			new_len = prog->len + rewritten;
  1136			tmp = bpf_prog_realloc(clone, bpf_prog_size(new_len), GFP_USER);
  1137			if (!tmp) {
  1138				ret = -ENOMEM;
  1139				goto err;
  1140			}
  1141			clone = tmp;
  1142			clone->len = new_len;
  1143	
  1144			/* rewrite instructions with constant blinding */
  1145			insn_cnt = prog->len;
  1146			insn = prog->insnsi;
  1147			for (i = 0, j = 0; i < insn_cnt; i++, j++, insn++) {
  1148				/* capture new instruction index in clone_index */
  1149				clone_index[i] = j;
  1150	
  1151				/* We temporarily need to hold the original ld64 insn
  1152				 * so that we can still access the first part in the
  1153				 * second blinding run.
  1154				 */
  1155				if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
  1156				    insn[1].code == 0)
  1157					memcpy(aux, insn, sizeof(aux));
  1158	
  1159				rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
  1160				if (!rewritten) {
  1161					memcpy(clone->insnsi + j, insn,
  1162						sizeof(struct bpf_insn));
  1163				} else {
  1164					memcpy(clone->insnsi + j, insn_buff,
  1165						sizeof(struct bpf_insn) * rewritten);
  1166					j += rewritten - 1;
  1167				}
  1168			}
  1169	
  1170			/* adjust branches */
  1171			for (i = 0; i < insn_cnt; i++) {
  1172				int next_insn_idx = clone->len;
  1173	
  1174				if (i < insn_cnt - 1)
  1175					next_insn_idx = clone_index[i + 1];
  1176	
  1177				insn = clone->insnsi + clone_index[i];
  1178				for (j = clone_index[i]; j < next_insn_idx; j++, insn++) {
  1179					ret = bpf_jit_blind_adj_imm_off(insn, i, j, clone_index);
  1180					if (ret) {
  1181						goto err;
  1182					}
  1183				}
  1184			}
  1185	
  1186			/* adjust linfo */
  1187			if (clone->aux->nr_linfo) {
  1188				struct bpf_line_info *linfo = clone->aux->linfo;
  1189	
  1190				for (i = 0; i < clone->aux->nr_linfo; i++)
  1191					linfo[i].insn_off = clone_index[linfo[i].insn_off];
  1192			}
  1193		} else {
  1194			/* if prog length remains same, not much work to do */
  1195			clone = bpf_prog_clone_create(prog, GFP_USER);
  1196			if (!clone) {
  1197				ret = -ENOMEM;
  1198				goto err;
  1199			}
  1200	
  1201			insn_cnt = clone->len;
  1202			insn = clone->insnsi;
  1203	
  1204			for (i = 0; i < insn_cnt; i++, insn++) {
  1205				if (clone_index[i]) {
  1206					bpf_jit_blind_insn(insn, aux, insn_buff);
  1207					memcpy(insn, insn_buff, sizeof(struct bpf_insn));
  1208				}
  1209			}
  1210		}
  1211	
  1212		clone->blinded = 1;
  1213	
  1214	err:
  1215		vfree(clone_index);
  1216	
  1217		if (ret) {
  1218			if (clone)
  1219				bpf_jit_prog_release_other(prog, clone);
  1220			return ERR_PTR(ret);
  1221		}
  1222	
  1223		return clone;
  1224	}
  1225	#endif /* CONFIG_BPF_JIT */
  1226	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation


* Re: [PATCH] bpf: optimize constant blinding
  2019-06-12 15:28       ` Alexei Starovoitov
@ 2019-06-14 15:13         ` Jiong Wang
  2019-06-14 17:05           ` Alexei Starovoitov
  2019-06-17 19:47           ` Edward Cree
  0 siblings, 2 replies; 17+ messages in thread
From: Jiong Wang @ 2019-06-14 15:13 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jiong Wang, Naveen N. Rao, Daniel Borkmann, bpf,
	Network Development, Michael Ellerman, Jakub Kicinski


Alexei Starovoitov writes:

> On Wed, Jun 12, 2019 at 8:25 AM Jiong Wang <jiong.wang@netronome.com> wrote:
>>
>>
>> Jiong Wang writes:
>>
>> > Alexei Starovoitov writes:
>> >
>> >> On Wed, Jun 12, 2019 at 4:32 AM Naveen N. Rao
>> >> <naveen.n.rao@linux.vnet.ibm.com> wrote:
>> >>>
>> >>> Currently, for constant blinding, we re-allocate the bpf program to
>> >>> account for its new size and adjust all branches to accommodate the
>> >>> same, for each BPF instruction that needs constant blinding. This is
>> >>> inefficient and can lead to soft lockup with sufficiently large
>> >>> programs, such as the new verifier scalability test (ld_dw: xor
>> >>> semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)
>> >>
>> >> Slowdown you see is due to patch_insn right?
>> >> In such case I prefer to fix the scaling issue of patch_insn instead.
>> >> This specific fix for blinding only is not addressing the core of the problem.
>> >> Jiong,
>> >> how is the progress on fixing patch_insn?
>>
>> And what I have done is I have digested your conversion with Edward, and is
>> slightly incline to the BB based approach as it also exposes the inserted
>> insn to later pass in a natural way, then was trying to find a way that
>> could create BB info in little extra code based on current verifier code,
>> for example as a side effect of check_subprogs which is doing two insn
>> traversal already. (I had some such code before in the historical
>> wip/bpf-loop-detection branch, but feel it might be still too heavy for
>> just improving insn patching)
>
> BB - basic block?
> I'm not sure that was necessary.
> The idea was that patching is adding stuff to linked list instead
> and single pass at the end to linearize it.

Just an update to keep people posted.

I am working on the linked list based approach; the implementation looks like
the following, mostly a combination of the discussions that have happened and
Naveen's patch. Please feel free to comment.

  - Use the reserved opcode 0xf0 with BPF_ALU as new pseudo insn code
    BPF_LIST_INSN. (0xf0 is also used with BPF_JMP class for tail call).

  - Introduce patch pool into bpf_prog->aux to keep all patched insns.
    Pool structure looks like:

    struct {
      int num;
      int prev;
      int next;
    } head_0;
    NUM patched insns for head_0
    head_1;
    patched insns for head_1
    head_2;
    ...

  - Now, when doing bpf_patch_insn_single, it doesn't change the original
    prog etc.; instead, it merely updates the insn at the patched offset into
    a BPF_LIST_INSN, and pushes the patched insns plus a patch header into
    the patch pool. The fields of the BPF_LIST_INSN are updated to set up the
    links:

      BPF_LIST_INSN.off += patched_size
      (accumulating the size attached to this list_insn; it is possible that
      a later patch pass patches an insn in the patch pool, which means insn
      traversal needs to change: when seeing a BPF_LIST_INSN, go through the
      list)

      BPF_LIST_INSN.imm = offset of the patch header in the patch pool
      (off is 16-bit, imm is 32-bit, and the patch pool length fits in 32
      bits, so use imm for keeping the offset, meaning a BPF_LIST_INSN can
      contain no more than 8192 insns, which I guess is enough)

  - When doing linearize:
    1. A quick scan of prog->insnsi to determine the final
       image size, which would be as simple as:

      fini_size = 0;
      for_each_insn:
        if (insn.code == (BPF_ALU | BPF_LIST_HEAD))
          fini_size += insn->off;
        else
          fini_size++;

    2. Resize prog to fini_size, and do a second scan of prog->insnsi to
       copy over all insns and patched insns, at the same time generating an
       auxiliary index array which maps an old index to the new index in the
       final image, like the "clone_index" in Naveen's patch.

    3. Finally, a single pass to update branch targets, using the same algo
       as this patch.
       
  - The APIs for doing insn patching look like:
      bpf_patch_insn_init:   init the generic patch pool.
      bpf_patch_insn_single: push patched insns to the pool.
                             link them to the associated BPF_LIST_INSN.
      bpf_patch_insn_fini:   linearize a bpf_prog contains BPF_LIST_INSN.
                             destroy patch pool in prog->aux.
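
As a rough illustration of the above, here is a minimal C sketch of the patch
pool header and the first linearize scan; the names (bpf_patch_head,
blind_final_size) and the exact BPF_LIST_INSN encoding are assumptions for
illustration only, not finalized code:

  #include <linux/filter.h>

  /* Assumed encoding: reserved opcode 0xf0 with the BPF_ALU class. */
  #define BPF_LIST_INSN  (0xf0 | BPF_ALU)

  /* Header placed in front of each run of patched insns in the pool. */
  struct bpf_patch_head {
          int num;        /* number of patched insns that follow */
          int prev;       /* pool offset of the previous header */
          int next;       /* pool offset of the next header */
  };

  /* Step 1 of linearize: each BPF_LIST_INSN carries the accumulated
   * patched size in insn->off, so the final image size is one pass.
   */
  static u32 blind_final_size(const struct bpf_prog *prog)
  {
          u32 i, fini_size = 0;

          for (i = 0; i < prog->len; i++) {
                  const struct bpf_insn *insn = &prog->insnsi[i];

                  if (insn->code == BPF_LIST_INSN)
                          fini_size += insn->off; /* whole patched list */
                  else
                          fini_size++;
          }
          return fini_size;
  }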

I am trying to make the implementation work with JIT blinding first, to make
sure the basic things are ready. As JIT blinding happens after verification,
there is no need to bother with aux updates etc. Then I will clean up quite a
few things, for example patching a patched insn, adjusting aux data, and what
to do with insn deletion.

Regards,
Jiong


* Re: [PATCH] bpf: optimize constant blinding
  2019-06-14 15:13         ` Jiong Wang
@ 2019-06-14 17:05           ` Alexei Starovoitov
  2019-06-14 22:28             ` Andrii Nakryiko
  2019-06-17 19:47           ` Edward Cree
  1 sibling, 1 reply; 17+ messages in thread
From: Alexei Starovoitov @ 2019-06-14 17:05 UTC (permalink / raw)
  To: Jiong Wang
  Cc: Naveen N. Rao, Daniel Borkmann, bpf, Network Development,
	Michael Ellerman, Jakub Kicinski

On Fri, Jun 14, 2019 at 8:13 AM Jiong Wang <jiong.wang@netronome.com> wrote:
>
>
> Alexei Starovoitov writes:
>
> > On Wed, Jun 12, 2019 at 8:25 AM Jiong Wang <jiong.wang@netronome.com> wrote:
> >>
> >>
> >> Jiong Wang writes:
> >>
> >> > Alexei Starovoitov writes:
> >> >
> >> >> On Wed, Jun 12, 2019 at 4:32 AM Naveen N. Rao
> >> >> <naveen.n.rao@linux.vnet.ibm.com> wrote:
> >> >>>
> >> >>> Currently, for constant blinding, we re-allocate the bpf program to
> >> >>> account for its new size and adjust all branches to accommodate the
> >> >>> same, for each BPF instruction that needs constant blinding. This is
> >> >>> inefficient and can lead to soft lockup with sufficiently large
> >> >>> programs, such as the new verifier scalability test (ld_dw: xor
> >> >>> semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)
> >> >>
> >> >> Slowdown you see is due to patch_insn right?
> >> >> In such case I prefer to fix the scaling issue of patch_insn instead.
> >> >> This specific fix for blinding only is not addressing the core of the problem.
> >> >> Jiong,
> >> >> how is the progress on fixing patch_insn?
> >>
> >> And what I have done is I have digested your conversion with Edward, and is
> >> slightly incline to the BB based approach as it also exposes the inserted
> >> insn to later pass in a natural way, then was trying to find a way that
> >> could create BB info in little extra code based on current verifier code,
> >> for example as a side effect of check_subprogs which is doing two insn
> >> traversal already. (I had some such code before in the historical
> >> wip/bpf-loop-detection branch, but feel it might be still too heavy for
> >> just improving insn patching)
> >
> > BB - basic block?
> > I'm not sure that was necessary.
> > The idea was that patching is adding stuff to linked list instead
> > and single pass at the end to linearize it.
>
> Just an update and keep people posted.
>
> Working on linked list based approach, the implementation looks like the
> following, mostly a combine of discussions happened and Naveen's patch,
> please feel free to comment.
>
>   - Use the reserved opcode 0xf0 with BPF_ALU as new pseudo insn code
>     BPF_LIST_INSN. (0xf0 is also used with BPF_JMP class for tail call).
>
>   - Introduce patch pool into bpf_prog->aux to keep all patched insns.
>     Pool structure looks like:
>
>     struct {
>       int num;
>       int prev;
>       int next;
>     } head_0;
>     NUM patched insns for head_0
>     head_1;
>     patched insns for head_1
>     head_2;
>     ...
>
>   - Now when doing bpf_patch_insn_single, it doesn't change the original
>     prog etc, instead, it merely update the insn at patched offset into a
>     BPF_LIST_INSN, and pushed the patched insns plus a patch header into
>     the patch pool. Fields of BPF_LIST_INSN is updated to setup the links:
>
>       BPF_LIST_INSN.off += patched_size
>       (accumulating the size attached to this list_insn, it is possible a
>       later patch pass patches insn in the patch pool, this means insn
>       traversal needs to be changed, when seeing BPF_LIST_INSN, should go
>       through the list)
>
>       BPF_LIST_INSN.imm = offset of the patch header in patch pool
>       (off is 16-bit, imm is 32-bit, the patch pool is 32-bit length, so
>       use imm for keeping offset, meaning a BPF_LIST_INSN can contains no
>       more than 8192 insns, guess it is enough)
>
>   - When doing linearize:
>     1. a quick scan of prog->insnsi to know the final
>        image size, would be simple as:
>
>       fini_size = 0;
>       for_each_insn:
>         if (insn.code == (BPF_ALU | BPF_LIST_HEAD))
>           fini_size += insn->off;
>         else
>           fini_size++;
>
>     2. Resize prog into fini_size, and a second scan of prog->insnsi to
>        copy over all insns and patched insns, at the same time generate a
>        auxiliary index array which maps an old index to the new index in
>        final image, like the "clone_index" in Naveen's patch.
>
>     3. Finally, a single pass to update branch target, the same algo used
>        by this patch.
>
>   - The APIs for doing insning patch looks like:
>       bpf_patch_insn_init:   init the generic patch pool.
>       bpf_patch_insn_single: push patched insns to the pool.
>                              link them to the associated BPF_LIST_INSN.
>       bpf_patch_insn_fini:   linearize a bpf_prog contains BPF_LIST_INSN.
>                              destroy patch pool in prog->aux.
>
> I am trying to making the implementation working with jit blind first to make
> sure basic things are ready. As JIT blinds happens after verification so no
> need to both aux update etc. Then will cleanup quite a few things for
> example patch a patched insn, adjust aux data, what to do with insn delete
> etc.

Explicit indices feel like premature optimization.
Maybe use a vanilla singly linked list instead?
Also, do we have a case where a patched insn will be patched again?
In such a case the 'patch insn pool' would become recursive?
It feels difficult to think through all the offsets and indices,
whereas with a linked list, patching patched insns is just a matter of
inserting them into the linked list.

Maybe a better alternative is to convert the whole program to a linked list
of insns, with branch targets becoming pointers, and to insert patched
insns into this single singly linked list?


* Re: [PATCH] bpf: optimize constant blinding
  2019-06-14 17:05           ` Alexei Starovoitov
@ 2019-06-14 22:28             ` Andrii Nakryiko
  0 siblings, 0 replies; 17+ messages in thread
From: Andrii Nakryiko @ 2019-06-14 22:28 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jiong Wang, Naveen N. Rao, Daniel Borkmann, bpf,
	Network Development, Michael Ellerman, Jakub Kicinski

On Fri, Jun 14, 2019 at 10:06 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Fri, Jun 14, 2019 at 8:13 AM Jiong Wang <jiong.wang@netronome.com> wrote:
> >
> >
> > Alexei Starovoitov writes:
> >
> > > On Wed, Jun 12, 2019 at 8:25 AM Jiong Wang <jiong.wang@netronome.com> wrote:
> > >>
> > >>
> > >> Jiong Wang writes:
> > >>
> > >> > Alexei Starovoitov writes:
> > >> >
> > >> >> On Wed, Jun 12, 2019 at 4:32 AM Naveen N. Rao
> > >> >> <naveen.n.rao@linux.vnet.ibm.com> wrote:
> > >> >>>
> > >> >>> Currently, for constant blinding, we re-allocate the bpf program to
> > >> >>> account for its new size and adjust all branches to accommodate the
> > >> >>> same, for each BPF instruction that needs constant blinding. This is
> > >> >>> inefficient and can lead to soft lockup with sufficiently large
> > >> >>> programs, such as the new verifier scalability test (ld_dw: xor
> > >> >>> semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)
> > >> >>
> > >> >> Slowdown you see is due to patch_insn right?
> > >> >> In such case I prefer to fix the scaling issue of patch_insn instead.
> > >> >> This specific fix for blinding only is not addressing the core of the problem.
> > >> >> Jiong,
> > >> >> how is the progress on fixing patch_insn?
> > >>
> > >> And what I have done is I have digested your conversion with Edward, and is
> > >> slightly incline to the BB based approach as it also exposes the inserted
> > >> insn to later pass in a natural way, then was trying to find a way that
> > >> could create BB info in little extra code based on current verifier code,
> > >> for example as a side effect of check_subprogs which is doing two insn
> > >> traversal already. (I had some such code before in the historical
> > >> wip/bpf-loop-detection branch, but feel it might be still too heavy for
> > >> just improving insn patching)
> > >
> > > BB - basic block?
> > > I'm not sure that was necessary.
> > > The idea was that patching is adding stuff to linked list instead
> > > and single pass at the end to linearize it.
> >
> > Just an update and keep people posted.
> >
> > Working on linked list based approach, the implementation looks like the
> > following, mostly a combine of discussions happened and Naveen's patch,
> > please feel free to comment.
> >
> >   - Use the reserved opcode 0xf0 with BPF_ALU as new pseudo insn code
> >     BPF_LIST_INSN. (0xf0 is also used with BPF_JMP class for tail call).
> >
> >   - Introduce patch pool into bpf_prog->aux to keep all patched insns.
> >     Pool structure looks like:
> >
> >     struct {
> >       int num;
> >       int prev;
> >       int next;
> >     } head_0;
> >     NUM patched insns for head_0
> >     head_1;
> >     patched insns for head_1
> >     head_2;
> >     ...
> >
> >   - Now when doing bpf_patch_insn_single, it doesn't change the original
> >     prog etc, instead, it merely update the insn at patched offset into a
> >     BPF_LIST_INSN, and pushed the patched insns plus a patch header into
> >     the patch pool. Fields of BPF_LIST_INSN is updated to setup the links:
> >
> >       BPF_LIST_INSN.off += patched_size
> >       (accumulating the size attached to this list_insn, it is possible a
> >       later patch pass patches insn in the patch pool, this means insn
> >       traversal needs to be changed, when seeing BPF_LIST_INSN, should go
> >       through the list)
> >
> >       BPF_LIST_INSN.imm = offset of the patch header in patch pool
> >       (off is 16-bit, imm is 32-bit, the patch pool is 32-bit length, so
> >       use imm for keeping offset, meaning a BPF_LIST_INSN can contains no
> >       more than 8192 insns, guess it is enough)
> >
> >   - When doing linearize:
> >     1. a quick scan of prog->insnsi to know the final
> >        image size, would be simple as:
> >
> >       fini_size = 0;
> >       for_each_insn:
> >         if (insn.code == (BPF_ALU | BPF_LIST_HEAD))
> >           fini_size += insn->off;
> >         else
> >           fini_size++;
> >
> >     2. Resize prog into fini_size, and a second scan of prog->insnsi to
> >        copy over all insns and patched insns, at the same time generate a
> >        auxiliary index array which maps an old index to the new index in
> >        final image, like the "clone_index" in Naveen's patch.
> >
> >     3. Finally, a single pass to update branch target, the same algo used
> >        by this patch.
> >
> >   - The APIs for doing insning patch looks like:
> >       bpf_patch_insn_init:   init the generic patch pool.
> >       bpf_patch_insn_single: push patched insns to the pool.
> >                              link them to the associated BPF_LIST_INSN.
> >       bpf_patch_insn_fini:   linearize a bpf_prog contains BPF_LIST_INSN.
> >                              destroy patch pool in prog->aux.
> >
> > I am trying to making the implementation working with jit blind first to make
> > sure basic things are ready. As JIT blinds happens after verification so no
> > need to both aux update etc. Then will cleanup quite a few things for
> > example patch a patched insn, adjust aux data, what to do with insn delete
> > etc.
>
> explicit indices feels like premature optimization.
> May be use vanilla singly linked list instead?
> Also do we have a case when patched insn will be patched again?
> In such case 'patch insn pool' will become recursive?
> Feels difficult to think through all offsets and indices.
> Whereas with linked list patching patched insns will be inserting
> them into link list.
>
> May be better alternative is to convert the whole program to link list
> of insns with branch targets becoming pointers and insert patched
> insns into this single singly linked list ?

I think converting to a singly-linked list is a good, simple and
straightforward approach. The only downside is 3x more memory (insn +
next pointer + branch pointer for instructions that branch/call), but
it shouldn't be a big deal in practice. But it seems like a good
opportunity to simplify and clean up patching code, so I'd definitely
vote to start with this approach.


* Re: [PATCH] bpf: optimize constant blinding
  2019-06-14 15:13         ` Jiong Wang
  2019-06-14 17:05           ` Alexei Starovoitov
@ 2019-06-17 19:47           ` Edward Cree
  2019-06-17 19:59             ` Jiong Wang
  1 sibling, 1 reply; 17+ messages in thread
From: Edward Cree @ 2019-06-17 19:47 UTC (permalink / raw)
  To: Jiong Wang, Alexei Starovoitov
  Cc: Naveen N. Rao, Daniel Borkmann, bpf, Network Development,
	Michael Ellerman, Jakub Kicinski

On 14/06/2019 16:13, Jiong Wang wrote:
> Just an update and keep people posted.
>
> Working on linked list based approach, the implementation looks like the
> following, mostly a combine of discussions happened and Naveen's patch,
> please feel free to comment.
>
>   - Use the reserved opcode 0xf0 with BPF_ALU as new pseudo insn code
>     BPF_LIST_INSN. (0xf0 is also used with BPF_JMP class for tail call).
>
>   - Introduce patch pool into bpf_prog->aux to keep all patched insns.
It's not clear to me what the point of the patch pool is, rather than just
 doing the patch straight away.  The idea is that when the prog is
 half-patched, insn idxs / jump offsets / etc. all still refer to the original
 locations; only some of those might now be a list rather than a single insn.
 Concretely:

struct bpf_insn_list { bpf_insn insn; struct list_head list; }
orig prog: array bpf_insn[3] = {cond jump +1, insn_foo, exit};
start of day verifier converts this into array bpf_insn_list[3],
 each with new.insn = orig.insn; INIT_LIST_HEAD(new.list)

verifier decides to patch insn_foo into {insn_bar, insn_baz}.
so we allocate bpf_insn_list *x;
insn_foo.insn = insn_bar;
x->insn = insn_baz;
list_add_tail(&x->list, &insn_foo.list);

the cond jump +1 is _not_ changed at this point, because it counts
 bpf_insn_lists and not insns.

Then at end of day to linearise we just have int new_idx[3];
 populate it by iterating over the array of bpf_insn_list and adding on
 the list length each time (so we get {0, 1, 3})
 then allocate output prog of the total length (here 4, calculated by
 that same pass as effectively off-the-end entry of new_idx)
 then iterate again to write out the output prog, when we see that 'cond
 jump +1' in old_idx 0 we see that (new_idx[2] - new_idx[0] - 1) = 2, so
 it becomes 'cond jump +2'.


This seems to me like it'd be easier to work with than making the whole
 program one big linked list (where you have to separately keep track of
 what each insn's idx was before patching).  But I haven't tried to code
 it yet, so anything could happen ;-)  It does rely on the assumption
 that only original insns (or the first insn of a list they get patched
 with) will ever be jump targets or otherwise need their insn_idx taken.
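
For concreteness, a rough C sketch of that linearisation; insn_list_len(),
build_new_idx() and fixup_jmp_off() are names assumed here purely for
illustration, not proposed kernel interfaces:

  #include <linux/filter.h>
  #include <linux/list.h>

  struct bpf_insn_list {
          struct bpf_insn insn;
          struct list_head list;  /* insns patched in after this one */
  };

  static u32 insn_list_len(struct bpf_insn_list *e)
  {
          struct list_head *p;
          u32 n = 1;

          list_for_each(p, &e->list)
                  n++;
          return n;
  }

  /* Record where each original insn lands in the output image; the return
   * value is the total length of the output program.
   */
  static u32 build_new_idx(struct bpf_insn_list *prog, u32 len, u32 *new_idx)
  {
          u32 i, pos = 0;

          for (i = 0; i < len; i++) {
                  new_idx[i] = pos;
                  pos += insn_list_len(&prog[i]);
          }
          return pos;
  }

  /* A jump at old index i with offset off targets old index i + 1 + off;
   * its new offset is the distance between the two new positions.
   */
  static s16 fixup_jmp_off(const u32 *new_idx, u32 i, s16 off)
  {
          return new_idx[i + 1 + off] - new_idx[i] - 1;
  }

On the three-insn example above this yields new_idx = {0, 1, 3} and rewrites
'cond jump +1' into 'cond jump +2', matching the walkthrough.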

-Ed


* Re: [PATCH] bpf: optimize constant blinding
  2019-06-17 19:47           ` Edward Cree
@ 2019-06-17 19:59             ` Jiong Wang
  2019-06-17 20:11               ` Edward Cree
  0 siblings, 1 reply; 17+ messages in thread
From: Jiong Wang @ 2019-06-17 19:59 UTC (permalink / raw)
  To: Edward Cree
  Cc: Jiong Wang, Alexei Starovoitov, Naveen N. Rao, Daniel Borkmann,
	bpf, Network Development, Michael Ellerman, Jakub Kicinski


Edward Cree writes:

> On 14/06/2019 16:13, Jiong Wang wrote:
>> Just an update and keep people posted.
>>
>> Working on linked list based approach, the implementation looks like the
>> following, mostly a combine of discussions happened and Naveen's patch,
>> please feel free to comment.
>>
>>   - Use the reserved opcode 0xf0 with BPF_ALU as new pseudo insn code
>>     BPF_LIST_INSN. (0xf0 is also used with BPF_JMP class for tail call).
>>
>>   - Introduce patch pool into bpf_prog->aux to keep all patched insns.
> It's not clear to me what the point of the patch pool is, rather than just
>  doing the patch straight away. 

I used a pool because I was thinking the percentage of insns to be patched
could be high, so doing lots of alloc calls is going to be less efficient.
So allocate a big pool, and each time a new patch node is created, allocate
it from the pool directly. A node is addressed using pool_base + offset, so
each node only needs to keep the offset.

> Idea is that when prog is half-patched,
>  insn idxs / jump offsets / etc. all refer to the original locations, only
>  some of those might now be a list rather than a single insn.  Concretely:
>
> struct bpf_insn_list { bpf_insn insn; struct list_head list; }
> orig prog: array bpf_insn[3] = {cond jump +1, insn_foo, exit};
> start of day verifier converts this into array bpf_insn_list[3],
>  each with new.insn = orig.insn; INIT_LIST_HEAD(new.list)
>
> verifier decides to patch insn_foo into {insn_bar, insn_baz}.
> so we allocate bpf_insn_list *x;
> insn_foo.insn = insn_bar;
> x->insn = insn_baz;
> list_add_tail(&x->list, &insn_foo.list);
>
> the cond jump +1 is _not_ changed at this point, because it counts
>  bpf_insn_lists and not insns.
>
> Then at end of day to linearise we just have int new_idx[3];
>  populate it by iterating over the array of bpf_insn_list and adding on
>  the list length each time (so we get {0, 1, 3})
>  then allocate output prog of the total length (here 4, calculated by
>  that same pass as effectively off-the-end entry of new_idx)
>  then iterate again to write out the output prog, when we see that 'cond
>  jump +1' in old_idx 0 we see that (new_idx[2] - new_idx[0] - 1) = 2, so
>  it becomes 'cond jump +2'.
>
>
> This seems to me like it'd be easier to work with than making the whole
>  program one big linked list (where you have to separately keep track of
>  what each insn's idx was before patching).  But I haven't tried to code
>  it yet, so anything could happen ;-)  It does rely on the assumption
>  that only original insns (or the first insn of a list they get patched
>  with) will ever be jump targets or otherwise need their insn_idx taken.
>
> -Ed



* Re: [PATCH] bpf: optimize constant blinding
  2019-06-17 19:59             ` Jiong Wang
@ 2019-06-17 20:11               ` Edward Cree
  2019-06-17 20:40                 ` Jiong Wang
  0 siblings, 1 reply; 17+ messages in thread
From: Edward Cree @ 2019-06-17 20:11 UTC (permalink / raw)
  To: Jiong Wang
  Cc: Alexei Starovoitov, Naveen N. Rao, Daniel Borkmann, bpf,
	Network Development, Michael Ellerman, Jakub Kicinski

On 17/06/2019 20:59, Jiong Wang wrote:
> Edward Cree writes:
>
>> On 14/06/2019 16:13, Jiong Wang wrote:
>>> Just an update and keep people posted.
>>>
>>> Working on linked list based approach, the implementation looks like the
>>> following, mostly a combine of discussions happened and Naveen's patch,
>>> please feel free to comment.
>>>
>>>   - Use the reserved opcode 0xf0 with BPF_ALU as new pseudo insn code
>>>     BPF_LIST_INSN. (0xf0 is also used with BPF_JMP class for tail call).
>>>
>>>   - Introduce patch pool into bpf_prog->aux to keep all patched insns.
>> It's not clear to me what the point of the patch pool is, rather than just
>>  doing the patch straight away. 
> I used pool because I was thinking insn to be patched could be high
> percentage, so doing lots of alloc call is going to be less efficient? so
> allocate a big pool, and each time when creating new patch node, allocate
> it from the pool directly. Node is addressed using pool_base + offset, each
> node only need to keep offset.
Good idea; but in that case it doesn't need to be a pool of patches (storing
 their prev and next), just a pool of insns.  I.e. struct bpf_insn pool[many];
 then in orig prog when patching an insn replace it with BPF_LIST_INSN.  If
 we later decide to patch an insn within a patch, we can replace it (i.e. the
 entry in bpf_insn_pool) with another BPF_LIST_INSN pointing to some later bit
 of the pool, then we just have a little bit of recursion at linearise time.
Does that work?

-Ed


* Re: [PATCH] bpf: optimize constant blinding
  2019-06-17 20:11               ` Edward Cree
@ 2019-06-17 20:40                 ` Jiong Wang
  2019-06-17 20:53                   ` Alexei Starovoitov
  2019-06-17 21:16                   ` Edward Cree
  0 siblings, 2 replies; 17+ messages in thread
From: Jiong Wang @ 2019-06-17 20:40 UTC (permalink / raw)
  To: Edward Cree, Alexei Starovoitov, Andrii Nakryiko
  Cc: Jiong Wang, Alexei Starovoitov, Naveen N. Rao, Daniel Borkmann,
	bpf, Network Development, Michael Ellerman, Jakub Kicinski


Edward Cree writes:

> On 17/06/2019 20:59, Jiong Wang wrote:
>> Edward Cree writes:
>>
>>> On 14/06/2019 16:13, Jiong Wang wrote:
>>>> Just an update and keep people posted.
>>>>
>>>> Working on linked list based approach, the implementation looks like the
>>>> following, mostly a combine of discussions happened and Naveen's patch,
>>>> please feel free to comment.
>>>>
>>>>   - Use the reserved opcode 0xf0 with BPF_ALU as new pseudo insn code
>>>>     BPF_LIST_INSN. (0xf0 is also used with BPF_JMP class for tail call).
>>>>
>>>>   - Introduce patch pool into bpf_prog->aux to keep all patched insns.
>>> It's not clear to me what the point of the patch pool is, rather than just
>>>  doing the patch straight away. 
>> I used pool because I was thinking insn to be patched could be high
>> percentage, so doing lots of alloc call is going to be less efficient? so
>> allocate a big pool, and each time when creating new patch node, allocate
>> it from the pool directly. Node is addressed using pool_base + offset, each
>> node only need to keep offset.
> Good idea; but in that case it doesn't need to be a pool of patches (storing
>  their prev and next), just a pool of insns.  I.e. struct bpf_insn pool[many];
>  then in orig prog when patching an insn replace it with BPF_LIST_INSN.  If
>  we later decide to patch an insn within a patch, we can replace it (i.e. the
>  entry in bpf_insn_pool) with another BPF_LIST_INSN pointing to some later bit
>  of the pool, then we just have a little bit of recursion at linearise time.
> Does that work?

I feel it is not going to work well. What I proposed initially is something
similar, except that when we are patching an insn within a patch (this does
exist, for example zext insertion will apply to some patched alu insns
inserted by the ctx convert pass), we split the patch. This then makes the
data structures used take two shapes:

1. The original prog->insnsi is still maintained as an array.
2. If one insn is a BPF_LIST_INSN, then it is a list, and it can be traversed
   using pool_base + insn.imm as the list head.

But then there is a data structure inconsistency, so now when doing insn
traversal we need something like:
   for (idx = 0; idx < insn_cnt; idx++) {
     if (insns[idx] is not BPF_LIST_INSN) {
       do_insn(...)
     }
     else if (insns[idx] is BPF_LIST_INSN) {
       list = pool_base + insn.imm;
       while (list) {
         insn = list_head->insn;
         do_insn(...)
         list = pool_base + list->next;
       }
     }
   }

The code logic inside convert_ctx_accesses/fixup_bpf_call etc. needs to be
refactored out into a separate function do_NNN so it can be called in the
above logic.

Now, if we don't split the patch when patching an insn inside a patch, and
instead replace the patched insn as you suggested, then the logic looks to me
to become even more complex, something like:

   for (idx = 0; idx < insn_cnt; idx++) {
     if (insns[idx] is not BPF_LIST_INSN) {
       do_insn(...)
     }
     else if (insns[idx] is BPF_LIST_INSN) {
       list = pool_base + insn.imm;
       while (list) {
         insn = list_head->insn;
         if (insn is BF_LIST_INSN) {
           sub_list = ...
           while ()
             do_insn()
           continue;
         }
         do_insn(...)
         list = pool_base + list->next;
       }
     }
   }

So, I am thinking what Alexei and Andrii suggested makes sense: just use a
single data structure (a singly linked list) to represent everything, so that
the insn traversal etc. can be simple. But I am wondering whether it is better
to still base the list on top of the pool infrastructure mentioned above.

I have something like the following:

+struct bpf_list_insn {
+       struct bpf_insn insn;
+       u32 next;
+};

struct bpf_prog_aux {:

+       struct {
+               struct bpf_list_insn *pool;
+               u32 size;
+               u32 used;
+       };

Whenever you want to do intensive patching work, you could call
bpf_patch_init to set up the pool, the setup including:
  1. Convert the original prog->insnsi into a list of bpf_list_insn.
     next is an index into the pool, so for the initial insnsi they are
     1, 2, 3, 4, 5, ...

Then when doing patching, just allocate a new slot from the pool and link
it into the list.

When patching is finished, call bpf_patch_fini to do the linearization; it
could use the same algo as in Naveen's patch.

After digesting Alexei's and Andrii's replies, I still don't see the need to
turn branch targets into a list, and I am not sure whether a pool based list
sounds good: it saves size, and resizing the pool doesn't invalidate allocated
nodes (the offset doesn't change), but it requires one extra addition to
calculate the pointer.
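
For what it's worth, a minimal sketch of how that pool-backed list could be
initialised and patched; bpf_patch_pool and the two helpers below are assumed
names based on the description above, not existing kernel APIs:

  #include <linux/filter.h>
  #include <linux/overflow.h>
  #include <linux/vmalloc.h>

  struct bpf_list_insn {
          struct bpf_insn insn;
          u32 next;       /* pool index of the next node; 0 terminates,
                           * since node 0 is always the list head */
  };

  struct bpf_patch_pool {
          struct bpf_list_insn *pool;
          u32 size;
          u32 used;
  };

  /* Convert prog->insnsi into list nodes: node i initially links to i + 1. */
  static int bpf_patch_init(struct bpf_patch_pool *pp,
                            const struct bpf_prog *prog)
  {
          u32 i;

          pp->size = prog->len * 2;       /* arbitrary initial headroom */
          pp->pool = vzalloc(array_size(pp->size, sizeof(*pp->pool)));
          if (!pp->pool)
                  return -ENOMEM;

          for (i = 0; i < prog->len; i++) {
                  pp->pool[i].insn = prog->insnsi[i];
                  pp->pool[i].next = i + 1 < prog->len ? i + 1 : 0;
          }
          pp->used = prog->len;
          return 0;
  }

  /* Insert one patched insn right after the node at pool index idx. */
  static int bpf_patch_insert(struct bpf_patch_pool *pp, u32 idx,
                              const struct bpf_insn *insn)
  {
          u32 n;

          if (pp->used >= pp->size)
                  return -ENOSPC; /* a real version would grow the pool */
          n = pp->used++;
          pp->pool[n].insn = *insn;
          pp->pool[n].next = pp->pool[idx].next;
          pp->pool[idx].next = n;
          return 0;
  }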

>
> -Ed



* Re: [PATCH] bpf: optimize constant blinding
  2019-06-17 20:40                 ` Jiong Wang
@ 2019-06-17 20:53                   ` Alexei Starovoitov
  2019-06-17 21:01                     ` Jiong Wang
  2019-06-17 21:16                   ` Edward Cree
  1 sibling, 1 reply; 17+ messages in thread
From: Alexei Starovoitov @ 2019-06-17 20:53 UTC (permalink / raw)
  To: Jiong Wang
  Cc: Edward Cree, Alexei Starovoitov, Andrii Nakryiko, Naveen N. Rao,
	Daniel Borkmann, bpf, Network Development, Michael Ellerman,
	Jakub Kicinski

On Mon, Jun 17, 2019 at 1:40 PM Jiong Wang <jiong.wang@netronome.com> wrote:
>
> After digesting Alexei and Andrii's replies, I still don't see the need to
> turn branch targets into a list, and I am not sure whether a pool based list
> sounds good? It saves size, and resizing the pool doesn't invalidate
> allocated nodes (the offset doesn't change), but it requires one extra
> addition to calculate the pointer.

I don't think it is worth doing a pool to accelerate kmalloc.
I doubt it will be faster either.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] bpf: optimize constant blinding
  2019-06-17 20:53                   ` Alexei Starovoitov
@ 2019-06-17 21:01                     ` Jiong Wang
  0 siblings, 0 replies; 17+ messages in thread
From: Jiong Wang @ 2019-06-17 21:01 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jiong Wang, Edward Cree, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao, Daniel Borkmann, bpf, Network Development,
	Michael Ellerman, Jakub Kicinski


Alexei Starovoitov writes:

> On Mon, Jun 17, 2019 at 1:40 PM Jiong Wang <jiong.wang@netronome.com> wrote:
>>
>> After digesting Alexei and Andrii's replies, I still don't see the need to
>> turn branch targets into a list, and I am not sure whether a pool based list
>> sounds good? It saves size, and resizing the pool doesn't invalidate
>> allocated nodes (the offset doesn't change), but it requires one extra
>> addition to calculate the pointer.
>
> I don't think it is worth doing a pool to accelerate kmalloc.

Got it.

> I doubt it will be faster either.

Will benchmark.

Regards,
Jiong

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] bpf: optimize constant blinding
  2019-06-17 20:40                 ` Jiong Wang
  2019-06-17 20:53                   ` Alexei Starovoitov
@ 2019-06-17 21:16                   ` Edward Cree
  2019-06-19 20:45                     ` Jiong Wang
  1 sibling, 1 reply; 17+ messages in thread
From: Edward Cree @ 2019-06-17 21:16 UTC (permalink / raw)
  To: Jiong Wang, Alexei Starovoitov, Andrii Nakryiko
  Cc: Alexei Starovoitov, Naveen N. Rao, Daniel Borkmann, bpf,
	Network Development, Michael Ellerman, Jakub Kicinski

On 17/06/2019 21:40, Jiong Wang wrote:
> Now, if we don't split the patch when patching an insn inside a patch, and
> instead replace the patched insn as you suggested, then the logic becomes
> even more complex to me, something like:
>
>    for (idx = 0; idx < insn_cnt; idx++) {
>      if (insns[idx] is not BPF_LIST_INSN) {
>        do_insn(...)
>      }
>      else if (insns[idx] is BPF_LIST_INSN) {
>        list = pool_base + insn.imm;
>        while (list) {
>          insn = list->insn;
>          if (insn is BPF_LIST_INSN) {
>            sub_list = ...
>            while ()
>              do_insn()
>            list = pool_base + list->next;
>            continue;
>          }
>          do_insn(...)
>          list = pool_base + list->next;
>        }
>      }
>    }
Why can't do_insn() just go like:
    if (insn is BPF_LIST_INSN)
        for (idx = 0; idx < LIST_COUNT(insn); idx++)
            do_insn(pool_base + LIST_START(insn) + idx);
    else
        rest of processing
?

Alternatively, iterate with something more sophisticated than 'idx++'
 (standard recursion-to-loop transformation).
You shouldn't ever need a for() tower statically in the code...
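
For example, a rough sketch of that shape (is_list_insn(), LIST_START() and
LIST_COUNT() are illustrative names for however the list insn encodes its
range in the pool, not existing helpers):

static void do_insn(struct bpf_insn *insn, struct bpf_insn *pool_base)
{
	if (is_list_insn(insn)) {
		u32 i, start = LIST_START(insn), cnt = LIST_COUNT(insn);

		/* A patch inside a patch is just another list insn, so the
		 * recursion handles arbitrary nesting without extra loops
		 * at the call sites.
		 */
		for (i = 0; i < cnt; i++)
			do_insn(pool_base + start + i, pool_base);
		return;
	}

	/* rest of the per-insn processing */
}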

> So, I am thinking what Alexei and Andrii suggested makes sense: just use a
> single data structure (singly linked list) to represent everything, so the
> insn traversal etc. could be simple
But then you have to also store orig_insn_idx with each insn, so you can
 calculate the new jump offsets when you linearise.  Having an array of
 patched_orig_insns gives you that for free.
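
For instance, a sketch of that bookkeeping under the assumption of one patch
slot per original insn (struct insn_patch and the names here are
illustrative, not something in the tree):

/* One entry per original insn; len == 0 means the insn was not patched
 * and stays as a single insn.
 */
struct insn_patch {
	struct bpf_insn *insns;
	u32 len;
};

/* The new index of each original insn falls out of a prefix sum, which is
 * all that is needed to recompute jump offsets at linearisation time.
 */
static void compute_new_idx(const struct insn_patch *patched_orig_insns,
			    u32 orig_len, u32 *new_idx)
{
	u32 i, off = 0;

	for (i = 0; i < orig_len; i++) {
		u32 len = patched_orig_insns[i].len;

		new_idx[i] = off;
		off += len ? len : 1;
	}
}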

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] bpf: optimize constant blinding
  2019-06-17 21:16                   ` Edward Cree
@ 2019-06-19 20:45                     ` Jiong Wang
  0 siblings, 0 replies; 17+ messages in thread
From: Jiong Wang @ 2019-06-19 20:45 UTC (permalink / raw)
  To: Edward Cree, Alexei Starovoitov, Andrii Nakryiko
  Cc: Jiong Wang, Naveen N. Rao, Daniel Borkmann, bpf,
	Network Development, Michael Ellerman, Jakub Kicinski


Edward Cree writes:

> On 17/06/2019 21:40, Jiong Wang wrote:
>> Now, if we don't split the patch when patching an insn inside a patch, and
>> instead replace the patched insn as you suggested, then the logic becomes
>> even more complex to me, something like:
>>
>>    for (idx = 0; idx < insn_cnt; idx++) {
>>      if (insns[idx] is not BPF_LIST_INSN) {
>>        do_insn(...)
>>      }
>>      else if (insns[idx] is BPF_LIST_INSN) {
>>        list = pool_base + insn.imm;
>>        while (list) {
>>          insn = list->insn;
>>          if (insn is BPF_LIST_INSN) {
>>            sub_list = ...
>>            while ()
>>              do_insn()
>>            list = pool_base + list->next;
>>            continue;
>>          }
>>          do_insn(...)
>>          list = pool_base + list->next;
>>        }
>>      }
>>    }
> Why can't do_insn() just go like:
>     if (insn is BPF_LIST_INSN)
>         for (idx = 0; idx < LIST_COUNT(insn); idx++)
>             do_insn(pool_base + LIST_START(insn) + idx);
>     else
>         rest of processing
> ?
>
> Alternatively, iterate with something more sophisticated than 'idx++'
>  (standard recursion-to-loop transformation).
> You shouldn't ever need a for() tower statically in the code...

I don't think this changes things much. The point is we still have two data
structures for insns, array + list, so I feel you anyway need some tweaks to
the existing traversal code, while using a singly linked list incurs very
few changes, for example:

  for (i = 0; i < insn_cnt; i++, insn++) {

  =>

  for (elem = list; elem; elem = elem->next) {
    insn = &elem->insn;

>> So, I am thinking what Alexei and Andrii suggested makes sense: just use a
>> single data structure (singly linked list) to represent everything, so the
>> insn traversal etc. could be simple
> But then you have to also store orig_insn_idx with each insn, so you can
>  calculate the new jump offsets when you linearise.  Having an array of
>  patched_orig_insns gives you that for free.

For a pool based list, you don't need to store orig_insn_idx; the orig insns
are guaranteed to sit at the bottom of the pool, so index < orig_prog_len is
enough to tell that a node is an orig insn. And for both pool and non-pool
based lists, the order of orig nodes in the list is the same as in the
array, so it is quite easy to calculate the orig index as a by-product of
the insn copy traversal; for a non-pool based list, each node needs at least
one bit to indicate it is an orig node. I also found that patching a patched
buffer which contains a jmp insn is an issue (need to double check whether
such a case exists), because then we would need to record the jump
destination index of the jmp insn when it was inserted.
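
As a rough sketch of that by-product calculation for the pool based list
(this assumes orig insns occupy the bottom pool slots in program order and
that a sentinel 'next' value, BPF_LIST_END as in the earlier sketch,
terminates a list; both are illustrative assumptions, not existing code):

/* Walk the list once; whenever the current pool slot belongs to the
 * original program, record where that orig insn lands in the new linear
 * image, so branch offsets can be fixed up afterwards.
 */
static void map_orig_indexes(const struct bpf_list_insn *pool, u32 head,
			     u32 orig_prog_len, u32 *new_idx_of_orig)
{
	u32 idx = head, new_idx = 0;

	while (idx != BPF_LIST_END) {
		if (idx < orig_prog_len)
			new_idx_of_orig[idx] = new_idx;
		new_idx++;
		idx = pool[idx].next;
	}
}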

And some updates from my side: I did some benchmarking on both the pool and
non-pool based lists.

Patching time (ns) benchmarked using JIT blinding
===

                    existing     non-pool      pool

"scale test 1"      no stop      ~0x1c600000   ~0x8800000
Bench(~3.4K insns)  ~0xc50000    ~0xf1000      ~0x6b000

(Non-pool means kmalloc'ing a list node for each patch snippet; pool means
vmalloc'ing a big chunk of memory and allocating nodes from it, with a node
located via pool_base + index.)

For "scale test 1" which contains ~1M JIT blindable insns, using list based
infra for patching could reduce most of the patching time, and pool based
alloc only consumes 1/3 time of non-pool.

And for a normal program of reasonable size (~3.4K insns), pool based alloc
only consumes 1/30 of the time of the existing infra, and 1/2.3 of non-pool.

On the other hand, the non-pool based implementation is cleaner and less
error prone than the pool based one.

And for both pool and non-pool, I got the following kernel warning when
running "scale test 11" (it perhaps needs 3M * 16 ~= 48M of memory):

[   92.319792] WARNING: CPU: 6 PID: 2322 at mm/page_alloc.c:4639 __alloc_pages_nodemask+0x29e/0x330

I am finishing aux adjust and code delete support.

Regards,
Jiong

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2019-06-19 20:45 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-12 11:32 [PATCH] bpf: optimize constant blinding Naveen N. Rao
2019-06-12 14:47 ` Alexei Starovoitov
2019-06-12 15:04   ` Jiong Wang
2019-06-12 15:25     ` Jiong Wang
2019-06-12 15:28       ` Alexei Starovoitov
2019-06-14 15:13         ` Jiong Wang
2019-06-14 17:05           ` Alexei Starovoitov
2019-06-14 22:28             ` Andrii Nakryiko
2019-06-17 19:47           ` Edward Cree
2019-06-17 19:59             ` Jiong Wang
2019-06-17 20:11               ` Edward Cree
2019-06-17 20:40                 ` Jiong Wang
2019-06-17 20:53                   ` Alexei Starovoitov
2019-06-17 21:01                     ` Jiong Wang
2019-06-17 21:16                   ` Edward Cree
2019-06-19 20:45                     ` Jiong Wang
2019-06-14  4:30 ` kbuild test robot
