* [PATCH] bpf: optimize constant blinding
@ 2019-06-12 11:32 Naveen N. Rao
2019-06-12 14:47 ` Alexei Starovoitov
2019-06-14 4:30 ` kbuild test robot
0 siblings, 2 replies; 17+ messages in thread
From: Naveen N. Rao @ 2019-06-12 11:32 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann; +Cc: bpf, netdev, Michael Ellerman
Currently, for constant blinding, we re-allocate the bpf program to
account for its new size and adjust all branches to accommodate the
same, for each BPF instruction that needs constant blinding. This is
inefficient and can lead to soft lockup with sufficiently large
programs, such as the new verifier scalability test (ld_dw: xor
semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)
Re-implement BPF constant blinding to instead be performed in a single
pass. We do a dry run to count the additional instructions, allocate bpf
program for the corresponding size and adjust branches at once.
Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
kernel/bpf/core.c | 232 +++++++++++++++++++++++++++++++++-------------
1 file changed, 168 insertions(+), 64 deletions(-)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 33fb292f2e30..82338b5cd98d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -897,8 +897,16 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
struct bpf_insn *to_buff)
{
struct bpf_insn *to = to_buff;
- u32 imm_rnd = get_random_int();
- s16 off;
+ u32 imm_rnd = 0;
+
+ if (to_buff)
+ imm_rnd = get_random_int();
+
+#define COPY_BPF_INSN(insn) do { \
+ if (to_buff) \
+ *(to) = insn; \
+ (to)++; \
+ } while (0)
BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG);
BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);
@@ -926,7 +934,7 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
if (from->imm == 0 &&
(from->code == (BPF_ALU | BPF_MOV | BPF_K) ||
from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
- *to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg);
+ COPY_BPF_INSN(BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg));
goto out;
}
@@ -940,9 +948,9 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
case BPF_ALU | BPF_MOV | BPF_K:
case BPF_ALU | BPF_DIV | BPF_K:
case BPF_ALU | BPF_MOD | BPF_K:
- *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
- *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
- *to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX);
+ COPY_BPF_INSN(BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm));
+ COPY_BPF_INSN(BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd));
+ COPY_BPF_INSN(BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX));
break;
case BPF_ALU64 | BPF_ADD | BPF_K:
@@ -954,9 +962,9 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
case BPF_ALU64 | BPF_MOV | BPF_K:
case BPF_ALU64 | BPF_DIV | BPF_K:
case BPF_ALU64 | BPF_MOD | BPF_K:
- *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
- *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
- *to++ = BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX);
+ COPY_BPF_INSN(BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm));
+ COPY_BPF_INSN(BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd));
+ COPY_BPF_INSN(BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX));
break;
case BPF_JMP | BPF_JEQ | BPF_K:
@@ -970,13 +978,9 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP | BPF_JSET | BPF_K:
- /* Accommodate for extra offset in case of a backjump. */
- off = from->off;
- if (off < 0)
- off -= 2;
- *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
- *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
- *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
+ COPY_BPF_INSN(BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm));
+ COPY_BPF_INSN(BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd));
+ COPY_BPF_INSN(BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, from->off));
break;
case BPF_JMP32 | BPF_JEQ | BPF_K:
@@ -990,37 +994,36 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_K:
- /* Accommodate for extra offset in case of a backjump. */
- off = from->off;
- if (off < 0)
- off -= 2;
- *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
- *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
- *to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
- off);
+ COPY_BPF_INSN(BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm));
+ COPY_BPF_INSN(BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd));
+ COPY_BPF_INSN(BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
+ from->off));
break;
case BPF_LD | BPF_IMM | BPF_DW:
- *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
- *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
- *to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
- *to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX);
+ COPY_BPF_INSN(BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm));
+ COPY_BPF_INSN(BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd));
+ COPY_BPF_INSN(BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32));
+ COPY_BPF_INSN(BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX));
break;
case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
- *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm);
- *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
- *to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX);
+ COPY_BPF_INSN(BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm));
+ COPY_BPF_INSN(BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd));
+ COPY_BPF_INSN(BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX));
break;
case BPF_ST | BPF_MEM | BPF_DW:
case BPF_ST | BPF_MEM | BPF_W:
case BPF_ST | BPF_MEM | BPF_H:
case BPF_ST | BPF_MEM | BPF_B:
- *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
- *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
- *to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off);
+ COPY_BPF_INSN(BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm));
+ COPY_BPF_INSN(BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd));
+ COPY_BPF_INSN(BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off));
break;
}
+
+#undef COPY_BPF_INSN
+
out:
return to - to_buff;
}
@@ -1065,57 +1068,158 @@ void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
bpf_prog_clone_free(fp_other);
}
+static int bpf_jit_blind_adj_imm_off(struct bpf_insn *insn, int prog_index,
+ int clone_index, int blnd[])
+{
+ const s64 imm_min = S32_MIN, imm_max = S32_MAX;
+ const s32 off_min = S16_MIN, off_max = S16_MAX;
+ u8 code = insn->code;
+ s64 imm;
+ s32 off;
+
+ if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
+ BPF_OP(code) == BPF_EXIT)
+ return 0;
+
+ /* Adjust offset of jmps if we cross patch boundaries. */
+ if (BPF_OP(code) == BPF_CALL) {
+ if (insn->src_reg != BPF_PSEUDO_CALL)
+ return 0;
+ imm = blnd[prog_index + insn->imm + 1] - clone_index - 1;
+ if (imm < imm_min || imm > imm_max)
+ return -ERANGE;
+ insn->imm = imm;
+ } else {
+ off = blnd[prog_index + insn->off + 1] - clone_index - 1;
+ if (off < off_min || off > off_max)
+ return -ERANGE;
+ insn->off = off;
+ }
+
+ return 0;
+}
+
struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
{
+ int i, j, rewritten, new_len, insn_cnt, ret = 0;
struct bpf_insn insn_buff[16], aux[2];
struct bpf_prog *clone, *tmp;
- int insn_delta, insn_cnt;
struct bpf_insn *insn;
- int i, rewritten;
+ u32 *clone_index;
if (!bpf_jit_blinding_enabled(prog) || prog->blinded)
return prog;
- clone = bpf_prog_clone_create(prog, GFP_USER);
- if (!clone)
+ /* Dry run to figure out the final number of instructions */
+ clone_index = vmalloc(prog->len * sizeof(u32));
+ if (!clone_index)
return ERR_PTR(-ENOMEM);
- insn_cnt = clone->len;
- insn = clone->insnsi;
-
+ insn_cnt = prog->len;
+ insn = prog->insnsi;
+ rewritten = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
- /* We temporarily need to hold the original ld64 insn
- * so that we can still access the first part in the
- * second blinding run.
- */
- if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
- insn[1].code == 0)
- memcpy(aux, insn, sizeof(aux));
+ clone_index[i] = bpf_jit_blind_insn(insn, 0, 0);
+ if (clone_index[i] > 1)
+ rewritten += clone_index[i] - 1;
+ }
- rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
- if (!rewritten)
- continue;
+ if (rewritten) {
+ /* Needs new allocation, branch adjustment, et al... */
+ clone = bpf_prog_clone_create(prog, GFP_USER);
+ if (!clone) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ new_len = prog->len + rewritten;
+ tmp = bpf_prog_realloc(clone, bpf_prog_size(new_len), GFP_USER);
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ clone = tmp;
+ clone->len = new_len;
+
+ /* rewrite instructions with constant blinding */
+ insn_cnt = prog->len;
+ insn = prog->insnsi;
+ for (i = 0, j = 0; i < insn_cnt; i++, j++, insn++) {
+ /* capture new instruction index in clone_index */
+ clone_index[i] = j;
- tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
- if (IS_ERR(tmp)) {
- /* Patching may have repointed aux->prog during
- * realloc from the original one, so we need to
- * fix it up here on error.
+ /* We temporarily need to hold the original ld64 insn
+ * so that we can still access the first part in the
+ * second blinding run.
*/
- bpf_jit_prog_release_other(prog, clone);
- return tmp;
+ if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
+ insn[1].code == 0)
+ memcpy(aux, insn, sizeof(aux));
+
+ rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
+ if (!rewritten) {
+ memcpy(clone->insnsi + j, insn,
+ sizeof(struct bpf_insn));
+ } else {
+ memcpy(clone->insnsi + j, insn_buff,
+ sizeof(struct bpf_insn) * rewritten);
+ j += rewritten - 1;
+ }
}
- clone = tmp;
- insn_delta = rewritten - 1;
+ /* adjust branches */
+ for (i = 0; i < insn_cnt; i++) {
+ int next_insn_idx = clone->len;
+
+ if (i < insn_cnt - 1)
+ next_insn_idx = clone_index[i + 1];
+
+ insn = clone->insnsi + clone_index[i];
+ for (j = clone_index[i]; j < next_insn_idx; j++, insn++) {
+ ret = bpf_jit_blind_adj_imm_off(insn, i, j, clone_index);
+ if (ret) {
+ goto err;
+ }
+ }
+ }
+
+ /* adjust linfo */
+ if (clone->aux->nr_linfo) {
+ struct bpf_line_info *linfo = clone->aux->linfo;
- /* Walk new program and skip insns we just inserted. */
- insn = clone->insnsi + i + insn_delta;
- insn_cnt += insn_delta;
- i += insn_delta;
+ for (i = 0; i < clone->aux->nr_linfo; i++)
+ linfo[i].insn_off = clone_index[linfo[i].insn_off];
+ }
+ } else {
+ /* if prog length remains same, not much work to do */
+ clone = bpf_prog_clone_create(prog, GFP_USER);
+ if (!clone) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ insn_cnt = clone->len;
+ insn = clone->insnsi;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (clone_index[i]) {
+ bpf_jit_blind_insn(insn, aux, insn_buff);
+ memcpy(insn, insn_buff, sizeof(struct bpf_insn));
+ }
+ }
}
clone->blinded = 1;
+
+err:
+ vfree(clone_index);
+
+ if (ret) {
+ if (clone)
+ bpf_jit_prog_release_other(prog, clone);
+ return ERR_PTR(ret);
+ }
+
return clone;
}
#endif /* CONFIG_BPF_JIT */
--
2.21.0
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-12 11:32 [PATCH] bpf: optimize constant blinding Naveen N. Rao
@ 2019-06-12 14:47 ` Alexei Starovoitov
2019-06-12 15:04 ` Jiong Wang
2019-06-14 4:30 ` kbuild test robot
1 sibling, 1 reply; 17+ messages in thread
From: Alexei Starovoitov @ 2019-06-12 14:47 UTC (permalink / raw)
To: Naveen N. Rao
Cc: Daniel Borkmann, bpf, Network Development, Michael Ellerman,
Jiong Wang, Jakub Kicinski
On Wed, Jun 12, 2019 at 4:32 AM Naveen N. Rao
<naveen.n.rao@linux.vnet.ibm.com> wrote:
>
> Currently, for constant blinding, we re-allocate the bpf program to
> account for its new size and adjust all branches to accommodate the
> same, for each BPF instruction that needs constant blinding. This is
> inefficient and can lead to soft lockup with sufficiently large
> programs, such as the new verifier scalability test (ld_dw: xor
> semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)
Slowdown you see is due to patch_insn right?
In such case I prefer to fix the scaling issue of patch_insn instead.
This specific fix for blinding only is not addressing the core of the problem.
Jiong,
how is the progress on fixing patch_insn?
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-12 14:47 ` Alexei Starovoitov
@ 2019-06-12 15:04 ` Jiong Wang
2019-06-12 15:25 ` Jiong Wang
0 siblings, 1 reply; 17+ messages in thread
From: Jiong Wang @ 2019-06-12 15:04 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: Naveen N. Rao, Daniel Borkmann, bpf, Network Development,
Michael Ellerman, Jiong Wang, Jakub Kicinski
Alexei Starovoitov writes:
> On Wed, Jun 12, 2019 at 4:32 AM Naveen N. Rao
> <naveen.n.rao@linux.vnet.ibm.com> wrote:
>>
>> Currently, for constant blinding, we re-allocate the bpf program to
>> account for its new size and adjust all branches to accommodate the
>> same, for each BPF instruction that needs constant blinding. This is
>> inefficient and can lead to soft lockup with sufficiently large
>> programs, such as the new verifier scalability test (ld_dw: xor
>> semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)
>
> Slowdown you see is due to patch_insn right?
> In such case I prefer to fix the scaling issue of patch_insn instead.
> This specific fix for blinding only is not addressing the core of the problem.
> Jiong,
> how is the progress on fixing patch_insn?
I actually was about to reply to this email, as we have discussed exactly the
same issue on jit blinding here:
https://www.spinics.net/lists/bpf/msg01836.html
And sorry for the slow progress on fixing patch_insn, please give me one
more week, I will try to send out a RFC for it.
Regards,
Jiong
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-12 15:04 ` Jiong Wang
@ 2019-06-12 15:25 ` Jiong Wang
2019-06-12 15:28 ` Alexei Starovoitov
0 siblings, 1 reply; 17+ messages in thread
From: Jiong Wang @ 2019-06-12 15:25 UTC (permalink / raw)
To: Jiong Wang
Cc: Alexei Starovoitov, Naveen N. Rao, Daniel Borkmann, bpf,
Network Development, Michael Ellerman, Jakub Kicinski
Jiong Wang writes:
> Alexei Starovoitov writes:
>
>> On Wed, Jun 12, 2019 at 4:32 AM Naveen N. Rao
>> <naveen.n.rao@linux.vnet.ibm.com> wrote:
>>>
>>> Currently, for constant blinding, we re-allocate the bpf program to
>>> account for its new size and adjust all branches to accommodate the
>>> same, for each BPF instruction that needs constant blinding. This is
>>> inefficient and can lead to soft lockup with sufficiently large
>>> programs, such as the new verifier scalability test (ld_dw: xor
>>> semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)
>>
>> Slowdown you see is due to patch_insn right?
>> In such case I prefer to fix the scaling issue of patch_insn instead.
>> This specific fix for blinding only is not addressing the core of the problem.
>> Jiong,
>> how is the progress on fixing patch_insn?
And what I have done is I have digested your conversation with Edward, and am
slightly inclined to the BB based approach as it also exposes the inserted
insn to later passes in a natural way, then was trying to find a way that
could create BB info in little extra code based on current verifier code,
for example as a side effect of check_subprogs which is doing two insn
traversal already. (I had some such code before in the historical
wip/bpf-loop-detection branch, but feel it might be still too heavy for
just improving insn patching)
>
> I actually was about to reply this email as we have discussed exactly the
> same issue on jit blinding here:
>
> https://www.spinics.net/lists/bpf/msg01836.html
>
> And sorry for the slow progress on fixing patch_insn, please give me one
> more week, I will try to send out a RFC for it.
>
> Regards,
> Jiong
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-12 15:25 ` Jiong Wang
@ 2019-06-12 15:28 ` Alexei Starovoitov
2019-06-14 15:13 ` Jiong Wang
0 siblings, 1 reply; 17+ messages in thread
From: Alexei Starovoitov @ 2019-06-12 15:28 UTC (permalink / raw)
To: Jiong Wang
Cc: Naveen N. Rao, Daniel Borkmann, bpf, Network Development,
Michael Ellerman, Jakub Kicinski
On Wed, Jun 12, 2019 at 8:25 AM Jiong Wang <jiong.wang@netronome.com> wrote:
>
>
> Jiong Wang writes:
>
> > Alexei Starovoitov writes:
> >
> >> On Wed, Jun 12, 2019 at 4:32 AM Naveen N. Rao
> >> <naveen.n.rao@linux.vnet.ibm.com> wrote:
> >>>
> >>> Currently, for constant blinding, we re-allocate the bpf program to
> >>> account for its new size and adjust all branches to accommodate the
> >>> same, for each BPF instruction that needs constant blinding. This is
> >>> inefficient and can lead to soft lockup with sufficiently large
> >>> programs, such as the new verifier scalability test (ld_dw: xor
> >>> semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)
> >>
> >> Slowdown you see is due to patch_insn right?
> >> In such case I prefer to fix the scaling issue of patch_insn instead.
> >> This specific fix for blinding only is not addressing the core of the problem.
> >> Jiong,
> >> how is the progress on fixing patch_insn?
>
> And what I have done is I have digested your conversion with Edward, and is
> slightly incline to the BB based approach as it also exposes the inserted
> insn to later pass in a natural way, then was trying to find a way that
> could create BB info in little extra code based on current verifier code,
> for example as a side effect of check_subprogs which is doing two insn
> traversal already. (I had some such code before in the historical
> wip/bpf-loop-detection branch, but feel it might be still too heavy for
> just improving insn patching)
BB - basic block?
I'm not sure that was necessary.
The idea was that patching is adding stuff to linked list instead
and single pass at the end to linearize it.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-12 15:28 ` Alexei Starovoitov
@ 2019-06-14 15:13 ` Jiong Wang
2019-06-14 17:05 ` Alexei Starovoitov
2019-06-17 19:47 ` Edward Cree
0 siblings, 2 replies; 17+ messages in thread
From: Jiong Wang @ 2019-06-14 15:13 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: Jiong Wang, Naveen N. Rao, Daniel Borkmann, bpf,
Network Development, Michael Ellerman, Jakub Kicinski
Alexei Starovoitov writes:
> On Wed, Jun 12, 2019 at 8:25 AM Jiong Wang <jiong.wang@netronome.com> wrote:
>>
>>
>> Jiong Wang writes:
>>
>> > Alexei Starovoitov writes:
>> >
>> >> On Wed, Jun 12, 2019 at 4:32 AM Naveen N. Rao
>> >> <naveen.n.rao@linux.vnet.ibm.com> wrote:
>> >>>
>> >>> Currently, for constant blinding, we re-allocate the bpf program to
>> >>> account for its new size and adjust all branches to accommodate the
>> >>> same, for each BPF instruction that needs constant blinding. This is
>> >>> inefficient and can lead to soft lockup with sufficiently large
>> >>> programs, such as the new verifier scalability test (ld_dw: xor
>> >>> semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)
>> >>
>> >> Slowdown you see is due to patch_insn right?
>> >> In such case I prefer to fix the scaling issue of patch_insn instead.
>> >> This specific fix for blinding only is not addressing the core of the problem.
>> >> Jiong,
>> >> how is the progress on fixing patch_insn?
>>
>> And what I have done is I have digested your conversion with Edward, and is
>> slightly incline to the BB based approach as it also exposes the inserted
>> insn to later pass in a natural way, then was trying to find a way that
>> could create BB info in little extra code based on current verifier code,
>> for example as a side effect of check_subprogs which is doing two insn
>> traversal already. (I had some such code before in the historical
>> wip/bpf-loop-detection branch, but feel it might be still too heavy for
>> just improving insn patching)
>
> BB - basic block?
> I'm not sure that was necessary.
> The idea was that patching is adding stuff to linked list instead
> and single pass at the end to linearize it.
Just an update and keep people posted.
Working on linked list based approach, the implementation looks like the
following, mostly a combine of discussions happened and Naveen's patch,
please feel free to comment.
- Use the reserved opcode 0xf0 with BPF_ALU as new pseudo insn code
BPF_LIST_INSN. (0xf0 is also used with BPF_JMP class for tail call).
- Introduce patch pool into bpf_prog->aux to keep all patched insns.
Pool structure looks like:
struct {
int num;
int prev;
int next;
} head_0;
NUM patched insns for head_0
head_1;
patched insns for head_1
head_2;
...
- Now when doing bpf_patch_insn_single, it doesn't change the original
prog etc; instead, it merely updates the insn at the patched offset into a
BPF_LIST_INSN, and pushes the patched insns plus a patch header into
the patch pool. Fields of BPF_LIST_INSN are updated to set up the links:
BPF_LIST_INSN.off += patched_size
(accumulating the size attached to this list_insn, it is possible a
later patch pass patches insn in the patch pool, this means insn
traversal needs to be changed, when seeing BPF_LIST_INSN, should go
through the list)
BPF_LIST_INSN.imm = offset of the patch header in patch pool
(off is 16-bit, imm is 32-bit, the patch pool is 32-bit length, so
use imm for keeping offset, meaning a BPF_LIST_INSN can contains no
more than 8192 insns, guess it is enough)
- When doing linearize:
1. a quick scan of prog->insnsi to know the final
image size, would be simple as:
fini_size = 0;
for_each_insn:
if (insn.code == (BPF_ALU | BPF_LIST_HEAD))
fini_size += insn->off;
else
fini_size++;
2. Resize prog into fini_size, and a second scan of prog->insnsi to
copy over all insns and patched insns, at the same time generate a
auxiliary index array which maps an old index to the new index in
final image, like the "clone_index" in Naveen's patch.
3. Finally, a single pass to update branch target, the same algo used
by this patch.
- The APIs for doing insning patch looks like:
bpf_patch_insn_init: init the generic patch pool.
bpf_patch_insn_single: push patched insns to the pool.
link them to the associated BPF_LIST_INSN.
bpf_patch_insn_fini: linearize a bpf_prog contains BPF_LIST_INSN.
destroy patch pool in prog->aux.
I am trying to make the implementation work with jit blinding first, to make
sure basic things are ready. As JIT blinding happens after verification, there
is no need to bother with aux updates etc. Then I will clean up quite a few
things, for example patching a patched insn, adjusting aux data, what to do
with insn deletion, etc.
Regards,
Jiong
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-14 15:13 ` Jiong Wang
@ 2019-06-14 17:05 ` Alexei Starovoitov
2019-06-14 22:28 ` Andrii Nakryiko
2019-06-17 19:47 ` Edward Cree
1 sibling, 1 reply; 17+ messages in thread
From: Alexei Starovoitov @ 2019-06-14 17:05 UTC (permalink / raw)
To: Jiong Wang
Cc: Naveen N. Rao, Daniel Borkmann, bpf, Network Development,
Michael Ellerman, Jakub Kicinski
On Fri, Jun 14, 2019 at 8:13 AM Jiong Wang <jiong.wang@netronome.com> wrote:
>
>
> Alexei Starovoitov writes:
>
> > On Wed, Jun 12, 2019 at 8:25 AM Jiong Wang <jiong.wang@netronome.com> wrote:
> >>
> >>
> >> Jiong Wang writes:
> >>
> >> > Alexei Starovoitov writes:
> >> >
> >> >> On Wed, Jun 12, 2019 at 4:32 AM Naveen N. Rao
> >> >> <naveen.n.rao@linux.vnet.ibm.com> wrote:
> >> >>>
> >> >>> Currently, for constant blinding, we re-allocate the bpf program to
> >> >>> account for its new size and adjust all branches to accommodate the
> >> >>> same, for each BPF instruction that needs constant blinding. This is
> >> >>> inefficient and can lead to soft lockup with sufficiently large
> >> >>> programs, such as the new verifier scalability test (ld_dw: xor
> >> >>> semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)
> >> >>
> >> >> Slowdown you see is due to patch_insn right?
> >> >> In such case I prefer to fix the scaling issue of patch_insn instead.
> >> >> This specific fix for blinding only is not addressing the core of the problem.
> >> >> Jiong,
> >> >> how is the progress on fixing patch_insn?
> >>
> >> And what I have done is I have digested your conversion with Edward, and is
> >> slightly incline to the BB based approach as it also exposes the inserted
> >> insn to later pass in a natural way, then was trying to find a way that
> >> could create BB info in little extra code based on current verifier code,
> >> for example as a side effect of check_subprogs which is doing two insn
> >> traversal already. (I had some such code before in the historical
> >> wip/bpf-loop-detection branch, but feel it might be still too heavy for
> >> just improving insn patching)
> >
> > BB - basic block?
> > I'm not sure that was necessary.
> > The idea was that patching is adding stuff to linked list instead
> > and single pass at the end to linearize it.
>
> Just an update and keep people posted.
>
> Working on linked list based approach, the implementation looks like the
> following, mostly a combine of discussions happened and Naveen's patch,
> please feel free to comment.
>
> - Use the reserved opcode 0xf0 with BPF_ALU as new pseudo insn code
> BPF_LIST_INSN. (0xf0 is also used with BPF_JMP class for tail call).
>
> - Introduce patch pool into bpf_prog->aux to keep all patched insns.
> Pool structure looks like:
>
> struct {
> int num;
> int prev;
> int next;
> } head_0;
> NUM patched insns for head_0
> head_1;
> patched insns for head_1
> head_2;
> ...
>
> - Now when doing bpf_patch_insn_single, it doesn't change the original
> prog etc, instead, it merely update the insn at patched offset into a
> BPF_LIST_INSN, and pushed the patched insns plus a patch header into
> the patch pool. Fields of BPF_LIST_INSN is updated to setup the links:
>
> BPF_LIST_INSN.off += patched_size
> (accumulating the size attached to this list_insn, it is possible a
> later patch pass patches insn in the patch pool, this means insn
> traversal needs to be changed, when seeing BPF_LIST_INSN, should go
> through the list)
>
> BPF_LIST_INSN.imm = offset of the patch header in patch pool
> (off is 16-bit, imm is 32-bit, the patch pool is 32-bit length, so
> use imm for keeping offset, meaning a BPF_LIST_INSN can contains no
> more than 8192 insns, guess it is enough)
>
> - When doing linearize:
> 1. a quick scan of prog->insnsi to know the final
> image size, would be simple as:
>
> fini_size = 0;
> for_each_insn:
> if (insn.code == (BPF_ALU | BPF_LIST_HEAD))
> fini_size += insn->off;
> else
> fini_size++;
>
> 2. Resize prog into fini_size, and a second scan of prog->insnsi to
> copy over all insns and patched insns, at the same time generate a
> auxiliary index array which maps an old index to the new index in
> final image, like the "clone_index" in Naveen's patch.
>
> 3. Finally, a single pass to update branch target, the same algo used
> by this patch.
>
> - The APIs for doing insning patch looks like:
> bpf_patch_insn_init: init the generic patch pool.
> bpf_patch_insn_single: push patched insns to the pool.
> link them to the associated BPF_LIST_INSN.
> bpf_patch_insn_fini: linearize a bpf_prog contains BPF_LIST_INSN.
> destroy patch pool in prog->aux.
>
> I am trying to making the implementation working with jit blind first to make
> sure basic things are ready. As JIT blinds happens after verification so no
> need to both aux update etc. Then will cleanup quite a few things for
> example patch a patched insn, adjust aux data, what to do with insn delete
> etc.
explicit indices feels like premature optimization.
May be use vanilla singly linked list instead?
Also do we have a case when patched insn will be patched again?
In such case 'patch insn pool' will become recursive?
Feels difficult to think through all offsets and indices.
Whereas with linked list patching patched insns will be inserting
them into link list.
May be better alternative is to convert the whole program to link list
of insns with branch targets becoming pointers and insert patched
insns into this single singly linked list ?
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-14 17:05 ` Alexei Starovoitov
@ 2019-06-14 22:28 ` Andrii Nakryiko
0 siblings, 0 replies; 17+ messages in thread
From: Andrii Nakryiko @ 2019-06-14 22:28 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: Jiong Wang, Naveen N. Rao, Daniel Borkmann, bpf,
Network Development, Michael Ellerman, Jakub Kicinski
On Fri, Jun 14, 2019 at 10:06 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Fri, Jun 14, 2019 at 8:13 AM Jiong Wang <jiong.wang@netronome.com> wrote:
> >
> >
> > Alexei Starovoitov writes:
> >
> > > On Wed, Jun 12, 2019 at 8:25 AM Jiong Wang <jiong.wang@netronome.com> wrote:
> > >>
> > >>
> > >> Jiong Wang writes:
> > >>
> > >> > Alexei Starovoitov writes:
> > >> >
> > >> >> On Wed, Jun 12, 2019 at 4:32 AM Naveen N. Rao
> > >> >> <naveen.n.rao@linux.vnet.ibm.com> wrote:
> > >> >>>
> > >> >>> Currently, for constant blinding, we re-allocate the bpf program to
> > >> >>> account for its new size and adjust all branches to accommodate the
> > >> >>> same, for each BPF instruction that needs constant blinding. This is
> > >> >>> inefficient and can lead to soft lockup with sufficiently large
> > >> >>> programs, such as the new verifier scalability test (ld_dw: xor
> > >> >>> semi-random 64 bit imms, test 5 -- with net.core.bpf_jit_harden=2)
> > >> >>
> > >> >> Slowdown you see is due to patch_insn right?
> > >> >> In such case I prefer to fix the scaling issue of patch_insn instead.
> > >> >> This specific fix for blinding only is not addressing the core of the problem.
> > >> >> Jiong,
> > >> >> how is the progress on fixing patch_insn?
> > >>
> > >> And what I have done is I have digested your conversion with Edward, and is
> > >> slightly incline to the BB based approach as it also exposes the inserted
> > >> insn to later pass in a natural way, then was trying to find a way that
> > >> could create BB info in little extra code based on current verifier code,
> > >> for example as a side effect of check_subprogs which is doing two insn
> > >> traversal already. (I had some such code before in the historical
> > >> wip/bpf-loop-detection branch, but feel it might be still too heavy for
> > >> just improving insn patching)
> > >
> > > BB - basic block?
> > > I'm not sure that was necessary.
> > > The idea was that patching is adding stuff to linked list instead
> > > and single pass at the end to linearize it.
> >
> > Just an update and keep people posted.
> >
> > Working on linked list based approach, the implementation looks like the
> > following, mostly a combine of discussions happened and Naveen's patch,
> > please feel free to comment.
> >
> > - Use the reserved opcode 0xf0 with BPF_ALU as new pseudo insn code
> > BPF_LIST_INSN. (0xf0 is also used with BPF_JMP class for tail call).
> >
> > - Introduce patch pool into bpf_prog->aux to keep all patched insns.
> > Pool structure looks like:
> >
> > struct {
> > int num;
> > int prev;
> > int next;
> > } head_0;
> > NUM patched insns for head_0
> > head_1;
> > patched insns for head_1
> > head_2;
> > ...
> >
> > - Now when doing bpf_patch_insn_single, it doesn't change the original
> > prog etc, instead, it merely update the insn at patched offset into a
> > BPF_LIST_INSN, and pushed the patched insns plus a patch header into
> > the patch pool. Fields of BPF_LIST_INSN is updated to setup the links:
> >
> > BPF_LIST_INSN.off += patched_size
> > (accumulating the size attached to this list_insn, it is possible a
> > later patch pass patches insn in the patch pool, this means insn
> > traversal needs to be changed, when seeing BPF_LIST_INSN, should go
> > through the list)
> >
> > BPF_LIST_INSN.imm = offset of the patch header in patch pool
> > (off is 16-bit, imm is 32-bit, the patch pool is 32-bit length, so
> > use imm for keeping offset, meaning a BPF_LIST_INSN can contains no
> > more than 8192 insns, guess it is enough)
> >
> > - When doing linearize:
> > 1. a quick scan of prog->insnsi to know the final
> > image size, would be simple as:
> >
> > fini_size = 0;
> > for_each_insn:
> > if (insn.code == (BPF_ALU | BPF_LIST_HEAD))
> > fini_size += insn->off;
> > else
> > fini_size++;
> >
> > 2. Resize prog into fini_size, and a second scan of prog->insnsi to
> > copy over all insns and patched insns, at the same time generate a
> > auxiliary index array which maps an old index to the new index in
> > final image, like the "clone_index" in Naveen's patch.
> >
> > 3. Finally, a single pass to update branch target, the same algo used
> > by this patch.
> >
> > - The APIs for doing insning patch looks like:
> > bpf_patch_insn_init: init the generic patch pool.
> > bpf_patch_insn_single: push patched insns to the pool.
> > link them to the associated BPF_LIST_INSN.
> > bpf_patch_insn_fini: linearize a bpf_prog contains BPF_LIST_INSN.
> > destroy patch pool in prog->aux.
> >
> > I am trying to making the implementation working with jit blind first to make
> > sure basic things are ready. As JIT blinds happens after verification so no
> > need to both aux update etc. Then will cleanup quite a few things for
> > example patch a patched insn, adjust aux data, what to do with insn delete
> > etc.
>
> explicit indices feels like premature optimization.
> May be use vanilla singly linked list instead?
> Also do we have a case when patched insn will be patched again?
> In such case 'patch insn pool' will become recursive?
> Feels difficult to think through all offsets and indices.
> Whereas with linked list patching patched insns will be inserting
> them into link list.
>
> May be better alternative is to convert the whole program to link list
> of insns with branch targets becoming pointers and insert patched
> insns into this single singly linked list ?
I think converting to a singly-linked list is a good, simple and
straightforward approach. The only downside is 3x more memory (insn +
next pointer + branch pointer for instructions that branch/call), but
it shouldn't be a big deal in practice. But it seems like a good
opportunity to simplify and clean up patching code, so I'd definitely
vote to start with this approach.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-14 15:13 ` Jiong Wang
2019-06-14 17:05 ` Alexei Starovoitov
@ 2019-06-17 19:47 ` Edward Cree
2019-06-17 19:59 ` Jiong Wang
1 sibling, 1 reply; 17+ messages in thread
From: Edward Cree @ 2019-06-17 19:47 UTC (permalink / raw)
To: Jiong Wang, Alexei Starovoitov
Cc: Naveen N. Rao, Daniel Borkmann, bpf, Network Development,
Michael Ellerman, Jakub Kicinski
On 14/06/2019 16:13, Jiong Wang wrote:
> Just an update and keep people posted.
>
> Working on linked list based approach, the implementation looks like the
> following, mostly a combine of discussions happened and Naveen's patch,
> please feel free to comment.
>
> - Use the reserved opcode 0xf0 with BPF_ALU as new pseudo insn code
> BPF_LIST_INSN. (0xf0 is also used with BPF_JMP class for tail call).
>
> - Introduce patch pool into bpf_prog->aux to keep all patched insns.
It's not clear to me what the point of the patch pool is, rather than just
doing the patch straight away. Idea is that when prog is half-patched,
insn idxs / jump offsets / etc. all refer to the original locations, only
some of those might now be a list rather than a single insn. Concretely:
struct bpf_insn_list { bpf_insn insn; struct list_head list; }
orig prog: array bpf_insn[3] = {cond jump +1, insn_foo, exit};
start of day verifier converts this into array bpf_insn_list[3],
each with new.insn = orig.insn; INIT_LIST_HEAD(new.list)
verifier decides to patch insn_foo into {insn_bar, insn_baz}.
so we allocate bpf_insn_list *x;
insn_foo.insn = insn_bar;
x->insn = insn_baz;
list_add_tail(&x->list, &insn_foo.list);
the cond jump +1 is _not_ changed at this point, because it counts
bpf_insn_lists and not insns.
Then at end of day to linearise we just have int new_idx[3];
populate it by iterating over the array of bpf_insn_list and adding on
the list length each time (so we get {0, 1, 3})
then allocate output prog of the total length (here 4, calculated by
that same pass as effectively off-the-end entry of new_idx)
then iterate again to write out the output prog, when we see that 'cond
jump +1' in old_idx 0 we see that (new_idx[2] - new_idx[0] - 1) = 2, so
it becomes 'cond jump +2'.
This seems to me like it'd be easier to work with than making the whole
program one big linked list (where you have to separately keep track of
what each insn's idx was before patching). But I haven't tried to code
it yet, so anything could happen ;-) It does rely on the assumption
that only original insns (or the first insn of a list they get patched
with) will ever be jump targets or otherwise need their insn_idx taken.
-Ed
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-17 19:47 ` Edward Cree
@ 2019-06-17 19:59 ` Jiong Wang
2019-06-17 20:11 ` Edward Cree
0 siblings, 1 reply; 17+ messages in thread
From: Jiong Wang @ 2019-06-17 19:59 UTC (permalink / raw)
To: Edward Cree
Cc: Jiong Wang, Alexei Starovoitov, Naveen N. Rao, Daniel Borkmann,
bpf, Network Development, Michael Ellerman, Jakub Kicinski
Edward Cree writes:
> On 14/06/2019 16:13, Jiong Wang wrote:
>> Just an update and keep people posted.
>>
>> Working on linked list based approach, the implementation looks like the
>> following, mostly a combine of discussions happened and Naveen's patch,
>> please feel free to comment.
>>
>> - Use the reserved opcode 0xf0 with BPF_ALU as new pseudo insn code
>> BPF_LIST_INSN. (0xf0 is also used with BPF_JMP class for tail call).
>>
>> - Introduce patch pool into bpf_prog->aux to keep all patched insns.
> It's not clear to me what the point of the patch pool is, rather than just
> doing the patch straight away.
I used pool because I was thinking insn to be patched could be high
percentage, so doing lots of alloc call is going to be less efficient? so
allocate a big pool, and each time when creating new patch node, allocate
it from the pool directly. Node is addressed using pool_base + offset, each
node only need to keep offset.
> Idea is that when prog is half-patched,
> insn idxs / jump offsets / etc. all refer to the original locations, only
> some of those might now be a list rather than a single insn. Concretely:
>
> struct bpf_insn_list { bpf_insn insn; struct list_head list; }
> orig prog: array bpf_insn[3] = {cond jump +1, insn_foo, exit};
> start of day verifier converts this into array bpf_insn_list[3],
> each with new.insn = orig.insn; INIT_LIST_HEAD(new.list)
>
> verifier decides to patch insn_foo into {insn_bar, insn_baz}.
> so we allocate bpf_insn_list *x;
> insn_foo.insn = insn_bar;
> x->insn = insn_baz;
> list_add_tail(&x->list, &insn_foo.list);
>
> the cond jump +1 is _not_ changed at this point, because it counts
> bpf_insn_lists and not insns.
>
> Then at end of day to linearise we just have int new_idx[3];
> populate it by iterating over the array of bpf_insn_list and adding on
> the list length each time (so we get {0, 1, 3})
> then allocate output prog of the total length (here 4, calculated by
> that same pass as effectively off-the-end entry of new_idx)
> then iterate again to write out the output prog, when we see that 'cond
> jump +1' in old_idx 0 we see that (new_idx[2] - new_idx[0] - 1) = 2, so
> it becomes 'cond jump +2'.
>
>
> This seems to me like it'd be easier to work with than making the whole
> program one big linked list (where you have to separately keep track of
> what each insn's idx was before patching). But I haven't tried to code
> it yet, so anything could happen ;-) It does rely on the assumption
> that only original insns (or the first insn of a list they get patched
> with) will ever be jump targets or otherwise need their insn_idx taken.
>
> -Ed
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-17 19:59 ` Jiong Wang
@ 2019-06-17 20:11 ` Edward Cree
2019-06-17 20:40 ` Jiong Wang
0 siblings, 1 reply; 17+ messages in thread
From: Edward Cree @ 2019-06-17 20:11 UTC (permalink / raw)
To: Jiong Wang
Cc: Alexei Starovoitov, Naveen N. Rao, Daniel Borkmann, bpf,
Network Development, Michael Ellerman, Jakub Kicinski
On 17/06/2019 20:59, Jiong Wang wrote:
> Edward Cree writes:
>
>> On 14/06/2019 16:13, Jiong Wang wrote:
>>> Just an update and keep people posted.
>>>
>>> Working on linked list based approach, the implementation looks like the
>>> following, mostly a combine of discussions happened and Naveen's patch,
>>> please feel free to comment.
>>>
>>> - Use the reserved opcode 0xf0 with BPF_ALU as new pseudo insn code
>>> BPF_LIST_INSN. (0xf0 is also used with BPF_JMP class for tail call).
>>>
>>> - Introduce patch pool into bpf_prog->aux to keep all patched insns.
>> It's not clear to me what the point of the patch pool is, rather than just
>> doing the patch straight away.
> I used pool because I was thinking insn to be patched could be high
> percentage, so doing lots of alloc call is going to be less efficient? so
> allocate a big pool, and each time when creating new patch node, allocate
> it from the pool directly. Node is addressed using pool_base + offset, each
> node only need to keep offset.
Good idea; but in that case it doesn't need to be a pool of patches (storing
their prev and next), just a pool of insns. I.e. struct bpf_insn pool[many];
then in orig prog when patching an insn replace it with BPF_LIST_INSN. If
we later decide to patch an insn within a patch, we can replace it (i.e. the
entry in bpf_insn_pool) with another BPF_LIST_INSN pointing to some later bit
of the pool, then we just have a little bit of recursion at linearise time.
Does that work?
-Ed
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-17 20:11 ` Edward Cree
@ 2019-06-17 20:40 ` Jiong Wang
2019-06-17 20:53 ` Alexei Starovoitov
2019-06-17 21:16 ` Edward Cree
0 siblings, 2 replies; 17+ messages in thread
From: Jiong Wang @ 2019-06-17 20:40 UTC (permalink / raw)
To: Edward Cree, Alexei Starovoitov, Andrii Nakryiko
Cc: Jiong Wang, Alexei Starovoitov, Naveen N. Rao, Daniel Borkmann,
bpf, Network Development, Michael Ellerman, Jakub Kicinski
Edward Cree writes:
> On 17/06/2019 20:59, Jiong Wang wrote:
>> Edward Cree writes:
>>
>>> On 14/06/2019 16:13, Jiong Wang wrote:
>>>> Just an update and keep people posted.
>>>>
>>>> Working on linked list based approach, the implementation looks like the
>>>> following, mostly a combine of discussions happened and Naveen's patch,
>>>> please feel free to comment.
>>>>
>>>> - Use the reserved opcode 0xf0 with BPF_ALU as new pseudo insn code
>>>> BPF_LIST_INSN. (0xf0 is also used with BPF_JMP class for tail call).
>>>>
>>>> - Introduce patch pool into bpf_prog->aux to keep all patched insns.
>>> It's not clear to me what the point of the patch pool is, rather than just
>>> doing the patch straight away.
>> I used pool because I was thinking insn to be patched could be high
>> percentage, so doing lots of alloc call is going to be less efficient? so
>> allocate a big pool, and each time when creating new patch node, allocate
>> it from the pool directly. Node is addressed using pool_base + offset, each
>> node only need to keep offset.
> Good idea; but in that case it doesn't need to be a pool of patches (storing
> their prev and next), just a pool of insns. I.e. struct bpf_insn pool[many];
> then in orig prog when patching an insn replace it with BPF_LIST_INSN. If
> we later decide to patch an insn within a patch, we can replace it (i.e. the
> entry in bpf_insn_pool) with another BPF_LIST_INSN pointing to some later bit
> of the pool, then we just have a little bit of recursion at linearise time.
> Does that work?
I feel it is not going to work well. What I have proposed initially is
something similar, except when we are patching an insn within a patch (such
cases do exist, for example zext insertion will apply to some patched alu insn
inserted in ctx convert pass), then we split the patch, this then makes the
data structures used in two shapes,
1. original prog->insnsi is still maintained as array.
2. if one insn is BPF_LIST_INSN, then it is a list, and could traverse it
using pool_base + insn.imm = list head.
But then there is data structure inconsistent, so now when doing insn
traversal we need something like:
for (idx = 0; idx < insn_cnt; idx++) {
if (insns[idx] is not BPF_LIST_INSN) {
do_insn(...)
}
else if (insns[idx] is BPF_LIST_INSN) {
list = pool_base + insn.imm;
while (list) {
insn = list_head->insn;
do_insn(...)
list = pool_base + list->next;
}
}
}
Code logic inside convert_ctx_accesses/fixup_bpf_call etc needs to be
re-factored out into a separate function do_NNN so it could be called
in above logic.
Now if we don't split patch when patch an insn inside patch, instead, if we
replace the patched insn using what you suggested, then the logic looks to
me becomes even more complex, something like
for (idx = 0; idx < insn_cnt; idx++) {
if (insns[idx] is not BPF_LIST_INSN) {
do_insn(...)
}
else if (insns[idx] is BPF_LIST_INSN) {
list = pool_base + insn.imm;
while (list) {
insn = list_head->insn;
> if (insn is BPF_LIST_INSN) {
sub_list = ...
while ()
do_insn()
continue;
}
do_insn(...)
list = pool_base + list->next;
}
}
}
So, I am thinking what Alexei and Andrii suggested make sense, just use
single data structure (singly linked list) to represent everything, so the
insn traversal etc could be simple, but I am considering it is better to
still base the list on top of the pool infrastructure mentioned?
I have something like the following:
+struct bpf_list_insn {
+ struct bpf_insn insn;
+ u32 next;
+};
struct bpf_prog_aux {:
+ struct {
+ struct bpf_list_insn *pool;
+ u32 size;
+ u32 used;
+ };
Whenever you want to do intensive patching work you could call
bpf_patch_init to setup the pool, the setup including:
1. convert the original prog->insnsi into a list of bpf_list_insn.
next is index into the pool, so for those initial insnsi, they are
1, 2, 3, 4, 5, ...
Then when doing patching, just allocate new slot from the pool, and link
them into the list.
When patching is finished, call bpf_patch_fini to do the linearization, could use the
same algo in Naveen's patch.
After digest Alexei and Andrii's reply, I still don't see the need to turn
branch target into list, and I am not sure whether pool based list sound
good? it saves size, resize pool doesn't invalid allocated node (the offset
doesn't change) but requires one extra addition to calculate the pointer.
>
> -Ed
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-17 20:40 ` Jiong Wang
@ 2019-06-17 20:53 ` Alexei Starovoitov
2019-06-17 21:01 ` Jiong Wang
2019-06-17 21:16 ` Edward Cree
1 sibling, 1 reply; 17+ messages in thread
From: Alexei Starovoitov @ 2019-06-17 20:53 UTC (permalink / raw)
To: Jiong Wang
Cc: Edward Cree, Alexei Starovoitov, Andrii Nakryiko, Naveen N. Rao,
Daniel Borkmann, bpf, Network Development, Michael Ellerman,
Jakub Kicinski
On Mon, Jun 17, 2019 at 1:40 PM Jiong Wang <jiong.wang@netronome.com> wrote:
>
> After digest Alexei and Andrii's reply, I still don't see the need to turn
> branch target into list, and I am not sure whether pool based list sound
> good? it saves size, resize pool doesn't invalid allocated node (the offset
> doesn't change) but requires one extra addition to calculate the pointer.
I don't think it worth to do a pool to accelerate kmalloc.
I doubt it will be faster either.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-17 20:53 ` Alexei Starovoitov
@ 2019-06-17 21:01 ` Jiong Wang
0 siblings, 0 replies; 17+ messages in thread
From: Jiong Wang @ 2019-06-17 21:01 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: Jiong Wang, Edward Cree, Alexei Starovoitov, Andrii Nakryiko,
Naveen N. Rao, Daniel Borkmann, bpf, Network Development,
Michael Ellerman, Jakub Kicinski
Alexei Starovoitov writes:
> On Mon, Jun 17, 2019 at 1:40 PM Jiong Wang <jiong.wang@netronome.com> wrote:
>>
>> After digest Alexei and Andrii's reply, I still don't see the need to turn
>> branch target into list, and I am not sure whether pool based list sound
>> good? it saves size, resize pool doesn't invalid allocated node (the offset
>> doesn't change) but requires one extra addition to calculate the pointer.
>
> I don't think it worth to do a pool to accelerate kmalloc.
Got it.
> I doubt it will be faster either.
Will benchmark.
Regards,
Jiong
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-17 20:40 ` Jiong Wang
2019-06-17 20:53 ` Alexei Starovoitov
@ 2019-06-17 21:16 ` Edward Cree
2019-06-19 20:45 ` Jiong Wang
1 sibling, 1 reply; 17+ messages in thread
From: Edward Cree @ 2019-06-17 21:16 UTC (permalink / raw)
To: Jiong Wang, Alexei Starovoitov, Andrii Nakryiko
Cc: Alexei Starovoitov, Naveen N. Rao, Daniel Borkmann, bpf,
Network Development, Michael Ellerman, Jakub Kicinski
On 17/06/2019 21:40, Jiong Wang wrote:
> Now if we don't split patch when patch an insn inside patch, instead, if we
> replace the patched insn using what you suggested, then the logic looks to
> me becomes even more complex, something like
>
> for (idx = 0; idx < insn_cnt; idx++) {
> if (insns[idx] is not BPF_LIST_INSN) {
> do_insn(...)
> }
> else if (insns[idx] is BPF_LIST_INSN) {
> list = pool_base + insn.imm;
> while (list) {
> insn = list_head->insn;
> if (insn is BPF_LIST_INSN) {
> sub_list = ...
> while ()
> do_insn()
> continue;
> }
> do_insn(...)
> list = pool_base + list->next;
> }
> }
> }
Why can't do_insn() just go like:
if (insn is BPF_LIST_INSN)
for (idx = 0; idx < LIST_COUNT(insn); idx++)
do_insn(pool_base + LIST_START(insn) + idx);
else
rest of processing
?
Alternatively, iterate with something more sophisticated than 'idx++'
(standard recursion-to-loop transformation).
You shouldn't ever need a for() tower statically in the code...
> So, I am thinking what Alexei and Andrii suggested make sense, just use
> single data structure (singly linked list) to represent everything, so the
> insn traversal etc could be simple
But then you have to also store orig_insn_idx with each insn, so you can
calculate the new jump offsets when you linearise. Having an array of
patched_orig_insns gives you that for free.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-17 21:16 ` Edward Cree
@ 2019-06-19 20:45 ` Jiong Wang
0 siblings, 0 replies; 17+ messages in thread
From: Jiong Wang @ 2019-06-19 20:45 UTC (permalink / raw)
To: Edward Cree, Alexei Starovoitov, Andrii Nakryiko
Cc: Jiong Wang, Naveen N. Rao, Daniel Borkmann, bpf,
Network Development, Michael Ellerman, Jakub Kicinski
Edward Cree writes:
> On 17/06/2019 21:40, Jiong Wang wrote:
>> Now if we don't split patch when patch an insn inside patch, instead, if we
>> replace the patched insn using what you suggested, then the logic looks to
>> me becomes even more complex, something like
>>
>> for (idx = 0; idx < insn_cnt; idx++) {
>> if (insns[idx] is not BPF_LIST_INSN) {
>> do_insn(...)
>> }
>> else if (insns[idx] is BPF_LIST_INSN) {
>> list = pool_base + insn.imm;
>> while (list) {
>> insn = list_head->insn;
>> if (insn is BPF_LIST_INSN) {
>> sub_list = ...
>> while ()
>> do_insn()
>> continue;
>> }
>> do_insn(...)
>> list = pool_base + list->next;
>> }
>> }
>> }
> Why can't do_insn() just go like:
> if (insn is BPF_LIST_INSN)
> for (idx = 0; idx < LIST_COUNT(insn); idx++)
> do_insn(pool_base + LIST_START(insn) + idx);
> else
> rest of processing
> ?
>
> Alternatively, iterate with something more sophisticated than 'idx++'
> (standard recursion-to-loop transformation).
> You shouldn't ever need a for() tower statically in the code...
I don't think this changes things much, the point is we still have two data
structures for insns, array + list, so I feel you anyway need some tweaks on
the existing traverse code, while using a singly linked list incurs very few
changes, for example:
for (i = 0; i < insn_cnt; i++, insn++) {
=>
for (elem = list; elem; elem = elem->next) {
insn = elem->insn;
>> So, I am thinking what Alexei and Andrii suggested make sense, just use
>> single data structure (singly linked list) to represent everything, so the
>> insn traversal etc could be simple
> But then you have to also store orig_insn_idx with each insn, so you can
> calculate the new jump offsets when you linearise. Having an array of
> patched_orig_insns gives you that for free.
For pool based list, you don't need to store orig_insn_idx, those orig ones
are guaranteed at the bottom of the pool, so just use index < orig_prog_len
then you could know it is the orig insn. And for both pool and non-pool
based list, the order of orig node in the list is the same as in array, so
it quite easy to calculate the orig index as a by-product inside insn copy
traverse, for non-pool base list, each node needs at least one bit to
indicate it is orig node. I also found that patching a patched buffer which
contains a jmp insn is an issue (need to double check to see if there is such
a case), because then we will need to record the jump destination index of
the jmp insn when it was inserted.
And some updates on my side, did some benchmarking on both pool and
non-pool based list.
Patching time (ns) benchmarked using JIT blinding
===
existing non-pool pool
"scale test 1" no stop ~0x1c600000 ~0x8800000
Bench(~3.4K insns) ~0xc50000 ~0xf1000 ~6b000
(The non-pool means kmalloc a list node for each patch snippet, pool means
vmalloc a big chunk of mem and allocate node from it, node is located using
pool_base + index)
For "scale test 1" which contains ~1M JIT blindable insns, using list based
infra for patching could reduce most of the patching time, and pool based
alloc only consumes 1/3 time of non-pool.
And for a normal program with reasonable size (~3.4K), pool based alloc
only consumes 1/30 time of existing infra, and 1/2.3 of non-pool.
On the other hand, non-pool based implementation is cleaner and less error
prone than pool based implementation.
And for both pool and non-pool, I got the following kernel warning when
running "scale test 11" (perhaps needs 3M * 16 ~= 48M mem)
[ 92.319792] WARNING: CPU: 6 PID: 2322 at mm/page_alloc.c:4639 __alloc_pages_nodemask+0x29e/0x330
I am finishing aux adjust and code delete support.
Regards,
Jiong
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] bpf: optimize constant blinding
2019-06-12 11:32 [PATCH] bpf: optimize constant blinding Naveen N. Rao
2019-06-12 14:47 ` Alexei Starovoitov
@ 2019-06-14 4:30 ` kbuild test robot
1 sibling, 0 replies; 17+ messages in thread
From: kbuild test robot @ 2019-06-14 4:30 UTC (permalink / raw)
To: Naveen N. Rao
Cc: kbuild-all, Alexei Starovoitov, Daniel Borkmann, bpf, netdev,
Michael Ellerman
Hi "Naveen,
I love your patch! Perhaps something to improve:
[auto build test WARNING on bpf-next/master]
[also build test WARNING on v5.2-rc4 next-20190613]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Naveen-N-Rao/bpf-optimize-constant-blinding/20190614-023948
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
reproduce:
# apt-get install sparse
# sparse version: v0.6.1-rc1-7-g2b96cd8-dirty
make ARCH=x86_64 allmodconfig
make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__'
If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>
sparse warnings: (new ones prefixed by >>)
kernel/bpf/core.c:210:49: sparse: sparse: arithmetics on pointers to functions
include/linux/rbtree.h:120:9: sparse: sparse: incompatible types in comparison expression (different address spaces):
include/linux/rbtree.h:120:9: sparse: struct rb_node [noderef] <asn:4> *
include/linux/rbtree.h:120:9: sparse: struct rb_node *
include/linux/rbtree.h:120:9: sparse: sparse: incompatible types in comparison expression (different address spaces):
include/linux/rbtree.h:120:9: sparse: struct rb_node [noderef] <asn:4> *
include/linux/rbtree.h:120:9: sparse: struct rb_node *
>> kernel/bpf/core.c:1122:59: sparse: sparse: Using plain integer as NULL pointer
kernel/bpf/core.c:1122:62: sparse: sparse: Using plain integer as NULL pointer
include/trace/events/xdp.h:28:1: sparse: sparse: Using plain integer as NULL pointer
include/trace/events/xdp.h:53:1: sparse: sparse: Using plain integer as NULL pointer
include/trace/events/xdp.h:111:1: sparse: sparse: Using plain integer as NULL pointer
include/trace/events/xdp.h:126:1: sparse: sparse: Using plain integer as NULL pointer
include/trace/events/xdp.h:161:1: sparse: sparse: Using plain integer as NULL pointer
include/trace/events/xdp.h:196:1: sparse: sparse: Using plain integer as NULL pointer
include/trace/events/xdp.h:231:1: sparse: sparse: Using plain integer as NULL pointer
vim +1122 kernel/bpf/core.c
1101
1102 struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
1103 {
1104 int i, j, rewritten, new_len, insn_cnt, ret = 0;
1105 struct bpf_insn insn_buff[16], aux[2];
1106 struct bpf_prog *clone, *tmp;
1107 struct bpf_insn *insn;
1108 u32 *clone_index;
1109
1110 if (!bpf_jit_blinding_enabled(prog) || prog->blinded)
1111 return prog;
1112
1113 /* Dry run to figure out the final number of instructions */
1114 clone_index = vmalloc(prog->len * sizeof(u32));
1115 if (!clone_index)
1116 return ERR_PTR(-ENOMEM);
1117
1118 insn_cnt = prog->len;
1119 insn = prog->insnsi;
1120 rewritten = 0;
1121 for (i = 0; i < insn_cnt; i++, insn++) {
> 1122 clone_index[i] = bpf_jit_blind_insn(insn, 0, 0);
1123 if (clone_index[i] > 1)
1124 rewritten += clone_index[i] - 1;
1125 }
1126
1127 if (rewritten) {
1128 /* Needs new allocation, branch adjustment, et al... */
1129 clone = bpf_prog_clone_create(prog, GFP_USER);
1130 if (!clone) {
1131 ret = -ENOMEM;
1132 goto err;
1133 }
1134
1135 new_len = prog->len + rewritten;
1136 tmp = bpf_prog_realloc(clone, bpf_prog_size(new_len), GFP_USER);
1137 if (!tmp) {
1138 ret = -ENOMEM;
1139 goto err;
1140 }
1141 clone = tmp;
1142 clone->len = new_len;
1143
1144 /* rewrite instructions with constant blinding */
1145 insn_cnt = prog->len;
1146 insn = prog->insnsi;
1147 for (i = 0, j = 0; i < insn_cnt; i++, j++, insn++) {
1148 /* capture new instruction index in clone_index */
1149 clone_index[i] = j;
1150
1151 /* We temporarily need to hold the original ld64 insn
1152 * so that we can still access the first part in the
1153 * second blinding run.
1154 */
1155 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
1156 insn[1].code == 0)
1157 memcpy(aux, insn, sizeof(aux));
1158
1159 rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
1160 if (!rewritten) {
1161 memcpy(clone->insnsi + j, insn,
1162 sizeof(struct bpf_insn));
1163 } else {
1164 memcpy(clone->insnsi + j, insn_buff,
1165 sizeof(struct bpf_insn) * rewritten);
1166 j += rewritten - 1;
1167 }
1168 }
1169
1170 /* adjust branches */
1171 for (i = 0; i < insn_cnt; i++) {
1172 int next_insn_idx = clone->len;
1173
1174 if (i < insn_cnt - 1)
1175 next_insn_idx = clone_index[i + 1];
1176
1177 insn = clone->insnsi + clone_index[i];
1178 for (j = clone_index[i]; j < next_insn_idx; j++, insn++) {
1179 ret = bpf_jit_blind_adj_imm_off(insn, i, j, clone_index);
1180 if (ret) {
1181 goto err;
1182 }
1183 }
1184 }
1185
1186 /* adjust linfo */
1187 if (clone->aux->nr_linfo) {
1188 struct bpf_line_info *linfo = clone->aux->linfo;
1189
1190 for (i = 0; i < clone->aux->nr_linfo; i++)
1191 linfo[i].insn_off = clone_index[linfo[i].insn_off];
1192 }
1193 } else {
1194 /* if prog length remains same, not much work to do */
1195 clone = bpf_prog_clone_create(prog, GFP_USER);
1196 if (!clone) {
1197 ret = -ENOMEM;
1198 goto err;
1199 }
1200
1201 insn_cnt = clone->len;
1202 insn = clone->insnsi;
1203
1204 for (i = 0; i < insn_cnt; i++, insn++) {
1205 if (clone_index[i]) {
1206 bpf_jit_blind_insn(insn, aux, insn_buff);
1207 memcpy(insn, insn_buff, sizeof(struct bpf_insn));
1208 }
1209 }
1210 }
1211
1212 clone->blinded = 1;
1213
1214 err:
1215 vfree(clone_index);
1216
1217 if (ret) {
1218 if (clone)
1219 bpf_jit_prog_release_other(prog, clone);
1220 return ERR_PTR(ret);
1221 }
1222
1223 return clone;
1224 }
1225 #endif /* CONFIG_BPF_JIT */
1226
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2019-06-19 20:45 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-12 11:32 [PATCH] bpf: optimize constant blinding Naveen N. Rao
2019-06-12 14:47 ` Alexei Starovoitov
2019-06-12 15:04 ` Jiong Wang
2019-06-12 15:25 ` Jiong Wang
2019-06-12 15:28 ` Alexei Starovoitov
2019-06-14 15:13 ` Jiong Wang
2019-06-14 17:05 ` Alexei Starovoitov
2019-06-14 22:28 ` Andrii Nakryiko
2019-06-17 19:47 ` Edward Cree
2019-06-17 19:59 ` Jiong Wang
2019-06-17 20:11 ` Edward Cree
2019-06-17 20:40 ` Jiong Wang
2019-06-17 20:53 ` Alexei Starovoitov
2019-06-17 21:01 ` Jiong Wang
2019-06-17 21:16 ` Edward Cree
2019-06-19 20:45 ` Jiong Wang
2019-06-14 4:30 ` kbuild test robot
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).