From: "Björn Töpel" <bjorn.topel@gmail.com>
To: Daniel Borkmann <daniel@iogearbox.net>
Cc: Netdev <netdev@vger.kernel.org>,
	linux-riscv@lists.infradead.org,
	Palmer Dabbelt <palmer@sifive.com>,
	davidlee@sifive.com
Subject: Re: [RFC PATCH 3/3] bpf, riscv: added eBPF JIT for RV64G
Date: Wed, 16 Jan 2019 08:23:56 +0100	[thread overview]
Message-ID: <CAJ+HfNg7Ud0RRZQHmbh_DmQDQYNRbSd3nTmNfjO-fX=0o0x-2A@mail.gmail.com> (raw)
In-Reply-To: <efd18130-85b6-263c-85cb-1b19d81141f3@iogearbox.net>

On Wed, Jan 16, 2019 at 12:50 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 01/15/2019 09:35 AM, Björn Töpel wrote:
> > This commit adds eBPF JIT for RV64G.
> >
> > Codewise, it needs some refactoring. Currently there's a bit too much
> > copy-and-paste going on, and I know some places where I could optimize
> > the code generation a bit (mostly BPF_K type of instructions, dealing
> > with immediates).
>
> Nice work! :)
>
> > From a features perspective, two things are missing:
> >
> > * tail calls
> > * "far-branches", i.e. conditional branches that reach beyond 13b.
> >
> > The test_bpf.ko passes all tests.
>
> Did you also check test_verifier under jit with/without jit hardening
> enabled? That one contains lots of runtime tests as well. Probably makes
> sense to check under CONFIG_BPF_JIT_ALWAYS_ON to see what fails the JIT;
> the test_verifier also contains various tail call tests targeted at JITs,
> for example.
>

Good point! I will do that. The only selftests/bpf program I ran so
far (and which passed) was "test_progs". I'll make sure that the
complete bpf selftests suite passes as well!

> Nit: please definitely also add a MAINTAINERS entry with at least yourself
> under BPF JIT section, and update Documentation/sysctl/net.txt with riscv64.
>

Ah! Yes, I'll fix that.
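
For v2 I'm thinking of an entry along these lines (exact form to be
decided):

	BPF JIT for RISC-V (RV64G)
	M:	Björn Töpel <bjorn.topel@gmail.com>
	L:	netdev@vger.kernel.org
	S:	Maintained
	F:	arch/riscv/net/

plus a riscv64 line in the bpf_jit_enable section of
Documentation/sysctl/net.txt.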

> > Signed-off-by: Björn Töpel <bjorn.topel@gmail.com>
> > ---
> >  arch/riscv/net/bpf_jit_comp.c | 1608 +++++++++++++++++++++++++++++++++
> >  1 file changed, 1608 insertions(+)
> >
> > diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c
> > index 7e359d3249ee..562d56eb8d23 100644
> > --- a/arch/riscv/net/bpf_jit_comp.c
> > +++ b/arch/riscv/net/bpf_jit_comp.c
> > @@ -1,4 +1,1612 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * BPF JIT compiler for RV64G
> > + *
> > + * Copyright(c) 2019 Björn Töpel <bjorn.topel@gmail.com>
> > + *
> > + */
> > +
> > +#include <linux/bpf.h>
> > +#include <linux/filter.h>
> > +#include <asm/cacheflush.h>
> > +
> > +#define TMP_REG_0 (MAX_BPF_JIT_REG + 0)
> > +#define TMP_REG_1 (MAX_BPF_JIT_REG + 1)
>
> Not used?
>

Correct! I'll get rid of them.

> > +#define TAIL_CALL_REG (MAX_BPF_JIT_REG + 2)
> > +
> > +enum rv_register {
> > +     RV_REG_ZERO =   0,      /* The constant value 0 */
> > +     RV_REG_RA =     1,      /* Return address */
> > +     RV_REG_SP =     2,      /* Stack pointer */
> > +     RV_REG_GP =     3,      /* Global pointer */
> > +     RV_REG_TP =     4,      /* Thread pointer */
> > +     RV_REG_T0 =     5,      /* Temporaries */
> > +     RV_REG_T1 =     6,
> > +     RV_REG_T2 =     7,
> > +     RV_REG_FP =     8,
> > +     RV_REG_S1 =     9,      /* Saved registers */
> > +     RV_REG_A0 =     10,     /* Function argument/return values */
> > +     RV_REG_A1 =     11,     /* Function arguments */
> > +     RV_REG_A2 =     12,
> > +     RV_REG_A3 =     13,
> > +     RV_REG_A4 =     14,
> > +     RV_REG_A5 =     15,
> > +     RV_REG_A6 =     16,
> > +     RV_REG_A7 =     17,
> > +     RV_REG_S2 =     18,     /* Saved registers */
> > +     RV_REG_S3 =     19,
> > +     RV_REG_S4 =     20,
> > +     RV_REG_S5 =     21,
> > +     RV_REG_S6 =     22,
> > +     RV_REG_S7 =     23,
> > +     RV_REG_S8 =     24,
> > +     RV_REG_S9 =     25,
> > +     RV_REG_S10 =    26,
> > +     RV_REG_S11 =    27,
> > +     RV_REG_T3 =     28,     /* Temporaries */
> > +     RV_REG_T4 =     29,
> > +     RV_REG_T5 =     30,
> > +     RV_REG_T6 =     31,
> > +};
> > +
> > +struct rv_jit_context {
> > +     struct bpf_prog *prog;
> > +     u32 *insns; /* RV insns */
> > +     int ninsns;
> > +     int epilogue_offset;
> > +     int *offset; /* BPF to RV */
> > +     unsigned long seen_reg_bits;
> > +     int stack_size;
> > +};
> > +
> > +struct rv_jit_data {
> > +     struct bpf_binary_header *header;
> > +     u8 *image;
> > +     struct rv_jit_context ctx;
> > +};
> > +
> > +static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx)
> > +{
>
> This one can also be simplified by having a simple mapping as in
> other JITs and then mark __set_bit(<reg>) in the small bpf_to_rv_reg()
> helper.
>

Yeah, I agree. Much better. I'll take that route.
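
Untested sketch of what I have in mind, i.e. a plain lookup table,
and a small helper that records the register as seen:

	static const int regmap[] = {
		[BPF_REG_0] =		RV_REG_A5,
		[BPF_REG_1] =		RV_REG_A0,
		[BPF_REG_2] =		RV_REG_A1,
		[BPF_REG_3] =		RV_REG_A2,
		[BPF_REG_4] =		RV_REG_A3,
		[BPF_REG_5] =		RV_REG_A4,
		[BPF_REG_6] =		RV_REG_S1,
		[BPF_REG_7] =		RV_REG_S2,
		[BPF_REG_8] =		RV_REG_S3,
		[BPF_REG_9] =		RV_REG_S4,
		[BPF_REG_FP] =		RV_REG_S5,
		[BPF_REG_AX] =		RV_REG_T0,
		[TAIL_CALL_REG] =	RV_REG_S6,
	};

	static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx)
	{
		u8 reg = regmap[bpf_reg];

		/* Record the register as used, for prologue/epilogue. */
		__set_bit(reg, &ctx->seen_reg_bits);
		return reg;
	}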

> > +     switch (bpf_reg) {
> > +     /* Return value */
> > +     case BPF_REG_0:
> > +             __set_bit(RV_REG_A5, &ctx->seen_reg_bits);
> > +             return RV_REG_A5;
> > +     /* Function arguments */
> > +     case BPF_REG_1:
> > +             __set_bit(RV_REG_A0, &ctx->seen_reg_bits);
> > +             return RV_REG_A0;
> > +     case BPF_REG_2:
> > +             __set_bit(RV_REG_A1, &ctx->seen_reg_bits);
> > +             return RV_REG_A1;
> > +     case BPF_REG_3:
> > +             __set_bit(RV_REG_A2, &ctx->seen_reg_bits);
> > +             return RV_REG_A2;
> > +     case BPF_REG_4:
> > +             __set_bit(RV_REG_A3, &ctx->seen_reg_bits);
> > +             return RV_REG_A3;
> > +     case BPF_REG_5:
> > +             __set_bit(RV_REG_A4, &ctx->seen_reg_bits);
> > +             return RV_REG_A4;
> > +     /* Callee saved registers */
> > +     case BPF_REG_6:
> > +             __set_bit(RV_REG_S1, &ctx->seen_reg_bits);
> > +             return RV_REG_S1;
> > +     case BPF_REG_7:
> > +             __set_bit(RV_REG_S2, &ctx->seen_reg_bits);
> > +             return RV_REG_S2;
> > +     case BPF_REG_8:
> > +             __set_bit(RV_REG_S3, &ctx->seen_reg_bits);
> > +             return RV_REG_S3;
> > +     case BPF_REG_9:
> > +             __set_bit(RV_REG_S4, &ctx->seen_reg_bits);
> > +             return RV_REG_S4;
> > +     /* Stack read-only frame pointer to access stack */
> > +     case BPF_REG_FP:
> > +             __set_bit(RV_REG_S5, &ctx->seen_reg_bits);
> > +             return RV_REG_S5;
> > +     /* Temporary register */
> > +     case BPF_REG_AX:
> > +             __set_bit(RV_REG_T0, &ctx->seen_reg_bits);
> > +             return RV_REG_T0;
> > +     /* Tail call counter */
> > +     case TAIL_CALL_REG:
> > +             __set_bit(RV_REG_S6, &ctx->seen_reg_bits);
> > +             return RV_REG_S6;
> > +     default:
> > +             return 0;
> > +     }
> > +};
> [...]
> > +     /* tail call */
> > +     case BPF_JMP | BPF_TAIL_CALL:
> > +             rd = bpf_to_rv_reg(TAIL_CALL_REG, ctx);
> > +             pr_err("bpf-jit: tail call not supported yet!\n");
> > +             return -1;
>
> There are two options here, either fixed size prologue where you can
> then jump over it in tail call case, or dynamic one which would make
> it slower due to reg restore but shrinks image for non-tail calls.
>

So, it would be the latter then, which is pretty much like a more
expensive (due to the tail call depth checks) function call.

For the fixed prologue: how does, say, x86 deal with BPF stack usage
in the tail call case? Say the caller doesn't use the BPF stack, but
the callee does. From a quick glance at the code, the x86 prologue
still uses aux->stack_depth. If the callee has a different stack usage
than the caller, and the callee then does a function call, wouldn't
that mess up the frame? (Yeah, I'm obviously missing something! :-))
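
For reference (mainly to myself): the sequence every JIT needs to
emit for BPF_TAIL_CALL is roughly the following (paraphrasing the
pseudocode comment in the x86 JIT's emit_bpf_tail_call()):

	if (index >= array->map.max_entries)
		goto out;
	if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
		goto out;
	prog = array->ptrs[index];
	if (prog == NULL)
		goto out;
	goto *(prog->bpf_func + prologue_size);
	out:
	...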

> > +     /* function return */
> > +     case BPF_JMP | BPF_EXIT:
> > +             if (i == ctx->prog->len - 1)
> > +                     break;
> > +
> > +             rvoff = epilogue_offset(ctx);
> > +             if (!is_21b_int(rvoff)) {
> > +                     pr_err("bpf-jit: %d offset=%d not supported yet!\n",
> > +                            __LINE__, rvoff);
> > +                     return -1;
> > +             }
> > +
> > +             emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
> > +             break;
> > +
> > +     /* dst = imm64 */
> > +     case BPF_LD | BPF_IMM | BPF_DW:
> > +     {
> > +             struct bpf_insn insn1 = insn[1];
> > +             u64 imm64;
> > +
> [...]
> > +
> > +static void build_prologue(struct rv_jit_context *ctx)
> > +{
> > +     int stack_adjust = 0, store_offset, bpf_stack_adjust;
> > +
> > +     if (seen_reg(RV_REG_RA, ctx))
> > +             stack_adjust += 8;
> > +     stack_adjust += 8; /* RV_REG_FP */
> > +     if (seen_reg(RV_REG_S1, ctx))
> > +             stack_adjust += 8;
> > +     if (seen_reg(RV_REG_S2, ctx))
> > +             stack_adjust += 8;
> > +     if (seen_reg(RV_REG_S3, ctx))
> > +             stack_adjust += 8;
> > +     if (seen_reg(RV_REG_S4, ctx))
> > +             stack_adjust += 8;
> > +     if (seen_reg(RV_REG_S5, ctx))
> > +             stack_adjust += 8;
> > +     if (seen_reg(RV_REG_S6, ctx))
> > +             stack_adjust += 8;
> > +
> > +     stack_adjust = round_up(stack_adjust, 16);
> > +     bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
> > +     stack_adjust += bpf_stack_adjust;
> > +
> > +     store_offset = stack_adjust - 8;
> > +
> > +     emit(rv_addi(RV_REG_SP, RV_REG_SP, -stack_adjust), ctx);
> > +
> > +     if (seen_reg(RV_REG_RA, ctx)) {
> > +             emit(rv_sd(RV_REG_SP, store_offset, RV_REG_RA), ctx);
> > +             store_offset -= 8;
> > +     }
> > +     emit(rv_sd(RV_REG_SP, store_offset, RV_REG_FP), ctx);
> > +     store_offset -= 8;
> > +     if (seen_reg(RV_REG_S1, ctx)) {
> > +             emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S1), ctx);
> > +             store_offset -= 8;
> > +     }
> > +     if (seen_reg(RV_REG_S2, ctx)) {
> > +             emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S2), ctx);
> > +             store_offset -= 8;
> > +     }
> > +     if (seen_reg(RV_REG_S3, ctx)) {
> > +             emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S3), ctx);
> > +             store_offset -= 8;
> > +     }
> > +     if (seen_reg(RV_REG_S4, ctx)) {
> > +             emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S4), ctx);
> > +             store_offset -= 8;
> > +     }
> > +     if (seen_reg(RV_REG_S5, ctx)) {
> > +             emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S5), ctx);
> > +             store_offset -= 8;
> > +     }
> > +     if (seen_reg(RV_REG_S6, ctx)) {
> > +             emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S6), ctx);
> > +             store_offset -= 8;
> > +     }
> > +
> > +     emit(rv_addi(RV_REG_FP, RV_REG_SP, stack_adjust), ctx);
> > +
> > +     if (bpf_stack_adjust) {
> > +             if (!seen_reg(RV_REG_S5, ctx))
> > +                     pr_warn("bpf-jit: not seen BPF_REG_FP, stack is %d\n",
> > +                             bpf_stack_adjust);
> > +             emit(rv_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust), ctx);
> > +     }
> > +
> > +     ctx->stack_size = stack_adjust;
> > +}
> > +
> > +static void build_epilogue(struct rv_jit_context *ctx)
> > +{
> > +     int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;
> > +
> > +     if (seen_reg(RV_REG_RA, ctx)) {
> > +             emit(rv_ld(RV_REG_RA, store_offset, RV_REG_SP), ctx);
> > +             store_offset -= 8;
> > +     }
> > +     emit(rv_ld(RV_REG_FP, store_offset, RV_REG_SP), ctx);
> > +     store_offset -= 8;
> > +     if (seen_reg(RV_REG_S1, ctx)) {
> > +             emit(rv_ld(RV_REG_S1, store_offset, RV_REG_SP), ctx);
> > +             store_offset -= 8;
> > +     }
> > +     if (seen_reg(RV_REG_S2, ctx)) {
> > +             emit(rv_ld(RV_REG_S2, store_offset, RV_REG_SP), ctx);
> > +             store_offset -= 8;
> > +     }
> > +     if (seen_reg(RV_REG_S3, ctx)) {
> > +             emit(rv_ld(RV_REG_S3, store_offset, RV_REG_SP), ctx);
> > +             store_offset -= 8;
> > +     }
> > +     if (seen_reg(RV_REG_S4, ctx)) {
> > +             emit(rv_ld(RV_REG_S4, store_offset, RV_REG_SP), ctx);
> > +             store_offset -= 8;
> > +     }
> > +     if (seen_reg(RV_REG_S5, ctx)) {
> > +             emit(rv_ld(RV_REG_S5, store_offset, RV_REG_SP), ctx);
> > +             store_offset -= 8;
> > +     }
> > +     if (seen_reg(RV_REG_S6, ctx)) {
> > +             emit(rv_ld(RV_REG_S6, store_offset, RV_REG_SP), ctx);
> > +             store_offset -= 8;
> > +     }
> > +
> > +     emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx);
> > +     /* Set return value. */
> > +     emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
> > +     emit(rv_jalr(RV_REG_ZERO, RV_REG_RA, 0), ctx);
> > +}
> > +
> > +static int build_body(struct rv_jit_context *ctx, bool extra_pass)
> > +{
> > +     const struct bpf_prog *prog = ctx->prog;
> > +     int i;
> > +
> > +     for (i = 0; i < prog->len; i++) {
> > +             const struct bpf_insn *insn = &prog->insnsi[i];
> > +             int ret;
> > +
> > +             ret = emit_insn(insn, ctx, extra_pass);
> > +             if (ret > 0) {
> > +                     i++;
> > +                     if (ctx->insns == NULL)
> > +                             ctx->offset[i] = ctx->ninsns;
> > +                     continue;
> > +             }
> > +             if (ctx->insns == NULL)
> > +                     ctx->offset[i] = ctx->ninsns;
> > +             if (ret)
> > +                     return ret;
> > +     }
> > +     return 0;
> > +}
> > +
> > +static void bpf_fill_ill_insns(void *area, unsigned int size)
> > +{
> > +     memset(area, 0, size);
>
> Needs update as well?
>

No, a bit pattern of all zeros decodes as an illegal instruction on
RISC-V, so the memset() is correct as-is, but a comment would be
good!
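
I'll add something like:

	static void bpf_fill_ill_insns(void *area, unsigned int size)
	{
		/*
		 * An all-zeros pattern decodes as an illegal instruction
		 * on RISC-V, so filling the area with zeros is enough.
		 */
		memset(area, 0, size);
	}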

> > +}
> > +
> > +static void bpf_flush_icache(void *start, void *end)
> > +{
> > +     flush_icache_range((unsigned long)start, (unsigned long)end);
> > +}
> > +

Thanks a lot for the comments, Daniel! I'll get back with a v2.


Björn
