From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jakub Kicinski Subject: [PATCH bpf-next 12/14] nfp: bpf: add support for atomic add of unknown values Date: Wed, 28 Mar 2018 17:48:36 -0700 Message-ID: <20180329004839.4506-13-jakub.kicinski@netronome.com> References: <20180329004839.4506-1-jakub.kicinski@netronome.com> Cc: netdev@vger.kernel.org, oss-drivers@netronome.com, Jan Gossens , Jakub Kicinski To: alexei.starovoitov@gmail.com, daniel@iogearbox.net Return-path: Received: from mail-pg0-f66.google.com ([74.125.83.66]:32875 "EHLO mail-pg0-f66.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751976AbeC2Atj (ORCPT ); Wed, 28 Mar 2018 20:49:39 -0400 Received: by mail-pg0-f66.google.com with SMTP id i194so1953216pgd.0 for ; Wed, 28 Mar 2018 17:49:39 -0700 (PDT) In-Reply-To: <20180329004839.4506-1-jakub.kicinski@netronome.com> Sender: netdev-owner@vger.kernel.org List-ID: Allow atomic add to be used even when the value is not guaranteed to fit into a 16 bit immediate. This requires the value to be pulled as data, and therefore use of a transfer register and a context swap. Track the information about possible lengths of the value, if it's guaranteed to be larger than 16bits don't generate the code for the optimized case at all. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Reviewed-by: Jiong Wang --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 78 ++++++++++++++++++++--- drivers/net/ethernet/netronome/nfp/bpf/main.h | 7 ++ drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 14 ++-- drivers/net/ethernet/netronome/nfp/nfp_asm.c | 1 + drivers/net/ethernet/netronome/nfp/nfp_asm.h | 3 + 5 files changed, 88 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index db73f56de59a..62431a0aa0f5 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -2125,12 +2125,49 @@ static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64) { - swreg addra, addrb, off, prev_alu = imm_a(nfp_prog); u8 dst_gpr = meta->insn.dst_reg * 2; u8 src_gpr = meta->insn.src_reg * 2; + unsigned int full_add, out; + swreg addra, addrb, off; off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + /* We can fit 16 bits into command immediate, if we know the immediate + * is guaranteed to either always or never fit into 16 bit we only + * generate code to handle that particular case, otherwise generate + * code for both. + */ + out = nfp_prog_current_offset(nfp_prog); + full_add = nfp_prog_current_offset(nfp_prog); + + if (meta->insn.off) { + out += 2; + full_add += 2; + } + if (meta->xadd_maybe_16bit) { + out += 3; + full_add += 3; + } + if (meta->xadd_over_16bit) + out += 2 + is64; + if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) { + out += 5; + full_add += 5; + } + + /* Generate the branch for choosing add_imm vs add */ + if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) { + swreg max_imm = imm_a(nfp_prog); + + wrp_immed(nfp_prog, max_imm, 0xffff); + emit_alu(nfp_prog, reg_none(), + max_imm, ALU_OP_SUB, reg_b(src_gpr)); + emit_alu(nfp_prog, reg_none(), + reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1)); + emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0); + /* defer for add */ + } + /* If insn has an offset add to the address */ if (!meta->insn.off) { addra = reg_a(dst_gpr); @@ -2144,13 +2181,38 @@ mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64) addrb = imma_b(nfp_prog); } - wrp_immed(nfp_prog, prev_alu, - FIELD_PREP(CMD_OVE_DATA, 2) | - CMD_OVE_LEN | - FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2)); - wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2); - emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0, - addra, addrb, 0, CMD_CTX_NO_SWAP); + /* Generate the add_imm if 16 bits are possible */ + if (meta->xadd_maybe_16bit) { + swreg prev_alu = imm_a(nfp_prog); + + wrp_immed(nfp_prog, prev_alu, + FIELD_PREP(CMD_OVE_DATA, 2) | + CMD_OVE_LEN | + FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2)); + wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2); + emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0, + addra, addrb, 0, CMD_CTX_NO_SWAP); + + if (meta->xadd_over_16bit) + emit_br(nfp_prog, BR_UNC, out, 0); + } + + if (!nfp_prog_confirm_current_offset(nfp_prog, full_add)) + return -EINVAL; + + /* Generate the add if 16 bits are not guaranteed */ + if (meta->xadd_over_16bit) { + emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0, + addra, addrb, is64 << 2, + is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1); + + wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr)); + if (is64) + wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1)); + } + + if (!nfp_prog_confirm_current_offset(nfp_prog, out)) + return -EINVAL; return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 877be7143991..a73b86c6ce52 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -229,6 +229,8 @@ struct nfp_bpf_reg_state { * @pkt_cache.range_start: start offset for associated packet data cache * @pkt_cache.range_end: end offset for associated packet data cache * @pkt_cache.do_init: this read needs to initialize packet data cache + * @xadd_over_16bit: 16bit immediate is not guaranteed + * @xadd_maybe_16bit: 16bit immediate is possible * @jmp_dst: destination info for jump instructions * @func_id: function id for call instructions * @arg1: arg1 for call instructions @@ -243,6 +245,7 @@ struct nfp_bpf_reg_state { struct nfp_insn_meta { struct bpf_insn insn; union { + /* pointer ops (ld/st/xadd) */ struct { struct bpf_reg_state ptr; struct bpf_insn *paired_st; @@ -253,8 +256,12 @@ struct nfp_insn_meta { s16 range_end; bool do_init; } pkt_cache; + bool xadd_over_16bit; + bool xadd_maybe_16bit; }; + /* jump */ struct nfp_insn_meta *jmp_dst; + /* function calls */ struct { u32 func_id; struct bpf_reg_state arg1; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 40619efea77d..486ffd1d5913 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -414,16 +414,16 @@ nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, dreg->type); return -EOPNOTSUPP; } - if (sreg->type != SCALAR_VALUE || - sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff) { - char tn_buf[48]; - - tnum_strn(tn_buf, sizeof(tn_buf), sreg->var_off); - pr_vlog(env, "atomic add not of a small constant scalar: %s\n", - tn_buf); + if (sreg->type != SCALAR_VALUE) { + pr_vlog(env, "atomic add not of a scalar: %d\n", sreg->type); return -EOPNOTSUPP; } + meta->xadd_over_16bit |= + sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff; + meta->xadd_maybe_16bit |= + (sreg->var_off.value & ~sreg->var_off.mask) <= 0xffff; + return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c index 3c0107ac9a2c..cc6ace2be8a9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -48,6 +48,7 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { [CMD_TGT_READ32_SWAP] = { 0x02, 0x5c }, [CMD_TGT_READ_LE] = { 0x01, 0x40 }, [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, + [CMD_TGT_ADD] = { 0x00, 0x47 }, [CMD_TGT_ADD_IMM] = { 0x02, 0x47 }, }; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index 185192590a17..36524dd6021b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -238,6 +238,7 @@ enum cmd_tgt_map { CMD_TGT_READ32_SWAP, CMD_TGT_READ_LE, CMD_TGT_READ_SWAP_LE, + CMD_TGT_ADD, CMD_TGT_ADD_IMM, __CMD_TGT_MAP_SIZE, }; @@ -252,6 +253,8 @@ enum cmd_mode { enum cmd_ctx_swap { CMD_CTX_SWAP = 0, + CMD_CTX_SWAP_DEFER1 = 1, + CMD_CTX_SWAP_DEFER2 = 2, CMD_CTX_NO_SWAP = 3, }; -- 2.16.2