All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jakub Kicinski <jakub.kicinski@netronome.com>
To: alexei.starovoitov@gmail.com, daniel@iogearbox.net
Cc: netdev@vger.kernel.org, oss-drivers@netronome.com,
	Jan Gossens <jan.gossens@rwth-aachen.de>,
	Jakub Kicinski <jakub.kicinski@netronome.com>
Subject: [PATCH bpf-next 12/14] nfp: bpf: add support for atomic add of unknown values
Date: Wed, 28 Mar 2018 17:48:36 -0700	[thread overview]
Message-ID: <20180329004839.4506-13-jakub.kicinski@netronome.com> (raw)
In-Reply-To: <20180329004839.4506-1-jakub.kicinski@netronome.com>

Allow atomic add to be used even when the value is not guaranteed
to fit into a 16 bit immediate.  This requires the value to be pulled
as data, and therefore use of a transfer register and a context swap.

Track the information about possible lengths of the value; if it's
guaranteed to be larger than 16 bits, don't generate the code for the
optimized case at all.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c      | 78 ++++++++++++++++++++---
 drivers/net/ethernet/netronome/nfp/bpf/main.h     |  7 ++
 drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 14 ++--
 drivers/net/ethernet/netronome/nfp/nfp_asm.c      |  1 +
 drivers/net/ethernet/netronome/nfp/nfp_asm.h      |  3 +
 5 files changed, 88 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index db73f56de59a..62431a0aa0f5 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -2125,12 +2125,49 @@ static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 static int
 mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
 {
-	swreg addra, addrb, off, prev_alu = imm_a(nfp_prog);
 	u8 dst_gpr = meta->insn.dst_reg * 2;
 	u8 src_gpr = meta->insn.src_reg * 2;
+	unsigned int full_add, out;
+	swreg addra, addrb, off;
 
 	off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
 
+	/* We can fit 16 bits into the command immediate.  If we know the value
+	 * is guaranteed to either always or never fit into 16 bits, we only
+	 * generate code to handle that particular case, otherwise generate
+	 * code for both.
+	 */
+	out = nfp_prog_current_offset(nfp_prog);
+	full_add = nfp_prog_current_offset(nfp_prog);
+
+	if (meta->insn.off) {
+		out += 2;
+		full_add += 2;
+	}
+	if (meta->xadd_maybe_16bit) {
+		out += 3;
+		full_add += 3;
+	}
+	if (meta->xadd_over_16bit)
+		out += 2 + is64;
+	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
+		out += 5;
+		full_add += 5;
+	}
+
+	/* Generate the branch for choosing add_imm vs add */
+	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
+		swreg max_imm = imm_a(nfp_prog);
+
+		wrp_immed(nfp_prog, max_imm, 0xffff);
+		emit_alu(nfp_prog, reg_none(),
+			 max_imm, ALU_OP_SUB, reg_b(src_gpr));
+		emit_alu(nfp_prog, reg_none(),
+			 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
+		emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
+		/* defer for add */
+	}
+
 	/* If insn has an offset add to the address */
 	if (!meta->insn.off) {
 		addra = reg_a(dst_gpr);
@@ -2144,13 +2181,38 @@ mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
 		addrb = imma_b(nfp_prog);
 	}
 
-	wrp_immed(nfp_prog, prev_alu,
-		  FIELD_PREP(CMD_OVE_DATA, 2) |
-		  CMD_OVE_LEN |
-		  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
-	wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
-	emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
-		       addra, addrb, 0, CMD_CTX_NO_SWAP);
+	/* Generate the add_imm if 16 bits are possible */
+	if (meta->xadd_maybe_16bit) {
+		swreg prev_alu = imm_a(nfp_prog);
+
+		wrp_immed(nfp_prog, prev_alu,
+			  FIELD_PREP(CMD_OVE_DATA, 2) |
+			  CMD_OVE_LEN |
+			  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
+		wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
+		emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
+			       addra, addrb, 0, CMD_CTX_NO_SWAP);
+
+		if (meta->xadd_over_16bit)
+			emit_br(nfp_prog, BR_UNC, out, 0);
+	}
+
+	if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
+		return -EINVAL;
+
+	/* Generate the add if 16 bits are not guaranteed */
+	if (meta->xadd_over_16bit) {
+		emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
+			 addra, addrb, is64 << 2,
+			 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);
+
+		wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr));
+		if (is64)
+			wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1));
+	}
+
+	if (!nfp_prog_confirm_current_offset(nfp_prog, out))
+		return -EINVAL;
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 877be7143991..a73b86c6ce52 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -229,6 +229,8 @@ struct nfp_bpf_reg_state {
  * @pkt_cache.range_start: start offset for associated packet data cache
  * @pkt_cache.range_end: end offset for associated packet data cache
  * @pkt_cache.do_init: this read needs to initialize packet data cache
+ * @xadd_over_16bit: 16bit immediate is not guaranteed
+ * @xadd_maybe_16bit: 16bit immediate is possible
  * @jmp_dst: destination info for jump instructions
  * @func_id: function id for call instructions
  * @arg1: arg1 for call instructions
@@ -243,6 +245,7 @@ struct nfp_bpf_reg_state {
 struct nfp_insn_meta {
 	struct bpf_insn insn;
 	union {
+		/* pointer ops (ld/st/xadd) */
 		struct {
 			struct bpf_reg_state ptr;
 			struct bpf_insn *paired_st;
@@ -253,8 +256,12 @@ struct nfp_insn_meta {
 				s16 range_end;
 				bool do_init;
 			} pkt_cache;
+			bool xadd_over_16bit;
+			bool xadd_maybe_16bit;
 		};
+		/* jump */
 		struct nfp_insn_meta *jmp_dst;
+		/* function calls */
 		struct {
 			u32 func_id;
 			struct bpf_reg_state arg1;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 40619efea77d..486ffd1d5913 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -414,16 +414,16 @@ nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 			dreg->type);
 		return -EOPNOTSUPP;
 	}
-	if (sreg->type != SCALAR_VALUE ||
-	    sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff) {
-		char tn_buf[48];
-
-		tnum_strn(tn_buf, sizeof(tn_buf), sreg->var_off);
-		pr_vlog(env, "atomic add not of a small constant scalar: %s\n",
-			tn_buf);
+	if (sreg->type != SCALAR_VALUE) {
+		pr_vlog(env, "atomic add not of a scalar: %d\n", sreg->type);
 		return -EOPNOTSUPP;
 	}
 
+	meta->xadd_over_16bit |=
+		sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff;
+	meta->xadd_maybe_16bit |=
+		(sreg->var_off.value & ~sreg->var_off.mask) <= 0xffff;
+
 	return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg);
 }
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
index 3c0107ac9a2c..cc6ace2be8a9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
@@ -48,6 +48,7 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
 	[CMD_TGT_READ32_SWAP] =		{ 0x02, 0x5c },
 	[CMD_TGT_READ_LE] =		{ 0x01, 0x40 },
 	[CMD_TGT_READ_SWAP_LE] =	{ 0x03, 0x40 },
+	[CMD_TGT_ADD] =			{ 0x00, 0x47 },
 	[CMD_TGT_ADD_IMM] =		{ 0x02, 0x47 },
 };
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index 185192590a17..36524dd6021b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -238,6 +238,7 @@ enum cmd_tgt_map {
 	CMD_TGT_READ32_SWAP,
 	CMD_TGT_READ_LE,
 	CMD_TGT_READ_SWAP_LE,
+	CMD_TGT_ADD,
 	CMD_TGT_ADD_IMM,
 	__CMD_TGT_MAP_SIZE,
 };
@@ -252,6 +253,8 @@ enum cmd_mode {
 
 enum cmd_ctx_swap {
 	CMD_CTX_SWAP = 0,
+	CMD_CTX_SWAP_DEFER1 = 1,
+	CMD_CTX_SWAP_DEFER2 = 2,
 	CMD_CTX_NO_SWAP = 3,
 };
 
-- 
2.16.2

  parent reply	other threads:[~2018-03-29  0:49 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-29  0:48 [PATCH bpf-next 00/14] nfp: bpf: add updates, deletes, atomic ops, prandom and packet cache Jakub Kicinski
2018-03-29  0:48 ` [PATCH bpf-next 01/14] nfp: bpf: read from packet data cache for PTR_TO_PACKET Jakub Kicinski
2018-03-29  0:48 ` [PATCH bpf-next 02/14] nfp: bpf: support unaligned read offset Jakub Kicinski
2018-03-29  0:48 ` [PATCH bpf-next 03/14] nfp: bpf: detect packet reads could be cached, enable the optimisation Jakub Kicinski
2018-03-29  0:48 ` [PATCH bpf-next 04/14] nfp: bpf: rename map_lookup_stack() to map_call_stack_common() Jakub Kicinski
2018-03-29  0:48 ` [PATCH bpf-next 05/14] nfp: bpf: add helper for validating stack pointers Jakub Kicinski
2018-03-29  0:48 ` [PATCH bpf-next 06/14] nfp: bpf: add helper for basic map call checks Jakub Kicinski
2018-03-29  0:48 ` [PATCH bpf-next 07/14] nfp: bpf: add map updates from the datapath Jakub Kicinski
2018-03-29  0:48 ` [PATCH bpf-next 08/14] nfp: bpf: add map deletes " Jakub Kicinski
2018-03-29  0:48 ` [PATCH bpf-next 09/14] bpf: add parenthesis around argument of BPF_LDST_BYTES() Jakub Kicinski
2018-03-29  0:48 ` [PATCH bpf-next 10/14] nfp: bpf: add basic support for atomic adds Jakub Kicinski
2018-03-29  0:48 ` [PATCH bpf-next 11/14] nfp: bpf: expose command delay slots Jakub Kicinski
2018-03-29  0:48 ` Jakub Kicinski [this message]
2018-03-29  0:48 ` [PATCH bpf-next 13/14] nfp: bpf: add support for bpf_get_prandom_u32() Jakub Kicinski
2018-03-29  0:48 ` [PATCH bpf-next 14/14] nfp: bpf: improve wrong FW response warnings Jakub Kicinski
2018-03-29  2:45 ` [PATCH bpf-next 00/14] nfp: bpf: add updates, deletes, atomic ops, prandom and packet cache Alexei Starovoitov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180329004839.4506-13-jakub.kicinski@netronome.com \
    --to=jakub.kicinski@netronome.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=daniel@iogearbox.net \
    --cc=jan.gossens@rwth-aachen.de \
    --cc=netdev@vger.kernel.org \
    --cc=oss-drivers@netronome.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.