netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Chema Gonzalez <chema@google.com>
To: David Miller <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Daniel Borkmann <dborkman@redhat.com>,
	Alexei Starovoitov <ast@plumgrid.com>
Cc: netdev@vger.kernel.org, Chema Gonzalez <chema@google.com>
Subject: [PATCH v6 net-next 1/4] net: flow_dissector: avoid multiple calls in eBPF
Date: Thu, 29 May 2014 11:55:58 -0700	[thread overview]
Message-ID: <1401389758-13252-1-git-send-email-chema@google.com> (raw)
In-Reply-To: <1398882591-30422-1-git-send-email-chema@google.com>

This patch makes __skb_get_pay_offset() (the function called by
the eBPF call generated from "ld #poff") store the output of the flow
dissector (a flow_keys structure) in the eBPF stack. This way, multiple
invocations of this eBPF call in the same BPF filter will only require
one single call to the flow dissector.

Note that other eBPF calls that use the flow dissector can use the same
approach, and share the results, so at the end there is only one single
call to the flow dissector per packet.

Tested:

$ cat tools/net/ipv4_tcp_poff2.bpf
ldh [12]
jne #0x800, drop
ldb [23]
jneq #6, drop
ld poff
ld poff
ld poff
ld poff
ret #-1
drop: ret #0
$ ./tools/net/bpf_asm tools/net/ipv4_tcp_poff2.bpf
10,40 0 0 12,21 0 7 2048,48 0 0 23,21 0 5 6,32 0 0 4294963252,32 0 0 4294963252,32 0 0 4294963252,32 0 0 4294963252,6 0 0 4294967295,6 0 0 0,

And then, in a VM, we ran:

$ tcpdump -n -i eth0 -f "10,40 0 0 12,21 0 7 2048,48 0 0 23,21 0 5 6,32 0 0 4294963252,32 0 0 4294963252,32 0 0 4294963252,32 0 0 4294963252,6 0 0 4294967295,6 0 0 0,"

This tcpdump is github's tcpdump HEAD with pull request #353, which
allows using raw filters in tcpdump.

Debugging the kernel, we can see that only the first "ld poff" call
invokes the flow dissector. The other calls reuse the value in the
eBPF stack.

Also tested the test_bpf module.

Signed-off-by: Chema Gonzalez <chema@google.com>
---
 include/linux/skbuff.h    |  3 +-
 lib/test_bpf.c            | 52 ++++++++++++++++++++++++++++
 net/core/filter.c         | 86 ++++++++++++++++++++++++++++++++++++++++++-----
 net/core/flow_dissector.c | 16 +++++----
 4 files changed, 141 insertions(+), 16 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7a9beeb..0214b5a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3065,7 +3065,8 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
 
 int skb_checksum_setup(struct sk_buff *skb, bool recalculate);
 
-u32 __skb_get_poff(const struct sk_buff *skb);
+u32 __skb_get_poff(const struct sk_buff *skb, struct flow_keys *flow,
+		u32 *flow_inited);
 
 /**
  * skb_head_is_locked - Determine if the skb->head is locked down
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index af677cb..0f8128f 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -446,6 +446,58 @@ static struct bpf_test tests[] = {
 		{ { 30, 0 }, { 100, 42 } },
 	},
 	{
+		"LD_PAYLOAD_OFF_STACK",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0x11111111),
+			BPF_STMT(BPF_ST, 0),
+			BPF_STMT(BPF_LD | BPF_IMM, 0x22222222),
+			BPF_STMT(BPF_ST, 1),
+			BPF_STMT(BPF_LD | BPF_IMM, 0xeeeeeeee),
+			BPF_STMT(BPF_ST, 14),
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff),
+			BPF_STMT(BPF_ST, 15),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+			    SKF_AD_OFF + SKF_AD_PAY_OFFSET),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 54, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+			BPF_STMT(BPF_LD | BPF_MEM, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x11111111, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+			BPF_STMT(BPF_LD | BPF_MEM, 1),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x22222222, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+			BPF_STMT(BPF_LD | BPF_MEM, 14),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0xeeeeeeee, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+			BPF_STMT(BPF_LD | BPF_MEM, 15),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0xffffffff, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+			BPF_STMT(BPF_RET | BPF_K, 1)
+		},
+		CLASSIC,
+		/* 01:02:03:04:05:06 < 07:08:09:0a:0b:0c, ethertype IPv4(0x0800)
+		 * length 94: 10.1.1.1.10000 > 10.1.1.2.22: Flags [P.],
+		 * seq 1:21, ack 2, win 14400, length 20
+		 */
+		{ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
+		  0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
+		  0x08, 0x00,
+		  /* ip header */
+		  0x45, 0x10, 0x00, 0x5e,
+		  0x75, 0xb5, 0x40, 0x00,
+		  0x40, 0x06, 0xad, 0x2e,
+		  0x0a, 0x01, 0x01, 0x01, /* ip src */
+		  0x0a, 0x01, 0x01, 0x02, /* ip dst */
+		  /* tcp header */
+		  0x27, 0x10, 0x00, 0x16, /* tcp src/dst port */
+		  0x00, 0x00, 0x00, 0x01, /* tcp seq# */
+		  0x00, 0x00, 0x00, 0x02, /* tcp ack# */
+			0x50, 0x00, 0x38, 0x40,
+			0x9a, 0x42, 0x00, 0x00,
+		},
+		{ { 100, 1 } },
+	},
+	{
 		"LD_ANC_XOR",
 		.u.insns = {
 			BPF_STMT(BPF_LD | BPF_IMM, 10),
diff --git a/net/core/filter.c b/net/core/filter.c
index 2c2d35d..0c55252 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -66,6 +66,24 @@
 #define CTX	regs[BPF_REG_CTX]
 #define K	insn->imm
 
+/* classic BPF stack layout:
+ *
+ * This is the layout for the stack for eBPF filters generated from
+ * classic BPF filters.
+ *
+ * Top (BPF_MEMWORDS * 4) bytes are used to represent classic BPF
+ * mem[0-15] slots.
+ *
+ * Flow dissector users (poff so far) use the space just below mem[]
+ * to share the flow_keys obtained from dissecting the flow, and a
+ * bool stating whether such field has been inited.
+ */
+struct classic_bpf_stack_layout {
+	u32 flow_inited;
+	struct flow_keys flow;
+	u32 mem[BPF_MEMWORDS];
+};
+
 /* No hurry in this branch
  *
  * Exported for the bpf jit load helper.
@@ -596,9 +614,13 @@ static unsigned int pkt_type_offset(void)
 	return -1;
 }
 
-static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 fp, u64 r5)
 {
-	return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
+	struct classic_bpf_stack_layout *stack_layout =
+	    (void *) fp - sizeof(struct classic_bpf_stack_layout);
+	return __skb_get_poff((struct sk_buff *)(unsigned long) ctx,
+			      &stack_layout->flow,
+			      &stack_layout->flow_inited);
 }
 
 static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
@@ -779,7 +801,11 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG3, BPF_REG_X);
 		insn++;
 
-		/* Emit call(ctx, arg2=A, arg3=X) */
+		/* arg4 = FP */
+		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG4, BPF_REG_FP);
+		insn++;
+
+		/* Emit call(ctx, arg2=A, arg3=X, arg4=FP) */
 		insn->code = BPF_JMP | BPF_CALL;
 		switch (fp->k) {
 		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
@@ -818,6 +844,45 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	return true;
 }
 
+void __sk_convert_filter_prologue(struct sock_filter *fp, int len,
+		struct sock_filter_int **new_insn)
+{
+	bool use_flow_dissector = false;
+	bool do_write_code = false;
+	int i;
+
+	/* check if there are any insn's that use the flow dissector */
+	for (i = 0; i < len; fp++, i++) {
+		if (BPF_CLASS(fp->code) == BPF_LD &&
+		    BPF_MODE(fp->code) == BPF_ABS &&
+		    fp->k == SKF_AD_OFF + SKF_AD_PAY_OFFSET) {
+			use_flow_dissector = true;
+			break;
+		}
+	}
+
+	do_write_code = (*new_insn != NULL);
+
+	/* first init the stack */
+	if (use_flow_dissector) {
+		/* stack_layout->flow_inited = 0; */
+		if (do_write_code) {
+			(*new_insn)->code = BPF_ST | BPF_MEM | BPF_W;
+			(*new_insn)->a_reg = BPF_REG_FP;
+			(*new_insn)->x_reg = 0;
+			(*new_insn)->off = (s16) (
+			-sizeof(struct classic_bpf_stack_layout) +
+			offsetof(struct classic_bpf_stack_layout, flow_inited));
+			(*new_insn)->imm = 0;
+		}
+		(*new_insn)++;
+	}
+	/* ctx = arg1 */
+	if (do_write_code)
+		**new_insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_CTX, BPF_REG_ARG1);
+	(*new_insn)++;
+}
+
 /**
  *	sk_convert_filter - convert filter program
  *	@prog: the user passed filter program
@@ -867,10 +932,7 @@ do_pass:
 	new_insn = new_prog;
 	fp = prog;
 
-	if (new_insn) {
-		*new_insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_CTX, BPF_REG_ARG1);
-	}
-	new_insn++;
+	__sk_convert_filter_prologue(fp, len, &new_insn);
 
 	for (i = 0; i < len; fp++, i++) {
 		struct sock_filter_int tmp_insns[6] = { };
@@ -1041,7 +1103,10 @@ do_pass:
 			insn->a_reg = BPF_REG_FP;
 			insn->x_reg = fp->code == BPF_ST ?
 				      BPF_REG_A : BPF_REG_X;
-			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
+			insn->off =
+			    -sizeof(struct classic_bpf_stack_layout) +
+			    offsetof(struct classic_bpf_stack_layout,
+				     mem[fp->k]);
 			break;
 
 		/* Load from stack. */
@@ -1051,7 +1116,10 @@ do_pass:
 			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
 				      BPF_REG_A : BPF_REG_X;
 			insn->x_reg = BPF_REG_FP;
-			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
+			insn->off =
+			    -sizeof(struct classic_bpf_stack_layout) +
+			    offsetof(struct classic_bpf_stack_layout,
+				     mem[fp->k]);
 			break;
 
 		/* A = K or X = K */
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 107ed12..bf3cb99 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -275,16 +275,20 @@ EXPORT_SYMBOL(__skb_tx_hash);
  * truncate packets without needing to push actual payload to the user
  * space and can analyze headers only, instead.
  */
-u32 __skb_get_poff(const struct sk_buff *skb)
+u32 __skb_get_poff(const struct sk_buff *skb, struct flow_keys *flow,
+		u32 *flow_inited)
 {
-	struct flow_keys keys;
 	u32 poff = 0;
 
-	if (!skb_flow_dissect(skb, &keys))
-		return 0;
+	/* check whether the flow dissector has already been run */
+	if (!*flow_inited) {
+		if (!skb_flow_dissect(skb, flow))
+			return 0;
+		*flow_inited = 1;
+	}
 
-	poff += keys.thoff;
-	switch (keys.ip_proto) {
+	poff += flow->thoff;
+	switch (flow->ip_proto) {
 	case IPPROTO_TCP: {
 		const struct tcphdr *tcph;
 		struct tcphdr _tcph;
-- 
1.9.1.423.g4596e3a

  parent reply	other threads:[~2014-05-29 18:56 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-04-30 18:29 [PATCH] net: filter: add insn for loading internal transport header offset Chema Gonzalez
2014-04-30 22:21 ` Alexei Starovoitov
2014-05-01 10:53 ` Daniel Borkmann
2014-05-01 10:55   ` Daniel Borkmann
2014-05-01 18:44     ` Chema Gonzalez
2014-05-01 18:44 ` [PATCH net-next v2] " Chema Gonzalez
2014-05-02 16:21   ` Ben Hutchings
2014-05-02 21:49   ` David Miller
2014-05-03  0:53     ` Chema Gonzalez
2014-05-03  2:52       ` David Miller
2014-05-05 18:42         ` Chema Gonzalez
2014-05-05 19:12           ` David Miller
2014-05-14 18:42             ` Chema Gonzalez
2014-05-14 18:51               ` Chema Gonzalez
2014-05-14 18:42 ` [PATCH v2 net-next 1/3] net: flow_dissector: avoid multiple calls in BPF Chema Gonzalez
2014-05-14 18:42   ` [PATCH v2 net-next 2/3] net: filter: add insn for loading internal transport header offset Chema Gonzalez
2014-05-14 18:42   ` [PATCH v2 net-next 3/3] net: filter: add insn for loading internal transport header proto Chema Gonzalez
2014-05-14 18:51 ` [PATCH v3 net-next 1/3] net: flow_dissector: avoid multiple calls in BPF Chema Gonzalez
2014-05-14 20:05   ` Alexei Starovoitov
2014-05-14 21:51     ` Chema Gonzalez
2014-05-14 22:44       ` Alexei Starovoitov
2014-05-16 18:41         ` Chema Gonzalez
2014-05-14 18:51 ` [PATCH v3 net-next 2/3] net: filter: add insn for loading internal transport header offset Chema Gonzalez
2014-05-14 18:51 ` [PATCH v3 net-next 3/3] net: filter: add insn for loading internal transport header proto Chema Gonzalez
2014-05-16 18:41 ` [PATCH v5 net-next 1/3] net: flow_dissector: avoid multiple calls in BPF Chema Gonzalez
2014-05-16 22:00   ` Alexei Starovoitov
2014-05-19 22:23     ` Chema Gonzalez
2014-05-20  9:58       ` Daniel Borkmann
2014-05-16 18:41 ` [PATCH v5 net-next 2/3] net: filter: add insn for loading internal transport header offset Chema Gonzalez
2014-05-16 18:41 ` [PATCH v5 net-next 3/3] net: filter: add insn for loading internal transport header proto Chema Gonzalez
2014-05-29 18:55 ` Chema Gonzalez [this message]
2014-05-29 23:54   ` [PATCH v6 net-next 1/4] net: flow_dissector: avoid multiple calls in eBPF Daniel Borkmann
2014-05-30 17:12     ` Chema Gonzalez
2014-06-02 12:36       ` Daniel Borkmann
2014-06-02 16:48         ` Alexei Starovoitov
2014-06-03  8:33           ` Daniel Borkmann
2014-06-03 20:15             ` Alexei Starovoitov
2014-06-03 21:12               ` Chema Gonzalez
2014-06-04  8:51                 ` Daniel Borkmann
2014-06-05  6:55                   ` David Miller
2014-06-20 21:56                   ` Chema Gonzalez
2014-06-24  8:14                     ` Daniel Borkmann
2014-06-25 22:00                       ` Chema Gonzalez
2014-06-27 10:19                         ` Daniel Borkmann
2014-05-29 18:56 ` [PATCH v6 net-next 2/4] net: filter: add insn for loading internal transport header offset Chema Gonzalez
2014-05-29 18:56 ` [PATCH v6 net-next 3/4] net: filter: add insn for loading internal transport header proto Chema Gonzalez
2014-05-29 18:56 ` [PATCH v6 net-next 4/4] net: filter: minor BPF cleanups Chema Gonzalez

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1401389758-13252-1-git-send-email-chema@google.com \
    --to=chema@google.com \
    --cc=ast@plumgrid.com \
    --cc=davem@davemloft.net \
    --cc=dborkman@redhat.com \
    --cc=edumazet@google.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).