[PATCH bpf-next v2 1/3] selftests/bpf: update tcp_custom_syncookie to use scalar packet offset

From: Eduard Zingerman <eddyz87@gmail.com>
To: bpf@vger.kernel.org, ast@kernel.org
Cc: andrii@kernel.org, daniel@iogearbox.net, martin.lau@linux.dev,
	kernel-team@fb.com, yonghong.song@linux.dev, kuniyu@amazon.com,
	Eduard Zingerman <eddyz87@gmail.com>
Subject: [PATCH bpf-next v2 1/3] selftests/bpf: update tcp_custom_syncookie to use scalar packet offset
Date: Fri, 16 Feb 2024 17:03:32 +0200	[thread overview]
Message-ID: <20240216150334.31937-2-eddyz87@gmail.com> (raw)
In-Reply-To: <20240216150334.31937-1-eddyz87@gmail.com>

The next commit in a series fixes bug in bpf_loop() handling.
That change makes tcp_custom_syncookie test too complex to verify.

This commit updates tcp_custom_syncookie.c:tcp_parse_option() to use
explicit packet offset (ctx->off) for packet access instead of ever
moving pointer (ctx->ptr), this reduces verification complexity:
- the tcp_parse_option() is passed as a callback to bpf_loop();
- suppose a checkpoint is created each time at function entry;
- the ctx->ptr is tracked by verifier as PTR_TO_PACKET;
- the ctx->ptr is incremented in tcp_parse_option(),
  thus umax_value field tracked for it is incremented as well;
- on each next iteration of tcp_parse_option()
  checkpoint from a previous iteration can't be reused
  for state pruning, because PTR_TO_PACKET registers are
  considered equivalent only if old->umax_value >= cur->umax_value;
- on the other hand, the ctx->off is a SCALAR,
  subject to widen_imprecise_scalars();
- it's exact bounds are eventually forgotten and it is tracked as
  unknown scalar at entry to tcp_parse_option();
- hence checkpoints created at the start of the function eventually
  converge.

The change is similar to one applied in [0] to xdp_synproxy_kern.c.

[0] commit 977bc146d4eb ("selftests/bpf: track tcp payload offset as scalar in xdp_synproxy")

Acked-by: Yonghong Song <yonghong.song@linux.dev>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
---
 .../bpf/progs/test_tcp_custom_syncookie.c     | 83 ++++++++++++-------
 1 file changed, 53 insertions(+), 30 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
index a5501b29979a..c8e4553648bf 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -10,6 +10,8 @@
 #include "test_siphash.h"
 #include "test_tcp_custom_syncookie.h"
 
+#define MAX_PACKET_OFF 0xffff
+
 /* Hash is calculated for each client and split into ISN and TS.
  *
  *       MSB                                   LSB
@@ -52,16 +54,15 @@ static siphash_key_t test_key_siphash = {
 
 struct tcp_syncookie {
 	struct __sk_buff *skb;
+	void *data;
 	void *data_end;
 	struct ethhdr *eth;
 	struct iphdr *ipv4;
 	struct ipv6hdr *ipv6;
 	struct tcphdr *tcp;
-	union {
-		char *ptr;
-		__be32 *ptr32;
-	};
+	__be32 *ptr32;
 	struct bpf_tcp_req_attrs attrs;
+	u32 off;
 	u32 cookie;
 	u64 first;
 };
@@ -70,6 +71,7 @@ bool handled_syn, handled_ack;
 
 static int tcp_load_headers(struct tcp_syncookie *ctx)
 {
+	ctx->data = (void *)(long)ctx->skb->data;
 	ctx->data_end = (void *)(long)ctx->skb->data_end;
 	ctx->eth = (struct ethhdr *)(long)ctx->skb->data;
 
@@ -134,6 +136,7 @@ static int tcp_reload_headers(struct tcp_syncookie *ctx)
 	if (bpf_skb_change_tail(ctx->skb, data_len + 60 - ctx->tcp->doff * 4, 0))
 		goto err;
 
+	ctx->data = (void *)(long)ctx->skb->data;
 	ctx->data_end = (void *)(long)ctx->skb->data_end;
 	ctx->eth = (struct ethhdr *)(long)ctx->skb->data;
 	if (ctx->ipv4) {
@@ -195,47 +198,68 @@ static int tcp_validate_header(struct tcp_syncookie *ctx)
 	return -1;
 }
 
-static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx)
+static __always_inline void *next(struct tcp_syncookie *ctx, __u32 sz)
 {
-	char opcode, opsize;
+	__u64 off = ctx->off;
+	__u8 *data;
 
-	if (ctx->ptr + 1 > ctx->data_end)
-		goto stop;
+	/* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
+	if (off > MAX_PACKET_OFF - sz)
+		return NULL;
+
+	data = ctx->data + off;
+	barrier_var(data);
+	if (data + sz >= ctx->data_end)
+		return NULL;
 
-	opcode = *ctx->ptr++;
+	ctx->off += sz;
+	return data;
+}
 
-	if (opcode == TCPOPT_EOL)
+static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx)
+{
+	__u8 *opcode, *opsize, *wscale;
+	__u32 *tsval, *tsecr;
+	__u16 *mss;
+	__u32 off;
+
+	off = ctx->off;
+	opcode = next(ctx, 1);
+	if (!opcode)
 		goto stop;
 
-	if (opcode == TCPOPT_NOP)
+	if (*opcode == TCPOPT_EOL)
+		goto stop;
+
+	if (*opcode == TCPOPT_NOP)
 		goto next;
 
-	if (ctx->ptr + 1 > ctx->data_end)
+	opsize = next(ctx, 1);
+	if (!opsize)
 		goto stop;
 
-	opsize = *ctx->ptr++;
-
-	if (opsize < 2)
+	if (*opsize < 2)
 		goto stop;
 
-	switch (opcode) {
+	switch (*opcode) {
 	case TCPOPT_MSS:
-		if (opsize == TCPOLEN_MSS && ctx->tcp->syn &&
-		    ctx->ptr + (TCPOLEN_MSS - 2) < ctx->data_end)
-			ctx->attrs.mss = get_unaligned_be16(ctx->ptr);
+		mss = next(ctx, 2);
+		if (*opsize == TCPOLEN_MSS && ctx->tcp->syn && mss)
+			ctx->attrs.mss = get_unaligned_be16(mss);
 		break;
 	case TCPOPT_WINDOW:
-		if (opsize == TCPOLEN_WINDOW && ctx->tcp->syn &&
-		    ctx->ptr + (TCPOLEN_WINDOW - 2) < ctx->data_end) {
+		wscale = next(ctx, 1);
+		if (*opsize == TCPOLEN_WINDOW && ctx->tcp->syn && wscale) {
 			ctx->attrs.wscale_ok = 1;
-			ctx->attrs.snd_wscale = *ctx->ptr;
+			ctx->attrs.snd_wscale = *wscale;
 		}
 		break;
 	case TCPOPT_TIMESTAMP:
-		if (opsize == TCPOLEN_TIMESTAMP &&
-		    ctx->ptr + (TCPOLEN_TIMESTAMP - 2) < ctx->data_end) {
-			ctx->attrs.rcv_tsval = get_unaligned_be32(ctx->ptr);
-			ctx->attrs.rcv_tsecr = get_unaligned_be32(ctx->ptr + 4);
+		tsval = next(ctx, 4);
+		tsecr = next(ctx, 4);
+		if (*opsize == TCPOLEN_TIMESTAMP && tsval && tsecr) {
+			ctx->attrs.rcv_tsval = get_unaligned_be32(tsval);
+			ctx->attrs.rcv_tsecr = get_unaligned_be32(tsecr);
 
 			if (ctx->tcp->syn && ctx->attrs.rcv_tsecr)
 				ctx->attrs.tstamp_ok = 0;
@@ -244,13 +268,12 @@ static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx)
 		}
 		break;
 	case TCPOPT_SACK_PERM:
-		if (opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn &&
-		    ctx->ptr + (TCPOLEN_SACK_PERM - 2) < ctx->data_end)
+		if (*opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn)
 			ctx->attrs.sack_ok = 1;
 		break;
 	}
 
-	ctx->ptr += opsize - 2;
+	ctx->off = off + *opsize;
 next:
 	return 0;
 stop:
@@ -259,7 +282,7 @@ static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx)
 
 static void tcp_parse_options(struct tcp_syncookie *ctx)
 {
-	ctx->ptr = (char *)(ctx->tcp + 1);
+	ctx->off = (__u8 *)(ctx->tcp + 1) - (__u8 *)ctx->data,
 
 	bpf_loop(40, tcp_parse_option, ctx, 0);
 }
-- 
2.43.0