netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers
@ 2024-04-24  3:00 Brad Cowie
  2024-04-24  3:00 ` [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs Brad Cowie
  2024-04-25 23:27 ` [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Martin KaFai Lau
  0 siblings, 2 replies; 7+ messages in thread
From: Brad Cowie @ 2024-04-24  3:00 UTC (permalink / raw)
  To: bpf, martin.lau
  Cc: lorenzo, memxor, pablo, davem, kuba, pabeni, ast, daniel, andrii,
	song, john.fastabend, sdf, jolsa, netfilter-devel, coreteam,
	netdev, Brad Cowie

Add ct zone id/flags/dir to bpf_ct_opts so that arbitrary ct zones
can be used for xdp/tc bpf ct helper functions bpf_{xdp,skb}_ct_alloc
and bpf_{xdp,skb}_ct_lookup.

Signed-off-by: Brad Cowie <brad@faucet.nz>
---
v1 -> v2:
  - Make ct zone flags/dir configurable
---
 net/netfilter/nf_conntrack_bpf.c | 97 ++++++++++++++++++++------------
 1 file changed, 61 insertions(+), 36 deletions(-)

diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index d2492d050fe6..67f73b57089b 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -21,41 +21,44 @@
 /* bpf_ct_opts - Options for CT lookup helpers
  *
  * Members:
- * @netns_id   - Specify the network namespace for lookup
- *		 Values:
- *		   BPF_F_CURRENT_NETNS (-1)
- *		     Use namespace associated with ctx (xdp_md, __sk_buff)
- *		   [0, S32_MAX]
- *		     Network Namespace ID
- * @error      - Out parameter, set for any errors encountered
- *		 Values:
- *		   -EINVAL - Passed NULL for bpf_tuple pointer
- *		   -EINVAL - opts->reserved is not 0
- *		   -EINVAL - netns_id is less than -1
- *		   -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
- *		   -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
- *		   -ENONET - No network namespace found for netns_id
- *		   -ENOENT - Conntrack lookup could not find entry for tuple
- *		   -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
- *				   or sizeof(tuple->ipv6)
- * @l4proto    - Layer 4 protocol
- *		 Values:
- *		   IPPROTO_TCP, IPPROTO_UDP
- * @dir:       - connection tracking tuple direction.
- * @reserved   - Reserved member, will be reused for more options in future
- *		 Values:
- *		   0
+ * @netns_id	  - Specify the network namespace for lookup
+ *		    Values:
+ *		      BPF_F_CURRENT_NETNS (-1)
+ *		        Use namespace associated with ctx (xdp_md, __sk_buff)
+ *		      [0, S32_MAX]
+ *		        Network Namespace ID
+ * @error	  - Out parameter, set for any errors encountered
+ *		    Values:
+ *		      -EINVAL - Passed NULL for bpf_tuple pointer
+ *		      -EINVAL - netns_id is less than -1
+ *		      -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (16)
+ *			        or NF_BPF_CT_OPTS_OLD_SZ (12)
+ *		      -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
+ *		      -ENONET - No network namespace found for netns_id
+ *		      -ENOENT - Conntrack lookup could not find entry for tuple
+ *		      -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
+ *				      or sizeof(tuple->ipv6)
+ * @l4proto	  - Layer 4 protocol
+ *		    Values:
+ *		      IPPROTO_TCP, IPPROTO_UDP
+ * @dir:	  - connection tracking tuple direction.
+ * @ct_zone_id	  - connection tracking zone id.
+ * @ct_zone_flags - connection tracking zone flags.
+ * @ct_zone_dir   - connection tracking zone direction.
  */
 struct bpf_ct_opts {
 	s32 netns_id;
 	s32 error;
 	u8 l4proto;
 	u8 dir;
-	u8 reserved[2];
+	u16 ct_zone_id;
+	u8 ct_zone_flags;
+	u8 ct_zone_dir;
 };
 
 enum {
-	NF_BPF_CT_OPTS_SZ = 12,
+	NF_BPF_CT_OPTS_SZ = 16,
+	NF_BPF_CT_OPTS_OLD_SZ = 12,
 };
 
 static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
@@ -104,11 +107,13 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
 			u32 timeout)
 {
 	struct nf_conntrack_tuple otuple, rtuple;
+	struct nf_conntrack_zone ct_zone;
 	struct nf_conn *ct;
 	int err;
 
-	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
-	    opts_len != NF_BPF_CT_OPTS_SZ)
+	if (!opts || !bpf_tuple)
+		return ERR_PTR(-EINVAL);
+	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
 		return ERR_PTR(-EINVAL);
 
 	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
@@ -130,7 +135,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
 			return ERR_PTR(-ENONET);
 	}
 
-	ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
+	if (opts_len == NF_BPF_CT_OPTS_SZ) {
+		if (opts->ct_zone_dir == 0)
+			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
+		nf_ct_zone_init(&ct_zone,
+				opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
+	} else {
+		ct_zone = nf_ct_zone_dflt;
+	}
+
+	ct = nf_conntrack_alloc(net, &ct_zone, &otuple, &rtuple,
 				GFP_ATOMIC);
 	if (IS_ERR(ct))
 		goto out;
@@ -152,11 +166,13 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
 {
 	struct nf_conntrack_tuple_hash *hash;
 	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_zone ct_zone;
 	struct nf_conn *ct;
 	int err;
 
-	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
-	    opts_len != NF_BPF_CT_OPTS_SZ)
+	if (!opts || !bpf_tuple)
+		return ERR_PTR(-EINVAL);
+	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
 		return ERR_PTR(-EINVAL);
 	if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
 		return ERR_PTR(-EPROTO);
@@ -174,7 +190,16 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
 			return ERR_PTR(-ENONET);
 	}
 
-	hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+	if (opts_len == NF_BPF_CT_OPTS_SZ) {
+		if (opts->ct_zone_dir == 0)
+			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
+		nf_ct_zone_init(&ct_zone,
+				opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
+	} else {
+		ct_zone = nf_ct_zone_dflt;
+	}
+
+	hash = nf_conntrack_find_get(net, &ct_zone, &tuple);
 	if (opts->netns_id >= 0)
 		put_net(net);
 	if (!hash)
@@ -245,7 +270,7 @@ __bpf_kfunc_start_defs();
  * @opts	- Additional options for allocation (documented above)
  *		    Cannot be NULL
  * @opts__sz	- Length of the bpf_ct_opts structure
- *		    Must be NF_BPF_CT_OPTS_SZ (12)
+ *		    Must be NF_BPF_CT_OPTS_SZ (16)
  */
 __bpf_kfunc struct nf_conn___init *
 bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
@@ -279,7 +304,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
  * @opts	- Additional options for lookup (documented above)
  *		    Cannot be NULL
  * @opts__sz	- Length of the bpf_ct_opts structure
- *		    Must be NF_BPF_CT_OPTS_SZ (12)
+ *		    Must be NF_BPF_CT_OPTS_SZ (16)
  */
 __bpf_kfunc struct nf_conn *
 bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
@@ -312,7 +337,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
  * @opts	- Additional options for allocation (documented above)
  *		    Cannot be NULL
  * @opts__sz	- Length of the bpf_ct_opts structure
- *		    Must be NF_BPF_CT_OPTS_SZ (12)
+ *		    Must be NF_BPF_CT_OPTS_SZ (16)
  */
 __bpf_kfunc struct nf_conn___init *
 bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
@@ -347,7 +372,7 @@ bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
  * @opts	- Additional options for lookup (documented above)
  *		    Cannot be NULL
  * @opts__sz	- Length of the bpf_ct_opts structure
- *		    Must be NF_BPF_CT_OPTS_SZ (12)
+ *		    Must be NF_BPF_CT_OPTS_SZ (16)
  */
 __bpf_kfunc struct nf_conn *
 bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs
  2024-04-24  3:00 [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Brad Cowie
@ 2024-04-24  3:00 ` Brad Cowie
  2024-04-25 23:34   ` Martin KaFai Lau
  2024-04-25 23:27 ` [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Martin KaFai Lau
  1 sibling, 1 reply; 7+ messages in thread
From: Brad Cowie @ 2024-04-24  3:00 UTC (permalink / raw)
  To: bpf, martin.lau
  Cc: lorenzo, memxor, pablo, davem, kuba, pabeni, ast, daniel, andrii,
	song, john.fastabend, sdf, jolsa, netfilter-devel, coreteam,
	netdev, Brad Cowie

Remove test for reserved options in bpf_ct_opts,
which have been removed.

Add test for allocating and looking up ct entry in a
non-default ct zone with kfuncs bpf_{xdp,skb}_ct_alloc
and bpf_{xdp,skb}_ct_lookup.

Add negative test for looking up ct entry in a different
ct zone to where it was allocated.

Signed-off-by: Brad Cowie <brad@faucet.nz>
---
v1 -> v2:
  - Separate test changes into different patch
  - Add test for allocating/looking up entry in non-default ct zone
---
 tools/testing/selftests/bpf/config            |  1 +
 .../testing/selftests/bpf/prog_tests/bpf_nf.c |  6 +-
 .../testing/selftests/bpf/progs/test_bpf_nf.c | 88 ++++++++++++++++---
 3 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index afd675b1bf80..8d4110fe8d26 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -75,6 +75,7 @@ CONFIG_NETFILTER_XT_TARGET_CT=y
 CONFIG_NETKIT=y
 CONFIG_NF_CONNTRACK=y
 CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
 CONFIG_NF_DEFRAG_IPV4=y
 CONFIG_NF_DEFRAG_IPV6=y
 CONFIG_NF_NAT=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index b30ff6b3b81a..853f694af6d4 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -103,7 +103,6 @@ static void test_bpf_nf_ct(int mode)
 		goto end;
 
 	ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
-	ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
 	ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
 	ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
 	ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
@@ -122,6 +121,11 @@ static void test_bpf_nf_ct(int mode)
 	ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
 	ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting");
 	ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting");
+	ASSERT_EQ(skel->data->test_ct_zone_id_alloc_entry, 0, "Test for alloc new entry in specified ct zone");
+	ASSERT_EQ(skel->data->test_ct_zone_id_insert_entry, 0, "Test for insert new entry in specified ct zone");
+	ASSERT_EQ(skel->data->test_ct_zone_id_succ_lookup, 0, "Test for successful lookup in specified ct_zone");
+	ASSERT_EQ(skel->bss->test_ct_zone_id_enoent_lookup, -ENOENT, "Test ENOENT for lookup in wrong ct zone");
+
 end:
 	if (client_fd != -1)
 		close(client_fd);
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 77ad8adf68da..b47fa0708f1e 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -12,7 +12,6 @@
 extern unsigned long CONFIG_HZ __kconfig;
 
 int test_einval_bpf_tuple = 0;
-int test_einval_reserved = 0;
 int test_einval_netns_id = 0;
 int test_einval_len_opts = 0;
 int test_eproto_l4proto = 0;
@@ -22,6 +21,10 @@ int test_eafnosupport = 0;
 int test_alloc_entry = -EINVAL;
 int test_insert_entry = -EAFNOSUPPORT;
 int test_succ_lookup = -ENOENT;
+int test_ct_zone_id_alloc_entry = -EINVAL;
+int test_ct_zone_id_insert_entry = -EAFNOSUPPORT;
+int test_ct_zone_id_succ_lookup = -ENOENT;
+int test_ct_zone_id_enoent_lookup = 0;
 u32 test_delta_timeout = 0;
 u32 test_status = 0;
 u32 test_insert_lookup_mark = 0;
@@ -45,7 +48,10 @@ struct bpf_ct_opts___local {
 	s32 netns_id;
 	s32 error;
 	u8 l4proto;
-	u8 reserved[3];
+	u8 dir;
+	u16 ct_zone_id;
+	u8 ct_zone_flags;
+	u8 ct_zone_dir;
 } __attribute__((preserve_access_index));
 
 struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
@@ -84,16 +90,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
 	else
 		test_einval_bpf_tuple = opts_def.error;
 
-	opts_def.reserved[0] = 1;
-	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
-		       sizeof(opts_def));
-	opts_def.reserved[0] = 0;
-	opts_def.l4proto = IPPROTO_TCP;
-	if (ct)
-		bpf_ct_release(ct);
-	else
-		test_einval_reserved = opts_def.error;
-
 	opts_def.netns_id = -2;
 	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
 		       sizeof(opts_def));
@@ -220,10 +216,77 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
 	}
 }
 
+static __always_inline void
+nf_ct_zone_id_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
+						struct bpf_ct_opts___local *, u32),
+		   struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32,
+					       struct bpf_ct_opts___local *, u32),
+		   void *ctx)
+{
+	struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
+	struct bpf_sock_tuple bpf_tuple;
+	struct nf_conn *ct;
+
+	__builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
+
+	bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */
+	bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */
+	bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */
+	bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */
+
+	/* use non-default ct zone */
+	opts_def.ct_zone_id = 10;
+	ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+		      sizeof(opts_def));
+	if (ct) {
+		__u16 sport = bpf_get_prandom_u32();
+		__u16 dport = bpf_get_prandom_u32();
+		union nf_inet_addr saddr = {};
+		union nf_inet_addr daddr = {};
+		struct nf_conn *ct_ins;
+
+		bpf_ct_set_timeout(ct, 10000);
+
+		/* snat */
+		saddr.ip = bpf_get_prandom_u32();
+		bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC___local);
+		/* dnat */
+		daddr.ip = bpf_get_prandom_u32();
+		bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST___local);
+
+		ct_ins = bpf_ct_insert_entry(ct);
+		if (ct_ins) {
+			struct nf_conn *ct_lk;
+
+			/* entry should exist in same ct zone we inserted it */
+			ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
+					  &opts_def, sizeof(opts_def));
+			if (ct_lk) {
+				bpf_ct_release(ct_lk);
+				test_ct_zone_id_succ_lookup = 0;
+			}
+
+			/* entry should not exist in default ct zone */
+			opts_def.ct_zone_id = 0;
+			ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
+					  &opts_def, sizeof(opts_def));
+			if (ct_lk)
+				bpf_ct_release(ct_lk);
+			else
+				test_ct_zone_id_enoent_lookup = opts_def.error;
+
+			bpf_ct_release(ct_ins);
+			test_ct_zone_id_insert_entry = 0;
+		}
+		test_ct_zone_id_alloc_entry = 0;
+	}
+}
+
 SEC("xdp")
 int nf_xdp_ct_test(struct xdp_md *ctx)
 {
 	nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx);
+	nf_ct_zone_id_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx);
 	return 0;
 }
 
@@ -231,6 +294,7 @@ SEC("tc")
 int nf_skb_ct_test(struct __sk_buff *ctx)
 {
 	nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx);
+	nf_ct_zone_id_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx);
 	return 0;
 }
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers
  2024-04-24  3:00 [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Brad Cowie
  2024-04-24  3:00 ` [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs Brad Cowie
@ 2024-04-25 23:27 ` Martin KaFai Lau
  2024-05-01  4:59   ` Brad Cowie
  1 sibling, 1 reply; 7+ messages in thread
From: Martin KaFai Lau @ 2024-04-25 23:27 UTC (permalink / raw)
  To: Brad Cowie
  Cc: lorenzo, memxor, pablo, davem, kuba, pabeni, ast, daniel, andrii,
	song, john.fastabend, sdf, jolsa, netfilter-devel, coreteam, bpf,
	netdev

On 4/23/24 8:00 PM, Brad Cowie wrote:
> Add ct zone id/flags/dir to bpf_ct_opts so that arbitrary ct zones
> can be used for xdp/tc bpf ct helper functions bpf_{xdp,skb}_ct_alloc
> and bpf_{xdp,skb}_ct_lookup.
> 
> Signed-off-by: Brad Cowie <brad@faucet.nz>
> ---
> v1 -> v2:
>    - Make ct zone flags/dir configurable
> ---
>   net/netfilter/nf_conntrack_bpf.c | 97 ++++++++++++++++++++------------
>   1 file changed, 61 insertions(+), 36 deletions(-)
> 
> diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
> index d2492d050fe6..67f73b57089b 100644
> --- a/net/netfilter/nf_conntrack_bpf.c
> +++ b/net/netfilter/nf_conntrack_bpf.c
> @@ -21,41 +21,44 @@
>   /* bpf_ct_opts - Options for CT lookup helpers
>    *
>    * Members:
> - * @netns_id   - Specify the network namespace for lookup
> - *		 Values:
> - *		   BPF_F_CURRENT_NETNS (-1)
> - *		     Use namespace associated with ctx (xdp_md, __sk_buff)
> - *		   [0, S32_MAX]
> - *		     Network Namespace ID
> - * @error      - Out parameter, set for any errors encountered
> - *		 Values:
> - *		   -EINVAL - Passed NULL for bpf_tuple pointer
> - *		   -EINVAL - opts->reserved is not 0
> - *		   -EINVAL - netns_id is less than -1
> - *		   -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
> - *		   -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
> - *		   -ENONET - No network namespace found for netns_id
> - *		   -ENOENT - Conntrack lookup could not find entry for tuple
> - *		   -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
> - *				   or sizeof(tuple->ipv6)
> - * @l4proto    - Layer 4 protocol
> - *		 Values:
> - *		   IPPROTO_TCP, IPPROTO_UDP
> - * @dir:       - connection tracking tuple direction.
> - * @reserved   - Reserved member, will be reused for more options in future
> - *		 Values:
> - *		   0
> + * @netns_id	  - Specify the network namespace for lookup
> + *		    Values:
> + *		      BPF_F_CURRENT_NETNS (-1)
> + *		        Use namespace associated with ctx (xdp_md, __sk_buff)
> + *		      [0, S32_MAX]
> + *		        Network Namespace ID
> + * @error	  - Out parameter, set for any errors encountered
> + *		    Values:
> + *		      -EINVAL - Passed NULL for bpf_tuple pointer
> + *		      -EINVAL - netns_id is less than -1
> + *		      -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (16)
> + *			        or NF_BPF_CT_OPTS_OLD_SZ (12)
> + *		      -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
> + *		      -ENONET - No network namespace found for netns_id
> + *		      -ENOENT - Conntrack lookup could not find entry for tuple
> + *		      -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
> + *				      or sizeof(tuple->ipv6)
> + * @l4proto	  - Layer 4 protocol
> + *		    Values:
> + *		      IPPROTO_TCP, IPPROTO_UDP
> + * @dir:	  - connection tracking tuple direction.

Please avoid whitespace changes. It is unnecessary code churns.

> + * @ct_zone_id	  - connection tracking zone id.
> + * @ct_zone_flags - connection tracking zone flags.
> + * @ct_zone_dir   - connection tracking zone direction.
>    */
>   struct bpf_ct_opts {
>   	s32 netns_id;
>   	s32 error;
>   	u8 l4proto;
>   	u8 dir;
> -	u8 reserved[2];
> +	u16 ct_zone_id;
> +	u8 ct_zone_flags;
> +	u8 ct_zone_dir;

The size is 16 now with 2 tail paddings.
It needs a "u8 reserved[2];" at the end and need to check its 0.


>   };
>   
>   enum {
> -	NF_BPF_CT_OPTS_SZ = 12,
> +	NF_BPF_CT_OPTS_SZ = 16,
> +	NF_BPF_CT_OPTS_OLD_SZ = 12,

Avoid adding NF_BPF_CT_OPTS_OLD_SZ for now. I don't see how it may be useful.

>   };
>   
>   static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
> @@ -104,11 +107,13 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
>   			u32 timeout)
>   {
>   	struct nf_conntrack_tuple otuple, rtuple;
> +	struct nf_conntrack_zone ct_zone;
>   	struct nf_conn *ct;
>   	int err;
>   
> -	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
> -	    opts_len != NF_BPF_CT_OPTS_SZ)
> +	if (!opts || !bpf_tuple)
> +		return ERR_PTR(-EINVAL);
> +	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
>   		return ERR_PTR(-EINVAL);
>   
>   	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
> @@ -130,7 +135,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
>   			return ERR_PTR(-ENONET);
>   	}
>   
> -	ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
> +	if (opts_len == NF_BPF_CT_OPTS_SZ) {
> +		if (opts->ct_zone_dir == 0)

I don't know the details about the dir in ct_zone, so a question: a 0 
ct_zone_dir is invalid and can be reused to mean NF_CT_DEFAULT_ZONE_DIR?

> +			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
> +		nf_ct_zone_init(&ct_zone,
> +				opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
> +	} else {

Better enforce "ct_zone_id == 0" also instead of ignoring it.

> +		ct_zone = nf_ct_zone_dflt;
> +	}
> +
> +	ct = nf_conntrack_alloc(net, &ct_zone, &otuple, &rtuple,
>   				GFP_ATOMIC);
>   	if (IS_ERR(ct))
>   		goto out;
> @@ -152,11 +166,13 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
>   {
>   	struct nf_conntrack_tuple_hash *hash;
>   	struct nf_conntrack_tuple tuple;
> +	struct nf_conntrack_zone ct_zone;
>   	struct nf_conn *ct;
>   	int err;
>   
> -	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
> -	    opts_len != NF_BPF_CT_OPTS_SZ)
> +	if (!opts || !bpf_tuple)
> +		return ERR_PTR(-EINVAL);
> +	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
>   		return ERR_PTR(-EINVAL);
>   	if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
>   		return ERR_PTR(-EPROTO);
> @@ -174,7 +190,16 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
>   			return ERR_PTR(-ENONET);
>   	}
>   
> -	hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
> +	if (opts_len == NF_BPF_CT_OPTS_SZ) {
> +		if (opts->ct_zone_dir == 0)
> +			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
> +		nf_ct_zone_init(&ct_zone,
> +				opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
> +	} else {
> +		ct_zone = nf_ct_zone_dflt;
> +	}
> +
> +	hash = nf_conntrack_find_get(net, &ct_zone, &tuple);
>   	if (opts->netns_id >= 0)
>   		put_net(net);
>   	if (!hash)
> @@ -245,7 +270,7 @@ __bpf_kfunc_start_defs();
>    * @opts	- Additional options for allocation (documented above)
>    *		    Cannot be NULL
>    * @opts__sz	- Length of the bpf_ct_opts structure
> - *		    Must be NF_BPF_CT_OPTS_SZ (12)
> + *		    Must be NF_BPF_CT_OPTS_SZ (16)
>    */
>   __bpf_kfunc struct nf_conn___init *
>   bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -279,7 +304,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
>    * @opts	- Additional options for lookup (documented above)
>    *		    Cannot be NULL
>    * @opts__sz	- Length of the bpf_ct_opts structure
> - *		    Must be NF_BPF_CT_OPTS_SZ (12)
> + *		    Must be NF_BPF_CT_OPTS_SZ (16)

Either 16 or 12. Same for below.

>    */
>   __bpf_kfunc struct nf_conn *
>   bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -312,7 +337,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
>    * @opts	- Additional options for allocation (documented above)
>    *		    Cannot be NULL
>    * @opts__sz	- Length of the bpf_ct_opts structure
> - *		    Must be NF_BPF_CT_OPTS_SZ (12)
> + *		    Must be NF_BPF_CT_OPTS_SZ (16)
>    */
>   __bpf_kfunc struct nf_conn___init *
>   bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -347,7 +372,7 @@ bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
>    * @opts	- Additional options for lookup (documented above)
>    *		    Cannot be NULL
>    * @opts__sz	- Length of the bpf_ct_opts structure
> - *		    Must be NF_BPF_CT_OPTS_SZ (12)
> + *		    Must be NF_BPF_CT_OPTS_SZ (16)
>    */
>   __bpf_kfunc struct nf_conn *
>   bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs
  2024-04-24  3:00 ` [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs Brad Cowie
@ 2024-04-25 23:34   ` Martin KaFai Lau
  2024-05-01  5:03     ` Brad Cowie
  0 siblings, 1 reply; 7+ messages in thread
From: Martin KaFai Lau @ 2024-04-25 23:34 UTC (permalink / raw)
  To: Brad Cowie
  Cc: lorenzo, memxor, pablo, davem, kuba, pabeni, ast, daniel, andrii,
	song, john.fastabend, sdf, jolsa, netfilter-devel, coreteam,
	netdev, bpf

On 4/23/24 8:00 PM, Brad Cowie wrote:
> Remove test for reserved options in bpf_ct_opts,
> which have been removed.
> 
> Add test for allocating and looking up ct entry in a
> non-default ct zone with kfuncs bpf_{xdp,skb}_ct_alloc
> and bpf_{xdp,skb}_ct_lookup.
> 
> Add negative test for looking up ct entry in a different
> ct zone to where it was allocated.
> 
> Signed-off-by: Brad Cowie <brad@faucet.nz>
> ---
> v1 -> v2:
>    - Separate test changes into different patch
>    - Add test for allocating/looking up entry in non-default ct zone
> ---
>   tools/testing/selftests/bpf/config            |  1 +
>   .../testing/selftests/bpf/prog_tests/bpf_nf.c |  6 +-
>   .../testing/selftests/bpf/progs/test_bpf_nf.c | 88 ++++++++++++++++---
>   3 files changed, 82 insertions(+), 13 deletions(-)
> 
> diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
> index afd675b1bf80..8d4110fe8d26 100644
> --- a/tools/testing/selftests/bpf/config
> +++ b/tools/testing/selftests/bpf/config
> @@ -75,6 +75,7 @@ CONFIG_NETFILTER_XT_TARGET_CT=y
>   CONFIG_NETKIT=y
>   CONFIG_NF_CONNTRACK=y
>   CONFIG_NF_CONNTRACK_MARK=y
> +CONFIG_NF_CONNTRACK_ZONES=y
>   CONFIG_NF_DEFRAG_IPV4=y
>   CONFIG_NF_DEFRAG_IPV6=y
>   CONFIG_NF_NAT=y
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
> index b30ff6b3b81a..853f694af6d4 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
> @@ -103,7 +103,6 @@ static void test_bpf_nf_ct(int mode)
>   		goto end;
>   
>   	ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
> -	ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
>   	ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
>   	ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
>   	ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
> @@ -122,6 +121,11 @@ static void test_bpf_nf_ct(int mode)
>   	ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
>   	ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting");
>   	ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting");
> +	ASSERT_EQ(skel->data->test_ct_zone_id_alloc_entry, 0, "Test for alloc new entry in specified ct zone");
> +	ASSERT_EQ(skel->data->test_ct_zone_id_insert_entry, 0, "Test for insert new entry in specified ct zone");
> +	ASSERT_EQ(skel->data->test_ct_zone_id_succ_lookup, 0, "Test for successful lookup in specified ct_zone");
> +	ASSERT_EQ(skel->bss->test_ct_zone_id_enoent_lookup, -ENOENT, "Test ENOENT for lookup in wrong ct zone");
> +
>   end:
>   	if (client_fd != -1)
>   		close(client_fd);
> diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
> index 77ad8adf68da..b47fa0708f1e 100644
> --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
> +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
> @@ -12,7 +12,6 @@
>   extern unsigned long CONFIG_HZ __kconfig;
>   
>   int test_einval_bpf_tuple = 0;
> -int test_einval_reserved = 0;
>   int test_einval_netns_id = 0;
>   int test_einval_len_opts = 0;
>   int test_eproto_l4proto = 0;
> @@ -22,6 +21,10 @@ int test_eafnosupport = 0;
>   int test_alloc_entry = -EINVAL;
>   int test_insert_entry = -EAFNOSUPPORT;
>   int test_succ_lookup = -ENOENT;
> +int test_ct_zone_id_alloc_entry = -EINVAL;
> +int test_ct_zone_id_insert_entry = -EAFNOSUPPORT;
> +int test_ct_zone_id_succ_lookup = -ENOENT;
> +int test_ct_zone_id_enoent_lookup = 0;
>   u32 test_delta_timeout = 0;
>   u32 test_status = 0;
>   u32 test_insert_lookup_mark = 0;
> @@ -45,7 +48,10 @@ struct bpf_ct_opts___local {
>   	s32 netns_id;
>   	s32 error;
>   	u8 l4proto;
> -	u8 reserved[3];
> +	u8 dir;
> +	u16 ct_zone_id;
> +	u8 ct_zone_flags;
> +	u8 ct_zone_dir;

Create a new struct instead of modifying the existing one such that the existing 
test can test the size 12 still works.

The new one can use the ___new suffix like "struct bpf_ct_opts___new".

>   } __attribute__((preserve_access_index));
>   
>   struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
> @@ -84,16 +90,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
>   	else
>   		test_einval_bpf_tuple = opts_def.error;
>   
> -	opts_def.reserved[0] = 1;
> -	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
> -		       sizeof(opts_def));
> -	opts_def.reserved[0] = 0;
> -	opts_def.l4proto = IPPROTO_TCP;
> -	if (ct)
> -		bpf_ct_release(ct);
> -	else
> -		test_einval_reserved = opts_def.error;
> -
>   	opts_def.netns_id = -2;
>   	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
>   		       sizeof(opts_def));
> @@ -220,10 +216,77 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
>   	}
>   }
>   
> +static __always_inline void
> +nf_ct_zone_id_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
> +						struct bpf_ct_opts___local *, u32),
> +		   struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32,
> +					       struct bpf_ct_opts___local *, u32),
> +		   void *ctx)
> +{
> +	struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
> +	struct bpf_sock_tuple bpf_tuple;
> +	struct nf_conn *ct;
> +
> +	__builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
> +
> +	bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */
> +	bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */
> +	bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */
> +	bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */
> +
> +	/* use non-default ct zone */
> +	opts_def.ct_zone_id = 10;

Can the ct_zone_flags and ct_zone_dir be tested also?

pw-bot: cr

> +	ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
> +		      sizeof(opts_def));
> +	if (ct) {
> +		__u16 sport = bpf_get_prandom_u32();
> +		__u16 dport = bpf_get_prandom_u32();
> +		union nf_inet_addr saddr = {};
> +		union nf_inet_addr daddr = {};
> +		struct nf_conn *ct_ins;
> +
> +		bpf_ct_set_timeout(ct, 10000);
> +
> +		/* snat */
> +		saddr.ip = bpf_get_prandom_u32();
> +		bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC___local);
> +		/* dnat */
> +		daddr.ip = bpf_get_prandom_u32();
> +		bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST___local);
> +
> +		ct_ins = bpf_ct_insert_entry(ct);
> +		if (ct_ins) {
> +			struct nf_conn *ct_lk;
> +
> +			/* entry should exist in same ct zone we inserted it */
> +			ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
> +					  &opts_def, sizeof(opts_def));
> +			if (ct_lk) {
> +				bpf_ct_release(ct_lk);
> +				test_ct_zone_id_succ_lookup = 0;
> +			}
> +
> +			/* entry should not exist in default ct zone */
> +			opts_def.ct_zone_id = 0;
> +			ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
> +					  &opts_def, sizeof(opts_def));
> +			if (ct_lk)
> +				bpf_ct_release(ct_lk);
> +			else
> +				test_ct_zone_id_enoent_lookup = opts_def.error;
> +
> +			bpf_ct_release(ct_ins);
> +			test_ct_zone_id_insert_entry = 0;
> +		}
> +		test_ct_zone_id_alloc_entry = 0;
> +	}
> +}
> +
>   SEC("xdp")
>   int nf_xdp_ct_test(struct xdp_md *ctx)
>   {
>   	nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx);
> +	nf_ct_zone_id_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx);
>   	return 0;
>   }
>   
> @@ -231,6 +294,7 @@ SEC("tc")
>   int nf_skb_ct_test(struct __sk_buff *ctx)
>   {
>   	nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx);
> +	nf_ct_zone_id_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx);
>   	return 0;
>   }
>   


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers
  2024-04-25 23:27 ` [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Martin KaFai Lau
@ 2024-05-01  4:59   ` Brad Cowie
  2024-05-01 22:11     ` Martin KaFai Lau
  0 siblings, 1 reply; 7+ messages in thread
From: Brad Cowie @ 2024-05-01  4:59 UTC (permalink / raw)
  To: martin.lau
  Cc: andrii, ast, bpf, brad, coreteam, daniel, davem, john.fastabend,
	jolsa, kuba, lorenzo, memxor, netdev, netfilter-devel, pabeni,
	pablo, sdf, song

On Fri, 26 Apr 2024 at 11:27, Martin KaFai Lau <martin.lau@linux.dev> wrote:
> On 4/23/24 8:00 PM, Brad Cowie wrote:
> >   };
> >
> >   static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
> > @@ -104,11 +107,13 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
> >   			u32 timeout)
> >   {
> >   	struct nf_conntrack_tuple otuple, rtuple;
> > +	struct nf_conntrack_zone ct_zone;
> >   	struct nf_conn *ct;
> >   	int err;
> >
> > -	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
> > -	    opts_len != NF_BPF_CT_OPTS_SZ)
> > +	if (!opts || !bpf_tuple)
> > +		return ERR_PTR(-EINVAL);
> > +	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
> >   		return ERR_PTR(-EINVAL);
> >
> >   	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
> > @@ -130,7 +135,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
> >   			return ERR_PTR(-ENONET);
> >   	}
> >
> > -	ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
> > +	if (opts_len == NF_BPF_CT_OPTS_SZ) {
> > +		if (opts->ct_zone_dir == 0)
>
> I don't know the details about the dir in ct_zone, so a question: a 0 
> ct_zone_dir is invalid and can be reused to mean NF_CT_DEFAULT_ZONE_DIR?

ct_zone_dir is a bitmask that can have two different bits set,
NF_CT_ZONE_DIR_ORIG (1) and NF_CT_ZONE_DIR_REPL (2).

The comparison function nf_ct_zone_matches_dir() in nf_conntrack_zones.h
checks if ct_zone_dir & (1 << ip_conntrack_dir dir). ip_conntrack_dir
has two possible values IP_CT_DIR_ORIGINAL (0) and IP_CT_DIR_REPLY (1).

If ct_zone_dir has a value of 0, this makes nf_ct_zone_matches_dir()
always return false which makes nf_ct_zone_id() always return
NF_CT_DEFAULT_ZONE_ID instead of the specified ct zone id.

I chose to override ct_zone_dir here and set NF_CT_DEFAULT_ZONE_DIR (3),
to make the behaviour more obvious when a user calls the bpf ct helper
kfuncs while only setting ct_zone_id but not ct_zone_dir.

> > +			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
> > +		nf_ct_zone_init(&ct_zone,
> > +				opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
> > +	} else {
>
> Better enforce "ct_zone_id == 0" also instead of ignoring it.

Could I ask for clarification here, do you mean changing this
else statement to:

+	} else if (opts->ct_zone_id == 0) {

Or should I be setting opts->ct_zone_id = 0 inside the else block?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs
  2024-04-25 23:34   ` Martin KaFai Lau
@ 2024-05-01  5:03     ` Brad Cowie
  0 siblings, 0 replies; 7+ messages in thread
From: Brad Cowie @ 2024-05-01  5:03 UTC (permalink / raw)
  To: martin.lau
  Cc: andrii, ast, bpf, brad, coreteam, daniel, davem, john.fastabend,
	jolsa, kuba, lorenzo, memxor, netdev, netfilter-devel, pabeni,
	pablo, sdf, song

On Fri, 26 Apr 2024 at 11:34, Martin KaFai Lau <martin.lau@linux.dev> wrote:
> On 4/23/24 8:00 PM, Brad Cowie wrote:
> >   } __attribute__((preserve_access_index));
> >   
> >   struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
> > @@ -84,16 +90,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
> >   	else
> >   		test_einval_bpf_tuple = opts_def.error;
> >   
> > -	opts_def.reserved[0] = 1;
> > -	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
> > -		       sizeof(opts_def));
> > -	opts_def.reserved[0] = 0;
> > -	opts_def.l4proto = IPPROTO_TCP;
> > -	if (ct)
> > -		bpf_ct_release(ct);
> > -	else
> > -		test_einval_reserved = opts_def.error;
> > -
> >   	opts_def.netns_id = -2;
> >   	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
> >   		       sizeof(opts_def));
> > @@ -220,10 +216,77 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
> >   	}
> >   }
> >   
> > +static __always_inline void
> > +nf_ct_zone_id_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
> > +						struct bpf_ct_opts___local *, u32),
> > +		   struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32,
> > +					       struct bpf_ct_opts___local *, u32),
> > +		   void *ctx)
> > +{
> > +	struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
> > +	struct bpf_sock_tuple bpf_tuple;
> > +	struct nf_conn *ct;
> > +
> > +	__builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
> > +
> > +	bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */
> > +	bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */
> > +	bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */
> > +	bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */
> > +
> > +	/* use non-default ct zone */
> > +	opts_def.ct_zone_id = 10;
>
> Can the ct_zone_flags and ct_zone_dir be tested also?

I have added an additional test for ct_zone_dir, this will be included
in my v3 patchset.

While writing a test for ct_zone_flags, I realised this option is not
used for the conntrack functions that the bpf ct helper functions call,
nf_conntrack_alloc() and nf_conntrack_find_get(), it is only used by
nf_conntrack_in(), so I will remove ct_zone_flags from my v3 patchset.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers
  2024-05-01  4:59   ` Brad Cowie
@ 2024-05-01 22:11     ` Martin KaFai Lau
  0 siblings, 0 replies; 7+ messages in thread
From: Martin KaFai Lau @ 2024-05-01 22:11 UTC (permalink / raw)
  To: Brad Cowie
  Cc: andrii, ast, bpf, coreteam, daniel, davem, john.fastabend, jolsa,
	kuba, lorenzo, memxor, netdev, netfilter-devel, pabeni, pablo,
	sdf, song

On 4/30/24 9:59 PM, Brad Cowie wrote:
> On Fri, 26 Apr 2024 at 11:27, Martin KaFai Lau <martin.lau@linux.dev> wrote:
>> On 4/23/24 8:00 PM, Brad Cowie wrote:
>>>    };
>>>
>>>    static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
>>> @@ -104,11 +107,13 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
>>>    			u32 timeout)
>>>    {
>>>    	struct nf_conntrack_tuple otuple, rtuple;
>>> +	struct nf_conntrack_zone ct_zone;
>>>    	struct nf_conn *ct;
>>>    	int err;
>>>
>>> -	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
>>> -	    opts_len != NF_BPF_CT_OPTS_SZ)
>>> +	if (!opts || !bpf_tuple)
>>> +		return ERR_PTR(-EINVAL);
>>> +	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
>>>    		return ERR_PTR(-EINVAL);
>>>
>>>    	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
>>> @@ -130,7 +135,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
>>>    			return ERR_PTR(-ENONET);
>>>    	}
>>>
>>> -	ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
>>> +	if (opts_len == NF_BPF_CT_OPTS_SZ) {
>>> +		if (opts->ct_zone_dir == 0)
>>
>> I don't know the details about the dir in ct_zone, so a question: a 0
>> ct_zone_dir is invalid and can be reused to mean NF_CT_DEFAULT_ZONE_DIR?
> 
> ct_zone_dir is a bitmask that can have two different bits set,
> NF_CT_ZONE_DIR_ORIG (1) and NF_CT_ZONE_DIR_REPL (2).
> 
> The comparison function nf_ct_zone_matches_dir() in nf_conntrack_zones.h
> checks if ct_zone_dir & (1 << ip_conntrack_dir dir). ip_conntrack_dir
> has two possible values IP_CT_DIR_ORIGINAL (0) and IP_CT_DIR_REPLY (1).
> 
> If ct_zone_dir has a value of 0, this makes nf_ct_zone_matches_dir()
> always return false which makes nf_ct_zone_id() always return
> NF_CT_DEFAULT_ZONE_ID instead of the specified ct zone id.
> 
> I chose to override ct_zone_dir here and set NF_CT_DEFAULT_ZONE_DIR (3),
> to make the behaviour more obvious when a user calls the bpf ct helper
> kfuncs while only setting ct_zone_id but not ct_zone_dir.

Ok. make sense.

> 
>>> +			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
>>> +		nf_ct_zone_init(&ct_zone,
>>> +				opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
>>> +	} else {
>>
>> Better enforce "ct_zone_id == 0" also instead of ignoring it.
> 
> Could I ask for clarification here, do you mean changing this
> else statement to:
> 
> +	} else if (opts->ct_zone_id == 0) {
> 
> Or should I be setting opts->ct_zone_id = 0 inside the else block?

Testing non zero inside the else and return error instead of silently ignoring 
it and then use nf_ct_zone_dflt:

	} else {
		if (opts->ct_zone_id)
			/* don't ignore the opts->ct_zone_id */
			return ERR_PTR(-EINVAL);
		
		ct_zone = nf_ct_zone_dflt;
	}



^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-05-01 22:11 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-24  3:00 [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Brad Cowie
2024-04-24  3:00 ` [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs Brad Cowie
2024-04-25 23:34   ` Martin KaFai Lau
2024-05-01  5:03     ` Brad Cowie
2024-04-25 23:27 ` [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Martin KaFai Lau
2024-05-01  4:59   ` Brad Cowie
2024-05-01 22:11     ` Martin KaFai Lau

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).