All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next] net: netfilter: Make ct zone id configurable for bpf ct helper functions
@ 2024-03-29  4:14 Brad Cowie
  2024-04-05 20:01 ` Martin KaFai Lau
  0 siblings, 1 reply; 4+ messages in thread
From: Brad Cowie @ 2024-03-29  4:14 UTC (permalink / raw)
  To: bpf
  Cc: lorenzo, memxor, pablo, davem, kuba, pabeni, ast, daniel, andrii,
	martin.lau, song, john.fastabend, sdf, jolsa, netfilter-devel,
	coreteam, netdev, Brad Cowie

Add ct zone id to bpf_ct_opts so that arbitrary ct zone can be
set for xdp/tc bpf ct helper functions bpf_{xdp,skb}_ct_alloc
and bpf_{xdp,skb}_ct_lookup.

Signed-off-by: Brad Cowie <brad@faucet.nz>
---
 net/netfilter/nf_conntrack_bpf.c              | 23 ++++++++++---------
 .../testing/selftests/bpf/prog_tests/bpf_nf.c |  1 -
 .../testing/selftests/bpf/progs/test_bpf_nf.c | 13 ++---------
 3 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index d2492d050fe6..a0f8a64751ec 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -30,7 +30,6 @@
  * @error      - Out parameter, set for any errors encountered
  *		 Values:
  *		   -EINVAL - Passed NULL for bpf_tuple pointer
- *		   -EINVAL - opts->reserved is not 0
  *		   -EINVAL - netns_id is less than -1
  *		   -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
  *		   -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
@@ -42,16 +41,14 @@
  *		 Values:
  *		   IPPROTO_TCP, IPPROTO_UDP
  * @dir:       - connection tracking tuple direction.
- * @reserved   - Reserved member, will be reused for more options in future
- *		 Values:
- *		   0
+ * @ct_zone    - connection tracking zone id.
  */
 struct bpf_ct_opts {
 	s32 netns_id;
 	s32 error;
 	u8 l4proto;
 	u8 dir;
-	u8 reserved[2];
+	u16 ct_zone;
 };
 
 enum {
@@ -104,11 +101,11 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
 			u32 timeout)
 {
 	struct nf_conntrack_tuple otuple, rtuple;
+	struct nf_conntrack_zone ct_zone;
 	struct nf_conn *ct;
 	int err;
 
-	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
-	    opts_len != NF_BPF_CT_OPTS_SZ)
+	if (!opts || !bpf_tuple || opts_len != NF_BPF_CT_OPTS_SZ)
 		return ERR_PTR(-EINVAL);
 
 	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
@@ -130,7 +127,9 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
 			return ERR_PTR(-ENONET);
 	}
 
-	ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
+	nf_ct_zone_init(&ct_zone, opts->ct_zone, NF_CT_DEFAULT_ZONE_DIR, 0);
+
+	ct = nf_conntrack_alloc(net, &ct_zone, &otuple, &rtuple,
 				GFP_ATOMIC);
 	if (IS_ERR(ct))
 		goto out;
@@ -152,11 +151,11 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
 {
 	struct nf_conntrack_tuple_hash *hash;
 	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_zone ct_zone;
 	struct nf_conn *ct;
 	int err;
 
-	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
-	    opts_len != NF_BPF_CT_OPTS_SZ)
+	if (!opts || !bpf_tuple || opts_len != NF_BPF_CT_OPTS_SZ)
 		return ERR_PTR(-EINVAL);
 	if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
 		return ERR_PTR(-EPROTO);
@@ -174,7 +173,9 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
 			return ERR_PTR(-ENONET);
 	}
 
-	hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+	nf_ct_zone_init(&ct_zone, opts->ct_zone, NF_CT_DEFAULT_ZONE_DIR, 0);
+
+	hash = nf_conntrack_find_get(net, &ct_zone, &tuple);
 	if (opts->netns_id >= 0)
 		put_net(net);
 	if (!hash)
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index b30ff6b3b81a..25c3c4e87ed5 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -103,7 +103,6 @@ static void test_bpf_nf_ct(int mode)
 		goto end;
 
 	ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
-	ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
 	ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
 	ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
 	ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 77ad8adf68da..4adb73bc1b33 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -45,7 +45,8 @@ struct bpf_ct_opts___local {
 	s32 netns_id;
 	s32 error;
 	u8 l4proto;
-	u8 reserved[3];
+	u8 dir;
+	u16 ct_zone;
 } __attribute__((preserve_access_index));
 
 struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
@@ -84,16 +85,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
 	else
 		test_einval_bpf_tuple = opts_def.error;
 
-	opts_def.reserved[0] = 1;
-	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
-		       sizeof(opts_def));
-	opts_def.reserved[0] = 0;
-	opts_def.l4proto = IPPROTO_TCP;
-	if (ct)
-		bpf_ct_release(ct);
-	else
-		test_einval_reserved = opts_def.error;
-
 	opts_def.netns_id = -2;
 	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
 		       sizeof(opts_def));
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH bpf-next] net: netfilter: Make ct zone id configurable for bpf ct helper functions
  2024-03-29  4:14 [PATCH bpf-next] net: netfilter: Make ct zone id configurable for bpf ct helper functions Brad Cowie
@ 2024-04-05 20:01 ` Martin KaFai Lau
  2024-04-11  2:29   ` Brad Cowie
  0 siblings, 1 reply; 4+ messages in thread
From: Martin KaFai Lau @ 2024-04-05 20:01 UTC (permalink / raw)
  To: Brad Cowie
  Cc: lorenzo, memxor, pablo, davem, kuba, pabeni, ast, daniel, andrii,
	song, john.fastabend, sdf, jolsa, netfilter-devel, coreteam,
	netdev, bpf

On 3/28/24 9:14 PM, Brad Cowie wrote:
> Add ct zone id to bpf_ct_opts so that arbitrary ct zone can be
> set for xdp/tc bpf ct helper functions bpf_{xdp,skb}_ct_alloc
> and bpf_{xdp,skb}_ct_lookup.
> 
> Signed-off-by: Brad Cowie <brad@faucet.nz>
> ---
>   net/netfilter/nf_conntrack_bpf.c              | 23 ++++++++++---------
>   .../testing/selftests/bpf/prog_tests/bpf_nf.c |  1 -
>   .../testing/selftests/bpf/progs/test_bpf_nf.c | 13 ++---------
>   3 files changed, 14 insertions(+), 23 deletions(-)
> 
> diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
> index d2492d050fe6..a0f8a64751ec 100644
> --- a/net/netfilter/nf_conntrack_bpf.c
> +++ b/net/netfilter/nf_conntrack_bpf.c
> @@ -30,7 +30,6 @@
>    * @error      - Out parameter, set for any errors encountered
>    *		 Values:
>    *		   -EINVAL - Passed NULL for bpf_tuple pointer
> - *		   -EINVAL - opts->reserved is not 0
>    *		   -EINVAL - netns_id is less than -1
>    *		   -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
>    *		   -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
> @@ -42,16 +41,14 @@
>    *		 Values:
>    *		   IPPROTO_TCP, IPPROTO_UDP
>    * @dir:       - connection tracking tuple direction.
> - * @reserved   - Reserved member, will be reused for more options in future
> - *		 Values:
> - *		   0
> + * @ct_zone    - connection tracking zone id.
>    */
>   struct bpf_ct_opts {
>   	s32 netns_id;
>   	s32 error;
>   	u8 l4proto;
>   	u8 dir;
> -	u8 reserved[2];
> +	u16 ct_zone;

How about the other fields (flags and dir) in the "struct nf_conntrack_zone" and 
would it be useful to have values other than the default?

[ ... ]

> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
> index b30ff6b3b81a..25c3c4e87ed5 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
> @@ -103,7 +103,6 @@ static void test_bpf_nf_ct(int mode)
>   		goto end;
>   
>   	ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
> -	ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
>   	ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
>   	ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
>   	ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
> diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
> index 77ad8adf68da..4adb73bc1b33 100644
> --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
> +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
> @@ -45,7 +45,8 @@ struct bpf_ct_opts___local {
>   	s32 netns_id;
>   	s32 error;
>   	u8 l4proto;
> -	u8 reserved[3];
> +	u8 dir;
> +	u16 ct_zone;
>   } __attribute__((preserve_access_index));
>   
>   struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
> @@ -84,16 +85,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
>   	else
>   		test_einval_bpf_tuple = opts_def.error;
>   
> -	opts_def.reserved[0] = 1;
> -	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
> -		       sizeof(opts_def));
> -	opts_def.reserved[0] = 0;
> -	opts_def.l4proto = IPPROTO_TCP;
> -	if (ct)
> -		bpf_ct_release(ct);
> -	else
> -		test_einval_reserved = opts_def.error;
> -

Can it actually test an alloc and lookup of a non default zone id?

Please also separate the selftest into another patch.

pw-bot: cr

>   	opts_def.netns_id = -2;
>   	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
>   		       sizeof(opts_def));


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH bpf-next] net: netfilter: Make ct zone id configurable for bpf ct helper functions
  2024-04-05 20:01 ` Martin KaFai Lau
@ 2024-04-11  2:29   ` Brad Cowie
  2024-04-12  0:45     ` Martin KaFai Lau
  0 siblings, 1 reply; 4+ messages in thread
From: Brad Cowie @ 2024-04-11  2:29 UTC (permalink / raw)
  To: martin.lau
  Cc: andrii, ast, bpf, brad, coreteam, daniel, davem, john.fastabend,
	jolsa, kuba, lorenzo, memxor, netdev, netfilter-devel, pabeni,
	pablo, sdf, song

On Sat, 6 Apr 2024 at 09:01, Martin KaFai Lau <martin.lau@linux.dev> wrote:
> How about the other fields (flags and dir) in the "struct nf_conntrack_zone" and
> would it be useful to have values other than the default?

Good question, it would probably be useful to make these configurable
as well. My reason for only adding ct zone id was to avoid changing
the size of bpf_ct_opts (NF_BPF_CT_OPTS_SZ).

I would be interested in some opinions here on if it's acceptable to
increase the size of bpf_ct_opts, if so, should I also add back some
reserved options to the struct for future use?

> Can it actually test an alloc and lookup of a non default zone id?

Yes, I have a test written now and will include this in my v2 submission.

> Please also separate the selftest into another patch.

Will do.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH bpf-next] net: netfilter: Make ct zone id configurable for bpf ct helper functions
  2024-04-11  2:29   ` Brad Cowie
@ 2024-04-12  0:45     ` Martin KaFai Lau
  0 siblings, 0 replies; 4+ messages in thread
From: Martin KaFai Lau @ 2024-04-12  0:45 UTC (permalink / raw)
  To: Brad Cowie
  Cc: andrii, ast, bpf, coreteam, daniel, davem, john.fastabend, jolsa,
	kuba, lorenzo, memxor, netdev, netfilter-devel, pabeni, pablo,
	sdf, song

On 4/10/24 7:29 PM, Brad Cowie wrote:
> On Sat, 6 Apr 2024 at 09:01, Martin KaFai Lau <martin.lau@linux.dev> wrote:
>> How about the other fields (flags and dir) in the "struct nf_conntrack_zone" and
>> would it be useful to have values other than the default?
> 
> Good question, it would probably be useful to make these configurable
> as well. My reason for only adding ct zone id was to avoid changing
> the size of bpf_ct_opts (NF_BPF_CT_OPTS_SZ).
> 
> I would be interested in some opinions here on if it's acceptable to
> increase the size of bpf_ct_opts, if so, should I also add back some
> reserved options to the struct for future use?

I think the reserved[2] was there for the padding reason.

It should be the first time there is a __sz increase. May be worth to explore 
how it should work.

The opts_len check will need to check == old_size or == new_size. Only use the 
new fields if it is new_size.

There is

enum {
         NF_BPF_CT_OPTS_SZ = 12,
};

This enum probably needs to update with the new size also. NF_BPF_CT_OPTS_SZ 
should be under CO-RE and its enum value will be updated with the running kernel.

The bpf prog has its own struct bpf_ct_opts during compilation (from vmlinux.h 
or defined a local one), so may be the bpf prog can do something like this:

#include "vmlinux.h"

struct bpf_ct_opts___newer {
	s32 netns_id;
	s32 error;
	u8 l4proto;
	u8 dir;
	u8 reserved[2];
	u32 new_field; /* for example */
} __attribute__((preserve_access_index));

SEC("tc")
int run_in_older_kernel(struct __sk_buff *ctx)
{
	struct bpf_ct_opts___newer opts = {};

	/* min of the running kernel opts size or the
	 * local ___newer opts size
	 */
	bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts,
			  min(NF_BPF_CT_OPTS_SZ, sizeof(opts));
}


> 
>> Can it actually test an alloc and lookup of a non default zone id?
> 
> Yes, I have a test written now and will include this in my v2 submission.
> 
>> Please also separate the selftest into another patch.
> 
> Will do.
> 


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-04-12  0:45 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-29  4:14 [PATCH bpf-next] net: netfilter: Make ct zone id configurable for bpf ct helper functions Brad Cowie
2024-04-05 20:01 ` Martin KaFai Lau
2024-04-11  2:29   ` Brad Cowie
2024-04-12  0:45     ` Martin KaFai Lau

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.