bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Martin KaFai Lau <martin.lau@linux.dev>
To: Brad Cowie <brad@faucet.nz>
Cc: lorenzo@kernel.org, memxor@gmail.com, pablo@netfilter.org,
	davem@davemloft.net, kuba@kernel.org, pabeni@redhat.com,
	ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	song@kernel.org, john.fastabend@gmail.com, sdf@google.com,
	jolsa@kernel.org, netfilter-devel@vger.kernel.org,
	coreteam@netfilter.org, bpf@vger.kernel.org,
	netdev@vger.kernel.org
Subject: Re: [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers
Date: Thu, 25 Apr 2024 16:27:22 -0700	[thread overview]
Message-ID: <463c8ea7-08cf-412e-bb31-6fbb15b4df8b@linux.dev> (raw)
In-Reply-To: <20240424030027.3932184-1-brad@faucet.nz>

On 4/23/24 8:00 PM, Brad Cowie wrote:
> Add ct zone id/flags/dir to bpf_ct_opts so that arbitrary ct zones
> can be used for xdp/tc bpf ct helper functions bpf_{xdp,skb}_ct_alloc
> and bpf_{xdp,skb}_ct_lookup.
> 
> Signed-off-by: Brad Cowie <brad@faucet.nz>
> ---
> v1 -> v2:
>    - Make ct zone flags/dir configurable
> ---
>   net/netfilter/nf_conntrack_bpf.c | 97 ++++++++++++++++++++------------
>   1 file changed, 61 insertions(+), 36 deletions(-)
> 
> diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
> index d2492d050fe6..67f73b57089b 100644
> --- a/net/netfilter/nf_conntrack_bpf.c
> +++ b/net/netfilter/nf_conntrack_bpf.c
> @@ -21,41 +21,44 @@
>   /* bpf_ct_opts - Options for CT lookup helpers
>    *
>    * Members:
> - * @netns_id   - Specify the network namespace for lookup
> - *		 Values:
> - *		   BPF_F_CURRENT_NETNS (-1)
> - *		     Use namespace associated with ctx (xdp_md, __sk_buff)
> - *		   [0, S32_MAX]
> - *		     Network Namespace ID
> - * @error      - Out parameter, set for any errors encountered
> - *		 Values:
> - *		   -EINVAL - Passed NULL for bpf_tuple pointer
> - *		   -EINVAL - opts->reserved is not 0
> - *		   -EINVAL - netns_id is less than -1
> - *		   -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
> - *		   -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
> - *		   -ENONET - No network namespace found for netns_id
> - *		   -ENOENT - Conntrack lookup could not find entry for tuple
> - *		   -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
> - *				   or sizeof(tuple->ipv6)
> - * @l4proto    - Layer 4 protocol
> - *		 Values:
> - *		   IPPROTO_TCP, IPPROTO_UDP
> - * @dir:       - connection tracking tuple direction.
> - * @reserved   - Reserved member, will be reused for more options in future
> - *		 Values:
> - *		   0
> + * @netns_id	  - Specify the network namespace for lookup
> + *		    Values:
> + *		      BPF_F_CURRENT_NETNS (-1)
> + *		        Use namespace associated with ctx (xdp_md, __sk_buff)
> + *		      [0, S32_MAX]
> + *		        Network Namespace ID
> + * @error	  - Out parameter, set for any errors encountered
> + *		    Values:
> + *		      -EINVAL - Passed NULL for bpf_tuple pointer
> + *		      -EINVAL - netns_id is less than -1
> + *		      -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (16)
> + *			        or NF_BPF_CT_OPTS_OLD_SZ (12)
> + *		      -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
> + *		      -ENONET - No network namespace found for netns_id
> + *		      -ENOENT - Conntrack lookup could not find entry for tuple
> + *		      -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
> + *				      or sizeof(tuple->ipv6)
> + * @l4proto	  - Layer 4 protocol
> + *		    Values:
> + *		      IPPROTO_TCP, IPPROTO_UDP
> + * @dir:	  - connection tracking tuple direction.

Please avoid whitespace changes. It is unnecessary code churns.

> + * @ct_zone_id	  - connection tracking zone id.
> + * @ct_zone_flags - connection tracking zone flags.
> + * @ct_zone_dir   - connection tracking zone direction.
>    */
>   struct bpf_ct_opts {
>   	s32 netns_id;
>   	s32 error;
>   	u8 l4proto;
>   	u8 dir;
> -	u8 reserved[2];
> +	u16 ct_zone_id;
> +	u8 ct_zone_flags;
> +	u8 ct_zone_dir;

The size is 16 now with 2 tail paddings.
It needs a "u8 reserved[2];" at the end and need to check its 0.


>   };
>   
>   enum {
> -	NF_BPF_CT_OPTS_SZ = 12,
> +	NF_BPF_CT_OPTS_SZ = 16,
> +	NF_BPF_CT_OPTS_OLD_SZ = 12,

Avoid adding NF_BPF_CT_OPTS_OLD_SZ for now. I don't see how it may be useful.

>   };
>   
>   static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
> @@ -104,11 +107,13 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
>   			u32 timeout)
>   {
>   	struct nf_conntrack_tuple otuple, rtuple;
> +	struct nf_conntrack_zone ct_zone;
>   	struct nf_conn *ct;
>   	int err;
>   
> -	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
> -	    opts_len != NF_BPF_CT_OPTS_SZ)
> +	if (!opts || !bpf_tuple)
> +		return ERR_PTR(-EINVAL);
> +	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
>   		return ERR_PTR(-EINVAL);
>   
>   	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
> @@ -130,7 +135,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
>   			return ERR_PTR(-ENONET);
>   	}
>   
> -	ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
> +	if (opts_len == NF_BPF_CT_OPTS_SZ) {
> +		if (opts->ct_zone_dir == 0)

I don't know the details about the dir in ct_zone, so a question: a 0 
ct_zone_dir is invalid and can be reused to mean NF_CT_DEFAULT_ZONE_DIR?

> +			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
> +		nf_ct_zone_init(&ct_zone,
> +				opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
> +	} else {

Better enforce "ct_zone_id == 0" also instead of ignoring it.

> +		ct_zone = nf_ct_zone_dflt;
> +	}
> +
> +	ct = nf_conntrack_alloc(net, &ct_zone, &otuple, &rtuple,
>   				GFP_ATOMIC);
>   	if (IS_ERR(ct))
>   		goto out;
> @@ -152,11 +166,13 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
>   {
>   	struct nf_conntrack_tuple_hash *hash;
>   	struct nf_conntrack_tuple tuple;
> +	struct nf_conntrack_zone ct_zone;
>   	struct nf_conn *ct;
>   	int err;
>   
> -	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
> -	    opts_len != NF_BPF_CT_OPTS_SZ)
> +	if (!opts || !bpf_tuple)
> +		return ERR_PTR(-EINVAL);
> +	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
>   		return ERR_PTR(-EINVAL);
>   	if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
>   		return ERR_PTR(-EPROTO);
> @@ -174,7 +190,16 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
>   			return ERR_PTR(-ENONET);
>   	}
>   
> -	hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
> +	if (opts_len == NF_BPF_CT_OPTS_SZ) {
> +		if (opts->ct_zone_dir == 0)
> +			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
> +		nf_ct_zone_init(&ct_zone,
> +				opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
> +	} else {
> +		ct_zone = nf_ct_zone_dflt;
> +	}
> +
> +	hash = nf_conntrack_find_get(net, &ct_zone, &tuple);
>   	if (opts->netns_id >= 0)
>   		put_net(net);
>   	if (!hash)
> @@ -245,7 +270,7 @@ __bpf_kfunc_start_defs();
>    * @opts	- Additional options for allocation (documented above)
>    *		    Cannot be NULL
>    * @opts__sz	- Length of the bpf_ct_opts structure
> - *		    Must be NF_BPF_CT_OPTS_SZ (12)
> + *		    Must be NF_BPF_CT_OPTS_SZ (16)
>    */
>   __bpf_kfunc struct nf_conn___init *
>   bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -279,7 +304,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
>    * @opts	- Additional options for lookup (documented above)
>    *		    Cannot be NULL
>    * @opts__sz	- Length of the bpf_ct_opts structure
> - *		    Must be NF_BPF_CT_OPTS_SZ (12)
> + *		    Must be NF_BPF_CT_OPTS_SZ (16)

Either 16 or 12. Same for below.

>    */
>   __bpf_kfunc struct nf_conn *
>   bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -312,7 +337,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
>    * @opts	- Additional options for allocation (documented above)
>    *		    Cannot be NULL
>    * @opts__sz	- Length of the bpf_ct_opts structure
> - *		    Must be NF_BPF_CT_OPTS_SZ (12)
> + *		    Must be NF_BPF_CT_OPTS_SZ (16)
>    */
>   __bpf_kfunc struct nf_conn___init *
>   bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -347,7 +372,7 @@ bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
>    * @opts	- Additional options for lookup (documented above)
>    *		    Cannot be NULL
>    * @opts__sz	- Length of the bpf_ct_opts structure
> - *		    Must be NF_BPF_CT_OPTS_SZ (12)
> + *		    Must be NF_BPF_CT_OPTS_SZ (16)
>    */
>   __bpf_kfunc struct nf_conn *
>   bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,


  parent reply	other threads:[~2024-04-25 23:27 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-24  3:00 [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Brad Cowie
2024-04-24  3:00 ` [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs Brad Cowie
2024-04-25 23:34   ` Martin KaFai Lau
2024-05-01  5:03     ` Brad Cowie
2024-04-25 23:27 ` Martin KaFai Lau [this message]
2024-05-01  4:59   ` [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Brad Cowie
2024-05-01 22:11     ` Martin KaFai Lau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=463c8ea7-08cf-412e-bb31-6fbb15b4df8b@linux.dev \
    --to=martin.lau@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=brad@faucet.nz \
    --cc=coreteam@netfilter.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kuba@kernel.org \
    --cc=lorenzo@kernel.org \
    --cc=memxor@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=pablo@netfilter.org \
    --cc=sdf@google.com \
    --cc=song@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).