* [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers
@ 2024-04-24 3:00 Brad Cowie
2024-04-24 3:00 ` [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs Brad Cowie
2024-04-25 23:27 ` [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Martin KaFai Lau
0 siblings, 2 replies; 7+ messages in thread
From: Brad Cowie @ 2024-04-24 3:00 UTC (permalink / raw)
To: bpf, martin.lau
Cc: lorenzo, memxor, pablo, davem, kuba, pabeni, ast, daniel, andrii,
song, john.fastabend, sdf, jolsa, netfilter-devel, coreteam,
netdev, Brad Cowie
Add ct zone id/flags/dir to bpf_ct_opts so that arbitrary ct zones
can be used for xdp/tc bpf ct helper functions bpf_{xdp,skb}_ct_alloc
and bpf_{xdp,skb}_ct_lookup.
Signed-off-by: Brad Cowie <brad@faucet.nz>
---
v1 -> v2:
- Make ct zone flags/dir configurable
---
net/netfilter/nf_conntrack_bpf.c | 97 ++++++++++++++++++++------------
1 file changed, 61 insertions(+), 36 deletions(-)
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index d2492d050fe6..67f73b57089b 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -21,41 +21,44 @@
/* bpf_ct_opts - Options for CT lookup helpers
*
* Members:
- * @netns_id - Specify the network namespace for lookup
- * Values:
- * BPF_F_CURRENT_NETNS (-1)
- * Use namespace associated with ctx (xdp_md, __sk_buff)
- * [0, S32_MAX]
- * Network Namespace ID
- * @error - Out parameter, set for any errors encountered
- * Values:
- * -EINVAL - Passed NULL for bpf_tuple pointer
- * -EINVAL - opts->reserved is not 0
- * -EINVAL - netns_id is less than -1
- * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
- * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
- * -ENONET - No network namespace found for netns_id
- * -ENOENT - Conntrack lookup could not find entry for tuple
- * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
- * or sizeof(tuple->ipv6)
- * @l4proto - Layer 4 protocol
- * Values:
- * IPPROTO_TCP, IPPROTO_UDP
- * @dir: - connection tracking tuple direction.
- * @reserved - Reserved member, will be reused for more options in future
- * Values:
- * 0
+ * @netns_id - Specify the network namespace for lookup
+ * Values:
+ * BPF_F_CURRENT_NETNS (-1)
+ * Use namespace associated with ctx (xdp_md, __sk_buff)
+ * [0, S32_MAX]
+ * Network Namespace ID
+ * @error - Out parameter, set for any errors encountered
+ * Values:
+ * -EINVAL - Passed NULL for bpf_tuple pointer
+ * -EINVAL - netns_id is less than -1
+ * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (16)
+ * or NF_BPF_CT_OPTS_OLD_SZ (12)
+ * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
+ * -ENONET - No network namespace found for netns_id
+ * -ENOENT - Conntrack lookup could not find entry for tuple
+ * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
+ * or sizeof(tuple->ipv6)
+ * @l4proto - Layer 4 protocol
+ * Values:
+ * IPPROTO_TCP, IPPROTO_UDP
+ * @dir: - connection tracking tuple direction.
+ * @ct_zone_id - connection tracking zone id.
+ * @ct_zone_flags - connection tracking zone flags.
+ * @ct_zone_dir - connection tracking zone direction.
*/
struct bpf_ct_opts {
s32 netns_id;
s32 error;
u8 l4proto;
u8 dir;
- u8 reserved[2];
+ u16 ct_zone_id;
+ u8 ct_zone_flags;
+ u8 ct_zone_dir;
};
enum {
- NF_BPF_CT_OPTS_SZ = 12,
+ NF_BPF_CT_OPTS_SZ = 16,
+ NF_BPF_CT_OPTS_OLD_SZ = 12,
};
static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
@@ -104,11 +107,13 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
u32 timeout)
{
struct nf_conntrack_tuple otuple, rtuple;
+ struct nf_conntrack_zone ct_zone;
struct nf_conn *ct;
int err;
- if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
- opts_len != NF_BPF_CT_OPTS_SZ)
+ if (!opts || !bpf_tuple)
+ return ERR_PTR(-EINVAL);
+ if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
return ERR_PTR(-EINVAL);
if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
@@ -130,7 +135,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
return ERR_PTR(-ENONET);
}
- ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
+ if (opts_len == NF_BPF_CT_OPTS_SZ) {
+ if (opts->ct_zone_dir == 0)
+ opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
+ nf_ct_zone_init(&ct_zone,
+ opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
+ } else {
+ ct_zone = nf_ct_zone_dflt;
+ }
+
+ ct = nf_conntrack_alloc(net, &ct_zone, &otuple, &rtuple,
GFP_ATOMIC);
if (IS_ERR(ct))
goto out;
@@ -152,11 +166,13 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
{
struct nf_conntrack_tuple_hash *hash;
struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_zone ct_zone;
struct nf_conn *ct;
int err;
- if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
- opts_len != NF_BPF_CT_OPTS_SZ)
+ if (!opts || !bpf_tuple)
+ return ERR_PTR(-EINVAL);
+ if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
return ERR_PTR(-EINVAL);
if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
return ERR_PTR(-EPROTO);
@@ -174,7 +190,16 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
return ERR_PTR(-ENONET);
}
- hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+ if (opts_len == NF_BPF_CT_OPTS_SZ) {
+ if (opts->ct_zone_dir == 0)
+ opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
+ nf_ct_zone_init(&ct_zone,
+ opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
+ } else {
+ ct_zone = nf_ct_zone_dflt;
+ }
+
+ hash = nf_conntrack_find_get(net, &ct_zone, &tuple);
if (opts->netns_id >= 0)
put_net(net);
if (!hash)
@@ -245,7 +270,7 @@ __bpf_kfunc_start_defs();
* @opts - Additional options for allocation (documented above)
* Cannot be NULL
* @opts__sz - Length of the bpf_ct_opts structure
- * Must be NF_BPF_CT_OPTS_SZ (12)
+ * Must be NF_BPF_CT_OPTS_SZ (16)
*/
__bpf_kfunc struct nf_conn___init *
bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
@@ -279,7 +304,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
* @opts - Additional options for lookup (documented above)
* Cannot be NULL
* @opts__sz - Length of the bpf_ct_opts structure
- * Must be NF_BPF_CT_OPTS_SZ (12)
+ * Must be NF_BPF_CT_OPTS_SZ (16)
*/
__bpf_kfunc struct nf_conn *
bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
@@ -312,7 +337,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
* @opts - Additional options for allocation (documented above)
* Cannot be NULL
* @opts__sz - Length of the bpf_ct_opts structure
- * Must be NF_BPF_CT_OPTS_SZ (12)
+ * Must be NF_BPF_CT_OPTS_SZ (16)
*/
__bpf_kfunc struct nf_conn___init *
bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
@@ -347,7 +372,7 @@ bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
* @opts - Additional options for lookup (documented above)
* Cannot be NULL
* @opts__sz - Length of the bpf_ct_opts structure
- * Must be NF_BPF_CT_OPTS_SZ (12)
+ * Must be NF_BPF_CT_OPTS_SZ (16)
*/
__bpf_kfunc struct nf_conn *
bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs
2024-04-24 3:00 [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Brad Cowie
@ 2024-04-24 3:00 ` Brad Cowie
2024-04-25 23:34 ` Martin KaFai Lau
2024-04-25 23:27 ` [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Martin KaFai Lau
1 sibling, 1 reply; 7+ messages in thread
From: Brad Cowie @ 2024-04-24 3:00 UTC (permalink / raw)
To: bpf, martin.lau
Cc: lorenzo, memxor, pablo, davem, kuba, pabeni, ast, daniel, andrii,
song, john.fastabend, sdf, jolsa, netfilter-devel, coreteam,
netdev, Brad Cowie
Remove test for reserved options in bpf_ct_opts,
which have been removed.
Add test for allocating and looking up ct entry in a
non-default ct zone with kfuncs bpf_{xdp,skb}_ct_alloc
and bpf_{xdp,skb}_ct_lookup.
Add negative test for looking up ct entry in a different
ct zone to where it was allocated.
Signed-off-by: Brad Cowie <brad@faucet.nz>
---
v1 -> v2:
- Separate test changes into different patch
- Add test for allocating/looking up entry in non-default ct zone
---
tools/testing/selftests/bpf/config | 1 +
.../testing/selftests/bpf/prog_tests/bpf_nf.c | 6 +-
.../testing/selftests/bpf/progs/test_bpf_nf.c | 88 ++++++++++++++++---
3 files changed, 82 insertions(+), 13 deletions(-)
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index afd675b1bf80..8d4110fe8d26 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -75,6 +75,7 @@ CONFIG_NETFILTER_XT_TARGET_CT=y
CONFIG_NETKIT=y
CONFIG_NF_CONNTRACK=y
CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_DEFRAG_IPV4=y
CONFIG_NF_DEFRAG_IPV6=y
CONFIG_NF_NAT=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index b30ff6b3b81a..853f694af6d4 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -103,7 +103,6 @@ static void test_bpf_nf_ct(int mode)
goto end;
ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
- ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
@@ -122,6 +121,11 @@ static void test_bpf_nf_ct(int mode)
ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting");
ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting");
+ ASSERT_EQ(skel->data->test_ct_zone_id_alloc_entry, 0, "Test for alloc new entry in specified ct zone");
+ ASSERT_EQ(skel->data->test_ct_zone_id_insert_entry, 0, "Test for insert new entry in specified ct zone");
+ ASSERT_EQ(skel->data->test_ct_zone_id_succ_lookup, 0, "Test for successful lookup in specified ct_zone");
+ ASSERT_EQ(skel->bss->test_ct_zone_id_enoent_lookup, -ENOENT, "Test ENOENT for lookup in wrong ct zone");
+
end:
if (client_fd != -1)
close(client_fd);
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 77ad8adf68da..b47fa0708f1e 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -12,7 +12,6 @@
extern unsigned long CONFIG_HZ __kconfig;
int test_einval_bpf_tuple = 0;
-int test_einval_reserved = 0;
int test_einval_netns_id = 0;
int test_einval_len_opts = 0;
int test_eproto_l4proto = 0;
@@ -22,6 +21,10 @@ int test_eafnosupport = 0;
int test_alloc_entry = -EINVAL;
int test_insert_entry = -EAFNOSUPPORT;
int test_succ_lookup = -ENOENT;
+int test_ct_zone_id_alloc_entry = -EINVAL;
+int test_ct_zone_id_insert_entry = -EAFNOSUPPORT;
+int test_ct_zone_id_succ_lookup = -ENOENT;
+int test_ct_zone_id_enoent_lookup = 0;
u32 test_delta_timeout = 0;
u32 test_status = 0;
u32 test_insert_lookup_mark = 0;
@@ -45,7 +48,10 @@ struct bpf_ct_opts___local {
s32 netns_id;
s32 error;
u8 l4proto;
- u8 reserved[3];
+ u8 dir;
+ u16 ct_zone_id;
+ u8 ct_zone_flags;
+ u8 ct_zone_dir;
} __attribute__((preserve_access_index));
struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
@@ -84,16 +90,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
else
test_einval_bpf_tuple = opts_def.error;
- opts_def.reserved[0] = 1;
- ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
- sizeof(opts_def));
- opts_def.reserved[0] = 0;
- opts_def.l4proto = IPPROTO_TCP;
- if (ct)
- bpf_ct_release(ct);
- else
- test_einval_reserved = opts_def.error;
-
opts_def.netns_id = -2;
ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
sizeof(opts_def));
@@ -220,10 +216,77 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
}
}
+static __always_inline void
+nf_ct_zone_id_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32),
+ struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32),
+ void *ctx)
+{
+ struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
+ struct bpf_sock_tuple bpf_tuple;
+ struct nf_conn *ct;
+
+ __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
+
+ bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */
+ bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */
+ bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */
+ bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */
+
+ /* use non-default ct zone */
+ opts_def.ct_zone_id = 10;
+ ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ if (ct) {
+ __u16 sport = bpf_get_prandom_u32();
+ __u16 dport = bpf_get_prandom_u32();
+ union nf_inet_addr saddr = {};
+ union nf_inet_addr daddr = {};
+ struct nf_conn *ct_ins;
+
+ bpf_ct_set_timeout(ct, 10000);
+
+ /* snat */
+ saddr.ip = bpf_get_prandom_u32();
+ bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC___local);
+ /* dnat */
+ daddr.ip = bpf_get_prandom_u32();
+ bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST___local);
+
+ ct_ins = bpf_ct_insert_entry(ct);
+ if (ct_ins) {
+ struct nf_conn *ct_lk;
+
+ /* entry should exist in same ct zone we inserted it */
+ ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
+ &opts_def, sizeof(opts_def));
+ if (ct_lk) {
+ bpf_ct_release(ct_lk);
+ test_ct_zone_id_succ_lookup = 0;
+ }
+
+ /* entry should not exist in default ct zone */
+ opts_def.ct_zone_id = 0;
+ ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
+ &opts_def, sizeof(opts_def));
+ if (ct_lk)
+ bpf_ct_release(ct_lk);
+ else
+ test_ct_zone_id_enoent_lookup = opts_def.error;
+
+ bpf_ct_release(ct_ins);
+ test_ct_zone_id_insert_entry = 0;
+ }
+ test_ct_zone_id_alloc_entry = 0;
+ }
+}
+
SEC("xdp")
int nf_xdp_ct_test(struct xdp_md *ctx)
{
nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx);
+ nf_ct_zone_id_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx);
return 0;
}
@@ -231,6 +294,7 @@ SEC("tc")
int nf_skb_ct_test(struct __sk_buff *ctx)
{
nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx);
+ nf_ct_zone_id_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx);
return 0;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers
2024-04-24 3:00 [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Brad Cowie
2024-04-24 3:00 ` [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs Brad Cowie
@ 2024-04-25 23:27 ` Martin KaFai Lau
2024-05-01 4:59 ` Brad Cowie
1 sibling, 1 reply; 7+ messages in thread
From: Martin KaFai Lau @ 2024-04-25 23:27 UTC (permalink / raw)
To: Brad Cowie
Cc: lorenzo, memxor, pablo, davem, kuba, pabeni, ast, daniel, andrii,
song, john.fastabend, sdf, jolsa, netfilter-devel, coreteam, bpf,
netdev
On 4/23/24 8:00 PM, Brad Cowie wrote:
> Add ct zone id/flags/dir to bpf_ct_opts so that arbitrary ct zones
> can be used for xdp/tc bpf ct helper functions bpf_{xdp,skb}_ct_alloc
> and bpf_{xdp,skb}_ct_lookup.
>
> Signed-off-by: Brad Cowie <brad@faucet.nz>
> ---
> v1 -> v2:
> - Make ct zone flags/dir configurable
> ---
> net/netfilter/nf_conntrack_bpf.c | 97 ++++++++++++++++++++------------
> 1 file changed, 61 insertions(+), 36 deletions(-)
>
> diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
> index d2492d050fe6..67f73b57089b 100644
> --- a/net/netfilter/nf_conntrack_bpf.c
> +++ b/net/netfilter/nf_conntrack_bpf.c
> @@ -21,41 +21,44 @@
> /* bpf_ct_opts - Options for CT lookup helpers
> *
> * Members:
> - * @netns_id - Specify the network namespace for lookup
> - * Values:
> - * BPF_F_CURRENT_NETNS (-1)
> - * Use namespace associated with ctx (xdp_md, __sk_buff)
> - * [0, S32_MAX]
> - * Network Namespace ID
> - * @error - Out parameter, set for any errors encountered
> - * Values:
> - * -EINVAL - Passed NULL for bpf_tuple pointer
> - * -EINVAL - opts->reserved is not 0
> - * -EINVAL - netns_id is less than -1
> - * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
> - * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
> - * -ENONET - No network namespace found for netns_id
> - * -ENOENT - Conntrack lookup could not find entry for tuple
> - * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
> - * or sizeof(tuple->ipv6)
> - * @l4proto - Layer 4 protocol
> - * Values:
> - * IPPROTO_TCP, IPPROTO_UDP
> - * @dir: - connection tracking tuple direction.
> - * @reserved - Reserved member, will be reused for more options in future
> - * Values:
> - * 0
> + * @netns_id - Specify the network namespace for lookup
> + * Values:
> + * BPF_F_CURRENT_NETNS (-1)
> + * Use namespace associated with ctx (xdp_md, __sk_buff)
> + * [0, S32_MAX]
> + * Network Namespace ID
> + * @error - Out parameter, set for any errors encountered
> + * Values:
> + * -EINVAL - Passed NULL for bpf_tuple pointer
> + * -EINVAL - netns_id is less than -1
> + * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (16)
> + * or NF_BPF_CT_OPTS_OLD_SZ (12)
> + * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
> + * -ENONET - No network namespace found for netns_id
> + * -ENOENT - Conntrack lookup could not find entry for tuple
> + * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
> + * or sizeof(tuple->ipv6)
> + * @l4proto - Layer 4 protocol
> + * Values:
> + * IPPROTO_TCP, IPPROTO_UDP
> + * @dir: - connection tracking tuple direction.
Please avoid whitespace changes. It is unnecessary code churns.
> + * @ct_zone_id - connection tracking zone id.
> + * @ct_zone_flags - connection tracking zone flags.
> + * @ct_zone_dir - connection tracking zone direction.
> */
> struct bpf_ct_opts {
> s32 netns_id;
> s32 error;
> u8 l4proto;
> u8 dir;
> - u8 reserved[2];
> + u16 ct_zone_id;
> + u8 ct_zone_flags;
> + u8 ct_zone_dir;
The size is 16 now with 2 tail paddings.
It needs a "u8 reserved[2];" at the end and need to check its 0.
> };
>
> enum {
> - NF_BPF_CT_OPTS_SZ = 12,
> + NF_BPF_CT_OPTS_SZ = 16,
> + NF_BPF_CT_OPTS_OLD_SZ = 12,
Avoid adding NF_BPF_CT_OPTS_OLD_SZ for now. I don't see how it may be useful.
> };
>
> static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
> @@ -104,11 +107,13 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
> u32 timeout)
> {
> struct nf_conntrack_tuple otuple, rtuple;
> + struct nf_conntrack_zone ct_zone;
> struct nf_conn *ct;
> int err;
>
> - if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
> - opts_len != NF_BPF_CT_OPTS_SZ)
> + if (!opts || !bpf_tuple)
> + return ERR_PTR(-EINVAL);
> + if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
> return ERR_PTR(-EINVAL);
>
> if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
> @@ -130,7 +135,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
> return ERR_PTR(-ENONET);
> }
>
> - ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
> + if (opts_len == NF_BPF_CT_OPTS_SZ) {
> + if (opts->ct_zone_dir == 0)
I don't know the details about the dir in ct_zone, so a question: a 0
ct_zone_dir is invalid and can be reused to mean NF_CT_DEFAULT_ZONE_DIR?
> + opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
> + nf_ct_zone_init(&ct_zone,
> + opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
> + } else {
Better enforce "ct_zone_id == 0" also instead of ignoring it.
> + ct_zone = nf_ct_zone_dflt;
> + }
> +
> + ct = nf_conntrack_alloc(net, &ct_zone, &otuple, &rtuple,
> GFP_ATOMIC);
> if (IS_ERR(ct))
> goto out;
> @@ -152,11 +166,13 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
> {
> struct nf_conntrack_tuple_hash *hash;
> struct nf_conntrack_tuple tuple;
> + struct nf_conntrack_zone ct_zone;
> struct nf_conn *ct;
> int err;
>
> - if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
> - opts_len != NF_BPF_CT_OPTS_SZ)
> + if (!opts || !bpf_tuple)
> + return ERR_PTR(-EINVAL);
> + if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
> return ERR_PTR(-EINVAL);
> if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
> return ERR_PTR(-EPROTO);
> @@ -174,7 +190,16 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
> return ERR_PTR(-ENONET);
> }
>
> - hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
> + if (opts_len == NF_BPF_CT_OPTS_SZ) {
> + if (opts->ct_zone_dir == 0)
> + opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
> + nf_ct_zone_init(&ct_zone,
> + opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
> + } else {
> + ct_zone = nf_ct_zone_dflt;
> + }
> +
> + hash = nf_conntrack_find_get(net, &ct_zone, &tuple);
> if (opts->netns_id >= 0)
> put_net(net);
> if (!hash)
> @@ -245,7 +270,7 @@ __bpf_kfunc_start_defs();
> * @opts - Additional options for allocation (documented above)
> * Cannot be NULL
> * @opts__sz - Length of the bpf_ct_opts structure
> - * Must be NF_BPF_CT_OPTS_SZ (12)
> + * Must be NF_BPF_CT_OPTS_SZ (16)
> */
> __bpf_kfunc struct nf_conn___init *
> bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -279,7 +304,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
> * @opts - Additional options for lookup (documented above)
> * Cannot be NULL
> * @opts__sz - Length of the bpf_ct_opts structure
> - * Must be NF_BPF_CT_OPTS_SZ (12)
> + * Must be NF_BPF_CT_OPTS_SZ (16)
Either 16 or 12. Same for below.
> */
> __bpf_kfunc struct nf_conn *
> bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -312,7 +337,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
> * @opts - Additional options for allocation (documented above)
> * Cannot be NULL
> * @opts__sz - Length of the bpf_ct_opts structure
> - * Must be NF_BPF_CT_OPTS_SZ (12)
> + * Must be NF_BPF_CT_OPTS_SZ (16)
> */
> __bpf_kfunc struct nf_conn___init *
> bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -347,7 +372,7 @@ bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
> * @opts - Additional options for lookup (documented above)
> * Cannot be NULL
> * @opts__sz - Length of the bpf_ct_opts structure
> - * Must be NF_BPF_CT_OPTS_SZ (12)
> + * Must be NF_BPF_CT_OPTS_SZ (16)
> */
> __bpf_kfunc struct nf_conn *
> bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs
2024-04-24 3:00 ` [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs Brad Cowie
@ 2024-04-25 23:34 ` Martin KaFai Lau
2024-05-01 5:03 ` Brad Cowie
0 siblings, 1 reply; 7+ messages in thread
From: Martin KaFai Lau @ 2024-04-25 23:34 UTC (permalink / raw)
To: Brad Cowie
Cc: lorenzo, memxor, pablo, davem, kuba, pabeni, ast, daniel, andrii,
song, john.fastabend, sdf, jolsa, netfilter-devel, coreteam,
netdev, bpf
On 4/23/24 8:00 PM, Brad Cowie wrote:
> Remove test for reserved options in bpf_ct_opts,
> which have been removed.
>
> Add test for allocating and looking up ct entry in a
> non-default ct zone with kfuncs bpf_{xdp,skb}_ct_alloc
> and bpf_{xdp,skb}_ct_lookup.
>
> Add negative test for looking up ct entry in a different
> ct zone to where it was allocated.
>
> Signed-off-by: Brad Cowie <brad@faucet.nz>
> ---
> v1 -> v2:
> - Separate test changes into different patch
> - Add test for allocating/looking up entry in non-default ct zone
> ---
> tools/testing/selftests/bpf/config | 1 +
> .../testing/selftests/bpf/prog_tests/bpf_nf.c | 6 +-
> .../testing/selftests/bpf/progs/test_bpf_nf.c | 88 ++++++++++++++++---
> 3 files changed, 82 insertions(+), 13 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
> index afd675b1bf80..8d4110fe8d26 100644
> --- a/tools/testing/selftests/bpf/config
> +++ b/tools/testing/selftests/bpf/config
> @@ -75,6 +75,7 @@ CONFIG_NETFILTER_XT_TARGET_CT=y
> CONFIG_NETKIT=y
> CONFIG_NF_CONNTRACK=y
> CONFIG_NF_CONNTRACK_MARK=y
> +CONFIG_NF_CONNTRACK_ZONES=y
> CONFIG_NF_DEFRAG_IPV4=y
> CONFIG_NF_DEFRAG_IPV6=y
> CONFIG_NF_NAT=y
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
> index b30ff6b3b81a..853f694af6d4 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
> @@ -103,7 +103,6 @@ static void test_bpf_nf_ct(int mode)
> goto end;
>
> ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
> - ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
> ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
> ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
> ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
> @@ -122,6 +121,11 @@ static void test_bpf_nf_ct(int mode)
> ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
> ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting");
> ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting");
> + ASSERT_EQ(skel->data->test_ct_zone_id_alloc_entry, 0, "Test for alloc new entry in specified ct zone");
> + ASSERT_EQ(skel->data->test_ct_zone_id_insert_entry, 0, "Test for insert new entry in specified ct zone");
> + ASSERT_EQ(skel->data->test_ct_zone_id_succ_lookup, 0, "Test for successful lookup in specified ct_zone");
> + ASSERT_EQ(skel->bss->test_ct_zone_id_enoent_lookup, -ENOENT, "Test ENOENT for lookup in wrong ct zone");
> +
> end:
> if (client_fd != -1)
> close(client_fd);
> diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
> index 77ad8adf68da..b47fa0708f1e 100644
> --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
> +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
> @@ -12,7 +12,6 @@
> extern unsigned long CONFIG_HZ __kconfig;
>
> int test_einval_bpf_tuple = 0;
> -int test_einval_reserved = 0;
> int test_einval_netns_id = 0;
> int test_einval_len_opts = 0;
> int test_eproto_l4proto = 0;
> @@ -22,6 +21,10 @@ int test_eafnosupport = 0;
> int test_alloc_entry = -EINVAL;
> int test_insert_entry = -EAFNOSUPPORT;
> int test_succ_lookup = -ENOENT;
> +int test_ct_zone_id_alloc_entry = -EINVAL;
> +int test_ct_zone_id_insert_entry = -EAFNOSUPPORT;
> +int test_ct_zone_id_succ_lookup = -ENOENT;
> +int test_ct_zone_id_enoent_lookup = 0;
> u32 test_delta_timeout = 0;
> u32 test_status = 0;
> u32 test_insert_lookup_mark = 0;
> @@ -45,7 +48,10 @@ struct bpf_ct_opts___local {
> s32 netns_id;
> s32 error;
> u8 l4proto;
> - u8 reserved[3];
> + u8 dir;
> + u16 ct_zone_id;
> + u8 ct_zone_flags;
> + u8 ct_zone_dir;
Create a new struct instead of modifying the existing one such that the existing
test can test the size 12 still works.
The new one can use the ___new suffix like "struct bpf_ct_opts___new".
> } __attribute__((preserve_access_index));
>
> struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
> @@ -84,16 +90,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
> else
> test_einval_bpf_tuple = opts_def.error;
>
> - opts_def.reserved[0] = 1;
> - ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
> - sizeof(opts_def));
> - opts_def.reserved[0] = 0;
> - opts_def.l4proto = IPPROTO_TCP;
> - if (ct)
> - bpf_ct_release(ct);
> - else
> - test_einval_reserved = opts_def.error;
> -
> opts_def.netns_id = -2;
> ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
> sizeof(opts_def));
> @@ -220,10 +216,77 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
> }
> }
>
> +static __always_inline void
> +nf_ct_zone_id_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
> + struct bpf_ct_opts___local *, u32),
> + struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32,
> + struct bpf_ct_opts___local *, u32),
> + void *ctx)
> +{
> + struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
> + struct bpf_sock_tuple bpf_tuple;
> + struct nf_conn *ct;
> +
> + __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
> +
> + bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */
> + bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */
> + bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */
> + bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */
> +
> + /* use non-default ct zone */
> + opts_def.ct_zone_id = 10;
Can the ct_zone_flags and ct_zone_dir be tested also?
pw-bot: cr
> + ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
> + sizeof(opts_def));
> + if (ct) {
> + __u16 sport = bpf_get_prandom_u32();
> + __u16 dport = bpf_get_prandom_u32();
> + union nf_inet_addr saddr = {};
> + union nf_inet_addr daddr = {};
> + struct nf_conn *ct_ins;
> +
> + bpf_ct_set_timeout(ct, 10000);
> +
> + /* snat */
> + saddr.ip = bpf_get_prandom_u32();
> + bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC___local);
> + /* dnat */
> + daddr.ip = bpf_get_prandom_u32();
> + bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST___local);
> +
> + ct_ins = bpf_ct_insert_entry(ct);
> + if (ct_ins) {
> + struct nf_conn *ct_lk;
> +
> + /* entry should exist in same ct zone we inserted it */
> + ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
> + &opts_def, sizeof(opts_def));
> + if (ct_lk) {
> + bpf_ct_release(ct_lk);
> + test_ct_zone_id_succ_lookup = 0;
> + }
> +
> + /* entry should not exist in default ct zone */
> + opts_def.ct_zone_id = 0;
> + ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
> + &opts_def, sizeof(opts_def));
> + if (ct_lk)
> + bpf_ct_release(ct_lk);
> + else
> + test_ct_zone_id_enoent_lookup = opts_def.error;
> +
> + bpf_ct_release(ct_ins);
> + test_ct_zone_id_insert_entry = 0;
> + }
> + test_ct_zone_id_alloc_entry = 0;
> + }
> +}
> +
> SEC("xdp")
> int nf_xdp_ct_test(struct xdp_md *ctx)
> {
> nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx);
> + nf_ct_zone_id_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx);
> return 0;
> }
>
> @@ -231,6 +294,7 @@ SEC("tc")
> int nf_skb_ct_test(struct __sk_buff *ctx)
> {
> nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx);
> + nf_ct_zone_id_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx);
> return 0;
> }
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers
2024-04-25 23:27 ` [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Martin KaFai Lau
@ 2024-05-01 4:59 ` Brad Cowie
2024-05-01 22:11 ` Martin KaFai Lau
0 siblings, 1 reply; 7+ messages in thread
From: Brad Cowie @ 2024-05-01 4:59 UTC (permalink / raw)
To: martin.lau
Cc: andrii, ast, bpf, brad, coreteam, daniel, davem, john.fastabend,
jolsa, kuba, lorenzo, memxor, netdev, netfilter-devel, pabeni,
pablo, sdf, song
On Fri, 26 Apr 2024 at 11:27, Martin KaFai Lau <martin.lau@linux.dev> wrote:
> On 4/23/24 8:00 PM, Brad Cowie wrote:
> > };
> >
> > static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
> > @@ -104,11 +107,13 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
> > u32 timeout)
> > {
> > struct nf_conntrack_tuple otuple, rtuple;
> > + struct nf_conntrack_zone ct_zone;
> > struct nf_conn *ct;
> > int err;
> >
> > - if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
> > - opts_len != NF_BPF_CT_OPTS_SZ)
> > + if (!opts || !bpf_tuple)
> > + return ERR_PTR(-EINVAL);
> > + if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
> > return ERR_PTR(-EINVAL);
> >
> > if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
> > @@ -130,7 +135,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
> > return ERR_PTR(-ENONET);
> > }
> >
> > - ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
> > + if (opts_len == NF_BPF_CT_OPTS_SZ) {
> > + if (opts->ct_zone_dir == 0)
>
> I don't know the details about the dir in ct_zone, so a question: a 0
> ct_zone_dir is invalid and can be reused to mean NF_CT_DEFAULT_ZONE_DIR?
ct_zone_dir is a bitmask that can have two different bits set,
NF_CT_ZONE_DIR_ORIG (1) and NF_CT_ZONE_DIR_REPL (2).
The comparison function nf_ct_zone_matches_dir() in nf_conntrack_zones.h
checks if ct_zone_dir & (1 << ip_conntrack_dir dir). ip_conntrack_dir
has two possible values IP_CT_DIR_ORIGINAL (0) and IP_CT_DIR_REPLY (1).
If ct_zone_dir has a value of 0, this makes nf_ct_zone_matches_dir()
always return false which makes nf_ct_zone_id() always return
NF_CT_DEFAULT_ZONE_ID instead of the specified ct zone id.
I chose to override ct_zone_dir here and set NF_CT_DEFAULT_ZONE_DIR (3),
to make the behaviour more obvious when a user calls the bpf ct helper
kfuncs while only setting ct_zone_id but not ct_zone_dir.
> > + opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
> > + nf_ct_zone_init(&ct_zone,
> > + opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
> > + } else {
>
> Better enforce "ct_zone_id == 0" also instead of ignoring it.
Could I ask for clarification here, do you mean changing this
else statement to:
+ } else if (opts->ct_zone_id == 0) {
Or should I be setting opts->ct_zone_id = 0 inside the else block?
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs
2024-04-25 23:34 ` Martin KaFai Lau
@ 2024-05-01 5:03 ` Brad Cowie
0 siblings, 0 replies; 7+ messages in thread
From: Brad Cowie @ 2024-05-01 5:03 UTC (permalink / raw)
To: martin.lau
Cc: andrii, ast, bpf, brad, coreteam, daniel, davem, john.fastabend,
jolsa, kuba, lorenzo, memxor, netdev, netfilter-devel, pabeni,
pablo, sdf, song
On Fri, 26 Apr 2024 at 11:34, Martin KaFai Lau <martin.lau@linux.dev> wrote:
> On 4/23/24 8:00 PM, Brad Cowie wrote:
> > } __attribute__((preserve_access_index));
> >
> > struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
> > @@ -84,16 +90,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
> > else
> > test_einval_bpf_tuple = opts_def.error;
> >
> > - opts_def.reserved[0] = 1;
> > - ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
> > - sizeof(opts_def));
> > - opts_def.reserved[0] = 0;
> > - opts_def.l4proto = IPPROTO_TCP;
> > - if (ct)
> > - bpf_ct_release(ct);
> > - else
> > - test_einval_reserved = opts_def.error;
> > -
> > opts_def.netns_id = -2;
> > ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
> > sizeof(opts_def));
> > @@ -220,10 +216,77 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
> > }
> > }
> >
> > +static __always_inline void
> > +nf_ct_zone_id_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
> > + struct bpf_ct_opts___local *, u32),
> > + struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32,
> > + struct bpf_ct_opts___local *, u32),
> > + void *ctx)
> > +{
> > + struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
> > + struct bpf_sock_tuple bpf_tuple;
> > + struct nf_conn *ct;
> > +
> > + __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
> > +
> > + bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */
> > + bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */
> > + bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */
> > + bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */
> > +
> > + /* use non-default ct zone */
> > + opts_def.ct_zone_id = 10;
>
> Can the ct_zone_flags and ct_zone_dir be tested also?
I have added an additional test for ct_zone_dir, this will be included
in my v3 patchset.
While writing a test for ct_zone_flags, I realised this option is not
used for the conntrack functions that the bpf ct helper functions call,
nf_conntrack_alloc() and nf_conntrack_find_get(), it is only used by
nf_conntrack_in(), so I will remove ct_zone_flags from my v3 patchset.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers
2024-05-01 4:59 ` Brad Cowie
@ 2024-05-01 22:11 ` Martin KaFai Lau
0 siblings, 0 replies; 7+ messages in thread
From: Martin KaFai Lau @ 2024-05-01 22:11 UTC (permalink / raw)
To: Brad Cowie
Cc: andrii, ast, bpf, coreteam, daniel, davem, john.fastabend, jolsa,
kuba, lorenzo, memxor, netdev, netfilter-devel, pabeni, pablo,
sdf, song
On 4/30/24 9:59 PM, Brad Cowie wrote:
> On Fri, 26 Apr 2024 at 11:27, Martin KaFai Lau <martin.lau@linux.dev> wrote:
>> On 4/23/24 8:00 PM, Brad Cowie wrote:
>>> };
>>>
>>> static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
>>> @@ -104,11 +107,13 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
>>> u32 timeout)
>>> {
>>> struct nf_conntrack_tuple otuple, rtuple;
>>> + struct nf_conntrack_zone ct_zone;
>>> struct nf_conn *ct;
>>> int err;
>>>
>>> - if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
>>> - opts_len != NF_BPF_CT_OPTS_SZ)
>>> + if (!opts || !bpf_tuple)
>>> + return ERR_PTR(-EINVAL);
>>> + if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
>>> return ERR_PTR(-EINVAL);
>>>
>>> if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
>>> @@ -130,7 +135,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
>>> return ERR_PTR(-ENONET);
>>> }
>>>
>>> - ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
>>> + if (opts_len == NF_BPF_CT_OPTS_SZ) {
>>> + if (opts->ct_zone_dir == 0)
>>
>> I don't know the details about the dir in ct_zone, so a question: a 0
>> ct_zone_dir is invalid and can be reused to mean NF_CT_DEFAULT_ZONE_DIR?
>
> ct_zone_dir is a bitmask that can have two different bits set,
> NF_CT_ZONE_DIR_ORIG (1) and NF_CT_ZONE_DIR_REPL (2).
>
> The comparison function nf_ct_zone_matches_dir() in nf_conntrack_zones.h
> checks if ct_zone_dir & (1 << ip_conntrack_dir dir). ip_conntrack_dir
> has two possible values IP_CT_DIR_ORIGINAL (0) and IP_CT_DIR_REPLY (1).
>
> If ct_zone_dir has a value of 0, this makes nf_ct_zone_matches_dir()
> always return false which makes nf_ct_zone_id() always return
> NF_CT_DEFAULT_ZONE_ID instead of the specified ct zone id.
>
> I chose to override ct_zone_dir here and set NF_CT_DEFAULT_ZONE_DIR (3),
> to make the behaviour more obvious when a user calls the bpf ct helper
> kfuncs while only setting ct_zone_id but not ct_zone_dir.
Ok. make sense.
>
>>> + opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
>>> + nf_ct_zone_init(&ct_zone,
>>> + opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
>>> + } else {
>>
>> Better enforce "ct_zone_id == 0" also instead of ignoring it.
>
> Could I ask for clarification here, do you mean changing this
> else statement to:
>
> + } else if (opts->ct_zone_id == 0) {
>
> Or should I be setting opts->ct_zone_id = 0 inside the else block?
Testing non zero inside the else and return error instead of silently ignoring
it and then use nf_ct_zone_dflt:
} else {
if (opts->ct_zone_id)
/* don't ignore the opts->ct_zone_id */
return ERR_PTR(-EINVAL);
ct_zone = nf_ct_zone_dflt;
}
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2024-05-01 22:11 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-24 3:00 [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Brad Cowie
2024-04-24 3:00 ` [PATCH bpf-next v2 2/2] selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs Brad Cowie
2024-04-25 23:34 ` Martin KaFai Lau
2024-05-01 5:03 ` Brad Cowie
2024-04-25 23:27 ` [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers Martin KaFai Lau
2024-05-01 4:59 ` Brad Cowie
2024-05-01 22:11 ` Martin KaFai Lau
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).