All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC nf-next v3 0/2] netfilter: bpf: support prog update
@ 2023-12-20 14:09 D. Wythe
  2023-12-20 14:09 ` [RFC nf-next v3 1/2] " D. Wythe
  2023-12-20 14:09 ` [RFC nf-next v3 2/2] selftests/bpf: Add netfilter link prog update test D. Wythe
  0 siblings, 2 replies; 9+ messages in thread
From: D. Wythe @ 2023-12-20 14:09 UTC (permalink / raw)
  To: pablo, kadlec, fw
  Cc: bpf, linux-kernel, netdev, coreteam, netfilter-devel, davem,
	edumazet, kuba, pabeni, ast

From: "D. Wythe" <alibuda@linux.alibaba.com>

This patch set attempts to implement updating of progs within a
bpf netfilter link, allowing users to update their eBPF netfilter
prog in a hot-update manner.

Besides, a corresponding test case has been added to verify
whether the update works.

--
v1:
1. remove unnecessary context, access the prog directly via rcu.
2. remove synchronize_rcu(), dealloc the nf_link via kfree_rcu.
3. check the dead flag during the update.
--
v1->v2:
1. remove unnecessary nf_prog, accessing nf_link->link.prog in direct.
--
v2->v3:
1. access nf_link->link.prog via rcu_dereference_raw to avoid warning.


D. Wythe (2):
  netfilter: bpf: support prog update
  selftests/bpf: Add netfilter link prog update test

 net/netfilter/nf_bpf_link.c                        | 63 ++++++++++++----
 .../bpf/prog_tests/netfilter_link_update_prog.c    | 83 ++++++++++++++++++++++
 .../bpf/progs/test_netfilter_link_update_prog.c    | 24 +++++++
 3 files changed, 155 insertions(+), 15 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/netfilter_link_update_prog.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_netfilter_link_update_prog.c

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [RFC nf-next v3 1/2] netfilter: bpf: support prog update
  2023-12-20 14:09 [RFC nf-next v3 0/2] netfilter: bpf: support prog update D. Wythe
@ 2023-12-20 14:09 ` D. Wythe
  2023-12-20 21:11   ` Alexei Starovoitov
  2023-12-20 14:09 ` [RFC nf-next v3 2/2] selftests/bpf: Add netfilter link prog update test D. Wythe
  1 sibling, 1 reply; 9+ messages in thread
From: D. Wythe @ 2023-12-20 14:09 UTC (permalink / raw)
  To: pablo, kadlec, fw
  Cc: bpf, linux-kernel, netdev, coreteam, netfilter-devel, davem,
	edumazet, kuba, pabeni, ast

From: "D. Wythe" <alibuda@linux.alibaba.com>

To support prog update, we need to ensure that the prog seen
within the hook is always valid. Hooks are always protected by
rcu_read_lock(), which provides us the ability to access the prog
under RCU.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 net/netfilter/nf_bpf_link.c | 63 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 15 deletions(-)

diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index e502ec0..9bc91d1 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -8,17 +8,8 @@
 #include <net/netfilter/nf_bpf_link.h>
 #include <uapi/linux/netfilter_ipv4.h>
 
-static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
-				    const struct nf_hook_state *s)
-{
-	const struct bpf_prog *prog = bpf_prog;
-	struct bpf_nf_ctx ctx = {
-		.state = s,
-		.skb = skb,
-	};
-
-	return bpf_prog_run(prog, &ctx);
-}
+/* protect link update in parallel */
+static DEFINE_MUTEX(bpf_nf_mutex);
 
 struct bpf_nf_link {
 	struct bpf_link link;
@@ -26,8 +17,20 @@ struct bpf_nf_link {
 	struct net *net;
 	u32 dead;
 	const struct nf_defrag_hook *defrag_hook;
+	struct rcu_head head;
 };
 
+static unsigned int nf_hook_run_bpf(void *bpf_link, struct sk_buff *skb,
+				    const struct nf_hook_state *s)
+{
+	const struct bpf_nf_link *nf_link = bpf_link;
+	struct bpf_nf_ctx ctx = {
+		.state = s,
+		.skb = skb,
+	};
+	return bpf_prog_run(rcu_dereference_raw(nf_link->link.prog), &ctx);
+}
+
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 static const struct nf_defrag_hook *
 get_proto_defrag_hook(struct bpf_nf_link *link,
@@ -126,8 +129,7 @@ static void bpf_nf_link_release(struct bpf_link *link)
 static void bpf_nf_link_dealloc(struct bpf_link *link)
 {
 	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
-
-	kfree(nf_link);
+	kfree_rcu(nf_link, head);
 }
 
 static int bpf_nf_link_detach(struct bpf_link *link)
@@ -162,7 +164,34 @@ static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
 static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
 			      struct bpf_prog *old_prog)
 {
-	return -EOPNOTSUPP;
+	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
+	int err = 0;
+
+	mutex_lock(&bpf_nf_mutex);
+
+	if (nf_link->dead) {
+		err = -EPERM;
+		goto out;
+	}
+
+	/* target old_prog mismatch */
+	if (old_prog && link->prog != old_prog) {
+		err = -EPERM;
+		goto out;
+	}
+
+	old_prog = link->prog;
+	if (old_prog == new_prog) {
+		/* don't need update */
+		bpf_prog_put(new_prog);
+		goto out;
+	}
+
+	old_prog = xchg(&link->prog, new_prog);
+	bpf_prog_put(old_prog);
+out:
+	mutex_unlock(&bpf_nf_mutex);
+	return err;
 }
 
 static const struct bpf_link_ops bpf_nf_link_lops = {
@@ -226,7 +255,11 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 
 	link->hook_ops.hook = nf_hook_run_bpf;
 	link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF;
-	link->hook_ops.priv = prog;
+
+	/* bpf_nf_link_release & bpf_nf_link_dealloc() can ensures that link remains
+	 * valid at all times within nf_hook_run_bpf().
+	 */
+	link->hook_ops.priv = link;
 
 	link->hook_ops.pf = attr->link_create.netfilter.pf;
 	link->hook_ops.priority = attr->link_create.netfilter.priority;
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [RFC nf-next v3 2/2] selftests/bpf: Add netfilter link prog update test
  2023-12-20 14:09 [RFC nf-next v3 0/2] netfilter: bpf: support prog update D. Wythe
  2023-12-20 14:09 ` [RFC nf-next v3 1/2] " D. Wythe
@ 2023-12-20 14:09 ` D. Wythe
  1 sibling, 0 replies; 9+ messages in thread
From: D. Wythe @ 2023-12-20 14:09 UTC (permalink / raw)
  To: pablo, kadlec, fw
  Cc: bpf, linux-kernel, netdev, coreteam, netfilter-devel, davem,
	edumazet, kuba, pabeni, ast

From: "D. Wythe" <alibuda@linux.alibaba.com>

Update prog for active links and verify whether
the prog has been successfully replaced.

Expected output:

./test_progs -t netfilter_link_update_prog
Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 .../bpf/prog_tests/netfilter_link_update_prog.c    | 83 ++++++++++++++++++++++
 .../bpf/progs/test_netfilter_link_update_prog.c    | 24 +++++++
 2 files changed, 107 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/netfilter_link_update_prog.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_netfilter_link_update_prog.c

diff --git a/tools/testing/selftests/bpf/prog_tests/netfilter_link_update_prog.c b/tools/testing/selftests/bpf/prog_tests/netfilter_link_update_prog.c
new file mode 100644
index 00000000..d23b544
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netfilter_link_update_prog.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <test_progs.h>
+#include <linux/netfilter.h>
+#include <network_helpers.h>
+#include "test_netfilter_link_update_prog.skel.h"
+
+#define SERVER_ADDR "127.0.0.1"
+#define SERVER_PORT 12345
+
+static const char dummy_message[] = "A dummy message";
+
+static int send_dummy(int client_fd)
+{
+	struct sockaddr_storage saddr;
+	struct sockaddr *saddr_p;
+	socklen_t saddr_len;
+	int err;
+
+	saddr_p = (struct sockaddr *)&saddr;
+	err = make_sockaddr(AF_INET, SERVER_ADDR, SERVER_PORT, &saddr, &saddr_len);
+	if (!ASSERT_OK(err, "make_sockaddr"))
+		return -1;
+
+	err = sendto(client_fd, dummy_message, sizeof(dummy_message) - 1, 0, saddr_p, saddr_len);
+	if (!ASSERT_GE(err, 0, "sendto"))
+		return -1;
+
+	return 0;
+}
+
+void test_netfilter_link_update_prog(void)
+{
+	LIBBPF_OPTS(bpf_netfilter_opts, opts,
+		.pf = NFPROTO_IPV4,
+		.hooknum = NF_INET_LOCAL_OUT,
+		.priority = 100);
+	struct test_netfilter_link_update_prog *skel;
+	struct bpf_program *prog;
+	int server_fd, client_fd;
+	int err;
+
+	skel = test_netfilter_link_update_prog__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_netfilter_link_update_prog__open_and_load"))
+		goto out;
+
+	prog = skel->progs.nf_link_prog;
+
+	if (!ASSERT_OK_PTR(prog, "load program"))
+		goto out;
+
+	skel->links.nf_link_prog = bpf_program__attach_netfilter(prog, &opts);
+	if (!ASSERT_OK_PTR(skel->links.nf_link_prog, "attach netfilter program"))
+		goto out;
+
+	server_fd = start_server(AF_INET, SOCK_DGRAM, SERVER_ADDR, SERVER_PORT, 0);
+	if (!ASSERT_GE(server_fd, 0, "start_server"))
+		goto out;
+
+	client_fd = connect_to_fd(server_fd, 0);
+	if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+		goto out;
+
+	send_dummy(client_fd);
+
+	ASSERT_EQ(skel->bss->counter, 0, "counter should be zero");
+
+	err = bpf_link__update_program(skel->links.nf_link_prog, skel->progs.nf_link_prog_new);
+	if (!ASSERT_OK(err, "bpf_link__update_program"))
+		goto out;
+
+	send_dummy(client_fd);
+	ASSERT_GE(skel->bss->counter, 0, "counter should be greater than zero");
+out:
+	if (client_fd > 0)
+		close(client_fd);
+	if (server_fd > 0)
+		close(server_fd);
+
+	test_netfilter_link_update_prog__destroy(skel);
+}
+
+
diff --git a/tools/testing/selftests/bpf/progs/test_netfilter_link_update_prog.c b/tools/testing/selftests/bpf/progs/test_netfilter_link_update_prog.c
new file mode 100644
index 00000000..42ae332
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_netfilter_link_update_prog.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#define NF_ACCEPT 1
+
+SEC("netfilter")
+int nf_link_prog(struct bpf_nf_ctx *ctx)
+{
+	return NF_ACCEPT;
+}
+
+u64 counter = 0;
+
+SEC("netfilter")
+int nf_link_prog_new(struct bpf_nf_ctx *ctx)
+{
+	counter++;
+	return NF_ACCEPT;
+}
+
+char _license[] SEC("license") = "GPL";
+
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [RFC nf-next v3 1/2] netfilter: bpf: support prog update
  2023-12-20 14:09 ` [RFC nf-next v3 1/2] " D. Wythe
@ 2023-12-20 21:11   ` Alexei Starovoitov
  2023-12-22  7:06     ` D. Wythe
  0 siblings, 1 reply; 9+ messages in thread
From: Alexei Starovoitov @ 2023-12-20 21:11 UTC (permalink / raw)
  To: D. Wythe
  Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal, bpf, LKML,
	Network Development, coreteam, netfilter-devel, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov

On Wed, Dec 20, 2023 at 6:09 AM D. Wythe <alibuda@linux.alibaba.com> wrote:
>
> From: "D. Wythe" <alibuda@linux.alibaba.com>
>
> To support the prog update, we need to ensure that the prog seen
> within the hook is always valid. Considering that hooks are always
> protected by rcu_read_lock(), which provide us the ability to
> access the prog under rcu.
>
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ---
>  net/netfilter/nf_bpf_link.c | 63 ++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 48 insertions(+), 15 deletions(-)
>
> diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
> index e502ec0..9bc91d1 100644
> --- a/net/netfilter/nf_bpf_link.c
> +++ b/net/netfilter/nf_bpf_link.c
> @@ -8,17 +8,8 @@
>  #include <net/netfilter/nf_bpf_link.h>
>  #include <uapi/linux/netfilter_ipv4.h>
>
> -static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
> -                                   const struct nf_hook_state *s)
> -{
> -       const struct bpf_prog *prog = bpf_prog;
> -       struct bpf_nf_ctx ctx = {
> -               .state = s,
> -               .skb = skb,
> -       };
> -
> -       return bpf_prog_run(prog, &ctx);
> -}
> +/* protect link update in parallel */
> +static DEFINE_MUTEX(bpf_nf_mutex);
>
>  struct bpf_nf_link {
>         struct bpf_link link;
> @@ -26,8 +17,20 @@ struct bpf_nf_link {
>         struct net *net;
>         u32 dead;
>         const struct nf_defrag_hook *defrag_hook;
> +       struct rcu_head head;

I have to point out the same issues as before, but
will ask them differently...

Why do you think above rcu_head is necessary?

>  };
>
> +static unsigned int nf_hook_run_bpf(void *bpf_link, struct sk_buff *skb,
> +                                   const struct nf_hook_state *s)
> +{
> +       const struct bpf_nf_link *nf_link = bpf_link;
> +       struct bpf_nf_ctx ctx = {
> +               .state = s,
> +               .skb = skb,
> +       };
> +       return bpf_prog_run(rcu_dereference_raw(nf_link->link.prog), &ctx);
> +}
> +
>  #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
>  static const struct nf_defrag_hook *
>  get_proto_defrag_hook(struct bpf_nf_link *link,
> @@ -126,8 +129,7 @@ static void bpf_nf_link_release(struct bpf_link *link)
>  static void bpf_nf_link_dealloc(struct bpf_link *link)
>  {
>         struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
> -
> -       kfree(nf_link);
> +       kfree_rcu(nf_link, head);

Why is this needed ?
Have you looked at tcx_link_lops ?

>  }
>
>  static int bpf_nf_link_detach(struct bpf_link *link)
> @@ -162,7 +164,34 @@ static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
>  static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
>                               struct bpf_prog *old_prog)
>  {
> -       return -EOPNOTSUPP;
> +       struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
> +       int err = 0;
> +
> +       mutex_lock(&bpf_nf_mutex);

Why do you need this mutex?
What race does it solve?

> +
> +       if (nf_link->dead) {
> +               err = -EPERM;
> +               goto out;
> +       }
> +
> +       /* target old_prog mismatch */
> +       if (old_prog && link->prog != old_prog) {
> +               err = -EPERM;
> +               goto out;
> +       }
> +
> +       old_prog = link->prog;
> +       if (old_prog == new_prog) {
> +               /* don't need update */
> +               bpf_prog_put(new_prog);
> +               goto out;
> +       }
> +
> +       old_prog = xchg(&link->prog, new_prog);
> +       bpf_prog_put(old_prog);
> +out:
> +       mutex_unlock(&bpf_nf_mutex);
> +       return err;
>  }
>
>  static const struct bpf_link_ops bpf_nf_link_lops = {
> @@ -226,7 +255,11 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>
>         link->hook_ops.hook = nf_hook_run_bpf;
>         link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF;
> -       link->hook_ops.priv = prog;
> +
> +       /* bpf_nf_link_release & bpf_nf_link_dealloc() can ensures that link remains
> +        * valid at all times within nf_hook_run_bpf().
> +        */
> +       link->hook_ops.priv = link;
>
>         link->hook_ops.pf = attr->link_create.netfilter.pf;
>         link->hook_ops.priority = attr->link_create.netfilter.priority;
> --
> 1.8.3.1
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC nf-next v3 1/2] netfilter: bpf: support prog update
  2023-12-20 21:11   ` Alexei Starovoitov
@ 2023-12-22  7:06     ` D. Wythe
  2023-12-22 22:23       ` Alexei Starovoitov
  0 siblings, 1 reply; 9+ messages in thread
From: D. Wythe @ 2023-12-22  7:06 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal, bpf, LKML,
	Network Development, coreteam, netfilter-devel, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov



On 12/21/23 5:11 AM, Alexei Starovoitov wrote:
> On Wed, Dec 20, 2023 at 6:09 AM D. Wythe <alibuda@linux.alibaba.com> wrote:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> To support the prog update, we need to ensure that the prog seen
>> within the hook is always valid. Considering that hooks are always
>> protected by rcu_read_lock(), which provide us the ability to
>> access the prog under rcu.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>>   net/netfilter/nf_bpf_link.c | 63 ++++++++++++++++++++++++++++++++++-----------
>>   1 file changed, 48 insertions(+), 15 deletions(-)
>>
>> diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
>> index e502ec0..9bc91d1 100644
>> --- a/net/netfilter/nf_bpf_link.c
>> +++ b/net/netfilter/nf_bpf_link.c
>> @@ -8,17 +8,8 @@
>>   #include <net/netfilter/nf_bpf_link.h>
>>   #include <uapi/linux/netfilter_ipv4.h>
>>
>> -static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
>> -                                   const struct nf_hook_state *s)
>> -{
>> -       const struct bpf_prog *prog = bpf_prog;
>> -       struct bpf_nf_ctx ctx = {
>> -               .state = s,
>> -               .skb = skb,
>> -       };
>> -
>> -       return bpf_prog_run(prog, &ctx);
>> -}
>> +/* protect link update in parallel */
>> +static DEFINE_MUTEX(bpf_nf_mutex);
>>
>>   struct bpf_nf_link {
>>          struct bpf_link link;
>> @@ -26,8 +17,20 @@ struct bpf_nf_link {
>>          struct net *net;
>>          u32 dead;
>>          const struct nf_defrag_hook *defrag_hook;
>> +       struct rcu_head head;
> I have to point out the same issues as before, but
> will ask them differently...
>
> Why do you think above rcu_head is necessary?
>
>>   };
>>
>> +static unsigned int nf_hook_run_bpf(void *bpf_link, struct sk_buff *skb,
>> +                                   const struct nf_hook_state *s)
>> +{
>> +       const struct bpf_nf_link *nf_link = bpf_link;
>> +       struct bpf_nf_ctx ctx = {
>> +               .state = s,
>> +               .skb = skb,
>> +       };
>> +       return bpf_prog_run(rcu_dereference_raw(nf_link->link.prog), &ctx);
>> +}
>> +
>>   #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
>>   static const struct nf_defrag_hook *
>>   get_proto_defrag_hook(struct bpf_nf_link *link,
>> @@ -126,8 +129,7 @@ static void bpf_nf_link_release(struct bpf_link *link)
>>   static void bpf_nf_link_dealloc(struct bpf_link *link)
>>   {
>>          struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
>> -
>> -       kfree(nf_link);
>> +       kfree_rcu(nf_link, head);
> Why is this needed ?
> Have you looked at tcx_link_lops ?

Introducing rcu_head/kfree_rcu is to address the situation where the 
netfilter hooks might
still access the link after bpf_nf_link_dealloc.

                                                      nf_hook_run_bpf
                                                      const struct 
bpf_nf_link *nf_link = bpf_link;

bpf_nf_link_release
     nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);

bpf_nf_link_dealloc
     free(link)
bpf_prog_run(link->prog);


I had checked tcx_link_lops; it seems it uses synchronize_rcu()
to solve the
same problem, which is also the way we used in the first version.

https://lore.kernel.org/bpf/1702467945-38866-1-git-send-email-alibuda@linux.alibaba.com/

However, we have received some opposing views, believing that this is a 
bit overkill,
so we decided to use kfree_rcu.

https://lore.kernel.org/bpf/20231213222415.GA13818@breakpoint.cc/

>>   }
>>
>>   static int bpf_nf_link_detach(struct bpf_link *link)
>> @@ -162,7 +164,34 @@ static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
>>   static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
>>                                struct bpf_prog *old_prog)
>>   {
>> -       return -EOPNOTSUPP;
>> +       struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
>> +       int err = 0;
>> +
>> +       mutex_lock(&bpf_nf_mutex);
> Why do you need this mutex?
> What race does it solve?

To avoid users updating a link with a different prog at the same
time. I noticed that sys_bpf()
doesn't seem to prevent being invoked by users at the same time. Have I
missed something?

Best wishes,
D. Wythe
>> +
>> +       if (nf_link->dead) {
>> +               err = -EPERM;
>> +               goto out;
>> +       }
>> +
>> +       /* target old_prog mismatch */
>> +       if (old_prog && link->prog != old_prog) {
>> +               err = -EPERM;
>> +               goto out;
>> +       }
>> +
>> +       old_prog = link->prog;
>> +       if (old_prog == new_prog) {
>> +               /* don't need update */
>> +               bpf_prog_put(new_prog);
>> +               goto out;
>> +       }
>> +
>> +       old_prog = xchg(&link->prog, new_prog);
>> +       bpf_prog_put(old_prog);
>> +out:
>> +       mutex_unlock(&bpf_nf_mutex);
>> +       return err;
>>   }
>>
>>   static const struct bpf_link_ops bpf_nf_link_lops = {
>> @@ -226,7 +255,11 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>>
>>          link->hook_ops.hook = nf_hook_run_bpf;
>>          link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF;
>> -       link->hook_ops.priv = prog;
>> +
>> +       /* bpf_nf_link_release & bpf_nf_link_dealloc() can ensures that link remains
>> +        * valid at all times within nf_hook_run_bpf().
>> +        */
>> +       link->hook_ops.priv = link;
>>
>>          link->hook_ops.pf = attr->link_create.netfilter.pf;
>>          link->hook_ops.priority = attr->link_create.netfilter.priority;
>> --
>> 1.8.3.1
>>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC nf-next v3 1/2] netfilter: bpf: support prog update
  2023-12-22  7:06     ` D. Wythe
@ 2023-12-22 22:23       ` Alexei Starovoitov
  2023-12-27  8:20         ` D. Wythe
  0 siblings, 1 reply; 9+ messages in thread
From: Alexei Starovoitov @ 2023-12-22 22:23 UTC (permalink / raw)
  To: D. Wythe
  Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal, bpf, LKML,
	Network Development, coreteam, netfilter-devel, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov

On Thu, Dec 21, 2023 at 11:06 PM D. Wythe <alibuda@linux.alibaba.com> wrote:
>
>
>
> On 12/21/23 5:11 AM, Alexei Starovoitov wrote:
> > On Wed, Dec 20, 2023 at 6:09 AM D. Wythe <alibuda@linux.alibaba.com> wrote:
> >> From: "D. Wythe" <alibuda@linux.alibaba.com>
> >>
> >> To support the prog update, we need to ensure that the prog seen
> >> within the hook is always valid. Considering that hooks are always
> >> protected by rcu_read_lock(), which provide us the ability to
> >> access the prog under rcu.
> >>
> >> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> >> ---
> >>   net/netfilter/nf_bpf_link.c | 63 ++++++++++++++++++++++++++++++++++-----------
> >>   1 file changed, 48 insertions(+), 15 deletions(-)
> >>
> >> diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
> >> index e502ec0..9bc91d1 100644
> >> --- a/net/netfilter/nf_bpf_link.c
> >> +++ b/net/netfilter/nf_bpf_link.c
> >> @@ -8,17 +8,8 @@
> >>   #include <net/netfilter/nf_bpf_link.h>
> >>   #include <uapi/linux/netfilter_ipv4.h>
> >>
> >> -static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
> >> -                                   const struct nf_hook_state *s)
> >> -{
> >> -       const struct bpf_prog *prog = bpf_prog;
> >> -       struct bpf_nf_ctx ctx = {
> >> -               .state = s,
> >> -               .skb = skb,
> >> -       };
> >> -
> >> -       return bpf_prog_run(prog, &ctx);
> >> -}
> >> +/* protect link update in parallel */
> >> +static DEFINE_MUTEX(bpf_nf_mutex);
> >>
> >>   struct bpf_nf_link {
> >>          struct bpf_link link;
> >> @@ -26,8 +17,20 @@ struct bpf_nf_link {
> >>          struct net *net;
> >>          u32 dead;
> >>          const struct nf_defrag_hook *defrag_hook;
> >> +       struct rcu_head head;
> > I have to point out the same issues as before, but
> > will ask them differently...
> >
> > Why do you think above rcu_head is necessary?
> >
> >>   };
> >>
> >> +static unsigned int nf_hook_run_bpf(void *bpf_link, struct sk_buff *skb,
> >> +                                   const struct nf_hook_state *s)
> >> +{
> >> +       const struct bpf_nf_link *nf_link = bpf_link;
> >> +       struct bpf_nf_ctx ctx = {
> >> +               .state = s,
> >> +               .skb = skb,
> >> +       };
> >> +       return bpf_prog_run(rcu_dereference_raw(nf_link->link.prog), &ctx);
> >> +}
> >> +
> >>   #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
> >>   static const struct nf_defrag_hook *
> >>   get_proto_defrag_hook(struct bpf_nf_link *link,
> >> @@ -126,8 +129,7 @@ static void bpf_nf_link_release(struct bpf_link *link)
> >>   static void bpf_nf_link_dealloc(struct bpf_link *link)
> >>   {
> >>          struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
> >> -
> >> -       kfree(nf_link);
> >> +       kfree_rcu(nf_link, head);
> > Why is this needed ?
> > Have you looked at tcx_link_lops ?
>
> Introducing rcu_head/kfree_rcu is to address the situation where the
> netfilter hooks might
> still access the link after bpf_nf_link_dealloc.

Why do you think so?

>
>                                                       nf_hook_run_bpf
>                                                       const struct
> bpf_nf_link *nf_link = bpf_link;
>
> bpf_nf_link_release
>      nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
>
> bpf_nf_link_dealloc
>      free(link)
> bpf_prog_run(link->prog);
>
>
> I had checked the tcx_link_lops ,it's seems it use the synchronize_rcu()
> to solve the

Where do you see such code in tcx_link_lops ?

> same problem, which is also the way we used in the first version.
>
> https://lore.kernel.org/bpf/1702467945-38866-1-git-send-email-alibuda@linux.alibaba.com/
>
> However, we have received some opposing views, believing that this is a
> bit overkill,
> so we decided to use kfree_rcu.
>
> https://lore.kernel.org/bpf/20231213222415.GA13818@breakpoint.cc/
>
> >>   }
> >>
> >>   static int bpf_nf_link_detach(struct bpf_link *link)
> >> @@ -162,7 +164,34 @@ static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
> >>   static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
> >>                                struct bpf_prog *old_prog)
> >>   {
> >> -       return -EOPNOTSUPP;
> >> +       struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
> >> +       int err = 0;
> >> +
> >> +       mutex_lock(&bpf_nf_mutex);
> > Why do you need this mutex?
> > What race does it solve?
>
> To avoid user update a link with differ prog at the same time. I noticed
> that sys_bpf()
> doesn't seem to prevent being invoked by user at the same time. Have I
> missed something?

You're correct that sys_bpf() doesn't lock anything.
But what are you serializing in this bpf_nf_link_update() ?
What will happen if multiple bpf_nf_link_update()
without mutex run on different CPUs in parallel ?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC nf-next v3 1/2] netfilter: bpf: support prog update
  2023-12-22 22:23       ` Alexei Starovoitov
@ 2023-12-27  8:20         ` D. Wythe
  2023-12-27 19:00           ` Alexei Starovoitov
  0 siblings, 1 reply; 9+ messages in thread
From: D. Wythe @ 2023-12-27  8:20 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal, bpf, LKML,
	Network Development, coreteam, netfilter-devel, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov



On 12/23/23 6:23 AM, Alexei Starovoitov wrote:
> On Thu, Dec 21, 2023 at 11:06 PM D. Wythe <alibuda@linux.alibaba.com> wrote:
>>
>>
>> On 12/21/23 5:11 AM, Alexei Starovoitov wrote:
>>> On Wed, Dec 20, 2023 at 6:09 AM D. Wythe <alibuda@linux.alibaba.com> wrote:
>>>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>>>
>>>> To support the prog update, we need to ensure that the prog seen
>>>> within the hook is always valid. Considering that hooks are always
>>>> protected by rcu_read_lock(), which provide us the ability to
>>>> access the prog under rcu.
>>>>
>>>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>>>> ---
>>>>    net/netfilter/nf_bpf_link.c | 63 ++++++++++++++++++++++++++++++++++-----------
>>>>    1 file changed, 48 insertions(+), 15 deletions(-)
>>>>
>>>> diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
>>>> index e502ec0..9bc91d1 100644
>>>> --- a/net/netfilter/nf_bpf_link.c
>>>> +++ b/net/netfilter/nf_bpf_link.c
>>>> @@ -8,17 +8,8 @@
>>>>    #include <net/netfilter/nf_bpf_link.h>
>>>>    #include <uapi/linux/netfilter_ipv4.h>
>>>>
>>>> -static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
>>>> -                                   const struct nf_hook_state *s)
>>>> -{
>>>> -       const struct bpf_prog *prog = bpf_prog;
>>>> -       struct bpf_nf_ctx ctx = {
>>>> -               .state = s,
>>>> -               .skb = skb,
>>>> -       };
>>>> -
>>>> -       return bpf_prog_run(prog, &ctx);
>>>> -}
>>>> +/* protect link update in parallel */
>>>> +static DEFINE_MUTEX(bpf_nf_mutex);
>>>>
>>>>    struct bpf_nf_link {
>>>>           struct bpf_link link;
>>>> @@ -26,8 +17,20 @@ struct bpf_nf_link {
>>>>           struct net *net;
>>>>           u32 dead;
>>>>           const struct nf_defrag_hook *defrag_hook;
>>>> +       struct rcu_head head;
>>> I have to point out the same issues as before, but
>>> will ask them differently...
>>>
>>> Why do you think above rcu_head is necessary?
>>>
>>>>    };
>>>>
>>>> +static unsigned int nf_hook_run_bpf(void *bpf_link, struct sk_buff *skb,
>>>> +                                   const struct nf_hook_state *s)
>>>> +{
>>>> +       const struct bpf_nf_link *nf_link = bpf_link;
>>>> +       struct bpf_nf_ctx ctx = {
>>>> +               .state = s,
>>>> +               .skb = skb,
>>>> +       };
>>>> +       return bpf_prog_run(rcu_dereference_raw(nf_link->link.prog), &ctx);
>>>> +}
>>>> +
>>>>    #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
>>>>    static const struct nf_defrag_hook *
>>>>    get_proto_defrag_hook(struct bpf_nf_link *link,
>>>> @@ -126,8 +129,7 @@ static void bpf_nf_link_release(struct bpf_link *link)
>>>>    static void bpf_nf_link_dealloc(struct bpf_link *link)
>>>>    {
>>>>           struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
>>>> -
>>>> -       kfree(nf_link);
>>>> +       kfree_rcu(nf_link, head);
>>> Why is this needed ?
>>> Have you looked at tcx_link_lops ?
>> Introducing rcu_head/kfree_rcu is to address the situation where the
>> netfilter hooks might
>> still access the link after bpf_nf_link_dealloc.
> Why do you think so?
>

Hi Alexei,


IMHO, nf_unregister_net_hook does not wait for the completion of the
execution of the hook that is being removed;
instead, it allocates a new array without that very hook to replace the
old array via rcu_assign_pointer() (in __nf_hook_entries_try_shrink),
then it uses call_rcu() to release the old one.

You can find more details in commit 
8c873e2199700c2de7dbd5eedb9d90d5f109462b.

In other words, when nf_unregister_net_hook returns, there may still be 
contexts executing hooks on the
old array, which means that the `link` may still be accessed after 
nf_unregister_net_hook returns.

And that's the reason why we use kfree_rcu() to release the `link`.
>>                                                        nf_hook_run_bpf
>>                                                        const struct
>> bpf_nf_link *nf_link = bpf_link;
>>
>> bpf_nf_link_release
>>       nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
>>
>> bpf_nf_link_dealloc
>>       free(link)
>> bpf_prog_run(link->prog);
>>
>>
>> I had checked the tcx_link_lops ,it's seems it use the synchronize_rcu()
>> to solve the
> Where do you see such code in tcx_link_lops ?

I'm not certain if the reason that it chooses to use synchronize_rcu()
is the same as mine,
but I did see it here:


tcx_link_release() -> tcx_entry_sync()


static inline void tcx_entry_sync(void)
{
     /* bpf_mprog_entry got a/b swapped, therefore ensure that
      * there are no inflight users on the old one anymore.
      */
     synchronize_rcu();
}

>> same problem, which is also the way we used in the first version.
>>
>> https://lore.kernel.org/bpf/1702467945-38866-1-git-send-email-alibuda@linux.alibaba.com/
>>
>> However, we have received some opposing views, believing that this is a
>> bit overkill,
>> so we decided to use kfree_rcu.
>>
>> https://lore.kernel.org/bpf/20231213222415.GA13818@breakpoint.cc/
>>
>>>>    }
>>>>
>>>>    static int bpf_nf_link_detach(struct bpf_link *link)
>>>> @@ -162,7 +164,34 @@ static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
>>>>    static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
>>>>                                 struct bpf_prog *old_prog)
>>>>    {
>>>> -       return -EOPNOTSUPP;
>>>> +       struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
>>>> +       int err = 0;
>>>> +
>>>> +       mutex_lock(&bpf_nf_mutex);
>>> Why do you need this mutex?
>>> What race does it solve?
>> To avoid users updating a link with a different prog at the same time. I noticed
>> that sys_bpf()
>> doesn't seem to prevent being invoked by users at the same time. Have I
>> missed something?
> You're correct that sys_bpf() doesn't lock anything.
> But what are you serializing in this bpf_nf_link_update() ?
> What will happen if multiple bpf_nf_link_update()
> without mutex run on different CPUs in parallel ?

I must admit that it is indeed feasible if we eliminate the mutex and 
use cmpxchg to swap the prog (we need to ensure that there is only one 
bpf_prog_put() on the old prog).
However, when cmpxchg fails, it means that this context has not 
outcompeted the other one, and we have to return a failure. Maybe 
something like this:

if (!cmpxchg(&link->prog, old_prog, new_prog)) {
     /* already replaced by another link_update */
     return -xxx;
}

As a comparison, The version with the mutex wouldn't encounter this 
error, every update would succeed. I think that it's too harsh for the 
user to receive a failure
in that case since they haven't done anything wrong.

Best wishes,
D. Wythe



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC nf-next v3 1/2] netfilter: bpf: support prog update
  2023-12-27  8:20         ` D. Wythe
@ 2023-12-27 19:00           ` Alexei Starovoitov
  2023-12-28 11:06             ` D. Wythe
  0 siblings, 1 reply; 9+ messages in thread
From: Alexei Starovoitov @ 2023-12-27 19:00 UTC (permalink / raw)
  To: D. Wythe
  Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal, bpf, LKML,
	Network Development, coreteam, netfilter-devel, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov

On Wed, Dec 27, 2023 at 12:20 AM D. Wythe <alibuda@linux.alibaba.com> wrote:
>
>
> Hi Alexei,
>
>
> IMHO, nf_unregister_net_hook does not wait for the completion of the
> execution of the hook that is being removed;
> instead, it allocates a new array without the very hook to replace the
> old array via rcu_assign_pointer() (in __nf_hook_entries_try_shrink),
> then it uses call_rcu() to release the old one.
>
> You can find more details in commit
> 8c873e2199700c2de7dbd5eedb9d90d5f109462b.
>
> In other words, when nf_unregister_net_hook returns, there may still be
> contexts executing hooks on the
> old array, which means that the `link` may still be accessed after
> nf_unregister_net_hook returns.
>
> And that's the reason why we use kfree_rcu() to release the `link`.
> >>                                                        nf_hook_run_bpf
> >>                                                        const struct
> >> bpf_nf_link *nf_link = bpf_link;
> >>
> >> bpf_nf_link_release
> >>       nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
> >>
> >> bpf_nf_link_dealloc
> >>       free(link)
> >> bpf_prog_run(link->prog);

Got it.
Sounds like it's an existing bug. If so it should be an independent
patch with Fixes tag.

Also please craft a test case to demonstrate UAF.

>
> I must admit that it is indeed feasible if we eliminate the mutex and
> use cmpxchg to swap the prog (we need to ensure that there is only one
> bpf_prog_put() on the old prog).
> However, when cmpxchg fails, it means that this context has not
> outcompeted the other one, and we have to return a failure. Maybe
> something like this:
>
> if (!cmpxchg(&link->prog, old_prog, new_prog)) {
>      /* already replaced by another link_update */
>      return -xxx;
> }
>
> As a comparison, The version with the mutex wouldn't encounter this
> error, every update would succeed. I think that it's too harsh for the
> user to receive a failure
> in that case since they haven't done anything wrong.

Disagree. The mutex doesn't prevent this issue.
There is always a race.
It happens when link_update.old_prog_fd and BPF_F_REPLACE
were specified.
One user space passes an FD of the old prog and
another user space doing the same. They both race and one of them
gets
if (old_prog && link->prog != old_prog) {
               err = -EPERM;

it's no different with dropping the mutex and doing:
if (old_prog) {
    if (!cmpxchg(&link->prog, old_prog, new_prog))
      -EPERM
} else {
   old_prog = xchg(&link->prog, new_prog);
}

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC nf-next v3 1/2] netfilter: bpf: support prog update
  2023-12-27 19:00           ` Alexei Starovoitov
@ 2023-12-28 11:06             ` D. Wythe
  0 siblings, 0 replies; 9+ messages in thread
From: D. Wythe @ 2023-12-28 11:06 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal, bpf, LKML,
	Network Development, coreteam, netfilter-devel, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov



On 12/28/23 3:00 AM, Alexei Starovoitov wrote:
> On Wed, Dec 27, 2023 at 12:20 AM D. Wythe <alibuda@linux.alibaba.com> wrote:
>>
>> Hi Alexei,
>>
>>
>> IMHO, nf_unregister_net_hook does not wait for the completion of the
>> execution of the hook that is being removed;
>> instead, it allocates a new array without the very hook to replace the
>> old array via rcu_assign_pointer() (in __nf_hook_entries_try_shrink),
>> then it uses call_rcu() to release the old one.
>>
>> You can find more details in commit
>> 8c873e2199700c2de7dbd5eedb9d90d5f109462b.
>>
>> In other words, when nf_unregister_net_hook returns, there may still be
>> contexts executing hooks on the
>> old array, which means that the `link` may still be accessed after
>> nf_unregister_net_hook returns.
>>
>> And that's the reason why we use kfree_rcu() to release the `link`.
>>>>                                                         nf_hook_run_bpf
>>>>                                                         const struct
>>>> bpf_nf_link *nf_link = bpf_link;
>>>>
>>>> bpf_nf_link_release
>>>>        nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
>>>>
>>>> bpf_nf_link_dealloc
>>>>        free(link)
>>>> bpf_prog_run(link->prog);
> Got it.
> Sounds like it's an existing bug. If so it should be an independent
> patch with Fixes tag.
>
> Also please craft a test case to demonstrate UAF.
>

It is not an existing bug... Accessing the link within the hook was 
something I introduced here
to support updates😉, as previously there was no access to the link 
within the hook.
>> I must admit that it is indeed feasible if we eliminate the mutex and
>> use cmpxchg to swap the prog (we need to ensure that there is only one
>> bpf_prog_put() on the old prog).
>> However, when cmpxchg fails, it means that this context has not
>> outcompeted the other one, and we have to return a failure. Maybe
>> something like this:
>>
>> if (!cmpxchg(&link->prog, old_prog, new_prog)) {
>>       /* already replaced by another link_update */
>>       return -xxx;
>> }
>>
>> As a comparison, The version with the mutex wouldn't encounter this
>> error, every update would succeed. I think that it's too harsh for the
>> user to receive a failure
>> in that case since they haven't done anything wrong.
> Disagree. The mutex doesn't prevent this issue.
> There is always a race.
> It happens when link_update.old_prog_fd and BPF_F_REPLACE
> were specified.
> One user space passes an FD of the old prog and
> another user space doing the same. They both race and one of them
> gets
> if (old_prog && link->prog != old_prog) {
>                 err = -EPERM;
>
> it's no different with dropping the mutex and doing:
> if (old_prog) {
>      if (!cmpxchg(&link->prog, old_prog, new_prog))
>        -EPERM
> } else {
>     old_prog = xchg(&link->prog, new_prog);
> }

Got it! That's very helpful, thanks very much! I will modify my patch 
accordingly.


Best wishes,
D. Wythe






^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-12-28 11:06 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-20 14:09 [RFC nf-next v3 0/2] netfilter: bpf: support prog update D. Wythe
2023-12-20 14:09 ` [RFC nf-next v3 1/2] " D. Wythe
2023-12-20 21:11   ` Alexei Starovoitov
2023-12-22  7:06     ` D. Wythe
2023-12-22 22:23       ` Alexei Starovoitov
2023-12-27  8:20         ` D. Wythe
2023-12-27 19:00           ` Alexei Starovoitov
2023-12-28 11:06             ` D. Wythe
2023-12-20 14:09 ` [RFC nf-next v3 2/2] selftests/bpf: Add netfilter link prog update test D. Wythe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.