All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC nf-next 0/2] netfilter: bpf: support prog update
@ 2023-12-13 11:45 D. Wythe
  2023-12-13 11:45 ` [RFC nf-next 1/2] " D. Wythe
  2023-12-13 11:45 ` [RFC nf-next 2/2] selftests/bpf: Add netfilter link prog update test D. Wythe
  0 siblings, 2 replies; 12+ messages in thread
From: D. Wythe @ 2023-12-13 11:45 UTC (permalink / raw)
  To: pablo, kadlec, fw
  Cc: bpf, linux-kernel, netdev, coreteam, netfilter-devel, davem,
	edumazet, kuba, pabeni, ast, D. Wythe

From: "D. Wythe" <alibuda@linux.alibaba.com>

These patches attempt to support updating the prog of a
bpf netfilter link by introducing a new RCU-protected context
to access the prog, and add a corresponding test case.

D. Wythe (2):
  netfilter: bpf: support prog update
  selftests/bpf: Add netfilter link prog update test

 net/netfilter/nf_bpf_link.c                        | 124 ++++++++++++++++++---
 .../bpf/prog_tests/netfilter_link_update_prog.c    |  83 ++++++++++++++
 .../bpf/progs/test_netfilter_link_update_prog.c    |  24 ++++
 3 files changed, 218 insertions(+), 13 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/netfilter_link_update_prog.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_netfilter_link_update_prog.c

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [RFC nf-next 1/2] netfilter: bpf: support prog update
  2023-12-13 11:45 [RFC nf-next 0/2] netfilter: bpf: support prog update D. Wythe
@ 2023-12-13 11:45 ` D. Wythe
  2023-12-13 22:24   ` Florian Westphal
  2023-12-13 11:45 ` [RFC nf-next 2/2] selftests/bpf: Add netfilter link prog update test D. Wythe
  1 sibling, 1 reply; 12+ messages in thread
From: D. Wythe @ 2023-12-13 11:45 UTC (permalink / raw)
  To: pablo, kadlec, fw
  Cc: bpf, linux-kernel, netdev, coreteam, netfilter-devel, davem,
	edumazet, kuba, pabeni, ast, D. Wythe

From: "D. Wythe" <alibuda@linux.alibaba.com>

bpf_nf_link does not support prog update yet: bpf_nf_link_update()
simply returns -EOPNOTSUPP.

To support the prog update, we need to ensure that the prog seen
within the hook is always valid. Since hooks are always protected
by rcu_read_lock(), we can use a new RCU-protected context to
access the prog.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 net/netfilter/nf_bpf_link.c | 124 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 111 insertions(+), 13 deletions(-)

diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index e502ec0..918c470 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -8,17 +8,11 @@
 #include <net/netfilter/nf_bpf_link.h>
 #include <uapi/linux/netfilter_ipv4.h>
 
-static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
-				    const struct nf_hook_state *s)
+struct bpf_nf_hook_ctx
 {
-	const struct bpf_prog *prog = bpf_prog;
-	struct bpf_nf_ctx ctx = {
-		.state = s,
-		.skb = skb,
-	};
-
-	return bpf_prog_run(prog, &ctx);
-}
+	struct bpf_prog *prog;
+	struct rcu_head rcu;
+};
 
 struct bpf_nf_link {
 	struct bpf_link link;
@@ -26,8 +20,59 @@ struct bpf_nf_link {
 	struct net *net;
 	u32 dead;
 	const struct nf_defrag_hook *defrag_hook;
+	/* protect link update in parallel */
+	struct mutex update_lock;
+	struct bpf_nf_hook_ctx __rcu *hook_ctx;
 };
 
+static struct bpf_nf_hook_ctx *
+bpf_nf_hook_ctx_from_prog(struct bpf_prog *prog, gfp_t flags)
+{
+	struct bpf_nf_hook_ctx *hook_ctx;
+
+	hook_ctx = kmalloc(sizeof(*hook_ctx), flags);
+	if (hook_ctx) {
+		hook_ctx->prog = prog;
+		bpf_prog_inc(prog);
+	}
+	return hook_ctx;
+}
+
+static void bpf_nf_hook_ctx_free(struct bpf_nf_hook_ctx *hook_ctx)
+{
+	if (!hook_ctx)
+		return;
+	if (hook_ctx->prog)
+		bpf_prog_put(hook_ctx->prog);
+	kfree(hook_ctx);
+}
+
+static void __bpf_nf_hook_ctx_free_rcu(struct rcu_head *head)
+{
+	struct bpf_nf_hook_ctx *hook_ctx = container_of(head, struct bpf_nf_hook_ctx, rcu);
+
+	bpf_nf_hook_ctx_free(hook_ctx);
+}
+
+static void bpf_nf_hook_ctx_free_rcu(struct bpf_nf_hook_ctx *hook_ctx)
+{
+	call_rcu(&hook_ctx->rcu, __bpf_nf_hook_ctx_free_rcu);
+}
+
+static unsigned int nf_hook_run_bpf(void *bpf_link, struct sk_buff *skb,
+				    const struct nf_hook_state *s)
+{
+	const struct bpf_nf_link *link = bpf_link;
+	struct bpf_nf_hook_ctx *hook_ctx;
+	struct bpf_nf_ctx ctx = {
+		.state = s,
+		.skb = skb,
+	};
+
+	hook_ctx = rcu_dereference(link->hook_ctx);
+	return bpf_prog_run(hook_ctx->prog, &ctx);
+}
+
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 static const struct nf_defrag_hook *
 get_proto_defrag_hook(struct bpf_nf_link *link,
@@ -120,6 +165,10 @@ static void bpf_nf_link_release(struct bpf_link *link)
 	if (!cmpxchg(&nf_link->dead, 0, 1)) {
 		nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
 		bpf_nf_disable_defrag(nf_link);
+		/* Wait for outstanding hook to complete before the
+		 * link gets released.
+		 */
+		synchronize_rcu();
 	}
 }
 
@@ -127,6 +176,7 @@ static void bpf_nf_link_dealloc(struct bpf_link *link)
 {
 	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
 
+	bpf_nf_hook_ctx_free(nf_link->hook_ctx);
 	kfree(nf_link);
 }
 
@@ -162,7 +212,42 @@ static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
 static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
 			      struct bpf_prog *old_prog)
 {
-	return -EOPNOTSUPP;
+	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
+	struct bpf_nf_hook_ctx *hook_ctx;
+	int err = 0;
+
+	mutex_lock(&nf_link->update_lock);
+
+	/* target old_prog mismatch */
+	if (old_prog && link->prog != old_prog) {
+		err = -EPERM;
+		goto out;
+	}
+
+	old_prog = link->prog;
+	if (old_prog == new_prog) {
+		/* don't need update */
+		bpf_prog_put(new_prog);
+		goto out;
+	}
+
+	hook_ctx = bpf_nf_hook_ctx_from_prog(new_prog, GFP_USER);
+	if (!hook_ctx) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* replace and get the old one */
+	hook_ctx = rcu_replace_pointer(nf_link->hook_ctx, hook_ctx,
+				       lockdep_is_held(&nf_link->update_lock));
+	/* free old hook_ctx */
+	bpf_nf_hook_ctx_free_rcu(hook_ctx);
+
+	old_prog = xchg(&link->prog, new_prog);
+	bpf_prog_put(old_prog);
+out:
+	mutex_unlock(&nf_link->update_lock);
+	return err;
 }
 
 static const struct bpf_link_ops bpf_nf_link_lops = {
@@ -222,11 +307,22 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 	if (!link)
 		return -ENOMEM;
 
+	link->hook_ctx = bpf_nf_hook_ctx_from_prog(prog, GFP_USER);
+	if (!link->hook_ctx) {
+		kfree(link);
+		return -ENOMEM;
+	}
+
 	bpf_link_init(&link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog);
 
 	link->hook_ops.hook = nf_hook_run_bpf;
 	link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF;
-	link->hook_ops.priv = prog;
+
+	/* bpf_nf_link_release() ensures that after its execution, there will be
+	 * no ongoing or upcoming execution of nf_hook_run_bpf() within any context.
+	 * Therefore, within nf_hook_run_bpf(), the link remains valid at all times.
+	 */
+	link->hook_ops.priv = link;
 
 	link->hook_ops.pf = attr->link_create.netfilter.pf;
 	link->hook_ops.priority = attr->link_create.netfilter.priority;
@@ -236,9 +332,11 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 	link->dead = false;
 	link->defrag_hook = NULL;
 
+	mutex_init(&link->update_lock);
+
 	err = bpf_link_prime(&link->link, &link_primer);
 	if (err) {
-		kfree(link);
+		bpf_nf_link_dealloc(&link->link);
 		return err;
 	}
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [RFC nf-next 2/2] selftests/bpf: Add netfilter link prog update test
  2023-12-13 11:45 [RFC nf-next 0/2] netfilter: bpf: support prog update D. Wythe
  2023-12-13 11:45 ` [RFC nf-next 1/2] " D. Wythe
@ 2023-12-13 11:45 ` D. Wythe
  1 sibling, 0 replies; 12+ messages in thread
From: D. Wythe @ 2023-12-13 11:45 UTC (permalink / raw)
  To: pablo, kadlec, fw
  Cc: bpf, linux-kernel, netdev, coreteam, netfilter-devel, davem,
	edumazet, kuba, pabeni, ast, D. Wythe

From: "D. Wythe" <alibuda@linux.alibaba.com>

Update prog for active link and verify whether
the prog has been successfully replaced.

Expected output:

./test_progs -t netfilter_link_update_prog
Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 .../bpf/prog_tests/netfilter_link_update_prog.c    | 83 ++++++++++++++++++++++
 .../bpf/progs/test_netfilter_link_update_prog.c    | 24 +++++++
 2 files changed, 107 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/netfilter_link_update_prog.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_netfilter_link_update_prog.c

diff --git a/tools/testing/selftests/bpf/prog_tests/netfilter_link_update_prog.c b/tools/testing/selftests/bpf/prog_tests/netfilter_link_update_prog.c
new file mode 100644
index 00000000..d23b544
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netfilter_link_update_prog.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <test_progs.h>
+#include <linux/netfilter.h>
+#include <network_helpers.h>
+#include "test_netfilter_link_update_prog.skel.h"
+
+#define SERVER_ADDR "127.0.0.1"
+#define SERVER_PORT 12345
+
+static const char dummy_message[] = "A dummy message";
+
+static int send_dummy(int client_fd)
+{
+	struct sockaddr_storage saddr;
+	struct sockaddr *saddr_p;
+	socklen_t saddr_len;
+	int err;
+
+	saddr_p = (struct sockaddr *)&saddr;
+	err = make_sockaddr(AF_INET, SERVER_ADDR, SERVER_PORT, &saddr, &saddr_len);
+	if (!ASSERT_OK(err, "make_sockaddr"))
+		return -1;
+
+	err = sendto(client_fd, dummy_message, sizeof(dummy_message) - 1, 0, saddr_p, saddr_len);
+	if (!ASSERT_GE(err, 0, "sendto"))
+		return -1;
+
+	return 0;
+}
+
+void test_netfilter_link_update_prog(void)
+{
+	LIBBPF_OPTS(bpf_netfilter_opts, opts,
+		.pf = NFPROTO_IPV4,
+		.hooknum = NF_INET_LOCAL_OUT,
+		.priority = 100);
+	struct test_netfilter_link_update_prog *skel;
+	struct bpf_program *prog;
+	int server_fd, client_fd;
+	int err;
+
+	skel = test_netfilter_link_update_prog__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_netfilter_link_update_prog__open_and_load"))
+		goto out;
+
+	prog = skel->progs.nf_link_prog;
+
+	if (!ASSERT_OK_PTR(prog, "load program"))
+		goto out;
+
+	skel->links.nf_link_prog = bpf_program__attach_netfilter(prog, &opts);
+	if (!ASSERT_OK_PTR(skel->links.nf_link_prog, "attach netfilter program"))
+		goto out;
+
+	server_fd = start_server(AF_INET, SOCK_DGRAM, SERVER_ADDR, SERVER_PORT, 0);
+	if (!ASSERT_GE(server_fd, 0, "start_server"))
+		goto out;
+
+	client_fd = connect_to_fd(server_fd, 0);
+	if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+		goto out;
+
+	send_dummy(client_fd);
+
+	ASSERT_EQ(skel->bss->counter, 0, "counter should be zero");
+
+	err = bpf_link__update_program(skel->links.nf_link_prog, skel->progs.nf_link_prog_new);
+	if (!ASSERT_OK(err, "bpf_link__update_program"))
+		goto out;
+
+	send_dummy(client_fd);
+	ASSERT_GE(skel->bss->counter, 0, "counter should be greater than zero");
+out:
+	if (client_fd > 0)
+		close(client_fd);
+	if (server_fd > 0)
+		close(server_fd);
+
+	test_netfilter_link_update_prog__destroy(skel);
+}
+
+
diff --git a/tools/testing/selftests/bpf/progs/test_netfilter_link_update_prog.c b/tools/testing/selftests/bpf/progs/test_netfilter_link_update_prog.c
new file mode 100644
index 00000000..42ae332
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_netfilter_link_update_prog.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#define NF_ACCEPT 1
+
+SEC("netfilter")
+int nf_link_prog(struct bpf_nf_ctx *ctx)
+{
+	return NF_ACCEPT;
+}
+
+u64 counter = 0;
+
+SEC("netfilter")
+int nf_link_prog_new(struct bpf_nf_ctx *ctx)
+{
+	counter++;
+	return NF_ACCEPT;
+}
+
+char _license[] SEC("license") = "GPL";
+
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC nf-next 1/2] netfilter: bpf: support prog update
  2023-12-13 11:45 ` [RFC nf-next 1/2] " D. Wythe
@ 2023-12-13 22:24   ` Florian Westphal
  2023-12-14  3:25     ` Alexei Starovoitov
  2023-12-14  5:31     ` D. Wythe
  0 siblings, 2 replies; 12+ messages in thread
From: Florian Westphal @ 2023-12-13 22:24 UTC (permalink / raw)
  To: D. Wythe
  Cc: pablo, kadlec, fw, bpf, linux-kernel, netdev, coreteam,
	netfilter-devel, davem, edumazet, kuba, pabeni, ast

D. Wythe <alibuda@linux.alibaba.com> wrote:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
> 
> To support the prog update, we need to ensure that the prog seen
> within the hook is always valid. Considering that hooks are always
> protected by rcu_read_lock(), which provide us the ability to use a
> new RCU-protected context to access the prog.
> 
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ---
>  net/netfilter/nf_bpf_link.c | 124 +++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 111 insertions(+), 13 deletions(-)
> 
> diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
> index e502ec0..918c470 100644
> --- a/net/netfilter/nf_bpf_link.c
> +++ b/net/netfilter/nf_bpf_link.c
> @@ -8,17 +8,11 @@
>  #include <net/netfilter/nf_bpf_link.h>
>  #include <uapi/linux/netfilter_ipv4.h>
>  
> -static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
> -				    const struct nf_hook_state *s)
> +struct bpf_nf_hook_ctx
>  {
> -	const struct bpf_prog *prog = bpf_prog;
> -	struct bpf_nf_ctx ctx = {
> -		.state = s,
> -		.skb = skb,
> -	};
> -
> -	return bpf_prog_run(prog, &ctx);
> -}
> +	struct bpf_prog *prog;
> +	struct rcu_head rcu;
> +};

I don't understand the need for this structure.  AFAICS bpf_prog_put()
will always release the program via call_rcu()?

If it doesn't, we are probably already in trouble as-is without this
patch: I don't think there is anything that prevents us from ending up
calling an already released bpf prog, or releasing it while another cpu
is still running it, if bpf_prog_put releases the actual underlying prog
instantly.

A BPF expert could confirm bpf-prog-put-is-call-rcu.

>  struct bpf_nf_link {
>  	struct bpf_link link;
> @@ -26,8 +20,59 @@ struct bpf_nf_link {
>  	struct net *net;
>  	u32 dead;
>  	const struct nf_defrag_hook *defrag_hook;
> +	/* protect link update in parallel */
> +	struct mutex update_lock;
> +	struct bpf_nf_hook_ctx __rcu *hook_ctx;

What kind of replacements-per-second rate are you aiming for?
I think

static DEFINE_MUTEX(bpf_nf_mutex);

is enough.

Then bpf_nf_link gains

	struct bpf_prog __rcu *prog

and possibly a trailing struct rcu_head, see below.

> +static void bpf_nf_hook_ctx_free_rcu(struct bpf_nf_hook_ctx *hook_ctx)
> +{
> +	call_rcu(&hook_ctx->rcu, __bpf_nf_hook_ctx_free_rcu);
> +}

Don't understand the need for call_rcu either, see below.

> +static unsigned int nf_hook_run_bpf(void *bpf_link, struct sk_buff *skb,
> +				    const struct nf_hook_state *s)
> +{
> +	const struct bpf_nf_link *link = bpf_link;
> +	struct bpf_nf_hook_ctx *hook_ctx;
> +	struct bpf_nf_ctx ctx = {
> +		.state = s,
> +		.skb = skb,
> +	};
> +
> +	hook_ctx = rcu_dereference(link->hook_ctx);

This could then just rcu_deref link->prog.

> +	return bpf_prog_run(hook_ctx->prog, &ctx);
> +}
> +
>  #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
>  static const struct nf_defrag_hook *
>  get_proto_defrag_hook(struct bpf_nf_link *link,
> @@ -120,6 +165,10 @@ static void bpf_nf_link_release(struct bpf_link *link)
>  	if (!cmpxchg(&nf_link->dead, 0, 1)) {
>  		nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
>  		bpf_nf_disable_defrag(nf_link);
> +		/* Wait for outstanding hook to complete before the
> +		 * link gets released.
> +		 */
> +		synchronize_rcu();
>  	}

Could you convert bpf_nf_link_dealloc to release via kfree_rcu instead?

> @@ -162,7 +212,42 @@ static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
>  static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
>  			      struct bpf_prog *old_prog)
>  {
> -	return -EOPNOTSUPP;
> +	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
> +	struct bpf_nf_hook_ctx *hook_ctx;
> +	int err = 0;
> +
> +	mutex_lock(&nf_link->update_lock);
> +

I think you need to check link->dead here too.

> +	/* bpf_nf_link_release() ensures that after its execution, there will be
> +	 * no ongoing or upcoming execution of nf_hook_run_bpf() within any context.
> +	 * Therefore, within nf_hook_run_bpf(), the link remains valid at all times."
> +	 */
> +	link->hook_ops.priv = link;

ATM we only need to make sure the bpf prog itself stays alive until after
all concurrent rcu critical sections have completed.

After this change, struct bpf_link gets passed instead, so we need to
keep that alive too.

Which works with synchronize_rcu, sure, but that seems a bit overkill here.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC nf-next 1/2] netfilter: bpf: support prog update
  2023-12-13 22:24   ` Florian Westphal
@ 2023-12-14  3:25     ` Alexei Starovoitov
  2023-12-14  5:31     ` D. Wythe
  1 sibling, 0 replies; 12+ messages in thread
From: Alexei Starovoitov @ 2023-12-14  3:25 UTC (permalink / raw)
  To: Florian Westphal
  Cc: D. Wythe, Pablo Neira Ayuso, Jozsef Kadlecsik, bpf, LKML,
	Network Development, coreteam, netfilter-devel, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov

On Wed, Dec 13, 2023 at 2:24 PM Florian Westphal <fw@strlen.de> wrote:
>
> D. Wythe <alibuda@linux.alibaba.com> wrote:
> > From: "D. Wythe" <alibuda@linux.alibaba.com>
> >
> > To support the prog update, we need to ensure that the prog seen
> > within the hook is always valid. Considering that hooks are always
> > protected by rcu_read_lock(), which provide us the ability to use a
> > new RCU-protected context to access the prog.
> >
> > Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> > ---
> >  net/netfilter/nf_bpf_link.c | 124 +++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 111 insertions(+), 13 deletions(-)
> >
> > diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
> > index e502ec0..918c470 100644
> > --- a/net/netfilter/nf_bpf_link.c
> > +++ b/net/netfilter/nf_bpf_link.c
> > @@ -8,17 +8,11 @@
> >  #include <net/netfilter/nf_bpf_link.h>
> >  #include <uapi/linux/netfilter_ipv4.h>
> >
> > -static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
> > -                                 const struct nf_hook_state *s)
> > +struct bpf_nf_hook_ctx
> >  {
> > -     const struct bpf_prog *prog = bpf_prog;
> > -     struct bpf_nf_ctx ctx = {
> > -             .state = s,
> > -             .skb = skb,
> > -     };
> > -
> > -     return bpf_prog_run(prog, &ctx);
> > -}
> > +     struct bpf_prog *prog;
> > +     struct rcu_head rcu;
> > +};
>
> I don't understand the need for this structure.  AFAICS bpf_prog_put()
> will always release the program via call_rcu()?
>
> If it doesn't, we are probably already in trouble as-is without this
> patch, I don't think anything that prevents us from ending up calling already
> released bpf prog, or releasing it while another cpu is still running it
> if bpf_prog_put releases the actual underlying prog instantly.
>
> A BPF expert could confirm bpf-prog-put-is-call-rcu.

+1
These patches look unnecessary.
It seems that they accidentally fix something else.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC nf-next 1/2] netfilter: bpf: support prog update
  2023-12-13 22:24   ` Florian Westphal
  2023-12-14  3:25     ` Alexei Starovoitov
@ 2023-12-14  5:31     ` D. Wythe
  2023-12-14  5:50       ` Alexei Starovoitov
  1 sibling, 1 reply; 12+ messages in thread
From: D. Wythe @ 2023-12-14  5:31 UTC (permalink / raw)
  To: Florian Westphal
  Cc: pablo, kadlec, bpf, linux-kernel, netdev, coreteam,
	netfilter-devel, davem, edumazet, kuba, pabeni, ast



On 12/14/23 6:24 AM, Florian Westphal wrote:
> D. Wythe <alibuda@linux.alibaba.com> wrote:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> To support the prog update, we need to ensure that the prog seen
>> within the hook is always valid. Considering that hooks are always
>> protected by rcu_read_lock(), which provide us the ability to use a
>> new RCU-protected context to access the prog.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>>   net/netfilter/nf_bpf_link.c | 124 +++++++++++++++++++++++++++++++++++++++-----
>>   1 file changed, 111 insertions(+), 13 deletions(-)
>>
>> diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
>> index e502ec0..918c470 100644
>> --- a/net/netfilter/nf_bpf_link.c
>> +++ b/net/netfilter/nf_bpf_link.c
>> @@ -8,17 +8,11 @@
>>   #include <net/netfilter/nf_bpf_link.h>
>>   #include <uapi/linux/netfilter_ipv4.h>
>>   
>> -static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
>> -				    const struct nf_hook_state *s)
>> +struct bpf_nf_hook_ctx
>>   {
>> -	const struct bpf_prog *prog = bpf_prog;
>> -	struct bpf_nf_ctx ctx = {
>> -		.state = s,
>> -		.skb = skb,
>> -	};
>> -
>> -	return bpf_prog_run(prog, &ctx);
>> -}
>> +	struct bpf_prog *prog;
>> +	struct rcu_head rcu;
>> +};
> I don't understand the need for this structure.  AFAICS bpf_prog_put()
> will always release the program via call_rcu()?
>
> If it doesn't, we are probably already in trouble as-is without this
> patch, I don't think anything that prevents us from ending up calling already
> released bpf prog, or releasing it while another cpu is still running it
> if bpf_prog_put releases the actual underlying prog instantly.
>
> A BPF expert could confirm bpf-prog-put-is-call-rcu.

Hi Florian,

I must admit that I did not realize that bpf_prog is released
under RCU ...

>>   struct bpf_nf_link {
>>   	struct bpf_link link;
>> @@ -26,8 +20,59 @@ struct bpf_nf_link {
>>   	struct net *net;
>>   	u32 dead;
>>   	const struct nf_defrag_hook *defrag_hook;
>> +	/* protect link update in parallel */
>> +	struct mutex update_lock;
>> +	struct bpf_nf_hook_ctx __rcu *hook_ctx;
> What kind of replacements-per-second rate are you aiming for?
> I think
>
> static DEFINE_MUTEX(bpf_nf_mutex);
>
> is enough.

I'm okay with that.

>
> Then bpf_nf_link gains
>
> 	struct bpf_prog __rcu *prog
>
> and possibly a trailing struct rcu_head, see below.

Yes, that's what we need.

>> +static void bpf_nf_hook_ctx_free_rcu(struct bpf_nf_hook_ctx *hook_ctx)
>> +{
>> +	call_rcu(&hook_ctx->rcu, __bpf_nf_hook_ctx_free_rcu);
>> +}
> Don't understand the need for call_rcu either, see below.
>
>> +static unsigned int nf_hook_run_bpf(void *bpf_link, struct sk_buff *skb,
>> +				    const struct nf_hook_state *s)
>> +{
>> +	const struct bpf_nf_link *link = bpf_link;
>> +	struct bpf_nf_hook_ctx *hook_ctx;
>> +	struct bpf_nf_ctx ctx = {
>> +		.state = s,
>> +		.skb = skb,
>> +	};
>> +
>> +	hook_ctx = rcu_dereference(link->hook_ctx);
> This could then just rcu_deref link->prog.
>
>> +	return bpf_prog_run(hook_ctx->prog, &ctx);
>> +}
>> +
>>   #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
>>   static const struct nf_defrag_hook *
>>   get_proto_defrag_hook(struct bpf_nf_link *link,
>> @@ -120,6 +165,10 @@ static void bpf_nf_link_release(struct bpf_link *link)
>>   	if (!cmpxchg(&nf_link->dead, 0, 1)) {
>>   		nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
>>   		bpf_nf_disable_defrag(nf_link);
>> +		/* Wait for outstanding hook to complete before the
>> +		 * link gets released.
>> +		 */
>> +		synchronize_rcu();
>>   	}
> Could you convert bpf_nf_link_dealloc to release via kfree_rcu instead?
>
Got it.
>> @@ -162,7 +212,42 @@ static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
>>   static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
>>   			      struct bpf_prog *old_prog)
>>   {
>> -	return -EOPNOTSUPP;
>> +	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
>> +	struct bpf_nf_hook_ctx *hook_ctx;
>> +	int err = 0;
>> +
>> +	mutex_lock(&nf_link->update_lock);
>> +
> I think you need to check link->dead here too.

Got that.
>
>> +	/* bpf_nf_link_release() ensures that after its execution, there will be
>> +	 * no ongoing or upcoming execution of nf_hook_run_bpf() within any context.
>> +	 * Therefore, within nf_hook_run_bpf(), the link remains valid at all times."
>> +	 */
>> +	link->hook_ops.priv = link;
> ATM we only need to make sure the bpf prog itself stays alive until after
> all concurrent rcu critical sections have completed.
>
> After this change, struct bpf_link gets passed instead, so we need to
> keep that alive too.
>
> Which works with synchronize_rcu, sure, but that seems a bit overkill here.

Got it! Thank you very much for your suggestion.
I will address those issues you mentioned in the next version.


Best wishes,
D. Wythe


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC nf-next 1/2] netfilter: bpf: support prog update
  2023-12-14  5:31     ` D. Wythe
@ 2023-12-14  5:50       ` Alexei Starovoitov
  2023-12-14  8:56         ` D. Wythe
  0 siblings, 1 reply; 12+ messages in thread
From: Alexei Starovoitov @ 2023-12-14  5:50 UTC (permalink / raw)
  To: D. Wythe
  Cc: Florian Westphal, Pablo Neira Ayuso, Jozsef Kadlecsik, bpf, LKML,
	Network Development, coreteam, netfilter-devel, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov

On Wed, Dec 13, 2023 at 9:31 PM D. Wythe <alibuda@linux.alibaba.com> wrote:
>
> I will address those issues you mentioned in the next version.

Don't. There is no need for the next version.
None of these changes are necessary.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC nf-next 1/2] netfilter: bpf: support prog update
  2023-12-14  5:50       ` Alexei Starovoitov
@ 2023-12-14  8:56         ` D. Wythe
  2023-12-14 13:37           ` Alexei Starovoitov
  0 siblings, 1 reply; 12+ messages in thread
From: D. Wythe @ 2023-12-14  8:56 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Florian Westphal, Pablo Neira Ayuso, Jozsef Kadlecsik, bpf, LKML,
	Network Development, coreteam, netfilter-devel, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov



On 12/14/23 1:50 PM, Alexei Starovoitov wrote:
> On Wed, Dec 13, 2023 at 9:31 PM D. Wythe <alibuda@linux.alibaba.com> wrote:
>> I will address those issues you mentioned in the next version.
> Don't. There is no need for the next version.
> None of these changes are necessary.

May I ask why? Updating the prog of an active link is an important
feature for real applications.

Best wishes,
D. Wythe

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC nf-next 1/2] netfilter: bpf: support prog update
  2023-12-14  8:56         ` D. Wythe
@ 2023-12-14 13:37           ` Alexei Starovoitov
  2023-12-14 15:56             ` D. Wythe
  0 siblings, 1 reply; 12+ messages in thread
From: Alexei Starovoitov @ 2023-12-14 13:37 UTC (permalink / raw)
  To: D. Wythe
  Cc: Florian Westphal, Pablo Neira Ayuso, Jozsef Kadlecsik, bpf, LKML,
	Network Development, coreteam, netfilter-devel, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov

On Thu, Dec 14, 2023 at 12:57 AM D. Wythe <alibuda@linux.alibaba.com> wrote:
>
>
>
> On 12/14/23 1:50 PM, Alexei Starovoitov wrote:
> > On Wed, Dec 13, 2023 at 9:31 PM D. Wythe <alibuda@linux.alibaba.com> wrote:
> >> I will address those issues you mentioned in the next version.
> > Don't. There is no need for the next version.
> > None of these changes are necessary.
>
> Can I know the reason ?  Updating prog for active link is kind of
> important feature
> for real application..

Yes, it is, and it's working as expected. Do you see an issue?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC nf-next 1/2] netfilter: bpf: support prog update
  2023-12-14 13:37           ` Alexei Starovoitov
@ 2023-12-14 15:56             ` D. Wythe
  2023-12-14 16:02               ` Alexei Starovoitov
  0 siblings, 1 reply; 12+ messages in thread
From: D. Wythe @ 2023-12-14 15:56 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Florian Westphal, Pablo Neira Ayuso, Jozsef Kadlecsik, bpf, LKML,
	Network Development, coreteam, netfilter-devel, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov



On 12/14/23 9:37 PM, Alexei Starovoitov wrote:
> yes. it's and it's working as expected. Do you see an issue?

Hi Alexei,

The issue I see is that bpf_nf_link has not yet implemented
prog_update; it simply returns -EOPNOTSUPP right now.

Do you mean that it is already implemented in the latest tree, or
that the not-supported result is expected?

Thanks,
D. Wythe

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC nf-next 1/2] netfilter: bpf: support prog update
  2023-12-14 15:56             ` D. Wythe
@ 2023-12-14 16:02               ` Alexei Starovoitov
  2023-12-14 16:10                 ` D. Wythe
  0 siblings, 1 reply; 12+ messages in thread
From: Alexei Starovoitov @ 2023-12-14 16:02 UTC (permalink / raw)
  To: D. Wythe
  Cc: Florian Westphal, Pablo Neira Ayuso, Jozsef Kadlecsik, bpf, LKML,
	Network Development, coreteam, netfilter-devel, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov

On Thu, Dec 14, 2023 at 7:56 AM D. Wythe <alibuda@linux.alibaba.com> wrote:
>
>
>
> On 12/14/23 9:37 PM, Alexei Starovoitov wrote:
> > yes. it's and it's working as expected. Do you see an issue?
>
> Hi Alexei,
>
> I see the issue here is that bpf_nf_link has not yet implemented
> prog_update,
> which just simply returned -EOPNOTSUPP right now.

I see. The commit log didn't make it clear.
Yes. That would be good to support.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC nf-next 1/2] netfilter: bpf: support prog update
  2023-12-14 16:02               ` Alexei Starovoitov
@ 2023-12-14 16:10                 ` D. Wythe
  0 siblings, 0 replies; 12+ messages in thread
From: D. Wythe @ 2023-12-14 16:10 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Florian Westphal, Pablo Neira Ayuso, Jozsef Kadlecsik, bpf, LKML,
	Network Development, coreteam, netfilter-devel, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov



On 12/15/23 12:02 AM, Alexei Starovoitov wrote:
> I see. The commit log didn't make it clear.
> Yes. That would be good to support.

That's my bad. I will make the commit log more clear in the next version.
In any case, thanks very much for your feedback.

Best wishes,
D. Wythe

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2023-12-14 16:10 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-13 11:45 [RFC nf-next 0/2] netfilter: bpf: support prog update D. Wythe
2023-12-13 11:45 ` [RFC nf-next 1/2] " D. Wythe
2023-12-13 22:24   ` Florian Westphal
2023-12-14  3:25     ` Alexei Starovoitov
2023-12-14  5:31     ` D. Wythe
2023-12-14  5:50       ` Alexei Starovoitov
2023-12-14  8:56         ` D. Wythe
2023-12-14 13:37           ` Alexei Starovoitov
2023-12-14 15:56             ` D. Wythe
2023-12-14 16:02               ` Alexei Starovoitov
2023-12-14 16:10                 ` D. Wythe
2023-12-13 11:45 ` [RFC nf-next 2/2] selftests/bpf: Add netfilter link prog update test D. Wythe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.