netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH nf-next] netfilter: nf_tables: add ebpf expression
@ 2022-08-31 10:16 Florian Westphal
  2022-08-31 12:13 ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 27+ messages in thread
From: Florian Westphal @ 2022-08-31 10:16 UTC (permalink / raw)
  To: netfilter-devel; +Cc: bpf, netdev, Florian Westphal

This expression is a native replacement for xtables 'bpf' match "pinned" mode.
Userspace needs to pass a file descriptor referencing the program (of socket
filter type).
Userspace should also pass the original pathname for that fd so userspace can
print the original filename again.

Tag and program id are dumped to userspace on 'list' to allow to see which
program is in use in case the filename isn't available/present.

cbpf bytecode isn't supported.

No new Kconfig option is added: Its included if BPF_SYSCALL is enabled.

Proposed nft userspace syntax is:

add rule ... ebpf pinned "/sys/fs/bpf/myprog"

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_tables_core.h   |   3 +
 include/uapi/linux/netfilter/nf_tables.h |  18 ++++
 net/netfilter/Makefile                   |   4 +
 net/netfilter/nf_tables_core.c           |   7 ++
 net/netfilter/nft_ebpf.c                 | 128 +++++++++++++++++++++++
 5 files changed, 160 insertions(+)
 create mode 100644 net/netfilter/nft_ebpf.c

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 1223af68cd9a..72ee4a6e2952 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -18,6 +18,7 @@ extern struct nft_expr_type nft_meta_type;
 extern struct nft_expr_type nft_rt_type;
 extern struct nft_expr_type nft_exthdr_type;
 extern struct nft_expr_type nft_last_type;
+extern struct nft_expr_type nft_ebpf_type;
 
 #ifdef CONFIG_NETWORK_SECMARK
 extern struct nft_object_type nft_secmark_obj_type;
@@ -148,4 +149,6 @@ void nft_rt_get_eval(const struct nft_expr *expr,
 		     struct nft_regs *regs, const struct nft_pktinfo *pkt);
 void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs,
                       const struct nft_pktinfo *pkt);
+void nft_ebpf_eval(const struct nft_expr *expr, struct nft_regs *regs,
+		   const struct nft_pktinfo *pkt);
 #endif /* _NET_NF_TABLES_CORE_H */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 466fd3f4447c..39e9442e8c2a 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -805,6 +805,24 @@ enum nft_payload_attributes {
 };
 #define NFTA_PAYLOAD_MAX	(__NFTA_PAYLOAD_MAX - 1)
 
+/**
+ * enum nft_ebpf_attributes - nf_tables ebpf expression netlink attributes
+ *
+ * @NFTA_EBPF_FD: file descriptor holding ebpf program (NLA_U32)
+ * @NFTA_EBPF_FILENAME: file name, only for storage/printing (NLA_STRING)
+ * @NFTA_EBPF_ID: bpf program id (NLA_U32)
+ * @NFTA_EBPF_TAG: bpf tag (NLA_BINARY)
+ */
+enum nft_ebpf_attributes {
+	NFTA_EBPF_UNSPEC,
+	NFTA_EBPF_FD,
+	NFTA_EBPF_FILENAME,
+	NFTA_EBPF_ID,
+	NFTA_EBPF_TAG,
+	__NFTA_EBPF_MAX,
+};
+#define NFTA_EBPF_MAX	(__NFTA_EBPF_MAX - 1)
+
 enum nft_exthdr_flags {
 	NFT_EXTHDR_F_PRESENT = (1 << 0),
 };
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 06df49ea6329..f335a1ea26b9 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -90,6 +90,10 @@ nf_tables-objs += nft_set_pipapo_avx2.o
 endif
 endif
 
+ifdef CONFIG_BPF_SYSCALL
+nf_tables-objs += nft_ebpf.o
+endif
+
 obj-$(CONFIG_NF_TABLES)		+= nf_tables.o
 obj-$(CONFIG_NFT_COMPAT)	+= nft_compat.o
 obj-$(CONFIG_NFT_CONNLIMIT)	+= nft_connlimit.o
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index cee3e4e905ec..f33064959f6c 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/filter.h>
 #include <linux/list.h>
 #include <linux/rculist.h>
 #include <linux/skbuff.h>
@@ -209,6 +210,9 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
 	X(e, nft_dynset_eval);
 	X(e, nft_rt_get_eval);
 	X(e, nft_bitwise_eval);
+#ifdef CONFIG_BPF_SYSCALL
+	X(e, nft_ebpf_eval);
+#endif
 #undef  X
 #endif /* CONFIG_RETPOLINE */
 	expr->ops->eval(expr, regs, pkt);
@@ -340,6 +344,9 @@ static struct nft_expr_type *nft_basic_types[] = {
 	&nft_exthdr_type,
 	&nft_last_type,
 	&nft_counter_type,
+#ifdef CONFIG_BPF_SYSCALL
+	&nft_ebpf_type,
+#endif
 };
 
 static struct nft_object_type *nft_basic_objects[] = {
diff --git a/net/netfilter/nft_ebpf.c b/net/netfilter/nft_ebpf.c
new file mode 100644
index 000000000000..f4945f4e7bc5
--- /dev/null
+++ b/net/netfilter/nft_ebpf.c
@@ -0,0 +1,128 @@
+#include <linux/bpf.h>
+#include <linux/filter.h>
+
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+struct nft_ebpf {
+	struct bpf_prog *prog;
+	const char *name;
+};
+
+static const struct nla_policy nft_ebpf_policy[NFTA_EBPF_MAX + 1] = {
+	[NFTA_EBPF_FD] = { .type = NLA_U32 },
+	[NFTA_EBPF_FILENAME] = { .type = NLA_STRING,
+				 .len = PATH_MAX },
+	[NFTA_EBPF_ID] = { .type = NLA_U32 },
+	[NFTA_EBPF_TAG] = NLA_POLICY_EXACT_LEN(BPF_TAG_SIZE),
+};
+
+void nft_ebpf_eval(const struct nft_expr *expr, struct nft_regs *regs,
+		   const struct nft_pktinfo *pkt)
+{
+	const struct nft_ebpf *priv = nft_expr_priv(expr);
+
+	if (!bpf_prog_run_save_cb(priv->prog, pkt->skb))
+		regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_ebpf_init(const struct nft_ctx *ctx,
+			 const struct nft_expr *expr,
+			 const struct nlattr * const tb[])
+{
+	struct nft_ebpf *priv = nft_expr_priv(expr);
+	struct bpf_prog *prog;
+	int fd;
+
+	if (!tb[NFTA_EBPF_FD])
+		return -EINVAL;
+
+	if (tb[NFTA_EBPF_ID] || tb[NFTA_EBPF_TAG])
+		return -EOPNOTSUPP;
+
+	fd = ntohl(nla_get_u32(tb[NFTA_EBPF_FD]));
+	if (fd < 0)
+		return -EBADF;
+
+	if (tb[NFTA_EBPF_FILENAME]) {
+		priv->name = nla_strdup(tb[NFTA_EBPF_FILENAME], GFP_KERNEL);
+		if (!priv->name)
+			return -ENOMEM;
+	}
+
+	prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+	if (IS_ERR(prog)) {
+		kfree(priv->name);
+		return PTR_ERR(prog);
+	}
+
+	priv->prog = prog;
+	return 0;
+}
+
+static void nft_ebpf_destroy(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr)
+{
+	struct nft_ebpf *priv = nft_expr_priv(expr);
+
+	bpf_prog_destroy(priv->prog);
+	kfree(priv->name);
+}
+
+static int nft_ebpf_validate(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nft_data **data)
+{
+	static const unsigned int supported_hooks = ((1 << NF_INET_PRE_ROUTING) |
+						     (1 << NF_INET_LOCAL_IN) |
+						     (1 << NF_INET_FORWARD) |
+						     (1 << NF_INET_LOCAL_OUT) |
+						     (1 << NF_INET_POST_ROUTING));
+
+	switch (ctx->family) {
+	case NFPROTO_IPV4:
+	case NFPROTO_IPV6:
+	case NFPROTO_INET:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return nft_chain_validate_hooks(ctx->chain, supported_hooks);
+}
+
+static int nft_ebpf_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_ebpf *priv = nft_expr_priv(expr);
+	const struct bpf_prog *prog = priv->prog;
+
+	if (nla_put_be32(skb, NFTA_EBPF_ID, htonl(prog->aux->id)))
+		return -1;
+
+	if (nla_put(skb, NFTA_EBPF_TAG, sizeof(prog->tag), prog->tag))
+		return -1;
+
+	if (priv->name && nla_put_string(skb, NFTA_EBPF_FILENAME, priv->name))
+		return -1;
+
+	return 0;
+}
+
+static const struct nft_expr_ops nft_ebpf_ops = {
+	.type		= &nft_ebpf_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ebpf)),
+	.init		= nft_ebpf_init,
+	.eval		= nft_ebpf_eval,
+	.reduce		= NFT_REDUCE_READONLY,
+	.destroy	= nft_ebpf_destroy,
+	.dump		= nft_ebpf_dump,
+	.validate	= nft_ebpf_validate,
+};
+
+struct nft_expr_type nft_ebpf_type __read_mostly = {
+	.name		= "ebpf",
+	.ops		= &nft_ebpf_ops,
+	.policy		= nft_ebpf_policy,
+	.maxattr	= NFTA_EBPF_MAX,
+	.owner		= THIS_MODULE,
+};
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 10:16 [PATCH nf-next] netfilter: nf_tables: add ebpf expression Florian Westphal
@ 2022-08-31 12:13 ` Toke Høiland-Jørgensen
  2022-08-31 12:56   ` Florian Westphal
  0 siblings, 1 reply; 27+ messages in thread
From: Toke Høiland-Jørgensen @ 2022-08-31 12:13 UTC (permalink / raw)
  To: Florian Westphal, netfilter-devel; +Cc: bpf, netdev

Florian Westphal <fw@strlen.de> writes:

> This expression is a native replacement for xtables 'bpf' match "pinned" mode.
> Userspace needs to pass a file descriptor referencing the program (of socket
> filter type).
> Userspace should also pass the original pathname for that fd so userspace can
> print the original filename again.
>
> Tag and program id are dumped to userspace on 'list' to allow to see which
> program is in use in case the filename isn't available/present.

It seems a bit odd to include the file path in the kernel as well. For
one thing, the same object can be pinned multiple times in different
paths (even in different mount namespaces), and there's also nothing
preventing a different program to have been substituted by the pinned
one by the time the value is echoed back.

Also, there's nothing checking that the path attribute actually contains
a path, so it's really just an arbitrary label that the kernel promises
to echo back. But doesn't NFT already have a per-rule comment feature,
so why add another specifically for BPF? Instead we could just teach the
userspace utility to extract metadata from the BPF program (based on the
ID) like bpftool does. This would include the program name, BTW, so it
does have a semantic identifier.

> cbpf bytecode isn't supported.
>
> No new Kconfig option is added: Its included if BPF_SYSCALL is enabled.
>
> Proposed nft userspace syntax is:
>
> add rule ... ebpf pinned "/sys/fs/bpf/myprog"

Any plan to also teach the nft binary to load a BPF program from an ELF
file (instead of relying on pinning)?

-Toke

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 12:13 ` Toke Høiland-Jørgensen
@ 2022-08-31 12:56   ` Florian Westphal
  2022-08-31 13:41     ` Toke Høiland-Jørgensen
  2022-08-31 13:44     ` Florian Westphal
  0 siblings, 2 replies; 27+ messages in thread
From: Florian Westphal @ 2022-08-31 12:56 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Florian Westphal, netfilter-devel, bpf, netdev

Toke Høiland-Jørgensen <toke@kernel.org> wrote:
> > Tag and program id are dumped to userspace on 'list' to allow to see which
> > program is in use in case the filename isn't available/present.
> 
> It seems a bit odd to include the file path in the kernel as well.

Its needed to be able to re-load the ruleset.

> For
> one thing, the same object can be pinned multiple times in different
> paths (even in different mount namespaces),

Sure.

> and there's also nothing
> preventing a different program to have been substituted by the pinned
> one by the time the value is echoed back.

Yes, but what would you expect it should do?

> Also, there's nothing checking that the path attribute actually contains
> a path, so it's really just an arbitrary label that the kernel promises
> to echo back

Yes exactly.

> But doesn't NFT already have a per-rule comment feature,
> so why add another specifically for BPF?

You can attach up to 256 bytes to a rule, yes.
Might not be enough for a longer path, and there could be multiple
expressions in the same rule.

This way was the most simple solution.

> Instead we could just teach the
> userspace utility to extract metadata from the BPF program (based on the
> ID) like bpftool does. This would include the program name, BTW, so it
> does have a semantic identifier.

Sure, I could change the grammar so it expects a tag or ID, e.g.
'ebpf id 42'

If thats preferred, I can change this, it avoids the need for storing
the name.

> > cbpf bytecode isn't supported.
> > add rule ... ebpf pinned "/sys/fs/bpf/myprog"
> 
> Any plan to also teach the nft binary to load a BPF program from an ELF
> file (instead of relying on pinning)?

I used pinning because that is what '-m bpf' uses.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 12:56   ` Florian Westphal
@ 2022-08-31 13:41     ` Toke Høiland-Jørgensen
  2022-08-31 13:57       ` Florian Westphal
  2022-08-31 13:44     ` Florian Westphal
  1 sibling, 1 reply; 27+ messages in thread
From: Toke Høiland-Jørgensen @ 2022-08-31 13:41 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel, bpf, netdev

Florian Westphal <fw@strlen.de> writes:

> Toke Høiland-Jørgensen <toke@kernel.org> wrote:
>> > Tag and program id are dumped to userspace on 'list' to allow to see which
>> > program is in use in case the filename isn't available/present.
>> 
>> It seems a bit odd to include the file path in the kernel as well.
>
> Its needed to be able to re-load the ruleset.

How does that work, exactly? Is this so that the userspace binary can
query the current ruleset, and feed it back to the kernel expecting it
to stay the same? Because in that case, if the pinned object goes away
in the meantime (or changes to a different program), this could lead to
some really hard to debug errors, where a reload subtly changes the
behaviour because the BPF program is not in fact the same.

Using IDs would avoid this ambiguity at least, so I think that's a
better solution. We'd have to make sure the BPF program is not released
completely until after the reload has finished, so that it doesn't
suddenly disappear.

>> But doesn't NFT already have a per-rule comment feature,
>> so why add another specifically for BPF?
>
> You can attach up to 256 bytes to a rule, yes.
> Might not be enough for a longer path, and there could be multiple
> expressions in the same rule.
>
> This way was the most simple solution.

My point here was more that if it's just a label for human consumption,
the comment field should be fine, didn't realise it was needed for the
tool operation (and see above re: that).

>> Instead we could just teach the
>> userspace utility to extract metadata from the BPF program (based on the
>> ID) like bpftool does. This would include the program name, BTW, so it
>> does have a semantic identifier.
>
> Sure, I could change the grammar so it expects a tag or ID, e.g.
> 'ebpf id 42'
>
> If thats preferred, I can change this, it avoids the need for storing
> the name.

I think for echoing back, just relying on the ID is better as that is at
least guaranteed to stay constant for the lifetime of the BPF program in
the kernel. We could still support the 'pinned <path>' syntax on the
command line so that the initial load could be done from a pinned file,
just as a user interface improvement...

>> > cbpf bytecode isn't supported.
>> > add rule ... ebpf pinned "/sys/fs/bpf/myprog"
>> 
>> Any plan to also teach the nft binary to load a BPF program from an ELF
>> file (instead of relying on pinning)?
>
> I used pinning because that is what '-m bpf' uses.

I'm not against supporting pinning, per se (except for the issues noted
above), but we could do multiple things, including supporting loading
the program from an object file. This is similar to how TC operates, for
instance...

-Toke

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 12:56   ` Florian Westphal
  2022-08-31 13:41     ` Toke Høiland-Jørgensen
@ 2022-08-31 13:44     ` Florian Westphal
  1 sibling, 0 replies; 27+ messages in thread
From: Florian Westphal @ 2022-08-31 13:44 UTC (permalink / raw)
  To: Florian Westphal
  Cc: Toke Høiland-Jørgensen, netfilter-devel, bpf, netdev

Florian Westphal <fw@strlen.de> wrote:
> Toke Høiland-Jørgensen <toke@kernel.org> wrote:
> > > Tag and program id are dumped to userspace on 'list' to allow to see which
> > > program is in use in case the filename isn't available/present.
> > 
> > It seems a bit odd to include the file path in the kernel as well.
> 
> Its needed to be able to re-load the ruleset.

In particular, I can't find any better alternative.

load by id -> works, easy to echo back to userspace, but not stable
identifier across reboots or add/del operations of the program.

load by tag -> similar, except that this time the tag needs to be
adjusted whenever the program changes, so not ideal either.

load via ELF name -> same problems as the proposed 'pinned' mode, but perhaps a bit
easier to use?

It has the slight advantage that users don't need to load/pin the program first,
lifetime of the program would be tied to the nftables rule.

The downside is that nft needs to deal with possible rejection of the program
instead of 'outsourcing' this problem to bpftool (or another program).

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 13:41     ` Toke Høiland-Jørgensen
@ 2022-08-31 13:57       ` Florian Westphal
  2022-08-31 14:43         ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 27+ messages in thread
From: Florian Westphal @ 2022-08-31 13:57 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Florian Westphal, netfilter-devel, bpf, netdev

Toke Høiland-Jørgensen <toke@kernel.org> wrote:
> >> It seems a bit odd to include the file path in the kernel as well.
> >
> > Its needed to be able to re-load the ruleset.
> 
> How does that work, exactly? Is this so that the userspace binary can
> query the current ruleset, and feed it back to the kernel expecting it
> to stay the same?

Yes.

> Because in that case, if the pinned object goes away
> in the meantime (or changes to a different program), this could lead to
> some really hard to debug errors, where a reload subtly changes the
> behaviour because the BPF program is not in fact the same.

Correct, but thats kind of expected when the user changes programs
logic.

Same with a 'nft list ruleset > /etc/nft.txt', reboot,
'nft -f /etc/nft.txt' fails because user forgot to load/pin the program
first.

> Using IDs would avoid this ambiguity at least, so I think that's a
> better solution. We'd have to make sure the BPF program is not released
> completely until after the reload has finished, so that it doesn't
> suddenly disappear.

This should be covered, the destructor runs after the ruleset has been
detached from the data plan (and after a synchronize_rcu).

> > This way was the most simple solution.
> 
> My point here was more that if it's just a label for human consumption,
> the comment field should be fine, didn't realise it was needed for the
> tool operation (and see above re: that).

Yes, this is unfortunate.  I would like to avoid introducing an
asymmetry between input and output (as in "... add rule ebpf pinned
bla', but 'nft list ruleset' showing 'ebpf id 42') or similar, UNLESS we
can somehow use that alternate output to reconstruct that was originally
intended.  And so far I can only see that happening with storing some
label in the kernel for userspace to consume (elf filename, pinned name,
program name ... ).

To give an example:

With 'ebpf id 42', we might be able to let this get echoed back as if
user would have said 'ebpf progname myfilter' (I am making this up!),
just to have a more 'stable' identifier.

This would make it necessary to also support load-by-program-name, of
course.

> > Sure, I could change the grammar so it expects a tag or ID, e.g.
> > 'ebpf id 42'
> >
> > If thats preferred, I can change this, it avoids the need for storing
> > the name.
> 
> I think for echoing back, just relying on the ID is better as that is at
> least guaranteed to stay constant for the lifetime of the BPF program in
> the kernel.

Yes, I realize that, this is why the id and tag are included in the
netlink dump, but on the userspace side this information is currently
hidden and only shown with --debug output.

> >> Any plan to also teach the nft binary to load a BPF program from an ELF
> >> file (instead of relying on pinning)?
> >
> > I used pinning because that is what '-m bpf' uses.
> 
> I'm not against supporting pinning, per se (except for the issues noted
> above),

Okay, thanks for clarifying.  -m bpf is a bit older so I was not sure if
pinning has been deprecated or something like that.

> But we could do multiple things, including supporting loading
> the program from an object file. This is similar to how TC operates, for
> instance...

Right, there is no need to restrict this to one method.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 13:57       ` Florian Westphal
@ 2022-08-31 14:43         ` Toke Høiland-Jørgensen
  2022-08-31 15:09           ` Pablo Neira Ayuso
  2022-08-31 15:26           ` Florian Westphal
  0 siblings, 2 replies; 27+ messages in thread
From: Toke Høiland-Jørgensen @ 2022-08-31 14:43 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel, bpf, netdev

Florian Westphal <fw@strlen.de> writes:

> Toke Høiland-Jørgensen <toke@kernel.org> wrote:
>> >> It seems a bit odd to include the file path in the kernel as well.
>> >
>> > Its needed to be able to re-load the ruleset.
>> 
>> How does that work, exactly? Is this so that the userspace binary can
>> query the current ruleset, and feed it back to the kernel expecting it
>> to stay the same?
>
> Yes.
>
>> Because in that case, if the pinned object goes away
>> in the meantime (or changes to a different program), this could lead to
>> some really hard to debug errors, where a reload subtly changes the
>> behaviour because the BPF program is not in fact the same.
>
> Correct, but thats kind of expected when the user changes programs
> logic.
>
> Same with a 'nft list ruleset > /etc/nft.txt', reboot,
> 'nft -f /etc/nft.txt' fails because user forgot to load/pin the program
> first.

Right, so under what conditions is the identifier expected to survive,
exactly? It's okay if it fails after a reboot, but it should keep
working while the system is up?

>> > This way was the most simple solution.
>> 
>> My point here was more that if it's just a label for human consumption,
>> the comment field should be fine, didn't realise it was needed for the
>> tool operation (and see above re: that).
>
> Yes, this is unfortunate.  I would like to avoid introducing an
> asymmetry between input and output (as in "... add rule ebpf pinned
> bla', but 'nft list ruleset' showing 'ebpf id 42') or similar, UNLESS we
> can somehow use that alternate output to reconstruct that was originally
> intended.  And so far I can only see that happening with storing some
> label in the kernel for userspace to consume (elf filename, pinned name,
> program name ... ).
>
> To give an example:
>
> With 'ebpf id 42', we might be able to let this get echoed back as if
> user would have said 'ebpf progname myfilter' (I am making this up!),
> just to have a more 'stable' identifier.
>
> This would make it necessary to also support load-by-program-name, of
> course.

Seems like this kind of mapping can be done in userspace without
involving the kernel?

For example, the 'progname' thing could be implemented by defining an
nft-specific pinning location so that 'ebpf progname myfilter' is
equivalent to 'ebpf pinned /sys/bpf/nft/myfilter' and when nft receives
an ID from the kernel it goes looking in /sys/bpf/nft to see if it can
find the program with that ID and echoes it with the appropriate
progname if it does exist?

This could also be extended, so that if a user does '... add rule ebpf
file /usr/lib/bpf/myrule.o' the nft binary stashes the id -> .o file
mapping somewhere (in /run for instance) so that it can echo back where
it got it from later?

In either case I'm not really sure that there's much to be gained from
asking the kernel to store an additional label with the program rule?

-Toke

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 14:43         ` Toke Høiland-Jørgensen
@ 2022-08-31 15:09           ` Pablo Neira Ayuso
  2022-08-31 15:35             ` Florian Westphal
  2022-08-31 15:26           ` Florian Westphal
  1 sibling, 1 reply; 27+ messages in thread
From: Pablo Neira Ayuso @ 2022-08-31 15:09 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Florian Westphal, netfilter-devel, bpf, netdev

On Wed, Aug 31, 2022 at 04:43:26PM +0200, Toke Høiland-Jørgensen wrote:
> Florian Westphal <fw@strlen.de> writes:
> 
> > Toke Høiland-Jørgensen <toke@kernel.org> wrote:
> >> >> It seems a bit odd to include the file path in the kernel as well.
> >> >
> >> > Its needed to be able to re-load the ruleset.
> >> 
> >> How does that work, exactly? Is this so that the userspace binary can
> >> query the current ruleset, and feed it back to the kernel expecting it
> >> to stay the same?
> >
> > Yes.
> >
> >> Because in that case, if the pinned object goes away
> >> in the meantime (or changes to a different program), this could lead to
> >> some really hard to debug errors, where a reload subtly changes the
> >> behaviour because the BPF program is not in fact the same.
> >
> > Correct, but thats kind of expected when the user changes programs
> > logic.
> >
> > Same with a 'nft list ruleset > /etc/nft.txt', reboot,
> > 'nft -f /etc/nft.txt' fails because user forgot to load/pin the program
> > first.
> 
> Right, so under what conditions is the identifier expected to survive,
> exactly? It's okay if it fails after a reboot, but it should keep
> working while the system is up?
> 
> >> > This way was the most simple solution.
> >> 
> >> My point here was more that if it's just a label for human consumption,
> >> the comment field should be fine, didn't realise it was needed for the
> >> tool operation (and see above re: that).
> >
> > Yes, this is unfortunate.  I would like to avoid introducing an
> > asymmetry between input and output (as in "... add rule ebpf pinned
> > bla', but 'nft list ruleset' showing 'ebpf id 42') or similar, UNLESS we
> > can somehow use that alternate output to reconstruct that was originally
> > intended.  And so far I can only see that happening with storing some
> > label in the kernel for userspace to consume (elf filename, pinned name,
> > program name ... ).
> >
> > To give an example:
> >
> > With 'ebpf id 42', we might be able to let this get echoed back as if
> > user would have said 'ebpf progname myfilter' (I am making this up!),
> > just to have a more 'stable' identifier.
> >
> > This would make it necessary to also support load-by-program-name, of
> > course.
> 
> Seems like this kind of mapping can be done in userspace without
> involving the kernel?
> 
> For example, the 'progname' thing could be implemented by defining an
> nft-specific pinning location so that 'ebpf progname myfilter' is
> equivalent to 'ebpf pinned /sys/bpf/nft/myfilter' and when nft receives
> an ID from the kernel it goes looking in /sys/bpf/nft to see if it can
> find the program with that ID and echoes it with the appropriate
> progname if it does exist?
> 
> This could also be extended, so that if a user does '... add rule ebpf
> file /usr/lib/bpf/myrule.o' the nft binary stashes the id -> .o file
> mapping somewhere (in /run for instance) so that it can echo back where
> it got it from later?
> 
> In either case I'm not really sure that there's much to be gained from
> asking the kernel to store an additional label with the program rule?

@Florian, could you probably use the object infrastructure to refer to
the program?

This might also allow you to refer to this new object type from
nf_tables maps.

It would be good to avoid linear rule-based matching to select what
program to run.

Maybe this also fits fine for your requirements?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 14:43         ` Toke Høiland-Jørgensen
  2022-08-31 15:09           ` Pablo Neira Ayuso
@ 2022-08-31 15:26           ` Florian Westphal
  2022-08-31 15:39             ` Alexei Starovoitov
  2022-08-31 20:44             ` Toke Høiland-Jørgensen
  1 sibling, 2 replies; 27+ messages in thread
From: Florian Westphal @ 2022-08-31 15:26 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Florian Westphal, netfilter-devel, bpf, netdev

Toke Høiland-Jørgensen <toke@kernel.org> wrote:
> > Same with a 'nft list ruleset > /etc/nft.txt', reboot,
> > 'nft -f /etc/nft.txt' fails because user forgot to load/pin the program
> > first.
> 
> Right, so under what conditions is the identifier expected to survive,
> exactly? It's okay if it fails after a reboot, but it should keep
> working while the system is up?

Right, thats the question.  I think it boils down to 'least surprise',
which to me would mean useable labels are:

1. pinned name
2. elf filename
3. filter name

3) has the advantage that afaiu I can extend nft to use the dumped
id + program tag to query the name from the kernel, whereas 1+2 would
need to store the label.

1 and 2 have the upside that its easy to handle a 'file not found'
error.

> >> My point here was more that if it's just a label for human consumption,
> >> the comment field should be fine, didn't realise it was needed for the
> >> tool operation (and see above re: that).
> >
> > Yes, this is unfortunate.  I would like to avoid introducing an
> > asymmetry between input and output (as in "... add rule ebpf pinned
> > bla', but 'nft list ruleset' showing 'ebpf id 42') or similar, UNLESS we
> > can somehow use that alternate output to reconstruct that was originally
> > intended.  And so far I can only see that happening with storing some
> > label in the kernel for userspace to consume (elf filename, pinned name,
> > program name ... ).
> >
> > To give an example:
> >
> > With 'ebpf id 42', we might be able to let this get echoed back as if
> > user would have said 'ebpf progname myfilter' (I am making this up!),
> > just to have a more 'stable' identifier.
> >
> > This would make it necessary to also support load-by-program-name, of
> > course.
> 
> Seems like this kind of mapping can be done in userspace without
> involving the kernel?
>
> For example, the 'progname' thing could be implemented by defining an
> nft-specific pinning location so that 'ebpf progname myfilter' is
> equivalent to 'ebpf pinned /sys/bpf/nft/myfilter' and when nft receives
> an ID from the kernel it goes looking in /sys/bpf/nft to see if it can
> find the program with that ID and echoes it with the appropriate
> progname if it does exist?

Thats an interesting proposal.

I had not considered an nft specific namespace, just the raw pinned
filename.

> This could also be extended, so that if a user does '... add rule ebpf
> file /usr/lib/bpf/myrule.o' the nft binary stashes the id -> .o file
> mapping somewhere (in /run for instance) so that it can echo back where
> it got it from later?

Oh, I would prefer to abuse the kernel as a "database" directly for
that rather than keeping text files that have to be managed externally.

But, all things considered, what about this:

I'll respin, with the FILENAME attribute removed, so user says
'ebpf pinned bla', and on listing nft walks /sys/bpf/nft/ to see if
it can find the name again.

If it can't find it, print the id instead.

This would mean nft would also have to understand
'ebpf id 12' on input, but I think thats fine. We can document that
this is not the preferred method due to the difficulty of determining
the correct id to use.

With this, no 'extra' userspace-sake info needs to be stored.
We can revisit what do with 'ebpf file /bla/foo.o' once/if that gets
added.

What do you think?
Will take a while because I'll need to extend the nft side first to cope
with lack of 'FILENAME' attribute.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 15:09           ` Pablo Neira Ayuso
@ 2022-08-31 15:35             ` Florian Westphal
  2022-08-31 20:38               ` Pablo Neira Ayuso
  0 siblings, 1 reply; 27+ messages in thread
From: Florian Westphal @ 2022-08-31 15:35 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: Toke Høiland-Jørgensen, Florian Westphal,
	netfilter-devel, bpf, netdev

Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > asking the kernel to store an additional label with the program rule?
> 
> @Florian, could you probably use the object infrastructure to refer to
> the program?

Yes, I would like to extend objref infra later once this is accepted.

> This might also allow you to refer to this new object type from
> nf_tables maps.

Yes, but first nft needs to be able to construct some meaningful output
again.  If we don't attach a specific label (such as filename), we need
to be able to reconstruct info based on what we can query via id/tag and
bpf syscall.

objref infra doesn't help here unless we'll force something like
'nft-defined-objref-name-must-match-elf-binary-name', and I find that
terrible.

> It would be good to avoid linear rule-based matching to select what
> program to run.

Hmmm, I did not consider it a huge deal, its an ebpf program so
users can dispatch to another program.

Objref is nice if the program to run should be selected from a criterion that isn't
readily available to a sk_filter program though.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 15:26           ` Florian Westphal
@ 2022-08-31 15:39             ` Alexei Starovoitov
  2022-08-31 15:53               ` Florian Westphal
  2022-08-31 20:44             ` Toke Høiland-Jørgensen
  1 sibling, 1 reply; 27+ messages in thread
From: Alexei Starovoitov @ 2022-08-31 15:39 UTC (permalink / raw)
  To: Florian Westphal, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, David S. Miller, Jakub Kicinski,
	Network Development
  Cc: Toke Høiland-Jørgensen, netfilter-devel, bpf

On Wed, Aug 31, 2022 at 8:31 AM Florian Westphal <fw@strlen.de> wrote:
>
> Toke Høiland-Jørgensen <toke@kernel.org> wrote:
> > > Same with a 'nft list ruleset > /etc/nft.txt', reboot,
> > > 'nft -f /etc/nft.txt' fails because user forgot to load/pin the program
> > > first.
> >
> > Right, so under what conditions is the identifier expected to survive,
> > exactly? It's okay if it fails after a reboot, but it should keep
> > working while the system is up?
>
> Right, thats the question.  I think it boils down to 'least surprise',
> which to me would mean useable labels are:
>
> 1. pinned name
> 2. elf filename
> 3. filter name
>
> 3) has the advantage that afaiu I can extend nft to use the dumped
> id + program tag to query the name from the kernel, whereas 1+2 would
> need to store the label.
>
> 1 and 2 have the upside that its easy to handle a 'file not found'
> error.

I'm strongly against calling into bpf from the inner guts of nft.
Nack to all options discussed in this thread.
None of them make any sense.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 15:39             ` Alexei Starovoitov
@ 2022-08-31 15:53               ` Florian Westphal
  2022-08-31 17:26                 ` Alexei Starovoitov
  0 siblings, 1 reply; 27+ messages in thread
From: Florian Westphal @ 2022-08-31 15:53 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Florian Westphal, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, David S. Miller, Jakub Kicinski,
	Network Development, Toke Høiland-Jørgensen,
	netfilter-devel, bpf

Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> > 1 and 2 have the upside that its easy to handle a 'file not found'
> > error.
> 
> I'm strongly against calling into bpf from the inner guts of nft.
> Nack to all options discussed in this thread.
> None of them make any sense.

-v please.  I can just rework userspace to allow going via xt_bpf
but its brain damaged.

This helps gradually moving towards move epbf for those that
still heavily rely on the classic forwarding path.

If you are open to BPF_PROG_TYPE_NETFILTER I can go that route
as well, raw bpf program attachment via NF_HOOK and the bpf dispatcher,
but it will take significantly longer to get there.

It involves reviving
https://lore.kernel.org/netfilter-devel/20211014121046.29329-1-fw@strlen.de/

as a first stage/merge goal.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 15:53               ` Florian Westphal
@ 2022-08-31 17:26                 ` Alexei Starovoitov
  2022-08-31 21:49                   ` Daniel Borkmann
                                     ` (2 more replies)
  0 siblings, 3 replies; 27+ messages in thread
From: Alexei Starovoitov @ 2022-08-31 17:26 UTC (permalink / raw)
  To: Florian Westphal
  Cc: Daniel Borkmann, Andrii Nakryiko, Martin KaFai Lau,
	David S. Miller, Jakub Kicinski, Network Development,
	Toke Høiland-Jørgensen, netfilter-devel, bpf

On Wed, Aug 31, 2022 at 8:53 AM Florian Westphal <fw@strlen.de> wrote:
>
> Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> > > 1 and 2 have the upside that its easy to handle a 'file not found'
> > > error.
> >
> > I'm strongly against calling into bpf from the inner guts of nft.
> > Nack to all options discussed in this thread.
> > None of them make any sense.
>
> -v please.  I can just rework userspace to allow going via xt_bpf
> but its brain damaged.

Right. xt_bpf was a dead end from the start.
It's time to deprecate it and remove it.

> This helps gradually moving towards move epbf for those that
> still heavily rely on the classic forwarding path.

No one is using it.
If it was, we would have seen at least one bug report over
all these years. We've seen none.

tbh we had a fair share of wrong design decisions that look
very reasonable early on and turned out to be useless with
zero users.
BPF_PROG_TYPE_SCHED_ACT and BPF_PROG_TYPE_LWT*
are in this category.
All this code does is bit rot.
As a minimum we shouldn't step on the same rakes.
xt_ebpf would be the same dead code as xt_bpf.

> If you are open to BPF_PROG_TYPE_NETFILTER I can go that route
> as well, raw bpf program attachment via NF_HOOK and the bpf dispatcher,
> but it will take significantly longer to get there.
>
> It involves reviving
> https://lore.kernel.org/netfilter-devel/20211014121046.29329-1-fw@strlen.de/

I missed it earlier. What is the end goal ?
Optimize nft run-time with on the fly generation of bpf byte code ?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 15:35             ` Florian Westphal
@ 2022-08-31 20:38               ` Pablo Neira Ayuso
  0 siblings, 0 replies; 27+ messages in thread
From: Pablo Neira Ayuso @ 2022-08-31 20:38 UTC (permalink / raw)
  To: Florian Westphal
  Cc: Toke Høiland-Jørgensen, netfilter-devel, bpf, netdev

On Wed, Aug 31, 2022 at 05:35:08PM +0200, Florian Westphal wrote:
> Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > > asking the kernel to store an additional label with the program rule?
> > 
> > @Florian, could you probably use the object infrastructure to refer to
> > the program?
> 
> Yes, I would like to extend objref infra later once this is accepted.
> 
> > This might also allow you to refer to this new object type from
> > nf_tables maps.
> 
> Yes, but first nft needs to be able to construct some meaningful output
> again.  If we don't attach a specific label (such as filename), we need
> to be able to reconstruct info based on what we can query via id/tag and
> bpf syscall.
> 
> objref infra doesn't help here unless we'll force something like
> 'nft-defined-objref-name-must-match-elf-binary-name', and I find that
> terrible.

OK, you don't have to select such an ugly long name ;)

But I get your point: users need to declare explicitly the object.

> > It would be good to avoid linear rule-based matching to select what
> > program to run.
> 
> Hmmm, I did not consider it a huge deal, its an ebpf program so
> users can dispatch to another program.
> 
> Objref is nice if the program to run should be selected from a criterion that isn't
> readily available to a sk_filter program though.

You can also perform updates on the object without the need for
reloading your ruleset. And the declared object also allows for more
attributes to be added on it moving forward.

I think this approach would also allow you to maintain symmetry
between what you add and what it is shown in the listing?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 15:26           ` Florian Westphal
  2022-08-31 15:39             ` Alexei Starovoitov
@ 2022-08-31 20:44             ` Toke Høiland-Jørgensen
  1 sibling, 0 replies; 27+ messages in thread
From: Toke Høiland-Jørgensen @ 2022-08-31 20:44 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel, bpf, netdev

> But, all things considered, what about this:
>
> I'll respin, with the FILENAME attribute removed, so user says
> 'ebpf pinned bla', and on listing nft walks /sys/bpf/nft/ to see if
> it can find the name again.
>
> If it can't find it, print the id instead.
>
> This would mean nft would also have to understand
> 'ebpf id 12' on input, but I think thats fine. We can document that
> this is not the preferred method due to the difficulty of determining
> the correct id to use.
>
> With this, no 'extra' userspace-sake info needs to be stored.
> We can revisit what do with 'ebpf file /bla/foo.o' once/if that gets
> added.
>
> What do you think?
> Will take a while because I'll need to extend the nft side first to cope
> with lack of 'FILENAME' attribute.

To the extend it's still relevant, yeah, this seems like a reasonable
plan to me :)

-Toke

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 17:26                 ` Alexei Starovoitov
@ 2022-08-31 21:49                   ` Daniel Borkmann
  2022-09-01  5:18                     ` Eyal Birger
  2022-09-01 10:14                     ` Florian Westphal
  2022-08-31 21:57                   ` Florian Westphal
  2022-09-01  8:08                   ` Jan Engelhardt
  2 siblings, 2 replies; 27+ messages in thread
From: Daniel Borkmann @ 2022-08-31 21:49 UTC (permalink / raw)
  To: Alexei Starovoitov, Florian Westphal
  Cc: Andrii Nakryiko, Martin KaFai Lau, David S. Miller,
	Jakub Kicinski, Network Development,
	Toke Høiland-Jørgensen, netfilter-devel, bpf

On 8/31/22 7:26 PM, Alexei Starovoitov wrote:
> On Wed, Aug 31, 2022 at 8:53 AM Florian Westphal <fw@strlen.de> wrote:
>> Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>>>> 1 and 2 have the upside that its easy to handle a 'file not found'
>>>> error.
>>>
>>> I'm strongly against calling into bpf from the inner guts of nft.
>>> Nack to all options discussed in this thread.
>>> None of them make any sense.
>>
>> -v please.  I can just rework userspace to allow going via xt_bpf
>> but its brain damaged.
> 
> Right. xt_bpf was a dead end from the start.
> It's time to deprecate it and remove it.
> 
>> This helps gradually moving towards move epbf for those that
>> still heavily rely on the classic forwarding path.
> 
> No one is using it.
> If it was, we would have seen at least one bug report over
> all these years. We've seen none.
> 
> tbh we had a fair share of wrong design decisions that look
> very reasonable early on and turned out to be useless with
> zero users.
> BPF_PROG_TYPE_SCHED_ACT and BPF_PROG_TYPE_LWT*
> are in this category. > All this code does is bit rot.

+1

> As a minimum we shouldn't step on the same rakes.
> xt_ebpf would be the same dead code as xt_bpf.

+1, and on top, the user experience will just be horrible. :(

>> If you are open to BPF_PROG_TYPE_NETFILTER I can go that route
>> as well, raw bpf program attachment via NF_HOOK and the bpf dispatcher,
>> but it will take significantly longer to get there.
>>
>> It involves reviving
>> https://lore.kernel.org/netfilter-devel/20211014121046.29329-1-fw@strlen.de/
> 
> I missed it earlier. What is the end goal ?
> Optimize nft run-time with on the fly generation of bpf byte code ?

Or rather to provide a pendant to nft given existence of xt_bpf, and the
latter will be removed at some point? (If so, can't we just deprecate the
old xt_bpf?)

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 17:26                 ` Alexei Starovoitov
  2022-08-31 21:49                   ` Daniel Borkmann
@ 2022-08-31 21:57                   ` Florian Westphal
  2022-09-06  6:57                     ` Nicolas Dichtel
  2022-09-01  8:08                   ` Jan Engelhardt
  2 siblings, 1 reply; 27+ messages in thread
From: Florian Westphal @ 2022-08-31 21:57 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Florian Westphal, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, David S. Miller, Jakub Kicinski,
	Network Development, Toke Høiland-Jørgensen,
	netfilter-devel, bpf

Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> > This helps gradually moving towards move epbf for those that
> > still heavily rely on the classic forwarding path.
> 
> No one is using it.
> If it was, we would have seen at least one bug report over
> all these years. We've seen none.

Err, it IS used, else I would not have sent this patch.

> very reasonable early on and turned out to be useless with
> zero users.
> BPF_PROG_TYPE_SCHED_ACT and BPF_PROG_TYPE_LWT*
> are in this category.

I doubt it had 0 users.  Those users probably moved to something
better?

> As a minimum we shouldn't step on the same rakes.
> xt_ebpf would be the same dead code as xt_bpf.

Its just 160 LOC or so, I don't see it has a huge technical debt.

> > If you are open to BPF_PROG_TYPE_NETFILTER I can go that route
> > as well, raw bpf program attachment via NF_HOOK and the bpf dispatcher,
> > but it will take significantly longer to get there.
> >
> > It involves reviving
> > https://lore.kernel.org/netfilter-devel/20211014121046.29329-1-fw@strlen.de/
> 
> I missed it earlier. What is the end goal ?

Immediate goal: get rid of all indirect calls from NF_HOOK()
invocations. Its about 2% speedup in my tests (with connection
tracking+defrag enabled).

This series changes prototype of the callbacks to int foo(struct *),
so I think it would be possible to build on this and allow attaching raw
bpf progs/implement what is now a netfilter kernel module as a bpf
program.

I have not spent time on this so far though, so I don't know yet
how the "please attach prog id 12345 at FORWARD with prio 42" should
be done.

> Optimize nft run-time with on the fly generation of bpf byte code ?

This could be done too, so far this JITs nf_hook_slow() only.
The big question for nft run-time would be how and where to do the JIT
translation.

I think that "nft run time jit" would be step 3, after allowing
(re)implementation of netfilter modules via bpf programs.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 21:49                   ` Daniel Borkmann
@ 2022-09-01  5:18                     ` Eyal Birger
  2022-09-02 16:53                       ` Alexei Starovoitov
  2022-09-01 10:14                     ` Florian Westphal
  1 sibling, 1 reply; 27+ messages in thread
From: Eyal Birger @ 2022-09-01  5:18 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov, Florian Westphal
  Cc: Andrii Nakryiko, Martin KaFai Lau, David S. Miller,
	Jakub Kicinski, Network Development,
	Toke Høiland-Jørgensen, netfilter-devel, bpf,
	Shmulik Ladkani

On Thu, Sep 1, 2022 at 1:16 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 8/31/22 7:26 PM, Alexei Starovoitov wrote:
> > On Wed, Aug 31, 2022 at 8:53 AM Florian Westphal <fw@strlen.de> wrote:
> >> Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> >>>> 1 and 2 have the upside that its easy to handle a 'file not found'
> >>>> error.
> >>>
> >>> I'm strongly against calling into bpf from the inner guts of nft.
> >>> Nack to all options discussed in this thread.
> >>> None of them make any sense.
> >>
> >> -v please.  I can just rework userspace to allow going via xt_bpf
> >> but its brain damaged.
> >
> > Right. xt_bpf was a dead end from the start.
> > It's time to deprecate it and remove it.
> >
> >> This helps gradually moving towards move epbf for those that
> >> still heavily rely on the classic forwarding path.
> >
> > No one is using it.
> > If it was, we would have seen at least one bug report over
> > all these years. We've seen none.
> >
> > tbh we had a fair share of wrong design decisions that look
> > very reasonable early on and turned out to be useless with
> > zero users.
> > BPF_PROG_TYPE_SCHED_ACT and BPF_PROG_TYPE_LWT*
> > are in this category. > All this code does is bit rot.
>
> +1
>
> > As a minimum we shouldn't step on the same rakes.
> > xt_ebpf would be the same dead code as xt_bpf.
>
> +1, and on top, the user experience will just be horrible. :(
>
> >> If you are open to BPF_PROG_TYPE_NETFILTER I can go that route
> >> as well, raw bpf program attachment via NF_HOOK and the bpf dispatcher,
> >> but it will take significantly longer to get there.
> >>
> >> It involves reviving
> >> https://lore.kernel.org/netfilter-devel/20211014121046.29329-1-fw@strlen.de/
> >
> > I missed it earlier. What is the end goal ?
> > Optimize nft run-time with on the fly generation of bpf byte code ?
>
> Or rather to provide a pendant to nft given existence of xt_bpf, and the
> latter will be removed at some point? (If so, can't we just deprecate the
> old xt_bpf?)

FWIW we've been using both lwt bpf and xt_bpf on our production workloads
for a few years now.

xt_bpf allows us to apply custom sophisticated policy logic at connection
establishment - which is not really possible (or efficient) using
iptables/nft constructs - without needing to reinvent all the facilities that
nf provides like connection tracking, ALGs, and simple filtering.

As for lwt bpf, We use it for load balancing towards collect md tunnels.
While this can be done at tc egress for unfragmented packets, the lwt out hook -
when used in tandem with nf fragment reassembly - provides a hooking point
where a bpf program can see reassembled packets and load balance based on
their internals.

Eyal.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 17:26                 ` Alexei Starovoitov
  2022-08-31 21:49                   ` Daniel Borkmann
  2022-08-31 21:57                   ` Florian Westphal
@ 2022-09-01  8:08                   ` Jan Engelhardt
  2 siblings, 0 replies; 27+ messages in thread
From: Jan Engelhardt @ 2022-09-01  8:08 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Florian Westphal, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, David S. Miller, Jakub Kicinski,
	Network Development, Toke Høiland-Jørgensen,
	netfilter-devel, bpf


On Wednesday 2022-08-31 19:26, Alexei Starovoitov wrote:
>
>Right. xt_bpf was a dead end from the start.
>It's time to deprecate it and remove it.

So does that extend to xt_u32, which is like a subset of BPF?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 21:49                   ` Daniel Borkmann
  2022-09-01  5:18                     ` Eyal Birger
@ 2022-09-01 10:14                     ` Florian Westphal
  2022-09-02 17:06                       ` Alexei Starovoitov
  1 sibling, 1 reply; 27+ messages in thread
From: Florian Westphal @ 2022-09-01 10:14 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Alexei Starovoitov, Florian Westphal, Andrii Nakryiko,
	Martin KaFai Lau, David S. Miller, Jakub Kicinski,
	Network Development, Toke Høiland-Jørgensen,
	netfilter-devel, bpf

Daniel Borkmann <daniel@iogearbox.net> wrote:
> On 8/31/22 7:26 PM, Alexei Starovoitov wrote:
> > On Wed, Aug 31, 2022 at 8:53 AM Florian Westphal <fw@strlen.de> wrote:
> > As a minimum we shouldn't step on the same rakes.
> > xt_ebpf would be the same dead code as xt_bpf.
> 
> +1, and on top, the user experience will just be horrible. :(

Compared to what?

> > > If you are open to BPF_PROG_TYPE_NETFILTER I can go that route
> > > as well, raw bpf program attachment via NF_HOOK and the bpf dispatcher,
> > > but it will take significantly longer to get there.
> > > 
> > > It involves reviving
> > > https://lore.kernel.org/netfilter-devel/20211014121046.29329-1-fw@strlen.de/
> > 
> > I missed it earlier. What is the end goal ?
> > Optimize nft run-time with on the fly generation of bpf byte code ?
> 
> Or rather to provide a pendant to nft given existence of xt_bpf, and the
> latter will be removed at some point? (If so, can't we just deprecate the
> old xt_bpf?)

See my reply to Alexey, immediate goal was to get rid of the indirect
calls by providing a tailored/jitted equivalent of nf_hook_slow().

The next step could be to allow implementation of netfilter hooks
(i.e., kernel modules that call nf_register_net_hook()) in bpf
but AFAIU it requires addition of BPF_PROG_TYPE_NETFILTER etc.

After that, yes, one could think about how to jit nft_do_chain() and
all the rest of the nft machinery.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-09-01  5:18                     ` Eyal Birger
@ 2022-09-02 16:53                       ` Alexei Starovoitov
  2022-09-05 17:50                         ` Eyal Birger
  0 siblings, 1 reply; 27+ messages in thread
From: Alexei Starovoitov @ 2022-09-02 16:53 UTC (permalink / raw)
  To: Eyal Birger
  Cc: Daniel Borkmann, Florian Westphal, Andrii Nakryiko,
	Martin KaFai Lau, David S. Miller, Jakub Kicinski,
	Network Development, Toke Høiland-Jørgensen,
	netfilter-devel, bpf, Shmulik Ladkani

On Wed, Aug 31, 2022 at 10:18 PM Eyal Birger <eyal.birger@gmail.com> wrote:
>
> On Thu, Sep 1, 2022 at 1:16 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
> >
> > On 8/31/22 7:26 PM, Alexei Starovoitov wrote:
> > > On Wed, Aug 31, 2022 at 8:53 AM Florian Westphal <fw@strlen.de> wrote:
> > >> Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> > >>>> 1 and 2 have the upside that its easy to handle a 'file not found'
> > >>>> error.
> > >>>
> > >>> I'm strongly against calling into bpf from the inner guts of nft.
> > >>> Nack to all options discussed in this thread.
> > >>> None of them make any sense.
> > >>
> > >> -v please.  I can just rework userspace to allow going via xt_bpf
> > >> but its brain damaged.
> > >
> > > Right. xt_bpf was a dead end from the start.
> > > It's time to deprecate it and remove it.
> > >
> > >> This helps gradually moving towards move epbf for those that
> > >> still heavily rely on the classic forwarding path.
> > >
> > > No one is using it.
> > > If it was, we would have seen at least one bug report over
> > > all these years. We've seen none.
> > >
> > > tbh we had a fair share of wrong design decisions that look
> > > very reasonable early on and turned out to be useless with
> > > zero users.
> > > BPF_PROG_TYPE_SCHED_ACT and BPF_PROG_TYPE_LWT*
> > > are in this category. > All this code does is bit rot.
> >
> > +1
> >
> > > As a minimum we shouldn't step on the same rakes.
> > > xt_ebpf would be the same dead code as xt_bpf.
> >
> > +1, and on top, the user experience will just be horrible. :(
> >
> > >> If you are open to BPF_PROG_TYPE_NETFILTER I can go that route
> > >> as well, raw bpf program attachment via NF_HOOK and the bpf dispatcher,
> > >> but it will take significantly longer to get there.
> > >>
> > >> It involves reviving
> > >> https://lore.kernel.org/netfilter-devel/20211014121046.29329-1-fw@strlen.de/
> > >
> > > I missed it earlier. What is the end goal ?
> > > Optimize nft run-time with on the fly generation of bpf byte code ?
> >
> > Or rather to provide a pendant to nft given existence of xt_bpf, and the
> > latter will be removed at some point? (If so, can't we just deprecate the
> > old xt_bpf?)
>
> FWIW we've been using both lwt bpf and xt_bpf on our production workloads
> for a few years now.
>
> xt_bpf allows us to apply custom sophisticated policy logic at connection
> establishment - which is not really possible (or efficient) using
> iptables/nft constructs - without needing to reinvent all the facilities that
> nf provides like connection tracking, ALGs, and simple filtering.
>
> As for lwt bpf, We use it for load balancing towards collect md tunnels.
> While this can be done at tc egress for unfragmented packets, the lwt out hook -
> when used in tandem with nf fragment reassembly - provides a hooking point
> where a bpf program can see reassembled packets and load balance based on
> their internals.

Sounds very interesting!
Any open source code to look at ?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-09-01 10:14                     ` Florian Westphal
@ 2022-09-02 17:06                       ` Alexei Starovoitov
  2022-09-02 17:52                         ` Florian Westphal
  0 siblings, 1 reply; 27+ messages in thread
From: Alexei Starovoitov @ 2022-09-02 17:06 UTC (permalink / raw)
  To: Florian Westphal
  Cc: Daniel Borkmann, Andrii Nakryiko, Martin KaFai Lau,
	David S. Miller, Jakub Kicinski, Network Development,
	Toke Høiland-Jørgensen, netfilter-devel, bpf

On Thu, Sep 1, 2022 at 3:14 AM Florian Westphal <fw@strlen.de> wrote:
>
> Daniel Borkmann <daniel@iogearbox.net> wrote:
> > On 8/31/22 7:26 PM, Alexei Starovoitov wrote:
> > > On Wed, Aug 31, 2022 at 8:53 AM Florian Westphal <fw@strlen.de> wrote:
> > > As a minimum we shouldn't step on the same rakes.
> > > xt_ebpf would be the same dead code as xt_bpf.
> >
> > +1, and on top, the user experience will just be horrible. :(
>
> Compared to what?
>
> > > > If you are open to BPF_PROG_TYPE_NETFILTER I can go that route
> > > > as well, raw bpf program attachment via NF_HOOK and the bpf dispatcher,
> > > > but it will take significantly longer to get there.
> > > >
> > > > It involves reviving
> > > > https://lore.kernel.org/netfilter-devel/20211014121046.29329-1-fw@strlen.de/
> > >
> > > I missed it earlier. What is the end goal ?
> > > Optimize nft run-time with on the fly generation of bpf byte code ?
> >
> > Or rather to provide a pendant to nft given existence of xt_bpf, and the
> > latter will be removed at some point? (If so, can't we just deprecate the
> > old xt_bpf?)
>
> See my reply to Alexey, immediate goal was to get rid of the indirect
> calls by providing a tailored/jitted equivalent of nf_hook_slow().
>
> The next step could be to allow implementation of netfilter hooks
> (i.e., kernel modules that call nf_register_net_hook()) in bpf
> but AFAIU it requires addition of BPF_PROG_TYPE_NETFILTER etc.

We were adding new prog and maps types in the past.
Now new features are being added differently.
All of the networking either works with sk_buff-s or xdp frames.
We try hard not to add any new uapi helpers.
Everything is moving to kfuncs.
Other sub-systems should be able to use bpf without touching
the bpf core. See hid-bpf as an example.
It needs several verifier improvements, but doesn't need
new prog types, helpers, etc.

> After that, yes, one could think about how to jit nft_do_chain() and
> all the rest of the nft machinery.

Sounds like a ton of work. All that just to accelerate nft a bit?
I think there are more impactful projects to work on.
For example, accelerating classic iptables with bpf would immediately
help a bunch of users.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-09-02 17:06                       ` Alexei Starovoitov
@ 2022-09-02 17:52                         ` Florian Westphal
  0 siblings, 0 replies; 27+ messages in thread
From: Florian Westphal @ 2022-09-02 17:52 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Florian Westphal, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, David S. Miller, Jakub Kicinski,
	Network Development, Toke Høiland-Jørgensen,
	netfilter-devel, bpf

Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> > See my reply to Alexey, immediate goal was to get rid of the indirect
> > calls by providing a tailored/jitted equivalent of nf_hook_slow().
> >
> > The next step could be to allow implementation of netfilter hooks
> > (i.e., kernel modules that call nf_register_net_hook()) in bpf
> > but AFAIU it requires addition of BPF_PROG_TYPE_NETFILTER etc.
> 
> We were adding new prog and maps types in the past.
> Now new features are being added differently.
> All of the networking either works with sk_buff-s or xdp frames.
> We try hard not to add any new uapi helpers.
> Everything is moving to kfuncs.
> Other sub-systems should be able to use bpf without touching
> the bpf core. See hid-bpf as an example.
> It needs several verifier improvements, but doesn't need
> new prog types, helpers, etc.

I don't see how it can be done without a new prog type, the bpf progs
would need access to "nf_hook_state" struct, passed as argument
to nf_hook_slow() (and down to the individual xt_foo modules...).

We can't change the existing netfilter hook prototype to go by
sk_buff * as that doesn't have all information, most prominent are
the input and output net_device, but also okfn is needed for async
reinject (nf_queue), the hook location and so on.

> > After that, yes, one could think about how to jit nft_do_chain() and
> > all the rest of the nft machinery.
> 
> Sounds like a ton of work. All that just to accelerate nft a bit?
> I think there are more impactful projects to work on.
> For example, accelerating classic iptables with bpf would immediately
> help a bunch of users.

Maybe, but from the problem points and the required effort it doesn't matter
if the chosen target is iptables or nftables; as far as the time/effort
needed I'd say they are identical.

The hard issues that need to be solved first are the same; they reside
in the netfilter core and not in the specific interpreter (nft_do_chain
vs. ipt_do_table and friends).

nf_tables might be *slightly* easier once that point would be reached
because the core functionality is more integrated with nf_tables whereas
in iptables there is more copypastry (ipt_do_table, ip6t_do_table,
ebt_do_table, ...).

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-09-02 16:53                       ` Alexei Starovoitov
@ 2022-09-05 17:50                         ` Eyal Birger
  0 siblings, 0 replies; 27+ messages in thread
From: Eyal Birger @ 2022-09-05 17:50 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Daniel Borkmann, Florian Westphal, Andrii Nakryiko,
	Martin KaFai Lau, David S. Miller, Jakub Kicinski,
	Network Development, Toke Høiland-Jørgensen,
	netfilter-devel, bpf, Shmulik Ladkani

On Fri, Sep 2, 2022 at 7:53 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Wed, Aug 31, 2022 at 10:18 PM Eyal Birger <eyal.birger@gmail.com> wrote:
> >
> > On Thu, Sep 1, 2022 at 1:16 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
> > >
> > > On 8/31/22 7:26 PM, Alexei Starovoitov wrote:
> > > > On Wed, Aug 31, 2022 at 8:53 AM Florian Westphal <fw@strlen.de> wrote:
> > > >> Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> > > >>>> 1 and 2 have the upside that its easy to handle a 'file not found'
> > > >>>> error.
> > > >>>
> > > >>> I'm strongly against calling into bpf from the inner guts of nft.
> > > >>> Nack to all options discussed in this thread.
> > > >>> None of them make any sense.
> > > >>
> > > >> -v please.  I can just rework userspace to allow going via xt_bpf
> > > >> but its brain damaged.
> > > >
> > > > Right. xt_bpf was a dead end from the start.
> > > > It's time to deprecate it and remove it.
> > > >
> > > >> This helps gradually moving towards move epbf for those that
> > > >> still heavily rely on the classic forwarding path.
> > > >
> > > > No one is using it.
> > > > If it was, we would have seen at least one bug report over
> > > > all these years. We've seen none.
> > > >
> > > > tbh we had a fair share of wrong design decisions that look
> > > > very reasonable early on and turned out to be useless with
> > > > zero users.
> > > > BPF_PROG_TYPE_SCHED_ACT and BPF_PROG_TYPE_LWT*
> > > > are in this category. > All this code does is bit rot.
> > >
> > > +1
> > >
> > > > As a minimum we shouldn't step on the same rakes.
> > > > xt_ebpf would be the same dead code as xt_bpf.
> > >
> > > +1, and on top, the user experience will just be horrible. :(
> > >
> > > >> If you are open to BPF_PROG_TYPE_NETFILTER I can go that route
> > > >> as well, raw bpf program attachment via NF_HOOK and the bpf dispatcher,
> > > >> but it will take significantly longer to get there.
> > > >>
> > > >> It involves reviving
> > > >> https://lore.kernel.org/netfilter-devel/20211014121046.29329-1-fw@strlen.de/
> > > >
> > > > I missed it earlier. What is the end goal ?
> > > > Optimize nft run-time with on the fly generation of bpf byte code ?
> > >
> > > Or rather to provide a pendant to nft given existence of xt_bpf, and the
> > > latter will be removed at some point? (If so, can't we just deprecate the
> > > old xt_bpf?)
> >
> > FWIW we've been using both lwt bpf and xt_bpf on our production workloads
> > for a few years now.
> >
> > xt_bpf allows us to apply custom sophisticated policy logic at connection
> > establishment - which is not really possible (or efficient) using
> > iptables/nft constructs - without needing to reinvent all the facilities that
> > nf provides like connection tracking, ALGs, and simple filtering.
> >
> > As for lwt bpf, We use it for load balancing towards collect md tunnels.
> > While this can be done at tc egress for unfragmented packets, the lwt out hook -
> > when used in tandem with nf fragment reassembly - provides a hooking point
> > where a bpf program can see reassembled packets and load balance based on
> > their internals.
>
> Sounds very interesting!
> Any open source code to look at ?

For these projects there isn't at this point. But some of the benefit in these
specific hooking points is that our custom logic is very scoped and integrates
well with the "classical" forwarding path.

In netfilter we have an identity based policy engine provisioning sets of
bpf maps. These maps are use used by policy programs invoked by xt_bpf on
connection establishment as part of a larger set of iptables rules.

In LWT this solved us a problem with fragmented traffic, as our load
balancing solution supports - among other things - IPsec stickiness based
on ESP-in-UDP SPI and as such needs to see unfragemented traffic.

Eyal.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-08-31 21:57                   ` Florian Westphal
@ 2022-09-06  6:57                     ` Nicolas Dichtel
  2022-09-07  3:04                       ` Alexei Starovoitov
  0 siblings, 1 reply; 27+ messages in thread
From: Nicolas Dichtel @ 2022-09-06  6:57 UTC (permalink / raw)
  To: Florian Westphal, Alexei Starovoitov
  Cc: Daniel Borkmann, Andrii Nakryiko, Martin KaFai Lau,
	David S. Miller, Jakub Kicinski, Network Development,
	Toke Høiland-Jørgensen, netfilter-devel, bpf


Le 31/08/2022 à 23:57, Florian Westphal a écrit :
> Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>>> This helps gradually moving towards move epbf for those that
>>> still heavily rely on the classic forwarding path.
>>
>> No one is using it.
>> If it was, we would have seen at least one bug report over
>> all these years. We've seen none.
> 
> Err, it IS used, else I would not have sent this patch.
> 
>> very reasonable early on and turned out to be useless with
>> zero users.
>> BPF_PROG_TYPE_SCHED_ACT and BPF_PROG_TYPE_LWT*
>> are in this category.
> 
> I doubt it had 0 users.  Those users probably moved to something
> better?
We are using BPF_PROG_TYPE_SCHED_ACT to perform custom encapsulations.
What could we used to replace that?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-09-06  6:57                     ` Nicolas Dichtel
@ 2022-09-07  3:04                       ` Alexei Starovoitov
  2022-09-07 15:52                         ` Nicolas Dichtel
  0 siblings, 1 reply; 27+ messages in thread
From: Alexei Starovoitov @ 2022-09-07  3:04 UTC (permalink / raw)
  To: Nicolas Dichtel
  Cc: Florian Westphal, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, David S. Miller, Jakub Kicinski,
	Network Development, Toke Høiland-Jørgensen,
	netfilter-devel, bpf

On Mon, Sep 5, 2022 at 11:57 PM Nicolas Dichtel
<nicolas.dichtel@6wind.com> wrote:
>
>
> Le 31/08/2022 à 23:57, Florian Westphal a écrit :
> > Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> >>> This helps gradually moving towards move epbf for those that
> >>> still heavily rely on the classic forwarding path.
> >>
> >> No one is using it.
> >> If it was, we would have seen at least one bug report over
> >> all these years. We've seen none.
> >
> > Err, it IS used, else I would not have sent this patch.
> >
> >> very reasonable early on and turned out to be useless with
> >> zero users.
> >> BPF_PROG_TYPE_SCHED_ACT and BPF_PROG_TYPE_LWT*
> >> are in this category.
> >
> > I doubt it had 0 users.  Those users probably moved to something
> > better?
> We are using BPF_PROG_TYPE_SCHED_ACT to perform custom encapsulations.
> What could we used to replace that?

SCHED_CLS. It has all of the features of cls and act combined.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH nf-next] netfilter: nf_tables: add ebpf expression
  2022-09-07  3:04                       ` Alexei Starovoitov
@ 2022-09-07 15:52                         ` Nicolas Dichtel
  0 siblings, 0 replies; 27+ messages in thread
From: Nicolas Dichtel @ 2022-09-07 15:52 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Florian Westphal, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, David S. Miller, Jakub Kicinski,
	Network Development, Toke Høiland-Jørgensen,
	netfilter-devel, bpf


Le 07/09/2022 à 05:04, Alexei Starovoitov a écrit :
> On Mon, Sep 5, 2022 at 11:57 PM Nicolas Dichtel
> <nicolas.dichtel@6wind.com> wrote:
>>
>>
>> Le 31/08/2022 à 23:57, Florian Westphal a écrit :
>>> Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>>>>> This helps gradually moving towards move epbf for those that
>>>>> still heavily rely on the classic forwarding path.
>>>>
>>>> No one is using it.
>>>> If it was, we would have seen at least one bug report over
>>>> all these years. We've seen none.
>>>
>>> Err, it IS used, else I would not have sent this patch.
>>>
>>>> very reasonable early on and turned out to be useless with
>>>> zero users.
>>>> BPF_PROG_TYPE_SCHED_ACT and BPF_PROG_TYPE_LWT*
>>>> are in this category.
>>>
>>> I doubt it had 0 users.  Those users probably moved to something
>>> better?
>> We are using BPF_PROG_TYPE_SCHED_ACT to perform custom encapsulations.
>> What could we used to replace that?
> 
> SCHED_CLS. It has all of the features of cls and act combined.

Indeed, thanks.

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2022-09-07 15:52 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-31 10:16 [PATCH nf-next] netfilter: nf_tables: add ebpf expression Florian Westphal
2022-08-31 12:13 ` Toke Høiland-Jørgensen
2022-08-31 12:56   ` Florian Westphal
2022-08-31 13:41     ` Toke Høiland-Jørgensen
2022-08-31 13:57       ` Florian Westphal
2022-08-31 14:43         ` Toke Høiland-Jørgensen
2022-08-31 15:09           ` Pablo Neira Ayuso
2022-08-31 15:35             ` Florian Westphal
2022-08-31 20:38               ` Pablo Neira Ayuso
2022-08-31 15:26           ` Florian Westphal
2022-08-31 15:39             ` Alexei Starovoitov
2022-08-31 15:53               ` Florian Westphal
2022-08-31 17:26                 ` Alexei Starovoitov
2022-08-31 21:49                   ` Daniel Borkmann
2022-09-01  5:18                     ` Eyal Birger
2022-09-02 16:53                       ` Alexei Starovoitov
2022-09-05 17:50                         ` Eyal Birger
2022-09-01 10:14                     ` Florian Westphal
2022-09-02 17:06                       ` Alexei Starovoitov
2022-09-02 17:52                         ` Florian Westphal
2022-08-31 21:57                   ` Florian Westphal
2022-09-06  6:57                     ` Nicolas Dichtel
2022-09-07  3:04                       ` Alexei Starovoitov
2022-09-07 15:52                         ` Nicolas Dichtel
2022-09-01  8:08                   ` Jan Engelhardt
2022-08-31 20:44             ` Toke Høiland-Jørgensen
2022-08-31 13:44     ` Florian Westphal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).