netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next] netfilter: revert introduction of egress hook
@ 2020-03-18  9:33 Daniel Borkmann
  2020-03-18  9:36 ` Pablo Neira Ayuso
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Daniel Borkmann @ 2020-03-18  9:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, Daniel Borkmann, Pablo Neira Ayuso, Alexei Starovoitov

This reverts the following commits:

  8537f78647c0 ("netfilter: Introduce egress hook")
  5418d3881e1f ("netfilter: Generalize ingress hook")
  b030f194aed2 ("netfilter: Rename ingress hook include file")

From the discussion in [0], the author's main motivation to add a hook
in fast path is for an out of tree kernel module, which is a red flag
to begin with. Other mentioned potential use cases like NAT{64,46}
is on future extensions w/o concrete code in the tree yet. Revert as
suggested [1] given the weak justification to add more hooks to critical
fast-path.

  [0] https://lore.kernel.org/netdev/cover.1583927267.git.lukas@wunner.de/
  [1] https://lore.kernel.org/netdev/20200318.011152.72770718915606186.davem@davemloft.net/

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Miller <davem@davemloft.net>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/netdevice.h         |   4 --
 include/linux/netfilter_ingress.h |  58 +++++++++++++++++
 include/linux/netfilter_netdev.h  | 102 ------------------------------
 include/uapi/linux/netfilter.h    |   1 -
 net/core/dev.c                    |  27 ++------
 net/netfilter/Kconfig             |   8 ---
 net/netfilter/core.c              |  24 ++-----
 net/netfilter/nft_chain_filter.c  |   4 +-
 8 files changed, 68 insertions(+), 160 deletions(-)
 create mode 100644 include/linux/netfilter_ingress.h
 delete mode 100644 include/linux/netfilter_netdev.h

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 15f1e32b430c..654808bfad83 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1751,7 +1751,6 @@ enum netdev_priv_flags {
  *	@xps_maps:	XXX: need comments on this one
  *	@miniq_egress:		clsact qdisc specific data for
  *				egress processing
- *	@nf_hooks_egress:	netfilter hooks executed for egress packets
  *	@qdisc_hash:		qdisc hash table
  *	@watchdog_timeo:	Represents the timeout that is used by
  *				the watchdog (see dev_watchdog())
@@ -2027,9 +2026,6 @@ struct net_device {
 #ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc __rcu	*miniq_egress;
 #endif
-#ifdef CONFIG_NETFILTER_EGRESS
-	struct nf_hook_entries __rcu *nf_hooks_egress;
-#endif
 
 #ifdef CONFIG_NET_SCHED
 	DECLARE_HASHTABLE	(qdisc_hash, 4);
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
new file mode 100644
index 000000000000..a13774be2eb5
--- /dev/null
+++ b/include/linux/netfilter_ingress.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NETFILTER_INGRESS_H_
+#define _NETFILTER_INGRESS_H_
+
+#include <linux/netfilter.h>
+#include <linux/netdevice.h>
+
+#ifdef CONFIG_NETFILTER_INGRESS
+static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
+{
+#ifdef CONFIG_JUMP_LABEL
+	if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
+		return false;
+#endif
+	return rcu_access_pointer(skb->dev->nf_hooks_ingress);
+}
+
+/* caller must hold rcu_read_lock */
+static inline int nf_hook_ingress(struct sk_buff *skb)
+{
+	struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress);
+	struct nf_hook_state state;
+	int ret;
+
+	/* Must recheck the ingress hook head, in the event it became NULL
+	 * after the check in nf_hook_ingress_active evaluated to true.
+	 */
+	if (unlikely(!e))
+		return 0;
+
+	nf_hook_state_init(&state, NF_NETDEV_INGRESS,
+			   NFPROTO_NETDEV, skb->dev, NULL, NULL,
+			   dev_net(skb->dev), NULL);
+	ret = nf_hook_slow(skb, &state, e, 0);
+	if (ret == 0)
+		return -1;
+
+	return ret;
+}
+
+static inline void nf_hook_ingress_init(struct net_device *dev)
+{
+	RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
+}
+#else /* CONFIG_NETFILTER_INGRESS */
+static inline int nf_hook_ingress_active(struct sk_buff *skb)
+{
+	return 0;
+}
+
+static inline int nf_hook_ingress(struct sk_buff *skb)
+{
+	return 0;
+}
+
+static inline void nf_hook_ingress_init(struct net_device *dev) {}
+#endif /* CONFIG_NETFILTER_INGRESS */
+#endif /* _NETFILTER_INGRESS_H_ */
diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h
deleted file mode 100644
index 92d3611a782e..000000000000
--- a/include/linux/netfilter_netdev.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NETFILTER_NETDEV_H_
-#define _NETFILTER_NETDEV_H_
-
-#include <linux/netfilter.h>
-#include <linux/netdevice.h>
-
-#ifdef CONFIG_NETFILTER
-static __always_inline bool nf_hook_netdev_active(enum nf_dev_hooks hooknum,
-					  struct nf_hook_entries __rcu *hooks)
-{
-#ifdef CONFIG_JUMP_LABEL
-	if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][hooknum]))
-		return false;
-#endif
-	return rcu_access_pointer(hooks);
-}
-
-/* caller must hold rcu_read_lock */
-static __always_inline int nf_hook_netdev(struct sk_buff *skb,
-					  enum nf_dev_hooks hooknum,
-					  struct nf_hook_entries __rcu *hooks)
-{
-	struct nf_hook_entries *e = rcu_dereference(hooks);
-	struct nf_hook_state state;
-	int ret;
-
-	/* Must recheck the hook head, in the event it became NULL
-	 * after the check in nf_hook_netdev_active evaluated to true.
-	 */
-	if (unlikely(!e))
-		return 0;
-
-	nf_hook_state_init(&state, hooknum,
-			   NFPROTO_NETDEV, skb->dev, NULL, NULL,
-			   dev_net(skb->dev), NULL);
-	ret = nf_hook_slow(skb, &state, e, 0);
-	if (ret == 0)
-		return -1;
-
-	return ret;
-}
-#endif /* CONFIG_NETFILTER */
-
-static inline void nf_hook_netdev_init(struct net_device *dev)
-{
-#ifdef CONFIG_NETFILTER_INGRESS
-	RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
-#endif
-#ifdef CONFIG_NETFILTER_EGRESS
-	RCU_INIT_POINTER(dev->nf_hooks_egress, NULL);
-#endif
-}
-
-#ifdef CONFIG_NETFILTER_INGRESS
-static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
-{
-	return nf_hook_netdev_active(NF_NETDEV_INGRESS,
-				     skb->dev->nf_hooks_ingress);
-}
-
-static inline int nf_hook_ingress(struct sk_buff *skb)
-{
-	return nf_hook_netdev(skb, NF_NETDEV_INGRESS,
-			      skb->dev->nf_hooks_ingress);
-}
-#else /* CONFIG_NETFILTER_INGRESS */
-static inline int nf_hook_ingress_active(struct sk_buff *skb)
-{
-	return 0;
-}
-
-static inline int nf_hook_ingress(struct sk_buff *skb)
-{
-	return 0;
-}
-#endif /* CONFIG_NETFILTER_INGRESS */
-
-#ifdef CONFIG_NETFILTER_EGRESS
-static inline bool nf_hook_egress_active(const struct sk_buff *skb)
-{
-	return nf_hook_netdev_active(NF_NETDEV_EGRESS,
-				     skb->dev->nf_hooks_egress);
-}
-
-static inline int nf_hook_egress(struct sk_buff *skb)
-{
-	return nf_hook_netdev(skb, NF_NETDEV_EGRESS,
-			      skb->dev->nf_hooks_egress);
-}
-#else /* CONFIG_NETFILTER_EGRESS */
-static inline int nf_hook_egress_active(struct sk_buff *skb)
-{
-	return 0;
-}
-
-static inline int nf_hook_egress(struct sk_buff *skb)
-{
-	return 0;
-}
-#endif /* CONFIG_NETFILTER_EGRESS */
-#endif /* _NETFILTER_INGRESS_H_ */
diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
index d1616574c54f..ca9e63d6e0e4 100644
--- a/include/uapi/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
@@ -50,7 +50,6 @@ enum nf_inet_hooks {
 
 enum nf_dev_hooks {
 	NF_NETDEV_INGRESS,
-	NF_NETDEV_EGRESS,
 	NF_NETDEV_NUMHOOKS
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index aeb8ccbbe93b..021e18251465 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -135,7 +135,7 @@
 #include <linux/if_macvlan.h>
 #include <linux/errqueue.h>
 #include <linux/hrtimer.h>
-#include <linux/netfilter_netdev.h>
+#include <linux/netfilter_ingress.h>
 #include <linux/crash_dump.h>
 #include <linux/sctp.h>
 #include <net/udp_tunnel.h>
@@ -3773,7 +3773,6 @@ EXPORT_SYMBOL(dev_loopback_xmit);
 static struct sk_buff *
 sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 {
-#ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
 	struct tcf_result cl_res;
 
@@ -3807,24 +3806,11 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 	default:
 		break;
 	}
-#endif /* CONFIG_NET_CLS_ACT */
+
 	return skb;
 }
 #endif /* CONFIG_NET_EGRESS */
 
-static inline int nf_egress(struct sk_buff *skb)
-{
-	if (nf_hook_egress_active(skb)) {
-		int ret;
-
-		rcu_read_lock();
-		ret = nf_hook_egress(skb);
-		rcu_read_unlock();
-		return ret;
-	}
-	return 0;
-}
-
 #ifdef CONFIG_XPS
 static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
 			       struct xps_dev_maps *dev_maps, unsigned int tci)
@@ -4011,16 +3997,13 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 	qdisc_pkt_len_init(skb);
 #ifdef CONFIG_NET_CLS_ACT
 	skb->tc_at_ingress = 0;
-#endif
-#ifdef CONFIG_NET_EGRESS
+# ifdef CONFIG_NET_EGRESS
 	if (static_branch_unlikely(&egress_needed_key)) {
-		if (nf_egress(skb) < 0)
-			goto out;
-
 		skb = sch_handle_egress(skb, &rc, dev);
 		if (!skb)
 			goto out;
 	}
+# endif
 #endif
 	/* If device/qdisc don't need skb->dst, release it right now while
 	 * its hot in this cpu cache.
@@ -9867,7 +9850,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	if (!dev->ethtool_ops)
 		dev->ethtool_ops = &default_ethtool_ops;
 
-	nf_hook_netdev_init(dev);
+	nf_hook_ingress_init(dev);
 
 	return dev;
 
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index f4c68f60f241..468fea1aebba 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -10,14 +10,6 @@ config NETFILTER_INGRESS
 	  This allows you to classify packets from ingress using the Netfilter
 	  infrastructure.
 
-config NETFILTER_EGRESS
-	bool "Netfilter egress support"
-	default y
-	select NET_EGRESS
-	help
-	  This allows you to classify packets before transmission using the
-	  Netfilter infrastructure.
-
 config NETFILTER_NETLINK
 	tristate
 
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 85e9c959aba7..78f046ec506f 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -306,12 +306,6 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
 		if (dev && dev_net(dev) == net)
 			return &dev->nf_hooks_ingress;
 	}
-#endif
-#ifdef CONFIG_NETFILTER_EGRESS
-	if (hooknum == NF_NETDEV_EGRESS) {
-		if (dev && dev_net(dev) == net)
-			return &dev->nf_hooks_egress;
-	}
 #endif
 	WARN_ON_ONCE(1);
 	return NULL;
@@ -324,13 +318,11 @@ static int __nf_register_net_hook(struct net *net, int pf,
 	struct nf_hook_entries __rcu **pp;
 
 	if (pf == NFPROTO_NETDEV) {
-		if ((!IS_ENABLED(CONFIG_NETFILTER_INGRESS) &&
-		     reg->hooknum == NF_NETDEV_INGRESS) ||
-		    (!IS_ENABLED(CONFIG_NETFILTER_EGRESS) &&
-		     reg->hooknum == NF_NETDEV_EGRESS))
+#ifndef CONFIG_NETFILTER_INGRESS
+		if (reg->hooknum == NF_NETDEV_INGRESS)
 			return -EOPNOTSUPP;
-		if ((reg->hooknum != NF_NETDEV_INGRESS &&
-		     reg->hooknum != NF_NETDEV_EGRESS) ||
+#endif
+		if (reg->hooknum != NF_NETDEV_INGRESS ||
 		    !reg->dev || dev_net(reg->dev) != net)
 			return -EINVAL;
 	}
@@ -356,10 +348,6 @@ static int __nf_register_net_hook(struct net *net, int pf,
 	if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
 		net_inc_ingress_queue();
 #endif
-#ifdef CONFIG_NETFILTER_EGRESS
-	if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS)
-		net_inc_egress_queue();
-#endif
 #ifdef CONFIG_JUMP_LABEL
 	static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
 #endif
@@ -418,10 +406,6 @@ static void __nf_unregister_net_hook(struct net *net, int pf,
 		if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
 			net_dec_ingress_queue();
 #endif
-#ifdef CONFIG_NETFILTER_EGRESS
-		if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS)
-			net_dec_egress_queue();
-#endif
 #ifdef CONFIG_JUMP_LABEL
 		static_key_slow_dec(&nf_hooks_needed[pf][reg->hooknum]);
 #endif
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 67ce6dbb5496..c78d01bc02e9 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -277,11 +277,9 @@ static const struct nft_chain_type nft_chain_filter_netdev = {
 	.name		= "filter",
 	.type		= NFT_CHAIN_T_DEFAULT,
 	.family		= NFPROTO_NETDEV,
-	.hook_mask	= (1 << NF_NETDEV_INGRESS) |
-			  (1 << NF_NETDEV_EGRESS),
+	.hook_mask	= (1 << NF_NETDEV_INGRESS),
 	.hooks		= {
 		[NF_NETDEV_INGRESS]	= nft_do_chain_netdev,
-		[NF_NETDEV_EGRESS]	= nft_do_chain_netdev,
 	},
 };
 
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] netfilter: revert introduction of egress hook
  2020-03-18  9:33 [PATCH net-next] netfilter: revert introduction of egress hook Daniel Borkmann
@ 2020-03-18  9:36 ` Pablo Neira Ayuso
  2020-03-18  9:41   ` Daniel Borkmann
  2020-03-18 10:02 ` Florian Westphal
  2020-03-19  1:45 ` David Miller
  2 siblings, 1 reply; 9+ messages in thread
From: Pablo Neira Ayuso @ 2020-03-18  9:36 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: davem, netdev, Alexei Starovoitov

On Wed, Mar 18, 2020 at 10:33:22AM +0100, Daniel Borkmann wrote:
> This reverts the following commits:
> 
>   8537f78647c0 ("netfilter: Introduce egress hook")
>   5418d3881e1f ("netfilter: Generalize ingress hook")
>   b030f194aed2 ("netfilter: Rename ingress hook include file")
> 
> From the discussion in [0], the author's main motivation to add a hook
> in fast path is for an out of tree kernel module, which is a red flag
> to begin with. Other mentioned potential use cases like NAT{64,46}
> is on future extensions w/o concrete code in the tree yet. Revert as
> suggested [1] given the weak justification to add more hooks to critical
> fast-path.
> 
>   [0] https://lore.kernel.org/netdev/cover.1583927267.git.lukas@wunner.de/
>   [1] https://lore.kernel.org/netdev/20200318.011152.72770718915606186.davem@davemloft.net/
> 
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

Nacked-by: Pablo Neira Ayuso <pablo@netfilter.org>

Daniel, you must be really worried about achieving your goals if you
have to do politics to block stuff.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] netfilter: revert introduction of egress hook
  2020-03-18  9:36 ` Pablo Neira Ayuso
@ 2020-03-18  9:41   ` Daniel Borkmann
  2020-03-18  9:51     ` Pablo Neira Ayuso
  0 siblings, 1 reply; 9+ messages in thread
From: Daniel Borkmann @ 2020-03-18  9:41 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: davem, netdev, Alexei Starovoitov

On 3/18/20 10:36 AM, Pablo Neira Ayuso wrote:
> On Wed, Mar 18, 2020 at 10:33:22AM +0100, Daniel Borkmann wrote:
>> This reverts the following commits:
>>
>>    8537f78647c0 ("netfilter: Introduce egress hook")
>>    5418d3881e1f ("netfilter: Generalize ingress hook")
>>    b030f194aed2 ("netfilter: Rename ingress hook include file")
>>
>>  From the discussion in [0], the author's main motivation to add a hook
>> in fast path is for an out of tree kernel module, which is a red flag
>> to begin with. Other mentioned potential use cases like NAT{64,46}
>> is on future extensions w/o concrete code in the tree yet. Revert as
>> suggested [1] given the weak justification to add more hooks to critical
>> fast-path.
>>
>>    [0] https://lore.kernel.org/netdev/cover.1583927267.git.lukas@wunner.de/
>>    [1] https://lore.kernel.org/netdev/20200318.011152.72770718915606186.davem@davemloft.net/
>>
>> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> 
> Nacked-by: Pablo Neira Ayuso <pablo@netfilter.org>
> 
> Daniel, you must be really worried about achieving your goals if you
> have to do politics to block stuff.

Looks like this is your only rationale technical argument you can come
up with?

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] netfilter: revert introduction of egress hook
  2020-03-18  9:41   ` Daniel Borkmann
@ 2020-03-18  9:51     ` Pablo Neira Ayuso
  0 siblings, 0 replies; 9+ messages in thread
From: Pablo Neira Ayuso @ 2020-03-18  9:51 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: davem, netdev, Alexei Starovoitov

On Wed, Mar 18, 2020 at 10:41:30AM +0100, Daniel Borkmann wrote:
> On 3/18/20 10:36 AM, Pablo Neira Ayuso wrote:
> > On Wed, Mar 18, 2020 at 10:33:22AM +0100, Daniel Borkmann wrote:
> > > This reverts the following commits:
> > > 
> > >    8537f78647c0 ("netfilter: Introduce egress hook")
> > >    5418d3881e1f ("netfilter: Generalize ingress hook")
> > >    b030f194aed2 ("netfilter: Rename ingress hook include file")
> > > 
> > >  From the discussion in [0], the author's main motivation to add a hook
> > > in fast path is for an out of tree kernel module, which is a red flag
> > > to begin with. Other mentioned potential use cases like NAT{64,46}
> > > is on future extensions w/o concrete code in the tree yet. Revert as
> > > suggested [1] given the weak justification to add more hooks to critical
> > > fast-path.
> > > 
> > >    [0] https://lore.kernel.org/netdev/cover.1583927267.git.lukas@wunner.de/
> > >    [1] https://lore.kernel.org/netdev/20200318.011152.72770718915606186.davem@davemloft.net/
> > > 
> > > Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> > 
> > Nacked-by: Pablo Neira Ayuso <pablo@netfilter.org>
> > 
> > Daniel, you must be really worried about achieving your goals if you
> > have to do politics to block stuff.
> 
> Looks like this is your only rationale technical argument you can come
> up with?

I have waited for two days and I got no feedback from you.

Moreover, your concerns on performance has been addressed: Performance
impact is negligible.

Then, you popped up more arguments, like a reference to an email from
17 years ago, where only iptables was available and the only choice to
add ingress/egress filtering was to make another copy and paste of the
iptables code.

I also explained how to use this egress hook in Netfilter.

What's missing on your side?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] netfilter: revert introduction of egress hook
  2020-03-18  9:33 [PATCH net-next] netfilter: revert introduction of egress hook Daniel Borkmann
  2020-03-18  9:36 ` Pablo Neira Ayuso
@ 2020-03-18 10:02 ` Florian Westphal
  2020-03-18 10:50   ` Daniel Borkmann
  2020-03-19  1:45 ` David Miller
  2 siblings, 1 reply; 9+ messages in thread
From: Florian Westphal @ 2020-03-18 10:02 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: davem, netdev, Pablo Neira Ayuso, Alexei Starovoitov

Daniel Borkmann <daniel@iogearbox.net> wrote:
> This reverts the following commits:
> 
>   8537f78647c0 ("netfilter: Introduce egress hook")
>   5418d3881e1f ("netfilter: Generalize ingress hook")
>   b030f194aed2 ("netfilter: Rename ingress hook include file")
> 
> From the discussion in [0], the author's main motivation to add a hook
> in fast path is for an out of tree kernel module, which is a red flag
> to begin with.

The author did post patches for nftables, i.e. you can hook up rulesets to
this new hook point.

> is on future extensions w/o concrete code in the tree yet. Revert as
> suggested [1] given the weak justification to add more hooks to critical
> fast-path.

Do you have an alternative suggestion on how to expose this?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] netfilter: revert introduction of egress hook
  2020-03-18 10:02 ` Florian Westphal
@ 2020-03-18 10:50   ` Daniel Borkmann
  2020-03-18 12:33     ` Florian Westphal
  0 siblings, 1 reply; 9+ messages in thread
From: Daniel Borkmann @ 2020-03-18 10:50 UTC (permalink / raw)
  To: Florian Westphal; +Cc: davem, netdev, Pablo Neira Ayuso, Alexei Starovoitov

On 3/18/20 11:02 AM, Florian Westphal wrote:
> Daniel Borkmann <daniel@iogearbox.net> wrote:
>> This reverts the following commits:
>>
>>    8537f78647c0 ("netfilter: Introduce egress hook")
>>    5418d3881e1f ("netfilter: Generalize ingress hook")
>>    b030f194aed2 ("netfilter: Rename ingress hook include file")
>>
>>  From the discussion in [0], the author's main motivation to add a hook
>> in fast path is for an out of tree kernel module, which is a red flag
>> to begin with.
> 
> The author did post patches for nftables, i.e. you can hook up rulesets to
> this new hook point.
> 
>> is on future extensions w/o concrete code in the tree yet. Revert as
>> suggested [1] given the weak justification to add more hooks to critical
>> fast-path.
> 
> Do you have an alternative suggestion on how to expose this?

Yeah, I think we should not plaster the stack with same/similar hooks that
achieve the same functionality next to each other over and over at the cost
of performance for users .. ideally there should just be a single entry point
that is very lightweight/efficient when not used and can otherwise patch to
a direct call when in use. Recent work from KP Singh goes into this direction
with the fmodify_return work [0], so we would have a single static key which
wraps an empty function call entry which can then be patched by the kernel at
runtime. Inside that trampoline we can still keep the ordering intact, but
imho this would overall better reduce overhead when functionality is not used
compared to the practice of duplication today.

Thanks,
Daniel

   [0] https://lore.kernel.org/bpf/20200304191853.1529-1-kpsingh@chromium.org/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] netfilter: revert introduction of egress hook
  2020-03-18 10:50   ` Daniel Borkmann
@ 2020-03-18 12:33     ` Florian Westphal
  2020-03-18 14:22       ` Daniel Borkmann
  0 siblings, 1 reply; 9+ messages in thread
From: Florian Westphal @ 2020-03-18 12:33 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Florian Westphal, davem, netdev, Pablo Neira Ayuso, Alexei Starovoitov

Daniel Borkmann <daniel@iogearbox.net> wrote:
> On 3/18/20 11:02 AM, Florian Westphal wrote:
> > Daniel Borkmann <daniel@iogearbox.net> wrote:
> > > This reverts the following commits:
> > > 
> > >    8537f78647c0 ("netfilter: Introduce egress hook")
> > >    5418d3881e1f ("netfilter: Generalize ingress hook")
> > >    b030f194aed2 ("netfilter: Rename ingress hook include file")
> > > 
> > >  From the discussion in [0], the author's main motivation to add a hook
> > > in fast path is for an out of tree kernel module, which is a red flag
> > > to begin with.
> > 
> > The author did post patches for nftables, i.e. you can hook up rulesets to
> > this new hook point.
> > 
> > > is on future extensions w/o concrete code in the tree yet. Revert as
> > > suggested [1] given the weak justification to add more hooks to critical
> > > fast-path.
> > 
> > Do you have an alternative suggestion on how to expose this?
> 
> Yeah, I think we should not plaster the stack with same/similar hooks that
> achieve the same functionality next to each other over and over at the cost
> of performance for users .. ideally there should just be a single entry point
> that is very lightweight/efficient when not used and can otherwise patch to
> a direct call when in use. Recent work from KP Singh goes into this direction
> with the fmodify_return work [0], so we would have a single static key which
> wraps an empty function call entry which can then be patched by the kernel at
> runtime. Inside that trampoline we can still keep the ordering intact, but
> imho this would overall better reduce overhead when functionality is not used
> compared to the practice of duplication today.

Thanks for explaining.  If I understand this correctly then:

1. sch_handle_egress() becomes a non-inlined function that isn't called
   from __dev_queue_xmit or any other location
2. __dev_queue_xmit calls a dummy do-nothing function wrapped in
   existing egress-static-key
3. kernels sched/tc code can patch the dummy function so it calls
   sch_handle_egress, without userspace changes/awareness
4. netfilter could reuse this even when tc is already patched in, so
   the dummy function does two direct calls.

How does that differ from current code?  One could also re-arrange
things like this (diff below, just for illustration).

The only difference I see vs. my understanding of your proposal is:
1. no additional static key, nf_hook_egress_active() doesn't exist
2. nf_hook_egress exists, but isn't called anywhere, patched-in at runtime
3. sch_handle_egress isn't called anywhere either, patched-in too

Did I get that right? The idea/plan looks good to me, it just looks
like a very marginal difference to me, thats why I'm asking.

Thanks!

diff --git a/net/core/dev.c b/net/core/dev.c
index aeb8ccbbe93b..406ac86b6d6c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3770,13 +3770,24 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
 EXPORT_SYMBOL(dev_loopback_xmit);
 
 #ifdef CONFIG_NET_EGRESS
-static struct sk_buff *
+static int nf_egress(struct sk_buff *skb)
+{
+	if (nf_hook_egress_active(skb))
+		return nf_hook_egress(skb);
+
+	return 0;
+}
+
+static noinline struct sk_buff *
 sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 {
-#ifdef CONFIG_NET_CLS_ACT
-	struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
+	struct mini_Qdisc *miniq;
 	struct tcf_result cl_res;
 
+	if (nf_egress(skb) < 0)
+		return NULL;
+
+	miniq = rcu_dereference_bh(dev->miniq_egress);
 	if (!miniq)
 		return skb;
 
@@ -3812,19 +3823,6 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 }
 #endif /* CONFIG_NET_EGRESS */
 #ifdef CONFIG_XPS
 static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
 			       struct xps_dev_maps *dev_maps, unsigned int tci)
@@ -4014,9 +4012,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 #endif
 #ifdef CONFIG_NET_EGRESS
 	if (static_branch_unlikely(&egress_needed_key)) {
-		if (nf_egress(skb) < 0)
-			goto out;
-
 		skb = sch_handle_egress(skb, &rc, dev);
 		if (!skb)
 			goto out;

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] netfilter: revert introduction of egress hook
  2020-03-18 12:33     ` Florian Westphal
@ 2020-03-18 14:22       ` Daniel Borkmann
  0 siblings, 0 replies; 9+ messages in thread
From: Daniel Borkmann @ 2020-03-18 14:22 UTC (permalink / raw)
  To: Florian Westphal; +Cc: davem, netdev, Pablo Neira Ayuso, Alexei Starovoitov

On 3/18/20 1:33 PM, Florian Westphal wrote:
> Daniel Borkmann <daniel@iogearbox.net> wrote:
>> On 3/18/20 11:02 AM, Florian Westphal wrote:
>>> Daniel Borkmann <daniel@iogearbox.net> wrote:
>>>> This reverts the following commits:
>>>>
>>>>     8537f78647c0 ("netfilter: Introduce egress hook")
>>>>     5418d3881e1f ("netfilter: Generalize ingress hook")
>>>>     b030f194aed2 ("netfilter: Rename ingress hook include file")
>>>>
>>>>   From the discussion in [0], the author's main motivation to add a hook
>>>> in fast path is for an out of tree kernel module, which is a red flag
>>>> to begin with.
>>>
>>> The author did post patches for nftables, i.e. you can hook up rulesets to
>>> this new hook point.
>>>
>>>> is on future extensions w/o concrete code in the tree yet. Revert as
>>>> suggested [1] given the weak justification to add more hooks to critical
>>>> fast-path.
>>>
>>> Do you have an alternative suggestion on how to expose this?
>>
>> Yeah, I think we should not plaster the stack with same/similar hooks that
>> achieve the same functionality next to each other over and over at the cost
>> of performance for users .. ideally there should just be a single entry point
>> that is very lightweight/efficient when not used and can otherwise patch to
>> a direct call when in use. Recent work from KP Singh goes into this direction
>> with the fmodify_return work [0], so we would have a single static key which
>> wraps an empty function call entry which can then be patched by the kernel at
>> runtime. Inside that trampoline we can still keep the ordering intact, but
>> imho this would overall better reduce overhead when functionality is not used
>> compared to the practice of duplication today.
> 
> Thanks for explaining.  If I understand this correctly then:
> 
> 1. sch_handle_egress() becomes a non-inlined function that isn't called
>     from __dev_queue_xmit or any other location
> 2. __dev_queue_xmit calls a dummy do-nothing function wrapped in
>     existing egress-static-key
> 3. kernels sched/tc code can patch the dummy function so it calls
>     sch_handle_egress, without userspace changes/awareness
> 4. netfilter could reuse this even when tc is already patched in, so
>     the dummy function does two direct calls.

Yes, pretty much and we could do the same for the ingress side as well.

> How does that differ from current code?  One could also re-arrange
> things like this (diff below, just for illustration).
> 
> The only difference I see vs. my understanding of your proposal is:
> 1. no additional static key, nf_hook_egress_active() doesn't exist
> 2. nf_hook_egress exists, but isn't called anywhere, patched-in at runtime
> 3. sch_handle_egress isn't called anywhere either, patched-in too
> 
> Did I get that right? The idea/plan looks good to me, it just looks
> like a very marginal difference to me, thats why I'm asking.

Aside from that, we could take that approach potentially even further in that
it would allow for converting more indirect calls into direct ones, e.g. the
tp->classify() ones from sch_handle_egress() or potentially the entry->hook()
from nf_hook_slow() side, or BPF directly as well, so I think it would be worth
a try to see how far we could simplify both and get rid of indirect calls for
their users with this approach. Happy to help out here as well.

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] netfilter: revert introduction of egress hook
  2020-03-18  9:33 [PATCH net-next] netfilter: revert introduction of egress hook Daniel Borkmann
  2020-03-18  9:36 ` Pablo Neira Ayuso
  2020-03-18 10:02 ` Florian Westphal
@ 2020-03-19  1:45 ` David Miller
  2 siblings, 0 replies; 9+ messages in thread
From: David Miller @ 2020-03-19  1:45 UTC (permalink / raw)
  To: daniel; +Cc: netdev, pablo, ast

From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 18 Mar 2020 10:33:22 +0100

> This reverts the following commits:
> 
>   8537f78647c0 ("netfilter: Introduce egress hook")
>   5418d3881e1f ("netfilter: Generalize ingress hook")
>   b030f194aed2 ("netfilter: Rename ingress hook include file")
> 
> From the discussion in [0], the author's main motivation to add a hook
> in fast path is for an out of tree kernel module, which is a red flag
> to begin with. Other mentioned potential use cases like NAT{64,46}
> is on future extensions w/o concrete code in the tree yet. Revert as
> suggested [1] given the weak justification to add more hooks to critical
> fast-path.
> 
>   [0] https://lore.kernel.org/netdev/cover.1583927267.git.lukas@wunner.de/
>   [1] https://lore.kernel.org/netdev/20200318.011152.72770718915606186.davem@davemloft.net/
> 
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

Applied, this definitely needs more discussion.

Thanks Daniel.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-03-19  1:45 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-18  9:33 [PATCH net-next] netfilter: revert introduction of egress hook Daniel Borkmann
2020-03-18  9:36 ` Pablo Neira Ayuso
2020-03-18  9:41   ` Daniel Borkmann
2020-03-18  9:51     ` Pablo Neira Ayuso
2020-03-18 10:02 ` Florian Westphal
2020-03-18 10:50   ` Daniel Borkmann
2020-03-18 12:33     ` Florian Westphal
2020-03-18 14:22       ` Daniel Borkmann
2020-03-19  1:45 ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).