netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] bridge: make it possible for packets to traverse the bridge withour hitting netfilter
@ 2015-02-10  9:32 Imre Palik
  2015-02-11 22:29 ` David Miller
  0 siblings, 1 reply; 7+ messages in thread
From: Imre Palik @ 2015-02-10  9:32 UTC (permalink / raw)
  To: bridge
  Cc: Stephen Hemminger, David S. Miller, netdev, linux-kernel, Palik,
	Imre, Anthony Liguori

From: "Palik, Imre" <imrep@amazon.de>

The netfilter code is made with flexibility instead of performance in mind.
So when all we want is to pass packets between different interfaces, the
performance penalty of hitting netfilter code can be considerable, even when
all the firewalling is disabled for the bridge.

This change makes it possible to disable netfilter both on a per bridge basis,
or for the whole bridging subsystem.  In the case interesting to us, this can
lead to more than 10% speedup compared to the case when only bridge-iptables
are disabled.

Cc: Anthony Liguori <aliguori@amazon.com>
Signed-off-by: Imre Palik <imrep@amazon.de>
---
 net/bridge/br_forward.c   |    3 +--
 net/bridge/br_input.c     |    3 +--
 net/bridge/br_netfilter.c |   48 +++++++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_private.h   |    9 +++++++++
 net/bridge/br_sysfs_br.c  |   23 ++++++++++++++++++++++
 5 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index f96933a..2931d2a 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -98,8 +98,7 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
 	skb->dev = to->dev;
 	skb_forward_csum(skb);
 
-	NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
-		br_forward_finish);
+	br_nf_do_forward(skb, indev);
 }
 
 /* called with rcu_read_lock */
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index e2aa7be..0edd1d6 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -299,8 +299,7 @@ forward:
 		if (ether_addr_equal(p->br->dev->dev_addr, dest))
 			skb->pkt_type = PACKET_HOST;
 
-		NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
-			br_handle_frame_finish);
+		br_nf_do_prerouting(skb, p);
 		break;
 	default:
 drop:
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index c190d22..b29c14f 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -50,6 +50,7 @@
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table_header *brnf_sysctl_header;
+static int brnf_call_netfilter __read_mostly = 1;
 static int brnf_call_iptables __read_mostly = 1;
 static int brnf_call_ip6tables __read_mostly = 1;
 static int brnf_call_arptables __read_mostly = 1;
@@ -57,6 +58,7 @@ static int brnf_filter_vlan_tagged __read_mostly = 0;
 static int brnf_filter_pppoe_tagged __read_mostly = 0;
 static int brnf_pass_vlan_indev __read_mostly = 0;
 #else
+#define brnf_call_netfilter 1
 #define brnf_call_iptables 1
 #define brnf_call_ip6tables 1
 #define brnf_call_arptables 1
@@ -523,6 +525,25 @@ bad:
 
 }
 
+int br_nf_do_prerouting(struct sk_buff *skb, struct net_bridge_port *p)
+{
+	struct net_bridge *br;
+
+	br = p->br;
+
+	if (!brnf_call_netfilter && !br->call_nf) {
+		__u32 len = nf_bridge_encap_header_len(skb);
+
+		if (likely(pskb_may_pull(skb, len)))
+			return br_handle_frame_finish(skb);
+		kfree(skb);
+		return -EPERM;
+	}
+
+	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
+		br_handle_frame_finish);
+}
+
 /* Replicate the checks that IPv6 does on packet reception and pass the packet
  * to ip6tables, which doesn't support NAT, so things are fairly simple. */
 static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
@@ -667,6 +688,26 @@ static int br_nf_forward_finish(struct sk_buff *skb)
 	return 0;
 }
 
+int br_nf_do_forward(struct sk_buff *skb, struct net_device *indev)
+{
+	struct net_bridge_port *p;
+	struct net_bridge *br;
+
+	p = br_port_get_rcu(indev);
+	if (!p) {
+		kfree(skb);
+		return NF_DROP;
+	}
+	br = p->br;
+	if (!brnf_call_netfilter && !br->call_nf) {
+		skb_push(skb, ETH_HLEN);
+		br_drop_fake_rtable(skb);
+		return dev_queue_xmit(skb);
+	} else {
+		return NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev,
+			skb->dev, br_forward_finish);
+	}
+}
 
 /* This is the 'purely bridged' case.  For IP, we pass the packet to
  * netfilter with indev and outdev set to the bridge device,
@@ -929,6 +970,13 @@ int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
 
 static struct ctl_table brnf_table[] = {
 	{
+		.procname	= "bridge-call-netfilter",
+		.data		= &brnf_call_netfilter,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= brnf_sysctl_call_tables,
+	},
+	{
 		.procname	= "bridge-nf-call-arptables",
 		.data		= &brnf_call_arptables,
 		.maxlen		= sizeof(int),
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index aea3d13..6b0aad9 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -215,6 +215,7 @@ struct net_bridge
 	struct hlist_head		hash[BR_HASH_SIZE];
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	struct rtable 			fake_rtable;
+	bool				call_nf;
 	bool				nf_call_iptables;
 	bool				nf_call_ip6tables;
 	bool				nf_call_arptables;
@@ -763,10 +764,18 @@ static inline int br_vlan_enabled(struct net_bridge *br)
 int br_nf_core_init(void);
 void br_nf_core_fini(void);
 void br_netfilter_rtable_init(struct net_bridge *);
+int br_nf_do_prerouting(struct sk_buff *skb, struct net_bridge_port *p);
+int br_nf_do_forward(struct sk_buff *skb, struct net_device *indev);
 #else
 static inline int br_nf_core_init(void) { return 0; }
 static inline void br_nf_core_fini(void) {}
 #define br_netfilter_rtable_init(x)
+#define br_nf_do_prerouting(skb, p) \
+	NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, \
+		br_handle_frame_finish)
+#define br_nf_do_forward(skb, indev) \
+	NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, \
+		br_forward_finish)
 #endif
 
 /* br_stp.c */
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 4c97fc5..707b994 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -630,6 +630,28 @@ static ssize_t multicast_startup_query_interval_store(
 static DEVICE_ATTR_RW(multicast_startup_query_interval);
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+static ssize_t call_nf_show(
+	struct device *d, struct device_attribute *attr, char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+	return sprintf(buf, "%u\n", br->call_nf);
+}
+
+static int set_call_nf(struct net_bridge *br, unsigned long val)
+{
+	br->call_nf = val ? true : false;
+	return 0;
+}
+
+static ssize_t call_nf_store(
+	struct device *d, struct device_attribute *attr, const char *buf,
+	size_t len)
+{
+	return store_bridge_parm(d, buf, len, set_call_nf);
+}
+static DEVICE_ATTR(call_nf, S_IRUGO | S_IWUSR,
+		   call_nf_show, call_nf_store);
+
 static ssize_t nf_call_iptables_show(
 	struct device *d, struct device_attribute *attr, char *buf)
 {
@@ -780,6 +802,7 @@ static struct attribute *bridge_attrs[] = {
 	&dev_attr_multicast_startup_query_interval.attr,
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+	&dev_attr_call_nf.attr,
 	&dev_attr_nf_call_iptables.attr,
 	&dev_attr_nf_call_ip6tables.attr,
 	&dev_attr_nf_call_arptables.attr,
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] bridge: make it possible for packets to traverse the bridge withour hitting netfilter
  2015-02-10  9:32 [PATCH] bridge: make it possible for packets to traverse the bridge withour hitting netfilter Imre Palik
@ 2015-02-11 22:29 ` David Miller
  2015-02-13 16:08   ` Imre Palik
  0 siblings, 1 reply; 7+ messages in thread
From: David Miller @ 2015-02-11 22:29 UTC (permalink / raw)
  To: imrep.amz; +Cc: bridge, stephen, netdev, linux-kernel, imrep, aliguori

From: Imre Palik <imrep.amz@gmail.com>
Date: Tue, 10 Feb 2015 10:32:24 +0100

> From: "Palik, Imre" <imrep@amazon.de>
> 
> The netfilter code is made with flexibility instead of performance in mind.
> So when all we want is to pass packets between different interfaces, the
> performance penalty of hitting netfilter code can be considerable, even when
> all the firewalling is disabled for the bridge.
> 
> This change makes it possible to disable netfilter both on a per bridge basis,
> or for the whole bridging subsystem.  In the case interesting to us, this can
> lead to more than 10% speedup compared to the case when only bridge-iptables
> are disabled.
> 
> Cc: Anthony Liguori <aliguori@amazon.com>
> Signed-off-by: Imre Palik <imrep@amazon.de>

Sorry, no.

If I apply this, someone is going to try to submit a patch for every
damn protocol layer to add a stupid hack like this.

Makw NF_HOOK() faster instead.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] bridge: make it possible for packets to traverse the bridge withour hitting netfilter
  2015-02-11 22:29 ` David Miller
@ 2015-02-13 16:08   ` Imre Palik
  2015-02-13 16:37     ` Florian Westphal
  0 siblings, 1 reply; 7+ messages in thread
From: Imre Palik @ 2015-02-13 16:08 UTC (permalink / raw)
  To: David Miller; +Cc: bridge, stephen, netdev, linux-kernel, imrep, aliguori

On 02/11/15 23:29, David Miller wrote:
> From: Imre Palik <imrep.amz@gmail.com>
> Date: Tue, 10 Feb 2015 10:32:24 +0100
> 
>> From: "Palik, Imre" <imrep@amazon.de>
>>
>> The netfilter code is made with flexibility instead of performance in mind.
>> So when all we want is to pass packets between different interfaces, the
>> performance penalty of hitting netfilter code can be considerable, even when
>> all the firewalling is disabled for the bridge.
>>
>> This change makes it possible to disable netfilter both on a per bridge basis,
>> or for the whole bridging subsystem.  In the case interesting to us, this can
>> lead to more than 10% speedup compared to the case when only bridge-iptables
>> are disabled.
>>
>> Cc: Anthony Liguori <aliguori@amazon.com>
>> Signed-off-by: Imre Palik <imrep@amazon.de>
> 
> Sorry, no.
> 
> If I apply this, someone is going to try to submit a patch for every
> damn protocol layer to add a stupid hack like this.

Actually this is one of those patches.  There is already a "stupid hack like this" for iptables and arptables.  (Implemented before git history, and giving me 10% speedup.  Many thanks, whoever did it.)

I also searched various LKML archives, and it seems the existing "stupid hacks" for iptables and arptables haven't resulted in any related patch submission in the last ten years.  (Or my google-fu is weak.)

Moreover, I cannot imagine any other reasonable on/off switch for bridge-netfilter than these three.  Of course, my imagination might be lacking there.

> Makw NF_HOOK() faster instead.

As far as I see, flexibility implies trashing the cache/TLB.  So it is impossible to have flexibility and performance at the same time.
(Except possibly with self modifying code, but that has its own set of problems.)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] bridge: make it possible for packets to traverse the bridge withour hitting netfilter
  2015-02-13 16:08   ` Imre Palik
@ 2015-02-13 16:37     ` Florian Westphal
  2015-02-13 17:45       ` Imre Palik
  0 siblings, 1 reply; 7+ messages in thread
From: Florian Westphal @ 2015-02-13 16:37 UTC (permalink / raw)
  To: Imre Palik
  Cc: David Miller, bridge, stephen, netdev, linux-kernel, imrep, aliguori

Imre Palik <imrep.amz@gmail.com> wrote:
> On 02/11/15 23:29, David Miller wrote:
> > If I apply this, someone is going to try to submit a patch for every
> > damn protocol layer to add a stupid hack like this.
> 
> Actually this is one of those patches.  There is already a "stupid hack like this" for iptables and arptables.  (Implemented before git history, and giving me 10% speedup.  Many thanks, whoever did it.)
> 
> I also searched various LKML archives, and it seems the existing "stupid hacks" for iptables and arptables haven't resulted in any related patch submission in the last ten years.  (Or my google-fu is weak.)
> 
> Moreover, I cannot imagine any other reasonable on/off switch for bridge-netfilter than these three.  Of course, my imagination might be lacking there.

Why do you load the bridge netfilter module if you don't want it?
Loading it registers the internal hooks for the call-ip(6)tables and
sabotage hooks with NF_BRIDGE protocol so most of the NF_HOOK(NF_BRIDGE, ...
calls become active.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] bridge: make it possible for packets to traverse the bridge withour hitting netfilter
  2015-02-13 16:37     ` Florian Westphal
@ 2015-02-13 17:45       ` Imre Palik
  2015-02-13 19:03         ` Florian Westphal
  0 siblings, 1 reply; 7+ messages in thread
From: Imre Palik @ 2015-02-13 17:45 UTC (permalink / raw)
  To: Florian Westphal
  Cc: David Miller, bridge, stephen, netdev, linux-kernel, imrep, aliguori

On 02/13/15 17:37, Florian Westphal wrote:
> Imre Palik <imrep.amz@gmail.com> wrote:
>> On 02/11/15 23:29, David Miller wrote:
>>> If I apply this, someone is going to try to submit a patch for every
>>> damn protocol layer to add a stupid hack like this.
>>
>> Actually this is one of those patches.  There is already a "stupid hack like this" for iptables and arptables.  (Implemented before git history, and giving me 10% speedup.  Many thanks, whoever did it.)
>>
>> I also searched various LKML archives, and it seems the existing "stupid hacks" for iptables and arptables haven't resulted in any related patch submission in the last ten years.  (Or my google-fu is weak.)
>>
>> Moreover, I cannot imagine any other reasonable on/off switch for bridge-netfilter than these three.  Of course, my imagination might be lacking there.
> 
> Why do you load the bridge netfilter module if you don't want it?
> Loading it registers the internal hooks for the call-ip(6)tables and
> sabotage hooks with NF_BRIDGE protocol so most of the NF_HOOK(NF_BRIDGE, ...
> calls become active.
> 

The trouble is that there are some bridges (with low traffic) where I need netfilter, and some other bridges (carrying lots of traffic), where I don't.  Being able to set things up on a per bridge basis is a powerful thing.

I only implemented the global switch because the iptables and arptables support also have one.  If this is what bugs people here, I can remove it, and resubmit.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] bridge: make it possible for packets to traverse the bridge withour hitting netfilter
  2015-02-13 17:45       ` Imre Palik
@ 2015-02-13 19:03         ` Florian Westphal
  2015-02-23 15:24           ` Imre Palik
  0 siblings, 1 reply; 7+ messages in thread
From: Florian Westphal @ 2015-02-13 19:03 UTC (permalink / raw)
  To: Imre Palik
  Cc: Florian Westphal, David Miller, bridge, stephen, netdev,
	linux-kernel, imrep, aliguori

Imre Palik <imrep.amz@gmail.com> wrote:
> The trouble is that there are some bridges (with low traffic) where I need netfilter, and some other bridges (carrying lots of traffic), where I don't.  Being able to set things up on a per bridge basis is a powerful thing.
> 
> I only implemented the global switch because the iptables and arptables support also have one.  If this is what bugs people here, I can remove it, and resubmit.

I see.  But I agree with David, accepting such patch would pave way
for all kinds of ugly hacks.

It seems that technically the best solution would be to allow attaching
filter rules to devices, but alas, netfilter doesn't support that.

Alternatively, you patch *might* be ok iff you can get rid of the extra
userspace-visible configuration knobs, we already have way too many of
these.

You'll also have to figure out how to avoid any run-time dependency on
br_netfilter module from the bridge core.

If you can do this, you might be able to get similar effect as your patch
by replacing

NF_HOOK with NF_HOOK_COND(..., !(br->flags & NO_NETFILTER))

or something like this.

I don't know how invasive this would be, though.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] bridge: make it possible for packets to traverse the bridge withour hitting netfilter
  2015-02-13 19:03         ` Florian Westphal
@ 2015-02-23 15:24           ` Imre Palik
  0 siblings, 0 replies; 7+ messages in thread
From: Imre Palik @ 2015-02-23 15:24 UTC (permalink / raw)
  To: Florian Westphal
  Cc: David Miller, bridge, stephen, netdev, linux-kernel, imrep, aliguori

On 02/13/15 20:03, Florian Westphal wrote:
> Imre Palik <imrep.amz@gmail.com> wrote:
>> The trouble is that there are some bridges (with low traffic) where I need netfilter, and some other bridges (carrying lots of traffic), where I don't.  Being able to set things up on a per bridge basis is a powerful thing.
>>
>> I only implemented the global switch because the iptables and arptables support also have one.  If this is what bugs people here, I can remove it, and resubmit.
> 
> I see.  But I agree with David, accepting such patch would pave way
> for all kinds of ugly hacks.
> 
> It seems that technically the best solution would be to allow attaching
> filter rules to devices, but alas, netfilter doesn't support that.
> 
> Alternatively, you patch *might* be ok iff you can get rid of the extra
> userspace-visible configuration knobs, we already have way too many of
> these.

The sysctl can be removed.  But I need some means to switch it off for a given bridge, so I kept the sysfs interface.
If there is a more preferred way to do it, then please let me know.

> You'll also have to figure out how to avoid any run-time dependency on
> br_netfilter module from the bridge core.
> 
> If you can do this, you might be able to get similar effect as your patch
> by replacing
> 
> NF_HOOK with NF_HOOK_COND(..., !(br->flags & NO_NETFILTER))
> 
> or something like this.

This works nicely for the NFPROTO_BRIDGE, NF_BR_PRE_ROUTING case.  Thanks for the idea.
But for the NFPROTO_BRIDGE, NF_BR_FORWARD case the resulting code would be more ugly,
because of the chaining of the entries.

> I don't know how invasive this would be, though.

I will post the cleaned up version in a sec.
It looks way better.   I hope it will be enough ...

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-02-23 15:24 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-10  9:32 [PATCH] bridge: make it possible for packets to traverse the bridge withour hitting netfilter Imre Palik
2015-02-11 22:29 ` David Miller
2015-02-13 16:08   ` Imre Palik
2015-02-13 16:37     ` Florian Westphal
2015-02-13 17:45       ` Imre Palik
2015-02-13 19:03         ` Florian Westphal
2015-02-23 15:24           ` Imre Palik

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).