All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] net: bridge: add max_fdb_count
@ 2017-11-15 19:27 Sarah Newman
  2017-11-15 19:43 ` Sarah Newman
                   ` (5 more replies)
  0 siblings, 6 replies; 27+ messages in thread
From: Sarah Newman @ 2017-11-15 19:27 UTC (permalink / raw)
  To: netdev; +Cc: Sarah Newman

Current memory and CPU usage for managing bridge fdb entries is unbounded.
Add a parameter max_fdb_count, controlled from sysfs, which places an upper
limit on the number of entries. Defaults to 1024.

When max_fdb_count is met or exceeded, whether traffic is sent out a
given port should depend on its flooding behavior.

This may instead be mitigated by filtering mac address entries in the
PREROUTING chain of the ebtables nat table, but this is only practical
when mac addresses are known in advance.

Signed-off-by: Sarah Newman <srn@prgmr.com>
---
 net/bridge/br_device.c   |  2 ++
 net/bridge/br_fdb.c      | 25 ++++++++++++++++++++-----
 net/bridge/br_private.h  |  3 +++
 net/bridge/br_sysfs_br.c | 24 ++++++++++++++++++++++++
 4 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index af5b8c8..aa7a7f4 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -432,6 +432,8 @@ void br_dev_setup(struct net_device *dev)
 	br->bridge_hello_time = br->hello_time = 2 * HZ;
 	br->bridge_forward_delay = br->forward_delay = 15 * HZ;
 	br->bridge_ageing_time = br->ageing_time = BR_DEFAULT_AGEING_TIME;
+	br->max_fdb_count = 1024;
+	br->fdb_count = 0;
 	dev->max_mtu = ETH_MAX_MTU;
 
 	br_netfilter_rtable_init(br);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 4ea5c8b..0422d48 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -179,6 +179,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
 
 	hlist_del_init_rcu(&f->hlist);
 	fdb_notify(br, f, RTM_DELNEIGH);
+	br->fdb_count -= 1;
 	call_rcu(&f->rcu, fdb_rcu_free);
 }
 
@@ -478,7 +479,8 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
 	return num;
 }
 
-static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
+static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
+					       struct hlist_head *head,
 					       struct net_bridge_port *source,
 					       const unsigned char *addr,
 					       __u16 vid,
@@ -498,6 +500,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
 		fdb->added_by_external_learn = 0;
 		fdb->offloaded = 0;
 		fdb->updated = fdb->used = jiffies;
+		br->fdb_count += 1;
 		hlist_add_head_rcu(&fdb->hlist, head);
 	}
 	return fdb;
@@ -524,7 +527,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 		fdb_delete(br, fdb);
 	}
 
-	fdb = fdb_create(head, source, addr, vid, 1, 1);
+	fdb = fdb_create(br, head, source, addr, vid, 1, 1);
 	if (!fdb)
 		return -ENOMEM;
 
@@ -556,6 +559,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 	if (hold_time(br) == 0)
 		return;
 
+	/* Place maximum on number of learned entries. */
+	if (br->max_fdb_count <= br->fdb_count)
+		return;
+
 	/* ignore packets unless we are using this port */
 	if (!(source->state == BR_STATE_LEARNING ||
 	      source->state == BR_STATE_FORWARDING))
@@ -591,7 +598,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 	} else {
 		spin_lock(&br->hash_lock);
 		if (likely(!fdb_find_rcu(head, addr, vid))) {
-			fdb = fdb_create(head, source, addr, vid, 0, 0);
+			fdb = fdb_create(br, head, source, addr, vid, 0, 0);
 			if (fdb) {
 				if (unlikely(added_by_user))
 					fdb->added_by_user = 1;
@@ -787,7 +794,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
 		if (!(flags & NLM_F_CREATE))
 			return -ENOENT;
 
-		fdb = fdb_create(head, source, addr, vid, 0, 0);
+		fdb = fdb_create(br, head, source, addr, vid, 0, 0);
 		if (!fdb)
 			return -ENOMEM;
 
@@ -1081,7 +1088,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
 	head = &br->hash[br_mac_hash(addr, vid)];
 	fdb = br_fdb_find(br, addr, vid);
 	if (!fdb) {
-		fdb = fdb_create(head, p, addr, vid, 0, 0);
+		fdb = fdb_create(br, head, p, addr, vid, 0, 0);
 		if (!fdb) {
 			err = -ENOMEM;
 			goto err_unlock;
@@ -1147,3 +1154,11 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
 
 	spin_unlock_bh(&br->hash_lock);
 }
+
+int br_set_max_fdb_count(struct net_bridge *br, unsigned long max_fdb_count)
+{
+	spin_lock_bh(&br->lock);
+	br->max_fdb_count = max_fdb_count;
+	spin_unlock_bh(&br->lock);
+	return 0;
+}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1312b8d..98aae1f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -343,6 +343,8 @@ struct net_bridge {
 	unsigned long			bridge_hello_time;
 	unsigned long			bridge_forward_delay;
 	unsigned long			bridge_ageing_time;
+	unsigned long			max_fdb_count;
+	unsigned long			fdb_count;
 
 	u8				group_addr[ETH_ALEN];
 	bool				group_addr_set;
@@ -549,6 +551,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
 			      const unsigned char *addr, u16 vid);
 void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
 			  const unsigned char *addr, u16 vid);
+int br_set_max_fdb_count(struct net_bridge *br, unsigned long x);
 
 /* br_forward.c */
 enum br_pkt_type {
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 723f25e..18fabdf 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -335,6 +335,28 @@ static ssize_t flush_store(struct device *d,
 }
 static DEVICE_ATTR_WO(flush);
 
+static ssize_t max_fdb_count_show(struct device *d, struct device_attribute *attr,
+			     char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+	return sprintf(buf, "%lu\n", br->max_fdb_count);
+}
+
+static ssize_t max_fdb_count_store(struct device *d, struct device_attribute *attr,
+			      const char *buf, size_t len)
+{
+	return store_bridge_parm(d, buf, len, br_set_max_fdb_count);
+}
+static DEVICE_ATTR_RW(max_fdb_count);
+
+static ssize_t fdb_count_show(struct device *d, struct device_attribute *attr,
+			    char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+	return sprintf(buf, "%lu\n", br->fdb_count);
+}
+static DEVICE_ATTR_RO(fdb_count);
+
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 static ssize_t multicast_router_show(struct device *d,
 				     struct device_attribute *attr, char *buf)
@@ -830,6 +852,8 @@ static ssize_t vlan_stats_enabled_store(struct device *d,
 	&dev_attr_gc_timer.attr,
 	&dev_attr_group_addr.attr,
 	&dev_attr_flush.attr,
+	&dev_attr_max_fdb_count.attr,
+	&dev_attr_fdb_count.attr,
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 	&dev_attr_multicast_router.attr,
 	&dev_attr_multicast_snooping.attr,
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-15 19:27 [PATCH] net: bridge: add max_fdb_count Sarah Newman
@ 2017-11-15 19:43 ` Sarah Newman
  2017-11-15 20:04 ` Stephen Hemminger
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 27+ messages in thread
From: Sarah Newman @ 2017-11-15 19:43 UTC (permalink / raw)
  To: netdev

On 11/15/2017 11:27 AM, Sarah Newman wrote:
> Current memory and CPU usage for managing bridge fdb entries is unbounded.
> Add a parameter max_fdb_count, controlled from sysfs, which places an upper
> limit on the number of entries. Defaults to 1024.
> 
> When max_fdb_count is met or exceeded, whether traffic is sent out a
> given port should depend on its flooding behavior.
> 
> This may instead be mitigated by filtering mac address entries in the
> PREROUTING chain of the ebtables nat table, but this is only practical
> when mac addresses are known in advance.
> 
> Signed-off-by: Sarah Newman <srn@prgmr.com>

I would love to improve this patch, but have limited time to devote to this...
What I would try first would be to maintain a data structure roughly ordered
based on both number of times an address was observed as well as age and evict
the least used, oldest entry when max_fdb_count was reached.

--Sarah

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-15 19:27 [PATCH] net: bridge: add max_fdb_count Sarah Newman
  2017-11-15 19:43 ` Sarah Newman
@ 2017-11-15 20:04 ` Stephen Hemminger
  2017-11-16  2:25   ` Andrew Lunn
  2017-11-15 21:34 ` Egil Hjelmeland
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 27+ messages in thread
From: Stephen Hemminger @ 2017-11-15 20:04 UTC (permalink / raw)
  To: Sarah Newman; +Cc: netdev

On Wed, 15 Nov 2017 11:27:07 -0800
Sarah Newman <srn@prgmr.com> wrote:

> Current memory and CPU usage for managing bridge fdb entries is unbounded.
> Add a parameter max_fdb_count, controlled from sysfs, which places an upper
> limit on the number of entries. Defaults to 1024.
> 
> When max_fdb_count is met or exceeded, whether traffic is sent out a
> given port should depend on its flooding behavior.
> 
> This may instead be mitigated by filtering mac address entries in the
> PREROUTING chain of the ebtables nat table, but this is only practical
> when mac addresses are known in advance.
> 
> Signed-off-by: Sarah Newman <srn@prgmr.com>

Thanks for your patch, I can see how this can be a real
problem, especially for a learning bridge.

There are some details which probably need to be resolved before
this can be applied.

* if the bridge is being DoS attacked by random MAC addresses
  then the policy under spam needs to be considered.
  Not sure if not inserting the new entry is the best policy.

* The counter probably doesn't need to be unsigned long (64 bit on x86_64)
  maybe u32 is enough.

* The bridge attributes in general are controllable both by netlink
  and sysfs. It would make sense to do this for max fdb as well.

Also what do the vendors using bridge for L2 offload to switch think?
Many switches need to clone table, and similar limits must be in other
versions. FreeBSD has a limit like this.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-15 19:27 [PATCH] net: bridge: add max_fdb_count Sarah Newman
  2017-11-15 19:43 ` Sarah Newman
  2017-11-15 20:04 ` Stephen Hemminger
@ 2017-11-15 21:34 ` Egil Hjelmeland
  2017-11-16  3:01 ` Andrew Lunn
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 27+ messages in thread
From: Egil Hjelmeland @ 2017-11-15 21:34 UTC (permalink / raw)
  To: Sarah Newman, netdev



Den 15. nov. 2017 20:27, skrev Sarah Newman:
> Current memory and CPU usage for managing bridge fdb entries is unbounded.
> Add a parameter max_fdb_count, controlled from sysfs, which places an upper
> limit on the number of entries. Defaults to 1024.
> 
> When max_fdb_count is met or exceeded, whether traffic is sent out a
> given port should depend on its flooding behavior.
> 
> This may instead be mitigated by filtering mac address entries in the
> PREROUTING chain of the ebtables nat table, but this is only practical
> when mac addresses are known in advance.
> 
> Signed-off-by: Sarah Newman <srn@prgmr.com>
> ---
>   net/bridge/br_device.c   |  2 ++
>   net/bridge/br_fdb.c      | 25 ++++++++++++++++++++-----
>   net/bridge/br_private.h  |  3 +++
>   net/bridge/br_sysfs_br.c | 24 ++++++++++++++++++++++++
>   4 files changed, 49 insertions(+), 5 deletions(-)
> 
> diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
> index 723f25e..18fabdf 100644
> --- a/net/bridge/br_sysfs_br.c
> +++ b/net/bridge/br_sysfs_br.c
> @@ -335,6 +335,28 @@ static ssize_t flush_store(struct device *d,
>   }
>   static DEVICE_ATTR_WO(flush);
>   
> +static ssize_t max_fdb_count_show(struct device *d, struct device_attribute *attr,
> +			     char *buf)
> +{
> +	struct net_bridge *br = to_bridge(d);
> +	return sprintf(buf, "%lu\n", br->max_fdb_count);
> +}
> +
> +static ssize_t max_fdb_count_store(struct device *d, struct device_attribute *attr,
> +			      const char *buf, size_t len)
> +{
> +	return store_bridge_parm(d, buf, len, br_set_max_fdb_count);
> +}
> +static DEVICE_ATTR_RW(max_fdb_count);
> +
> +static ssize_t fdb_count_show(struct device *d, struct device_attribute *attr,
> +			    char *buf)
> +{
> +	struct net_bridge *br = to_bridge(d);
> +	return sprintf(buf, "%lu\n", br->fdb_count);
> +}
> +static DEVICE_ATTR_RO(fdb_count);
> +
>   #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
>   static ssize_t multicast_router_show(struct device *d,
>   				     struct device_attribute *attr, char *buf)
> @@ -830,6 +852,8 @@ static ssize_t vlan_stats_enabled_store(struct device *d,
>   	&dev_attr_gc_timer.attr,
>   	&dev_attr_group_addr.attr,
>   	&dev_attr_flush.attr,
> +	&dev_attr_max_fdb_count.attr,
> +	&dev_attr_fdb_count.attr,
>   #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
>   	&dev_attr_multicast_router.attr,
>   	&dev_attr_multicast_snooping.attr,
> 



Documentation/filesystems/sysfs.txt:

All new sysfs attributes must be documented in Documentation/ABI. See 
also Documentation/ABI/README for more information.


Egil

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-15 20:04 ` Stephen Hemminger
@ 2017-11-16  2:25   ` Andrew Lunn
  2017-11-16  4:05     ` Toshiaki Makita
  0 siblings, 1 reply; 27+ messages in thread
From: Andrew Lunn @ 2017-11-16  2:25 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Sarah Newman, netdev

> Also what do the vendors using bridge for L2 offload to switch think?

The Marvell L2 switches which DSA supports have 8K FDB/MDB entries. So
maybe 1024 is a bit low?

How big is an FDB entry? Even an OpenWRT/LEDE class devices with 512MB
RAM can probably support 1MB of RAM for FDB entries.

> Many switches need to clone table, and similar limits must be in
> other versions.

DSA does not maintain synchronisation between the hardware tables and
the software tables. So no cloning. We expect both bridges to perform
learning based on the frames. This means we should be able to handle
one bridge flooding because its table is full, while the other has an
entry and forwards out the correct port.

	 Andrew

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-15 19:27 [PATCH] net: bridge: add max_fdb_count Sarah Newman
                   ` (2 preceding siblings ...)
  2017-11-15 21:34 ` Egil Hjelmeland
@ 2017-11-16  3:01 ` Andrew Lunn
  2017-11-16  7:31 ` Nikolay Aleksandrov
  2017-11-21 14:53 ` David Laight
  5 siblings, 0 replies; 27+ messages in thread
From: Andrew Lunn @ 2017-11-16  3:01 UTC (permalink / raw)
  To: Sarah Newman; +Cc: netdev

> @@ -556,6 +559,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
>  	if (hold_time(br) == 0)
>  		return;
>  
> +	/* Place maximum on number of learned entries. */
> +	if (br->max_fdb_count <= br->fdb_count)
> +		return;
> +

Hi Sarah

We probably want a rate limited warning printed here. If the table is
full, it will have to start flooding frames, which is not good for
performance. Having something in the log will help debug the problem.
The log will also give a hint that a DoS attack could be happening.

    Andrew

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-16  2:25   ` Andrew Lunn
@ 2017-11-16  4:05     ` Toshiaki Makita
  2017-11-16  4:54       ` Sarah Newman
  0 siblings, 1 reply; 27+ messages in thread
From: Toshiaki Makita @ 2017-11-16  4:05 UTC (permalink / raw)
  To: Andrew Lunn, Stephen Hemminger, Sarah Newman; +Cc: netdev

On 2017/11/16 11:25, Andrew Lunn wrote:
>> Also what do the vendors using bridge for L2 offload to switch think?
> 
> The Marvell L2 switches which DSA supports have 8K FDB/MDB entries. So
> maybe 1024 is a bit low?

How about U32_MAX by default since it is currently not restricted.
(assuming the field will be changed to u32 as per Stephen's feedback).

Otherwise users may suffer from unexpected behavior change by updating
kernel?

-- 
Toshiaki Makita

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-16  4:05     ` Toshiaki Makita
@ 2017-11-16  4:54       ` Sarah Newman
  2017-11-16  6:13         ` Toshiaki Makita
  0 siblings, 1 reply; 27+ messages in thread
From: Sarah Newman @ 2017-11-16  4:54 UTC (permalink / raw)
  To: Toshiaki Makita, Andrew Lunn, Stephen Hemminger; +Cc: netdev

On 11/15/2017 08:05 PM, Toshiaki Makita wrote:
> On 2017/11/16 11:25, Andrew Lunn wrote:
>>> Also what do the vendors using bridge for L2 offload to switch think?
>>
>> The Marvell L2 switches which DSA supports have 8K FDB/MDB entries. So
>> maybe 1024 is a bit low?
> 
> How about U32_MAX by default since it is currently not restricted.
> (assuming the field will be changed to u32 as per Stephen's feedback).
> 
> Otherwise users may suffer from unexpected behavior change by updating
> kernel?
> 

U32_MAX seems like much too high a default to be helpful to a typical user. How many devices are realistically on a single bridge in the wild? Double
that seems like a reasonable default.

Additionally, since the exact limit seems controversial, it can be made a configuration parameter.

What about the rest?

--Sarah

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-16  4:54       ` Sarah Newman
@ 2017-11-16  6:13         ` Toshiaki Makita
  2017-11-16  6:20           ` Roopa Prabhu
  0 siblings, 1 reply; 27+ messages in thread
From: Toshiaki Makita @ 2017-11-16  6:13 UTC (permalink / raw)
  To: Sarah Newman, Andrew Lunn, Stephen Hemminger; +Cc: netdev

On 2017/11/16 13:54, Sarah Newman wrote:
> On 11/15/2017 08:05 PM, Toshiaki Makita wrote:
>> On 2017/11/16 11:25, Andrew Lunn wrote:
>>>> Also what do the vendors using bridge for L2 offload to switch think?
>>>
>>> The Marvell L2 switches which DSA supports have 8K FDB/MDB entries. So
>>> maybe 1024 is a bit low?
>>
>> How about U32_MAX by default since it is currently not restricted.
>> (assuming the field will be changed to u32 as per Stephen's feedback).
>>
>> Otherwise users may suffer from unexpected behavior change by updating
>> kernel?
>>
> 
> U32_MAX seems like much too high a default to be helpful to a typical user. How many devices are realistically on a single bridge in the wild? Double
> that seems like a reasonable default.

I'm suggesting the most unrealistic number to essentially disable the
restriction by default.
My understanding is that we put a priority on not to break existing
users even if the new restriction looks reasonable for most people.

-- 
Toshiaki Makita

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-16  6:13         ` Toshiaki Makita
@ 2017-11-16  6:20           ` Roopa Prabhu
  2017-11-16 16:54             ` Stephen Hemminger
  0 siblings, 1 reply; 27+ messages in thread
From: Roopa Prabhu @ 2017-11-16  6:20 UTC (permalink / raw)
  To: Toshiaki Makita; +Cc: Sarah Newman, Andrew Lunn, Stephen Hemminger, netdev

On Wed, Nov 15, 2017 at 10:13 PM, Toshiaki Makita
<makita.toshiaki@lab.ntt.co.jp> wrote:
> On 2017/11/16 13:54, Sarah Newman wrote:
>> On 11/15/2017 08:05 PM, Toshiaki Makita wrote:
>>> On 2017/11/16 11:25, Andrew Lunn wrote:
>>>>> Also what do the vendors using bridge for L2 offload to switch think?
>>>>
>>>> The Marvell L2 switches which DSA supports have 8K FDB/MDB entries. So
>>>> maybe 1024 is a bit low?
>>>
>>> How about U32_MAX by default since it is currently not restricted.
>>> (assuming the field will be changed to u32 as per Stephen's feedback).
>>>
>>> Otherwise users may suffer from unexpected behavior change by updating
>>> kernel?
>>>
>>
>> U32_MAX seems like much too high a default to be helpful to a typical user. How many devices are realistically on a single bridge in the wild? Double
>> that seems like a reasonable default.
>
> I'm suggesting the most unrealistic number to essentially disable the
> restriction by default.
> My understanding is that we put a priority on not to break existing
> users even if the new restriction looks reasonable for most people.

+1 , and yes, 1024 is very low. some of the switches we use support
around 128K FDB entries and we have seen that number increase fairly
quickly in newer generation switches. Default should be no limit to
not break existing users.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-15 19:27 [PATCH] net: bridge: add max_fdb_count Sarah Newman
                   ` (3 preceding siblings ...)
  2017-11-16  3:01 ` Andrew Lunn
@ 2017-11-16  7:31 ` Nikolay Aleksandrov
  2017-11-16  9:20   ` Sarah Newman
  2017-11-21 14:53 ` David Laight
  5 siblings, 1 reply; 27+ messages in thread
From: Nikolay Aleksandrov @ 2017-11-16  7:31 UTC (permalink / raw)
  To: Sarah Newman, netdev; +Cc: roopa

On 15/11/17 21:27, Sarah Newman wrote:
> Current memory and CPU usage for managing bridge fdb entries is unbounded.
> Add a parameter max_fdb_count, controlled from sysfs, which places an upper
> limit on the number of entries. Defaults to 1024.
> 
> When max_fdb_count is met or exceeded, whether traffic is sent out a
> given port should depend on its flooding behavior.
> 
> This may instead be mitigated by filtering mac address entries in the
> PREROUTING chain of the ebtables nat table, but this is only practical
> when mac addresses are known in advance.
> 

One alternative solution: if limit is the only requirement it could be done
in user-space (even with a shell script) looking at fdb notifications and
if you reach some limit then remove the learning flag from ports, later if
enough expire turn it back on. In fact you can make any policy and if you
catch an offending port - you can disable only its learning flag and leave the
rest.

> Signed-off-by: Sarah Newman <srn@prgmr.com>
> ---
>  net/bridge/br_device.c   |  2 ++
>  net/bridge/br_fdb.c      | 25 ++++++++++++++++++++-----
>  net/bridge/br_private.h  |  3 +++
>  net/bridge/br_sysfs_br.c | 24 ++++++++++++++++++++++++
>  4 files changed, 49 insertions(+), 5 deletions(-)
> 

As others have mentioned placing a default limit would break existing users,
so this must be a config option that is off (or very large) by default.

> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> index af5b8c8..aa7a7f4 100644
> --- a/net/bridge/br_device.c
> +++ b/net/bridge/br_device.c
> @@ -432,6 +432,8 @@ void br_dev_setup(struct net_device *dev)
>  	br->bridge_hello_time = br->hello_time = 2 * HZ;
>  	br->bridge_forward_delay = br->forward_delay = 15 * HZ;
>  	br->bridge_ageing_time = br->ageing_time = BR_DEFAULT_AGEING_TIME;
> +	br->max_fdb_count = 1024;
> +	br->fdb_count = 0;

No need to zero this explicitly, it is already zero.

>  	dev->max_mtu = ETH_MAX_MTU;
>  
>  	br_netfilter_rtable_init(br);
> diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
> index 4ea5c8b..0422d48 100644
> --- a/net/bridge/br_fdb.c
> +++ b/net/bridge/br_fdb.c
> @@ -179,6 +179,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
>  
>  	hlist_del_init_rcu(&f->hlist);
>  	fdb_notify(br, f, RTM_DELNEIGH);
> +	br->fdb_count -= 1;
>  	call_rcu(&f->rcu, fdb_rcu_free);
>  }
>  
> @@ -478,7 +479,8 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
>  	return num;
>  }
>  
> -static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
> +static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
> +					       struct hlist_head *head,
>  					       struct net_bridge_port *source,
>  					       const unsigned char *addr,
>  					       __u16 vid,
> @@ -498,6 +500,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
>  		fdb->added_by_external_learn = 0;
>  		fdb->offloaded = 0;
>  		fdb->updated = fdb->used = jiffies;
> +		br->fdb_count += 1;
>  		hlist_add_head_rcu(&fdb->hlist, head);
>  	}
>  	return fdb;
> @@ -524,7 +527,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
>  		fdb_delete(br, fdb);
>  	}
>  
> -	fdb = fdb_create(head, source, addr, vid, 1, 1);
> +	fdb = fdb_create(br, head, source, addr, vid, 1, 1);
>  	if (!fdb)
>  		return -ENOMEM;
>  
> @@ -556,6 +559,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
>  	if (hold_time(br) == 0)
>  		return;
>  
> +	/* Place maximum on number of learned entries. */
> +	if (br->max_fdb_count <= br->fdb_count)
> +		return;
> +

checking for fdb_count in the beginning of br_fdb_update would cause stale fdb
entries, that is fdb moves between ports will stop and fdb updates will stop too
(i.e. everything expires)

>  	/* ignore packets unless we are using this port */
>  	if (!(source->state == BR_STATE_LEARNING ||
>  	      source->state == BR_STATE_FORWARDING))
> @@ -591,7 +598,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
>  	} else {
>  		spin_lock(&br->hash_lock);
>  		if (likely(!fdb_find_rcu(head, addr, vid))) {
> -			fdb = fdb_create(head, source, addr, vid, 0, 0);
> +			fdb = fdb_create(br, head, source, addr, vid, 0, 0);
>  			if (fdb) {
>  				if (unlikely(added_by_user))
>  					fdb->added_by_user = 1;
> @@ -787,7 +794,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
>  		if (!(flags & NLM_F_CREATE))
>  			return -ENOENT;
>  
> -		fdb = fdb_create(head, source, addr, vid, 0, 0);
> +		fdb = fdb_create(br, head, source, addr, vid, 0, 0);
>  		if (!fdb)
>  			return -ENOMEM;
>  
> @@ -1081,7 +1088,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
>  	head = &br->hash[br_mac_hash(addr, vid)];
>  	fdb = br_fdb_find(br, addr, vid);
>  	if (!fdb) {
> -		fdb = fdb_create(head, p, addr, vid, 0, 0);
> +		fdb = fdb_create(br, head, p, addr, vid, 0, 0);
>  		if (!fdb) {
>  			err = -ENOMEM;
>  			goto err_unlock;
> @@ -1147,3 +1154,11 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
>  
>  	spin_unlock_bh(&br->hash_lock);
>  }
> +
> +int br_set_max_fdb_count(struct net_bridge *br, unsigned long max_fdb_count)
> +{
> +	spin_lock_bh(&br->lock);
> +	br->max_fdb_count = max_fdb_count;
> +	spin_unlock_bh(&br->lock);

No need to take the spin lock here as this value is checked without it anyway,
and all bridge dev option changes are done under rtnl lock already.

> +	return 0;
> +}
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index 1312b8d..98aae1f 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -343,6 +343,8 @@ struct net_bridge {
>  	unsigned long			bridge_hello_time;
>  	unsigned long			bridge_forward_delay;
>  	unsigned long			bridge_ageing_time;
> +	unsigned long			max_fdb_count;
> +	unsigned long			fdb_count;
>  
>  	u8				group_addr[ETH_ALEN];
>  	bool				group_addr_set;
> @@ -549,6 +551,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
>  			      const unsigned char *addr, u16 vid);
>  void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
>  			  const unsigned char *addr, u16 vid);
> +int br_set_max_fdb_count(struct net_bridge *br, unsigned long x);
>  
>  /* br_forward.c */
>  enum br_pkt_type {
> diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
> index 723f25e..18fabdf 100644
> --- a/net/bridge/br_sysfs_br.c
> +++ b/net/bridge/br_sysfs_br.c
> @@ -335,6 +335,28 @@ static ssize_t flush_store(struct device *d,
>  }
>  static DEVICE_ATTR_WO(flush);
>  
> +static ssize_t max_fdb_count_show(struct device *d, struct device_attribute *attr,
> +			     char *buf)
> +{
> +	struct net_bridge *br = to_bridge(d);
> +	return sprintf(buf, "%lu\n", br->max_fdb_count);
> +}
> +
> +static ssize_t max_fdb_count_store(struct device *d, struct device_attribute *attr,
> +			      const char *buf, size_t len)
> +{
> +	return store_bridge_parm(d, buf, len, br_set_max_fdb_count);
> +}
> +static DEVICE_ATTR_RW(max_fdb_count);
> +
> +static ssize_t fdb_count_show(struct device *d, struct device_attribute *attr,
> +			    char *buf)
> +{
> +	struct net_bridge *br = to_bridge(d);
> +	return sprintf(buf, "%lu\n", br->fdb_count);
> +}
> +static DEVICE_ATTR_RO(fdb_count);
> +
>  #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
>  static ssize_t multicast_router_show(struct device *d,
>  				     struct device_attribute *attr, char *buf)
> @@ -830,6 +852,8 @@ static ssize_t vlan_stats_enabled_store(struct device *d,
>  	&dev_attr_gc_timer.attr,
>  	&dev_attr_group_addr.attr,
>  	&dev_attr_flush.attr,
> +	&dev_attr_max_fdb_count.attr,
> +	&dev_attr_fdb_count.attr,
>  #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
>  	&dev_attr_multicast_router.attr,
>  	&dev_attr_multicast_snooping.attr,
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-16  7:31 ` Nikolay Aleksandrov
@ 2017-11-16  9:20   ` Sarah Newman
  2017-11-16  9:49     ` Nikolay Aleksandrov
  2017-11-16  9:58     ` Willy Tarreau
  0 siblings, 2 replies; 27+ messages in thread
From: Sarah Newman @ 2017-11-16  9:20 UTC (permalink / raw)
  To: Nikolay Aleksandrov, netdev; +Cc: roopa

On 11/15/2017 11:31 PM, Nikolay Aleksandrov wrote:
> On 15/11/17 21:27, Sarah Newman wrote:
>> Current memory and CPU usage for managing bridge fdb entries is unbounded.
>> Add a parameter max_fdb_count, controlled from sysfs, which places an upper
>> limit on the number of entries. Defaults to 1024.
>>
>> When max_fdb_count is met or exceeded, whether traffic is sent out a
>> given port should depend on its flooding behavior.
>>
>> This may instead be mitigated by filtering mac address entries in the
>> PREROUTING chain of the ebtables nat table, but this is only practical
>> when mac addresses are known in advance.
>>
> 
> One alternative solution: if limit is the only requirement it could be done
> in user-space (even with a shell script) looking at fdb notifications and
> if you reach some limit then remove the learning flag from ports, later if
> enough expire turn it back on. In fact you can make any policy and if you
> catch an offending port - you can disable only its learning flag and leave the
> rest.

Leaving such a trivial DOS in the kernel doesn't seem like a good idea to me,
but I suppose it hasn't bothered anyone else up to now except this person
back in 2013: https://www.keypressure.com/blog/linux-bridge-port-security/

I note that anyone who would run up against a too-low limit on the maximum
number of fdb entries would also be savvy enough to fix it in a matter of
minutes. They could also default the limit to U32_MAX in their particular
distribution if it was a configuration option.

At the moment there is not even a single log message if the problem doesn't
result in memory exhaustion.

--Sarah

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-16  9:20   ` Sarah Newman
@ 2017-11-16  9:49     ` Nikolay Aleksandrov
  2017-11-16  9:58     ` Willy Tarreau
  1 sibling, 0 replies; 27+ messages in thread
From: Nikolay Aleksandrov @ 2017-11-16  9:49 UTC (permalink / raw)
  To: Sarah Newman, netdev; +Cc: roopa

On 16/11/17 11:20, Sarah Newman wrote:
> On 11/15/2017 11:31 PM, Nikolay Aleksandrov wrote:
>> On 15/11/17 21:27, Sarah Newman wrote:
>>> Current memory and CPU usage for managing bridge fdb entries is unbounded.
>>> Add a parameter max_fdb_count, controlled from sysfs, which places an upper
>>> limit on the number of entries. Defaults to 1024.
>>>
>>> When max_fdb_count is met or exceeded, whether traffic is sent out a
>>> given port should depend on its flooding behavior.
>>>
>>> This may instead be mitigated by filtering mac address entries in the
>>> PREROUTING chain of the ebtables nat table, but this is only practical
>>> when mac addresses are known in advance.
>>>
>>
>> One alternative solution: if limit is the only requirement it could be done
>> in user-space (even with a shell script) looking at fdb notifications and
>> if you reach some limit then remove the learning flag from ports, later if
>> enough expire turn it back on. In fact you can make any policy and if you
>> catch an offending port - you can disable only its learning flag and leave the
>> rest.
> 
> Leaving such a trivial DOS in the kernel doesn't seem like a good idea to me,
> but I suppose it hasn't bothered anyone else up to now except this person
> back in 2013: https://www.keypressure.com/blog/linux-bridge-port-security/
> 

Having L2 access means many more things are possible, that being said I'm not
against adding the limit just need to make it optional with the default either
being no limit or a very high one (e.g. u32 max).

> I note that anyone who would run up against a too-low limit on the maximum
> number of fdb entries would also be savvy enough to fix it in a matter of
> minutes. They could also default the limit to U32_MAX in their particular
> distribution if it was a configuration option.

The same argument applies to anyone who wants to limit their fdbs, and the
standard way to add new options in the Linux bridge which affect normal operations
is to keep the current behaviour and enable the new one on user request in order to
avoid breaking already existing setups.
In fact we try very hard to keep existing setups running.

> 
> At the moment there is not even a single log message if the problem doesn't
> result in memory exhaustion.
> 
> --Sarah
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-16  9:20   ` Sarah Newman
  2017-11-16  9:49     ` Nikolay Aleksandrov
@ 2017-11-16  9:58     ` Willy Tarreau
  2017-11-16 18:23       ` Sarah Newman
  1 sibling, 1 reply; 27+ messages in thread
From: Willy Tarreau @ 2017-11-16  9:58 UTC (permalink / raw)
  To: Sarah Newman; +Cc: Nikolay Aleksandrov, netdev, roopa

Hi Sarah,

On Thu, Nov 16, 2017 at 01:20:18AM -0800, Sarah Newman wrote:
> I note that anyone who would run up against a too-low limit on the maximum
> number of fdb entries would also be savvy enough to fix it in a matter of
> minutes.

I disagree on this point. There's a huge difference between experiencing
sudden breakage under normal conditions due to arbitrary limits being set
and being down because of an attack. While the latter is not desirable,
it's much more easily accepted and most often requires operations anyway.
The former is never an option.

And I continue to think that the default behaviour once the limit is reached
must not be to prevent new entries from being learned but to purge older
ones. At least it preserves normal operations.

But given the high CPU impact you reported for a very low load, definitely
something needs to be done.

> They could also default the limit to U32_MAX in their particular
> distribution if it was a configuration option.

Well, I'd say that we don't have a default limit on the socket number either
and that it happens to be the expected behaviour. It's almost impossible to
find a suitable limit for everyone. People dealing with regular loads never
read docs and get caught. People working in hostile environments are always
more careful and will ensure that their limits are properly set.

> At the moment there is not even a single log message if the problem doesn't
> result in memory exhaustion.

This probably needs to be addressed as well!

Regards,
Willy

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-16  6:20           ` Roopa Prabhu
@ 2017-11-16 16:54             ` Stephen Hemminger
  0 siblings, 0 replies; 27+ messages in thread
From: Stephen Hemminger @ 2017-11-16 16:54 UTC (permalink / raw)
  To: Roopa Prabhu; +Cc: Toshiaki Makita, Sarah Newman, Andrew Lunn, netdev

On Wed, 15 Nov 2017 22:20:23 -0800
Roopa Prabhu <roopa@cumulusnetworks.com> wrote:

> On Wed, Nov 15, 2017 at 10:13 PM, Toshiaki Makita
> <makita.toshiaki@lab.ntt.co.jp> wrote:
> > On 2017/11/16 13:54, Sarah Newman wrote:  
> >> On 11/15/2017 08:05 PM, Toshiaki Makita wrote:  
> >>> On 2017/11/16 11:25, Andrew Lunn wrote:  
> >>>>> Also what do the vendors using bridge for L2 offload to switch think?  
> >>>>
> >>>> The Marvell L2 switches which DSA supports have 8K FDB/MDB entries. So
> >>>> maybe 1024 is a bit low?  
> >>>
> >>> How about U32_MAX by default since it is currently not restricted.
> >>> (assuming the field will be changed to u32 as per Stephen's feedback).
> >>>
> >>> Otherwise users may suffer from unexpected behavior change by updating
> >>> kernel?
> >>>  
> >>
> >> U32_MAX seems like much too high a default to be helpful to a typical user. How many devices are realistically on a single bridge in the wild? Double
> >> that seems like a reasonable default.  
> >
> > I'm suggesting the most unrealistic number to essentially disable the
> > restriction by default.
> > My understanding is that we put a priority on not to break existing
> > users even if the new restriction looks reasonable for most people.  
> 
> +1 , and yes, 1024 is very low. some of the switches we use support
> around 128K FDB entries and we have seen that number increase fairly
> quickly in newer generation switches. Default should be no limit to
> not break existing users.

New features can not break existing users.

My recommendation would be that 0 be used as a magic value to indicate
no limit and that would be the default.

Also the limit should be controllable on a per port of bridge (interface) basis.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-16  9:58     ` Willy Tarreau
@ 2017-11-16 18:23       ` Sarah Newman
  2017-11-16 19:23         ` Andrew Lunn
  0 siblings, 1 reply; 27+ messages in thread
From: Sarah Newman @ 2017-11-16 18:23 UTC (permalink / raw)
  To: Willy Tarreau; +Cc: Nikolay Aleksandrov, netdev, roopa

On 11/16/2017 01:58 AM, Willy Tarreau wrote:
> Hi Sarah,
> 
> On Thu, Nov 16, 2017 at 01:20:18AM -0800, Sarah Newman wrote:
>> I note that anyone who would run up against a too-low limit on the maximum
>> number of fdb entries would also be savvy enough to fix it in a matter of
>> minutes.
> 
> I disagree on this point. There's a huge difference between experiencing
> sudden breakage under normal conditions due to arbitrary limits being set
> and being down because of an attack. While the latter is not desirable,
> it's much more easily accepted and most often requires operations anyway.
> The former is never an option.

Yes, being down during an attack is expected, assuming you know you are
being attacked.

Linux bridges can also be used in small embedded devices. With no limit,
the likely result from those devices being attacked is the device gets
thrown away for being unreliable.

> 
> And I continue to think that the default behaviour once the limit is reached
> must not be to prevent new entries from being learned but to purge older
> ones. At least it preserves normal operations.

I'm not disagreeing.

I spent maybe a couple of hours on this patch and was hoping someone else would
find more time to spend on the problem.

> 
> But given the high CPU impact you reported for a very low load, definitely
> something needs to be done.
It's nice to think so.

> 
>> They could also default the limit to U32_MAX in their particular
>> distribution if it was a configuration option.
> 
> Well, I'd say that we don't have a default limit on the socket number either
> and that it happens to be the expected behaviour. It's almost impossible to
> find a suitable limit for everyone. People dealing with regular loads never
> read docs and get caught. People working in hostile environments are always
> more careful and will ensure that their limits are properly set.

Neighbor tables for ipv4/ipv6 seem more comparable. gc_thresh3 is 1024 and
typically needs to be adjusted higher for Linux routers.

As you say, there is no default limit that suits everyone. So the question is
really who is burdened with changing the default.

There is a lot of talk of not breaking existing users. The current
implementation is demonstrably vulnerable, and since the problem is likely silent
there's not a good way to know how often it's actually occurred. I note the
tool to trigger it is trivially available and it's a well-known type of attack.

But I understand if there have been an insufficient known number of attacks
to change the current default situation. It could be left to user space to
make a new default if it becomes a demonstrable problem.

If user space has to change at all you could again argue for a pure user-space
solution, but I'm not sure if a pure user-space solution would always have a
chance to fix the problem before the system was brought down.

> 
>> At the moment there is not even a single log message if the problem doesn't
>> result in memory exhaustion.
> 
> This probably needs to be addressed as well!
Maybe what's needed is two thresholds, one for warning and one for enforcement.
The warning limit would need to be low enough that the information had a good chance
of being logged before the system was under too much load to be able to convey
that information. The enforcement limit could be left as default inactive until
shown that it needed to be otherwise.

--Sarah

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-16 18:23       ` Sarah Newman
@ 2017-11-16 19:23         ` Andrew Lunn
  2017-11-16 19:36           ` Nikolay Aleksandrov
  2017-11-16 20:21           ` Vincent Bernat
  0 siblings, 2 replies; 27+ messages in thread
From: Andrew Lunn @ 2017-11-16 19:23 UTC (permalink / raw)
  To: Sarah Newman; +Cc: Willy Tarreau, Nikolay Aleksandrov, netdev, roopa

> Linux bridges can also be used in small embedded devices. With no limit,
> the likely result from those devices being attacked is the device gets
> thrown away for being unreliable.

Hi Sarah

Just to get a gut feeling...

struct net_bridge_fdb_entry is 40 bytes.

My WiFi access point which is also a 5 port bridge, currently has 97MB
free RAM. That is space for about 2.5M FDB entries. So even Roopa's
128K is not really a problem, in terms of memory.

> Maybe what's needed is two thresholds, one for warning and one for enforcement.
> The warning limit would need to be low enough that the information had a good chance
> of being logged before the system was under too much load to be able to convey
> that information. The enforcement limit could be left as default inactive until
> shown that it needed to be otherwise.

What exactly is the problem here? Does the DoS exhaust memory, or does
the hashing algorithm not scale?

It is more work, but the table could be more closely tied to the
memory management code. When memory is getting low, callbacks are made
asking to free up memory. Register such a callback and throw away part
of the table when memory is getting low. There is then no need to
limit the size, but print a rate limited warning when asked to reduce
the size.

    Andrew

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-16 19:23         ` Andrew Lunn
@ 2017-11-16 19:36           ` Nikolay Aleksandrov
  2017-11-16 20:54             ` Sarah Newman
  2017-11-16 20:21           ` Vincent Bernat
  1 sibling, 1 reply; 27+ messages in thread
From: Nikolay Aleksandrov @ 2017-11-16 19:36 UTC (permalink / raw)
  To: Andrew Lunn, Sarah Newman; +Cc: Willy Tarreau, netdev, roopa

On 16 November 2017 21:23:25 EET, Andrew Lunn <andrew@lunn.ch> wrote:
>> Linux bridges can also be used in small embedded devices. With no
>limit,
>> the likely result from those devices being attacked is the device
>gets
>> thrown away for being unreliable.
>
>Hi Sarah
>
>Just to get a gut feeling...
>
>struct net_bridge_fdb_entry is 40 bytes.
>
>My WiFi access point which is also a 5 port bridge, currently has 97MB
>free RAM. That is space for about 2.5M FDB entries. So even Roopa's
>128K is not really a problem, in terms of memory.
>
>> Maybe what's needed is two thresholds, one for warning and one for
>enforcement.
>> The warning limit would need to be low enough that the information
>had a good chance
>> of being logged before the system was under too much load to be able
>to convey
>> that information. The enforcement limit could be left as default
>inactive until
>> shown that it needed to be otherwise.
>
>What exactly is the problem here? Does the DoS exhaust memory, or does
>the hashing algorithm not scale?

Just a note - when net-next opens I'll send patches
which move the fdb to a resizeable hashtable that scales nicely even with hundreds of thousands of entries so only the memory issue will remain.

>
>It is more work, but the table could be more closely tied to the
>memory management code. When memory is getting low, callbacks are made
>asking to free up memory. Register such a callback and throw away part
>of the table when memory is getting low. There is then no need to
>limit the size, but print a rate limited warning when asked to reduce
>the size.
>
>    Andrew


-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-16 19:23         ` Andrew Lunn
  2017-11-16 19:36           ` Nikolay Aleksandrov
@ 2017-11-16 20:21           ` Vincent Bernat
  2017-11-17  0:27             ` Stephen Hemminger
  1 sibling, 1 reply; 27+ messages in thread
From: Vincent Bernat @ 2017-11-16 20:21 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Sarah Newman, Willy Tarreau, Nikolay Aleksandrov, netdev, roopa

 ❦ 16 novembre 2017 20:23 +0100, Andrew Lunn <andrew@lunn.ch> :

> struct net_bridge_fdb_entry is 40 bytes.
>
> My WiFi access point which is also a 5 port bridge, currently has 97MB
> free RAM. That is space for about 2.5M FDB entries. So even Roopa's
> 128K is not really a problem, in terms of memory.

I am also interested in Sarah's patch because we can now have bridge
with many ports through VXLAN. The FDB can be replicated to an external
daemon with BGP and the cost of each additional MAC address is therefore
higher than just a few bytes. It seems simpler to implement a limiting
policy early (at the port or bridge level).

Also, this is a pretty standard limit to have for a bridge (switchport
port-security maximum on Cisco, set interface X mac-limit on
Juniper). And it's not something easy to do with ebtables.
-- 
Use the good features of a language; avoid the bad ones.
            - The Elements of Programming Style (Kernighan & Plauger)

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-16 19:36           ` Nikolay Aleksandrov
@ 2017-11-16 20:54             ` Sarah Newman
  0 siblings, 0 replies; 27+ messages in thread
From: Sarah Newman @ 2017-11-16 20:54 UTC (permalink / raw)
  To: Nikolay Aleksandrov, Andrew Lunn; +Cc: Willy Tarreau, netdev, roopa

On 11/16/2017 11:36 AM, Nikolay Aleksandrov wrote:
> On 16 November 2017 21:23:25 EET, Andrew Lunn <andrew@lunn.ch> wrote:
>>> Linux bridges can also be used in small embedded devices. With no
>> limit,
>>> the likely result from those devices being attacked is the device
>> gets
>>> thrown away for being unreliable.
>>
>> Hi Sarah
>>
>> Just to get a gut feeling...
>>
>> struct net_bridge_fdb_entry is 40 bytes.
>>
>> My WiFi access point which is also a 5 port bridge, currently has 97MB
>> free RAM. That is space for about 2.5M FDB entries. So even Roopa's
>> 128K is not really a problem, in terms of memory.

The recommendation was a default maximum of ~4B entries rather than 128k or 256k entries.

2.5M entries over a 300 second aging period is ~8.3kpps.
>>> Maybe what's needed is two thresholds, one for warning and one for
>> enforcement.
>>> The warning limit would need to be low enough that the information
>> had a good chance
>>> of being logged before the system was under too much load to be able
>> to convey
>>> that information. The enforcement limit could be left as default
>> inactive until
>>> shown that it needed to be otherwise.
>>
>> What exactly is the problem here? Does the DoS exhaust memory, or does
>> the hashing algorithm not scale?

My personal observation was 100% CPU usage, not memory exhaustion. Others have documented memory exhaustion.

> 
> Just a note - when net-next opens I'll send patches
> which move the fdb to a resizeable hashtable that scales nicely even with hundreds of thousands of entries so only the memory issue will remain.

Thank you.

I believe that under attack, the number of entries could exceed hundreds of thousands when accumulated over the default aging time. Perhaps it would
still make sense to support a hard limit, even if it is quite high by default?

> 
>>
>> It is more work, but the table could be more closely tied to the
>> memory management code. When memory is getting low, callbacks are made
>> asking to free up memory. Register such a callback and throw away part
>> of the table when memory is getting low. There is then no need to
>> limit the size, but print a rate limited warning when asked to reduce
>> the size.

That sounds reasonable, though I think it would only trigger in the small embedded devices before CPU usage became an issue.

--Sarah

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-16 20:21           ` Vincent Bernat
@ 2017-11-17  0:27             ` Stephen Hemminger
  2017-11-17  5:26               ` Willy Tarreau
  0 siblings, 1 reply; 27+ messages in thread
From: Stephen Hemminger @ 2017-11-17  0:27 UTC (permalink / raw)
  To: Vincent Bernat
  Cc: Andrew Lunn, Sarah Newman, Willy Tarreau, Nikolay Aleksandrov,
	netdev, roopa

On Thu, 16 Nov 2017 21:21:55 +0100
Vincent Bernat <bernat@luffy.cx> wrote:

>  ❦ 16 novembre 2017 20:23 +0100, Andrew Lunn <andrew@lunn.ch> :
> 
> > struct net_bridge_fdb_entry is 40 bytes.
> >
> > My WiFi access point which is also a 5 port bridge, currently has 97MB
> > free RAM. That is space for about 2.5M FDB entries. So even Roopa's
> > 128K is not really a problem, in terms of memory.  
> 
> I am also interested in Sarah's patch because we can now have bridge
> with many ports through VXLAN. The FDB can be replicated to an external
> daemon with BGP and the cost of each additional MAC address is therefore
> higher than just a few bytes. It seems simpler to implement a limiting
> policy early (at the port or bridge level).
> 
> Also, this is a pretty standard limit to have for a bridge (switchport
> port-security maximum on Cisco, set interface X mac-limit on
> Juniper). And it's not something easy to do with ebtables.

I want an optional limit per port, it makes a lot of sense.
If for no other reason than that huge hash tables are a performance problem.

There is a bigger question about which fdb to evict but just dropping the
new one seems to be easiest and as good as any other solution.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-17  0:27             ` Stephen Hemminger
@ 2017-11-17  5:26               ` Willy Tarreau
  2017-11-17  6:14                 ` Nikolay Aleksandrov
  2017-11-17 14:06                 ` Andrew Lunn
  0 siblings, 2 replies; 27+ messages in thread
From: Willy Tarreau @ 2017-11-17  5:26 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Vincent Bernat, Andrew Lunn, Sarah Newman, Nikolay Aleksandrov,
	netdev, roopa

Hi Stephen,

On Thu, Nov 16, 2017 at 04:27:18PM -0800, Stephen Hemminger wrote:
> On Thu, 16 Nov 2017 21:21:55 +0100
> Vincent Bernat <bernat@luffy.cx> wrote:
> 
> >  ❦ 16 novembre 2017 20:23 +0100, Andrew Lunn <andrew@lunn.ch> :
> > 
> > > struct net_bridge_fdb_entry is 40 bytes.
> > >
> > > My WiFi access point which is also a 5 port bridge, currently has 97MB
> > > free RAM. That is space for about 2.5M FDB entries. So even Roopa's
> > > 128K is not really a problem, in terms of memory.  
> > 
> > I am also interested in Sarah's patch because we can now have bridge
> > with many ports through VXLAN. The FDB can be replicated to an external
> > daemon with BGP and the cost of each additional MAC address is therefore
> > higher than just a few bytes. It seems simpler to implement a limiting
> > policy early (at the port or bridge level).
> > 
> > Also, this is a pretty standard limit to have for a bridge (switchport
> > port-security maximum on Cisco, set interface X mac-limit on
> > Juniper). And it's not something easy to do with ebtables.
> 
> I want an optional limit per port, it makes a lot of sense.
> If for no other reason that huge hash tables are a performance problems.

Except it's not a limit in that it doesn't prevent new traffic from going
in, it only prevents new MACs from being learned, so suddenly you start
flooding all ports with new traffic once the limit is reached, which is
not trivial to detect nor diagnose.

> There is a bigger question about which fdb to evict but just dropping the
> new one seems to be easiest and as good as any other solution.

Usually it's better to apply LRU or random here in my opinion, as the
new entry is much more likely to be needed than older ones by definition.
In terms of CPU usage it may even be better to kill an entire series in
the hash table (eg: all nodes in the same table entry for example), as
the operation can be almost as cheap and result in not being needed for
a while again.

Willy

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-17  5:26               ` Willy Tarreau
@ 2017-11-17  6:14                 ` Nikolay Aleksandrov
  2017-11-17  8:01                   ` Nikolay Aleksandrov
  2017-11-17 14:06                 ` Andrew Lunn
  1 sibling, 1 reply; 27+ messages in thread
From: Nikolay Aleksandrov @ 2017-11-17  6:14 UTC (permalink / raw)
  To: Willy Tarreau, Stephen Hemminger
  Cc: Vincent Bernat, Andrew Lunn, Sarah Newman, netdev, roopa

On 17/11/17 07:26, Willy Tarreau wrote:
> Hi Stephen,
> 
> On Thu, Nov 16, 2017 at 04:27:18PM -0800, Stephen Hemminger wrote:
>> On Thu, 16 Nov 2017 21:21:55 +0100
>> Vincent Bernat <bernat@luffy.cx> wrote:
>>
>>>  ❦ 16 novembre 2017 20:23 +0100, Andrew Lunn <andrew@lunn.ch> :
>>>
>>>> struct net_bridge_fdb_entry is 40 bytes.
>>>>
>>>> My WiFi access point which is also a 5 port bridge, currently has 97MB
>>>> free RAM. That is space for about 2.5M FDB entries. So even Roopa's
>>>> 128K is not really a problem, in terms of memory.  
>>>
>>> I am also interested in Sarah's patch because we can now have bridge
>>> with many ports through VXLAN. The FDB can be replicated to an external
>>> daemon with BGP and the cost of each additional MAC address is therefore
>>> higher than just a few bytes. It seems simpler to implement a limiting
>>> policy early (at the port or bridge level).
>>>
>>> Also, this is a pretty standard limit to have for a bridge (switchport
>>> port-security maximum on Cisco, set interface X mac-limit on
>>> Juniper). And it's not something easy to do with ebtables.
>>
>> I want an optional limit per port, it makes a lot of sense.
>> If for no other reason that huge hash tables are a performance problems.
> 
> Except its not a limit in that it doesn't prevent new traffic from going
> in, it only prevents new MACs from being learned, so suddenly you start
> flooding all ports with new traffic once the limit is reached, which is
> not trivial to detect nor diagnose.
> 
>> There is a bigger question about which fdb to evict but just dropping the
>> new one seems to be easiest and as good as any other solution.
> 
> Usually it's better to apply LRU or random here in my opinion, as the
> new entry is much more likely to be needed than older ones by definition.
> In terms of CPU usage it may even be better to kill an entire series in
> the hash table (eg: all nodes in the same table entry for example), as
> the operation can be almost as cheap and result in not being needed for
> a while again.
> 
> Willy
> 

Hi,
I have been thinking about this and how to try and keep everyone happy
while maintaining performance, so how about this:

 * Add a per-port fdb LRU list which is used only when there are >= 2000
   global fdb entries _or_ a per-port limit is set. If the list is in use,
   update the fdb list position once per second. I think these properties will
   allow us to scale and have a better LRU locking granularity (per-port), and in
   smaller setups (not needing LRU) the cost will be a single test in fast path.

 * Use the above LRU list for per-port limit evictions

 * More importantly use the LRU list for fdb expire, removing the need to walk
   over all fdbs every time we expire entries. This would be of great help for
   larger setups with many fdbs (it will kick in on >= 2000 fdb entries).

 * (optional) Make the global LRU kick in limit an option, people might want to
   minimize traffic blocking due to expire process.

Any comments and suggestions are welcome. When we agree on the details I'll do
the RFC patches and run some tests before submitting. Defaults will be kept as
they are now. I've chosen the 2000 limit arbitrarily and am happy to change it
if people have something else in mind. This should play nice with the resizeable
hashtable change.

Thanks,
 Nik

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-17  6:14                 ` Nikolay Aleksandrov
@ 2017-11-17  8:01                   ` Nikolay Aleksandrov
  0 siblings, 0 replies; 27+ messages in thread
From: Nikolay Aleksandrov @ 2017-11-17  8:01 UTC (permalink / raw)
  To: Willy Tarreau, Stephen Hemminger
  Cc: Vincent Bernat, Andrew Lunn, Sarah Newman, netdev, roopa

On 17/11/17 08:14, Nikolay Aleksandrov wrote:
> On 17/11/17 07:26, Willy Tarreau wrote:
>> Hi Stephen,
>>
>> On Thu, Nov 16, 2017 at 04:27:18PM -0800, Stephen Hemminger wrote:
>>> On Thu, 16 Nov 2017 21:21:55 +0100
>>> Vincent Bernat <bernat@luffy.cx> wrote:
>>>
>>>>  ❦ 16 novembre 2017 20:23 +0100, Andrew Lunn <andrew@lunn.ch> :
>>>>
>>>>> struct net_bridge_fdb_entry is 40 bytes.
>>>>>
>>>>> My WiFi access point which is also a 5 port bridge, currently has 97MB
>>>>> free RAM. That is space for about 2.5M FDB entries. So even Roopa's
>>>>> 128K is not really a problem, in terms of memory.  
>>>>
>>>> I am also interested in Sarah's patch because we can now have bridge
>>>> with many ports through VXLAN. The FDB can be replicated to an external
>>>> daemon with BGP and the cost of each additional MAC address is therefore
>>>> higher than just a few bytes. It seems simpler to implement a limiting
>>>> policy early (at the port or bridge level).
>>>>
>>>> Also, this is a pretty standard limit to have for a bridge (switchport
>>>> port-security maximum on Cisco, set interface X mac-limit on
>>>> Juniper). And it's not something easy to do with ebtables.
>>>
>>> I want an optional limit per port, it makes a lot of sense.
>>> If for no other reason that huge hash tables are a performance problems.
>>
>> Except its not a limit in that it doesn't prevent new traffic from going
>> in, it only prevents new MACs from being learned, so suddenly you start
>> flooding all ports with new traffic once the limit is reached, which is
>> not trivial to detect nor diagnose.
>>
>>> There is a bigger question about which fdb to evict but just dropping the
>>> new one seems to be easiest and as good as any other solution.
>>
>> Usually it's better to apply LRU or random here in my opinion, as the
>> new entry is much more likely to be needed than older ones by definition.
>> In terms of CPU usage it may even be better to kill an entire series in
>> the hash table (eg: all nodes in the same table entry for example), as
>> the operation can be almost as cheap and result in not being needed for
>> a while again.
>>
>> Willy
>>
> 
> Hi,
> I have been thinking about this and how to try and keep everyone happy
> while maintaining performance, so how about this:
> 
>  * Add a per-port fdb LRU list which is used only when there are >= 2000
>    global fdb entries _or_ a per-port limit is set. If the list is in use,
>    update the fdb list position once per second. I think these properties will
>    allow us to scale and have a better LRU locking granularity (per-port), and in
>    smaller setups (not needing LRU) the cost will be a single test in fast path.
> 

It seems I spoke too soon, I just tried a quick and dirty version of this and the
per-port LRU becomes a nightmare with the completely lockless fdb dst port changes.
So I think going back to the original proposition with new fdb dropping is best
for now, otherwise we need to add some synchronization on fdb port move. In general
the per-port locks would be tricky to handle with moving fdbs, so even if we add
a per-port LRU list we'll need some global lock to handle moves and updates in a
simple manner, and I'd like to avoid adding a new bridge lock.

>  * Use the above LRU list for per-port limit evictions
> >  * More importantly use the LRU list for fdb expire, removing the need to walk
>    over all fdbs every time we expire entries. This would be of great help for
>    larger setups with many fdbs (it will kick in on >= 2000 fdb entries).
> 

Will have to drop the above point for now, even though I was mostly interested
in that.

>  * (optional) Make the global LRU kick in limit an option, people might want to
>    minimize traffic blocking due to expire process.
> 
> Any comments and suggestions are welcome. When we agree on the details I'll do
> the RFC patches and run some tests before submitting. Defaults will be kept as
> they are now. I've chosen the 2000 limit arbitrarily and am happy to change it
> if people have something else in mind. This should play nice with the resizeable
> hashtable change.
> 
> Thanks,
>  Nik
> 
> 
> 
> 
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-17  5:26               ` Willy Tarreau
  2017-11-17  6:14                 ` Nikolay Aleksandrov
@ 2017-11-17 14:06                 ` Andrew Lunn
  2017-11-17 18:44                   ` Willy Tarreau
  1 sibling, 1 reply; 27+ messages in thread
From: Andrew Lunn @ 2017-11-17 14:06 UTC (permalink / raw)
  To: Willy Tarreau
  Cc: Stephen Hemminger, Vincent Bernat, Sarah Newman,
	Nikolay Aleksandrov, netdev, roopa

> Usually it's better to apply LRU or random here in my opinion, as the
> new entry is much more likely to be needed than older ones by definition.

Hi Willy

I think this depends on why you need to discard. If it is normal
operation and the limits are simply too low, i would agree.

If however it is a DoS, throwing away the new entries makes sense,
leaving the old ones which are more likely to be useful.

Most of the talk in this thread has been about limits for DoS
prevention...

	Andrew

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] net: bridge: add max_fdb_count
  2017-11-17 14:06                 ` Andrew Lunn
@ 2017-11-17 18:44                   ` Willy Tarreau
  0 siblings, 0 replies; 27+ messages in thread
From: Willy Tarreau @ 2017-11-17 18:44 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Stephen Hemminger, Vincent Bernat, Sarah Newman,
	Nikolay Aleksandrov, netdev, roopa

Hi Andrew,

On Fri, Nov 17, 2017 at 03:06:23PM +0100, Andrew Lunn wrote:
> > Usually it's better to apply LRU or random here in my opinion, as the
> > new entry is much more likely to be needed than older ones by definition.
> 
> Hi Willy
> 
> I think this depends on why you need to discard. If it is normal
> operation and the limits are simply too low, I would agree.
> 
> If however it is a DoS, throwing away the new entries makes sense,
> leaving the old ones which are more likely to be useful.
> 
> Most of the talk in this thread has been about limits for DoS
> prevention...

Sure but my point is that it can kick in on regular traffic and in
this case it can be catastrophic. That's only what bothers me. If
we have an unlimited default value with this algorithm I'm fine
because nobody will get caught by accident with a bridge suddenly
replicating high traffic on all ports because an unknown limit was
reached. That's the principle of least surprise.

I know that when fighting DoSes there are never any universally good
solutions and one has to make tradeoffs. I'm perfectly fine with this.

Cheers,
Willy

^ permalink raw reply	[flat|nested] 27+ messages in thread

* RE: [PATCH] net: bridge: add max_fdb_count
  2017-11-15 19:27 [PATCH] net: bridge: add max_fdb_count Sarah Newman
                   ` (4 preceding siblings ...)
  2017-11-16  7:31 ` Nikolay Aleksandrov
@ 2017-11-21 14:53 ` David Laight
  5 siblings, 0 replies; 27+ messages in thread
From: David Laight @ 2017-11-21 14:53 UTC (permalink / raw)
  To: 'Sarah Newman', netdev

From: Sarah Newman
> Sent: 15 November 2017 19:27
> Current memory and CPU usage for managing bridge fdb entries is unbounded.
> Add a parameter max_fdb_count, controlled from sysfs, which places an upper
> limit on the number of entries. Defaults to 1024.
> 
> When max_fdb_count is met or exceeded, whether traffic is sent out a
> given port should depend on its flooding behavior.

Does it make sense for a bridge to run in a mode where it doesn't
remember (all the) MAC addresses from one of its interfaces?
Rather than flood unknown addresses they are just sent to the
'everywhere else' interface.

	David

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2017-11-21 14:53 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-15 19:27 [PATCH] net: bridge: add max_fdb_count Sarah Newman
2017-11-15 19:43 ` Sarah Newman
2017-11-15 20:04 ` Stephen Hemminger
2017-11-16  2:25   ` Andrew Lunn
2017-11-16  4:05     ` Toshiaki Makita
2017-11-16  4:54       ` Sarah Newman
2017-11-16  6:13         ` Toshiaki Makita
2017-11-16  6:20           ` Roopa Prabhu
2017-11-16 16:54             ` Stephen Hemminger
2017-11-15 21:34 ` Egil Hjelmeland
2017-11-16  3:01 ` Andrew Lunn
2017-11-16  7:31 ` Nikolay Aleksandrov
2017-11-16  9:20   ` Sarah Newman
2017-11-16  9:49     ` Nikolay Aleksandrov
2017-11-16  9:58     ` Willy Tarreau
2017-11-16 18:23       ` Sarah Newman
2017-11-16 19:23         ` Andrew Lunn
2017-11-16 19:36           ` Nikolay Aleksandrov
2017-11-16 20:54             ` Sarah Newman
2017-11-16 20:21           ` Vincent Bernat
2017-11-17  0:27             ` Stephen Hemminger
2017-11-17  5:26               ` Willy Tarreau
2017-11-17  6:14                 ` Nikolay Aleksandrov
2017-11-17  8:01                   ` Nikolay Aleksandrov
2017-11-17 14:06                 ` Andrew Lunn
2017-11-17 18:44                   ` Willy Tarreau
2017-11-21 14:53 ` David Laight

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.