All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] net: dsa: reference count the host mdb addresses
@ 2020-10-15 21:27 Vladimir Oltean
  2020-10-19 23:55 ` Jakub Kicinski
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Vladimir Oltean @ 2020-10-15 21:27 UTC (permalink / raw)
  To: netdev; +Cc: kuba, andrew, f.fainelli, vivien.didelot

Currently any DSA switch that implements the multicast ops (properly,
that is) gets these errors after just sitting for a while, with at least
2 ports bridged:

[  286.013814] mscc_felix 0000:00:00.5 swp3: failed (err=-2) to del object (id=3)

The reason has to do with this piece of code:

	netdev_for_each_lower_dev(dev, lower_dev, iter)
		br_mdb_switchdev_host_port(dev, lower_dev, mp, type);

called from:

br_multicast_group_expired
-> br_multicast_host_leave
   -> br_mdb_notify
      -> br_mdb_switchdev_host

Basically, that code is correct. It tells each switchdev port that the
host can leave that multicast group. But in the case of DSA, all user
ports are connected to the host through the same pipe. So, because DSA
translates a host MDB to a normal MDB on the CPU port, this means that
when all user ports leave a multicast group, DSA tries to remove it N
times from the CPU port.

We should be reference-counting these addresses.

Fixes: 5f4dbc50ce4d ("net: dsa: slave: Handle switchdev host mdb add/del")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
---
 include/net/dsa.h |  9 +++++
 net/dsa/dsa2.c    |  2 ++
 net/dsa/slave.c   | 92 ++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 94 insertions(+), 9 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 35429a140dfa..bad3877761b9 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -251,6 +251,13 @@ struct dsa_link {
 	struct list_head list;
 };
 
+struct dsa_host_addr {
+	unsigned char addr[ETH_ALEN];
+	u16 vid;
+	refcount_t refcount;
+	struct list_head list;
+};
+
 struct dsa_switch {
 	bool setup;
 
@@ -333,6 +340,8 @@ struct dsa_switch {
 	 */
 	bool			mtu_enforcement_ingress;
 
+	struct list_head	host_mdb;
+
 	size_t num_ports;
 };
 
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 183003e45762..52b3ef34a2cb 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -413,6 +413,8 @@ static int dsa_switch_setup(struct dsa_switch *ds)
 	if (ds->setup)
 		return 0;
 
+	INIT_LIST_HEAD(&ds->host_mdb);
+
 	/* Initialize ds->phys_mii_mask before registering the slave MDIO bus
 	 * driver and before ops->setup() has run, since the switch drivers and
 	 * the slave MDIO bus driver rely on these values for probing PHY
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 3bc5ca40c9fb..1bf3c406e2f8 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -376,6 +376,87 @@ static int dsa_slave_vlan_add(struct net_device *dev,
 	return 0;
 }
 
+static struct dsa_host_addr *
+dsa_host_mdb_find(struct dsa_switch *ds,
+		  const struct switchdev_obj_port_mdb *mdb)
+{
+	struct dsa_host_addr *a;
+
+	list_for_each_entry(a, &ds->host_mdb, list)
+		if (ether_addr_equal(a->addr, mdb->addr) && a->vid == mdb->vid)
+			return a;
+
+	return NULL;
+}
+
+/* DSA can directly translate this to a normal MDB add, but on the CPU port.
+ * But because multiple user ports can join the same multicast group and the
+ * bridge will emit a notification for each port, we need to add/delete the
+ * entry towards the host only once, so we reference count it.
+ */
+static int dsa_host_mdb_add(struct dsa_port *dp,
+			    const struct switchdev_obj_port_mdb *mdb,
+			    struct switchdev_trans *trans)
+{
+	struct dsa_port *cpu_dp = dp->cpu_dp;
+	struct dsa_switch *ds = dp->ds;
+	struct dsa_host_addr *a;
+	int err;
+
+	/* Only the commit phase is refcounted, which means that for the
+	 * second, third, etc port which is member of this host address,
+	 * we'll call the prepare phase but never commit.
+	 */
+	if (switchdev_trans_ph_prepare(trans))
+		return dsa_port_mdb_add(cpu_dp, mdb, trans);
+
+	a = dsa_host_mdb_find(ds, mdb);
+	if (a) {
+		refcount_inc(&a->refcount);
+		return 0;
+	}
+
+	a = kzalloc(sizeof(*a), GFP_KERNEL);
+	if (!a)
+		return -ENOMEM;
+
+	err = dsa_port_mdb_add(cpu_dp, mdb, trans);
+	if (err)
+		return err;
+
+	ether_addr_copy(a->addr, mdb->addr);
+	a->vid = mdb->vid;
+	refcount_set(&a->refcount, 1);
+	list_add_tail(&a->list, &ds->host_mdb);
+
+	return 0;
+}
+
+static int dsa_host_mdb_del(struct dsa_port *dp,
+			    const struct switchdev_obj_port_mdb *mdb)
+{
+	struct dsa_port *cpu_dp = dp->cpu_dp;
+	struct dsa_switch *ds = dp->ds;
+	struct dsa_host_addr *a;
+	int err;
+
+	a = dsa_host_mdb_find(ds, mdb);
+	if (!a)
+		return -ENOENT;
+
+	if (!refcount_dec_and_test(&a->refcount))
+		return 0;
+
+	err = dsa_port_mdb_del(cpu_dp, mdb);
+	if (err)
+		return err;
+
+	list_del(&a->list);
+	kfree(a);
+
+	return 0;
+}
+
 static int dsa_slave_port_obj_add(struct net_device *dev,
 				  const struct switchdev_obj *obj,
 				  struct switchdev_trans *trans,
@@ -396,11 +477,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 		err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
 		break;
 	case SWITCHDEV_OBJ_ID_HOST_MDB:
-		/* DSA can directly translate this to a normal MDB add,
-		 * but on the CPU port.
-		 */
-		err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj),
-				       trans);
+		err = dsa_host_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
 		err = dsa_slave_vlan_add(dev, obj, trans);
@@ -455,10 +532,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
 		err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_HOST_MDB:
-		/* DSA can directly translate this to a normal MDB add,
-		 * but on the CPU port.
-		 */
-		err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
+		err = dsa_host_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
 		err = dsa_slave_vlan_del(dev, obj);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH net] net: dsa: reference count the host mdb addresses
  2020-10-15 21:27 [PATCH net] net: dsa: reference count the host mdb addresses Vladimir Oltean
@ 2020-10-19 23:55 ` Jakub Kicinski
  2020-10-20  0:07   ` Andrew Lunn
  2020-10-20  0:06 ` Andrew Lunn
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Jakub Kicinski @ 2020-10-19 23:55 UTC (permalink / raw)
  To: Vladimir Oltean; +Cc: netdev, andrew, f.fainelli, vivien.didelot

On Fri, 16 Oct 2020 00:27:11 +0300 Vladimir Oltean wrote:
> Currently any DSA switch that implements the multicast ops (properly,
> that is) gets these errors after just sitting for a while, with at least
> 2 ports bridged:
> 
> [  286.013814] mscc_felix 0000:00:00.5 swp3: failed (err=-2) to del object (id=3)
> 
> The reason has to do with this piece of code:
> 
> 	netdev_for_each_lower_dev(dev, lower_dev, iter)
> 		br_mdb_switchdev_host_port(dev, lower_dev, mp, type);

We need a review on this one, anyone?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] net: dsa: reference count the host mdb addresses
  2020-10-15 21:27 [PATCH net] net: dsa: reference count the host mdb addresses Vladimir Oltean
  2020-10-19 23:55 ` Jakub Kicinski
@ 2020-10-20  0:06 ` Andrew Lunn
  2020-10-20  0:28   ` Vladimir Oltean
  2020-10-20  2:11 ` Florian Fainelli
  2020-11-27 23:58 ` Tobias Waldekranz
  3 siblings, 1 reply; 11+ messages in thread
From: Andrew Lunn @ 2020-10-20  0:06 UTC (permalink / raw)
  To: Vladimir Oltean; +Cc: netdev, kuba, f.fainelli, vivien.didelot

On Fri, Oct 16, 2020 at 12:27:11AM +0300, Vladimir Oltean wrote:
> Currently any DSA switch that implements the multicast ops (properly,
> that is) gets these errors after just sitting for a while, with at least
> 2 ports bridged:
> 
> [  286.013814] mscc_felix 0000:00:00.5 swp3: failed (err=-2) to del object (id=3)
> 
> The reason has to do with this piece of code:
> 
> 	netdev_for_each_lower_dev(dev, lower_dev, iter)
> 		br_mdb_switchdev_host_port(dev, lower_dev, mp, type);
> 
> called from:
> 
> br_multicast_group_expired
> -> br_multicast_host_leave
>    -> br_mdb_notify
>       -> br_mdb_switchdev_host
> 
> Basically, that code is correct. It tells each switchdev port that the
> host can leave that multicast group. But in the case of DSA, all user
> ports are connected to the host through the same pipe. So, because DSA
> translates a host MDB to a normal MDB on the CPU port, this means that
> when all user ports leave a multicast group, DSA tries to remove it N
> times from the CPU port.

Hi Vladimir

I agree with the analysis. This is how i designed it!

As far as i remember, none of the switches at the time would report an
error when asked to delete an MDB which did not exist. They also would
not give an error when adding an MDB which already exists.

So i decided to keep it KISS and not bother with reference counting.

> +static int dsa_host_mdb_add(struct dsa_port *dp,
> +			    const struct switchdev_obj_port_mdb *mdb,
> +			    struct switchdev_trans *trans)
> +{
> +	struct dsa_port *cpu_dp = dp->cpu_dp;
> +	struct dsa_switch *ds = dp->ds;
> +	struct dsa_host_addr *a;
> +	int err;
> +
> +	/* Only the commit phase is refcounted, which means that for the
> +	 * second, third, etc port which is member of this host address,
> +	 * we'll call the prepare phase but never commit.
> +	 */
> +	if (switchdev_trans_ph_prepare(trans))
> +		return dsa_port_mdb_add(cpu_dp, mdb, trans);
> +
> +	a = dsa_host_mdb_find(ds, mdb);
> +	if (a) {
> +		refcount_inc(&a->refcount);
> +		return 0;
> +	}
> +
> +	a = kzalloc(sizeof(*a), GFP_KERNEL);
> +	if (!a)
> +		return -ENOMEM;
> +

The other part of the argument is that DSA is stateless, in that there
is no dynamic memory allocation. Drivers are also stateless in terms
of dynamically allocating memory.

So, what value do this code add? Why do we actually need reference
counting?

	Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] net: dsa: reference count the host mdb addresses
  2020-10-19 23:55 ` Jakub Kicinski
@ 2020-10-20  0:07   ` Andrew Lunn
  2020-10-20  0:13     ` Jakub Kicinski
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Lunn @ 2020-10-20  0:07 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: Vladimir Oltean, netdev, f.fainelli, vivien.didelot

On Mon, Oct 19, 2020 at 04:55:14PM -0700, Jakub Kicinski wrote:
> On Fri, 16 Oct 2020 00:27:11 +0300 Vladimir Oltean wrote:
> > Currently any DSA switch that implements the multicast ops (properly,
> > that is) gets these errors after just sitting for a while, with at least
> > 2 ports bridged:
> > 
> > [  286.013814] mscc_felix 0000:00:00.5 swp3: failed (err=-2) to del object (id=3)
> > 
> > The reason has to do with this piece of code:
> > 
> > 	netdev_for_each_lower_dev(dev, lower_dev, iter)
> > 		br_mdb_switchdev_host_port(dev, lower_dev, mp, type);
> 
> We need a review on this one, anyone?

Hi Jakub

Thanks for the reminder. It has been on my TODO list since i got back
from vacation.

	Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] net: dsa: reference count the host mdb addresses
  2020-10-20  0:07   ` Andrew Lunn
@ 2020-10-20  0:13     ` Jakub Kicinski
  0 siblings, 0 replies; 11+ messages in thread
From: Jakub Kicinski @ 2020-10-20  0:13 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: Vladimir Oltean, netdev, f.fainelli, vivien.didelot

On Tue, 20 Oct 2020 02:07:46 +0200 Andrew Lunn wrote:
> On Mon, Oct 19, 2020 at 04:55:14PM -0700, Jakub Kicinski wrote:
> > On Fri, 16 Oct 2020 00:27:11 +0300 Vladimir Oltean wrote:  
> > > Currently any DSA switch that implements the multicast ops (properly,
> > > that is) gets these errors after just sitting for a while, with at least
> > > 2 ports bridged:
> > > 
> > > [  286.013814] mscc_felix 0000:00:00.5 swp3: failed (err=-2) to del object (id=3)
> > > 
> > > The reason has to do with this piece of code:
> > > 
> > > 	netdev_for_each_lower_dev(dev, lower_dev, iter)
> > > 		br_mdb_switchdev_host_port(dev, lower_dev, mp, type);  
> > 
> > We need a review on this one, anyone?  
> 
> Hi Jakub
> 
> Thanks for the reminder. It has been on my TODO list since i got back
> from vacation.

Good to have you back! :)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] net: dsa: reference count the host mdb addresses
  2020-10-20  0:06 ` Andrew Lunn
@ 2020-10-20  0:28   ` Vladimir Oltean
  0 siblings, 0 replies; 11+ messages in thread
From: Vladimir Oltean @ 2020-10-20  0:28 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: netdev, kuba, f.fainelli, vivien.didelot

On Tue, Oct 20, 2020 at 02:06:32AM +0200, Andrew Lunn wrote:
> I agree with the analysis. This is how i designed it!
[...]
> So i decided to keep it KISS and not bother with reference counting.

Well, I can't argue with that then...

> The other part of the argument is that DSA is stateless, in that there
> is no dynamic memory allocation. Drivers are also stateless in terms
> of dynamically allocating memory.

And is that some sort of design goal? I think you'll run out of things
to do very quickly if you set out to never call kmalloc().

> So, what value do this code add? Why do we actually need reference
> counting?

Well, for one thing, if you have multiple bridge interfaces spanning the
ports of a single switch ASIC (and this is not uncommon with port count > 4)
then you'll have the timer expiry from one bridge clear the host MDB
entries of the other. Weird. And in general, you can't just "add first,
delete first" with this type of things. I actually got some pushback
from Vivien an year ago on a topic very similar to this: in the other
place where DSA is "lazy" and does not implement refcounting, which is
VLANs on the CPU port, at least the VLAN is not deleted. That would be
more correct, at least, than performing 6 additions, then 1 single
deletion which would invalidate the entry for all the other ports.

Also, I am taking the opportunity to add the refcounting infrastructure
for host FDB entries (this goes back to the patch series about DSA RX
filtering). At the moment, host addresses are added from a single type
of source (aka, a switchdev HOST_MDB object). But with unicast, there
could be more than one sources that a unicast address could be
added from:
- the MAC addresses of the ports. These addresses should be installed as
  host FDB entries
- the MAC addresses of the upper interfaces. Similar argument here.
- the local (master) FDB of the bridge. My plan of record is to offload
  this using a new switchdev HOST_FDB object, similar to what you've
  done for HOST_MDB.
In this case, having reference counting is pretty much something to
have, when an address could come from more than one place, we wouldn't
want to break anything.
Also, consider what happens when you start having the ability to install
the MAC address of the switch ports as a host FDB entry. DSA configures
all interfaces to have the same MAC address, which is inherited from the
master's MAC address. But you can also change the MAC address of a
switch interface. What do you do with the old one? Do you delete it? Do
you keep it? If you don't delete it, you might run out of FDB space
after enough MAC address changes. If you delete it, you might break
traffic for the other switch ports that are still using it...

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] net: dsa: reference count the host mdb addresses
  2020-10-15 21:27 [PATCH net] net: dsa: reference count the host mdb addresses Vladimir Oltean
  2020-10-19 23:55 ` Jakub Kicinski
  2020-10-20  0:06 ` Andrew Lunn
@ 2020-10-20  2:11 ` Florian Fainelli
  2020-11-27 21:41   ` Vladimir Oltean
  2020-11-27 23:58 ` Tobias Waldekranz
  3 siblings, 1 reply; 11+ messages in thread
From: Florian Fainelli @ 2020-10-20  2:11 UTC (permalink / raw)
  To: Vladimir Oltean, netdev; +Cc: kuba, andrew, vivien.didelot



On 10/15/2020 2:27 PM, Vladimir Oltean wrote:
> Currently any DSA switch that implements the multicast ops (properly,
> that is) gets these errors after just sitting for a while, with at least
> 2 ports bridged:
> 
> [  286.013814] mscc_felix 0000:00:00.5 swp3: failed (err=-2) to del object (id=3)
> 
> The reason has to do with this piece of code:
> 
> 	netdev_for_each_lower_dev(dev, lower_dev, iter)
> 		br_mdb_switchdev_host_port(dev, lower_dev, mp, type);
> 
> called from:
> 
> br_multicast_group_expired
> -> br_multicast_host_leave
>     -> br_mdb_notify
>        -> br_mdb_switchdev_host
> 
> Basically, that code is correct. It tells each switchdev port that the
> host can leave that multicast group. But in the case of DSA, all user
> ports are connected to the host through the same pipe. So, because DSA
> translates a host MDB to a normal MDB on the CPU port, this means that
> when all user ports leave a multicast group, DSA tries to remove it N
> times from the CPU port.
> 
> We should be reference-counting these addresses.
> 
> Fixes: 5f4dbc50ce4d ("net: dsa: slave: Handle switchdev host mdb add/del")
> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>

This looks good to me, just one possible problem below:

[snip]

> +
> +	a = kzalloc(sizeof(*a), GFP_KERNEL);
> +	if (!a)
> +		return -ENOMEM;

I believe this needs to be GFP_ATOMIC if we are to follow 
net/bridge/br_mdb.c::br_mdb_notify which does its netlink messages 
allocations using GFP_ATOMIC. On a side note, it woul dbe very helpful 
if we could annotate all bridge functions accordingly with their context 
(BH, process, etc.).
-- 
Florian

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] net: dsa: reference count the host mdb addresses
  2020-10-20  2:11 ` Florian Fainelli
@ 2020-11-27 21:41   ` Vladimir Oltean
  0 siblings, 0 replies; 11+ messages in thread
From: Vladimir Oltean @ 2020-11-27 21:41 UTC (permalink / raw)
  To: Florian Fainelli; +Cc: netdev, kuba, andrew, vivien.didelot

Hi Florian,

On Mon, Oct 19, 2020 at 07:11:47PM -0700, Florian Fainelli wrote:
> On 10/15/2020 2:27 PM, Vladimir Oltean wrote:
> > Currently any DSA switch that implements the multicast ops (properly,
> > that is) gets these errors after just sitting for a while, with at least
> > 2 ports bridged:
> >
> > [  286.013814] mscc_felix 0000:00:00.5 swp3: failed (err=-2) to del object (id=3)
> >
> > The reason has to do with this piece of code:
> >
> > 	netdev_for_each_lower_dev(dev, lower_dev, iter)
> > 		br_mdb_switchdev_host_port(dev, lower_dev, mp, type);
> >
> > called from:
> >
> > br_multicast_group_expired
> > -> br_multicast_host_leave
> >     -> br_mdb_notify
> >        -> br_mdb_switchdev_host
> >
> > Basically, that code is correct. It tells each switchdev port that the
> > host can leave that multicast group. But in the case of DSA, all user
> > ports are connected to the host through the same pipe. So, because DSA
> > translates a host MDB to a normal MDB on the CPU port, this means that
> > when all user ports leave a multicast group, DSA tries to remove it N
> > times from the CPU port.
> >
> > We should be reference-counting these addresses.
> >
> > Fixes: 5f4dbc50ce4d ("net: dsa: slave: Handle switchdev host mdb add/del")
> > Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
>
> This looks good to me, just one possible problem below:
>
> [snip]
>
> > +
> > +	a = kzalloc(sizeof(*a), GFP_KERNEL);
> > +	if (!a)
> > +		return -ENOMEM;
>
> I believe this needs to be GFP_ATOMIC if we are to follow
> net/bridge/br_mdb.c::br_mdb_notify which does its netlink messages
> allocations using GFP_ATOMIC. On a side note, it woul dbe very helpful if we
> could annotate all bridge functions accordingly with their context (BH,
> process, etc.).

We are not in atomic context here, since Andrew took care to use
SWITCHDEV_F_DEFER in br_mdb_switchdev_host_port(). Honestly, if he
wouldn't have done that, we would have been pretty toast anyway, since
we end up calling dsa_port_mdb_add(), which assumes it can sleep. And
deferring in DSA is super complicated, due to the fact that
SWITCHDEV_OBJ_ID_HOST_MDB is transactional.

So it's ok to use GFP_KERNEL, I'll leave this as-is when I respin.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] net: dsa: reference count the host mdb addresses
  2020-10-15 21:27 [PATCH net] net: dsa: reference count the host mdb addresses Vladimir Oltean
                   ` (2 preceding siblings ...)
  2020-10-20  2:11 ` Florian Fainelli
@ 2020-11-27 23:58 ` Tobias Waldekranz
  2020-11-28  0:27   ` Vladimir Oltean
  3 siblings, 1 reply; 11+ messages in thread
From: Tobias Waldekranz @ 2020-11-27 23:58 UTC (permalink / raw)
  To: Vladimir Oltean, netdev; +Cc: kuba, andrew, f.fainelli, vivien.didelot

On Fri, Oct 16, 2020 at 00:27, Vladimir Oltean <vladimir.oltean@nxp.com> wrote:
> Currently any DSA switch that implements the multicast ops (properly,
> that is) gets these errors after just sitting for a while, with at least
> 2 ports bridged:
>
> [  286.013814] mscc_felix 0000:00:00.5 swp3: failed (err=-2) to del object (id=3)
>
> The reason has to do with this piece of code:
>
> 	netdev_for_each_lower_dev(dev, lower_dev, iter)
> 		br_mdb_switchdev_host_port(dev, lower_dev, mp, type);
>
> called from:
>
> br_multicast_group_expired
> -> br_multicast_host_leave
>    -> br_mdb_notify
>       -> br_mdb_switchdev_host
>
> Basically, that code is correct. It tells each switchdev port that the
> host can leave that multicast group. But in the case of DSA, all user
> ports are connected to the host through the same pipe. So, because DSA
> translates a host MDB to a normal MDB on the CPU port, this means that
> when all user ports leave a multicast group, DSA tries to remove it N
> times from the CPU port.
>
> We should be reference-counting these addresses.

That sounds like a good idea. We have run into another issue with the
MDB that maybe could be worked into this changeset. This is what we have
observed on 4.19, but from looking at the source it does not look like
anything has changed with respect to this issue.

The DSA driver handles the addition/removal of router ports by
enabling/disabling multicast flooding to the port in question. On
mv88e6xxx at least, this is only part of the solution. It only takes
care of the unregistered multicast. You also have to iterate through all
_registered_ groups and add the port to the destination vector.

If you do that the naïve way, you run in to a secondary problem. Without
any caching middle layer, you now have a single bit in the vector that
can be set either because the port is a router port, or because someone
has joined the group, _OR_ both (if the querier moves due to a topology
change for example). So you need a cache of the joined ports for each
group, and the router port vector. That way you can make sure to only
clear the bit if neither the cached group entry nor the router port
vector has the bit set.

If you think it could be useful I could try to rebase our internal
solution on net-next and post it as an RFC. Maybe there are at least
parts that can be used from it.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] net: dsa: reference count the host mdb addresses
  2020-11-27 23:58 ` Tobias Waldekranz
@ 2020-11-28  0:27   ` Vladimir Oltean
  2020-11-28  9:34     ` Tobias Waldekranz
  0 siblings, 1 reply; 11+ messages in thread
From: Vladimir Oltean @ 2020-11-28  0:27 UTC (permalink / raw)
  To: Tobias Waldekranz; +Cc: netdev, kuba, andrew, f.fainelli, vivien.didelot

On Sat, Nov 28, 2020 at 12:58:10AM +0100, Tobias Waldekranz wrote:
> That sounds like a good idea. We have run into another issue with the
> MDB that maybe could be worked into this changeset. This is what we have
> observed on 4.19, but from looking at the source it does not look like
> anything has changed with respect to this issue.
>
> The DSA driver handles the addition/removal of router ports by
> enabling/disabling multicast flooding to the port in question. On
> mv88e6xxx at least, this is only part of the solution. It only takes
> care of the unregistered multicast. You also have to iterate through all
> _registered_ groups and add the port to the destination vector.

And this observation is based on what? Based on this paragraph from RFC4541?

2.1.2.  Data Forwarding Rules

   1) Packets with a destination IP address outside 224.0.0.X which are
      not IGMP should be forwarded according to group-based port
      membership tables and must also be forwarded on router ports.

Let me ask you a different question. Why would DSA be in charge of
updating the MDB records, and not the bridge? Or why DSA and not the end
driver? Ignore my patch. I'm just trying to understand what you're
saying. Why precisely DSA, the mid layer? I don't know, this is new
information to me, I'm still digesting it.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] net: dsa: reference count the host mdb addresses
  2020-11-28  0:27   ` Vladimir Oltean
@ 2020-11-28  9:34     ` Tobias Waldekranz
  0 siblings, 0 replies; 11+ messages in thread
From: Tobias Waldekranz @ 2020-11-28  9:34 UTC (permalink / raw)
  To: Vladimir Oltean; +Cc: netdev, kuba, andrew, f.fainelli, vivien.didelot

On Sat, Nov 28, 2020 at 00:27, Vladimir Oltean <vladimir.oltean@nxp.com> wrote:
> On Sat, Nov 28, 2020 at 12:58:10AM +0100, Tobias Waldekranz wrote:
>> That sounds like a good idea. We have run into another issue with the
>> MDB that maybe could be worked into this changeset. This is what we have
>> observed on 4.19, but from looking at the source it does not look like
>> anything has changed with respect to this issue.
>>
>> The DSA driver handles the addition/removal of router ports by
>> enabling/disabling multicast flooding to the port in question. On
>> mv88e6xxx at least, this is only part of the solution. It only takes
>> care of the unregistered multicast. You also have to iterate through all
>> _registered_ groups and add the port to the destination vector.
>
> And this observation is based on what? Based on this paragraph from RFC4541?

Well in all honesty, it is mostly based on information from a colleague
of mine who knows the ins and outs of all these RFCs.

> 2.1.2.  Data Forwarding Rules
>
>    1) Packets with a destination IP address outside 224.0.0.X which are
>       not IGMP should be forwarded according to group-based port
>       membership tables and must also be forwarded on router ports.

...but yes, that paragraph does not leave a lot of wiggle room :)

> Let me ask you a different question. Why would DSA be in charge of
> updating the MDB records, and not the bridge? Or why DSA and not the end
> driver? Ignore my patch. I'm just trying to understand what you're
> saying. Why precisely DSA, the mid layer? I don't know, this is new
> information to me, I'm still digesting it.

The bridge has all the necessary information for sure. But it has a
different model with a separate list of router ports. Then in
br_multicast_flood you simply forward to the union of the group entry
and the router ports. It is not the bridge's fault that our hardware
does not have a separate bitmask for router ports. Some hardware may
very well have it.

I guess we could create internal APIs to the bridge to retrieve the
information though. There is already `bool br_multicast_router(dev)`, so
we should only need to add `bool br_multicast_member(dev, group, vid)`.

Assuming that those were available we should be able to solve it either
at the DSA or the driver layer. I seem to recall some issue that forced
us to place the cache at the dst level, but I would have to go through
the implementation to figure out what that issue was.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2020-11-28 22:14 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-15 21:27 [PATCH net] net: dsa: reference count the host mdb addresses Vladimir Oltean
2020-10-19 23:55 ` Jakub Kicinski
2020-10-20  0:07   ` Andrew Lunn
2020-10-20  0:13     ` Jakub Kicinski
2020-10-20  0:06 ` Andrew Lunn
2020-10-20  0:28   ` Vladimir Oltean
2020-10-20  2:11 ` Florian Fainelli
2020-11-27 21:41   ` Vladimir Oltean
2020-11-27 23:58 ` Tobias Waldekranz
2020-11-28  0:27   ` Vladimir Oltean
2020-11-28  9:34     ` Tobias Waldekranz

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.