linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] net: dsa: microchip: fix race condition
@ 2020-10-05 16:08 Christian Eggers
  2020-10-05 19:59 ` Florian Fainelli
  2020-10-05 20:55 ` Vladimir Oltean
  0 siblings, 2 replies; 4+ messages in thread
From: Christian Eggers @ 2020-10-05 16:08 UTC (permalink / raw)
  To: Woojung Huh, Microchip Linux Driver Support
  Cc: Andrew Lunn, Vivien Didelot, Florian Fainelli, David S . Miller,
	Jakub Kicinski, netdev, linux-kernel, Christian Eggers, stable

Between queuing the delayed work and finishing the setup of the dsa
ports, the process may sleep in request_module() and the queued work may
be executed prior the initialization of the DSA ports is finished. In
ksz_mib_read_work(), a NULL dereference will happen within
netof_carrier_ok(dp->slave).

Not queuing the delayed work in ksz_init_mib_timer() make things even
worse because the work will now be queued for immediate execution
(instead of 2000 ms) in ksz_mac_link_down() via
dsa_port_link_register_of().

Solution:
1. Do not queue (only initialize) delayed work in ksz_init_mib_timer().
2. Only queue delayed work in ksz_mac_link_down() if init is completed.
3. Queue work once in ksz_switch_register(), after dsa_register_switch()
has completed.

Signed-off-by: Christian Eggers <ceggers@arri.de>
Cc: stable@vger.kernel.org
---
Call tree:
ksz9477_i2c_probe()
\--ksz9477_switch_register()
   \--ksz_switch_register()
      +--dsa_register_switch()
      |  \--dsa_switch_probe()
      |     \--dsa_tree_setup()
      |        \--dsa_tree_setup_switches()
      |           +--dsa_switch_setup()
      |           |  +--ksz9477_setup()
      |           |  |  \--ksz_init_mib_timer()
      |           |  |     |--/* Start the timer 2 seconds later. */
      |           |  |     \--schedule_delayed_work(&dev->mib_read, msecs_to_jiffies(2000));
      |           |  \--__mdiobus_register()
      |           |     \--mdiobus_scan()
      |           |        \--get_phy_device()
      |           |           +--get_phy_id()
      |           |           \--phy_device_create()
      |           |              |--/* sleeping, ksz_mib_read_work() can be called meanwhile */
      |           |              \--request_module()
      |           |
      |           \--dsa_port_setup()
      |              +--/* Called for non-CPU ports */
      |              +--dsa_slave_create()
      |              |  +--/* Too late, ksz_mib_read_work() may be called beforehand */
      |              |  \--port->slave = ...
      |             ...
      |              +--Called for CPU port */
      |              \--dsa_port_link_register_of()
      |                 \--ksz_mac_link_down()
      |                    +--/* mib_read must be initialized here */
      |                    +--/* work is already scheduled, so it will be executed after 2000 ms */
      |                    \--schedule_delayed_work(&dev->mib_read, 0);
      \-- /* here port->slave is setup properly, scheduling the delayed work should be safe */

static void ksz_mib_read_work()
\--netif_carrier_ok(dp->slave);  dp->slave has not been initialized yet


 drivers/net/dsa/microchip/ksz_common.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 8e755b50c9c1..a94d2278b95c 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -103,14 +103,8 @@ void ksz_init_mib_timer(struct ksz_device *dev)
 
 	INIT_DELAYED_WORK(&dev->mib_read, ksz_mib_read_work);
 
-	/* Read MIB counters every 30 seconds to avoid overflow. */
-	dev->mib_read_interval = msecs_to_jiffies(30000);
-
 	for (i = 0; i < dev->mib_port_cnt; i++)
 		dev->dev_ops->port_init_cnt(dev, i);
-
-	/* Start the timer 2 seconds later. */
-	schedule_delayed_work(&dev->mib_read, msecs_to_jiffies(2000));
 }
 EXPORT_SYMBOL_GPL(ksz_init_mib_timer);
 
@@ -143,7 +137,9 @@ void ksz_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode,
 
 	/* Read all MIB counters when the link is going down. */
 	p->read = true;
-	schedule_delayed_work(&dev->mib_read, 0);
+	/* timer started */
+	if (dev->mib_read_interval)
+		schedule_delayed_work(&dev->mib_read, 0);
 }
 EXPORT_SYMBOL_GPL(ksz_mac_link_down);
 
@@ -446,6 +442,12 @@ int ksz_switch_register(struct ksz_device *dev,
 		return ret;
 	}
 
+	/* Read MIB counters every 30 seconds to avoid overflow. */
+	dev->mib_read_interval = msecs_to_jiffies(30000);
+
+	/* Start the MIB timer. */
+	schedule_delayed_work(&dev->mib_read, 0);
+
 	return 0;
 }
 EXPORT_SYMBOL(ksz_switch_register);
-- 
Christian Eggers
Embedded software developer

Arnold & Richter Cine Technik GmbH & Co. Betriebs KG
Sitz: Muenchen - Registergericht: Amtsgericht Muenchen - Handelsregisternummer: HRA 57918
Persoenlich haftender Gesellschafter: Arnold & Richter Cine Technik GmbH
Sitz: Muenchen - Registergericht: Amtsgericht Muenchen - Handelsregisternummer: HRB 54477
Geschaeftsfuehrer: Dr. Michael Neuhaeuser; Stephan Schenk; Walter Trauninger; Markus Zeiler


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] net: dsa: microchip: fix race condition
  2020-10-05 16:08 [PATCH] net: dsa: microchip: fix race condition Christian Eggers
@ 2020-10-05 19:59 ` Florian Fainelli
  2020-10-05 20:55 ` Vladimir Oltean
  1 sibling, 0 replies; 4+ messages in thread
From: Florian Fainelli @ 2020-10-05 19:59 UTC (permalink / raw)
  To: Christian Eggers, Woojung Huh, Microchip Linux Driver Support
  Cc: Andrew Lunn, Vivien Didelot, David S . Miller, Jakub Kicinski,
	netdev, linux-kernel, stable



On 10/5/2020 9:08 AM, Christian Eggers wrote:
> Between queuing the delayed work and finishing the setup of the dsa
> ports, the process may sleep in request_module() and the queued work may
> be executed prior the initialization of the DSA ports is finished. In
> ksz_mib_read_work(), a NULL dereference will happen within
> netof_carrier_ok(dp->slave).
> 
> Not queuing the delayed work in ksz_init_mib_timer() make things even
> worse because the work will now be queued for immediate execution
> (instead of 2000 ms) in ksz_mac_link_down() via
> dsa_port_link_register_of().
> 
> Solution:
> 1. Do not queue (only initialize) delayed work in ksz_init_mib_timer().
> 2. Only queue delayed work in ksz_mac_link_down() if init is completed.
> 3. Queue work once in ksz_switch_register(), after dsa_register_switch()
> has completed.
> 
> Signed-off-by: Christian Eggers <ceggers@arri.de>
> Cc: stable@vger.kernel.org

This looks fine to me and the analysis appears to be correct, don't you 
need to pair the test for dev->mib_read_internal being non zero with 
setting it to zero in ksz_switch_unregister()?

You would also most likely want to add a Fixes: tag such that this can 
be back ported to stable trees?

> ---
> Call tree:
> ksz9477_i2c_probe()
> \--ksz9477_switch_register()
>     \--ksz_switch_register()
>        +--dsa_register_switch()
>        |  \--dsa_switch_probe()
>        |     \--dsa_tree_setup()
>        |        \--dsa_tree_setup_switches()
>        |           +--dsa_switch_setup()
>        |           |  +--ksz9477_setup()
>        |           |  |  \--ksz_init_mib_timer()
>        |           |  |     |--/* Start the timer 2 seconds later. */
>        |           |  |     \--schedule_delayed_work(&dev->mib_read, msecs_to_jiffies(2000));
>        |           |  \--__mdiobus_register()
>        |           |     \--mdiobus_scan()
>        |           |        \--get_phy_device()
>        |           |           +--get_phy_id()
>        |           |           \--phy_device_create()
>        |           |              |--/* sleeping, ksz_mib_read_work() can be called meanwhile */
>        |           |              \--request_module()
>        |           |
>        |           \--dsa_port_setup()
>        |              +--/* Called for non-CPU ports */
>        |              +--dsa_slave_create()
>        |              |  +--/* Too late, ksz_mib_read_work() may be called beforehand */
>        |              |  \--port->slave = ...
>        |             ...
>        |              +--Called for CPU port */
>        |              \--dsa_port_link_register_of()
>        |                 \--ksz_mac_link_down()
>        |                    +--/* mib_read must be initialized here */
>        |                    +--/* work is already scheduled, so it will be executed after 2000 ms */
>        |                    \--schedule_delayed_work(&dev->mib_read, 0);
>        \-- /* here port->slave is setup properly, scheduling the delayed work should be safe */
> 
> static void ksz_mib_read_work()
> \--netif_carrier_ok(dp->slave);  dp->slave has not been initialized yet
> 
> 
>   drivers/net/dsa/microchip/ksz_common.c | 16 +++++++++-------
>   1 file changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
> index 8e755b50c9c1..a94d2278b95c 100644
> --- a/drivers/net/dsa/microchip/ksz_common.c
> +++ b/drivers/net/dsa/microchip/ksz_common.c
> @@ -103,14 +103,8 @@ void ksz_init_mib_timer(struct ksz_device *dev)
>   
>   	INIT_DELAYED_WORK(&dev->mib_read, ksz_mib_read_work);
>   
> -	/* Read MIB counters every 30 seconds to avoid overflow. */
> -	dev->mib_read_interval = msecs_to_jiffies(30000);
> -
>   	for (i = 0; i < dev->mib_port_cnt; i++)
>   		dev->dev_ops->port_init_cnt(dev, i);
> -
> -	/* Start the timer 2 seconds later. */
> -	schedule_delayed_work(&dev->mib_read, msecs_to_jiffies(2000));
>   }
>   EXPORT_SYMBOL_GPL(ksz_init_mib_timer);
>   
> @@ -143,7 +137,9 @@ void ksz_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode,
>   
>   	/* Read all MIB counters when the link is going down. */
>   	p->read = true;
> -	schedule_delayed_work(&dev->mib_read, 0);
> +	/* timer started */
> +	if (dev->mib_read_interval)
> +		schedule_delayed_work(&dev->mib_read, 0);
>   }
>   EXPORT_SYMBOL_GPL(ksz_mac_link_down);
>   
> @@ -446,6 +442,12 @@ int ksz_switch_register(struct ksz_device *dev,
>   		return ret;
>   	}
>   
> +	/* Read MIB counters every 30 seconds to avoid overflow. */
> +	dev->mib_read_interval = msecs_to_jiffies(30000);
> +
> +	/* Start the MIB timer. */
> +	schedule_delayed_work(&dev->mib_read, 0);
> +
>   	return 0;
>   }
>   EXPORT_SYMBOL(ksz_switch_register);
> 

-- 
Florian

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] net: dsa: microchip: fix race condition
  2020-10-05 16:08 [PATCH] net: dsa: microchip: fix race condition Christian Eggers
  2020-10-05 19:59 ` Florian Fainelli
@ 2020-10-05 20:55 ` Vladimir Oltean
  1 sibling, 0 replies; 4+ messages in thread
From: Vladimir Oltean @ 2020-10-05 20:55 UTC (permalink / raw)
  To: Christian Eggers
  Cc: Woojung Huh, Microchip Linux Driver Support, Andrew Lunn,
	Vivien Didelot, Florian Fainelli, David S . Miller,
	Jakub Kicinski, netdev, linux-kernel, stable

Hi Christian,

On Mon, Oct 05, 2020 at 06:08:29PM +0200, Christian Eggers wrote:
> Between queuing the delayed work and finishing the setup of the dsa
> ports, the process may sleep in request_module() and the queued work may
> be executed prior the initialization of the DSA ports is finished. In
              ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
              "prior to the switch net devices being registered", maybe?
> ksz_mib_read_work(), a NULL dereference will happen within
> netof_carrier_ok(dp->slave).
> 
> Not queuing the delayed work in ksz_init_mib_timer() make things even
                                                       ~~~~
                                                       makes
> worse because the work will now be queued for immediate execution
> (instead of 2000 ms) in ksz_mac_link_down() via
> dsa_port_link_register_of().
> 
> Solution:
> 1. Do not queue (only initialize) delayed work in ksz_init_mib_timer().
> 2. Only queue delayed work in ksz_mac_link_down() if init is completed.
> 3. Queue work once in ksz_switch_register(), after dsa_register_switch()
> has completed.
> 
> Signed-off-by: Christian Eggers <ceggers@arri.de>
> Cc: stable@vger.kernel.org

For patches sent to the networking tree you should:
git format-patch --subject-prefix=
(a) "PATCH net-next" if it's a new feature (not applicable now)
(b) "PATCH net" if it's a bug fix (such is the case here)

Plus you should not Cc the stable mailing list, since David Miller deals
with sending patches to stable himself as long as you make sure to send
to his "net" tree as opposed to "net-next".

Read this for more details
https://www.kernel.org/doc/html/latest/networking/netdev-FAQ.html

> ---
> Call tree:

Please include the call path in the commit message, it is relevant that
request_module() is being called by phy_device_create(), and something
which you did not say in the verbal commit description.

FYI, you haven't even addressed the root cause of the problem, which is
ksz_mib_read_work sticking its nose where it's not supposed to:

		/* Only read MIB counters when the port is told to do.
		 * If not, read only dropped counters when link is not up.
		 */
		if (!p->read) {
			const struct dsa_port *dp = dsa_to_port(dev->ds, i);

			if (!netif_carrier_ok(dp->slave))
				mib->cnt_ptr = dev->reg_mib_cnt;
		}

This is simply Not Ok.
Not only the dp->slave is on purpose registered outside of the driver's
control (as you came to find out yourself), but not even all ports are
user ports. For example, the CPU port doesn't have a valid struct
net_device *slave pointer. You are just lucky that it's defined like
this:

struct dsa_port {
	/* A CPU port is physically connected to a master device.
	 * A user port exposed to userspace has a slave device.
	 */
	union {
		struct net_device *master;
		struct net_device *slave;
	};

so the code is in fact checking the status of the master interface's link.
But DSA doesn't assume that the *master and *slave pointers are under a
union. That can change any day, and when it changes, the KSZ driver will
break.

My personal feeling is that this driver hides a landmine beneath every
line of code, and it isn't getting better.
Sure, you should absolutely add the call stack to the commit message,
but how many people are going to git blame so they can see it. The code
needs to be obviously correct.

Things like needing to check dev->mib_read_interval as an indication
whether the race between ksz_mac_link_down and ksz_switch_register is
over are exactly the type of things that make it not fun to follow.

If reading MIB counters for ports that are down is such a "waste of time"
as per commit 7c6ff470aa867f53b8522a3a5c84c36ac7a20090, then how about
scheduling the delayed work from .phylink_mac_link_up, and canceling it
from .phylink_mac_link_down? Either that, or set a boolean variable to
struct ksz_port p->link_up, to true or false respectively from the
phylink callbacks, and using that as an indication whether to read the
MIB counters or not, instead of accessing the potentially invalid
dp->slave pointer? Would that work?

Sorry for rambling. I realize that there aren't probably a lot of things
you can do better to fix this problem for stable, but maybe you could
take some time and clean it up a little bit?

Thanks,
-Vladimir

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] net: dsa: microchip: fix race condition
@ 2020-10-27 19:45 Christian Eggers
  0 siblings, 0 replies; 4+ messages in thread
From: Christian Eggers @ 2020-10-27 19:45 UTC (permalink / raw)
  To: David S . Miller, Andrew Lunn, Vivien Didelot, Florian Fainelli
  Cc: netdev, linux-kernel, Christian Eggers, Vladimir Oltean, Jakub Kicinski

[ Upstream commit 8098bd69bc4e925070313b1b95d03510f4f24738 ]

Between queuing the delayed work and finishing the setup of the dsa
ports, the process may sleep in request_module() (via
phy_device_create()) and the queued work may be executed prior to the
switch net devices being registered. In ksz_mib_read_work(), a NULL
dereference will happen within netof_carrier_ok(dp->slave).

Not queuing the delayed work in ksz_init_mib_timer() makes things even
worse because the work will now be queued for immediate execution
(instead of 2000 ms) in ksz_mac_link_down() via
dsa_port_link_register_of().

Call tree:
ksz9477_i2c_probe()
\--ksz9477_switch_register()
   \--ksz_switch_register()
      +--dsa_register_switch()
      |  \--dsa_switch_probe()
      |     \--dsa_tree_setup()
      |        \--dsa_tree_setup_switches()
      |           +--dsa_switch_setup()
      |           |  +--ksz9477_setup()
      |           |  |  \--ksz_init_mib_timer()
      |           |  |     |--/* Start the timer 2 seconds later. */
      |           |  |     \--schedule_delayed_work(&dev->mib_read, msecs_to_jiffies(2000));
      |           |  \--__mdiobus_register()
      |           |     \--mdiobus_scan()
      |           |        \--get_phy_device()
      |           |           +--get_phy_id()
      |           |           \--phy_device_create()
      |           |              |--/* sleeping, ksz_mib_read_work() can be called meanwhile */
      |           |              \--request_module()
      |           |
      |           \--dsa_port_setup()
      |              +--/* Called for non-CPU ports */
      |              +--dsa_slave_create()
      |              |  +--/* Too late, ksz_mib_read_work() may be called beforehand */
      |              |  \--port->slave = ...
      |             ...
      |              +--Called for CPU port */
      |              \--dsa_port_link_register_of()
      |                 \--ksz_mac_link_down()
      |                    +--/* mib_read must be initialized here */
      |                    +--/* work is already scheduled, so it will be executed after 2000 ms */
      |                    \--schedule_delayed_work(&dev->mib_read, 0);
      \-- /* here port->slave is setup properly, scheduling the delayed work should be safe */

Solution:
1. Do not queue (only initialize) delayed work in ksz_init_mib_timer().
2. Only queue delayed work in ksz_mac_link_down() if init is completed.
3. Queue work once in ksz_switch_register(), after dsa_register_switch()
has completed.

Fixes: 7c6ff470aa86 ("net: dsa: microchip: add MIB counter reading support")
Signed-off-by: Christian Eggers <ceggers@arri.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
This is a back port for 5.4. The original version has been applied to 5.8/5.9
a few hours ago.

Please decide whether the Reviewed-By: and Singed-off-by: tags shall be kept
or removed when forwarding this to stable.

regards
Christian

 drivers/net/dsa/microchip/ksz_common.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 7fabc0e3d807..b1a9d1012fc4 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -107,18 +107,11 @@ void ksz_init_mib_timer(struct ksz_device *dev)
 {
 	int i;
 
-	/* Read MIB counters every 30 seconds to avoid overflow. */
-	dev->mib_read_interval = msecs_to_jiffies(30000);
-
 	INIT_WORK(&dev->mib_read, ksz_mib_read_work);
 	timer_setup(&dev->mib_read_timer, mib_monitor, 0);
 
 	for (i = 0; i < dev->mib_port_cnt; i++)
 		dev->dev_ops->port_init_cnt(dev, i);
-
-	/* Start the timer 2 seconds later. */
-	dev->mib_read_timer.expires = jiffies + msecs_to_jiffies(2000);
-	add_timer(&dev->mib_read_timer);
 }
 EXPORT_SYMBOL_GPL(ksz_init_mib_timer);
 
@@ -152,7 +145,9 @@ void ksz_adjust_link(struct dsa_switch *ds, int port,
 	/* Read all MIB counters when the link is going down. */
 	if (!phydev->link) {
 		p->read = true;
-		schedule_work(&dev->mib_read);
+		/* timer started */
+		if (dev->mib_read_interval)
+			schedule_work(&dev->mib_read);
 	}
 	mutex_lock(&dev->dev_mutex);
 	if (!phydev->link)
@@ -464,6 +459,13 @@ int ksz_switch_register(struct ksz_device *dev,
 		return ret;
 	}
 
+	/* Read MIB counters every 30 seconds to avoid overflow. */
+	dev->mib_read_interval = msecs_to_jiffies(30000);
+
+	/* Start the MIB timer. */
+	dev->mib_read_timer.expires = jiffies;
+	add_timer(&dev->mib_read_timer);
+
 	return 0;
 }
 EXPORT_SYMBOL(ksz_switch_register);
-- 
Christian Eggers
Embedded software developer

Arnold & Richter Cine Technik GmbH & Co. Betriebs KG
Sitz: Muenchen - Registergericht: Amtsgericht Muenchen - Handelsregisternummer: HRA 57918
Persoenlich haftender Gesellschafter: Arnold & Richter Cine Technik GmbH
Sitz: Muenchen - Registergericht: Amtsgericht Muenchen - Handelsregisternummer: HRB 54477
Geschaeftsfuehrer: Dr. Michael Neuhaeuser; Stephan Schenk; Walter Trauninger; Markus Zeiler


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-10-27 19:47 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-05 16:08 [PATCH] net: dsa: microchip: fix race condition Christian Eggers
2020-10-05 19:59 ` Florian Fainelli
2020-10-05 20:55 ` Vladimir Oltean
2020-10-27 19:45 Christian Eggers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).