All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] team: postpone features update to avoid deadlock
@ 2021-01-20 12:23 Ivan Vecera
  2021-01-20 13:40 ` Jiri Pirko
  2021-01-20 23:18 ` Cong Wang
  0 siblings, 2 replies; 7+ messages in thread
From: Ivan Vecera @ 2021-01-20 12:23 UTC (permalink / raw)
  To: netdev; +Cc: saeed, Jiri Pirko

Team driver protects port list traversal by its team->lock mutex
in functions like team_change_mtu(), team_set_rx_mode(),
team_vlan_rx_{add,del}_vid() etc.
These functions call appropriate callbacks of all enslaved
devices. Some drivers need to update their features under
certain conditions (e.g. TSO is broken for jumbo frames etc.) so
they call netdev_update_features(). This causes a deadlock because
netdev_update_features() calls netdevice notifiers and one of them
is team_device_event() that in case of NETDEV_FEAT_CHANGE tries lock
team->lock mutex again.

Example (r8169 case):
...
[ 6391.348202]  __mutex_lock.isra.6+0x2d0/0x4a0
[ 6391.358602]  team_device_event+0x9d/0x160 [team]
[ 6391.363756]  notifier_call_chain+0x47/0x70
[ 6391.368329]  netdev_update_features+0x56/0x60
[ 6391.373207]  rtl8169_change_mtu+0x14/0x50 [r8169]
[ 6391.378457]  dev_set_mtu_ext+0xe1/0x1d0
[ 6391.387022]  dev_set_mtu+0x52/0x90
[ 6391.390820]  team_change_mtu+0x64/0xf0 [team]
[ 6391.395683]  dev_set_mtu_ext+0xe1/0x1d0
[ 6391.399963]  do_setlink+0x231/0xf50
...

To fix the problem __team_compute_features() needs to be postponed
for these cases.

Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device")
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Ivan Vecera <ivecera@redhat.com>
---
 drivers/net/team/team.c | 36 +++++++++++++++++++++++++++++++++++-
 include/linux/if_team.h |  1 +
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index c19dac21c468..f66d38b0e70a 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -975,6 +975,10 @@ static void team_port_disable(struct team *team,
 	team_lower_state_changed(port);
 }
 
+/*******************
+ * Compute features
+ *******************/
+
 #define TEAM_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
 			    NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | \
 			    NETIF_F_HIGHDMA | NETIF_F_LRO)
@@ -1018,12 +1022,39 @@ static void __team_compute_features(struct team *team)
 		team->dev->priv_flags |= IFF_XMIT_DST_RELEASE;
 }
 
-static void team_compute_features(struct team *team)
+static void team_compute_features_work(struct work_struct *work)
 {
+	struct team *team;
+
+	team = container_of(work, struct team, compute_features_task);
 	mutex_lock(&team->lock);
 	__team_compute_features(team);
 	mutex_unlock(&team->lock);
+
+	rtnl_lock();
 	netdev_change_features(team->dev);
+	rtnl_unlock();
+}
+
+static void team_compute_features(struct team *team)
+{
+	if (mutex_trylock(&team->lock)) {
+		__team_compute_features(team);
+		mutex_unlock(&team->lock);
+		netdev_change_features(team->dev);
+	} else {
+		schedule_work(&team->compute_features_task);
+	}
+}
+
+static void team_compute_features_init(struct team *team)
+{
+	INIT_WORK(&team->compute_features_task, team_compute_features_work);
+}
+
+static void team_compute_features_fini(struct team *team)
+{
+	cancel_work_sync(&team->compute_features_task);
 }
 
 static int team_port_enter(struct team *team, struct team_port *port)
@@ -1639,6 +1670,7 @@ static int team_init(struct net_device *dev)
 
 	team_notify_peers_init(team);
 	team_mcast_rejoin_init(team);
+	team_compute_features_init(team);
 
 	err = team_options_register(team, team_options, ARRAY_SIZE(team_options));
 	if (err)
@@ -1652,6 +1684,7 @@ static int team_init(struct net_device *dev)
 	return 0;
 
 err_options_register:
+	team_compute_features_fini(team);
 	team_mcast_rejoin_fini(team);
 	team_notify_peers_fini(team);
 	team_queue_override_fini(team);
@@ -1673,6 +1706,7 @@ static void team_uninit(struct net_device *dev)
 
 	__team_change_mode(team, NULL); /* cleanup */
 	__team_options_unregister(team, team_options, ARRAY_SIZE(team_options));
+	team_compute_features_fini(team);
 	team_mcast_rejoin_fini(team);
 	team_notify_peers_fini(team);
 	team_queue_override_fini(team);
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index add607943c95..581d79552bbd 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -208,6 +208,7 @@ struct team {
 	bool queue_override_enabled;
 	struct list_head *qom_lists; /* array of queue override mapping lists */
 	bool port_mtu_change_allowed;
+	struct work_struct compute_features_task;
 	struct {
 		unsigned int count;
 		unsigned int interval; /* in ms */
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH net] team: postpone features update to avoid deadlock
  2021-01-20 12:23 [PATCH net] team: postpone features update to avoid deadlock Ivan Vecera
@ 2021-01-20 13:40 ` Jiri Pirko
  2021-01-20 23:18 ` Cong Wang
  1 sibling, 0 replies; 7+ messages in thread
From: Jiri Pirko @ 2021-01-20 13:40 UTC (permalink / raw)
  To: Ivan Vecera; +Cc: netdev, saeed

Wed, Jan 20, 2021 at 01:23:54PM CET, ivecera@redhat.com wrote:
>Team driver protects port list traversal by its team->lock mutex
>in functions like team_change_mtu(), team_set_rx_mode(),
>team_vlan_rx_{add,del}_vid() etc.
>These functions call appropriate callbacks of all enslaved
>devices. Some drivers need to update their features under
>certain conditions (e.g. TSO is broken for jumbo frames etc.) so
>they call netdev_update_features(). This causes a deadlock because
>netdev_update_features() calls netdevice notifiers and one of them
>is team_device_event() that in case of NETDEV_FEAT_CHANGE tries lock
>team->lock mutex again.
>
>Example (r8169 case):
>...
>[ 6391.348202]  __mutex_lock.isra.6+0x2d0/0x4a0
>[ 6391.358602]  team_device_event+0x9d/0x160 [team]
>[ 6391.363756]  notifier_call_chain+0x47/0x70
>[ 6391.368329]  netdev_update_features+0x56/0x60
>[ 6391.373207]  rtl8169_change_mtu+0x14/0x50 [r8169]
>[ 6391.378457]  dev_set_mtu_ext+0xe1/0x1d0
>[ 6391.387022]  dev_set_mtu+0x52/0x90
>[ 6391.390820]  team_change_mtu+0x64/0xf0 [team]
>[ 6391.395683]  dev_set_mtu_ext+0xe1/0x1d0
>[ 6391.399963]  do_setlink+0x231/0xf50
>...
>
>To fix the problem __team_compute_features() needs to be postponed
>for these cases.
>
>Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device")
>Cc: Jiri Pirko <jiri@resnulli.us>
>Signed-off-by: Ivan Vecera <ivecera@redhat.com>

Reviewed-by: Jiri Pirko <jiri@nvidia.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net] team: postpone features update to avoid deadlock
  2021-01-20 12:23 [PATCH net] team: postpone features update to avoid deadlock Ivan Vecera
  2021-01-20 13:40 ` Jiri Pirko
@ 2021-01-20 23:18 ` Cong Wang
  2021-01-21 10:29   ` Ivan Vecera
  1 sibling, 1 reply; 7+ messages in thread
From: Cong Wang @ 2021-01-20 23:18 UTC (permalink / raw)
  To: Ivan Vecera; +Cc: Linux Kernel Network Developers, saeed, Jiri Pirko

On Wed, Jan 20, 2021 at 4:56 AM Ivan Vecera <ivecera@redhat.com> wrote:
>
> To fix the problem __team_compute_features() needs to be postponed
> for these cases.

Is there any user-visible effect after deferring this feature change?

Thanks.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net] team: postpone features update to avoid deadlock
  2021-01-20 23:18 ` Cong Wang
@ 2021-01-21 10:29   ` Ivan Vecera
  2021-01-22  2:34     ` Jakub Kicinski
  0 siblings, 1 reply; 7+ messages in thread
From: Ivan Vecera @ 2021-01-21 10:29 UTC (permalink / raw)
  To: Cong Wang; +Cc: Linux Kernel Network Developers, saeed, Jiri Pirko

On Wed, 20 Jan 2021 15:18:20 -0800
Cong Wang <xiyou.wangcong@gmail.com> wrote:

> On Wed, Jan 20, 2021 at 4:56 AM Ivan Vecera <ivecera@redhat.com> wrote:
> >
> > To fix the problem __team_compute_features() needs to be postponed
> > for these cases.  
> 
> Is there any user-visible effect after deferring this feature change?
> 
An user should not notice this change.

I.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net] team: postpone features update to avoid deadlock
  2021-01-21 10:29   ` Ivan Vecera
@ 2021-01-22  2:34     ` Jakub Kicinski
  2021-01-22  8:30       ` Ivan Vecera
  0 siblings, 1 reply; 7+ messages in thread
From: Jakub Kicinski @ 2021-01-22  2:34 UTC (permalink / raw)
  To: Ivan Vecera; +Cc: Cong Wang, Linux Kernel Network Developers, saeed, Jiri Pirko

On Thu, 21 Jan 2021 11:29:37 +0100 Ivan Vecera wrote:
> On Wed, 20 Jan 2021 15:18:20 -0800
> Cong Wang <xiyou.wangcong@gmail.com> wrote:
> > On Wed, Jan 20, 2021 at 4:56 AM Ivan Vecera <ivecera@redhat.com> wrote:  
> > > Team driver protects port list traversal by its team->lock mutex
> > > in functions like team_change_mtu(), team_set_rx_mode(),

The set_rx_mode part can't be true, set_rx_mode can't sleep and
team->lock is a mutex.

> > > To fix the problem __team_compute_features() needs to be postponed
> > > for these cases.    
> > 
> > Is there any user-visible effect after deferring this feature change?
>
> An user should not notice this change.

I think Cong is right, can you expand a little on your assertion?
User should be able to assume that the moment syscall returns the
features had settled.

What does team->mutex actually protect in team_compute_features()?
All callers seem to hold RTNL at a quick glance. This is a bit of 
a long shot but isn't it just tryin to protect the iteration over 
ports which could be under RCU?

More crude idea would be to wrap the mutex_unlock(&team->lock) into 
a helper which checks if something tried to change features while it
was locked. rtnl_unlock()-style.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net] team: postpone features update to avoid deadlock
  2021-01-22  2:34     ` Jakub Kicinski
@ 2021-01-22  8:30       ` Ivan Vecera
  2021-01-22 18:03         ` Jakub Kicinski
  0 siblings, 1 reply; 7+ messages in thread
From: Ivan Vecera @ 2021-01-22  8:30 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Cong Wang, Linux Kernel Network Developers, saeed, Jiri Pirko

On Thu, 21 Jan 2021 18:34:52 -0800
Jakub Kicinski <kuba@kernel.org> wrote:

> On Thu, 21 Jan 2021 11:29:37 +0100 Ivan Vecera wrote:
> > On Wed, 20 Jan 2021 15:18:20 -0800
> > Cong Wang <xiyou.wangcong@gmail.com> wrote:  
> > > On Wed, Jan 20, 2021 at 4:56 AM Ivan Vecera <ivecera@redhat.com> wrote:    
> > > > Team driver protects port list traversal by its team->lock mutex
> > > > in functions like team_change_mtu(), team_set_rx_mode(),  
> 
> The set_rx_mode part can't be true, set_rx_mode can't sleep and
> team->lock is a mutex.
> 
> > > > To fix the problem __team_compute_features() needs to be postponed
> > > > for these cases.      
> > > 
> > > Is there any user-visible effect after deferring this feature change?  
> >
> > An user should not notice this change.  
> 
> I think Cong is right, can you expand a little on your assertion?
> User should be able to assume that the moment syscall returns the
> features had settled.
> 
> What does team->mutex actually protect in team_compute_features()?
> All callers seem to hold RTNL at a quick glance. This is a bit of 
> a long shot but isn't it just tryin to protect the iteration over 
> ports which could be under RCU?

In fact the mutex could be removed at all because all port-list
writers are running under rtnl_lock, some readers like team_change_mtu()
or team_device_event() [notifier] as well and hot path readers are
protected by RCU.
I have discussed this with Jiri but he don't want to introduce any dependency
on RTNL to team as it was designed as RTNL-independent from beginning.

Anyway your idea to run team_compute_features under RCU could be fine
as subsequent __team_compute_features() cannot sleep...

Do you mean something like this?

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index c19dac21c468..dd7917cab2b1 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -992,7 +992,8 @@ static void __team_compute_features(struct team *team)
        unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
                                        IFF_XMIT_DST_RELEASE_PERM;
 
-       list_for_each_entry(port, &team->port_list, list) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(port, &team->port_list, list) {
                vlan_features = netdev_increment_features(vlan_features,
                                        port->dev->vlan_features,
                                        TEAM_VLAN_FEATURES);
@@ -1006,6 +1007,7 @@ static void __team_compute_features(struct team *team)
                if (port->dev->hard_header_len > max_hard_header_len)
                        max_hard_header_len = port->dev->hard_header_len;
        }
+       rcu_read_unlock();
 
        team->dev->vlan_features = vlan_features;
        team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
@@ -1020,9 +1022,7 @@ static void __team_compute_features(struct team *team)
 
 static void team_compute_features(struct team *team)
 {
-       mutex_lock(&team->lock);
        __team_compute_features(team);
-       mutex_unlock(&team->lock);
        netdev_change_features(team->dev);
 }

Thanks for comments,

Ivan


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH net] team: postpone features update to avoid deadlock
  2021-01-22  8:30       ` Ivan Vecera
@ 2021-01-22 18:03         ` Jakub Kicinski
  0 siblings, 0 replies; 7+ messages in thread
From: Jakub Kicinski @ 2021-01-22 18:03 UTC (permalink / raw)
  To: Ivan Vecera; +Cc: Cong Wang, Linux Kernel Network Developers, saeed, Jiri Pirko

On Fri, 22 Jan 2021 09:30:27 +0100 Ivan Vecera wrote:
> On Thu, 21 Jan 2021 18:34:52 -0800
> Jakub Kicinski <kuba@kernel.org> wrote:
> 
> > On Thu, 21 Jan 2021 11:29:37 +0100 Ivan Vecera wrote:  
> > > On Wed, 20 Jan 2021 15:18:20 -0800
> > > Cong Wang <xiyou.wangcong@gmail.com> wrote:    
> > > > On Wed, Jan 20, 2021 at 4:56 AM Ivan Vecera <ivecera@redhat.com> wrote:      
> > > > > Team driver protects port list traversal by its team->lock mutex
> > > > > in functions like team_change_mtu(), team_set_rx_mode(),    
> > 
> > The set_rx_mode part can't be true, set_rx_mode can't sleep and
> > team->lock is a mutex.
> >   
> > > > > To fix the problem __team_compute_features() needs to be postponed
> > > > > for these cases.        
> > > > 
> > > > Is there any user-visible effect after deferring this feature change?    
> > >
> > > An user should not notice this change.    
> > 
> > I think Cong is right, can you expand a little on your assertion?
> > User should be able to assume that the moment syscall returns the
> > features had settled.
> > 
> > What does team->mutex actually protect in team_compute_features()?
> > All callers seem to hold RTNL at a quick glance. This is a bit of 
> > a long shot but isn't it just tryin to protect the iteration over 
> > ports which could be under RCU?  
> 
> In fact the mutex could be removed at all because all port-list
> writers are running under rtnl_lock, some readers like team_change_mtu()
> or team_device_event() [notifier] as well and hot path readers are
> protected by RCU.
> I have discussed this with Jiri but he don't want to introduce any dependency
> on RTNL to team as it was designed as RTNL-independent from beginning.
> 
> Anyway your idea to run team_compute_features under RCU could be fine
> as subsequent __team_compute_features() cannot sleep...
> 
> Do you mean something like this?
> 
> diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
> index c19dac21c468..dd7917cab2b1 100644
> --- a/drivers/net/team/team.c
> +++ b/drivers/net/team/team.c
> @@ -992,7 +992,8 @@ static void __team_compute_features(struct team *team)
>         unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
>                                         IFF_XMIT_DST_RELEASE_PERM;
>  
> -       list_for_each_entry(port, &team->port_list, list) {
> +       rcu_read_lock();
> +       list_for_each_entry_rcu(port, &team->port_list, list) {
>                 vlan_features = netdev_increment_features(vlan_features,
>                                         port->dev->vlan_features,
>                                         TEAM_VLAN_FEATURES);
> @@ -1006,6 +1007,7 @@ static void __team_compute_features(struct team *team)
>                 if (port->dev->hard_header_len > max_hard_header_len)
>                         max_hard_header_len = port->dev->hard_header_len;
>         }
> +       rcu_read_unlock();
>  
>         team->dev->vlan_features = vlan_features;
>         team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
> @@ -1020,9 +1022,7 @@ static void __team_compute_features(struct team *team)
>  
>  static void team_compute_features(struct team *team)
>  {
> -       mutex_lock(&team->lock);
>         __team_compute_features(team);
> -       mutex_unlock(&team->lock);
>         netdev_change_features(team->dev);
>  }

Yup, like this, but if Jiri doesn't like it then I guess we need to
come up with something else?

How about doing the work on unlock? Have some bit set when we had to
defer and then run __team_compute_features() before releasing the lock
for real?

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-01-22 18:05 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-20 12:23 [PATCH net] team: postpone features update to avoid deadlock Ivan Vecera
2021-01-20 13:40 ` Jiri Pirko
2021-01-20 23:18 ` Cong Wang
2021-01-21 10:29   ` Ivan Vecera
2021-01-22  2:34     ` Jakub Kicinski
2021-01-22  8:30       ` Ivan Vecera
2021-01-22 18:03         ` Jakub Kicinski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.