All of lore.kernel.org
 help / color / mirror / Atom feed
* [Patch net] net: fix dev_ifsioc_locked() race condition
@ 2021-01-24  1:30 Cong Wang
  2021-01-25 19:43 ` Gong, Sishuai
  2021-01-28 20:55 ` Jakub Kicinski
  0 siblings, 2 replies; 9+ messages in thread
From: Cong Wang @ 2021-01-24  1:30 UTC (permalink / raw)
  To: netdev; +Cc: Cong Wang, Gong, Sishuai, Eric Dumazet

From: Cong Wang <cong.wang@bytedance.com>

dev_ifsioc_locked() is called with only RCU read lock, so when
there is a parallel writer changing the mac address, it could
get a partially updated mac address, as shown below:

Thread 1			Thread 2
// eth_commit_mac_addr_change()
memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
				// dev_ifsioc_locked()
				memcpy(ifr->ifr_hwaddr.sa_data,
					dev->dev_addr,...);

Close this race condition by guarding them with a RW semaphore,
like netdev_get_name(). The writers take RTNL anyway, so this
will not affect the slow path.

Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 39 ++++++++++++++++++++++++++++++++++++---
 net/core/dev_ioctl.c      | 18 ++++++------------
 3 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 259be67644e3..7a871f2dcc03 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3918,6 +3918,7 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
 			      struct netlink_ext_ack *extack);
 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
 			struct netlink_ext_ack *extack);
+int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name);
 int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_item_id *ppid);
diff --git a/net/core/dev.c b/net/core/dev.c
index a979b86dbacd..55c0db7704c7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8709,6 +8709,8 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
 }
 EXPORT_SYMBOL(dev_pre_changeaddr_notify);
 
+static DECLARE_RWSEM(dev_addr_sem);
+
 /**
  *	dev_set_mac_address - Change Media Access Control Address
  *	@dev: device
@@ -8729,19 +8731,50 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
 		return -EINVAL;
 	if (!netif_device_present(dev))
 		return -ENODEV;
+
+	down_write(&dev_addr_sem);
 	err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
 	if (err)
-		return err;
+		goto out;
 	err = ops->ndo_set_mac_address(dev, sa);
 	if (err)
-		return err;
+		goto out;
 	dev->addr_assign_type = NET_ADDR_SET;
 	call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 	add_device_randomness(dev->dev_addr, dev->addr_len);
-	return 0;
+out:
+	up_write(&dev_addr_sem);
+	return err;
 }
 EXPORT_SYMBOL(dev_set_mac_address);
 
+int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
+{
+	size_t size = sizeof(sa->sa_data);
+	struct net_device *dev;
+	int ret = 0;
+
+	down_read(&dev_addr_sem);
+	rcu_read_lock();
+
+	dev = dev_get_by_name_rcu(net, dev_name);
+	if (!dev) {
+		ret = -ENODEV;
+		goto unlock;
+	}
+	if (!dev->addr_len)
+		memset(sa->sa_data, 0, size);
+	else
+		memcpy(sa->sa_data, dev->dev_addr,
+		       min_t(size_t, size, dev->addr_len));
+	sa->sa_family = dev->type;
+
+unlock:
+	rcu_read_unlock();
+	up_read(&dev_addr_sem);
+	return ret;
+}
+
 /**
  *	dev_change_carrier - Change device carrier
  *	@dev: device
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index db8a0ff86f36..bfa0dbd3d36f 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -123,17 +123,6 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
 		ifr->ifr_mtu = dev->mtu;
 		return 0;
 
-	case SIOCGIFHWADDR:
-		if (!dev->addr_len)
-			memset(ifr->ifr_hwaddr.sa_data, 0,
-			       sizeof(ifr->ifr_hwaddr.sa_data));
-		else
-			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
-			       min(sizeof(ifr->ifr_hwaddr.sa_data),
-				   (size_t)dev->addr_len));
-		ifr->ifr_hwaddr.sa_family = dev->type;
-		return 0;
-
 	case SIOCGIFSLAVE:
 		err = -EINVAL;
 		break;
@@ -418,6 +407,12 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 	 */
 
 	switch (cmd) {
+	case SIOCGIFHWADDR:
+		dev_load(net, ifr->ifr_name);
+		ret = dev_get_mac_address(&ifr->ifr_hwaddr, net, ifr->ifr_name);
+		if (colon)
+			*colon = ':';
+		return ret;
 	/*
 	 *	These ioctl calls:
 	 *	- can be done by all.
@@ -427,7 +422,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 	case SIOCGIFFLAGS:
 	case SIOCGIFMETRIC:
 	case SIOCGIFMTU:
-	case SIOCGIFHWADDR:
 	case SIOCGIFSLAVE:
 	case SIOCGIFMAP:
 	case SIOCGIFINDEX:
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [Patch net] net: fix dev_ifsioc_locked() race condition
  2021-01-24  1:30 [Patch net] net: fix dev_ifsioc_locked() race condition Cong Wang
@ 2021-01-25 19:43 ` Gong, Sishuai
  2021-01-28  1:42   ` Cong Wang
  2021-01-28 20:55 ` Jakub Kicinski
  1 sibling, 1 reply; 9+ messages in thread
From: Gong, Sishuai @ 2021-01-25 19:43 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, Cong Wang, Eric Dumazet

Hi,

We also found another pair of writer and reader that may suffer the same problem.

A data race may also happen on the variable netdev->dev_addr when functions e1000_set_mac() and packet_getname() run in parallel, eventually it could return a partially updated MAC address to the user, as shown below:

Thread 1								Thread 2
									// packet_getname()
									memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
//e1000_set_mac()
memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);

And the calling stacks are:

Writer calling trace
- __sys_sendmsg
-- ___sys_sendmsg
--- sock_sendmsg
---- netlink_unicast
----- netlink_rcv_skb 
------ __rtnl_newlink
------- do_setlink
-------- dev_set_mac_address

Reader calling trace
- __sys_getsockname

Since e1000_set_mac() is also called by dev_set_mac_address(), the writer can be protected already with this patch. The reader, however, needs to grab the same semaphore to close the race condition.


Thanks,
Sishuai

> On Jan 23, 2021, at 8:30 PM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
> 
> From: Cong Wang <cong.wang@bytedance.com>
> 
> dev_ifsioc_locked() is called with only RCU read lock, so when
> there is a parallel writer changing the mac address, it could
> get a partially updated mac address, as shown below:
> 
> Thread 1			Thread 2
> // eth_commit_mac_addr_change()
> memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
> 				// dev_ifsioc_locked()
> 				memcpy(ifr->ifr_hwaddr.sa_data,
> 					dev->dev_addr,...);
> 
> Close this race condition by guarding them with a RW semaphore,
> like netdev_get_name(). The writers take RTNL anyway, so this
> will not affect the slow path.
> 
> Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
> Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
> Cc: Eric Dumazet <eric.dumazet@gmail.com>
> Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> ---
> include/linux/netdevice.h |  1 +
> net/core/dev.c            | 39 ++++++++++++++++++++++++++++++++++++---
> net/core/dev_ioctl.c      | 18 ++++++------------
> 3 files changed, 43 insertions(+), 15 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 259be67644e3..7a871f2dcc03 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -3918,6 +3918,7 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
> 			      struct netlink_ext_ack *extack);
> int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
> 			struct netlink_ext_ack *extack);
> +int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name);
> int dev_change_carrier(struct net_device *, bool new_carrier);
> int dev_get_phys_port_id(struct net_device *dev,
> 			 struct netdev_phys_item_id *ppid);
> diff --git a/net/core/dev.c b/net/core/dev.c
> index a979b86dbacd..55c0db7704c7 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -8709,6 +8709,8 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
> }
> EXPORT_SYMBOL(dev_pre_changeaddr_notify);
> 
> +static DECLARE_RWSEM(dev_addr_sem);
> +
> /**
>  *	dev_set_mac_address - Change Media Access Control Address
>  *	@dev: device
> @@ -8729,19 +8731,50 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
> 		return -EINVAL;
> 	if (!netif_device_present(dev))
> 		return -ENODEV;
> +
> +	down_write(&dev_addr_sem);
> 	err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
> 	if (err)
> -		return err;
> +		goto out;
> 	err = ops->ndo_set_mac_address(dev, sa);
> 	if (err)
> -		return err;
> +		goto out;
> 	dev->addr_assign_type = NET_ADDR_SET;
> 	call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
> 	add_device_randomness(dev->dev_addr, dev->addr_len);
> -	return 0;
> +out:
> +	up_write(&dev_addr_sem);
> +	return err;
> }
> EXPORT_SYMBOL(dev_set_mac_address);
> 
> +int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
> +{
> +	size_t size = sizeof(sa->sa_data);
> +	struct net_device *dev;
> +	int ret = 0;
> +
> +	down_read(&dev_addr_sem);
> +	rcu_read_lock();
> +
> +	dev = dev_get_by_name_rcu(net, dev_name);
> +	if (!dev) {
> +		ret = -ENODEV;
> +		goto unlock;
> +	}
> +	if (!dev->addr_len)
> +		memset(sa->sa_data, 0, size);
> +	else
> +		memcpy(sa->sa_data, dev->dev_addr,
> +		       min_t(size_t, size, dev->addr_len));
> +	sa->sa_family = dev->type;
> +
> +unlock:
> +	rcu_read_unlock();
> +	up_read(&dev_addr_sem);
> +	return ret;
> +}
> +
> /**
>  *	dev_change_carrier - Change device carrier
>  *	@dev: device
> diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
> index db8a0ff86f36..bfa0dbd3d36f 100644
> --- a/net/core/dev_ioctl.c
> +++ b/net/core/dev_ioctl.c
> @@ -123,17 +123,6 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
> 		ifr->ifr_mtu = dev->mtu;
> 		return 0;
> 
> -	case SIOCGIFHWADDR:
> -		if (!dev->addr_len)
> -			memset(ifr->ifr_hwaddr.sa_data, 0,
> -			       sizeof(ifr->ifr_hwaddr.sa_data));
> -		else
> -			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
> -			       min(sizeof(ifr->ifr_hwaddr.sa_data),
> -				   (size_t)dev->addr_len));
> -		ifr->ifr_hwaddr.sa_family = dev->type;
> -		return 0;
> -
> 	case SIOCGIFSLAVE:
> 		err = -EINVAL;
> 		break;
> @@ -418,6 +407,12 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
> 	 */
> 
> 	switch (cmd) {
> +	case SIOCGIFHWADDR:
> +		dev_load(net, ifr->ifr_name);
> +		ret = dev_get_mac_address(&ifr->ifr_hwaddr, net, ifr->ifr_name);
> +		if (colon)
> +			*colon = ':';
> +		return ret;
> 	/*
> 	 *	These ioctl calls:
> 	 *	- can be done by all.
> @@ -427,7 +422,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
> 	case SIOCGIFFLAGS:
> 	case SIOCGIFMETRIC:
> 	case SIOCGIFMTU:
> -	case SIOCGIFHWADDR:
> 	case SIOCGIFSLAVE:
> 	case SIOCGIFMAP:
> 	case SIOCGIFINDEX:
> -- 
> 2.25.1
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Patch net] net: fix dev_ifsioc_locked() race condition
  2021-01-25 19:43 ` Gong, Sishuai
@ 2021-01-28  1:42   ` Cong Wang
  0 siblings, 0 replies; 9+ messages in thread
From: Cong Wang @ 2021-01-28  1:42 UTC (permalink / raw)
  To: Gong, Sishuai; +Cc: netdev, Cong Wang, Eric Dumazet

On Mon, Jan 25, 2021 at 11:43 AM Gong, Sishuai <sishuai@purdue.edu> wrote:
>
> Hi,
>
> We also found another pair of writer and reader that may suffer the same problem.
>
> A data race may also happen on the variable netdev->dev_addr when functions e1000_set_mac() and packet_getname() run in parallel, eventually it could return a partially updated MAC address to the user, as shown below:

Yeah, this one requires a separate patch, because at least
it uses an index instead of a name to lookup the devices.

Thanks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Patch net] net: fix dev_ifsioc_locked() race condition
  2021-01-24  1:30 [Patch net] net: fix dev_ifsioc_locked() race condition Cong Wang
  2021-01-25 19:43 ` Gong, Sishuai
@ 2021-01-28 20:55 ` Jakub Kicinski
  2021-01-29  5:08   ` Cong Wang
  1 sibling, 1 reply; 9+ messages in thread
From: Jakub Kicinski @ 2021-01-28 20:55 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, Cong Wang, Gong, Sishuai, Eric Dumazet

On Sat, 23 Jan 2021 17:30:49 -0800 Cong Wang wrote:
> From: Cong Wang <cong.wang@bytedance.com>
> 
> dev_ifsioc_locked() is called with only RCU read lock, so when
> there is a parallel writer changing the mac address, it could
> get a partially updated mac address, as shown below:
> 
> Thread 1			Thread 2
> // eth_commit_mac_addr_change()
> memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
> 				// dev_ifsioc_locked()
> 				memcpy(ifr->ifr_hwaddr.sa_data,
> 					dev->dev_addr,...);
> 
> Close this race condition by guarding them with a RW semaphore,
> like netdev_get_name(). The writers take RTNL anyway, so this
> will not affect the slow path.
> 
> Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
> Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
> Cc: Eric Dumazet <eric.dumazet@gmail.com>
> Signed-off-by: Cong Wang <cong.wang@bytedance.com>

The addition of the write lock scares me a little for a fix, there's a
lot of code which can potentially run under the callbacks and notifiers
there.

What about using a seqlock?

> +static DECLARE_RWSEM(dev_addr_sem);
> +
>  /**
>   *	dev_set_mac_address - Change Media Access Control Address
>   *	@dev: device
> @@ -8729,19 +8731,50 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
>  		return -EINVAL;
>  	if (!netif_device_present(dev))
>  		return -ENODEV;
> +
> +	down_write(&dev_addr_sem);
>  	err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
>  	if (err)
> -		return err;
> +		goto out;
>  	err = ops->ndo_set_mac_address(dev, sa);
>  	if (err)
> -		return err;
> +		goto out;
>  	dev->addr_assign_type = NET_ADDR_SET;
>  	call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
>  	add_device_randomness(dev->dev_addr, dev->addr_len);
> -	return 0;
> +out:
> +	up_write(&dev_addr_sem);
> +	return err;
>  }

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Patch net] net: fix dev_ifsioc_locked() race condition
  2021-01-28 20:55 ` Jakub Kicinski
@ 2021-01-29  5:08   ` Cong Wang
  2021-01-29  5:21     ` Jakub Kicinski
  0 siblings, 1 reply; 9+ messages in thread
From: Cong Wang @ 2021-01-29  5:08 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Linux Kernel Network Developers, Cong Wang, Gong, Sishuai, Eric Dumazet

On Thu, Jan 28, 2021 at 12:55 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Sat, 23 Jan 2021 17:30:49 -0800 Cong Wang wrote:
> > From: Cong Wang <cong.wang@bytedance.com>
> >
> > dev_ifsioc_locked() is called with only RCU read lock, so when
> > there is a parallel writer changing the mac address, it could
> > get a partially updated mac address, as shown below:
> >
> > Thread 1                      Thread 2
> > // eth_commit_mac_addr_change()
> > memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
> >                               // dev_ifsioc_locked()
> >                               memcpy(ifr->ifr_hwaddr.sa_data,
> >                                       dev->dev_addr,...);
> >
> > Close this race condition by guarding them with a RW semaphore,
> > like netdev_get_name(). The writers take RTNL anyway, so this
> > will not affect the slow path.
> >
> > Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
> > Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
> > Cc: Eric Dumazet <eric.dumazet@gmail.com>
> > Signed-off-by: Cong Wang <cong.wang@bytedance.com>
>
> The addition of the write lock scares me a little for a fix, there's a
> lot of code which can potentially run under the callbacks and notifiers
> there.
>
> What about using a seqlock?

Actually I did use seqlock in my initial version (not posted), it does not
allow blocking inside write_seqlock() protection, so I have to change
to rwsem.

Thanks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Patch net] net: fix dev_ifsioc_locked() race condition
  2021-01-29  5:08   ` Cong Wang
@ 2021-01-29  5:21     ` Jakub Kicinski
  2021-01-29  5:47       ` Cong Wang
  0 siblings, 1 reply; 9+ messages in thread
From: Jakub Kicinski @ 2021-01-29  5:21 UTC (permalink / raw)
  To: Cong Wang
  Cc: Linux Kernel Network Developers, Cong Wang, Gong, Sishuai, Eric Dumazet

On Thu, 28 Jan 2021 21:08:05 -0800 Cong Wang wrote:
> On Thu, Jan 28, 2021 at 12:55 PM Jakub Kicinski <kuba@kernel.org> wrote:
> >
> > On Sat, 23 Jan 2021 17:30:49 -0800 Cong Wang wrote:  
> > > From: Cong Wang <cong.wang@bytedance.com>
> > >
> > > dev_ifsioc_locked() is called with only RCU read lock, so when
> > > there is a parallel writer changing the mac address, it could
> > > get a partially updated mac address, as shown below:
> > >
> > > Thread 1                      Thread 2
> > > // eth_commit_mac_addr_change()
> > > memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
> > >                               // dev_ifsioc_locked()
> > >                               memcpy(ifr->ifr_hwaddr.sa_data,
> > >                                       dev->dev_addr,...);
> > >
> > > Close this race condition by guarding them with a RW semaphore,
> > > like netdev_get_name(). The writers take RTNL anyway, so this
> > > will not affect the slow path.
> > >
> > > Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
> > > Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
> > > Cc: Eric Dumazet <eric.dumazet@gmail.com>
> > > Signed-off-by: Cong Wang <cong.wang@bytedance.com>  
> >
> > The addition of the write lock scares me a little for a fix, there's a
> > lot of code which can potentially run under the callbacks and notifiers
> > there.
> >
> > What about using a seqlock?  
> 
> Actually I did use seqlock in my initial version (not posted), it does not
> allow blocking inside write_seqlock() protection, so I have to change
> to rwsem.

Argh, you're right. No way we can construct something that tries to
read once and if it fails falls back to waiting for RTNL?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Patch net] net: fix dev_ifsioc_locked() race condition
  2021-01-29  5:21     ` Jakub Kicinski
@ 2021-01-29  5:47       ` Cong Wang
  2021-01-29 18:00         ` Jakub Kicinski
  0 siblings, 1 reply; 9+ messages in thread
From: Cong Wang @ 2021-01-29  5:47 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Linux Kernel Network Developers, Cong Wang, Gong, Sishuai, Eric Dumazet

On Thu, Jan 28, 2021 at 9:21 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Thu, 28 Jan 2021 21:08:05 -0800 Cong Wang wrote:
> > On Thu, Jan 28, 2021 at 12:55 PM Jakub Kicinski <kuba@kernel.org> wrote:
> > >
> > > On Sat, 23 Jan 2021 17:30:49 -0800 Cong Wang wrote:
> > > > From: Cong Wang <cong.wang@bytedance.com>
> > > >
> > > > dev_ifsioc_locked() is called with only RCU read lock, so when
> > > > there is a parallel writer changing the mac address, it could
> > > > get a partially updated mac address, as shown below:
> > > >
> > > > Thread 1                      Thread 2
> > > > // eth_commit_mac_addr_change()
> > > > memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
> > > >                               // dev_ifsioc_locked()
> > > >                               memcpy(ifr->ifr_hwaddr.sa_data,
> > > >                                       dev->dev_addr,...);
> > > >
> > > > Close this race condition by guarding them with a RW semaphore,
> > > > like netdev_get_name(). The writers take RTNL anyway, so this
> > > > will not affect the slow path.
> > > >
> > > > Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
> > > > Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
> > > > Cc: Eric Dumazet <eric.dumazet@gmail.com>
> > > > Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> > >
> > > The addition of the write lock scares me a little for a fix, there's a
> > > lot of code which can potentially run under the callbacks and notifiers
> > > there.
> > >
> > > What about using a seqlock?
> >
> > Actually I did use seqlock in my initial version (not posted), it does not
> > allow blocking inside write_seqlock() protection, so I have to change
> > to rwsem.
>
> Argh, you're right. No way we can construct something that tries to
> read once and if it fails falls back to waiting for RTNL?

I don't think there is any way to tell whether the read fails, a partially
updated address can not be detected without additional flags etc..

And devnet_rename_sem is already there, pretty much similar to this
one.

Thanks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Patch net] net: fix dev_ifsioc_locked() race condition
  2021-01-29  5:47       ` Cong Wang
@ 2021-01-29 18:00         ` Jakub Kicinski
  2021-01-29 23:12           ` Cong Wang
  0 siblings, 1 reply; 9+ messages in thread
From: Jakub Kicinski @ 2021-01-29 18:00 UTC (permalink / raw)
  To: Cong Wang
  Cc: Linux Kernel Network Developers, Cong Wang, Gong, Sishuai, Eric Dumazet

On Thu, 28 Jan 2021 21:47:04 -0800 Cong Wang wrote:
> On Thu, Jan 28, 2021 at 9:21 PM Jakub Kicinski <kuba@kernel.org> wrote:
> > On Thu, 28 Jan 2021 21:08:05 -0800 Cong Wang wrote:  
> > > On Thu, Jan 28, 2021 at 12:55 PM Jakub Kicinski <kuba@kernel.org> wrote:  
> > > >
> > > > On Sat, 23 Jan 2021 17:30:49 -0800 Cong Wang wrote:  
> > > > > From: Cong Wang <cong.wang@bytedance.com>
> > > > >
> > > > > dev_ifsioc_locked() is called with only RCU read lock, so when
> > > > > there is a parallel writer changing the mac address, it could
> > > > > get a partially updated mac address, as shown below:
> > > > >
> > > > > Thread 1                      Thread 2
> > > > > // eth_commit_mac_addr_change()
> > > > > memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
> > > > >                               // dev_ifsioc_locked()
> > > > >                               memcpy(ifr->ifr_hwaddr.sa_data,
> > > > >                                       dev->dev_addr,...);
> > > > >
> > > > > Close this race condition by guarding them with a RW semaphore,
> > > > > like netdev_get_name(). The writers take RTNL anyway, so this
> > > > > will not affect the slow path.
> > > > >
> > > > > Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
> > > > > Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
> > > > > Cc: Eric Dumazet <eric.dumazet@gmail.com>
> > > > > Signed-off-by: Cong Wang <cong.wang@bytedance.com>  
> > > >
> > > > The addition of the write lock scares me a little for a fix, there's a
> > > > lot of code which can potentially run under the callbacks and notifiers
> > > > there.
> > > >
> > > > What about using a seqlock?  
> > >
> > > Actually I did use seqlock in my initial version (not posted), it does not
> > > allow blocking inside write_seqlock() protection, so I have to change
> > > to rwsem.  
> >
> > Argh, you're right. No way we can construct something that tries to
> > read once and if it fails falls back to waiting for RTNL?  
> 
> I don't think there is any way to tell whether the read fails, a partially
> updated address can not be detected without additional flags etc..

Let me pseudo code it, I can't English that well:

void reader(obj)
{
	unsigned int seq;

	seq = READ_ONCE(seqcnt);
	if (seq & 1)
		goto slow_path;
	smb_rmb();

	obj = read_the_thing();

	smb_rmb();
	if (seq == READ_ONCE(seqcnt))
		return;

slow_path:
	rtnl_lock();
	obj = read_the_thing();
	rtnl_unlock();
}

void writer()
{
	ASSERT_RNTL();

	seqcnt++;
	smb_wmb();

	modify_the_thing();

	smb_wmb();
	seqcnt++;
}


I think raw_seqcount helpers should do here?

> And devnet_rename_sem is already there, pretty much similar to this
> one.

Ack, I don't see rename triggering cascading notifications, tho.
I think you've seen the recent patch for locking in team, that's 
pretty much what I'm afraid will happen here. 

But if I'm missing something about the seqcount or you strongly prefer
the rwlock, we can do that, too. Although I'd rather take this patch 
to net-next in that case.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Patch net] net: fix dev_ifsioc_locked() race condition
  2021-01-29 18:00         ` Jakub Kicinski
@ 2021-01-29 23:12           ` Cong Wang
  0 siblings, 0 replies; 9+ messages in thread
From: Cong Wang @ 2021-01-29 23:12 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Linux Kernel Network Developers, Cong Wang, Gong, Sishuai, Eric Dumazet

On Fri, Jan 29, 2021 at 10:00 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Thu, 28 Jan 2021 21:47:04 -0800 Cong Wang wrote:
> > On Thu, Jan 28, 2021 at 9:21 PM Jakub Kicinski <kuba@kernel.org> wrote:
> > > On Thu, 28 Jan 2021 21:08:05 -0800 Cong Wang wrote:
> > > > On Thu, Jan 28, 2021 at 12:55 PM Jakub Kicinski <kuba@kernel.org> wrote:
> > > > >
> > > > > On Sat, 23 Jan 2021 17:30:49 -0800 Cong Wang wrote:
> > > > > > From: Cong Wang <cong.wang@bytedance.com>
> > > > > >
> > > > > > dev_ifsioc_locked() is called with only RCU read lock, so when
> > > > > > there is a parallel writer changing the mac address, it could
> > > > > > get a partially updated mac address, as shown below:
> > > > > >
> > > > > > Thread 1                      Thread 2
> > > > > > // eth_commit_mac_addr_change()
> > > > > > memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
> > > > > >                               // dev_ifsioc_locked()
> > > > > >                               memcpy(ifr->ifr_hwaddr.sa_data,
> > > > > >                                       dev->dev_addr,...);
> > > > > >
> > > > > > Close this race condition by guarding them with a RW semaphore,
> > > > > > like netdev_get_name(). The writers take RTNL anyway, so this
> > > > > > will not affect the slow path.
> > > > > >
> > > > > > Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
> > > > > > Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
> > > > > > Cc: Eric Dumazet <eric.dumazet@gmail.com>
> > > > > > Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> > > > >
> > > > > The addition of the write lock scares me a little for a fix, there's a
> > > > > lot of code which can potentially run under the callbacks and notifiers
> > > > > there.
> > > > >
> > > > > What about using a seqlock?
> > > >
> > > > Actually I did use seqlock in my initial version (not posted), it does not
> > > > allow blocking inside write_seqlock() protection, so I have to change
> > > > to rwsem.
> > >
> > > Argh, you're right. No way we can construct something that tries to
> > > read once and if it fails falls back to waiting for RTNL?
> >
> > I don't think there is any way to tell whether the read fails, a partially
> > updated address can not be detected without additional flags etc..
>
> Let me pseudo code it, I can't English that well:
>
> void reader(obj)
> {
>         unsigned int seq;
>
>         seq = READ_ONCE(seqcnt);
>         if (seq & 1)
>                 goto slow_path;
>         smb_rmb();
>
>         obj = read_the_thing();
>
>         smb_rmb();
>         if (seq == READ_ONCE(seqcnt))
>                 return;
>
> slow_path:
>         rtnl_lock();
>         obj = read_the_thing();
>         rtnl_unlock();
> }
>
> void writer()
> {
>         ASSERT_RNTL();
>
>         seqcnt++;
>         smb_wmb();
>
>         modify_the_thing();
>
>         smb_wmb();
>         seqcnt++;
> }
>
>
> I think raw_seqcount helpers should do here?

Not sure why you want to kinda rewrite seqlock here. Please see below.

>
> > And devnet_rename_sem is already there, pretty much similar to this
> > one.
>
> Ack, I don't see rename triggering cascading notifications, tho.
> I think you've seen the recent patch for locking in team, that's
> pretty much what I'm afraid will happen here.

Right, I should make only user-facing callers (ioctl, rtnetlink) use this
writer semaphore, and leave other callers of dev_set_mac_address()
untouched. Something like:

dev_set_mac_address_locked(...)
{
  down_write(&dev_addr_sem);
  dev_set_mac_address(...);
  up_write(&dev_addr_sem);
  ...
}

With this, we don't need to reinvent seqlock.

>
> But if I'm missing something about the seqcount or you strongly prefer
> the rwlock, we can do that, too. Although I'd rather take this patch
> to net-next in that case.

I have no problem with applying to net-next. I will send v2 targeting
net-next instead.

Thanks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-01-29 23:13 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-24  1:30 [Patch net] net: fix dev_ifsioc_locked() race condition Cong Wang
2021-01-25 19:43 ` Gong, Sishuai
2021-01-28  1:42   ` Cong Wang
2021-01-28 20:55 ` Jakub Kicinski
2021-01-29  5:08   ` Cong Wang
2021-01-29  5:21     ` Jakub Kicinski
2021-01-29  5:47       ` Cong Wang
2021-01-29 18:00         ` Jakub Kicinski
2021-01-29 23:12           ` Cong Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.