netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net 1/5] netdevsim: fix a race condition in netdevsim operations
@ 2020-01-11 16:36 Taehee Yoo
  2020-01-12 14:19 ` Jakub Kicinski
  0 siblings, 1 reply; 6+ messages in thread
From: Taehee Yoo @ 2020-01-11 16:36 UTC (permalink / raw)
  To: davem, jakub.kicinski, netdev; +Cc: ap420073

netdevsim basic operations are called with sysfs.

Create netdevsim device:
echo "1 1" > /sys/bus/netdevsim/new_device
Delete netdevsim device:
echo 1 > /sys/bus/netdevsim/del_device
Create netdevsim port:
echo 4 > /sys/devices/netdevsim1/new_port
Delete netdevsim port:
echo 4 > /sys/devices/netdevsim1/del_port
Set sriov number:
echo 4 > /sys/devices/netdevsim1/sriov_numvfs

These operations use the same resource so they should acquire a lock for
the whole resource not only for a list.

Test commands:
    #SHELL1
    modprobe netdevsim
    while :
    do
        echo "1 1" > /sys/bus/netdevsim/new_device
        echo "1 1" > /sys/bus/netdevsim/del_device
    done

    #SHELL2
    while :
    do
        echo 1 > /sys/devices/netdevsim1/new_port
        echo 1 > /sys/devices/netdevsim1/del_port
    done

Splat looks like:
[  151.623634][ T1165] kasan: CONFIG_KASAN_INLINE enabled
[  151.626503][ T1165] kasan: GPF could be caused by NULL-ptr deref or user memory access
[  151.627862][ T1165] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
[  151.630700][ T1165] CPU: 3 PID: 1165 Comm: bash Not tainted 5.5.0-rc5+ #270
[  151.633339][ T1165] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[  151.635758][ T1165] RIP: 0010:__mutex_lock+0x10a/0x14b0
[  151.636713][ T1165] Code: 08 84 d2 0f 85 7f 12 00 00 44 8b 0d 20 66 67 02 45 85 c9 75 29 49 8d 7f 68 48 b8 00 f
[  151.641725][ T1165] RSP: 0018:ffff8880b17cfbb0 EFLAGS: 00010206
[  151.642547][ T1165] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000
[  151.643636][ T1165] RDX: 0000000000000021 RSI: ffffffffbb922c00 RDI: 0000000000000108
[  151.645415][ T1165] RBP: ffff8880b17cfd30 R08: ffffffffc0392ad0 R09: 0000000000000000
[  151.646803][ T1165] R10: ffff8880b17cfd50 R11: ffff8880a9225440 R12: 0000000000000000
[  151.647846][ T1165] R13: dffffc0000000000 R14: ffffffffbd16e7c0 R15: 00000000000000a0
[  151.651979][ T1165] FS:  00007f9731c8e740(0000) GS:ffff8880da800000(0000) knlGS:0000000000000000
[  151.655905][ T1165] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  151.656828][ T1165] CR2: 000055be472e1140 CR3: 00000000b1e14005 CR4: 00000000000606e0
[  151.657906][ T1165] Call Trace:
[  151.658346][ T1165]  ? nsim_dev_port_add+0x50/0x150 [netdevsim]
[  151.659146][ T1165]  ? mutex_lock_io_nested+0x1380/0x1380
[  151.659902][ T1165]  ? _kstrtoull+0x76/0x160
[  151.660480][ T1165]  ? _parse_integer+0xf0/0xf0
[  151.661097][ T1165]  ? kernfs_fop_write+0x1cf/0x410
[  151.661751][ T1165]  ? sysfs_file_ops+0x160/0x160
[  151.662389][ T1165]  ? kstrtouint+0x86/0x110
[  151.662972][ T1165]  ? nsim_dev_port_add+0x50/0x150 [netdevsim]
[  151.663768][ T1165]  nsim_dev_port_add+0x50/0x150 [netdevsim]
[ ... ]

In this patch, __init and __exit function also acquire a lock.
operations could be called while __init and __exit functions are
processing. If so, uninitialized or freed resources could be used.
So, __init() and __exit function also need lock.

Fixes: 83c9e13aa39a ("netdevsim: add software driver for testing offloads")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
---
 drivers/net/netdevsim/bus.c       | 64 ++++++++++++++++++++++++-------
 drivers/net/netdevsim/dev.c       | 20 +++++++++-
 drivers/net/netdevsim/netdevsim.h |  2 +
 3 files changed, 71 insertions(+), 15 deletions(-)

diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index 6aeed0c600f8..b40e4e70995d 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -16,7 +16,8 @@
 
 static DEFINE_IDA(nsim_bus_dev_ids);
 static LIST_HEAD(nsim_bus_dev_list);
-static DEFINE_MUTEX(nsim_bus_dev_list_lock);
+/* mutex lock for netdevsim operations */
+DEFINE_MUTEX(nsim_bus_dev_ops_lock);
 
 static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev)
 {
@@ -51,9 +52,14 @@ nsim_bus_dev_numvfs_store(struct device *dev, struct device_attribute *attr,
 	unsigned int num_vfs;
 	int ret;
 
+	if (!mutex_trylock(&nsim_bus_dev_ops_lock))
+		return -EBUSY;
+
 	ret = kstrtouint(buf, 0, &num_vfs);
-	if (ret)
+	if (ret) {
+		mutex_unlock(&nsim_bus_dev_ops_lock);
 		return ret;
+	}
 
 	rtnl_lock();
 	if (nsim_bus_dev->num_vfs == num_vfs)
@@ -74,6 +80,7 @@ nsim_bus_dev_numvfs_store(struct device *dev, struct device_attribute *attr,
 	ret = count;
 exit_unlock:
 	rtnl_unlock();
+	mutex_unlock(&nsim_bus_dev_ops_lock);
 
 	return ret;
 }
@@ -83,8 +90,13 @@ nsim_bus_dev_numvfs_show(struct device *dev,
 			 struct device_attribute *attr, char *buf)
 {
 	struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
+	int ret;
 
-	return sprintf(buf, "%u\n", nsim_bus_dev->num_vfs);
+	if (!mutex_trylock(&nsim_bus_dev_ops_lock))
+		return -EBUSY;
+	ret = sprintf(buf, "%u\n", nsim_bus_dev->num_vfs);
+	mutex_unlock(&nsim_bus_dev_ops_lock);
+	return ret;
 }
 
 static struct device_attribute nsim_bus_dev_numvfs_attr =
@@ -99,10 +111,15 @@ new_port_store(struct device *dev, struct device_attribute *attr,
 	unsigned int port_index;
 	int ret;
 
+	if (!mutex_trylock(&nsim_bus_dev_ops_lock))
+		return -EBUSY;
 	ret = kstrtouint(buf, 0, &port_index);
-	if (ret)
+	if (ret) {
+		mutex_unlock(&nsim_bus_dev_ops_lock);
 		return ret;
+	}
 	ret = nsim_dev_port_add(nsim_bus_dev, port_index);
+	mutex_unlock(&nsim_bus_dev_ops_lock);
 	return ret ? ret : count;
 }
 
@@ -116,10 +133,17 @@ del_port_store(struct device *dev, struct device_attribute *attr,
 	unsigned int port_index;
 	int ret;
 
+	if (!mutex_trylock(&nsim_bus_dev_ops_lock))
+		return -EBUSY;
+
 	ret = kstrtouint(buf, 0, &port_index);
-	if (ret)
+	if (ret) {
+		mutex_unlock(&nsim_bus_dev_ops_lock);
 		return ret;
+	}
+
 	ret = nsim_dev_port_del(nsim_bus_dev, port_index);
+	mutex_unlock(&nsim_bus_dev_ops_lock);
 	return ret ? ret : count;
 }
 
@@ -179,13 +203,17 @@ new_device_store(struct bus_type *bus, const char *buf, size_t count)
 		pr_err("Format for adding new device is \"id port_count\" (uint uint).\n");
 		return -EINVAL;
 	}
+
+	if (!mutex_trylock(&nsim_bus_dev_ops_lock))
+		return -EBUSY;
 	nsim_bus_dev = nsim_bus_dev_new(id, port_count);
-	if (IS_ERR(nsim_bus_dev))
+	if (IS_ERR(nsim_bus_dev)) {
+		mutex_unlock(&nsim_bus_dev_ops_lock);
 		return PTR_ERR(nsim_bus_dev);
+	}
 
-	mutex_lock(&nsim_bus_dev_list_lock);
 	list_add_tail(&nsim_bus_dev->list, &nsim_bus_dev_list);
-	mutex_unlock(&nsim_bus_dev_list_lock);
+	mutex_unlock(&nsim_bus_dev_ops_lock);
 
 	return count;
 }
@@ -214,7 +242,8 @@ del_device_store(struct bus_type *bus, const char *buf, size_t count)
 	}
 
 	err = -ENOENT;
-	mutex_lock(&nsim_bus_dev_list_lock);
+	if (!mutex_trylock(&nsim_bus_dev_ops_lock))
+		return -EBUSY;
 	list_for_each_entry_safe(nsim_bus_dev, tmp, &nsim_bus_dev_list, list) {
 		if (nsim_bus_dev->dev.id != id)
 			continue;
@@ -223,7 +252,7 @@ del_device_store(struct bus_type *bus, const char *buf, size_t count)
 		err = 0;
 		break;
 	}
-	mutex_unlock(&nsim_bus_dev_list_lock);
+	mutex_unlock(&nsim_bus_dev_ops_lock);
 	return !err ? count : err;
 }
 static BUS_ATTR_WO(del_device);
@@ -314,12 +343,19 @@ int nsim_bus_init(void)
 {
 	int err;
 
+	mutex_lock(&nsim_bus_dev_ops_lock);
 	err = bus_register(&nsim_bus);
-	if (err)
+	if (err) {
+		mutex_unlock(&nsim_bus_dev_ops_lock);
 		return err;
+	}
 	err = driver_register(&nsim_driver);
-	if (err)
+	if (err) {
+		mutex_unlock(&nsim_bus_dev_ops_lock);
 		goto err_bus_unregister;
+	}
+
+	mutex_unlock(&nsim_bus_dev_ops_lock);
 	return 0;
 
 err_bus_unregister:
@@ -331,12 +367,12 @@ void nsim_bus_exit(void)
 {
 	struct nsim_bus_dev *nsim_bus_dev, *tmp;
 
-	mutex_lock(&nsim_bus_dev_list_lock);
+	mutex_lock(&nsim_bus_dev_ops_lock);
 	list_for_each_entry_safe(nsim_bus_dev, tmp, &nsim_bus_dev_list, list) {
 		list_del(&nsim_bus_dev->list);
 		nsim_bus_dev_del(nsim_bus_dev);
 	}
-	mutex_unlock(&nsim_bus_dev_list_lock);
 	driver_unregister(&nsim_driver);
 	bus_unregister(&nsim_bus);
+	mutex_unlock(&nsim_bus_dev_ops_lock);
 }
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 059711edfc61..634eb5cdcbbe 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -43,13 +43,24 @@ static ssize_t nsim_dev_take_snapshot_write(struct file *file,
 					    size_t count, loff_t *ppos)
 {
 	struct nsim_dev *nsim_dev = file->private_data;
+	struct devlink *devlink;
 	void *dummy_data;
 	int err;
 	u32 id;
 
+	devlink = priv_to_devlink(nsim_dev);
+
+	if (!mutex_trylock(&nsim_bus_dev_ops_lock))
+		return -EBUSY;
+
+	devlink_reload_disable(devlink);
+
 	dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL);
-	if (!dummy_data)
+	if (!dummy_data) {
+		devlink_reload_enable(devlink);
+		mutex_unlock(&nsim_bus_dev_ops_lock);
 		return -ENOMEM;
+	}
 
 	get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE);
 
@@ -59,9 +70,13 @@ static ssize_t nsim_dev_take_snapshot_write(struct file *file,
 	if (err) {
 		pr_err("Failed to create region snapshot\n");
 		kfree(dummy_data);
+		devlink_reload_enable(devlink);
+		mutex_unlock(&nsim_bus_dev_ops_lock);
 		return err;
 	}
 
+	devlink_reload_enable(devlink);
+	mutex_unlock(&nsim_bus_dev_ops_lock);
 	return count;
 }
 
@@ -909,9 +924,12 @@ int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev,
 		      unsigned int port_index)
 {
 	struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev);
+	struct devlink *devlink = priv_to_devlink(nsim_dev);
 	struct nsim_dev_port *nsim_dev_port;
 	int err = 0;
 
+	devlink_reload_disable(devlink);
+
 	mutex_lock(&nsim_dev->port_list_lock);
 	nsim_dev_port = __nsim_dev_port_lookup(nsim_dev, port_index);
 	if (!nsim_dev_port)
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 94df795ef4d3..6a7cfd2319df 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -30,6 +30,8 @@
 #define NSIM_IPSEC_MAX_SA_COUNT		33
 #define NSIM_IPSEC_VALID		BIT(31)
 
+extern struct mutex nsim_bus_dev_ops_lock;
+
 struct nsim_sa {
 	struct xfrm_state *xs;
 	__be32 ipaddr[4];
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net 1/5] netdevsim: fix a race condition in netdevsim operations
  2020-01-11 16:36 [PATCH net 1/5] netdevsim: fix a race condition in netdevsim operations Taehee Yoo
@ 2020-01-12 14:19 ` Jakub Kicinski
  2020-01-14 15:26   ` Taehee Yoo
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Kicinski @ 2020-01-12 14:19 UTC (permalink / raw)
  To: Taehee Yoo; +Cc: davem, netdev, Jiri Pirko

On Sat, 11 Jan 2020 16:36:55 +0000, Taehee Yoo wrote:
> netdevsim basic operations are called with sysfs.
> 
> Create netdevsim device:
> echo "1 1" > /sys/bus/netdevsim/new_device
> Delete netdevsim device:
> echo 1 > /sys/bus/netdevsim/del_device
> Create netdevsim port:
> echo 4 > /sys/devices/netdevsim1/new_port
> Delete netdevsim port:
> echo 4 > /sys/devices/netdevsim1/del_port
> Set sriov number:
> echo 4 > /sys/devices/netdevsim1/sriov_numvfs
> 
> These operations use the same resource so they should acquire a lock for
> the whole resource not only for a list.
> 
> Test commands:
>     #SHELL1
>     modprobe netdevsim
>     while :
>     do
>         echo "1 1" > /sys/bus/netdevsim/new_device
>         echo "1 1" > /sys/bus/netdevsim/del_device
>     done
> 
>     #SHELL2
>     while :
>     do
>         echo 1 > /sys/devices/netdevsim1/new_port
>         echo 1 > /sys/devices/netdevsim1/del_port
>     done
> 
> Splat looks like:
> [  151.623634][ T1165] kasan: CONFIG_KASAN_INLINE enabled
> [  151.626503][ T1165] kasan: GPF could be caused by NULL-ptr deref or user memory access


> In this patch, __init and __exit function also acquire a lock.
> operations could be called while __init and __exit functions are
> processing. If so, uninitialized or freed resources could be used.
> So, __init() and __exit function also need lock.
> 
> Fixes: 83c9e13aa39a ("netdevsim: add software driver for testing offloads")

I don't think that's the correct Fixes tag, the first version of the
driver did not use the sysfs interface.

> Signed-off-by: Taehee Yoo <ap420073@gmail.com>

> --- a/drivers/net/netdevsim/bus.c
> +++ b/drivers/net/netdevsim/bus.c
> @@ -16,7 +16,8 @@
>  
>  static DEFINE_IDA(nsim_bus_dev_ids);
>  static LIST_HEAD(nsim_bus_dev_list);
> -static DEFINE_MUTEX(nsim_bus_dev_list_lock);
> +/* mutex lock for netdevsim operations */
> +DEFINE_MUTEX(nsim_bus_dev_ops_lock);
>  
>  static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev)
>  {
> @@ -51,9 +52,14 @@ nsim_bus_dev_numvfs_store(struct device *dev, struct device_attribute *attr,

Could the vf handling use the device lock like PCI does?

But actually, we free VF info from the release function, which IIUC is
called after all references to the device are gone. So this should be
fine, no?

Perhaps the entire bus dev structure should be freed from release?

>  	unsigned int num_vfs;
>  	int ret;
>  
> +	if (!mutex_trylock(&nsim_bus_dev_ops_lock))
> +		return -EBUSY;

Why the trylocks? Are you trying to catch the races between unregister
and other ops?

>  	ret = kstrtouint(buf, 0, &num_vfs);
> -	if (ret)
> +	if (ret) {
> +		mutex_unlock(&nsim_bus_dev_ops_lock);
>  		return ret;
> +	}
>  
>  	rtnl_lock();
>  	if (nsim_bus_dev->num_vfs == num_vfs)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net 1/5] netdevsim: fix a race condition in netdevsim operations
  2020-01-12 14:19 ` Jakub Kicinski
@ 2020-01-14 15:26   ` Taehee Yoo
  2020-01-15 14:16     ` Jakub Kicinski
  0 siblings, 1 reply; 6+ messages in thread
From: Taehee Yoo @ 2020-01-14 15:26 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: David Miller, Netdev, Jiri Pirko

On Sun, 12 Jan 2020 at 23:19, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
>

Hi Jakub,
Thank you for your review!

> On Sat, 11 Jan 2020 16:36:55 +0000, Taehee Yoo wrote:
> > netdevsim basic operations are called with sysfs.
> >
> > Create netdevsim device:
> > echo "1 1" > /sys/bus/netdevsim/new_device
> > Delete netdevsim device:
> > echo 1 > /sys/bus/netdevsim/del_device
> > Create netdevsim port:
> > echo 4 > /sys/devices/netdevsim1/new_port
> > Delete netdevsim port:
> > echo 4 > /sys/devices/netdevsim1/del_port
> > Set sriov number:
> > echo 4 > /sys/devices/netdevsim1/sriov_numvfs
> >
> > These operations use the same resource so they should acquire a lock for
> > the whole resource not only for a list.
> >
> > Test commands:
> >     #SHELL1
> >     modprobe netdevsim
> >     while :
> >     do
> >         echo "1 1" > /sys/bus/netdevsim/new_device
> >         echo "1 1" > /sys/bus/netdevsim/del_device
> >     done
> >
> >     #SHELL2
> >     while :
> >     do
> >         echo 1 > /sys/devices/netdevsim1/new_port
> >         echo 1 > /sys/devices/netdevsim1/del_port
> >     done
> >
> > Splat looks like:
> > [  151.623634][ T1165] kasan: CONFIG_KASAN_INLINE enabled
> > [  151.626503][ T1165] kasan: GPF could be caused by NULL-ptr deref or user memory access
>
>
> > In this patch, __init and __exit function also acquire a lock.
> > operations could be called while __init and __exit functions are
> > processing. If so, uninitialized or freed resources could be used.
> > So, __init() and __exit function also need lock.
> >
> > Fixes: 83c9e13aa39a ("netdevsim: add software driver for testing offloads")
>
> I don't think that's the correct Fixes tag, the first version of the
> driver did not use the sysfs interface.
>

I checked up the Fixes tag. you're right, this Fixes tag is not correct.
I will fix this.

> > Signed-off-by: Taehee Yoo <ap420073@gmail.com>
>
> > --- a/drivers/net/netdevsim/bus.c
> > +++ b/drivers/net/netdevsim/bus.c
> > @@ -16,7 +16,8 @@
> >
> >  static DEFINE_IDA(nsim_bus_dev_ids);
> >  static LIST_HEAD(nsim_bus_dev_list);
> > -static DEFINE_MUTEX(nsim_bus_dev_list_lock);
> > +/* mutex lock for netdevsim operations */
> > +DEFINE_MUTEX(nsim_bus_dev_ops_lock);
> >
> >  static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev)
> >  {
> > @@ -51,9 +52,14 @@ nsim_bus_dev_numvfs_store(struct device *dev, struct device_attribute *attr,
>
> Could the vf handling use the device lock like PCI does?
>
> But actually, we free VF info from the release function, which IIUC is
> called after all references to the device are gone. So this should be
> fine, no?
>

I have tested this patch without locking in the
nsim_bus_dev_numvfs_store(). It worked well.
The freeing and allocating routines are protected by RTNL.
As you said, the ->release() routine, which frees vconfig is protected by
reference count. So, It's safe.
I will drop this at a v2 patch.

> Perhaps the entire bus dev structure should be freed from release?
>

I tested this like this.

@@ -146,6 +161,8 @@ static void nsim_bus_dev_release(struct device *dev)
        struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);

        nsim_bus_dev_vfs_disable(nsim_bus_dev);
+       ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
+       kfree(nsim_bus_dev);
 }
@@ -300,8 +320,6 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count)
 static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev)
 {
        device_unregister(&nsim_bus_dev->dev);
-       ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
-       kfree(nsim_bus_dev);
 }

It works well. but I'm not sure this is needed.

> >       unsigned int num_vfs;
> >       int ret;
> >
> > +     if (!mutex_trylock(&nsim_bus_dev_ops_lock))
> > +             return -EBUSY;
>
> Why the trylocks? Are you trying to catch the races between unregister
> and other ops?
>

The reason is to avoid deadlock.
If we use mutex_lock() instead of mutex_trylock(), the below message
will be printed and actual deadlock also appeared.

[  426.879562][  T805] WARNING: possible circular locking dependency detected
[  426.880477][  T805] 5.5.0-rc5+ #280 Not tainted
[  426.881080][  T805] ------------------------------------------------------
[  426.882035][  T805] bash/805 is trying to acquire lock:
[  426.882826][  T805] ffffffffc03b7830 (nsim_bus_dev_ops_lock){+.+.},
at: del_port_store+0x68/0xe0 [netdevsim]
[  426.884159][  T805]
[  426.884159][  T805] but task is already holding lock:
[  426.885061][  T805] ffff88805edb54b0 (kn->count#170){++++}, at:
kernfs_fop_write+0x1f2/0x410
[  426.886130][  T805]
[  426.886130][  T805] which lock already depends on the new lock.
[  426.886130][  T805]
[  426.887492][  T805]
[  426.887492][  T805] the existing dependency chain (in reverse order) is:
[  426.888606][  T805]
[  426.888606][  T805] -> #1 (kn->count#170){++++}:
[  426.889749][  T805]        __kernfs_remove+0x612/0x7f0
[  426.890392][  T805]        kernfs_remove_by_name_ns+0x40/0x80
[  426.891342][  T805]        remove_files.isra.1+0x6c/0x170
[  426.892359][  T805]        sysfs_remove_group+0x9b/0x170
[  426.893414][  T805]        sysfs_remove_groups+0x4f/0x90
[  426.894405][  T805]        device_remove_attrs+0xc9/0x130
[  426.895198][  T805]        device_del+0x413/0xc10
[  426.895798][  T805]        device_unregister+0x16/0xa0
[  426.896457][  T805]        del_device_store+0x288/0x3c0 [netdevsim]
[  426.897250][  T805]        kernfs_fop_write+0x276/0x410
[  426.897915][  T805]        vfs_write+0x197/0x4a0
[  426.898506][  T805]        ksys_write+0x141/0x1d0
[  426.899111][  T805]        do_syscall_64+0x99/0x4f0
[  426.899738][  T805]        entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  426.900557][  T805]
[  426.900557][  T805] -> #0 (nsim_bus_dev_ops_lock){+.+.}:
[  426.901425][  T805]        __lock_acquire+0x2d8d/0x3de0
[  426.902023][  T805]        lock_acquire+0x164/0x3b0
[  426.902586][  T805]        __mutex_lock+0x14d/0x14b0
[  426.903166][  T805]        del_port_store+0x68/0xe0 [netdevsim]
[  426.903840][  T805]        kernfs_fop_write+0x276/0x410
[  426.904430][  T805]        vfs_write+0x197/0x4a0
[  426.904952][  T805]        ksys_write+0x141/0x1d0
[  426.905481][  T805]        do_syscall_64+0x99/0x4f0
[  426.906032][  T805]        entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  426.906745][  T805]
[  426.906745][  T805] other info that might help us debug this:
[  426.906745][  T805]
[  426.907883][  T805]  Possible unsafe locking scenario:
[  426.907883][  T805]
[  426.908715][  T805]        CPU0                    CPU1
[  426.909312][  T805]        ----                    ----
[  426.909902][  T805]   lock(kn->count#170);
[  426.910372][  T805]
lock(nsim_bus_dev_ops_lock);
[  426.911277][  T805]                                lock(kn->count#170);
[  426.912032][  T805]   lock(nsim_bus_dev_ops_lock);
[  426.912581][  T805]
[  426.912581][  T805]  *** DEADLOCK ***
[  426.912581][  T805]
[  426.913495][  T805] 3 locks held by bash/805:
[  426.913998][  T805]  #0: ffff88806547c478 (sb_writers#5){.+.+}, at:
vfs_write+0x3bb/0x4a0
[  426.914944][  T805]  #1: ffff88805ff55c90 (&of->mutex){+.+.}, at:
kernfs_fop_write+0x1cf/0x410
[  426.915928][  T805]  #2: ffff88805edb54b0 (kn->count#170){++++},
at: kernfs_fop_write+0x1f2/0x410
[  426.916957][  T805]
[  426.916957][  T805] stack backtrace:
[  426.917614][  T805] CPU: 1 PID: 805 Comm: bash Not tainted 5.5.0-rc5+ #280
[  426.918398][  T805] Hardware name: innotek GmbH
VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[  426.919408][  T805] Call Trace:
[  426.919804][  T805]  dump_stack+0x96/0xdb
[  426.920263][  T805]  check_noncircular+0x371/0x450
[  426.920813][  T805]  ? print_circular_bug.isra.36+0x310/0x310
[  426.921475][  T805]  ? lock_acquire+0x164/0x3b0
[  426.921993][  T805]  ? hlock_class+0x130/0x130
[  426.922502][  T805]  ? __lock_acquire+0x2d8d/0x3de0
[  426.923158][  T805]  __lock_acquire+0x2d8d/0x3de0
[  426.923692][  T805]  ? register_lock_class+0x14d0/0x14d0
[  426.924282][  T805]  lock_acquire+0x164/0x3b0
[  426.924784][  T805]  ? del_port_store+0x68/0xe0 [netdevsim]
[  426.925415][  T805]  __mutex_lock+0x14d/0x14b0
[  426.925925][  T805]  ? del_port_store+0x68/0xe0 [netdevsim]
[  426.926551][  T805]  ? __lock_acquire+0xdfe/0x3de0
[  426.927108][  T805]  ? del_port_store+0x68/0xe0 [netdevsim]
[  426.927740][  T805]  ? mutex_lock_io_nested+0x1380/0x1380
[  426.928349][  T805]  ? register_lock_class+0x14d0/0x14d0
[  426.928947][  T805]  ? check_chain_key+0x236/0x5d0
[  426.929490][  T805]  ? kernfs_fop_write+0x1cf/0x410
[  426.930042][  T805]  ? lock_acquire+0x164/0x3b0
[  426.930558][  T805]  ? kernfs_fop_write+0x1f2/0x410
[  426.931123][  T805]  ? del_port_store+0x68/0xe0 [netdevsim]
[  426.931761][  T805]  del_port_store+0x68/0xe0 [netdevsim]
[  426.932389][  T805]  ? nsim_bus_dev_release+0x100/0x100 [netdevsim]
[  426.933097][  T805]  ? sysfs_file_ops+0x112/0x160
[  426.933630][  T805]  ? sysfs_kf_write+0x3b/0x180
[  426.934152][  T805]  ? sysfs_file_ops+0x160/0x160
[  426.934683][  T805]  kernfs_fop_write+0x276/0x410
[  426.935226][  T805]  ? __sb_start_write+0x215/0x2e0
[  426.935780][  T805]  vfs_write+0x197/0x4a0

Locking ordering of {new/del}_device() and {new/del}_port is different.
So, a deadlock could occur.

Thank you,
Taehee Yoo

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net 1/5] netdevsim: fix a race condition in netdevsim operations
  2020-01-14 15:26   ` Taehee Yoo
@ 2020-01-15 14:16     ` Jakub Kicinski
  2020-01-24  5:16       ` Taehee Yoo
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Kicinski @ 2020-01-15 14:16 UTC (permalink / raw)
  To: Taehee Yoo; +Cc: Jakub Kicinski, David Miller, Netdev, Jiri Pirko

On Wed, 15 Jan 2020 00:26:22 +0900, Taehee Yoo wrote:
> On Sun, 12 Jan 2020 at 23:19, Jakub Kicinski wrote:
> Hi Jakub,
> Thank you for your review!

Thank you for fixing these tricky bugs! :)

> > Perhaps the entire bus dev structure should be freed from release?
> 
> I tested this like this.
> 
> @@ -146,6 +161,8 @@ static void nsim_bus_dev_release(struct device *dev)
>         struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
> 
>         nsim_bus_dev_vfs_disable(nsim_bus_dev);
> +       ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
> +       kfree(nsim_bus_dev);
>  }
> @@ -300,8 +320,6 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count)
>  static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev)
>  {
>         device_unregister(&nsim_bus_dev->dev);
> -       ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
> -       kfree(nsim_bus_dev);
>  }
> 
> It works well. but I'm not sure this is needed.

My concern is that process A opens a sysfs file (eg. numvfs) then
process B deletes the device, but process A still has a file descriptor
(and device reference) so it may be able to write/read the numvfs file
even though nsim_bus_dev was already freed. 

I may very well be wrong, and something else may be preventing this
condition. It's just a bit strange to see release free an internal
sub-structure, while the main structure is freed immediately..

> > >       unsigned int num_vfs;
> > >       int ret;
> > >
> > > +     if (!mutex_trylock(&nsim_bus_dev_ops_lock))
> > > +             return -EBUSY;  
> >
> > Why the trylocks? Are you trying to catch the races between unregister
> > and other ops?
> >  
> 
> The reason is to avoid deadlock.
> If we use mutex_lock() instead of mutex_trylock(), the below message
> will be printed and actual deadlock also appeared.

> [  426.907883][  T805]  Possible unsafe locking scenario:
> [  426.907883][  T805]
> [  426.908715][  T805]        CPU0                    CPU1
> [  426.909312][  T805]        ----                    ----
> [  426.909902][  T805]   lock(kn->count#170);
> [  426.910372][  T805]
> lock(nsim_bus_dev_ops_lock);
> [  426.911277][  T805]                                lock(kn->count#170);
> [  426.912032][  T805]   lock(nsim_bus_dev_ops_lock);

> Locking ordering of {new/del}_device() and {new/del}_port is different.
> So, a deadlock could occur.

Hm, we can't use the same lock for the bus ops and port ops.
But the port ops already take port lock, do we really need 
another lock there?

Also does nsim_bus_exit() really need to iterate over devices to remove
them? Does core not do it for us?

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net 1/5] netdevsim: fix a race condition in netdevsim operations
  2020-01-15 14:16     ` Jakub Kicinski
@ 2020-01-24  5:16       ` Taehee Yoo
  2020-01-24 13:53         ` Jakub Kicinski
  0 siblings, 1 reply; 6+ messages in thread
From: Taehee Yoo @ 2020-01-24  5:16 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: Jakub Kicinski, David Miller, Netdev, Jiri Pirko

On Wed, 15 Jan 2020 at 23:16, Jakub Kicinski <kuba@kernel.org> wrote:
>

Hi Jakub!

> On Wed, 15 Jan 2020 00:26:22 +0900, Taehee Yoo wrote:
> > On Sun, 12 Jan 2020 at 23:19, Jakub Kicinski wrote:
> > Hi Jakub,
> > Thank you for your review!
>
> Thank you for fixing these tricky bugs! :)
>
> > > Perhaps the entire bus dev structure should be freed from release?
> >
> > I tested this like this.
> >
> > @@ -146,6 +161,8 @@ static void nsim_bus_dev_release(struct device *dev)
> >         struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
> >
> >         nsim_bus_dev_vfs_disable(nsim_bus_dev);
> > +       ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
> > +       kfree(nsim_bus_dev);
> >  }
> > @@ -300,8 +320,6 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count)
> >  static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev)
> >  {
> >         device_unregister(&nsim_bus_dev->dev);
> > -       ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
> > -       kfree(nsim_bus_dev);
> >  }
> >
> > It works well. but I'm not sure this is needed.
>
> My concern is that process A opens a sysfs file (eg. numvfs) then
> process B deletes the device, but process A still has a file descriptor
> (and device reference) so it may be able to write/read the numvfs file
> even though nsim_bus_dev was already freed.
>

If I understood kernfs correctly, kernfs internally avoid this situation.
a) When kernfs file is being removed, it waits for all users who are
operating this file(open/read/write, etc...)
b) When kernfs file is removed, the file is disallowed to be used.
c) Opened kernfs file descriptor also will be disallowed to use anymore.
d) File remove routine is finished, resources are freed.
So, user-after-free case couldn't occur.
The below piece of code is kernfs synchronize code.

static void kernfs_drain(struct kernfs_node *kn)
{
 ...
        wait_event(root->deactivate_waitq,
                   atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
 ...
}

static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
                                size_t count, loff_t *ppos)
{
 ...
        if (!kernfs_get_active(of->kn)) {
                mutex_unlock(&of->mutex);
                len = -ENODEV;
                goto out_free;
        }

        ops = kernfs_ops(of->kn);
        if (ops->write)
                len = ops->write(of, buf, len, *ppos);
        else
                len = -EINVAL;

        kernfs_put_active(of->kn);
 ...
}

void kernfs_put_active(struct kernfs_node *kn)
{
 ...
        v = atomic_dec_return(&kn->active);
        if (likely(v != KN_DEACTIVATED_BIAS))
                return;

        wake_up_all(&kernfs_root(kn)->deactivate_waitq);
}

I have tested this code, it works well.

> I may very well be wrong, and something else may be preventing this
> condition. It's just a bit strange to see release free an internal
> sub-structure, while the main structure is freed immediately..
>

I didn't think about ordering of resource release routine.
So I took a look at the release routine.

del_device_store()
    nsim_bus_dev_del()
        nsim_bus_dev_del()
            kobject_put()
                device_release()
                    nsim_bus_dev_release()
                        kfree(nsim_bus_dev->vconfigs)
    kfree(nsim_bus_dev)

Before freeing nsim_bus_dev, all resources are freed in the
device_unregister(). So, I think it's safe.

> > > >       unsigned int num_vfs;
> > > >       int ret;
> > > >
> > > > +     if (!mutex_trylock(&nsim_bus_dev_ops_lock))
> > > > +             return -EBUSY;
> > >
> > > Why the trylocks? Are you trying to catch the races between unregister
> > > and other ops?
> > >
> >
> > The reason is to avoid deadlock.
> > If we use mutex_lock() instead of mutex_trylock(), the below message
> > will be printed and actual deadlock also appeared.
>
> > [  426.907883][  T805]  Possible unsafe locking scenario:
> > [  426.907883][  T805]
> > [  426.908715][  T805]        CPU0                    CPU1
> > [  426.909312][  T805]        ----                    ----
> > [  426.909902][  T805]   lock(kn->count#170);
> > [  426.910372][  T805]
> > lock(nsim_bus_dev_ops_lock);
> > [  426.911277][  T805]                                lock(kn->count#170);
> > [  426.912032][  T805]   lock(nsim_bus_dev_ops_lock);
>
> > Locking ordering of {new/del}_device() and {new/del}_port is different.
> > So, a deadlock could occur.
>
> Hm, we can't use the same lock for the bus ops and port ops.
> But the port ops already take port lock, do we really need
> another lock there?
>

A synchronize routine is needed.
new_port() and del_port() operations access many device resources.
It could be used even before resources are allocated or initialized.
So, new_port() and del_port() should be allowed to use after resources
are initialized. But sriov_numvfs() doesn't use uninitialized resource
so it doesn't make any problem.

If a simple flag variable is used, we can avoid using a trylock.
The flag is set after resources are initialized.
So if new_port() and del_port() check the flag, it doesn't access
uninitialized resources.

I would like to try to avoid using trylock.

> Also does nsim_bus_exit() really need to iterate over devices to remove
> them? Does core not do it for us?

I couldn't find the logic, which remove devices.
So I think it's needed.

Thank you!
Taehee Yoo

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net 1/5] netdevsim: fix a race condition in netdevsim operations
  2020-01-24  5:16       ` Taehee Yoo
@ 2020-01-24 13:53         ` Jakub Kicinski
  0 siblings, 0 replies; 6+ messages in thread
From: Jakub Kicinski @ 2020-01-24 13:53 UTC (permalink / raw)
  To: Taehee Yoo; +Cc: Jakub Kicinski, David Miller, Netdev, Jiri Pirko

On Fri, 24 Jan 2020 14:16:03 +0900, Taehee Yoo wrote:
> On Wed, 15 Jan 2020 at 23:16, Jakub Kicinski <kuba@kernel.org> wrote:
> > I may very well be wrong, and something else may be preventing this
> > condition. It's just a bit strange to see release free an internal
> > sub-structure, while the main structure is freed immediately..
> 
> I didn't think about ordering of resource release routine.
> So I took a look at the release routine.
> 
> del_device_store()
>     nsim_bus_dev_del()
>         nsim_bus_dev_del()
>             kobject_put()
>                 device_release()
>                     nsim_bus_dev_release()
>                         kfree(nsim_bus_dev->vconfigs)
>     kfree(nsim_bus_dev)
> 
> Before freeing nsim_bus_dev, all resources are freed in the
> device_unregister(). So, I think it's safe.

I see

> > > > >       unsigned int num_vfs;
> > > > >       int ret;
> > > > >
> > > > > +     if (!mutex_trylock(&nsim_bus_dev_ops_lock))
> > > > > +             return -EBUSY;  
> > > >
> > > > Why the trylocks? Are you trying to catch the races between unregister
> > > > and other ops?
> > > >  
> > >
> > > The reason is to avoid deadlock.
> > > If we use mutex_lock() instead of mutex_trylock(), the below message
> > > will be printed and actual deadlock also appeared.  
> >  
> > > [  426.907883][  T805]  Possible unsafe locking scenario:
> > > [  426.907883][  T805]
> > > [  426.908715][  T805]        CPU0                    CPU1
> > > [  426.909312][  T805]        ----                    ----
> > > [  426.909902][  T805]   lock(kn->count#170);
> > > [  426.910372][  T805]
> > > lock(nsim_bus_dev_ops_lock);
> > > [  426.911277][  T805]                                lock(kn->count#170);
> > > [  426.912032][  T805]   lock(nsim_bus_dev_ops_lock);  
> >  
> > > Locking ordering of {new/del}_device() and {new/del}_port is different.
> > > So, a deadlock could occur.  
> >
> > Hm, we can't use the same lock for the bus ops and port ops.
> > But the port ops already take port lock, do we really need
> > another lock there?
> >  
> 
> A synchronize routine is needed.
> new_port() and del_port() operations access many device resources.
> It could be used even before resources are allocated or initialized.
> So, new_port() and del_port() should be allowed to use after resources
> are initialized. But sriov_numvfs() doesn't use uninitialized resource
> so it doesn't make any problem.

Oh - because the device can be registered, but that doesn't
mean it's probed yet..

> If a simple flag variable is used, we can avoid using a trylock.
> The flag is set after resources are initialized.
> So if new_port() and del_port() check the flag, it doesn't access
> uninitialized resources.
>
> I would like to try to avoid using trylock.

Sounds good!
 
> > Also does nsim_bus_exit() really need to iterate over devices to remove
> > them? Does core not do it for us?  
> 
> I couldn't find the logic, which remove devices.
> So I think it's needed.

OK, thanks for checking!

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-01-24 13:53 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-11 16:36 [PATCH net 1/5] netdevsim: fix a race condition in netdevsim operations Taehee Yoo
2020-01-12 14:19 ` Jakub Kicinski
2020-01-14 15:26   ` Taehee Yoo
2020-01-15 14:16     ` Jakub Kicinski
2020-01-24  5:16       ` Taehee Yoo
2020-01-24 13:53         ` Jakub Kicinski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).