linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] loop: replace loop_ctl_mutex with loop_idr_spinlock
@ 2021-08-26 16:03 Tetsuo Handa
  2021-08-27 18:43 ` Christoph Hellwig
  0 siblings, 1 reply; 14+ messages in thread
From: Tetsuo Handa @ 2021-08-26 16:03 UTC (permalink / raw)
  To: Christoph Hellwig, Jens Axboe
  Cc: Hillf Danton, Pavel Tatashin, Tyler Hicks, linux-block

syzbot is reporting circular locking problem at __loop_clr_fd() [1], for
commit a160c6159d4a0cf8 ("block: add an optional probe callback to
major_names") is calling the module's probe function with major_names_lock
held.

Fortunately, since commit 990e78116d38059c ("block: loop: fix deadlock
between open and remove") stopped holding loop_ctl_mutex in lo_open(),
current role of loop_ctl_mutex is to serialize access to loop_index_idr
and loop_add()/loop_remove(); in other words, management of id for IDR.

Therefore, this patch introduces loop_idr_spinlock and a refcount for
protecting access to loop_index_idr. By introducing refcount for hiding
not-yet-initialized/to-be-released loop devices, we can remove
loop_ctl_mutex completely.

loop_unregister_transfer() which is called from cleanup_cryptoloop()
currently lacks serialization between kfree() from loop_remove() from
loop_control_remove() and mutex_lock() from unregister_transfer_cb().
We can use refcount and loop_idr_spinlock for serialization between
these functions.

Holding loop_ctl_mutex in loop_exit() is pointless, for all users must
close /dev/loop-control and /dev/loop$num (in order to drop module's
refcount to 0) before loop_exit() starts, and nobody can open
/dev/loop-control or /dev/loop$num afterwards.

Link: https://syzkaller.appspot.com/bug?id=7bb10e8b62f83e4d445cdf4c13d69e407e629558 [1]
Reported-by: syzbot <syzbot+f61766d5763f9e7a118f@syzkaller.appspotmail.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Tested-by: syzbot <syzbot+f61766d5763f9e7a118f@syzkaller.appspotmail.com>
---
This is an alternative to "sort out the lock order in the loop driver v2"
posted at https://lkml.kernel.org/r/20210826133810.3700-1-hch@lst.de .

 drivers/block/loop.c | 125 +++++++++++++++++++++++++++----------------
 drivers/block/loop.h |   1 +
 2 files changed, 79 insertions(+), 47 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f0cdff0c5fbf..783b3d2ed277 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -87,7 +87,7 @@
 #define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
 
 static DEFINE_IDR(loop_index_idr);
-static DEFINE_MUTEX(loop_ctl_mutex);
+static DEFINE_SPINLOCK(loop_idr_spinlock);
 static DEFINE_MUTEX(loop_validate_mutex);
 
 /**
@@ -2113,28 +2113,35 @@ int loop_register_transfer(struct loop_func_table *funcs)
 	return 0;
 }
 
-static int unregister_transfer_cb(int id, void *ptr, void *data)
-{
-	struct loop_device *lo = ptr;
-	struct loop_func_table *xfer = data;
-
-	mutex_lock(&lo->lo_mutex);
-	if (lo->lo_encryption == xfer)
-		loop_release_xfer(lo);
-	mutex_unlock(&lo->lo_mutex);
-	return 0;
-}
+static void loop_remove(struct loop_device *lo);
 
 int loop_unregister_transfer(int number)
 {
 	unsigned int n = number;
 	struct loop_func_table *xfer;
+	struct loop_device *lo;
+	int id;
 
 	if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
 		return -EINVAL;
 
 	xfer_funcs[n] = NULL;
-	idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
+
+	spin_lock(&loop_idr_spinlock);
+	idr_for_each_entry(&loop_index_idr, lo, id) {
+		if (!refcount_inc_not_zero(&lo->idr_visible))
+			continue;
+		spin_unlock(&loop_idr_spinlock);
+		mutex_lock(&lo->lo_mutex);
+		if (lo->lo_encryption == xfer)
+			loop_release_xfer(lo);
+		mutex_unlock(&lo->lo_mutex);
+		if (refcount_dec_and_test(&lo->idr_visible))
+			loop_remove(lo);
+		spin_lock(&loop_idr_spinlock);
+	}
+	spin_unlock(&loop_idr_spinlock);
+
 	return 0;
 }
 
@@ -2313,20 +2320,20 @@ static int loop_add(int i)
 		goto out;
 	lo->lo_state = Lo_unbound;
 
-	err = mutex_lock_killable(&loop_ctl_mutex);
-	if (err)
-		goto out_free_dev;
-
 	/* allocate id, if @id >= 0, we're requesting that specific id */
+	idr_preload(GFP_KERNEL);
+	spin_lock(&loop_idr_spinlock);
 	if (i >= 0) {
-		err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL);
+		err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_ATOMIC);
 		if (err == -ENOSPC)
 			err = -EEXIST;
 	} else {
-		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
+		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_ATOMIC);
 	}
+	spin_unlock(&loop_idr_spinlock);
+	idr_preload_end();
 	if (err < 0)
-		goto out_unlock;
+		goto out_free_dev;
 	i = err;
 
 	err = -ENOMEM;
@@ -2392,16 +2399,18 @@ static int loop_add(int i)
 	disk->private_data	= lo;
 	disk->queue		= lo->lo_queue;
 	sprintf(disk->disk_name, "loop%d", i);
+	/* Make this loop device reachable from pathname. */
 	add_disk(disk);
-	mutex_unlock(&loop_ctl_mutex);
+	/* Show this loop device. */
+	refcount_set(&lo->idr_visible, 1);
 	return i;
 
 out_cleanup_tags:
 	blk_mq_free_tag_set(&lo->tag_set);
 out_free_idr:
+	spin_lock(&loop_idr_spinlock);
 	idr_remove(&loop_index_idr, i);
-out_unlock:
-	mutex_unlock(&loop_ctl_mutex);
+	spin_unlock(&loop_idr_spinlock);
 out_free_dev:
 	kfree(lo);
 out:
@@ -2410,9 +2419,14 @@ static int loop_add(int i)
 
 static void loop_remove(struct loop_device *lo)
 {
+	/* Make this loop device unreachable from pathname. */
 	del_gendisk(lo->lo_disk);
 	blk_cleanup_disk(lo->lo_disk);
 	blk_mq_free_tag_set(&lo->tag_set);
+	spin_lock(&loop_idr_spinlock);
+	idr_remove(&loop_index_idr, lo->lo_number);
+	spin_unlock(&loop_idr_spinlock);
+	/* There is no route which can find this loop device. */
 	mutex_destroy(&lo->lo_mutex);
 	kfree(lo);
 }
@@ -2435,52 +2449,67 @@ static int loop_control_remove(int idx)
 		pr_warn("deleting an unspecified loop device is not supported.\n");
 		return -EINVAL;
 	}
-		
-	ret = mutex_lock_killable(&loop_ctl_mutex);
-	if (ret)
-		return ret;
 
+	/*
+	 * Identify the loop device to remove. Skip the device if it is owned by
+	 * loop_remove()/loop_add() where it is not safe to access lo_mutex.
+	 */
+	spin_lock(&loop_idr_spinlock);
 	lo = idr_find(&loop_index_idr, idx);
-	if (!lo) {
-		ret = -ENODEV;
-		goto out_unlock_ctrl;
+	if (!lo || !refcount_inc_not_zero(&lo->idr_visible)) {
+		spin_unlock(&loop_idr_spinlock);
+		return -ENODEV;
 	}
+	spin_unlock(&loop_idr_spinlock);
 
 	ret = mutex_lock_killable(&lo->lo_mutex);
 	if (ret)
-		goto out_unlock_ctrl;
+		goto out;
 	if (lo->lo_state != Lo_unbound ||
 	    atomic_read(&lo->lo_refcnt) > 0) {
 		mutex_unlock(&lo->lo_mutex);
-		ret = -EBUSY;
-		goto out_unlock_ctrl;
+		if (lo->lo_state == Lo_deleting)
+			ret = -ENODEV;
+		else
+			ret = -EBUSY;
+		goto out;
 	}
+	/* Mark this loop device no longer open()-able. */
 	lo->lo_state = Lo_deleting;
 	mutex_unlock(&lo->lo_mutex);
-
-	idr_remove(&loop_index_idr, lo->lo_number);
-	loop_remove(lo);
-out_unlock_ctrl:
-	mutex_unlock(&loop_ctl_mutex);
+	/* Hide this loop device. */
+	refcount_dec(&lo->idr_visible);
+	/*
+	 * Try to wait for concurrent callers (they should complete shortly due to
+	 * lo->lo_state == Lo_deleting) operating on this loop device, in order to
+	 * help that subsequent loop_add() will not to fail with -EEXIST.
+	 * Note that this is best effort.
+	 */
+	for (ret = 0; refcount_read(&lo->idr_visible) != 1 && ret < HZ; ret++)
+		schedule_timeout_killable(1);
+	ret = 0;
+out:
+	/* Remove this loop device. */
+	if (refcount_dec_and_test(&lo->idr_visible))
+		loop_remove(lo);
 	return ret;
 }
 
 static int loop_control_get_free(int idx)
 {
 	struct loop_device *lo;
-	int id, ret;
+	int id;
 
-	ret = mutex_lock_killable(&loop_ctl_mutex);
-	if (ret)
-		return ret;
+	spin_lock(&loop_idr_spinlock);
 	idr_for_each_entry(&loop_index_idr, lo, id) {
-		if (lo->lo_state == Lo_unbound)
+		if (refcount_read(&lo->idr_visible) &&
+		    lo->lo_state == Lo_unbound)
 			goto found;
 	}
-	mutex_unlock(&loop_ctl_mutex);
+	spin_unlock(&loop_idr_spinlock);
 	return loop_add(-1);
 found:
-	mutex_unlock(&loop_ctl_mutex);
+	spin_unlock(&loop_idr_spinlock);
 	return id;
 }
 
@@ -2590,10 +2619,12 @@ static void __exit loop_exit(void)
 	unregister_blkdev(LOOP_MAJOR, "loop");
 	misc_deregister(&loop_misc);
 
-	mutex_lock(&loop_ctl_mutex);
+	/*
+	 * There is no need to use loop_idr_spinlock here, for nobody else can
+	 * access loop_index_idr when this module is unloading.
+	 */
 	idr_for_each_entry(&loop_index_idr, lo, id)
 		loop_remove(lo);
-	mutex_unlock(&loop_ctl_mutex);
 
 	idr_destroy(&loop_index_idr);
 }
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 1988899db63a..bed350d8722f 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -68,6 +68,7 @@ struct loop_device {
 	struct blk_mq_tag_set	tag_set;
 	struct gendisk		*lo_disk;
 	struct mutex		lo_mutex;
+	refcount_t		idr_visible;
 };
 
 struct loop_cmd {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] loop: replace loop_ctl_mutex with loop_idr_spinlock
  2021-08-26 16:03 [PATCH] loop: replace loop_ctl_mutex with loop_idr_spinlock Tetsuo Handa
@ 2021-08-27 18:43 ` Christoph Hellwig
  2021-08-28  1:10   ` Tetsuo Handa
  0 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2021-08-27 18:43 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Christoph Hellwig, Jens Axboe, Hillf Danton, Pavel Tatashin,
	Tyler Hicks, linux-block

On Fri, Aug 27, 2021 at 01:03:45AM +0900, Tetsuo Handa wrote:
> 
> loop_unregister_transfer() which is called from cleanup_cryptoloop()
> currently lacks serialization between kfree() from loop_remove() from
> loop_control_remove() and mutex_lock() from unregister_transfer_cb().
> We can use refcount and loop_idr_spinlock for serialization between
> these functions.


So before we start complicating things for loop_release_xfer - how
do you actually reproduce loop_unregister_transfer finding a loop
device with a transfer set?  AFAICS loop_unregister_transfer is only
called form exit_cryptoloop, which can only be called when
cryptoloop has a zero reference count.  But as long as a transfer
is registered an extra refcount is held on its owner.

> @@ -2313,20 +2320,20 @@ static int loop_add(int i)
>  		goto out;
>  	lo->lo_state = Lo_unbound;
>  
> -	err = mutex_lock_killable(&loop_ctl_mutex);
> -	if (err)
> -		goto out_free_dev;
> -
>  	/* allocate id, if @id >= 0, we're requesting that specific id */
> +	idr_preload(GFP_KERNEL);
> +	spin_lock(&loop_idr_spinlock);
>  	if (i >= 0) {
> -		err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL);
> +		err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_ATOMIC);
>  		if (err == -ENOSPC)
>  			err = -EEXIST;
>  	} else {
> -		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
> +		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_ATOMIC);
>  	}
> +	spin_unlock(&loop_idr_spinlock);
> +	idr_preload_end();

Can you explain why the mutex is switched to a spinlock?  I could not
find any caller that can't block, so there doesn't seem to be a real
need for a spinlock, while a spinlock requires extra work and GFP_ATOMIC
allocations here.  Dropping the _killable probably makes some sense,
but seems like a separate cleanup.

> +	if (!lo || !refcount_inc_not_zero(&lo->idr_visible)) {
> +		spin_unlock(&loop_idr_spinlock);
> +		return -ENODEV;
>  	}
> +	spin_unlock(&loop_idr_spinlock);

> +	refcount_dec(&lo->idr_visible);
> +	/*
> +	 * Try to wait for concurrent callers (they should complete shortly due to
> +	 * lo->lo_state == Lo_deleting) operating on this loop device, in order to
> +	 * help that subsequent loop_add() will not to fail with -EEXIST.
> +	 * Note that this is best effort.
> +	 */
> +	for (ret = 0; refcount_read(&lo->idr_visible) != 1 && ret < HZ; ret++)
> +		schedule_timeout_killable(1);
> +	ret = 0;

This dance looks pretty strange to me.  I think just making idr_visible
an atomic_t and using atomic_cmpxchg with just 0 and 1 as valid versions
will make this much simpler, as it avoids the need to deal with a > 1
count, and it also serializes multiple removal calls.

I quickly hacked this up as a slight variant of your patch, and it's been
running the syzbot reproducer you pointed me to for quite while now:

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f0cdff0c5fbf4..69ced1feb18d5 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2113,28 +2113,29 @@ int loop_register_transfer(struct loop_func_table *funcs)
 	return 0;
 }
 
-static int unregister_transfer_cb(int id, void *ptr, void *data)
-{
-	struct loop_device *lo = ptr;
-	struct loop_func_table *xfer = data;
-
-	mutex_lock(&lo->lo_mutex);
-	if (lo->lo_encryption == xfer)
-		loop_release_xfer(lo);
-	mutex_unlock(&lo->lo_mutex);
-	return 0;
-}
-
 int loop_unregister_transfer(int number)
 {
 	unsigned int n = number;
 	struct loop_func_table *xfer;
+	struct loop_device *lo;
+	int id;
 
 	if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
 		return -EINVAL;
 
 	xfer_funcs[n] = NULL;
-	idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
+
+	/*
+	 * loop_unregister_transfer is only called from cryptoloop module
+	 * unload.  Given that each loop device that has a transfer enabled
+	 * hold a reference to the module implementing it we should never
+	 * get here with a transfer that is set.
+	 */
+	mutex_lock(&loop_ctl_mutex);
+	idr_for_each_entry(&loop_index_idr, lo, id)
+		WARN_ON_ONCE(lo->lo_encryption == xfer);
+	mutex_unlock(&loop_ctl_mutex);
+
 	return 0;
 }
 
@@ -2325,8 +2326,9 @@ static int loop_add(int i)
 	} else {
 		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
 	}
+	mutex_unlock(&loop_ctl_mutex);
 	if (err < 0)
-		goto out_unlock;
+		goto out_free_dev;
 	i = err;
 
 	err = -ENOMEM;
@@ -2392,15 +2394,17 @@ static int loop_add(int i)
 	disk->private_data	= lo;
 	disk->queue		= lo->lo_queue;
 	sprintf(disk->disk_name, "loop%d", i);
+	/* Make this loop device reachable from pathname. */
 	add_disk(disk);
-	mutex_unlock(&loop_ctl_mutex);
+	/* Show this loop device. */
+	atomic_set(&lo->idr_visible, 1);
 	return i;
 
 out_cleanup_tags:
 	blk_mq_free_tag_set(&lo->tag_set);
 out_free_idr:
+	mutex_lock(&loop_ctl_mutex);
 	idr_remove(&loop_index_idr, i);
-out_unlock:
 	mutex_unlock(&loop_ctl_mutex);
 out_free_dev:
 	kfree(lo);
@@ -2410,9 +2414,14 @@ static int loop_add(int i)
 
 static void loop_remove(struct loop_device *lo)
 {
+	/* Make this loop device unreachable from pathname. */
 	del_gendisk(lo->lo_disk);
 	blk_cleanup_disk(lo->lo_disk);
 	blk_mq_free_tag_set(&lo->tag_set);
+	mutex_lock(&loop_ctl_mutex);
+	idr_remove(&loop_index_idr, lo->lo_number);
+	mutex_unlock(&loop_ctl_mutex);
+	/* There is no route which can find this loop device. */
 	mutex_destroy(&lo->lo_mutex);
 	kfree(lo);
 }
@@ -2440,29 +2449,40 @@ static int loop_control_remove(int idx)
 	if (ret)
 		return ret;
 
+	/*
+	 * Identify the loop device to remove. Skip the device if it is owned by
+	 * loop_remove()/loop_add() where it is not safe to access lo_mutex.
+	 * The loop device is marked invisible even if we bail out of the
+	 * removal, but the only other place checking the visibility is the
+	 * LOOP_CTL_GET_FREE ioctl, which checks the same flags as we do below,
+	 * and which is fundamentally racy anyway.
+	 */
 	lo = idr_find(&loop_index_idr, idx);
-	if (!lo) {
-		ret = -ENODEV;
-		goto out_unlock_ctrl;
+	if (!lo || atomic_cmpxchg(&lo->idr_visible, 1, 0) == 0) {
+		mutex_unlock(&loop_ctl_mutex);
+		return -ENODEV;
 	}
+	mutex_unlock(&loop_ctl_mutex);
 
 	ret = mutex_lock_killable(&lo->lo_mutex);
 	if (ret)
-		goto out_unlock_ctrl;
+		goto mark_visible;
 	if (lo->lo_state != Lo_unbound ||
 	    atomic_read(&lo->lo_refcnt) > 0) {
 		mutex_unlock(&lo->lo_mutex);
 		ret = -EBUSY;
-		goto out_unlock_ctrl;
+		goto mark_visible;
 	}
+	/* Mark this loop device no longer open()-able. */
 	lo->lo_state = Lo_deleting;
 	mutex_unlock(&lo->lo_mutex);
 
-	idr_remove(&loop_index_idr, lo->lo_number);
 	loop_remove(lo);
-out_unlock_ctrl:
-	mutex_unlock(&loop_ctl_mutex);
-	return ret;
+	return 0;
+
+mark_visible:
+	atomic_inc(&lo->idr_visible);
+	return -EBUSY;
 }
 
 static int loop_control_get_free(int idx)
@@ -2474,7 +2494,8 @@ static int loop_control_get_free(int idx)
 	if (ret)
 		return ret;
 	idr_for_each_entry(&loop_index_idr, lo, id) {
-		if (lo->lo_state == Lo_unbound)
+		if (atomic_read(&lo->idr_visible) &&
+		    lo->lo_state == Lo_unbound)
 			goto found;
 	}
 	mutex_unlock(&loop_ctl_mutex);
@@ -2590,10 +2611,12 @@ static void __exit loop_exit(void)
 	unregister_blkdev(LOOP_MAJOR, "loop");
 	misc_deregister(&loop_misc);
 
-	mutex_lock(&loop_ctl_mutex);
+	/*
+	 * There is no need to use loop_ctl_mutex here, for nobody else can
+	 * access loop_index_idr when this module is unloading.
+	 */
 	idr_for_each_entry(&loop_index_idr, lo, id)
 		loop_remove(lo);
-	mutex_unlock(&loop_ctl_mutex);
 
 	idr_destroy(&loop_index_idr);
 }
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 1988899db63ac..1ec5135da54a7 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -68,6 +68,7 @@ struct loop_device {
 	struct blk_mq_tag_set	tag_set;
 	struct gendisk		*lo_disk;
 	struct mutex		lo_mutex;
+	atomic_t		idr_visible; /* a bool in reality */
 };
 
 struct loop_cmd {

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] loop: replace loop_ctl_mutex with loop_idr_spinlock
  2021-08-27 18:43 ` Christoph Hellwig
@ 2021-08-28  1:10   ` Tetsuo Handa
  2021-08-28  2:22     ` Tetsuo Handa
  2021-08-28  7:18     ` Christoph Hellwig
  0 siblings, 2 replies; 14+ messages in thread
From: Tetsuo Handa @ 2021-08-28  1:10 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Hillf Danton, Pavel Tatashin, Tyler Hicks, linux-block

On 2021/08/28 3:43, Christoph Hellwig wrote:
> On Fri, Aug 27, 2021 at 01:03:45AM +0900, Tetsuo Handa wrote:
>>
>> loop_unregister_transfer() which is called from cleanup_cryptoloop()
>> currently lacks serialization between kfree() from loop_remove() from
>> loop_control_remove() and mutex_lock() from unregister_transfer_cb().
>> We can use refcount and loop_idr_spinlock for serialization between
>> these functions.
> 
> 
> So before we start complicating things for loop_release_xfer - how
> do you actually reproduce loop_unregister_transfer finding a loop
> device with a transfer set?  AFAICS loop_unregister_transfer is only
> called form exit_cryptoloop, which can only be called when
> cryptoloop has a zero reference count.  But as long as a transfer
> is registered an extra refcount is held on its owner.

Indeed, lo->lo_encryption is set to non-NULL by loop_init_xfer() after a refcount
is taken and lo->lo_encryption is reset to NULL by loop_release_xfer() before
that refount is dropped, and these operations are serialized by lo->lo_mutex. Then,
lo->lo_encryption == xfer can't happen unless forced module unload is requested.

That is, it seems that unregister_transfer_cb() is there in case forced module
unload of cryptoloop module was requested. And in that case, there is no point
with crashing the kernel via panic_on_warn == 1 && WARN_ON_ONCE(). Simple printk()
will be sufficient.

Removing unregister_transfer_cb() (if we ignore forced module unload) will be
a separate patch.

> 
>> @@ -2313,20 +2320,20 @@ static int loop_add(int i)
>>  		goto out;
>>  	lo->lo_state = Lo_unbound;
>>  
>> -	err = mutex_lock_killable(&loop_ctl_mutex);
>> -	if (err)
>> -		goto out_free_dev;
>> -
>>  	/* allocate id, if @id >= 0, we're requesting that specific id */
>> +	idr_preload(GFP_KERNEL);
>> +	spin_lock(&loop_idr_spinlock);
>>  	if (i >= 0) {
>> -		err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL);
>> +		err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_ATOMIC);
>>  		if (err == -ENOSPC)
>>  			err = -EEXIST;
>>  	} else {
>> -		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
>> +		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_ATOMIC);
>>  	}
>> +	spin_unlock(&loop_idr_spinlock);
>> +	idr_preload_end();
> 
> Can you explain why the mutex is switched to a spinlock?  I could not
> find any caller that can't block, so there doesn't seem to be a real
> need for a spinlock, while a spinlock requires extra work and GFP_ATOMIC
> allocations here.  Dropping the _killable probably makes some sense,
> but seems like a separate cleanup.

In order to annotate that extra operations that might sleep should not be
added inside this section. Use of sleepable locks tends to get extra
operations (e.g. wait for a different mutex / completion) and makes it unclear
what the lock is protecting. I can imagine a future that someone adds an
unwanted dependency inside this section if we use mutex here.

Technically, we can add preempt_diable() after mutex_lock() and
preempt_enable() before mutex_unlock() in order to annotate that
extra operations that might sleep should be avoided.
But idr_alloc(GFP_ATOMIC)/idr_find()/idr_for_each_entry() etc. will be
fast enough.

> 
>> +	if (!lo || !refcount_inc_not_zero(&lo->idr_visible)) {
>> +		spin_unlock(&loop_idr_spinlock);
>> +		return -ENODEV;
>>  	}
>> +	spin_unlock(&loop_idr_spinlock);
> 
>> +	refcount_dec(&lo->idr_visible);
>> +	/*
>> +	 * Try to wait for concurrent callers (they should complete shortly due to
>> +	 * lo->lo_state == Lo_deleting) operating on this loop device, in order to
>> +	 * help that subsequent loop_add() will not to fail with -EEXIST.
>> +	 * Note that this is best effort.
>> +	 */
>> +	for (ret = 0; refcount_read(&lo->idr_visible) != 1 && ret < HZ; ret++)
>> +		schedule_timeout_killable(1);
>> +	ret = 0;
> 
> This dance looks pretty strange to me.  I think just making idr_visible
> an atomic_t and using atomic_cmpxchg with just 0 and 1 as valid versions
> will make this much simpler, as it avoids the need to deal with a > 1
> count, and it also serializes multiple removal calls.

Yes if we ignore forced module unload (which needs to synchronously check
lo->lo_encryption) of cryptoloop module.

If we don't ignore forced module unload, we could update my patch to keep only
mutex_destroy() and kfree() deferred by a refcount, for only lo->lo_state,
lo->lo_refcnt and lo->lo_encryption would be accessed under lo->lo_mutex
serialization. There is no need to defer "del_gendisk() + idr_remove()"
sequence for concurrent callers.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] loop: replace loop_ctl_mutex with loop_idr_spinlock
  2021-08-28  1:10   ` Tetsuo Handa
@ 2021-08-28  2:22     ` Tetsuo Handa
  2021-08-28  7:18     ` Christoph Hellwig
  1 sibling, 0 replies; 14+ messages in thread
From: Tetsuo Handa @ 2021-08-28  2:22 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Hillf Danton, Pavel Tatashin, Tyler Hicks, linux-block

On 2021/08/28 10:10, Tetsuo Handa wrote:
> If we don't ignore forced module unload, we could update my patch to keep only
> mutex_destroy() and kfree() deferred by a refcount, for only lo->lo_state,
> lo->lo_refcnt and lo->lo_encryption would be accessed under lo->lo_mutex
> serialization. There is no need to defer "del_gendisk() + idr_remove()"
> sequence for concurrent callers.
> 

OK, here is a delta patch to make it no longer best effort.
We can consider removal of cryptoloop module after this patch,
starting from a printk() for deprecated message.

 drivers/block/loop.c | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2113,7 +2113,11 @@ int loop_register_transfer(struct loop_func_table *funcs)
 	return 0;
 }
 
-static void loop_remove(struct loop_device *lo);
+static void loop_destroy(struct loop_device *lo)
+{
+	mutex_destroy(&lo->lo_mutex);
+	kfree(lo);
+}
 
 int loop_unregister_transfer(int number)
 {
@@ -2137,7 +2141,7 @@ int loop_unregister_transfer(int number)
 			loop_release_xfer(lo);
 		mutex_unlock(&lo->lo_mutex);
 		if (refcount_dec_and_test(&lo->idr_visible))
-			loop_remove(lo);
+			loop_destroy(lo);
 		spin_lock(&loop_idr_spinlock);
 	}
 	spin_unlock(&loop_idr_spinlock);
@@ -2426,9 +2430,6 @@ static void loop_remove(struct loop_device *lo)
 	spin_lock(&loop_idr_spinlock);
 	idr_remove(&loop_index_idr, lo->lo_number);
 	spin_unlock(&loop_idr_spinlock);
-	/* There is no route which can find this loop device. */
-	mutex_destroy(&lo->lo_mutex);
-	kfree(lo);
 }
 
 static void loop_probe(dev_t dev)
@@ -2452,7 +2453,7 @@ static int loop_control_remove(int idx)
 
 	/*
 	 * Identify the loop device to remove. Skip the device if it is owned by
-	 * loop_remove()/loop_add() where it is not safe to access lo_mutex.
+	 * loop_add() where it is not safe to access lo_mutex.
 	 */
 	spin_lock(&loop_idr_spinlock);
 	lo = idr_find(&loop_index_idr, idx);
@@ -2479,19 +2480,11 @@ static int loop_control_remove(int idx)
 	mutex_unlock(&lo->lo_mutex);
 	/* Hide this loop device. */
 	refcount_dec(&lo->idr_visible);
-	/*
-	 * Try to wait for concurrent callers (they should complete shortly due to
-	 * lo->lo_state == Lo_deleting) operating on this loop device, in order to
-	 * help that subsequent loop_add() will not to fail with -EEXIST.
-	 * Note that this is best effort.
-	 */
-	for (ret = 0; refcount_read(&lo->idr_visible) != 1 && ret < HZ; ret++)
-		schedule_timeout_killable(1);
-	ret = 0;
+	/* Remove this loop device, but wait concurrent callers before destroy. */
+	loop_remove(lo);
 out:
-	/* Remove this loop device. */
 	if (refcount_dec_and_test(&lo->idr_visible))
-		loop_remove(lo);
+		loop_destroy(lo);
 	return ret;
 }
 
@@ -2623,8 +2616,10 @@ static void __exit loop_exit(void)
 	 * There is no need to use loop_idr_spinlock here, for nobody else can
 	 * access loop_index_idr when this module is unloading.
 	 */
-	idr_for_each_entry(&loop_index_idr, lo, id)
+	idr_for_each_entry(&loop_index_idr, lo, id) {
 		loop_remove(lo);
+		loop_destroy(lo);
+	}
 
 	idr_destroy(&loop_index_idr);
 }
-- 
2.25.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] loop: replace loop_ctl_mutex with loop_idr_spinlock
  2021-08-28  1:10   ` Tetsuo Handa
  2021-08-28  2:22     ` Tetsuo Handa
@ 2021-08-28  7:18     ` Christoph Hellwig
  2021-08-28 13:50       ` Tetsuo Handa
  1 sibling, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2021-08-28  7:18 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Christoph Hellwig, Jens Axboe, Hillf Danton, Pavel Tatashin,
	Tyler Hicks, linux-block

On Sat, Aug 28, 2021 at 10:10:36AM +0900, Tetsuo Handa wrote:
> That is, it seems that unregister_transfer_cb() is there in case forced module
> unload of cryptoloop module was requested. And in that case, there is no point
> with crashing the kernel via panic_on_warn == 1 && WARN_ON_ONCE(). Simple printk()
> will be sufficient.

If we have that case for forced module unload a WARN_ON is the right thing.
That being said we can simply do the cmpxchg based protection for that
case as well if you want to keep it.  That will lead to a spurious
loop remove failure with -EBUSY when a concurrent force module removal
for cryptoloop is happening, but if you do something like that you get
to keep the pieces.

> In order to annotate that extra operations that might sleep should not be
> added inside this section. Use of sleepable locks tends to get extra
> operations (e.g. wait for a different mutex / completion) and makes it unclear
> what the lock is protecting. I can imagine a future that someone adds an
> unwanted dependency inside this section if we use mutex here.
> 
> Technically, we can add preempt_diable() after mutex_lock() and
> preempt_enable() before mutex_unlock() in order to annotate that
> extra operations that might sleep should be avoided.
> But idr_alloc(GFP_ATOMIC)/idr_find()/idr_for_each_entry() etc. will be
> fast enough.

Well, split that into a cleanup patch if you think it is worth the
effort, with a good changelog.  Not really part of the bug fix.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] loop: replace loop_ctl_mutex with loop_idr_spinlock
  2021-08-28  7:18     ` Christoph Hellwig
@ 2021-08-28 13:50       ` Tetsuo Handa
  2021-08-29  1:22         ` [PATCH v2] loop: reduce the loop_ctl_mutex scope Tetsuo Handa
  0 siblings, 1 reply; 14+ messages in thread
From: Tetsuo Handa @ 2021-08-28 13:50 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Hillf Danton, Pavel Tatashin, Tyler Hicks, linux-block

On 2021/08/28 16:18, Christoph Hellwig wrote:
> On Sat, Aug 28, 2021 at 10:10:36AM +0900, Tetsuo Handa wrote:
>> That is, it seems that unregister_transfer_cb() is there in case forced module
>> unload of cryptoloop module was requested. And in that case, there is no point
>> with crashing the kernel via panic_on_warn == 1 && WARN_ON_ONCE(). Simple printk()
>> will be sufficient.
> 
> If we have that case for forced module unload a WARN_ON is the right thing.
> That being said we can simply do the cmpxchg based protection for that
> case as well if you want to keep it.  That will lead to a spurious
> loop remove failure with -EBUSY when a concurrent force module removal
> for cryptoloop is happening, but if you do something like that you get
> to keep the pieces.

Oh, given that commit 222013f9ac30b9ce ("cryptoloop: add a deprecation warning") was
already merged into linux.git , there is no point with worrying about forced module
unloading. Then, I would warn like

+#ifdef CONFIG_MODULE_UNLOAD
+       if (module_refcount(xfer->owner) != -1)
+               pr_err("Unregistering a transfer function in use. Expect kernel crashes.\n");
+#endif

than

+	idr_for_each_entry(&loop_index_idr, lo, id)
+		WARN_ON_ONCE(lo->lo_encryption == xfer);

in your patch.
(Actually, nobody calls loop_unregister_transfer() if CONFIG_MODULE_UNLOAD=n ...)

Then, your atomic_cmpxchg(&lo->idr_visible, 1, 0) == 0 approach will be OK
(I would use

+	atomic_set(&lo->idr_visible, 1);

than

+	atomic_inc(&lo->idr_visible);

because it is "Show this loop device again.").

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2] loop: reduce the loop_ctl_mutex scope
  2021-08-28 13:50       ` Tetsuo Handa
@ 2021-08-29  1:22         ` Tetsuo Handa
  2021-08-29 13:47           ` [PATCH v3] " Tetsuo Handa
  0 siblings, 1 reply; 14+ messages in thread
From: Tetsuo Handa @ 2021-08-29  1:22 UTC (permalink / raw)
  To: Christoph Hellwig, Jens Axboe
  Cc: Hillf Danton, Pavel Tatashin, Tyler Hicks, linux-block

syzbot is reporting circular locking problem at __loop_clr_fd() [1], for
commit a160c6159d4a0cf8 ("block: add an optional probe callback to
major_names") is calling the module's probe function with major_names_lock
held.

Fortunately, since commit 990e78116d38059c ("block: loop: fix deadlock
between open and remove") stopped holding loop_ctl_mutex in lo_open(),
current role of loop_ctl_mutex is to serialize access to loop_index_idr
and loop_add()/loop_remove(); in other words, management of id for IDR.
To avoid holding loop_ctl_mutex during whole add/remove operation, use
a bool flag to indicate whether the loop device is ready for use.

loop_unregister_transfer() which is called from cleanup_cryptoloop()
currently has possibility of use-after-free access due to lack of
serialization between kfree() from loop_remove() from loop_control_remove()
and mutex_lock() from unregister_transfer_cb(). But since lo->lo_encryption
should be already NULL when this function is called due to module unload,
and commit 222013f9ac30b9ce ("cryptoloop: add a deprecation warning")
indicates that we will remove this function shortly, this patch updates
this function to emit warning instead of checking lo->lo_encryption.

Holding loop_ctl_mutex in loop_exit() is pointless, for all users must
close /dev/loop-control and /dev/loop$num (in order to drop module's
refcount to 0) before loop_exit() starts, and nobody can open
/dev/loop-control or /dev/loop$num afterwards.

Link: https://syzkaller.appspot.com/bug?id=7bb10e8b62f83e4d445cdf4c13d69e407e629558 [1]
Reported-by: syzbot <syzbot+f61766d5763f9e7a118f@syzkaller.appspotmail.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Tested-by: syzbot <syzbot+f61766d5763f9e7a118f@syzkaller.appspotmail.com>
---
Changes in v2:
  Don't replace loop_ctl_mutex mutex with loop_idr_spinlock spinlock.
  Don't traverse on loop_index_idr at loop_unregister_transfer().
  Don't use refcount for handling duplicated removal requests.

 drivers/block/loop.c | 77 ++++++++++++++++++++++++++++----------------
 drivers/block/loop.h |  1 +
 2 files changed, 50 insertions(+), 28 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f0cdff0c5fbf..b2c9d355e258 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2113,18 +2113,6 @@ int loop_register_transfer(struct loop_func_table *funcs)
 	return 0;
 }
 
-static int unregister_transfer_cb(int id, void *ptr, void *data)
-{
-	struct loop_device *lo = ptr;
-	struct loop_func_table *xfer = data;
-
-	mutex_lock(&lo->lo_mutex);
-	if (lo->lo_encryption == xfer)
-		loop_release_xfer(lo);
-	mutex_unlock(&lo->lo_mutex);
-	return 0;
-}
-
 int loop_unregister_transfer(int number)
 {
 	unsigned int n = number;
@@ -2132,9 +2120,20 @@ int loop_unregister_transfer(int number)
 
 	if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
 		return -EINVAL;
+	/*
+	 * This function is called from only cleanup_cryptoloop().
+	 * Given that each loop device that has a transfer enabled holds a
+	 * reference to the module implementing it we should never get here
+	 * with a transfer that is set (unless forced module unloading is
+	 * requested). Thus, check module's refcount and warn if this is
+	 * not a clean unloading.
+	 */
+#ifdef CONFIG_MODULE_UNLOAD
+	if (xfer->owner && module_refcount(xfer->owner) != -1)
+		pr_err("Unregistering a transfer function in use. Expect kernel crashes.\n");
+#endif
 
 	xfer_funcs[n] = NULL;
-	idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
 	return 0;
 }
 
@@ -2325,8 +2324,9 @@ static int loop_add(int i)
 	} else {
 		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
 	}
+	mutex_unlock(&loop_ctl_mutex);
 	if (err < 0)
-		goto out_unlock;
+		goto out_free_dev;
 	i = err;
 
 	err = -ENOMEM;
@@ -2392,15 +2392,17 @@ static int loop_add(int i)
 	disk->private_data	= lo;
 	disk->queue		= lo->lo_queue;
 	sprintf(disk->disk_name, "loop%d", i);
+	/* Make this loop device reachable from pathname. */
 	add_disk(disk);
-	mutex_unlock(&loop_ctl_mutex);
+	/* Show this loop device. */
+	lo->idr_visible = true;
 	return i;
 
 out_cleanup_tags:
 	blk_mq_free_tag_set(&lo->tag_set);
 out_free_idr:
+	mutex_lock(&loop_ctl_mutex);
 	idr_remove(&loop_index_idr, i);
-out_unlock:
 	mutex_unlock(&loop_ctl_mutex);
 out_free_dev:
 	kfree(lo);
@@ -2410,9 +2412,14 @@ static int loop_add(int i)
 
 static void loop_remove(struct loop_device *lo)
 {
+	/* Make this loop device unreachable from pathname. */
 	del_gendisk(lo->lo_disk);
 	blk_cleanup_disk(lo->lo_disk);
 	blk_mq_free_tag_set(&lo->tag_set);
+	mutex_lock(&loop_ctl_mutex);
+	idr_remove(&loop_index_idr, lo->lo_number);
+	mutex_unlock(&loop_ctl_mutex);
+	/* There is no route which can find this loop device. */
 	mutex_destroy(&lo->lo_mutex);
 	kfree(lo);
 }
@@ -2440,29 +2447,39 @@ static int loop_control_remove(int idx)
 	if (ret)
 		return ret;
 
+	/*
+	 * Hide this loop device for serialization, even if we bail out of
+	 * the removal. The only other place checking the visibility is the
+	 * LOOP_CTL_GET_FREE ioctl, which checks the same flags as we do below,
+	 * and which is fundamentally racy anyway.
+	 */
 	lo = idr_find(&loop_index_idr, idx);
-	if (!lo) {
-		ret = -ENODEV;
-		goto out_unlock_ctrl;
+	if (!lo || !cmpxchg(&lo->idr_visible, true, false)) {
+		mutex_unlock(&loop_ctl_mutex);
+		return -ENODEV;
 	}
+	mutex_unlock(&loop_ctl_mutex);
 
 	ret = mutex_lock_killable(&lo->lo_mutex);
 	if (ret)
-		goto out_unlock_ctrl;
+		goto mark_visible;
 	if (lo->lo_state != Lo_unbound ||
 	    atomic_read(&lo->lo_refcnt) > 0) {
 		mutex_unlock(&lo->lo_mutex);
 		ret = -EBUSY;
-		goto out_unlock_ctrl;
+		goto mark_visible;
 	}
+	/* Mark this loop device no longer open()-able. */
 	lo->lo_state = Lo_deleting;
 	mutex_unlock(&lo->lo_mutex);
 
-	idr_remove(&loop_index_idr, lo->lo_number);
 	loop_remove(lo);
-out_unlock_ctrl:
-	mutex_unlock(&loop_ctl_mutex);
-	return ret;
+	return 0;
+
+mark_visible:
+	/* Show this loop device again. */
+	lo->idr_visible = true;
+	return -EBUSY;
 }
 
 static int loop_control_get_free(int idx)
@@ -2474,7 +2491,7 @@ static int loop_control_get_free(int idx)
 	if (ret)
 		return ret;
 	idr_for_each_entry(&loop_index_idr, lo, id) {
-		if (lo->lo_state == Lo_unbound)
+		if (lo->idr_visible && lo->lo_state == Lo_unbound)
 			goto found;
 	}
 	mutex_unlock(&loop_ctl_mutex);
@@ -2590,10 +2607,14 @@ static void __exit loop_exit(void)
 	unregister_blkdev(LOOP_MAJOR, "loop");
 	misc_deregister(&loop_misc);
 
-	mutex_lock(&loop_ctl_mutex);
+	/*
+	 * There is no need to use loop_ctl_mutex here, for nobody else can
+	 * access loop_index_idr when this module is unloading (unless forced
+	 * module unloading is requested). If this is not a clean unloading,
+	 * we have no means to avoid kernel crash.
+	 */
 	idr_for_each_entry(&loop_index_idr, lo, id)
 		loop_remove(lo);
-	mutex_unlock(&loop_ctl_mutex);
 
 	idr_destroy(&loop_index_idr);
 }
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 1988899db63a..04c88dd6eabd 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -68,6 +68,7 @@ struct loop_device {
 	struct blk_mq_tag_set	tag_set;
 	struct gendisk		*lo_disk;
 	struct mutex		lo_mutex;
+	bool			idr_visible;
 };
 
 struct loop_cmd {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v3] loop: reduce the loop_ctl_mutex scope
  2021-08-29  1:22         ` [PATCH v2] loop: reduce the loop_ctl_mutex scope Tetsuo Handa
@ 2021-08-29 13:47           ` Tetsuo Handa
  2021-08-30  7:13             ` Christoph Hellwig
                               ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Tetsuo Handa @ 2021-08-29 13:47 UTC (permalink / raw)
  To: Christoph Hellwig, Jens Axboe
  Cc: Hillf Danton, Pavel Tatashin, Tyler Hicks, linux-block

syzbot is reporting circular locking problem at __loop_clr_fd() [1], for
commit a160c6159d4a0cf8 ("block: add an optional probe callback to
major_names") is calling the module's probe function with major_names_lock
held.

Fortunately, since commit 990e78116d38059c ("block: loop: fix deadlock
between open and remove") stopped holding loop_ctl_mutex in lo_open(),
current role of loop_ctl_mutex is to serialize access to loop_index_idr
and loop_add()/loop_remove(); in other words, management of id for IDR.
To avoid holding loop_ctl_mutex during whole add/remove operation, use
a bool flag to indicate whether the loop device is ready for use.

loop_unregister_transfer() which is called from cleanup_cryptoloop()
currently has possibility of use-after-free access due to lack of
serialization between kfree() from loop_remove() from loop_control_remove()
and mutex_lock() from unregister_transfer_cb(). But since lo->lo_encryption
should be already NULL when this function is called due to module unload,
and commit 222013f9ac30b9ce ("cryptoloop: add a deprecation warning")
indicates that we will remove this function shortly, this patch updates
this function to emit warning instead of checking lo->lo_encryption.

Holding loop_ctl_mutex in loop_exit() is pointless, for all users must
close /dev/loop-control and /dev/loop$num (in order to drop module's
refcount to 0) before loop_exit() starts, and nobody can open
/dev/loop-control or /dev/loop$num afterwards.

Link: https://syzkaller.appspot.com/bug?id=7bb10e8b62f83e4d445cdf4c13d69e407e629558 [1]
Reported-by: syzbot <syzbot+f61766d5763f9e7a118f@syzkaller.appspotmail.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
Changes in v3:
  Don't use cmpxchg(), for kernel test robot <lkp@intel.com> reported
  that some archtectures do not support cmpxchg() on bool.
  Add data_race() annotation to loop_control_get_free().

Changes in v2:
  Don't replace loop_ctl_mutex mutex with loop_idr_spinlock spinlock.
  Don't traverse on loop_index_idr at loop_unregister_transfer().
  Don't use refcount for handling duplicated removal requests.
---
 drivers/block/loop.c | 75 +++++++++++++++++++++++++++++---------------
 drivers/block/loop.h |  1 +
 2 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f0cdff0c5fbf..478ff6650dab 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2113,18 +2113,6 @@ int loop_register_transfer(struct loop_func_table *funcs)
 	return 0;
 }
 
-static int unregister_transfer_cb(int id, void *ptr, void *data)
-{
-	struct loop_device *lo = ptr;
-	struct loop_func_table *xfer = data;
-
-	mutex_lock(&lo->lo_mutex);
-	if (lo->lo_encryption == xfer)
-		loop_release_xfer(lo);
-	mutex_unlock(&lo->lo_mutex);
-	return 0;
-}
-
 int loop_unregister_transfer(int number)
 {
 	unsigned int n = number;
@@ -2132,9 +2120,20 @@ int loop_unregister_transfer(int number)
 
 	if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
 		return -EINVAL;
+	/*
+	 * This function is called from only cleanup_cryptoloop().
+	 * Given that each loop device that has a transfer enabled holds a
+	 * reference to the module implementing it we should never get here
+	 * with a transfer that is set (unless forced module unloading is
+	 * requested). Thus, check module's refcount and warn if this is
+	 * not a clean unloading.
+	 */
+#ifdef CONFIG_MODULE_UNLOAD
+	if (xfer->owner && module_refcount(xfer->owner) != -1)
+		pr_err("Danger! Unregistering an in use transfer function.\n");
+#endif
 
 	xfer_funcs[n] = NULL;
-	idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
 	return 0;
 }
 
@@ -2325,8 +2324,9 @@ static int loop_add(int i)
 	} else {
 		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
 	}
+	mutex_unlock(&loop_ctl_mutex);
 	if (err < 0)
-		goto out_unlock;
+		goto out_free_dev;
 	i = err;
 
 	err = -ENOMEM;
@@ -2392,15 +2392,19 @@ static int loop_add(int i)
 	disk->private_data	= lo;
 	disk->queue		= lo->lo_queue;
 	sprintf(disk->disk_name, "loop%d", i);
+	/* Make this loop device reachable from pathname. */
 	add_disk(disk);
+	/* Show this loop device. */
+	mutex_lock(&loop_ctl_mutex);
+	lo->idr_visible = true;
 	mutex_unlock(&loop_ctl_mutex);
 	return i;
 
 out_cleanup_tags:
 	blk_mq_free_tag_set(&lo->tag_set);
 out_free_idr:
+	mutex_lock(&loop_ctl_mutex);
 	idr_remove(&loop_index_idr, i);
-out_unlock:
 	mutex_unlock(&loop_ctl_mutex);
 out_free_dev:
 	kfree(lo);
@@ -2410,9 +2414,14 @@ static int loop_add(int i)
 
 static void loop_remove(struct loop_device *lo)
 {
+	/* Make this loop device unreachable from pathname. */
 	del_gendisk(lo->lo_disk);
 	blk_cleanup_disk(lo->lo_disk);
 	blk_mq_free_tag_set(&lo->tag_set);
+	mutex_lock(&loop_ctl_mutex);
+	idr_remove(&loop_index_idr, lo->lo_number);
+	mutex_unlock(&loop_ctl_mutex);
+	/* There is no route which can find this loop device. */
 	mutex_destroy(&lo->lo_mutex);
 	kfree(lo);
 }
@@ -2436,31 +2445,40 @@ static int loop_control_remove(int idx)
 		return -EINVAL;
 	}
 		
+	/* Hide this loop device for serialization. */
 	ret = mutex_lock_killable(&loop_ctl_mutex);
 	if (ret)
 		return ret;
-
 	lo = idr_find(&loop_index_idr, idx);
-	if (!lo) {
+	if (!lo || !lo->idr_visible)
 		ret = -ENODEV;
-		goto out_unlock_ctrl;
-	}
+	else
+		lo->idr_visible = false;
+	mutex_unlock(&loop_ctl_mutex);
+	if (ret)
+		return ret;
 
+	/* Check whether this loop can be removed. */
 	ret = mutex_lock_killable(&lo->lo_mutex);
 	if (ret)
-		goto out_unlock_ctrl;
+		goto mark_visible;
 	if (lo->lo_state != Lo_unbound ||
 	    atomic_read(&lo->lo_refcnt) > 0) {
 		mutex_unlock(&lo->lo_mutex);
 		ret = -EBUSY;
-		goto out_unlock_ctrl;
+		goto mark_visible;
 	}
+	/* Mark this loop device no longer open()-able. */
 	lo->lo_state = Lo_deleting;
 	mutex_unlock(&lo->lo_mutex);
 
-	idr_remove(&loop_index_idr, lo->lo_number);
 	loop_remove(lo);
-out_unlock_ctrl:
+	return 0;
+
+mark_visible:
+	/* Show this loop device again. */
+	mutex_lock(&loop_ctl_mutex);
+	lo->idr_visible = true;
 	mutex_unlock(&loop_ctl_mutex);
 	return ret;
 }
@@ -2474,7 +2492,8 @@ static int loop_control_get_free(int idx)
 	if (ret)
 		return ret;
 	idr_for_each_entry(&loop_index_idr, lo, id) {
-		if (lo->lo_state == Lo_unbound)
+		/* Hitting a race results in creating a new loop device which is harmless. */
+		if (lo->idr_visible && data_race(lo->lo_state) == Lo_unbound)
 			goto found;
 	}
 	mutex_unlock(&loop_ctl_mutex);
@@ -2590,10 +2609,14 @@ static void __exit loop_exit(void)
 	unregister_blkdev(LOOP_MAJOR, "loop");
 	misc_deregister(&loop_misc);
 
-	mutex_lock(&loop_ctl_mutex);
+	/*
+	 * There is no need to use loop_ctl_mutex here, for nobody else can
+	 * access loop_index_idr when this module is unloading (unless forced
+	 * module unloading is requested). If this is not a clean unloading,
+	 * we have no means to avoid kernel crash.
+	 */
 	idr_for_each_entry(&loop_index_idr, lo, id)
 		loop_remove(lo);
-	mutex_unlock(&loop_ctl_mutex);
 
 	idr_destroy(&loop_index_idr);
 }
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 1988899db63a..04c88dd6eabd 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -68,6 +68,7 @@ struct loop_device {
 	struct blk_mq_tag_set	tag_set;
 	struct gendisk		*lo_disk;
 	struct mutex		lo_mutex;
+	bool			idr_visible;
 };
 
 struct loop_cmd {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] loop: reduce the loop_ctl_mutex scope
  2021-08-29 13:47           ` [PATCH v3] " Tetsuo Handa
@ 2021-08-30  7:13             ` Christoph Hellwig
  2021-09-01  5:36               ` Christoph Hellwig
  2021-09-01  6:10             ` Christoph Hellwig
  2021-09-04  4:16             ` [PATCH v3] " Jens Axboe
  2 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2021-08-30  7:13 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Christoph Hellwig, Jens Axboe, Hillf Danton, Pavel Tatashin,
	Tyler Hicks, linux-block

Hi Tetsuo,

this generally looks fine to me.  The only issue is that I remember is
that cmpxchg did historically not work on bool on all architectures.
I'm not sure if this has been fixed since.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] loop: reduce the loop_ctl_mutex scope
  2021-08-30  7:13             ` Christoph Hellwig
@ 2021-09-01  5:36               ` Christoph Hellwig
  2021-09-01  5:47                 ` Tetsuo Handa
  0 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2021-09-01  5:36 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Christoph Hellwig, Jens Axboe, Hillf Danton, Pavel Tatashin,
	Tyler Hicks, linux-block

On Mon, Aug 30, 2021 at 09:13:50AM +0200, Christoph Hellwig wrote:
> Hi Tetsuo,
> 
> this generally looks fine to me.  The only issue is that I remember is
> that cmpxchg did historically not work on bool on all architectures.
> I'm not sure if this has been fixed since.

Looks like that still is an issue based on the buildbot reports on
linux-kernel.  I'd suggest to resend with idr_visible as an int.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] loop: reduce the loop_ctl_mutex scope
  2021-09-01  5:36               ` Christoph Hellwig
@ 2021-09-01  5:47                 ` Tetsuo Handa
  0 siblings, 0 replies; 14+ messages in thread
From: Tetsuo Handa @ 2021-09-01  5:47 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Hillf Danton, Pavel Tatashin, Tyler Hicks, linux-block

On 2021/09/01 14:36, Christoph Hellwig wrote:
> On Mon, Aug 30, 2021 at 09:13:50AM +0200, Christoph Hellwig wrote:
>> Hi Tetsuo,
>>
>> this generally looks fine to me.  The only issue is that I remember is
>> that cmpxchg did historically not work on bool on all architectures.
>> I'm not sure if this has been fixed since.
> 
> Looks like that still is an issue based on the buildbot reports on
> linux-kernel.  I'd suggest to resend with idr_visible as an int.
> 

I sent "[PATCH v3] loop: reduce the loop_ctl_mutex scope" as not using cmpxchg().
The buildbot is reporting errors on the v2 patch.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] loop: reduce the loop_ctl_mutex scope
  2021-08-29 13:47           ` [PATCH v3] " Tetsuo Handa
  2021-08-30  7:13             ` Christoph Hellwig
@ 2021-09-01  6:10             ` Christoph Hellwig
  2021-09-02  0:07               ` [PATCH v3 (repost)] " Tetsuo Handa
  2021-09-04  4:16             ` [PATCH v3] " Jens Axboe
  2 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2021-09-01  6:10 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Christoph Hellwig, Jens Axboe, Hillf Danton, Pavel Tatashin,
	Tyler Hicks, linux-block

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v3 (repost)] loop: reduce the loop_ctl_mutex scope
  2021-09-01  6:10             ` Christoph Hellwig
@ 2021-09-02  0:07               ` Tetsuo Handa
  0 siblings, 0 replies; 14+ messages in thread
From: Tetsuo Handa @ 2021-09-02  0:07 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block

syzbot is reporting circular locking problem at __loop_clr_fd() [1], for
commit a160c6159d4a0cf8 ("block: add an optional probe callback to
major_names") is calling the module's probe function with major_names_lock
held.

Fortunately, since commit 990e78116d38059c ("block: loop: fix deadlock
between open and remove") stopped holding loop_ctl_mutex in lo_open(),
current role of loop_ctl_mutex is to serialize access to loop_index_idr
and loop_add()/loop_remove(); in other words, management of id for IDR.
To avoid holding loop_ctl_mutex during whole add/remove operation, use
a bool flag to indicate whether the loop device is ready for use.

loop_unregister_transfer() which is called from cleanup_cryptoloop()
currently has possibility of use-after-free problem due to lack of
serialization between kfree() from loop_remove() from loop_control_remove()
and mutex_lock() from unregister_transfer_cb(). But since lo->lo_encryption
should be already NULL when this function is called due to module unload,
and commit 222013f9ac30b9ce ("cryptoloop: add a deprecation warning")
indicates that we will remove this function shortly, this patch updates
this function to emit warning instead of checking lo->lo_encryption.

Holding loop_ctl_mutex in loop_exit() is pointless, for all users must
close /dev/loop-control and /dev/loop$num (in order to drop module's
refcount to 0) before loop_exit() starts, and nobody can open
/dev/loop-control or /dev/loop$num afterwards.

Link: https://syzkaller.appspot.com/bug?id=7bb10e8b62f83e4d445cdf4c13d69e407e629558 [1]
Reported-by: syzbot <syzbot+f61766d5763f9e7a118f@syzkaller.appspotmail.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
Changes in v3:
  Don't use cmpxchg(), for kernel test robot <lkp@intel.com> reported
  that some archtectures do not support cmpxchg() on bool.
  Add data_race() annotation to loop_control_get_free().

Changes in v2:
  Don't replace loop_ctl_mutex mutex with loop_idr_spinlock spinlock.
  Don't traverse on loop_index_idr at loop_unregister_transfer().
  Don't use refcount for handling duplicated removal requests.

 drivers/block/loop.c | 75 +++++++++++++++++++++++++++++---------------
 drivers/block/loop.h |  1 +
 2 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index fa1c298a8cfb..7bf4686af774 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2111,18 +2111,6 @@ int loop_register_transfer(struct loop_func_table *funcs)
 	return 0;
 }
 
-static int unregister_transfer_cb(int id, void *ptr, void *data)
-{
-	struct loop_device *lo = ptr;
-	struct loop_func_table *xfer = data;
-
-	mutex_lock(&lo->lo_mutex);
-	if (lo->lo_encryption == xfer)
-		loop_release_xfer(lo);
-	mutex_unlock(&lo->lo_mutex);
-	return 0;
-}
-
 int loop_unregister_transfer(int number)
 {
 	unsigned int n = number;
@@ -2130,9 +2118,20 @@ int loop_unregister_transfer(int number)
 
 	if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
 		return -EINVAL;
+	/*
+	 * This function is called from only cleanup_cryptoloop().
+	 * Given that each loop device that has a transfer enabled holds a
+	 * reference to the module implementing it we should never get here
+	 * with a transfer that is set (unless forced module unloading is
+	 * requested). Thus, check module's refcount and warn if this is
+	 * not a clean unloading.
+	 */
+#ifdef CONFIG_MODULE_UNLOAD
+	if (xfer->owner && module_refcount(xfer->owner) != -1)
+		pr_err("Danger! Unregistering an in use transfer function.\n");
+#endif
 
 	xfer_funcs[n] = NULL;
-	idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
 	return 0;
 }
 
@@ -2323,8 +2322,9 @@ static int loop_add(int i)
 	} else {
 		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
 	}
+	mutex_unlock(&loop_ctl_mutex);
 	if (err < 0)
-		goto out_unlock;
+		goto out_free_dev;
 	i = err;
 
 	err = -ENOMEM;
@@ -2393,15 +2393,19 @@ static int loop_add(int i)
 	disk->events		= DISK_EVENT_MEDIA_CHANGE;
 	disk->event_flags	= DISK_EVENT_FLAG_UEVENT;
 	sprintf(disk->disk_name, "loop%d", i);
+	/* Make this loop device reachable from pathname. */
 	add_disk(disk);
+	/* Show this loop device. */
+	mutex_lock(&loop_ctl_mutex);
+	lo->idr_visible = true;
 	mutex_unlock(&loop_ctl_mutex);
 	return i;
 
 out_cleanup_tags:
 	blk_mq_free_tag_set(&lo->tag_set);
 out_free_idr:
+	mutex_lock(&loop_ctl_mutex);
 	idr_remove(&loop_index_idr, i);
-out_unlock:
 	mutex_unlock(&loop_ctl_mutex);
 out_free_dev:
 	kfree(lo);
@@ -2411,9 +2415,14 @@ static int loop_add(int i)
 
 static void loop_remove(struct loop_device *lo)
 {
+	/* Make this loop device unreachable from pathname. */
 	del_gendisk(lo->lo_disk);
 	blk_cleanup_disk(lo->lo_disk);
 	blk_mq_free_tag_set(&lo->tag_set);
+	mutex_lock(&loop_ctl_mutex);
+	idr_remove(&loop_index_idr, lo->lo_number);
+	mutex_unlock(&loop_ctl_mutex);
+	/* There is no route which can find this loop device. */
 	mutex_destroy(&lo->lo_mutex);
 	kfree(lo);
 }
@@ -2437,31 +2446,40 @@ static int loop_control_remove(int idx)
 		return -EINVAL;
 	}
 		
+	/* Hide this loop device for serialization. */
 	ret = mutex_lock_killable(&loop_ctl_mutex);
 	if (ret)
 		return ret;
-
 	lo = idr_find(&loop_index_idr, idx);
-	if (!lo) {
+	if (!lo || !lo->idr_visible)
 		ret = -ENODEV;
-		goto out_unlock_ctrl;
-	}
+	else
+		lo->idr_visible = false;
+	mutex_unlock(&loop_ctl_mutex);
+	if (ret)
+		return ret;
 
+	/* Check whether this loop device can be removed. */
 	ret = mutex_lock_killable(&lo->lo_mutex);
 	if (ret)
-		goto out_unlock_ctrl;
+		goto mark_visible;
 	if (lo->lo_state != Lo_unbound ||
 	    atomic_read(&lo->lo_refcnt) > 0) {
 		mutex_unlock(&lo->lo_mutex);
 		ret = -EBUSY;
-		goto out_unlock_ctrl;
+		goto mark_visible;
 	}
+	/* Mark this loop device no longer open()-able. */
 	lo->lo_state = Lo_deleting;
 	mutex_unlock(&lo->lo_mutex);
 
-	idr_remove(&loop_index_idr, lo->lo_number);
 	loop_remove(lo);
-out_unlock_ctrl:
+	return 0;
+
+mark_visible:
+	/* Show this loop device again. */
+	mutex_lock(&loop_ctl_mutex);
+	lo->idr_visible = true;
 	mutex_unlock(&loop_ctl_mutex);
 	return ret;
 }
@@ -2475,7 +2493,8 @@ static int loop_control_get_free(int idx)
 	if (ret)
 		return ret;
 	idr_for_each_entry(&loop_index_idr, lo, id) {
-		if (lo->lo_state == Lo_unbound)
+		/* Hitting a race results in creating a new loop device which is harmless. */
+		if (lo->idr_visible && data_race(lo->lo_state) == Lo_unbound)
 			goto found;
 	}
 	mutex_unlock(&loop_ctl_mutex);
@@ -2591,10 +2610,14 @@ static void __exit loop_exit(void)
 	unregister_blkdev(LOOP_MAJOR, "loop");
 	misc_deregister(&loop_misc);
 
-	mutex_lock(&loop_ctl_mutex);
+	/*
+	 * There is no need to use loop_ctl_mutex here, for nobody else can
+	 * access loop_index_idr when this module is unloading (unless forced
+	 * module unloading is requested). If this is not a clean unloading,
+	 * we have no means to avoid kernel crash.
+	 */
 	idr_for_each_entry(&loop_index_idr, lo, id)
 		loop_remove(lo);
-	mutex_unlock(&loop_ctl_mutex);
 
 	idr_destroy(&loop_index_idr);
 }
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 1988899db63a..04c88dd6eabd 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -68,6 +68,7 @@ struct loop_device {
 	struct blk_mq_tag_set	tag_set;
 	struct gendisk		*lo_disk;
 	struct mutex		lo_mutex;
+	bool			idr_visible;
 };
 
 struct loop_cmd {
-- 
2.18.4



^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] loop: reduce the loop_ctl_mutex scope
  2021-08-29 13:47           ` [PATCH v3] " Tetsuo Handa
  2021-08-30  7:13             ` Christoph Hellwig
  2021-09-01  6:10             ` Christoph Hellwig
@ 2021-09-04  4:16             ` Jens Axboe
  2 siblings, 0 replies; 14+ messages in thread
From: Jens Axboe @ 2021-09-04  4:16 UTC (permalink / raw)
  To: Tetsuo Handa, Christoph Hellwig
  Cc: Hillf Danton, Pavel Tatashin, Tyler Hicks, linux-block

On 8/29/21 7:47 AM, Tetsuo Handa wrote:
> syzbot is reporting circular locking problem at __loop_clr_fd() [1], for
> commit a160c6159d4a0cf8 ("block: add an optional probe callback to
> major_names") is calling the module's probe function with major_names_lock
> held.
> 
> Fortunately, since commit 990e78116d38059c ("block: loop: fix deadlock
> between open and remove") stopped holding loop_ctl_mutex in lo_open(),
> current role of loop_ctl_mutex is to serialize access to loop_index_idr
> and loop_add()/loop_remove(); in other words, management of id for IDR.
> To avoid holding loop_ctl_mutex during whole add/remove operation, use
> a bool flag to indicate whether the loop device is ready for use.
> 
> loop_unregister_transfer() which is called from cleanup_cryptoloop()
> currently has possibility of use-after-free access due to lack of
> serialization between kfree() from loop_remove() from loop_control_remove()
> and mutex_lock() from unregister_transfer_cb(). But since lo->lo_encryption
> should be already NULL when this function is called due to module unload,
> and commit 222013f9ac30b9ce ("cryptoloop: add a deprecation warning")
> indicates that we will remove this function shortly, this patch updates
> this function to emit warning instead of checking lo->lo_encryption.
> 
> Holding loop_ctl_mutex in loop_exit() is pointless, for all users must
> close /dev/loop-control and /dev/loop$num (in order to drop module's
> refcount to 0) before loop_exit() starts, and nobody can open
> /dev/loop-control or /dev/loop$num afterwards.

Applied, thanks.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2021-09-04  4:16 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-26 16:03 [PATCH] loop: replace loop_ctl_mutex with loop_idr_spinlock Tetsuo Handa
2021-08-27 18:43 ` Christoph Hellwig
2021-08-28  1:10   ` Tetsuo Handa
2021-08-28  2:22     ` Tetsuo Handa
2021-08-28  7:18     ` Christoph Hellwig
2021-08-28 13:50       ` Tetsuo Handa
2021-08-29  1:22         ` [PATCH v2] loop: reduce the loop_ctl_mutex scope Tetsuo Handa
2021-08-29 13:47           ` [PATCH v3] " Tetsuo Handa
2021-08-30  7:13             ` Christoph Hellwig
2021-09-01  5:36               ` Christoph Hellwig
2021-09-01  5:47                 ` Tetsuo Handa
2021-09-01  6:10             ` Christoph Hellwig
2021-09-02  0:07               ` [PATCH v3 (repost)] " Tetsuo Handa
2021-09-04  4:16             ` [PATCH v3] " Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).