All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] loop: reintroduce global lock for safe loop_validate_file() traversal
@ 2021-07-02 15:30 Tetsuo Handa
  2021-07-02 17:28 ` Tetsuo Handa
  2021-07-04  5:42 ` [PATCH] " Tetsuo Handa
  0 siblings, 2 replies; 11+ messages in thread
From: Tetsuo Handa @ 2021-07-02 15:30 UTC (permalink / raw)
  To: Petr Vorel, Pavel Tatashin, Tyler Hicks, Christoph Hellwig
  Cc: Jens Axboe, linux-block, Tetsuo Handa

Commit 6cc8e7430801fa23 ("loop: scale loop device by introducing per
device lock") re-opened a race window for NULL pointer dereference at
loop_validate_file() where commit 310ca162d779efee ("block/loop: Use
global lock for ioctl() operation.") closed.

Although we need to guarantee that other loop devices will not change
during traversal, we can't take remote "struct loop_device"->lo_mutex
inside loop_validate_file() in order to avoid AB-BA deadlock. Therefore,
introduce a global lock dedicated for loop_validate_file() which is
conditionally taken before local "struct loop_device"->lo_mutex is taken.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Fixes: 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock")
---
 drivers/block/loop.c | 138 ++++++++++++++++++++++++++++---------------
 1 file changed, 89 insertions(+), 49 deletions(-)

This is a submission as a patch based on comments from Christoph Hellwig
at https://lkml.kernel.org/r/20210623144130.GA738@lst.de . I don't know
this patch can close all race windows.

For example, loop_change_fd() says

  This can only work if the loop device is used read-only, and if the
  new backing store is the same size and type as the old backing store.

and has

        /* size of the new backing store needs to be the same */
        if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
                goto out_err;

check. Then, do we also need to apply this global lock for
lo_simple_ioctl(LOOP_SET_CAPACITY) path because it changes the size
by loop_set_size(lo, get_loop_size(lo, lo->lo_backing_file)) ?
How serious if this check is racy?

Any other locations to apply this global lock?

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 9f5a93688164..040f28b4bd5d 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -88,6 +88,35 @@
 
 static DEFINE_IDR(loop_index_idr);
 static DEFINE_MUTEX(loop_ctl_mutex);
+static DEFINE_MUTEX(loop_validate_mutex);
+
+static inline void loop_global_lock(struct loop_device *lo)
+{
+	mutex_lock(&loop_validate_mutex);
+	mutex_lock(&lo->lo_mutex);
+}
+
+static int loop_global_lock_killable(struct loop_device *lo, bool global)
+{
+	int err;
+
+	if (global) {
+		err = mutex_lock_killable(&loop_validate_mutex);
+		if (err)
+			return err;
+	}
+	err = mutex_lock_killable(&lo->lo_mutex);
+	if (err && global)
+		mutex_unlock(&loop_validate_mutex);
+	return err;
+}
+
+static void loop_global_unlock(struct loop_device *lo, bool global)
+{
+	mutex_unlock(&lo->lo_mutex);
+	if (global)
+		mutex_unlock(&loop_validate_mutex);
+}
 
 static int max_part;
 static int part_shift;
@@ -672,13 +701,13 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
 	while (is_loop_device(f)) {
 		struct loop_device *l;
 
+		lockdep_assert_held(&loop_validate_mutex);
 		if (f->f_mapping->host->i_rdev == bdev->bd_dev)
 			return -EBADF;
 
 		l = I_BDEV(f->f_mapping->host)->bd_disk->private_data;
-		if (l->lo_state != Lo_bound) {
+		if (l->lo_state != Lo_bound)
 			return -EINVAL;
-		}
 		f = l->lo_backing_file;
 	}
 	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
@@ -697,13 +726,20 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 			  unsigned int arg)
 {
-	struct file	*file = NULL, *old_file;
+	struct file *file = fget(arg);
+	struct file *old_file;
 	int		error;
 	bool		partscan;
+	bool is_loop;
 
-	error = mutex_lock_killable(&lo->lo_mutex);
-	if (error)
+	if (!file)
+		return -EBADF;
+	is_loop = is_loop_device(file);
+	error = loop_global_lock_killable(lo, is_loop);
+	if (error) {
+		fput(file);
 		return error;
+	}
 	error = -ENXIO;
 	if (lo->lo_state != Lo_bound)
 		goto out_err;
@@ -713,11 +749,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
 		goto out_err;
 
-	error = -EBADF;
-	file = fget(arg);
-	if (!file)
-		goto out_err;
-
 	error = loop_validate_file(file, bdev);
 	if (error)
 		goto out_err;
@@ -740,7 +771,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	loop_update_dio(lo);
 	blk_mq_unfreeze_queue(lo->lo_queue);
 	partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
 	/*
 	 * We must drop file reference outside of lo_mutex as dropping
 	 * the file ref can take open_mutex which creates circular locking
@@ -752,9 +783,8 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	return 0;
 
 out_err:
-	mutex_unlock(&lo->lo_mutex);
-	if (file)
-		fput(file);
+	loop_global_unlock(lo, is_loop);
+	fput(file);
 	return error;
 }
 
@@ -1143,6 +1173,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	loff_t		size;
 	bool		partscan;
 	unsigned short  bsize;
+	bool is_loop;
 
 	/* This is safe, since we have a reference from open(). */
 	__module_get(THIS_MODULE);
@@ -1162,7 +1193,8 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 			goto out_putf;
 	}
 
-	error = mutex_lock_killable(&lo->lo_mutex);
+	is_loop = is_loop_device(file);
+	error = loop_global_lock_killable(lo, is_loop);
 	if (error)
 		goto out_bdev;
 
@@ -1253,7 +1285,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	 * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
 	 */
 	bdgrab(bdev);
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
 	if (partscan)
 		loop_reread_partitions(lo);
 	if (!(mode & FMODE_EXCL))
@@ -1261,7 +1293,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	return 0;
 
 out_unlock:
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
 out_bdev:
 	if (!(mode & FMODE_EXCL))
 		bd_abort_claiming(bdev, loop_configure);
@@ -1400,15 +1432,12 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
 
 static int loop_clr_fd(struct loop_device *lo)
 {
-	int err;
+	int err = loop_global_lock_killable(lo, true);
 
-	err = mutex_lock_killable(&lo->lo_mutex);
 	if (err)
 		return err;
-	if (lo->lo_state != Lo_bound) {
-		mutex_unlock(&lo->lo_mutex);
-		return -ENXIO;
-	}
+	if (lo->lo_state != Lo_bound)
+		err = -ENXIO;
 	/*
 	 * If we've explicitly asked to tear down the loop device,
 	 * and it has an elevated reference count, set it for auto-teardown when
@@ -1419,15 +1448,18 @@ static int loop_clr_fd(struct loop_device *lo)
 	 * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
 	 * command to fail with EBUSY.
 	 */
-	if (atomic_read(&lo->lo_refcnt) > 1) {
+	else if (atomic_read(&lo->lo_refcnt) > 1)
 		lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
-		mutex_unlock(&lo->lo_mutex);
-		return 0;
-	}
-	lo->lo_state = Lo_rundown;
-	mutex_unlock(&lo->lo_mutex);
-
-	return __loop_clr_fd(lo, false);
+	else
+		lo->lo_state = Lo_rundown;
+	loop_global_unlock(lo, true);
+	/*
+	 * Since nobody updates lo->lo_state if lo->lo_state == Lo_rundown,
+	 * this check is safe.
+	 */
+	if (lo->lo_state == Lo_rundown)
+		err = __loop_clr_fd(lo, false);
+	return err;
 }
 
 static int
@@ -1988,31 +2020,39 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 {
 	struct loop_device *lo = disk->private_data;
 
-	mutex_lock(&lo->lo_mutex);
-	if (atomic_dec_return(&lo->lo_refcnt))
-		goto out_unlock;
-
+	loop_global_lock(lo);
+	if (atomic_dec_return(&lo->lo_refcnt)) {
+		loop_global_unlock(lo, true);
+		return;
+	}
+	/*
+	 * In autoclear mode, stop the loop thread and remove configuration
+	 * after last close.
+	 */
 	if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
-		if (lo->lo_state != Lo_bound)
-			goto out_unlock;
-		lo->lo_state = Lo_rundown;
-		mutex_unlock(&lo->lo_mutex);
+		if (lo->lo_state == Lo_bound)
+			lo->lo_state = Lo_rundown;
+		loop_global_unlock(lo, true);
 		/*
-		 * In autoclear mode, stop the loop thread
-		 * and remove configuration after last close.
+		 * Since nobody updates lo->lo_state
+		 * if lo->lo_state == Lo_rundown, this check is safe.
 		 */
-		__loop_clr_fd(lo, true);
+		if (lo->lo_state == Lo_rundown)
+			__loop_clr_fd(lo, true);
 		return;
-	} else if (lo->lo_state == Lo_bound) {
-		/*
-		 * Otherwise keep thread (if running) and config,
-		 * but flush possible ongoing bios in thread.
-		 */
+	}
+	/*
+	 * Otherwise keep thread (if running) and config, but flush possible
+	 * ongoing bios in thread.
+	 *
+	 * We can release loop_validate_mutex before flushing, for still held
+	 * lo->lo_mutex prevents lo->lo_state from changing.
+	 */
+	mutex_unlock(&loop_validate_mutex);
+	if (lo->lo_state == Lo_bound) {
 		blk_mq_freeze_queue(lo->lo_queue);
 		blk_mq_unfreeze_queue(lo->lo_queue);
 	}
-
-out_unlock:
 	mutex_unlock(&lo->lo_mutex);
 }
 
-- 
2.18.4


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] loop: reintroduce global lock for safe loop_validate_file() traversal
  2021-07-02 15:30 [PATCH] loop: reintroduce global lock for safe loop_validate_file() traversal Tetsuo Handa
@ 2021-07-02 17:28 ` Tetsuo Handa
  2021-07-06  5:46   ` Christoph Hellwig
  2021-07-04  5:42 ` [PATCH] " Tetsuo Handa
  1 sibling, 1 reply; 11+ messages in thread
From: Tetsuo Handa @ 2021-07-02 17:28 UTC (permalink / raw)
  To: Petr Vorel, Pavel Tatashin, Tyler Hicks, Christoph Hellwig
  Cc: Jens Axboe, linux-block

On 2021/07/03 0:30, Tetsuo Handa wrote:
>  drivers/block/loop.c | 138 ++++++++++++++++++++++++++++---------------
>  1 file changed, 89 insertions(+), 49 deletions(-)
> 
> This is a submission as a patch based on comments from Christoph Hellwig
> at https://lkml.kernel.org/r/20210623144130.GA738@lst.de . I don't know
> this patch can close all race windows.
> 
> For example, loop_change_fd() says
> 
>   This can only work if the loop device is used read-only, and if the
>   new backing store is the same size and type as the old backing store.
> 
> and has
> 
>         /* size of the new backing store needs to be the same */
>         if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
>                 goto out_err;
> 
> check. Then, do we also need to apply this global lock for
> lo_simple_ioctl(LOOP_SET_CAPACITY) path because it changes the size
> by loop_set_size(lo, get_loop_size(lo, lo->lo_backing_file)) ?
> How serious if this check is racy?
> 
> Any other locations to apply this global lock?
> 

Well, apart from questions above, is this smaller patch safe?

 drivers/block/loop.c |   72 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 55 insertions(+), 17 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index cc0e8c39a48b..d3bb9c34a3e0 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -88,6 +88,29 @@
 
 static DEFINE_IDR(loop_index_idr);
 static DEFINE_MUTEX(loop_ctl_mutex);
+static DEFINE_MUTEX(loop_validate_mutex);
+
+static int loop_global_lock_killable(struct loop_device *lo, bool global)
+{
+	int err;
+
+	if (global) {
+		err = mutex_lock_killable(&loop_validate_mutex);
+		if (err)
+			return err;
+	}
+	err = mutex_lock_killable(&lo->lo_mutex);
+	if (err && global)
+		mutex_unlock(&loop_validate_mutex);
+	return err;
+}
+
+static void loop_global_unlock(struct loop_device *lo, bool global)
+{
+	mutex_unlock(&lo->lo_mutex);
+	if (global)
+		mutex_unlock(&loop_validate_mutex);
+}
 
 static int max_part;
 static int part_shift;
@@ -672,13 +695,13 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
 	while (is_loop_device(f)) {
 		struct loop_device *l;
 
+		lockdep_assert_held(&loop_validate_mutex);
 		if (f->f_mapping->host->i_rdev == bdev->bd_dev)
 			return -EBADF;
 
 		l = I_BDEV(f->f_mapping->host)->bd_disk->private_data;
-		if (l->lo_state != Lo_bound) {
+		if (l->lo_state != Lo_bound)
 			return -EINVAL;
-		}
 		f = l->lo_backing_file;
 	}
 	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
@@ -697,13 +720,20 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 			  unsigned int arg)
 {
-	struct file	*file = NULL, *old_file;
+	struct file *file = fget(arg);
+	struct file *old_file;
 	int		error;
 	bool		partscan;
+	bool is_loop;
 
-	error = mutex_lock_killable(&lo->lo_mutex);
-	if (error)
+	if (!file)
+		return -EBADF;
+	is_loop = is_loop_device(file);
+	error = loop_global_lock_killable(lo, is_loop);
+	if (error) {
+		fput(file);
 		return error;
+	}
 	error = -ENXIO;
 	if (lo->lo_state != Lo_bound)
 		goto out_err;
@@ -713,11 +743,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
 		goto out_err;
 
-	error = -EBADF;
-	file = fget(arg);
-	if (!file)
-		goto out_err;
-
 	error = loop_validate_file(file, bdev);
 	if (error)
 		goto out_err;
@@ -740,7 +765,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	loop_update_dio(lo);
 	blk_mq_unfreeze_queue(lo->lo_queue);
 	partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
 	/*
 	 * We must drop file reference outside of lo_mutex as dropping
 	 * the file ref can take open_mutex which creates circular locking
@@ -752,9 +777,8 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	return 0;
 
 out_err:
-	mutex_unlock(&lo->lo_mutex);
-	if (file)
-		fput(file);
+	loop_global_unlock(lo, is_loop);
+	fput(file);
 	return error;
 }
 
@@ -1143,6 +1167,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	loff_t		size;
 	bool		partscan;
 	unsigned short  bsize;
+	bool is_loop;
 
 	/* This is safe, since we have a reference from open(). */
 	__module_get(THIS_MODULE);
@@ -1162,7 +1187,8 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 			goto out_putf;
 	}
 
-	error = mutex_lock_killable(&lo->lo_mutex);
+	is_loop = is_loop_device(file);
+	error = loop_global_lock_killable(lo, is_loop);
 	if (error)
 		goto out_bdev;
 
@@ -1253,7 +1279,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	 * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
 	 */
 	bdgrab(bdev);
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
 	if (partscan)
 		loop_reread_partitions(lo);
 	if (!(mode & FMODE_EXCL))
@@ -1261,7 +1287,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	return 0;
 
 out_unlock:
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
 out_bdev:
 	if (!(mode & FMODE_EXCL))
 		bd_abort_claiming(bdev, loop_configure);
@@ -1283,6 +1309,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
 	int lo_number;
 	struct loop_worker *pos, *worker;
 
+	/*
+	 * Flush loop_configure() and loop_change_fd(). It is acceptable for
+	 * loop_validate_file() to succeed, for actual clear operation has not
+	 * started yet (i.e. effectively lo->lo_state == Lo_bound state).
+	 */
+	mutex_lock(&loop_validate_mutex);
+	mutex_unlock(&loop_validate_mutex);
+	/*
+	 * loop_validate_file() now fails because lo->lo_state != Lo_bound
+	 * became visible.
+	 */
+
 	mutex_lock(&lo->lo_mutex);
 	if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
 		err = -ENXIO;

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] loop: reintroduce global lock for safe loop_validate_file() traversal
  2021-07-02 15:30 [PATCH] loop: reintroduce global lock for safe loop_validate_file() traversal Tetsuo Handa
  2021-07-02 17:28 ` Tetsuo Handa
@ 2021-07-04  5:42 ` Tetsuo Handa
  2021-07-06  5:43   ` Christoph Hellwig
  2021-07-06 14:35   ` Arjan van de Ven
  1 sibling, 2 replies; 11+ messages in thread
From: Tetsuo Handa @ 2021-07-04  5:42 UTC (permalink / raw)
  To: Arjan van de Ven, Alexander Viro
  Cc: Jens Axboe, linux-block, Petr Vorel, Pavel Tatashin, Tyler Hicks,
	Christoph Hellwig

Hello.

On 2021/07/03 0:30, Tetsuo Handa wrote:
> Commit 6cc8e7430801fa23 ("loop: scale loop device by introducing per
> device lock") re-opened a race window for NULL pointer dereference at
> loop_validate_file() where commit 310ca162d779efee ("block/loop: Use
> global lock for ioctl() operation.") closed.
> 
> Although we need to guarantee that other loop devices will not change
> during traversal, we can't take remote "struct loop_device"->lo_mutex
> inside loop_validate_file() in order to avoid AB-BA deadlock. Therefore,
> introduce a global lock dedicated for loop_validate_file() which is
> conditionally taken before local "struct loop_device"->lo_mutex is taken.
> 
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Fixes: 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock")
> ---
>  drivers/block/loop.c | 138 ++++++++++++++++++++++++++++---------------
>  1 file changed, 89 insertions(+), 49 deletions(-)
> 
> This is a submission as a patch based on comments from Christoph Hellwig
> at https://lkml.kernel.org/r/20210623144130.GA738@lst.de . I don't know
> this patch can close all race windows.
> 
> For example, loop_change_fd() says
> 
>   This can only work if the loop device is used read-only, and if the
>   new backing store is the same size and type as the old backing store.
> 
> and has
> 
>         /* size of the new backing store needs to be the same */
>         if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
>                 goto out_err;
> 
> check. Then, do we also need to apply this global lock for
> lo_simple_ioctl(LOOP_SET_CAPACITY) path because it changes the size
> by loop_set_size(lo, get_loop_size(lo, lo->lo_backing_file)) ?
> How serious if this check is racy?
> 
> Any other locations to apply this global lock?
> 

LOOP_CHANGE_FD was introduced in 2.6.5 with the following changelog.

# 04/03/12	akpm@osdl.org	1.1608.83.42
# [PATCH] LOOP_CHANGE_FD ioctl
# 
# From: Arjan van de Ven <arjanv@redhat.com>
# 
# The patch below (written by Al Viro) solves a nasty chicken-and-egg issue
# for operating system installers (well at least anaconda but the problem
# domain is not exclusive to that)
# 
# The basic problem is this:
# 
# - The small first stage installer locates the image file of the second
#   stage installer (which has X and all the graphical stuff); this image can
#   be on the same CD, but it can come via NFS, http or ftp or ...  as well.
# 
# - The first stage installer loop-back mounts this image and gives control
#   to the second stage installer by calling some binary there.
# 
# - The graphical installer then asks the user all those questions and
#   starts installing packages.  Again the packages can come from the CD but
#   also from NFS or http or ...
# 
# Now in case of a CD install, once all requested packages from the first CD
# are installed, the installer wants to unmount and eject the CD and prompt
# the user to put CD 2 in.......  EXCEPT that the unmount can't work since
# the installer is actually running from a loopback mount of this cd.
# 
# The solution is a "LOOP_CHANGE_FD" ioctl, where basically the installer
# copies the image to the harddisk (which can only be done late since only
# late the target harddisk is mkfs'd) and then magically switches the backing
# store FD from underneath the loop device to the one on the target harddisk
# (and thus unbusying the CD mount).
# 
# This is obviously only allowed if the size of the new image is identical
# and if the loop image is read-only in the first place.  It's the
# responsibility of root to make sure the contents is the same (but that's of
# the give-root-enough-rope kind)

It is not clear why the size of old and new image files need to be the same.

It would be true that the size of old and new image files will not change
if these are stored in a read-only filesystem (e.g. isofs). But I think that
the size of new file (obtained by get_loop_size(lo, file)) and
the size of old file (obtained by get_loop_size(lo, old_file)) can change
via e.g. truncate() if these files are stored in a read-write filesystem.
Therefore,

	if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
		goto out_err;

check in loop_change_fd() is racy. When we hit this race, nothing can prevent
LOOP_CHANGE_FD from succeeding, and I guess that nothing is broken by this race.
Can we kill this check (like a diff shown below)?

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index cc0e8c39a48b..14adb6d5bc07 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -691,8 +691,7 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
  * a new file. This is useful for operating system installers to free up
  * the original file and in High Availability environments to switch to
  * an alternative location for the content in case of server meltdown.
- * This can only work if the loop device is used read-only, and if the
- * new backing store is the same size and type as the old backing store.
+ * This can only work if the loop device is used read-only.
  */
 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 			  unsigned int arg)
@@ -724,12 +723,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 
 	old_file = lo->lo_backing_file;
 
-	error = -EINVAL;
-
-	/* size of the new backing store needs to be the same */
-	if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
-		goto out_err;
-
 	/* and ... switch */
 	blk_mq_freeze_queue(lo->lo_queue);
 	mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);



Also, does "the loop device is used read-only" check

        /* the loop device has to be read-only */
        error = -EINVAL;
        if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
                goto out_err;

make sense? If it is the responsibility of root to make sure that
the contents of the old and the new are the same, isn't it as well
the responsibility of root to use the loop device for read-only?
Unless there is a reason (i.e. we need this check in order to avoid
e.g. writeback failure, oops), can we also kill LO_FLAGS_READ_ONLY
check (like a diff shown below)?

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 14adb6d5bc07..af4ea57a4abb 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -691,7 +691,6 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
  * a new file. This is useful for operating system installers to free up
  * the original file and in High Availability environments to switch to
  * an alternative location for the content in case of server meltdown.
- * This can only work if the loop device is used read-only.
  */
 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 			  unsigned int arg)
@@ -707,11 +706,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	if (lo->lo_state != Lo_bound)
 		goto out_err;
 
-	/* the loop device has to be read-only */
-	error = -EINVAL;
-	if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
-		goto out_err;
-
 	error = -EBADF;
 	file = fget(arg);
 	if (!file)


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] loop: reintroduce global lock for safe loop_validate_file() traversal
  2021-07-04  5:42 ` [PATCH] " Tetsuo Handa
@ 2021-07-06  5:43   ` Christoph Hellwig
  2021-07-06 14:35   ` Arjan van de Ven
  1 sibling, 0 replies; 11+ messages in thread
From: Christoph Hellwig @ 2021-07-06  5:43 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Arjan van de Ven, Alexander Viro, Jens Axboe, linux-block,
	Petr Vorel, Pavel Tatashin, Tyler Hicks, Christoph Hellwig

On Sun, Jul 04, 2021 at 02:42:10PM +0900, Tetsuo Handa wrote:
> It is not clear why the size of old and new image files need to be the same.

To simplify things.  And given that no one is asking for lifting the
restriction I'd rather not make the code even more hairy.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] loop: reintroduce global lock for safe loop_validate_file() traversal
  2021-07-02 17:28 ` Tetsuo Handa
@ 2021-07-06  5:46   ` Christoph Hellwig
  2021-07-06 11:19     ` [PATCH v2] " Tetsuo Handa
  0 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2021-07-06  5:46 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Petr Vorel, Pavel Tatashin, Tyler Hicks, Christoph Hellwig,
	Jens Axboe, linux-block

> +static int loop_global_lock_killable(struct loop_device *lo, bool global)
> +{
> +	int err;
> +
> +	if (global) {
> +		err = mutex_lock_killable(&loop_validate_mutex);
> +		if (err)
> +			return err;
> +	}
> +	err = mutex_lock_killable(&lo->lo_mutex);
> +	if (err && global)
> +		mutex_unlock(&loop_validate_mutex);
> +	return err;
> +}
> +
> +static void loop_global_unlock(struct loop_device *lo, bool global)
> +{
> +	mutex_unlock(&lo->lo_mutex);
> +	if (global)
> +		mutex_unlock(&loop_validate_mutex);
> +}

Comments explaining these functions would be nice.

>  static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
>  			  unsigned int arg)
>  {
> -	struct file	*file = NULL, *old_file;
> +	struct file *file = fget(arg);
> +	struct file *old_file;
>  	int		error;
>  	bool		partscan;
> +	bool is_loop;

This doesn't match the existing formatting.  Either keep the existing
one or change the few other variables as well.

> +	is_loop = is_loop_device(file);
> +	error = loop_global_lock_killable(lo, is_loop);
> +	if (error) {
> +		fput(file);
>  		return error;
> +	}

Please use a goto label to share the fput with the other error handling.

> @@ -1143,6 +1167,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
>  	loff_t		size;
>  	bool		partscan;
>  	unsigned short  bsize;
> +	bool is_loop;

Please follow the existing formatting.

Otherwise this looks reasonable to me.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2] loop: reintroduce global lock for safe loop_validate_file() traversal
  2021-07-06  5:46   ` Christoph Hellwig
@ 2021-07-06 11:19     ` Tetsuo Handa
  2021-07-06 14:40       ` [PATCH v3] " Tetsuo Handa
  0 siblings, 1 reply; 11+ messages in thread
From: Tetsuo Handa @ 2021-07-06 11:19 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Petr Vorel, Pavel Tatashin, Tyler Hicks, Jens Axboe, linux-block

Commit 6cc8e7430801fa23 ("loop: scale loop device by introducing per
device lock") re-opened a race window for NULL pointer dereference at
loop_validate_file() where commit 310ca162d779efee ("block/loop: Use
global lock for ioctl() operation.") has closed.

Although we need to guarantee that other loop devices will not change
during traversal, we can't take remote "struct loop_device"->lo_mutex
inside loop_validate_file() in order to avoid AB-BA deadlock. Therefore,
introduce a global lock dedicated for loop_validate_file() which is
conditionally taken before local "struct loop_device"->lo_mutex is taken.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Fixes: 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock")
---
Changes in v2:
  Minimize lock duration in __loop_clr_fd().
  Add kerneldoc to lock/unlock helper functions.
  Reformat local variables declaration.

 drivers/block/loop.c | 114 +++++++++++++++++++++++++++++++------------
 1 file changed, 83 insertions(+), 31 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index cc0e8c39a48b..6ca9f3b2d6b4 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -88,6 +88,47 @@
 
 static DEFINE_IDR(loop_index_idr);
 static DEFINE_MUTEX(loop_ctl_mutex);
+static DEFINE_MUTEX(loop_validate_mutex);
+
+/**
+ * loop_global_lock_killable() - take locks for safe loop_validate_file() test
+ *
+ * @lo: struct loop_device
+ * @global: true if @lo is about to bind another "struct loop_device", false otherwise
+ *
+ * Returns 0 on success, -EINTR otherwise.
+ *
+ * Since loop_validate_file() traverses on other "struct loop_device" if
+ * is_loop_device() is true, we need a global lock for serializing concurrent
+ * loop_configure()/loop_change_fd()/__loop_clr_fd() calls.
+ */
+static int loop_global_lock_killable(struct loop_device *lo, bool global)
+{
+	int err;
+
+	if (global) {
+		err = mutex_lock_killable(&loop_validate_mutex);
+		if (err)
+			return err;
+	}
+	err = mutex_lock_killable(&lo->lo_mutex);
+	if (err && global)
+		mutex_unlock(&loop_validate_mutex);
+	return err;
+}
+
+/**
+ * loop_global_unlock() - release locks taken by loop_global_lock_killable()
+ *
+ * @lo: struct loop_device
+ * @global: true if @lo was about to bind another "struct loop_device", false otherwise
+ */
+static void loop_global_unlock(struct loop_device *lo, bool global)
+{
+	mutex_unlock(&lo->lo_mutex);
+	if (global)
+		mutex_unlock(&loop_validate_mutex);
+}
 
 static int max_part;
 static int part_shift;
@@ -672,13 +713,13 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
 	while (is_loop_device(f)) {
 		struct loop_device *l;
 
+		lockdep_assert_held(&loop_validate_mutex);
 		if (f->f_mapping->host->i_rdev == bdev->bd_dev)
 			return -EBADF;
 
 		l = I_BDEV(f->f_mapping->host)->bd_disk->private_data;
-		if (l->lo_state != Lo_bound) {
+		if (l->lo_state != Lo_bound)
 			return -EINVAL;
-		}
 		f = l->lo_backing_file;
 	}
 	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
@@ -697,13 +738,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 			  unsigned int arg)
 {
-	struct file	*file = NULL, *old_file;
-	int		error;
-	bool		partscan;
+	struct file *file = fget(arg);
+	struct file *old_file;
+	int error;
+	bool partscan;
+	bool is_loop;
 
-	error = mutex_lock_killable(&lo->lo_mutex);
+	if (!file)
+		return -EBADF;
+	is_loop = is_loop_device(file);
+	error = loop_global_lock_killable(lo, is_loop);
 	if (error)
-		return error;
+		goto out_putf;
 	error = -ENXIO;
 	if (lo->lo_state != Lo_bound)
 		goto out_err;
@@ -713,11 +759,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
 		goto out_err;
 
-	error = -EBADF;
-	file = fget(arg);
-	if (!file)
-		goto out_err;
-
 	error = loop_validate_file(file, bdev);
 	if (error)
 		goto out_err;
@@ -740,7 +781,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	loop_update_dio(lo);
 	blk_mq_unfreeze_queue(lo->lo_queue);
 	partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
 	/*
 	 * We must drop file reference outside of lo_mutex as dropping
 	 * the file ref can take open_mutex which creates circular locking
@@ -752,9 +793,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	return 0;
 
 out_err:
-	mutex_unlock(&lo->lo_mutex);
-	if (file)
-		fput(file);
+	loop_global_unlock(lo, is_loop);
+out_putf:
+	fput(file);
 	return error;
 }
 
@@ -1136,22 +1177,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 			  struct block_device *bdev,
 			  const struct loop_config *config)
 {
-	struct file	*file;
-	struct inode	*inode;
+	struct file *file = fget(config->fd);
+	struct inode *inode;
 	struct address_space *mapping;
-	int		error;
-	loff_t		size;
-	bool		partscan;
-	unsigned short  bsize;
+	int error;
+	loff_t size;
+	bool partscan;
+	unsigned short bsize;
+	bool is_loop;
+
+	if (!file)
+		return -EBADF;
+	is_loop = is_loop_device(file);
 
 	/* This is safe, since we have a reference from open(). */
 	__module_get(THIS_MODULE);
 
-	error = -EBADF;
-	file = fget(config->fd);
-	if (!file)
-		goto out;
-
 	/*
 	 * If we don't hold exclusive handle for the device, upgrade to it
 	 * here to avoid changing device under exclusive owner.
@@ -1162,7 +1203,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 			goto out_putf;
 	}
 
-	error = mutex_lock_killable(&lo->lo_mutex);
+	error = loop_global_lock_killable(lo, is_loop);
 	if (error)
 		goto out_bdev;
 
@@ -1253,7 +1294,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	 * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
 	 */
 	bdgrab(bdev);
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
 	if (partscan)
 		loop_reread_partitions(lo);
 	if (!(mode & FMODE_EXCL))
@@ -1261,13 +1302,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	return 0;
 
 out_unlock:
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
 out_bdev:
 	if (!(mode & FMODE_EXCL))
 		bd_abort_claiming(bdev, loop_configure);
 out_putf:
 	fput(file);
-out:
 	/* This is safe: open() is still holding a reference. */
 	module_put(THIS_MODULE);
 	return error;
@@ -1283,6 +1323,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
 	int lo_number;
 	struct loop_worker *pos, *worker;
 
+	/*
+	 * Flush loop_configure() and loop_change_fd(). It is acceptable for
+	 * loop_validate_file() to succeed, for actual clear operation has not
+	 * started yet.
+	 */
+	mutex_lock(&loop_validate_mutex);
+	mutex_unlock(&loop_validate_mutex);
+	/*
+	 * loop_validate_file() now fails because l->lo_state != Lo_bound
+	 * became visible.
+	 */
+
 	mutex_lock(&lo->lo_mutex);
 	if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
 		err = -ENXIO;
-- 
2.18.4



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] loop: reintroduce global lock for safe loop_validate_file() traversal
  2021-07-04  5:42 ` [PATCH] " Tetsuo Handa
  2021-07-06  5:43   ` Christoph Hellwig
@ 2021-07-06 14:35   ` Arjan van de Ven
  1 sibling, 0 replies; 11+ messages in thread
From: Arjan van de Ven @ 2021-07-06 14:35 UTC (permalink / raw)
  To: Tetsuo Handa, Alexander Viro
  Cc: Jens Axboe, linux-block, Petr Vorel, Pavel Tatashin, Tyler Hicks,
	Christoph Hellwig

On 7/3/2021 10:42 PM, Tetsuo Handa wrote:
> 
> It is not clear why the size of old and new image files need to be the same.

filesystems have a heck of a time dealing with suddenly a smaller disk ;)

note that the commit in question predates the ability for filesystems to grow into larger spaces
or most of hotplug.

so it's quite possible that today we can allow switching to a larger file just fine

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v3] loop: reintroduce global lock for safe loop_validate_file() traversal
  2021-07-06 11:19     ` [PATCH v2] " Tetsuo Handa
@ 2021-07-06 14:40       ` Tetsuo Handa
  2021-07-19 13:34         ` Tetsuo Handa
  2021-07-23 16:19         ` Jens Axboe
  0 siblings, 2 replies; 11+ messages in thread
From: Tetsuo Handa @ 2021-07-06 14:40 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Petr Vorel, Pavel Tatashin, Tyler Hicks, Jens Axboe, linux-block

Commit 6cc8e7430801fa23 ("loop: scale loop device by introducing per
device lock") re-opened a race window for NULL pointer dereference at
loop_validate_file() where commit 310ca162d779efee ("block/loop: Use
global lock for ioctl() operation.") has closed.

Although we need to guarantee that other loop devices will not change
during traversal, we can't take remote "struct loop_device"->lo_mutex
inside loop_validate_file() in order to avoid AB-BA deadlock. Therefore,
introduce a global lock dedicated for loop_validate_file() which is
conditionally taken before local "struct loop_device"->lo_mutex is taken.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Fixes: 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock")
---
Changes in v3:
  Add memory barrier between loop_configure() and loop_validate_file().
  Flush at loop_change_fd() to avoid possible use-after-free.

Changes in v2:
  Minimize lock duration in __loop_clr_fd().
  Add kerneldoc to lock/unlock helper functions.
  Reformat local variables declaration.

 drivers/block/loop.c | 128 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 97 insertions(+), 31 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index cc0e8c39a48b..2a5b3d365250 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -88,6 +88,47 @@
 
 static DEFINE_IDR(loop_index_idr);
 static DEFINE_MUTEX(loop_ctl_mutex);
+static DEFINE_MUTEX(loop_validate_mutex);
+
+/**
+ * loop_global_lock_killable() - take locks for safe loop_validate_file() test
+ *
+ * @lo: struct loop_device
+ * @global: true if @lo is about to bind another "struct loop_device", false otherwise
+ *
+ * Returns 0 on success, -EINTR otherwise.
+ *
+ * Since loop_validate_file() traverses on other "struct loop_device" if
+ * is_loop_device() is true, we need a global lock for serializing concurrent
+ * loop_configure()/loop_change_fd()/__loop_clr_fd() calls.
+ */
+static int loop_global_lock_killable(struct loop_device *lo, bool global)
+{
+	int err;
+
+	if (global) {
+		err = mutex_lock_killable(&loop_validate_mutex);
+		if (err)
+			return err;
+	}
+	err = mutex_lock_killable(&lo->lo_mutex);
+	if (err && global)
+		mutex_unlock(&loop_validate_mutex);
+	return err;
+}
+
+/**
+ * loop_global_unlock() - release locks taken by loop_global_lock_killable()
+ *
+ * @lo: struct loop_device
+ * @global: true if @lo was about to bind another "struct loop_device", false otherwise
+ */
+static void loop_global_unlock(struct loop_device *lo, bool global)
+{
+	mutex_unlock(&lo->lo_mutex);
+	if (global)
+		mutex_unlock(&loop_validate_mutex);
+}
 
 static int max_part;
 static int part_shift;
@@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
 	while (is_loop_device(f)) {
 		struct loop_device *l;
 
+		lockdep_assert_held(&loop_validate_mutex);
 		if (f->f_mapping->host->i_rdev == bdev->bd_dev)
 			return -EBADF;
 
 		l = I_BDEV(f->f_mapping->host)->bd_disk->private_data;
-		if (l->lo_state != Lo_bound) {
+		if (l->lo_state != Lo_bound)
 			return -EINVAL;
-		}
+		/* Order wrt setting lo->lo_backing_file in loop_configure(). */
+		rmb();
 		f = l->lo_backing_file;
 	}
 	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
@@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 			  unsigned int arg)
 {
-	struct file	*file = NULL, *old_file;
-	int		error;
-	bool		partscan;
+	struct file *file = fget(arg);
+	struct file *old_file;
+	int error;
+	bool partscan;
+	bool is_loop;
 
-	error = mutex_lock_killable(&lo->lo_mutex);
+	if (!file)
+		return -EBADF;
+	is_loop = is_loop_device(file);
+	error = loop_global_lock_killable(lo, is_loop);
 	if (error)
-		return error;
+		goto out_putf;
 	error = -ENXIO;
 	if (lo->lo_state != Lo_bound)
 		goto out_err;
@@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
 		goto out_err;
 
-	error = -EBADF;
-	file = fget(arg);
-	if (!file)
-		goto out_err;
-
 	error = loop_validate_file(file, bdev);
 	if (error)
 		goto out_err;
@@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	loop_update_dio(lo);
 	blk_mq_unfreeze_queue(lo->lo_queue);
 	partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
+
+	/*
+	 * Flush loop_validate_file() before fput(), for l->lo_backing_file
+	 * might be pointing at old_file which might be the last reference.
+	 */
+	if (!is_loop) {
+		mutex_lock(&loop_validate_mutex);
+		mutex_unlock(&loop_validate_mutex);
+	}
 	/*
 	 * We must drop file reference outside of lo_mutex as dropping
 	 * the file ref can take open_mutex which creates circular locking
@@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	return 0;
 
 out_err:
-	mutex_unlock(&lo->lo_mutex);
-	if (file)
-		fput(file);
+	loop_global_unlock(lo, is_loop);
+out_putf:
+	fput(file);
 	return error;
 }
 
@@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 			  struct block_device *bdev,
 			  const struct loop_config *config)
 {
-	struct file	*file;
-	struct inode	*inode;
+	struct file *file = fget(config->fd);
+	struct inode *inode;
 	struct address_space *mapping;
-	int		error;
-	loff_t		size;
-	bool		partscan;
-	unsigned short  bsize;
+	int error;
+	loff_t size;
+	bool partscan;
+	unsigned short bsize;
+	bool is_loop;
+
+	if (!file)
+		return -EBADF;
+	is_loop = is_loop_device(file);
 
 	/* This is safe, since we have a reference from open(). */
 	__module_get(THIS_MODULE);
 
-	error = -EBADF;
-	file = fget(config->fd);
-	if (!file)
-		goto out;
-
 	/*
 	 * If we don't hold exclusive handle for the device, upgrade to it
 	 * here to avoid changing device under exclusive owner.
@@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 			goto out_putf;
 	}
 
-	error = mutex_lock_killable(&lo->lo_mutex);
+	error = loop_global_lock_killable(lo, is_loop);
 	if (error)
 		goto out_bdev;
 
@@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	size = get_loop_size(lo, file);
 	loop_set_size(lo, size);
 
+	/* Order wrt reading lo_state in loop_validate_file(). */
+	wmb();
+
 	lo->lo_state = Lo_bound;
 	if (part_shift)
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
@@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	 * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
 	 */
 	bdgrab(bdev);
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
 	if (partscan)
 		loop_reread_partitions(lo);
 	if (!(mode & FMODE_EXCL))
@@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	return 0;
 
 out_unlock:
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
 out_bdev:
 	if (!(mode & FMODE_EXCL))
 		bd_abort_claiming(bdev, loop_configure);
 out_putf:
 	fput(file);
-out:
 	/* This is safe: open() is still holding a reference. */
 	module_put(THIS_MODULE);
 	return error;
@@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
 	int lo_number;
 	struct loop_worker *pos, *worker;
 
+	/*
+	 * Flush loop_configure() and loop_change_fd(). It is acceptable for
+	 * loop_validate_file() to succeed, for actual clear operation has not
+	 * started yet.
+	 */
+	mutex_lock(&loop_validate_mutex);
+	mutex_unlock(&loop_validate_mutex);
+	/*
+	 * loop_validate_file() now fails because l->lo_state != Lo_bound
+	 * became visible.
+	 */
+
 	mutex_lock(&lo->lo_mutex);
 	if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
 		err = -ENXIO;
-- 
2.18.4



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v3] loop: reintroduce global lock for safe loop_validate_file() traversal
  2021-07-06 14:40       ` [PATCH v3] " Tetsuo Handa
@ 2021-07-19 13:34         ` Tetsuo Handa
  2021-07-23 16:19         ` Jens Axboe
  1 sibling, 0 replies; 11+ messages in thread
From: Tetsuo Handa @ 2021-07-19 13:34 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Petr Vorel, Pavel Tatashin, Tyler Hicks, linux-block, Christoph Hellwig

Jens, any questions?

I'm testing this patch in linux-next.git since next-20210714, and got
no regression reports. I think this patch is ready to be merged.

On 2021/07/06 23:40, Tetsuo Handa wrote:
> Commit 6cc8e7430801fa23 ("loop: scale loop device by introducing per
> device lock") re-opened a race window for NULL pointer dereference at
> loop_validate_file() where commit 310ca162d779efee ("block/loop: Use
> global lock for ioctl() operation.") has closed.
> 
> Although we need to guarantee that other loop devices will not change
> during traversal, we can't take remote "struct loop_device"->lo_mutex
> inside loop_validate_file() in order to avoid AB-BA deadlock. Therefore,
> introduce a global lock dedicated for loop_validate_file() which is
> conditionally taken before local "struct loop_device"->lo_mutex is taken.
> 
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Fixes: 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock")
> ---
> Changes in v3:
>   Add memory barrier between loop_configure() and loop_validate_file().
>   Flush at loop_change_fd() to avoid possible use-after-free.
> 
> Changes in v2:
>   Minimize lock duration in __loop_clr_fd().
>   Add kerneldoc to lock/unlock helper functions.
>   Reformat local variables declaration.
> 
>  drivers/block/loop.c | 128 ++++++++++++++++++++++++++++++++-----------
>  1 file changed, 97 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index cc0e8c39a48b..2a5b3d365250 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -88,6 +88,47 @@
>  
>  static DEFINE_IDR(loop_index_idr);
>  static DEFINE_MUTEX(loop_ctl_mutex);
> +static DEFINE_MUTEX(loop_validate_mutex);
> +
> +/**
> + * loop_global_lock_killable() - take locks for safe loop_validate_file() test
> + *
> + * @lo: struct loop_device
> + * @global: true if @lo is about to bind another "struct loop_device", false otherwise
> + *
> + * Returns 0 on success, -EINTR otherwise.
> + *
> + * Since loop_validate_file() traverses on other "struct loop_device" if
> + * is_loop_device() is true, we need a global lock for serializing concurrent
> + * loop_configure()/loop_change_fd()/__loop_clr_fd() calls.
> + */
> +static int loop_global_lock_killable(struct loop_device *lo, bool global)
> +{
> +	int err;
> +
> +	if (global) {
> +		err = mutex_lock_killable(&loop_validate_mutex);
> +		if (err)
> +			return err;
> +	}
> +	err = mutex_lock_killable(&lo->lo_mutex);
> +	if (err && global)
> +		mutex_unlock(&loop_validate_mutex);
> +	return err;
> +}
> +
> +/**
> + * loop_global_unlock() - release locks taken by loop_global_lock_killable()
> + *
> + * @lo: struct loop_device
> + * @global: true if @lo was about to bind another "struct loop_device", false otherwise
> + */
> +static void loop_global_unlock(struct loop_device *lo, bool global)
> +{
> +	mutex_unlock(&lo->lo_mutex);
> +	if (global)
> +		mutex_unlock(&loop_validate_mutex);
> +}
>  
>  static int max_part;
>  static int part_shift;
> @@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
>  	while (is_loop_device(f)) {
>  		struct loop_device *l;
>  
> +		lockdep_assert_held(&loop_validate_mutex);
>  		if (f->f_mapping->host->i_rdev == bdev->bd_dev)
>  			return -EBADF;
>  
>  		l = I_BDEV(f->f_mapping->host)->bd_disk->private_data;
> -		if (l->lo_state != Lo_bound) {
> +		if (l->lo_state != Lo_bound)
>  			return -EINVAL;
> -		}
> +		/* Order wrt setting lo->lo_backing_file in loop_configure(). */
> +		rmb();
>  		f = l->lo_backing_file;
>  	}
>  	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
> @@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
>  static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
>  			  unsigned int arg)
>  {
> -	struct file	*file = NULL, *old_file;
> -	int		error;
> -	bool		partscan;
> +	struct file *file = fget(arg);
> +	struct file *old_file;
> +	int error;
> +	bool partscan;
> +	bool is_loop;
>  
> -	error = mutex_lock_killable(&lo->lo_mutex);
> +	if (!file)
> +		return -EBADF;
> +	is_loop = is_loop_device(file);
> +	error = loop_global_lock_killable(lo, is_loop);
>  	if (error)
> -		return error;
> +		goto out_putf;
>  	error = -ENXIO;
>  	if (lo->lo_state != Lo_bound)
>  		goto out_err;
> @@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
>  	if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
>  		goto out_err;
>  
> -	error = -EBADF;
> -	file = fget(arg);
> -	if (!file)
> -		goto out_err;
> -
>  	error = loop_validate_file(file, bdev);
>  	if (error)
>  		goto out_err;
> @@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
>  	loop_update_dio(lo);
>  	blk_mq_unfreeze_queue(lo->lo_queue);
>  	partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
> -	mutex_unlock(&lo->lo_mutex);
> +	loop_global_unlock(lo, is_loop);
> +
> +	/*
> +	 * Flush loop_validate_file() before fput(), for l->lo_backing_file
> +	 * might be pointing at old_file which might be the last reference.
> +	 */
> +	if (!is_loop) {
> +		mutex_lock(&loop_validate_mutex);
> +		mutex_unlock(&loop_validate_mutex);
> +	}
>  	/*
>  	 * We must drop file reference outside of lo_mutex as dropping
>  	 * the file ref can take open_mutex which creates circular locking
> @@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
>  	return 0;
>  
>  out_err:
> -	mutex_unlock(&lo->lo_mutex);
> -	if (file)
> -		fput(file);
> +	loop_global_unlock(lo, is_loop);
> +out_putf:
> +	fput(file);
>  	return error;
>  }
>  
> @@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
>  			  struct block_device *bdev,
>  			  const struct loop_config *config)
>  {
> -	struct file	*file;
> -	struct inode	*inode;
> +	struct file *file = fget(config->fd);
> +	struct inode *inode;
>  	struct address_space *mapping;
> -	int		error;
> -	loff_t		size;
> -	bool		partscan;
> -	unsigned short  bsize;
> +	int error;
> +	loff_t size;
> +	bool partscan;
> +	unsigned short bsize;
> +	bool is_loop;
> +
> +	if (!file)
> +		return -EBADF;
> +	is_loop = is_loop_device(file);
>  
>  	/* This is safe, since we have a reference from open(). */
>  	__module_get(THIS_MODULE);
>  
> -	error = -EBADF;
> -	file = fget(config->fd);
> -	if (!file)
> -		goto out;
> -
>  	/*
>  	 * If we don't hold exclusive handle for the device, upgrade to it
>  	 * here to avoid changing device under exclusive owner.
> @@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
>  			goto out_putf;
>  	}
>  
> -	error = mutex_lock_killable(&lo->lo_mutex);
> +	error = loop_global_lock_killable(lo, is_loop);
>  	if (error)
>  		goto out_bdev;
>  
> @@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
>  	size = get_loop_size(lo, file);
>  	loop_set_size(lo, size);
>  
> +	/* Order wrt reading lo_state in loop_validate_file(). */
> +	wmb();
> +
>  	lo->lo_state = Lo_bound;
>  	if (part_shift)
>  		lo->lo_flags |= LO_FLAGS_PARTSCAN;
> @@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
>  	 * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
>  	 */
>  	bdgrab(bdev);
> -	mutex_unlock(&lo->lo_mutex);
> +	loop_global_unlock(lo, is_loop);
>  	if (partscan)
>  		loop_reread_partitions(lo);
>  	if (!(mode & FMODE_EXCL))
> @@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
>  	return 0;
>  
>  out_unlock:
> -	mutex_unlock(&lo->lo_mutex);
> +	loop_global_unlock(lo, is_loop);
>  out_bdev:
>  	if (!(mode & FMODE_EXCL))
>  		bd_abort_claiming(bdev, loop_configure);
>  out_putf:
>  	fput(file);
> -out:
>  	/* This is safe: open() is still holding a reference. */
>  	module_put(THIS_MODULE);
>  	return error;
> @@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
>  	int lo_number;
>  	struct loop_worker *pos, *worker;
>  
> +	/*
> +	 * Flush loop_configure() and loop_change_fd(). It is acceptable for
> +	 * loop_validate_file() to succeed, for actual clear operation has not
> +	 * started yet.
> +	 */
> +	mutex_lock(&loop_validate_mutex);
> +	mutex_unlock(&loop_validate_mutex);
> +	/*
> +	 * loop_validate_file() now fails because l->lo_state != Lo_bound
> +	 * became visible.
> +	 */
> +
>  	mutex_lock(&lo->lo_mutex);
>  	if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
>  		err = -ENXIO;
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3] loop: reintroduce global lock for safe loop_validate_file() traversal
  2021-07-06 14:40       ` [PATCH v3] " Tetsuo Handa
  2021-07-19 13:34         ` Tetsuo Handa
@ 2021-07-23 16:19         ` Jens Axboe
  2021-07-24  6:57           ` Christoph Hellwig
  1 sibling, 1 reply; 11+ messages in thread
From: Jens Axboe @ 2021-07-23 16:19 UTC (permalink / raw)
  To: Tetsuo Handa, Christoph Hellwig
  Cc: Petr Vorel, Pavel Tatashin, Tyler Hicks, linux-block

On 7/6/21 8:40 AM, Tetsuo Handa wrote:
> Commit 6cc8e7430801fa23 ("loop: scale loop device by introducing per
> device lock") re-opened a race window for NULL pointer dereference at
> loop_validate_file() where commit 310ca162d779efee ("block/loop: Use
> global lock for ioctl() operation.") has closed.
> 
> Although we need to guarantee that other loop devices will not change
> during traversal, we can't take remote "struct loop_device"->lo_mutex
> inside loop_validate_file() in order to avoid AB-BA deadlock. Therefore,
> introduce a global lock dedicated for loop_validate_file() which is
> conditionally taken before local "struct loop_device"->lo_mutex is taken.

I'll queue this up for next weeks merging. Christoph, are you happy with
it at this point? Can't say it's a thing of beauty, but the problem does
seem real.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3] loop: reintroduce global lock for safe loop_validate_file() traversal
  2021-07-23 16:19         ` Jens Axboe
@ 2021-07-24  6:57           ` Christoph Hellwig
  0 siblings, 0 replies; 11+ messages in thread
From: Christoph Hellwig @ 2021-07-24  6:57 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Tetsuo Handa, Christoph Hellwig, Petr Vorel, Pavel Tatashin,
	Tyler Hicks, linux-block

On Fri, Jul 23, 2021 at 10:19:39AM -0600, Jens Axboe wrote:
> I'll queue this up for next weeks merging. Christoph, are you happy with
> it at this point? Can't say it's a thing of beauty, but the problem does
> seem real.

Exactly my feelings.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-07-24  6:57 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-02 15:30 [PATCH] loop: reintroduce global lock for safe loop_validate_file() traversal Tetsuo Handa
2021-07-02 17:28 ` Tetsuo Handa
2021-07-06  5:46   ` Christoph Hellwig
2021-07-06 11:19     ` [PATCH v2] " Tetsuo Handa
2021-07-06 14:40       ` [PATCH v3] " Tetsuo Handa
2021-07-19 13:34         ` Tetsuo Handa
2021-07-23 16:19         ` Jens Axboe
2021-07-24  6:57           ` Christoph Hellwig
2021-07-04  5:42 ` [PATCH] " Tetsuo Handa
2021-07-06  5:43   ` Christoph Hellwig
2021-07-06 14:35   ` Arjan van de Ven

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.