[PATCH] block/loop: improve reliability of partition scanning

* [PATCH] block/loop: improve reliability of partition scanning
@ 2015-03-31 20:47 Jarod Wilson
  2015-04-01 15:33 ` Ming Lei
  2015-04-01 15:41 ` Christoph Hellwig
  0 siblings, 2 replies; 4+ messages in thread
From: Jarod Wilson @ 2015-03-31 20:47 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jarod Wilson, Jens Axboe, Ming Lei, Mike Galbraith,
	Kent Overstreet, Mikulas Patocka

If losetup is called with the -P option, it sets a flag to have the
resulting loop block device scanned for partitions. Unfortunately, due
to the way flags are passed in from userspace, there's first a
loop_set_fd() call, which does no partition scanning, then a
loop_set_status() call, where the partition scanning should kick in.
However, particularly on a system with slow I/O (such as a file-backed
vm), there's a race between the loop_set_status() call and udev poking the
device, which leads to partition scanning failing with an -EBUSY (passed
up from block/ioctl.c's blkdev_reread_part()) because the block_device's
bd_mutex is already held by udev calling blkdev_open(), which grabs
bd_mutex, and then in turn calls lo_open(), which then in turn tries to
grab lo_ctl_mutex, which we're holding in all loop ioctls.

To combat this, if we discover bd_mutex is locked, we know partition
scanning will fail, and it's probably because of udev, so we can
temporarily drop the lo_ctl_mutex ourselves to try to let udev do its
thing, then grab it back, and hopefully then successfully scan partitions.

Testing shows a definite improvement to partition scanning success when
calling losetup -fP file-image over and over (with matching losetup -D
too, of course), but still not to 100% success; I'm still getting the
occasional failure, which is typically due to an -EBUSY trying to rescan
partitions on loop device removal.

CC: Jens Axboe <axboe@fb.com>
CC: Ming Lei <ming.lei@canonical.com>
CC: Mike Galbraith <bitbucket@online.de>
CC: Kent Overstreet <kmo@daterainc.com>
CC: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Jarod Wilson <jarod@redhat.com>
---
 drivers/block/loop.c | 48 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d1f168b..b30e32c 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -75,6 +75,7 @@
 #include <linux/sysfs.h>
 #include <linux/miscdevice.h>
 #include <linux/falloc.h>
+#include <linux/delay.h>
 #include "loop.h"
 
 #include <asm/uaccess.h>
@@ -529,6 +530,45 @@ static int loop_flush(struct loop_device *lo)
 }
 
 /*
+ * Re-reading partitions can fail with an -EBUSY return from block/ioctl.c's
+ * blkdev_reread_part(), which calls mutex_trylock on the bd_mutex. Now, udev
+ * is calling blkdev_open, which first grabs bd_mutex, then lo_ctl_mutex via
+ * lo_open, which occasionally happens before partition scanning, and will
+ * prevent partition scanning from ever being successful unless we give up
+ * the lo_ctl_mutex temporarily.
+ */
+static void loop_reread_partitions(struct loop_device *lo,
+				   struct block_device *bdev)
+{
+	int rc;
+	int retry = 5;
+
+	pr_debug("%s: firing for loop%d (%s)\n",
+		 __func__, lo->lo_number, lo->lo_file_name);
+
+	/*
+	 * No lo_device means we were (probably) called from loop_clr_fd();
+	 * waiting never seems to help there, so never drop lo_ctl_mutex.
+	 */
+	if (!lo->lo_device)
+		retry = 0;
+
+	while (mutex_is_locked(&bdev->bd_mutex) && retry > 0) {
+		mutex_unlock(&lo->lo_ctl_mutex);
+		msleep(50);
+		mutex_lock(&lo->lo_ctl_mutex);
+		retry--;
+		pr_debug("%s: unlocked lo_ctl temporarily (retries left: %d)\n",
+			 __func__, retry);
+	}
+
+	rc = ioctl_by_bdev(bdev, BLKRRPART, 0);
+	if (rc)
+		pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
+			__func__, lo->lo_number, lo->lo_file_name, rc);
+}
+
+/*
  * loop_change_fd switched the backing store of a loopback device to
  * a new file. This is useful for operating system installers to free up
  * the original file and in High Availability environments to switch to
@@ -576,7 +616,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 
 	fput(old_file);
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN)
-		ioctl_by_bdev(bdev, BLKRRPART, 0);
+		loop_reread_partitions(lo, bdev);
 	return 0;
 
  out_putf:
@@ -807,7 +847,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	if (part_shift)
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN)
-		ioctl_by_bdev(bdev, BLKRRPART, 0);
+		loop_reread_partitions(lo, bdev);
 
 	/* Grab the block_device to prevent its destruction after we
 	 * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev).
@@ -920,7 +960,7 @@ static int loop_clr_fd(struct loop_device *lo)
 	/* This is safe: open() is still holding a reference. */
 	module_put(THIS_MODULE);
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
-		ioctl_by_bdev(bdev, BLKRRPART, 0);
+		loop_reread_partitions(lo, bdev);
 	lo->lo_flags = 0;
 	if (!part_shift)
 		lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
@@ -995,7 +1035,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	     !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
 		lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
-		ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
+		loop_reread_partitions(lo, lo->lo_device);
 	}
 
 	lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread