All of lore.kernel.org
 help / color / mirror / Atom feed
* BUG drivers/md/md.c: data-offset reshape renders array unloadable
@ 2015-01-24 21:44 Wesley W. Terpstra
  2015-01-24 22:23 ` Wesley W. Terpstra
  0 siblings, 1 reply; 5+ messages in thread
From: Wesley W. Terpstra @ 2015-01-24 21:44 UTC (permalink / raw)
  To: linux-raid

[-- Attachment #1: Type: text/plain, Size: 2067 bytes --]

On Wed, Jan 21, 2015 at 1:37 PM, Wesley W. Terpstra <wesley@terpstra.ca> wrote:
> Try: madam -A -o --run --force --freeze-reshape /dev/md120 /dev/sd[a-d]4
> It failed the same as before. Logs in mdadm-a.log and dmesg.log

I have now tried 3.18.3, and had exactly the same problem.
To recap, for anyone new to this thread:

I have a raid5 array with 4 disks. I did a reshape to change the
data-offset from 5120 to 8192, changing nothing else. The number of
disks remained the same. The reshape was going fine and was probably
around 50% done when the system was rebooted. All the superblocks have
matching event counts, all checksums pass, and all disks have clean
bills of health. However, any attempt to reassemble the array fails.

I have gone ahead and applied a patch to md.c to diagnose WHY it does
not assemble the array. Find the patch I used (against 3.18.3)
attached. When I try to assemble the array using my patched 3.18.3,
this is what I see:

[    5.830139] md: FYI: 8192 old 8192 new
[    5.830167] md: sectors mismatch 5842894848 < 5842897920 on sdc4
[    5.830199] md: sdc4 does not have a valid v1.2 superblock, not importing!
[    5.830635] md: md_import_device returned -22

First, it is obviously the last test in super_1_load that is rejecting
the array. The superblock reports more sectors than are calculated, so
the check
        if (sectors < le64_to_cpu(sb->data_size)) {
fails.

IMO, it seems that the problem is that the superblock has had
rdev->data_offset updated prematurely! The reshape was incomplete, so
I would have expected data_offset to still be 5120 and new_data_offset
to be 8192. If data_offset HAD been 5120, the two values compared in
the failing inequality would be equal.

I am considering simply modifying my superblock to reset data_offset
to 5120 and see if the reshape then resumes correctly. Is this the
correct fix to recover my array? I imagine the source of my problems
must be a bug somewhere in mdadm reshape that updates both
new_data_offset and data_offset at the same time when doing a
data-offset reshape..?

[-- Attachment #2: debug.patch --]
[-- Type: application/octet-stream, Size: 5159 bytes --]

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9233c71..5c6a56a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1385,6 +1385,8 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 		sb_start = 8;
 		break;
 	default:
+		printk("md: invalid superblock minor %d on %s\n",
+			minor_version, bdevname(rdev->bdev,b));
 		return -EINVAL;
 	}
 	rdev->sb_start = sb_start;
@@ -1393,17 +1395,39 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 	 * and it is safe to read 4k, so we do that
 	 */
 	ret = read_disk_sb(rdev, 4096);
-	if (ret) return ret;
-
+	if (ret) {
+		printk("md: cannot read superblock on %s\n",
+			bdevname(rdev->bdev,b));
+		return ret;
+	}
+	
 	sb = page_address(rdev->sb_page);
 
-	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
-	    sb->major_version != cpu_to_le32(1) ||
-	    le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
-	    le64_to_cpu(sb->super_offset) != rdev->sb_start ||
-	    (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
+	if (sb->magic != cpu_to_le32(MD_SB_MAGIC)) {
+		printk("md: bad magic %x on %s\n",
+			sb->magic, bdevname(rdev->bdev,b));
 		return -EINVAL;
-
+	}
+	if (sb->major_version != cpu_to_le32(1)) {
+		printk("md: wrong major version %d %s\n",
+			sb->major_version, bdevname(rdev->bdev,b));
+		return -EINVAL;
+	}
+	if (le32_to_cpu(sb->max_dev) > (4096-256)/2) {
+		printk("md: too many devices %d on %s\n",
+			sb->max_dev, bdevname(rdev->bdev,b));
+		return -EINVAL;
+	}
+	if (le64_to_cpu(sb->super_offset) != rdev->sb_start) {
+		printk("md: superblock not where expected, %d on %s\n",
+			sb->max_dev, bdevname(rdev->bdev,b));
+		return -EINVAL;
+	}
+	if ((le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0) {
+		printk("md: required feature unsupported by kernel %x on %s\n",
+			sb->feature_map, bdevname(rdev->bdev,b));
+		return -EINVAL;
+	}
 	if (calc_sb_1_csum(sb) != sb->sb_csum) {
 		printk("md: invalid superblock checksum on %s\n",
 			bdevname(rdev->bdev,b));
@@ -1416,9 +1440,12 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 	}
 	if (sb->pad0 ||
 	    sb->pad3[0] ||
-	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
+	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1]))) {
 		/* Some padding is non-zero, might be a new feature */
+		printk("md: non-zero padding on %s\n",
+		       bdevname(rdev->bdev,b));
 		return -EINVAL;
+	}
 
 	rdev->preferred_minor = 0xffff;
 	rdev->data_offset = le64_to_cpu(sb->data_offset);
@@ -1433,12 +1460,24 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 	if (rdev->sb_size & bmask)
 		rdev->sb_size = (rdev->sb_size | bmask) + 1;
 
+	printk("md: FYI: %ld old %ld new\n", rdev->data_offset, rdev->new_data_offset);
+	
 	if (minor_version
-	    && rdev->data_offset < sb_start + (rdev->sb_size/512))
+	    && rdev->data_offset < sb_start + (rdev->sb_size/512)) {
+		printk("md: data_offset=%ld < %ld on %s\n",
+		       rdev->data_offset,
+		       sb_start + (rdev->sb_size/512),
+		       bdevname(rdev->bdev,b));
 		return -EINVAL;
+	}
 	if (minor_version
-	    && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
+	    && rdev->new_data_offset < sb_start + (rdev->sb_size/512)) {
+		printk("md: new_data_offset=%ld < %ld on %s\n",
+		       rdev->new_data_offset,
+		       sb_start + (rdev->sb_size/512),
+		       bdevname(rdev->bdev,b));
 		return -EINVAL;
+	}
 
 	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
 		rdev->desc_nr = -1;
@@ -1460,11 +1499,17 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 		u64 *bbp;
 		int i;
 		int sectors = le16_to_cpu(sb->bblog_size);
-		if (sectors > (PAGE_SIZE / 512))
+		if (sectors > (PAGE_SIZE / 512)) {
+			printk("md: bad block map too big %d on %s\n",
+			       sectors, bdevname(rdev->bdev,b));
 			return -EINVAL;
+		}
 		offset = le32_to_cpu(sb->bblog_offset);
-		if (offset == 0)
+		if (offset == 0) {
+			printk("md: bad block map too big %d on %s\n",
+			       sectors, bdevname(rdev->bdev,b));
 			return -EINVAL;
+		}
 		bb_sector = (long long)offset;
 		if (!sync_page_io(rdev, bb_sector, sectors << 9,
 				  rdev->bb_page, READ, true))
@@ -1480,8 +1525,11 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 			if (bb + 1 == 0)
 				break;
 			if (md_set_badblocks(&rdev->badblocks,
-					     sector, count, 1) == 0)
+					     sector, count, 1) == 0) {
+				printk("md: set bad blocks failed on %s\n",
+				       bdevname(rdev->bdev,b));
 				return -EINVAL;
+			}
 		}
 	} else if (sb->bblog_offset != 0)
 		rdev->badblocks.shift = 0;
@@ -1515,8 +1563,11 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 		sectors -= rdev->data_offset;
 	} else
 		sectors = rdev->sb_start;
-	if (sectors < le64_to_cpu(sb->data_size))
+	if (sectors < le64_to_cpu(sb->data_size)) {
+		printk("md: sectors mismatch %ld < %lld on %s\n",
+			sectors, le64_to_cpu(sb->data_size), bdevname(rdev->bdev,b));
 		return -EINVAL;
+	}
 	rdev->sectors = le64_to_cpu(sb->data_size);
 	return ret;
 }

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-02-05  8:17 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-24 21:44 BUG drivers/md/md.c: data-offset reshape renders array unloadable Wesley W. Terpstra
2015-01-24 22:23 ` Wesley W. Terpstra
2015-01-25 16:46   ` Wesley W. Terpstra
2015-02-05  6:14     ` NeilBrown
2015-02-05  8:17       ` Wesley W. Terpstra

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.