All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Sanders <plsander@gmail.com>
To: John Stoffel <john@stoffel.org>
Cc: Wols Lists <antlists@youngman.org.uk>,
	Eyal Lebedinsky <fedora@eyal.emu.id.au>,
	linux-raid@vger.kernel.org
Subject: Re: RAID 6, 6 device array - all devices lost superblock
Date: Fri, 2 Sep 2022 20:39:25 -0400	[thread overview]
Message-ID: <CAKAPSk+jhN-T9ubdFBs6N2k10veT2u5noyQ8NBnRE9igeZgn7g@mail.gmail.com> (raw)
In-Reply-To: <25362.21920.20956.599850@quad.stoffel.home>

Repeat of run 1

plsander@superior:~$ su -
Password:
root@superior:~# cat /proc/partitions
major minor  #blocks  name

 259        0  250059096 nvme0n1
 259        1     496640 nvme0n1p1
 259        2          1 nvme0n1p2
 259        3   63475712 nvme0n1p5
 259        4   97654784 nvme0n1p6
 259        5      37888 nvme0n1p7
 259        6   86913024 nvme0n1p8
 259        7    1474560 nvme0n1p9
   8       16 2930266584 sdb
   8       80 2930266584 sdf
   8        0 1953514584 sda
   8        1 1953513472 sda1
   8       32 2930266584 sdc
   8       96 2930266584 sdg
   8       64 2930266584 sde
   8       48 2930266584 sdd
  11        0    1048575 sr0
root@superior:~# cat /proc/mdstat
Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5]
[raid4] [raid10]
unused devices: <none>
root@superior:~# DEVICES="/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg"
root@superior:~# echo $DEVICES
/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg
root@superior:~# parallel 'test -e /dev/loop{#} || mknod -m 660
/dev/loop{#} b 7 {#}' ::: $DEVICES
root@superior:~# ls /dev/lo
log           loop2         loop4         loop6
loop1         loop3         loop5         loop-control
root@superior:~# ls /dev/lo*
/dev/log  /dev/loop1  /dev/loop2  /dev/loop3  /dev/loop4  /dev/loop5
/dev/loop6  /dev/loop-control
root@superior:~# ls -l /dev/loop*
brw-rw---- 1 root root  7,   1 Sep  2 20:30 /dev/loop1
brw-rw---- 1 root root  7,   2 Sep  2 20:30 /dev/loop2
brw-rw---- 1 root root  7,   3 Sep  2 20:30 /dev/loop3
brw-rw---- 1 root root  7,   4 Sep  2 20:30 /dev/loop4
brw-rw---- 1 root root  7,   5 Sep  2 20:30 /dev/loop5
brw-rw---- 1 root root  7,   6 Sep  2 20:30 /dev/loop6
crw-rw---- 1 root disk 10, 237 Sep  2 20:22 /dev/loop-control
root@superior:~# cd /mnt/backup/
root@superior:/mnt/backup# parallel truncate -s4000G overlay-{/} ::: $DEVICES
root@superior:/mnt/backup# ls -l
total 16
drwx------ 2 root root         16384 Aug 28 18:50 lost+found
-rw-r--r-- 1 root root 4294967296000 Sep  2 20:31 overlay-sdb
-rw-r--r-- 1 root root 4294967296000 Sep  2 20:31 overlay-sdc
-rw-r--r-- 1 root root 4294967296000 Sep  2 20:31 overlay-sdd
-rw-r--r-- 1 root root 4294967296000 Sep  2 20:31 overlay-sde
-rw-r--r-- 1 root root 4294967296000 Sep  2 20:31 overlay-sdf
-rw-r--r-- 1 root root 4294967296000 Sep  2 20:31 overlay-sdg
root@superior:/mnt/backup# rm over*
root@superior:/mnt/backup# parallel truncate -s300G overlay-{/} ::: $DEVICES
root@superior:/mnt/backup# ls -la
total 24
drwxr-xr-x 3 root root         4096 Sep  2 20:31 .
drwxr-xr-x 7 root root         4096 Aug 29 09:17 ..
drwx------ 2 root root        16384 Aug 28 18:50 lost+found
-rw-r--r-- 1 root root 322122547200 Sep  2 20:31 overlay-sdb
-rw-r--r-- 1 root root 322122547200 Sep  2 20:31 overlay-sdc
-rw-r--r-- 1 root root 322122547200 Sep  2 20:31 overlay-sdd
-rw-r--r-- 1 root root 322122547200 Sep  2 20:31 overlay-sde
-rw-r--r-- 1 root root 322122547200 Sep  2 20:31 overlay-sdf
-rw-r--r-- 1 root root 322122547200 Sep  2 20:31 overlay-sdg
root@superior:/mnt/backup# dmsetup status
No devices found
root@superior:/mnt/backup# date
Fri 02 Sep 2022 08:32:11 PM EDT
root@superior:/mnt/backup#  parallel 'size=$(blockdev --getsize {});
loop=$(losetup -f --show -- overlay-{/}); echo 0 $size snapshot {}
$loop P 8 | dmsetup create {/}' ::: $DEVICES
root@superior:/mnt/backup# date
Fri 02 Sep 2022 08:32:20 PM EDT
root@superior:/mnt/backup# dmsetup status
sdg: 0 5860533168 snapshot 16/629145600 16
sdf: 0 5860533168 snapshot 16/629145600 16
sde: 0 5860533168 snapshot 16/629145600 16
sdd: 0 5860533168 snapshot 16/629145600 16
sdc: 0 5860533168 snapshot 16/629145600 16
sdb: 0 5860533168 snapshot 16/629145600 16
root@superior:/mnt/backup# OVERLAYS=$(parallel echo /dev/mapper/{/}
::: $DEVICES)
root@superior:/mnt/backup# echo $OVERLAYS
/dev/mapper/sdb /dev/mapper/sdc /dev/mapper/sdd /dev/mapper/sde
/dev/mapper/sdf /dev/mapper/sdg
root@superior:/mnt/backup# mdadm --create /dev/md1 --level=raid6 -n 6
--assume-clean $OVERLAYS
mdadm: partition table exists on /dev/mapper/sdb
mdadm: partition table exists on /dev/mapper/sdc
mdadm: partition table exists on /dev/mapper/sdc but will be lost or
       meaningless after creating array
mdadm: partition table exists on /dev/mapper/sdd
mdadm: partition table exists on /dev/mapper/sdd but will be lost or
       meaningless after creating array
mdadm: partition table exists on /dev/mapper/sde
mdadm: partition table exists on /dev/mapper/sde but will be lost or
       meaningless after creating array
mdadm: partition table exists on /dev/mapper/sdf
mdadm: partition table exists on /dev/mapper/sdf but will be lost or
       meaningless after creating array
mdadm: partition table exists on /dev/mapper/sdg
mdadm: partition table exists on /dev/mapper/sdg but will be lost or
       meaningless after creating array
Continue creating array? y
mdadm: Defaulting to version 1.2 metadata
mdadm: array /dev/md1 started.
root@superior:/mnt/backup# ls -l /dev/md*
brw-rw---- 1 root disk 9, 1 Sep  2 20:34 /dev/md1
root@superior:/mnt/backup# fsck /dev/md1
fsck from util-linux 2.36.1
e2fsck 1.46.2 (28-Feb-2021)
ext2fs_open2: Bad magic number in super-block
fsck.ext2: Superblock invalid, trying backup blocks...
fsck.ext2: Bad magic number in super-block while trying to open /dev/md1

The superblock could not be read or does not describe a valid ext2/ext3/ext4
filesystem.  If the device is valid and it really contains an ext2/ext3/ext4
filesystem (and not swap or ufs or something else), then the superblock
is corrupt, and you might try running e2fsck with an alternate superblock:
    e2fsck -b 8193 <device>
 or
    e2fsck -b 32768 <device>

root@superior:/mnt/backup# blkid /dev/md1
root@superior:/mnt/backup#
root@superior:/mnt/backup# cat /proc/mdstat
Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5]
[raid4] [raid10]
md1 : active raid6 dm-3[5] dm-2[4] dm-1[3] dm-5[2] dm-0[1] dm-4[0]
      11720536064 blocks super 1.2 level 6, 512k chunk, algorithm 2
[6/6] [UUUUUU]
      bitmap: 0/22 pages [0KB], 65536KB chunk

unused devices: <none>
root@superior:/mnt/backup#

Some questions -
- is the easiest 'reset for next run' to reboot and rebuild?


On Fri, Sep 2, 2022 at 3:12 PM John Stoffel <john@stoffel.org> wrote:
>
> >>>>> "Peter" == Peter Sanders <plsander@gmail.com> writes:
>
> Peter, please include the output of all the commands, not just the
> commands themselves.  See my comments below.
>
>
> > Question on restarting from scratch...
> > How to reset to the starting point?
>
> I think you need to blow away the loop devices and re-create them.
>
> Or at least blow away the dmsetup devices you just created.
>
> It might be quickest to just reboot.  What OS are you using for the
> recovery?  Is it a recent live image?  Sorry for asking so many
> questions... some of this is new to me too.
>
>
> > dmsetup, both for remove and create of the overlay seems to be hanging.
>
> > On Fri, Sep 2, 2022 at 10:56 AM Peter Sanders <plsander@gmail.com> wrote:
> >>
> >> contents of /proc/mdstat
> >>
> >> root@superior:/mnt/backup# cat /proc/mdstat
> >> Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5]
> >> [raid4] [raid10]
> >> unused devices: <none>
> >> root@superior:/mnt/backup#
> >>
> >>
> >>
> >> Here are the steps I ran (minus some mounting other devices and
> >> looking around for mdadm tracks on the old os disk)
> >>
> >> 410  DEVICES=$(cat /proc/partitions | parallel --tagstring {5}
> >> --colsep ' +' mdadm -E /dev/{5} |grep $UUID | parallel --colsep '\t'
> >> echo /dev/{1})
> >> 411  apt install parallel
> >> 412  DEVICES=$(cat /proc/partitions | parallel --tagstring {5}
> >> --colsep ' +' mdadm -E /dev/{5} |grep $UUID | parallel --colsep '\t'
> >> echo /dev/{1})
> >> 413  echo $DEVICES
>
> So you found no MD RAID super blocks on any of the base devices.  You
> can skip this step moving forward.
>
> >> 414  cat /proc/partitions
> >> 415  DEVICES=/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg
> >> 416  DEVICES="/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg"
> >> 417  echo $DEVICES
> >> 418  parallel 'test -e /dev/loop{#} || mknod -m 660 /dev/loop{#} b 7
> >> {#}' ::: $DEVICES
> >> 419  ls /dev/loop*
>
> Can you show the output of all these commands, not just the commands please?
>
> >> 423  parallel truncate -s300G overlay-{/} ::: $DEVICES
>
> >> 427  parallel 'size=$(blockdev --getsize {}); loop=$(losetup -f
> >> --show -- overlay-{/}); echo 0 $size snapshot {} $loop P 8 | dmsetup
> >> create {/}' ::: $DEVICES
> >> 428  ls /dev/mapper/
>
> This is some key output to view.
>
> >> 429  OVERLAYS=$(parallel echo /dev/mapper/{/} ::: $DEVICES)
> >> 430  echo $OVERLAYS
>
> What are the overlays?
>
> >> 431  dmsetup status
>
> What did this command show?
>
> >> 432  mdadm --assemble --force /dev/md1 $OVERLAYS
>
> And here is where I think you need to use the 'create' command with
> --assume-clean instead.  It's not going to assemble anything because
> the info was wiped.  I *think* you really want:
>
>    mdadm --create /dev/md1 --level=raid6 -n 6 --assume-clean $OVERLAYS
>
> And once you do this above command and it comes back, do:
>
>     cat /proc/mdstat
>
> and show all the output please!
>
> >> 433  history
> >> 434  dmsetup status
> >> 435  echo $OVERLAYS
> >> 436  mdadm --assemble --force /dev/md0 $OVERLAYS
> >> 437  cat /proc/partitions
> >> 438  mkdir /mnt/oldroot
> >> << look for initrd mdadm files >>
> >> 484  echo $OVERLAYS
> >> 485  mdadm --create /dev/md0 --level=raid6 -n 6 /dev/mapper/sdb
> >> /dev/mapper/sdc /dev/mapper/sdd /dev/mapper/sde /dev/mapper/sdf
> >> /dev/mapper/sdg
>
> I'm confused here, what is the difference between the md1 you
> assembled above, and the md0 you're doing here?
>
> >> << cancelled out of 485, review instructions... >>
> >> 486  mdadm --create /dev/md0 --level=raid6 -n 6 /dev/mapper/sdb
> >> /dev/mapper/sdc /dev/mapper/sdd /dev/mapper/sde /dev/mapper/sdf
> >> /dev/mapper/sdg
> >> 487  fsck -n /dev/md0
>
> And what output did you get here?  Did it find a filesystem?  You might want
> to try:
>
>    blkid /dev/md0
>
>
> >> 488  mdadm --stop /dev/md0
> >> 489  echo $DEVICES
> >> 490   parallel 'dmsetup remove {/}; rm overlay-{/}' ::: $DEVICES
> >> 491  dmsetup status
>
> This all worked properly?  No errors?
>
> I gave up after this because it's not clear what the results really
> are.  If you don't find a filesystem that fsck's cleanly, then you
> should just need to stop the array, then re-create it but shuffle the
> order of the devices.
>
> Instead of the disks in the order "sdb sdc sdd ... sdN", you would try the
> order "sdc sdd ... sdN sdb".   See how I moved sdb to the end of the
> list of devices?  With six disks, you have, I think, 6 factorial (720)
> orderings to try, which is a lot of options to go through, and why you
> need to automate this more.  But also keep a log and show the output!
>
> John
>
>
> >> 492  ls
> >> 493  rm overlay-*
> >> 494  ls
> >> 495  parallel losetup -d ::: /dev/loop[0-9]*
> >> 496  parallel 'test -e /dev/loop{#} || mknod -m 660 /dev/loop{#} b 7
> >> {#}' ::: $DEVICES
> >> 497  parallel truncate -s300G overlay-{/} ::: $DEVICES
> >> 498  parallel 'size=$(blockdev --getsize {}); loop=$(losetup -f
> >> --show -- overlay-{/}); echo 0 $size snapshot {} $loop P 8 | dmsetup
> >> create {/}' ::: $DEVICES
> >> 499  dmsetup status
> >> 500  /sbin/reboot
> >> 501  history
> >> 502  dmsetup status
> >> 503  mount
> >> 504  cat /proc/partitions
> >> 505  nano /etc/fstab
> >> 506  mount /mnt/backup/
> >> 507  ls /mnt/backup/
> >> 508  rm /mnt/backup/
> >> 509  rm /mnt/backup/overlay-sd*
> >> 510  emacs setupOverlay &
> >> 511  ps auxww | grep emacs
> >> 512  kill 65017
> >> 513  ls /dev/loo*
> >> 514  DEVICES='/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg'
> >> 515  echo $DEVICES
> >> 516   parallel 'test -e /dev/loop{#} || mknod -m 660 /dev/loop{#} b
> >> 7 {#}' ::: $DEVICES
> >> 517  ls /dev/loo*
> >> 518  parallel truncate -s4000G overlay-{/} ::: $DEVICES
> >> 519  ls
> >> 520  rm overlay-sd*
> >> 521  cd /mnt/bak
> >> 522  cd /mnt/backup/
> >> 523  ls
> >> 524  parallel truncate -s4000G overlay-{/} ::: $DEVICES
> >> 525  ls -la
> >> 526  blockdev --getsize /dev/sdb
> >> 527  man losetup
> >> 528  man losetup
> >> 529  parallel 'size=$(blockdev --getsize {}); loop=$(losetup -f
> >> --show -- overlay-{/}); echo 0 $size snapshot {} $loop P 8 | dmsetup
> >> create {/}' ::: $DEVICES
> >> 530  dmsetup status
> >> 531  history | grep mdadm
> >> 532  history
> >> 533  dmsetup status
> >> 534  history | grep dmsetup
> >> 535  dmsetup status
> >> 536  dmsetup remove sdg
> >> 537  dmsetup ls --tree
> >> 538  lsof
> >> 539  dmsetup ls --tre
> >> 540  dmsetup ls --tree
> >> 541  lsof | grep -i sdg
> >> 542  lsof | grep -i sdf
> >> 543  history |grep dmsetup | less
> >> 544  dmsetup status
> >> 545  history > ~plsander/Documents/raidIssues/joblog
> >>
> >> On Wed, Aug 31, 2022 at 4:37 PM John Stoffel <john@stoffel.org> wrote:
> >> >
> >> > >>>>> "Peter" == Peter Sanders <plsander@gmail.com> writes:
> >> >
> >> > > encountering a puzzling situation.
> >> > > dmsetup is failing to return.
> >> >
> >> > I don't think you need to use dmsetup in your case, but can you post
> >> > *all* the commands you ran before you got to this point, and the
> >> > output of
> >> >
> >> >        cat /proc/mdstat
> >> >
> >> > as well?  Thinking on this some more, you might need to actually also
> >> > add:
> >> >
> >> >         --assume-clean
> >> >
> >> > to the 'mdadm create ....' string, since you don't want it to zero the
> >> > array or anything.
> >> >
> >> > Sorry for not remembering this at the time!
> >> >
> >> > So if you can, please just start over from scratch, showing the setup
> >> > of the loop devices, the overlay setup, and the building of the RAID6
> >> > array, along with the cat /proc/mdstat after you do the initial build.
> >> >
> >> > John
> >> >
> >> > P.S.  For those who hated my email citing tool, I pulled it out for
> >> > now.  Only citing with > now.  :-)
> >> >
> >> > > root@superior:/mnt/backup# dmsetup status
> >> > > sdg: 0 5860533168 snapshot 16/8388608000 16
> >> > > sdf: 0 5860533168 snapshot 16/8388608000 16
> >> > > sde: 0 5860533168 snapshot 16/8388608000 16
> >> > > sdd: 0 5860533168 snapshot 16/8388608000 16
> >> > > sdc: 0 5860533168 snapshot 16/8388608000 16
> >> > > sdb: 0 5860533168 snapshot 16/8388608000 16
> >> >
> >> > > dmsetup remove sdg  runs for hours.
> >> > > Canceled it, ran dmsetup ls --tree and found that sdg is not present in the list.
> >> >
> >> > > dmsetup status shows:
> >> > > sdf: 0 5860533168 snapshot 16/8388608000 16
> >> > > sde: 0 5860533168 snapshot 16/8388608000 16
> >> > > sdd: 0 5860533168 snapshot 16/8388608000 16
> >> > > sdc: 0 5860533168 snapshot 16/8388608000 16
> >> > > sdb: 0 5860533168 snapshot 16/8388608000 16
> >> >
> >> > > dmsetup ls --tree
> >> > > root@superior:/mnt/backup# dmsetup ls --tree
> >> > > sdf (253:3)
> >> > >  ├─ (7:3)
> >> > >  └─ (8:80)
> >> > > sde (253:1)
> >> > >  ├─ (7:1)
> >> > >  └─ (8:64)
> >> > > sdd (253:2)
> >> > >  ├─ (7:2)
> >> > >  └─ (8:48)
> >> > > sdc (253:0)
> >> > >  ├─ (7:0)
> >> > >  └─ (8:32)
> >> > > sdb (253:5)
> >> > >  ├─ (7:5)
> >> > >  └─ (8:16)
> >> >
> >> > > any suggestions?
> >> >
> >> >
> >> >
> >> > > On Tue, Aug 30, 2022 at 2:03 PM Wols Lists <antlists@youngman.org.uk> wrote:
> >> > >>
> >> > >> On 30/08/2022 14:27, Peter Sanders wrote:
> >> > >> >
> >> > >> > And the victory conditions would be a mountable file system that passes a fsck?
> >> > >>
> >> > >> Yes. Just make sure you delve through the file system a bit and satisfy
> >> > >> yourself it looks good, too ...
> >> > >>
> >> > >> Cheers,
> >> > >> Wol

  reply	other threads:[~2022-09-03  0:39 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-28  2:00 RAID 6, 6 device array - all devices lost superblock Peter Sanders
2022-08-28  9:14 ` Wols Lists
2022-08-28  9:54   ` Wols Lists
2022-08-28 16:47     ` Phil Turmel
     [not found]       ` <CAKAPSkJAQYsec-4zzcePbkJ7Ee0=sd_QvHj4Stnyineq+T8BXw@mail.gmail.com>
2022-08-28 17:16         ` Wols Lists
2022-08-28 18:45         ` John Stoffel
2022-08-28 19:36           ` Phil Turmel
2022-08-28 19:49             ` John Stoffel
2022-08-28 23:24               ` Peter Sanders
2022-08-29 13:12                 ` Peter Sanders
2022-08-29 21:45                 ` John Stoffel
2022-08-29 22:29                   ` Eyal Lebedinsky
2022-08-29 23:53                     ` Peter Sanders
2022-08-30 13:27                       ` Peter Sanders
2022-08-30 18:03                         ` Wols Lists
2022-08-31 17:48                           ` Peter Sanders
2022-08-31 20:37                             ` John Stoffel
2022-09-02 14:56                               ` Peter Sanders
2022-09-02 18:52                                 ` Peter Sanders
2022-09-02 19:12                                   ` John Stoffel
2022-09-03  0:39                                     ` Peter Sanders [this message]
2022-09-03  5:51                                       ` Peter Sanders
2022-09-05 19:36                                         ` John Stoffel
2022-09-05 20:16                                           ` Peter Sanders
2022-09-05 19:25                                       ` John Stoffel
2022-08-28 15:10 ` John Stoffel
2022-08-28 17:11 ` Andy Smith
2022-08-28 17:22   ` Andy Smith
2022-08-28 17:34     ` Peter Sanders

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAKAPSk+jhN-T9ubdFBs6N2k10veT2u5noyQ8NBnRE9igeZgn7g@mail.gmail.com \
    --to=plsander@gmail.com \
    --cc=antlists@youngman.org.uk \
    --cc=fedora@eyal.emu.id.au \
    --cc=john@stoffel.org \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.