* [PATCH v2 0/3] Use MD_BROKEN for redundant arrays
@ 2021-12-16 14:52 Mariusz Tkaczyk
  2021-12-16 14:52 ` [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear Mariusz Tkaczyk
                   ` (3 more replies)
  0 siblings, 4 replies; 39+ messages in thread
From: Mariusz Tkaczyk @ 2021-12-16 14:52 UTC (permalink / raw)
  To: song; +Cc: linux-raid

Hi Song,
As discussed in the first version[1], I've done the following:
- added error handlers for RAID0 and linear
- described the raid5 bug
- described MD_BROKEN
- removed the MD_BROKEN check for superblock update.

[1] https://lore.kernel.org/linux-raid/20210917153452.5593-1-mariusz.tkaczyk@linux.intel.com/

Mariusz Tkaczyk (3):
  raid0, linear, md: add error_handlers for raid0 and linear
  md: Set MD_BROKEN for RAID1 and RAID10
  raid5: introduce MD_BROKEN

 drivers/md/md-linear.c | 15 ++++++++++++++-
 drivers/md/md.c        | 23 +++++++++++++++--------
 drivers/md/md.h        | 14 ++++----------
 drivers/md/raid0.c     | 15 ++++++++++++++-
 drivers/md/raid1.c     |  1 +
 drivers/md/raid10.c    |  1 +
 drivers/md/raid5.c     | 34 ++++++++++++++++------------------
 7 files changed, 65 insertions(+), 38 deletions(-)

-- 
2.26.2


^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2021-12-16 14:52 [PATCH v2 0/3] Use MD_BROKEN for redundant arrays Mariusz Tkaczyk
@ 2021-12-16 14:52 ` Mariusz Tkaczyk
  2021-12-17  2:00   ` Guoqing Jiang
  2021-12-19  3:20   ` Xiao Ni
  2021-12-16 14:52 ` [PATCH 2/3] md: Set MD_BROKEN for RAID1 and RAID10 Mariusz Tkaczyk
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 39+ messages in thread
From: Mariusz Tkaczyk @ 2021-12-16 14:52 UTC (permalink / raw)
  To: song; +Cc: linux-raid

Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and fail BIOs
if a member is gone") allowed writes to be finished earlier (before the
level-dependent actions) for non-redundant arrays.

To achieve that, MD_BROKEN is added to mddev->flags when a drive
disappearance is detected. This is done in is_mddev_broken(), which is
confusing and not consistent with the other levels, where
error_handler() is used.
This patch adds an appropriate error_handler for raid0 and linear.

It also adapts md_error(): for these levels we only want to call
.error_handler. mddev->pers->sync_request is additionally checked; its
existence implies a level with redundancy.

Usage of the error_handler means that a disk failure can be requested
from userspace: the user can fail the array via the #mdadm --set-faulty
command. This is not safe and will be fixed in mdadm. It is recoverable
because the failed state is not recorded in the metadata; after the
next assembly the array will be read-write again. For safety reasons it
is better to keep MD_BROKEN in runtime only.

Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
 drivers/md/md-linear.c | 15 ++++++++++++++-
 drivers/md/md.c        |  6 +++++-
 drivers/md/md.h        | 10 ++--------
 drivers/md/raid0.c     | 15 ++++++++++++++-
 4 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 1ff51647a682..415d2615d17a 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -233,7 +233,8 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
 		     bio_sector < start_sector))
 		goto out_of_bounds;
 
-	if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
+	if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
+		md_error(mddev, tmp_dev->rdev);
 		bio_io_error(bio);
 		return true;
 	}
@@ -281,6 +282,17 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev)
 	seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
 }
 
+static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
+{
+	char b[BDEVNAME_SIZE];
+
+	if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
+		pr_crit("md/linear:%s: Disk failure on %s detected.\n"
+			"md/linear:%s: Cannot continue, failing array.\n",
+			mdname(mddev), bdevname(rdev->bdev, b),
+			mdname(mddev));
+}
+
 static void linear_quiesce(struct mddev *mddev, int state)
 {
 }
@@ -297,6 +309,7 @@ static struct md_personality linear_personality =
 	.hot_add_disk	= linear_add,
 	.size		= linear_size,
 	.quiesce	= linear_quiesce,
+	.error_handler	= linear_error,
 };
 
 static int __init linear_init (void)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index e8666bdc0d28..f888ef197765 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7982,7 +7982,11 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
 
 	if (!mddev->pers || !mddev->pers->error_handler)
 		return;
-	mddev->pers->error_handler(mddev,rdev);
+	mddev->pers->error_handler(mddev, rdev);
+
+	if (!mddev->pers->sync_request)
+		return;
+
 	if (mddev->degraded)
 		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
 	sysfs_notify_dirent_safe(rdev->sysfs_state);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 53ea7a6961de..bc3f2094d0b6 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -762,15 +762,9 @@ extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
 struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
 struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
 
-static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
+static inline bool is_rdev_broken(struct md_rdev *rdev)
 {
-	if (!disk_live(rdev->bdev->bd_disk)) {
-		if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
-			pr_warn("md: %s: %s array has a missing/failed member\n",
-				mdname(rdev->mddev), md_type);
-		return true;
-	}
-	return false;
+	return !disk_live(rdev->bdev->bd_disk);
 }
 
 static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 62c8b6adac70..cb9edd65d8a9 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -564,8 +564,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
 		return true;
 	}
 
-	if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
+	if (unlikely(is_rdev_broken(tmp_dev))) {
 		bio_io_error(bio);
+		md_error(mddev, tmp_dev);
 		return true;
 	}
 
@@ -588,6 +589,17 @@ static void raid0_status(struct seq_file *seq, struct mddev *mddev)
 	return;
 }
 
+static void raid0_error(struct mddev *mddev, struct md_rdev *rdev)
+{
+	char b[BDEVNAME_SIZE];
+
+	if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
+		pr_crit("md/raid0:%s: Disk failure on %s detected.\n"
+			"md/raid0:%s: Cannot continue, failing array.\n",
+			mdname(mddev), bdevname(rdev->bdev, b),
+			mdname(mddev));
+}
+
 static void *raid0_takeover_raid45(struct mddev *mddev)
 {
 	struct md_rdev *rdev;
@@ -763,6 +775,7 @@ static struct md_personality raid0_personality=
 	.size		= raid0_size,
 	.takeover	= raid0_takeover,
 	.quiesce	= raid0_quiesce,
+	.error_handler	= raid0_error,
 };
 
 static int __init raid0_init (void)
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 2/3] md: Set MD_BROKEN for RAID1 and RAID10
  2021-12-16 14:52 [PATCH v2 0/3] Use MD_BROKEN for redundant arrays Mariusz Tkaczyk
  2021-12-16 14:52 ` [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear Mariusz Tkaczyk
@ 2021-12-16 14:52 ` Mariusz Tkaczyk
  2021-12-17  2:16   ` Guoqing Jiang
  2021-12-22  7:24   ` Xiao Ni
  2021-12-16 14:52 ` [PATCH 3/3] raid5: introduce MD_BROKEN Mariusz Tkaczyk
  2021-12-17  0:52 ` [PATCH v2 0/3] Use MD_BROKEN for redundant arrays Song Liu
  3 siblings, 2 replies; 39+ messages in thread
From: Mariusz Tkaczyk @ 2021-12-16 14:52 UTC (permalink / raw)
  To: song; +Cc: linux-raid

There was no direct mechanism to determine RAID failure outside the
personality. It was done by checking rdev->flags after executing
md_error(): if "Faulty" was not set, then -EBUSY was returned to
userspace. As a result, mdadm expects -EBUSY if the array has become
failed. There are several reasons not to consider this mechanism
correct:
- a drive may not be marked Faulty for reasons other than array
failure;
- there are paths where -EBUSY is not reported and a drive removal
leads to a failed array without any notification to userspace;
- in the array-failure case, -EBUSY seems to be the wrong status: the
array is not busy, the removal process just cannot proceed safely.

The -EBUSY expectation cannot be removed without breaking compatibility
with userspace, but we can adopt the failed-state verification method.

In this patch, support for the MD_BROKEN flag, so far used to mark a
non-redundant array as dead, is added to RAID1 and RAID10. Support for
RAID456 is added in the next commit.

Now the array failure can be checked directly, so verify the MD_BROKEN
flag, but still return -EBUSY to userspace.

As in the previous commit, this means that #mdadm --set-faulty is able
to mark the array as failed. The previously proposed workaround is valid
if the optional functionality 9a567843f79("md: allow last device to be
forcibly removed from RAID1/RAID10.") is disabled.

Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
 drivers/md/md.c     | 17 ++++++++++-------
 drivers/md/md.h     |  4 ++--
 drivers/md/raid1.c  |  1 +
 drivers/md/raid10.c |  1 +
 4 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index f888ef197765..fda8473f96b8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2983,10 +2983,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
 
 	if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
 		md_error(rdev->mddev, rdev);
-		if (test_bit(Faulty, &rdev->flags))
-			err = 0;
-		else
+
+		if (test_bit(MD_BROKEN, &rdev->mddev->flags))
 			err = -EBUSY;
+		else
+			err = 0;
 	} else if (cmd_match(buf, "remove")) {
 		if (rdev->mddev->pers) {
 			clear_bit(Blocked, &rdev->flags);
@@ -7441,7 +7442,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
 		err =  -ENODEV;
 	else {
 		md_error(mddev, rdev);
-		if (!test_bit(Faulty, &rdev->flags))
+		if (test_bit(MD_BROKEN, &mddev->flags))
 			err = -EBUSY;
 	}
 	rcu_read_unlock();
@@ -7987,12 +7988,14 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
 	if (!mddev->pers->sync_request)
 		return;
 
-	if (mddev->degraded)
+	if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
 		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
 	sysfs_notify_dirent_safe(rdev->sysfs_state);
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-	md_wakeup_thread(mddev->thread);
+	if (!test_bit(MD_BROKEN, &mddev->flags)) {
+		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+		md_wakeup_thread(mddev->thread);
+	}
 	if (mddev->event_work.func)
 		queue_work(md_misc_wq, &mddev->event_work);
 	md_new_event();
diff --git a/drivers/md/md.h b/drivers/md/md.h
index bc3f2094d0b6..d3a897868695 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -259,8 +259,8 @@ enum mddev_flags {
 	MD_NOT_READY,		/* do_md_run() is active, so 'array_state'
 				 * must not report that array is ready yet
 				 */
-	MD_BROKEN,              /* This is used in RAID-0/LINEAR only, to stop
-				 * I/O in case an array member is gone/failed.
+	MD_BROKEN,              /* This is used to stop I/O and mark device as
+				 * dead in case an array becomes failed.
 				 */
 };
 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7dc8026cf6ee..45dc75f90476 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1638,6 +1638,7 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
 		 */
 		conf->recovery_disabled = mddev->recovery_disabled;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
+		set_bit(MD_BROKEN, &mddev->flags);
 		return;
 	}
 	set_bit(Blocked, &rdev->flags);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index dde98f65bd04..d7cefd212e6b 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1964,6 +1964,7 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
 		 * Don't fail the drive, just return an IO error.
 		 */
 		spin_unlock_irqrestore(&conf->device_lock, flags);
+		set_bit(MD_BROKEN, &mddev->flags);
 		return;
 	}
 	if (test_and_clear_bit(In_sync, &rdev->flags))
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 3/3] raid5: introduce MD_BROKEN
  2021-12-16 14:52 [PATCH v2 0/3] Use MD_BROKEN for redundant arrays Mariusz Tkaczyk
  2021-12-16 14:52 ` [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear Mariusz Tkaczyk
  2021-12-16 14:52 ` [PATCH 2/3] md: Set MD_BROKEN for RAID1 and RAID10 Mariusz Tkaczyk
@ 2021-12-16 14:52 ` Mariusz Tkaczyk
  2021-12-17  2:26   ` Guoqing Jiang
  2021-12-17  0:52 ` [PATCH v2 0/3] Use MD_BROKEN for redundant arrays Song Liu
  3 siblings, 1 reply; 39+ messages in thread
From: Mariusz Tkaczyk @ 2021-12-16 14:52 UTC (permalink / raw)
  To: song; +Cc: linux-raid

The raid456 module used to allow the array to reach the failed state.
It was fixed by fb73b357fb9 ("raid5: block failing device if raid will
be failed"). However, that fix introduced a bug: now if raid5 fails
during IO, it may end up with a hung task that never completes. The
Faulty flag on the device is necessary to process all requests; it is
checked many times, mainly in analyze_stripe().
Allow the Faulty flag to be set on the drive again, and set MD_BROKEN
if the array has failed.

As a result, this level is allowed to reach the failed state again, but
communication with userspace (via the -EBUSY status) is preserved.

This restores the possibility of failing the array via the #mdadm
--set-faulty command; it will be fixed by additional verification on
the mdadm side.

Reproduction steps:
 mdadm -CR imsm -e imsm -n 3 /dev/nvme[0-2]n1
 mdadm -CR r5 -e imsm -l5 -n3 /dev/nvme[0-2]n1 --assume-clean
 mkfs.xfs /dev/md126 -f
 mount /dev/md126 /mnt/root/

 fio --filename=/mnt/root/file --size=5GB --direct=1 --rw=randrw
--bs=64k --ioengine=libaio --iodepth=64 --runtime=240 --numjobs=4
--time_based --group_reporting --name=throughput-test-job
--eta-newline=1 &

 echo 1 > /sys/block/nvme2n1/device/device/remove
 echo 1 > /sys/block/nvme1n1/device/device/remove

 [ 1475.787779] Call Trace:
 [ 1475.793111] __schedule+0x2a6/0x700
 [ 1475.799460] schedule+0x38/0xa0
 [ 1475.805454] raid5_get_active_stripe+0x469/0x5f0 [raid456]
 [ 1475.813856] ? finish_wait+0x80/0x80
 [ 1475.820332] raid5_make_request+0x180/0xb40 [raid456]
 [ 1475.828281] ? finish_wait+0x80/0x80
 [ 1475.834727] ? finish_wait+0x80/0x80
 [ 1475.841127] ? finish_wait+0x80/0x80
 [ 1475.847480] md_handle_request+0x119/0x190
 [ 1475.854390] md_make_request+0x8a/0x190
 [ 1475.861041] generic_make_request+0xcf/0x310
 [ 1475.868145] submit_bio+0x3c/0x160
 [ 1475.874355] iomap_dio_submit_bio.isra.20+0x51/0x60
 [ 1475.882070] iomap_dio_bio_actor+0x175/0x390
 [ 1475.889149] iomap_apply+0xff/0x310
 [ 1475.895447] ? iomap_dio_bio_actor+0x390/0x390
 [ 1475.902736] ? iomap_dio_bio_actor+0x390/0x390
 [ 1475.909974] iomap_dio_rw+0x2f2/0x490
 [ 1475.916415] ? iomap_dio_bio_actor+0x390/0x390
 [ 1475.923680] ? atime_needs_update+0x77/0xe0
 [ 1475.930674] ? xfs_file_dio_aio_read+0x6b/0xe0 [xfs]
 [ 1475.938455] xfs_file_dio_aio_read+0x6b/0xe0 [xfs]
 [ 1475.946084] xfs_file_read_iter+0xba/0xd0 [xfs]
 [ 1475.953403] aio_read+0xd5/0x180
 [ 1475.959395] ? _cond_resched+0x15/0x30
 [ 1475.965907] io_submit_one+0x20b/0x3c0
 [ 1475.972398] __x64_sys_io_submit+0xa2/0x180
 [ 1475.979335] ? do_io_getevents+0x7c/0xc0
 [ 1475.986009] do_syscall_64+0x5b/0x1a0
 [ 1475.992419] entry_SYSCALL_64_after_hwframe+0x65/0xca
 [ 1476.000255] RIP: 0033:0x7f11fc27978d
 [ 1476.006631] Code: Bad RIP value.
 [ 1476.073251] INFO: task fio:3877 blocked for more than 120 seconds.

Fixes: fb73b357fb9 ("raid5: block failing device if raid will be failed")
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
 drivers/md/raid5.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 1240a5c16af8..8b5561811431 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -690,6 +690,9 @@ static int has_failed(struct r5conf *conf)
 {
 	int degraded;
 
+	if (test_bit(MD_BROKEN, &conf->mddev->flags))
+		return 1;
+
 	if (conf->mddev->reshape_position == MaxSector)
 		return conf->mddev->degraded > conf->max_degraded;
 
@@ -2877,34 +2880,29 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 	unsigned long flags;
 	pr_debug("raid456: error called\n");
 
-	spin_lock_irqsave(&conf->device_lock, flags);
-
-	if (test_bit(In_sync, &rdev->flags) &&
-	    mddev->degraded == conf->max_degraded) {
-		/*
-		 * Don't allow to achieve failed state
-		 * Don't try to recover this device
-		 */
-		conf->recovery_disabled = mddev->recovery_disabled;
-		spin_unlock_irqrestore(&conf->device_lock, flags);
-		return;
-	}
+	pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n",
+		mdname(mddev), bdevname(rdev->bdev, b));
 
+	spin_lock_irqsave(&conf->device_lock, flags);
 	set_bit(Faulty, &rdev->flags);
 	clear_bit(In_sync, &rdev->flags);
 	mddev->degraded = raid5_calc_degraded(conf);
+
+	if (has_failed(conf)) {
+		set_bit(MD_BROKEN, &mddev->flags);
+		conf->recovery_disabled = mddev->recovery_disabled;
+		pr_crit("md/raid:%s: Cannot continue on %d devices.\n",
+			mdname(mddev), conf->raid_disks - mddev->degraded);
+	} else
+		pr_crit("md/raid:%s: Operation continuing on %d devices.\n",
+			mdname(mddev), conf->raid_disks - mddev->degraded);
+
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 
 	set_bit(Blocked, &rdev->flags);
 	set_mask_bits(&mddev->sb_flags, 0,
 		      BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
-	pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n"
-		"md/raid:%s: Operation continuing on %d devices.\n",
-		mdname(mddev),
-		bdevname(rdev->bdev, b),
-		mdname(mddev),
-		conf->raid_disks - mddev->degraded);
 	r5c_update_on_rdev_error(mddev, rdev);
 }
 
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 0/3] Use MD_BROKEN for redundant arrays
  2021-12-16 14:52 [PATCH v2 0/3] Use MD_BROKEN for redundant arrays Mariusz Tkaczyk
                   ` (2 preceding siblings ...)
  2021-12-16 14:52 ` [PATCH 3/3] raid5: introduce MD_BROKEN Mariusz Tkaczyk
@ 2021-12-17  0:52 ` Song Liu
  2021-12-17  8:02   ` Mariusz Tkaczyk
  3 siblings, 1 reply; 39+ messages in thread
From: Song Liu @ 2021-12-17  0:52 UTC (permalink / raw)
  To: Mariusz Tkaczyk; +Cc: linux-raid

On Thu, Dec 16, 2021 at 6:52 AM Mariusz Tkaczyk
<mariusz.tkaczyk@linux.intel.com> wrote:
>
> Hi Song,
> As discussed in the first version[1], I've done the following:
> - added error handlers for RAID0 and linear
> - described the raid5 bug
> - described MD_BROKEN
> - removed the MD_BROKEN check for superblock update.
>
> [1] https://lore.kernel.org/linux-raid/20210917153452.5593-1-mariusz.tkaczyk@linux.intel.com/
>
> Mariusz Tkaczyk (3):
>   raid0, linear, md: add error_handlers for raid0 and linear
>   md: Set MD_BROKEN for RAID1 and RAID10
>   raid5: introduce MD_BROKEN

The set looks good to me. The only concern is that we changed some
messages. While dmesg is not a stable API, I believe there are people
who grep it to detect errors. Therefore, please try to keep these
messages the same as before (as much as possible).

Thanks,
Song

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2021-12-16 14:52 ` [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear Mariusz Tkaczyk
@ 2021-12-17  2:00   ` Guoqing Jiang
  2021-12-17  2:07     ` Guoqing Jiang
                       ` (2 more replies)
  2021-12-19  3:20   ` Xiao Ni
  1 sibling, 3 replies; 39+ messages in thread
From: Guoqing Jiang @ 2021-12-17  2:00 UTC (permalink / raw)
  To: Mariusz Tkaczyk, song; +Cc: linux-raid



On 12/16/21 10:52 PM, Mariusz Tkaczyk wrote:
> Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and fail BIOs
> if a member is gone") allowed writes to be finished earlier (before the
> level-dependent actions) for non-redundant arrays.
>
> To achieve that, MD_BROKEN is added to mddev->flags when a drive
> disappearance is detected. This is done in is_mddev_broken(), which is
> confusing and not consistent with the other levels, where
> error_handler() is used.
> This patch adds an appropriate error_handler for raid0 and linear.
>
> It also adapts md_error(): for these levels we only want to call
> .error_handler. mddev->pers->sync_request is additionally checked; its
> existence implies a level with redundancy.
>
> Usage of the error_handler means that a disk failure can be requested
> from userspace: the user can fail the array via the #mdadm --set-faulty
> command. This is not safe and will be fixed in mdadm. It is recoverable
> because the failed state is not recorded in the metadata; after the
> next assembly the array will be read-write again. For safety reasons it
> is better to keep MD_BROKEN in runtime only.
>
> Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
> ---
>   drivers/md/md-linear.c | 15 ++++++++++++++-
>   drivers/md/md.c        |  6 +++++-
>   drivers/md/md.h        | 10 ++--------
>   drivers/md/raid0.c     | 15 ++++++++++++++-
>   4 files changed, 35 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
> index 1ff51647a682..415d2615d17a 100644
> --- a/drivers/md/md-linear.c
> +++ b/drivers/md/md-linear.c
> @@ -233,7 +233,8 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
>   		     bio_sector < start_sector))
>   		goto out_of_bounds;
>   
> -	if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
> +	if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
> +		md_error(mddev, tmp_dev->rdev);
>   		bio_io_error(bio);
>   		return true;
>   	}
> @@ -281,6 +282,17 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev)
>   	seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
>   }
>   
> +static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
> +{
> +	char b[BDEVNAME_SIZE];
> +
> +	if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
> +		pr_crit("md/linear%s: Disk failure on %s detected.\n"
> +			"md/linear:%s: Cannot continue, failing array.\n",
> +			mdname(mddev), bdevname(rdev->bdev, b),
> +			mdname(mddev));
> +}
> +

Did you consider using %pg to print the block device?
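
For example, something like this (untested sketch) would drop the
on-stack bdevname() buffer:

	pr_crit("md/linear:%s: Disk failure on %pg detected.\n"
		"md/linear:%s: Cannot continue, failing array.\n",
		mdname(mddev), rdev->bdev, mdname(mddev));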

>   static void linear_quiesce(struct mddev *mddev, int state)
>   {
>   }
> @@ -297,6 +309,7 @@ static struct md_personality linear_personality =
>   	.hot_add_disk	= linear_add,
>   	.size		= linear_size,
>   	.quiesce	= linear_quiesce,
> +	.error_handler	= linear_error,
>   };
>   
>   static int __init linear_init (void)
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index e8666bdc0d28..f888ef197765 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -7982,7 +7982,11 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
>   
>   	if (!mddev->pers || !mddev->pers->error_handler)
>   		return;
> -	mddev->pers->error_handler(mddev,rdev);
> +	mddev->pers->error_handler(mddev, rdev);
> +
> +	if (!mddev->pers->sync_request)
> +		return;
> +

What is the reason for the above change? I suppose the dm event can be missed.

>   	if (mddev->degraded)
>   		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
>   	sysfs_notify_dirent_safe(rdev->sysfs_state);
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 53ea7a6961de..bc3f2094d0b6 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -762,15 +762,9 @@ extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
>   struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
>   struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
>   
> -static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
> +static inline bool is_rdev_broken(struct md_rdev *rdev)
>   {
> -	if (!disk_live(rdev->bdev->bd_disk)) {
> -		if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
> -			pr_warn("md: %s: %s array has a missing/failed member\n",
> -				mdname(rdev->mddev), md_type);
> -		return true;
> -	}
> -	return false;
> +	return !disk_live(rdev->bdev->bd_disk);
>   }

I would suggest only renaming the function; at least the md_type info
is useful.
Besides, if MD_BROKEN is not set here, then I think you can also delete
the flag from other places as well.
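
I.e. keep the old body and just give the helper the new name, roughly
(untested):

	static inline bool is_rdev_broken(struct md_rdev *rdev, const char *md_type)
	{
		if (!disk_live(rdev->bdev->bd_disk)) {
			/* report once, keeping the level name in the log */
			if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
				pr_warn("md: %s: %s array has a missing/failed member\n",
					mdname(rdev->mddev), md_type);
			return true;
		}
		return false;
	}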

>    static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
> index 62c8b6adac70..cb9edd65d8a9 100644
> --- a/drivers/md/raid0.c
> +++ b/drivers/md/raid0.c
> @@ -564,8 +564,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
>   		return true;
>   	}
>   
> -	if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
> +	if (unlikely(is_rdev_broken(tmp_dev))) {
>   		bio_io_error(bio);
> +		md_error(mddev, tmp_dev);
>   		return true;
>   	}
>   
> @@ -588,6 +589,17 @@ static void raid0_status(struct seq_file *seq, struct mddev *mddev)
>   	return;
>   }
>   
> +static void raid0_error(struct mddev *mddev, struct md_rdev *rdev)
> +{
> +	char b[BDEVNAME_SIZE];
> +
> +	if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
> +		pr_crit("md/raid0%s: Disk failure on %s detected.\n"
> +			"md/raid0:%s: Cannot continue, failing array.\n",
> +			mdname(mddev), bdevname(rdev->bdev, b),
> +			mdname(mddev));
> +}
> +
>   static void *raid0_takeover_raid45(struct mddev *mddev)
>   {
>   	struct md_rdev *rdev;
> @@ -763,6 +775,7 @@ static struct md_personality raid0_personality=
>   	.size		= raid0_size,
>   	.takeover	= raid0_takeover,
>   	.quiesce	= raid0_quiesce,
> +	.error_handler	= raid0_error,
>   };
>   

What is the advantage of adding error_handler for raid0 and linear? IOW,
without implementing the error_handler, is there some existing issue?

Thanks,
Guoqing

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2021-12-17  2:00   ` Guoqing Jiang
@ 2021-12-17  2:07     ` Guoqing Jiang
  2021-12-19  3:26     ` Xiao Ni
  2021-12-20  9:39     ` Mariusz Tkaczyk
  2 siblings, 0 replies; 39+ messages in thread
From: Guoqing Jiang @ 2021-12-17  2:07 UTC (permalink / raw)
  To: Mariusz Tkaczyk, song; +Cc: linux-raid



On 12/17/21 10:00 AM, Guoqing Jiang wrote:
>> +static inline bool is_rdev_broken(struct md_rdev *rdev)
>>   {
>> -    if (!disk_live(rdev->bdev->bd_disk)) {
>> -        if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
>> -            pr_warn("md: %s: %s array has a missing/failed member\n",
>> -                mdname(rdev->mddev), md_type);
>> -        return true;
>> -    }
>> -    return false;
>> +    return !disk_live(rdev->bdev->bd_disk);
>>   }
>
>
> Besides, if MD_BROKEN is not set here, then I think you can also
> delete the flag from other places as well.

Oops, I didn't notice the flag is set elsewhere.

Thanks,
Guoqing

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 2/3] md: Set MD_BROKEN for RAID1 and RAID10
  2021-12-16 14:52 ` [PATCH 2/3] md: Set MD_BROKEN for RAID1 and RAID10 Mariusz Tkaczyk
@ 2021-12-17  2:16   ` Guoqing Jiang
  2021-12-22  7:24   ` Xiao Ni
  1 sibling, 0 replies; 39+ messages in thread
From: Guoqing Jiang @ 2021-12-17  2:16 UTC (permalink / raw)
  To: Mariusz Tkaczyk, song; +Cc: linux-raid



On 12/16/21 10:52 PM, Mariusz Tkaczyk wrote:
> There was no direct mechanism to determine RAID failure outside the
> personality. It was done by checking rdev->flags after executing
> md_error(): if "Faulty" was not set, then -EBUSY was returned to
> userspace. As a result, mdadm expects -EBUSY if the array has become
> failed. There are several reasons not to consider this mechanism
> correct:
> - a drive may not be marked Faulty for reasons other than array
> failure;
> - there are paths where -EBUSY is not reported and a drive removal
> leads to a failed array without any notification to userspace;
> - in the array-failure case, -EBUSY seems to be the wrong status: the
> array is not busy, the removal process just cannot proceed safely.
>
> The -EBUSY expectation cannot be removed without breaking
> compatibility with userspace, but we can adopt the failed-state
> verification method.
>
> In this patch, support for the MD_BROKEN flag, so far used to mark a
> non-redundant array as dead, is added to RAID1 and RAID10. Support for
> RAID456 is added in the next commit.
>
> Now the array failure can be checked directly, so verify the MD_BROKEN
> flag, but still return -EBUSY to userspace.
>
> As in the previous commit, this means that #mdadm --set-faulty is able
> to mark the array as failed. The previously proposed workaround is
> valid if the optional
> functionality 9a567843f79("md: allow last device to be forcibly

s/9a567843f79/9a567843f7ce/

> removed from RAID1/RAID10.") is disabled.
>
> Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
> ---
>   drivers/md/md.c     | 17 ++++++++++-------
>   drivers/md/md.h     |  4 ++--
>   drivers/md/raid1.c  |  1 +
>   drivers/md/raid10.c |  1 +
>   4 files changed, 14 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index f888ef197765..fda8473f96b8 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -2983,10 +2983,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
>   
>   	if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
>   		md_error(rdev->mddev, rdev);
> -		if (test_bit(Faulty, &rdev->flags))
> -			err = 0;
> -		else
> +
> +		if (test_bit(MD_BROKEN, &rdev->mddev->flags))
>   			err = -EBUSY;
> +		else
> +			err = 0;
>   	} else if (cmd_match(buf, "remove")) {
>   		if (rdev->mddev->pers) {
>   			clear_bit(Blocked, &rdev->flags);
> @@ -7441,7 +7442,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
>   		err =  -ENODEV;
>   	else {
>   		md_error(mddev, rdev);
> -		if (!test_bit(Faulty, &rdev->flags))
> +		if (test_bit(MD_BROKEN, &mddev->flags))
>   			err = -EBUSY;
>   	}
>   	rcu_read_unlock();
> @@ -7987,12 +7988,14 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
>   	if (!mddev->pers->sync_request)
>   		return;
>   
> -	if (mddev->degraded)
> +	if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
>   		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
>   	sysfs_notify_dirent_safe(rdev->sysfs_state);
>   	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> -	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> -	md_wakeup_thread(mddev->thread);
> +	if (!test_bit(MD_BROKEN, &mddev->flags)) {
> +		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> +		md_wakeup_thread(mddev->thread);
> +	}
>   	if (mddev->event_work.func)
>   		queue_work(md_misc_wq, &mddev->event_work);
>   	md_new_event();
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index bc3f2094d0b6..d3a897868695 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -259,8 +259,8 @@ enum mddev_flags {
>   	MD_NOT_READY,		/* do_md_run() is active, so 'array_state'
>   				 * must not report that array is ready yet
>   				 */
> -	MD_BROKEN,              /* This is used in RAID-0/LINEAR only, to stop
> -				 * I/O in case an array member is gone/failed.
> +	MD_BROKEN,              /* This is used to stop I/O and mark device as

IIUC, 'device' actually means the array; if so, could you change it to
'array' to make it clear?

> +				 * dead in case an array becomes failed.
>   				 */
>   };
>   
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index 7dc8026cf6ee..45dc75f90476 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -1638,6 +1638,7 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
>   		 */
>   		conf->recovery_disabled = mddev->recovery_disabled;
>   		spin_unlock_irqrestore(&conf->device_lock, flags);
> +		set_bit(MD_BROKEN, &mddev->flags);
>   		return;
>   	}
>   	set_bit(Blocked, &rdev->flags);
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index dde98f65bd04..d7cefd212e6b 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -1964,6 +1964,7 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
>   		 * Don't fail the drive, just return an IO error.
>   		 */
>   		spin_unlock_irqrestore(&conf->device_lock, flags);
> +		set_bit(MD_BROKEN, &mddev->flags);
>   		return;
>   	}
>   	if (test_and_clear_bit(In_sync, &rdev->flags))

Thanks,
Guoqing

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/3] raid5: introduce MD_BROKEN
  2021-12-16 14:52 ` [PATCH 3/3] raid5: introduce MD_BROKEN Mariusz Tkaczyk
@ 2021-12-17  2:26   ` Guoqing Jiang
  2021-12-17  8:37     ` Mariusz Tkaczyk
  0 siblings, 1 reply; 39+ messages in thread
From: Guoqing Jiang @ 2021-12-17  2:26 UTC (permalink / raw)
  To: Mariusz Tkaczyk, song; +Cc: linux-raid



On 12/16/21 10:52 PM, Mariusz Tkaczyk wrote:
> The raid456 module used to allow the array to reach the failed state.
> It was fixed by fb73b357fb9 ("raid5: block failing device if raid will
> be failed"). However, that fix introduced a bug: now if raid5 fails
> during IO, it may end up with a hung task that never completes. The
> Faulty flag on the device is necessary to process all requests; it is
> checked many times, mainly in analyze_stripe().
> Allow the Faulty flag to be set on the drive again, and set MD_BROKEN
> if the array has failed.
>
> As a result, this level is allowed to reach the failed state again,
> but communication with userspace (via the -EBUSY status) is preserved.
>
> This restores the possibility of failing the array via the #mdadm
> --set-faulty command; it will be fixed by additional verification on
> the mdadm side.
>
> Reproduction steps:
>   mdadm -CR imsm -e imsm -n 3 /dev/nvme[0-2]n1
>   mdadm -CR r5 -e imsm -l5 -n3 /dev/nvme[0-2]n1 --assume-clean
>   mkfs.xfs /dev/md126 -f
>   mount /dev/md126 /mnt/root/
>
>   fio --filename=/mnt/root/file --size=5GB --direct=1 --rw=randrw
> --bs=64k --ioengine=libaio --iodepth=64 --runtime=240 --numjobs=4
> --time_based --group_reporting --name=throughput-test-job
> --eta-newline=1 &
>
>   echo 1 > /sys/block/nvme2n1/device/device/remove
>   echo 1 > /sys/block/nvme1n1/device/device/remove
>
>   [ 1475.787779] Call Trace:
>   [ 1475.793111] __schedule+0x2a6/0x700
>   [ 1475.799460] schedule+0x38/0xa0
>   [ 1475.805454] raid5_get_active_stripe+0x469/0x5f0 [raid456]
>   [ 1475.813856] ? finish_wait+0x80/0x80
>   [ 1475.820332] raid5_make_request+0x180/0xb40 [raid456]
>   [ 1475.828281] ? finish_wait+0x80/0x80
>   [ 1475.834727] ? finish_wait+0x80/0x80
>   [ 1475.841127] ? finish_wait+0x80/0x80
>   [ 1475.847480] md_handle_request+0x119/0x190
>   [ 1475.854390] md_make_request+0x8a/0x190
>   [ 1475.861041] generic_make_request+0xcf/0x310
>   [ 1475.868145] submit_bio+0x3c/0x160
>   [ 1475.874355] iomap_dio_submit_bio.isra.20+0x51/0x60
>   [ 1475.882070] iomap_dio_bio_actor+0x175/0x390
>   [ 1475.889149] iomap_apply+0xff/0x310
>   [ 1475.895447] ? iomap_dio_bio_actor+0x390/0x390
>   [ 1475.902736] ? iomap_dio_bio_actor+0x390/0x390
>   [ 1475.909974] iomap_dio_rw+0x2f2/0x490
>   [ 1475.916415] ? iomap_dio_bio_actor+0x390/0x390
>   [ 1475.923680] ? atime_needs_update+0x77/0xe0
>   [ 1475.930674] ? xfs_file_dio_aio_read+0x6b/0xe0 [xfs]
>   [ 1475.938455] xfs_file_dio_aio_read+0x6b/0xe0 [xfs]
>   [ 1475.946084] xfs_file_read_iter+0xba/0xd0 [xfs]
>   [ 1475.953403] aio_read+0xd5/0x180
>   [ 1475.959395] ? _cond_resched+0x15/0x30
>   [ 1475.965907] io_submit_one+0x20b/0x3c0
>   [ 1475.972398] __x64_sys_io_submit+0xa2/0x180
>   [ 1475.979335] ? do_io_getevents+0x7c/0xc0
>   [ 1475.986009] do_syscall_64+0x5b/0x1a0
>   [ 1475.992419] entry_SYSCALL_64_after_hwframe+0x65/0xca
>   [ 1476.000255] RIP: 0033:0x7f11fc27978d
>   [ 1476.006631] Code: Bad RIP value.
>   [ 1476.073251] INFO: task fio:3877 blocked for more than 120 seconds.
>
> Fixes: fb73b357fb9 ("raid5: block failing device if raid will be failed")
> Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
> ---
>   drivers/md/raid5.c | 34 ++++++++++++++++------------------
>   1 file changed, 16 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 1240a5c16af8..8b5561811431 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -690,6 +690,9 @@ static int has_failed(struct r5conf *conf)
>   {
>   	int degraded;
>   
> +	if (test_bit(MD_BROKEN, &conf->mddev->flags))
> +		return 1;
> +
>   	if (conf->mddev->reshape_position == MaxSector)
>   		return conf->mddev->degraded > conf->max_degraded;
>   
> @@ -2877,34 +2880,29 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
>   	unsigned long flags;
>   	pr_debug("raid456: error called\n");
>   
> -	spin_lock_irqsave(&conf->device_lock, flags);
> -
> -	if (test_bit(In_sync, &rdev->flags) &&
> -	    mddev->degraded == conf->max_degraded) {
> -		/*
> -		 * Don't allow to achieve failed state
> -		 * Don't try to recover this device
> -		 */
> -		conf->recovery_disabled = mddev->recovery_disabled;
> -		spin_unlock_irqrestore(&conf->device_lock, flags);
> -		return;
> -	}
> +	pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n",
> +		mdname(mddev), bdevname(rdev->bdev, b));
>   
> +	spin_lock_irqsave(&conf->device_lock, flags);
>   	set_bit(Faulty, &rdev->flags);
>   	clear_bit(In_sync, &rdev->flags);
>   	mddev->degraded = raid5_calc_degraded(conf);
> +
> +	if (has_failed(conf)) {
> +		set_bit(MD_BROKEN, &mddev->flags);

What about other callers of has_failed()? Do they need to set the
BROKEN flag? Or set the flag in has_failed() if it returns true, just
FYI.
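
Just to illustrate the second idea, something like this (untested):

	static int has_failed(struct r5conf *conf)
	{
		int degraded = conf->mddev->degraded;

		if (conf->mddev->reshape_position != MaxSector)
			degraded = raid5_calc_degraded(conf);

		if (degraded > conf->max_degraded) {
			/* set_bit() is atomic, so no lock is needed here */
			set_bit(MD_BROKEN, &conf->mddev->flags);
			return 1;
		}
		return 0;
	}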

Thanks,
Guoqing

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 0/3] Use MD_BROKEN for redundant arrays
  2021-12-17  0:52 ` [PATCH v2 0/3] Use MD_BROKEN for redundant arrays Song Liu
@ 2021-12-17  8:02   ` Mariusz Tkaczyk
  2022-01-25 15:52     ` Mariusz Tkaczyk
  0 siblings, 1 reply; 39+ messages in thread
From: Mariusz Tkaczyk @ 2021-12-17  8:02 UTC (permalink / raw)
  To: Song Liu; +Cc: linux-raid

Hi Song,

On Thu, 16 Dec 2021 16:52:23 -0800
Song Liu <song@kernel.org> wrote:
> > Mariusz Tkaczyk (3):
> >   raid0, linear, md: add error_handlers for raid0 and linear
> >   md: Set MD_BROKEN for RAID1 and RAID10
> >   raid5: introduce MD_BROKEN  
> 
> The set looks good to me. The only concern is that we changed some
> messages. While dmesg is not a stable API, I believe there are people
> who grep it to detect errors. Therefore, please try to keep these
> messages the same as before (as much as possible).

Will do.
After sending it, I realized that my approach is not correct when
mddev->fail_last_dev is on. MD_BROKEN should be set even if we agree to
remove the "last" drive. I will fix it too.

Thanks,
Mariusz



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/3] raid5: introduce MD_BROKEN
  2021-12-17  2:26   ` Guoqing Jiang
@ 2021-12-17  8:37     ` Mariusz Tkaczyk
  2021-12-22  1:46       ` Guoqing Jiang
  0 siblings, 1 reply; 39+ messages in thread
From: Mariusz Tkaczyk @ 2021-12-17  8:37 UTC (permalink / raw)
  To: Guoqing Jiang; +Cc: song, linux-raid

Hi Guoqing,

On Fri, 17 Dec 2021 10:26:27 +0800
Guoqing Jiang <guoqing.jiang@linux.dev> wrote:

> > diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> > index 1240a5c16af8..8b5561811431 100644
> > --- a/drivers/md/raid5.c
> > +++ b/drivers/md/raid5.c
> > @@ -690,6 +690,9 @@ static int has_failed(struct r5conf *conf)
> >   {
> >   	int degraded;
> >   
> > +	if (test_bit(MD_BROKEN, &conf->mddev->flags))
> > +		return 1;
> > +
> >   	if (conf->mddev->reshape_position == MaxSector)
> >   		return conf->mddev->degraded > conf->max_degraded;
> >   
> > @@ -2877,34 +2880,29 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
> >   	unsigned long flags;
> >   	pr_debug("raid456: error called\n");
> >   
> > -	spin_lock_irqsave(&conf->device_lock, flags);
> > -
> > -	if (test_bit(In_sync, &rdev->flags) &&
> > -	    mddev->degraded == conf->max_degraded) {
> > -		/*
> > -		 * Don't allow to achieve failed state
> > -		 * Don't try to recover this device
> > -		 */
> > -		conf->recovery_disabled = mddev->recovery_disabled;
> > -		spin_unlock_irqrestore(&conf->device_lock, flags);
> > -		return;
> > -	}
> > +	pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n",
> > +		mdname(mddev), bdevname(rdev->bdev, b));
> >   
> > +	spin_lock_irqsave(&conf->device_lock, flags);
> >   	set_bit(Faulty, &rdev->flags);
> >   	clear_bit(In_sync, &rdev->flags);
> >   	mddev->degraded = raid5_calc_degraded(conf);
> > +
> > +	if (has_failed(conf)) {
> > +		set_bit(MD_BROKEN, &mddev->flags);  
> 
> What about other callers of has_failed()? Do they need to set the
> BROKEN flag? Or set the flag in has_failed() if it returns true, just
> FYI.
> 

The function checks rdev->flags for Faulty. There are two places
where it can be set:
- raid5_error() (handled here)
- raid5_spare_active() (not a case IMO).

I left it in raid5_error() to be consistent with the other levels.
I think that moving it to has_failed() is safe, but I don't see any
additional value in it.
Note that in raid5_error() we hold device_lock; that is not true for
all has_failed() calls.

Do you have any recommendations?

Thanks,
Mariusz 


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2021-12-16 14:52 ` [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear Mariusz Tkaczyk
  2021-12-17  2:00   ` Guoqing Jiang
@ 2021-12-19  3:20   ` Xiao Ni
  2021-12-20  8:45     ` Mariusz Tkaczyk
  1 sibling, 1 reply; 39+ messages in thread
From: Xiao Ni @ 2021-12-19  3:20 UTC (permalink / raw)
  To: Mariusz Tkaczyk; +Cc: Song Liu, linux-raid

On Thu, Dec 16, 2021 at 10:53 PM Mariusz Tkaczyk
<mariusz.tkaczyk@linux.intel.com> wrote:
>
> Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and fail BIOs
> if a member is gone") allowed writes to be finished earlier (before the
> level-dependent actions) for non-redundant arrays.
>
> To achieve that, MD_BROKEN is added to mddev->flags when a drive
> disappearance is detected. This is done in is_mddev_broken(), which is
> confusing and not consistent with the other levels, where
> error_handler() is used.
> This patch adds an appropriate error_handler for raid0 and linear.
>
> It also adapts md_error(): for these levels we only want to call
> .error_handler. mddev->pers->sync_request is additionally checked; its
> existence implies a level with redundancy.
>
> Usage of the error_handler means that a disk failure can be requested
> from userspace: the user can fail the array via the #mdadm --set-faulty
> command. This is not safe and will be fixed in mdadm. It is recoverable
> because the failed state is not recorded in the metadata; after the
> next assembly the array will be read-write again. For safety reasons it
> is better to keep MD_BROKEN in runtime only.

Hi Mariusz

Let me call the paragraphs above chapter[1-4].

Could you explain more about the 'mdadm --set-faulty' part? I've read
this patch, but I don't know the relationship between the patch and
chapter 4.

In patch 2, you write "As in the previous commit, this means that
#mdadm --set-faulty is able to mark the array as failed." I tried to
run the command `mdadm /dev/md0 -f /dev/sda`, where md0 is a raid0; it
can't remove sda from md0.

Best Regards
Xiao


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2021-12-17  2:00   ` Guoqing Jiang
  2021-12-17  2:07     ` Guoqing Jiang
@ 2021-12-19  3:26     ` Xiao Ni
  2021-12-22  1:22       ` Guoqing Jiang
  2021-12-20  9:39     ` Mariusz Tkaczyk
  2 siblings, 1 reply; 39+ messages in thread
From: Xiao Ni @ 2021-12-19  3:26 UTC (permalink / raw)
  To: Guoqing Jiang; +Cc: Mariusz Tkaczyk, Song Liu, linux-raid

On Fri, Dec 17, 2021 at 10:00 AM Guoqing Jiang <guoqing.jiang@linux.dev> wrote:
>
>
>
> On 12/16/21 10:52 PM, Mariusz Tkaczyk wrote:
> > Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and fail BIOs
> > if a member is gone") allowed writes to be finished earlier (before the
> > level-dependent actions) for non-redundant arrays.
> >
> > To achieve that, MD_BROKEN is added to mddev->flags when a drive
> > disappearance is detected. This is done in is_mddev_broken(), which is
> > confusing and not consistent with the other levels, where
> > error_handler() is used.
> > This patch adds an appropriate error_handler for raid0 and linear.
> >
> > It also adapts md_error(): for these levels we only want to call
> > .error_handler. mddev->pers->sync_request is additionally checked; its
> > existence implies a level with redundancy.
> >
> > Usage of the error_handler means that a disk failure can be requested
> > from userspace: the user can fail the array via the #mdadm --set-faulty
> > command. This is not safe and will be fixed in mdadm. It is recoverable
> > because the failed state is not recorded in the metadata; after the
> > next assembly the array will be read-write again. For safety reasons it
> > is better to keep MD_BROKEN in runtime only.
> >
> > Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
> > ---
> >   drivers/md/md-linear.c | 15 ++++++++++++++-
> >   drivers/md/md.c        |  6 +++++-
> >   drivers/md/md.h        | 10 ++--------
> >   drivers/md/raid0.c     | 15 ++++++++++++++-
> >   4 files changed, 35 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
> > index 1ff51647a682..415d2615d17a 100644
> > --- a/drivers/md/md-linear.c
> > +++ b/drivers/md/md-linear.c
> > @@ -233,7 +233,8 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
> >                    bio_sector < start_sector))
> >               goto out_of_bounds;
> >
> > -     if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
> > +     if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
> > +             md_error(mddev, tmp_dev->rdev);
> >               bio_io_error(bio);
> >               return true;
> >       }
> > @@ -281,6 +282,17 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev)
> >       seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
> >   }
> >
> > +static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
> > +{
> > +     char b[BDEVNAME_SIZE];
> > +
> > +     if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
> > +             pr_crit("md/linear%s: Disk failure on %s detected.\n"
> > +                     "md/linear:%s: Cannot continue, failing array.\n",
> > +                     mdname(mddev), bdevname(rdev->bdev, b),
> > +                     mdname(mddev));
> > +}
> > +
>
> Did you consider using %pg to print the block device?
>
> >   static void linear_quiesce(struct mddev *mddev, int state)
> >   {
> >   }
> > @@ -297,6 +309,7 @@ static struct md_personality linear_personality =
> >       .hot_add_disk   = linear_add,
> >       .size           = linear_size,
> >       .quiesce        = linear_quiesce,
> > +     .error_handler  = linear_error,
> >   };
> >
> >   static int __init linear_init (void)
> > diff --git a/drivers/md/md.c b/drivers/md/md.c
> > index e8666bdc0d28..f888ef197765 100644
> > --- a/drivers/md/md.c
> > +++ b/drivers/md/md.c
> > @@ -7982,7 +7982,11 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
> >
> >       if (!mddev->pers || !mddev->pers->error_handler)
> >               return;
> > -     mddev->pers->error_handler(mddev,rdev);
> > +     mddev->pers->error_handler(mddev, rdev);
> > +
> > +     if (!mddev->pers->sync_request)
> > +             return;
> > +
>
> What is the reason for the above change? I suppose the dm event can be missed.

Hi Guoqing

What's the dm event here? From my understanding, this patch only wants
to set MD_BROKEN in the error handler. Now raid0 and linear only need
to do this job in md_error(): it checks ->sync_request here and can
return early for raid0 and linear, whose error handlers have already
set MD_BROKEN.
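
In other words, my reading of the new md_error() flow (comments mine,
not the literal diff):

	void md_error(struct mddev *mddev, struct md_rdev *rdev)
	{
		if (!mddev->pers || !mddev->pers->error_handler)
			return;
		mddev->pers->error_handler(mddev, rdev);

		/* raid0/linear: the error_handler has already set
		 * MD_BROKEN and there is no recovery to schedule.
		 */
		if (!mddev->pers->sync_request)
			return;

		/* levels with redundancy: notify sysfs and kick
		 * recovery as before.
		 */
		...
	}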

Best Regards
Xiao


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2021-12-19  3:20   ` Xiao Ni
@ 2021-12-20  8:45     ` Mariusz Tkaczyk
  2021-12-21  1:40       ` Xiao Ni
  0 siblings, 1 reply; 39+ messages in thread
From: Mariusz Tkaczyk @ 2021-12-20  8:45 UTC (permalink / raw)
  To: Xiao Ni; +Cc: Song Liu, linux-raid

Hi Xiao,

On Sun, 19 Dec 2021 11:20:59 +0800
Xiao Ni <xni@redhat.com> wrote:

> > Usage of the error_handler means that a disk failure can be
> > requested from userspace: the user can fail the array via the
> > #mdadm --set-faulty command. This is not safe and will be fixed in
> > mdadm. It is recoverable because the failed state is not recorded
> > in the metadata; after the next assembly the array will be
> > read-write again. For safety reasons it is better to keep MD_BROKEN
> > in runtime only.
> 
> Hi Mariusz
> 
> Let me call the paragraphs above chapter[1-4].
>
> Could you explain more about the 'mdadm --set-faulty' part? I've read
> this patch, but I don't know the relationship between the patch and
> chapter 4.
>
> In patch 2, you write "As in the previous commit, this means that
> #mdadm --set-faulty is able to mark the array as failed." I tried to
> run the command `mdadm /dev/md0 -f /dev/sda`, where md0 is a raid0;
> it can't remove sda from md0.

Did you test a kernel with my patchset applied?

I've added chapter 4 because I'm aware of the behavior change.
Currently for raid0, nothing happens when we try to write "faulty" to
md/<disk>/state.

After the change, the drive is not removed either, but MD_BROKEN is set
and any new write will be rejected. The drive will still be visible in
the array (I didn't change that). Should I add this to the description?

Thanks,
Mariusz


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2021-12-17  2:00   ` Guoqing Jiang
  2021-12-17  2:07     ` Guoqing Jiang
  2021-12-19  3:26     ` Xiao Ni
@ 2021-12-20  9:39     ` Mariusz Tkaczyk
  2 siblings, 0 replies; 39+ messages in thread
From: Mariusz Tkaczyk @ 2021-12-20  9:39 UTC (permalink / raw)
  To: Guoqing Jiang; +Cc: song, linux-raid

Hi Guoqing,
On Fri, 17 Dec 2021 10:00:25 +0800
Guoqing Jiang <guoqing.jiang@linux.dev> wrote:

> > @@ -281,6 +282,17 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev)
> >   	seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
> >   }
> >   
> > +static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
> > +{
> > +	char b[BDEVNAME_SIZE];
> > +
> > +	if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
> > +		pr_crit("md/linear:%s: Disk failure on %s detected.\n"
> > +			"md/linear:%s: Cannot continue, failing array.\n",
> > +			mdname(mddev), bdevname(rdev->bdev, b),
> > +			mdname(mddev));
> > +}
> > +  
> 
> Did you consider using %pg to print the block device?
 
Will do.
 
> > @@ -588,6 +589,17 @@ static void raid0_status(struct seq_file *seq, struct mddev *mddev)
> >   	return;
> >   }
> >   
> > +static void raid0_error(struct mddev *mddev, struct md_rdev *rdev)
> > +{
> > +	char b[BDEVNAME_SIZE];
> > +
> > +		pr_crit("md/raid0:%s: Disk failure on %s detected.\n"
> > +			"md/raid0:%s: Cannot continue, failing array.\n",
> > array.\n",
> > +			mdname(mddev), bdevname(rdev->bdev, b),
> > +			mdname(mddev));
> > +}
> > +
> >   static void *raid0_takeover_raid45(struct mddev *mddev)
> >   {
> >   	struct md_rdev *rdev;
> > @@ -763,6 +775,7 @@ static struct md_personality raid0_personality=
> >   	.size		= raid0_size,
> >   	.takeover	= raid0_takeover,
> >   	.quiesce	= raid0_quiesce,
> > +	.error_handler	= raid0_error,
> >   };
> >     
> 
> What is the advantage of adding error_handler for raid0 and linear?
> IOW, without implementing the error_handler, is there some existing
> issue?
> 
There is no issue. It was suggested by Song:
https://lore.kernel.org/linux-raid/CAPhsuW4X94eJ8aG6i7F0zCmgjuWHSRWuBH2gOJjTe5uWg_rMvQ@mail.gmail.com/

Thanks,
Mariusz

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2021-12-20  8:45     ` Mariusz Tkaczyk
@ 2021-12-21  1:40       ` Xiao Ni
  2021-12-21 13:56         ` Mariusz Tkaczyk
  0 siblings, 1 reply; 39+ messages in thread
From: Xiao Ni @ 2021-12-21  1:40 UTC (permalink / raw)
  To: Mariusz Tkaczyk; +Cc: Song Liu, linux-raid

On Mon, Dec 20, 2021 at 4:45 PM Mariusz Tkaczyk
<mariusz.tkaczyk@linux.intel.com> wrote:
>
> Hi Xiao,
>
> On Sun, 19 Dec 2021 11:20:59 +0800
> Xiao Ni <xni@redhat.com> wrote:
>
> > > Usage of the error_handler means that a disk failure can be
> > > requested from userspace: the user can fail the array via the
> > > #mdadm --set-faulty command. This is not safe and will be fixed
> > > in mdadm. It is recoverable because the failed state is not
> > > recorded in the metadata; after the next assembly the array will
> > > be read-write again. For safety reasons it is better to keep
> > > MD_BROKEN in runtime only.
> >
> > Hi Mariusz
> >
> > Let me call the paragraphs above chapter[1-4].
> >
> > Could you explain more about the 'mdadm --set-faulty' part? I've
> > read this patch, but I don't know the relationship between the
> > patch and chapter 4.
> >
> > In patch 2, you write "As in the previous commit, this means that
> > #mdadm --set-faulty is able to mark the array as failed." I tried
> > to run the command `mdadm /dev/md0 -f /dev/sda`, where md0 is a
> > raid0; it can't remove sda from md0.
>
> Did you test a kernel with my patchset applied?
>
> I've added chapter 4 because I'm aware of the behavior change.
> Currently for raid0, nothing happens when we try to write "faulty" to
> md/<disk>/state.
>
> After the change, the drive is not removed either, but MD_BROKEN is
> set and any new write will be rejected. The drive will still be
> visible in the array (I didn't change that). Should I add this to the
> description?

Thanks for the explanation. I understand now. But I still have one question.

Now, for a raid0, one member disk can't be removed from the array. The
command returns EBUSY and the raid0 still works well. That makes sense:
because all member disks are busy, the admin can't remove a member
disk, and mdadm gives a proper error.

With this patch, the situation changes: raid0_error() sets MD_BROKEN,
but in fact the array isn't broken. So is it really good to set
MD_BROKEN here? In patch 62f7b1989c0 ("md raid0/linear: Mark array as
'broken'...), MD_BROKEN was introduced for the case when a member disk
disappears and the disk is really broken; then the raid0/linear device
can't work anymore.

Best Regards
Xiao


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2021-12-21  1:40       ` Xiao Ni
@ 2021-12-21 13:56         ` Mariusz Tkaczyk
  2021-12-22  1:54           ` Guoqing Jiang
  2021-12-22  3:08           ` Xiao Ni
  0 siblings, 2 replies; 39+ messages in thread
From: Mariusz Tkaczyk @ 2021-12-21 13:56 UTC (permalink / raw)
  To: Xiao Ni; +Cc: Song Liu, linux-raid

Hi Xiao,
On Tue, 21 Dec 2021 09:40:50 +0800
Xiao Ni <xni@redhat.com> wrote:

> Now, for a raid0, one member disk can't be removed from the array.
> The command returns EBUSY and the raid0 still works well. That makes
> sense: because all member disks are busy, the admin can't remove a
> member disk, and mdadm gives a proper error.

-EBUSY means that the drive is busy, but it is not; the drive just
cannot be safely removed. As I wrote in patch 2:

If "Faulty" was not set, then -EBUSY was returned to userspace. As a
result, mdadm expects -EBUSY if the array has become failed. There are
several reasons not to consider this mechanism correct:
- a drive may not be marked Faulty for reasons other than array
failure;
- there are paths where -EBUSY is not reported and a drive removal
leads to a failed array without any notification to userspace;
- in the array-failure case, -EBUSY seems to be the wrong status: the
array is not busy, the removal process just cannot proceed safely.

For compatibility reasons I cannot remove -EBUSY. I left a more
detailed explanation in patch 2.

> With this patch, the situation changes: raid0_error() sets MD_BROKEN,
> but in fact the array isn't broken. So is it really good to set
> MD_BROKEN here? In patch 62f7b1989c0 ("md raid0/linear: Mark array as
> 'broken'...), MD_BROKEN was introduced for the case when a member
> disk disappears and the disk is really broken; then the raid0/linear
> device can't work anymore.

It is broken: any md_error() call should end with an appropriate
action:
- drive removal (if possible)
- failing the array (if it cannot be degraded)

We cannot trust a drive once md_error() has been called for it, so
writes will be blocked. IMO it is reasonable not to engage the level
stack, because one or more members cannot be trusted (even if still
accessible), and to just allow reading the old data (if still
possible).
 
Thanks,
Mariusz


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2021-12-19  3:26     ` Xiao Ni
@ 2021-12-22  1:22       ` Guoqing Jiang
  0 siblings, 0 replies; 39+ messages in thread
From: Guoqing Jiang @ 2021-12-22  1:22 UTC (permalink / raw)
  To: Xiao Ni; +Cc: Mariusz Tkaczyk, Song Liu, linux-raid



On 12/19/21 11:26 AM, Xiao Ni wrote:
> On Fri, Dec 17, 2021 at 10:00 AM Guoqing Jiang <guoqing.jiang@linux.dev> wrote:
>>
>>
>> On 12/16/21 10:52 PM, Mariusz Tkaczyk wrote:
>>> Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and fail BIOs
>>> if a member is gone") allowed to finish writes earlier (before level
>>> dependent actions) for non-redundant arrays.
>>>
>>> To achieve that MD_BROKEN is added to mddev->flags if drive disappearance
>>> is detected. This is done in is_mddev_broken() which is confusing and not
>>> consistent with other levels where error_handler() is used.
>>> This patch adds appropriate error_handler for raid0 and linear.
>>>
>>> It also adopts md_error(), we only want to call .error_handler for those
>>> levels. mddev->pers->sync_request is additionally checked, its existence
>>> implies a level with redundancy.
>>>
>>> Usage of error_handler causes that disk failure can be requested from
>>> userspace. User can fail the array via #mdadm --set-faulty command. This
>>> is not safe and will be fixed in mdadm. It is correctable because failed
>>> state is not recorded in the metadata. After next assembly array will be
>>> read-write again. For safety reason is better to keep MD_BROKEN in
>>> runtime only.
>>>
>>> Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
>>> ---
>>>    drivers/md/md-linear.c | 15 ++++++++++++++-
>>>    drivers/md/md.c        |  6 +++++-
>>>    drivers/md/md.h        | 10 ++--------
>>>    drivers/md/raid0.c     | 15 ++++++++++++++-
>>>    4 files changed, 35 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
>>> index 1ff51647a682..415d2615d17a 100644
>>> --- a/drivers/md/md-linear.c
>>> +++ b/drivers/md/md-linear.c
>>> @@ -233,7 +233,8 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
>>>                     bio_sector < start_sector))
>>>                goto out_of_bounds;
>>>
>>> -     if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
>>> +     if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
>>> +             md_error(mddev, tmp_dev->rdev);
>>>                bio_io_error(bio);
>>>                return true;
>>>        }
>>> @@ -281,6 +282,17 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev)
>>>        seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
>>>    }
>>>
>>> +static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
>>> +{
>>> +     char b[BDEVNAME_SIZE];
>>> +
>>> +     if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
>>> +             pr_crit("md/linear%s: Disk failure on %s detected.\n"
>>> +                     "md/linear:%s: Cannot continue, failing array.\n",
>>> +                     mdname(mddev), bdevname(rdev->bdev, b),
>>> +                     mdname(mddev));
>>> +}
>>> +
>> Do you consider using %pg to print the block device?
>>
>>>    static void linear_quiesce(struct mddev *mddev, int state)
>>>    {
>>>    }
>>> @@ -297,6 +309,7 @@ static struct md_personality linear_personality =
>>>        .hot_add_disk   = linear_add,
>>>        .size           = linear_size,
>>>        .quiesce        = linear_quiesce,
>>> +     .error_handler  = linear_error,
>>>    };
>>>
>>>    static int __init linear_init (void)
>>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>>> index e8666bdc0d28..f888ef197765 100644
>>> --- a/drivers/md/md.c
>>> +++ b/drivers/md/md.c
>>> @@ -7982,7 +7982,11 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
>>>
>>>        if (!mddev->pers || !mddev->pers->error_handler)
>>>                return;
>>> -     mddev->pers->error_handler(mddev,rdev);
>>> +     mddev->pers->error_handler(mddev, rdev);
>>> +
>>> +     if (!mddev->pers->sync_request)
>>> +             return;
>>> +
>> What is the reason for the above change? I suppose the dm event can be missed.
> Hi Guoqing
>
> What's the dm event here?

Please see mddev->event_work after the above line; commit 768e587e18 for
dm might also be relevant.
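
One way to keep that notification would be to queue the event work
before the new early return (just an untested sketch of the ordering):

	void md_error(struct mddev *mddev, struct md_rdev *rdev)
	{
		if (!mddev->pers || !mddev->pers->error_handler)
			return;
		mddev->pers->error_handler(mddev, rdev);

		/* notify dm-raid before bailing out */
		if (mddev->event_work.func)
			queue_work(md_misc_wq, &mddev->event_work);

		if (!mddev->pers->sync_request)
			return;	/* raid0/linear: no recovery to schedule */

		/* ... recovery flags and thread wakeup as before ... */
	}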

Thanks,
Guoqing


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/3] raid5: introduce MD_BROKEN
  2021-12-17  8:37     ` Mariusz Tkaczyk
@ 2021-12-22  1:46       ` Guoqing Jiang
  0 siblings, 0 replies; 39+ messages in thread
From: Guoqing Jiang @ 2021-12-22  1:46 UTC (permalink / raw)
  To: Mariusz Tkaczyk; +Cc: song, linux-raid



On 12/17/21 4:37 PM, Mariusz Tkaczyk wrote:
> Hi Guoqing,
>
> On Fri, 17 Dec 2021 10:26:27 +0800
> Guoqing Jiang <guoqing.jiang@linux.dev> wrote:
>
>>> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
>>> index 1240a5c16af8..8b5561811431 100644
>>> --- a/drivers/md/raid5.c
>>> +++ b/drivers/md/raid5.c
>>> @@ -690,6 +690,9 @@ static int has_failed(struct r5conf *conf)
>>>    {
>>>    	int degraded;
>>>    
>>> +	if (test_bit(MD_BROKEN, &conf->mddev->flags))
>>> +		return 1;
>>> +
>>>    	if (conf->mddev->reshape_position == MaxSector)
>>>    		return conf->mddev->degraded > conf->max_degraded;
>>>    
>>> @@ -2877,34 +2880,29 @@ static void raid5_error(struct mddev
>>> *mddev, struct md_rdev *rdev) unsigned long flags;
>>>    	pr_debug("raid456: error called\n");
>>>    
>>> -	spin_lock_irqsave(&conf->device_lock, flags);
>>> -
>>> -	if (test_bit(In_sync, &rdev->flags) &&
>>> -	    mddev->degraded == conf->max_degraded) {
>>> -		/*
>>> -		 * Don't allow to achieve failed state
>>> -		 * Don't try to recover this device
>>> -		 */
>>> -		conf->recovery_disabled = mddev->recovery_disabled;
>>> -		spin_unlock_irqrestore(&conf->device_lock, flags);
>>> -		return;
>>> -	}
>>> +	pr_crit("md/raid:%s: Disk failure on %s, disabling
>>> device.\n",
>>> +		mdname(mddev), bdevname(rdev->bdev, b));
>>>    
>>> +	spin_lock_irqsave(&conf->device_lock, flags);
>>>    	set_bit(Faulty, &rdev->flags);
>>>    	clear_bit(In_sync, &rdev->flags);
>>>    	mddev->degraded = raid5_calc_degraded(conf);
>>> +
>>> +	if (has_failed(conf)) {
>>> +		set_bit(MD_BROKEN, &mddev->flags);
>> What about the other callers of has_failed? Do they need to set the
>> BROKEN flag? Or set the flag in has_failed if it returns true, just FYI.
>>
> The function checks rdev->state for faulty. There are two places
> where it can be set:
> - raid5_error (handled here)
> - raid5_spare_active (not a case IMO).
>
> I left it in raid5_error to be consistent with other levels.
> I think that moving it to has_failed is safe, but I don't see any
> additional value in it.

From raid5's point of view, the array is broken whenever has_failed()
returns 1, so setting the flag only in raid5_error() seems kind of
inconsistent, I think.
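
I.e., has_failed() could own the flag so that every caller observes it
consistently (untested sketch):

	static int has_failed(struct r5conf *conf)
	{
		int degraded;

		if (test_bit(MD_BROKEN, &conf->mddev->flags))
			return 1;

		if (conf->mddev->reshape_position == MaxSector)
			degraded = conf->mddev->degraded;
		else
			degraded = raid5_calc_degraded(conf);

		if (degraded > conf->max_degraded) {
			set_bit(MD_BROKEN, &conf->mddev->flags);
			return 1;
		}
		return 0;
	}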

> I see that in raid5_error we hold device_lock. It is not true for
> all has_failed calls.

I didn't go into the details of each caller; I'm not sure the lock is
needed for setting/clearing mddev->flags.

> Do you have any recommendations?

I don't have a strong opinion on it.

Thanks,
Guoqing

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2021-12-21 13:56         ` Mariusz Tkaczyk
@ 2021-12-22  1:54           ` Guoqing Jiang
  2021-12-22  3:08           ` Xiao Ni
  1 sibling, 0 replies; 39+ messages in thread
From: Guoqing Jiang @ 2021-12-22  1:54 UTC (permalink / raw)
  To: Mariusz Tkaczyk, Xiao Ni; +Cc: Song Liu, linux-raid



On 12/21/21 9:56 PM, Mariusz Tkaczyk wrote:
> Hi Xiao,
> On Tue, 21 Dec 2021 09:40:50 +0800
> Xiao Ni <xni@redhat.com> wrote:
>
>> Currently, a member disk can't be removed from a raid0. The removal
>> returns EBUSY and the raid0 keeps working. That makes sense: all
>> member disks are busy, so the admin can't remove a member disk and
>> mdadm gives a proper error.
> EBUSY suggests that the drive is busy, but it is not; it just cannot be
> removed safely. As I wrote in patch 2:
>
> If "faulty" was not set then -EBUSY was returned to
> userspace. It causes that mdadm expects -EBUSY if the array
> becomes failed. There are some reasons to not consider this mechanism
> as correct:
> - drive can't be failed for different reasons.
> - there are path where -EBUSY is not reported and drive removal leads
> to failed array, without notification for userspace.
> - in the array failure case -EBUSY seems to be wrong status. Array is
> not busy, but removal process cannot proceed safe.
>
> For compatibility reasons I cannot remove EBUSY. I left a more detailed
> explanation in patch 2.
>
>> This patch changes the situation. raid0_error sets MD_BROKEN, but in
>> fact the array isn't broken. So is it really right to set MD_BROKEN
>> here? In patch 62f7b1989c0 ("md raid0/linear: Mark array as
>> 'broken'...), MD_BROKEN was introduced for the case where a member
>> disk disappears and the disk is really broken; for raid0/linear, the
>> raid device then can't work anymore.
> It is broken: any md_error() call should end with an appropriate action:
> - drive removal (if possible)
> - failing the array (if the array cannot be degraded)
>
> We cannot trust the drive if md_error() was called, so writes will be
> blocked. IMO it is reasonable not to engage the level stack, because one
> or more members cannot be trusted (even if still accessible). Just
> allow reading the old data (if still possible).

Agree. Especially for raid0 and linear, the array is literally broken
when one of the member devices can't be accessed.

Thanks,
Guoqing

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2021-12-21 13:56         ` Mariusz Tkaczyk
  2021-12-22  1:54           ` Guoqing Jiang
@ 2021-12-22  3:08           ` Xiao Ni
  1 sibling, 0 replies; 39+ messages in thread
From: Xiao Ni @ 2021-12-22  3:08 UTC (permalink / raw)
  To: Mariusz Tkaczyk; +Cc: Song Liu, linux-raid

On Tue, Dec 21, 2021 at 9:56 PM Mariusz Tkaczyk
<mariusz.tkaczyk@linux.intel.com> wrote:
>
> Hi Xiao,
> On Tue, 21 Dec 2021 09:40:50 +0800
> Xiao Ni <xni@redhat.com> wrote:
>
> > Currently, a member disk can't be removed from a raid0. The removal
> > returns EBUSY and the raid0 keeps working. That makes sense: all
> > member disks are busy, so the admin can't remove a member disk and
> > mdadm gives a proper error.
>
> EBUSY suggests that the drive is busy, but it is not; it just cannot be
> removed safely. As I wrote in patch 2:
>
> If "faulty" was not set then -EBUSY was returned to
> userspace. It causes that mdadm expects -EBUSY if the array
> becomes failed. There are some reasons to not consider this mechanism
> as correct:
> - drive can't be failed for different reasons.
> - there are path where -EBUSY is not reported and drive removal leads
> to failed array, without notification for userspace.
> - in the array failure case -EBUSY seems to be wrong status. Array is
> not busy, but removal process cannot proceed safe.
>
> For compatibility reasons I cannot remove EBUSY. I left a more detailed
> explanation in patch 2.
>
> > This patch changes the situation. raid0_error sets MD_BROKEN, but in
> > fact the array isn't broken. So is it really right to set MD_BROKEN
> > here? In patch 62f7b1989c0 ("md raid0/linear: Mark array as
> > 'broken'...), MD_BROKEN was introduced for the case where a member
> > disk disappears and the disk is really broken; for raid0/linear, the
> > raid device then can't work anymore.
>
> It is broken: any md_error() call should end with an appropriate action:
> - drive removal (if possible)
> - failing the array (if the array cannot be degraded)
>
> We cannot trust the drive if md_error() was called, so writes will be
> blocked. IMO it is reasonable not to engage the level stack, because one
> or more members cannot be trusted (even if still accessible). Just
> allow reading the old data (if still possible).
>

Thanks for the explanation. It's OK for me; I just want to make sure we
have the same understanding: raid0 now works in such a way that the
array fails when md_error() is called.

Regards
Xiao


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 2/3] md: Set MD_BROKEN for RAID1 and RAID10
  2021-12-16 14:52 ` [PATCH 2/3] md: Set MD_BROKEN for RAID1 and RAID10 Mariusz Tkaczyk
  2021-12-17  2:16   ` Guoqing Jiang
@ 2021-12-22  7:24   ` Xiao Ni
  2021-12-27 12:34     ` Mariusz Tkaczyk
  1 sibling, 1 reply; 39+ messages in thread
From: Xiao Ni @ 2021-12-22  7:24 UTC (permalink / raw)
  To: Mariusz Tkaczyk; +Cc: Song Liu, linux-raid

On Thu, Dec 16, 2021 at 10:53 PM Mariusz Tkaczyk
<mariusz.tkaczyk@linux.intel.com> wrote:
>
> There was no direct mechanism to determine raid failure outside the
> personality. It was done by checking rdev->flags after executing
> md_error(). If "faulty" was not set, then -EBUSY was returned to
> userspace. As a result, mdadm expects -EBUSY if the array
> becomes failed. There are some reasons not to consider this mechanism
> correct:
> - a drive can fail to be marked faulty for different reasons.
> - there are paths where -EBUSY is not reported and drive removal leads
> to a failed array, without any notification to userspace.
> - in the array failure case, -EBUSY seems to be the wrong status. The
> array is not busy, but the removal process cannot proceed safely.
>
> The -EBUSY expectation cannot be removed without breaking compatibility
> with userspace, but we can change the failed-state verification method.
>
> In this patch, support for the MD_BROKEN flag, so far used to mark a
> non-redundant array as dead, is added to RAID1 and RAID10. Support for
> RAID456 is added in the next commit.
>
> Now the array failure state can be checked, so verify the MD_BROKEN
> flag, but still return -EBUSY to userspace.
>
> As in the previous commit, this means that #mdadm --set-faulty is able
> to mark the array as failed. The previously proposed workaround is valid
> if the optional functionality 9a567843f79 ("md: allow last device to be
> forcibly removed from RAID1/RAID10.") is disabled.
>
> Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
> ---
>  drivers/md/md.c     | 17 ++++++++++-------
>  drivers/md/md.h     |  4 ++--
>  drivers/md/raid1.c  |  1 +
>  drivers/md/raid10.c |  1 +
>  4 files changed, 14 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index f888ef197765..fda8473f96b8 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -2983,10 +2983,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
>
>         if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
>                 md_error(rdev->mddev, rdev);
> -               if (test_bit(Faulty, &rdev->flags))
> -                       err = 0;
> -               else
> +
> +               if (test_bit(MD_BROKEN, &rdev->mddev->flags))
>                         err = -EBUSY;
> +               else
> +                       err = 0;
>         } else if (cmd_match(buf, "remove")) {
>                 if (rdev->mddev->pers) {
>                         clear_bit(Blocked, &rdev->flags);
> @@ -7441,7 +7442,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
>                 err =  -ENODEV;
>         else {
>                 md_error(mddev, rdev);
> -               if (!test_bit(Faulty, &rdev->flags))
> +               if (test_bit(MD_BROKEN, &mddev->flags))
>                         err = -EBUSY;
>         }
>         rcu_read_unlock();
> @@ -7987,12 +7988,14 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
>         if (!mddev->pers->sync_request)
>                 return;
>
> -       if (mddev->degraded)
> +       if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
>                 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
>         sysfs_notify_dirent_safe(rdev->sysfs_state);
>         set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> -       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> -       md_wakeup_thread(mddev->thread);
> +       if (!test_bit(MD_BROKEN, &mddev->flags)) {
> +               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> +               md_wakeup_thread(mddev->thread);
> +       }
>         if (mddev->event_work.func)
>                 queue_work(md_misc_wq, &mddev->event_work);
>         md_new_event();
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index bc3f2094d0b6..d3a897868695 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -259,8 +259,8 @@ enum mddev_flags {
>         MD_NOT_READY,           /* do_md_run() is active, so 'array_state'
>                                  * must not report that array is ready yet
>                                  */
> -       MD_BROKEN,              /* This is used in RAID-0/LINEAR only, to stop
> -                                * I/O in case an array member is gone/failed.
> +       MD_BROKEN,              /* This is used to stop I/O and mark device as
> +                                * dead in case an array becomes failed.
>                                  */
>  };
>
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index 7dc8026cf6ee..45dc75f90476 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -1638,6 +1638,7 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
>                  */
>                 conf->recovery_disabled = mddev->recovery_disabled;
>                 spin_unlock_irqrestore(&conf->device_lock, flags);
> +               set_bit(MD_BROKEN, &mddev->flags);
>                 return;
>         }
>         set_bit(Blocked, &rdev->flags);
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index dde98f65bd04..d7cefd212e6b 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -1964,6 +1964,7 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
>                  * Don't fail the drive, just return an IO error.
>                  */
>                 spin_unlock_irqrestore(&conf->device_lock, flags);
> +               set_bit(MD_BROKEN, &mddev->flags);
>                 return;
>         }
>         if (test_and_clear_bit(In_sync, &rdev->flags))
> --
> 2.26.2
>

Hi Mariusz

In your first version, MD_BROKEN was checked in super_written. Does it
need to be checked in this version as well?
From my understanding, the write needs to be retried when failfast is
supported. If MD_BROKEN is checked in super_written, the re-write can't
be sent anymore. Am I right?
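
For context, the failfast handling in super_written() looks roughly
like this (paraphrased from md.c, not the exact source):

	static void super_written(struct bio *bio)
	{
		struct md_rdev *rdev = bio->bi_private;
		struct mddev *mddev = rdev->mddev;

		if (bio->bi_status) {
			md_error(mddev, rdev);
			/* retry only if md_error() did not fail the rdev */
			if (!test_bit(Faulty, &rdev->flags) &&
			    (bio->bi_opf & MD_FAILFAST)) {
				set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
				set_bit(LastDev, &rdev->flags);
			}
		} else {
			clear_bit(LastDev, &rdev->flags);
		}
		/* ... bio cleanup and pending-writes accounting ... */
	}

so an additional MD_BROKEN check there would indeed suppress the
superblock re-write.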

Regards
Xiao


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 2/3] md: Set MD_BROKEN for RAID1 and RAID10
  2021-12-22  7:24   ` Xiao Ni
@ 2021-12-27 12:34     ` Mariusz Tkaczyk
  0 siblings, 0 replies; 39+ messages in thread
From: Mariusz Tkaczyk @ 2021-12-27 12:34 UTC (permalink / raw)
  To: Xiao Ni; +Cc: Song Liu, linux-raid

On Wed, 22 Dec 2021 15:24:54 +0800
Xiao Ni <xni@redhat.com> wrote:

> On Thu, Dec 16, 2021 at 10:53 PM Mariusz Tkaczyk
> <mariusz.tkaczyk@linux.intel.com> wrote:
> >
> > There was no direct mechanism to determine raid failure outside the
> > personality. It was done by checking rdev->flags after executing
> > md_error(). If "faulty" was not set, then -EBUSY was returned to
> > userspace. As a result, mdadm expects -EBUSY if the array
> > becomes failed. There are some reasons not to consider this
> > mechanism correct:
> > - a drive can fail to be marked faulty for different reasons.
> > - there are paths where -EBUSY is not reported and drive removal
> > leads to a failed array, without any notification to userspace.
> > - in the array failure case, -EBUSY seems to be the wrong status. The
> > array is not busy, but the removal process cannot proceed safely.
> >
> > The -EBUSY expectation cannot be removed without breaking
> > compatibility with userspace, but we can change the failed-state
> > verification method.
> >
> > In this patch, support for the MD_BROKEN flag, so far used to mark a
> > non-redundant array as dead, is added to RAID1 and RAID10. Support
> > for RAID456 is added in the next commit.
> >
> > Now the array failure state can be checked, so verify the MD_BROKEN
> > flag, but still return -EBUSY to userspace.
> >
> > As in the previous commit, this means that #mdadm --set-faulty is
> > able to mark the array as failed. The previously proposed workaround
> > is valid if the optional functionality 9a567843f79 ("md: allow last
> > device to be forcibly removed from RAID1/RAID10.") is disabled.
> >
> > Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
> > ---
> >  drivers/md/md.c     | 17 ++++++++++-------
> >  drivers/md/md.h     |  4 ++--
> >  drivers/md/raid1.c  |  1 +
> >  drivers/md/raid10.c |  1 +
> >  4 files changed, 14 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/md/md.c b/drivers/md/md.c
> > index f888ef197765..fda8473f96b8 100644
> > --- a/drivers/md/md.c
> > +++ b/drivers/md/md.c
> > @@ -2983,10 +2983,11 @@ state_store(struct md_rdev *rdev, const
> > char *buf, size_t len)
> >
> >         if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
> >                 md_error(rdev->mddev, rdev);
> > -               if (test_bit(Faulty, &rdev->flags))
> > -                       err = 0;
> > -               else
> > +
> > +               if (test_bit(MD_BROKEN, &rdev->mddev->flags))
> >                         err = -EBUSY;
> > +               else
> > +                       err = 0;
> >         } else if (cmd_match(buf, "remove")) {
> >                 if (rdev->mddev->pers) {
> >                         clear_bit(Blocked, &rdev->flags);
> > @@ -7441,7 +7442,7 @@ static int set_disk_faulty(struct mddev
> > *mddev, dev_t dev) err =  -ENODEV;
> >         else {
> >                 md_error(mddev, rdev);
> > -               if (!test_bit(Faulty, &rdev->flags))
> > +               if (test_bit(MD_BROKEN, &mddev->flags))
> >                         err = -EBUSY;
> >         }
> >         rcu_read_unlock();
> > @@ -7987,12 +7988,14 @@ void md_error(struct mddev *mddev, struct
> > md_rdev *rdev) if (!mddev->pers->sync_request)
> >                 return;
> >
> > -       if (mddev->degraded)
> > +       if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
> >                 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
> >         sysfs_notify_dirent_safe(rdev->sysfs_state);
> >         set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> > -       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> > -       md_wakeup_thread(mddev->thread);
> > +       if (!test_bit(MD_BROKEN, &mddev->flags)) {
> > +               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> > +               md_wakeup_thread(mddev->thread);
> > +       }
> >         if (mddev->event_work.func)
> >                 queue_work(md_misc_wq, &mddev->event_work);
> >         md_new_event();
> > diff --git a/drivers/md/md.h b/drivers/md/md.h
> > index bc3f2094d0b6..d3a897868695 100644
> > --- a/drivers/md/md.h
> > +++ b/drivers/md/md.h
> > @@ -259,8 +259,8 @@ enum mddev_flags {
> >         MD_NOT_READY,           /* do_md_run() is active, so
> > 'array_state'
> >                                  * must not report that array is
> > ready yet */
> > -       MD_BROKEN,              /* This is used in RAID-0/LINEAR
> > only, to stop
> > -                                * I/O in case an array member is
> > gone/failed.
> > +       MD_BROKEN,              /* This is used to stop I/O and
> > mark device as
> > +                                * dead in case an array becomes
> > failed. */
> >  };
> >
> > diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> > index 7dc8026cf6ee..45dc75f90476 100644
> > --- a/drivers/md/raid1.c
> > +++ b/drivers/md/raid1.c
> > @@ -1638,6 +1638,7 @@ static void raid1_error(struct mddev *mddev,
> > struct md_rdev *rdev) */
> >                 conf->recovery_disabled = mddev->recovery_disabled;
> >                 spin_unlock_irqrestore(&conf->device_lock, flags);
> > +               set_bit(MD_BROKEN, &mddev->flags);
> >                 return;
> >         }
> >         set_bit(Blocked, &rdev->flags);
> > diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> > index dde98f65bd04..d7cefd212e6b 100644
> > --- a/drivers/md/raid10.c
> > +++ b/drivers/md/raid10.c
> > @@ -1964,6 +1964,7 @@ static void raid10_error(struct mddev *mddev,
> > struct md_rdev *rdev)
> >                  * Don't fail the drive, just return an IO error.
> >                  */
> >                 spin_unlock_irqrestore(&conf->device_lock, flags);
> > +               set_bit(MD_BROKEN, &mddev->flags);
> >                 return;
> >         }
> >         if (test_and_clear_bit(In_sync, &rdev->flags))
> > --
> > 2.26.2
> >  
> 
> Hi Mariusz
> 
> In your first version, MD_BROKEN was checked in super_written. Does it
> need to be checked in this version as well?
> From my understanding, the write needs to be retried when failfast is
> supported. If MD_BROKEN is checked in super_written, the re-write can't
> be sent anymore. Am I right?
> 
Hi Xiao,
We discussed it with Song and as a result I dropped it.
I just left it as before. V2 shouldn't race with failfast because it
still takes care of the faulty flag on the device (MD_BROKEN is set
additionally).

I don't know a lot about failfast, so the idea proposed in the first
patch could be wrong. I did it this way because I updated all md_error()
and later test_bit(Faulty, &rdev->flags) paths with the strict
assumption faulty == MD_BROKEN. For failfast and last_dev it is not
obvious. If you have any recommendations, I will be grateful.

Thanks,
Mariusz

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 0/3] Use MD_BROKEN for redundant arrays
  2021-12-17  8:02   ` Mariusz Tkaczyk
@ 2022-01-25 15:52     ` Mariusz Tkaczyk
  2022-01-26  1:13       ` Song Liu
  0 siblings, 1 reply; 39+ messages in thread
From: Mariusz Tkaczyk @ 2022-01-25 15:52 UTC (permalink / raw)
  To: Song Liu; +Cc: linux-raid

On Fri, 17 Dec 2021 09:02:38 +0100
Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com> wrote:

> Hi Song,
> 
> On Thu, 16 Dec 2021 16:52:23 -0800
> Song Liu <song@kernel.org> wrote:
> > > Mariusz Tkaczyk (3):
> > >   raid0, linear, md: add error_handlers for raid0 and linear
> > >   md: Set MD_BROKEN for RAID1 and RAID10
> > >   raid5: introduce MD_BROKEN  
> > 
> > The set looks good to me. The only concern is that we changed some
> > messages. While dmesg is not a stable API, I believe there are
> > people who grep it to detect errors.
> > Therefore, please try to keep these messages the same as before (as much
> > as possible).
> 
> Will do.
> After sending it, I realized that my approach is not correct when
> mddev->fail_last_dev is on. MD_BROKEN should be set even if we agree
> to remove the "last" drive. I will fix it too.
> 

Hi Song,
For raid0 and linear I added new messages, so it shouldn't be a problem.
I added one new message in raid5 for the failed state:
+		pr_crit("md/raid:%s: Cannot continue on %d devices.\n",

Do you want me to remove it?
The other errors are the same, and their order is also preserved.

Thanks,
Mariusz

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 0/3] Use MD_BROKEN for redundant arrays
  2022-01-25 15:52     ` Mariusz Tkaczyk
@ 2022-01-26  1:13       ` Song Liu
  0 siblings, 0 replies; 39+ messages in thread
From: Song Liu @ 2022-01-26  1:13 UTC (permalink / raw)
  To: Mariusz Tkaczyk; +Cc: linux-raid

On Tue, Jan 25, 2022 at 7:52 AM Mariusz Tkaczyk
<mariusz.tkaczyk@linux.intel.com> wrote:
>
> On Fri, 17 Dec 2021 09:02:38 +0100
> Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com> wrote:
>
> > Hi Song,
> >
> > On Thu, 16 Dec 2021 16:52:23 -0800
> > Song Liu <song@kernel.org> wrote:
> > > > Mariusz Tkaczyk (3):
> > > >   raid0, linear, md: add error_handlers for raid0 and linear
> > > >   md: Set MD_BROKEN for RAID1 and RAID10
> > > >   raid5: introduce MD_BROKEN
> > >
> > > The set looks good to me. The only concern is that we changed some
> > > messages. While dmesg is not a stable API, I believe there are
> > > people who grep it to detect errors.
> > > Therefore, please try to keep these messages the same as before (as much
> > > as possible).
> >
> > Will do.
> > After sending it, I realized that my approach is not correct when
> > mddev->fail_last_dev is on. MD_BROKEN should be set even if we agree
> > to remove the "last" drive. I will fix it too.
> >
>
> Hi Song,
> For raid0 and linear I added new messages, so it shouldn't be a problem.
> I added one new message in raid5 for the failed state:
> +               pr_crit("md/raid:%s: Cannot continue on %d devices.\n",
>
> Do you want me to remove it?
> The other errors are the same, and their order is also preserved.

I think we should print in this case. How about we change it to something
similar to the one in raid5_run():

                pr_crit("md/raid:%s: Cannot continue operation (%d/%d
failed)\n",
                        mdname(mddev), mddev->degraded, conf->raid_disks);
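
In raid5_error() that would land roughly as follows (a sketch, assuming
the structure from this v2):

	if (has_failed(conf)) {
		set_bit(MD_BROKEN, &mddev->flags);
		pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed)\n",
			mdname(mddev), mddev->degraded, conf->raid_disks);
	}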

Thanks,
Song

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2022-04-12 15:31         ` Mariusz Tkaczyk
@ 2022-04-12 16:36           ` Song Liu
  0 siblings, 0 replies; 39+ messages in thread
From: Song Liu @ 2022-04-12 16:36 UTC (permalink / raw)
  To: Mariusz Tkaczyk; +Cc: linux-raid, Guoqing Jiang

On Tue, Apr 12, 2022 at 8:32 AM Mariusz Tkaczyk
<mariusz.tkaczyk@linux.intel.com> wrote:
>
> On Fri, 8 Apr 2022 09:18:28 -0700
> Song Liu <song@kernel.org> wrote:
>
> > On Fri, Apr 8, 2022 at 7:35 AM Mariusz Tkaczyk
> > <mariusz.tkaczyk@linux.intel.com> wrote:
> > >
> > > On Thu, 7 Apr 2022 17:16:37 -0700
> > > Song Liu <song@kernel.org> wrote:
> > >
> > > > On Tue, Mar 22, 2022 at 8:24 AM Mariusz Tkaczyk
> > > > <mariusz.tkaczyk@linux.intel.com> wrote:
> > > > >
> > > > > Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and
> > > > > fail BIOs if a member is gone") allowed to finish writes earlier
> > > > > (before level dependent actions) for non-redundant arrays.
> > > > >
> > > > > To achieve that MD_BROKEN is added to mddev->flags if drive
> > > > > disappearance is detected. This is done in is_mddev_broken() which
> > > > > is confusing and not consistent with other levels where
> > > > > error_handler() is used. This patch adds appropriate error_handler
> > > > > for raid0 and linear and adopt md_error() to the change.
> > > > >
> > > > > Usage of error_handler causes that disk failure can be requested
> > > > > from userspace. User can fail the array via #mdadm --set-faulty
> > > > > command. This is not safe and will be fixed in mdadm. It is
> > > > > correctable because failed state is not recorded in the metadata.
> > > > > After next assembly array will be read-write again. For safety
> > > > > reason is better to keep MD_BROKEN in runtime only.
> > > > >
> > > > > Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
> > > >
> > > > Sorry for the late response.
> > > >
> > > > > ---
> > > > >  drivers/md/md-linear.c | 14 +++++++++++++-
> > > > >  drivers/md/md.c        |  6 +++++-
> > > > >  drivers/md/md.h        | 10 ++--------
> > > > >  drivers/md/raid0.c     | 14 +++++++++++++-
> > > > >  4 files changed, 33 insertions(+), 11 deletions(-)
> > > > >
> > > > > diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
> > > > > index 1ff51647a682..c33cd28f1dba 100644
> > > > > --- a/drivers/md/md-linear.c
> > > > > +++ b/drivers/md/md-linear.c
> > > > > @@ -233,7 +233,8 @@ static bool linear_make_request(struct mddev
> > > > > *mddev, struct bio *bio) bio_sector < start_sector))
> > > > >                 goto out_of_bounds;
> > > > >
> > > > > -       if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
> > > > > +       if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
> > > > > +               md_error(mddev, tmp_dev->rdev);
> > > >
> > > > I apologize if we discussed this before. Shall we just call
> > > > linear_error() here? If we go this way, we don't really need ...
> > > >
> > > > >                 bio_io_error(bio);
> > > > >                 return true;
> > > > >         }
> > > > > @@ -281,6 +282,16 @@ static void linear_status (struct seq_file
> > > > > *seq, struct mddev *mddev) seq_printf(seq, " %dk rounding",
> > > > > mddev->chunk_sectors / 2); }
> > > > >
> > > > > +static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
> > > > > +{
> > > > > +       if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
> > > > > +               char *md_name = mdname(mddev);
> > > > > +
> > > > > +               pr_crit("md/linear%s: Disk failure on %pg detected,
> > > > > failing array.\n",
> > > > > +                       md_name, rdev->bdev);
> > > > > +       }
> > > > > +}
> > > > > +
> > > > >  static void linear_quiesce(struct mddev *mddev, int state)
> > > > >  {
> > > > >  }
> > > > > @@ -297,6 +308,7 @@ static struct md_personality linear_personality
> > > > > = .hot_add_disk   = linear_add,
> > > > >         .size           = linear_size,
> > > > >         .quiesce        = linear_quiesce,
> > > > > +       .error_handler  = linear_error,
> > > >
> > > > ... set error_handler here, and ...
> > > >
> > > > >  };
> > > > >
> > > > >  static int __init linear_init (void)
> > > > > diff --git a/drivers/md/md.c b/drivers/md/md.c
> > > > > index 0a89f072dae0..3354afc9d2a3 100644
> > > > > --- a/drivers/md/md.c
> > > > > +++ b/drivers/md/md.c
> > > > > @@ -7985,7 +7985,11 @@ void md_error(struct mddev *mddev, struct
> > > > > md_rdev *rdev)
> > > > >
> > > > >         if (!mddev->pers || !mddev->pers->error_handler)
> > > > >                 return;
> > > > > -       mddev->pers->error_handler(mddev,rdev);
> > > > > +       mddev->pers->error_handler(mddev, rdev);
> > > > > +
> > > > > +       if (mddev->pers->level == 0 || mddev->pers->level ==
> > > > > LEVEL_LINEAR)
> > > > > +               return;
> > > >
> > > > ... this check here.
> > > >
> > > > Did I miss something?
> > > >
> > > Hi Song,
> > > That is correct, we can do the same for raid0. I did it this way to
> > > make it similar to redundant levels.
> > > If you think that it is overhead, I can drop it.
> >
> > Yeah, it feels like more overhead to me.
> >
> > I applied 2/3 and 3/3 to md-next. Please take a look and let me know
> > if anything needs to be fixed.
> >
> Now the first patch is just a code refactor with a different error message.
> I think we can drop it. Do you agree?

Yes, I think we can drop it.

Thanks,
Song

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2022-04-08 16:18       ` Song Liu
@ 2022-04-12 15:31         ` Mariusz Tkaczyk
  2022-04-12 16:36           ` Song Liu
  0 siblings, 1 reply; 39+ messages in thread
From: Mariusz Tkaczyk @ 2022-04-12 15:31 UTC (permalink / raw)
  To: Song Liu; +Cc: linux-raid, Guoqing Jiang

On Fri, 8 Apr 2022 09:18:28 -0700
Song Liu <song@kernel.org> wrote:

> On Fri, Apr 8, 2022 at 7:35 AM Mariusz Tkaczyk
> <mariusz.tkaczyk@linux.intel.com> wrote:
> >
> > On Thu, 7 Apr 2022 17:16:37 -0700
> > Song Liu <song@kernel.org> wrote:
> >  
> > > On Tue, Mar 22, 2022 at 8:24 AM Mariusz Tkaczyk
> > > <mariusz.tkaczyk@linux.intel.com> wrote:  
> > > >
> > > > Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and
> > > > fail BIOs if a member is gone") allowed to finish writes earlier
> > > > (before level dependent actions) for non-redundant arrays.
> > > >
> > > > To achieve that MD_BROKEN is added to mddev->flags if drive
> > > > disappearance is detected. This is done in is_mddev_broken() which
> > > > is confusing and not consistent with other levels where
> > > > error_handler() is used. This patch adds appropriate error_handler
> > > > for raid0 and linear and adopt md_error() to the change.
> > > >
> > > > Usage of error_handler causes that disk failure can be requested
> > > > from userspace. User can fail the array via #mdadm --set-faulty
> > > > command. This is not safe and will be fixed in mdadm. It is
> > > > correctable because failed state is not recorded in the metadata.
> > > > After next assembly array will be read-write again. For safety
> > > > reason is better to keep MD_BROKEN in runtime only.
> > > >
> > > > Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>  
> > >
> > > Sorry for the late response.
> > >  
> > > > ---
> > > >  drivers/md/md-linear.c | 14 +++++++++++++-
> > > >  drivers/md/md.c        |  6 +++++-
> > > >  drivers/md/md.h        | 10 ++--------
> > > >  drivers/md/raid0.c     | 14 +++++++++++++-
> > > >  4 files changed, 33 insertions(+), 11 deletions(-)
> > > >
> > > > diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
> > > > index 1ff51647a682..c33cd28f1dba 100644
> > > > --- a/drivers/md/md-linear.c
> > > > +++ b/drivers/md/md-linear.c
> > > > @@ -233,7 +233,8 @@ static bool linear_make_request(struct mddev
> > > > *mddev, struct bio *bio) bio_sector < start_sector))
> > > >                 goto out_of_bounds;
> > > >
> > > > -       if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
> > > > +       if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
> > > > +               md_error(mddev, tmp_dev->rdev);  
> > >
> > > I apologize if we discussed this before. Shall we just call
> > > linear_error() here? If we go this way, we don't really need ...
> > >  
> > > >                 bio_io_error(bio);
> > > >                 return true;
> > > >         }
> > > > @@ -281,6 +282,16 @@ static void linear_status (struct seq_file
> > > > *seq, struct mddev *mddev) seq_printf(seq, " %dk rounding",
> > > > mddev->chunk_sectors / 2); }
> > > >
> > > > +static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
> > > > +{
> > > > +       if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
> > > > +               char *md_name = mdname(mddev);
> > > > +
> > > > +               pr_crit("md/linear%s: Disk failure on %pg detected,
> > > > failing array.\n",
> > > > +                       md_name, rdev->bdev);
> > > > +       }
> > > > +}
> > > > +
> > > >  static void linear_quiesce(struct mddev *mddev, int state)
> > > >  {
> > > >  }
> > > > @@ -297,6 +308,7 @@ static struct md_personality linear_personality
> > > > = .hot_add_disk   = linear_add,
> > > >         .size           = linear_size,
> > > >         .quiesce        = linear_quiesce,
> > > > +       .error_handler  = linear_error,  
> > >
> > > ... set error_handler here, and ...
> > >  
> > > >  };
> > > >
> > > >  static int __init linear_init (void)
> > > > diff --git a/drivers/md/md.c b/drivers/md/md.c
> > > > index 0a89f072dae0..3354afc9d2a3 100644
> > > > --- a/drivers/md/md.c
> > > > +++ b/drivers/md/md.c
> > > > @@ -7985,7 +7985,11 @@ void md_error(struct mddev *mddev, struct
> > > > md_rdev *rdev)
> > > >
> > > >         if (!mddev->pers || !mddev->pers->error_handler)
> > > >                 return;
> > > > -       mddev->pers->error_handler(mddev,rdev);
> > > > +       mddev->pers->error_handler(mddev, rdev);
> > > > +
> > > > +       if (mddev->pers->level == 0 || mddev->pers->level ==
> > > > LEVEL_LINEAR)
> > > > +               return;  
> > >
> > > ... this check here.
> > >
> > > Did I miss something?
> > >  
> > Hi Song,
> > That is correct, we can do the same for raid0. I did it this way to
> > make it similar to redundant levels.
> > If you think that it is overhead, I can drop it.  
> 
> Yeah, it feels like more overhead to me.
> 
> I applied 2/3 and 3/3 to md-next. Please take a look and let me know
> if anything needs to be fixed.
> 
Now the first patch is just a code refactor with a different error message.
I think we can drop it. Do you agree?

Thanks,
Mariusz


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2022-04-08 14:35     ` Mariusz Tkaczyk
@ 2022-04-08 16:18       ` Song Liu
  2022-04-12 15:31         ` Mariusz Tkaczyk
  0 siblings, 1 reply; 39+ messages in thread
From: Song Liu @ 2022-04-08 16:18 UTC (permalink / raw)
  To: Mariusz Tkaczyk; +Cc: linux-raid, Guoqing Jiang

On Fri, Apr 8, 2022 at 7:35 AM Mariusz Tkaczyk
<mariusz.tkaczyk@linux.intel.com> wrote:
>
> On Thu, 7 Apr 2022 17:16:37 -0700
> Song Liu <song@kernel.org> wrote:
>
> > On Tue, Mar 22, 2022 at 8:24 AM Mariusz Tkaczyk
> > <mariusz.tkaczyk@linux.intel.com> wrote:
> > >
> > > Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and
> > > fail BIOs if a member is gone") allowed to finish writes earlier
> > > (before level dependent actions) for non-redundant arrays.
> > >
> > > To achieve that MD_BROKEN is added to mddev->flags if drive
> > > disappearance is detected. This is done in is_mddev_broken() which
> > > is confusing and not consistent with other levels where
> > > error_handler() is used. This patch adds appropriate error_handler
> > > for raid0 and linear and adopt md_error() to the change.
> > >
> > > Usage of error_handler causes that disk failure can be requested
> > > from userspace. User can fail the array via #mdadm --set-faulty
> > > command. This is not safe and will be fixed in mdadm. It is
> > > correctable because failed state is not recorded in the metadata.
> > > After next assembly array will be read-write again. For safety
> > > reason is better to keep MD_BROKEN in runtime only.
> > >
> > > Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
> >
> > Sorry for the late response.
> >
> > > ---
> > >  drivers/md/md-linear.c | 14 +++++++++++++-
> > >  drivers/md/md.c        |  6 +++++-
> > >  drivers/md/md.h        | 10 ++--------
> > >  drivers/md/raid0.c     | 14 +++++++++++++-
> > >  4 files changed, 33 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
> > > index 1ff51647a682..c33cd28f1dba 100644
> > > --- a/drivers/md/md-linear.c
> > > +++ b/drivers/md/md-linear.c
> > > @@ -233,7 +233,8 @@ static bool linear_make_request(struct mddev
> > > *mddev, struct bio *bio) bio_sector < start_sector))
> > >                 goto out_of_bounds;
> > >
> > > -       if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
> > > +       if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
> > > +               md_error(mddev, tmp_dev->rdev);
> >
> > I apologize if we discussed this before. Shall we just call
> > linear_error() here? If we go this way, we don't really need ...
> >
> > >                 bio_io_error(bio);
> > >                 return true;
> > >         }
> > > @@ -281,6 +282,16 @@ static void linear_status (struct seq_file
> > > *seq, struct mddev *mddev) seq_printf(seq, " %dk rounding",
> > > mddev->chunk_sectors / 2); }
> > >
> > > +static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
> > > +{
> > > +       if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
> > > +               char *md_name = mdname(mddev);
> > > +
> > > +               pr_crit("md/linear%s: Disk failure on %pg detected,
> > > failing array.\n",
> > > +                       md_name, rdev->bdev);
> > > +       }
> > > +}
> > > +
> > >  static void linear_quiesce(struct mddev *mddev, int state)
> > >  {
> > >  }
> > > @@ -297,6 +308,7 @@ static struct md_personality linear_personality
> > > = .hot_add_disk   = linear_add,
> > >         .size           = linear_size,
> > >         .quiesce        = linear_quiesce,
> > > +       .error_handler  = linear_error,
> >
> > ... set error_handler here, and ...
> >
> > >  };
> > >
> > >  static int __init linear_init (void)
> > > diff --git a/drivers/md/md.c b/drivers/md/md.c
> > > index 0a89f072dae0..3354afc9d2a3 100644
> > > --- a/drivers/md/md.c
> > > +++ b/drivers/md/md.c
> > > @@ -7985,7 +7985,11 @@ void md_error(struct mddev *mddev, struct
> > > md_rdev *rdev)
> > >
> > >         if (!mddev->pers || !mddev->pers->error_handler)
> > >                 return;
> > > -       mddev->pers->error_handler(mddev,rdev);
> > > +       mddev->pers->error_handler(mddev, rdev);
> > > +
> > > +       if (mddev->pers->level == 0 || mddev->pers->level ==
> > > LEVEL_LINEAR)
> > > +               return;
> >
> > ... this check here.
> >
> > Did I miss something?
> >
> Hi Song,
> That is correct, we can do the same for raid0. I did it this way to
> make it similar to redundant levels.
> If you think that it is overhead, I can drop it.

Yeah, it feels like more overhead to me.

I applied 2/3 and 3/3 to md-next. Please take a look and let me know
if anything needs to be fixed.

Thanks,
Song

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2022-04-08  0:16   ` Song Liu
@ 2022-04-08 14:35     ` Mariusz Tkaczyk
  2022-04-08 16:18       ` Song Liu
  0 siblings, 1 reply; 39+ messages in thread
From: Mariusz Tkaczyk @ 2022-04-08 14:35 UTC (permalink / raw)
  To: Song Liu; +Cc: linux-raid, Guoqing Jiang

On Thu, 7 Apr 2022 17:16:37 -0700
Song Liu <song@kernel.org> wrote:

> On Tue, Mar 22, 2022 at 8:24 AM Mariusz Tkaczyk
> <mariusz.tkaczyk@linux.intel.com> wrote:
> >
> > Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and
> > fail BIOs if a member is gone") allowed to finish writes earlier
> > (before level dependent actions) for non-redundant arrays.
> >
> > To achieve that MD_BROKEN is added to mddev->flags if drive
> > disappearance is detected. This is done in is_mddev_broken() which
> > is confusing and not consistent with other levels where
> > error_handler() is used. This patch adds appropriate error_handler
> > for raid0 and linear and adopt md_error() to the change.
> >
> > Usage of error_handler causes that disk failure can be requested
> > from userspace. User can fail the array via #mdadm --set-faulty
> > command. This is not safe and will be fixed in mdadm. It is
> > correctable because failed state is not recorded in the metadata.
> > After next assembly array will be read-write again. For safety
> > reason is better to keep MD_BROKEN in runtime only.
> >
> > Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>  
> 
> Sorry for the late response.
> 
> > ---
> >  drivers/md/md-linear.c | 14 +++++++++++++-
> >  drivers/md/md.c        |  6 +++++-
> >  drivers/md/md.h        | 10 ++--------
> >  drivers/md/raid0.c     | 14 +++++++++++++-
> >  4 files changed, 33 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
> > index 1ff51647a682..c33cd28f1dba 100644
> > --- a/drivers/md/md-linear.c
> > +++ b/drivers/md/md-linear.c
> > @@ -233,7 +233,8 @@ static bool linear_make_request(struct mddev
> > *mddev, struct bio *bio) bio_sector < start_sector))
> >                 goto out_of_bounds;
> >
> > -       if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
> > +       if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
> > +               md_error(mddev, tmp_dev->rdev);  
> 
> I apologize if we discussed this before. Shall we just call
> linear_error() here? If we go this way, we don't really need ...
> 
> >                 bio_io_error(bio);
> >                 return true;
> >         }
> > @@ -281,6 +282,16 @@ static void linear_status (struct seq_file
> > *seq, struct mddev *mddev) seq_printf(seq, " %dk rounding",
> > mddev->chunk_sectors / 2); }
> >
> > +static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
> > +{
> > +       if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
> > +               char *md_name = mdname(mddev);
> > +
> > +               pr_crit("md/linear%s: Disk failure on %pg detected,
> > failing array.\n",
> > +                       md_name, rdev->bdev);
> > +       }
> > +}
> > +
> >  static void linear_quiesce(struct mddev *mddev, int state)
> >  {
> >  }
> > @@ -297,6 +308,7 @@ static struct md_personality linear_personality
> > = .hot_add_disk   = linear_add,
> >         .size           = linear_size,
> >         .quiesce        = linear_quiesce,
> > +       .error_handler  = linear_error,  
> 
> ... set error_handler here, and ...
> 
> >  };
> >
> >  static int __init linear_init (void)
> > diff --git a/drivers/md/md.c b/drivers/md/md.c
> > index 0a89f072dae0..3354afc9d2a3 100644
> > --- a/drivers/md/md.c
> > +++ b/drivers/md/md.c
> > @@ -7985,7 +7985,11 @@ void md_error(struct mddev *mddev, struct
> > md_rdev *rdev)
> >
> >         if (!mddev->pers || !mddev->pers->error_handler)
> >                 return;
> > -       mddev->pers->error_handler(mddev,rdev);
> > +       mddev->pers->error_handler(mddev, rdev);
> > +
> > +       if (mddev->pers->level == 0 || mddev->pers->level ==
> > LEVEL_LINEAR)
> > +               return;  
> 
> ... this check here.
> 
> Did I miss something?
> 
Hi Song,
That is correct, we can do the same for raid0. I did it this way to
make it similar to redundant levels.
If you think that it is overhead, I can drop it.
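
For reference, calling the handler directly in linear_make_request()
would reduce to something like this (just a sketch):

	if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
		/* call the personality's handler directly */
		linear_error(mddev, tmp_dev->rdev);
		bio_io_error(bio);
		return true;
	}

and md_error() would then not need the level == 0 / LEVEL_LINEAR check.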

Thanks,
Mariusz


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2022-03-22 15:23 ` [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear Mariusz Tkaczyk
@ 2022-04-08  0:16   ` Song Liu
  2022-04-08 14:35     ` Mariusz Tkaczyk
  0 siblings, 1 reply; 39+ messages in thread
From: Song Liu @ 2022-04-08  0:16 UTC (permalink / raw)
  To: Mariusz Tkaczyk; +Cc: linux-raid, Guoqing Jiang

On Tue, Mar 22, 2022 at 8:24 AM Mariusz Tkaczyk
<mariusz.tkaczyk@linux.intel.com> wrote:
>
> Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and fail BIOs
> if a member is gone") allowed to finish writes earlier (before level
> dependent actions) for non-redundant arrays.
>
> To achieve that MD_BROKEN is added to mddev->flags if drive disappearance
> is detected. This is done in is_mddev_broken() which is confusing and not
> consistent with other levels where error_handler() is used.
> This patch adds appropriate error_handler for raid0 and linear and adopt
> md_error() to the change.
>
> Usage of error_handler causes that disk failure can be requested from
> userspace. User can fail the array via #mdadm --set-faulty command. This
> is not safe and will be fixed in mdadm. It is correctable because failed
> state is not recorded in the metadata. After next assembly array will be
> read-write again. For safety reason is better to keep MD_BROKEN in
> runtime only.
>
> Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>

Sorry for the late response.

> ---
>  drivers/md/md-linear.c | 14 +++++++++++++-
>  drivers/md/md.c        |  6 +++++-
>  drivers/md/md.h        | 10 ++--------
>  drivers/md/raid0.c     | 14 +++++++++++++-
>  4 files changed, 33 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
> index 1ff51647a682..c33cd28f1dba 100644
> --- a/drivers/md/md-linear.c
> +++ b/drivers/md/md-linear.c
> @@ -233,7 +233,8 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
>                      bio_sector < start_sector))
>                 goto out_of_bounds;
>
> -       if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
> +       if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
> +               md_error(mddev, tmp_dev->rdev);

I apologize if we discussed this before. Shall we just call linear_error()
here? If we go this way, we don't really need ...

>                 bio_io_error(bio);
>                 return true;
>         }
> @@ -281,6 +282,16 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev)
>         seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
>  }
>
> +static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
> +{
> +       if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
> +               char *md_name = mdname(mddev);
> +
> +               pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n",
> +                       md_name, rdev->bdev);
> +       }
> +}
> +
>  static void linear_quiesce(struct mddev *mddev, int state)
>  {
>  }
> @@ -297,6 +308,7 @@ static struct md_personality linear_personality =
>         .hot_add_disk   = linear_add,
>         .size           = linear_size,
>         .quiesce        = linear_quiesce,
> +       .error_handler  = linear_error,

... set error_handler here, and ...

>  };
>
>  static int __init linear_init (void)
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 0a89f072dae0..3354afc9d2a3 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -7985,7 +7985,11 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
>
>         if (!mddev->pers || !mddev->pers->error_handler)
>                 return;
> -       mddev->pers->error_handler(mddev,rdev);
> +       mddev->pers->error_handler(mddev, rdev);
> +
> +       if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
> +               return;

... this check here.

Did I miss something?

Thanks,
Song

[...]

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2022-03-22 15:23 [PATCH 0/3] Failed array handling improvements Mariusz Tkaczyk
@ 2022-03-22 15:23 ` Mariusz Tkaczyk
  2022-04-08  0:16   ` Song Liu
  0 siblings, 1 reply; 39+ messages in thread
From: Mariusz Tkaczyk @ 2022-03-22 15:23 UTC (permalink / raw)
  To: song; +Cc: linux-raid, guoqing.jiang

Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and fail BIOs
if a member is gone") allowed to finish writes earlier (before level
dependent actions) for non-redundant arrays.

To achieve that MD_BROKEN is added to mddev->flags if drive disappearance
is detected. This is done in is_mddev_broken() which is confusing and not
consistent with other levels where error_handler() is used.
This patch adds appropriate error_handler for raid0 and linear and adopt
md_error() to the change.

Usage of error_handler causes that disk failure can be requested from
userspace. User can fail the array via #mdadm --set-faulty command. This
is not safe and will be fixed in mdadm. It is correctable because failed
state is not recorded in the metadata. After next assembly array will be
read-write again. For safety reason is better to keep MD_BROKEN in
runtime only.

Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
 drivers/md/md-linear.c | 14 +++++++++++++-
 drivers/md/md.c        |  6 +++++-
 drivers/md/md.h        | 10 ++--------
 drivers/md/raid0.c     | 14 +++++++++++++-
 4 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 1ff51647a682..c33cd28f1dba 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -233,7 +233,8 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
 		     bio_sector < start_sector))
 		goto out_of_bounds;
 
-	if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
+	if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
+		md_error(mddev, tmp_dev->rdev);
 		bio_io_error(bio);
 		return true;
 	}
@@ -281,6 +282,16 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev)
 	seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
 }
 
+static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
+{
+	if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
+		char *md_name = mdname(mddev);
+
+		pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n",
+			md_name, rdev->bdev);
+	}
+}
+
 static void linear_quiesce(struct mddev *mddev, int state)
 {
 }
@@ -297,6 +308,7 @@ static struct md_personality linear_personality =
 	.hot_add_disk	= linear_add,
 	.size		= linear_size,
 	.quiesce	= linear_quiesce,
+	.error_handler	= linear_error,
 };
 
 static int __init linear_init (void)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0a89f072dae0..3354afc9d2a3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7985,7 +7985,11 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
 
 	if (!mddev->pers || !mddev->pers->error_handler)
 		return;
-	mddev->pers->error_handler(mddev,rdev);
+	mddev->pers->error_handler(mddev, rdev);
+
+	if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
+		return;
+
 	if (mddev->degraded)
 		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
 	sysfs_notify_dirent_safe(rdev->sysfs_state);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index f1bf3625ef4c..13d435a303fa 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -764,15 +764,9 @@ extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
 struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
 struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
 
-static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
+static inline bool is_rdev_broken(struct md_rdev *rdev)
 {
-	if (!disk_live(rdev->bdev->bd_disk)) {
-		if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
-			pr_warn("md: %s: %s array has a missing/failed member\n",
-				mdname(rdev->mddev), md_type);
-		return true;
-	}
-	return false;
+	return !disk_live(rdev->bdev->bd_disk);
 }
 
 static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index b59a77b31b90..b402e3dc4ead 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -582,8 +582,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
 		return true;
 	}
 
-	if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
+	if (unlikely(is_rdev_broken(tmp_dev))) {
 		bio_io_error(bio);
+		md_error(mddev, tmp_dev);
 		return true;
 	}
 
@@ -606,6 +607,16 @@ static void raid0_status(struct seq_file *seq, struct mddev *mddev)
 	return;
 }
 
+static void raid0_error(struct mddev *mddev, struct md_rdev *rdev)
+{
+	if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
+		char *md_name = mdname(mddev);
+
+		pr_crit("md/raid0%s: Disk failure on %pg detected, failing array.\n",
+			md_name, rdev->bdev);
+	}
+}
+
 static void *raid0_takeover_raid45(struct mddev *mddev)
 {
 	struct md_rdev *rdev;
@@ -781,6 +792,7 @@ static struct md_personality raid0_personality=
 	.size		= raid0_size,
 	.takeover	= raid0_takeover,
 	.quiesce	= raid0_quiesce,
+	.error_handler	= raid0_error,
 };
 
 static int __init raid0_init (void)
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 39+ messages in thread
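
[For readers following the diffs: with the md.c hunk above applied,
md_error() reduces roughly to the sketch below. The trailing
notification steps are elided and the context lines are reconstructed
from the hunk, so this is not the verbatim upstream body:

	void md_error(struct mddev *mddev, struct md_rdev *rdev)
	{
		if (!mddev->pers || !mddev->pers->error_handler)
			return;

		/* The personality marks the rdev and/or array as failed. */
		mddev->pers->error_handler(mddev, rdev);

		/*
		 * raid0 and linear have no redundancy: there is nothing
		 * to recover and no degraded mode, so stop here.
		 */
		if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
			return;

		if (mddev->degraded)
			set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		sysfs_notify_dirent_safe(rdev->sysfs_state);
		/* ... remaining notification/wakeup steps elided ... */
	}
]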

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2022-02-22  6:34           ` Song Liu
@ 2022-02-22 13:02             ` Mariusz Tkaczyk
  0 siblings, 0 replies; 39+ messages in thread
From: Mariusz Tkaczyk @ 2022-02-22 13:02 UTC (permalink / raw)
  To: Song Liu; +Cc: Guoqing Jiang, linux-raid

Hi Song,
On Mon, 21 Feb 2022 22:34:02 -0800
Song Liu <song@kernel.org> wrote:

> Could you please resend the patchset with feedback addressed? I can
> apply the newer version and force push to md-next.

Yes, I'm working on it. I cannot estimate when it will be ready; there
is an ongoing discussion. If you prefer, you can take it off md-next for
now.

Thanks,
Mariusz

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2022-02-15 14:06         ` Mariusz Tkaczyk
  2022-02-16  9:47           ` Xiao Ni
@ 2022-02-22  6:34           ` Song Liu
  2022-02-22 13:02             ` Mariusz Tkaczyk
  1 sibling, 1 reply; 39+ messages in thread
From: Song Liu @ 2022-02-22  6:34 UTC (permalink / raw)
  To: Mariusz Tkaczyk; +Cc: Guoqing Jiang, linux-raid

Hi Mariusz,

On Tue, Feb 15, 2022 at 6:06 AM Mariusz Tkaczyk
<mariusz.tkaczyk@linux.intel.com> wrote:
>
> On Tue, 15 Feb 2022 11:43:34 +0800
> Guoqing Jiang <guoqing.jiang@linux.dev> wrote:
>
> > >> It also adapts md_error(): we only want to call .error_handler for
> > >> those levels. mddev->pers->sync_request is additionally checked;
> > >> its existence implies a level with redundancy.
> > >>
> > >> Using error_handler means that a disk failure can be requested
> > >> from userspace: a user can fail the array via the #mdadm
> > >> --set-faulty command. This is not safe and will be fixed in mdadm.
> > >> What is the safety issue here? It would be better to post the mdadm
> > >> fix together.
> > > We can and should block the user from damaging the raid even if it
> > > is recoverable. It is a regression.
> >
> > I don't follow, did you mean --set-faulty from mdadm could "damage
> > the raid"?
>
> Yes, it is now able to impose a failed state. This is a regression I
> caused, and I'm aware of it.
> >
> > > I will fix mdadm. I don't consider it a big risk (because it is
> > > recoverable), so I focused on the kernel part first.
> > >
> > >>> It is correctable because the failed state is not recorded in the
> > >>> metadata, so after the next assembly the array will be read-write
> > >>> again.
> > >> I don't think it is a problem; care to explain why it can't be RW
> > >> again?
> > > The failed state is not recoverable at runtime, so you need to
> > > recreate the array.
> >
> > IIUC, the failfast flag is supposed to be set during a transient
> > error, not a permanent failure; the rdev (marked as failfast) needs
> > to be revalidated and re-added to the array.
> >
>
> The device is never marked as failed. I checked the write paths
> (because I introduced a more aggressive policy for writes): if the
> array is in a critical state, the failfast flag is not added to the
> bio for either raid1 or raid10, see raid1_write_request().
> >
> > [ ... ]
> >
> > >>> +         char *md_name = mdname(mddev);
> > >>> +
> > >>> +         pr_crit("md/linear%s: Disk failure on %pg
> > >>> detected.\n"
> > >>> +                 "md/linear:%s: Cannot continue, failing
> > >>> array.\n",
> > >>> +                 md_name, rdev->bdev, md_name);
> > >> The second md_name is not needed.
> > > Could you elaborate a bit more? Do you want to skip the device name
> > > in the second message?
> >
> > Yes, we printed md_name twice here, which seems unnecessary.
> >
> I will merge the errors into one message.
>
> >
> > >>> --- a/drivers/md/md.c
> > >>> +++ b/drivers/md/md.c
> > >>> @@ -7982,7 +7982,11 @@ void md_error(struct mddev *mddev, struct
> > >>> md_rdev *rdev)
> > >>>           if (!mddev->pers || !mddev->pers->error_handler)
> > >>>                   return;
> > >>> - mddev->pers->error_handler(mddev,rdev);
> > >>> + mddev->pers->error_handler(mddev, rdev);
> > >>> +
> > >>> + if (!mddev->pers->sync_request)
> > >>> +         return;
> > >> The above is only valid for raid0 and linear; I guess it is fine
> > >> as long as DM doesn't create an LV on top of them. But the new
> > >> check deserves a comment above it.
> > > Will do, could you propose a comment?
> >
> > Or just check if it is raid0 or linear directly, instead of inferring
> > it from a level with redundancy.
>
> Got it.

Could you please resend the patchset with feedback addressed? I can
apply the newer version and force push to md-next.

Thanks,
Song

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2022-02-15 14:06         ` Mariusz Tkaczyk
@ 2022-02-16  9:47           ` Xiao Ni
  2022-02-22  6:34           ` Song Liu
  1 sibling, 0 replies; 39+ messages in thread
From: Xiao Ni @ 2022-02-16  9:47 UTC (permalink / raw)
  To: Mariusz Tkaczyk; +Cc: Guoqing Jiang, Song Liu, linux-raid

On Tue, Feb 15, 2022 at 10:07 PM Mariusz Tkaczyk
<mariusz.tkaczyk@linux.intel.com> wrote:
>
> On Tue, 15 Feb 2022 11:43:34 +0800
> Guoqing Jiang <guoqing.jiang@linux.dev> wrote:
>
> > >> It also adapts md_error(): we only want to call .error_handler for
> > >> those levels. mddev->pers->sync_request is additionally checked;
> > >> its existence implies a level with redundancy.
> > >>
> > >> Using error_handler means that a disk failure can be requested
> > >> from userspace: a user can fail the array via the #mdadm
> > >> --set-faulty command. This is not safe and will be fixed in mdadm.
> > >> What is the safety issue here? It would be better to post the mdadm
> > >> fix together.
> > > We can and should block the user from damaging the raid even if it
> > > is recoverable. It is a regression.
> >
> > I don't follow, did you mean --set-faulty from mdadm could "damage
> > the raid"?
>
> Yes, it is now able to impose a failed state. This is a regression I
> caused, and I'm aware of it.
> >
> > > I will fix mdadm. I don't consider it a big risk (because it is
> > > recoverable), so I focused on the kernel part first.
> > >
> > >>> It is correctable because the failed state is not recorded in the
> > >>> metadata, so after the next assembly the array will be read-write
> > >>> again.
> > >> I don't think it is a problem; care to explain why it can't be RW
> > >> again?
> > > The failed state is not recoverable at runtime, so you need to
> > > recreate the array.
> >
> > IIUC, the failfast flag is supposed to be set during a transient
> > error, not a permanent failure; the rdev (marked as failfast) needs
> > to be revalidated and re-added to the array.
> >
>
> The device is never marked as failed. I checked the write paths
> (because I introduced a more aggressive policy for writes): if the
> array is in a critical state, the failfast flag is not added to the
> bio for either raid1 or raid10, see raid1_write_request().

Hi all

The "failed state" in the patch should mean MD_BROKEN, right? If so,
are you talking
in the wrong direction?

> >
> > [ ... ]
> >
> > >>> +         char *md_name = mdname(mddev);
> > >>> +
> > >>> +         pr_crit("md/linear%s: Disk failure on %pg
> > >>> detected.\n"
> > >>> +                 "md/linear:%s: Cannot continue, failing
> > >>> array.\n",
> > >>> +                 md_name, rdev->bdev, md_name);
> > >> The second md_name is not needed.
> > > Could you elaborate a bit more? Do you want to skip the device name
> > > in the second message?
> >
> > Yes, we printed md_name twice here, which seems unnecessary.
> >
> I will merge the errors into one message.
>
> >
> > >>> --- a/drivers/md/md.c
> > >>> +++ b/drivers/md/md.c
> > >>> @@ -7982,7 +7982,11 @@ void md_error(struct mddev *mddev, struct
> > >>> md_rdev *rdev)
> > >>>           if (!mddev->pers || !mddev->pers->error_handler)
> > >>>                   return;
> > >>> - mddev->pers->error_handler(mddev,rdev);
> > >>> + mddev->pers->error_handler(mddev, rdev);
> > >>> +
> > >>> + if (!mddev->pers->sync_request)
> > >>> +         return;
> > >> The above is only valid for raid0 and linear; I guess it is fine
> > >> as long as DM doesn't create an LV on top

Hi Guoqing, could you explain this a bit more? If we create an LV on
top of a raid device, does this check not work?

Regards
Xiao


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2022-02-15  3:43       ` Guoqing Jiang
@ 2022-02-15 14:06         ` Mariusz Tkaczyk
  2022-02-16  9:47           ` Xiao Ni
  2022-02-22  6:34           ` Song Liu
  0 siblings, 2 replies; 39+ messages in thread
From: Mariusz Tkaczyk @ 2022-02-15 14:06 UTC (permalink / raw)
  To: Guoqing Jiang; +Cc: song, linux-raid

On Tue, 15 Feb 2022 11:43:34 +0800
Guoqing Jiang <guoqing.jiang@linux.dev> wrote:

> >> It also adapts md_error(): we only want to call .error_handler for
> >> those levels. mddev->pers->sync_request is additionally checked;
> >> its existence implies a level with redundancy.
> >>
> >> Using error_handler means that a disk failure can be requested
> >> from userspace: a user can fail the array via the #mdadm --set-faulty
> >> command. This is not safe and will be fixed in mdadm.
> >> What is the safety issue here? It would be better to post the mdadm
> >> fix together.
> > We can and should block the user from damaging the raid even if it is
> > recoverable. It is a regression.
>
> I don't follow, did you mean --set-faulty from mdadm could "damage the
> raid"?

Yes, it is now able to impose a failed state. This is a regression I
caused, and I'm aware of it.
>
> > I will fix mdadm. I don't consider it a big risk (because it is
> > recoverable), so I focused on the kernel part first.
> >
> >>> It is correctable because the failed state is not recorded in the
> >>> metadata, so after the next assembly the array will be read-write
> >>> again.
> >> I don't think it is a problem; care to explain why it can't be RW
> >> again?
> > The failed state is not recoverable at runtime, so you need to
> > recreate the array.
>
> IIUC, the failfast flag is supposed to be set during a transient error,
> not a permanent failure; the rdev (marked as failfast) needs to be
> revalidated and re-added to the array.
>

The device is never marked as failed. I checked the write paths (because
I introduced a more aggressive policy for writes): if the array is in a
critical state, the failfast flag is not added to the bio for either
raid1 or raid10, see raid1_write_request().
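
[The guard described above has roughly this shape; the identifiers are
real md ones, but the condition is illustrative only -- see
raid1_write_request() for the exact logic:

	/* Illustrative: apply failfast to a write bio only while
	 * redundancy remains; on the last working leg a transient
	 * error must not take out the whole array. */
	if (test_bit(FailFast, &rdev->flags) &&
	    conf->raid_disks - mddev->degraded > 1)
		bio->bi_opf |= REQ_FAILFAST_DEV;
]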
> 
> [ ... ] 
> 
> >>> +		char *md_name = mdname(mddev);
> >>> +
> >>> +		pr_crit("md/linear%s: Disk failure on %pg
> >>> detected.\n"
> >>> +			"md/linear:%s: Cannot continue, failing
> >>> array.\n",
> >>> +			md_name, rdev->bdev, md_name);  
> >> The second md_name is not needed.  
> > Could you elaborate a bit more? Do you want to skip the device name
> > in the second message?
>
> Yes, we printed md_name twice here, which seems unnecessary.
>
I will merge the errors into one message.
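
[The later resend at the top of this page indeed merges the two lines
into a single pr_crit():

	pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n",
		md_name, rdev->bdev);
]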

> 
> >>> --- a/drivers/md/md.c
> >>> +++ b/drivers/md/md.c
> >>> @@ -7982,7 +7982,11 @@ void md_error(struct mddev *mddev, struct
> >>> md_rdev *rdev)
> >>>    	if (!mddev->pers || !mddev->pers->error_handler)
> >>>    		return;
> >>> -	mddev->pers->error_handler(mddev,rdev);
> >>> +	mddev->pers->error_handler(mddev, rdev);
> >>> +
> >>> +	if (!mddev->pers->sync_request)
> >>> +		return;  
> >> The above is only valid for raid0 and linear; I guess it is fine as
> >> long as DM doesn't create an LV on top of them. But the new check
> >> deserves a comment above it.
> > Will do, could you propose a comment?
>
> Or just check if it is raid0 or linear directly, instead of inferring
> it from a level with redundancy.

Got it.

Thanks,
Mariusz


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2022-02-14  9:37     ` Mariusz Tkaczyk
@ 2022-02-15  3:43       ` Guoqing Jiang
  2022-02-15 14:06         ` Mariusz Tkaczyk
  0 siblings, 1 reply; 39+ messages in thread
From: Guoqing Jiang @ 2022-02-15  3:43 UTC (permalink / raw)
  To: Mariusz Tkaczyk; +Cc: song, linux-raid



On 2/14/22 5:37 PM, Mariusz Tkaczyk wrote:
> On Sat, 12 Feb 2022 09:12:00 +0800
> Guoqing Jiang <guoqing.jiang@linux.dev> wrote:
>
>> On 1/27/22 11:39 PM, Mariusz Tkaczyk wrote:
>>> Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and
>>> fail BIOs if a member is gone") allowed writes to be finished earlier
>>> (before level-dependent actions) for non-redundant arrays.
>>>
>>> To achieve that, MD_BROKEN is added to mddev->flags when a drive
>>> disappearance is detected. This is done in is_mddev_broken(), which
>>> is confusing and not consistent with the other levels, where
>>> error_handler() is used. This patch adds an appropriate error_handler
>>> for raid0 and linear.
>> I think their purposes are quite different; as said before,
>> error_handler mostly acts on the rdev, while is_mddev_broken is for
>> the mddev, though it needs to test the rdev first.
> I changed is_mddev_broken to is_rdev_broken because it now only checks
> the device. On error it calls md_error(), which then calls
> error_handler(). I unified error handling across levels. Do you
> consider that wrong?

I am neutral on the change.

>> It also adapts md_error(): we only want to call .error_handler for
>> those levels. mddev->pers->sync_request is additionally checked;
>> its existence implies a level with redundancy.
>>
>> Using error_handler means that a disk failure can be requested
>> from userspace: a user can fail the array via the #mdadm --set-faulty
>> command. This is not safe and will be fixed in mdadm.
>> What is the safety issue here? It would be better to post the mdadm
>> fix together.
> We can and should block the user from damaging the raid even if it is
> recoverable. It is a regression.

I don't follow, did you mean --set-faulty from mdadm could "damage the
raid"?

> I will fix mdadm. I don't consider it a big risk (because it is
> recoverable), so I focused on the kernel part first.
>
>>> It is correctable because the failed state is not recorded in the
>>> metadata, so after the next assembly the array will be read-write
>>> again.
>> I don't think it is a problem; care to explain why it can't be RW
>> again?
> The failed state is not recoverable at runtime, so you need to recreate
> the array.

IIUC, the failfast flag is supposed to be set during a transient error,
not a permanent failure; the rdev (marked as failfast) needs to be
revalidated and re-added to the array.


[ ... ]

>>> +		char *md_name = mdname(mddev);
>>> +
>>> +		pr_crit("md/linear%s: Disk failure on %pg
>>> detected.\n"
>>> +			"md/linear:%s: Cannot continue, failing
>>> array.\n",
>>> +			md_name, rdev->bdev, md_name);
>> The second md_name is not needed.
> Could you elaborate a bit more? Do you want to skip the device name in
> the second message?

Yes, we printed md_name twice here, which seems unnecessary.

[ ... ]

>>> --- a/drivers/md/md.c
>>> +++ b/drivers/md/md.c
>>> @@ -7982,7 +7982,11 @@ void md_error(struct mddev *mddev, struct
>>> md_rdev *rdev)
>>>    	if (!mddev->pers || !mddev->pers->error_handler)
>>>    		return;
>>> -	mddev->pers->error_handler(mddev,rdev);
>>> +	mddev->pers->error_handler(mddev, rdev);
>>> +
>>> +	if (!mddev->pers->sync_request)
>>> +		return;
>> The above is only valid for raid0 and linear; I guess it is fine as
>> long as DM doesn't create an LV on top of them. But the new check
>> deserves a comment above it.
> Will do, could you propose a comment?

Or just check if it is raid0 or linear directly, instead of inferring it
from a level with redundancy.
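
[The later resend at the top of this page adopts exactly this form in
md_error():

	if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
		return;
]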

Thanks,
Guoqing

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2022-02-12  1:12   ` Guoqing Jiang
@ 2022-02-14  9:37     ` Mariusz Tkaczyk
  2022-02-15  3:43       ` Guoqing Jiang
  0 siblings, 1 reply; 39+ messages in thread
From: Mariusz Tkaczyk @ 2022-02-14  9:37 UTC (permalink / raw)
  To: Guoqing Jiang; +Cc: song, linux-raid

On Sat, 12 Feb 2022 09:12:00 +0800
Guoqing Jiang <guoqing.jiang@linux.dev> wrote:

> On 1/27/22 11:39 PM, Mariusz Tkaczyk wrote:
> > Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and
> > fail BIOs if a member is gone") allowed writes to be finished earlier
> > (before level-dependent actions) for non-redundant arrays.
> >
> > To achieve that, MD_BROKEN is added to mddev->flags when a drive
> > disappearance is detected. This is done in is_mddev_broken(), which
> > is confusing and not consistent with the other levels, where
> > error_handler() is used. This patch adds an appropriate error_handler
> > for raid0 and linear.
>
> I think their purposes are quite different; as said before,
> error_handler mostly acts on the rdev, while is_mddev_broken is for
> the mddev, though it needs to test the rdev first.

I changed is_mddev_broken to is_rdev_broken because it now only checks
the device. On error it calls md_error(), which then calls
error_handler(). I unified error handling across levels. Do you consider
that wrong?
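
[The unified path, sketched from the hunks in this thread -- raid0
shown, linear is analogous:

	if (unlikely(is_rdev_broken(tmp_dev))) {
		/* -> raid0_error(): latch MD_BROKEN once and log it */
		md_error(mddev, tmp_dev);
		/* fail the BIO immediately, no level-dependent action */
		bio_io_error(bio);
		return true;
	}
]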

> 
> > It also adapts md_error(): we only want to call .error_handler for
> > those levels. mddev->pers->sync_request is additionally checked;
> > its existence implies a level with redundancy.
> >
> > Using error_handler means that a disk failure can be requested
> > from userspace: a user can fail the array via the #mdadm --set-faulty
> > command. This is not safe and will be fixed in mdadm.
>
> What is the safety issue here? It would be better to post the mdadm fix
> together.

We can and should block the user from damaging the raid even if it is
recoverable. It is a regression.
I will fix mdadm. I don't consider it a big risk (because it is
recoverable), so I focused on the kernel part first.

> 
> > It is correctable because the failed state is not recorded in the
> > metadata, so after the next assembly the array will be read-write
> > again.
>
> I don't think it is a problem; care to explain why it can't be RW
> again?

The failed state is not recoverable at runtime, so you need to recreate
the array.

> 
> > For safety reasons it is better to keep MD_BROKEN runtime-only.
>
> Isn't MD_BROKEN runtime-only already? It is in mddev->flags, not
> mddev->sb_flags.

Yes, and this is why I didn't propagate it.
> 
> > Signed-off-by: Mariusz Tkaczyk<mariusz.tkaczyk@linux.intel.com>
> > ---
> >   drivers/md/md-linear.c | 15 ++++++++++++++-
> >   drivers/md/md.c        |  6 +++++-
> >   drivers/md/md.h        | 10 ++--------
> >   drivers/md/raid0.c     | 15 ++++++++++++++-
> >   4 files changed, 35 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
> > index 1ff51647a682..3c368e3e4641 100644
> > --- a/drivers/md/md-linear.c
> > +++ b/drivers/md/md-linear.c
> > @@ -233,7 +233,8 @@ static bool linear_make_request(struct mddev
> > *mddev, struct bio *bio) bio_sector < start_sector))
> >   		goto out_of_bounds;
> >   
> > -	if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
> > +	if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
> > +		md_error(mddev, tmp_dev->rdev);  
> 
> [ ... ]
> 
> >   
> > +static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
> > +{
> > +	if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags)) {  
> 
> s/rdev->mddev/mddev/

Noted.
> 
> > +		char *md_name = mdname(mddev);
> > +
> > +		pr_crit("md/linear%s: Disk failure on %pg
> > detected.\n"
> > +			"md/linear:%s: Cannot continue, failing
> > array.\n",
> > +			md_name, rdev->bdev, md_name);  
> 
> The second md_name is not needed.
Could you elaborate a bit more? Do you want to skip the device name in
the second message?

> 
> > +	}
> > +}
> > +
> >   static void linear_quiesce(struct mddev *mddev, int state)
> >   {
> >   }
> > @@ -297,6 +309,7 @@ static struct md_personality linear_personality
> > = .hot_add_disk	= linear_add,
> >   	.size		= linear_size,
> >   	.quiesce	= linear_quiesce,
> > +	.error_handler	= linear_error,
> >   };
> >   
> >   static int __init linear_init (void)
> > diff --git a/drivers/md/md.c b/drivers/md/md.c
> > index e8666bdc0d28..f888ef197765 100644
> > --- a/drivers/md/md.c
> > +++ b/drivers/md/md.c
> > @@ -7982,7 +7982,11 @@ void md_error(struct mddev *mddev, struct
> > md_rdev *rdev) 
> >   	if (!mddev->pers || !mddev->pers->error_handler)
> >   		return;
> > -	mddev->pers->error_handler(mddev,rdev);
> > +	mddev->pers->error_handler(mddev, rdev);
> > +
> > +	if (!mddev->pers->sync_request)
> > +		return;  
> 
> The above is only valid for raid0 and linear; I guess it is fine as
> long as DM doesn't create an LV on top of them. But the new check
> deserves a comment above it.
Will do, could you propose a comment?

> 
> > +
> >   	if (mddev->degraded)
> >   		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
> >   	sysfs_notify_dirent_safe(rdev->sysfs_state);  
> 
> [ ... ]
> 
> > +static void raid0_error(struct mddev *mddev, struct md_rdev *rdev)
> > +{
> > +	if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags)) {
> > +		char *md_name = mdname(mddev);
> > +
> > +		pr_crit("md/raid0%s: Disk failure on %pg
> > detected.\n"
> > +			"md/raid0:%s: Cannot continue, failing
> > array.\n",
> > +			md_name, rdev->bdev, md_name);  
> 
> The comments for linear_error are also valid here.
> 
Noted.

Thanks,
Mariusz

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2022-01-27 15:39 ` [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear Mariusz Tkaczyk
@ 2022-02-12  1:12   ` Guoqing Jiang
  2022-02-14  9:37     ` Mariusz Tkaczyk
  0 siblings, 1 reply; 39+ messages in thread
From: Guoqing Jiang @ 2022-02-12  1:12 UTC (permalink / raw)
  To: Mariusz Tkaczyk, song; +Cc: linux-raid



On 1/27/22 11:39 PM, Mariusz Tkaczyk wrote:
> Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and fail
> BIOs if a member is gone") allowed writes to be finished earlier
> (before level-dependent actions) for non-redundant arrays.
>
> To achieve that, MD_BROKEN is added to mddev->flags when a drive
> disappearance is detected. This is done in is_mddev_broken(), which is
> confusing and not consistent with the other levels, where
> error_handler() is used. This patch adds an appropriate error_handler
> for raid0 and linear.

I think their purposes are quite different; as said before, error_handler
mostly acts on the rdev, while is_mddev_broken is for the mddev, though
it needs to test the rdev first.

> It also adapts md_error(): we only want to call .error_handler for
> those levels. mddev->pers->sync_request is additionally checked; its
> existence implies a level with redundancy.
>
> Using error_handler means that a disk failure can be requested from
> userspace: a user can fail the array via the #mdadm --set-faulty
> command. This is not safe and will be fixed in mdadm.

What is the safety issue here? It would be better to post the mdadm fix
together.

> It is correctable because the failed state is not recorded in the
> metadata, so after the next assembly the array will be read-write
> again.

I don't think it is a problem; care to explain why it can't be RW again?

> For safety reasons it is better to keep MD_BROKEN runtime-only.

Isn't MD_BROKEN runtime-only already? It is in mddev->flags, not
mddev->sb_flags.
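
[For context, the two flag words sit side by side in struct mddev;
paraphrasing md.h, with illustrative comments:

	struct mddev {
		/* ... */
		unsigned long	flags;		/* runtime state such as
						 * MD_BROKEN; never written
						 * to the on-disk superblock */
		unsigned long	sb_flags;	/* changes to be synced to the
						 * superblock, e.g.
						 * MD_SB_CHANGE_DEVS */
		/* ... */
	};
]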

> Signed-off-by: Mariusz Tkaczyk<mariusz.tkaczyk@linux.intel.com>
> ---
>   drivers/md/md-linear.c | 15 ++++++++++++++-
>   drivers/md/md.c        |  6 +++++-
>   drivers/md/md.h        | 10 ++--------
>   drivers/md/raid0.c     | 15 ++++++++++++++-
>   4 files changed, 35 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
> index 1ff51647a682..3c368e3e4641 100644
> --- a/drivers/md/md-linear.c
> +++ b/drivers/md/md-linear.c
> @@ -233,7 +233,8 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
>   		     bio_sector < start_sector))
>   		goto out_of_bounds;
>   
> -	if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
> +	if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
> +		md_error(mddev, tmp_dev->rdev);

[ ... ]

>   
> +static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
> +{
> +	if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags)) {

s/rdev->mddev/mddev/

> +		char *md_name = mdname(mddev);
> +
> +		pr_crit("md/linear%s: Disk failure on %pg detected.\n"
> +			"md/linear:%s: Cannot continue, failing array.\n",
> +			md_name, rdev->bdev, md_name);

The second md_name is not needed.

> +	}
> +}
> +
>   static void linear_quiesce(struct mddev *mddev, int state)
>   {
>   }
> @@ -297,6 +309,7 @@ static struct md_personality linear_personality =
>   	.hot_add_disk	= linear_add,
>   	.size		= linear_size,
>   	.quiesce	= linear_quiesce,
> +	.error_handler	= linear_error,
>   };
>   
>   static int __init linear_init (void)
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index e8666bdc0d28..f888ef197765 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -7982,7 +7982,11 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
>   
>   	if (!mddev->pers || !mddev->pers->error_handler)
>   		return;
> -	mddev->pers->error_handler(mddev,rdev);
> +	mddev->pers->error_handler(mddev, rdev);
> +
> +	if (!mddev->pers->sync_request)
> +		return;

The above only valid for raid0 and linear, I guess it is fine if DM 
don't create LV on top
of them. But the new checking deserves some comment above.

> +
>   	if (mddev->degraded)
>   		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
>   	sysfs_notify_dirent_safe(rdev->sysfs_state);

[ ... ]

> +static void raid0_error(struct mddev *mddev, struct md_rdev *rdev)
> +{
> +	if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags)) {
> +		char *md_name = mdname(mddev);
> +
> +		pr_crit("md/raid0%s: Disk failure on %pg detected.\n"
> +			"md/raid0:%s: Cannot continue, failing array.\n",
> +			md_name, rdev->bdev, md_name);

The comments for linear_error are also valid here.

Thanks,
Guoqing

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear
  2022-01-27 15:39 [PATCH v3 0/3] Improve failed arrays handling Mariusz Tkaczyk
@ 2022-01-27 15:39 ` Mariusz Tkaczyk
  2022-02-12  1:12   ` Guoqing Jiang
  0 siblings, 1 reply; 39+ messages in thread
From: Mariusz Tkaczyk @ 2022-01-27 15:39 UTC (permalink / raw)
  To: song; +Cc: linux-raid

Patch 62f7b1989c0 ("md raid0/linear: Mark array as 'broken' and fail BIOs
if a member is gone") allowed writes to be finished earlier (before
level-dependent actions) for non-redundant arrays.

To achieve that, MD_BROKEN is added to mddev->flags when a drive
disappearance is detected. This is done in is_mddev_broken(), which is
confusing and not consistent with the other levels, where error_handler()
is used. This patch adds an appropriate error_handler for raid0 and
linear.

It also adapts md_error(): we only want to call .error_handler for those
levels. mddev->pers->sync_request is additionally checked; its existence
implies a level with redundancy.

Using error_handler means that a disk failure can be requested from
userspace: a user can fail the array via the #mdadm --set-faulty command.
This is not safe and will be fixed in mdadm. It is correctable because
the failed state is not recorded in the metadata, so after the next
assembly the array will be read-write again. For safety reasons it is
better to keep MD_BROKEN runtime-only.

Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
---
 drivers/md/md-linear.c | 15 ++++++++++++++-
 drivers/md/md.c        |  6 +++++-
 drivers/md/md.h        | 10 ++--------
 drivers/md/raid0.c     | 15 ++++++++++++++-
 4 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 1ff51647a682..3c368e3e4641 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -233,7 +233,8 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
 		     bio_sector < start_sector))
 		goto out_of_bounds;
 
-	if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
+	if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
+		md_error(mddev, tmp_dev->rdev);
 		bio_io_error(bio);
 		return true;
 	}
@@ -281,6 +282,17 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev)
 	seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
 }
 
+static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
+{
+	if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags)) {
+		char *md_name = mdname(mddev);
+
+		pr_crit("md/linear%s: Disk failure on %pg detected.\n"
+			"md/linear:%s: Cannot continue, failing array.\n",
+			md_name, rdev->bdev, md_name);
+	}
+}
+
 static void linear_quiesce(struct mddev *mddev, int state)
 {
 }
@@ -297,6 +309,7 @@ static struct md_personality linear_personality =
 	.hot_add_disk	= linear_add,
 	.size		= linear_size,
 	.quiesce	= linear_quiesce,
+	.error_handler	= linear_error,
 };
 
 static int __init linear_init (void)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index e8666bdc0d28..f888ef197765 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7982,7 +7982,11 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
 
 	if (!mddev->pers || !mddev->pers->error_handler)
 		return;
-	mddev->pers->error_handler(mddev,rdev);
+	mddev->pers->error_handler(mddev, rdev);
+
+	if (!mddev->pers->sync_request)
+		return;
+
 	if (mddev->degraded)
 		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
 	sysfs_notify_dirent_safe(rdev->sysfs_state);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 53ea7a6961de..bc3f2094d0b6 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -762,15 +762,9 @@ extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
 struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
 struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
 
-static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
+static inline bool is_rdev_broken(struct md_rdev *rdev)
 {
-	if (!disk_live(rdev->bdev->bd_disk)) {
-		if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
-			pr_warn("md: %s: %s array has a missing/failed member\n",
-				mdname(rdev->mddev), md_type);
-		return true;
-	}
-	return false;
+	return !disk_live(rdev->bdev->bd_disk);
 }
 
 static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 62c8b6adac70..707efb3825f0 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -564,8 +564,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
 		return true;
 	}
 
-	if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
+	if (unlikely(is_rdev_broken(tmp_dev))) {
 		bio_io_error(bio);
+		md_error(mddev, tmp_dev);
 		return true;
 	}
 
@@ -588,6 +589,17 @@ static void raid0_status(struct seq_file *seq, struct mddev *mddev)
 	return;
 }
 
+static void raid0_error(struct mddev *mddev, struct md_rdev *rdev)
+{
+	if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags)) {
+		char *md_name = mdname(mddev);
+
+		pr_crit("md/raid0%s: Disk failure on %pg detected.\n"
+			"md/raid0:%s: Cannot continue, failing array.\n",
+			md_name, rdev->bdev, md_name);
+	}
+}
+
 static void *raid0_takeover_raid45(struct mddev *mddev)
 {
 	struct md_rdev *rdev;
@@ -763,6 +775,7 @@ static struct md_personality raid0_personality=
 	.size		= raid0_size,
 	.takeover	= raid0_takeover,
 	.quiesce	= raid0_quiesce,
+	.error_handler	= raid0_error,
 };
 
 static int __init raid0_init (void)
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 39+ messages in thread
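
[A note on the test_and_set_bit() pattern used by both new error
handlers: it atomically latches MD_BROKEN and returns the bit's previous
value, so the pr_crit() fires exactly once no matter how many failing
BIOs race in. A minimal userspace model of that latch (a stand-in for
the kernel primitive, not kernel code):

	#include <stdatomic.h>
	#include <stdio.h>

	#define MD_BROKEN 0			/* bit number, as in md.h */

	static atomic_ulong mddev_flags;	/* models mddev->flags */

	/* Stand-in for the kernel's test_and_set_bit(): atomically set
	 * bit nr and return whether it was already set. */
	static int test_and_set_bit(int nr, atomic_ulong *addr)
	{
		unsigned long mask = 1UL << nr;

		return (atomic_fetch_or(addr, mask) & mask) != 0;
	}

	int main(void)
	{
		/* Three BIOs hit a dead member; only the first one logs. */
		for (int i = 0; i < 3; i++)
			if (!test_and_set_bit(MD_BROKEN, &mddev_flags))
				printf("md/raid0md0: Disk failure detected, failing array.\n");

		return 0;
	}
]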

Thread overview: 39+ messages
2021-12-16 14:52 [PATCH v2 0/3] Use MD_BROKEN for redundant arrays Mariusz Tkaczyk
2021-12-16 14:52 ` [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear Mariusz Tkaczyk
2021-12-17  2:00   ` Guoqing Jiang
2021-12-17  2:07     ` Guoqing Jiang
2021-12-19  3:26     ` Xiao Ni
2021-12-22  1:22       ` Guoqing Jiang
2021-12-20  9:39     ` Mariusz Tkaczyk
2021-12-19  3:20   ` Xiao Ni
2021-12-20  8:45     ` Mariusz Tkaczyk
2021-12-21  1:40       ` Xiao Ni
2021-12-21 13:56         ` Mariusz Tkaczyk
2021-12-22  1:54           ` Guoqing Jiang
2021-12-22  3:08           ` Xiao Ni
2021-12-16 14:52 ` [PATCH 2/3] md: Set MD_BROKEN for RAID1 and RAID10 Mariusz Tkaczyk
2021-12-17  2:16   ` Guoqing Jiang
2021-12-22  7:24   ` Xiao Ni
2021-12-27 12:34     ` Mariusz Tkaczyk
2021-12-16 14:52 ` [PATCH 3/3] raid5: introduce MD_BROKEN Mariusz Tkaczyk
2021-12-17  2:26   ` Guoqing Jiang
2021-12-17  8:37     ` Mariusz Tkaczyk
2021-12-22  1:46       ` Guoqing Jiang
2021-12-17  0:52 ` [PATCH v2 0/3] Use MD_BROKEN for redundant arrays Song Liu
2021-12-17  8:02   ` Mariusz Tkaczyk
2022-01-25 15:52     ` Mariusz Tkaczyk
2022-01-26  1:13       ` Song Liu
2022-01-27 15:39 [PATCH v3 0/3] Improve failed arrays handling Mariusz Tkaczyk
2022-01-27 15:39 ` [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear Mariusz Tkaczyk
2022-02-12  1:12   ` Guoqing Jiang
2022-02-14  9:37     ` Mariusz Tkaczyk
2022-02-15  3:43       ` Guoqing Jiang
2022-02-15 14:06         ` Mariusz Tkaczyk
2022-02-16  9:47           ` Xiao Ni
2022-02-22  6:34           ` Song Liu
2022-02-22 13:02             ` Mariusz Tkaczyk
2022-03-22 15:23 [PATCH 0/3] Failed array handling improvements Mariusz Tkaczyk
2022-03-22 15:23 ` [PATCH 1/3] raid0, linear, md: add error_handlers for raid0 and linear Mariusz Tkaczyk
2022-04-08  0:16   ` Song Liu
2022-04-08 14:35     ` Mariusz Tkaczyk
2022-04-08 16:18       ` Song Liu
2022-04-12 15:31         ` Mariusz Tkaczyk
2022-04-12 16:36           ` Song Liu
