All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] dm-raid1: keep writing after leg failure
@ 2015-05-13  6:04 Lidong Zhong
  2015-05-13 15:56 ` Heinz Mauelshagen
  2015-05-15 16:11 ` Mike Snitzer
  0 siblings, 2 replies; 4+ messages in thread
From: Lidong Zhong @ 2015-05-13  6:04 UTC (permalink / raw)
  To: dm-devel; +Cc: lwang, heinzm, snitzer

Currently if there is a leg failure, the bio will be put into the hold
list until userspace replace/remove the leg. Here we are trying to make
dm-raid1 ignore the failure and keep the following bios going on.
This is because there maybe a temporary path failure in clvmd
which leads to cluster raid1 remove/replace the fake device failure. And
it takes a long time to do the full sync if we readd the device back.

This patch merges the former five small patches to implement this
feature. The userspace should pass "keep_log" into kernel to make this feature to
take effect when creating dm-raid1, same as "handle_errors".

Signed-off-by: Lidong Zhong <lzhong@suse.com>
Tested-by: Liuhua Wang <lwang@suse.com>
---
 drivers/md/dm-raid1.c | 62 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 089d627..3de48d1 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -24,7 +24,9 @@
 #define MAX_RECOVERY 1	/* Maximum number of regions recovered in parallel. */
 
 #define DM_RAID1_HANDLE_ERRORS 0x01
+#define DM_RAID1_KEEP_LOG      0x02
 #define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS)
+#define keep_log(p)	        ((p)->features & DM_RAID1_KEEP_LOG)
 
 static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);
 
@@ -229,7 +231,7 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
 	if (m != get_default_mirror(ms))
 		goto out;
 
-	if (!ms->in_sync) {
+	if (!ms->in_sync && !keep_log(ms)) {
 		/*
 		 * Better to issue requests to same failing device
 		 * than to risk returning corrupt data.
@@ -370,6 +372,17 @@ static int recover(struct mirror_set *ms, struct dm_region *reg)
 	return r;
 }
 
+static void reset_ms_flags(struct mirror_set *ms)
+{
+	unsigned int m;
+
+	ms->leg_failure = 0;
+	for (m = 0; m < ms->nr_mirrors; m++) {
+		atomic_set(&(ms->mirror[m].error_count), 0);
+		ms->mirror[m].error_type = 0;
+	}
+}
+
 static void do_recovery(struct mirror_set *ms)
 {
 	struct dm_region *reg;
@@ -398,6 +411,7 @@ static void do_recovery(struct mirror_set *ms)
 		/* the sync is complete */
 		dm_table_event(ms->ti->table);
 		ms->in_sync = 1;
+		reset_ms_flags(ms);
 	}
 }
 
@@ -759,7 +773,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 		dm_rh_delay(ms->rh, bio);
 
 	while ((bio = bio_list_pop(&nosync))) {
-		if (unlikely(ms->leg_failure) && errors_handled(ms)) {
+		if (unlikely(ms->leg_failure) && errors_handled(ms) && !keep_log(ms)) {
 			spin_lock_irq(&ms->lock);
 			bio_list_add(&ms->failures, bio);
 			spin_unlock_irq(&ms->lock);
@@ -809,9 +823,21 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
 		 * be wrong if the failed leg returned after reboot and
 		 * got replicated back to the good legs.)
 		 */
-		if (!get_valid_mirror(ms))
+
+		/*
+		 * We return EIO when there's no valid mirror legs
+		 * -or-
+		 * the log device is failed if keep_log is set
+		 */
+		if (unlikely(!get_valid_mirror(ms) || (keep_log(ms) && ms->log_failure)))
 			bio_endio(bio, -EIO);
-		else if (errors_handled(ms))
+		/*
+		 * After the userspace get noticed that the leg has failed,
+		 * we just pretend that the bio has suceeded since the region 
+		 * has already been marked nosync. It's OK do the recovery after
+		 * the device comes back
+		 */
+		else if (errors_handled(ms) && !keep_log(ms))
 			hold_bio(ms, bio);
 		else
 			bio_endio(bio, 0);
@@ -987,6 +1013,7 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
 	unsigned num_features;
 	struct dm_target *ti = ms->ti;
 	char dummy;
+	int i;
 
 	*args_used = 0;
 
@@ -1007,14 +1034,20 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
 		return -EINVAL;
 	}
 
-	if (!strcmp("handle_errors", argv[0]))
-		ms->features |= DM_RAID1_HANDLE_ERRORS;
-	else {
-		ti->error = "Unrecognised feature requested";
-		return -EINVAL;
-	}
+	for (i = 0; i < num_features; i++) {
+		if (!strcmp("handle_errors", argv[0]))
+			ms->features |= DM_RAID1_HANDLE_ERRORS;
+		else if (!strcmp("keep_log", argv[0])) 
+			ms->features |= DM_RAID1_KEEP_LOG;
+		else {
+			ti->error = "Unrecognised feature requested";
+			return -EINVAL;
+		}
 
-	(*args_used)++;
+		argc--;
+		argv++;
+		(*args_used)++;
+	}
 
 	return 0;
 }
@@ -1029,7 +1062,7 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
  * log_type is "core" or "disk"
  * #log_params is between 1 and 3
  *
- * If present, features must be "handle_errors".
+ * If present, features must be "handle_errors" and/or "keep_log".
  */
 static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
@@ -1394,8 +1427,11 @@ static void mirror_status(struct dm_target *ti, status_type_t type,
 			DMEMIT(" %s %llu", ms->mirror[m].dev->name,
 			       (unsigned long long)ms->mirror[m].offset);
 
-		if (ms->features & DM_RAID1_HANDLE_ERRORS)
+		if (errors_handled(ms) && keep_log(ms))
+			DMEMIT(" 2 handle_errors keep_log");
+		else if (errors_handled(ms))
 			DMEMIT(" 1 handle_errors");
+
 	}
 }
 
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] dm-raid1: keep writing after leg failure
  2015-05-13  6:04 [PATCH] dm-raid1: keep writing after leg failure Lidong Zhong
@ 2015-05-13 15:56 ` Heinz Mauelshagen
  2015-05-15 16:11 ` Mike Snitzer
  1 sibling, 0 replies; 4+ messages in thread
From: Heinz Mauelshagen @ 2015-05-13 15:56 UTC (permalink / raw)
  To: dm-devel


Acked-by: Heinz Mauelshagen <heinzm@redhat.com>

On 05/13/2015 08:04 AM, Lidong Zhong wrote:
> Currently if there is a leg failure, the bio will be put into the hold
> list until userspace replace/remove the leg. Here we are trying to make
> dm-raid1 ignore the failure and keep the following bios going on.
> This is because there maybe a temporary path failure in clvmd
> which leads to cluster raid1 remove/replace the fake device failure. And
> it takes a long time to do the full sync if we readd the device back.
>
> This patch merges the former five small patches to implement this
> feature. The userspace should pass "keep_log" into kernel to make this feature to
> take effect when creating dm-raid1, same as "handle_errors".
>
> Signed-off-by: Lidong Zhong <lzhong@suse.com>
> Tested-by: Liuhua Wang <lwang@suse.com>
> ---
>   drivers/md/dm-raid1.c | 62 ++++++++++++++++++++++++++++++++++++++++-----------
>   1 file changed, 49 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
> index 089d627..3de48d1 100644
> --- a/drivers/md/dm-raid1.c
> +++ b/drivers/md/dm-raid1.c
> @@ -24,7 +24,9 @@
>   #define MAX_RECOVERY 1	/* Maximum number of regions recovered in parallel. */
>   
>   #define DM_RAID1_HANDLE_ERRORS 0x01
> +#define DM_RAID1_KEEP_LOG      0x02
>   #define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS)
> +#define keep_log(p)	        ((p)->features & DM_RAID1_KEEP_LOG)
>   
>   static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);
>   
> @@ -229,7 +231,7 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
>   	if (m != get_default_mirror(ms))
>   		goto out;
>   
> -	if (!ms->in_sync) {
> +	if (!ms->in_sync && !keep_log(ms)) {
>   		/*
>   		 * Better to issue requests to same failing device
>   		 * than to risk returning corrupt data.
> @@ -370,6 +372,17 @@ static int recover(struct mirror_set *ms, struct dm_region *reg)
>   	return r;
>   }
>   
> +static void reset_ms_flags(struct mirror_set *ms)
> +{
> +	unsigned int m;
> +
> +	ms->leg_failure = 0;
> +	for (m = 0; m < ms->nr_mirrors; m++) {
> +		atomic_set(&(ms->mirror[m].error_count), 0);
> +		ms->mirror[m].error_type = 0;
> +	}
> +}
> +
>   static void do_recovery(struct mirror_set *ms)
>   {
>   	struct dm_region *reg;
> @@ -398,6 +411,7 @@ static void do_recovery(struct mirror_set *ms)
>   		/* the sync is complete */
>   		dm_table_event(ms->ti->table);
>   		ms->in_sync = 1;
> +		reset_ms_flags(ms);
>   	}
>   }
>   
> @@ -759,7 +773,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
>   		dm_rh_delay(ms->rh, bio);
>   
>   	while ((bio = bio_list_pop(&nosync))) {
> -		if (unlikely(ms->leg_failure) && errors_handled(ms)) {
> +		if (unlikely(ms->leg_failure) && errors_handled(ms) && !keep_log(ms)) {
>   			spin_lock_irq(&ms->lock);
>   			bio_list_add(&ms->failures, bio);
>   			spin_unlock_irq(&ms->lock);
> @@ -809,9 +823,21 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
>   		 * be wrong if the failed leg returned after reboot and
>   		 * got replicated back to the good legs.)
>   		 */
> -		if (!get_valid_mirror(ms))
> +
> +		/*
> +		 * We return EIO when there's no valid mirror legs
> +		 * -or-
> +		 * the log device is failed if keep_log is set
> +		 */
> +		if (unlikely(!get_valid_mirror(ms) || (keep_log(ms) && ms->log_failure)))
>   			bio_endio(bio, -EIO);
> -		else if (errors_handled(ms))
> +		/*
> +		 * After the userspace get noticed that the leg has failed,
> +		 * we just pretend that the bio has suceeded since the region
> +		 * has already been marked nosync. It's OK do the recovery after
> +		 * the device comes back
> +		 */
> +		else if (errors_handled(ms) && !keep_log(ms))
>   			hold_bio(ms, bio);
>   		else
>   			bio_endio(bio, 0);
> @@ -987,6 +1013,7 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
>   	unsigned num_features;
>   	struct dm_target *ti = ms->ti;
>   	char dummy;
> +	int i;
>   
>   	*args_used = 0;
>   
> @@ -1007,14 +1034,20 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
>   		return -EINVAL;
>   	}
>   
> -	if (!strcmp("handle_errors", argv[0]))
> -		ms->features |= DM_RAID1_HANDLE_ERRORS;
> -	else {
> -		ti->error = "Unrecognised feature requested";
> -		return -EINVAL;
> -	}
> +	for (i = 0; i < num_features; i++) {
> +		if (!strcmp("handle_errors", argv[0]))
> +			ms->features |= DM_RAID1_HANDLE_ERRORS;
> +		else if (!strcmp("keep_log", argv[0]))
> +			ms->features |= DM_RAID1_KEEP_LOG;
> +		else {
> +			ti->error = "Unrecognised feature requested";
> +			return -EINVAL;
> +		}
>   
> -	(*args_used)++;
> +		argc--;
> +		argv++;
> +		(*args_used)++;
> +	}
>   
>   	return 0;
>   }
> @@ -1029,7 +1062,7 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
>    * log_type is "core" or "disk"
>    * #log_params is between 1 and 3
>    *
> - * If present, features must be "handle_errors".
> + * If present, features must be "handle_errors" and/or "keep_log".
>    */
>   static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>   {
> @@ -1394,8 +1427,11 @@ static void mirror_status(struct dm_target *ti, status_type_t type,
>   			DMEMIT(" %s %llu", ms->mirror[m].dev->name,
>   			       (unsigned long long)ms->mirror[m].offset);
>   
> -		if (ms->features & DM_RAID1_HANDLE_ERRORS)
> +		if (errors_handled(ms) && keep_log(ms))
> +			DMEMIT(" 2 handle_errors keep_log");
> +		else if (errors_handled(ms))
>   			DMEMIT(" 1 handle_errors");
> +
>   	}
>   }
>   

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: dm-raid1: keep writing after leg failure
  2015-05-13  6:04 [PATCH] dm-raid1: keep writing after leg failure Lidong Zhong
  2015-05-13 15:56 ` Heinz Mauelshagen
@ 2015-05-15 16:11 ` Mike Snitzer
  2015-05-18  3:49   ` Lidong Zhong
  1 sibling, 1 reply; 4+ messages in thread
From: Mike Snitzer @ 2015-05-15 16:11 UTC (permalink / raw)
  To: Lidong Zhong; +Cc: lwang, heinzm, dm-devel

On Wed, May 13 2015 at  2:04am -0400,
Lidong Zhong <lzhong@suse.com> wrote:

> Currently if there is a leg failure, the bio will be put into the hold
> list until userspace replace/remove the leg. Here we are trying to make
> dm-raid1 ignore the failure and keep the following bios going on.
> This is because there maybe a temporary path failure in clvmd
> which leads to cluster raid1 remove/replace the fake device failure. And
> it takes a long time to do the full sync if we readd the device back.
> 
> This patch merges the former five small patches to implement this
> feature. The userspace should pass "keep_log" into kernel to make this feature to
> take effect when creating dm-raid1, same as "handle_errors".
> 
> Signed-off-by: Lidong Zhong <lzhong@suse.com>
> Tested-by: Liuhua Wang <lwang@suse.com>

I've staged a revised patch for 4.2 here:
https://git.kernel.org/cgit/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=for-next&id=164a064dd85801986f3190e6edd119b6fc9a05c5

The big things I clarified/cleaned are:
1) keep_log depends on handle_errors feature
2) feature reporting via 'dmsetup table' needed to be decoupled
   otherwise any future feature would've forced this same cleanup.
3) Cleaned up header and in-code comments.
4) bumped the target version

Please let me know if you see nay issues with this version.

Thanks,
Mike

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: dm-raid1: keep writing after leg failure
  2015-05-15 16:11 ` Mike Snitzer
@ 2015-05-18  3:49   ` Lidong Zhong
  0 siblings, 0 replies; 4+ messages in thread
From: Lidong Zhong @ 2015-05-18  3:49 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: Liuhua Wang, heinzm, dm-devel

>>> On 5/16/2015 at 12:11 AM, in message <20150515161108.GA28234@redhat.com>, Mike
Snitzer <snitzer@redhat.com> wrote: 
> On Wed, May 13 2015 at  2:04am -0400, 
> Lidong Zhong <lzhong@suse.com> wrote: 
>  
> > Currently if there is a leg failure, the bio will be put into the hold 
> > list until userspace replace/remove the leg. Here we are trying to make 
> > dm-raid1 ignore the failure and keep the following bios going on. 
> > This is because there maybe a temporary path failure in clvmd 
> > which leads to cluster raid1 remove/replace the fake device failure. And 
> > it takes a long time to do the full sync if we readd the device back. 
> >  
> > This patch merges the former five small patches to implement this 
> > feature. The userspace should pass "keep_log" into kernel to make this  
> feature to 
> > take effect when creating dm-raid1, same as "handle_errors". 
> >  
> > Signed-off-by: Lidong Zhong <lzhong@suse.com> 
> > Tested-by: Liuhua Wang <lwang@suse.com> 
>  
> I've staged a revised patch for 4.2 here: 
> https://git.kernel.org/cgit/linux/kernel/git/device-mapper/linux-dm.git/commit 
> /?h=for-next&id=164a064dd85801986f3190e6edd119b6fc9a05c5 
>  
> The big things I clarified/cleaned are: 
> 1) keep_log depends on handle_errors feature 
> 2) feature reporting via 'dmsetup table' needed to be decoupled 
>    otherwise any future feature would've forced this same cleanup. 
> 3) Cleaned up header and in-code comments. 
> 4) bumped the target version 
>  
> Please let me know if you see nay issues with this version. 
>  

Hi Mike,

All look good to me. Thanks.

Regards,
Lidong
> Thanks, 
> Mike 
>  
>  

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-05-18  3:49 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-13  6:04 [PATCH] dm-raid1: keep writing after leg failure Lidong Zhong
2015-05-13 15:56 ` Heinz Mauelshagen
2015-05-15 16:11 ` Mike Snitzer
2015-05-18  3:49   ` Lidong Zhong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.