All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH][RESEND] dm-raid1: fix deadlock at suspending failed device
@ 2010-02-17 23:40 Takahiro Yasui
  2010-02-17 23:43 ` Mikulas Patocka
  0 siblings, 1 reply; 2+ messages in thread
From: Takahiro Yasui @ 2010-02-17 23:40 UTC (permalink / raw)
  To: Alasdair G Kergon, mpatocka, jbrassow; +Cc: device-mapper development

This is a repost of the following patch but its description is revised.

1/8/2010 3:08 PM
[PATCH][BUGFIX] dm-raid1: fix suspend stuck of failed device

I appreciate your review.

Thanks,
Taka


The recovery can't start because there are pending bios and therefore
dm_rh_stop_recovery deadlocks.

When there are pending bios in the hold list, the recovery waits for
the completion of the bios after recovery_count is acquired.
The recovery_count is released when the recovery finished, however,
the bios in the hold list are processed after dm_rh_stop_recovery() in
mirror_presuspend(). dm_rh_stop_recovery() also acquires recovery_count,
then deadlock occurs.

To prevent deadlock, bios in the hold list should be flushed before
dm_rh_stop_recovery() is called in mirror_suspend().


Signed-off-by: Takahiro Yasui <tyasui@redhat.com>
---
 drivers/md/dm-raid1.c |   41 +++++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 18 deletions(-)

Index: linux-2.6.33-rc1/drivers/md/dm-raid1.c
===================================================================
--- linux-2.6.33-rc1.orig/drivers/md/dm-raid1.c
+++ linux-2.6.33-rc1/drivers/md/dm-raid1.c
@@ -465,9 +465,17 @@ static void map_region(struct dm_io_regi
 static void hold_bio(struct mirror_set *ms, struct bio *bio)
 {
 	/*
-	 * If device is suspended, complete the bio.
+	 * Lock is required to avoid race condition during suspend
+	 * process.
 	 */
+	spin_lock_irq(&ms->lock);
+
 	if (atomic_read(&ms->suspend)) {
+		spin_unlock_irq(&ms->lock);
+
+		/*
+		 * If device is suspended, complete the bio.
+		 */
 		if (dm_noflush_suspending(ms->ti))
 			bio_endio(bio, DM_ENDIO_REQUEUE);
 		else
@@ -478,7 +486,6 @@ static void hold_bio(struct mirror_set *
 	/*
 	 * Hold bio until the suspend is complete.
 	 */
-	spin_lock_irq(&ms->lock);
 	bio_list_add(&ms->holds, bio);
 	spin_unlock_irq(&ms->lock);
 }
@@ -1259,6 +1266,20 @@ static void mirror_presuspend(struct dm_
 	atomic_set(&ms->suspend, 1);
 
 	/*
+	 * Process bios in the hold list to start recovery waiting
+	 * for bios in the hold list. After the process, no bio has
+	 * a chance to be added in the hold list because ms->suspend
+	 * is set.
+	 */
+	spin_lock_irq(&ms->lock);
+	holds = ms->holds;
+	bio_list_init(&ms->holds);
+	spin_unlock_irq(&ms->lock);
+
+	while ((bio = bio_list_pop(&holds)))
+		hold_bio(ms, bio);
+
+	/*
 	 * We must finish up all the work that we've
 	 * generated (i.e. recovery work).
 	 */
@@ -1278,22 +1299,6 @@ static void mirror_presuspend(struct dm_
 	 * we know that all of our I/O has been pushed.
 	 */
 	flush_workqueue(ms->kmirrord_wq);
-
-	/*
-	 * Now set ms->suspend is set and the workqueue flushed, no more
-	 * entries can be added to ms->hold list, so process it.
-	 *
-	 * Bios can still arrive concurrently with or after this
-	 * presuspend function, but they cannot join the hold list
-	 * because ms->suspend is set.
-	 */
-	spin_lock_irq(&ms->lock);
-	holds = ms->holds;
-	bio_list_init(&ms->holds);
-	spin_unlock_irq(&ms->lock);
-
-	while ((bio = bio_list_pop(&holds)))
-		hold_bio(ms, bio);
 }
 
 static void mirror_postsuspend(struct dm_target *ti)

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH][RESEND] dm-raid1: fix deadlock at suspending failed device
  2010-02-17 23:40 [PATCH][RESEND] dm-raid1: fix deadlock at suspending failed device Takahiro Yasui
@ 2010-02-17 23:43 ` Mikulas Patocka
  0 siblings, 0 replies; 2+ messages in thread
From: Mikulas Patocka @ 2010-02-17 23:43 UTC (permalink / raw)
  To: Takahiro Yasui; +Cc: device-mapper development, Alasdair G Kergon



On Wed, 17 Feb 2010, Takahiro Yasui wrote:

> This is a repost of the following patch but its description is revised.
> 
> 1/8/2010 3:08 PM
> [PATCH][BUGFIX] dm-raid1: fix suspend stuck of failed device
> 
> I appreciate your review.
> 
> Thanks,
> Taka

Reviewed-by: Mikulas Patocka <mpatocka@redhat.com>

Mikulas

> The recovery can't start because there are pending bios and therefore
> dm_rh_stop_recovery deadlocks.
> 
> When there are pending bios in the hold list, the recovery waits for
> the completion of the bios after recovery_count is acquired.
> The recovery_count is released when the recovery finished, however,
> the bios in the hold list are processed after dm_rh_stop_recovery() in
> mirror_presuspend(). dm_rh_stop_recovery() also acquires recovery_count,
> then deadlock occurs.
> 
> To prevent deadlock, bios in the hold list should be flushed before
> dm_rh_stop_recovery() is called in mirror_suspend().
> 
> 
> Signed-off-by: Takahiro Yasui <tyasui@redhat.com>
> ---
>  drivers/md/dm-raid1.c |   41 +++++++++++++++++++++++------------------
>  1 file changed, 23 insertions(+), 18 deletions(-)
> 
> Index: linux-2.6.33-rc1/drivers/md/dm-raid1.c
> ===================================================================
> --- linux-2.6.33-rc1.orig/drivers/md/dm-raid1.c
> +++ linux-2.6.33-rc1/drivers/md/dm-raid1.c
> @@ -465,9 +465,17 @@ static void map_region(struct dm_io_regi
>  static void hold_bio(struct mirror_set *ms, struct bio *bio)
>  {
>  	/*
> -	 * If device is suspended, complete the bio.
> +	 * Lock is required to avoid race condition during suspend
> +	 * process.
>  	 */
> +	spin_lock_irq(&ms->lock);
> +
>  	if (atomic_read(&ms->suspend)) {
> +		spin_unlock_irq(&ms->lock);
> +
> +		/*
> +		 * If device is suspended, complete the bio.
> +		 */
>  		if (dm_noflush_suspending(ms->ti))
>  			bio_endio(bio, DM_ENDIO_REQUEUE);
>  		else
> @@ -478,7 +486,6 @@ static void hold_bio(struct mirror_set *
>  	/*
>  	 * Hold bio until the suspend is complete.
>  	 */
> -	spin_lock_irq(&ms->lock);
>  	bio_list_add(&ms->holds, bio);
>  	spin_unlock_irq(&ms->lock);
>  }
> @@ -1259,6 +1266,20 @@ static void mirror_presuspend(struct dm_
>  	atomic_set(&ms->suspend, 1);
>  
>  	/*
> +	 * Process bios in the hold list to start recovery waiting
> +	 * for bios in the hold list. After the process, no bio has
> +	 * a chance to be added in the hold list because ms->suspend
> +	 * is set.
> +	 */
> +	spin_lock_irq(&ms->lock);
> +	holds = ms->holds;
> +	bio_list_init(&ms->holds);
> +	spin_unlock_irq(&ms->lock);
> +
> +	while ((bio = bio_list_pop(&holds)))
> +		hold_bio(ms, bio);
> +
> +	/*
>  	 * We must finish up all the work that we've
>  	 * generated (i.e. recovery work).
>  	 */
> @@ -1278,22 +1299,6 @@ static void mirror_presuspend(struct dm_
>  	 * we know that all of our I/O has been pushed.
>  	 */
>  	flush_workqueue(ms->kmirrord_wq);
> -
> -	/*
> -	 * Now set ms->suspend is set and the workqueue flushed, no more
> -	 * entries can be added to ms->hold list, so process it.
> -	 *
> -	 * Bios can still arrive concurrently with or after this
> -	 * presuspend function, but they cannot join the hold list
> -	 * because ms->suspend is set.
> -	 */
> -	spin_lock_irq(&ms->lock);
> -	holds = ms->holds;
> -	bio_list_init(&ms->holds);
> -	spin_unlock_irq(&ms->lock);
> -
> -	while ((bio = bio_list_pop(&holds)))
> -		hold_bio(ms, bio);
>  }
>  
>  static void mirror_postsuspend(struct dm_target *ti)
> 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2010-02-17 23:43 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-17 23:40 [PATCH][RESEND] dm-raid1: fix deadlock at suspending failed device Takahiro Yasui
2010-02-17 23:43 ` Mikulas Patocka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.