* [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync
@ 2016-07-30 23:54 shli
  2016-07-30 23:54 ` [PATCH 2/3] MD: hold mddev lock to change bitmap location shli
                   ` (3 more replies)
  0 siblings, 4 replies; 21+ messages in thread
From: shli @ 2016-07-30 23:54 UTC (permalink / raw)
  To: linux-raid; +Cc: Shaohua Li, NeilBrown

From: Shaohua Li <shli@fb.com>

.quiesce is called with the mddev lock held in most places. There are a few
exceptions. Calling .quiesce without the lock held could create races. For
example, the .quiesce of raid1 can't be called recursively. The purpose of these
patches is to fix a race in raid5-cache. The raid5-cache .quiesce will write the md
superblock and should be called with the mddev lock held.

Cc: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2c3ab6f..0550445 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7945,8 +7945,10 @@ void md_do_sync(struct md_thread *thread)
 		 * region.
 		 */
 		if (mddev->bitmap) {
+			mddev_lock_nointr(mddev);
 			mddev->pers->quiesce(mddev, 1);
 			mddev->pers->quiesce(mddev, 0);
+			mddev_unlock(mddev);
 		}
 	}
 
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 2/3] MD: hold mddev lock to change bitmap location
  2016-07-30 23:54 [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync shli
@ 2016-07-30 23:54 ` shli
  2016-08-03  0:03   ` NeilBrown
  2016-07-30 23:54 ` [PATCH 3/3] MD: hold mddev lock for md-cluster receive thread shli
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 21+ messages in thread
From: shli @ 2016-07-30 23:54 UTC (permalink / raw)
  To: linux-raid; +Cc: Shaohua Li, NeilBrown

From: Shaohua Li <shli@fb.com>

Changing the location changes a lot of things. Hold the lock to avoid races.
This also makes .quiesce be called with the mddev lock held.

Cc: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/bitmap.c | 47 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 6fff794..13041ee 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -2183,19 +2183,29 @@ location_show(struct mddev *mddev, char *page)
 static ssize_t
 location_store(struct mddev *mddev, const char *buf, size_t len)
 {
+	int rv;
 
+	rv = mddev_lock(mddev);
+	if (rv)
+		return rv;
 	if (mddev->pers) {
-		if (!mddev->pers->quiesce)
-			return -EBUSY;
-		if (mddev->recovery || mddev->sync_thread)
-			return -EBUSY;
+		if (!mddev->pers->quiesce) {
+			rv = -EBUSY;
+			goto out;
+		}
+		if (mddev->recovery || mddev->sync_thread) {
+			rv = -EBUSY;
+			goto out;
+		}
 	}
 
 	if (mddev->bitmap || mddev->bitmap_info.file ||
 	    mddev->bitmap_info.offset) {
 		/* bitmap already configured.  Only option is to clear it */
-		if (strncmp(buf, "none", 4) != 0)
-			return -EBUSY;
+		if (strncmp(buf, "none", 4) != 0) {
+			rv = -EBUSY;
+			goto out;
+		}
 		if (mddev->pers) {
 			mddev->pers->quiesce(mddev, 1);
 			bitmap_destroy(mddev);
@@ -2214,21 +2224,25 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
 			/* nothing to be done */;
 		else if (strncmp(buf, "file:", 5) == 0) {
 			/* Not supported yet */
-			return -EINVAL;
+			rv = -EINVAL;
+			goto out;
 		} else {
-			int rv;
 			if (buf[0] == '+')
 				rv = kstrtoll(buf+1, 10, &offset);
 			else
 				rv = kstrtoll(buf, 10, &offset);
 			if (rv)
-				return rv;
-			if (offset == 0)
-				return -EINVAL;
+				goto out;
+			if (offset == 0) {
+				rv = -EINVAL;
+				goto out;
+			}
 			if (mddev->bitmap_info.external == 0 &&
 			    mddev->major_version == 0 &&
-			    offset != mddev->bitmap_info.default_offset)
-				return -EINVAL;
+			    offset != mddev->bitmap_info.default_offset) {
+				rv = -EINVAL;
+				goto out;
+			}
 			mddev->bitmap_info.offset = offset;
 			if (mddev->pers) {
 				struct bitmap *bitmap;
@@ -2245,7 +2259,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
 				mddev->pers->quiesce(mddev, 0);
 				if (rv) {
 					bitmap_destroy(mddev);
-					return rv;
+					goto out;
 				}
 			}
 		}
@@ -2257,6 +2271,11 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
 		set_bit(MD_CHANGE_DEVS, &mddev->flags);
 		md_wakeup_thread(mddev->thread);
 	}
+	rv = 0;
+out:
+	mddev_unlock(mddev);
+	if (rv)
+		return rv;
 	return len;
 }
 
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 3/3] MD: hold mddev lock for md-cluster receive thread
  2016-07-30 23:54 [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync shli
  2016-07-30 23:54 ` [PATCH 2/3] MD: hold mddev lock to change bitmap location shli
@ 2016-07-30 23:54 ` shli
  2016-08-01  8:38   ` Guoqing Jiang
  2016-07-31  6:03 ` [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync yizhan
  2016-08-02 23:47 ` NeilBrown
  3 siblings, 1 reply; 21+ messages in thread
From: shli @ 2016-07-30 23:54 UTC (permalink / raw)
  To: linux-raid; +Cc: Shaohua Li, NeilBrown, Guoqing Jiang

From: Shaohua Li <shli@fb.com>

The md-cluster receive thread calls .quiesce too, so let it hold the mddev lock.

Cc: NeilBrown <neilb@suse.com>
Cc: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md-cluster.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 41573f1..f420060 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -567,11 +567,13 @@ static void recv_daemon(struct md_thread *thread)
 	struct cluster_msg msg;
 	int ret;
 
+	mddev_lock_nointr(thread->mddev);
 	mutex_lock(&cinfo->recv_mutex);
 	/*get CR on Message*/
 	if (dlm_lock_sync(message_lockres, DLM_LOCK_CR)) {
 		pr_err("md/raid1:failed to get CR on MESSAGE\n");
 		mutex_unlock(&cinfo->recv_mutex);
+		mddev_unlock(thread->mddev);
 		return;
 	}
 
@@ -599,6 +601,7 @@ static void recv_daemon(struct md_thread *thread)
 	if (unlikely(ret != 0))
 		pr_info("unlock msg failed return %d\n", ret);
 	mutex_unlock(&cinfo->recv_mutex);
+	mddev_unlock(thread->mddev);
 }
 
 /* lock_token()
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync
  2016-07-30 23:54 [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync shli
  2016-07-30 23:54 ` [PATCH 2/3] MD: hold mddev lock to change bitmap location shli
  2016-07-30 23:54 ` [PATCH 3/3] MD: hold mddev lock for md-cluster receive thread shli
@ 2016-07-31  6:03 ` yizhan
  2016-08-02 23:47 ` NeilBrown
  3 siblings, 0 replies; 21+ messages in thread
From: yizhan @ 2016-07-31  6:03 UTC (permalink / raw)
  To: shli, linux-raid; +Cc: Shaohua Li, NeilBrown

I tested these patches and they fixed the bug I reported before [1].

[1] WARNING: CPU: 4 PID: 10512 at drivers/md/raid5-cache.c:728 
r5l_do_reclaim+0x415/0x430 [raid456]

Thanks

Yi Zhang


On 07/31/2016 07:54 AM, shli@kernel.org wrote:
> From: Shaohua Li <shli@fb.com>
>
> .quiesce is called with mddev lock hold at most places. There are few
> exceptions. Calling .quesce without the lock hold could create races. For
> example, the .quesce of raid1 can't be recursively. The purpose of the patches
> is to fix a race in raid5-cache. The raid5-cache .quesce will write md
> superblock and should be called with mddev lock hold.
>
> Cc: NeilBrown <neilb@suse.com>
> Signed-off-by: Shaohua Li <shli@fb.com>
> ---
>   drivers/md/md.c | 2 ++
>   1 file changed, 2 insertions(+)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 2c3ab6f..0550445 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -7945,8 +7945,10 @@ void md_do_sync(struct md_thread *thread)
>   		 * region.
>   		 */
>   		if (mddev->bitmap) {
> +			mddev_lock_nointr(mddev);
>   			mddev->pers->quiesce(mddev, 1);
>   			mddev->pers->quiesce(mddev, 0);
> +			mddev_unlock(mddev);
>   		}
>   	}
>   


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/3] MD: hold mddev lock for md-cluster receive thread
  2016-07-30 23:54 ` [PATCH 3/3] MD: hold mddev lock for md-cluster receive thread shli
@ 2016-08-01  8:38   ` Guoqing Jiang
  2016-08-01 21:45     ` Shaohua Li
  0 siblings, 1 reply; 21+ messages in thread
From: Guoqing Jiang @ 2016-08-01  8:38 UTC (permalink / raw)
  To: shli, linux-raid; +Cc: Shaohua Li, NeilBrown

Hi,

On 07/31/2016 07:54 AM, shli@kernel.org wrote:
> From: Shaohua Li <shli@fb.com>
>
> md-cluster receive thread calls .quiesce too, let it hold mddev lock.

I'd suggest holding off on this patchset; I can hit a lock problem easily with
the patchset applied. Take a resyncing clustered raid1 as an example.

The md127_raid1 thread holds reconfig_mutex and then updates the sb, so it needs
the dlm token lock. Meanwhile the md127_resync thread got the token lock and wants
EX on the ack lock, but recv_daemon can't release the ack lock since recv_daemon
can't get reconfig_mutex.
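
Spelling the cycle out as a code comment (illustrative only; the call chains
are the ones visible in the stacks below):

	/*
	 * md127_raid1:  holds reconfig_mutex
	 *               md_check_recovery() -> md_update_sb()
	 *               -> metadata_update_start(): blocks waiting for the
	 *                  dlm "token" lock.
	 * md127_resync: holds the dlm "token" lock
	 *               resync_info_update() -> __sendmsg()
	 *               -> dlm_lock_sync(): blocks waiting for EX on "ack".
	 * recv_daemon:  would release "ack", but with this patch it first
	 *               calls mddev_lock_nointr() and blocks on
	 *               reconfig_mutex -> deadlock.
	 */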

betalinux135:~ # ps aux|grep md|grep D
root      2028  0.0  0.0      0     0 ?        D    16:24   0:00 
[md127_raid1]
root      2041  0.0  0.0      0     0 ?        D    16:24   0:00 
[md127_resync]
betalinux135:~ # cat /proc/2028/stack
[<ffffffffa05f2660>] metadata_update_start+0xa0/0xb0 [md_cluster]
[<ffffffffa06cb1ce>] md_update_sb.part.50+0x8e/0x810 [md_mod]
[<ffffffffa06ccb8c>] md_check_recovery+0x23c/0x4f0 [md_mod]
[<ffffffffa06f6312>] raid1d+0x42/0x7d0 [raid1]
[<ffffffffa06c6d10>] md_thread+0x130/0x150 [md_mod]
[<ffffffff810995ed>] kthread+0xbd/0xe0
[<ffffffff815e96bf>] ret_from_fork+0x3f/0x70
[<ffffffff81099530>] kthread+0x0/0xe0
[<ffffffffffffffff>] 0xffffffffffffffff
betalinux135:~ # cat /proc/2041/stack
[<ffffffffa05f24cb>] dlm_lock_sync+0x6b/0x80 [md_cluster]
[<ffffffffa05f2708>] __sendmsg+0x98/0x130 [md_cluster]
[<ffffffffa05f282d>] sendmsg+0x1d/0x30 [md_cluster]
[<ffffffffa05f2b31>] resync_info_update+0x81/0xb0 [md_cluster]
[<ffffffffa06f3ad7>] sync_request+0xa57/0xaf0 [raid1]
[<ffffffffa06ca2dd>] md_do_sync+0x90d/0xe80 [md_mod]
[<ffffffffa06c6d10>] md_thread+0x130/0x150 [md_mod]
[<ffffffff810995ed>] kthread+0xbd/0xe0
[<ffffffff815e96bf>] ret_from_fork+0x3f/0x70
[<ffffffff81099530>] kthread+0x0/0xe0
[<ffffffffffffffff>] 0xffffffffffffffff


Thanks,
Guoqing


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/3] MD: hold mddev lock for md-cluster receive thread
  2016-08-01  8:38   ` Guoqing Jiang
@ 2016-08-01 21:45     ` Shaohua Li
  2016-08-02  9:52       ` Guoqing Jiang
  0 siblings, 1 reply; 21+ messages in thread
From: Shaohua Li @ 2016-08-01 21:45 UTC (permalink / raw)
  To: Guoqing Jiang; +Cc: linux-raid, NeilBrown

On Mon, Aug 01, 2016 at 04:38:59PM +0800, Guoqing Jiang wrote:
> Hi,
> 
> On 07/31/2016 07:54 AM, shli@kernel.org wrote:
> >From: Shaohua Li <shli@fb.com>
> >
> >md-cluster receive thread calls .quiesce too, let it hold mddev lock.
> 
> I'd suggest hold on for the patchset, I can find lock problem easily with
> the patchset applied. Take a resyncing  clusteed raid1 as example.
> 
> md127_raid1 thread held reconfig_mutex then update sb, so it needs dlm
> token lock. Meanwhile md127_resync thread got token lock and wants
> EX on ack lock but recv_daemon can't release ack lock since recv_daemon
> doesn't get reconfig_mutex.

Thanks, I'll drop this one. The other two patches are still safe for md-cluster,
right? I really hope to have consistent locking for .quiesce. For
process_recvd_msg, I'm wondering what's protecting the data? For example,
md-cluster uses md_find_rdev_nr_rcu, which accesses the disks list without
locking. Is there a race? Does it work if we move the mddev lock into
process_recvd_msg?

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/3] MD: hold mddev lock for md-cluster receive thread
  2016-08-01 21:45     ` Shaohua Li
@ 2016-08-02  9:52       ` Guoqing Jiang
  2016-08-02 22:44         ` Shaohua Li
  2016-08-03  0:09         ` NeilBrown
  0 siblings, 2 replies; 21+ messages in thread
From: Guoqing Jiang @ 2016-08-02  9:52 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-raid, NeilBrown



On 08/02/2016 05:45 AM, Shaohua Li wrote:
> On Mon, Aug 01, 2016 at 04:38:59PM +0800, Guoqing Jiang wrote:
>> Hi,
>>
>> On 07/31/2016 07:54 AM, shli@kernel.org wrote:
>>> From: Shaohua Li <shli@fb.com>
>>>
>>> md-cluster receive thread calls .quiesce too, let it hold mddev lock.
>> I'd suggest hold on for the patchset, I can find lock problem easily with
>> the patchset applied. Take a resyncing  clusteed raid1 as example.
>>
>> md127_raid1 thread held reconfig_mutex then update sb, so it needs dlm
>> token lock. Meanwhile md127_resync thread got token lock and wants
>> EX on ack lock but recv_daemon can't release ack lock since recv_daemon
>> doesn't get reconfig_mutex.
> Thansk, I'll drop this one. Other two patches are still safe for md-cluster,
> right?

 From the latest test, I can't find lock issues with the first two patches,
but I suspect they could have a side effect on resync performance.

> I really hope to have consistent locking for .quiesce. For the
> process_recvd_msg, I'm wondering what's protecting the datas? for example,
> md-cluster uses md_find_rdev_nr_rcu, which access the disks list without
> locking. Is there a race?

Yes, it should be protected by the RCU lock. I will post a patch for it; thanks
for the reminder.
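
Something like the following (just a sketch of the RCU usage; the wrapper
function and 'nr' are placeholders, not the eventual patch):

	/* Sketch only: keep the lockless rdev lookup inside an RCU read-side
	 * critical section so the disks list can't change under us.
	 * 'nr' stands for the disk number carried in the cluster message.
	 */
	static void handle_rdev_msg(struct mddev *mddev, int nr)
	{
		struct md_rdev *rdev;

		rcu_read_lock();
		rdev = md_find_rdev_nr_rcu(mddev, nr);
		if (rdev) {
			/* act on rdev only while still under rcu_read_lock() */
		}
		rcu_read_unlock();
	}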

> Does it work if we move the mddev lock to
> process_recvd_msg?

I tried that, but it still has a lock issue, e.g. when nodes B and C have
status "resync=PENDING" and we then try to stop the resyncing array on node A.


Thanks,
Guoqing

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/3] MD: hold mddev lock for md-cluster receive thread
  2016-08-02  9:52       ` Guoqing Jiang
@ 2016-08-02 22:44         ` Shaohua Li
  2016-08-03  3:18           ` Guoqing Jiang
  2016-08-03  0:09         ` NeilBrown
  1 sibling, 1 reply; 21+ messages in thread
From: Shaohua Li @ 2016-08-02 22:44 UTC (permalink / raw)
  To: Guoqing Jiang; +Cc: linux-raid, NeilBrown

On Tue, Aug 02, 2016 at 05:52:41PM +0800, Guoqing Jiang wrote:
> 
> 
> On 08/02/2016 05:45 AM, Shaohua Li wrote:
> >On Mon, Aug 01, 2016 at 04:38:59PM +0800, Guoqing Jiang wrote:
> >>Hi,
> >>
> >>On 07/31/2016 07:54 AM, shli@kernel.org wrote:
> >>>From: Shaohua Li <shli@fb.com>
> >>>
> >>>md-cluster receive thread calls .quiesce too, let it hold mddev lock.
> >>I'd suggest hold on for the patchset, I can find lock problem easily with
> >>the patchset applied. Take a resyncing  clusteed raid1 as example.
> >>
> >>md127_raid1 thread held reconfig_mutex then update sb, so it needs dlm
> >>token lock. Meanwhile md127_resync thread got token lock and wants
> >>EX on ack lock but recv_daemon can't release ack lock since recv_daemon
> >>doesn't get reconfig_mutex.
> >Thansk, I'll drop this one. Other two patches are still safe for md-cluster,
> >right?
> 
> From the latest test, I can't find  lock issues with the first two patches,
> but I doubt it would have side effect for the performance of resync.

That's nothing to be worried about. The .quiesce() call is way heavier than
holding/releasing the mutex.

> >I really hope to have consistent locking for .quiesce. For the
> >process_recvd_msg, I'm wondering what's protecting the datas? for example,
> >md-cluster uses md_find_rdev_nr_rcu, which access the disks list without
> >locking. Is there a race?
> 
> Yes, it should be protected by rcu lock, I will post a patch for it, thanks
> for reminder.
> 
> >Does it work if we move the mddev lock to
> >process_recvd_msg?
> 
> I tried that, but It still have lock issue, eg, when node B and C have
> status
> as "resync=PENDING", then try to stop the resyncing array in node A.

Can you elaborate on that?

For the raid5-cache issue, ignoring the md-cluster .quiesce() call is fine
currently, as we don't support clustered raid5. In the future we should probably
add another parameter to .quiesce to indicate whether the mddev lock is held.
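
For concreteness, the change I have in mind would look roughly like this
(purely hypothetical; neither the extra argument nor its name exists in the
tree today):

	/* hypothetical: tell the personality whether reconfig_mutex is held */
	void (*quiesce)(struct mddev *mddev, int state, bool mddev_locked);

	/* callers that already hold reconfig_mutex would pass true, e.g.: */
	mddev->pers->quiesce(mddev, 1, true);
	/* ... critical section ... */
	mddev->pers->quiesce(mddev, 0, true);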

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync
  2016-07-30 23:54 [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync shli
                   ` (2 preceding siblings ...)
  2016-07-31  6:03 ` [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync yizhan
@ 2016-08-02 23:47 ` NeilBrown
  2016-08-04  3:16   ` NeilBrown
  3 siblings, 1 reply; 21+ messages in thread
From: NeilBrown @ 2016-08-02 23:47 UTC (permalink / raw)
  To: shli, linux-raid; +Cc: Shaohua Li

[-- Attachment #1: Type: text/plain, Size: 1465 bytes --]

On Sun, Jul 31 2016, shli@kernel.org wrote:

> From: Shaohua Li <shli@fb.com>
>
> .quiesce is called with mddev lock hold at most places. There are few
> exceptions. Calling .quesce without the lock hold could create races. For
> example, the .quesce of raid1 can't be recursively. The purpose of the patches
> is to fix a race in raid5-cache. The raid5-cache .quesce will write md
> superblock and should be called with mddev lock hold.
>
> Cc: NeilBrown <neilb@suse.com>
> Signed-off-by: Shaohua Li <shli@fb.com>

Acked-by: NeilBrown <neilb@suse.com>

This should be safe but I'm not sure I really like it.
The raid1 quiesce could be changed so that it can be called recursively.
The raid5-cache situation would be harder to get right and maybe this is
the best solution... It's just that 'quiesce' should be a fairly
light-weight operation, just waiting for pending requests to flush.  It
shouldn't really *need* a lock.

NeilBrown


> ---
>  drivers/md/md.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 2c3ab6f..0550445 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -7945,8 +7945,10 @@ void md_do_sync(struct md_thread *thread)
>  		 * region.
>  		 */
>  		if (mddev->bitmap) {
> +			mddev_lock_nointr(mddev);
>  			mddev->pers->quiesce(mddev, 1);
>  			mddev->pers->quiesce(mddev, 0);
> +			mddev_unlock(mddev);
>  		}
>  	}
>  
> -- 
> 2.7.4

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 818 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/3] MD: hold mddev lock to change bitmap location
  2016-07-30 23:54 ` [PATCH 2/3] MD: hold mddev lock to change bitmap location shli
@ 2016-08-03  0:03   ` NeilBrown
  0 siblings, 0 replies; 21+ messages in thread
From: NeilBrown @ 2016-08-03  0:03 UTC (permalink / raw)
  To: shli, linux-raid; +Cc: Shaohua Li

[-- Attachment #1: Type: text/plain, Size: 3257 bytes --]

On Sun, Jul 31 2016, shli@kernel.org wrote:

> From: Shaohua Li <shli@fb.com>
>
> Changing the location changes a lot of things. Holding the lock to avoid race.
> This makes the .quiesce called with mddev lock hold too.
>
> Cc: NeilBrown <neilb@suse.com>
> Signed-off-by: Shaohua Li <shli@fb.com>

Acked-by: NeilBrown <neilb@suse.com>

Yes, I think this is justified.  As you say, location_store is a fairly
significant change.

Thanks,
NeilBrown

> ---
>  drivers/md/bitmap.c | 47 +++++++++++++++++++++++++++++++++--------------
>  1 file changed, 33 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
> index 6fff794..13041ee 100644
> --- a/drivers/md/bitmap.c
> +++ b/drivers/md/bitmap.c
> @@ -2183,19 +2183,29 @@ location_show(struct mddev *mddev, char *page)
>  static ssize_t
>  location_store(struct mddev *mddev, const char *buf, size_t len)
>  {
> +	int rv;
>  
> +	rv = mddev_lock(mddev);
> +	if (rv)
> +		return rv;
>  	if (mddev->pers) {
> -		if (!mddev->pers->quiesce)
> -			return -EBUSY;
> -		if (mddev->recovery || mddev->sync_thread)
> -			return -EBUSY;
> +		if (!mddev->pers->quiesce) {
> +			rv = -EBUSY;
> +			goto out;
> +		}
> +		if (mddev->recovery || mddev->sync_thread) {
> +			rv = -EBUSY;
> +			goto out;
> +		}
>  	}
>  
>  	if (mddev->bitmap || mddev->bitmap_info.file ||
>  	    mddev->bitmap_info.offset) {
>  		/* bitmap already configured.  Only option is to clear it */
> -		if (strncmp(buf, "none", 4) != 0)
> -			return -EBUSY;
> +		if (strncmp(buf, "none", 4) != 0) {
> +			rv = -EBUSY;
> +			goto out;
> +		}
>  		if (mddev->pers) {
>  			mddev->pers->quiesce(mddev, 1);
>  			bitmap_destroy(mddev);
> @@ -2214,21 +2224,25 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
>  			/* nothing to be done */;
>  		else if (strncmp(buf, "file:", 5) == 0) {
>  			/* Not supported yet */
> -			return -EINVAL;
> +			rv = -EINVAL;
> +			goto out;
>  		} else {
> -			int rv;
>  			if (buf[0] == '+')
>  				rv = kstrtoll(buf+1, 10, &offset);
>  			else
>  				rv = kstrtoll(buf, 10, &offset);
>  			if (rv)
> -				return rv;
> -			if (offset == 0)
> -				return -EINVAL;
> +				goto out;
> +			if (offset == 0) {
> +				rv = -EINVAL;
> +				goto out;
> +			}
>  			if (mddev->bitmap_info.external == 0 &&
>  			    mddev->major_version == 0 &&
> -			    offset != mddev->bitmap_info.default_offset)
> -				return -EINVAL;
> +			    offset != mddev->bitmap_info.default_offset) {
> +				rv = -EINVAL;
> +				goto out;
> +			}
>  			mddev->bitmap_info.offset = offset;
>  			if (mddev->pers) {
>  				struct bitmap *bitmap;
> @@ -2245,7 +2259,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
>  				mddev->pers->quiesce(mddev, 0);
>  				if (rv) {
>  					bitmap_destroy(mddev);
> -					return rv;
> +					goto out;
>  				}
>  			}
>  		}
> @@ -2257,6 +2271,11 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
>  		set_bit(MD_CHANGE_DEVS, &mddev->flags);
>  		md_wakeup_thread(mddev->thread);
>  	}
> +	rv = 0;
> +out:
> +	mddev_unlock(mddev);
> +	if (rv)
> +		return rv;
>  	return len;
>  }
>  
> -- 
> 2.7.4

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 818 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/3] MD: hold mddev lock for md-cluster receive thread
  2016-08-02  9:52       ` Guoqing Jiang
  2016-08-02 22:44         ` Shaohua Li
@ 2016-08-03  0:09         ` NeilBrown
  2016-08-03  3:42           ` Guoqing Jiang
  1 sibling, 1 reply; 21+ messages in thread
From: NeilBrown @ 2016-08-03  0:09 UTC (permalink / raw)
  To: Guoqing Jiang, Shaohua Li; +Cc: linux-raid

[-- Attachment #1: Type: text/plain, Size: 416 bytes --]

On Tue, Aug 02 2016, Guoqing Jiang wrote:
>
>> Does it work if we move the mddev lock to
>> process_recvd_msg?
>
> I tried that, but It still have lock issue, eg, when node B and C have 
> status
> as "resync=PENDING", then try to stop the resyncing array in node A.
>

Maybe the reconfig_mutex locking needs to go down in
process_suspend_info() ?? That is the only part that calls quiesce.
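
i.e. roughly this shape inside process_suspend_info() (only a sketch; the
surrounding message handling is elided):

	mddev_lock_nointr(mddev);
	mddev->pers->quiesce(mddev, 1);
	/* ... update the suspended-range information ... */
	mddev->pers->quiesce(mddev, 0);
	mddev_unlock(mddev);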

NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 818 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/3] MD: hold mddev lock for md-cluster receive thread
  2016-08-02 22:44         ` Shaohua Li
@ 2016-08-03  3:18           ` Guoqing Jiang
  0 siblings, 0 replies; 21+ messages in thread
From: Guoqing Jiang @ 2016-08-03  3:18 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-raid, NeilBrown



On 08/03/2016 06:44 AM, Shaohua Li wrote:
> On Tue, Aug 02, 2016 at 05:52:41PM +0800, Guoqing Jiang wrote:
>>
>> On 08/02/2016 05:45 AM, Shaohua Li wrote:
>>> On Mon, Aug 01, 2016 at 04:38:59PM +0800, Guoqing Jiang wrote:
>>>> Hi,
>>>>
>>>> On 07/31/2016 07:54 AM, shli@kernel.org wrote:
>>>>> From: Shaohua Li <shli@fb.com>
>>>>>
>>>>> md-cluster receive thread calls .quiesce too, let it hold mddev lock.
>>>> I'd suggest hold on for the patchset, I can find lock problem easily with
>>>> the patchset applied. Take a resyncing  clusteed raid1 as example.
>>>>
>>>> md127_raid1 thread held reconfig_mutex then update sb, so it needs dlm
>>>> token lock. Meanwhile md127_resync thread got token lock and wants
>>>> EX on ack lock but recv_daemon can't release ack lock since recv_daemon
>>>> doesn't get reconfig_mutex.
>>> Thansk, I'll drop this one. Other two patches are still safe for md-cluster,
>>> right?
>>  From the latest test, I can't find  lock issues with the first two patches,
>> but I doubt it would have side effect for the performance of resync.
> That's not need to be worried. The .quiesce() call is way heavier than
> hold/release the mutex.
>
>>> I really hope to have consistent locking for .quiesce. For the
>>> process_recvd_msg, I'm wondering what's protecting the datas? for example,
>>> md-cluster uses md_find_rdev_nr_rcu, which access the disks list without
>>> locking. Is there a race?
>> Yes, it should be protected by rcu lock, I will post a patch for it, thanks
>> for reminder.
>>
>>> Does it work if we move the mddev lock to
>>> process_recvd_msg?
>> I tried that, but It still have lock issue, eg, when node B and C have
>> status
>> as "resync=PENDING", then try to stop the resyncing array in node A.
> can you elaborate it?

I wasn't lucky enough to reproduce the same test as yesterday, but now I
can't even assemble the clustered raid1 on the other nodes.

1. node135: mdadm --create md0 --bitmap=clustered --raid-devices=2 
--level=mirror /dev/vdb /dev/vdc
2. Then node240 and node244 try to assemble it, but both of them would hang.

betalinux135:~ # cat /proc/mdstat
Personalities : [raid1]
md127 : active raid1 vdc[1] vdb[0]
       2095104 blocks super 1.2 [2/2] [UU]
       [=>...................]  resync =  6.2% (130816/2095104) 
finish=1.5min speed=21802K/sec
       bitmap: 1/1 pages [4KB], 65536KB chunk

unused devices: <none>
betalinux135:~ # ssh betalinux240
Last login: Wed Aug  3 11:11:47 2016 from 192.168.100.1
betalinux240:~ # ps aux|grep md|grep D
root      1901  0.0  0.2  20896  2592 pts/0    D+   11:12   0:00 mdadm 
--assemble md0 /dev/vdb /dev/vdc
root      1914  0.0  0.2  19852  2032 ?        S    11:12   0:00 
/sbin/mdadm --incremental --export /dev/vdb --offroot ${DEVLINKS}
root      1915  0.0  0.1  19852  1940 ?        S    11:12   0:00 
/sbin/mdadm --incremental --export /dev/vdc --offroot ${DEVLINKS}
betalinux240:~ # cat /proc/1901/stack
[<ffffffffa069d4cb>] dlm_lock_sync+0x6b/0x80 [md_cluster]
[<ffffffffa069e7e6>] join+0x286/0x430 [md_cluster]
[<ffffffffa068a9f4>] bitmap_create+0x5f4/0x980 [md_mod]
[<ffffffffa0682f35>] md_run+0x595/0xa60 [md_mod]
[<ffffffffa06834cf>] do_md_run+0xf/0xb0 [md_mod]
[<ffffffffa0686781>] md_ioctl+0x11b1/0x1680 [md_mod]
[<ffffffff812ed158>] blkdev_ioctl+0x258/0x920
[<ffffffff8122f81d>] block_ioctl+0x3d/0x40
[<ffffffff8120ac0d>] do_vfs_ioctl+0x2cd/0x4a0
[<ffffffff8120ae54>] SyS_ioctl+0x74/0x80
[<ffffffff815e936e>] entry_SYSCALL_64_fastpath+0x12/0x6d
[<ffffffffffffffff>] 0xffffffffffffffff
betalinux240:~ # exit
logout
Connection to betalinux240 closed.
betalinux135:~ # ssh betalinux244
Last login: Wed Aug  3 11:11:49 2016 from 192.168.100.1
betalinux244:~ # ps aux|grep md|grep D
root      1903  0.0  0.2  20896  2660 pts/0    D+   11:12   0:00 mdadm 
--assemble md0 /dev/vdb /dev/vdc
root      1923  0.0  0.2  19852  2112 ?        S    11:12   0:00 
/sbin/mdadm --incremental --export /dev/vdc --offroot ${DEVLINKS}
root      1928  0.0  0.2  19852  2092 ?        S    11:12   0:00 
/sbin/mdadm --incremental --export /dev/vdb --offroot ${DEVLINKS}
root      1938  0.0  0.0      0     0 ?        D    11:12   0:00 
[md0_cluster_rec]
betalinux244:~ # cat /proc/1903/stack
[<ffffffffa06904cb>] dlm_lock_sync+0x6b/0x80 [md_cluster]
[<ffffffffa06904fb>] lock_token+0x1b/0x50 [md_cluster]
[<ffffffffa06905fd>] metadata_update_start+0x3d/0xb0 [md_cluster]
[<ffffffffa06751ee>] md_update_sb.part.50+0x8e/0x810 [md_mod]
[<ffffffffa067646e>] md_allow_write+0x6e/0xc0 [md_mod]
[<ffffffffa0676505>] do_md_run+0x45/0xb0 [md_mod]
[<ffffffffa0679781>] md_ioctl+0x11b1/0x1680 [md_mod]
[<ffffffff812ed158>] blkdev_ioctl+0x258/0x920
[<ffffffff8122f81d>] block_ioctl+0x3d/0x40
[<ffffffff8120ac0d>] do_vfs_ioctl+0x2cd/0x4a0
[<ffffffff8120ae54>] SyS_ioctl+0x74/0x80
[<ffffffff815e936e>] entry_SYSCALL_64_fastpath+0x12/0x6d
[<ffffffffffffffff>] 0xffffffffffffffff
betalinux244:~ # cat /proc/1938/stack
[<ffffffffa0691d90>] recv_daemon+0xc0/0x4a0 [md_cluster]
[<ffffffffa0670d30>] md_thread+0x130/0x150 [md_mod]
[<ffffffff810995ed>] kthread+0xbd/0xe0
[<ffffffff815e96bf>] ret_from_fork+0x3f/0x70
[<ffffffff81099530>] kthread+0x0/0xe0
[<ffffffffffffffff>] 0xffffffffffffffff

> For the raid5-cache issue, ignoring the md-cluster .quiesce() call is fine
> currently as we don't support raid5 cluster. We probably should add another
> parameter for .quiesce to indicate if the mddev lock is hold in the future.

Please keep me updated on this change in the future, since it may have a huge
influence on md-cluster.

Thanks,
Guoqing


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/3] MD: hold mddev lock for md-cluster receive thread
  2016-08-03  0:09         ` NeilBrown
@ 2016-08-03  3:42           ` Guoqing Jiang
  0 siblings, 0 replies; 21+ messages in thread
From: Guoqing Jiang @ 2016-08-03  3:42 UTC (permalink / raw)
  To: NeilBrown, Guoqing Jiang, Shaohua Li; +Cc: linux-raid



On 08/03/2016 08:09 AM, NeilBrown wrote:
> On Tue, Aug 02 2016, Guoqing Jiang wrote:
>>> Does it work if we move the mddev lock to
>>> process_recvd_msg?
>> I tried that, but It still have lock issue, eg, when node B and C have
>> status
>> as "resync=PENDING", then try to stop the resyncing array in node A.
>>
> Maybe the reconfig_mutex locking needs to go down in
> process_suspend_info() ?? That is the only part that calls quiesce.

Yes, but with this change I still have a lock issue with the steps below:
1. create a resyncing array on nodeA
2. assemble the array on nodeB and nodeC

I can see that A can't continue to perform resync, while C can't assemble the
array successfully.

Thanks,
Guoqing


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync
  2016-08-02 23:47 ` NeilBrown
@ 2016-08-04  3:16   ` NeilBrown
  2016-08-06  4:14     ` Shaohua Li
  0 siblings, 1 reply; 21+ messages in thread
From: NeilBrown @ 2016-08-04  3:16 UTC (permalink / raw)
  To: shli, linux-raid; +Cc: Shaohua Li

[-- Attachment #1: Type: text/plain, Size: 1656 bytes --]

On Wed, Aug 03 2016, NeilBrown wrote:

> [ Unknown signature status ]
> On Sun, Jul 31 2016, shli@kernel.org wrote:
>
>> From: Shaohua Li <shli@fb.com>
>>
>> .quiesce is called with mddev lock hold at most places. There are few
>> exceptions. Calling .quesce without the lock hold could create races. For
>> example, the .quesce of raid1 can't be recursively. The purpose of the patches
>> is to fix a race in raid5-cache. The raid5-cache .quesce will write md
>> superblock and should be called with mddev lock hold.
>>
>> Cc: NeilBrown <neilb@suse.com>
>> Signed-off-by: Shaohua Li <shli@fb.com>
>
> Acked-by: NeilBrown <neilb@suse.com>
>
> This should be safe but I'm not sure I really like it.
> The raid1 quiesce could be changed so that it can be called recursively.
> The raid5-cache situation would be harder to get right and maybe this is
> the best solution... It's just that 'quiesce' should be a fairly
> light-weight operation, just waiting for pending requests to flush.  It
> shouldn't really *need* a lock.

Actually, the more I think about this, the less I like it.

I would much rather make .quiesce lighter weight so that no locking was
needed.

For r5l_quiesce, that probably means removing the "r5l_do_reclaim()" call.
Stopping and restarting the reclaim thread seems reasonable, but calling
r5l_do_reclaim() should not be needed.  It should be done periodically
by the thread, and at 'stop' time, but otherwise isn't needed.
You would need to hold some mutex while calling md_register_thread, but
that could probably be log->io_mutex, or maybe even some other new
mutex.
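
Roughly (a sketch of that direction, not a tested patch; whether log->io_mutex
is the right mutex is exactly the open question):

	/* sketch: r5l_quiesce() without the r5l_do_reclaim() call */
	if (state == 0) {
		mutex_lock(&log->io_mutex);
		log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
					log->rdev->mddev, "reclaim");
		mutex_unlock(&log->io_mutex);
	} else if (state == 1) {
		md_unregister_thread(&log->reclaim_thread);
		/* no r5l_do_reclaim() here; the periodic reclaim and the
		 * ->stop() path are expected to cover it */
	}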

Could you explore following that path instead?

Thanks,
NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 818 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync
  2016-08-04  3:16   ` NeilBrown
@ 2016-08-06  4:14     ` Shaohua Li
  2016-08-12  0:04       ` NeilBrown
  0 siblings, 1 reply; 21+ messages in thread
From: Shaohua Li @ 2016-08-06  4:14 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-raid, Shaohua Li

On Thu, Aug 04, 2016 at 01:16:49PM +1000, Neil Brown wrote:
> On Wed, Aug 03 2016, NeilBrown wrote:
> 
> > [ Unknown signature status ]
> > On Sun, Jul 31 2016, shli@kernel.org wrote:
> >
> >> From: Shaohua Li <shli@fb.com>
> >>
> >> .quiesce is called with mddev lock hold at most places. There are few
> >> exceptions. Calling .quesce without the lock hold could create races. For
> >> example, the .quesce of raid1 can't be recursively. The purpose of the patches
> >> is to fix a race in raid5-cache. The raid5-cache .quesce will write md
> >> superblock and should be called with mddev lock hold.
> >>
> >> Cc: NeilBrown <neilb@suse.com>
> >> Signed-off-by: Shaohua Li <shli@fb.com>
> >
> > Acked-by: NeilBrown <neilb@suse.com>
> >
> > This should be safe but I'm not sure I really like it.
> > The raid1 quiesce could be changed so that it can be called recursively.
> > The raid5-cache situation would be harder to get right and maybe this is
> > the best solution... It's just that 'quiesce' should be a fairly
> > light-weight operation, just waiting for pending requests to flush.  It
> > shouldn't really *need* a lock.
> 
> Actually, the more I think about this, the less I like it.
> 
> I would much rather make .quiesce lighter weight so that no locking was
> needed.
> 
> For r5l_quiesce, that probable means removed the "r5l_do_reclaim()".
> Stopping and restarting the reclaim thread seems reasonable, but calling
> r5l_do_reclaim() should not be needed.  It should be done periodically
> by the thread, and at 'stop' time, but otherwise isn't needed.
> You would need to hold some mutex while calling md_register_thread, but
> that could be probably be log->io_mutex, or maybe even some other new
> mutex

We will have the same deadlock issue with just stopping/restarting the reclaim
thread, as stopping the thread will wait for the thread, which is probably
doing r5l_do_reclaim and writing the superblock. Since we are writing the
superblock, we must hold the reconfig_mutex.

Letting raid5_quiesce call r5l_do_reclaim gives us a clean log. Just
stopping/restarting the reclaim thread can't guarantee this, as it's possible
some space isn't reclaimed yet. A clean log simplifies a lot of things, for
example when we change the layout of the array: the log doesn't need to remember
which part is for the old layout and which part is for the new layout.

I think we can add a new parameter to .quiesce to indicate whether reconfig_mutex
is held. raid5_quiesce can check the parameter and take reconfig_mutex if
necessary.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync
  2016-08-06  4:14     ` Shaohua Li
@ 2016-08-12  0:04       ` NeilBrown
  2016-08-17  1:28         ` Shaohua Li
  0 siblings, 1 reply; 21+ messages in thread
From: NeilBrown @ 2016-08-12  0:04 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-raid, Shaohua Li

[-- Attachment #1: Type: text/plain, Size: 3748 bytes --]

On Sat, Aug 06 2016, Shaohua Li wrote:

> On Thu, Aug 04, 2016 at 01:16:49PM +1000, Neil Brown wrote:
>> On Wed, Aug 03 2016, NeilBrown wrote:
>> 
>> > [ Unknown signature status ]
>> > On Sun, Jul 31 2016, shli@kernel.org wrote:
>> >
>> >> From: Shaohua Li <shli@fb.com>
>> >>
>> >> .quiesce is called with mddev lock hold at most places. There are few
>> >> exceptions. Calling .quesce without the lock hold could create races. For
>> >> example, the .quesce of raid1 can't be recursively. The purpose of the patches
>> >> is to fix a race in raid5-cache. The raid5-cache .quesce will write md
>> >> superblock and should be called with mddev lock hold.
>> >>
>> >> Cc: NeilBrown <neilb@suse.com>
>> >> Signed-off-by: Shaohua Li <shli@fb.com>
>> >
>> > Acked-by: NeilBrown <neilb@suse.com>
>> >
>> > This should be safe but I'm not sure I really like it.
>> > The raid1 quiesce could be changed so that it can be called recursively.
>> > The raid5-cache situation would be harder to get right and maybe this is
>> > the best solution... It's just that 'quiesce' should be a fairly
>> > light-weight operation, just waiting for pending requests to flush.  It
>> > shouldn't really *need* a lock.
>> 
>> Actually, the more I think about this, the less I like it.
>> 
>> I would much rather make .quiesce lighter weight so that no locking was
>> needed.
>> 
>> For r5l_quiesce, that probable means removed the "r5l_do_reclaim()".
>> Stopping and restarting the reclaim thread seems reasonable, but calling
>> r5l_do_reclaim() should not be needed.  It should be done periodically
>> by the thread, and at 'stop' time, but otherwise isn't needed.
>> You would need to hold some mutex while calling md_register_thread, but
>> that could be probably be log->io_mutex, or maybe even some other new
>> mutex
>
> We will have the same deadlock issue with just stopping/restarting the reclaim
> thread. As stopping the thread will wait for the thread, which probably is
> doing r5l_do_reclaim and writting the superblock. Since we are writting the
> superblock, we must hold the reconfig_mutex.

When you say "writing the superblock" you presumably mean "blocked in
r5l_write_super_and_discard_space(), waiting for  MD_CHANGE_PENDING to
be cleared" ??
With a bit of care you could wait for MD_CHANGE_PENDING to clear, or for
->quiesce to be set, and then exit gracefully.

>
> Letting raid5_quiesce call r5l_do_reclaim gives us a clean log. Just
> stop/restart the reclaim thread can't guarantee this, as it's possible some
> space aren't reclaimed yet. A clean log will simplify a lot of things, for
> example we change the layout of the array. The log doesn't need to remember
> which part is for the old layout and which part is the new layout.

I really think you are putting too much functionality into quiesce.
When we change the shape of the array, we do much more than just
quiesce it.  We also call check_reshape and start_reshape etc.
They are called with reconfig_mutex held and it would be perfectly
appropriate to finish off the r5l_do_reclaim() work in there.
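
i.e. something along these lines from the reshape path (illustrative only;
r5l_do_reclaim() is static to raid5-cache.c today, so a small helper, here
called r5l_flush_log(), would have to be exported):

	/* hypothetical: done from the check_reshape/start_reshape paths,
	 * which already run with reconfig_mutex held */
	struct r5conf *conf = mddev->private;

	if (conf->log)
		r5l_flush_log(conf->log);	/* hypothetical wrapper around r5l_do_reclaim() */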

>
> I think we can add a new parameter for .quiesce to indicate if reconfig_mutex
> is hold. raid5_quiesce can check the parameter and hold reconfig_mutex if
> necessary.

Adding a new parameter because it happens to be convenient in one case
is not necessarily a good idea.  It is often a sign that the interface
isn't well designed, or isn't well understood, or is being used poorly.

I really really don't think ->quiesce() should care about whether
reconfig_mutex is held.  All it should do is drain all IO and stop new
IO so that other threads can do unusual things in race-free ways.

NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 818 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync
  2016-08-12  0:04       ` NeilBrown
@ 2016-08-17  1:28         ` Shaohua Li
  2016-08-24  4:49           ` NeilBrown
  0 siblings, 1 reply; 21+ messages in thread
From: Shaohua Li @ 2016-08-17  1:28 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-raid, Shaohua Li

On Fri, Aug 12, 2016 at 10:04:05AM +1000, Neil Brown wrote:
> On Sat, Aug 06 2016, Shaohua Li wrote:
> 
> > On Thu, Aug 04, 2016 at 01:16:49PM +1000, Neil Brown wrote:
> >> On Wed, Aug 03 2016, NeilBrown wrote:
> >> 
> >> > [ Unknown signature status ]
> >> > On Sun, Jul 31 2016, shli@kernel.org wrote:
> >> >
> >> >> From: Shaohua Li <shli@fb.com>
> >> >>
> >> >> .quiesce is called with mddev lock hold at most places. There are few
> >> >> exceptions. Calling .quesce without the lock hold could create races. For
> >> >> example, the .quesce of raid1 can't be recursively. The purpose of the patches
> >> >> is to fix a race in raid5-cache. The raid5-cache .quesce will write md
> >> >> superblock and should be called with mddev lock hold.
> >> >>
> >> >> Cc: NeilBrown <neilb@suse.com>
> >> >> Signed-off-by: Shaohua Li <shli@fb.com>
> >> >
> >> > Acked-by: NeilBrown <neilb@suse.com>
> >> >
> >> > This should be safe but I'm not sure I really like it.
> >> > The raid1 quiesce could be changed so that it can be called recursively.
> >> > The raid5-cache situation would be harder to get right and maybe this is
> >> > the best solution... It's just that 'quiesce' should be a fairly
> >> > light-weight operation, just waiting for pending requests to flush.  It
> >> > shouldn't really *need* a lock.
> >> 
> >> Actually, the more I think about this, the less I like it.
> >> 
> >> I would much rather make .quiesce lighter weight so that no locking was
> >> needed.
> >> 
> >> For r5l_quiesce, that probable means removed the "r5l_do_reclaim()".
> >> Stopping and restarting the reclaim thread seems reasonable, but calling
> >> r5l_do_reclaim() should not be needed.  It should be done periodically
> >> by the thread, and at 'stop' time, but otherwise isn't needed.
> >> You would need to hold some mutex while calling md_register_thread, but
> >> that could be probably be log->io_mutex, or maybe even some other new
> >> mutex
> >
> > We will have the same deadlock issue with just stopping/restarting the reclaim
> > thread. As stopping the thread will wait for the thread, which probably is
> > doing r5l_do_reclaim and writting the superblock. Since we are writting the
> > superblock, we must hold the reconfig_mutex.
> 
> When you say "writing the superblock" you presumably mean "blocked in
> r5l_write_super_and_discard_space(), waiting for  MD_CHANGE_PENDING to
> be cleared" ??
right
> With a bit of care you could wait for MD_CHANGE_PENDING to clear, or for
> ->quiesce to be set, and then exit gracefully.

Can you give details about this please? .quiesce is called with reconfig_mutex
held, so MD_CHANGE_PENDING will never get cleared.

> >
> > Letting raid5_quiesce call r5l_do_reclaim gives us a clean log. Just
> > stop/restart the reclaim thread can't guarantee this, as it's possible some
> > space aren't reclaimed yet. A clean log will simplify a lot of things, for
> > example we change the layout of the array. The log doesn't need to remember
> > which part is for the old layout and which part is the new layout.
> 
> I really think you are putting too much functionality into quiesce.
> When we change the shape of the array, we do much more than just
> quiesce it.  We also call check_reshape and start_reshape etc.
> They are called with reconfig_mutex held and it would be perfectly
> appropriate to finish of the r5l_do_reclaim() work in there.

This makes sense. But I don't think we need to worry that 'finishing off the
r5l_do_reclaim() work' does too much. In most cases, stopping the reclaim
thread will already have finished all the reclaim.

> >
> > I think we can add a new parameter for .quiesce to indicate if reconfig_mutex
> > is hold. raid5_quiesce can check the parameter and hold reconfig_mutex if
> > necessary.
> 
> Adding a new parameter because it happens to be convenient in one case
> is not necessarily a good idea.  It is often a sign that the interface
> isn't well designed, or isn't well understood, or is being used poorly.
> 
> I really really don't think ->quiesce() should care about whether
> reconfig_mutex is held.  All it should do is drain all IO and stop new
> IO so that other threads can do unusually things in race-free ways.

I agree this isn't a good interface, but I don't have a better solution for
this issue. Ignoring reshape for now: it's possible for .quiesce and the reclaim
thread to deadlock. One thread holds the reconfig mutex and calls raid5_quiesce(),
which will wait for IO to finish. The reclaim thread writes the super (waiting for
the reconfig mutex), frees log space, and only then can the IO writes finish. So
the first thread holds the reconfig mutex and waits for the reclaim thread to
finish IO, while the reclaim thread waits for the reconfig mutex.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync
  2016-08-17  1:28         ` Shaohua Li
@ 2016-08-24  4:49           ` NeilBrown
  2016-08-24  5:25             ` Shaohua Li
  0 siblings, 1 reply; 21+ messages in thread
From: NeilBrown @ 2016-08-24  4:49 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-raid, Shaohua Li

[-- Attachment #1: Type: text/plain, Size: 2244 bytes --]

On Wed, Aug 17 2016, Shaohua Li wrote:
>> >
>> > We will have the same deadlock issue with just stopping/restarting the reclaim
>> > thread. As stopping the thread will wait for the thread, which probably is
>> > doing r5l_do_reclaim and writting the superblock. Since we are writting the
>> > superblock, we must hold the reconfig_mutex.
>> 
>> When you say "writing the superblock" you presumably mean "blocked in
>> r5l_write_super_and_discard_space(), waiting for  MD_CHANGE_PENDING to
>> be cleared" ??
> right
>> With a bit of care you could wait for MD_CHANGE_PENDING to clear, or for
>> ->quiesce to be set, and then exit gracefully.
>
> Can you give details about this please? .quiesce is called with reconfig_mutex
> hold, so the MD_CHANGE_PENDING will never get cleared.

raid5_quiesce(mddev, 1) sets conf->quiesce and then calls r5l_quiesce().

r5l_quiesce() tells the reclaim_thread to exit and waits for it to do so.

But the reclaim thread might be in
   r5l_do_reclaim() -> r5l_write_super_and_discard_space()
waiting for MD_CHANGE_PENDING to clear.  That will only get cleared when
the main thread can get the reconfig_mutex, which the thread calling
raid5_quiesce() might hold.  So we get a deadlock.

My suggestion is to change r5l_write_super_and_discard_space() so that
it waits for *either* MD_CHANGE_PENDING to be clear, or conf->quiesce
to be set.  That will avoid the deadlock.
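
Concretely, the wait in r5l_write_super_and_discard_space() would become
something like this (a sketch only; it assumes conf is reachable as
mddev->private):

	struct r5conf *conf = mddev->private;

	set_mask_bits(&mddev->flags, 0,
		      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
	md_wakeup_thread(mddev->thread);
	wait_event(mddev->sb_wait,
		   !test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
		   conf->quiesce);
	if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
		return;		/* superblock not written: skip the discard */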

Whatever thread called raid5_quiesce() will now be in control of the
array without any async IO going on.  If it needs the metadata to be in
sync, it can do that itself.  If not, then it doesn't really matter that
r5l_write_super_and_discard_space() didn't wait.

r5l_write_super_and_discard_space() shouldn't call discard if the
superblock write didn't complete, and probably r5l_do_reclaim()
shouldn't update last_checkpoint and last_cp_seq in that case.
This is what I mean by "with a bit of care" and "exit gracefully".
Maybe I should have said "abort cleanly".  The goal is to get the thread
to exit.  It doesn't need to complete what it was doing, it just needs
to make sure that it leaves things in a tidy state so that when it
starts up again, it can pick up where it left off.

NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 818 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync
  2016-08-24  4:49           ` NeilBrown
@ 2016-08-24  5:25             ` Shaohua Li
  2016-08-25  4:59               ` NeilBrown
  0 siblings, 1 reply; 21+ messages in thread
From: Shaohua Li @ 2016-08-24  5:25 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-raid

On Wed, Aug 24, 2016 at 02:49:57PM +1000, Neil Brown wrote:
> On Wed, Aug 17 2016, Shaohua Li wrote:
> >> >
> >> > We will have the same deadlock issue with just stopping/restarting the reclaim
> >> > thread. As stopping the thread will wait for the thread, which probably is
> >> > doing r5l_do_reclaim and writting the superblock. Since we are writting the
> >> > superblock, we must hold the reconfig_mutex.
> >> 
> >> When you say "writing the superblock" you presumably mean "blocked in
> >> r5l_write_super_and_discard_space(), waiting for  MD_CHANGE_PENDING to
> >> be cleared" ??
> > right
> >> With a bit of care you could wait for MD_CHANGE_PENDING to clear, or for
> >> ->quiesce to be set, and then exit gracefully.
> >
> > Can you give details about this please? .quiesce is called with reconfig_mutex
> > hold, so the MD_CHANGE_PENDING will never get cleared.
> 
> raid5_quiesce(mddev, 1) sets conf->quiesce and then calls r5l_quiesce().
> 
> r5l_quiesce() tells the reclaim_thread to exit and waits for it to do so.
> 
> But the reclaim thread might be in
>    r5l_do_reclaim() -> r5l_write_super_and_discard_space()
> waiting for MD_CHANGE_PENDING to clear.  That will only get cleared when
> the main thread can get the reconfig_mutex, which the thread calling
> raid5_quiesce() might hold.  So we get a deadlock.
> 
> My suggestion is to change r5l_write_super_and_discard_space() so that
> it waits for *either* MD_CHANGE_PENDING to be clear, or conf->quiesce
> to be set.  That will avoid the deadlock.
> 
> Whatever thread called raid5_quiesce() will now be in control of the
> array without any async IO going on.  If it needs the metadata to be
> sync, it can do that itself.  If not, then it doesn't really matter that
> r5l_write_super_and_discard_space() didn't wait.

I'm afraid waiting for conf->quiesce to be set isn't safe. The reason we wait
for the superblock write isn't async IO. Discard could zero data, so before we
do the discard we must make sure the superblock points to the correct log tail,
otherwise recovery will not work. That is the reason we wait for the superblock
write.

> r5l_write_super_and_discard_space() shouldn't call discard if the
> superblock write didn't complete, and probably r5l_do_reclaim()
> shouldn't update last_checkpoint and last_cp_seq in that case.
> This is what I mean by "with a bit of care" and "exit gracefully".
> Maybe I should have said "abort cleanly".  The goal is to get the thread
> to exit.  It doesn't need to complete what it was doing, it just needs
> to make sure that it leaves things in a tidy state so that when it
> starts up again, it can pick up where it left off.

Agreed, we could skip the discard sometimes; that only happens occasionally, so
the impact is small. I tested something like the below recently. Can I assume
this is the solution we agree on?


diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 5504ce2..cd34e66 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -96,7 +96,6 @@ struct r5l_log {
 	spinlock_t no_space_stripes_lock;
 
 	bool need_cache_flush;
-	bool in_teardown;
 };
 
 /*
@@ -703,32 +702,22 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
 		return;
 
 	mddev = log->rdev->mddev;
+
 	/*
-	 * This is to avoid a deadlock. r5l_quiesce holds reconfig_mutex and
-	 * wait for this thread to finish. This thread waits for
-	 * MD_CHANGE_PENDING clear, which is supposed to be done in
-	 * md_check_recovery(). md_check_recovery() tries to get
-	 * reconfig_mutex. Since r5l_quiesce already holds the mutex,
-	 * md_check_recovery() fails, so the PENDING never get cleared. The
-	 * in_teardown check workaround this issue.
+	 * Discard could zero data, so before the discard we must make sure the
+	 * superblock is updated to the new log tail. Updating the superblock (either
+	 * calling md_update_sb() directly or relying on the md thread) must hold the
+	 * reconfig mutex. On the other hand, raid5_quiesce is called with the
+	 * reconfig_mutex held. The first step of raid5_quiesce() is waiting
+	 * for all IO to finish, hence waiting for the reclaim thread, while the
+	 * reclaim thread is calling this function and waiting for the reconfig
+	 * mutex. So there is a deadlock. We work around this issue with a trylock.
+	 * FIXME: we could miss the discard if we can't take the reconfig mutex
 	 */
-	if (!log->in_teardown) {
-		set_mask_bits(&mddev->flags, 0,
-			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
-		md_wakeup_thread(mddev->thread);
-		wait_event(mddev->sb_wait,
-			!test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
-			log->in_teardown);
-		/*
-		 * r5l_quiesce could run after in_teardown check and hold
-		 * mutex first. Superblock might get updated twice.
-		 */
-		if (log->in_teardown)
-			md_update_sb(mddev, 1);
-	} else {
-		WARN_ON(!mddev_is_locked(mddev));
-		md_update_sb(mddev, 1);
-	}
+	if (!mddev_trylock(mddev))
+		return;
+	md_update_sb(mddev, 1);
+	mddev_unlock(mddev);
 
 	/* discard IO error really doesn't matter, ignore it */
 	if (log->last_checkpoint < end) {
@@ -827,7 +816,6 @@ void r5l_quiesce(struct r5l_log *log, int state)
 	if (!log || state == 2)
 		return;
 	if (state == 0) {
-		log->in_teardown = 0;
 		/*
 		 * This is a special case for hotadd. In suspend, the array has
 		 * no journal. In resume, journal is initialized as well as the
@@ -838,11 +826,6 @@ void r5l_quiesce(struct r5l_log *log, int state)
 		log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
 					log->rdev->mddev, "reclaim");
 	} else if (state == 1) {
-		/*
-		 * at this point all stripes are finished, so io_unit is at
-		 * least in STRIPE_END state
-		 */
-		log->in_teardown = 1;
 		/* make sure r5l_write_super_and_discard_space exits */
 		mddev = log->rdev->mddev;
 		wake_up(&mddev->sb_wait);

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync
  2016-08-24  5:25             ` Shaohua Li
@ 2016-08-25  4:59               ` NeilBrown
  2016-08-25 17:17                 ` Shaohua Li
  0 siblings, 1 reply; 21+ messages in thread
From: NeilBrown @ 2016-08-25  4:59 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-raid

[-- Attachment #1: Type: text/plain, Size: 3542 bytes --]

On Wed, Aug 24 2016, Shaohua Li wrote:

> On Wed, Aug 24, 2016 at 02:49:57PM +1000, Neil Brown wrote:
>> On Wed, Aug 17 2016, Shaohua Li wrote:
>> >> >
>> >> > We will have the same deadlock issue with just stopping/restarting the reclaim
>> >> > thread. As stopping the thread will wait for the thread, which probably is
>> >> > doing r5l_do_reclaim and writting the superblock. Since we are writting the
>> >> > superblock, we must hold the reconfig_mutex.
>> >> 
>> >> When you say "writing the superblock" you presumably mean "blocked in
>> >> r5l_write_super_and_discard_space(), waiting for  MD_CHANGE_PENDING to
>> >> be cleared" ??
>> > right
>> >> With a bit of care you could wait for MD_CHANGE_PENDING to clear, or for
>> >> ->quiesce to be set, and then exit gracefully.
>> >
>> > Can you give details about this please? .quiesce is called with reconfig_mutex
>> > hold, so the MD_CHANGE_PENDING will never get cleared.
>> 
>> raid5_quiesce(mddev, 1) sets conf->quiesce and then calls r5l_quiesce().
>> 
>> r5l_quiesce() tells the reclaim_thread to exit and waits for it to do so.
>> 
>> But the reclaim thread might be in
>>    r5l_do_reclaim() -> r5l_write_super_and_discard_space()
>> waiting for MD_CHANGE_PENDING to clear.  That will only get cleared when
>> the main thread can get the reconfig_mutex, which the thread calling
>> raid5_quiesce() might hold.  So we get a deadlock.
>> 
>> My suggestion is to change r5l_write_super_and_discard_space() so that
>> it waits for *either* MD_CHANGE_PENDING to be clear, or conf->quiesce
>> to be set.  That will avoid the deadlock.
>> 
>> Whatever thread called raid5_quiesce() will now be in control of the
>> array without any async IO going on.  If it needs the metadata to be
>> sync, it can do that itself.  If not, then it doesn't really matter that
>> r5l_write_super_and_discard_space() didn't wait.
>
> I'm afraid waiting conf->quiesce set isn't safe. The reason to wait for
> superblock write isn't because of async IO. discard could zero data, so before
> we do discard, we must make sure superblock points to correct log tail,
> otherwise recovery will not work. This is the reason we wait for superblock
> write.
>
>> r5l_write_super_and_discard_space() shouldn't call discard if the
>> superblock write didn't complete, and probably r5l_do_reclaim()
>> shouldn't update last_checkpoint and last_cp_seq in that case.
>> This is what I mean by "with a bit of care" and "exit gracefully".
>> Maybe I should have said "abort cleanly".  The goal is to get the thread
>> to exit.  It doesn't need to complete what it was doing, it just needs
>> to make sure that it leaves things in a tidy state so that when it
>> starts up again, it can pick up where it left off.
>
> Agree, we could ignore discard sometime, which happens occasionally, so impact
> is little. I tested something like below recently. Assume this is the solution
> we agree on?

Yes, this definitely looks like it is heading in the right direction.

I thought that

> -		set_mask_bits(&mddev->flags, 0,
> -			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
> -		md_wakeup_thread(mddev->thread);

would still be there in the case that the lock cannot be claimed.

You could even record the ->events value before setting the flags,
and record the range that needs to be discarded.  Next time
r5l_do_reclaim is entered, if ->events has moved on, then it should be
safe to discard the recorded range.  Maybe.
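
Roughly, as an untested sketch of that idea (the deferred_* fields and the
helper at the end are names I am inventing here purely for illustration,
nothing that exists in the tree):

	/* in r5l_write_super_and_discard_space(), when the mutex is busy */
	set_mask_bits(&mddev->flags, 0,
		      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
	md_wakeup_thread(mddev->thread);
	if (!mddev_trylock(mddev)) {
		/* remember the events count and the range we wanted to discard */
		log->deferred_events = mddev->events;
		log->deferred_start  = log->last_checkpoint;
		log->deferred_end    = end;
		return;
	}
	md_update_sb(mddev, 1);
	mddev_unlock(mddev);

	/* in r5l_do_reclaim(), on a later pass */
	if (log->deferred_end &&
	    log->rdev->mddev->events > log->deferred_events) {
		/*
		 * The superblock has been written since we deferred, so the
		 * old range is no longer needed for recovery.
		 */
		r5l_discard_deferred_range(log);	/* invented helper */
		log->deferred_end = 0;
	}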

Thanks,
NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 818 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync
  2016-08-25  4:59               ` NeilBrown
@ 2016-08-25 17:17                 ` Shaohua Li
  0 siblings, 0 replies; 21+ messages in thread
From: Shaohua Li @ 2016-08-25 17:17 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-raid

On Thu, Aug 25, 2016 at 02:59:13PM +1000, Neil Brown wrote:
> On Wed, Aug 24 2016, Shaohua Li wrote:
> 
> > On Wed, Aug 24, 2016 at 02:49:57PM +1000, Neil Brown wrote:
> >> On Wed, Aug 17 2016, Shaohua Li wrote:
> >> >> >
> >> >> > We will have the same deadlock issue with just stopping/restarting the reclaim
> >> >> > thread. Stopping the thread will wait for it, and it is probably in
> >> >> > r5l_do_reclaim writing the superblock. Since we are writing the
> >> >> > superblock, we must hold the reconfig_mutex.
> >> >> 
> >> >> When you say "writing the superblock" you presumably mean "blocked in
> >> >> r5l_write_super_and_discard_space(), waiting for  MD_CHANGE_PENDING to
> >> >> be cleared" ??
> >> > right
> >> >> With a bit of care you could wait for MD_CHANGE_PENDING to clear, or for
> >> >> ->quiesce to be set, and then exit gracefully.
> >> >
> >> > Can you give details about this please? .quiesce is called with reconfig_mutex
> >> > held, so MD_CHANGE_PENDING will never get cleared.
> >> 
> >> raid5_quiesce(mddev, 1) sets conf->quiesce and then calls r5l_quiesce().
> >> 
> >> r5l_quiesce() tells the reclaim_thread to exit and waits for it to do so.
> >> 
> >> But the reclaim thread might be in
> >>    r5l_do_reclaim() -> r5l_write_super_and_discard_space()
> >> waiting for MD_CHANGE_PENDING to clear.  That will only get cleared when
> >> the main thread can get the reconfig_mutex, which the thread calling
> >> raid5_quiesce() might hold.  So we get a deadlock.
> >> 
> >> My suggestion is to change r5l_write_super_and_discard_space() so that
> >> it waits for *either* MD_CHANGE_PENDING to be clear, or conf->quiesce
> >> to be set.  That will avoid the deadlock.
> >> 
> >> Whatever thread called raid5_quiesce() will now be in control of the
> >> array without any async IO going on.  If it needs the metadata to be
> >> sync, it can do that itself.  If not, then it doesn't really matter that
> >> r5l_write_super_and_discard_space() didn't wait.
> >
> > I'm afraid waiting for conf->quiesce to be set isn't safe. The reason we wait
> > for the superblock write isn't async IO. Discard could zero data, so before we
> > discard, we must make sure the superblock points to the correct log tail,
> > otherwise recovery will not work. That is why we wait for the superblock
> > write.
> >
> >> r5l_write_super_and_discard_space() shouldn't call discard if the
> >> superblock write didn't complete, and probably r5l_do_reclaim()
> >> shouldn't update last_checkpoint and last_cp_seq in that case.
> >> This is what I mean by "with a bit of care" and "exit gracefully".
> >> Maybe I should have said "abort cleanly".  The goal is to get the thread
> >> to exit.  It doesn't need to complete what it was doing, it just needs
> >> to make sure that it leaves things in a tidy state so that when it
> >> starts up again, it can pick up where it left off.
> >
> > Agreed, we could occasionally skip the discard; that happens rarely, so the
> > impact is small. I tested something like the patch below recently. Is this the
> > solution we agree on?
> 
> Yes, this definitely looks like it is heading in the right direction.
> 
> I thought that
> 
> > -		set_mask_bits(&mddev->flags, 0,
> > -			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
> > -		md_wakeup_thread(mddev->thread);
> 
> would still be there in the case that the lock cannot be claimed.

yep, this makes sense.
> You could even record the ->events value before setting the flags,
> and record the range that needs to be discarded.  Next time
> r5l_do_reclaim is entered, if ->events has moved on, then it should be
> safe to discard the recorded range.  Maybe.

I thought about something like this too, but it looks like more work is needed
to make it happen. We have already updated the log, so the range could be
reused soon. And if the array is being stopped, we never get the chance to
re-enter reclaim, and I believe that is the most common case in which the lock
can't be taken. Missing a discard isn't a big issue, especially since it
happens rarely. I'm going to commit the patch below if there is no objection.

Thanks,
Shaohua


commit 93e297c0b152667cc4a17db6fe7360dab7e3e9d5
Author: Shaohua Li <shli@fb.com>
Date:   Thu Aug 25 10:09:39 2016 -0700

    raid5-cache: fix a deadlock in superblock write
    
    There is a potential deadlock in the superblock write path. Discard could zero
    data, so before discarding we must make sure the superblock has been updated to
    the new log tail. Updating the superblock (either calling md_update_sb()
    directly or relying on the md thread) requires holding the reconfig mutex. On
    the other hand, raid5_quiesce is called with reconfig_mutex held. The first
    step of raid5_quiesce() is waiting for all IO to finish, hence waiting for the
    reclaim thread, while the reclaim thread is calling this function and waiting
    for the reconfig mutex. So there is a deadlock. We work around this issue with
    a trylock. The downside of this solution is that we could miss a discard if we
    can't take the reconfig mutex. But this should happen rarely (mainly when the
    array is stopped), so a missed discard shouldn't be a big problem.
    
    Cc: NeilBrown <neilb@suse.com>
    Signed-off-by: Shaohua Li <shli@fb.com>

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 5504ce2..2b0589f 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -96,7 +96,6 @@ struct r5l_log {
 	spinlock_t no_space_stripes_lock;
 
 	bool need_cache_flush;
-	bool in_teardown;
 };
 
 /*
@@ -704,31 +703,22 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
 
 	mddev = log->rdev->mddev;
 	/*
-	 * This is to avoid a deadlock. r5l_quiesce holds reconfig_mutex and
-	 * wait for this thread to finish. This thread waits for
-	 * MD_CHANGE_PENDING clear, which is supposed to be done in
-	 * md_check_recovery(). md_check_recovery() tries to get
-	 * reconfig_mutex. Since r5l_quiesce already holds the mutex,
-	 * md_check_recovery() fails, so the PENDING never get cleared. The
-	 * in_teardown check workaround this issue.
+	 * Discard could zero data, so before discarding we must make sure the
+	 * superblock has been updated to the new log tail. Updating the
+	 * superblock (calling md_update_sb() directly or relying on the md
+	 * thread) requires holding the reconfig mutex. On the other hand,
+	 * raid5_quiesce is called with reconfig_mutex held; its first step is
+	 * waiting for all IO to finish, hence waiting for the reclaim thread,
+	 * while the reclaim thread is in this function waiting for the reconfig
+	 * mutex. So there is a deadlock. We work around it with a trylock.
+	 * FIXME: we could miss a discard if we can't take the reconfig mutex
 	 */
-	if (!log->in_teardown) {
-		set_mask_bits(&mddev->flags, 0,
-			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
-		md_wakeup_thread(mddev->thread);
-		wait_event(mddev->sb_wait,
-			!test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
-			log->in_teardown);
-		/*
-		 * r5l_quiesce could run after in_teardown check and hold
-		 * mutex first. Superblock might get updated twice.
-		 */
-		if (log->in_teardown)
-			md_update_sb(mddev, 1);
-	} else {
-		WARN_ON(!mddev_is_locked(mddev));
-		md_update_sb(mddev, 1);
-	}
+	set_mask_bits(&mddev->flags, 0,
+		BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
+	if (!mddev_trylock(mddev))
+		return;
+	md_update_sb(mddev, 1);
+	mddev_unlock(mddev);
 
 	/* discard IO error really doesn't matter, ignore it */
 	if (log->last_checkpoint < end) {
@@ -827,7 +817,6 @@ void r5l_quiesce(struct r5l_log *log, int state)
 	if (!log || state == 2)
 		return;
 	if (state == 0) {
-		log->in_teardown = 0;
 		/*
 		 * This is a special case for hotadd. In suspend, the array has
 		 * no journal. In resume, journal is initialized as well as the
@@ -838,11 +827,6 @@ void r5l_quiesce(struct r5l_log *log, int state)
 		log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
 					log->rdev->mddev, "reclaim");
 	} else if (state == 1) {
-		/*
-		 * at this point all stripes are finished, so io_unit is at
-		 * least in STRIPE_END state
-		 */
-		log->in_teardown = 1;
 		/* make sure r5l_write_super_and_discard_space exits */
 		mddev = log->rdev->mddev;
 		wake_up(&mddev->sb_wait);

^ permalink raw reply related	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2016-08-25 17:17 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-30 23:54 [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync shli
2016-07-30 23:54 ` [PATCH 2/3] MD: hold mddev lock to change bitmap location shli
2016-08-03  0:03   ` NeilBrown
2016-07-30 23:54 ` [PATCH 3/3] MD: hold mddev lock for md-cluster receive thread shli
2016-08-01  8:38   ` Guoqing Jiang
2016-08-01 21:45     ` Shaohua Li
2016-08-02  9:52       ` Guoqing Jiang
2016-08-02 22:44         ` Shaohua Li
2016-08-03  3:18           ` Guoqing Jiang
2016-08-03  0:09         ` NeilBrown
2016-08-03  3:42           ` Guoqing Jiang
2016-07-31  6:03 ` [PATCH 1/3] MD: hold mddev lock for .quiesce in md_do_sync yizhan
2016-08-02 23:47 ` NeilBrown
2016-08-04  3:16   ` NeilBrown
2016-08-06  4:14     ` Shaohua Li
2016-08-12  0:04       ` NeilBrown
2016-08-17  1:28         ` Shaohua Li
2016-08-24  4:49           ` NeilBrown
2016-08-24  5:25             ` Shaohua Li
2016-08-25  4:59               ` NeilBrown
2016-08-25 17:17                 ` Shaohua Li
