All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] NFS: fix umount of pnfs filesystems
@ 2011-06-01  1:46 Weston Andros Adamson
  2011-06-01 12:00 ` Benny Halevy
  0 siblings, 1 reply; 5+ messages in thread
From: Weston Andros Adamson @ 2011-06-01  1:46 UTC (permalink / raw)
  To: trond; +Cc: linux-nfs, Weston Andros Adamson

Unmounting a pnfs filesystem hangs using filelayout and possibly others.
This fixes the use of the rcu protected node by making use of a new 'tmpnode'
for the temporary purge list. Also, the spinlock shouldn't be held when calling
synchronize_rcu().

Signed-off-by: Weston Andros Adamson <dros@netapp.com>
---
Fix proposed by Trond.  Reposted without 'unused variable' warning.

 fs/nfs/pnfs.h     |    1 +
 fs/nfs/pnfs_dev.c |   13 ++++++++-----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 48d0a8e..96bf4e6 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -186,6 +186,7 @@ int pnfs_ld_read_done(struct nfs_read_data *);
 /* pnfs_dev.c */
 struct nfs4_deviceid_node {
 	struct hlist_node		node;
+	struct hlist_node		tmpnode;
 	const struct pnfs_layoutdriver_type *ld;
 	const struct nfs_client		*nfs_client;
 	struct nfs4_deviceid		deviceid;
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index c65e133..503186e 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -174,6 +174,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
 			const struct nfs4_deviceid *id)
 {
 	INIT_HLIST_NODE(&d->node);
+	INIT_HLIST_NODE(&d->tmpnode);
 	d->ld = ld;
 	d->nfs_client = nfs_client;
 	d->deviceid = *id;
@@ -238,24 +239,28 @@ static void
 _deviceid_purge_client(const struct nfs_client *clp, long hash)
 {
 	struct nfs4_deviceid_node *d;
-	struct hlist_node *n, *next;
+	struct hlist_node *n;
 	HLIST_HEAD(tmp);
 
+	spin_lock(&nfs4_deviceid_lock);
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
 		if (d->nfs_client == clp && atomic_read(&d->ref)) {
 			hlist_del_init_rcu(&d->node);
-			hlist_add_head(&d->node, &tmp);
+			hlist_add_head(&d->tmpnode, &tmp);
 		}
 	rcu_read_unlock();
+	spin_unlock(&nfs4_deviceid_lock);
 
 	if (hlist_empty(&tmp))
 		return;
 
 	synchronize_rcu();
-	hlist_for_each_entry_safe(d, n, next, &tmp, node)
+	while (!hlist_empty(&tmp)) {
 		if (atomic_dec_and_test(&d->ref))
 			d->ld->free_deviceid_node(d);
+		hlist_del_init(&d->tmpnode);
+	}
 }
 
 void
@@ -263,8 +268,6 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp)
 {
 	long h;
 
-	spin_lock(&nfs4_deviceid_lock);
 	for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
 		_deviceid_purge_client(clp, h);
-	spin_unlock(&nfs4_deviceid_lock);
 }
-- 
1.7.5.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] NFS: fix umount of pnfs filesystems
  2011-06-01  1:46 [PATCH] NFS: fix umount of pnfs filesystems Weston Andros Adamson
@ 2011-06-01 12:00 ` Benny Halevy
  0 siblings, 0 replies; 5+ messages in thread
From: Benny Halevy @ 2011-06-01 12:00 UTC (permalink / raw)
  To: Weston Andros Adamson; +Cc: trond, linux-nfs

On 2011-06-01 04:46, Weston Andros Adamson wrote:
> Unmounting a pnfs filesystem hangs using filelayout and possibly others.
> This fixes the use of the rcu protected node by making use of a new 'tmpnode'
> for the temporary purge list. Also, the spinlock shouldn't be held when calling
> synchronize_rcu().
> 
> Signed-off-by: Weston Andros Adamson <dros@netapp.com>

ACK.
Looks good to me, thanks!

Benny

> ---
> Fix proposed by Trond.  Reposted without 'unused variable' warning.
> 
>  fs/nfs/pnfs.h     |    1 +
>  fs/nfs/pnfs_dev.c |   13 ++++++++-----
>  2 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 48d0a8e..96bf4e6 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -186,6 +186,7 @@ int pnfs_ld_read_done(struct nfs_read_data *);
>  /* pnfs_dev.c */
>  struct nfs4_deviceid_node {
>  	struct hlist_node		node;
> +	struct hlist_node		tmpnode;
>  	const struct pnfs_layoutdriver_type *ld;
>  	const struct nfs_client		*nfs_client;
>  	struct nfs4_deviceid		deviceid;
> diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
> index c65e133..503186e 100644
> --- a/fs/nfs/pnfs_dev.c
> +++ b/fs/nfs/pnfs_dev.c
> @@ -174,6 +174,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
>  			const struct nfs4_deviceid *id)
>  {
>  	INIT_HLIST_NODE(&d->node);
> +	INIT_HLIST_NODE(&d->tmpnode);
>  	d->ld = ld;
>  	d->nfs_client = nfs_client;
>  	d->deviceid = *id;
> @@ -238,24 +239,28 @@ static void
>  _deviceid_purge_client(const struct nfs_client *clp, long hash)
>  {
>  	struct nfs4_deviceid_node *d;
> -	struct hlist_node *n, *next;
> +	struct hlist_node *n;
>  	HLIST_HEAD(tmp);
>  
> +	spin_lock(&nfs4_deviceid_lock);
>  	rcu_read_lock();
>  	hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
>  		if (d->nfs_client == clp && atomic_read(&d->ref)) {
>  			hlist_del_init_rcu(&d->node);
> -			hlist_add_head(&d->node, &tmp);
> +			hlist_add_head(&d->tmpnode, &tmp);
>  		}
>  	rcu_read_unlock();
> +	spin_unlock(&nfs4_deviceid_lock);
>  
>  	if (hlist_empty(&tmp))
>  		return;
>  
>  	synchronize_rcu();
> -	hlist_for_each_entry_safe(d, n, next, &tmp, node)
> +	while (!hlist_empty(&tmp)) {
>  		if (atomic_dec_and_test(&d->ref))
>  			d->ld->free_deviceid_node(d);
> +		hlist_del_init(&d->tmpnode);
> +	}
>  }
>  
>  void
> @@ -263,8 +268,6 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp)
>  {
>  	long h;
>  
> -	spin_lock(&nfs4_deviceid_lock);
>  	for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
>  		_deviceid_purge_client(clp, h);
> -	spin_unlock(&nfs4_deviceid_lock);
>  }


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] NFS: fix umount of pnfs filesystems
  2011-06-01  6:38 ` Boaz Harrosh
@ 2011-06-01 14:39   ` Weston Andros Adamson
  0 siblings, 0 replies; 5+ messages in thread
From: Weston Andros Adamson @ 2011-06-01 14:39 UTC (permalink / raw)
  To: Boaz Harrosh; +Cc: trond, linux-nfs


On Jun 1, 2011, at 2:38 AM, Boaz Harrosh wrote:

> On 06/01/2011 04:30 AM, Weston Andros Adamson wrote:
>> Unmounting a pnfs filesystem hangs using filelayout and possibly others.
>> This fixes the use of the rcu protected node by making use of a new 'tmpnode'
>> for the temporary purge list. Also, the spinlock shouldn't be held when calling
>> synchronize_rcu().
>> 
> 
> I like the new code, but I have two questions:
> 
> * Why didn't I see this hang? (Maybe because I run uni-processor)
> * How do you have this problem. Usually with the regular usage of device-cache
>  all deviceids get released when layouts go away. So by the time client_purge comes
>  in there are no more devices in the cache. At objects-ld I take an extra ref at
>  first add_dev which is then get released in client_purge?
> 
> Thanks
> Boaz

Yeah, I saw the hangs on a vmware guest with two processors.

I believe it looked like this (I can revert and test again if you want the full traces):

1) Umount process calls client_purge, calls synchronize_rcu while holding lock
2) another process notices that it should call client_purge, spins trying acquire lock

With the file layout, there are definitely devices in the cache when umount is called.
Note, I could only reproduce this by: mounting remote fs, write one byte to a file on remote fs, umount.
If I get rid of the "write one byte" step, there is no hang.

-dros

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] NFS: fix umount of pnfs filesystems
  2011-06-01  1:30 Weston Andros Adamson
@ 2011-06-01  6:38 ` Boaz Harrosh
  2011-06-01 14:39   ` Weston Andros Adamson
  0 siblings, 1 reply; 5+ messages in thread
From: Boaz Harrosh @ 2011-06-01  6:38 UTC (permalink / raw)
  To: Weston Andros Adamson; +Cc: trond, linux-nfs

On 06/01/2011 04:30 AM, Weston Andros Adamson wrote:
> Unmounting a pnfs filesystem hangs using filelayout and possibly others.
> This fixes the use of the rcu protected node by making use of a new 'tmpnode'
> for the temporary purge list. Also, the spinlock shouldn't be held when calling
> synchronize_rcu().
> 

I like the new code, but I have two questions:

* Why didn't I see this hang? (Maybe because I run uni-processor)
* How do you have this problem. Usually with the regular usage of device-cache
  all deviceids get released when layouts go away. So by the time client_purge comes
  in there are no more devices in the cache. At objects-ld I take an extra ref at
  first add_dev which is then get released in client_purge?

Thanks
Boaz

> Signed-off-by: Weston Andros Adamson <dros@netapp.com>
> ---
> Fix proposed by Trond
> 
>  fs/nfs/pnfs.h     |    1 +
>  fs/nfs/pnfs_dev.c |   11 +++++++----
>  2 files changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 48d0a8e..96bf4e6 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -186,6 +186,7 @@ int pnfs_ld_read_done(struct nfs_read_data *);
>  /* pnfs_dev.c */
>  struct nfs4_deviceid_node {
>  	struct hlist_node		node;
> +	struct hlist_node		tmpnode;
>  	const struct pnfs_layoutdriver_type *ld;
>  	const struct nfs_client		*nfs_client;
>  	struct nfs4_deviceid		deviceid;
> diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
> index c65e133..81e642f 100644
> --- a/fs/nfs/pnfs_dev.c
> +++ b/fs/nfs/pnfs_dev.c
> @@ -174,6 +174,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
>  			const struct nfs4_deviceid *id)
>  {
>  	INIT_HLIST_NODE(&d->node);
> +	INIT_HLIST_NODE(&d->tmpnode);
>  	d->ld = ld;
>  	d->nfs_client = nfs_client;
>  	d->deviceid = *id;
> @@ -241,21 +242,25 @@ _deviceid_purge_client(const struct nfs_client *clp, long hash)
>  	struct hlist_node *n, *next;
>  	HLIST_HEAD(tmp);
>  
> +	spin_lock(&nfs4_deviceid_lock);
>  	rcu_read_lock();
>  	hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
>  		if (d->nfs_client == clp && atomic_read(&d->ref)) {
>  			hlist_del_init_rcu(&d->node);
> -			hlist_add_head(&d->node, &tmp);
> +			hlist_add_head(&d->tmpnode, &tmp);
>  		}
>  	rcu_read_unlock();
> +	spin_unlock(&nfs4_deviceid_lock);
>  
>  	if (hlist_empty(&tmp))
>  		return;
>  
>  	synchronize_rcu();
> -	hlist_for_each_entry_safe(d, n, next, &tmp, node)
> +	while (!hlist_empty(&tmp)) {
>  		if (atomic_dec_and_test(&d->ref))
>  			d->ld->free_deviceid_node(d);
> +		hlist_del_init(&d->tmpnode);
> +	}
>  }
>  
>  void
> @@ -263,8 +268,6 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp)
>  {
>  	long h;
>  
> -	spin_lock(&nfs4_deviceid_lock);
>  	for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
>  		_deviceid_purge_client(clp, h);
> -	spin_unlock(&nfs4_deviceid_lock);
>  }


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] NFS: fix umount of pnfs filesystems
@ 2011-06-01  1:30 Weston Andros Adamson
  2011-06-01  6:38 ` Boaz Harrosh
  0 siblings, 1 reply; 5+ messages in thread
From: Weston Andros Adamson @ 2011-06-01  1:30 UTC (permalink / raw)
  To: trond; +Cc: linux-nfs, Weston Andros Adamson

Unmounting a pnfs filesystem hangs using filelayout and possibly others.
This fixes the use of the rcu protected node by making use of a new 'tmpnode'
for the temporary purge list. Also, the spinlock shouldn't be held when calling
synchronize_rcu().

Signed-off-by: Weston Andros Adamson <dros@netapp.com>
---
Fix proposed by Trond

 fs/nfs/pnfs.h     |    1 +
 fs/nfs/pnfs_dev.c |   11 +++++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 48d0a8e..96bf4e6 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -186,6 +186,7 @@ int pnfs_ld_read_done(struct nfs_read_data *);
 /* pnfs_dev.c */
 struct nfs4_deviceid_node {
 	struct hlist_node		node;
+	struct hlist_node		tmpnode;
 	const struct pnfs_layoutdriver_type *ld;
 	const struct nfs_client		*nfs_client;
 	struct nfs4_deviceid		deviceid;
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index c65e133..81e642f 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -174,6 +174,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
 			const struct nfs4_deviceid *id)
 {
 	INIT_HLIST_NODE(&d->node);
+	INIT_HLIST_NODE(&d->tmpnode);
 	d->ld = ld;
 	d->nfs_client = nfs_client;
 	d->deviceid = *id;
@@ -241,21 +242,25 @@ _deviceid_purge_client(const struct nfs_client *clp, long hash)
 	struct hlist_node *n, *next;
 	HLIST_HEAD(tmp);
 
+	spin_lock(&nfs4_deviceid_lock);
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
 		if (d->nfs_client == clp && atomic_read(&d->ref)) {
 			hlist_del_init_rcu(&d->node);
-			hlist_add_head(&d->node, &tmp);
+			hlist_add_head(&d->tmpnode, &tmp);
 		}
 	rcu_read_unlock();
+	spin_unlock(&nfs4_deviceid_lock);
 
 	if (hlist_empty(&tmp))
 		return;
 
 	synchronize_rcu();
-	hlist_for_each_entry_safe(d, n, next, &tmp, node)
+	while (!hlist_empty(&tmp)) {
 		if (atomic_dec_and_test(&d->ref))
 			d->ld->free_deviceid_node(d);
+		hlist_del_init(&d->tmpnode);
+	}
 }
 
 void
@@ -263,8 +268,6 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp)
 {
 	long h;
 
-	spin_lock(&nfs4_deviceid_lock);
 	for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
 		_deviceid_purge_client(clp, h);
-	spin_unlock(&nfs4_deviceid_lock);
 }
-- 
1.7.5.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2011-06-01 14:40 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-01  1:46 [PATCH] NFS: fix umount of pnfs filesystems Weston Andros Adamson
2011-06-01 12:00 ` Benny Halevy
  -- strict thread matches above, loose matches on Subject: below --
2011-06-01  1:30 Weston Andros Adamson
2011-06-01  6:38 ` Boaz Harrosh
2011-06-01 14:39   ` Weston Andros Adamson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.