* [PATCH 0/3] pnfs: fix pnfs lock inversion, try 2
@ 2011-02-03 18:28 Fred Isaman
From: Fred Isaman @ 2011-02-03 18:28 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Changes from previous version:
- fix PATCH 1 to avoid introducing a race between pnfs_destroy_layout and LAYOUTGET
- refactor PATCH 3 per Benny's suggestion


These fix the incorrect lock order used throughout the pnfs code. We
first simplify the logic (and in the process squash a few bugs), then
rearrange the code so we never have to take both i_lock and cl_lock
simultaneously.

Fred



* [PATCH 1/3] pnfs: avoid incorrect use of layout stateid
From: Fred Isaman @ 2011-02-03 18:28 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

The code could violate the following from RFC5661, section 12.5.3:
"Once a client has no more layouts on a file, the layout stateid is no
longer valid and MUST NOT be used."

This can occur when a layout already has an lseg, the client starts
another, non-overlapping LAYOUTGET, and a CB_LAYOUTRECALL for the
existing lseg is processed before we hit pnfs_layout_process().

Solve this by setting a flag, each time the client has no more lsegs
for a file, that blocks further use of the layout and triggers its
removal.

This also fixes a second bug that occurs in the same scenario as
above.  If we actually reach pnfs_layout_process(), we add the new lseg
to the layout, but the layout has been removed from the nfs_client list
by the intervening CB_LAYOUTRECALL and will not be added back.  Thus
the newly acquired lseg will not be properly returned in the event of
a subsequent CB_LAYOUTRECALL.
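
For illustration only, here is a minimal userspace model of the
resulting flag mechanism (layout_model, drop_lseg, layoutgets_blocked
and nr_lsegs are simplified stand-ins for the kernel structures and
helpers; this is not the fs/nfs/pnfs.c code):

#include <stdbool.h>
#include <stdio.h>

/* Simplified model: bit positions standing in for the kernel flags. */
enum { NFS_LAYOUT_DESTROYED, NFS_LAYOUT_BULK_RECALL };

struct layout_model {
	unsigned long plh_flags;
	int nr_lsegs;		/* stands in for list_empty(&lo->plh_segs) */
};

/* Models the change to put_lseg_locked(): once the last lseg for the
 * file goes away, the layout is marked DESTROYED and never reused. */
static void drop_lseg(struct layout_model *lo)
{
	if (--lo->nr_lsegs == 0)
		lo->plh_flags |= 1UL << NFS_LAYOUT_DESTROYED;
}

/* Models the check added to pnfs_layoutgets_blocked(): a DESTROYED
 * layout blocks further LAYOUTGETs, so its stateid is never reused. */
static bool layoutgets_blocked(const struct layout_model *lo)
{
	return lo->plh_flags & (1UL << NFS_LAYOUT_DESTROYED);
}

int main(void)
{
	struct layout_model lo = { .plh_flags = 0, .nr_lsegs = 1 };

	/* CB_LAYOUTRECALL frees the only lseg before pnfs_layout_process()
	 * runs for a second, non-overlapping LAYOUTGET... */
	drop_lseg(&lo);

	/* ...so the now-stale layout stateid can no longer be used. */
	printf("LAYOUTGET blocked: %s\n",
	       layoutgets_blocked(&lo) ? "yes" : "no");
	return 0;
}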

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/pnfs.c |   13 ++++++++++---
 1 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 1b1bc1a..c8d9b21 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -255,6 +255,9 @@ put_lseg_locked(struct pnfs_layout_segment *lseg,
 			list_del_init(&lseg->pls_layout->plh_layouts);
 			spin_unlock(&clp->cl_lock);
 			clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
+			set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
+			/* Matched by initial refcount set in alloc_init_layout_hdr */
+			put_layout_hdr_locked(lseg->pls_layout);
 		}
 		rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
 		list_add(&lseg->pls_list, tmp_list);
@@ -299,6 +302,11 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 
 	dprintk("%s:Begin lo %p\n", __func__, lo);
 
+	if (list_empty(&lo->plh_segs)) {
+		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
+			put_layout_hdr_locked(lo);
+		return 0;
+	}
 	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
 		if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
 			dprintk("%s: freeing lseg %p iomode %d "
@@ -332,10 +340,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
 	spin_lock(&nfsi->vfs_inode.i_lock);
 	lo = nfsi->layout;
 	if (lo) {
-		set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags);
+		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
 		mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
-		/* Matched by refcount set to 1 in alloc_init_layout_hdr */
-		put_layout_hdr_locked(lo);
 	}
 	spin_unlock(&nfsi->vfs_inode.i_lock);
 	pnfs_free_lseg_list(&tmp_list);
@@ -403,6 +409,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
 	    (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
 		return true;
 	return lo->plh_block_lgets ||
+		test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
 		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
 		(list_empty(&lo->plh_segs) &&
 		 (atomic_read(&lo->plh_outstanding) > lget));
-- 
1.7.2.1



* [PATCH 2/3] pnfs: do not need to clear NFS_LAYOUT_BULK_RECALL flag
From: Fred Isaman @ 2011-02-03 18:28 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

We do not need to clear the NFS_LAYOUT_BULK_RECALL flag, as setting it
guarantees that NFS_LAYOUT_DESTROYED will be set once any outstanding
I/O is finished.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/pnfs.c |    2 --
 1 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c8d9b21..c17edfb 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -254,7 +254,6 @@ put_lseg_locked(struct pnfs_layout_segment *lseg,
 			/* List does not take a reference, so no need for put here */
 			list_del_init(&lseg->pls_layout->plh_layouts);
 			spin_unlock(&clp->cl_lock);
-			clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
 			set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
 			/* Matched by initial refcount set in alloc_init_layout_hdr */
 			put_layout_hdr_locked(lseg->pls_layout);
@@ -754,7 +753,6 @@ pnfs_update_layout(struct inode *ino,
 			spin_lock(&clp->cl_lock);
 			list_del_init(&lo->plh_layouts);
 			spin_unlock(&clp->cl_lock);
-			clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
 		}
 		spin_unlock(&ino->i_lock);
 	}
-- 
1.7.2.1



* [PATCH 3/3] pnfs: fix pnfs lock inversion of i_lock and cl_lock
From: Fred Isaman @ 2011-02-03 18:28 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

The pnfs code used the lock order i_lock, then cl_lock, throughout.
This conflicts with the lock order used by the nfs delegation code.
Rework the pnfs code to avoid taking both locks simultaneously.

Currently the code takes both locks to add/remove the layout to/from
an nfs_client list while atomically checking that the list of lsegs is
empty.  To avoid this, we rely on existing serializations.  When a
layout is initialized with an lseg count of zero, LAYOUTGET's open
stateid serialization is in effect, making it safe to assume the count
stays zero unless we change it.  And once a layout's lseg count drops
to zero, the layout is marked DESTROYED and so the count will stay at
zero.
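
For illustration, a rough userspace sketch of the resulting lock usage
(pthread mutexes standing in for i_lock/cl_lock, and simplified fields
such as nr_lsegs/on_clp_list in place of the real structures; this is a
model of the pattern, not the kernel code):

#include <pthread.h>
#include <stdbool.h>

/* Stand-ins for i_lock and cl_lock; the point is purely ordering:
 * neither lock is ever held while taking the other. */
static pthread_mutex_t i_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t cl_lock = PTHREAD_MUTEX_INITIALIZER;

struct layout_model {
	int nr_lsegs;
	bool on_clp_list;	/* stands in for lo->plh_layouts linkage */
};

/* Models the reworked pnfs_update_layout(): note whether this is the
 * first lseg while holding i_lock, drop i_lock, and only then touch
 * the nfs_client list under cl_lock. */
static void update_layout(struct layout_model *lo, bool layoutget_succeeds)
{
	bool first = false;

	pthread_mutex_lock(&i_lock);
	if (lo->nr_lsegs == 0)
		first = true;	/* stable: serialized by the open stateid */
	pthread_mutex_unlock(&i_lock);

	if (first) {
		pthread_mutex_lock(&cl_lock);
		lo->on_clp_list = true;		/* list_add_tail(&lo->plh_layouts, ...) */
		pthread_mutex_unlock(&cl_lock);
	}

	/* send_layoutget() would run here, with no spinlock held */

	if (!layoutget_succeeds && first) {
		pthread_mutex_lock(&cl_lock);
		lo->on_clp_list = false;	/* list_del_init(&lo->plh_layouts) */
		pthread_mutex_unlock(&cl_lock);
	}
}

int main(void)
{
	struct layout_model lo = { .nr_lsegs = 0, .on_clp_list = false };

	/* Failed LAYOUTGET on a fresh layout: it comes back off the list. */
	update_layout(&lo, false);
	return lo.on_clp_list ? 1 : 0;
}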

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/callback_proc.c |    2 +-
 fs/nfs/pnfs.c          |   42 +++++++++++++++++++++++++-----------------
 2 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 8958757..2f41dcce 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -188,10 +188,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
 			rv = NFS4ERR_DELAY;
 		list_del_init(&lo->plh_bulk_recall);
 		spin_unlock(&ino->i_lock);
+		pnfs_free_lseg_list(&free_me_list);
 		put_layout_hdr(lo);
 		iput(ino);
 	}
-	pnfs_free_lseg_list(&free_me_list);
 	return rv;
 }
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c17edfb..0f5b66f 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -247,13 +247,6 @@ put_lseg_locked(struct pnfs_layout_segment *lseg,
 		BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
 		list_del(&lseg->pls_list);
 		if (list_empty(&lseg->pls_layout->plh_segs)) {
-			struct nfs_client *clp;
-
-			clp = NFS_SERVER(ino)->nfs_client;
-			spin_lock(&clp->cl_lock);
-			/* List does not take a reference, so no need for put here */
-			list_del_init(&lseg->pls_layout->plh_layouts);
-			spin_unlock(&clp->cl_lock);
 			set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
 			/* Matched by initial refcount set in alloc_init_layout_hdr */
 			put_layout_hdr_locked(lseg->pls_layout);
@@ -319,11 +312,27 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 	return invalid - removed;
 }
 
+/* note free_me must contain lsegs from a single layout_hdr */
 void
 pnfs_free_lseg_list(struct list_head *free_me)
 {
 	struct pnfs_layout_segment *lseg, *tmp;
+	struct pnfs_layout_hdr *lo;
+
+	if (list_empty(free_me))
+		return;
 
+	lo = list_first_entry(free_me, struct pnfs_layout_segment,
+			      pls_list)->pls_layout;
+
+	if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
+		struct nfs_client *clp;
+
+		clp = NFS_SERVER(lo->plh_inode)->nfs_client;
+		spin_lock(&clp->cl_lock);
+		list_del_init(&lo->plh_layouts);
+		spin_unlock(&clp->cl_lock);
+	}
 	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
 		list_del(&lseg->pls_list);
 		free_lseg(lseg);
@@ -705,6 +714,7 @@ pnfs_update_layout(struct inode *ino,
 	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
 	struct pnfs_layout_hdr *lo;
 	struct pnfs_layout_segment *lseg = NULL;
+	bool first = false;
 
 	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
 		return NULL;
@@ -735,7 +745,10 @@ pnfs_update_layout(struct inode *ino,
 	atomic_inc(&lo->plh_outstanding);
 
 	get_layout_hdr(lo);
-	if (list_empty(&lo->plh_segs)) {
+	if (list_empty(&lo->plh_segs))
+		first = true;
+	spin_unlock(&ino->i_lock);
+	if (first) {
 		/* The lo must be on the clp list if there is any
 		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
 		 */
@@ -744,17 +757,12 @@ pnfs_update_layout(struct inode *ino,
 		list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
 		spin_unlock(&clp->cl_lock);
 	}
-	spin_unlock(&ino->i_lock);
 
 	lseg = send_layoutget(lo, ctx, iomode);
-	if (!lseg) {
-		spin_lock(&ino->i_lock);
-		if (list_empty(&lo->plh_segs)) {
-			spin_lock(&clp->cl_lock);
-			list_del_init(&lo->plh_layouts);
-			spin_unlock(&clp->cl_lock);
-		}
-		spin_unlock(&ino->i_lock);
+	if (!lseg && first) {
+		spin_lock(&clp->cl_lock);
+		list_del_init(&lo->plh_layouts);
+		spin_unlock(&clp->cl_lock);
 	}
 	atomic_dec(&lo->plh_outstanding);
 	put_layout_hdr(lo);
-- 
1.7.2.1


