All of lore.kernel.org
 help / color / mirror / Atom feed
From: Fred Isaman <iisaman@netapp.com>
To: linux-nfs@vger.kernel.org
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Subject: [PATCH 13/15] pnfs: add CB_LAYOUTRECALL handling
Date: Tue, 21 Dec 2010 23:00:47 -0500	[thread overview]
Message-ID: <1292990449-20057-14-git-send-email-iisaman@netapp.com> (raw)
In-Reply-To: <1292990449-20057-1-git-send-email-iisaman@netapp.com>

This is the heart of the wave 2 submission.  Add the code to trigger
drain and forget of any affected layouts.  In addition, we set a
"barrier", below which any LAYOUTGET reply is ignored.  This is to
compensate for the fact that we do not wait for outstanding LAYOUTGETs
to complete as per section 12.5.5.2.1 of RFC 5661.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/callback_proc.c |  116 +++++++++++++++++++++++++++++++++++++++++++++++-
 fs/nfs/nfs4_fs.h       |    1 +
 fs/nfs/pnfs.c          |   84 +++++++++++++++++++++++++++--------
 fs/nfs/pnfs.h          |   12 +++++
 4 files changed, 193 insertions(+), 20 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c1bb157..2f645f9 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -12,6 +12,7 @@
 #include "callback.h"
 #include "delegation.h"
 #include "internal.h"
+#include "pnfs.h"
 
 #ifdef NFS_DEBUG
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
@@ -107,10 +108,123 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
 
 #if defined(CONFIG_NFS_V4_1)
 
+static int initiate_layout_draining(struct nfs_client *clp,
+				    struct cb_layoutrecallargs *args)
+{
+	struct pnfs_layout_hdr *lo;
+	int rv = NFS4ERR_NOMATCHING_LAYOUT;
+
+	if (args->cbl_recall_type == RETURN_FILE) {
+		LIST_HEAD(free_me_list);
+
+		args->cbl_inode = NULL;
+		spin_lock(&clp->cl_lock);
+		list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+			if (nfs_compare_fh(&args->cbl_fh,
+					   &NFS_I(lo->plh_inode)->fh))
+				continue;
+			if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+				rv = NFS4ERR_DELAY;
+			else {
+				/* Without this, layout can be freed as soon
+				 * as we release cl_lock.  Matched in
+				 * do_callback_layoutrecall.
+				 */
+				get_layout_hdr(lo);
+				args->cbl_inode = lo->plh_inode;
+				rv = NFS4_OK;
+			}
+			break;
+		}
+		spin_unlock(&clp->cl_lock);
+
+		spin_lock(&lo->plh_inode->i_lock);
+		if (rv == NFS4_OK) {
+			lo->plh_block_lgets++;
+			mark_matching_lsegs_invalid(lo, &free_me_list,
+						    args->cbl_range.iomode);
+		}
+		pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
+		spin_unlock(&lo->plh_inode->i_lock);
+		pnfs_free_lseg_list(&free_me_list);
+	} else {
+		struct pnfs_layout_hdr *tmp;
+		LIST_HEAD(recall_list);
+		LIST_HEAD(free_me_list);
+		struct pnfs_layout_range range = {
+			.iomode = IOMODE_ANY,
+			.offset = 0,
+			.length = NFS4_MAX_UINT64,
+		};
+
+		spin_lock(&clp->cl_lock);
+		list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+			if ((args->cbl_recall_type == RETURN_FSID) &&
+			    memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
+				   &args->cbl_fsid, sizeof(struct nfs_fsid)))
+				continue;
+			get_layout_hdr(lo);
+			BUG_ON(!list_empty(&lo->plh_bulk_recall));
+			list_add(&lo->plh_bulk_recall, &recall_list);
+		}
+		spin_unlock(&clp->cl_lock);
+		list_for_each_entry_safe(lo, tmp,
+					 &recall_list, plh_bulk_recall) {
+			spin_lock(&lo->plh_inode->i_lock);
+			set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+			mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode);
+			list_del_init(&lo->plh_bulk_recall);
+			spin_unlock(&lo->plh_inode->i_lock);
+			put_layout_hdr(lo);
+			rv = NFS4_OK;
+		}
+		pnfs_free_lseg_list(&free_me_list);
+	}
+	return rv;
+}
+
+static u32 do_callback_layoutrecall(struct nfs_client *clp,
+				    struct cb_layoutrecallargs *args)
+{
+	u32 status, res = NFS4ERR_DELAY;
+
+	dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
+	if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state))
+		goto out;
+	status = initiate_layout_draining(clp, args);
+	if (status)
+		res = status;
+	else if (args->cbl_recall_type == RETURN_FILE) {
+		struct pnfs_layout_hdr *lo;
+
+		lo = NFS_I(args->cbl_inode)->layout;
+		spin_lock(&lo->plh_inode->i_lock);
+		lo->plh_block_lgets--;
+		spin_unlock(&lo->plh_inode->i_lock);
+		put_layout_hdr(lo);
+		res = NFS4ERR_DELAY;
+	}
+	clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state);
+out:
+	dprintk("%s returning %i\n", __func__, res);
+	return res;
+
+}
+
 __be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
 				  void *dummy, struct cb_process_state *cps)
 {
-	return cpu_to_be32(NFS4ERR_NOTSUPP); /* STUB */
+	u32 res;
+
+	dprintk("%s: -->\n", __func__);
+
+	if (cps->clp)
+		res = do_callback_layoutrecall(cps->clp, args);
+	else
+		res = NFS4ERR_OP_NOT_IN_SESSION;
+
+	dprintk("%s: exit with status = %d\n", __func__, res);
+	return cpu_to_be32(res);
 }
 
 int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a6eecf..d927251 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -44,6 +44,7 @@ enum nfs4_client_state {
 	NFS4CLNT_RECLAIM_REBOOT,
 	NFS4CLNT_RECLAIM_NOGRACE,
 	NFS4CLNT_DELEGRETURN,
+	NFS4CLNT_LAYOUTRECALL,
 	NFS4CLNT_SESSION_RESET,
 	NFS4CLNT_RECALL_SLOT,
 };
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 53a0184..e8b8d04 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -178,7 +178,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
  */
 
 /* Need to hold i_lock if caller does not already hold reference */
-static void
+void
 get_layout_hdr(struct pnfs_layout_hdr *lo)
 {
 	atomic_inc(&lo->plh_refcount);
@@ -256,6 +256,7 @@ put_lseg_locked(struct pnfs_layout_segment *lseg,
 			/* List does not take a reference, so no need for put here */
 			list_del_init(&lseg->pls_layout->plh_layouts);
 			spin_unlock(&clp->cl_lock);
+			clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
 		}
 		list_add(&lseg->pls_list, tmp_list);
 	}
@@ -281,7 +282,7 @@ static void mark_lseg_invalid(struct pnfs_layout_segment *lseg,
 }
 
 /* Returns false if no lsegs match, true otherwise */
-static bool
+bool
 mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 			    struct list_head *tmp_list,
 			    u32 iomode)
@@ -304,7 +305,7 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 	return rv;
 }
 
-static void
+void
 pnfs_free_lseg_list(struct list_head *free_me)
 {
 	struct pnfs_layout_segment *lseg, *tmp;
@@ -356,23 +357,46 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
 }
 
 /* update lo->plh_stateid with new if is more recent */
-static void
-pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
-			const nfs4_stateid *new)
+void
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
+			bool update_barrier)
 {
 	u32 oldseq, newseq;
 
 	oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
 	newseq = be32_to_cpu(new->stateid.seqid);
-	if ((int)(newseq - oldseq) > 0)
+	if ((int)(newseq - oldseq) > 0) {
 		memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
+		if (update_barrier) {
+			u32 new_barrier = be32_to_cpu(new->stateid.seqid);
+
+			if ((int)(new_barrier - lo->plh_barrier))
+				lo->plh_barrier = new_barrier;
+		} else {
+			/* Because of wraparound, we want to keep the barrier
+			 * "close" to the current seqids.  It needs to be
+			 * within 2**31 to count as "behind", so if it
+			 * gets too near that limit, give us a little leeway
+			 * and bring it to within 2**30.
+			 * NOTE - and yes, this is all unsigned arithmetic.
+			 */
+			if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
+				lo->plh_barrier = newseq - (1 << 30);
+		}
+	}
 }
 
 /* lget is set to 1 if called from inside send_layoutget call chain */
 static bool
-pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, int lget)
-{
-	return (list_empty(&lo->plh_segs) &&
+pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
+			int lget)
+{
+	if ((stateid) &&
+	    (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
+		return true;
+	return lo->plh_block_lgets ||
+		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+		(list_empty(&lo->plh_segs) &&
 		 (atomic_read(&lo->plh_outstanding) > lget));
 }
 
@@ -384,7 +408,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
 
 	dprintk("--> %s\n", __func__);
 	spin_lock(&lo->plh_inode->i_lock);
-	if (pnfs_layoutgets_blocked(lo, 1)) {
+	if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
 		status = -EAGAIN;
 	} else if (list_empty(&lo->plh_segs)) {
 		int seq;
@@ -503,6 +527,7 @@ alloc_init_layout_hdr(struct inode *ino)
 	atomic_set(&lo->plh_refcount, 1);
 	INIT_LIST_HEAD(&lo->plh_layouts);
 	INIT_LIST_HEAD(&lo->plh_segs);
+	INIT_LIST_HEAD(&lo->plh_bulk_recall);
 	lo->plh_inode = ino;
 	return lo;
 }
@@ -554,7 +579,7 @@ is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
  * lookup range in layout
  */
 static struct pnfs_layout_segment *
-pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
+pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
 {
 	struct pnfs_layout_segment *lseg, *ret = NULL;
 
@@ -599,19 +624,22 @@ pnfs_update_layout(struct inode *ino,
 		goto out_unlock;
 	}
 
-	/* Check to see if the layout for the given range already exists */
-	lseg = pnfs_has_layout(lo, iomode);
-	if (lseg) {
-		dprintk("%s: Using cached lseg %p for iomode %d)\n",
-			__func__, lseg, iomode);
+	/* Do we even need to bother with this? */
+	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+		dprintk("%s matches recall, use MDS\n", __func__);
 		goto out_unlock;
 	}
+	/* Check to see if the layout for the given range already exists */
+	lseg = pnfs_find_lseg(lo, iomode);
+	if (lseg)
+		goto out_unlock;
 
 	/* if LAYOUTGET already failed once we don't try again */
 	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
 		goto out_unlock;
 
-	if (pnfs_layoutgets_blocked(lo, 0))
+	if (pnfs_layoutgets_blocked(lo, NULL, 0))
 		goto out_unlock;
 	atomic_inc(&lo->plh_outstanding);
 
@@ -634,6 +662,7 @@ pnfs_update_layout(struct inode *ino,
 			spin_lock(&clp->cl_lock);
 			list_del_init(&lo->plh_layouts);
 			spin_unlock(&clp->cl_lock);
+			clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
 		}
 		spin_unlock(&ino->i_lock);
 	}
@@ -655,6 +684,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 	struct nfs4_layoutget_res *res = &lgp->res;
 	struct pnfs_layout_segment *lseg;
 	struct inode *ino = lo->plh_inode;
+	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
 	int status = 0;
 
 	/* Verify we got what we asked for.
@@ -681,16 +711,32 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 	}
 
 	spin_lock(&ino->i_lock);
+	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+		dprintk("%s forget reply due to recall\n", __func__);
+		goto out_forget_reply;
+	}
+
+	if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
+		dprintk("%s forget reply due to state\n", __func__);
+		goto out_forget_reply;
+	}
 	init_lseg(lo, lseg);
 	lseg->pls_range = res->range;
 	*lgp->lsegpp = lseg;
 	pnfs_insert_layout(lo, lseg);
 
 	/* Done processing layoutget. Set the layout stateid */
-	pnfs_set_layout_stateid(lo, &res->stateid);
+	pnfs_set_layout_stateid(lo, &res->stateid, false);
 	spin_unlock(&ino->i_lock);
 out:
 	return status;
+
+out_forget_reply:
+	spin_unlock(&ino->i_lock);
+	lseg->pls_layout = lo;
+	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+	goto out;
 }
 
 /*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 8aaab56..9c81d82 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -49,6 +49,7 @@ struct pnfs_layout_segment {
 enum {
 	NFS_LAYOUT_RO_FAILED = 0,	/* get ro layout failed stop trying */
 	NFS_LAYOUT_RW_FAILED,		/* get rw layout failed stop trying */
+	NFS_LAYOUT_BULK_RECALL,		/* bulk recall affecting layout */
 	NFS_LAYOUT_DESTROYED,		/* no new use of layout allowed */
 };
 
@@ -67,9 +68,12 @@ struct pnfs_layoutdriver_type {
 struct pnfs_layout_hdr {
 	atomic_t		plh_refcount;
 	struct list_head	plh_layouts;   /* other client layouts */
+	struct list_head	plh_bulk_recall; /* clnt list of bulk recalls */
 	struct list_head	plh_segs;      /* layout segments list */
 	nfs4_stateid		plh_stateid;
 	atomic_t		plh_outstanding; /* number of RPCs out */
+	unsigned long		plh_block_lgets; /* block LAYOUTGET if >0 */
+	u32			plh_barrier; /* ignore lower seqids */
 	unsigned long		plh_flags;
 	struct inode		*plh_inode;
 };
@@ -139,18 +143,26 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
 extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
 
 /* pnfs.c */
+void get_layout_hdr(struct pnfs_layout_hdr *lo);
 struct pnfs_layout_segment *
 pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
 		   enum pnfs_iomode access_type);
 void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
 void unset_pnfs_layoutdriver(struct nfs_server *);
 int pnfs_layout_process(struct nfs4_layoutget *lgp);
+void pnfs_free_lseg_list(struct list_head *tmp_list);
 void pnfs_destroy_layout(struct nfs_inode *);
 void pnfs_destroy_all_layouts(struct nfs_client *);
 void put_layout_hdr(struct pnfs_layout_hdr *lo);
+void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
+			     const nfs4_stateid *new,
+			     bool update_barrier);
 int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
 				  struct pnfs_layout_hdr *lo,
 				  struct nfs4_state *open_state);
+bool mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
+				 struct list_head *tmp_list,
+				 u32 iomode);
 
 
 static inline int lo_fail_bit(u32 iomode)
-- 
1.7.2.1


  parent reply	other threads:[~2010-12-22  4:01 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-22  4:00 [PATCH 00/15] pnfs wave 2 submission, try 2 Fred Isaman
2010-12-22  4:00 ` [PATCH 01/15] pnfs: fix incorrect comment in destroy_lseg Fred Isaman
2010-12-22  4:00 ` [PATCH 02/15] pnfs: remove unnecessary field lgp->status Fred Isaman
2010-12-22  4:00 ` [PATCH 03/15] pnfs: add prefix to struct pnfs_layout_segment fields Fred Isaman
2010-12-22  4:00 ` [PATCH 04/15] pnfs: add prefix to struct pnfs_layout_hdr fields Fred Isaman
2010-12-22  4:00 ` [PATCH 05/15] pnfs: change layout state seqlock to a spinlock Fred Isaman
2010-12-22  4:00 ` [PATCH 06/15] pnfs: change how lsegs are removed from layout list Fred Isaman
2010-12-22 21:43   ` Trond Myklebust
2010-12-22 22:08     ` Fred Isaman
2010-12-22 23:35       ` Trond Myklebust
2010-12-22 23:49         ` Fred Isaman
2010-12-22  4:00 ` [PATCH 07/15] pnfs: layoutget rpc code cleanup Fred Isaman
2010-12-22  4:00 ` [PATCH 08/15] pnfs: serialize LAYOUTGET(openstateid) Fred Isaman
2010-12-22  4:00 ` [PATCH 09/15] pnfs: add layout to client list before sending rpc Fred Isaman
2010-12-22  4:00 ` [PATCH 10/15] pnfs: check that partial LAYOUTGET return is ignored Fred Isaman
2010-12-22  4:00 ` [PATCH 11/15] pnfs: change lo refcounting to atomic_t Fred Isaman
2010-12-22 21:47   ` Trond Myklebust
     [not found]     ` <1293054479.6422.18.camel-rJ7iovZKK19ZJLDQqaL3InhyD016LWXt@public.gmane.org>
2010-12-22 22:08       ` Fred Isaman
2010-12-22  4:00 ` [PATCH 12/15] pnfs: CB_LAYOUTRECALL xdr code Fred Isaman
2010-12-22  4:00 ` Fred Isaman [this message]
2010-12-22  4:00 ` [PATCH 14/15] pnfs: update nfs4_callback_recallany to handle layouts Fred Isaman
2010-12-22  4:00 ` [PATCH 15/15] pnfs: layout roc code Fred Isaman
2010-12-22 22:00   ` Trond Myklebust
2010-12-23  0:19     ` Fred Isaman
2010-12-26  8:40       ` Benny Halevy
2010-12-26 13:58         ` Fred Isaman
2010-12-23 12:47 [PATCH 00/15] pnfs wave 2 submission, try 3 Fred Isaman
2010-12-23 12:47 ` [PATCH 13/15] pnfs: add CB_LAYOUTRECALL handling Fred Isaman
2010-12-23 17:29 [PATCH 00/15] pnfs wave 2 submission, try 4 Fred Isaman
2010-12-23 17:29 ` [PATCH 13/15] pnfs: add CB_LAYOUTRECALL handling Fred Isaman
2010-12-23 19:25   ` Trond Myklebust
2010-12-23 20:08     ` Fred Isaman
2010-12-23 21:05       ` Trond Myklebust
2010-12-23 21:09         ` Fred Isaman
2010-12-23 23:54 [PATCH 00/15] pnfs wave 2 submission, try 5 Fred Isaman
2010-12-23 23:54 ` [PATCH 13/15] pnfs: add CB_LAYOUTRECALL handling Fred Isaman
2011-01-06 11:36 [PATCH 00/15] pnfs wave 2 submission Fred Isaman
2011-01-06 11:36 ` [PATCH 13/15] pnfs: add CB_LAYOUTRECALL handling Fred Isaman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1292990449-20057-14-git-send-email-iisaman@netapp.com \
    --to=iisaman@netapp.com \
    --cc=Trond.Myklebust@netapp.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.