All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/26] directio rewrite
@ 2012-04-09 20:51 Fred Isaman
  2012-04-09 20:51 ` [PATCH 01/26] NFS: check for req==NULL in nfs_try_to_update_request cleanup Fred Isaman
                   ` (25 more replies)
  0 siblings, 26 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:51 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

These patches rewrite the directio code to use the same coalesce
and RPC sendoff code as cached io.  This allows directio to make
use of the preexisting pnfs code.

There are some limitations at the moment that need to be fixed, but
given the amount of change this patchset introduces I'm
trying to put out as small a base set of patches as possible.

The primary TODOs are:
- Coalescing of bsize<PAGESIZE, while technically correct, is suboptimal.
  To fix this I'll need to adjust the *_multi functions to handle
  more than a single req

- Unaligned io is not sent to pnfs, as there is no existing code to deal
  with a layout boundary (think stripe edge) occurring in the middle
  of a nfs_page.

Fred


^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 01/26] NFS: check for req==NULL in nfs_try_to_update_request cleanup
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
@ 2012-04-09 20:51 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 02/26] NFS4.1: make pnfs_ld_[read|write]_done consistent Fred Isaman
                   ` (24 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:51 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/write.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2c68818..9b8d4d4 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -682,7 +682,8 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
 		req->wb_bytes = rqend - req->wb_offset;
 out_unlock:
 	spin_unlock(&inode->i_lock);
-	nfs_clear_request_commit(req);
+	if (req)
+		nfs_clear_request_commit(req);
 	return req;
 out_flushme:
 	spin_unlock(&inode->i_lock);
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 02/26] NFS4.1: make pnfs_ld_[read|write]_done consistent
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
  2012-04-09 20:51 ` [PATCH 01/26] NFS: check for req==NULL in nfs_try_to_update_request cleanup Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 03/26] NFS4.1: Add lseg to struct nfs4_fl_commit_bucket Fred Isaman
                   ` (23 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

The two functions had diverged quite a bit, with the write function
being a bit more robust than the read.

However, these still break badly in the desc->pg_bsize < PAGE_CACHE_SIZE case,
as then there is nothing hanging on the data->pages list, and the resend
ends up doing nothing.  This will be fixed in a patch later in the series.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/pnfs.c |   64 +++++++++++++++++++++++++++++++++-----------------------
 1 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b5d4515..e4aee9d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1189,6 +1189,17 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *
 	return 0;
 }
 
+static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
+{
+	dprintk("pnfs write error = %d\n", data->pnfs_error);
+	if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
+	    PNFS_LAYOUTRET_ON_ERROR) {
+		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags);
+		pnfs_return_layout(data->inode);
+	}
+	data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
+}
+
 /*
  * Called by non rpc-based layout drivers
  */
@@ -1197,19 +1208,8 @@ void pnfs_ld_write_done(struct nfs_write_data *data)
 	if (likely(!data->pnfs_error)) {
 		pnfs_set_layoutcommit(data);
 		data->mds_ops->rpc_call_done(&data->task, data);
-	} else {
-		dprintk("pnfs write error = %d\n", data->pnfs_error);
-		if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
-						PNFS_LAYOUTRET_ON_ERROR) {
-			/* Don't lo_commit on error, Server will needs to
-			 * preform a file recovery.
-			 */
-			clear_bit(NFS_INO_LAYOUTCOMMIT,
-				  &NFS_I(data->inode)->flags);
-			pnfs_return_layout(data->inode);
-		}
-		data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
-	}
+	} else
+		pnfs_ld_handle_write_error(data);
 	put_lseg(data->lseg);
 	data->mds_ops->rpc_release(data);
 }
@@ -1293,26 +1293,38 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
 
-static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+static int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head)
 {
 	struct nfs_pageio_descriptor pgio;
+	LIST_HEAD(failed);
 
-	put_lseg(data->lseg);
-	data->lseg = NULL;
-	dprintk("pnfs write error = %d\n", data->pnfs_error);
-	if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
-						PNFS_LAYOUTRET_ON_ERROR)
-		pnfs_return_layout(data->inode);
-
-	nfs_pageio_init_read_mds(&pgio, data->inode);
-
-	while (!list_empty(&data->pages)) {
-		struct nfs_page *req = nfs_list_entry(data->pages.next);
+	/* Resend all requests through the MDS */
+	nfs_pageio_init_read_mds(&pgio, inode);
+	while (!list_empty(head)) {
+		struct nfs_page *req = nfs_list_entry(head->next);
 
 		nfs_list_remove_request(req);
-		nfs_pageio_add_request(&pgio, req);
+		if (!nfs_pageio_add_request(&pgio, req))
+			nfs_list_add_request(req, &failed);
 	}
 	nfs_pageio_complete(&pgio);
+
+	if (!list_empty(&failed)) {
+		list_move(&failed, head);
+		return -EIO;
+	}
+	return 0;
+}
+
+static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+{
+	dprintk("pnfs read error = %d\n", data->pnfs_error);
+	if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
+	    PNFS_LAYOUTRET_ON_ERROR) {
+		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags);
+		pnfs_return_layout(data->inode);
+	}
+	data->task.tk_status = pnfs_read_done_resend_to_mds(data->inode, &data->pages);
 }
 
 /*
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 03/26] NFS4.1: Add lseg to struct nfs4_fl_commit_bucket
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
  2012-04-09 20:51 ` [PATCH 01/26] NFS: check for req==NULL in nfs_try_to_update_request cleanup Fred Isaman
  2012-04-09 20:52 ` [PATCH 02/26] NFS4.1: make pnfs_ld_[read|write]_done consistent Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-13 15:47   ` Myklebust, Trond
  2012-04-09 20:52 ` [PATCH 04/26] NFS: add a struct nfs_commit_data to replace nfs_write_data in commits Fred Isaman
                   ` (22 subsequent siblings)
  25 siblings, 1 reply; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Also create a commit_info structure to hold the bucket array and push
it up from the lseg to the layout where it really belongs.

While we are at it, fix a refcounting bug due to an (incorrect)
implicit assumption that filelayout_scan_ds_commit_list always
completely emptied the src list.

This clarifies refcounting, removes the ugly find_only_write_lseg
functions, and pushes the file layout commit code along on the path to
supporting multiple lsegs.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/nfs4filelayout.c |  150 +++++++++++++++++++++++++----------------------
 fs/nfs/nfs4filelayout.h |   20 ++++++-
 2 files changed, 97 insertions(+), 73 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 5acfd9e..0bbc88a 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -650,7 +650,15 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
 
 	dprintk("--> %s\n", __func__);
 	nfs4_fl_put_deviceid(fl->dsaddr);
-	kfree(fl->commit_buckets);
+	/* This assumes a single RW lseg */
+	if (lseg->pls_range.iomode == IOMODE_RW) {
+		struct nfs4_filelayout *flo;
+
+		flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
+		flo->commit_info.nbuckets = 0;
+		kfree(flo->commit_info.buckets);
+		flo->commit_info.buckets = NULL;
+	}
 	_filelayout_free_lseg(fl);
 }
 
@@ -681,19 +689,27 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
 	 * to filelayout_write_pagelist().
 	 * */
 	if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
+		struct nfs4_filelayout *flo = FILELAYOUT_FROM_HDR(layoutid);
 		int i;
 		int size = (fl->stripe_type == STRIPE_SPARSE) ?
 			fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
 
-		fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
-		if (!fl->commit_buckets) {
+		if (flo->commit_info.nbuckets != 0) {
+			/* Shouldn't happen if only one IOMODE_RW lseg */
 			filelayout_free_lseg(&fl->generic_hdr);
 			return NULL;
 		}
-		fl->number_of_buckets = size;
+		flo->commit_info.buckets = kcalloc(size,
+						   sizeof(struct nfs4_fl_commit_bucket),
+						   gfp_flags);
+		if (!flo->commit_info.buckets) {
+			filelayout_free_lseg(&fl->generic_hdr);
+			return NULL;
+		}
+		flo->commit_info.nbuckets = size;
 		for (i = 0; i < size; i++) {
-			INIT_LIST_HEAD(&fl->commit_buckets[i].written);
-			INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
+			INIT_LIST_HEAD(&flo->commit_info.buckets[i].written);
+			INIT_LIST_HEAD(&flo->commit_info.buckets[i].committing);
 		}
 	}
 	return &fl->generic_hdr;
@@ -793,17 +809,13 @@ filelayout_clear_request_commit(struct nfs_page *req)
 	if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
 		goto out;
 	if (list_is_singular(&req->wb_list)) {
-		struct pnfs_layout_segment *lseg;
+		struct nfs4_fl_commit_bucket *bucket;
 
-		/* From here we can find the bucket, but for the moment,
-		 * since there is only one relevant lseg...
-		 */
-		list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
-			if (lseg->pls_range.iomode == IOMODE_RW) {
-				freeme = lseg;
-				break;
-			}
-		}
+		bucket = list_first_entry(&req->wb_list,
+					  struct nfs4_fl_commit_bucket,
+					  written);
+		freeme = bucket->wlseg;
+		bucket->wlseg = NULL;
 	}
 out:
 	nfs_request_remove_commit_list(req);
@@ -818,6 +830,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
 	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
 	u32 i, j;
 	struct list_head *list;
+	struct nfs4_fl_commit_bucket *buckets;
 
 	if (fl->commit_through_mds)
 		return &NFS_I(req->wb_context->dentry->d_inode)->commit_list;
@@ -831,15 +844,16 @@ filelayout_choose_commit_list(struct nfs_page *req,
 	j = nfs4_fl_calc_j_index(lseg,
 				 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
 	i = select_bucket_index(fl, j);
-	list = &fl->commit_buckets[i].written;
+	buckets = FILELAYOUT_FROM_HDR(lseg->pls_layout)->commit_info.buckets;
+	list = &buckets[i].written;
 	if (list_empty(list)) {
 		/* Non-empty buckets hold a reference on the lseg.  That ref
 		 * is normally transferred to the COMMIT call and released
 		 * there.  It could also be released if the last req is pulled
 		 * off due to a rewrite, in which case it will be done in
-		 * filelayout_remove_commit_req
+		 * filelayout_clear_request_commit
 		 */
-		get_lseg(lseg);
+		buckets[i].wlseg = get_lseg(lseg);
 	}
 	set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
 	return list;
@@ -908,32 +922,6 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
 				   &filelayout_commit_call_ops, how);
 }
 
-/*
- * This is only useful while we are using whole file layouts.
- */
-static struct pnfs_layout_segment *
-find_only_write_lseg_locked(struct inode *inode)
-{
-	struct pnfs_layout_segment *lseg;
-
-	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
-		if (lseg->pls_range.iomode == IOMODE_RW)
-			return lseg;
-	return NULL;
-}
-
-static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
-{
-	struct pnfs_layout_segment *rv;
-
-	spin_lock(&inode->i_lock);
-	rv = find_only_write_lseg_locked(inode);
-	if (rv)
-		get_lseg(rv);
-	spin_unlock(&inode->i_lock);
-	return rv;
-}
-
 static int
 filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
 		spinlock_t *lock)
@@ -955,6 +943,13 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
 		if (ret == max)
 			break;
 	}
+	if (ret) {
+		bucket->clseg = bucket->wlseg;
+		if (list_empty(src))
+			bucket->wlseg = NULL;
+		else
+			get_lseg(bucket->clseg);
+	}
 	return ret;
 }
 
@@ -964,18 +959,14 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
 static int filelayout_scan_commit_lists(struct inode *inode, int max,
 		spinlock_t *lock)
 {
-	struct pnfs_layout_segment *lseg;
-	struct nfs4_filelayout_segment *fl;
+	struct nfs4_fl_commit_info *fl_cinfo;
 	int i, rv = 0, cnt;
 
-	lseg = find_only_write_lseg_locked(inode);
-	if (!lseg)
+	fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info;
+	if (fl_cinfo->nbuckets == 0)
 		goto out_done;
-	fl = FILELAYOUT_LSEG(lseg);
-	if (fl->commit_through_mds)
-		goto out_done;
-	for (i = 0; i < fl->number_of_buckets && max != 0; i++) {
-		cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i],
+	for (i = 0; i < fl_cinfo->nbuckets && max != 0; i++) {
+		cnt = filelayout_scan_ds_commit_list(&fl_cinfo->buckets[i],
 				max, lock);
 		max -= cnt;
 		rv += cnt;
@@ -987,38 +978,35 @@ out_done:
 static unsigned int
 alloc_ds_commits(struct inode *inode, struct list_head *list)
 {
-	struct pnfs_layout_segment *lseg;
-	struct nfs4_filelayout_segment *fl;
+	struct nfs4_fl_commit_info *fl_cinfo;
+	struct nfs4_fl_commit_bucket *bucket;
 	struct nfs_write_data *data;
 	int i, j;
 	unsigned int nreq = 0;
 
-	/* Won't need this when non-whole file layout segments are supported
-	 * instead we will use a pnfs_layout_hdr structure */
-	lseg = find_only_write_lseg(inode);
-	if (!lseg)
-		return 0;
-	fl = FILELAYOUT_LSEG(lseg);
-	for (i = 0; i < fl->number_of_buckets; i++) {
-		if (list_empty(&fl->commit_buckets[i].committing))
+	fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info;
+	bucket = fl_cinfo->buckets;
+	for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
+		if (list_empty(&bucket->committing))
 			continue;
 		data = nfs_commitdata_alloc();
 		if (!data)
 			break;
 		data->ds_commit_index = i;
-		data->lseg = lseg;
+		data->lseg = bucket->clseg;
+		bucket->clseg = NULL;
 		list_add(&data->pages, list);
 		nreq++;
 	}
 
 	/* Clean up on error */
-	for (j = i; j < fl->number_of_buckets; j++) {
-		if (list_empty(&fl->commit_buckets[i].committing))
+	for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
+		if (list_empty(&bucket->committing))
 			continue;
-		nfs_retry_commit(&fl->commit_buckets[i].committing, lseg);
-		put_lseg(lseg);  /* associated with emptying bucket */
+		nfs_retry_commit(&bucket->committing, bucket->clseg);
+		put_lseg(bucket->clseg);
+		bucket->clseg = NULL;
 	}
-	put_lseg(lseg);
 	/* Caller will clean up entries put on list */
 	return nreq;
 }
@@ -1058,7 +1046,10 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 			nfs_initiate_commit(data, NFS_CLIENT(inode),
 					    data->mds_ops, how);
 		} else {
-			nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg);
+			struct nfs4_fl_commit_info *fl_cinfo;
+
+			fl_cinfo = &FILELAYOUT_FROM_HDR(data->lseg->pls_layout)->commit_info;
+			nfs_init_commit(data, &fl_cinfo->buckets[data->ds_commit_index].committing, data->lseg);
 			filelayout_initiate_commit(data, how);
 		}
 	}
@@ -1072,10 +1063,27 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
 	nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
 }
 
+static struct pnfs_layout_hdr *
+filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
+{
+	struct nfs4_filelayout *flo;
+
+	flo = kzalloc(sizeof(*flo), gfp_flags);
+	return &flo->generic_hdr;
+}
+
+static void
+filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+	kfree(FILELAYOUT_FROM_HDR(lo));
+}
+
 static struct pnfs_layoutdriver_type filelayout_type = {
 	.id			= LAYOUT_NFSV4_1_FILES,
 	.name			= "LAYOUT_NFSV4_1_FILES",
 	.owner			= THIS_MODULE,
+	.alloc_layout_hdr	= filelayout_alloc_layout_hdr,
+	.free_layout_hdr	= filelayout_free_layout_hdr,
 	.alloc_lseg		= filelayout_alloc_lseg,
 	.free_lseg		= filelayout_free_lseg,
 	.pg_read_ops		= &filelayout_pg_read_ops,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 21190bb..333a3ac 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -77,6 +77,13 @@ struct nfs4_file_layout_dsaddr {
 struct nfs4_fl_commit_bucket {
 	struct list_head written;
 	struct list_head committing;
+	struct pnfs_layout_segment *wlseg;
+	struct pnfs_layout_segment *clseg;
+};
+
+struct nfs4_fl_commit_info {
+	int nbuckets;
+	struct nfs4_fl_commit_bucket *buckets;
 };
 
 struct nfs4_filelayout_segment {
@@ -89,10 +96,19 @@ struct nfs4_filelayout_segment {
 	struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
 	unsigned int num_fh;
 	struct nfs_fh **fh_array;
-	struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */
-	int number_of_buckets;
 };
 
+struct nfs4_filelayout {
+	struct pnfs_layout_hdr generic_hdr;
+	struct nfs4_fl_commit_info commit_info;
+};
+
+static inline struct nfs4_filelayout *
+FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo)
+{
+	return container_of(lo, struct nfs4_filelayout, generic_hdr);
+}
+
 static inline struct nfs4_filelayout_segment *
 FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
 {
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 04/26] NFS: add a struct nfs_commit_data to replace nfs_write_data in commits
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (2 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 03/26] NFS4.1: Add lseg to struct nfs4_fl_commit_bucket Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 05/26] NFS: grab open context in direct read Fred Isaman
                   ` (21 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Commits don't need the vectors of pages, etc. that writes do. Split out
a separate structure for the commit operation.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/direct.c         |   17 +++++-------
 fs/nfs/internal.h       |   13 +++++----
 fs/nfs/nfs3proc.c       |   10 ++++++-
 fs/nfs/nfs3xdr.c        |    6 ++--
 fs/nfs/nfs4filelayout.c |   65 +++++++++++++++++++++++++++++++++-------------
 fs/nfs/nfs4proc.c       |   23 ++++++++++++----
 fs/nfs/nfs4xdr.c        |    8 +++---
 fs/nfs/proc.c           |    8 +++++-
 fs/nfs/write.c          |   50 +++++++++++++++++++++--------------
 include/linux/nfs_fs.h  |    4 +-
 include/linux/nfs_xdr.h |   45 ++++++++++++++++++++++++++++++--
 11 files changed, 173 insertions(+), 76 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 481be7f..d6d856d 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -82,7 +82,7 @@ struct nfs_direct_req {
 
 	/* commit state */
 	struct list_head	rewrite_list;	/* saved nfs_write_data structs */
-	struct nfs_write_data *	commit_data;	/* special write_data for commits */
+	struct nfs_commit_data *commit_data;	/* special write_data for commits */
 	int			flags;
 #define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
 #define NFS_ODIRECT_RESCHED_WRITES	(2)	/* write verification failed */
@@ -524,7 +524,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 
 static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data *data = calldata;
+	struct nfs_commit_data *data = calldata;
 
 	/* Call the NFS version-specific code */
 	NFS_PROTO(data->inode)->commit_done(task, data);
@@ -532,8 +532,8 @@ static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
 
 static void nfs_direct_commit_release(void *calldata)
 {
-	struct nfs_write_data *data = calldata;
-	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+	struct nfs_commit_data *data = calldata;
+	struct nfs_direct_req *dreq = data->dreq;
 	int status = data->task.tk_status;
 
 	if (status < 0) {
@@ -551,14 +551,14 @@ static void nfs_direct_commit_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfs_commit_direct_ops = {
-	.rpc_call_prepare = nfs_write_prepare,
+	.rpc_call_prepare = nfs_commit_prepare,
 	.rpc_call_done = nfs_direct_commit_result,
 	.rpc_release = nfs_direct_commit_release,
 };
 
 static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 {
-	struct nfs_write_data *data = dreq->commit_data;
+	struct nfs_commit_data *data = dreq->commit_data;
 	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_argp = &data->args,
@@ -581,9 +581,6 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	data->args.fh = NFS_FH(data->inode);
 	data->args.offset = 0;
 	data->args.count = 0;
-	data->args.context = dreq->ctx;
-	data->args.lock_context = dreq->l_ctx;
-	data->res.count = 0;
 	data->res.fattr = &data->fattr;
 	data->res.verf = &data->verf;
 	nfs_fattr_init(&data->fattr);
@@ -625,7 +622,7 @@ static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
 {
 	dreq->commit_data = nfs_commitdata_alloc();
 	if (dreq->commit_data != NULL)
-		dreq->commit_data->req = (struct nfs_page *) dreq;
+		dreq->commit_data->dreq = dreq;
 }
 #else
 static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2476dc6..87e899d 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -314,24 +314,25 @@ extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
 				  struct inode *inode, int ioflags);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_writedata_release(struct nfs_write_data *wdata);
-extern void nfs_commit_free(struct nfs_write_data *p);
+extern void nfs_commit_free(struct nfs_commit_data *p);
 extern int nfs_initiate_write(struct nfs_write_data *data,
 			      struct rpc_clnt *clnt,
 			      const struct rpc_call_ops *call_ops,
 			      int how);
 extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
-extern int nfs_initiate_commit(struct nfs_write_data *data,
-			       struct rpc_clnt *clnt,
+extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_initiate_commit(struct rpc_clnt *clnt,
+			       struct nfs_commit_data *data,
 			       const struct rpc_call_ops *call_ops,
 			       int how);
-extern void nfs_init_commit(struct nfs_write_data *data,
+extern void nfs_init_commit(struct nfs_commit_data *data,
 			    struct list_head *head,
 			    struct pnfs_layout_segment *lseg);
 void nfs_retry_commit(struct list_head *page_list,
 		      struct pnfs_layout_segment *lseg);
 void nfs_commit_clear_lock(struct nfs_inode *nfsi);
-void nfs_commitdata_release(void *data);
-void nfs_commit_release_pages(struct nfs_write_data *data);
+void nfs_commitdata_release(struct nfs_commit_data *data);
+void nfs_commit_release_pages(struct nfs_commit_data *data);
 void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head);
 void nfs_request_remove_commit_list(struct nfs_page *req);
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 5242eae..b1daca7 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -848,7 +848,12 @@ static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
 	rpc_call_start(task);
 }
 
-static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{
+	rpc_call_start(task);
+}
+
+static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	if (nfs3_async_handle_jukebox(task, data->inode))
 		return -EAGAIN;
@@ -856,7 +861,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 	return 0;
 }
 
-static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
 }
@@ -907,6 +912,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.write_rpc_prepare = nfs3_proc_write_rpc_prepare,
 	.write_done	= nfs3_write_done,
 	.commit_setup	= nfs3_proc_commit_setup,
+	.commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
 	.commit_done	= nfs3_commit_done,
 	.lock		= nfs3_proc_lock,
 	.clear_acl_cache = nfs3_forget_cached_acls,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index a77cc9a..01e53e9 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1287,7 +1287,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
  *	};
  */
 static void encode_commit3args(struct xdr_stream *xdr,
-			       const struct nfs_writeargs *args)
+			       const struct nfs_commitargs *args)
 {
 	__be32 *p;
 
@@ -1300,7 +1300,7 @@ static void encode_commit3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_writeargs *args)
+				     const struct nfs_commitargs *args)
 {
 	encode_commit3args(xdr, args);
 }
@@ -2319,7 +2319,7 @@ out_status:
  */
 static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_writeres *result)
+				   struct nfs_commitres *result)
 {
 	enum nfs_stat status;
 	int error;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 0bbc88a..d62c6ed 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -250,7 +250,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
 }
 
 /* Fake up some data that will cause nfs_commit_release to retry the writes. */
-static void prepare_to_resend_writes(struct nfs_write_data *data)
+static void prepare_to_resend_writes(struct nfs_commit_data *data)
 {
 	struct nfs_page *first = nfs_list_entry(data->pages.next);
 
@@ -261,11 +261,11 @@ static void prepare_to_resend_writes(struct nfs_write_data *data)
 }
 
 static int filelayout_commit_done_cb(struct rpc_task *task,
-				     struct nfs_write_data *data)
+				     struct nfs_commit_data *data)
 {
 	int reset = 0;
 
-	if (filelayout_async_handle_error(task, data->args.context->state,
+	if (filelayout_async_handle_error(task, data->context->state,
 					  data->ds_clp, &reset) == -EAGAIN) {
 		dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
 			__func__, data->ds_clp, data->ds_clp->cl_session);
@@ -315,15 +315,42 @@ static void filelayout_write_release(void *data)
 	wdata->mds_ops->rpc_release(data);
 }
 
-static void filelayout_commit_release(void *data)
+static void filelayout_commit_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+	struct nfs_commit_data *wdata = data;
 
-	nfs_commit_release_pages(wdata);
-	if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
-		nfs_commit_clear_lock(NFS_I(wdata->inode));
-	put_lseg(wdata->lseg);
-	nfs_commitdata_release(wdata);
+	if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
+				&wdata->args.seq_args, &wdata->res.seq_res,
+				task))
+		return;
+
+	rpc_call_start(task);
+}
+
+static void filelayout_write_commit_done(struct rpc_task *task, void *data)
+{
+	struct nfs_commit_data *wdata = data;
+
+	/* Note this may cause RPC to be resent */
+	wdata->mds_ops->rpc_call_done(task, data);
+}
+
+static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
+{
+	struct nfs_commit_data *cdata = data;
+
+	rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics);
+}
+
+static void filelayout_commit_release(void *calldata)
+{
+	struct nfs_commit_data *data = calldata;
+
+	nfs_commit_release_pages(data);
+	if (atomic_dec_and_test(&NFS_I(data->inode)->commits_outstanding))
+		nfs_commit_clear_lock(NFS_I(data->inode));
+	put_lseg(data->lseg);
+	nfs_commitdata_release(data);
 }
 
 static const struct rpc_call_ops filelayout_read_call_ops = {
@@ -341,9 +368,9 @@ static const struct rpc_call_ops filelayout_write_call_ops = {
 };
 
 static const struct rpc_call_ops filelayout_commit_call_ops = {
-	.rpc_call_prepare = filelayout_write_prepare,
-	.rpc_call_done = filelayout_write_call_done,
-	.rpc_count_stats = filelayout_write_count_stats,
+	.rpc_call_prepare = filelayout_commit_prepare,
+	.rpc_call_done = filelayout_write_commit_done,
+	.rpc_count_stats = filelayout_commit_count_stats,
 	.rpc_release = filelayout_commit_release,
 };
 
@@ -894,7 +921,7 @@ select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
 	return flseg->fh_array[i];
 }
 
-static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
+static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
 {
 	struct pnfs_layout_segment *lseg = data->lseg;
 	struct nfs4_pnfs_ds *ds;
@@ -913,12 +940,12 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
 		return -EAGAIN;
 	}
 	dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
-	data->write_done_cb = filelayout_commit_done_cb;
+	data->commit_done_cb = filelayout_commit_done_cb;
 	data->ds_clp = ds->ds_clp;
 	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
 	if (fh)
 		data->args.fh = fh;
-	return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient,
+	return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
 				   &filelayout_commit_call_ops, how);
 }
 
@@ -980,7 +1007,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
 {
 	struct nfs4_fl_commit_info *fl_cinfo;
 	struct nfs4_fl_commit_bucket *bucket;
-	struct nfs_write_data *data;
+	struct nfs_commit_data *data;
 	int i, j;
 	unsigned int nreq = 0;
 
@@ -1016,7 +1043,7 @@ static int
 filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 			   int how)
 {
-	struct nfs_write_data	*data, *tmp;
+	struct nfs_commit_data *data, *tmp;
 	LIST_HEAD(list);
 	unsigned int nreq = 0;
 
@@ -1043,7 +1070,7 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 		list_del_init(&data->pages);
 		if (!data->lseg) {
 			nfs_init_commit(data, mds_pages, NULL);
-			nfs_initiate_commit(data, NFS_CLIENT(inode),
+			nfs_initiate_commit(NFS_CLIENT(inode), data,
 					    data->mds_ops, how);
 		} else {
 			struct nfs4_fl_commit_info *fl_cinfo;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f82bde0..54f6268 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3413,7 +3413,17 @@ static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
 	rpc_call_start(task);
 }
 
-static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
+static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{
+	if (nfs4_setup_sequence(NFS_SERVER(data->inode),
+				&data->args.seq_args,
+				&data->res.seq_res,
+				task))
+		return;
+	rpc_call_start(task);
+}
+
+static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	struct inode *inode = data->inode;
 
@@ -3425,14 +3435,14 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat
 	return 0;
 }
 
-static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	if (!nfs4_sequence_done(task, &data->res.seq_res))
 		return -EAGAIN;
-	return data->write_done_cb(task, data);
+	return data->commit_done_cb(task, data);
 }
 
-static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
 {
 	struct nfs_server *server = NFS_SERVER(data->inode);
 
@@ -3441,8 +3451,8 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa
 		data->res.fattr = NULL;
 	} else
 		data->args.bitmask = server->cache_consistency_bitmask;
-	if (!data->write_done_cb)
-		data->write_done_cb = nfs4_commit_done_cb;
+	if (data->commit_done_cb == NULL)
+		data->commit_done_cb = nfs4_commit_done_cb;
 	data->res.server = server;
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
 	nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
@@ -6498,6 +6508,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.write_rpc_prepare = nfs4_proc_write_rpc_prepare,
 	.write_done	= nfs4_write_done,
 	.commit_setup	= nfs4_proc_commit_setup,
+	.commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
 	.commit_done	= nfs4_commit_done,
 	.lock		= nfs4_proc_lock,
 	.clear_acl_cache = nfs4_zap_acl_attr,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c74fdb1..c7ed9cb 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1103,7 +1103,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
 	encode_nfs4_stateid(xdr, arg->stateid);
 }
 
-static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
+static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr)
 {
 	__be32 *p;
 
@@ -2447,7 +2447,7 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
  *  a COMMIT request
  */
 static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_writeargs *args)
+				struct nfs_commitargs *args)
 {
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -4101,7 +4101,7 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier)
 	return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);
 }
 
-static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
+static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res)
 {
 	int status;
 
@@ -6336,7 +6336,7 @@ out:
  * Decode COMMIT response
  */
 static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs_writeres *res)
+			       struct nfs_commitres *res)
 {
 	struct compound_hdr hdr;
 	int status;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b63b6f4..bf80503 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -688,8 +688,13 @@ static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_d
 	rpc_call_start(task);
 }
 
+static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{
+	BUG();
+}
+
 static void
-nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
 {
 	BUG();
 }
@@ -764,6 +769,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.write_rpc_prepare = nfs_proc_write_rpc_prepare,
 	.write_done	= nfs_write_done,
 	.commit_setup	= nfs_proc_commit_setup,
+	.commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
 	.lock		= nfs_proc_lock,
 	.lock_check_bounds = nfs_lock_check_bounds,
 	.close_context	= nfs_close_context,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9b8d4d4..87dd76a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -48,11 +48,12 @@ static const struct rpc_call_ops nfs_commit_ops;
 
 static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
+static struct kmem_cache *nfs_cdata_cachep;
 static mempool_t *nfs_commit_mempool;
 
-struct nfs_write_data *nfs_commitdata_alloc(void)
+struct nfs_commit_data *nfs_commitdata_alloc(void)
 {
-	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
+	struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
 
 	if (p) {
 		memset(p, 0, sizeof(*p));
@@ -62,10 +63,8 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
 }
 EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
 
-void nfs_commit_free(struct nfs_write_data *p)
+void nfs_commit_free(struct nfs_commit_data *p)
 {
-	if (p && (p->pagevec != &p->page_array[0]))
-		kfree(p->pagevec);
 	mempool_free(p, nfs_commit_mempool);
 }
 EXPORT_SYMBOL_GPL(nfs_commit_free);
@@ -1179,6 +1178,13 @@ void nfs_write_prepare(struct rpc_task *task, void *calldata)
 	NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
 }
 
+void nfs_commit_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs_commit_data *data = calldata;
+
+	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
+}
+
 static const struct rpc_call_ops nfs_write_partial_ops = {
 	.rpc_call_prepare = nfs_write_prepare,
 	.rpc_call_done = nfs_writeback_done_partial,
@@ -1355,16 +1361,14 @@ void nfs_commit_clear_lock(struct nfs_inode *nfsi)
 }
 EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
 
-void nfs_commitdata_release(void *data)
+void nfs_commitdata_release(struct nfs_commit_data *data)
 {
-	struct nfs_write_data *wdata = data;
-
-	put_nfs_open_context(wdata->args.context);
-	nfs_commit_free(wdata);
+	put_nfs_open_context(data->context);
+	nfs_commit_free(data);
 }
 EXPORT_SYMBOL_GPL(nfs_commitdata_release);
 
-int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
+int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
 			const struct rpc_call_ops *call_ops,
 			int how)
 {
@@ -1403,7 +1407,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit);
 /*
  * Set up the argument/result storage required for the RPC call.
  */
-void nfs_init_commit(struct nfs_write_data *data,
+void nfs_init_commit(struct nfs_commit_data *data,
 			    struct list_head *head,
 			    struct pnfs_layout_segment *lseg)
 {
@@ -1424,8 +1428,7 @@ void nfs_init_commit(struct nfs_write_data *data,
 	/* Note: we always request a commit of the entire inode */
 	data->args.offset = 0;
 	data->args.count  = 0;
-	data->args.context = get_nfs_open_context(first->wb_context);
-	data->res.count   = 0;
+	data->context     = get_nfs_open_context(first->wb_context);
 	data->res.fattr   = &data->fattr;
 	data->res.verf    = &data->verf;
 	nfs_fattr_init(&data->fattr);
@@ -1455,7 +1458,7 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit);
 static int
 nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 {
-	struct nfs_write_data	*data;
+	struct nfs_commit_data	*data;
 
 	data = nfs_commitdata_alloc();
 
@@ -1464,7 +1467,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 
 	/* Set up the argument struct */
 	nfs_init_commit(data, head, NULL);
-	return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);
+	return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how);
  out_bad:
 	nfs_retry_commit(head, NULL);
 	nfs_commit_clear_lock(NFS_I(inode));
@@ -1476,7 +1479,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
  */
 static void nfs_commit_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data	*data = calldata;
+	struct nfs_commit_data	*data = calldata;
 
         dprintk("NFS: %5u nfs_commit_done (status %d)\n",
                                 task->tk_pid, task->tk_status);
@@ -1485,7 +1488,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 	NFS_PROTO(data->inode)->commit_done(task, data);
 }
 
-void nfs_commit_release_pages(struct nfs_write_data *data)
+void nfs_commit_release_pages(struct nfs_commit_data *data)
 {
 	struct nfs_page	*req;
 	int status = data->task.tk_status;
@@ -1526,7 +1529,7 @@ EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
 
 static void nfs_commit_release(void *calldata)
 {
-	struct nfs_write_data *data = calldata;
+	struct nfs_commit_data *data = calldata;
 
 	nfs_commit_release_pages(data);
 	nfs_commit_clear_lock(NFS_I(data->inode));
@@ -1534,7 +1537,7 @@ static void nfs_commit_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfs_commit_ops = {
-	.rpc_call_prepare = nfs_write_prepare,
+	.rpc_call_prepare = nfs_commit_prepare,
 	.rpc_call_done = nfs_commit_done,
 	.rpc_release = nfs_commit_release,
 };
@@ -1753,6 +1756,13 @@ int __init nfs_init_writepagecache(void)
 	if (nfs_wdata_mempool == NULL)
 		return -ENOMEM;
 
+	nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
+					     sizeof(struct nfs_commit_data),
+					     0, SLAB_HWCACHE_ALIGN,
+					     NULL);
+	if (nfs_cdata_cachep == NULL)
+		return -ENOMEM;
+
 	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
 						      nfs_wdata_cachep);
 	if (nfs_commit_mempool == NULL)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 52a1bdb..d5d68f3 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -552,8 +552,8 @@ extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 extern int  nfs_commit_inode(struct inode *, int);
-extern struct nfs_write_data *nfs_commitdata_alloc(void);
-extern void nfs_commit_free(struct nfs_write_data *wdata);
+extern struct nfs_commit_data *nfs_commitdata_alloc(void);
+extern void nfs_commit_free(struct nfs_commit_data *data);
 #else
 static inline int
 nfs_commit_inode(struct inode *inode, int how)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index bfd0d1b..2aff66d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -514,6 +514,24 @@ struct nfs_writeres {
 };
 
 /*
+ * Arguments to the commit call.
+ */
+struct nfs_commitargs {
+	struct nfs_fh		*fh;
+	__u64			offset;
+	__u32			count;
+	const u32		*bitmask;
+	struct nfs4_sequence_args	seq_args;
+};
+
+struct nfs_commitres {
+	struct nfs_fattr	*fattr;
+	struct nfs_writeverf	*verf;
+	const struct nfs_server *server;
+	struct nfs4_sequence_res	seq_res;
+};
+
+/*
  * Common arguments to the unlink call
  */
 struct nfs_removeargs {
@@ -1166,6 +1184,8 @@ struct nfs_read_data {
 	struct page		*page_array[NFS_PAGEVEC_SIZE];
 };
 
+struct nfs_direct_req;
+
 struct nfs_write_data {
 	struct rpc_task		task;
 	struct inode		*inode;
@@ -1181,7 +1201,6 @@ struct nfs_write_data {
 	struct nfs_writeres	res;		/* result struct */
 	struct pnfs_layout_segment *lseg;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
-	int			ds_commit_index;
 	const struct rpc_call_ops *mds_ops;
 	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
 #ifdef CONFIG_NFS_V4
@@ -1192,6 +1211,25 @@ struct nfs_write_data {
 	struct page		*page_array[NFS_PAGEVEC_SIZE];
 };
 
+struct nfs_commit_data {
+	struct rpc_task		task;
+	struct inode		*inode;
+	struct rpc_cred		*cred;
+	struct nfs_fattr	fattr;
+	struct nfs_writeverf	verf;
+	struct list_head	pages;		/* Coalesced requests we wish to flush */
+	struct list_head	list;		/* lists of struct nfs_write_data */
+	struct nfs_direct_req	*dreq;		/* O_DIRECT request */
+	struct nfs_commitargs	args;		/* argument struct */
+	struct nfs_commitres	res;		/* result struct */
+	struct nfs_open_context *context;
+	struct pnfs_layout_segment *lseg;
+	struct nfs_client	*ds_clp;	/* pNFS data server */
+	int			ds_commit_index;
+	const struct rpc_call_ops *mds_ops;
+	int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
+};
+
 struct nfs_unlinkdata {
 	struct hlist_node list;
 	struct nfs_removeargs args;
@@ -1272,8 +1310,9 @@ struct nfs_rpc_ops {
 	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);
 	void	(*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
 	int	(*write_done)  (struct rpc_task *, struct nfs_write_data *);
-	void	(*commit_setup) (struct nfs_write_data *, struct rpc_message *);
-	int	(*commit_done) (struct rpc_task *, struct nfs_write_data *);
+	void	(*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
+	void	(*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
+	int	(*commit_done) (struct rpc_task *, struct nfs_commit_data *);
 	int	(*lock)(struct file *, int, struct file_lock *);
 	int	(*lock_check_bounds)(const struct file_lock *);
 	void	(*clear_acl_cache)(struct inode *);
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 05/26] NFS: grab open context in direct read
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (3 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 04/26] NFS: add a struct nfs_commit_data to replace nfs_write_data in commits Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 06/26] NFS: dprintks in directio code were referencing task after put Fred Isaman
                   ` (20 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/direct.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d6d856d..5897dfe 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -260,7 +260,7 @@ static void nfs_direct_read_release(void *calldata)
 
 	if (put_dreq(dreq))
 		nfs_direct_complete(dreq);
-	nfs_readdata_free(data);
+	nfs_readdata_release(data);
 }
 
 static const struct rpc_call_ops nfs_read_direct_ops = {
@@ -337,7 +337,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		data->inode = inode;
 		data->cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
-		data->args.context = ctx;
+		data->args.context = get_nfs_open_context(ctx);
 		data->args.lock_context = dreq->l_ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 06/26] NFS: dprintks in directio code were referencing task after put
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (4 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 05/26] NFS: grab open context in direct read Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 07/26] NFS: reverse arg order in nfs_initiate_[read|write] Fred Isaman
                   ` (19 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/direct.c |    8 ++++----
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 5897dfe..fb7fbaa 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -357,15 +357,15 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		task = rpc_run_task(&task_setup_data);
 		if (IS_ERR(task))
 			break;
-		rpc_put_task(task);
 
 		dprintk("NFS: %5u initiated direct read call "
 			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
-				data->task.tk_pid,
+				task->tk_pid,
 				inode->i_sb->s_id,
 				(long long)NFS_FILEID(inode),
 				bytes,
 				(unsigned long long)data->args.offset);
+		rpc_put_task(task);
 
 		started += bytes;
 		user_addr += bytes;
@@ -784,15 +784,15 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		task = rpc_run_task(&task_setup_data);
 		if (IS_ERR(task))
 			break;
-		rpc_put_task(task);
 
 		dprintk("NFS: %5u initiated direct write call "
 			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
-				data->task.tk_pid,
+				task->tk_pid,
 				inode->i_sb->s_id,
 				(long long)NFS_FILEID(inode),
 				bytes,
 				(unsigned long long)data->args.offset);
+		rpc_put_task(task);
 
 		started += bytes;
 		user_addr += bytes;
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 07/26] NFS: reverse arg order in nfs_initiate_[read|write]
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (5 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 06/26] NFS: dprintks in directio code were referencing task after put Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 08/26] NFS: remove unnecessary casts of void pointers in nfs4filelayout.c Fred Isaman
                   ` (18 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Make it consistent with nfs_initiate_commit.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/internal.h       |    7 ++++---
 fs/nfs/nfs4filelayout.c |    4 ++--
 fs/nfs/read.c           |    5 +++--
 fs/nfs/write.c          |    6 +++---
 4 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 87e899d..abdf40c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -296,7 +296,8 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
 
 struct nfs_pageio_descriptor;
 /* read.c */
-extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
+extern int nfs_initiate_read(struct rpc_clnt *clnt,
+			     struct nfs_read_data *data,
 			     const struct rpc_call_ops *call_ops);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
@@ -315,8 +316,8 @@ extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_writedata_release(struct nfs_write_data *wdata);
 extern void nfs_commit_free(struct nfs_commit_data *p);
-extern int nfs_initiate_write(struct nfs_write_data *data,
-			      struct rpc_clnt *clnt,
+extern int nfs_initiate_write(struct rpc_clnt *clnt,
+			      struct nfs_write_data *data,
 			      const struct rpc_call_ops *call_ops,
 			      int how);
 extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index d62c6ed..fe6a116 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -413,7 +413,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 	data->mds_offset = offset;
 
 	/* Perform an asynchronous read to ds */
-	status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
+	status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
 				   &filelayout_read_call_ops);
 	BUG_ON(status != 0);
 	return PNFS_ATTEMPTED;
@@ -460,7 +460,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
 	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
 
 	/* Perform an asynchronous write */
-	status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
+	status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
 				    &filelayout_write_call_ops, sync);
 	BUG_ON(status != 0);
 	return PNFS_ATTEMPTED;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 9a0e8ef..e1db04a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -169,7 +169,8 @@ static void nfs_readpage_release(struct nfs_page *req)
 	nfs_release_request(req);
 }
 
-int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
+int nfs_initiate_read(struct rpc_clnt *clnt,
+		      struct nfs_read_data *data,
 		      const struct rpc_call_ops *call_ops)
 {
 	struct inode *inode = data->inode;
@@ -240,7 +241,7 @@ static int nfs_do_read(struct nfs_read_data *data,
 {
 	struct inode *inode = data->args.context->dentry->d_inode;
 
-	return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
+	return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops);
 }
 
 static int
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 87dd76a..2ed93b8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -836,8 +836,8 @@ static int flush_task_priority(int how)
 	return RPC_PRIORITY_NORMAL;
 }
 
-int nfs_initiate_write(struct nfs_write_data *data,
-		       struct rpc_clnt *clnt,
+int nfs_initiate_write(struct rpc_clnt *clnt,
+		       struct nfs_write_data *data,
 		       const struct rpc_call_ops *call_ops,
 		       int how)
 {
@@ -937,7 +937,7 @@ static int nfs_do_write(struct nfs_write_data *data,
 {
 	struct inode *inode = data->args.context->dentry->d_inode;
 
-	return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
+	return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how);
 }
 
 static int nfs_do_multiple_writes(struct list_head *head,
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 08/26] NFS: remove unnecessary casts of void pointers in nfs4filelayout.c
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (6 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 07/26] NFS: reverse arg order in nfs_initiate_[read|write] Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 09/26] NFS: use req_offset where appropriate Fred Isaman
                   ` (17 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/nfs4filelayout.c |   16 ++++++++--------
 1 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index fe6a116..b9b2e54 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -191,7 +191,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
  */
 static void filelayout_read_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+	struct nfs_read_data *rdata = data;
 
 	rdata->read_done_cb = filelayout_read_done_cb;
 
@@ -205,7 +205,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 
 static void filelayout_read_call_done(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+	struct nfs_read_data *rdata = data;
 
 	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
 
@@ -215,14 +215,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
 
 static void filelayout_read_count_stats(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+	struct nfs_read_data *rdata = data;
 
 	rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics);
 }
 
 static void filelayout_read_release(void *data)
 {
-	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+	struct nfs_read_data *rdata = data;
 
 	put_lseg(rdata->lseg);
 	rdata->mds_ops->rpc_release(data);
@@ -282,7 +282,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
 
 static void filelayout_write_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+	struct nfs_write_data *wdata = data;
 
 	if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
 				&wdata->args.seq_args, &wdata->res.seq_res,
@@ -294,7 +294,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
 
 static void filelayout_write_call_done(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+	struct nfs_write_data *wdata = data;
 
 	/* Note this may cause RPC to be resent */
 	wdata->mds_ops->rpc_call_done(task, data);
@@ -302,14 +302,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
 
 static void filelayout_write_count_stats(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+	struct nfs_write_data *wdata = data;
 
 	rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics);
 }
 
 static void filelayout_write_release(void *data)
 {
-	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+	struct nfs_write_data *wdata = data;
 
 	put_lseg(wdata->lseg);
 	wdata->mds_ops->rpc_release(data);
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 09/26] NFS: use req_offset where appropriate
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (7 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 08/26] NFS: remove unnecessary casts of void pointers in nfs4filelayout.c Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 10/26] NFS: put open context on error in nfs_pagein_multi Fred Isaman
                   ` (16 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/nfs4filelayout.c |    7 +++----
 1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index b9b2e54..04dd63f 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -759,8 +759,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 	    !nfs_generic_pg_test(pgio, prev, req))
 		return false;
 
-	p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
-	r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
+	p_stripe = (u64)req_offset(prev);
+	r_stripe = (u64)req_offset(req);
 	stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
 
 	do_div(p_stripe, stripe_unit);
@@ -868,8 +868,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
 	 * to store the value calculated in filelayout_write_pagelist
 	 * and just use that here.
 	 */
-	j = nfs4_fl_calc_j_index(lseg,
-				 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
+	j = nfs4_fl_calc_j_index(lseg, req_offset(req));
 	i = select_bucket_index(fl, j);
 	buckets = FILELAYOUT_FROM_HDR(lseg->pls_layout)->commit_info.buckets;
 	list = &buckets[i].written;
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 10/26] NFS: put open context on error in nfs_pagein_multi
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (8 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 09/26] NFS: use req_offset where appropriate Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-13 15:52   ` Myklebust, Trond
  2012-04-09 20:52 ` [PATCH 11/26] NFS: put open context on error in nfs_flush_multi Fred Isaman
                   ` (15 subsequent siblings)
  25 siblings, 1 reply; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/read.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e1db04a..4ddba67 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -323,7 +323,7 @@ out_bad:
 	while (!list_empty(res)) {
 		data = list_entry(res->next, struct nfs_read_data, list);
 		list_del(&data->list);
-		nfs_readdata_free(data);
+		nfs_readdata_release(data);
 	}
 	nfs_readpage_release(req);
 	return -ENOMEM;
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 11/26] NFS: put open context on error in nfs_flush_multi
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (9 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 10/26] NFS: put open context on error in nfs_pagein_multi Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-13 15:52   ` Myklebust, Trond
  2012-04-09 20:52 ` [PATCH 12/26] NFS: create common nfs_pgio_header for both read and write Fred Isaman
                   ` (14 subsequent siblings)
  25 siblings, 1 reply; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/write.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2ed93b8..76735dd 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1018,7 +1018,7 @@ out_bad:
 	while (!list_empty(res)) {
 		data = list_entry(res->next, struct nfs_write_data, list);
 		list_del(&data->list);
-		nfs_writedata_free(data);
+		nfs_writedata_release(data);
 	}
 	nfs_redirty_request(req);
 	return -ENOMEM;
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 12/26] NFS: create common nfs_pgio_header for both read and write
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (10 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 11/26] NFS: put open context on error in nfs_flush_multi Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 13/26] NFS: create struct nfs_page_array Fred Isaman
                   ` (13 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

In order to avoid duplicating all the data in nfs_read_data whenever we
split it up into multiple RPC calls (either due to a short read result
or due to rsize < PAGE_SIZE), we split out the bits that are the same
per RPC call into a separate "header" structure.

The goal this patch moves towards is to have a single header
refcounted by several rpc_data structures.  Thus, want to always refer
from rpc_data to the header, and not the other way.  This patch comes
close to that ideal, but the directio code currently needs some
special casing, isolated in the nfs_direct_[read_write]hdr_release()
functions.  This will be dealt with in a future patch.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/blocklayout/blocklayout.c |   79 ++++++++++++++++-------------
 fs/nfs/direct.c                  |   73 ++++++++++++++++++--------
 fs/nfs/internal.h                |    4 ++
 fs/nfs/nfs3proc.c                |   14 +++--
 fs/nfs/nfs4filelayout.c          |   40 ++++++++------
 fs/nfs/nfs4proc.c                |   44 +++++++++-------
 fs/nfs/objlayout/objio_osd.c     |   16 +++---
 fs/nfs/objlayout/objlayout.c     |   19 ++++---
 fs/nfs/pnfs.c                    |  102 +++++++++++++++++++++----------------
 fs/nfs/proc.c                    |   10 +++-
 fs/nfs/read.c                    |   89 ++++++++++++++++++--------------
 fs/nfs/write.c                   |  104 +++++++++++++++++++++-----------------
 include/linux/nfs_fs.h           |   12 ----
 include/linux/nfs_xdr.h          |   48 ++++++++++--------
 14 files changed, 376 insertions(+), 278 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9c94297..192e16a 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -185,7 +185,6 @@ static void bl_end_io_read(struct bio *bio, int err)
 	struct parallel_io *par = bio->bi_private;
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-	struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;
 
 	do {
 		struct page *page = bvec->bv_page;
@@ -196,9 +195,12 @@ static void bl_end_io_read(struct bio *bio, int err)
 			SetPageUptodate(page);
 	} while (bvec >= bio->bi_io_vec);
 	if (!uptodate) {
-		if (!rdata->pnfs_error)
-			rdata->pnfs_error = -EIO;
-		pnfs_set_lo_fail(rdata->lseg);
+		struct nfs_read_data *rdata = par->data;
+		struct nfs_pgio_header *header = rdata->header;
+
+		if (!header->pnfs_error)
+			header->pnfs_error = -EIO;
+		pnfs_set_lo_fail(header->lseg);
 	}
 	bio_put(bio);
 	put_parallel(par);
@@ -219,7 +221,7 @@ bl_end_par_io_read(void *data, int unused)
 {
 	struct nfs_read_data *rdata = data;
 
-	rdata->task.tk_status = rdata->pnfs_error;
+	rdata->task.tk_status = rdata->header->pnfs_error;
 	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
 	schedule_work(&rdata->task.u.tk_work);
 }
@@ -227,6 +229,7 @@ bl_end_par_io_read(void *data, int unused)
 static enum pnfs_try_status
 bl_read_pagelist(struct nfs_read_data *rdata)
 {
+	struct nfs_pgio_header *header = rdata->header;
 	int i, hole;
 	struct bio *bio = NULL;
 	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -254,10 +257,10 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 			bl_put_extent(cow_read);
 			bio = bl_submit_bio(READ, bio);
 			/* Get the next one */
-			be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg),
+			be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
 					     isect, &cow_read);
 			if (!be) {
-				rdata->pnfs_error = -EIO;
+				header->pnfs_error = -EIO;
 				goto out;
 			}
 			extent_length = be->be_length -
@@ -284,7 +287,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 						 isect, pages[i], be_read,
 						 bl_end_io_read, par);
 			if (IS_ERR(bio)) {
-				rdata->pnfs_error = PTR_ERR(bio);
+				header->pnfs_error = PTR_ERR(bio);
 				bio = NULL;
 				goto out;
 			}
@@ -292,9 +295,9 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 		isect += PAGE_CACHE_SECTORS;
 		extent_length -= PAGE_CACHE_SECTORS;
 	}
-	if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) {
+	if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
 		rdata->res.eof = 1;
-		rdata->res.count = rdata->inode->i_size - f_offset;
+		rdata->res.count = header->inode->i_size - f_offset;
 	} else {
 		rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
 	}
@@ -343,7 +346,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
 	struct parallel_io *par = bio->bi_private;
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-	struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
 
 	do {
 		struct page *page = bvec->bv_page;
@@ -356,9 +358,12 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
 	} while (bvec >= bio->bi_io_vec);
 
 	if (unlikely(!uptodate)) {
-		if (!wdata->pnfs_error)
-			wdata->pnfs_error = -EIO;
-		pnfs_set_lo_fail(wdata->lseg);
+		struct nfs_write_data *data = par->data;
+		struct nfs_pgio_header *header = data->header;
+
+		if (!header->pnfs_error)
+			header->pnfs_error = -EIO;
+		pnfs_set_lo_fail(header->lseg);
 	}
 	bio_put(bio);
 	put_parallel(par);
@@ -368,12 +373,13 @@ static void bl_end_io_write(struct bio *bio, int err)
 {
 	struct parallel_io *par = bio->bi_private;
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
+	struct nfs_write_data *data = par->data;
+	struct nfs_pgio_header *header = data->header;
 
 	if (!uptodate) {
-		if (!wdata->pnfs_error)
-			wdata->pnfs_error = -EIO;
-		pnfs_set_lo_fail(wdata->lseg);
+		if (!header->pnfs_error)
+			header->pnfs_error = -EIO;
+		pnfs_set_lo_fail(header->lseg);
 	}
 	bio_put(bio);
 	put_parallel(par);
@@ -389,9 +395,9 @@ static void bl_write_cleanup(struct work_struct *work)
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
 	wdata = container_of(task, struct nfs_write_data, task);
-	if (likely(!wdata->pnfs_error)) {
+	if (likely(!wdata->header->pnfs_error)) {
 		/* Marks for LAYOUTCOMMIT */
-		mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+		mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
 				     wdata->args.offset, wdata->args.count);
 	}
 	pnfs_ld_write_done(wdata);
@@ -402,12 +408,12 @@ static void bl_end_par_io_write(void *data, int num_se)
 {
 	struct nfs_write_data *wdata = data;
 
-	if (unlikely(wdata->pnfs_error)) {
-		bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
+	if (unlikely(wdata->header->pnfs_error)) {
+		bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
 					num_se);
 	}
 
-	wdata->task.tk_status = wdata->pnfs_error;
+	wdata->task.tk_status = wdata->header->pnfs_error;
 	wdata->verf.committed = NFS_FILE_SYNC;
 	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
 	schedule_work(&wdata->task.u.tk_work);
@@ -538,6 +544,7 @@ check_page:
 static enum pnfs_try_status
 bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 {
+	struct nfs_pgio_header *header = wdata->header;
 	int i, ret, npg_zero, pg_index, last = 0;
 	struct bio *bio = NULL;
 	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -550,7 +557,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 	pgoff_t index;
 	u64 temp;
 	int npg_per_block =
-	    NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
+	    NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
 
 	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
 	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
@@ -564,7 +571,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 	/* At this point, have to be more careful with error handling */
 
 	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
-	be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
+	be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read);
 	if (!be || !is_writable(be, isect)) {
 		dprintk("%s no matching extents!\n", __func__);
 		goto out_mds;
@@ -595,10 +602,10 @@ fill_invalid_ext:
 			dprintk("%s zero %dth page: index %lu isect %llu\n",
 				__func__, npg_zero, index,
 				(unsigned long long)isect);
-			page = bl_find_get_zeroing_page(wdata->inode, index,
+			page = bl_find_get_zeroing_page(header->inode, index,
 							cow_read);
 			if (unlikely(IS_ERR(page))) {
-				wdata->pnfs_error = PTR_ERR(page);
+				header->pnfs_error = PTR_ERR(page);
 				goto out;
 			} else if (page == NULL)
 				goto next_page;
@@ -610,7 +617,7 @@ fill_invalid_ext:
 					__func__, ret);
 				end_page_writeback(page);
 				page_cache_release(page);
-				wdata->pnfs_error = ret;
+				header->pnfs_error = ret;
 				goto out;
 			}
 			if (likely(!bl_push_one_short_extent(be->be_inval)))
@@ -618,11 +625,11 @@ fill_invalid_ext:
 			else {
 				end_page_writeback(page);
 				page_cache_release(page);
-				wdata->pnfs_error = -ENOMEM;
+				header->pnfs_error = -ENOMEM;
 				goto out;
 			}
 			/* FIXME: This should be done in bi_end_io */
-			mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+			mark_extents_written(BLK_LSEG2EXT(header->lseg),
 					     page->index << PAGE_CACHE_SHIFT,
 					     PAGE_CACHE_SIZE);
 
@@ -630,7 +637,7 @@ fill_invalid_ext:
 						 isect, page, be,
 						 bl_end_io_write_zero, par);
 			if (IS_ERR(bio)) {
-				wdata->pnfs_error = PTR_ERR(bio);
+				header->pnfs_error = PTR_ERR(bio);
 				bio = NULL;
 				goto out;
 			}
@@ -651,10 +658,10 @@ next_page:
 			bl_put_extent(be);
 			bio = bl_submit_bio(WRITE, bio);
 			/* Get the next one */
-			be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg),
+			be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
 					     isect, NULL);
 			if (!be || !is_writable(be, isect)) {
-				wdata->pnfs_error = -EINVAL;
+				header->pnfs_error = -EINVAL;
 				goto out;
 			}
 			if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
@@ -662,7 +669,7 @@ next_page:
 								be->be_inval)))
 					par->bse_count++;
 				else {
-					wdata->pnfs_error = -ENOMEM;
+					header->pnfs_error = -ENOMEM;
 					goto out;
 				}
 			}
@@ -675,7 +682,7 @@ next_page:
 			if (unlikely(ret)) {
 				dprintk("%s bl_mark_sectors_init fail %d\n",
 					__func__, ret);
-				wdata->pnfs_error = ret;
+				header->pnfs_error = ret;
 				goto out;
 			}
 		}
@@ -683,7 +690,7 @@ next_page:
 					 isect, pages[i], be,
 					 bl_end_io_write, par);
 		if (IS_ERR(bio)) {
-			wdata->pnfs_error = PTR_ERR(bio);
+			header->pnfs_error = PTR_ERR(bio);
 			bio = NULL;
 			goto out;
 		}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index fb7fbaa..56176af 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -242,7 +242,7 @@ static void nfs_direct_read_release(void *calldata)
 {
 
 	struct nfs_read_data *data = calldata;
-	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *)data->header->req;
 	int status = data->task.tk_status;
 
 	spin_lock(&dreq->lock);
@@ -269,6 +269,15 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
 	.rpc_release = nfs_direct_read_release,
 };
 
+static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
+{
+	struct nfs_read_data *data = &rhdr->rpc_data;
+
+	if (data->pagevec != data->page_array)
+		kfree(data->pagevec);
+	nfs_readhdr_free(&rhdr->header);
+}
+
 /*
  * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
  * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
@@ -301,6 +310,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 	ssize_t started = 0;
 
 	do {
+		struct nfs_read_header *rhdr;
 		struct nfs_read_data *data;
 		size_t bytes;
 
@@ -308,23 +318,24 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		bytes = min(rsize,count);
 
 		result = -ENOMEM;
-		data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes));
-		if (unlikely(!data))
+		rhdr = nfs_readhdr_alloc(nfs_page_array_len(pgbase, bytes));
+		if (unlikely(!rhdr))
 			break;
+		data = &rhdr->rpc_data;
 
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
 					data->npages, 1, 0, data->pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
 		if (result < 0) {
-			nfs_readdata_free(data);
+			nfs_direct_readhdr_release(rhdr);
 			break;
 		}
 		if ((unsigned)result < data->npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
 				nfs_direct_release_pages(data->pagevec, result);
-				nfs_readdata_free(data);
+				nfs_direct_readhdr_release(rhdr);
 				break;
 			}
 			bytes -= pgbase;
@@ -333,9 +344,9 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 
 		get_dreq(dreq);
 
-		data->req = (struct nfs_page *) dreq;
-		data->inode = inode;
-		data->cred = msg.rpc_cred;
+		rhdr->header.req = (struct nfs_page *) dreq;
+		rhdr->header.inode = inode;
+		rhdr->header.cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = get_nfs_open_context(ctx);
 		data->args.lock_context = dreq->l_ctx;
@@ -447,13 +458,23 @@ out:
 	return result;
 }
 
+static void nfs_direct_writehdr_release(struct nfs_write_header *whdr)
+{
+	struct nfs_write_data *data = &whdr->rpc_data;
+
+	if (data->pagevec != data->page_array)
+		kfree(data->pagevec);
+	nfs_writehdr_free(&whdr->header);
+}
+
 static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 {
 	while (!list_empty(&dreq->rewrite_list)) {
-		struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
-		list_del(&data->pages);
-		nfs_direct_release_pages(data->pagevec, data->npages);
-		nfs_writedata_free(data);
+		struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages);
+		struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
+		list_del(&hdr->pages);
+		nfs_direct_release_pages(whdr->rpc_data.pagevec, whdr->rpc_data.npages);
+		nfs_direct_writehdr_release(whdr);
 	}
 }
 
@@ -463,6 +484,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	struct inode *inode = dreq->inode;
 	struct list_head *p;
 	struct nfs_write_data *data;
+	struct nfs_pgio_header *hdr;
 	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_cred = dreq->ctx->cred,
@@ -479,7 +501,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	get_dreq(dreq);
 
 	list_for_each(p, &dreq->rewrite_list) {
-		data = list_entry(p, struct nfs_write_data, pages);
+		hdr = list_entry(p, struct nfs_pgio_header, pages);
+		data = &(container_of(hdr, struct nfs_write_header, header))->rpc_data;
 
 		get_dreq(dreq);
 
@@ -652,7 +675,8 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 static void nfs_direct_write_release(void *calldata)
 {
 	struct nfs_write_data *data = calldata;
-	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+	struct nfs_pgio_header *hdr = data->header;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) hdr->req;
 	int status = data->task.tk_status;
 
 	spin_lock(&dreq->lock);
@@ -684,7 +708,7 @@ out_unlock:
 	spin_unlock(&dreq->lock);
 
 	if (put_dreq(dreq))
-		nfs_direct_write_complete(dreq, data->inode);
+		nfs_direct_write_complete(dreq, hdr->inode);
 }
 
 static const struct rpc_call_ops nfs_write_direct_ops = {
@@ -725,6 +749,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 	ssize_t started = 0;
 
 	do {
+		struct nfs_write_header *whdr;
 		struct nfs_write_data *data;
 		size_t bytes;
 
@@ -732,23 +757,25 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		bytes = min(wsize,count);
 
 		result = -ENOMEM;
-		data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes));
-		if (unlikely(!data))
+		whdr = nfs_writehdr_alloc(nfs_page_array_len(pgbase, bytes));
+		if (unlikely(!whdr))
 			break;
 
+		data = &whdr->rpc_data;
+
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
 					data->npages, 0, 0, data->pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
 		if (result < 0) {
-			nfs_writedata_free(data);
+			nfs_direct_writehdr_release(whdr);
 			break;
 		}
 		if ((unsigned)result < data->npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
 				nfs_direct_release_pages(data->pagevec, result);
-				nfs_writedata_free(data);
+				nfs_direct_writehdr_release(whdr);
 				break;
 			}
 			bytes -= pgbase;
@@ -757,11 +784,11 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 
 		get_dreq(dreq);
 
-		list_move_tail(&data->pages, &dreq->rewrite_list);
+		list_move_tail(&whdr->header.pages, &dreq->rewrite_list);
 
-		data->req = (struct nfs_page *) dreq;
-		data->inode = inode;
-		data->cred = msg.rpc_cred;
+		whdr->header.req = (struct nfs_page *) dreq;
+		whdr->header.inode = inode;
+		whdr->header.cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
 		data->args.lock_context = dreq->l_ctx;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index abdf40c..9b2b8bf 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -296,6 +296,8 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
 
 struct nfs_pageio_descriptor;
 /* read.c */
+extern struct nfs_read_header *nfs_readhdr_alloc(unsigned int npages);
+extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
 extern int nfs_initiate_read(struct rpc_clnt *clnt,
 			     struct nfs_read_data *data,
 			     const struct rpc_call_ops *call_ops);
@@ -309,6 +311,8 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
 /* write.c */
+extern struct nfs_write_header *nfs_writehdr_alloc(unsigned int npages);
+extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
 		struct list_head *head);
 extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index b1daca7..56dcefc 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -811,11 +811,13 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-	if (nfs3_async_handle_jukebox(task, data->inode))
+	struct inode *inode = data->header->inode;
+
+	if (nfs3_async_handle_jukebox(task, inode))
 		return -EAGAIN;
 
-	nfs_invalidate_atime(data->inode);
-	nfs_refresh_inode(data->inode, &data->fattr);
+	nfs_invalidate_atime(inode);
+	nfs_refresh_inode(inode, &data->fattr);
 	return 0;
 }
 
@@ -831,10 +833,12 @@ static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
 
 static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	if (nfs3_async_handle_jukebox(task, data->inode))
+	struct inode *inode = data->header->inode;
+
+	if (nfs3_async_handle_jukebox(task, inode))
 		return -EAGAIN;
 	if (task->tk_status >= 0)
-		nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
+		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 04dd63f..850ae85 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -148,6 +148,7 @@ wait_on_recovery:
 static int filelayout_read_done_cb(struct rpc_task *task,
 				struct nfs_read_data *data)
 {
+	struct nfs_pgio_header *hdr = data->header;
 	int reset = 0;
 
 	dprintk("%s DS read\n", __func__);
@@ -157,7 +158,7 @@ static int filelayout_read_done_cb(struct rpc_task *task,
 		dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
 			__func__, data->ds_clp, data->ds_clp->cl_session);
 		if (reset) {
-			pnfs_set_lo_fail(data->lseg);
+			pnfs_set_lo_fail(hdr->lseg);
 			nfs4_reset_read(task, data);
 		}
 		rpc_restart_call_prepare(task);
@@ -175,13 +176,15 @@ static int filelayout_read_done_cb(struct rpc_task *task,
 static void
 filelayout_set_layoutcommit(struct nfs_write_data *wdata)
 {
-	if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds ||
+	struct nfs_pgio_header *hdr = wdata->header;
+
+	if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
 	    wdata->res.verf->committed == NFS_FILE_SYNC)
 		return;
 
 	pnfs_set_layoutcommit(wdata);
-	dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino,
-		(unsigned long) NFS_I(wdata->inode)->layout->plh_lwb);
+	dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
+		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
 }
 
 /*
@@ -210,27 +213,28 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
 	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
 
 	/* Note this may cause RPC to be resent */
-	rdata->mds_ops->rpc_call_done(task, data);
+	rdata->header->mds_ops->rpc_call_done(task, data);
 }
 
 static void filelayout_read_count_stats(struct rpc_task *task, void *data)
 {
 	struct nfs_read_data *rdata = data;
 
-	rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics);
+	rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_read_release(void *data)
 {
 	struct nfs_read_data *rdata = data;
 
-	put_lseg(rdata->lseg);
-	rdata->mds_ops->rpc_release(data);
+	put_lseg(rdata->header->lseg);
+	rdata->header->mds_ops->rpc_release(data);
 }
 
 static int filelayout_write_done_cb(struct rpc_task *task,
 				struct nfs_write_data *data)
 {
+	struct nfs_pgio_header *hdr = data->header;
 	int reset = 0;
 
 	if (filelayout_async_handle_error(task, data->args.context->state,
@@ -238,7 +242,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
 		dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
 			__func__, data->ds_clp, data->ds_clp->cl_session);
 		if (reset) {
-			pnfs_set_lo_fail(data->lseg);
+			pnfs_set_lo_fail(hdr->lseg);
 			nfs4_reset_write(task, data);
 		}
 		rpc_restart_call_prepare(task);
@@ -297,22 +301,22 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
 	struct nfs_write_data *wdata = data;
 
 	/* Note this may cause RPC to be resent */
-	wdata->mds_ops->rpc_call_done(task, data);
+	wdata->header->mds_ops->rpc_call_done(task, data);
 }
 
 static void filelayout_write_count_stats(struct rpc_task *task, void *data)
 {
 	struct nfs_write_data *wdata = data;
 
-	rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics);
+	rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_write_release(void *data)
 {
 	struct nfs_write_data *wdata = data;
 
-	put_lseg(wdata->lseg);
-	wdata->mds_ops->rpc_release(data);
+	put_lseg(wdata->header->lseg);
+	wdata->header->mds_ops->rpc_release(data);
 }
 
 static void filelayout_commit_prepare(struct rpc_task *task, void *data)
@@ -377,7 +381,8 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
 static enum pnfs_try_status
 filelayout_read_pagelist(struct nfs_read_data *data)
 {
-	struct pnfs_layout_segment *lseg = data->lseg;
+	struct nfs_pgio_header *hdr = data->header;
+	struct pnfs_layout_segment *lseg = hdr->lseg;
 	struct nfs4_pnfs_ds *ds;
 	loff_t offset = data->args.offset;
 	u32 j, idx;
@@ -385,7 +390,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 	int status;
 
 	dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
-		__func__, data->inode->i_ino,
+		__func__, hdr->inode->i_ino,
 		data->args.pgbase, (size_t)data->args.count, offset);
 
 	if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
@@ -423,7 +428,8 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 static enum pnfs_try_status
 filelayout_write_pagelist(struct nfs_write_data *data, int sync)
 {
-	struct pnfs_layout_segment *lseg = data->lseg;
+	struct nfs_pgio_header *hdr = data->header;
+	struct pnfs_layout_segment *lseg = hdr->lseg;
 	struct nfs4_pnfs_ds *ds;
 	loff_t offset = data->args.offset;
 	u32 j, idx;
@@ -445,7 +451,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
 		return PNFS_NOT_ATTEMPTED;
 	}
 	dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
-		data->inode->i_ino, sync, (size_t) data->args.count, offset,
+		hdr->inode->i_ino, sync, (size_t) data->args.count, offset,
 		ds->ds_remotestr);
 
 	data->write_done_cb = filelayout_write_done_cb;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 54f6268..75d5959 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3281,12 +3281,12 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 void __nfs4_read_done_cb(struct nfs_read_data *data)
 {
-	nfs_invalidate_atime(data->inode);
+	nfs_invalidate_atime(data->header->inode);
 }
 
 static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_server *server = NFS_SERVER(data->inode);
+	struct nfs_server *server = NFS_SERVER(data->header->inode);
 
 	if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
 		rpc_restart_call_prepare(task);
@@ -3321,7 +3321,7 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message
 
 static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
 {
-	if (nfs4_setup_sequence(NFS_SERVER(data->inode),
+	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
 				&data->args.seq_args,
 				&data->res.seq_res,
 				task))
@@ -3332,22 +3332,25 @@ static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
 /* Reset the the nfs_read_data to send the read to the MDS. */
 void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
 {
+	struct nfs_pgio_header *hdr = data->header;
+	struct inode *inode = hdr->inode;
+
 	dprintk("%s Reset task for i/o through\n", __func__);
-	put_lseg(data->lseg);
-	data->lseg = NULL;
+	put_lseg(hdr->lseg);
+	hdr->lseg = NULL;
+	data->ds_clp = NULL;
 	/* offsets will differ in the dense stripe case */
 	data->args.offset = data->mds_offset;
-	data->ds_clp = NULL;
-	data->args.fh     = NFS_FH(data->inode);
+	data->args.fh     = NFS_FH(inode);
 	data->read_done_cb = nfs4_read_done_cb;
-	task->tk_ops = data->mds_ops;
-	rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+	task->tk_ops = hdr->mds_ops;
+	rpc_task_reset_client(task, NFS_CLIENT(inode));
 }
 EXPORT_SYMBOL_GPL(nfs4_reset_read);
 
 static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct inode *inode = data->inode;
+	struct inode *inode = data->header->inode;
 	
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
 		rpc_restart_call_prepare(task);
@@ -3371,25 +3374,28 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
 /* Reset the the nfs_write_data to send the write to the MDS. */
 void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
 {
+	struct nfs_pgio_header *hdr = data->header;
+	struct inode *inode = hdr->inode;
+
 	dprintk("%s Reset task for i/o through\n", __func__);
-	put_lseg(data->lseg);
-	data->lseg          = NULL;
-	data->ds_clp        = NULL;
+	put_lseg(hdr->lseg);
+	hdr->lseg        = NULL;
+	data->ds_clp     = NULL;
 	data->write_done_cb = nfs4_write_done_cb;
-	data->args.fh       = NFS_FH(data->inode);
+	data->args.fh       = NFS_FH(inode);
 	data->args.bitmask  = data->res.server->cache_consistency_bitmask;
 	data->args.offset   = data->mds_offset;
 	data->res.fattr     = &data->fattr;
-	task->tk_ops        = data->mds_ops;
-	rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+	task->tk_ops        = hdr->mds_ops;
+	rpc_task_reset_client(task, NFS_CLIENT(inode));
 }
 EXPORT_SYMBOL_GPL(nfs4_reset_write);
 
 static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
 {
-	struct nfs_server *server = NFS_SERVER(data->inode);
+	struct nfs_server *server = NFS_SERVER(data->header->inode);
 
-	if (data->lseg) {
+	if (data->header->lseg) {
 		data->args.bitmask = NULL;
 		data->res.fattr = NULL;
 	} else
@@ -3405,7 +3411,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
 
 static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
 {
-	if (nfs4_setup_sequence(NFS_SERVER(data->inode),
+	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
 				&data->args.seq_args,
 				&data->res.seq_res,
 				task))
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 4bff4a3..fbf4874 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -440,11 +440,12 @@ static void _read_done(struct ore_io_state *ios, void *private)
 
 int objio_read_pagelist(struct nfs_read_data *rdata)
 {
+	struct nfs_pgio_header *hdr = rdata->header;
 	struct objio_state *objios;
 	int ret;
 
-	ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true,
-			rdata->lseg, rdata->args.pages, rdata->args.pgbase,
+	ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
+			hdr->lseg, rdata->args.pages, rdata->args.pgbase,
 			rdata->args.offset, rdata->args.count, rdata,
 			GFP_KERNEL, &objios);
 	if (unlikely(ret))
@@ -483,12 +484,12 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 {
 	struct objio_state *objios = priv;
 	struct nfs_write_data *wdata = objios->oir.rpcdata;
+	struct address_space *mapping = wdata->header->inode->i_mapping;
 	pgoff_t index = offset / PAGE_SIZE;
-	struct page *page = find_get_page(wdata->inode->i_mapping, index);
+	struct page *page = find_get_page(mapping, index);
 
 	if (!page) {
-		page = find_or_create_page(wdata->inode->i_mapping,
-						index, GFP_NOFS);
+		page = find_or_create_page(mapping, index, GFP_NOFS);
 		if (unlikely(!page)) {
 			dprintk("%s: grab_cache_page Failed index=0x%lx\n",
 				__func__, index);
@@ -518,11 +519,12 @@ static const struct _ore_r4w_op _r4w_op = {
 
 int objio_write_pagelist(struct nfs_write_data *wdata, int how)
 {
+	struct nfs_pgio_header *hdr = wdata->header;
 	struct objio_state *objios;
 	int ret;
 
-	ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false,
-			wdata->lseg, wdata->args.pages, wdata->args.pgbase,
+	ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
+			hdr->lseg, wdata->args.pages, wdata->args.pgbase,
 			wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
 			&objios);
 	if (unlikely(ret))
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 8d45f1c..3f83fc2 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -258,7 +258,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 	if (status >= 0)
 		rdata->res.count = status;
 	else
-		rdata->pnfs_error = status;
+		rdata->header->pnfs_error = status;
 	objlayout_iodone(oir);
 	/* must not use oir after this point */
 
@@ -279,12 +279,14 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 enum pnfs_try_status
 objlayout_read_pagelist(struct nfs_read_data *rdata)
 {
+	struct nfs_pgio_header *hdr = rdata->header;
+	struct inode *inode = hdr->inode;
 	loff_t offset = rdata->args.offset;
 	size_t count = rdata->args.count;
 	int err;
 	loff_t eof;
 
-	eof = i_size_read(rdata->inode);
+	eof = i_size_read(inode);
 	if (unlikely(offset + count > eof)) {
 		if (offset >= eof) {
 			err = 0;
@@ -297,17 +299,17 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
 	}
 
 	rdata->res.eof = (offset + count) >= eof;
-	_fix_verify_io_params(rdata->lseg, &rdata->args.pages,
+	_fix_verify_io_params(hdr->lseg, &rdata->args.pages,
 			      &rdata->args.pgbase,
 			      rdata->args.offset, rdata->args.count);
 
 	dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
-		__func__, rdata->inode->i_ino, offset, count, rdata->res.eof);
+		__func__, inode->i_ino, offset, count, rdata->res.eof);
 
 	err = objio_read_pagelist(rdata);
  out:
 	if (unlikely(err)) {
-		rdata->pnfs_error = err;
+		hdr->pnfs_error = err;
 		dprintk("%s: Returned Error %d\n", __func__, err);
 		return PNFS_NOT_ATTEMPTED;
 	}
@@ -340,7 +342,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 		wdata->res.count = status;
 		wdata->verf.committed = oir->committed;
 	} else {
-		wdata->pnfs_error = status;
+		wdata->header->pnfs_error = status;
 	}
 	objlayout_iodone(oir);
 	/* must not use oir after this point */
@@ -363,15 +365,16 @@ enum pnfs_try_status
 objlayout_write_pagelist(struct nfs_write_data *wdata,
 			 int how)
 {
+	struct nfs_pgio_header *hdr = wdata->header;
 	int err;
 
-	_fix_verify_io_params(wdata->lseg, &wdata->args.pages,
+	_fix_verify_io_params(hdr->lseg, &wdata->args.pages,
 			      &wdata->args.pgbase,
 			      wdata->args.offset, wdata->args.count);
 
 	err = objio_write_pagelist(wdata, how);
 	if (unlikely(err)) {
-		wdata->pnfs_error = err;
+		hdr->pnfs_error = err;
 		dprintk("%s: Returned Error %d\n", __func__, err);
 		return PNFS_NOT_ATTEMPTED;
 	}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e4aee9d..b00170a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1191,13 +1191,15 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *
 
 static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
 {
-	dprintk("pnfs write error = %d\n", data->pnfs_error);
-	if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
+	struct nfs_pgio_header *hdr = data->header;
+
+	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
+	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
 	    PNFS_LAYOUTRET_ON_ERROR) {
-		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags);
-		pnfs_return_layout(data->inode);
+		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
+		pnfs_return_layout(hdr->inode);
 	}
-	data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
+	data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages);
 }
 
 /*
@@ -1205,13 +1207,15 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
  */
 void pnfs_ld_write_done(struct nfs_write_data *data)
 {
-	if (likely(!data->pnfs_error)) {
+	struct nfs_pgio_header *hdr = data->header;
+
+	if (!hdr->pnfs_error) {
 		pnfs_set_layoutcommit(data);
-		data->mds_ops->rpc_call_done(&data->task, data);
+		hdr->mds_ops->rpc_call_done(&data->task, data);
 	} else
 		pnfs_ld_handle_write_error(data);
-	put_lseg(data->lseg);
-	data->mds_ops->rpc_release(data);
+	put_lseg(hdr->lseg);
+	hdr->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
 
@@ -1219,12 +1223,14 @@ static void
 pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
 		struct nfs_write_data *data)
 {
-	list_splice_tail_init(&data->pages, &desc->pg_list);
-	if (data->req && list_empty(&data->req->wb_list))
-		nfs_list_add_request(data->req, &desc->pg_list);
+	struct nfs_pgio_header *hdr = data->header;
+
+	list_splice_tail_init(&hdr->pages, &desc->pg_list);
+	if (hdr->req && list_empty(&hdr->req->wb_list))
+		nfs_list_add_request(hdr->req, &desc->pg_list);
 	nfs_pageio_reset_write_mds(desc);
 	desc->pg_recoalesce = 1;
-	put_lseg(data->lseg);
+	put_lseg(hdr->lseg);
 	nfs_writedata_release(data);
 }
 
@@ -1234,20 +1240,21 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
 			struct pnfs_layout_segment *lseg,
 			int how)
 {
-	struct inode *inode = wdata->inode;
+	struct nfs_pgio_header *hdr = wdata->header;
+	struct inode *inode = hdr->inode;
 	enum pnfs_try_status trypnfs;
 	struct nfs_server *nfss = NFS_SERVER(inode);
 
-	wdata->mds_ops = call_ops;
-	wdata->lseg = get_lseg(lseg);
+	hdr->mds_ops = call_ops;
+	hdr->lseg = get_lseg(lseg);
 
 	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
 		inode->i_ino, wdata->args.count, wdata->args.offset, how);
 
 	trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
 	if (trypnfs == PNFS_NOT_ATTEMPTED) {
-		put_lseg(wdata->lseg);
-		wdata->lseg = NULL;
+		put_lseg(hdr->lseg);
+		hdr->lseg = NULL;
 	} else
 		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
 
@@ -1318,13 +1325,15 @@ static int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *h
 
 static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
 {
-	dprintk("pnfs read error = %d\n", data->pnfs_error);
-	if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
+	struct nfs_pgio_header *hdr = data->header;
+
+	dprintk("pnfs read error = %d\n", hdr->pnfs_error);
+	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
 	    PNFS_LAYOUTRET_ON_ERROR) {
-		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags);
-		pnfs_return_layout(data->inode);
+		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
+		pnfs_return_layout(hdr->inode);
 	}
-	data->task.tk_status = pnfs_read_done_resend_to_mds(data->inode, &data->pages);
+	data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages);
 }
 
 /*
@@ -1332,13 +1341,15 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
  */
 void pnfs_ld_read_done(struct nfs_read_data *data)
 {
-	if (likely(!data->pnfs_error)) {
+	struct nfs_pgio_header *hdr = data->header;
+
+	if (likely(!hdr->pnfs_error)) {
 		__nfs4_read_done_cb(data);
-		data->mds_ops->rpc_call_done(&data->task, data);
+		hdr->mds_ops->rpc_call_done(&data->task, data);
 	} else
 		pnfs_ld_handle_read_error(data);
-	put_lseg(data->lseg);
-	data->mds_ops->rpc_release(data);
+	put_lseg(hdr->lseg);
+	hdr->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
 
@@ -1346,9 +1357,11 @@ static void
 pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
 		struct nfs_read_data *data)
 {
-	list_splice_tail_init(&data->pages, &desc->pg_list);
-	if (data->req && list_empty(&data->req->wb_list))
-		nfs_list_add_request(data->req, &desc->pg_list);
+	struct nfs_pgio_header *hdr = data->header;
+
+	list_splice_tail_init(&hdr->pages, &desc->pg_list);
+	if (hdr->req && list_empty(&hdr->req->wb_list))
+		nfs_list_add_request(hdr->req, &desc->pg_list);
 	nfs_pageio_reset_read_mds(desc);
 	desc->pg_recoalesce = 1;
 	nfs_readdata_release(data);
@@ -1362,20 +1375,21 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
 		       const struct rpc_call_ops *call_ops,
 		       struct pnfs_layout_segment *lseg)
 {
-	struct inode *inode = rdata->inode;
+	struct nfs_pgio_header *hdr = rdata->header;
+	struct inode *inode = hdr->inode;
 	struct nfs_server *nfss = NFS_SERVER(inode);
 	enum pnfs_try_status trypnfs;
 
-	rdata->mds_ops = call_ops;
-	rdata->lseg = get_lseg(lseg);
+	hdr->mds_ops = call_ops;
+	hdr->lseg = get_lseg(lseg);
 
 	dprintk("%s: Reading ino:%lu %u@%llu\n",
 		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
 
 	trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
 	if (trypnfs == PNFS_NOT_ATTEMPTED) {
-		put_lseg(rdata->lseg);
-		rdata->lseg = NULL;
+		put_lseg(hdr->lseg);
+		hdr->lseg = NULL;
 	} else {
 		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
 	}
@@ -1450,30 +1464,32 @@ EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
 void
 pnfs_set_layoutcommit(struct nfs_write_data *wdata)
 {
-	struct nfs_inode *nfsi = NFS_I(wdata->inode);
+	struct nfs_pgio_header *hdr = wdata->header;
+	struct inode *inode = hdr->inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
 	loff_t end_pos = wdata->mds_offset + wdata->res.count;
 	bool mark_as_dirty = false;
 
-	spin_lock(&nfsi->vfs_inode.i_lock);
+	spin_lock(&inode->i_lock);
 	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
 		mark_as_dirty = true;
 		dprintk("%s: Set layoutcommit for inode %lu ",
-			__func__, wdata->inode->i_ino);
+			__func__, inode->i_ino);
 	}
-	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) {
+	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
 		/* references matched in nfs4_layoutcommit_release */
-		get_lseg(wdata->lseg);
+		get_lseg(hdr->lseg);
 	}
 	if (end_pos > nfsi->layout->plh_lwb)
 		nfsi->layout->plh_lwb = end_pos;
-	spin_unlock(&nfsi->vfs_inode.i_lock);
+	spin_unlock(&inode->i_lock);
 	dprintk("%s: lseg %p end_pos %llu\n",
-		__func__, wdata->lseg, nfsi->layout->plh_lwb);
+		__func__, hdr->lseg, nfsi->layout->plh_lwb);
 
 	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
 	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
 	if (mark_as_dirty)
-		mark_inode_dirty_sync(wdata->inode);
+		mark_inode_dirty_sync(inode);
 }
 EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
 
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index bf80503..22ee705 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -641,12 +641,14 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
+	struct inode *inode = data->header->inode;
+
 	if (nfs_async_handle_expired_key(task))
 		return -EAGAIN;
 
-	nfs_invalidate_atime(data->inode);
+	nfs_invalidate_atime(inode);
 	if (task->tk_status >= 0) {
-		nfs_refresh_inode(data->inode, data->res.fattr);
+		nfs_refresh_inode(inode, data->res.fattr);
 		/* Emulate the eof flag, which isn't normally needed in NFSv2
 		 * as it is guaranteed to always return the file attributes
 		 */
@@ -668,11 +670,13 @@ static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat
 
 static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
+	struct inode *inode = data->header->inode;
+
 	if (nfs_async_handle_expired_key(task))
 		return -EAGAIN;
 
 	if (task->tk_status >= 0)
-		nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
+		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
 	return 0;
 }
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 4ddba67..d6d4682 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -35,19 +35,24 @@ static const struct rpc_call_ops nfs_read_full_ops;
 
 static struct kmem_cache *nfs_rdata_cachep;
 
-struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount)
 {
-	struct nfs_read_data *p;
+	struct nfs_read_header *p;
 
 	p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
 	if (p) {
-		INIT_LIST_HEAD(&p->pages);
-		p->npages = pagecount;
-		if (pagecount <= ARRAY_SIZE(p->page_array))
-			p->pagevec = p->page_array;
+		struct nfs_pgio_header *hdr = &p->header;
+		struct nfs_read_data *data = &p->rpc_data;
+
+		INIT_LIST_HEAD(&hdr->pages);
+		INIT_LIST_HEAD(&data->list);
+		data->npages = pagecount;
+		data->header = hdr;
+		if (pagecount <= ARRAY_SIZE(data->page_array))
+			data->pagevec = data->page_array;
 		else {
-			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
-			if (!p->pagevec) {
+			data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+			if (!data->pagevec) {
 				kmem_cache_free(nfs_rdata_cachep, p);
 				p = NULL;
 			}
@@ -56,17 +61,19 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
 	return p;
 }
 
-void nfs_readdata_free(struct nfs_read_data *p)
+void nfs_readhdr_free(struct nfs_pgio_header *hdr)
 {
-	if (p && (p->pagevec != &p->page_array[0]))
-		kfree(p->pagevec);
-	kmem_cache_free(nfs_rdata_cachep, p);
+	struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
+
+	kmem_cache_free(nfs_rdata_cachep, rhdr);
 }
 
 void nfs_readdata_release(struct nfs_read_data *rdata)
 {
 	put_nfs_open_context(rdata->args.context);
-	nfs_readdata_free(rdata);
+	if (rdata->pagevec != rdata->page_array)
+		kfree(rdata->pagevec);
+	nfs_readhdr_free(rdata->header);
 }
 
 static
@@ -173,13 +180,13 @@ int nfs_initiate_read(struct rpc_clnt *clnt,
 		      struct nfs_read_data *data,
 		      const struct rpc_call_ops *call_ops)
 {
-	struct inode *inode = data->inode;
+	struct inode *inode = data->header->inode;
 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
 	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
-		.rpc_cred = data->cred,
+		.rpc_cred = data->header->cred,
 	};
 	struct rpc_task_setup task_setup_data = {
 		.task = &data->task,
@@ -216,11 +223,11 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
 static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 		unsigned int count, unsigned int offset)
 {
-	struct inode *inode = req->wb_context->dentry->d_inode;
+	struct inode *inode = data->header->inode;
 
-	data->req	  = req;
-	data->inode	  = inode;
-	data->cred	  = req->wb_context->cred;
+	data->header->req	  = req;
+	data->header->inode	  = inode;
+	data->header->cred	  = req->wb_context->cred;
 
 	data->args.fh     = NFS_FH(inode);
 	data->args.offset = req_offset(req) + offset;
@@ -239,7 +246,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 static int nfs_do_read(struct nfs_read_data *data,
 		const struct rpc_call_ops *call_ops)
 {
-	struct inode *inode = data->args.context->dentry->d_inode;
+	struct inode *inode = data->header->inode;
 
 	return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops);
 }
@@ -293,6 +300,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
 {
 	struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
 	struct page *page = req->wb_page;
+	struct nfs_read_header *rhdr;
 	struct nfs_read_data *data;
 	size_t rsize = desc->pg_bsize, nbytes;
 	unsigned int offset;
@@ -306,9 +314,10 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
 	do {
 		size_t len = min(nbytes,rsize);
 
-		data = nfs_readdata_alloc(1);
-		if (!data)
+		rhdr = nfs_readhdr_alloc(1);
+		if (!rhdr)
 			goto out_bad;
+		data = &rhdr->rpc_data;
 		data->pagevec[0] = page;
 		nfs_read_rpcsetup(req, data, len, offset);
 		list_add(&data->list, res);
@@ -333,26 +342,28 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *
 {
 	struct nfs_page		*req;
 	struct page		**pages;
+	struct nfs_read_header	*rhdr;
 	struct nfs_read_data	*data;
 	struct list_head *head = &desc->pg_list;
 	int ret = 0;
 
-	data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
-						     desc->pg_count));
-	if (!data) {
+	rhdr = nfs_readhdr_alloc(nfs_page_array_len(desc->pg_base,
+						    desc->pg_count));
+	if (!rhdr) {
 		nfs_async_read_error(head);
 		ret = -ENOMEM;
 		goto out;
 	}
 
+	data = &rhdr->rpc_data;
 	pages = data->pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
-		nfs_list_add_request(req, &data->pages);
+		nfs_list_add_request(req, &rhdr->header.pages);
 		*pages++ = req->wb_page;
 	}
-	req = nfs_list_entry(data->pages.next);
+	req = nfs_list_entry(rhdr->header.pages.next);
 
 	nfs_read_rpcsetup(req, data, desc->pg_count, 0);
 	list_add(&data->list, res);
@@ -390,20 +401,21 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = {
  */
 int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 {
+	struct inode *inode = data->header->inode;
 	int status;
 
 	dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
 			task->tk_status);
 
-	status = NFS_PROTO(data->inode)->read_done(task, data);
+	status = NFS_PROTO(inode)->read_done(task, data);
 	if (status != 0)
 		return status;
 
-	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
+	nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);
 
 	if (task->tk_status == -ESTALE) {
-		set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags);
-		nfs_mark_for_revalidate(data->inode);
+		set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+		nfs_mark_for_revalidate(inode);
 	}
 	return 0;
 }
@@ -417,7 +429,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
 		return;
 
 	/* This is a short read! */
-	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
+	nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
 	/* Has the server at least made some progress? */
 	if (resp->count == 0)
 		return;
@@ -449,7 +461,7 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
 static void nfs_readpage_release_partial(void *calldata)
 {
 	struct nfs_read_data *data = calldata;
-	struct nfs_page *req = data->req;
+	struct nfs_page *req = data->header->req;
 	struct page *page = req->wb_page;
 	int status = data->task.tk_status;
 
@@ -461,13 +473,13 @@ static void nfs_readpage_release_partial(void *calldata)
 			SetPageUptodate(page);
 		nfs_readpage_release(req);
 	}
-	nfs_readdata_release(calldata);
+	nfs_readdata_release(data);
 }
 
 void nfs_read_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;
-	NFS_PROTO(data->inode)->read_rpc_prepare(task, data);
+	NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
 }
 
 static const struct rpc_call_ops nfs_read_partial_ops = {
@@ -524,9 +536,10 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 static void nfs_readpage_release_full(void *calldata)
 {
 	struct nfs_read_data *data = calldata;
+	struct nfs_pgio_header *hdr = data->header;
 
-	while (!list_empty(&data->pages)) {
-		struct nfs_page *req = nfs_list_entry(data->pages.next);
+	while (!list_empty(&hdr->pages)) {
+		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 
 		nfs_list_remove_request(req);
 		nfs_readpage_release(req);
@@ -685,7 +698,7 @@ out:
 int __init nfs_init_readpagecache(void)
 {
 	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
-					     sizeof(struct nfs_read_data),
+					     sizeof(struct nfs_read_header),
 					     0, SLAB_HWCACHE_ALIGN,
 					     NULL);
 	if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 76735dd..dbb5c0a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -69,19 +69,24 @@ void nfs_commit_free(struct nfs_commit_data *p)
 }
 EXPORT_SYMBOL_GPL(nfs_commit_free);
 
-struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount)
 {
-	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
+	struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
 
 	if (p) {
+		struct nfs_pgio_header *hdr = &p->header;
+		struct nfs_write_data *data = &p->rpc_data;
+
 		memset(p, 0, sizeof(*p));
-		INIT_LIST_HEAD(&p->pages);
-		p->npages = pagecount;
-		if (pagecount <= ARRAY_SIZE(p->page_array))
-			p->pagevec = p->page_array;
+		INIT_LIST_HEAD(&hdr->pages);
+		INIT_LIST_HEAD(&data->list);
+		data->npages = pagecount;
+		data->header = hdr;
+		if (pagecount <= ARRAY_SIZE(data->page_array))
+			data->pagevec = data->page_array;
 		else {
-			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
-			if (!p->pagevec) {
+			data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
+			if (!data->pagevec) {
 				mempool_free(p, nfs_wdata_mempool);
 				p = NULL;
 			}
@@ -90,17 +95,18 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
 	return p;
 }
 
-void nfs_writedata_free(struct nfs_write_data *p)
+void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 {
-	if (p && (p->pagevec != &p->page_array[0]))
-		kfree(p->pagevec);
-	mempool_free(p, nfs_wdata_mempool);
+	struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
+	mempool_free(whdr, nfs_wdata_mempool);
 }
 
 void nfs_writedata_release(struct nfs_write_data *wdata)
 {
 	put_nfs_open_context(wdata->args.context);
-	nfs_writedata_free(wdata);
+	if (wdata->pagevec != wdata->page_array)
+		kfree(wdata->pagevec);
+	nfs_writehdr_free(wdata->header);
 }
 
 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -507,9 +513,8 @@ static inline
 int nfs_write_need_commit(struct nfs_write_data *data)
 {
 	if (data->verf.committed == NFS_DATA_SYNC)
-		return data->lseg == NULL;
-	else
-		return data->verf.committed != NFS_FILE_SYNC;
+		return data->header->lseg == NULL;
+	return data->verf.committed != NFS_FILE_SYNC;
 }
 
 static inline
@@ -517,7 +522,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req,
 				  struct nfs_write_data *data)
 {
 	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
-		nfs_mark_request_commit(req, data->lseg);
+		nfs_mark_request_commit(req, data->header->lseg);
 		return 1;
 	}
 	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
@@ -841,13 +846,13 @@ int nfs_initiate_write(struct rpc_clnt *clnt,
 		       const struct rpc_call_ops *call_ops,
 		       int how)
 {
-	struct inode *inode = data->inode;
+	struct inode *inode = data->header->inode;
 	int priority = flush_task_priority(how);
 	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
-		.rpc_cred = data->cred,
+		.rpc_cred = data->header->cred,
 	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = clnt,
@@ -896,14 +901,15 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 		unsigned int count, unsigned int offset,
 		int how)
 {
+	struct nfs_pgio_header *hdr = data->header;
 	struct inode *inode = req->wb_context->dentry->d_inode;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
 
-	data->req = req;
-	data->inode = inode = req->wb_context->dentry->d_inode;
-	data->cred = req->wb_context->cred;
+	hdr->req = req;
+	hdr->inode = inode = req->wb_context->dentry->d_inode;
+	hdr->cred = req->wb_context->cred;
 
 	data->args.fh     = NFS_FH(inode);
 	data->args.offset = req_offset(req) + offset;
@@ -935,7 +941,7 @@ static int nfs_do_write(struct nfs_write_data *data,
 		const struct rpc_call_ops *call_ops,
 		int how)
 {
-	struct inode *inode = data->args.context->dentry->d_inode;
+	struct inode *inode = data->header->inode;
 
 	return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how);
 }
@@ -981,6 +987,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
 {
 	struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
 	struct page *page = req->wb_page;
+	struct nfs_write_header *whdr;
 	struct nfs_write_data *data;
 	size_t wsize = desc->pg_bsize, nbytes;
 	unsigned int offset;
@@ -1000,9 +1007,10 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
 	do {
 		size_t len = min(nbytes, wsize);
 
-		data = nfs_writedata_alloc(1);
-		if (!data)
+		whdr = nfs_writehdr_alloc(1);
+		if (!whdr)
 			goto out_bad;
+		data = &whdr->rpc_data;
 		data->pagevec[0] = page;
 		nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
 		list_add(&data->list, res);
@@ -1036,13 +1044,14 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r
 {
 	struct nfs_page		*req;
 	struct page		**pages;
+	struct nfs_write_header	*whdr;
 	struct nfs_write_data	*data;
 	struct list_head *head = &desc->pg_list;
 	int ret = 0;
 
-	data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
-						      desc->pg_count));
-	if (!data) {
+	whdr = nfs_writehdr_alloc(nfs_page_array_len(desc->pg_base,
+						     desc->pg_count));
+	if (!whdr) {
 		while (!list_empty(head)) {
 			req = nfs_list_entry(head->next);
 			nfs_list_remove_request(req);
@@ -1051,14 +1060,15 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r
 		ret = -ENOMEM;
 		goto out;
 	}
+	data = &whdr->rpc_data;
 	pages = data->pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
-		nfs_list_add_request(req, &data->pages);
+		nfs_list_add_request(req, &whdr->header.pages);
 		*pages++ = req->wb_page;
 	}
-	req = nfs_list_entry(data->pages.next);
+	req = nfs_list_entry(whdr->header.pages.next);
 
 	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
 	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
@@ -1126,10 +1136,11 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 
 	dprintk("NFS: %5u write(%s/%lld %d@%lld)",
 		task->tk_pid,
-		data->req->wb_context->dentry->d_inode->i_sb->s_id,
+		data->header->inode->i_sb->s_id,
 		(long long)
-		  NFS_FILEID(data->req->wb_context->dentry->d_inode),
-		data->req->wb_bytes, (long long)req_offset(data->req));
+		  NFS_FILEID(data->header->inode),
+		data->header->req->wb_bytes,
+		(long long)req_offset(data->header->req));
 
 	nfs_writeback_done(task, data);
 }
@@ -1137,7 +1148,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 static void nfs_writeback_release_partial(void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
-	struct nfs_page		*req = data->req;
+	struct nfs_page		*req = data->header->req;
 	struct page		*page = req->wb_page;
 	int status = data->task.tk_status;
 
@@ -1169,13 +1180,13 @@ static void nfs_writeback_release_partial(void *calldata)
 out:
 	if (atomic_dec_and_test(&req->wb_complete))
 		nfs_writepage_release(req, data);
-	nfs_writedata_release(calldata);
+	nfs_writedata_release(data);
 }
 
 void nfs_write_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
-	NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
+	NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
 }
 
 void nfs_commit_prepare(struct rpc_task *task, void *calldata)
@@ -1208,11 +1219,12 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 static void nfs_writeback_release_full(void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
+	struct nfs_pgio_header *hdr = data->header;
 	int status = data->task.tk_status;
 
 	/* Update attributes as result of writeback. */
-	while (!list_empty(&data->pages)) {
-		struct nfs_page *req = nfs_list_entry(data->pages.next);
+	while (!list_empty(&hdr->pages)) {
+		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 		struct page *page = req->wb_page;
 
 		nfs_list_remove_request(req);
@@ -1233,7 +1245,7 @@ static void nfs_writeback_release_full(void *calldata)
 
 		if (nfs_write_need_commit(data)) {
 			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-			nfs_mark_request_commit(req, data->lseg);
+			nfs_mark_request_commit(req, hdr->lseg);
 			dprintk(" marked for commit\n");
 			goto next;
 		}
@@ -1244,7 +1256,7 @@ remove_request:
 		nfs_unlock_request(req);
 		nfs_end_page_writeback(page);
 	}
-	nfs_writedata_release(calldata);
+	nfs_writedata_release(data);
 }
 
 static const struct rpc_call_ops nfs_write_full_ops = {
@@ -1261,6 +1273,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 {
 	struct nfs_writeargs	*argp = &data->args;
 	struct nfs_writeres	*resp = &data->res;
+	struct inode		*inode = data->header->inode;
 	int status;
 
 	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
@@ -1273,10 +1286,10 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 	 * another writer had changed the file, but some applications
 	 * depend on tighter cache coherency when writing.
 	 */
-	status = NFS_PROTO(data->inode)->write_done(task, data);
+	status = NFS_PROTO(inode)->write_done(task, data);
 	if (status != 0)
 		return;
-	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
@@ -1294,7 +1307,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 		if (time_before(complain, jiffies)) {
 			dprintk("NFS:       faulty NFS server %s:"
 				" (committed = %d) != (stable = %d)\n",
-				NFS_SERVER(data->inode)->nfs_client->cl_hostname,
+				NFS_SERVER(inode)->nfs_client->cl_hostname,
 				resp->verf->committed, argp->stable);
 			complain = jiffies + 300 * HZ;
 		}
@@ -1304,7 +1317,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 	if (task->tk_status >= 0 && resp->count < argp->count) {
 		static unsigned long    complain;
 
-		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+		nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
 
 		/* Has the server at least made some progress? */
 		if (resp->count != 0) {
@@ -1333,7 +1346,6 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 		/* Can't do anything about it except throw an error. */
 		task->tk_status = -EIO;
 	}
-	return;
 }
 
 
@@ -1745,7 +1757,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 int __init nfs_init_writepagecache(void)
 {
 	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
-					     sizeof(struct nfs_write_data),
+					     sizeof(struct nfs_write_header),
 					     0, SLAB_HWCACHE_ALIGN,
 					     NULL);
 	if (nfs_wdata_cachep == NULL)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d5d68f3..8d3a2b8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -569,12 +569,6 @@ nfs_have_writebacks(struct inode *inode)
 }
 
 /*
- * Allocate nfs_write_data structures
- */
-extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages);
-extern void nfs_writedata_free(struct nfs_write_data *);
-
-/*
  * linux/fs/nfs/read.c
  */
 extern int  nfs_readpage(struct file *, struct page *);
@@ -585,12 +579,6 @@ extern int  nfs_readpage_async(struct nfs_open_context *, struct inode *,
 			       struct page *);
 
 /*
- * Allocate nfs_read_data structures
- */
-extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages);
-extern void nfs_readdata_free(struct nfs_read_data *);
-
-/*
  * linux/fs/nfs3proc.c
  */
 #ifdef CONFIG_NFS_V3_ACL
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2aff66d..0e31c44 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1163,52 +1163,58 @@ struct nfs_page;
 #define NFS_PAGEVEC_SIZE	(8U)
 
 struct nfs_read_data {
+	struct nfs_pgio_header	*header;
+	struct list_head	list;
 	struct rpc_task		task;
-	struct inode		*inode;
-	struct rpc_cred		*cred;
 	struct nfs_fattr	fattr;	/* fattr storage */
-	struct list_head	pages;	/* Coalesced read requests */
-	struct list_head	list;	/* lists of struct nfs_read_data */
-	struct nfs_page		*req;	/* multi ops per nfs_page */
 	struct page		**pagevec;
 	unsigned int		npages;	/* Max length of pagevec */
 	struct nfs_readargs args;
 	struct nfs_readres  res;
 	unsigned long		timestamp;	/* For lease renewal */
-	struct pnfs_layout_segment *lseg;
-	struct nfs_client	*ds_clp;	/* pNFS data server */
-	const struct rpc_call_ops *mds_ops;
 	int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
 	__u64			mds_offset;
-	int			pnfs_error;
 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+	struct nfs_client	*ds_clp;	/* pNFS data server */
+};
+
+struct nfs_pgio_header {
+	struct inode		*inode;
+	struct rpc_cred		*cred;
+	struct list_head	pages;
+	struct nfs_page		*req;
+	struct pnfs_layout_segment *lseg;
+	const struct rpc_call_ops *mds_ops;
+	int			pnfs_error;
+};
+
+struct nfs_read_header {
+	struct nfs_pgio_header	header;
+	struct nfs_read_data	rpc_data;
 };
 
 struct nfs_direct_req;
 
 struct nfs_write_data {
+	struct nfs_pgio_header	*header;
+	struct list_head	list;
 	struct rpc_task		task;
-	struct inode		*inode;
-	struct rpc_cred		*cred;
 	struct nfs_fattr	fattr;
 	struct nfs_writeverf	verf;
-	struct list_head	pages;		/* Coalesced requests we wish to flush */
-	struct list_head	list;		/* lists of struct nfs_write_data */
-	struct nfs_page		*req;		/* multi ops per nfs_page */
 	struct page		**pagevec;
 	unsigned int		npages;		/* Max length of pagevec */
 	struct nfs_writeargs	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
-	struct pnfs_layout_segment *lseg;
-	struct nfs_client	*ds_clp;	/* pNFS data server */
-	const struct rpc_call_ops *mds_ops;
-	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
-#ifdef CONFIG_NFS_V4
 	unsigned long		timestamp;	/* For lease renewal */
-#endif
+	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
 	__u64			mds_offset;	/* Filelayout dense stripe */
-	int			pnfs_error;
 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+	struct nfs_client	*ds_clp;	/* pNFS data server */
+};
+
+struct nfs_write_header {
+	struct nfs_pgio_header	header;
+	struct nfs_write_data	rpc_data;
 };
 
 struct nfs_commit_data {
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 13/26] NFS: create struct nfs_page_array
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (11 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 12/26] NFS: create common nfs_pgio_header for both read and write Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 14/26] NFS: merge _full and _partial read rpc_ops Fred Isaman
                   ` (12 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Both nfs_read_data and nfs_write_data devote several fields which
can be combined into a single shared struct.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/blocklayout/blocklayout.c |   11 +++++----
 fs/nfs/direct.c                  |   40 +++++++++++++++++++++----------------
 fs/nfs/internal.h                |    1 +
 fs/nfs/pagelist.c                |   13 ++++++++++++
 fs/nfs/read.c                    |   22 +++++++-------------
 fs/nfs/write.c                   |   22 +++++++-------------
 include/linux/nfs_xdr.h          |   14 +++++++-----
 7 files changed, 67 insertions(+), 56 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 192e16a..3f58832 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -240,7 +240,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
 
 	dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
-	       rdata->npages, f_offset, (unsigned int)rdata->args.count);
+	       rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
 
 	par = alloc_parallel(rdata);
 	if (!par)
@@ -250,7 +250,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 
 	isect = (sector_t) (f_offset >> SECTOR_SHIFT);
 	/* Code assumes extents are page-aligned */
-	for (i = pg_index; i < rdata->npages; i++) {
+	for (i = pg_index; i < rdata->pages.npages; i++) {
 		if (!extent_length) {
 			/* We've used up the previous extent */
 			bl_put_extent(be);
@@ -283,7 +283,8 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 			struct pnfs_block_extent *be_read;
 
 			be_read = (hole && cow_read) ? cow_read : be;
-			bio = bl_add_page_to_bio(bio, rdata->npages - i, READ,
+			bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
+						 READ,
 						 isect, pages[i], be_read,
 						 bl_end_io_read, par);
 			if (IS_ERR(bio)) {
@@ -652,7 +653,7 @@ next_page:
 
 	/* Middle pages */
 	pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
-	for (i = pg_index; i < wdata->npages; i++) {
+	for (i = pg_index; i < wdata->pages.npages; i++) {
 		if (!extent_length) {
 			/* We've used up the previous extent */
 			bl_put_extent(be);
@@ -686,7 +687,7 @@ next_page:
 				goto out;
 			}
 		}
-		bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE,
+		bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
 					 isect, pages[i], be,
 					 bl_end_io_write, par);
 		if (IS_ERR(bio)) {
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 56176af..0faba4c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -252,11 +252,11 @@ static void nfs_direct_read_release(void *calldata)
 	} else {
 		dreq->count += data->res.count;
 		spin_unlock(&dreq->lock);
-		nfs_direct_dirty_pages(data->pagevec,
+		nfs_direct_dirty_pages(data->pages.pagevec,
 				data->args.pgbase,
 				data->res.count);
 	}
-	nfs_direct_release_pages(data->pagevec, data->npages);
+	nfs_direct_release_pages(data->pages.pagevec, data->pages.npages);
 
 	if (put_dreq(dreq))
 		nfs_direct_complete(dreq);
@@ -273,8 +273,8 @@ static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
 {
 	struct nfs_read_data *data = &rhdr->rpc_data;
 
-	if (data->pagevec != data->page_array)
-		kfree(data->pagevec);
+	if (data->pages.pagevec != data->pages.page_array)
+		kfree(data->pages.pagevec);
 	nfs_readhdr_free(&rhdr->header);
 }
 
@@ -312,6 +312,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 	do {
 		struct nfs_read_header *rhdr;
 		struct nfs_read_data *data;
+		struct nfs_page_array *pages;
 		size_t bytes;
 
 		pgbase = user_addr & ~PAGE_MASK;
@@ -322,24 +323,25 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		if (unlikely(!rhdr))
 			break;
 		data = &rhdr->rpc_data;
+		pages = &data->pages;
 
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
-					data->npages, 1, 0, data->pagevec, NULL);
+					pages->npages, 1, 0, pages->pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
 		if (result < 0) {
 			nfs_direct_readhdr_release(rhdr);
 			break;
 		}
-		if ((unsigned)result < data->npages) {
+		if ((unsigned)result < pages->npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
-				nfs_direct_release_pages(data->pagevec, result);
+				nfs_direct_release_pages(pages->pagevec, result);
 				nfs_direct_readhdr_release(rhdr);
 				break;
 			}
 			bytes -= pgbase;
-			data->npages = result;
+			pages->npages = result;
 		}
 
 		get_dreq(dreq);
@@ -352,7 +354,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		data->args.lock_context = dreq->l_ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
-		data->args.pages = data->pagevec;
+		data->args.pages = pages->pagevec;
 		data->args.count = bytes;
 		data->res.fattr = &data->fattr;
 		data->res.eof = 0;
@@ -462,8 +464,8 @@ static void nfs_direct_writehdr_release(struct nfs_write_header *whdr)
 {
 	struct nfs_write_data *data = &whdr->rpc_data;
 
-	if (data->pagevec != data->page_array)
-		kfree(data->pagevec);
+	if (data->pages.pagevec != data->pages.page_array)
+		kfree(data->pages.pagevec);
 	nfs_writehdr_free(&whdr->header);
 }
 
@@ -472,8 +474,10 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 	while (!list_empty(&dreq->rewrite_list)) {
 		struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages);
 		struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
+		struct nfs_page_array *p = &whdr->rpc_data.pages;
+
 		list_del(&hdr->pages);
-		nfs_direct_release_pages(whdr->rpc_data.pagevec, whdr->rpc_data.npages);
+		nfs_direct_release_pages(p->pagevec, p->npages);
 		nfs_direct_writehdr_release(whdr);
 	}
 }
@@ -751,6 +755,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 	do {
 		struct nfs_write_header *whdr;
 		struct nfs_write_data *data;
+		struct nfs_page_array *pages;
 		size_t bytes;
 
 		pgbase = user_addr & ~PAGE_MASK;
@@ -762,24 +767,25 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 			break;
 
 		data = &whdr->rpc_data;
+		pages = &data->pages;
 
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
-					data->npages, 0, 0, data->pagevec, NULL);
+					pages->npages, 0, 0, pages->pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
 		if (result < 0) {
 			nfs_direct_writehdr_release(whdr);
 			break;
 		}
-		if ((unsigned)result < data->npages) {
+		if ((unsigned)result < pages->npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
-				nfs_direct_release_pages(data->pagevec, result);
+				nfs_direct_release_pages(pages->pagevec, result);
 				nfs_direct_writehdr_release(whdr);
 				break;
 			}
 			bytes -= pgbase;
-			data->npages = result;
+			pages->npages = result;
 		}
 
 		get_dreq(dreq);
@@ -794,7 +800,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		data->args.lock_context = dreq->l_ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
-		data->args.pages = data->pagevec;
+		data->args.pages = pages->pagevec;
 		data->args.count = bytes;
 		data->args.stable = sync;
 		data->res.fattr = &data->fattr;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9b2b8bf..0818e66 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -209,6 +209,7 @@ extern void nfs_destroy_writepagecache(void);
 
 extern int __init nfs_init_directcache(void);
 extern void nfs_destroy_directcache(void);
+extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
 
 /* nfs2xdr.c */
 extern int nfs_stat_to_errno(enum nfs_stat);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d21fcea..d349bd4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,6 +26,19 @@
 
 static struct kmem_cache *nfs_page_cachep;
 
+bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
+{
+	p->npages = pagecount;
+	if (pagecount <= ARRAY_SIZE(p->page_array))
+		p->pagevec = p->page_array;
+	else {
+		p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+		if (!p->pagevec)
+			p->npages = 0;
+	}
+	return p->pagevec != NULL;
+}
+
 static inline struct nfs_page *
 nfs_page_alloc(void)
 {
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index d6d4682..f6ab30b 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -46,16 +46,10 @@ struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount)
 
 		INIT_LIST_HEAD(&hdr->pages);
 		INIT_LIST_HEAD(&data->list);
-		data->npages = pagecount;
 		data->header = hdr;
-		if (pagecount <= ARRAY_SIZE(data->page_array))
-			data->pagevec = data->page_array;
-		else {
-			data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
-			if (!data->pagevec) {
-				kmem_cache_free(nfs_rdata_cachep, p);
-				p = NULL;
-			}
+		if (!nfs_pgarray_set(&data->pages, pagecount)) {
+			kmem_cache_free(nfs_rdata_cachep, p);
+			p = NULL;
 		}
 	}
 	return p;
@@ -71,8 +65,8 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr)
 void nfs_readdata_release(struct nfs_read_data *rdata)
 {
 	put_nfs_open_context(rdata->args.context);
-	if (rdata->pagevec != rdata->page_array)
-		kfree(rdata->pagevec);
+	if (rdata->pages.pagevec != rdata->pages.page_array)
+		kfree(rdata->pages.pagevec);
 	nfs_readhdr_free(rdata->header);
 }
 
@@ -232,7 +226,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	data->args.fh     = NFS_FH(inode);
 	data->args.offset = req_offset(req) + offset;
 	data->args.pgbase = req->wb_pgbase + offset;
-	data->args.pages  = data->pagevec;
+	data->args.pages  = data->pages.pagevec;
 	data->args.count  = count;
 	data->args.context = get_nfs_open_context(req->wb_context);
 	data->args.lock_context = req->wb_lock_context;
@@ -318,7 +312,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
 		if (!rhdr)
 			goto out_bad;
 		data = &rhdr->rpc_data;
-		data->pagevec[0] = page;
+		data->pages.pagevec[0] = page;
 		nfs_read_rpcsetup(req, data, len, offset);
 		list_add(&data->list, res);
 		requests++;
@@ -356,7 +350,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *
 	}
 
 	data = &rhdr->rpc_data;
-	pages = data->pagevec;
+	pages = data->pages.pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index dbb5c0a..2efae04 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -80,16 +80,10 @@ struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount)
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&hdr->pages);
 		INIT_LIST_HEAD(&data->list);
-		data->npages = pagecount;
 		data->header = hdr;
-		if (pagecount <= ARRAY_SIZE(data->page_array))
-			data->pagevec = data->page_array;
-		else {
-			data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
-			if (!data->pagevec) {
-				mempool_free(p, nfs_wdata_mempool);
-				p = NULL;
-			}
+		if (!nfs_pgarray_set(&data->pages, pagecount)) {
+			mempool_free(p, nfs_wdata_mempool);
+			p = NULL;
 		}
 	}
 	return p;
@@ -104,8 +98,8 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 void nfs_writedata_release(struct nfs_write_data *wdata)
 {
 	put_nfs_open_context(wdata->args.context);
-	if (wdata->pagevec != wdata->page_array)
-		kfree(wdata->pagevec);
+	if (wdata->pages.pagevec != wdata->pages.page_array)
+		kfree(wdata->pages.pagevec);
 	nfs_writehdr_free(wdata->header);
 }
 
@@ -916,7 +910,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	/* pnfs_set_layoutcommit needs this */
 	data->mds_offset = data->args.offset;
 	data->args.pgbase = req->wb_pgbase + offset;
-	data->args.pages  = data->pagevec;
+	data->args.pages  = data->pages.pagevec;
 	data->args.count  = count;
 	data->args.context = get_nfs_open_context(req->wb_context);
 	data->args.lock_context = req->wb_lock_context;
@@ -1011,7 +1005,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
 		if (!whdr)
 			goto out_bad;
 		data = &whdr->rpc_data;
-		data->pagevec[0] = page;
+		data->pages.pagevec[0] = page;
 		nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
 		list_add(&data->list, res);
 		requests++;
@@ -1061,7 +1055,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r
 		goto out;
 	}
 	data = &whdr->rpc_data;
-	pages = data->pagevec;
+	pages = data->pages.pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0e31c44..060896f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1162,19 +1162,23 @@ struct nfs_page;
 
 #define NFS_PAGEVEC_SIZE	(8U)
 
+struct nfs_page_array {
+	struct page		**pagevec;
+	unsigned int		npages;		/* Max length of pagevec */
+	struct page		*page_array[NFS_PAGEVEC_SIZE];
+};
+
 struct nfs_read_data {
 	struct nfs_pgio_header	*header;
 	struct list_head	list;
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;	/* fattr storage */
-	struct page		**pagevec;
-	unsigned int		npages;	/* Max length of pagevec */
 	struct nfs_readargs args;
 	struct nfs_readres  res;
 	unsigned long		timestamp;	/* For lease renewal */
 	int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
 	__u64			mds_offset;
-	struct page		*page_array[NFS_PAGEVEC_SIZE];
+	struct nfs_page_array	pages;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
 };
 
@@ -1201,14 +1205,12 @@ struct nfs_write_data {
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;
 	struct nfs_writeverf	verf;
-	struct page		**pagevec;
-	unsigned int		npages;		/* Max length of pagevec */
 	struct nfs_writeargs	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
 	unsigned long		timestamp;	/* For lease renewal */
 	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
 	__u64			mds_offset;	/* Filelayout dense stripe */
-	struct page		*page_array[NFS_PAGEVEC_SIZE];
+	struct nfs_page_array	pages;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
 };
 
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 14/26] NFS: merge _full and _partial read rpc_ops
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (12 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 13/26] NFS: create struct nfs_page_array Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-16 20:28   ` Myklebust, Trond
  2012-04-16 20:41   ` Myklebust, Trond
  2012-04-09 20:52 ` [PATCH 15/26] NFS: merge _full and _partial write rpc_ops Fred Isaman
                   ` (11 subsequent siblings)
  25 siblings, 2 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Decouple nfs_pgio_header and nfs_read_data, and have (possibly
multiple) nfs_read_datas each take a refcount on nfs_pgio_header.

For the moment keeps nfs_read_header as a way to preallocate a single
nfs_read_data with the nfs_pgio_header.  The code doesn't need this,
and would be prettier without, but given the amount of churn I am
already introducing I didn't want to play with tuning new mempools.

This also fixes bug in pnfs_ld_handle_read_error.  In the case of
desc->pg_bsize < PAGE_CACHE_SIZE, the pages list was empty, causing
replay attempt to do nothing.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/direct.c          |   10 +-
 fs/nfs/internal.h        |   16 ++-
 fs/nfs/nfs4filelayout.c  |    1 -
 fs/nfs/nfs4proc.c        |    2 -
 fs/nfs/pagelist.c        |   22 +++
 fs/nfs/pnfs.c            |   55 +++++---
 fs/nfs/read.c            |  340 ++++++++++++++++++++++------------------------
 include/linux/nfs_page.h |    1 -
 include/linux/nfs_xdr.h  |   16 ++
 9 files changed, 254 insertions(+), 209 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0faba4c..90b00ce 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -319,10 +319,16 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		bytes = min(rsize,count);
 
 		result = -ENOMEM;
-		rhdr = nfs_readhdr_alloc(nfs_page_array_len(pgbase, bytes));
+		rhdr = nfs_readhdr_alloc();
 		if (unlikely(!rhdr))
 			break;
-		data = &rhdr->rpc_data;
+		data = nfs_readdata_alloc(&rhdr->header, nfs_page_array_len(pgbase, bytes));
+		if (!data) {
+			nfs_readhdr_free(&rhdr->header);
+			break;
+		}
+		data->header = &rhdr->header;
+		atomic_inc(&data->header->refcnt);
 		pages = &data->pages;
 
 		down_read(&current->mm->mmap_sem);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 0818e66..c4abe2b 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -199,6 +199,7 @@ struct vfsmount *nfs_do_refmount(struct dentry *dentry)
 extern struct svc_version nfs4_callback_version1;
 extern struct svc_version nfs4_callback_version4;
 
+struct nfs_pageio_descriptor;
 /* pagelist.c */
 extern int __init nfs_init_nfspagecache(void);
 extern void nfs_destroy_nfspagecache(void);
@@ -210,6 +211,11 @@ extern void nfs_destroy_writepagecache(void);
 extern int __init nfs_init_directcache(void);
 extern void nfs_destroy_directcache(void);
 extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
+extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
+			      struct nfs_pgio_header *hdr,
+			      void (*release)(struct nfs_pgio_header *hdr));
+void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error,
+			unsigned long loc);
 
 /* nfs2xdr.c */
 extern int nfs_stat_to_errno(enum nfs_stat);
@@ -295,17 +301,19 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
 extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
 #endif
 
-struct nfs_pageio_descriptor;
 /* read.c */
-extern struct nfs_read_header *nfs_readhdr_alloc(unsigned int npages);
+extern void nfs_async_read_error(struct list_head *head);
+extern struct nfs_read_header *nfs_readhdr_alloc(void);
 extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
+extern void nfs_read_completion(struct nfs_pgio_header *hdr);
+extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
+						unsigned int pagecount);
 extern int nfs_initiate_read(struct rpc_clnt *clnt,
 			     struct nfs_read_data *data,
 			     const struct rpc_call_ops *call_ops);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
-		struct list_head *head);
-
+			      struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
 		struct inode *inode);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 850ae85..d27035b 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -227,7 +227,6 @@ static void filelayout_read_release(void *data)
 {
 	struct nfs_read_data *rdata = data;
 
-	put_lseg(rdata->header->lseg);
 	rdata->header->mds_ops->rpc_release(data);
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 75d5959..16a3877 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3336,8 +3336,6 @@ void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
 	struct inode *inode = hdr->inode;
 
 	dprintk("%s Reset task for i/o through\n", __func__);
-	put_lseg(hdr->lseg);
-	hdr->lseg = NULL;
 	data->ds_clp = NULL;
 	/* offsets will differ in the dense stripe case */
 	data->args.offset = data->mds_offset;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d349bd4..16b8a04 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -39,6 +39,28 @@ bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
 	return p->pagevec != NULL;
 }
 
+void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
+		       struct nfs_pgio_header *hdr,
+		       void (*release)(struct nfs_pgio_header *hdr))
+{
+	hdr->req = nfs_list_entry(desc->pg_list.next);
+	hdr->inode = desc->pg_inode;
+	hdr->cred = hdr->req->wb_context->cred;
+	hdr->first_error = req_offset(hdr->req) + desc->pg_count;
+	hdr->release = release;
+}
+
+void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error,
+			unsigned long loc)
+{
+	spin_lock(&hdr->lock);
+	if (test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags))
+		loc = min(loc, hdr->first_error);
+	hdr->first_error = loc;
+	hdr->error = error;
+	spin_unlock(&hdr->lock);
+}
+
 static inline struct nfs_page *
 nfs_page_alloc(void)
 {
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b00170a..5947a90 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1333,7 +1333,9 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
 		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
 		pnfs_return_layout(hdr->inode);
 	}
-	data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages);
+	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
+		data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
+								&hdr->pages);
 }
 
 /*
@@ -1348,7 +1350,6 @@ void pnfs_ld_read_done(struct nfs_read_data *data)
 		hdr->mds_ops->rpc_call_done(&data->task, data);
 	} else
 		pnfs_ld_handle_read_error(data);
-	put_lseg(hdr->lseg);
 	hdr->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
@@ -1359,11 +1360,11 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
 {
 	struct nfs_pgio_header *hdr = data->header;
 
-	list_splice_tail_init(&hdr->pages, &desc->pg_list);
-	if (hdr->req && list_empty(&hdr->req->wb_list))
-		nfs_list_add_request(hdr->req, &desc->pg_list);
-	nfs_pageio_reset_read_mds(desc);
-	desc->pg_recoalesce = 1;
+	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+		list_splice_tail_init(&hdr->pages, &desc->pg_list);
+		nfs_pageio_reset_read_mds(desc);
+		desc->pg_recoalesce = 1;
+	}
 	nfs_readdata_release(data);
 }
 
@@ -1381,18 +1382,13 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
 	enum pnfs_try_status trypnfs;
 
 	hdr->mds_ops = call_ops;
-	hdr->lseg = get_lseg(lseg);
 
 	dprintk("%s: Reading ino:%lu %u@%llu\n",
 		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
 
 	trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
-	if (trypnfs == PNFS_NOT_ATTEMPTED) {
-		put_lseg(hdr->lseg);
-		hdr->lseg = NULL;
-	} else {
+	if (trypnfs != PNFS_NOT_ATTEMPTED)
 		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
-	}
 	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
 	return trypnfs;
 }
@@ -1408,7 +1404,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
 	while (!list_empty(head)) {
 		enum pnfs_try_status trypnfs;
 
-		data = list_entry(head->next, struct nfs_read_data, list);
+		data = list_first_entry(head, struct nfs_read_data, list);
 		list_del_init(&data->list);
 
 		trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
@@ -1418,20 +1414,41 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
 	put_lseg(lseg);
 }
 
+static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
+{
+	put_lseg(hdr->lseg);
+	nfs_readhdr_free(hdr);
+}
+
 int
 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 {
-	LIST_HEAD(head);
+	struct nfs_read_header *rhdr;
+	struct nfs_pgio_header *hdr;
 	int ret;
 
-	ret = nfs_generic_pagein(desc, &head);
-	if (ret != 0) {
+	rhdr = nfs_readhdr_alloc();
+	if (!rhdr) {
+		nfs_async_read_error(&desc->pg_list);
+		ret = -ENOMEM;
 		put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
 		return ret;
 	}
-	pnfs_do_multiple_reads(desc, &head);
-	return 0;
+	hdr = &rhdr->header;
+	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
+	hdr->lseg = get_lseg(desc->pg_lseg);
+	atomic_inc(&hdr->refcnt);
+	ret = nfs_generic_pagein(desc, hdr);
+	if (ret != 0) {
+		put_lseg(desc->pg_lseg);
+		desc->pg_lseg = NULL;
+		set_bit(NFS_IOHDR_REDO, &hdr->flags);
+	} else
+		pnfs_do_multiple_reads(desc, &hdr->rpc_list);
+	if (atomic_dec_and_test(&hdr->refcnt))
+		nfs_read_completion(hdr);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f6ab30b..ea844b2 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,29 +30,45 @@
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
 static const struct nfs_pageio_ops nfs_pageio_read_ops;
-static const struct rpc_call_ops nfs_read_partial_ops;
-static const struct rpc_call_ops nfs_read_full_ops;
+static const struct rpc_call_ops nfs_read_common_ops;
 
 static struct kmem_cache *nfs_rdata_cachep;
 
-struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount)
+struct nfs_read_header *nfs_readhdr_alloc()
 {
-	struct nfs_read_header *p;
+	struct nfs_read_header *rhdr;
 
-	p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
-	if (p) {
-		struct nfs_pgio_header *hdr = &p->header;
-		struct nfs_read_data *data = &p->rpc_data;
+	rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
+	if (rhdr) {
+		struct nfs_pgio_header *hdr = &rhdr->header;
 
 		INIT_LIST_HEAD(&hdr->pages);
-		INIT_LIST_HEAD(&data->list);
-		data->header = hdr;
+		INIT_LIST_HEAD(&hdr->rpc_list);
+		spin_lock_init(&hdr->lock);
+		atomic_set(&hdr->refcnt, 0);
+	}
+	return rhdr;
+}
+
+struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
+					 unsigned int pagecount)
+{
+	struct nfs_read_data *data;
+
+	if (hdr) {
+		/* Use data preallocated with the header */
+		data = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
+	} else
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+
+	if (data) {
 		if (!nfs_pgarray_set(&data->pages, pagecount)) {
-			kmem_cache_free(nfs_rdata_cachep, p);
-			p = NULL;
+			if (!hdr)
+				kfree(data);
+			data = NULL;
 		}
 	}
-	return p;
+	return data;
 }
 
 void nfs_readhdr_free(struct nfs_pgio_header *hdr)
@@ -64,10 +80,16 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr)
 
 void nfs_readdata_release(struct nfs_read_data *rdata)
 {
+	struct nfs_pgio_header *hdr = rdata->header;
+
 	put_nfs_open_context(rdata->args.context);
 	if (rdata->pages.pagevec != rdata->pages.page_array)
 		kfree(rdata->pages.pagevec);
-	nfs_readhdr_free(rdata->header);
+	if (container_of(hdr, struct nfs_read_header, header) !=
+	    container_of(rdata, struct nfs_read_header, rpc_data))
+		kfree(rdata);
+	if (atomic_dec_and_test(&hdr->refcnt))
+		nfs_read_completion(hdr);
 }
 
 static
@@ -79,35 +101,6 @@ int nfs_return_empty_page(struct page *page)
 	return 0;
 }
 
-static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
-{
-	unsigned int remainder = data->args.count - data->res.count;
-	unsigned int base = data->args.pgbase + data->res.count;
-	unsigned int pglen;
-	struct page **pages;
-
-	if (data->res.eof == 0 || remainder == 0)
-		return;
-	/*
-	 * Note: "remainder" can never be negative, since we check for
-	 * 	this in the XDR code.
-	 */
-	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
-	base &= ~PAGE_CACHE_MASK;
-	pglen = PAGE_CACHE_SIZE - base;
-	for (;;) {
-		if (remainder <= pglen) {
-			zero_user(*pages, base, remainder);
-			break;
-		}
-		zero_user(*pages, base, pglen);
-		pages++;
-		remainder -= pglen;
-		pglen = PAGE_CACHE_SIZE;
-		base = 0;
-	}
-}
-
 void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
 		struct inode *inode)
 {
@@ -170,6 +163,54 @@ static void nfs_readpage_release(struct nfs_page *req)
 	nfs_release_request(req);
 }
 
+/* Note io was page aligned */
+void nfs_read_completion(struct nfs_pgio_header *hdr)
+{
+	unsigned long pos = hdr->req->wb_index << PAGE_CACHE_SHIFT;
+
+	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+		goto out;
+	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
+	    test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
+		/* Note they can't be equal */
+		if (hdr->eof < hdr->first_error)
+			clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
+		else
+			clear_bit(NFS_IOHDR_EOF, &hdr->flags);
+	}
+
+	if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+		while (!list_empty(&hdr->pages)) {
+			struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+			if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
+				if (pos > hdr->eof)
+					zero_user(req->wb_page, 0, PAGE_SIZE);
+				else if (hdr->eof - pos < PAGE_SIZE)
+					zero_user_segment(req->wb_page,
+							  hdr->eof & ~PAGE_MASK,
+							  PAGE_SIZE);
+			}
+			SetPageUptodate(req->wb_page);
+			nfs_list_remove_request(req);
+			nfs_readpage_release(req);
+			pos += PAGE_SIZE;
+		}
+	} else {
+		while (!list_empty(&hdr->pages)) {
+			struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+			if (pos < (hdr->first_error & PAGE_MASK))
+				SetPageUptodate(req->wb_page);
+			nfs_list_remove_request(req);
+			nfs_readpage_release(req);
+			pos += PAGE_SIZE;
+		}
+	}
+out:
+	hdr->release(hdr);
+}
+
 int nfs_initiate_read(struct rpc_clnt *clnt,
 		      struct nfs_read_data *data,
 		      const struct rpc_call_ops *call_ops)
@@ -214,16 +255,12 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
 /*
  * Set up the NFS read request struct
  */
-static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+static void nfs_read_rpcsetup(struct nfs_read_data *data,
 		unsigned int count, unsigned int offset)
 {
-	struct inode *inode = data->header->inode;
-
-	data->header->req	  = req;
-	data->header->inode	  = inode;
-	data->header->cred	  = req->wb_context->cred;
+	struct nfs_page *req = data->header->req;
 
-	data->args.fh     = NFS_FH(inode);
+	data->args.fh     = NFS_FH(data->header->inode);
 	data->args.offset = req_offset(req) + offset;
 	data->args.pgbase = req->wb_pgbase + offset;
 	data->args.pages  = data->pages.pagevec;
@@ -255,7 +292,7 @@ nfs_do_multiple_reads(struct list_head *head,
 	while (!list_empty(head)) {
 		int ret2;
 
-		data = list_entry(head->next, struct nfs_read_data, list);
+		data = list_first_entry(head, struct nfs_read_data, list);
 		list_del_init(&data->list);
 
 		ret2 = nfs_do_read(data, call_ops);
@@ -265,7 +302,7 @@ nfs_do_multiple_reads(struct list_head *head,
 	return ret;
 }
 
-static void
+void
 nfs_async_read_error(struct list_head *head)
 {
 	struct nfs_page	*req;
@@ -290,11 +327,11 @@ nfs_async_read_error(struct list_head *head)
  * won't see the new data until our attribute cache is updated.  This is more
  * or less conventional NFS client behavior.
  */
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
+			    struct nfs_pgio_header *hdr)
 {
-	struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
+	struct nfs_page *req = hdr->req;
 	struct page *page = req->wb_page;
-	struct nfs_read_header *rhdr;
 	struct nfs_read_data *data;
 	size_t rsize = desc->pg_bsize, nbytes;
 	unsigned int offset;
@@ -302,85 +339,101 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
 	int ret = 0;
 
 	nfs_list_remove_request(req);
+	nfs_list_add_request(req, &hdr->pages);
 
 	offset = 0;
 	nbytes = desc->pg_count;
 	do {
 		size_t len = min(nbytes,rsize);
 
-		rhdr = nfs_readhdr_alloc(1);
-		if (!rhdr)
+		data = nfs_readdata_alloc(NULL, 1);
+		if (!data)
 			goto out_bad;
-		data = &rhdr->rpc_data;
+		data->header = hdr;
+		atomic_inc(&hdr->refcnt);
 		data->pages.pagevec[0] = page;
-		nfs_read_rpcsetup(req, data, len, offset);
-		list_add(&data->list, res);
+		nfs_read_rpcsetup(data, len, offset);
+		list_add(&data->list, &hdr->rpc_list);
 		requests++;
 		nbytes -= len;
 		offset += len;
 	} while(nbytes != 0);
-	atomic_set(&req->wb_complete, requests);
-	desc->pg_rpc_callops = &nfs_read_partial_ops;
+	desc->pg_rpc_callops = &nfs_read_common_ops;
 	return ret;
 out_bad:
-	while (!list_empty(res)) {
-		data = list_entry(res->next, struct nfs_read_data, list);
+	while (!list_empty(&hdr->rpc_list)) {
+		data = list_first_entry(&hdr->rpc_list, struct nfs_read_data, list);
 		list_del(&data->list);
 		nfs_readdata_release(data);
 	}
-	nfs_readpage_release(req);
+	nfs_async_read_error(&hdr->pages);
 	return -ENOMEM;
 }
 
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
+			  struct nfs_pgio_header *hdr)
 {
 	struct nfs_page		*req;
 	struct page		**pages;
-	struct nfs_read_header	*rhdr;
-	struct nfs_read_data	*data;
+	struct nfs_read_data    *data;
 	struct list_head *head = &desc->pg_list;
 	int ret = 0;
 
-	rhdr = nfs_readhdr_alloc(nfs_page_array_len(desc->pg_base,
-						    desc->pg_count));
-	if (!rhdr) {
+	data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
+							  desc->pg_count));
+	if (!data) {
 		nfs_async_read_error(head);
 		ret = -ENOMEM;
 		goto out;
 	}
+	data->header = hdr;
+	atomic_inc(&hdr->refcnt);
 
-	data = &rhdr->rpc_data;
 	pages = data->pages.pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
-		nfs_list_add_request(req, &rhdr->header.pages);
+		nfs_list_add_request(req, &hdr->pages);
 		*pages++ = req->wb_page;
 	}
-	req = nfs_list_entry(rhdr->header.pages.next);
 
-	nfs_read_rpcsetup(req, data, desc->pg_count, 0);
-	list_add(&data->list, res);
-	desc->pg_rpc_callops = &nfs_read_full_ops;
+	nfs_read_rpcsetup(data, desc->pg_count, 0);
+	list_add(&data->list, &hdr->rpc_list);
+	desc->pg_rpc_callops = &nfs_read_common_ops;
 out:
 	return ret;
 }
 
-int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
+int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
+		       struct nfs_pgio_header *hdr)
 {
 	if (desc->pg_bsize < PAGE_CACHE_SIZE)
-		return nfs_pagein_multi(desc, head);
-	return nfs_pagein_one(desc, head);
+		return nfs_pagein_multi(desc, hdr);
+	return nfs_pagein_one(desc, hdr);
 }
 
 static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 {
-	LIST_HEAD(head);
+	struct nfs_read_header *rhdr;
+	struct nfs_pgio_header *hdr;
 	int ret;
 
-	ret = nfs_generic_pagein(desc, &head);
+	rhdr = nfs_readhdr_alloc();
+	if (!rhdr) {
+		nfs_async_read_error(&desc->pg_list);
+		return -ENOMEM;
+	}
+	hdr = &rhdr->header;
+	nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
+	atomic_inc(&hdr->refcnt);
+	ret = nfs_generic_pagein(desc, hdr);
 	if (ret == 0)
-		ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
+		ret = nfs_do_multiple_reads(&hdr->rpc_list,
+					    desc->pg_rpc_callops);
+	else
+		set_bit(NFS_IOHDR_REDO, &hdr->flags);
+	if (atomic_dec_and_test(&hdr->refcnt))
+		nfs_read_completion(hdr);
 	return ret;
 }
 
@@ -419,15 +472,13 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
 	struct nfs_readargs *argp = &data->args;
 	struct nfs_readres *resp = &data->res;
 
-	if (resp->eof || resp->count == argp->count)
-		return;
-
 	/* This is a short read! */
 	nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
 	/* Has the server at least made some progress? */
-	if (resp->count == 0)
+	if (resp->count == 0) {
+		nfs_set_pgio_error(data->header, -EIO, argp->offset);
 		return;
-
+	}
 	/* Yes, so retry the read at the end of the data */
 	data->mds_offset += resp->count;
 	argp->offset += resp->count;
@@ -436,38 +487,32 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
 	rpc_restart_call_prepare(task);
 }
 
-/*
- * Handle a read reply that fills part of a page.
- */
-static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
+static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;
- 
+	struct nfs_pgio_header *hdr = data->header;
+
+	/* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
 	if (nfs_readpage_result(task, data) != 0)
 		return;
 	if (task->tk_status < 0)
-		return;
+		nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
+	else if (data->res.eof) {
+		unsigned long bound;
 
-	nfs_readpage_truncate_uninitialised_page(data);
-	nfs_readpage_retry(task, data);
+		bound = data->args.offset + data->res.count;
+		spin_lock(&hdr->lock);
+		if (test_and_set_bit(NFS_IOHDR_EOF, &hdr->flags))
+			bound = min(bound, hdr->eof);
+		hdr->eof = bound;
+		spin_unlock(&hdr->lock);
+	} else if (data->res.count != data->args.count)
+		nfs_readpage_retry(task, data);
 }
 
-static void nfs_readpage_release_partial(void *calldata)
+static void nfs_readpage_release_common(void *calldata)
 {
-	struct nfs_read_data *data = calldata;
-	struct nfs_page *req = data->header->req;
-	struct page *page = req->wb_page;
-	int status = data->task.tk_status;
-
-	if (status < 0)
-		set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags);
-
-	if (atomic_dec_and_test(&req->wb_complete)) {
-		if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags))
-			SetPageUptodate(page);
-		nfs_readpage_release(req);
-	}
-	nfs_readdata_release(data);
+	nfs_readdata_release(calldata);
 }
 
 void nfs_read_prepare(struct rpc_task *task, void *calldata)
@@ -476,75 +521,10 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata)
 	NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
 }
 
-static const struct rpc_call_ops nfs_read_partial_ops = {
-	.rpc_call_prepare = nfs_read_prepare,
-	.rpc_call_done = nfs_readpage_result_partial,
-	.rpc_release = nfs_readpage_release_partial,
-};
-
-static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
-{
-	unsigned int count = data->res.count;
-	unsigned int base = data->args.pgbase;
-	struct page **pages;
-
-	if (data->res.eof)
-		count = data->args.count;
-	if (unlikely(count == 0))
-		return;
-	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
-	base &= ~PAGE_CACHE_MASK;
-	count += base;
-	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
-		SetPageUptodate(*pages);
-	if (count == 0)
-		return;
-	/* Was this a short read? */
-	if (data->res.eof || data->res.count == data->args.count)
-		SetPageUptodate(*pages);
-}
-
-/*
- * This is the callback from RPC telling us whether a reply was
- * received or some error occurred (timeout or socket shutdown).
- */
-static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
-{
-	struct nfs_read_data *data = calldata;
-
-	if (nfs_readpage_result(task, data) != 0)
-		return;
-	if (task->tk_status < 0)
-		return;
-	/*
-	 * Note: nfs_readpage_retry may change the values of
-	 * data->args. In the multi-page case, we therefore need
-	 * to ensure that we call nfs_readpage_set_pages_uptodate()
-	 * first.
-	 */
-	nfs_readpage_truncate_uninitialised_page(data);
-	nfs_readpage_set_pages_uptodate(data);
-	nfs_readpage_retry(task, data);
-}
-
-static void nfs_readpage_release_full(void *calldata)
-{
-	struct nfs_read_data *data = calldata;
-	struct nfs_pgio_header *hdr = data->header;
-
-	while (!list_empty(&hdr->pages)) {
-		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
-
-		nfs_list_remove_request(req);
-		nfs_readpage_release(req);
-	}
-	nfs_readdata_release(calldata);
-}
-
-static const struct rpc_call_ops nfs_read_full_ops = {
+static const struct rpc_call_ops nfs_read_common_ops = {
 	.rpc_call_prepare = nfs_read_prepare,
-	.rpc_call_done = nfs_readpage_result_full,
-	.rpc_release = nfs_readpage_release_full,
+	.rpc_call_done = nfs_readpage_result_common,
+	.rpc_release = nfs_readpage_release_common,
 };
 
 /*
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index eac30d6..5c52034 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -27,7 +27,6 @@ enum {
 	PG_CLEAN,
 	PG_NEED_COMMIT,
 	PG_NEED_RESCHED,
-	PG_PARTIAL_READ_FAILED,
 	PG_COMMIT_TO_DS,
 };
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 060896f..386ebfb 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1182,14 +1182,30 @@ struct nfs_read_data {
 	struct nfs_client	*ds_clp;	/* pNFS data server */
 };
 
+/* used as flag bits in nfs_pgio_header */
+enum {
+	NFS_IOHDR_ERROR = 0,
+	NFS_IOHDR_EOF,
+	NFS_IOHDR_REDO,
+};
+
 struct nfs_pgio_header {
 	struct inode		*inode;
 	struct rpc_cred		*cred;
 	struct list_head	pages;
+	struct list_head	rpc_list;
+	atomic_t		refcnt;
 	struct nfs_page		*req;
 	struct pnfs_layout_segment *lseg;
 	const struct rpc_call_ops *mds_ops;
+	void (*release) (struct nfs_pgio_header *hdr);
+	spinlock_t		lock;
+	/* fields protected by lock */
 	int			pnfs_error;
+	int			error;		/* merge with pnfs_error */
+	unsigned long		eof;		/* where saw eof marker */
+	unsigned long		first_error;	/* boundary of good data */
+	unsigned long		flags;
 };
 
 struct nfs_read_header {
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 15/26] NFS: merge _full and _partial write rpc_ops
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (13 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 14/26] NFS: merge _full and _partial read rpc_ops Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 16/26] NFS: create completion structure to pass into page_init functions Fred Isaman
                   ` (10 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Decouple nfs_pgio_header and nfs_write_data, and have (possibly
multiple) nfs_write_datas each take a refcount on nfs_pgio_header.

For the moment keeps nfs_write_header as a way to preallocate a single
nfs_write_data with the nfs_pgio_header.  The code doesn't need this,
and would be prettier without, but given the amount of churn I am
already introducing I didn't want to play with tuning new mempools.

This also fixes bug in pnfs_ld_handle_write_error.  In the case of
desc->pg_bsize < PAGE_CACHE_SIZE, the pages list was empty, causing
replay attempt to do nothing.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/direct.c         |   10 +-
 fs/nfs/internal.h       |    8 +-
 fs/nfs/nfs4filelayout.c |    1 -
 fs/nfs/nfs4proc.c       |    4 +-
 fs/nfs/pnfs.c           |   58 +++++---
 fs/nfs/write.c          |  379 +++++++++++++++++++++--------------------------
 include/linux/nfs_xdr.h |    2 +
 7 files changed, 224 insertions(+), 238 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 90b00ce..22a40c4 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -768,11 +768,17 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		bytes = min(wsize,count);
 
 		result = -ENOMEM;
-		whdr = nfs_writehdr_alloc(nfs_page_array_len(pgbase, bytes));
+		whdr = nfs_writehdr_alloc();
 		if (unlikely(!whdr))
 			break;
 
-		data = &whdr->rpc_data;
+		data = nfs_writedata_alloc(&whdr->header, nfs_page_array_len(pgbase, bytes));
+		if (!data) {
+			nfs_writehdr_free(&whdr->header);
+			break;
+		}
+		data->header = &whdr->header;
+		atomic_inc(&data->header->refcnt);
 		pages = &data->pages;
 
 		down_read(&current->mm->mmap_sem);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c4abe2b..722f521 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -320,10 +320,14 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
 /* write.c */
-extern struct nfs_write_header *nfs_writehdr_alloc(unsigned int npages);
+extern void nfs_async_write_error(struct list_head *head);
+extern struct nfs_write_header *nfs_writehdr_alloc(void);
 extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
+extern struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
+						  unsigned int pagecount);
+extern void nfs_write_completion(struct nfs_pgio_header *hdr);
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
-		struct list_head *head);
+			     struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
 				  struct inode *inode, int ioflags);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index d27035b..2c78a46 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -314,7 +314,6 @@ static void filelayout_write_release(void *data)
 {
 	struct nfs_write_data *wdata = data;
 
-	put_lseg(wdata->header->lseg);
 	wdata->header->mds_ops->rpc_release(data);
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 16a3877..6365b02 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3376,8 +3376,6 @@ void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
 	struct inode *inode = hdr->inode;
 
 	dprintk("%s Reset task for i/o through\n", __func__);
-	put_lseg(hdr->lseg);
-	hdr->lseg        = NULL;
 	data->ds_clp     = NULL;
 	data->write_done_cb = nfs4_write_done_cb;
 	data->args.fh       = NFS_FH(inode);
@@ -3393,7 +3391,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
 {
 	struct nfs_server *server = NFS_SERVER(data->header->inode);
 
-	if (data->header->lseg) {
+	if (data->ds_clp) {
 		data->args.bitmask = NULL;
 		data->res.fattr = NULL;
 	} else
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 5947a90..2b89b54 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1199,7 +1199,9 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
 		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
 		pnfs_return_layout(hdr->inode);
 	}
-	data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages);
+	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
+		data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
+								&hdr->pages);
 }
 
 /*
@@ -1214,7 +1216,6 @@ void pnfs_ld_write_done(struct nfs_write_data *data)
 		hdr->mds_ops->rpc_call_done(&data->task, data);
 	} else
 		pnfs_ld_handle_write_error(data);
-	put_lseg(hdr->lseg);
 	hdr->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
@@ -1225,12 +1226,11 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
 {
 	struct nfs_pgio_header *hdr = data->header;
 
-	list_splice_tail_init(&hdr->pages, &desc->pg_list);
-	if (hdr->req && list_empty(&hdr->req->wb_list))
-		nfs_list_add_request(hdr->req, &desc->pg_list);
-	nfs_pageio_reset_write_mds(desc);
-	desc->pg_recoalesce = 1;
-	put_lseg(hdr->lseg);
+	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+		list_splice_tail_init(&hdr->pages, &desc->pg_list);
+		nfs_pageio_reset_write_mds(desc);
+		desc->pg_recoalesce = 1;
+	}
 	nfs_writedata_release(data);
 }
 
@@ -1246,18 +1246,12 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
 	struct nfs_server *nfss = NFS_SERVER(inode);
 
 	hdr->mds_ops = call_ops;
-	hdr->lseg = get_lseg(lseg);
 
 	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
 		inode->i_ino, wdata->args.count, wdata->args.offset, how);
-
 	trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
-	if (trypnfs == PNFS_NOT_ATTEMPTED) {
-		put_lseg(hdr->lseg);
-		hdr->lseg = NULL;
-	} else
+	if (trypnfs != PNFS_NOT_ATTEMPTED)
 		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
-
 	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
 	return trypnfs;
 }
@@ -1273,7 +1267,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
 	while (!list_empty(head)) {
 		enum pnfs_try_status trypnfs;
 
-		data = list_entry(head->next, struct nfs_write_data, list);
+		data = list_first_entry(head, struct nfs_write_data, list);
 		list_del_init(&data->list);
 
 		trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
@@ -1283,20 +1277,40 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
 	put_lseg(lseg);
 }
 
+static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
+{
+	put_lseg(hdr->lseg);
+	nfs_writehdr_free(hdr);
+}
+
 int
 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 {
-	LIST_HEAD(head);
+	struct nfs_write_header *whdr;
+	struct nfs_pgio_header *hdr;
 	int ret;
 
-	ret = nfs_generic_flush(desc, &head);
-	if (ret != 0) {
+	whdr = nfs_writehdr_alloc();
+	if (!whdr) {
+		nfs_async_write_error(&desc->pg_list);
 		put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
-		return ret;
+		return -ENOMEM;
 	}
-	pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
-	return 0;
+	hdr = &whdr->header;
+	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
+	hdr->lseg = get_lseg(desc->pg_lseg);
+	atomic_inc(&hdr->refcnt);
+	ret = nfs_generic_flush(desc, hdr);
+	if (ret != 0) {
+		put_lseg(desc->pg_lseg);
+		desc->pg_lseg = NULL;
+		set_bit(NFS_IOHDR_REDO, &hdr->flags);
+	} else
+		pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
+	if (atomic_dec_and_test(&hdr->refcnt))
+		nfs_write_completion(hdr);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2efae04..cde0fa2 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -42,8 +42,7 @@
 static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
 				  struct inode *inode, int ioflags);
 static void nfs_redirty_request(struct nfs_page *req);
-static const struct rpc_call_ops nfs_write_partial_ops;
-static const struct rpc_call_ops nfs_write_full_ops;
+static const struct rpc_call_ops nfs_write_common_ops;
 static const struct rpc_call_ops nfs_commit_ops;
 
 static struct kmem_cache *nfs_wdata_cachep;
@@ -69,24 +68,41 @@ void nfs_commit_free(struct nfs_commit_data *p)
 }
 EXPORT_SYMBOL_GPL(nfs_commit_free);
 
-struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount)
+struct nfs_write_header *nfs_writehdr_alloc(void)
 {
 	struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
 
 	if (p) {
 		struct nfs_pgio_header *hdr = &p->header;
-		struct nfs_write_data *data = &p->rpc_data;
 
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&hdr->pages);
-		INIT_LIST_HEAD(&data->list);
-		data->header = hdr;
+		INIT_LIST_HEAD(&hdr->rpc_list);
+		spin_lock_init(&hdr->lock);
+		atomic_set(&hdr->refcnt, 0);
+	}
+	return p;
+}
+
+struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
+					   unsigned int pagecount)
+{
+	struct nfs_write_data *data;
+
+	if (hdr) {
+		/* Use data preallocated with the header */
+		data = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
+	} else
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+
+	if (data) {
 		if (!nfs_pgarray_set(&data->pages, pagecount)) {
-			mempool_free(p, nfs_wdata_mempool);
-			p = NULL;
+			if (!hdr)
+				kfree(data);
+			data = NULL;
 		}
 	}
-	return p;
+	return data;
 }
 
 void nfs_writehdr_free(struct nfs_pgio_header *hdr)
@@ -97,10 +113,16 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 
 void nfs_writedata_release(struct nfs_write_data *wdata)
 {
+	struct nfs_pgio_header *hdr = wdata->header;
+
 	put_nfs_open_context(wdata->args.context);
 	if (wdata->pages.pagevec != wdata->pages.page_array)
 		kfree(wdata->pages.pagevec);
-	nfs_writehdr_free(wdata->header);
+	if (container_of(hdr, struct nfs_write_header, header) !=
+	    container_of(wdata, struct nfs_write_header, rpc_data))
+		kfree(wdata);
+	if (atomic_dec_and_test(&hdr->refcnt))
+		nfs_write_completion(hdr);
 }
 
 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -511,20 +533,6 @@ int nfs_write_need_commit(struct nfs_write_data *data)
 	return data->verf.committed != NFS_FILE_SYNC;
 }
 
-static inline
-int nfs_reschedule_unstable_write(struct nfs_page *req,
-				  struct nfs_write_data *data)
-{
-	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
-		nfs_mark_request_commit(req, data->header->lseg);
-		return 1;
-	}
-	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
-		nfs_mark_request_dirty(req);
-		return 1;
-	}
-	return 0;
-}
 #else
 static void
 nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
@@ -542,13 +550,43 @@ int nfs_write_need_commit(struct nfs_write_data *data)
 	return 0;
 }
 
-static inline
-int nfs_reschedule_unstable_write(struct nfs_page *req,
-				  struct nfs_write_data *data)
+#endif
+
+void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
-	return 0;
+	unsigned long pos = hdr->req->wb_index << PAGE_CACHE_SHIFT;
+
+	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+		goto out;
+	while (!list_empty(&hdr->pages)) {
+		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+		struct page *page = req->wb_page;
+
+		nfs_list_remove_request(req);
+		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
+		    (pos >= (hdr->first_error & PAGE_MASK))) {
+			nfs_set_pageerror(page);
+			nfs_context_set_write_error(req->wb_context, hdr->error);
+			goto remove_req;
+		}
+		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
+			nfs_mark_request_dirty(req);
+			goto next;
+		}
+		if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+			nfs_mark_request_commit(req, hdr->lseg);
+			goto next;
+		}
+remove_req:
+		nfs_inode_remove_request(req);
+next:
+		nfs_unlock_request(req);
+		nfs_end_page_writeback(page);
+		pos += PAGE_SIZE;
+	}
+out:
+	hdr->release(hdr);
 }
-#endif
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static int
@@ -813,17 +851,6 @@ int nfs_updatepage(struct file *file, struct page *page,
 	return status;
 }
 
-static void nfs_writepage_release(struct nfs_page *req,
-				  struct nfs_write_data *data)
-{
-	struct page *page = req->wb_page;
-
-	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
-		nfs_inode_remove_request(req);
-	nfs_unlock_request(req);
-	nfs_end_page_writeback(page);
-}
-
 static int flush_task_priority(int how)
 {
 	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
@@ -890,22 +917,16 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
 /*
  * Set up the argument/result storage required for the RPC call.
  */
-static void nfs_write_rpcsetup(struct nfs_page *req,
-		struct nfs_write_data *data,
+static void nfs_write_rpcsetup(struct nfs_write_data *data,
 		unsigned int count, unsigned int offset,
 		int how)
 {
-	struct nfs_pgio_header *hdr = data->header;
-	struct inode *inode = req->wb_context->dentry->d_inode;
+	struct nfs_page *req = data->header->req;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
 
-	hdr->req = req;
-	hdr->inode = inode = req->wb_context->dentry->d_inode;
-	hdr->cred = req->wb_context->cred;
-
-	data->args.fh     = NFS_FH(inode);
+	data->args.fh     = NFS_FH(data->header->inode);
 	data->args.offset = req_offset(req) + offset;
 	/* pnfs_set_layoutcommit needs this */
 	data->mds_offset = data->args.offset;
@@ -919,7 +940,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	case 0:
 		break;
 	case FLUSH_COND_STABLE:
-		if (nfs_need_commit(NFS_I(inode)))
+		if (nfs_need_commit(NFS_I(data->header->inode)))
 			break;
 	default:
 		data->args.stable = NFS_FILE_SYNC;
@@ -950,7 +971,7 @@ static int nfs_do_multiple_writes(struct list_head *head,
 	while (!list_empty(head)) {
 		int ret2;
 
-		data = list_entry(head->next, struct nfs_write_data, list);
+		data = list_first_entry(head, struct nfs_write_data, list);
 		list_del_init(&data->list);
 		
 		ret2 = nfs_do_write(data, call_ops, how);
@@ -973,15 +994,26 @@ static void nfs_redirty_request(struct nfs_page *req)
 	nfs_end_page_writeback(page);
 }
 
+void nfs_async_write_error(struct list_head *head)
+{
+	struct nfs_page	*req;
+
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_redirty_request(req);
+	}
+}
+
 /*
  * Generate multiple small requests to write out a single
  * contiguous dirty area on one page.
  */
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
+			   struct nfs_pgio_header *hdr)
 {
-	struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
+	struct nfs_page *req = hdr->req;
 	struct page *page = req->wb_page;
-	struct nfs_write_header *whdr;
 	struct nfs_write_data *data;
 	size_t wsize = desc->pg_bsize, nbytes;
 	unsigned int offset;
@@ -989,6 +1021,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
 	int ret = 0;
 
 	nfs_list_remove_request(req);
+	nfs_list_add_request(req, &hdr->pages);
 
 	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
 	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
@@ -1001,28 +1034,29 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
 	do {
 		size_t len = min(nbytes, wsize);
 
-		whdr = nfs_writehdr_alloc(1);
-		if (!whdr)
+		data = nfs_writedata_alloc(NULL, 1);
+		if (!data)
 			goto out_bad;
-		data = &whdr->rpc_data;
+		data->header = hdr;
+		atomic_inc(&hdr->refcnt);
 		data->pages.pagevec[0] = page;
-		nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
-		list_add(&data->list, res);
+		nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags);
+		list_add(&data->list, &hdr->rpc_list);
 		requests++;
 		nbytes -= len;
 		offset += len;
 	} while (nbytes != 0);
 	atomic_set(&req->wb_complete, requests);
-	desc->pg_rpc_callops = &nfs_write_partial_ops;
+	desc->pg_rpc_callops = &nfs_write_common_ops;
 	return ret;
 
 out_bad:
-	while (!list_empty(res)) {
-		data = list_entry(res->next, struct nfs_write_data, list);
+	while (!list_empty(&hdr->rpc_list)) {
+		data = list_first_entry(&hdr->rpc_list, struct nfs_write_data, list);
 		list_del(&data->list);
 		nfs_writedata_release(data);
 	}
-	nfs_redirty_request(req);
+	nfs_async_write_error(&hdr->pages);
 	return -ENOMEM;
 }
 
@@ -1034,64 +1068,76 @@ out_bad:
  * This is the case if nfs_updatepage detects a conflicting request
  * that has been written but not committed.
  */
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
+			 struct nfs_pgio_header *hdr)
 {
 	struct nfs_page		*req;
 	struct page		**pages;
-	struct nfs_write_header	*whdr;
 	struct nfs_write_data	*data;
 	struct list_head *head = &desc->pg_list;
 	int ret = 0;
 
-	whdr = nfs_writehdr_alloc(nfs_page_array_len(desc->pg_base,
-						     desc->pg_count));
-	if (!whdr) {
-		while (!list_empty(head)) {
-			req = nfs_list_entry(head->next);
-			nfs_list_remove_request(req);
-			nfs_redirty_request(req);
-		}
+	data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
+							   desc->pg_count));
+	if (!data) {
+		nfs_async_write_error(head);
 		ret = -ENOMEM;
 		goto out;
 	}
-	data = &whdr->rpc_data;
+	data->header = hdr;
+	atomic_inc(&hdr->refcnt);
+
 	pages = data->pages.pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
-		nfs_list_add_request(req, &whdr->header.pages);
+		nfs_list_add_request(req, &hdr->pages);
 		*pages++ = req->wb_page;
 	}
-	req = nfs_list_entry(whdr->header.pages.next);
 
 	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
 	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
 		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
 	/* Set up the argument struct */
-	nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
-	list_add(&data->list, res);
-	desc->pg_rpc_callops = &nfs_write_full_ops;
+	nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags);
+	list_add(&data->list, &hdr->rpc_list);
+	desc->pg_rpc_callops = &nfs_write_common_ops;
 out:
 	return ret;
 }
 
-int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
+int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
+		      struct nfs_pgio_header *hdr)
 {
 	if (desc->pg_bsize < PAGE_CACHE_SIZE)
-		return nfs_flush_multi(desc, head);
-	return nfs_flush_one(desc, head);
+		return nfs_flush_multi(desc, hdr);
+	return nfs_flush_one(desc, hdr);
 }
 
 static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 {
-	LIST_HEAD(head);
+	struct nfs_write_header *whdr;
+	struct nfs_pgio_header *hdr;
 	int ret;
 
-	ret = nfs_generic_flush(desc, &head);
+	whdr = nfs_writehdr_alloc();
+	if (!whdr) {
+		nfs_async_write_error(&desc->pg_list);
+		return -ENOMEM;
+	}
+	hdr = &whdr->header;
+	nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
+	atomic_inc(&hdr->refcnt);
+	ret = nfs_generic_flush(desc, hdr);
 	if (ret == 0)
-		ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
-				desc->pg_ioflags);
+		ret = nfs_do_multiple_writes(&hdr->rpc_list,
+					     desc->pg_rpc_callops,
+					     desc->pg_ioflags);
+	else
+		set_bit(NFS_IOHDR_REDO, &hdr->flags);
+	if (atomic_dec_and_test(&hdr->refcnt))
+		nfs_write_completion(hdr);
 	return ret;
 }
 
@@ -1121,62 +1167,6 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 		nfs_pageio_init_write_mds(pgio, inode, ioflags);
 }
 
-/*
- * Handle a write reply that flushed part of a page.
- */
-static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
-{
-	struct nfs_write_data	*data = calldata;
-
-	dprintk("NFS: %5u write(%s/%lld %d@%lld)",
-		task->tk_pid,
-		data->header->inode->i_sb->s_id,
-		(long long)
-		  NFS_FILEID(data->header->inode),
-		data->header->req->wb_bytes,
-		(long long)req_offset(data->header->req));
-
-	nfs_writeback_done(task, data);
-}
-
-static void nfs_writeback_release_partial(void *calldata)
-{
-	struct nfs_write_data	*data = calldata;
-	struct nfs_page		*req = data->header->req;
-	struct page		*page = req->wb_page;
-	int status = data->task.tk_status;
-
-	if (status < 0) {
-		nfs_set_pageerror(page);
-		nfs_context_set_write_error(req->wb_context, status);
-		dprintk(", error = %d\n", status);
-		goto out;
-	}
-
-	if (nfs_write_need_commit(data)) {
-		struct inode *inode = page->mapping->host;
-
-		spin_lock(&inode->i_lock);
-		if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
-			/* Do nothing we need to resend the writes */
-		} else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
-			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-			dprintk(" defer commit\n");
-		} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
-			set_bit(PG_NEED_RESCHED, &req->wb_flags);
-			clear_bit(PG_NEED_COMMIT, &req->wb_flags);
-			dprintk(" server reboot detected\n");
-		}
-		spin_unlock(&inode->i_lock);
-	} else
-		dprintk(" OK\n");
-
-out:
-	if (atomic_dec_and_test(&req->wb_complete))
-		nfs_writepage_release(req, data);
-	nfs_writedata_release(data);
-}
-
 void nfs_write_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
@@ -1190,12 +1180,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
 	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
 }
 
-static const struct rpc_call_ops nfs_write_partial_ops = {
-	.rpc_call_prepare = nfs_write_prepare,
-	.rpc_call_done = nfs_writeback_done_partial,
-	.rpc_release = nfs_writeback_release_partial,
-};
-
 /*
  * Handle a write reply that flushes a whole page.
  *
@@ -1203,60 +1187,37 @@ static const struct rpc_call_ops nfs_write_partial_ops = {
  *	  writebacks since the page->count is kept > 1 for as long
  *	  as the page has a write request pending.
  */
-static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
+static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
 
 	nfs_writeback_done(task, data);
 }
 
-static void nfs_writeback_release_full(void *calldata)
+static void nfs_writeback_release_common(void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
 	struct nfs_pgio_header *hdr = data->header;
 	int status = data->task.tk_status;
+	struct nfs_page *req = hdr->req;
 
-	/* Update attributes as result of writeback. */
-	while (!list_empty(&hdr->pages)) {
-		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
-		struct page *page = req->wb_page;
-
-		nfs_list_remove_request(req);
-
-		dprintk("NFS: %5u write (%s/%lld %d@%lld)",
-			data->task.tk_pid,
-			req->wb_context->dentry->d_inode->i_sb->s_id,
-			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
-			req->wb_bytes,
-			(long long)req_offset(req));
-
-		if (status < 0) {
-			nfs_set_pageerror(page);
-			nfs_context_set_write_error(req->wb_context, status);
-			dprintk(", error = %d\n", status);
-			goto remove_request;
-		}
-
-		if (nfs_write_need_commit(data)) {
+	if ((status >= 0) && nfs_write_need_commit(data)) {
+		spin_lock(&hdr->lock);
+		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
+			; /* Do nothing */
+		else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
 			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-			nfs_mark_request_commit(req, hdr->lseg);
-			dprintk(" marked for commit\n");
-			goto next;
-		}
-		dprintk(" OK\n");
-remove_request:
-		nfs_inode_remove_request(req);
-	next:
-		nfs_unlock_request(req);
-		nfs_end_page_writeback(page);
+		else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf)))
+			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
+		spin_unlock(&hdr->lock);
 	}
 	nfs_writedata_release(data);
 }
 
-static const struct rpc_call_ops nfs_write_full_ops = {
+static const struct rpc_call_ops nfs_write_common_ops = {
 	.rpc_call_prepare = nfs_write_prepare,
-	.rpc_call_done = nfs_writeback_done_full,
-	.rpc_release = nfs_writeback_release_full,
+	.rpc_call_done = nfs_writeback_done_common,
+	.rpc_release = nfs_writeback_release_common,
 };
 
 
@@ -1307,38 +1268,40 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 		}
 	}
 #endif
-	/* Is this a short write? */
-	if (task->tk_status >= 0 && resp->count < argp->count) {
+	if (task->tk_status < 0)
+		nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
+	else if (resp->count < argp->count) {
 		static unsigned long    complain;
 
+		/* This a short write! */
 		nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
 
 		/* Has the server at least made some progress? */
-		if (resp->count != 0) {
-			/* Was this an NFSv2 write or an NFSv3 stable write? */
-			if (resp->verf->committed != NFS_UNSTABLE) {
-				/* Resend from where the server left off */
-				data->mds_offset += resp->count;
-				argp->offset += resp->count;
-				argp->pgbase += resp->count;
-				argp->count -= resp->count;
-			} else {
-				/* Resend as a stable write in order to avoid
-				 * headaches in the case of a server crash.
-				 */
-				argp->stable = NFS_FILE_SYNC;
+		if (resp->count == 0) {
+			if (time_before(complain, jiffies)) {
+				printk(KERN_WARNING
+				       "NFS: Server wrote zero bytes, expected %u.\n",
+				       argp->count);
+				complain = jiffies + 300 * HZ;
 			}
-			rpc_restart_call_prepare(task);
+			nfs_set_pgio_error(data->header, -EIO, argp->offset);
+			task->tk_status = -EIO;
 			return;
 		}
-		if (time_before(complain, jiffies)) {
-			printk(KERN_WARNING
-			       "NFS: Server wrote zero bytes, expected %u.\n",
-					argp->count);
-			complain = jiffies + 300 * HZ;
+		/* Was this an NFSv2 write or an NFSv3 stable write? */
+		if (resp->verf->committed != NFS_UNSTABLE) {
+			/* Resend from where the server left off */
+			data->mds_offset += resp->count;
+			argp->offset += resp->count;
+			argp->pgbase += resp->count;
+			argp->count -= resp->count;
+		} else {
+			/* Resend as a stable write in order to avoid
+			 * headaches in the case of a server crash.
+			 */
+			argp->stable = NFS_FILE_SYNC;
 		}
-		/* Can't do anything about it except throw an error. */
-		task->tk_status = -EIO;
+		rpc_restart_call_prepare(task);
 	}
 }
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 386ebfb..2bbb7c7 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1187,6 +1187,8 @@ enum {
 	NFS_IOHDR_ERROR = 0,
 	NFS_IOHDR_EOF,
 	NFS_IOHDR_REDO,
+	NFS_IOHDR_NEED_COMMIT,
+	NFS_IOHDR_NEED_RESCHED,
 };
 
 struct nfs_pgio_header {
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 16/26] NFS: create completion structure to pass into page_init functions
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (14 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 15/26] NFS: merge _full and _partial write rpc_ops Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-16 21:07   ` Myklebust, Trond
  2012-04-09 20:52 ` [PATCH 17/26] NFS: remove unused wb_complete field from struct nfs_page Fred Isaman
                   ` (9 subsequent siblings)
  25 siblings, 1 reply; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Factors out the code that will need to change when directio
starts using these code paths.  This will allow directio to use
the generic pagein and flush routines

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/internal.h        |   11 +++++------
 fs/nfs/pagelist.c        |    3 +++
 fs/nfs/pnfs.c            |   39 +++++++++++++++++++++++++--------------
 fs/nfs/pnfs.h            |    6 ++++--
 fs/nfs/read.c            |   36 ++++++++++++++++++++++--------------
 fs/nfs/write.c           |   41 ++++++++++++++++++++++++++---------------
 include/linux/nfs_page.h |    2 ++
 include/linux/nfs_xdr.h  |    6 ++++++
 8 files changed, 93 insertions(+), 51 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 722f521..efa5124 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -301,11 +301,10 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
 extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
 #endif
 
+struct nfs_io_completion_ops;
 /* read.c */
-extern void nfs_async_read_error(struct list_head *head);
 extern struct nfs_read_header *nfs_readhdr_alloc(void);
 extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
-extern void nfs_read_completion(struct nfs_pgio_header *hdr);
 extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
 						unsigned int pagecount);
 extern int nfs_initiate_read(struct rpc_clnt *clnt,
@@ -315,21 +314,21 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
 			      struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
-		struct inode *inode);
+				struct inode *inode,
+				const struct nfs_io_completion_ops *compl_ops);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
 /* write.c */
-extern void nfs_async_write_error(struct list_head *head);
 extern struct nfs_write_header *nfs_writehdr_alloc(void);
 extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
 extern struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
 						  unsigned int pagecount);
-extern void nfs_write_completion(struct nfs_pgio_header *hdr);
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
 			     struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
-				  struct inode *inode, int ioflags);
+				struct inode *inode, int ioflags,
+				const struct nfs_io_completion_ops *compl_ops);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_writedata_release(struct nfs_write_data *wdata);
 extern void nfs_commit_free(struct nfs_commit_data *p);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 16b8a04..e2c2af0 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -48,6 +48,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 	hdr->cred = hdr->req->wb_context->cred;
 	hdr->first_error = req_offset(hdr->req) + desc->pg_count;
 	hdr->release = release;
+	hdr->compl_ops = desc->pg_compl_ops;
 }
 
 void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error,
@@ -238,6 +239,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
 void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 		     struct inode *inode,
 		     const struct nfs_pageio_ops *pg_ops,
+		     const struct nfs_io_completion_ops *compl_ops,
 		     size_t bsize,
 		     int io_flags)
 {
@@ -250,6 +252,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_recoalesce = 0;
 	desc->pg_inode = inode;
 	desc->pg_ops = pg_ops;
+	desc->pg_compl_ops = compl_ops;
 	desc->pg_ioflags = io_flags;
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 2b89b54..02dd84b 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1113,26 +1113,31 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
 
 bool
-pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+		      const struct nfs_io_completion_ops *compl_ops)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
 
 	if (ld == NULL)
 		return false;
-	nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
+	nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops,
+			server->rsize, 0);
 	return true;
 }
 
 bool
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+		       int ioflags,
+		       const struct nfs_io_completion_ops *compl_ops)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
 
 	if (ld == NULL)
 		return false;
-	nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
+	nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops,
+			server->wsize, ioflags);
 	return true;
 }
 
@@ -1162,13 +1167,15 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
 
-static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head)
+static int pnfs_write_done_resend_to_mds(struct inode *inode,
+				struct list_head *head,
+				const struct nfs_io_completion_ops *compl_ops)
 {
 	struct nfs_pageio_descriptor pgio;
 	LIST_HEAD(failed);
 
 	/* Resend all requests through the MDS */
-	nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE);
+	nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops);
 	while (!list_empty(head)) {
 		struct nfs_page *req = nfs_list_entry(head->next);
 
@@ -1201,7 +1208,8 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
 	}
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
 		data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
-								&hdr->pages);
+								&hdr->pages,
+								hdr->compl_ops);
 }
 
 /*
@@ -1292,7 +1300,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 
 	whdr = nfs_writehdr_alloc();
 	if (!whdr) {
-		nfs_async_write_error(&desc->pg_list);
+		desc->pg_compl_ops->error_cleanup(&hdr->pages);
 		put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
 		return -ENOMEM;
@@ -1309,18 +1317,20 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 	} else
 		pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
 	if (atomic_dec_and_test(&hdr->refcnt))
-		nfs_write_completion(hdr);
+		hdr->compl_ops->completion(hdr);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
 
-static int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head)
+static int pnfs_read_done_resend_to_mds(struct inode *inode,
+				struct list_head *head,
+				const struct nfs_io_completion_ops *compl_ops)
 {
 	struct nfs_pageio_descriptor pgio;
 	LIST_HEAD(failed);
 
 	/* Resend all requests through the MDS */
-	nfs_pageio_init_read_mds(&pgio, inode);
+	nfs_pageio_init_read_mds(&pgio, inode, compl_ops);
 	while (!list_empty(head)) {
 		struct nfs_page *req = nfs_list_entry(head->next);
 
@@ -1349,7 +1359,8 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
 	}
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
 		data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
-								&hdr->pages);
+								&hdr->pages,
+								hdr->compl_ops);
 }
 
 /*
@@ -1443,7 +1454,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 
 	rhdr = nfs_readhdr_alloc();
 	if (!rhdr) {
-		nfs_async_read_error(&desc->pg_list);
+		desc->pg_compl_ops->error_cleanup(&desc->pg_list);
 		ret = -ENOMEM;
 		put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
@@ -1461,7 +1472,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 	} else
 		pnfs_do_multiple_reads(desc, &hdr->rpc_list);
 	if (atomic_dec_and_test(&hdr->refcnt))
-		nfs_read_completion(hdr);
+		hdr->compl_ops->completion(hdr);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 442ebf6..465e15e 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -168,8 +168,10 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
 void get_layout_hdr(struct pnfs_layout_hdr *lo);
 void put_lseg(struct pnfs_layout_segment *lseg);
 
-bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
-bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int);
+bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
+			   const struct nfs_io_completion_ops *);
+bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
+			    int, const struct nfs_io_completion_ops *);
 
 void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
 void unset_pnfs_layoutdriver(struct nfs_server *);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index ea844b2..4704f120 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,6 +31,7 @@
 
 static const struct nfs_pageio_ops nfs_pageio_read_ops;
 static const struct rpc_call_ops nfs_read_common_ops;
+static const struct nfs_io_completion_ops nfs_asynch_read_compl_ops;
 
 static struct kmem_cache *nfs_rdata_cachep;
 
@@ -89,7 +90,7 @@ void nfs_readdata_release(struct nfs_read_data *rdata)
 	    container_of(rdata, struct nfs_read_header, rpc_data))
 		kfree(rdata);
 	if (atomic_dec_and_test(&hdr->refcnt))
-		nfs_read_completion(hdr);
+		hdr->compl_ops->completion(hdr);
 }
 
 static
@@ -102,9 +103,10 @@ int nfs_return_empty_page(struct page *page)
 }
 
 void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
-		struct inode *inode)
+			      struct inode *inode,
+			      const struct nfs_io_completion_ops *compl_ops)
 {
-	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
+	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
 			NFS_SERVER(inode)->rsize, 0);
 }
 
@@ -116,10 +118,11 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
 
 static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-		struct inode *inode)
+				 struct inode *inode,
+				 const struct nfs_io_completion_ops *compl_ops)
 {
-	if (!pnfs_pageio_init_read(pgio, inode))
-		nfs_pageio_init_read_mds(pgio, inode);
+	if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
+		nfs_pageio_init_read_mds(pgio, inode, compl_ops);
 }
 
 int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
@@ -140,7 +143,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	if (len < PAGE_CACHE_SIZE)
 		zero_user_segment(page, len, PAGE_CACHE_SIZE);
 
-	nfs_pageio_init_read(&pgio, inode);
+	nfs_pageio_init_read(&pgio, inode, &nfs_asynch_read_compl_ops);
 	nfs_pageio_add_request(&pgio, new);
 	nfs_pageio_complete(&pgio);
 	return 0;
@@ -164,7 +167,7 @@ static void nfs_readpage_release(struct nfs_page *req)
 }
 
 /* Note io was page aligned */
-void nfs_read_completion(struct nfs_pgio_header *hdr)
+static void nfs_read_completion(struct nfs_pgio_header *hdr)
 {
 	unsigned long pos = hdr->req->wb_index << PAGE_CACHE_SHIFT;
 
@@ -302,7 +305,7 @@ nfs_do_multiple_reads(struct list_head *head,
 	return ret;
 }
 
-void
+static void
 nfs_async_read_error(struct list_head *head)
 {
 	struct nfs_page	*req;
@@ -314,6 +317,11 @@ nfs_async_read_error(struct list_head *head)
 	}
 }
 
+static const struct nfs_io_completion_ops nfs_asynch_read_compl_ops = {
+	.error_cleanup = nfs_async_read_error,
+	.completion = nfs_read_completion,
+};
+
 /*
  * Generate multiple requests to fill a single page.
  *
@@ -366,7 +374,7 @@ out_bad:
 		list_del(&data->list);
 		nfs_readdata_release(data);
 	}
-	nfs_async_read_error(&hdr->pages);
+	desc->pg_compl_ops->error_cleanup(&hdr->pages);
 	return -ENOMEM;
 }
 
@@ -382,7 +390,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
 	data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
 							  desc->pg_count));
 	if (!data) {
-		nfs_async_read_error(head);
+		desc->pg_compl_ops->error_cleanup(head);
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -420,7 +428,7 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 
 	rhdr = nfs_readhdr_alloc();
 	if (!rhdr) {
-		nfs_async_read_error(&desc->pg_list);
+		desc->pg_compl_ops->error_cleanup(&desc->pg_list);
 		return -ENOMEM;
 	}
 	hdr = &rhdr->header;
@@ -433,7 +441,7 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 	else
 		set_bit(NFS_IOHDR_REDO, &hdr->flags);
 	if (atomic_dec_and_test(&hdr->refcnt))
-		nfs_read_completion(hdr);
+		hdr->compl_ops->completion(hdr);
 	return ret;
 }
 
@@ -656,7 +664,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 	if (ret == 0)
 		goto read_complete; /* all pages were read */
 
-	nfs_pageio_init_read(&pgio, inode);
+	nfs_pageio_init_read(&pgio, inode, &nfs_asynch_read_compl_ops);
 
 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index cde0fa2..4d9bbaf 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -40,10 +40,12 @@
  * Local function declarations
  */
 static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
-				  struct inode *inode, int ioflags);
+				  struct inode *inode, int ioflags,
+				  const struct nfs_io_completion_ops *compl_ops);
 static void nfs_redirty_request(struct nfs_page *req);
 static const struct rpc_call_ops nfs_write_common_ops;
 static const struct rpc_call_ops nfs_commit_ops;
+static const struct nfs_io_completion_ops nfs_asynch_write_compl_ops;
 
 static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
@@ -122,7 +124,7 @@ void nfs_writedata_release(struct nfs_write_data *wdata)
 	    container_of(wdata, struct nfs_write_header, rpc_data))
 		kfree(wdata);
 	if (atomic_dec_and_test(&hdr->refcnt))
-		nfs_write_completion(hdr);
+		hdr->compl_ops->completion(hdr);
 }
 
 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -331,7 +333,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
 	struct nfs_pageio_descriptor pgio;
 	int err;
 
-	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
+	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
+			      &nfs_asynch_write_compl_ops);
 	err = nfs_do_writepage(page, wbc, &pgio);
 	nfs_pageio_complete(&pgio);
 	if (err < 0)
@@ -374,7 +377,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
-	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
+	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
+			      &nfs_asynch_write_compl_ops);
 	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
 	nfs_pageio_complete(&pgio);
 
@@ -552,7 +556,7 @@ int nfs_write_need_commit(struct nfs_write_data *data)
 
 #endif
 
-void nfs_write_completion(struct nfs_pgio_header *hdr)
+static void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
 	unsigned long pos = hdr->req->wb_index << PAGE_CACHE_SHIFT;
 
@@ -994,7 +998,7 @@ static void nfs_redirty_request(struct nfs_page *req)
 	nfs_end_page_writeback(page);
 }
 
-void nfs_async_write_error(struct list_head *head)
+static void nfs_async_write_error(struct list_head *head)
 {
 	struct nfs_page	*req;
 
@@ -1005,6 +1009,11 @@ void nfs_async_write_error(struct list_head *head)
 	}
 }
 
+static const struct nfs_io_completion_ops nfs_asynch_write_compl_ops = {
+	.error_cleanup = nfs_async_write_error,
+	.completion = nfs_write_completion,
+};
+
 /*
  * Generate multiple small requests to write out a single
  * contiguous dirty area on one page.
@@ -1056,7 +1065,7 @@ out_bad:
 		list_del(&data->list);
 		nfs_writedata_release(data);
 	}
-	nfs_async_write_error(&hdr->pages);
+	desc->pg_compl_ops->error_cleanup(&hdr->pages);
 	return -ENOMEM;
 }
 
@@ -1080,7 +1089,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
 	data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
 							   desc->pg_count));
 	if (!data) {
-		nfs_async_write_error(head);
+		desc->pg_compl_ops->error_cleanup(head);
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -1123,7 +1132,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 
 	whdr = nfs_writehdr_alloc();
 	if (!whdr) {
-		nfs_async_write_error(&desc->pg_list);
+		desc->pg_compl_ops->error_cleanup(&hdr->pages);
 		return -ENOMEM;
 	}
 	hdr = &whdr->header;
@@ -1137,7 +1146,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 	else
 		set_bit(NFS_IOHDR_REDO, &hdr->flags);
 	if (atomic_dec_and_test(&hdr->refcnt))
-		nfs_write_completion(hdr);
+		hdr->compl_ops->completion(hdr);
 	return ret;
 }
 
@@ -1147,9 +1156,10 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
 };
 
 void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
-				  struct inode *inode, int ioflags)
+			       struct inode *inode, int ioflags,
+			       const struct nfs_io_completion_ops *compl_ops)
 {
-	nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
+	nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
 				NFS_SERVER(inode)->wsize, ioflags);
 }
 
@@ -1161,10 +1171,11 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
 
 static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
-				  struct inode *inode, int ioflags)
+				  struct inode *inode, int ioflags,
+				  const struct nfs_io_completion_ops *compl_ops)
 {
-	if (!pnfs_pageio_init_write(pgio, inode, ioflags))
-		nfs_pageio_init_write_mds(pgio, inode, ioflags);
+	if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops))
+		nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops);
 }
 
 void nfs_write_prepare(struct rpc_task *task, void *calldata)
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 5c52034..a36fbb6 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -67,6 +67,7 @@ struct nfs_pageio_descriptor {
 	int 			pg_ioflags;
 	int			pg_error;
 	const struct rpc_call_ops *pg_rpc_callops;
+	const struct nfs_io_completion_ops *pg_compl_ops;
 	struct pnfs_layout_segment *pg_lseg;
 };
 
@@ -83,6 +84,7 @@ extern	void nfs_release_request(struct nfs_page *req);
 extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 			     struct inode *inode,
 			     const struct nfs_pageio_ops *pg_ops,
+			     const struct nfs_io_completion_ops *compl_ops,
 			     size_t bsize,
 			     int how);
 extern	int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2bbb7c7..b38950c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1201,6 +1201,7 @@ struct nfs_pgio_header {
 	struct pnfs_layout_segment *lseg;
 	const struct rpc_call_ops *mds_ops;
 	void (*release) (struct nfs_pgio_header *hdr);
+	const struct nfs_io_completion_ops *compl_ops;
 	spinlock_t		lock;
 	/* fields protected by lock */
 	int			pnfs_error;
@@ -1256,6 +1257,11 @@ struct nfs_commit_data {
 	int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
 };
 
+struct nfs_io_completion_ops {
+	void	(*error_cleanup)(struct list_head *head);
+	void	(*completion)(struct nfs_pgio_header *hdr);
+};
+
 struct nfs_unlinkdata {
 	struct hlist_node list;
 	struct nfs_removeargs args;
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 17/26] NFS: remove unused wb_complete field from struct nfs_page
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (15 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 16/26] NFS: create completion structure to pass into page_init functions Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 18/26] NFS: prepare coalesce testing for directio Fred Isaman
                   ` (8 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/pagelist.c        |    1 -
 fs/nfs/write.c           |    1 -
 include/linux/nfs_page.h |    1 -
 3 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e2c2af0..b360d08 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -112,7 +112,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 	 * long write-back delay. This will be adjusted in
 	 * update_nfs_request below if the region is not locked. */
 	req->wb_page    = page;
-	atomic_set(&req->wb_complete, 0);
 	req->wb_index	= page->index;
 	page_cache_get(page);
 	BUG_ON(PagePrivate(page));
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 4d9bbaf..293ddb9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1055,7 +1055,6 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
 		nbytes -= len;
 		offset += len;
 	} while (nbytes != 0);
-	atomic_set(&req->wb_complete, requests);
 	desc->pg_rpc_callops = &nfs_write_common_ops;
 	return ret;
 
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index a36fbb6..1198f18 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -36,7 +36,6 @@ struct nfs_page {
 	struct page		*wb_page;	/* page to read in/write out */
 	struct nfs_open_context	*wb_context;	/* File state context info */
 	struct nfs_lock_context	*wb_lock_context;	/* lock context info */
-	atomic_t		wb_complete;	/* i/os we're waiting for */
 	pgoff_t			wb_index;	/* Offset >> PAGE_CACHE_SHIFT */
 	unsigned int		wb_offset,	/* Offset & ~PAGE_CACHE_MASK */
 				wb_pgbase,	/* Start of page data */
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 18/26] NFS: prepare coalesce testing for directio
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (16 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 17/26] NFS: remove unused wb_complete field from struct nfs_page Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 19/26] NFS: rewrite directio read to use async coalesce code Fred Isaman
                   ` (7 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

The coalesce code made assumptions that will no longer be true once
non-page aligned io occurs.  This introduces no change in
current behavior, but allows for more general situations to come.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/nfs4filelayout.c |   17 ++++++++++++++++-
 fs/nfs/pagelist.c       |    4 +---
 fs/nfs/pnfs.c           |    8 ++++++++
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 2c78a46..0afa735 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -779,6 +779,16 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 {
 	BUG_ON(pgio->pg_lseg != NULL);
 
+	if (req->wb_offset != req->wb_pgbase) {
+		/*
+		 * Handling unaligned pages is difficult, because have to
+		 * somehow split a req in two in certain cases in the
+		 * pg.test code.  Avoid this by just not using pnfs
+		 * in this case.
+		 */
+		nfs_pageio_reset_read_mds(pgio);
+		return;
+	}
 	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 					   req->wb_context,
 					   0,
@@ -796,6 +806,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 {
 	BUG_ON(pgio->pg_lseg != NULL);
 
+	if (req->wb_offset != req->wb_pgbase)
+		goto out_mds;
 	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 					   req->wb_context,
 					   0,
@@ -804,7 +816,10 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 					   GFP_NOFS);
 	/* If no lseg, fall back to write through mds */
 	if (pgio->pg_lseg == NULL)
-		nfs_pageio_reset_write_mds(pgio);
+		goto out_mds;
+	return;
+out_mds:
+	nfs_pageio_reset_write_mds(pgio);
 }
 
 static const struct nfs_pageio_ops filelayout_pg_read_ops = {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index b360d08..bd201d1 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -278,12 +278,10 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
 		return false;
 	if (req->wb_context->state != prev->wb_context->state)
 		return false;
-	if (req->wb_index != (prev->wb_index + 1))
+	if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
 		return false;
 	if (req->wb_pgbase != 0)
 		return false;
-	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
-		return false;
 	return pgio->pg_ops->pg_test(pgio, prev, req);
 }
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 02dd84b..da3c183 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1082,6 +1082,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
 {
 	BUG_ON(pgio->pg_lseg != NULL);
 
+	if (req->wb_offset != req->wb_pgbase) {
+		nfs_pageio_reset_read_mds(pgio);
+		return;
+	}
 	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 					   req->wb_context,
 					   req_offset(req),
@@ -1100,6 +1104,10 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
 {
 	BUG_ON(pgio->pg_lseg != NULL);
 
+	if (req->wb_offset != req->wb_pgbase) {
+		nfs_pageio_reset_write_mds(pgio);
+		return;
+	}
 	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 					   req->wb_context,
 					   req_offset(req),
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 19/26] NFS: rewrite directio read to use async coalesce code
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (17 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 18/26] NFS: prepare coalesce testing for directio Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-16 21:22   ` Myklebust, Trond
  2012-04-09 20:52 ` [PATCH 20/26] NFS: create nfs_generic_commit_list Fred Isaman
                   ` (6 subsequent siblings)
  25 siblings, 1 reply; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

This also has the advantage that it allows directio to use pnfs.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/direct.c          |  247 +++++++++++++++++++++------------------------
 fs/nfs/internal.h        |    3 +
 fs/nfs/pagelist.c        |    6 +-
 fs/nfs/read.c            |    6 +-
 include/linux/nfs_page.h |    1 +
 include/linux/nfs_xdr.h  |    4 +-
 6 files changed, 128 insertions(+), 139 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 22a40c4..409a9a0 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -124,22 +124,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
 	return -EINVAL;
 }
 
-static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count)
-{
-	unsigned int npages;
-	unsigned int i;
-
-	if (count == 0)
-		return;
-	pages += (pgbase >> PAGE_SHIFT);
-	npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	for (i = 0; i < npages; i++) {
-		struct page *page = pages[i];
-		if (!PageCompound(page))
-			set_page_dirty(page);
-	}
-}
-
 static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
 {
 	unsigned int i;
@@ -226,58 +210,85 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 	nfs_direct_req_release(dreq);
 }
 
-/*
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete.  This could be long *after* we are woken up in
- * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
- */
-static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
+void nfs_direct_readpage_release(struct nfs_page *req)
 {
-	struct nfs_read_data *data = calldata;
-
-	nfs_readpage_result(task, data);
+	dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
+		req->wb_context->dentry->d_inode->i_sb->s_id,
+		(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+		req->wb_bytes,
+		(long long)req_offset(req));
+	nfs_release_request(req);
 }
 
-static void nfs_direct_read_release(void *calldata)
+static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 {
+	unsigned long pos = req_offset(hdr->req);
+	struct nfs_direct_req *dreq = hdr->dreq;
 
-	struct nfs_read_data *data = calldata;
-	struct nfs_direct_req *dreq = (struct nfs_direct_req *)data->header->req;
-	int status = data->task.tk_status;
+	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+		goto out_put;
 
 	spin_lock(&dreq->lock);
-	if (unlikely(status < 0)) {
-		dreq->error = status;
-		spin_unlock(&dreq->lock);
+	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
+	    (hdr->first_error == pos))
+		dreq->error = hdr->error;
+	else
+		dreq->count += (hdr->first_error - pos);
+	spin_unlock(&dreq->lock);
+
+	if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+		while (!list_empty(&hdr->pages)) {
+			struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+			struct page *page = req->wb_page;
+
+			nfs_list_remove_request(req);
+			nfs_direct_readpage_release(req);
+			if (!PageCompound(page))
+				set_page_dirty(page);
+			page_cache_release(page);
+		}
 	} else {
-		dreq->count += data->res.count;
-		spin_unlock(&dreq->lock);
-		nfs_direct_dirty_pages(data->pages.pagevec,
-				data->args.pgbase,
-				data->res.count);
+		pos &= PAGE_MASK;
+		while (!list_empty(&hdr->pages)) {
+			struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+			if (pos < (hdr->first_error & PAGE_MASK))
+				if (!PageCompound(req->wb_page))
+					set_page_dirty(req->wb_page);
+			page_cache_release(req->wb_page);
+			nfs_list_remove_request(req);
+			nfs_direct_readpage_release(req);
+			pos += PAGE_SIZE;
+		}
 	}
-	nfs_direct_release_pages(data->pages.pagevec, data->pages.npages);
-
+out_put:
 	if (put_dreq(dreq))
 		nfs_direct_complete(dreq);
-	nfs_readdata_release(data);
+	hdr->release(hdr);
 }
 
-static const struct rpc_call_ops nfs_read_direct_ops = {
-	.rpc_call_prepare = nfs_read_prepare,
-	.rpc_call_done = nfs_direct_read_result,
-	.rpc_release = nfs_direct_read_release,
-};
-
-static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
+static void nfs_sync_pgio_error(struct list_head *head)
 {
-	struct nfs_read_data *data = &rhdr->rpc_data;
+	struct nfs_page *req;
 
-	if (data->pages.pagevec != data->pages.page_array)
-		kfree(data->pages.pagevec);
-	nfs_readhdr_free(&rhdr->header);
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_release_request(req);
+	}
 }
 
+static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
+{
+	get_dreq(hdr->dreq);
+}
+
+static const struct nfs_io_completion_ops nfs_direct_read_compl_ops = {
+	.error_cleanup = nfs_sync_pgio_error,
+	.init_hdr = nfs_direct_pgio_init,
+	.completion = nfs_direct_read_completion,
+};
+
 /*
  * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
  * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
@@ -285,118 +296,85 @@ static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
  * handled automatically by nfs_direct_read_result().  Otherwise, if
  * no requests have been sent, just return an error.
  */
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
+static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
 						const struct iovec *iov,
 						loff_t pos)
 {
+	struct nfs_direct_req *dreq = desc->pg_dreq;
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
 	unsigned long user_addr = (unsigned long)iov->iov_base;
 	size_t count = iov->iov_len;
 	size_t rsize = NFS_SERVER(inode)->rsize;
-	struct rpc_task *task;
-	struct rpc_message msg = {
-		.rpc_cred = ctx->cred,
-	};
-	struct rpc_task_setup task_setup_data = {
-		.rpc_client = NFS_CLIENT(inode),
-		.rpc_message = &msg,
-		.callback_ops = &nfs_read_direct_ops,
-		.workqueue = nfsiod_workqueue,
-		.flags = RPC_TASK_ASYNC,
-	};
 	unsigned int pgbase;
 	int result;
 	ssize_t started = 0;
+	struct page **pagevec = NULL;
+	unsigned int npages;
 
 	do {
-		struct nfs_read_header *rhdr;
-		struct nfs_read_data *data;
-		struct nfs_page_array *pages;
 		size_t bytes;
+		int i;
 
 		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(rsize,count);
+		bytes = min(max(rsize, PAGE_SIZE), count);
 
 		result = -ENOMEM;
-		rhdr = nfs_readhdr_alloc();
-		if (unlikely(!rhdr))
-			break;
-		data = nfs_readdata_alloc(&rhdr->header, nfs_page_array_len(pgbase, bytes));
-		if (!data) {
-			nfs_readhdr_free(&rhdr->header);
+		npages = nfs_page_array_len(pgbase, bytes);
+		if (!pagevec)
+			pagevec = kmalloc(npages * sizeof(struct page *),
+					  GFP_KERNEL);
+		if (!pagevec)
 			break;
-		}
-		data->header = &rhdr->header;
-		atomic_inc(&data->header->refcnt);
-		pages = &data->pages;
-
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
-					pages->npages, 1, 0, pages->pagevec, NULL);
+					npages, 1, 0, pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
-		if (result < 0) {
-			nfs_direct_readhdr_release(rhdr);
+		if (result < 0)
 			break;
-		}
-		if ((unsigned)result < pages->npages) {
+		if ((unsigned)result < npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pages->pagevec, result);
-				nfs_direct_readhdr_release(rhdr);
+				nfs_direct_release_pages(pagevec, result);
 				break;
 			}
 			bytes -= pgbase;
-			pages->npages = result;
+			npages = result;
 		}
 
-		get_dreq(dreq);
-
-		rhdr->header.req = (struct nfs_page *) dreq;
-		rhdr->header.inode = inode;
-		rhdr->header.cred = msg.rpc_cred;
-		data->args.fh = NFS_FH(inode);
-		data->args.context = get_nfs_open_context(ctx);
-		data->args.lock_context = dreq->l_ctx;
-		data->args.offset = pos;
-		data->args.pgbase = pgbase;
-		data->args.pages = pages->pagevec;
-		data->args.count = bytes;
-		data->res.fattr = &data->fattr;
-		data->res.eof = 0;
-		data->res.count = bytes;
-		nfs_fattr_init(&data->fattr);
-		msg.rpc_argp = &data->args;
-		msg.rpc_resp = &data->res;
-
-		task_setup_data.task = &data->task;
-		task_setup_data.callback_data = data;
-		NFS_PROTO(inode)->read_setup(data, &msg);
-
-		task = rpc_run_task(&task_setup_data);
-		if (IS_ERR(task))
-			break;
-
-		dprintk("NFS: %5u initiated direct read call "
-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
-				task->tk_pid,
-				inode->i_sb->s_id,
-				(long long)NFS_FILEID(inode),
-				bytes,
-				(unsigned long long)data->args.offset);
-		rpc_put_task(task);
-
-		started += bytes;
-		user_addr += bytes;
-		pos += bytes;
-		/* FIXME: Remove this unnecessary math from final patch */
-		pgbase += bytes;
-		pgbase &= ~PAGE_MASK;
-		BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
-
-		count -= bytes;
+		for (i = 0; i < npages; i++) {
+			struct nfs_page *req;
+			unsigned int req_len = min(bytes, PAGE_SIZE - pgbase);
+			/* XXX do we need to do the eof zeroing found in async_filler? */
+			req = nfs_create_request(dreq->ctx, dreq->inode,
+						 pagevec[i],
+						 pgbase, req_len);
+			if (IS_ERR(req)) {
+				nfs_direct_release_pages(pagevec + i,
+							 npages - i);
+				result = PTR_ERR(req);
+				break;
+			}
+			req->wb_index = pos >> PAGE_SHIFT;
+			req->wb_offset = pos & ~PAGE_MASK;
+			if (!nfs_pageio_add_request(desc, req)) {
+				result = desc->pg_error;
+				nfs_release_request(req);
+				nfs_direct_release_pages(pagevec + i,
+							 npages - i);
+				break;
+			}
+			pgbase = 0;
+			bytes -= req_len;
+			started += req_len;
+			user_addr += req_len;
+			pos += req_len;
+			count -= req_len;
+		}
 	} while (count != 0);
 
+	kfree(pagevec);
+
 	if (started)
 		return started;
 	return result < 0 ? (ssize_t) result : -EFAULT;
@@ -407,15 +385,18 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 					      unsigned long nr_segs,
 					      loff_t pos)
 {
+	struct nfs_pageio_descriptor desc;
 	ssize_t result = -EINVAL;
 	size_t requested_bytes = 0;
 	unsigned long seg;
 
+	nfs_pageio_init_read(&desc, dreq->inode, &nfs_direct_read_compl_ops);
 	get_dreq(dreq);
+	desc.pg_dreq = dreq;
 
 	for (seg = 0; seg < nr_segs; seg++) {
 		const struct iovec *vec = &iov[seg];
-		result = nfs_direct_read_schedule_segment(dreq, vec, pos);
+		result = nfs_direct_read_schedule_segment(&desc, vec, pos);
 		if (result < 0)
 			break;
 		requested_bytes += result;
@@ -424,6 +405,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 		pos += vec->iov_len;
 	}
 
+	nfs_pageio_complete(&desc);
+
 	/*
 	 * If no bytes were started, return the error, and let the
 	 * generic layer handle the completion.
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index efa5124..63fff81 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -307,6 +307,9 @@ extern struct nfs_read_header *nfs_readhdr_alloc(void);
 extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
 extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
 						unsigned int pagecount);
+extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+				 struct inode *inode,
+				 const struct nfs_io_completion_ops *compl_ops);
 extern int nfs_initiate_read(struct rpc_clnt *clnt,
 			     struct nfs_read_data *data,
 			     const struct rpc_call_ops *call_ops);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index bd201d1..1a73f01 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -47,8 +47,11 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 	hdr->inode = desc->pg_inode;
 	hdr->cred = hdr->req->wb_context->cred;
 	hdr->first_error = req_offset(hdr->req) + desc->pg_count;
+	hdr->dreq = desc->pg_dreq;
 	hdr->release = release;
 	hdr->compl_ops = desc->pg_compl_ops;
+	if (hdr->compl_ops->init_hdr)
+		hdr->compl_ops->init_hdr(hdr);
 }
 
 void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error,
@@ -115,8 +118,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 	req->wb_index	= page->index;
 	page_cache_get(page);
 	BUG_ON(PagePrivate(page));
-	BUG_ON(!PageLocked(page));
-	BUG_ON(page->mapping->host != inode);
 	req->wb_offset  = offset;
 	req->wb_pgbase	= offset;
 	req->wb_bytes   = count;
@@ -255,6 +256,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_ioflags = io_flags;
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
+	desc->pg_dreq = NULL;
 }
 
 /**
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 4704f120..3c09e4b 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -117,9 +117,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
 
-static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-				 struct inode *inode,
-				 const struct nfs_io_completion_ops *compl_ops)
+void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+			  struct inode *inode,
+			  const struct nfs_io_completion_ops *compl_ops)
 {
 	if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
 		nfs_pageio_init_read_mds(pgio, inode, compl_ops);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 1198f18..1809b52 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -68,6 +68,7 @@ struct nfs_pageio_descriptor {
 	const struct rpc_call_ops *pg_rpc_callops;
 	const struct nfs_io_completion_ops *pg_compl_ops;
 	struct pnfs_layout_segment *pg_lseg;
+	struct nfs_direct_req	*pg_dreq;
 };
 
 #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b38950c..61fd160 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1202,6 +1202,7 @@ struct nfs_pgio_header {
 	const struct rpc_call_ops *mds_ops;
 	void (*release) (struct nfs_pgio_header *hdr);
 	const struct nfs_io_completion_ops *compl_ops;
+	struct nfs_direct_req	*dreq;
 	spinlock_t		lock;
 	/* fields protected by lock */
 	int			pnfs_error;
@@ -1216,8 +1217,6 @@ struct nfs_read_header {
 	struct nfs_read_data	rpc_data;
 };
 
-struct nfs_direct_req;
-
 struct nfs_write_data {
 	struct nfs_pgio_header	*header;
 	struct list_head	list;
@@ -1259,6 +1258,7 @@ struct nfs_commit_data {
 
 struct nfs_io_completion_ops {
 	void	(*error_cleanup)(struct list_head *head);
+	void	(*init_hdr)(struct nfs_pgio_header *hdr);
 	void	(*completion)(struct nfs_pgio_header *hdr);
 };
 
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 20/26] NFS: create nfs_generic_commit_list
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (18 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 19/26] NFS: rewrite directio read to use async coalesce code Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 21/26] NFS: create struct nfs_commit_info Fred Isaman
                   ` (5 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Simple refactoring.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/write.c |   15 ++++++++++++---
 1 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 293ddb9..948df43 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1521,6 +1521,17 @@ static const struct rpc_call_ops nfs_commit_ops = {
 	.rpc_release = nfs_commit_release,
 };
 
+static int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
+				   int how)
+{
+	int status;
+
+	status = pnfs_commit_list(inode, head, how);
+	if (status == PNFS_NOT_ATTEMPTED)
+		status = nfs_commit_list(inode, head, how);
+	return status;
+}
+
 int nfs_commit_inode(struct inode *inode, int how)
 {
 	LIST_HEAD(head);
@@ -1534,9 +1545,7 @@ int nfs_commit_inode(struct inode *inode, int how)
 	if (res) {
 		int error;
 
-		error = pnfs_commit_list(inode, &head, how);
-		if (error == PNFS_NOT_ATTEMPTED)
-			error = nfs_commit_list(inode, &head, how);
+		error = nfs_generic_commit_list(inode, &head, how);
 		if (error < 0)
 			return error;
 		if (!may_wait)
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 21/26] NFS: create struct nfs_commit_info
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (19 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 20/26] NFS: create nfs_generic_commit_list Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 22/26] NFS: create nfs_commit_completion_ops Fred Isaman
                   ` (4 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

It is COMMIT that is handled the most differently between
the paged and direct paths.  Create a structure that encapsulates
everything either path needs to know about the commit state.

We could use void to hide some of the layout driver stuff, but
Trond suggests pulling it out to ensure type checking, given the
huge changes being made, and the fact that it doesn't interfere
with other drivers.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/inode.c          |    6 +-
 fs/nfs/internal.h       |   12 +++-
 fs/nfs/nfs4filelayout.c |   98 ++++++++++++++++-------------
 fs/nfs/nfs4filelayout.h |   14 +----
 fs/nfs/pnfs.h           |   72 ++++++++++++++-------
 fs/nfs/write.c          |  158 +++++++++++++++++++++++++++-------------------
 include/linux/nfs_fs.h  |    5 +-
 include/linux/nfs_xdr.h |   27 ++++++++
 8 files changed, 237 insertions(+), 155 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e8bbfa5..59a12c6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1547,7 +1547,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
 	nfsi->delegation_state = 0;
 	init_rwsem(&nfsi->rwsem);
 	nfsi->layout = NULL;
-	atomic_set(&nfsi->commits_outstanding, 0);
+	atomic_set(&nfsi->commit_info.rpcs_out, 0);
 #endif
 }
 
@@ -1559,9 +1559,9 @@ static void init_once(void *foo)
 	INIT_LIST_HEAD(&nfsi->open_files);
 	INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
 	INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
-	INIT_LIST_HEAD(&nfsi->commit_list);
+	INIT_LIST_HEAD(&nfsi->commit_info.list);
 	nfsi->npages = 0;
-	nfsi->ncommit = 0;
+	nfsi->commit_info.ncommit = 0;
 	atomic_set(&nfsi->silly_count, 1);
 	INIT_HLIST_HEAD(&nfsi->silly_list);
 	init_waitqueue_head(&nfsi->waitqueue);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 63fff81..cbdea5e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -349,12 +349,18 @@ extern void nfs_init_commit(struct nfs_commit_data *data,
 			    struct list_head *head,
 			    struct pnfs_layout_segment *lseg);
 void nfs_retry_commit(struct list_head *page_list,
-		      struct pnfs_layout_segment *lseg);
+		      struct pnfs_layout_segment *lseg,
+		      struct nfs_commit_info *cinfo);
 void nfs_commit_clear_lock(struct nfs_inode *nfsi);
 void nfs_commitdata_release(struct nfs_commit_data *data);
 void nfs_commit_release_pages(struct nfs_commit_data *data);
-void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head);
-void nfs_request_remove_commit_list(struct nfs_page *req);
+void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+				 struct nfs_commit_info *cinfo);
+void nfs_request_remove_commit_list(struct nfs_page *req,
+				    struct nfs_commit_info *cinfo);
+void nfs_init_cinfo(struct nfs_commit_info *cinfo,
+		    struct inode *inode,
+		    struct nfs_direct_req *dreq);
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 0afa735..a1860ac 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -347,9 +347,11 @@ static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
 static void filelayout_commit_release(void *calldata)
 {
 	struct nfs_commit_data *data = calldata;
+	struct nfs_commit_info cinfo;
 
 	nfs_commit_release_pages(data);
-	if (atomic_dec_and_test(&NFS_I(data->inode)->commits_outstanding))
+	nfs_init_cinfo(&cinfo, data->inode, data->dreq);
+	if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
 		nfs_commit_clear_lock(NFS_I(data->inode));
 	put_lseg(data->lseg);
 	nfs_commitdata_release(data);
@@ -731,7 +733,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
 			return NULL;
 		}
 		flo->commit_info.buckets = kcalloc(size,
-						   sizeof(struct nfs4_fl_commit_bucket),
+						   sizeof(struct pnfs_commit_bucket),
 						   gfp_flags);
 		if (!flo->commit_info.buckets) {
 			filelayout_free_lseg(&fl->generic_hdr);
@@ -846,40 +848,42 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
  * If this will make the bucket empty, it will need to put the lseg reference.
  */
 static void
-filelayout_clear_request_commit(struct nfs_page *req)
+filelayout_clear_request_commit(struct nfs_page *req,
+				struct nfs_commit_info *cinfo)
 {
 	struct pnfs_layout_segment *freeme = NULL;
-	struct inode *inode = req->wb_context->dentry->d_inode;
 
-	spin_lock(&inode->i_lock);
+	spin_lock(cinfo->lock);
 	if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
 		goto out;
+	cinfo->ds->nwritten--;
 	if (list_is_singular(&req->wb_list)) {
-		struct nfs4_fl_commit_bucket *bucket;
+		struct pnfs_commit_bucket *bucket;
 
 		bucket = list_first_entry(&req->wb_list,
-					  struct nfs4_fl_commit_bucket,
+					  struct pnfs_commit_bucket,
 					  written);
 		freeme = bucket->wlseg;
 		bucket->wlseg = NULL;
 	}
 out:
-	nfs_request_remove_commit_list(req);
-	spin_unlock(&inode->i_lock);
+	nfs_request_remove_commit_list(req, cinfo);
+	spin_unlock(cinfo->lock);
 	put_lseg(freeme);
 }
 
 static struct list_head *
 filelayout_choose_commit_list(struct nfs_page *req,
-			      struct pnfs_layout_segment *lseg)
+			      struct pnfs_layout_segment *lseg,
+			      struct nfs_commit_info *cinfo)
 {
 	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
 	u32 i, j;
 	struct list_head *list;
-	struct nfs4_fl_commit_bucket *buckets;
+	struct pnfs_commit_bucket *buckets;
 
 	if (fl->commit_through_mds)
-		return &NFS_I(req->wb_context->dentry->d_inode)->commit_list;
+		return &cinfo->mds->list;
 
 	/* Note that we are calling nfs4_fl_calc_j_index on each page
 	 * that ends up being committed to a data server.  An attractive
@@ -889,7 +893,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
 	 */
 	j = nfs4_fl_calc_j_index(lseg, req_offset(req));
 	i = select_bucket_index(fl, j);
-	buckets = FILELAYOUT_FROM_HDR(lseg->pls_layout)->commit_info.buckets;
+	buckets = cinfo->ds->buckets;
 	list = &buckets[i].written;
 	if (list_empty(list)) {
 		/* Non-empty buckets hold a reference on the lseg.  That ref
@@ -901,17 +905,19 @@ filelayout_choose_commit_list(struct nfs_page *req,
 		buckets[i].wlseg = get_lseg(lseg);
 	}
 	set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
+	cinfo->ds->nwritten++;
 	return list;
 }
 
 static void
 filelayout_mark_request_commit(struct nfs_page *req,
-		struct pnfs_layout_segment *lseg)
+			       struct pnfs_layout_segment *lseg,
+			       struct nfs_commit_info *cinfo)
 {
 	struct list_head *list;
 
-	list = filelayout_choose_commit_list(req, lseg);
-	nfs_request_add_commit_list(req, list);
+	list = filelayout_choose_commit_list(req, lseg, cinfo);
+	nfs_request_add_commit_list(req, list, cinfo);
 }
 
 static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -968,8 +974,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
 }
 
 static int
-filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
-		spinlock_t *lock)
+filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
+			       struct nfs_commit_info *cinfo,
+			       int max)
 {
 	struct list_head *src = &bucket->written;
 	struct list_head *dst = &bucket->committing;
@@ -979,9 +986,9 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
 	list_for_each_entry_safe(req, tmp, src, wb_list) {
 		if (!nfs_lock_request(req))
 			continue;
-		if (cond_resched_lock(lock))
+		if (cond_resched_lock(cinfo->lock))
 			list_safe_reset_next(req, tmp, wb_list);
-		nfs_request_remove_commit_list(req);
+		nfs_request_remove_commit_list(req, cinfo);
 		clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
 		nfs_list_add_request(req, dst);
 		ret++;
@@ -989,6 +996,8 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
 			break;
 	}
 	if (ret) {
+		cinfo->ds->nwritten -= ret;
+		cinfo->ds->ncommitting += ret;
 		bucket->clseg = bucket->wlseg;
 		if (list_empty(src))
 			bucket->wlseg = NULL;
@@ -999,37 +1008,32 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
 }
 
 /* Move reqs from written to committing lists, returning count of number moved.
- * Note called with i_lock held.
+ * Note called with cinfo->lock held.
  */
-static int filelayout_scan_commit_lists(struct inode *inode, int max,
-		spinlock_t *lock)
+static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo,
+					int max)
 {
-	struct nfs4_fl_commit_info *fl_cinfo;
 	int i, rv = 0, cnt;
 
-	fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info;
-	if (fl_cinfo->nbuckets == 0)
-		goto out_done;
-	for (i = 0; i < fl_cinfo->nbuckets && max != 0; i++) {
-		cnt = filelayout_scan_ds_commit_list(&fl_cinfo->buckets[i],
-				max, lock);
+	for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
+		cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i],
+						     cinfo, max);
 		max -= cnt;
 		rv += cnt;
 	}
-out_done:
 	return rv;
 }
 
 static unsigned int
-alloc_ds_commits(struct inode *inode, struct list_head *list)
+alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
 {
-	struct nfs4_fl_commit_info *fl_cinfo;
-	struct nfs4_fl_commit_bucket *bucket;
+	struct pnfs_ds_commit_info *fl_cinfo;
+	struct pnfs_commit_bucket *bucket;
 	struct nfs_commit_data *data;
 	int i, j;
 	unsigned int nreq = 0;
 
-	fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info;
+	fl_cinfo = cinfo->ds;
 	bucket = fl_cinfo->buckets;
 	for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
 		if (list_empty(&bucket->committing))
@@ -1048,7 +1052,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
 	for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
 		if (list_empty(&bucket->committing))
 			continue;
-		nfs_retry_commit(&bucket->committing, bucket->clseg);
+		nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
 		put_lseg(bucket->clseg);
 		bucket->clseg = NULL;
 	}
@@ -1059,7 +1063,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
 /* This follows nfs_commit_list pretty closely */
 static int
 filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
-			   int how)
+			   int how, struct nfs_commit_info *cinfo)
 {
 	struct nfs_commit_data *data, *tmp;
 	LIST_HEAD(list);
@@ -1072,17 +1076,17 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 			list_add(&data->pages, &list);
 			nreq++;
 		} else
-			nfs_retry_commit(mds_pages, NULL);
+			nfs_retry_commit(mds_pages, NULL, cinfo);
 	}
 
-	nreq += alloc_ds_commits(inode, &list);
+	nreq += alloc_ds_commits(cinfo, &list);
 
 	if (nreq == 0) {
 		nfs_commit_clear_lock(NFS_I(inode));
 		goto out;
 	}
 
-	atomic_add(nreq, &NFS_I(inode)->commits_outstanding);
+	atomic_add(nreq, &cinfo->mds->rpcs_out);
 
 	list_for_each_entry_safe(data, tmp, &list, pages) {
 		list_del_init(&data->pages);
@@ -1091,14 +1095,15 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 			nfs_initiate_commit(NFS_CLIENT(inode), data,
 					    data->mds_ops, how);
 		} else {
-			struct nfs4_fl_commit_info *fl_cinfo;
+			struct pnfs_commit_bucket *buckets;
 
-			fl_cinfo = &FILELAYOUT_FROM_HDR(data->lseg->pls_layout)->commit_info;
-			nfs_init_commit(data, &fl_cinfo->buckets[data->ds_commit_index].committing, data->lseg);
+			buckets = cinfo->ds->buckets;
+			nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg);
 			filelayout_initiate_commit(data, how);
 		}
 	}
 out:
+	cinfo->ds->ncommitting = 0;
 	return PNFS_ATTEMPTED;
 }
 
@@ -1123,6 +1128,12 @@ filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
 	kfree(FILELAYOUT_FROM_HDR(lo));
 }
 
+static struct pnfs_ds_commit_info *
+filelayout_get_ds_info(struct inode *inode)
+{
+	return &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info;
+}
+
 static struct pnfs_layoutdriver_type filelayout_type = {
 	.id			= LAYOUT_NFSV4_1_FILES,
 	.name			= "LAYOUT_NFSV4_1_FILES",
@@ -1133,6 +1144,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
 	.free_lseg		= filelayout_free_lseg,
 	.pg_read_ops		= &filelayout_pg_read_ops,
 	.pg_write_ops		= &filelayout_pg_write_ops,
+	.get_ds_info		= &filelayout_get_ds_info,
 	.mark_request_commit	= filelayout_mark_request_commit,
 	.clear_request_commit	= filelayout_clear_request_commit,
 	.scan_commit_lists	= filelayout_scan_commit_lists,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 333a3ac..96b89bb 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -74,18 +74,6 @@ struct nfs4_file_layout_dsaddr {
 	struct nfs4_pnfs_ds		*ds_list[1];
 };
 
-struct nfs4_fl_commit_bucket {
-	struct list_head written;
-	struct list_head committing;
-	struct pnfs_layout_segment *wlseg;
-	struct pnfs_layout_segment *clseg;
-};
-
-struct nfs4_fl_commit_info {
-	int nbuckets;
-	struct nfs4_fl_commit_bucket *buckets;
-};
-
 struct nfs4_filelayout_segment {
 	struct pnfs_layout_segment generic_hdr;
 	u32 stripe_type;
@@ -100,7 +88,7 @@ struct nfs4_filelayout_segment {
 
 struct nfs4_filelayout {
 	struct pnfs_layout_hdr generic_hdr;
-	struct nfs4_fl_commit_info commit_info;
+	struct pnfs_ds_commit_info commit_info;
 };
 
 static inline struct nfs4_filelayout *
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 465e15e..5e4a51c 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -94,11 +94,18 @@ struct pnfs_layoutdriver_type {
 	const struct nfs_pageio_ops *pg_read_ops;
 	const struct nfs_pageio_ops *pg_write_ops;
 
+	struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode);
 	void (*mark_request_commit) (struct nfs_page *req,
-					struct pnfs_layout_segment *lseg);
-	void (*clear_request_commit) (struct nfs_page *req);
-	int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock);
-	int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
+				     struct pnfs_layout_segment *lseg,
+				     struct nfs_commit_info *cinfo);
+	void (*clear_request_commit) (struct nfs_page *req,
+				      struct nfs_commit_info *cinfo);
+	int (*scan_commit_lists) (struct nfs_commit_info *cinfo,
+				  int max);
+	int (*commit_pagelist)(struct inode *inode,
+			       struct list_head *mds_pages,
+			       int how,
+			       struct nfs_commit_info *cinfo);
 
 	/*
 	 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
@@ -263,49 +270,57 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
 }
 
 static inline int
-pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
+pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
+		 struct nfs_commit_info *cinfo)
 {
-	if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags))
+	if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0)
 		return PNFS_NOT_ATTEMPTED;
-	return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how);
+	return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo);
+}
+
+static inline struct pnfs_ds_commit_info *
+pnfs_get_ds_info(struct inode *inode)
+{
+	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+
+	if (ld == NULL || ld->get_ds_info == NULL)
+		return NULL;
+	return ld->get_ds_info(inode);
 }
 
 static inline bool
-pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+			 struct nfs_commit_info *cinfo)
 {
 	struct inode *inode = req->wb_context->dentry->d_inode;
 	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
 
 	if (lseg == NULL || ld->mark_request_commit == NULL)
 		return false;
-	ld->mark_request_commit(req, lseg);
+	ld->mark_request_commit(req, lseg, cinfo);
 	return true;
 }
 
 static inline bool
-pnfs_clear_request_commit(struct nfs_page *req)
+pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
 {
 	struct inode *inode = req->wb_context->dentry->d_inode;
 	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
 
 	if (ld == NULL || ld->clear_request_commit == NULL)
 		return false;
-	ld->clear_request_commit(req);
+	ld->clear_request_commit(req, cinfo);
 	return true;
 }
 
 static inline int
-pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
+pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
+		       int max)
 {
-	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
-	int ret;
-
-	if (ld == NULL || ld->scan_commit_lists == NULL)
+	if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
 		return 0;
-	ret = ld->scan_commit_lists(inode, max, lock);
-	if (ret != 0)
-		set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
-	return ret;
+	else
+		return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max);
 }
 
 /* Should the pNFS client commit and return the layout upon a setattr */
@@ -409,25 +424,34 @@ static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, st
 }
 
 static inline int
-pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
+pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
+		 struct nfs_commit_info *cinfo)
 {
 	return PNFS_NOT_ATTEMPTED;
 }
 
+static inline struct pnfs_ds_commit_info *
+pnfs_get_ds_info(struct inode *inode)
+{
+	return NULL;
+}
+
 static inline bool
-pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+			 struct nfs_commit_info *cinfo)
 {
 	return false;
 }
 
 static inline bool
-pnfs_clear_request_commit(struct nfs_page *req)
+pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
 {
 	return false;
 }
 
 static inline int
-pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
+pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
+		       int max)
 {
 	return 0;
 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 948df43..30dcc4f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -446,65 +446,79 @@ nfs_mark_request_dirty(struct nfs_page *req)
 /**
  * nfs_request_add_commit_list - add request to a commit list
  * @req: pointer to a struct nfs_page
- * @head: commit list head
+ * @dst: commit list head
+ * @cinfo: holds list lock and accounting info
  *
- * This sets the PG_CLEAN bit, updates the inode global count of
+ * This sets the PG_CLEAN bit, updates the cinfo count of
  * number of outstanding requests requiring a commit as well as
  * the MM page stats.
  *
- * The caller must _not_ hold the inode->i_lock, but must be
+ * The caller must _not_ hold the cinfo->lock, but must be
  * holding the nfs_page lock.
  */
 void
-nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head)
+nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+			    struct nfs_commit_info *cinfo)
 {
-	struct inode *inode = req->wb_context->dentry->d_inode;
-
 	set_bit(PG_CLEAN, &(req)->wb_flags);
-	spin_lock(&inode->i_lock);
-	nfs_list_add_request(req, head);
-	NFS_I(inode)->ncommit++;
-	spin_unlock(&inode->i_lock);
+	spin_lock(cinfo->lock);
+	nfs_list_add_request(req, dst);
+	cinfo->mds->ncommit++;
+	spin_unlock(cinfo->lock);
 	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
-	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+	__mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC);
 }
 EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
 
 /**
  * nfs_request_remove_commit_list - Remove request from a commit list
  * @req: pointer to a nfs_page
+ * @cinfo: holds list lock and accounting info
  *
- * This clears the PG_CLEAN bit, and updates the inode global count of
+ * This clears the PG_CLEAN bit, and updates the cinfo's count of
  * number of outstanding requests requiring a commit
  * It does not update the MM page stats.
  *
- * The caller _must_ hold the inode->i_lock and the nfs_page lock.
+ * The caller _must_ hold the cinfo->lock and the nfs_page lock.
  */
 void
-nfs_request_remove_commit_list(struct nfs_page *req)
+nfs_request_remove_commit_list(struct nfs_page *req,
+			       struct nfs_commit_info *cinfo)
 {
-	struct inode *inode = req->wb_context->dentry->d_inode;
-
 	if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
 		return;
 	nfs_list_remove_request(req);
-	NFS_I(inode)->ncommit--;
+	cinfo->mds->ncommit--;
 }
 EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
 
+static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
+				      struct inode *inode)
+{
+	cinfo->lock = &inode->i_lock;
+	cinfo->mds = &NFS_I(inode)->commit_info;
+	cinfo->ds = pnfs_get_ds_info(inode);
+}
+
+void nfs_init_cinfo(struct nfs_commit_info *cinfo,
+		    struct inode *inode,
+		    struct nfs_direct_req *dreq)
+{
+	nfs_init_cinfo_from_inode(cinfo, inode);
+}
+EXPORT_SYMBOL_GPL(nfs_init_cinfo);
 
 /*
  * Add a request to the inode's commit list.
  */
 static void
-nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+			struct nfs_commit_info *cinfo)
 {
-	struct inode *inode = req->wb_context->dentry->d_inode;
-
-	if (pnfs_mark_request_commit(req, lseg))
+	if (pnfs_mark_request_commit(req, lseg, cinfo))
 		return;
-	nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list);
+	nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
 }
 
 static void
@@ -519,11 +533,13 @@ nfs_clear_request_commit(struct nfs_page *req)
 {
 	if (test_bit(PG_CLEAN, &req->wb_flags)) {
 		struct inode *inode = req->wb_context->dentry->d_inode;
+		struct nfs_commit_info cinfo;
 
-		if (!pnfs_clear_request_commit(req)) {
-			spin_lock(&inode->i_lock);
-			nfs_request_remove_commit_list(req);
-			spin_unlock(&inode->i_lock);
+		nfs_init_cinfo_from_inode(&cinfo, inode);
+		if (!pnfs_clear_request_commit(req, &cinfo)) {
+			spin_lock(cinfo.lock);
+			nfs_request_remove_commit_list(req, &cinfo);
+			spin_unlock(cinfo.lock);
 		}
 		nfs_clear_page_commit(req->wb_page);
 	}
@@ -539,7 +555,8 @@ int nfs_write_need_commit(struct nfs_write_data *data)
 
 #else
 static void
-nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+			struct nfs_commit_info *cinfo)
 {
 }
 
@@ -558,10 +575,12 @@ int nfs_write_need_commit(struct nfs_write_data *data)
 
 static void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
+	struct nfs_commit_info cinfo;
 	unsigned long pos = hdr->req->wb_index << PAGE_CACHE_SHIFT;
 
 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
 		goto out;
+	nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
 	while (!list_empty(&hdr->pages)) {
 		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 		struct page *page = req->wb_page;
@@ -578,7 +597,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 			goto next;
 		}
 		if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
-			nfs_mark_request_commit(req, hdr->lseg);
+			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
 			goto next;
 		}
 remove_req:
@@ -593,16 +612,16 @@ out:
 }
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-static int
-nfs_need_commit(struct nfs_inode *nfsi)
+static unsigned long
+nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
 {
-	return nfsi->ncommit > 0;
+	return cinfo->mds->ncommit;
 }
 
-/* i_lock held by caller */
+/* cinfo->lock held by caller */
 static int
-nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
-		spinlock_t *lock)
+nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
+		     struct nfs_commit_info *cinfo, int max)
 {
 	struct nfs_page *req, *tmp;
 	int ret = 0;
@@ -610,9 +629,9 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
 	list_for_each_entry_safe(req, tmp, src, wb_list) {
 		if (!nfs_lock_request(req))
 			continue;
-		if (cond_resched_lock(lock))
+		if (cond_resched_lock(cinfo->lock))
 			list_safe_reset_next(req, tmp, wb_list);
-		nfs_request_remove_commit_list(req);
+		nfs_request_remove_commit_list(req, cinfo);
 		nfs_list_add_request(req, dst);
 		ret++;
 		if (ret == max)
@@ -624,37 +643,38 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
 /*
  * nfs_scan_commit - Scan an inode for commit requests
  * @inode: NFS inode to scan
- * @dst: destination list
+ * @dst: mds destination list
+ * @cinfo: mds and ds lists of reqs ready to commit
  *
  * Moves requests from the inode's 'commit' request list.
  * The requests are *not* checked to ensure that they form a contiguous set.
  */
 static int
-nfs_scan_commit(struct inode *inode, struct list_head *dst)
+nfs_scan_commit(struct inode *inode, struct list_head *dst,
+		struct nfs_commit_info *cinfo)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
 	int ret = 0;
 
-	spin_lock(&inode->i_lock);
-	if (nfsi->ncommit > 0) {
+	spin_lock(cinfo->lock);
+	if (cinfo->mds->ncommit > 0) {
 		const int max = INT_MAX;
 
-		ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max,
-				&inode->i_lock);
-		ret += pnfs_scan_commit_lists(inode, max - ret,
-				&inode->i_lock);
+		ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
+					   cinfo, max);
+		ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
 	}
-	spin_unlock(&inode->i_lock);
+	spin_unlock(cinfo->lock);
 	return ret;
 }
 
 #else
-static inline int nfs_need_commit(struct nfs_inode *nfsi)
+static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
 {
 	return 0;
 }
 
-static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst)
+static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst,
+				  struct nfs_commit_info *cinfo)
 {
 	return 0;
 }
@@ -923,7 +943,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
  */
 static void nfs_write_rpcsetup(struct nfs_write_data *data,
 		unsigned int count, unsigned int offset,
-		int how)
+		int how, struct nfs_commit_info *cinfo)
 {
 	struct nfs_page *req = data->header->req;
 
@@ -944,7 +964,7 @@ static void nfs_write_rpcsetup(struct nfs_write_data *data,
 	case 0:
 		break;
 	case FLUSH_COND_STABLE:
-		if (nfs_need_commit(NFS_I(data->header->inode)))
+		if (nfs_reqs_to_commit(cinfo))
 			break;
 	default:
 		data->args.stable = NFS_FILE_SYNC;
@@ -1028,12 +1048,14 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
 	unsigned int offset;
 	int requests = 0;
 	int ret = 0;
+	struct nfs_commit_info cinfo;
 
+	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
 	nfs_list_remove_request(req);
 	nfs_list_add_request(req, &hdr->pages);
 
 	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
-	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
+	    (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
 	     desc->pg_count > wsize))
 		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
@@ -1049,7 +1071,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
 		data->header = hdr;
 		atomic_inc(&hdr->refcnt);
 		data->pages.pagevec[0] = page;
-		nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags);
+		nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
 		list_add(&data->list, &hdr->rpc_list);
 		requests++;
 		nbytes -= len;
@@ -1084,6 +1106,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
 	struct nfs_write_data	*data;
 	struct list_head *head = &desc->pg_list;
 	int ret = 0;
+	struct nfs_commit_info cinfo;
 
 	data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
 							   desc->pg_count));
@@ -1095,6 +1118,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
 	data->header = hdr;
 	atomic_inc(&hdr->refcnt);
 
+	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
 	pages = data->pages.pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
@@ -1104,11 +1128,11 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
 	}
 
 	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
-	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
+	    (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
 		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
 	/* Set up the argument struct */
-	nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags);
+	nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
 	list_add(&data->list, &hdr->rpc_list);
 	desc->pg_rpc_callops = &nfs_write_common_ops;
 out:
@@ -1415,14 +1439,15 @@ void nfs_init_commit(struct nfs_commit_data *data,
 EXPORT_SYMBOL_GPL(nfs_init_commit);
 
 void nfs_retry_commit(struct list_head *page_list,
-		      struct pnfs_layout_segment *lseg)
+		      struct pnfs_layout_segment *lseg,
+		      struct nfs_commit_info *cinfo)
 {
 	struct nfs_page *req;
 
 	while (!list_empty(page_list)) {
 		req = nfs_list_entry(page_list->next);
 		nfs_list_remove_request(req);
-		nfs_mark_request_commit(req, lseg);
+		nfs_mark_request_commit(req, lseg, cinfo);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
 			     BDI_RECLAIMABLE);
@@ -1435,7 +1460,8 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit);
  * Commit dirty pages
  */
 static int
-nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+nfs_commit_list(struct inode *inode, struct list_head *head, int how,
+		struct nfs_commit_info *cinfo)
 {
 	struct nfs_commit_data	*data;
 
@@ -1448,7 +1474,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 	nfs_init_commit(data, head, NULL);
 	return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how);
  out_bad:
-	nfs_retry_commit(head, NULL);
+	nfs_retry_commit(head, NULL, cinfo);
 	nfs_commit_clear_lock(NFS_I(inode));
 	return -ENOMEM;
 }
@@ -1522,30 +1548,32 @@ static const struct rpc_call_ops nfs_commit_ops = {
 };
 
 static int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
-				   int how)
+				   int how, struct nfs_commit_info *cinfo)
 {
 	int status;
 
-	status = pnfs_commit_list(inode, head, how);
+	status = pnfs_commit_list(inode, head, how, cinfo);
 	if (status == PNFS_NOT_ATTEMPTED)
-		status = nfs_commit_list(inode, head, how);
+		status = nfs_commit_list(inode, head, how, cinfo);
 	return status;
 }
 
 int nfs_commit_inode(struct inode *inode, int how)
 {
 	LIST_HEAD(head);
+	struct nfs_commit_info cinfo;
 	int may_wait = how & FLUSH_SYNC;
 	int res;
 
 	res = nfs_commit_set_lock(NFS_I(inode), may_wait);
 	if (res <= 0)
 		goto out_mark_dirty;
-	res = nfs_scan_commit(inode, &head);
+	nfs_init_cinfo_from_inode(&cinfo, inode);
+	res = nfs_scan_commit(inode, &head, &cinfo);
 	if (res) {
 		int error;
 
-		error = nfs_generic_commit_list(inode, &head, how);
+		error = nfs_generic_commit_list(inode, &head, how, &cinfo);
 		if (error < 0)
 			return error;
 		if (!may_wait)
@@ -1576,14 +1604,14 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
 	int ret = 0;
 
 	/* no commits means nothing needs to be done */
-	if (!nfsi->ncommit)
+	if (!nfsi->commit_info.ncommit)
 		return ret;
 
 	if (wbc->sync_mode == WB_SYNC_NONE) {
 		/* Don't commit yet if this is a non-blocking flush and there
 		 * are a lot of outstanding writes for this mapping.
 		 */
-		if (nfsi->ncommit <= (nfsi->npages >> 1))
+		if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
 			goto out_mark_dirty;
 
 		/* don't wait for the COMMIT response */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 8d3a2b8..8a88c16 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -179,8 +179,7 @@ struct nfs_inode {
 	__be32			cookieverf[2];
 
 	unsigned long		npages;
-	unsigned long		ncommit;
-	struct list_head	commit_list;
+	struct nfs_mds_commit_info commit_info;
 
 	/* Open contexts for shared mmap writes */
 	struct list_head	open_files;
@@ -201,7 +200,6 @@ struct nfs_inode {
 
 	/* pNFS layout information */
 	struct pnfs_layout_hdr *layout;
-	atomic_t		commits_outstanding;
 #endif /* CONFIG_NFS_V4*/
 #ifdef CONFIG_NFS_FSCACHE
 	struct fscache_cookie	*fscache;
@@ -230,7 +228,6 @@ struct nfs_inode {
 #define NFS_INO_FSCACHE		(5)		/* inode can be cached by FS-Cache */
 #define NFS_INO_FSCACHE_LOCK	(6)		/* FS-Cache cookie management lock */
 #define NFS_INO_COMMIT		(7)		/* inode is committing unstable writes */
-#define NFS_INO_PNFS_COMMIT	(8)		/* use pnfs code for commit */
 #define NFS_INO_LAYOUTCOMMIT	(9)		/* layoutcommit required */
 #define NFS_INO_LAYOUTCOMMITTING (10)		/* layoutcommit inflight */
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 61fd160..646100c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1074,6 +1074,21 @@ struct nfstime4 {
 };
 
 #ifdef CONFIG_NFS_V4_1
+
+struct pnfs_commit_bucket {
+	struct list_head written;
+	struct list_head committing;
+	struct pnfs_layout_segment *wlseg;
+	struct pnfs_layout_segment *clseg;
+};
+
+struct pnfs_ds_commit_info {
+	int nwritten;
+	int ncommitting;
+	int nbuckets;
+	struct pnfs_commit_bucket *buckets;
+};
+
 #define NFS4_EXCHANGE_ID_LEN	(48)
 struct nfs41_exchange_id_args {
 	struct nfs_client		*client;
@@ -1237,6 +1252,18 @@ struct nfs_write_header {
 	struct nfs_write_data	rpc_data;
 };
 
+struct nfs_mds_commit_info {
+	atomic_t rpcs_out;
+	unsigned long		ncommit;
+	struct list_head	list;
+};
+
+struct nfs_commit_info {
+	spinlock_t			*lock;
+	struct nfs_mds_commit_info	*mds;
+	struct pnfs_ds_commit_info	*ds;
+};
+
 struct nfs_commit_data {
 	struct rpc_task		task;
 	struct inode		*inode;
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 22/26] NFS: create nfs_commit_completion_ops
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (20 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 21/26] NFS: create struct nfs_commit_info Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 23/26] NFS: add dreq to nfs_commit_info Fred Isaman
                   ` (3 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Factors out the code that needs to change when directio
starts using these code paths.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/internal.h       |    5 ++---
 fs/nfs/nfs4filelayout.c |   12 ++++--------
 fs/nfs/write.c          |   31 +++++++++++++++++++++----------
 include/linux/nfs_xdr.h |    9 +++++++++
 4 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index cbdea5e..d08f048 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -347,13 +347,12 @@ extern int nfs_initiate_commit(struct rpc_clnt *clnt,
 			       int how);
 extern void nfs_init_commit(struct nfs_commit_data *data,
 			    struct list_head *head,
-			    struct pnfs_layout_segment *lseg);
+			    struct pnfs_layout_segment *lseg,
+			    struct nfs_commit_info *cinfo);
 void nfs_retry_commit(struct list_head *page_list,
 		      struct pnfs_layout_segment *lseg,
 		      struct nfs_commit_info *cinfo);
-void nfs_commit_clear_lock(struct nfs_inode *nfsi);
 void nfs_commitdata_release(struct nfs_commit_data *data);
-void nfs_commit_release_pages(struct nfs_commit_data *data);
 void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
 				 struct nfs_commit_info *cinfo);
 void nfs_request_remove_commit_list(struct nfs_page *req,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index a1860ac..96a6141 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -347,12 +347,8 @@ static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
 static void filelayout_commit_release(void *calldata)
 {
 	struct nfs_commit_data *data = calldata;
-	struct nfs_commit_info cinfo;
 
-	nfs_commit_release_pages(data);
-	nfs_init_cinfo(&cinfo, data->inode, data->dreq);
-	if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
-		nfs_commit_clear_lock(NFS_I(data->inode));
+	data->compl_ops->completion(data);
 	put_lseg(data->lseg);
 	nfs_commitdata_release(data);
 }
@@ -1082,7 +1078,7 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 	nreq += alloc_ds_commits(cinfo, &list);
 
 	if (nreq == 0) {
-		nfs_commit_clear_lock(NFS_I(inode));
+		cinfo->compl_ops->error_cleanup(NFS_I(inode));
 		goto out;
 	}
 
@@ -1091,14 +1087,14 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 	list_for_each_entry_safe(data, tmp, &list, pages) {
 		list_del_init(&data->pages);
 		if (!data->lseg) {
-			nfs_init_commit(data, mds_pages, NULL);
+			nfs_init_commit(data, mds_pages, NULL, cinfo);
 			nfs_initiate_commit(NFS_CLIENT(inode), data,
 					    data->mds_ops, how);
 		} else {
 			struct pnfs_commit_bucket *buckets;
 
 			buckets = cinfo->ds->buckets;
-			nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg);
+			nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo);
 			filelayout_initiate_commit(data, how);
 		}
 	}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 30dcc4f..ceb3979 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@ static void nfs_redirty_request(struct nfs_page *req);
 static const struct rpc_call_ops nfs_write_common_ops;
 static const struct rpc_call_ops nfs_commit_ops;
 static const struct nfs_io_completion_ops nfs_asynch_write_compl_ops;
+static const struct nfs_commit_completion_ops nfs_commit_compl_ops;
 
 static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
@@ -499,6 +500,7 @@ static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
 	cinfo->lock = &inode->i_lock;
 	cinfo->mds = &NFS_I(inode)->commit_info;
 	cinfo->ds = pnfs_get_ds_info(inode);
+	cinfo->compl_ops = &nfs_commit_compl_ops;
 }
 
 void nfs_init_cinfo(struct nfs_commit_info *cinfo,
@@ -1356,13 +1358,12 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
 	return (ret < 0) ? ret : 1;
 }
 
-void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
 {
 	clear_bit(NFS_INO_COMMIT, &nfsi->flags);
 	smp_mb__after_clear_bit();
 	wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
 }
-EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
 
 void nfs_commitdata_release(struct nfs_commit_data *data)
 {
@@ -1411,8 +1412,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit);
  * Set up the argument/result storage required for the RPC call.
  */
 void nfs_init_commit(struct nfs_commit_data *data,
-			    struct list_head *head,
-			    struct pnfs_layout_segment *lseg)
+		     struct list_head *head,
+		     struct pnfs_layout_segment *lseg,
+		     struct nfs_commit_info *cinfo)
 {
 	struct nfs_page *first = nfs_list_entry(head->next);
 	struct inode *inode = first->wb_context->dentry->d_inode;
@@ -1426,6 +1428,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
 	data->cred	  = first->wb_context->cred;
 	data->lseg	  = lseg; /* reference transferred */
 	data->mds_ops     = &nfs_commit_ops;
+	data->compl_ops	  = cinfo->compl_ops;
 
 	data->args.fh     = NFS_FH(data->inode);
 	/* Note: we always request a commit of the entire inode */
@@ -1471,11 +1474,12 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
 		goto out_bad;
 
 	/* Set up the argument struct */
-	nfs_init_commit(data, head, NULL);
+	nfs_init_commit(data, head, NULL, cinfo);
+	atomic_inc(&cinfo->mds->rpcs_out);
 	return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how);
  out_bad:
 	nfs_retry_commit(head, NULL, cinfo);
-	nfs_commit_clear_lock(NFS_I(inode));
+	cinfo->compl_ops->error_cleanup(NFS_I(inode));
 	return -ENOMEM;
 }
 
@@ -1493,10 +1497,11 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 	NFS_PROTO(data->inode)->commit_done(task, data);
 }
 
-void nfs_commit_release_pages(struct nfs_commit_data *data)
+static void nfs_commit_release_pages(struct nfs_commit_data *data)
 {
 	struct nfs_page	*req;
 	int status = data->task.tk_status;
+	struct nfs_commit_info cinfo;
 
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
@@ -1529,15 +1534,16 @@ void nfs_commit_release_pages(struct nfs_commit_data *data)
 	next:
 		nfs_unlock_request(req);
 	}
+	nfs_init_cinfo(&cinfo, data->inode, data->dreq);
+	if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
+		nfs_commit_clear_lock(NFS_I(data->inode));
 }
-EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
 
 static void nfs_commit_release(void *calldata)
 {
 	struct nfs_commit_data *data = calldata;
 
-	nfs_commit_release_pages(data);
-	nfs_commit_clear_lock(NFS_I(data->inode));
+	data->compl_ops->completion(data);
 	nfs_commitdata_release(calldata);
 }
 
@@ -1547,6 +1553,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
 	.rpc_release = nfs_commit_release,
 };
 
+static const struct nfs_commit_completion_ops nfs_commit_compl_ops = {
+	.completion = nfs_commit_release_pages,
+	.error_cleanup = nfs_commit_clear_lock,
+};
+
 static int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
 				   int how, struct nfs_commit_info *cinfo)
 {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 646100c..0ee7b3a 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1258,10 +1258,18 @@ struct nfs_mds_commit_info {
 	struct list_head	list;
 };
 
+struct nfs_commit_data;
+struct nfs_inode;
+struct nfs_commit_completion_ops {
+	void (*error_cleanup) (struct nfs_inode *nfsi);
+	void (*completion) (struct nfs_commit_data *data);
+};
+
 struct nfs_commit_info {
 	spinlock_t			*lock;
 	struct nfs_mds_commit_info	*mds;
 	struct pnfs_ds_commit_info	*ds;
+	const struct nfs_commit_completion_ops *compl_ops;
 };
 
 struct nfs_commit_data {
@@ -1280,6 +1288,7 @@ struct nfs_commit_data {
 	struct nfs_client	*ds_clp;	/* pNFS data server */
 	int			ds_commit_index;
 	const struct rpc_call_ops *mds_ops;
+	const struct nfs_commit_completion_ops *compl_ops;
 	int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
 };
 
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 23/26] NFS: add dreq to nfs_commit_info
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (21 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 22/26] NFS: create nfs_commit_completion_ops Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 24/26] NFS: avoid some stat gathering for direct io Fred Isaman
                   ` (2 subsequent siblings)
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Need this to pass into nfs_commitdata_init, in order to keep data->dreq
accurate.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/write.c          |    2 ++
 include/linux/nfs_xdr.h |    1 +
 2 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ceb3979..fe8224c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -500,6 +500,7 @@ static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
 	cinfo->lock = &inode->i_lock;
 	cinfo->mds = &NFS_I(inode)->commit_info;
 	cinfo->ds = pnfs_get_ds_info(inode);
+	cinfo->dreq = NULL;
 	cinfo->compl_ops = &nfs_commit_compl_ops;
 }
 
@@ -1429,6 +1430,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
 	data->lseg	  = lseg; /* reference transferred */
 	data->mds_ops     = &nfs_commit_ops;
 	data->compl_ops	  = cinfo->compl_ops;
+	data->dreq	  = cinfo->dreq;
 
 	data->args.fh     = NFS_FH(data->inode);
 	/* Note: we always request a commit of the entire inode */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0ee7b3a..5297f4f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1269,6 +1269,7 @@ struct nfs_commit_info {
 	spinlock_t			*lock;
 	struct nfs_mds_commit_info	*mds;
 	struct pnfs_ds_commit_info	*ds;
+	struct nfs_direct_req		*dreq;	/* O_DIRECT request */
 	const struct nfs_commit_completion_ops *compl_ops;
 };
 
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 24/26] NFS: avoid some stat gathering for direct io
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (22 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 23/26] NFS: add dreq to nfs_commit_info Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 25/26] NFS: move allocation of nfs4_fl_commit_info to write's pg_init Fred Isaman
  2012-04-09 20:52 ` [PATCH 26/26] NFS: rewrite directio write to use async coalesce code Fred Isaman
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/write.c |   18 ++++++++++++------
 1 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index fe8224c..aabc103 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -466,9 +466,13 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
 	nfs_list_add_request(req, dst);
 	cinfo->mds->ncommit++;
 	spin_unlock(cinfo->lock);
-	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
-	__mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC);
+	if (!cinfo->dreq) {
+		inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+		inc_bdi_stat(req->wb_page->mapping->backing_dev_info,
+			     BDI_RECLAIMABLE);
+		__mark_inode_dirty(req->wb_context->dentry->d_inode,
+				   I_DIRTY_DATASYNC);
+	}
 }
 EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
 
@@ -1453,9 +1457,11 @@ void nfs_retry_commit(struct list_head *page_list,
 		req = nfs_list_entry(page_list->next);
 		nfs_list_remove_request(req);
 		nfs_mark_request_commit(req, lseg, cinfo);
-		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
-			     BDI_RECLAIMABLE);
+		if (!cinfo->dreq) {
+			dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+			dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
+				     BDI_RECLAIMABLE);
+		}
 		nfs_unlock_request(req);
 	}
 }
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 25/26] NFS: move allocation of nfs4_fl_commit_info to write's pg_init
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (23 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 24/26] NFS: avoid some stat gathering for direct io Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  2012-04-09 20:52 ` [PATCH 26/26] NFS: rewrite directio write to use async coalesce code Fred Isaman
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

This allows directio to have private copies of the commit_info,
instead of relying on a global layout-wide one, just like for the
inode itself.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/nfs4filelayout.c |   87 ++++++++++++++++++++++++++++++-----------------
 1 files changed, 56 insertions(+), 31 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 96a6141..9bbc798 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -691,6 +691,52 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
 	_filelayout_free_lseg(fl);
 }
 
+static int
+filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
+			     struct nfs_commit_info *cinfo,
+			     gfp_t gfp_flags)
+{
+	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
+	struct pnfs_commit_bucket *buckets;
+	int size;
+
+	/* This assumes there is only one IOMODE_RW lseg.  What
+	 * we really want to do is have a layout_hdr level
+	 * dictionary of <multipath_list4, fh> keys, each
+	 * associated with a struct list_head, populated by calls
+	 * to filelayout_write_pagelist().
+	 * */
+	if (fl->commit_through_mds)
+		return 0;
+	if (cinfo->ds->nbuckets != 0)
+		return 0;
+
+	size = (fl->stripe_type == STRIPE_SPARSE) ?
+		fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
+
+	buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
+			  gfp_flags);
+	if (!buckets)
+		return -ENOMEM;
+	else {
+		int i;
+
+		spin_lock(cinfo->lock);
+		if (cinfo->ds->nbuckets != 0)
+			kfree(buckets);
+		else {
+			cinfo->ds->buckets = buckets;
+			cinfo->ds->nbuckets = size;
+			for (i = 0; i < size; i++) {
+				INIT_LIST_HEAD(&buckets[i].written);
+				INIT_LIST_HEAD(&buckets[i].committing);
+			}
+		}
+		spin_unlock(cinfo->lock);
+		return 0;
+	}
+}
+
 static struct pnfs_layout_segment *
 filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
 		      struct nfs4_layoutget_res *lgr,
@@ -710,37 +756,6 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
 		_filelayout_free_lseg(fl);
 		return NULL;
 	}
-
-	/* This assumes there is only one IOMODE_RW lseg.  What
-	 * we really want to do is have a layout_hdr level
-	 * dictionary of <multipath_list4, fh> keys, each
-	 * associated with a struct list_head, populated by calls
-	 * to filelayout_write_pagelist().
-	 * */
-	if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
-		struct nfs4_filelayout *flo = FILELAYOUT_FROM_HDR(layoutid);
-		int i;
-		int size = (fl->stripe_type == STRIPE_SPARSE) ?
-			fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
-
-		if (flo->commit_info.nbuckets != 0) {
-			/* Shouldn't happen if only one IOMODE_RW lseg */
-			filelayout_free_lseg(&fl->generic_hdr);
-			return NULL;
-		}
-		flo->commit_info.buckets = kcalloc(size,
-						   sizeof(struct pnfs_commit_bucket),
-						   gfp_flags);
-		if (!flo->commit_info.buckets) {
-			filelayout_free_lseg(&fl->generic_hdr);
-			return NULL;
-		}
-		flo->commit_info.nbuckets = size;
-		for (i = 0; i < size; i++) {
-			INIT_LIST_HEAD(&flo->commit_info.buckets[i].written);
-			INIT_LIST_HEAD(&flo->commit_info.buckets[i].committing);
-		}
-	}
 	return &fl->generic_hdr;
 }
 
@@ -802,6 +817,9 @@ static void
 filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 			 struct nfs_page *req)
 {
+	struct nfs_commit_info cinfo;
+	int status;
+
 	BUG_ON(pgio->pg_lseg != NULL);
 
 	if (req->wb_offset != req->wb_pgbase)
@@ -815,6 +833,13 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 	/* If no lseg, fall back to write through mds */
 	if (pgio->pg_lseg == NULL)
 		goto out_mds;
+	nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
+	status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
+	if (status < 0) {
+		put_lseg(pgio->pg_lseg);
+		pgio->pg_lseg = NULL;
+		goto out_mds;
+	}
 	return;
 out_mds:
 	nfs_pageio_reset_write_mds(pgio);
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 26/26] NFS: rewrite directio write to use async coalesce code
  2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
                   ` (24 preceding siblings ...)
  2012-04-09 20:52 ` [PATCH 25/26] NFS: move allocation of nfs4_fl_commit_info to write's pg_init Fred Isaman
@ 2012-04-09 20:52 ` Fred Isaman
  25 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-09 20:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: Trond Myklebust

This also has the advantage that it allows directio to use pnfs.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/direct.c         |  514 ++++++++++++++++++++---------------------------
 fs/nfs/internal.h       |   16 ++
 fs/nfs/nfs4filelayout.c |   44 ++++-
 fs/nfs/pnfs.h           |   17 ++
 fs/nfs/write.c          |   32 ++--
 5 files changed, 307 insertions(+), 316 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 409a9a0..bd25e9e 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -56,6 +56,7 @@
 
 #include "internal.h"
 #include "iostat.h"
+#include "pnfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
@@ -81,16 +82,19 @@ struct nfs_direct_req {
 	struct completion	completion;	/* wait for i/o completion */
 
 	/* commit state */
-	struct list_head	rewrite_list;	/* saved nfs_write_data structs */
-	struct nfs_commit_data *commit_data;	/* special write_data for commits */
+	struct nfs_mds_commit_info mds_cinfo;	/* Storage for cinfo */
+	struct pnfs_ds_commit_info ds_cinfo;	/* Storage for cinfo */
+	struct work_struct	work;
 	int			flags;
 #define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
 #define NFS_ODIRECT_RESCHED_WRITES	(2)	/* write verification failed */
 	struct nfs_writeverf	verf;		/* unstable write verifier */
 };
 
+static const struct nfs_io_completion_ops nfs_direct_write_compl_ops;
+static const struct nfs_commit_completion_ops nfs_direct_commit_compl_ops;
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
-static const struct rpc_call_ops nfs_write_direct_ops;
+static void nfs_direct_write_schedule_work(struct work_struct *work);
 
 static inline void get_dreq(struct nfs_direct_req *dreq)
 {
@@ -131,6 +135,16 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
 		page_cache_release(pages[i]);
 }
 
+void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
+			      struct nfs_direct_req *dreq)
+{
+	cinfo->lock = &dreq->lock;
+	cinfo->mds = &dreq->mds_cinfo;
+	cinfo->ds = &dreq->ds_cinfo;
+	cinfo->dreq = dreq;
+	cinfo->compl_ops = &nfs_direct_commit_compl_ops;
+}
+
 static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 {
 	struct nfs_direct_req *dreq;
@@ -142,7 +156,10 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 	kref_init(&dreq->kref);
 	kref_get(&dreq->kref);
 	init_completion(&dreq->completion);
-	INIT_LIST_HEAD(&dreq->rewrite_list);
+	dreq->mds_cinfo.ncommit = 0;
+	INIT_LIST_HEAD(&dreq->mds_cinfo.list);
+	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
+	memset(&dreq->ds_cinfo, 0, sizeof(dreq->ds_cinfo));
 	dreq->iocb = NULL;
 	dreq->ctx = NULL;
 	dreq->l_ctx = NULL;
@@ -449,112 +466,60 @@ out:
 	return result;
 }
 
-static void nfs_direct_writehdr_release(struct nfs_write_header *whdr)
-{
-	struct nfs_write_data *data = &whdr->rpc_data;
-
-	if (data->pages.pagevec != data->pages.page_array)
-		kfree(data->pages.pagevec);
-	nfs_writehdr_free(&whdr->header);
-}
-
-static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
-{
-	while (!list_empty(&dreq->rewrite_list)) {
-		struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages);
-		struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
-		struct nfs_page_array *p = &whdr->rpc_data.pages;
-
-		list_del(&hdr->pages);
-		nfs_direct_release_pages(p->pagevec, p->npages);
-		nfs_direct_writehdr_release(whdr);
-	}
-}
-
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 {
-	struct inode *inode = dreq->inode;
-	struct list_head *p;
-	struct nfs_write_data *data;
-	struct nfs_pgio_header *hdr;
-	struct rpc_task *task;
-	struct rpc_message msg = {
-		.rpc_cred = dreq->ctx->cred,
-	};
-	struct rpc_task_setup task_setup_data = {
-		.rpc_client = NFS_CLIENT(inode),
-		.rpc_message = &msg,
-		.callback_ops = &nfs_write_direct_ops,
-		.workqueue = nfsiod_workqueue,
-		.flags = RPC_TASK_ASYNC,
-	};
+	struct nfs_pageio_descriptor desc;
+	struct nfs_page *req, *tmp;
+	LIST_HEAD(reqs);
+	struct nfs_commit_info cinfo;
+	LIST_HEAD(failed);
+
+	nfs_init_cinfo_from_dreq(&cinfo, dreq);
+	pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo);
+	spin_lock(cinfo.lock);
+	nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0);
+	spin_unlock(cinfo.lock);
 
 	dreq->count = 0;
 	get_dreq(dreq);
 
-	list_for_each(p, &dreq->rewrite_list) {
-		hdr = list_entry(p, struct nfs_pgio_header, pages);
-		data = &(container_of(hdr, struct nfs_write_header, header))->rpc_data;
-
-		get_dreq(dreq);
-
-		/* Use stable writes */
-		data->args.stable = NFS_FILE_SYNC;
-
-		/*
-		 * Reset data->res.
-		 */
-		nfs_fattr_init(&data->fattr);
-		data->res.count = data->args.count;
-		memset(&data->verf, 0, sizeof(data->verf));
-
-		/*
-		 * Reuse data->task; data->args should not have changed
-		 * since the original request was sent.
-		 */
-		task_setup_data.task = &data->task;
-		task_setup_data.callback_data = data;
-		msg.rpc_argp = &data->args;
-		msg.rpc_resp = &data->res;
-		NFS_PROTO(inode)->write_setup(data, &msg);
-
-		/*
-		 * We're called via an RPC callback, so BKL is already held.
-		 */
-		task = rpc_run_task(&task_setup_data);
-		if (!IS_ERR(task))
-			rpc_put_task(task);
-
-		dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
-				data->task.tk_pid,
-				inode->i_sb->s_id,
-				(long long)NFS_FILEID(inode),
-				data->args.count,
-				(unsigned long long)data->args.offset);
-	}
+	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE,
+			      &nfs_direct_write_compl_ops);
+	desc.pg_dreq = dreq;
 
-	if (put_dreq(dreq))
-		nfs_direct_write_complete(dreq, inode);
-}
+	list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
+		if (!nfs_pageio_add_request(&desc, req)) {
+			nfs_list_add_request(req, &failed);
+			spin_lock(cinfo.lock);
+			dreq->flags = 0;
+			dreq->error = -EIO;
+			spin_unlock(cinfo.lock);
+		}
+	}
+	nfs_pageio_complete(&desc);
 
-static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
-{
-	struct nfs_commit_data *data = calldata;
+	while (!list_empty(&failed)) {
+		page_cache_release(req->wb_page);
+		nfs_release_request(req);
+		nfs_unlock_request(req);
+	}
 
-	/* Call the NFS version-specific code */
-	NFS_PROTO(data->inode)->commit_done(task, data);
+	if (put_dreq(dreq))
+		nfs_direct_write_complete(dreq, dreq->inode);
 }
 
-static void nfs_direct_commit_release(void *calldata)
+static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 {
-	struct nfs_commit_data *data = calldata;
 	struct nfs_direct_req *dreq = data->dreq;
+	struct nfs_commit_info cinfo;
+	struct nfs_page *req;
 	int status = data->task.tk_status;
 
+	nfs_init_cinfo_from_dreq(&cinfo, dreq);
 	if (status < 0) {
 		dprintk("NFS: %5u commit failed with error %d.\n",
-				data->task.tk_pid, status);
+			data->task.tk_pid, status);
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 	} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
 		dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
@@ -562,59 +527,48 @@ static void nfs_direct_commit_release(void *calldata)
 	}
 
 	dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
+	while (!list_empty(&data->pages)) {
+		req = nfs_list_entry(data->pages.next);
+		nfs_list_remove_request(req);
+		if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
+			/* Note the rewrite will go through mds */
+			nfs_mark_request_commit(req, NULL, &cinfo);
+		} else {
+			page_cache_release(req->wb_page);
+			nfs_release_request(req);
+		}
+		nfs_unlock_request(req);
+	}
+
 	nfs_direct_write_complete(dreq, data->inode);
-	nfs_commit_free(data);
 }
 
-static const struct rpc_call_ops nfs_commit_direct_ops = {
-	.rpc_call_prepare = nfs_commit_prepare,
-	.rpc_call_done = nfs_direct_commit_result,
-	.rpc_release = nfs_direct_commit_release,
+static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
+{
+	/* There is no lock to clear */
+}
+
+static const struct nfs_commit_completion_ops nfs_direct_commit_compl_ops = {
+	.completion = nfs_direct_commit_complete,
+	.error_cleanup = nfs_direct_error_cleanup,
 };
 
 static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 {
-	struct nfs_commit_data *data = dreq->commit_data;
-	struct rpc_task *task;
-	struct rpc_message msg = {
-		.rpc_argp = &data->args,
-		.rpc_resp = &data->res,
-		.rpc_cred = dreq->ctx->cred,
-	};
-	struct rpc_task_setup task_setup_data = {
-		.task = &data->task,
-		.rpc_client = NFS_CLIENT(dreq->inode),
-		.rpc_message = &msg,
-		.callback_ops = &nfs_commit_direct_ops,
-		.callback_data = data,
-		.workqueue = nfsiod_workqueue,
-		.flags = RPC_TASK_ASYNC,
-	};
-
-	data->inode = dreq->inode;
-	data->cred = msg.rpc_cred;
-
-	data->args.fh = NFS_FH(data->inode);
-	data->args.offset = 0;
-	data->args.count = 0;
-	data->res.fattr = &data->fattr;
-	data->res.verf = &data->verf;
-	nfs_fattr_init(&data->fattr);
-
-	NFS_PROTO(data->inode)->commit_setup(data, &msg);
-
-	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
-	dreq->commit_data = NULL;
-
-	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
-
-	task = rpc_run_task(&task_setup_data);
-	if (!IS_ERR(task))
-		rpc_put_task(task);
+	int res;
+	struct nfs_commit_info cinfo;
+	LIST_HEAD(mds_list);
+
+	nfs_init_cinfo_from_dreq(&cinfo, dreq);
+	nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
+	res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
+	if (res < 0) /* res == -ENOMEM */
+		nfs_direct_write_reschedule(dreq);
 }
 
-static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+static void nfs_direct_write_schedule_work(struct work_struct *work)
 {
+	struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
 	int flags = dreq->flags;
 
 	dreq->flags = 0;
@@ -626,90 +580,29 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 			nfs_direct_write_reschedule(dreq);
 			break;
 		default:
-			if (dreq->commit_data != NULL)
-				nfs_commit_free(dreq->commit_data);
-			nfs_direct_free_writedata(dreq);
-			nfs_zap_mapping(inode, inode->i_mapping);
+			nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
 			nfs_direct_complete(dreq);
 	}
 }
 
-static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
 {
-	dreq->commit_data = nfs_commitdata_alloc();
-	if (dreq->commit_data != NULL)
-		dreq->commit_data->dreq = dreq;
+	schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */
 }
+
 #else
-static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
-{
-	dreq->commit_data = NULL;
-}
 
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
 {
-	nfs_direct_free_writedata(dreq);
 	nfs_zap_mapping(inode, inode->i_mapping);
 	nfs_direct_complete(dreq);
 }
 #endif
 
-static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
-{
-	struct nfs_write_data *data = calldata;
-
-	nfs_writeback_done(task, data);
-}
-
 /*
  * NB: Return the value of the first error return code.  Subsequent
  *     errors after the first one are ignored.
  */
-static void nfs_direct_write_release(void *calldata)
-{
-	struct nfs_write_data *data = calldata;
-	struct nfs_pgio_header *hdr = data->header;
-	struct nfs_direct_req *dreq = (struct nfs_direct_req *) hdr->req;
-	int status = data->task.tk_status;
-
-	spin_lock(&dreq->lock);
-
-	if (unlikely(status < 0)) {
-		/* An error has occurred, so we should not commit */
-		dreq->flags = 0;
-		dreq->error = status;
-	}
-	if (unlikely(dreq->error != 0))
-		goto out_unlock;
-
-	dreq->count += data->res.count;
-
-	if (data->res.verf->committed != NFS_FILE_SYNC) {
-		switch (dreq->flags) {
-			case 0:
-				memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
-				dreq->flags = NFS_ODIRECT_DO_COMMIT;
-				break;
-			case NFS_ODIRECT_DO_COMMIT:
-				if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
-					dprintk("NFS: %5u write verify failed\n", data->task.tk_pid);
-					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-				}
-		}
-	}
-out_unlock:
-	spin_unlock(&dreq->lock);
-
-	if (put_dreq(dreq))
-		nfs_direct_write_complete(dreq, hdr->inode);
-}
-
-static const struct rpc_call_ops nfs_write_direct_ops = {
-	.rpc_call_prepare = nfs_write_prepare,
-	.rpc_call_done = nfs_direct_write_result,
-	.rpc_release = nfs_direct_write_release,
-};
-
 /*
  * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
  * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
@@ -717,143 +610,181 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
  * handled automatically by nfs_direct_write_result().  Otherwise, if
  * no requests have been sent, just return an error.
  */
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
+static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
 						 const struct iovec *iov,
-						 loff_t pos, int sync)
+						 loff_t pos)
 {
+	struct nfs_direct_req *dreq = desc->pg_dreq;
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
 	unsigned long user_addr = (unsigned long)iov->iov_base;
 	size_t count = iov->iov_len;
-	struct rpc_task *task;
-	struct rpc_message msg = {
-		.rpc_cred = ctx->cred,
-	};
-	struct rpc_task_setup task_setup_data = {
-		.rpc_client = NFS_CLIENT(inode),
-		.rpc_message = &msg,
-		.callback_ops = &nfs_write_direct_ops,
-		.workqueue = nfsiod_workqueue,
-		.flags = RPC_TASK_ASYNC,
-	};
 	size_t wsize = NFS_SERVER(inode)->wsize;
 	unsigned int pgbase;
 	int result;
 	ssize_t started = 0;
+	struct page **pagevec = NULL;
+	unsigned int npages;
 
 	do {
-		struct nfs_write_header *whdr;
-		struct nfs_write_data *data;
-		struct nfs_page_array *pages;
 		size_t bytes;
+		int i;
 
 		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(wsize,count);
+		bytes = min(max(wsize, PAGE_SIZE), count);
 
 		result = -ENOMEM;
-		whdr = nfs_writehdr_alloc();
-		if (unlikely(!whdr))
-			break;
-
-		data = nfs_writedata_alloc(&whdr->header, nfs_page_array_len(pgbase, bytes));
-		if (!data) {
-			nfs_writehdr_free(&whdr->header);
+		npages = nfs_page_array_len(pgbase, bytes);
+		if (!pagevec)
+			pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
+		if (!pagevec)
 			break;
-		}
-		data->header = &whdr->header;
-		atomic_inc(&data->header->refcnt);
-		pages = &data->pages;
 
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
-					pages->npages, 0, 0, pages->pagevec, NULL);
+					npages, 0, 0, pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
-		if (result < 0) {
-			nfs_direct_writehdr_release(whdr);
+		if (result < 0)
 			break;
-		}
-		if ((unsigned)result < pages->npages) {
+
+		if ((unsigned)result < npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pages->pagevec, result);
-				nfs_direct_writehdr_release(whdr);
+				nfs_direct_release_pages(pagevec, result);
 				break;
 			}
 			bytes -= pgbase;
-			pages->npages = result;
+			npages = result;
 		}
 
-		get_dreq(dreq);
-
-		list_move_tail(&whdr->header.pages, &dreq->rewrite_list);
-
-		whdr->header.req = (struct nfs_page *) dreq;
-		whdr->header.inode = inode;
-		whdr->header.cred = msg.rpc_cred;
-		data->args.fh = NFS_FH(inode);
-		data->args.context = ctx;
-		data->args.lock_context = dreq->l_ctx;
-		data->args.offset = pos;
-		data->args.pgbase = pgbase;
-		data->args.pages = pages->pagevec;
-		data->args.count = bytes;
-		data->args.stable = sync;
-		data->res.fattr = &data->fattr;
-		data->res.count = bytes;
-		data->res.verf = &data->verf;
-		nfs_fattr_init(&data->fattr);
-
-		task_setup_data.task = &data->task;
-		task_setup_data.callback_data = data;
-		msg.rpc_argp = &data->args;
-		msg.rpc_resp = &data->res;
-		NFS_PROTO(inode)->write_setup(data, &msg);
-
-		task = rpc_run_task(&task_setup_data);
-		if (IS_ERR(task))
-			break;
+		for (i = 0; i < npages; i++) {
+			struct nfs_page *req;
+			unsigned int req_len = min(bytes, PAGE_SIZE - pgbase);
 
-		dprintk("NFS: %5u initiated direct write call "
-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
-				task->tk_pid,
-				inode->i_sb->s_id,
-				(long long)NFS_FILEID(inode),
-				bytes,
-				(unsigned long long)data->args.offset);
-		rpc_put_task(task);
-
-		started += bytes;
-		user_addr += bytes;
-		pos += bytes;
-
-		/* FIXME: Remove this useless math from the final patch */
-		pgbase += bytes;
-		pgbase &= ~PAGE_MASK;
-		BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
-
-		count -= bytes;
+			req = nfs_create_request(dreq->ctx, dreq->inode,
+						 pagevec[i],
+						 pgbase, req_len);
+			if (IS_ERR(req)) {
+				nfs_direct_release_pages(pagevec + i,
+							 npages - i);
+				result = PTR_ERR(req);
+				break;
+			}
+			nfs_lock_request(req);
+			req->wb_index = pos >> PAGE_SHIFT;
+			req->wb_offset = pos & ~PAGE_MASK;
+			if (!nfs_pageio_add_request(desc, req)) {
+				result = desc->pg_error;
+				nfs_unlock_request(req);
+				nfs_release_request(req);
+				nfs_direct_release_pages(pagevec + i,
+							 npages - i);
+			}
+			pgbase = 0;
+			bytes -= req_len;
+			started += req_len;
+			user_addr += req_len;
+			pos += req_len;
+			count -= req_len;
+		}
 	} while (count != 0);
 
+	kfree(pagevec);
+
 	if (started)
 		return started;
 	return result < 0 ? (ssize_t) result : -EFAULT;
 }
 
+static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
+{
+	struct nfs_direct_req *dreq = hdr->dreq;
+	struct nfs_commit_info cinfo;
+	int bit = -1;
+	struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+		goto out_put;
+
+	nfs_init_cinfo_from_dreq(&cinfo, dreq);
+
+	spin_lock(&dreq->lock);
+
+	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+		dreq->flags = 0;
+		dreq->error = hdr->error;
+	}
+	if (dreq->error != 0)
+		bit = NFS_IOHDR_ERROR;
+	else {
+		dreq->count += (hdr->first_error - req_offset(hdr->req));
+		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
+			dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+			bit = NFS_IOHDR_NEED_RESCHED;
+		} else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
+				bit = NFS_IOHDR_NEED_RESCHED;
+			else if (dreq->flags == 0) {
+				memcpy(&dreq->verf, &req->wb_verf,
+				       sizeof(dreq->verf));
+				bit = NFS_IOHDR_NEED_COMMIT;
+				dreq->flags = NFS_ODIRECT_DO_COMMIT;
+			} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
+				if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) {
+					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+					bit = NFS_IOHDR_NEED_RESCHED;
+				} else
+					bit = NFS_IOHDR_NEED_COMMIT;
+			}
+		}
+	}
+	spin_unlock(&dreq->lock);
+
+	while (!list_empty(&hdr->pages)) {
+		req = nfs_list_entry(hdr->pages.next);
+		nfs_list_remove_request(req);
+		switch (bit) {
+		case NFS_IOHDR_NEED_RESCHED:
+		case NFS_IOHDR_NEED_COMMIT:
+			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
+			break;
+		default:
+			page_cache_release(req->wb_page);
+			nfs_release_request(req);
+		}
+		nfs_unlock_request(req);
+	}
+
+out_put:
+	if (put_dreq(dreq))
+		nfs_direct_write_complete(dreq, hdr->inode);
+	hdr->release(hdr);
+}
+
+static const struct nfs_io_completion_ops nfs_direct_write_compl_ops = {
+	.error_cleanup = nfs_sync_pgio_error,
+	.init_hdr = nfs_direct_pgio_init,
+	.completion = nfs_direct_write_completion,
+};
+
 static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 					       const struct iovec *iov,
 					       unsigned long nr_segs,
-					       loff_t pos, int sync)
+					       loff_t pos)
 {
+	struct nfs_pageio_descriptor desc;
 	ssize_t result = 0;
 	size_t requested_bytes = 0;
 	unsigned long seg;
 
+	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_COND_STABLE,
+			      &nfs_direct_write_compl_ops);
+	desc.pg_dreq = dreq;
 	get_dreq(dreq);
 
 	for (seg = 0; seg < nr_segs; seg++) {
 		const struct iovec *vec = &iov[seg];
-		result = nfs_direct_write_schedule_segment(dreq, vec,
-							   pos, sync);
+		result = nfs_direct_write_schedule_segment(&desc, vec, pos);
 		if (result < 0)
 			break;
 		requested_bytes += result;
@@ -861,6 +792,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 			break;
 		pos += vec->iov_len;
 	}
+	nfs_pageio_complete(&desc);
 
 	/*
 	 * If no bytes were started, return the error, and let the
@@ -883,16 +815,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t result = -ENOMEM;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct nfs_direct_req *dreq;
-	size_t wsize = NFS_SERVER(inode)->wsize;
-	int sync = NFS_UNSTABLE;
 
 	dreq = nfs_direct_req_alloc();
 	if (!dreq)
 		goto out;
-	nfs_alloc_commit_data(dreq);
-
-	if (dreq->commit_data == NULL || count <= wsize)
-		sync = NFS_FILE_SYNC;
 
 	dreq->inode = inode;
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
@@ -902,7 +828,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync);
+	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
 	if (!result)
 		result = nfs_direct_wait(dreq);
 out_release:
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d08f048..1436d23 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -323,6 +323,9 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
 /* write.c */
+extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+				struct inode *inode, int ioflags,
+				const struct nfs_io_completion_ops *compl_ops);
 extern struct nfs_write_header *nfs_writehdr_alloc(void);
 extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
 extern struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
@@ -349,6 +352,15 @@ extern void nfs_init_commit(struct nfs_commit_data *data,
 			    struct list_head *head,
 			    struct pnfs_layout_segment *lseg,
 			    struct nfs_commit_info *cinfo);
+int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
+			 struct nfs_commit_info *cinfo, int max);
+int nfs_scan_commit(struct inode *inode, struct list_head *dst,
+		    struct nfs_commit_info *cinfo);
+void nfs_mark_request_commit(struct nfs_page *req,
+			     struct pnfs_layout_segment *lseg,
+			     struct nfs_commit_info *cinfo);
+int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
+			    int how, struct nfs_commit_info *cinfo);
 void nfs_retry_commit(struct list_head *page_list,
 		      struct pnfs_layout_segment *lseg,
 		      struct nfs_commit_info *cinfo);
@@ -368,6 +380,10 @@ extern int nfs_migrate_page(struct address_space *,
 #define nfs_migrate_page NULL
 #endif
 
+/* direct.c */
+void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
+			      struct nfs_direct_req *dreq);
+
 /* nfs4proc.c */
 extern void __nfs4_read_done_cb(struct nfs_read_data *);
 extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 9bbc798..87aa10a 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -995,12 +995,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
 }
 
 static int
-filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
-			       struct nfs_commit_info *cinfo,
-			       int max)
+transfer_commit_list(struct list_head *src, struct list_head *dst,
+		     struct nfs_commit_info *cinfo, int max)
 {
-	struct list_head *src = &bucket->written;
-	struct list_head *dst = &bucket->committing;
 	struct nfs_page *req, *tmp;
 	int ret = 0;
 
@@ -1013,9 +1010,22 @@ filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
 		clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
 		nfs_list_add_request(req, dst);
 		ret++;
-		if (ret == max)
+		if ((ret == max) && !cinfo->dreq)
 			break;
 	}
+	return ret;
+}
+
+static int
+filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
+			       struct nfs_commit_info *cinfo,
+			       int max)
+{
+	struct list_head *src = &bucket->written;
+	struct list_head *dst = &bucket->committing;
+	int ret;
+
+	ret = transfer_commit_list(src, dst, cinfo, max);
 	if (ret) {
 		cinfo->ds->nwritten -= ret;
 		cinfo->ds->ncommitting += ret;
@@ -1045,6 +1055,27 @@ static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo,
 	return rv;
 }
 
+/* Pull everything off the committing lists and dump into @dst */
+static void filelayout_recover_commit_reqs(struct list_head *dst,
+					   struct nfs_commit_info *cinfo)
+{
+	struct pnfs_commit_bucket *b;
+	int i;
+
+	/* NOTE cinfo->lock is NOT held, relying on fact that this is
+	 * only called on single thread per dreq.
+	 * Can't take the lock because need to do put_lseg
+	 */
+	for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
+		if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
+			BUG_ON(!list_empty(&b->written));
+			put_lseg(b->wlseg);
+			b->wlseg = NULL;
+		}
+	}
+	cinfo->ds->nwritten = 0;
+}
+
 static unsigned int
 alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
 {
@@ -1169,6 +1200,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
 	.mark_request_commit	= filelayout_mark_request_commit,
 	.clear_request_commit	= filelayout_clear_request_commit,
 	.scan_commit_lists	= filelayout_scan_commit_lists,
+	.recover_commit_reqs	= filelayout_recover_commit_reqs,
 	.commit_pagelist	= filelayout_commit_pagelist,
 	.read_pagelist		= filelayout_read_pagelist,
 	.write_pagelist		= filelayout_write_pagelist,
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 5e4a51c..c35cfdb 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -102,6 +102,8 @@ struct pnfs_layoutdriver_type {
 				      struct nfs_commit_info *cinfo);
 	int (*scan_commit_lists) (struct nfs_commit_info *cinfo,
 				  int max);
+	void (*recover_commit_reqs) (struct list_head *list,
+				     struct nfs_commit_info *cinfo);
 	int (*commit_pagelist)(struct inode *inode,
 			       struct list_head *mds_pages,
 			       int how,
@@ -323,6 +325,15 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
 		return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max);
 }
 
+static inline void
+pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
+			 struct nfs_commit_info *cinfo)
+{
+	if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
+		return;
+	NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
+}
+
 /* Should the pNFS client commit and return the layout upon a setattr */
 static inline bool
 pnfs_ld_layoutret_on_setattr(struct inode *inode)
@@ -456,6 +467,12 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
 	return 0;
 }
 
+static inline void
+pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
+			 struct nfs_commit_info *cinfo)
+{
+}
+
 static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 {
 	return 0;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index aabc103..fceb7cc 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -39,9 +39,6 @@
 /*
  * Local function declarations
  */
-static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
-				  struct inode *inode, int ioflags,
-				  const struct nfs_io_completion_ops *compl_ops);
 static void nfs_redirty_request(struct nfs_page *req);
 static const struct rpc_call_ops nfs_write_common_ops;
 static const struct rpc_call_ops nfs_commit_ops;
@@ -512,14 +509,17 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo,
 		    struct inode *inode,
 		    struct nfs_direct_req *dreq)
 {
-	nfs_init_cinfo_from_inode(cinfo, inode);
+	if (dreq)
+		nfs_init_cinfo_from_dreq(cinfo, dreq);
+	else
+		nfs_init_cinfo_from_inode(cinfo, inode);
 }
 EXPORT_SYMBOL_GPL(nfs_init_cinfo);
 
 /*
  * Add a request to the inode's commit list.
  */
-static void
+void
 nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
 			struct nfs_commit_info *cinfo)
 {
@@ -561,7 +561,7 @@ int nfs_write_need_commit(struct nfs_write_data *data)
 }
 
 #else
-static void
+void
 nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
 			struct nfs_commit_info *cinfo)
 {
@@ -626,7 +626,7 @@ nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
 }
 
 /* cinfo->lock held by caller */
-static int
+int
 nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
 		     struct nfs_commit_info *cinfo, int max)
 {
@@ -641,7 +641,7 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
 		nfs_request_remove_commit_list(req, cinfo);
 		nfs_list_add_request(req, dst);
 		ret++;
-		if (ret == max)
+		if ((ret == max) && !cinfo->dreq)
 			break;
 	}
 	return ret;
@@ -656,7 +656,7 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
  * Moves requests from the inode's 'commit' request list.
  * The requests are *not* checked to ensure that they form a contiguous set.
  */
-static int
+int
 nfs_scan_commit(struct inode *inode, struct list_head *dst,
 		struct nfs_commit_info *cinfo)
 {
@@ -680,8 +680,8 @@ static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
 	return 0;
 }
 
-static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst,
-				  struct nfs_commit_info *cinfo)
+int nfs_scan_commit(struct inode *inode, struct list_head *dst,
+		    struct nfs_commit_info *cinfo)
 {
 	return 0;
 }
@@ -1200,9 +1200,9 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
 
-static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
-				  struct inode *inode, int ioflags,
-				  const struct nfs_io_completion_ops *compl_ops)
+void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+			   struct inode *inode, int ioflags,
+			   const struct nfs_io_completion_ops *compl_ops)
 {
 	if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops))
 		nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops);
@@ -1566,8 +1566,8 @@ static const struct nfs_commit_completion_ops nfs_commit_compl_ops = {
 	.error_cleanup = nfs_commit_clear_lock,
 };
 
-static int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
-				   int how, struct nfs_commit_info *cinfo)
+int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
+			    int how, struct nfs_commit_info *cinfo)
 {
 	int status;
 
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 03/26] NFS4.1: Add lseg to struct nfs4_fl_commit_bucket
  2012-04-09 20:52 ` [PATCH 03/26] NFS4.1: Add lseg to struct nfs4_fl_commit_bucket Fred Isaman
@ 2012-04-13 15:47   ` Myklebust, Trond
  2012-04-13 16:50     ` Fred Isaman
  0 siblings, 1 reply; 40+ messages in thread
From: Myklebust, Trond @ 2012-04-13 15:47 UTC (permalink / raw)
  To: Isaman, Fred; +Cc: linux-nfs

T24gTW9uLCAyMDEyLTA0LTA5IGF0IDE2OjUyIC0wNDAwLCBGcmVkIElzYW1hbiB3cm90ZToNCj4g
QWxzbyBjcmVhdGUgYSBjb21taXRfaW5mbyBzdHJ1Y3R1cmUgdG8gaG9sZCB0aGUgYnVja2V0IGFy
cmF5IGFuZCBwdXNoDQo+IGl0IHVwIGZyb20gdGhlIGxzZWcgdG8gdGhlIGxheW91dCB3aGVyZSBp
dCByZWFsbHkgYmVsb25ncy4NCj4gDQo+IFdoaWxlIHdlIGFyZSBhdCBpdCwgZml4IGEgcmVmY291
bnRpbmcgYnVnIGR1ZSB0byBhbiAoaW5jb3JyZWN0KQ0KPiBpbXBsaWNpdCBhc3N1bXB0aW9uIHRo
YXQgZmlsZWxheW91dF9zY2FuX2RzX2NvbW1pdF9saXN0IGFsd2F5cw0KPiBjb21wbGV0ZWx5IGVt
cHRpZWQgdGhlIHNyYyBsaXN0Lg0KPiANCj4gVGhpcyBjbGFyaWZpZXMgcmVmY291bnRpbmcsIHJl
bW92ZXMgdGhlIHVnbHkgZmluZF9vbmx5X3dyaXRlX2xzZWcNCj4gZnVuY3Rpb25zLCBhbmQgcHVz
aGVzIHRoZSBmaWxlIGxheW91dCBjb21taXQgY29kZSBhbG9uZyBvbiB0aGUgcGF0aCB0bw0KPiBz
dXBwb3J0aW5nIG11bHRpcGxlIGxzZWdzLg0KPiANCj4gU2lnbmVkLW9mZi1ieTogRnJlZCBJc2Ft
YW4gPGlpc2FtYW5AbmV0YXBwLmNvbT4NCj4gLS0tDQo+ICBmcy9uZnMvbmZzNGZpbGVsYXlvdXQu
YyB8ICAxNTAgKysrKysrKysrKysrKysrKysrKysrKysrKy0tLS0tLS0tLS0tLS0tLS0tLS0tLS0N
Cj4gIGZzL25mcy9uZnM0ZmlsZWxheW91dC5oIHwgICAyMCArKysrKystDQo+ICAyIGZpbGVzIGNo
YW5nZWQsIDk3IGluc2VydGlvbnMoKyksIDczIGRlbGV0aW9ucygtKQ0KPiANCj4gZGlmZiAtLWdp
dCBhL2ZzL25mcy9uZnM0ZmlsZWxheW91dC5jIGIvZnMvbmZzL25mczRmaWxlbGF5b3V0LmMNCj4g
aW5kZXggNWFjZmQ5ZS4uMGJiYzg4YSAxMDA2NDQNCj4gLS0tIGEvZnMvbmZzL25mczRmaWxlbGF5
b3V0LmMNCj4gKysrIGIvZnMvbmZzL25mczRmaWxlbGF5b3V0LmMNCj4gQEAgLTY1MCw3ICs2NTAs
MTUgQEAgZmlsZWxheW91dF9mcmVlX2xzZWcoc3RydWN0IHBuZnNfbGF5b3V0X3NlZ21lbnQgKmxz
ZWcpDQo+ICANCj4gIAlkcHJpbnRrKCItLT4gJXNcbiIsIF9fZnVuY19fKTsNCj4gIAluZnM0X2Zs
X3B1dF9kZXZpY2VpZChmbC0+ZHNhZGRyKTsNCj4gLQlrZnJlZShmbC0+Y29tbWl0X2J1Y2tldHMp
Ow0KPiArCS8qIFRoaXMgYXNzdW1lcyBhIHNpbmdsZSBSVyBsc2VnICovDQo+ICsJaWYgKGxzZWct
PnBsc19yYW5nZS5pb21vZGUgPT0gSU9NT0RFX1JXKSB7DQo+ICsJCXN0cnVjdCBuZnM0X2ZpbGVs
YXlvdXQgKmZsbzsNCj4gKw0KPiArCQlmbG8gPSBGSUxFTEFZT1VUX0ZST01fSERSKGxzZWctPnBs
c19sYXlvdXQpOw0KPiArCQlmbG8tPmNvbW1pdF9pbmZvLm5idWNrZXRzID0gMDsNCj4gKwkJa2Zy
ZWUoZmxvLT5jb21taXRfaW5mby5idWNrZXRzKTsNCj4gKwkJZmxvLT5jb21taXRfaW5mby5idWNr
ZXRzID0gTlVMTDsNCj4gKwl9DQo+ICAJX2ZpbGVsYXlvdXRfZnJlZV9sc2VnKGZsKTsNCj4gIH0N
Cj4gIA0KPiBAQCAtNjgxLDE5ICs2ODksMjcgQEAgZmlsZWxheW91dF9hbGxvY19sc2VnKHN0cnVj
dCBwbmZzX2xheW91dF9oZHIgKmxheW91dGlkLA0KPiAgCSAqIHRvIGZpbGVsYXlvdXRfd3JpdGVf
cGFnZWxpc3QoKS4NCj4gIAkgKiAqLw0KPiAgCWlmICgoIWZsLT5jb21taXRfdGhyb3VnaF9tZHMp
ICYmIChsZ3ItPnJhbmdlLmlvbW9kZSA9PSBJT01PREVfUlcpKSB7DQo+ICsJCXN0cnVjdCBuZnM0
X2ZpbGVsYXlvdXQgKmZsbyA9IEZJTEVMQVlPVVRfRlJPTV9IRFIobGF5b3V0aWQpOw0KPiAgCQlp
bnQgaTsNCj4gIAkJaW50IHNpemUgPSAoZmwtPnN0cmlwZV90eXBlID09IFNUUklQRV9TUEFSU0Up
ID8NCj4gIAkJCWZsLT5kc2FkZHItPmRzX251bSA6IGZsLT5kc2FkZHItPnN0cmlwZV9jb3VudDsN
Cj4gIA0KPiAtCQlmbC0+Y29tbWl0X2J1Y2tldHMgPSBrY2FsbG9jKHNpemUsIHNpemVvZihzdHJ1
Y3QgbmZzNF9mbF9jb21taXRfYnVja2V0KSwgZ2ZwX2ZsYWdzKTsNCj4gLQkJaWYgKCFmbC0+Y29t
bWl0X2J1Y2tldHMpIHsNCj4gKwkJaWYgKGZsby0+Y29tbWl0X2luZm8ubmJ1Y2tldHMgIT0gMCkg
ew0KPiArCQkJLyogU2hvdWxkbid0IGhhcHBlbiBpZiBvbmx5IG9uZSBJT01PREVfUlcgbHNlZyAq
Lw0KPiAgCQkJZmlsZWxheW91dF9mcmVlX2xzZWcoJmZsLT5nZW5lcmljX2hkcik7DQo+ICAJCQly
ZXR1cm4gTlVMTDsNCg0KSXMgdGhpcyByZWFsbHkgdGhlIGNvcnJlY3QgdGhpbmcgdG8gZG8/IElm
IHdlJ3JlIGRlYWxpbmcgd2l0aCBtdWx0aXBsZQ0KSU9NT0RFX1JXIGxzZWdzLCB0aGVuIHN1cmVs
eSB3ZSBtaWdodCBmaW5kIG91cnNlbHZlcyBpbiBhIHNpdHVhdGlvbg0Kd2hlcmUgd2UgbWlnaHQg
aGF2ZSB0byBhZGQgbmV3IGNvbW1pdCBidWNrZXRzLg0KDQpEb2Vzbid0IHRoZSBhYm92ZSBkZXNl
cnZlIGEgV0FSTl9PTigpIGFuZCBhIEZJWE1FIGNvbW1lbnQ/DQoNCj4gIAkJfQ0KPiAtCQlmbC0+
bnVtYmVyX29mX2J1Y2tldHMgPSBzaXplOw0KPiArCQlmbG8tPmNvbW1pdF9pbmZvLmJ1Y2tldHMg
PSBrY2FsbG9jKHNpemUsDQo+ICsJCQkJCQkgICBzaXplb2Yoc3RydWN0IG5mczRfZmxfY29tbWl0
X2J1Y2tldCksDQo+ICsJCQkJCQkgICBnZnBfZmxhZ3MpOw0KPiArCQlpZiAoIWZsby0+Y29tbWl0
X2luZm8uYnVja2V0cykgew0KPiArCQkJZmlsZWxheW91dF9mcmVlX2xzZWcoJmZsLT5nZW5lcmlj
X2hkcik7DQo+ICsJCQlyZXR1cm4gTlVMTDsNCj4gKwkJfQ0KPiArCQlmbG8tPmNvbW1pdF9pbmZv
Lm5idWNrZXRzID0gc2l6ZTsNCj4gIAkJZm9yIChpID0gMDsgaSA8IHNpemU7IGkrKykgew0KPiAt
CQkJSU5JVF9MSVNUX0hFQUQoJmZsLT5jb21taXRfYnVja2V0c1tpXS53cml0dGVuKTsNCj4gLQkJ
CUlOSVRfTElTVF9IRUFEKCZmbC0+Y29tbWl0X2J1Y2tldHNbaV0uY29tbWl0dGluZyk7DQo+ICsJ
CQlJTklUX0xJU1RfSEVBRCgmZmxvLT5jb21taXRfaW5mby5idWNrZXRzW2ldLndyaXR0ZW4pOw0K
PiArCQkJSU5JVF9MSVNUX0hFQUQoJmZsby0+Y29tbWl0X2luZm8uYnVja2V0c1tpXS5jb21taXR0
aW5nKTsNCj4gIAkJfQ0KDQpUaGUgY29tbWl0X2luZm8gYWxsb2NhdGlvbiBhbmQgaW5pdGlhbGlz
YXRpb24gc2hvdWxkIHByb2JhYmx5IGJlIG1vdmVkDQppbnRvIGEgZGlmZmVyZW50IGZ1bmN0aW9u
IGlmIGl0IGlzIG5vIGxvbmdlciBwYXJ0IG9mIHRoZSBsc2VnLg0KDQpUaGF0IHNhaWQsIEknbSBo
YXZpbmcgdHJvdWJsZSBzZWVpbmcgaG93IHRoaXMgYXJjaGl0ZWN0dXJlIGNhbiBzdXJ2aXZlDQpp
biBhIGZ1dHVyZSBtdWx0aXBsZS1sYXlvdXQgd29ybGQuIEknbSB0aGlua2luZyB0aGF0IHNpbmNl
IHRoZQ0KY29tbWl0X2J1Y2tldHMgbmVlZCB0byB0YWtlIGEgcmVmZXJlbmNlIHRvIHRoZSBsc2Vn
LCB0aGVuIG1heWJlIHdlDQpzaG91bGQga2VlcCB0aGUgY29tbWl0IGJ1Y2tldHMgdGllZCB0byB0
aGUgbHNlZywgYW5kIHRoZW4gbGV0IHRoZSBjb21taXQNCmNvZGUgaXRlcmF0ZSB0aHJvdWdoIHRo
ZSBsaXN0IG9mIGxzZWdzIHdpdGggc29tZXRoaW5nIGluIHRoZWlyIGNvbW1pdA0KYnVja2V0cy4u
LiBXaGF0IGFyZSB5b3VyIHRob3VnaHRzIGZvciBob3cgdGhpcyBtaWdodCBldm9sdmU/DQoNCi0t
IA0KVHJvbmQgTXlrbGVidXN0DQpMaW51eCBORlMgY2xpZW50IG1haW50YWluZXINCg0KTmV0QXBw
DQpUcm9uZC5NeWtsZWJ1c3RAbmV0YXBwLmNvbQ0Kd3d3Lm5ldGFwcC5jb20NCg0K

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 10/26] NFS: put open context on error in nfs_pagein_multi
  2012-04-09 20:52 ` [PATCH 10/26] NFS: put open context on error in nfs_pagein_multi Fred Isaman
@ 2012-04-13 15:52   ` Myklebust, Trond
  0 siblings, 0 replies; 40+ messages in thread
From: Myklebust, Trond @ 2012-04-13 15:52 UTC (permalink / raw)
  To: Isaman, Fred; +Cc: linux-nfs

T24gTW9uLCAyMDEyLTA0LTA5IGF0IDE2OjUyIC0wNDAwLCBGcmVkIElzYW1hbiB3cm90ZToNCj4g
U2lnbmVkLW9mZi1ieTogRnJlZCBJc2FtYW4gPGlpc2FtYW5AbmV0YXBwLmNvbT4NCj4gLS0tDQo+
ICBmcy9uZnMvcmVhZC5jIHwgICAgMiArLQ0KPiAgMSBmaWxlcyBjaGFuZ2VkLCAxIGluc2VydGlv
bnMoKyksIDEgZGVsZXRpb25zKC0pDQo+IA0KPiBkaWZmIC0tZ2l0IGEvZnMvbmZzL3JlYWQuYyBi
L2ZzL25mcy9yZWFkLmMNCj4gaW5kZXggZTFkYjA0YS4uNGRkYmE2NyAxMDA2NDQNCj4gLS0tIGEv
ZnMvbmZzL3JlYWQuYw0KPiArKysgYi9mcy9uZnMvcmVhZC5jDQo+IEBAIC0zMjMsNyArMzIzLDcg
QEAgb3V0X2JhZDoNCj4gIAl3aGlsZSAoIWxpc3RfZW1wdHkocmVzKSkgew0KPiAgCQlkYXRhID0g
bGlzdF9lbnRyeShyZXMtPm5leHQsIHN0cnVjdCBuZnNfcmVhZF9kYXRhLCBsaXN0KTsNCj4gIAkJ
bGlzdF9kZWwoJmRhdGEtPmxpc3QpOw0KPiAtCQluZnNfcmVhZGRhdGFfZnJlZShkYXRhKTsNCj4g
KwkJbmZzX3JlYWRkYXRhX3JlbGVhc2UoZGF0YSk7DQo+ICAJfQ0KPiAgCW5mc19yZWFkcGFnZV9y
ZWxlYXNlKHJlcSk7DQo+ICAJcmV0dXJuIC1FTk9NRU07DQoNClNob3VsZCB0aGlzIGJlIGEgc3Rh
YmxlIHRyZWUgY2FuZGlkYXRlPw0KDQotLSANClRyb25kIE15a2xlYnVzdA0KTGludXggTkZTIGNs
aWVudCBtYWludGFpbmVyDQoNCk5ldEFwcA0KVHJvbmQuTXlrbGVidXN0QG5ldGFwcC5jb20NCnd3
dy5uZXRhcHAuY29tDQoNCg==

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 11/26] NFS: put open context on error in nfs_flush_multi
  2012-04-09 20:52 ` [PATCH 11/26] NFS: put open context on error in nfs_flush_multi Fred Isaman
@ 2012-04-13 15:52   ` Myklebust, Trond
  0 siblings, 0 replies; 40+ messages in thread
From: Myklebust, Trond @ 2012-04-13 15:52 UTC (permalink / raw)
  To: Isaman, Fred; +Cc: linux-nfs

T24gTW9uLCAyMDEyLTA0LTA5IGF0IDE2OjUyIC0wNDAwLCBGcmVkIElzYW1hbiB3cm90ZToNCj4g
U2lnbmVkLW9mZi1ieTogRnJlZCBJc2FtYW4gPGlpc2FtYW5AbmV0YXBwLmNvbT4NCj4gLS0tDQo+
ICBmcy9uZnMvd3JpdGUuYyB8ICAgIDIgKy0NCj4gIDEgZmlsZXMgY2hhbmdlZCwgMSBpbnNlcnRp
b25zKCspLCAxIGRlbGV0aW9ucygtKQ0KPiANCj4gZGlmZiAtLWdpdCBhL2ZzL25mcy93cml0ZS5j
IGIvZnMvbmZzL3dyaXRlLmMNCj4gaW5kZXggMmVkOTNiOC4uNzY3MzVkZCAxMDA2NDQNCj4gLS0t
IGEvZnMvbmZzL3dyaXRlLmMNCj4gKysrIGIvZnMvbmZzL3dyaXRlLmMNCj4gQEAgLTEwMTgsNyAr
MTAxOCw3IEBAIG91dF9iYWQ6DQo+ICAJd2hpbGUgKCFsaXN0X2VtcHR5KHJlcykpIHsNCj4gIAkJ
ZGF0YSA9IGxpc3RfZW50cnkocmVzLT5uZXh0LCBzdHJ1Y3QgbmZzX3dyaXRlX2RhdGEsIGxpc3Qp
Ow0KPiAgCQlsaXN0X2RlbCgmZGF0YS0+bGlzdCk7DQo+IC0JCW5mc193cml0ZWRhdGFfZnJlZShk
YXRhKTsNCj4gKwkJbmZzX3dyaXRlZGF0YV9yZWxlYXNlKGRhdGEpOw0KPiAgCX0NCj4gIAluZnNf
cmVkaXJ0eV9yZXF1ZXN0KHJlcSk7DQo+ICAJcmV0dXJuIC1FTk9NRU07DQoNClN0YWJsZSBjYW5k
aWRhdGU/DQoNCi0tIA0KVHJvbmQgTXlrbGVidXN0DQpMaW51eCBORlMgY2xpZW50IG1haW50YWlu
ZXINCg0KTmV0QXBwDQpUcm9uZC5NeWtsZWJ1c3RAbmV0YXBwLmNvbQ0Kd3d3Lm5ldGFwcC5jb20N
Cg0K

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 03/26] NFS4.1: Add lseg to struct nfs4_fl_commit_bucket
  2012-04-13 15:47   ` Myklebust, Trond
@ 2012-04-13 16:50     ` Fred Isaman
  0 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-13 16:50 UTC (permalink / raw)
  To: Myklebust, Trond; +Cc: Isaman, Fred, linux-nfs

On Fri, Apr 13, 2012 at 11:47 AM, Myklebust, Trond
<Trond.Myklebust@netapp.com> wrote:
> On Mon, 2012-04-09 at 16:52 -0400, Fred Isaman wrote:
>> Also create a commit_info structure to hold the bucket array and push
>> it up from the lseg to the layout where it really belongs.
>>
>> While we are at it, fix a refcounting bug due to an (incorrect)
>> implicit assumption that filelayout_scan_ds_commit_list always
>> completely emptied the src list.
>>
>> This clarifies refcounting, removes the ugly find_only_write_lseg
>> functions, and pushes the file layout commit code along on the path to
>> supporting multiple lsegs.
>>
>> Signed-off-by: Fred Isaman <iisaman@netapp.com>
>> ---
>>  fs/nfs/nfs4filelayout.c |  150 +++++++++++++++++++++++++----------------------
>>  fs/nfs/nfs4filelayout.h |   20 ++++++-
>>  2 files changed, 97 insertions(+), 73 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>> index 5acfd9e..0bbc88a 100644
>> --- a/fs/nfs/nfs4filelayout.c
>> +++ b/fs/nfs/nfs4filelayout.c
>> @@ -650,7 +650,15 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
>>
>>       dprintk("--> %s\n", __func__);
>>       nfs4_fl_put_deviceid(fl->dsaddr);
>> -     kfree(fl->commit_buckets);
>> +     /* This assumes a single RW lseg */
>> +     if (lseg->pls_range.iomode == IOMODE_RW) {
>> +             struct nfs4_filelayout *flo;
>> +
>> +             flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
>> +             flo->commit_info.nbuckets = 0;
>> +             kfree(flo->commit_info.buckets);
>> +             flo->commit_info.buckets = NULL;
>> +     }
>>       _filelayout_free_lseg(fl);
>>  }
>>
>> @@ -681,19 +689,27 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
>>        * to filelayout_write_pagelist().
>>        * */
>>       if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
>> +             struct nfs4_filelayout *flo = FILELAYOUT_FROM_HDR(layoutid);
>>               int i;
>>               int size = (fl->stripe_type == STRIPE_SPARSE) ?
>>                       fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
>>
>> -             fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
>> -             if (!fl->commit_buckets) {
>> +             if (flo->commit_info.nbuckets != 0) {
>> +                     /* Shouldn't happen if only one IOMODE_RW lseg */
>>                       filelayout_free_lseg(&fl->generic_hdr);
>>                       return NULL;
>
> Is this really the correct thing to do? If we're dealing with multiple
> IOMODE_RW lsegs, then surely we might find ourselves in a situation
> where we might have to add new commit buckets.
>
> Doesn't the above deserve a WARN_ON() and a FIXME comment?
>
>>               }
>> -             fl->number_of_buckets = size;
>> +             flo->commit_info.buckets = kcalloc(size,
>> +                                                sizeof(struct nfs4_fl_commit_bucket),
>> +                                                gfp_flags);
>> +             if (!flo->commit_info.buckets) {
>> +                     filelayout_free_lseg(&fl->generic_hdr);
>> +                     return NULL;
>> +             }
>> +             flo->commit_info.nbuckets = size;
>>               for (i = 0; i < size; i++) {
>> -                     INIT_LIST_HEAD(&fl->commit_buckets[i].written);
>> -                     INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
>> +                     INIT_LIST_HEAD(&flo->commit_info.buckets[i].written);
>> +                     INIT_LIST_HEAD(&flo->commit_info.buckets[i].committing);
>>               }
>
> The commit_info allocation and initialisation should probably be moved
> into a different function if it is no longer part of the lseg.
>

This is done in patch 26, and called from pg_init_write.  Perhaps I
should move it earlier in the series
This is also the reason for the lack of WARN_ON, as in the moved code
it hits frequently.

My idea was that in a multi-layout scenerio, the check (for
ncommits==0) would be replaced with code that
would scan the existing buckets to see if any new needed to be added.
Regarding taking the reference to the lseg,
I am still not convinced that the bucket really needs to take a ref on
the lseg.  Eventually buckets need to be tied to
a ds, fh pair, though a ds, fh,lseg triple may be needed to deal with
certain corner cases.

Fred

> That said, I'm having trouble seeing how this architecture can survive
> in a future multiple-layout world. I'm thinking that since the
> commit_buckets need to take a reference to the lseg, then maybe we
> should keep the commit buckets tied to the lseg, and then let the commit
> code iterate through the list of lsegs with something in their commit
> buckets... What are your thoughts for how this might evolve?
>
> --
> Trond Myklebust
> Linux NFS client maintainer
>
> NetApp
> Trond.Myklebust@netapp.com
> www.netapp.com
>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 14/26] NFS: merge _full and _partial read rpc_ops
  2012-04-09 20:52 ` [PATCH 14/26] NFS: merge _full and _partial read rpc_ops Fred Isaman
@ 2012-04-16 20:28   ` Myklebust, Trond
  2012-04-17 15:37     ` Fred Isaman
  2012-04-16 20:41   ` Myklebust, Trond
  1 sibling, 1 reply; 40+ messages in thread
From: Myklebust, Trond @ 2012-04-16 20:28 UTC (permalink / raw)
  To: Isaman, Fred; +Cc: linux-nfs

T24gTW9uLCAyMDEyLTA0LTA5IGF0IDE2OjUyIC0wNDAwLCBGcmVkIElzYW1hbiB3cm90ZToNCj4g
RGVjb3VwbGUgbmZzX3BnaW9faGVhZGVyIGFuZCBuZnNfcmVhZF9kYXRhLCBhbmQgaGF2ZSAocG9z
c2libHkNCj4gbXVsdGlwbGUpIG5mc19yZWFkX2RhdGFzIGVhY2ggdGFrZSBhIHJlZmNvdW50IG9u
IG5mc19wZ2lvX2hlYWRlci4NCj4gDQo+IEZvciB0aGUgbW9tZW50IGtlZXBzIG5mc19yZWFkX2hl
YWRlciBhcyBhIHdheSB0byBwcmVhbGxvY2F0ZSBhIHNpbmdsZQ0KPiBuZnNfcmVhZF9kYXRhIHdp
dGggdGhlIG5mc19wZ2lvX2hlYWRlci4gIFRoZSBjb2RlIGRvZXNuJ3QgbmVlZCB0aGlzLA0KPiBh
bmQgd291bGQgYmUgcHJldHRpZXIgd2l0aG91dCwgYnV0IGdpdmVuIHRoZSBhbW91bnQgb2YgY2h1
cm4gSSBhbQ0KPiBhbHJlYWR5IGludHJvZHVjaW5nIEkgZGlkbid0IHdhbnQgdG8gcGxheSB3aXRo
IHR1bmluZyBuZXcgbWVtcG9vbHMuDQo+IA0KPiBUaGlzIGFsc28gZml4ZXMgYnVnIGluIHBuZnNf
bGRfaGFuZGxlX3JlYWRfZXJyb3IuICBJbiB0aGUgY2FzZSBvZg0KPiBkZXNjLT5wZ19ic2l6ZSA8
IFBBR0VfQ0FDSEVfU0laRSwgdGhlIHBhZ2VzIGxpc3Qgd2FzIGVtcHR5LCBjYXVzaW5nDQo+IHJl
cGxheSBhdHRlbXB0IHRvIGRvIG5vdGhpbmcuDQo+IA0KPiBTaWduZWQtb2ZmLWJ5OiBGcmVkIElz
YW1hbiA8aWlzYW1hbkBuZXRhcHAuY29tPg0KPiBkaWZmIC0tZ2l0IGEvZnMvbmZzL3JlYWQuYyBi
L2ZzL25mcy9yZWFkLmMNCj4gaW5kZXggZjZhYjMwYi4uZWE4NDRiMiAxMDA2NDQNCj4gLS0tIGEv
ZnMvbmZzL3JlYWQuYw0KPiArKysgYi9mcy9uZnMvcmVhZC5jDQo+IEBAIC0zMCwyOSArMzAsNDUg
QEANCj4gICNkZWZpbmUgTkZTREJHX0ZBQ0lMSVRZCQlORlNEQkdfUEFHRUNBQ0hFDQo+ICANCj4g
IHN0YXRpYyBjb25zdCBzdHJ1Y3QgbmZzX3BhZ2Vpb19vcHMgbmZzX3BhZ2Vpb19yZWFkX29wczsN
Cj4gLXN0YXRpYyBjb25zdCBzdHJ1Y3QgcnBjX2NhbGxfb3BzIG5mc19yZWFkX3BhcnRpYWxfb3Bz
Ow0KPiAtc3RhdGljIGNvbnN0IHN0cnVjdCBycGNfY2FsbF9vcHMgbmZzX3JlYWRfZnVsbF9vcHM7
DQo+ICtzdGF0aWMgY29uc3Qgc3RydWN0IHJwY19jYWxsX29wcyBuZnNfcmVhZF9jb21tb25fb3Bz
Ow0KPiAgDQo+ICBzdGF0aWMgc3RydWN0IGttZW1fY2FjaGUgKm5mc19yZGF0YV9jYWNoZXA7DQo+
ICANCj4gLXN0cnVjdCBuZnNfcmVhZF9oZWFkZXIgKm5mc19yZWFkaGRyX2FsbG9jKHVuc2lnbmVk
IGludCBwYWdlY291bnQpDQo+ICtzdHJ1Y3QgbmZzX3JlYWRfaGVhZGVyICpuZnNfcmVhZGhkcl9h
bGxvYygpDQo+ICB7DQo+IC0Jc3RydWN0IG5mc19yZWFkX2hlYWRlciAqcDsNCj4gKwlzdHJ1Y3Qg
bmZzX3JlYWRfaGVhZGVyICpyaGRyOw0KPiAgDQo+IC0JcCA9IGttZW1fY2FjaGVfemFsbG9jKG5m
c19yZGF0YV9jYWNoZXAsIEdGUF9LRVJORUwpOw0KPiAtCWlmIChwKSB7DQo+IC0JCXN0cnVjdCBu
ZnNfcGdpb19oZWFkZXIgKmhkciA9ICZwLT5oZWFkZXI7DQo+IC0JCXN0cnVjdCBuZnNfcmVhZF9k
YXRhICpkYXRhID0gJnAtPnJwY19kYXRhOw0KPiArCXJoZHIgPSBrbWVtX2NhY2hlX3phbGxvYyhu
ZnNfcmRhdGFfY2FjaGVwLCBHRlBfS0VSTkVMKTsNCj4gKwlpZiAocmhkcikgew0KPiArCQlzdHJ1
Y3QgbmZzX3BnaW9faGVhZGVyICpoZHIgPSAmcmhkci0+aGVhZGVyOw0KPiAgDQo+ICAJCUlOSVRf
TElTVF9IRUFEKCZoZHItPnBhZ2VzKTsNCj4gLQkJSU5JVF9MSVNUX0hFQUQoJmRhdGEtPmxpc3Qp
Ow0KPiAtCQlkYXRhLT5oZWFkZXIgPSBoZHI7DQo+ICsJCUlOSVRfTElTVF9IRUFEKCZoZHItPnJw
Y19saXN0KTsNCj4gKwkJc3Bpbl9sb2NrX2luaXQoJmhkci0+bG9jayk7DQo+ICsJCWF0b21pY19z
ZXQoJmhkci0+cmVmY250LCAwKTsNCj4gKwl9DQo+ICsJcmV0dXJuIHJoZHI7DQo+ICt9DQo+ICsN
Cj4gK3N0cnVjdCBuZnNfcmVhZF9kYXRhICpuZnNfcmVhZGRhdGFfYWxsb2Moc3RydWN0IG5mc19w
Z2lvX2hlYWRlciAqaGRyLA0KPiArCQkJCQkgdW5zaWduZWQgaW50IHBhZ2Vjb3VudCkNCj4gK3sN
Cj4gKwlzdHJ1Y3QgbmZzX3JlYWRfZGF0YSAqZGF0YTsNCj4gKw0KPiArCWlmIChoZHIpIHsNCj4g
KwkJLyogVXNlIGRhdGEgcHJlYWxsb2NhdGVkIHdpdGggdGhlIGhlYWRlciAqLw0KPiArCQlkYXRh
ID0gJmNvbnRhaW5lcl9vZihoZHIsIHN0cnVjdCBuZnNfcmVhZF9oZWFkZXIsIGhlYWRlciktPnJw
Y19kYXRhOw0KPiArCX0gZWxzZQ0KPiArCQlkYXRhID0ga3phbGxvYyhzaXplb2YoKmRhdGEpLCBH
RlBfS0VSTkVMKTsNCj4gKw0KPiArCWlmIChkYXRhKSB7DQo+ICAJCWlmICghbmZzX3BnYXJyYXlf
c2V0KCZkYXRhLT5wYWdlcywgcGFnZWNvdW50KSkgew0KPiAtCQkJa21lbV9jYWNoZV9mcmVlKG5m
c19yZGF0YV9jYWNoZXAsIHApOw0KPiAtCQkJcCA9IE5VTEw7DQo+ICsJCQlpZiAoIWhkcikNCj4g
KwkJCQlrZnJlZShkYXRhKTsNCj4gKwkJCWRhdGEgPSBOVUxMOw0KPiAgCQl9DQo+ICAJfQ0KPiAt
CXJldHVybiBwOw0KPiArCXJldHVybiBkYXRhOw0KPiAgfQ0KPiAgDQo+ICB2b2lkIG5mc19yZWFk
aGRyX2ZyZWUoc3RydWN0IG5mc19wZ2lvX2hlYWRlciAqaGRyKQ0KPiBAQCAtNjQsMTAgKzgwLDE2
IEBAIHZvaWQgbmZzX3JlYWRoZHJfZnJlZShzdHJ1Y3QgbmZzX3BnaW9faGVhZGVyICpoZHIpDQo+
ICANCj4gIHZvaWQgbmZzX3JlYWRkYXRhX3JlbGVhc2Uoc3RydWN0IG5mc19yZWFkX2RhdGEgKnJk
YXRhKQ0KPiAgew0KPiArCXN0cnVjdCBuZnNfcGdpb19oZWFkZXIgKmhkciA9IHJkYXRhLT5oZWFk
ZXI7DQo+ICsNCj4gIAlwdXRfbmZzX29wZW5fY29udGV4dChyZGF0YS0+YXJncy5jb250ZXh0KTsN
Cj4gIAlpZiAocmRhdGEtPnBhZ2VzLnBhZ2V2ZWMgIT0gcmRhdGEtPnBhZ2VzLnBhZ2VfYXJyYXkp
DQo+ICAJCWtmcmVlKHJkYXRhLT5wYWdlcy5wYWdldmVjKTsNCj4gLQluZnNfcmVhZGhkcl9mcmVl
KHJkYXRhLT5oZWFkZXIpOw0KPiArCWlmIChjb250YWluZXJfb2YoaGRyLCBzdHJ1Y3QgbmZzX3Jl
YWRfaGVhZGVyLCBoZWFkZXIpICE9DQo+ICsJICAgIGNvbnRhaW5lcl9vZihyZGF0YSwgc3RydWN0
IG5mc19yZWFkX2hlYWRlciwgcnBjX2RhdGEpKQ0KPiArCQlrZnJlZShyZGF0YSk7DQoNClRoaXMg
bG9va3MgdW5uZWNlc3NhcmlseSBjb21wbGljYXRlZC4gSG93IGFib3V0DQoNCglpZiAocmRhdGEg
IT0gJmhkci0+cnBjX2RhdGEpDQoJCWtmcmVlKHJkYXRhKTsNCg0KPiArCWlmIChhdG9taWNfZGVj
X2FuZF90ZXN0KCZoZHItPnJlZmNudCkpDQo+ICsJCW5mc19yZWFkX2NvbXBsZXRpb24oaGRyKTsN
Cj4gIH0NCj4gIA0KDQotLSANClRyb25kIE15a2xlYnVzdA0KTGludXggTkZTIGNsaWVudCBtYWlu
dGFpbmVyDQoNCk5ldEFwcA0KVHJvbmQuTXlrbGVidXN0QG5ldGFwcC5jb20NCnd3dy5uZXRhcHAu
Y29tDQoNCg==

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 14/26] NFS: merge _full and _partial read rpc_ops
  2012-04-09 20:52 ` [PATCH 14/26] NFS: merge _full and _partial read rpc_ops Fred Isaman
  2012-04-16 20:28   ` Myklebust, Trond
@ 2012-04-16 20:41   ` Myklebust, Trond
  2012-04-17 15:38     ` Fred Isaman
  1 sibling, 1 reply; 40+ messages in thread
From: Myklebust, Trond @ 2012-04-16 20:41 UTC (permalink / raw)
  To: Isaman, Fred; +Cc: linux-nfs

T24gTW9uLCAyMDEyLTA0LTA5IGF0IDE2OjUyIC0wNDAwLCBGcmVkIElzYW1hbiB3cm90ZToNCj4g
RGVjb3VwbGUgbmZzX3BnaW9faGVhZGVyIGFuZCBuZnNfcmVhZF9kYXRhLCBhbmQgaGF2ZSAocG9z
c2libHkNCj4gbXVsdGlwbGUpIG5mc19yZWFkX2RhdGFzIGVhY2ggdGFrZSBhIHJlZmNvdW50IG9u
IG5mc19wZ2lvX2hlYWRlci4NCj4gDQo+IEZvciB0aGUgbW9tZW50IGtlZXBzIG5mc19yZWFkX2hl
YWRlciBhcyBhIHdheSB0byBwcmVhbGxvY2F0ZSBhIHNpbmdsZQ0KPiBuZnNfcmVhZF9kYXRhIHdp
dGggdGhlIG5mc19wZ2lvX2hlYWRlci4gIFRoZSBjb2RlIGRvZXNuJ3QgbmVlZCB0aGlzLA0KPiBh
bmQgd291bGQgYmUgcHJldHRpZXIgd2l0aG91dCwgYnV0IGdpdmVuIHRoZSBhbW91bnQgb2YgY2h1
cm4gSSBhbQ0KPiBhbHJlYWR5IGludHJvZHVjaW5nIEkgZGlkbid0IHdhbnQgdG8gcGxheSB3aXRo
IHR1bmluZyBuZXcgbWVtcG9vbHMuDQo+IA0KPiBUaGlzIGFsc28gZml4ZXMgYnVnIGluIHBuZnNf
bGRfaGFuZGxlX3JlYWRfZXJyb3IuICBJbiB0aGUgY2FzZSBvZg0KPiBkZXNjLT5wZ19ic2l6ZSA8
IFBBR0VfQ0FDSEVfU0laRSwgdGhlIHBhZ2VzIGxpc3Qgd2FzIGVtcHR5LCBjYXVzaW5nDQo+IHJl
cGxheSBhdHRlbXB0IHRvIGRvIG5vdGhpbmcuDQo+IA0KPiBTaWduZWQtb2ZmLWJ5OiBGcmVkIElz
YW1hbiA8aWlzYW1hbkBuZXRhcHAuY29tPg0KPiAtLS0NCjxzbmlwPg0KPiBkaWZmIC0tZ2l0IGEv
ZnMvbmZzL3JlYWQuYyBiL2ZzL25mcy9yZWFkLmMNCj4gaW5kZXggZjZhYjMwYi4uZWE4NDRiMiAx
MDA2NDQNCj4gLS0tIGEvZnMvbmZzL3JlYWQuYw0KPiArKysgYi9mcy9uZnMvcmVhZC5jDQo+IEBA
IC0zMCwyOSArMzAsNDUgQEANCj4gICNkZWZpbmUgTkZTREJHX0ZBQ0lMSVRZCQlORlNEQkdfUEFH
RUNBQ0hFDQo+ICANCj4gIHN0YXRpYyBjb25zdCBzdHJ1Y3QgbmZzX3BhZ2Vpb19vcHMgbmZzX3Bh
Z2Vpb19yZWFkX29wczsNCj4gLXN0YXRpYyBjb25zdCBzdHJ1Y3QgcnBjX2NhbGxfb3BzIG5mc19y
ZWFkX3BhcnRpYWxfb3BzOw0KPiAtc3RhdGljIGNvbnN0IHN0cnVjdCBycGNfY2FsbF9vcHMgbmZz
X3JlYWRfZnVsbF9vcHM7DQo+ICtzdGF0aWMgY29uc3Qgc3RydWN0IHJwY19jYWxsX29wcyBuZnNf
cmVhZF9jb21tb25fb3BzOw0KPiAgDQo+ICBzdGF0aWMgc3RydWN0IGttZW1fY2FjaGUgKm5mc19y
ZGF0YV9jYWNoZXA7DQo+ICANCj4gLXN0cnVjdCBuZnNfcmVhZF9oZWFkZXIgKm5mc19yZWFkaGRy
X2FsbG9jKHVuc2lnbmVkIGludCBwYWdlY291bnQpDQo+ICtzdHJ1Y3QgbmZzX3JlYWRfaGVhZGVy
ICpuZnNfcmVhZGhkcl9hbGxvYygpDQo+ICB7DQo+IC0Jc3RydWN0IG5mc19yZWFkX2hlYWRlciAq
cDsNCj4gKwlzdHJ1Y3QgbmZzX3JlYWRfaGVhZGVyICpyaGRyOw0KPiAgDQo+IC0JcCA9IGttZW1f
Y2FjaGVfemFsbG9jKG5mc19yZGF0YV9jYWNoZXAsIEdGUF9LRVJORUwpOw0KPiAtCWlmIChwKSB7
DQo+IC0JCXN0cnVjdCBuZnNfcGdpb19oZWFkZXIgKmhkciA9ICZwLT5oZWFkZXI7DQo+IC0JCXN0
cnVjdCBuZnNfcmVhZF9kYXRhICpkYXRhID0gJnAtPnJwY19kYXRhOw0KPiArCXJoZHIgPSBrbWVt
X2NhY2hlX3phbGxvYyhuZnNfcmRhdGFfY2FjaGVwLCBHRlBfS0VSTkVMKTsNCj4gKwlpZiAocmhk
cikgew0KPiArCQlzdHJ1Y3QgbmZzX3BnaW9faGVhZGVyICpoZHIgPSAmcmhkci0+aGVhZGVyOw0K
PiAgDQo+ICAJCUlOSVRfTElTVF9IRUFEKCZoZHItPnBhZ2VzKTsNCj4gLQkJSU5JVF9MSVNUX0hF
QUQoJmRhdGEtPmxpc3QpOw0KPiAtCQlkYXRhLT5oZWFkZXIgPSBoZHI7DQo+ICsJCUlOSVRfTElT
VF9IRUFEKCZoZHItPnJwY19saXN0KTsNCj4gKwkJc3Bpbl9sb2NrX2luaXQoJmhkci0+bG9jayk7
DQo+ICsJCWF0b21pY19zZXQoJmhkci0+cmVmY250LCAwKTsNCj4gKwl9DQo+ICsJcmV0dXJuIHJo
ZHI7DQo+ICt9DQo+ICsNCj4gK3N0cnVjdCBuZnNfcmVhZF9kYXRhICpuZnNfcmVhZGRhdGFfYWxs
b2Moc3RydWN0IG5mc19wZ2lvX2hlYWRlciAqaGRyLA0KPiArCQkJCQkgdW5zaWduZWQgaW50IHBh
Z2Vjb3VudCkNCj4gK3sNCj4gKwlzdHJ1Y3QgbmZzX3JlYWRfZGF0YSAqZGF0YTsNCj4gKw0KPiAr
CWlmIChoZHIpIHsNCj4gKwkJLyogVXNlIGRhdGEgcHJlYWxsb2NhdGVkIHdpdGggdGhlIGhlYWRl
ciAqLw0KPiArCQlkYXRhID0gJmNvbnRhaW5lcl9vZihoZHIsIHN0cnVjdCBuZnNfcmVhZF9oZWFk
ZXIsIGhlYWRlciktPnJwY19kYXRhOw0KPiArCX0gZWxzZQ0KPiArCQlkYXRhID0ga3phbGxvYyhz
aXplb2YoKmRhdGEpLCBHRlBfS0VSTkVMKTsNCj4gKw0KPiArCWlmIChkYXRhKSB7DQo+ICAJCWlm
ICghbmZzX3BnYXJyYXlfc2V0KCZkYXRhLT5wYWdlcywgcGFnZWNvdW50KSkgew0KPiAtCQkJa21l
bV9jYWNoZV9mcmVlKG5mc19yZGF0YV9jYWNoZXAsIHApOw0KPiAtCQkJcCA9IE5VTEw7DQo+ICsJ
CQlpZiAoIWhkcikNCj4gKwkJCQlrZnJlZShkYXRhKTsNCj4gKwkJCWRhdGEgPSBOVUxMOw0KPiAg
CQl9DQo+ICAJfQ0KPiAtCXJldHVybiBwOw0KPiArCXJldHVybiBkYXRhOw0KPiAgfQ0KDQpCVFc6
IEl0IG1ha2VzIG1lIG5lcnZvdXMgdGhhdCB3ZSBjYW4gYWxsb2NhdGUgYSBzdHJ1Y3QgbmZzX3Jl
YWRfZGF0YQ0KdGhhdCBkb2VzIG5vdCBoYXZlIGRhdGEtPmhlYWRlciBzZXQuIENvdWxkIHdlIHBs
ZWFzZSBhbHdheXMgcGFzcyBhbg0KbmZzX3BnaW9faGVhZGVyIGFyZ3VtZW50Pw0KDQpJZiB5b3Ug
bWFrZSBzdXJlIHRoYXQgdGhlIGFib3ZlIGZ1bmN0aW9uIGFsd2F5cyBidW1wcyBoZWFkZXItPnJl
ZmNudCwNCnRoZW4geW91IGNhbiBjaGVjayBpZiBoZWFkZXItPnJwY19kYXRhIGlzIGFsbG9jYXRl
ZCBieSBzZWVpbmcgaWYNCmhlYWRlci0+cmVmY250IGlzIHplcm8uDQoNCkJUVzogVGhlIHNhbWUg
YXBwbGllcyB0byB0aGUgIndyaXRlIiBlcXVpdmFsZW50cy4uLg0KDQo+ICB2b2lkIG5mc19yZWFk
aGRyX2ZyZWUoc3RydWN0IG5mc19wZ2lvX2hlYWRlciAqaGRyKQ0KPiBAQCAtNjQsMTAgKzgwLDE2
IEBAIHZvaWQgbmZzX3JlYWRoZHJfZnJlZShzdHJ1Y3QgbmZzX3BnaW9faGVhZGVyICpoZHIpDQo+
ICANCj4gIHZvaWQgbmZzX3JlYWRkYXRhX3JlbGVhc2Uoc3RydWN0IG5mc19yZWFkX2RhdGEgKnJk
YXRhKQ0KPiAgew0KPiArCXN0cnVjdCBuZnNfcGdpb19oZWFkZXIgKmhkciA9IHJkYXRhLT5oZWFk
ZXI7DQo+ICsNCj4gIAlwdXRfbmZzX29wZW5fY29udGV4dChyZGF0YS0+YXJncy5jb250ZXh0KTsN
Cj4gIAlpZiAocmRhdGEtPnBhZ2VzLnBhZ2V2ZWMgIT0gcmRhdGEtPnBhZ2VzLnBhZ2VfYXJyYXkp
DQo+ICAJCWtmcmVlKHJkYXRhLT5wYWdlcy5wYWdldmVjKTsNCj4gLQluZnNfcmVhZGhkcl9mcmVl
KHJkYXRhLT5oZWFkZXIpOw0KPiArCWlmIChjb250YWluZXJfb2YoaGRyLCBzdHJ1Y3QgbmZzX3Jl
YWRfaGVhZGVyLCBoZWFkZXIpICE9DQo+ICsJICAgIGNvbnRhaW5lcl9vZihyZGF0YSwgc3RydWN0
IG5mc19yZWFkX2hlYWRlciwgcnBjX2RhdGEpKQ0KPiArCQlrZnJlZShyZGF0YSk7DQo+ICsJaWYg
KGF0b21pY19kZWNfYW5kX3Rlc3QoJmhkci0+cmVmY250KSkNCj4gKwkJbmZzX3JlYWRfY29tcGxl
dGlvbihoZHIpOw0KPiAgfQ0KPiAgDQo+ICBzdGF0aWMNCj4gQEAgLTc5LDM1ICsxMDEsNiBAQCBp
bnQgbmZzX3JldHVybl9lbXB0eV9wYWdlKHN0cnVjdCBwYWdlICpwYWdlKQ0KPiAgCXJldHVybiAw
Ow0KPiAgfQ0KDQoNCi0tIA0KVHJvbmQgTXlrbGVidXN0DQpMaW51eCBORlMgY2xpZW50IG1haW50
YWluZXINCg0KTmV0QXBwDQpUcm9uZC5NeWtsZWJ1c3RAbmV0YXBwLmNvbQ0Kd3d3Lm5ldGFwcC5j
b20NCg0K

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 16/26] NFS: create completion structure to pass into page_init functions
  2012-04-09 20:52 ` [PATCH 16/26] NFS: create completion structure to pass into page_init functions Fred Isaman
@ 2012-04-16 21:07   ` Myklebust, Trond
  0 siblings, 0 replies; 40+ messages in thread
From: Myklebust, Trond @ 2012-04-16 21:07 UTC (permalink / raw)
  To: Isaman, Fred; +Cc: linux-nfs

T24gTW9uLCAyMDEyLTA0LTA5IGF0IDE2OjUyIC0wNDAwLCBGcmVkIElzYW1hbiB3cm90ZToNCj4g
RmFjdG9ycyBvdXQgdGhlIGNvZGUgdGhhdCB3aWxsIG5lZWQgdG8gY2hhbmdlIHdoZW4gZGlyZWN0
aW8NCj4gc3RhcnRzIHVzaW5nIHRoZXNlIGNvZGUgcGF0aHMuICBUaGlzIHdpbGwgYWxsb3cgZGly
ZWN0aW8gdG8gdXNlDQo+IHRoZSBnZW5lcmljIHBhZ2VpbiBhbmQgZmx1c2ggcm91dGluZXMNCj4g
DQo+IFNpZ25lZC1vZmYtYnk6IEZyZWQgSXNhbWFuIDxpaXNhbWFuQG5ldGFwcC5jb20+DQo+IC0t
LQ0KDQo+IC0tLSBhL2ZzL25mcy9yZWFkLmMNCj4gKysrIGIvZnMvbmZzL3JlYWQuYw0KPiBAQCAt
MzEsNiArMzEsNyBAQA0KPiAgDQo+ICBzdGF0aWMgY29uc3Qgc3RydWN0IG5mc19wYWdlaW9fb3Bz
IG5mc19wYWdlaW9fcmVhZF9vcHM7DQo+ICBzdGF0aWMgY29uc3Qgc3RydWN0IHJwY19jYWxsX29w
cyBuZnNfcmVhZF9jb21tb25fb3BzOw0KPiArc3RhdGljIGNvbnN0IHN0cnVjdCBuZnNfaW9fY29t
cGxldGlvbl9vcHMgbmZzX2FzeW5jaF9yZWFkX2NvbXBsX29wczsNCg0KQ291bGQgd2UgcGxlYXNl
IHJlbmFtZT8gSSdkIHByZWZlciBuZnNfYXN5bmNfcmVhZF9jb21wbGV0aW9uX29wcy4gSXQncw0K
bW9yZSBpbiBsaW5lIHdpdGggaG93IHdlIHVzdWFsbHkgbmFtZSBzdHVmZiAod2UgdXNlIHRoZSBh
YmJyZXZpYXRpb25zDQonYXN5bmMnIGFuZCAnb3BzJyBldmVyeXdoZXJlLCBidXQgd2UgZG9uJ3Qg
Y29tbW9ubHkgYWJicmV2aWF0ZQ0KJ2NvbXBsZXRpb24nKS4NCg0KPiBkaWZmIC0tZ2l0IGEvZnMv
bmZzL3dyaXRlLmMgYi9mcy9uZnMvd3JpdGUuYw0KPiBpbmRleCBjZGUwZmEyLi40ZDliYmFmIDEw
MDY0NA0KPiAtLS0gYS9mcy9uZnMvd3JpdGUuYw0KPiArKysgYi9mcy9uZnMvd3JpdGUuYw0KPiBA
QCAtNDAsMTAgKzQwLDEyIEBADQo+ICAgKiBMb2NhbCBmdW5jdGlvbiBkZWNsYXJhdGlvbnMNCj4g
ICAqLw0KPiAgc3RhdGljIHZvaWQgbmZzX3BhZ2Vpb19pbml0X3dyaXRlKHN0cnVjdCBuZnNfcGFn
ZWlvX2Rlc2NyaXB0b3IgKmRlc2MsDQo+IC0JCQkJICBzdHJ1Y3QgaW5vZGUgKmlub2RlLCBpbnQg
aW9mbGFncyk7DQo+ICsJCQkJICBzdHJ1Y3QgaW5vZGUgKmlub2RlLCBpbnQgaW9mbGFncywNCj4g
KwkJCQkgIGNvbnN0IHN0cnVjdCBuZnNfaW9fY29tcGxldGlvbl9vcHMgKmNvbXBsX29wcyk7DQo+
ICBzdGF0aWMgdm9pZCBuZnNfcmVkaXJ0eV9yZXF1ZXN0KHN0cnVjdCBuZnNfcGFnZSAqcmVxKTsN
Cj4gIHN0YXRpYyBjb25zdCBzdHJ1Y3QgcnBjX2NhbGxfb3BzIG5mc193cml0ZV9jb21tb25fb3Bz
Ow0KPiAgc3RhdGljIGNvbnN0IHN0cnVjdCBycGNfY2FsbF9vcHMgbmZzX2NvbW1pdF9vcHM7DQo+
ICtzdGF0aWMgY29uc3Qgc3RydWN0IG5mc19pb19jb21wbGV0aW9uX29wcyBuZnNfYXN5bmNoX3dy
aXRlX2NvbXBsX29wczsNCg0KRGl0dG8NCiANCj4gIA0KPiAgdm9pZCBuZnNfd3JpdGVfcHJlcGFy
ZShzdHJ1Y3QgcnBjX3Rhc2sgKnRhc2ssIHZvaWQgKmNhbGxkYXRhKQ0KPiBkaWZmIC0tZ2l0IGEv
aW5jbHVkZS9saW51eC9uZnNfcGFnZS5oIGIvaW5jbHVkZS9saW51eC9uZnNfcGFnZS5oDQo+IGlu
ZGV4IDVjNTIwMzQuLmEzNmZiYjYgMTAwNjQ0DQo+IC0tLSBhL2luY2x1ZGUvbGludXgvbmZzX3Bh
Z2UuaA0KPiArKysgYi9pbmNsdWRlL2xpbnV4L25mc19wYWdlLmgNCj4gQEAgLTY3LDYgKzY3LDcg
QEAgc3RydWN0IG5mc19wYWdlaW9fZGVzY3JpcHRvciB7DQo+ICAJaW50IAkJCXBnX2lvZmxhZ3M7
DQo+ICAJaW50CQkJcGdfZXJyb3I7DQo+ICAJY29uc3Qgc3RydWN0IHJwY19jYWxsX29wcyAqcGdf
cnBjX2NhbGxvcHM7DQo+ICsJY29uc3Qgc3RydWN0IG5mc19pb19jb21wbGV0aW9uX29wcyAqcGdf
Y29tcGxfb3BzOw0KDQpBZ2FpbiwgY2FuIHdlIHBsZWFzZSBtYWtlIHRoaXMgJ3BnX2NvbXBsZXRp
b25fb3BzJz8NCg0KPiAgCXN0cnVjdCBwbmZzX2xheW91dF9zZWdtZW50ICpwZ19sc2VnOw0KPiAg
fTsNCj4gIA0KPiAgZXh0ZXJuCWludCBuZnNfcGFnZWlvX2FkZF9yZXF1ZXN0KHN0cnVjdCBuZnNf
cGFnZWlvX2Rlc2NyaXB0b3IgKiwNCj4gZGlmZiAtLWdpdCBhL2luY2x1ZGUvbGludXgvbmZzX3hk
ci5oIGIvaW5jbHVkZS9saW51eC9uZnNfeGRyLmgNCj4gaW5kZXggMmJiYjdjNy4uYjM4OTUwYyAx
MDA2NDQNCj4gLS0tIGEvaW5jbHVkZS9saW51eC9uZnNfeGRyLmgNCj4gKysrIGIvaW5jbHVkZS9s
aW51eC9uZnNfeGRyLmgNCj4gQEAgLTEyMDEsNiArMTIwMSw3IEBAIHN0cnVjdCBuZnNfcGdpb19o
ZWFkZXIgew0KPiAgCXN0cnVjdCBwbmZzX2xheW91dF9zZWdtZW50ICpsc2VnOw0KPiAgCWNvbnN0
IHN0cnVjdCBycGNfY2FsbF9vcHMgKm1kc19vcHM7DQo+ICAJdm9pZCAoKnJlbGVhc2UpIChzdHJ1
Y3QgbmZzX3BnaW9faGVhZGVyICpoZHIpOw0KPiArCWNvbnN0IHN0cnVjdCBuZnNfaW9fY29tcGxl
dGlvbl9vcHMgKmNvbXBsX29wczsNCg0KRGl0dG8uLi4NCg0KDQoNCi0tIA0KVHJvbmQgTXlrbGVi
dXN0DQpMaW51eCBORlMgY2xpZW50IG1haW50YWluZXINCg0KTmV0QXBwDQpUcm9uZC5NeWtsZWJ1
c3RAbmV0YXBwLmNvbQ0Kd3d3Lm5ldGFwcC5jb20NCg0K

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 19/26] NFS: rewrite directio read to use async coalesce code
  2012-04-09 20:52 ` [PATCH 19/26] NFS: rewrite directio read to use async coalesce code Fred Isaman
@ 2012-04-16 21:22   ` Myklebust, Trond
  2012-04-17 15:41     ` Fred Isaman
  0 siblings, 1 reply; 40+ messages in thread
From: Myklebust, Trond @ 2012-04-16 21:22 UTC (permalink / raw)
  To: Isaman, Fred; +Cc: linux-nfs

T24gTW9uLCAyMDEyLTA0LTA5IGF0IDE2OjUyIC0wNDAwLCBGcmVkIElzYW1hbiB3cm90ZToNCj4g
VGhpcyBhbHNvIGhhcyB0aGUgYWR2YW50YWdlIHRoYXQgaXQgYWxsb3dzIGRpcmVjdGlvIHRvIHVz
ZSBwbmZzLg0KPiANCj4gU2lnbmVkLW9mZi1ieTogRnJlZCBJc2FtYW4gPGlpc2FtYW5AbmV0YXBw
LmNvbT4NCj4gLS0tDQo+ICBmcy9uZnMvZGlyZWN0LmMgICAgICAgICAgfCAgMjQ3ICsrKysrKysr
KysrKysrKysrKysrKy0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQ0KPiAgZnMvbmZzL2ludGVybmFs
LmggICAgICAgIHwgICAgMyArDQo+ICBmcy9uZnMvcGFnZWxpc3QuYyAgICAgICAgfCAgICA2ICst
DQo+ICBmcy9uZnMvcmVhZC5jICAgICAgICAgICAgfCAgICA2ICstDQo+ICBpbmNsdWRlL2xpbnV4
L25mc19wYWdlLmggfCAgICAxICsNCj4gIGluY2x1ZGUvbGludXgvbmZzX3hkci5oICB8ICAgIDQg
Ky0NCj4gIDYgZmlsZXMgY2hhbmdlZCwgMTI4IGluc2VydGlvbnMoKyksIDEzOSBkZWxldGlvbnMo
LSkNCj4gDQo+IGRpZmYgLS1naXQgYS9mcy9uZnMvZGlyZWN0LmMgYi9mcy9uZnMvZGlyZWN0LmMN
Cj4gaW5kZXggMjJhNDBjNC4uNDA5YTlhMCAxMDA2NDQNCj4gLS0tIGEvZnMvbmZzL2RpcmVjdC5j
DQo+ICsrKyBiL2ZzL25mcy9kaXJlY3QuYw0KPiBAQCAtMTI0LDIyICsxMjQsNiBAQCBzc2l6ZV90
IG5mc19kaXJlY3RfSU8oaW50IHJ3LCBzdHJ1Y3Qga2lvY2IgKmlvY2IsIGNvbnN0IHN0cnVjdCBp
b3ZlYyAqaW92LCBsb2ZmXw0KPiAgCXJldHVybiAtRUlOVkFMOw0KPiAgfQ0KPiAgDQo+IC1zdGF0
aWMgdm9pZCBuZnNfZGlyZWN0X2RpcnR5X3BhZ2VzKHN0cnVjdCBwYWdlICoqcGFnZXMsIHVuc2ln
bmVkIGludCBwZ2Jhc2UsIHNpemVfdCBjb3VudCkNCj4gLXsNCj4gLQl1bnNpZ25lZCBpbnQgbnBh
Z2VzOw0KPiAtCXVuc2lnbmVkIGludCBpOw0KPiAtDQo+IC0JaWYgKGNvdW50ID09IDApDQo+IC0J
CXJldHVybjsNCj4gLQlwYWdlcyArPSAocGdiYXNlID4+IFBBR0VfU0hJRlQpOw0KPiAtCW5wYWdl
cyA9IChjb3VudCArIChwZ2Jhc2UgJiB+UEFHRV9NQVNLKSArIFBBR0VfU0laRSAtIDEpID4+IFBB
R0VfU0hJRlQ7DQo+IC0JZm9yIChpID0gMDsgaSA8IG5wYWdlczsgaSsrKSB7DQo+IC0JCXN0cnVj
dCBwYWdlICpwYWdlID0gcGFnZXNbaV07DQo+IC0JCWlmICghUGFnZUNvbXBvdW5kKHBhZ2UpKQ0K
PiAtCQkJc2V0X3BhZ2VfZGlydHkocGFnZSk7DQo+IC0JfQ0KPiAtfQ0KPiAtDQo+ICBzdGF0aWMg
dm9pZCBuZnNfZGlyZWN0X3JlbGVhc2VfcGFnZXMoc3RydWN0IHBhZ2UgKipwYWdlcywgdW5zaWdu
ZWQgaW50IG5wYWdlcykNCj4gIHsNCj4gIAl1bnNpZ25lZCBpbnQgaTsNCj4gQEAgLTIyNiw1OCAr
MjEwLDg1IEBAIHN0YXRpYyB2b2lkIG5mc19kaXJlY3RfY29tcGxldGUoc3RydWN0IG5mc19kaXJl
Y3RfcmVxICpkcmVxKQ0KPiAgCW5mc19kaXJlY3RfcmVxX3JlbGVhc2UoZHJlcSk7DQo+ICB9DQo+
ICANCj4gLS8qDQo+IC0gKiBXZSBtdXN0IGhvbGQgYSByZWZlcmVuY2UgdG8gYWxsIHRoZSBwYWdl
cyBpbiB0aGlzIGRpcmVjdCByZWFkIHJlcXVlc3QNCj4gLSAqIHVudGlsIHRoZSBSUENzIGNvbXBs
ZXRlLiAgVGhpcyBjb3VsZCBiZSBsb25nICphZnRlciogd2UgYXJlIHdva2VuIHVwIGluDQo+IC0g
KiBuZnNfZGlyZWN0X3dhaXQgKGZvciBpbnN0YW5jZSwgaWYgc29tZW9uZSBoaXRzIF5DIG9uIGEg
c2xvdyBzZXJ2ZXIpLg0KPiAtICovDQo+IC1zdGF0aWMgdm9pZCBuZnNfZGlyZWN0X3JlYWRfcmVz
dWx0KHN0cnVjdCBycGNfdGFzayAqdGFzaywgdm9pZCAqY2FsbGRhdGEpDQo+ICt2b2lkIG5mc19k
aXJlY3RfcmVhZHBhZ2VfcmVsZWFzZShzdHJ1Y3QgbmZzX3BhZ2UgKnJlcSkNCj4gIHsNCj4gLQlz
dHJ1Y3QgbmZzX3JlYWRfZGF0YSAqZGF0YSA9IGNhbGxkYXRhOw0KPiAtDQo+IC0JbmZzX3JlYWRw
YWdlX3Jlc3VsdCh0YXNrLCBkYXRhKTsNCj4gKwlkcHJpbnRrKCJORlM6IGRpcmVjdCByZWFkIGRv
bmUgKCVzLyVsbGQgJWRAJWxsZClcbiIsDQo+ICsJCXJlcS0+d2JfY29udGV4dC0+ZGVudHJ5LT5k
X2lub2RlLT5pX3NiLT5zX2lkLA0KPiArCQkobG9uZyBsb25nKU5GU19GSUxFSUQocmVxLT53Yl9j
b250ZXh0LT5kZW50cnktPmRfaW5vZGUpLA0KPiArCQlyZXEtPndiX2J5dGVzLA0KPiArCQkobG9u
ZyBsb25nKXJlcV9vZmZzZXQocmVxKSk7DQo+ICsJbmZzX3JlbGVhc2VfcmVxdWVzdChyZXEpOw0K
PiAgfQ0KPiAgDQo+IC1zdGF0aWMgdm9pZCBuZnNfZGlyZWN0X3JlYWRfcmVsZWFzZSh2b2lkICpj
YWxsZGF0YSkNCj4gK3N0YXRpYyB2b2lkIG5mc19kaXJlY3RfcmVhZF9jb21wbGV0aW9uKHN0cnVj
dCBuZnNfcGdpb19oZWFkZXIgKmhkcikNCj4gIHsNCj4gKwl1bnNpZ25lZCBsb25nIHBvcyA9IHJl
cV9vZmZzZXQoaGRyLT5yZXEpOw0KDQpVbW0uLi4gcmVxX29mZnNldCgpIHJldHVybnMgYW4gbG9m
Zl90LiAndW5zaWduZWQgbG9uZycgaXMgbGlrZWx5IHRvIGJlDQp0b28gc21hbGwgdG8gaG9sZCB0
aGVzZSB2YWx1ZXMgb24gYSAzMi1iaXQgbWFjaGluZS4NCg0KTG9va2luZyBiYWNrLCBpdCBsb29r
cyBhcyBpZiB0aGUgc2FtZSBwcm9ibGVtIGV4aXN0cyBpbiB0aGUgcGFnZSBjYWNoZQ0KY29kZSAo
ZS5nLiBuZnNfcmVhZF9jb21wbGV0aW9uKSBhcyB3ZWxsIGFzIHN0dWZmIGxpa2UgaGRyLT5lb2Ys
IGFuZA0KaGRyLT5maXJzdF9lcnJvci4NCg0KDQotLSANClRyb25kIE15a2xlYnVzdA0KTGludXgg
TkZTIGNsaWVudCBtYWludGFpbmVyDQoNCk5ldEFwcA0KVHJvbmQuTXlrbGVidXN0QG5ldGFwcC5j
b20NCnd3dy5uZXRhcHAuY29tDQoNCg==

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 14/26] NFS: merge _full and _partial read rpc_ops
  2012-04-16 20:28   ` Myklebust, Trond
@ 2012-04-17 15:37     ` Fred Isaman
  2012-04-17 16:13       ` Myklebust, Trond
  0 siblings, 1 reply; 40+ messages in thread
From: Fred Isaman @ 2012-04-17 15:37 UTC (permalink / raw)
  To: Myklebust, Trond; +Cc: Isaman, Fred, linux-nfs

On Mon, Apr 16, 2012 at 4:28 PM, Myklebust, Trond
<Trond.Myklebust@netapp.com> wrote:
> On Mon, 2012-04-09 at 16:52 -0400, Fred Isaman wrote:
>> @@ -64,10 +80,16 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr)
>>
>>  void nfs_readdata_release(struct nfs_read_data *rdata)
>>  {
>> +     struct nfs_pgio_header *hdr = rdata->header;
>> +
>>       put_nfs_open_context(rdata->args.context);
>>       if (rdata->pages.pagevec != rdata->pages.page_array)
>>               kfree(rdata->pages.pagevec);
>> -     nfs_readhdr_free(rdata->header);
>> +     if (container_of(hdr, struct nfs_read_header, header) !=
>> +         container_of(rdata, struct nfs_read_header, rpc_data))
>> +             kfree(rdata);
>
> This looks unnecessarily complicated. How about
>
>        if (rdata != &hdr->rpc_data)
>                kfree(rdata);
>

That won't work.  There is no hdr->rpc_data.

Fred

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 14/26] NFS: merge _full and _partial read rpc_ops
  2012-04-16 20:41   ` Myklebust, Trond
@ 2012-04-17 15:38     ` Fred Isaman
  0 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-17 15:38 UTC (permalink / raw)
  To: Myklebust, Trond; +Cc: Isaman, Fred, linux-nfs

On Mon, Apr 16, 2012 at 4:41 PM, Myklebust, Trond
<Trond.Myklebust@netapp.com> wrote:
> BTW: It makes me nervous that we can allocate a struct nfs_read_data
> that does not have data->header set. Could we please always pass an
> nfs_pgio_header argument?
>
> If you make sure that the above function always bumps header->refcnt,
> then you can check if header->rpc_data is allocated by seeing if
> header->refcnt is zero.
>
> BTW: The same applies to the "write" equivalents...
>


OK

Fred

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 19/26] NFS: rewrite directio read to use async coalesce code
  2012-04-16 21:22   ` Myklebust, Trond
@ 2012-04-17 15:41     ` Fred Isaman
  0 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-17 15:41 UTC (permalink / raw)
  To: Myklebust, Trond; +Cc: Isaman, Fred, linux-nfs

On Mon, Apr 16, 2012 at 5:22 PM, Myklebust, Trond
<Trond.Myklebust@netapp.com> wrote:
> On Mon, 2012-04-09 at 16:52 -0400, Fred Isaman wrote:
>> This also has the advantage that it allows directio to use pnfs.
>>
>> Signed-off-by: Fred Isaman <iisaman@netapp.com>
>> ---
>> -static void nfs_direct_read_release(void *calldata)
>> +static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
>>  {
>> +     unsigned long pos = req_offset(hdr->req);
>
> Umm... req_offset() returns an loff_t. 'unsigned long' is likely to be
> too small to hold these values on a 32-bit machine.
>
> Looking back, it looks as if the same problem exists in the page cache
> code (e.g. nfs_read_completion) as well as stuff like hdr->eof, and
> hdr->first_error.
>
>

OK, I'll fix these.

Fred

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 14/26] NFS: merge _full and _partial read rpc_ops
  2012-04-17 15:37     ` Fred Isaman
@ 2012-04-17 16:13       ` Myklebust, Trond
  2012-04-17 16:16         ` Fred Isaman
  0 siblings, 1 reply; 40+ messages in thread
From: Myklebust, Trond @ 2012-04-17 16:13 UTC (permalink / raw)
  To: Fred Isaman; +Cc: Isaman, Fred, linux-nfs

T24gVHVlLCAyMDEyLTA0LTE3IGF0IDExOjM3IC0wNDAwLCBGcmVkIElzYW1hbiB3cm90ZToNCj4g
T24gTW9uLCBBcHIgMTYsIDIwMTIgYXQgNDoyOCBQTSwgTXlrbGVidXN0LCBUcm9uZA0KPiA8VHJv
bmQuTXlrbGVidXN0QG5ldGFwcC5jb20+IHdyb3RlOg0KPiA+IE9uIE1vbiwgMjAxMi0wNC0wOSBh
dCAxNjo1MiAtMDQwMCwgRnJlZCBJc2FtYW4gd3JvdGU6DQo+ID4+IEBAIC02NCwxMCArODAsMTYg
QEAgdm9pZCBuZnNfcmVhZGhkcl9mcmVlKHN0cnVjdCBuZnNfcGdpb19oZWFkZXIgKmhkcikNCj4g
Pj4NCj4gPj4gIHZvaWQgbmZzX3JlYWRkYXRhX3JlbGVhc2Uoc3RydWN0IG5mc19yZWFkX2RhdGEg
KnJkYXRhKQ0KPiA+PiAgew0KPiA+PiArICAgICBzdHJ1Y3QgbmZzX3BnaW9faGVhZGVyICpoZHIg
PSByZGF0YS0+aGVhZGVyOw0KPiA+PiArDQo+ID4+ICAgICAgIHB1dF9uZnNfb3Blbl9jb250ZXh0
KHJkYXRhLT5hcmdzLmNvbnRleHQpOw0KPiA+PiAgICAgICBpZiAocmRhdGEtPnBhZ2VzLnBhZ2V2
ZWMgIT0gcmRhdGEtPnBhZ2VzLnBhZ2VfYXJyYXkpDQo+ID4+ICAgICAgICAgICAgICAga2ZyZWUo
cmRhdGEtPnBhZ2VzLnBhZ2V2ZWMpOw0KPiA+PiAtICAgICBuZnNfcmVhZGhkcl9mcmVlKHJkYXRh
LT5oZWFkZXIpOw0KPiA+PiArICAgICBpZiAoY29udGFpbmVyX29mKGhkciwgc3RydWN0IG5mc19y
ZWFkX2hlYWRlciwgaGVhZGVyKSAhPQ0KPiA+PiArICAgICAgICAgY29udGFpbmVyX29mKHJkYXRh
LCBzdHJ1Y3QgbmZzX3JlYWRfaGVhZGVyLCBycGNfZGF0YSkpDQo+ID4+ICsgICAgICAgICAgICAg
a2ZyZWUocmRhdGEpOw0KPiA+DQo+ID4gVGhpcyBsb29rcyB1bm5lY2Vzc2FyaWx5IGNvbXBsaWNh
dGVkLiBIb3cgYWJvdXQNCj4gPg0KPiA+ICAgICAgICBpZiAocmRhdGEgIT0gJmhkci0+cnBjX2Rh
dGEpDQo+ID4gICAgICAgICAgICAgICAga2ZyZWUocmRhdGEpOw0KPiA+DQo+IA0KPiBUaGF0IHdv
bid0IHdvcmsuICBUaGVyZSBpcyBubyBoZHItPnJwY19kYXRhLg0KDQpEb2ghIE1ha2UgdGhhdA0K
DQoJc3RydWN0IG5mc19yZWFkX2hlYWRlciAqcmVhZF9oZWFkZXIgPSBjb250YWluZXJfb2YoaGRy
LCBzdHJ1Y3QuLi4uKTsNCg0KCWlmIChyZGF0YSAhPSAmcmVhZF9oZWFkZXItPnJwY19kYXRhKQ0K
CQlrZnJlZShyZGF0YSk7DQoNClRoZSByZWFzb24gZm9yIHByZWZlcnJpbmcgdGhlIGFib3ZlIGxp
bmUgaXMgdGhhdCBJIGNhbiB0cml2aWFsbHkgc2VlDQp0aGF0IHRoZSByZWFzb24gd2h5IEkgZG9u
J3Qgd2FudCB0byBkbyB0aGUga2ZyZWUoKSBpcyBiZWNhdXNlIEknbQ0KcG9pbnRpbmcgYXQgYSBm
aWVsZCBpbnNpZGUgdGhlIHJlYWRfaGVhZGVyLg0KDQotLSANClRyb25kIE15a2xlYnVzdA0KTGlu
dXggTkZTIGNsaWVudCBtYWludGFpbmVyDQoNCk5ldEFwcA0KVHJvbmQuTXlrbGVidXN0QG5ldGFw
cC5jb20NCnd3dy5uZXRhcHAuY29tDQoNCg==

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 14/26] NFS: merge _full and _partial read rpc_ops
  2012-04-17 16:13       ` Myklebust, Trond
@ 2012-04-17 16:16         ` Fred Isaman
  0 siblings, 0 replies; 40+ messages in thread
From: Fred Isaman @ 2012-04-17 16:16 UTC (permalink / raw)
  To: Myklebust, Trond; +Cc: Isaman, Fred, linux-nfs

On Tue, Apr 17, 2012 at 12:13 PM, Myklebust, Trond
<Trond.Myklebust@netapp.com> wrote:
> On Tue, 2012-04-17 at 11:37 -0400, Fred Isaman wrote:
>> On Mon, Apr 16, 2012 at 4:28 PM, Myklebust, Trond
>> <Trond.Myklebust@netapp.com> wrote:
>> > On Mon, 2012-04-09 at 16:52 -0400, Fred Isaman wrote:
>> >> @@ -64,10 +80,16 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr)
>> >>
>> >>  void nfs_readdata_release(struct nfs_read_data *rdata)
>> >>  {
>> >> +     struct nfs_pgio_header *hdr = rdata->header;
>> >> +
>> >>       put_nfs_open_context(rdata->args.context);
>> >>       if (rdata->pages.pagevec != rdata->pages.page_array)
>> >>               kfree(rdata->pages.pagevec);
>> >> -     nfs_readhdr_free(rdata->header);
>> >> +     if (container_of(hdr, struct nfs_read_header, header) !=
>> >> +         container_of(rdata, struct nfs_read_header, rpc_data))
>> >> +             kfree(rdata);
>> >
>> > This looks unnecessarily complicated. How about
>> >
>> >        if (rdata != &hdr->rpc_data)
>> >                kfree(rdata);
>> >
>>
>> That won't work.  There is no hdr->rpc_data.
>
> Doh! Make that
>
>        struct nfs_read_header *read_header = container_of(hdr, struct....);
>
>        if (rdata != &read_header->rpc_data)
>                kfree(rdata);
>
> The reason for preferring the above line is that I can trivially see
> that the reason why I don't want to do the kfree() is because I'm
> pointing at a field inside the read_header.
>

OK.

Fred

^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2012-04-17 16:16 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-04-09 20:51 [PATCH 00/26] directio rewrite Fred Isaman
2012-04-09 20:51 ` [PATCH 01/26] NFS: check for req==NULL in nfs_try_to_update_request cleanup Fred Isaman
2012-04-09 20:52 ` [PATCH 02/26] NFS4.1: make pnfs_ld_[read|write]_done consistent Fred Isaman
2012-04-09 20:52 ` [PATCH 03/26] NFS4.1: Add lseg to struct nfs4_fl_commit_bucket Fred Isaman
2012-04-13 15:47   ` Myklebust, Trond
2012-04-13 16:50     ` Fred Isaman
2012-04-09 20:52 ` [PATCH 04/26] NFS: add a struct nfs_commit_data to replace nfs_write_data in commits Fred Isaman
2012-04-09 20:52 ` [PATCH 05/26] NFS: grab open context in direct read Fred Isaman
2012-04-09 20:52 ` [PATCH 06/26] NFS: dprintks in directio code were referencing task after put Fred Isaman
2012-04-09 20:52 ` [PATCH 07/26] NFS: reverse arg order in nfs_initiate_[read|write] Fred Isaman
2012-04-09 20:52 ` [PATCH 08/26] NFS: remove unnecessary casts of void pointers in nfs4filelayout.c Fred Isaman
2012-04-09 20:52 ` [PATCH 09/26] NFS: use req_offset where appropriate Fred Isaman
2012-04-09 20:52 ` [PATCH 10/26] NFS: put open context on error in nfs_pagein_multi Fred Isaman
2012-04-13 15:52   ` Myklebust, Trond
2012-04-09 20:52 ` [PATCH 11/26] NFS: put open context on error in nfs_flush_multi Fred Isaman
2012-04-13 15:52   ` Myklebust, Trond
2012-04-09 20:52 ` [PATCH 12/26] NFS: create common nfs_pgio_header for both read and write Fred Isaman
2012-04-09 20:52 ` [PATCH 13/26] NFS: create struct nfs_page_array Fred Isaman
2012-04-09 20:52 ` [PATCH 14/26] NFS: merge _full and _partial read rpc_ops Fred Isaman
2012-04-16 20:28   ` Myklebust, Trond
2012-04-17 15:37     ` Fred Isaman
2012-04-17 16:13       ` Myklebust, Trond
2012-04-17 16:16         ` Fred Isaman
2012-04-16 20:41   ` Myklebust, Trond
2012-04-17 15:38     ` Fred Isaman
2012-04-09 20:52 ` [PATCH 15/26] NFS: merge _full and _partial write rpc_ops Fred Isaman
2012-04-09 20:52 ` [PATCH 16/26] NFS: create completion structure to pass into page_init functions Fred Isaman
2012-04-16 21:07   ` Myklebust, Trond
2012-04-09 20:52 ` [PATCH 17/26] NFS: remove unused wb_complete field from struct nfs_page Fred Isaman
2012-04-09 20:52 ` [PATCH 18/26] NFS: prepare coalesce testing for directio Fred Isaman
2012-04-09 20:52 ` [PATCH 19/26] NFS: rewrite directio read to use async coalesce code Fred Isaman
2012-04-16 21:22   ` Myklebust, Trond
2012-04-17 15:41     ` Fred Isaman
2012-04-09 20:52 ` [PATCH 20/26] NFS: create nfs_generic_commit_list Fred Isaman
2012-04-09 20:52 ` [PATCH 21/26] NFS: create struct nfs_commit_info Fred Isaman
2012-04-09 20:52 ` [PATCH 22/26] NFS: create nfs_commit_completion_ops Fred Isaman
2012-04-09 20:52 ` [PATCH 23/26] NFS: add dreq to nfs_commit_info Fred Isaman
2012-04-09 20:52 ` [PATCH 24/26] NFS: avoid some stat gathering for direct io Fred Isaman
2012-04-09 20:52 ` [PATCH 25/26] NFS: move allocation of nfs4_fl_commit_info to write's pg_init Fred Isaman
2012-04-09 20:52 ` [PATCH 26/26] NFS: rewrite directio write to use async coalesce code Fred Isaman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.