Linux-NFS Archive on lore.kernel.org
 help / color / Atom feed
From: Jim Rees <rees@umich.edu>
To: linux-nfs@vger.kernel.org
Cc: peter honeyman <honey@citi.umich.edu>
Subject: [PATCH 04/34] pnfs: hook nfs_write_begin/end to allow layout driver manipulation
Date: Sun, 12 Jun 2011 19:43:55 -0400
Message-ID: <ea5e2bf59d726fbfec6f63af9eda83dbc15cb96d.1307921137.git.rees@umich.edu> (raw)
In-Reply-To: <cover.1307921137.git.rees@umich.edu>

From: Peng Tao <bergwolf@gmail.com>

Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Reported-by: Alexandros Batsakis <batsakis@netapp.com>
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Peng Tao <bergwolf@gmail.com>
---
 fs/nfs/file.c          |   26 ++++++++++-
 fs/nfs/pnfs.c          |   41 +++++++++++++++++
 fs/nfs/pnfs.h          |  115 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/write.c         |   12 +++--
 include/linux/nfs_fs.h |    3 +-
 5 files changed, 189 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2f093ed..1768762 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -384,12 +384,15 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	struct page *page;
 	int once_thru = 0;
+	struct pnfs_layout_segment *lseg;
 
 	dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
 		file->f_path.dentry->d_parent->d_name.name,
 		file->f_path.dentry->d_name.name,
 		mapping->host->i_ino, len, (long long) pos);
-
+	lseg = pnfs_update_layout(mapping->host,
+				  nfs_file_open_context(file),
+				  pos, len, IOMODE_RW, GFP_NOFS);
 start:
 	/*
 	 * Prevent starvation issues if someone is doing a consistency
@@ -409,6 +412,9 @@ start:
 	if (ret) {
 		unlock_page(page);
 		page_cache_release(page);
+		*pagep = NULL;
+		*fsdata = NULL;
+		goto out;
 	} else if (!once_thru &&
 		   nfs_want_read_modify_write(file, page, pos, len)) {
 		once_thru = 1;
@@ -417,6 +423,12 @@ start:
 		if (!ret)
 			goto start;
 	}
+	ret = pnfs_write_begin(file, page, pos, len, lseg, fsdata);
+ out:
+	if (ret) {
+		put_lseg(lseg);
+		*fsdata = NULL;
+	}
 	return ret;
 }
 
@@ -426,6 +438,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
 {
 	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
 	int status;
+	struct pnfs_layout_segment *lseg;
 
 	dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
 		file->f_path.dentry->d_parent->d_name.name,
@@ -452,10 +465,17 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
 			zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
 	}
 
-	status = nfs_updatepage(file, page, offset, copied);
+	lseg = nfs4_pull_lseg_from_fsdata(file, fsdata);
+	status = pnfs_write_end(file, page, pos, len, copied, lseg);
+	if (status)
+		goto out;
+	status = nfs_updatepage(file, page, offset, copied, lseg, fsdata);
 
+out:
 	unlock_page(page);
 	page_cache_release(page);
+	pnfs_write_end_cleanup(file, fsdata);
+	put_lseg(lseg);
 
 	if (status < 0)
 		return status;
@@ -577,7 +597,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	ret = VM_FAULT_LOCKED;
 	if (nfs_flush_incompatible(filp, page) == 0 &&
-	    nfs_updatepage(filp, page, 0, pagelen) == 0)
+	    nfs_updatepage(filp, page, 0, pagelen, NULL, NULL) == 0)
 		goto out;
 
 	ret = VM_FAULT_SIGBUS;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f03a5e0..e693718 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1138,6 +1138,41 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
 }
 
 /*
+ * This gives the layout driver an opportunity to read in page "around"
+ * the data to be written.  It returns 0 on success, otherwise an error code
+ * which will either be passed up to user, or ignored if
+ * some previous part of write succeeded.
+ * Note the range [pos, pos+len-1] is entirely within the page.
+ */
+int _pnfs_write_begin(struct inode *inode, struct page *page,
+		      loff_t pos, unsigned len,
+		      struct pnfs_layout_segment *lseg,
+		      struct pnfs_fsdata **fsdata)
+{
+	struct pnfs_fsdata *data;
+	int status = 0;
+
+	dprintk("--> %s: pos=%llu len=%u\n",
+		__func__, (unsigned long long)pos, len);
+	data = kzalloc(sizeof(struct pnfs_fsdata), GFP_KERNEL);
+	if (!data) {
+		status = -ENOMEM;
+		goto out;
+	}
+	data->lseg = lseg; /* refcount passed into data to be managed there */
+	status = NFS_SERVER(inode)->pnfs_curr_ld->write_begin(
+						lseg, page, pos, len, data);
+	if (status) {
+		kfree(data);
+		data = NULL;
+	}
+out:
+	*fsdata = data;
+	dprintk("<-- %s: status=%d\n", __func__, status);
+	return status;
+}
+
+/*
  * Called by non rpc-based layout drivers
  */
 int
@@ -1237,6 +1272,12 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
 }
 EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
 
+void pnfs_free_fsdata(struct pnfs_fsdata *fsdata)
+{
+	/* lseg refcounting handled directly in nfs_write_end */
+	kfree(fsdata);
+}
+
 /*
  * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
  * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index a3fc0f2..525ec55 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -54,6 +54,12 @@ enum pnfs_try_status {
 	PNFS_NOT_ATTEMPTED = 1,
 };
 
+struct pnfs_fsdata {
+	struct pnfs_layout_segment *lseg;
+	int bypass_eof;
+	void *private;
+};
+
 #ifdef CONFIG_NFS_V4_1
 
 #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
@@ -106,6 +112,14 @@ struct pnfs_layoutdriver_type {
 	 */
 	enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
 	enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
+	int (*write_begin) (struct pnfs_layout_segment *lseg, struct page *page,
+			    loff_t pos, unsigned count,
+			    struct pnfs_fsdata *fsdata);
+	int (*write_end)(struct inode *inode, struct page *page, loff_t pos,
+			 unsigned count, unsigned copied,
+			 struct pnfs_layout_segment *lseg);
+	void (*write_end_cleanup)(struct file *filp,
+				  struct pnfs_fsdata *fsdata);
 
 	void (*free_deviceid_node) (struct nfs4_deviceid_node *);
 
@@ -175,6 +189,7 @@ enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
 enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
 					    const struct rpc_call_ops *);
 bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
+void pnfs_free_fsdata(struct pnfs_fsdata *fsdata);
 int pnfs_layout_process(struct nfs4_layoutget *lgp);
 void pnfs_free_lseg_list(struct list_head *tmp_list);
 void pnfs_destroy_layout(struct nfs_inode *);
@@ -186,6 +201,10 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
 int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
 				  struct pnfs_layout_hdr *lo,
 				  struct nfs4_state *open_state);
+int _pnfs_write_begin(struct inode *inode, struct page *page,
+		      loff_t pos, unsigned len,
+		      struct pnfs_layout_segment *lseg,
+		      struct pnfs_fsdata **fsdata);
 int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 				struct list_head *tmp_list,
 				struct pnfs_layout_range *recall_range);
@@ -287,6 +306,13 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req)
 		put_lseg(req->wb_commit_lseg);
 }
 
+static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg,
+			       struct pnfs_fsdata *fsdata)
+{
+	return !fsdata  || ((struct pnfs_layout_segment *)fsdata == lseg) ||
+		!fsdata->bypass_eof;
+}
+
 /* Should the pNFS client commit and return the layout upon a setattr */
 static inline bool
 pnfs_ld_layoutret_on_setattr(struct inode *inode)
@@ -297,6 +323,49 @@ pnfs_ld_layoutret_on_setattr(struct inode *inode)
 		PNFS_LAYOUTRET_ON_SETATTR;
 }
 
+static inline int pnfs_write_begin(struct file *filp, struct page *page,
+				   loff_t pos, unsigned len,
+				   struct pnfs_layout_segment *lseg,
+				   void **fsdata)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct nfs_server *nfss = NFS_SERVER(inode);
+	int status = 0;
+
+	*fsdata = lseg;
+	if (lseg && nfss->pnfs_curr_ld->write_begin)
+		status = _pnfs_write_begin(inode, page, pos, len, lseg,
+					   (struct pnfs_fsdata **) fsdata);
+	return status;
+}
+
+/* CAREFUL - what happens if copied < len??? */
+static inline int pnfs_write_end(struct file *filp, struct page *page,
+				 loff_t pos, unsigned len, unsigned copied,
+				 struct pnfs_layout_segment *lseg)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct nfs_server *nfss = NFS_SERVER(inode);
+
+	if (nfss->pnfs_curr_ld && nfss->pnfs_curr_ld->write_end)
+		return nfss->pnfs_curr_ld->write_end(inode, page, pos, len,
+						     copied, lseg);
+	else
+		return 0;
+}
+
+static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
+{
+	struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
+
+	if (fsdata && nfss->pnfs_curr_ld) {
+		if (nfss->pnfs_curr_ld->write_end_cleanup)
+			nfss->pnfs_curr_ld->write_end_cleanup(filp, fsdata);
+		if (nfss->pnfs_curr_ld->write_begin)
+			pnfs_free_fsdata(fsdata);
+	}
+}
+
 static inline int pnfs_return_layout(struct inode *ino)
 {
 	struct nfs_inode *nfsi = NFS_I(ino);
@@ -317,6 +386,19 @@ static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
 		pgio->pg_test = ld->pg_test;
 }
 
+static inline struct pnfs_layout_segment *
+nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
+{
+	if (fsdata) {
+		struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
+
+		if (nfss->pnfs_curr_ld && nfss->pnfs_curr_ld->write_begin)
+			return ((struct pnfs_fsdata *) fsdata)->lseg;
+		return (struct pnfs_layout_segment *)fsdata;
+	}
+	return NULL;
+}
+
 #else  /* CONFIG_NFS_V4_1 */
 
 static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -345,6 +427,12 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
 	return NULL;
 }
 
+static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg,
+			       struct pnfs_fsdata *fsdata)
+{
+	return 1;
+}
+
 static inline enum pnfs_try_status
 pnfs_try_to_read_data(struct nfs_read_data *data,
 		      const struct rpc_call_ops *call_ops)
@@ -364,6 +452,26 @@ static inline int pnfs_return_layout(struct inode *ino)
 	return 0;
 }
 
+static inline int pnfs_write_begin(struct file *filp, struct page *page,
+				   loff_t pos, unsigned len,
+				   struct pnfs_layout_segment *lseg,
+				   void **fsdata)
+{
+	*fsdata = NULL;
+	return 0;
+}
+
+static inline int pnfs_write_end(struct file *filp, struct page *page,
+				 loff_t pos, unsigned len, unsigned copied,
+				 struct pnfs_layout_segment *lseg)
+{
+	return 0;
+}
+
+static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
+{
+}
+
 static inline bool
 pnfs_ld_layoutret_on_setattr(struct inode *inode)
 {
@@ -435,6 +543,13 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
 {
 }
+
+static inline struct pnfs_layout_segment *
+nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 #endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e268e3b..75e2a6b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -673,7 +673,9 @@ out:
 }
 
 static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
-		unsigned int offset, unsigned int count)
+		unsigned int offset, unsigned int count,
+		struct pnfs_layout_segment *lseg, void *fsdata)
+
 {
 	struct nfs_page	*req;
 
@@ -681,7 +683,8 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 	/* Update file length */
-	nfs_grow_file(page, offset, count);
+	if (pnfs_grow_ok(lseg, fsdata))
+		nfs_grow_file(page, offset, count);
 	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
 	nfs_mark_request_dirty(req);
 	nfs_clear_page_tag_locked(req);
@@ -734,7 +737,8 @@ static int nfs_write_pageuptodate(struct page *page, struct inode *inode)
  * things with a page scheduled for an RPC call (e.g. invalidate it).
  */
 int nfs_updatepage(struct file *file, struct page *page,
-		unsigned int offset, unsigned int count)
+		unsigned int offset, unsigned int count,
+		struct pnfs_layout_segment *lseg, void *fsdata)
 {
 	struct nfs_open_context *ctx = nfs_file_open_context(file);
 	struct inode	*inode = page->mapping->host;
@@ -759,7 +763,7 @@ int nfs_updatepage(struct file *file, struct page *page,
 		offset = 0;
 	}
 
-	status = nfs_writepage_setup(ctx, page, offset, count);
+	status = nfs_writepage_setup(ctx, page, offset, count, lseg, fsdata);
 	if (status < 0)
 		nfs_set_pageerror(page);
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1b93b9c..be1ac1d 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -510,7 +510,8 @@ extern int  nfs_congestion_kb;
 extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
 extern int  nfs_writepages(struct address_space *, struct writeback_control *);
 extern int  nfs_flush_incompatible(struct file *file, struct page *page);
-extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
+extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int,
+			struct pnfs_layout_segment *, void *);
 extern void nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
 
 /*
-- 
1.7.4.1


  parent reply index

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-06-12 23:43 [PATCH 00/34] pnfs block layout driver based on v3.0-rc2 Jim Rees
2011-06-12 23:43 ` [PATCH 01/34] pnfs: GETDEVICELIST Jim Rees
2011-06-12 23:43 ` [PATCH 02/34] pnfs: add set-clear layoutdriver interface Jim Rees
2011-06-12 23:43 ` [PATCH 03/34] pnfs: let layoutcommit code handle multiple segments Jim Rees
2011-06-13 14:36   ` Fred Isaman
2011-06-14 10:40     ` tao.peng
2011-06-14 13:58       ` Fred Isaman
2011-06-14 14:28       ` Benny Halevy
2011-06-12 23:43 ` Jim Rees [this message]
2011-06-13 14:44   ` [PATCH 04/34] pnfs: hook nfs_write_begin/end to allow layout driver manipulation Fred Isaman
2011-06-14 11:01     ` tao.peng
2011-06-14 14:05       ` Fred Isaman
2011-06-14 15:53         ` Peng Tao
2011-06-14 16:02           ` Fred Isaman
2011-06-12 23:43 ` [PATCH 05/34] pnfs: ask for layout_blksize and save it in nfs_server Jim Rees
2011-06-14 15:01   ` Benny Halevy
2011-06-14 15:08     ` Peng Tao
2011-06-12 23:44 ` [PATCH 06/34] pnfs: cleanup_layoutcommit Jim Rees
2011-06-13 21:19   ` Benny Halevy
2011-06-14 15:16     ` Peng Tao
2011-06-14 15:10   ` Benny Halevy
2011-06-14 15:21     ` Peng Tao
2011-06-14 15:19   ` Benny Halevy
2011-06-12 23:44 ` [PATCH 07/34] pnfsblock: define PNFS_BLOCK Kconfig option Jim Rees
2011-06-14 15:13   ` Benny Halevy
2011-06-12 23:44 ` [PATCH 08/34] pnfsblock: blocklayout stub Jim Rees
2011-06-12 23:44 ` [PATCH 09/34] pnfsblock: layout alloc and free Jim Rees
2011-06-12 23:44 ` [PATCH 10/34] Add support for simple rpc pipefs Jim Rees
2011-06-12 23:44 ` [PATCH 11/34] pnfs-block: Add block device discovery pipe Jim Rees
2011-06-12 23:44 ` [PATCH 12/34] pnfsblock: basic extent code Jim Rees
2011-06-12 23:44 ` [PATCH 13/34] pnfsblock: add device operations Jim Rees
2011-06-12 23:44 ` [PATCH 14/34] pnfsblock: remove " Jim Rees
2011-06-12 23:44 ` [PATCH 15/34] pnfsblock: lseg alloc and free Jim Rees
2011-06-12 23:44 ` [PATCH 16/34] pnfsblock: merge extents Jim Rees
2011-06-12 23:44 ` [PATCH 17/34] pnfsblock: call and parse getdevicelist Jim Rees
2011-06-14 15:36   ` Benny Halevy
2011-06-12 23:44 ` [PATCH 18/34] pnfsblock: allow use of PG_owner_priv_1 flag Jim Rees
2011-06-13 15:56   ` Fred Isaman
2011-06-12 23:44 ` [PATCH 19/34] pnfsblock: xdr decode pnfs_block_layout4 Jim Rees
2011-06-12 23:44 ` [PATCH 20/34] pnfsblock: find_get_extent Jim Rees
2011-06-12 23:44 ` [PATCH 21/34] pnfsblock: SPLITME: add extent manipulation functions Jim Rees
2011-06-14 15:40   ` Benny Halevy
2011-06-12 23:44 ` [PATCH 22/34] pnfsblock: merge rw extents Jim Rees
2011-06-12 23:44 ` [PATCH 23/34] pnfsblock: encode_layoutcommit Jim Rees
2011-06-14 15:44   ` Benny Halevy
2011-06-12 23:44 ` [PATCH 24/34] pnfsblock: cleanup_layoutcommit Jim Rees
2011-06-12 23:44 ` [PATCH 25/34] pnfsblock: bl_read_pagelist Jim Rees
2011-06-12 23:44 ` [PATCH 26/34] pnfsblock: write_begin Jim Rees
2011-06-12 23:44 ` [PATCH 27/34] pnfsblock: write_end Jim Rees
2011-06-12 23:44 ` [PATCH 28/34] pnfsblock: write_end_cleanup Jim Rees
2011-06-12 23:45 ` [PATCH 29/34] pnfsblock: bl_write_pagelist support functions Jim Rees
2011-06-12 23:45 ` [PATCH 30/34] pnfsblock: bl_write_pagelist Jim Rees
2011-06-12 23:45 ` [PATCH 31/34] pnfsblock: note written INVAL areas for layoutcommit Jim Rees
2011-06-12 23:45 ` [PATCH 32/34] pnfsblock: Implement release_inval_marks Jim Rees
2011-06-12 23:45 ` [PATCH 33/34] Add configurable prefetch size for layoutget Jim Rees
2011-06-12 23:45 ` [PATCH 34/34] NFS41: do not update isize if inode needs layoutcommit Jim Rees
2011-06-14 16:15   ` Benny Halevy
2011-06-14 16:22     ` Fred Isaman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ea5e2bf59d726fbfec6f63af9eda83dbc15cb96d.1307921137.git.rees@umich.edu \
    --to=rees@umich.edu \
    --cc=honey@citi.umich.edu \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-NFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nfs/0 linux-nfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nfs linux-nfs/ https://lore.kernel.org/linux-nfs \
		linux-nfs@vger.kernel.org
	public-inbox-index linux-nfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-nfs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git