All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/10] pnfsblock fixes for 3.2
@ 2011-09-23  1:50 Jim Rees
  2011-09-23  1:50 ` [PATCH 01/10] pnfsblock: fix return code confusion Jim Rees
                   ` (9 more replies)
  0 siblings, 10 replies; 15+ messages in thread
From: Jim Rees @ 2011-09-23  1:50 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, peter honeyman

Various pnfs client block layout driver bug fixes and improvements for Linux
3.2.  These are also available on the for-trond branch of my git repo at:
git://citi.umich.edu/projects/linux-pnfs-blk.git

Jim Rees (2):
  pnfsblock: fix return code confusion
  pnfsblock: fix size of upcall message

Peng Tao (8):
  SUNRPC/NFS: make rpc pipe upcall generic
  pnfsblock: add missing rpc_put_mount and path_put
  pnfs: make _set_lo_fail generic
  pnfsblock: init pg_bsize properly
  pnfs: recoalesce when ld write pagelist fails
  pnfs: recoalesce when ld read pagelist fails
  pnfsblock: fix NULL pointer dereference
  pnfsblock: fix writeback deadlock

 fs/nfs/blocklayout/blocklayout.c    |   78 ++++++++++++++++++++++-------------
 fs/nfs/blocklayout/blocklayout.h    |    4 +-
 fs/nfs/blocklayout/blocklayoutdev.c |   35 ++++------------
 fs/nfs/idmap.c                      |   25 +-----------
 fs/nfs/nfs4filelayout.c             |   19 +-------
 fs/nfs/pnfs.c                       |   52 ++++++++++++------------
 fs/nfs/pnfs.h                       |    5 +-
 fs/nfs/read.c                       |   12 +++++-
 fs/nfs/write.c                      |   25 +++++++++++-
 include/linux/sunrpc/rpc_pipe_fs.h  |    2 +
 net/sunrpc/auth_gss/auth_gss.c      |   24 +----------
 net/sunrpc/rpc_pipe.c               |   20 +++++++++
 12 files changed, 150 insertions(+), 151 deletions(-)

-- 
1.7.4.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 01/10] pnfsblock: fix return code confusion
  2011-09-23  1:50 [PATCH 00/10] pnfsblock fixes for 3.2 Jim Rees
@ 2011-09-23  1:50 ` Jim Rees
  2011-09-23  1:50 ` [PATCH 02/10] pnfsblock: fix size of upcall message Jim Rees
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 15+ messages in thread
From: Jim Rees @ 2011-09-23  1:50 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, peter honeyman

Always return an ERR_PTR-encoded error, not NULL, from
nfs4_blk_get_deviceinfo and nfs4_blk_decode_device.

Check for IS_ERR, not NULL, in bl_set_layoutdriver when calling
nfs4_blk_get_deviceinfo.

Signed-off-by: Jim Rees <rees@umich.edu>
Signed-off-by: Benny Halevy <bhalevy@tonian.com>
---
 fs/nfs/blocklayout/blocklayout.c    |   20 ++++++++++++--------
 fs/nfs/blocklayout/blocklayoutdev.c |   13 ++++++++-----
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9561c8f..d2432f0 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -805,7 +805,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
 			struct nfs4_deviceid *d_id)
 {
 	struct pnfs_device *dev;
-	struct pnfs_block_dev *rv = NULL;
+	struct pnfs_block_dev *rv;
 	u32 max_resp_sz;
 	int max_pages;
 	struct page **pages = NULL;
@@ -823,18 +823,20 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
 	dev = kmalloc(sizeof(*dev), GFP_NOFS);
 	if (!dev) {
 		dprintk("%s kmalloc failed\n", __func__);
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS);
 	if (pages == NULL) {
 		kfree(dev);
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	}
 	for (i = 0; i < max_pages; i++) {
 		pages[i] = alloc_page(GFP_NOFS);
-		if (!pages[i])
+		if (!pages[i]) {
+			rv = ERR_PTR(-ENOMEM);
 			goto out_free;
+		}
 	}
 
 	memcpy(&dev->dev_id, d_id, sizeof(*d_id));
@@ -847,8 +849,10 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
 	dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
 	rc = nfs4_proc_getdeviceinfo(server, dev);
 	dprintk("%s getdevice info returns %d\n", __func__, rc);
-	if (rc)
+	if (rc) {
+		rv = ERR_PTR(rc);
 		goto out_free;
+	}
 
 	rv = nfs4_blk_decode_device(server, dev);
  out_free:
@@ -866,7 +870,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
 	struct pnfs_devicelist *dlist = NULL;
 	struct pnfs_block_dev *bdev;
 	LIST_HEAD(block_disklist);
-	int status = 0, i;
+	int status, i;
 
 	dprintk("%s enter\n", __func__);
 
@@ -898,8 +902,8 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
 		for (i = 0; i < dlist->num_devs; i++) {
 			bdev = nfs4_blk_get_deviceinfo(server, fh,
 						       &dlist->dev_id[i]);
-			if (!bdev) {
-				status = -ENODEV;
+			if (IS_ERR(bdev)) {
+				status = PTR_ERR(bdev);
 				goto out_error;
 			}
 			spin_lock(&b_mt_id->bm_lock);
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index a83b393..0b1fb0e 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -131,7 +131,7 @@ struct pnfs_block_dev *
 nfs4_blk_decode_device(struct nfs_server *server,
 		       struct pnfs_device *dev)
 {
-	struct pnfs_block_dev *rv = NULL;
+	struct pnfs_block_dev *rv;
 	struct block_device *bd = NULL;
 	struct rpc_pipe_msg msg;
 	struct bl_msg_hdr bl_msg = {
@@ -141,7 +141,7 @@ nfs4_blk_decode_device(struct nfs_server *server,
 	uint8_t *dataptr;
 	DECLARE_WAITQUEUE(wq, current);
 	struct bl_dev_msg *reply = &bl_mount_reply;
-	int offset, len, i;
+	int offset, len, i, rc;
 
 	dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
 	dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
@@ -168,8 +168,10 @@ nfs4_blk_decode_device(struct nfs_server *server,
 
 	dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
 	add_wait_queue(&bl_wq, &wq);
-	if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) {
+	rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg);
+	if (rc < 0) {
 		remove_wait_queue(&bl_wq, &wq);
+		rv = ERR_PTR(rc);
 		goto out;
 	}
 
@@ -187,8 +189,9 @@ nfs4_blk_decode_device(struct nfs_server *server,
 
 	bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor));
 	if (IS_ERR(bd)) {
-		dprintk("%s failed to open device : %ld\n",
-			__func__, PTR_ERR(bd));
+		rc = PTR_ERR(bd);
+		dprintk("%s failed to open device : %d\n", __func__, rc);
+		rv = ERR_PTR(rc);
 		goto out;
 	}
 
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 02/10] pnfsblock: fix size of upcall message
  2011-09-23  1:50 [PATCH 00/10] pnfsblock fixes for 3.2 Jim Rees
  2011-09-23  1:50 ` [PATCH 01/10] pnfsblock: fix return code confusion Jim Rees
@ 2011-09-23  1:50 ` Jim Rees
  2011-09-23  1:50 ` [PATCH 03/10] SUNRPC/NFS: make rpc pipe upcall generic Jim Rees
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 15+ messages in thread
From: Jim Rees @ 2011-09-23  1:50 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, peter honeyman

Make the status field explicitly 32 bits.  "...it's unlikely that the kernel
and userspace would differ on the size of an int here, but it might be a
good idea to go ahead and make that explicitly 32 bits in case we end up
dealing with more exotic arches at some point in the future."

Suggested-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Jim Rees <rees@umich.edu>
Signed-off-by: Benny Halevy <bhalevy@tonian.com>
---
 fs/nfs/blocklayout/blocklayout.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index f27d827..58dc256 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -150,7 +150,7 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
 }
 
 struct bl_dev_msg {
-	int status;
+	int32_t status;
 	uint32_t major, minor;
 };
 
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 03/10] SUNRPC/NFS: make rpc pipe upcall generic
  2011-09-23  1:50 [PATCH 00/10] pnfsblock fixes for 3.2 Jim Rees
  2011-09-23  1:50 ` [PATCH 01/10] pnfsblock: fix return code confusion Jim Rees
  2011-09-23  1:50 ` [PATCH 02/10] pnfsblock: fix size of upcall message Jim Rees
@ 2011-09-23  1:50 ` Jim Rees
  2011-09-23  1:50 ` [PATCH 04/10] pnfsblock: add missing rpc_put_mount and path_put Jim Rees
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 15+ messages in thread
From: Jim Rees @ 2011-09-23  1:50 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, peter honeyman

From: Peng Tao <bergwolf@gmail.com>

The same function is used by idmap, gss and blocklayout code. Make it
generic.

Signed-off-by: Peng Tao <peng_tao@emc.com>
Signed-off-by: Jim Rees <rees@umich.edu>
---
 fs/nfs/blocklayout/blocklayout.c    |    2 +-
 fs/nfs/blocklayout/blocklayout.h    |    2 --
 fs/nfs/blocklayout/blocklayoutdev.c |   22 ----------------------
 fs/nfs/idmap.c                      |   25 +------------------------
 include/linux/sunrpc/rpc_pipe_fs.h  |    2 ++
 net/sunrpc/auth_gss/auth_gss.c      |   24 ++----------------------
 net/sunrpc/rpc_pipe.c               |   20 ++++++++++++++++++++
 7 files changed, 26 insertions(+), 71 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index d2432f0..dc23833 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -964,7 +964,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
 };
 
 static const struct rpc_pipe_ops bl_upcall_ops = {
-	.upcall		= bl_pipe_upcall,
+	.upcall		= rpc_pipe_generic_upcall,
 	.downcall	= bl_pipe_downcall,
 	.destroy_msg	= bl_pipe_destroy_msg,
 };
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 58dc256..42acf7e 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -169,8 +169,6 @@ extern wait_queue_head_t bl_wq;
 #define BL_DEVICE_REQUEST_ERR          0x2 /* User level process fails */
 
 /* blocklayoutdev.c */
-ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *,
-		       char __user *, size_t);
 ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t);
 void bl_pipe_destroy_msg(struct rpc_pipe_msg *);
 struct block_device *nfs4_blkdev_get(dev_t dev);
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index 0b1fb0e..d08ba91 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -79,28 +79,6 @@ int nfs4_blkdev_put(struct block_device *bdev)
 	return blkdev_put(bdev, FMODE_READ);
 }
 
-/*
- * Shouldn't there be a rpc_generic_upcall() to do this for us?
- */
-ssize_t bl_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
-		       char __user *dst, size_t buflen)
-{
-	char *data = (char *)msg->data + msg->copied;
-	size_t mlen = min(msg->len - msg->copied, buflen);
-	unsigned long left;
-
-	left = copy_to_user(dst, data, mlen);
-	if (left == mlen) {
-		msg->errno = -EFAULT;
-		return -EFAULT;
-	}
-
-	mlen -= left;
-	msg->copied += mlen;
-	msg->errno = 0;
-	return mlen;
-}
-
 static struct bl_dev_msg bl_mount_reply;
 
 ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index f20801a..47d1c6f 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -336,8 +336,6 @@ struct idmap {
 	struct idmap_hashtable	idmap_group_hash;
 };
 
-static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *,
-				 char __user *, size_t);
 static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
 				   size_t);
 static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
@@ -345,7 +343,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
 static unsigned int fnvhash32(const void *, size_t);
 
 static const struct rpc_pipe_ops idmap_upcall_ops = {
-	.upcall		= idmap_pipe_upcall,
+	.upcall		= rpc_pipe_generic_upcall,
 	.downcall	= idmap_pipe_downcall,
 	.destroy_msg	= idmap_pipe_destroy_msg,
 };
@@ -595,27 +593,6 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
 	return ret;
 }
 
-/* RPC pipefs upcall/downcall routines */
-static ssize_t
-idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
-		  char __user *dst, size_t buflen)
-{
-	char *data = (char *)msg->data + msg->copied;
-	size_t mlen = min(msg->len, buflen);
-	unsigned long left;
-
-	left = copy_to_user(dst, data, mlen);
-	if (left == mlen) {
-		msg->errno = -EFAULT;
-		return -EFAULT;
-	}
-
-	mlen -= left;
-	msg->copied += mlen;
-	msg->errno = 0;
-	return mlen;
-}
-
 static ssize_t
 idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 {
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index cf14db9..e4ea430 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -44,6 +44,8 @@ RPC_I(struct inode *inode)
 	return container_of(inode, struct rpc_inode, vfs_inode);
 }
 
+extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *,
+				       char __user *, size_t);
 extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *);
 
 struct rpc_clnt;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 364eb45..e9b7693 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -603,26 +603,6 @@ out:
 	return err;
 }
 
-static ssize_t
-gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
-		char __user *dst, size_t buflen)
-{
-	char *data = (char *)msg->data + msg->copied;
-	size_t mlen = min(msg->len, buflen);
-	unsigned long left;
-
-	left = copy_to_user(dst, data, mlen);
-	if (left == mlen) {
-		msg->errno = -EFAULT;
-		return -EFAULT;
-	}
-
-	mlen -= left;
-	msg->copied += mlen;
-	msg->errno = 0;
-	return mlen;
-}
-
 #define MSG_BUF_MAXSIZE 1024
 
 static ssize_t
@@ -1590,7 +1570,7 @@ static const struct rpc_credops gss_nullops = {
 };
 
 static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
-	.upcall		= gss_pipe_upcall,
+	.upcall		= rpc_pipe_generic_upcall,
 	.downcall	= gss_pipe_downcall,
 	.destroy_msg	= gss_pipe_destroy_msg,
 	.open_pipe	= gss_pipe_open_v0,
@@ -1598,7 +1578,7 @@ static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
 };
 
 static const struct rpc_pipe_ops gss_upcall_ops_v1 = {
-	.upcall		= gss_pipe_upcall,
+	.upcall		= rpc_pipe_generic_upcall,
 	.downcall	= gss_pipe_downcall,
 	.destroy_msg	= gss_pipe_destroy_msg,
 	.open_pipe	= gss_pipe_open_v1,
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index b181e34..67dbc18 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -77,6 +77,26 @@ rpc_timeout_upcall_queue(struct work_struct *work)
 	rpc_purge_list(rpci, &free_list, destroy_msg, -ETIMEDOUT);
 }
 
+ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg,
+				char __user *dst, size_t buflen)
+{
+	char *data = (char *)msg->data + msg->copied;
+	size_t mlen = min(msg->len - msg->copied, buflen);
+	unsigned long left;
+
+	left = copy_to_user(dst, data, mlen);
+	if (left == mlen) {
+		msg->errno = -EFAULT;
+		return -EFAULT;
+	}
+
+	mlen -= left;
+	msg->copied += mlen;
+	msg->errno = 0;
+	return mlen;
+}
+EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall);
+
 /**
  * rpc_queue_upcall - queue an upcall message to userspace
  * @inode: inode of upcall pipe on which to queue given message
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 04/10] pnfsblock: add missing rpc_put_mount and path_put
  2011-09-23  1:50 [PATCH 00/10] pnfsblock fixes for 3.2 Jim Rees
                   ` (2 preceding siblings ...)
  2011-09-23  1:50 ` [PATCH 03/10] SUNRPC/NFS: make rpc pipe upcall generic Jim Rees
@ 2011-09-23  1:50 ` Jim Rees
  2011-09-23  1:50 ` [PATCH 05/10] pnfs: make _set_lo_fail generic Jim Rees
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 15+ messages in thread
From: Jim Rees @ 2011-09-23  1:50 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, peter honeyman

From: Peng Tao <bergwolf@gmail.com>

Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Peng Tao <peng_tao@emc.com>
Signed-off-by: Jim Rees <rees@umich.edu>
---
 fs/nfs/blocklayout/blocklayout.c |    8 ++++++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index dc23833..dee6cae 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -993,17 +993,20 @@ static int __init nfs4blocklayout_init(void)
 			      mnt,
 			      NFS_PIPE_DIRNAME, 0, &path);
 	if (ret)
-		goto out_remove;
+		goto out_putrpc;
 
 	bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL,
 				    &bl_upcall_ops, 0);
+	path_put(&path);
 	if (IS_ERR(bl_device_pipe)) {
 		ret = PTR_ERR(bl_device_pipe);
-		goto out_remove;
+		goto out_putrpc;
 	}
 out:
 	return ret;
 
+out_putrpc:
+	rpc_put_mount();
 out_remove:
 	pnfs_unregister_layoutdriver(&blocklayout_type);
 	return ret;
@@ -1016,6 +1019,7 @@ static void __exit nfs4blocklayout_exit(void)
 
 	pnfs_unregister_layoutdriver(&blocklayout_type);
 	rpc_unlink(bl_device_pipe);
+	rpc_put_mount();
 }
 
 MODULE_ALIAS("nfs-layouttype4-3");
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 05/10] pnfs: make _set_lo_fail generic
  2011-09-23  1:50 [PATCH 00/10] pnfsblock fixes for 3.2 Jim Rees
                   ` (3 preceding siblings ...)
  2011-09-23  1:50 ` [PATCH 04/10] pnfsblock: add missing rpc_put_mount and path_put Jim Rees
@ 2011-09-23  1:50 ` Jim Rees
  2011-09-23  1:50 ` [PATCH 06/10] pnfsblock: init pg_bsize properly Jim Rees
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 15+ messages in thread
From: Jim Rees @ 2011-09-23  1:50 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, peter honeyman

From: Peng Tao <bergwolf@gmail.com>

File layout and block layout both use it to set the layout I/O failure
bit, so make it generic.

Signed-off-by: Peng Tao <peng_tao@emc.com>
Signed-off-by: Jim Rees <rees@umich.edu>
---
 fs/nfs/blocklayout/blocklayout.c |   17 +++--------------
 fs/nfs/nfs4filelayout.c          |   19 +++----------------
 fs/nfs/pnfs.c                    |   12 ++++++++++++
 fs/nfs/pnfs.h                    |    1 +
 4 files changed, 19 insertions(+), 30 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index dee6cae..2167ba2 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -176,17 +176,6 @@ retry:
 	return bio;
 }
 
-static void bl_set_lo_fail(struct pnfs_layout_segment *lseg)
-{
-	if (lseg->pls_range.iomode == IOMODE_RW) {
-		dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
-		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
-	} else {
-		dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
-		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
-	}
-}
-
 /* This is basically copied from mpage_end_io_read */
 static void bl_end_io_read(struct bio *bio, int err)
 {
@@ -206,7 +195,7 @@ static void bl_end_io_read(struct bio *bio, int err)
 	if (!uptodate) {
 		if (!rdata->pnfs_error)
 			rdata->pnfs_error = -EIO;
-		bl_set_lo_fail(rdata->lseg);
+		pnfs_set_lo_fail(rdata->lseg);
 	}
 	bio_put(bio);
 	put_parallel(par);
@@ -370,7 +359,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
 	if (!uptodate) {
 		if (!wdata->pnfs_error)
 			wdata->pnfs_error = -EIO;
-		bl_set_lo_fail(wdata->lseg);
+		pnfs_set_lo_fail(wdata->lseg);
 	}
 	bio_put(bio);
 	put_parallel(par);
@@ -386,7 +375,7 @@ static void bl_end_io_write(struct bio *bio, int err)
 	if (!uptodate) {
 		if (!wdata->pnfs_error)
 			wdata->pnfs_error = -EIO;
-		bl_set_lo_fail(wdata->lseg);
+		pnfs_set_lo_fail(wdata->lseg);
 	}
 	bio_put(bio);
 	put_parallel(par);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index e8915d4..4c78c62 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -77,19 +77,6 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
 	BUG();
 }
 
-/* For data server errors we don't recover from */
-static void
-filelayout_set_lo_fail(struct pnfs_layout_segment *lseg)
-{
-	if (lseg->pls_range.iomode == IOMODE_RW) {
-		dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
-		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
-	} else {
-		dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
-		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
-	}
-}
-
 static int filelayout_async_handle_error(struct rpc_task *task,
 					 struct nfs4_state *state,
 					 struct nfs_client *clp,
@@ -145,7 +132,7 @@ static int filelayout_read_done_cb(struct rpc_task *task,
 		dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
 			__func__, data->ds_clp, data->ds_clp->cl_session);
 		if (reset) {
-			filelayout_set_lo_fail(data->lseg);
+			pnfs_set_lo_fail(data->lseg);
 			nfs4_reset_read(task, data);
 			clp = NFS_SERVER(data->inode)->nfs_client;
 		}
@@ -221,7 +208,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
 		dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
 			__func__, data->ds_clp, data->ds_clp->cl_session);
 		if (reset) {
-			filelayout_set_lo_fail(data->lseg);
+			pnfs_set_lo_fail(data->lseg);
 			nfs4_reset_write(task, data);
 			clp = NFS_SERVER(data->inode)->nfs_client;
 		} else
@@ -256,7 +243,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
 			__func__, data->ds_clp, data->ds_clp->cl_session);
 		if (reset) {
 			prepare_to_resend_writes(data);
-			filelayout_set_lo_fail(data->lseg);
+			pnfs_set_lo_fail(data->lseg);
 		} else
 			nfs_restart_rpc(task, data->ds_clp);
 		return -EAGAIN;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e550e88..6b19fff 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1381,6 +1381,18 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
 	}
 }
 
+void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
+{
+	if (lseg->pls_range.iomode == IOMODE_RW) {
+		dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
+		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
+	} else {
+		dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
+		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+	}
+}
+EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
+
 void
 pnfs_set_layoutcommit(struct nfs_write_data *wdata)
 {
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 01cbfd5..94e760e 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -178,6 +178,7 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
 void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
 int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
 bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
+void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
 int pnfs_layout_process(struct nfs4_layoutget *lgp);
 void pnfs_free_lseg_list(struct list_head *tmp_list);
 void pnfs_destroy_layout(struct nfs_inode *);
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 06/10] pnfsblock: init pg_bsize properly
  2011-09-23  1:50 [PATCH 00/10] pnfsblock fixes for 3.2 Jim Rees
                   ` (4 preceding siblings ...)
  2011-09-23  1:50 ` [PATCH 05/10] pnfs: make _set_lo_fail generic Jim Rees
@ 2011-09-23  1:50 ` Jim Rees
  2011-09-23  5:21   ` Boaz Harrosh
  2011-09-23  1:50 ` [PATCH 07/10] pnfs: recoalesce when ld write pagelist fails Jim Rees
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 15+ messages in thread
From: Jim Rees @ 2011-09-23  1:50 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, peter honeyman

From: Peng Tao <bergwolf@gmail.com>

pg_bsize is server->wsize/rsize by default. For the block layout we want
to use the lseg length instead.

Signed-off-by: Peng Tao <peng_tao@emc.com>
Signed-off-by: Jim Rees <rees@umich.edu>
---
 fs/nfs/blocklayout/blocklayout.c |   20 ++++++++++++++++++--
 1 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 2167ba2..f5a7fa6 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -923,14 +923,30 @@ bl_clear_layoutdriver(struct nfs_server *server)
 	return 0;
 }
 
+static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio,
+			    struct nfs_page *req)
+{
+	pnfs_generic_pg_init_read(pgio, req);
+	if (pgio->pg_lseg)
+		pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
+}
+
+static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio,
+			     struct nfs_page *req)
+{
+	pnfs_generic_pg_init_write(pgio, req);
+	if (pgio->pg_lseg)
+		pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
+}
+
 static const struct nfs_pageio_ops bl_pg_read_ops = {
-	.pg_init = pnfs_generic_pg_init_read,
+	.pg_init = bl_pg_init_read,
 	.pg_test = pnfs_generic_pg_test,
 	.pg_doio = pnfs_generic_pg_readpages,
 };
 
 static const struct nfs_pageio_ops bl_pg_write_ops = {
-	.pg_init = pnfs_generic_pg_init_write,
+	.pg_init = bl_pg_init_write,
 	.pg_test = pnfs_generic_pg_test,
 	.pg_doio = pnfs_generic_pg_writepages,
 };
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 07/10] pnfs: recoalesce when ld write pagelist fails
  2011-09-23  1:50 [PATCH 00/10] pnfsblock fixes for 3.2 Jim Rees
                   ` (5 preceding siblings ...)
  2011-09-23  1:50 ` [PATCH 06/10] pnfsblock: init pg_bsize properly Jim Rees
@ 2011-09-23  1:50 ` Jim Rees
  2011-09-23  1:50 ` [PATCH 08/10] pnfs: recoalesce when ld read " Jim Rees
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 15+ messages in thread
From: Jim Rees @ 2011-09-23  1:50 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, peter honeyman

From: Peng Tao <bergwolf@gmail.com>

When a pnfs pagelist write fails, we need to set pg_recoalesce and resend
the I/O to the MDS.

Signed-off-by: Peng Tao <peng_tao@emc.com>
Signed-off-by: Jim Rees <rees@umich.edu>
---
 fs/nfs/pnfs.c  |   20 +++++++-------------
 fs/nfs/pnfs.h  |    2 +-
 fs/nfs/write.c |   25 ++++++++++++++++++++++++-
 3 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6b19fff..a205c8e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1168,23 +1168,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
 /*
  * Called by non rpc-based layout drivers
  */
-int
-pnfs_ld_write_done(struct nfs_write_data *data)
+void pnfs_ld_write_done(struct nfs_write_data *data)
 {
-	int status;
-
-	if (!data->pnfs_error) {
+	if (likely(!data->pnfs_error)) {
 		pnfs_set_layoutcommit(data);
 		data->mds_ops->rpc_call_done(&data->task, data);
-		data->mds_ops->rpc_release(data);
-		return 0;
+	} else {
+		put_lseg(data->lseg);
+		data->lseg = NULL;
+		dprintk("pnfs write error = %d\n", data->pnfs_error);
 	}
-
-	dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
-		data->pnfs_error);
-	status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
-				    data->mds_ops, NFS_FILE_SYNC);
-	return status ? : -EAGAIN;
+	data->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
 
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 94e760e..71c23d4 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -201,7 +201,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
 int _pnfs_return_layout(struct inode *);
-int pnfs_ld_write_done(struct nfs_write_data *);
+void pnfs_ld_write_done(struct nfs_write_data *);
 int pnfs_ld_read_done(struct nfs_read_data *);
 struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
 					       struct nfs_open_context *ctx,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c9bd2a6..62eb5c5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1165,7 +1165,13 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 static void nfs_writeback_release_full(void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
-	int status = data->task.tk_status;
+	int ret, status = data->task.tk_status;
+	struct nfs_pageio_descriptor pgio;
+
+	if (data->pnfs_error) {
+		nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE);
+		pgio.pg_recoalesce = 1;
+	}
 
 	/* Update attributes as result of writeback. */
 	while (!list_empty(&data->pages)) {
@@ -1181,6 +1187,11 @@ static void nfs_writeback_release_full(void *calldata)
 			req->wb_bytes,
 			(long long)req_offset(req));
 
+		if (data->pnfs_error) {
+			dprintk(", pnfs error = %d\n", data->pnfs_error);
+			goto next;
+		}
+
 		if (status < 0) {
 			nfs_set_pageerror(page);
 			nfs_context_set_write_error(req->wb_context, status);
@@ -1200,7 +1211,19 @@ remove_request:
 	next:
 		nfs_clear_page_tag_locked(req);
 		nfs_end_page_writeback(page);
+		if (data->pnfs_error) {
+			lock_page(page);
+			nfs_pageio_cond_complete(&pgio, page->index);
+			ret = nfs_page_async_flush(&pgio, page, 0);
+			if (ret) {
+				nfs_set_pageerror(page);
+				dprintk("rewrite to MDS error = %d\n", ret);
+			}
+			unlock_page(page);
+		}
 	}
+	if (data->pnfs_error)
+		nfs_pageio_complete(&pgio);
 	nfs_writedata_release(calldata);
 }
 
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 08/10] pnfs: recoalesce when ld read pagelist fails
  2011-09-23  1:50 [PATCH 00/10] pnfsblock fixes for 3.2 Jim Rees
                   ` (6 preceding siblings ...)
  2011-09-23  1:50 ` [PATCH 07/10] pnfs: recoalesce when ld write pagelist fails Jim Rees
@ 2011-09-23  1:50 ` Jim Rees
  2011-09-23  1:50 ` [PATCH 09/10] pnfsblock: fix NULL pointer dereference Jim Rees
  2011-09-23  1:50 ` [PATCH 10/10] pnfsblock: fix writeback deadlock Jim Rees
  9 siblings, 0 replies; 15+ messages in thread
From: Jim Rees @ 2011-09-23  1:50 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, peter honeyman

From: Peng Tao <bergwolf@gmail.com>

When a pnfs pagelist read fails, we need to set pg_recoalesce and resend
the I/O to the MDS.

Signed-off-by: Peng Tao <peng_tao@emc.com>
Signed-off-by: Jim Rees <rees@umich.edu>
---
 fs/nfs/pnfs.c |   20 +++++++-------------
 fs/nfs/pnfs.h |    2 +-
 fs/nfs/read.c |   12 +++++++++++-
 3 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a205c8e..ee73d9a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1262,23 +1262,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
 /*
  * Called by non rpc-based layout drivers
  */
-int
-pnfs_ld_read_done(struct nfs_read_data *data)
+void pnfs_ld_read_done(struct nfs_read_data *data)
 {
-	int status;
-
-	if (!data->pnfs_error) {
+	if (likely(!data->pnfs_error)) {
 		__nfs4_read_done_cb(data);
 		data->mds_ops->rpc_call_done(&data->task, data);
-		data->mds_ops->rpc_release(data);
-		return 0;
+	} else {
+		put_lseg(data->lseg);
+		data->lseg = NULL;
+		dprintk("pnfs write error = %d\n", data->pnfs_error);
 	}
-
-	dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
-		data->pnfs_error);
-	status = nfs_initiate_read(data, NFS_CLIENT(data->inode),
-				   data->mds_ops);
-	return status ? : -EAGAIN;
+	data->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
 
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 71c23d4..1509530 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -202,7 +202,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
 int _pnfs_return_layout(struct inode *);
 void pnfs_ld_write_done(struct nfs_write_data *);
-int pnfs_ld_read_done(struct nfs_read_data *);
+void pnfs_ld_read_done(struct nfs_read_data *);
 struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
 					       struct nfs_open_context *ctx,
 					       loff_t pos,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 2171c04..bfc20b1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -541,13 +541,23 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 static void nfs_readpage_release_full(void *calldata)
 {
 	struct nfs_read_data *data = calldata;
+	struct nfs_pageio_descriptor pgio;
 
+	if (data->pnfs_error) {
+		nfs_pageio_init_read_mds(&pgio, data->inode);
+		pgio.pg_recoalesce = 1;
+	}
 	while (!list_empty(&data->pages)) {
 		struct nfs_page *req = nfs_list_entry(data->pages.next);
 
 		nfs_list_remove_request(req);
-		nfs_readpage_release(req);
+		if (!data->pnfs_error)
+			nfs_readpage_release(req);
+		else
+			nfs_pageio_add_request(&pgio, req);
 	}
+	if (data->pnfs_error)
+		nfs_pageio_complete(&pgio);
 	nfs_readdata_release(calldata);
 }
 
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 09/10] pnfsblock: fix NULL pointer dereference
  2011-09-23  1:50 [PATCH 00/10] pnfsblock fixes for 3.2 Jim Rees
                   ` (7 preceding siblings ...)
  2011-09-23  1:50 ` [PATCH 08/10] pnfs: recoalesce when ld read " Jim Rees
@ 2011-09-23  1:50 ` Jim Rees
  2011-09-23  1:50 ` [PATCH 10/10] pnfsblock: fix writeback deadlock Jim Rees
  9 siblings, 0 replies; 15+ messages in thread
From: Jim Rees @ 2011-09-23  1:50 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, peter honeyman

From: Peng Tao <bergwolf@gmail.com>

bl_add_page_to_bio returns error pointer. bio should be reset to
NULL in failure cases as the out path always calls bl_submit_bio.

Signed-off-by: Peng Tao <peng_tao@emc.com>
Signed-off-by: Jim Rees <rees@umich.edu>
---
 fs/nfs/blocklayout/blocklayout.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index f5a7fa6..cc63717 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -292,6 +292,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 						 bl_end_io_read, par);
 			if (IS_ERR(bio)) {
 				rdata->pnfs_error = PTR_ERR(bio);
+				bio = NULL;
 				goto out;
 			}
 		}
@@ -581,6 +582,7 @@ fill_invalid_ext:
 						 bl_end_io_write_zero, par);
 			if (IS_ERR(bio)) {
 				wdata->pnfs_error = PTR_ERR(bio);
+				bio = NULL;
 				goto out;
 			}
 			/* FIXME: This should be done in bi_end_io */
@@ -629,6 +631,7 @@ next_page:
 					 bl_end_io_write, par);
 		if (IS_ERR(bio)) {
 			wdata->pnfs_error = PTR_ERR(bio);
+			bio = NULL;
 			goto out;
 		}
 		isect += PAGE_CACHE_SECTORS;
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 10/10] pnfsblock: fix writeback deadlock
  2011-09-23  1:50 [PATCH 00/10] pnfsblock fixes for 3.2 Jim Rees
                   ` (8 preceding siblings ...)
  2011-09-23  1:50 ` [PATCH 09/10] pnfsblock: fix NULL pointer dereference Jim Rees
@ 2011-09-23  1:50 ` Jim Rees
  9 siblings, 0 replies; 15+ messages in thread
From: Jim Rees @ 2011-09-23  1:50 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, peter honeyman

From: Peng Tao <bergwolf@gmail.com>

We should check if the sector is already initialized before
trying to grab the page from page cache. Otherwise when two
pages of the same block are written back by two threads each
calling from writepage_locked, it can cause deadlock like below.

 [ 1080.972099] INFO: task kswapd0:25 blocked for more than 120 seconds.
 [ 1080.972377] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 [ 1080.972812] kswapd0         D ffff88000c4926c0     0    25      2 0x00000000
 [ 1080.972816]  ffff88000df276b0 0000000000000046 ffff88000df27640 ffffffff81013ba7
 [ 1080.972821]  ffff88000c492310 ffff88000df27fd8 ffff88000df27fd8 00000000001d3440
 [ 1080.972824]  ffff88000c378000 ffff88000c492310 ffff8800175d3d40 ffff880017fc75a8
 [ 1080.972828] Call Trace:
 [ 1080.972860]  [<ffffffff81013ba7>] ? read_tsc+0x9/0x19
 [ 1080.972877]  [<ffffffff810e0b23>] ? lock_page+0x2b/0x2b
 [ 1080.972899]  [<ffffffff81475a1d>] io_schedule+0x63/0x7e
 [ 1080.972902]  [<ffffffff810e0b31>] sleep_on_page+0xe/0x12
 [ 1080.972905]  [<ffffffff81475fe8>] __wait_on_bit_lock+0x46/0x8f
 [ 1080.972916]  [<ffffffff810822d7>] ? lock_release_holdtime.part.7+0x6b/0x72
 [ 1080.972919]  [<ffffffff810e0af6>] __lock_page+0x66/0x68
 [ 1080.972928]  [<ffffffff81072705>] ? autoremove_wake_function+0x3d/0x3d
 [ 1080.972932]  [<ffffffff810e0b1f>] lock_page+0x27/0x2b
 [ 1080.972934]  [<ffffffff810e0bcf>] find_lock_page+0x34/0x57
 [ 1080.972937]  [<ffffffff810e1738>] find_or_create_page+0x34/0x8a
 [ 1080.972947]  [<ffffffffa034245b>] bl_write_pagelist+0x205/0x6da [blocklayoutdriver]
 [ 1080.972951]  [<ffffffffa034145d>] ? bl_free_lseg+0x38/0x38 [blocklayoutdriver]
 [ 1080.972995]  [<ffffffffa02e27b9>] ? nfs_write_rpcsetup+0x118/0x123 [nfs]
 [ 1080.973033]  [<ffffffffa030246b>] pnfs_generic_pg_writepages+0x10b/0x1f4 [nfs]
 [ 1080.973089]  [<ffffffffa02deaae>] nfs_pageio_doio+0x1a/0x43 [nfs]
 [ 1080.973098]  [<ffffffffa02df035>] nfs_pageio_complete+0x16/0x2d [nfs]
 [ 1080.973108]  [<ffffffffa02e2d8f>] nfs_writepage_locked+0xa0/0xbf [nfs]
 [ 1080.973119]  [<ffffffffa02e36a1>] nfs_writepage+0x16/0x2b [nfs]
 [ 1080.973122]  [<ffffffff810e8762>] ? clear_page_dirty_for_io+0x87/0x9a
 [ 1080.973133]  [<ffffffff810efc5b>] shrink_page_list+0x39b/0x6c8
 [ 1080.973139]  [<ffffffff810f03bb>] shrink_inactive_list+0x22c/0x39e
 [ 1080.973144]  [<ffffffff810822d7>] ? lock_release_holdtime.part.7+0x6b/0x72
 [ 1080.973148]  [<ffffffff810f0c33>] shrink_zone+0x445/0x588
 [ 1080.973152]  [<ffffffff810f1a11>] balance_pgdat+0x2c2/0x56b
 [ 1080.973170]  [<ffffffff81254208>] ? __bitmap_weight+0x34/0x80
 [ 1080.973175]  [<ffffffff810f1f78>] kswapd+0x2be/0x2fa
 [ 1080.973179]  [<ffffffff810726c8>] ? __init_waitqueue_head+0x4b/0x4b
 [ 1080.973183]  [<ffffffff810f1cba>] ? balance_pgdat+0x56b/0x56b
 [ 1080.973187]  [<ffffffff81071f69>] kthread+0xa8/0xb0
 [ 1080.973200]  [<ffffffff814806b4>] kernel_thread_helper+0x4/0x10
 [ 1080.973205]  [<ffffffff81071ec1>] ? __init_kthread_worker+0x5a/0x5a
 [ 1080.973210]  [<ffffffff814806b0>] ? gs_change+0x13/0x13
 [ 1080.973213] no locks held by kswapd0/25.

Signed-off-by: Peng Tao <peng_tao@emc.com>
Signed-off-by: Jim Rees <rees@umich.edu>
---
 fs/nfs/blocklayout/blocklayout.c |    8 ++++++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index cc63717..c7582e4 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -533,6 +533,11 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 fill_invalid_ext:
 		dprintk("%s need to zero %d pages\n", __func__, npg_zero);
 		for (;npg_zero > 0; npg_zero--) {
+			if (bl_is_sector_init(be->be_inval, isect)) {
+				dprintk("isect %llu already init\n",
+					(unsigned long long)isect);
+				goto next_page;
+			}
 			/* page ref released in bl_end_io_write_zero */
 			index = isect >> PAGE_CACHE_SECTOR_SHIFT;
 			dprintk("%s zero %dth page: index %lu isect %llu\n",
@@ -552,8 +557,7 @@ fill_invalid_ext:
 			 * PageUptodate: It was read before
 			 * sector_initialized: already written out
 			 */
-			if (PageDirty(page) || PageWriteback(page) ||
-			    bl_is_sector_init(be->be_inval, isect)) {
+			if (PageDirty(page) || PageWriteback(page)) {
 				print_page(page);
 				unlock_page(page);
 				page_cache_release(page);
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 06/10] pnfsblock: init pg_bsize properly
  2011-09-23  1:50 ` [PATCH 06/10] pnfsblock: init pg_bsize properly Jim Rees
@ 2011-09-23  5:21   ` Boaz Harrosh
  2011-09-23  5:55     ` tao.peng
  2011-09-23 12:22     ` Jim Rees
  0 siblings, 2 replies; 15+ messages in thread
From: Boaz Harrosh @ 2011-09-23  5:21 UTC (permalink / raw)
  To: Jim Rees; +Cc: Trond Myklebust, linux-nfs, peter honeyman

On 09/23/2011 04:50 AM, Jim Rees wrote:
> From: Peng Tao <bergwolf@gmail.com>
> 
> pg_bsize is server->wsize/rsize by default. We would want to use the lseg
> length.
> 
> Signed-off-by: Peng Tao <peng_tao@emc.com>
> Signed-off-by: Jim Rees <rees@umich.edu>

If you want to get lazy about this patch and take the easy way out.
The least you can do is supply the same fix to that other place
that has the same bug.

This is not nice. You have identified a deficiency in the generic
layer, You know that objects would have the same bug, (because I told you)
and you just don't care. I have spent plenty of times slaving over
blocks code when changing or fixing generic layer. (And Benny even more
than me)

And when you will actually send a patch that does exactly the same in
two places, which access only generic members, you might see that it
might be better to fix it in a single place at the generic layer.

NACK. I'm ok with getting lazy like below, but only if you also
add the same code to objio_osd.c

Sigh
Boaz

> ---
>  fs/nfs/blocklayout/blocklayout.c |   20 ++++++++++++++++++--
>  1 files changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
> index 2167ba2..f5a7fa6 100644
> --- a/fs/nfs/blocklayout/blocklayout.c
> +++ b/fs/nfs/blocklayout/blocklayout.c
> @@ -923,14 +923,30 @@ bl_clear_layoutdriver(struct nfs_server *server)
>  	return 0;
>  }
>  
> +static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio,
> +			    struct nfs_page *req)
> +{
> +	pnfs_generic_pg_init_read(pgio, req);
> +	if (pgio->pg_lseg)
> +		pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
> +}
> +
> +static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio,
> +			     struct nfs_page *req)
> +{
> +	pnfs_generic_pg_init_write(pgio, req);
> +	if (pgio->pg_lseg)
> +		pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
> +}
> +
>  static const struct nfs_pageio_ops bl_pg_read_ops = {
> -	.pg_init = pnfs_generic_pg_init_read,
> +	.pg_init = bl_pg_init_read,
>  	.pg_test = pnfs_generic_pg_test,
>  	.pg_doio = pnfs_generic_pg_readpages,
>  };
>  
>  static const struct nfs_pageio_ops bl_pg_write_ops = {
> -	.pg_init = pnfs_generic_pg_init_write,
> +	.pg_init = bl_pg_init_write,
>  	.pg_test = pnfs_generic_pg_test,
>  	.pg_doio = pnfs_generic_pg_writepages,
>  };


^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH 06/10] pnfsblock: init pg_bsize properly
  2011-09-23  5:21   ` Boaz Harrosh
@ 2011-09-23  5:55     ` tao.peng
  2011-09-23  8:55       ` Boaz Harrosh
  2011-09-23 12:22     ` Jim Rees
  1 sibling, 1 reply; 15+ messages in thread
From: tao.peng @ 2011-09-23  5:55 UTC (permalink / raw)
  To: bharrosh, rees; +Cc: Trond.Myklebust, linux-nfs, honey

> -----Original Message-----
> From: linux-nfs-owner@vger.kernel.org [mailto:linux-nfs-owner@vger.kernel.org]
> On Behalf Of Boaz Harrosh
> Sent: Friday, September 23, 2011 1:22 PM
> To: Jim Rees
> Cc: Trond Myklebust; linux-nfs@vger.kernel.org; peter honeyman
> Subject: Re: [PATCH 06/10] pnfsblock: init pg_bsize properly
> 
> On 09/23/2011 04:50 AM, Jim Rees wrote:
> > From: Peng Tao <bergwolf@gmail.com>
> >
> > pg_bsize is server->wsize/rsize by default. We would want to use the lseg
> > length.
> >
> > Signed-off-by: Peng Tao <peng_tao@emc.com>
> > Signed-off-by: Jim Rees <rees@umich.edu>
> 
> If you want to get lazy about this patch and take the easy way out.
> The least you can do is supply the same fix to that other place
> that has the same bug.
If I remember correctly, this patch should be dropped and Jim is working on a replacement, no?
http://www.spinics.net/lists/linux-nfs/msg23596.html


Cheers,
Tao


> 
> This is not nice. You have identified a deficiency in the generic
> layer, You know that objects would have the same bug, (because I told you)
> and you just don't care. I have spent plenty of times slaving over
> blocks code when changing or fixing generic layer. (And Benny even more
> then me)
> 
> And when you will actually send a patch that does exactly the same in
> two places, which access only generic members, you might see that it
> might be better to fix it in a single place at the generic layer.
> 
> NACK. I'm ok with getting lazy like below, but only if you also
> add the same code to objio_osd.c
> 
> Sigh
> Boaz
> 
> > ---
> >  fs/nfs/blocklayout/blocklayout.c |   20 ++++++++++++++++++--
> >  1 files changed, 18 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
> > index 2167ba2..f5a7fa6 100644
> > --- a/fs/nfs/blocklayout/blocklayout.c
> > +++ b/fs/nfs/blocklayout/blocklayout.c
> > @@ -923,14 +923,30 @@ bl_clear_layoutdriver(struct nfs_server *server)
> >  	return 0;
> >  }
> >
> > +static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio,
> > +			    struct nfs_page *req)
> > +{
> > +	pnfs_generic_pg_init_read(pgio, req);
> > +	if (pgio->pg_lseg)
> > +		pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
> > +}
> > +
> > +static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio,
> > +			     struct nfs_page *req)
> > +{
> > +	pnfs_generic_pg_init_write(pgio, req);
> > +	if (pgio->pg_lseg)
> > +		pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
> > +}
> > +
> >  static const struct nfs_pageio_ops bl_pg_read_ops = {
> > -	.pg_init = pnfs_generic_pg_init_read,
> > +	.pg_init = bl_pg_init_read,
> >  	.pg_test = pnfs_generic_pg_test,
> >  	.pg_doio = pnfs_generic_pg_readpages,
> >  };
> >
> >  static const struct nfs_pageio_ops bl_pg_write_ops = {
> > -	.pg_init = pnfs_generic_pg_init_write,
> > +	.pg_init = bl_pg_init_write,
> >  	.pg_test = pnfs_generic_pg_test,
> >  	.pg_doio = pnfs_generic_pg_writepages,
> >  };
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 06/10] pnfsblock: init pg_bsize properly
  2011-09-23  5:55     ` tao.peng
@ 2011-09-23  8:55       ` Boaz Harrosh
  0 siblings, 0 replies; 15+ messages in thread
From: Boaz Harrosh @ 2011-09-23  8:55 UTC (permalink / raw)
  To: tao.peng; +Cc: rees, Trond.Myklebust, linux-nfs, honey

On 09/23/2011 08:55 AM, tao.peng@emc.com wrote:
>>
>> If you want to get lazy about this patch and take the easy way out.
>> The least you can do is supply the same fix to that other place
>> that has the same bug.
> If I remember correctly, this patch should be dropped and Jim is working on a replacement, no?
> http://www.spinics.net/lists/linux-nfs/msg23596.html
> 

Thanks, Yes I thought so too, but it keeps coming up. It's already in Benny's
tree. At least for now it should be patching both places, til a better one
replaces it.

Boaz

> Cheers,
> Tao

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 06/10] pnfsblock: init pg_bsize properly
  2011-09-23  5:21   ` Boaz Harrosh
  2011-09-23  5:55     ` tao.peng
@ 2011-09-23 12:22     ` Jim Rees
  1 sibling, 0 replies; 15+ messages in thread
From: Jim Rees @ 2011-09-23 12:22 UTC (permalink / raw)
  To: Boaz Harrosh; +Cc: Trond Myklebust, linux-nfs, peter honeyman

Boaz Harrosh wrote:

  On 09/23/2011 04:50 AM, Jim Rees wrote:
  > From: Peng Tao <bergwolf@gmail.com>
  > 
  > pg_bsize is server->wsize/rsize by default. We would want to use the lseg
  > length.
  > 
  > Signed-off-by: Peng Tao <peng_tao@emc.com>
  > Signed-off-by: Jim Rees <rees@umich.edu>
  
  If you want to get lazy about this patch and take the easy way out.
  The least you can do is supply the same fix to that other place
  that has the same bug.
  
  This is not nice. You have identified a deficiency in the generic
  layer, You know that objects would have the same bug, (because I told you)
  and you just don't care. I have spent plenty of times slaving over
  blocks code when changing or fixing generic layer. (And Benny even more
  than me)
  
  And when you will actually send a patch that does exactly the same in
  two places, which access only generic members, you might see that it
  might be better to fix it in a single place at the generic layer.
  
  NACK. I'm ok with getting lazy like below, but only if you also
  add the same code to objio_osd.c

Sorry, I sent the wrong patch.  Trond, please drop this one.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2011-09-23 12:22 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-23  1:50 [PATCH 00/10] pnfsblock fixes for 3.2 Jim Rees
2011-09-23  1:50 ` [PATCH 01/10] pnfsblock: fix return code confusion Jim Rees
2011-09-23  1:50 ` [PATCH 02/10] pnfsblock: fix size of upcall message Jim Rees
2011-09-23  1:50 ` [PATCH 03/10] SUNRPC/NFS: make rpc pipe upcall generic Jim Rees
2011-09-23  1:50 ` [PATCH 04/10] pnfsblock: add missing rpc_put_mount and path_put Jim Rees
2011-09-23  1:50 ` [PATCH 05/10] pnfs: make _set_lo_fail generic Jim Rees
2011-09-23  1:50 ` [PATCH 06/10] pnfsblock: init pg_bsize properly Jim Rees
2011-09-23  5:21   ` Boaz Harrosh
2011-09-23  5:55     ` tao.peng
2011-09-23  8:55       ` Boaz Harrosh
2011-09-23 12:22     ` Jim Rees
2011-09-23  1:50 ` [PATCH 07/10] pnfs: recoalesce when ld write pagelist fails Jim Rees
2011-09-23  1:50 ` [PATCH 08/10] pnfs: recoalesce when ld read " Jim Rees
2011-09-23  1:50 ` [PATCH 09/10] pnfsblock: fix NULL pointer dereference Jim Rees
2011-09-23  1:50 ` [PATCH 10/10] pnfsblock: fix writeback deadlock Jim Rees

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.