All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr
@ 2012-05-23  9:02 andros
  2012-05-23  9:02 ` [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN andros
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: andros @ 2012-05-23  9:02 UTC (permalink / raw)
  To: trond.myklebust; +Cc: linux-nfs, Andy Adamson

From: Andy Adamson <andros@netapp.com>

The client supports only one layout type per file system, so only the
first threshold_item4 of each mdsthreshold4 is decoded.

Signed-off-by: Andy Adamson <andros@netapp.com>
---
 fs/nfs/nfs4xdr.c        |  125 ++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/nfs4.h    |    7 +++
 include/linux/nfs_xdr.h |   10 ++++
 3 files changed, 140 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index db040e9..db199f8 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -99,9 +99,12 @@ static int nfs4_stat_to_errno(int);
 #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
 #define nfs4_owner_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
 #define nfs4_group_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
+/* We support only one layout type per file system */
+#define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8)
 /* This is based on getfattr, which uses the most attributes: */
 #define nfs4_fattr_value_maxsz	(1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
-				3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
+				3 + 3 + 3 + nfs4_owner_maxsz + \
+				nfs4_group_maxsz + decode_mdsthreshold_maxsz))
 #define nfs4_fattr_maxsz	(nfs4_fattr_bitmap_maxsz + \
 				nfs4_fattr_value_maxsz)
 #define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -1170,6 +1173,16 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c
 			   bitmask[1] & nfs4_fattr_bitmap[1], hdr);
 }
 
+static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask,
+				 struct compound_hdr *hdr)
+{
+	encode_getattr_three(xdr,
+			     bitmask[0] & nfs4_fattr_bitmap[0],
+			     bitmask[1] & nfs4_fattr_bitmap[1],
+			     bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD,
+			     hdr);
+}
+
 static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
 {
 	encode_getattr_three(xdr,
@@ -2161,7 +2174,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_putfh(xdr, args->fh, &hdr);
 	encode_open(xdr, args, &hdr);
 	encode_getfh(xdr, &hdr);
-	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_getfattr_open(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -4183,6 +4196,110 @@ xdr_error:
 	return status;
 }
 
+static int decode_threshold_hint(struct xdr_stream *xdr,
+				  uint32_t *bitmap,
+				  uint64_t *res,
+				  uint32_t hint_bit)
+{
+	__be32 *p;
+
+	*res = 0;
+	if (likely(bitmap[0] & hint_bit)) {
+		p = xdr_inline_decode(xdr, 8);
+		if (unlikely(!p))
+			goto out_overflow;
+		xdr_decode_hyper(p, res);
+	}
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static int decode_first_threshold_item4(struct xdr_stream *xdr,
+					struct nfs4_threshold *res)
+{
+	__be32 *p, *savep;
+	uint32_t bitmap[3] = {0,}, attrlen;
+	int status;
+
+	/* layout type */
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p)) {
+		print_overflow_msg(__func__, xdr);
+		return -EIO;
+	}
+	res->l_type = be32_to_cpup(p);
+
+	/* thi_hintset bitmap */
+	status = decode_attr_bitmap(xdr, bitmap);
+	if (status < 0)
+		goto xdr_error;
+
+	/* thi_hintlist length */
+	status = decode_attr_length(xdr, &attrlen, &savep);
+	if (status < 0)
+		goto xdr_error;
+	/* thi_hintlist */
+	status = decode_threshold_hint(xdr, bitmap, &res->rd_sz, THRESHOLD_RD);
+	if (status < 0)
+		goto xdr_error;
+	status = decode_threshold_hint(xdr, bitmap, &res->wr_sz, THRESHOLD_WR);
+	if (status < 0)
+		goto xdr_error;
+	status = decode_threshold_hint(xdr, bitmap, &res->rd_io_sz,
+				       THRESHOLD_RD_IO);
+	if (status < 0)
+		goto xdr_error;
+	status = decode_threshold_hint(xdr, bitmap, &res->wr_io_sz,
+				       THRESHOLD_WR_IO);
+	if (status < 0)
+		goto xdr_error;
+
+	status = verify_attr_len(xdr, savep, attrlen);
+	res->bm = bitmap[0];
+
+	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
+		 __func__, res->bm, res->rd_sz, res->wr_sz, res->rd_io_sz,
+		res->wr_io_sz);
+xdr_error:
+	dprintk("%s ret=%d!\n", __func__, status);
+	return status;
+}
+
+/*
+ * Thresholds on pNFS direct I/O vs. MDS I/O
+ */
+static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
+				    uint32_t *bitmap,
+				    struct nfs4_threshold *res)
+{
+	__be32 *p;
+	int status = 0;
+	uint32_t num;
+
+	if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U)))
+		return -EIO;
+	if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) {
+		p = xdr_inline_decode(xdr, 4);
+		if (unlikely(!p))
+			goto out_overflow;
+		num = be32_to_cpup(p);
+		if (num == 0)
+			return 0;
+		if (num > 1)
+			printk(KERN_INFO "%s: Warning: Multiple pNFS layout "
+				"drivers per filesystem not supported\n",
+				__func__);
+
+		status = decode_first_threshold_item4(xdr, res);
+	}
+	return status;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
 static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
 		struct nfs_fattr *fattr, struct nfs_fh *fh,
 		struct nfs4_fs_locations *fs_loc,
@@ -4289,6 +4406,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
 		goto xdr_error;
 	fattr->valid |= status;
 
+	status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold);
+	if (status < 0)
+		goto xdr_error;
+
 xdr_error:
 	dprintk("%s: xdr returned %d\n", __func__, -status);
 	return status;
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 0987146..72b6bad 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -526,6 +526,13 @@ enum lock_type4 {
 #define FATTR4_WORD1_MOUNTED_ON_FILEID  (1UL << 23)
 #define FATTR4_WORD1_FS_LAYOUT_TYPES    (1UL << 30)
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
+#define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
+
+/* MDS threshold bitmap bits */
+#define THRESHOLD_RD                    (1UL << 0)
+#define THRESHOLD_WR                    (1UL << 1)
+#define THRESHOLD_RD_IO                 (1UL << 2)
+#define THRESHOLD_WR_IO                 (1UL << 3)
 
 #define NFSPROC4_NULL 0
 #define NFSPROC4_COMPOUND 1
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2e53a3f..5b8e42e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -35,6 +35,15 @@ static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid
 	return a->major == b->major && a->minor == b->minor;
 }
 
+struct nfs4_threshold {
+	__u32	bm;
+	__u32	l_type;
+	__u64	rd_sz;
+	__u64	wr_sz;
+	__u64	rd_io_sz;
+	__u64	wr_io_sz;
+};
+
 struct nfs_fattr {
 	unsigned int		valid;		/* which fields are valid */
 	umode_t			mode;
@@ -67,6 +76,7 @@ struct nfs_fattr {
 	unsigned long		gencount;
 	struct nfs4_string	*owner_name;
 	struct nfs4_string	*group_name;
+	struct nfs4_threshold	*mdsthreshold;	/* pNFS threshold hints */
 };
 
 #define NFS_ATTR_FATTR_TYPE		(1U << 0)
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN
  2012-05-23  9:02 [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr andros
@ 2012-05-23  9:02 ` andros
  2012-05-23  9:02 ` [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros
  2012-05-23  9:02 ` [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters andros
  2 siblings, 0 replies; 8+ messages in thread
From: andros @ 2012-05-23  9:02 UTC (permalink / raw)
  To: trond.myklebust; +Cc: linux-nfs, Andy Adamson

From: Andy Adamson <andros@netapp.com>

Signed-off-by: Andy Adamson <andros@netapp.com>
---
 fs/nfs/inode.c         |    2 ++
 fs/nfs/nfs4proc.c      |   38 +++++++++++++++++++++++++++++++++-----
 fs/nfs/pnfs.c          |   12 ++++++++++++
 fs/nfs/pnfs.h          |   21 +++++++++++++++++++++
 include/linux/nfs_fs.h |    1 +
 5 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9ad81ce..889f7e5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -641,6 +641,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f
 	nfs_init_lock_context(&ctx->lock_context);
 	ctx->lock_context.open_context = ctx;
 	INIT_LIST_HEAD(&ctx->list);
+	ctx->mdsthreshold = NULL;
 	return ctx;
 }
 
@@ -669,6 +670,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
 		put_rpccred(ctx->cred);
 	dput(ctx->dentry);
 	nfs_sb_deactive(sb);
+	kfree(ctx->mdsthreshold);
 	kfree(ctx);
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 78784e5..d84c633 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1781,7 +1781,14 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
 /*
  * Returns a referenced nfs4_state
  */
-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir,
+			struct dentry *dentry,
+			fmode_t fmode,
+			int flags,
+			struct iattr *sattr,
+			struct rpc_cred *cred,
+			struct nfs4_state **res,
+			struct nfs4_threshold **ctx_th)
 {
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
@@ -1806,6 +1813,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
 	if (opendata == NULL)
 		goto err_put_state_owner;
 
+	if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
+		opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
+		if (!opendata->f_attr.mdsthreshold)
+			goto err_opendata_put;
+	}
 	if (dentry->d_inode != NULL)
 		opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
 
@@ -1831,11 +1843,19 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
 			nfs_setattr_update_inode(state->inode, sattr);
 		nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
 	}
+
+	if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server))
+		*ctx_th = opendata->f_attr.mdsthreshold;
+	else
+		kfree(opendata->f_attr.mdsthreshold);
+	opendata->f_attr.mdsthreshold = NULL;
+
 	nfs4_opendata_put(opendata);
 	nfs4_put_state_owner(sp);
 	*res = state;
 	return 0;
 err_opendata_put:
+	kfree(opendata->f_attr.mdsthreshold);
 	nfs4_opendata_put(opendata);
 err_put_state_owner:
 	nfs4_put_state_owner(sp);
@@ -1845,14 +1865,21 @@ out_err:
 }
 
 
-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred)
+static struct nfs4_state *nfs4_do_open(struct inode *dir,
+					struct dentry *dentry,
+					fmode_t fmode,
+					int flags,
+					struct iattr *sattr,
+					struct rpc_cred *cred,
+					struct nfs4_threshold **ctx_th)
 {
 	struct nfs4_exception exception = { };
 	struct nfs4_state *res;
 	int status;
 
 	do {
-		status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res);
+		status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,
+				       &res, ctx_th);
 		if (status == 0)
 			break;
 		/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -2176,7 +2203,8 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags
 	struct nfs4_state *state;
 
 	/* Protect against concurrent sillydeletes */
-	state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred);
+	state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr,
+			     ctx->cred, &ctx->mdsthreshold);
 	if (IS_ERR(state))
 		return ERR_CAST(state);
 	ctx->state = state;
@@ -2778,7 +2806,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		fmode = ctx->mode;
 	}
 	sattr->ia_mode &= ~current_umask();
-	state = nfs4_do_open(dir, de, fmode, flags, sattr, cred);
+	state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL);
 	d_drop(dentry);
 	if (IS_ERR(state)) {
 		status = PTR_ERR(state);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 5d09a36..cbcb6ae 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1630,3 +1630,15 @@ out_free:
 	kfree(data);
 	goto out;
 }
+
+struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
+{
+	struct nfs4_threshold *thp;
+
+	thp = kzalloc(sizeof(*thp), GFP_NOFS);
+	if (!thp) {
+		dprintk("%s mdsthreshold allocation failed\n", __func__);
+		return NULL;
+	}
+	return thp;
+}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 7980756..29fd23c 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -227,6 +227,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head,
 			const struct nfs_pgio_completion_ops *compl_ops);
 int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
 			const struct nfs_pgio_completion_ops *compl_ops);
+struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
 
 /* nfs4_deviceid_flags */
 enum {
@@ -360,6 +361,14 @@ static inline int pnfs_return_layout(struct inode *ino)
 	return 0;
 }
 
+static inline bool
+pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
+		   struct nfs_server *nfss)
+{
+	return (dst && src && src->bm != 0 &&
+					nfss->pnfs_curr_ld->id == src->l_type);
+}
+
 #ifdef NFS_DEBUG
 void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
 #else
@@ -485,6 +494,18 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 	return 0;
 }
 
+static inline bool
+pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
+		   struct nfs_server *nfss)
+{
+	return false;
+}
+
+static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 #endif /* FS_NFS_PNFS_H */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 6cc7dba..ca4a707 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -102,6 +102,7 @@ struct nfs_open_context {
 	int error;
 
 	struct list_head list;
+	struct nfs4_threshold	*mdsthreshold;
 };
 
 struct nfs_open_dir_context {
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold
  2012-05-23  9:02 [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr andros
  2012-05-23  9:02 ` [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN andros
@ 2012-05-23  9:02 ` andros
  2012-05-23 18:19   ` Myklebust, Trond
  2012-05-23  9:02 ` [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters andros
  2 siblings, 1 reply; 8+ messages in thread
From: andros @ 2012-05-23  9:02 UTC (permalink / raw)
  To: trond.myklebust; +Cc: linux-nfs, Andy Adamson

From: Andy Adamson <andros@netapp.com>

Keep track of the number of bytes read or written, including those queued
up to be flushed. For use by mdsthreshold i/o size hints.

No locking needed as this is used as hint information.

Signed-off-by: Andy Adamson <andros@netapp.com>
---
 fs/nfs/file.c          |    8 ++++++--
 fs/nfs/inode.c         |    2 ++
 fs/nfs/pnfs.c          |    3 +++
 include/linux/nfs_fs.h |    3 +++
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8eda8a6..c4cc096 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -203,8 +203,10 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 	if (!result) {
 		result = generic_file_aio_read(iocb, iov, nr_segs, pos);
-		if (result > 0)
+		if (result > 0) {
+			NFS_I(inode)->read_io += result;
 			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
+		}
 	}
 	return result;
 }
@@ -613,8 +615,10 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 		if (err < 0)
 			result = err;
 	}
-	if (result > 0)
+	if (result > 0) {
+		NFS_I(inode)->write_io += written;
 		nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
+	}
 out:
 	return result;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 889f7e5..a6f5fbb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -323,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		inode->i_gid = -2;
 		inode->i_blocks = 0;
 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+		nfsi->write_io = 0;
+		nfsi->read_io = 0;
 
 		nfsi->read_cache_jiffies = fattr->time_start;
 		nfsi->attr_gencount = fattr->gencount;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cbcb6ae..6620606 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 	dprintk("%s:Begin lo %p\n", __func__, lo);
 
 	if (list_empty(&lo->plh_segs)) {
+		/* Reset MDS Threshold I/O counters */
+		NFS_I(lo->plh_inode)->write_io = 0;
+		NFS_I(lo->plh_inode)->read_io = 0;
 		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
 			put_layout_hdr_locked(lo);
 		return 0;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index ca4a707..c6954ac 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -201,6 +201,9 @@ struct nfs_inode {
 
 	/* pNFS layout information */
 	struct pnfs_layout_hdr *layout;
+	/* how many bytes have been written/read and how many bytes queued up */
+	__u64 write_io;
+	__u64 read_io;
 #endif /* CONFIG_NFS_V4*/
 #ifdef CONFIG_NFS_FSCACHE
 	struct fscache_cookie	*fscache;
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters
  2012-05-23  9:02 [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr andros
  2012-05-23  9:02 ` [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN andros
  2012-05-23  9:02 ` [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros
@ 2012-05-23  9:02 ` andros
  2012-05-23 13:25   ` Boaz Harrosh
  2 siblings, 1 reply; 8+ messages in thread
From: andros @ 2012-05-23  9:02 UTC (permalink / raw)
  To: trond.myklebust; +Cc: linux-nfs, Andy Adamson

From: Andy Adamson <andros@netapp.com>

Signed-off-by: Andy Adamson <andros@netapp.com>
---
 fs/nfs/pnfs.c |   79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 79 insertions(+), 0 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6620606..b8323aa 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -936,6 +936,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
 }
 
 /*
+ * Use mdsthreshold hints set at each OPEN to determine if I/O should go
+ * to the MDS or over pNFS
+ *
+ * The nfs_inode read_io and write_io fields are cumulative counters reset
+ * when there are no layout segments. Note that in pnfs_update_layout iomode
+ * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
+ * WRITE request.
+ *
+ * A return of true means use MDS I/O.
+ *
+ * From RFC 5661:
+ * If a file's size is smaller than the file size threshold, data accesses
+ * SHOULD be sent to the metadata server.  If an I/O request has a length that
+ * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
+ * server.  If both file size and I/O size are provided, the client SHOULD
+ * reach or exceed both thresholds before sending its read or write
+ * requests to the data server.
+ */
+static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
+				     struct inode *ino, int iomode)
+{
+	struct nfs4_threshold *t = ctx->mdsthreshold;
+	struct nfs_inode *nfsi = NFS_I(ino);
+	loff_t fsize = i_size_read(ino);
+	bool size = false, size_set = false, io = false, io_set = false, ret = false;
+
+	if (t == NULL)
+		return ret;
+
+	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
+		__func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
+
+	switch (iomode) {
+	case IOMODE_READ:
+		if (t->bm & THRESHOLD_RD) {
+			dprintk("%s fsize %llu\n", __func__, fsize);
+			size_set = true;
+			if (fsize < t->rd_sz)
+				size = true;
+		}
+		if (t->bm & THRESHOLD_RD_IO) {
+			dprintk("%s nfsi->read_io %llu\n", __func__,
+				nfsi->read_io);
+			io_set = true;
+			if (nfsi->read_io < t->rd_io_sz)
+				io = true;
+		}
+		break;
+	case IOMODE_RW:
+		if (t->bm & THRESHOLD_WR) {
+			dprintk("%s fsize %llu\n", __func__, fsize);
+			size_set = true;
+			if (fsize < t->wr_sz)
+				size = true;
+		}
+		if (t->bm & THRESHOLD_WR_IO) {
+			dprintk("%s nfsi->write_io %llu\n", __func__,
+				nfsi->write_io);
+			io_set = true;
+			if (nfsi->write_io < t->wr_io_sz)
+				io = true;
+		}
+		break;
+	}
+	if (size_set && io_set) {
+		if (size && io)
+			ret = true;
+	} else if (size || io)
+		ret = true;
+
+	dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
+	return ret;
+}
+
+/*
  * Layout segment is retreived from the server if not cached.
  * The appropriate layout segment is referenced and returned to the caller.
  */
@@ -962,6 +1037,10 @@ pnfs_update_layout(struct inode *ino,
 
 	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
 		return NULL;
+
+	if (pnfs_within_mdsthreshold(ctx, ino, iomode))
+		return NULL;
+
 	spin_lock(&ino->i_lock);
 	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
 	if (lo == NULL) {
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters
  2012-05-23  9:02 ` [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters andros
@ 2012-05-23 13:25   ` Boaz Harrosh
  0 siblings, 0 replies; 8+ messages in thread
From: Boaz Harrosh @ 2012-05-23 13:25 UTC (permalink / raw)
  To: andros; +Cc: trond.myklebust, linux-nfs

On 05/23/2012 12:02 PM, andros@netapp.com wrote:

> From: Andy Adamson <andros@netapp.com>
> 
> Signed-off-by: Andy Adamson <andros@netapp.com>
> ---
>  fs/nfs/pnfs.c |   79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 79 insertions(+), 0 deletions(-)
> 
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 6620606..b8323aa 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -936,6 +936,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
>  }
>  
>  /*
> + * Use mdsthreshold hints set at each OPEN to determine if I/O should go
> + * to the MDS or over pNFS
> + *
> + * The nfs_inode read_io and write_io fields are cumulative counters reset
> + * when there are no layout segments. Note that in pnfs_update_layout iomode
> + * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
> + * WRITE request.
> + *
> + * A return of true means use MDS I/O.
> + *
> + * From rfc 5661:
> + * If a file's size is smaller than the file size threshold, data accesses
> + * SHOULD be sent to the metadata server.  If an I/O request has a length that
> + * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
> + * server.  If both file size and I/O size are provided, the client SHOULD
> + * reach or exceed  both thresholds before sending its read or write
> + * requests to the data server.
> + */
> +static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
> +				     struct inode *ino, int iomode)
> +{
> +	struct nfs4_threshold *t = ctx->mdsthreshold;
> +	struct nfs_inode *nfsi = NFS_I(ino);
> +	loff_t fsize = i_size_read(ino);
> +	bool size = false, size_set = false, io = false, io_set = false, ret = false;
> +
> +	if (t == NULL)
> +		return ret;
> +
> +	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
> +		__func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
> +
> +	switch (iomode) {
> +	case IOMODE_READ:
> +		if (t->bm & THRESHOLD_RD) {
> +			dprintk("%s fsize %llu\n", __func__, fsize);
> +			size_set = true;
> +			if (fsize < t->rd_sz)
> +				size = true;
> +		}
> +		if (t->bm & THRESHOLD_RD_IO) {
> +			dprintk("%s nfsi->read_io %llu\n", __func__,
> +				nfsi->read_io);
> +			io_set = true;
> +			if (nfsi->read_io < t->rd_io_sz)
> +				io = true;
> +		}
> +		break;
> +	case IOMODE_RW:
> +		if (t->bm & THRESHOLD_WR) {
> +			dprintk("%s fsize %llu\n", __func__, fsize);
> +			size_set = true;
> +			if (fsize < t->wr_sz)
> +				size = true;
> +		}
> +		if (t->bm & THRESHOLD_WR_IO) {
> +			dprintk("%s nfsi->write_io %llu\n", __func__,
> +				nfsi->write_io);
> +			io_set = true;
> +			if (nfsi->write_io < t->wr_io_sz)
> +				io = true;
> +		}
> +		break;
> +	}
> +	if (size_set && io_set) {
> +		if (size && io)
> +			ret = true;
> +	} else if (size || io)
> +		ret = true;
> +
> +	dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
> +	return ret;
> +}
> +
> +/*
>   * Layout segment is retreived from the server if not cached.
>   * The appropriate layout segment is referenced and returned to the caller.
>   */
> @@ -962,6 +1037,10 @@ pnfs_update_layout(struct inode *ino,
>  
>  	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
>  		return NULL;
> +
> +	if (pnfs_within_mdsthreshold(ctx, ino, iomode))
> +		return NULL;
> +


Would we want to use these counters as the recommended layout_size in
read and write, instead of the current PAGE_SIZE?

Boaz

>  	spin_lock(&ino->i_lock);
>  	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
>  	if (lo == NULL) {



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold
  2012-05-23  9:02 ` [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros
@ 2012-05-23 18:19   ` Myklebust, Trond
  2012-05-23 18:41     ` Adamson, Andy
  0 siblings, 1 reply; 8+ messages in thread
From: Myklebust, Trond @ 2012-05-23 18:19 UTC (permalink / raw)
  To: Adamson, Andy; +Cc: linux-nfs

On Wed, 2012-05-23 at 05:02 -0400, andros@netapp.com wrote:
> From: Andy Adamson <andros@netapp.com>
> 
> Keep track of the number of bytes read or written, including those queued
> up to be flushed. For use by mdsthreshold i/o size hints.
> 
> No locking needed as this is used as hint information.
> 
> Signed-off-by: Andy Adamson <andros@netapp.com>
> ---
>  fs/nfs/file.c          |    8 ++++++--
>  fs/nfs/inode.c         |    2 ++
>  fs/nfs/pnfs.c          |    3 +++
>  include/linux/nfs_fs.h |    3 +++
>  4 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index 8eda8a6..c4cc096 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -203,8 +203,10 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
>  	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
>  	if (!result) {
>  		result = generic_file_aio_read(iocb, iov, nr_segs, pos);
> -		if (result > 0)
> +		if (result > 0) {
> +			NFS_I(inode)->read_io += result;

Should we perhaps rather do this from nfs_readpages(), nfs_readpage()
and nfs_direct_read()?

If we do it here in nfs_file_read, we miss mmaped reads, O_DIRECT reads,
as well as splice reads. We also count read cache hits where we don't
have to actually access the server.

>  			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
> +		}
>  	}
>  	return result;
>  }
> @@ -613,8 +615,10 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
>  		if (err < 0)
>  			result = err;
>  	}
> -	if (result > 0)
> +	if (result > 0) {
> +		NFS_I(inode)->write_io += written;

For the same reason, perhaps we should move this to
nfs_direct_write_schedule_iovec(), and nfs_write_end().

>  		nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
> +	}
>  out:
>  	return result;
>  
> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
> index 889f7e5..a6f5fbb 100644
> --- a/fs/nfs/inode.c
> +++ b/fs/nfs/inode.c
> @@ -323,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
>  		inode->i_gid = -2;
>  		inode->i_blocks = 0;
>  		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
> +		nfsi->write_io = 0;
> +		nfsi->read_io = 0;
>  
>  		nfsi->read_cache_jiffies = fattr->time_start;
>  		nfsi->attr_gencount = fattr->gencount;
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index cbcb6ae..6620606 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
>  	dprintk("%s:Begin lo %p\n", __func__, lo);
>  
>  	if (list_empty(&lo->plh_segs)) {
> +		/* Reset MDS Threshold I/O counters */
> +		NFS_I(lo->plh_inode)->write_io = 0;
> +		NFS_I(lo->plh_inode)->read_io = 0;
>  		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
>  			put_layout_hdr_locked(lo);
>  		return 0;
> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> index ca4a707..c6954ac 100644
> --- a/include/linux/nfs_fs.h
> +++ b/include/linux/nfs_fs.h
> @@ -201,6 +201,9 @@ struct nfs_inode {
>  
>  	/* pNFS layout information */
>  	struct pnfs_layout_hdr *layout;
> +	/* how many bytes have been written/read and how many bytes queued up */
> +	__u64 write_io;
> +	__u64 read_io;
>  #endif /* CONFIG_NFS_V4*/

^^^^ This doesn't look as if it will compile without CONFIG_NFS_V4.

>  #ifdef CONFIG_NFS_FSCACHE
>  	struct fscache_cookie	*fscache;

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold
  2012-05-23 18:19   ` Myklebust, Trond
@ 2012-05-23 18:41     ` Adamson, Andy
  0 siblings, 0 replies; 8+ messages in thread
From: Adamson, Andy @ 2012-05-23 18:41 UTC (permalink / raw)
  To: Myklebust, Trond; +Cc: Adamson, Andy, linux-nfs


On May 23, 2012, at 2:19 PM, Myklebust, Trond wrote:

> On Wed, 2012-05-23 at 05:02 -0400, andros@netapp.com wrote:
>> From: Andy Adamson <andros@netapp.com>
>> 
>> Keep track of the number of bytes read or written, including those queued
>> up to be flushed. For use by mdsthreshold i/o size hints.
>> 
>> No locking needed as this is used as hint information.
>> 
>> Signed-off-by: Andy Adamson <andros@netapp.com>
>> ---
>> fs/nfs/file.c          |    8 ++++++--
>> fs/nfs/inode.c         |    2 ++
>> fs/nfs/pnfs.c          |    3 +++
>> include/linux/nfs_fs.h |    3 +++
>> 4 files changed, 14 insertions(+), 2 deletions(-)
>> 
>> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
>> index 8eda8a6..c4cc096 100644
>> --- a/fs/nfs/file.c
>> +++ b/fs/nfs/file.c
>> @@ -203,8 +203,10 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
>> 	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
>> 	if (!result) {
>> 		result = generic_file_aio_read(iocb, iov, nr_segs, pos);
>> -		if (result > 0)
>> +		if (result > 0) {
>> +			NFS_I(inode)->read_io += result;
> 
> Should we perhaps rather do this from nfs_readpages(), nfs_readpage()
> and nfs_direct_read()?
> 
> If we do it here in nfs_file_read, we miss mmaped reads, O_DIRECT reads,
> as well as splice reads.

Well that's not good.

> We also count read cache hits where we don't
> have to actually access the server.

OK.

> 
>> 			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
>> +		}
>> 	}
>> 	return result;
>> }
>> @@ -613,8 +615,10 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
>> 		if (err < 0)
>> 			result = err;
>> 	}
>> -	if (result > 0)
>> +	if (result > 0) {
>> +		NFS_I(inode)->write_io += written;
> 
> For the same reason, perhaps we should move this to
> nfs_direct_write_schedule_iovec(), and nfs_write_end().
> 
>> 		nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
>> +	}
>> out:
>> 	return result;
>> 
>> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
>> index 889f7e5..a6f5fbb 100644
>> --- a/fs/nfs/inode.c
>> +++ b/fs/nfs/inode.c
>> @@ -323,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
>> 		inode->i_gid = -2;
>> 		inode->i_blocks = 0;
>> 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
>> +		nfsi->write_io = 0;
>> +		nfsi->read_io = 0;
>> 
>> 		nfsi->read_cache_jiffies = fattr->time_start;
>> 		nfsi->attr_gencount = fattr->gencount;
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index cbcb6ae..6620606 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
>> 	dprintk("%s:Begin lo %p\n", __func__, lo);
>> 
>> 	if (list_empty(&lo->plh_segs)) {
>> +		/* Reset MDS Threshold I/O counters */
>> +		NFS_I(lo->plh_inode)->write_io = 0;
>> +		NFS_I(lo->plh_inode)->read_io = 0;
>> 		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
>> 			put_layout_hdr_locked(lo);
>> 		return 0;
>> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
>> index ca4a707..c6954ac 100644
>> --- a/include/linux/nfs_fs.h
>> +++ b/include/linux/nfs_fs.h
>> @@ -201,6 +201,9 @@ struct nfs_inode {
>> 
>> 	/* pNFS layout information */
>> 	struct pnfs_layout_hdr *layout;
>> +	/* how many bytes have been written/read and how many bytes queued up */
>> +	__u64 write_io;
>> +	__u64 read_io;
>> #endif /* CONFIG_NFS_V4*/
> 
> ^^^^ This doesn't look as if it will compile without CONFIG_NFS_V4.

I'll fix and resend. Thanks for the review :)

-->Andy

> 
>> #ifdef CONFIG_NFS_FSCACHE
>> 	struct fscache_cookie	*fscache;
> 
> -- 
> Trond Myklebust
> Linux NFS client maintainer
> 
> NetApp
> Trond.Myklebust@netapp.com
> www.netapp.com
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold
  2012-05-24 17:13 [PATCH 0/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros
@ 2012-05-24 17:13 ` andros
  0 siblings, 0 replies; 8+ messages in thread
From: andros @ 2012-05-24 17:13 UTC (permalink / raw)
  To: trond.myklebust; +Cc: linux-nfs, Andy Adamson

From: Andy Adamson <andros@netapp.com>

Keep track of the number of bytes read or written via buffered, direct, and
mem-mapped i/o for use by mdsthreshold size_io hints.

Signed-off-by: Andy Adamson <andros@netapp.com>
---
 fs/nfs/direct.c        |    2 ++
 fs/nfs/file.c          |    1 +
 fs/nfs/inode.c         |    2 ++
 fs/nfs/pnfs.c          |    3 +++
 fs/nfs/read.c          |    2 ++
 include/linux/nfs_fs.h |    3 +++
 6 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index c47a46e..23d170b 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -447,6 +447,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
 	result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
 	if (!result)
 		result = nfs_direct_wait(dreq);
+	NFS_I(inode)->read_io += result;
 out_release:
 	nfs_direct_req_release(dreq);
 out:
@@ -785,6 +786,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 		pos += vec->iov_len;
 	}
 	nfs_pageio_complete(&desc);
+	NFS_I(dreq->inode)->write_io += desc.pg_bytes_written;
 
 	/*
 	 * If no bytes were started, return the error, and let the
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8eda8a6..56311ca 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -424,6 +424,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
 
 	if (status < 0)
 		return status;
+	NFS_I(mapping->host)->write_io += copied;
 	return copied;
 }
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 889f7e5..a6f5fbb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -323,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		inode->i_gid = -2;
 		inode->i_blocks = 0;
 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+		nfsi->write_io = 0;
+		nfsi->read_io = 0;
 
 		nfsi->read_cache_jiffies = fattr->time_start;
 		nfsi->attr_gencount = fattr->gencount;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cbcb6ae..6620606 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 	dprintk("%s:Begin lo %p\n", __func__, lo);
 
 	if (list_empty(&lo->plh_segs)) {
+		/* Reset MDS Threshold I/O counters */
+		NFS_I(lo->plh_inode)->write_io = 0;
+		NFS_I(lo->plh_inode)->read_io = 0;
 		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
 			put_layout_hdr_locked(lo);
 		return 0;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 2cfdd77..86ced78 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -152,6 +152,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
 	nfs_pageio_add_request(&pgio, new);
 	nfs_pageio_complete(&pgio);
+	NFS_I(inode)->read_io += pgio.pg_bytes_written;
 	return 0;
 }
 
@@ -656,6 +657,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
 
 	nfs_pageio_complete(&pgio);
+	NFS_I(inode)->read_io += pgio.pg_bytes_written;
 	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
 read_complete:
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index ca4a707..9d44860 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -202,6 +202,9 @@ struct nfs_inode {
 	/* pNFS layout information */
 	struct pnfs_layout_hdr *layout;
 #endif /* CONFIG_NFS_V4*/
+	/* how many bytes have been written/read and how many bytes queued up */
+	__u64 write_io;
+	__u64 read_io;
 #ifdef CONFIG_NFS_FSCACHE
 	struct fscache_cookie	*fscache;
 #endif
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2012-05-24 17:14 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-23  9:02 [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr andros
2012-05-23  9:02 ` [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN andros
2012-05-23  9:02 ` [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros
2012-05-23 18:19   ` Myklebust, Trond
2012-05-23 18:41     ` Adamson, Andy
2012-05-23  9:02 ` [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters andros
2012-05-23 13:25   ` Boaz Harrosh
2012-05-24 17:13 [PATCH 0/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros
2012-05-24 17:13 ` [PATCH 3/4] " andros

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.