* [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr
@ 2012-05-23 9:02 andros
2012-05-23 9:02 ` [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN andros
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: andros @ 2012-05-23 9:02 UTC (permalink / raw)
To: trond.myklebust; +Cc: linux-nfs, Andy Adamson
From: Andy Adamson <andros@netapp.com>
We only support one layout type per file system, so one threshold_item4 per
mdsthreshold4.
Signed-off-by: Andy Adamson <andros@netapp.com>
---
fs/nfs/nfs4xdr.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++-
include/linux/nfs4.h | 7 +++
include/linux/nfs_xdr.h | 10 ++++
3 files changed, 140 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index db040e9..db199f8 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -99,9 +99,12 @@ static int nfs4_stat_to_errno(int);
#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+/* We support only one layout type per file system */
+#define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8)
/* This is based on getfattr, which uses the most attributes: */
#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
- 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
+ 3 + 3 + 3 + nfs4_owner_maxsz + \
+ nfs4_group_maxsz + decode_mdsthreshold_maxsz))
#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
nfs4_fattr_value_maxsz)
#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -1170,6 +1173,16 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c
bitmask[1] & nfs4_fattr_bitmap[1], hdr);
}
+static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask,
+ struct compound_hdr *hdr)
+{
+ encode_getattr_three(xdr,
+ bitmask[0] & nfs4_fattr_bitmap[0],
+ bitmask[1] & nfs4_fattr_bitmap[1],
+ bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD,
+ hdr);
+}
+
static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
{
encode_getattr_three(xdr,
@@ -2161,7 +2174,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_putfh(xdr, args->fh, &hdr);
encode_open(xdr, args, &hdr);
encode_getfh(xdr, &hdr);
- encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_getfattr_open(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
@@ -4183,6 +4196,110 @@ xdr_error:
return status;
}
+static int decode_threshold_hint(struct xdr_stream *xdr,
+ uint32_t *bitmap,
+ uint64_t *res,
+ uint32_t hint_bit)
+{
+ __be32 *p;
+
+ *res = 0;
+ if (likely(bitmap[0] & hint_bit)) {
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
+ }
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_first_threshold_item4(struct xdr_stream *xdr,
+ struct nfs4_threshold *res)
+{
+ __be32 *p, *savep;
+ uint32_t bitmap[3] = {0,}, attrlen;
+ int status;
+
+ /* layout type */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p)) {
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+ }
+ res->l_type = be32_to_cpup(p);
+
+ /* thi_hintset bitmap */
+ status = decode_attr_bitmap(xdr, bitmap);
+ if (status < 0)
+ goto xdr_error;
+
+ /* thi_hintlist length */
+ status = decode_attr_length(xdr, &attrlen, &savep);
+ if (status < 0)
+ goto xdr_error;
+ /* thi_hintlist */
+ status = decode_threshold_hint(xdr, bitmap, &res->rd_sz, THRESHOLD_RD);
+ if (status < 0)
+ goto xdr_error;
+ status = decode_threshold_hint(xdr, bitmap, &res->wr_sz, THRESHOLD_WR);
+ if (status < 0)
+ goto xdr_error;
+ status = decode_threshold_hint(xdr, bitmap, &res->rd_io_sz,
+ THRESHOLD_RD_IO);
+ if (status < 0)
+ goto xdr_error;
+ status = decode_threshold_hint(xdr, bitmap, &res->wr_io_sz,
+ THRESHOLD_WR_IO);
+ if (status < 0)
+ goto xdr_error;
+
+ status = verify_attr_len(xdr, savep, attrlen);
+ res->bm = bitmap[0];
+
+ dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
+ __func__, res->bm, res->rd_sz, res->wr_sz, res->rd_io_sz,
+ res->wr_io_sz);
+xdr_error:
+ dprintk("%s ret=%d!\n", __func__, status);
+ return status;
+}
+
+/*
+ * Thresholds on pNFS direct I/O vs MDS I/O
+ */
+static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
+ uint32_t *bitmap,
+ struct nfs4_threshold *res)
+{
+ __be32 *p;
+ int status = 0;
+ uint32_t num;
+
+ if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U)))
+ return -EIO;
+ if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ num = be32_to_cpup(p);
+ if (num == 0)
+ return 0;
+ if (num > 1)
+ printk(KERN_INFO "%s: Warning: Multiple pNFS layout "
+ "drivers per filesystem not supported\n",
+ __func__);
+
+ status = decode_first_threshold_item4(xdr, res);
+ }
+ return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
struct nfs_fattr *fattr, struct nfs_fh *fh,
struct nfs4_fs_locations *fs_loc,
@@ -4289,6 +4406,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
goto xdr_error;
fattr->valid |= status;
+ status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold);
+ if (status < 0)
+ goto xdr_error;
+
xdr_error:
dprintk("%s: xdr returned %d\n", __func__, -status);
return status;
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 0987146..72b6bad 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -526,6 +526,13 @@ enum lock_type4 {
#define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23)
#define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30)
#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)
+#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4)
+
+/* MDS threshold bitmap bits */
+#define THRESHOLD_RD (1UL << 0)
+#define THRESHOLD_WR (1UL << 1)
+#define THRESHOLD_RD_IO (1UL << 2)
+#define THRESHOLD_WR_IO (1UL << 3)
#define NFSPROC4_NULL 0
#define NFSPROC4_COMPOUND 1
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2e53a3f..5b8e42e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -35,6 +35,15 @@ static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid
return a->major == b->major && a->minor == b->minor;
}
+struct nfs4_threshold {
+ __u32 bm;
+ __u32 l_type;
+ __u64 rd_sz;
+ __u64 wr_sz;
+ __u64 rd_io_sz;
+ __u64 wr_io_sz;
+};
+
struct nfs_fattr {
unsigned int valid; /* which fields are valid */
umode_t mode;
@@ -67,6 +76,7 @@ struct nfs_fattr {
unsigned long gencount;
struct nfs4_string *owner_name;
struct nfs4_string *group_name;
+ struct nfs4_threshold *mdsthreshold; /* pNFS threshold hints */
};
#define NFS_ATTR_FATTR_TYPE (1U << 0)
--
1.7.7.6
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN
2012-05-23 9:02 [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr andros
@ 2012-05-23 9:02 ` andros
2012-05-23 9:02 ` [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros
2012-05-23 9:02 ` [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters andros
2 siblings, 0 replies; 8+ messages in thread
From: andros @ 2012-05-23 9:02 UTC (permalink / raw)
To: trond.myklebust; +Cc: linux-nfs, Andy Adamson
From: Andy Adamson <andros@netapp.com>
Signed-off-by: Andy Adamson <andros@netapp.com>
---
fs/nfs/inode.c | 2 ++
fs/nfs/nfs4proc.c | 38 +++++++++++++++++++++++++++++++++-----
fs/nfs/pnfs.c | 12 ++++++++++++
fs/nfs/pnfs.h | 21 +++++++++++++++++++++
include/linux/nfs_fs.h | 1 +
5 files changed, 69 insertions(+), 5 deletions(-)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9ad81ce..889f7e5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -641,6 +641,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f
nfs_init_lock_context(&ctx->lock_context);
ctx->lock_context.open_context = ctx;
INIT_LIST_HEAD(&ctx->list);
+ ctx->mdsthreshold = NULL;
return ctx;
}
@@ -669,6 +670,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
put_rpccred(ctx->cred);
dput(ctx->dentry);
nfs_sb_deactive(sb);
+ kfree(ctx->mdsthreshold);
kfree(ctx);
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 78784e5..d84c633 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1781,7 +1781,14 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
/*
* Returns a referenced nfs4_state
*/
-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir,
+ struct dentry *dentry,
+ fmode_t fmode,
+ int flags,
+ struct iattr *sattr,
+ struct rpc_cred *cred,
+ struct nfs4_state **res,
+ struct nfs4_threshold **ctx_th)
{
struct nfs4_state_owner *sp;
struct nfs4_state *state = NULL;
@@ -1806,6 +1813,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
if (opendata == NULL)
goto err_put_state_owner;
+ if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
+ opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
+ if (!opendata->f_attr.mdsthreshold)
+ goto err_opendata_put;
+ }
if (dentry->d_inode != NULL)
opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
@@ -1831,11 +1843,19 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
nfs_setattr_update_inode(state->inode, sattr);
nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
}
+
+ if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server))
+ *ctx_th = opendata->f_attr.mdsthreshold;
+ else
+ kfree(opendata->f_attr.mdsthreshold);
+ opendata->f_attr.mdsthreshold = NULL;
+
nfs4_opendata_put(opendata);
nfs4_put_state_owner(sp);
*res = state;
return 0;
err_opendata_put:
+ kfree(opendata->f_attr.mdsthreshold);
nfs4_opendata_put(opendata);
err_put_state_owner:
nfs4_put_state_owner(sp);
@@ -1845,14 +1865,21 @@ out_err:
}
-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred)
+static struct nfs4_state *nfs4_do_open(struct inode *dir,
+ struct dentry *dentry,
+ fmode_t fmode,
+ int flags,
+ struct iattr *sattr,
+ struct rpc_cred *cred,
+ struct nfs4_threshold **ctx_th)
{
struct nfs4_exception exception = { };
struct nfs4_state *res;
int status;
do {
- status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res);
+ status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,
+ &res, ctx_th);
if (status == 0)
break;
/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -2176,7 +2203,8 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags
struct nfs4_state *state;
/* Protect against concurrent sillydeletes */
- state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred);
+ state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr,
+ ctx->cred, &ctx->mdsthreshold);
if (IS_ERR(state))
return ERR_CAST(state);
ctx->state = state;
@@ -2778,7 +2806,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
fmode = ctx->mode;
}
sattr->ia_mode &= ~current_umask();
- state = nfs4_do_open(dir, de, fmode, flags, sattr, cred);
+ state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL);
d_drop(dentry);
if (IS_ERR(state)) {
status = PTR_ERR(state);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 5d09a36..cbcb6ae 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1630,3 +1630,15 @@ out_free:
kfree(data);
goto out;
}
+
+struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
+{
+ struct nfs4_threshold *thp;
+
+ thp = kzalloc(sizeof(*thp), GFP_NOFS);
+ if (!thp) {
+ dprintk("%s mdsthreshold allocation failed\n", __func__);
+ return NULL;
+ }
+ return thp;
+}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 7980756..29fd23c 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -227,6 +227,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head,
const struct nfs_pgio_completion_ops *compl_ops);
int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
const struct nfs_pgio_completion_ops *compl_ops);
+struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
/* nfs4_deviceid_flags */
enum {
@@ -360,6 +361,14 @@ static inline int pnfs_return_layout(struct inode *ino)
return 0;
}
+static inline bool
+pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
+ struct nfs_server *nfss)
+{
+ return (dst && src && src->bm != 0 &&
+ nfss->pnfs_curr_ld->id == src->l_type);
+}
+
#ifdef NFS_DEBUG
void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
#else
@@ -485,6 +494,18 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
return 0;
}
+static inline bool
+pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
+ struct nfs_server *nfss)
+{
+ return false;
+}
+
+static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
+{
+ return NULL;
+}
+
#endif /* CONFIG_NFS_V4_1 */
#endif /* FS_NFS_PNFS_H */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 6cc7dba..ca4a707 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -102,6 +102,7 @@ struct nfs_open_context {
int error;
struct list_head list;
+ struct nfs4_threshold *mdsthreshold;
};
struct nfs_open_dir_context {
--
1.7.7.6
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold
2012-05-23 9:02 [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr andros
2012-05-23 9:02 ` [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN andros
@ 2012-05-23 9:02 ` andros
2012-05-23 18:19 ` Myklebust, Trond
2012-05-23 9:02 ` [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters andros
2 siblings, 1 reply; 8+ messages in thread
From: andros @ 2012-05-23 9:02 UTC (permalink / raw)
To: trond.myklebust; +Cc: linux-nfs, Andy Adamson
From: Andy Adamson <andros@netapp.com>
Keep track of the number of bytes read or written, including those queued
up to be flushed. For use by mdsthreshold i/o size hints.
No locking needed as this is used as hint information.
Signed-off-by: Andy Adamson <andros@netapp.com>
---
fs/nfs/file.c | 8 ++++++--
fs/nfs/inode.c | 2 ++
fs/nfs/pnfs.c | 3 +++
include/linux/nfs_fs.h | 3 +++
4 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8eda8a6..c4cc096 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -203,8 +203,10 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
if (!result) {
result = generic_file_aio_read(iocb, iov, nr_segs, pos);
- if (result > 0)
+ if (result > 0) {
+ NFS_I(inode)->read_io += result;
nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
+ }
}
return result;
}
@@ -613,8 +615,10 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
if (err < 0)
result = err;
}
- if (result > 0)
+ if (result > 0) {
+ NFS_I(inode)->write_io += written;
nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
+ }
out:
return result;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 889f7e5..a6f5fbb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -323,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_gid = -2;
inode->i_blocks = 0;
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+ nfsi->write_io = 0;
+ nfsi->read_io = 0;
nfsi->read_cache_jiffies = fattr->time_start;
nfsi->attr_gencount = fattr->gencount;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cbcb6ae..6620606 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
dprintk("%s:Begin lo %p\n", __func__, lo);
if (list_empty(&lo->plh_segs)) {
+ /* Reset MDS Threshold I/O counters */
+ NFS_I(lo->plh_inode)->write_io = 0;
+ NFS_I(lo->plh_inode)->read_io = 0;
if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
put_layout_hdr_locked(lo);
return 0;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index ca4a707..c6954ac 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -201,6 +201,9 @@ struct nfs_inode {
/* pNFS layout information */
struct pnfs_layout_hdr *layout;
+ /* how many bytes have been written/read and how many bytes queued up */
+ __u64 write_io;
+ __u64 read_io;
#endif /* CONFIG_NFS_V4*/
#ifdef CONFIG_NFS_FSCACHE
struct fscache_cookie *fscache;
--
1.7.7.6
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters
2012-05-23 9:02 [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr andros
2012-05-23 9:02 ` [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN andros
2012-05-23 9:02 ` [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros
@ 2012-05-23 9:02 ` andros
2012-05-23 13:25 ` Boaz Harrosh
2 siblings, 1 reply; 8+ messages in thread
From: andros @ 2012-05-23 9:02 UTC (permalink / raw)
To: trond.myklebust; +Cc: linux-nfs, Andy Adamson
From: Andy Adamson <andros@netapp.com>
Signed-off-by: Andy Adamson <andros@netapp.com>
---
fs/nfs/pnfs.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 79 insertions(+), 0 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6620606..b8323aa 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -936,6 +936,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
}
/*
+ * Use mdsthreshold hints set at each OPEN to determine if I/O should go
+ * to the MDS or over pNFS
+ *
+ * The nfs_inode read_io and write_io fields are cumulative counters reset
+ * when there are no layout segments. Note that in pnfs_update_layout iomode
+ * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
+ * WRITE request.
+ *
+ * A return of true means use MDS I/O.
+ *
+ * From rfc 5661:
+ * If a file's size is smaller than the file size threshold, data accesses
+ * SHOULD be sent to the metadata server. If an I/O request has a length that
+ * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
+ * server. If both file size and I/O size are provided, the client SHOULD
+ * reach or exceed both thresholds before sending its read or write
+ * requests to the data server.
+ */
+static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
+ struct inode *ino, int iomode)
+{
+ struct nfs4_threshold *t = ctx->mdsthreshold;
+ struct nfs_inode *nfsi = NFS_I(ino);
+ loff_t fsize = i_size_read(ino);
+ bool size = false, size_set = false, io = false, io_set = false, ret = false;
+
+ if (t == NULL)
+ return ret;
+
+ dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
+ __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
+
+ switch (iomode) {
+ case IOMODE_READ:
+ if (t->bm & THRESHOLD_RD) {
+ dprintk("%s fsize %llu\n", __func__, fsize);
+ size_set = true;
+ if (fsize < t->rd_sz)
+ size = true;
+ }
+ if (t->bm & THRESHOLD_RD_IO) {
+ dprintk("%s nfsi->read_io %llu\n", __func__,
+ nfsi->read_io);
+ io_set = true;
+ if (nfsi->read_io < t->rd_io_sz)
+ io = true;
+ }
+ break;
+ case IOMODE_RW:
+ if (t->bm & THRESHOLD_WR) {
+ dprintk("%s fsize %llu\n", __func__, fsize);
+ size_set = true;
+ if (fsize < t->wr_sz)
+ size = true;
+ }
+ if (t->bm & THRESHOLD_WR_IO) {
+ dprintk("%s nfsi->write_io %llu\n", __func__,
+ nfsi->write_io);
+ io_set = true;
+ if (nfsi->write_io < t->wr_io_sz)
+ io = true;
+ }
+ break;
+ }
+ if (size_set && io_set) {
+ if (size && io)
+ ret = true;
+ } else if (size || io)
+ ret = true;
+
+ dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
+ return ret;
+}
+
+/*
* Layout segment is retreived from the server if not cached.
* The appropriate layout segment is referenced and returned to the caller.
*/
@@ -962,6 +1037,10 @@ pnfs_update_layout(struct inode *ino,
if (!pnfs_enabled_sb(NFS_SERVER(ino)))
return NULL;
+
+ if (pnfs_within_mdsthreshold(ctx, ino, iomode))
+ return NULL;
+
spin_lock(&ino->i_lock);
lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
if (lo == NULL) {
--
1.7.7.6
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters
2012-05-23 9:02 ` [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters andros
@ 2012-05-23 13:25 ` Boaz Harrosh
0 siblings, 0 replies; 8+ messages in thread
From: Boaz Harrosh @ 2012-05-23 13:25 UTC (permalink / raw)
To: andros; +Cc: trond.myklebust, linux-nfs
On 05/23/2012 12:02 PM, andros@netapp.com wrote:
> From: Andy Adamson <andros@netapp.com>
>
> Signed-off-by: Andy Adamson <andros@netapp.com>
> ---
> fs/nfs/pnfs.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 files changed, 79 insertions(+), 0 deletions(-)
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 6620606..b8323aa 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -936,6 +936,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
> }
>
> /*
> + * Use mdsthreshold hints set at each OPEN to determine if I/O should go
> + * to the MDS or over pNFS
> + *
> + * The nfs_inode read_io and write_io fields are cumulative counters reset
> + * when there are no layout segments. Note that in pnfs_update_layout iomode
> + * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
> + * WRITE request.
> + *
> + * A return of true means use MDS I/O.
> + *
> + * From rfc 5661:
> + * If a file's size is smaller than the file size threshold, data accesses
> + * SHOULD be sent to the metadata server. If an I/O request has a length that
> + * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
> + * server. If both file size and I/O size are provided, the client SHOULD
> + * reach or exceed both thresholds before sending its read or write
> + * requests to the data server.
> + */
> +static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
> + struct inode *ino, int iomode)
> +{
> + struct nfs4_threshold *t = ctx->mdsthreshold;
> + struct nfs_inode *nfsi = NFS_I(ino);
> + loff_t fsize = i_size_read(ino);
> + bool size = false, size_set = false, io = false, io_set = false, ret = false;
> +
> + if (t == NULL)
> + return ret;
> +
> + dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
> + __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
> +
> + switch (iomode) {
> + case IOMODE_READ:
> + if (t->bm & THRESHOLD_RD) {
> + dprintk("%s fsize %llu\n", __func__, fsize);
> + size_set = true;
> + if (fsize < t->rd_sz)
> + size = true;
> + }
> + if (t->bm & THRESHOLD_RD_IO) {
> + dprintk("%s nfsi->read_io %llu\n", __func__,
> + nfsi->read_io);
> + io_set = true;
> + if (nfsi->read_io < t->rd_io_sz)
> + io = true;
> + }
> + break;
> + case IOMODE_RW:
> + if (t->bm & THRESHOLD_WR) {
> + dprintk("%s fsize %llu\n", __func__, fsize);
> + size_set = true;
> + if (fsize < t->wr_sz)
> + size = true;
> + }
> + if (t->bm & THRESHOLD_WR_IO) {
> + dprintk("%s nfsi->write_io %llu\n", __func__,
> + nfsi->write_io);
> + io_set = true;
> + if (nfsi->write_io < t->wr_io_sz)
> + io = true;
> + }
> + break;
> + }
> + if (size_set && io_set) {
> + if (size && io)
> + ret = true;
> + } else if (size || io)
> + ret = true;
> +
> + dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
> + return ret;
> +}
> +
> +/*
> * Layout segment is retreived from the server if not cached.
> * The appropriate layout segment is referenced and returned to the caller.
> */
> @@ -962,6 +1037,10 @@ pnfs_update_layout(struct inode *ino,
>
> if (!pnfs_enabled_sb(NFS_SERVER(ino)))
> return NULL;
> +
> + if (pnfs_within_mdsthreshold(ctx, ino, iomode))
> + return NULL;
> +
Would we want to use these counters as the recommended layout_size in
read and write, instead of current's PAGE_SIZE?
Boaz
> spin_lock(&ino->i_lock);
> lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
> if (lo == NULL) {
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold
2012-05-23 9:02 ` [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros
@ 2012-05-23 18:19 ` Myklebust, Trond
2012-05-23 18:41 ` Adamson, Andy
0 siblings, 1 reply; 8+ messages in thread
From: Myklebust, Trond @ 2012-05-23 18:19 UTC (permalink / raw)
To: Adamson, Andy; +Cc: linux-nfs
On Wed, 2012-05-23 at 05:02 -0400, andros@netapp.com wrote:
> From: Andy Adamson <andros@netapp.com>
>
> Keep track of the number of bytes read or written, including those queued
> up to be flushed. For use by mdsthreshold i/o size hints.
>
> No locking needed as this is used as hint information.
>
> Signed-off-by: Andy Adamson <andros@netapp.com>
> ---
> fs/nfs/file.c | 8 ++++++--
> fs/nfs/inode.c | 2 ++
> fs/nfs/pnfs.c | 3 +++
> include/linux/nfs_fs.h | 3 +++
> 4 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index 8eda8a6..c4cc096 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -203,8 +203,10 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
> result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
> if (!result) {
> result = generic_file_aio_read(iocb, iov, nr_segs, pos);
> - if (result > 0)
> + if (result > 0) {
> + NFS_I(inode)->read_io += result;
Should we perhaps rather do this from nfs_readpages(), nfs_readpage()
and nfs_direct_read()?
If we do it here in nfs_file_read, we miss mmaped reads, O_DIRECT reads,
as well as splice reads. We also count read cache hits where we don't
have to actually access the server.
> nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
> + }
> }
> return result;
> }
> @@ -613,8 +615,10 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
> if (err < 0)
> result = err;
> }
> - if (result > 0)
> + if (result > 0) {
> + NFS_I(inode)->write_io += written;
For the same reason, perhaps we should move this to
nfs_direct_write_schedule_iovec(), and nfs_write_end().
> nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
> + }
> out:
> return result;
>
> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
> index 889f7e5..a6f5fbb 100644
> --- a/fs/nfs/inode.c
> +++ b/fs/nfs/inode.c
> @@ -323,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
> inode->i_gid = -2;
> inode->i_blocks = 0;
> memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
> + nfsi->write_io = 0;
> + nfsi->read_io = 0;
>
> nfsi->read_cache_jiffies = fattr->time_start;
> nfsi->attr_gencount = fattr->gencount;
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index cbcb6ae..6620606 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
> dprintk("%s:Begin lo %p\n", __func__, lo);
>
> if (list_empty(&lo->plh_segs)) {
> + /* Reset MDS Threshold I/O counters */
> + NFS_I(lo->plh_inode)->write_io = 0;
> + NFS_I(lo->plh_inode)->read_io = 0;
> if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
> put_layout_hdr_locked(lo);
> return 0;
> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> index ca4a707..c6954ac 100644
> --- a/include/linux/nfs_fs.h
> +++ b/include/linux/nfs_fs.h
> @@ -201,6 +201,9 @@ struct nfs_inode {
>
> /* pNFS layout information */
> struct pnfs_layout_hdr *layout;
> + /* how many bytes have been written/read and how many bytes queued up */
> + __u64 write_io;
> + __u64 read_io;
> #endif /* CONFIG_NFS_V4*/
^^^^ This doesn't look as if it will compile without CONFIG_NFS_V4.
> #ifdef CONFIG_NFS_FSCACHE
> struct fscache_cookie *fscache;
--
Trond Myklebust
Linux NFS client maintainer
NetApp
Trond.Myklebust@netapp.com
www.netapp.com
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold
2012-05-23 18:19 ` Myklebust, Trond
@ 2012-05-23 18:41 ` Adamson, Andy
0 siblings, 0 replies; 8+ messages in thread
From: Adamson, Andy @ 2012-05-23 18:41 UTC (permalink / raw)
To: Myklebust, Trond; +Cc: Adamson, Andy, linux-nfs
On May 23, 2012, at 2:19 PM, Myklebust, Trond wrote:
> On Wed, 2012-05-23 at 05:02 -0400, andros@netapp.com wrote:
>> From: Andy Adamson <andros@netapp.com>
>>
>> Keep track of the number of bytes read or written, including those queued
>> up to be flushed. For use by mdsthreshold i/o size hints.
>>
>> No locking needed as this is used as hint information.
>>
>> Signed-off-by: Andy Adamson <andros@netapp.com>
>> ---
>> fs/nfs/file.c | 8 ++++++--
>> fs/nfs/inode.c | 2 ++
>> fs/nfs/pnfs.c | 3 +++
>> include/linux/nfs_fs.h | 3 +++
>> 4 files changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
>> index 8eda8a6..c4cc096 100644
>> --- a/fs/nfs/file.c
>> +++ b/fs/nfs/file.c
>> @@ -203,8 +203,10 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
>> result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
>> if (!result) {
>> result = generic_file_aio_read(iocb, iov, nr_segs, pos);
>> - if (result > 0)
>> + if (result > 0) {
>> + NFS_I(inode)->read_io += result;
>
> Should we perhaps rather do this from nfs_readpages(), nfs_readpage()
> and nfs_direct_read()?
>
> If we do it here in nfs_file_read, we miss mmaped reads, O_DIRECT reads,
> as well as splice reads.
Well that's not good.
> We also count read cache hits where we don't
> have to actually access the server.
OK.
>
>> nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
>> + }
>> }
>> return result;
>> }
>> @@ -613,8 +615,10 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
>> if (err < 0)
>> result = err;
>> }
>> - if (result > 0)
>> + if (result > 0) {
>> + NFS_I(inode)->write_io += written;
>
> For the same reason, perhaps we should move this to
> nfs_direct_write_schedule_iovec(), and nfs_write_end().
>
>> nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
>> + }
>> out:
>> return result;
>>
>> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
>> index 889f7e5..a6f5fbb 100644
>> --- a/fs/nfs/inode.c
>> +++ b/fs/nfs/inode.c
>> @@ -323,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
>> inode->i_gid = -2;
>> inode->i_blocks = 0;
>> memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
>> + nfsi->write_io = 0;
>> + nfsi->read_io = 0;
>>
>> nfsi->read_cache_jiffies = fattr->time_start;
>> nfsi->attr_gencount = fattr->gencount;
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index cbcb6ae..6620606 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
>> dprintk("%s:Begin lo %p\n", __func__, lo);
>>
>> if (list_empty(&lo->plh_segs)) {
>> + /* Reset MDS Threshold I/O counters */
>> + NFS_I(lo->plh_inode)->write_io = 0;
>> + NFS_I(lo->plh_inode)->read_io = 0;
>> if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
>> put_layout_hdr_locked(lo);
>> return 0;
>> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
>> index ca4a707..c6954ac 100644
>> --- a/include/linux/nfs_fs.h
>> +++ b/include/linux/nfs_fs.h
>> @@ -201,6 +201,9 @@ struct nfs_inode {
>>
>> /* pNFS layout information */
>> struct pnfs_layout_hdr *layout;
>> + /* how many bytes have been written/read and how many bytes queued up */
>> + __u64 write_io;
>> + __u64 read_io;
>> #endif /* CONFIG_NFS_V4*/
>
> ^^^^ This doesn't look as if it will compile without CONFIG_NFS_V4.
I'll fix and resend. Thanks for the review :)
-->Andy
>
>> #ifdef CONFIG_NFS_FSCACHE
>> struct fscache_cookie *fscache;
>
> --
> Trond Myklebust
> Linux NFS client maintainer
>
> NetApp
> Trond.Myklebust@netapp.com
> www.netapp.com
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold
2012-05-24 17:13 [PATCH 0/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros
@ 2012-05-24 17:13 ` andros
0 siblings, 0 replies; 8+ messages in thread
From: andros @ 2012-05-24 17:13 UTC (permalink / raw)
To: trond.myklebust; +Cc: linux-nfs, Andy Adamson
From: Andy Adamson <andros@netapp.com>
Keep track of the number of bytes read or written via buffered, direct, and
memory-mapped I/O for use by the mdsthreshold size_io hints.
Signed-off-by: Andy Adamson <andros@netapp.com>
---
fs/nfs/direct.c | 2 ++
fs/nfs/file.c | 1 +
fs/nfs/inode.c | 2 ++
fs/nfs/pnfs.c | 3 +++
fs/nfs/read.c | 2 ++
include/linux/nfs_fs.h | 3 +++
6 files changed, 13 insertions(+), 0 deletions(-)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index c47a46e..23d170b 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -447,6 +447,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
if (!result)
result = nfs_direct_wait(dreq);
+ NFS_I(inode)->read_io += result;
out_release:
nfs_direct_req_release(dreq);
out:
@@ -785,6 +786,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
pos += vec->iov_len;
}
nfs_pageio_complete(&desc);
+ NFS_I(dreq->inode)->write_io += desc.pg_bytes_written;
/*
* If no bytes were started, return the error, and let the
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8eda8a6..56311ca 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -424,6 +424,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
if (status < 0)
return status;
+ NFS_I(mapping->host)->write_io += copied;
return copied;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 889f7e5..a6f5fbb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -323,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_gid = -2;
inode->i_blocks = 0;
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+ nfsi->write_io = 0;
+ nfsi->read_io = 0;
nfsi->read_cache_jiffies = fattr->time_start;
nfsi->attr_gencount = fattr->gencount;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cbcb6ae..6620606 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
dprintk("%s:Begin lo %p\n", __func__, lo);
if (list_empty(&lo->plh_segs)) {
+ /* Reset MDS Threshold I/O counters */
+ NFS_I(lo->plh_inode)->write_io = 0;
+ NFS_I(lo->plh_inode)->read_io = 0;
if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
put_layout_hdr_locked(lo);
return 0;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 2cfdd77..86ced78 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -152,6 +152,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
nfs_pageio_add_request(&pgio, new);
nfs_pageio_complete(&pgio);
+ NFS_I(inode)->read_io += pgio.pg_bytes_written;
return 0;
}
@@ -656,6 +657,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
nfs_pageio_complete(&pgio);
+ NFS_I(inode)->read_io += pgio.pg_bytes_written;
npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
nfs_add_stats(inode, NFSIOS_READPAGES, npages);
read_complete:
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index ca4a707..9d44860 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -202,6 +202,9 @@ struct nfs_inode {
/* pNFS layout information */
struct pnfs_layout_hdr *layout;
#endif /* CONFIG_NFS_V4*/
+ /* how many bytes have been written/read and how many bytes queued up */
+ __u64 write_io;
+ __u64 read_io;
#ifdef CONFIG_NFS_FSCACHE
struct fscache_cookie *fscache;
#endif
--
1.7.7.6
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2012-05-24 17:14 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-23 9:02 [PATCH 1/4] NFSv4.1 mdsthreshold attribute xdr andros
2012-05-23 9:02 ` [PATCH 2/4] NFSv4.1 cache mdsthreshold values on OPEN andros
2012-05-23 9:02 ` [PATCH 3/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros
2012-05-23 18:19 ` Myklebust, Trond
2012-05-23 18:41 ` Adamson, Andy
2012-05-23 9:02 ` [PATCH 4/4] NFSv4.1 test the mdsthreshold hint parameters andros
2012-05-23 13:25 ` Boaz Harrosh
2012-05-24 17:13 [PATCH 0/4] NFSv4.1 add nfs_inode book keeping for mdsthreshold andros
2012-05-24 17:13 ` [PATCH 3/4] " andros
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.