* [PATCH v4 1/8] ceph: add global dentry lease metric support
2020-01-16 10:38 [PATCH v4 0/8] ceph: add perf metrics support xiubli
@ 2020-01-16 10:38 ` xiubli
2020-01-16 10:38 ` [PATCH v4 2/8] ceph: add caps perf metric for each session xiubli
` (6 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: xiubli @ 2020-01-16 10:38 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
For the dentry lease we will only count the hit/miss info triggered
from the vfs calls; for cases like request reply handling and the
periodically run ceph_trim_dentries() we will ignore them.
Currently only debugfs output is supported:
The output will be:
item total miss hit
-------------------------------------------------
d_lease 11 7 141
URL: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/debugfs.c | 32 ++++++++++++++++++++++++++++----
fs/ceph/dir.c | 18 ++++++++++++++++--
fs/ceph/mds_client.c | 37 +++++++++++++++++++++++++++++++++++--
fs/ceph/mds_client.h | 9 +++++++++
fs/ceph/super.h | 1 +
5 files changed, 89 insertions(+), 8 deletions(-)
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index fb7cabd98e7b..40a22da0214a 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -124,6 +124,22 @@ static int mdsc_show(struct seq_file *s, void *p)
return 0;
}
+static int metric_show(struct seq_file *s, void *p)
+{
+ struct ceph_fs_client *fsc = s->private;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+
+ seq_printf(s, "item total miss hit\n");
+ seq_printf(s, "-------------------------------------------------\n");
+
+ seq_printf(s, "%-14s%-16lld%-16lld%lld\n", "d_lease",
+ atomic64_read(&mdsc->metric.total_dentries),
+ percpu_counter_sum(&mdsc->metric.d_lease_mis),
+ percpu_counter_sum(&mdsc->metric.d_lease_hit));
+
+ return 0;
+}
+
static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p)
{
struct seq_file *s = p;
@@ -220,6 +236,7 @@ static int mds_sessions_show(struct seq_file *s, void *ptr)
CEPH_DEFINE_SHOW_FUNC(mdsmap_show)
CEPH_DEFINE_SHOW_FUNC(mdsc_show)
+CEPH_DEFINE_SHOW_FUNC(metric_show)
CEPH_DEFINE_SHOW_FUNC(caps_show)
CEPH_DEFINE_SHOW_FUNC(mds_sessions_show)
@@ -255,6 +272,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
debugfs_remove(fsc->debugfs_mdsmap);
debugfs_remove(fsc->debugfs_mds_sessions);
debugfs_remove(fsc->debugfs_caps);
+ debugfs_remove(fsc->debugfs_metric);
debugfs_remove(fsc->debugfs_mdsc);
}
@@ -295,11 +313,17 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc,
&mdsc_show_fops);
+ fsc->debugfs_metric = debugfs_create_file("metrics",
+ 0400,
+ fsc->client->debugfs_dir,
+ fsc,
+ &metric_show_fops);
+
fsc->debugfs_caps = debugfs_create_file("caps",
- 0400,
- fsc->client->debugfs_dir,
- fsc,
- &caps_show_fops);
+ 0400,
+ fsc->client->debugfs_dir,
+ fsc,
+ &caps_show_fops);
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 10294f07f5f0..658c55b323cc 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -38,6 +38,8 @@ static int __dir_lease_try_check(const struct dentry *dentry);
static int ceph_d_init(struct dentry *dentry)
{
struct ceph_dentry_info *di;
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
di = kmem_cache_zalloc(ceph_dentry_cachep, GFP_KERNEL);
if (!di)
@@ -48,6 +50,9 @@ static int ceph_d_init(struct dentry *dentry)
di->time = jiffies;
dentry->d_fsdata = di;
INIT_LIST_HEAD(&di->lease_list);
+
+ atomic64_inc(&mdsc->metric.total_dentries);
+
return 0;
}
@@ -1613,6 +1618,7 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
*/
static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
{
+ struct ceph_mds_client *mdsc;
int valid = 0;
struct dentry *parent;
struct inode *dir, *inode;
@@ -1651,9 +1657,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
}
}
+ mdsc = ceph_sb_to_client(dir->i_sb)->mdsc;
if (!valid) {
- struct ceph_mds_client *mdsc =
- ceph_sb_to_client(dir->i_sb)->mdsc;
struct ceph_mds_request *req;
int op, err;
u32 mask;
@@ -1661,6 +1666,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
if (flags & LOOKUP_RCU)
return -ECHILD;
+ percpu_counter_inc(&mdsc->metric.d_lease_mis);
+
op = ceph_snap(dir) == CEPH_SNAPDIR ?
CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
@@ -1692,6 +1699,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
dout("d_revalidate %p lookup result=%d\n",
dentry, err);
}
+ } else {
+ percpu_counter_inc(&mdsc->metric.d_lease_hit);
}
dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
@@ -1700,6 +1709,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
if (!(flags & LOOKUP_RCU))
dput(parent);
+
return valid;
}
@@ -1734,9 +1744,13 @@ static int ceph_d_delete(const struct dentry *dentry)
static void ceph_d_release(struct dentry *dentry)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
dout("d_release %p\n", dentry);
+ atomic64_dec(&mdsc->metric.total_dentries);
+
spin_lock(&dentry->d_lock);
__dentry_lease_unlist(di);
dentry->d_fsdata = NULL;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 8263f75badfc..a24fd00676b8 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4158,10 +4158,31 @@ static void delayed_work(struct work_struct *work)
schedule_delayed(mdsc);
}
+static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
+{
+ int ret;
+
+ if (!metric)
+ return -EINVAL;
+
+ atomic64_set(&metric->total_dentries, 0);
+ ret = percpu_counter_init(&metric->d_lease_hit, 0, GFP_KERNEL);
+ if (ret)
+ return ret;
+ ret = percpu_counter_init(&metric->d_lease_mis, 0, GFP_KERNEL);
+ if (ret) {
+ percpu_counter_destroy(&metric->d_lease_hit);
+ return ret;
+ }
+
+ return 0;
+}
+
int ceph_mdsc_init(struct ceph_fs_client *fsc)
{
struct ceph_mds_client *mdsc;
+ int err;
mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS);
if (!mdsc)
@@ -4170,8 +4191,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
mutex_init(&mdsc->mutex);
mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
if (!mdsc->mdsmap) {
- kfree(mdsc);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_mdsc;
}
fsc->mdsc = mdsc;
@@ -4210,6 +4231,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
init_waitqueue_head(&mdsc->cap_flushing_wq);
INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
atomic_set(&mdsc->cap_reclaim_pending, 0);
+ err = ceph_mdsc_metric_init(&mdsc->metric);
+ if (err)
+ goto err_mdsmap;
spin_lock_init(&mdsc->dentry_list_lock);
INIT_LIST_HEAD(&mdsc->dentry_leases);
@@ -4228,6 +4252,12 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
strscpy(mdsc->nodename, utsname()->nodename,
sizeof(mdsc->nodename));
return 0;
+
+err_mdsmap:
+ kfree(mdsc->mdsmap);
+err_mdsc:
+ kfree(mdsc);
+ return err;
}
/*
@@ -4485,6 +4515,9 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
ceph_mdsc_stop(mdsc);
+ percpu_counter_destroy(&mdsc->metric.d_lease_mis);
+ percpu_counter_destroy(&mdsc->metric.d_lease_hit);
+
fsc->mdsc = NULL;
kfree(mdsc);
dout("mdsc_destroy %p done\n", mdsc);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 27a7446e10d3..dd1f417b90eb 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -358,6 +358,13 @@ struct cap_wait {
int want;
};
+/* This is the global metrics */
+struct ceph_client_metric {
+ atomic64_t total_dentries;
+ struct percpu_counter d_lease_hit;
+ struct percpu_counter d_lease_mis;
+};
+
/*
* mds client state
*/
@@ -446,6 +453,8 @@ struct ceph_mds_client {
struct list_head dentry_leases; /* fifo list */
struct list_head dentry_dir_leases; /* lru list */
+ struct ceph_client_metric metric;
+
spinlock_t snapid_map_lock;
struct rb_root snapid_map_tree;
struct list_head snapid_map_lru;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 3ef17dd6491e..7af91628636c 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -128,6 +128,7 @@ struct ceph_fs_client {
struct dentry *debugfs_congestion_kb;
struct dentry *debugfs_bdi;
struct dentry *debugfs_mdsc, *debugfs_mdsmap;
+ struct dentry *debugfs_metric;
struct dentry *debugfs_mds_sessions;
#endif
--
2.21.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v4 2/8] ceph: add caps perf metric for each session
2020-01-16 10:38 [PATCH v4 0/8] ceph: add perf metrics support xiubli
2020-01-16 10:38 ` [PATCH v4 1/8] ceph: add global dentry lease metric support xiubli
@ 2020-01-16 10:38 ` xiubli
2020-01-16 10:38 ` [PATCH v4 3/8] ceph: add global read latency metric support xiubli
` (5 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: xiubli @ 2020-01-16 10:38 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
This will fulfill the caps hit/miss metric for each session. When
checking the "need" mask, if a cap's issued bits cover the "need"
mask it counts as a hit, otherwise as a miss.
item total miss hit
-------------------------------------------------
d_lease 295 0 993
session caps miss hit
-------------------------------------------------
0 295 107 4119
1 1 107 9
URL: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/acl.c | 2 ++
fs/ceph/addr.c | 2 ++
fs/ceph/caps.c | 74 ++++++++++++++++++++++++++++++++++++++++++++
fs/ceph/debugfs.c | 20 ++++++++++++
fs/ceph/dir.c | 9 ++++--
fs/ceph/file.c | 3 ++
fs/ceph/mds_client.c | 16 +++++++++-
fs/ceph/mds_client.h | 3 ++
fs/ceph/quota.c | 9 ++++--
fs/ceph/super.h | 11 +++++++
fs/ceph/xattr.c | 17 ++++++++--
11 files changed, 158 insertions(+), 8 deletions(-)
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 26be6520d3fb..58e119e3519f 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -22,6 +22,8 @@ static inline void ceph_set_cached_acl(struct inode *inode,
struct ceph_inode_info *ci = ceph_inode(inode);
spin_lock(&ci->i_ceph_lock);
+ __ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
+
if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0))
set_cached_acl(inode, type, acl);
else
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 7ab616601141..29d4513eff8c 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1706,6 +1706,8 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
err = -ENOMEM;
goto out;
}
+
+ ceph_caps_metric(ci, CEPH_STAT_CAP_INLINE_DATA);
err = __ceph_do_getattr(inode, page,
CEPH_STAT_CAP_INLINE_DATA, true);
if (err < 0) {
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7fc87b693ba4..af2e9e826f8c 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -783,6 +783,75 @@ static int __cap_is_valid(struct ceph_cap *cap)
return 1;
}
+/*
+ * Counts the cap metric.
+ */
+void __ceph_caps_metric(struct ceph_inode_info *ci, int mask)
+{
+ int have = ci->i_snap_caps;
+ struct ceph_mds_session *s;
+ struct ceph_cap *cap;
+ struct rb_node *p;
+ bool skip_auth = false;
+
+ lockdep_assert_held(&ci->i_ceph_lock);
+
+ if (mask <= 0)
+ return;
+
+ /* Counts the snap caps metric in the auth cap */
+ if (ci->i_auth_cap) {
+ cap = ci->i_auth_cap;
+ if (have) {
+ have |= cap->issued;
+
+ dout("%s %p cap %p issued %s, mask %s\n", __func__,
+ &ci->vfs_inode, cap, ceph_cap_string(cap->issued),
+ ceph_cap_string(mask));
+
+ s = ceph_get_mds_session(cap->session);
+ if (s) {
+ if (mask & have)
+ percpu_counter_inc(&s->i_caps_hit);
+ else
+ percpu_counter_inc(&s->i_caps_mis);
+ ceph_put_mds_session(s);
+ }
+ skip_auth = true;
+ }
+ }
+
+ if ((mask & have) == mask)
+ return;
+
+ /* Checks others */
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ cap = rb_entry(p, struct ceph_cap, ci_node);
+ if (!__cap_is_valid(cap))
+ continue;
+
+ if (skip_auth && cap == ci->i_auth_cap)
+ continue;
+
+ dout("%s %p cap %p issued %s, mask %s\n", __func__,
+ &ci->vfs_inode, cap, ceph_cap_string(cap->issued),
+ ceph_cap_string(mask));
+
+ s = ceph_get_mds_session(cap->session);
+ if (s) {
+ if (mask & cap->issued)
+ percpu_counter_inc(&s->i_caps_hit);
+ else
+ percpu_counter_inc(&s->i_caps_mis);
+ ceph_put_mds_session(s);
+ }
+
+ have |= cap->issued;
+ if ((mask & have) == mask)
+ return;
+ }
+}
+
/*
* Return set of valid cap bits issued to us. Note that caps time
* out, and may be invalidated in bulk if the client session times out
@@ -2746,6 +2815,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
int ceph_try_get_caps(struct inode *inode, int need, int want,
bool nonblock, int *got)
{
+ struct ceph_inode_info *ci = ceph_inode(inode);
int ret;
BUG_ON(need & ~CEPH_CAP_FILE_RD);
@@ -2758,6 +2828,7 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
BUG_ON(want & ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO |
CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
CEPH_CAP_ANY_DIR_OPS));
+ ceph_caps_metric(ci, need | want);
ret = try_get_cap_refs(inode, need, want, 0, nonblock, got);
return ret == -EAGAIN ? 0 : ret;
}
@@ -2784,6 +2855,8 @@ int ceph_get_caps(struct file *filp, int need, int want,
fi->filp_gen != READ_ONCE(fsc->filp_gen))
return -EBADF;
+ ceph_caps_metric(ci, need | want);
+
while (true) {
if (endoff > 0)
check_max_size(inode, endoff);
@@ -2871,6 +2944,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
* getattr request will bring inline data into
* page cache
*/
+ ceph_caps_metric(ci, CEPH_STAT_CAP_INLINE_DATA);
ret = __ceph_do_getattr(inode, NULL,
CEPH_STAT_CAP_INLINE_DATA,
true);
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 40a22da0214a..c132fdb40d53 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -128,6 +128,7 @@ static int metric_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
struct ceph_mds_client *mdsc = fsc->mdsc;
+ int i;
seq_printf(s, "item total miss hit\n");
seq_printf(s, "-------------------------------------------------\n");
@@ -137,6 +138,25 @@ static int metric_show(struct seq_file *s, void *p)
percpu_counter_sum(&mdsc->metric.d_lease_mis),
percpu_counter_sum(&mdsc->metric.d_lease_hit));
+ seq_printf(s, "\n");
+ seq_printf(s, "session caps miss hit\n");
+ seq_printf(s, "-------------------------------------------------\n");
+
+ mutex_lock(&mdsc->mutex);
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ struct ceph_mds_session *session;
+
+ session = __ceph_lookup_mds_session(mdsc, i);
+ if (!session)
+ continue;
+ seq_printf(s, "%-14d%-16d%-16lld%lld\n", i,
+ session->s_nr_caps,
+ percpu_counter_sum(&session->i_caps_mis),
+ percpu_counter_sum(&session->i_caps_hit));
+ ceph_put_mds_session(session);
+ }
+ mutex_unlock(&mdsc->mutex);
+
return 0;
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 658c55b323cc..33eb239e09e2 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -313,7 +313,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_mds_client *mdsc = fsc->mdsc;
int i;
- int err;
+ int err, ret = -1;
unsigned frag = -1;
struct ceph_mds_reply_info_parsed *rinfo;
@@ -346,13 +346,16 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
ceph_snap(inode) != CEPH_SNAPDIR &&
__ceph_dir_is_complete_ordered(ci) &&
- __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
+ (ret = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
int shared_gen = atomic_read(&ci->i_shared_gen);
+ __ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
spin_unlock(&ci->i_ceph_lock);
err = __dcache_readdir(file, ctx, shared_gen);
if (err != -EAGAIN)
return err;
} else {
+ if (ret != -1)
+ __ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
spin_unlock(&ci->i_ceph_lock);
}
@@ -757,6 +760,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
struct ceph_dentry_info *di = ceph_dentry(dentry);
spin_lock(&ci->i_ceph_lock);
+ __ceph_caps_metric(ci, CEPH_CAP_FILE_SHARED);
+
dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
if (strncmp(dentry->d_name.name,
fsc->mount_options->snapdir_name,
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 1e6cdf2dfe90..c78dfbbb7b91 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -384,6 +384,8 @@ int ceph_open(struct inode *inode, struct file *file)
* asynchronously.
*/
spin_lock(&ci->i_ceph_lock);
+ __ceph_caps_metric(ci, wanted);
+
if (__ceph_is_any_real_caps(ci) &&
(((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
int mds_wanted = __ceph_caps_mds_wanted(ci, true);
@@ -1340,6 +1342,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
return -ENOMEM;
}
+ ceph_caps_metric(ci, CEPH_STAT_CAP_INLINE_DATA);
statret = __ceph_do_getattr(inode, page,
CEPH_STAT_CAP_INLINE_DATA, !!page);
if (statret < 0) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a24fd00676b8..141c1c03636c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -558,6 +558,8 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
if (refcount_dec_and_test(&s->s_ref)) {
if (s->s_auth.authorizer)
ceph_auth_destroy_authorizer(s->s_auth.authorizer);
+ percpu_counter_destroy(&s->i_caps_hit);
+ percpu_counter_destroy(&s->i_caps_mis);
kfree(s);
}
}
@@ -598,6 +600,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
int mds)
{
struct ceph_mds_session *s;
+ int err;
if (mds >= mdsc->mdsmap->possible_max_rank)
return ERR_PTR(-EINVAL);
@@ -612,8 +615,10 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
dout("%s: realloc to %d\n", __func__, newmax);
sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
- if (!sa)
+ if (!sa) {
+ err = -ENOMEM;
goto fail_realloc;
+ }
if (mdsc->sessions) {
memcpy(sa, mdsc->sessions,
mdsc->max_sessions * sizeof(void *));
@@ -653,6 +658,13 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
INIT_LIST_HEAD(&s->s_cap_flushing);
+ err = percpu_counter_init(&s->i_caps_hit, 0, GFP_NOFS);
+ if (err)
+ goto fail_realloc;
+ err = percpu_counter_init(&s->i_caps_mis, 0, GFP_NOFS);
+ if (err)
+ goto fail_init;
+
mdsc->sessions[mds] = s;
atomic_inc(&mdsc->num_sessions);
refcount_inc(&s->s_ref); /* one ref to sessions[], one to caller */
@@ -662,6 +674,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
return s;
+fail_init:
+ percpu_counter_destroy(&s->i_caps_hit);
fail_realloc:
kfree(s);
return ERR_PTR(-ENOMEM);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index dd1f417b90eb..ba74ff74c59c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -201,6 +201,9 @@ struct ceph_mds_session {
struct list_head s_waiting; /* waiting requests */
struct list_head s_unsafe; /* unsafe requests */
+
+ struct percpu_counter i_caps_hit;
+ struct percpu_counter i_caps_mis;
};
/*
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index de56dee60540..4ce2f658e63d 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -147,9 +147,14 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
return NULL;
}
if (qri->inode) {
+ struct ceph_inode_info *ci = ceph_inode(qri->inode);
+ int ret;
+
+ ceph_caps_metric(ci, CEPH_STAT_CAP_INODE);
+
/* get caps */
- int ret = __ceph_do_getattr(qri->inode, NULL,
- CEPH_STAT_CAP_INODE, true);
+ ret = __ceph_do_getattr(qri->inode, NULL,
+ CEPH_STAT_CAP_INODE, true);
if (ret >= 0)
in = qri->inode;
else
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7af91628636c..3f4829222528 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -641,6 +641,14 @@ static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci)
return !RB_EMPTY_ROOT(&ci->i_caps);
}
+extern void __ceph_caps_metric(struct ceph_inode_info *ci, int mask);
+static inline void ceph_caps_metric(struct ceph_inode_info *ci, int mask)
+{
+ spin_lock(&ci->i_ceph_lock);
+ __ceph_caps_metric(ci, mask);
+ spin_unlock(&ci->i_ceph_lock);
+}
+
extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented);
extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t);
extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
@@ -927,6 +935,9 @@ extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
int mask, bool force);
static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ ceph_caps_metric(ci, mask);
return __ceph_do_getattr(inode, NULL, mask, force);
}
extern int ceph_permission(struct inode *inode, int mask);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index d58fa14c1f01..ebd522edb0a8 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -829,6 +829,7 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
struct ceph_vxattr *vxattr = NULL;
int req_mask;
ssize_t err;
+ int ret = -1;
/* let's see if a virtual xattr was requested */
vxattr = ceph_match_vxattr(inode, name);
@@ -856,7 +857,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
if (ci->i_xattrs.version == 0 ||
!((req_mask & CEPH_CAP_XATTR_SHARED) ||
- __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
+ (ret = __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)))) {
+ if (ret != -1)
+ __ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
spin_unlock(&ci->i_ceph_lock);
/* security module gets xattr while filling trace */
@@ -871,6 +874,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
if (err)
return err;
spin_lock(&ci->i_ceph_lock);
+ } else {
+ if (ret != -1)
+ __ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
}
err = __build_xattrs(inode);
@@ -907,19 +913,24 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
struct ceph_inode_info *ci = ceph_inode(inode);
bool len_only = (size == 0);
u32 namelen;
- int err;
+ int err, ret = -1;
spin_lock(&ci->i_ceph_lock);
dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
ci->i_xattrs.version, ci->i_xattrs.index_version);
if (ci->i_xattrs.version == 0 ||
- !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
+ !(ret = __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
+ if (ret != -1)
+ __ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
spin_unlock(&ci->i_ceph_lock);
err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
if (err)
return err;
spin_lock(&ci->i_ceph_lock);
+ } else {
+ if (ret != -1)
+ __ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
}
err = __build_xattrs(inode);
--
2.21.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v4 3/8] ceph: add global read latency metric support
2020-01-16 10:38 [PATCH v4 0/8] ceph: add perf metrics support xiubli
2020-01-16 10:38 ` [PATCH v4 1/8] ceph: add global dentry lease metric support xiubli
2020-01-16 10:38 ` [PATCH v4 2/8] ceph: add caps perf metric for each session xiubli
@ 2020-01-16 10:38 ` xiubli
2020-01-16 13:55 ` Jeff Layton
2020-01-16 10:38 ` [PATCH v4 4/8] ceph: add global write " xiubli
` (4 subsequent siblings)
7 siblings, 1 reply; 13+ messages in thread
From: xiubli @ 2020-01-16 10:38 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
item total sum_lat(us) avg_lat(us)
-----------------------------------------------------
read 73 3590000 49178082
URL: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/addr.c | 15 ++++++++++++++-
fs/ceph/debugfs.c | 13 +++++++++++++
fs/ceph/file.c | 25 +++++++++++++++++++++++++
fs/ceph/mds_client.c | 25 ++++++++++++++++++++++++-
fs/ceph/mds_client.h | 7 +++++++
include/linux/ceph/osd_client.h | 2 +-
net/ceph/osd_client.c | 9 ++++++++-
7 files changed, 92 insertions(+), 4 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 29d4513eff8c..479ecd0a6e9d 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -190,6 +190,8 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
+ s64 latency;
int err = 0;
u64 off = page_offset(page);
u64 len = PAGE_SIZE;
@@ -221,7 +223,7 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
err = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
&ci->i_layout, off, &len,
ci->i_truncate_seq, ci->i_truncate_size,
- &page, 1, 0);
+ &page, 1, 0, &latency);
if (err == -ENOENT)
err = 0;
if (err < 0) {
@@ -241,6 +243,9 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
ceph_readpage_to_fscache(inode, page);
out:
+ if (latency)
+ ceph_mdsc_update_read_latency(metric, latency);
+
return err < 0 ? err : 0;
}
@@ -260,6 +265,8 @@ static int ceph_readpage(struct file *filp, struct page *page)
static void finish_read(struct ceph_osd_request *req)
{
struct inode *inode = req->r_inode;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
struct ceph_osd_data *osd_data;
int rc = req->r_result <= 0 ? req->r_result : 0;
int bytes = req->r_result >= 0 ? req->r_result : 0;
@@ -297,6 +304,12 @@ static void finish_read(struct ceph_osd_request *req)
put_page(page);
bytes -= PAGE_SIZE;
}
+
+ if (rc >= 0 || rc == -ENOENT) {
+ s64 latency = jiffies - req->r_start_stamp;
+ ceph_mdsc_update_read_latency(metric, latency);
+ }
+
kfree(osd_data->pages);
}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index c132fdb40d53..8200bf025ccd 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -128,8 +128,21 @@ static int metric_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
struct ceph_mds_client *mdsc = fsc->mdsc;
+ s64 total, sum, avg = 0;
int i;
+ seq_printf(s, "item total sum_lat(us) avg_lat(us)\n");
+ seq_printf(s, "-----------------------------------------------------\n");
+
+ spin_lock(&mdsc->metric.read_lock);
+ total = atomic64_read(&mdsc->metric.total_reads),
+ sum = timespec64_to_ns(&mdsc->metric.read_latency_sum);
+ spin_unlock(&mdsc->metric.read_lock);
+ avg = total ? sum / total : 0;
+ seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "read",
+ total, sum / NSEC_PER_USEC, avg / NSEC_PER_USEC);
+
+ seq_printf(s, "\n");
seq_printf(s, "item total miss hit\n");
seq_printf(s, "-------------------------------------------------\n");
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index c78dfbbb7b91..f479b699db14 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -588,6 +588,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
struct ceph_osd_client *osdc = &fsc->client->osdc;
ssize_t ret;
u64 off = iocb->ki_pos;
@@ -660,6 +661,11 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
ret = ceph_osdc_start_request(osdc, req, false);
if (!ret)
ret = ceph_osdc_wait_request(osdc, req);
+
+ if (ret >= 0 || ret == -ENOENT || ret == -ETIMEDOUT) {
+ s64 latency = jiffies - req->r_start_stamp;
+ ceph_mdsc_update_read_latency(metric, latency);
+ }
ceph_osdc_put_request(req);
i_size = i_size_read(inode);
@@ -798,13 +804,24 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
struct inode *inode = req->r_inode;
struct ceph_aio_request *aio_req = req->r_priv;
struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS);
BUG_ON(!osd_data->num_bvecs);
+ BUG_ON(!aio_req);
dout("ceph_aio_complete_req %p rc %d bytes %u\n",
inode, rc, osd_data->bvec_pos.iter.bi_size);
+ /* r_start_stamp == 0 means the request was not submitted */
+ if (req->r_start_stamp && (rc >= 0 || rc == -ENOENT)) {
+ s64 latency = jiffies - req->r_start_stamp;
+
+ if (!aio_req->write)
+ ceph_mdsc_update_read_latency(metric, latency);
+ }
+
if (rc == -EOLDSNAPC) {
struct ceph_aio_work *aio_work;
BUG_ON(!aio_req->write);
@@ -933,6 +950,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
struct ceph_vino vino;
struct ceph_osd_request *req;
struct bio_vec *bvecs;
@@ -1049,6 +1067,13 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (!ret)
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ if ((ret >= 0 || ret == -ENOENT || ret == -ETIMEDOUT)) {
+ s64 latency = jiffies - req->r_start_stamp;
+
+ if (!write)
+ ceph_mdsc_update_read_latency(metric, latency);
+ }
+
size = i_size_read(inode);
if (!write) {
if (ret == -ENOENT)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 141c1c03636c..dc2cda55a5a5 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4093,6 +4093,25 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
ceph_force_reconnect(fsc->sb);
}
+/*
+ * metric helpers
+ */
+void ceph_mdsc_update_read_latency(struct ceph_client_metric *m,
+ s64 latency)
+{
+ struct timespec64 ts;
+
+ if (!m)
+ return;
+
+ jiffies_to_timespec64(latency, &ts);
+
+ spin_lock(&m->read_lock);
+ atomic64_inc(&m->total_reads);
+ m->read_latency_sum = timespec64_add(m->read_latency_sum, ts);
+ spin_unlock(&m->read_lock);
+}
+
/*
* delayed work -- periodically trim expired leases, renew caps with mds
*/
@@ -4182,13 +4201,17 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
atomic64_set(&metric->total_dentries, 0);
ret = percpu_counter_init(&metric->d_lease_hit, 0, GFP_KERNEL);
if (ret)
- return ret;
+ return ret;
ret = percpu_counter_init(&metric->d_lease_mis, 0, GFP_KERNEL);
if (ret) {
percpu_counter_destroy(&metric->d_lease_hit);
return ret;
}
+ spin_lock_init(&metric->read_lock);
+ memset(&metric->read_latency_sum, 0, sizeof(struct timespec64));
+ atomic64_set(&metric->total_reads, 0);
+
return 0;
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ba74ff74c59c..cdc59037ef14 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -366,6 +366,10 @@ struct ceph_client_metric {
atomic64_t total_dentries;
struct percpu_counter d_lease_hit;
struct percpu_counter d_lease_mis;
+
+ spinlock_t read_lock;
+ atomic64_t total_reads;
+ struct timespec64 read_latency_sum;
};
/*
@@ -549,4 +553,7 @@ extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
extern int ceph_trim_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
int max_caps);
+
+extern void ceph_mdsc_update_read_latency(struct ceph_client_metric *m,
+ s64 latency);
#endif
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 5a62dbd3f4c2..43e4240d88e7 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -515,7 +515,7 @@ extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
u64 off, u64 *plen,
u32 truncate_seq, u64 truncate_size,
struct page **pages, int nr_pages,
- int page_align);
+ int page_align, s64 *latency);
extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
struct ceph_vino vino,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b68b376d8c2f..62eb758f2474 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5238,11 +5238,15 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
struct ceph_vino vino, struct ceph_file_layout *layout,
u64 off, u64 *plen,
u32 truncate_seq, u64 truncate_size,
- struct page **pages, int num_pages, int page_align)
+ struct page **pages, int num_pages, int page_align,
+ s64 *latency)
{
struct ceph_osd_request *req;
int rc = 0;
+ if (latency)
+ *latency = 0;
+
dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
vino.snap, off, *plen);
req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 0, 1,
@@ -5263,6 +5267,9 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
if (!rc)
rc = ceph_osdc_wait_request(osdc, req);
+ if (latency && (rc >= 0 || rc == -ENOENT || rc == -ETIMEDOUT))
+ *latency = jiffies - req->r_start_stamp;
+
ceph_osdc_put_request(req);
dout("readpages result %d\n", rc);
return rc;
--
2.21.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v4 3/8] ceph: add global read latency metric support
2020-01-16 10:38 ` [PATCH v4 3/8] ceph: add global read latency metric support xiubli
@ 2020-01-16 13:55 ` Jeff Layton
2020-01-16 14:50 ` Ilya Dryomov
0 siblings, 1 reply; 13+ messages in thread
From: Jeff Layton @ 2020-01-16 13:55 UTC (permalink / raw)
To: xiubli, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel
On Thu, 2020-01-16 at 05:38 -0500, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> item total sum_lat(us) avg_lat(us)
> -----------------------------------------------------
> read 73 3590000 49178
>
> URL: https://tracker.ceph.com/issues/43215
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> fs/ceph/addr.c | 15 ++++++++++++++-
> fs/ceph/debugfs.c | 13 +++++++++++++
> fs/ceph/file.c | 25 +++++++++++++++++++++++++
> fs/ceph/mds_client.c | 25 ++++++++++++++++++++++++-
> fs/ceph/mds_client.h | 7 +++++++
> include/linux/ceph/osd_client.h | 2 +-
> net/ceph/osd_client.c | 9 ++++++++-
> 7 files changed, 92 insertions(+), 4 deletions(-)
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 29d4513eff8c..479ecd0a6e9d 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -190,6 +190,8 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
> struct inode *inode = file_inode(filp);
> struct ceph_inode_info *ci = ceph_inode(inode);
> struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> + struct ceph_client_metric *metric = &fsc->mdsc->metric;
> + s64 latency;
> int err = 0;
> u64 off = page_offset(page);
> u64 len = PAGE_SIZE;
> @@ -221,7 +223,7 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
> err = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
> &ci->i_layout, off, &len,
> ci->i_truncate_seq, ci->i_truncate_size,
> - &page, 1, 0);
> + &page, 1, 0, &latency);
> if (err == -ENOENT)
> err = 0;
> if (err < 0) {
> @@ -241,6 +243,9 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
> ceph_readpage_to_fscache(inode, page);
>
> out:
> + if (latency)
> + ceph_mdsc_update_read_latency(metric, latency);
> +
> return err < 0 ? err : 0;
> }
>
> @@ -260,6 +265,8 @@ static int ceph_readpage(struct file *filp, struct page *page)
> static void finish_read(struct ceph_osd_request *req)
> {
> struct inode *inode = req->r_inode;
> + struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> + struct ceph_client_metric *metric = &fsc->mdsc->metric;
> struct ceph_osd_data *osd_data;
> int rc = req->r_result <= 0 ? req->r_result : 0;
> int bytes = req->r_result >= 0 ? req->r_result : 0;
> @@ -297,6 +304,12 @@ static void finish_read(struct ceph_osd_request *req)
> put_page(page);
> bytes -= PAGE_SIZE;
> }
> +
> + if (rc >= 0 || rc == -ENOENT) {
> + s64 latency = jiffies - req->r_start_stamp;
> + ceph_mdsc_update_read_latency(metric, latency);
> + }
> +
> kfree(osd_data->pages);
> }
>
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index c132fdb40d53..8200bf025ccd 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -128,8 +128,21 @@ static int metric_show(struct seq_file *s, void *p)
> {
> struct ceph_fs_client *fsc = s->private;
> struct ceph_mds_client *mdsc = fsc->mdsc;
> + s64 total, sum, avg = 0;
> int i;
>
> + seq_printf(s, "item total sum_lat(us) avg_lat(us)\n");
> + seq_printf(s, "-----------------------------------------------------\n");
> +
> + spin_lock(&mdsc->metric.read_lock);
> + total = atomic64_read(&mdsc->metric.total_reads),
> + sum = timespec64_to_ns(&mdsc->metric.read_latency_sum);
> + spin_unlock(&mdsc->metric.read_lock);
> + avg = total ? sum / total : 0;
> + seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "read",
> + total, sum / NSEC_PER_USEC, avg / NSEC_PER_USEC);
> +
> + seq_printf(s, "\n");
> seq_printf(s, "item total miss hit\n");
> seq_printf(s, "-------------------------------------------------\n");
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index c78dfbbb7b91..f479b699db14 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -588,6 +588,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
> struct inode *inode = file_inode(file);
> struct ceph_inode_info *ci = ceph_inode(inode);
> struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> + struct ceph_client_metric *metric = &fsc->mdsc->metric;
> struct ceph_osd_client *osdc = &fsc->client->osdc;
> ssize_t ret;
> u64 off = iocb->ki_pos;
> @@ -660,6 +661,11 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
> ret = ceph_osdc_start_request(osdc, req, false);
> if (!ret)
> ret = ceph_osdc_wait_request(osdc, req);
> +
> + if (ret >= 0 || ret == -ENOENT || ret == -ETIMEDOUT) {
> + s64 latency = jiffies - req->r_start_stamp;
> + ceph_mdsc_update_read_latency(metric, latency);
> + }
> ceph_osdc_put_request(req);
>
> i_size = i_size_read(inode);
> @@ -798,13 +804,24 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
> struct inode *inode = req->r_inode;
> struct ceph_aio_request *aio_req = req->r_priv;
> struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
> + struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> + struct ceph_client_metric *metric = &fsc->mdsc->metric;
>
> BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS);
> BUG_ON(!osd_data->num_bvecs);
> + BUG_ON(!aio_req);
>
> dout("ceph_aio_complete_req %p rc %d bytes %u\n",
> inode, rc, osd_data->bvec_pos.iter.bi_size);
>
> + /* r_start_stamp == 0 means the request was not submitted */
> + if (req->r_start_stamp && (rc >= 0 || rc == -ENOENT)) {
> + s64 latency = jiffies - req->r_start_stamp;
> +
> + if (!aio_req->write)
> + ceph_mdsc_update_read_latency(metric, latency);
> + }
> +
> if (rc == -EOLDSNAPC) {
> struct ceph_aio_work *aio_work;
> BUG_ON(!aio_req->write);
> @@ -933,6 +950,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
> struct inode *inode = file_inode(file);
> struct ceph_inode_info *ci = ceph_inode(inode);
> struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> + struct ceph_client_metric *metric = &fsc->mdsc->metric;
> struct ceph_vino vino;
> struct ceph_osd_request *req;
> struct bio_vec *bvecs;
> @@ -1049,6 +1067,13 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
> if (!ret)
> ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
>
> + if ((ret >= 0 || ret == -ENOENT || ret == -ETIMEDOUT)) {
> + s64 latency = jiffies - req->r_start_stamp;
> +
> + if (!write)
> + ceph_mdsc_update_read_latency(metric, latency);
> + }
> +
> size = i_size_read(inode);
> if (!write) {
> if (ret == -ENOENT)
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 141c1c03636c..dc2cda55a5a5 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -4093,6 +4093,25 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
> ceph_force_reconnect(fsc->sb);
> }
>
> +/*
> + * metric helpers
> + */
> +void ceph_mdsc_update_read_latency(struct ceph_client_metric *m,
> + s64 latency)
> +{
> + struct timespec64 ts;
> +
> + if (!m)
> + return;
> +
> + jiffies_to_timespec64(latency, &ts);
> +
> + spin_lock(&m->read_lock);
> + atomic64_inc(&m->total_reads);
> + m->read_latency_sum = timespec64_add(m->read_latency_sum, ts);
> + spin_unlock(&m->read_lock);
> +}
> +
> /*
> * delayed work -- periodically trim expired leases, renew caps with mds
> */
> @@ -4182,13 +4201,17 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
> atomic64_set(&metric->total_dentries, 0);
> ret = percpu_counter_init(&metric->d_lease_hit, 0, GFP_KERNEL);
> if (ret)
> - return ret;
> + return ret;
> ret = percpu_counter_init(&metric->d_lease_mis, 0, GFP_KERNEL);
> if (ret) {
> percpu_counter_destroy(&metric->d_lease_hit);
> return ret;
> }
>
> + spin_lock_init(&metric->read_lock);
> + memset(&metric->read_latency_sum, 0, sizeof(struct timespec64));
> + atomic64_set(&metric->total_reads, 0);
> +
> return 0;
> }
>
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index ba74ff74c59c..cdc59037ef14 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -366,6 +366,10 @@ struct ceph_client_metric {
> atomic64_t total_dentries;
> struct percpu_counter d_lease_hit;
> struct percpu_counter d_lease_mis;
> +
> + spinlock_t read_lock;
> + atomic64_t total_reads;
> + struct timespec64 read_latency_sum;
> };
>
> /*
> @@ -549,4 +553,7 @@ extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
> extern int ceph_trim_caps(struct ceph_mds_client *mdsc,
> struct ceph_mds_session *session,
> int max_caps);
> +
> +extern void ceph_mdsc_update_read_latency(struct ceph_client_metric *m,
> + s64 latency);
> #endif
> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
> index 5a62dbd3f4c2..43e4240d88e7 100644
> --- a/include/linux/ceph/osd_client.h
> +++ b/include/linux/ceph/osd_client.h
> @@ -515,7 +515,7 @@ extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
> u64 off, u64 *plen,
> u32 truncate_seq, u64 truncate_size,
> struct page **pages, int nr_pages,
> - int page_align);
> + int page_align, s64 *latency);
>
> extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
> struct ceph_vino vino,
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index b68b376d8c2f..62eb758f2474 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -5238,11 +5238,15 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
> struct ceph_vino vino, struct ceph_file_layout *layout,
> u64 off, u64 *plen,
> u32 truncate_seq, u64 truncate_size,
> - struct page **pages, int num_pages, int page_align)
> + struct page **pages, int num_pages, int page_align,
> + s64 *latency)
> {
> struct ceph_osd_request *req;
> int rc = 0;
>
> + if (latency)
> + *latency = 0;
> +
> dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
> vino.snap, off, *plen);
> req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 0, 1,
> @@ -5263,6 +5267,9 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
> if (!rc)
> rc = ceph_osdc_wait_request(osdc, req);
>
> + if (latency && (rc >= 0 || rc == -ENOENT || rc == -ETIMEDOUT))
> + *latency = jiffies - req->r_start_stamp;
> +
> ceph_osdc_put_request(req);
> dout("readpages result %d\n", rc);
> return rc;
This function is only called from ceph_do_readpage().
I think it'd be better to just turn ceph_osdc_readpages into
ceph_osdc_readpages_submit, make it not wait on the req and have it
return a pointer to the req with a reference held.
Then the caller could then handle the waiting and do the latency
calculation afterward and we wouldn't need to add a new pointer argument
here and push these details into libcephfs.
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v4 3/8] ceph: add global read latency metric support
2020-01-16 13:55 ` Jeff Layton
@ 2020-01-16 14:50 ` Ilya Dryomov
0 siblings, 0 replies; 13+ messages in thread
From: Ilya Dryomov @ 2020-01-16 14:50 UTC (permalink / raw)
To: Jeff Layton
Cc: Xiubo Li, Yan, Zheng, Sage Weil, Patrick Donnelly, Ceph Development
On Thu, Jan 16, 2020 at 2:55 PM Jeff Layton <jlayton@kernel.org> wrote:
>
> On Thu, 2020-01-16 at 05:38 -0500, xiubli@redhat.com wrote:
> > From: Xiubo Li <xiubli@redhat.com>
> >
> > item total sum_lat(us) avg_lat(us)
> > -----------------------------------------------------
> > read 73 3590000 49178
> >
> > URL: https://tracker.ceph.com/issues/43215
> > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > ---
> > fs/ceph/addr.c | 15 ++++++++++++++-
> > fs/ceph/debugfs.c | 13 +++++++++++++
> > fs/ceph/file.c | 25 +++++++++++++++++++++++++
> > fs/ceph/mds_client.c | 25 ++++++++++++++++++++++++-
> > fs/ceph/mds_client.h | 7 +++++++
> > include/linux/ceph/osd_client.h | 2 +-
> > net/ceph/osd_client.c | 9 ++++++++-
> > 7 files changed, 92 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> > index 29d4513eff8c..479ecd0a6e9d 100644
> > --- a/fs/ceph/addr.c
> > +++ b/fs/ceph/addr.c
> > @@ -190,6 +190,8 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
> > struct inode *inode = file_inode(filp);
> > struct ceph_inode_info *ci = ceph_inode(inode);
> > struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> > + struct ceph_client_metric *metric = &fsc->mdsc->metric;
> > + s64 latency;
> > int err = 0;
> > u64 off = page_offset(page);
> > u64 len = PAGE_SIZE;
> > @@ -221,7 +223,7 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
> > err = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
> > &ci->i_layout, off, &len,
> > ci->i_truncate_seq, ci->i_truncate_size,
> > - &page, 1, 0);
> > + &page, 1, 0, &latency);
> > if (err == -ENOENT)
> > err = 0;
> > if (err < 0) {
> > @@ -241,6 +243,9 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
> > ceph_readpage_to_fscache(inode, page);
> >
> > out:
> > + if (latency)
> > + ceph_mdsc_update_read_latency(metric, latency);
> > +
> > return err < 0 ? err : 0;
> > }
> >
> > @@ -260,6 +265,8 @@ static int ceph_readpage(struct file *filp, struct page *page)
> > static void finish_read(struct ceph_osd_request *req)
> > {
> > struct inode *inode = req->r_inode;
> > + struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> > + struct ceph_client_metric *metric = &fsc->mdsc->metric;
> > struct ceph_osd_data *osd_data;
> > int rc = req->r_result <= 0 ? req->r_result : 0;
> > int bytes = req->r_result >= 0 ? req->r_result : 0;
> > @@ -297,6 +304,12 @@ static void finish_read(struct ceph_osd_request *req)
> > put_page(page);
> > bytes -= PAGE_SIZE;
> > }
> > +
> > + if (rc >= 0 || rc == -ENOENT) {
> > + s64 latency = jiffies - req->r_start_stamp;
> > + ceph_mdsc_update_read_latency(metric, latency);
> > + }
> > +
> > kfree(osd_data->pages);
> > }
> >
> > diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> > index c132fdb40d53..8200bf025ccd 100644
> > --- a/fs/ceph/debugfs.c
> > +++ b/fs/ceph/debugfs.c
> > @@ -128,8 +128,21 @@ static int metric_show(struct seq_file *s, void *p)
> > {
> > struct ceph_fs_client *fsc = s->private;
> > struct ceph_mds_client *mdsc = fsc->mdsc;
> > + s64 total, sum, avg = 0;
> > int i;
> >
> > + seq_printf(s, "item total sum_lat(us) avg_lat(us)\n");
> > + seq_printf(s, "-----------------------------------------------------\n");
> > +
> > + spin_lock(&mdsc->metric.read_lock);
> > + total = atomic64_read(&mdsc->metric.total_reads),
> > + sum = timespec64_to_ns(&mdsc->metric.read_latency_sum);
> > + spin_unlock(&mdsc->metric.read_lock);
> > + avg = total ? sum / total : 0;
> > + seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "read",
> > + total, sum / NSEC_PER_USEC, avg / NSEC_PER_USEC);
> > +
> > + seq_printf(s, "\n");
> > seq_printf(s, "item total miss hit\n");
> > seq_printf(s, "-------------------------------------------------\n");
> >
> > diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> > index c78dfbbb7b91..f479b699db14 100644
> > --- a/fs/ceph/file.c
> > +++ b/fs/ceph/file.c
> > @@ -588,6 +588,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
> > struct inode *inode = file_inode(file);
> > struct ceph_inode_info *ci = ceph_inode(inode);
> > struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> > + struct ceph_client_metric *metric = &fsc->mdsc->metric;
> > struct ceph_osd_client *osdc = &fsc->client->osdc;
> > ssize_t ret;
> > u64 off = iocb->ki_pos;
> > @@ -660,6 +661,11 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
> > ret = ceph_osdc_start_request(osdc, req, false);
> > if (!ret)
> > ret = ceph_osdc_wait_request(osdc, req);
> > +
> > + if (ret >= 0 || ret == -ENOENT || ret == -ETIMEDOUT) {
> > + s64 latency = jiffies - req->r_start_stamp;
> > + ceph_mdsc_update_read_latency(metric, latency);
> > + }
> > ceph_osdc_put_request(req);
> >
> > i_size = i_size_read(inode);
> > @@ -798,13 +804,24 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
> > struct inode *inode = req->r_inode;
> > struct ceph_aio_request *aio_req = req->r_priv;
> > struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
> > + struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> > + struct ceph_client_metric *metric = &fsc->mdsc->metric;
> >
> > BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS);
> > BUG_ON(!osd_data->num_bvecs);
> > + BUG_ON(!aio_req);
> >
> > dout("ceph_aio_complete_req %p rc %d bytes %u\n",
> > inode, rc, osd_data->bvec_pos.iter.bi_size);
> >
> > + /* r_start_stamp == 0 means the request was not submitted */
> > + if (req->r_start_stamp && (rc >= 0 || rc == -ENOENT)) {
> > + s64 latency = jiffies - req->r_start_stamp;
> > +
> > + if (!aio_req->write)
> > + ceph_mdsc_update_read_latency(metric, latency);
> > + }
> > +
> > if (rc == -EOLDSNAPC) {
> > struct ceph_aio_work *aio_work;
> > BUG_ON(!aio_req->write);
> > @@ -933,6 +950,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
> > struct inode *inode = file_inode(file);
> > struct ceph_inode_info *ci = ceph_inode(inode);
> > struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> > + struct ceph_client_metric *metric = &fsc->mdsc->metric;
> > struct ceph_vino vino;
> > struct ceph_osd_request *req;
> > struct bio_vec *bvecs;
> > @@ -1049,6 +1067,13 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
> > if (!ret)
> > ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
> >
> > + if ((ret >= 0 || ret == -ENOENT || ret == -ETIMEDOUT)) {
> > + s64 latency = jiffies - req->r_start_stamp;
> > +
> > + if (!write)
> > + ceph_mdsc_update_read_latency(metric, latency);
> > + }
> > +
> > size = i_size_read(inode);
> > if (!write) {
> > if (ret == -ENOENT)
> > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > index 141c1c03636c..dc2cda55a5a5 100644
> > --- a/fs/ceph/mds_client.c
> > +++ b/fs/ceph/mds_client.c
> > @@ -4093,6 +4093,25 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
> > ceph_force_reconnect(fsc->sb);
> > }
> >
> > +/*
> > + * metric helpers
> > + */
> > +void ceph_mdsc_update_read_latency(struct ceph_client_metric *m,
> > + s64 latency)
> > +{
> > + struct timespec64 ts;
> > +
> > + if (!m)
> > + return;
> > +
> > + jiffies_to_timespec64(latency, &ts);
> > +
> > + spin_lock(&m->read_lock);
> > + atomic64_inc(&m->total_reads);
> > + m->read_latency_sum = timespec64_add(m->read_latency_sum, ts);
> > + spin_unlock(&m->read_lock);
> > +}
> > +
> > /*
> > * delayed work -- periodically trim expired leases, renew caps with mds
> > */
> > @@ -4182,13 +4201,17 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
> > atomic64_set(&metric->total_dentries, 0);
> > ret = percpu_counter_init(&metric->d_lease_hit, 0, GFP_KERNEL);
> > if (ret)
> > - return ret;
> > - return ret;
> > ret = percpu_counter_init(&metric->d_lease_mis, 0, GFP_KERNEL);
> > if (ret) {
> > percpu_counter_destroy(&metric->d_lease_hit);
> > return ret;
> > }
> >
> > + spin_lock_init(&metric->read_lock);
> > + memset(&metric->read_latency_sum, 0, sizeof(struct timespec64));
> > + atomic64_set(&metric->total_reads, 0);
> > +
> > return 0;
> > }
> >
> > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> > index ba74ff74c59c..cdc59037ef14 100644
> > --- a/fs/ceph/mds_client.h
> > +++ b/fs/ceph/mds_client.h
> > @@ -366,6 +366,10 @@ struct ceph_client_metric {
> > atomic64_t total_dentries;
> > struct percpu_counter d_lease_hit;
> > struct percpu_counter d_lease_mis;
> > +
> > + spinlock_t read_lock;
> > + atomic64_t total_reads;
> > + struct timespec64 read_latency_sum;
> > };
> >
> > /*
> > @@ -549,4 +553,7 @@ extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
> > extern int ceph_trim_caps(struct ceph_mds_client *mdsc,
> > struct ceph_mds_session *session,
> > int max_caps);
> > +
> > +extern void ceph_mdsc_update_read_latency(struct ceph_client_metric *m,
> > + s64 latency);
> > #endif
> > diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
> > index 5a62dbd3f4c2..43e4240d88e7 100644
> > --- a/include/linux/ceph/osd_client.h
> > +++ b/include/linux/ceph/osd_client.h
> > @@ -515,7 +515,7 @@ extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
> > u64 off, u64 *plen,
> > u32 truncate_seq, u64 truncate_size,
> > struct page **pages, int nr_pages,
> > - int page_align);
> > + int page_align, s64 *latency);
> >
> > extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
> > struct ceph_vino vino,
> > diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> > index b68b376d8c2f..62eb758f2474 100644
> > --- a/net/ceph/osd_client.c
> > +++ b/net/ceph/osd_client.c
> > @@ -5238,11 +5238,15 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
> > struct ceph_vino vino, struct ceph_file_layout *layout,
> > u64 off, u64 *plen,
> > u32 truncate_seq, u64 truncate_size,
> > - struct page **pages, int num_pages, int page_align)
> > + struct page **pages, int num_pages, int page_align,
> > + s64 *latency)
> > {
> > struct ceph_osd_request *req;
> > int rc = 0;
> >
> > + if (latency)
> > + *latency = 0;
> > +
> > dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
> > vino.snap, off, *plen);
> > req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 0, 1,
> > @@ -5263,6 +5267,9 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
> > if (!rc)
> > rc = ceph_osdc_wait_request(osdc, req);
> >
> > + if (latency && (rc >= 0 || rc == -ENOENT || rc == -ETIMEDOUT))
> > + *latency = jiffies - req->r_start_stamp;
> > +
> > ceph_osdc_put_request(req);
> > dout("readpages result %d\n", rc);
> > return rc;
>
> This function is only called from ceph_do_readpage().
>
> I think it'd be better to just turn ceph_osdc_readpages into
> ceph_osdc_readpages_submit, make it not wait on the req and have it
> return a pointer to the req with a reference held.
>
> Then the caller could then handle the waiting and do the latency
> calculation afterward and we wouldn't need to add a new pointer argument
> here and push these details into libcephfs.
Same as for ceph_osdc_writepages(), let's keep the existing name.
Thanks,
Ilya
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v4 4/8] ceph: add global write latency metric support
2020-01-16 10:38 [PATCH v4 0/8] ceph: add perf metrics support xiubli
` (2 preceding siblings ...)
2020-01-16 10:38 ` [PATCH v4 3/8] ceph: add global read latency metric support xiubli
@ 2020-01-16 10:38 ` xiubli
2020-01-16 10:38 ` [PATCH v4 5/8] ceph: add global metadata perf " xiubli
` (3 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: xiubli @ 2020-01-16 10:38 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
item total sum_lat(us) avg_lat(us)
-----------------------------------------------------
write 222 5287750000 23818693
URL: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/addr.c | 23 +++++++++++++++++++++--
fs/ceph/debugfs.c | 8 ++++++++
fs/ceph/file.c | 11 ++++++++++-
fs/ceph/mds_client.c | 20 ++++++++++++++++++++
fs/ceph/mds_client.h | 6 ++++++
include/linux/ceph/osd_client.h | 3 ++-
net/ceph/osd_client.c | 9 ++++++++-
7 files changed, 75 insertions(+), 5 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 479ecd0a6e9d..d29bf1548b99 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -598,12 +598,15 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
loff_t page_off = page_offset(page);
int err, len = PAGE_SIZE;
struct ceph_writeback_ctl ceph_wbc;
+ struct ceph_client_metric *metric;
+ s64 latency;
dout("writepage %p idx %lu\n", page, page->index);
inode = page->mapping->host;
ci = ceph_inode(inode);
fsc = ceph_inode_to_client(inode);
+ metric = &fsc->mdsc->metric;
/* verify this is a writeable snap context */
snapc = page_snap_context(page);
@@ -645,7 +648,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
&ci->i_layout, snapc, page_off, len,
ceph_wbc.truncate_seq,
ceph_wbc.truncate_size,
- &inode->i_mtime, &page, 1);
+ &inode->i_mtime, &page, 1,
+ &latency);
+ if (latency)
+ ceph_mdsc_update_write_latency(metric, latency);
+
if (err < 0) {
struct writeback_control tmp_wbc;
if (!wbc)
@@ -707,6 +714,8 @@ static void writepages_finish(struct ceph_osd_request *req)
{
struct inode *inode = req->r_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
struct ceph_osd_data *osd_data;
struct page *page;
int num_pages, total_pages = 0;
@@ -714,7 +723,6 @@ static void writepages_finish(struct ceph_osd_request *req)
int rc = req->r_result;
struct ceph_snap_context *snapc = req->r_snapc;
struct address_space *mapping = inode->i_mapping;
- struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
bool remove_page;
dout("writepages_finish %p rc %d\n", inode, rc);
@@ -783,6 +791,11 @@ static void writepages_finish(struct ceph_osd_request *req)
ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
else
kfree(osd_data->pages);
+
+ if (!rc) {
+ s64 latency = jiffies - req->r_start_stamp;
+ ceph_mdsc_update_write_latency(metric, latency);
+ }
ceph_osdc_put_request(req);
}
@@ -1675,6 +1688,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
struct ceph_osd_request *req;
struct page *page = NULL;
u64 len, inline_version;
@@ -1787,6 +1801,11 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
if (!err)
err = ceph_osdc_wait_request(&fsc->client->osdc, req);
+
+ if (!err || err == -ETIMEDOUT) {
+ s64 latency = jiffies - req->r_start_stamp;
+ ceph_mdsc_update_write_latency(metric, latency);
+ }
out_put:
ceph_osdc_put_request(req);
if (err == -ECANCELED)
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 8200bf025ccd..3fdb15af0a83 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -142,6 +142,14 @@ static int metric_show(struct seq_file *s, void *p)
seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "read",
total, sum / NSEC_PER_USEC, avg / NSEC_PER_USEC);
+ spin_lock(&mdsc->metric.write_lock);
+ total = atomic64_read(&mdsc->metric.total_writes),
+ sum = timespec64_to_ns(&mdsc->metric.write_latency_sum);
+ spin_unlock(&mdsc->metric.write_lock);
+ avg = total ? sum / total : 0;
+ seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write",
+ total, sum / NSEC_PER_USEC, avg / NSEC_PER_USEC);
+
seq_printf(s, "\n");
seq_printf(s, "item total miss hit\n");
seq_printf(s, "-------------------------------------------------\n");
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index f479b699db14..1dbfbc47f5e1 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -818,7 +818,9 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
if (req->r_start_stamp && (rc >= 0 || rc == -ENOENT)) {
s64 latency = jiffies - req->r_start_stamp;
- if (!aio_req->write)
+ if (aio_req->write)
+ ceph_mdsc_update_write_latency(metric, latency);
+ else
ceph_mdsc_update_read_latency(metric, latency);
}
@@ -1072,6 +1074,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (!write)
ceph_mdsc_update_read_latency(metric, latency);
+ else if (write && ret != -ENOENT)
+ ceph_mdsc_update_write_latency(metric, latency);
}
size = i_size_read(inode);
@@ -1160,6 +1164,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
struct ceph_vino vino;
struct ceph_osd_request *req;
struct page **pages;
@@ -1245,6 +1250,10 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
if (!ret)
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ if (!ret || ret == -ETIMEDOUT) {
+ s64 latency = jiffies - req->r_start_stamp;
+ ceph_mdsc_update_write_latency(metric, latency);
+ }
out:
ceph_osdc_put_request(req);
if (ret != 0) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index dc2cda55a5a5..2569f9303c0c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4112,6 +4112,22 @@ void ceph_mdsc_update_read_latency(struct ceph_client_metric *m,
spin_unlock(&m->read_lock);
}
+void ceph_mdsc_update_write_latency(struct ceph_client_metric *m,
+ s64 latency)
+{
+ struct timespec64 ts;
+
+ if (!m)
+ return;
+
+ jiffies_to_timespec64(latency, &ts);
+
+ spin_lock(&m->write_lock);
+ atomic64_inc(&m->total_writes);
+ m->write_latency_sum = timespec64_add(m->write_latency_sum, ts);
+ spin_unlock(&m->write_lock);
+}
+
/*
* delayed work -- periodically trim expired leases, renew caps with mds
*/
@@ -4212,6 +4228,10 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
memset(&metric->read_latency_sum, 0, sizeof(struct timespec64));
atomic64_set(&metric->total_reads, 0);
+ spin_lock_init(&metric->write_lock);
+ memset(&metric->write_latency_sum, 0, sizeof(struct timespec64));
+ atomic64_set(&metric->total_writes, 0);
+
return 0;
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index cdc59037ef14..104b21e4b06c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -370,6 +370,10 @@ struct ceph_client_metric {
spinlock_t read_lock;
atomic64_t total_reads;
struct timespec64 read_latency_sum;
+
+ spinlock_t write_lock;
+ atomic64_t total_writes;
+ struct timespec64 write_latency_sum;
};
/*
@@ -556,4 +560,6 @@ extern int ceph_trim_caps(struct ceph_mds_client *mdsc,
extern void ceph_mdsc_update_read_latency(struct ceph_client_metric *m,
s64 latency);
+extern void ceph_mdsc_update_write_latency(struct ceph_client_metric *m,
+ s64 latency);
#endif
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 43e4240d88e7..e73439d18f28 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -524,7 +524,8 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
u64 off, u64 len,
u32 truncate_seq, u64 truncate_size,
struct timespec64 *mtime,
- struct page **pages, int nr_pages);
+ struct page **pages, int nr_pages,
+ s64 *latency);
int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
u64 src_snapid, u64 src_version,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 62eb758f2474..9f6833ab733c 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5285,12 +5285,16 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
u64 off, u64 len,
u32 truncate_seq, u64 truncate_size,
struct timespec64 *mtime,
- struct page **pages, int num_pages)
+ struct page **pages, int num_pages,
+ s64 *latency)
{
struct ceph_osd_request *req;
int rc = 0;
int page_align = off & ~PAGE_MASK;
+ if (latency)
+ *latency = 0;
+
req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1,
CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
snapc, truncate_seq, truncate_size,
@@ -5308,6 +5312,9 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
if (!rc)
rc = ceph_osdc_wait_request(osdc, req);
+ if (latency && (!rc || rc == -ETIMEDOUT))
+ *latency = jiffies - req->r_start_stamp;
+
ceph_osdc_put_request(req);
if (rc == 0)
rc = len;
--
2.21.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v4 5/8] ceph: add global metadata perf metric support
2020-01-16 10:38 [PATCH v4 0/8] ceph: add perf metrics support xiubli
` (3 preceding siblings ...)
2020-01-16 10:38 ` [PATCH v4 4/8] ceph: add global write " xiubli
@ 2020-01-16 10:38 ` xiubli
2020-01-16 10:38 ` [PATCH v4 6/8] ceph: periodically send perf metrics to MDS xiubli
` (2 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: xiubli @ 2020-01-16 10:38 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
item total sum_lat(us) avg_lat(us)
-----------------------------------------------------
metadata 1288 24506000 19026
URL: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/debugfs.c | 8 ++++++++
fs/ceph/mds_client.c | 25 +++++++++++++++++++++++++
fs/ceph/mds_client.h | 6 ++++++
3 files changed, 39 insertions(+)
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 3fdb15af0a83..df8c1cc685d9 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -150,6 +150,14 @@ static int metric_show(struct seq_file *s, void *p)
seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write",
total, sum / NSEC_PER_USEC, avg / NSEC_PER_USEC);
+ spin_lock(&mdsc->metric.metadata_lock);
+ total = atomic64_read(&mdsc->metric.total_metadatas),
+ sum = timespec64_to_ns(&mdsc->metric.metadata_latency_sum);
+ spin_unlock(&mdsc->metric.metadata_lock);
+ avg = total ? sum / total : 0;
+ seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata",
+ total, sum / NSEC_PER_USEC, avg / NSEC_PER_USEC);
+
seq_printf(s, "\n");
seq_printf(s, "item total miss hit\n");
seq_printf(s, "-------------------------------------------------\n");
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 2569f9303c0c..409dcb7990aa 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2903,6 +2903,11 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
result = le32_to_cpu(head->result);
+ if (!result || result == -ENOENT) {
+ s64 latency = jiffies - req->r_started;
+ ceph_mdsc_update_metadata_latency(&mdsc->metric, latency);
+ }
+
/*
* Handle an ESTALE
* if we're not talking to the authority, send to them
@@ -4128,6 +4133,22 @@ void ceph_mdsc_update_write_latency(struct ceph_client_metric *m,
spin_unlock(&m->write_lock);
}
+void ceph_mdsc_update_metadata_latency(struct ceph_client_metric *m,
+ s64 latency)
+{
+ struct timespec64 ts;
+
+ if (!m)
+ return;
+
+ jiffies_to_timespec64(latency, &ts);
+
+ spin_lock(&m->metadata_lock);
+ atomic64_inc(&m->total_metadatas);
+ m->metadata_latency_sum = timespec64_add(m->metadata_latency_sum, ts);
+ spin_unlock(&m->metadata_lock);
+}
+
/*
* delayed work -- periodically trim expired leases, renew caps with mds
*/
@@ -4232,6 +4253,10 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
memset(&metric->write_latency_sum, 0, sizeof(struct timespec64));
atomic64_set(&metric->total_writes, 0);
+ spin_lock_init(&metric->metadata_lock);
+ memset(&metric->metadata_latency_sum, 0, sizeof(struct timespec64));
+ atomic64_set(&metric->total_metadatas, 0);
+
return 0;
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 104b21e4b06c..60bac2b96577 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -374,6 +374,10 @@ struct ceph_client_metric {
spinlock_t write_lock;
atomic64_t total_writes;
struct timespec64 write_latency_sum;
+
+ spinlock_t metadata_lock;
+ atomic64_t total_metadatas;
+ struct timespec64 metadata_latency_sum;
};
/*
@@ -562,4 +566,6 @@ extern void ceph_mdsc_update_read_latency(struct ceph_client_metric *m,
s64 latency);
extern void ceph_mdsc_update_write_latency(struct ceph_client_metric *m,
s64 latency);
+extern void ceph_mdsc_update_metadata_latency(struct ceph_client_metric *m,
+ s64 latency);
#endif
--
2.21.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v4 6/8] ceph: periodically send perf metrics to MDS
2020-01-16 10:38 [PATCH v4 0/8] ceph: add perf metrics support xiubli
` (4 preceding siblings ...)
2020-01-16 10:38 ` [PATCH v4 5/8] ceph: add global metadata perf " xiubli
@ 2020-01-16 10:38 ` xiubli
2020-01-16 10:38 ` [PATCH v4 7/8] ceph: add reset metrics support xiubli
2020-01-16 10:38 ` [PATCH v4 8/8] ceph: send client provided metric flags in client metadata xiubli
7 siblings, 0 replies; 13+ messages in thread
From: xiubli @ 2020-01-16 10:38 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
Add a debugfs entry to enable/disable sending metrics to the MDS,
disabled by default; when enabled, the kclient will send metrics
every second.
This will send global dentry lease hit/miss and read/write/metadata
latency metrics and each session's caps hit/miss metric to MDS.
Each time, the global metrics are sent only once, via any available
session.
URL: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/debugfs.c | 44 +++++++-
fs/ceph/mds_client.c | 205 ++++++++++++++++++++++++++++++++---
fs/ceph/mds_client.h | 3 +
fs/ceph/super.h | 1 +
include/linux/ceph/ceph_fs.h | 77 +++++++++++++
5 files changed, 312 insertions(+), 18 deletions(-)
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index df8c1cc685d9..bb96fb4d04c4 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -124,6 +124,40 @@ static int mdsc_show(struct seq_file *s, void *p)
return 0;
}
+/*
+ * metrics debugfs
+ */
+static int sending_metrics_set(void *data, u64 val)
+{
+ struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+
+ if (val > 1) {
+ pr_err("Invalid sending metrics set value %llu\n", val);
+ return -EINVAL;
+ }
+
+ mutex_lock(&mdsc->mutex);
+ mdsc->sending_metrics = (unsigned int)val;
+ mutex_unlock(&mdsc->mutex);
+
+ return 0;
+}
+
+static int sending_metrics_get(void *data, u64 *val)
+{
+ struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+
+ mutex_lock(&mdsc->mutex);
+ *val = (u64)mdsc->sending_metrics;
+ mutex_unlock(&mdsc->mutex);
+
+ return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(sending_metrics_fops, sending_metrics_get,
+ sending_metrics_set, "%llu\n");
+
static int metric_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
@@ -308,11 +342,9 @@ static int congestion_kb_get(void *data, u64 *val)
*val = (u64)fsc->mount_options->congestion_kb;
return 0;
}
-
DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get,
congestion_kb_set, "%llu\n");
-
void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
{
dout("ceph_fs_debugfs_cleanup\n");
@@ -322,6 +354,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
debugfs_remove(fsc->debugfs_mds_sessions);
debugfs_remove(fsc->debugfs_caps);
debugfs_remove(fsc->debugfs_metric);
+ debugfs_remove(fsc->debugfs_sending_metrics);
debugfs_remove(fsc->debugfs_mdsc);
}
@@ -362,6 +395,13 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc,
&mdsc_show_fops);
+ fsc->debugfs_sending_metrics =
+ debugfs_create_file("sending_metrics",
+ 0600,
+ fsc->client->debugfs_dir,
+ fsc,
+ &sending_metrics_fops);
+
fsc->debugfs_metric = debugfs_create_file("metrics",
0400,
fsc->client->debugfs_dir,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 409dcb7990aa..3fc2bdb1153a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4149,13 +4149,162 @@ void ceph_mdsc_update_metadata_latency(struct ceph_client_metric *m,
spin_unlock(&m->metadata_lock);
}
+/*
+ * called under s_mutex
+ */
+static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *s,
+ bool skip_global)
+{
+ struct ceph_metric_head *head;
+ struct ceph_metric_cap *cap;
+ struct ceph_metric_dentry_lease *lease;
+ struct ceph_metric_read_latency *read;
+ struct ceph_metric_write_latency *write;
+ struct ceph_metric_metadata_latency *meta;
+ struct ceph_msg *msg;
+ struct timespec64 ts;
+ s32 len = sizeof(*head) + sizeof(*cap);
+ s64 sum, total, avg;
+ s32 items = 0;
+
+ if (!mdsc || !s)
+ return false;
+
+ if (!skip_global) {
+ len += sizeof(*lease);
+ len += sizeof(*read);
+ len += sizeof(*write);
+ len += sizeof(*meta);
+ }
+
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
+ if (!msg) {
+ pr_err("send metrics to mds%d, failed to allocate message\n",
+ s->s_mds);
+ return false;
+ }
+
+ head = msg->front.iov_base;
+
+ /* encode the cap metric */
+ cap = (struct ceph_metric_cap *)(head + 1);
+ cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
+ cap->ver = 1;
+ cap->campat = 1;
+ cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
+ cap->hit = cpu_to_le64(percpu_counter_sum(&s->i_caps_hit));
+ cap->mis = cpu_to_le64(percpu_counter_sum(&s->i_caps_mis));
+ cap->total = cpu_to_le64(s->s_nr_caps);
+ items++;
+
+ dout("cap metric hit %lld, mis %lld, total caps %lld",
+ le64_to_cpu(cap->hit), le64_to_cpu(cap->mis),
+ le64_to_cpu(cap->total));
+
+ /* only send the global once */
+ if (skip_global)
+ goto skip_global;
+
+ /* encode the dentry lease metric */
+ lease = (struct ceph_metric_dentry_lease *)(cap + 1);
+ lease->type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
+ lease->ver = 1;
+ lease->campat = 1;
+ lease->data_len = cpu_to_le32(sizeof(*lease) - 10);
+ lease->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.d_lease_hit));
+ lease->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.d_lease_mis));
+ lease->total = cpu_to_le64(atomic64_read(&mdsc->metric.total_dentries));
+ items++;
+
+ dout("dentry lease metric hit %lld, mis %lld, total dentries %lld",
+ le64_to_cpu(lease->hit), le64_to_cpu(lease->mis),
+ le64_to_cpu(lease->total));
+
+ /* encode the read latency metric */
+ read = (struct ceph_metric_read_latency *)(lease + 1);
+ read->type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
+ read->ver = 1;
+ read->campat = 1;
+ read->data_len = cpu_to_le32(sizeof(*read) - 10);
+ spin_lock(&mdsc->metric.read_lock);
+ total = atomic64_read(&mdsc->metric.total_reads),
+ sum = timespec64_to_ns(&mdsc->metric.read_latency_sum);
+ spin_unlock(&mdsc->metric.read_lock);
+ avg = total ? sum / total : 0;
+ ts = ns_to_timespec64(avg);
+ read->sec = cpu_to_le32(ts.tv_sec);
+ read->nsec = cpu_to_le32(ts.tv_nsec);
+ items++;
+
+ dout("read latency metric total %lld, sum lat %lld, avg lat %lld",
+ total, sum, avg);
+
+ /* encode the write latency metric */
+ write = (struct ceph_metric_write_latency *)(read + 1);
+ write->type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
+ write->ver = 1;
+ write->campat = 1;
+ write->data_len = cpu_to_le32(sizeof(*write) - 10);
+ spin_lock(&mdsc->metric.write_lock);
+ total = atomic64_read(&mdsc->metric.total_writes),
+ sum = timespec64_to_ns(&mdsc->metric.write_latency_sum);
+ spin_unlock(&mdsc->metric.write_lock);
+ avg = total ? sum / total : 0;
+ ts = ns_to_timespec64(avg);
+ write->sec = cpu_to_le32(ts.tv_sec);
+ write->nsec = cpu_to_le32(ts.tv_nsec);
+ items++;
+
+ dout("write latency metric total %lld, sum lat %lld, avg lat %lld",
+ total, sum, avg);
+
+ /* encode the metadata latency metric */
+ meta = (struct ceph_metric_metadata_latency *)(write + 1);
+ meta->type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
+ meta->ver = 1;
+ meta->campat = 1;
+ meta->data_len = cpu_to_le32(sizeof(*meta) - 10);
+ spin_lock(&mdsc->metric.metadata_lock);
+ total = atomic64_read(&mdsc->metric.total_metadatas),
+ sum = timespec64_to_ns(&mdsc->metric.metadata_latency_sum);
+ spin_unlock(&mdsc->metric.metadata_lock);
+ avg = total ? sum / total : 0;
+ ts = ns_to_timespec64(avg);
+ meta->sec = cpu_to_le32(ts.tv_sec);
+ meta->nsec = cpu_to_le32(ts.tv_nsec);
+ items++;
+
+ dout("metadata latency metric total %lld, sum lat %lld, avg lat %lld",
+ total, sum, avg);
+
+skip_global:
+ put_unaligned_le32(items, &head->num);
+ msg->front.iov_len = cpu_to_le32(len);
+ msg->hdr.version = cpu_to_le16(1);
+ msg->hdr.compat_version = cpu_to_le16(1);
+ msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+ dout("send metrics to mds%d %p\n", s->s_mds, msg);
+ ceph_con_send(&s->s_con, msg);
+
+ return true;
+}
+
/*
* delayed work -- periodically trim expired leases, renew caps with mds
*/
+#define CEPH_WORK_DELAY_DEF 5
static void schedule_delayed(struct ceph_mds_client *mdsc)
{
- int delay = 5;
- unsigned hz = round_jiffies_relative(HZ * delay);
+ unsigned int hz;
+ int delay = CEPH_WORK_DELAY_DEF;
+
+ mutex_lock(&mdsc->mutex);
+ if (mdsc->sending_metrics)
+ delay = 1;
+ mutex_unlock(&mdsc->mutex);
+
+ hz = round_jiffies_relative(HZ * delay);
schedule_delayed_work(&mdsc->delayed_work, hz);
}
@@ -4166,18 +4315,28 @@ static void delayed_work(struct work_struct *work)
container_of(work, struct ceph_mds_client, delayed_work.work);
int renew_interval;
int renew_caps;
+ bool metric_only;
+ bool sending_metrics;
+ bool g_skip = false;
dout("mdsc delayed_work\n");
mutex_lock(&mdsc->mutex);
- renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
- renew_caps = time_after_eq(jiffies, HZ*renew_interval +
- mdsc->last_renew_caps);
- if (renew_caps)
- mdsc->last_renew_caps = jiffies;
+ sending_metrics = !!mdsc->sending_metrics;
+ metric_only = mdsc->sending_metrics &&
+ (mdsc->ticks++ % CEPH_WORK_DELAY_DEF);
+
+ if (!metric_only) {
+ renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
+ renew_caps = time_after_eq(jiffies, HZ*renew_interval +
+ mdsc->last_renew_caps);
+ if (renew_caps)
+ mdsc->last_renew_caps = jiffies;
+ }
for (i = 0; i < mdsc->max_sessions; i++) {
struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
+
if (!s)
continue;
if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
@@ -4203,13 +4362,20 @@ static void delayed_work(struct work_struct *work)
mutex_unlock(&mdsc->mutex);
mutex_lock(&s->s_mutex);
- if (renew_caps)
- send_renew_caps(mdsc, s);
- else
- ceph_con_keepalive(&s->s_con);
- if (s->s_state == CEPH_MDS_SESSION_OPEN ||
- s->s_state == CEPH_MDS_SESSION_HUNG)
- ceph_send_cap_releases(mdsc, s);
+
+ if (sending_metrics)
+ g_skip = ceph_mdsc_send_metrics(mdsc, s, g_skip);
+
+ if (!metric_only) {
+ if (renew_caps)
+ send_renew_caps(mdsc, s);
+ else
+ ceph_con_keepalive(&s->s_con);
+ if (s->s_state == CEPH_MDS_SESSION_OPEN ||
+ s->s_state == CEPH_MDS_SESSION_HUNG)
+ ceph_send_cap_releases(mdsc, s);
+ }
+
mutex_unlock(&s->s_mutex);
ceph_put_mds_session(s);
@@ -4217,6 +4383,9 @@ static void delayed_work(struct work_struct *work)
}
mutex_unlock(&mdsc->mutex);
+ if (metric_only)
+ goto delay_work;
+
ceph_check_delayed_caps(mdsc);
ceph_queue_cap_reclaim_work(mdsc);
@@ -4225,11 +4394,13 @@ static void delayed_work(struct work_struct *work)
maybe_recover_session(mdsc);
+delay_work:
schedule_delayed(mdsc);
}
-static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
+static int ceph_mdsc_metric_init(struct ceph_mds_client *mdsc)
{
+ struct ceph_client_metric *metric = &mdsc->metric;
int ret;
if (!metric)
@@ -4257,6 +4428,8 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
memset(&metric->metadata_latency_sum, 0, sizeof(struct timespec64));
atomic64_set(&metric->total_metadatas, 0);
+ mdsc->sending_metrics = 0;
+ mdsc->ticks = 0;
return 0;
}
@@ -4313,7 +4486,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
init_waitqueue_head(&mdsc->cap_flushing_wq);
INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
atomic_set(&mdsc->cap_reclaim_pending, 0);
- err = ceph_mdsc_metric_init(&mdsc->metric);
+ err = ceph_mdsc_metric_init(mdsc);
if (err)
goto err_mdsmap;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 60bac2b96577..153ca6aa6d2c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -468,6 +468,9 @@ struct ceph_mds_client {
struct list_head dentry_leases; /* fifo list */
struct list_head dentry_dir_leases; /* lru list */
+ /* metrics */
+ unsigned int sending_metrics;
+ unsigned int ticks;
struct ceph_client_metric metric;
spinlock_t snapid_map_lock;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 3f4829222528..a91431e9bdf7 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -128,6 +128,7 @@ struct ceph_fs_client {
struct dentry *debugfs_congestion_kb;
struct dentry *debugfs_bdi;
struct dentry *debugfs_mdsc, *debugfs_mdsmap;
+ struct dentry *debugfs_sending_metrics;
struct dentry *debugfs_metric;
struct dentry *debugfs_mds_sessions;
#endif
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index a099f60feb7b..115a4dee379b 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -130,6 +130,7 @@ struct ceph_dir_layout {
#define CEPH_MSG_CLIENT_REQUEST 24
#define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
#define CEPH_MSG_CLIENT_REPLY 26
+#define CEPH_MSG_CLIENT_METRICS 29
#define CEPH_MSG_CLIENT_CAPS 0x310
#define CEPH_MSG_CLIENT_LEASE 0x311
#define CEPH_MSG_CLIENT_SNAP 0x312
@@ -761,6 +762,82 @@ struct ceph_mds_lease {
} __attribute__ ((packed));
/* followed by a __le32+string for dname */
+enum ceph_metric_type {
+ CLIENT_METRIC_TYPE_CAP_INFO,
+ CLIENT_METRIC_TYPE_READ_LATENCY,
+ CLIENT_METRIC_TYPE_WRITE_LATENCY,
+ CLIENT_METRIC_TYPE_METADATA_LATENCY,
+ CLIENT_METRIC_TYPE_DENTRY_LEASE,
+
+ CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
+};
+
+/* metric caps header */
+struct ceph_metric_cap {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 campat;
+
+ __le32 data_len; /* length of sizeof(hit + mis + total) */
+ __le64 hit;
+ __le64 mis;
+ __le64 total;
+} __attribute__ ((packed));
+
+/* metric dentry lease header */
+struct ceph_metric_dentry_lease {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 campat;
+
+ __le32 data_len; /* length of sizeof(hit + mis + total) */
+ __le64 hit;
+ __le64 mis;
+ __le64 total;
+} __attribute__ ((packed));
+
+/* metric read latency header */
+struct ceph_metric_read_latency {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 campat;
+
+ __le32 data_len; /* length of sizeof(sec + nsec) */
+ __le32 sec;
+ __le32 nsec;
+} __attribute__ ((packed));
+
+/* metric write latency header */
+struct ceph_metric_write_latency {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 campat;
+
+ __le32 data_len; /* length of sizeof(sec + nsec) */
+ __le32 sec;
+ __le32 nsec;
+} __attribute__ ((packed));
+
+/* metric metadata latency header */
+struct ceph_metric_metadata_latency {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 campat;
+
+ __le32 data_len; /* length of sizeof(sec + nsec) */
+ __le32 sec;
+ __le32 nsec;
+} __attribute__ ((packed));
+
+struct ceph_metric_head {
+ __le32 num; /* the number of metrics will be sent */
+} __attribute__ ((packed));
+
/* client reconnect */
struct ceph_mds_cap_reconnect {
__le64 cap_id;
--
2.21.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v4 7/8] ceph: add reset metrics support
2020-01-16 10:38 [PATCH v4 0/8] ceph: add perf metrics support xiubli
` (5 preceding siblings ...)
2020-01-16 10:38 ` [PATCH v4 6/8] ceph: periodically send perf metrics to MDS xiubli
@ 2020-01-16 10:38 ` xiubli
2020-01-16 15:02 ` Ilya Dryomov
2020-01-16 10:38 ` [PATCH v4 8/8] ceph: send client provided metric flags in client metadata xiubli
7 siblings, 1 reply; 13+ messages in thread
From: xiubli @ 2020-01-16 10:38 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
This will reset most of the metric counters, except the cap and dentry
total numbers.
Sometimes we need to discard the old metrics and start to get new
metrics.
URL: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/debugfs.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++
fs/ceph/super.h | 1 +
2 files changed, 58 insertions(+)
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index bb96fb4d04c4..c24a704d4e99 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -158,6 +158,55 @@ static int sending_metrics_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(sending_metrics_fops, sending_metrics_get,
sending_metrics_set, "%llu\n");
+static int reset_metrics_set(void *data, u64 val)
+{
+ struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_client_metric *metric = &mdsc->metric;
+ int i;
+
+ if (val != 1) {
+ pr_err("Invalid reset metrics set value %llu\n", val);
+ return -EINVAL;
+ }
+
+ percpu_counter_set(&metric->d_lease_hit, 0);
+ percpu_counter_set(&metric->d_lease_mis, 0);
+
+ spin_lock(&metric->read_lock);
+ memset(&metric->read_latency_sum, 0, sizeof(struct timespec64));
+ atomic64_set(&metric->total_reads, 0),
+ spin_unlock(&metric->read_lock);
+
+ spin_lock(&metric->write_lock);
+ memset(&metric->write_latency_sum, 0, sizeof(struct timespec64));
+ atomic64_set(&metric->total_writes, 0),
+ spin_unlock(&metric->write_lock);
+
+ spin_lock(&metric->metadata_lock);
+ memset(&metric->metadata_latency_sum, 0, sizeof(struct timespec64));
+ atomic64_set(&metric->total_metadatas, 0),
+ spin_unlock(&metric->metadata_lock);
+
+ mutex_lock(&mdsc->mutex);
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ struct ceph_mds_session *session;
+
+ session = __ceph_lookup_mds_session(mdsc, i);
+ if (!session)
+ continue;
+ percpu_counter_set(&session->i_caps_hit, 0);
+ percpu_counter_set(&session->i_caps_mis, 0);
+ ceph_put_mds_session(session);
+ }
+
+ mutex_unlock(&mdsc->mutex);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(reset_metrics_fops, NULL, reset_metrics_set, "%llu\n");
+
static int metric_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
@@ -355,6 +404,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
debugfs_remove(fsc->debugfs_caps);
debugfs_remove(fsc->debugfs_metric);
debugfs_remove(fsc->debugfs_sending_metrics);
+ debugfs_remove(fsc->debugfs_reset_metrics);
debugfs_remove(fsc->debugfs_mdsc);
}
@@ -402,6 +452,13 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc,
&sending_metrics_fops);
+ fsc->debugfs_reset_metrics =
+ debugfs_create_file("reset_metrics",
+ 0600,
+ fsc->client->debugfs_dir,
+ fsc,
+ &reset_metrics_fops);
+
fsc->debugfs_metric = debugfs_create_file("metrics",
0400,
fsc->client->debugfs_dir,
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a91431e9bdf7..d24929f1c4bf 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -129,6 +129,7 @@ struct ceph_fs_client {
struct dentry *debugfs_bdi;
struct dentry *debugfs_mdsc, *debugfs_mdsmap;
struct dentry *debugfs_sending_metrics;
+ struct dentry *debugfs_reset_metrics;
struct dentry *debugfs_metric;
struct dentry *debugfs_mds_sessions;
#endif
--
2.21.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v4 7/8] ceph: add reset metrics support
2020-01-16 10:38 ` [PATCH v4 7/8] ceph: add reset metrics support xiubli
@ 2020-01-16 15:02 ` Ilya Dryomov
2020-01-17 1:57 ` Xiubo Li
0 siblings, 1 reply; 13+ messages in thread
From: Ilya Dryomov @ 2020-01-16 15:02 UTC (permalink / raw)
To: Xiubo Li
Cc: Jeff Layton, Yan, Zheng, Sage Weil, Patrick Donnelly, Ceph Development
On Thu, Jan 16, 2020 at 11:39 AM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> This will reset the most metric counters, except the cap and dentry
> total numbers.
>
> Sometimes we need to discard the old metrics and start to get new
> metrics.
>
> URL: https://tracker.ceph.com/issues/43215
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> fs/ceph/debugfs.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++
> fs/ceph/super.h | 1 +
> 2 files changed, 58 insertions(+)
>
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index bb96fb4d04c4..c24a704d4e99 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -158,6 +158,55 @@ static int sending_metrics_get(void *data, u64 *val)
> DEFINE_SIMPLE_ATTRIBUTE(sending_metrics_fops, sending_metrics_get,
> sending_metrics_set, "%llu\n");
>
> +static int reset_metrics_set(void *data, u64 val)
> +{
> + struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
> + struct ceph_mds_client *mdsc = fsc->mdsc;
> + struct ceph_client_metric *metric = &mdsc->metric;
> + int i;
> +
> + if (val != 1) {
> + pr_err("Invalid reset metrics set value %llu\n", val);
> + return -EINVAL;
> + }
> +
> + percpu_counter_set(&metric->d_lease_hit, 0);
> + percpu_counter_set(&metric->d_lease_mis, 0);
> +
> + spin_lock(&metric->read_lock);
> + memset(&metric->read_latency_sum, 0, sizeof(struct timespec64));
> + atomic64_set(&metric->total_reads, 0),
> + spin_unlock(&metric->read_lock);
> +
> + spin_lock(&metric->write_lock);
> + memset(&metric->write_latency_sum, 0, sizeof(struct timespec64));
> + atomic64_set(&metric->total_writes, 0),
> + spin_unlock(&metric->write_lock);
> +
> + spin_lock(&metric->metadata_lock);
> + memset(&metric->metadata_latency_sum, 0, sizeof(struct timespec64));
> + atomic64_set(&metric->total_metadatas, 0),
> + spin_unlock(&metric->metadata_lock);
> +
> + mutex_lock(&mdsc->mutex);
> + for (i = 0; i < mdsc->max_sessions; i++) {
> + struct ceph_mds_session *session;
> +
> + session = __ceph_lookup_mds_session(mdsc, i);
> + if (!session)
> + continue;
> + percpu_counter_set(&session->i_caps_hit, 0);
> + percpu_counter_set(&session->i_caps_mis, 0);
> + ceph_put_mds_session(session);
> + }
> +
> + mutex_unlock(&mdsc->mutex);
> +
> + return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(reset_metrics_fops, NULL, reset_metrics_set, "%llu\n");
> +
> static int metric_show(struct seq_file *s, void *p)
> {
> struct ceph_fs_client *fsc = s->private;
> @@ -355,6 +404,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
> debugfs_remove(fsc->debugfs_caps);
> debugfs_remove(fsc->debugfs_metric);
> debugfs_remove(fsc->debugfs_sending_metrics);
> + debugfs_remove(fsc->debugfs_reset_metrics);
> debugfs_remove(fsc->debugfs_mdsc);
> }
>
> @@ -402,6 +452,13 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
> fsc,
> &sending_metrics_fops);
>
> + fsc->debugfs_reset_metrics =
> + debugfs_create_file("reset_metrics",
> + 0600,
> + fsc->client->debugfs_dir,
> + fsc,
> + &reset_metrics_fops);
> +
> fsc->debugfs_metric = debugfs_create_file("metrics",
> 0400,
> fsc->client->debugfs_dir,
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index a91431e9bdf7..d24929f1c4bf 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -129,6 +129,7 @@ struct ceph_fs_client {
> struct dentry *debugfs_bdi;
> struct dentry *debugfs_mdsc, *debugfs_mdsmap;
> struct dentry *debugfs_sending_metrics;
> + struct dentry *debugfs_reset_metrics;
> struct dentry *debugfs_metric;
> struct dentry *debugfs_mds_sessions;
> #endif
Do we need a separate attribute for this? Did you think about making
metrics attribute writeable and accepting some string, e.g. "reset"?
Thanks,
Ilya
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v4 7/8] ceph: add reset metrics support
2020-01-16 15:02 ` Ilya Dryomov
@ 2020-01-17 1:57 ` Xiubo Li
0 siblings, 0 replies; 13+ messages in thread
From: Xiubo Li @ 2020-01-17 1:57 UTC (permalink / raw)
To: Ilya Dryomov
Cc: Jeff Layton, Yan, Zheng, Sage Weil, Patrick Donnelly, Ceph Development
On 2020/1/16 23:02, Ilya Dryomov wrote:
> On Thu, Jan 16, 2020 at 11:39 AM <xiubli@redhat.com> wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> This will reset the most metric counters, except the cap and dentry
>> total numbers.
>>
>> Sometimes we need to discard the old metrics and start to get new
>> metrics.
>>
>> URL: https://tracker.ceph.com/issues/43215
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>> fs/ceph/debugfs.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++
>> fs/ceph/super.h | 1 +
>> 2 files changed, 58 insertions(+)
>>
>> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
>> index bb96fb4d04c4..c24a704d4e99 100644
>> --- a/fs/ceph/debugfs.c
>> +++ b/fs/ceph/debugfs.c
>> @@ -158,6 +158,55 @@ static int sending_metrics_get(void *data, u64 *val)
>> DEFINE_SIMPLE_ATTRIBUTE(sending_metrics_fops, sending_metrics_get,
>> sending_metrics_set, "%llu\n");
>>
>> +static int reset_metrics_set(void *data, u64 val)
>> +{
>> + struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
>> + struct ceph_mds_client *mdsc = fsc->mdsc;
>> + struct ceph_client_metric *metric = &mdsc->metric;
>> + int i;
>> +
>> + if (val != 1) {
>> + pr_err("Invalid reset metrics set value %llu\n", val);
>> + return -EINVAL;
>> + }
>> +
>> + percpu_counter_set(&metric->d_lease_hit, 0);
>> + percpu_counter_set(&metric->d_lease_mis, 0);
>> +
>> + spin_lock(&metric->read_lock);
>> + memset(&metric->read_latency_sum, 0, sizeof(struct timespec64));
>> + atomic64_set(&metric->total_reads, 0),
>> + spin_unlock(&metric->read_lock);
>> +
>> + spin_lock(&metric->write_lock);
>> + memset(&metric->write_latency_sum, 0, sizeof(struct timespec64));
>> + atomic64_set(&metric->total_writes, 0),
>> + spin_unlock(&metric->write_lock);
>> +
>> + spin_lock(&metric->metadata_lock);
>> + memset(&metric->metadata_latency_sum, 0, sizeof(struct timespec64));
>> + atomic64_set(&metric->total_metadatas, 0),
>> + spin_unlock(&metric->metadata_lock);
>> +
>> + mutex_lock(&mdsc->mutex);
>> + for (i = 0; i < mdsc->max_sessions; i++) {
>> + struct ceph_mds_session *session;
>> +
>> + session = __ceph_lookup_mds_session(mdsc, i);
>> + if (!session)
>> + continue;
>> + percpu_counter_set(&session->i_caps_hit, 0);
>> + percpu_counter_set(&session->i_caps_mis, 0);
>> + ceph_put_mds_session(session);
>> + }
>> +
>> + mutex_unlock(&mdsc->mutex);
>> +
>> + return 0;
>> +}
>> +
>> +DEFINE_SIMPLE_ATTRIBUTE(reset_metrics_fops, NULL, reset_metrics_set, "%llu\n");
>> +
>> static int metric_show(struct seq_file *s, void *p)
>> {
>> struct ceph_fs_client *fsc = s->private;
>> @@ -355,6 +404,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
>> debugfs_remove(fsc->debugfs_caps);
>> debugfs_remove(fsc->debugfs_metric);
>> debugfs_remove(fsc->debugfs_sending_metrics);
>> + debugfs_remove(fsc->debugfs_reset_metrics);
>> debugfs_remove(fsc->debugfs_mdsc);
>> }
>>
>> @@ -402,6 +452,13 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
>> fsc,
>> &sending_metrics_fops);
>>
>> + fsc->debugfs_reset_metrics =
>> + debugfs_create_file("reset_metrics",
>> + 0600,
>> + fsc->client->debugfs_dir,
>> + fsc,
>> + &reset_metrics_fops);
>> +
>> fsc->debugfs_metric = debugfs_create_file("metrics",
>> 0400,
>> fsc->client->debugfs_dir,
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index a91431e9bdf7..d24929f1c4bf 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -129,6 +129,7 @@ struct ceph_fs_client {
>> struct dentry *debugfs_bdi;
>> struct dentry *debugfs_mdsc, *debugfs_mdsmap;
>> struct dentry *debugfs_sending_metrics;
>> + struct dentry *debugfs_reset_metrics;
>> struct dentry *debugfs_metric;
>> struct dentry *debugfs_mds_sessions;
>> #endif
> Do we need a separate attribute for this? Did you think about making
> metrics attribute writeable and accepting some string, e.g. "reset"?
Let's make the "metrics" writeable, which will means reset.
Thanks.
> Thanks,
>
> Ilya
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v4 8/8] ceph: send client provided metric flags in client metadata
2020-01-16 10:38 [PATCH v4 0/8] ceph: add perf metrics support xiubli
` (6 preceding siblings ...)
2020-01-16 10:38 ` [PATCH v4 7/8] ceph: add reset metrics support xiubli
@ 2020-01-16 10:38 ` xiubli
7 siblings, 0 replies; 13+ messages in thread
From: xiubli @ 2020-01-16 10:38 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
This will send the metric flags to the MDS; currently supported are the
cap, dentry lease, read latency, write latency and metadata latency metrics.
URL: https://tracker.ceph.com/issues/43435
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/mds_client.c | 47 ++++++++++++++++++++++++++++++++++++++++++--
fs/ceph/mds_client.h | 14 +++++++++++++
2 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 3fc2bdb1153a..6b9c7fc5824d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1096,6 +1096,41 @@ static void encode_supported_features(void **p, void *end)
}
}
+static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED;
+#define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8)
+static void encode_metric_spec(void **p, void *end)
+{
+ static const size_t count = ARRAY_SIZE(metric_bits);
+
+ /* header */
+ BUG_ON(*p + 2 > end);
+ ceph_encode_8(p, 1); /* version */
+ ceph_encode_8(p, 1); /* compat */
+
+ if (count > 0) {
+ size_t i;
+ size_t size = METRIC_BYTES(count);
+
+ BUG_ON(*p + 4 + 4 + size > end);
+
+ /* metric spec info length */
+ ceph_encode_32(p, 4 + size);
+
+ /* metric spec */
+ ceph_encode_32(p, size);
+ memset(*p, 0, size);
+ for (i = 0; i < count; i++)
+ ((unsigned char*)(*p))[i / 8] |= BIT(metric_bits[i] % 8);
+ *p += size;
+ } else {
+ BUG_ON(*p + 4 + 4 > end);
+ /* metric spec info length */
+ ceph_encode_32(p, 4);
+ /* metric spec */
+ ceph_encode_32(p, 0);
+ }
+}
+
/*
* session message, specialization for CEPH_SESSION_REQUEST_OPEN
* to include additional client metadata fields.
@@ -1135,6 +1170,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
size = FEATURE_BYTES(count);
extra_bytes += 4 + size;
+ /* metric spec */
+ size = 0;
+ count = ARRAY_SIZE(metric_bits);
+ if (count > 0)
+ size = METRIC_BYTES(count);
+ extra_bytes += 2 + 4 + 4 + size;
+
/* Allocate the message */
msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
GFP_NOFS, false);
@@ -1153,9 +1195,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
* Serialize client metadata into waiting buffer space, using
* the format that userspace expects for map<string, string>
*
- * ClientSession messages with metadata are v3
+ * ClientSession messages with metadata are v4
*/
- msg->hdr.version = cpu_to_le16(3);
+ msg->hdr.version = cpu_to_le16(4);
msg->hdr.compat_version = cpu_to_le16(1);
/* The write pointer, following the session_head structure */
@@ -1178,6 +1220,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
}
encode_supported_features(&p, end);
+ encode_metric_spec(&p, end);
msg->front.iov_len = p - msg->front.iov_base;
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 153ca6aa6d2c..1a668d318501 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -42,6 +42,20 @@ enum ceph_feature_type {
}
#define CEPHFS_FEATURES_CLIENT_REQUIRED {}
+/*
+ * This will always have the highest metric bit value
+ * as the last element of the array.
+ */
+#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED { \
+ CLIENT_METRIC_TYPE_CAP_INFO, \
+ CLIENT_METRIC_TYPE_READ_LATENCY, \
+ CLIENT_METRIC_TYPE_WRITE_LATENCY, \
+ CLIENT_METRIC_TYPE_METADATA_LATENCY, \
+ CLIENT_METRIC_TYPE_DENTRY_LEASE, \
+ \
+ CLIENT_METRIC_TYPE_MAX, \
+}
+
/*
* Some lock dependencies:
*
--
2.21.0
^ permalink raw reply related [flat|nested] 13+ messages in thread