* [PATCH v2 1/8] ceph: add global dentry lease metric support
2020-01-08 10:41 [PATCH v2 0/8] ceph: add perf metrics support xiubli
@ 2020-01-08 10:41 ` xiubli
2020-01-09 13:52 ` Jeff Layton
2020-01-08 10:41 ` [PATCH v2 2/8] ceph: add caps perf metric for each session xiubli
` (7 subsequent siblings)
8 siblings, 1 reply; 18+ messages in thread
From: xiubli @ 2020-01-08 10:41 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
For dentry leases, only the hit/miss info triggered from VFS calls is
counted; cases like request reply handling and the periodic
ceph_trim_dentries() are ignored.
Currently only the debugfs interface is supported.
The output will be:
item          total           miss            hit
-------------------------------------------------
d_lease       11              7               141
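The new file lives under the client's debugfs directory; assuming debugfs
is mounted at /sys/kernel/debug, something like the following should dump
it (the <fsid>.client<id> directory name below is only a placeholder):

  # cat /sys/kernel/debug/ceph/<fsid>.client<id>/metrics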
Fixes: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/debugfs.c | 32 ++++++++++++++++++++++++++++----
fs/ceph/dir.c | 18 ++++++++++++++++--
fs/ceph/mds_client.c | 37 +++++++++++++++++++++++++++++++++++--
fs/ceph/mds_client.h | 9 +++++++++
fs/ceph/super.h | 1 +
5 files changed, 89 insertions(+), 8 deletions(-)
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index fb7cabd98e7b..40a22da0214a 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -124,6 +124,22 @@ static int mdsc_show(struct seq_file *s, void *p)
return 0;
}
+static int metric_show(struct seq_file *s, void *p)
+{
+ struct ceph_fs_client *fsc = s->private;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+
+ seq_printf(s, "item total miss hit\n");
+ seq_printf(s, "-------------------------------------------------\n");
+
+ seq_printf(s, "%-14s%-16lld%-16lld%lld\n", "d_lease",
+ atomic64_read(&mdsc->metric.total_dentries),
+ percpu_counter_sum(&mdsc->metric.d_lease_mis),
+ percpu_counter_sum(&mdsc->metric.d_lease_hit));
+
+ return 0;
+}
+
static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p)
{
struct seq_file *s = p;
@@ -220,6 +236,7 @@ static int mds_sessions_show(struct seq_file *s, void *ptr)
CEPH_DEFINE_SHOW_FUNC(mdsmap_show)
CEPH_DEFINE_SHOW_FUNC(mdsc_show)
+CEPH_DEFINE_SHOW_FUNC(metric_show)
CEPH_DEFINE_SHOW_FUNC(caps_show)
CEPH_DEFINE_SHOW_FUNC(mds_sessions_show)
@@ -255,6 +272,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
debugfs_remove(fsc->debugfs_mdsmap);
debugfs_remove(fsc->debugfs_mds_sessions);
debugfs_remove(fsc->debugfs_caps);
+ debugfs_remove(fsc->debugfs_metric);
debugfs_remove(fsc->debugfs_mdsc);
}
@@ -295,11 +313,17 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc,
&mdsc_show_fops);
+ fsc->debugfs_metric = debugfs_create_file("metrics",
+ 0400,
+ fsc->client->debugfs_dir,
+ fsc,
+ &metric_show_fops);
+
fsc->debugfs_caps = debugfs_create_file("caps",
- 0400,
- fsc->client->debugfs_dir,
- fsc,
- &caps_show_fops);
+ 0400,
+ fsc->client->debugfs_dir,
+ fsc,
+ &caps_show_fops);
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index d0cd0aba5843..382beb04bacb 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -38,6 +38,8 @@ static int __dir_lease_try_check(const struct dentry *dentry);
static int ceph_d_init(struct dentry *dentry)
{
struct ceph_dentry_info *di;
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
di = kmem_cache_zalloc(ceph_dentry_cachep, GFP_KERNEL);
if (!di)
@@ -48,6 +50,9 @@ static int ceph_d_init(struct dentry *dentry)
di->time = jiffies;
dentry->d_fsdata = di;
INIT_LIST_HEAD(&di->lease_list);
+
+ atomic64_inc(&mdsc->metric.total_dentries);
+
return 0;
}
@@ -1551,6 +1556,7 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
*/
static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
{
+ struct ceph_mds_client *mdsc;
int valid = 0;
struct dentry *parent;
struct inode *dir, *inode;
@@ -1589,13 +1595,14 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
}
}
+ mdsc = ceph_sb_to_client(dir->i_sb)->mdsc;
if (!valid) {
- struct ceph_mds_client *mdsc =
- ceph_sb_to_client(dir->i_sb)->mdsc;
struct ceph_mds_request *req;
int op, err;
u32 mask;
+ percpu_counter_inc(&mdsc->metric.d_lease_mis);
+
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -1630,6 +1637,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
dout("d_revalidate %p lookup result=%d\n",
dentry, err);
}
+ } else {
+ percpu_counter_inc(&mdsc->metric.d_lease_hit);
}
dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
@@ -1638,6 +1647,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
if (!(flags & LOOKUP_RCU))
dput(parent);
+
return valid;
}
@@ -1672,9 +1682,13 @@ static int ceph_d_delete(const struct dentry *dentry)
static void ceph_d_release(struct dentry *dentry)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
dout("d_release %p\n", dentry);
+ atomic64_dec(&mdsc->metric.total_dentries);
+
spin_lock(&dentry->d_lock);
__dentry_lease_unlist(di);
dentry->d_fsdata = NULL;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index d379f489ab63..a976febf9647 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4157,10 +4157,31 @@ static void delayed_work(struct work_struct *work)
schedule_delayed(mdsc);
}
+static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
+{
+ int ret;
+
+ if (!metric)
+ return -EINVAL;
+
+ atomic64_set(&metric->total_dentries, 0);
+ ret = percpu_counter_init(&metric->d_lease_hit, 0, GFP_KERNEL);
+ if (ret)
+ return ret;
+ ret = percpu_counter_init(&metric->d_lease_mis, 0, GFP_KERNEL);
+ if (ret) {
+ percpu_counter_destroy(&metric->d_lease_hit);
+ return ret;
+ }
+
+ return 0;
+}
+
int ceph_mdsc_init(struct ceph_fs_client *fsc)
{
struct ceph_mds_client *mdsc;
+ int err;
mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS);
if (!mdsc)
@@ -4169,8 +4190,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
mutex_init(&mdsc->mutex);
mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
if (!mdsc->mdsmap) {
- kfree(mdsc);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_mdsc;
}
fsc->mdsc = mdsc;
@@ -4209,6 +4230,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
init_waitqueue_head(&mdsc->cap_flushing_wq);
INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
atomic_set(&mdsc->cap_reclaim_pending, 0);
+ err = ceph_mdsc_metric_init(&mdsc->metric);
+ if (err)
+ goto err_mdsmap;
spin_lock_init(&mdsc->dentry_list_lock);
INIT_LIST_HEAD(&mdsc->dentry_leases);
@@ -4227,6 +4251,12 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
strscpy(mdsc->nodename, utsname()->nodename,
sizeof(mdsc->nodename));
return 0;
+
+err_mdsmap:
+ kfree(mdsc->mdsmap);
+err_mdsc:
+ kfree(mdsc);
+ return err;
}
/*
@@ -4484,6 +4514,9 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
ceph_mdsc_stop(mdsc);
+ percpu_counter_destroy(&mdsc->metric.d_lease_mis);
+ percpu_counter_destroy(&mdsc->metric.d_lease_hit);
+
fsc->mdsc = NULL;
kfree(mdsc);
dout("mdsc_destroy %p done\n", mdsc);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index c950f8f88f58..22186060bc37 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -358,6 +358,13 @@ struct cap_wait {
int want;
};
+/* This is the global metrics */
+struct ceph_client_metric {
+ atomic64_t total_dentries;
+ struct percpu_counter d_lease_hit;
+ struct percpu_counter d_lease_mis;
+};
+
/*
* mds client state
*/
@@ -446,6 +453,8 @@ struct ceph_mds_client {
struct list_head dentry_leases; /* fifo list */
struct list_head dentry_dir_leases; /* lru list */
+ struct ceph_client_metric metric;
+
spinlock_t snapid_map_lock;
struct rb_root snapid_map_tree;
struct list_head snapid_map_lru;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 3bf1a01cd536..40703588b889 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -123,6 +123,7 @@ struct ceph_fs_client {
struct dentry *debugfs_congestion_kb;
struct dentry *debugfs_bdi;
struct dentry *debugfs_mdsc, *debugfs_mdsmap;
+ struct dentry *debugfs_metric;
struct dentry *debugfs_mds_sessions;
#endif
--
2.21.0
* Re: [PATCH v2 1/8] ceph: add global dentry lease metric support
2020-01-08 10:41 ` [PATCH v2 1/8] ceph: add global dentry lease metric support xiubli
@ 2020-01-09 13:52 ` Jeff Layton
2020-01-10 2:41 ` Xiubo Li
0 siblings, 1 reply; 18+ messages in thread
From: Jeff Layton @ 2020-01-09 13:52 UTC (permalink / raw)
To: xiubli, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel
On Wed, 2020-01-08 at 05:41 -0500, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> For dentry leases, only the hit/miss info triggered from VFS calls is
> counted; cases like request reply handling and the periodic
> ceph_trim_dentries() are ignored.
>
> Currently only the debugfs interface is supported.
>
> The output will be:
>
> item          total           miss            hit
> -------------------------------------------------
> d_lease       11              7               141
>
> Fixes: https://tracker.ceph.com/issues/43215
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> fs/ceph/debugfs.c | 32 ++++++++++++++++++++++++++++----
> fs/ceph/dir.c | 18 ++++++++++++++++--
> fs/ceph/mds_client.c | 37 +++++++++++++++++++++++++++++++++++--
> fs/ceph/mds_client.h | 9 +++++++++
> fs/ceph/super.h | 1 +
> 5 files changed, 89 insertions(+), 8 deletions(-)
>
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index fb7cabd98e7b..40a22da0214a 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -124,6 +124,22 @@ static int mdsc_show(struct seq_file *s, void *p)
> return 0;
> }
>
> +static int metric_show(struct seq_file *s, void *p)
> +{
> + struct ceph_fs_client *fsc = s->private;
> + struct ceph_mds_client *mdsc = fsc->mdsc;
> +
> + seq_printf(s, "item total miss hit\n");
> + seq_printf(s, "-------------------------------------------------\n");
> +
> + seq_printf(s, "%-14s%-16lld%-16lld%lld\n", "d_lease",
> + atomic64_read(&mdsc->metric.total_dentries),
> + percpu_counter_sum(&mdsc->metric.d_lease_mis),
> + percpu_counter_sum(&mdsc->metric.d_lease_hit));
> +
> + return 0;
> +}
> +
> static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p)
> {
> struct seq_file *s = p;
> @@ -220,6 +236,7 @@ static int mds_sessions_show(struct seq_file *s, void *ptr)
>
> CEPH_DEFINE_SHOW_FUNC(mdsmap_show)
> CEPH_DEFINE_SHOW_FUNC(mdsc_show)
> +CEPH_DEFINE_SHOW_FUNC(metric_show)
> CEPH_DEFINE_SHOW_FUNC(caps_show)
> CEPH_DEFINE_SHOW_FUNC(mds_sessions_show)
>
> @@ -255,6 +272,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
> debugfs_remove(fsc->debugfs_mdsmap);
> debugfs_remove(fsc->debugfs_mds_sessions);
> debugfs_remove(fsc->debugfs_caps);
> + debugfs_remove(fsc->debugfs_metric);
> debugfs_remove(fsc->debugfs_mdsc);
> }
>
> @@ -295,11 +313,17 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
> fsc,
> &mdsc_show_fops);
>
> + fsc->debugfs_metric = debugfs_create_file("metrics",
> + 0400,
> + fsc->client->debugfs_dir,
> + fsc,
> + &metric_show_fops);
> +
> fsc->debugfs_caps = debugfs_create_file("caps",
> - 0400,
> - fsc->client->debugfs_dir,
> - fsc,
> - &caps_show_fops);
> + 0400,
> + fsc->client->debugfs_dir,
> + fsc,
> + &caps_show_fops);
> }
>
>
> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> index d0cd0aba5843..382beb04bacb 100644
> --- a/fs/ceph/dir.c
> +++ b/fs/ceph/dir.c
> @@ -38,6 +38,8 @@ static int __dir_lease_try_check(const struct dentry *dentry);
> static int ceph_d_init(struct dentry *dentry)
> {
> struct ceph_dentry_info *di;
> + struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
> + struct ceph_mds_client *mdsc = fsc->mdsc;
>
> di = kmem_cache_zalloc(ceph_dentry_cachep, GFP_KERNEL);
> if (!di)
> @@ -48,6 +50,9 @@ static int ceph_d_init(struct dentry *dentry)
> di->time = jiffies;
> dentry->d_fsdata = di;
> INIT_LIST_HEAD(&di->lease_list);
> +
> + atomic64_inc(&mdsc->metric.total_dentries);
> +
> return 0;
> }
>
> @@ -1551,6 +1556,7 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
> */
> static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
> {
> + struct ceph_mds_client *mdsc;
> int valid = 0;
> struct dentry *parent;
> struct inode *dir, *inode;
> @@ -1589,13 +1595,14 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
> }
> }
>
> + mdsc = ceph_sb_to_client(dir->i_sb)->mdsc;
> if (!valid) {
> - struct ceph_mds_client *mdsc =
> - ceph_sb_to_client(dir->i_sb)->mdsc;
> struct ceph_mds_request *req;
> int op, err;
> u32 mask;
>
> + percpu_counter_inc(&mdsc->metric.d_lease_mis);
> +
> if (flags & LOOKUP_RCU)
> return -ECHILD;
>
So suppose we're doing an RCU walk, and call d_revalidate and the dentry
is invalid. We'll bump the counter here, but then return -ECHILD and the
kernel will do the d_revalidate all over again with refwalk, and we bump
the counter again.
Won't that end up double-counting the cache miss? Or is that intended
here?
> @@ -1630,6 +1637,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
> dout("d_revalidate %p lookup result=%d\n",
> dentry, err);
> }
> + } else {
> + percpu_counter_inc(&mdsc->metric.d_lease_hit);
> }
>
> dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
> @@ -1638,6 +1647,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
>
> if (!(flags & LOOKUP_RCU))
> dput(parent);
> +
> return valid;
> }
>
> @@ -1672,9 +1682,13 @@ static int ceph_d_delete(const struct dentry *dentry)
> static void ceph_d_release(struct dentry *dentry)
> {
> struct ceph_dentry_info *di = ceph_dentry(dentry);
> + struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
> + struct ceph_mds_client *mdsc = fsc->mdsc;
>
> dout("d_release %p\n", dentry);
>
> + atomic64_dec(&mdsc->metric.total_dentries);
> +
> spin_lock(&dentry->d_lock);
> __dentry_lease_unlist(di);
> dentry->d_fsdata = NULL;
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index d379f489ab63..a976febf9647 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -4157,10 +4157,31 @@ static void delayed_work(struct work_struct *work)
> schedule_delayed(mdsc);
> }
>
> +static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
> +{
> + int ret;
> +
> + if (!metric)
> + return -EINVAL;
> +
> + atomic64_set(&metric->total_dentries, 0);
> + ret = percpu_counter_init(&metric->d_lease_hit, 0, GFP_KERNEL);
> + if (ret)
> + return ret;
> + ret = percpu_counter_init(&metric->d_lease_mis, 0, GFP_KERNEL);
> + if (ret) {
> + percpu_counter_destroy(&metric->d_lease_hit);
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> int ceph_mdsc_init(struct ceph_fs_client *fsc)
>
> {
> struct ceph_mds_client *mdsc;
> + int err;
>
> mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS);
> if (!mdsc)
> @@ -4169,8 +4190,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
> mutex_init(&mdsc->mutex);
> mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
> if (!mdsc->mdsmap) {
> - kfree(mdsc);
> - return -ENOMEM;
> + err = -ENOMEM;
> + goto err_mdsc;
> }
>
> fsc->mdsc = mdsc;
> @@ -4209,6 +4230,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
> init_waitqueue_head(&mdsc->cap_flushing_wq);
> INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
> atomic_set(&mdsc->cap_reclaim_pending, 0);
> + err = ceph_mdsc_metric_init(&mdsc->metric);
> + if (err)
> + goto err_mdsmap;
>
> spin_lock_init(&mdsc->dentry_list_lock);
> INIT_LIST_HEAD(&mdsc->dentry_leases);
> @@ -4227,6 +4251,12 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
> strscpy(mdsc->nodename, utsname()->nodename,
> sizeof(mdsc->nodename));
> return 0;
> +
> +err_mdsmap:
> + kfree(mdsc->mdsmap);
> +err_mdsc:
> + kfree(mdsc);
> + return err;
> }
>
> /*
> @@ -4484,6 +4514,9 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
>
> ceph_mdsc_stop(mdsc);
>
> + percpu_counter_destroy(&mdsc->metric.d_lease_mis);
> + percpu_counter_destroy(&mdsc->metric.d_lease_hit);
> +
> fsc->mdsc = NULL;
> kfree(mdsc);
> dout("mdsc_destroy %p done\n", mdsc);
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index c950f8f88f58..22186060bc37 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -358,6 +358,13 @@ struct cap_wait {
> int want;
> };
>
> +/* This is the global metrics */
> +struct ceph_client_metric {
> + atomic64_t total_dentries;
> + struct percpu_counter d_lease_hit;
> + struct percpu_counter d_lease_mis;
> +};
> +
> /*
> * mds client state
> */
> @@ -446,6 +453,8 @@ struct ceph_mds_client {
> struct list_head dentry_leases; /* fifo list */
> struct list_head dentry_dir_leases; /* lru list */
>
> + struct ceph_client_metric metric;
> +
> spinlock_t snapid_map_lock;
> struct rb_root snapid_map_tree;
> struct list_head snapid_map_lru;
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 3bf1a01cd536..40703588b889 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -123,6 +123,7 @@ struct ceph_fs_client {
> struct dentry *debugfs_congestion_kb;
> struct dentry *debugfs_bdi;
> struct dentry *debugfs_mdsc, *debugfs_mdsmap;
> + struct dentry *debugfs_metric;
> struct dentry *debugfs_mds_sessions;
> #endif
>
--
Jeff Layton <jlayton@kernel.org>
* Re: [PATCH v2 1/8] ceph: add global dentry lease metric support
2020-01-09 13:52 ` Jeff Layton
@ 2020-01-10 2:41 ` Xiubo Li
0 siblings, 0 replies; 18+ messages in thread
From: Xiubo Li @ 2020-01-10 2:41 UTC (permalink / raw)
To: Jeff Layton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel
On 2020/1/9 21:52, Jeff Layton wrote:
> On Wed, 2020-01-08 at 05:41 -0500, xiubli@redhat.com wrote:
[...]
>> @@ -1589,13 +1595,14 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
>> }
>> }
>>
>> + mdsc = ceph_sb_to_client(dir->i_sb)->mdsc;
>> if (!valid) {
>> - struct ceph_mds_client *mdsc =
>> - ceph_sb_to_client(dir->i_sb)->mdsc;
>> struct ceph_mds_request *req;
>> int op, err;
>> u32 mask;
>>
>> + percpu_counter_inc(&mdsc->metric.d_lease_mis);
>> +
>> if (flags & LOOKUP_RCU)
>> return -ECHILD;
>>
> So suppose we're doing an RCU walk, and call d_revalidate and the dentry
> is invalid. We'll bump the counter here, but then return -ECHILD and the
> kernel will do the d_revalidate all over again with refwalk, and we bump
> the counter again.
>
> Won't that end up double-counting the cache miss? Or is that intended
> here?
Yeah, we should count it once. Will fix it in the next version.
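One way to count it only once would be to bump the counter only after the
LOOKUP_RCU check, so that just the refwalk pass records the miss. A rough,
untested sketch against this patch:

	if (flags & LOOKUP_RCU)
		return -ECHILD;

	/* only reached on the refwalk pass, so the miss is counted
	 * exactly once even if the RCU walk bailed out with -ECHILD
	 * first */
	percpu_counter_inc(&mdsc->metric.d_lease_mis);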
Thanks,
* [PATCH v2 2/8] ceph: add caps perf metric for each session
2020-01-08 10:41 [PATCH v2 0/8] ceph: add perf metrics support xiubli
2020-01-08 10:41 ` [PATCH v2 1/8] ceph: add global dentry lease metric support xiubli
@ 2020-01-08 10:41 ` xiubli
2020-01-09 14:52 ` Jeff Layton
2020-01-08 10:41 ` [PATCH v2 3/8] ceph: add global read latency metric support xiubli
` (6 subsequent siblings)
8 siblings, 1 reply; 18+ messages in thread
From: xiubli @ 2020-01-08 10:41 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
This will fulfill the caps hit/miss metric for each session. When
checking the "need" mask, a cap whose issued set covers the "need"
bits is counted as a hit, otherwise as a miss.
item          total           miss            hit
-------------------------------------------------
d_lease       295             0               993

session       caps            miss            hit
-------------------------------------------------
0             295             107             4119
1             1               107             9
Fixes: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/acl.c | 2 +-
fs/ceph/caps.c | 63 +++++++++++++++++++++++++++++++++++---------
fs/ceph/debugfs.c | 20 ++++++++++++++
fs/ceph/dir.c | 20 +++++++-------
fs/ceph/file.c | 6 ++---
fs/ceph/inode.c | 8 +++---
fs/ceph/mds_client.c | 16 ++++++++++-
fs/ceph/mds_client.h | 3 +++
fs/ceph/snap.c | 2 +-
fs/ceph/super.h | 12 +++++----
fs/ceph/xattr.c | 8 +++---
11 files changed, 120 insertions(+), 40 deletions(-)
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 26be6520d3fb..fca6ff231020 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -22,7 +22,7 @@ static inline void ceph_set_cached_acl(struct inode *inode,
struct ceph_inode_info *ci = ceph_inode(inode);
spin_lock(&ci->i_ceph_lock);
- if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0))
+ if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0, true))
set_cached_acl(inode, type, acl);
else
forget_cached_acl(inode, type);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 28ae0c134700..6ab02aab7d9c 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -567,7 +567,7 @@ static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
unsigned issued)
{
- unsigned had = __ceph_caps_issued(ci, NULL);
+ unsigned int had = __ceph_caps_issued(ci, NULL, -1);
/*
* Each time we receive FILE_CACHE anew, we increment
@@ -787,20 +787,43 @@ static int __cap_is_valid(struct ceph_cap *cap)
* out, and may be invalidated in bulk if the client session times out
* and session->s_cap_gen is bumped.
*/
-int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
+int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented, int mask)
{
int have = ci->i_snap_caps;
struct ceph_cap *cap;
struct rb_node *p;
+ int revoking = 0;
+
+ if (ci->i_auth_cap) {
+ cap = ci->i_auth_cap;
+ revoking = cap->implemented & ~cap->issued;
+ }
if (implemented)
*implemented = 0;
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ struct ceph_mds_session *s;
+ int r = 0;
+
cap = rb_entry(p, struct ceph_cap, ci_node);
if (!__cap_is_valid(cap))
continue;
dout("__ceph_caps_issued %p cap %p issued %s\n",
&ci->vfs_inode, cap, ceph_cap_string(cap->issued));
+
+ if (mask >= 0) {
+ s = ceph_get_mds_session(cap->session);
+ if (cap == ci->i_auth_cap)
+ r = revoking;
+ if (s) {
+ if (mask & (cap->issued & ~r))
+ percpu_counter_inc(&s->i_caps_hit);
+ else
+ percpu_counter_inc(&s->i_caps_mis);
+ ceph_put_mds_session(s);
+ }
+ }
+
have |= cap->issued;
if (implemented)
*implemented |= cap->implemented;
@@ -810,10 +833,8 @@ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
* by the auth MDS. The non-auth MDS should be revoking/exporting
* these caps, but the message is delayed.
*/
- if (ci->i_auth_cap) {
- cap = ci->i_auth_cap;
- have &= ~cap->implemented | cap->issued;
- }
+ have &= ~revoking;
+
return have;
}
@@ -862,7 +883,8 @@ static void __touch_cap(struct ceph_cap *cap)
* front of their respective LRUs. (This is the preferred way for
* callers to check for caps they want.)
*/
-int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
+int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch,
+ bool metric)
{
struct ceph_cap *cap;
struct rb_node *p;
@@ -877,9 +899,13 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
}
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ struct ceph_mds_session *s;
+
cap = rb_entry(p, struct ceph_cap, ci_node);
if (!__cap_is_valid(cap))
continue;
+
+ s = ceph_get_mds_session(cap->session);
if ((cap->issued & mask) == mask) {
dout("__ceph_caps_issued_mask ino 0x%lx cap %p issued %s"
" (mask %s)\n", ci->vfs_inode.i_ino, cap,
@@ -887,9 +913,22 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
ceph_cap_string(mask));
if (touch)
__touch_cap(cap);
+ if (s) {
+ if (metric)
+ percpu_counter_inc(&s->i_caps_hit);
+ ceph_put_mds_session(s);
+ }
return 1;
}
+ if (s) {
+ if (cap->issued & mask)
+ percpu_counter_inc(&s->i_caps_hit);
+ else
+ percpu_counter_inc(&s->i_caps_mis);
+ ceph_put_mds_session(s);
+ }
+
/* does a combination of caps satisfy mask? */
have |= cap->issued;
if ((have & mask) == mask) {
@@ -1849,7 +1888,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
retry_locked:
file_wanted = __ceph_caps_file_wanted(ci);
used = __ceph_caps_used(ci);
- issued = __ceph_caps_issued(ci, &implemented);
+ issued = __ceph_caps_issued(ci, &implemented, -1);
revoking = implemented & ~issued;
want = file_wanted;
@@ -2577,7 +2616,7 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
spin_lock(&ci->i_ceph_lock);
}
- have = __ceph_caps_issued(ci, &implemented);
+ have = __ceph_caps_issued(ci, &implemented, need);
if (have & need & CEPH_CAP_FILE_WR) {
if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
@@ -3563,7 +3602,7 @@ static void handle_cap_trunc(struct inode *inode,
u64 size = le64_to_cpu(trunc->size);
int implemented = 0;
int dirty = __ceph_caps_dirty(ci);
- int issued = __ceph_caps_issued(ceph_inode(inode), &implemented);
+ int issued = __ceph_caps_issued(ceph_inode(inode), &implemented, -1);
int queue_trunc = 0;
issued |= implemented | dirty;
@@ -3770,7 +3809,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
}
}
- __ceph_caps_issued(ci, &issued);
+ __ceph_caps_issued(ci, &issued, -1);
issued |= __ceph_caps_dirty(ci);
ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq,
@@ -3996,7 +4035,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
switch (op) {
case CEPH_CAP_OP_REVOKE:
case CEPH_CAP_OP_GRANT:
- __ceph_caps_issued(ci, &extra_info.issued);
+ __ceph_caps_issued(ci, &extra_info.issued, -1);
extra_info.issued |= __ceph_caps_dirty(ci);
handle_cap_grant(inode, session, cap,
h, msg->middle, &extra_info);
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 40a22da0214a..c132fdb40d53 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -128,6 +128,7 @@ static int metric_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
struct ceph_mds_client *mdsc = fsc->mdsc;
+ int i;
seq_printf(s, "item total miss hit\n");
seq_printf(s, "-------------------------------------------------\n");
@@ -137,6 +138,25 @@ static int metric_show(struct seq_file *s, void *p)
percpu_counter_sum(&mdsc->metric.d_lease_mis),
percpu_counter_sum(&mdsc->metric.d_lease_hit));
+ seq_printf(s, "\n");
+ seq_printf(s, "session caps miss hit\n");
+ seq_printf(s, "-------------------------------------------------\n");
+
+ mutex_lock(&mdsc->mutex);
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ struct ceph_mds_session *session;
+
+ session = __ceph_lookup_mds_session(mdsc, i);
+ if (!session)
+ continue;
+ seq_printf(s, "%-14d%-16d%-16lld%lld\n", i,
+ session->s_nr_caps,
+ percpu_counter_sum(&session->i_caps_mis),
+ percpu_counter_sum(&session->i_caps_hit));
+ ceph_put_mds_session(session);
+ }
+ mutex_unlock(&mdsc->mutex);
+
return 0;
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 382beb04bacb..1e1ccae8953d 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -30,7 +30,7 @@
const struct dentry_operations ceph_dentry_ops;
static bool __dentry_lease_is_valid(struct ceph_dentry_info *di);
-static int __dir_lease_try_check(const struct dentry *dentry);
+static int __dir_lease_try_check(const struct dentry *dentry, bool metric);
/*
* Initialize ceph dentry state.
@@ -346,7 +346,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
ceph_snap(inode) != CEPH_SNAPDIR &&
__ceph_dir_is_complete_ordered(ci) &&
- __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
+ __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1, true)) {
int shared_gen = atomic_read(&ci->i_shared_gen);
spin_unlock(&ci->i_ceph_lock);
err = __dcache_readdir(file, ctx, shared_gen);
@@ -764,7 +764,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
!is_root_ceph_dentry(dir, dentry) &&
ceph_test_mount_opt(fsc, DCACHE) &&
__ceph_dir_is_complete(ci) &&
- (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
+ (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1,
+ true))) {
spin_unlock(&ci->i_ceph_lock);
dout(" dir %p complete, -ENOENT\n", dir);
d_add(dentry, NULL);
@@ -1341,7 +1342,7 @@ static int __dentry_lease_check(struct dentry *dentry, void *arg)
if (__dentry_lease_is_valid(di))
return STOP;
- ret = __dir_lease_try_check(dentry);
+ ret = __dir_lease_try_check(dentry, false);
if (ret == -EBUSY)
return KEEP;
if (ret > 0)
@@ -1354,7 +1355,7 @@ static int __dir_lease_check(struct dentry *dentry, void *arg)
struct ceph_lease_walk_control *lwc = arg;
struct ceph_dentry_info *di = ceph_dentry(dentry);
- int ret = __dir_lease_try_check(dentry);
+ int ret = __dir_lease_try_check(dentry, false);
if (ret == -EBUSY)
return KEEP;
if (ret > 0) {
@@ -1493,7 +1494,7 @@ static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags)
/*
* Called under dentry->d_lock.
*/
-static int __dir_lease_try_check(const struct dentry *dentry)
+static int __dir_lease_try_check(const struct dentry *dentry, bool metric)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
struct inode *dir;
@@ -1510,7 +1511,8 @@ static int __dir_lease_try_check(const struct dentry *dentry)
if (spin_trylock(&ci->i_ceph_lock)) {
if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen &&
- __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 0))
+ __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 0,
+ metric))
valid = 1;
spin_unlock(&ci->i_ceph_lock);
} else {
@@ -1532,7 +1534,7 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
int shared_gen;
spin_lock(&ci->i_ceph_lock);
- valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
+ valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1, true);
shared_gen = atomic_read(&ci->i_shared_gen);
spin_unlock(&ci->i_ceph_lock);
if (valid) {
@@ -1670,7 +1672,7 @@ static int ceph_d_delete(const struct dentry *dentry)
if (di) {
if (__dentry_lease_is_valid(di))
return 0;
- if (__dir_lease_try_check(dentry))
+ if (__dir_lease_try_check(dentry, true))
return 0;
}
return 1;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 11929d2bb594..418c7b30c6db 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -296,7 +296,7 @@ int ceph_renew_caps(struct inode *inode)
wanted = __ceph_caps_file_wanted(ci);
if (__ceph_is_any_real_caps(ci) &&
(!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) {
- int issued = __ceph_caps_issued(ci, NULL);
+ int issued = __ceph_caps_issued(ci, NULL, -1);
spin_unlock(&ci->i_ceph_lock);
dout("renew caps %p want %s issued %s updating mds_wanted\n",
inode, ceph_cap_string(wanted), ceph_cap_string(issued));
@@ -387,7 +387,7 @@ int ceph_open(struct inode *inode, struct file *file)
if (__ceph_is_any_real_caps(ci) &&
(((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
int mds_wanted = __ceph_caps_mds_wanted(ci, true);
- int issued = __ceph_caps_issued(ci, NULL);
+ int issued = __ceph_caps_issued(ci, NULL, fmode);
dout("open %p fmode %d want %s issued %s using existing\n",
inode, fmode, ceph_cap_string(wanted),
@@ -403,7 +403,7 @@ int ceph_open(struct inode *inode, struct file *file)
return ceph_init_file(inode, file, fmode);
} else if (ceph_snap(inode) != CEPH_NOSNAP &&
- (ci->i_snap_caps & wanted) == wanted) {
+ (ci->i_snap_caps & wanted) == wanted) {
__ceph_get_fmode(ci, fmode);
spin_unlock(&ci->i_ceph_lock);
return ceph_init_file(inode, file, fmode);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 64634c5af403..c0108b8582c3 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -798,7 +798,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
/* Update change_attribute */
inode_set_max_iversion_raw(inode, iinfo->change_attr);
- __ceph_caps_issued(ci, &issued);
+ __ceph_caps_issued(ci, &issued, -1);
issued |= __ceph_caps_dirty(ci);
new_issued = ~issued & info_caps;
@@ -2029,7 +2029,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
}
spin_lock(&ci->i_ceph_lock);
- issued = __ceph_caps_issued(ci, NULL);
+ issued = __ceph_caps_issued(ci, NULL, -1);
if (!ci->i_head_snapc &&
(issued & (CEPH_CAP_ANY_EXCL | CEPH_CAP_FILE_WR))) {
@@ -2038,7 +2038,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
spin_unlock(&ci->i_ceph_lock);
down_read(&mdsc->snap_rwsem);
spin_lock(&ci->i_ceph_lock);
- issued = __ceph_caps_issued(ci, NULL);
+ issued = __ceph_caps_issued(ci, NULL, -1);
}
}
@@ -2269,7 +2269,7 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
dout("do_getattr inode %p mask %s mode 0%o\n",
inode, ceph_cap_string(mask), inode->i_mode);
- if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
+ if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1, true))
return 0;
mode = (mask & CEPH_STAT_RSTAT) ? USE_AUTH_MDS : USE_ANY_MDS;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a976febf9647..606fa8cd687f 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -558,6 +558,8 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
if (refcount_dec_and_test(&s->s_ref)) {
if (s->s_auth.authorizer)
ceph_auth_destroy_authorizer(s->s_auth.authorizer);
+ percpu_counter_destroy(&s->i_caps_hit);
+ percpu_counter_destroy(&s->i_caps_mis);
kfree(s);
}
}
@@ -598,6 +600,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
int mds)
{
struct ceph_mds_session *s;
+ int err;
if (mds >= mdsc->mdsmap->possible_max_rank)
return ERR_PTR(-EINVAL);
@@ -612,8 +615,10 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
dout("%s: realloc to %d\n", __func__, newmax);
sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
- if (!sa)
+ if (!sa) {
+ err = -ENOMEM;
goto fail_realloc;
+ }
if (mdsc->sessions) {
memcpy(sa, mdsc->sessions,
mdsc->max_sessions * sizeof(void *));
@@ -653,6 +658,13 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
INIT_LIST_HEAD(&s->s_cap_flushing);
+ err = percpu_counter_init(&s->i_caps_hit, 0, GFP_NOFS);
+ if (err)
+ goto fail_realloc;
+ err = percpu_counter_init(&s->i_caps_mis, 0, GFP_NOFS);
+ if (err)
+ goto fail_init;
+
mdsc->sessions[mds] = s;
atomic_inc(&mdsc->num_sessions);
refcount_inc(&s->s_ref); /* one ref to sessions[], one to caller */
@@ -662,6 +674,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
return s;
+fail_init:
+ percpu_counter_destroy(&s->i_caps_hit);
fail_realloc:
kfree(s);
return ERR_PTR(-ENOMEM);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 22186060bc37..c8935fd0d8bb 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -201,6 +201,9 @@ struct ceph_mds_session {
refcount_t s_ref;
struct list_head s_waiting; /* waiting requests */
struct list_head s_unsafe; /* unsafe requests */
+
+ struct percpu_counter i_caps_hit;
+ struct percpu_counter i_caps_mis;
};
/*
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index ccfcc66aaf44..3d34af145f81 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -534,7 +534,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
INIT_LIST_HEAD(&capsnap->ci_item);
capsnap->follows = old_snapc->seq;
- capsnap->issued = __ceph_caps_issued(ci, NULL);
+ capsnap->issued = __ceph_caps_issued(ci, NULL, -1);
capsnap->dirty = dirty;
capsnap->mode = inode->i_mode;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 40703588b889..88da9e21af75 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -635,8 +635,10 @@ static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci)
return !RB_EMPTY_ROOT(&ci->i_caps);
}
-extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented);
-extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t);
+extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented,
+ int mask);
+extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t,
+ bool metric);
extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
struct ceph_cap *cap);
@@ -644,17 +646,17 @@ static inline int ceph_caps_issued(struct ceph_inode_info *ci)
{
int issued;
spin_lock(&ci->i_ceph_lock);
- issued = __ceph_caps_issued(ci, NULL);
+ issued = __ceph_caps_issued(ci, NULL, -1);
spin_unlock(&ci->i_ceph_lock);
return issued;
}
static inline int ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask,
- int touch)
+ int touch, bool metric)
{
int r;
spin_lock(&ci->i_ceph_lock);
- r = __ceph_caps_issued_mask(ci, mask, touch);
+ r = __ceph_caps_issued_mask(ci, mask, touch, metric);
spin_unlock(&ci->i_ceph_lock);
return r;
}
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 98a9a3101cda..aa9ee2c2d8f3 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -856,7 +856,7 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
if (ci->i_xattrs.version == 0 ||
!((req_mask & CEPH_CAP_XATTR_SHARED) ||
- __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
+ __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1, true))) {
spin_unlock(&ci->i_ceph_lock);
/* security module gets xattr while filling trace */
@@ -914,7 +914,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
ci->i_xattrs.version, ci->i_xattrs.index_version);
if (ci->i_xattrs.version == 0 ||
- !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
+ !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1, true)) {
spin_unlock(&ci->i_ceph_lock);
err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
if (err)
@@ -1064,7 +1064,7 @@ int __ceph_setxattr(struct inode *inode, const char *name,
spin_lock(&ci->i_ceph_lock);
retry:
- issued = __ceph_caps_issued(ci, NULL);
+ issued = __ceph_caps_issued(ci, NULL, -1);
if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
goto do_sync;
@@ -1192,7 +1192,7 @@ bool ceph_security_xattr_deadlock(struct inode *in)
spin_lock(&ci->i_ceph_lock);
ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
!(ci->i_xattrs.version > 0 &&
- __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
+ __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0, true));
spin_unlock(&ci->i_ceph_lock);
return ret;
}
--
2.21.0
* Re: [PATCH v2 2/8] ceph: add caps perf metric for each session
2020-01-08 10:41 ` [PATCH v2 2/8] ceph: add caps perf metric for each session xiubli
@ 2020-01-09 14:52 ` Jeff Layton
2020-01-10 3:38 ` Xiubo Li
0 siblings, 1 reply; 18+ messages in thread
From: Jeff Layton @ 2020-01-09 14:52 UTC (permalink / raw)
To: xiubli, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel
On Wed, 2020-01-08 at 05:41 -0500, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> This will fulfill the caps hit/miss metric for each session. When
> checking the "need" mask, a cap whose issued set covers the "need"
> bits is counted as a hit, otherwise as a miss.
>
> item          total           miss            hit
> -------------------------------------------------
> d_lease       295             0               993
>
> session       caps            miss            hit
> -------------------------------------------------
> 0             295             107             4119
> 1             1               107             9
>
> Fixes: https://tracker.ceph.com/issues/43215
For the record, "Fixes:" has a different meaning for kernel patches.
It's used to reference an earlier patch that introduced the bug that the
patch is fixing.
It's a pity that the ceph team decided to use that to reference tracker
tickets in their tree. For the kernel we usually use a generic "URL:"
tag for that.
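So for this patch something like the following would be the usual form:

  URL: https://tracker.ceph.com/issues/43215

while "Fixes:" is reserved for cases like (the hash and subject here are
only placeholders, not a real commit):

  Fixes: 123456789abc ("ceph: some earlier commit that introduced the bug")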
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> fs/ceph/acl.c | 2 +-
> fs/ceph/caps.c | 63 +++++++++++++++++++++++++++++++++++---------
> fs/ceph/debugfs.c | 20 ++++++++++++++
> fs/ceph/dir.c | 20 +++++++-------
> fs/ceph/file.c | 6 ++---
> fs/ceph/inode.c | 8 +++---
> fs/ceph/mds_client.c | 16 ++++++++++-
> fs/ceph/mds_client.h | 3 +++
> fs/ceph/snap.c | 2 +-
> fs/ceph/super.h | 12 +++++----
> fs/ceph/xattr.c | 8 +++---
> 11 files changed, 120 insertions(+), 40 deletions(-)
>
> diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
> index 26be6520d3fb..fca6ff231020 100644
> --- a/fs/ceph/acl.c
> +++ b/fs/ceph/acl.c
> @@ -22,7 +22,7 @@ static inline void ceph_set_cached_acl(struct inode *inode,
> struct ceph_inode_info *ci = ceph_inode(inode);
>
> spin_lock(&ci->i_ceph_lock);
> - if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0))
> + if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0, true))
> set_cached_acl(inode, type, acl);
> else
> forget_cached_acl(inode, type);
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 28ae0c134700..6ab02aab7d9c 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -567,7 +567,7 @@ static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
> static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
> unsigned issued)
> {
> - unsigned had = __ceph_caps_issued(ci, NULL);
> + unsigned int had = __ceph_caps_issued(ci, NULL, -1);
>
> /*
> * Each time we receive FILE_CACHE anew, we increment
> @@ -787,20 +787,43 @@ static int __cap_is_valid(struct ceph_cap *cap)
> * out, and may be invalidated in bulk if the client session times out
> * and session->s_cap_gen is bumped.
> */
> -int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
> +int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented, int mask)
This seems like the wrong approach. This function returns a set of caps,
so it seems like the callers ought to determine whether a miss or hit
occurred, and whether to record it.
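For example, a caller that already knows the "need" mask, such as
try_get_cap_refs(), could do the accounting itself after the call while
still holding i_ceph_lock. A rough, untested sketch:

	have = __ceph_caps_issued(ci, &implemented);
	if (ci->i_auth_cap) {
		struct ceph_mds_session *s = ci->i_auth_cap->session;

		if ((have & need) == need)
			percpu_counter_inc(&s->i_caps_hit);
		else
			percpu_counter_inc(&s->i_caps_mis);
	}

That keeps __ceph_caps_issued() a pure query and leaves the decision of
what counts as a hit or a miss to each call site.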
> {
> int have = ci->i_snap_caps;
> struct ceph_cap *cap;
> struct rb_node *p;
> + int revoking = 0;
> +
> + if (ci->i_auth_cap) {
> + cap = ci->i_auth_cap;
> + revoking = cap->implemented & ~cap->issued;
> + }
>
> if (implemented)
> *implemented = 0;
> for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
> + struct ceph_mds_session *s;
> + int r = 0;
> +
> cap = rb_entry(p, struct ceph_cap, ci_node);
> if (!__cap_is_valid(cap))
> continue;
> dout("__ceph_caps_issued %p cap %p issued %s\n",
> &ci->vfs_inode, cap, ceph_cap_string(cap->issued));
> +
> + if (mask >= 0) {
> + s = ceph_get_mds_session(cap->session);
> + if (cap == ci->i_auth_cap)
> + r = revoking;
> + if (s) {
> + if (mask & (cap->issued & ~r))
> + percpu_counter_inc(&s->i_caps_hit);
> + else
> + percpu_counter_inc(&s->i_caps_mis);
> + ceph_put_mds_session(s);
> + }
> + }
> +
> have |= cap->issued;
> if (implemented)
> *implemented |= cap->implemented;
> @@ -810,10 +833,8 @@ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
> * by the auth MDS. The non-auth MDS should be revoking/exporting
> * these caps, but the message is delayed.
> */
> - if (ci->i_auth_cap) {
> - cap = ci->i_auth_cap;
> - have &= ~cap->implemented | cap->issued;
> - }
> + have &= ~revoking;
> +
> return have;
> }
>
> @@ -862,7 +883,8 @@ static void __touch_cap(struct ceph_cap *cap)
> * front of their respective LRUs. (This is the preferred way for
> * callers to check for caps they want.)
> */
> -int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
> +int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch,
> + bool metric)
> {
> struct ceph_cap *cap;
> struct rb_node *p;
> @@ -877,9 +899,13 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
> }
>
> for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
> + struct ceph_mds_session *s;
> +
> cap = rb_entry(p, struct ceph_cap, ci_node);
> if (!__cap_is_valid(cap))
> continue;
> +
> + s = ceph_get_mds_session(cap->session);
> if ((cap->issued & mask) == mask) {
> dout("__ceph_caps_issued_mask ino 0x%lx cap %p issued %s"
> " (mask %s)\n", ci->vfs_inode.i_ino, cap,
> @@ -887,9 +913,22 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
> ceph_cap_string(mask));
> if (touch)
> __touch_cap(cap);
> + if (s) {
> + if (metric)
> + percpu_counter_inc(&s->i_caps_hit);
> + ceph_put_mds_session(s);
> + }
> return 1;
> }
>
> + if (s) {
> + if (cap->issued & mask)
> + percpu_counter_inc(&s->i_caps_hit);
> + else
> + percpu_counter_inc(&s->i_caps_mis);
> + ceph_put_mds_session(s);
> + }
> +
> /* does a combination of caps satisfy mask? */
> have |= cap->issued;
> if ((have & mask) == mask) {
> @@ -1849,7 +1888,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
> retry_locked:
> file_wanted = __ceph_caps_file_wanted(ci);
> used = __ceph_caps_used(ci);
> - issued = __ceph_caps_issued(ci, &implemented);
> + issued = __ceph_caps_issued(ci, &implemented, -1);
> revoking = implemented & ~issued;
>
> want = file_wanted;
> @@ -2577,7 +2616,7 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
> spin_lock(&ci->i_ceph_lock);
> }
>
> - have = __ceph_caps_issued(ci, &implemented);
> + have = __ceph_caps_issued(ci, &implemented, need);
>
> if (have & need & CEPH_CAP_FILE_WR) {
> if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
> @@ -3563,7 +3602,7 @@ static void handle_cap_trunc(struct inode *inode,
> u64 size = le64_to_cpu(trunc->size);
> int implemented = 0;
> int dirty = __ceph_caps_dirty(ci);
> - int issued = __ceph_caps_issued(ceph_inode(inode), &implemented);
> + int issued = __ceph_caps_issued(ceph_inode(inode), &implemented, -1);
> int queue_trunc = 0;
>
> issued |= implemented | dirty;
> @@ -3770,7 +3809,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
> }
> }
>
> - __ceph_caps_issued(ci, &issued);
> + __ceph_caps_issued(ci, &issued, -1);
> issued |= __ceph_caps_dirty(ci);
>
> ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq,
> @@ -3996,7 +4035,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
> switch (op) {
> case CEPH_CAP_OP_REVOKE:
> case CEPH_CAP_OP_GRANT:
> - __ceph_caps_issued(ci, &extra_info.issued);
> + __ceph_caps_issued(ci, &extra_info.issued, -1);
> extra_info.issued |= __ceph_caps_dirty(ci);
> handle_cap_grant(inode, session, cap,
> h, msg->middle, &extra_info);
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index 40a22da0214a..c132fdb40d53 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -128,6 +128,7 @@ static int metric_show(struct seq_file *s, void *p)
> {
> struct ceph_fs_client *fsc = s->private;
> struct ceph_mds_client *mdsc = fsc->mdsc;
> + int i;
>
> seq_printf(s, "item total miss hit\n");
> seq_printf(s, "-------------------------------------------------\n");
> @@ -137,6 +138,25 @@ static int metric_show(struct seq_file *s, void *p)
> percpu_counter_sum(&mdsc->metric.d_lease_mis),
> percpu_counter_sum(&mdsc->metric.d_lease_hit));
>
> + seq_printf(s, "\n");
> + seq_printf(s, "session caps miss hit\n");
> + seq_printf(s, "-------------------------------------------------\n");
> +
> + mutex_lock(&mdsc->mutex);
> + for (i = 0; i < mdsc->max_sessions; i++) {
> + struct ceph_mds_session *session;
> +
> + session = __ceph_lookup_mds_session(mdsc, i);
> + if (!session)
> + continue;
> + seq_printf(s, "%-14d%-16d%-16lld%lld\n", i,
> + session->s_nr_caps,
> + percpu_counter_sum(&session->i_caps_mis),
> + percpu_counter_sum(&session->i_caps_hit));
> + ceph_put_mds_session(session);
> + }
> + mutex_unlock(&mdsc->mutex);
> +
> return 0;
> }
>
> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> index 382beb04bacb..1e1ccae8953d 100644
> --- a/fs/ceph/dir.c
> +++ b/fs/ceph/dir.c
> @@ -30,7 +30,7 @@
> const struct dentry_operations ceph_dentry_ops;
>
> static bool __dentry_lease_is_valid(struct ceph_dentry_info *di);
> -static int __dir_lease_try_check(const struct dentry *dentry);
> +static int __dir_lease_try_check(const struct dentry *dentry, bool metric);
>
AFAICT, this function is only called when trimming dentries and in
d_delete. I don't think we care about measuring cache hits/misses for
either of those cases.
> /*
> * Initialize ceph dentry state.
> @@ -346,7 +346,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
> ceph_snap(inode) != CEPH_SNAPDIR &&
> __ceph_dir_is_complete_ordered(ci) &&
> - __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
> + __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1, true)) {
> int shared_gen = atomic_read(&ci->i_shared_gen);
> spin_unlock(&ci->i_ceph_lock);
> err = __dcache_readdir(file, ctx, shared_gen);
> @@ -764,7 +764,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
> !is_root_ceph_dentry(dir, dentry) &&
> ceph_test_mount_opt(fsc, DCACHE) &&
> __ceph_dir_is_complete(ci) &&
> - (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
> + (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1,
> + true))) {
> spin_unlock(&ci->i_ceph_lock);
> dout(" dir %p complete, -ENOENT\n", dir);
> d_add(dentry, NULL);
> @@ -1341,7 +1342,7 @@ static int __dentry_lease_check(struct dentry *dentry, void *arg)
>
> if (__dentry_lease_is_valid(di))
> return STOP;
> - ret = __dir_lease_try_check(dentry);
> + ret = __dir_lease_try_check(dentry, false);
> if (ret == -EBUSY)
> return KEEP;
> if (ret > 0)
> @@ -1354,7 +1355,7 @@ static int __dir_lease_check(struct dentry *dentry, void *arg)
> struct ceph_lease_walk_control *lwc = arg;
> struct ceph_dentry_info *di = ceph_dentry(dentry);
>
> - int ret = __dir_lease_try_check(dentry);
> + int ret = __dir_lease_try_check(dentry, false);
> if (ret == -EBUSY)
> return KEEP;
> if (ret > 0) {
> @@ -1493,7 +1494,7 @@ static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags)
> /*
> * Called under dentry->d_lock.
> */
> -static int __dir_lease_try_check(const struct dentry *dentry)
> +static int __dir_lease_try_check(const struct dentry *dentry, bool metric)
> {
> struct ceph_dentry_info *di = ceph_dentry(dentry);
> struct inode *dir;
> @@ -1510,7 +1511,8 @@ static int __dir_lease_try_check(const struct dentry *dentry)
>
> if (spin_trylock(&ci->i_ceph_lock)) {
> if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen &&
> - __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 0))
> + __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 0,
> + metric))
> valid = 1;
> spin_unlock(&ci->i_ceph_lock);
> } else {
> @@ -1532,7 +1534,7 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
> int shared_gen;
>
> spin_lock(&ci->i_ceph_lock);
> - valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
> + valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1, true);
> shared_gen = atomic_read(&ci->i_shared_gen);
> spin_unlock(&ci->i_ceph_lock);
> if (valid) {
> @@ -1670,7 +1672,7 @@ static int ceph_d_delete(const struct dentry *dentry)
> if (di) {
> if (__dentry_lease_is_valid(di))
> return 0;
> - if (__dir_lease_try_check(dentry))
> + if (__dir_lease_try_check(dentry, true))
> return 0;
> }
> return 1;
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 11929d2bb594..418c7b30c6db 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -296,7 +296,7 @@ int ceph_renew_caps(struct inode *inode)
> wanted = __ceph_caps_file_wanted(ci);
> if (__ceph_is_any_real_caps(ci) &&
> (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) {
> - int issued = __ceph_caps_issued(ci, NULL);
> + int issued = __ceph_caps_issued(ci, NULL, -1);
> spin_unlock(&ci->i_ceph_lock);
> dout("renew caps %p want %s issued %s updating mds_wanted\n",
> inode, ceph_cap_string(wanted), ceph_cap_string(issued));
> @@ -387,7 +387,7 @@ int ceph_open(struct inode *inode, struct file *file)
> if (__ceph_is_any_real_caps(ci) &&
> (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
> int mds_wanted = __ceph_caps_mds_wanted(ci, true);
> - int issued = __ceph_caps_issued(ci, NULL);
> + int issued = __ceph_caps_issued(ci, NULL, fmode);
>
> dout("open %p fmode %d want %s issued %s using existing\n",
> inode, fmode, ceph_cap_string(wanted),
> @@ -403,7 +403,7 @@ int ceph_open(struct inode *inode, struct file *file)
>
> return ceph_init_file(inode, file, fmode);
> } else if (ceph_snap(inode) != CEPH_NOSNAP &&
> - (ci->i_snap_caps & wanted) == wanted) {
> + (ci->i_snap_caps & wanted) == wanted) {
> __ceph_get_fmode(ci, fmode);
> spin_unlock(&ci->i_ceph_lock);
> return ceph_init_file(inode, file, fmode);
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index 64634c5af403..c0108b8582c3 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -798,7 +798,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
> /* Update change_attribute */
> inode_set_max_iversion_raw(inode, iinfo->change_attr);
>
> - __ceph_caps_issued(ci, &issued);
> + __ceph_caps_issued(ci, &issued, -1);
> issued |= __ceph_caps_dirty(ci);
> new_issued = ~issued & info_caps;
>
> @@ -2029,7 +2029,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
> }
>
> spin_lock(&ci->i_ceph_lock);
> - issued = __ceph_caps_issued(ci, NULL);
> + issued = __ceph_caps_issued(ci, NULL, -1);
>
> if (!ci->i_head_snapc &&
> (issued & (CEPH_CAP_ANY_EXCL | CEPH_CAP_FILE_WR))) {
> @@ -2038,7 +2038,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
> spin_unlock(&ci->i_ceph_lock);
> down_read(&mdsc->snap_rwsem);
> spin_lock(&ci->i_ceph_lock);
> - issued = __ceph_caps_issued(ci, NULL);
> + issued = __ceph_caps_issued(ci, NULL, -1);
> }
> }
>
> @@ -2269,7 +2269,7 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
>
> dout("do_getattr inode %p mask %s mode 0%o\n",
> inode, ceph_cap_string(mask), inode->i_mode);
> - if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
> + if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1, true))
> return 0;
>
> mode = (mask & CEPH_STAT_RSTAT) ? USE_AUTH_MDS : USE_ANY_MDS;
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index a976febf9647..606fa8cd687f 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -558,6 +558,8 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
> if (refcount_dec_and_test(&s->s_ref)) {
> if (s->s_auth.authorizer)
> ceph_auth_destroy_authorizer(s->s_auth.authorizer);
> + percpu_counter_destroy(&s->i_caps_hit);
> + percpu_counter_destroy(&s->i_caps_mis);
> kfree(s);
> }
> }
> @@ -598,6 +600,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
> int mds)
> {
> struct ceph_mds_session *s;
> + int err;
>
> if (mds >= mdsc->mdsmap->possible_max_rank)
> return ERR_PTR(-EINVAL);
> @@ -612,8 +615,10 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
>
> dout("%s: realloc to %d\n", __func__, newmax);
> sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
> - if (!sa)
> + if (!sa) {
> + err = -ENOMEM;
> goto fail_realloc;
> + }
> if (mdsc->sessions) {
> memcpy(sa, mdsc->sessions,
> mdsc->max_sessions * sizeof(void *));
> @@ -653,6 +658,13 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
>
> INIT_LIST_HEAD(&s->s_cap_flushing);
>
> + err = percpu_counter_init(&s->i_caps_hit, 0, GFP_NOFS);
> + if (err)
> + goto fail_realloc;
> + err = percpu_counter_init(&s->i_caps_mis, 0, GFP_NOFS);
> + if (err)
> + goto fail_init;
> +
> mdsc->sessions[mds] = s;
> atomic_inc(&mdsc->num_sessions);
> refcount_inc(&s->s_ref); /* one ref to sessions[], one to caller */
> @@ -662,6 +674,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
>
> return s;
>
> +fail_init:
> + percpu_counter_destroy(&s->i_caps_hit);
> fail_realloc:
> kfree(s);
> return ERR_PTR(-ENOMEM);
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index 22186060bc37..c8935fd0d8bb 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -201,6 +201,9 @@ struct ceph_mds_session {
> refcount_t s_ref;
> struct list_head s_waiting; /* waiting requests */
> struct list_head s_unsafe; /* unsafe requests */
> +
> + struct percpu_counter i_caps_hit;
> + struct percpu_counter i_caps_mis;
> };
>
> /*
> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
> index ccfcc66aaf44..3d34af145f81 100644
> --- a/fs/ceph/snap.c
> +++ b/fs/ceph/snap.c
> @@ -534,7 +534,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
> INIT_LIST_HEAD(&capsnap->ci_item);
>
> capsnap->follows = old_snapc->seq;
> - capsnap->issued = __ceph_caps_issued(ci, NULL);
> + capsnap->issued = __ceph_caps_issued(ci, NULL, -1);
> capsnap->dirty = dirty;
>
> capsnap->mode = inode->i_mode;
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 40703588b889..88da9e21af75 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -635,8 +635,10 @@ static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci)
> return !RB_EMPTY_ROOT(&ci->i_caps);
> }
>
> -extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented);
> -extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t);
> +extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented,
> + int mask);
> +extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t,
> + bool metric);
> extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
> struct ceph_cap *cap);
>
> @@ -644,17 +646,17 @@ static inline int ceph_caps_issued(struct ceph_inode_info *ci)
> {
> int issued;
> spin_lock(&ci->i_ceph_lock);
> - issued = __ceph_caps_issued(ci, NULL);
> + issued = __ceph_caps_issued(ci, NULL, -1);
> spin_unlock(&ci->i_ceph_lock);
> return issued;
> }
>
> static inline int ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask,
> - int touch)
> + int touch, bool metric)
> {
> int r;
> spin_lock(&ci->i_ceph_lock);
> - r = __ceph_caps_issued_mask(ci, mask, touch);
> + r = __ceph_caps_issued_mask(ci, mask, touch, metric);
> spin_unlock(&ci->i_ceph_lock);
> return r;
> }
> diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
> index 98a9a3101cda..aa9ee2c2d8f3 100644
> --- a/fs/ceph/xattr.c
> +++ b/fs/ceph/xattr.c
> @@ -856,7 +856,7 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
>
> if (ci->i_xattrs.version == 0 ||
> !((req_mask & CEPH_CAP_XATTR_SHARED) ||
> - __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
> + __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1, true))) {
> spin_unlock(&ci->i_ceph_lock);
>
> /* security module gets xattr while filling trace */
> @@ -914,7 +914,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
> ci->i_xattrs.version, ci->i_xattrs.index_version);
>
> if (ci->i_xattrs.version == 0 ||
> - !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
> + !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1, true)) {
> spin_unlock(&ci->i_ceph_lock);
> err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
> if (err)
> @@ -1064,7 +1064,7 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>
> spin_lock(&ci->i_ceph_lock);
> retry:
> - issued = __ceph_caps_issued(ci, NULL);
> + issued = __ceph_caps_issued(ci, NULL, -1);
> if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
> goto do_sync;
>
> @@ -1192,7 +1192,7 @@ bool ceph_security_xattr_deadlock(struct inode *in)
> spin_lock(&ci->i_ceph_lock);
> ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
> !(ci->i_xattrs.version > 0 &&
> - __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
> + __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0, true));
> spin_unlock(&ci->i_ceph_lock);
> return ret;
> }
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 2/8] ceph: add caps perf metric for each session
2020-01-09 14:52 ` Jeff Layton
@ 2020-01-10 3:38 ` Xiubo Li
2020-01-10 11:51 ` Jeff Layton
0 siblings, 1 reply; 18+ messages in thread
From: Xiubo Li @ 2020-01-10 3:38 UTC (permalink / raw)
To: Jeff Layton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel
On 2020/1/9 22:52, Jeff Layton wrote:
> On Wed, 2020-01-08 at 05:41 -0500, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> This will fulfill the caps hit/miss metric for each session. When
>> checking the "need" mask and if one cap has the subset of the "need"
>> mask it means hit, or missed.
>>
>> item total miss hit
>> -------------------------------------------------
>> d_lease 295 0 993
>>
>> session caps miss hit
>> -------------------------------------------------
>> 0 295 107 4119
>> 1 1 107 9
>>
>> Fixes: https://tracker.ceph.com/issues/43215
> For the record, "Fixes:" has a different meaning for kernel patches.
> It's used to reference an earlier patch that introduced the bug that the
> patch is fixing.
>
> It's a pity that the ceph team decided to use that to reference tracker
> tickets in their tree. For the kernel we usually use a generic "URL:"
> tag for that.
Sure, will fix it.
[...]
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 28ae0c134700..6ab02aab7d9c 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -567,7 +567,7 @@ static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
>> static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
>> unsigned issued)
>> {
>> - unsigned had = __ceph_caps_issued(ci, NULL);
>> + unsigned int had = __ceph_caps_issued(ci, NULL, -1);
>>
>> /*
>> * Each time we receive FILE_CACHE anew, we increment
>> @@ -787,20 +787,43 @@ static int __cap_is_valid(struct ceph_cap *cap)
>> * out, and may be invalidated in bulk if the client session times out
>> * and session->s_cap_gen is bumped.
>> */
>> -int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
>> +int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented, int mask)
>
> This seems like the wrong approach. This function returns a set of caps,
> so it seems like the callers ought to determine whether a miss or hit
> occurred, and whether to record it.
Currently this approach will count the hit/miss for each i_cap entity in
ci->i_caps, for example, if a i_cap has a subset of the requested cap
mask it means the i_cap hit, or the i_cap miss.
This approach will be like:
session caps miss hit
-------------------------------------------------
0 295 107 4119
1 1 107 9
The "caps" here is the total i_caps in all the ceph_inodes we have.
Another approach is only when the ci->i_caps have all the requested cap
mask, it means hit, or miss, this is what you meant as above.
This approach will be like:
session inodes miss hit
-------------------------------------------------
0 295 107 4119
1 1 107 9
The "inodes" here is the total ceph_inodes we have.
Which one will be better ?
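A minimal sketch of the two schemes, using simplified stand-in types
(not the real ceph_inode_info/ceph_cap structures), just to make the
difference concrete; the exact per-cap hit condition is an assumption
here:

struct demo_cap {
        int issued;
        struct demo_cap *next;
};

/* scheme 1: count one hit/miss per i_cap held by the inode */
static void count_per_cap(struct demo_cap *caps, int mask,
                          long *hit, long *mis)
{
        struct demo_cap *c;

        for (c = caps; c; c = c->next) {
                if (c->issued & mask)   /* cap covers part of the mask */
                        (*hit)++;
                else
                        (*mis)++;
        }
}

/* scheme 2: count one hit/miss per inode, only when the whole mask is issued */
static void count_per_inode(struct demo_cap *caps, int mask,
                            long *hit, long *mis)
{
        struct demo_cap *c;
        int issued = 0;

        for (c = caps; c; c = c->next)
                issued |= c->issued;

        if ((issued & mask) == mask)
                (*hit)++;
        else
                (*mis)++;
}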
>
>> {
[...]
>>
>> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
>> index 382beb04bacb..1e1ccae8953d 100644
>> --- a/fs/ceph/dir.c
>> +++ b/fs/ceph/dir.c
>> @@ -30,7 +30,7 @@
>> const struct dentry_operations ceph_dentry_ops;
>>
>> static bool __dentry_lease_is_valid(struct ceph_dentry_info *di);
>> -static int __dir_lease_try_check(const struct dentry *dentry);
>> +static int __dir_lease_try_check(const struct dentry *dentry, bool metric);
>>
> AFAICT, this function is only called when trimming dentries and in
> d_delete. I don't think we care about measuring cache hits/misses for
> either of those cases.
Yeah, it is.
This will ignore the trimming dentries case, and will count from the
d_delete.
This approach will only count the cap hit/miss called from VFS layer.
Thanks
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 2/8] ceph: add caps perf metric for each session
2020-01-10 3:38 ` Xiubo Li
@ 2020-01-10 11:51 ` Jeff Layton
2020-01-13 1:12 ` Xiubo Li
0 siblings, 1 reply; 18+ messages in thread
From: Jeff Layton @ 2020-01-10 11:51 UTC (permalink / raw)
To: Xiubo Li, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel
On Fri, 2020-01-10 at 11:38 +0800, Xiubo Li wrote:
> On 2020/1/9 22:52, Jeff Layton wrote:
> > On Wed, 2020-01-08 at 05:41 -0500, xiubli@redhat.com wrote:
> > > From: Xiubo Li <xiubli@redhat.com>
> > >
> > > This will fulfill the caps hit/miss metric for each session. When
> > > checking the "need" mask and if one cap has the subset of the "need"
> > > mask it means hit, or missed.
> > >
> > > item total miss hit
> > > -------------------------------------------------
> > > d_lease 295 0 993
> > >
> > > session caps miss hit
> > > -------------------------------------------------
> > > 0 295 107 4119
> > > 1 1 107 9
> > >
> > > Fixes: https://tracker.ceph.com/issues/43215
> > For the record, "Fixes:" has a different meaning for kernel patches.
> > It's used to reference an earlier patch that introduced the bug that the
> > patch is fixing.
> >
> > It's a pity that the ceph team decided to use that to reference tracker
> > tickets in their tree. For the kernel we usually use a generic "URL:"
> > tag for that.
>
> Sure, will fix it.
>
> [...]
> > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> > > index 28ae0c134700..6ab02aab7d9c 100644
> > > --- a/fs/ceph/caps.c
> > > +++ b/fs/ceph/caps.c
> > > @@ -567,7 +567,7 @@ static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
> > > static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
> > > unsigned issued)
> > > {
> > > - unsigned had = __ceph_caps_issued(ci, NULL);
> > > + unsigned int had = __ceph_caps_issued(ci, NULL, -1);
> > >
> > > /*
> > > * Each time we receive FILE_CACHE anew, we increment
> > > @@ -787,20 +787,43 @@ static int __cap_is_valid(struct ceph_cap *cap)
> > > * out, and may be invalidated in bulk if the client session times out
> > > * and session->s_cap_gen is bumped.
> > > */
> > > -int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
> > > +int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented, int mask)
> >
> > This seems like the wrong approach. This function returns a set of caps,
> > so it seems like the callers ought to determine whether a miss or hit
> > occurred, and whether to record it.
>
> Currently this approach will count the hit/miss for each i_cap entity in
> ci->i_caps, for example, if a i_cap has a subset of the requested cap
> mask it means the i_cap hit, or the i_cap miss.
>
> This approach will be like:
>
> session caps miss hit
> -------------------------------------------------
> 0 295 107 4119
> 1 1 107 9
>
> The "caps" here is the total i_caps in all the ceph_inodes we have.
>
>
> Another approach is only when the ci->i_caps have all the requested cap
> mask, it means hit, or miss, this is what you meant as above.
>
> This approach will be like:
>
> session inodes miss hit
> -------------------------------------------------
> 0 295 107 4119
> 1 1 107 9
>
> The "inodes" here is the total ceph_inodes we have.
>
> Which one will be better ?
>
>
I think I wasn't clear. My objection was to adding this "mask" field to
__ceph_caps_issued and having the counting logic in there. It would be
cleaner to have the callers do that instead. __ceph_caps_issued returns
the issued caps, so the callers have all of the information they need to
increment the proper counters without having to change
__ceph_caps_issued.
> > > {
> [...]
> > >
> > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > > index 382beb04bacb..1e1ccae8953d 100644
> > > --- a/fs/ceph/dir.c
> > > +++ b/fs/ceph/dir.c
> > > @@ -30,7 +30,7 @@
> > > const struct dentry_operations ceph_dentry_ops;
> > >
> > > static bool __dentry_lease_is_valid(struct ceph_dentry_info *di);
> > > -static int __dir_lease_try_check(const struct dentry *dentry);
> > > +static int __dir_lease_try_check(const struct dentry *dentry, bool metric);
> > >
> > AFAICT, this function is only called when trimming dentries and in
> > d_delete. I don't think we care about measuring cache hits/misses for
> > either of those cases.
>
> Yeah, it is.
>
> This will ignore the trimming dentries case, and will count from the
> d_delete.
>
> This approach will only count the cap hit/miss called from VFS layer.
>
Why do you need this "metric" parameter here? We _know_ that we won't be
counting hits and misses in this codepath, so it doesn't seem to serve
any useful purpose.
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 2/8] ceph: add caps perf metric for each session
2020-01-10 11:51 ` Jeff Layton
@ 2020-01-13 1:12 ` Xiubo Li
2020-01-13 13:20 ` Jeff Layton
0 siblings, 1 reply; 18+ messages in thread
From: Xiubo Li @ 2020-01-13 1:12 UTC (permalink / raw)
To: Jeff Layton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel
On 2020/1/10 19:51, Jeff Layton wrote:
> On Fri, 2020-01-10 at 11:38 +0800, Xiubo Li wrote:
>> On 2020/1/9 22:52, Jeff Layton wrote:
>>> On Wed, 2020-01-08 at 05:41 -0500, xiubli@redhat.com wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> This will fulfill the caps hit/miss metric for each session. When
>>>> checking the "need" mask and if one cap has the subset of the "need"
>>>> mask it means hit, or missed.
>>>>
>>>> item total miss hit
>>>> -------------------------------------------------
>>>> d_lease 295 0 993
>>>>
>>>> session caps miss hit
>>>> -------------------------------------------------
>>>> 0 295 107 4119
>>>> 1 1 107 9
>>>>
>>>> Fixes: https://tracker.ceph.com/issues/43215
>>> For the record, "Fixes:" has a different meaning for kernel patches.
>>> It's used to reference an earlier patch that introduced the bug that the
>>> patch is fixing.
>>>
>>> It's a pity that the ceph team decided to use that to reference tracker
>>> tickets in their tree. For the kernel we usually use a generic "URL:"
>>> tag for that.
>> Sure, will fix it.
>>
>> [...]
>>>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>>>> index 28ae0c134700..6ab02aab7d9c 100644
>>>> --- a/fs/ceph/caps.c
>>>> +++ b/fs/ceph/caps.c
>>>> @@ -567,7 +567,7 @@ static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
>>>> static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
>>>> unsigned issued)
>>>> {
>>>> - unsigned had = __ceph_caps_issued(ci, NULL);
>>>> + unsigned int had = __ceph_caps_issued(ci, NULL, -1);
>>>>
>>>> /*
>>>> * Each time we receive FILE_CACHE anew, we increment
>>>> @@ -787,20 +787,43 @@ static int __cap_is_valid(struct ceph_cap *cap)
>>>> * out, and may be invalidated in bulk if the client session times out
>>>> * and session->s_cap_gen is bumped.
>>>> */
>>>> -int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
>>>> +int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented, int mask)
>>> This seems like the wrong approach. This function returns a set of caps,
>>> so it seems like the callers ought to determine whether a miss or hit
>>> occurred, and whether to record it.
>> Currently this approach will count the hit/miss for each i_cap entity in
>> ci->i_caps, for example, if a i_cap has a subset of the requested cap
>> mask it means the i_cap hit, or the i_cap miss.
>>
>> This approach will be like:
>>
>> session caps miss hit
>> -------------------------------------------------
>> 0 295 107 4119
>> 1 1 107 9
>>
>> The "caps" here is the total i_caps in all the ceph_inodes we have.
>>
>>
>> Another approach is only when the ci->i_caps have all the requested cap
>> mask, it means hit, or miss, this is what you meant as above.
>>
>> This approach will be like:
>>
>> session inodes miss hit
>> -------------------------------------------------
>> 0 295 107 4119
>> 1 1 107 9
>>
>> The "inodes" here is the total ceph_inodes we have.
>>
>> Which one will be better ?
>>
>>
> I think I wasn't clear. My objection was to adding this "mask" field to
> __ceph_caps_issued and having the counting logic in there. It would be
> cleaner to have the callers do that instead. __ceph_caps_issued returns
> the issued caps, so the callers have all of the information they need to
> increment the proper counters without having to change
> __ceph_caps_issued.
Do you mean if the (mask & issued == mask) the caller will increase 1 to
the hit counter, or increase 1 miss counter, right ?
For currently approach of this patch, when traversing the ceph_inode's
i_caps and if a i_cap has only a subset or all of the "mask" that means
hit, or miss.
This is why changing the __ceph_caps_issued().
>
>>>> {
>> [...]
>>>>
>>>> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
>>>> index 382beb04bacb..1e1ccae8953d 100644
>>>> --- a/fs/ceph/dir.c
>>>> +++ b/fs/ceph/dir.c
>>>> @@ -30,7 +30,7 @@
>>>> const struct dentry_operations ceph_dentry_ops;
>>>>
>>>> static bool __dentry_lease_is_valid(struct ceph_dentry_info *di);
>>>> -static int __dir_lease_try_check(const struct dentry *dentry);
>>>> +static int __dir_lease_try_check(const struct dentry *dentry, bool metric);
>>>>
>>> AFAICT, this function is only called when trimming dentries and in
>>> d_delete. I don't think we care about measuring cache hits/misses for
>>> either of those cases.
>> Yeah, it is.
>>
>> This will ignore the trimming dentries case, and will count from the
>> d_delete.
>>
>> This approach will only count the cap hit/miss called from VFS layer.
>>
> Why do you need this "metric" parameter here? We _know_ that we won't be
> counting hits and misses in this codepath, so it doesn't seem to serve
> any useful purpose.
>
We need to know where the caller comes from:
In Case1: caller is vfs
This will count the hit/miss counters.
ceph_d_delete() --> __dir_lease_try_check(metric = true) -->
__ceph_caps_issued_mask(mask = XXX, metric = true)
In Case2: caller is ceph.ko itself
This will ignore the hit/miss counters.
ceph_trim_dentries() --> __dentry_lease_check() -->
__dir_lease_try_check(metric = false) --> __ceph_caps_issued_mask(mask =
XXX, metric = false)
Thanks.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 2/8] ceph: add caps perf metric for each session
2020-01-13 1:12 ` Xiubo Li
@ 2020-01-13 13:20 ` Jeff Layton
2020-01-13 13:28 ` Xiubo Li
0 siblings, 1 reply; 18+ messages in thread
From: Jeff Layton @ 2020-01-13 13:20 UTC (permalink / raw)
To: Xiubo Li, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel
On Mon, 2020-01-13 at 09:12 +0800, Xiubo Li wrote:
> On 2020/1/10 19:51, Jeff Layton wrote:
> > On Fri, 2020-01-10 at 11:38 +0800, Xiubo Li wrote:
> > > On 2020/1/9 22:52, Jeff Layton wrote:
> > > > On Wed, 2020-01-08 at 05:41 -0500, xiubli@redhat.com wrote:
> > > > > From: Xiubo Li <xiubli@redhat.com>
> > > > >
> > > > > This will fulfill the caps hit/miss metric for each session. When
> > > > > checking the "need" mask and if one cap has the subset of the "need"
> > > > > mask it means hit, or missed.
> > > > >
> > > > > item total miss hit
> > > > > -------------------------------------------------
> > > > > d_lease 295 0 993
> > > > >
> > > > > session caps miss hit
> > > > > -------------------------------------------------
> > > > > 0 295 107 4119
> > > > > 1 1 107 9
> > > > >
> > > > > Fixes: https://tracker.ceph.com/issues/43215
> > > > For the record, "Fixes:" has a different meaning for kernel patches.
> > > > It's used to reference an earlier patch that introduced the bug that the
> > > > patch is fixing.
> > > >
> > > > It's a pity that the ceph team decided to use that to reference tracker
> > > > tickets in their tree. For the kernel we usually use a generic "URL:"
> > > > tag for that.
> > > Sure, will fix it.
> > >
> > > [...]
> > > > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> > > > > index 28ae0c134700..6ab02aab7d9c 100644
> > > > > --- a/fs/ceph/caps.c
> > > > > +++ b/fs/ceph/caps.c
> > > > > @@ -567,7 +567,7 @@ static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
> > > > > static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
> > > > > unsigned issued)
> > > > > {
> > > > > - unsigned had = __ceph_caps_issued(ci, NULL);
> > > > > + unsigned int had = __ceph_caps_issued(ci, NULL, -1);
> > > > >
> > > > > /*
> > > > > * Each time we receive FILE_CACHE anew, we increment
> > > > > @@ -787,20 +787,43 @@ static int __cap_is_valid(struct ceph_cap *cap)
> > > > > * out, and may be invalidated in bulk if the client session times out
> > > > > * and session->s_cap_gen is bumped.
> > > > > */
> > > > > -int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
> > > > > +int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented, int mask)
> > > > This seems like the wrong approach. This function returns a set of caps,
> > > > so it seems like the callers ought to determine whether a miss or hit
> > > > occurred, and whether to record it.
> > > Currently this approach will count the hit/miss for each i_cap entity in
> > > ci->i_caps, for example, if a i_cap has a subset of the requested cap
> > > mask it means the i_cap hit, or the i_cap miss.
> > >
> > > This approach will be like:
> > >
> > > session caps miss hit
> > > -------------------------------------------------
> > > 0 295 107 4119
> > > 1 1 107 9
> > >
> > > The "caps" here is the total i_caps in all the ceph_inodes we have.
> > >
> > >
> > > Another approach is only when the ci->i_caps have all the requested cap
> > > mask, it means hit, or miss, this is what you meant as above.
> > >
> > > This approach will be like:
> > >
> > > session inodes miss hit
> > > -------------------------------------------------
> > > 0 295 107 4119
> > > 1 1 107 9
> > >
> > > The "inodes" here is the total ceph_inodes we have.
> > >
> > > Which one will be better ?
> > >
> > >
> > I think I wasn't clear. My objection was to adding this "mask" field to
> > __ceph_caps_issued and having the counting logic in there. It would be
> > cleaner to have the callers do that instead. __ceph_caps_issued returns
> > the issued caps, so the callers have all of the information they need to
> > increment the proper counters without having to change
> > __ceph_caps_issued.
>
> Do you mean if the (mask & issued == mask) the caller will increase 1 to
> the hit counter, or increase 1 miss counter, right ?
>
> For currently approach of this patch, when traversing the ceph_inode's
> i_caps and if a i_cap has only a subset or all of the "mask" that means
> hit, or miss.
>
> This is why changing the __ceph_caps_issued().
>
In this case, I'm specifically saying that you should move the hit and
miss counting into the _callers_ of __ceph_caps_issued().
That function is a piece of core infrastructure that returns the
currently issued caps. I think that it should not be changed to count
hits and misses, as the calling functions are in a better position to
make that determination and it needlessly complicates low-level code.
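A rough sketch of that caller-side shape, in simplified user-space C;
the metric struct and counter fields are made-up stand-ins, and
demo_caps_issued() only represents the unchanged two-argument
__ceph_caps_issued():

struct demo_metric {
        long caps_hit;
        long caps_mis;
};

/* stands in for __ceph_caps_issued(ci, NULL) with its original signature */
static int demo_caps_issued(int issued)
{
        return issued;
}

static int demo_caller(int issued, int mask, struct demo_metric *m)
{
        int have = demo_caps_issued(issued);

        if ((have & mask) == mask)
                m->caps_hit++;          /* the whole "need" mask is covered */
        else
                m->caps_mis++;
        return have;
}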
> > > > > {
> > > [...]
> > > > >
> > > > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > > > > index 382beb04bacb..1e1ccae8953d 100644
> > > > > --- a/fs/ceph/dir.c
> > > > > +++ b/fs/ceph/dir.c
> > > > > @@ -30,7 +30,7 @@
> > > > > const struct dentry_operations ceph_dentry_ops;
> > > > >
> > > > > static bool __dentry_lease_is_valid(struct ceph_dentry_info *di);
> > > > > -static int __dir_lease_try_check(const struct dentry *dentry);
> > > > > +static int __dir_lease_try_check(const struct dentry *dentry, bool metric);
> > > > >
> > > > AFAICT, this function is only called when trimming dentries and in
> > > > d_delete. I don't think we care about measuring cache hits/misses for
> > > > either of those cases.
> > > Yeah, it is.
> > >
> > > This will ignore the trimming dentries case, and will count from the
> > > d_delete.
> > >
> > > This approach will only count the cap hit/miss called from VFS layer.
> > >
> > Why do you need this "metric" parameter here? We _know_ that we won't be
> > counting hits and misses in this codepath, so it doesn't seem to serve
> > any useful purpose.
> >
> We need to know where the caller comes from:
>
> In Case1: caller is vfs
>
> This will count the hit/miss counters.
>
> ceph_d_delete() --> __dir_lease_try_check(metric = true) -->
> __ceph_caps_issued_mask(mask = XXX, metric = true)
>
Why would you count hits/misses from the d_delete codepath? That isn't
generally driven by user activity, and it's just checking to see whether
we have a lease for the dentry (in which case we'll keep it around). I
don't think we should count the lease check here as it's basically
similar to trimming.
>
> In Case2: caller is ceph.ko itself
>
> This will ignore the hit/miss counters.
>
> ceph_trim_dentries() --> __dentry_lease_check() -->
> __dir_lease_try_check(metric = false) --> __ceph_caps_issued_mask(mask =
> XXX, metric = false)
Right, so the caller (__dentry_lease_check()) just wouldn't count it in
this case.
In general, adding a boolean argument to a function like this is often a
sign that you're doing something wrong. You're almost always better off
doing this sort of thing in the caller, or making two different
functions that call the same underlying code.
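A sketch of that "two functions over one underlying helper" shape, in
simplified user-space C with made-up names; demo_lease_check() just
stands in for the existing __dir_lease_try_check() logic:

struct demo_metric {
        long d_lease_hit;
        long d_lease_mis;
};

/* the shared lease check, unchanged */
static int demo_lease_check(int lease_valid)
{
        return lease_valid;
}

/* VFS-driven callers: same check, plus hit/miss accounting */
static int demo_lease_check_counted(int lease_valid, struct demo_metric *m)
{
        int ret = demo_lease_check(lease_valid);

        if (ret)
                m->d_lease_hit++;
        else
                m->d_lease_mis++;
        return ret;
}

/* internal trimming path: same check, no accounting */
static int demo_lease_check_uncounted(int lease_valid)
{
        return demo_lease_check(lease_valid);
}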
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 2/8] ceph: add caps perf metric for each session
2020-01-13 13:20 ` Jeff Layton
@ 2020-01-13 13:28 ` Xiubo Li
0 siblings, 0 replies; 18+ messages in thread
From: Xiubo Li @ 2020-01-13 13:28 UTC (permalink / raw)
To: Jeff Layton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel
On 2020/1/13 21:20, Jeff Layton wrote:
> On Mon, 2020-01-13 at 09:12 +0800, Xiubo Li wrote:
>> On 2020/1/10 19:51, Jeff Layton wrote:
>>> On Fri, 2020-01-10 at 11:38 +0800, Xiubo Li wrote:
>>>> On 2020/1/9 22:52, Jeff Layton wrote:
>>>>> On Wed, 2020-01-08 at 05:41 -0500, xiubli@redhat.com wrote:
>>>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>>>
>>>>>> This will fulfill the caps hit/miss metric for each session. When
>>>>>> checking the "need" mask and if one cap has the subset of the "need"
>>>>>> mask it means hit, or missed.
>>>>>>
>>>>>> item total miss hit
>>>>>> -------------------------------------------------
>>>>>> d_lease 295 0 993
>>>>>>
>>>>>> session caps miss hit
>>>>>> -------------------------------------------------
>>>>>> 0 295 107 4119
>>>>>> 1 1 107 9
>>>>>>
>>>>>> Fixes: https://tracker.ceph.com/issues/43215
>>>>> For the record, "Fixes:" has a different meaning for kernel patches.
>>>>> It's used to reference an earlier patch that introduced the bug that the
>>>>> patch is fixing.
>>>>>
>>>>> It's a pity that the ceph team decided to use that to reference tracker
>>>>> tickets in their tree. For the kernel we usually use a generic "URL:"
>>>>> tag for that.
>>>> Sure, will fix it.
>>>>
>>>> [...]
>>>>>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>>>>>> index 28ae0c134700..6ab02aab7d9c 100644
>>>>>> --- a/fs/ceph/caps.c
>>>>>> +++ b/fs/ceph/caps.c
>>>>>> @@ -567,7 +567,7 @@ static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
>>>>>> static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
>>>>>> unsigned issued)
>>>>>> {
>>>>>> - unsigned had = __ceph_caps_issued(ci, NULL);
>>>>>> + unsigned int had = __ceph_caps_issued(ci, NULL, -1);
>>>>>>
>>>>>> /*
>>>>>> * Each time we receive FILE_CACHE anew, we increment
>>>>>> @@ -787,20 +787,43 @@ static int __cap_is_valid(struct ceph_cap *cap)
>>>>>> * out, and may be invalidated in bulk if the client session times out
>>>>>> * and session->s_cap_gen is bumped.
>>>>>> */
>>>>>> -int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
>>>>>> +int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented, int mask)
>>>>> This seems like the wrong approach. This function returns a set of caps,
>>>>> so it seems like the callers ought to determine whether a miss or hit
>>>>> occurred, and whether to record it.
>>>> Currently this approach will count the hit/miss for each i_cap entity in
>>>> ci->i_caps, for example, if a i_cap has a subset of the requested cap
>>>> mask it means the i_cap hit, or the i_cap miss.
>>>>
>>>> This approach will be like:
>>>>
>>>> session caps miss hit
>>>> -------------------------------------------------
>>>> 0 295 107 4119
>>>> 1 1 107 9
>>>>
>>>> The "caps" here is the total i_caps in all the ceph_inodes we have.
>>>>
>>>>
>>>> Another approach is only when the ci->i_caps have all the requested cap
>>>> mask, it means hit, or miss, this is what you meant as above.
>>>>
>>>> This approach will be like:
>>>>
>>>> session inodes miss hit
>>>> -------------------------------------------------
>>>> 0 295 107 4119
>>>> 1 1 107 9
>>>>
>>>> The "inodes" here is the total ceph_inodes we have.
>>>>
>>>> Which one will be better ?
>>>>
>>>>
>>> I think I wasn't clear. My objection was to adding this "mask" field to
>>> __ceph_caps_issued and having the counting logic in there. It would be
>>> cleaner to have the callers do that instead. __ceph_caps_issued returns
>>> the issued caps, so the callers have all of the information they need to
>>> increment the proper counters without having to change
>>> __ceph_caps_issued.
>> Do you mean if the (mask & issued == mask) the caller will increase 1 to
>> the hit counter, or increase 1 miss counter, right ?
>>
>> For currently approach of this patch, when traversing the ceph_inode's
>> i_caps and if a i_cap has only a subset or all of the "mask" that means
>> hit, or miss.
>>
>> This is why changing the __ceph_caps_issued().
>>
> In this case, I'm specifically saying that you should move the hit and
> miss counting into the _callers_ of __ceph_caps_issued().
>
> That function is a piece of core infrastructure that returns the
> currently issued caps. I think that it should not be changed to count
> hits and misses, as the calling functions are in a better position to
> make that determination and it needlessly complicates low-level code.
Sure, sounds reasonable.
Will fix this part later.
>
>>>>>> {
>>>> [...]
>>>>>>
>>>>>> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
>>>>>> index 382beb04bacb..1e1ccae8953d 100644
>>>>>> --- a/fs/ceph/dir.c
>>>>>> +++ b/fs/ceph/dir.c
>>>>>> @@ -30,7 +30,7 @@
>>>>>> const struct dentry_operations ceph_dentry_ops;
>>>>>>
>>>>>> static bool __dentry_lease_is_valid(struct ceph_dentry_info *di);
>>>>>> -static int __dir_lease_try_check(const struct dentry *dentry);
>>>>>> +static int __dir_lease_try_check(const struct dentry *dentry, bool metric);
>>>>>>
>>>>> AFAICT, this function is only called when trimming dentries and in
>>>>> d_delete. I don't think we care about measuring cache hits/misses for
>>>>> either of those cases.
>>>> Yeah, it is.
>>>>
>>>> This will ignore the trimming dentries case, and will count from the
>>>> d_delete.
>>>>
>>>> This approach will only count the cap hit/miss called from VFS layer.
>>>>
>>> Why do you need this "metric" parameter here? We _know_ that we won't be
>>> counting hits and misses in this codepath, so it doesn't seem to serve
>>> any useful purpose.
>>>
>> We need to know where the caller comes from:
>>
>> In Case1: caller is vfs
>>
>> This will count the hit/miss counters.
>>
>> ceph_d_delete() --> __dir_lease_try_check(metric = true) -->
>> __ceph_caps_issued_mask(mask = XXX, metric = true)
>>
> Why would you count hits/misses from the d_delete codepath? That isn't
> generally driven by user activity, and it's just checking to see whether
> we have a lease for the dentry (in which case we'll keep it around). I
> don't think we should count the lease check here as it's basically
> similar to trimming.
Okay, will remove it.
>> In Case2: caller is ceph.ko itself
>>
>> This will ignore the hit/miss counters.
>>
>> ceph_trim_dentries() --> __dentry_lease_check() -->
>> __dir_lease_try_check(metric = false) --> __ceph_caps_issued_mask(mask =
>> XXX, metric = false)
> Right, so the caller (__dentry_lease_check()) just wouldn't count it in
> this case.
>
> In general, adding a boolean argument to a function like this is often a
> sign that you're doing something wrong. You're almost always better off
> doing this sort of thing in the caller, or making two different
> functions that call the same underlying code.
Sure, this is reasonable.
BRs
Xiubo
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 3/8] ceph: add global read latency metric support
2020-01-08 10:41 [PATCH v2 0/8] ceph: add perf metrics support xiubli
2020-01-08 10:41 ` [PATCH v2 1/8] ceph: add global dentry lease metric support xiubli
2020-01-08 10:41 ` [PATCH v2 2/8] ceph: add caps perf metric for each session xiubli
@ 2020-01-08 10:41 ` xiubli
2020-01-08 10:41 ` [PATCH v2 4/8] ceph: add global write " xiubli
` (5 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: xiubli @ 2020-01-08 10:41 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
item total sum_lat(us) avg_lat(us)
-----------------------------------------------------
read 73 3590000 49178082
Fixes: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/addr.c | 15 ++++++++++++++-
fs/ceph/debugfs.c | 13 +++++++++++++
fs/ceph/file.c | 12 ++++++++++++
fs/ceph/mds_client.c | 25 ++++++++++++++++++++++++-
fs/ceph/mds_client.h | 7 +++++++
include/linux/ceph/osd_client.h | 2 +-
net/ceph/osd_client.c | 9 ++++++++-
7 files changed, 79 insertions(+), 4 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 7ab616601141..2c51da3515a0 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -190,6 +190,8 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
+ s64 latency;
int err = 0;
u64 off = page_offset(page);
u64 len = PAGE_SIZE;
@@ -221,7 +223,7 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
err = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
&ci->i_layout, off, &len,
ci->i_truncate_seq, ci->i_truncate_size,
- &page, 1, 0);
+ &page, 1, 0, &latency);
if (err == -ENOENT)
err = 0;
if (err < 0) {
@@ -241,6 +243,9 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
ceph_readpage_to_fscache(inode, page);
out:
+ if (latency)
+ ceph_mdsc_update_read_latency(metric, latency);
+
return err < 0 ? err : 0;
}
@@ -260,6 +265,8 @@ static int ceph_readpage(struct file *filp, struct page *page)
static void finish_read(struct ceph_osd_request *req)
{
struct inode *inode = req->r_inode;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
struct ceph_osd_data *osd_data;
int rc = req->r_result <= 0 ? req->r_result : 0;
int bytes = req->r_result >= 0 ? req->r_result : 0;
@@ -297,6 +304,12 @@ static void finish_read(struct ceph_osd_request *req)
put_page(page);
bytes -= PAGE_SIZE;
}
+
+ if (!rc || rc == -ENOENT) {
+ s64 latency = jiffies - req->r_start_stamp;
+ ceph_mdsc_update_read_latency(metric, latency);
+ }
+
kfree(osd_data->pages);
}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index c132fdb40d53..8200bf025ccd 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -128,8 +128,21 @@ static int metric_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
struct ceph_mds_client *mdsc = fsc->mdsc;
+ s64 total, sum, avg = 0;
int i;
+ seq_printf(s, "item total sum_lat(us) avg_lat(us)\n");
+ seq_printf(s, "-----------------------------------------------------\n");
+
+ spin_lock(&mdsc->metric.read_lock);
+ total = atomic64_read(&mdsc->metric.total_reads);
+ sum = timespec64_to_ns(&mdsc->metric.read_latency_sum);
+ spin_unlock(&mdsc->metric.read_lock);
+ avg = total ? sum / total : 0;
+ seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "read",
+ total, sum / NSEC_PER_USEC, avg / NSEC_PER_USEC);
+
+ seq_printf(s, "\n");
seq_printf(s, "item total miss hit\n");
seq_printf(s, "-------------------------------------------------\n");
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 418c7b30c6db..84c9de9da022 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -586,6 +586,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
struct ceph_osd_client *osdc = &fsc->client->osdc;
ssize_t ret;
u64 off = iocb->ki_pos;
@@ -658,6 +659,11 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
ret = ceph_osdc_start_request(osdc, req, false);
if (!ret)
ret = ceph_osdc_wait_request(osdc, req);
+
+ if (ret > 0 || ret == -ETIMEDOUT) {
+ s64 latency = jiffies - req->r_start_stamp;
+ ceph_mdsc_update_read_latency(metric, latency);
+ }
ceph_osdc_put_request(req);
i_size = i_size_read(inode);
@@ -931,6 +937,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
struct ceph_vino vino;
struct ceph_osd_request *req;
struct bio_vec *bvecs;
@@ -1047,6 +1054,11 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (!ret)
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ if ((ret > 0 || ret == -ETIMEDOUT) && !write) {
+ s64 latency = jiffies - req->r_start_stamp;
+ ceph_mdsc_update_read_latency(metric, latency);
+ }
+
size = i_size_read(inode);
if (!write) {
if (ret == -ENOENT)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 606fa8cd687f..d36464766e2c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4092,6 +4092,25 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
ceph_force_reconnect(fsc->sb);
}
+/*
+ * metric helpers
+ */
+void ceph_mdsc_update_read_latency(struct ceph_client_metric *m,
+ s64 latency)
+{
+ struct timespec64 ts;
+
+ if (!m)
+ return;
+
+ jiffies_to_timespec64(latency, &ts);
+
+ spin_lock(&m->read_lock);
+ atomic64_inc(&m->total_reads);
+ m->read_latency_sum = timespec64_add(m->read_latency_sum, ts);
+ spin_unlock(&m->read_lock);
+}
+
/*
* delayed work -- periodically trim expired leases, renew caps with mds
*/
@@ -4181,13 +4200,17 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
atomic64_set(&metric->total_dentries, 0);
ret = percpu_counter_init(&metric->d_lease_hit, 0, GFP_KERNEL);
if (ret)
- return ret;
+ return ret;
ret = percpu_counter_init(&metric->d_lease_mis, 0, GFP_KERNEL);
if (ret) {
percpu_counter_destroy(&metric->d_lease_hit);
return ret;
}
+ spin_lock_init(&metric->read_lock);
+ memset(&metric->read_latency_sum, 0, sizeof(struct timespec64));
+ atomic64_set(&metric->total_reads, 0);
+
return 0;
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index c8935fd0d8bb..fb8412a1169b 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -366,6 +366,10 @@ struct ceph_client_metric {
atomic64_t total_dentries;
struct percpu_counter d_lease_hit;
struct percpu_counter d_lease_mis;
+
+ spinlock_t read_lock;
+ atomic64_t total_reads;
+ struct timespec64 read_latency_sum;
};
/*
@@ -549,4 +553,7 @@ extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
extern int ceph_trim_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
int max_caps);
+
+extern void ceph_mdsc_update_read_latency(struct ceph_client_metric *m,
+ s64 latency);
#endif
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index eaffbdddf89a..8a7acd95f883 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -515,7 +515,7 @@ extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
u64 off, u64 *plen,
u32 truncate_seq, u64 truncate_size,
struct page **pages, int nr_pages,
- int page_align);
+ int page_align, s64 *latency);
extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
struct ceph_vino vino,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ba45b074a362..547946d0c5d6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5238,11 +5238,15 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
struct ceph_vino vino, struct ceph_file_layout *layout,
u64 off, u64 *plen,
u32 truncate_seq, u64 truncate_size,
- struct page **pages, int num_pages, int page_align)
+ struct page **pages, int num_pages, int page_align,
+ s64 *latency)
{
struct ceph_osd_request *req;
int rc = 0;
+ if (latency)
+ *latency = 0;
+
dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
vino.snap, off, *plen);
req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 0, 1,
@@ -5263,6 +5267,9 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
if (!rc)
rc = ceph_osdc_wait_request(osdc, req);
+ if (latency && (rc > 0 || rc == -ETIMEDOUT))
+ *latency = jiffies - req->r_start_stamp;
+
ceph_osdc_put_request(req);
dout("readpages result %d\n", rc);
return rc;
--
2.21.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 4/8] ceph: add global write latency metric support
2020-01-08 10:41 [PATCH v2 0/8] ceph: add perf metrics support xiubli
` (2 preceding siblings ...)
2020-01-08 10:41 ` [PATCH v2 3/8] ceph: add global read latency metric support xiubli
@ 2020-01-08 10:41 ` xiubli
2020-01-08 10:41 ` [PATCH v2 5/8] ceph: add global metadata perf " xiubli
` (4 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: xiubli @ 2020-01-08 10:41 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
item total sum_lat(us) avg_lat(us)
-----------------------------------------------------
write 222 5287750000 23818693
Fixes: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/addr.c | 23 +++++++++++++++++++++--
fs/ceph/debugfs.c | 8 ++++++++
fs/ceph/file.c | 12 ++++++++++--
fs/ceph/mds_client.c | 20 ++++++++++++++++++++
fs/ceph/mds_client.h | 6 ++++++
include/linux/ceph/osd_client.h | 3 ++-
net/ceph/osd_client.c | 9 ++++++++-
7 files changed, 75 insertions(+), 6 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 2c51da3515a0..5337202530ec 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -598,12 +598,15 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
loff_t page_off = page_offset(page);
int err, len = PAGE_SIZE;
struct ceph_writeback_ctl ceph_wbc;
+ struct ceph_client_metric *metric;
+ s64 latency;
dout("writepage %p idx %lu\n", page, page->index);
inode = page->mapping->host;
ci = ceph_inode(inode);
fsc = ceph_inode_to_client(inode);
+ metric = &fsc->mdsc->metric;
/* verify this is a writeable snap context */
snapc = page_snap_context(page);
@@ -645,7 +648,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
&ci->i_layout, snapc, page_off, len,
ceph_wbc.truncate_seq,
ceph_wbc.truncate_size,
- &inode->i_mtime, &page, 1);
+ &inode->i_mtime, &page, 1,
+ &latency);
+ if (latency)
+ ceph_mdsc_update_write_latency(metric, latency);
+
if (err < 0) {
struct writeback_control tmp_wbc;
if (!wbc)
@@ -707,6 +714,8 @@ static void writepages_finish(struct ceph_osd_request *req)
{
struct inode *inode = req->r_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
struct ceph_osd_data *osd_data;
struct page *page;
int num_pages, total_pages = 0;
@@ -714,7 +723,6 @@ static void writepages_finish(struct ceph_osd_request *req)
int rc = req->r_result;
struct ceph_snap_context *snapc = req->r_snapc;
struct address_space *mapping = inode->i_mapping;
- struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
bool remove_page;
dout("writepages_finish %p rc %d\n", inode, rc);
@@ -783,6 +791,11 @@ static void writepages_finish(struct ceph_osd_request *req)
ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
else
kfree(osd_data->pages);
+
+ if (!rc || rc == -ENOENT) {
+ s64 latency = jiffies - req->r_start_stamp;
+ ceph_mdsc_update_write_latency(metric, latency);
+ }
ceph_osdc_put_request(req);
}
@@ -1675,6 +1688,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
struct ceph_osd_request *req;
struct page *page = NULL;
u64 len, inline_version;
@@ -1785,6 +1799,11 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
if (!err)
err = ceph_osdc_wait_request(&fsc->client->osdc, req);
+
+ if (!err || err == -ETIMEDOUT) {
+ s64 latency = jiffies - req->r_start_stamp;
+ ceph_mdsc_update_write_latency(metric, latency);
+ }
out_put:
ceph_osdc_put_request(req);
if (err == -ECANCELED)
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 8200bf025ccd..3fdb15af0a83 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -142,6 +142,14 @@ static int metric_show(struct seq_file *s, void *p)
seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "read",
total, sum / NSEC_PER_USEC, avg / NSEC_PER_USEC);
+ spin_lock(&mdsc->metric.write_lock);
+ total = atomic64_read(&mdsc->metric.total_writes);
+ sum = timespec64_to_ns(&mdsc->metric.write_latency_sum);
+ spin_unlock(&mdsc->metric.write_lock);
+ avg = total ? sum / total : 0;
+ seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write",
+ total, sum / NSEC_PER_USEC, avg / NSEC_PER_USEC);
+
seq_printf(s, "\n");
seq_printf(s, "item total miss hit\n");
seq_printf(s, "-------------------------------------------------\n");
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 84c9de9da022..f5754acac3b9 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1054,9 +1054,12 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (!ret)
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
- if ((ret > 0 || ret == -ETIMEDOUT) && !write) {
+ if (ret > 0 || ret == -ETIMEDOUT) {
s64 latency = jiffies - req->r_start_stamp;
- ceph_mdsc_update_read_latency(metric, latency);
+ if (write)
+ ceph_mdsc_update_write_latency(metric, latency);
+ else
+ ceph_mdsc_update_read_latency(metric, latency);
}
size = i_size_read(inode);
@@ -1145,6 +1148,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
struct ceph_vino vino;
struct ceph_osd_request *req;
struct page **pages;
@@ -1230,6 +1234,10 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
if (!ret)
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ if (!ret || ret == -ETIMEDOUT) {
+ s64 latency = jiffies - req->r_start_stamp;
+ ceph_mdsc_update_write_latency(metric, latency);
+ }
out:
ceph_osdc_put_request(req);
if (ret != 0) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index d36464766e2c..865ff33aac0b 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4111,6 +4111,22 @@ void ceph_mdsc_update_read_latency(struct ceph_client_metric *m,
spin_unlock(&m->read_lock);
}
+void ceph_mdsc_update_write_latency(struct ceph_client_metric *m,
+ s64 latency)
+{
+ struct timespec64 ts;
+
+ if (!m)
+ return;
+
+ jiffies_to_timespec64(latency, &ts);
+
+ spin_lock(&m->write_lock);
+ atomic64_inc(&m->total_writes);
+ m->write_latency_sum = timespec64_add(m->write_latency_sum, ts);
+ spin_unlock(&m->write_lock);
+}
+
/*
* delayed work -- periodically trim expired leases, renew caps with mds
*/
@@ -4211,6 +4227,10 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
memset(&metric->read_latency_sum, 0, sizeof(struct timespec64));
atomic64_set(&metric->total_reads, 0);
+ spin_lock_init(&metric->write_lock);
+ memset(&metric->write_latency_sum, 0, sizeof(struct timespec64));
+ atomic64_set(&metric->total_writes, 0);
+
return 0;
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index fb8412a1169b..1c8c446fb7bb 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -370,6 +370,10 @@ struct ceph_client_metric {
spinlock_t read_lock;
atomic64_t total_reads;
struct timespec64 read_latency_sum;
+
+ spinlock_t write_lock;
+ atomic64_t total_writes;
+ struct timespec64 write_latency_sum;
};
/*
@@ -556,4 +560,6 @@ extern int ceph_trim_caps(struct ceph_mds_client *mdsc,
extern void ceph_mdsc_update_read_latency(struct ceph_client_metric *m,
s64 latency);
+extern void ceph_mdsc_update_write_latency(struct ceph_client_metric *m,
+ s64 latency);
#endif
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 8a7acd95f883..9d16c9cb1d69 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -524,7 +524,8 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
u64 off, u64 len,
u32 truncate_seq, u64 truncate_size,
struct timespec64 *mtime,
- struct page **pages, int nr_pages);
+ struct page **pages, int nr_pages,
+ s64 *latency);
int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
u64 src_snapid, u64 src_version,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 547946d0c5d6..09632d79cd9a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5285,12 +5285,16 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
u64 off, u64 len,
u32 truncate_seq, u64 truncate_size,
struct timespec64 *mtime,
- struct page **pages, int num_pages)
+ struct page **pages, int num_pages,
+ s64 *latency)
{
struct ceph_osd_request *req;
int rc = 0;
int page_align = off & ~PAGE_MASK;
+ if (latency)
+ *latency = 0;
+
req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1,
CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
snapc, truncate_seq, truncate_size,
@@ -5308,6 +5312,9 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
if (!rc)
rc = ceph_osdc_wait_request(osdc, req);
+ if (latency && (rc > 0 || rc == -ETIMEDOUT))
+ *latency = jiffies - req->r_start_stamp;
+
ceph_osdc_put_request(req);
if (rc == 0)
rc = len;
--
2.21.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 5/8] ceph: add global metadata perf metric support
2020-01-08 10:41 [PATCH v2 0/8] ceph: add perf metrics support xiubli
` (3 preceding siblings ...)
2020-01-08 10:41 ` [PATCH v2 4/8] ceph: add global write " xiubli
@ 2020-01-08 10:41 ` xiubli
2020-01-08 10:41 ` [PATCH v2 6/8] ceph: periodically send perf metrics to MDS xiubli
` (3 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: xiubli @ 2020-01-08 10:41 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
item total sum_lat(us) avg_lat(us)
-----------------------------------------------------
write 756 9225615920 12418192
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/debugfs.c | 8 ++++++++
fs/ceph/mds_client.c | 25 +++++++++++++++++++++++++
fs/ceph/mds_client.h | 6 ++++++
3 files changed, 39 insertions(+)
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 3fdb15af0a83..df8c1cc685d9 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -150,6 +150,14 @@ static int metric_show(struct seq_file *s, void *p)
seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write",
total, sum / NSEC_PER_USEC, avg / NSEC_PER_USEC);
+ spin_lock(&mdsc->metric.metadata_lock);
+ total = atomic64_read(&mdsc->metric.total_metadatas);
+ sum = timespec64_to_ns(&mdsc->metric.metadata_latency_sum);
+ spin_unlock(&mdsc->metric.metadata_lock);
+ avg = total ? sum / total : 0;
+ seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata",
+ total, sum / NSEC_PER_USEC, avg / NSEC_PER_USEC);
+
seq_printf(s, "\n");
seq_printf(s, "item total miss hit\n");
seq_printf(s, "-------------------------------------------------\n");
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 865ff33aac0b..ae2fe0277c6c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2894,6 +2894,11 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
result = le32_to_cpu(head->result);
+ if (!result || result == -ESTALE || result == -ENOENT) {
+ s64 latency = jiffies - req->r_started;
+ ceph_mdsc_update_metadata_latency(&mdsc->metric, latency);
+ }
+
/*
* Handle an ESTALE
* if we're not talking to the authority, send to them
@@ -4127,6 +4132,22 @@ void ceph_mdsc_update_write_latency(struct ceph_client_metric *m,
spin_unlock(&m->write_lock);
}
+void ceph_mdsc_update_metadata_latency(struct ceph_client_metric *m,
+ s64 latency)
+{
+ struct timespec64 ts;
+
+ if (!m)
+ return;
+
+ jiffies_to_timespec64(latency, &ts);
+
+ spin_lock(&m->metadata_lock);
+ atomic64_inc(&m->total_metadatas);
+ m->metadata_latency_sum = timespec64_add(m->metadata_latency_sum, ts);
+ spin_unlock(&m->metadata_lock);
+}
+
/*
* delayed work -- periodically trim expired leases, renew caps with mds
*/
@@ -4231,6 +4252,10 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
memset(&metric->write_latency_sum, 0, sizeof(struct timespec64));
atomic64_set(&metric->total_writes, 0);
+ spin_lock_init(&metric->metadata_lock);
+ memset(&metric->metadata_latency_sum, 0, sizeof(struct timespec64));
+ atomic64_set(&metric->total_metadatas, 0);
+
return 0;
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 1c8c446fb7bb..ee36ab87e5f6 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -374,6 +374,10 @@ struct ceph_client_metric {
spinlock_t write_lock;
atomic64_t total_writes;
struct timespec64 write_latency_sum;
+
+ spinlock_t metadata_lock;
+ atomic64_t total_metadatas;
+ struct timespec64 metadata_latency_sum;
};
/*
@@ -562,4 +566,6 @@ extern void ceph_mdsc_update_read_latency(struct ceph_client_metric *m,
s64 latency);
extern void ceph_mdsc_update_write_latency(struct ceph_client_metric *m,
s64 latency);
+extern void ceph_mdsc_update_metadata_latency(struct ceph_client_metric *m,
+ s64 latency);
#endif
--
2.21.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 6/8] ceph: periodically send perf metrics to MDS
2020-01-08 10:41 [PATCH v2 0/8] ceph: add perf metrics support xiubli
` (4 preceding siblings ...)
2020-01-08 10:41 ` [PATCH v2 5/8] ceph: add global metadata perf " xiubli
@ 2020-01-08 10:41 ` xiubli
2020-01-08 10:41 ` [PATCH v2 7/8] ceph: add reset metrics support xiubli
` (2 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: xiubli @ 2020-01-08 10:41 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
Add a debugfs entry to enable/disable sending metrics to the MDS,
disabled by default. When enabled, the kclient will send metrics
every second.
This will send the global dentry lease hit/miss and read/write/metadata
latency metrics, and each session's caps hit/miss metric, to the MDS.
Each time, the global metrics are sent only once, via any available
session.
Fixes: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/debugfs.c | 44 +++++++-
fs/ceph/mds_client.c | 205 ++++++++++++++++++++++++++++++++---
fs/ceph/mds_client.h | 3 +
fs/ceph/super.h | 1 +
include/linux/ceph/ceph_fs.h | 77 +++++++++++++
5 files changed, 312 insertions(+), 18 deletions(-)
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index df8c1cc685d9..bb96fb4d04c4 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -124,6 +124,40 @@ static int mdsc_show(struct seq_file *s, void *p)
return 0;
}
+/*
+ * metrics debugfs
+ */
+static int sending_metrics_set(void *data, u64 val)
+{
+ struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+
+ if (val > 1) {
+ pr_err("Invalid sending metrics set value %llu\n", val);
+ return -EINVAL;
+ }
+
+ mutex_lock(&mdsc->mutex);
+ mdsc->sending_metrics = (unsigned int)val;
+ mutex_unlock(&mdsc->mutex);
+
+ return 0;
+}
+
+static int sending_metrics_get(void *data, u64 *val)
+{
+ struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+
+ mutex_lock(&mdsc->mutex);
+ *val = (u64)mdsc->sending_metrics;
+ mutex_unlock(&mdsc->mutex);
+
+ return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(sending_metrics_fops, sending_metrics_get,
+ sending_metrics_set, "%llu\n");
+
static int metric_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
@@ -308,11 +342,9 @@ static int congestion_kb_get(void *data, u64 *val)
*val = (u64)fsc->mount_options->congestion_kb;
return 0;
}
-
DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get,
congestion_kb_set, "%llu\n");
-
void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
{
dout("ceph_fs_debugfs_cleanup\n");
@@ -322,6 +354,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
debugfs_remove(fsc->debugfs_mds_sessions);
debugfs_remove(fsc->debugfs_caps);
debugfs_remove(fsc->debugfs_metric);
+ debugfs_remove(fsc->debugfs_sending_metrics);
debugfs_remove(fsc->debugfs_mdsc);
}
@@ -362,6 +395,13 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc,
&mdsc_show_fops);
+ fsc->debugfs_sending_metrics =
+ debugfs_create_file("sending_metrics",
+ 0600,
+ fsc->client->debugfs_dir,
+ fsc,
+ &sending_metrics_fops);
+
fsc->debugfs_metric = debugfs_create_file("metrics",
0400,
fsc->client->debugfs_dir,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index ae2fe0277c6c..a0693ed6f54f 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4148,13 +4148,162 @@ void ceph_mdsc_update_metadata_latency(struct ceph_client_metric *m,
spin_unlock(&m->metadata_lock);
}
+/*
+ * called under s_mutex
+ */
+static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *s,
+ bool skip_global)
+{
+ struct ceph_metric_head *head;
+ struct ceph_metric_cap *cap;
+ struct ceph_metric_dentry_lease *lease;
+ struct ceph_metric_read_latency *read;
+ struct ceph_metric_write_latency *write;
+ struct ceph_metric_metadata_latency *meta;
+ struct ceph_msg *msg;
+ struct timespec64 ts;
+ s32 len = sizeof(*head) + sizeof(*cap);
+ s64 sum, total, avg;
+ s32 items = 0;
+
+ if (!mdsc || !s)
+ return false;
+
+ if (!skip_global) {
+ len += sizeof(*lease);
+ len += sizeof(*read);
+ len += sizeof(*write);
+ len += sizeof(*meta);
+ }
+
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
+ if (!msg) {
+ pr_err("send metrics to mds%d, failed to allocate message\n",
+ s->s_mds);
+ return false;
+ }
+
+ head = msg->front.iov_base;
+
+ /* encode the cap metric */
+ cap = (struct ceph_metric_cap *)(head + 1);
+ cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
+ cap->ver = 1;
+ cap->campat = 1;
+ cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
+ cap->hit = cpu_to_le64(percpu_counter_sum(&s->i_caps_hit));
+ cap->mis = cpu_to_le64(percpu_counter_sum(&s->i_caps_mis));
+ cap->total = cpu_to_le64(s->s_nr_caps);
+ items++;
+
+ dout("cap metric hit %lld, mis %lld, total caps %lld",
+ le64_to_cpu(cap->hit), le64_to_cpu(cap->mis),
+ le64_to_cpu(cap->total));
+
+ /* only send the global once */
+ if (skip_global)
+ goto skip_global;
+
+ /* encode the dentry lease metric */
+ lease = (struct ceph_metric_dentry_lease *)(cap + 1);
+ lease->type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
+ lease->ver = 1;
+ lease->compat = 1;
+ lease->data_len = cpu_to_le32(sizeof(*lease) - 10);
+ lease->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.d_lease_hit));
+ lease->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.d_lease_mis));
+ lease->total = cpu_to_le64(atomic64_read(&mdsc->metric.total_dentries));
+ items++;
+
+ dout("dentry lease metric hit %lld, mis %lld, total dentries %lld",
+ le64_to_cpu(lease->hit), le64_to_cpu(lease->mis),
+ le64_to_cpu(lease->total));
+
+ /* encode the read latency metric */
+ read = (struct ceph_metric_read_latency *)(lease + 1);
+ read->type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
+ read->ver = 1;
+ read->compat = 1;
+ read->data_len = cpu_to_le32(sizeof(*read) - 10);
+ spin_lock(&mdsc->metric.read_lock);
+ total = atomic64_read(&mdsc->metric.total_reads);
+ sum = timespec64_to_ns(&mdsc->metric.read_latency_sum);
+ spin_unlock(&mdsc->metric.read_lock);
+ avg = total ? sum / total : 0;
+ ts = ns_to_timespec64(avg);
+ read->sec = cpu_to_le32(ts.tv_sec);
+ read->nsec = cpu_to_le32(ts.tv_nsec);
+ items++;
+
+ dout("read latency metric total %lld, sum lat %lld, avg lat %lld",
+ total, sum, avg);
+
+ /* encode the write latency metric */
+ write = (struct ceph_metric_write_latency *)(read + 1);
+ write->type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
+ write->ver = 1;
+ write->compat = 1;
+ write->data_len = cpu_to_le32(sizeof(*write) - 10);
+ spin_lock(&mdsc->metric.write_lock);
+ total = atomic64_read(&mdsc->metric.total_writes);
+ sum = timespec64_to_ns(&mdsc->metric.write_latency_sum);
+ spin_unlock(&mdsc->metric.write_lock);
+ avg = total ? sum / total : 0;
+ ts = ns_to_timespec64(avg);
+ write->sec = cpu_to_le32(ts.tv_sec);
+ write->nsec = cpu_to_le32(ts.tv_nsec);
+ items++;
+
+ dout("write latency metric total %lld, sum lat %lld, avg lat %lld",
+ total, sum, avg);
+
+ /* encode the metadata latency metric */
+ meta = (struct ceph_metric_metadata_latency *)(write + 1);
+ meta->type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
+ meta->ver = 1;
+ meta->compat = 1;
+ meta->data_len = cpu_to_le32(sizeof(*meta) - 10);
+ spin_lock(&mdsc->metric.metadata_lock);
+ total = atomic64_read(&mdsc->metric.total_metadatas);
+ sum = timespec64_to_ns(&mdsc->metric.metadata_latency_sum);
+ spin_unlock(&mdsc->metric.metadata_lock);
+ avg = total ? sum / total : 0;
+ ts = ns_to_timespec64(avg);
+ meta->sec = cpu_to_le32(ts.tv_sec);
+ meta->nsec = cpu_to_le32(ts.tv_nsec);
+ items++;
+
+ dout("metadata latency metric total %lld, sum lat %lld, avg lat %lld",
+ total, sum, avg);
+
+skip_global:
+ put_unaligned_le32(items, &head->num);
+ msg->front.iov_len = len;
+ msg->hdr.version = cpu_to_le16(1);
+ msg->hdr.compat_version = cpu_to_le16(1);
+ msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+ dout("send metrics to mds%d %p\n", s->s_mds, msg);
+ ceph_con_send(&s->s_con, msg);
+
+ return true;
+}
+
/*
* delayed work -- periodically trim expired leases, renew caps with mds
*/
+#define CEPH_WORK_DELAY_DEF 5
static void schedule_delayed(struct ceph_mds_client *mdsc)
{
- int delay = 5;
- unsigned hz = round_jiffies_relative(HZ * delay);
+ unsigned int hz;
+ int delay = CEPH_WORK_DELAY_DEF;
+
+ mutex_lock(&mdsc->mutex);
+ if (mdsc->sending_metrics)
+ delay = 1;
+ mutex_unlock(&mdsc->mutex);
+
+ hz = round_jiffies_relative(HZ * delay);
schedule_delayed_work(&mdsc->delayed_work, hz);
}
@@ -4165,18 +4314,28 @@ static void delayed_work(struct work_struct *work)
container_of(work, struct ceph_mds_client, delayed_work.work);
int renew_interval;
int renew_caps;
+ bool metric_only;
+ bool sending_metrics;
+ bool g_skip = false;
dout("mdsc delayed_work\n");
mutex_lock(&mdsc->mutex);
- renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
- renew_caps = time_after_eq(jiffies, HZ*renew_interval +
- mdsc->last_renew_caps);
- if (renew_caps)
- mdsc->last_renew_caps = jiffies;
+ sending_metrics = !!mdsc->sending_metrics;
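+ /* when metrics are sent every tick (1s), do the full cap/lease
+ * maintenance only once every CEPH_WORK_DELAY_DEF ticks */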
+ metric_only = mdsc->sending_metrics &&
+ (mdsc->ticks++ % CEPH_WORK_DELAY_DEF);
+
+ if (!metric_only) {
+ renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
+ renew_caps = time_after_eq(jiffies, HZ*renew_interval +
+ mdsc->last_renew_caps);
+ if (renew_caps)
+ mdsc->last_renew_caps = jiffies;
+ }
for (i = 0; i < mdsc->max_sessions; i++) {
struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
+
if (!s)
continue;
if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
@@ -4202,13 +4361,20 @@ static void delayed_work(struct work_struct *work)
mutex_unlock(&mdsc->mutex);
mutex_lock(&s->s_mutex);
- if (renew_caps)
- send_renew_caps(mdsc, s);
- else
- ceph_con_keepalive(&s->s_con);
- if (s->s_state == CEPH_MDS_SESSION_OPEN ||
- s->s_state == CEPH_MDS_SESSION_HUNG)
- ceph_send_cap_releases(mdsc, s);
+
+ if (sending_metrics)
+ g_skip = ceph_mdsc_send_metrics(mdsc, s, g_skip);
+
+ if (!metric_only) {
+ if (renew_caps)
+ send_renew_caps(mdsc, s);
+ else
+ ceph_con_keepalive(&s->s_con);
+ if (s->s_state == CEPH_MDS_SESSION_OPEN ||
+ s->s_state == CEPH_MDS_SESSION_HUNG)
+ ceph_send_cap_releases(mdsc, s);
+ }
+
mutex_unlock(&s->s_mutex);
ceph_put_mds_session(s);
@@ -4216,6 +4382,9 @@ static void delayed_work(struct work_struct *work)
}
mutex_unlock(&mdsc->mutex);
+ if (metric_only)
+ goto delay_work;
+
ceph_check_delayed_caps(mdsc);
ceph_queue_cap_reclaim_work(mdsc);
@@ -4224,11 +4393,13 @@ static void delayed_work(struct work_struct *work)
maybe_recover_session(mdsc);
+delay_work:
schedule_delayed(mdsc);
}
-static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
+static int ceph_mdsc_metric_init(struct ceph_mds_client *mdsc)
{
+ struct ceph_client_metric *metric = &mdsc->metric;
int ret;
if (!metric)
@@ -4256,6 +4427,8 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
memset(&metric->metadata_latency_sum, 0, sizeof(struct timespec64));
atomic64_set(&metric->total_metadatas, 0);
+ mdsc->sending_metrics = 0;
+ mdsc->ticks = 0;
return 0;
}
@@ -4312,7 +4485,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
init_waitqueue_head(&mdsc->cap_flushing_wq);
INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
atomic_set(&mdsc->cap_reclaim_pending, 0);
- err = ceph_mdsc_metric_init(&mdsc->metric);
+ err = ceph_mdsc_metric_init(mdsc);
if (err)
goto err_mdsmap;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ee36ab87e5f6..5bffa4e6ba5d 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -468,6 +468,9 @@ struct ceph_mds_client {
struct list_head dentry_leases; /* fifo list */
struct list_head dentry_dir_leases; /* lru list */
+ /* metrics */
+ unsigned int sending_metrics;
+ unsigned int ticks;
struct ceph_client_metric metric;
spinlock_t snapid_map_lock;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 88da9e21af75..9d2a5f1ce418 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -123,6 +123,7 @@ struct ceph_fs_client {
struct dentry *debugfs_congestion_kb;
struct dentry *debugfs_bdi;
struct dentry *debugfs_mdsc, *debugfs_mdsmap;
+ struct dentry *debugfs_sending_metrics;
struct dentry *debugfs_metric;
struct dentry *debugfs_mds_sessions;
#endif
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index cb21c5cf12c3..a0b227fc99dc 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -130,6 +130,7 @@ struct ceph_dir_layout {
#define CEPH_MSG_CLIENT_REQUEST 24
#define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
#define CEPH_MSG_CLIENT_REPLY 26
+#define CEPH_MSG_CLIENT_METRICS 29
#define CEPH_MSG_CLIENT_CAPS 0x310
#define CEPH_MSG_CLIENT_LEASE 0x311
#define CEPH_MSG_CLIENT_SNAP 0x312
@@ -752,6 +753,82 @@ struct ceph_mds_lease {
} __attribute__ ((packed));
/* followed by a __le32+string for dname */
+enum ceph_metric_type {
+ CLIENT_METRIC_TYPE_CAP_INFO,
+ CLIENT_METRIC_TYPE_READ_LATENCY,
+ CLIENT_METRIC_TYPE_WRITE_LATENCY,
+ CLIENT_METRIC_TYPE_METADATA_LATENCY,
+ CLIENT_METRIC_TYPE_DENTRY_LEASE,
+
+ CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
+};
+
+/* metric caps header */
+struct ceph_metric_cap {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of hit + mis + total */
+ __le64 hit;
+ __le64 mis;
+ __le64 total;
+} __attribute__ ((packed));
+
+/* metric dentry lease header */
+struct ceph_metric_dentry_lease {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of hit + mis + total */
+ __le64 hit;
+ __le64 mis;
+ __le64 total;
+} __attribute__ ((packed));
+
+/* metric read latency header */
+struct ceph_metric_read_latency {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of sec + nsec */
+ __le32 sec;
+ __le32 nsec;
+} __attribute__ ((packed));
+
+/* metric write latency header */
+struct ceph_metric_write_latency {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of sec + nsec */
+ __le32 sec;
+ __le32 nsec;
+} __attribute__ ((packed));
+
+/* metric metadata latency header */
+struct ceph_metric_metadata_latency {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of sec + nsec */
+ __le32 sec;
+ __le32 nsec;
+} __attribute__ ((packed));
+
+struct ceph_metric_head {
+ __le32 num; /* the number of metrics that will be sent */
+} __attribute__ ((packed));
+
/* client reconnect */
struct ceph_mds_cap_reconnect {
__le64 cap_id;
--
2.21.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 7/8] ceph: add reset metrics support
2020-01-08 10:41 [PATCH v2 0/8] ceph: add perf metrics support xiubli
` (5 preceding siblings ...)
2020-01-08 10:41 ` [PATCH v2 6/8] ceph: periodically send perf metrics to MDS xiubli
@ 2020-01-08 10:41 ` xiubli
2020-01-08 10:41 ` [PATCH v2 8/8] ceph: send client provided metric flags in client metadata xiubli
2020-01-08 10:46 ` [PATCH v2 0/8] ceph: add perf metrics support Xiubo Li
8 siblings, 0 replies; 18+ messages in thread
From: xiubli @ 2020-01-08 10:41 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
This will reset most of the metric counters, with the exception of the
total cap and dentry counts.
Sometimes we need to discard the old metrics and start gathering fresh
ones.
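A quick way to exercise this from userspace (illustrative path; the
exact fsid and client id come from the per-client debugfs directory):
$ echo 1 > /sys/kernel/debug/ceph/<fsid>.client<id>/reset_metrics
Any value other than 1 is rejected with -EINVAL.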
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/debugfs.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++
fs/ceph/super.h | 1 +
2 files changed, 58 insertions(+)
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index bb96fb4d04c4..c24a704d4e99 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -158,6 +158,55 @@ static int sending_metrics_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(sending_metrics_fops, sending_metrics_get,
sending_metrics_set, "%llu\n");
+static int reset_metrics_set(void *data, u64 val)
+{
+ struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_client_metric *metric = &mdsc->metric;
+ int i;
+
+ if (val != 1) {
+ pr_err("Invalid reset metrics set value %llu\n", val);
+ return -EINVAL;
+ }
+
+ percpu_counter_set(&metric->d_lease_hit, 0);
+ percpu_counter_set(&metric->d_lease_mis, 0);
+
+ spin_lock(&metric->read_lock);
+ memset(&metric->read_latency_sum, 0, sizeof(struct timespec64));
+ atomic64_set(&metric->total_reads, 0);
+ spin_unlock(&metric->read_lock);
+
+ spin_lock(&metric->write_lock);
+ memset(&metric->write_latency_sum, 0, sizeof(struct timespec64));
+ atomic64_set(&metric->total_writes, 0);
+ spin_unlock(&metric->write_lock);
+
+ spin_lock(&metric->metadata_lock);
+ memset(&metric->metadata_latency_sum, 0, sizeof(struct timespec64));
+ atomic64_set(&metric->total_metadatas, 0);
+ spin_unlock(&metric->metadata_lock);
+
+ mutex_lock(&mdsc->mutex);
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ struct ceph_mds_session *session;
+
+ session = __ceph_lookup_mds_session(mdsc, i);
+ if (!session)
+ continue;
+ percpu_counter_set(&session->i_caps_hit, 0);
+ percpu_counter_set(&session->i_caps_mis, 0);
+ ceph_put_mds_session(session);
+ }
+
+ mutex_unlock(&mdsc->mutex);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(reset_metrics_fops, NULL, reset_metrics_set, "%llu\n");
+
static int metric_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
@@ -355,6 +404,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
debugfs_remove(fsc->debugfs_caps);
debugfs_remove(fsc->debugfs_metric);
debugfs_remove(fsc->debugfs_sending_metrics);
+ debugfs_remove(fsc->debugfs_reset_metrics);
debugfs_remove(fsc->debugfs_mdsc);
}
@@ -402,6 +452,13 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc,
&sending_metrics_fops);
+ fsc->debugfs_reset_metrics =
+ debugfs_create_file("reset_metrics",
+ 0600,
+ fsc->client->debugfs_dir,
+ fsc,
+ &reset_metrics_fops);
+
fsc->debugfs_metric = debugfs_create_file("metrics",
0400,
fsc->client->debugfs_dir,
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 9d2a5f1ce418..c2f8baf6043d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -124,6 +124,7 @@ struct ceph_fs_client {
struct dentry *debugfs_bdi;
struct dentry *debugfs_mdsc, *debugfs_mdsmap;
struct dentry *debugfs_sending_metrics;
+ struct dentry *debugfs_reset_metrics;
struct dentry *debugfs_metric;
struct dentry *debugfs_mds_sessions;
#endif
--
2.21.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 8/8] ceph: send client provided metric flags in client metadata
2020-01-08 10:41 [PATCH v2 0/8] ceph: add perf metrics support xiubli
` (6 preceding siblings ...)
2020-01-08 10:41 ` [PATCH v2 7/8] ceph: add reset metrics support xiubli
@ 2020-01-08 10:41 ` xiubli
2020-01-08 10:46 ` [PATCH v2 0/8] ceph: add perf metrics support Xiubo Li
8 siblings, 0 replies; 18+ messages in thread
From: xiubli @ 2020-01-08 10:41 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
Send the metric flags supported by the client (cap info, dentry lease
and the read/write/metadata latencies) to the MDS as part of the client
metadata.
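As a rough sketch of the wire layout produced by encode_metric_spec()
below (sizes taken from the encoding helpers used there):
u8 version = 1
u8 compat = 1
le32 metric spec info length = 4 + bitmap length
le32 bitmap length (METRIC_BYTES(count), 8 bytes here)
u8 bitmap[] (bit N set for each supported CLIENT_METRIC_TYPE_* value N)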
Fixes: https://tracker.ceph.com/issues/43435
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/mds_client.c | 47 ++++++++++++++++++++++++++++++++++++++++++--
fs/ceph/mds_client.h | 14 +++++++++++++
2 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a0693ed6f54f..949743eacbfc 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1090,6 +1090,41 @@ static void encode_supported_features(void **p, void *end)
}
}
+static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED;
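+/* bitmap size in bytes: enough whole 64-bit words (expressed as bytes) to
+ * cover the highest metric bit in metric_bits[] */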
+#define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8)
+static void encode_metric_spec(void **p, void *end)
+{
+ static const size_t count = ARRAY_SIZE(metric_bits);
+
+ /* header */
+ BUG_ON(*p + 2 > end);
+ ceph_encode_8(p, 1); /* version */
+ ceph_encode_8(p, 1); /* compat */
+
+ if (count > 0) {
+ size_t i;
+ size_t size = METRIC_BYTES(count);
+
+ BUG_ON(*p + 4 + 4 + size > end);
+
+ /* metric spec info length */
+ ceph_encode_32(p, 4 + size);
+
+ /* metric spec */
+ ceph_encode_32(p, size);
+ memset(*p, 0, size);
+ for (i = 0; i < count; i++)
+ ((unsigned char*)(*p))[i / 8] |= BIT(metric_bits[i] % 8);
+ *p += size;
+ } else {
+ BUG_ON(*p + 4 + 4 > end);
+ /* metric spec info length */
+ ceph_encode_32(p, 4);
+ /* metric spec */
+ ceph_encode_32(p, 0);
+ }
+}
+
/*
* session message, specialization for CEPH_SESSION_REQUEST_OPEN
* to include additional client metadata fields.
@@ -1129,6 +1164,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
size = FEATURE_BYTES(count);
extra_bytes += 4 + size;
+ /* metric spec */
+ size = 0;
+ count = ARRAY_SIZE(metric_bits);
+ if (count > 0)
+ size = METRIC_BYTES(count);
+ extra_bytes += 2 + 4 + 4 + size;
+
/* Allocate the message */
msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
GFP_NOFS, false);
@@ -1147,9 +1189,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
* Serialize client metadata into waiting buffer space, using
* the format that userspace expects for map<string, string>
*
- * ClientSession messages with metadata are v3
+ * ClientSession messages with metadata are v4
*/
- msg->hdr.version = cpu_to_le16(3);
+ msg->hdr.version = cpu_to_le16(4);
msg->hdr.compat_version = cpu_to_le16(1);
/* The write pointer, following the session_head structure */
@@ -1172,6 +1214,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
}
encode_supported_features(&p, end);
+ encode_metric_spec(&p, end);
msg->front.iov_len = p - msg->front.iov_base;
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 5bffa4e6ba5d..3b168a59ab74 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -42,6 +42,20 @@ enum ceph_feature_type {
}
#define CEPHFS_FEATURES_CLIENT_REQUIRED {}
+/*
+ * This will always have the highest metric bit value
+ * as the last element of the array.
+ */
+#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED { \
+ CLIENT_METRIC_TYPE_CAP_INFO, \
+ CLIENT_METRIC_TYPE_READ_LATENCY, \
+ CLIENT_METRIC_TYPE_WRITE_LATENCY, \
+ CLIENT_METRIC_TYPE_METADATA_LATENCY, \
+ CLIENT_METRIC_TYPE_DENTRY_LEASE, \
+ \
+ CLIENT_METRIC_TYPE_MAX, \
+}
+
/*
* Some lock dependencies:
*
--
2.21.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v2 0/8] ceph: add perf metrics support
2020-01-08 10:41 [PATCH v2 0/8] ceph: add perf metrics support xiubli
` (7 preceding siblings ...)
2020-01-08 10:41 ` [PATCH v2 8/8] ceph: send client provided metric flags in client metadata xiubli
@ 2020-01-08 10:46 ` Xiubo Li
8 siblings, 0 replies; 18+ messages in thread
From: Xiubo Li @ 2020-01-08 10:46 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, ceph-devel
Additional info on the metric flags provided in the client metadata:
$ ./bin/cephfs-journal-tool --rank=1:0 event get --type=SESSION json
Wrote output to JSON file 'dump'
$ cat dump
[
{
"client instance": "client.4275 v1:192.168.195.165:0/461391971",
"open": "true",
"client map version": 1,
"inos": "[]",
"inotable version": 0,
"client_metadata": {
"client_features": {
"feature_bits": "0000000000001bff"
},
"metric_spec": {
"metric_flags": {
"feature_bits": "000000000000001f" <<===== metric
flags provided by kclient
}
},
"entity_id": "",
"hostname": "fedora1",
"kernel_version": "5.5.0-rc2+",
"root": "/"
}
},
[...]
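The feature_bits value 0x1f above is bits 0-4 set, which matches the five
CLIENT_METRIC_TYPE_* values (CAP_INFO through DENTRY_LEASE) advertised in
CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED.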
On 2020/1/8 18:41, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> Changed in V2:
> - add read/write/metadata latency metric support.
> - add and send client provided metric flags in client metadata
> - addressed the comments from Ilya and merged the 4/4 patch into 3/4.
> - addressed all the other comments in v1 series.
>
> In this version it will send the metrics to the MDSs every second if
> sending_metrics is enabled, disabled by default.
>
>
>
> We can get the metrics from the debugfs:
>
> $ cat /sys/kernel/debug/ceph/0c93a60d-5645-4c46-8568-4c8f63db4c7f.client4267/metrics
> item total sum_lat(us) avg_lat(us)
> -----------------------------------------------------
> read 13 417000 32076
> write 42 131205000 3123928
> metadata 104 493000 4740
>
> item total miss hit
> -------------------------------------------------
> d_lease 204 0 918
>
> session caps miss hit
> -------------------------------------------------
> 0 204 213 368218
>
>
> In the MDS side, we can get the metrics(NOTE: the latency is in
> nanosecond):
>
> $ ./bin/ceph fs perf stats | python -m json.tool
> {
> "client_metadata": {
> "client.4267": {
> "IP": "v1:192.168.195.165",
> "hostname": "fedora1",
> "mount_point": "N/A",
> "root": "/"
> }
> },
> "counters": [
> "cap_hit"
> ],
> "global_counters": [
> "read_latency",
> "write_latency",
> "metadata_latency",
> "dentry_lease_hit"
> ],
> "global_metrics": {
> "client.4267": [
> [
> 0,
> 32076923
> ],
> [
> 3,
> 123928571
> ],
> [
> 0,
> 4740384
> ],
> [
> 918,
> 0
> ]
> ]
> },
> "metrics": {
> "delayed_ranks": [],
> "mds.0": {
> "client.4267": [
> [
> 368218,
> 213
> ]
> ]
> }
> }
> }
>
>
>
> Xiubo Li (8):
> ceph: add global dentry lease metric support
> ceph: add caps perf metric for each session
> ceph: add global read latency metric support
> ceph: add global write latency metric support
> ceph: add global metadata perf metric support
> ceph: periodically send perf metrics to MDS
> ceph: add reset metrics support
> ceph: send client provided metric flags in client metadata
>
> fs/ceph/acl.c | 2 +-
> fs/ceph/addr.c | 38 +++-
> fs/ceph/caps.c | 63 ++++--
> fs/ceph/debugfs.c | 182 +++++++++++++++-
> fs/ceph/dir.c | 38 +++-
> fs/ceph/file.c | 26 ++-
> fs/ceph/inode.c | 8 +-
> fs/ceph/mds_client.c | 369 ++++++++++++++++++++++++++++++--
> fs/ceph/mds_client.h | 48 +++++
> fs/ceph/snap.c | 2 +-
> fs/ceph/super.h | 15 +-
> fs/ceph/xattr.c | 8 +-
> include/linux/ceph/ceph_fs.h | 77 +++++++
> include/linux/ceph/osd_client.h | 5 +-
> net/ceph/osd_client.c | 18 +-
> 15 files changed, 826 insertions(+), 73 deletions(-)
>
^ permalink raw reply [flat|nested] 18+ messages in thread