* [PATCH 0/2] ceph: periodically send perf metrics to ceph @ 2020-06-16 12:52 xiubli 2020-06-16 12:52 ` [PATCH 1/2] " xiubli 2020-06-16 12:52 ` [PATCH 2/2] ceph: send client provided metric flags in client metadata xiubli 0 siblings, 2 replies; 9+ messages in thread From: xiubli @ 2020-06-16 12:52 UTC (permalink / raw) To: jlayton, idryomov; +Cc: zyan, pdonnell, ceph-devel, Xiubo Li From: Xiubo Li <xiubli@redhat.com> This series is based on the previous patches of the metrics in kceph[1] and mds daemons record and forward client side metrics to manager[2]. This will send the caps/read/write/metadata metrics to any available MDS only once per second as default, which will be the same as the userland client, or every metric_send_interval seconds, which is a module parameter, the valid values for metric_send_interval will be 1~5 seconds. And will also send the metric flags to MDS, currently it supports the cap, read latency, write latency and metadata latency. [1] https://patchwork.kernel.org/project/ceph-devel/list/?series=238907 [Merged] [2] https://github.com/ceph/ceph/pull/26004 [Merged] Xiubo Li (2): ceph: periodically send perf metrics to ceph ceph: send client provided metric flags in client metadata fs/ceph/mds_client.c | 93 +++++++++++++++++++++++------- fs/ceph/mds_client.h | 4 ++ fs/ceph/metric.c | 133 +++++++++++++++++++++++++++++++++++++++++++ fs/ceph/metric.h | 91 +++++++++++++++++++++++++++++ fs/ceph/super.c | 29 ++++++++++ include/linux/ceph/ceph_fs.h | 1 + 6 files changed, 332 insertions(+), 19 deletions(-) -- 1.8.3.1 ^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/2] ceph: periodically send perf metrics to ceph 2020-06-16 12:52 [PATCH 0/2] ceph: periodically send perf metrics to ceph xiubli @ 2020-06-16 12:52 ` xiubli 2020-06-16 13:40 ` Jeff Layton 2020-06-16 12:52 ` [PATCH 2/2] ceph: send client provided metric flags in client metadata xiubli 1 sibling, 1 reply; 9+ messages in thread From: xiubli @ 2020-06-16 12:52 UTC (permalink / raw) To: jlayton, idryomov; +Cc: zyan, pdonnell, ceph-devel, Xiubo Li From: Xiubo Li <xiubli@redhat.com> This will send the caps/read/write/metadata metrics to any available MDS only once per second as default, which will be the same as the userland client, or every metric_send_interval seconds, which is a module parameter. URL: https://tracker.ceph.com/issues/43215 Signed-off-by: Xiubo Li <xiubli@redhat.com> --- fs/ceph/mds_client.c | 46 +++++++++------ fs/ceph/mds_client.h | 4 ++ fs/ceph/metric.c | 133 +++++++++++++++++++++++++++++++++++++++++++ fs/ceph/metric.h | 78 +++++++++++++++++++++++++ fs/ceph/super.c | 29 ++++++++++ include/linux/ceph/ceph_fs.h | 1 + 6 files changed, 274 insertions(+), 17 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a504971..f996363 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4263,6 +4263,30 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc) ceph_force_reconnect(fsc->sb); } +bool check_session_state(struct ceph_mds_client *mdsc, + struct ceph_mds_session *s) +{ + if (s->s_state == CEPH_MDS_SESSION_CLOSING) { + dout("resending session close request for mds%d\n", + s->s_mds); + request_close_session(mdsc, s); + return false; + } + if (s->s_ttl && time_after(jiffies, s->s_ttl)) { + if (s->s_state == CEPH_MDS_SESSION_OPEN) { + s->s_state = CEPH_MDS_SESSION_HUNG; + pr_info("mds%d hung\n", s->s_mds); + } + } + if (s->s_state == CEPH_MDS_SESSION_NEW || + s->s_state == CEPH_MDS_SESSION_RESTARTING || + s->s_state == CEPH_MDS_SESSION_REJECTED) + /* this mds is failed or recovering, just wait */ + 
return false; + + return true; +} + /* * delayed work -- periodically trim expired leases, renew caps with mds */ @@ -4294,23 +4318,8 @@ static void delayed_work(struct work_struct *work) struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i); if (!s) continue; - if (s->s_state == CEPH_MDS_SESSION_CLOSING) { - dout("resending session close request for mds%d\n", - s->s_mds); - request_close_session(mdsc, s); - ceph_put_mds_session(s); - continue; - } - if (s->s_ttl && time_after(jiffies, s->s_ttl)) { - if (s->s_state == CEPH_MDS_SESSION_OPEN) { - s->s_state = CEPH_MDS_SESSION_HUNG; - pr_info("mds%d hung\n", s->s_mds); - } - } - if (s->s_state == CEPH_MDS_SESSION_NEW || - s->s_state == CEPH_MDS_SESSION_RESTARTING || - s->s_state == CEPH_MDS_SESSION_REJECTED) { - /* this mds is failed or recovering, just wait */ + + if (!check_session_state(mdsc, s)) { ceph_put_mds_session(s); continue; } @@ -4616,6 +4625,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) cancel_work_sync(&mdsc->cap_reclaim_work); cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ + cancel_delayed_work_sync(&mdsc->metric.delayed_work); /* cancel timer */ dout("stopped\n"); } @@ -4658,6 +4668,7 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) { dout("stop\n"); cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ + cancel_delayed_work_sync(&mdsc->metric.delayed_work); /* cancel timer */ if (mdsc->mdsmap) ceph_mdsmap_destroy(mdsc->mdsmap); kfree(mdsc->sessions); @@ -4815,6 +4826,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) mutex_unlock(&mdsc->mutex); schedule_delayed(mdsc); + metric_schedule_delayed(&mdsc->metric); return; bad_unlock: diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 5e0c407..bcb3892 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -18,6 +18,7 @@ #include <linux/ceph/auth.h> #include "metric.h" +#include "super.h" /* The first 8 bits are reserved for old ceph 
releases */ enum ceph_feature_type { @@ -476,6 +477,9 @@ struct ceph_mds_client { extern const char *ceph_mds_op_name(int op); +extern bool check_session_state(struct ceph_mds_client *mdsc, + struct ceph_mds_session *s); + extern struct ceph_mds_session * __ceph_lookup_mds_session(struct ceph_mds_client *, int mds); diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index 9217f35..3c9b0ec 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -1,10 +1,141 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/ceph/ceph_debug.h> #include <linux/types.h> #include <linux/percpu_counter.h> #include <linux/math64.h> #include "metric.h" +#include "mds_client.h" + +static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, + struct ceph_mds_session *s, + u64 nr_caps) +{ + struct ceph_metric_head *head; + struct ceph_metric_cap *cap; + struct ceph_metric_read_latency *read; + struct ceph_metric_write_latency *write; + struct ceph_metric_metadata_latency *meta; + struct ceph_client_metric *m = &mdsc->metric; + struct ceph_msg *msg; + struct timespec64 ts; + s64 sum, total; + s32 items = 0; + s32 len; + + len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write) + + sizeof(*meta); + + msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true); + if (!msg) { + pr_err("send metrics to mds%d, failed to allocate message\n", + s->s_mds); + return false; + } + + head = msg->front.iov_base; + + /* encode the cap metric */ + cap = (struct ceph_metric_cap *)(head + 1); + cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO); + cap->ver = 1; + cap->compat = 1; + cap->data_len = cpu_to_le32(sizeof(*cap) - 10); + cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit)); + cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis)); + cap->total = cpu_to_le64(nr_caps); + items++; + + /* encode the read latency metric */ + read = (struct ceph_metric_read_latency *)(cap + 1); + read->type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY); + 
read->ver = 1; + read->compat = 1; + read->data_len = cpu_to_le32(sizeof(*read) - 10); + total = m->total_reads; + sum = m->read_latency_sum; + jiffies_to_timespec64(sum, &ts); + read->sec = cpu_to_le32(ts.tv_sec); + read->nsec = cpu_to_le32(ts.tv_nsec); + items++; + + /* encode the write latency metric */ + write = (struct ceph_metric_write_latency *)(read + 1); + write->type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY); + write->ver = 1; + write->compat = 1; + write->data_len = cpu_to_le32(sizeof(*write) - 10); + total = m->total_writes; + sum = m->write_latency_sum; + jiffies_to_timespec64(sum, &ts); + write->sec = cpu_to_le32(ts.tv_sec); + write->nsec = cpu_to_le32(ts.tv_nsec); + items++; + + /* encode the metadata latency metric */ + meta = (struct ceph_metric_metadata_latency *)(write + 1); + meta->type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY); + meta->ver = 1; + meta->compat = 1; + meta->data_len = cpu_to_le32(sizeof(*meta) - 10); + total = m->total_metadatas; + sum = m->metadata_latency_sum; + jiffies_to_timespec64(sum, &ts); + meta->sec = cpu_to_le32(ts.tv_sec); + meta->nsec = cpu_to_le32(ts.tv_nsec); + items++; + + put_unaligned_le32(items, &head->num); + msg->front.iov_len = cpu_to_le32(len); + msg->hdr.version = cpu_to_le16(1); + msg->hdr.compat_version = cpu_to_le16(1); + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); + dout("send metrics to mds%d %p\n", s->s_mds, msg); + ceph_con_send(&s->s_con, msg); + + return true; +} + +static void metric_delayed_work(struct work_struct *work) +{ + struct ceph_client_metric *m = + container_of(work, struct ceph_client_metric, delayed_work.work); + struct ceph_mds_client *mdsc = + container_of(m, struct ceph_mds_client, metric); + struct ceph_mds_session *s; + u64 nr_caps = 0; + bool ret; + int i; + + mutex_lock(&mdsc->mutex); + for (i = 0; i < mdsc->max_sessions; i++) { + s = __ceph_lookup_mds_session(mdsc, i); + if (!s) + continue; + nr_caps += s->s_nr_caps; + ceph_put_mds_session(s); + } + + 
for (i = 0; i < mdsc->max_sessions; i++) { + s = __ceph_lookup_mds_session(mdsc, i); + if (!s) + continue; + if (!check_session_state(mdsc, s)) { + ceph_put_mds_session(s); + continue; + } + + /* Only send the metric once in any available session */ + ret = ceph_mdsc_send_metrics(mdsc, s, nr_caps); + ceph_put_mds_session(s); + if (ret) + break; + } + mutex_unlock(&mdsc->mutex); + + metric_schedule_delayed(&mdsc->metric); +} int ceph_metric_init(struct ceph_client_metric *m) { @@ -51,6 +182,8 @@ int ceph_metric_init(struct ceph_client_metric *m) m->total_metadatas = 0; m->metadata_latency_sum = 0; + INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work); + return 0; err_i_caps_mis: diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h index ccd8128..2af9e0b 100644 --- a/fs/ceph/metric.h +++ b/fs/ceph/metric.h @@ -6,6 +6,71 @@ #include <linux/percpu_counter.h> #include <linux/ktime.h> +extern unsigned int metric_send_interval; + +enum ceph_metric_type { + CLIENT_METRIC_TYPE_CAP_INFO, + CLIENT_METRIC_TYPE_READ_LATENCY, + CLIENT_METRIC_TYPE_WRITE_LATENCY, + CLIENT_METRIC_TYPE_METADATA_LATENCY, + CLIENT_METRIC_TYPE_DENTRY_LEASE, + + CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE, +}; + +/* metric caps header */ +struct ceph_metric_cap { + __le32 type; /* ceph metric type */ + + __u8 ver; + __u8 compat; + + __le32 data_len; /* length of sizeof(hit + mis + total) */ + __le64 hit; + __le64 mis; + __le64 total; +} __packed; + +/* metric read latency header */ +struct ceph_metric_read_latency { + __le32 type; /* ceph metric type */ + + __u8 ver; + __u8 compat; + + __le32 data_len; /* length of sizeof(sec + nsec) */ + __le32 sec; + __le32 nsec; +} __packed; + +/* metric write latency header */ +struct ceph_metric_write_latency { + __le32 type; /* ceph metric type */ + + __u8 ver; + __u8 compat; + + __le32 data_len; /* length of sizeof(sec + nsec) */ + __le32 sec; + __le32 nsec; +} __packed; + +/* metric metadata latency header */ +struct ceph_metric_metadata_latency { 
+ __le32 type; /* ceph metric type */ + + __u8 ver; + __u8 compat; + + __le32 data_len; /* length of sizeof(sec + nsec) */ + __le32 sec; + __le32 nsec; +} __packed; + +struct ceph_metric_head { + __le32 num; /* the number of metrics that will be sent */ +} __packed; + /* This is the global metrics */ struct ceph_client_metric { atomic64_t total_dentries; @@ -35,8 +100,21 @@ struct ceph_client_metric { ktime_t metadata_latency_sq_sum; ktime_t metadata_latency_min; ktime_t metadata_latency_max; + + struct delayed_work delayed_work; /* delayed work */ }; +static inline void metric_schedule_delayed(struct ceph_client_metric *m) +{ + /* per second as default */ + unsigned int hz = round_jiffies_relative(HZ * metric_send_interval); + + if (!metric_send_interval) + return; + + schedule_delayed_work(&m->delayed_work, hz); +} + extern int ceph_metric_init(struct ceph_client_metric *m); extern void ceph_metric_destroy(struct ceph_client_metric *m); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index c9784eb1..66a940c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void) destroy_caches(); } +static int param_set_metric_interval(const char *val, const struct kernel_param *kp) +{ + int ret; + unsigned int interval; + + ret = kstrtouint(val, 0, &interval); + if (ret < 0) { + pr_err("Failed to parse metric interval '%s'\n", val); + return ret; + } + + if (interval > 5 || interval < 1) { + pr_err("Invalid metric interval %u\n", interval); + return -EINVAL; + } + + metric_send_interval = interval; + return 0; +} + +static const struct kernel_param_ops param_ops_metric_interval = { + .set = param_set_metric_interval, + .get = param_get_uint, +}; + +unsigned int metric_send_interval = 1; +module_param_cb(metric_send_interval, ¶m_ops_metric_interval, &metric_send_interval, 0644); +MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)"); + 
module_init(init_ceph); module_exit(exit_ceph); diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index ebf5ba6..455e9b9 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -130,6 +130,7 @@ struct ceph_dir_layout { #define CEPH_MSG_CLIENT_REQUEST 24 #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 #define CEPH_MSG_CLIENT_REPLY 26 +#define CEPH_MSG_CLIENT_METRICS 29 #define CEPH_MSG_CLIENT_CAPS 0x310 #define CEPH_MSG_CLIENT_LEASE 0x311 #define CEPH_MSG_CLIENT_SNAP 0x312 -- 1.8.3.1 ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] ceph: periodically send perf metrics to ceph 2020-06-16 12:52 ` [PATCH 1/2] " xiubli @ 2020-06-16 13:40 ` Jeff Layton 2020-06-16 18:14 ` Xiubo Li 0 siblings, 1 reply; 9+ messages in thread From: Jeff Layton @ 2020-06-16 13:40 UTC (permalink / raw) To: xiubli, idryomov; +Cc: zyan, pdonnell, ceph-devel On Tue, 2020-06-16 at 08:52 -0400, xiubli@redhat.com wrote: > From: Xiubo Li <xiubli@redhat.com> > > This will send the caps/read/write/metadata metrics to any available > MDS only once per second as default, which will be the same as the > userland client, or every metric_send_interval seconds, which is a > module parameter. > > URL: https://tracker.ceph.com/issues/43215 > Signed-off-by: Xiubo Li <xiubli@redhat.com> > --- > fs/ceph/mds_client.c | 46 +++++++++------ > fs/ceph/mds_client.h | 4 ++ > fs/ceph/metric.c | 133 +++++++++++++++++++++++++++++++++++++++++++ > fs/ceph/metric.h | 78 +++++++++++++++++++++++++ > fs/ceph/super.c | 29 ++++++++++ > include/linux/ceph/ceph_fs.h | 1 + > 6 files changed, 274 insertions(+), 17 deletions(-) > Long patch! Might not hurt to break out some of the cleanups into separate patches, but they're fairly straighforward so I won't require it. [...] > /* This is the global metrics */ > struct ceph_client_metric { > atomic64_t total_dentries; > @@ -35,8 +100,21 @@ struct ceph_client_metric { > ktime_t metadata_latency_sq_sum; > ktime_t metadata_latency_min; > ktime_t metadata_latency_max; > + > + struct delayed_work delayed_work; /* delayed work */ > }; > > +static inline void metric_schedule_delayed(struct ceph_client_metric *m) > +{ > + /* per second as default */ > + unsigned int hz = round_jiffies_relative(HZ * metric_send_interval); > + > + if (!metric_send_interval) > + return; > + > + schedule_delayed_work(&m->delayed_work, hz); > +} > + Should this also be gated on a MDS feature bit or anything? What happens if we're running against a really old MDS that doesn't support these stats? 
Does it just drop them on the floor? Should we disable sending them in that case? > extern int ceph_metric_init(struct ceph_client_metric *m); > extern void ceph_metric_destroy(struct ceph_client_metric *m); > > diff --git a/fs/ceph/super.c b/fs/ceph/super.c > index c9784eb1..66a940c 100644 > --- a/fs/ceph/super.c > +++ b/fs/ceph/super.c > @@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void) > destroy_caches(); > } > > +static int param_set_metric_interval(const char *val, const struct kernel_param *kp) > +{ > + int ret; > + unsigned int interval; > + > + ret = kstrtouint(val, 0, &interval); > + if (ret < 0) { > + pr_err("Failed to parse metric interval '%s'\n", val); > + return ret; > + } > + > + if (interval > 5 || interval < 1) { > + pr_err("Invalid metric interval %u\n", interval); > + return -EINVAL; > + } > + > + metric_send_interval = interval; > + return 0; > +} > + > +static const struct kernel_param_ops param_ops_metric_interval = { > + .set = param_set_metric_interval, > + .get = param_get_uint, > +}; > + > +unsigned int metric_send_interval = 1; > +module_param_cb(metric_send_interval, ¶m_ops_metric_interval, &metric_send_interval, 0644); > +MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)"); > + Aren't valid values 0-5, with 0 disabling this feature? That's what metric_schedule_delayed() seems to indicate... 
> module_init(init_ceph); > module_exit(exit_ceph); > > diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h > index ebf5ba6..455e9b9 100644 > --- a/include/linux/ceph/ceph_fs.h > +++ b/include/linux/ceph/ceph_fs.h > @@ -130,6 +130,7 @@ struct ceph_dir_layout { > #define CEPH_MSG_CLIENT_REQUEST 24 > #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 > #define CEPH_MSG_CLIENT_REPLY 26 > +#define CEPH_MSG_CLIENT_METRICS 29 > #define CEPH_MSG_CLIENT_CAPS 0x310 > #define CEPH_MSG_CLIENT_LEASE 0x311 > #define CEPH_MSG_CLIENT_SNAP 0x312 -- Jeff Layton <jlayton@kernel.org> ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] ceph: periodically send perf metrics to ceph 2020-06-16 13:40 ` Jeff Layton @ 2020-06-16 18:14 ` Xiubo Li 2020-06-16 18:58 ` Jeff Layton 0 siblings, 1 reply; 9+ messages in thread From: Xiubo Li @ 2020-06-16 18:14 UTC (permalink / raw) To: Jeff Layton, idryomov; +Cc: zyan, pdonnell, ceph-devel On 2020/6/16 21:40, Jeff Layton wrote: > On Tue, 2020-06-16 at 08:52 -0400,xiubli@redhat.com wrote: >> From: Xiubo Li<xiubli@redhat.com> >> >> This will send the caps/read/write/metadata metrics to any available >> MDS only once per second as default, which will be the same as the >> userland client, or every metric_send_interval seconds, which is a >> module parameter. >> >> URL:https://tracker.ceph.com/issues/43215 >> Signed-off-by: Xiubo Li<xiubli@redhat.com> >> --- >> fs/ceph/mds_client.c | 46 +++++++++------ >> fs/ceph/mds_client.h | 4 ++ >> fs/ceph/metric.c | 133 +++++++++++++++++++++++++++++++++++++++++++ >> fs/ceph/metric.h | 78 +++++++++++++++++++++++++ >> fs/ceph/super.c | 29 ++++++++++ >> include/linux/ceph/ceph_fs.h | 1 + >> 6 files changed, 274 insertions(+), 17 deletions(-) >> > Long patch! Might not hurt to break out some of the cleanups into > separate patches, but they're fairly straighforward so I won't require > it. Sure, I will split the patch into small ones if possible. > [...] 
> >> /* This is the global metrics */ >> struct ceph_client_metric { >> atomic64_t total_dentries; >> @@ -35,8 +100,21 @@ struct ceph_client_metric { >> ktime_t metadata_latency_sq_sum; >> ktime_t metadata_latency_min; >> ktime_t metadata_latency_max; >> + >> + struct delayed_work delayed_work; /* delayed work */ >> }; >> >> +static inline void metric_schedule_delayed(struct ceph_client_metric *m) >> +{ >> + /* per second as default */ >> + unsigned int hz = round_jiffies_relative(HZ * metric_send_interval); >> + >> + if (!metric_send_interval) >> + return; >> + >> + schedule_delayed_work(&m->delayed_work, hz); >> +} >> + > Should this also be gated on a MDS feature bit or anything? What happens > if we're running against a really old MDS that doesn't support these > stats? Does it just drop them on the floor? Should we disable sending > them in that case? Tested without metric code in the ceph and when ceph saw a unknown type message, it will close the socket connection directly. >> extern int ceph_metric_init(struct ceph_client_metric *m); >> extern void ceph_metric_destroy(struct ceph_client_metric *m); >> >> diff --git a/fs/ceph/super.c b/fs/ceph/super.c >> index c9784eb1..66a940c 100644 >> --- a/fs/ceph/super.c >> +++ b/fs/ceph/super.c >> @@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void) >> destroy_caches(); >> } >> >> +static int param_set_metric_interval(const char *val, const struct kernel_param *kp) >> +{ >> + int ret; >> + unsigned int interval; >> + >> + ret = kstrtouint(val, 0, &interval); >> + if (ret < 0) { >> + pr_err("Failed to parse metric interval '%s'\n", val); >> + return ret; >> + } >> + >> + if (interval > 5 || interval < 1) { >> + pr_err("Invalid metric interval %u\n", interval); >> + return -EINVAL; >> + } >> + >> + metric_send_interval = interval; >> + return 0; >> +} >> + >> +static const struct kernel_param_ops param_ops_metric_interval = { >> + .set = param_set_metric_interval, >> + .get = param_get_uint, >> +}; >> + >> +unsigned 
int metric_send_interval = 1; >> +module_param_cb(metric_send_interval, ¶m_ops_metric_interval, &metric_send_interval, 0644); >> +MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)"); >> + > Aren't valid values 0-5, with 0 disabling this feature? That's > what metric_schedule_delayed() seems to indicate... What value should it be as default ? 0 to disable it or 1 ? Maybe in future we should let the ceph side send a notification/request to all the clients if it wants to collect the metrics and then we could enable it here, and defautly just disable it. >> module_init(init_ceph); >> module_exit(exit_ceph); >> >> diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h >> index ebf5ba6..455e9b9 100644 >> --- a/include/linux/ceph/ceph_fs.h >> +++ b/include/linux/ceph/ceph_fs.h >> @@ -130,6 +130,7 @@ struct ceph_dir_layout { >> #define CEPH_MSG_CLIENT_REQUEST 24 >> #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 >> #define CEPH_MSG_CLIENT_REPLY 26 >> +#define CEPH_MSG_CLIENT_METRICS 29 >> #define CEPH_MSG_CLIENT_CAPS 0x310 >> #define CEPH_MSG_CLIENT_LEASE 0x311 >> #define CEPH_MSG_CLIENT_SNAP 0x312 ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] ceph: periodically send perf metrics to ceph 2020-06-16 18:14 ` Xiubo Li @ 2020-06-16 18:58 ` Jeff Layton 2020-06-17 0:28 ` Xiubo Li 0 siblings, 1 reply; 9+ messages in thread From: Jeff Layton @ 2020-06-16 18:58 UTC (permalink / raw) To: Xiubo Li, idryomov; +Cc: zyan, pdonnell, ceph-devel On Wed, 2020-06-17 at 02:14 +0800, Xiubo Li wrote: > On 2020/6/16 21:40, Jeff Layton wrote: > > On Tue, 2020-06-16 at 08:52 -0400,xiubli@redhat.com wrote: > > > From: Xiubo Li<xiubli@redhat.com> > > > > > > This will send the caps/read/write/metadata metrics to any available > > > MDS only once per second as default, which will be the same as the > > > userland client, or every metric_send_interval seconds, which is a > > > module parameter. > > > > > > URL:https://tracker.ceph.com/issues/43215 > > > Signed-off-by: Xiubo Li<xiubli@redhat.com> > > > --- > > > fs/ceph/mds_client.c | 46 +++++++++------ > > > fs/ceph/mds_client.h | 4 ++ > > > fs/ceph/metric.c | 133 +++++++++++++++++++++++++++++++++++++++++++ > > > fs/ceph/metric.h | 78 +++++++++++++++++++++++++ > > > fs/ceph/super.c | 29 ++++++++++ > > > include/linux/ceph/ceph_fs.h | 1 + > > > 6 files changed, 274 insertions(+), 17 deletions(-) > > > > > Long patch! Might not hurt to break out some of the cleanups into > > separate patches, but they're fairly straighforward so I won't require > > it. > > Sure, I will split the patch into small ones if possible. > > > > [...] 
> > > > > /* This is the global metrics */ > > > struct ceph_client_metric { > > > atomic64_t total_dentries; > > > @@ -35,8 +100,21 @@ struct ceph_client_metric { > > > ktime_t metadata_latency_sq_sum; > > > ktime_t metadata_latency_min; > > > ktime_t metadata_latency_max; > > > + > > > + struct delayed_work delayed_work; /* delayed work */ > > > }; > > > > > > +static inline void metric_schedule_delayed(struct ceph_client_metric *m) > > > +{ > > > + /* per second as default */ > > > + unsigned int hz = round_jiffies_relative(HZ * metric_send_interval); > > > + > > > + if (!metric_send_interval) > > > + return; > > > + > > > + schedule_delayed_work(&m->delayed_work, hz); > > > +} > > > + > > Should this also be gated on a MDS feature bit or anything? What happens > > if we're running against a really old MDS that doesn't support these > > stats? Does it just drop them on the floor? Should we disable sending > > them in that case? > > Tested without metric code in the ceph and when ceph saw a unknown type > message, it will close the socket connection directly. > Ouch, that sounds bad. How does the userland client handle this? This seems like the kind of thing we usually add a feature bit for somewhere... 
> > > > extern int ceph_metric_init(struct ceph_client_metric *m); > > > extern void ceph_metric_destroy(struct ceph_client_metric *m); > > > > > > diff --git a/fs/ceph/super.c b/fs/ceph/super.c > > > index c9784eb1..66a940c 100644 > > > --- a/fs/ceph/super.c > > > +++ b/fs/ceph/super.c > > > @@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void) > > > destroy_caches(); > > > } > > > > > > +static int param_set_metric_interval(const char *val, const struct kernel_param *kp) > > > +{ > > > + int ret; > > > + unsigned int interval; > > > + > > > + ret = kstrtouint(val, 0, &interval); > > > + if (ret < 0) { > > > + pr_err("Failed to parse metric interval '%s'\n", val); > > > + return ret; > > > + } > > > + > > > + if (interval > 5 || interval < 1) { > > > + pr_err("Invalid metric interval %u\n", interval); > > > + return -EINVAL; > > > + } > > > + > > > + metric_send_interval = interval; > > > + return 0; > > > +} > > > + > > > +static const struct kernel_param_ops param_ops_metric_interval = { > > > + .set = param_set_metric_interval, > > > + .get = param_get_uint, > > > +}; > > > + > > > +unsigned int metric_send_interval = 1; > > > +module_param_cb(metric_send_interval, ¶m_ops_metric_interval, &metric_send_interval, 0644); > > > +MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)"); > > > + > > Aren't valid values 0-5, with 0 disabling this feature? That's > > what metric_schedule_delayed() seems to indicate... > > What value should it be as default ? 0 to disable it or 1 ? > I don't have strong preference here. It's probably safe enough to make it 1. I do like the ability to turn this off though, as that gives us a workaround in the event that it does cause trouble. > Maybe in future we should let the ceph side send a notification/request > to all the clients if it wants to collect the metrics and then we could > enable it here, and defautly just disable it. 
> That seems like a heavier-weight solution than is called for here. This seems like something that ought to have a feature bit. Then if that's turned off, we can just disable this altogether. > > > > module_init(init_ceph); > > > module_exit(exit_ceph); > > > > > > diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h > > > index ebf5ba6..455e9b9 100644 > > > --- a/include/linux/ceph/ceph_fs.h > > > +++ b/include/linux/ceph/ceph_fs.h > > > @@ -130,6 +130,7 @@ struct ceph_dir_layout { > > > #define CEPH_MSG_CLIENT_REQUEST 24 > > > #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 > > > #define CEPH_MSG_CLIENT_REPLY 26 > > > +#define CEPH_MSG_CLIENT_METRICS 29 > > > #define CEPH_MSG_CLIENT_CAPS 0x310 > > > #define CEPH_MSG_CLIENT_LEASE 0x311 > > > #define CEPH_MSG_CLIENT_SNAP 0x312 > > -- Jeff Layton <jlayton@kernel.org> ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] ceph: periodically send perf metrics to ceph 2020-06-16 18:58 ` Jeff Layton @ 2020-06-17 0:28 ` Xiubo Li 0 siblings, 0 replies; 9+ messages in thread From: Xiubo Li @ 2020-06-17 0:28 UTC (permalink / raw) To: Jeff Layton, idryomov; +Cc: zyan, pdonnell, ceph-devel On 2020/6/17 2:58, Jeff Layton wrote: > On Wed, 2020-06-17 at 02:14 +0800, Xiubo Li wrote: >> On 2020/6/16 21:40, Jeff Layton wrote: >>> On Tue, 2020-06-16 at 08:52 -0400,xiubli@redhat.com wrote: >>>> From: Xiubo Li<xiubli@redhat.com> >>>> >>>> This will send the caps/read/write/metadata metrics to any available >>>> MDS only once per second as default, which will be the same as the >>>> userland client, or every metric_send_interval seconds, which is a >>>> module parameter. >>>> >>>> URL:https://tracker.ceph.com/issues/43215 >>>> Signed-off-by: Xiubo Li<xiubli@redhat.com> >>>> --- >>>> fs/ceph/mds_client.c | 46 +++++++++------ >>>> fs/ceph/mds_client.h | 4 ++ >>>> fs/ceph/metric.c | 133 +++++++++++++++++++++++++++++++++++++++++++ >>>> fs/ceph/metric.h | 78 +++++++++++++++++++++++++ >>>> fs/ceph/super.c | 29 ++++++++++ >>>> include/linux/ceph/ceph_fs.h | 1 + >>>> 6 files changed, 274 insertions(+), 17 deletions(-) >>>> >>> Long patch! Might not hurt to break out some of the cleanups into >>> separate patches, but they're fairly straighforward so I won't require >>> it. >> Sure, I will split the patch into small ones if possible. >> >> >>> [...] 
>>> >>>> /* This is the global metrics */ >>>> struct ceph_client_metric { >>>> atomic64_t total_dentries; >>>> @@ -35,8 +100,21 @@ struct ceph_client_metric { >>>> ktime_t metadata_latency_sq_sum; >>>> ktime_t metadata_latency_min; >>>> ktime_t metadata_latency_max; >>>> + >>>> + struct delayed_work delayed_work; /* delayed work */ >>>> }; >>>> >>>> +static inline void metric_schedule_delayed(struct ceph_client_metric *m) >>>> +{ >>>> + /* per second as default */ >>>> + unsigned int hz = round_jiffies_relative(HZ * metric_send_interval); >>>> + >>>> + if (!metric_send_interval) >>>> + return; >>>> + >>>> + schedule_delayed_work(&m->delayed_work, hz); >>>> +} >>>> + >>> Should this also be gated on a MDS feature bit or anything? What happens >>> if we're running against a really old MDS that doesn't support these >>> stats? Does it just drop them on the floor? Should we disable sending >>> them in that case? >> Tested without metric code in the ceph and when ceph saw a unknown type >> message, it will close the socket connection directly. >> > Ouch, that sounds bad. How does the userland client handle this? This > seems like the kind of thing we usually add a feature bit for > somewhere... The userland doesn't handle this case too, I will try to add a feature bit or something to fix it. 
>>>> extern int ceph_metric_init(struct ceph_client_metric *m); >>>> extern void ceph_metric_destroy(struct ceph_client_metric *m); >>>> >>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c >>>> index c9784eb1..66a940c 100644 >>>> --- a/fs/ceph/super.c >>>> +++ b/fs/ceph/super.c >>>> @@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void) >>>> destroy_caches(); >>>> } >>>> >>>> +static int param_set_metric_interval(const char *val, const struct kernel_param *kp) >>>> +{ >>>> + int ret; >>>> + unsigned int interval; >>>> + >>>> + ret = kstrtouint(val, 0, &interval); >>>> + if (ret < 0) { >>>> + pr_err("Failed to parse metric interval '%s'\n", val); >>>> + return ret; >>>> + } >>>> + >>>> + if (interval > 5 || interval < 1) { >>>> + pr_err("Invalid metric interval %u\n", interval); >>>> + return -EINVAL; >>>> + } >>>> + >>>> + metric_send_interval = interval; >>>> + return 0; >>>> +} >>>> + >>>> +static const struct kernel_param_ops param_ops_metric_interval = { >>>> + .set = param_set_metric_interval, >>>> + .get = param_get_uint, >>>> +}; >>>> + >>>> +unsigned int metric_send_interval = 1; >>>> +module_param_cb(metric_send_interval, ¶m_ops_metric_interval, &metric_send_interval, 0644); >>>> +MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)"); >>>> + >>> Aren't valid values 0-5, with 0 disabling this feature? That's >>> what metric_schedule_delayed() seems to indicate... >> What value should it be as default ? 0 to disable it or 1 ? >> > I don't have strong preference here. It's probably safe enough to make > it 1. I do like the ability to turn this off though, as that gives us a > workaround in the event that it does cause trouble. Sure, I will add it back. > >> Maybe in future we should let the ceph side send a notification/request >> to all the clients if it wants to collect the metrics and then we could >> enable it here, and defautly just disable it. 
>> > That seems like a heavier-weight solution than is called for here. This > seems like something that ought to have a feature bit. Then if that's > turned off, we can just disable this altogether. Okay, sounds better. Thanks >>>> module_init(init_ceph); >>>> module_exit(exit_ceph); >>>> >>>> diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h >>>> index ebf5ba6..455e9b9 100644 >>>> --- a/include/linux/ceph/ceph_fs.h >>>> +++ b/include/linux/ceph/ceph_fs.h >>>> @@ -130,6 +130,7 @@ struct ceph_dir_layout { >>>> #define CEPH_MSG_CLIENT_REQUEST 24 >>>> #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 >>>> #define CEPH_MSG_CLIENT_REPLY 26 >>>> +#define CEPH_MSG_CLIENT_METRICS 29 >>>> #define CEPH_MSG_CLIENT_CAPS 0x310 >>>> #define CEPH_MSG_CLIENT_LEASE 0x311 >>>> #define CEPH_MSG_CLIENT_SNAP 0x312 >> ^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 2/2] ceph: send client provided metric flags in client metadata 2020-06-16 12:52 [PATCH 0/2] ceph: periodically send perf metrics to ceph xiubli 2020-06-16 12:52 ` [PATCH 1/2] " xiubli @ 2020-06-16 12:52 ` xiubli 2020-06-16 14:11 ` Jeff Layton 1 sibling, 1 reply; 9+ messages in thread From: xiubli @ 2020-06-16 12:52 UTC (permalink / raw) To: jlayton, idryomov; +Cc: zyan, pdonnell, ceph-devel, Xiubo Li From: Xiubo Li <xiubli@redhat.com> Will send the metric flags to MDS, currently it supports the cap, read latency, write latency and metadata latency. URL: https://tracker.ceph.com/issues/43435 Signed-off-by: Xiubo Li <xiubli@redhat.com> --- fs/ceph/mds_client.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- fs/ceph/metric.h | 13 +++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f996363..8b7ff41 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1188,6 +1188,41 @@ static void encode_supported_features(void **p, void *end) } } +static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED; +#define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8) +static void encode_metric_spec(void **p, void *end) +{ + static const size_t count = ARRAY_SIZE(metric_bits); + + /* header */ + BUG_ON(*p + 2 > end); + ceph_encode_8(p, 1); /* version */ + ceph_encode_8(p, 1); /* compat */ + + if (count > 0) { + size_t i; + size_t size = METRIC_BYTES(count); + + BUG_ON(*p + 4 + 4 + size > end); + + /* metric spec info length */ + ceph_encode_32(p, 4 + size); + + /* metric spec */ + ceph_encode_32(p, size); + memset(*p, 0, size); + for (i = 0; i < count; i++) + ((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8); + *p += size; + } else { + BUG_ON(*p + 4 + 4 > end); + /* metric spec info length */ + ceph_encode_32(p, 4); + /* metric spec */ + ceph_encode_32(p, 0); + } +} + /* * session message, specialization for CEPH_SESSION_REQUEST_OPEN * 
to include additional client metadata fields. @@ -1227,6 +1262,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 size = FEATURE_BYTES(count); extra_bytes += 4 + size; + /* metric spec */ + size = 0; + count = ARRAY_SIZE(metric_bits); + if (count > 0) + size = METRIC_BYTES(count); + extra_bytes += 2 + 4 + 4 + size; + /* Allocate the message */ msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, GFP_NOFS, false); @@ -1245,9 +1287,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 * Serialize client metadata into waiting buffer space, using * the format that userspace expects for map<string, string> * - * ClientSession messages with metadata are v3 + * ClientSession messages with metadata are v4 */ - msg->hdr.version = cpu_to_le16(3); + msg->hdr.version = cpu_to_le16(4); msg->hdr.compat_version = cpu_to_le16(1); /* The write pointer, following the session_head structure */ @@ -1270,6 +1312,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 } encode_supported_features(&p, end); + encode_metric_spec(&p, end); msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h index 2af9e0b..f34adf7 100644 --- a/fs/ceph/metric.h +++ b/fs/ceph/metric.h @@ -18,6 +18,19 @@ enum ceph_metric_type { CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE, }; +/* + * This will always have the highest metric bit value + * as the last element of the array. + */ +#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED { \ + CLIENT_METRIC_TYPE_CAP_INFO, \ + CLIENT_METRIC_TYPE_READ_LATENCY, \ + CLIENT_METRIC_TYPE_WRITE_LATENCY, \ + CLIENT_METRIC_TYPE_METADATA_LATENCY, \ + \ + CLIENT_METRIC_TYPE_MAX, \ +} + /* metric caps header */ struct ceph_metric_cap { __le32 type; /* ceph metric type */ -- 1.8.3.1 ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] ceph: send client provided metric flags in client metadata 2020-06-16 12:52 ` [PATCH 2/2] ceph: send client provided metric flags in client metadata xiubli @ 2020-06-16 14:11 ` Jeff Layton 2020-06-16 14:18 ` Xiubo Li 0 siblings, 1 reply; 9+ messages in thread From: Jeff Layton @ 2020-06-16 14:11 UTC (permalink / raw) To: xiubli, idryomov; +Cc: zyan, pdonnell, ceph-devel On Tue, 2020-06-16 at 08:52 -0400, xiubli@redhat.com wrote: > From: Xiubo Li <xiubli@redhat.com> > > Will send the metric flags to MDS, currently it supports the cap, > read latency, write latency and metadata latency. > > URL: https://tracker.ceph.com/issues/43435 > Signed-off-by: Xiubo Li <xiubli@redhat.com> > --- > fs/ceph/mds_client.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- > fs/ceph/metric.h | 13 +++++++++++++ > 2 files changed, 58 insertions(+), 2 deletions(-) > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index f996363..8b7ff41 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -1188,6 +1188,41 @@ static void encode_supported_features(void **p, void *end) > } > } > > +static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED; > +#define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8) > +static void encode_metric_spec(void **p, void *end) > +{ > + static const size_t count = ARRAY_SIZE(metric_bits); > + > + /* header */ > + BUG_ON(*p + 2 > end); > + ceph_encode_8(p, 1); /* version */ > + ceph_encode_8(p, 1); /* compat */ > + > + if (count > 0) { > + size_t i; > + size_t size = METRIC_BYTES(count); > + > + BUG_ON(*p + 4 + 4 + size > end); > + I know it's unlikely to ever trip, but let's not BUG_ON here. Maybe just WARN and return an error... Of course, we'd probably want that error to bubble up to the callers of __open_session() but none of its callers check the return value. Would you mind fixing that while you're in there too? 
> + /* metric spec info length */ > + ceph_encode_32(p, 4 + size); > + > + /* metric spec */ > + ceph_encode_32(p, size); > + memset(*p, 0, size); > + for (i = 0; i < count; i++) > + ((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8); > + *p += size; > + } else { > + BUG_ON(*p + 4 + 4 > end); > + /* metric spec info length */ > + ceph_encode_32(p, 4); > + /* metric spec */ > + ceph_encode_32(p, 0); > + } > +} > + > /* > * session message, specialization for CEPH_SESSION_REQUEST_OPEN > * to include additional client metadata fields. > @@ -1227,6 +1262,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 > size = FEATURE_BYTES(count); > extra_bytes += 4 + size; > > + /* metric spec */ > + size = 0; > + count = ARRAY_SIZE(metric_bits); > + if (count > 0) > + size = METRIC_BYTES(count); > + extra_bytes += 2 + 4 + 4 + size; > + > /* Allocate the message */ > msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, > GFP_NOFS, false); > @@ -1245,9 +1287,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 > * Serialize client metadata into waiting buffer space, using > * the format that userspace expects for map<string, string> > * > - * ClientSession messages with metadata are v3 > + * ClientSession messages with metadata are v4 > */ > - msg->hdr.version = cpu_to_le16(3); > + msg->hdr.version = cpu_to_le16(4); > msg->hdr.compat_version = cpu_to_le16(1); > > /* The write pointer, following the session_head structure */ > @@ -1270,6 +1312,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 > } > > encode_supported_features(&p, end); > + encode_metric_spec(&p, end); > msg->front.iov_len = p - msg->front.iov_base; > msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); > > diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h > index 2af9e0b..f34adf7 100644 > --- a/fs/ceph/metric.h > +++ b/fs/ceph/metric.h > @@ -18,6 +18,19 @@ enum ceph_metric_type { > 
CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE, > }; > > +/* > + * This will always have the highest metric bit value > + * as the last element of the array. > + */ > +#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED { \ > + CLIENT_METRIC_TYPE_CAP_INFO, \ > + CLIENT_METRIC_TYPE_READ_LATENCY, \ > + CLIENT_METRIC_TYPE_WRITE_LATENCY, \ > + CLIENT_METRIC_TYPE_METADATA_LATENCY, \ > + \ > + CLIENT_METRIC_TYPE_MAX, \ > +} > + > /* metric caps header */ > struct ceph_metric_cap { > __le32 type; /* ceph metric type */ -- Jeff Layton <jlayton@kernel.org> ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] ceph: send client provided metric flags in client metadata 2020-06-16 14:11 ` Jeff Layton @ 2020-06-16 14:18 ` Xiubo Li 0 siblings, 0 replies; 9+ messages in thread From: Xiubo Li @ 2020-06-16 14:18 UTC (permalink / raw) To: Jeff Layton, idryomov; +Cc: zyan, pdonnell, ceph-devel On 2020/6/16 22:11, Jeff Layton wrote: > On Tue, 2020-06-16 at 08:52 -0400, xiubli@redhat.com wrote: >> From: Xiubo Li <xiubli@redhat.com> >> >> Will send the metric flags to MDS, currently it supports the cap, >> read latency, write latency and metadata latency. >> >> URL: https://tracker.ceph.com/issues/43435 >> Signed-off-by: Xiubo Li <xiubli@redhat.com> >> --- >> fs/ceph/mds_client.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- >> fs/ceph/metric.h | 13 +++++++++++++ >> 2 files changed, 58 insertions(+), 2 deletions(-) >> >> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >> index f996363..8b7ff41 100644 >> --- a/fs/ceph/mds_client.c >> +++ b/fs/ceph/mds_client.c >> @@ -1188,6 +1188,41 @@ static void encode_supported_features(void **p, void *end) >> } >> } >> >> +static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED; >> +#define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8) >> +static void encode_metric_spec(void **p, void *end) >> +{ >> + static const size_t count = ARRAY_SIZE(metric_bits); >> + >> + /* header */ >> + BUG_ON(*p + 2 > end); >> + ceph_encode_8(p, 1); /* version */ >> + ceph_encode_8(p, 1); /* compat */ >> + >> + if (count > 0) { >> + size_t i; >> + size_t size = METRIC_BYTES(count); >> + >> + BUG_ON(*p + 4 + 4 + size > end); >> + > I know it's unlikely to ever trip, but let's not BUG_ON here. Maybe just > WARN and return an error... > > Of course, we'd probably want that error to bubble up to the callers of > __open_session() but none of its callers check the return value. Would > you mind fixing that while you're in there too? 
Sure, this makes sense and the checkpatch.pl also suggests to use WARN, I will fix them all later. Thanks BRs Xiubo > >> + /* metric spec info length */ >> + ceph_encode_32(p, 4 + size); >> + >> + /* metric spec */ >> + ceph_encode_32(p, size); >> + memset(*p, 0, size); >> + for (i = 0; i < count; i++) >> + ((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8); >> + *p += size; >> + } else { >> + BUG_ON(*p + 4 + 4 > end); >> + /* metric spec info length */ >> + ceph_encode_32(p, 4); >> + /* metric spec */ >> + ceph_encode_32(p, 0); >> + } >> +} >> + >> /* >> * session message, specialization for CEPH_SESSION_REQUEST_OPEN >> * to include additional client metadata fields. >> @@ -1227,6 +1262,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 >> size = FEATURE_BYTES(count); >> extra_bytes += 4 + size; >> >> + /* metric spec */ >> + size = 0; >> + count = ARRAY_SIZE(metric_bits); >> + if (count > 0) >> + size = METRIC_BYTES(count); >> + extra_bytes += 2 + 4 + 4 + size; >> + >> /* Allocate the message */ >> msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, >> GFP_NOFS, false); >> @@ -1245,9 +1287,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 >> * Serialize client metadata into waiting buffer space, using >> * the format that userspace expects for map<string, string> >> * >> - * ClientSession messages with metadata are v3 >> + * ClientSession messages with metadata are v4 >> */ >> - msg->hdr.version = cpu_to_le16(3); >> + msg->hdr.version = cpu_to_le16(4); >> msg->hdr.compat_version = cpu_to_le16(1); >> >> /* The write pointer, following the session_head structure */ >> @@ -1270,6 +1312,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 >> } >> >> encode_supported_features(&p, end); >> + encode_metric_spec(&p, end); >> msg->front.iov_len = p - msg->front.iov_base; >> msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); >> >> diff --git 
a/fs/ceph/metric.h b/fs/ceph/metric.h >> index 2af9e0b..f34adf7 100644 >> --- a/fs/ceph/metric.h >> +++ b/fs/ceph/metric.h >> @@ -18,6 +18,19 @@ enum ceph_metric_type { >> CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE, >> }; >> >> +/* >> + * This will always have the highest metric bit value >> + * as the last element of the array. >> + */ >> +#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED { \ >> + CLIENT_METRIC_TYPE_CAP_INFO, \ >> + CLIENT_METRIC_TYPE_READ_LATENCY, \ >> + CLIENT_METRIC_TYPE_WRITE_LATENCY, \ >> + CLIENT_METRIC_TYPE_METADATA_LATENCY, \ >> + \ >> + CLIENT_METRIC_TYPE_MAX, \ >> +} >> + >> /* metric caps header */ >> struct ceph_metric_cap { >> __le32 type; /* ceph metric type */ ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2020-06-17 0:28 UTC | newest] Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-06-16 12:52 [PATCH 0/2] ceph: periodically send perf metrics to ceph xiubli 2020-06-16 12:52 ` [PATCH 1/2] " xiubli 2020-06-16 13:40 ` Jeff Layton 2020-06-16 18:14 ` Xiubo Li 2020-06-16 18:58 ` Jeff Layton 2020-06-17 0:28 ` Xiubo Li 2020-06-16 12:52 ` [PATCH 2/2] ceph: send client provided metric flags in client metadata xiubli 2020-06-16 14:11 ` Jeff Layton 2020-06-16 14:18 ` Xiubo Li
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.