* [PATCH 0/2] ceph: periodically send perf metrics to ceph
@ 2020-06-16 12:52 xiubli
2020-06-16 12:52 ` [PATCH 1/2] " xiubli
2020-06-16 12:52 ` [PATCH 2/2] ceph: send client provided metric flags in client metadata xiubli
0 siblings, 2 replies; 9+ messages in thread
From: xiubli @ 2020-06-16 12:52 UTC (permalink / raw)
To: jlayton, idryomov; +Cc: zyan, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
This series is based the previous patches of the metrics in kceph[1]
and mds daemons record and forward client side metrics to manager[2].
This will send the caps/read/write/metadata metrics to any available
MDS only once per second as default, which will be the same as the
userland client, or every metric_send_interval seconds, which is a
module parameter, the valid values for metric_send_interval will be
1~5 seconds.
And will also send the metric flags to MDS, currently it supports the
cap, read latency, write latency and metadata latency.
[1] https://patchwork.kernel.org/project/ceph-devel/list/?series=238907 [Merged]
[2] https://github.com/ceph/ceph/pull/26004 [Merged]
Xiubo Li (2):
ceph: periodically send perf metrics to ceph
ceph: send client provided metric flags in client metadata
fs/ceph/mds_client.c | 93 +++++++++++++++++++++++-------
fs/ceph/mds_client.h | 4 ++
fs/ceph/metric.c | 133 +++++++++++++++++++++++++++++++++++++++++++
fs/ceph/metric.h | 91 +++++++++++++++++++++++++++++
fs/ceph/super.c | 29 ++++++++++
include/linux/ceph/ceph_fs.h | 1 +
6 files changed, 332 insertions(+), 19 deletions(-)
--
1.8.3.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/2] ceph: periodically send perf metrics to ceph
2020-06-16 12:52 [PATCH 0/2] ceph: periodically send perf metrics to ceph xiubli
@ 2020-06-16 12:52 ` xiubli
2020-06-16 13:40 ` Jeff Layton
2020-06-16 12:52 ` [PATCH 2/2] ceph: send client provided metric flags in client metadata xiubli
1 sibling, 1 reply; 9+ messages in thread
From: xiubli @ 2020-06-16 12:52 UTC (permalink / raw)
To: jlayton, idryomov; +Cc: zyan, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
This will send the caps/read/write/metadata metrics to any available
MDS only once per second as default, which will be the same as the
userland client, or every metric_send_interval seconds, which is a
module parameter.
URL: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/mds_client.c | 46 +++++++++------
fs/ceph/mds_client.h | 4 ++
fs/ceph/metric.c | 133 +++++++++++++++++++++++++++++++++++++++++++
fs/ceph/metric.h | 78 +++++++++++++++++++++++++
fs/ceph/super.c | 29 ++++++++++
include/linux/ceph/ceph_fs.h | 1 +
6 files changed, 274 insertions(+), 17 deletions(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a504971..f996363 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4263,6 +4263,30 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
ceph_force_reconnect(fsc->sb);
}
+bool check_session_state(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *s)
+{
+ if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
+ dout("resending session close request for mds%d\n",
+ s->s_mds);
+ request_close_session(mdsc, s);
+ return false;
+ }
+ if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
+ if (s->s_state == CEPH_MDS_SESSION_OPEN) {
+ s->s_state = CEPH_MDS_SESSION_HUNG;
+ pr_info("mds%d hung\n", s->s_mds);
+ }
+ }
+ if (s->s_state == CEPH_MDS_SESSION_NEW ||
+ s->s_state == CEPH_MDS_SESSION_RESTARTING ||
+ s->s_state == CEPH_MDS_SESSION_REJECTED)
+ /* this mds is failed or recovering, just wait */
+ return false;
+
+ return true;
+}
+
/*
* delayed work -- periodically trim expired leases, renew caps with mds
*/
@@ -4294,23 +4318,8 @@ static void delayed_work(struct work_struct *work)
struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
if (!s)
continue;
- if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
- dout("resending session close request for mds%d\n",
- s->s_mds);
- request_close_session(mdsc, s);
- ceph_put_mds_session(s);
- continue;
- }
- if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
- if (s->s_state == CEPH_MDS_SESSION_OPEN) {
- s->s_state = CEPH_MDS_SESSION_HUNG;
- pr_info("mds%d hung\n", s->s_mds);
- }
- }
- if (s->s_state == CEPH_MDS_SESSION_NEW ||
- s->s_state == CEPH_MDS_SESSION_RESTARTING ||
- s->s_state == CEPH_MDS_SESSION_REJECTED) {
- /* this mds is failed or recovering, just wait */
+
+ if (!check_session_state(mdsc, s)) {
ceph_put_mds_session(s);
continue;
}
@@ -4616,6 +4625,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
cancel_work_sync(&mdsc->cap_reclaim_work);
cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
+ cancel_delayed_work_sync(&mdsc->metric.delayed_work); /* cancel timer */
dout("stopped\n");
}
@@ -4658,6 +4668,7 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
{
dout("stop\n");
cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
+ cancel_delayed_work_sync(&mdsc->metric.delayed_work); /* cancel timer */
if (mdsc->mdsmap)
ceph_mdsmap_destroy(mdsc->mdsmap);
kfree(mdsc->sessions);
@@ -4815,6 +4826,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
mutex_unlock(&mdsc->mutex);
schedule_delayed(mdsc);
+ metric_schedule_delayed(&mdsc->metric);
return;
bad_unlock:
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 5e0c407..bcb3892 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -18,6 +18,7 @@
#include <linux/ceph/auth.h>
#include "metric.h"
+#include "super.h"
/* The first 8 bits are reserved for old ceph releases */
enum ceph_feature_type {
@@ -476,6 +477,9 @@ struct ceph_mds_client {
extern const char *ceph_mds_op_name(int op);
+extern bool check_session_state(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *s);
+
extern struct ceph_mds_session *
__ceph_lookup_mds_session(struct ceph_mds_client *, int mds);
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index 9217f35..3c9b0ec 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -1,10 +1,141 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/ceph/ceph_debug.h>
#include <linux/types.h>
#include <linux/percpu_counter.h>
#include <linux/math64.h>
#include "metric.h"
+#include "mds_client.h"
+
+static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *s,
+ u64 nr_caps)
+{
+ struct ceph_metric_head *head;
+ struct ceph_metric_cap *cap;
+ struct ceph_metric_read_latency *read;
+ struct ceph_metric_write_latency *write;
+ struct ceph_metric_metadata_latency *meta;
+ struct ceph_client_metric *m = &mdsc->metric;
+ struct ceph_msg *msg;
+ struct timespec64 ts;
+ s64 sum, total;
+ s32 items = 0;
+ s32 len;
+
+ len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
+ + sizeof(*meta);
+
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
+ if (!msg) {
+ pr_err("send metrics to mds%d, failed to allocate message\n",
+ s->s_mds);
+ return false;
+ }
+
+ head = msg->front.iov_base;
+
+ /* encode the cap metric */
+ cap = (struct ceph_metric_cap *)(head + 1);
+ cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
+ cap->ver = 1;
+ cap->compat = 1;
+ cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
+ cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit));
+ cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis));
+ cap->total = cpu_to_le64(nr_caps);
+ items++;
+
+ /* encode the read latency metric */
+ read = (struct ceph_metric_read_latency *)(cap + 1);
+ read->type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
+ read->ver = 1;
+ read->compat = 1;
+ read->data_len = cpu_to_le32(sizeof(*read) - 10);
+ total = m->total_reads;
+ sum = m->read_latency_sum;
+ jiffies_to_timespec64(sum, &ts);
+ read->sec = cpu_to_le32(ts.tv_sec);
+ read->nsec = cpu_to_le32(ts.tv_nsec);
+ items++;
+
+ /* encode the write latency metric */
+ write = (struct ceph_metric_write_latency *)(read + 1);
+ write->type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
+ write->ver = 1;
+ write->compat = 1;
+ write->data_len = cpu_to_le32(sizeof(*write) - 10);
+ total = m->total_writes;
+ sum = m->write_latency_sum;
+ jiffies_to_timespec64(sum, &ts);
+ write->sec = cpu_to_le32(ts.tv_sec);
+ write->nsec = cpu_to_le32(ts.tv_nsec);
+ items++;
+
+ /* encode the metadata latency metric */
+ meta = (struct ceph_metric_metadata_latency *)(write + 1);
+ meta->type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
+ meta->ver = 1;
+ meta->compat = 1;
+ meta->data_len = cpu_to_le32(sizeof(*meta) - 10);
+ total = m->total_metadatas;
+ sum = m->metadata_latency_sum;
+ jiffies_to_timespec64(sum, &ts);
+ meta->sec = cpu_to_le32(ts.tv_sec);
+ meta->nsec = cpu_to_le32(ts.tv_nsec);
+ items++;
+
+ put_unaligned_le32(items, &head->num);
+ msg->front.iov_len = cpu_to_le32(len);
+ msg->hdr.version = cpu_to_le16(1);
+ msg->hdr.compat_version = cpu_to_le16(1);
+ msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+ dout("send metrics to mds%d %p\n", s->s_mds, msg);
+ ceph_con_send(&s->s_con, msg);
+
+ return true;
+}
+
+static void metric_delayed_work(struct work_struct *work)
+{
+ struct ceph_client_metric *m =
+ container_of(work, struct ceph_client_metric, delayed_work.work);
+ struct ceph_mds_client *mdsc =
+ container_of(m, struct ceph_mds_client, metric);
+ struct ceph_mds_session *s;
+ u64 nr_caps = 0;
+ bool ret;
+ int i;
+
+ mutex_lock(&mdsc->mutex);
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ s = __ceph_lookup_mds_session(mdsc, i);
+ if (!s)
+ continue;
+ nr_caps += s->s_nr_caps;
+ ceph_put_mds_session(s);
+ }
+
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ s = __ceph_lookup_mds_session(mdsc, i);
+ if (!s)
+ continue;
+ if (!check_session_state(mdsc, s)) {
+ ceph_put_mds_session(s);
+ continue;
+ }
+
+ /* Only send the metric once in any available session */
+ ret = ceph_mdsc_send_metrics(mdsc, s, nr_caps);
+ ceph_put_mds_session(s);
+ if (ret)
+ break;
+ }
+ mutex_unlock(&mdsc->mutex);
+
+ metric_schedule_delayed(&mdsc->metric);
+}
int ceph_metric_init(struct ceph_client_metric *m)
{
@@ -51,6 +182,8 @@ int ceph_metric_init(struct ceph_client_metric *m)
m->total_metadatas = 0;
m->metadata_latency_sum = 0;
+ INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work);
+
return 0;
err_i_caps_mis:
diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
index ccd8128..2af9e0b 100644
--- a/fs/ceph/metric.h
+++ b/fs/ceph/metric.h
@@ -6,6 +6,71 @@
#include <linux/percpu_counter.h>
#include <linux/ktime.h>
+extern unsigned int metric_send_interval;
+
+enum ceph_metric_type {
+ CLIENT_METRIC_TYPE_CAP_INFO,
+ CLIENT_METRIC_TYPE_READ_LATENCY,
+ CLIENT_METRIC_TYPE_WRITE_LATENCY,
+ CLIENT_METRIC_TYPE_METADATA_LATENCY,
+ CLIENT_METRIC_TYPE_DENTRY_LEASE,
+
+ CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
+};
+
+/* metric caps header */
+struct ceph_metric_cap {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of sizeof(hit + mis + total) */
+ __le64 hit;
+ __le64 mis;
+ __le64 total;
+} __packed;
+
+/* metric read latency header */
+struct ceph_metric_read_latency {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of sizeof(sec + nsec) */
+ __le32 sec;
+ __le32 nsec;
+} __packed;
+
+/* metric write latency header */
+struct ceph_metric_write_latency {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of sizeof(sec + nsec) */
+ __le32 sec;
+ __le32 nsec;
+} __packed;
+
+/* metric metadata latency header */
+struct ceph_metric_metadata_latency {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of sizeof(sec + nsec) */
+ __le32 sec;
+ __le32 nsec;
+} __packed;
+
+struct ceph_metric_head {
+ __le32 num; /* the number of metrics that will be sent */
+} __packed;
+
/* This is the global metrics */
struct ceph_client_metric {
atomic64_t total_dentries;
@@ -35,8 +100,21 @@ struct ceph_client_metric {
ktime_t metadata_latency_sq_sum;
ktime_t metadata_latency_min;
ktime_t metadata_latency_max;
+
+ struct delayed_work delayed_work; /* delayed work */
};
+static inline void metric_schedule_delayed(struct ceph_client_metric *m)
+{
+ /* per second as default */
+ unsigned int hz = round_jiffies_relative(HZ * metric_send_interval);
+
+ if (!metric_send_interval)
+ return;
+
+ schedule_delayed_work(&m->delayed_work, hz);
+}
+
extern int ceph_metric_init(struct ceph_client_metric *m);
extern void ceph_metric_destroy(struct ceph_client_metric *m);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index c9784eb1..66a940c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void)
destroy_caches();
}
+static int param_set_metric_interval(const char *val, const struct kernel_param *kp)
+{
+ int ret;
+ unsigned int interval;
+
+ ret = kstrtouint(val, 0, &interval);
+ if (ret < 0) {
+ pr_err("Failed to parse metric interval '%s'\n", val);
+ return ret;
+ }
+
+ if (interval > 5 || interval < 1) {
+ pr_err("Invalid metric interval %u\n", interval);
+ return -EINVAL;
+ }
+
+ metric_send_interval = interval;
+ return 0;
+}
+
+static const struct kernel_param_ops param_ops_metric_interval = {
+ .set = param_set_metric_interval,
+ .get = param_get_uint,
+};
+
+unsigned int metric_send_interval = 1;
+module_param_cb(metric_send_interval, ¶m_ops_metric_interval, &metric_send_interval, 0644);
+MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)");
+
module_init(init_ceph);
module_exit(exit_ceph);
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index ebf5ba6..455e9b9 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -130,6 +130,7 @@ struct ceph_dir_layout {
#define CEPH_MSG_CLIENT_REQUEST 24
#define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
#define CEPH_MSG_CLIENT_REPLY 26
+#define CEPH_MSG_CLIENT_METRICS 29
#define CEPH_MSG_CLIENT_CAPS 0x310
#define CEPH_MSG_CLIENT_LEASE 0x311
#define CEPH_MSG_CLIENT_SNAP 0x312
--
1.8.3.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/2] ceph: send client provided metric flags in client metadata
2020-06-16 12:52 [PATCH 0/2] ceph: periodically send perf metrics to ceph xiubli
2020-06-16 12:52 ` [PATCH 1/2] " xiubli
@ 2020-06-16 12:52 ` xiubli
2020-06-16 14:11 ` Jeff Layton
1 sibling, 1 reply; 9+ messages in thread
From: xiubli @ 2020-06-16 12:52 UTC (permalink / raw)
To: jlayton, idryomov; +Cc: zyan, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
Will send the metric flags to MDS, currently it supports the cap,
read latency, write latency and metadata latency.
URL: https://tracker.ceph.com/issues/43435
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/mds_client.c | 47 +++++++++++++++++++++++++++++++++++++++++++++--
fs/ceph/metric.h | 13 +++++++++++++
2 files changed, 58 insertions(+), 2 deletions(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f996363..8b7ff41 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1188,6 +1188,41 @@ static void encode_supported_features(void **p, void *end)
}
}
+static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED;
+#define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8)
+static void encode_metric_spec(void **p, void *end)
+{
+ static const size_t count = ARRAY_SIZE(metric_bits);
+
+ /* header */
+ BUG_ON(*p + 2 > end);
+ ceph_encode_8(p, 1); /* version */
+ ceph_encode_8(p, 1); /* compat */
+
+ if (count > 0) {
+ size_t i;
+ size_t size = METRIC_BYTES(count);
+
+ BUG_ON(*p + 4 + 4 + size > end);
+
+ /* metric spec info length */
+ ceph_encode_32(p, 4 + size);
+
+ /* metric spec */
+ ceph_encode_32(p, size);
+ memset(*p, 0, size);
+ for (i = 0; i < count; i++)
+ ((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8);
+ *p += size;
+ } else {
+ BUG_ON(*p + 4 + 4 > end);
+ /* metric spec info length */
+ ceph_encode_32(p, 4);
+ /* metric spec */
+ ceph_encode_32(p, 0);
+ }
+}
+
/*
* session message, specialization for CEPH_SESSION_REQUEST_OPEN
* to include additional client metadata fields.
@@ -1227,6 +1262,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
size = FEATURE_BYTES(count);
extra_bytes += 4 + size;
+ /* metric spec */
+ size = 0;
+ count = ARRAY_SIZE(metric_bits);
+ if (count > 0)
+ size = METRIC_BYTES(count);
+ extra_bytes += 2 + 4 + 4 + size;
+
/* Allocate the message */
msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
GFP_NOFS, false);
@@ -1245,9 +1287,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
* Serialize client metadata into waiting buffer space, using
* the format that userspace expects for map<string, string>
*
- * ClientSession messages with metadata are v3
+ * ClientSession messages with metadata are v4
*/
- msg->hdr.version = cpu_to_le16(3);
+ msg->hdr.version = cpu_to_le16(4);
msg->hdr.compat_version = cpu_to_le16(1);
/* The write pointer, following the session_head structure */
@@ -1270,6 +1312,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
}
encode_supported_features(&p, end);
+ encode_metric_spec(&p, end);
msg->front.iov_len = p - msg->front.iov_base;
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
index 2af9e0b..f34adf7 100644
--- a/fs/ceph/metric.h
+++ b/fs/ceph/metric.h
@@ -18,6 +18,19 @@ enum ceph_metric_type {
CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
};
+/*
+ * This will always have the highest metric bit value
+ * as the last element of the array.
+ */
+#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED { \
+ CLIENT_METRIC_TYPE_CAP_INFO, \
+ CLIENT_METRIC_TYPE_READ_LATENCY, \
+ CLIENT_METRIC_TYPE_WRITE_LATENCY, \
+ CLIENT_METRIC_TYPE_METADATA_LATENCY, \
+ \
+ CLIENT_METRIC_TYPE_MAX, \
+}
+
/* metric caps header */
struct ceph_metric_cap {
__le32 type; /* ceph metric type */
--
1.8.3.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] ceph: periodically send perf metrics to ceph
2020-06-16 12:52 ` [PATCH 1/2] " xiubli
@ 2020-06-16 13:40 ` Jeff Layton
2020-06-16 18:14 ` Xiubo Li
0 siblings, 1 reply; 9+ messages in thread
From: Jeff Layton @ 2020-06-16 13:40 UTC (permalink / raw)
To: xiubli, idryomov; +Cc: zyan, pdonnell, ceph-devel
On Tue, 2020-06-16 at 08:52 -0400, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> This will send the caps/read/write/metadata metrics to any available
> MDS only once per second as default, which will be the same as the
> userland client, or every metric_send_interval seconds, which is a
> module parameter.
>
> URL: https://tracker.ceph.com/issues/43215
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> fs/ceph/mds_client.c | 46 +++++++++------
> fs/ceph/mds_client.h | 4 ++
> fs/ceph/metric.c | 133 +++++++++++++++++++++++++++++++++++++++++++
> fs/ceph/metric.h | 78 +++++++++++++++++++++++++
> fs/ceph/super.c | 29 ++++++++++
> include/linux/ceph/ceph_fs.h | 1 +
> 6 files changed, 274 insertions(+), 17 deletions(-)
>
Long patch! Might not hurt to break out some of the cleanups into
separate patches, but they're fairly straighforward so I won't require
it.
[...]
> /* This is the global metrics */
> struct ceph_client_metric {
> atomic64_t total_dentries;
> @@ -35,8 +100,21 @@ struct ceph_client_metric {
> ktime_t metadata_latency_sq_sum;
> ktime_t metadata_latency_min;
> ktime_t metadata_latency_max;
> +
> + struct delayed_work delayed_work; /* delayed work */
> };
>
> +static inline void metric_schedule_delayed(struct ceph_client_metric *m)
> +{
> + /* per second as default */
> + unsigned int hz = round_jiffies_relative(HZ * metric_send_interval);
> +
> + if (!metric_send_interval)
> + return;
> +
> + schedule_delayed_work(&m->delayed_work, hz);
> +}
> +
Should this also be gated on a MDS feature bit or anything? What happens
if we're running against a really old MDS that doesn't support these
stats? Does it just drop them on the floor? Should we disable sending
them in that case?
> extern int ceph_metric_init(struct ceph_client_metric *m);
> extern void ceph_metric_destroy(struct ceph_client_metric *m);
>
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index c9784eb1..66a940c 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void)
> destroy_caches();
> }
>
> +static int param_set_metric_interval(const char *val, const struct kernel_param *kp)
> +{
> + int ret;
> + unsigned int interval;
> +
> + ret = kstrtouint(val, 0, &interval);
> + if (ret < 0) {
> + pr_err("Failed to parse metric interval '%s'\n", val);
> + return ret;
> + }
> +
> + if (interval > 5 || interval < 1) {
> + pr_err("Invalid metric interval %u\n", interval);
> + return -EINVAL;
> + }
> +
> + metric_send_interval = interval;
> + return 0;
> +}
> +
> +static const struct kernel_param_ops param_ops_metric_interval = {
> + .set = param_set_metric_interval,
> + .get = param_get_uint,
> +};
> +
> +unsigned int metric_send_interval = 1;
> +module_param_cb(metric_send_interval, ¶m_ops_metric_interval, &metric_send_interval, 0644);
> +MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)");
> +
Aren't valid values 0-5, with 0 disabling this feature? That's
what metric_schedule_delayed() seems to indicate...
> module_init(init_ceph);
> module_exit(exit_ceph);
>
> diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
> index ebf5ba6..455e9b9 100644
> --- a/include/linux/ceph/ceph_fs.h
> +++ b/include/linux/ceph/ceph_fs.h
> @@ -130,6 +130,7 @@ struct ceph_dir_layout {
> #define CEPH_MSG_CLIENT_REQUEST 24
> #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
> #define CEPH_MSG_CLIENT_REPLY 26
> +#define CEPH_MSG_CLIENT_METRICS 29
> #define CEPH_MSG_CLIENT_CAPS 0x310
> #define CEPH_MSG_CLIENT_LEASE 0x311
> #define CEPH_MSG_CLIENT_SNAP 0x312
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] ceph: send client provided metric flags in client metadata
2020-06-16 12:52 ` [PATCH 2/2] ceph: send client provided metric flags in client metadata xiubli
@ 2020-06-16 14:11 ` Jeff Layton
2020-06-16 14:18 ` Xiubo Li
0 siblings, 1 reply; 9+ messages in thread
From: Jeff Layton @ 2020-06-16 14:11 UTC (permalink / raw)
To: xiubli, idryomov; +Cc: zyan, pdonnell, ceph-devel
On Tue, 2020-06-16 at 08:52 -0400, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> Will send the metric flags to MDS, currently it supports the cap,
> read latency, write latency and metadata latency.
>
> URL: https://tracker.ceph.com/issues/43435
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> fs/ceph/mds_client.c | 47 +++++++++++++++++++++++++++++++++++++++++++++--
> fs/ceph/metric.h | 13 +++++++++++++
> 2 files changed, 58 insertions(+), 2 deletions(-)
>
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index f996363..8b7ff41 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1188,6 +1188,41 @@ static void encode_supported_features(void **p, void *end)
> }
> }
>
> +static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED;
> +#define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8)
> +static void encode_metric_spec(void **p, void *end)
> +{
> + static const size_t count = ARRAY_SIZE(metric_bits);
> +
> + /* header */
> + BUG_ON(*p + 2 > end);
> + ceph_encode_8(p, 1); /* version */
> + ceph_encode_8(p, 1); /* compat */
> +
> + if (count > 0) {
> + size_t i;
> + size_t size = METRIC_BYTES(count);
> +
> + BUG_ON(*p + 4 + 4 + size > end);
> +
I know it's unlikely to ever trip, but let's not BUG_ON here. Maybe just
WARN and return an error...
Of course, we'd probably want that error to bubble up to the callers of
__open_session() but none of its callers check the return value. Would
you mind fixing that while you're in there too?
> + /* metric spec info length */
> + ceph_encode_32(p, 4 + size);
> +
> + /* metric spec */
> + ceph_encode_32(p, size);
> + memset(*p, 0, size);
> + for (i = 0; i < count; i++)
> + ((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8);
> + *p += size;
> + } else {
> + BUG_ON(*p + 4 + 4 > end);
> + /* metric spec info length */
> + ceph_encode_32(p, 4);
> + /* metric spec */
> + ceph_encode_32(p, 0);
> + }
> +}
> +
> /*
> * session message, specialization for CEPH_SESSION_REQUEST_OPEN
> * to include additional client metadata fields.
> @@ -1227,6 +1262,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
> size = FEATURE_BYTES(count);
> extra_bytes += 4 + size;
>
> + /* metric spec */
> + size = 0;
> + count = ARRAY_SIZE(metric_bits);
> + if (count > 0)
> + size = METRIC_BYTES(count);
> + extra_bytes += 2 + 4 + 4 + size;
> +
> /* Allocate the message */
> msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
> GFP_NOFS, false);
> @@ -1245,9 +1287,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
> * Serialize client metadata into waiting buffer space, using
> * the format that userspace expects for map<string, string>
> *
> - * ClientSession messages with metadata are v3
> + * ClientSession messages with metadata are v4
> */
> - msg->hdr.version = cpu_to_le16(3);
> + msg->hdr.version = cpu_to_le16(4);
> msg->hdr.compat_version = cpu_to_le16(1);
>
> /* The write pointer, following the session_head structure */
> @@ -1270,6 +1312,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
> }
>
> encode_supported_features(&p, end);
> + encode_metric_spec(&p, end);
> msg->front.iov_len = p - msg->front.iov_base;
> msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
>
> diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
> index 2af9e0b..f34adf7 100644
> --- a/fs/ceph/metric.h
> +++ b/fs/ceph/metric.h
> @@ -18,6 +18,19 @@ enum ceph_metric_type {
> CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
> };
>
> +/*
> + * This will always have the highest metric bit value
> + * as the last element of the array.
> + */
> +#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED { \
> + CLIENT_METRIC_TYPE_CAP_INFO, \
> + CLIENT_METRIC_TYPE_READ_LATENCY, \
> + CLIENT_METRIC_TYPE_WRITE_LATENCY, \
> + CLIENT_METRIC_TYPE_METADATA_LATENCY, \
> + \
> + CLIENT_METRIC_TYPE_MAX, \
> +}
> +
> /* metric caps header */
> struct ceph_metric_cap {
> __le32 type; /* ceph metric type */
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] ceph: send client provided metric flags in client metadata
2020-06-16 14:11 ` Jeff Layton
@ 2020-06-16 14:18 ` Xiubo Li
0 siblings, 0 replies; 9+ messages in thread
From: Xiubo Li @ 2020-06-16 14:18 UTC (permalink / raw)
To: Jeff Layton, idryomov; +Cc: zyan, pdonnell, ceph-devel
On 2020/6/16 22:11, Jeff Layton wrote:
> On Tue, 2020-06-16 at 08:52 -0400, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Will send the metric flags to MDS, currently it supports the cap,
>> read latency, write latency and metadata latency.
>>
>> URL: https://tracker.ceph.com/issues/43435
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>> fs/ceph/mds_client.c | 47 +++++++++++++++++++++++++++++++++++++++++++++--
>> fs/ceph/metric.h | 13 +++++++++++++
>> 2 files changed, 58 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index f996363..8b7ff41 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -1188,6 +1188,41 @@ static void encode_supported_features(void **p, void *end)
>> }
>> }
>>
>> +static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED;
>> +#define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8)
>> +static void encode_metric_spec(void **p, void *end)
>> +{
>> + static const size_t count = ARRAY_SIZE(metric_bits);
>> +
>> + /* header */
>> + BUG_ON(*p + 2 > end);
>> + ceph_encode_8(p, 1); /* version */
>> + ceph_encode_8(p, 1); /* compat */
>> +
>> + if (count > 0) {
>> + size_t i;
>> + size_t size = METRIC_BYTES(count);
>> +
>> + BUG_ON(*p + 4 + 4 + size > end);
>> +
> I know it's unlikely to ever trip, but let's not BUG_ON here. Maybe just
> WARN and return an error...
>
> Of course, we'd probably want that error to bubble up to the callers of
> __open_session() but none of its callers check the return value. Would
> you mind fixing that while you're in there too?
Sure, this makes sense and the checkpatch.pl also suggests to use WARN,
I will fix them all later.
Thanks
BRs
Xiubo
>
>> + /* metric spec info length */
>> + ceph_encode_32(p, 4 + size);
>> +
>> + /* metric spec */
>> + ceph_encode_32(p, size);
>> + memset(*p, 0, size);
>> + for (i = 0; i < count; i++)
>> + ((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8);
>> + *p += size;
>> + } else {
>> + BUG_ON(*p + 4 + 4 > end);
>> + /* metric spec info length */
>> + ceph_encode_32(p, 4);
>> + /* metric spec */
>> + ceph_encode_32(p, 0);
>> + }
>> +}
>> +
>> /*
>> * session message, specialization for CEPH_SESSION_REQUEST_OPEN
>> * to include additional client metadata fields.
>> @@ -1227,6 +1262,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
>> size = FEATURE_BYTES(count);
>> extra_bytes += 4 + size;
>>
>> + /* metric spec */
>> + size = 0;
>> + count = ARRAY_SIZE(metric_bits);
>> + if (count > 0)
>> + size = METRIC_BYTES(count);
>> + extra_bytes += 2 + 4 + 4 + size;
>> +
>> /* Allocate the message */
>> msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
>> GFP_NOFS, false);
>> @@ -1245,9 +1287,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
>> * Serialize client metadata into waiting buffer space, using
>> * the format that userspace expects for map<string, string>
>> *
>> - * ClientSession messages with metadata are v3
>> + * ClientSession messages with metadata are v4
>> */
>> - msg->hdr.version = cpu_to_le16(3);
>> + msg->hdr.version = cpu_to_le16(4);
>> msg->hdr.compat_version = cpu_to_le16(1);
>>
>> /* The write pointer, following the session_head structure */
>> @@ -1270,6 +1312,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
>> }
>>
>> encode_supported_features(&p, end);
>> + encode_metric_spec(&p, end);
>> msg->front.iov_len = p - msg->front.iov_base;
>> msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
>>
>> diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
>> index 2af9e0b..f34adf7 100644
>> --- a/fs/ceph/metric.h
>> +++ b/fs/ceph/metric.h
>> @@ -18,6 +18,19 @@ enum ceph_metric_type {
>> CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
>> };
>>
>> +/*
>> + * This will always have the highest metric bit value
>> + * as the last element of the array.
>> + */
>> +#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED { \
>> + CLIENT_METRIC_TYPE_CAP_INFO, \
>> + CLIENT_METRIC_TYPE_READ_LATENCY, \
>> + CLIENT_METRIC_TYPE_WRITE_LATENCY, \
>> + CLIENT_METRIC_TYPE_METADATA_LATENCY, \
>> + \
>> + CLIENT_METRIC_TYPE_MAX, \
>> +}
>> +
>> /* metric caps header */
>> struct ceph_metric_cap {
>> __le32 type; /* ceph metric type */
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] ceph: periodically send perf metrics to ceph
2020-06-16 13:40 ` Jeff Layton
@ 2020-06-16 18:14 ` Xiubo Li
2020-06-16 18:58 ` Jeff Layton
0 siblings, 1 reply; 9+ messages in thread
From: Xiubo Li @ 2020-06-16 18:14 UTC (permalink / raw)
To: Jeff Layton, idryomov; +Cc: zyan, pdonnell, ceph-devel
On 2020/6/16 21:40, Jeff Layton wrote:
> On Tue, 2020-06-16 at 08:52 -0400,xiubli@redhat.com wrote:
>> From: Xiubo Li<xiubli@redhat.com>
>>
>> This will send the caps/read/write/metadata metrics to any available
>> MDS only once per second as default, which will be the same as the
>> userland client, or every metric_send_interval seconds, which is a
>> module parameter.
>>
>> URL:https://tracker.ceph.com/issues/43215
>> Signed-off-by: Xiubo Li<xiubli@redhat.com>
>> ---
>> fs/ceph/mds_client.c | 46 +++++++++------
>> fs/ceph/mds_client.h | 4 ++
>> fs/ceph/metric.c | 133 +++++++++++++++++++++++++++++++++++++++++++
>> fs/ceph/metric.h | 78 +++++++++++++++++++++++++
>> fs/ceph/super.c | 29 ++++++++++
>> include/linux/ceph/ceph_fs.h | 1 +
>> 6 files changed, 274 insertions(+), 17 deletions(-)
>>
> Long patch! Might not hurt to break out some of the cleanups into
> separate patches, but they're fairly straighforward so I won't require
> it.
Sure, I will split the patch into small ones if possible.
> [...]
>
>> /* This is the global metrics */
>> struct ceph_client_metric {
>> atomic64_t total_dentries;
>> @@ -35,8 +100,21 @@ struct ceph_client_metric {
>> ktime_t metadata_latency_sq_sum;
>> ktime_t metadata_latency_min;
>> ktime_t metadata_latency_max;
>> +
>> + struct delayed_work delayed_work; /* delayed work */
>> };
>>
>> +static inline void metric_schedule_delayed(struct ceph_client_metric *m)
>> +{
>> + /* per second as default */
>> + unsigned int hz = round_jiffies_relative(HZ * metric_send_interval);
>> +
>> + if (!metric_send_interval)
>> + return;
>> +
>> + schedule_delayed_work(&m->delayed_work, hz);
>> +}
>> +
> Should this also be gated on a MDS feature bit or anything? What happens
> if we're running against a really old MDS that doesn't support these
> stats? Does it just drop them on the floor? Should we disable sending
> them in that case?
Tested without metric code in the ceph and when ceph saw a unknown type
message, it will close the socket connection directly.
>> extern int ceph_metric_init(struct ceph_client_metric *m);
>> extern void ceph_metric_destroy(struct ceph_client_metric *m);
>>
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index c9784eb1..66a940c 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void)
>> destroy_caches();
>> }
>>
>> +static int param_set_metric_interval(const char *val, const struct kernel_param *kp)
>> +{
>> + int ret;
>> + unsigned int interval;
>> +
>> + ret = kstrtouint(val, 0, &interval);
>> + if (ret < 0) {
>> + pr_err("Failed to parse metric interval '%s'\n", val);
>> + return ret;
>> + }
>> +
>> + if (interval > 5 || interval < 1) {
>> + pr_err("Invalid metric interval %u\n", interval);
>> + return -EINVAL;
>> + }
>> +
>> + metric_send_interval = interval;
>> + return 0;
>> +}
>> +
>> +static const struct kernel_param_ops param_ops_metric_interval = {
>> + .set = param_set_metric_interval,
>> + .get = param_get_uint,
>> +};
>> +
>> +unsigned int metric_send_interval = 1;
>> +module_param_cb(metric_send_interval, ¶m_ops_metric_interval, &metric_send_interval, 0644);
>> +MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)");
>> +
> Aren't valid values 0-5, with 0 disabling this feature? That's
> what metric_schedule_delayed() seems to indicate...
What value should it be as default ? 0 to disable it or 1 ?
Maybe in future we should let the ceph side send a notification/request
to all the clients if it wants to collect the metrics and then we could
enable it here, and defautly just disable it.
>> module_init(init_ceph);
>> module_exit(exit_ceph);
>>
>> diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
>> index ebf5ba6..455e9b9 100644
>> --- a/include/linux/ceph/ceph_fs.h
>> +++ b/include/linux/ceph/ceph_fs.h
>> @@ -130,6 +130,7 @@ struct ceph_dir_layout {
>> #define CEPH_MSG_CLIENT_REQUEST 24
>> #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
>> #define CEPH_MSG_CLIENT_REPLY 26
>> +#define CEPH_MSG_CLIENT_METRICS 29
>> #define CEPH_MSG_CLIENT_CAPS 0x310
>> #define CEPH_MSG_CLIENT_LEASE 0x311
>> #define CEPH_MSG_CLIENT_SNAP 0x312
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] ceph: periodically send perf metrics to ceph
2020-06-16 18:14 ` Xiubo Li
@ 2020-06-16 18:58 ` Jeff Layton
2020-06-17 0:28 ` Xiubo Li
0 siblings, 1 reply; 9+ messages in thread
From: Jeff Layton @ 2020-06-16 18:58 UTC (permalink / raw)
To: Xiubo Li, idryomov; +Cc: zyan, pdonnell, ceph-devel
On Wed, 2020-06-17 at 02:14 +0800, Xiubo Li wrote:
> On 2020/6/16 21:40, Jeff Layton wrote:
> > On Tue, 2020-06-16 at 08:52 -0400,xiubli@redhat.com wrote:
> > > From: Xiubo Li<xiubli@redhat.com>
> > >
> > > This will send the caps/read/write/metadata metrics to any available
> > > MDS only once per second as default, which will be the same as the
> > > userland client, or every metric_send_interval seconds, which is a
> > > module parameter.
> > >
> > > URL:https://tracker.ceph.com/issues/43215
> > > Signed-off-by: Xiubo Li<xiubli@redhat.com>
> > > ---
> > > fs/ceph/mds_client.c | 46 +++++++++------
> > > fs/ceph/mds_client.h | 4 ++
> > > fs/ceph/metric.c | 133 +++++++++++++++++++++++++++++++++++++++++++
> > > fs/ceph/metric.h | 78 +++++++++++++++++++++++++
> > > fs/ceph/super.c | 29 ++++++++++
> > > include/linux/ceph/ceph_fs.h | 1 +
> > > 6 files changed, 274 insertions(+), 17 deletions(-)
> > >
> > Long patch! Might not hurt to break out some of the cleanups into
> > separate patches, but they're fairly straighforward so I won't require
> > it.
>
> Sure, I will split the patch into small ones if possible.
>
>
> > [...]
> >
> > > /* This is the global metrics */
> > > struct ceph_client_metric {
> > > atomic64_t total_dentries;
> > > @@ -35,8 +100,21 @@ struct ceph_client_metric {
> > > ktime_t metadata_latency_sq_sum;
> > > ktime_t metadata_latency_min;
> > > ktime_t metadata_latency_max;
> > > +
> > > + struct delayed_work delayed_work; /* delayed work */
> > > };
> > >
> > > +static inline void metric_schedule_delayed(struct ceph_client_metric *m)
> > > +{
> > > + /* per second as default */
> > > + unsigned int hz = round_jiffies_relative(HZ * metric_send_interval);
> > > +
> > > + if (!metric_send_interval)
> > > + return;
> > > +
> > > + schedule_delayed_work(&m->delayed_work, hz);
> > > +}
> > > +
> > Should this also be gated on a MDS feature bit or anything? What happens
> > if we're running against a really old MDS that doesn't support these
> > stats? Does it just drop them on the floor? Should we disable sending
> > them in that case?
>
> Tested without metric code in the ceph and when ceph saw a unknown type
> message, it will close the socket connection directly.
>
Ouch, that sounds bad. How does the userland client handle this? This
seems like the kind of thing we usually add a feature bit for
somewhere...
>
> > > extern int ceph_metric_init(struct ceph_client_metric *m);
> > > extern void ceph_metric_destroy(struct ceph_client_metric *m);
> > >
> > > diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> > > index c9784eb1..66a940c 100644
> > > --- a/fs/ceph/super.c
> > > +++ b/fs/ceph/super.c
> > > @@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void)
> > > destroy_caches();
> > > }
> > >
> > > +static int param_set_metric_interval(const char *val, const struct kernel_param *kp)
> > > +{
> > > + int ret;
> > > + unsigned int interval;
> > > +
> > > + ret = kstrtouint(val, 0, &interval);
> > > + if (ret < 0) {
> > > + pr_err("Failed to parse metric interval '%s'\n", val);
> > > + return ret;
> > > + }
> > > +
> > > + if (interval > 5 || interval < 1) {
> > > + pr_err("Invalid metric interval %u\n", interval);
> > > + return -EINVAL;
> > > + }
> > > +
> > > + metric_send_interval = interval;
> > > + return 0;
> > > +}
> > > +
> > > +static const struct kernel_param_ops param_ops_metric_interval = {
> > > + .set = param_set_metric_interval,
> > > + .get = param_get_uint,
> > > +};
> > > +
> > > +unsigned int metric_send_interval = 1;
> > > +module_param_cb(metric_send_interval, ¶m_ops_metric_interval, &metric_send_interval, 0644);
> > > +MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)");
> > > +
> > Aren't valid values 0-5, with 0 disabling this feature? That's
> > what metric_schedule_delayed() seems to indicate...
>
> What value should it be as default ? 0 to disable it or 1 ?
>
I don't have strong preference here. It's probably safe enough to make
it 1. I do like the ability to turn this off though, as that gives us a
workaround in the event that it does cause trouble.
> Maybe in future we should let the ceph side send a notification/request
> to all the clients if it wants to collect the metrics and then we could
> enable it here, and defautly just disable it.
>
That seems like a heavier-weight solution than is called for here. This
seems like something that ought to have a feature bit. Then if that's
turned off, we can just disable this altogether.
>
> > > module_init(init_ceph);
> > > module_exit(exit_ceph);
> > >
> > > diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
> > > index ebf5ba6..455e9b9 100644
> > > --- a/include/linux/ceph/ceph_fs.h
> > > +++ b/include/linux/ceph/ceph_fs.h
> > > @@ -130,6 +130,7 @@ struct ceph_dir_layout {
> > > #define CEPH_MSG_CLIENT_REQUEST 24
> > > #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
> > > #define CEPH_MSG_CLIENT_REPLY 26
> > > +#define CEPH_MSG_CLIENT_METRICS 29
> > > #define CEPH_MSG_CLIENT_CAPS 0x310
> > > #define CEPH_MSG_CLIENT_LEASE 0x311
> > > #define CEPH_MSG_CLIENT_SNAP 0x312
>
>
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] ceph: periodically send perf metrics to ceph
2020-06-16 18:58 ` Jeff Layton
@ 2020-06-17 0:28 ` Xiubo Li
0 siblings, 0 replies; 9+ messages in thread
From: Xiubo Li @ 2020-06-17 0:28 UTC (permalink / raw)
To: Jeff Layton, idryomov; +Cc: zyan, pdonnell, ceph-devel
On 2020/6/17 2:58, Jeff Layton wrote:
> On Wed, 2020-06-17 at 02:14 +0800, Xiubo Li wrote:
>> On 2020/6/16 21:40, Jeff Layton wrote:
>>> On Tue, 2020-06-16 at 08:52 -0400,xiubli@redhat.com wrote:
>>>> From: Xiubo Li<xiubli@redhat.com>
>>>>
>>>> This will send the caps/read/write/metadata metrics to any available
>>>> MDS only once per second as default, which will be the same as the
>>>> userland client, or every metric_send_interval seconds, which is a
>>>> module parameter.
>>>>
>>>> URL:https://tracker.ceph.com/issues/43215
>>>> Signed-off-by: Xiubo Li<xiubli@redhat.com>
>>>> ---
>>>> fs/ceph/mds_client.c | 46 +++++++++------
>>>> fs/ceph/mds_client.h | 4 ++
>>>> fs/ceph/metric.c | 133 +++++++++++++++++++++++++++++++++++++++++++
>>>> fs/ceph/metric.h | 78 +++++++++++++++++++++++++
>>>> fs/ceph/super.c | 29 ++++++++++
>>>> include/linux/ceph/ceph_fs.h | 1 +
>>>> 6 files changed, 274 insertions(+), 17 deletions(-)
>>>>
>>> Long patch! Might not hurt to break out some of the cleanups into
>>> separate patches, but they're fairly straighforward so I won't require
>>> it.
>> Sure, I will split the patch into small ones if possible.
>>
>>
>>> [...]
>>>
>>>> /* This is the global metrics */
>>>> struct ceph_client_metric {
>>>> atomic64_t total_dentries;
>>>> @@ -35,8 +100,21 @@ struct ceph_client_metric {
>>>> ktime_t metadata_latency_sq_sum;
>>>> ktime_t metadata_latency_min;
>>>> ktime_t metadata_latency_max;
>>>> +
>>>> + struct delayed_work delayed_work; /* delayed work */
>>>> };
>>>>
>>>> +static inline void metric_schedule_delayed(struct ceph_client_metric *m)
>>>> +{
>>>> + /* per second as default */
>>>> + unsigned int hz = round_jiffies_relative(HZ * metric_send_interval);
>>>> +
>>>> + if (!metric_send_interval)
>>>> + return;
>>>> +
>>>> + schedule_delayed_work(&m->delayed_work, hz);
>>>> +}
>>>> +
>>> Should this also be gated on a MDS feature bit or anything? What happens
>>> if we're running against a really old MDS that doesn't support these
>>> stats? Does it just drop them on the floor? Should we disable sending
>>> them in that case?
>> Tested without metric code in the ceph and when ceph saw a unknown type
>> message, it will close the socket connection directly.
>>
> Ouch, that sounds bad. How does the userland client handle this? This
> seems like the kind of thing we usually add a feature bit for
> somewhere...
The userland doesn't handle this case too, I will try to add a feature
bit or something to fix it.
>>>> extern int ceph_metric_init(struct ceph_client_metric *m);
>>>> extern void ceph_metric_destroy(struct ceph_client_metric *m);
>>>>
>>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>>>> index c9784eb1..66a940c 100644
>>>> --- a/fs/ceph/super.c
>>>> +++ b/fs/ceph/super.c
>>>> @@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void)
>>>> destroy_caches();
>>>> }
>>>>
>>>> +static int param_set_metric_interval(const char *val, const struct kernel_param *kp)
>>>> +{
>>>> + int ret;
>>>> + unsigned int interval;
>>>> +
>>>> + ret = kstrtouint(val, 0, &interval);
>>>> + if (ret < 0) {
>>>> + pr_err("Failed to parse metric interval '%s'\n", val);
>>>> + return ret;
>>>> + }
>>>> +
>>>> + if (interval > 5 || interval < 1) {
>>>> + pr_err("Invalid metric interval %u\n", interval);
>>>> + return -EINVAL;
>>>> + }
>>>> +
>>>> + metric_send_interval = interval;
>>>> + return 0;
>>>> +}
>>>> +
>>>> +static const struct kernel_param_ops param_ops_metric_interval = {
>>>> + .set = param_set_metric_interval,
>>>> + .get = param_get_uint,
>>>> +};
>>>> +
>>>> +unsigned int metric_send_interval = 1;
>>>> +module_param_cb(metric_send_interval, ¶m_ops_metric_interval, &metric_send_interval, 0644);
>>>> +MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)");
>>>> +
>>> Aren't valid values 0-5, with 0 disabling this feature? That's
>>> what metric_schedule_delayed() seems to indicate...
>> What value should it be as default ? 0 to disable it or 1 ?
>>
> I don't have strong preference here. It's probably safe enough to make
> it 1. I do like the ability to turn this off though, as that gives us a
> workaround in the event that it does cause trouble.
Sure, I will add it back.
>
>> Maybe in future we should let the ceph side send a notification/request
>> to all the clients if it wants to collect the metrics and then we could
>> enable it here, and defautly just disable it.
>>
> That seems like a heavier-weight solution than is called for here. This
> seems like something that ought to have a feature bit. Then if that's
> turned off, we can just disable this altogether.
Okay, sounds better.
Thanks
>>>> module_init(init_ceph);
>>>> module_exit(exit_ceph);
>>>>
>>>> diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
>>>> index ebf5ba6..455e9b9 100644
>>>> --- a/include/linux/ceph/ceph_fs.h
>>>> +++ b/include/linux/ceph/ceph_fs.h
>>>> @@ -130,6 +130,7 @@ struct ceph_dir_layout {
>>>> #define CEPH_MSG_CLIENT_REQUEST 24
>>>> #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
>>>> #define CEPH_MSG_CLIENT_REPLY 26
>>>> +#define CEPH_MSG_CLIENT_METRICS 29
>>>> #define CEPH_MSG_CLIENT_CAPS 0x310
>>>> #define CEPH_MSG_CLIENT_LEASE 0x311
>>>> #define CEPH_MSG_CLIENT_SNAP 0x312
>>
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2020-06-17 0:28 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-16 12:52 [PATCH 0/2] ceph: periodically send perf metrics to ceph xiubli
2020-06-16 12:52 ` [PATCH 1/2] " xiubli
2020-06-16 13:40 ` Jeff Layton
2020-06-16 18:14 ` Xiubo Li
2020-06-16 18:58 ` Jeff Layton
2020-06-17 0:28 ` Xiubo Li
2020-06-16 12:52 ` [PATCH 2/2] ceph: send client provided metric flags in client metadata xiubli
2020-06-16 14:11 ` Jeff Layton
2020-06-16 14:18 ` Xiubo Li
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.