* [PATCH 0/2] ceph: periodically send perf metrics to ceph
@ 2020-06-16 12:52 xiubli
  2020-06-16 12:52 ` [PATCH 1/2] " xiubli
  2020-06-16 12:52 ` [PATCH 2/2] ceph: send client provided metric flags in client metadata xiubli
  0 siblings, 2 replies; 9+ messages in thread
From: xiubli @ 2020-06-16 12:52 UTC (permalink / raw)
  To: jlayton, idryomov; +Cc: zyan, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

This series is based on the previous patches that added the metrics
support in kceph [1] and on the MDS-side work that records and forwards
the client-side metrics to the manager [2].

This will send the caps/read/write/metadata metrics to any available
MDS once per second by default, matching the userland client, or every
metric_send_interval seconds, where metric_send_interval is a module
parameter whose valid values are 1~5 seconds.

It will also send the metric flags to the MDS; currently the cap, read
latency, write latency and metadata latency metrics are supported.

[1] https://patchwork.kernel.org/project/ceph-devel/list/?series=238907 [Merged]
[2] https://github.com/ceph/ceph/pull/26004 [Merged]

Xiubo Li (2):
  ceph: periodically send perf metrics to ceph
  ceph: send client provided metric flags in client metadata

 fs/ceph/mds_client.c         |  93 +++++++++++++++++++++++-------
 fs/ceph/mds_client.h         |   4 ++
 fs/ceph/metric.c             | 133 +++++++++++++++++++++++++++++++++++++++++++
 fs/ceph/metric.h             |  91 +++++++++++++++++++++++++++++
 fs/ceph/super.c              |  29 ++++++++++
 include/linux/ceph/ceph_fs.h |   1 +
 6 files changed, 332 insertions(+), 19 deletions(-)

-- 
1.8.3.1


* [PATCH 1/2] ceph: periodically send perf metrics to ceph
  2020-06-16 12:52 [PATCH 0/2] ceph: periodically send perf metrics to ceph xiubli
@ 2020-06-16 12:52 ` xiubli
  2020-06-16 13:40   ` Jeff Layton
  2020-06-16 12:52 ` [PATCH 2/2] ceph: send client provided metric flags in client metadata xiubli
  1 sibling, 1 reply; 9+ messages in thread
From: xiubli @ 2020-06-16 12:52 UTC (permalink / raw)
  To: jlayton, idryomov; +Cc: zyan, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

This will send the caps/read/write/metadata metrics to any available
MDS once per second by default, matching the userland client, or every
metric_send_interval seconds, where metric_send_interval is a module
parameter.

URL: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/mds_client.c         |  46 +++++++++------
 fs/ceph/mds_client.h         |   4 ++
 fs/ceph/metric.c             | 133 +++++++++++++++++++++++++++++++++++++++++++
 fs/ceph/metric.h             |  78 +++++++++++++++++++++++++
 fs/ceph/super.c              |  29 ++++++++++
 include/linux/ceph/ceph_fs.h |   1 +
 6 files changed, 274 insertions(+), 17 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a504971..f996363 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4263,6 +4263,30 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
 	ceph_force_reconnect(fsc->sb);
 }
 
+bool check_session_state(struct ceph_mds_client *mdsc,
+			 struct ceph_mds_session *s)
+{
+	if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
+		dout("resending session close request for mds%d\n",
+				s->s_mds);
+		request_close_session(mdsc, s);
+		return false;
+	}
+	if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
+		if (s->s_state == CEPH_MDS_SESSION_OPEN) {
+			s->s_state = CEPH_MDS_SESSION_HUNG;
+			pr_info("mds%d hung\n", s->s_mds);
+		}
+	}
+	if (s->s_state == CEPH_MDS_SESSION_NEW ||
+	    s->s_state == CEPH_MDS_SESSION_RESTARTING ||
+	    s->s_state == CEPH_MDS_SESSION_REJECTED)
+		/* this mds is failed or recovering, just wait */
+		return false;
+
+	return true;
+}
+
 /*
  * delayed work -- periodically trim expired leases, renew caps with mds
  */
@@ -4294,23 +4318,8 @@ static void delayed_work(struct work_struct *work)
 		struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
 		if (!s)
 			continue;
-		if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
-			dout("resending session close request for mds%d\n",
-			     s->s_mds);
-			request_close_session(mdsc, s);
-			ceph_put_mds_session(s);
-			continue;
-		}
-		if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
-			if (s->s_state == CEPH_MDS_SESSION_OPEN) {
-				s->s_state = CEPH_MDS_SESSION_HUNG;
-				pr_info("mds%d hung\n", s->s_mds);
-			}
-		}
-		if (s->s_state == CEPH_MDS_SESSION_NEW ||
-		    s->s_state == CEPH_MDS_SESSION_RESTARTING ||
-		    s->s_state == CEPH_MDS_SESSION_REJECTED) {
-			/* this mds is failed or recovering, just wait */
+
+		if (!check_session_state(mdsc, s)) {
 			ceph_put_mds_session(s);
 			continue;
 		}
@@ -4616,6 +4625,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
 
 	cancel_work_sync(&mdsc->cap_reclaim_work);
 	cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
+	cancel_delayed_work_sync(&mdsc->metric.delayed_work); /* cancel timer */
 
 	dout("stopped\n");
 }
@@ -4658,6 +4668,7 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
 {
 	dout("stop\n");
 	cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
+	cancel_delayed_work_sync(&mdsc->metric.delayed_work); /* cancel timer */
 	if (mdsc->mdsmap)
 		ceph_mdsmap_destroy(mdsc->mdsmap);
 	kfree(mdsc->sessions);
@@ -4815,6 +4826,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 
 	mutex_unlock(&mdsc->mutex);
 	schedule_delayed(mdsc);
+	metric_schedule_delayed(&mdsc->metric);
 	return;
 
 bad_unlock:
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 5e0c407..bcb3892 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -18,6 +18,7 @@
 #include <linux/ceph/auth.h>
 
 #include "metric.h"
+#include "super.h"
 
 /* The first 8 bits are reserved for old ceph releases */
 enum ceph_feature_type {
@@ -476,6 +477,9 @@ struct ceph_mds_client {
 
 extern const char *ceph_mds_op_name(int op);
 
+extern bool check_session_state(struct ceph_mds_client *mdsc,
+				struct ceph_mds_session *s);
+
 extern struct ceph_mds_session *
 __ceph_lookup_mds_session(struct ceph_mds_client *, int mds);
 
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index 9217f35..3c9b0ec 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -1,10 +1,141 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/types.h>
 #include <linux/percpu_counter.h>
 #include <linux/math64.h>
 
 #include "metric.h"
+#include "mds_client.h"
+
+static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
+				   struct ceph_mds_session *s,
+				   u64 nr_caps)
+{
+	struct ceph_metric_head *head;
+	struct ceph_metric_cap *cap;
+	struct ceph_metric_read_latency *read;
+	struct ceph_metric_write_latency *write;
+	struct ceph_metric_metadata_latency *meta;
+	struct ceph_client_metric *m = &mdsc->metric;
+	struct ceph_msg *msg;
+	struct timespec64 ts;
+	s64 sum, total;
+	s32 items = 0;
+	s32 len;
+
+	len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
+	      + sizeof(*meta);
+
+	msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
+	if (!msg) {
+		pr_err("send metrics to mds%d, failed to allocate message\n",
+		       s->s_mds);
+		return false;
+	}
+
+	head = msg->front.iov_base;
+
+	/* encode the cap metric */
+	cap = (struct ceph_metric_cap *)(head + 1);
+	cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
+	cap->ver = 1;
+	cap->compat = 1;
+	cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
+	cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit));
+	cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis));
+	cap->total = cpu_to_le64(nr_caps);
+	items++;
+
+	/* encode the read latency metric */
+	read = (struct ceph_metric_read_latency *)(cap + 1);
+	read->type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
+	read->ver = 1;
+	read->compat = 1;
+	read->data_len = cpu_to_le32(sizeof(*read) - 10);
+	total = m->total_reads;
+	sum = m->read_latency_sum;
+	jiffies_to_timespec64(sum, &ts);
+	read->sec = cpu_to_le32(ts.tv_sec);
+	read->nsec = cpu_to_le32(ts.tv_nsec);
+	items++;
+
+	/* encode the write latency metric */
+	write = (struct ceph_metric_write_latency *)(read + 1);
+	write->type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
+	write->ver = 1;
+	write->compat = 1;
+	write->data_len = cpu_to_le32(sizeof(*write) - 10);
+	total = m->total_writes;
+	sum = m->write_latency_sum;
+	jiffies_to_timespec64(sum, &ts);
+	write->sec = cpu_to_le32(ts.tv_sec);
+	write->nsec = cpu_to_le32(ts.tv_nsec);
+	items++;
+
+	/* encode the metadata latency metric */
+	meta = (struct ceph_metric_metadata_latency *)(write + 1);
+	meta->type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
+	meta->ver = 1;
+	meta->compat = 1;
+	meta->data_len = cpu_to_le32(sizeof(*meta) - 10);
+	total = m->total_metadatas;
+	sum = m->metadata_latency_sum;
+	jiffies_to_timespec64(sum, &ts);
+	meta->sec = cpu_to_le32(ts.tv_sec);
+	meta->nsec = cpu_to_le32(ts.tv_nsec);
+	items++;
+
+	put_unaligned_le32(items, &head->num);
+	msg->front.iov_len = cpu_to_le32(len);
+	msg->hdr.version = cpu_to_le16(1);
+	msg->hdr.compat_version = cpu_to_le16(1);
+	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+	dout("send metrics to mds%d %p\n", s->s_mds, msg);
+	ceph_con_send(&s->s_con, msg);
+
+	return true;
+}
+
+static void metric_delayed_work(struct work_struct *work)
+{
+	struct ceph_client_metric *m =
+		container_of(work, struct ceph_client_metric, delayed_work.work);
+	struct ceph_mds_client *mdsc =
+		container_of(m, struct ceph_mds_client, metric);
+	struct ceph_mds_session *s;
+	u64 nr_caps = 0;
+	bool ret;
+	int i;
+
+	mutex_lock(&mdsc->mutex);
+	for (i = 0; i < mdsc->max_sessions; i++) {
+		s = __ceph_lookup_mds_session(mdsc, i);
+		if (!s)
+			continue;
+		nr_caps += s->s_nr_caps;
+		ceph_put_mds_session(s);
+	}
+
+	for (i = 0; i < mdsc->max_sessions; i++) {
+		s = __ceph_lookup_mds_session(mdsc, i);
+		if (!s)
+			continue;
+		if (!check_session_state(mdsc, s)) {
+			ceph_put_mds_session(s);
+			continue;
+		}
+
+		/* Only send the metric once in any available session */
+		ret = ceph_mdsc_send_metrics(mdsc, s, nr_caps);
+		ceph_put_mds_session(s);
+		if (ret)
+			break;
+	}
+	mutex_unlock(&mdsc->mutex);
+
+	metric_schedule_delayed(&mdsc->metric);
+}
 
 int ceph_metric_init(struct ceph_client_metric *m)
 {
@@ -51,6 +182,8 @@ int ceph_metric_init(struct ceph_client_metric *m)
 	m->total_metadatas = 0;
 	m->metadata_latency_sum = 0;
 
+	INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work);
+
 	return 0;
 
 err_i_caps_mis:
diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
index ccd8128..2af9e0b 100644
--- a/fs/ceph/metric.h
+++ b/fs/ceph/metric.h
@@ -6,6 +6,71 @@
 #include <linux/percpu_counter.h>
 #include <linux/ktime.h>
 
+extern unsigned int metric_send_interval;
+
+enum ceph_metric_type {
+	CLIENT_METRIC_TYPE_CAP_INFO,
+	CLIENT_METRIC_TYPE_READ_LATENCY,
+	CLIENT_METRIC_TYPE_WRITE_LATENCY,
+	CLIENT_METRIC_TYPE_METADATA_LATENCY,
+	CLIENT_METRIC_TYPE_DENTRY_LEASE,
+
+	CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
+};
+
+/* metric caps header */
+struct ceph_metric_cap {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  compat;
+
+	__le32 data_len; /* length of sizeof(hit + mis + total) */
+	__le64 hit;
+	__le64 mis;
+	__le64 total;
+} __packed;
+
+/* metric read latency header */
+struct ceph_metric_read_latency {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  compat;
+
+	__le32 data_len; /* length of sizeof(sec + nsec) */
+	__le32 sec;
+	__le32 nsec;
+} __packed;
+
+/* metric write latency header */
+struct ceph_metric_write_latency {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  compat;
+
+	__le32 data_len; /* length of sizeof(sec + nsec) */
+	__le32 sec;
+	__le32 nsec;
+} __packed;
+
+/* metric metadata latency header */
+struct ceph_metric_metadata_latency {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  compat;
+
+	__le32 data_len; /* length of sizeof(sec + nsec) */
+	__le32 sec;
+	__le32 nsec;
+} __packed;
+
+struct ceph_metric_head {
+	__le32 num;	/* the number of metrics that will be sent */
+} __packed;
+
 /* This is the global metrics */
 struct ceph_client_metric {
 	atomic64_t            total_dentries;
@@ -35,8 +100,21 @@ struct ceph_client_metric {
 	ktime_t metadata_latency_sq_sum;
 	ktime_t metadata_latency_min;
 	ktime_t metadata_latency_max;
+
+	struct delayed_work delayed_work;  /* delayed work */
 };
 
+static inline void metric_schedule_delayed(struct ceph_client_metric *m)
+{
+	/* per second as default */
+	unsigned int hz = round_jiffies_relative(HZ * metric_send_interval);
+
+	if (!metric_send_interval)
+		return;
+
+	schedule_delayed_work(&m->delayed_work, hz);
+}
+
 extern int ceph_metric_init(struct ceph_client_metric *m);
 extern void ceph_metric_destroy(struct ceph_client_metric *m);
 
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index c9784eb1..66a940c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void)
 	destroy_caches();
 }
 
+static int param_set_metric_interval(const char *val, const struct kernel_param *kp)
+{
+	int ret;
+	unsigned int interval;
+
+	ret = kstrtouint(val, 0, &interval);
+	if (ret < 0) {
+		pr_err("Failed to parse metric interval '%s'\n", val);
+		return ret;
+	}
+
+	if (interval > 5 || interval < 1) {
+		pr_err("Invalid metric interval %u\n", interval);
+		return -EINVAL;
+	}
+
+	metric_send_interval = interval;
+	return 0;
+}
+
+static const struct kernel_param_ops param_ops_metric_interval = {
+	.set = param_set_metric_interval,
+	.get = param_get_uint,
+};
+
+unsigned int metric_send_interval = 1;
+module_param_cb(metric_send_interval, &param_ops_metric_interval, &metric_send_interval, 0644);
+MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)");
+
 module_init(init_ceph);
 module_exit(exit_ceph);
 
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index ebf5ba6..455e9b9 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -130,6 +130,7 @@ struct ceph_dir_layout {
 #define CEPH_MSG_CLIENT_REQUEST         24
 #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
 #define CEPH_MSG_CLIENT_REPLY           26
+#define CEPH_MSG_CLIENT_METRICS         29
 #define CEPH_MSG_CLIENT_CAPS            0x310
 #define CEPH_MSG_CLIENT_LEASE           0x311
 #define CEPH_MSG_CLIENT_SNAP            0x312
-- 
1.8.3.1


* [PATCH 2/2] ceph: send client provided metric flags in client metadata
  2020-06-16 12:52 [PATCH 0/2] ceph: periodically send perf metrics to ceph xiubli
  2020-06-16 12:52 ` [PATCH 1/2] " xiubli
@ 2020-06-16 12:52 ` xiubli
  2020-06-16 14:11   ` Jeff Layton
  1 sibling, 1 reply; 9+ messages in thread
From: xiubli @ 2020-06-16 12:52 UTC (permalink / raw)
  To: jlayton, idryomov; +Cc: zyan, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

This will send the metric flags to the MDS; currently the cap, read
latency, write latency and metadata latency metrics are supported.

URL: https://tracker.ceph.com/issues/43435
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/mds_client.c | 47 +++++++++++++++++++++++++++++++++++++++++++++--
 fs/ceph/metric.h     | 13 +++++++++++++
 2 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f996363..8b7ff41 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1188,6 +1188,41 @@ static void encode_supported_features(void **p, void *end)
 	}
 }
 
+static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED;
+#define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8)
+static void encode_metric_spec(void **p, void *end)
+{
+	static const size_t count = ARRAY_SIZE(metric_bits);
+
+	/* header */
+	BUG_ON(*p + 2 > end);
+	ceph_encode_8(p, 1); /* version */
+	ceph_encode_8(p, 1); /* compat */
+
+	if (count > 0) {
+		size_t i;
+		size_t size = METRIC_BYTES(count);
+
+		BUG_ON(*p + 4 + 4 + size > end);
+
+		/* metric spec info length */
+		ceph_encode_32(p, 4 + size);
+
+		/* metric spec */
+		ceph_encode_32(p, size);
+		memset(*p, 0, size);
+		for (i = 0; i < count; i++)
+			((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8);
+		*p += size;
+	} else {
+		BUG_ON(*p + 4 + 4 > end);
+		/* metric spec info length */
+		ceph_encode_32(p, 4);
+		/* metric spec */
+		ceph_encode_32(p, 0);
+	}
+}
+
 /*
  * session message, specialization for CEPH_SESSION_REQUEST_OPEN
  * to include additional client metadata fields.
@@ -1227,6 +1262,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 		size = FEATURE_BYTES(count);
 	extra_bytes += 4 + size;
 
+	/* metric spec */
+	size = 0;
+	count = ARRAY_SIZE(metric_bits);
+	if (count > 0)
+		size = METRIC_BYTES(count);
+	extra_bytes += 2 + 4 + 4 + size;
+
 	/* Allocate the message */
 	msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
 			   GFP_NOFS, false);
@@ -1245,9 +1287,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 	 * Serialize client metadata into waiting buffer space, using
 	 * the format that userspace expects for map<string, string>
 	 *
-	 * ClientSession messages with metadata are v3
+	 * ClientSession messages with metadata are v4
 	 */
-	msg->hdr.version = cpu_to_le16(3);
+	msg->hdr.version = cpu_to_le16(4);
 	msg->hdr.compat_version = cpu_to_le16(1);
 
 	/* The write pointer, following the session_head structure */
@@ -1270,6 +1312,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 	}
 
 	encode_supported_features(&p, end);
+	encode_metric_spec(&p, end);
 	msg->front.iov_len = p - msg->front.iov_base;
 	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
 
diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
index 2af9e0b..f34adf7 100644
--- a/fs/ceph/metric.h
+++ b/fs/ceph/metric.h
@@ -18,6 +18,19 @@ enum ceph_metric_type {
 	CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
 };
 
+/*
+ * This will always have the highest metric bit value
+ * as the last element of the array.
+ */
+#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED {	\
+	CLIENT_METRIC_TYPE_CAP_INFO,		\
+	CLIENT_METRIC_TYPE_READ_LATENCY,	\
+	CLIENT_METRIC_TYPE_WRITE_LATENCY,	\
+	CLIENT_METRIC_TYPE_METADATA_LATENCY,	\
+						\
+	CLIENT_METRIC_TYPE_MAX,			\
+}
+
 /* metric caps header */
 struct ceph_metric_cap {
 	__le32 type;     /* ceph metric type */
-- 
1.8.3.1


* Re: [PATCH 1/2] ceph: periodically send perf metrics to ceph
  2020-06-16 12:52 ` [PATCH 1/2] " xiubli
@ 2020-06-16 13:40   ` Jeff Layton
  2020-06-16 18:14     ` Xiubo Li
  0 siblings, 1 reply; 9+ messages in thread
From: Jeff Layton @ 2020-06-16 13:40 UTC (permalink / raw)
  To: xiubli, idryomov; +Cc: zyan, pdonnell, ceph-devel

On Tue, 2020-06-16 at 08:52 -0400, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> This will send the caps/read/write/metadata metrics to any available
> MDS only once per second as default, which will be the same as the
> userland client, or every metric_send_interval seconds, which is a
> module parameter.
> 
> URL: https://tracker.ceph.com/issues/43215
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/mds_client.c         |  46 +++++++++------
>  fs/ceph/mds_client.h         |   4 ++
>  fs/ceph/metric.c             | 133 +++++++++++++++++++++++++++++++++++++++++++
>  fs/ceph/metric.h             |  78 +++++++++++++++++++++++++
>  fs/ceph/super.c              |  29 ++++++++++
>  include/linux/ceph/ceph_fs.h |   1 +
>  6 files changed, 274 insertions(+), 17 deletions(-)
> 

Long patch! Might not hurt to break out some of the cleanups into
separate patches, but they're fairly straightforward so I won't require
it.

[...]

>  /* This is the global metrics */
>  struct ceph_client_metric {
>  	atomic64_t            total_dentries;
> @@ -35,8 +100,21 @@ struct ceph_client_metric {
>  	ktime_t metadata_latency_sq_sum;
>  	ktime_t metadata_latency_min;
>  	ktime_t metadata_latency_max;
> +
> +	struct delayed_work delayed_work;  /* delayed work */
>  };
>  
> +static inline void metric_schedule_delayed(struct ceph_client_metric *m)
> +{
> +	/* per second as default */
> +	unsigned int hz = round_jiffies_relative(HZ * metric_send_interval);
> +
> +	if (!metric_send_interval)
> +		return;
> +
> +	schedule_delayed_work(&m->delayed_work, hz);
> +}
> +

Should this also be gated on a MDS feature bit or anything? What happens
if we're running against a really old MDS that doesn't support these
stats? Does it just drop them on the floor? Should we disable sending
them in that case?

>  extern int ceph_metric_init(struct ceph_client_metric *m);
>  extern void ceph_metric_destroy(struct ceph_client_metric *m);
>  
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index c9784eb1..66a940c 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void)
>  	destroy_caches();
>  }
>  
> +static int param_set_metric_interval(const char *val, const struct kernel_param *kp)
> +{
> +	int ret;
> +	unsigned int interval;
> +
> +	ret = kstrtouint(val, 0, &interval);
> +	if (ret < 0) {
> +		pr_err("Failed to parse metric interval '%s'\n", val);
> +		return ret;
> +	}
> +
> +	if (interval > 5 || interval < 1) {
> +		pr_err("Invalid metric interval %u\n", interval);
> +		return -EINVAL;
> +	}
> +
> +	metric_send_interval = interval;
> +	return 0;
> +}
> +
> +static const struct kernel_param_ops param_ops_metric_interval = {
> +	.set = param_set_metric_interval,
> +	.get = param_get_uint,
> +};
> +
> +unsigned int metric_send_interval = 1;
> +module_param_cb(metric_send_interval, &param_ops_metric_interval, &metric_send_interval, 0644);
> +MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)");
> +

Aren't valid values 0-5, with 0 disabling this feature? That's
what metric_schedule_delayed() seems to indicate...

>  module_init(init_ceph);
>  module_exit(exit_ceph);
>  
> diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
> index ebf5ba6..455e9b9 100644
> --- a/include/linux/ceph/ceph_fs.h
> +++ b/include/linux/ceph/ceph_fs.h
> @@ -130,6 +130,7 @@ struct ceph_dir_layout {
>  #define CEPH_MSG_CLIENT_REQUEST         24
>  #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
>  #define CEPH_MSG_CLIENT_REPLY           26
> +#define CEPH_MSG_CLIENT_METRICS         29
>  #define CEPH_MSG_CLIENT_CAPS            0x310
>  #define CEPH_MSG_CLIENT_LEASE           0x311
>  #define CEPH_MSG_CLIENT_SNAP            0x312

-- 
Jeff Layton <jlayton@kernel.org>


* Re: [PATCH 2/2] ceph: send client provided metric flags in client metadata
  2020-06-16 12:52 ` [PATCH 2/2] ceph: send client provided metric flags in client metadata xiubli
@ 2020-06-16 14:11   ` Jeff Layton
  2020-06-16 14:18     ` Xiubo Li
  0 siblings, 1 reply; 9+ messages in thread
From: Jeff Layton @ 2020-06-16 14:11 UTC (permalink / raw)
  To: xiubli, idryomov; +Cc: zyan, pdonnell, ceph-devel

On Tue, 2020-06-16 at 08:52 -0400, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> Will send the metric flags to MDS, currently it supports the cap,
> read latency, write latency and metadata latency.
> 
> URL: https://tracker.ceph.com/issues/43435
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/mds_client.c | 47 +++++++++++++++++++++++++++++++++++++++++++++--
>  fs/ceph/metric.h     | 13 +++++++++++++
>  2 files changed, 58 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index f996363..8b7ff41 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1188,6 +1188,41 @@ static void encode_supported_features(void **p, void *end)
>  	}
>  }
>  
> +static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED;
> +#define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8)
> +static void encode_metric_spec(void **p, void *end)
> +{
> +	static const size_t count = ARRAY_SIZE(metric_bits);
> +
> +	/* header */
> +	BUG_ON(*p + 2 > end);
> +	ceph_encode_8(p, 1); /* version */
> +	ceph_encode_8(p, 1); /* compat */
> +
> +	if (count > 0) {
> +		size_t i;
> +		size_t size = METRIC_BYTES(count);
> +
> +		BUG_ON(*p + 4 + 4 + size > end);
> +

I know it's unlikely to ever trip, but let's not BUG_ON here. Maybe just
WARN and return an error...

Of course, we'd probably want that error to bubble up to the callers of
__open_session() but none of its callers check the return value. Would
you mind fixing that while you're in there too?


> +		/* metric spec info length */
> +		ceph_encode_32(p, 4 + size);
> +
> +		/* metric spec */
> +		ceph_encode_32(p, size);
> +		memset(*p, 0, size);
> +		for (i = 0; i < count; i++)
> +			((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8);
> +		*p += size;
> +	} else {
> +		BUG_ON(*p + 4 + 4 > end);
> +		/* metric spec info length */
> +		ceph_encode_32(p, 4);
> +		/* metric spec */
> +		ceph_encode_32(p, 0);
> +	}
> +}
> +
>  /*
>   * session message, specialization for CEPH_SESSION_REQUEST_OPEN
>   * to include additional client metadata fields.
> @@ -1227,6 +1262,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
>  		size = FEATURE_BYTES(count);
>  	extra_bytes += 4 + size;
>  
> +	/* metric spec */
> +	size = 0;
> +	count = ARRAY_SIZE(metric_bits);
> +	if (count > 0)
> +		size = METRIC_BYTES(count);
> +	extra_bytes += 2 + 4 + 4 + size;
> +
>  	/* Allocate the message */
>  	msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
>  			   GFP_NOFS, false);
> @@ -1245,9 +1287,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
>  	 * Serialize client metadata into waiting buffer space, using
>  	 * the format that userspace expects for map<string, string>
>  	 *
> -	 * ClientSession messages with metadata are v3
> +	 * ClientSession messages with metadata are v4
>  	 */
> -	msg->hdr.version = cpu_to_le16(3);
> +	msg->hdr.version = cpu_to_le16(4);
>  	msg->hdr.compat_version = cpu_to_le16(1);
>  
>  	/* The write pointer, following the session_head structure */
> @@ -1270,6 +1312,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
>  	}
>  
>  	encode_supported_features(&p, end);
> +	encode_metric_spec(&p, end);
>  	msg->front.iov_len = p - msg->front.iov_base;
>  	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
>  
> diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
> index 2af9e0b..f34adf7 100644
> --- a/fs/ceph/metric.h
> +++ b/fs/ceph/metric.h
> @@ -18,6 +18,19 @@ enum ceph_metric_type {
>  	CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
>  };
>  
> +/*
> + * This will always have the highest metric bit value
> + * as the last element of the array.
> + */
> +#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED {	\
> +	CLIENT_METRIC_TYPE_CAP_INFO,		\
> +	CLIENT_METRIC_TYPE_READ_LATENCY,	\
> +	CLIENT_METRIC_TYPE_WRITE_LATENCY,	\
> +	CLIENT_METRIC_TYPE_METADATA_LATENCY,	\
> +						\
> +	CLIENT_METRIC_TYPE_MAX,			\
> +}
> +
>  /* metric caps header */
>  struct ceph_metric_cap {
>  	__le32 type;     /* ceph metric type */

-- 
Jeff Layton <jlayton@kernel.org>


* Re: [PATCH 2/2] ceph: send client provided metric flags in client metadata
  2020-06-16 14:11   ` Jeff Layton
@ 2020-06-16 14:18     ` Xiubo Li
  0 siblings, 0 replies; 9+ messages in thread
From: Xiubo Li @ 2020-06-16 14:18 UTC (permalink / raw)
  To: Jeff Layton, idryomov; +Cc: zyan, pdonnell, ceph-devel

On 2020/6/16 22:11, Jeff Layton wrote:
> On Tue, 2020-06-16 at 08:52 -0400, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Will send the metric flags to MDS, currently it supports the cap,
>> read latency, write latency and metadata latency.
>>
>> URL: https://tracker.ceph.com/issues/43435
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/mds_client.c | 47 +++++++++++++++++++++++++++++++++++++++++++++--
>>   fs/ceph/metric.h     | 13 +++++++++++++
>>   2 files changed, 58 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index f996363..8b7ff41 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -1188,6 +1188,41 @@ static void encode_supported_features(void **p, void *end)
>>   	}
>>   }
>>   
>> +static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED;
>> +#define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8)
>> +static void encode_metric_spec(void **p, void *end)
>> +{
>> +	static const size_t count = ARRAY_SIZE(metric_bits);
>> +
>> +	/* header */
>> +	BUG_ON(*p + 2 > end);
>> +	ceph_encode_8(p, 1); /* version */
>> +	ceph_encode_8(p, 1); /* compat */
>> +
>> +	if (count > 0) {
>> +		size_t i;
>> +		size_t size = METRIC_BYTES(count);
>> +
>> +		BUG_ON(*p + 4 + 4 + size > end);
>> +
> I know it's unlikely to ever trip, but let's not BUG_ON here. Maybe just
> WARN and return an error...
>
> Of course, we'd probably want that error to bubble up to the callers of
> __open_session() but none of its callers check the return value. Would
> you mind fixing that while you're in there too?

Sure, this makes sense, and checkpatch.pl also suggests using WARN. I
will fix them all later.
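
For reference, something like the following untested sketch is what I
have in mind for encode_metric_spec(): WARN instead of BUG, return an
error, and have create_session_open_msg() and __open_session()
propagate it (the exact error value is only a placeholder):

static int encode_metric_spec(void **p, void *end)
{
	static const size_t count = ARRAY_SIZE(metric_bits);

	/* header */
	if (WARN_ON_ONCE(*p + 2 > end))
		return -ERANGE;

	ceph_encode_8(p, 1); /* version */
	ceph_encode_8(p, 1); /* compat */

	if (count > 0) {
		size_t i;
		size_t size = METRIC_BYTES(count);

		if (WARN_ON_ONCE(*p + 4 + 4 + size > end))
			return -ERANGE;

		/* metric spec info length */
		ceph_encode_32(p, 4 + size);

		/* metric spec */
		ceph_encode_32(p, size);
		memset(*p, 0, size);
		for (i = 0; i < count; i++)
			((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8);
		*p += size;
	} else {
		if (WARN_ON_ONCE(*p + 4 + 4 > end))
			return -ERANGE;

		/* metric spec info length */
		ceph_encode_32(p, 4);
		/* metric spec */
		ceph_encode_32(p, 0);
	}

	return 0;
}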

Thanks
BRs
Xiubo


>
>> +		/* metric spec info length */
>> +		ceph_encode_32(p, 4 + size);
>> +
>> +		/* metric spec */
>> +		ceph_encode_32(p, size);
>> +		memset(*p, 0, size);
>> +		for (i = 0; i < count; i++)
>> +			((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8);
>> +		*p += size;
>> +	} else {
>> +		BUG_ON(*p + 4 + 4 > end);
>> +		/* metric spec info length */
>> +		ceph_encode_32(p, 4);
>> +		/* metric spec */
>> +		ceph_encode_32(p, 0);
>> +	}
>> +}
>> +
>>   /*
>>    * session message, specialization for CEPH_SESSION_REQUEST_OPEN
>>    * to include additional client metadata fields.
>> @@ -1227,6 +1262,13 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
>>   		size = FEATURE_BYTES(count);
>>   	extra_bytes += 4 + size;
>>   
>> +	/* metric spec */
>> +	size = 0;
>> +	count = ARRAY_SIZE(metric_bits);
>> +	if (count > 0)
>> +		size = METRIC_BYTES(count);
>> +	extra_bytes += 2 + 4 + 4 + size;
>> +
>>   	/* Allocate the message */
>>   	msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
>>   			   GFP_NOFS, false);
>> @@ -1245,9 +1287,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
>>   	 * Serialize client metadata into waiting buffer space, using
>>   	 * the format that userspace expects for map<string, string>
>>   	 *
>> -	 * ClientSession messages with metadata are v3
>> +	 * ClientSession messages with metadata are v4
>>   	 */
>> -	msg->hdr.version = cpu_to_le16(3);
>> +	msg->hdr.version = cpu_to_le16(4);
>>   	msg->hdr.compat_version = cpu_to_le16(1);
>>   
>>   	/* The write pointer, following the session_head structure */
>> @@ -1270,6 +1312,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
>>   	}
>>   
>>   	encode_supported_features(&p, end);
>> +	encode_metric_spec(&p, end);
>>   	msg->front.iov_len = p - msg->front.iov_base;
>>   	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
>>   
>> diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
>> index 2af9e0b..f34adf7 100644
>> --- a/fs/ceph/metric.h
>> +++ b/fs/ceph/metric.h
>> @@ -18,6 +18,19 @@ enum ceph_metric_type {
>>   	CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
>>   };
>>   
>> +/*
>> + * This will always have the highest metric bit value
>> + * as the last element of the array.
>> + */
>> +#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED {	\
>> +	CLIENT_METRIC_TYPE_CAP_INFO,		\
>> +	CLIENT_METRIC_TYPE_READ_LATENCY,	\
>> +	CLIENT_METRIC_TYPE_WRITE_LATENCY,	\
>> +	CLIENT_METRIC_TYPE_METADATA_LATENCY,	\
>> +						\
>> +	CLIENT_METRIC_TYPE_MAX,			\
>> +}
>> +
>>   /* metric caps header */
>>   struct ceph_metric_cap {
>>   	__le32 type;     /* ceph metric type */


* Re: [PATCH 1/2] ceph: periodically send perf metrics to ceph
  2020-06-16 13:40   ` Jeff Layton
@ 2020-06-16 18:14     ` Xiubo Li
  2020-06-16 18:58       ` Jeff Layton
  0 siblings, 1 reply; 9+ messages in thread
From: Xiubo Li @ 2020-06-16 18:14 UTC (permalink / raw)
  To: Jeff Layton, idryomov; +Cc: zyan, pdonnell, ceph-devel

On 2020/6/16 21:40, Jeff Layton wrote:
> On Tue, 2020-06-16 at 08:52 -0400,xiubli@redhat.com  wrote:
>> From: Xiubo Li<xiubli@redhat.com>
>>
>> This will send the caps/read/write/metadata metrics to any available
>> MDS only once per second as default, which will be the same as the
>> userland client, or every metric_send_interval seconds, which is a
>> module parameter.
>>
>> URL:https://tracker.ceph.com/issues/43215
>> Signed-off-by: Xiubo Li<xiubli@redhat.com>
>> ---
>>   fs/ceph/mds_client.c         |  46 +++++++++------
>>   fs/ceph/mds_client.h         |   4 ++
>>   fs/ceph/metric.c             | 133 +++++++++++++++++++++++++++++++++++++++++++
>>   fs/ceph/metric.h             |  78 +++++++++++++++++++++++++
>>   fs/ceph/super.c              |  29 ++++++++++
>>   include/linux/ceph/ceph_fs.h |   1 +
>>   6 files changed, 274 insertions(+), 17 deletions(-)
>>
> Long patch! Might not hurt to break out some of the cleanups into
> separate patches, but they're fairly straighforward so I won't require
> it.

Sure, I will split the patch into small ones if possible.


> [...]
>
>>   /* This is the global metrics */
>>   struct ceph_client_metric {
>>   	atomic64_t            total_dentries;
>> @@ -35,8 +100,21 @@ struct ceph_client_metric {
>>   	ktime_t metadata_latency_sq_sum;
>>   	ktime_t metadata_latency_min;
>>   	ktime_t metadata_latency_max;
>> +
>> +	struct delayed_work delayed_work;  /* delayed work */
>>   };
>>   
>> +static inline void metric_schedule_delayed(struct ceph_client_metric *m)
>> +{
>> +	/* per second as default */
>> +	unsigned int hz = round_jiffies_relative(HZ * metric_send_interval);
>> +
>> +	if (!metric_send_interval)
>> +		return;
>> +
>> +	schedule_delayed_work(&m->delayed_work, hz);
>> +}
>> +
> Should this also be gated on a MDS feature bit or anything? What happens
> if we're running against a really old MDS that doesn't support these
> stats? Does it just drop them on the floor? Should we disable sending
> them in that case?

I tested this against a ceph build without the metric code: when ceph
saw an unknown message type, it closed the socket connection directly.


>>   extern int ceph_metric_init(struct ceph_client_metric *m);
>>   extern void ceph_metric_destroy(struct ceph_client_metric *m);
>>   
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index c9784eb1..66a940c 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void)
>>   	destroy_caches();
>>   }
>>   
>> +static int param_set_metric_interval(const char *val, const struct kernel_param *kp)
>> +{
>> +	int ret;
>> +	unsigned int interval;
>> +
>> +	ret = kstrtouint(val, 0, &interval);
>> +	if (ret < 0) {
>> +		pr_err("Failed to parse metric interval '%s'\n", val);
>> +		return ret;
>> +	}
>> +
>> +	if (interval > 5 || interval < 1) {
>> +		pr_err("Invalid metric interval %u\n", interval);
>> +		return -EINVAL;
>> +	}
>> +
>> +	metric_send_interval = interval;
>> +	return 0;
>> +}
>> +
>> +static const struct kernel_param_ops param_ops_metric_interval = {
>> +	.set = param_set_metric_interval,
>> +	.get = param_get_uint,
>> +};
>> +
>> +unsigned int metric_send_interval = 1;
>> +module_param_cb(metric_send_interval, &param_ops_metric_interval, &metric_send_interval, 0644);
>> +MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)");
>> +
> Aren't valid values 0-5, with 0 disabling this feature? That's
> what metric_schedule_delayed() seems to indicate...

What should the default value be? 0 to disable it, or 1?

Maybe in the future we should let the ceph side send a
notification/request to all the clients when it wants to collect the
metrics, and then we could enable it here, keeping it disabled by
default.


>>   module_init(init_ceph);
>>   module_exit(exit_ceph);
>>   
>> diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
>> index ebf5ba6..455e9b9 100644
>> --- a/include/linux/ceph/ceph_fs.h
>> +++ b/include/linux/ceph/ceph_fs.h
>> @@ -130,6 +130,7 @@ struct ceph_dir_layout {
>>   #define CEPH_MSG_CLIENT_REQUEST         24
>>   #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
>>   #define CEPH_MSG_CLIENT_REPLY           26
>> +#define CEPH_MSG_CLIENT_METRICS         29
>>   #define CEPH_MSG_CLIENT_CAPS            0x310
>>   #define CEPH_MSG_CLIENT_LEASE           0x311
>>   #define CEPH_MSG_CLIENT_SNAP            0x312


* Re: [PATCH 1/2] ceph: periodically send perf metrics to ceph
  2020-06-16 18:14     ` Xiubo Li
@ 2020-06-16 18:58       ` Jeff Layton
  2020-06-17  0:28         ` Xiubo Li
  0 siblings, 1 reply; 9+ messages in thread
From: Jeff Layton @ 2020-06-16 18:58 UTC (permalink / raw)
  To: Xiubo Li, idryomov; +Cc: zyan, pdonnell, ceph-devel

On Wed, 2020-06-17 at 02:14 +0800, Xiubo Li wrote:
> On 2020/6/16 21:40, Jeff Layton wrote:
> > On Tue, 2020-06-16 at 08:52 -0400,xiubli@redhat.com  wrote:
> > > From: Xiubo Li<xiubli@redhat.com>
> > > 
> > > This will send the caps/read/write/metadata metrics to any available
> > > MDS only once per second as default, which will be the same as the
> > > userland client, or every metric_send_interval seconds, which is a
> > > module parameter.
> > > 
> > > URL:https://tracker.ceph.com/issues/43215
> > > Signed-off-by: Xiubo Li<xiubli@redhat.com>
> > > ---
> > >   fs/ceph/mds_client.c         |  46 +++++++++------
> > >   fs/ceph/mds_client.h         |   4 ++
> > >   fs/ceph/metric.c             | 133 +++++++++++++++++++++++++++++++++++++++++++
> > >   fs/ceph/metric.h             |  78 +++++++++++++++++++++++++
> > >   fs/ceph/super.c              |  29 ++++++++++
> > >   include/linux/ceph/ceph_fs.h |   1 +
> > >   6 files changed, 274 insertions(+), 17 deletions(-)
> > > 
> > Long patch! Might not hurt to break out some of the cleanups into
> > separate patches, but they're fairly straighforward so I won't require
> > it.
> 
> Sure, I will split the patch into small ones if possible.
> 
> 
> > [...]
> > 
> > >   /* This is the global metrics */
> > >   struct ceph_client_metric {
> > >   	atomic64_t            total_dentries;
> > > @@ -35,8 +100,21 @@ struct ceph_client_metric {
> > >   	ktime_t metadata_latency_sq_sum;
> > >   	ktime_t metadata_latency_min;
> > >   	ktime_t metadata_latency_max;
> > > +
> > > +	struct delayed_work delayed_work;  /* delayed work */
> > >   };
> > >   
> > > +static inline void metric_schedule_delayed(struct ceph_client_metric *m)
> > > +{
> > > +	/* per second as default */
> > > +	unsigned int hz = round_jiffies_relative(HZ * metric_send_interval);
> > > +
> > > +	if (!metric_send_interval)
> > > +		return;
> > > +
> > > +	schedule_delayed_work(&m->delayed_work, hz);
> > > +}
> > > +
> > Should this also be gated on a MDS feature bit or anything? What happens
> > if we're running against a really old MDS that doesn't support these
> > stats? Does it just drop them on the floor? Should we disable sending
> > them in that case?
> 
> Tested without metric code in the ceph and when ceph saw a unknown type 
> message, it will close the socket connection directly.
> 

Ouch, that sounds bad. How does the userland client handle this? This
seems like the kind of thing we usually add a feature bit for
somewhere...

> 
> > >   extern int ceph_metric_init(struct ceph_client_metric *m);
> > >   extern void ceph_metric_destroy(struct ceph_client_metric *m);
> > >   
> > > diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> > > index c9784eb1..66a940c 100644
> > > --- a/fs/ceph/super.c
> > > +++ b/fs/ceph/super.c
> > > @@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void)
> > >   	destroy_caches();
> > >   }
> > >   
> > > +static int param_set_metric_interval(const char *val, const struct kernel_param *kp)
> > > +{
> > > +	int ret;
> > > +	unsigned int interval;
> > > +
> > > +	ret = kstrtouint(val, 0, &interval);
> > > +	if (ret < 0) {
> > > +		pr_err("Failed to parse metric interval '%s'\n", val);
> > > +		return ret;
> > > +	}
> > > +
> > > +	if (interval > 5 || interval < 1) {
> > > +		pr_err("Invalid metric interval %u\n", interval);
> > > +		return -EINVAL;
> > > +	}
> > > +
> > > +	metric_send_interval = interval;
> > > +	return 0;
> > > +}
> > > +
> > > +static const struct kernel_param_ops param_ops_metric_interval = {
> > > +	.set = param_set_metric_interval,
> > > +	.get = param_get_uint,
> > > +};
> > > +
> > > +unsigned int metric_send_interval = 1;
> > > +module_param_cb(metric_send_interval, &param_ops_metric_interval, &metric_send_interval, 0644);
> > > +MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)");
> > > +
> > Aren't valid values 0-5, with 0 disabling this feature? That's
> > what metric_schedule_delayed() seems to indicate...
> 
> What value should it be as default ? 0 to disable it or 1 ?
> 

I don't have a strong preference here. It's probably safe enough to
make it 1. I do like the ability to turn this off, though, as that
gives us a workaround in the event that it does cause trouble.

> Maybe in future we should let the ceph side send a notification/request 
> to all the clients if it wants to collect the metrics and then we could 
> enable it here, and defautly just disable it.
> 

That seems like a heavier-weight solution than is called for here. This
seems like something that ought to have a feature bit. Then if that's
turned off, we can just disable this altogether.

> 
> > >   module_init(init_ceph);
> > >   module_exit(exit_ceph);
> > >   
> > > diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
> > > index ebf5ba6..455e9b9 100644
> > > --- a/include/linux/ceph/ceph_fs.h
> > > +++ b/include/linux/ceph/ceph_fs.h
> > > @@ -130,6 +130,7 @@ struct ceph_dir_layout {
> > >   #define CEPH_MSG_CLIENT_REQUEST         24
> > >   #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
> > >   #define CEPH_MSG_CLIENT_REPLY           26
> > > +#define CEPH_MSG_CLIENT_METRICS         29
> > >   #define CEPH_MSG_CLIENT_CAPS            0x310
> > >   #define CEPH_MSG_CLIENT_LEASE           0x311
> > >   #define CEPH_MSG_CLIENT_SNAP            0x312
> 
> 

-- 
Jeff Layton <jlayton@kernel.org>


* Re: [PATCH 1/2] ceph: periodically send perf metrics to ceph
  2020-06-16 18:58       ` Jeff Layton
@ 2020-06-17  0:28         ` Xiubo Li
  0 siblings, 0 replies; 9+ messages in thread
From: Xiubo Li @ 2020-06-17  0:28 UTC (permalink / raw)
  To: Jeff Layton, idryomov; +Cc: zyan, pdonnell, ceph-devel

On 2020/6/17 2:58, Jeff Layton wrote:
> On Wed, 2020-06-17 at 02:14 +0800, Xiubo Li wrote:
>> On 2020/6/16 21:40, Jeff Layton wrote:
>>> On Tue, 2020-06-16 at 08:52 -0400,xiubli@redhat.com  wrote:
>>>> From: Xiubo Li<xiubli@redhat.com>
>>>>
>>>> This will send the caps/read/write/metadata metrics to any available
>>>> MDS only once per second as default, which will be the same as the
>>>> userland client, or every metric_send_interval seconds, which is a
>>>> module parameter.
>>>>
>>>> URL:https://tracker.ceph.com/issues/43215
>>>> Signed-off-by: Xiubo Li<xiubli@redhat.com>
>>>> ---
>>>>    fs/ceph/mds_client.c         |  46 +++++++++------
>>>>    fs/ceph/mds_client.h         |   4 ++
>>>>    fs/ceph/metric.c             | 133 +++++++++++++++++++++++++++++++++++++++++++
>>>>    fs/ceph/metric.h             |  78 +++++++++++++++++++++++++
>>>>    fs/ceph/super.c              |  29 ++++++++++
>>>>    include/linux/ceph/ceph_fs.h |   1 +
>>>>    6 files changed, 274 insertions(+), 17 deletions(-)
>>>>
>>> Long patch! Might not hurt to break out some of the cleanups into
>>> separate patches, but they're fairly straighforward so I won't require
>>> it.
>> Sure, I will split the patch into small ones if possible.
>>
>>
>>> [...]
>>>
>>>>    /* This is the global metrics */
>>>>    struct ceph_client_metric {
>>>>    	atomic64_t            total_dentries;
>>>> @@ -35,8 +100,21 @@ struct ceph_client_metric {
>>>>    	ktime_t metadata_latency_sq_sum;
>>>>    	ktime_t metadata_latency_min;
>>>>    	ktime_t metadata_latency_max;
>>>> +
>>>> +	struct delayed_work delayed_work;  /* delayed work */
>>>>    };
>>>>    
>>>> +static inline void metric_schedule_delayed(struct ceph_client_metric *m)
>>>> +{
>>>> +	/* per second as default */
>>>> +	unsigned int hz = round_jiffies_relative(HZ * metric_send_interval);
>>>> +
>>>> +	if (!metric_send_interval)
>>>> +		return;
>>>> +
>>>> +	schedule_delayed_work(&m->delayed_work, hz);
>>>> +}
>>>> +
>>> Should this also be gated on a MDS feature bit or anything? What happens
>>> if we're running against a really old MDS that doesn't support these
>>> stats? Does it just drop them on the floor? Should we disable sending
>>> them in that case?
>> Tested without metric code in the ceph and when ceph saw a unknown type
>> message, it will close the socket connection directly.
>>
> Ouch, that sounds bad. How does the userland client handle this? This
> seems like the kind of thing we usually add a feature bit for
> somewhere...

The userland client doesn't handle this case either. I will try to add
a feature bit or something to fix it.
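
As a rough, untested sketch (the CEPHFS_FEATURE_METRIC_COLLECT bit is
hypothetical here and would have to match whatever the MDS side ends up
advertising), the gate could be as simple as a check at the top of
ceph_mdsc_send_metrics():

	/*
	 * Only send metrics to an MDS that advertised the (hypothetical)
	 * metric-collect feature, so an old MDS never sees a
	 * CEPH_MSG_CLIENT_METRICS message at all.
	 */
	if (!test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &s->s_features))
		return false;

metric_delayed_work() would then simply move on and try the next
session when this returns false.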


>>>>    extern int ceph_metric_init(struct ceph_client_metric *m);
>>>>    extern void ceph_metric_destroy(struct ceph_client_metric *m);
>>>>    
>>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>>>> index c9784eb1..66a940c 100644
>>>> --- a/fs/ceph/super.c
>>>> +++ b/fs/ceph/super.c
>>>> @@ -1282,6 +1282,35 @@ static void __exit exit_ceph(void)
>>>>    	destroy_caches();
>>>>    }
>>>>    
>>>> +static int param_set_metric_interval(const char *val, const struct kernel_param *kp)
>>>> +{
>>>> +	int ret;
>>>> +	unsigned int interval;
>>>> +
>>>> +	ret = kstrtouint(val, 0, &interval);
>>>> +	if (ret < 0) {
>>>> +		pr_err("Failed to parse metric interval '%s'\n", val);
>>>> +		return ret;
>>>> +	}
>>>> +
>>>> +	if (interval > 5 || interval < 1) {
>>>> +		pr_err("Invalid metric interval %u\n", interval);
>>>> +		return -EINVAL;
>>>> +	}
>>>> +
>>>> +	metric_send_interval = interval;
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static const struct kernel_param_ops param_ops_metric_interval = {
>>>> +	.set = param_set_metric_interval,
>>>> +	.get = param_get_uint,
>>>> +};
>>>> +
>>>> +unsigned int metric_send_interval = 1;
>>>> +module_param_cb(metric_send_interval, &param_ops_metric_interval, &metric_send_interval, 0644);
>>>> +MODULE_PARM_DESC(metric_send_interval, "Interval (in seconds) of sending perf metric to ceph cluster, valid values are 1~5 (default: 1)");
>>>> +
>>> Aren't valid values 0-5, with 0 disabling this feature? That's
>>> what metric_schedule_delayed() seems to indicate...
>> What value should it be as default ? 0 to disable it or 1 ?
>>
> I don't have strong preference here. It's probably safe enough to make
> it 1. I do like the ability to turn this off though, as that gives us a
> workaround in the event that it does cause trouble.

Sure, I will add it back.
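
So the range check would just become something like this (untested),
with the MODULE_PARM_DESC text updated to say 0~5 and 0 meaning
disabled:

	/*
	 * 0 disables sending the metrics entirely;
	 * metric_schedule_delayed() already returns early in that case.
	 */
	if (interval > 5) {
		pr_err("Invalid metric interval %u\n", interval);
		return -EINVAL;
	}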


>
>> Maybe in future we should let the ceph side send a notification/request
>> to all the clients if it wants to collect the metrics and then we could
>> enable it here, and defautly just disable it.
>>
> That seems like a heavier-weight solution than is called for here. This
> seems like something that ought to have a feature bit. Then if that's
> turned off, we can just disable this altogether.

Okay, sounds better.

Thanks


>>>>    module_init(init_ceph);
>>>>    module_exit(exit_ceph);
>>>>    
>>>> diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
>>>> index ebf5ba6..455e9b9 100644
>>>> --- a/include/linux/ceph/ceph_fs.h
>>>> +++ b/include/linux/ceph/ceph_fs.h
>>>> @@ -130,6 +130,7 @@ struct ceph_dir_layout {
>>>>    #define CEPH_MSG_CLIENT_REQUEST         24
>>>>    #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
>>>>    #define CEPH_MSG_CLIENT_REPLY           26
>>>> +#define CEPH_MSG_CLIENT_METRICS         29
>>>>    #define CEPH_MSG_CLIENT_CAPS            0x310
>>>>    #define CEPH_MSG_CLIENT_LEASE           0x311
>>>>    #define CEPH_MSG_CLIENT_SNAP            0x312
>>

