* [PATCH 0/3] ceph: only send the metrics supported by the MDS for old cephs
@ 2022-03-31  6:52 xiubli
  2022-03-31  6:52 ` [PATCH 1/3] ceph: add the Octopus,Pacific,Quincy feature bits xiubli
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: xiubli @ 2022-03-31  6:52 UTC
  To: jlayton; +Cc: idryomov, vshankar, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

This fixes the issue in [1] by only sending the metrics that the MDS
has advertised support for. For Quincy or higher ceph versions, which
are safe against unknown metrics, all the metrics are force-sent. This
makes sure that early Quincy releases which haven't yet backported [2]
(the change that fills in the metric bits supported by the MDS when
opening the sessions) still get the metrics, without needing to enable
the force_ignore_metric_bits module parameter.

[1]: https://tracker.ceph.com/issues/54411
[2]: https://github.com/ceph/ceph/pull/45370
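
In short, each metric encode in ceph_mdsc_send_metrics() becomes
conditional (a sketch of the gating that patches 2 and 3 below add):

	bool force = test_bit(CEPHFS_FEATURE_QUINCY, &s->s_features) ||
		     force_ignore_metric_bits;

	/* only encode a metric the MDS has advertised, unless forced */
	if (force || test_bit(CLIENT_METRIC_TYPE_CAP_INFO, &s->s_metrics)) {
		/* encode the cap metric ... */
	}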

Xiubo Li (3):
  ceph: add the Octopus,Pacific,Quincy feature bits
  ceph: only send the metrics supported by the MDS for old cephs
  ceph: add force_ignore_metric_bits module parameter support

 fs/ceph/mds_client.c |  19 +++-
 fs/ceph/mds_client.h |  10 ++-
 fs/ceph/metric.c     | 207 ++++++++++++++++++++++++-------------------
 fs/ceph/metric.h     |   1 +
 fs/ceph/super.c      |   6 +-
 5 files changed, 144 insertions(+), 99 deletions(-)

-- 
2.27.0



* [PATCH 1/3] ceph: add the Octopus,Pacific,Quincy feature bits
  2022-03-31  6:52 [PATCH 0/3] ceph: only send the metrics supported by the MDS for old cephs xiubli
@ 2022-03-31  6:52 ` xiubli
  2022-03-31 12:15   ` Jeff Layton
  2022-03-31  6:52 ` [PATCH 2/3] ceph: only send the metrics supported by the MDS for old cephs xiubli
  2022-03-31  6:52 ` [PATCH 3/3] ceph: add force_ignore_metric_bits module parameter support xiubli
  2 siblings, 1 reply; 11+ messages in thread
From: xiubli @ 2022-03-31  6:52 UTC
  To: jlayton; +Cc: idryomov, vshankar, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

URL: https://tracker.ceph.com/issues/54411
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/mds_client.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 33497846e47e..32107c26f50d 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -27,10 +27,13 @@ enum ceph_feature_type {
 	CEPHFS_FEATURE_RECLAIM_CLIENT,
 	CEPHFS_FEATURE_LAZY_CAP_WANTED,
 	CEPHFS_FEATURE_MULTI_RECONNECT,
-	CEPHFS_FEATURE_DELEG_INO,
-	CEPHFS_FEATURE_METRIC_COLLECT,
+	CEPHFS_FEATURE_OCTOPUS,
+	CEPHFS_FEATURE_DELEG_INO = CEPHFS_FEATURE_OCTOPUS,
+	CEPHFS_FEATURE_PACIFIC,
+	CEPHFS_FEATURE_METRIC_COLLECT = CEPHFS_FEATURE_PACIFIC,
+	CEPHFS_FEATURE_QUINCY,
 
-	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
+	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_QUINCY,
 };
 
 /*
-- 
2.27.0



* [PATCH 2/3] ceph: only send the metrics supported by the MDS for old cephs
  2022-03-31  6:52 [PATCH 0/3] ceph: only send the metrics supported by the MDS for old cephs xiubli
  2022-03-31  6:52 ` [PATCH 1/3] ceph: add the Octopus,Pacific,Quincy feature bits xiubli
@ 2022-03-31  6:52 ` xiubli
  2022-03-31 12:11   ` Jeff Layton
  2022-03-31  6:52 ` [PATCH 3/3] ceph: add force_ignore_metric_bits module parameter support xiubli
  2 siblings, 1 reply; 11+ messages in thread
From: xiubli @ 2022-03-31  6:52 UTC
  To: jlayton; +Cc: idryomov, vshankar, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

Some old ceph versions will abort the MDS daemons when they receive
unknown metrics. Only send the metrics which are supported by the
MDSes.

By default the MDS won't fill in the metric_spec in the MClientSession
reply message, so with this patch the client will only force sending
all the metrics to MDSes running Quincy or later, which are safe when
receiving unknown metrics.

A later patch will add a module option to force sending all the
metrics if users think that is safe.

URL: https://tracker.ceph.com/issues/54411
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/mds_client.c |  19 +++-
 fs/ceph/mds_client.h |   1 +
 fs/ceph/metric.c     | 206 ++++++++++++++++++++++++-------------------
 3 files changed, 131 insertions(+), 95 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f476c65fb985..65980ce97620 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3422,7 +3422,7 @@ static void handle_session(struct ceph_mds_session *session,
 	void *end = p + msg->front.iov_len;
 	struct ceph_mds_session_head *h;
 	u32 op;
-	u64 seq, features = 0;
+	u64 seq, features = 0, metrics = 0;
 	int wake = 0;
 	bool blocklisted = false;
 
@@ -3452,11 +3452,21 @@ static void handle_session(struct ceph_mds_session *session,
 		}
 	}
 
+	/* version >= 4, metric bits */
+	if (msg_version >= 4) {
+		u32 len;
+		/* struct_v, struct_compat, and len */
+		ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad);
+		ceph_decode_32_safe(&p, end, len, bad);
+		if (len) {
+			ceph_decode_64_safe(&p, end, metrics, bad);
+			p += len - sizeof(metrics);
+		}
+	}
+
+	/* version >= 5, flags   */
 	if (msg_version >= 5) {
 		u32 flags;
-		/* version >= 4, struct_v, struct_cv, len, metric_spec */
-	        ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 2, bad);
-		/* version >= 5, flags   */
                 ceph_decode_32_safe(&p, end, flags, bad);
 		if (flags & CEPH_SESSION_BLOCKLISTED) {
 		        pr_warn("mds%d session blocklisted\n", session->s_mds);
@@ -3490,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
 			pr_info("mds%d reconnect success\n", session->s_mds);
 		session->s_state = CEPH_MDS_SESSION_OPEN;
 		session->s_features = features;
+		session->s_metrics = metrics;
 		renewed_caps(mdsc, session, 0);
 		if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features))
 			metric_schedule_delayed(&mdsc->metric);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 32107c26f50d..0f2061f5388d 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -188,6 +188,7 @@ struct ceph_mds_session {
 	int               s_state;
 	unsigned long     s_ttl;      /* time until mds kills us */
 	unsigned long	  s_features;
+	unsigned long	  s_metrics;
 	u64               s_seq;      /* incoming msg seq # */
 	struct mutex      s_mutex;    /* serialize session messages */
 
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index c47347d2e84e..f01c1f4e6b89 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -31,6 +31,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
 	struct ceph_client_metric *m = &mdsc->metric;
 	u64 nr_caps = atomic64_read(&m->total_caps);
 	u32 header_len = sizeof(struct ceph_metric_header);
+	bool force = test_bit(CEPHFS_FEATURE_QUINCY, &s->s_features);
 	struct ceph_msg *msg;
 	s64 sum;
 	s32 items = 0;
@@ -51,117 +52,140 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
 	head = msg->front.iov_base;
 
 	/* encode the cap metric */
-	cap = (struct ceph_metric_cap *)(head + 1);
-	cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
-	cap->header.ver = 1;
-	cap->header.compat = 1;
-	cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len);
-	cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
-	cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
-	cap->total = cpu_to_le64(nr_caps);
-	items++;
+	if (force || test_bit(CLIENT_METRIC_TYPE_CAP_INFO, &s->s_metrics)) {
+		cap = (struct ceph_metric_cap *)(head + 1);
+		cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
+		cap->header.ver = 1;
+		cap->header.compat = 1;
+		cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len);
+		cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
+		cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
+		cap->total = cpu_to_le64(nr_caps);
+		items++;
+	}
 
 	/* encode the read latency metric */
-	read = (struct ceph_metric_read_latency *)(cap + 1);
-	read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
-	read->header.ver = 2;
-	read->header.compat = 1;
-	read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
-	sum = m->metric[METRIC_READ].latency_sum;
-	ktime_to_ceph_timespec(&read->lat, sum);
-	ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg);
-	read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum);
-	read->count = cpu_to_le64(m->metric[METRIC_READ].total);
-	items++;
+	if (force || test_bit(CLIENT_METRIC_TYPE_READ_LATENCY, &s->s_metrics)) {
+		read = (struct ceph_metric_read_latency *)(cap + 1);
+		read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
+		read->header.ver = 2;
+		read->header.compat = 1;
+		read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
+		sum = m->metric[METRIC_READ].latency_sum;
+		ktime_to_ceph_timespec(&read->lat, sum);
+		ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg);
+		read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum);
+		read->count = cpu_to_le64(m->metric[METRIC_READ].total);
+		items++;
+	}
 
 	/* encode the write latency metric */
-	write = (struct ceph_metric_write_latency *)(read + 1);
-	write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
-	write->header.ver = 2;
-	write->header.compat = 1;
-	write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
-	sum = m->metric[METRIC_WRITE].latency_sum;
-	ktime_to_ceph_timespec(&write->lat, sum);
-	ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg);
-	write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum);
-	write->count = cpu_to_le64(m->metric[METRIC_WRITE].total);
-	items++;
+	if (force || test_bit(CLIENT_METRIC_TYPE_WRITE_LATENCY, &s->s_metrics)) {
+		write = (struct ceph_metric_write_latency *)(read + 1);
+		write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
+		write->header.ver = 2;
+		write->header.compat = 1;
+		write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
+		sum = m->metric[METRIC_WRITE].latency_sum;
+		ktime_to_ceph_timespec(&write->lat, sum);
+		ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg);
+		write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum);
+		write->count = cpu_to_le64(m->metric[METRIC_WRITE].total);
+		items++;
+	}
 
 	/* encode the metadata latency metric */
-	meta = (struct ceph_metric_metadata_latency *)(write + 1);
-	meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
-	meta->header.ver = 2;
-	meta->header.compat = 1;
-	meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
-	sum = m->metric[METRIC_METADATA].latency_sum;
-	ktime_to_ceph_timespec(&meta->lat, sum);
-	ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg);
-	meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum);
-	meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total);
-	items++;
+	if (force || test_bit(CLIENT_METRIC_TYPE_METADATA_LATENCY, &s->s_metrics)) {
+		meta = (struct ceph_metric_metadata_latency *)(write + 1);
+		meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
+		meta->header.ver = 2;
+		meta->header.compat = 1;
+		meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
+		sum = m->metric[METRIC_METADATA].latency_sum;
+		ktime_to_ceph_timespec(&meta->lat, sum);
+		ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg);
+		meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum);
+		meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total);
+		items++;
+	}
 
 	/* encode the dentry lease metric */
-	dlease = (struct ceph_metric_dlease *)(meta + 1);
-	dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
-	dlease->header.ver = 1;
-	dlease->header.compat = 1;
-	dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len);
-	dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
-	dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
-	dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
-	items++;
+	if (force || test_bit(CLIENT_METRIC_TYPE_DENTRY_LEASE, &s->s_metrics)) {
+		dlease = (struct ceph_metric_dlease *)(meta + 1);
+		dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
+		dlease->header.ver = 1;
+		dlease->header.compat = 1;
+		dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len);
+		dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
+		dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
+		dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
+		items++;
+	}
 
 	sum = percpu_counter_sum(&m->total_inodes);
 
 	/* encode the opened files metric */
-	files = (struct ceph_opened_files *)(dlease + 1);
-	files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
-	files->header.ver = 1;
-	files->header.compat = 1;
-	files->header.data_len = cpu_to_le32(sizeof(*files) - header_len);
-	files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
-	files->total = cpu_to_le64(sum);
-	items++;
+	if (force || test_bit(CLIENT_METRIC_TYPE_OPENED_FILES, &s->s_metrics)) {
+		files = (struct ceph_opened_files *)(dlease + 1);
+		files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
+		files->header.ver = 1;
+		files->header.compat = 1;
+		files->header.data_len = cpu_to_le32(sizeof(*files) - header_len);
+		files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
+		files->total = cpu_to_le64(sum);
+		items++;
+	}
 
 	/* encode the pinned icaps metric */
-	icaps = (struct ceph_pinned_icaps *)(files + 1);
-	icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
-	icaps->header.ver = 1;
-	icaps->header.compat = 1;
-	icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len);
-	icaps->pinned_icaps = cpu_to_le64(nr_caps);
-	icaps->total = cpu_to_le64(sum);
-	items++;
+	if (force || test_bit(CLIENT_METRIC_TYPE_PINNED_ICAPS, &s->s_metrics)) {
+		icaps = (struct ceph_pinned_icaps *)(files + 1);
+		icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
+		icaps->header.ver = 1;
+		icaps->header.compat = 1;
+		icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len);
+		icaps->pinned_icaps = cpu_to_le64(nr_caps);
+		icaps->total = cpu_to_le64(sum);
+		items++;
+	}
 
 	/* encode the opened inodes metric */
-	inodes = (struct ceph_opened_inodes *)(icaps + 1);
-	inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
-	inodes->header.ver = 1;
-	inodes->header.compat = 1;
-	inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len);
-	inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
-	inodes->total = cpu_to_le64(sum);
-	items++;
+	if (force || test_bit(CLIENT_METRIC_TYPE_OPENED_INODES, &s->s_metrics)) {
+		inodes = (struct ceph_opened_inodes *)(icaps + 1);
+		inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
+		inodes->header.ver = 1;
+		inodes->header.compat = 1;
+		inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len);
+		inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
+		inodes->total = cpu_to_le64(sum);
+		items++;
+	}
 
 	/* encode the read io size metric */
-	rsize = (struct ceph_read_io_size *)(inodes + 1);
-	rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES);
-	rsize->header.ver = 1;
-	rsize->header.compat = 1;
-	rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
-	rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
-	rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
-	items++;
+	if (force || test_bit(CLIENT_METRIC_TYPE_READ_IO_SIZES, &s->s_metrics)) {
+		rsize = (struct ceph_read_io_size *)(inodes + 1);
+		rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES);
+		rsize->header.ver = 1;
+		rsize->header.compat = 1;
+		rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
+		rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
+		rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
+		items++;
+	}
 
 	/* encode the write io size metric */
-	wsize = (struct ceph_write_io_size *)(rsize + 1);
-	wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES);
-	wsize->header.ver = 1;
-	wsize->header.compat = 1;
-	wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
-	wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
-	wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
-	items++;
+	if (force || test_bit(CLIENT_METRIC_TYPE_WRITE_IO_SIZES, &s->s_metrics)) {
+		wsize = (struct ceph_write_io_size *)(rsize + 1);
+		wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES);
+		wsize->header.ver = 1;
+		wsize->header.compat = 1;
+		wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
+		wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
+		wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
+		items++;
+	}
+
+	if (!items)
+		return true;
 
 	put_unaligned_le32(items, &head->num);
 	msg->front.iov_len = len;
-- 
2.27.0



* [PATCH 3/3] ceph: add force_ignore_metric_bits module parameter support
  2022-03-31  6:52 [PATCH 0/3] ceph: only send the metrics supported by the MDS for old cephs xiubli
  2022-03-31  6:52 ` [PATCH 1/3] ceph: add the Octopus,Pacific,Quincy feature bits xiubli
  2022-03-31  6:52 ` [PATCH 2/3] ceph: only send the metrics supported by the MDS for old cephs xiubli
@ 2022-03-31  6:52 ` xiubli
  2 siblings, 0 replies; 11+ messages in thread
From: xiubli @ 2022-03-31  6:52 UTC
  To: jlayton; +Cc: idryomov, vshankar, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

This parameter will force ignoring the metric bits from the MDS and
send all the metrics the kernel supports. This is dangerous for some
old ceph clusters, whose MDSes will crash when they receive unknown
metrics.

URL: https://tracker.ceph.com/issues/54411
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/metric.c | 1 +
 fs/ceph/metric.h | 1 +
 fs/ceph/super.c  | 6 +++++-
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index f01c1f4e6b89..bfb5e255e3d2 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -51,6 +51,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
 
 	head = msg->front.iov_base;
 
+	force = force || force_ignore_metric_bits;
 	/* encode the cap metric */
 	if (force || test_bit(CLIENT_METRIC_TYPE_CAP_INFO, &s->s_metrics)) {
 		cap = (struct ceph_metric_cap *)(head + 1);
diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
index 0d0c44bd3332..b0018887b078 100644
--- a/fs/ceph/metric.h
+++ b/fs/ceph/metric.h
@@ -7,6 +7,7 @@
 #include <linux/ktime.h>
 
 extern bool disable_send_metrics;
+extern bool force_ignore_metric_bits;
 
 enum ceph_metric_type {
 	CLIENT_METRIC_TYPE_CAP_INFO,
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a859921bbe96..292222b7b733 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1485,7 +1485,11 @@ static const struct kernel_param_ops param_ops_metrics = {
 
 bool disable_send_metrics = false;
 module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644);
-MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)");
+MODULE_PARM_DESC(disable_send_metrics, "Disable sending perf metrics to ceph cluster (default: off)");
+
+bool force_ignore_metric_bits = false;
+module_param_cb(force_ignore_metric_bits, &param_ops_bool, &force_ignore_metric_bits, 0644);
+MODULE_PARM_DESC(disable_send_metrics, "Force ignoring session's metric bits from MDS (default: off)");
 
 /* for both v1 and v2 syntax */
 static bool mount_support = true;
-- 
2.27.0



* Re: [PATCH 2/3] ceph: only send the metrics supported by the MDS for old cephs
  2022-03-31  6:52 ` [PATCH 2/3] ceph: only send the metrics supported by the MDS for old cephs xiubli
@ 2022-03-31 12:11   ` Jeff Layton
  2022-04-01  1:18     ` Xiubo Li
  2022-07-13  1:25     ` Xiubo Li
  0 siblings, 2 replies; 11+ messages in thread
From: Jeff Layton @ 2022-03-31 12:11 UTC
  To: xiubli; +Cc: idryomov, vshankar, ceph-devel

On Thu, 2022-03-31 at 14:52 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> Some old ceph versions will abort the MDS daemons when they receive
> unknown metrics. Only send the metrics which are supported by the
> MDSes.
> 
> By default the MDS won't fill in the metric_spec in the MClientSession
> reply message, so with this patch the client will only force sending
> all the metrics to MDSes running Quincy or later, which are safe when
> receiving unknown metrics.
> 
> A later patch will add a module option to force sending all the
> metrics if users think that is safe.
> 


Is this really a problem we need to work around in the client?

This is an MDS bug and the patches to fix that abort are being
backported (or already have been). I think we shouldn't do this at all
and instead insist that this be fixed in the MDS.

> URL: https://tracker.ceph.com/issues/54411
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/mds_client.c |  19 +++-
>  fs/ceph/mds_client.h |   1 +
>  fs/ceph/metric.c     | 206 ++++++++++++++++++++++++-------------------
>  3 files changed, 131 insertions(+), 95 deletions(-)
> 
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index f476c65fb985..65980ce97620 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -3422,7 +3422,7 @@ static void handle_session(struct ceph_mds_session *session,
>  	void *end = p + msg->front.iov_len;
>  	struct ceph_mds_session_head *h;
>  	u32 op;
> -	u64 seq, features = 0;
> +	u64 seq, features = 0, metrics = 0;
>  	int wake = 0;
>  	bool blocklisted = false;
>  
> @@ -3452,11 +3452,21 @@ static void handle_session(struct ceph_mds_session *session,
>  		}
>  	}
>  
> +	/* version >= 4, metric bits */
> +	if (msg_version >= 4) {
> +		u32 len;
> +		/* struct_v, struct_compat, and len */
> +		ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad);
> +		ceph_decode_32_safe(&p, end, len, bad);
> +		if (len) {
> +			ceph_decode_64_safe(&p, end, metrics, bad);
> +			p += len - sizeof(metrics);
> +		}
> +	}
> +
> +	/* version >= 5, flags   */
>  	if (msg_version >= 5) {
>  		u32 flags;
> -		/* version >= 4, struct_v, struct_cv, len, metric_spec */
> -	        ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 2, bad);
> -		/* version >= 5, flags   */
>                  ceph_decode_32_safe(&p, end, flags, bad);
>  		if (flags & CEPH_SESSION_BLOCKLISTED) {
>  		        pr_warn("mds%d session blocklisted\n", session->s_mds);
> @@ -3490,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
>  			pr_info("mds%d reconnect success\n", session->s_mds);
>  		session->s_state = CEPH_MDS_SESSION_OPEN;
>  		session->s_features = features;
> +		session->s_metrics = metrics;
>  		renewed_caps(mdsc, session, 0);
>  		if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features))
>  			metric_schedule_delayed(&mdsc->metric);
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index 32107c26f50d..0f2061f5388d 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -188,6 +188,7 @@ struct ceph_mds_session {
>  	int               s_state;
>  	unsigned long     s_ttl;      /* time until mds kills us */
>  	unsigned long	  s_features;
> +	unsigned long	  s_metrics;
>  	u64               s_seq;      /* incoming msg seq # */
>  	struct mutex      s_mutex;    /* serialize session messages */
>  
> diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
> index c47347d2e84e..f01c1f4e6b89 100644
> --- a/fs/ceph/metric.c
> +++ b/fs/ceph/metric.c
> @@ -31,6 +31,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
>  	struct ceph_client_metric *m = &mdsc->metric;
>  	u64 nr_caps = atomic64_read(&m->total_caps);
>  	u32 header_len = sizeof(struct ceph_metric_header);
> +	bool force = test_bit(CEPHFS_FEATURE_QUINCY, &s->s_features);

I don't necessarily have a problem with adding extra CEPHFS_FEATURE_*
enum values for different releases, as they're nice for documentation
purposes. In the actual client code however, we should ensure that we
only test for the _actual_ feature flag, and not the one corresponding
to a particular release.
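
E.g. gate on the real capability bit instead (a sketch; the
CEPHFS_FEATURE_METRIC_UNKNOWN_OK name below is made up here, not an
existing flag):

	bool force = test_bit(CEPHFS_FEATURE_METRIC_UNKNOWN_OK,
			      &s->s_features);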


>  	struct ceph_msg *msg;
>  	s64 sum;
>  	s32 items = 0;
> @@ -51,117 +52,140 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
>  	head = msg->front.iov_base;
>  
>  	/* encode the cap metric */
> -	cap = (struct ceph_metric_cap *)(head + 1);
> -	cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
> -	cap->header.ver = 1;
> -	cap->header.compat = 1;
> -	cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len);
> -	cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
> -	cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
> -	cap->total = cpu_to_le64(nr_caps);
> -	items++;
> +	if (force || test_bit(CLIENT_METRIC_TYPE_CAP_INFO, &s->s_metrics)) {
> +		cap = (struct ceph_metric_cap *)(head + 1);
> +		cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
> +		cap->header.ver = 1;
> +		cap->header.compat = 1;
> +		cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len);
> +		cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
> +		cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
> +		cap->total = cpu_to_le64(nr_caps);
> +		items++;
> +	}
>  
>  	/* encode the read latency metric */
> -	read = (struct ceph_metric_read_latency *)(cap + 1);
> -	read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
> -	read->header.ver = 2;
> -	read->header.compat = 1;
> -	read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
> -	sum = m->metric[METRIC_READ].latency_sum;
> -	ktime_to_ceph_timespec(&read->lat, sum);
> -	ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg);
> -	read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum);
> -	read->count = cpu_to_le64(m->metric[METRIC_READ].total);
> -	items++;
> +	if (force || test_bit(CLIENT_METRIC_TYPE_READ_LATENCY, &s->s_metrics)) {
> +		read = (struct ceph_metric_read_latency *)(cap + 1);
> +		read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
> +		read->header.ver = 2;
> +		read->header.compat = 1;
> +		read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
> +		sum = m->metric[METRIC_READ].latency_sum;
> +		ktime_to_ceph_timespec(&read->lat, sum);
> +		ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg);
> +		read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum);
> +		read->count = cpu_to_le64(m->metric[METRIC_READ].total);
> +		items++;
> +	}
>  
>  	/* encode the write latency metric */
> -	write = (struct ceph_metric_write_latency *)(read + 1);
> -	write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
> -	write->header.ver = 2;
> -	write->header.compat = 1;
> -	write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
> -	sum = m->metric[METRIC_WRITE].latency_sum;
> -	ktime_to_ceph_timespec(&write->lat, sum);
> -	ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg);
> -	write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum);
> -	write->count = cpu_to_le64(m->metric[METRIC_WRITE].total);
> -	items++;
> +	if (force || test_bit(CLIENT_METRIC_TYPE_WRITE_LATENCY, &s->s_metrics)) {
> +		write = (struct ceph_metric_write_latency *)(read + 1);
> +		write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
> +		write->header.ver = 2;
> +		write->header.compat = 1;
> +		write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
> +		sum = m->metric[METRIC_WRITE].latency_sum;
> +		ktime_to_ceph_timespec(&write->lat, sum);
> +		ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg);
> +		write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum);
> +		write->count = cpu_to_le64(m->metric[METRIC_WRITE].total);
> +		items++;
> +	}
>  
>  	/* encode the metadata latency metric */
> -	meta = (struct ceph_metric_metadata_latency *)(write + 1);
> -	meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
> -	meta->header.ver = 2;
> -	meta->header.compat = 1;
> -	meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
> -	sum = m->metric[METRIC_METADATA].latency_sum;
> -	ktime_to_ceph_timespec(&meta->lat, sum);
> -	ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg);
> -	meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum);
> -	meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total);
> -	items++;
> +	if (force || test_bit(CLIENT_METRIC_TYPE_METADATA_LATENCY, &s->s_metrics)) {
> +		meta = (struct ceph_metric_metadata_latency *)(write + 1);
> +		meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
> +		meta->header.ver = 2;
> +		meta->header.compat = 1;
> +		meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
> +		sum = m->metric[METRIC_METADATA].latency_sum;
> +		ktime_to_ceph_timespec(&meta->lat, sum);
> +		ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg);
> +		meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum);
> +		meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total);
> +		items++;
> +	}
>  
>  	/* encode the dentry lease metric */
> -	dlease = (struct ceph_metric_dlease *)(meta + 1);
> -	dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
> -	dlease->header.ver = 1;
> -	dlease->header.compat = 1;
> -	dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len);
> -	dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
> -	dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
> -	dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
> -	items++;
> +	if (force || test_bit(CLIENT_METRIC_TYPE_DENTRY_LEASE, &s->s_metrics)) {
> +		dlease = (struct ceph_metric_dlease *)(meta + 1);
> +		dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
> +		dlease->header.ver = 1;
> +		dlease->header.compat = 1;
> +		dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len);
> +		dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
> +		dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
> +		dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
> +		items++;
> +	}
>  
>  	sum = percpu_counter_sum(&m->total_inodes);
>  
>  	/* encode the opened files metric */
> -	files = (struct ceph_opened_files *)(dlease + 1);
> -	files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
> -	files->header.ver = 1;
> -	files->header.compat = 1;
> -	files->header.data_len = cpu_to_le32(sizeof(*files) - header_len);
> -	files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
> -	files->total = cpu_to_le64(sum);
> -	items++;
> +	if (force || test_bit(CLIENT_METRIC_TYPE_OPENED_FILES, &s->s_metrics)) {
> +		files = (struct ceph_opened_files *)(dlease + 1);
> +		files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
> +		files->header.ver = 1;
> +		files->header.compat = 1;
> +		files->header.data_len = cpu_to_le32(sizeof(*files) - header_len);
> +		files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
> +		files->total = cpu_to_le64(sum);
> +		items++;
> +	}
>  
>  	/* encode the pinned icaps metric */
> -	icaps = (struct ceph_pinned_icaps *)(files + 1);
> -	icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
> -	icaps->header.ver = 1;
> -	icaps->header.compat = 1;
> -	icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len);
> -	icaps->pinned_icaps = cpu_to_le64(nr_caps);
> -	icaps->total = cpu_to_le64(sum);
> -	items++;
> +	if (force || test_bit(CLIENT_METRIC_TYPE_PINNED_ICAPS, &s->s_metrics)) {
> +		icaps = (struct ceph_pinned_icaps *)(files + 1);
> +		icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
> +		icaps->header.ver = 1;
> +		icaps->header.compat = 1;
> +		icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len);
> +		icaps->pinned_icaps = cpu_to_le64(nr_caps);
> +		icaps->total = cpu_to_le64(sum);
> +		items++;
> +	}
>  
>  	/* encode the opened inodes metric */
> -	inodes = (struct ceph_opened_inodes *)(icaps + 1);
> -	inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
> -	inodes->header.ver = 1;
> -	inodes->header.compat = 1;
> -	inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len);
> -	inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
> -	inodes->total = cpu_to_le64(sum);
> -	items++;
> +	if (force || test_bit(CLIENT_METRIC_TYPE_OPENED_INODES, &s->s_metrics)) {
> +		inodes = (struct ceph_opened_inodes *)(icaps + 1);
> +		inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
> +		inodes->header.ver = 1;
> +		inodes->header.compat = 1;
> +		inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len);
> +		inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
> +		inodes->total = cpu_to_le64(sum);
> +		items++;
> +	}
>  
>  	/* encode the read io size metric */
> -	rsize = (struct ceph_read_io_size *)(inodes + 1);
> -	rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES);
> -	rsize->header.ver = 1;
> -	rsize->header.compat = 1;
> -	rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
> -	rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
> -	rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
> -	items++;
> +	if (force || test_bit(CLIENT_METRIC_TYPE_READ_IO_SIZES, &s->s_metrics)) {
> +		rsize = (struct ceph_read_io_size *)(inodes + 1);
> +		rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES);
> +		rsize->header.ver = 1;
> +		rsize->header.compat = 1;
> +		rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
> +		rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
> +		rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
> +		items++;
> +	}
>  
>  	/* encode the write io size metric */
> -	wsize = (struct ceph_write_io_size *)(rsize + 1);
> -	wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES);
> -	wsize->header.ver = 1;
> -	wsize->header.compat = 1;
> -	wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
> -	wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
> -	wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
> -	items++;
> +	if (force || test_bit(CLIENT_METRIC_TYPE_WRITE_IO_SIZES, &s->s_metrics)) {
> +		wsize = (struct ceph_write_io_size *)(rsize + 1);
> +		wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES);
> +		wsize->header.ver = 1;
> +		wsize->header.compat = 1;
> +		wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
> +		wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
> +		wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
> +		items++;
> +	}
> +
> +	if (!items)
> +		return true;
>  
>  	put_unaligned_le32(items, &head->num);
>  	msg->front.iov_len = len;

-- 
Jeff Layton <jlayton@kernel.org>


* Re: [PATCH 1/3] ceph: add the Octopus,Pacific,Quincy feature bits
  2022-03-31  6:52 ` [PATCH 1/3] ceph: add the Octopus,Pacific,Quincy feature bits xiubli
@ 2022-03-31 12:15   ` Jeff Layton
  0 siblings, 0 replies; 11+ messages in thread
From: Jeff Layton @ 2022-03-31 12:15 UTC
  To: xiubli, Patrick Donnelly; +Cc: idryomov, vshankar, ceph-devel

On Thu, 2022-03-31 at 14:52 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> URL: https://tracker.ceph.com/issues/54411
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/mds_client.h | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index 33497846e47e..32107c26f50d 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -27,10 +27,13 @@ enum ceph_feature_type {
>  	CEPHFS_FEATURE_RECLAIM_CLIENT,
>  	CEPHFS_FEATURE_LAZY_CAP_WANTED,
>  	CEPHFS_FEATURE_MULTI_RECONNECT,
> -	CEPHFS_FEATURE_DELEG_INO,
> -	CEPHFS_FEATURE_METRIC_COLLECT,
> +	CEPHFS_FEATURE_OCTOPUS,
> +	CEPHFS_FEATURE_DELEG_INO = CEPHFS_FEATURE_OCTOPUS,
> +	CEPHFS_FEATURE_PACIFIC,
> +	CEPHFS_FEATURE_METRIC_COLLECT = CEPHFS_FEATURE_PACIFIC,
> +	CEPHFS_FEATURE_QUINCY,
>  
> -	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
> +	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_QUINCY,
>  };
>  
>  /*

(cc'ing Patrick)

I think we decided a while back to move away from "release" feature
flags like this, because they're ambiguous. We do occasionally backport
features to later stable versions and then the release flag becomes
meaningless.

If the "feature" here is extended metrics, then this should be something
like CEPHFS_FEATURE_METRIC_V2 or METRIC_EXTENDED or something. IOW, the
flag name should describe the feature that we're advertising. 
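
A sketch of what that could look like (CEPHFS_FEATURE_METRIC_V2 is
just the suggested name here, not an existing flag):

enum ceph_feature_type {
	/* ...earlier feature bits unchanged... */
	CEPHFS_FEATURE_DELEG_INO,
	CEPHFS_FEATURE_METRIC_COLLECT,
	CEPHFS_FEATURE_METRIC_V2,	/* extended metrics supported */

	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_V2,
};
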
-- 
Jeff Layton <jlayton@kernel.org>


* Re: [PATCH 2/3] ceph: only send the metrics supported by the MDS for old cephs
  2022-03-31 12:11   ` Jeff Layton
@ 2022-04-01  1:18     ` Xiubo Li
  2022-04-01  9:41       ` Jeff Layton
  2022-07-13  1:25     ` Xiubo Li
  1 sibling, 1 reply; 11+ messages in thread
From: Xiubo Li @ 2022-04-01  1:18 UTC
  To: Jeff Layton; +Cc: idryomov, vshankar, ceph-devel


On 3/31/22 8:11 PM, Jeff Layton wrote:
> On Thu, 2022-03-31 at 14:52 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Some old ceph versions will abort the MDS daemons when they receive
>> unknown metrics. Only send the metrics which are supported by the
>> MDSes.
>>
>> By default the MDS won't fill in the metric_spec in the MClientSession
>> reply message, so with this patch the client will only force sending
>> all the metrics to MDSes running Quincy or later, which are safe when
>> receiving unknown metrics.
>>
>> A later patch will add a module option to force sending all the
>> metrics if users think that is safe.
>>
>
> Is this really a problem we need to work around in the client?
>
> This is an MDS bug and the patches to fix that abort are being
> backported (or already have been). I think we shouldn't do this at all
> and instead insist that this be fixed in the MDS.

Yes, though we fixed that in the MDS a while ago, there are still
deployments which haven't gotten the backport yet.

For example, in tracker#54411 we hit this when upgrading from an old
ceph to a new one: the new client will reconnect to the old ceph
during the upgrade, the mgr has two client instances which reconnect
to the old ceph, and we cannot disable sending the metrics through the
options, because the mgr deliberately won't load them from ceph.conf.

I am afraid there will be similar issues or strange use cases we can't
foresee, like tracker#54411. For example, what if during an upgrade
the kclients are upgraded first, or a little earlier than ceph?

Last week someone pinged me on WeChat about a similar issue: they hit
the metric crash in their own upgrade test case.

Isn't this workaround better than crashing the MDSes?

>> URL: https://tracker.ceph.com/issues/54411
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/mds_client.c |  19 +++-
>>   fs/ceph/mds_client.h |   1 +
>>   fs/ceph/metric.c     | 206 ++++++++++++++++++++++++-------------------
>>   3 files changed, 131 insertions(+), 95 deletions(-)
>>
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index f476c65fb985..65980ce97620 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -3422,7 +3422,7 @@ static void handle_session(struct ceph_mds_session *session,
>>   	void *end = p + msg->front.iov_len;
>>   	struct ceph_mds_session_head *h;
>>   	u32 op;
>> -	u64 seq, features = 0;
>> +	u64 seq, features = 0, metrics = 0;
>>   	int wake = 0;
>>   	bool blocklisted = false;
>>   
>> @@ -3452,11 +3452,21 @@ static void handle_session(struct ceph_mds_session *session,
>>   		}
>>   	}
>>   
>> +	/* version >= 4, metric bits */
>> +	if (msg_version >= 4) {
>> +		u32 len;
>> +		/* struct_v, struct_compat, and len */
>> +		ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad);
>> +		ceph_decode_32_safe(&p, end, len, bad);
>> +		if (len) {
>> +			ceph_decode_64_safe(&p, end, metrics, bad);
>> +			p += len - sizeof(metrics);
>> +		}
>> +	}
>> +
>> +	/* version >= 5, flags   */
>>   	if (msg_version >= 5) {
>>   		u32 flags;
>> -		/* version >= 4, struct_v, struct_cv, len, metric_spec */
>> -	        ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 2, bad);
>> -		/* version >= 5, flags   */
>>                   ceph_decode_32_safe(&p, end, flags, bad);
>>   		if (flags & CEPH_SESSION_BLOCKLISTED) {
>>   		        pr_warn("mds%d session blocklisted\n", session->s_mds);
>> @@ -3490,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
>>   			pr_info("mds%d reconnect success\n", session->s_mds);
>>   		session->s_state = CEPH_MDS_SESSION_OPEN;
>>   		session->s_features = features;
>> +		session->s_metrics = metrics;
>>   		renewed_caps(mdsc, session, 0);
>>   		if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features))
>>   			metric_schedule_delayed(&mdsc->metric);
>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>> index 32107c26f50d..0f2061f5388d 100644
>> --- a/fs/ceph/mds_client.h
>> +++ b/fs/ceph/mds_client.h
>> @@ -188,6 +188,7 @@ struct ceph_mds_session {
>>   	int               s_state;
>>   	unsigned long     s_ttl;      /* time until mds kills us */
>>   	unsigned long	  s_features;
>> +	unsigned long	  s_metrics;
>>   	u64               s_seq;      /* incoming msg seq # */
>>   	struct mutex      s_mutex;    /* serialize session messages */
>>   
>> diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
>> index c47347d2e84e..f01c1f4e6b89 100644
>> --- a/fs/ceph/metric.c
>> +++ b/fs/ceph/metric.c
>> @@ -31,6 +31,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
>>   	struct ceph_client_metric *m = &mdsc->metric;
>>   	u64 nr_caps = atomic64_read(&m->total_caps);
>>   	u32 header_len = sizeof(struct ceph_metric_header);
>> +	bool force = test_bit(CEPHFS_FEATURE_QUINCY, &s->s_features);
> I don't necessarily have a problem with adding extra CEPHFS_FEATURE_*
> enum values for different releases, as they're nice for documentation
> purposes. In the actual client code however, we should ensure that we
> only test for the _actual_ feature flag, and not the one corresponding
> to a particular release.

Yeah, sounds good.

-- Xiubo

>
>>   	struct ceph_msg *msg;
>>   	s64 sum;
>>   	s32 items = 0;
>> @@ -51,117 +52,140 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
>>   	head = msg->front.iov_base;
>>   
>>   	/* encode the cap metric */
>> -	cap = (struct ceph_metric_cap *)(head + 1);
>> -	cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
>> -	cap->header.ver = 1;
>> -	cap->header.compat = 1;
>> -	cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len);
>> -	cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
>> -	cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
>> -	cap->total = cpu_to_le64(nr_caps);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_CAP_INFO, &s->s_metrics)) {
>> +		cap = (struct ceph_metric_cap *)(head + 1);
>> +		cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
>> +		cap->header.ver = 1;
>> +		cap->header.compat = 1;
>> +		cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len);
>> +		cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
>> +		cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
>> +		cap->total = cpu_to_le64(nr_caps);
>> +		items++;
>> +	}
>>   
>>   	/* encode the read latency metric */
>> -	read = (struct ceph_metric_read_latency *)(cap + 1);
>> -	read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
>> -	read->header.ver = 2;
>> -	read->header.compat = 1;
>> -	read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
>> -	sum = m->metric[METRIC_READ].latency_sum;
>> -	ktime_to_ceph_timespec(&read->lat, sum);
>> -	ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg);
>> -	read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum);
>> -	read->count = cpu_to_le64(m->metric[METRIC_READ].total);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_READ_LATENCY, &s->s_metrics)) {
>> +		read = (struct ceph_metric_read_latency *)(cap + 1);
>> +		read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
>> +		read->header.ver = 2;
>> +		read->header.compat = 1;
>> +		read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
>> +		sum = m->metric[METRIC_READ].latency_sum;
>> +		ktime_to_ceph_timespec(&read->lat, sum);
>> +		ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg);
>> +		read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum);
>> +		read->count = cpu_to_le64(m->metric[METRIC_READ].total);
>> +		items++;
>> +	}
>>   
>>   	/* encode the write latency metric */
>> -	write = (struct ceph_metric_write_latency *)(read + 1);
>> -	write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
>> -	write->header.ver = 2;
>> -	write->header.compat = 1;
>> -	write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
>> -	sum = m->metric[METRIC_WRITE].latency_sum;
>> -	ktime_to_ceph_timespec(&write->lat, sum);
>> -	ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg);
>> -	write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum);
>> -	write->count = cpu_to_le64(m->metric[METRIC_WRITE].total);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_WRITE_LATENCY, &s->s_metrics)) {
>> +		write = (struct ceph_metric_write_latency *)(read + 1);
>> +		write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
>> +		write->header.ver = 2;
>> +		write->header.compat = 1;
>> +		write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
>> +		sum = m->metric[METRIC_WRITE].latency_sum;
>> +		ktime_to_ceph_timespec(&write->lat, sum);
>> +		ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg);
>> +		write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum);
>> +		write->count = cpu_to_le64(m->metric[METRIC_WRITE].total);
>> +		items++;
>> +	}
>>   
>>   	/* encode the metadata latency metric */
>> -	meta = (struct ceph_metric_metadata_latency *)(write + 1);
>> -	meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
>> -	meta->header.ver = 2;
>> -	meta->header.compat = 1;
>> -	meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
>> -	sum = m->metric[METRIC_METADATA].latency_sum;
>> -	ktime_to_ceph_timespec(&meta->lat, sum);
>> -	ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg);
>> -	meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum);
>> -	meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_METADATA_LATENCY, &s->s_metrics)) {
>> +		meta = (struct ceph_metric_metadata_latency *)(write + 1);
>> +		meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
>> +		meta->header.ver = 2;
>> +		meta->header.compat = 1;
>> +		meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
>> +		sum = m->metric[METRIC_METADATA].latency_sum;
>> +		ktime_to_ceph_timespec(&meta->lat, sum);
>> +		ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg);
>> +		meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum);
>> +		meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total);
>> +		items++;
>> +	}
>>   
>>   	/* encode the dentry lease metric */
>> -	dlease = (struct ceph_metric_dlease *)(meta + 1);
>> -	dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
>> -	dlease->header.ver = 1;
>> -	dlease->header.compat = 1;
>> -	dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len);
>> -	dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
>> -	dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
>> -	dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_DENTRY_LEASE, &s->s_metrics)) {
>> +		dlease = (struct ceph_metric_dlease *)(meta + 1);
>> +		dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
>> +		dlease->header.ver = 1;
>> +		dlease->header.compat = 1;
>> +		dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len);
>> +		dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
>> +		dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
>> +		dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
>> +		items++;
>> +	}
>>   
>>   	sum = percpu_counter_sum(&m->total_inodes);
>>   
>>   	/* encode the opened files metric */
>> -	files = (struct ceph_opened_files *)(dlease + 1);
>> -	files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
>> -	files->header.ver = 1;
>> -	files->header.compat = 1;
>> -	files->header.data_len = cpu_to_le32(sizeof(*files) - header_len);
>> -	files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
>> -	files->total = cpu_to_le64(sum);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_OPENED_FILES, &s->s_metrics)) {
>> +		files = (struct ceph_opened_files *)(dlease + 1);
>> +		files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
>> +		files->header.ver = 1;
>> +		files->header.compat = 1;
>> +		files->header.data_len = cpu_to_le32(sizeof(*files) - header_len);
>> +		files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
>> +		files->total = cpu_to_le64(sum);
>> +		items++;
>> +	}
>>   
>>   	/* encode the pinned icaps metric */
>> -	icaps = (struct ceph_pinned_icaps *)(files + 1);
>> -	icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
>> -	icaps->header.ver = 1;
>> -	icaps->header.compat = 1;
>> -	icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len);
>> -	icaps->pinned_icaps = cpu_to_le64(nr_caps);
>> -	icaps->total = cpu_to_le64(sum);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_PINNED_ICAPS, &s->s_metrics)) {
>> +		icaps = (struct ceph_pinned_icaps *)(files + 1);
>> +		icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
>> +		icaps->header.ver = 1;
>> +		icaps->header.compat = 1;
>> +		icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len);
>> +		icaps->pinned_icaps = cpu_to_le64(nr_caps);
>> +		icaps->total = cpu_to_le64(sum);
>> +		items++;
>> +	}
>>   
>>   	/* encode the opened inodes metric */
>> -	inodes = (struct ceph_opened_inodes *)(icaps + 1);
>> -	inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
>> -	inodes->header.ver = 1;
>> -	inodes->header.compat = 1;
>> -	inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len);
>> -	inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
>> -	inodes->total = cpu_to_le64(sum);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_OPENED_INODES, &s->s_metrics)) {
>> +		inodes = (struct ceph_opened_inodes *)(icaps + 1);
>> +		inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
>> +		inodes->header.ver = 1;
>> +		inodes->header.compat = 1;
>> +		inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len);
>> +		inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
>> +		inodes->total = cpu_to_le64(sum);
>> +		items++;
>> +	}
>>   
>>   	/* encode the read io size metric */
>> -	rsize = (struct ceph_read_io_size *)(inodes + 1);
>> -	rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES);
>> -	rsize->header.ver = 1;
>> -	rsize->header.compat = 1;
>> -	rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
>> -	rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
>> -	rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_READ_IO_SIZES, &s->s_metrics)) {
>> +		rsize = (struct ceph_read_io_size *)(inodes + 1);
>> +		rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES);
>> +		rsize->header.ver = 1;
>> +		rsize->header.compat = 1;
>> +		rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
>> +		rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
>> +		rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
>> +		items++;
>> +	}
>>   
>>   	/* encode the write io size metric */
>> -	wsize = (struct ceph_write_io_size *)(rsize + 1);
>> -	wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES);
>> -	wsize->header.ver = 1;
>> -	wsize->header.compat = 1;
>> -	wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
>> -	wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
>> -	wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_WRITE_IO_SIZES, &s->s_metrics)) {
>> +		wsize = (struct ceph_write_io_size *)(rsize + 1);
>> +		wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES);
>> +		wsize->header.ver = 1;
>> +		wsize->header.compat = 1;
>> +		wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
>> +		wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
>> +		wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
>> +		items++;
>> +	}
>> +
>> +	if (!items)
>> +		return true;
>>   
>>   	put_unaligned_le32(items, &head->num);
>>   	msg->front.iov_len = len;



* Re: [PATCH 2/3] ceph: only send the metrics supported by the MDS for old cephs
  2022-04-01  1:18     ` Xiubo Li
@ 2022-04-01  9:41       ` Jeff Layton
  2022-04-01 14:11         ` Xiubo Li
  0 siblings, 1 reply; 11+ messages in thread
From: Jeff Layton @ 2022-04-01  9:41 UTC
  To: Xiubo Li; +Cc: idryomov, vshankar, ceph-devel

On Fri, 2022-04-01 at 09:18 +0800, Xiubo Li wrote:
> On 3/31/22 8:11 PM, Jeff Layton wrote:
> > On Thu, 2022-03-31 at 14:52 +0800, xiubli@redhat.com wrote:
> > > From: Xiubo Li <xiubli@redhat.com>
> > > 
> > > Some old ceph versions will abort the MDS daemons when they receive
> > > unknown metrics. Only send the metrics which are supported by the
> > > MDSes.
> > > 
> > > By default the MDS won't fill in the metric_spec in the MClientSession
> > > reply message, so with this patch the client will only force sending
> > > all the metrics to MDSes running Quincy or later, which are safe when
> > > receiving unknown metrics.
> > > 
> > > A later patch will add a module option to force sending all the
> > > metrics if users think that is safe.
> > > 
> > 
> > Is this really a problem we need to work around in the client?
> > 
> > This is an MDS bug and the patches to fix that abort are being
> > backported (or already have been). I think we shouldn't do this at all
> > and instead insist that this be fixed in the MDS.
> 
> Yes, though we fixed that in the MDS a while ago, there are still
> deployments which haven't gotten the backport yet.
> 
> For example, in tracker#54411 we hit this when upgrading from an old
> ceph to a new one: the new client will reconnect to the old ceph
> during the upgrade, the mgr has two client instances which reconnect
> to the old ceph, and we cannot disable sending the metrics through the
> options, because the mgr deliberately won't load them from ceph.conf.
> 
> I am afraid there will be similar issues or strange use cases we can't
> foresee, like tracker#54411. For example, what if during an upgrade
> the kclients are upgraded first, or a little earlier than ceph?
> 
> Last week someone pinged me on WeChat about a similar issue: they hit
> the metric crash in their own upgrade test case.
> 
> Isn't this workaround better than crashing the MDSes?
> 

We already have a kernel module option (disable_send_metrics) that
disables sending metrics when it's enabled. Why not just use that to
disable metrics when dealing with these problematic MDS versions that
apparently can't be fixed?

Adding all of this complexity to avoid sending something to the MDS
because it might fall over really just seems like we're fixing this in
the wrong place.

If we were to take this, at what point can we drop this workaround? We
really don't want to carry this in perpetuity.
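
For instance (using the existing knob; the parameter is 0644 per
fs/ceph/super.c, so it can be flipped at runtime):

	echo 1 > /sys/module/ceph/parameters/disable_send_metrics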

> > > URL: https://tracker.ceph.com/issues/54411
> > > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > > ---
> > >   fs/ceph/mds_client.c |  19 +++-
> > >   fs/ceph/mds_client.h |   1 +
> > >   fs/ceph/metric.c     | 206 ++++++++++++++++++++++++-------------------
> > >   3 files changed, 131 insertions(+), 95 deletions(-)
> > > 
> > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > > index f476c65fb985..65980ce97620 100644
> > > --- a/fs/ceph/mds_client.c
> > > +++ b/fs/ceph/mds_client.c
> > > @@ -3422,7 +3422,7 @@ static void handle_session(struct ceph_mds_session *session,
> > >   	void *end = p + msg->front.iov_len;
> > >   	struct ceph_mds_session_head *h;
> > >   	u32 op;
> > > -	u64 seq, features = 0;
> > > +	u64 seq, features = 0, metrics = 0;
> > >   	int wake = 0;
> > >   	bool blocklisted = false;
> > >   
> > > @@ -3452,11 +3452,21 @@ static void handle_session(struct ceph_mds_session *session,
> > >   		}
> > >   	}
> > >   
> > > +	/* version >= 4, metric bits */
> > > +	if (msg_version >= 4) {
> > > +		u32 len;
> > > +		/* struct_v, struct_compat, and len */
> > > +		ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad);
> > > +		ceph_decode_32_safe(&p, end, len, bad);
> > > +		if (len) {
> > > +			ceph_decode_64_safe(&p, end, metrics, bad);
> > > +			p += len - sizeof(metrics);
> > > +		}
> > > +	}
> > > +
> > > +	/* version >= 5, flags   */
> > >   	if (msg_version >= 5) {
> > >   		u32 flags;
> > > -		/* version >= 4, struct_v, struct_cv, len, metric_spec */
> > > -	        ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 2, bad);
> > > -		/* version >= 5, flags   */
> > >                   ceph_decode_32_safe(&p, end, flags, bad);
> > >   		if (flags & CEPH_SESSION_BLOCKLISTED) {
> > >   		        pr_warn("mds%d session blocklisted\n", session->s_mds);
> > > @@ -3490,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
> > >   			pr_info("mds%d reconnect success\n", session->s_mds);
> > >   		session->s_state = CEPH_MDS_SESSION_OPEN;
> > >   		session->s_features = features;
> > > +		session->s_metrics = metrics;
> > >   		renewed_caps(mdsc, session, 0);
> > >   		if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features))
> > >   			metric_schedule_delayed(&mdsc->metric);
> > > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> > > index 32107c26f50d..0f2061f5388d 100644
> > > --- a/fs/ceph/mds_client.h
> > > +++ b/fs/ceph/mds_client.h
> > > @@ -188,6 +188,7 @@ struct ceph_mds_session {
> > >   	int               s_state;
> > >   	unsigned long     s_ttl;      /* time until mds kills us */
> > >   	unsigned long	  s_features;
> > > +	unsigned long	  s_metrics;
> > >   	u64               s_seq;      /* incoming msg seq # */
> > >   	struct mutex      s_mutex;    /* serialize session messages */
> > >   
> > > diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
> > > index c47347d2e84e..f01c1f4e6b89 100644
> > > --- a/fs/ceph/metric.c
> > > +++ b/fs/ceph/metric.c
> > > @@ -31,6 +31,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
> > >   	struct ceph_client_metric *m = &mdsc->metric;
> > >   	u64 nr_caps = atomic64_read(&m->total_caps);
> > >   	u32 header_len = sizeof(struct ceph_metric_header);
> > > +	bool force = test_bit(CEPHFS_FEATURE_QUINCY, &s->s_features);
> > I don't necessarily have a problem with adding extra CEPHFS_FEATURE_*
> > enum values for different releases, as they're nice for documentation
> > purposes. In the actual client code however, we should ensure that we
> > only test for the _actual_ feature flag, and not the one corresponding
> > to a particular release.
> 
> Yeah, sounds good.
> 
> -- Xiubo
> 
> > 
> > >   	struct ceph_msg *msg;
> > >   	s64 sum;
> > >   	s32 items = 0;
> > > @@ -51,117 +52,140 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
> > >   	head = msg->front.iov_base;
> > >   
> > >   	/* encode the cap metric */
> > > -	cap = (struct ceph_metric_cap *)(head + 1);
> > > -	cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
> > > -	cap->header.ver = 1;
> > > -	cap->header.compat = 1;
> > > -	cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len);
> > > -	cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
> > > -	cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
> > > -	cap->total = cpu_to_le64(nr_caps);
> > > -	items++;
> > > +	if (force || test_bit(CLIENT_METRIC_TYPE_CAP_INFO, &s->s_metrics)) {
> > > +		cap = (struct ceph_metric_cap *)(head + 1);
> > > +		cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
> > > +		cap->header.ver = 1;
> > > +		cap->header.compat = 1;
> > > +		cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len);
> > > +		cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
> > > +		cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
> > > +		cap->total = cpu_to_le64(nr_caps);
> > > +		items++;
> > > +	}
> > >   
> > >   	/* encode the read latency metric */
> > > -	read = (struct ceph_metric_read_latency *)(cap + 1);
> > > -	read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
> > > -	read->header.ver = 2;
> > > -	read->header.compat = 1;
> > > -	read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
> > > -	sum = m->metric[METRIC_READ].latency_sum;
> > > -	ktime_to_ceph_timespec(&read->lat, sum);
> > > -	ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg);
> > > -	read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum);
> > > -	read->count = cpu_to_le64(m->metric[METRIC_READ].total);
> > > -	items++;
> > > +	if (force || test_bit(CLIENT_METRIC_TYPE_READ_LATENCY, &s->s_metrics)) {
> > > +		read = (struct ceph_metric_read_latency *)(cap + 1);
> > > +		read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
> > > +		read->header.ver = 2;
> > > +		read->header.compat = 1;
> > > +		read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
> > > +		sum = m->metric[METRIC_READ].latency_sum;
> > > +		ktime_to_ceph_timespec(&read->lat, sum);
> > > +		ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg);
> > > +		read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum);
> > > +		read->count = cpu_to_le64(m->metric[METRIC_READ].total);
> > > +		items++;
> > > +	}
> > >   
> > >   	/* encode the write latency metric */
> > > -	write = (struct ceph_metric_write_latency *)(read + 1);
> > > -	write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
> > > -	write->header.ver = 2;
> > > -	write->header.compat = 1;
> > > -	write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
> > > -	sum = m->metric[METRIC_WRITE].latency_sum;
> > > -	ktime_to_ceph_timespec(&write->lat, sum);
> > > -	ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg);
> > > -	write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum);
> > > -	write->count = cpu_to_le64(m->metric[METRIC_WRITE].total);
> > > -	items++;
> > > +	if (force || test_bit(CLIENT_METRIC_TYPE_WRITE_LATENCY, &s->s_metrics)) {
> > > +		write = (struct ceph_metric_write_latency *)(read + 1);
> > > +		write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
> > > +		write->header.ver = 2;
> > > +		write->header.compat = 1;
> > > +		write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
> > > +		sum = m->metric[METRIC_WRITE].latency_sum;
> > > +		ktime_to_ceph_timespec(&write->lat, sum);
> > > +		ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg);
> > > +		write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum);
> > > +		write->count = cpu_to_le64(m->metric[METRIC_WRITE].total);
> > > +		items++;
> > > +	}
> > >   
> > >   	/* encode the metadata latency metric */
> > > -	meta = (struct ceph_metric_metadata_latency *)(write + 1);
> > > -	meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
> > > -	meta->header.ver = 2;
> > > -	meta->header.compat = 1;
> > > -	meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
> > > -	sum = m->metric[METRIC_METADATA].latency_sum;
> > > -	ktime_to_ceph_timespec(&meta->lat, sum);
> > > -	ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg);
> > > -	meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum);
> > > -	meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total);
> > > -	items++;
> > > +	if (force || test_bit(CLIENT_METRIC_TYPE_METADATA_LATENCY, &s->s_metrics)) {
> > > +		meta = (struct ceph_metric_metadata_latency *)(write + 1);
> > > +		meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
> > > +		meta->header.ver = 2;
> > > +		meta->header.compat = 1;
> > > +		meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
> > > +		sum = m->metric[METRIC_METADATA].latency_sum;
> > > +		ktime_to_ceph_timespec(&meta->lat, sum);
> > > +		ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg);
> > > +		meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum);
> > > +		meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total);
> > > +		items++;
> > > +	}
> > >   
> > >   	/* encode the dentry lease metric */
> > > -	dlease = (struct ceph_metric_dlease *)(meta + 1);
> > > -	dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
> > > -	dlease->header.ver = 1;
> > > -	dlease->header.compat = 1;
> > > -	dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len);
> > > -	dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
> > > -	dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
> > > -	dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
> > > -	items++;
> > > +	if (force || test_bit(CLIENT_METRIC_TYPE_DENTRY_LEASE, &s->s_metrics)) {
> > > +		dlease = (struct ceph_metric_dlease *)(meta + 1);
> > > +		dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
> > > +		dlease->header.ver = 1;
> > > +		dlease->header.compat = 1;
> > > +		dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len);
> > > +		dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
> > > +		dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
> > > +		dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
> > > +		items++;
> > > +	}
> > >   
> > >   	sum = percpu_counter_sum(&m->total_inodes);
> > >   
> > >   	/* encode the opened files metric */
> > > -	files = (struct ceph_opened_files *)(dlease + 1);
> > > -	files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
> > > -	files->header.ver = 1;
> > > -	files->header.compat = 1;
> > > -	files->header.data_len = cpu_to_le32(sizeof(*files) - header_len);
> > > -	files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
> > > -	files->total = cpu_to_le64(sum);
> > > -	items++;
> > > +	if (force || test_bit(CLIENT_METRIC_TYPE_OPENED_FILES, &s->s_metrics)) {
> > > +		files = (struct ceph_opened_files *)(dlease + 1);
> > > +		files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
> > > +		files->header.ver = 1;
> > > +		files->header.compat = 1;
> > > +		files->header.data_len = cpu_to_le32(sizeof(*files) - header_len);
> > > +		files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
> > > +		files->total = cpu_to_le64(sum);
> > > +		items++;
> > > +	}
> > >   
> > >   	/* encode the pinned icaps metric */
> > > -	icaps = (struct ceph_pinned_icaps *)(files + 1);
> > > -	icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
> > > -	icaps->header.ver = 1;
> > > -	icaps->header.compat = 1;
> > > -	icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len);
> > > -	icaps->pinned_icaps = cpu_to_le64(nr_caps);
> > > -	icaps->total = cpu_to_le64(sum);
> > > -	items++;
> > > +	if (force || test_bit(CLIENT_METRIC_TYPE_PINNED_ICAPS, &s->s_metrics)) {
> > > +		icaps = (struct ceph_pinned_icaps *)(files + 1);
> > > +		icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
> > > +		icaps->header.ver = 1;
> > > +		icaps->header.compat = 1;
> > > +		icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len);
> > > +		icaps->pinned_icaps = cpu_to_le64(nr_caps);
> > > +		icaps->total = cpu_to_le64(sum);
> > > +		items++;
> > > +	}
> > >   
> > >   	/* encode the opened inodes metric */
> > > -	inodes = (struct ceph_opened_inodes *)(icaps + 1);
> > > -	inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
> > > -	inodes->header.ver = 1;
> > > -	inodes->header.compat = 1;
> > > -	inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len);
> > > -	inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
> > > -	inodes->total = cpu_to_le64(sum);
> > > -	items++;
> > > +	if (force || test_bit(CLIENT_METRIC_TYPE_OPENED_INODES, &s->s_metrics)) {
> > > +		inodes = (struct ceph_opened_inodes *)(icaps + 1);
> > > +		inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
> > > +		inodes->header.ver = 1;
> > > +		inodes->header.compat = 1;
> > > +		inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len);
> > > +		inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
> > > +		inodes->total = cpu_to_le64(sum);
> > > +		items++;
> > > +	}
> > >   
> > >   	/* encode the read io size metric */
> > > -	rsize = (struct ceph_read_io_size *)(inodes + 1);
> > > -	rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES);
> > > -	rsize->header.ver = 1;
> > > -	rsize->header.compat = 1;
> > > -	rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
> > > -	rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
> > > -	rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
> > > -	items++;
> > > +	if (force || test_bit(CLIENT_METRIC_TYPE_READ_IO_SIZES, &s->s_metrics)) {
> > > +		rsize = (struct ceph_read_io_size *)(inodes + 1);
> > > +		rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES);
> > > +		rsize->header.ver = 1;
> > > +		rsize->header.compat = 1;
> > > +		rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
> > > +		rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
> > > +		rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
> > > +		items++;
> > > +	}
> > >   
> > >   	/* encode the write io size metric */
> > > -	wsize = (struct ceph_write_io_size *)(rsize + 1);
> > > -	wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES);
> > > -	wsize->header.ver = 1;
> > > -	wsize->header.compat = 1;
> > > -	wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
> > > -	wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
> > > -	wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
> > > -	items++;
> > > +	if (force || test_bit(CLIENT_METRIC_TYPE_WRITE_IO_SIZES, &s->s_metrics)) {
> > > +		wsize = (struct ceph_write_io_size *)(rsize + 1);
> > > +		wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES);
> > > +		wsize->header.ver = 1;
> > > +		wsize->header.compat = 1;
> > > +		wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
> > > +		wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
> > > +		wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
> > > +		items++;
> > > +	}
> > > +
> > > +	if (!items)
> > > +		return true;
> > >   
> > >   	put_unaligned_le32(items, &head->num);
> > >   	msg->front.iov_len = len;
> 

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] ceph: only send the metrices supported by the MDS for old cephs
  2022-04-01  9:41       ` Jeff Layton
@ 2022-04-01 14:11         ` Xiubo Li
  0 siblings, 0 replies; 11+ messages in thread
From: Xiubo Li @ 2022-04-01 14:11 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, vshankar, ceph-devel


On 4/1/22 5:41 PM, Jeff Layton wrote:
> On Fri, 2022-04-01 at 09:18 +0800, Xiubo Li wrote:
>> On 3/31/22 8:11 PM, Jeff Layton wrote:
>>> On Thu, 2022-03-31 at 14:52 +0800, xiubli@redhat.com wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> Some old ceph versions will abort the MDS daemons when they receive
>>>> unknown metrics. With this change the client will only send the
>>>> metrics which are supported by the MDSes.
>>>>
>>>> By default the MDS won't fill in the s_metrics in the MClientSession
>>>> reply message, so this patch will only force sending all the metrics
>>>> to MDSes from the Quincy version on, since Quincy is safe to receive
>>>> unknown metrics.
>>>>
>>>> Next we will add a module option to force enable sending the metrics
>>>> if users think that is safe.
>>>>
>>> Is this really a problem we need to work around in the client?
>>>
>>> This is an MDS bug and the patches to fix that abort are being
>>> backported (or already have been). I think we shouldn't do this at all
>>> and instead insist that this be fixed in the MDS.
>> Yes, though we have fixed that earlier in the MDS, there are still
>> some deployed versions which haven't received the backport yet.
>>
>> For example, in tracker#54411 we hit this when upgrading from an old
>> ceph to a new one: the new client reconnects to the old ceph during
>> the upgrade, the mgr has two client instances which reconnect to the
>> old ceph, and we cannot disable sending the metrics through the
>> options, because the mgr deliberately won't load them from ceph.conf.
>>
>> I am afraid there will be similar issues or strange use cases we
>> haven't figured out yet, like tracker#54411. For example, what if
>> during an upgrade the kclients are upgraded first, or a little
>> earlier than ceph?
>>
>> Last week someone pinged me on WeChat about a similar issue: they hit
>> the metric crash in their own upgrade test case.
>>
>> Compared with this workaround, is it really better to let the MDSes
>> crash?
>>
> We already have a kernel module option that, when enabled, disables
> sending metrics. Why not just use that to disable metrics when dealing
> with these problematic MDS versions that apparently can't be fixed?
>
> Adding all of this complexity to avoid sending something to the MDS
> because it might fall over really just seems like we're fixing this in
> the wrong place.
>
> If we were to take this, at what point can we drop this workaround? We
> really don't want to carry this in perpetuity.

Okay, let's drop it for now.

-- Xiubo


>>>> URL: https://tracker.ceph.com/issues/54411
>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>> ---
>>>>    fs/ceph/mds_client.c |  19 +++-
>>>>    fs/ceph/mds_client.h |   1 +
>>>>    fs/ceph/metric.c     | 206 ++++++++++++++++++++++++-------------------
>>>>    3 files changed, 131 insertions(+), 95 deletions(-)
>>>>
>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>> index f476c65fb985..65980ce97620 100644
>>>> --- a/fs/ceph/mds_client.c
>>>> +++ b/fs/ceph/mds_client.c
>>>> @@ -3422,7 +3422,7 @@ static void handle_session(struct ceph_mds_session *session,
>>>>    	void *end = p + msg->front.iov_len;
>>>>    	struct ceph_mds_session_head *h;
>>>>    	u32 op;
>>>> -	u64 seq, features = 0;
>>>> +	u64 seq, features = 0, metrics = 0;
>>>>    	int wake = 0;
>>>>    	bool blocklisted = false;
>>>>    
>>>> @@ -3452,11 +3452,21 @@ static void handle_session(struct ceph_mds_session *session,
>>>>    		}
>>>>    	}
>>>>    
>>>> +	/* version >= 4, metric bits */
>>>> +	if (msg_version >= 4) {
>>>> +		u32 len;
>>>> +		/* struct_v, struct_compat, and len */
>>>> +		ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad);
>>>> +		ceph_decode_32_safe(&p, end, len, bad);
>>>> +		if (len) {
>>>> +			ceph_decode_64_safe(&p, end, metrics, bad);
>>>> +			p += len - sizeof(metrics);
>>>> +		}
>>>> +	}
>>>> +
>>>> +	/* version >= 5, flags   */
>>>>    	if (msg_version >= 5) {
>>>>    		u32 flags;
>>>> -		/* version >= 4, struct_v, struct_cv, len, metric_spec */
>>>> -	        ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 2, bad);
>>>> -		/* version >= 5, flags   */
>>>>                    ceph_decode_32_safe(&p, end, flags, bad);
>>>>    		if (flags & CEPH_SESSION_BLOCKLISTED) {
>>>>    		        pr_warn("mds%d session blocklisted\n", session->s_mds);
>>>> @@ -3490,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
>>>>    			pr_info("mds%d reconnect success\n", session->s_mds);
>>>>    		session->s_state = CEPH_MDS_SESSION_OPEN;
>>>>    		session->s_features = features;
>>>> +		session->s_metrics = metrics;
>>>>    		renewed_caps(mdsc, session, 0);
>>>>    		if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features))
>>>>    			metric_schedule_delayed(&mdsc->metric);
>>>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>>>> index 32107c26f50d..0f2061f5388d 100644
>>>> --- a/fs/ceph/mds_client.h
>>>> +++ b/fs/ceph/mds_client.h
>>>> @@ -188,6 +188,7 @@ struct ceph_mds_session {
>>>>    	int               s_state;
>>>>    	unsigned long     s_ttl;      /* time until mds kills us */
>>>>    	unsigned long	  s_features;
>>>> +	unsigned long	  s_metrics;
>>>>    	u64               s_seq;      /* incoming msg seq # */
>>>>    	struct mutex      s_mutex;    /* serialize session messages */
>>>>    
>>>> diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
>>>> index c47347d2e84e..f01c1f4e6b89 100644
>>>> --- a/fs/ceph/metric.c
>>>> +++ b/fs/ceph/metric.c
>>>> @@ -31,6 +31,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
>>>>    	struct ceph_client_metric *m = &mdsc->metric;
>>>>    	u64 nr_caps = atomic64_read(&m->total_caps);
>>>>    	u32 header_len = sizeof(struct ceph_metric_header);
>>>> +	bool force = test_bit(CEPHFS_FEATURE_QUINCY, &s->s_features);
>>> I don't necessarily have a problem with adding extra CEPHFS_FEATURE_*
>>> enum values for different releases, as they're nice for documentation
>>> purposes. In the actual client code however, we should ensure that we
>>> only test for the _actual_ feature flag, and not the one corresponding
>>> to a particular release.
>> Yeah, sounds good.
>>
>> -- Xiubo
>>
>>>>    	struct ceph_msg *msg;
>>>>    	s64 sum;
>>>>    	s32 items = 0;
>>>> @@ -51,117 +52,140 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
>>>>    	head = msg->front.iov_base;
>>>>    
>>>>    	/* encode the cap metric */
>>>> -	cap = (struct ceph_metric_cap *)(head + 1);
>>>> -	cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
>>>> -	cap->header.ver = 1;
>>>> -	cap->header.compat = 1;
>>>> -	cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len);
>>>> -	cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
>>>> -	cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
>>>> -	cap->total = cpu_to_le64(nr_caps);
>>>> -	items++;
>>>> +	if (force || test_bit(CLIENT_METRIC_TYPE_CAP_INFO, &s->s_metrics)) {
>>>> +		cap = (struct ceph_metric_cap *)(head + 1);
>>>> +		cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
>>>> +		cap->header.ver = 1;
>>>> +		cap->header.compat = 1;
>>>> +		cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len);
>>>> +		cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
>>>> +		cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
>>>> +		cap->total = cpu_to_le64(nr_caps);
>>>> +		items++;
>>>> +	}
>>>>    
>>>>    	/* encode the read latency metric */
>>>> -	read = (struct ceph_metric_read_latency *)(cap + 1);
>>>> -	read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
>>>> -	read->header.ver = 2;
>>>> -	read->header.compat = 1;
>>>> -	read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
>>>> -	sum = m->metric[METRIC_READ].latency_sum;
>>>> -	ktime_to_ceph_timespec(&read->lat, sum);
>>>> -	ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg);
>>>> -	read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum);
>>>> -	read->count = cpu_to_le64(m->metric[METRIC_READ].total);
>>>> -	items++;
>>>> +	if (force || test_bit(CLIENT_METRIC_TYPE_READ_LATENCY, &s->s_metrics)) {
>>>> +		read = (struct ceph_metric_read_latency *)(cap + 1);
>>>> +		read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
>>>> +		read->header.ver = 2;
>>>> +		read->header.compat = 1;
>>>> +		read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
>>>> +		sum = m->metric[METRIC_READ].latency_sum;
>>>> +		ktime_to_ceph_timespec(&read->lat, sum);
>>>> +		ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg);
>>>> +		read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum);
>>>> +		read->count = cpu_to_le64(m->metric[METRIC_READ].total);
>>>> +		items++;
>>>> +	}
>>>>    
>>>>    	/* encode the write latency metric */
>>>> -	write = (struct ceph_metric_write_latency *)(read + 1);
>>>> -	write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
>>>> -	write->header.ver = 2;
>>>> -	write->header.compat = 1;
>>>> -	write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
>>>> -	sum = m->metric[METRIC_WRITE].latency_sum;
>>>> -	ktime_to_ceph_timespec(&write->lat, sum);
>>>> -	ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg);
>>>> -	write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum);
>>>> -	write->count = cpu_to_le64(m->metric[METRIC_WRITE].total);
>>>> -	items++;
>>>> +	if (force || test_bit(CLIENT_METRIC_TYPE_WRITE_LATENCY, &s->s_metrics)) {
>>>> +		write = (struct ceph_metric_write_latency *)(read + 1);
>>>> +		write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
>>>> +		write->header.ver = 2;
>>>> +		write->header.compat = 1;
>>>> +		write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
>>>> +		sum = m->metric[METRIC_WRITE].latency_sum;
>>>> +		ktime_to_ceph_timespec(&write->lat, sum);
>>>> +		ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg);
>>>> +		write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum);
>>>> +		write->count = cpu_to_le64(m->metric[METRIC_WRITE].total);
>>>> +		items++;
>>>> +	}
>>>>    
>>>>    	/* encode the metadata latency metric */
>>>> -	meta = (struct ceph_metric_metadata_latency *)(write + 1);
>>>> -	meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
>>>> -	meta->header.ver = 2;
>>>> -	meta->header.compat = 1;
>>>> -	meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
>>>> -	sum = m->metric[METRIC_METADATA].latency_sum;
>>>> -	ktime_to_ceph_timespec(&meta->lat, sum);
>>>> -	ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg);
>>>> -	meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum);
>>>> -	meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total);
>>>> -	items++;
>>>> +	if (force || test_bit(CLIENT_METRIC_TYPE_METADATA_LATENCY, &s->s_metrics)) {
>>>> +		meta = (struct ceph_metric_metadata_latency *)(write + 1);
>>>> +		meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
>>>> +		meta->header.ver = 2;
>>>> +		meta->header.compat = 1;
>>>> +		meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
>>>> +		sum = m->metric[METRIC_METADATA].latency_sum;
>>>> +		ktime_to_ceph_timespec(&meta->lat, sum);
>>>> +		ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg);
>>>> +		meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum);
>>>> +		meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total);
>>>> +		items++;
>>>> +	}
>>>>    
>>>>    	/* encode the dentry lease metric */
>>>> -	dlease = (struct ceph_metric_dlease *)(meta + 1);
>>>> -	dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
>>>> -	dlease->header.ver = 1;
>>>> -	dlease->header.compat = 1;
>>>> -	dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len);
>>>> -	dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
>>>> -	dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
>>>> -	dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
>>>> -	items++;
>>>> +	if (force || test_bit(CLIENT_METRIC_TYPE_DENTRY_LEASE, &s->s_metrics)) {
>>>> +		dlease = (struct ceph_metric_dlease *)(meta + 1);
>>>> +		dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
>>>> +		dlease->header.ver = 1;
>>>> +		dlease->header.compat = 1;
>>>> +		dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len);
>>>> +		dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
>>>> +		dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
>>>> +		dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
>>>> +		items++;
>>>> +	}
>>>>    
>>>>    	sum = percpu_counter_sum(&m->total_inodes);
>>>>    
>>>>    	/* encode the opened files metric */
>>>> -	files = (struct ceph_opened_files *)(dlease + 1);
>>>> -	files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
>>>> -	files->header.ver = 1;
>>>> -	files->header.compat = 1;
>>>> -	files->header.data_len = cpu_to_le32(sizeof(*files) - header_len);
>>>> -	files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
>>>> -	files->total = cpu_to_le64(sum);
>>>> -	items++;
>>>> +	if (force || test_bit(CLIENT_METRIC_TYPE_OPENED_FILES, &s->s_metrics)) {
>>>> +		files = (struct ceph_opened_files *)(dlease + 1);
>>>> +		files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
>>>> +		files->header.ver = 1;
>>>> +		files->header.compat = 1;
>>>> +		files->header.data_len = cpu_to_le32(sizeof(*files) - header_len);
>>>> +		files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
>>>> +		files->total = cpu_to_le64(sum);
>>>> +		items++;
>>>> +	}
>>>>    
>>>>    	/* encode the pinned icaps metric */
>>>> -	icaps = (struct ceph_pinned_icaps *)(files + 1);
>>>> -	icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
>>>> -	icaps->header.ver = 1;
>>>> -	icaps->header.compat = 1;
>>>> -	icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len);
>>>> -	icaps->pinned_icaps = cpu_to_le64(nr_caps);
>>>> -	icaps->total = cpu_to_le64(sum);
>>>> -	items++;
>>>> +	if (force || test_bit(CLIENT_METRIC_TYPE_PINNED_ICAPS, &s->s_metrics)) {
>>>> +		icaps = (struct ceph_pinned_icaps *)(files + 1);
>>>> +		icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
>>>> +		icaps->header.ver = 1;
>>>> +		icaps->header.compat = 1;
>>>> +		icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len);
>>>> +		icaps->pinned_icaps = cpu_to_le64(nr_caps);
>>>> +		icaps->total = cpu_to_le64(sum);
>>>> +		items++;
>>>> +	}
>>>>    
>>>>    	/* encode the opened inodes metric */
>>>> -	inodes = (struct ceph_opened_inodes *)(icaps + 1);
>>>> -	inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
>>>> -	inodes->header.ver = 1;
>>>> -	inodes->header.compat = 1;
>>>> -	inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len);
>>>> -	inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
>>>> -	inodes->total = cpu_to_le64(sum);
>>>> -	items++;
>>>> +	if (force || test_bit(CLIENT_METRIC_TYPE_OPENED_INODES, &s->s_metrics)) {
>>>> +		inodes = (struct ceph_opened_inodes *)(icaps + 1);
>>>> +		inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
>>>> +		inodes->header.ver = 1;
>>>> +		inodes->header.compat = 1;
>>>> +		inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len);
>>>> +		inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
>>>> +		inodes->total = cpu_to_le64(sum);
>>>> +		items++;
>>>> +	}
>>>>    
>>>>    	/* encode the read io size metric */
>>>> -	rsize = (struct ceph_read_io_size *)(inodes + 1);
>>>> -	rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES);
>>>> -	rsize->header.ver = 1;
>>>> -	rsize->header.compat = 1;
>>>> -	rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
>>>> -	rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
>>>> -	rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
>>>> -	items++;
>>>> +	if (force || test_bit(CLIENT_METRIC_TYPE_READ_IO_SIZES, &s->s_metrics)) {
>>>> +		rsize = (struct ceph_read_io_size *)(inodes + 1);
>>>> +		rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES);
>>>> +		rsize->header.ver = 1;
>>>> +		rsize->header.compat = 1;
>>>> +		rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
>>>> +		rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
>>>> +		rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
>>>> +		items++;
>>>> +	}
>>>>    
>>>>    	/* encode the write io size metric */
>>>> -	wsize = (struct ceph_write_io_size *)(rsize + 1);
>>>> -	wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES);
>>>> -	wsize->header.ver = 1;
>>>> -	wsize->header.compat = 1;
>>>> -	wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
>>>> -	wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
>>>> -	wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
>>>> -	items++;
>>>> +	if (force || test_bit(CLIENT_METRIC_TYPE_WRITE_IO_SIZES, &s->s_metrics)) {
>>>> +		wsize = (struct ceph_write_io_size *)(rsize + 1);
>>>> +		wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES);
>>>> +		wsize->header.ver = 1;
>>>> +		wsize->header.compat = 1;
>>>> +		wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
>>>> +		wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
>>>> +		wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
>>>> +		items++;
>>>> +	}
>>>> +
>>>> +	if (!items)
>>>> +		return true;
>>>>    
>>>>    	put_unaligned_le32(items, &head->num);
>>>>    	msg->front.iov_len = len;


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] ceph: only send the metrices supported by the MDS for old cephs
  2022-03-31 12:11   ` Jeff Layton
  2022-04-01  1:18     ` Xiubo Li
@ 2022-07-13  1:25     ` Xiubo Li
  2022-07-15  3:20       ` Xiubo Li
  1 sibling, 1 reply; 11+ messages in thread
From: Xiubo Li @ 2022-07-13  1:25 UTC (permalink / raw)
  To: Jeff Layton
  Cc: idryomov, vshankar, ceph-devel, Patrick Donnelly, Gregory Farnum,
	Milind Changire

Jeff,

I think this still makes sense; for more detail please see [1], which
is exactly what I was worried about for older ceph when users want to
upgrade the cluster.

Though this is an MDS-side bug and has already been fixed in the MDS,
there are existing clusters still running releases from before that
fix, so when upgrading only the kclient, or upgrading the kclient
first, they will hit the same issue as in [1].

[1] https://tracker.ceph.com/issues/56529
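
To be concrete, the per-metric gating this series proposes boils down
to the following untested sketch. CEPHFS_FEATURE_METRIC_TOLERANT is a
made-up name here for the dedicated feature bit Jeff asked for, in
place of the CEPHFS_FEATURE_QUINCY release alias:

	/* Hypothetical dedicated bit saying this MDS is safe to
	 * receive unknown metric types, so everything may be sent. */
	bool force = test_bit(CEPHFS_FEATURE_METRIC_TOLERANT, &s->s_features);

	/* Otherwise send a metric only if this MDS advertised it in
	 * the MClientSession metric bits. */
	if (force || test_bit(CLIENT_METRIC_TYPE_CAP_INFO, &s->s_metrics)) {
		/* encode the cap metric and bump items, as in the patch */
	}

An old MDS that never advertises anything then simply never receives
the metric types it could abort on.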

-- Xiubo


On 3/31/22 8:11 PM, Jeff Layton wrote:
> On Thu, 2022-03-31 at 14:52 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Some old ceph versions will abort the MDS daemons when they receive
>> unknown metrics. With this change the client will only send the
>> metrics which are supported by the MDSes.
>>
>> By default the MDS won't fill in the s_metrics in the MClientSession
>> reply message, so this patch will only force sending all the metrics
>> to MDSes from the Quincy version on, since Quincy is safe to receive
>> unknown metrics.
>>
>> Next we will add a module option to force enable sending the metrics
>> if users think that is safe.
>>
>
> Is this really a problem we need to work around in the client?
>
> This is an MDS bug and the patches to fix that abort are being
> backported (or already have been). I think we shouldn't do this at all
> and instead insist that this be fixed in the MDS.
>
>> URL: https://tracker.ceph.com/issues/54411
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/mds_client.c |  19 +++-
>>   fs/ceph/mds_client.h |   1 +
>>   fs/ceph/metric.c     | 206 ++++++++++++++++++++++++-------------------
>>   3 files changed, 131 insertions(+), 95 deletions(-)
>>
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index f476c65fb985..65980ce97620 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -3422,7 +3422,7 @@ static void handle_session(struct ceph_mds_session *session,
>>   	void *end = p + msg->front.iov_len;
>>   	struct ceph_mds_session_head *h;
>>   	u32 op;
>> -	u64 seq, features = 0;
>> +	u64 seq, features = 0, metrics = 0;
>>   	int wake = 0;
>>   	bool blocklisted = false;
>>   
>> @@ -3452,11 +3452,21 @@ static void handle_session(struct ceph_mds_session *session,
>>   		}
>>   	}
>>   
>> +	/* version >= 4, metric bits */
>> +	if (msg_version >= 4) {
>> +		u32 len;
>> +		/* struct_v, struct_compat, and len */
>> +		ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad);
>> +		ceph_decode_32_safe(&p, end, len, bad);
>> +		if (len) {
>> +			ceph_decode_64_safe(&p, end, metrics, bad);
>> +			p += len - sizeof(metrics);
>> +		}
>> +	}
>> +
>> +	/* version >= 5, flags   */
>>   	if (msg_version >= 5) {
>>   		u32 flags;
>> -		/* version >= 4, struct_v, struct_cv, len, metric_spec */
>> -	        ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 2, bad);
>> -		/* version >= 5, flags   */
>>                   ceph_decode_32_safe(&p, end, flags, bad);
>>   		if (flags & CEPH_SESSION_BLOCKLISTED) {
>>   		        pr_warn("mds%d session blocklisted\n", session->s_mds);
>> @@ -3490,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
>>   			pr_info("mds%d reconnect success\n", session->s_mds);
>>   		session->s_state = CEPH_MDS_SESSION_OPEN;
>>   		session->s_features = features;
>> +		session->s_metrics = metrics;
>>   		renewed_caps(mdsc, session, 0);
>>   		if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features))
>>   			metric_schedule_delayed(&mdsc->metric);
>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>> index 32107c26f50d..0f2061f5388d 100644
>> --- a/fs/ceph/mds_client.h
>> +++ b/fs/ceph/mds_client.h
>> @@ -188,6 +188,7 @@ struct ceph_mds_session {
>>   	int               s_state;
>>   	unsigned long     s_ttl;      /* time until mds kills us */
>>   	unsigned long	  s_features;
>> +	unsigned long	  s_metrics;
>>   	u64               s_seq;      /* incoming msg seq # */
>>   	struct mutex      s_mutex;    /* serialize session messages */
>>   
>> diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
>> index c47347d2e84e..f01c1f4e6b89 100644
>> --- a/fs/ceph/metric.c
>> +++ b/fs/ceph/metric.c
>> @@ -31,6 +31,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
>>   	struct ceph_client_metric *m = &mdsc->metric;
>>   	u64 nr_caps = atomic64_read(&m->total_caps);
>>   	u32 header_len = sizeof(struct ceph_metric_header);
>> +	bool force = test_bit(CEPHFS_FEATURE_QUINCY, &s->s_features);
> I don't necessarily have a problem with adding extra CEPHFS_FEATURE_*
> enum values for different releases, as they're nice for documentation
> purposes. In the actual client code however, we should ensure that we
> only test for the _actual_ feature flag, and not the one corresponding
> to a particular release.
>
>
>>   	struct ceph_msg *msg;
>>   	s64 sum;
>>   	s32 items = 0;
>> @@ -51,117 +52,140 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
>>   	head = msg->front.iov_base;
>>   
>>   	/* encode the cap metric */
>> -	cap = (struct ceph_metric_cap *)(head + 1);
>> -	cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
>> -	cap->header.ver = 1;
>> -	cap->header.compat = 1;
>> -	cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len);
>> -	cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
>> -	cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
>> -	cap->total = cpu_to_le64(nr_caps);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_CAP_INFO, &s->s_metrics)) {
>> +		cap = (struct ceph_metric_cap *)(head + 1);
>> +		cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
>> +		cap->header.ver = 1;
>> +		cap->header.compat = 1;
>> +		cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len);
>> +		cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
>> +		cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
>> +		cap->total = cpu_to_le64(nr_caps);
>> +		items++;
>> +	}
>>   
>>   	/* encode the read latency metric */
>> -	read = (struct ceph_metric_read_latency *)(cap + 1);
>> -	read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
>> -	read->header.ver = 2;
>> -	read->header.compat = 1;
>> -	read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
>> -	sum = m->metric[METRIC_READ].latency_sum;
>> -	ktime_to_ceph_timespec(&read->lat, sum);
>> -	ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg);
>> -	read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum);
>> -	read->count = cpu_to_le64(m->metric[METRIC_READ].total);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_READ_LATENCY, &s->s_metrics)) {
>> +		read = (struct ceph_metric_read_latency *)(cap + 1);
>> +		read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
>> +		read->header.ver = 2;
>> +		read->header.compat = 1;
>> +		read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
>> +		sum = m->metric[METRIC_READ].latency_sum;
>> +		ktime_to_ceph_timespec(&read->lat, sum);
>> +		ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg);
>> +		read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum);
>> +		read->count = cpu_to_le64(m->metric[METRIC_READ].total);
>> +		items++;
>> +	}
>>   
>>   	/* encode the write latency metric */
>> -	write = (struct ceph_metric_write_latency *)(read + 1);
>> -	write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
>> -	write->header.ver = 2;
>> -	write->header.compat = 1;
>> -	write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
>> -	sum = m->metric[METRIC_WRITE].latency_sum;
>> -	ktime_to_ceph_timespec(&write->lat, sum);
>> -	ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg);
>> -	write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum);
>> -	write->count = cpu_to_le64(m->metric[METRIC_WRITE].total);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_WRITE_LATENCY, &s->s_metrics)) {
>> +		write = (struct ceph_metric_write_latency *)(read + 1);
>> +		write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
>> +		write->header.ver = 2;
>> +		write->header.compat = 1;
>> +		write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
>> +		sum = m->metric[METRIC_WRITE].latency_sum;
>> +		ktime_to_ceph_timespec(&write->lat, sum);
>> +		ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg);
>> +		write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum);
>> +		write->count = cpu_to_le64(m->metric[METRIC_WRITE].total);
>> +		items++;
>> +	}
>>   
>>   	/* encode the metadata latency metric */
>> -	meta = (struct ceph_metric_metadata_latency *)(write + 1);
>> -	meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
>> -	meta->header.ver = 2;
>> -	meta->header.compat = 1;
>> -	meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
>> -	sum = m->metric[METRIC_METADATA].latency_sum;
>> -	ktime_to_ceph_timespec(&meta->lat, sum);
>> -	ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg);
>> -	meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum);
>> -	meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_METADATA_LATENCY, &s->s_metrics)) {
>> +		meta = (struct ceph_metric_metadata_latency *)(write + 1);
>> +		meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
>> +		meta->header.ver = 2;
>> +		meta->header.compat = 1;
>> +		meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
>> +		sum = m->metric[METRIC_METADATA].latency_sum;
>> +		ktime_to_ceph_timespec(&meta->lat, sum);
>> +		ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg);
>> +		meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum);
>> +		meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total);
>> +		items++;
>> +	}
>>   
>>   	/* encode the dentry lease metric */
>> -	dlease = (struct ceph_metric_dlease *)(meta + 1);
>> -	dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
>> -	dlease->header.ver = 1;
>> -	dlease->header.compat = 1;
>> -	dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len);
>> -	dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
>> -	dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
>> -	dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_DENTRY_LEASE, &s->s_metrics)) {
>> +		dlease = (struct ceph_metric_dlease *)(meta + 1);
>> +		dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
>> +		dlease->header.ver = 1;
>> +		dlease->header.compat = 1;
>> +		dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len);
>> +		dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
>> +		dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
>> +		dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
>> +		items++;
>> +	}
>>   
>>   	sum = percpu_counter_sum(&m->total_inodes);
>>   
>>   	/* encode the opened files metric */
>> -	files = (struct ceph_opened_files *)(dlease + 1);
>> -	files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
>> -	files->header.ver = 1;
>> -	files->header.compat = 1;
>> -	files->header.data_len = cpu_to_le32(sizeof(*files) - header_len);
>> -	files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
>> -	files->total = cpu_to_le64(sum);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_OPENED_FILES, &s->s_metrics)) {
>> +		files = (struct ceph_opened_files *)(dlease + 1);
>> +		files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
>> +		files->header.ver = 1;
>> +		files->header.compat = 1;
>> +		files->header.data_len = cpu_to_le32(sizeof(*files) - header_len);
>> +		files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
>> +		files->total = cpu_to_le64(sum);
>> +		items++;
>> +	}
>>   
>>   	/* encode the pinned icaps metric */
>> -	icaps = (struct ceph_pinned_icaps *)(files + 1);
>> -	icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
>> -	icaps->header.ver = 1;
>> -	icaps->header.compat = 1;
>> -	icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len);
>> -	icaps->pinned_icaps = cpu_to_le64(nr_caps);
>> -	icaps->total = cpu_to_le64(sum);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_PINNED_ICAPS, &s->s_metrics)) {
>> +		icaps = (struct ceph_pinned_icaps *)(files + 1);
>> +		icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
>> +		icaps->header.ver = 1;
>> +		icaps->header.compat = 1;
>> +		icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len);
>> +		icaps->pinned_icaps = cpu_to_le64(nr_caps);
>> +		icaps->total = cpu_to_le64(sum);
>> +		items++;
>> +	}
>>   
>>   	/* encode the opened inodes metric */
>> -	inodes = (struct ceph_opened_inodes *)(icaps + 1);
>> -	inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
>> -	inodes->header.ver = 1;
>> -	inodes->header.compat = 1;
>> -	inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len);
>> -	inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
>> -	inodes->total = cpu_to_le64(sum);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_OPENED_INODES, &s->s_metrics)) {
>> +		inodes = (struct ceph_opened_inodes *)(icaps + 1);
>> +		inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
>> +		inodes->header.ver = 1;
>> +		inodes->header.compat = 1;
>> +		inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len);
>> +		inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
>> +		inodes->total = cpu_to_le64(sum);
>> +		items++;
>> +	}
>>   
>>   	/* encode the read io size metric */
>> -	rsize = (struct ceph_read_io_size *)(inodes + 1);
>> -	rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES);
>> -	rsize->header.ver = 1;
>> -	rsize->header.compat = 1;
>> -	rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
>> -	rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
>> -	rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_READ_IO_SIZES, &s->s_metrics)) {
>> +		rsize = (struct ceph_read_io_size *)(inodes + 1);
>> +		rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES);
>> +		rsize->header.ver = 1;
>> +		rsize->header.compat = 1;
>> +		rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
>> +		rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
>> +		rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
>> +		items++;
>> +	}
>>   
>>   	/* encode the write io size metric */
>> -	wsize = (struct ceph_write_io_size *)(rsize + 1);
>> -	wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES);
>> -	wsize->header.ver = 1;
>> -	wsize->header.compat = 1;
>> -	wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
>> -	wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
>> -	wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
>> -	items++;
>> +	if (force || test_bit(CLIENT_METRIC_TYPE_WRITE_IO_SIZES, &s->s_metrics)) {
>> +		wsize = (struct ceph_write_io_size *)(rsize + 1);
>> +		wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES);
>> +		wsize->header.ver = 1;
>> +		wsize->header.compat = 1;
>> +		wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
>> +		wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
>> +		wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
>> +		items++;
>> +	}
>> +
>> +	if (!items)
>> +		return true;
>>   
>>   	put_unaligned_le32(items, &head->num);
>>   	msg->front.iov_len = len;


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] ceph: only send the metrices supported by the MDS for old cephs
  2022-07-13  1:25     ` Xiubo Li
@ 2022-07-15  3:20       ` Xiubo Li
  0 siblings, 0 replies; 11+ messages in thread
From: Xiubo Li @ 2022-07-15  3:20 UTC (permalink / raw)
  To: Jeff Layton, Gregory Farnum, Patrick Donnelly, Venky Shankar
  Cc: idryomov, ceph-devel, Milind Changire

More info.

Check the upstream ceph code:

Commit c669de12e076bcedf3fe6cfe52377713f06d7ceb introduced the
ceph_abort() call that fires when an unknown metric is received from
clients.

$ git tag --contain c669de12e076bcedf3fe6cfe52377713f06d7ceb
v16.1.0
v16.2.0
v16.2.1
v16.2.2
v16.2.3
v16.2.4
v16.2.5
v16.2.6
v16.2.7
v16.2.8
v16.2.9
v17.0.0
v17.1.0
v17.2.0
v17.2.1

And a later commit, b1b44d775df3160d937c068d5e1079e24199ed6b, fixed it:

diff --git a/src/include/cephfs/metrics/Types.h b/src/include/cephfs/metrics/Types.h
index 8def1aa7101..d5589136de2 100644
--- a/src/include/cephfs/metrics/Types.h
+++ b/src/include/cephfs/metrics/Types.h
@@ -53,7 +53,7 @@ inline std::ostream &operator<<(std::ostream &os, const ClientMetricType &type)
     os << "OPENED_INODES";
     break;
   default:
-    ceph_abort();
+    os << "Unknown metric type: " << type;
   }

   return os;


$ git tag --contain b1b44d775df3160d937c068d5e1079e24199ed6b
v17.1.0
v17.2.0
v17.2.1

And for the backport of the above patch in Pacific:

$ git tag --contain 48396a2a6effc09768e83e7b2709a0dc42d08199
v16.2.5
v16.2.6
v16.2.7
v16.2.8
v16.2.9

That means all of the following Pacific-line versions are affected:

v16.1.0
v16.2.0
v16.2.1
v16.2.2
v16.2.3
v16.2.4

We must disable the client metrics in kclient entirely if users are
running these versions with newer kernels; but then, while on these
ceph versions, they couldn't see anything of the client metrics at
all, not even the ones the MDS does support.
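
And the global switch really is all or nothing; inside the metrics
worker it amounts to roughly this sketch (assuming a
disable_send_metrics style knob, as discussed earlier in this thread):

	/* With the global knob set, bail out before encoding anything,
	 * so even the metrics this MDS does support are never sent. */
	if (disable_send_metrics)
		return true;

That is why gating on what each MDS advertises looks better than
telling users to turn metrics off entirely.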

I also checked the downstream ceph versions; there is a gap of 159
tags between the bug and the fix.

-- Xiubo





On 7/13/22 9:25 AM, Xiubo Li wrote:
> Jeff,
>
> I think this still makes sense; for more detail please see [1], which
> is exactly what I was worried about for older ceph when users want to
> upgrade the cluster.
>
> Though this is an MDS-side bug and has already been fixed in the MDS,
> there are existing clusters still running releases from before that
> fix, so when upgrading only the kclient, or upgrading the kclient
> first, they will hit the same issue as in [1].
>
> [1] https://tracker.ceph.com/issues/56529
>
> -- Xiubo
>
>
> On 3/31/22 8:11 PM, Jeff Layton wrote:
>> On Thu, 2022-03-31 at 14:52 +0800, xiubli@redhat.com wrote:
>>> From: Xiubo Li <xiubli@redhat.com>
>>>
>>> Some old ceph versions will abort the MDS daemons when they receive
>>> unknown metrics. With this change the client will only send the
>>> metrics which are supported by the MDSes.
>>>
>>> By default the MDS won't fill in the s_metrics in the MClientSession
>>> reply message, so this patch will only force sending all the metrics
>>> to MDSes from the Quincy version on, since Quincy is safe to receive
>>> unknown metrics.
>>>
>>> Next we will add a module option to force enable sending the metrics
>>> if users think that is safe.
>>>
>>
>> Is this really a problem we need to work around in the client?
>>
>> This is an MDS bug and the patches to fix that abort are being
>> backported (or already have been). I think we shouldn't do this at all
>> and instead insist that this be fixed in the MDS.
>>
>>> URL: https://tracker.ceph.com/issues/54411
>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>> ---
>>>   fs/ceph/mds_client.c |  19 +++-
>>>   fs/ceph/mds_client.h |   1 +
>>>   fs/ceph/metric.c     | 206 ++++++++++++++++++++++++-------------------
>>>   3 files changed, 131 insertions(+), 95 deletions(-)
>>>
>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>> index f476c65fb985..65980ce97620 100644
>>> --- a/fs/ceph/mds_client.c
>>> +++ b/fs/ceph/mds_client.c
>>> @@ -3422,7 +3422,7 @@ static void handle_session(struct ceph_mds_session *session,
>>>       void *end = p + msg->front.iov_len;
>>>       struct ceph_mds_session_head *h;
>>>       u32 op;
>>> -    u64 seq, features = 0;
>>> +    u64 seq, features = 0, metrics = 0;
>>>       int wake = 0;
>>>       bool blocklisted = false;
>>>
>>> @@ -3452,11 +3452,21 @@ static void handle_session(struct ceph_mds_session *session,
>>>           }
>>>       }
>>>
>>> +    /* version >= 4, metric bits */
>>> +    if (msg_version >= 4) {
>>> +        u32 len;
>>> +        /* struct_v, struct_compat, and len */
>>> +        ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad);
>>> +        ceph_decode_32_safe(&p, end, len, bad);
>>> +        if (len) {
>>> +            ceph_decode_64_safe(&p, end, metrics, bad);
>>> +            p += len - sizeof(metrics);
>>> +        }
>>> +    }
>>> +
>>> +    /* version >= 5, flags   */
>>>       if (msg_version >= 5) {
>>>           u32 flags;
>>> -        /* version >= 4, struct_v, struct_cv, len, metric_spec */
>>> -            ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 2, bad);
>>> -        /* version >= 5, flags   */
>>>                   ceph_decode_32_safe(&p, end, flags, bad);
>>>           if (flags & CEPH_SESSION_BLOCKLISTED) {
>>>                 pr_warn("mds%d session blocklisted\n", session->s_mds);
>>> @@ -3490,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
>>>               pr_info("mds%d reconnect success\n", session->s_mds);
>>>           session->s_state = CEPH_MDS_SESSION_OPEN;
>>>           session->s_features = features;
>>> +        session->s_metrics = metrics;
>>>           renewed_caps(mdsc, session, 0);
>>>           if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features))
>>>               metric_schedule_delayed(&mdsc->metric);
>>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>>> index 32107c26f50d..0f2061f5388d 100644
>>> --- a/fs/ceph/mds_client.h
>>> +++ b/fs/ceph/mds_client.h
>>> @@ -188,6 +188,7 @@ struct ceph_mds_session {
>>>       int               s_state;
>>>       unsigned long     s_ttl;      /* time until mds kills us */
>>>       unsigned long      s_features;
>>> +    unsigned long      s_metrics;
>>>       u64               s_seq;      /* incoming msg seq # */
>>>       struct mutex      s_mutex;    /* serialize session messages */
>>>
>>> diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
>>> index c47347d2e84e..f01c1f4e6b89 100644
>>> --- a/fs/ceph/metric.c
>>> +++ b/fs/ceph/metric.c
>>> @@ -31,6 +31,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
>>>       struct ceph_client_metric *m = &mdsc->metric;
>>>       u64 nr_caps = atomic64_read(&m->total_caps);
>>>       u32 header_len = sizeof(struct ceph_metric_header);
>>> +    bool force = test_bit(CEPHFS_FEATURE_QUINCY, &s->s_features);
>> I don't necessarily have a problem with adding extra CEPHFS_FEATURE_*
>> enum values for different releases, as they're nice for documentation
>> purposes. In the actual client code however, we should ensure that we
>> only test for the _actual_ feature flag, and not the one corresponding
>> to a particular release.
>>
>>
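To keep the distinction concrete, here is a minimal sketch of the two
patterns (it reuses only names already introduced in this series; the
release-named test is shown purely as the pattern to avoid):

	/* Preferred: gate on the capability bit the code actually depends on. */
	if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features))
		metric_schedule_delayed(&mdsc->metric);

	/*
	 * Avoid: gating on a release-named alias such as
	 *
	 *	if (test_bit(CEPHFS_FEATURE_QUINCY, &session->s_features))
	 *
	 * which only records which release first shipped the bit, not the
	 * feature the code needs.
	 */
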
>>>       struct ceph_msg *msg;
>>>       s64 sum;
>>>       s32 items = 0;
>>> @@ -51,117 +52,140 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
>>>       head = msg->front.iov_base;
>>>
>>>       /* encode the cap metric */
>>> -    cap = (struct ceph_metric_cap *)(head + 1);
>>> -    cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
>>> -    cap->header.ver = 1;
>>> -    cap->header.compat = 1;
>>> -    cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len);
>>> -    cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
>>> -    cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
>>> -    cap->total = cpu_to_le64(nr_caps);
>>> -    items++;
>>> +    if (force || test_bit(CLIENT_METRIC_TYPE_CAP_INFO, &s->s_metrics)) {
>>> +        cap = (struct ceph_metric_cap *)(head + 1);
>>> +        cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
>>> +        cap->header.ver = 1;
>>> +        cap->header.compat = 1;
>>> +        cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len);
>>> +        cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
>>> +        cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
>>> +        cap->total = cpu_to_le64(nr_caps);
>>> +        items++;
>>> +    }
>>>
>>>       /* encode the read latency metric */
>>> -    read = (struct ceph_metric_read_latency *)(cap + 1);
>>> -    read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
>>> -    read->header.ver = 2;
>>> -    read->header.compat = 1;
>>> -    read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
>>> -    sum = m->metric[METRIC_READ].latency_sum;
>>> -    ktime_to_ceph_timespec(&read->lat, sum);
>>> -    ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg);
>>> -    read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum);
>>> -    read->count = cpu_to_le64(m->metric[METRIC_READ].total);
>>> -    items++;
>>> +    if (force || test_bit(CLIENT_METRIC_TYPE_READ_LATENCY, &s->s_metrics)) {
>>> +        read = (struct ceph_metric_read_latency *)(cap + 1);
>>> +        read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
>>> +        read->header.ver = 2;
>>> +        read->header.compat = 1;
>>> +        read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
>>> +        sum = m->metric[METRIC_READ].latency_sum;
>>> +        ktime_to_ceph_timespec(&read->lat, sum);
>>> +        ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg);
>>> +        read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum);
>>> +        read->count = cpu_to_le64(m->metric[METRIC_READ].total);
>>> +        items++;
>>> +    }
>>>
>>>       /* encode the write latency metric */
>>> -    write = (struct ceph_metric_write_latency *)(read + 1);
>>> -    write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
>>> -    write->header.ver = 2;
>>> -    write->header.compat = 1;
>>> -    write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
>>> -    sum = m->metric[METRIC_WRITE].latency_sum;
>>> -    ktime_to_ceph_timespec(&write->lat, sum);
>>> -    ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg);
>>> -    write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum);
>>> -    write->count = cpu_to_le64(m->metric[METRIC_WRITE].total);
>>> -    items++;
>>> +    if (force || test_bit(CLIENT_METRIC_TYPE_WRITE_LATENCY, &s->s_metrics)) {
>>> +        write = (struct ceph_metric_write_latency *)(read + 1);
>>> +        write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
>>> +        write->header.ver = 2;
>>> +        write->header.compat = 1;
>>> +        write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
>>> +        sum = m->metric[METRIC_WRITE].latency_sum;
>>> +        ktime_to_ceph_timespec(&write->lat, sum);
>>> +        ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg);
>>> +        write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum);
>>> +        write->count = cpu_to_le64(m->metric[METRIC_WRITE].total);
>>> +        items++;
>>> +    }
>>>
>>>       /* encode the metadata latency metric */
>>> -    meta = (struct ceph_metric_metadata_latency *)(write + 1);
>>> -    meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
>>> -    meta->header.ver = 2;
>>> -    meta->header.compat = 1;
>>> -    meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
>>> -    sum = m->metric[METRIC_METADATA].latency_sum;
>>> -    ktime_to_ceph_timespec(&meta->lat, sum);
>>> -    ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg);
>>> -    meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum);
>>> -    meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total);
>>> -    items++;
>>> +    if (force || test_bit(CLIENT_METRIC_TYPE_METADATA_LATENCY, &s->s_metrics)) {
>>> +        meta = (struct ceph_metric_metadata_latency *)(write + 1);
>>> +        meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
>>> +        meta->header.ver = 2;
>>> +        meta->header.compat = 1;
>>> +        meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
>>> +        sum = m->metric[METRIC_METADATA].latency_sum;
>>> +        ktime_to_ceph_timespec(&meta->lat, sum);
>>> +        ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg);
>>> +        meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum);
>>> +        meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total);
>>> +        items++;
>>> +    }
>>>
>>>       /* encode the dentry lease metric */
>>> -    dlease = (struct ceph_metric_dlease *)(meta + 1);
>>> -    dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
>>> -    dlease->header.ver = 1;
>>> -    dlease->header.compat = 1;
>>> -    dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len);
>>> -    dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
>>> -    dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
>>> -    dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
>>> -    items++;
>>> +    if (force || test_bit(CLIENT_METRIC_TYPE_DENTRY_LEASE, &s->s_metrics)) {
>>> +        dlease = (struct ceph_metric_dlease *)(meta + 1);
>>> +        dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
>>> +        dlease->header.ver = 1;
>>> +        dlease->header.compat = 1;
>>> +        dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len);
>>> +        dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
>>> +        dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
>>> +        dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
>>> +        items++;
>>> +    }
>>>
>>>       sum = percpu_counter_sum(&m->total_inodes);
>>>
>>>       /* encode the opened files metric */
>>> -    files = (struct ceph_opened_files *)(dlease + 1);
>>> -    files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
>>> -    files->header.ver = 1;
>>> -    files->header.compat = 1;
>>> -    files->header.data_len = cpu_to_le32(sizeof(*files) - header_len);
>>> -    files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
>>> -    files->total = cpu_to_le64(sum);
>>> -    items++;
>>> +    if (force || test_bit(CLIENT_METRIC_TYPE_OPENED_FILES, &s->s_metrics)) {
>>> +        files = (struct ceph_opened_files *)(dlease + 1);
>>> +        files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
>>> +        files->header.ver = 1;
>>> +        files->header.compat = 1;
>>> +        files->header.data_len = cpu_to_le32(sizeof(*files) - header_len);
>>> +        files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
>>> +        files->total = cpu_to_le64(sum);
>>> +        items++;
>>> +    }
>>>
>>>       /* encode the pinned icaps metric */
>>> -    icaps = (struct ceph_pinned_icaps *)(files + 1);
>>> -    icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
>>> -    icaps->header.ver = 1;
>>> -    icaps->header.compat = 1;
>>> -    icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len);
>>> -    icaps->pinned_icaps = cpu_to_le64(nr_caps);
>>> -    icaps->total = cpu_to_le64(sum);
>>> -    items++;
>>> +    if (force || test_bit(CLIENT_METRIC_TYPE_PINNED_ICAPS, &s->s_metrics)) {
>>> +        icaps = (struct ceph_pinned_icaps *)(files + 1);
>>> +        icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
>>> +        icaps->header.ver = 1;
>>> +        icaps->header.compat = 1;
>>> +        icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len);
>>> +        icaps->pinned_icaps = cpu_to_le64(nr_caps);
>>> +        icaps->total = cpu_to_le64(sum);
>>> +        items++;
>>> +    }
>>>
>>>       /* encode the opened inodes metric */
>>> -    inodes = (struct ceph_opened_inodes *)(icaps + 1);
>>> -    inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
>>> -    inodes->header.ver = 1;
>>> -    inodes->header.compat = 1;
>>> -    inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len);
>>> -    inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
>>> -    inodes->total = cpu_to_le64(sum);
>>> -    items++;
>>> +    if (force || test_bit(CLIENT_METRIC_TYPE_OPENED_INODES, &s->s_metrics)) {
>>> +        inodes = (struct ceph_opened_inodes *)(icaps + 1);
>>> +        inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
>>> +        inodes->header.ver = 1;
>>> +        inodes->header.compat = 1;
>>> +        inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len);
>>> +        inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
>>> +        inodes->total = cpu_to_le64(sum);
>>> +        items++;
>>> +    }
>>>
>>>       /* encode the read io size metric */
>>> -    rsize = (struct ceph_read_io_size *)(inodes + 1);
>>> -    rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES);
>>> -    rsize->header.ver = 1;
>>> -    rsize->header.compat = 1;
>>> -    rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
>>> -    rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
>>> -    rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
>>> -    items++;
>>> +    if (force || test_bit(CLIENT_METRIC_TYPE_READ_IO_SIZES, &s->s_metrics)) {
>>> +        rsize = (struct ceph_read_io_size *)(inodes + 1);
>>> +        rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES);
>>> +        rsize->header.ver = 1;
>>> +        rsize->header.compat = 1;
>>> +        rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
>>> +        rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
>>> +        rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
>>> +        items++;
>>> +    }
>>>
>>>       /* encode the write io size metric */
>>> -    wsize = (struct ceph_write_io_size *)(rsize + 1);
>>> -    wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES);
>>> -    wsize->header.ver = 1;
>>> -    wsize->header.compat = 1;
>>> -    wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
>>> -    wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
>>> -    wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
>>> -    items++;
>>> +    if (force || test_bit(CLIENT_METRIC_TYPE_WRITE_IO_SIZES, &s->s_metrics)) {
>>> +        wsize = (struct ceph_write_io_size *)(rsize + 1);
>>> +        wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES);
>>> +        wsize->header.ver = 1;
>>> +        wsize->header.compat = 1;
>>> +        wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
>>> +        wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
>>> +        wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
>>> +        items++;
>>> +    }
>>> +
>>> +    if (!items)
>>> +        return true;
>>>
>>>       put_unaligned_le32(items, &head->num);
>>>       msg->front.iov_len = len;
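As a summary of the new decode path in the handle_session() hunk above, a
self-contained sketch (assumptions: the standard helpers from
include/linux/ceph/decode.h that the patch already uses, a local "bad:"
error label, and slightly hardened bounds handling for short blobs that is
not in the patch as posted):

	u64 metrics = 0;

	/* version >= 4: the session message carries a metric_spec */
	if (msg_version >= 4) {
		u32 len;

		/* struct_v (1 byte), struct_compat (1 byte), struct len (4 bytes) */
		ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad);
		/* length of the metric bitset that follows */
		ceph_decode_32_safe(&p, end, len, bad);
		if (len >= sizeof(metrics)) {
			/* keep the first 64 metric bits ... */
			ceph_decode_64_safe(&p, end, metrics, bad);
			/* ... and skip anything the MDS sent beyond them */
			ceph_decode_skip_n(&p, end, len - sizeof(metrics), bad);
		} else {
			ceph_decode_skip_n(&p, end, len, bad);
		}
	}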


^ permalink raw reply related	[flat|nested] 11+ messages in thread

Thread overview: 11+ messages
2022-03-31  6:52 [PATCH 0/3] ceph: only send the metrics supported by the MDS for old cephs xiubli
2022-03-31  6:52 ` [PATCH 1/3] ceph: add the Octopus,Pacific,Quincy feature bits xiubli
2022-03-31 12:15   ` Jeff Layton
2022-03-31  6:52 ` [PATCH 2/3] ceph: only send the metrics supported by the MDS for old cephs xiubli
2022-03-31 12:11   ` Jeff Layton
2022-04-01  1:18     ` Xiubo Li
2022-04-01  9:41       ` Jeff Layton
2022-04-01 14:11         ` Xiubo Li
2022-07-13  1:25     ` Xiubo Li
2022-07-15  3:20       ` Xiubo Li
2022-03-31  6:52 ` [PATCH 3/3] ceph: add force_ignore_metric_bits module parameter support xiubli
