All of lore.kernel.org
 help / color / mirror / Atom feed
From: xiubli@redhat.com
To: jlayton@kernel.org, idryomov@gmail.com, zyan@redhat.com
Cc: sage@redhat.com, pdonnell@redhat.com, ceph-devel@vger.kernel.org,
	Xiubo Li <xiubli@redhat.com>
Subject: [PATCH v3 6/8] ceph: periodically send perf metrics to MDS
Date: Tue, 14 Jan 2020 22:44:42 -0500	[thread overview]
Message-ID: <20200115034444.14304-7-xiubli@redhat.com> (raw)
In-Reply-To: <20200115034444.14304-1-xiubli@redhat.com>

From: Xiubo Li <xiubli@redhat.com>

Add enable/disable sending metrics to MDS debugfs and disabled as
default, if it's enabled the kclient will send metrics every
second.

This will send global dentry lease hit/miss and read/write/metadata
latency metrics and each session's caps hit/miss metric to MDS.

Every time only sends the global metrics once via any availible
session.

URL: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/debugfs.c            |  44 +++++++-
 fs/ceph/mds_client.c         | 205 ++++++++++++++++++++++++++++++++---
 fs/ceph/mds_client.h         |   3 +
 fs/ceph/super.h              |   1 +
 include/linux/ceph/ceph_fs.h |  77 +++++++++++++
 5 files changed, 312 insertions(+), 18 deletions(-)

diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index df8c1cc685d9..bb96fb4d04c4 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -124,6 +124,40 @@ static int mdsc_show(struct seq_file *s, void *p)
 	return 0;
 }
 
+/*
+ * metrics debugfs
+ */
+static int sending_metrics_set(void *data, u64 val)
+{
+	struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+
+	if (val > 1) {
+		pr_err("Invalid sending metrics set value %llu\n", val);
+		return -EINVAL;
+	}
+
+	mutex_lock(&mdsc->mutex);
+	mdsc->sending_metrics = (unsigned int)val;
+	mutex_unlock(&mdsc->mutex);
+
+	return 0;
+}
+
+static int sending_metrics_get(void *data, u64 *val)
+{
+	struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+
+	mutex_lock(&mdsc->mutex);
+	*val = (u64)mdsc->sending_metrics;
+	mutex_unlock(&mdsc->mutex);
+
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(sending_metrics_fops, sending_metrics_get,
+			sending_metrics_set, "%llu\n");
+
 static int metric_show(struct seq_file *s, void *p)
 {
 	struct ceph_fs_client *fsc = s->private;
@@ -308,11 +342,9 @@ static int congestion_kb_get(void *data, u64 *val)
 	*val = (u64)fsc->mount_options->congestion_kb;
 	return 0;
 }
-
 DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get,
 			congestion_kb_set, "%llu\n");
 
-
 void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 {
 	dout("ceph_fs_debugfs_cleanup\n");
@@ -322,6 +354,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 	debugfs_remove(fsc->debugfs_mds_sessions);
 	debugfs_remove(fsc->debugfs_caps);
 	debugfs_remove(fsc->debugfs_metric);
+	debugfs_remove(fsc->debugfs_sending_metrics);
 	debugfs_remove(fsc->debugfs_mdsc);
 }
 
@@ -362,6 +395,13 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 						fsc,
 						&mdsc_show_fops);
 
+	fsc->debugfs_sending_metrics =
+			debugfs_create_file("sending_metrics",
+					    0600,
+					    fsc->client->debugfs_dir,
+					    fsc,
+					    &sending_metrics_fops);
+
 	fsc->debugfs_metric = debugfs_create_file("metrics",
 						  0400,
 						  fsc->client->debugfs_dir,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 4539bd4b3451..3c1ba70f6744 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4149,13 +4149,162 @@ void ceph_mdsc_update_metadata_latency(struct ceph_client_metric *m,
 	spin_unlock(&m->metadata_lock);
 }
 
+/*
+ * called under s_mutex
+ */
+static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
+				   struct ceph_mds_session *s,
+				   bool skip_global)
+{
+	struct ceph_metric_head *head;
+	struct ceph_metric_cap *cap;
+	struct ceph_metric_dentry_lease *lease;
+	struct ceph_metric_read_latency *read;
+	struct ceph_metric_write_latency *write;
+	struct ceph_metric_metadata_latency *meta;
+	struct ceph_msg *msg;
+	struct timespec64 ts;
+	s32 len = sizeof(*head) + sizeof(*cap);
+	s64 sum, total, avg;
+	s32 items = 0;
+
+	if (!mdsc || !s)
+		return false;
+
+	if (!skip_global) {
+		len += sizeof(*lease);
+		len += sizeof(*read);
+		len += sizeof(*write);
+		len += sizeof(*meta);
+	}
+
+	msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
+	if (!msg) {
+		pr_err("send metrics to mds%d, failed to allocate message\n",
+		       s->s_mds);
+		return false;
+	}
+
+	head = msg->front.iov_base;
+
+	/* encode the cap metric */
+	cap = (struct ceph_metric_cap *)(head + 1);
+	cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
+	cap->ver = 1;
+	cap->campat = 1;
+	cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
+	cap->hit = cpu_to_le64(percpu_counter_sum(&s->i_caps_hit));
+	cap->mis = cpu_to_le64(percpu_counter_sum(&s->i_caps_mis));
+	cap->total = cpu_to_le64(s->s_nr_caps);
+	items++;
+
+	dout("cap metric hit %lld, mis %lld, total caps %lld",
+	     le64_to_cpu(cap->hit), le64_to_cpu(cap->mis),
+	     le64_to_cpu(cap->total));
+
+	/* only send the global once */
+	if (skip_global)
+		goto skip_global;
+
+	/* encode the dentry lease metric */
+	lease = (struct ceph_metric_dentry_lease *)(cap + 1);
+	lease->type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
+	lease->ver = 1;
+	lease->campat = 1;
+	lease->data_len = cpu_to_le32(sizeof(*lease) - 10);
+	lease->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.d_lease_hit));
+	lease->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.d_lease_mis));
+	lease->total = cpu_to_le64(atomic64_read(&mdsc->metric.total_dentries));
+	items++;
+
+	dout("dentry lease metric hit %lld, mis %lld, total dentries %lld",
+	     le64_to_cpu(lease->hit), le64_to_cpu(lease->mis),
+	     le64_to_cpu(lease->total));
+
+	/* encode the read latency metric */
+	read = (struct ceph_metric_read_latency *)(lease + 1);
+	read->type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
+	read->ver = 1;
+	read->campat = 1;
+	read->data_len = cpu_to_le32(sizeof(*read) - 10);
+	spin_lock(&mdsc->metric.read_lock);
+	total = atomic64_read(&mdsc->metric.total_reads),
+	sum = timespec64_to_ns(&mdsc->metric.read_latency_sum);
+	spin_unlock(&mdsc->metric.read_lock);
+	avg = total ? sum / total : 0;
+	ts = ns_to_timespec64(avg);
+	read->sec = cpu_to_le32(ts.tv_sec);
+	read->nsec = cpu_to_le32(ts.tv_nsec);
+	items++;
+
+	dout("read latency metric total %lld, sum lat %lld, avg lat %lld",
+	     total, sum, avg);
+
+	/* encode the write latency metric */
+	write = (struct ceph_metric_write_latency *)(read + 1);
+	write->type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
+	write->ver = 1;
+	write->campat = 1;
+	write->data_len = cpu_to_le32(sizeof(*write) - 10);
+	spin_lock(&mdsc->metric.write_lock);
+	total = atomic64_read(&mdsc->metric.total_writes),
+	sum = timespec64_to_ns(&mdsc->metric.write_latency_sum);
+	spin_unlock(&mdsc->metric.write_lock);
+	avg = total ? sum / total : 0;
+	ts = ns_to_timespec64(avg);
+	write->sec = cpu_to_le32(ts.tv_sec);
+	write->nsec = cpu_to_le32(ts.tv_nsec);
+	items++;
+
+	dout("write latency metric total %lld, sum lat %lld, avg lat %lld",
+	     total, sum, avg);
+
+	/* encode the metadata latency metric */
+	meta = (struct ceph_metric_metadata_latency *)(write + 1);
+	meta->type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
+	meta->ver = 1;
+	meta->campat = 1;
+	meta->data_len = cpu_to_le32(sizeof(*meta) - 10);
+	spin_lock(&mdsc->metric.metadata_lock);
+	total = atomic64_read(&mdsc->metric.total_metadatas),
+	sum = timespec64_to_ns(&mdsc->metric.metadata_latency_sum);
+	spin_unlock(&mdsc->metric.metadata_lock);
+	avg = total ? sum / total : 0;
+	ts = ns_to_timespec64(avg);
+	meta->sec = cpu_to_le32(ts.tv_sec);
+	meta->nsec = cpu_to_le32(ts.tv_nsec);
+	items++;
+
+	dout("metadata latency metric total %lld, sum lat %lld, avg lat %lld",
+	     total, sum, avg);
+
+skip_global:
+	put_unaligned_le32(items, &head->num);
+	msg->front.iov_len = cpu_to_le32(len);
+	msg->hdr.version = cpu_to_le16(1);
+	msg->hdr.compat_version = cpu_to_le16(1);
+	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+	dout("send metrics to mds%d %p\n", s->s_mds, msg);
+	ceph_con_send(&s->s_con, msg);
+
+	return true;
+}
+
 /*
  * delayed work -- periodically trim expired leases, renew caps with mds
  */
+#define CEPH_WORK_DELAY_DEF 5
 static void schedule_delayed(struct ceph_mds_client *mdsc)
 {
-	int delay = 5;
-	unsigned hz = round_jiffies_relative(HZ * delay);
+	unsigned int hz;
+	int delay = CEPH_WORK_DELAY_DEF;
+
+	mutex_lock(&mdsc->mutex);
+	if (mdsc->sending_metrics)
+		delay = 1;
+	mutex_unlock(&mdsc->mutex);
+
+	hz = round_jiffies_relative(HZ * delay);
 	schedule_delayed_work(&mdsc->delayed_work, hz);
 }
 
@@ -4166,18 +4315,28 @@ static void delayed_work(struct work_struct *work)
 		container_of(work, struct ceph_mds_client, delayed_work.work);
 	int renew_interval;
 	int renew_caps;
+	bool metric_only;
+	bool sending_metrics;
+	bool g_skip = false;
 
 	dout("mdsc delayed_work\n");
 
 	mutex_lock(&mdsc->mutex);
-	renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
-	renew_caps = time_after_eq(jiffies, HZ*renew_interval +
-				   mdsc->last_renew_caps);
-	if (renew_caps)
-		mdsc->last_renew_caps = jiffies;
+	sending_metrics = !!mdsc->sending_metrics;
+	metric_only = mdsc->sending_metrics &&
+		(mdsc->ticks++ % CEPH_WORK_DELAY_DEF);
+
+	if (!metric_only) {
+		renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
+		renew_caps = time_after_eq(jiffies, HZ*renew_interval +
+					   mdsc->last_renew_caps);
+		if (renew_caps)
+			mdsc->last_renew_caps = jiffies;
+	}
 
 	for (i = 0; i < mdsc->max_sessions; i++) {
 		struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
+
 		if (!s)
 			continue;
 		if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
@@ -4203,13 +4362,20 @@ static void delayed_work(struct work_struct *work)
 		mutex_unlock(&mdsc->mutex);
 
 		mutex_lock(&s->s_mutex);
-		if (renew_caps)
-			send_renew_caps(mdsc, s);
-		else
-			ceph_con_keepalive(&s->s_con);
-		if (s->s_state == CEPH_MDS_SESSION_OPEN ||
-		    s->s_state == CEPH_MDS_SESSION_HUNG)
-			ceph_send_cap_releases(mdsc, s);
+
+		if (sending_metrics)
+			g_skip = ceph_mdsc_send_metrics(mdsc, s, g_skip);
+
+		if (!metric_only) {
+			if (renew_caps)
+				send_renew_caps(mdsc, s);
+			else
+				ceph_con_keepalive(&s->s_con);
+			if (s->s_state == CEPH_MDS_SESSION_OPEN ||
+					s->s_state == CEPH_MDS_SESSION_HUNG)
+				ceph_send_cap_releases(mdsc, s);
+		}
+
 		mutex_unlock(&s->s_mutex);
 		ceph_put_mds_session(s);
 
@@ -4217,6 +4383,9 @@ static void delayed_work(struct work_struct *work)
 	}
 	mutex_unlock(&mdsc->mutex);
 
+	if (metric_only)
+		goto delay_work;
+
 	ceph_check_delayed_caps(mdsc);
 
 	ceph_queue_cap_reclaim_work(mdsc);
@@ -4225,11 +4394,13 @@ static void delayed_work(struct work_struct *work)
 
 	maybe_recover_session(mdsc);
 
+delay_work:
 	schedule_delayed(mdsc);
 }
 
-static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
+static int ceph_mdsc_metric_init(struct ceph_mds_client *mdsc)
 {
+	struct ceph_client_metric *metric = &mdsc->metric;
 	int ret;
 
 	if (!metric)
@@ -4257,6 +4428,8 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
 	memset(&metric->metadata_latency_sum, 0, sizeof(struct timespec64));
 	atomic64_set(&metric->total_metadatas, 0);
 
+	mdsc->sending_metrics = 0;
+	mdsc->ticks = 0;
 	return 0;
 }
 
@@ -4313,7 +4486,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	init_waitqueue_head(&mdsc->cap_flushing_wq);
 	INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
 	atomic_set(&mdsc->cap_reclaim_pending, 0);
-	err = ceph_mdsc_metric_init(&mdsc->metric);
+	err = ceph_mdsc_metric_init(mdsc);
 	if (err)
 		goto err_mdsmap;
 
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 37cc12d40eef..dd58ba36d0c9 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -468,6 +468,9 @@ struct ceph_mds_client {
 	struct list_head  dentry_leases;     /* fifo list */
 	struct list_head  dentry_dir_leases; /* lru list */
 
+	/* metrics */
+	unsigned int		  sending_metrics;
+	unsigned int		  ticks;
 	struct ceph_client_metric metric;
 
 	spinlock_t		snapid_map_lock;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7a6f9913c8f1..d30c60ecfedb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -128,6 +128,7 @@ struct ceph_fs_client {
 	struct dentry *debugfs_congestion_kb;
 	struct dentry *debugfs_bdi;
 	struct dentry *debugfs_mdsc, *debugfs_mdsmap;
+	struct dentry *debugfs_sending_metrics;
 	struct dentry *debugfs_metric;
 	struct dentry *debugfs_mds_sessions;
 #endif
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index a099f60feb7b..115a4dee379b 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -130,6 +130,7 @@ struct ceph_dir_layout {
 #define CEPH_MSG_CLIENT_REQUEST         24
 #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
 #define CEPH_MSG_CLIENT_REPLY           26
+#define CEPH_MSG_CLIENT_METRICS         29
 #define CEPH_MSG_CLIENT_CAPS            0x310
 #define CEPH_MSG_CLIENT_LEASE           0x311
 #define CEPH_MSG_CLIENT_SNAP            0x312
@@ -761,6 +762,82 @@ struct ceph_mds_lease {
 } __attribute__ ((packed));
 /* followed by a __le32+string for dname */
 
+enum ceph_metric_type {
+	CLIENT_METRIC_TYPE_CAP_INFO,
+	CLIENT_METRIC_TYPE_READ_LATENCY,
+	CLIENT_METRIC_TYPE_WRITE_LATENCY,
+	CLIENT_METRIC_TYPE_METADATA_LATENCY,
+	CLIENT_METRIC_TYPE_DENTRY_LEASE,
+
+	CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
+};
+
+/* metric caps header */
+struct ceph_metric_cap {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  campat;
+
+	__le32 data_len; /* length of sizeof(hit + mis + total) */
+	__le64 hit;
+	__le64 mis;
+	__le64 total;
+} __attribute__ ((packed));
+
+/* metric dentry lease header */
+struct ceph_metric_dentry_lease {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  campat;
+
+	__le32 data_len; /* length of sizeof(hit + mis + total) */
+	__le64 hit;
+	__le64 mis;
+	__le64 total;
+} __attribute__ ((packed));
+
+/* metric read latency header */
+struct ceph_metric_read_latency {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  campat;
+
+	__le32 data_len; /* length of sizeof(sec + nsec) */
+	__le32 sec;
+	__le32 nsec;
+} __attribute__ ((packed));
+
+/* metric write latency header */
+struct ceph_metric_write_latency {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  campat;
+
+	__le32 data_len; /* length of sizeof(sec + nsec) */
+	__le32 sec;
+	__le32 nsec;
+} __attribute__ ((packed));
+
+/* metric metadata latency header */
+struct ceph_metric_metadata_latency {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  campat;
+
+	__le32 data_len; /* length of sizeof(sec + nsec) */
+	__le32 sec;
+	__le32 nsec;
+} __attribute__ ((packed));
+
+struct ceph_metric_head {
+	__le32 num;	/* the number of metrics will be sent */
+} __attribute__ ((packed));
+
 /* client reconnect */
 struct ceph_mds_cap_reconnect {
 	__le64 cap_id;
-- 
2.21.0

  parent reply	other threads:[~2020-01-15  3:45 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-15  3:44 [PATCH v3 0/8] ceph: add perf metrics support xiubli
2020-01-15  3:44 ` [PATCH v3 1/8] ceph: add global dentry lease metric support xiubli
2020-01-15  3:44 ` [PATCH v3 2/8] ceph: add caps perf metric for each session xiubli
2020-01-15 14:24   ` Jeff Layton
2020-01-16  1:57     ` Xiubo Li
2020-01-15  3:44 ` [PATCH v3 3/8] ceph: add global read latency metric support xiubli
2020-01-15  3:44 ` [PATCH v3 4/8] ceph: add global write " xiubli
2020-01-16 14:14   ` Jeff Layton
2020-01-16 14:46     ` Ilya Dryomov
2020-01-16 16:31       ` Jeff Layton
2020-01-16 19:36         ` Ilya Dryomov
2020-01-17  1:56           ` Xiubo Li
2020-01-17  0:55     ` Xiubo Li
2020-01-15  3:44 ` [PATCH v3 5/8] ceph: add global metadata perf " xiubli
2020-01-15  3:44 ` xiubli [this message]
2020-01-15  3:44 ` [PATCH v3 7/8] ceph: add reset metrics support xiubli
2020-01-15  3:44 ` [PATCH v3 8/8] ceph: send client provided metric flags in client metadata xiubli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200115034444.14304-7-xiubli@redhat.com \
    --to=xiubli@redhat.com \
    --cc=ceph-devel@vger.kernel.org \
    --cc=idryomov@gmail.com \
    --cc=jlayton@kernel.org \
    --cc=pdonnell@redhat.com \
    --cc=sage@redhat.com \
    --cc=zyan@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.