All of lore.kernel.org
 help / color / mirror / Atom feed
* Time stats library (from bcache/bcachefs) & generic latency instrumentation
@ 2022-08-29 16:53 Kent Overstreet
  2022-08-29 16:53 ` [PATCH 1/3] lib/time_stats: New library for statistics on events Kent Overstreet
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Kent Overstreet @ 2022-08-29 16:53 UTC (permalink / raw)
  To: linux-kernel, linux-bcache, colyli

Hey Coly, this is a fragment from a larger patch series - I wanted to send it
out to you now for review because it changes the bcache sysfs interface for
time_stats.

This series takes the time_stats code from bcachefs (which has some additional
improvements), moves it to lib/ converts bcache to use it (otherwise we get
clashes with the time_stats name) - and uses it for some really cool new latency
instrumentation.

These patches won't apply/build, full patch series for any interested is here:

https://evilpiepirate.org/git/bcachefs.git/log/?h=alloc_tags



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/3] lib/time_stats: New library for statistics on events
  2022-08-29 16:53 Time stats library (from bcache/bcachefs) & generic latency instrumentation Kent Overstreet
@ 2022-08-29 16:53 ` Kent Overstreet
  2022-08-29 23:34   ` Randy Dunlap
  2022-08-29 16:53 ` [PATCH 2/3] bcache: Convert to lib/time_stats Kent Overstreet
  2022-08-29 16:53 ` [PATCH 3/3] Code tagging based latency tracking Kent Overstreet
  2 siblings, 1 reply; 11+ messages in thread
From: Kent Overstreet @ 2022-08-29 16:53 UTC (permalink / raw)
  To: linux-kernel, linux-bcache, colyli; +Cc: Kent Overstreet

This adds a small new library for tracking statistics on events that
have a duration, i.e. a start and end time.

 - number of events
 - rate/frequency
 - average duration
 - max duration
 - duration quantiles

This code comes from bcachefs, and originally bcache: the next patch
will be converting bcache to use this version, and a subsequent patch
will be using code_tagging to instrument all wait_event() calls in the
kernel.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/time_stats.h |  44 +++++++
 lib/Kconfig.debug          |   3 +
 lib/Makefile               |   1 +
 lib/time_stats.c           | 236 +++++++++++++++++++++++++++++++++++++
 4 files changed, 284 insertions(+)
 create mode 100644 include/linux/time_stats.h
 create mode 100644 lib/time_stats.c

diff --git a/include/linux/time_stats.h b/include/linux/time_stats.h
new file mode 100644
index 0000000000..7ae929e6f8
--- /dev/null
+++ b/include/linux/time_stats.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_TIMESTATS_H
+#define _LINUX_TIMESTATS_H
+
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+
+#define NR_QUANTILES	15
+
+struct quantiles {
+	struct quantile_entry {
+		u64	m;
+		u64	step;
+	}		entries[NR_QUANTILES];
+};
+
+struct time_stat_buffer {
+	unsigned int	nr;
+	struct time_stat_buffer_entry {
+		u64	start;
+		u64	end;
+	}		entries[32];
+};
+
+struct time_stats {
+	spinlock_t	lock;
+	u64		count;
+	/* all fields are in nanoseconds */
+	u64		average_duration;
+	u64		average_frequency;
+	u64		max_duration;
+	u64		last_event;
+	struct quantiles quantiles;
+
+	struct time_stat_buffer __percpu *buffer;
+};
+
+struct seq_buf;
+void time_stats_update(struct time_stats *stats, u64 start);
+void time_stats_to_text(struct seq_buf *out, struct time_stats *stats);
+void time_stats_exit(struct time_stats *stats);
+
+#endif /* _LINUX_TIMESTATS_H */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index bbe3ef939c..bfb49505c9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1728,6 +1728,9 @@ config LATENCYTOP
 	  Enable this option if you want to use the LatencyTOP tool
 	  to find out which userspace is blocking on what kernel operations.
 
+config TIME_STATS
+	bool
+
 source "kernel/trace/Kconfig"
 
 config PROVIDE_OHCI1394_DMA_INIT
diff --git a/lib/Makefile b/lib/Makefile
index c69430213e..e09255c881 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -232,6 +232,7 @@ obj-$(CONFIG_ALLOC_TAGGING) += alloc_tag.o
 obj-$(CONFIG_PAGE_ALLOC_TAGGING) += pgalloc_tag.o
 
 obj-$(CONFIG_CODETAG_FAULT_INJECTION) += dynamic_fault.o
+obj-$(CONFIG_TIME_STATS) += time_stats.o
 
 lib-$(CONFIG_GENERIC_BUG) += bug.o
 
diff --git a/lib/time_stats.c b/lib/time_stats.c
new file mode 100644
index 0000000000..30362364fd
--- /dev/null
+++ b/lib/time_stats.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/gfp.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/ktime.h>
+#include <linux/percpu.h>
+#include <linux/seq_buf.h>
+#include <linux/spinlock.h>
+#include <linux/time_stats.h>
+#include <linux/timekeeping.h>
+
+static inline unsigned int eytzinger1_child(unsigned int i, unsigned int child)
+{
+	return (i << 1) + child;
+}
+
+static inline unsigned int eytzinger1_right_child(unsigned int i)
+{
+	return eytzinger1_child(i, 1);
+}
+
+static inline unsigned int eytzinger1_next(unsigned int i, unsigned int size)
+{
+	if (eytzinger1_right_child(i) <= size) {
+		i = eytzinger1_right_child(i);
+
+		i <<= __fls(size + 1) - __fls(i);
+		i >>= i > size;
+	} else {
+		i >>= ffz(i) + 1;
+	}
+
+	return i;
+}
+
+static inline unsigned int eytzinger0_child(unsigned int i, unsigned int child)
+{
+	return (i << 1) + 1 + child;
+}
+
+static inline unsigned int eytzinger0_first(unsigned int size)
+{
+	return rounddown_pow_of_two(size) - 1;
+}
+
+static inline unsigned int eytzinger0_next(unsigned int i, unsigned int size)
+{
+	return eytzinger1_next(i + 1, size) - 1;
+}
+
+#define eytzinger0_for_each(_i, _size)			\
+	for ((_i) = eytzinger0_first((_size));		\
+	     (_i) != -1;				\
+	     (_i) = eytzinger0_next((_i), (_size)))
+
+#define ewma_add(ewma, val, weight)					\
+({									\
+	typeof(ewma) _ewma = (ewma);					\
+	typeof(weight) _weight = (weight);				\
+									\
+	(((_ewma << _weight) - _ewma) + (val)) >> _weight;		\
+})
+
+static void quantiles_update(struct quantiles *q, u64 v)
+{
+	unsigned int i = 0;
+
+	while (i < ARRAY_SIZE(q->entries)) {
+		struct quantile_entry *e = q->entries + i;
+
+		if (unlikely(!e->step)) {
+			e->m = v;
+			e->step = max_t(unsigned int, v / 2, 1024);
+		} else if (e->m > v) {
+			e->m = e->m >= e->step
+				? e->m - e->step
+				: 0;
+		} else if (e->m < v) {
+			e->m = e->m + e->step > e->m
+				? e->m + e->step
+				: U32_MAX;
+		}
+
+		if ((e->m > v ? e->m - v : v - e->m) < e->step)
+			e->step = max_t(unsigned int, e->step / 2, 1);
+
+		if (v >= e->m)
+			break;
+
+		i = eytzinger0_child(i, v > e->m);
+	}
+}
+
+static void time_stats_update_one(struct time_stats *stats,
+				  u64 start, u64 end)
+{
+	u64 duration, freq;
+
+	duration	= time_after64(end, start)
+		? end - start : 0;
+	freq		= time_after64(end, stats->last_event)
+		? end - stats->last_event : 0;
+
+	stats->count++;
+
+	stats->average_duration = stats->average_duration
+		? ewma_add(stats->average_duration, duration, 6)
+		: duration;
+
+	stats->average_frequency = stats->average_frequency
+		? ewma_add(stats->average_frequency, freq, 6)
+		: freq;
+
+	stats->max_duration = max(stats->max_duration, duration);
+
+	stats->last_event = end;
+
+	quantiles_update(&stats->quantiles, duration);
+}
+
+void time_stats_update(struct time_stats *stats, u64 start)
+{
+	u64 end = ktime_get_ns();
+	unsigned long flags;
+
+	if (!stats->buffer) {
+		spin_lock_irqsave(&stats->lock, flags);
+		time_stats_update_one(stats, start, end);
+
+		if (stats->average_frequency < 32 &&
+		    stats->count > 1024)
+			stats->buffer =
+				alloc_percpu_gfp(struct time_stat_buffer,
+						 GFP_ATOMIC);
+		spin_unlock_irqrestore(&stats->lock, flags);
+	} else {
+		struct time_stat_buffer_entry *i;
+		struct time_stat_buffer *b;
+
+		preempt_disable();
+		b = this_cpu_ptr(stats->buffer);
+
+		BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
+		b->entries[b->nr++] = (struct time_stat_buffer_entry) {
+			.start = start,
+			.end = end
+		};
+
+		if (b->nr == ARRAY_SIZE(b->entries)) {
+			spin_lock_irqsave(&stats->lock, flags);
+			for (i = b->entries;
+			     i < b->entries + ARRAY_SIZE(b->entries);
+			     i++)
+				time_stats_update_one(stats, i->start, i->end);
+			spin_unlock_irqrestore(&stats->lock, flags);
+
+			b->nr = 0;
+		}
+
+		preempt_enable();
+	}
+}
+EXPORT_SYMBOL(time_stats_update);
+
+static const struct time_unit {
+	const char	*name;
+	u32		nsecs;
+} time_units[] = {
+	{ "ns",		1		},
+	{ "us",		NSEC_PER_USEC	},
+	{ "ms",		NSEC_PER_MSEC	},
+	{ "sec",	NSEC_PER_SEC	},
+};
+
+static const struct time_unit *pick_time_units(u64 ns)
+{
+	const struct time_unit *u;
+
+	for (u = time_units;
+	     u + 1 < time_units + ARRAY_SIZE(time_units) &&
+	     ns >= u[1].nsecs << 1;
+	     u++)
+		;
+
+	return u;
+}
+
+static void pr_time_units(struct seq_buf *out, u64 ns)
+{
+	const struct time_unit *u = pick_time_units(ns);
+
+	seq_buf_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name);
+}
+
+void time_stats_to_text(struct seq_buf *out, struct time_stats *stats)
+{
+	const struct time_unit *u;
+	u64 freq = READ_ONCE(stats->average_frequency);
+	u64 q, last_q = 0;
+	int i;
+
+	seq_buf_printf(out, "count:          %llu\n", stats->count);
+	seq_buf_printf(out, "rate:           %llu/sec\n",
+		       freq ? div64_u64(NSEC_PER_SEC, freq) : 0);
+	seq_buf_printf(out, "frequency:      ");
+	pr_time_units(out, freq);
+	seq_buf_putc(out, '\n');
+
+	seq_buf_printf(out, "avg duration:   ");
+	pr_time_units(out, stats->average_duration);
+	seq_buf_putc(out, '\n');
+
+	seq_buf_printf(out, "max duration:   ");
+	pr_time_units(out, stats->max_duration);
+	seq_buf_putc(out, '\n');
+
+	i = eytzinger0_first(NR_QUANTILES);
+	u = pick_time_units(stats->quantiles.entries[i].m);
+	seq_buf_printf(out, "quantiles (%s): ", u->name);
+	eytzinger0_for_each(i, NR_QUANTILES) {
+		q = max(stats->quantiles.entries[i].m, last_q);
+		seq_buf_printf(out, "%llu ", div_u64(q, u->nsecs));
+		last_q = q;
+	}
+
+	seq_buf_putc(out, '\n');
+}
+EXPORT_SYMBOL_GPL(time_stats_to_text);
+
+void time_stats_exit(struct time_stats *stats)
+{
+	free_percpu(stats->buffer);
+	stats->buffer = NULL;
+}
+EXPORT_SYMBOL_GPL(time_stats_exit);
-- 
2.36.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/3] bcache: Convert to lib/time_stats
  2022-08-29 16:53 Time stats library (from bcache/bcachefs) & generic latency instrumentation Kent Overstreet
  2022-08-29 16:53 ` [PATCH 1/3] lib/time_stats: New library for statistics on events Kent Overstreet
@ 2022-08-29 16:53 ` Kent Overstreet
  2022-08-31 16:00   ` Coly Li
  2022-08-29 16:53 ` [PATCH 3/3] Code tagging based latency tracking Kent Overstreet
  2 siblings, 1 reply; 11+ messages in thread
From: Kent Overstreet @ 2022-08-29 16:53 UTC (permalink / raw)
  To: linux-kernel, linux-bcache, colyli; +Cc: Kent Overstreet

This patch converts bcache to the new generic time_stats code
lib/time_stats.c. The new code is from bcachefs, and has some changes
from the version in bcache:

 - we now use ktime_get_ns(), not local_clock(). When the code was
   originally written multi processor systems that lacked synchronized
   TSCs were still common, and so local_clock() was much cheaper than
   sched_clock() (though not necessarily fully accurate, due to TSC
   drift). ktime_get_ns() should be cheap enough on all common hardware
   now, and more standard/correct.

 - time_stats are now exported in a single file in sysfs, which means we
   can improve the statistics we keep track of without changing all
   users. This also means we don't have to manually specify which units
   (ms, us, ns) a given time_stats should be printed in; that's handled
   dynamically.

 - There's a lazily-allocated percpu buffer, which now needs to be freed
   with time_stats_exit().

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Coly Li <colyli@suse.de>
---
 drivers/md/bcache/Kconfig  |  1 +
 drivers/md/bcache/bcache.h |  1 +
 drivers/md/bcache/bset.c   |  8 +++---
 drivers/md/bcache/bset.h   |  1 +
 drivers/md/bcache/btree.c  | 12 ++++----
 drivers/md/bcache/super.c  |  3 ++
 drivers/md/bcache/sysfs.c  | 43 ++++++++++++++++++++--------
 drivers/md/bcache/util.c   | 30 --------------------
 drivers/md/bcache/util.h   | 57 --------------------------------------
 9 files changed, 47 insertions(+), 109 deletions(-)

diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index 529c9d04e9..8d165052e5 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -4,6 +4,7 @@ config BCACHE
 	tristate "Block device as cache"
 	select BLOCK_HOLDER_DEPRECATED if SYSFS
 	select CRC64
+	select TIME_STATS
 	help
 	Allows a block device to be used as cache for other devices; uses
 	a btree for indexing and the layout is optimized for SSDs.
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 2acda9cea0..5100010a38 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -185,6 +185,7 @@
 #include <linux/rbtree.h>
 #include <linux/rwsem.h>
 #include <linux/refcount.h>
+#include <linux/time_stats.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 94d38e8a59..727e9b7aea 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1251,7 +1251,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
 		order = state->page_order;
 	}
 
-	start_time = local_clock();
+	start_time = ktime_get_ns();
 
 	btree_mergesort(b, out, iter, fixup, false);
 	b->nsets = start;
@@ -1286,7 +1286,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
 	bch_bset_build_written_tree(b);
 
 	if (!start)
-		bch_time_stats_update(&state->time, start_time);
+		time_stats_update(&state->time, start_time);
 }
 
 void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
@@ -1322,14 +1322,14 @@ void bch_btree_sort_and_fix_extents(struct btree_keys *b,
 void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
 			 struct bset_sort_state *state)
 {
-	uint64_t start_time = local_clock();
+	uint64_t start_time = ktime_get_ns();
 	struct btree_iter iter;
 
 	bch_btree_iter_init(b, &iter, NULL);
 
 	btree_mergesort(b, new->set->data, &iter, false, true);
 
-	bch_time_stats_update(&state->time, start_time);
+	time_stats_update(&state->time, start_time);
 
 	new->set->size = 0; // XXX: why?
 }
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index d795c84246..13e524ad77 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -3,6 +3,7 @@
 #define _BCACHE_BSET_H
 
 #include <linux/kernel.h>
+#include <linux/time_stats.h>
 #include <linux/types.h>
 
 #include "bcache_ondisk.h"
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 147c493a98..abf543bc75 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -242,7 +242,7 @@ static void btree_node_read_endio(struct bio *bio)
 
 static void bch_btree_node_read(struct btree *b)
 {
-	uint64_t start_time = local_clock();
+	uint64_t start_time = ktime_get_ns();
 	struct closure cl;
 	struct bio *bio;
 
@@ -270,7 +270,7 @@ static void bch_btree_node_read(struct btree *b)
 		goto err;
 
 	bch_btree_node_read_done(b);
-	bch_time_stats_update(&b->c->btree_read_time, start_time);
+	time_stats_update(&b->c->btree_read_time, start_time);
 
 	return;
 err:
@@ -1789,7 +1789,7 @@ static void bch_btree_gc(struct cache_set *c)
 	struct gc_stat stats;
 	struct closure writes;
 	struct btree_op op;
-	uint64_t start_time = local_clock();
+	uint64_t start_time = ktime_get_ns();
 
 	trace_bcache_gc_start(c);
 
@@ -1815,7 +1815,7 @@ static void bch_btree_gc(struct cache_set *c)
 	bch_btree_gc_finish(c);
 	wake_up_allocators(c);
 
-	bch_time_stats_update(&c->btree_gc_time, start_time);
+	time_stats_update(&c->btree_gc_time, start_time);
 
 	stats.key_bytes *= sizeof(uint64_t);
 	stats.data	<<= 9;
@@ -2191,7 +2191,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
 {
 	bool split;
 	struct btree *n1, *n2 = NULL, *n3 = NULL;
-	uint64_t start_time = local_clock();
+	uint64_t start_time = ktime_get_ns();
 	struct closure cl;
 	struct keylist parent_keys;
 
@@ -2297,7 +2297,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
 	btree_node_free(b);
 	rw_unlock(true, n1);
 
-	bch_time_stats_update(&b->c->btree_split_time, start_time);
+	time_stats_update(&b->c->btree_split_time, start_time);
 
 	return 0;
 err_free2:
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index ba3909bb6b..26c8fa93b5 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1691,6 +1691,9 @@ static void cache_set_free(struct closure *cl)
 		kobject_put(&ca->kobj);
 	}
 
+	time_stats_exit(&c->btree_gc_time);
+	time_stats_exit(&c->btree_split_time);
+	time_stats_exit(&c->sort.time);
 
 	if (c->moving_gc_wq)
 		destroy_workqueue(c->moving_gc_wq);
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index c6f6770592..01eec5877c 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -16,6 +16,7 @@
 #include <linux/blkdev.h>
 #include <linux/sort.h>
 #include <linux/sched/clock.h>
+#include <linux/seq_buf.h>
 
 extern bool bcache_is_reboot;
 
@@ -79,10 +80,10 @@ read_attribute(active_journal_entries);
 read_attribute(backing_dev_name);
 read_attribute(backing_dev_uuid);
 
-sysfs_time_stats_attribute(btree_gc,	sec, ms);
-sysfs_time_stats_attribute(btree_split, sec, us);
-sysfs_time_stats_attribute(btree_sort,	ms,  us);
-sysfs_time_stats_attribute(btree_read,	ms,  us);
+read_attribute(btree_gc_time);
+read_attribute(btree_split_time);
+read_attribute(btree_sort_time);
+read_attribute(btree_read_time);
 
 read_attribute(btree_nodes);
 read_attribute(btree_used_percent);
@@ -731,6 +732,9 @@ static unsigned int bch_average_key_size(struct cache_set *c)
 SHOW(__bch_cache_set)
 {
 	struct cache_set *c = container_of(kobj, struct cache_set, kobj);
+	struct seq_buf s;
+
+	seq_buf_init(&s, buf, PAGE_SIZE);
 
 	sysfs_print(synchronous,		CACHE_SYNC(&c->cache->sb));
 	sysfs_print(journal_delay_ms,		c->journal_delay_ms);
@@ -743,10 +747,25 @@ SHOW(__bch_cache_set)
 	sysfs_print(btree_cache_max_chain,	bch_cache_max_chain(c));
 	sysfs_print(cache_available_percent,	100 - c->gc_stats.in_use);
 
-	sysfs_print_time_stats(&c->btree_gc_time,	btree_gc, sec, ms);
-	sysfs_print_time_stats(&c->btree_split_time,	btree_split, sec, us);
-	sysfs_print_time_stats(&c->sort.time,		btree_sort, ms, us);
-	sysfs_print_time_stats(&c->btree_read_time,	btree_read, ms, us);
+	if (attr == &sysfs_btree_gc_time) {
+		time_stats_to_text(&s, &c->btree_gc_time);
+		return s.len;
+	}
+
+	if (attr == &sysfs_btree_split_time) {
+		time_stats_to_text(&s, &c->btree_split_time);
+		return s.len;
+	}
+
+	if (attr == &sysfs_btree_sort_time) {
+		time_stats_to_text(&s, &c->sort.time);
+		return s.len;
+	}
+
+	if (attr == &sysfs_btree_read_time) {
+		time_stats_to_text(&s, &c->btree_read_time);
+		return s.len;
+	}
 
 	sysfs_print(btree_used_percent,	bch_btree_used(c));
 	sysfs_print(btree_nodes,	c->gc_stats.nodes);
@@ -988,10 +1007,10 @@ KTYPE(bch_cache_set);
 static struct attribute *bch_cache_set_internal_attrs[] = {
 	&sysfs_active_journal_entries,
 
-	sysfs_time_stats_attribute_list(btree_gc, sec, ms)
-	sysfs_time_stats_attribute_list(btree_split, sec, us)
-	sysfs_time_stats_attribute_list(btree_sort, ms, us)
-	sysfs_time_stats_attribute_list(btree_read, ms, us)
+	&sysfs_btree_gc_time,
+	&sysfs_btree_split_time,
+	&sysfs_btree_sort_time,
+	&sysfs_btree_read_time,
 
 	&sysfs_btree_nodes,
 	&sysfs_btree_used_percent,
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index ae380bc399..95282bf0f9 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -160,36 +160,6 @@ int bch_parse_uuid(const char *s, char *uuid)
 	return i;
 }
 
-void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
-{
-	uint64_t now, duration, last;
-
-	spin_lock(&stats->lock);
-
-	now		= local_clock();
-	duration	= time_after64(now, start_time)
-		? now - start_time : 0;
-	last		= time_after64(now, stats->last)
-		? now - stats->last : 0;
-
-	stats->max_duration = max(stats->max_duration, duration);
-
-	if (stats->last) {
-		ewma_add(stats->average_duration, duration, 8, 8);
-
-		if (stats->average_frequency)
-			ewma_add(stats->average_frequency, last, 8, 8);
-		else
-			stats->average_frequency  = last << 8;
-	} else {
-		stats->average_duration  = duration << 8;
-	}
-
-	stats->last = now ?: 1;
-
-	spin_unlock(&stats->lock);
-}
-
 /**
  * bch_next_delay() - update ratelimiting statistics and calculate next delay
  * @d: the struct bch_ratelimit to update
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 6f3cb7c921..1e1bdbae95 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -345,68 +345,11 @@ ssize_t bch_hprint(char *buf, int64_t v);
 bool bch_is_zero(const char *p, size_t n);
 int bch_parse_uuid(const char *s, char *uuid);
 
-struct time_stats {
-	spinlock_t	lock;
-	/*
-	 * all fields are in nanoseconds, averages are ewmas stored left shifted
-	 * by 8
-	 */
-	uint64_t	max_duration;
-	uint64_t	average_duration;
-	uint64_t	average_frequency;
-	uint64_t	last;
-};
-
-void bch_time_stats_update(struct time_stats *stats, uint64_t time);
-
 static inline unsigned int local_clock_us(void)
 {
 	return local_clock() >> 10;
 }
 
-#define NSEC_PER_ns			1L
-#define NSEC_PER_us			NSEC_PER_USEC
-#define NSEC_PER_ms			NSEC_PER_MSEC
-#define NSEC_PER_sec			NSEC_PER_SEC
-
-#define __print_time_stat(stats, name, stat, units)			\
-	sysfs_print(name ## _ ## stat ## _ ## units,			\
-		    div_u64((stats)->stat >> 8, NSEC_PER_ ## units))
-
-#define sysfs_print_time_stats(stats, name,				\
-			       frequency_units,				\
-			       duration_units)				\
-do {									\
-	__print_time_stat(stats, name,					\
-			  average_frequency,	frequency_units);	\
-	__print_time_stat(stats, name,					\
-			  average_duration,	duration_units);	\
-	sysfs_print(name ## _ ##max_duration ## _ ## duration_units,	\
-			div_u64((stats)->max_duration,			\
-				NSEC_PER_ ## duration_units));		\
-									\
-	sysfs_print(name ## _last_ ## frequency_units, (stats)->last	\
-		    ? div_s64(local_clock() - (stats)->last,		\
-			      NSEC_PER_ ## frequency_units)		\
-		    : -1LL);						\
-} while (0)
-
-#define sysfs_time_stats_attribute(name,				\
-				   frequency_units,			\
-				   duration_units)			\
-read_attribute(name ## _average_frequency_ ## frequency_units);		\
-read_attribute(name ## _average_duration_ ## duration_units);		\
-read_attribute(name ## _max_duration_ ## duration_units);		\
-read_attribute(name ## _last_ ## frequency_units)
-
-#define sysfs_time_stats_attribute_list(name,				\
-					frequency_units,		\
-					duration_units)			\
-&sysfs_ ## name ## _average_frequency_ ## frequency_units,		\
-&sysfs_ ## name ## _average_duration_ ## duration_units,		\
-&sysfs_ ## name ## _max_duration_ ## duration_units,			\
-&sysfs_ ## name ## _last_ ## frequency_units,
-
 #define ewma_add(ewma, val, weight, factor)				\
 ({									\
 	(ewma) *= (weight) - 1;						\
-- 
2.36.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/3] Code tagging based latency tracking
  2022-08-29 16:53 Time stats library (from bcache/bcachefs) & generic latency instrumentation Kent Overstreet
  2022-08-29 16:53 ` [PATCH 1/3] lib/time_stats: New library for statistics on events Kent Overstreet
  2022-08-29 16:53 ` [PATCH 2/3] bcache: Convert to lib/time_stats Kent Overstreet
@ 2022-08-29 16:53 ` Kent Overstreet
  2 siblings, 0 replies; 11+ messages in thread
From: Kent Overstreet @ 2022-08-29 16:53 UTC (permalink / raw)
  To: linux-kernel, linux-bcache, colyli; +Cc: Kent Overstreet

This adds the ability to easily instrument code for measuring latency.
To use, add the following to calls to your code, at the start and end of
the event you wish to measure:

  code_tag_time_stats_start(start_time);
  code_tag_time_stats_finish(start_time);

Stastistics will then show up in debugfs under
/sys/kernel/debug/time_stats, listed by file and line number.

Stastics measured include weighted averages of frequency, duration, max
duration, as well as quantiles.

This patch also instruments all calls to init_wait and finish_wait,
which includes all calls to wait_event. Example debugfs output:

fs/xfs/xfs_trans_ail.c:746 module:xfs func:xfs_ail_push_all_sync
count:          17
rate:           0/sec
frequency:      2 sec
avg duration:   10 us
max duration:   232 us
quantiles (ns): 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128

lib/sbitmap.c:813 module:sbitmap func:sbitmap_finish_wait
count:          3
rate:           0/sec
frequency:      4 sec
avg duration:   4 sec
max duration:   4 sec
quantiles (ns): 0 4288669120 4288669120 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048

net/core/datagram.c:122 module:datagram func:__skb_wait_for_more_packets
count:          10
rate:           1/sec
frequency:      859 ms
avg duration:   472 ms
max duration:   30 sec
quantiles (ns): 0 12279 12279 15669 15669 15669 15669 17217 17217 17217 17217 17217 17217 17217 17217

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/asm-generic/codetag.lds.h  |   3 +-
 include/linux/codetag_time_stats.h |  54 +++++++++++
 include/linux/wait.h               |  22 ++++-
 kernel/sched/wait.c                |   6 +-
 lib/Kconfig.debug                  |   8 ++
 lib/Makefile                       |   1 +
 lib/codetag_time_stats.c           | 143 +++++++++++++++++++++++++++++
 7 files changed, 232 insertions(+), 5 deletions(-)
 create mode 100644 include/linux/codetag_time_stats.h
 create mode 100644 lib/codetag_time_stats.c

diff --git a/include/asm-generic/codetag.lds.h b/include/asm-generic/codetag.lds.h
index 16fbf74edc..d799f4aced 100644
--- a/include/asm-generic/codetag.lds.h
+++ b/include/asm-generic/codetag.lds.h
@@ -10,6 +10,7 @@
 
 #define CODETAG_SECTIONS()		\
 	SECTION_WITH_BOUNDARIES(alloc_tags)		\
-	SECTION_WITH_BOUNDARIES(dynamic_fault_tags)
+	SECTION_WITH_BOUNDARIES(dynamic_fault_tags)	\
+	SECTION_WITH_BOUNDARIES(time_stats_tags)
 
 #endif /* __ASM_GENERIC_CODETAG_LDS_H */
diff --git a/include/linux/codetag_time_stats.h b/include/linux/codetag_time_stats.h
new file mode 100644
index 0000000000..7e44c7ee9e
--- /dev/null
+++ b/include/linux/codetag_time_stats.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CODETAG_TIMESTATS_H
+#define _LINUX_CODETAG_TIMESTATS_H
+
+/*
+ * Code tagging based latency tracking:
+ * (C) 2022 Kent Overstreet
+ *
+ * This allows you to easily instrument code to track latency, and have the
+ * results show up in debugfs. To use, add the following two calls to your code
+ * at the beginning and end of the event you wish to instrument:
+ *
+ * code_tag_time_stats_start(start_time);
+ * code_tag_time_stats_finish(start_time);
+ *
+ * Statistics will then show up in debugfs under /sys/kernel/debug/time_stats,
+ * listed by file and line number.
+ */
+
+#ifdef CONFIG_CODETAG_TIME_STATS
+
+#include <linux/codetag.h>
+#include <linux/time_stats.h>
+#include <linux/timekeeping.h>
+
+struct codetag_time_stats {
+	struct codetag		tag;
+	struct time_stats	stats;
+};
+
+#define codetag_time_stats_start(_start_time)	u64 _start_time = ktime_get_ns()
+
+#define codetag_time_stats_finish(_start_time)			\
+do {								\
+	static struct codetag_time_stats			\
+	__used							\
+	__section("time_stats_tags")				\
+	__aligned(8) s = {					\
+		.tag	= CODE_TAG_INIT,			\
+		.stats.lock = __SPIN_LOCK_UNLOCKED(_lock)	\
+	};							\
+								\
+	WARN_ONCE(!(_start_time), "codetag_time_stats_start() not called");\
+	time_stats_update(&s.stats, _start_time);		\
+} while (0)
+
+#else
+
+#define codetag_time_stats_finish(_start_time)	do {} while (0)
+#define codetag_time_stats_start(_start_time)	do {} while (0)
+
+#endif /* CODETAG_CODETAG_TIME_STATS */
+
+#endif
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 91ced6a118..bab11b7ef1 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -4,6 +4,7 @@
 /*
  * Linux wait queue related types and methods
  */
+#include <linux/codetag_time_stats.h>
 #include <linux/list.h>
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
@@ -32,6 +33,9 @@ struct wait_queue_entry {
 	void			*private;
 	wait_queue_func_t	func;
 	struct list_head	entry;
+#ifdef CONFIG_CODETAG_TIME_STATS
+	u64			start_time;
+#endif
 };
 
 struct wait_queue_head {
@@ -79,10 +83,17 @@ extern void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *n
 # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name)
 #endif
 
+#ifdef CONFIG_CODETAG_TIME_STATS
+#define WAIT_QUEUE_ENTRY_START_TIME_INITIALIZER	.start_time = ktime_get_ns(),
+#else
+#define WAIT_QUEUE_ENTRY_START_TIME_INITIALIZER
+#endif
+
 #define WAIT_FUNC_INITIALIZER(name, function) {					\
 	.private	= current,						\
 	.func		= function,						\
 	.entry		= LIST_HEAD_INIT((name).entry),				\
+	WAIT_QUEUE_ENTRY_START_TIME_INITIALIZER					\
 }
 
 #define DEFINE_WAIT_FUNC(name, function)					\
@@ -98,6 +109,9 @@ __init_waitqueue_entry(struct wait_queue_entry *wq_entry, unsigned int flags,
 	wq_entry->private	= private;
 	wq_entry->func		= func;
 	INIT_LIST_HEAD(&wq_entry->entry);
+#ifdef CONFIG_CODETAG_TIME_STATS
+	wq_entry->start_time	= ktime_get_ns();
+#endif
 }
 
 #define init_waitqueue_func_entry(_wq_entry, _func)			\
@@ -1180,11 +1194,17 @@ do {										\
 void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
 bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
 long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
-void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+void __finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
 int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
 int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
 
+#define finish_wait(_wq_head, _wq_entry)					\
+do {										\
+	codetag_time_stats_finish((_wq_entry)->start_time);			\
+	__finish_wait(_wq_head, _wq_entry);					\
+} while (0)
+
 typedef int (*task_call_f)(struct task_struct *p, void *arg);
 extern int task_call_func(struct task_struct *p, task_call_f func, void *arg);
 
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index b992234607..e88de3f0c3 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -367,7 +367,7 @@ int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
 EXPORT_SYMBOL(do_wait_intr_irq);
 
 /**
- * finish_wait - clean up after waiting in a queue
+ * __finish_wait - clean up after waiting in a queue
  * @wq_head: waitqueue waited on
  * @wq_entry: wait descriptor
  *
@@ -375,7 +375,7 @@ EXPORT_SYMBOL(do_wait_intr_irq);
  * the wait descriptor from the given waitqueue if still
  * queued.
  */
-void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
+void __finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
@@ -399,7 +399,7 @@ void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_en
 		spin_unlock_irqrestore(&wq_head->lock, flags);
 	}
 }
-EXPORT_SYMBOL(finish_wait);
+EXPORT_SYMBOL(__finish_wait);
 
 int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
 {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index bfb49505c9..40584fe54a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1731,6 +1731,14 @@ config LATENCYTOP
 config TIME_STATS
 	bool
 
+config CODETAG_TIME_STATS
+	bool "Code tagging based latency measuring"
+	depends on DEBUG_FS
+	select TIME_STATS
+	select CODE_TAGGING
+	help
+	  Enabling this option makes latency statistics available in debugfs
+
 source "kernel/trace/Kconfig"
 
 config PROVIDE_OHCI1394_DMA_INIT
diff --git a/lib/Makefile b/lib/Makefile
index e09255c881..7888bc8baa 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -233,6 +233,7 @@ obj-$(CONFIG_PAGE_ALLOC_TAGGING) += pgalloc_tag.o
 
 obj-$(CONFIG_CODETAG_FAULT_INJECTION) += dynamic_fault.o
 obj-$(CONFIG_TIME_STATS) += time_stats.o
+obj-$(CONFIG_CODETAG_TIME_STATS) += codetag_time_stats.o
 
 lib-$(CONFIG_GENERIC_BUG) += bug.o
 
diff --git a/lib/codetag_time_stats.c b/lib/codetag_time_stats.c
new file mode 100644
index 0000000000..67f248906d
--- /dev/null
+++ b/lib/codetag_time_stats.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/codetag_time_stats.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/seq_buf.h>
+
+static struct codetag_type *cttype;
+
+struct user_buf {
+	char __user		*buf;	/* destination user buffer */
+	size_t			size;	/* size of requested read */
+	ssize_t			ret;	/* bytes read so far */
+};
+
+static int flush_ubuf(struct user_buf *dst, struct seq_buf *src)
+{
+	if (src->len) {
+		size_t bytes = min_t(size_t, src->len, dst->size);
+		int err = copy_to_user(dst->buf, src->buffer, bytes);
+
+		if (err)
+			return err;
+
+		dst->ret	+= bytes;
+		dst->buf	+= bytes;
+		dst->size	-= bytes;
+		src->len	-= bytes;
+		memmove(src->buffer, src->buffer + bytes, src->len);
+	}
+
+	return 0;
+}
+
+struct time_stats_iter {
+	struct codetag_iterator ct_iter;
+	struct seq_buf		buf;
+	char			rawbuf[4096];
+	bool			first;
+};
+
+static int time_stats_open(struct inode *inode, struct file *file)
+{
+	struct time_stats_iter *iter;
+
+	pr_debug("called");
+
+	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return -ENOMEM;
+
+	codetag_lock_module_list(cttype, true);
+	iter->ct_iter = codetag_get_ct_iter(cttype);
+	codetag_lock_module_list(cttype, false);
+
+	file->private_data = iter;
+	seq_buf_init(&iter->buf, iter->rawbuf, sizeof(iter->rawbuf));
+	iter->first = true;
+	return 0;
+}
+
+static int time_stats_release(struct inode *inode, struct file *file)
+{
+	struct time_stats_iter *i = file->private_data;
+
+	kfree(i);
+	return 0;
+}
+
+static ssize_t time_stats_read(struct file *file, char __user *ubuf,
+			       size_t size, loff_t *ppos)
+{
+	struct time_stats_iter *iter = file->private_data;
+	struct user_buf	buf = { .buf = ubuf, .size = size };
+	struct codetag_time_stats *s;
+	struct codetag *ct;
+	int err;
+
+	codetag_lock_module_list(iter->ct_iter.cttype, true);
+	while (1) {
+		err = flush_ubuf(&buf, &iter->buf);
+		if (err || !buf.size)
+			break;
+
+		ct = codetag_next_ct(&iter->ct_iter);
+		if (!ct)
+			break;
+
+		s = container_of(ct, struct codetag_time_stats, tag);
+		if (s->stats.count) {
+			if (!iter->first) {
+				seq_buf_putc(&iter->buf, '\n');
+				iter->first = true;
+			}
+
+			codetag_to_text(&iter->buf, &s->tag);
+			seq_buf_putc(&iter->buf, '\n');
+			time_stats_to_text(&iter->buf, &s->stats);
+		}
+	}
+	codetag_lock_module_list(iter->ct_iter.cttype, false);
+
+	return err ?: buf.ret;
+}
+
+static const struct file_operations time_stats_ops = {
+	.owner	= THIS_MODULE,
+	.open	= time_stats_open,
+	.release = time_stats_release,
+	.read	= time_stats_read,
+};
+
+static void time_stats_module_unload(struct codetag_type *cttype, struct codetag_module *mod)
+{
+	struct codetag_time_stats *i, *start = (void *) mod->range.start;
+	struct codetag_time_stats *end = (void *) mod->range.stop;
+
+	for (i = start; i != end; i++)
+		time_stats_exit(&i->stats);
+}
+
+static int __init codetag_time_stats_init(void)
+{
+	const struct codetag_type_desc desc = {
+		.section	= "time_stats_tags",
+		.tag_size	= sizeof(struct codetag_time_stats),
+		.module_unload	= time_stats_module_unload,
+	};
+	struct dentry *debugfs_file;
+
+	cttype = codetag_register_type(&desc);
+	if (IS_ERR_OR_NULL(cttype))
+		return PTR_ERR(cttype);
+
+	debugfs_file = debugfs_create_file("time_stats", 0666, NULL, NULL, &time_stats_ops);
+	if (IS_ERR(debugfs_file))
+		return PTR_ERR(debugfs_file);
+
+	return 0;
+}
+module_init(codetag_time_stats_init);
-- 
2.36.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] lib/time_stats: New library for statistics on events
  2022-08-29 16:53 ` [PATCH 1/3] lib/time_stats: New library for statistics on events Kent Overstreet
@ 2022-08-29 23:34   ` Randy Dunlap
  2022-08-30  0:00     ` Kent Overstreet
  0 siblings, 1 reply; 11+ messages in thread
From: Randy Dunlap @ 2022-08-29 23:34 UTC (permalink / raw)
  To: Kent Overstreet, linux-kernel, linux-bcache, colyli



On 8/29/22 09:53, Kent Overstreet wrote:
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index bbe3ef939c..bfb49505c9 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -1728,6 +1728,9 @@ config LATENCYTOP
>  	  Enable this option if you want to use the LatencyTOP tool
>  	  to find out which userspace is blocking on what kernel operations.
>  
> +config TIME_STATS
> +	bool
> +

Hi Kent,

Why not just in lib/Kconfig?

thanks.
-- 
~Randy

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] lib/time_stats: New library for statistics on events
  2022-08-29 23:34   ` Randy Dunlap
@ 2022-08-30  0:00     ` Kent Overstreet
  0 siblings, 0 replies; 11+ messages in thread
From: Kent Overstreet @ 2022-08-30  0:00 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: linux-kernel, linux-bcache, colyli

On Mon, Aug 29, 2022 at 04:34:14PM -0700, Randy Dunlap wrote:
> 
> 
> On 8/29/22 09:53, Kent Overstreet wrote:
> > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> > index bbe3ef939c..bfb49505c9 100644
> > --- a/lib/Kconfig.debug
> > +++ b/lib/Kconfig.debug
> > @@ -1728,6 +1728,9 @@ config LATENCYTOP
> >  	  Enable this option if you want to use the LatencyTOP tool
> >  	  to find out which userspace is blocking on what kernel operations.
> >  
> > +config TIME_STATS
> > +	bool
> > +
> 
> Hi Kent,
> 
> Why not just in lib/Kconfig?

Probably just lazyness, I'll move it there :)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] bcache: Convert to lib/time_stats
  2022-08-29 16:53 ` [PATCH 2/3] bcache: Convert to lib/time_stats Kent Overstreet
@ 2022-08-31 16:00   ` Coly Li
  2022-08-31 16:54     ` Kent Overstreet
  0 siblings, 1 reply; 11+ messages in thread
From: Coly Li @ 2022-08-31 16:00 UTC (permalink / raw)
  To: Kent Overstreet; +Cc: linux-kernel, linux-bcache



> 2022年8月30日 00:53,Kent Overstreet <kent.overstreet@linux.dev> 写道:
> 
> This patch converts bcache to the new generic time_stats code
> lib/time_stats.c. The new code is from bcachefs, and has some changes
> from the version in bcache:
> 
> - we now use ktime_get_ns(), not local_clock(). When the code was
>   originally written multi processor systems that lacked synchronized
>   TSCs were still common, and so local_clock() was much cheaper than
>   sched_clock() (though not necessarily fully accurate, due to TSC
>   drift). ktime_get_ns() should be cheap enough on all common hardware
>   now, and more standard/correct.
> 
> - time_stats are now exported in a single file in sysfs, which means we
>   can improve the statistics we keep track of without changing all
>   users. This also means we don't have to manually specify which units
>   (ms, us, ns) a given time_stats should be printed in; that's handled
>   dynamically.
> 
> - There's a lazily-allocated percpu buffer, which now needs to be freed
>   with time_stats_exit().
> 
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> Cc: Coly Li <colyli@suse.de>

Hi Kent,

Overall I am OK with the change to bcache code. You may add
	Acked-by: Coly Li <colyli@suse.de>
in future version of this patch.

In bcache-tools, they don’t read the changed sysfs files (including bcache-status), IMHO changing the output format won’t be problem for upstream.

My only question is, how to understand the time_stats_to_text() output format,
count:          3
rate:           0/sec
frequency:      4 sec
avg duration:   4 sec
max duration:   4 sec
quantiles (ns): 0 4288669120 4288669120 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048

Fro the above output, what are “rate”, “frequence” and “quantiles” for?

Thanks.

Coly Li

> ---
> drivers/md/bcache/Kconfig  |  1 +
> drivers/md/bcache/bcache.h |  1 +
> drivers/md/bcache/bset.c   |  8 +++---
> drivers/md/bcache/bset.h   |  1 +
> drivers/md/bcache/btree.c  | 12 ++++----
> drivers/md/bcache/super.c  |  3 ++
> drivers/md/bcache/sysfs.c  | 43 ++++++++++++++++++++--------
> drivers/md/bcache/util.c   | 30 --------------------
> drivers/md/bcache/util.h   | 57 --------------------------------------
> 9 files changed, 47 insertions(+), 109 deletions(-)
> 
> diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
> index 529c9d04e9..8d165052e5 100644
> --- a/drivers/md/bcache/Kconfig
> +++ b/drivers/md/bcache/Kconfig
> @@ -4,6 +4,7 @@ config BCACHE
> 	tristate "Block device as cache"
> 	select BLOCK_HOLDER_DEPRECATED if SYSFS
> 	select CRC64
> +	select TIME_STATS
> 	help
> 	Allows a block device to be used as cache for other devices; uses
> 	a btree for indexing and the layout is optimized for SSDs.
> diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
> index 2acda9cea0..5100010a38 100644
> --- a/drivers/md/bcache/bcache.h
> +++ b/drivers/md/bcache/bcache.h
> @@ -185,6 +185,7 @@
> #include <linux/rbtree.h>
> #include <linux/rwsem.h>
> #include <linux/refcount.h>
> +#include <linux/time_stats.h>
> #include <linux/types.h>
> #include <linux/workqueue.h>
> #include <linux/kthread.h>
> diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
> index 94d38e8a59..727e9b7aea 100644
> --- a/drivers/md/bcache/bset.c
> +++ b/drivers/md/bcache/bset.c
> @@ -1251,7 +1251,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
> 		order = state->page_order;
> 	}
> 
> -	start_time = local_clock();
> +	start_time = ktime_get_ns();
> 
> 	btree_mergesort(b, out, iter, fixup, false);
> 	b->nsets = start;
> @@ -1286,7 +1286,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
> 	bch_bset_build_written_tree(b);
> 
> 	if (!start)
> -		bch_time_stats_update(&state->time, start_time);
> +		time_stats_update(&state->time, start_time);
> }
> 
> void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
> @@ -1322,14 +1322,14 @@ void bch_btree_sort_and_fix_extents(struct btree_keys *b,
> void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
> 			 struct bset_sort_state *state)
> {
> -	uint64_t start_time = local_clock();
> +	uint64_t start_time = ktime_get_ns();
> 	struct btree_iter iter;
> 
> 	bch_btree_iter_init(b, &iter, NULL);
> 
> 	btree_mergesort(b, new->set->data, &iter, false, true);
> 
> -	bch_time_stats_update(&state->time, start_time);
> +	time_stats_update(&state->time, start_time);
> 
> 	new->set->size = 0; // XXX: why?
> }
> diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
> index d795c84246..13e524ad77 100644
> --- a/drivers/md/bcache/bset.h
> +++ b/drivers/md/bcache/bset.h
> @@ -3,6 +3,7 @@
> #define _BCACHE_BSET_H
> 
> #include <linux/kernel.h>
> +#include <linux/time_stats.h>
> #include <linux/types.h>
> 
> #include "bcache_ondisk.h"
> diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
> index 147c493a98..abf543bc75 100644
> --- a/drivers/md/bcache/btree.c
> +++ b/drivers/md/bcache/btree.c
> @@ -242,7 +242,7 @@ static void btree_node_read_endio(struct bio *bio)
> 
> static void bch_btree_node_read(struct btree *b)
> {
> -	uint64_t start_time = local_clock();
> +	uint64_t start_time = ktime_get_ns();
> 	struct closure cl;
> 	struct bio *bio;
> 
> @@ -270,7 +270,7 @@ static void bch_btree_node_read(struct btree *b)
> 		goto err;
> 
> 	bch_btree_node_read_done(b);
> -	bch_time_stats_update(&b->c->btree_read_time, start_time);
> +	time_stats_update(&b->c->btree_read_time, start_time);
> 
> 	return;
> err:
> @@ -1789,7 +1789,7 @@ static void bch_btree_gc(struct cache_set *c)
> 	struct gc_stat stats;
> 	struct closure writes;
> 	struct btree_op op;
> -	uint64_t start_time = local_clock();
> +	uint64_t start_time = ktime_get_ns();
> 
> 	trace_bcache_gc_start(c);
> 
> @@ -1815,7 +1815,7 @@ static void bch_btree_gc(struct cache_set *c)
> 	bch_btree_gc_finish(c);
> 	wake_up_allocators(c);
> 
> -	bch_time_stats_update(&c->btree_gc_time, start_time);
> +	time_stats_update(&c->btree_gc_time, start_time);
> 
> 	stats.key_bytes *= sizeof(uint64_t);
> 	stats.data	<<= 9;
> @@ -2191,7 +2191,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
> {
> 	bool split;
> 	struct btree *n1, *n2 = NULL, *n3 = NULL;
> -	uint64_t start_time = local_clock();
> +	uint64_t start_time = ktime_get_ns();
> 	struct closure cl;
> 	struct keylist parent_keys;
> 
> @@ -2297,7 +2297,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
> 	btree_node_free(b);
> 	rw_unlock(true, n1);
> 
> -	bch_time_stats_update(&b->c->btree_split_time, start_time);
> +	time_stats_update(&b->c->btree_split_time, start_time);
> 
> 	return 0;
> err_free2:
> diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> index ba3909bb6b..26c8fa93b5 100644
> --- a/drivers/md/bcache/super.c
> +++ b/drivers/md/bcache/super.c
> @@ -1691,6 +1691,9 @@ static void cache_set_free(struct closure *cl)
> 		kobject_put(&ca->kobj);
> 	}
> 
> +	time_stats_exit(&c->btree_gc_time);
> +	time_stats_exit(&c->btree_split_time);
> +	time_stats_exit(&c->sort.time);
> 
> 	if (c->moving_gc_wq)
> 		destroy_workqueue(c->moving_gc_wq);
> diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
> index c6f6770592..01eec5877c 100644
> --- a/drivers/md/bcache/sysfs.c
> +++ b/drivers/md/bcache/sysfs.c
> @@ -16,6 +16,7 @@
> #include <linux/blkdev.h>
> #include <linux/sort.h>
> #include <linux/sched/clock.h>
> +#include <linux/seq_buf.h>
> 
> extern bool bcache_is_reboot;
> 
> @@ -79,10 +80,10 @@ read_attribute(active_journal_entries);
> read_attribute(backing_dev_name);
> read_attribute(backing_dev_uuid);
> 
> -sysfs_time_stats_attribute(btree_gc,	sec, ms);
> -sysfs_time_stats_attribute(btree_split, sec, us);
> -sysfs_time_stats_attribute(btree_sort,	ms,  us);
> -sysfs_time_stats_attribute(btree_read,	ms,  us);
> +read_attribute(btree_gc_time);
> +read_attribute(btree_split_time);
> +read_attribute(btree_sort_time);
> +read_attribute(btree_read_time);
> 
> read_attribute(btree_nodes);
> read_attribute(btree_used_percent);
> @@ -731,6 +732,9 @@ static unsigned int bch_average_key_size(struct cache_set *c)
> SHOW(__bch_cache_set)
> {
> 	struct cache_set *c = container_of(kobj, struct cache_set, kobj);
> +	struct seq_buf s;
> +
> +	seq_buf_init(&s, buf, PAGE_SIZE);
> 
> 	sysfs_print(synchronous,		CACHE_SYNC(&c->cache->sb));
> 	sysfs_print(journal_delay_ms,		c->journal_delay_ms);
> @@ -743,10 +747,25 @@ SHOW(__bch_cache_set)
> 	sysfs_print(btree_cache_max_chain,	bch_cache_max_chain(c));
> 	sysfs_print(cache_available_percent,	100 - c->gc_stats.in_use);
> 
> -	sysfs_print_time_stats(&c->btree_gc_time,	btree_gc, sec, ms);
> -	sysfs_print_time_stats(&c->btree_split_time,	btree_split, sec, us);
> -	sysfs_print_time_stats(&c->sort.time,		btree_sort, ms, us);
> -	sysfs_print_time_stats(&c->btree_read_time,	btree_read, ms, us);
> +	if (attr == &sysfs_btree_gc_time) {
> +		time_stats_to_text(&s, &c->btree_gc_time);
> +		return s.len;
> +	}
> +
> +	if (attr == &sysfs_btree_split_time) {
> +		time_stats_to_text(&s, &c->btree_split_time);
> +		return s.len;
> +	}
> +
> +	if (attr == &sysfs_btree_sort_time) {
> +		time_stats_to_text(&s, &c->sort.time);
> +		return s.len;
> +	}
> +
> +	if (attr == &sysfs_btree_read_time) {
> +		time_stats_to_text(&s, &c->btree_read_time);
> +		return s.len;
> +	}
> 
> 	sysfs_print(btree_used_percent,	bch_btree_used(c));
> 	sysfs_print(btree_nodes,	c->gc_stats.nodes);
> @@ -988,10 +1007,10 @@ KTYPE(bch_cache_set);
> static struct attribute *bch_cache_set_internal_attrs[] = {
> 	&sysfs_active_journal_entries,
> 
> -	sysfs_time_stats_attribute_list(btree_gc, sec, ms)
> -	sysfs_time_stats_attribute_list(btree_split, sec, us)
> -	sysfs_time_stats_attribute_list(btree_sort, ms, us)
> -	sysfs_time_stats_attribute_list(btree_read, ms, us)
> +	&sysfs_btree_gc_time,
> +	&sysfs_btree_split_time,
> +	&sysfs_btree_sort_time,
> +	&sysfs_btree_read_time,
> 
> 	&sysfs_btree_nodes,
> 	&sysfs_btree_used_percent,
> diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
> index ae380bc399..95282bf0f9 100644
> --- a/drivers/md/bcache/util.c
> +++ b/drivers/md/bcache/util.c
> @@ -160,36 +160,6 @@ int bch_parse_uuid(const char *s, char *uuid)
> 	return i;
> }
> 
> -void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
> -{
> -	uint64_t now, duration, last;
> -
> -	spin_lock(&stats->lock);
> -
> -	now		= local_clock();
> -	duration	= time_after64(now, start_time)
> -		? now - start_time : 0;
> -	last		= time_after64(now, stats->last)
> -		? now - stats->last : 0;
> -
> -	stats->max_duration = max(stats->max_duration, duration);
> -
> -	if (stats->last) {
> -		ewma_add(stats->average_duration, duration, 8, 8);
> -
> -		if (stats->average_frequency)
> -			ewma_add(stats->average_frequency, last, 8, 8);
> -		else
> -			stats->average_frequency  = last << 8;
> -	} else {
> -		stats->average_duration  = duration << 8;
> -	}
> -
> -	stats->last = now ?: 1;
> -
> -	spin_unlock(&stats->lock);
> -}
> -
> /**
>  * bch_next_delay() - update ratelimiting statistics and calculate next delay
>  * @d: the struct bch_ratelimit to update
> diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
> index 6f3cb7c921..1e1bdbae95 100644
> --- a/drivers/md/bcache/util.h
> +++ b/drivers/md/bcache/util.h
> @@ -345,68 +345,11 @@ ssize_t bch_hprint(char *buf, int64_t v);
> bool bch_is_zero(const char *p, size_t n);
> int bch_parse_uuid(const char *s, char *uuid);
> 
> -struct time_stats {
> -	spinlock_t	lock;
> -	/*
> -	 * all fields are in nanoseconds, averages are ewmas stored left shifted
> -	 * by 8
> -	 */
> -	uint64_t	max_duration;
> -	uint64_t	average_duration;
> -	uint64_t	average_frequency;
> -	uint64_t	last;
> -};
> -
> -void bch_time_stats_update(struct time_stats *stats, uint64_t time);
> -
> static inline unsigned int local_clock_us(void)
> {
> 	return local_clock() >> 10;
> }
> 
> -#define NSEC_PER_ns			1L
> -#define NSEC_PER_us			NSEC_PER_USEC
> -#define NSEC_PER_ms			NSEC_PER_MSEC
> -#define NSEC_PER_sec			NSEC_PER_SEC
> -
> -#define __print_time_stat(stats, name, stat, units)			\
> -	sysfs_print(name ## _ ## stat ## _ ## units,			\
> -		    div_u64((stats)->stat >> 8, NSEC_PER_ ## units))
> -
> -#define sysfs_print_time_stats(stats, name,				\
> -			       frequency_units,				\
> -			       duration_units)				\
> -do {									\
> -	__print_time_stat(stats, name,					\
> -			  average_frequency,	frequency_units);	\
> -	__print_time_stat(stats, name,					\
> -			  average_duration,	duration_units);	\
> -	sysfs_print(name ## _ ##max_duration ## _ ## duration_units,	\
> -			div_u64((stats)->max_duration,			\
> -				NSEC_PER_ ## duration_units));		\
> -									\
> -	sysfs_print(name ## _last_ ## frequency_units, (stats)->last	\
> -		    ? div_s64(local_clock() - (stats)->last,		\
> -			      NSEC_PER_ ## frequency_units)		\
> -		    : -1LL);						\
> -} while (0)
> -
> -#define sysfs_time_stats_attribute(name,				\
> -				   frequency_units,			\
> -				   duration_units)			\
> -read_attribute(name ## _average_frequency_ ## frequency_units);		\
> -read_attribute(name ## _average_duration_ ## duration_units);		\
> -read_attribute(name ## _max_duration_ ## duration_units);		\
> -read_attribute(name ## _last_ ## frequency_units)
> -
> -#define sysfs_time_stats_attribute_list(name,				\
> -					frequency_units,		\
> -					duration_units)			\
> -&sysfs_ ## name ## _average_frequency_ ## frequency_units,		\
> -&sysfs_ ## name ## _average_duration_ ## duration_units,		\
> -&sysfs_ ## name ## _max_duration_ ## duration_units,			\
> -&sysfs_ ## name ## _last_ ## frequency_units,
> -
> #define ewma_add(ewma, val, weight, factor)				\
> ({									\
> 	(ewma) *= (weight) - 1;						\
> -- 
> 2.36.1
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] bcache: Convert to lib/time_stats
  2022-08-31 16:00   ` Coly Li
@ 2022-08-31 16:54     ` Kent Overstreet
  2022-08-31 17:02       ` Coly Li
  0 siblings, 1 reply; 11+ messages in thread
From: Kent Overstreet @ 2022-08-31 16:54 UTC (permalink / raw)
  To: Coly Li; +Cc: linux-kernel, linux-bcache

On Thu, Sep 01, 2022 at 12:00:17AM +0800, Coly Li wrote:
> 
> 
> > 2022年8月30日 00:53,Kent Overstreet <kent.overstreet@linux.dev> 写道:
> > 
> > This patch converts bcache to the new generic time_stats code
> > lib/time_stats.c. The new code is from bcachefs, and has some changes
> > from the version in bcache:
> > 
> > - we now use ktime_get_ns(), not local_clock(). When the code was
> >   originally written multi processor systems that lacked synchronized
> >   TSCs were still common, and so local_clock() was much cheaper than
> >   sched_clock() (though not necessarily fully accurate, due to TSC
> >   drift). ktime_get_ns() should be cheap enough on all common hardware
> >   now, and more standard/correct.
> > 
> > - time_stats are now exported in a single file in sysfs, which means we
> >   can improve the statistics we keep track of without changing all
> >   users. This also means we don't have to manually specify which units
> >   (ms, us, ns) a given time_stats should be printed in; that's handled
> >   dynamically.
> > 
> > - There's a lazily-allocated percpu buffer, which now needs to be freed
> >   with time_stats_exit().
> > 
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > Cc: Coly Li <colyli@suse.de>
> 
> Hi Kent,
> 
> Overall I am OK with the change to bcache code. You may add
> 	Acked-by: Coly Li <colyli@suse.de>
> in future version of this patch.
> 
> In bcache-tools, they don’t read the changed sysfs files (including bcache-status), IMHO changing the output format won’t be problem for upstream.
> 
> My only question is, how to understand the time_stats_to_text() output format,
> count:          3
> rate:           0/sec
> frequency:      4 sec
> avg duration:   4 sec
> max duration:   4 sec
> quantiles (ns): 0 4288669120 4288669120 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048
> 
> Fro the above output, what are “rate”, “frequence” and “quantiles” for?

Rate and frequency are inverses - in this example, we're seeing 4 events per
second.

The quantiles are for the duration, they give you an idea of the statistical
distribution, see https://en.wikipedia.org/wiki/Quantile

In the near future, the quantiles will be removed and replaced with standard
deviation, true and weighted - the quantiles algorithm isn't super accurate, we
can give more accurate numbers with standard deviation.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] bcache: Convert to lib/time_stats
  2022-08-31 16:54     ` Kent Overstreet
@ 2022-08-31 17:02       ` Coly Li
  2022-08-31 17:36         ` Kent Overstreet
  0 siblings, 1 reply; 11+ messages in thread
From: Coly Li @ 2022-08-31 17:02 UTC (permalink / raw)
  To: Kent Overstreet; +Cc: linux-kernel, linux-bcache



> 2022年9月1日 00:54,Kent Overstreet <kent.overstreet@linux.dev> 写道:
> 
> On Thu, Sep 01, 2022 at 12:00:17AM +0800, Coly Li wrote:
>> 
>> 
>>> 2022年8月30日 00:53,Kent Overstreet <kent.overstreet@linux.dev> 写道:
>>> 
>>> This patch converts bcache to the new generic time_stats code
>>> lib/time_stats.c. The new code is from bcachefs, and has some changes
>>> from the version in bcache:
>>> 
>>> - we now use ktime_get_ns(), not local_clock(). When the code was
>>>  originally written multi processor systems that lacked synchronized
>>>  TSCs were still common, and so local_clock() was much cheaper than
>>>  sched_clock() (though not necessarily fully accurate, due to TSC
>>>  drift). ktime_get_ns() should be cheap enough on all common hardware
>>>  now, and more standard/correct.
>>> 
>>> - time_stats are now exported in a single file in sysfs, which means we
>>>  can improve the statistics we keep track of without changing all
>>>  users. This also means we don't have to manually specify which units
>>>  (ms, us, ns) a given time_stats should be printed in; that's handled
>>>  dynamically.
>>> 
>>> - There's a lazily-allocated percpu buffer, which now needs to be freed
>>>  with time_stats_exit().
>>> 
>>> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
>>> Cc: Coly Li <colyli@suse.de>
>> 
>> Hi Kent,
>> 
>> Overall I am OK with the change to bcache code. You may add
>> 	Acked-by: Coly Li <colyli@suse.de>
>> in future version of this patch.
>> 
>> In bcache-tools, they don’t read the changed sysfs files (including bcache-status), IMHO changing the output format won’t be problem for upstream.
>> 
>> My only question is, how to understand the time_stats_to_text() output format,
>> count:          3
>> rate:           0/sec
>> frequency:      4 sec
>> avg duration:   4 sec
>> max duration:   4 sec
>> quantiles (ns): 0 4288669120 4288669120 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048
>> 
>> Fro the above output, what are “rate”, “frequence” and “quantiles” for?
> 
> Rate and frequency are inverses - in this example, we're seeing 4 events per
> second.
> 

Can I understand that rate = 1/frequency ?  Then frequency 4 is around to rate 0.

> The quantiles are for the duration, they give you an idea of the statistical
> distribution, see https://en.wikipedia.org/wiki/Quantile

I wanted to ask how to read the quantiles line. Does it mean that 1 ns is equally divided by 15 segments, and the counter values are for the divided 1/15 ns segments?

> 
> In the near future, the quantiles will be removed and replaced with standard
> deviation, true and weighted - the quantiles algorithm isn't super accurate, we
> can give more accurate numbers with standard deviation.

Quantiles are much humane, standard deviation is scared… Just FYI LOL.

Thanks.

Coly Li


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] bcache: Convert to lib/time_stats
  2022-08-31 17:02       ` Coly Li
@ 2022-08-31 17:36         ` Kent Overstreet
  2022-09-01  2:19           ` Coly Li
  0 siblings, 1 reply; 11+ messages in thread
From: Kent Overstreet @ 2022-08-31 17:36 UTC (permalink / raw)
  To: Coly Li; +Cc: linux-kernel, linux-bcache

On Thu, Sep 01, 2022 at 01:02:37AM +0800, Coly Li wrote:
> Can I understand that rate = 1/frequency ?  Then frequency 4 is around to rate 0.

Correct, rate = 1/frequency.

> > The quantiles are for the duration, they give you an idea of the statistical
> > distribution, see https://en.wikipedia.org/wiki/Quantile
> 
> I wanted to ask how to read the quantiles line. Does it mean that 1 ns is
> equally divided by 15 segments, and the counter values are for the divided
> 1/15 ns segments?

It is the 15 boundaries between 16 equal segments, yes.

> Quantiles are much humane, standard deviation is scared… Just FYI LOL.

That's valuable feedback then, maybe we'll keep both. The more expensive part of
time stats is just ingesting data points, but we've got a percpu buffer frontend
for that - just doing a bit more moth doesn't cost much.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] bcache: Convert to lib/time_stats
  2022-08-31 17:36         ` Kent Overstreet
@ 2022-09-01  2:19           ` Coly Li
  0 siblings, 0 replies; 11+ messages in thread
From: Coly Li @ 2022-09-01  2:19 UTC (permalink / raw)
  To: Kent Overstreet; +Cc: linux-kernel, linux-bcache



> 2022年9月1日 01:36,Kent Overstreet <kent.overstreet@linux.dev> 写道:
> 
> On Thu, Sep 01, 2022 at 01:02:37AM +0800, Coly Li wrote:
>> Can I understand that rate = 1/frequency ?  Then frequency 4 is around to rate 0.
> 
> Correct, rate = 1/frequency.
> 
>>> The quantiles are for the duration, they give you an idea of the statistical
>>> distribution, see https://en.wikipedia.org/wiki/Quantile
>> 
>> I wanted to ask how to read the quantiles line. Does it mean that 1 ns is
>> equally divided by 15 segments, and the counter values are for the divided
>> 1/15 ns segments?
> 
> It is the 15 boundaries between 16 equal segments, yes.
> 
>> Quantiles are much humane, standard deviation is scared… Just FYI LOL.
> 
> That's valuable feedback then, maybe we'll keep both. The more expensive part of
> time stats is just ingesting data points, but we've got a percpu buffer frontend
> for that - just doing a bit more moth doesn't cost much.

I don’t have more question so far. Thanks for the detailed replay.

Coly Li

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-09-01  2:20 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-29 16:53 Time stats library (from bcache/bcachefs) & generic latency instrumentation Kent Overstreet
2022-08-29 16:53 ` [PATCH 1/3] lib/time_stats: New library for statistics on events Kent Overstreet
2022-08-29 23:34   ` Randy Dunlap
2022-08-30  0:00     ` Kent Overstreet
2022-08-29 16:53 ` [PATCH 2/3] bcache: Convert to lib/time_stats Kent Overstreet
2022-08-31 16:00   ` Coly Li
2022-08-31 16:54     ` Kent Overstreet
2022-08-31 17:02       ` Coly Li
2022-08-31 17:36         ` Kent Overstreet
2022-09-01  2:19           ` Coly Li
2022-08-29 16:53 ` [PATCH 3/3] Code tagging based latency tracking Kent Overstreet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.