linux-mm.kvack.org archive mirror
* [PATCH v2 1/7] histogram: add struct histogram
@ 2020-05-28 23:52 Axel Rasmussen
From: Axel Rasmussen @ 2020-05-28 23:52 UTC (permalink / raw)
  To: Andrew Morton, David Rientjes, Davidlohr Bueso, Ingo Molnar,
	Ingo Molnar, Jerome Glisse, Laurent Dufour, Liam R . Howlett,
	Matthew Wilcox, Michel Lespinasse, Peter Zijlstra,
	Vlastimil Babka, Will Deacon
  Cc: linux-fsdevel, linux-kernel, linux-mm, AKASHI Takahiro,
	Aleksa Sarai, Alexander Potapenko, Alexey Dobriyan, Al Viro,
	Andrei Vagin, Ard Biesheuvel, Brendan Higgins, chenqiwu,
	Christian Brauner, Christian Kellner, Corentin Labbe,
	Daniel Jordan, Dan Williams, David Gow, David S. Miller,
	Dmitry V. Levin, Eric W. Biederman, Eugene Syromiatnikov,
	Jamie Liu, Jason Gunthorpe, John Garry, John Hubbard,
	Jonathan Adams, Junaid Shahid, Kees Cook, Kirill A. Shutemov,
	Konstantin Khlebnikov, Krzysztof Kozlowski, Mark Rutland,
	Masahiro Yamada, Masami Hiramatsu, Mathieu Desnoyers,
	Michal Hocko, Mikhail Zaslonko, Petr Mladek, Ralph Campbell,
	Randy Dunlap, Roman Gushchin, Shakeel Butt, Steven Rostedt,
	Tal Gilboa, Thomas Gleixner, Uwe Kleine-König,
	Vincenzo Frascino, Yang Shi, Yu Zhao, Axel Rasmussen

struct histogram provides a histogram that counts u64 samples in
arbitrary user-configurable buckets, optimized for efficient concurrent
recording.

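Because histogram_read_rcu() reports (as a negative return value) the
buffer size it needs when the caller's buffers are too small, reading a
histogram whose thresholds can be resized concurrently takes a small
retry loop. A minimal, hypothetical caller-side sketch of that pattern
follows; the function name and the initial size are illustrative only
and not part of this patch:

static int report_histogram(struct histogram_rcu *hrcu)
{
	size_t nr = 8;			/* arbitrary initial guess */
	u64 *buckets, *thresholds;
	ssize_t ret;

	for (;;) {
		buckets = kcalloc(nr, sizeof(*buckets), GFP_KERNEL);
		thresholds = kcalloc(nr, sizeof(*thresholds), GFP_KERNEL);
		if (!buckets || !thresholds) {
			ret = -ENOMEM;
			break;
		}
		ret = histogram_read_rcu(hrcu, buckets, thresholds, nr);
		if (ret >= 0)
			break;	/* the first ret entries are valid */
		/* Too small: -ret is the size required right now. */
		nr = -ret;
		kfree(buckets);
		kfree(thresholds);
	}

	if (ret > 0) {
		/* ... report ret pairs of thresholds[i] / buckets[i] ... */
	}
	kfree(buckets);
	kfree(thresholds);
	return ret < 0 ? ret : 0;
}
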
This is a squashed, refactored, and modified version of a
previously-internal implementation. Thanks to the following individuals
for portions of the implementation:

  Jamie Liu <jamieliu@google.com> - Original implementation
  Yu Zhao <yuzhao@google.com> - Code cleanups + simplification

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
---
 include/linux/histogram.h | 270 ++++++++++++++++++++++++++++++++++++++
 lib/Kconfig               |   3 +
 lib/Makefile              |   2 +
 lib/histogram.c           | 157 ++++++++++++++++++++++
 4 files changed, 432 insertions(+)
 create mode 100644 include/linux/histogram.h
 create mode 100644 lib/histogram.c

diff --git a/include/linux/histogram.h b/include/linux/histogram.h
new file mode 100644
index 000000000000..137930ca933f
--- /dev/null
+++ b/include/linux/histogram.h
@@ -0,0 +1,270 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_HISTOGRAM_H
+#define _LINUX_HISTOGRAM_H
+
+#include <linux/compiler.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/types.h>
+
+/*
+ * Histograms for counting events represented by u64s.
+ *
+ * Each histogram may have any number of thresholds. A histogram with
+ * nr_thresholds thresholds has nr_thresholds buckets: bucket[0] counts
+ * samples in [0, thresholds[0]], and bucket[i] for i > 0 counts samples in
+ * the half-open interval (thresholds[i-1], thresholds[i]].
+ * thresholds[nr_thresholds-1] must be 2**64-1, so every sample lands in some
+ * bucket. Each histogram's thresholds are immutable after creation. Each
+ * bucket counts up to a u64 worth of events.
+ *
+ * Example usage:
+ *
+ * u64 thresholds[] = {1, 2, 5, 10, 20, 50, 100, ~0};
+ * u64 buckets[ARRAY_SIZE(thresholds)];
+ * struct histogram *hist = histogram_alloc(thresholds, ARRAY_SIZE(thresholds));
+ * if (IS_ERR(hist)) {...}
+ * histogram_record(hist, 0, 1);
+ * histogram_record(hist, 8, 2);
+ * histogram_record(hist, 20, 3);
+ * histogram_record(hist, 110, 4);
+ * histogram_read_buckets(hist, buckets);
+ * // buckets == {1, 0, 0, 2, 3, 0, 0, 4}
+ * histogram_free(hist);
+ *
+ * For convenience, a struct histogram_rcu type is also provided which wraps an
+ * RCU-sched protected pointer to a struct histogram, allowing thresholds to be
+ * modified even while other threads may be recording to the histogram.
+ * Functions that operate on a histogram_rcu are distinguished by an _rcu
+ * suffix.
+ *
+ * Example usage:
+ *
+ * u64 thresholds_1[] = {1, 2, 5, 10, 20, 50, 100, ~0};
+ * u64 thresholds_2[] = {100, 200, 500, 1000, ~0};
+ * u64 buckets[ARRAY_SIZE(thresholds_1)];
+ * struct histogram_rcu hrcu;
+ * if (histogram_init_rcu(&hrcu, thresholds_1, ARRAY_SIZE(thresholds_1))) {...}
+ * histogram_record_rcu(&hrcu, 0, 1);
+ * histogram_record_rcu(&hrcu, 8, 2);
+ * histogram_record_rcu(&hrcu, 20, 3);
+ * histogram_record_rcu(&hrcu, 110, 4);
+ * if (histogram_read_rcu(&hrcu, buckets, NULL,
+ *			  ARRAY_SIZE(thresholds_1)) < 0) {...}
+ * // buckets == {1, 0, 0, 2, 3, 0, 0, 4}
+ * if (histogram_set_thresholds_rcu(&hrcu, thresholds_2,
+ *				    ARRAY_SIZE(thresholds_2))) {...}
+ * if (histogram_read_rcu(&hrcu, buckets, NULL,
+ *			  ARRAY_SIZE(thresholds_2)) < 0) {...}
+ * // buckets == {0, 0, 0, 0, 0}
+ * histogram_record_rcu(&hrcu, 50, 1);
+ * histogram_record_rcu(&hrcu, 150, 2);
+ * histogram_record_rcu(&hrcu, 5000, 3);
+ * if (histogram_read_rcu(&hrcu, buckets, NULL,
+ *			  ARRAY_SIZE(thresholds_2)) < 0) {...}
+ * // buckets == {1, 2, 0, 0, 3}
+ * histogram_destroy_rcu(&hrcu);
+ */
+
+struct histogram {
+	struct rcu_head rcu;
+	u64 __percpu *buckets;
+	size_t nr_thresholds;
+	u64 thresholds[]; /* flexible array member */
+};
+
+struct histogram_rcu {
+	struct histogram __rcu *hist;
+};
+
+/**
+ * histogram_record() - record samples in histogram
+ * @hist: histogram
+ * @val: sample value
+ * @count: number of samples
+ *
+ * histogram_record() does not require synchronization with concurrent
+ * histogram_record() or histogram_read_buckets().
+ *
+ * histogram_record() may be called from tracing code.
+ */
+void histogram_record(struct histogram *hist, u64 val, u64 count);
+
+/**
+ * histogram_read_buckets() - read histogram buckets
+ * @hist: histogram
+ * @buckets: array with space for at least hist->nr_thresholds elements
+ *
+ * Sets each element of @buckets to the value of the corresponding histogram
+ * bucket.
+ *
+ * histogram_read_buckets() does not require synchronization with concurrent
+ * histogram_record() or histogram_read_buckets().
+ *
+ * histogram_read_buckets() does not block recording, so values are not read
+ * from all CPUs atomically. If this is a problem, the caller should stop
+ * recording first.
+ */
+void histogram_read_buckets(const struct histogram *hist, u64 *buckets);
+
+/**
+ * histogram_alloc() - create a histogram
+ * @thresholds: thresholds array
+ * @nr_thresholds: number of elements in @thresholds
+ *
+ * Histogram buckets are initialized to zero. @thresholds must be sorted in
+ * ascending order and must not contain any duplicates. @nr_thresholds must be
+ * >= 1. thresholds[nr_thresholds-1] must be ~(u64)0.
+ *
+ * histogram_alloc() makes a copy of @thresholds if successful, so ownership of
+ * @thresholds is unaffected by histogram_alloc().
+ *
+ * Context: Performs allocation with GFP_KERNEL.
+ *
+ * Returns: Pointer to new histogram, or a PTR_ERR on failure.
+ */
+struct histogram *histogram_alloc(const u64 *thresholds, size_t nr_thresholds);
+
+/**
+ * histogram_free() - delete a histogram
+ * @hist: histogram
+ */
+void histogram_free(struct histogram *hist);
+
+/**
+ * histogram_record_rcu() - record samples in RCU-protected histogram
+ * @hrcu: histogram
+ * @val: sample value
+ * @count: number of samples
+ *
+ * histogram_record_rcu() does not require external synchronization, even with
+ * histogram_destroy_rcu(). Calling histogram_record_rcu() on a @hrcu that
+ * histogram_destroy_rcu() has been called on is a no-op.
+ *
+ * histogram_record_rcu() may be called from tracing code.
+ */
+static inline void histogram_record_rcu(struct histogram_rcu *hrcu, u64 val,
+					u64 count)
+{
+	struct histogram *hist;
+
+	rcu_read_lock_sched_notrace();
+	hist = rcu_dereference_sched(hrcu->hist);
+	if (likely(hist))
+		histogram_record(hist, val, count);
+	rcu_read_unlock_sched_notrace();
+}
+
+/**
+ * histogram_read_rcu() - read RCU-protected histogram
+ * @hrcu: histogram
+ * @buckets: array with space for at least nr_thresholds elements
+ * @thresholds: array with space for at least nr_thresholds elements
+ * @nr_thresholds: array size (see above)
+ *
+ * If @buckets is not NULL, sets each element of @buckets to the value of the
+ * corresponding histogram bucket.
+ *
+ * If @thresholds is not NULL, sets each element of @thresholds to the
+ * corresponding histogram bucket threshold.
+ *
+ * On failure (for example, if nr_thresholds < hrcu->hist->nr_thresholds),
+ * neither @buckets nor @thresholds will be modified.
+ *
+ * histogram_read_rcu() does not require external synchronization, even with
+ * histogram_destroy_rcu(). Calling histogram_read_rcu() on a @hrcu that
+ * histogram_destroy_rcu() has been called on returns 0.
+ *
+ * histogram_read_rcu() does not block recording, so bucket values are not read
+ * from all CPUs atomically. If this is a problem, the caller should stop
+ * recording first.
+ *
+ * Returns: On success, the histogram's number of thresholds (and thus the
+ * number of elements written to @buckets and/or @thresholds when they are
+ * non-NULL); if nr_thresholds is too small, the negative of the minimum
+ * nr_thresholds required to succeed; 0 if the histogram has been destroyed
+ * by histogram_destroy_rcu(). (Note that when nr_thresholds is too small,
+ * retrying with the returned size is not guaranteed to succeed, because
+ * another thread may have grown the number of thresholds in the interim.)
+ */
+ssize_t histogram_read_rcu(const struct histogram_rcu *hrcu, u64 *buckets,
+			   u64 *thresholds, size_t nr_thresholds);
+
+/**
+ * histogram_set_thresholds_rcu() - set RCU-protected histogram thresholds
+ * @hrcu: histogram
+ * @thresholds: thresholds array
+ * @nr_thresholds: number of elements in @thresholds
+ *
+ * The semantics that apply to @thresholds are the same as for
+ * histogram_alloc().
+ *
+ * If successful, all buckets are atomically reset to zero.
+ *
+ * The caller must synchronize between concurrent calls to
+ * histogram_set_thresholds_rcu(), histogram_init_rcu(), and
+ * histogram_destroy_rcu().
+ *
+ * Context: Performs allocation with GFP_KERNEL.
+ *
+ * Returns: Zero on success, or a negative error code on failure.
+ */
+int histogram_set_thresholds_rcu(struct histogram_rcu *hrcu,
+				 const u64 *thresholds, size_t nr_thresholds);
+
+/**
+ * histogram_init_rcu() - initialize RCU-protected histogram
+ * @hrcu: histogram
+ * @thresholds: thresholds array
+ * @nr_thresholds: number of elements in @thresholds.
+ *
+ * Each struct histogram_rcu must be initialized by histogram_init_rcu() at
+ * least once before it is valid to call any other functions on it. A struct
+ * histogram_rcu that has been previously initialized cannot be initialized
+ * again, unless it has been subsequently destroyed by histogram_destroy_rcu().
+ *
+ * The semantics that apply to @thresholds are the same as for
+ * histogram_alloc(), with one exception: @thresholds may be NULL iff
+ * @nr_thresholds is 0. In this case, @hrcu will behave as if it has already
+ * been destroyed (histogram_record_rcu() will no-op and histogram_read_rcu()
+ * will return 0).
+ *
+ * The caller must synchronize between concurrent calls to
+ * histogram_set_thresholds_rcu(), histogram_init_rcu(), and
+ * histogram_destroy_rcu().
+ *
+ * Context: Performs allocation with GFP_KERNEL.
+ *
+ * Returns: Zero on success, or a negative error code on failure.
+ */
+static inline int histogram_init_rcu(struct histogram_rcu *hrcu,
+				     const u64 *thresholds,
+				     size_t nr_thresholds)
+{
+	RCU_INIT_POINTER(hrcu->hist, NULL);
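+	/* NULL means destroyed: records are no-ops, reads return 0. */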
+	if (!thresholds && !nr_thresholds)
+		return 0;
+	return histogram_set_thresholds_rcu(hrcu, thresholds, nr_thresholds);
+}
+
+void histogram_destroy_rcu_cb(struct rcu_head *rcu);
+
+/**
+ * histogram_destroy_rcu() - destroy RCU-protected histogram
+ * @hrcu: histogram
+ *
+ * After histogram_destroy_rcu() has been called on a @hrcu, it is valid to call
+ * histogram_init_rcu() on it again.
+ *
+ * The caller must synchronize between concurrent calls to
+ * histogram_set_thresholds_rcu(), histogram_init_rcu(), and
+ * histogram_destroy_rcu().
+ */
+static inline void histogram_destroy_rcu(struct histogram_rcu *hrcu)
+{
+	struct histogram *hist = rcu_dereference_raw(hrcu->hist);
+
+	RCU_INIT_POINTER(hrcu->hist, NULL);
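+	/* Defer freeing until in-flight sched-RCU readers have finished. */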
+	if (likely(hist))
+		call_rcu(&hist->rcu, histogram_destroy_rcu_cb);
+}
+
+#endif /* _LINUX_HISTOGRAM_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 5d53f9609c25..4714bdfa343b 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -648,6 +648,9 @@ config OBJAGG
 config STRING_SELFTEST
 	tristate "Test string functions"
 
+config HISTOGRAM
+	bool
+
 endmenu
 
 config GENERIC_IOREMAP
diff --git a/lib/Makefile b/lib/Makefile
index 685aee60de1d..f61b1c15d656 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -248,6 +248,8 @@ obj-$(CONFIG_ASN1) += asn1_decoder.o
 
 obj-$(CONFIG_FONT_SUPPORT) += fonts/
 
+obj-$(CONFIG_HISTOGRAM) += histogram.o
+
 hostprogs	:= gen_crc32table
 hostprogs	+= gen_crc64table
 clean-files	:= crc32table.h
diff --git a/lib/histogram.c b/lib/histogram.c
new file mode 100644
index 000000000000..b68334275a46
--- /dev/null
+++ b/lib/histogram.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/cache.h>
+#include <linux/cpumask.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/histogram.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+
+void histogram_record(struct histogram *hist, u64 val, u64 count)
+{
+	size_t i, lower, upper;
+
+	/* Binary search invariant: correct bucket in [lower, upper] */
+	lower = 0;
+	upper = hist->nr_thresholds - 1;
+	while (lower < upper) {
+		/* can't realistically overflow, nr_thresholds < 2**63 */
+		i = (lower + upper) / 2;
+		if (val <= hist->thresholds[i])
+			upper = i;
+		else
+			lower = i + 1;
+	}
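+	/* lower == upper == index of the first threshold >= val */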
+	i = upper;
+	/* _notrace because histogram_record may be called from tracing */
+	preempt_disable_notrace();
+	__this_cpu_add(hist->buckets[i], count);
+	preempt_enable_notrace();
+}
+EXPORT_SYMBOL_GPL(histogram_record);
+
+void histogram_read_buckets(const struct histogram *hist, u64 *buckets)
+{
+	int cpu;
+	size_t nr_buckets;
+	size_t i;
+
+	nr_buckets = hist->nr_thresholds;
+	memset(buckets, 0, nr_buckets * sizeof(u64));
+	/*
+	 * This must use for_each_possible_cpu rather than for_each_online_cpu
+	 * to ensure we count records from any CPUs that have since been
+	 * removed.
+	 */
+	for_each_possible_cpu(cpu) {
+		for (i = 0; i < nr_buckets; i++)
+			buckets[i] += per_cpu(hist->buckets[i], cpu);
+	}
+}
+EXPORT_SYMBOL_GPL(histogram_read_buckets);
+
+static int histogram_check_thresholds(const u64 *thresholds,
+				      size_t nr_thresholds)
+{
+	unsigned int i;
+
+	if (!nr_thresholds)
+		return -EINVAL;
+	if (!thresholds)
+		return -EFAULT;
+	for (i = 1; i < nr_thresholds; i++)
+		if (thresholds[i - 1] >= thresholds[i])
+			return -EINVAL;
+	if (thresholds[nr_thresholds - 1] != ~0ULL)
+		return -EINVAL;
+	return 0;
+}
+
+struct histogram *histogram_alloc(const u64 *thresholds, size_t nr_thresholds)
+{
+	struct histogram *hist;
+	size_t hist_size;
+	int ret;
+
+	ret = histogram_check_thresholds(thresholds, nr_thresholds);
+	if (ret)
+		return ERR_PTR(ret);
+	hist_size = sizeof(struct histogram) + nr_thresholds * sizeof(u64);
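+	/*
+	 * Round the allocation up to a whole cache line; this keeps the
+	 * read-mostly thresholds[] array from sharing a cache line with
+	 * unrelated data from adjacent allocations.
+	 */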
+	hist = kmalloc(ALIGN(hist_size, cache_line_size()), GFP_KERNEL);
+	if (!hist)
+		return ERR_PTR(-ENOMEM);
+	hist->buckets = __alloc_percpu(nr_thresholds * sizeof(*hist->buckets),
+				       __alignof__(*hist->buckets));
+	if (!hist->buckets) {
+		kfree(hist);
+		return ERR_PTR(-ENOMEM);
+	}
+	hist->nr_thresholds = nr_thresholds;
+	memcpy(hist->thresholds, thresholds, nr_thresholds * sizeof(u64));
+	return hist;
+}
+EXPORT_SYMBOL_GPL(histogram_alloc);
+
+void histogram_free(struct histogram *hist)
+{
+	if (!hist)
+		return;
+	free_percpu(hist->buckets);
+	kfree(hist);
+}
+EXPORT_SYMBOL_GPL(histogram_free);
+
+ssize_t histogram_read_rcu(const struct histogram_rcu *hrcu, u64 *buckets,
+			   u64 *thresholds, size_t nr_thresholds)
+{
+	const struct histogram *hist;
+	ssize_t ret = 0;
+
+	rcu_read_lock_sched();
+	hist = rcu_dereference_sched(hrcu->hist);
+	if (!hist) {
+		ret = 0;
+		goto out;
+	}
+	if (nr_thresholds < hist->nr_thresholds) {
+		ret = -(ssize_t)hist->nr_thresholds;
+		goto out;
+	}
+	if (buckets)
+		histogram_read_buckets(hist, buckets);
+	if (thresholds)
+		memcpy(thresholds, hist->thresholds,
+		       hist->nr_thresholds * sizeof(u64));
+	ret = hist->nr_thresholds;
+out:
+	rcu_read_unlock_sched();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(histogram_read_rcu);
+
+int histogram_set_thresholds_rcu(struct histogram_rcu *hrcu,
+				 const u64 *thresholds, size_t nr_thresholds)
+{
+	struct histogram *old_hist = rcu_dereference_raw(hrcu->hist);
+	struct histogram *new_hist = histogram_alloc(thresholds, nr_thresholds);
+
+	if (IS_ERR(new_hist))
+		return PTR_ERR(new_hist);
+	rcu_assign_pointer(hrcu->hist, new_hist);
+	if (old_hist) {
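+		/*
+		 * Wait for all sched-RCU readers (histogram_record_rcu(),
+		 * histogram_read_rcu()) to finish with the old histogram
+		 * before freeing it.
+		 */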
+		synchronize_rcu();
+		histogram_free(old_hist);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(histogram_set_thresholds_rcu);
+
+void histogram_destroy_rcu_cb(struct rcu_head *rcu)
+{
+	histogram_free(container_of(rcu, struct histogram, rcu));
+}
+EXPORT_SYMBOL_GPL(histogram_destroy_rcu_cb);
-- 
2.27.0.rc0.183.gde8f92d652-goog


