All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ian Rogers <irogers@google.com>
To: Andi Kleen <ak@linux.intel.com>, Jiri Olsa <jolsa@redhat.com>,
	Jin Yao <yao.jin@linux.intel.com>,
	Namhyung Kim <namhyung@kernel.org>,
	John Garry <john.garry@huawei.com>,
	Kajol Jain <kjain@linux.ibm.com>,
	"Paul A . Clarke" <pc@us.ibm.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Riccardo Mancini <rickyman7@gmail.com>,
	Kan Liang <kan.liang@linux.intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Kees Cook <keescook@chromium.org>,
	Sami Tolvanen <samitolvanen@google.com>,
	Nick Desaulniers <ndesaulniers@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Jacob Keller <jacob.e.keller@intel.com>,
	Zhen Lei <thunder.leizhen@huawei.com>,
	ToastC <mrtoastcheng@gmail.com>,
	Joakim Zhang <qiangqing.zhang@nxp.com>,
	Felix Fietkau <nbd@nbd.name>,
	Jiapeng Chong <jiapeng.chong@linux.alibaba.com>,
	Song Liu <songliubraving@fb.com>, Fabian Hemmer <copy@copy.sh>,
	Alexander Antonov <alexander.antonov@linux.intel.com>,
	Nicholas Fraser <nfraser@codeweavers.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Denys Zagorui <dzagorui@cisco.com>,
	Wan Jiabing <wanjiabing@vivo.com>,
	Thomas Richter <tmricht@linux.ibm.com>,
	Sumanth Korikkar <sumanthk@linux.ibm.com>,
	Heiko Carstens <hca@linux.ibm.com>,
	Changbin Du <changbin.du@intel.com>,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Andrew Kilroy <andrew.kilroy@arm.com>
Cc: Stephane Eranian <eranian@google.com>, Ian Rogers <irogers@google.com>
Subject: [PATCH v2 01/21] tools lib: Add list_sort.
Date: Fri, 15 Oct 2021 10:21:12 -0700	[thread overview]
Message-ID: <20211015172132.1162559-2-irogers@google.com> (raw)
In-Reply-To: <20211015172132.1162559-1-irogers@google.com>

Add list_sort.[ch] from the main kernel tree. The linux/bug.h #include
is removed due to conflicting definitions. Add check-headers and modify
perf build accordingly.

MANIFEST and python-ext-sources fixes suggested by Arnaldo.

Suggested-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Acked-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/include/linux/list_sort.h    |  14 ++
 tools/lib/list_sort.c              | 252 +++++++++++++++++++++++++++++
 tools/perf/MANIFEST                |   1 +
 tools/perf/check-headers.sh        |   2 +
 tools/perf/util/Build              |   5 +
 tools/perf/util/python-ext-sources |   1 +
 6 files changed, 275 insertions(+)
 create mode 100644 tools/include/linux/list_sort.h
 create mode 100644 tools/lib/list_sort.c

diff --git a/tools/include/linux/list_sort.h b/tools/include/linux/list_sort.h
new file mode 100644
index 000000000000..453105f74e05
--- /dev/null
+++ b/tools/include/linux/list_sort.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_LIST_SORT_H
+#define _LINUX_LIST_SORT_H
+
+#include <linux/types.h>
+
+struct list_head;
+
+typedef int __attribute__((nonnull(2,3))) (*list_cmp_func_t)(void *,
+		const struct list_head *, const struct list_head *);
+
+__attribute__((nonnull(2,3)))
+void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp);
+#endif
diff --git a/tools/lib/list_sort.c b/tools/lib/list_sort.c
new file mode 100644
index 000000000000..10c067e3a8d2
--- /dev/null
+++ b/tools/lib/list_sort.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/list_sort.h>
+#include <linux/list.h>
+
+/*
+ * Returns a list organized in an intermediate format suited
+ * to chaining of merge() calls: null-terminated, no reserved or
+ * sentinel head node, "prev" links not maintained.
+ */
+__attribute__((nonnull(2,3,4)))
+static struct list_head *merge(void *priv, list_cmp_func_t cmp,
+				struct list_head *a, struct list_head *b)
+{
+	struct list_head *head, **tail = &head;
+
+	for (;;) {
+		/* if equal, take 'a' -- important for sort stability */
+		if (cmp(priv, a, b) <= 0) {
+			*tail = a;
+			tail = &a->next;
+			a = a->next;
+			if (!a) {
+				*tail = b;
+				break;
+			}
+		} else {
+			*tail = b;
+			tail = &b->next;
+			b = b->next;
+			if (!b) {
+				*tail = a;
+				break;
+			}
+		}
+	}
+	return head;
+}
+
+/*
+ * Combine final list merge with restoration of standard doubly-linked
+ * list structure.  This approach duplicates code from merge(), but
+ * runs faster than the tidier alternatives of either a separate final
+ * prev-link restoration pass, or maintaining the prev links
+ * throughout.
+ */
+__attribute__((nonnull(2,3,4,5)))
+static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head,
+			struct list_head *a, struct list_head *b)
+{
+	struct list_head *tail = head;
+	u8 count = 0;
+
+	for (;;) {
+		/* if equal, take 'a' -- important for sort stability */
+		if (cmp(priv, a, b) <= 0) {
+			tail->next = a;
+			a->prev = tail;
+			tail = a;
+			a = a->next;
+			if (!a)
+				break;
+		} else {
+			tail->next = b;
+			b->prev = tail;
+			tail = b;
+			b = b->next;
+			if (!b) {
+				b = a;
+				break;
+			}
+		}
+	}
+
+	/* Finish linking remainder of list b on to tail */
+	tail->next = b;
+	do {
+		/*
+		 * If the merge is highly unbalanced (e.g. the input is
+		 * already sorted), this loop may run many iterations.
+		 * Continue callbacks to the client even though no
+		 * element comparison is needed, so the client's cmp()
+		 * routine can invoke cond_resched() periodically.
+		 */
+		if (unlikely(!++count))
+			cmp(priv, b, b);
+		b->prev = tail;
+		tail = b;
+		b = b->next;
+	} while (b);
+
+	/* And the final links to make a circular doubly-linked list */
+	tail->next = head;
+	head->prev = tail;
+}
+
+/**
+ * list_sort - sort a list
+ * @priv: private data, opaque to list_sort(), passed to @cmp
+ * @head: the list to sort
+ * @cmp: the elements comparison function
+ *
+ * The comparison function @cmp must return > 0 if @a should sort after
+ * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should
+ * sort before @b *or* their original order should be preserved.  It is
+ * always called with the element that came first in the input in @a,
+ * and list_sort is a stable sort, so it is not necessary to distinguish
+ * the @a < @b and @a == @b cases.
+ *
+ * This is compatible with two styles of @cmp function:
+ * - The traditional style which returns <0 / =0 / >0, or
+ * - Returning a boolean 0/1.
+ * The latter offers a chance to save a few cycles in the comparison
+ * (which is used by e.g. plug_ctx_cmp() in block/blk-mq.c).
+ *
+ * A good way to write a multi-word comparison is::
+ *
+ *	if (a->high != b->high)
+ *		return a->high > b->high;
+ *	if (a->middle != b->middle)
+ *		return a->middle > b->middle;
+ *	return a->low > b->low;
+ *
+ *
+ * This mergesort is as eager as possible while always performing at least
+ * 2:1 balanced merges.  Given two pending sublists of size 2^k, they are
+ * merged to a size-2^(k+1) list as soon as we have 2^k following elements.
+ *
+ * Thus, it will avoid cache thrashing as long as 3*2^k elements can
+ * fit into the cache.  Not quite as good as a fully-eager bottom-up
+ * mergesort, but it does use 0.2*n fewer comparisons, so is faster in
+ * the common case that everything fits into L1.
+ *
+ *
+ * The merging is controlled by "count", the number of elements in the
+ * pending lists.  This is beautifully simple code, but rather subtle.
+ *
+ * Each time we increment "count", we set one bit (bit k) and clear
+ * bits k-1 .. 0.  Each time this happens (except the very first time
+ * for each bit, when count increments to 2^k), we merge two lists of
+ * size 2^k into one list of size 2^(k+1).
+ *
+ * This merge happens exactly when the count reaches an odd multiple of
+ * 2^k, which is when we have 2^k elements pending in smaller lists,
+ * so it's safe to merge away two lists of size 2^k.
+ *
+ * After this happens twice, we have created two lists of size 2^(k+1),
+ * which will be merged into a list of size 2^(k+2) before we create
+ * a third list of size 2^(k+1), so there are never more than two pending.
+ *
+ * The number of pending lists of size 2^k is determined by the
+ * state of bit k of "count" plus two extra pieces of information:
+ *
+ * - The state of bit k-1 (when k == 0, consider bit -1 always set), and
+ * - Whether the higher-order bits are zero or non-zero (i.e.
+ *   is count >= 2^(k+1)).
+ *
+ * There are six states we distinguish.  "x" represents some arbitrary
+ * bits, and "y" represents some arbitrary non-zero bits:
+ * 0:  00x: 0 pending of size 2^k;           x pending of sizes < 2^k
+ * 1:  01x: 0 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
+ * 2: x10x: 0 pending of size 2^k; 2^k     + x pending of sizes < 2^k
+ * 3: x11x: 1 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
+ * 4: y00x: 1 pending of size 2^k; 2^k     + x pending of sizes < 2^k
+ * 5: y01x: 2 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
+ * (merge and loop back to state 2)
+ *
+ * We gain lists of size 2^k in the 2->3 and 4->5 transitions (because
+ * bit k-1 is set while the more significant bits are non-zero) and
+ * merge them away in the 5->2 transition.  Note in particular that just
+ * before the 5->2 transition, all lower-order bits are 11 (state 3),
+ * so there is one list of each smaller size.
+ *
+ * When we reach the end of the input, we merge all the pending
+ * lists, from smallest to largest.  If you work through cases 2 to
+ * 5 above, you can see that the number of elements we merge with a list
+ * of size 2^k varies from 2^(k-1) (cases 3 and 5 when x == 0) to
+ * 2^(k+1) - 1 (second merge of case 5 when x == 2^(k-1) - 1).
+ */
+__attribute__((nonnull(2,3)))
+void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp)
+{
+	struct list_head *list = head->next, *pending = NULL;
+	size_t count = 0;	/* Count of pending */
+
+	if (list == head->prev)	/* Zero or one elements */
+		return;
+
+	/* Convert to a null-terminated singly-linked list. */
+	head->prev->next = NULL;
+
+	/*
+	 * Data structure invariants:
+	 * - All lists are singly linked and null-terminated; prev
+	 *   pointers are not maintained.
+	 * - pending is a prev-linked "list of lists" of sorted
+	 *   sublists awaiting further merging.
+	 * - Each of the sorted sublists is power-of-two in size.
+	 * - Sublists are sorted by size and age, smallest & newest at front.
+	 * - There are zero to two sublists of each size.
+	 * - A pair of pending sublists are merged as soon as the number
+	 *   of following pending elements equals their size (i.e.
+	 *   each time count reaches an odd multiple of that size).
+	 *   That ensures each later final merge will be at worst 2:1.
+	 * - Each round consists of:
+	 *   - Merging the two sublists selected by the highest bit
+	 *     which flips when count is incremented, and
+	 *   - Adding an element from the input as a size-1 sublist.
+	 */
+	do {
+		size_t bits;
+		struct list_head **tail = &pending;
+
+		/* Find the least-significant clear bit in count */
+		for (bits = count; bits & 1; bits >>= 1)
+			tail = &(*tail)->prev;
+		/* Do the indicated merge */
+		if (likely(bits)) {
+			struct list_head *a = *tail, *b = a->prev;
+
+			a = merge(priv, cmp, b, a);
+			/* Install the merged result in place of the inputs */
+			a->prev = b->prev;
+			*tail = a;
+		}
+
+		/* Move one element from input list to pending */
+		list->prev = pending;
+		pending = list;
+		list = list->next;
+		pending->next = NULL;
+		count++;
+	} while (list);
+
+	/* End of input; merge together all the pending lists. */
+	list = pending;
+	pending = pending->prev;
+	for (;;) {
+		struct list_head *next = pending->prev;
+
+		if (!next)
+			break;
+		list = merge(priv, cmp, pending, list);
+		pending = next;
+	}
+	/* The final merge, rebuilding prev links */
+	merge_final(priv, cmp, head, pending, list);
+}
+EXPORT_SYMBOL(list_sort);
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index f05c4d48fd7e..e728615a3830 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -17,6 +17,7 @@ tools/lib/symbol/kallsyms.c
 tools/lib/symbol/kallsyms.h
 tools/lib/find_bit.c
 tools/lib/bitmap.c
+tools/lib/list_sort.c
 tools/lib/str_error_r.c
 tools/lib/vsprintf.c
 tools/lib/zalloc.c
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index f1e46277e822..30ecf3a0f68b 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -26,6 +26,7 @@ include/vdso/bits.h
 include/linux/const.h
 include/vdso/const.h
 include/linux/hash.h
+include/linux/list-sort.h
 include/uapi/linux/hw_breakpoint.h
 arch/x86/include/asm/disabled-features.h
 arch/x86/include/asm/required-features.h
@@ -150,6 +151,7 @@ check include/uapi/linux/mman.h       '-I "^#include <\(uapi/\)*asm/mman.h>"'
 check include/linux/build_bug.h       '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"'
 check include/linux/ctype.h	      '-I "isdigit("'
 check lib/ctype.c		      '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B'
+check lib/list_sort.c		      '-I "^#include <linux/bug.h>"'
 
 # diff non-symmetric files
 check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index f2914d5bed6e..15b2366ad384 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -138,6 +138,7 @@ perf-y += expr.o
 perf-y += branch.o
 perf-y += mem2node.o
 perf-y += clockid.o
+perf-y += list_sort.o
 
 perf-$(CONFIG_LIBBPF) += bpf-loader.o
 perf-$(CONFIG_LIBBPF) += bpf_map.o
@@ -315,3 +316,7 @@ $(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE
 $(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index d7c976671e3a..a685d20165f7 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -18,6 +18,7 @@ util/mmap.c
 util/namespaces.c
 ../lib/bitmap.c
 ../lib/find_bit.c
+../lib/list_sort.c
 ../lib/hweight.c
 ../lib/string.c
 ../lib/vsprintf.c
-- 
2.33.0.1079.g6e70778dc9-goog


  reply	other threads:[~2021-10-15 17:21 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-15 17:21 [PATCH v2 00/21] perf metric: Fixes and allow modifiers Ian Rogers
2021-10-15 17:21 ` Ian Rogers [this message]
2021-10-15 17:21 ` [PATCH v2 02/21] perf pmu: Add const to pmu_events_map Ian Rogers
2021-10-26  5:20   ` kajoljain
2021-10-15 17:21 ` [PATCH v2 03/21] perf pmu: Make pmu_sys_event_tables const Ian Rogers
2021-10-19 11:06   ` John Garry
2021-10-26  5:26   ` kajoljain
2021-10-15 17:21 ` [PATCH v2 04/21] perf pmu: Make pmu_event tables const Ian Rogers
2021-10-19 11:19   ` John Garry
2021-10-20 13:24     ` Arnaldo Carvalho de Melo
2021-10-26  6:18   ` kajoljain
2021-10-15 17:21 ` [PATCH v2 05/21] perf metric: Move runtime value to the expr context Ian Rogers
2021-10-26  8:10   ` kajoljain
2021-10-15 17:21 ` [PATCH v2 06/21] perf metric: Add documentation and rename a variable Ian Rogers
2021-10-26  8:18   ` kajoljain
2021-10-15 17:21 ` [PATCH v2 07/21] perf metric: Add metric new and free Ian Rogers
2021-10-19 11:30   ` John Garry
2021-10-19 11:30     ` John Garry
2021-10-15 17:21 ` [PATCH v2 08/21] perf metric: Only add a referenced metric once Ian Rogers
2021-10-15 17:21 ` [PATCH v2 09/21] perf metric: Modify resolution and recursion check Ian Rogers
2021-10-15 17:21 ` [PATCH v2 10/21] perf metric: Comment data structures Ian Rogers
2021-10-15 17:21 ` [PATCH v2 11/21] perf metric: Document the internal 'struct metric' Ian Rogers
2021-10-15 17:21 ` [PATCH v2 12/21] perf metric: Simplify metric_refs calculation Ian Rogers
2021-10-15 17:21 ` [PATCH v2 13/21] perf parse-events: Add const to evsel name Ian Rogers
2021-10-15 17:21 ` [PATCH v2 14/21] perf parse-events: Add new "metric-id" term Ian Rogers
2021-10-15 17:21 ` [PATCH v2 15/21] perf parse-events: Allow config on kernel PMU events Ian Rogers
2021-10-15 17:21 ` [PATCH v2 16/21] perf metric: Encode and use metric-id as qualifier Ian Rogers
2021-10-15 17:21 ` [PATCH v2 17/21] perf expr: Add subset utility Ian Rogers
2021-10-15 17:21 ` [PATCH v2 18/21] perf metrics: Modify setup and deduplication Ian Rogers
2021-10-15 17:21 ` [PATCH v2 19/21] perf metric: Switch fprintf to pr_err Ian Rogers
2021-10-15 17:21 ` [PATCH v2 20/21] perf parse-events: Identify broken modifiers Ian Rogers
2021-10-15 17:21 ` [PATCH v2 21/21] perf metric: Allow modifiers on metrics Ian Rogers
2021-10-19 15:06   ` Arnaldo Carvalho de Melo
2021-10-19 15:13     ` Arnaldo Carvalho de Melo
2021-10-19 15:17       ` Arnaldo Carvalho de Melo
2021-10-19 15:18         ` Arnaldo Carvalho de Melo
2021-10-19 20:00           ` Ian Rogers
2021-10-20  0:35             ` Arnaldo Carvalho de Melo
2021-10-20 14:31 ` [PATCH v2 00/21] perf metric: Fixes and allow modifiers Arnaldo Carvalho de Melo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211015172132.1162559-2-irogers@google.com \
    --to=irogers@google.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.antonov@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=andrew.kilroy@arm.com \
    --cc=changbin.du@intel.com \
    --cc=copy@copy.sh \
    --cc=dzagorui@cisco.com \
    --cc=eranian@google.com \
    --cc=hca@linux.ibm.com \
    --cc=jacob.e.keller@intel.com \
    --cc=jiapeng.chong@linux.alibaba.com \
    --cc=john.garry@huawei.com \
    --cc=jolsa@redhat.com \
    --cc=kan.liang@linux.intel.com \
    --cc=keescook@chromium.org \
    --cc=kjain@linux.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=mrtoastcheng@gmail.com \
    --cc=namhyung@kernel.org \
    --cc=nbd@nbd.name \
    --cc=ndesaulniers@google.com \
    --cc=nfraser@codeweavers.com \
    --cc=pc@us.ibm.com \
    --cc=peterz@infradead.org \
    --cc=qiangqing.zhang@nxp.com \
    --cc=rickyman7@gmail.com \
    --cc=samitolvanen@google.com \
    --cc=songliubraving@fb.com \
    --cc=sumanthk@linux.ibm.com \
    --cc=thunder.leizhen@huawei.com \
    --cc=tmricht@linux.ibm.com \
    --cc=wanjiabing@vivo.com \
    --cc=yao.jin@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.