linux-bcachefs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Kuan-Wei Chiu <visitorckw@gmail.com>
To: colyli@suse.de, kent.overstreet@linux.dev
Cc: bfoster@redhat.com, jserv@ccns.ncku.edu.tw,
	linux-bcache@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-bcachefs@vger.kernel.org,
	Kuan-Wei Chiu <visitorckw@gmail.com>
Subject: [PATCH 1/5] bcachefs: Optimize eytzinger0_sort() using bottom-up heapsort
Date: Sun, 21 Jan 2024 23:36:45 +0800	[thread overview]
Message-ID: <20240121153649.2733274-2-visitorckw@gmail.com> (raw)
In-Reply-To: <20240121153649.2733274-1-visitorckw@gmail.com>

This optimization reduces the average number of comparisons required
from 2*n*log2(n) - 3*n + o(n) to n*log2(n) + 0.37*n + o(n). When n is
sufficiently large, it results in approximately 50% fewer comparisons.

Currently, eytzinger0_sort employs the textbook version of heapsort,
where during the heapify process, each level requires two comparisons
to determine the maximum among three elements. In contrast, the
bottom-up heapsort, during heapify, only compares two children at each
level until reaching a leaf node. Then, it backtracks from the leaf
node to find the correct position. Since heapify typically continues
until very close to the leaf node, the standard heapify requires about
2*log2(n) comparisons, while the bottom-up variant only needs log2(n)
comparisons.

The experimental data presented below is based on an array generated
by get_random_u32().

|   N   | comparisons(old) | comparisons(new) | time(old) | time(new) |
|-------|------------------|------------------|-----------|-----------|
| 10000 |     235381       |     136615       |  25545 us |  20366 us |
| 20000 |     510694       |     293425       |  31336 us |  18312 us |
| 30000 |     800384       |     457412       |  35042 us |  27386 us |
| 40000 |    1101617       |     626831       |  48779 us |  38253 us |
| 50000 |    1409762       |     799637       |  62238 us |  46950 us |
| 60000 |    1721191       |     974521       |  75588 us |  58367 us |
| 70000 |    2038536       |    1152171       |  90823 us |  68778 us |
| 80000 |    2362958       |    1333472       | 104165 us |  78625 us |
| 90000 |    2690900       |    1516065       | 116111 us |  89573 us |
| 100000|    3019413       |    1699879       | 133638 us | 100998 us |

Refs:
  BOTTOM-UP-HEAPSORT, a new variant of HEAPSORT beating, on an average,
  QUICKSORT (if n is not very small)
  Ingo Wegener
  Theoretical Computer Science, 118(1); Pages 81-98, 13 September 1993
  https://doi.org/10.1016/0304-3975(93)90364-Y

Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
---
This patch has undergone unit testing and micro benchmarking using the
following code [1].

[1]:
static long long int cmp_count = 0;

static int mycmp(const void *a, const void *b, size_t size)
{
    u32 _a = *(u32 *)a;
	u32 _b = *(u32 *)b;

	cmp_count++;
	if (_a < _b)
		return -1;
	else if (_a > _b)
		return 1;
	else
		return 0;
}

static int test(void)
{
    size_t N, i, L, R;
	ktime_t start, end;
	s64 delta;
    u32 *arr;

	for (N = 10000; N <= 100000; N += 10000) {
		arr = kmalloc_array(N, sizeof(u32), GFP_KERNEL);
		cmp_count = 0;

		for (i = 0; i < N; i++)
			arr[i] = get_random_u32();
		
		start = ktime_get();
		eytzinger0_sort(arr, N, sizeof(u32), mycmp, NULL);
		end = ktime_get();

		delta = ktime_us_delta(end, start);
        printk(KERN_INFO "time: %lld\n", delta);
		printk(KERN_INFO "comparisons: %lld\n", cmp_count);

		for (int i = 0; i < N; i++) {
            L = 2 * i + 1;
            R = 2 * i + 2;
            if (L < N && arr[i] < arr[L])
                goto err;
            if (R < N && arr[i] > arr[R])
                goto err;
        }

		kfree(arr);
	}
    return 0;

err:
    kfree(arr);
    return -1;
}

 fs/bcachefs/util.c | 50 +++++++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index c2ef7cddaa4f..bbc83b43162e 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -911,7 +911,7 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
 		     int (*cmp_func)(const void *, const void *, size_t),
 		     void (*swap_func)(void *, void *, size_t))
 {
-	int i, c, r;
+	int i, j, k;
 
 	if (!swap_func) {
 		if (size == 4 && alignment_ok(base, 4))
@@ -924,17 +924,22 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
 
 	/* heapify */
 	for (i = n / 2 - 1; i >= 0; --i) {
-		for (r = i; r * 2 + 1 < n; r = c) {
-			c = r * 2 + 1;
-
-			if (c + 1 < n &&
-			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
-				c++;
-
-			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
-				break;
-
-			do_swap(base, n, size, swap_func, r, c);
+		/* Find the sift-down path all the way to the leaves. */
+		for (j = i; k = j * 2 + 1, k + 1 < n;)
+			j = do_cmp(base, n, size, cmp_func, k, k + 1) > 0 ? k : k + 1;
+
+		/* Special case for the last leaf with no sibling. */
+		if (j * 2 + 2 == n)
+			j = j * 2 + 1;
+
+		/* Backtrack to the correct location. */
+		while (j != i && do_cmp(base, n, size, cmp_func, i, j) >= 0)
+			j = (j - 1) / 2;
+
+		/* Shift the element into its correct place. */
+		for (k = j; j != i;) {
+			j = (j - 1) / 2;
+			do_swap(base, n, size, swap_func, j, k);
 		}
 	}
 
@@ -942,17 +947,22 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
 	for (i = n - 1; i > 0; --i) {
 		do_swap(base, n, size, swap_func, 0, i);
 
-		for (r = 0; r * 2 + 1 < i; r = c) {
-			c = r * 2 + 1;
+		/* Find the sift-down path all the way to the leaves. */
+		for (j = 0; k = j * 2 + 1, k + 1 < i;)
+			j = do_cmp(base, n, size, cmp_func, k, k + 1) > 0 ? k : k + 1;
 
-			if (c + 1 < i &&
-			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
-				c++;
+		/* Special case for the last leaf with no sibling. */
+		if (j * 2 + 2 == i)
+			j = j * 2 + 1;
 
-			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
-				break;
+		/* Backtrack to the correct location. */
+		while (j && do_cmp(base, n, size, cmp_func, 0, j) >= 0)
+			j = (j - 1) / 2;
 
-			do_swap(base, n, size, swap_func, r, c);
+		/* Shift the element into its correct place. */
+		for (k = j; j;) {
+			j = (j - 1) / 2;
+			do_swap(base, n, size, swap_func, j, k);
 		}
 	}
 }
-- 
2.25.1


  reply	other threads:[~2024-01-21 15:37 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-21 15:36 [PATCH 0/5] Optimize number of comparisons for heap/heapsort implementaion Kuan-Wei Chiu
2024-01-21 15:36 ` Kuan-Wei Chiu [this message]
2024-01-21 15:36 ` [PATCH 2/5] bcachefs: Introduce parent function for sort_cmp_size() Kuan-Wei Chiu
2024-01-21 16:17   ` Kent Overstreet
2024-01-21 17:05     ` Kuan-Wei Chiu
2024-01-21 17:37       ` Kent Overstreet
2024-01-21 15:36 ` [PATCH 3/5] bcachefs: Optimize sort_cmp_size() using bottom-up heapsort Kuan-Wei Chiu
2024-01-21 15:36 ` [PATCH 4/5] bcachefs: Optimize number of comparisons in heap_sift_down Kuan-Wei Chiu
2024-01-21 15:36 ` [PATCH 5/5] bcache: Optimize number of comparisons in heap_sift Kuan-Wei Chiu
2024-01-21 16:21 ` [PATCH 0/5] Optimize number of comparisons for heap/heapsort implementaion Kent Overstreet
2024-01-21 16:55   ` Kuan-Wei Chiu
2024-01-21 17:41     ` Kent Overstreet
2024-01-22 15:06       ` Kuan-Wei Chiu
2024-01-22 16:06         ` Kent Overstreet
2024-01-22 16:23           ` Kuan-Wei Chiu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240121153649.2733274-2-visitorckw@gmail.com \
    --to=visitorckw@gmail.com \
    --cc=bfoster@redhat.com \
    --cc=colyli@suse.de \
    --cc=jserv@ccns.ncku.edu.tw \
    --cc=kent.overstreet@linux.dev \
    --cc=linux-bcache@vger.kernel.org \
    --cc=linux-bcachefs@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).