All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] linux/bitops.h: Refactor function fns to reduce branch usage and eliminate external calls
@ 2024-04-17 13:22 Chin-Chun Chen
  2024-04-17 13:51 ` [PATCH v2] " Chin-Chun Chen
  0 siblings, 1 reply; 2+ messages in thread
From: Chin-Chun Chen @ 2024-04-17 13:22 UTC (permalink / raw)
  To: yury.norov; +Cc: linux, linux-kernel, Chin-Chun Chen

Rewrote the function fns to use the macros defined in linux/include/asm-generic/bitops/const_hweight.h.
This decreases the number of required branches and eliminates the need for external calls.
The number of branches is now constant, unaffected by input variations.

This commit improves code efficiency and clarity.

Signed-off-by: Chin-Chun Chen <chinchunchen2001@gmail.com>
---
 include/linux/bitops.h | 54 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 48 insertions(+), 6 deletions(-)

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 5842d7d03f19..18899f11aaa7 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -247,6 +247,13 @@ static inline unsigned long __ffs64(u64 word)
 	return __ffs((unsigned long)word);
 }
 
+
+#define __const_hweight2(w)                                                 \
+    ((unsigned int) (!!((word) & (1ULL << 0))) + (!!((word) & (1ULL << 1))))
+
+#define __const_hweight4(w)                                                 \
+    ((unsigned int) (!!((word) & (1ULL << 0))) + (!!((word) & (1ULL << 1))) + \
+                     (!!((word) & (1ULL << 2))) + (!!((word) & (1ULL << 3))))
 /**
  * fns - find N'th set bit in a word
  * @word: The word to search
@@ -255,13 +262,48 @@ static inline unsigned long __ffs64(u64 word)
 static inline unsigned long fns(unsigned long word, unsigned int n)
 {
- 	unsigned int bit;
-
-	while (word) {
-		bit = __ffs(word);
-		if (--n == 0)
-			return bit;
-		__clear_bit(bit, &word);
- 	return BITS_PER_LONG;
+	unsigned int bits;
+	unsigned int sum = 0;
+#if BITS_PER_LONG == 64
+	bits = __const_hweight32(word & 0xffffffff);
+	if (bits <= n) {
+		n -= bits;
+		word >>= 32;
+		sum += 32;
+ 	}
+#endif
+	bits = __const_hweight16(word & 0xffff);
+        if (bits <= n) {
+                n -= bits;
+                word >>= 16;
+                sum += 16;
+        }
+	bits = __const_hweight8(word & 0xff);
+        if (bits <= n) {
+                n -= bits;
+                word >>= 8;
+                sum += 8;
+        }
+	bits = __const_hweight4(word & 0xf);
+        if (bits <= n) {
+                n -= bits;
+                word >>= 4;
+                sum += 4;
+        }
+	bits = __const_hweight2(word & 0x3);
+        if (bits <= n) {
+                n -= bits;
+                word >>= 2;
+                sum += 2;
+        }
+	bits = (unsigned int) (word & 0x1);
+        if (bits <= n) {
+                n -= bits;
+                word >>= 1;
+                sum += 1;
+        }
+	bits = (unsigned int) (word & 0x1);
+        if (bits <= n) {
+                n -= bits;
+                sum += 1;
+        }
+
+	if (!n)
+		return sum;
+
+ 	return BITS_PER_LONG;
 }
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH v2] linux/bitops.h: Refactor function fns to reduce branch usage and eliminate external calls
  2024-04-17 13:22 [PATCH] linux/bitops.h: Refactor function fns to reduce branch usage and eliminate external calls Chin-Chun Chen
@ 2024-04-17 13:51 ` Chin-Chun Chen
  0 siblings, 0 replies; 2+ messages in thread
From: Chin-Chun Chen @ 2024-04-17 13:51 UTC (permalink / raw)
  To: yury.norov; +Cc: linux, linux-kernel, Chin-Chun Chen

Rewrote the function fns to use the macros defined in linux/include/asm-generic/bitops/const_hweight.h.
This decreases the number of required branches and eliminates the need for external calls.
The number of branches is now constant, unaffected by input variations.

This commit improves code efficiency and clarity.

Signed-off-by: Chin-Chun Chen <chinchunchen2001@gmail.com>
---
Changes since v1:
 * wrong patch sent in v1

 include/linux/bitops.h | 62 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 55 insertions(+), 7 deletions(-)

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 2ba557e067fe..55d81755e56f 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -247,6 +247,13 @@ static inline unsigned long __ffs64(u64 word)
 	return __ffs((unsigned long)word);
 }
 
+
+#define __const_hweight2(w)                                                 \
+    ((unsigned int) (!!((word) & (1ULL << 0))) + (!!((word) & (1ULL << 1))))
+
+#define __const_hweight4(w)                                                 \
+    ((unsigned int) (!!((word) & (1ULL << 0))) + (!!((word) & (1ULL << 1))) + \
+                     (!!((word) & (1ULL << 2))) + (!!((word) & (1ULL << 3))))
 /**
  * fns - find N'th set bit in a word
  * @word: The word to search
@@ -254,14 +261,55 @@ static inline unsigned long __ffs64(u64 word)
  */
 static inline unsigned long fns(unsigned long word, unsigned int n)
 {
-	unsigned int bit;
-
-	while (word) {
-		bit = __ffs(word);
-		if (n-- == 0)
-			return bit;
-		__clear_bit(bit, &word);
-       }
-
-       return BITS_PER_LONG;
+	unsigned int bits;
+	unsigned int sum = 0;
+#if BITS_PER_LONG == 64
+	bits = __const_hweight32(word & 0xffffffff);
+	if (bits <= n) {
+		n -= bits;
+		word >>= 32;
+		sum += 32;
+ 	}
+#endif
+	bits = __const_hweight16(word & 0xffff);
+       if (bits <= n) {
+               n -= bits;
+               word >>= 16;
+               sum += 16;
+       }
+	bits = __const_hweight8(word & 0xff);
+       if (bits <= n) {
+               n -= bits;
+               word >>= 8;
+               sum += 8;
+       }
+	bits = __const_hweight4(word & 0xf);
+       if (bits <= n) {
+               n -= bits;
+               word >>= 4;
+               sum += 4;
+       }
+	bits = __const_hweight2(word & 0x3);
+       if (bits <= n) {
+               n -= bits;
+               word >>= 2;
+               sum += 2;
+       }
+	bits = (unsigned int) (word & 0x1);
+       if (bits <= n) {
+               n -= bits;
+               word >>= 1;
+               sum += 1;
+       }
+	bits = (unsigned int) (word & 0x1);
+       if (bits <= n) {
+               n -= bits;
+               sum += 1;
+       }
+
+	if (!n)
+		return sum;
+
+ 
+ 	return BITS_PER_LONG;
 }

base-commit: 8f2c057754b25075aa3da132cd4fd4478cdab854
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-04-17 13:57 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-17 13:22 [PATCH] linux/bitops.h: Refactor function fns to reduce branch usage and eliminate external calls Chin-Chun Chen
2024-04-17 13:51 ` [PATCH v2] " Chin-Chun Chen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.