All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] blackfin: optimize ffz, __ffs, ffs, __fls, and fls functions
@ 2016-04-15  7:52 zengzhaoxiu
  2016-04-15  8:23 ` [PATCH V2] " zengzhaoxiu
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: zengzhaoxiu @ 2016-04-15  7:52 UTC (permalink / raw)
  To: linux-kernel; +Cc: Zhaoxiu Zeng, Steven Miao, adi-buildroot-devel

From: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>

Blackfin has a popcount instruction (ONES), so we can compute ffz, __ffs,
ffs, __fls, and fls efficiently using this instruction.

Adapted from "https://en.wikipedia.org/wiki/Find_first_set" and
arch/ia64/include/asm/bitops.h.

Signed-off-by: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
---
 arch/blackfin/include/asm/bitops.h | 73 +++++++++++++++++++++++++++++++++++---
 1 file changed, 68 insertions(+), 5 deletions(-)

diff --git a/arch/blackfin/include/asm/bitops.h b/arch/blackfin/include/asm/bitops.h
index b298b65..602e7c0 100644
--- a/arch/blackfin/include/asm/bitops.h
+++ b/arch/blackfin/include/asm/bitops.h
@@ -9,10 +9,6 @@
 
 #include <linux/compiler.h>
 
-#include <asm-generic/bitops/__ffs.h>
-#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/fls.h>
-#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/find.h>
 
@@ -21,7 +17,6 @@
 #endif
 
 #include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/ffs.h>
 #include <asm-generic/bitops/const_hweight.h>
 #include <asm-generic/bitops/lock.h>
 
@@ -137,4 +132,72 @@ static inline unsigned int __arch_hweight8(unsigned int w)
 	return __arch_hweight32(w & 0xff);
 }
 
+/**
+ * ffz - find the first zero bit in a long word
+ * @x: The long word to find the bit in
+ *
+ * Returns the bit-number (0..31) of the first (least significant) zero bit.
+ * Undefined if no zero exists, so code should check against ~0UL first...
+ */
+static inline unsigned long ffz(unsigned long x)
+{
+	return __arch_hweight32(x & (~x - 1));
+}
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static inline int ffs(int x)
+{
+	if (!x)
+		return 0;
+	return __arch_hweight32(x ^ ((unsigned int)x - 1));
+}
+
+/**
+ * __ffs - find first bit in word.
+ * @x: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long x)
+{
+	return __arch_hweight32(~x & (x - 1));
+}
+
+/*
+ * Find the last (most significant) bit set.  Returns 0 for x==0 and
+ * bits are numbered from 1..32 (e.g., fls(9) == 4).
+ */
+static inline int fls(int x)
+{
+	if (!x)
+		return 0;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	return __arch_hweight32(x);
+}
+
+/*
+ * Find the last (most significant) bit set.  Undefined for x==0.
+ * Bits are numbered from 0..31 (e.g., __fls(9) == 3).
+ */
+static inline unsigned long __fls(unsigned long x)
+{
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	return __arch_hweight32(x) - 1;
+}
+
 #endif				/* _BLACKFIN_BITOPS_H */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH V2] blackfin: optimize ffz, __ffs, ffs, __fls, and fls functions
  2016-04-15  7:52 [PATCH] blackfin: optimize ffz, __ffs, ffs, __fls, and fls functions zengzhaoxiu
@ 2016-04-15  8:23 ` zengzhaoxiu
  2016-04-15  9:16 ` [PATCH] " kbuild test robot
  2016-04-15 15:20 ` [PATCH V3] " zengzhaoxiu
  2 siblings, 0 replies; 6+ messages in thread
From: zengzhaoxiu @ 2016-04-15  8:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: Zhaoxiu Zeng, Steven Miao, adi-buildroot-devel

From: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>

Blackfin has a popcount instruction (ONES), so we can compute ffz, __ffs,
ffs, __fls, and fls efficiently using this instruction.

Adapted from "https://en.wikipedia.org/wiki/Find_first_set" and
arch/ia64/include/asm/bitops.h.

Changes to V1:
- Use hweight32 instead of __arch_hweight32

Signed-off-by: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
---
 arch/blackfin/include/asm/bitops.h | 73 +++++++++++++++++++++++++++++++++++---
 1 file changed, 68 insertions(+), 5 deletions(-)

diff --git a/arch/blackfin/include/asm/bitops.h b/arch/blackfin/include/asm/bitops.h
index b298b65..602e7c0 100644
--- a/arch/blackfin/include/asm/bitops.h
+++ b/arch/blackfin/include/asm/bitops.h
@@ -9,10 +9,6 @@
 
 #include <linux/compiler.h>
 
-#include <asm-generic/bitops/__ffs.h>
-#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/fls.h>
-#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/find.h>
 
@@ -21,7 +17,6 @@
 #endif
 
 #include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/ffs.h>
 #include <asm-generic/bitops/const_hweight.h>
 #include <asm-generic/bitops/lock.h>
 
@@ -137,4 +132,72 @@ static inline unsigned int __arch_hweight8(unsigned int w)
 	return __arch_hweight32(w & 0xff);
 }
 
+/**
+ * ffz - find the first zero bit in a long word
+ * @x: The long word to find the bit in
+ *
+ * Returns the bit-number (0..31) of the first (least significant) zero bit.
+ * Undefined if no zero exists, so code should check against ~0UL first...
+ */
+static inline unsigned long ffz(unsigned long x)
+{
+	return hweight32(x & (~x - 1));
+}
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static inline int ffs(int x)
+{
+	if (!x)
+		return 0;
+	return hweight32(x ^ ((unsigned int)x - 1));
+}
+
+/**
+ * __ffs - find first bit in word.
+ * @x: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long x)
+{
+	return hweight32(~x & (x - 1));
+}
+
+/*
+ * Find the last (most significant) bit set.  Returns 0 for x==0 and
+ * bits are numbered from 1..32 (e.g., fls(9) == 4).
+ */
+static inline int fls(int x)
+{
+	if (!x)
+		return 0;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	return hweight32(x);
+}
+
+/*
+ * Find the last (most significant) bit set.  Undefined for x==0.
+ * Bits are numbered from 0..31 (e.g., __fls(9) == 3).
+ */
+static inline unsigned long __fls(unsigned long x)
+{
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	return hweight32(x) - 1;
+}
+
 #endif				/* _BLACKFIN_BITOPS_H */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] blackfin: optimize ffz, __ffs, ffs, __fls, and fls functions
  2016-04-15  7:52 [PATCH] blackfin: optimize ffz, __ffs, ffs, __fls, and fls functions zengzhaoxiu
  2016-04-15  8:23 ` [PATCH V2] " zengzhaoxiu
@ 2016-04-15  9:16 ` kbuild test robot
  2016-04-15 15:20 ` [PATCH V3] " zengzhaoxiu
  2 siblings, 0 replies; 6+ messages in thread
From: kbuild test robot @ 2016-04-15  9:16 UTC (permalink / raw)
  To: zengzhaoxiu
  Cc: kbuild-all, linux-kernel, Zhaoxiu Zeng, Steven Miao, adi-buildroot-devel

[-- Attachment #1: Type: text/plain, Size: 4143 bytes --]

Hi Zhaoxiu,

[auto build test ERROR on v4.6-rc3]
[also build test ERROR on next-20160414]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/zengzhaoxiu-163-com/blackfin-optimize-ffz-__ffs-ffs-__fls-and-fls-functions/20160415-155549
config: blackfin-TCM-BF537_defconfig (attached as .config)
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=blackfin 

All errors (new ones prefixed by >>):

   In file included from arch/blackfin/include/asm/bitops.h:12:0,
                    from include/linux/bitops.h:36,
                    from include/linux/kernel.h:10,
                    from include/asm-generic/bug.h:13,
                    from arch/blackfin/include/asm/bug.h:71,
                    from include/linux/bug.h:4,
                    from include/linux/page-flags.h:9,
                    from kernel/bounds.c:9:
   include/asm-generic/bitops/fls64.h: In function 'fls64':
>> include/asm-generic/bitops/fls64.h:22:3: error: implicit declaration of function 'fls' [-Werror=implicit-function-declaration]
   In file included from arch/blackfin/include/asm/bitops.h:19:0,
                    from include/linux/bitops.h:36,
                    from include/linux/kernel.h:10,
                    from include/asm-generic/bug.h:13,
                    from arch/blackfin/include/asm/bug.h:71,
                    from include/linux/bug.h:4,
                    from include/linux/page-flags.h:9,
                    from kernel/bounds.c:9:
   include/asm-generic/bitops/sched.h: In function 'sched_find_first_bit':
>> include/asm-generic/bitops/sched.h:20:3: error: implicit declaration of function '__ffs' [-Werror=implicit-function-declaration]
   In file included from include/linux/bitops.h:36:0,
                    from include/linux/kernel.h:10,
                    from include/asm-generic/bug.h:13,
                    from arch/blackfin/include/asm/bug.h:71,
                    from include/linux/bug.h:4,
                    from include/linux/page-flags.h:9,
                    from kernel/bounds.c:9:
   arch/blackfin/include/asm/bitops.h: At top level:
>> arch/blackfin/include/asm/bitops.h:168:29: error: conflicting types for '__ffs'
   include/asm-generic/bitops/sched.h:20:10: note: previous implicit declaration of '__ffs' was here
>> arch/blackfin/include/asm/bitops.h:177:19: error: static declaration of 'fls' follows non-static declaration
   include/asm-generic/bitops/fls64.h:22:10: note: previous implicit declaration of 'fls' was here
   cc1: some warnings being treated as errors
   make[2]: *** [kernel/bounds.s] Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [sub-make] Error 2

vim +/fls +22 include/asm-generic/bitops/fls64.h

d57594c20 Alexander van Heukelum 2008-03-15  16   */
d57594c20 Alexander van Heukelum 2008-03-15  17  #if BITS_PER_LONG == 32
c8399943b Andi Kleen             2009-01-12  18  static __always_inline int fls64(__u64 x)
2dfc383ad Akinobu Mita           2006-03-26  19  {
2dfc383ad Akinobu Mita           2006-03-26  20  	__u32 h = x >> 32;
2dfc383ad Akinobu Mita           2006-03-26  21  	if (h)
2dfc383ad Akinobu Mita           2006-03-26 @22  		return fls(h) + 32;
2dfc383ad Akinobu Mita           2006-03-26  23  	return fls(x);
2dfc383ad Akinobu Mita           2006-03-26  24  }
d57594c20 Alexander van Heukelum 2008-03-15  25  #elif BITS_PER_LONG == 64

:::::: The code at line 22 was first introduced by commit
:::::: 2dfc383ad587bbead84739a9ff9273df3eda983d [PATCH] bitops: generic fls64()

:::::: TO: Akinobu Mita <mita@miraclelinux.com>
:::::: CC: Linus Torvalds <torvalds@g5.osdl.org>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 10386 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH V3] blackfin: optimize ffz, __ffs, ffs, __fls, and fls functions
  2016-04-15  7:52 [PATCH] blackfin: optimize ffz, __ffs, ffs, __fls, and fls functions zengzhaoxiu
  2016-04-15  8:23 ` [PATCH V2] " zengzhaoxiu
  2016-04-15  9:16 ` [PATCH] " kbuild test robot
@ 2016-04-15 15:20 ` zengzhaoxiu
  2016-04-15 16:08   ` Joe Perches
  2 siblings, 1 reply; 6+ messages in thread
From: zengzhaoxiu @ 2016-04-15 15:20 UTC (permalink / raw)
  To: linux-kernel; +Cc: Zeng Zhaoxiu, Steven Miao, adi-buildroot-devel

From: Zeng Zhaoxiu <zhaoxiu.zeng@gmail.com>

Blackfin has a popcount instruction (ONES), so we can compute ffz, __ffs,
ffs, __fls, and fls efficiently using this instruction.

Adapted from "https://en.wikipedia.org/wiki/Find_first_set" and
arch/ia64/include/asm/bitops.h.

Changes to V2:
- Fix build errors

Changes to V1:
- Use hweight32 instead of __arch_hweight32

Signed-off-by: Zeng Zhaoxiu <zhaoxiu.zeng@gmail.com>
---
 arch/blackfin/include/asm/bitops.h | 144 ++++++++++++++++++++++++++-----------
 1 file changed, 104 insertions(+), 40 deletions(-)

diff --git a/arch/blackfin/include/asm/bitops.h b/arch/blackfin/include/asm/bitops.h
index b298b65..8380d1b 100644
--- a/arch/blackfin/include/asm/bitops.h
+++ b/arch/blackfin/include/asm/bitops.h
@@ -9,20 +9,115 @@
 
 #include <linux/compiler.h>
 
-#include <asm-generic/bitops/__ffs.h>
-#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/fls.h>
-#include <asm-generic/bitops/__fls.h>
-#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
-
 #ifndef _LINUX_BITOPS_H
 #error only <linux/bitops.h> can be included directly
 #endif
 
-#include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/ffs.h>
+/*
+ * hweightN: returns the hamming weight (i.e. the number
+ * of bits set) of a N-bit word
+ */
+
+static inline unsigned int __arch_hweight32(unsigned int w)
+{
+	unsigned int res;
+
+	__asm__ ("%0.l = ONES %1;"
+		"%0 = %0.l (Z);"
+		: "=d" (res) : "d" (w));
+	return res;
+}
+
+static inline unsigned int __arch_hweight64(__u64 w)
+{
+	return __arch_hweight32((unsigned int)(w >> 32)) +
+	       __arch_hweight32((unsigned int)w);
+}
+
+static inline unsigned int __arch_hweight16(unsigned int w)
+{
+	return __arch_hweight32(w & 0xffff);
+}
+
+static inline unsigned int __arch_hweight8(unsigned int w)
+{
+	return __arch_hweight32(w & 0xff);
+}
+
 #include <asm-generic/bitops/const_hweight.h>
+
+/**
+ * ffz - find the first zero bit in a long word
+ * @x: The long word to find the bit in
+ *
+ * Returns the bit-number (0..31) of the first (least significant) zero bit.
+ * Undefined if no zero exists, so code should check against ~0UL first...
+ */
+static inline unsigned long ffz(unsigned long x)
+{
+	return hweight32(x & (~x - 1));
+}
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static inline int ffs(int x)
+{
+	if (!x)
+		return 0;
+	return hweight32(x ^ (x - 1));
+}
+
+/**
+ * __ffs - find first bit in word.
+ * @x: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long x)
+{
+	return hweight32(~x & (x - 1));
+}
+
+/*
+ * Find the last (most significant) bit set.  Returns 0 for x==0 and
+ * bits are numbered from 1..32 (e.g., fls(9) == 4).
+ */
+static inline int fls(int x)
+{
+	if (!x)
+		return 0;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	return hweight32(x);
+}
+
+/*
+ * Find the last (most significant) bit set.  Undefined for x==0.
+ * Bits are numbered from 0..31 (e.g., __fls(9) == 3).
+ */
+static inline unsigned long __fls(unsigned long x)
+{
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	return hweight32(x) - 1;
+}
+
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/find.h>
+
+#include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/lock.h>
 
 #include <asm-generic/bitops/ext2-atomic.h>
@@ -106,35 +201,4 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
 /* Needs to be after test_bit and friends */
 #include <asm-generic/bitops/le.h>
 
-/*
- * hweightN: returns the hamming weight (i.e. the number
- * of bits set) of a N-bit word
- */
-
-static inline unsigned int __arch_hweight32(unsigned int w)
-{
-	unsigned int res;
-
-	__asm__ ("%0.l = ONES %1;"
-		"%0 = %0.l (Z);"
-		: "=d" (res) : "d" (w));
-	return res;
-}
-
-static inline unsigned int __arch_hweight64(__u64 w)
-{
-	return __arch_hweight32((unsigned int)(w >> 32)) +
-	       __arch_hweight32((unsigned int)w);
-}
-
-static inline unsigned int __arch_hweight16(unsigned int w)
-{
-	return __arch_hweight32(w & 0xffff);
-}
-
-static inline unsigned int __arch_hweight8(unsigned int w)
-{
-	return __arch_hweight32(w & 0xff);
-}
-
 #endif				/* _BLACKFIN_BITOPS_H */
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH V3] blackfin: optimize ffz, __ffs, ffs, __fls, and fls functions
  2016-04-15 15:20 ` [PATCH V3] " zengzhaoxiu
@ 2016-04-15 16:08   ` Joe Perches
  2016-04-15 16:18     ` Zhaoxiu Zeng
  0 siblings, 1 reply; 6+ messages in thread
From: Joe Perches @ 2016-04-15 16:08 UTC (permalink / raw)
  To: zengzhaoxiu, linux-kernel; +Cc: Zeng Zhaoxiu, Steven Miao, adi-buildroot-devel

On Fri, 2016-04-15 at 23:20 +0800, zengzhaoxiu@163.com wrote:
> From: Zeng Zhaoxiu <zhaoxiu.zeng@gmail.com>
> 
> blackfin has popcount instruction (ONES), we can do the efficient
> computing (ffz, __ffs, ffs, __fls, and fls) use this instruction.
[]
> diff --git a/arch/blackfin/include/asm/bitops.h b/arch/blackfin/include/asm/bitops.h
[]
> +static inline unsigned int __arch_hweight64(__u64 w)
> +{
> +	return __arch_hweight32((unsigned int)(w >> 32)) +
> +	       __arch_hweight32((unsigned int)w);
> +}

trivia:  perhaps this is more readable as:

	return __arch_hweight32(upper_32_bits(w)) +
	       __arch_hweight32(lower_32_bits(w));

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V3] blackfin: optimize ffz, __ffs, ffs, __fls, and fls functions
  2016-04-15 16:08   ` Joe Perches
@ 2016-04-15 16:18     ` Zhaoxiu Zeng
  0 siblings, 0 replies; 6+ messages in thread
From: Zhaoxiu Zeng @ 2016-04-15 16:18 UTC (permalink / raw)
  To: Joe Perches, linux-kernel; +Cc: Zeng Zhaoxiu, Steven Miao, adi-buildroot-devel

在 2016/4/16 0:08, Joe Perches 写道:
> On Fri, 2016-04-15 at 23:20 +0800, zengzhaoxiu@163.com wrote:
>> From: Zeng Zhaoxiu <zhaoxiu.zeng@gmail.com>
>>
>> blackfin has popcount instruction (ONES), we can do the efficient
>> computing (ffz, __ffs, ffs, __fls, and fls) use this instruction.
> []
>> diff --git a/arch/blackfin/include/asm/bitops.h b/arch/blackfin/include/asm/bitops.h
> []
>> +static inline unsigned int __arch_hweight64(__u64 w)
>> +{
>> +	return __arch_hweight32((unsigned int)(w >> 32)) +
>> +	       __arch_hweight32((unsigned int)w);
>> +}
> trivia:  perhaps this is more readable as:
>
> 	return __arch_hweight32(upper_32_bits(w)) +
> 	       __arch_hweight32(lower_32_bits(w));
>
>

Yes.
I just moved this code from the bottom of bitops.h without changing anything.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2016-04-15 16:19 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-15  7:52 [PATCH] blackfin: optimize ffz, __ffs, ffs, __fls, and fls functions zengzhaoxiu
2016-04-15  8:23 ` [PATCH V2] " zengzhaoxiu
2016-04-15  9:16 ` [PATCH] " kbuild test robot
2016-04-15 15:20 ` [PATCH V3] " zengzhaoxiu
2016-04-15 16:08   ` Joe Perches
2016-04-15 16:18     ` Zhaoxiu Zeng

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.