* [PATCH] arm: Use builtins for ffs/fls
@ 2023-07-31 21:27 Sean Anderson
2023-07-31 21:36 ` Tom Rini
` (4 more replies)
0 siblings, 5 replies; 7+ messages in thread
From: Sean Anderson @ 2023-07-31 21:27 UTC (permalink / raw)
To: u-boot, Tom Rini
Cc: Matthias Brugger, Peter Robinson, Eugen Hristev, Sean Anderson
Since ARMv5, the clz instruction allows for efficient implementation of
ffs/fls with builtins. Until ARMv7 (with Thumb-2), this instruction is
only available in ARM mode. LTO makes it difficult to force specific
functions to be in ARM mode, as it is effectively a form of very
aggressive inlining. To work around this, fls/ffs are implemented in
assembly for ARMv5 and ARMv6 when compiling U-Boot in Thumb mode.
Overall, this saves around 75 bytes per call.
Signed-off-by: Sean Anderson <sean.anderson@seco.com>
---
I only tested this on ARMv8. If someone has an ARMv5 or ARMv6 board,
please test this.
arch/arm/include/asm/bitops.h | 27 ++++++++++++-
arch/arm/lib/Makefile | 5 +++
arch/arm/lib/bitops.S | 45 ++++++++++++++++++++++
include/asm-generic/bitops/builtin-__ffs.h | 16 ++++++++
include/asm-generic/bitops/builtin-__fls.h | 16 ++++++++
include/asm-generic/bitops/builtin-ffs.h | 15 ++++++++
include/asm-generic/bitops/builtin-fls.h | 17 ++++++++
7 files changed, 140 insertions(+), 1 deletion(-)
create mode 100644 arch/arm/lib/bitops.S
create mode 100644 include/asm-generic/bitops/builtin-__ffs.h
create mode 100644 include/asm-generic/bitops/builtin-__fls.h
create mode 100644 include/asm-generic/bitops/builtin-ffs.h
create mode 100644 include/asm-generic/bitops/builtin-fls.h
diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h
index fa8548624a..8e897833bb 100644
--- a/arch/arm/include/asm/bitops.h
+++ b/arch/arm/include/asm/bitops.h
@@ -15,9 +15,34 @@
#ifndef __ASM_ARM_BITOPS_H
#define __ASM_ARM_BITOPS_H
+#if __LINUX_ARM_ARCH__ < 5
+
#include <asm-generic/bitops/__ffs.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls.h>
+
+#else
+
+#define PLATFORM_FFS
+#define PLATFORM_FLS
+
+#if !IS_ENABLED(CONFIG_HAS_THUMB2) && CONFIG_IS_ENABLED(SYS_THUMB_BUILD)
+
+unsigned long __fls(unsigned long word);
+unsigned long __ffs(unsigned long word);
+int fls(unsigned int x);
+int ffs(int x);
+
+#else
+
+#include <asm-generic/bitops/builtin-__fls.h>
+#include <asm-generic/bitops/builtin-__ffs.h>
+#include <asm-generic/bitops/builtin-fls.h>
+#include <asm-generic/bitops/builtin-ffs.h>
+
+#endif
+#endif
+
#include <asm-generic/bitops/fls64.h>
#ifdef __KERNEL__
@@ -113,7 +138,7 @@ static inline int test_bit(int nr, const void * addr)
static inline int __ilog2(unsigned int x)
{
- return generic_fls(x) - 1;
+ return fls(x) - 1;
}
#define ffz(x) __ffs(~(x))
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 62cf80f373..b1bcd37466 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -113,6 +113,11 @@ AFLAGS_REMOVE_memset.o := -mthumb -mthumb-interwork
AFLAGS_REMOVE_memcpy.o := -mthumb -mthumb-interwork
AFLAGS_memset.o := -DMEMSET_NO_THUMB_BUILD
AFLAGS_memcpy.o := -DMEMCPY_NO_THUMB_BUILD
+
+# This is only necessary to force ARM mode on THUMB1 targets.
+ifneq ($(CONFIG_SYS_ARM_ARCH),4)
+obj-y += bitops.o
+endif
endif
endif
diff --git a/arch/arm/lib/bitops.S b/arch/arm/lib/bitops.S
new file mode 100644
index 0000000000..29d1524634
--- /dev/null
+++ b/arch/arm/lib/bitops.S
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Sean Anderson <sean.anderson@seco.com>
+ *
+ * ARM bitops to call when using THUMB1, which doesn't have these instructions.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+.pushsection .text.__fls
+ENTRY(__fls)
+ clz r0, r0
+ rsb r0, r0, #31
+ ret lr
+ENDPROC(__fls)
+.popsection
+
+.pushsection .text.__ffs
+ENTRY(__ffs)
+ rsb r3, r0, #0
+ and r0, r0, r3
+ clz r0, r0
+ rsb r0, r0, #31
+ ret lr
+ENDPROC(__ffs)
+.popsection
+
+.pushsection .text.fls
+ENTRY(fls)
+ cmp r0, #0
+ clzne r0, r0
+ rsbne r0, r0, #32
+ ret lr
+ENDPROC(fls)
+.popsection
+
+.pushsection .text.ffs
+ENTRY(ffs)
+ rsb r3, r0, #0
+ and r0, r0, r3
+ clz r0, r0
+ rsb r0, r0, #32
+ ret lr
+ENDPROC(ffs)
+.popsection
diff --git a/include/asm-generic/bitops/builtin-__ffs.h b/include/asm-generic/bitops/builtin-__ffs.h
new file mode 100644
index 0000000000..87024da44d
--- /dev/null
+++ b/include/asm-generic/bitops/builtin-__ffs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_BUILTIN___FFS_H_
+#define _ASM_GENERIC_BITOPS_BUILTIN___FFS_H_
+
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static __always_inline unsigned long __ffs(unsigned long word)
+{
+ return __builtin_ctzl(word);
+}
+
+#endif
diff --git a/include/asm-generic/bitops/builtin-__fls.h b/include/asm-generic/bitops/builtin-__fls.h
new file mode 100644
index 0000000000..43a5aa9afb
--- /dev/null
+++ b/include/asm-generic/bitops/builtin-__fls.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_BUILTIN___FLS_H_
+#define _ASM_GENERIC_BITOPS_BUILTIN___FLS_H_
+
+/**
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static __always_inline unsigned long __fls(unsigned long word)
+{
+ return (sizeof(word) * 8) - 1 - __builtin_clzl(word);
+}
+
+#endif
diff --git a/include/asm-generic/bitops/builtin-ffs.h b/include/asm-generic/bitops/builtin-ffs.h
new file mode 100644
index 0000000000..7b12932904
--- /dev/null
+++ b/include/asm-generic/bitops/builtin-ffs.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_BUILTIN_FFS_H_
+#define _ASM_GENERIC_BITOPS_BUILTIN_FFS_H_
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from ffz (man ffs).
+ */
+#define ffs(x) __builtin_ffs(x)
+
+#endif
diff --git a/include/asm-generic/bitops/builtin-fls.h b/include/asm-generic/bitops/builtin-fls.h
new file mode 100644
index 0000000000..c8455cc288
--- /dev/null
+++ b/include/asm-generic/bitops/builtin-fls.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_BUILTIN_FLS_H_
+#define _ASM_GENERIC_BITOPS_BUILTIN_FLS_H_
+
+/**
+ * fls - find last (most-significant) bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+static __always_inline int fls(unsigned int x)
+{
+ return x ? sizeof(x) * 8 - __builtin_clz(x) : 0;
+}
+
+#endif
--
2.40.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH] arm: Use builtins for ffs/fls
2023-07-31 21:27 [PATCH] arm: Use builtins for ffs/fls Sean Anderson
@ 2023-07-31 21:36 ` Tom Rini
2023-07-31 21:42 ` Sean Anderson
2023-07-31 21:48 ` Sean Anderson
` (3 subsequent siblings)
4 siblings, 1 reply; 7+ messages in thread
From: Tom Rini @ 2023-07-31 21:36 UTC (permalink / raw)
To: Sean Anderson; +Cc: u-boot, Matthias Brugger, Peter Robinson, Eugen Hristev
[-- Attachment #1: Type: text/plain, Size: 676 bytes --]
On Mon, Jul 31, 2023 at 05:27:33PM -0400, Sean Anderson wrote:
> Since ARMv5, the clz instruction allows for efficient implementation of
> ffs/fls with builtins. Until ARMv7 (with Thumb-2), this instruction is
> only available in ARM mode. LTO makes it difficult to force specific
> functions to be in ARM mode, as it is effectively a form of very
> aggressive inlining. To work around this, fls/ffs are implemented in
> assembly for ARMv5 and ARMv6 when compiling U-Boot in Thumb mode.
> Overall, this saves around 75 bytes per call.
>
> Signed-off-by: Sean Anderson <sean.anderson@seco.com>
This looks like it's synced from the kernel, what tag?
--
Tom
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 659 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] arm: Use builtins for ffs/fls
2023-07-31 21:36 ` Tom Rini
@ 2023-07-31 21:42 ` Sean Anderson
0 siblings, 0 replies; 7+ messages in thread
From: Sean Anderson @ 2023-07-31 21:42 UTC (permalink / raw)
To: Tom Rini; +Cc: u-boot, Matthias Brugger, Peter Robinson, Eugen Hristev
On 7/31/23 17:36, Tom Rini wrote:
> On Mon, Jul 31, 2023 at 05:27:33PM -0400, Sean Anderson wrote:
>
>> Since ARMv5, the clz instruction allows for efficient implementation of
>> ffs/fls with builtins. Until ARMv7 (with Thumb-2), this instruction is
>> only available in ARM mode. LTO makes it difficult to force specific
>> functions to be in ARM mode, as it is effectively a form of very
>> aggressive inlining. To work around this, fls/ffs are implemented in
>> assembly for ARMv5 and ARMv6 when compiling U-Boot in Thumb mode.
>> Overall, this saves around 75 bytes per call.
>>
>> Signed-off-by: Sean Anderson <sean.anderson@seco.com>
>
> This looks like it's synced from the kernel, what tag?
>
The builtins are synced from v5.15 and they haven't changed since.
--Sean
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] arm: Use builtins for ffs/fls
2023-07-31 21:27 [PATCH] arm: Use builtins for ffs/fls Sean Anderson
2023-07-31 21:36 ` Tom Rini
@ 2023-07-31 21:48 ` Sean Anderson
2023-08-10 22:11 ` Tom Rini
` (2 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Sean Anderson @ 2023-07-31 21:48 UTC (permalink / raw)
To: u-boot, Tom Rini
Cc: Matthias Brugger, Peter Robinson, Eugen Hristev, Linus Walleij
On 7/31/23 17:27, Sean Anderson wrote:
> Since ARMv5, the clz instruction allows for efficient implementation of
> ffs/fls with builtins. Until ARMv7 (with Thumb-2), this instruction is
> only available in ARM mode. LTO makes it difficult to force specific
> functions to be in ARM mode, as it is effectively a form of very
> aggressive inlining. To work around this, fls/ffs are implemented in
> assembly for ARMv5 and ARMv6 when compiling U-Boot in Thumb mode.
> Overall, this saves around 75 bytes per call.
>
> Signed-off-by: Sean Anderson <sean.anderson@seco.com>
> ---
> I only tested this on ARMv8. If someone has an ARMv5 or ARMv6 board,
> please test this.
>
+CC Linus since he may care about integrator.
--Sean
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] arm: Use builtins for ffs/fls
2023-07-31 21:27 [PATCH] arm: Use builtins for ffs/fls Sean Anderson
2023-07-31 21:36 ` Tom Rini
2023-07-31 21:48 ` Sean Anderson
@ 2023-08-10 22:11 ` Tom Rini
2023-08-18 17:49 ` Tom Rini
2023-08-24 18:15 ` Tony Dinh
4 siblings, 0 replies; 7+ messages in thread
From: Tom Rini @ 2023-08-10 22:11 UTC (permalink / raw)
To: Sean Anderson; +Cc: u-boot, Matthias Brugger, Peter Robinson, Eugen Hristev
[-- Attachment #1: Type: text/plain, Size: 748 bytes --]
On Mon, Jul 31, 2023 at 05:27:33PM -0400, Sean Anderson wrote:
> Since ARMv5, the clz instruction allows for efficient implementation of
> ffs/fls with builtins. Until ARMv7 (with Thumb-2), this instruction is
> only available in ARM mode. LTO makes it difficult to force specific
> functions to be in ARM mode, as it is effectively a form of very
> aggressive inlining. To work around this, fls/ffs are implemented in
> assembly for ARMv5 and ARMv6 when compiling U-Boot in Thumb mode.
> Overall, this saves around 75 bytes per call.
>
> Signed-off-by: Sean Anderson <sean.anderson@seco.com>
I'll add some wording about when this was synced from the kernel when
applying.
Reviewed-by: Tom Rini <trini@konsulko.com>
--
Tom
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 659 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] arm: Use builtins for ffs/fls
2023-07-31 21:27 [PATCH] arm: Use builtins for ffs/fls Sean Anderson
` (2 preceding siblings ...)
2023-08-10 22:11 ` Tom Rini
@ 2023-08-18 17:49 ` Tom Rini
2023-08-24 18:15 ` Tony Dinh
4 siblings, 0 replies; 7+ messages in thread
From: Tom Rini @ 2023-08-18 17:49 UTC (permalink / raw)
To: Sean Anderson; +Cc: u-boot, Matthias Brugger, Peter Robinson, Eugen Hristev
[-- Attachment #1: Type: text/plain, Size: 758 bytes --]
On Mon, Jul 31, 2023 at 05:27:33PM -0400, Sean Anderson wrote:
> Since ARMv5, the clz instruction allows for efficient implementation of
> ffs/fls with builtins. Until ARMv7 (with Thumb-2), this instruction is
> only available in ARM mode. LTO makes it difficult to force specific
> functions to be in ARM mode, as it is effectively a form of very
> aggressive inlining. To work around this, fls/ffs are implemented in
> assembly for ARMv5 and ARMv6 when compiling U-Boot in Thumb mode.
> Overall, this saves around 75 bytes per call.
>
> This code is synced with v5.15 of the Linux kernel.
>
> Signed-off-by: Sean Anderson <sean.anderson@seco.com>
> Reviewed-by: Tom Rini <trini@konsulko.com>
Applied to u-boot/next, thanks!
--
Tom
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 659 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] arm: Use builtins for ffs/fls
2023-07-31 21:27 [PATCH] arm: Use builtins for ffs/fls Sean Anderson
` (3 preceding siblings ...)
2023-08-18 17:49 ` Tom Rini
@ 2023-08-24 18:15 ` Tony Dinh
4 siblings, 0 replies; 7+ messages in thread
From: Tony Dinh @ 2023-08-24 18:15 UTC (permalink / raw)
To: Sean Anderson
Cc: u-boot, Tom Rini, Matthias Brugger, Peter Robinson, Eugen Hristev
Hi Sean,
On Mon, Jul 31, 2023 at 2:28 PM Sean Anderson <sean.anderson@seco.com> wrote:
>
> Since ARMv5, the clz instruction allows for efficient implementation of
> ffs/fls with builtins. Until ARMv7 (with Thumb-2), this instruction is
> only available in ARM mode. LTO makes it difficult to force specific
> functions to be in ARM mode, as it is effectively a form of very
> aggressive inlining. To work around this, fls/ffs are implemented in
> assembly for ARMv5 and ARMv6 when compiling U-Boot in Thumb mode.
> Overall, this saves around 75 bytes per call.
>
> Signed-off-by: Sean Anderson <sean.anderson@seco.com>
> ---
> I only tested this on ARMv8. If someone has an ARMv5 or ARMv6 board,
> please test this.
Thanks for the patch! I've tested it with the Pogo V4 board (Kirkwood
88F6192, ARMv5) with LTO and SYS_THUMB_BUILD.
Also nice size reduction:
Before: 502K
After: 499K
So for ARMv5,
Tested-by: Tony Dinh <mibodhi@gmail.com>
All the best,
Tony
>
> arch/arm/include/asm/bitops.h | 27 ++++++++++++-
> arch/arm/lib/Makefile | 5 +++
> arch/arm/lib/bitops.S | 45 ++++++++++++++++++++++
> include/asm-generic/bitops/builtin-__ffs.h | 16 ++++++++
> include/asm-generic/bitops/builtin-__fls.h | 16 ++++++++
> include/asm-generic/bitops/builtin-ffs.h | 15 ++++++++
> include/asm-generic/bitops/builtin-fls.h | 17 ++++++++
> 7 files changed, 140 insertions(+), 1 deletion(-)
> create mode 100644 arch/arm/lib/bitops.S
> create mode 100644 include/asm-generic/bitops/builtin-__ffs.h
> create mode 100644 include/asm-generic/bitops/builtin-__fls.h
> create mode 100644 include/asm-generic/bitops/builtin-ffs.h
> create mode 100644 include/asm-generic/bitops/builtin-fls.h
>
> diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h
> index fa8548624a..8e897833bb 100644
> --- a/arch/arm/include/asm/bitops.h
> +++ b/arch/arm/include/asm/bitops.h
> @@ -15,9 +15,34 @@
> #ifndef __ASM_ARM_BITOPS_H
> #define __ASM_ARM_BITOPS_H
>
> +#if __LINUX_ARM_ARCH__ < 5
> +
> #include <asm-generic/bitops/__ffs.h>
> #include <asm-generic/bitops/__fls.h>
> #include <asm-generic/bitops/fls.h>
> +
> +#else
> +
> +#define PLATFORM_FFS
> +#define PLATFORM_FLS
> +
> +#if !IS_ENABLED(CONFIG_HAS_THUMB2) && CONFIG_IS_ENABLED(SYS_THUMB_BUILD)
> +
> +unsigned long __fls(unsigned long word);
> +unsigned long __ffs(unsigned long word);
> +int fls(unsigned int x);
> +int ffs(int x);
> +
> +#else
> +
> +#include <asm-generic/bitops/builtin-__fls.h>
> +#include <asm-generic/bitops/builtin-__ffs.h>
> +#include <asm-generic/bitops/builtin-fls.h>
> +#include <asm-generic/bitops/builtin-ffs.h>
> +
> +#endif
> +#endif
> +
> #include <asm-generic/bitops/fls64.h>
>
> #ifdef __KERNEL__
> @@ -113,7 +138,7 @@ static inline int test_bit(int nr, const void * addr)
>
> static inline int __ilog2(unsigned int x)
> {
> - return generic_fls(x) - 1;
> + return fls(x) - 1;
> }
>
> #define ffz(x) __ffs(~(x))
> diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
> index 62cf80f373..b1bcd37466 100644
> --- a/arch/arm/lib/Makefile
> +++ b/arch/arm/lib/Makefile
> @@ -113,6 +113,11 @@ AFLAGS_REMOVE_memset.o := -mthumb -mthumb-interwork
> AFLAGS_REMOVE_memcpy.o := -mthumb -mthumb-interwork
> AFLAGS_memset.o := -DMEMSET_NO_THUMB_BUILD
> AFLAGS_memcpy.o := -DMEMCPY_NO_THUMB_BUILD
> +
> +# This is only necessary to force ARM mode on THUMB1 targets.
> +ifneq ($(CONFIG_SYS_ARM_ARCH),4)
> +obj-y += bitops.o
> +endif
> endif
> endif
>
> diff --git a/arch/arm/lib/bitops.S b/arch/arm/lib/bitops.S
> new file mode 100644
> index 0000000000..29d1524634
> --- /dev/null
> +++ b/arch/arm/lib/bitops.S
> @@ -0,0 +1,45 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2023 Sean Anderson <sean.anderson@seco.com>
> + *
> + * ARM bitops to call when using THUMB1, which doesn't have these instructions.
> + */
> +#include <linux/linkage.h>
> +#include <asm/assembler.h>
> +
> +.pushsection .text.__fls
> +ENTRY(__fls)
> + clz r0, r0
> + rsb r0, r0, #31
> + ret lr
> +ENDPROC(__fls)
> +.popsection
> +
> +.pushsection .text.__ffs
> +ENTRY(__ffs)
> + rsb r3, r0, #0
> + and r0, r0, r3
> + clz r0, r0
> + rsb r0, r0, #31
> + ret lr
> +ENDPROC(__ffs)
> +.popsection
> +
> +.pushsection .text.fls
> +ENTRY(fls)
> + cmp r0, #0
> + clzne r0, r0
> + rsbne r0, r0, #32
> + ret lr
> +ENDPROC(fls)
> +.popsection
> +
> +.pushsection .text.ffs
> +ENTRY(ffs)
> + rsb r3, r0, #0
> + and r0, r0, r3
> + clz r0, r0
> + rsb r0, r0, #32
> + ret lr
> +ENDPROC(ffs)
> +.popsection
> diff --git a/include/asm-generic/bitops/builtin-__ffs.h b/include/asm-generic/bitops/builtin-__ffs.h
> new file mode 100644
> index 0000000000..87024da44d
> --- /dev/null
> +++ b/include/asm-generic/bitops/builtin-__ffs.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_GENERIC_BITOPS_BUILTIN___FFS_H_
> +#define _ASM_GENERIC_BITOPS_BUILTIN___FFS_H_
> +
> +/**
> + * __ffs - find first bit in word.
> + * @word: The word to search
> + *
> + * Undefined if no bit exists, so code should check against 0 first.
> + */
> +static __always_inline unsigned long __ffs(unsigned long word)
> +{
> + return __builtin_ctzl(word);
> +}
> +
> +#endif
> diff --git a/include/asm-generic/bitops/builtin-__fls.h b/include/asm-generic/bitops/builtin-__fls.h
> new file mode 100644
> index 0000000000..43a5aa9afb
> --- /dev/null
> +++ b/include/asm-generic/bitops/builtin-__fls.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_GENERIC_BITOPS_BUILTIN___FLS_H_
> +#define _ASM_GENERIC_BITOPS_BUILTIN___FLS_H_
> +
> +/**
> + * __fls - find last (most-significant) set bit in a long word
> + * @word: the word to search
> + *
> + * Undefined if no set bit exists, so code should check against 0 first.
> + */
> +static __always_inline unsigned long __fls(unsigned long word)
> +{
> + return (sizeof(word) * 8) - 1 - __builtin_clzl(word);
> +}
> +
> +#endif
> diff --git a/include/asm-generic/bitops/builtin-ffs.h b/include/asm-generic/bitops/builtin-ffs.h
> new file mode 100644
> index 0000000000..7b12932904
> --- /dev/null
> +++ b/include/asm-generic/bitops/builtin-ffs.h
> @@ -0,0 +1,15 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_GENERIC_BITOPS_BUILTIN_FFS_H_
> +#define _ASM_GENERIC_BITOPS_BUILTIN_FFS_H_
> +
> +/**
> + * ffs - find first bit set
> + * @x: the word to search
> + *
> + * This is defined the same way as
> + * the libc and compiler builtin ffs routines, therefore
> + * differs in spirit from ffz (man ffs).
> + */
> +#define ffs(x) __builtin_ffs(x)
> +
> +#endif
> diff --git a/include/asm-generic/bitops/builtin-fls.h b/include/asm-generic/bitops/builtin-fls.h
> new file mode 100644
> index 0000000000..c8455cc288
> --- /dev/null
> +++ b/include/asm-generic/bitops/builtin-fls.h
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_GENERIC_BITOPS_BUILTIN_FLS_H_
> +#define _ASM_GENERIC_BITOPS_BUILTIN_FLS_H_
> +
> +/**
> + * fls - find last (most-significant) bit set
> + * @x: the word to search
> + *
> + * This is defined the same way as ffs.
> + * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
> + */
> +static __always_inline int fls(unsigned int x)
> +{
> + return x ? sizeof(x) * 8 - __builtin_clz(x) : 0;
> +}
> +
> +#endif
> --
> 2.40.1
>
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2023-08-24 18:15 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-31 21:27 [PATCH] arm: Use builtins for ffs/fls Sean Anderson
2023-07-31 21:36 ` Tom Rini
2023-07-31 21:42 ` Sean Anderson
2023-07-31 21:48 ` Sean Anderson
2023-08-10 22:11 ` Tom Rini
2023-08-18 17:49 ` Tom Rini
2023-08-24 18:15 ` Tony Dinh
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.