linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Optimize bitops with Zbb extension
@ 2023-08-06  2:47 Xiao Wang
  2023-08-06  9:38 ` Ard Biesheuvel
  2023-08-29 11:08 ` Anup Patel
  0 siblings, 2 replies; 15+ messages in thread
From: Xiao Wang @ 2023-08-06  2:47 UTC (permalink / raw)
  To: paul.walmsley, palmer, aou, ardb
  Cc: anup, haicheng.li, linux-riscv, linux-efi, linux-kernel, Xiao Wang

This patch leverages the alternative mechanism to dynamically optimize
bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
Zbb ext is not supported by the runtime CPU, legacy implementation is
used. If Zbb is supported, then the optimized variants will be selected
via alternative patching.

The legacy bitops support is taken from the generic C implementation as
fallback.

If the parameter is a build-time constant, we leverage compiler builtin to
calculate the result directly, this approach is inspired by x86 bitops
implementation.

EFI stub runs before the kernel, so alternative mechanism should not be
used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
purpose.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 arch/riscv/include/asm/bitops.h       | 266 +++++++++++++++++++++++++-
 drivers/firmware/efi/libstub/Makefile |   2 +-
 2 files changed, 264 insertions(+), 4 deletions(-)

diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 3540b690944b..f727f6489cd5 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -15,13 +15,273 @@
 #include <asm/barrier.h>
 #include <asm/bitsperlong.h>
 
+#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(EFI_NO_ALTERNATIVE)
 #include <asm-generic/bitops/__ffs.h>
-#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/fls.h>
 #include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/ffs.h>
+#include <asm-generic/bitops/fls.h>
+
+#else
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
+
+#if (BITS_PER_LONG == 64)
+#define CTZW	"ctzw "
+#define CLZW	"clzw "
+#elif (BITS_PER_LONG == 32)
+#define CTZW	"ctz "
+#define CLZW	"clz "
+#else
+#error "Unexpected BITS_PER_LONG"
+#endif
+
+static __always_inline unsigned long variable__ffs(unsigned long word)
+{
+	int num;
+
+	asm_volatile_goto(
+		ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
+		: : : : legacy);
+
+	asm volatile (
+		".option push\n"
+		".option arch,+zbb\n"
+		"ctz %0, %1\n"
+		".option pop\n"
+		: "=r" (word) : "r" (word) :);
+
+	return word;
+
+legacy:
+	num = 0;
+#if BITS_PER_LONG == 64
+	if ((word & 0xffffffff) == 0) {
+		num += 32;
+		word >>= 32;
+	}
+#endif
+	if ((word & 0xffff) == 0) {
+		num += 16;
+		word >>= 16;
+	}
+	if ((word & 0xff) == 0) {
+		num += 8;
+		word >>= 8;
+	}
+	if ((word & 0xf) == 0) {
+		num += 4;
+		word >>= 4;
+	}
+	if ((word & 0x3) == 0) {
+		num += 2;
+		word >>= 2;
+	}
+	if ((word & 0x1) == 0)
+		num += 1;
+	return num;
+}
+
+/**
+ * __ffs - find first set bit in a long word
+ * @word: The word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+#define __ffs(word)				\
+	(__builtin_constant_p(word) ?		\
+	 (unsigned long)__builtin_ctzl(word) :	\
+	 variable__ffs(word))
+
+static __always_inline unsigned long variable__fls(unsigned long word)
+{
+	int num;
+
+	asm_volatile_goto(
+		ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
+		: : : : legacy);
+
+	asm volatile (
+		".option push\n"
+		".option arch,+zbb\n"
+		"clz %0, %1\n"
+		".option pop\n"
+		: "=r" (word) : "r" (word) :);
+
+	return BITS_PER_LONG - 1 - word;
+
+legacy:
+	num = BITS_PER_LONG - 1;
+#if BITS_PER_LONG == 64
+	if (!(word & (~0ul << 32))) {
+		num -= 32;
+		word <<= 32;
+	}
+#endif
+	if (!(word & (~0ul << (BITS_PER_LONG-16)))) {
+		num -= 16;
+		word <<= 16;
+	}
+	if (!(word & (~0ul << (BITS_PER_LONG-8)))) {
+		num -= 8;
+		word <<= 8;
+	}
+	if (!(word & (~0ul << (BITS_PER_LONG-4)))) {
+		num -= 4;
+		word <<= 4;
+	}
+	if (!(word & (~0ul << (BITS_PER_LONG-2)))) {
+		num -= 2;
+		word <<= 2;
+	}
+	if (!(word & (~0ul << (BITS_PER_LONG-1))))
+		num -= 1;
+	return num;
+}
+
+/**
+ * __fls - find last set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+#define __fls(word)							\
+	(__builtin_constant_p(word) ?					\
+	 (unsigned long)(BITS_PER_LONG - 1 - __builtin_clzl(word)) :	\
+	 variable__fls(word))
+
+static __always_inline int variable_ffs(int x)
+{
+	int r;
+
+	asm_volatile_goto(
+		ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
+		: : : : legacy);
+
+	asm volatile (
+		".option push\n"
+		".option arch,+zbb\n"
+		"bnez %1, 1f\n"
+		"li %0, 0\n"
+		"j 2f\n"
+		"1:\n"
+		CTZW "%0, %1\n"
+		"addi %0, %0, 1\n"
+		"2:\n"
+		".option pop\n"
+		: "=r" (r) : "r" (x) :);
+
+	return r;
+
+legacy:
+	r = 1;
+	if (!x)
+		return 0;
+	if (!(x & 0xffff)) {
+		x >>= 16;
+		r += 16;
+	}
+	if (!(x & 0xff)) {
+		x >>= 8;
+		r += 8;
+	}
+	if (!(x & 0xf)) {
+		x >>= 4;
+		r += 4;
+	}
+	if (!(x & 3)) {
+		x >>= 2;
+		r += 2;
+	}
+	if (!(x & 1)) {
+		x >>= 1;
+		r += 1;
+	}
+	return r;
+}
+
+/**
+ * ffs - find first set bit in a word
+ * @x: the word to search
+ *
+ * This is defined the same way as the libc and compiler builtin ffs routines.
+ *
+ * ffs(value) returns 0 if value is 0 or the position of the first set bit if
+ * value is nonzero. The first (least significant) bit is at position 1.
+ */
+#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x))
+
+static __always_inline int variable_fls(unsigned int x)
+{
+	int r;
+
+	asm_volatile_goto(
+		ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
+		: : : : legacy);
+
+	asm volatile (
+		".option push\n"
+		".option arch,+zbb\n"
+		"bnez %1, 1f\n"
+		"li %0, 0\n"
+		"j 2f\n"
+		"1:\n"
+		CLZW "%0, %1\n"
+		"neg %0, %0\n"
+		"addi %0, %0, 32\n"
+		"2:\n"
+		".option pop\n"
+		: "=r" (r) : "r" (x) :);
+
+	return r;
+
+legacy:
+	r = 32;
+	if (!x)
+		return 0;
+	if (!(x & 0xffff0000u)) {
+		x <<= 16;
+		r -= 16;
+	}
+	if (!(x & 0xff000000u)) {
+		x <<= 8;
+		r -= 8;
+	}
+	if (!(x & 0xf0000000u)) {
+		x <<= 4;
+		r -= 4;
+	}
+	if (!(x & 0xc0000000u)) {
+		x <<= 2;
+		r -= 2;
+	}
+	if (!(x & 0x80000000u)) {
+		x <<= 1;
+		r -= 1;
+	}
+	return r;
+}
+
+/**
+ * fls - find last set bit in a word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as ffs, but returns the position of the most
+ * significant set bit.
+ *
+ * fls(value) returns 0 if value is 0 or the position of the last set bit if
+ * value is nonzero. The last (most significant) bit is at position 32.
+ */
+#define fls(x)								\
+	(__builtin_constant_p(x) ?					\
+	 (int)(((x) != 0) ?						\
+	  (sizeof(unsigned int) * 8 - __builtin_clz(x)) : 0) :		\
+	 variable_fls(x))
+
+#endif
+
+#include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/ffs.h>
 
 #include <asm-generic/bitops/hweight.h>
 
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 16d64a34d1e1..b0f8c495c10f 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM)		+= -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \
 				   -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \
 				   -DEFI_HAVE_STRCMP -fno-builtin -fpic \
 				   $(call cc-option,-mno-single-pic-base)
-cflags-$(CONFIG_RISCV)		+= -fpic
+cflags-$(CONFIG_RISCV)		+= -fpic -DEFI_NO_ALTERNATIVE
 cflags-$(CONFIG_LOONGARCH)	+= -fpie
 
 cflags-$(CONFIG_EFI_PARAMS_FROM_FDT)	+= -I$(srctree)/scripts/dtc/libfdt
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
  2023-08-06  2:47 [PATCH] RISC-V: Optimize bitops with Zbb extension Xiao Wang
@ 2023-08-06  9:38 ` Ard Biesheuvel
  2023-08-06 10:24   ` Wang, Xiao W
  2023-08-29 11:08 ` Anup Patel
  1 sibling, 1 reply; 15+ messages in thread
From: Ard Biesheuvel @ 2023-08-06  9:38 UTC (permalink / raw)
  To: Xiao Wang
  Cc: paul.walmsley, palmer, aou, anup, haicheng.li, linux-riscv,
	linux-efi, linux-kernel

On Sun, 6 Aug 2023 at 04:39, Xiao Wang <xiao.w.wang@intel.com> wrote:
>
> This patch leverages the alternative mechanism to dynamically optimize
> bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> Zbb ext is not supported by the runtime CPU, legacy implementation is
> used. If Zbb is supported, then the optimized variants will be selected
> via alternative patching.
>
> The legacy bitops support is taken from the generic C implementation as
> fallback.
>
> If the parameter is a build-time constant, we leverage compiler builtin to
> calculate the result directly, this approach is inspired by x86 bitops
> implementation.
>
> EFI stub runs before the kernel, so alternative mechanism should not be
> used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
> purpose.
>

Why? The unpatched sequences work fine, no?


> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> ---
>  arch/riscv/include/asm/bitops.h       | 266 +++++++++++++++++++++++++-
>  drivers/firmware/efi/libstub/Makefile |   2 +-
>  2 files changed, 264 insertions(+), 4 deletions(-)
>
> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
> index 3540b690944b..f727f6489cd5 100644
> --- a/arch/riscv/include/asm/bitops.h
> +++ b/arch/riscv/include/asm/bitops.h
> @@ -15,13 +15,273 @@
>  #include <asm/barrier.h>
>  #include <asm/bitsperlong.h>
>
> +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(EFI_NO_ALTERNATIVE)
>  #include <asm-generic/bitops/__ffs.h>
> -#include <asm-generic/bitops/ffz.h>
> -#include <asm-generic/bitops/fls.h>
>  #include <asm-generic/bitops/__fls.h>
> +#include <asm-generic/bitops/ffs.h>
> +#include <asm-generic/bitops/fls.h>
> +
> +#else
> +#include <asm/alternative-macros.h>
> +#include <asm/hwcap.h>
> +
> +#if (BITS_PER_LONG == 64)
> +#define CTZW   "ctzw "
> +#define CLZW   "clzw "
> +#elif (BITS_PER_LONG == 32)
> +#define CTZW   "ctz "
> +#define CLZW   "clz "
> +#else
> +#error "Unexpected BITS_PER_LONG"
> +#endif
> +
> +static __always_inline unsigned long variable__ffs(unsigned long word)
> +{
> +       int num;
> +
> +       asm_volatile_goto(
> +               ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
> +               : : : : legacy);
> +
> +       asm volatile (
> +               ".option push\n"
> +               ".option arch,+zbb\n"
> +               "ctz %0, %1\n"
> +               ".option pop\n"
> +               : "=r" (word) : "r" (word) :);
> +
> +       return word;
> +
> +legacy:
> +       num = 0;
> +#if BITS_PER_LONG == 64
> +       if ((word & 0xffffffff) == 0) {
> +               num += 32;
> +               word >>= 32;
> +       }
> +#endif
> +       if ((word & 0xffff) == 0) {
> +               num += 16;
> +               word >>= 16;
> +       }
> +       if ((word & 0xff) == 0) {
> +               num += 8;
> +               word >>= 8;
> +       }
> +       if ((word & 0xf) == 0) {
> +               num += 4;
> +               word >>= 4;
> +       }
> +       if ((word & 0x3) == 0) {
> +               num += 2;
> +               word >>= 2;
> +       }
> +       if ((word & 0x1) == 0)
> +               num += 1;
> +       return num;
> +}
> +
> +/**
> + * __ffs - find first set bit in a long word
> + * @word: The word to search
> + *
> + * Undefined if no set bit exists, so code should check against 0 first.
> + */
> +#define __ffs(word)                            \
> +       (__builtin_constant_p(word) ?           \
> +        (unsigned long)__builtin_ctzl(word) :  \
> +        variable__ffs(word))
> +
> +static __always_inline unsigned long variable__fls(unsigned long word)
> +{
> +       int num;
> +
> +       asm_volatile_goto(
> +               ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
> +               : : : : legacy);
> +
> +       asm volatile (
> +               ".option push\n"
> +               ".option arch,+zbb\n"
> +               "clz %0, %1\n"
> +               ".option pop\n"
> +               : "=r" (word) : "r" (word) :);
> +
> +       return BITS_PER_LONG - 1 - word;
> +
> +legacy:
> +       num = BITS_PER_LONG - 1;
> +#if BITS_PER_LONG == 64
> +       if (!(word & (~0ul << 32))) {
> +               num -= 32;
> +               word <<= 32;
> +       }
> +#endif
> +       if (!(word & (~0ul << (BITS_PER_LONG-16)))) {
> +               num -= 16;
> +               word <<= 16;
> +       }
> +       if (!(word & (~0ul << (BITS_PER_LONG-8)))) {
> +               num -= 8;
> +               word <<= 8;
> +       }
> +       if (!(word & (~0ul << (BITS_PER_LONG-4)))) {
> +               num -= 4;
> +               word <<= 4;
> +       }
> +       if (!(word & (~0ul << (BITS_PER_LONG-2)))) {
> +               num -= 2;
> +               word <<= 2;
> +       }
> +       if (!(word & (~0ul << (BITS_PER_LONG-1))))
> +               num -= 1;
> +       return num;
> +}
> +
> +/**
> + * __fls - find last set bit in a long word
> + * @word: the word to search
> + *
> + * Undefined if no set bit exists, so code should check against 0 first.
> + */
> +#define __fls(word)                                                    \
> +       (__builtin_constant_p(word) ?                                   \
> +        (unsigned long)(BITS_PER_LONG - 1 - __builtin_clzl(word)) :    \
> +        variable__fls(word))
> +
> +static __always_inline int variable_ffs(int x)
> +{
> +       int r;
> +
> +       asm_volatile_goto(
> +               ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
> +               : : : : legacy);
> +
> +       asm volatile (
> +               ".option push\n"
> +               ".option arch,+zbb\n"
> +               "bnez %1, 1f\n"
> +               "li %0, 0\n"
> +               "j 2f\n"
> +               "1:\n"
> +               CTZW "%0, %1\n"
> +               "addi %0, %0, 1\n"
> +               "2:\n"
> +               ".option pop\n"
> +               : "=r" (r) : "r" (x) :);
> +
> +       return r;
> +
> +legacy:
> +       r = 1;
> +       if (!x)
> +               return 0;
> +       if (!(x & 0xffff)) {
> +               x >>= 16;
> +               r += 16;
> +       }
> +       if (!(x & 0xff)) {
> +               x >>= 8;
> +               r += 8;
> +       }
> +       if (!(x & 0xf)) {
> +               x >>= 4;
> +               r += 4;
> +       }
> +       if (!(x & 3)) {
> +               x >>= 2;
> +               r += 2;
> +       }
> +       if (!(x & 1)) {
> +               x >>= 1;
> +               r += 1;
> +       }
> +       return r;
> +}
> +
> +/**
> + * ffs - find first set bit in a word
> + * @x: the word to search
> + *
> + * This is defined the same way as the libc and compiler builtin ffs routines.
> + *
> + * ffs(value) returns 0 if value is 0 or the position of the first set bit if
> + * value is nonzero. The first (least significant) bit is at position 1.
> + */
> +#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x))
> +
> +static __always_inline int variable_fls(unsigned int x)
> +{
> +       int r;
> +
> +       asm_volatile_goto(
> +               ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
> +               : : : : legacy);
> +
> +       asm volatile (
> +               ".option push\n"
> +               ".option arch,+zbb\n"
> +               "bnez %1, 1f\n"
> +               "li %0, 0\n"
> +               "j 2f\n"
> +               "1:\n"
> +               CLZW "%0, %1\n"
> +               "neg %0, %0\n"
> +               "addi %0, %0, 32\n"
> +               "2:\n"
> +               ".option pop\n"
> +               : "=r" (r) : "r" (x) :);
> +
> +       return r;
> +
> +legacy:
> +       r = 32;
> +       if (!x)
> +               return 0;
> +       if (!(x & 0xffff0000u)) {
> +               x <<= 16;
> +               r -= 16;
> +       }
> +       if (!(x & 0xff000000u)) {
> +               x <<= 8;
> +               r -= 8;
> +       }
> +       if (!(x & 0xf0000000u)) {
> +               x <<= 4;
> +               r -= 4;
> +       }
> +       if (!(x & 0xc0000000u)) {
> +               x <<= 2;
> +               r -= 2;
> +       }
> +       if (!(x & 0x80000000u)) {
> +               x <<= 1;
> +               r -= 1;
> +       }
> +       return r;
> +}
> +
> +/**
> + * fls - find last set bit in a word
> + * @x: the word to search
> + *
> + * This is defined in a similar way as ffs, but returns the position of the most
> + * significant set bit.
> + *
> + * fls(value) returns 0 if value is 0 or the position of the last set bit if
> + * value is nonzero. The last (most significant) bit is at position 32.
> + */
> +#define fls(x)                                                         \
> +       (__builtin_constant_p(x) ?                                      \
> +        (int)(((x) != 0) ?                                             \
> +         (sizeof(unsigned int) * 8 - __builtin_clz(x)) : 0) :          \
> +        variable_fls(x))
> +
> +#endif
> +
> +#include <asm-generic/bitops/ffz.h>
>  #include <asm-generic/bitops/fls64.h>
>  #include <asm-generic/bitops/sched.h>
> -#include <asm-generic/bitops/ffs.h>
>
>  #include <asm-generic/bitops/hweight.h>
>
> diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
> index 16d64a34d1e1..b0f8c495c10f 100644
> --- a/drivers/firmware/efi/libstub/Makefile
> +++ b/drivers/firmware/efi/libstub/Makefile
> @@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM)          += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \
>                                    -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \
>                                    -DEFI_HAVE_STRCMP -fno-builtin -fpic \
>                                    $(call cc-option,-mno-single-pic-base)
> -cflags-$(CONFIG_RISCV)         += -fpic
> +cflags-$(CONFIG_RISCV)         += -fpic -DEFI_NO_ALTERNATIVE
>  cflags-$(CONFIG_LOONGARCH)     += -fpie
>
>  cflags-$(CONFIG_EFI_PARAMS_FROM_FDT)   += -I$(srctree)/scripts/dtc/libfdt
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH] RISC-V: Optimize bitops with Zbb extension
  2023-08-06  9:38 ` Ard Biesheuvel
@ 2023-08-06 10:24   ` Wang, Xiao W
  2023-08-27  9:25     ` Wang, Xiao W
  0 siblings, 1 reply; 15+ messages in thread
From: Wang, Xiao W @ 2023-08-06 10:24 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: paul.walmsley, palmer, aou, anup, Li, Haicheng, linux-riscv,
	linux-efi, linux-kernel

Hi,

> -----Original Message-----
> From: Ard Biesheuvel <ardb@kernel.org>
> Sent: Sunday, August 6, 2023 5:39 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> aou@eecs.berkeley.edu; anup@brainfault.org; Li, Haicheng
> <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> efi@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> 
> On Sun, 6 Aug 2023 at 04:39, Xiao Wang <xiao.w.wang@intel.com> wrote:
> >
> > This patch leverages the alternative mechanism to dynamically optimize
> > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> > Zbb ext is not supported by the runtime CPU, legacy implementation is
> > used. If Zbb is supported, then the optimized variants will be selected
> > via alternative patching.
> >
> > The legacy bitops support is taken from the generic C implementation as
> > fallback.
> >
> > If the parameter is a build-time constant, we leverage compiler builtin to
> > calculate the result directly, this approach is inspired by x86 bitops
> > implementation.
> >
> > EFI stub runs before the kernel, so alternative mechanism should not be
> > used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
> > purpose.
> >
> 
> Why? The unpatched sequences work fine, no?

It works. But there would be build warning: orphan section `.init.alternative' from `./drivers/firmware/efi/libstub/gop.stub.o' being placed in section `.init.alternative'. Besides, w/o this MACRO, the optimized variant would never be used at runtime, so this patch choose to disable alternative.

BRs,
Xiao

> 
> 
> > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > ---
> >  arch/riscv/include/asm/bitops.h       | 266 +++++++++++++++++++++++++-
> >  drivers/firmware/efi/libstub/Makefile |   2 +-
> >  2 files changed, 264 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/bitops.h
> b/arch/riscv/include/asm/bitops.h
> > index 3540b690944b..f727f6489cd5 100644
> > --- a/arch/riscv/include/asm/bitops.h
> > +++ b/arch/riscv/include/asm/bitops.h
> > @@ -15,13 +15,273 @@
> >  #include <asm/barrier.h>
> >  #include <asm/bitsperlong.h>
> >
> > +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(EFI_NO_ALTERNATIVE)
> >  #include <asm-generic/bitops/__ffs.h>
> > -#include <asm-generic/bitops/ffz.h>
> > -#include <asm-generic/bitops/fls.h>
> >  #include <asm-generic/bitops/__fls.h>
> > +#include <asm-generic/bitops/ffs.h>
> > +#include <asm-generic/bitops/fls.h>
> > +
> > +#else
> > +#include <asm/alternative-macros.h>
> > +#include <asm/hwcap.h>
> > +
> > +#if (BITS_PER_LONG == 64)
> > +#define CTZW   "ctzw "
> > +#define CLZW   "clzw "
> > +#elif (BITS_PER_LONG == 32)
> > +#define CTZW   "ctz "
> > +#define CLZW   "clz "
> > +#else
> > +#error "Unexpected BITS_PER_LONG"
> > +#endif
[...]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH] RISC-V: Optimize bitops with Zbb extension
  2023-08-06 10:24   ` Wang, Xiao W
@ 2023-08-27  9:25     ` Wang, Xiao W
  2023-08-28 10:28       ` Ard Biesheuvel
  0 siblings, 1 reply; 15+ messages in thread
From: Wang, Xiao W @ 2023-08-27  9:25 UTC (permalink / raw)
  To: Ard Biesheuvel, paul.walmsley, palmer, aou, anup
  Cc: Li, Haicheng, linux-riscv, linux-efi, linux-kernel

Hi,

A gentle ping.
Any other comments or suggestions for this patch? Or maybe we would review it in the Linux 6.7 development cycle?

BRs,
Xiao

> -----Original Message-----
> From: Wang, Xiao W
> Sent: Sunday, August 6, 2023 6:24 PM
> To: Ard Biesheuvel <ardb@kernel.org>
> Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> aou@eecs.berkeley.edu; anup@brainfault.org; Li, Haicheng
> <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> efi@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: RE: [PATCH] RISC-V: Optimize bitops with Zbb extension
> 
> Hi,
> 
> > -----Original Message-----
> > From: Ard Biesheuvel <ardb@kernel.org>
> > Sent: Sunday, August 6, 2023 5:39 PM
> > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> > aou@eecs.berkeley.edu; anup@brainfault.org; Li, Haicheng
> > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> >
> > On Sun, 6 Aug 2023 at 04:39, Xiao Wang <xiao.w.wang@intel.com> wrote:
> > >
> > > This patch leverages the alternative mechanism to dynamically optimize
> > > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> > > Zbb ext is not supported by the runtime CPU, legacy implementation is
> > > used. If Zbb is supported, then the optimized variants will be selected
> > > via alternative patching.
> > >
> > > The legacy bitops support is taken from the generic C implementation as
> > > fallback.
> > >
> > > If the parameter is a build-time constant, we leverage compiler builtin to
> > > calculate the result directly, this approach is inspired by x86 bitops
> > > implementation.
> > >
> > > EFI stub runs before the kernel, so alternative mechanism should not be
> > > used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
> > > purpose.
> > >
> >
> > Why? The unpatched sequences work fine, no?
> 
> It works. But there would be build warning: orphan section `.init.alternative'
> from `./drivers/firmware/efi/libstub/gop.stub.o' being placed in section
> `.init.alternative'. Besides, w/o this MACRO, the optimized variant would
> never be used at runtime, so this patch choose to disable alternative.
> 
> BRs,
> Xiao
> 
> >
> >
> > > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > > ---
> > >  arch/riscv/include/asm/bitops.h       | 266
> +++++++++++++++++++++++++-
> > >  drivers/firmware/efi/libstub/Makefile |   2 +-
> > >  2 files changed, 264 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/arch/riscv/include/asm/bitops.h
> > b/arch/riscv/include/asm/bitops.h
> > > index 3540b690944b..f727f6489cd5 100644
> > > --- a/arch/riscv/include/asm/bitops.h
> > > +++ b/arch/riscv/include/asm/bitops.h
> > > @@ -15,13 +15,273 @@
> > >  #include <asm/barrier.h>
> > >  #include <asm/bitsperlong.h>
> > >
> > > +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(EFI_NO_ALTERNATIVE)
> > >  #include <asm-generic/bitops/__ffs.h>
> > > -#include <asm-generic/bitops/ffz.h>
> > > -#include <asm-generic/bitops/fls.h>
> > >  #include <asm-generic/bitops/__fls.h>
> > > +#include <asm-generic/bitops/ffs.h>
> > > +#include <asm-generic/bitops/fls.h>
> > > +
> > > +#else
> > > +#include <asm/alternative-macros.h>
> > > +#include <asm/hwcap.h>
> > > +
> > > +#if (BITS_PER_LONG == 64)
> > > +#define CTZW   "ctzw "
> > > +#define CLZW   "clzw "
> > > +#elif (BITS_PER_LONG == 32)
> > > +#define CTZW   "ctz "
> > > +#define CLZW   "clz "
> > > +#else
> > > +#error "Unexpected BITS_PER_LONG"
> > > +#endif
> [...]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
  2023-08-27  9:25     ` Wang, Xiao W
@ 2023-08-28 10:28       ` Ard Biesheuvel
  2023-08-30  5:46         ` Wang, Xiao W
  0 siblings, 1 reply; 15+ messages in thread
From: Ard Biesheuvel @ 2023-08-28 10:28 UTC (permalink / raw)
  To: Wang, Xiao W
  Cc: paul.walmsley, palmer, aou, anup, Li, Haicheng, linux-riscv,
	linux-efi, linux-kernel

On Sun, 27 Aug 2023 at 11:26, Wang, Xiao W <xiao.w.wang@intel.com> wrote:
>
> Hi,
>
> A gentle ping.
> Any other comments or suggestions for this patch? Or maybe we would review it in the Linux 6.7 development cycle?
>

This is going to be 6.7 material at the earliest in any case.

I am fine with the change as far as the EFI code is concerned, but I'd
suggest dropping EFI_ from the macro name, as it could be #define'd
for other reasons too.


>
> > -----Original Message-----
> > From: Wang, Xiao W
> > Sent: Sunday, August 6, 2023 6:24 PM
> > To: Ard Biesheuvel <ardb@kernel.org>
> > Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> > aou@eecs.berkeley.edu; anup@brainfault.org; Li, Haicheng
> > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > Subject: RE: [PATCH] RISC-V: Optimize bitops with Zbb extension
> >
> > Hi,
> >
> > > -----Original Message-----
> > > From: Ard Biesheuvel <ardb@kernel.org>
> > > Sent: Sunday, August 6, 2023 5:39 PM
> > > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > > Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> > > aou@eecs.berkeley.edu; anup@brainfault.org; Li, Haicheng
> > > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > >
> > > On Sun, 6 Aug 2023 at 04:39, Xiao Wang <xiao.w.wang@intel.com> wrote:
> > > >
> > > > This patch leverages the alternative mechanism to dynamically optimize
> > > > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> > > > Zbb ext is not supported by the runtime CPU, legacy implementation is
> > > > used. If Zbb is supported, then the optimized variants will be selected
> > > > via alternative patching.
> > > >
> > > > The legacy bitops support is taken from the generic C implementation as
> > > > fallback.
> > > >
> > > > If the parameter is a build-time constant, we leverage compiler builtin to
> > > > calculate the result directly, this approach is inspired by x86 bitops
> > > > implementation.
> > > >
> > > > EFI stub runs before the kernel, so alternative mechanism should not be
> > > > used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
> > > > purpose.
> > > >
> > >
> > > Why? The unpatched sequences work fine, no?
> >
> > It works. But there would be build warning: orphan section `.init.alternative'
> > from `./drivers/firmware/efi/libstub/gop.stub.o' being placed in section
> > `.init.alternative'. Besides, w/o this MACRO, the optimized variant would
> > never be used at runtime, so this patch choose to disable alternative.
> >
> > BRs,
> > Xiao
> >
> > >
> > >
> > > > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > > > ---
> > > >  arch/riscv/include/asm/bitops.h       | 266
> > +++++++++++++++++++++++++-
> > > >  drivers/firmware/efi/libstub/Makefile |   2 +-
> > > >  2 files changed, 264 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/arch/riscv/include/asm/bitops.h
> > > b/arch/riscv/include/asm/bitops.h
> > > > index 3540b690944b..f727f6489cd5 100644
> > > > --- a/arch/riscv/include/asm/bitops.h
> > > > +++ b/arch/riscv/include/asm/bitops.h
> > > > @@ -15,13 +15,273 @@
> > > >  #include <asm/barrier.h>
> > > >  #include <asm/bitsperlong.h>
> > > >
> > > > +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(EFI_NO_ALTERNATIVE)
> > > >  #include <asm-generic/bitops/__ffs.h>
> > > > -#include <asm-generic/bitops/ffz.h>
> > > > -#include <asm-generic/bitops/fls.h>
> > > >  #include <asm-generic/bitops/__fls.h>
> > > > +#include <asm-generic/bitops/ffs.h>
> > > > +#include <asm-generic/bitops/fls.h>
> > > > +
> > > > +#else
> > > > +#include <asm/alternative-macros.h>
> > > > +#include <asm/hwcap.h>
> > > > +
> > > > +#if (BITS_PER_LONG == 64)
> > > > +#define CTZW   "ctzw "
> > > > +#define CLZW   "clzw "
> > > > +#elif (BITS_PER_LONG == 32)
> > > > +#define CTZW   "ctz "
> > > > +#define CLZW   "clz "
> > > > +#else
> > > > +#error "Unexpected BITS_PER_LONG"
> > > > +#endif
> > [...]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
  2023-08-06  2:47 [PATCH] RISC-V: Optimize bitops with Zbb extension Xiao Wang
  2023-08-06  9:38 ` Ard Biesheuvel
@ 2023-08-29 11:08 ` Anup Patel
  2023-08-30  6:14   ` Wang, Xiao W
  1 sibling, 1 reply; 15+ messages in thread
From: Anup Patel @ 2023-08-29 11:08 UTC (permalink / raw)
  To: Xiao Wang
  Cc: paul.walmsley, palmer, aou, ardb, haicheng.li, linux-riscv,
	linux-efi, linux-kernel

On Sun, Aug 6, 2023 at 8:09 AM Xiao Wang <xiao.w.wang@intel.com> wrote:
>
> This patch leverages the alternative mechanism to dynamically optimize
> bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> Zbb ext is not supported by the runtime CPU, legacy implementation is
> used. If Zbb is supported, then the optimized variants will be selected
> via alternative patching.
>
> The legacy bitops support is taken from the generic C implementation as
> fallback.
>
> If the parameter is a build-time constant, we leverage compiler builtin to
> calculate the result directly, this approach is inspired by x86 bitops
> implementation.
>
> EFI stub runs before the kernel, so alternative mechanism should not be
> used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
> purpose.

I am getting the following compile error with this patch:

  GEN     Makefile
  UPD     include/config/kernel.release
  UPD     include/generated/utsrelease.h
  CC      kernel/bounds.s
In file included from /home/anup/Work/riscv-test/linux/include/linux/bitmap.h:9,
                 from
/home/anup/Work/riscv-test/linux/arch/riscv/include/asm/cpufeature.h:9,
                 from
/home/anup/Work/riscv-test/linux/arch/riscv/include/asm/hwcap.h:90,
                 from
/home/anup/Work/riscv-test/linux/arch/riscv/include/asm/bitops.h:26,
                 from
/home/anup/Work/riscv-test/linux/include/linux/bitops.h:68,
                 from /home/anup/Work/riscv-test/linux/include/linux/log2.h:12,
                 from /home/anup/Work/riscv-test/linux/kernel/bounds.c:13:
/home/anup/Work/riscv-test/linux/include/linux/find.h: In function
'find_next_bit':
/home/anup/Work/riscv-test/linux/include/linux/find.h:64:30: error:
implicit declaration of function '__ffs'
[-Werror=implicit-function-declaration]
   64 |                 return val ? __ffs(val) : size;

Regards,
Anup


>
> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> ---
>  arch/riscv/include/asm/bitops.h       | 266 +++++++++++++++++++++++++-
>  drivers/firmware/efi/libstub/Makefile |   2 +-
>  2 files changed, 264 insertions(+), 4 deletions(-)
>
> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
> index 3540b690944b..f727f6489cd5 100644
> --- a/arch/riscv/include/asm/bitops.h
> +++ b/arch/riscv/include/asm/bitops.h
> @@ -15,13 +15,273 @@
>  #include <asm/barrier.h>
>  #include <asm/bitsperlong.h>
>
> +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(EFI_NO_ALTERNATIVE)
>  #include <asm-generic/bitops/__ffs.h>
> -#include <asm-generic/bitops/ffz.h>
> -#include <asm-generic/bitops/fls.h>
>  #include <asm-generic/bitops/__fls.h>
> +#include <asm-generic/bitops/ffs.h>
> +#include <asm-generic/bitops/fls.h>
> +
> +#else
> +#include <asm/alternative-macros.h>
> +#include <asm/hwcap.h>
> +
> +#if (BITS_PER_LONG == 64)
> +#define CTZW   "ctzw "
> +#define CLZW   "clzw "
> +#elif (BITS_PER_LONG == 32)
> +#define CTZW   "ctz "
> +#define CLZW   "clz "
> +#else
> +#error "Unexpected BITS_PER_LONG"
> +#endif
> +
> +static __always_inline unsigned long variable__ffs(unsigned long word)
> +{
> +       int num;
> +
> +       asm_volatile_goto(
> +               ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
> +               : : : : legacy);
> +
> +       asm volatile (
> +               ".option push\n"
> +               ".option arch,+zbb\n"
> +               "ctz %0, %1\n"
> +               ".option pop\n"
> +               : "=r" (word) : "r" (word) :);
> +
> +       return word;
> +
> +legacy:
> +       num = 0;
> +#if BITS_PER_LONG == 64
> +       if ((word & 0xffffffff) == 0) {
> +               num += 32;
> +               word >>= 32;
> +       }
> +#endif
> +       if ((word & 0xffff) == 0) {
> +               num += 16;
> +               word >>= 16;
> +       }
> +       if ((word & 0xff) == 0) {
> +               num += 8;
> +               word >>= 8;
> +       }
> +       if ((word & 0xf) == 0) {
> +               num += 4;
> +               word >>= 4;
> +       }
> +       if ((word & 0x3) == 0) {
> +               num += 2;
> +               word >>= 2;
> +       }
> +       if ((word & 0x1) == 0)
> +               num += 1;
> +       return num;
> +}
> +
> +/**
> + * __ffs - find first set bit in a long word
> + * @word: The word to search
> + *
> + * Undefined if no set bit exists, so code should check against 0 first.
> + */
> +#define __ffs(word)                            \
> +       (__builtin_constant_p(word) ?           \
> +        (unsigned long)__builtin_ctzl(word) :  \
> +        variable__ffs(word))
> +
> +static __always_inline unsigned long variable__fls(unsigned long word)
> +{
> +       int num;
> +
> +       asm_volatile_goto(
> +               ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
> +               : : : : legacy);
> +
> +       asm volatile (
> +               ".option push\n"
> +               ".option arch,+zbb\n"
> +               "clz %0, %1\n"
> +               ".option pop\n"
> +               : "=r" (word) : "r" (word) :);
> +
> +       return BITS_PER_LONG - 1 - word;
> +
> +legacy:
> +       num = BITS_PER_LONG - 1;
> +#if BITS_PER_LONG == 64
> +       if (!(word & (~0ul << 32))) {
> +               num -= 32;
> +               word <<= 32;
> +       }
> +#endif
> +       if (!(word & (~0ul << (BITS_PER_LONG-16)))) {
> +               num -= 16;
> +               word <<= 16;
> +       }
> +       if (!(word & (~0ul << (BITS_PER_LONG-8)))) {
> +               num -= 8;
> +               word <<= 8;
> +       }
> +       if (!(word & (~0ul << (BITS_PER_LONG-4)))) {
> +               num -= 4;
> +               word <<= 4;
> +       }
> +       if (!(word & (~0ul << (BITS_PER_LONG-2)))) {
> +               num -= 2;
> +               word <<= 2;
> +       }
> +       if (!(word & (~0ul << (BITS_PER_LONG-1))))
> +               num -= 1;
> +       return num;
> +}
> +
> +/**
> + * __fls - find last set bit in a long word
> + * @word: the word to search
> + *
> + * Undefined if no set bit exists, so code should check against 0 first.
> + */
> +#define __fls(word)                                                    \
> +       (__builtin_constant_p(word) ?                                   \
> +        (unsigned long)(BITS_PER_LONG - 1 - __builtin_clzl(word)) :    \
> +        variable__fls(word))
> +
> +static __always_inline int variable_ffs(int x)
> +{
> +       int r;
> +
> +       asm_volatile_goto(
> +               ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
> +               : : : : legacy);
> +
> +       asm volatile (
> +               ".option push\n"
> +               ".option arch,+zbb\n"
> +               "bnez %1, 1f\n"
> +               "li %0, 0\n"
> +               "j 2f\n"
> +               "1:\n"
> +               CTZW "%0, %1\n"
> +               "addi %0, %0, 1\n"
> +               "2:\n"
> +               ".option pop\n"
> +               : "=r" (r) : "r" (x) :);
> +
> +       return r;
> +
> +legacy:
> +       r = 1;
> +       if (!x)
> +               return 0;
> +       if (!(x & 0xffff)) {
> +               x >>= 16;
> +               r += 16;
> +       }
> +       if (!(x & 0xff)) {
> +               x >>= 8;
> +               r += 8;
> +       }
> +       if (!(x & 0xf)) {
> +               x >>= 4;
> +               r += 4;
> +       }
> +       if (!(x & 3)) {
> +               x >>= 2;
> +               r += 2;
> +       }
> +       if (!(x & 1)) {
> +               x >>= 1;
> +               r += 1;
> +       }
> +       return r;
> +}
> +
> +/**
> + * ffs - find first set bit in a word
> + * @x: the word to search
> + *
> + * This is defined the same way as the libc and compiler builtin ffs routines.
> + *
> + * ffs(value) returns 0 if value is 0 or the position of the first set bit if
> + * value is nonzero. The first (least significant) bit is at position 1.
> + */
> +#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x))
> +
> +static __always_inline int variable_fls(unsigned int x)
> +{
> +       int r;
> +
> +       asm_volatile_goto(
> +               ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
> +               : : : : legacy);
> +
> +       asm volatile (
> +               ".option push\n"
> +               ".option arch,+zbb\n"
> +               "bnez %1, 1f\n"
> +               "li %0, 0\n"
> +               "j 2f\n"
> +               "1:\n"
> +               CLZW "%0, %1\n"
> +               "neg %0, %0\n"
> +               "addi %0, %0, 32\n"
> +               "2:\n"
> +               ".option pop\n"
> +               : "=r" (r) : "r" (x) :);
> +
> +       return r;
> +
> +legacy:
> +       r = 32;
> +       if (!x)
> +               return 0;
> +       if (!(x & 0xffff0000u)) {
> +               x <<= 16;
> +               r -= 16;
> +       }
> +       if (!(x & 0xff000000u)) {
> +               x <<= 8;
> +               r -= 8;
> +       }
> +       if (!(x & 0xf0000000u)) {
> +               x <<= 4;
> +               r -= 4;
> +       }
> +       if (!(x & 0xc0000000u)) {
> +               x <<= 2;
> +               r -= 2;
> +       }
> +       if (!(x & 0x80000000u)) {
> +               x <<= 1;
> +               r -= 1;
> +       }
> +       return r;
> +}
> +
> +/**
> + * fls - find last set bit in a word
> + * @x: the word to search
> + *
> + * This is defined in a similar way as ffs, but returns the position of the most
> + * significant set bit.
> + *
> + * fls(value) returns 0 if value is 0 or the position of the last set bit if
> + * value is nonzero. The last (most significant) bit is at position 32.
> + */
> +#define fls(x)                                                         \
> +       (__builtin_constant_p(x) ?                                      \
> +        (int)(((x) != 0) ?                                             \
> +         (sizeof(unsigned int) * 8 - __builtin_clz(x)) : 0) :          \
> +        variable_fls(x))
> +
> +#endif
> +
> +#include <asm-generic/bitops/ffz.h>
>  #include <asm-generic/bitops/fls64.h>
>  #include <asm-generic/bitops/sched.h>
> -#include <asm-generic/bitops/ffs.h>
>
>  #include <asm-generic/bitops/hweight.h>
>
> diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
> index 16d64a34d1e1..b0f8c495c10f 100644
> --- a/drivers/firmware/efi/libstub/Makefile
> +++ b/drivers/firmware/efi/libstub/Makefile
> @@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM)          += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \
>                                    -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \
>                                    -DEFI_HAVE_STRCMP -fno-builtin -fpic \
>                                    $(call cc-option,-mno-single-pic-base)
> -cflags-$(CONFIG_RISCV)         += -fpic
> +cflags-$(CONFIG_RISCV)         += -fpic -DEFI_NO_ALTERNATIVE
>  cflags-$(CONFIG_LOONGARCH)     += -fpie
>
>  cflags-$(CONFIG_EFI_PARAMS_FROM_FDT)   += -I$(srctree)/scripts/dtc/libfdt
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH] RISC-V: Optimize bitops with Zbb extension
  2023-08-28 10:28       ` Ard Biesheuvel
@ 2023-08-30  5:46         ` Wang, Xiao W
  0 siblings, 0 replies; 15+ messages in thread
From: Wang, Xiao W @ 2023-08-30  5:46 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: paul.walmsley, palmer, aou, anup, Li, Haicheng, linux-riscv,
	linux-efi, linux-kernel

Hi,

> -----Original Message-----
> From: Ard Biesheuvel <ardb@kernel.org>
> Sent: Monday, August 28, 2023 6:28 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> aou@eecs.berkeley.edu; anup@brainfault.org; Li, Haicheng
> <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> efi@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> 
> On Sun, 27 Aug 2023 at 11:26, Wang, Xiao W <xiao.w.wang@intel.com>
> wrote:
> >
> > Hi,
> >
> > A gentle ping.
> > Any other comments or suggestions for this patch? Or maybe we would
> review it in the Linux 6.7 development cycle?
> >
> 
> This is going to be 6.7 material at the earliest in any case.
> 
> I am fine with the change as far as the EFI code is concerned, but I'd
> suggest dropping EFI_ from the macro name, as it could be #define'd
> for other reasons too.

Yes, I agree with you. Would remove the EFI_ prefix in next version.

Thanks,
Xiao

> 
> 
> >
> > > -----Original Message-----
> > > From: Wang, Xiao W
> > > Sent: Sunday, August 6, 2023 6:24 PM
> > > To: Ard Biesheuvel <ardb@kernel.org>
> > > Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> > > aou@eecs.berkeley.edu; anup@brainfault.org; Li, Haicheng
> > > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > > Subject: RE: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > >
> > > Hi,
> > >
> > > > -----Original Message-----
> > > > From: Ard Biesheuvel <ardb@kernel.org>
> > > > Sent: Sunday, August 6, 2023 5:39 PM
> > > > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > > > Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> > > > aou@eecs.berkeley.edu; anup@brainfault.org; Li, Haicheng
> > > > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > > > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > > >
> > > > On Sun, 6 Aug 2023 at 04:39, Xiao Wang <xiao.w.wang@intel.com>
> wrote:
> > > > >
> > > > > This patch leverages the alternative mechanism to dynamically optimize
> > > > > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> > > > > Zbb ext is not supported by the runtime CPU, legacy implementation is
> > > > > used. If Zbb is supported, then the optimized variants will be selected
> > > > > via alternative patching.
> > > > >
> > > > > The legacy bitops support is taken from the generic C implementation as
> > > > > fallback.
> > > > >
> > > > > If the parameter is a build-time constant, we leverage compiler builtin
> to
> > > > > calculate the result directly, this approach is inspired by x86 bitops
> > > > > implementation.
> > > > >
> > > > > EFI stub runs before the kernel, so alternative mechanism should not be
> > > > > used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
> > > > > purpose.
> > > > >
> > > >
> > > > Why? The unpatched sequences work fine, no?
> > >
> > > It works. But there would be build warning: orphan section
> `.init.alternative'
> > > from `./drivers/firmware/efi/libstub/gop.stub.o' being placed in section
> > > `.init.alternative'. Besides, w/o this MACRO, the optimized variant would
> > > never be used at runtime, so this patch choose to disable alternative.
> > >
> > > BRs,
> > > Xiao
> > >
> > > >
> > > >
> > > > > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > > > > ---
> > > > >  arch/riscv/include/asm/bitops.h       | 266
> > > +++++++++++++++++++++++++-
> > > > >  drivers/firmware/efi/libstub/Makefile |   2 +-
> > > > >  2 files changed, 264 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/arch/riscv/include/asm/bitops.h
> > > > b/arch/riscv/include/asm/bitops.h
> > > > > index 3540b690944b..f727f6489cd5 100644
> > > > > --- a/arch/riscv/include/asm/bitops.h
> > > > > +++ b/arch/riscv/include/asm/bitops.h
> > > > > @@ -15,13 +15,273 @@
> > > > >  #include <asm/barrier.h>
> > > > >  #include <asm/bitsperlong.h>
> > > > >
> > > > > +#if !defined(CONFIG_RISCV_ISA_ZBB) ||
> defined(EFI_NO_ALTERNATIVE)
> > > > >  #include <asm-generic/bitops/__ffs.h>
> > > > > -#include <asm-generic/bitops/ffz.h>
> > > > > -#include <asm-generic/bitops/fls.h>
> > > > >  #include <asm-generic/bitops/__fls.h>
> > > > > +#include <asm-generic/bitops/ffs.h>
> > > > > +#include <asm-generic/bitops/fls.h>
> > > > > +
> > > > > +#else
> > > > > +#include <asm/alternative-macros.h>
> > > > > +#include <asm/hwcap.h>
> > > > > +
> > > > > +#if (BITS_PER_LONG == 64)
> > > > > +#define CTZW   "ctzw "
> > > > > +#define CLZW   "clzw "
> > > > > +#elif (BITS_PER_LONG == 32)
> > > > > +#define CTZW   "ctz "
> > > > > +#define CLZW   "clz "
> > > > > +#else
> > > > > +#error "Unexpected BITS_PER_LONG"
> > > > > +#endif
> > > [...]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH] RISC-V: Optimize bitops with Zbb extension
  2023-08-29 11:08 ` Anup Patel
@ 2023-08-30  6:14   ` Wang, Xiao W
  2023-08-30  6:59     ` Conor Dooley
  0 siblings, 1 reply; 15+ messages in thread
From: Wang, Xiao W @ 2023-08-30  6:14 UTC (permalink / raw)
  To: Anup Patel
  Cc: paul.walmsley, palmer, aou, ardb, Li, Haicheng, linux-riscv,
	linux-efi, linux-kernel

Hi,

> -----Original Message-----
> From: Anup Patel <anup@brainfault.org>
> Sent: Tuesday, August 29, 2023 7:08 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> aou@eecs.berkeley.edu; ardb@kernel.org; Li, Haicheng
> <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> efi@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> 
> On Sun, Aug 6, 2023 at 8:09 AM Xiao Wang <xiao.w.wang@intel.com> wrote:
> >
> > This patch leverages the alternative mechanism to dynamically optimize
> > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> > Zbb ext is not supported by the runtime CPU, legacy implementation is
> > used. If Zbb is supported, then the optimized variants will be selected
> > via alternative patching.
> >
> > The legacy bitops support is taken from the generic C implementation as
> > fallback.
> >
> > If the parameter is a build-time constant, we leverage compiler builtin to
> > calculate the result directly, this approach is inspired by x86 bitops
> > implementation.
> >
> > EFI stub runs before the kernel, so alternative mechanism should not be
> > used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
> > purpose.
> 
> I am getting the following compile error with this patch:
> 
>   GEN     Makefile
>   UPD     include/config/kernel.release
>   UPD     include/generated/utsrelease.h
>   CC      kernel/bounds.s
> In file included from /home/anup/Work/riscv-
> test/linux/include/linux/bitmap.h:9,
>                  from
> /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/cpufeature.h:9,
>                  from
> /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/hwcap.h:90,


It looks there's a cyclic header including, which leads to this build error.
I checked https://github.com/kvm-riscv/linux/tree/master and https://github.com/torvalds/linux/tree/master, but I don't see "asm/cpufeature.h" is included in asm/hwcap.h:90, maybe I miss something, could you help point me to the repo/branch I should work on?

Thanks,
Xiao

>                  from
> /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/bitops.h:26,
>                  from
> /home/anup/Work/riscv-test/linux/include/linux/bitops.h:68,
>                  from /home/anup/Work/riscv-test/linux/include/linux/log2.h:12,
>                  from /home/anup/Work/riscv-test/linux/kernel/bounds.c:13:
> /home/anup/Work/riscv-test/linux/include/linux/find.h: In function
> 'find_next_bit':
> /home/anup/Work/riscv-test/linux/include/linux/find.h:64:30: error:
> implicit declaration of function '__ffs'
> [-Werror=implicit-function-declaration]
>    64 |                 return val ? __ffs(val) : size;
> 
> Regards,
> Anup
> 
> 
> >
> > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > ---
> >  arch/riscv/include/asm/bitops.h       | 266 +++++++++++++++++++++++++-
> >  drivers/firmware/efi/libstub/Makefile |   2 +-
> >  2 files changed, 264 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/bitops.h
> b/arch/riscv/include/asm/bitops.h
> > index 3540b690944b..f727f6489cd5 100644
> > --- a/arch/riscv/include/asm/bitops.h
> > +++ b/arch/riscv/include/asm/bitops.h
> > @@ -15,13 +15,273 @@
> >  #include <asm/barrier.h>
> >  #include <asm/bitsperlong.h>
> >
> > +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(EFI_NO_ALTERNATIVE)
> >  #include <asm-generic/bitops/__ffs.h>
> > -#include <asm-generic/bitops/ffz.h>
> > -#include <asm-generic/bitops/fls.h>
> >  #include <asm-generic/bitops/__fls.h>
> > +#include <asm-generic/bitops/ffs.h>
> > +#include <asm-generic/bitops/fls.h>
> > +
> > +#else
> > +#include <asm/alternative-macros.h>
> > +#include <asm/hwcap.h>
> > +
> > +#if (BITS_PER_LONG == 64)
> > +#define CTZW   "ctzw "
> > +#define CLZW   "clzw "
> > +#elif (BITS_PER_LONG == 32)
> > +#define CTZW   "ctz "
> > +#define CLZW   "clz "
> > +#else
> > +#error "Unexpected BITS_PER_LONG"
> > +#endif
> > +
> > +static __always_inline unsigned long variable__ffs(unsigned long word)
> > +{
> > +       int num;
> > +
> > +       asm_volatile_goto(
> > +               ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
> > +               : : : : legacy);
> > +
> > +       asm volatile (
> > +               ".option push\n"
> > +               ".option arch,+zbb\n"
> > +               "ctz %0, %1\n"
> > +               ".option pop\n"
> > +               : "=r" (word) : "r" (word) :);
> > +
> > +       return word;
> > +
> > +legacy:
> > +       num = 0;
> > +#if BITS_PER_LONG == 64
> > +       if ((word & 0xffffffff) == 0) {
> > +               num += 32;
> > +               word >>= 32;
> > +       }
> > +#endif
> > +       if ((word & 0xffff) == 0) {
> > +               num += 16;
> > +               word >>= 16;
> > +       }
> > +       if ((word & 0xff) == 0) {
> > +               num += 8;
> > +               word >>= 8;
> > +       }
> > +       if ((word & 0xf) == 0) {
> > +               num += 4;
> > +               word >>= 4;
> > +       }
> > +       if ((word & 0x3) == 0) {
> > +               num += 2;
> > +               word >>= 2;
> > +       }
> > +       if ((word & 0x1) == 0)
> > +               num += 1;
> > +       return num;
> > +}
> > +
> > +/**
> > + * __ffs - find first set bit in a long word
> > + * @word: The word to search
> > + *
> > + * Undefined if no set bit exists, so code should check against 0 first.
> > + */
> > +#define __ffs(word)                            \
> > +       (__builtin_constant_p(word) ?           \
> > +        (unsigned long)__builtin_ctzl(word) :  \
> > +        variable__ffs(word))
> > +
[...]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
  2023-08-30  6:14   ` Wang, Xiao W
@ 2023-08-30  6:59     ` Conor Dooley
  2023-08-31 15:59       ` Wang, Xiao W
  0 siblings, 1 reply; 15+ messages in thread
From: Conor Dooley @ 2023-08-30  6:59 UTC (permalink / raw)
  To: Wang, Xiao W
  Cc: Anup Patel, paul.walmsley, palmer, aou, ardb, Li, Haicheng,
	linux-riscv, linux-efi, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 6573 bytes --]

On Wed, Aug 30, 2023 at 06:14:12AM +0000, Wang, Xiao W wrote:
> Hi,
> 
> > -----Original Message-----
> > From: Anup Patel <anup@brainfault.org>
> > Sent: Tuesday, August 29, 2023 7:08 PM
> > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> > aou@eecs.berkeley.edu; ardb@kernel.org; Li, Haicheng
> > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > 
> > On Sun, Aug 6, 2023 at 8:09 AM Xiao Wang <xiao.w.wang@intel.com> wrote:
> > >
> > > This patch leverages the alternative mechanism to dynamically optimize
> > > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> > > Zbb ext is not supported by the runtime CPU, legacy implementation is
> > > used. If Zbb is supported, then the optimized variants will be selected
> > > via alternative patching.
> > >
> > > The legacy bitops support is taken from the generic C implementation as
> > > fallback.
> > >
> > > If the parameter is a build-time constant, we leverage compiler builtin to
> > > calculate the result directly, this approach is inspired by x86 bitops
> > > implementation.
> > >
> > > EFI stub runs before the kernel, so alternative mechanism should not be
> > > used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
> > > purpose.
> > 
> > I am getting the following compile error with this patch:
> > 
> >   GEN     Makefile
> >   UPD     include/config/kernel.release
> >   UPD     include/generated/utsrelease.h
> >   CC      kernel/bounds.s
> > In file included from /home/anup/Work/riscv-
> > test/linux/include/linux/bitmap.h:9,
> >                  from
> > /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/cpufeature.h:9,
> >                  from
> > /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/hwcap.h:90,
> 
> 
> It looks there's a cyclic header including, which leads to this build error.
> I checked https://github.com/kvm-riscv/linux/tree/master and
> https://github.com/torvalds/linux/tree/master, but I don't see
> "asm/cpufeature.h" is included in asm/hwcap.h:90, maybe I miss something,
> could you help point me to the repo/branch I should work on?

From MAINTAINERS:
	RISC-V ARCHITECTURE
	...
	T:	git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git

The for-next branch there is what you should be basing work on top of.
AFAICT, you've made bitops.h include hwcap.h while cpufeature.h includes
both bitops.h (indirectly) and hwcap.h.

Hope that helps,
Conor.

> >                  from
> > /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/bitops.h:26,
> >                  from
> > /home/anup/Work/riscv-test/linux/include/linux/bitops.h:68,
> >                  from /home/anup/Work/riscv-test/linux/include/linux/log2.h:12,
> >                  from /home/anup/Work/riscv-test/linux/kernel/bounds.c:13:
> > /home/anup/Work/riscv-test/linux/include/linux/find.h: In function
> > 'find_next_bit':
> > /home/anup/Work/riscv-test/linux/include/linux/find.h:64:30: error:
> > implicit declaration of function '__ffs'
> > [-Werror=implicit-function-declaration]
> >    64 |                 return val ? __ffs(val) : size;
> > 
> > Regards,
> > Anup
> > 
> > 
> > >
> > > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > > ---
> > >  arch/riscv/include/asm/bitops.h       | 266 +++++++++++++++++++++++++-
> > >  drivers/firmware/efi/libstub/Makefile |   2 +-
> > >  2 files changed, 264 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/arch/riscv/include/asm/bitops.h
> > b/arch/riscv/include/asm/bitops.h
> > > index 3540b690944b..f727f6489cd5 100644
> > > --- a/arch/riscv/include/asm/bitops.h
> > > +++ b/arch/riscv/include/asm/bitops.h
> > > @@ -15,13 +15,273 @@
> > >  #include <asm/barrier.h>
> > >  #include <asm/bitsperlong.h>
> > >
> > > +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(EFI_NO_ALTERNATIVE)
> > >  #include <asm-generic/bitops/__ffs.h>
> > > -#include <asm-generic/bitops/ffz.h>
> > > -#include <asm-generic/bitops/fls.h>
> > >  #include <asm-generic/bitops/__fls.h>
> > > +#include <asm-generic/bitops/ffs.h>
> > > +#include <asm-generic/bitops/fls.h>
> > > +
> > > +#else
> > > +#include <asm/alternative-macros.h>
> > > +#include <asm/hwcap.h>
> > > +
> > > +#if (BITS_PER_LONG == 64)
> > > +#define CTZW   "ctzw "
> > > +#define CLZW   "clzw "
> > > +#elif (BITS_PER_LONG == 32)
> > > +#define CTZW   "ctz "
> > > +#define CLZW   "clz "
> > > +#else
> > > +#error "Unexpected BITS_PER_LONG"
> > > +#endif
> > > +
> > > +static __always_inline unsigned long variable__ffs(unsigned long word)
> > > +{
> > > +       int num;
> > > +
> > > +       asm_volatile_goto(
> > > +               ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
> > > +               : : : : legacy);
> > > +
> > > +       asm volatile (
> > > +               ".option push\n"
> > > +               ".option arch,+zbb\n"
> > > +               "ctz %0, %1\n"
> > > +               ".option pop\n"
> > > +               : "=r" (word) : "r" (word) :);
> > > +
> > > +       return word;
> > > +
> > > +legacy:
> > > +       num = 0;
> > > +#if BITS_PER_LONG == 64
> > > +       if ((word & 0xffffffff) == 0) {
> > > +               num += 32;
> > > +               word >>= 32;
> > > +       }
> > > +#endif
> > > +       if ((word & 0xffff) == 0) {
> > > +               num += 16;
> > > +               word >>= 16;
> > > +       }
> > > +       if ((word & 0xff) == 0) {
> > > +               num += 8;
> > > +               word >>= 8;
> > > +       }
> > > +       if ((word & 0xf) == 0) {
> > > +               num += 4;
> > > +               word >>= 4;
> > > +       }
> > > +       if ((word & 0x3) == 0) {
> > > +               num += 2;
> > > +               word >>= 2;
> > > +       }
> > > +       if ((word & 0x1) == 0)
> > > +               num += 1;
> > > +       return num;
> > > +}
> > > +
> > > +/**
> > > + * __ffs - find first set bit in a long word
> > > + * @word: The word to search
> > > + *
> > > + * Undefined if no set bit exists, so code should check against 0 first.
> > > + */
> > > +#define __ffs(word)                            \
> > > +       (__builtin_constant_p(word) ?           \
> > > +        (unsigned long)__builtin_ctzl(word) :  \
> > > +        variable__ffs(word))
> > > +
> [...]

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH] RISC-V: Optimize bitops with Zbb extension
  2023-08-30  6:59     ` Conor Dooley
@ 2023-08-31 15:59       ` Wang, Xiao W
  2023-08-31 16:07         ` Anup Patel
  0 siblings, 1 reply; 15+ messages in thread
From: Wang, Xiao W @ 2023-08-31 15:59 UTC (permalink / raw)
  To: Conor Dooley, Anup Patel
  Cc: paul.walmsley, palmer, aou, ardb, Li, Haicheng, linux-riscv,
	linux-efi, linux-kernel


> -----Original Message-----
> From: Conor Dooley <conor.dooley@microchip.com>
> Sent: Wednesday, August 30, 2023 2:59 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: Anup Patel <anup@brainfault.org>; paul.walmsley@sifive.com;
> palmer@dabbelt.com; aou@eecs.berkeley.edu; ardb@kernel.org; Li, Haicheng
> <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> efi@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> 
> On Wed, Aug 30, 2023 at 06:14:12AM +0000, Wang, Xiao W wrote:
> > Hi,
> >
> > > -----Original Message-----
> > > From: Anup Patel <anup@brainfault.org>
> > > Sent: Tuesday, August 29, 2023 7:08 PM
> > > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > > Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> > > aou@eecs.berkeley.edu; ardb@kernel.org; Li, Haicheng
> > > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > >
> > > On Sun, Aug 6, 2023 at 8:09 AM Xiao Wang <xiao.w.wang@intel.com>
> wrote:
> > > >
> > > > This patch leverages the alternative mechanism to dynamically optimize
> > > > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> > > > Zbb ext is not supported by the runtime CPU, legacy implementation is
> > > > used. If Zbb is supported, then the optimized variants will be selected
> > > > via alternative patching.
> > > >
> > > > The legacy bitops support is taken from the generic C implementation as
> > > > fallback.
> > > >
> > > > If the parameter is a build-time constant, we leverage compiler builtin to
> > > > calculate the result directly, this approach is inspired by x86 bitops
> > > > implementation.
> > > >
> > > > EFI stub runs before the kernel, so alternative mechanism should not be
> > > > used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
> > > > purpose.
> > >
> > > I am getting the following compile error with this patch:
> > >
> > >   GEN     Makefile
> > >   UPD     include/config/kernel.release
> > >   UPD     include/generated/utsrelease.h
> > >   CC      kernel/bounds.s
> > > In file included from /home/anup/Work/riscv-
> > > test/linux/include/linux/bitmap.h:9,
> > >                  from
> > > /home/anup/Work/riscv-
> test/linux/arch/riscv/include/asm/cpufeature.h:9,
> > >                  from
> > > /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/hwcap.h:90,
> >
> >
> > It looks there's a cyclic header including, which leads to this build error.
> > I checked https://github.com/kvm-riscv/linux/tree/master and
> > https://github.com/torvalds/linux/tree/master, but I don't see
> > "asm/cpufeature.h" is included in asm/hwcap.h:90, maybe I miss
> something,
> > could you help point me to the repo/branch I should work on?
> 
> From MAINTAINERS:
> 	RISC-V ARCHITECTURE
> 	...
> 	T:	git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git
> 
> The for-next branch there is what you should be basing work on top of.
> AFAICT, you've made bitops.h include hwcap.h while cpufeature.h includes
> both bitops.h (indirectly) and hwcap.h.

Thanks for the info, but I can't reproduce Anup's build error with this for-next branch, cpufeature.h is not included by hwcap.h there.
Maybe Anup could help double check the test environment?

BRs,
Xiao


> 
> Hope that helps,
> Conor.
> 
> > >                  from
> > > /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/bitops.h:26,
> > >                  from
> > > /home/anup/Work/riscv-test/linux/include/linux/bitops.h:68,
> > >                  from /home/anup/Work/riscv-test/linux/include/linux/log2.h:12,
> > >                  from /home/anup/Work/riscv-test/linux/kernel/bounds.c:13:
> > > /home/anup/Work/riscv-test/linux/include/linux/find.h: In function
> > > 'find_next_bit':
> > > /home/anup/Work/riscv-test/linux/include/linux/find.h:64:30: error:
> > > implicit declaration of function '__ffs'
> > > [-Werror=implicit-function-declaration]
> > >    64 |                 return val ? __ffs(val) : size;
> > >
> > > Regards,
> > > Anup
> > >
> > >
> > > >
> > > > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > > > ---
> > > >  arch/riscv/include/asm/bitops.h       | 266
> +++++++++++++++++++++++++-
> > > >  drivers/firmware/efi/libstub/Makefile |   2 +-
> > > >  2 files changed, 264 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/arch/riscv/include/asm/bitops.h
> > > b/arch/riscv/include/asm/bitops.h
> > > > index 3540b690944b..f727f6489cd5 100644
> > > > --- a/arch/riscv/include/asm/bitops.h
> > > > +++ b/arch/riscv/include/asm/bitops.h
> > > > @@ -15,13 +15,273 @@
> > > >  #include <asm/barrier.h>
> > > >  #include <asm/bitsperlong.h>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
  2023-08-31 15:59       ` Wang, Xiao W
@ 2023-08-31 16:07         ` Anup Patel
  2023-08-31 17:00           ` Andrew Jones
  0 siblings, 1 reply; 15+ messages in thread
From: Anup Patel @ 2023-08-31 16:07 UTC (permalink / raw)
  To: Wang, Xiao W
  Cc: Conor Dooley, Anup Patel, paul.walmsley, palmer, aou, ardb, Li,
	Haicheng, linux-riscv, linux-efi, linux-kernel, Andrew Jones

+Andrew

On Thu, Aug 31, 2023 at 9:29 PM Wang, Xiao W <xiao.w.wang@intel.com> wrote:
>
>
> > -----Original Message-----
> > From: Conor Dooley <conor.dooley@microchip.com>
> > Sent: Wednesday, August 30, 2023 2:59 PM
> > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > Cc: Anup Patel <anup@brainfault.org>; paul.walmsley@sifive.com;
> > palmer@dabbelt.com; aou@eecs.berkeley.edu; ardb@kernel.org; Li, Haicheng
> > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> >
> > On Wed, Aug 30, 2023 at 06:14:12AM +0000, Wang, Xiao W wrote:
> > > Hi,
> > >
> > > > -----Original Message-----
> > > > From: Anup Patel <anup@brainfault.org>
> > > > Sent: Tuesday, August 29, 2023 7:08 PM
> > > > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > > > Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> > > > aou@eecs.berkeley.edu; ardb@kernel.org; Li, Haicheng
> > > > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > > > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > > >
> > > > On Sun, Aug 6, 2023 at 8:09 AM Xiao Wang <xiao.w.wang@intel.com>
> > wrote:
> > > > >
> > > > > This patch leverages the alternative mechanism to dynamically optimize
> > > > > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> > > > > Zbb ext is not supported by the runtime CPU, legacy implementation is
> > > > > used. If Zbb is supported, then the optimized variants will be selected
> > > > > via alternative patching.
> > > > >
> > > > > The legacy bitops support is taken from the generic C implementation as
> > > > > fallback.
> > > > >
> > > > > If the parameter is a build-time constant, we leverage compiler builtin to
> > > > > calculate the result directly, this approach is inspired by x86 bitops
> > > > > implementation.
> > > > >
> > > > > EFI stub runs before the kernel, so alternative mechanism should not be
> > > > > used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
> > > > > purpose.
> > > >
> > > > I am getting the following compile error with this patch:
> > > >
> > > >   GEN     Makefile
> > > >   UPD     include/config/kernel.release
> > > >   UPD     include/generated/utsrelease.h
> > > >   CC      kernel/bounds.s
> > > > In file included from /home/anup/Work/riscv-
> > > > test/linux/include/linux/bitmap.h:9,
> > > >                  from
> > > > /home/anup/Work/riscv-
> > test/linux/arch/riscv/include/asm/cpufeature.h:9,
> > > >                  from
> > > > /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/hwcap.h:90,
> > >
> > >
> > > It looks there's a cyclic header including, which leads to this build error.
> > > I checked https://github.com/kvm-riscv/linux/tree/master and
> > > https://github.com/torvalds/linux/tree/master, but I don't see
> > > "asm/cpufeature.h" is included in asm/hwcap.h:90, maybe I miss
> > something,
> > > could you help point me to the repo/branch I should work on?
> >
> > From MAINTAINERS:
> >       RISC-V ARCHITECTURE
> >       ...
> >       T:      git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git
> >
> > The for-next branch there is what you should be basing work on top of.
> > AFAICT, you've made bitops.h include hwcap.h while cpufeature.h includes
> > both bitops.h (indirectly) and hwcap.h.
>
> Thanks for the info, but I can't reproduce Anup's build error with this for-next branch, cpufeature.h is not included by hwcap.h there.
> Maybe Anup could help double check the test environment?

I figured that cpufeature.h included in hwcap.h is added by
Drew's patch "RISC-V: Enable cbo.zero in usermode"

I had tried this patch on-top-of dev-upstream branch of
https://github.com/ventanamicro/linux.git

Regards,
Anup

>
> BRs,
> Xiao
>
>
> >
> > Hope that helps,
> > Conor.
> >
> > > >                  from
> > > > /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/bitops.h:26,
> > > >                  from
> > > > /home/anup/Work/riscv-test/linux/include/linux/bitops.h:68,
> > > >                  from /home/anup/Work/riscv-test/linux/include/linux/log2.h:12,
> > > >                  from /home/anup/Work/riscv-test/linux/kernel/bounds.c:13:
> > > > /home/anup/Work/riscv-test/linux/include/linux/find.h: In function
> > > > 'find_next_bit':
> > > > /home/anup/Work/riscv-test/linux/include/linux/find.h:64:30: error:
> > > > implicit declaration of function '__ffs'
> > > > [-Werror=implicit-function-declaration]
> > > >    64 |                 return val ? __ffs(val) : size;
> > > >
> > > > Regards,
> > > > Anup
> > > >
> > > >
> > > > >
> > > > > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > > > > ---
> > > > >  arch/riscv/include/asm/bitops.h       | 266
> > +++++++++++++++++++++++++-
> > > > >  drivers/firmware/efi/libstub/Makefile |   2 +-
> > > > >  2 files changed, 264 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/arch/riscv/include/asm/bitops.h
> > > > b/arch/riscv/include/asm/bitops.h
> > > > > index 3540b690944b..f727f6489cd5 100644
> > > > > --- a/arch/riscv/include/asm/bitops.h
> > > > > +++ b/arch/riscv/include/asm/bitops.h
> > > > > @@ -15,13 +15,273 @@
> > > > >  #include <asm/barrier.h>
> > > > >  #include <asm/bitsperlong.h>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
  2023-08-31 16:07         ` Anup Patel
@ 2023-08-31 17:00           ` Andrew Jones
  2023-09-05  9:46             ` Wang, Xiao W
  0 siblings, 1 reply; 15+ messages in thread
From: Andrew Jones @ 2023-08-31 17:00 UTC (permalink / raw)
  To: Anup Patel
  Cc: Wang, Xiao W, Conor Dooley, Anup Patel, paul.walmsley, palmer,
	aou, ardb, Li, Haicheng, linux-riscv, linux-efi, linux-kernel

On Thu, Aug 31, 2023 at 09:37:30PM +0530, Anup Patel wrote:
> +Andrew
> 
> On Thu, Aug 31, 2023 at 9:29 PM Wang, Xiao W <xiao.w.wang@intel.com> wrote:
> >
> >
> > > -----Original Message-----
> > > From: Conor Dooley <conor.dooley@microchip.com>
> > > Sent: Wednesday, August 30, 2023 2:59 PM
> > > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > > Cc: Anup Patel <anup@brainfault.org>; paul.walmsley@sifive.com;
> > > palmer@dabbelt.com; aou@eecs.berkeley.edu; ardb@kernel.org; Li, Haicheng
> > > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > >
> > > On Wed, Aug 30, 2023 at 06:14:12AM +0000, Wang, Xiao W wrote:
> > > > Hi,
> > > >
> > > > > -----Original Message-----
> > > > > From: Anup Patel <anup@brainfault.org>
> > > > > Sent: Tuesday, August 29, 2023 7:08 PM
> > > > > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > > > > Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> > > > > aou@eecs.berkeley.edu; ardb@kernel.org; Li, Haicheng
> > > > > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > > > > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > > > >
> > > > > On Sun, Aug 6, 2023 at 8:09 AM Xiao Wang <xiao.w.wang@intel.com>
> > > wrote:
> > > > > >
> > > > > > This patch leverages the alternative mechanism to dynamically optimize
> > > > > > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> > > > > > Zbb ext is not supported by the runtime CPU, legacy implementation is
> > > > > > used. If Zbb is supported, then the optimized variants will be selected
> > > > > > via alternative patching.
> > > > > >
> > > > > > The legacy bitops support is taken from the generic C implementation as
> > > > > > fallback.
> > > > > >
> > > > > > If the parameter is a build-time constant, we leverage compiler builtin to
> > > > > > calculate the result directly, this approach is inspired by x86 bitops
> > > > > > implementation.
> > > > > >
> > > > > > EFI stub runs before the kernel, so alternative mechanism should not be
> > > > > > used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this
> > > > > > purpose.
> > > > >
> > > > > I am getting the following compile error with this patch:
> > > > >
> > > > >   GEN     Makefile
> > > > >   UPD     include/config/kernel.release
> > > > >   UPD     include/generated/utsrelease.h
> > > > >   CC      kernel/bounds.s
> > > > > In file included from /home/anup/Work/riscv-
> > > > > test/linux/include/linux/bitmap.h:9,
> > > > >                  from
> > > > > /home/anup/Work/riscv-
> > > test/linux/arch/riscv/include/asm/cpufeature.h:9,
> > > > >                  from
> > > > > /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/hwcap.h:90,
> > > >
> > > >
> > > > It looks there's a cyclic header including, which leads to this build error.
> > > > I checked https://github.com/kvm-riscv/linux/tree/master and
> > > > https://github.com/torvalds/linux/tree/master, but I don't see
> > > > "asm/cpufeature.h" is included in asm/hwcap.h:90, maybe I miss
> > > something,
> > > > could you help point me to the repo/branch I should work on?
> > >
> > > From MAINTAINERS:
> > >       RISC-V ARCHITECTURE
> > >       ...
> > >       T:      git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git
> > >
> > > The for-next branch there is what you should be basing work on top of.
> > > AFAICT, you've made bitops.h include hwcap.h while cpufeature.h includes
> > > both bitops.h (indirectly) and hwcap.h.
> >
> > Thanks for the info, but I can't reproduce Anup's build error with this for-next branch, cpufeature.h is not included by hwcap.h there.
> > Maybe Anup could help double check the test environment?
> 
> I figured that cpufeature.h included in hwcap.h is added by
> Drew's patch "RISC-V: Enable cbo.zero in usermode"

I think we should probably split hwcap.h into two parts. The defines stay
and the rest can move to cpufeature.h

Thanks,
drew

> 
> I had tried this patch on-top-of dev-upstream branch of
> https://github.com/ventanamicro/linux.git
> 
> Regards,
> Anup
> 
> >
> > BRs,
> > Xiao
> >
> >
> > >
> > > Hope that helps,
> > > Conor.
> > >
> > > > >                  from
> > > > > /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/bitops.h:26,
> > > > >                  from
> > > > > /home/anup/Work/riscv-test/linux/include/linux/bitops.h:68,
> > > > >                  from /home/anup/Work/riscv-test/linux/include/linux/log2.h:12,
> > > > >                  from /home/anup/Work/riscv-test/linux/kernel/bounds.c:13:
> > > > > /home/anup/Work/riscv-test/linux/include/linux/find.h: In function
> > > > > 'find_next_bit':
> > > > > /home/anup/Work/riscv-test/linux/include/linux/find.h:64:30: error:
> > > > > implicit declaration of function '__ffs'
> > > > > [-Werror=implicit-function-declaration]
> > > > >    64 |                 return val ? __ffs(val) : size;
> > > > >
> > > > > Regards,
> > > > > Anup
> > > > >
> > > > >
> > > > > >
> > > > > > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > > > > > ---
> > > > > >  arch/riscv/include/asm/bitops.h       | 266
> > > +++++++++++++++++++++++++-
> > > > > >  drivers/firmware/efi/libstub/Makefile |   2 +-
> > > > > >  2 files changed, 264 insertions(+), 4 deletions(-)
> > > > > >
> > > > > > diff --git a/arch/riscv/include/asm/bitops.h
> > > > > b/arch/riscv/include/asm/bitops.h
> > > > > > index 3540b690944b..f727f6489cd5 100644
> > > > > > --- a/arch/riscv/include/asm/bitops.h
> > > > > > +++ b/arch/riscv/include/asm/bitops.h
> > > > > > @@ -15,13 +15,273 @@
> > > > > >  #include <asm/barrier.h>
> > > > > >  #include <asm/bitsperlong.h>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH] RISC-V: Optimize bitops with Zbb extension
  2023-08-31 17:00           ` Andrew Jones
@ 2023-09-05  9:46             ` Wang, Xiao W
  2023-09-05 10:31               ` Andrew Jones
  0 siblings, 1 reply; 15+ messages in thread
From: Wang, Xiao W @ 2023-09-05  9:46 UTC (permalink / raw)
  To: Andrew Jones, Anup Patel
  Cc: Conor Dooley, Anup Patel, paul.walmsley, palmer, aou, ardb, Li,
	Haicheng, linux-riscv, linux-efi, linux-kernel



> -----Original Message-----
> From: Andrew Jones <ajones@ventanamicro.com>
> Sent: Friday, September 1, 2023 1:00 AM
> To: Anup Patel <apatel@ventanamicro.com>
> Cc: Wang, Xiao W <xiao.w.wang@intel.com>; Conor Dooley
> <conor.dooley@microchip.com>; Anup Patel <anup@brainfault.org>;
> paul.walmsley@sifive.com; palmer@dabbelt.com; aou@eecs.berkeley.edu;
> ardb@kernel.org; Li, Haicheng <haicheng.li@intel.com>; linux-
> riscv@lists.infradead.org; linux-efi@vger.kernel.org; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> 
> On Thu, Aug 31, 2023 at 09:37:30PM +0530, Anup Patel wrote:
> > +Andrew
> >
> > On Thu, Aug 31, 2023 at 9:29 PM Wang, Xiao W <xiao.w.wang@intel.com>
> wrote:
> > >
> > >
> > > > -----Original Message-----
> > > > From: Conor Dooley <conor.dooley@microchip.com>
> > > > Sent: Wednesday, August 30, 2023 2:59 PM
> > > > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > > > Cc: Anup Patel <anup@brainfault.org>; paul.walmsley@sifive.com;
> > > > palmer@dabbelt.com; aou@eecs.berkeley.edu; ardb@kernel.org; Li,
> Haicheng
> > > > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > > > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > > >
> > > > On Wed, Aug 30, 2023 at 06:14:12AM +0000, Wang, Xiao W wrote:
> > > > > Hi,
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Anup Patel <anup@brainfault.org>
> > > > > > Sent: Tuesday, August 29, 2023 7:08 PM
> > > > > > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > > > > > Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> > > > > > aou@eecs.berkeley.edu; ardb@kernel.org; Li, Haicheng
> > > > > > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > > > > > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > > > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > > > > >
> > > > > > On Sun, Aug 6, 2023 at 8:09 AM Xiao Wang
> <xiao.w.wang@intel.com>
> > > > wrote:
> > > > > > >
> > > > > > > This patch leverages the alternative mechanism to dynamically
> optimize
> > > > > > > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> > > > > > > Zbb ext is not supported by the runtime CPU, legacy implementation
> is
> > > > > > > used. If Zbb is supported, then the optimized variants will be
> selected
> > > > > > > via alternative patching.
> > > > > > >
> > > > > > > The legacy bitops support is taken from the generic C
> implementation as
> > > > > > > fallback.
> > > > > > >
> > > > > > > If the parameter is a build-time constant, we leverage compiler
> builtin to
> > > > > > > calculate the result directly, this approach is inspired by x86 bitops
> > > > > > > implementation.
> > > > > > >
> > > > > > > EFI stub runs before the kernel, so alternative mechanism should not
> be
> > > > > > > used there, this patch introduces a macro EFI_NO_ALTERNATIVE for
> this
> > > > > > > purpose.
> > > > > >
> > > > > > I am getting the following compile error with this patch:
> > > > > >
> > > > > >   GEN     Makefile
> > > > > >   UPD     include/config/kernel.release
> > > > > >   UPD     include/generated/utsrelease.h
> > > > > >   CC      kernel/bounds.s
> > > > > > In file included from /home/anup/Work/riscv-
> > > > > > test/linux/include/linux/bitmap.h:9,
> > > > > >                  from
> > > > > > /home/anup/Work/riscv-
> > > > test/linux/arch/riscv/include/asm/cpufeature.h:9,
> > > > > >                  from
> > > > > > /home/anup/Work/riscv-
> test/linux/arch/riscv/include/asm/hwcap.h:90,
> > > > >
> > > > >
> > > > > It looks there's a cyclic header including, which leads to this build error.
> > > > > I checked https://github.com/kvm-riscv/linux/tree/master and
> > > > > https://github.com/torvalds/linux/tree/master, but I don't see
> > > > > "asm/cpufeature.h" is included in asm/hwcap.h:90, maybe I miss
> > > > something,
> > > > > could you help point me to the repo/branch I should work on?
> > > >
> > > > From MAINTAINERS:
> > > >       RISC-V ARCHITECTURE
> > > >       ...
> > > >       T:      git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git
> > > >
> > > > The for-next branch there is what you should be basing work on top of.
> > > > AFAICT, you've made bitops.h include hwcap.h while cpufeature.h
> includes
> > > > both bitops.h (indirectly) and hwcap.h.
> > >
> > > Thanks for the info, but I can't reproduce Anup's build error with this for-
> next branch, cpufeature.h is not included by hwcap.h there.
> > > Maybe Anup could help double check the test environment?
> >
> > I figured that cpufeature.h included in hwcap.h is added by
> > Drew's patch "RISC-V: Enable cbo.zero in usermode"
> 
> I think we should probably split hwcap.h into two parts. The defines stay
> and the rest can move to cpufeature.h

OK, I will base on your cbo.zero enabling patch series to make a new version. Will move some contents from hwcap.h into cpufeature.h so that we can remove the including of cpufeature.h in hwcap.h.

Thanks,
Xiao

> 
> Thanks,
> drew
> 
> >
> > I had tried this patch on-top-of dev-upstream branch of
> > https://github.com/ventanamicro/linux.git
> >
> > Regards,
> > Anup
> >
> > >
> > > BRs,
> > > Xiao
> > >
> > >
> > > >
> > > > Hope that helps,
> > > > Conor.
> > > >
> > > > > >                  from
> > > > > > /home/anup/Work/riscv-
> test/linux/arch/riscv/include/asm/bitops.h:26,
> > > > > >                  from
> > > > > > /home/anup/Work/riscv-test/linux/include/linux/bitops.h:68,
> > > > > >                  from /home/anup/Work/riscv-
> test/linux/include/linux/log2.h:12,
> > > > > >                  from /home/anup/Work/riscv-
> test/linux/kernel/bounds.c:13:
> > > > > > /home/anup/Work/riscv-test/linux/include/linux/find.h: In function
> > > > > > 'find_next_bit':
> > > > > > /home/anup/Work/riscv-test/linux/include/linux/find.h:64:30: error:
> > > > > > implicit declaration of function '__ffs'
> > > > > > [-Werror=implicit-function-declaration]
> > > > > >    64 |                 return val ? __ffs(val) : size;
> > > > > >
> > > > > > Regards,
> > > > > > Anup
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > > > > > > ---
> > > > > > >  arch/riscv/include/asm/bitops.h       | 266
> > > > +++++++++++++++++++++++++-
> > > > > > >  drivers/firmware/efi/libstub/Makefile |   2 +-
> > > > > > >  2 files changed, 264 insertions(+), 4 deletions(-)
> > > > > > >
> > > > > > > diff --git a/arch/riscv/include/asm/bitops.h
> > > > > > b/arch/riscv/include/asm/bitops.h
> > > > > > > index 3540b690944b..f727f6489cd5 100644
> > > > > > > --- a/arch/riscv/include/asm/bitops.h
> > > > > > > +++ b/arch/riscv/include/asm/bitops.h
> > > > > > > @@ -15,13 +15,273 @@
> > > > > > >  #include <asm/barrier.h>
> > > > > > >  #include <asm/bitsperlong.h>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
  2023-09-05  9:46             ` Wang, Xiao W
@ 2023-09-05 10:31               ` Andrew Jones
  2023-09-05 12:38                 ` Wang, Xiao W
  0 siblings, 1 reply; 15+ messages in thread
From: Andrew Jones @ 2023-09-05 10:31 UTC (permalink / raw)
  To: Wang, Xiao W
  Cc: Anup Patel, Conor Dooley, Anup Patel, paul.walmsley, palmer, aou,
	ardb, Li, Haicheng, linux-riscv, linux-efi, linux-kernel


Hi Xiao,

On Tue, Sep 05, 2023 at 09:46:20AM +0000, Wang, Xiao W wrote:
> 
> 
> > -----Original Message-----
> > From: Andrew Jones <ajones@ventanamicro.com>
> > Sent: Friday, September 1, 2023 1:00 AM
> > To: Anup Patel <apatel@ventanamicro.com>
> > Cc: Wang, Xiao W <xiao.w.wang@intel.com>; Conor Dooley
> > <conor.dooley@microchip.com>; Anup Patel <anup@brainfault.org>;
> > paul.walmsley@sifive.com; palmer@dabbelt.com; aou@eecs.berkeley.edu;
> > ardb@kernel.org; Li, Haicheng <haicheng.li@intel.com>; linux-
> > riscv@lists.infradead.org; linux-efi@vger.kernel.org; linux-
> > kernel@vger.kernel.org
> > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > 
> > On Thu, Aug 31, 2023 at 09:37:30PM +0530, Anup Patel wrote:
> > > +Andrew
> > >
> > > On Thu, Aug 31, 2023 at 9:29 PM Wang, Xiao W <xiao.w.wang@intel.com>
> > wrote:
> > > >
> > > >
> > > > > -----Original Message-----
> > > > > From: Conor Dooley <conor.dooley@microchip.com>
> > > > > Sent: Wednesday, August 30, 2023 2:59 PM
> > > > > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > > > > Cc: Anup Patel <anup@brainfault.org>; paul.walmsley@sifive.com;
> > > > > palmer@dabbelt.com; aou@eecs.berkeley.edu; ardb@kernel.org; Li,
> > Haicheng
> > > > > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > > > > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > > > >
> > > > > On Wed, Aug 30, 2023 at 06:14:12AM +0000, Wang, Xiao W wrote:
> > > > > > Hi,
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Anup Patel <anup@brainfault.org>
> > > > > > > Sent: Tuesday, August 29, 2023 7:08 PM
> > > > > > > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > > > > > > Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> > > > > > > aou@eecs.berkeley.edu; ardb@kernel.org; Li, Haicheng
> > > > > > > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > > > > > > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > > > > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > > > > > >
> > > > > > > On Sun, Aug 6, 2023 at 8:09 AM Xiao Wang
> > <xiao.w.wang@intel.com>
> > > > > wrote:
> > > > > > > >
> > > > > > > > This patch leverages the alternative mechanism to dynamically
> > optimize
> > > > > > > > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> > > > > > > > Zbb ext is not supported by the runtime CPU, legacy implementation
> > is
> > > > > > > > used. If Zbb is supported, then the optimized variants will be
> > selected
> > > > > > > > via alternative patching.
> > > > > > > >
> > > > > > > > The legacy bitops support is taken from the generic C
> > implementation as
> > > > > > > > fallback.
> > > > > > > >
> > > > > > > > If the parameter is a build-time constant, we leverage compiler
> > builtin to
> > > > > > > > calculate the result directly, this approach is inspired by x86 bitops
> > > > > > > > implementation.
> > > > > > > >
> > > > > > > > EFI stub runs before the kernel, so alternative mechanism should not
> > be
> > > > > > > > used there, this patch introduces a macro EFI_NO_ALTERNATIVE for
> > this
> > > > > > > > purpose.
> > > > > > >
> > > > > > > I am getting the following compile error with this patch:
> > > > > > >
> > > > > > >   GEN     Makefile
> > > > > > >   UPD     include/config/kernel.release
> > > > > > >   UPD     include/generated/utsrelease.h
> > > > > > >   CC      kernel/bounds.s
> > > > > > > In file included from /home/anup/Work/riscv-
> > > > > > > test/linux/include/linux/bitmap.h:9,
> > > > > > >                  from
> > > > > > > /home/anup/Work/riscv-
> > > > > test/linux/arch/riscv/include/asm/cpufeature.h:9,
> > > > > > >                  from
> > > > > > > /home/anup/Work/riscv-
> > test/linux/arch/riscv/include/asm/hwcap.h:90,
> > > > > >
> > > > > >
> > > > > > It looks there's a cyclic header including, which leads to this build error.
> > > > > > I checked https://github.com/kvm-riscv/linux/tree/master and
> > > > > > https://github.com/torvalds/linux/tree/master, but I don't see
> > > > > > "asm/cpufeature.h" is included in asm/hwcap.h:90, maybe I miss
> > > > > something,
> > > > > > could you help point me to the repo/branch I should work on?
> > > > >
> > > > > From MAINTAINERS:
> > > > >       RISC-V ARCHITECTURE
> > > > >       ...
> > > > >       T:      git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git
> > > > >
> > > > > The for-next branch there is what you should be basing work on top of.
> > > > > AFAICT, you've made bitops.h include hwcap.h while cpufeature.h
> > includes
> > > > > both bitops.h (indirectly) and hwcap.h.
> > > >
> > > > Thanks for the info, but I can't reproduce Anup's build error with this for-
> > next branch, cpufeature.h is not included by hwcap.h there.
> > > > Maybe Anup could help double check the test environment?
> > >
> > > I figured that cpufeature.h included in hwcap.h is added by
> > > Drew's patch "RISC-V: Enable cbo.zero in usermode"
> > 
> > I think we should probably split hwcap.h into two parts. The defines stay
> > and the rest can move to cpufeature.h
> 
> OK, I will base on your cbo.zero enabling patch series to make a new version. Will move some contents from hwcap.h into cpufeature.h so that we can remove the including of cpufeature.h in hwcap.h.
> 

I just realized I forgot to CC you on my v3 posting of the cbo.zero
series[1] yesterday. Sorry about that.

[1] https://lore.kernel.org/all/20230904170220.167816-8-ajones@ventanamicro.com/

Thanks,
drew

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH] RISC-V: Optimize bitops with Zbb extension
  2023-09-05 10:31               ` Andrew Jones
@ 2023-09-05 12:38                 ` Wang, Xiao W
  0 siblings, 0 replies; 15+ messages in thread
From: Wang, Xiao W @ 2023-09-05 12:38 UTC (permalink / raw)
  To: Andrew Jones
  Cc: Anup Patel, Conor Dooley, Anup Patel, paul.walmsley, palmer, aou,
	ardb, Li, Haicheng, linux-riscv, linux-efi, linux-kernel



> -----Original Message-----
> From: Andrew Jones <ajones@ventanamicro.com>
> Sent: Tuesday, September 5, 2023 6:32 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>
> Cc: Anup Patel <apatel@ventanamicro.com>; Conor Dooley
> <conor.dooley@microchip.com>; Anup Patel <anup@brainfault.org>;
> paul.walmsley@sifive.com; palmer@dabbelt.com; aou@eecs.berkeley.edu;
> ardb@kernel.org; Li, Haicheng <haicheng.li@intel.com>; linux-
> riscv@lists.infradead.org; linux-efi@vger.kernel.org; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> 
> 
> Hi Xiao,
> 
> On Tue, Sep 05, 2023 at 09:46:20AM +0000, Wang, Xiao W wrote:
> >
> >
> > > -----Original Message-----
> > > From: Andrew Jones <ajones@ventanamicro.com>
> > > Sent: Friday, September 1, 2023 1:00 AM
> > > To: Anup Patel <apatel@ventanamicro.com>
> > > Cc: Wang, Xiao W <xiao.w.wang@intel.com>; Conor Dooley
> > > <conor.dooley@microchip.com>; Anup Patel <anup@brainfault.org>;
> > > paul.walmsley@sifive.com; palmer@dabbelt.com; aou@eecs.berkeley.edu;
> > > ardb@kernel.org; Li, Haicheng <haicheng.li@intel.com>; linux-
> > > riscv@lists.infradead.org; linux-efi@vger.kernel.org; linux-
> > > kernel@vger.kernel.org
> > > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > >
> > > On Thu, Aug 31, 2023 at 09:37:30PM +0530, Anup Patel wrote:
> > > > +Andrew
> > > >
> > > > On Thu, Aug 31, 2023 at 9:29 PM Wang, Xiao W
> <xiao.w.wang@intel.com>
> > > wrote:
> > > > >
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Conor Dooley <conor.dooley@microchip.com>
> > > > > > Sent: Wednesday, August 30, 2023 2:59 PM
> > > > > > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > > > > > Cc: Anup Patel <anup@brainfault.org>; paul.walmsley@sifive.com;
> > > > > > palmer@dabbelt.com; aou@eecs.berkeley.edu; ardb@kernel.org; Li,
> > > Haicheng
> > > > > > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > > > > > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > > > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > > > > >
> > > > > > On Wed, Aug 30, 2023 at 06:14:12AM +0000, Wang, Xiao W wrote:
> > > > > > > Hi,
> > > > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: Anup Patel <anup@brainfault.org>
> > > > > > > > Sent: Tuesday, August 29, 2023 7:08 PM
> > > > > > > > To: Wang, Xiao W <xiao.w.wang@intel.com>
> > > > > > > > Cc: paul.walmsley@sifive.com; palmer@dabbelt.com;
> > > > > > > > aou@eecs.berkeley.edu; ardb@kernel.org; Li, Haicheng
> > > > > > > > <haicheng.li@intel.com>; linux-riscv@lists.infradead.org; linux-
> > > > > > > > efi@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > > > > > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension
> > > > > > > >
> > > > > > > > On Sun, Aug 6, 2023 at 8:09 AM Xiao Wang
> > > <xiao.w.wang@intel.com>
> > > > > > wrote:
> > > > > > > > >
> > > > > > > > > This patch leverages the alternative mechanism to dynamically
> > > optimize
> > > > > > > > > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> > > > > > > > > Zbb ext is not supported by the runtime CPU, legacy
> implementation
> > > is
> > > > > > > > > used. If Zbb is supported, then the optimized variants will be
> > > selected
> > > > > > > > > via alternative patching.
> > > > > > > > >
> > > > > > > > > The legacy bitops support is taken from the generic C
> > > implementation as
> > > > > > > > > fallback.
> > > > > > > > >
> > > > > > > > > If the parameter is a build-time constant, we leverage compiler
> > > builtin to
> > > > > > > > > calculate the result directly, this approach is inspired by x86
> bitops
> > > > > > > > > implementation.
> > > > > > > > >
> > > > > > > > > EFI stub runs before the kernel, so alternative mechanism should
> not
> > > be
> > > > > > > > > used there, this patch introduces a macro EFI_NO_ALTERNATIVE
> for
> > > this
> > > > > > > > > purpose.
> > > > > > > >
> > > > > > > > I am getting the following compile error with this patch:
> > > > > > > >
> > > > > > > >   GEN     Makefile
> > > > > > > >   UPD     include/config/kernel.release
> > > > > > > >   UPD     include/generated/utsrelease.h
> > > > > > > >   CC      kernel/bounds.s
> > > > > > > > In file included from /home/anup/Work/riscv-
> > > > > > > > test/linux/include/linux/bitmap.h:9,
> > > > > > > >                  from
> > > > > > > > /home/anup/Work/riscv-
> > > > > > test/linux/arch/riscv/include/asm/cpufeature.h:9,
> > > > > > > >                  from
> > > > > > > > /home/anup/Work/riscv-
> > > test/linux/arch/riscv/include/asm/hwcap.h:90,
> > > > > > >
> > > > > > >
> > > > > > > It looks there's a cyclic header including, which leads to this build
> error.
> > > > > > > I checked https://github.com/kvm-riscv/linux/tree/master and
> > > > > > > https://github.com/torvalds/linux/tree/master, but I don't see
> > > > > > > "asm/cpufeature.h" is included in asm/hwcap.h:90, maybe I miss
> > > > > > something,
> > > > > > > could you help point me to the repo/branch I should work on?
> > > > > >
> > > > > > From MAINTAINERS:
> > > > > >       RISC-V ARCHITECTURE
> > > > > >       ...
> > > > > >       T:      git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git
> > > > > >
> > > > > > The for-next branch there is what you should be basing work on top
> of.
> > > > > > AFAICT, you've made bitops.h include hwcap.h while cpufeature.h
> > > includes
> > > > > > both bitops.h (indirectly) and hwcap.h.
> > > > >
> > > > > Thanks for the info, but I can't reproduce Anup's build error with this
> for-
> > > next branch, cpufeature.h is not included by hwcap.h there.
> > > > > Maybe Anup could help double check the test environment?
> > > >
> > > > I figured that cpufeature.h included in hwcap.h is added by
> > > > Drew's patch "RISC-V: Enable cbo.zero in usermode"
> > >
> > > I think we should probably split hwcap.h into two parts. The defines stay
> > > and the rest can move to cpufeature.h
> >
> > OK, I will base on your cbo.zero enabling patch series to make a new version.
> Will move some contents from hwcap.h into cpufeature.h so that we can
> remove the including of cpufeature.h in hwcap.h.
> >
> 
> I just realized I forgot to CC you on my v3 posting of the cbo.zero
> series[1] yesterday. Sorry about that.
> 
> [1] https://lore.kernel.org/all/20230904170220.167816-8-
> ajones@ventanamicro.com/
> 
> Thanks,
> drew

NP, I will take a look at that.

BRs,
Xiao

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2023-09-05 17:08 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-06  2:47 [PATCH] RISC-V: Optimize bitops with Zbb extension Xiao Wang
2023-08-06  9:38 ` Ard Biesheuvel
2023-08-06 10:24   ` Wang, Xiao W
2023-08-27  9:25     ` Wang, Xiao W
2023-08-28 10:28       ` Ard Biesheuvel
2023-08-30  5:46         ` Wang, Xiao W
2023-08-29 11:08 ` Anup Patel
2023-08-30  6:14   ` Wang, Xiao W
2023-08-30  6:59     ` Conor Dooley
2023-08-31 15:59       ` Wang, Xiao W
2023-08-31 16:07         ` Anup Patel
2023-08-31 17:00           ` Andrew Jones
2023-09-05  9:46             ` Wang, Xiao W
2023-09-05 10:31               ` Andrew Jones
2023-09-05 12:38                 ` Wang, Xiao W

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).