All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Xen-devel <xen-devel@lists.xenproject.org>
Cc: "Andrew Cooper" <andrew.cooper3@citrix.com>,
	"Jan Beulich" <JBeulich@suse.com>,
	"Roger Pau Monné" <roger.pau@citrix.com>, "Wei Liu" <wl@xen.org>,
	"Stefano Stabellini" <sstabellini@kernel.org>,
	"Julien Grall" <julien@xen.org>,
	"Volodymyr Babchuk" <Volodymyr_Babchuk@epam.com>,
	"Bertrand Marquis" <bertrand.marquis@arm.com>,
	"Michal Orzel" <michal.orzel@amd.com>,
	"Oleksii Kurochko" <oleksii.kurochko@gmail.com>,
	"Shawn Anastasio" <sanastasio@raptorengineering.com>,
	"consulting @ bugseng . com" <consulting@bugseng.com>,
	"Simone Ballarin" <simone.ballarin@bugseng.com>,
	"Federico Serafini" <federico.serafini@bugseng.com>,
	"Nicola Vetrini" <nicola.vetrini@bugseng.com>
Subject: [PATCH 2/7] xen/bitops: Implement ffs() in common logic
Date: Wed, 13 Mar 2024 17:27:11 +0000	[thread overview]
Message-ID: <20240313172716.2325427-3-andrew.cooper3@citrix.com> (raw)
In-Reply-To: <20240313172716.2325427-1-andrew.cooper3@citrix.com>

Allow the optimiser to eliminate the call completely, and use the compiler
builtin by default.  Architectures should only provide arch_ffs() if they think
they can do better than the compiler.

Confirm the expected behaviour with compile time and boot time tests.

For x86, correct the prototype, and simplify the asm() with the statement
given by the Intel architects to Linux about the behaviour on processors newer
than the 486.

For PPC, __builtin_ffs() is 1/3 of the size of the transform to
generic_fls().  Drop the definition entirely.

For ARM, simply rename ffs() to arch_ffs().  It appears that the
transformation to __builtin_clz() still makes better code than
__builtin_ffs().

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Julien Grall <julien@xen.org>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
CC: Bertrand Marquis <bertrand.marquis@arm.com>
CC: Michal Orzel <michal.orzel@amd.com>
CC: Oleksii Kurochko <oleksii.kurochko@gmail.com>
CC: Shawn Anastasio <sanastasio@raptorengineering.com>
CC: consulting@bugseng.com <consulting@bugseng.com>
CC: Simone Ballarin <simone.ballarin@bugseng.com>
CC: Federico Serafini <federico.serafini@bugseng.com>
CC: Nicola Vetrini <nicola.vetrini@bugseng.com>
---
 xen/arch/arm/include/asm/bitops.h |  2 +-
 xen/arch/ppc/include/asm/bitops.h |  1 -
 xen/arch/x86/include/asm/bitops.h | 19 +++++++++++++------
 xen/common/bitops.c               | 10 ++++++++++
 xen/include/xen/bitops.h          | 15 +++++++++++++++
 5 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/xen/arch/arm/include/asm/bitops.h b/xen/arch/arm/include/asm/bitops.h
index ab030b6cb032..09c6064274a7 100644
--- a/xen/arch/arm/include/asm/bitops.h
+++ b/xen/arch/arm/include/asm/bitops.h
@@ -157,7 +157,7 @@ static inline int fls(unsigned int x)
 }
 
 
-#define ffs(x) ({ unsigned int __t = (x); fls(ISOLATE_LSB(__t)); })
+#define arch_ffs(x) ({ unsigned int __t = (x); fls(ISOLATE_LSB(__t)); })
 #define ffsl(x) ({ unsigned long __t = (x); flsl(ISOLATE_LSB(__t)); })
 
 /**
diff --git a/xen/arch/ppc/include/asm/bitops.h b/xen/arch/ppc/include/asm/bitops.h
index 5820b9ce7bb5..635a3b4e3e33 100644
--- a/xen/arch/ppc/include/asm/bitops.h
+++ b/xen/arch/ppc/include/asm/bitops.h
@@ -173,7 +173,6 @@ static inline int __test_and_clear_bit(int nr, volatile void *addr)
 
 #define flsl(x) generic_flsl(x)
 #define fls(x) generic_fls(x)
-#define ffs(x) ({ unsigned int t_ = (x); fls(t_ & -t_); })
 #define ffsl(x) ({ unsigned long t_ = (x); flsl(t_ & -t_); })
 
 /* Based on linux/include/asm-generic/bitops/ffz.h */
diff --git a/xen/arch/x86/include/asm/bitops.h b/xen/arch/x86/include/asm/bitops.h
index 5a71afbc89d5..2c5b103cbbd9 100644
--- a/xen/arch/x86/include/asm/bitops.h
+++ b/xen/arch/x86/include/asm/bitops.h
@@ -430,16 +430,23 @@ static inline int ffsl(unsigned long x)
     return (int)r+1;
 }
 
-static inline int ffs(unsigned int x)
+static inline unsigned int arch_ffs(unsigned int x)
 {
-    int r;
+    int r = -1;
+
+    /*
+     * The AMD manual states that BSF won't modify the destination register if
+     * x=0.  The Intel manual states that the result is undefined, but the
+     * architects have said that the register is written back with its old
+     * value, possibly zero extended above 32 bits.
+     */
+    asm ( "bsf %[val], %[res]"
+          : [res] "+r" (r)
+          : [val] "rm" (x) );
 
-    asm ( "bsf %1,%0\n\t"
-          "jnz 1f\n\t"
-          "mov $-1,%0\n"
-          "1:" : "=r" (r) : "rm" (x));
     return r + 1;
 }
+#define arch_ffs arch_ffs
 
 /**
  * fls - find last bit set
diff --git a/xen/common/bitops.c b/xen/common/bitops.c
index 4c07191b4030..484df68768ad 100644
--- a/xen/common/bitops.c
+++ b/xen/common/bitops.c
@@ -34,8 +34,18 @@
         RUNTIME_CHECK(fn, val, res);            \
     } while ( 0 )
 
+static void test_ffs(void)
+{
+    /* unsigned int ffs(unsigned int) */
+    CHECK(ffs, 0, 0);
+    CHECK(ffs, 1, 1);
+    CHECK(ffs, 0x80000000U, 32);
+}
+
 static int __init cf_check test_bitops(void)
 {
+    test_ffs();
+
     return 0;
 }
 __initcall(test_bitops);
diff --git a/xen/include/xen/bitops.h b/xen/include/xen/bitops.h
index 9b40f20381a2..fb3645d9cf87 100644
--- a/xen/include/xen/bitops.h
+++ b/xen/include/xen/bitops.h
@@ -110,6 +110,21 @@ static inline int generic_flsl(unsigned long x)
 
 #include <asm/bitops.h>
 
+/*
+ * Find First Set bit.  Bits are labelled from 1.
+ */
+static always_inline __pure unsigned int ffs(unsigned int x)
+{
+    if ( __builtin_constant_p(x) )
+        return __builtin_ffs(x);
+
+#ifndef arch_ffs
+#define arch_ffs __builtin_ffs
+#endif
+
+    return arch_ffs(x);
+}
+
 /* --------------------- Please tidy below here --------------------- */
 
 #ifndef find_next_bit
-- 
2.30.2



  parent reply	other threads:[~2024-03-13 17:27 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-13 17:27 [PATCH 0/7] xen/bitops: Reduce the mess, starting with ffs() Andrew Cooper
2024-03-13 17:27 ` [PATCH 1/7] xen/bitops: Cleanup ahead of rearrangements Andrew Cooper
2024-03-13 18:39   ` Shawn Anastasio
2024-03-13 23:06   ` Andrew Cooper
2024-03-14 13:59   ` Jan Beulich
2024-03-13 17:27 ` Andrew Cooper [this message]
2024-03-14 14:16   ` [PATCH 2/7] xen/bitops: Implement ffs() in common logic Jan Beulich
2024-03-14 16:23     ` Andrew Cooper
2024-03-14 16:35       ` Jan Beulich
2024-03-13 17:27 ` [PATCH 3/7] xen/bitops: Implement ffsl() " Andrew Cooper
2024-03-13 17:48   ` Andrew Cooper
2024-03-14 13:45     ` Andrew Cooper
2024-03-13 18:16   ` Andrew Cooper
2024-03-13 17:27 ` [PATCH 4/7] xen/bitops: Delete generic_ffs{,l}() Andrew Cooper
2024-03-13 17:27 ` [PATCH 5/7] xen/bitops: Implement ffs64() in common logic Andrew Cooper
2024-03-14 15:56   ` Jan Beulich
2024-03-13 17:27 ` [PATCH 6/7] xen: Swap find_first_set_bit() for ffsl() - 1 Andrew Cooper
2024-03-14 14:30   ` Jan Beulich
2024-03-14 16:48     ` Oleksii
2024-03-14 16:55       ` Jan Beulich
2024-03-14 18:47     ` Andrew Cooper
2024-03-14 18:51       ` Andrew Cooper
2024-03-18  9:13         ` Jan Beulich
2024-03-18 12:27           ` Andrew Cooper
2024-03-13 17:27 ` [PATCH 7/7] xen/bitops: Delete find_first_set_bit() Andrew Cooper
2024-03-14 15:59   ` Jan Beulich
2024-03-14 17:14     ` Andrew Cooper
2024-03-15 13:48       ` Andrew Cooper
2024-03-15 14:16         ` Jan Beulich
2024-03-14 14:45 ` [RISCV] [PATCH 0/7] xen/bitops: Reduce the mess, starting with ffs() Andrew Cooper
2024-03-14 15:33   ` Jan Beulich
2024-03-14 15:55     ` Andrew Cooper
2024-03-14 16:32     ` Oleksii

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240313172716.2325427-3-andrew.cooper3@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=JBeulich@suse.com \
    --cc=Volodymyr_Babchuk@epam.com \
    --cc=bertrand.marquis@arm.com \
    --cc=consulting@bugseng.com \
    --cc=federico.serafini@bugseng.com \
    --cc=julien@xen.org \
    --cc=michal.orzel@amd.com \
    --cc=nicola.vetrini@bugseng.com \
    --cc=oleksii.kurochko@gmail.com \
    --cc=roger.pau@citrix.com \
    --cc=sanastasio@raptorengineering.com \
    --cc=simone.ballarin@bugseng.com \
    --cc=sstabellini@kernel.org \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.