From: "Jan Beulich" <JBeulich@suse.com>
To: xen-devel <xen-devel@lists.xenproject.org>
Cc: Stefano Stabellini <sstabellini@kernel.org>, Wei Liu <wl@xen.org>,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
	George Dunlap <George.Dunlap@eu.citrix.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>, Tim Deegan <tim@xen.org>,
	Julien Grall <julien.grall@arm.com>
Subject: [PATCH 1/4] bitops: speed up hweight<N>()
Date: Fri, 31 May 2019 03:51:50 -0600	[thread overview]
Message-ID: <5CF0F9360200007800233E01@prv1-mh.provo.novell.com> (raw)
In-Reply-To: <5CF0F8530200007800233DE0@prv1-mh.provo.novell.com>

Algorithmically this gets us in line with current Linux, where the same
change happened about 13 years ago. See in particular Linux commits
f9b4192923 ("bitops: hweight() speedup") and 0136611c62 ("optimize
hweight64 for x86_64").
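
For illustration, here is a standalone equivalent of the multiply variant
used by the new generic_hweight32() (not part of the patch; the helper
name and the self-test against GCC's __builtin_popcount() are purely
illustrative):

  #include <assert.h>
  #include <stdint.h>

  static unsigned int hweight32_sketch(uint32_t w)
  {
      w -= (w >> 1) & 0x55555555;                      /* per-2-bit counts */
      w  = (w & 0x33333333) + ((w >> 2) & 0x33333333); /* per-nibble counts */
      w  = (w + (w >> 4)) & 0x0f0f0f0f;                /* per-byte counts */
      return (w * 0x01010101) >> 24;  /* sum of all bytes lands in the top byte */
  }

  int main(void)
  {
      uint32_t samples[] = { 0, 1, 0xff, 0x12345678u, 0x80000000u, 0xffffffffu };
      for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
          assert(hweight32_sketch(samples[i]) ==
                 (unsigned int)__builtin_popcount(samples[i]));
      return 0;
  }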

Kconfig changes for actually setting HAS_FAST_MULTIPLY will follow.
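
Where that option is not selected, the per-byte partial counts get folded
together by shifting and adding instead of by a single multiplication.
Again purely for illustration (not part of the patch; hypothetical helper
name, self-test against GCC's __builtin_popcountll()), the 64-bit fallback
path amounts to:

  #include <assert.h>
  #include <stdint.h>

  static unsigned int hweight64_sketch(uint64_t w)
  {
      w -= (w >> 1) & 0x5555555555555555ull;
      w  = (w & 0x3333333333333333ull) + ((w >> 2) & 0x3333333333333333ull);
      w  = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0full;  /* per-byte counts */
      w += w >> 8;                                  /* fold bytes ...    */
      w += w >> 16;                                 /* ... pairwise ...  */
      return (w + (w >> 32)) & 0xff;                /* ... into byte 0   */
  }

  int main(void)
  {
      uint64_t samples[] = { 0, 1, 0xffull, 0x8000000000000000ull,
                             0x0123456789abcdefull, ~0ull };
      for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
          assert(hweight64_sketch(samples[i]) ==
                 (unsigned int)__builtin_popcountll(samples[i]));
      return 0;
  }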

Take the opportunity to also change generic_hweight64()'s return type to
unsigned int.

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/common/Kconfig
+++ b/xen/common/Kconfig
@@ -31,6 +31,9 @@ config HAS_DEVICE_TREE
 config HAS_EX_TABLE
 	bool
 
+config HAS_FAST_MULTIPLY
+	bool
+
 config MEM_ACCESS_ALWAYS_ON
 	bool
 
--- a/xen/include/xen/bitops.h
+++ b/xen/include/xen/bitops.h
@@ -153,41 +153,54 @@ static __inline__ int get_count_order(un
 
 static inline unsigned int generic_hweight32(unsigned int w)
 {
-    unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
-    res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
-    res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
-    res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
-    return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
+    w -= (w >> 1) & 0x55555555;
+    w =  (w & 0x33333333) + ((w >> 2) & 0x33333333);
+    w =  (w + (w >> 4)) & 0x0f0f0f0f;
+
+#ifdef CONFIG_HAS_FAST_MULTIPLY
+    return (w * 0x01010101) >> 24;
+#else
+    w += w >> 8;
+
+    return (w + (w >> 16)) & 0xff;
+#endif
 }
 
 static inline unsigned int generic_hweight16(unsigned int w)
 {
-    unsigned int res = (w & 0x5555) + ((w >> 1) & 0x5555);
-    res = (res & 0x3333) + ((res >> 2) & 0x3333);
-    res = (res & 0x0F0F) + ((res >> 4) & 0x0F0F);
-    return (res & 0x00FF) + ((res >> 8) & 0x00FF);
+    w -= ((w >> 1) & 0x5555);
+    w =  (w & 0x3333) + ((w >> 2) & 0x3333);
+    w =  (w + (w >> 4)) & 0x0f0f;
+
+    return (w + (w >> 8)) & 0xff;
 }
 
 static inline unsigned int generic_hweight8(unsigned int w)
 {
-    unsigned int res = (w & 0x55) + ((w >> 1) & 0x55);
-    res = (res & 0x33) + ((res >> 2) & 0x33);
-    return (res & 0x0F) + ((res >> 4) & 0x0F);
+    w -= ((w >> 1) & 0x55);
+    w =  (w & 0x33) + ((w >> 2) & 0x33);
+
+    return (w + (w >> 4)) & 0x0f;
 }
 
-static inline unsigned long generic_hweight64(__u64 w)
+static inline unsigned int generic_hweight64(uint64_t w)
 {
 #if BITS_PER_LONG < 64
     return generic_hweight32((unsigned int)(w >> 32)) +
         generic_hweight32((unsigned int)w);
 #else
-    u64 res;
-    res = (w & 0x5555555555555555ul) + ((w >> 1) & 0x5555555555555555ul);
-    res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
-    res = (res & 0x0F0F0F0F0F0F0F0Ful) + ((res >> 4) & 0x0F0F0F0F0F0F0F0Ful);
-    res = (res & 0x00FF00FF00FF00FFul) + ((res >> 8) & 0x00FF00FF00FF00FFul);
-    res = (res & 0x0000FFFF0000FFFFul) + ((res >> 16) & 0x0000FFFF0000FFFFul);
-    return (res & 0x00000000FFFFFFFFul) + ((res >> 32) & 0x00000000FFFFFFFFul);
+    w -= (w >> 1) & 0x5555555555555555ul;
+    w =  (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul);
+    w =  (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful;
+
+# ifdef CONFIG_HAS_FAST_MULTIPLY
+    return (w * 0x0101010101010101ul) >> 56;
+# else
+    w += w >> 8;
+    w += w >> 16;
+
+    return (w + (w >> 32)) & 0xFF;
+# endif
 #endif
 }
 





_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Thread overview: 37+ messages
2019-05-31  9:48 [PATCH 0/4] bitops: hweight<N>() improvements Jan Beulich
2019-05-31  9:51 ` [PATCH 1/4] bitops: speed up hweight<N>() Jan Beulich [this message]
2019-05-31 19:40   ` Andrew Cooper
2019-06-03  7:50     ` Jan Beulich
2019-06-03 10:02     ` Jan Beulich
2019-06-03 10:04       ` Andrew Cooper
2019-05-31  9:52 ` [PATCH 2/4] x86: further speed-up to hweight{32,64}() Jan Beulich
2019-05-31 19:23   ` Andrew Cooper
2019-06-03  7:52     ` Jan Beulich
2019-05-31  9:53 ` [PATCH RFC 3/4] Arm64: " Jan Beulich
2019-06-04 16:11   ` Julien Grall
2019-06-04 17:30     ` Robin Murphy
2019-06-05  7:42     ` Jan Beulich
2019-06-05  9:24       ` Julien Grall
2019-06-05  9:59         ` Jan Beulich
2019-06-05 11:05           ` Julien Grall
2019-06-06 15:29         ` Julien Grall
2019-05-31  9:54 ` [PATCH 4/4] x86: use POPCNT for hweight<N>() when available Jan Beulich
2019-05-31 20:43   ` Andrew Cooper
2019-06-03  8:13     ` Jan Beulich
2019-06-04  6:08       ` Jan Beulich
2019-06-04 19:07       ` Andrew Cooper
2019-06-05  8:52         ` Jan Beulich
