From: Yury Norov <yury.norov@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: Yury Norov <yury.norov@gmail.com>,
linux-m68k@lists.linux-m68k.org, linux-arch@vger.kernel.org,
linux-sh@vger.kernel.org, Alexey Klimov <aklimov@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>,
Andy Shevchenko <andriy.shevchenko@linux.intel.com>,
Arnd Bergmann <arnd@arndb.de>, David Sterba <dsterba@suse.com>,
Dennis Zhou <dennis@kernel.org>,
Geert Uytterhoeven <geert@linux-m68k.org>,
Jianpeng Ma <jianpeng.ma@intel.com>,
Joe Perches <joe@perches.com>,
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>,
Josh Poimboeuf <jpoimboe@redhat.com>,
Rasmus Villemoes <linux@rasmusvillemoes.dk>,
Rich Felker <dalias@libc.org>,
Stefano Brivio <sbrivio@redhat.com>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Wolfram Sang <wsa+renesas@sang-engineering.com>,
Yoshinori Sato <ysato@users.sourceforge.jp>
Subject: [PATCH 11/14] lib: add fast path for find_next_*_bit()
Date: Wed, 17 Feb 2021 20:05:09 -0800 [thread overview]
Message-ID: <20210218040512.709186-12-yury.norov@gmail.com> (raw)
In-Reply-To: <20210218040512.709186-1-yury.norov@gmail.com>
Similarly to the bitmap functions, users of find_next_*_bit() will benefit
if we handle the case of bitmaps that fit into a single word. In the
very best case, the compiler may replace the function call with a few
instructions.
This is a quite typical find_next_bit() user:
unsigned int cpumask_next(int n, const struct cpumask *srcp)
{
/* -1 is a legal arg here. */
if (n != -1)
cpumask_check(n);
return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1);
}
EXPORT_SYMBOL(cpumask_next);
On ARM64, if CONFIG_FAST_PATH is disabled, it generates:
0000000000000000 <cpumask_next>:
0: a9bf7bfd stp x29, x30, [sp, #-16]!
4: 11000402 add w2, w0, #0x1
8: aa0103e0 mov x0, x1
c: d2800401 mov x1, #0x40 // #64
10: 910003fd mov x29, sp
14: 93407c42 sxtw x2, w2
18: 94000000 bl 0 <find_next_bit>
1c: a8c17bfd ldp x29, x30, [sp], #16
20: d65f03c0 ret
24: d503201f nop
If CONFIG_FAST_PATH is enabled:
0000000000000140 <cpumask_next>:
140: 11000400 add w0, w0, #0x1
144: 93407c00 sxtw x0, w0
148: f100fc1f cmp x0, #0x3f
14c: 54000168 b.hi 178 <cpumask_next+0x38> // b.pmore
150: f9400023 ldr x3, [x1]
154: 92800001 mov x1, #0xffffffffffffffff // #-1
158: 9ac02020 lsl x0, x1, x0
15c: 52800802 mov w2, #0x40 // #64
160: 8a030001 and x1, x0, x3
164: dac00020 rbit x0, x1
168: f100003f cmp x1, #0x0
16c: dac01000 clz x0, x0
170: 1a800040 csel w0, w2, w0, eq // eq = none
174: d65f03c0 ret
178: 52800800 mov w0, #0x40 // #64
17c: d65f03c0 ret
The find_next_bit() call is replaced with 6 instructions. (And I suspect
we can improve GENMASK() for better code generation.) find_next_bit()
itself is 41 instructions.
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
include/asm-generic/bitops/find.h | 30 ++++++++++++++++++++++++++++++
include/asm-generic/bitops/le.h | 21 +++++++++++++++++++++
2 files changed, 51 insertions(+)
diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h
index 7ad70dab8e93..8bd7a33a889d 100644
--- a/include/asm-generic/bitops/find.h
+++ b/include/asm-generic/bitops/find.h
@@ -20,6 +20,16 @@ static inline
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
+ if (SMALL_CONST(size - 1)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
}
#endif
@@ -40,6 +50,16 @@ unsigned long find_next_and_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size,
unsigned long offset)
{
+ if (SMALL_CONST(size - 1)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr1 & *addr2 & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
}
#endif
@@ -58,6 +78,16 @@ static inline
unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
+ if (SMALL_CONST(size - 1)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr | ~GENMASK(size - 1, offset);
+ return val == ~0UL ? size : ffz(val);
+ }
+
return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
}
#endif
diff --git a/include/asm-generic/bitops/le.h b/include/asm-generic/bitops/le.h
index 21305f6cea0b..18ebcf639d7f 100644
--- a/include/asm-generic/bitops/le.h
+++ b/include/asm-generic/bitops/le.h
@@ -5,6 +5,7 @@
#include <asm-generic/bitops/find.h>
#include <asm/types.h>
#include <asm/byteorder.h>
+#include <linux/swab.h>
#if defined(__LITTLE_ENDIAN)
@@ -37,6 +38,16 @@ static inline
unsigned long find_next_zero_bit_le(const void *addr, unsigned
long size, unsigned long offset)
{
+ if (SMALL_CONST(size)) {
+ unsigned long val = *(const unsigned long *)addr;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = swab(val) | ~GENMASK(size - 1, offset);
+ return val == ~0UL ? size : ffz(val);
+ }
+
return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
}
#endif
@@ -46,6 +57,16 @@ static inline
unsigned long find_next_bit_le(const void *addr, unsigned
long size, unsigned long offset)
{
+ if (SMALL_CONST(size)) {
+ unsigned long val = *(const unsigned long *)addr;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = swab(val) & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
}
#endif
--
2.25.1
next prev parent reply other threads:[~2021-02-18 4:08 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-02-18 4:04 [PATCH v3 00/14] lib/find_bit: fast path for small bitmaps Yury Norov
2021-02-18 4:04 ` [PATCH 01/14] tools: disable -Wno-type-limits Yury Norov
2021-02-18 4:05 ` [PATCH 02/14] tools: bitmap: sync function declarations with the kernel Yury Norov
2021-02-18 4:05 ` [PATCH 03/14] arch: rearrange headers inclusion order in asm/bitops for m68k and sh Yury Norov
2021-02-18 4:05 ` [PATCH 04/14] lib: introduce BITS_{FIRST,LAST} macro Yury Norov
2021-02-18 22:51 ` Rasmus Villemoes
2021-03-12 4:30 ` Yury Norov
2021-02-18 4:05 ` [PATCH 05/14] tools: sync BITS_MASK macros with the kernel Yury Norov
2021-02-18 4:05 ` [PATCH 06/14] bitsperlong.h: introduce SMALL_CONST() macro Yury Norov
2021-02-18 23:07 ` Rasmus Villemoes
2021-03-12 5:28 ` Yury Norov
2021-03-12 9:12 ` Rasmus Villemoes
2021-03-12 21:53 ` Yury Norov
2021-02-18 4:05 ` [PATCH 07/14] tools: " Yury Norov
2021-02-18 4:05 ` [PATCH 08/14] lib/Kconfig: introduce FAST_PATH option Yury Norov
2021-02-18 15:15 ` Andy Shevchenko
2021-02-18 19:24 ` Yury Norov
2021-02-19 10:52 ` Andy Shevchenko
2021-02-18 4:05 ` [PATCH 09/14] lib: inline _find_next_bit() wrappers Yury Norov
2021-02-18 4:05 ` [PATCH 10/14] tools: sync find_next_bit implementation Yury Norov
2021-02-18 4:05 ` Yury Norov [this message]
2021-02-18 15:24 ` [PATCH 11/14] lib: add fast path for find_next_*_bit() Andy Shevchenko
2021-02-18 4:05 ` [PATCH 12/14] lib: add fast path for find_first_*_bit() and find_last_bit() Yury Norov
2021-02-18 4:05 ` [PATCH 13/14] tools: sync lib/find_bit implementation Yury Norov
2021-02-18 4:05 ` [PATCH 14/14] MAINTAINERS: Add entry for the bitmap API Yury Norov
2021-02-18 15:28 ` Andy Shevchenko
2021-02-18 15:34 ` Yury Norov
2021-03-12 9:15 ` Rasmus Villemoes
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210218040512.709186-12-yury.norov@gmail.com \
--to=yury.norov@gmail.com \
--cc=aklimov@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=andriy.shevchenko@linux.intel.com \
--cc=arnd@arndb.de \
--cc=dalias@libc.org \
--cc=dennis@kernel.org \
--cc=dsterba@suse.com \
--cc=geert@linux-m68k.org \
--cc=glaubitz@physik.fu-berlin.de \
--cc=jianpeng.ma@intel.com \
--cc=joe@perches.com \
--cc=jpoimboe@redhat.com \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-m68k@lists.linux-m68k.org \
--cc=linux-sh@vger.kernel.org \
--cc=linux@rasmusvillemoes.dk \
--cc=richard.weiyang@linux.alibaba.com \
--cc=sbrivio@redhat.com \
--cc=wsa+renesas@sang-engineering.com \
--cc=ysato@users.sourceforge.jp \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.