All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yury Norov <yury.norov@gmail.com>
To: linux-kernel@vger.kernel.org, Andrew Morton <akpm@linux-foundation.org>
Cc: Yury Norov <yury.norov@gmail.com>,
	linux-m68k@lists.linux-m68k.org, linux-arch@vger.kernel.org,
	linux-sh@vger.kernel.org, Alexey Klimov <aklimov@redhat.com>,
	Andy Shevchenko <andriy.shevchenko@linux.intel.com>,
	Arnd Bergmann <arnd@arndb.de>, David Sterba <dsterba@suse.com>,
	Dennis Zhou <dennis@kernel.org>,
	Geert Uytterhoeven <geert@linux-m68k.org>,
	Jianpeng Ma <jianpeng.ma@intel.com>,
	Joe Perches <joe@perches.com>,
	John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Rasmus Villemoes <linux@rasmusvillemoes.dk>,
	Rich Felker <dalias@libc.org>,
	Stefano Brivio <sbrivio@redhat.com>,
	Wei Yang <richard.weiyang@linux.alibaba.com>,
	Wolfram Sang <wsa+renesas@sang-engineering.com>,
	Yoshinori Sato <ysato@users.sourceforge.jp>
Subject: [PATCH 09/12] lib: add fast path for find_next_*_bit()
Date: Wed, 31 Mar 2021 17:31:50 -0700	[thread overview]
Message-ID: <20210401003153.97325-10-yury.norov@gmail.com> (raw)
In-Reply-To: <20210401003153.97325-1-yury.norov@gmail.com>

Similarly to bitmap functions, find_next_*_bit() users will benefit
if we'll handle a case of bitmaps that fit into a single word inline.
In the very best case, the compiler may replace a function call with
a few instructions.

This is the quite typical find_next_bit() user:

	unsigned int cpumask_next(int n, const struct cpumask *srcp)
	{
		/* -1 is a legal arg here. */
		if (n != -1)
			cpumask_check(n);
		return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1);
	}
	EXPORT_SYMBOL(cpumask_next);

Currently, on ARM64 the generated code looks like this:
	0000000000000000 <cpumask_next>:
	   0:   a9bf7bfd        stp     x29, x30, [sp, #-16]!
	   4:   11000402        add     w2, w0, #0x1
	   8:   aa0103e0        mov     x0, x1
	   c:   d2800401        mov     x1, #0x40                       // #64
	  10:   910003fd        mov     x29, sp
	  14:   93407c42        sxtw    x2, w2
	  18:   94000000        bl      0 <find_next_bit>
	  1c:   a8c17bfd        ldp     x29, x30, [sp], #16
	  20:   d65f03c0        ret
	  24:   d503201f        nop

After applying this patch:
	0000000000000140 <cpumask_next>:
	 140:   11000400        add     w0, w0, #0x1
	 144:   93407c00        sxtw    x0, w0
	 148:   f100fc1f        cmp     x0, #0x3f
	 14c:   54000168        b.hi    178 <cpumask_next+0x38>  // b.pmore
	 150:   f9400023        ldr     x3, [x1]
	 154:   92800001        mov     x1, #0xffffffffffffffff         // #-1
	 158:   9ac02020        lsl     x0, x1, x0
	 15c:   52800802        mov     w2, #0x40                       // #64
	 160:   8a030001        and     x1, x0, x3
	 164:   dac00020        rbit    x0, x1
	 168:   f100003f        cmp     x1, #0x0
	 16c:   dac01000        clz     x0, x0
	 170:   1a800040        csel    w0, w2, w0, eq  // eq = none
	 174:   d65f03c0        ret
	 178:   52800800        mov     w0, #0x40                       // #64
	 17c:   d65f03c0        ret

find_next_bit() call is replaced with 6 instructions. find_next_bit()
itself is 41 instructions plus function call overhead.

Despite inlining, the scripts/bloat-o-meter report smaller .text size
after applying the series:
	add/remove: 11/9 grow/shrink: 233/176 up/down: 5780/-6768 (-988)

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
---
 include/asm-generic/bitops/find.h | 30 ++++++++++++++++++++++++++++++
 include/asm-generic/bitops/le.h   | 21 +++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h
index 7ad70dab8e93..4148c74a1e4d 100644
--- a/include/asm-generic/bitops/find.h
+++ b/include/asm-generic/bitops/find.h
@@ -20,6 +20,16 @@ static inline
 unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
 			    unsigned long offset)
 {
+	if (small_const_nbits(size)) {
+		unsigned long val;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = *addr & GENMASK(size - 1, offset);
+		return val ? __ffs(val) : size;
+	}
+
 	return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
 }
 #endif
@@ -40,6 +50,16 @@ unsigned long find_next_and_bit(const unsigned long *addr1,
 		const unsigned long *addr2, unsigned long size,
 		unsigned long offset)
 {
+	if (small_const_nbits(size)) {
+		unsigned long val;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = *addr1 & *addr2 & GENMASK(size - 1, offset);
+		return val ? __ffs(val) : size;
+	}
+
 	return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
 }
 #endif
@@ -58,6 +78,16 @@ static inline
 unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
 				 unsigned long offset)
 {
+	if (small_const_nbits(size)) {
+		unsigned long val;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = *addr | ~GENMASK(size - 1, offset);
+		return val == ~0UL ? size : ffz(val);
+	}
+
 	return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
 }
 #endif
diff --git a/include/asm-generic/bitops/le.h b/include/asm-generic/bitops/le.h
index 21305f6cea0b..5a28629cbf4d 100644
--- a/include/asm-generic/bitops/le.h
+++ b/include/asm-generic/bitops/le.h
@@ -5,6 +5,7 @@
 #include <asm-generic/bitops/find.h>
 #include <asm/types.h>
 #include <asm/byteorder.h>
+#include <linux/swab.h>
 
 #if defined(__LITTLE_ENDIAN)
 
@@ -37,6 +38,16 @@ static inline
 unsigned long find_next_zero_bit_le(const void *addr, unsigned
 		long size, unsigned long offset)
 {
+	if (small_const_nbits(size)) {
+		unsigned long val = *(const unsigned long *)addr;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = swab(val) | ~GENMASK(size - 1, offset);
+		return val == ~0UL ? size : ffz(val);
+	}
+
 	return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
 }
 #endif
@@ -46,6 +57,16 @@ static inline
 unsigned long find_next_bit_le(const void *addr, unsigned
 		long size, unsigned long offset)
 {
+	if (small_const_nbits(size)) {
+		unsigned long val = *(const unsigned long *)addr;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = swab(val) & GENMASK(size - 1, offset);
+		return val ? __ffs(val) : size;
+	}
+
 	return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
 }
 #endif
-- 
2.25.1


  parent reply	other threads:[~2021-04-01  0:33 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-01  0:31 [PATCH v6 00/12] lib/find_bit: fast path for small bitmaps Yury Norov
2021-04-01  0:31 ` [PATCH 01/12] tools: disable -Wno-type-limits Yury Norov
2021-04-01  0:31 ` [PATCH 02/12] tools: bitmap: sync function declarations with the kernel Yury Norov
2021-04-01  0:31 ` [PATCH 03/12] tools: sync BITMAP_LAST_WORD_MASK() macro " Yury Norov
2021-04-01  0:31 ` [PATCH 04/12] arch: rearrange headers inclusion order in asm/bitops for m68k and sh Yury Norov
2021-04-01  0:31 ` [PATCH 05/12] lib: extend the scope of small_const_nbits() macro Yury Norov
2021-04-01  8:35   ` Andy Shevchenko
2021-04-01  0:31 ` [PATCH 06/12] tools: sync small_const_nbits() macro with the kernel Yury Norov
2021-04-01  0:31 ` [PATCH 07/12] lib: inline _find_next_bit() wrappers Yury Norov
2021-04-01  8:37   ` Andy Shevchenko
2021-04-01  0:31 ` [PATCH 08/12] tools: sync find_next_bit implementation Yury Norov
2021-04-01  0:31 ` Yury Norov [this message]
2021-04-01  8:48   ` [PATCH 09/12] lib: add fast path for find_next_*_bit() Andy Shevchenko
2021-04-01  0:31 ` [PATCH 10/12] lib: add fast path for find_first_*_bit() and find_last_bit() Yury Norov
2021-04-01  4:21   ` kernel test robot
2021-04-01  8:58   ` Andy Shevchenko
2021-04-01  0:31 ` [PATCH 11/12] tools: sync lib/find_bit implementation Yury Norov
2021-05-10 15:27   ` Tetsuo Handa
2021-05-10 15:44     ` Andy Shevchenko
2021-05-10 17:21       ` Yury Norov
2021-05-10 22:51       ` Rikard Falkeborn
2021-05-11  7:28         ` Andy Shevchenko
2021-05-11 10:36           ` Rikard Falkeborn
2021-05-11 11:53             ` Tetsuo Handa
2021-05-11 20:37               ` Rikard Falkeborn
2021-05-12  7:48                 ` Arnd Bergmann
2021-05-12  8:15                   ` Rasmus Villemoes
2021-05-12  8:33                     ` Arnd Bergmann
2021-05-11 12:17             ` Andy Shevchenko
2021-04-01  0:31 ` [PATCH 12/12] MAINTAINERS: Add entry for the bitmap API Yury Norov
2021-04-01  9:14 ` [PATCH v6 00/12] lib/find_bit: fast path for small bitmaps Andy Shevchenko
2021-04-01  9:28   ` Arnd Bergmann
2021-04-01  9:50     ` Andy Shevchenko
2021-04-02  0:32       ` Andrew Morton
  -- strict thread matches above, loose matches on Subject: below --
2021-03-21 21:54 [PATCH v5 " Yury Norov
2021-03-21 21:54 ` [PATCH 09/12] lib: add fast path for find_next_*_bit() Yury Norov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210401003153.97325-10-yury.norov@gmail.com \
    --to=yury.norov@gmail.com \
    --cc=aklimov@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=andriy.shevchenko@linux.intel.com \
    --cc=arnd@arndb.de \
    --cc=dalias@libc.org \
    --cc=dennis@kernel.org \
    --cc=dsterba@suse.com \
    --cc=geert@linux-m68k.org \
    --cc=glaubitz@physik.fu-berlin.de \
    --cc=jianpeng.ma@intel.com \
    --cc=joe@perches.com \
    --cc=jpoimboe@redhat.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-m68k@lists.linux-m68k.org \
    --cc=linux-sh@vger.kernel.org \
    --cc=linux@rasmusvillemoes.dk \
    --cc=richard.weiyang@linux.alibaba.com \
    --cc=sbrivio@redhat.com \
    --cc=wsa+renesas@sang-engineering.com \
    --cc=ysato@users.sourceforge.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.