* [PATCH 1/3] uapi: rename ext2_swab() to swab() and share globally in swab.h
@ 2020-01-03 20:28 Yury Norov
  2020-01-03 20:28 ` [PATCH 2/3] lib/find_bit.c: join _find_next_bit{_le} Yury Norov
  2020-01-03 20:28 ` [PATCH 3/3] lib/find_bit.c: uninline helper _find_next_bit() Yury Norov
  0 siblings, 2 replies; 6+ messages in thread
From: Yury Norov @ 2020-01-03 20:28 UTC (permalink / raw)
  To: Yury Norov, Thomas Gleixner, Andrew Morton, Allison Randal,
	William Breathitt Gray, linux-kernel

ext2_swab() is defined locally in lib/find_bit.c. However, it is specific
neither to ext2 nor to bitmaps.

There are many potential users of it, so rename it to just swab() and move
it to include/uapi/linux/swab.h.

The ABI guarantees that the size of unsigned long corresponds to
BITS_PER_LONG, therefore drop the unneeded casts.
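
For illustration, on a 64-bit machine the new helper reduces to a plain
byte reversal of the full word. A minimal userspace sketch, with GCC's
__builtin_bswap64 standing in for the kernel's __swab64 (assumption:
BITS_PER_LONG == 64; this is illustrative, not kernel code):

  #include <stdio.h>

  /* Userspace stand-in for the kernel's __swab(): byte-reverse a full
   * unsigned long. The kernel variant picks __swab64 or __swab32 based
   * on BITS_PER_LONG; a 64-bit long is assumed here. */
  static unsigned long swab(unsigned long x)
  {
          return __builtin_bswap64(x);
  }

  int main(void)
  {
          /* 0x1122334455667788 byte-reversed is 0x8877665544332211 */
          printf("%016lx\n", swab(0x1122334455667788UL));
          return 0;
  }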

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 include/linux/swab.h      |  1 +
 include/uapi/linux/swab.h | 10 ++++++++++
 lib/find_bit.c            | 16 ++--------------
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/include/linux/swab.h b/include/linux/swab.h
index e466fd159c857..bcff5149861a9 100644
--- a/include/linux/swab.h
+++ b/include/linux/swab.h
@@ -7,6 +7,7 @@
 # define swab16 __swab16
 # define swab32 __swab32
 # define swab64 __swab64
+# define swab __swab
 # define swahw32 __swahw32
 # define swahb32 __swahb32
 # define swab16p __swab16p
diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h
index 23cd84868cc3b..fa7f97da5b768 100644
--- a/include/uapi/linux/swab.h
+++ b/include/uapi/linux/swab.h
@@ -4,6 +4,7 @@
 
 #include <linux/types.h>
 #include <linux/compiler.h>
+#include <asm/bitsperlong.h>
 #include <asm/swab.h>
 
 /*
@@ -132,6 +133,15 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val)
 	__fswab64(x))
 #endif
 
+static __always_inline unsigned long __swab(const unsigned long y)
+{
+#if BITS_PER_LONG == 64
+	return __swab64(y);
+#else /* BITS_PER_LONG == 32 */
+	return __swab32(y);
+#endif
+}
+
 /**
  * __swahw32 - return a word-swapped 32-bit value
  * @x: value to wordswap
diff --git a/lib/find_bit.c b/lib/find_bit.c
index e35a76b291e69..06e503c339f37 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -149,18 +149,6 @@ EXPORT_SYMBOL(find_last_bit);
 
 #ifdef __BIG_ENDIAN
 
-/* include/linux/byteorder does not support "unsigned long" type */
-static inline unsigned long ext2_swab(const unsigned long y)
-{
-#if BITS_PER_LONG == 64
-	return (unsigned long) __swab64((u64) y);
-#elif BITS_PER_LONG == 32
-	return (unsigned long) __swab32((u32) y);
-#else
-#error BITS_PER_LONG not defined
-#endif
-}
-
 #if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
 static inline unsigned long _find_next_bit_le(const unsigned long *addr1,
 		const unsigned long *addr2, unsigned long nbits,
@@ -177,7 +165,7 @@ static inline unsigned long _find_next_bit_le(const unsigned long *addr1,
 	tmp ^= invert;
 
 	/* Handle 1st word. */
-	tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start));
+	tmp &= swab(BITMAP_FIRST_WORD_MASK(start));
 	start = round_down(start, BITS_PER_LONG);
 
 	while (!tmp) {
@@ -191,7 +179,7 @@ static inline unsigned long _find_next_bit_le(const unsigned long *addr1,
 		tmp ^= invert;
 	}
 
-	return min(start + __ffs(ext2_swab(tmp)), nbits);
+	return min(start + __ffs(swab(tmp)), nbits);
 }
 #endif
 
-- 
2.20.1


* [PATCH 2/3] lib/find_bit.c: join _find_next_bit{_le}
  2020-01-03 20:28 [PATCH 1/3] uapi: rename ext2_swab() to swab() and share globally in swab.h Yury Norov
@ 2020-01-03 20:28 ` Yury Norov
  2020-01-03 20:28 ` [PATCH 3/3] lib/find_bit.c: uninline helper _find_next_bit() Yury Norov
  1 sibling, 0 replies; 6+ messages in thread
From: Yury Norov @ 2020-01-03 20:28 UTC (permalink / raw)
  To: Yury Norov, Thomas Gleixner, Andrew Morton, Allison Randal,
	William Breathitt Gray, linux-kernel

_find_next_bit() and _find_next_bit_le() are very similar functions.
It's possible to join them by adding one parameter and a couple of
simple checks. This simplifies maintenance and makes it possible to
shrink the size of .text by uninlining the unified function (in
the following patch).
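
To illustrate why a single swab() is enough to cover the little-endian
case: on a big-endian CPU, bit 0 of a little-endian bitmap lives in the
lowest-addressed byte, which is the most significant byte of the native
word, so byte-reversing the native mask (and the final word before
__ffs()) lines the two layouts up. A minimal userspace sketch, with the
kernel definitions re-expanded locally (assumption: 64-bit big-endian
target; illustrative, not kernel code):

  #include <stdio.h>

  #define BITS_PER_LONG 64

  /* Local stand-ins for the kernel definitions used by the patch. */
  #define BITMAP_FIRST_WORD_MASK(start) \
          (~0UL << ((start) & (BITS_PER_LONG - 1)))

  static unsigned long swab(unsigned long x)
  {
          return __builtin_bswap64(x);
  }

  int main(void)
  {
          unsigned long start = 5;
          unsigned long mask = BITMAP_FIRST_WORD_MASK(start);

          /* Native bit order: clear bits 0..4 of the first word. */
          printf("native mask:  %016lx\n", mask);

          /* Byte-reversed mask, as applied to a word of a
           * little-endian bitmap read on a big-endian CPU. */
          printf("swabbed mask: %016lx\n", swab(mask));
          return 0;
  }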

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 lib/find_bit.c | 64 +++++++++++++++-----------------------------------
 1 file changed, 19 insertions(+), 45 deletions(-)

diff --git a/lib/find_bit.c b/lib/find_bit.c
index 06e503c339f37..c03cbecb2b1f6 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -17,9 +17,9 @@
 #include <linux/export.h>
 #include <linux/kernel.h>
 
-#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \
-		!defined(find_next_and_bit)
-
+#if !defined(find_next_bit) || !defined(find_next_zero_bit) ||			\
+	!defined(find_next_bit_le) || !defined(find_next_zero_bit_le) ||	\
+	!defined(find_next_and_bit)
 /*
  * This is a common helper function for find_next_bit, find_next_zero_bit, and
  * find_next_and_bit. The differences are:
@@ -29,9 +29,9 @@
  */
 static inline unsigned long _find_next_bit(const unsigned long *addr1,
 		const unsigned long *addr2, unsigned long nbits,
-		unsigned long start, unsigned long invert)
+		unsigned long start, unsigned long invert, unsigned long le)
 {
-	unsigned long tmp;
+	unsigned long tmp, mask;
 
 	if (unlikely(start >= nbits))
 		return nbits;
@@ -42,7 +42,12 @@ static inline unsigned long _find_next_bit(const unsigned long *addr1,
 	tmp ^= invert;
 
 	/* Handle 1st word. */
-	tmp &= BITMAP_FIRST_WORD_MASK(start);
+	mask = BITMAP_FIRST_WORD_MASK(start);
+	if (le)
+		mask = swab(mask);
+
+	tmp &= mask;
+
 	start = round_down(start, BITS_PER_LONG);
 
 	while (!tmp) {
@@ -56,6 +61,9 @@ static inline unsigned long _find_next_bit(const unsigned long *addr1,
 		tmp ^= invert;
 	}
 
+	if (le)
+		tmp = swab(tmp);
+
 	return min(start + __ffs(tmp), nbits);
 }
 #endif
@@ -67,7 +75,7 @@ static inline unsigned long _find_next_bit(const unsigned long *addr1,
 unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
 			    unsigned long offset)
 {
-	return _find_next_bit(addr, NULL, size, offset, 0UL);
+	return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
 }
 EXPORT_SYMBOL(find_next_bit);
 #endif
@@ -76,7 +84,7 @@ EXPORT_SYMBOL(find_next_bit);
 unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
 				 unsigned long offset)
 {
-	return _find_next_bit(addr, NULL, size, offset, ~0UL);
+	return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
 }
 EXPORT_SYMBOL(find_next_zero_bit);
 #endif
@@ -86,7 +94,7 @@ unsigned long find_next_and_bit(const unsigned long *addr1,
 		const unsigned long *addr2, unsigned long size,
 		unsigned long offset)
 {
-	return _find_next_bit(addr1, addr2, size, offset, 0UL);
+	return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
 }
 EXPORT_SYMBOL(find_next_and_bit);
 #endif
@@ -149,45 +157,11 @@ EXPORT_SYMBOL(find_last_bit);
 
 #ifdef __BIG_ENDIAN
 
-#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
-static inline unsigned long _find_next_bit_le(const unsigned long *addr1,
-		const unsigned long *addr2, unsigned long nbits,
-		unsigned long start, unsigned long invert)
-{
-	unsigned long tmp;
-
-	if (unlikely(start >= nbits))
-		return nbits;
-
-	tmp = addr1[start / BITS_PER_LONG];
-	if (addr2)
-		tmp &= addr2[start / BITS_PER_LONG];
-	tmp ^= invert;
-
-	/* Handle 1st word. */
-	tmp &= swab(BITMAP_FIRST_WORD_MASK(start));
-	start = round_down(start, BITS_PER_LONG);
-
-	while (!tmp) {
-		start += BITS_PER_LONG;
-		if (start >= nbits)
-			return nbits;
-
-		tmp = addr1[start / BITS_PER_LONG];
-		if (addr2)
-			tmp &= addr2[start / BITS_PER_LONG];
-		tmp ^= invert;
-	}
-
-	return min(start + __ffs(swab(tmp)), nbits);
-}
-#endif
-
 #ifndef find_next_zero_bit_le
 unsigned long find_next_zero_bit_le(const void *addr, unsigned
 		long size, unsigned long offset)
 {
-	return _find_next_bit_le(addr, NULL, size, offset, ~0UL);
+	return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
 }
 EXPORT_SYMBOL(find_next_zero_bit_le);
 #endif
@@ -196,7 +170,7 @@ EXPORT_SYMBOL(find_next_zero_bit_le);
 unsigned long find_next_bit_le(const void *addr, unsigned
 		long size, unsigned long offset)
 {
-	return _find_next_bit_le(addr, NULL, size, offset, 0UL);
+	return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
 }
 EXPORT_SYMBOL(find_next_bit_le);
 #endif
-- 
2.20.1


* [PATCH 3/3] lib/find_bit.c: uninline helper _find_next_bit()
  2020-01-03 20:28 [PATCH 1/3] uapi: rename ext2_swab() to swab() and share globally in swab.h Yury Norov
  2020-01-03 20:28 ` [PATCH 2/3] lib/find_bit.c: join _find_next_bit{_le} Yury Norov
@ 2020-01-03 20:28 ` Yury Norov
  2020-01-03 21:46   ` Joe Perches
  1 sibling, 1 reply; 6+ messages in thread
From: Yury Norov @ 2020-01-03 20:28 UTC (permalink / raw)
  To: Yury Norov, Thomas Gleixner, Andrew Morton, Allison Randal,
	William Breathitt Gray, linux-kernel

It saves 25% of .text for arm64, and more for BE architectures.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 lib/find_bit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/find_bit.c b/lib/find_bit.c
index c03cbecb2b1f6..0e4b2b90c9c02 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -27,7 +27,7 @@
  *    searching it for one bits.
  *  - The optional "addr2", which is anded with "addr1" if present.
  */
-static inline unsigned long _find_next_bit(const unsigned long *addr1,
+static unsigned long _find_next_bit(const unsigned long *addr1,
 		const unsigned long *addr2, unsigned long nbits,
 		unsigned long start, unsigned long invert, unsigned long le)
 {
-- 
2.20.1


* Re: [PATCH 3/3] lib/find_bit.c: uninline helper _find_next_bit()
  2020-01-03 20:28 ` [PATCH 3/3] lib/find_bit.c: uninline helper _find_next_bit() Yury Norov
@ 2020-01-03 21:46   ` Joe Perches
  2020-01-04  0:11     ` Yury Norov
  0 siblings, 1 reply; 6+ messages in thread
From: Joe Perches @ 2020-01-03 21:46 UTC (permalink / raw)
  To: Yury Norov, Thomas Gleixner, Andrew Morton, Allison Randal,
	William Breathitt Gray, linux-kernel

On Fri, 2020-01-03 at 12:28 -0800, Yury Norov wrote:
> It saves 25% of .text for arm64, and more for BE architectures.

This seems a rather misleading code size reduction description.

Please detail the specific code sizes using "size lib/find_bit.o"
before and after this change.

Also, _find_next_bit is used 3 times, perhaps any code size
increase is appropriate given likely reduced run time.

> Signed-off-by: Yury Norov <yury.norov@gmail.com>
> ---
>  lib/find_bit.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/lib/find_bit.c b/lib/find_bit.c
> index c03cbecb2b1f6..0e4b2b90c9c02 100644
> --- a/lib/find_bit.c
> +++ b/lib/find_bit.c
> @@ -27,7 +27,7 @@
>   *    searching it for one bits.
>   *  - The optional "addr2", which is anded with "addr1" if present.
>   */
> -static inline unsigned long _find_next_bit(const unsigned long *addr1,
> +static unsigned long _find_next_bit(const unsigned long *addr1,
>  		const unsigned long *addr2, unsigned long nbits,
>  		unsigned long start, unsigned long invert, unsigned long le)
>  {


* Re: [PATCH 3/3] lib/find_bit.c: uninline helper _find_next_bit()
  2020-01-03 21:46   ` Joe Perches
@ 2020-01-04  0:11     ` Yury Norov
  2020-01-04  1:05       ` Yury Norov
  0 siblings, 1 reply; 6+ messages in thread
From: Yury Norov @ 2020-01-04  0:11 UTC (permalink / raw)
  To: Joe Perches
  Cc: Thomas Gleixner, Andrew Morton, Allison Randal,
	William Breathitt Gray, linux-kernel

On Fri, Jan 03, 2020 at 01:46:07PM -0800, Joe Perches wrote:
> On Fri, 2020-01-03 at 12:28 -0800, Yury Norov wrote:
> > It saves 25% of .text for arm64, and more for BE architectures.
> 
> This seems a rather misleading code size reduction description.
> 
> Please detail the specific code sizes using "size lib/find_bit.o"
> before and after this change.

Before:
$ size lib/find_bit.o
   text    data     bss     dec     hex filename
   1012      56       0    1068     42c lib/find_bit.o

After:
$ size lib/find_bit.o
   text    data     bss     dec     hex filename
    776      56       0     832     340 lib/find_bit.o

> Also, _find_next_bit is used 3 times, perhaps any code size
> increase is appropriate given likely reduced run time.

The second patch of the series switches find_next_zero_bit_le()
and find_next_bit_le() to _find_next_bit(), so five users in total.
 
Yury

* Re: [PATCH 3/3] lib/find_bit.c: uninline helper _find_next_bit()
  2020-01-04  0:11     ` Yury Norov
@ 2020-01-04  1:05       ` Yury Norov
  0 siblings, 0 replies; 6+ messages in thread
From: Yury Norov @ 2020-01-04  1:05 UTC (permalink / raw)
  To: Joe Perches
  Cc: Thomas Gleixner, Andrew Morton, Allison Randal,
	William Breathitt Gray, Yury Norov, linux-kernel

On Fri, Jan 03, 2020 at 04:08:43PM -0800, Yury Norov wrote:
> On Fri, Jan 03, 2020 at 01:46:07PM -0800, Joe Perches wrote:
> > On Fri, 2020-01-03 at 12:28 -0800, Yury Norov wrote:
> > > It saves 25% of .text for arm64, and more for BE architectures.
> > 
> > This seems a rather misleading code size reduction description.
> > 
> > Please detail the specific code sizes using "size lib/find_bit.o"
> > before and after this change.
> 
> Before:
> $ size lib/find_bit.o
>    text    data     bss     dec     hex filename
>    1012      56       0    1068     42c lib/find_bit.o
> 
> After:
> $ size lib/find_bit.o
>    text    data     bss     dec     hex filename
>     776      56       0     832     340 lib/find_bit.o
> 
> > Also, _find_next_bit is used 3 times, perhaps any code size
> > increase is appropriate given likely reduced run time.
> 
> The second patch of the series switches find_next_zero_bit_le()
> and find_next_bit_le() to _find_next_bit(), so five users in total.
>  
> Yury

> > perhaps any code size
> > increase is appropriate given likely reduced run time.

I have a benchmark for the find_bit functions upstream; however, it cannot
measure the overall performance degradation related to the increased
probability of cache eviction.

When I originally wrote _find_next_bit() in 2014, it was simpler and had
two users. Now there are five, and I think it's a good time to stop
inlining _find_next_bit().

Yury
