linux-kernel.vger.kernel.org archive mirror
* [PATCH v2 1/3] lib: find_*_bit reimplementation
@ 2015-01-31 20:58 yury.norov
  2015-01-31 20:58 ` [PATCH v2 2/3] lib: move find_last_bit to lib/find_next_bit.c yury.norov
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: yury.norov @ 2015-01-31 20:58 UTC (permalink / raw)
  To: klimov.linux, davem, akpm, hannes, dborkman, laijs,
	takahiro.akashi, valentinrothberg, linux, msalter, chris, tgraf
  Cc: linux-kernel, Yury Norov, Yury Norov

From: Yury Norov <y.norov@samsung.com>

The new implementations take less space in the source file (see diffstat)
and in the object file. For me it's 710 vs 453 bytes of text.

Patch was boot-tested on x86_64 and MIPS (big-endian) machines.
Performance tests were run in userspace with code like this:

	/* addr[] is filled from /dev/urandom */
	start = clock();
	while (ret < nbits)
		ret = find_next_bit(addr, nbits, ret + 1);

	end = clock();
	printf("%ld\t", (unsigned long) end - start);

On Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz the results are as follows:
(for find_next_bit, nbits is 8M, for find_first_bit - 80K)

	find_next_bit:		find_first_bit:
	new	current		new	current
	26932	43151		14777	14925
	26947	43182		14521	15423
	26507	43824		15053	14705
	27329	43759		14473	14777
	26895	43367		14847	15023
	26990	43693		15103	15163
	26775	43299		15067	15232
	27282	42752		14544	15121
	27504	43088		14644	14858
	26761	43856		14699	15193
	26692	43075		14781	14681
	27137	42969		14451	15061
	...			...

find_next_bit performance gain is 35-40%;
find_first_bit - no measurable difference.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 lib/find_last_bit.c |  31 ++-----
 lib/find_next_bit.c | 254 +++++++++++++++-------------------------------------
 2 files changed, 79 insertions(+), 206 deletions(-)

diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
index 91ca09f..e67e970 100644
--- a/lib/find_last_bit.c
+++ b/lib/find_last_bit.c
@@ -4,44 +4,29 @@
  * Written by Rusty Russell <rusty@rustcorp.com.au>
  * (Inspired by David Howell's find_next_bit implementation)
  *
+ * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
+ * size and improve performance, 2015.
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/bitops.h>
 #include <linux/export.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
+#include <linux/kernel.h>
 
 #ifndef find_last_bit
 
 unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
 {
-	unsigned long words;
-	unsigned long tmp;
-
-	/* Start at final word. */
-	words = size / BITS_PER_LONG;
-
-	/* Partial final word? */
-	if (size & (BITS_PER_LONG-1)) {
-		tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
-					 - (size & (BITS_PER_LONG-1)))));
-		if (tmp)
-			goto found;
-	}
+	unsigned long idx = DIV_ROUND_UP(size, BITS_PER_LONG);
 
-	while (words) {
-		tmp = addr[--words];
-		if (tmp) {
-found:
-			return words * BITS_PER_LONG + __fls(tmp);
-		}
+	while (idx--) {
+		if (addr[idx])
+			return min(idx * BITS_PER_LONG + __fls(addr[idx]), size);
 	}
 
-	/* Not found */
 	return size;
 }
 EXPORT_SYMBOL(find_last_bit);
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
index 0cbfc0b..ebfb3dc 100644
--- a/lib/find_next_bit.c
+++ b/lib/find_next_bit.c
@@ -3,18 +3,45 @@
  * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
+ * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
+ * size and improve performance, 2015.
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/bitops.h>
 #include <linux/export.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
+#include <linux/kernel.h>
+
+#define HIGH_BITS_MASK(nr)		(ULONG_MAX << (nr))
+
+#if !defined(find_next_bit) || !defined(find_next_zero_bit)
+static unsigned long _find_next_bit(const unsigned long *addr,
+		unsigned long nbits, unsigned long start, bool set)
+{
+	unsigned long tmp = set ? addr[start / BITS_PER_LONG]
+			: ~addr[start / BITS_PER_LONG];
+
+	/* Handle 1st word. */
+	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
+		tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
+		start = round_down(start, BITS_PER_LONG);
+	}
 
-#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
+	while (!tmp) {
+		start += BITS_PER_LONG;
+		if (start >= nbits)
+			return nbits;
+
+		tmp = set ? addr[start / BITS_PER_LONG]
+			: ~addr[start / BITS_PER_LONG];
+	}
+
+	return start + __ffs(tmp);
+}
+#endif
 
 #ifndef find_next_bit
 /*
@@ -23,86 +50,22 @@
 unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
 			    unsigned long offset)
 {
-	const unsigned long *p = addr + BITOP_WORD(offset);
-	unsigned long result = offset & ~(BITS_PER_LONG-1);
-	unsigned long tmp;
-
 	if (offset >= size)
 		return size;
-	size -= result;
-	offset %= BITS_PER_LONG;
-	if (offset) {
-		tmp = *(p++);
-		tmp &= (~0UL << offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found_middle;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
 
-found_first:
-	tmp &= (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + __ffs(tmp);
+	return min(_find_next_bit(addr, size, offset, 1), size);
 }
 EXPORT_SYMBOL(find_next_bit);
 #endif
 
 #ifndef find_next_zero_bit
-/*
- * This implementation of find_{first,next}_zero_bit was stolen from
- * Linus' asm-alpha/bitops.h.
- */
 unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
 				 unsigned long offset)
 {
-	const unsigned long *p = addr + BITOP_WORD(offset);
-	unsigned long result = offset & ~(BITS_PER_LONG-1);
-	unsigned long tmp;
-
 	if (offset >= size)
 		return size;
-	size -= result;
-	offset %= BITS_PER_LONG;
-	if (offset) {
-		tmp = *(p++);
-		tmp |= ~0UL >> (BITS_PER_LONG - offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (~tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-	while (size & ~(BITS_PER_LONG-1)) {
-		if (~(tmp = *(p++)))
-			goto found_middle;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
 
-found_first:
-	tmp |= ~0UL << size;
-	if (tmp == ~0UL)	/* Are any bits zero? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + ffz(tmp);
+	return min(_find_next_bit(addr, size, offset, 0), size);
 }
 EXPORT_SYMBOL(find_next_zero_bit);
 #endif
@@ -113,24 +76,14 @@ EXPORT_SYMBOL(find_next_zero_bit);
  */
 unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
 {
-	const unsigned long *p = addr;
-	unsigned long result = 0;
-	unsigned long tmp;
+	unsigned long idx;
 
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
+	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+		if (addr[idx])
+			return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
 	}
-	if (!size)
-		return result;
 
-	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found:
-	return result + __ffs(tmp);
+	return size;
 }
 EXPORT_SYMBOL(find_first_bit);
 #endif
@@ -141,24 +94,14 @@ EXPORT_SYMBOL(find_first_bit);
  */
 unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
 {
-	const unsigned long *p = addr;
-	unsigned long result = 0;
-	unsigned long tmp;
+	unsigned long idx;
 
-	while (size & ~(BITS_PER_LONG-1)) {
-		if (~(tmp = *(p++)))
-			goto found;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
+	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+		if (addr[idx] != ULONG_MAX)
+			return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
 	}
-	if (!size)
-		return result;
 
-	tmp = (*p) | (~0UL << size);
-	if (tmp == ~0UL)	/* Are any bits zero? */
-		return result + size;	/* Nope. */
-found:
-	return result + ffz(tmp);
+	return size;
 }
 EXPORT_SYMBOL(find_first_zero_bit);
 #endif
@@ -166,18 +109,6 @@ EXPORT_SYMBOL(find_first_zero_bit);
 #ifdef __BIG_ENDIAN
 
 /* include/linux/byteorder does not support "unsigned long" type */
-static inline unsigned long ext2_swabp(const unsigned long * x)
-{
-#if BITS_PER_LONG == 64
-	return (unsigned long) __swab64p((u64 *) x);
-#elif BITS_PER_LONG == 32
-	return (unsigned long) __swab32p((u32 *) x);
-#else
-#error BITS_PER_LONG not defined
-#endif
-}
-
-/* include/linux/byteorder doesn't support "unsigned long" type */
 static inline unsigned long ext2_swab(const unsigned long y)
 {
 #if BITS_PER_LONG == 64
@@ -189,48 +120,40 @@ static inline unsigned long ext2_swab(const unsigned long y)
 #endif
 }
 
+#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
+static unsigned long _find_next_bit_le(const unsigned long *addr,
+		unsigned long nbits, unsigned long start, bool set)
+{
+	unsigned long tmp = set ? addr[start / BITS_PER_LONG]
+			: ~addr[start / BITS_PER_LONG];
+
+	/* Handle 1st word. */
+	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
+		tmp &= ext2_swab(HIGH_BITS_MASK(start % BITS_PER_LONG));
+		start = round_down(start, BITS_PER_LONG);
+	}
+
+	while (!tmp) {
+		start += BITS_PER_LONG;
+		if (start >= nbits)
+			return nbits;
+
+		tmp = set ? addr[start / BITS_PER_LONG]
+			: ~addr[start / BITS_PER_LONG];
+	}
+
+	return start + __ffs(ext2_swab(tmp));
+}
+#endif
+
 #ifndef find_next_zero_bit_le
 unsigned long find_next_zero_bit_le(const void *addr, unsigned
 		long size, unsigned long offset)
 {
-	const unsigned long *p = addr;
-	unsigned long result = offset & ~(BITS_PER_LONG - 1);
-	unsigned long tmp;
-
 	if (offset >= size)
 		return size;
-	p += BITOP_WORD(offset);
-	size -= result;
-	offset &= (BITS_PER_LONG - 1UL);
-	if (offset) {
-		tmp = ext2_swabp(p++);
-		tmp |= (~0UL >> (BITS_PER_LONG - offset));
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (~tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
 
-	while (size & ~(BITS_PER_LONG - 1)) {
-		if (~(tmp = *(p++)))
-			goto found_middle_swap;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = ext2_swabp(p);
-found_first:
-	tmp |= ~0UL << size;
-	if (tmp == ~0UL)	/* Are any bits zero? */
-		return result + size; /* Nope. Skip ffz */
-found_middle:
-	return result + ffz(tmp);
-
-found_middle_swap:
-	return result + ffz(ext2_swab(tmp));
+	return min(_find_next_bit_le(addr, size, offset, 0), size);
 }
 EXPORT_SYMBOL(find_next_zero_bit_le);
 #endif
@@ -239,45 +162,10 @@ EXPORT_SYMBOL(find_next_zero_bit_le);
 unsigned long find_next_bit_le(const void *addr, unsigned
 		long size, unsigned long offset)
 {
-	const unsigned long *p = addr;
-	unsigned long result = offset & ~(BITS_PER_LONG - 1);
-	unsigned long tmp;
-
 	if (offset >= size)
 		return size;
-	p += BITOP_WORD(offset);
-	size -= result;
-	offset &= (BITS_PER_LONG - 1UL);
-	if (offset) {
-		tmp = ext2_swabp(p++);
-		tmp &= (~0UL << offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-
-	while (size & ~(BITS_PER_LONG - 1)) {
-		tmp = *(p++);
-		if (tmp)
-			goto found_middle_swap;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = ext2_swabp(p);
-found_first:
-	tmp &= (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size; /* Nope. */
-found_middle:
-	return result + __ffs(tmp);
 
-found_middle_swap:
-	return result + __ffs(ext2_swab(tmp));
+	return min(_find_next_bit_le(addr, size, offset, 1), size);
 }
 EXPORT_SYMBOL(find_next_bit_le);
 #endif
-- 
2.1.0



* [PATCH v2 2/3] lib: move find_last_bit to lib/find_next_bit.c
  2015-01-31 20:58 [PATCH v2 1/3] lib: find_*_bit reimplementation yury.norov
@ 2015-01-31 20:58 ` yury.norov
  2015-01-31 20:58 ` [PATCH v2 3/3] lib: rename lib/find_next_bit.c to lib/find_bit.c yury.norov
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 14+ messages in thread
From: yury.norov @ 2015-01-31 20:58 UTC (permalink / raw)
  To: klimov.linux, davem, akpm, hannes, dborkman, laijs,
	takahiro.akashi, valentinrothberg, linux, msalter, chris, tgraf
  Cc: linux-kernel, Yury Norov

From: Yury Norov <yury.norov@gmail.com>

Currently the whole 'find_*_bit' family is located in lib/find_next_bit.c,
except for 'find_last_bit', which is in lib/find_last_bit.c. There seems
to be no major benefit in keeping it separate.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 lib/Makefile        |  2 +-
 lib/find_last_bit.c | 34 ----------------------------------
 lib/find_next_bit.c | 19 +++++++++++++++++++
 3 files changed, 20 insertions(+), 35 deletions(-)
 delete mode 100644 lib/find_last_bit.c

diff --git a/lib/Makefile b/lib/Makefile
index 3c3b30b..13990aa 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -25,7 +25,7 @@ obj-y	+= lockref.o
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
-	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
+	 bsearch.o find_next_bit.o llist.o memweight.o kfifo.o \
 	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
deleted file mode 100644
index e67e970..0000000
--- a/lib/find_last_bit.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* find_last_bit.c: fallback find next bit implementation
- *
- * Copyright (C) 2008 IBM Corporation
- * Written by Rusty Russell <rusty@rustcorp.com.au>
- * (Inspired by David Howell's find_next_bit implementation)
- *
- * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
- * size and improve performance, 2015.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/export.h>
-#include <linux/kernel.h>
-
-#ifndef find_last_bit
-
-unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
-{
-	unsigned long idx = DIV_ROUND_UP(size, BITS_PER_LONG);
-
-	while (idx--) {
-		if (addr[idx])
-			return min(idx * BITS_PER_LONG + __fls(addr[idx]), size);
-	}
-
-	return size;
-}
-EXPORT_SYMBOL(find_last_bit);
-
-#endif
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
index ebfb3dc..2087839 100644
--- a/lib/find_next_bit.c
+++ b/lib/find_next_bit.c
@@ -3,6 +3,10 @@
  * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
+ * Copyright (C) 2008 IBM Corporation
+ * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
  * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
  * size and improve performance, 2015.
  *
@@ -106,6 +110,21 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
 EXPORT_SYMBOL(find_first_zero_bit);
 #endif
 
+#ifndef find_last_bit
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+	unsigned long idx = DIV_ROUND_UP(size, BITS_PER_LONG);
+
+	while (idx--) {
+		if (addr[idx])
+			return min(idx * BITS_PER_LONG + __fls(addr[idx]), size);
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(find_last_bit);
+#endif
+
 #ifdef __BIG_ENDIAN
 
 /* include/linux/byteorder does not support "unsigned long" type */
-- 
2.1.0



* [PATCH v2 3/3] lib: rename lib/find_next_bit.c to lib/find_bit.c
  2015-01-31 20:58 [PATCH v2 1/3] lib: find_*_bit reimplementation yury.norov
  2015-01-31 20:58 ` [PATCH v2 2/3] lib: move find_last_bit to lib/find_next_bit.c yury.norov
@ 2015-01-31 20:58 ` yury.norov
  2015-02-02 11:09   ` Rasmus Villemoes
  2015-02-02  3:17 ` [PATCH v2 1/3] lib: find_*_bit reimplementation George Spelvin
  2015-02-02 10:43 ` Rasmus Villemoes
  3 siblings, 1 reply; 14+ messages in thread
From: yury.norov @ 2015-01-31 20:58 UTC (permalink / raw)
  To: klimov.linux, davem, akpm, hannes, dborkman, laijs,
	takahiro.akashi, valentinrothberg, linux, msalter, chris, tgraf
  Cc: linux-kernel, Yury Norov

From: Yury Norov <yury.norov@gmail.com>

This file contains implementations for:
- find_last_bit;
- find_first_zero_bit;
- find_first_bit;
- find_next_zero_bit;
- find_next_bit.

So giving it a more generic name looks reasonable.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 lib/Makefile        |   2 +-
 lib/find_bit.c      | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/find_next_bit.c | 192 ---------------------------------------------------
 3 files changed, 195 insertions(+), 193 deletions(-)
 create mode 100644 lib/find_bit.c
 delete mode 100644 lib/find_next_bit.c

diff --git a/lib/Makefile b/lib/Makefile
index 13990aa..1cc93f4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -25,7 +25,7 @@ obj-y	+= lockref.o
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
-	 bsearch.o find_next_bit.o llist.o memweight.o kfifo.o \
+	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
 	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
diff --git a/lib/find_bit.c b/lib/find_bit.c
new file mode 100644
index 0000000..236a850
--- /dev/null
+++ b/lib/find_bit.c
@@ -0,0 +1,194 @@
+/* find_bit.c: generic implementation fore:
+ * find_last_bit; find_first_zero_bit; find_first_bit;
+ * find_next_zero_bit; find_next_bit.
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
+ * size and improve performance, 2015.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+
+#define HIGH_BITS_MASK(nr)		(ULONG_MAX << (nr))
+
+#if !defined(find_next_bit) || !defined(find_next_zero_bit)
+static unsigned long _find_next_bit(const unsigned long *addr,
+		unsigned long nbits, unsigned long start, bool set)
+{
+	unsigned long tmp = set ? addr[start / BITS_PER_LONG]
+			: ~addr[start / BITS_PER_LONG];
+
+	/* Handle 1st word. */
+	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
+		tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
+		start = round_down(start, BITS_PER_LONG);
+	}
+
+	while (!tmp) {
+		start += BITS_PER_LONG;
+		if (start >= nbits)
+			return nbits;
+
+		tmp = set ? addr[start / BITS_PER_LONG]
+			: ~addr[start / BITS_PER_LONG];
+	}
+
+	return start + __ffs(tmp);
+}
+#endif
+
+#ifndef find_next_bit
+/*
+ * Find the next set bit in a memory region.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+			    unsigned long offset)
+{
+	if (offset >= size)
+		return size;
+
+	return min(_find_next_bit(addr, size, offset, 1), size);
+}
+EXPORT_SYMBOL(find_next_bit);
+#endif
+
+#ifndef find_next_zero_bit
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+				 unsigned long offset)
+{
+	if (offset >= size)
+		return size;
+
+	return min(_find_next_bit(addr, size, offset, 0), size);
+}
+EXPORT_SYMBOL(find_next_zero_bit);
+#endif
+
+#ifndef find_first_bit
+/*
+ * Find the first set bit in a memory region.
+ */
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+		if (addr[idx])
+			return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(find_first_bit);
+#endif
+
+#ifndef find_first_zero_bit
+/*
+ * Find the first cleared bit in a memory region.
+ */
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+		if (addr[idx] != ULONG_MAX)
+			return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(find_first_zero_bit);
+#endif
+
+#ifndef find_last_bit
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+	unsigned long idx = DIV_ROUND_UP(size, BITS_PER_LONG);
+
+	while (idx--) {
+		if (addr[idx])
+			return min(idx * BITS_PER_LONG + __fls(addr[idx]), size);
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(find_last_bit);
+#endif
+
+#ifdef __BIG_ENDIAN
+
+/* include/linux/byteorder does not support "unsigned long" type */
+static inline unsigned long ext2_swab(const unsigned long y)
+{
+#if BITS_PER_LONG == 64
+	return (unsigned long) __swab64((u64) y);
+#elif BITS_PER_LONG == 32
+	return (unsigned long) __swab32((u32) y);
+#else
+#error BITS_PER_LONG not defined
+#endif
+}
+
+#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
+static unsigned long _find_next_bit_le(const unsigned long *addr,
+		unsigned long nbits, unsigned long start, bool set)
+{
+	unsigned long tmp = set ? addr[start / BITS_PER_LONG]
+			: ~addr[start / BITS_PER_LONG];
+
+	/* Handle 1st word. */
+	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
+		tmp &= ext2_swab(HIGH_BITS_MASK(start % BITS_PER_LONG));
+		start = round_down(start, BITS_PER_LONG);
+	}
+
+	while (!tmp) {
+		start += BITS_PER_LONG;
+		if (start >= nbits)
+			return nbits;
+
+		tmp = set ? addr[start / BITS_PER_LONG]
+			: ~addr[start / BITS_PER_LONG];
+	}
+
+	return start + __ffs(ext2_swab(tmp));
+}
+#endif
+
+#ifndef find_next_zero_bit_le
+unsigned long find_next_zero_bit_le(const void *addr, unsigned
+		long size, unsigned long offset)
+{
+	if (offset >= size)
+		return size;
+
+	return min(_find_next_bit_le(addr, size, offset, 0), size);
+}
+EXPORT_SYMBOL(find_next_zero_bit_le);
+#endif
+
+#ifndef find_next_bit_le
+unsigned long find_next_bit_le(const void *addr, unsigned
+		long size, unsigned long offset)
+{
+	if (offset >= size)
+		return size;
+
+	return min(_find_next_bit_le(addr, size, offset, 1), size);
+}
+EXPORT_SYMBOL(find_next_bit_le);
+#endif
+
+#endif /* __BIG_ENDIAN */
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
deleted file mode 100644
index 2087839..0000000
--- a/lib/find_next_bit.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/* find_next_bit.c: fallback find next bit implementation
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * Copyright (C) 2008 IBM Corporation
- * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
- * (Inspired by David Howell's find_next_bit implementation)
- *
- * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
- * size and improve performance, 2015.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/export.h>
-#include <linux/kernel.h>
-
-#define HIGH_BITS_MASK(nr)		(ULONG_MAX << (nr))
-
-#if !defined(find_next_bit) || !defined(find_next_zero_bit)
-static unsigned long _find_next_bit(const unsigned long *addr,
-		unsigned long nbits, unsigned long start, bool set)
-{
-	unsigned long tmp = set ? addr[start / BITS_PER_LONG]
-			: ~addr[start / BITS_PER_LONG];
-
-	/* Handle 1st word. */
-	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
-		tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
-		start = round_down(start, BITS_PER_LONG);
-	}
-
-	while (!tmp) {
-		start += BITS_PER_LONG;
-		if (start >= nbits)
-			return nbits;
-
-		tmp = set ? addr[start / BITS_PER_LONG]
-			: ~addr[start / BITS_PER_LONG];
-	}
-
-	return start + __ffs(tmp);
-}
-#endif
-
-#ifndef find_next_bit
-/*
- * Find the next set bit in a memory region.
- */
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
-			    unsigned long offset)
-{
-	if (offset >= size)
-		return size;
-
-	return min(_find_next_bit(addr, size, offset, 1), size);
-}
-EXPORT_SYMBOL(find_next_bit);
-#endif
-
-#ifndef find_next_zero_bit
-unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
-				 unsigned long offset)
-{
-	if (offset >= size)
-		return size;
-
-	return min(_find_next_bit(addr, size, offset, 0), size);
-}
-EXPORT_SYMBOL(find_next_zero_bit);
-#endif
-
-#ifndef find_first_bit
-/*
- * Find the first set bit in a memory region.
- */
-unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
-{
-	unsigned long idx;
-
-	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
-		if (addr[idx])
-			return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
-	}
-
-	return size;
-}
-EXPORT_SYMBOL(find_first_bit);
-#endif
-
-#ifndef find_first_zero_bit
-/*
- * Find the first cleared bit in a memory region.
- */
-unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
-{
-	unsigned long idx;
-
-	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
-		if (addr[idx] != ULONG_MAX)
-			return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
-	}
-
-	return size;
-}
-EXPORT_SYMBOL(find_first_zero_bit);
-#endif
-
-#ifndef find_last_bit
-unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
-{
-	unsigned long idx = DIV_ROUND_UP(size, BITS_PER_LONG);
-
-	while (idx--) {
-		if (addr[idx])
-			return min(idx * BITS_PER_LONG + __fls(addr[idx]), size);
-	}
-
-	return size;
-}
-EXPORT_SYMBOL(find_last_bit);
-#endif
-
-#ifdef __BIG_ENDIAN
-
-/* include/linux/byteorder does not support "unsigned long" type */
-static inline unsigned long ext2_swab(const unsigned long y)
-{
-#if BITS_PER_LONG == 64
-	return (unsigned long) __swab64((u64) y);
-#elif BITS_PER_LONG == 32
-	return (unsigned long) __swab32((u32) y);
-#else
-#error BITS_PER_LONG not defined
-#endif
-}
-
-#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
-static unsigned long _find_next_bit_le(const unsigned long *addr,
-		unsigned long nbits, unsigned long start, bool set)
-{
-	unsigned long tmp = set ? addr[start / BITS_PER_LONG]
-			: ~addr[start / BITS_PER_LONG];
-
-	/* Handle 1st word. */
-	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
-		tmp &= ext2_swab(HIGH_BITS_MASK(start % BITS_PER_LONG));
-		start = round_down(start, BITS_PER_LONG);
-	}
-
-	while (!tmp) {
-		start += BITS_PER_LONG;
-		if (start >= nbits)
-			return nbits;
-
-		tmp = set ? addr[start / BITS_PER_LONG]
-			: ~addr[start / BITS_PER_LONG];
-	}
-
-	return start + __ffs(ext2_swab(tmp));
-}
-#endif
-
-#ifndef find_next_zero_bit_le
-unsigned long find_next_zero_bit_le(const void *addr, unsigned
-		long size, unsigned long offset)
-{
-	if (offset >= size)
-		return size;
-
-	return min(_find_next_bit_le(addr, size, offset, 0), size);
-}
-EXPORT_SYMBOL(find_next_zero_bit_le);
-#endif
-
-#ifndef find_next_bit_le
-unsigned long find_next_bit_le(const void *addr, unsigned
-		long size, unsigned long offset)
-{
-	if (offset >= size)
-		return size;
-
-	return min(_find_next_bit_le(addr, size, offset, 1), size);
-}
-EXPORT_SYMBOL(find_next_bit_le);
-#endif
-
-#endif /* __BIG_ENDIAN */
-- 
2.1.0



* Re: [PATCH v2 1/3] lib: find_*_bit reimplementation
  2015-01-31 20:58 [PATCH v2 1/3] lib: find_*_bit reimplementation yury.norov
  2015-01-31 20:58 ` [PATCH v2 2/3] lib: move find_last_bit to lib/find_next_bit.c yury.norov
  2015-01-31 20:58 ` [PATCH v2 3/3] lib: rename lib/find_next_bit.c to lib/find_bit.c yury.norov
@ 2015-02-02  3:17 ` George Spelvin
  2015-02-04 23:07   ` Yury
  2015-02-02 10:43 ` Rasmus Villemoes
  3 siblings, 1 reply; 14+ messages in thread
From: George Spelvin @ 2015-02-02  3:17 UTC (permalink / raw)
  To: akpm, chris, davem, dborkman, hannes, klimov.linux, laijs, linux,
	msalter, takahiro.akashi, tgraf, valentinrothberg, yury.norov
  Cc: linux-kernel, y.norov

Yury Norov <y.norov@samsung.com> wrote:
> The new implementations take less space in the source file (see diffstat)
> and in the object file. For me it's 710 vs 453 bytes of text.
> 
> Patch was boot-tested on x86_64 and MIPS (big-endian) machines.
> Performance tests were run in userspace with code like this:
> 
>	/* addr[] is filled from /dev/urandom */
>	start = clock();
>	while (ret < nbits)
>		ret = find_next_bit(addr, nbits, ret + 1);
>
>	end = clock();
>	printf("%ld\t", (unsigned long) end - start);

> On Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz the results are as follows:
> (for find_next_bit, nbits is 8M, for find_first_bit - 80K)
>
>	find_next_bit:		find_first_bit:
>	new	current		new	current
>	26932	43151		14777	14925
>	26947	43182		14521	15423

I'll look at this more carefully, but one immediate thought is that this
is an unrealistic benchmark.  It will almost never need to look at more
than one word of the array, but real arrays have long runs of zero
bits to skip over.

So the code size is appreciated, but the time benefits may be the result
of you optimizing for the wrong thing.

I'd try filling the array with mostly-identical bits, flipping with odds
of 1/256 or so.

For full generality, I'd test different 1->0 and 0->1 transition
probabilities.  (But powers of two are probably enough for benchmarking.)
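
For illustration, a fill routine along those lines might look like this
(userspace sketch only; the helper name is made up and it assumes nbits
is a multiple of BITS_PER_LONG):

	#include <stdlib.h>
	#include <string.h>

	#define BITS_PER_LONG	(8 * sizeof(unsigned long))

	/* Long runs of identical bits; the current value flips with
	 * probability 1/256 at each bit position.
	 */
	static void fill_runs(unsigned long *addr, unsigned long nbits)
	{
		unsigned long i;
		int bit = 0;

		memset(addr, 0, nbits / 8);
		for (i = 0; i < nbits; i++) {
			if ((rand() & 0xff) == 0)
				bit ^= 1;
			if (bit)
				addr[i / BITS_PER_LONG] |= 1UL << (i % BITS_PER_LONG);
		}
	}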



* Re: [PATCH v2 1/3] lib: find_*_bit reimplementation
  2015-01-31 20:58 [PATCH v2 1/3] lib: find_*_bit reimplementation yury.norov
                   ` (2 preceding siblings ...)
  2015-02-02  3:17 ` [PATCH v2 1/3] lib: find_*_bit reimplementation George Spelvin
@ 2015-02-02 10:43 ` Rasmus Villemoes
  2015-02-02 11:47   ` George Spelvin
  2015-02-04 22:52   ` Yury
  3 siblings, 2 replies; 14+ messages in thread
From: Rasmus Villemoes @ 2015-02-02 10:43 UTC (permalink / raw)
  To: yury.norov
  Cc: klimov.linux, davem, akpm, hannes, dborkman, laijs,
	takahiro.akashi, valentinrothberg, linux, msalter, chris, tgraf,
	linux-kernel, Yury Norov

On Sat, Jan 31 2015, yury.norov@gmail.com wrote:

> From: Yury Norov <y.norov@samsung.com>
>
> The new implementations take less space in the source file (see diffstat)
> and in the object file. For me it's 710 vs 453 bytes of text.
>

New version generally looks good. Please include a summary of the
changes between the versions either below the --- line or in a 0/n cover
letter, especially since you've now expanded the scope of the series.

Comments below.

>
> Patch was boot-tested on x86_64 and MIPS (big-endian) machines.
> Performance tests were run in userspace with code like this:
>
> 	/* addr[] is filled from /dev/urandom */
> 	start = clock();
> 	while (ret < nbits)
> 		ret = find_next_bit(addr, nbits, ret + 1);
>
> 	end = clock();
> 	printf("%ld\t", (unsigned long) end - start);
>
> On Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz the results are as follows:
> (for find_next_bit, nbits is 8M, for find_first_bit - 80K)
>
> 	find_next_bit:		find_first_bit:
> 	new	current		new	current
> 	26932	43151		14777	14925
> 	26947	43182		14521	15423
> 	26507	43824		15053	14705
> 	27329	43759		14473	14777
> 	26895	43367		14847	15023
> 	26990	43693		15103	15163
> 	26775	43299		15067	15232
> 	27282	42752		14544	15121
> 	27504	43088		14644	14858
> 	26761	43856		14699	15193
> 	26692	43075		14781	14681
> 	27137	42969		14451	15061
> 	...			...
>
> find_next_bit performance gain is 35-40%;
> find_first_bit - no measurable difference.
>
> Signed-off-by: Yury Norov <yury.norov@gmail.com>
> ---
>  lib/find_last_bit.c |  31 ++-----
>  lib/find_next_bit.c | 254 +++++++++++++++-------------------------------------
>  2 files changed, 79 insertions(+), 206 deletions(-)
>
> diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
> index 91ca09f..e67e970 100644
> --- a/lib/find_last_bit.c
> +++ b/lib/find_last_bit.c
> @@ -4,44 +4,29 @@
>   * Written by Rusty Russell <rusty@rustcorp.com.au>
>   * (Inspired by David Howell's find_next_bit implementation)
>   *
> + * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
> + * size and improve performance, 2015.
> + *
>   * This program is free software; you can redistribute it and/or
>   * modify it under the terms of the GNU General Public License
>   * as published by the Free Software Foundation; either version
>   * 2 of the License, or (at your option) any later version.
>   */
>  
> -#include <linux/bitops.h>

Why do you remove that #include? It is rather important that the header
and implementation don't get out of sync. I know that kernel.h includes
bitops.h, but please don't rely on such things. Quoting SubmitChecklist:

1: If you use a facility then #include the file that defines/declares
   that facility.  Don't depend on other header files pulling in ones
   that you use.


>  #include <linux/export.h>
> -#include <asm/types.h>
> -#include <asm/byteorder.h>

However, getting rid of includes that are no longer needed is certainly
a good thing.

> +#include <linux/kernel.h>
>  
>  #ifndef find_last_bit
>  
>  unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
>  {
> -	unsigned long words;
> -	unsigned long tmp;
> -
> -	/* Start at final word. */
> -	words = size / BITS_PER_LONG;
> -
> -	/* Partial final word? */
> -	if (size & (BITS_PER_LONG-1)) {
> -		tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
> -					 - (size & (BITS_PER_LONG-1)))));
> -		if (tmp)
> -			goto found;
> -	}
> +	unsigned long idx = DIV_ROUND_UP(size, BITS_PER_LONG);
>  
> -	while (words) {
> -		tmp = addr[--words];
> -		if (tmp) {
> -found:
> -			return words * BITS_PER_LONG + __fls(tmp);
> -		}
> +	while (idx--) {
> +		if (addr[idx])
> +			return min(idx * BITS_PER_LONG + __fls(addr[idx]), size);
>  	}
>  
> -	/* Not found */
>  	return size;
>  }
>  EXPORT_SYMBOL(find_last_bit);
> diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
> index 0cbfc0b..ebfb3dc 100644
> --- a/lib/find_next_bit.c
> +++ b/lib/find_next_bit.c
> @@ -3,18 +3,45 @@
>   * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
>   * Written by David Howells (dhowells@redhat.com)
>   *
> + * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
> + * size and improve performance, 2015.
> + *
>   * This program is free software; you can redistribute it and/or
>   * modify it under the terms of the GNU General Public License
>   * as published by the Free Software Foundation; either version
>   * 2 of the License, or (at your option) any later version.
>   */
>  
> -#include <linux/bitops.h>
>  #include <linux/export.h>
> -#include <asm/types.h>
> -#include <asm/byteorder.h>
> +#include <linux/kernel.h>

Same as above.

> +#define HIGH_BITS_MASK(nr)		(ULONG_MAX << (nr))
> +
> +#if !defined(find_next_bit) || !defined(find_next_zero_bit)
> +static unsigned long _find_next_bit(const unsigned long *addr,
> +		unsigned long nbits, unsigned long start, bool set)
> +{
> +	unsigned long tmp = set ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +
> +	/* Handle 1st word. */
> +	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
> +		tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
> +		start = round_down(start, BITS_PER_LONG);
> +	}
>  
> -#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
> +	while (!tmp) {
> +		start += BITS_PER_LONG;
> +		if (start >= nbits)
> +			return nbits;
> +
> +		tmp = set ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +	}
> +
> +	return start + __ffs(tmp);
> +}
> +#endif
>  
>  #ifndef find_next_bit
>  /*
> @@ -23,86 +50,22 @@
>  unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
>  			    unsigned long offset)
>  {
> -	const unsigned long *p = addr + BITOP_WORD(offset);
> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
> -	unsigned long tmp;
> -
>  	if (offset >= size)
>  		return size;

Why can't this ...


> -	size -= result;
> -	offset %= BITS_PER_LONG;
> -	if (offset) {
> -		tmp = *(p++);
> -		tmp &= (~0UL << offset);
> -		if (size < BITS_PER_LONG)
> -			goto found_first;
> -		if (tmp)
> -			goto found_middle;
> -		size -= BITS_PER_LONG;
> -		result += BITS_PER_LONG;
> -	}
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if ((tmp = *(p++)))
> -			goto found_middle;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> -	}
> -	if (!size)
> -		return result;
> -	tmp = *p;
>  
> -found_first:
> -	tmp &= (~0UL >> (BITS_PER_LONG - size));
> -	if (tmp == 0UL)		/* Are any bits set? */
> -		return result + size;	/* Nope. */
> -found_middle:
> -	return result + __ffs(tmp);
> +	return min(_find_next_bit(addr, size, offset, 1), size);

... and this be part of _find_next_bit? Can find_next_bit not be simply
'return _find_next_bit(addr, size, offset, 1);', and similarly for
find_next_zero_bit? Btw., passing true and false for the boolean
parameter may be a little clearer.
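
A sketch of that shape (illustrative only, with the offset check and the
final clamp handled inside _find_next_bit):

	unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
				    unsigned long offset)
	{
		return _find_next_bit(addr, size, offset, 1);
	}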

>  }
>  EXPORT_SYMBOL(find_next_bit);
>  #endif
>  
>  #ifndef find_next_zero_bit
> -/*
> - * This implementation of find_{first,next}_zero_bit was stolen from
> - * Linus' asm-alpha/bitops.h.
> - */
>  unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
>  				 unsigned long offset)
>  {
> -	const unsigned long *p = addr + BITOP_WORD(offset);
> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
> -	unsigned long tmp;
> -
>  	if (offset >= size)
>  		return size;

See above.

> -	size -= result;
> -	offset %= BITS_PER_LONG;
> -	if (offset) {
> -		tmp = *(p++);
> -		tmp |= ~0UL >> (BITS_PER_LONG - offset);
> -		if (size < BITS_PER_LONG)
> -			goto found_first;
> -		if (~tmp)
> -			goto found_middle;
> -		size -= BITS_PER_LONG;
> -		result += BITS_PER_LONG;
> -	}
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if (~(tmp = *(p++)))
> -			goto found_middle;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> -	}
> -	if (!size)
> -		return result;
> -	tmp = *p;
>  
> -found_first:
> -	tmp |= ~0UL << size;
> -	if (tmp == ~0UL)	/* Are any bits zero? */
> -		return result + size;	/* Nope. */
> -found_middle:
> -	return result + ffz(tmp);
> +	return min(_find_next_bit(addr, size, offset, 0), size);

See above.

>  }
>  EXPORT_SYMBOL(find_next_zero_bit);
>  #endif
> @@ -113,24 +76,14 @@ EXPORT_SYMBOL(find_next_zero_bit);
>   */
>  unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
>  {
> -	const unsigned long *p = addr;
> -	unsigned long result = 0;
> -	unsigned long tmp;
> +	unsigned long idx;
>  
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if ((tmp = *(p++)))
> -			goto found;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> +	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
> +		if (addr[idx])
> +			return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
>  	}
> -	if (!size)
> -		return result;
>  
> -	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
> -	if (tmp == 0UL)		/* Are any bits set? */
> -		return result + size;	/* Nope. */
> -found:
> -	return result + __ffs(tmp);
> +	return size;
>  }
>  EXPORT_SYMBOL(find_first_bit);
>  #endif
> @@ -141,24 +94,14 @@ EXPORT_SYMBOL(find_first_bit);
>   */
>  unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
>  {
> -	const unsigned long *p = addr;
> -	unsigned long result = 0;
> -	unsigned long tmp;
> +	unsigned long idx;
>  
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if (~(tmp = *(p++)))
> -			goto found;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> +	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
> +		if (addr[idx] != ULONG_MAX)
> +			return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
>  	}
> -	if (!size)
> -		return result;
>  
> -	tmp = (*p) | (~0UL << size);
> -	if (tmp == ~0UL)	/* Are any bits zero? */
> -		return result + size;	/* Nope. */
> -found:
> -	return result + ffz(tmp);
> +	return size;
>  }
>  EXPORT_SYMBOL(find_first_zero_bit);
>  #endif
> @@ -166,18 +109,6 @@ EXPORT_SYMBOL(find_first_zero_bit);
>  #ifdef __BIG_ENDIAN
>  
>  /* include/linux/byteorder does not support "unsigned long" type */
> -static inline unsigned long ext2_swabp(const unsigned long * x)
> -{
> -#if BITS_PER_LONG == 64
> -	return (unsigned long) __swab64p((u64 *) x);
> -#elif BITS_PER_LONG == 32
> -	return (unsigned long) __swab32p((u32 *) x);
> -#else
> -#error BITS_PER_LONG not defined
> -#endif
> -}
> -
> -/* include/linux/byteorder doesn't support "unsigned long" type */
>  static inline unsigned long ext2_swab(const unsigned long y)
>  {
>  #if BITS_PER_LONG == 64
> @@ -189,48 +120,40 @@ static inline unsigned long ext2_swab(const unsigned long y)
>  #endif
>  }
>  
> +#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
> +static unsigned long _find_next_bit_le(const unsigned long *addr,
> +		unsigned long nbits, unsigned long start, bool set)
> +{
> +	unsigned long tmp = set ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +
> +	/* Handle 1st word. */
> +	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
> +		tmp &= ext2_swab(HIGH_BITS_MASK(start % BITS_PER_LONG));
> +		start = round_down(start, BITS_PER_LONG);
> +	}
> +
> +	while (!tmp) {
> +		start += BITS_PER_LONG;
> +		if (start >= nbits)
> +			return nbits;
> +
> +		tmp = set ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +	}
> +
> +	return start + __ffs(ext2_swab(tmp));
> +}
> +#endif
> +
>  #ifndef find_next_zero_bit_le
>  unsigned long find_next_zero_bit_le(const void *addr, unsigned
>  		long size, unsigned long offset)
>  {
> -	const unsigned long *p = addr;
> -	unsigned long result = offset & ~(BITS_PER_LONG - 1);
> -	unsigned long tmp;
> -
>  	if (offset >= size)
>  		return size;

Again, I think this should be moved to the common implementation in
_find_next_bit_le, and similarly for find_next_bit_le below.

> -	p += BITOP_WORD(offset);
> -	size -= result;
> -	offset &= (BITS_PER_LONG - 1UL);
> -	if (offset) {
> -		tmp = ext2_swabp(p++);
> -		tmp |= (~0UL >> (BITS_PER_LONG - offset));
> -		if (size < BITS_PER_LONG)
> -			goto found_first;
> -		if (~tmp)
> -			goto found_middle;
> -		size -= BITS_PER_LONG;
> -		result += BITS_PER_LONG;
> -	}
>  
> -	while (size & ~(BITS_PER_LONG - 1)) {
> -		if (~(tmp = *(p++)))
> -			goto found_middle_swap;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> -	}
> -	if (!size)
> -		return result;
> -	tmp = ext2_swabp(p);
> -found_first:
> -	tmp |= ~0UL << size;
> -	if (tmp == ~0UL)	/* Are any bits zero? */
> -		return result + size; /* Nope. Skip ffz */
> -found_middle:
> -	return result + ffz(tmp);
> -
> -found_middle_swap:
> -	return result + ffz(ext2_swab(tmp));
> +	return min(_find_next_bit_le(addr, size, offset, 0), size);
>  }
>  EXPORT_SYMBOL(find_next_zero_bit_le);
>  #endif
> @@ -239,45 +162,10 @@ EXPORT_SYMBOL(find_next_zero_bit_le);
>  unsigned long find_next_bit_le(const void *addr, unsigned
>  		long size, unsigned long offset)
>  {
> -	const unsigned long *p = addr;
> -	unsigned long result = offset & ~(BITS_PER_LONG - 1);
> -	unsigned long tmp;
> -
>  	if (offset >= size)
>  		return size;
> -	p += BITOP_WORD(offset);
> -	size -= result;
> -	offset &= (BITS_PER_LONG - 1UL);
> -	if (offset) {
> -		tmp = ext2_swabp(p++);
> -		tmp &= (~0UL << offset);
> -		if (size < BITS_PER_LONG)
> -			goto found_first;
> -		if (tmp)
> -			goto found_middle;
> -		size -= BITS_PER_LONG;
> -		result += BITS_PER_LONG;
> -	}
> -
> -	while (size & ~(BITS_PER_LONG - 1)) {
> -		tmp = *(p++);
> -		if (tmp)
> -			goto found_middle_swap;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> -	}
> -	if (!size)
> -		return result;
> -	tmp = ext2_swabp(p);
> -found_first:
> -	tmp &= (~0UL >> (BITS_PER_LONG - size));
> -	if (tmp == 0UL)		/* Are any bits set? */
> -		return result + size; /* Nope. */
> -found_middle:
> -	return result + __ffs(tmp);
>  
> -found_middle_swap:
> -	return result + __ffs(ext2_swab(tmp));
> +	return min(_find_next_bit_le(addr, size, offset, 1), size);
>  }
>  EXPORT_SYMBOL(find_next_bit_le);
>  #endif


* Re: [PATCH v2 3/3] lib: rename lib/find_next_bit.c to lib/find_bit.c
  2015-01-31 20:58 ` [PATCH v2 3/3] lib: rename lib/find_next_bit.c to lib/find_bit.c yury.norov
@ 2015-02-02 11:09   ` Rasmus Villemoes
  0 siblings, 0 replies; 14+ messages in thread
From: Rasmus Villemoes @ 2015-02-02 11:09 UTC (permalink / raw)
  To: yury.norov
  Cc: klimov.linux, davem, akpm, hannes, dborkman, laijs,
	takahiro.akashi, valentinrothberg, linux, msalter, chris, tgraf,
	linux-kernel

On Sat, Jan 31 2015, yury.norov@gmail.com wrote:

> From: Yury Norov <yury.norov@gmail.com>
>
> This file contains implementations for:
> - find_last_bit;
> - find_first_zero_bit;
> - find_first_bit;
> - find_next_zero_bit;
> - find_next_bit.
>

[and a few _le variants]

> So giving it a more generic name looks reasonable.

It does. But it is a little annoying that it is not shown as a pure
rename.

> Signed-off-by: Yury Norov <yury.norov@gmail.com>
> ---
>  lib/Makefile        |   2 +-
>  lib/find_bit.c      | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/find_next_bit.c | 192 ---------------------------------------------------
>  3 files changed, 195 insertions(+), 193 deletions(-)
>  create mode 100644 lib/find_bit.c
>  delete mode 100644 lib/find_next_bit.c
>
> diff --git a/lib/Makefile b/lib/Makefile
> index 13990aa..1cc93f4 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -25,7 +25,7 @@ obj-y	+= lockref.o
>  obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
>  	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
>  	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
> -	 bsearch.o find_next_bit.o llist.o memweight.o kfifo.o \
> +	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
>  	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
>  obj-y += string_helpers.o
>  obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
> diff --git a/lib/find_bit.c b/lib/find_bit.c
> new file mode 100644
> index 0000000..236a850
> --- /dev/null
> +++ b/lib/find_bit.c
> @@ -0,0 +1,194 @@
> +/* find_bit.c: generic implementation fore:
> + * find_last_bit; find_first_zero_bit; find_first_bit;
> + * find_next_zero_bit; find_next_bit.
> + *

[trivial typo: s/fore/for/]

I _think_ the cause of the 2-line discrepancy is this change in a comment,
but it's hard to tell for sure. There's no simple way of telling whether the
other 192 lines are actually the same or if subtle changes have been
introduced.

This is one reason hard-coding the name of a file inside the file is a
bad idea. I'd suggest tweaking that comment in one of the earlier
patches, for example the one moving find_last_bit to find_next_bit.c,
making sure to remove the filename. Then this patch can be a simple 'git
mv' and the same two-line change of the Makefile.

Rasmus


* Re: [PATCH v2 1/3] lib: find_*_bit reimplementation
  2015-02-02 10:43 ` Rasmus Villemoes
@ 2015-02-02 11:47   ` George Spelvin
  2015-02-02 12:56     ` Rasmus Villemoes
  2015-02-04 22:52   ` Yury
  1 sibling, 1 reply; 14+ messages in thread
From: George Spelvin @ 2015-02-02 11:47 UTC (permalink / raw)
  To: linux, yury.norov
  Cc: akpm, chris, davem, dborkman, hannes, klimov.linux, laijs,
	linux-kernel, linux, msalter, takahiro.akashi, tgraf,
	valentinrothberg, y.norov

Rasmus Villemoes <linux@rasmusvillemoes.dk> wrote:
> ... and this be part of _find_next_bit? Can find_next_bit not be simply
> 'return _find_next_bit(addr, size, offset, 1);', and similarly for
> find_next_zero_bit? Btw., passing true and false for the boolean
> parameter may be a little clearer.

Looking at the generated code, it would be better to replace the boolean
parameter with 0ul or ~0ul and XOR with it.  The same number of registers,
and saves a conditional branch.

(I was hoping GCC would figure that trick out, but it didn't.)
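
Roughly like this (a sketch only, reusing HIGH_BITS_MASK from the patch;
the callers keep their offset >= size check and the min() clamp):

	static unsigned long _find_next_bit(const unsigned long *addr,
			unsigned long nbits, unsigned long start,
			unsigned long invert)
	{
		unsigned long tmp = addr[start / BITS_PER_LONG] ^ invert;

		/* Handle 1st word. */
		if (!IS_ALIGNED(start, BITS_PER_LONG)) {
			tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
			start = round_down(start, BITS_PER_LONG);
		}

		while (!tmp) {
			start += BITS_PER_LONG;
			if (start >= nbits)
				return nbits;

			tmp = addr[start / BITS_PER_LONG] ^ invert;
		}

		return start + __ffs(tmp);
	}

	/* find_next_bit passes invert = 0UL, find_next_zero_bit passes ~0UL */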


* Re: [PATCH v2 1/3] lib: find_*_bit reimplementation
  2015-02-02 11:47   ` George Spelvin
@ 2015-02-02 12:56     ` Rasmus Villemoes
  2015-02-04 23:45       ` Yury
  0 siblings, 1 reply; 14+ messages in thread
From: Rasmus Villemoes @ 2015-02-02 12:56 UTC (permalink / raw)
  To: George Spelvin
  Cc: yury.norov, akpm, chris, davem, dborkman, hannes, klimov.linux,
	laijs, linux-kernel, msalter, takahiro.akashi, tgraf,
	valentinrothberg, y.norov

On Mon, Feb 02 2015, "George Spelvin" <linux@horizon.com> wrote:

> Rasmus Villemoes <linux@rasmusvillemoes.dk> wrote:
>> ... and this be part of _find_next_bit? Can find_next_bit not be simply
>> 'return _find_next_bit(addr, size, offset, 1);', and similarly for
>> find_next_zero_bit? Btw., passing true and false for the boolean
>> parameter may be a little clearer.
>
> Looking at the generated code, it would be better to replace the boolean
> parameter with 0ul or ~0ul and XOR with it.  The same number of registers,
> and saves a conditional branch.

Nice trick. When I compiled it, gcc inlined _find_next_bit into both its
callers, making the conditional go away completely. That was with gcc
4.7. When I try with 5.0, I do see _find_next_bit being compiled
separately.

With the proposed change, 4.7 also makes find_next{,_zero}_bit wrappers
for _find_next_bit, further reducing the total size, which is a good
thing. And, if some other version decides to still inline it, it
should then know how to optimize the xor with 0ul or ~0ul just as well
as when the conditional was folded away. 

Yury, please also incorporate this in the next round.

Rasmus



* Re: [PATCH v2 1/3] lib: find_*_bit reimplementation
  2015-02-02 10:43 ` Rasmus Villemoes
  2015-02-02 11:47   ` George Spelvin
@ 2015-02-04 22:52   ` Yury
  2015-02-05 15:01     ` Rasmus Villemoes
  1 sibling, 1 reply; 14+ messages in thread
From: Yury @ 2015-02-04 22:52 UTC (permalink / raw)
  To: Rasmus Villemoes
  Cc: klimov.linux, davem, akpm, hannes, dborkman, laijs,
	takahiro.akashi, valentinrothberg, linux, msalter, chris, tgraf,
	linux-kernel, Yury Norov


On 02.02.2015 13:43, Rasmus Villemoes wrote:
> On Sat, Jan 31 2015, yury.norov@gmail.com wrote:
>
>> From: Yury Norov <y.norov@samsung.com>
>>
>> The new implementations take less space in the source file (see diffstat)
>> and in the object file. For me it's 710 vs 453 bytes of text.
>>
> New version generally looks good. Please include a summary of the
> changes between the versions either below the --- line or in a 0/n cover
> letter, especially since you've now expanded the scope of the series.
>
> Comments below.
>
>> Patch was boot-tested on x86_64 and MIPS (big-endian) machines.
>> Performance tests were run in userspace with code like this:
>>
>> 	/* addr[] is filled from /dev/urandom */
>> 	start = clock();
>> 	while (ret < nbits)
>> 		ret = find_next_bit(addr, nbits, ret + 1);
>>
>> 	end = clock();
>> 	printf("%ld\t", (unsigned long) end - start);
>>
>> On Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz the results are as follows:
>> (for find_next_bit, nbits is 8M, for find_first_bit - 80K)
>>
>> 	find_next_bit:		find_first_bit:
>> 	new	current		new	current
>> 	26932	43151		14777	14925
>> 	26947	43182		14521	15423
>> 	26507	43824		15053	14705
>> 	27329	43759		14473	14777
>> 	26895	43367		14847	15023
>> 	26990	43693		15103	15163
>> 	26775	43299		15067	15232
>> 	27282	42752		14544	15121
>> 	27504	43088		14644	14858
>> 	26761	43856		14699	15193
>> 	26692	43075		14781	14681
>> 	27137	42969		14451	15061
>> 	...			...
>>
>> find_next_bit performance gain is 35-40%;
>> find_first_bit - no measurable difference.
>>
>> Signed-off-by: Yury Norov <yury.norov@gmail.com>
>> ---
>>  lib/find_last_bit.c |  31 ++-----
>>  lib/find_next_bit.c | 254 +++++++++++++++-------------------------------------
>>  2 files changed, 79 insertions(+), 206 deletions(-)
>>
>> diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
>> index 91ca09f..e67e970 100644
>> --- a/lib/find_last_bit.c
>> +++ b/lib/find_last_bit.c
>> @@ -4,44 +4,29 @@
>>   * Written by Rusty Russell <rusty@rustcorp.com.au>
>>   * (Inspired by David Howell's find_next_bit implementation)
>>   *
>> + * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
>> + * size and improve performance, 2015.
>> + *
>>   * This program is free software; you can redistribute it and/or
>>   * modify it under the terms of the GNU General Public License
>>   * as published by the Free Software Foundation; either version
>>   * 2 of the License, or (at your option) any later version.
>>   */
>>  
>> -#include <linux/bitops.h>
> Why do you remove that #include? It is rather important that the header
> and implementation don't get out of sync. I know that kernel.h includes
> bitops.h, but please don't rely on such things. Quoting SubmitChecklist:
>
> 1: If you use a facility then #include the file that defines/declares
>    that facility.  Don't depend on other header files pulling in ones
>    that you use.
>
>
>>  #include <linux/export.h>
>> -#include <asm/types.h>
>> -#include <asm/byteorder.h>
> However, getting rid of includes that are no longer needed is certainly
> a good thing.
Yes, linux/bitops.h should be brought back.
>> +#include <linux/kernel.h>
>>  
>>  #ifndef find_last_bit
>>  
>>  unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
>>  {
>> -	unsigned long words;
>> -	unsigned long tmp;
>> -
>> -	/* Start at final word. */
>> -	words = size / BITS_PER_LONG;
>> -
>> -	/* Partial final word? */
>> -	if (size & (BITS_PER_LONG-1)) {
>> -		tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
>> -					 - (size & (BITS_PER_LONG-1)))));
>> -		if (tmp)
>> -			goto found;
>> -	}
>> +	unsigned long idx = DIV_ROUND_UP(size, BITS_PER_LONG);
>>  
>> -	while (words) {
>> -		tmp = addr[--words];
>> -		if (tmp) {
>> -found:
>> -			return words * BITS_PER_LONG + __fls(tmp);
>> -		}
>> +	while (idx--) {
>> +		if (addr[idx])
>> +			return min(idx * BITS_PER_LONG + __fls(addr[idx]), size);
>>  	}
>>  
>> -	/* Not found */
>>  	return size;
>>  }
>>  EXPORT_SYMBOL(find_last_bit);
>> diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
>> index 0cbfc0b..ebfb3dc 100644
>> --- a/lib/find_next_bit.c
>> +++ b/lib/find_next_bit.c
>> @@ -3,18 +3,45 @@
>>   * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
>>   * Written by David Howells (dhowells@redhat.com)
>>   *
>> + * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
>> + * size and improve performance, 2015.
>> + *
>>   * This program is free software; you can redistribute it and/or
>>   * modify it under the terms of the GNU General Public License
>>   * as published by the Free Software Foundation; either version
>>   * 2 of the License, or (at your option) any later version.
>>   */
>>  
>> -#include <linux/bitops.h>
>>  #include <linux/export.h>
>> -#include <asm/types.h>
>> -#include <asm/byteorder.h>
>> +#include <linux/kernel.h>
> Same as above.
>
>> +#define HIGH_BITS_MASK(nr)		(ULONG_MAX << (nr))
>> +
>> +#if !defined(find_next_bit) || !defined(find_next_zero_bit)
>> +static unsigned long _find_next_bit(const unsigned long *addr,
>> +		unsigned long nbits, unsigned long start, bool set)
>> +{
>> +	unsigned long tmp = set ? addr[start / BITS_PER_LONG]
>> +			: ~addr[start / BITS_PER_LONG];
>> +
>> +	/* Handle 1st word. */
>> +	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
>> +		tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
>> +		start = round_down(start, BITS_PER_LONG);
>> +	}
>>  
>> -#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
>> +	while (!tmp) {
>> +		start += BITS_PER_LONG;
>> +		if (start >= nbits)
>> +			return nbits;
>> +
>> +		tmp = set ? addr[start / BITS_PER_LONG]
>> +			: ~addr[start / BITS_PER_LONG];
>> +	}
>> +
>> +	return start + __ffs(tmp);
>> +}
>> +#endif
>>  
>>  #ifndef find_next_bit
>>  /*
>> @@ -23,86 +50,22 @@
>>  unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
>>  			    unsigned long offset)
>>  {
>> -	const unsigned long *p = addr + BITOP_WORD(offset);
>> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
>> -	unsigned long tmp;
>> -
>>  	if (offset >= size)
>>  		return size;
> Why can't this ...
>
>
>> -	size -= result;
>> -	offset %= BITS_PER_LONG;
>> -	if (offset) {
>> -		tmp = *(p++);
>> -		tmp &= (~0UL << offset);
>> -		if (size < BITS_PER_LONG)
>> -			goto found_first;
>> -		if (tmp)
>> -			goto found_middle;
>> -		size -= BITS_PER_LONG;
>> -		result += BITS_PER_LONG;
>> -	}
>> -	while (size & ~(BITS_PER_LONG-1)) {
>> -		if ((tmp = *(p++)))
>> -			goto found_middle;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> -	}
>> -	if (!size)
>> -		return result;
>> -	tmp = *p;
>>  
>> -found_first:
>> -	tmp &= (~0UL >> (BITS_PER_LONG - size));
>> -	if (tmp == 0UL)		/* Are any bits set? */
>> -		return result + size;	/* Nope. */
>> -found_middle:
>> -	return result + __ffs(tmp);
>> +	return min(_find_next_bit(addr, size, offset, 1), size);
> ... and this be part of _find_next_bit? Can find_next_bit not be simply
> 'return _find_next_bit(addr, size, offset, 1);', and similarly for
> find_next_zero_bit? Btw., passing true and false for the boolean
> parameter may be a little clearer.
I moved the size checks out of '_find_next_bit' so that a user can call it
directly when he knows for sure that the size/offset pair is valid. This may
help save a couple of clocks. I think I'll walk over the code to see how many
such places we have. If there aren't too many / they're not in critical paths,
the checks can be moved into the function.

Or maybe leave it as is and add a comment for the future?...
>
>>  }
>>  EXPORT_SYMBOL(find_next_bit);
>>  #endif
>>  
>>  #ifndef find_next_zero_bit
>> -/*
>> - * This implementation of find_{first,next}_zero_bit was stolen from
>> - * Linus' asm-alpha/bitops.h.
>> - */
>>  unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
>>  				 unsigned long offset)
>>  {
>> -	const unsigned long *p = addr + BITOP_WORD(offset);
>> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
>> -	unsigned long tmp;
>> -
>>  	if (offset >= size)
>>  		return size;
> See above.
>
>> -	size -= result;
>> -	offset %= BITS_PER_LONG;
>> -	if (offset) {
>> -		tmp = *(p++);
>> -		tmp |= ~0UL >> (BITS_PER_LONG - offset);
>> -		if (size < BITS_PER_LONG)
>> -			goto found_first;
>> -		if (~tmp)
>> -			goto found_middle;
>> -		size -= BITS_PER_LONG;
>> -		result += BITS_PER_LONG;
>> -	}
>> -	while (size & ~(BITS_PER_LONG-1)) {
>> -		if (~(tmp = *(p++)))
>> -			goto found_middle;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> -	}
>> -	if (!size)
>> -		return result;
>> -	tmp = *p;
>>  
>> -found_first:
>> -	tmp |= ~0UL << size;
>> -	if (tmp == ~0UL)	/* Are any bits zero? */
>> -		return result + size;	/* Nope. */
>> -found_middle:
>> -	return result + ffz(tmp);
>> +	return min(_find_next_bit(addr, size, offset, 0), size);
> See above.
>
>>  }
>>  EXPORT_SYMBOL(find_next_zero_bit);
>>  #endif
>> @@ -113,24 +76,14 @@ EXPORT_SYMBOL(find_next_zero_bit);
>>   */
>>  unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
>>  {
>> -	const unsigned long *p = addr;
>> -	unsigned long result = 0;
>> -	unsigned long tmp;
>> +	unsigned long idx;
>>  
>> -	while (size & ~(BITS_PER_LONG-1)) {
>> -		if ((tmp = *(p++)))
>> -			goto found;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> +	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
>> +		if (addr[idx])
>> +			return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
>>  	}
>> -	if (!size)
>> -		return result;
>>  
>> -	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
>> -	if (tmp == 0UL)		/* Are any bits set? */
>> -		return result + size;	/* Nope. */
>> -found:
>> -	return result + __ffs(tmp);
>> +	return size;
>>  }
>>  EXPORT_SYMBOL(find_first_bit);
>>  #endif
>> @@ -141,24 +94,14 @@ EXPORT_SYMBOL(find_first_bit);
>>   */
>>  unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
>>  {
>> -	const unsigned long *p = addr;
>> -	unsigned long result = 0;
>> -	unsigned long tmp;
>> +	unsigned long idx;
>>  
>> -	while (size & ~(BITS_PER_LONG-1)) {
>> -		if (~(tmp = *(p++)))
>> -			goto found;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> +	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
>> +		if (addr[idx] != ULONG_MAX)
>> +			return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
>>  	}
>> -	if (!size)
>> -		return result;
>>  
>> -	tmp = (*p) | (~0UL << size);
>> -	if (tmp == ~0UL)	/* Are any bits zero? */
>> -		return result + size;	/* Nope. */
>> -found:
>> -	return result + ffz(tmp);
>> +	return size;
>>  }
>>  EXPORT_SYMBOL(find_first_zero_bit);
>>  #endif
>> @@ -166,18 +109,6 @@ EXPORT_SYMBOL(find_first_zero_bit);
>>  #ifdef __BIG_ENDIAN
>>  
>>  /* include/linux/byteorder does not support "unsigned long" type */
>> -static inline unsigned long ext2_swabp(const unsigned long * x)
>> -{
>> -#if BITS_PER_LONG == 64
>> -	return (unsigned long) __swab64p((u64 *) x);
>> -#elif BITS_PER_LONG == 32
>> -	return (unsigned long) __swab32p((u32 *) x);
>> -#else
>> -#error BITS_PER_LONG not defined
>> -#endif
>> -}
>> -
>> -/* include/linux/byteorder doesn't support "unsigned long" type */
>>  static inline unsigned long ext2_swab(const unsigned long y)
>>  {
>>  #if BITS_PER_LONG == 64
>> @@ -189,48 +120,40 @@ static inline unsigned long ext2_swab(const unsigned long y)
>>  #endif
>>  }
>>  
>> +#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
>> +static unsigned long _find_next_bit_le(const unsigned long *addr,
>> +		unsigned long nbits, unsigned long start, bool set)
>> +{
>> +	unsigned long tmp = set ? addr[start / BITS_PER_LONG]
>> +			: ~addr[start / BITS_PER_LONG];
>> +
>> +	/* Handle 1st word. */
>> +	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
>> +		tmp &= ext2_swab(HIGH_BITS_MASK(start % BITS_PER_LONG));
>> +		start = round_down(start, BITS_PER_LONG);
>> +	}
>> +
>> +	while (!tmp) {
>> +		start += BITS_PER_LONG;
>> +		if (start >= nbits)
>> +			return nbits;
>> +
>> +		tmp = set ? addr[start / BITS_PER_LONG]
>> +			: ~addr[start / BITS_PER_LONG];
>> +	}
>> +
>> +	return start + __ffs(ext2_swab(tmp));
>> +}
>> +#endif
>> +
>>  #ifndef find_next_zero_bit_le
>>  unsigned long find_next_zero_bit_le(const void *addr, unsigned
>>  		long size, unsigned long offset)
>>  {
>> -	const unsigned long *p = addr;
>> -	unsigned long result = offset & ~(BITS_PER_LONG - 1);
>> -	unsigned long tmp;
>> -
>>  	if (offset >= size)
>>  		return size;
> Again, I think this should be moved to the common implementation in
> _find_next_bit_le, and similarly for find_next_bit_le below.
>
>> -	p += BITOP_WORD(offset);
>> -	size -= result;
>> -	offset &= (BITS_PER_LONG - 1UL);
>> -	if (offset) {
>> -		tmp = ext2_swabp(p++);
>> -		tmp |= (~0UL >> (BITS_PER_LONG - offset));
>> -		if (size < BITS_PER_LONG)
>> -			goto found_first;
>> -		if (~tmp)
>> -			goto found_middle;
>> -		size -= BITS_PER_LONG;
>> -		result += BITS_PER_LONG;
>> -	}
>>  
>> -	while (size & ~(BITS_PER_LONG - 1)) {
>> -		if (~(tmp = *(p++)))
>> -			goto found_middle_swap;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> -	}
>> -	if (!size)
>> -		return result;
>> -	tmp = ext2_swabp(p);
>> -found_first:
>> -	tmp |= ~0UL << size;
>> -	if (tmp == ~0UL)	/* Are any bits zero? */
>> -		return result + size; /* Nope. Skip ffz */
>> -found_middle:
>> -	return result + ffz(tmp);
>> -
>> -found_middle_swap:
>> -	return result + ffz(ext2_swab(tmp));
>> +	return min(_find_next_bit_le(addr, size, offset, 0), size);
>>  }
>>  EXPORT_SYMBOL(find_next_zero_bit_le);
>>  #endif
>> @@ -239,45 +162,10 @@ EXPORT_SYMBOL(find_next_zero_bit_le);
>>  unsigned long find_next_bit_le(const void *addr, unsigned
>>  		long size, unsigned long offset)
>>  {
>> -	const unsigned long *p = addr;
>> -	unsigned long result = offset & ~(BITS_PER_LONG - 1);
>> -	unsigned long tmp;
>> -
>>  	if (offset >= size)
>>  		return size;
>> -	p += BITOP_WORD(offset);
>> -	size -= result;
>> -	offset &= (BITS_PER_LONG - 1UL);
>> -	if (offset) {
>> -		tmp = ext2_swabp(p++);
>> -		tmp &= (~0UL << offset);
>> -		if (size < BITS_PER_LONG)
>> -			goto found_first;
>> -		if (tmp)
>> -			goto found_middle;
>> -		size -= BITS_PER_LONG;
>> -		result += BITS_PER_LONG;
>> -	}
>> -
>> -	while (size & ~(BITS_PER_LONG - 1)) {
>> -		tmp = *(p++);
>> -		if (tmp)
>> -			goto found_middle_swap;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> -	}
>> -	if (!size)
>> -		return result;
>> -	tmp = ext2_swabp(p);
>> -found_first:
>> -	tmp &= (~0UL >> (BITS_PER_LONG - size));
>> -	if (tmp == 0UL)		/* Are any bits set? */
>> -		return result + size; /* Nope. */
>> -found_middle:
>> -	return result + __ffs(tmp);
>>  
>> -found_middle_swap:
>> -	return result + __ffs(ext2_swab(tmp));
>> +	return min(_find_next_bit_le(addr, size, offset, 1), size);
>>  }
>>  EXPORT_SYMBOL(find_next_bit_le);
>>  #endif


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/3] lib: find_*_bit reimplementation
  2015-02-02  3:17 ` [PATCH v2 1/3] lib: find_*_bit reimplementation George Spelvin
@ 2015-02-04 23:07   ` Yury
  0 siblings, 0 replies; 14+ messages in thread
From: Yury @ 2015-02-04 23:07 UTC (permalink / raw)
  To: George Spelvin, akpm, chris, davem, dborkman, hannes,
	klimov.linux, laijs, msalter, takahiro.akashi, tgraf,
	valentinrothberg
  Cc: linux-kernel, y.norov, Yury Norov


On 02.02.2015 06:17, George Spelvin wrote:
> Yury Norov <y.norov@samsung.com> wrote:
>> New implementations takes less space in source file (see diffstat)
>> and in object. For me it's 710 vs 453 bytes of text.
>>
>> Patch was boot-tested on x86_64 and MIPS (big-endian) machines.
>> Performance tests were ran on userspace with code like this:
>>
>> 	/* addr[] is filled from /dev/urandom */
>> 	start = clock();
>> 	while (ret < nbits)
>> 		ret = find_next_bit(addr, nbits, ret + 1);
>>
>> 	end = clock();
>> 	printf("%ld\t", (unsigned long) end - start);
>> On Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz rezults are next:
>> (for find_next_bit, nbits is 8M, for find_first_bit - 80K)
>>
>> 	find_next_bit:		find_first_bit:
>> 	new	current		new	current
>> 	26932	43151		14777	14925
>> 	26947	43182		14521	15423
> I'll look at this more carefully, but one immediate thought is that this
> is an unrealistic benchmark.  It will amost never need to look at more
> than one word of the array, but real arrays have long runs of zero
> bits to skip over.
>
> So the code size is appreciated, but the time benefits may be the result
> of you optimizing for the wrong thing.
>
> I'd try filling the array with mostly-identical bits, flipping with odds
> of 1/256 or so.
>
> For full generality, I'd test different 1->0 and 0->1 transition
> probabilities.  (But powers of two are probably enough for benchmarking.)
>
I think a test with random values represents at least one situation: well-fragmented memory
after a long time of work. (This is what I really have in my project.) On the other hand, if long
zero runs are typical behavior for one's system, that is a good opportunity for improvement, I think.
Anyway, the idea of testing find_bit on long runs is good. Thank you.
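
For what it's worth, a minimal userspace sketch of the fill George suggests
could look like this (the 1/256 odds and plain rand() are assumptions, and
fill_with_runs() is just a made-up helper for illustration):

	#include <limits.h>
	#include <stdlib.h>

	#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)

	/* Fill nbits bits with long runs: flip the current value with
	 * probability 1/256 at each bit position. */
	static void fill_with_runs(unsigned long *addr, unsigned long nbits)
	{
		unsigned long i, cur = 0;

		for (i = 0; i < nbits; i++) {
			if ((rand() & 0xff) == 0)
				cur ^= 1;
			if (cur)
				addr[i / BITS_PER_LONG] |= 1UL << (i % BITS_PER_LONG);
			else
				addr[i / BITS_PER_LONG] &= ~(1UL << (i % BITS_PER_LONG));
		}
	}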

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/3] lib: find_*_bit reimplementation
  2015-02-02 12:56     ` Rasmus Villemoes
@ 2015-02-04 23:45       ` Yury
  2015-02-05 14:51         ` Rasmus Villemoes
  0 siblings, 1 reply; 14+ messages in thread
From: Yury @ 2015-02-04 23:45 UTC (permalink / raw)
  To: Rasmus Villemoes, George Spelvin
  Cc: akpm, chris, davem, dborkman, hannes, klimov.linux, laijs,
	linux-kernel, msalter, takahiro.akashi, tgraf, valentinrothberg,
	Yury Norov


On 02.02.2015 15:56, Rasmus Villemoes wrote:
> On Mon, Feb 02 2015, "George Spelvin" <linux@horizon.com> wrote:
>
>> Rasmus Villemoes <linux@rasmusvillemoes.dk> wrote:
>>> ... and this be part of _find_next_bit? Can find_next_bit not be simply
>>> 'return _find_next_bit(addr, size, offset, 1);', and similarly for
>>> find_next_zero_bit? Btw., passing true and false for the boolean
>>> parameter may be a little clearer.
>> Looking at the generated code, it would be better to replace the boolean
>> parameter with 0ul or ~0ul and XOR with it.  The same number of registers,
>> and saves a conditional branch.
> Nice trick. When I compiled it, gcc inlined _find_next_bit into both its
> callers, making the conditional go away completely. That was with gcc
> 4.7. When I try with 5.0, I do see _find_next_bit being compiled
> separately.
>
> With the proposed change, 4.7 also makes find_next{,_zero}_bit wrappers
> for _find_next_bit, further reducing the total size, which is a good
> thing. And, if some other version decides to still inline it, it
> should then know how to optimize the xor with 0ul or ~0ul just as well
> as when the conditional was folded away. 
>
> Yury, please also incorporate this in the next round.
>
> Rasmus
>
Ok.
What do you think about joining _find_next_bit and _find_next_bit_le?
They really differ in only 2 lines. It's generally good to remove duplication,
and it may decrease text size for big-endian machines. But it definitely
doesn't make the code easier to read, and it may affect performance
after the optimization suggested by George...

(I haven't tested it yet.)

#if !defined(find_next_bit) || !defined(find_next_zero_bit) \
	|| (defined(__BIG_ENDIAN) && \
		(!defined(find_next_bit_le) || !defined(find_next_zero_bit_le)))
static unsigned long _find_next_bit(const unsigned long *addr,
		unsigned long nbits, unsigned long start, unsigned long flags)
{
	unsigned long xor_mask = (flags & SET) ? 0UL : ULONG_MAX;
	unsigned long tmp = addr[start / BITS_PER_LONG] ^ xor_mask;

	/* Handle 1st word. */
	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
#ifdef __BIG_ENDIAN
		if (flags & LE)
			tmp &= ext2_swab(HIGH_BITS_MASK(start % BITS_PER_LONG));
		else
#endif
			tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);

		start = round_down(start, BITS_PER_LONG);
	}

	while (!tmp) {
		start += BITS_PER_LONG;
		if (start >= nbits)
			return nbits;

		tmp = addr[start / BITS_PER_LONG] ^ xor_mask;
	}

#ifdef __BIG_ENDIAN
	if (flags & LE)
		return start + __ffs(ext2_swab(tmp));
#endif
	return start + __ffs(tmp);
}
#endif
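
For completeness, wrappers on top of that could look roughly like the
following. Note that SET and LE are not defined in the snippet above, so the
flag values here are only a guess:

#define SET	0x1UL	/* find set bits (otherwise zero bits) */
#define LE	0x2UL	/* bitmap is little-endian on a big-endian host */

unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
			    unsigned long offset)
{
	if (offset >= size)
		return size;

	return min(_find_next_bit(addr, size, offset, SET), size);
}

unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
				 unsigned long offset)
{
	if (offset >= size)
		return size;

	return min(_find_next_bit(addr, size, offset, 0), size);
}

#ifdef __BIG_ENDIAN
unsigned long find_next_bit_le(const void *addr, unsigned long size,
			       unsigned long offset)
{
	if (offset >= size)
		return size;

	return min(_find_next_bit(addr, size, offset, SET | LE), size);
}
#endif

find_next_zero_bit_le would be the same as the last one, passing LE alone.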



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/3] lib: find_*_bit reimplementation
  2015-02-04 23:45       ` Yury
@ 2015-02-05 14:51         ` Rasmus Villemoes
  0 siblings, 0 replies; 14+ messages in thread
From: Rasmus Villemoes @ 2015-02-05 14:51 UTC (permalink / raw)
  To: Yury
  Cc: George Spelvin, akpm, chris, davem, dborkman, hannes,
	klimov.linux, laijs, linux-kernel, msalter, takahiro.akashi,
	tgraf, valentinrothberg, Yury Norov

On Thu, Feb 05 2015, Yury <yury.norov@gmail.com> wrote:

> On 02.02.2015 15:56, Rasmus Villemoes wrote:
>> On Mon, Feb 02 2015, "George Spelvin" <linux@horizon.com> wrote:
>>
>>> Rasmus Villemoes <linux@rasmusvillemoes.dk> wrote:
>>>> ... and this be part of _find_next_bit? Can find_next_bit not be simply
>>>> 'return _find_next_bit(addr, size, offset, 1);', and similarly for
>>>> find_next_zero_bit? Btw., passing true and false for the boolean
>>>> parameter may be a little clearer.
>>> Looking at the generated code, it would be better to replace the boolean
>>> parameter with 0ul or ~0ul and XOR with it.  The same number of registers,
>>> and saves a conditional branch.
>> Nice trick. When I compiled it, gcc inlined _find_next_bit into both its
>> callers, making the conditional go away completely. That was with gcc
>> 4.7. When I try with 5.0, I do see _find_next_bit being compiled
>> separately.
>>
>> With the proposed change, 4.7 also makes find_next{,_zero}_bit wrappers
>> for _find_next_bit, further reducing the total size, which is a good
>> thing. And, if some other version decides to still inline it, it
>> should then know how to optimize the xor with 0ul or ~0ul just as well
>> as when the conditional was folded away. 
>>
>> Yury, please also incorporate this in the next round.
>>
>> Rasmus
>>
> Ok.

Good.

> What do you think about joining _find_next_bit and
> _find_next_bit_le?

I don't think that should be done right now, if at all. The series is
pretty close to getting my Reviewed-by; I'd prefer not to start over.

Rasmus

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/3] lib: find_*_bit reimplementation
  2015-02-04 22:52   ` Yury
@ 2015-02-05 15:01     ` Rasmus Villemoes
  0 siblings, 0 replies; 14+ messages in thread
From: Rasmus Villemoes @ 2015-02-05 15:01 UTC (permalink / raw)
  To: Yury
  Cc: klimov.linux, davem, akpm, hannes, dborkman, laijs,
	takahiro.akashi, valentinrothberg, linux, msalter, chris, tgraf,
	linux-kernel, Yury Norov

On Wed, Feb 04 2015, Yury <yury.norov@gmail.com> wrote:

> On 02.02.2015 13:43, Rasmus Villemoes wrote:
>>> @@ -23,86 +50,22 @@
>>>  unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
>>>  			    unsigned long offset)
>>>  {
>>> -	const unsigned long *p = addr + BITOP_WORD(offset);
>>> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
>>> -	unsigned long tmp;
>>> -
>>>  	if (offset >= size)
>>>  		return size;
>> Why can't this ...
>>
>>
>>> -	size -= result;
>>> -	offset %= BITS_PER_LONG;
>>> -	if (offset) {
>>> -		tmp = *(p++);
>>> -		tmp &= (~0UL << offset);
>>> -		if (size < BITS_PER_LONG)
>>> -			goto found_first;
>>> -		if (tmp)
>>> -			goto found_middle;
>>> -		size -= BITS_PER_LONG;
>>> -		result += BITS_PER_LONG;
>>> -	}
>>> -	while (size & ~(BITS_PER_LONG-1)) {
>>> -		if ((tmp = *(p++)))
>>> -			goto found_middle;
>>> -		result += BITS_PER_LONG;
>>> -		size -= BITS_PER_LONG;
>>> -	}
>>> -	if (!size)
>>> -		return result;
>>> -	tmp = *p;
>>>  
>>> -found_first:
>>> -	tmp &= (~0UL >> (BITS_PER_LONG - size));
>>> -	if (tmp == 0UL)		/* Are any bits set? */
>>> -		return result + size;	/* Nope. */
>>> -found_middle:
>>> -	return result + __ffs(tmp);
>>> +	return min(_find_next_bit(addr, size, offset, 1), size);
>> ... and this be part of _find_next_bit? Can find_next_bit not be simply
>> 'return _find_next_bit(addr, size, offset, 1);', and similarly for
>> find_next_zero_bit? Btw., passing true and false for the boolean
>> parameter may be a little clearer.
> I moved the size checks out of '_find_next_bit' so that a user can call it
> directly when he knows for sure that the size/offset pair is valid. This may
> help save a couple of clocks. I think I'll walk over the code to see how many
> such places we have. If there aren't too many / they're not in critical paths,
> the checks can be moved into the function.

But _find_next_bit is static, so outsiders can't call it... The branches
are easily predicted and hence almost free, so I think it's better to do
the code deduplication and move the bounds checking inside
_find_next_bit, so that find_next_bit is literally just 'return
_find_next_bit(addr, size, offset, 0ul);' and find_next_zero_bit is
'return _find_next_bit(addr, size, offset, ~0ul);'.
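
To make that concrete, a sketch of that shape could be (reusing
HIGH_BITS_MASK from the patch; the 'invert' parameter name is arbitrary, and
exports and the _le variants are omitted):

static unsigned long _find_next_bit(const unsigned long *addr,
		unsigned long nbits, unsigned long start, unsigned long invert)
{
	unsigned long tmp;

	if (start >= nbits)
		return nbits;

	tmp = addr[start / BITS_PER_LONG] ^ invert;

	/* Handle 1st word. */
	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
		tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
		start = round_down(start, BITS_PER_LONG);
	}

	while (!tmp) {
		start += BITS_PER_LONG;
		if (start >= nbits)
			return nbits;

		tmp = addr[start / BITS_PER_LONG] ^ invert;
	}

	/* Clamp in case the last word extends past nbits. */
	return min(start + __ffs(tmp), nbits);
}

unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
			    unsigned long offset)
{
	return _find_next_bit(addr, size, offset, 0UL);
}

unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
				 unsigned long offset)
{
	return _find_next_bit(addr, size, offset, ~0UL);
}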

Rasmus

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/3] lib: find_*_bit reimplementation
@ 2015-02-05 23:07 Alexey Klimov
  0 siblings, 0 replies; 14+ messages in thread
From: Alexey Klimov @ 2015-02-05 23:07 UTC (permalink / raw)
  To: Yury Norov; +Cc: Rasmus Villemoes, linux, Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 862 bytes --]

On Mon, Feb 2, 2015 at 3:45 PM, Yury Norov <yury.norov@gmail.com> wrote:
> Alexey,
>
> Yes, ARM has its own implementation for subj. If you're interested in
> testing my patch on your odroid, try this. (Sorry, I have to attach the
> patch due to restrictions on the mail agent at my office.)

Hi Yury,

(please don't drop people from cc; I restored the mailing-list cc and
re-attached the patch for testing)

As advised, please include a [PATCH 0/3] cover email with descriptions, and
maybe insert your patch-for-testing there (the one that makes the ARM arch
use the generic find_*_bit functions instead of the platform ones).

I switched the kernel to the generic find_bit and friends implementation
on ARMv7 and boot-tested your patch on odroid-xu3 (ARMv7 SoC). It boots
and works fine. So if you need my Tested-by, here it is:

Tested-by: Alexey Klimov <klimov.linux@gmail.com>

-- 
Best regards,
Klimov Alexey

[-- Attachment #2: 0001-arm-lib-turn-kernel-to-use-generic-implementation-of.patch --]
[-- Type: text/x-patch, Size: 3389 bytes --]

From 8e17e77e7b20874fea6ac38c5d3102ed56e014c7 Mon Sep 17 00:00:00 2001
From: Yury Norov <y.norov@samsung.com>
Date: Mon, 2 Feb 2015 15:11:07 +0300
Subject: [PATCH] arm: lib: disable platform implementation of 'find_*_bit'

Alexey, 

ARM has its own implementation for subj. If you're interested in testing
my patch on your odroid, try this.

Best regards,
Yury Norov.
---
 arch/arm/include/asm/bitops.h   | 19 -------------------
 arch/arm/kernel/armksyms.c      | 11 -----------
 arch/arm/lib/Makefile           |  2 +-
 include/asm-generic/bitops/le.h |  1 +
 4 files changed, 2 insertions(+), 31 deletions(-)

diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h
index 5638099..e0611d1 100644
--- a/arch/arm/include/asm/bitops.h
+++ b/arch/arm/include/asm/bitops.h
@@ -192,25 +192,6 @@ extern int _find_next_bit_be(const unsigned long *p, int size, int offset);
 #define test_and_clear_bit(nr,p)	ATOMIC_BITOP(test_and_clear_bit,nr,p)
 #define test_and_change_bit(nr,p)	ATOMIC_BITOP(test_and_change_bit,nr,p)
 
-#ifndef __ARMEB__
-/*
- * These are the little endian, atomic definitions.
- */
-#define find_first_zero_bit(p,sz)	_find_first_zero_bit_le(p,sz)
-#define find_next_zero_bit(p,sz,off)	_find_next_zero_bit_le(p,sz,off)
-#define find_first_bit(p,sz)		_find_first_bit_le(p,sz)
-#define find_next_bit(p,sz,off)		_find_next_bit_le(p,sz,off)
-
-#else
-/*
- * These are the big endian, atomic definitions.
- */
-#define find_first_zero_bit(p,sz)	_find_first_zero_bit_be(p,sz)
-#define find_next_zero_bit(p,sz,off)	_find_next_zero_bit_be(p,sz,off)
-#define find_first_bit(p,sz)		_find_first_bit_be(p,sz)
-#define find_next_bit(p,sz,off)		_find_next_bit_be(p,sz,off)
-
-#endif
 
 #if __LINUX_ARM_ARCH__ < 5
 
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index a88671c..22e8748 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -146,17 +146,6 @@ EXPORT_SYMBOL(_clear_bit);
 EXPORT_SYMBOL(_test_and_clear_bit);
 EXPORT_SYMBOL(_change_bit);
 EXPORT_SYMBOL(_test_and_change_bit);
-EXPORT_SYMBOL(_find_first_zero_bit_le);
-EXPORT_SYMBOL(_find_next_zero_bit_le);
-EXPORT_SYMBOL(_find_first_bit_le);
-EXPORT_SYMBOL(_find_next_bit_le);
-
-#ifdef __ARMEB__
-EXPORT_SYMBOL(_find_first_zero_bit_be);
-EXPORT_SYMBOL(_find_next_zero_bit_be);
-EXPORT_SYMBOL(_find_first_bit_be);
-EXPORT_SYMBOL(_find_next_bit_be);
-#endif
 
 #ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_OLD_MCOUNT
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 0573faa..de369aa 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -6,7 +6,7 @@
 
 lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \
 		   csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
-		   delay.o delay-loop.o findbit.o memchr.o memcpy.o   \
+		   delay.o delay-loop.o memchr.o memcpy.o	      \
 		   memmove.o memset.o memzero.o setbit.o              \
 		   strchr.o strrchr.o                                 \
 		   testchangebit.o testclearbit.o testsetbit.o        \
diff --git a/include/asm-generic/bitops/le.h b/include/asm-generic/bitops/le.h
index 6173154..9a8798f 100644
--- a/include/asm-generic/bitops/le.h
+++ b/include/asm-generic/bitops/le.h
@@ -2,6 +2,7 @@
 #define _ASM_GENERIC_BITOPS_LE_H_
 
 #include <asm/types.h>
+#include <asm-generic/bitops/find.h>
 #include <asm/byteorder.h>
 
 #if defined(__LITTLE_ENDIAN)
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2015-02-05 23:07 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-31 20:58 [PATCH v2 1/3] lib: find_*_bit reimplementation yury.norov
2015-01-31 20:58 ` [PATCH v2 2/3] lib: move find_last_bit to lib/find_next_bit.c yury.norov
2015-01-31 20:58 ` [PATCH v2 3/3] lib: rename lib/find_next_bit.c to lib/find_bit.c yury.norov
2015-02-02 11:09   ` Rasmus Villemoes
2015-02-02  3:17 ` [PATCH v2 1/3] lib: find_*_bit reimplementation George Spelvin
2015-02-04 23:07   ` Yury
2015-02-02 10:43 ` Rasmus Villemoes
2015-02-02 11:47   ` George Spelvin
2015-02-02 12:56     ` Rasmus Villemoes
2015-02-04 23:45       ` Yury
2015-02-05 14:51         ` Rasmus Villemoes
2015-02-04 22:52   ` Yury
2015-02-05 15:01     ` Rasmus Villemoes
2015-02-05 23:07 Alexey Klimov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).