All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/4] Bitmap optimisations
@ 2017-06-28 15:32 Matthew Wilcox
  2017-06-28 15:32 ` [PATCH v2 1/4] test_bitmap: Add optimisation tests Matthew Wilcox
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Matthew Wilcox @ 2017-06-28 15:32 UTC (permalink / raw)
  To: linux-kernel
  Cc: Andrew Morton, Martin Schwidefsky, Rasmus Villemoes, Matthew Wilcox

From: Matthew Wilcox <mawilcox@microsoft.com>

These three bitmap patches use more efficient specialisations when the
compiler can figure out that it's safe to do so.  Thanks to Rasmus'
eagle eyes, a nasty bug in v1 was avoided, and I've added a test case
which would have caught it.

Matthew Wilcox (4):
  test_bitmap: Add optimisation tests
  bitmap: Optimise bitmap_set and bitmap_clear of a single bit
  Turn bitmap_set and bitmap_clear into memset when possible
  bitmap: Use memcmp optimisation in more situations

 include/linux/bitmap.h | 33 +++++++++++++++++++++++++++------
 lib/bitmap.c           |  8 ++++----
 lib/test_bitmap.c      | 29 +++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 10 deletions(-)

-- 
2.11.0

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 1/4] test_bitmap: Add optimisation tests
  2017-06-28 15:32 [PATCH v2 0/4] Bitmap optimisations Matthew Wilcox
@ 2017-06-28 15:32 ` Matthew Wilcox
  2017-06-28 15:32 ` [PATCH v2 2/4] bitmap: Optimise bitmap_set and bitmap_clear of a single bit Matthew Wilcox
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Matthew Wilcox @ 2017-06-28 15:32 UTC (permalink / raw)
  To: linux-kernel
  Cc: Andrew Morton, Martin Schwidefsky, Rasmus Villemoes, Matthew Wilcox

From: Matthew Wilcox <mawilcox@microsoft.com>

This version of the test is actually a no-op; the next patch will
enable it.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 lib/test_bitmap.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index e2cbd43d193c..252d3bddbe7d 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -333,10 +333,42 @@ static void __init test_bitmap_u32_array_conversions(void)
 	}
 }
 
+#define __bitmap_set(a, b, c)	bitmap_set(a, b, c)
+#define __bitmap_clear(a, b, c)	bitmap_clear(a, b, c)
+
+static void noinline __init test_mem_optimisations(void)
+{
+	DECLARE_BITMAP(bmap1, 1024);
+	DECLARE_BITMAP(bmap2, 1024);
+	unsigned int start, nbits;
+
+	for (start = 0; start < 1024; start += 8) {
+		memset(bmap1, 0x5a, sizeof(bmap1));
+		memset(bmap2, 0x5a, sizeof(bmap2));
+		for (nbits = 0; nbits < 1024 - start; nbits += 8) {
+			bitmap_set(bmap1, start, nbits);
+			__bitmap_set(bmap2, start, nbits);
+			if (!bitmap_equal(bmap1, bmap2, 1024))
+				printk("set not equal %d %d\n", start, nbits);
+			if (!__bitmap_equal(bmap1, bmap2, 1024))
+				printk("set not __equal %d %d\n", start, nbits);
+
+			bitmap_clear(bmap1, start, nbits);
+			__bitmap_clear(bmap2, start, nbits);
+			if (!bitmap_equal(bmap1, bmap2, 1024))
+				printk("clear not equal %d %d\n", start, nbits);
+			if (!__bitmap_equal(bmap1, bmap2, 1024))
+				printk("clear not __equal %d %d\n", start,
+									nbits);
+		}
+	}
+}
+
 static int __init test_bitmap_init(void)
 {
 	test_zero_fill_copy();
 	test_bitmap_u32_array_conversions();
+	test_mem_optimisations();
 
 	if (failed_tests == 0)
 		pr_info("all %u tests passed\n", total_tests);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 2/4] bitmap: Optimise bitmap_set and bitmap_clear of a single bit
  2017-06-28 15:32 [PATCH v2 0/4] Bitmap optimisations Matthew Wilcox
  2017-06-28 15:32 ` [PATCH v2 1/4] test_bitmap: Add optimisation tests Matthew Wilcox
@ 2017-06-28 15:32 ` Matthew Wilcox
  2017-06-28 15:32 ` [PATCH v2 3/4] Turn bitmap_set and bitmap_clear into memset when possible Matthew Wilcox
  2017-06-28 15:32 ` [PATCH v2 4/4] bitmap: Use memcmp optimisation in more situations Matthew Wilcox
  3 siblings, 0 replies; 5+ messages in thread
From: Matthew Wilcox @ 2017-06-28 15:32 UTC (permalink / raw)
  To: linux-kernel
  Cc: Andrew Morton, Martin Schwidefsky, Rasmus Villemoes, Matthew Wilcox

From: Matthew Wilcox <mawilcox@microsoft.com>

We have eight users calling bitmap_clear for a single bit and seventeen
calling bitmap_set for a single bit.  Rather than fix all of them to call
__clear_bit or __set_bit, turn bitmap_clear and bitmap_set into inline
functions and make this special case efficient.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
---
 include/linux/bitmap.h | 23 ++++++++++++++++++++---
 lib/bitmap.c           |  8 ++++----
 lib/test_bitmap.c      |  3 ---
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 3b77588a9360..4e0f0c8167af 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -112,9 +112,8 @@ extern int __bitmap_intersects(const unsigned long *bitmap1,
 extern int __bitmap_subset(const unsigned long *bitmap1,
 			const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
-
-extern void bitmap_set(unsigned long *map, unsigned int start, int len);
-extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
+extern void __bitmap_set(unsigned long *map, unsigned int start, int len);
+extern void __bitmap_clear(unsigned long *map, unsigned int start, int len);
 
 extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
 						    unsigned long size,
@@ -315,6 +314,24 @@ static __always_inline int bitmap_weight(const unsigned long *src, unsigned int
 	return __bitmap_weight(src, nbits);
 }
 
+static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
+		unsigned int nbits)
+{
+	if (__builtin_constant_p(nbits) && nbits == 1)
+		__set_bit(start, map);
+	else
+		__bitmap_set(map, start, nbits);
+}
+
+static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
+		unsigned int nbits)
+{
+	if (__builtin_constant_p(nbits) && nbits == 1)
+		__clear_bit(start, map);
+	else
+		__bitmap_clear(map, start, nbits);
+}
+
 static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src,
 				unsigned int shift, int nbits)
 {
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 08c6ef3a2b6f..9a532805364b 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -251,7 +251,7 @@ int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
 }
 EXPORT_SYMBOL(__bitmap_weight);
 
-void bitmap_set(unsigned long *map, unsigned int start, int len)
+void __bitmap_set(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
 	const unsigned int size = start + len;
@@ -270,9 +270,9 @@ void bitmap_set(unsigned long *map, unsigned int start, int len)
 		*p |= mask_to_set;
 	}
 }
-EXPORT_SYMBOL(bitmap_set);
+EXPORT_SYMBOL(__bitmap_set);
 
-void bitmap_clear(unsigned long *map, unsigned int start, int len)
+void __bitmap_clear(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
 	const unsigned int size = start + len;
@@ -291,7 +291,7 @@ void bitmap_clear(unsigned long *map, unsigned int start, int len)
 		*p &= ~mask_to_clear;
 	}
 }
-EXPORT_SYMBOL(bitmap_clear);
+EXPORT_SYMBOL(__bitmap_clear);
 
 /**
  * bitmap_find_next_zero_area_off - find a contiguous aligned zero area
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 252d3bddbe7d..2526a2975c51 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -333,9 +333,6 @@ static void __init test_bitmap_u32_array_conversions(void)
 	}
 }
 
-#define __bitmap_set(a, b, c)	bitmap_set(a, b, c)
-#define __bitmap_clear(a, b, c)	bitmap_clear(a, b, c)
-
 static void noinline __init test_mem_optimisations(void)
 {
 	DECLARE_BITMAP(bmap1, 1024);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 3/4] Turn bitmap_set and bitmap_clear into memset when possible
  2017-06-28 15:32 [PATCH v2 0/4] Bitmap optimisations Matthew Wilcox
  2017-06-28 15:32 ` [PATCH v2 1/4] test_bitmap: Add optimisation tests Matthew Wilcox
  2017-06-28 15:32 ` [PATCH v2 2/4] bitmap: Optimise bitmap_set and bitmap_clear of a single bit Matthew Wilcox
@ 2017-06-28 15:32 ` Matthew Wilcox
  2017-06-28 15:32 ` [PATCH v2 4/4] bitmap: Use memcmp optimisation in more situations Matthew Wilcox
  3 siblings, 0 replies; 5+ messages in thread
From: Matthew Wilcox @ 2017-06-28 15:32 UTC (permalink / raw)
  To: linux-kernel
  Cc: Andrew Morton, Martin Schwidefsky, Rasmus Villemoes, Matthew Wilcox

From: Matthew Wilcox <mawilcox@microsoft.com>

Several callers have constant 'start' and an 'nbits' that is a multiple of
8, so we can turn them into calls to memset.  We don't need the entirety
of 'start' and 'nbits' to  be constant, we just need to know whether
they're divisible by 8.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
---
 include/linux/bitmap.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 4e0f0c8167af..c04c9d155e59 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -319,6 +319,9 @@ static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
 {
 	if (__builtin_constant_p(nbits) && nbits == 1)
 		__set_bit(start, map);
+	else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) &&
+		 __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
+		memset((char *)map + start / 8, 0xff, nbits / 8);
 	else
 		__bitmap_set(map, start, nbits);
 }
@@ -328,6 +331,9 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
 {
 	if (__builtin_constant_p(nbits) && nbits == 1)
 		__clear_bit(start, map);
+	else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) &&
+		 __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
+		memset((char *)map + start / 8, 0, nbits / 8);
 	else
 		__bitmap_clear(map, start, nbits);
 }
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 4/4] bitmap: Use memcmp optimisation in more situations
  2017-06-28 15:32 [PATCH v2 0/4] Bitmap optimisations Matthew Wilcox
                   ` (2 preceding siblings ...)
  2017-06-28 15:32 ` [PATCH v2 3/4] Turn bitmap_set and bitmap_clear into memset when possible Matthew Wilcox
@ 2017-06-28 15:32 ` Matthew Wilcox
  3 siblings, 0 replies; 5+ messages in thread
From: Matthew Wilcox @ 2017-06-28 15:32 UTC (permalink / raw)
  To: linux-kernel
  Cc: Andrew Morton, Martin Schwidefsky, Rasmus Villemoes, Matthew Wilcox

From: Matthew Wilcox <mawilcox@microsoft.com>

Commit 7dd968163f ("bitmap: bitmap_equal memcmp optimization") was
rather more restrictive than necessary; we can use memcmp() to implement
bitmap_equal() as long as the number of bits can be proved to be a
multiple of 8.  And architectures other than s390 may be able to make
good use of this optimisation.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
---
 include/linux/bitmap.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index c04c9d155e59..5797ca6fdfe2 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -266,10 +266,8 @@ static inline int bitmap_equal(const unsigned long *src1,
 {
 	if (small_const_nbits(nbits))
 		return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
-#ifdef CONFIG_S390
-	if (__builtin_constant_p(nbits) && (nbits % BITS_PER_LONG) == 0)
+	if (__builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
 		return !memcmp(src1, src2, nbits / 8);
-#endif
 	return __bitmap_equal(src1, src2, nbits);
 }
 
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-06-28 15:33 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-28 15:32 [PATCH v2 0/4] Bitmap optimisations Matthew Wilcox
2017-06-28 15:32 ` [PATCH v2 1/4] test_bitmap: Add optimisation tests Matthew Wilcox
2017-06-28 15:32 ` [PATCH v2 2/4] bitmap: Optimise bitmap_set and bitmap_clear of a single bit Matthew Wilcox
2017-06-28 15:32 ` [PATCH v2 3/4] Turn bitmap_set and bitmap_clear into memset when possible Matthew Wilcox
2017-06-28 15:32 ` [PATCH v2 4/4] bitmap: Use memcmp optimisation in more situations Matthew Wilcox

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.