linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v20 3/7 RESEND] xbitmap: add more operations
@ 2017-12-21  2:30 Wei Wang
  2017-12-21 14:18 ` Matthew Wilcox
  2017-12-21 21:03 ` Matthew Wilcox
  0 siblings, 2 replies; 13+ messages in thread
From: Wei Wang @ 2017-12-21  2:30 UTC (permalink / raw)
  To: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
	linux-mm, mst, mhocko, akpm, mawilcox, penguin-kernel

This patch adds support to find next 1 or 0 bit in a xbmitmap range and
clear a range of bits.

More possible optimizations to add in the future:
1) xb_set_bit_range: set a range of bits.
2) when searching a bit, if the bit is not found in the slot, move on to
the next slot directly.
3) add tags to help searching.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Suggested-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 include/linux/xbitmap.h      |   6 ++
 lib/xbitmap.c                | 205 +++++++++++++++++++++++++++++++++++++++++++
 tools/include/linux/bitmap.h |  34 +++++++
 tools/include/linux/kernel.h |   2 +
 4 files changed, 247 insertions(+)

v20 RESEND Changes:
	- fixed the !node path
	- added the test cases for the !node path
	- change __builtin_constant_p(start & 7) to __builtin_constant_p(start) 

diff --git a/include/linux/xbitmap.h b/include/linux/xbitmap.h
index 108f929..ede1029 100644
--- a/include/linux/xbitmap.h
+++ b/include/linux/xbitmap.h
@@ -35,6 +35,12 @@ static inline void xb_init(struct xb *xb)
 int xb_set_bit(struct xb *xb, unsigned long bit);
 bool xb_test_bit(const struct xb *xb, unsigned long bit);
 void xb_clear_bit(struct xb *xb, unsigned long bit);
+void xb_clear_bit_range(struct xb *xb, unsigned long start,
+			unsigned long nbits);
+unsigned long xb_find_set(struct xb *xb, unsigned long size,
+			  unsigned long offset);
+unsigned long xb_find_zero(struct xb *xb, unsigned long size,
+			   unsigned long offset);
 
 static inline bool xb_empty(const struct xb *xb)
 {
diff --git a/lib/xbitmap.c b/lib/xbitmap.c
index a1c0abd..1c99586 100644
--- a/lib/xbitmap.c
+++ b/lib/xbitmap.c
@@ -3,6 +3,16 @@
 #include <linux/bitmap.h>
 #include <linux/slab.h>
 
+/*
+ * Developer notes:
+ * - locks are required to gurantee there is no concurrent
+ *   calls of xb_set_bit, xb_clear_bit, xb_clear_bit_range, xb_test_bit,
+ *   xb_find_set, or xb_find_clear to operate on the same ida bitmap.
+ * - The current implementation of xb_clear_bit_range, xb_find_set and
+ *   xb_find_clear may cause long latency when the bit range to operate
+ *   on is super large (e.g. [0, ULONG_MAX]).
+ */
+
 /**
  *  xb_set_bit - set a bit in the xbitmap
  *  @xb: the xbitmap tree used to record the bit
@@ -72,6 +82,49 @@ void xb_clear_bit(struct xb *xb, unsigned long bit)
 EXPORT_SYMBOL(xb_clear_bit);
 
 /**
+ * xb_clear_bit_range - clear a range of bits in the xbitmap
+ * @start: the start of the bit range, inclusive
+ * @nbits: number of bits to clear
+ *
+ * This function is used to clear a range of bits in the xbitmap. If all the
+ * bits in the bitmap are 0, the bitmap will be freed.
+ */
+void xb_clear_bit_range(struct xb *xb, unsigned long start,
+			unsigned long nbits)
+{
+	struct radix_tree_root *root = &xb->xbrt;
+	struct radix_tree_node *node;
+	void __rcu **slot;
+	struct ida_bitmap *bitmap;
+	unsigned long index = start / IDA_BITMAP_BITS;
+	unsigned long bit = start % IDA_BITMAP_BITS;
+
+	if (nbits > ULONG_MAX - start)
+		nbits = ULONG_MAX - start;
+
+	while (nbits) {
+		unsigned int __nbits = min(nbits,
+					(unsigned long)IDA_BITMAP_BITS - bit);
+
+		bitmap = __radix_tree_lookup(root, index, &node, &slot);
+		if (bitmap) {
+			if (__nbits != IDA_BITMAP_BITS)
+				bitmap_clear(bitmap->bitmap, bit, __nbits);
+
+			if (__nbits == IDA_BITMAP_BITS ||
+			    bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
+				kfree(bitmap);
+				__radix_tree_delete(root, node, slot);
+			}
+		}
+		bit = 0;
+		index++;
+		nbits -= __nbits;
+	}
+}
+EXPORT_SYMBOL(xb_clear_bit_range);
+
+/**
  * xb_test_bit - test a bit in the xbitmap
  * @xb: the xbitmap tree used to record the bit
  * @bit: index of the bit to test
@@ -94,6 +147,98 @@ bool xb_test_bit(const struct xb *xb, unsigned long bit)
 }
 EXPORT_SYMBOL(xb_test_bit);
 
+/**
+ * xb_find_set - find the next set bit in a range of bits
+ * @xb: the xbitmap to search from
+ * @offset: the offset in the range to start searching
+ * @size: the size of the range
+ *
+ * Returns: the found bit or, @size if no set bit is found.
+ */
+unsigned long xb_find_set(struct xb *xb, unsigned long size,
+			  unsigned long offset)
+{
+	struct radix_tree_root *root = &xb->xbrt;
+	struct radix_tree_node *node;
+	void __rcu **slot;
+	struct ida_bitmap *bitmap;
+	unsigned long index = offset / IDA_BITMAP_BITS;
+	unsigned long index_end = size / IDA_BITMAP_BITS;
+	unsigned long bit = offset % IDA_BITMAP_BITS;
+
+	if (unlikely(offset >= size))
+		return size;
+
+	while (index <= index_end) {
+		unsigned long ret;
+		unsigned int nbits = size - index * IDA_BITMAP_BITS;
+
+		bitmap = __radix_tree_lookup(root, index, &node, &slot);
+
+		if (!node && !bitmap)
+			return size;
+
+		if (bitmap) {
+			if (nbits > IDA_BITMAP_BITS)
+				nbits = IDA_BITMAP_BITS;
+
+			ret = find_next_bit(bitmap->bitmap, nbits, bit);
+			if (ret != nbits)
+				return ret + index * IDA_BITMAP_BITS;
+		}
+		bit = 0;
+		index++;
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(xb_find_set);
+
+/**
+ * xb_find_zero - find the next zero bit in a range of bits
+ * @xb: the xbitmap to search from
+ * @offset: the offset in the range to start searching
+ * @size: the size of the range
+ *
+ * Returns: the found bit or, @size if no zero bit is found.
+ */
+unsigned long xb_find_zero(struct xb *xb, unsigned long size,
+			   unsigned long offset)
+{
+	struct radix_tree_root *root = &xb->xbrt;
+	struct radix_tree_node *node;
+	void __rcu **slot;
+	struct ida_bitmap *bitmap;
+	unsigned long index = offset / IDA_BITMAP_BITS;
+	unsigned long index_end = size / IDA_BITMAP_BITS;
+	unsigned long bit = offset % IDA_BITMAP_BITS;
+
+	if (unlikely(offset >= size))
+		return size;
+
+	while (index <= index_end) {
+		unsigned long ret;
+		unsigned int nbits = size - index * IDA_BITMAP_BITS;
+
+		bitmap = __radix_tree_lookup(root, index, &node, &slot);
+		if (bitmap) {
+			if (nbits > IDA_BITMAP_BITS)
+				nbits = IDA_BITMAP_BITS;
+
+			ret = find_next_zero_bit(bitmap->bitmap, nbits, bit);
+			if (ret != nbits)
+				return ret + index * IDA_BITMAP_BITS;
+		} else {
+			return bit + index * IDA_BITMAP_BITS;
+		}
+		bit = 0;
+		index++;
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(xb_find_zero);
+
 #ifndef __KERNEL__
 
 static DEFINE_XB(xb1);
@@ -111,6 +256,64 @@ void xbitmap_check_bit(unsigned long bit)
 	xb_preload_end();
 }
 
+static void xbitmap_check_bit_range(void)
+{
+	/* Regular test1: node = NULL */
+	xb_preload(GFP_KERNEL);
+	xb_set_bit(&xb1, 700);
+	xb_preload_end();
+	assert(xb_find_set(&xb1, ULONG_MAX, 0) == 700);
+	assert(xb_find_set(&xb1, ULONG_MAX, 800) == ULONG_MAX);
+	xb_clear_bit_range(&xb1, 0, 1024);
+
+	/*
+	 * Regular test 2
+	 * set bit 2000, 2001, 2040
+	 * Next 1 in [0, 2048)		--> 2000
+	 * Next 1 in [2000, 2002)	--> 2000
+	 * Next 1 in [2002, 2041)	--> 2040
+	 * Next 1 in [2002, 2040)	--> none
+	 * Next 0 in [2000, 2048)	--> 2002
+	 * Next 0 in [2048, 2060)	--> 2048
+	 */
+	xb_preload(GFP_KERNEL);
+	assert(!xb_set_bit(&xb1, 2000));
+	assert(!xb_set_bit(&xb1, 2001));
+	assert(!xb_set_bit(&xb1, 2040));
+	assert(xb_find_set(&xb1, 2048, 0) == 2000);
+	assert(xb_find_set(&xb1, 2002, 2000) == 2000);
+	assert(xb_find_set(&xb1, 2041, 2002) == 2040);
+	assert(xb_find_set(&xb1, 2040, 2002) == 2040);
+	assert(xb_find_zero(&xb1, 2048, 2000) == 2002);
+	assert(xb_find_zero(&xb1, 2060, 2048) == 2048);
+	xb_clear_bit_range(&xb1, 0, 2048);
+	assert(xb_find_set(&xb1, 2048, 0) == 2048);
+	xb_preload_end();
+
+	/*
+	 * Overflow tests:
+	 * Set bit 1 and ULONG_MAX - 4
+	 * Next 1 in [ULONG_MAX - 4, ULONG_MAX)		--> ULONG_MAX - 4
+	 * Next 1 [ULONG_MAX - 3, ULONG_MAX + 4)	--> none
+	 * Next 0 [ULONG_MAX - 4, ULONG_MAX + 4)	--> none
+	 */
+	xb_preload(GFP_KERNEL);
+	assert(!xb_set_bit(&xb1, 1));
+	xb_preload_end();
+	xb_preload(GFP_KERNEL);
+	assert(!xb_set_bit(&xb1, ULONG_MAX - 4));
+	assert(xb_find_set(&xb1, ULONG_MAX, ULONG_MAX - 4) == ULONG_MAX - 4);
+	assert(xb_find_set(&xb1, ULONG_MAX + 4, ULONG_MAX - 3) ==
+	       ULONG_MAX + 4);
+	assert(xb_find_zero(&xb1, ULONG_MAX + 4, ULONG_MAX - 4) ==
+	       ULONG_MAX + 4);
+	xb_clear_bit_range(&xb1, ULONG_MAX - 4, 4);
+	assert(xb_find_set(&xb1, ULONG_MAX, ULONG_MAX - 10) == ULONG_MAX);
+	xb_clear_bit_range(&xb1, 0, 2);
+	assert(xb_find_set(&xb1, 2, 0) == 2);
+	xb_preload_end();
+}
+
 void xbitmap_checks(void)
 {
 	xb_init(&xb1);
@@ -122,6 +325,8 @@ void xbitmap_checks(void)
 	xbitmap_check_bit(1025);
 	xbitmap_check_bit((1UL << 63) | (1UL << 24));
 	xbitmap_check_bit((1UL << 63) | (1UL << 24) | 70);
+
+	xbitmap_check_bit_range();
 }
 
 int __weak main(void)
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index ca16027..6b559ee 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -37,6 +37,40 @@ static inline void bitmap_zero(unsigned long *dst, int nbits)
 	}
 }
 
+static inline void __bitmap_clear(unsigned long *map, unsigned int start,
+				  int len)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const unsigned int size = start + len;
+	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+	while (len - bits_to_clear >= 0) {
+		*p &= ~mask_to_clear;
+		len -= bits_to_clear;
+		bits_to_clear = BITS_PER_LONG;
+		mask_to_clear = ~0UL;
+		p++;
+	}
+	if (len) {
+		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
+
+static inline __always_inline void bitmap_clear(unsigned long *map,
+						unsigned int start,
+						unsigned int nbits)
+{
+	if (__builtin_constant_p(nbits) && nbits == 1)
+		__clear_bit(start, map);
+	else if (__builtin_constant_p(start) && IS_ALIGNED(start, 8) &&
+		 __builtin_constant_p(nbits) && IS_ALIGNED(nbits, 8))
+		memset((char *)map + start / 8, 0, nbits / 8);
+	else
+		__bitmap_clear(map, start, nbits);
+}
+
 static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
 {
 	unsigned int nlongs = BITS_TO_LONGS(nbits);
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 0ad8844..3c992ae 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -13,6 +13,8 @@
 #define UINT_MAX	(~0U)
 #endif
 
+#define IS_ALIGNED(x, a)	(((x) & ((typeof(x))(a) - 1)) == 0)
+
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 
 #define PERF_ALIGN(x, a)	__PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
-- 
2.7.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
  2017-12-21  2:30 [PATCH v20 3/7 RESEND] xbitmap: add more operations Wei Wang
@ 2017-12-21 14:18 ` Matthew Wilcox
  2017-12-21 14:37   ` Tetsuo Handa
  2017-12-21 21:03 ` Matthew Wilcox
  1 sibling, 1 reply; 13+ messages in thread
From: Matthew Wilcox @ 2017-12-21 14:18 UTC (permalink / raw)
  To: Wei Wang
  Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
	linux-mm, mst, mhocko, akpm, mawilcox, penguin-kernel


First of all, the test-suite doesn't build, so I don't know whether you
ran it or not.  Then I added the xb_find_set() call below, and it fails
the assert, so you should probably fix that.

diff --git a/lib/xbitmap.c b/lib/xbitmap.c
index f03a0f9f9e29..b29af08a7597 100644
--- a/lib/xbitmap.c
+++ b/lib/xbitmap.c
@@ -249,11 +249,12 @@ void xbitmap_check_bit(unsigned long bit)
 	assert(!xb_test_bit(&xb1, bit));
 	assert(xb_set_bit(&xb1, bit) == 0);
 	assert(xb_test_bit(&xb1, bit));
-	assert(xb_clear_bit(&xb1, bit) == 0);
+	xb_clear_bit(&xb1, bit);
 	assert(xb_empty(&xb1));
-	assert(xb_clear_bit(&xb1, bit) == 0);
+	xb_clear_bit(&xb1, bit);
 	assert(xb_empty(&xb1));
 	xb_preload_end();
+	assert(xb_find_set(&xb1, ULONG_MAX, 0) == bit);
 }
 
 static void xbitmap_check_bit_range(void)
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 34ece7883629..adf36e34dd77 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 
 CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
-LDFLAGS += -fsanitize=address
-LDLIBS+= -lpthread -lurcu
-TARGETS = main idr-test multiorder
+LDFLAGS += -fsanitize=address $(LDLIBS)
+LDLIBS := -lpthread -lurcu
+TARGETS = main idr-test multiorder xbitmap
 CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
 	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o \

On Thu, Dec 21, 2017 at 10:30:06AM +0800, Wei Wang wrote:
> v20 RESEND Changes:
> 	- fixed the !node path
> 	- added the test cases for the !node path
> 	- change __builtin_constant_p(start & 7) to __builtin_constant_p(start) 

Why would you do such a thing?  Just copy the kernel definitions.

> diff --git a/include/linux/xbitmap.h b/include/linux/xbitmap.h
> index 108f929..ede1029 100644
> --- a/include/linux/xbitmap.h
> +++ b/include/linux/xbitmap.h
> @@ -35,6 +35,12 @@ static inline void xb_init(struct xb *xb)
>  int xb_set_bit(struct xb *xb, unsigned long bit);
>  bool xb_test_bit(const struct xb *xb, unsigned long bit);
>  void xb_clear_bit(struct xb *xb, unsigned long bit);
> +void xb_clear_bit_range(struct xb *xb, unsigned long start,
> +			unsigned long nbits);

This is xb_zero().  I thought we talked about this before?

> +unsigned long xb_find_set(struct xb *xb, unsigned long size,
> +			  unsigned long offset);
> +unsigned long xb_find_zero(struct xb *xb, unsigned long size,
> +			   unsigned long offset);

Since you're using xb_find_zero(), I think we need the tags from the IDR.
At that point, I'm not sure there's a point in keeping the xbitmap and the
IDR as separate data structures.

> +/**
> + * xb_find_set - find the next set bit in a range of bits
> + * @xb: the xbitmap to search from
> + * @offset: the offset in the range to start searching
> + * @size: the size of the range
> + *
> + * Returns: the found bit or, @size if no set bit is found.
> + */
> +unsigned long xb_find_set(struct xb *xb, unsigned long size,
> +			  unsigned long offset)
> +{
> +	struct radix_tree_root *root = &xb->xbrt;
> +	struct radix_tree_node *node;
> +	void __rcu **slot;
> +	struct ida_bitmap *bitmap;
> +	unsigned long index = offset / IDA_BITMAP_BITS;
> +	unsigned long index_end = size / IDA_BITMAP_BITS;
> +	unsigned long bit = offset % IDA_BITMAP_BITS;
> +
> +	if (unlikely(offset >= size))
> +		return size;
> +
> +	while (index <= index_end) {
> +		unsigned long ret;
> +		unsigned int nbits = size - index * IDA_BITMAP_BITS;
> +
> +		bitmap = __radix_tree_lookup(root, index, &node, &slot);
> +
> +		if (!node && !bitmap)
> +			return size;
> +
> +		if (bitmap) {
> +			if (nbits > IDA_BITMAP_BITS)
> +				nbits = IDA_BITMAP_BITS;
> +
> +			ret = find_next_bit(bitmap->bitmap, nbits, bit);
> +			if (ret != nbits)
> +				return ret + index * IDA_BITMAP_BITS;
> +		}
> +		bit = 0;
> +		index++;
> +	}
> +
> +	return size;
> +}
> +EXPORT_SYMBOL(xb_find_set);

This is going to be slower than the implementation I sent yesterday.  If I
call:
	xb_init(xb);
	xb_set_bit(xb, ULONG_MAX);
	xb_find_set(xb, ULONG_MAX, 0);

it's going to call __radix_tree_lookup() 16 quadrillion times.
My implementation will walk the tree precisely once.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
  2017-12-21 14:18 ` Matthew Wilcox
@ 2017-12-21 14:37   ` Tetsuo Handa
  2017-12-22  8:45     ` Wei Wang
  0 siblings, 1 reply; 13+ messages in thread
From: Tetsuo Handa @ 2017-12-21 14:37 UTC (permalink / raw)
  To: willy, wei.w.wang
  Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
	linux-mm, mst, mhocko, akpm, mawilcox

Matthew Wilcox wrote:
> > +/**
> > + * xb_find_set - find the next set bit in a range of bits
> > + * @xb: the xbitmap to search from
> > + * @offset: the offset in the range to start searching
> > + * @size: the size of the range
> > + *
> > + * Returns: the found bit or, @size if no set bit is found.
> > + */
> > +unsigned long xb_find_set(struct xb *xb, unsigned long size,
> > +			  unsigned long offset)
> > +{
> > +	struct radix_tree_root *root = &xb->xbrt;
> > +	struct radix_tree_node *node;
> > +	void __rcu **slot;
> > +	struct ida_bitmap *bitmap;
> > +	unsigned long index = offset / IDA_BITMAP_BITS;
> > +	unsigned long index_end = size / IDA_BITMAP_BITS;
> > +	unsigned long bit = offset % IDA_BITMAP_BITS;
> > +
> > +	if (unlikely(offset >= size))
> > +		return size;
> > +
> > +	while (index <= index_end) {
> > +		unsigned long ret;
> > +		unsigned int nbits = size - index * IDA_BITMAP_BITS;
> > +
> > +		bitmap = __radix_tree_lookup(root, index, &node, &slot);
> > +
> > +		if (!node && !bitmap)
> > +			return size;
> > +
> > +		if (bitmap) {
> > +			if (nbits > IDA_BITMAP_BITS)
> > +				nbits = IDA_BITMAP_BITS;
> > +
> > +			ret = find_next_bit(bitmap->bitmap, nbits, bit);
> > +			if (ret != nbits)
> > +				return ret + index * IDA_BITMAP_BITS;
> > +		}
> > +		bit = 0;
> > +		index++;
> > +	}
> > +
> > +	return size;
> > +}
> > +EXPORT_SYMBOL(xb_find_set);
> 
> This is going to be slower than the implementation I sent yesterday.  If I
> call:
> 	xb_init(xb);
> 	xb_set_bit(xb, ULONG_MAX);
> 	xb_find_set(xb, ULONG_MAX, 0);
> 
> it's going to call __radix_tree_lookup() 16 quadrillion times.
> My implementation will walk the tree precisely once.
> 
Yes. Wei's patch still can not work.
We should start reviewing Matthew's implementation.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
  2017-12-21  2:30 [PATCH v20 3/7 RESEND] xbitmap: add more operations Wei Wang
  2017-12-21 14:18 ` Matthew Wilcox
@ 2017-12-21 21:03 ` Matthew Wilcox
  2017-12-22  8:49   ` Wei Wang
  2017-12-23  2:59   ` Tetsuo Handa
  1 sibling, 2 replies; 13+ messages in thread
From: Matthew Wilcox @ 2017-12-21 21:03 UTC (permalink / raw)
  To: Wei Wang
  Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
	linux-mm, mst, mhocko, akpm, mawilcox, penguin-kernel


OK, here's a rewrite of xbitmap.

Compared to the version you sent:
 - xb_find_set() is the rewrite I sent out yesterday.
 - xb_find_clear() is a new implementation.  I use the IDR_FREE tag to find
   clear bits.  This led to me finding a bug in radix_tree_for_each_tagged().
 - xb_zero() is also a new implementation (replacing xb_clear_bit_range).
   It should also be more efficient in deep trees.
 - Did not accept your change to xb_set_bit(); I think it's important to
   leave the radix tree node in place, so we're guaranteed to make progress
   in low-memory situations.  We're expecting the caller to retry, so the
   memory should not leak.
 - Redid a lot of the kernel-doc.  Thanks for adding it!  I removed mentions
   of implementation details, leaving only information useful to those who
   are using it.

Compared to the version I put out back in February:
 - Accepted your change to xb_preload(); I think it's an improvement.
 - Rewrote xb_set_bit() to use the radix_tree_iter_ family of functions.
   Should improve performance for deep trees.
 - Rewrote xb_clear_bit() for the same reason.
 - Left out the use of radix tree exceptional entries.  Thanks for taking
   them out!  Keeps it clearer for now; if they prove useful, we can put
   them back in.
 - Removed the use of __radix_tree_delete and __radix_tree_create, so drop
   the changes to those functions.

Other miscellaneous notes
 - I left xb_fill() in the header file, even though there's no implementation
   yet.  Wouldn't be hard to add once we have a user.
 - Used SPDX tags instead of a license notice.

I think we need more test cases ... in reviewing this to send out,
I found a bug in xb_zero(), which I've only fixed because I don't have
time to write a test case for it.

diff --git a/include/linux/xbitmap.h b/include/linux/xbitmap.h
new file mode 100644
index 000000000000..c008309a9494
--- /dev/null
+++ b/include/linux/xbitmap.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * eXtensible Bitmaps
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox <mawilcox@microsoft.com>
+ *
+ * eXtensible Bitmaps provide an unlimited-size sparse bitmap facility.
+ * All bits are initially zero.
+ *
+ * Locking is to be provided by the user.  No xb_ function is safe to
+ * call concurrently with any other xb_ function.
+ */
+
+#include <linux/idr.h>
+
+struct xb {
+	struct radix_tree_root xbrt;
+};
+
+#define XB_INIT {							\
+	.xbrt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT),		\
+}
+#define DEFINE_XB(name)		struct xb name = XB_INIT
+
+static inline void xb_init(struct xb *xb)
+{
+	INIT_RADIX_TREE(&xb->xbrt, IDR_RT_MARKER | GFP_NOWAIT);
+}
+
+int xb_set_bit(struct xb *xb, unsigned long bit);
+bool xb_test_bit(const struct xb *xb, unsigned long bit);
+void xb_clear_bit(struct xb *xb, unsigned long bit);
+void xb_zero(struct xb *xb, unsigned long min, unsigned long max);
+void xb_fill(struct xb *xb, unsigned long min, unsigned long max);
+bool xb_find_set(const struct xb *xb, unsigned long max, unsigned long *bit);
+bool xb_find_zero(const struct xb *xb, unsigned long max, unsigned long *bit);
+
+static inline bool xb_empty(const struct xb *xb)
+{
+	return radix_tree_empty(&xb->xbrt);
+}
+
+int __must_check xb_preload(gfp_t);
+
+static inline void xb_preload_end(void)
+{
+	preempt_enable();
+}
diff --git a/lib/Makefile b/lib/Makefile
index d11c48ec8ffd..08a8183c390b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -19,7 +19,7 @@ KCOV_INSTRUMENT_dynamic_debug.o := n
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
-	 idr.o int_sqrt.o extable.o \
+	 idr.o xbitmap.o int_sqrt.o extable.o \
 	 sha1.o chacha20.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index c8d55565fafa..d2bd8feb7b85 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -37,7 +37,7 @@
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/string.h>
-
+#include <linux/xbitmap.h>
 
 /* Number of nodes in fully populated tree of given height */
 static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
@@ -77,6 +77,11 @@ static struct kmem_cache *radix_tree_node_cachep;
 						RADIX_TREE_MAP_SHIFT))
 #define IDA_PRELOAD_SIZE	(IDA_MAX_PATH * 2 - 1)
 
+#define XB_INDEX_BITS		(BITS_PER_LONG - ilog2(IDA_BITMAP_BITS))
+#define XB_MAX_PATH		(DIV_ROUND_UP(XB_INDEX_BITS, \
+						RADIX_TREE_MAP_SHIFT))
+#define XB_PRELOAD_SIZE		(XB_MAX_PATH * 2 - 1)
+
 /*
  * Per-cpu pool of preloaded nodes
  */
@@ -1781,7 +1786,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
 			child = rcu_dereference_raw(node->slots[offset]);
 		}
 
-		if (!child)
+		if (!child && !is_idr(root))
 			goto restart;
 		if (child == RADIX_TREE_RETRY)
 			break;
@@ -2135,6 +2140,35 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
 }
 EXPORT_SYMBOL(ida_pre_get);
 
+/**
+ *  xb_preload - preload for xb_set_bit()
+ *  @gfp_mask: allocation mask to use for preloading
+ *
+ * Preallocate memory to use for the next call to xb_set_bit(). On success,
+ * return zero, with preemption disabled. On error, return -ENOMEM with
+ * preemption not disabled.
+ */
+int xb_preload(gfp_t gfp)
+{
+	if (!this_cpu_read(ida_bitmap)) {
+		struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
+
+		if (!bitmap)
+			return -ENOMEM;
+		/*
+		 * The per-CPU variable is updated with preemption enabled.
+		 * If the calling task is unlucky to be scheduled to another
+		 * CPU which has no ida_bitmap allocation, it will be detected
+		 * when setting a bit (i.e. xb_set_bit()).
+		 */
+		bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap);
+		kfree(bitmap);
+	}
+
+	return __radix_tree_preload(gfp, XB_PRELOAD_SIZE);
+}
+EXPORT_SYMBOL(xb_preload);
+
 void __rcu **idr_get_free_cmn(struct radix_tree_root *root,
 			      struct radix_tree_iter *iter, gfp_t gfp,
 			      unsigned long max)
diff --git a/lib/xbitmap.c b/lib/xbitmap.c
new file mode 100644
index 000000000000..d596ba247b45
--- /dev/null
+++ b/lib/xbitmap.c
@@ -0,0 +1,396 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * XBitmap implementation
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox <mawilcox@microsoft.com>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/xbitmap.h>
+
+/**
+ * xb_set_bit() - Set a bit in the XBitmap.
+ * @xb: The XBitmap.
+ * @bit: Index of the bit to set.
+ *
+ * This function is used to set a bit in the xbitmap.
+ *
+ * Return: 0 on success. -ENOMEM if memory could not be allocated.
+ */
+int xb_set_bit(struct xb *xb, unsigned long bit)
+{
+	unsigned long index = bit / IDA_BITMAP_BITS;
+	struct radix_tree_root *root = &xb->xbrt;
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+	struct ida_bitmap *bitmap;
+
+	bit %= IDA_BITMAP_BITS;
+	radix_tree_iter_init(&iter, index);
+	slot = idr_get_free_cmn(root, &iter, GFP_NOWAIT | __GFP_NOWARN, index);
+	if (IS_ERR(slot)) {
+		if (slot == ERR_PTR(-ENOSPC))
+			return 0;	/* Already set */
+		return -ENOMEM;
+	}
+	bitmap = rcu_dereference_raw(*slot);
+	if (!bitmap) {
+		bitmap = this_cpu_xchg(ida_bitmap, NULL);
+		if (!bitmap)
+			return -ENOMEM;
+		memset(bitmap, 0, sizeof(*bitmap));
+		radix_tree_iter_replace(root, &iter, slot, bitmap);
+	}
+
+	__set_bit(bit, bitmap->bitmap);
+	if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
+		radix_tree_iter_tag_clear(root, &iter, IDR_FREE);
+	return 0;
+}
+EXPORT_SYMBOL(xb_set_bit);
+
+/**
+ * xb_clear_bit() - Clear a bit in the XBitmap.
+ * @xb: The XBitmap.
+ * @bit: Index of the bit to clear.
+ *
+ * This function is used to clear a bit in the xbitmap.
+ */
+void xb_clear_bit(struct xb *xb, unsigned long bit)
+{
+	unsigned long index = bit / IDA_BITMAP_BITS;
+	struct radix_tree_root *root = &xb->xbrt;
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+	struct ida_bitmap *bitmap;
+
+	bit %= IDA_BITMAP_BITS;
+	slot = radix_tree_iter_lookup(root, &iter, index);
+	if (!slot)
+		return;
+	bitmap = radix_tree_deref_slot(slot);
+	if (!bitmap)
+		return;
+
+	radix_tree_iter_tag_set(root, &iter, IDR_FREE);
+	__clear_bit(bit, bitmap->bitmap);
+	if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
+		kfree(bitmap);
+		radix_tree_iter_delete(root, &iter, slot);
+	}
+}
+EXPORT_SYMBOL(xb_clear_bit);
+
+/**
+ * xb_zero() - Clear a range of bits in the XBitmap.
+ * @xb: The XBitmap.
+ * @min: The first bit to clear.
+ * @max: The last bit to clear.
+ *
+ * This function is used to clear a range of bits in the xbitmap.
+ */
+void xb_zero(struct xb *xb, unsigned long min, unsigned long max)
+{
+	struct radix_tree_root *root = &xb->xbrt;
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+	struct ida_bitmap *bitmap;
+	unsigned long index = min / IDA_BITMAP_BITS;
+	unsigned long first = min % IDA_BITMAP_BITS;
+	unsigned long maxindex = max / IDA_BITMAP_BITS;
+
+	radix_tree_for_each_slot(slot, root, &iter, index) {
+		unsigned long nbits = IDA_BITMAP_BITS;
+		if (index > maxindex)
+			break;
+		bitmap = radix_tree_deref_slot(slot);
+		if (!bitmap)
+			continue;
+		radix_tree_iter_tag_set(root, &iter, IDR_FREE);
+
+		if (!first && iter.index < maxindex)
+			goto delete;
+		if (iter.index == maxindex)
+			nbits = max % IDA_BITMAP_BITS + 1;
+		bitmap_clear(bitmap->bitmap, first, nbits);
+		first = 0;
+		if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS))
+			goto delete;
+		continue;
+delete:
+		kfree(bitmap);
+		radix_tree_iter_delete(root, &iter, slot);
+	}
+}
+EXPORT_SYMBOL(xb_zero);
+
+/**
+ * xb_test_bit() - Test a bit in the xbitmap.
+ * @xb: The XBitmap.
+ * @bit: Index of the bit to test.
+ *
+ * This function is used to test a bit in the xbitmap.
+ *
+ * Return: %true if the bit is set.
+ */
+bool xb_test_bit(const struct xb *xb, unsigned long bit)
+{
+	unsigned long index = bit / IDA_BITMAP_BITS;
+	struct ida_bitmap *bitmap = radix_tree_lookup(&xb->xbrt, index);
+
+	bit %= IDA_BITMAP_BITS;
+
+	if (!bitmap)
+		return false;
+	return test_bit(bit, bitmap->bitmap);
+}
+EXPORT_SYMBOL(xb_test_bit);
+
+/**
+ * xb_find_set() - Find the next set bit in a range of bits.
+ * @xb: The XBitmap.
+ * @max: The maximum position to search.
+ * @bit: The first bit to examine, and on exit, the found bit.
+ *
+ * On entry, @bit points to the index of the first bit to search.  On exit,
+ * if this function returns %true, @bit will be updated to the index of the
+ * first found bit.  It will not be updated if this function returns %false.
+ *
+ * Return: %true if a set bit was found.
+ */
+bool xb_find_set(const struct xb *xb, unsigned long max, unsigned long *bit)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+	struct ida_bitmap *bitmap;
+	unsigned long index = *bit / IDA_BITMAP_BITS;
+	unsigned int first = *bit % IDA_BITMAP_BITS;
+	unsigned long maxindex = max / IDA_BITMAP_BITS;
+
+	radix_tree_for_each_slot(slot, &xb->xbrt, &iter, index) {
+		if (iter.index > maxindex)
+			break;
+		bitmap = radix_tree_deref_slot(slot);
+		if (bitmap) {
+			unsigned int nbits = IDA_BITMAP_BITS;
+			if (iter.index == maxindex)
+				nbits = max % IDA_BITMAP_BITS + 1;
+			first = find_next_bit(bitmap->bitmap, nbits, first);
+			if (first != nbits) {
+				*bit = first + iter.index * IDA_BITMAP_BITS;
+				return true;
+			}
+		}
+		first = 0;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL(xb_find_set);
+
+/**
+ * xb_find_zero() - Find the next zero bit in a range of bits
+ * @xb: The XBitmap.
+ * @max: The maximum index to search.
+ * @bit: Pointer to an index.
+ *
+ * On entry, @bit points to the index of the first bit to search.  On exit,
+ * if this function returns %true, @bit will be updated to the index of the
+ * first found bit.  It will not be updated if this function returns %false.
+ *
+ * Return: %true if a clear bit was found.
+ */
+bool xb_find_zero(const struct xb *xb, unsigned long max, unsigned long *bit)
+{
+	void __rcu **slot;
+	struct radix_tree_iter iter;
+	struct ida_bitmap *bitmap;
+	unsigned long index = *bit / IDA_BITMAP_BITS;
+	unsigned long first = *bit % IDA_BITMAP_BITS;
+	unsigned long maxindex = max / IDA_BITMAP_BITS;
+
+	radix_tree_for_each_tagged(slot, &xb->xbrt, &iter, index, IDR_FREE) {
+		unsigned int nbits = IDA_BITMAP_BITS;
+		if (iter.index > maxindex)
+			return false;
+		bitmap = radix_tree_deref_slot(slot);
+		if (!bitmap)
+			break;
+		if (iter.index == maxindex)
+			nbits = max % IDA_BITMAP_BITS + 1;
+		first = find_next_zero_bit(bitmap->bitmap, nbits, first);
+		if (first != nbits)
+			break;
+		first = 0;
+	}
+
+	*bit = first + iter.index * IDA_BITMAP_BITS;
+	return true;
+}
+EXPORT_SYMBOL(xb_find_zero);
+
+#ifndef __KERNEL__
+
+static DEFINE_XB(xb1);
+
+static void xbitmap_check_bit(unsigned long bit)
+{
+	unsigned long nbit = 0;
+
+	xb_preload(GFP_KERNEL);
+	assert(!xb_test_bit(&xb1, bit));
+	assert(xb_set_bit(&xb1, bit) == 0);
+	assert(xb_test_bit(&xb1, bit));
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
+	assert(nbit == bit);
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
+	assert(nbit == bit);
+	nbit++;
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+	assert(nbit == bit + 1);
+	xb_clear_bit(&xb1, bit);
+	assert(xb_empty(&xb1));
+	xb_clear_bit(&xb1, bit);
+	assert(xb_empty(&xb1));
+	nbit = 0;
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+	assert(nbit == 0);
+	xb_preload_end();
+}
+
+static void xbitmap_check_bit_range(void)
+{
+	unsigned long nbit = 0;
+
+	/* Regular test1: node = NULL */
+	xb_preload(GFP_KERNEL);
+	xb_set_bit(&xb1, 700);
+	xb_preload_end();
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
+	assert(nbit == 700);
+	nbit++;
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+	assert(nbit == 701);
+	xb_zero(&xb1, 0, 1023);
+
+	/*
+	 * Regular test 2
+	 * set bit 2000, 2001, 2040
+	 * Next 1 in [0, 2048)		--> 2000
+	 * Next 1 in [2000, 2002)	--> 2000
+	 * Next 1 in [2002, 2041)	--> 2040
+	 * Next 1 in [2002, 2040)	--> none
+	 * Next 0 in [2000, 2048)	--> 2002
+	 * Next 0 in [2048, 2060)	--> 2048
+	 */
+	xb_preload(GFP_KERNEL);
+	assert(!xb_set_bit(&xb1, 2000));
+	assert(!xb_set_bit(&xb1, 2001));
+	assert(!xb_set_bit(&xb1, 2040));
+	nbit = 0;
+	assert(xb_find_set(&xb1, 2048, &nbit) == true);
+	assert(nbit == 2000);
+	assert(xb_find_set(&xb1, 2002, &nbit) == true);
+	assert(nbit == 2000);
+	nbit = 2002;
+	assert(xb_find_set(&xb1, 2041, &nbit) == true);
+	assert(nbit == 2040);
+	nbit = 2002;
+	assert(xb_find_set(&xb1, 2040, &nbit) == true);
+	assert(nbit == 2040);
+	nbit = 2000;
+	assert(xb_find_zero(&xb1, 2048, &nbit) == true);
+	assert(nbit == 2002);
+	nbit = 2048;
+	assert(xb_find_zero(&xb1, 2060, &nbit) == true);
+	assert(nbit == 2048);
+	xb_zero(&xb1, 0, 2047);
+	nbit = 0;
+	assert(xb_find_set(&xb1, 2048, &nbit) == false);
+	assert(nbit == 0);
+	xb_preload_end();
+
+	/*
+	 * Overflow tests:
+	 * Set bit 1 and ULONG_MAX - 4
+	 * Next 1 in [ULONG_MAX - 4, ULONG_MAX)		--> ULONG_MAX - 4
+	 * Next 1 [ULONG_MAX - 3, ULONG_MAX + 4)	--> none
+	 * Next 0 [ULONG_MAX - 4, ULONG_MAX + 4)	--> none
+	 */
+	xb_preload(GFP_KERNEL);
+	assert(!xb_set_bit(&xb1, 1));
+	xb_preload_end();
+	xb_preload(GFP_KERNEL);
+	assert(!xb_set_bit(&xb1, ULONG_MAX - 4));
+	nbit = ULONG_MAX - 4;
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
+	assert(nbit == ULONG_MAX - 4);
+	nbit++;
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+	assert(nbit == ULONG_MAX - 3);
+	nbit--;
+	assert(xb_find_zero(&xb1, ULONG_MAX, &nbit) == true);
+	assert(nbit == ULONG_MAX - 3);
+	xb_zero(&xb1, ULONG_MAX - 4, ULONG_MAX);
+	nbit = ULONG_MAX - 10;
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+	assert(nbit == ULONG_MAX - 10);
+	xb_zero(&xb1, 0, 1);
+	nbit = 0;
+	assert(xb_find_set(&xb1, 2, &nbit) == false);
+	assert(nbit == 0);
+	xb_preload_end();
+}
+
+/* Check that setting an already-full bitmap works */
+static void xbitmap_check_set(unsigned long base)
+{
+	unsigned long i;
+
+	assert(xb_empty(&xb1));
+
+	for (i = 0; i < 64 * 1024; i++) {
+		xb_preload(GFP_KERNEL);
+		assert(xb_set_bit(&xb1, base + i) == 0);
+		xb_preload_end();
+	}
+	for (i = 0; i < 64 * 1024; i++)
+		assert(xb_set_bit(&xb1, base + i) == 0);
+
+	for (i = 0; i < 64 * 1024; i++)
+		xb_clear_bit(&xb1, base + i);
+
+	assert(xb_empty(&xb1));
+}
+
+static void xbitmap_checks(void)
+{
+	xb_init(&xb1);
+	xbitmap_check_bit(0);
+	xbitmap_check_bit(30);
+	xbitmap_check_bit(31);
+	xbitmap_check_bit(62);
+	xbitmap_check_bit(63);
+	xbitmap_check_bit(64);
+	xbitmap_check_bit(700);
+	xbitmap_check_bit(1023);
+	xbitmap_check_bit(1024);
+	xbitmap_check_bit(1025);
+	xbitmap_check_bit((1UL << 63) | (1UL << 24));
+	xbitmap_check_bit((1UL << 63) | (1UL << 24) | 70);
+
+	xbitmap_check_bit_range();
+
+	xbitmap_check_set(0);
+	xbitmap_check_set(1024);
+	xbitmap_check_set(1024 * 64);
+}
+
+int __weak main(void)
+{
+	radix_tree_init();
+	xbitmap_checks();
+}
+#endif
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index ca160270fdfa..6b559ee25def 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -37,6 +37,40 @@ static inline void bitmap_zero(unsigned long *dst, int nbits)
 	}
 }
 
+static inline void __bitmap_clear(unsigned long *map, unsigned int start,
+				  int len)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const unsigned int size = start + len;
+	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+	while (len - bits_to_clear >= 0) {
+		*p &= ~mask_to_clear;
+		len -= bits_to_clear;
+		bits_to_clear = BITS_PER_LONG;
+		mask_to_clear = ~0UL;
+		p++;
+	}
+	if (len) {
+		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
+
+static inline __always_inline void bitmap_clear(unsigned long *map,
+						unsigned int start,
+						unsigned int nbits)
+{
+	if (__builtin_constant_p(nbits) && nbits == 1)
+		__clear_bit(start, map);
+	else if (__builtin_constant_p(start) && IS_ALIGNED(start, 8) &&
+		 __builtin_constant_p(nbits) && IS_ALIGNED(nbits, 8))
+		memset((char *)map + start / 8, 0, nbits / 8);
+	else
+		__bitmap_clear(map, start, nbits);
+}
+
 static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
 {
 	unsigned int nlongs = BITS_TO_LONGS(nbits);
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 0ad884452c5c..3c992ae15440 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -13,6 +13,8 @@
 #define UINT_MAX	(~0U)
 #endif
 
+#define IS_ALIGNED(x, a)	(((x) & ((typeof(x))(a) - 1)) == 0)
+
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 
 #define PERF_ALIGN(x, a)	__PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index fa7ee369b3c9..adf36e34dd77 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,12 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0
 
 CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
-LDFLAGS += -fsanitize=address
-LDLIBS+= -lpthread -lurcu
-TARGETS = main idr-test multiorder
+LDFLAGS += -fsanitize=address $(LDLIBS)
+LDLIBS := -lpthread -lurcu
+TARGETS = main idr-test multiorder xbitmap
 CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
-	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
+	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o \
+	 xbitmap.o
 
 ifndef SHIFT
 	SHIFT=3
@@ -25,8 +26,11 @@ idr-test: idr-test.o $(CORE_OFILES)
 
 multiorder: multiorder.o $(CORE_OFILES)
 
+xbitmap: xbitmap.o $(CORE_OFILES)
+	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o xbitmap
+
 clean:
-	$(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h
+	$(RM) $(TARGETS) *.o radix-tree.c idr.c xbitmap.c generated/map-shift.h
 
 vpath %.c ../../lib
 
@@ -34,6 +38,7 @@ $(OFILES): Makefile *.h */*.h generated/map-shift.h \
 	../../include/linux/*.h \
 	../../include/asm/*.h \
 	../../../include/linux/radix-tree.h \
+	../../../include/linux/xbitmap.h \
 	../../../include/linux/idr.h
 
 radix-tree.c: ../../../lib/radix-tree.c
@@ -42,6 +47,9 @@ radix-tree.c: ../../../lib/radix-tree.c
 idr.c: ../../../lib/idr.c
 	sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
 
+xbitmap.c: ../../../lib/xbitmap.c
+	sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
 .PHONY: mapshift
 
 mapshift:
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index c3bc3f364f68..426f32f28547 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -17,6 +17,4 @@
 #define pr_debug printk
 #define pr_cont printk
 
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
 #endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/xbitmap.h b/tools/testing/radix-tree/linux/xbitmap.h
new file mode 100644
index 000000000000..61de214542e7
--- /dev/null
+++ b/tools/testing/radix-tree/linux/xbitmap.h
@@ -0,0 +1 @@
+#include "../../../../include/linux/xbitmap.h"
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index 257f3f8aacaa..d112363262ae 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -326,6 +326,10 @@ static void single_thread_tests(bool long_run)
 	rcu_barrier();
 	printv(2, "after idr_checks: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
+	xbitmap_checks();
+	rcu_barrier();
+	printv(2, "after xbitmap_checks: %d allocated, preempt %d\n",
+			nr_allocated, preempt_count);
 	big_gang_check(long_run);
 	rcu_barrier();
 	printv(2, "after big_gang_check: %d allocated, preempt %d\n",
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index d9c031dbeb1a..8175d6b23b32 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -38,6 +38,7 @@ void benchmark(void);
 void idr_checks(void);
 void ida_checks(void);
 void ida_thread_tests(void);
+void xbitmap_checks(void);
 
 struct item *
 item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
  2017-12-21 14:37   ` Tetsuo Handa
@ 2017-12-22  8:45     ` Wei Wang
  0 siblings, 0 replies; 13+ messages in thread
From: Wei Wang @ 2017-12-22  8:45 UTC (permalink / raw)
  To: Tetsuo Handa, willy
  Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
	linux-mm, mst, mhocko, akpm, mawilcox

On 12/21/2017 10:37 PM, Tetsuo Handa wrote:
> Matthew Wilcox wrote:
>>> +/**
>>> + * xb_find_set - find the next set bit in a range of bits
>>> + * @xb: the xbitmap to search from
>>> + * @offset: the offset in the range to start searching
>>> + * @size: the size of the range
>>> + *
>>> + * Returns: the found bit or, @size if no set bit is found.
>>> + */
>>> +unsigned long xb_find_set(struct xb *xb, unsigned long size,
>>> +			  unsigned long offset)
>>> +{
>>> +	struct radix_tree_root *root = &xb->xbrt;
>>> +	struct radix_tree_node *node;
>>> +	void __rcu **slot;
>>> +	struct ida_bitmap *bitmap;
>>> +	unsigned long index = offset / IDA_BITMAP_BITS;
>>> +	unsigned long index_end = size / IDA_BITMAP_BITS;
>>> +	unsigned long bit = offset % IDA_BITMAP_BITS;
>>> +
>>> +	if (unlikely(offset >= size))
>>> +		return size;
>>> +
>>> +	while (index <= index_end) {
>>> +		unsigned long ret;
>>> +		unsigned int nbits = size - index * IDA_BITMAP_BITS;
>>> +
>>> +		bitmap = __radix_tree_lookup(root, index, &node, &slot);
>>> +
>>> +		if (!node && !bitmap)
>>> +			return size;
>>> +
>>> +		if (bitmap) {
>>> +			if (nbits > IDA_BITMAP_BITS)
>>> +				nbits = IDA_BITMAP_BITS;
>>> +
>>> +			ret = find_next_bit(bitmap->bitmap, nbits, bit);
>>> +			if (ret != nbits)
>>> +				return ret + index * IDA_BITMAP_BITS;
>>> +		}
>>> +		bit = 0;
>>> +		index++;
>>> +	}
>>> +
>>> +	return size;
>>> +}
>>> +EXPORT_SYMBOL(xb_find_set);
>> This is going to be slower than the implementation I sent yesterday.  If I
>> call:
>> 	xb_init(xb);
>> 	xb_set_bit(xb, ULONG_MAX);
>> 	xb_find_set(xb, ULONG_MAX, 0);
>>
>> it's going to call __radix_tree_lookup() 16 quadrillion times.
>> My implementation will walk the tree precisely once.
>>
> Yes. Wei's patch still can not work.
> We should start reviewing Matthew's implementation.

It runs without any issue on my machine. I didn't generate an "xbitmap" 
executable (I just found adding xbitmap executable causes a build error 
due to a Makefile error), instead, I tested it within "main" and it 
passed all the tests.

Matthew has implemented a new version, let's start from there.

Best,
Wei

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
  2017-12-21 21:03 ` Matthew Wilcox
@ 2017-12-22  8:49   ` Wei Wang
  2018-01-02 14:09     ` Matthew Wilcox
  2017-12-23  2:59   ` Tetsuo Handa
  1 sibling, 1 reply; 13+ messages in thread
From: Wei Wang @ 2017-12-22  8:49 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
	linux-mm, mst, mhocko, akpm, mawilcox, penguin-kernel

On 12/22/2017 05:03 AM, Matthew Wilcox wrote:
> OK, here's a rewrite of xbitmap.
>
> Compared to the version you sent:
>   - xb_find_set() is the rewrite I sent out yesterday.
>   - xb_find_clear() is a new implementation.  I use the IDR_FREE tag to find
>     clear bits.  This led to me finding a bug in radix_tree_for_each_tagged().
>   - xb_zero() is also a new implementation (replacing xb_clear_bit_range).
>     It should also be more efficient in deep trees.
>   - Did not accept your change to xb_set_bit(); I think it's important to
>     leave the radix tree node in place, so we're guaranteed to make progress
>     in low-memory situations.  We're expecting the caller to retry, so the
>     memory should not leak.
>   - Redid a lot of the kernel-doc.  Thanks for adding it!  I removed mentions
>     of implementation details, leaving only information useful to those who
>     are using it.
>
> Compared to the version I put out back in February:
>   - Accepted your change to xb_preload(); I think it's an improvement.
>   - Rewrote xb_set_bit() to use the radix_tree_iter_ family of functions.
>     Should improve performance for deep trees.
>   - Rewrote xb_clear_bit() for the same reason.
>   - Left out the use of radix tree exceptional entries.  Thanks for taking
>     them out!  Keeps it clearer for now; if they prove useful, we can put
>     them back in.
>   - Removed the use of __radix_tree_delete and __radix_tree_create, so drop
>     the changes to those functions.
>
> Other miscellaneous notes
>   - I left xb_fill() in the header file, even though there's no implementation
>     yet.  Wouldn't be hard to add once we have a user.
>   - Used SPDX tags instead of a license notice.
>
> I think we need more test cases ... in reviewing this to send out,
> I found a bug in xb_zero(), which I've only fixed because I don't have
> time to write a test case for it.

Thanks for the improvement. I also found a small bug in xb_zero. With 
the following changes, it has passed the current test cases and tested 
with the virtio-balloon usage without any issue.


> +
> +/**
> + * xb_zero() - Clear a range of bits in the XBitmap.
> + * @xb: The XBitmap.
> + * @min: The first bit to clear.
> + * @max: The last bit to clear.
> + *
> + * This function is used to clear a range of bits in the xbitmap.
> + */
> +void xb_zero(struct xb *xb, unsigned long min, unsigned long max)
> +{
> +	struct radix_tree_root *root = &xb->xbrt;
> +	struct radix_tree_iter iter;
> +	void __rcu **slot;
> +	struct ida_bitmap *bitmap;
> +	unsigned long index = min / IDA_BITMAP_BITS;
> +	unsigned long first = min % IDA_BITMAP_BITS;
> +	unsigned long maxindex = max / IDA_BITMAP_BITS;
> +
> +	radix_tree_for_each_slot(slot, root, &iter, index) {
> +		unsigned long nbits = IDA_BITMAP_BITS;
> +		if (index > maxindex)
> +			break;
> +		bitmap = radix_tree_deref_slot(slot);
> +		if (!bitmap)
> +			continue;
> +		radix_tree_iter_tag_set(root, &iter, IDR_FREE);
> +
> +		if (!first && iter.index < maxindex)
> +			goto delete;
> +		if (iter.index == maxindex)
> +			nbits = max % IDA_BITMAP_BITS + 1;
> +		bitmap_clear(bitmap->bitmap, first, nbits);

It should be: bitmap_clear(.., first, nbits - first);


> diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
> index fa7ee369b3c9..adf36e34dd77 100644
> --- a/tools/testing/radix-tree/Makefile
> +++ b/tools/testing/radix-tree/Makefile
> @@ -1,12 +1,13 @@
>   # SPDX-License-Identifier: GPL-2.0
>   
>   CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
> -LDFLAGS += -fsanitize=address
> -LDLIBS+= -lpthread -lurcu
> -TARGETS = main idr-test multiorder
> +LDFLAGS += -fsanitize=address $(LDLIBS)
> +LDLIBS := -lpthread -lurcu
> +TARGETS = main idr-test multiorder xbitmap
>   CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
>   OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
> -	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
> +	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o \
> +	 xbitmap.o
>   
>   ifndef SHIFT
>   	SHIFT=3
> @@ -25,8 +26,11 @@ idr-test: idr-test.o $(CORE_OFILES)
>   
>   multiorder: multiorder.o $(CORE_OFILES)
>   
> +xbitmap: xbitmap.o $(CORE_OFILES)
> +	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o xbitmap
> +

I think the "$(CC).." line should be removed, which will fix the build 
error when adding "xbitmap" to TARGET.


Best,
Wei

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
  2017-12-21 21:03 ` Matthew Wilcox
  2017-12-22  8:49   ` Wei Wang
@ 2017-12-23  2:59   ` Tetsuo Handa
  2017-12-23  3:29     ` Matthew Wilcox
  1 sibling, 1 reply; 13+ messages in thread
From: Tetsuo Handa @ 2017-12-23  2:59 UTC (permalink / raw)
  To: willy, wei.w.wang
  Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
	linux-mm, mst, mhocko, akpm, mawilcox

Matthew Wilcox wrote:
> +/**
> + * xb_set_bit() - Set a bit in the XBitmap.
> + * @xb: The XBitmap.
> + * @bit: Index of the bit to set.
> + *
> + * This function is used to set a bit in the xbitmap.
> + *
> + * Return: 0 on success. -ENOMEM if memory could not be allocated.
> + */
> +int xb_set_bit(struct xb *xb, unsigned long bit)
> +{
> +	unsigned long index = bit / IDA_BITMAP_BITS;
> +	struct radix_tree_root *root = &xb->xbrt;
> +	struct radix_tree_iter iter;
> +	void __rcu **slot;
> +	struct ida_bitmap *bitmap;
> +
> +	bit %= IDA_BITMAP_BITS;
> +	radix_tree_iter_init(&iter, index);
> +	slot = idr_get_free_cmn(root, &iter, GFP_NOWAIT | __GFP_NOWARN, index);
> +	if (IS_ERR(slot)) {
> +		if (slot == ERR_PTR(-ENOSPC))
> +			return 0;	/* Already set */

Why already set? I guess something is there, but is it guaranteed that
there is a bitmap with the "bit" set?

> +		return -ENOMEM;
> +	}
> +	bitmap = rcu_dereference_raw(*slot);
> +	if (!bitmap) {
> +		bitmap = this_cpu_xchg(ida_bitmap, NULL);
> +		if (!bitmap)
> +			return -ENOMEM;

I can't understand this. I can understand if it were

  BUG_ON(!bitmap);

because you called xb_preload().

But

	/*
	 * Regular test 2
	 * set bit 2000, 2001, 2040
	 * Next 1 in [0, 2048)		--> 2000
	 * Next 1 in [2000, 2002)	--> 2000
	 * Next 1 in [2002, 2041)	--> 2040
	 * Next 1 in [2002, 2040)	--> none
	 * Next 0 in [2000, 2048)	--> 2002
	 * Next 0 in [2048, 2060)	--> 2048
	 */
	xb_preload(GFP_KERNEL);
	assert(!xb_set_bit(&xb1, 2000));
	assert(!xb_set_bit(&xb1, 2001));
	assert(!xb_set_bit(&xb1, 2040));
	nbit = 0;
	assert(xb_find_set(&xb1, 2048, &nbit) == true);
	assert(nbit == 2000);
	assert(xb_find_set(&xb1, 2002, &nbit) == true);
	assert(nbit == 2000);
	nbit = 2002;
	assert(xb_find_set(&xb1, 2041, &nbit) == true);
	assert(nbit == 2040);
	nbit = 2002;
	assert(xb_find_set(&xb1, 2040, &nbit) == true);
	assert(nbit == 2040);
	nbit = 2000;
	assert(xb_find_zero(&xb1, 2048, &nbit) == true);
	assert(nbit == 2002);
	nbit = 2048;
	assert(xb_find_zero(&xb1, 2060, &nbit) == true);
	assert(nbit == 2048);
	xb_zero(&xb1, 0, 2047);
	nbit = 0;
	assert(xb_find_set(&xb1, 2048, &nbit) == false);
	assert(nbit == 0);
	xb_preload_end();

you are not calling xb_preload() prior to each xb_set_bit() call.
This means that, if each xb_set_bit() is not surrounded with
xb_preload()/xb_preload_end(), there is possibility of hitting
this_cpu_xchg(ida_bitmap, NULL) == NULL.

If bitmap == NULL at this_cpu_xchg(ida_bitmap, NULL) is allowed,
you can use kzalloc(sizeof(*bitmap), GFP_NOWAIT | __GFP_NOWARN)
and get rid of xb_preload()/xb_preload_end().

You are using idr_get_free_cmn(GFP_NOWAIT | __GFP_NOWARN), which
means that the caller has to be prepared for allocation failure
when calling xb_set_bit(). Thus, there is no need to use preload
in order to avoid failing to allocate "bitmap".



Also, please clarify why it is OK to just return here.
I don't know what

  radix_tree_iter_replace(root, &iter, slot, bitmap);

is doing. If you created a slot but did not assign "bitmap",
what the caller of xb_test_bit() etc. will find? If there is an
assumption about this slot, won't this cause a problem?

> +		memset(bitmap, 0, sizeof(*bitmap));
> +		radix_tree_iter_replace(root, &iter, slot, bitmap);
> +	}
> +
> +	__set_bit(bit, bitmap->bitmap);
> +	if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
> +		radix_tree_iter_tag_clear(root, &iter, IDR_FREE);
> +	return 0;
> +}

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
  2017-12-23  2:59   ` Tetsuo Handa
@ 2017-12-23  3:29     ` Matthew Wilcox
  2017-12-23 14:33       ` Tetsuo Handa
  0 siblings, 1 reply; 13+ messages in thread
From: Matthew Wilcox @ 2017-12-23  3:29 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: wei.w.wang, virtio-dev, linux-kernel, qemu-devel, virtualization,
	kvm, linux-mm, mst, mhocko, akpm, mawilcox

On Sat, Dec 23, 2017 at 11:59:54AM +0900, Tetsuo Handa wrote:
> Matthew Wilcox wrote:
> > +	bit %= IDA_BITMAP_BITS;
> > +	radix_tree_iter_init(&iter, index);
> > +	slot = idr_get_free_cmn(root, &iter, GFP_NOWAIT | __GFP_NOWARN, index);
> > +	if (IS_ERR(slot)) {
> > +		if (slot == ERR_PTR(-ENOSPC))
> > +			return 0;	/* Already set */
> 
> Why already set? I guess something is there, but is it guaranteed that
> there is a bitmap with the "bit" set?

Yes.  For radix trees tagged with IDR_RT_MARKER, newly created slots
have the IDR_FREE tag set.  We only clear the IDR_FREE tag once the
bitmap is full.  So if we try to find a free slot and the tag is clear,
we know the bitmap is full.

> > +	bitmap = rcu_dereference_raw(*slot);
> > +	if (!bitmap) {
> > +		bitmap = this_cpu_xchg(ida_bitmap, NULL);
> > +		if (!bitmap)
> > +			return -ENOMEM;
> 
> I can't understand this. I can understand if it were
> 
>   BUG_ON(!bitmap);
> 
> because you called xb_preload().
> 
> But
> 
> 	/*
> 	 * Regular test 2
> 	 * set bit 2000, 2001, 2040
> 	 * Next 1 in [0, 2048)		--> 2000
> 	 * Next 1 in [2000, 2002)	--> 2000
> 	 * Next 1 in [2002, 2041)	--> 2040
> 	 * Next 1 in [2002, 2040)	--> none
> 	 * Next 0 in [2000, 2048)	--> 2002
> 	 * Next 0 in [2048, 2060)	--> 2048
> 	 */
> 	xb_preload(GFP_KERNEL);
> 	assert(!xb_set_bit(&xb1, 2000));
> 	assert(!xb_set_bit(&xb1, 2001));
> 	assert(!xb_set_bit(&xb1, 2040));
[...]
> 	xb_preload_end();
> 
> you are not calling xb_preload() prior to each xb_set_bit() call.
> This means that, if each xb_set_bit() is not surrounded with
> xb_preload()/xb_preload_end(), there is possibility of hitting
> this_cpu_xchg(ida_bitmap, NULL) == NULL.

This is just a lazy test.  We "know" that the bits in the range 1024-2047
will all land in the same bitmap, so there's no need to preload for each
of them.

> If bitmap == NULL at this_cpu_xchg(ida_bitmap, NULL) is allowed,
> you can use kzalloc(sizeof(*bitmap), GFP_NOWAIT | __GFP_NOWARN)
> and get rid of xb_preload()/xb_preload_end().

No, we can't.  GFP_NOWAIT | __GFP_NOWARN won't try very hard to allocate
memory.  There's no reason to fail the call if the user is in a context
where they can try harder to free memory.

> You are using idr_get_free_cmn(GFP_NOWAIT | __GFP_NOWARN), which
> means that the caller has to be prepared for allocation failure
> when calling xb_set_bit(). Thus, there is no need to use preload
> in order to avoid failing to allocate "bitmap".

xb_preload also preloads radix tree nodes.

> Also, please clarify why it is OK to just return here.
> I don't know what
> 
>   radix_tree_iter_replace(root, &iter, slot, bitmap);
> 
> is doing. If you created a slot but did not assign "bitmap",
> what the caller of xb_test_bit() etc. will find? If there is an
> assumption about this slot, won't this cause a problem?

xb_test_bit will find NULL if bitmap wasn't assigned.  That doesn't
harm anything.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
  2017-12-23  3:29     ` Matthew Wilcox
@ 2017-12-23 14:33       ` Tetsuo Handa
  2017-12-23 14:58         ` Matthew Wilcox
  2017-12-24  7:31         ` Wei Wang
  0 siblings, 2 replies; 13+ messages in thread
From: Tetsuo Handa @ 2017-12-23 14:33 UTC (permalink / raw)
  To: willy
  Cc: wei.w.wang, virtio-dev, linux-kernel, qemu-devel, virtualization,
	kvm, linux-mm, mst, mhocko, akpm, mawilcox

Matthew Wilcox wrote:
> On Sat, Dec 23, 2017 at 11:59:54AM +0900, Tetsuo Handa wrote:
> > Matthew Wilcox wrote:
> > > +	bit %= IDA_BITMAP_BITS;
> > > +	radix_tree_iter_init(&iter, index);
> > > +	slot = idr_get_free_cmn(root, &iter, GFP_NOWAIT | __GFP_NOWARN, index);
> > > +	if (IS_ERR(slot)) {
> > > +		if (slot == ERR_PTR(-ENOSPC))
> > > +			return 0;	/* Already set */
> > 
> > Why already set? I guess something is there, but is it guaranteed that
> > there is a bitmap with the "bit" set?
> 
> Yes.  For radix trees tagged with IDR_RT_MARKER, newly created slots
> have the IDR_FREE tag set.  We only clear the IDR_FREE tag once the
> bitmap is full.  So if we try to find a free slot and the tag is clear,
> we know the bitmap is full.
> 

OK. But does using IDR_FREE tag have more benefit than cost?
You are doing

	if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
		radix_tree_iter_tag_clear(root, &iter, IDR_FREE);

for each xb_set_bit() call. How likely do we hit ERR_PTR(-ENOSPC) path?
Isn't removing both bitmap_full() and ERR_PTR(-ENOSPC) better?

> > > +	bitmap = rcu_dereference_raw(*slot);
> > > +	if (!bitmap) {
> > > +		bitmap = this_cpu_xchg(ida_bitmap, NULL);
> > > +		if (!bitmap)
> > > +			return -ENOMEM;
> > 
> > I can't understand this. I can understand if it were
> > 
> >   BUG_ON(!bitmap);
> > 
> > because you called xb_preload().
> > 
> > But
> > 
> > 	/*
> > 	 * Regular test 2
> > 	 * set bit 2000, 2001, 2040
> > 	 * Next 1 in [0, 2048)		--> 2000
> > 	 * Next 1 in [2000, 2002)	--> 2000
> > 	 * Next 1 in [2002, 2041)	--> 2040
> > 	 * Next 1 in [2002, 2040)	--> none
> > 	 * Next 0 in [2000, 2048)	--> 2002
> > 	 * Next 0 in [2048, 2060)	--> 2048
> > 	 */
> > 	xb_preload(GFP_KERNEL);
> > 	assert(!xb_set_bit(&xb1, 2000));
> > 	assert(!xb_set_bit(&xb1, 2001));
> > 	assert(!xb_set_bit(&xb1, 2040));
> [...]
> > 	xb_preload_end();
> > 
> > you are not calling xb_preload() prior to each xb_set_bit() call.
> > This means that, if each xb_set_bit() is not surrounded with
> > xb_preload()/xb_preload_end(), there is possibility of hitting
> > this_cpu_xchg(ida_bitmap, NULL) == NULL.
> 
> This is just a lazy test.  We "know" that the bits in the range 1024-2047
> will all land in the same bitmap, so there's no need to preload for each
> of them.

Testcases also serves as how to use that API.
Assuming such thing leads to incorrect usage.

> 
> > If bitmap == NULL at this_cpu_xchg(ida_bitmap, NULL) is allowed,
> > you can use kzalloc(sizeof(*bitmap), GFP_NOWAIT | __GFP_NOWARN)
> > and get rid of xb_preload()/xb_preload_end().
> 
> No, we can't.  GFP_NOWAIT | __GFP_NOWARN won't try very hard to allocate
> memory.  There's no reason to fail the call if the user is in a context
> where they can try harder to free memory.

But there is no reason to use GFP_NOWAIT at idr_get_free_cmn() if it is
safe to use GFP_KERNEL. If we don't require xb_preload() which forces
idr_get_free_cmn() to use GFP_NOWAIT due to possibility of preemption
disabled by xb_preload(), we can allow passing gfp flags to xb_set_bit().

> 
> > You are using idr_get_free_cmn(GFP_NOWAIT | __GFP_NOWARN), which
> > means that the caller has to be prepared for allocation failure
> > when calling xb_set_bit(). Thus, there is no need to use preload
> > in order to avoid failing to allocate "bitmap".
> 
> xb_preload also preloads radix tree nodes.
> 

But it after all forces idr_get_free_cmn() to use GFP_NOWAIT, doesn't it?

Speak of initial user (i.e. virtio-balloon), xb_preload() won't be able to
use GFP_KERNEL in order to avoid OOM lockup. Therefore, I don't see
advantages with using xb_preload(). If xb_set_bit() receives gfp flags,
the caller can pass GFP_KERNEL if it is safe to use GFP_KERNEL.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
  2017-12-23 14:33       ` Tetsuo Handa
@ 2017-12-23 14:58         ` Matthew Wilcox
  2017-12-24  7:31         ` Wei Wang
  1 sibling, 0 replies; 13+ messages in thread
From: Matthew Wilcox @ 2017-12-23 14:58 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: wei.w.wang, virtio-dev, linux-kernel, qemu-devel, virtualization,
	kvm, linux-mm, mst, mhocko, akpm, mawilcox

On Sat, Dec 23, 2017 at 11:33:45PM +0900, Tetsuo Handa wrote:
> Matthew Wilcox wrote:
> > On Sat, Dec 23, 2017 at 11:59:54AM +0900, Tetsuo Handa wrote:
> > > Matthew Wilcox wrote:
> > > > +	bit %= IDA_BITMAP_BITS;
> > > > +	radix_tree_iter_init(&iter, index);
> > > > +	slot = idr_get_free_cmn(root, &iter, GFP_NOWAIT | __GFP_NOWARN, index);
> > > > +	if (IS_ERR(slot)) {
> > > > +		if (slot == ERR_PTR(-ENOSPC))
> > > > +			return 0;	/* Already set */
> > > 
> > > Why already set? I guess something is there, but is it guaranteed that
> > > there is a bitmap with the "bit" set?
> > 
> > Yes.  For radix trees tagged with IDR_RT_MARKER, newly created slots
> > have the IDR_FREE tag set.  We only clear the IDR_FREE tag once the
> > bitmap is full.  So if we try to find a free slot and the tag is clear,
> > we know the bitmap is full.
> > 
> 
> OK. But does using IDR_FREE tag have more benefit than cost?
> You are doing
> 
> 	if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
> 		radix_tree_iter_tag_clear(root, &iter, IDR_FREE);
> 
> for each xb_set_bit() call. How likely do we hit ERR_PTR(-ENOSPC) path?
> Isn't removing both bitmap_full() and ERR_PTR(-ENOSPC) better?

You're assuming that the purpose of using IDR_FREE is to save xb_set_bit
from walking the tree unnecessarily.  It isn't; that's just a happy
side-effect.  Its main purpose is to make xb_find_zero() efficient.  If
we have large ranges of set bits, xb_find_zero() will be able to skip them.

> > This is just a lazy test.  We "know" that the bits in the range 1024-2047
> > will all land in the same bitmap, so there's no need to preload for each
> > of them.
> 
> Testcases also serves as how to use that API.
> Assuming such thing leads to incorrect usage.

Sure.  Would you like to submit a patch?

> > > If bitmap == NULL at this_cpu_xchg(ida_bitmap, NULL) is allowed,
> > > you can use kzalloc(sizeof(*bitmap), GFP_NOWAIT | __GFP_NOWARN)
> > > and get rid of xb_preload()/xb_preload_end().
> > 
> > No, we can't.  GFP_NOWAIT | __GFP_NOWARN won't try very hard to allocate
> > memory.  There's no reason to fail the call if the user is in a context
> > where they can try harder to free memory.
> 
> But there is no reason to use GFP_NOWAIT at idr_get_free_cmn() if it is
> safe to use GFP_KERNEL. If we don't require xb_preload() which forces
> idr_get_free_cmn() to use GFP_NOWAIT due to possibility of preemption
> disabled by xb_preload(), we can allow passing gfp flags to xb_set_bit().

The assumption is that the user has done:

	xb_preload(GFP_KERNEL);
	spin_lock(my_lock);
	xb_set_bit(xb, bit);
	spin_unlock(my_lock);
	xb_preload_end();

This is not the world's greatest interface.  Once I have the XArray
finished, we'll be able to ditch the external spinlock and the preload
interface and be able to call:

	xb_set_bit(xb, bit, GFP_KERNEL);

> > xb_preload also preloads radix tree nodes.
> 
> But it after all forces idr_get_free_cmn() to use GFP_NOWAIT, doesn't it?

I think you don't understand how the radix tree allocates nodes.  preloading
means that it will be able to access the nodes which were allocated earlier.

> Speak of initial user (i.e. virtio-balloon), xb_preload() won't be able to
> use GFP_KERNEL in order to avoid OOM lockup. Therefore, I don't see
> advantages with using xb_preload(). If xb_set_bit() receives gfp flags,
> the caller can pass GFP_KERNEL if it is safe to use GFP_KERNEL.

I haven't reviewed how virtio-balloon is using the interfaces.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
  2017-12-23 14:33       ` Tetsuo Handa
  2017-12-23 14:58         ` Matthew Wilcox
@ 2017-12-24  7:31         ` Wei Wang
  1 sibling, 0 replies; 13+ messages in thread
From: Wei Wang @ 2017-12-24  7:31 UTC (permalink / raw)
  To: Tetsuo Handa, willy
  Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
	linux-mm, mst, mhocko, akpm, mawilcox

On 12/23/2017 10:33 PM, Tetsuo Handa wrote:
>>>> +	bitmap = rcu_dereference_raw(*slot);
>>>> +	if (!bitmap) {
>>>> +		bitmap = this_cpu_xchg(ida_bitmap, NULL);
>>>> +		if (!bitmap)
>>>> +			return -ENOMEM;
>>> I can't understand this. I can understand if it were
>>>
>>>    BUG_ON(!bitmap);
>>>
>>> because you called xb_preload().
>>>
>>> But
>>>
>>> 	/*
>>> 	 * Regular test 2
>>> 	 * set bit 2000, 2001, 2040
>>> 	 * Next 1 in [0, 2048)		--> 2000
>>> 	 * Next 1 in [2000, 2002)	--> 2000
>>> 	 * Next 1 in [2002, 2041)	--> 2040
>>> 	 * Next 1 in [2002, 2040)	--> none
>>> 	 * Next 0 in [2000, 2048)	--> 2002
>>> 	 * Next 0 in [2048, 2060)	--> 2048
>>> 	 */
>>> 	xb_preload(GFP_KERNEL);
>>> 	assert(!xb_set_bit(&xb1, 2000));
>>> 	assert(!xb_set_bit(&xb1, 2001));
>>> 	assert(!xb_set_bit(&xb1, 2040));
>> [...]
>>> 	xb_preload_end();
>>>
>>> you are not calling xb_preload() prior to each xb_set_bit() call.
>>> This means that, if each xb_set_bit() is not surrounded with
>>> xb_preload()/xb_preload_end(), there is possibility of hitting
>>> this_cpu_xchg(ida_bitmap, NULL) == NULL.
>> This is just a lazy test.  We "know" that the bits in the range 1024-2047
>> will all land in the same bitmap, so there's no need to preload for each
>> of them.
> Testcases also serves as how to use that API.
> Assuming such thing leads to incorrect usage.

If callers are aware that the bits that they going to record locate in 
the same bitmap, I think they should also perform the xb_ APIs with only 
one preload. So the test cases here have shown them a correct example. 
We can probably add some comments above to explain this.


Best,
Wei

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
  2017-12-22  8:49   ` Wei Wang
@ 2018-01-02 14:09     ` Matthew Wilcox
  2018-01-03  8:56       ` Wei Wang
  0 siblings, 1 reply; 13+ messages in thread
From: Matthew Wilcox @ 2018-01-02 14:09 UTC (permalink / raw)
  To: Wei Wang
  Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
	linux-mm, mst, mhocko, akpm, mawilcox, penguin-kernel

On Fri, Dec 22, 2017 at 04:49:11PM +0800, Wei Wang wrote:
> Thanks for the improvement. I also found a small bug in xb_zero. With the
> following changes, it has passed the current test cases and tested with the
> virtio-balloon usage without any issue.

Thanks; I applied the change.  Can you supply a test-case for testing
xb_zero please?

> > @@ -25,8 +26,11 @@ idr-test: idr-test.o $(CORE_OFILES)
> >   multiorder: multiorder.o $(CORE_OFILES)
> > +xbitmap: xbitmap.o $(CORE_OFILES)
> > +	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o xbitmap
> > +
> 
> I think the "$(CC).." line should be removed, which will fix the build error
> when adding "xbitmap" to TARGET.

Not sure what build error you're talking about, but yes that CC line
should go.  Thanks, deleted.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
  2018-01-02 14:09     ` Matthew Wilcox
@ 2018-01-03  8:56       ` Wei Wang
  0 siblings, 0 replies; 13+ messages in thread
From: Wei Wang @ 2018-01-03  8:56 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
	linux-mm, mst, mhocko, akpm, mawilcox, penguin-kernel

On 01/02/2018 10:09 PM, Matthew Wilcox wrote:
> On Fri, Dec 22, 2017 at 04:49:11PM +0800, Wei Wang wrote:
>> Thanks for the improvement. I also found a small bug in xb_zero. With the
>> following changes, it has passed the current test cases and tested with the
>> virtio-balloon usage without any issue.
> Thanks; I applied the change.  Can you supply a test-case for testing
> xb_zero please?
>

Sure. Please check below the test cases. Do you plan to send out the new 
version of xbitmap yourself? If so, I will wait for that to send out the 
virtio-balloon patches.


static void xbitmap_check_zero_bits(void)
{
         assert(xb_empty(&xb1));

         /* Zero an empty xbitmap should work though no real work to do */
         xb_zero(&xb1, 0, ULONG_MAX);
         assert(xb_empty(&xb1));

         xb_preload(GFP_KERNEL);
         assert(xb_set_bit(&xb1, 0) == 0);
         xb_preload_end();

         /* Overflow test */
         xb_zero(&xb1, ULONG_MAX - 10, ULONG_MAX);
         assert(xb_test_bit(&xb1, 0));

         xb_preload(GFP_KERNEL);
         assert(xb_set_bit(&xb1, ULONG_MAX) == 0);
         xb_preload_end();

         xb_zero(&xb1, 0, ULONG_MAX);
         assert(xb_empty(&xb1));
}


/*
  * In the following tests, preload is called once when all the bits to set
  * locate in the same ida bitmap. Otherwise, it is recommended to call
  * preload for each xb_set_bit.
  */
static void xbitmap_check_bit_range(void)
{
         unsigned long nbit = 0;

         /* Regular test1: node = NULL */
         xb_preload(GFP_KERNEL);
         xb_set_bit(&xb1, 700);
         xb_preload_end();
         assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
         assert(nbit == 700);
         nbit++;
         assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
         assert(nbit == 701);
         xb_zero(&xb1, 0, 1023);

         /*
          * Regular test2
          * set bit 2000, 2001, 2040
          * Next 1 in [0, 2048]          --> 2000
          * Next 1 in [2000, 2002]       --> 2000
          * Next 1 in [2002, 2040]       --> 2040
          * Next 1 in [2002, 2039]       --> none
          * Next 0 in [2000, 2048]       --> 2002
          * Next 0 in [2048, 2060]       --> 2048
          */
         xb_preload(GFP_KERNEL);
         assert(!xb_set_bit(&xb1, 2000));
         assert(!xb_set_bit(&xb1, 2001));
         assert(!xb_set_bit(&xb1, 2040));
         nbit = 0;
         assert(xb_find_set(&xb1, 2048, &nbit) == true);
         assert(nbit == 2000);
         assert(xb_find_set(&xb1, 2002, &nbit) == true);
         assert(nbit == 2000);
         nbit = 2002;
         assert(xb_find_set(&xb1, 2040, &nbit) == true);
         assert(nbit == 2040);
         nbit = 2002;
         assert(xb_find_set(&xb1, 2039, &nbit) == false);
         assert(nbit == 2002);
         nbit = 2000;
         assert(xb_find_zero(&xb1, 2048, &nbit) == true);
         assert(nbit == 2002);
         nbit = 2048;
         assert(xb_find_zero(&xb1, 2060, &nbit) == true);
         assert(nbit == 2048);
         xb_zero(&xb1, 0, 2048);
         nbit = 0;
         assert(xb_find_set(&xb1, 2048, &nbit) == false);
         assert(nbit == 0);
         xb_preload_end();

         /*
          * Overflow tests:
          * Set bit 1 and ULONG_MAX - 4
          * Next 1 in [0, ULONG_MAX]                     --> 1
          * Next 1 in [1, ULONG_MAX]                     --> 1
          * Next 1 in [2, ULONG_MAX]                     --> ULONG_MAX - 4
          * Next 1 in [ULONG_MAX - 3, 2]                 --> none
          * Next 0 in [ULONG_MAX - 4, ULONG_MAX]         --> ULONG_MAX - 3
          * Zero [ULONG_MAX - 4, ULONG_MAX]
          * Next 1 in [ULONG_MAX - 10, ULONG_MAX]        --> none
          * Next 1 in [ULONG_MAX - 1, 2]                 --> none
          * Zero [0, 1]
          * Next 1 in [0, 2]                             --> none
          */
         xb_preload(GFP_KERNEL);
         assert(!xb_set_bit(&xb1, 1));
         xb_preload_end();
         xb_preload(GFP_KERNEL);
         assert(!xb_set_bit(&xb1, ULONG_MAX - 4));
         nbit = 0;
         assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
         assert(nbit == 1);
         nbit = 1;
         assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
         assert(nbit == 1);
         nbit = 2;
         assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
         assert(nbit == ULONG_MAX - 4);
         nbit++;
         assert(xb_find_set(&xb1, 2, &nbit) == false);
         assert(nbit == ULONG_MAX - 3);
         nbit--;
         assert(xb_find_zero(&xb1, ULONG_MAX, &nbit) == true);
         assert(nbit == ULONG_MAX - 3);
         xb_zero(&xb1, ULONG_MAX - 4, ULONG_MAX);
         nbit = ULONG_MAX - 10;
         assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
         assert(nbit == ULONG_MAX - 10);
         nbit = ULONG_MAX - 1;
         assert(xb_find_set(&xb1, 2, &nbit) == false);
         xb_zero(&xb1, 0, 1);
         nbit = 0;
         assert(xb_find_set(&xb1, 2, &nbit) == false);
         assert(nbit == 0);
         xb_preload_end();
         assert(xb_empty(&xb1));
}


Best,
Wei

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2018-01-03  8:54 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-21  2:30 [PATCH v20 3/7 RESEND] xbitmap: add more operations Wei Wang
2017-12-21 14:18 ` Matthew Wilcox
2017-12-21 14:37   ` Tetsuo Handa
2017-12-22  8:45     ` Wei Wang
2017-12-21 21:03 ` Matthew Wilcox
2017-12-22  8:49   ` Wei Wang
2018-01-02 14:09     ` Matthew Wilcox
2018-01-03  8:56       ` Wei Wang
2017-12-23  2:59   ` Tetsuo Handa
2017-12-23  3:29     ` Matthew Wilcox
2017-12-23 14:33       ` Tetsuo Handa
2017-12-23 14:58         ` Matthew Wilcox
2017-12-24  7:31         ` Wei Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).