* [PATCH v20 3/7 RESEND] xbitmap: add more operations
@ 2017-12-21 2:30 Wei Wang
2017-12-21 14:18 ` Matthew Wilcox
2017-12-21 21:03 ` Matthew Wilcox
0 siblings, 2 replies; 13+ messages in thread
From: Wei Wang @ 2017-12-21 2:30 UTC (permalink / raw)
To: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
linux-mm, mst, mhocko, akpm, mawilcox, penguin-kernel
This patch adds support to find the next 1 or 0 bit in an xbitmap range and
clear a range of bits.
More possible optimizations to add in the future:
1) xb_set_bit_range: set a range of bits.
2) when searching a bit, if the bit is not found in the slot, move on to
the next slot directly.
3) add tags to help searching.
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Suggested-by: Matthew Wilcox <mawilcox@microsoft.com>
---
include/linux/xbitmap.h | 6 ++
lib/xbitmap.c | 205 +++++++++++++++++++++++++++++++++++++++++++
tools/include/linux/bitmap.h | 34 +++++++
tools/include/linux/kernel.h | 2 +
4 files changed, 247 insertions(+)
v20 RESEND Changes:
- fixed the !node path
- added the test cases for the !node path
- change __builtin_constant_p(start & 7) to __builtin_constant_p(start)
diff --git a/include/linux/xbitmap.h b/include/linux/xbitmap.h
index 108f929..ede1029 100644
--- a/include/linux/xbitmap.h
+++ b/include/linux/xbitmap.h
@@ -35,6 +35,12 @@ static inline void xb_init(struct xb *xb)
int xb_set_bit(struct xb *xb, unsigned long bit);
bool xb_test_bit(const struct xb *xb, unsigned long bit);
void xb_clear_bit(struct xb *xb, unsigned long bit);
+void xb_clear_bit_range(struct xb *xb, unsigned long start,
+ unsigned long nbits);
+unsigned long xb_find_set(struct xb *xb, unsigned long size,
+ unsigned long offset);
+unsigned long xb_find_zero(struct xb *xb, unsigned long size,
+ unsigned long offset);
static inline bool xb_empty(const struct xb *xb)
{
diff --git a/lib/xbitmap.c b/lib/xbitmap.c
index a1c0abd..1c99586 100644
--- a/lib/xbitmap.c
+++ b/lib/xbitmap.c
@@ -3,6 +3,16 @@
#include <linux/bitmap.h>
#include <linux/slab.h>
+/*
+ * Developer notes:
+ * - locks are required to guarantee there are no concurrent
+ * calls of xb_set_bit, xb_clear_bit, xb_clear_bit_range, xb_test_bit,
+ * xb_find_set, or xb_find_clear to operate on the same ida bitmap.
+ * - The current implementation of xb_clear_bit_range, xb_find_set and
+ * xb_find_clear may cause long latency when the bit range to operate
+ * on is super large (e.g. [0, ULONG_MAX]).
+ */
+
/**
* xb_set_bit - set a bit in the xbitmap
* @xb: the xbitmap tree used to record the bit
@@ -72,6 +82,49 @@ void xb_clear_bit(struct xb *xb, unsigned long bit)
EXPORT_SYMBOL(xb_clear_bit);
/**
+ * xb_clear_bit_range - clear a range of bits in the xbitmap
+ * @start: the start of the bit range, inclusive
+ * @nbits: number of bits to clear
+ *
+ * This function is used to clear a range of bits in the xbitmap. If all the
+ * bits in the bitmap are 0, the bitmap will be freed.
+ */
+void xb_clear_bit_range(struct xb *xb, unsigned long start,
+ unsigned long nbits)
+{
+ struct radix_tree_root *root = &xb->xbrt;
+ struct radix_tree_node *node;
+ void __rcu **slot;
+ struct ida_bitmap *bitmap;
+ unsigned long index = start / IDA_BITMAP_BITS;
+ unsigned long bit = start % IDA_BITMAP_BITS;
+
+ if (nbits > ULONG_MAX - start)
+ nbits = ULONG_MAX - start;
+
+ while (nbits) {
+ unsigned int __nbits = min(nbits,
+ (unsigned long)IDA_BITMAP_BITS - bit);
+
+ bitmap = __radix_tree_lookup(root, index, &node, &slot);
+ if (bitmap) {
+ if (__nbits != IDA_BITMAP_BITS)
+ bitmap_clear(bitmap->bitmap, bit, __nbits);
+
+ if (__nbits == IDA_BITMAP_BITS ||
+ bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
+ kfree(bitmap);
+ __radix_tree_delete(root, node, slot);
+ }
+ }
+ bit = 0;
+ index++;
+ nbits -= __nbits;
+ }
+}
+EXPORT_SYMBOL(xb_clear_bit_range);
+
+/**
* xb_test_bit - test a bit in the xbitmap
* @xb: the xbitmap tree used to record the bit
* @bit: index of the bit to test
@@ -94,6 +147,98 @@ bool xb_test_bit(const struct xb *xb, unsigned long bit)
}
EXPORT_SYMBOL(xb_test_bit);
+/**
+ * xb_find_set - find the next set bit in a range of bits
+ * @xb: the xbitmap to search from
+ * @offset: the offset in the range to start searching
+ * @size: the size of the range
+ *
+ * Returns: the found bit, or @size if no set bit is found.
+ */
+unsigned long xb_find_set(struct xb *xb, unsigned long size,
+ unsigned long offset)
+{
+ struct radix_tree_root *root = &xb->xbrt;
+ struct radix_tree_node *node;
+ void __rcu **slot;
+ struct ida_bitmap *bitmap;
+ unsigned long index = offset / IDA_BITMAP_BITS;
+ unsigned long index_end = size / IDA_BITMAP_BITS;
+ unsigned long bit = offset % IDA_BITMAP_BITS;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ while (index <= index_end) {
+ unsigned long ret;
+ unsigned int nbits = size - index * IDA_BITMAP_BITS;
+
+ bitmap = __radix_tree_lookup(root, index, &node, &slot);
+
+ if (!node && !bitmap)
+ return size;
+
+ if (bitmap) {
+ if (nbits > IDA_BITMAP_BITS)
+ nbits = IDA_BITMAP_BITS;
+
+ ret = find_next_bit(bitmap->bitmap, nbits, bit);
+ if (ret != nbits)
+ return ret + index * IDA_BITMAP_BITS;
+ }
+ bit = 0;
+ index++;
+ }
+
+ return size;
+}
+EXPORT_SYMBOL(xb_find_set);
+
+/**
+ * xb_find_zero - find the next zero bit in a range of bits
+ * @xb: the xbitmap to search from
+ * @offset: the offset in the range to start searching
+ * @size: the size of the range
+ *
+ * Returns: the found bit, or @size if no zero bit is found.
+ */
+unsigned long xb_find_zero(struct xb *xb, unsigned long size,
+ unsigned long offset)
+{
+ struct radix_tree_root *root = &xb->xbrt;
+ struct radix_tree_node *node;
+ void __rcu **slot;
+ struct ida_bitmap *bitmap;
+ unsigned long index = offset / IDA_BITMAP_BITS;
+ unsigned long index_end = size / IDA_BITMAP_BITS;
+ unsigned long bit = offset % IDA_BITMAP_BITS;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ while (index <= index_end) {
+ unsigned long ret;
+ unsigned int nbits = size - index * IDA_BITMAP_BITS;
+
+ bitmap = __radix_tree_lookup(root, index, &node, &slot);
+ if (bitmap) {
+ if (nbits > IDA_BITMAP_BITS)
+ nbits = IDA_BITMAP_BITS;
+
+ ret = find_next_zero_bit(bitmap->bitmap, nbits, bit);
+ if (ret != nbits)
+ return ret + index * IDA_BITMAP_BITS;
+ } else {
+ return bit + index * IDA_BITMAP_BITS;
+ }
+ bit = 0;
+ index++;
+ }
+
+ return size;
+}
+EXPORT_SYMBOL(xb_find_zero);
+
#ifndef __KERNEL__
static DEFINE_XB(xb1);
@@ -111,6 +256,64 @@ void xbitmap_check_bit(unsigned long bit)
xb_preload_end();
}
+static void xbitmap_check_bit_range(void)
+{
+ /* Regular test1: node = NULL */
+ xb_preload(GFP_KERNEL);
+ xb_set_bit(&xb1, 700);
+ xb_preload_end();
+ assert(xb_find_set(&xb1, ULONG_MAX, 0) == 700);
+ assert(xb_find_set(&xb1, ULONG_MAX, 800) == ULONG_MAX);
+ xb_clear_bit_range(&xb1, 0, 1024);
+
+ /*
+ * Regular test 2
+ * set bit 2000, 2001, 2040
+ * Next 1 in [0, 2048) --> 2000
+ * Next 1 in [2000, 2002) --> 2000
+ * Next 1 in [2002, 2041) --> 2040
+ * Next 1 in [2002, 2040) --> none
+ * Next 0 in [2000, 2048) --> 2002
+ * Next 0 in [2048, 2060) --> 2048
+ */
+ xb_preload(GFP_KERNEL);
+ assert(!xb_set_bit(&xb1, 2000));
+ assert(!xb_set_bit(&xb1, 2001));
+ assert(!xb_set_bit(&xb1, 2040));
+ assert(xb_find_set(&xb1, 2048, 0) == 2000);
+ assert(xb_find_set(&xb1, 2002, 2000) == 2000);
+ assert(xb_find_set(&xb1, 2041, 2002) == 2040);
+ assert(xb_find_set(&xb1, 2040, 2002) == 2040);
+ assert(xb_find_zero(&xb1, 2048, 2000) == 2002);
+ assert(xb_find_zero(&xb1, 2060, 2048) == 2048);
+ xb_clear_bit_range(&xb1, 0, 2048);
+ assert(xb_find_set(&xb1, 2048, 0) == 2048);
+ xb_preload_end();
+
+ /*
+ * Overflow tests:
+ * Set bit 1 and ULONG_MAX - 4
+ * Next 1 in [ULONG_MAX - 4, ULONG_MAX) --> ULONG_MAX - 4
+ * Next 1 [ULONG_MAX - 3, ULONG_MAX + 4) --> none
+ * Next 0 [ULONG_MAX - 4, ULONG_MAX + 4) --> none
+ */
+ xb_preload(GFP_KERNEL);
+ assert(!xb_set_bit(&xb1, 1));
+ xb_preload_end();
+ xb_preload(GFP_KERNEL);
+ assert(!xb_set_bit(&xb1, ULONG_MAX - 4));
+ assert(xb_find_set(&xb1, ULONG_MAX, ULONG_MAX - 4) == ULONG_MAX - 4);
+ assert(xb_find_set(&xb1, ULONG_MAX + 4, ULONG_MAX - 3) ==
+ ULONG_MAX + 4);
+ assert(xb_find_zero(&xb1, ULONG_MAX + 4, ULONG_MAX - 4) ==
+ ULONG_MAX + 4);
+ xb_clear_bit_range(&xb1, ULONG_MAX - 4, 4);
+ assert(xb_find_set(&xb1, ULONG_MAX, ULONG_MAX - 10) == ULONG_MAX);
+ xb_clear_bit_range(&xb1, 0, 2);
+ assert(xb_find_set(&xb1, 2, 0) == 2);
+ xb_preload_end();
+}
+
void xbitmap_checks(void)
{
xb_init(&xb1);
@@ -122,6 +325,8 @@ void xbitmap_checks(void)
xbitmap_check_bit(1025);
xbitmap_check_bit((1UL << 63) | (1UL << 24));
xbitmap_check_bit((1UL << 63) | (1UL << 24) | 70);
+
+ xbitmap_check_bit_range();
}
int __weak main(void)
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index ca16027..6b559ee 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -37,6 +37,40 @@ static inline void bitmap_zero(unsigned long *dst, int nbits)
}
}
+static inline void __bitmap_clear(unsigned long *map, unsigned int start,
+ int len)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const unsigned int size = start + len;
+ int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+ while (len - bits_to_clear >= 0) {
+ *p &= ~mask_to_clear;
+ len -= bits_to_clear;
+ bits_to_clear = BITS_PER_LONG;
+ mask_to_clear = ~0UL;
+ p++;
+ }
+ if (len) {
+ mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+ *p &= ~mask_to_clear;
+ }
+}
+
+static inline __always_inline void bitmap_clear(unsigned long *map,
+ unsigned int start,
+ unsigned int nbits)
+{
+ if (__builtin_constant_p(nbits) && nbits == 1)
+ __clear_bit(start, map);
+ else if (__builtin_constant_p(start) && IS_ALIGNED(start, 8) &&
+ __builtin_constant_p(nbits) && IS_ALIGNED(nbits, 8))
+ memset((char *)map + start / 8, 0, nbits / 8);
+ else
+ __bitmap_clear(map, start, nbits);
+}
+
static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
{
unsigned int nlongs = BITS_TO_LONGS(nbits);
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 0ad8844..3c992ae 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -13,6 +13,8 @@
#define UINT_MAX (~0U)
#endif
+#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
+
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
#define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
--
2.7.4
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
2017-12-21 2:30 [PATCH v20 3/7 RESEND] xbitmap: add more operations Wei Wang
@ 2017-12-21 14:18 ` Matthew Wilcox
2017-12-21 14:37 ` Tetsuo Handa
2017-12-21 21:03 ` Matthew Wilcox
1 sibling, 1 reply; 13+ messages in thread
From: Matthew Wilcox @ 2017-12-21 14:18 UTC (permalink / raw)
To: Wei Wang
Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
linux-mm, mst, mhocko, akpm, mawilcox, penguin-kernel
First of all, the test-suite doesn't build, so I don't know whether you
ran it or not. Then I added the xb_find_set() call below, and it fails
the assert, so you should probably fix that.
diff --git a/lib/xbitmap.c b/lib/xbitmap.c
index f03a0f9f9e29..b29af08a7597 100644
--- a/lib/xbitmap.c
+++ b/lib/xbitmap.c
@@ -249,11 +249,12 @@ void xbitmap_check_bit(unsigned long bit)
assert(!xb_test_bit(&xb1, bit));
assert(xb_set_bit(&xb1, bit) == 0);
assert(xb_test_bit(&xb1, bit));
- assert(xb_clear_bit(&xb1, bit) == 0);
+ xb_clear_bit(&xb1, bit);
assert(xb_empty(&xb1));
- assert(xb_clear_bit(&xb1, bit) == 0);
+ xb_clear_bit(&xb1, bit);
assert(xb_empty(&xb1));
xb_preload_end();
+ assert(xb_find_set(&xb1, ULONG_MAX, 0) == bit);
}
static void xbitmap_check_bit_range(void)
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 34ece7883629..adf36e34dd77 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,9 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
-LDFLAGS += -fsanitize=address
-LDLIBS+= -lpthread -lurcu
-TARGETS = main idr-test multiorder
+LDFLAGS += -fsanitize=address $(LDLIBS)
+LDLIBS := -lpthread -lurcu
+TARGETS = main idr-test multiorder xbitmap
CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o \
On Thu, Dec 21, 2017 at 10:30:06AM +0800, Wei Wang wrote:
> v20 RESEND Changes:
> - fixed the !node path
> - added the test cases for the !node path
> - change __builtin_constant_p(start & 7) to __builtin_constant_p(start)
Why would you do such a thing? Just copy the kernel definitions.
> diff --git a/include/linux/xbitmap.h b/include/linux/xbitmap.h
> index 108f929..ede1029 100644
> --- a/include/linux/xbitmap.h
> +++ b/include/linux/xbitmap.h
> @@ -35,6 +35,12 @@ static inline void xb_init(struct xb *xb)
> int xb_set_bit(struct xb *xb, unsigned long bit);
> bool xb_test_bit(const struct xb *xb, unsigned long bit);
> void xb_clear_bit(struct xb *xb, unsigned long bit);
> +void xb_clear_bit_range(struct xb *xb, unsigned long start,
> + unsigned long nbits);
This is xb_zero(). I thought we talked about this before?
> +unsigned long xb_find_set(struct xb *xb, unsigned long size,
> + unsigned long offset);
> +unsigned long xb_find_zero(struct xb *xb, unsigned long size,
> + unsigned long offset);
Since you're using xb_find_zero(), I think we need the tags from the IDR.
At that point, I'm not sure there's a point in keeping the xbitmap and the
IDR as separate data structures.
> +/**
> + * xb_find_set - find the next set bit in a range of bits
> + * @xb: the xbitmap to search from
> + * @offset: the offset in the range to start searching
> + * @size: the size of the range
> + *
> + * Returns: the found bit or, @size if no set bit is found.
> + */
> +unsigned long xb_find_set(struct xb *xb, unsigned long size,
> + unsigned long offset)
> +{
> + struct radix_tree_root *root = &xb->xbrt;
> + struct radix_tree_node *node;
> + void __rcu **slot;
> + struct ida_bitmap *bitmap;
> + unsigned long index = offset / IDA_BITMAP_BITS;
> + unsigned long index_end = size / IDA_BITMAP_BITS;
> + unsigned long bit = offset % IDA_BITMAP_BITS;
> +
> + if (unlikely(offset >= size))
> + return size;
> +
> + while (index <= index_end) {
> + unsigned long ret;
> + unsigned int nbits = size - index * IDA_BITMAP_BITS;
> +
> + bitmap = __radix_tree_lookup(root, index, &node, &slot);
> +
> + if (!node && !bitmap)
> + return size;
> +
> + if (bitmap) {
> + if (nbits > IDA_BITMAP_BITS)
> + nbits = IDA_BITMAP_BITS;
> +
> + ret = find_next_bit(bitmap->bitmap, nbits, bit);
> + if (ret != nbits)
> + return ret + index * IDA_BITMAP_BITS;
> + }
> + bit = 0;
> + index++;
> + }
> +
> + return size;
> +}
> +EXPORT_SYMBOL(xb_find_set);
This is going to be slower than the implementation I sent yesterday. If I
call:
xb_init(xb);
xb_set_bit(xb, ULONG_MAX);
xb_find_set(xb, ULONG_MAX, 0);
it's going to call __radix_tree_lookup() 16 quadrillion times.
My implementation will walk the tree precisely once.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
2017-12-21 14:18 ` Matthew Wilcox
@ 2017-12-21 14:37 ` Tetsuo Handa
2017-12-22 8:45 ` Wei Wang
0 siblings, 1 reply; 13+ messages in thread
From: Tetsuo Handa @ 2017-12-21 14:37 UTC (permalink / raw)
To: willy, wei.w.wang
Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
linux-mm, mst, mhocko, akpm, mawilcox
Matthew Wilcox wrote:
> > +/**
> > + * xb_find_set - find the next set bit in a range of bits
> > + * @xb: the xbitmap to search from
> > + * @offset: the offset in the range to start searching
> > + * @size: the size of the range
> > + *
> > + * Returns: the found bit or, @size if no set bit is found.
> > + */
> > +unsigned long xb_find_set(struct xb *xb, unsigned long size,
> > + unsigned long offset)
> > +{
> > + struct radix_tree_root *root = &xb->xbrt;
> > + struct radix_tree_node *node;
> > + void __rcu **slot;
> > + struct ida_bitmap *bitmap;
> > + unsigned long index = offset / IDA_BITMAP_BITS;
> > + unsigned long index_end = size / IDA_BITMAP_BITS;
> > + unsigned long bit = offset % IDA_BITMAP_BITS;
> > +
> > + if (unlikely(offset >= size))
> > + return size;
> > +
> > + while (index <= index_end) {
> > + unsigned long ret;
> > + unsigned int nbits = size - index * IDA_BITMAP_BITS;
> > +
> > + bitmap = __radix_tree_lookup(root, index, &node, &slot);
> > +
> > + if (!node && !bitmap)
> > + return size;
> > +
> > + if (bitmap) {
> > + if (nbits > IDA_BITMAP_BITS)
> > + nbits = IDA_BITMAP_BITS;
> > +
> > + ret = find_next_bit(bitmap->bitmap, nbits, bit);
> > + if (ret != nbits)
> > + return ret + index * IDA_BITMAP_BITS;
> > + }
> > + bit = 0;
> > + index++;
> > + }
> > +
> > + return size;
> > +}
> > +EXPORT_SYMBOL(xb_find_set);
>
> This is going to be slower than the implementation I sent yesterday. If I
> call:
> xb_init(xb);
> xb_set_bit(xb, ULONG_MAX);
> xb_find_set(xb, ULONG_MAX, 0);
>
> it's going to call __radix_tree_lookup() 16 quadrillion times.
> My implementation will walk the tree precisely once.
>
Yes. Wei's patch still can not work.
We should start reviewing Matthew's implementation.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
2017-12-21 2:30 [PATCH v20 3/7 RESEND] xbitmap: add more operations Wei Wang
2017-12-21 14:18 ` Matthew Wilcox
@ 2017-12-21 21:03 ` Matthew Wilcox
2017-12-22 8:49 ` Wei Wang
2017-12-23 2:59 ` Tetsuo Handa
1 sibling, 2 replies; 13+ messages in thread
From: Matthew Wilcox @ 2017-12-21 21:03 UTC (permalink / raw)
To: Wei Wang
Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
linux-mm, mst, mhocko, akpm, mawilcox, penguin-kernel
OK, here's a rewrite of xbitmap.
Compared to the version you sent:
- xb_find_set() is the rewrite I sent out yesterday.
- xb_find_clear() is a new implementation. I use the IDR_FREE tag to find
clear bits. This led to me finding a bug in radix_tree_for_each_tagged().
- xb_zero() is also a new implementation (replacing xb_clear_bit_range).
It should also be more efficient in deep trees.
- Did not accept your change to xb_set_bit(); I think it's important to
leave the radix tree node in place, so we're guaranteed to make progress
in low-memory situations. We're expecting the caller to retry, so the
memory should not leak.
- Redid a lot of the kernel-doc. Thanks for adding it! I removed mentions
of implementation details, leaving only information useful to those who
are using it.
Compared to the version I put out back in February:
- Accepted your change to xb_preload(); I think it's an improvement.
- Rewrote xb_set_bit() to use the radix_tree_iter_ family of functions.
Should improve performance for deep trees.
- Rewrote xb_clear_bit() for the same reason.
- Left out the use of radix tree exceptional entries. Thanks for taking
them out! Keeps it clearer for now; if they prove useful, we can put
them back in.
- Removed the use of __radix_tree_delete and __radix_tree_create, so drop
the changes to those functions.
Other miscellaneous notes
- I left xb_fill() in the header file, even though there's no implementation
yet. Wouldn't be hard to add once we have a user.
- Used SPDX tags instead of a license notice.
I think we need more test cases ... in reviewing this to send out,
I found a bug in xb_zero(), which I've only fixed because I don't have
time to write a test case for it.
diff --git a/include/linux/xbitmap.h b/include/linux/xbitmap.h
new file mode 100644
index 000000000000..c008309a9494
--- /dev/null
+++ b/include/linux/xbitmap.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * eXtensible Bitmaps
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox <mawilcox@microsoft.com>
+ *
+ * eXtensible Bitmaps provide an unlimited-size sparse bitmap facility.
+ * All bits are initially zero.
+ *
+ * Locking is to be provided by the user. No xb_ function is safe to
+ * call concurrently with any other xb_ function.
+ */
+
+#include <linux/idr.h>
+
+struct xb {
+ struct radix_tree_root xbrt;
+};
+
+#define XB_INIT { \
+ .xbrt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT), \
+}
+#define DEFINE_XB(name) struct xb name = XB_INIT
+
+static inline void xb_init(struct xb *xb)
+{
+ INIT_RADIX_TREE(&xb->xbrt, IDR_RT_MARKER | GFP_NOWAIT);
+}
+
+int xb_set_bit(struct xb *xb, unsigned long bit);
+bool xb_test_bit(const struct xb *xb, unsigned long bit);
+void xb_clear_bit(struct xb *xb, unsigned long bit);
+void xb_zero(struct xb *xb, unsigned long min, unsigned long max);
+void xb_fill(struct xb *xb, unsigned long min, unsigned long max);
+bool xb_find_set(const struct xb *xb, unsigned long max, unsigned long *bit);
+bool xb_find_zero(const struct xb *xb, unsigned long max, unsigned long *bit);
+
+static inline bool xb_empty(const struct xb *xb)
+{
+ return radix_tree_empty(&xb->xbrt);
+}
+
+int __must_check xb_preload(gfp_t);
+
+static inline void xb_preload_end(void)
+{
+ preempt_enable();
+}
diff --git a/lib/Makefile b/lib/Makefile
index d11c48ec8ffd..08a8183c390b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -19,7 +19,7 @@ KCOV_INSTRUMENT_dynamic_debug.o := n
lib-y := ctype.o string.o vsprintf.o cmdline.o \
rbtree.o radix-tree.o dump_stack.o timerqueue.o\
- idr.o int_sqrt.o extable.o \
+ idr.o xbitmap.o int_sqrt.o extable.o \
sha1.o chacha20.o irq_regs.o argv_split.o \
flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index c8d55565fafa..d2bd8feb7b85 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -37,7 +37,7 @@
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/string.h>
-
+#include <linux/xbitmap.h>
/* Number of nodes in fully populated tree of given height */
static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
@@ -77,6 +77,11 @@ static struct kmem_cache *radix_tree_node_cachep;
RADIX_TREE_MAP_SHIFT))
#define IDA_PRELOAD_SIZE (IDA_MAX_PATH * 2 - 1)
+#define XB_INDEX_BITS (BITS_PER_LONG - ilog2(IDA_BITMAP_BITS))
+#define XB_MAX_PATH (DIV_ROUND_UP(XB_INDEX_BITS, \
+ RADIX_TREE_MAP_SHIFT))
+#define XB_PRELOAD_SIZE (XB_MAX_PATH * 2 - 1)
+
/*
* Per-cpu pool of preloaded nodes
*/
@@ -1781,7 +1786,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
child = rcu_dereference_raw(node->slots[offset]);
}
- if (!child)
+ if (!child && !is_idr(root))
goto restart;
if (child == RADIX_TREE_RETRY)
break;
@@ -2135,6 +2140,35 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
}
EXPORT_SYMBOL(ida_pre_get);
+/**
+ * xb_preload - preload for xb_set_bit()
+ * @gfp_mask: allocation mask to use for preloading
+ *
+ * Preallocate memory to use for the next call to xb_set_bit(). On success,
+ * return zero, with preemption disabled. On error, return -ENOMEM with
+ * preemption not disabled.
+ */
+int xb_preload(gfp_t gfp)
+{
+ if (!this_cpu_read(ida_bitmap)) {
+ struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
+
+ if (!bitmap)
+ return -ENOMEM;
+ /*
+ * The per-CPU variable is updated with preemption enabled.
+ * If the calling task is unlucky to be scheduled to another
+ * CPU which has no ida_bitmap allocation, it will be detected
+ * when setting a bit (i.e. xb_set_bit()).
+ */
+ bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap);
+ kfree(bitmap);
+ }
+
+ return __radix_tree_preload(gfp, XB_PRELOAD_SIZE);
+}
+EXPORT_SYMBOL(xb_preload);
+
void __rcu **idr_get_free_cmn(struct radix_tree_root *root,
struct radix_tree_iter *iter, gfp_t gfp,
unsigned long max)
diff --git a/lib/xbitmap.c b/lib/xbitmap.c
new file mode 100644
index 000000000000..d596ba247b45
--- /dev/null
+++ b/lib/xbitmap.c
@@ -0,0 +1,396 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * XBitmap implementation
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox <mawilcox@microsoft.com>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/xbitmap.h>
+
+/**
+ * xb_set_bit() - Set a bit in the XBitmap.
+ * @xb: The XBitmap.
+ * @bit: Index of the bit to set.
+ *
+ * This function is used to set a bit in the xbitmap.
+ *
+ * Return: 0 on success. -ENOMEM if memory could not be allocated.
+ */
+int xb_set_bit(struct xb *xb, unsigned long bit)
+{
+ unsigned long index = bit / IDA_BITMAP_BITS;
+ struct radix_tree_root *root = &xb->xbrt;
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+ struct ida_bitmap *bitmap;
+
+ bit %= IDA_BITMAP_BITS;
+ radix_tree_iter_init(&iter, index);
+ slot = idr_get_free_cmn(root, &iter, GFP_NOWAIT | __GFP_NOWARN, index);
+ if (IS_ERR(slot)) {
+ if (slot == ERR_PTR(-ENOSPC))
+ return 0; /* Already set */
+ return -ENOMEM;
+ }
+ bitmap = rcu_dereference_raw(*slot);
+ if (!bitmap) {
+ bitmap = this_cpu_xchg(ida_bitmap, NULL);
+ if (!bitmap)
+ return -ENOMEM;
+ memset(bitmap, 0, sizeof(*bitmap));
+ radix_tree_iter_replace(root, &iter, slot, bitmap);
+ }
+
+ __set_bit(bit, bitmap->bitmap);
+ if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
+ radix_tree_iter_tag_clear(root, &iter, IDR_FREE);
+ return 0;
+}
+EXPORT_SYMBOL(xb_set_bit);
+
+/**
+ * xb_clear_bit() - Clear a bit in the XBitmap.
+ * @xb: The XBitmap.
+ * @bit: Index of the bit to clear.
+ *
+ * This function is used to clear a bit in the xbitmap.
+ */
+void xb_clear_bit(struct xb *xb, unsigned long bit)
+{
+ unsigned long index = bit / IDA_BITMAP_BITS;
+ struct radix_tree_root *root = &xb->xbrt;
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+ struct ida_bitmap *bitmap;
+
+ bit %= IDA_BITMAP_BITS;
+ slot = radix_tree_iter_lookup(root, &iter, index);
+ if (!slot)
+ return;
+ bitmap = radix_tree_deref_slot(slot);
+ if (!bitmap)
+ return;
+
+ radix_tree_iter_tag_set(root, &iter, IDR_FREE);
+ __clear_bit(bit, bitmap->bitmap);
+ if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
+ kfree(bitmap);
+ radix_tree_iter_delete(root, &iter, slot);
+ }
+}
+EXPORT_SYMBOL(xb_clear_bit);
+
+/**
+ * xb_zero() - Clear a range of bits in the XBitmap.
+ * @xb: The XBitmap.
+ * @min: The first bit to clear.
+ * @max: The last bit to clear.
+ *
+ * This function is used to clear a range of bits in the xbitmap.
+ */
+void xb_zero(struct xb *xb, unsigned long min, unsigned long max)
+{
+ struct radix_tree_root *root = &xb->xbrt;
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+ struct ida_bitmap *bitmap;
+ unsigned long index = min / IDA_BITMAP_BITS;
+ unsigned long first = min % IDA_BITMAP_BITS;
+ unsigned long maxindex = max / IDA_BITMAP_BITS;
+
+ radix_tree_for_each_slot(slot, root, &iter, index) {
+ unsigned long nbits = IDA_BITMAP_BITS;
+ if (index > maxindex)
+ break;
+ bitmap = radix_tree_deref_slot(slot);
+ if (!bitmap)
+ continue;
+ radix_tree_iter_tag_set(root, &iter, IDR_FREE);
+
+ if (!first && iter.index < maxindex)
+ goto delete;
+ if (iter.index == maxindex)
+ nbits = max % IDA_BITMAP_BITS + 1;
+ bitmap_clear(bitmap->bitmap, first, nbits);
+ first = 0;
+ if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS))
+ goto delete;
+ continue;
+delete:
+ kfree(bitmap);
+ radix_tree_iter_delete(root, &iter, slot);
+ }
+}
+EXPORT_SYMBOL(xb_zero);
+
+/**
+ * xb_test_bit() - Test a bit in the xbitmap.
+ * @xb: The XBitmap.
+ * @bit: Index of the bit to test.
+ *
+ * This function is used to test a bit in the xbitmap.
+ *
+ * Return: %true if the bit is set.
+ */
+bool xb_test_bit(const struct xb *xb, unsigned long bit)
+{
+ unsigned long index = bit / IDA_BITMAP_BITS;
+ struct ida_bitmap *bitmap = radix_tree_lookup(&xb->xbrt, index);
+
+ bit %= IDA_BITMAP_BITS;
+
+ if (!bitmap)
+ return false;
+ return test_bit(bit, bitmap->bitmap);
+}
+EXPORT_SYMBOL(xb_test_bit);
+
+/**
+ * xb_find_set() - Find the next set bit in a range of bits.
+ * @xb: The XBitmap.
+ * @max: The maximum position to search.
+ * @bit: The first bit to examine, and on exit, the found bit.
+ *
+ * On entry, @bit points to the index of the first bit to search. On exit,
+ * if this function returns %true, @bit will be updated to the index of the
+ * first found bit. It will not be updated if this function returns %false.
+ *
+ * Return: %true if a set bit was found.
+ */
+bool xb_find_set(const struct xb *xb, unsigned long max, unsigned long *bit)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+ struct ida_bitmap *bitmap;
+ unsigned long index = *bit / IDA_BITMAP_BITS;
+ unsigned int first = *bit % IDA_BITMAP_BITS;
+ unsigned long maxindex = max / IDA_BITMAP_BITS;
+
+ radix_tree_for_each_slot(slot, &xb->xbrt, &iter, index) {
+ if (iter.index > maxindex)
+ break;
+ bitmap = radix_tree_deref_slot(slot);
+ if (bitmap) {
+ unsigned int nbits = IDA_BITMAP_BITS;
+ if (iter.index == maxindex)
+ nbits = max % IDA_BITMAP_BITS + 1;
+ first = find_next_bit(bitmap->bitmap, nbits, first);
+ if (first != nbits) {
+ *bit = first + iter.index * IDA_BITMAP_BITS;
+ return true;
+ }
+ }
+ first = 0;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(xb_find_set);
+
+/**
+ * xb_find_zero() - Find the next zero bit in a range of bits
+ * @xb: The XBitmap.
+ * @max: The maximum index to search.
+ * @bit: Pointer to an index.
+ *
+ * On entry, @bit points to the index of the first bit to search. On exit,
+ * if this function returns %true, @bit will be updated to the index of the
+ * first found bit. It will not be updated if this function returns %false.
+ *
+ * Return: %true if a clear bit was found.
+ */
+bool xb_find_zero(const struct xb *xb, unsigned long max, unsigned long *bit)
+{
+ void __rcu **slot;
+ struct radix_tree_iter iter;
+ struct ida_bitmap *bitmap;
+ unsigned long index = *bit / IDA_BITMAP_BITS;
+ unsigned long first = *bit % IDA_BITMAP_BITS;
+ unsigned long maxindex = max / IDA_BITMAP_BITS;
+
+ radix_tree_for_each_tagged(slot, &xb->xbrt, &iter, index, IDR_FREE) {
+ unsigned int nbits = IDA_BITMAP_BITS;
+ if (iter.index > maxindex)
+ return false;
+ bitmap = radix_tree_deref_slot(slot);
+ if (!bitmap)
+ break;
+ if (iter.index == maxindex)
+ nbits = max % IDA_BITMAP_BITS + 1;
+ first = find_next_zero_bit(bitmap->bitmap, nbits, first);
+ if (first != nbits)
+ break;
+ first = 0;
+ }
+
+ *bit = first + iter.index * IDA_BITMAP_BITS;
+ return true;
+}
+EXPORT_SYMBOL(xb_find_zero);
+
+#ifndef __KERNEL__
+
+static DEFINE_XB(xb1);
+
+static void xbitmap_check_bit(unsigned long bit)
+{
+ unsigned long nbit = 0;
+
+ xb_preload(GFP_KERNEL);
+ assert(!xb_test_bit(&xb1, bit));
+ assert(xb_set_bit(&xb1, bit) == 0);
+ assert(xb_test_bit(&xb1, bit));
+ assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
+ assert(nbit == bit);
+ assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
+ assert(nbit == bit);
+ nbit++;
+ assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+ assert(nbit == bit + 1);
+ xb_clear_bit(&xb1, bit);
+ assert(xb_empty(&xb1));
+ xb_clear_bit(&xb1, bit);
+ assert(xb_empty(&xb1));
+ nbit = 0;
+ assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+ assert(nbit == 0);
+ xb_preload_end();
+}
+
+static void xbitmap_check_bit_range(void)
+{
+ unsigned long nbit = 0;
+
+ /* Regular test1: node = NULL */
+ xb_preload(GFP_KERNEL);
+ xb_set_bit(&xb1, 700);
+ xb_preload_end();
+ assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
+ assert(nbit == 700);
+ nbit++;
+ assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+ assert(nbit == 701);
+ xb_zero(&xb1, 0, 1023);
+
+ /*
+ * Regular test 2
+ * set bit 2000, 2001, 2040
+ * Next 1 in [0, 2048) --> 2000
+ * Next 1 in [2000, 2002) --> 2000
+ * Next 1 in [2002, 2041) --> 2040
+ * Next 1 in [2002, 2040) --> none
+ * Next 0 in [2000, 2048) --> 2002
+ * Next 0 in [2048, 2060) --> 2048
+ */
+ xb_preload(GFP_KERNEL);
+ assert(!xb_set_bit(&xb1, 2000));
+ assert(!xb_set_bit(&xb1, 2001));
+ assert(!xb_set_bit(&xb1, 2040));
+ nbit = 0;
+ assert(xb_find_set(&xb1, 2048, &nbit) == true);
+ assert(nbit == 2000);
+ assert(xb_find_set(&xb1, 2002, &nbit) == true);
+ assert(nbit == 2000);
+ nbit = 2002;
+ assert(xb_find_set(&xb1, 2041, &nbit) == true);
+ assert(nbit == 2040);
+ nbit = 2002;
+ assert(xb_find_set(&xb1, 2040, &nbit) == true);
+ assert(nbit == 2040);
+ nbit = 2000;
+ assert(xb_find_zero(&xb1, 2048, &nbit) == true);
+ assert(nbit == 2002);
+ nbit = 2048;
+ assert(xb_find_zero(&xb1, 2060, &nbit) == true);
+ assert(nbit == 2048);
+ xb_zero(&xb1, 0, 2047);
+ nbit = 0;
+ assert(xb_find_set(&xb1, 2048, &nbit) == false);
+ assert(nbit == 0);
+ xb_preload_end();
+
+ /*
+ * Overflow tests:
+ * Set bit 1 and ULONG_MAX - 4
+ * Next 1 in [ULONG_MAX - 4, ULONG_MAX) --> ULONG_MAX - 4
+ * Next 1 [ULONG_MAX - 3, ULONG_MAX + 4) --> none
+ * Next 0 [ULONG_MAX - 4, ULONG_MAX + 4) --> none
+ */
+ xb_preload(GFP_KERNEL);
+ assert(!xb_set_bit(&xb1, 1));
+ xb_preload_end();
+ xb_preload(GFP_KERNEL);
+ assert(!xb_set_bit(&xb1, ULONG_MAX - 4));
+ nbit = ULONG_MAX - 4;
+ assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
+ assert(nbit == ULONG_MAX - 4);
+ nbit++;
+ assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+ assert(nbit == ULONG_MAX - 3);
+ nbit--;
+ assert(xb_find_zero(&xb1, ULONG_MAX, &nbit) == true);
+ assert(nbit == ULONG_MAX - 3);
+ xb_zero(&xb1, ULONG_MAX - 4, ULONG_MAX);
+ nbit = ULONG_MAX - 10;
+ assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+ assert(nbit == ULONG_MAX - 10);
+ xb_zero(&xb1, 0, 1);
+ nbit = 0;
+ assert(xb_find_set(&xb1, 2, &nbit) == false);
+ assert(nbit == 0);
+ xb_preload_end();
+}
+
+/* Check that setting an already-full bitmap works */
+static void xbitmap_check_set(unsigned long base)
+{
+ unsigned long i;
+
+ assert(xb_empty(&xb1));
+
+ for (i = 0; i < 64 * 1024; i++) {
+ xb_preload(GFP_KERNEL);
+ assert(xb_set_bit(&xb1, base + i) == 0);
+ xb_preload_end();
+ }
+ for (i = 0; i < 64 * 1024; i++)
+ assert(xb_set_bit(&xb1, base + i) == 0);
+
+ for (i = 0; i < 64 * 1024; i++)
+ xb_clear_bit(&xb1, base + i);
+
+ assert(xb_empty(&xb1));
+}
+
+static void xbitmap_checks(void)
+{
+ xb_init(&xb1);
+ xbitmap_check_bit(0);
+ xbitmap_check_bit(30);
+ xbitmap_check_bit(31);
+ xbitmap_check_bit(62);
+ xbitmap_check_bit(63);
+ xbitmap_check_bit(64);
+ xbitmap_check_bit(700);
+ xbitmap_check_bit(1023);
+ xbitmap_check_bit(1024);
+ xbitmap_check_bit(1025);
+ xbitmap_check_bit((1UL << 63) | (1UL << 24));
+ xbitmap_check_bit((1UL << 63) | (1UL << 24) | 70);
+
+ xbitmap_check_bit_range();
+
+ xbitmap_check_set(0);
+ xbitmap_check_set(1024);
+ xbitmap_check_set(1024 * 64);
+}
+
+int __weak main(void)
+{
+ radix_tree_init();
+ xbitmap_checks();
+}
+#endif
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index ca160270fdfa..6b559ee25def 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -37,6 +37,40 @@ static inline void bitmap_zero(unsigned long *dst, int nbits)
}
}
+static inline void __bitmap_clear(unsigned long *map, unsigned int start,
+ int len)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const unsigned int size = start + len;
+ int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+ while (len - bits_to_clear >= 0) {
+ *p &= ~mask_to_clear;
+ len -= bits_to_clear;
+ bits_to_clear = BITS_PER_LONG;
+ mask_to_clear = ~0UL;
+ p++;
+ }
+ if (len) {
+ mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+ *p &= ~mask_to_clear;
+ }
+}
+
+static inline __always_inline void bitmap_clear(unsigned long *map,
+ unsigned int start,
+ unsigned int nbits)
+{
+ if (__builtin_constant_p(nbits) && nbits == 1)
+ __clear_bit(start, map);
+ else if (__builtin_constant_p(start) && IS_ALIGNED(start, 8) &&
+ __builtin_constant_p(nbits) && IS_ALIGNED(nbits, 8))
+ memset((char *)map + start / 8, 0, nbits / 8);
+ else
+ __bitmap_clear(map, start, nbits);
+}
+
static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
{
unsigned int nlongs = BITS_TO_LONGS(nbits);
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 0ad884452c5c..3c992ae15440 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -13,6 +13,8 @@
#define UINT_MAX (~0U)
#endif
+#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
+
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
#define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index fa7ee369b3c9..adf36e34dd77 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,12 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
-LDFLAGS += -fsanitize=address
-LDLIBS+= -lpthread -lurcu
-TARGETS = main idr-test multiorder
+LDFLAGS += -fsanitize=address $(LDLIBS)
+LDLIBS := -lpthread -lurcu
+TARGETS = main idr-test multiorder xbitmap
CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
- tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
+ tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o \
+ xbitmap.o
ifndef SHIFT
SHIFT=3
@@ -25,8 +26,11 @@ idr-test: idr-test.o $(CORE_OFILES)
multiorder: multiorder.o $(CORE_OFILES)
+xbitmap: xbitmap.o $(CORE_OFILES)
+ $(CC) $(CFLAGS) $(LDFLAGS) $^ -o xbitmap
+
clean:
- $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h
+ $(RM) $(TARGETS) *.o radix-tree.c idr.c xbitmap.c generated/map-shift.h
vpath %.c ../../lib
@@ -34,6 +38,7 @@ $(OFILES): Makefile *.h */*.h generated/map-shift.h \
../../include/linux/*.h \
../../include/asm/*.h \
../../../include/linux/radix-tree.h \
+ ../../../include/linux/xbitmap.h \
../../../include/linux/idr.h
radix-tree.c: ../../../lib/radix-tree.c
@@ -42,6 +47,9 @@ radix-tree.c: ../../../lib/radix-tree.c
idr.c: ../../../lib/idr.c
sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+xbitmap.c: ../../../lib/xbitmap.c
+ sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
.PHONY: mapshift
mapshift:
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index c3bc3f364f68..426f32f28547 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -17,6 +17,4 @@
#define pr_debug printk
#define pr_cont printk
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
#endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/xbitmap.h b/tools/testing/radix-tree/linux/xbitmap.h
new file mode 100644
index 000000000000..61de214542e7
--- /dev/null
+++ b/tools/testing/radix-tree/linux/xbitmap.h
@@ -0,0 +1 @@
+#include "../../../../include/linux/xbitmap.h"
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index 257f3f8aacaa..d112363262ae 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -326,6 +326,10 @@ static void single_thread_tests(bool long_run)
rcu_barrier();
printv(2, "after idr_checks: %d allocated, preempt %d\n",
nr_allocated, preempt_count);
+ xbitmap_checks();
+ rcu_barrier();
+ printv(2, "after xbitmap_checks: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
big_gang_check(long_run);
rcu_barrier();
printv(2, "after big_gang_check: %d allocated, preempt %d\n",
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index d9c031dbeb1a..8175d6b23b32 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -38,6 +38,7 @@ void benchmark(void);
void idr_checks(void);
void ida_checks(void);
void ida_thread_tests(void);
+void xbitmap_checks(void);
struct item *
item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
2017-12-21 14:37 ` Tetsuo Handa
@ 2017-12-22 8:45 ` Wei Wang
0 siblings, 0 replies; 13+ messages in thread
From: Wei Wang @ 2017-12-22 8:45 UTC (permalink / raw)
To: Tetsuo Handa, willy
Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
linux-mm, mst, mhocko, akpm, mawilcox
On 12/21/2017 10:37 PM, Tetsuo Handa wrote:
> Matthew Wilcox wrote:
>>> +/**
>>> + * xb_find_set - find the next set bit in a range of bits
>>> + * @xb: the xbitmap to search from
>>> + * @offset: the offset in the range to start searching
>>> + * @size: the size of the range
>>> + *
>>> + * Returns: the found bit, or @size if no set bit is found.
>>> + */
>>> +unsigned long xb_find_set(struct xb *xb, unsigned long size,
>>> + unsigned long offset)
>>> +{
>>> + struct radix_tree_root *root = &xb->xbrt;
>>> + struct radix_tree_node *node;
>>> + void __rcu **slot;
>>> + struct ida_bitmap *bitmap;
>>> + unsigned long index = offset / IDA_BITMAP_BITS;
>>> + unsigned long index_end = size / IDA_BITMAP_BITS;
>>> + unsigned long bit = offset % IDA_BITMAP_BITS;
>>> +
>>> + if (unlikely(offset >= size))
>>> + return size;
>>> +
>>> + while (index <= index_end) {
>>> + unsigned long ret;
>>> + unsigned int nbits = size - index * IDA_BITMAP_BITS;
>>> +
>>> + bitmap = __radix_tree_lookup(root, index, &node, &slot);
>>> +
>>> + if (!node && !bitmap)
>>> + return size;
>>> +
>>> + if (bitmap) {
>>> + if (nbits > IDA_BITMAP_BITS)
>>> + nbits = IDA_BITMAP_BITS;
>>> +
>>> + ret = find_next_bit(bitmap->bitmap, nbits, bit);
>>> + if (ret != nbits)
>>> + return ret + index * IDA_BITMAP_BITS;
>>> + }
>>> + bit = 0;
>>> + index++;
>>> + }
>>> +
>>> + return size;
>>> +}
>>> +EXPORT_SYMBOL(xb_find_set);
>> This is going to be slower than the implementation I sent yesterday. If I
>> call:
>> xb_init(xb);
>> xb_set_bit(xb, ULONG_MAX);
>> xb_find_set(xb, ULONG_MAX, 0);
>>
>> it's going to call __radix_tree_lookup() 16 quadrillion times.
>> My implementation will walk the tree precisely once.
>>
> Yes. Wei's patch still can not work.
> We should start reviewing Matthew's implementation.
It runs without any issue on my machine. I didn't generate an "xbitmap"
executable (I just found that adding the xbitmap executable causes a build
error due to a Makefile error); instead, I tested it within "main" and it
passed all the tests.
Matthew has implemented a new version, let's start from there.
Best,
Wei
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
2017-12-21 21:03 ` Matthew Wilcox
@ 2017-12-22 8:49 ` Wei Wang
2018-01-02 14:09 ` Matthew Wilcox
2017-12-23 2:59 ` Tetsuo Handa
1 sibling, 1 reply; 13+ messages in thread
From: Wei Wang @ 2017-12-22 8:49 UTC (permalink / raw)
To: Matthew Wilcox
Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
linux-mm, mst, mhocko, akpm, mawilcox, penguin-kernel
On 12/22/2017 05:03 AM, Matthew Wilcox wrote:
> OK, here's a rewrite of xbitmap.
>
> Compared to the version you sent:
> - xb_find_set() is the rewrite I sent out yesterday.
> - xb_find_clear() is a new implementation. I use the IDR_FREE tag to find
> clear bits. This led to me finding a bug in radix_tree_for_each_tagged().
> - xb_zero() is also a new implementation (replacing xb_clear_bit_range).
> It should also be more efficient in deep trees.
> - Did not accept your change to xb_set_bit(); I think it's important to
> leave the radix tree node in place, so we're guaranteed to make progress
> in low-memory situations. We're expecting the caller to retry, so the
> memory should not leak.
> - Redid a lot of the kernel-doc. Thanks for adding it! I removed mentions
> of implementation details, leaving only information useful to those who
> are using it.
>
> Compared to the version I put out back in February:
> - Accepted your change to xb_preload(); I think it's an improvement.
> - Rewrote xb_set_bit() to use the radix_tree_iter_ family of functions.
> Should improve performance for deep trees.
> - Rewrote xb_clear_bit() for the same reason.
> - Left out the use of radix tree exceptional entries. Thanks for taking
> them out! Keeps it clearer for now; if they prove useful, we can put
> them back in.
> - Removed the use of __radix_tree_delete and __radix_tree_create, so drop
> the changes to those functions.
>
> Other miscellaneous notes
> - I left xb_fill() in the header file, even though there's no implementation
> yet. Wouldn't be hard to add once we have a user.
> - Used SPDX tags instead of a license notice.
>
> I think we need more test cases ... in reviewing this to send out,
> I found a bug in xb_zero(), which I've only fixed because I don't have
> time to write a test case for it.
Thanks for the improvement. I also found a small bug in xb_zero. With
the following changes, it has passed the current test cases and has been
tested with the virtio-balloon usage without any issue.
> +
> +/**
> + * xb_zero() - Clear a range of bits in the XBitmap.
> + * @xb: The XBitmap.
> + * @min: The first bit to clear.
> + * @max: The last bit to clear.
> + *
> + * This function is used to clear a range of bits in the xbitmap.
> + */
> +void xb_zero(struct xb *xb, unsigned long min, unsigned long max)
> +{
> + struct radix_tree_root *root = &xb->xbrt;
> + struct radix_tree_iter iter;
> + void __rcu **slot;
> + struct ida_bitmap *bitmap;
> + unsigned long index = min / IDA_BITMAP_BITS;
> + unsigned long first = min % IDA_BITMAP_BITS;
> + unsigned long maxindex = max / IDA_BITMAP_BITS;
> +
> + radix_tree_for_each_slot(slot, root, &iter, index) {
> + unsigned long nbits = IDA_BITMAP_BITS;
> + if (index > maxindex)
> + break;
> + bitmap = radix_tree_deref_slot(slot);
> + if (!bitmap)
> + continue;
> + radix_tree_iter_tag_set(root, &iter, IDR_FREE);
> +
> + if (!first && iter.index < maxindex)
> + goto delete;
> + if (iter.index == maxindex)
> + nbits = max % IDA_BITMAP_BITS + 1;
> + bitmap_clear(bitmap->bitmap, first, nbits);
It should be: bitmap_clear(.., first, nbits - first);
> diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
> index fa7ee369b3c9..adf36e34dd77 100644
> --- a/tools/testing/radix-tree/Makefile
> +++ b/tools/testing/radix-tree/Makefile
> @@ -1,12 +1,13 @@
> # SPDX-License-Identifier: GPL-2.0
>
> CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
> -LDFLAGS += -fsanitize=address
> -LDLIBS+= -lpthread -lurcu
> -TARGETS = main idr-test multiorder
> +LDFLAGS += -fsanitize=address $(LDLIBS)
> +LDLIBS := -lpthread -lurcu
> +TARGETS = main idr-test multiorder xbitmap
> CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
> OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
> - tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
> + tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o \
> + xbitmap.o
>
> ifndef SHIFT
> SHIFT=3
> @@ -25,8 +26,11 @@ idr-test: idr-test.o $(CORE_OFILES)
>
> multiorder: multiorder.o $(CORE_OFILES)
>
> +xbitmap: xbitmap.o $(CORE_OFILES)
> + $(CC) $(CFLAGS) $(LDFLAGS) $^ -o xbitmap
> +
I think the "$(CC).." line should be removed, which will fix the build
error when adding "xbitmap" to TARGETS.
Best,
Wei
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
2017-12-21 21:03 ` Matthew Wilcox
2017-12-22 8:49 ` Wei Wang
@ 2017-12-23 2:59 ` Tetsuo Handa
2017-12-23 3:29 ` Matthew Wilcox
1 sibling, 1 reply; 13+ messages in thread
From: Tetsuo Handa @ 2017-12-23 2:59 UTC (permalink / raw)
To: willy, wei.w.wang
Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
linux-mm, mst, mhocko, akpm, mawilcox
Matthew Wilcox wrote:
> +/**
> + * xb_set_bit() - Set a bit in the XBitmap.
> + * @xb: The XBitmap.
> + * @bit: Index of the bit to set.
> + *
> + * This function is used to set a bit in the xbitmap.
> + *
> + * Return: 0 on success. -ENOMEM if memory could not be allocated.
> + */
> +int xb_set_bit(struct xb *xb, unsigned long bit)
> +{
> + unsigned long index = bit / IDA_BITMAP_BITS;
> + struct radix_tree_root *root = &xb->xbrt;
> + struct radix_tree_iter iter;
> + void __rcu **slot;
> + struct ida_bitmap *bitmap;
> +
> + bit %= IDA_BITMAP_BITS;
> + radix_tree_iter_init(&iter, index);
> + slot = idr_get_free_cmn(root, &iter, GFP_NOWAIT | __GFP_NOWARN, index);
> + if (IS_ERR(slot)) {
> + if (slot == ERR_PTR(-ENOSPC))
> + return 0; /* Already set */
Why already set? I guess something is there, but is it guaranteed that
there is a bitmap with the "bit" set?
> + return -ENOMEM;
> + }
> + bitmap = rcu_dereference_raw(*slot);
> + if (!bitmap) {
> + bitmap = this_cpu_xchg(ida_bitmap, NULL);
> + if (!bitmap)
> + return -ENOMEM;
I can't understand this. I can understand if it were
BUG_ON(!bitmap);
because you called xb_preload().
But
/*
* Regular test 2
* set bit 2000, 2001, 2040
* Next 1 in [0, 2048) --> 2000
* Next 1 in [2000, 2002) --> 2000
* Next 1 in [2002, 2041) --> 2040
* Next 1 in [2002, 2040) --> none
* Next 0 in [2000, 2048) --> 2002
* Next 0 in [2048, 2060) --> 2048
*/
xb_preload(GFP_KERNEL);
assert(!xb_set_bit(&xb1, 2000));
assert(!xb_set_bit(&xb1, 2001));
assert(!xb_set_bit(&xb1, 2040));
nbit = 0;
assert(xb_find_set(&xb1, 2048, &nbit) == true);
assert(nbit == 2000);
assert(xb_find_set(&xb1, 2002, &nbit) == true);
assert(nbit == 2000);
nbit = 2002;
assert(xb_find_set(&xb1, 2041, &nbit) == true);
assert(nbit == 2040);
nbit = 2002;
assert(xb_find_set(&xb1, 2040, &nbit) == true);
assert(nbit == 2040);
nbit = 2000;
assert(xb_find_zero(&xb1, 2048, &nbit) == true);
assert(nbit == 2002);
nbit = 2048;
assert(xb_find_zero(&xb1, 2060, &nbit) == true);
assert(nbit == 2048);
xb_zero(&xb1, 0, 2047);
nbit = 0;
assert(xb_find_set(&xb1, 2048, &nbit) == false);
assert(nbit == 0);
xb_preload_end();
you are not calling xb_preload() prior to each xb_set_bit() call.
This means that, if each xb_set_bit() is not surrounded with
xb_preload()/xb_preload_end(), there is possibility of hitting
this_cpu_xchg(ida_bitmap, NULL) == NULL.
If bitmap == NULL at this_cpu_xchg(ida_bitmap, NULL) is allowed,
you can use kzalloc(sizeof(*bitmap), GFP_NOWAIT | __GFP_NOWARN)
and get rid of xb_preload()/xb_preload_end().
You are using idr_get_free_cmn(GFP_NOWAIT | __GFP_NOWARN), which
means that the caller has to be prepared for allocation failure
when calling xb_set_bit(). Thus, there is no need to use preload
in order to avoid failing to allocate "bitmap".
Also, please clarify why it is OK to just return here.
I don't know what
radix_tree_iter_replace(root, &iter, slot, bitmap);
is doing. If you created a slot but did not assign "bitmap",
what the caller of xb_test_bit() etc. will find? If there is an
assumption about this slot, won't this cause a problem?
> + memset(bitmap, 0, sizeof(*bitmap));
> + radix_tree_iter_replace(root, &iter, slot, bitmap);
> + }
> +
> + __set_bit(bit, bitmap->bitmap);
> + if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
> + radix_tree_iter_tag_clear(root, &iter, IDR_FREE);
> + return 0;
> +}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
2017-12-23 2:59 ` Tetsuo Handa
@ 2017-12-23 3:29 ` Matthew Wilcox
2017-12-23 14:33 ` Tetsuo Handa
0 siblings, 1 reply; 13+ messages in thread
From: Matthew Wilcox @ 2017-12-23 3:29 UTC (permalink / raw)
To: Tetsuo Handa
Cc: wei.w.wang, virtio-dev, linux-kernel, qemu-devel, virtualization,
kvm, linux-mm, mst, mhocko, akpm, mawilcox
On Sat, Dec 23, 2017 at 11:59:54AM +0900, Tetsuo Handa wrote:
> Matthew Wilcox wrote:
> > + bit %= IDA_BITMAP_BITS;
> > + radix_tree_iter_init(&iter, index);
> > + slot = idr_get_free_cmn(root, &iter, GFP_NOWAIT | __GFP_NOWARN, index);
> > + if (IS_ERR(slot)) {
> > + if (slot == ERR_PTR(-ENOSPC))
> > + return 0; /* Already set */
>
> Why already set? I guess something is there, but is it guaranteed that
> there is a bitmap with the "bit" set?
Yes. For radix trees tagged with IDR_RT_MARKER, newly created slots
have the IDR_FREE tag set. We only clear the IDR_FREE tag once the
bitmap is full. So if we try to find a free slot and the tag is clear,
we know the bitmap is full.
> > + bitmap = rcu_dereference_raw(*slot);
> > + if (!bitmap) {
> > + bitmap = this_cpu_xchg(ida_bitmap, NULL);
> > + if (!bitmap)
> > + return -ENOMEM;
>
> I can't understand this. I can understand if it were
>
> BUG_ON(!bitmap);
>
> because you called xb_preload().
>
> But
>
> /*
> * Regular test 2
> * set bit 2000, 2001, 2040
> * Next 1 in [0, 2048) --> 2000
> * Next 1 in [2000, 2002) --> 2000
> * Next 1 in [2002, 2041) --> 2040
> * Next 1 in [2002, 2040) --> none
> * Next 0 in [2000, 2048) --> 2002
> * Next 0 in [2048, 2060) --> 2048
> */
> xb_preload(GFP_KERNEL);
> assert(!xb_set_bit(&xb1, 2000));
> assert(!xb_set_bit(&xb1, 2001));
> assert(!xb_set_bit(&xb1, 2040));
[...]
> xb_preload_end();
>
> you are not calling xb_preload() prior to each xb_set_bit() call.
> This means that, if each xb_set_bit() is not surrounded with
> xb_preload()/xb_preload_end(), there is possibility of hitting
> this_cpu_xchg(ida_bitmap, NULL) == NULL.
This is just a lazy test. We "know" that the bits in the range 1024-2047
will all land in the same bitmap, so there's no need to preload for each
of them.
> If bitmap == NULL at this_cpu_xchg(ida_bitmap, NULL) is allowed,
> you can use kzalloc(sizeof(*bitmap), GFP_NOWAIT | __GFP_NOWARN)
> and get rid of xb_preload()/xb_preload_end().
No, we can't. GFP_NOWAIT | __GFP_NOWARN won't try very hard to allocate
memory. There's no reason to fail the call if the user is in a context
where they can try harder to free memory.
> You are using idr_get_free_cmn(GFP_NOWAIT | __GFP_NOWARN), which
> means that the caller has to be prepared for allocation failure
> when calling xb_set_bit(). Thus, there is no need to use preload
> in order to avoid failing to allocate "bitmap".
xb_preload also preloads radix tree nodes.
> Also, please clarify why it is OK to just return here.
> I don't know what
>
> radix_tree_iter_replace(root, &iter, slot, bitmap);
>
> is doing. If you created a slot but did not assign "bitmap",
> what the caller of xb_test_bit() etc. will find? If there is an
> assumption about this slot, won't this cause a problem?
xb_test_bit will find NULL if bitmap wasn't assigned. That doesn't
harm anything.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
2017-12-23 3:29 ` Matthew Wilcox
@ 2017-12-23 14:33 ` Tetsuo Handa
2017-12-23 14:58 ` Matthew Wilcox
2017-12-24 7:31 ` Wei Wang
0 siblings, 2 replies; 13+ messages in thread
From: Tetsuo Handa @ 2017-12-23 14:33 UTC (permalink / raw)
To: willy
Cc: wei.w.wang, virtio-dev, linux-kernel, qemu-devel, virtualization,
kvm, linux-mm, mst, mhocko, akpm, mawilcox
Matthew Wilcox wrote:
> On Sat, Dec 23, 2017 at 11:59:54AM +0900, Tetsuo Handa wrote:
> > Matthew Wilcox wrote:
> > > + bit %= IDA_BITMAP_BITS;
> > > + radix_tree_iter_init(&iter, index);
> > > + slot = idr_get_free_cmn(root, &iter, GFP_NOWAIT | __GFP_NOWARN, index);
> > > + if (IS_ERR(slot)) {
> > > + if (slot == ERR_PTR(-ENOSPC))
> > > + return 0; /* Already set */
> >
> > Why already set? I guess something is there, but is it guaranteed that
> > there is a bitmap with the "bit" set?
>
> Yes. For radix trees tagged with IDR_RT_MARKER, newly created slots
> have the IDR_FREE tag set. We only clear the IDR_FREE tag once the
> bitmap is full. So if we try to find a free slot and the tag is clear,
> we know the bitmap is full.
>
OK. But does using IDR_FREE tag have more benefit than cost?
You are doing
if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
radix_tree_iter_tag_clear(root, &iter, IDR_FREE);
for each xb_set_bit() call. How likely do we hit ERR_PTR(-ENOSPC) path?
Isn't removing both bitmap_full() and ERR_PTR(-ENOSPC) better?
> > > + bitmap = rcu_dereference_raw(*slot);
> > > + if (!bitmap) {
> > > + bitmap = this_cpu_xchg(ida_bitmap, NULL);
> > > + if (!bitmap)
> > > + return -ENOMEM;
> >
> > I can't understand this. I can understand if it were
> >
> > BUG_ON(!bitmap);
> >
> > because you called xb_preload().
> >
> > But
> >
> > /*
> > * Regular test 2
> > * set bit 2000, 2001, 2040
> > * Next 1 in [0, 2048) --> 2000
> > * Next 1 in [2000, 2002) --> 2000
> > * Next 1 in [2002, 2041) --> 2040
> > * Next 1 in [2002, 2040) --> none
> > * Next 0 in [2000, 2048) --> 2002
> > * Next 0 in [2048, 2060) --> 2048
> > */
> > xb_preload(GFP_KERNEL);
> > assert(!xb_set_bit(&xb1, 2000));
> > assert(!xb_set_bit(&xb1, 2001));
> > assert(!xb_set_bit(&xb1, 2040));
> [...]
> > xb_preload_end();
> >
> > you are not calling xb_preload() prior to each xb_set_bit() call.
> > This means that, if each xb_set_bit() is not surrounded with
> > xb_preload()/xb_preload_end(), there is possibility of hitting
> > this_cpu_xchg(ida_bitmap, NULL) == NULL.
>
> This is just a lazy test. We "know" that the bits in the range 1024-2047
> will all land in the same bitmap, so there's no need to preload for each
> of them.
Test cases also serve as examples of how to use the API.
Assuming such a thing leads to incorrect usage.
>
> > If bitmap == NULL at this_cpu_xchg(ida_bitmap, NULL) is allowed,
> > you can use kzalloc(sizeof(*bitmap), GFP_NOWAIT | __GFP_NOWARN)
> > and get rid of xb_preload()/xb_preload_end().
>
> No, we can't. GFP_NOWAIT | __GFP_NOWARN won't try very hard to allocate
> memory. There's no reason to fail the call if the user is in a context
> where they can try harder to free memory.
But there is no reason to use GFP_NOWAIT at idr_get_free_cmn() if it is
safe to use GFP_KERNEL. If we don't require xb_preload() which forces
idr_get_free_cmn() to use GFP_NOWAIT due to possibility of preemption
disabled by xb_preload(), we can allow passing gfp flags to xb_set_bit().
>
> > You are using idr_get_free_cmn(GFP_NOWAIT | __GFP_NOWARN), which
> > means that the caller has to be prepared for allocation failure
> > when calling xb_set_bit(). Thus, there is no need to use preload
> > in order to avoid failing to allocate "bitmap".
>
> xb_preload also preloads radix tree nodes.
>
But it nevertheless forces idr_get_free_cmn() to use GFP_NOWAIT, doesn't it?
Speak of initial user (i.e. virtio-balloon), xb_preload() won't be able to
use GFP_KERNEL in order to avoid OOM lockup. Therefore, I don't see
advantages with using xb_preload(). If xb_set_bit() receives gfp flags,
the caller can pass GFP_KERNEL if it is safe to use GFP_KERNEL.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
2017-12-23 14:33 ` Tetsuo Handa
@ 2017-12-23 14:58 ` Matthew Wilcox
2017-12-24 7:31 ` Wei Wang
1 sibling, 0 replies; 13+ messages in thread
From: Matthew Wilcox @ 2017-12-23 14:58 UTC (permalink / raw)
To: Tetsuo Handa
Cc: wei.w.wang, virtio-dev, linux-kernel, qemu-devel, virtualization,
kvm, linux-mm, mst, mhocko, akpm, mawilcox
On Sat, Dec 23, 2017 at 11:33:45PM +0900, Tetsuo Handa wrote:
> Matthew Wilcox wrote:
> > On Sat, Dec 23, 2017 at 11:59:54AM +0900, Tetsuo Handa wrote:
> > > Matthew Wilcox wrote:
> > > > + bit %= IDA_BITMAP_BITS;
> > > > + radix_tree_iter_init(&iter, index);
> > > > + slot = idr_get_free_cmn(root, &iter, GFP_NOWAIT | __GFP_NOWARN, index);
> > > > + if (IS_ERR(slot)) {
> > > > + if (slot == ERR_PTR(-ENOSPC))
> > > > + return 0; /* Already set */
> > >
> > > Why already set? I guess something is there, but is it guaranteed that
> > > there is a bitmap with the "bit" set?
> >
> > Yes. For radix trees tagged with IDR_RT_MARKER, newly created slots
> > have the IDR_FREE tag set. We only clear the IDR_FREE tag once the
> > bitmap is full. So if we try to find a free slot and the tag is clear,
> > we know the bitmap is full.
> >
>
> OK. But does using IDR_FREE tag have more benefit than cost?
> You are doing
>
> if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
> radix_tree_iter_tag_clear(root, &iter, IDR_FREE);
>
> for each xb_set_bit() call. How likely do we hit ERR_PTR(-ENOSPC) path?
> Isn't removing both bitmap_full() and ERR_PTR(-ENOSPC) better?
You're assuming that the purpose of using IDR_FREE is to save xb_set_bit
from walking the tree unnecessarily. It isn't; that's just a happy
side-effect. Its main purpose is to make xb_find_zero() efficient. If
we have large ranges of set bits, xb_find_zero() will be able to skip them.
> > This is just a lazy test. We "know" that the bits in the range 1024-2047
> > will all land in the same bitmap, so there's no need to preload for each
> > of them.
>
> Test cases also serve as examples of how to use the API.
> Assuming such a thing leads to incorrect usage.
Sure. Would you like to submit a patch?
> > > If bitmap == NULL at this_cpu_xchg(ida_bitmap, NULL) is allowed,
> > > you can use kzalloc(sizeof(*bitmap), GFP_NOWAIT | __GFP_NOWARN)
> > > and get rid of xb_preload()/xb_preload_end().
> >
> > No, we can't. GFP_NOWAIT | __GFP_NOWARN won't try very hard to allocate
> > memory. There's no reason to fail the call if the user is in a context
> > where they can try harder to free memory.
>
> But there is no reason to use GFP_NOWAIT at idr_get_free_cmn() if it is
> safe to use GFP_KERNEL. If we don't require xb_preload() which forces
> idr_get_free_cmn() to use GFP_NOWAIT due to possibility of preemption
> disabled by xb_preload(), we can allow passing gfp flags to xb_set_bit().
The assumption is that the user has done:
xb_preload(GFP_KERNEL);
spin_lock(my_lock);
xb_set_bit(xb, bit);
spin_unlock(my_lock);
xb_preload_end();
This is not the world's greatest interface. Once I have the XArray
finished, we'll be able to ditch the external spinlock and the preload
interface and be able to call:
xb_set_bit(xb, bit, GFP_KERNEL);
> > xb_preload also preloads radix tree nodes.
>
> But it after all forces idr_get_free_cmn() to use GFP_NOWAIT, doesn't it?
I think you don't understand how the radix tree allocates nodes. preloading
means that it will be able to access the nodes which were allocated earlier.
> Speak of initial user (i.e. virtio-balloon), xb_preload() won't be able to
> use GFP_KERNEL in order to avoid OOM lockup. Therefore, I don't see
> advantages with using xb_preload(). If xb_set_bit() receives gfp flags,
> the caller can pass GFP_KERNEL if it is safe to use GFP_KERNEL.
I haven't reviewed how virtio-balloon is using the interfaces.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
2017-12-23 14:33 ` Tetsuo Handa
2017-12-23 14:58 ` Matthew Wilcox
@ 2017-12-24 7:31 ` Wei Wang
1 sibling, 0 replies; 13+ messages in thread
From: Wei Wang @ 2017-12-24 7:31 UTC (permalink / raw)
To: Tetsuo Handa, willy
Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
linux-mm, mst, mhocko, akpm, mawilcox
On 12/23/2017 10:33 PM, Tetsuo Handa wrote:
>>>> + bitmap = rcu_dereference_raw(*slot);
>>>> + if (!bitmap) {
>>>> + bitmap = this_cpu_xchg(ida_bitmap, NULL);
>>>> + if (!bitmap)
>>>> + return -ENOMEM;
>>> I can't understand this. I can understand if it were
>>>
>>> BUG_ON(!bitmap);
>>>
>>> because you called xb_preload().
>>>
>>> But
>>>
>>> /*
>>> * Regular test 2
>>> * set bit 2000, 2001, 2040
>>> * Next 1 in [0, 2048) --> 2000
>>> * Next 1 in [2000, 2002) --> 2000
>>> * Next 1 in [2002, 2041) --> 2040
>>> * Next 1 in [2002, 2040) --> none
>>> * Next 0 in [2000, 2048) --> 2002
>>> * Next 0 in [2048, 2060) --> 2048
>>> */
>>> xb_preload(GFP_KERNEL);
>>> assert(!xb_set_bit(&xb1, 2000));
>>> assert(!xb_set_bit(&xb1, 2001));
>>> assert(!xb_set_bit(&xb1, 2040));
>> [...]
>>> xb_preload_end();
>>>
>>> you are not calling xb_preload() prior to each xb_set_bit() call.
>>> This means that, if each xb_set_bit() is not surrounded with
>>> xb_preload()/xb_preload_end(), there is possibility of hitting
>>> this_cpu_xchg(ida_bitmap, NULL) == NULL.
>> This is just a lazy test. We "know" that the bits in the range 1024-2047
>> will all land in the same bitmap, so there's no need to preload for each
>> of them.
> Testcases also serves as how to use that API.
> Assuming such thing leads to incorrect usage.
If callers are aware that the bits they are going to record are located in
the same bitmap, I think they should also perform the xb_ API calls with only
one preload. So the test cases here show them a correct example.
We can probably add some comments above to explain this.
Best,
Wei
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
2017-12-22 8:49 ` Wei Wang
@ 2018-01-02 14:09 ` Matthew Wilcox
2018-01-03 8:56 ` Wei Wang
0 siblings, 1 reply; 13+ messages in thread
From: Matthew Wilcox @ 2018-01-02 14:09 UTC (permalink / raw)
To: Wei Wang
Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
linux-mm, mst, mhocko, akpm, mawilcox, penguin-kernel
On Fri, Dec 22, 2017 at 04:49:11PM +0800, Wei Wang wrote:
> Thanks for the improvement. I also found a small bug in xb_zero. With the
> following changes, it has passed the current test cases and tested with the
> virtio-balloon usage without any issue.
Thanks; I applied the change. Can you supply a test-case for testing
xb_zero please?
> > @@ -25,8 +26,11 @@ idr-test: idr-test.o $(CORE_OFILES)
> > multiorder: multiorder.o $(CORE_OFILES)
> > +xbitmap: xbitmap.o $(CORE_OFILES)
> > + $(CC) $(CFLAGS) $(LDFLAGS) $^ -o xbitmap
> > +
>
> I think the "$(CC).." line should be removed, which will fix the build error
> when adding "xbitmap" to TARGET.
Not sure what build error you're talking about, but yes that CC line
should go. Thanks, deleted.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v20 3/7 RESEND] xbitmap: add more operations
2018-01-02 14:09 ` Matthew Wilcox
@ 2018-01-03 8:56 ` Wei Wang
0 siblings, 0 replies; 13+ messages in thread
From: Wei Wang @ 2018-01-03 8:56 UTC (permalink / raw)
To: Matthew Wilcox
Cc: virtio-dev, linux-kernel, qemu-devel, virtualization, kvm,
linux-mm, mst, mhocko, akpm, mawilcox, penguin-kernel
On 01/02/2018 10:09 PM, Matthew Wilcox wrote:
> On Fri, Dec 22, 2017 at 04:49:11PM +0800, Wei Wang wrote:
>> Thanks for the improvement. I also found a small bug in xb_zero. With the
>> following changes, it has passed the current test cases and tested with the
>> virtio-balloon usage without any issue.
> Thanks; I applied the change. Can you supply a test-case for testing
> xb_zero please?
>
Sure. Please check below the test cases. Do you plan to send out the new
version of xbitmap yourself? If so, I will wait for that to send out the
virtio-balloon patches.
/*
 * Exercise xb_zero(): clearing an empty xbitmap, clearing a range that
 * does not cover an already-set bit, and clearing the full range up to
 * ULONG_MAX (the unsigned-long boundary where overflow bugs would show).
 */
static void xbitmap_check_zero_bits(void)
{
assert(xb_empty(&xb1));
/* Zero an empty xbitmap should work though no real work to do */
xb_zero(&xb1, 0, ULONG_MAX);
assert(xb_empty(&xb1));
/* xb_set_bit() may allocate, so it is bracketed by preload/preload_end */
xb_preload(GFP_KERNEL);
assert(xb_set_bit(&xb1, 0) == 0);
xb_preload_end();
/* Overflow test */
/* Clearing [ULONG_MAX - 10, ULONG_MAX] must leave bit 0 untouched */
xb_zero(&xb1, ULONG_MAX - 10, ULONG_MAX);
assert(xb_test_bit(&xb1, 0));
xb_preload(GFP_KERNEL);
assert(xb_set_bit(&xb1, ULONG_MAX) == 0);
xb_preload_end();
/* Clearing the whole range removes both bit 0 and bit ULONG_MAX */
xb_zero(&xb1, 0, ULONG_MAX);
assert(xb_empty(&xb1));
}
/*
 * In the following tests, preload is called once when all the bits to set
 * are located in the same ida bitmap. Otherwise, it is recommended to call
 * preload before each xb_set_bit.
 */
/*
 * Exercise xb_find_set()/xb_find_zero() over ranges, including ranges
 * that cross ida-bitmap boundaries and ranges touching ULONG_MAX.
 *
 * nbit is in/out: on entry it is the search start; on success (return
 * true) it is updated to the found bit; on failure (return false) the
 * asserts below show it is left unchanged.
 */
static void xbitmap_check_bit_range(void)
{
unsigned long nbit = 0;
/* Regular test1: node = NULL */
xb_preload(GFP_KERNEL);
xb_set_bit(&xb1, 700);
xb_preload_end();
/* Search [0, ULONG_MAX] finds bit 700 and advances nbit to it */
assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
assert(nbit == 700);
nbit++;
/* No set bit at or after 701; nbit stays where the search started */
assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
assert(nbit == 701);
xb_zero(&xb1, 0, 1023);
/*
 * Regular test2
 * set bit 2000, 2001, 2040
 * Next 1 in [0, 2048] --> 2000
 * Next 1 in [2000, 2002] --> 2000
 * Next 1 in [2002, 2040] --> 2040
 * Next 1 in [2002, 2039] --> none
 * Next 0 in [2000, 2048] --> 2002
 * Next 0 in [2048, 2060] --> 2048
 */
/* Bits 2000/2001/2040 share one ida bitmap, so a single preload suffices */
xb_preload(GFP_KERNEL);
assert(!xb_set_bit(&xb1, 2000));
assert(!xb_set_bit(&xb1, 2001));
assert(!xb_set_bit(&xb1, 2040));
nbit = 0;
assert(xb_find_set(&xb1, 2048, &nbit) == true);
assert(nbit == 2000);
/* A set bit at the search start (2000) is found immediately */
assert(xb_find_set(&xb1, 2002, &nbit) == true);
assert(nbit == 2000);
nbit = 2002;
/* The range end (2040) is inclusive: bit 2040 is found */
assert(xb_find_set(&xb1, 2040, &nbit) == true);
assert(nbit == 2040);
nbit = 2002;
/* Shrinking the end to 2039 excludes bit 2040 */
assert(xb_find_set(&xb1, 2039, &nbit) == false);
assert(nbit == 2002);
nbit = 2000;
/* First clear bit after the 2000/2001 run is 2002 */
assert(xb_find_zero(&xb1, 2048, &nbit) == true);
assert(nbit == 2002);
nbit = 2048;
/* A clear bit at the search start is reported directly */
assert(xb_find_zero(&xb1, 2060, &nbit) == true);
assert(nbit == 2048);
xb_zero(&xb1, 0, 2048);
nbit = 0;
assert(xb_find_set(&xb1, 2048, &nbit) == false);
assert(nbit == 0);
xb_preload_end();
/*
 * Overflow tests:
 * Set bit 1 and ULONG_MAX - 4
 * Next 1 in [0, ULONG_MAX] --> 1
 * Next 1 in [1, ULONG_MAX] --> 1
 * Next 1 in [2, ULONG_MAX] --> ULONG_MAX - 4
 * Next 1 in [ULONG_MAX - 3, 2] --> none
 * Next 0 in [ULONG_MAX - 4, ULONG_MAX] --> ULONG_MAX - 3
 * Zero [ULONG_MAX - 4, ULONG_MAX]
 * Next 1 in [ULONG_MAX - 10, ULONG_MAX] --> none
 * Next 1 in [ULONG_MAX - 1, 2] --> none
 * Zero [0, 1]
 * Next 1 in [0, 2] --> none
 */
/* Bits 1 and ULONG_MAX - 4 live in different bitmaps: preload for each */
xb_preload(GFP_KERNEL);
assert(!xb_set_bit(&xb1, 1));
xb_preload_end();
xb_preload(GFP_KERNEL);
assert(!xb_set_bit(&xb1, ULONG_MAX - 4));
nbit = 0;
assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
assert(nbit == 1);
nbit = 1;
assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
assert(nbit == 1);
nbit = 2;
assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
assert(nbit == ULONG_MAX - 4);
nbit++;
/* Start (ULONG_MAX - 3) > end (2): wrapped range finds nothing */
assert(xb_find_set(&xb1, 2, &nbit) == false);
assert(nbit == ULONG_MAX - 3);
nbit--;
assert(xb_find_zero(&xb1, ULONG_MAX, &nbit) == true);
assert(nbit == ULONG_MAX - 3);
xb_zero(&xb1, ULONG_MAX - 4, ULONG_MAX);
nbit = ULONG_MAX - 10;
assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
assert(nbit == ULONG_MAX - 10);
nbit = ULONG_MAX - 1;
assert(xb_find_set(&xb1, 2, &nbit) == false);
xb_zero(&xb1, 0, 1);
nbit = 0;
assert(xb_find_set(&xb1, 2, &nbit) == false);
assert(nbit == 0);
xb_preload_end();
/* All bits set above have been cleared again */
assert(xb_empty(&xb1));
}
Best,
Wei
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2018-01-03 8:54 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-21 2:30 [PATCH v20 3/7 RESEND] xbitmap: add more operations Wei Wang
2017-12-21 14:18 ` Matthew Wilcox
2017-12-21 14:37 ` Tetsuo Handa
2017-12-22 8:45 ` Wei Wang
2017-12-21 21:03 ` Matthew Wilcox
2017-12-22 8:49 ` Wei Wang
2018-01-02 14:09 ` Matthew Wilcox
2018-01-03 8:56 ` Wei Wang
2017-12-23 2:59 ` Tetsuo Handa
2017-12-23 3:29 ` Matthew Wilcox
2017-12-23 14:33 ` Tetsuo Handa
2017-12-23 14:58 ` Matthew Wilcox
2017-12-24 7:31 ` Wei Wang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).