linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] Rebasing the IDR
@ 2017-11-30 17:36 Matthew Wilcox
  2017-11-30 19:56 ` Daniel Vetter
  2017-12-11 10:54 ` Chris Wilson
  0 siblings, 2 replies; 6+ messages in thread
From: Matthew Wilcox @ 2017-11-30 17:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: Tejun Heo, Eric Biggers, Daniel Vetter, Chris Mi

About 40 of the approximately 180 users of the IDR in the kernel are
"1-based" instead of "0-based".  That is, they never want to have ID 0
allocated; they want to see IDs allocated between 1 and N.  Usually, that's
expressed like this:

        /* Get the user-visible handle using idr. */
        ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_KERNEL);

The current implementation of this grieves me.  You see, we mark each
node of the radix tree according to whether it has any free entries
or not, and entry 0 is always free!  If we've already allocated 10,000
entries from this IDR, and see this call, we'll walk all the way down
the left side of the tree looking for entry 1, get to the bottom,
see that entries 1-63 are allocated, then walk up to the next level,
see that 64-4095 are allocated, walk up to the next level, see that
8192-12287 has a free entry, and then start walking down again.

It'd be somewhere around two to three times quicker to know not to
look down the left hand side of the tree, see that 1-8191 are used and
start looking in the range 8192-12287.  I considered a function like
idr_reserve(idr, N) to reserve the first N entries (we have at least one
consumer in the tree that is 2-based, not 1-based), but that struck me
as inelegant.  We also have the cool optimisation that if there's only
one element in the radix tree at offset 0, then we don't even need
to allocate a node to store it, we just store it right in the head.
And that optimisation was never available to the 1-based users.

I've come up with this solution instead, idr_base.  It's free in terms
of memory consumption on 64-bit as it fits in the gap at the end of the
struct idr.  Essentially, we just subtract off idr_base when looking
up the ID, and add it back on when reporting the ID.  We need to do some
saturating arithmetic in idr_get_next(), because starting a walk at 4
billion or negative 8 quintillion doesn't work out terribly well.

It does require the user to call idr_init_base() instead of idr_init().
That should be a relatively small conversion effort.  Opinions?  Feedback?
Better test cases for the test-suite (ahem)?

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 91d27a9bcdf4..a657b1f925f8 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -18,6 +18,7 @@
 
 struct idr {
 	struct radix_tree_root	idr_rt;
+	unsigned int		idr_base;
 	unsigned int		idr_next;
 };
 
@@ -30,9 +31,10 @@ struct idr {
 /* Set the IDR flag and the IDR_FREE tag */
 #define IDR_RT_MARKER		((__force gfp_t)(3 << __GFP_BITS_SHIFT))
 
-#define IDR_INIT							\
-{									\
-	.idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER)			\
+#define IDR_INIT {							\
+	.idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER),			\
+	.idr_base = 0,							\
+	.idr_next = 0,							\
 }
 #define DEFINE_IDR(name)	struct idr name = IDR_INIT
 
@@ -123,12 +125,20 @@ static inline int __must_check idr_alloc_u32(struct idr *idr, void *ptr,
 
 static inline void *idr_remove(struct idr *idr, unsigned long id)
 {
-	return radix_tree_delete_item(&idr->idr_rt, id, NULL);
+	return radix_tree_delete_item(&idr->idr_rt, id - idr->idr_base, NULL);
 }
 
 static inline void idr_init(struct idr *idr)
 {
 	INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
+	idr->idr_base = 0;
+	idr->idr_next = 0;
+}
+
+static inline void idr_init_base(struct idr *idr, int base)
+{
+	INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
+	idr->idr_base = base;
 	idr->idr_next = 0;
 }
 
@@ -163,7 +173,7 @@ static inline void idr_preload_end(void)
  */
 static inline void *idr_find(const struct idr *idr, unsigned long id)
 {
-	return radix_tree_lookup(&idr->idr_rt, id);
+	return radix_tree_lookup(&idr->idr_rt, id - idr->idr_base);
 }
 
 /**
diff --git a/lib/idr.c b/lib/idr.c
index 1aaeb5a8c426..a1d3531bd62f 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -33,21 +33,22 @@ int idr_alloc_ul(struct idr *idr, void *ptr, unsigned long *nextid,
 {
 	struct radix_tree_iter iter;
 	void __rcu **slot;
+	int base = idr->idr_base;
 
 	if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
 		return -EINVAL;
 	if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR)))
 		idr->idr_rt.gfp_mask |= IDR_RT_MARKER;
 
-	radix_tree_iter_init(&iter, *nextid);
-	slot = idr_get_free(&idr->idr_rt, &iter, gfp, max);
+	radix_tree_iter_init(&iter, *nextid - base);
+	slot = idr_get_free(&idr->idr_rt, &iter, gfp, max - base);
 	if (IS_ERR(slot))
 		return PTR_ERR(slot);
 
 	radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr);
 	radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE);
 
-	*nextid = iter.index;
+	*nextid = iter.index + base;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(idr_alloc_ul);
@@ -143,13 +144,14 @@ int idr_for_each(const struct idr *idr,
 {
 	struct radix_tree_iter iter;
 	void __rcu **slot;
+	int base = idr->idr_base;
 
 	radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) {
 		int ret;
 
 		if (WARN_ON_ONCE(iter.index > INT_MAX))
 			break;
-		ret = fn(iter.index, rcu_dereference_raw(*slot), data);
+		ret = fn(iter.index + base, rcu_dereference_raw(*slot), data);
 		if (ret)
 			return ret;
 	}
@@ -172,15 +174,19 @@ void *idr_get_next(struct idr *idr, int *nextid)
 {
 	struct radix_tree_iter iter;
 	void __rcu **slot;
+	int base = idr->idr_base;
+	int id = *nextid;
 
-	slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
+	id = (id < base) ? 0 : id - base;
+	slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
 	if (!slot)
 		return NULL;
+	id = iter.index + base;
 
-	if (WARN_ON_ONCE(iter.index > INT_MAX))
+	if (WARN_ON_ONCE(id > INT_MAX))
 		return NULL;
 
-	*nextid = iter.index;
+	*nextid = id;
 	return rcu_dereference_raw(*slot);
 }
 EXPORT_SYMBOL(idr_get_next);
@@ -199,12 +205,15 @@ void *idr_get_next_ul(struct idr *idr, unsigned long *nextid)
 {
 	struct radix_tree_iter iter;
 	void __rcu **slot;
+	unsigned long base = idr->idr_base;
+	unsigned long id = *nextid;
 
-	slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
+	id = (id < base) ? 0 : id - base;
+	slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
 	if (!slot)
 		return NULL;
 
-	*nextid = iter.index;
+	*nextid = iter.index + base;
 	return rcu_dereference_raw(*slot);
 }
 EXPORT_SYMBOL(idr_get_next_ul);
@@ -231,6 +240,7 @@ void *idr_replace(struct idr *idr, void *ptr, unsigned long id)
 
 	if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
 		return ERR_PTR(-EINVAL);
+	id -= idr->idr_base;
 
 	entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot);
 	if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 36437ade429c..44ef9eba5a7a 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -153,11 +153,12 @@ void idr_nowait_test(void)
 	idr_destroy(&idr);
 }
 
-void idr_get_next_test(void)
+void idr_get_next_test(int base)
 {
 	unsigned long i;
 	int nextid;
 	DEFINE_IDR(idr);
+	idr_init_base(&idr, base);
 
 	int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0};
 
@@ -244,7 +245,9 @@ void idr_checks(void)
 	idr_alloc_test();
 	idr_null_test();
 	idr_nowait_test();
-	idr_get_next_test();
+	idr_get_next_test(0);
+	idr_get_next_test(1);
+	idr_get_next_test(4);
 }
 
 /*

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [RFC] Rebasing the IDR
  2017-11-30 17:36 [RFC] Rebasing the IDR Matthew Wilcox
@ 2017-11-30 19:56 ` Daniel Vetter
  2017-11-30 21:09   ` Matthew Wilcox
  2017-12-11 10:54 ` Chris Wilson
  1 sibling, 1 reply; 6+ messages in thread
From: Daniel Vetter @ 2017-11-30 19:56 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Linux Kernel Mailing List, Tejun Heo, Eric Biggers, Chris Mi, dri-devel

Adding dri-devel, I think a pile of those are in drm.

On Thu, Nov 30, 2017 at 6:36 PM, Matthew Wilcox <willy@infradead.org> wrote:
> About 40 of the approximately 180 users of the IDR in the kernel are
> "1-based" instead of "0-based".  That is, they never want to have ID 0
> allocated; they want to see IDs allocated between 1 and N.  Usually, that's
> expressed like this:
>
>         /* Get the user-visible handle using idr. */
>         ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_KERNEL);
>
> The current implementation of this grieves me.  You see, we mark each
> node of the radix tree according to whether it has any free entries
> or not, and entry 0 is always free!  If we've already allocated 10,000
> entries from this IDR, and see this call, we'll walk all the way down
> the left side of the tree looking for entry 1, get to the bottom,
> see that entries 1-63 are allocated, then walk up to the next level,
> see that 64-4095 are allocated, walk up to the next level, see that
> 8192-12287 has a free entry, and then start walking down again.
>
> It'd be somewhere around two to three times quicker to know not to
> look down the left hand side of the tree, see that 1-8191 are used and
> start looking in the range 8192-12287.  I considered a function like
> idr_reserve(idr, N) to reserve the first N entries (we have at least one
> consumer in the tree that is 2-based, not 1-based), but that struck me
> as inelegant.  We also have the cool optimisation that if there's only
> one element in the radix tree at offset 0, then we don't even need
> to allocate a node to store it, we just store it right in the head.
> And that optimisation was never available to the 1-based users.
>
> I've come up with this solution instead, idr_base.  It's free in terms
> of memory consumption on 64-bit as it fits in the gap at the end of the
> struct idr.  Essentially, we just subtract off idr_base when looking
> up the ID, and add it back on when reporting the ID.  We need to do some
> saturating arithmetic in idr_get_next(), because starting a walk at 4
> billion or negative 8 quintillion doesn't work out terribly well.
>
> It does require the user to call idr_init_base() instead of idr_init().
> That should be a relatively small conversion effort.  Opinions?  Feedback?
> Better test cases for the test-suite (ahem)?

Just quick context on why we do this: We need to marshal/demarshal
NULL in a ton of our ioctls, mapping that to 0 is natural.

And yes I very much like this, and totally fine with trading an
idr_init_base when we can nuke the explicit index ranges at every
alloc side instead. So if you give me an idr_alloc function that
doesn't take a range as icing, that would be great.

Cheers, Daniel

>
> diff --git a/include/linux/idr.h b/include/linux/idr.h
> index 91d27a9bcdf4..a657b1f925f8 100644
> --- a/include/linux/idr.h
> +++ b/include/linux/idr.h
> @@ -18,6 +18,7 @@
>
>  struct idr {
>         struct radix_tree_root  idr_rt;
> +       unsigned int            idr_base;
>         unsigned int            idr_next;
>  };
>
> @@ -30,9 +31,10 @@ struct idr {
>  /* Set the IDR flag and the IDR_FREE tag */
>  #define IDR_RT_MARKER          ((__force gfp_t)(3 << __GFP_BITS_SHIFT))
>
> -#define IDR_INIT                                                       \
> -{                                                                      \
> -       .idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER)                        \
> +#define IDR_INIT {                                                     \
> +       .idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER),                       \
> +       .idr_base = 0,                                                  \
> +       .idr_next = 0,                                                  \
>  }
>  #define DEFINE_IDR(name)       struct idr name = IDR_INIT
>
> @@ -123,12 +125,20 @@ static inline int __must_check idr_alloc_u32(struct idr *idr, void *ptr,
>
>  static inline void *idr_remove(struct idr *idr, unsigned long id)
>  {
> -       return radix_tree_delete_item(&idr->idr_rt, id, NULL);
> +       return radix_tree_delete_item(&idr->idr_rt, id - idr->idr_base, NULL);
>  }
>
>  static inline void idr_init(struct idr *idr)
>  {
>         INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
> +       idr->idr_base = 0;
> +       idr->idr_next = 0;
> +}
> +
> +static inline void idr_init_base(struct idr *idr, int base)
> +{
> +       INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
> +       idr->idr_base = base;
>         idr->idr_next = 0;
>  }
>
> @@ -163,7 +173,7 @@ static inline void idr_preload_end(void)
>   */
>  static inline void *idr_find(const struct idr *idr, unsigned long id)
>  {
> -       return radix_tree_lookup(&idr->idr_rt, id);
> +       return radix_tree_lookup(&idr->idr_rt, id - idr->idr_base);
>  }
>
>  /**
> diff --git a/lib/idr.c b/lib/idr.c
> index 1aaeb5a8c426..a1d3531bd62f 100644
> --- a/lib/idr.c
> +++ b/lib/idr.c
> @@ -33,21 +33,22 @@ int idr_alloc_ul(struct idr *idr, void *ptr, unsigned long *nextid,
>  {
>         struct radix_tree_iter iter;
>         void __rcu **slot;
> +       int base = idr->idr_base;
>
>         if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
>                 return -EINVAL;
>         if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR)))
>                 idr->idr_rt.gfp_mask |= IDR_RT_MARKER;
>
> -       radix_tree_iter_init(&iter, *nextid);
> -       slot = idr_get_free(&idr->idr_rt, &iter, gfp, max);
> +       radix_tree_iter_init(&iter, *nextid - base);
> +       slot = idr_get_free(&idr->idr_rt, &iter, gfp, max - base);
>         if (IS_ERR(slot))
>                 return PTR_ERR(slot);
>
>         radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr);
>         radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE);
>
> -       *nextid = iter.index;
> +       *nextid = iter.index + base;
>         return 0;
>  }
>  EXPORT_SYMBOL_GPL(idr_alloc_ul);
> @@ -143,13 +144,14 @@ int idr_for_each(const struct idr *idr,
>  {
>         struct radix_tree_iter iter;
>         void __rcu **slot;
> +       int base = idr->idr_base;
>
>         radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) {
>                 int ret;
>
>                 if (WARN_ON_ONCE(iter.index > INT_MAX))
>                         break;
> -               ret = fn(iter.index, rcu_dereference_raw(*slot), data);
> +               ret = fn(iter.index + base, rcu_dereference_raw(*slot), data);
>                 if (ret)
>                         return ret;
>         }
> @@ -172,15 +174,19 @@ void *idr_get_next(struct idr *idr, int *nextid)
>  {
>         struct radix_tree_iter iter;
>         void __rcu **slot;
> +       int base = idr->idr_base;
> +       int id = *nextid;
>
> -       slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
> +       id = (id < base) ? 0 : id - base;
> +       slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
>         if (!slot)
>                 return NULL;
> +       id = iter.index + base;
>
> -       if (WARN_ON_ONCE(iter.index > INT_MAX))
> +       if (WARN_ON_ONCE(id > INT_MAX))
>                 return NULL;
>
> -       *nextid = iter.index;
> +       *nextid = id;
>         return rcu_dereference_raw(*slot);
>  }
>  EXPORT_SYMBOL(idr_get_next);
> @@ -199,12 +205,15 @@ void *idr_get_next_ul(struct idr *idr, unsigned long *nextid)
>  {
>         struct radix_tree_iter iter;
>         void __rcu **slot;
> +       unsigned long base = idr->idr_base;
> +       unsigned long id = *nextid;
>
> -       slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
> +       id = (id < base) ? 0 : id - base;
> +       slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
>         if (!slot)
>                 return NULL;
>
> -       *nextid = iter.index;
> +       *nextid = iter.index + base;
>         return rcu_dereference_raw(*slot);
>  }
>  EXPORT_SYMBOL(idr_get_next_ul);
> @@ -231,6 +240,7 @@ void *idr_replace(struct idr *idr, void *ptr, unsigned long id)
>
>         if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
>                 return ERR_PTR(-EINVAL);
> +       id -= idr->idr_base;
>
>         entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot);
>         if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
> diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
> index 36437ade429c..44ef9eba5a7a 100644
> --- a/tools/testing/radix-tree/idr-test.c
> +++ b/tools/testing/radix-tree/idr-test.c
> @@ -153,11 +153,12 @@ void idr_nowait_test(void)
>         idr_destroy(&idr);
>  }
>
> -void idr_get_next_test(void)
> +void idr_get_next_test(int base)
>  {
>         unsigned long i;
>         int nextid;
>         DEFINE_IDR(idr);
> +       idr_init_base(&idr, base);
>
>         int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0};
>
> @@ -244,7 +245,9 @@ void idr_checks(void)
>         idr_alloc_test();
>         idr_null_test();
>         idr_nowait_test();
> -       idr_get_next_test();
> +       idr_get_next_test(0);
> +       idr_get_next_test(1);
> +       idr_get_next_test(4);
>  }
>
>  /*



-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC] Rebasing the IDR
  2017-11-30 19:56 ` Daniel Vetter
@ 2017-11-30 21:09   ` Matthew Wilcox
  2017-12-03 12:09     ` Daniel Vetter
  0 siblings, 1 reply; 6+ messages in thread
From: Matthew Wilcox @ 2017-11-30 21:09 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: Linux Kernel Mailing List, Tejun Heo, Eric Biggers, Chris Mi, dri-devel

On Thu, Nov 30, 2017 at 08:56:43PM +0100, Daniel Vetter wrote:
> Adding dri-devel, I think a pile of those are in drm.

Yeah, quite a lot!  This is a good thing; means you didn't invent your
own custom ID allocator.

> On Thu, Nov 30, 2017 at 6:36 PM, Matthew Wilcox <willy@infradead.org> wrote:
> > About 40 of the approximately 180 users of the IDR in the kernel are
> > "1-based" instead of "0-based".  That is, they never want to have ID 0
> > allocated; they want to see IDs allocated between 1 and N.  Usually, that's
> > expressed like this:
> >
> >         /* Get the user-visible handle using idr. */
> >         ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_KERNEL);
> >

> Just quick context on why we do this: We need to marshal/demarshal
> NULL in a ton of our ioctls, mapping that to 0 is natural.

Yep.  You could actually store NULL at IDR index 0; the IDR distinguishes
between an allocated NULL and an unallocated entry.

> And yes I very much like this, and totally fine with trading an
> idr_init_base when we can nuke the explicit index ranges at every
> alloc side instead. So if you give me an idr_alloc function that
> doesn't take a range as icing, that would be great.

I think the API is definitely bad at making the easy things easy ... I'd
call the current idr_alloc() idr_alloc_range().  But I don't want to change
200+ call sites, so I'll settle for a new name.

	ret = idr_get(&file_priv->object_idr, obj, GFP_KERNEL);

I have another new API in the works for after the xarray lands and we can
simplify the locking --

int __must_check idr_alloc_u32(struct idr *idr, void *ptr,
			unsigned int *nextid, unsigned int max, gfp_t gfp)

The trick is that we store to nextid before inserting the ptr into the
xarray, so this will enable more users to dispense with their own locking
(or avoid awful i-looked-up-this-object-but-it's-not-initialised-so-
pretend-i-didn't-see-it dances).

(There's also an idr_alloc_ul for completeness, but I think this is
actually a bad idea; the radix tree gets pretty unwieldy at large sparse
indices and I don't want to encourage people to go outside unsigned int).

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC] Rebasing the IDR
  2017-11-30 21:09   ` Matthew Wilcox
@ 2017-12-03 12:09     ` Daniel Vetter
  0 siblings, 0 replies; 6+ messages in thread
From: Daniel Vetter @ 2017-12-03 12:09 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Linux Kernel Mailing List, Tejun Heo, Eric Biggers, Chris Mi, dri-devel

On Thu, Nov 30, 2017 at 10:09 PM, Matthew Wilcox <willy@infradead.org> wrote:
> On Thu, Nov 30, 2017 at 08:56:43PM +0100, Daniel Vetter wrote:
>> Adding dri-devel, I think a pile of those are in drm.
>
> Yeah, quite a lot!  This is a good thing; means you didn't invent your
> own custom ID allocator.
>
>> On Thu, Nov 30, 2017 at 6:36 PM, Matthew Wilcox <willy@infradead.org> wrote:
>> > About 40 of the approximately 180 users of the IDR in the kernel are
>> > "1-based" instead of "0-based".  That is, they never want to have ID 0
>> > allocated; they want to see IDs allocated between 1 and N.  Usually, that's
>> > expressed like this:
>> >
>> >         /* Get the user-visible handle using idr. */
>> >         ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_KERNEL);
>> >
>
>> Just quick context on why we do this: We need to marshal/demarshal
>> NULL in a ton of our ioctls, mapping that to 0 is natural.
>
> Yep.  You could actually store NULL at IDR index 0; the IDR distinguishes
> between an allocated NULL and an unallocated entry.

We're storing NULL in a few interim cases already, to solve issues
around init/teardown of objects (where we need to keep the index
reserved, but not pointing to anything). So if I could pick, I'd
prefer to distinguish that from the always-NULL case for 0. Since
doing that would mean in our driver unload code we'd then have to
check for that NULL pointer in the usual cleanup loops (where we know
all the interim NULL values are definitely gone).

>> And yes I very much like this, and totally fine with trading an
>> idr_init_base when we can nuke the explicit index ranges at every
>> alloc side instead. So if you give me an idr_alloc function that
>> doesn't take a range as icing, that would be great.
>
> I think the API is definitely bad at making the easy things easy ... I'd
> call the current idr_alloc() idr_alloc_range().  But I don't want to change
> 200+ call sites, so I'll settle for a new name.
>
>         ret = idr_get(&file_priv->object_idr, obj, GFP_KERNEL);
>
> I have another new API in the works for after the xarray lands and we can
> simplify the locking --
>
> int __must_check idr_alloc_u32(struct idr *idr, void *ptr,
>                         unsigned int *nextid, unsigned int max, gfp_t gfp)
>
> The trick is that we store to nextid before inserting the ptr into the
> xarray, so this will enable more users to dispense with their own locking
> (or avoid awful i-looked-up-this-object-but-it's-not-initialised-so-
> pretend-i-didn't-see-it dances).
>
> (There's also an idr_alloc_ul for completeness, but I think this is
> actually a bad idea; the radix tree gets pretty unwieldy at large sparse
> indices and I don't want to encourage people to go outside unsigned int).

+1 on idr_get. A bit confusing since get/put is usually used for
refcounting in the kernel (we're switching the drm subsystem from
ref/unref to get/put for more consistency), but oh well, flag days are
not pretty either.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC] Rebasing the IDR
  2017-11-30 17:36 [RFC] Rebasing the IDR Matthew Wilcox
  2017-11-30 19:56 ` Daniel Vetter
@ 2017-12-11 10:54 ` Chris Wilson
  2018-02-09 19:46   ` Matthew Wilcox
  1 sibling, 1 reply; 6+ messages in thread
From: Chris Wilson @ 2017-12-11 10:54 UTC (permalink / raw)
  To: Matthew Wilcox, linux-kernel
  Cc: Tejun Heo, Eric Biggers, Daniel Vetter, Chris Mi

Quoting Matthew Wilcox (2017-11-30 17:36:30)
> About 40 of the approximately 180 users of the IDR in the kernel are
> "1-based" instead of "0-based".  That is, they never want to have ID 0
> allocated; they want to see IDs allocated between 1 and N.  Usually, that's
> expressed like this:
> 
>         /* Get the user-visible handle using idr. */
>         ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_KERNEL);
> 
> The current implementation of this grieves me.  You see, we mark each
> node of the radix tree according to whether it has any free entries
> or not, and entry 0 is always free!  If we've already allocated 10,000
> entries from this IDR, and see this call, we'll walk all the way down
> the left side of the tree looking for entry 1, get to the bottom,
> see that entries 1-63 are allocated, then walk up to the next level,
> see that 64-4095 are allocated, walk up to the next level, see that
> 8192-12287 has a free entry, and then start walking down again.

Hmm, missing the baseline to apply this patch. But I did the quick hack
of allocating index 0 of the idr and that did eradicate the
idr_get_free_cmn() from being at the top of the profiles for the
many-object stress tests. This improvement will be much appreciated.
-Chris

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC] Rebasing the IDR
  2017-12-11 10:54 ` Chris Wilson
@ 2018-02-09 19:46   ` Matthew Wilcox
  0 siblings, 0 replies; 6+ messages in thread
From: Matthew Wilcox @ 2018-02-09 19:46 UTC (permalink / raw)
  To: Chris Wilson
  Cc: linux-kernel, Tejun Heo, Eric Biggers, Daniel Vetter, Chris Mi

On Mon, Dec 11, 2017 at 10:54:51AM +0000, Chris Wilson wrote:
> Quoting Matthew Wilcox (2017-11-30 17:36:30)
> > About 40 of the approximately 180 users of the IDR in the kernel are
> > "1-based" instead of "0-based".  That is, they never want to have ID 0
> > allocated; they want to see IDs allocated between 1 and N.  Usually, that's
> > expressed like this:
> > 
> >         /* Get the user-visible handle using idr. */
> >         ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_KERNEL);
> > 
> > The current implementation of this grieves me.  You see, we mark each
> > node of the radix tree according to whether it has any free entries
> > or not, and entry 0 is always free!  If we've already allocated 10,000
> > entries from this IDR, and see this call, we'll walk all the way down
> > the left side of the tree looking for entry 1, get to the bottom,
> > see that entries 1-63 are allocated, then walk up to the next level,
> > see that 64-4095 are allocated, walk up to the next level, see that
> > 8192-12287 has a free entry, and then start walking down again.
> 
> Hmm, missing the baseline to apply this patch. But I did the quick hack
> of allocating index 0 of the idr and that did eradicate the
> idr_get_free_cmn() from being at the top of the profiles for the
> many-object stress tests. This improvement will be much appreciated.

Hi Chris,

The new IDR implementation is now in Linus' tree.  For any IDR that
you want to be 1s based, initialise it using idr_init_base() instead of
idr_init() and it will magically be more efficient.

I did a quick example on the first IDR which matched
$ git grep 'idr_alloc.*, 1,' drivers/gpu

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index bd6e9a40f421..c35ea6695fee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -844,7 +844,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	}
 
 	mutex_init(&fpriv->bo_list_lock);
-	idr_init(&fpriv->bo_list_handles);
+	idr_init_base(&fpriv->bo_list_handles, 1);
 
 	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
 

Let me know if you run into any problems using it; there are test-cases in
the test-suite, but I may have missed something.

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-02-09 19:46 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-30 17:36 [RFC] Rebasing the IDR Matthew Wilcox
2017-11-30 19:56 ` Daniel Vetter
2017-11-30 21:09   ` Matthew Wilcox
2017-12-03 12:09     ` Daniel Vetter
2017-12-11 10:54 ` Chris Wilson
2018-02-09 19:46   ` Matthew Wilcox

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).