* [PATCH 1/4] mm/vmap: remove "node" argument
@ 2019-05-22 15:09 Uladzislau Rezki (Sony)
  2019-05-22 15:09 ` [PATCH 2/4] mm/vmap: preload a CPU with one object for split purpose Uladzislau Rezki (Sony)
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Uladzislau Rezki (Sony) @ 2019-05-22 15:09 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Roman Gushchin, Uladzislau Rezki, Michal Hocko, Matthew Wilcox,
	linux-mm, LKML, Thomas Garnier, Oleksiy Avramchenko,
	Steven Rostedt, Joel Fernandes, Thomas Gleixner, Ingo Molnar,
	Tejun Heo

Remove the unused "node" argument from the __alloc_vmap_area() function.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 mm/vmalloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c42872ed82ac..ea1b65fac599 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -985,7 +985,7 @@ adjust_va_to_fit_type(struct vmap_area *va,
  */
 static __always_inline unsigned long
 __alloc_vmap_area(unsigned long size, unsigned long align,
-	unsigned long vstart, unsigned long vend, int node)
+	unsigned long vstart, unsigned long vend)
 {
 	unsigned long nva_start_addr;
 	struct vmap_area *va;
@@ -1062,7 +1062,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 	 * If an allocation fails, the "vend" address is
 	 * returned. Therefore trigger the overflow path.
 	 */
-	addr = __alloc_vmap_area(size, align, vstart, vend, node);
+	addr = __alloc_vmap_area(size, align, vstart, vend);
 	if (unlikely(addr == vend))
 		goto overflow;
 
-- 
2.11.0



* [PATCH 2/4] mm/vmap: preload a CPU with one object for split purpose
  2019-05-22 15:09 [PATCH 1/4] mm/vmap: remove "node" argument Uladzislau Rezki (Sony)
@ 2019-05-22 15:09 ` Uladzislau Rezki (Sony)
  2019-05-22 18:19   ` Andrew Morton
  2019-05-22 15:09 ` [PATCH 3/4] mm/vmap: get rid of one single unlink_va() when merge Uladzislau Rezki (Sony)
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: Uladzislau Rezki (Sony) @ 2019-05-22 15:09 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Roman Gushchin, Uladzislau Rezki, Michal Hocko, Matthew Wilcox,
	linux-mm, LKML, Thomas Garnier, Oleksiy Avramchenko,
	Steven Rostedt, Joel Fernandes, Thomas Gleixner, Ingo Molnar,
	Tejun Heo

Introduce ne_fit_preload()/ne_fit_preload_end() functions
for preloading one extra vmap_area object to ensure that
we have it available when fit type is NE_FIT_TYPE.

The preload is done per CPU and with the permissive GFP_KERNEL
allocation mask, which allows the allocation to be more stable
under low-memory conditions and high memory pressure.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 mm/vmalloc.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 78 insertions(+), 3 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ea1b65fac599..5302e1b79c7b 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -364,6 +364,13 @@ static LIST_HEAD(free_vmap_area_list);
  */
 static struct rb_root free_vmap_area_root = RB_ROOT;
 
+/*
+ * Preload a CPU with one object for "no edge" split case. The
+ * aim is to get rid of allocations from the atomic context, thus
+ * to use more permissive allocation masks.
+ */
+static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
+
 static __always_inline unsigned long
 va_size(struct vmap_area *va)
 {
@@ -950,9 +957,24 @@ adjust_va_to_fit_type(struct vmap_area *va,
 		 *   L V  NVA  V R
 		 * |---|-------|---|
 		 */
-		lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
-		if (unlikely(!lva))
-			return -1;
+		lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
+		if (unlikely(!lva)) {
+			/*
+			 * For percpu allocator we do not do any pre-allocation
+			 * and leave it as it is. The reason is it most likely
+			 * never ends up with NE_FIT_TYPE splitting. In case of
+			 * percpu allocations offsets and sizes are aligned to
+			 * fixed align request, i.e. RE_FIT_TYPE and FL_FIT_TYPE
+			 * are its main fitting cases.
+			 *
+			 * There are few exceptions though, as en example it is
+			 * a first allocation(early boot up) when we have "one"
+			 * big free space that has to be split.
+			 */
+			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
+			if (!lva)
+				return -1;
+		}
 
 		/*
 		 * Build the remainder.
@@ -1023,6 +1045,50 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
 }
 
 /*
+ * Preload this CPU with one extra vmap_area object to ensure
+ * that we have it available when fit type of free area is
+ * NE_FIT_TYPE.
+ *
+ * The preload is done in non-atomic context thus, it allows us
+ * to use more permissive allocation masks, therefore to be more
+ * stable under low memory condition and high memory pressure.
+ *
+ * If success, it returns zero with preemption disabled. In case
+ * of error, (-ENOMEM) is returned with preemption not disabled.
+ * Note it has to be paired with alloc_vmap_area_preload_end().
+ */
+static void
+ne_fit_preload(int *preloaded)
+{
+	preempt_disable();
+
+	if (!__this_cpu_read(ne_fit_preload_node)) {
+		struct vmap_area *node;
+
+		preempt_enable();
+		node = kmem_cache_alloc(vmap_area_cachep, GFP_KERNEL);
+		if (node == NULL) {
+			*preloaded = 0;
+			return;
+		}
+
+		preempt_disable();
+
+		if (__this_cpu_cmpxchg(ne_fit_preload_node, NULL, node))
+			kmem_cache_free(vmap_area_cachep, node);
+	}
+
+	*preloaded = 1;
+}
+
+static void
+ne_fit_preload_end(int preloaded)
+{
+	if (preloaded)
+		preempt_enable();
+}
+
+/*
  * Allocate a region of KVA of the specified size and alignment, within the
  * vstart and vend.
  */
@@ -1034,6 +1100,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 	struct vmap_area *va;
 	unsigned long addr;
 	int purged = 0;
+	int preloaded;
 
 	BUG_ON(!size);
 	BUG_ON(offset_in_page(size));
@@ -1056,6 +1123,12 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 	kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK);
 
 retry:
+	/*
+	 * Even if it fails we do not really care about that.
+	 * Just proceed as it is. "overflow" path will refill
+	 * the cache we allocate from.
+	 */
+	ne_fit_preload(&preloaded);
 	spin_lock(&vmap_area_lock);
 
 	/*
@@ -1063,6 +1136,8 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 	 * returned. Therefore trigger the overflow path.
 	 */
 	addr = __alloc_vmap_area(size, align, vstart, vend);
+	ne_fit_preload_end(preloaded);
+
 	if (unlikely(addr == vend))
 		goto overflow;
 
-- 
2.11.0



* [PATCH 3/4] mm/vmap: get rid of one single unlink_va() when merge
  2019-05-22 15:09 [PATCH 1/4] mm/vmap: remove "node" argument Uladzislau Rezki (Sony)
  2019-05-22 15:09 ` [PATCH 2/4] mm/vmap: preload a CPU with one object for split purpose Uladzislau Rezki (Sony)
@ 2019-05-22 15:09 ` Uladzislau Rezki (Sony)
  2019-05-22 18:19   ` Andrew Morton
  2019-05-22 15:09 ` [PATCH 4/4] mm/vmap: move BUG_ON() check to the unlink_va() Uladzislau Rezki (Sony)
  2019-05-24 10:33 ` [PATCH 2/4] mm/vmap: preload a CPU with one object for split purpose Hillf Danton
  3 siblings, 1 reply; 12+ messages in thread
From: Uladzislau Rezki (Sony) @ 2019-05-22 15:09 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Roman Gushchin, Uladzislau Rezki, Michal Hocko, Matthew Wilcox,
	linux-mm, LKML, Thomas Garnier, Oleksiy Avramchenko,
	Steven Rostedt, Joel Fernandes, Thomas Gleixner, Ingo Molnar,
	Tejun Heo

It does not make sense to try to "unlink" a node that is
definitely not linked to either the list or the tree. On the
first merge step the VA just points to the previously
disconnected busy area.

On the second step, check whether the node has been merged and
"unlink" it only in that case, because it then points to an
object that must be linked.
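
To illustrate, the merge path after this change looks roughly like
below (condensed pseudo-code only; "next"/"prev" stand for the
adjacent free areas, tree propagation is omitted, this is not the
exact kernel code):

	merged = false;

	/* Step one: merge with the next area, if it is adjacent. */
	if (next->va_start == va->va_end) {
		next->va_start = va->va_start;

		/* "va" is not linked at this point, so no unlink_va(). */
		kmem_cache_free(vmap_area_cachep, va);

		/* Point to the merged area. */
		va = next;
		merged = true;
	}

	/* Step two: merge with the previous area, if it is adjacent. */
	if (prev->va_end == va->va_start) {
		prev->va_end = va->va_end;

		/* "va" may now point to the already linked sibling. */
		if (merged)
			unlink_va(va, root);

		kmem_cache_free(vmap_area_cachep, va);
		return;
	}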

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 mm/vmalloc.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 5302e1b79c7b..89b8f44e8837 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -718,9 +718,6 @@ merge_or_add_vmap_area(struct vmap_area *va,
 			/* Check and update the tree if needed. */
 			augment_tree_propagate_from(sibling);
 
-			/* Remove this VA, it has been merged. */
-			unlink_va(va, root);
-
 			/* Free vmap_area object. */
 			kmem_cache_free(vmap_area_cachep, va);
 
@@ -745,12 +742,12 @@ merge_or_add_vmap_area(struct vmap_area *va,
 			/* Check and update the tree if needed. */
 			augment_tree_propagate_from(sibling);
 
-			/* Remove this VA, it has been merged. */
-			unlink_va(va, root);
+			/* Remove this VA, if it has been merged. */
+			if (merged)
+				unlink_va(va, root);
 
 			/* Free vmap_area object. */
 			kmem_cache_free(vmap_area_cachep, va);
-
 			return;
 		}
 	}
-- 
2.11.0



* [PATCH 4/4] mm/vmap: move BUG_ON() check to the unlink_va()
  2019-05-22 15:09 [PATCH 1/4] mm/vmap: remove "node" argument Uladzislau Rezki (Sony)
  2019-05-22 15:09 ` [PATCH 2/4] mm/vmap: preload a CPU with one object for split purpose Uladzislau Rezki (Sony)
  2019-05-22 15:09 ` [PATCH 3/4] mm/vmap: get rid of one single unlink_va() when merge Uladzislau Rezki (Sony)
@ 2019-05-22 15:09 ` Uladzislau Rezki (Sony)
  2019-05-22 18:19   ` Andrew Morton
  2019-05-24 10:33 ` [PATCH 2/4] mm/vmap: preload a CPU with one object for split purpose Hillf Danton
  3 siblings, 1 reply; 12+ messages in thread
From: Uladzislau Rezki (Sony) @ 2019-05-22 15:09 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Roman Gushchin, Uladzislau Rezki, Michal Hocko, Matthew Wilcox,
	linux-mm, LKML, Thomas Garnier, Oleksiy Avramchenko,
	Steven Rostedt, Joel Fernandes, Thomas Gleixner, Ingo Molnar,
	Tejun Heo

Move the BUG_ON()/RB_EMPTY_NODE() check into the unlink_va()
function: if an empty node gets freed, it is a BUG and thus
is considered faulty behaviour.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 mm/vmalloc.c | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 89b8f44e8837..47f7e7e83e23 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -533,20 +533,16 @@ link_va(struct vmap_area *va, struct rb_root *root,
 static __always_inline void
 unlink_va(struct vmap_area *va, struct rb_root *root)
 {
-	/*
-	 * During merging a VA node can be empty, therefore
-	 * not linked with the tree nor list. Just check it.
-	 */
-	if (!RB_EMPTY_NODE(&va->rb_node)) {
-		if (root == &free_vmap_area_root)
-			rb_erase_augmented(&va->rb_node,
-				root, &free_vmap_area_rb_augment_cb);
-		else
-			rb_erase(&va->rb_node, root);
+	BUG_ON(RB_EMPTY_NODE(&va->rb_node));
 
-		list_del(&va->list);
-		RB_CLEAR_NODE(&va->rb_node);
-	}
+	if (root == &free_vmap_area_root)
+		rb_erase_augmented(&va->rb_node,
+			root, &free_vmap_area_rb_augment_cb);
+	else
+		rb_erase(&va->rb_node, root);
+
+	list_del(&va->list);
+	RB_CLEAR_NODE(&va->rb_node);
 }
 
 #if DEBUG_AUGMENT_PROPAGATE_CHECK
@@ -1190,8 +1186,6 @@ EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
 
 static void __free_vmap_area(struct vmap_area *va)
 {
-	BUG_ON(RB_EMPTY_NODE(&va->rb_node));
-
 	/*
 	 * Remove from the busy tree/list.
 	 */
-- 
2.11.0



* Re: [PATCH 2/4] mm/vmap: preload a CPU with one object for split purpose
  2019-05-22 15:09 ` [PATCH 2/4] mm/vmap: preload a CPU with one object for split purpose Uladzislau Rezki (Sony)
@ 2019-05-22 18:19   ` Andrew Morton
  2019-05-23 11:42     ` Uladzislau Rezki
  0 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2019-05-22 18:19 UTC (permalink / raw)
  To: Uladzislau Rezki (Sony)
  Cc: Roman Gushchin, Michal Hocko, Matthew Wilcox, linux-mm, LKML,
	Thomas Garnier, Oleksiy Avramchenko, Steven Rostedt,
	Joel Fernandes, Thomas Gleixner, Ingo Molnar, Tejun Heo

On Wed, 22 May 2019 17:09:37 +0200 "Uladzislau Rezki (Sony)" <urezki@gmail.com> wrote:

> Introduce ne_fit_preload()/ne_fit_preload_end() functions
> for preloading one extra vmap_area object to ensure that
> we have it available when fit type is NE_FIT_TYPE.
> 
> The preload is done per CPU and with the permissive GFP_KERNEL
> allocation mask, which allows the allocation to be more stable
> under low-memory conditions and high memory pressure.

What is the reason for this change?  Presumably some workload is
suffering from allocation failures?  Please provide a full description
of when and how this occurs so others can judge the desirability of
this change.

> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -364,6 +364,13 @@ static LIST_HEAD(free_vmap_area_list);
>   */
>  static struct rb_root free_vmap_area_root = RB_ROOT;
>  
> +/*
> + * Preload a CPU with one object for "no edge" split case. The
> + * aim is to get rid of allocations from the atomic context, thus
> + * to use more permissive allocation masks.
> + */
> +static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
> +
>  static __always_inline unsigned long
>  va_size(struct vmap_area *va)
>  {
> @@ -950,9 +957,24 @@ adjust_va_to_fit_type(struct vmap_area *va,
>  		 *   L V  NVA  V R
>  		 * |---|-------|---|
>  		 */
> -		lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
> -		if (unlikely(!lva))
> -			return -1;
> +		lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
> +		if (unlikely(!lva)) {
> +			/*
> +			 * For percpu allocator we do not do any pre-allocation
> +			 * and leave it as it is. The reason is it most likely
> +			 * never ends up with NE_FIT_TYPE splitting. In case of
> +			 * percpu allocations offsets and sizes are aligned to
> +			 * fixed align request, i.e. RE_FIT_TYPE and FL_FIT_TYPE
> +			 * are its main fitting cases.
> +			 *
> +			 * There are few exceptions though, as en example it is

"a few"

s/en/an/

> +			 * a first allocation(early boot up) when we have "one"

s/(/ (/

> +			 * big free space that has to be split.
> +			 */
> +			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
> +			if (!lva)
> +				return -1;
> +		}
>  
>  		/*
>  		 * Build the remainder.
> @@ -1023,6 +1045,50 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
>  }
>  
>  /*
> + * Preload this CPU with one extra vmap_area object to ensure
> + * that we have it available when fit type of free area is
> + * NE_FIT_TYPE.
> + *
> + * The preload is done in non-atomic context thus, it allows us

s/ thus,/, thus/

> + * to use more permissive allocation masks, therefore to be more

s/, therefore//

> + * stable under low memory condition and high memory pressure.
> + *
> + * If success, it returns zero with preemption disabled. In case
> + * of error, (-ENOMEM) is returned with preemption not disabled.
> + * Note it has to be paired with alloc_vmap_area_preload_end().
> + */
> +static void
> +ne_fit_preload(int *preloaded)
> +{
> +	preempt_disable();
> +
> +	if (!__this_cpu_read(ne_fit_preload_node)) {
> +		struct vmap_area *node;
> +
> +		preempt_enable();
> +		node = kmem_cache_alloc(vmap_area_cachep, GFP_KERNEL);
> +		if (node == NULL) {
> +			*preloaded = 0;
> +			return;
> +		}
> +
> +		preempt_disable();
> +
> +		if (__this_cpu_cmpxchg(ne_fit_preload_node, NULL, node))
> +			kmem_cache_free(vmap_area_cachep, node);
> +	}
> +
> +	*preloaded = 1;
> +}

Why not make it do `return preloaded;'?  The
pass-and-return-by-reference seems unnecessary?

Otherwise it all appears solid and sensible.


* Re: [PATCH 3/4] mm/vmap: get rid of one single unlink_va() when merge
  2019-05-22 15:09 ` [PATCH 3/4] mm/vmap: get rid of one single unlink_va() when merge Uladzislau Rezki (Sony)
@ 2019-05-22 18:19   ` Andrew Morton
  2019-05-23 11:49     ` Uladzislau Rezki
  0 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2019-05-22 18:19 UTC (permalink / raw)
  To: Uladzislau Rezki (Sony)
  Cc: Roman Gushchin, Michal Hocko, Matthew Wilcox, linux-mm, LKML,
	Thomas Garnier, Oleksiy Avramchenko, Steven Rostedt,
	Joel Fernandes, Thomas Gleixner, Ingo Molnar, Tejun Heo

On Wed, 22 May 2019 17:09:38 +0200 "Uladzislau Rezki (Sony)" <urezki@gmail.com> wrote:

> It does not make sense to try to "unlink" a node that is
> definitely not linked to either the list or the tree. On the
> first merge step the VA just points to the previously
> disconnected busy area.
> 
> On the second step, check whether the node has been merged and
> "unlink" it only in that case, because it then points to an
> object that must be linked.

Again, what is the motivation for this change?  Seems to be a bit of a
code/logic cleanup, no significant runtime effect?



* Re: [PATCH 4/4] mm/vmap: move BUG_ON() check to the unlink_va()
  2019-05-22 15:09 ` [PATCH 4/4] mm/vmap: move BUG_ON() check to the unlink_va() Uladzislau Rezki (Sony)
@ 2019-05-22 18:19   ` Andrew Morton
  2019-05-23 12:07     ` Uladzislau Rezki
  0 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2019-05-22 18:19 UTC (permalink / raw)
  To: Uladzislau Rezki (Sony)
  Cc: Roman Gushchin, Michal Hocko, Matthew Wilcox, linux-mm, LKML,
	Thomas Garnier, Oleksiy Avramchenko, Steven Rostedt,
	Joel Fernandes, Thomas Gleixner, Ingo Molnar, Tejun Heo

On Wed, 22 May 2019 17:09:39 +0200 "Uladzislau Rezki (Sony)" <urezki@gmail.com> wrote:

> Move the BUG_ON()/RB_EMPTY_NODE() check into the unlink_va()
> function: if an empty node gets freed, it is a BUG and thus
> is considered faulty behaviour.

So... this is an expansion of the assertion's coverage?


* Re: [PATCH 2/4] mm/vmap: preload a CPU with one object for split purpose
  2019-05-22 18:19   ` Andrew Morton
@ 2019-05-23 11:42     ` Uladzislau Rezki
  0 siblings, 0 replies; 12+ messages in thread
From: Uladzislau Rezki @ 2019-05-23 11:42 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Uladzislau Rezki (Sony),
	Roman Gushchin, Michal Hocko, Matthew Wilcox, linux-mm, LKML,
	Thomas Garnier, Oleksiy Avramchenko, Steven Rostedt,
	Joel Fernandes, Thomas Gleixner, Ingo Molnar, Tejun Heo

On Wed, May 22, 2019 at 11:19:04AM -0700, Andrew Morton wrote:
> On Wed, 22 May 2019 17:09:37 +0200 "Uladzislau Rezki (Sony)" <urezki@gmail.com> wrote:
> 
> > Introduce ne_fit_preload()/ne_fit_preload_end() functions
> > for preloading one extra vmap_area object to ensure that
> > we have it available when fit type is NE_FIT_TYPE.
> > 
> > The preload is done per CPU and with the permissive GFP_KERNEL
> > allocation mask, which allows the allocation to be more stable
> > under low-memory conditions and high memory pressure.
> 
> What is the reason for this change?  Presumably some workload is
> suffering from allocation failures?  Please provide a full description
> of when and how this occurs so others can judge the desirability of
> this change.
>
It is not driven by any particular workload that suffers from it.
At least I am not aware of one.

I just want to avoid using GFP_NOWAIT where possible. The reason
is that GFP_KERNEL has more permissive parameters and, as an
example, allows __GFP_DIRECT_RECLAIM when no memory is available,
which can be beneficial under high memory pressure or low-memory
conditions.
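
For reference, as far as I can see in include/linux/gfp.h for this
kernel version (shown only to illustrate the point, please
double-check against the actual tree):

	#define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
	#define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)

where __GFP_RECLAIM combines __GFP_DIRECT_RECLAIM and
__GFP_KSWAPD_RECLAIM. So a GFP_NOWAIT allocation can only kick
kswapd, while GFP_KERNEL may also enter direct reclaim and block,
which is why the preload should be more robust under memory
pressure.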

I could probably simulate some special conditions and come up with
something, but I am not sure. I think this change will be good for
"small" systems without swap under high memory pressure, where
direct reclaim and other flags can fix the situation.

Do you want me to try to find a specific test case? What do you think?

> > --- a/mm/vmalloc.c
> > +++ b/mm/vmalloc.c
> > @@ -364,6 +364,13 @@ static LIST_HEAD(free_vmap_area_list);
> >   */
> >  static struct rb_root free_vmap_area_root = RB_ROOT;
> >  
> > +/*
> > + * Preload a CPU with one object for "no edge" split case. The
> > + * aim is to get rid of allocations from the atomic context, thus
> > + * to use more permissive allocation masks.
> > + */
> > +static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
> > +
> >  static __always_inline unsigned long
> >  va_size(struct vmap_area *va)
> >  {
> > @@ -950,9 +957,24 @@ adjust_va_to_fit_type(struct vmap_area *va,
> >  		 *   L V  NVA  V R
> >  		 * |---|-------|---|
> >  		 */
> > -		lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
> > -		if (unlikely(!lva))
> > -			return -1;
> > +		lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
> > +		if (unlikely(!lva)) {
> > +			/*
> > +			 * For percpu allocator we do not do any pre-allocation
> > +			 * and leave it as it is. The reason is it most likely
> > +			 * never ends up with NE_FIT_TYPE splitting. In case of
> > +			 * percpu allocations offsets and sizes are aligned to
> > +			 * fixed align request, i.e. RE_FIT_TYPE and FL_FIT_TYPE
> > +			 * are its main fitting cases.
> > +			 *
> > +			 * There are few exceptions though, as en example it is
> 
> "a few"
> 
> s/en/an/
> 
> > +			 * a first allocation(early boot up) when we have "one"
> 
> s/(/ (/
> 
Will fix that.

> > +			 * big free space that has to be split.
> > +			 */
> > +			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
> > +			if (!lva)
> > +				return -1;
> > +		}
> >  
> >  		/*
> >  		 * Build the remainder.
> > @@ -1023,6 +1045,50 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
> >  }
> >  
> >  /*
> > + * Preload this CPU with one extra vmap_area object to ensure
> > + * that we have it available when fit type of free area is
> > + * NE_FIT_TYPE.
> > + *
> > + * The preload is done in non-atomic context thus, it allows us
> 
> s/ thus,/, thus/
> 
Will fix.

> > + * to use more permissive allocation masks, therefore to be more
> 
> s/, therefore//
> 
Will fix.

> > + * stable under low memory condition and high memory pressure.
> > + *
> > + * If success, it returns zero with preemption disabled. In case
> > + * of error, (-ENOMEM) is returned with preemption not disabled.
> > + * Note it has to be paired with alloc_vmap_area_preload_end().
> > + */
> > +static void
> > +ne_fit_preload(int *preloaded)
> > +{
> > +	preempt_disable();
> > +
> > +	if (!__this_cpu_read(ne_fit_preload_node)) {
> > +		struct vmap_area *node;
> > +
> > +		preempt_enable();
> > +		node = kmem_cache_alloc(vmap_area_cachep, GFP_KERNEL);
> > +		if (node == NULL) {
> > +			*preloaded = 0;
> > +			return;
> > +		}
> > +
> > +		preempt_disable();
> > +
> > +		if (__this_cpu_cmpxchg(ne_fit_preload_node, NULL, node))
> > +			kmem_cache_free(vmap_area_cachep, node);
> > +	}
> > +
> > +	*preloaded = 1;
> > +}
> 
> Why not make it do `return preloaded;'?  The
> pass-and-return-by-reference seems unnecessary?
>
Will rewrite. I just thought about:

preload_start(preloaded)
...
preload_end(preloaded)

instead of doing it conditionally:

preloaded = preload_start()
...
if (preloaded)
    preload_end();
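
For illustration, the reworked function could look roughly like
this (just a sketch, not necessarily what v2 will be):

static int
ne_fit_preload(void)
{
	preempt_disable();

	if (!__this_cpu_read(ne_fit_preload_node)) {
		struct vmap_area *node;

		preempt_enable();
		node = kmem_cache_alloc(vmap_area_cachep, GFP_KERNEL);
		if (!node)
			return 0;	/* not preloaded, preemption enabled */

		preempt_disable();

		if (__this_cpu_cmpxchg(ne_fit_preload_node, NULL, node))
			kmem_cache_free(vmap_area_cachep, node);
	}

	return 1;	/* preloaded, preemption disabled */
}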

Thank you!

--
Vlad Rezki


* Re: [PATCH 3/4] mm/vmap: get rid of one single unlink_va() when merge
  2019-05-22 18:19   ` Andrew Morton
@ 2019-05-23 11:49     ` Uladzislau Rezki
  0 siblings, 0 replies; 12+ messages in thread
From: Uladzislau Rezki @ 2019-05-23 11:49 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Uladzislau Rezki (Sony),
	Roman Gushchin, Michal Hocko, Matthew Wilcox, linux-mm, LKML,
	Thomas Garnier, Oleksiy Avramchenko, Steven Rostedt,
	Joel Fernandes, Thomas Gleixner, Ingo Molnar, Tejun Heo

On Wed, May 22, 2019 at 11:19:11AM -0700, Andrew Morton wrote:
> On Wed, 22 May 2019 17:09:38 +0200 "Uladzislau Rezki (Sony)" <urezki@gmail.com> wrote:
> 
> > It does not make sense to try to "unlink" a node that is
> > definitely not linked to either the list or the tree. On the
> > first merge step the VA just points to the previously
> > disconnected busy area.
> >
> > On the second step, check whether the node has been merged and
> > "unlink" it only in that case, because it then points to an
> > object that must be linked.
> 
> Again, what is the motivation for this change?  Seems to be a bit of a
> code/logic cleanup, no significant runtime effect?
> 
It is just a cleanup. There is no design change and it behaves
as before.

--
Vlad Rezki


* Re: [PATCH 4/4] mm/vmap: move BUG_ON() check to the unlink_va()
  2019-05-22 18:19   ` Andrew Morton
@ 2019-05-23 12:07     ` Uladzislau Rezki
  0 siblings, 0 replies; 12+ messages in thread
From: Uladzislau Rezki @ 2019-05-23 12:07 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Uladzislau Rezki (Sony),
	Roman Gushchin, Michal Hocko, Matthew Wilcox, linux-mm, LKML,
	Thomas Garnier, Oleksiy Avramchenko, Steven Rostedt,
	Joel Fernandes, Thomas Gleixner, Ingo Molnar, Tejun Heo

On Wed, May 22, 2019 at 11:19:16AM -0700, Andrew Morton wrote:
> On Wed, 22 May 2019 17:09:39 +0200 "Uladzislau Rezki (Sony)" <urezki@gmail.com> wrote:
> 
> > Move the BUG_ON()/RB_EMPTY_NODE() check into the unlink_va()
> > function: if an empty node gets freed, it is a BUG and thus
> > is considered faulty behaviour.
> 
> So... this is an expansion of the assertion's coverage?
>
I would say it is rather about moving the BUG_ON()/RB_EMPTY_NODE()
check into unlink_va(). We used to have the BUG_ON() and it is
still there, just moved into the inlined helper. So it is not
about the assertion's coverage.

--
Vlad Rezki


* Re: [PATCH 2/4] mm/vmap: preload a CPU with one object for split purpose
  2019-05-22 15:09 [PATCH 1/4] mm/vmap: remove "node" argument Uladzislau Rezki (Sony)
                   ` (2 preceding siblings ...)
  2019-05-22 15:09 ` [PATCH 4/4] mm/vmap: move BUG_ON() check to the unlink_va() Uladzislau Rezki (Sony)
@ 2019-05-24 10:33 ` Hillf Danton
  2019-05-24 14:14   ` Uladzislau Rezki
  3 siblings, 1 reply; 12+ messages in thread
From: Hillf Danton @ 2019-05-24 10:33 UTC (permalink / raw)
  To: Uladzislau Rezki (Sony)
  Cc: Andrew Morton, Roman Gushchin, Michal Hocko, Hillf Danton,
	Matthew Wilcox, linux-mm, LKML, Thomas Garnier,
	Oleksiy Avramchenko, Steven Rostedt, Joel Fernandes,
	Thomas Gleixner, Ingo Molnar, Tejun Heo


On Wed, 22 May 2019 17:09:37 +0200 Uladzislau Rezki (Sony) wrote:
>  /*
> + * Preload this CPU with one extra vmap_area object to ensure
> + * that we have it available when fit type of free area is
> + * NE_FIT_TYPE.
> + *
> + * The preload is done in non-atomic context thus, it allows us
> + * to use more permissive allocation masks, therefore to be more
> + * stable under low memory condition and high memory pressure.
> + *
> + * If success, it returns zero with preemption disabled. In case
> + * of error, (-ENOMEM) is returned with preemption not disabled.
> + * Note it has to be paired with alloc_vmap_area_preload_end().
> + */
> +static void
> +ne_fit_preload(int *preloaded)
> +{
> +	preempt_disable();
> +
> +	if (!__this_cpu_read(ne_fit_preload_node)) {
> +		struct vmap_area *node;
> +
> +		preempt_enable();
> +		node = kmem_cache_alloc(vmap_area_cachep, GFP_KERNEL);

Alternatively, can you please take another look at the upside of using
the memory node parameter of alloc_vmap_area() for the vmap_area slab
allocation, given that this preload, unlike adjust_va_to_fit_type(),
is invoked with the vmap_area_lock not acquired?

> +		if (node == NULL) {
> +			*preloaded = 0;
> +			return;
> +		}
> +
> +		preempt_disable();
> +
> +		if (__this_cpu_cmpxchg(ne_fit_preload_node, NULL, node))
> +			kmem_cache_free(vmap_area_cachep, node);
> +	}
> +
> +	*preloaded = 1;
> +}
> +
> +static void
> +ne_fit_preload_end(int preloaded)
> +{
> +	if (preloaded)
> +		preempt_enable();
> +}
> +
> +/*
>   * Allocate a region of KVA of the specified size and alignment, within the
>   * vstart and vend.
>   */
> @@ -1034,6 +1100,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
>  	struct vmap_area *va;
>  	unsigned long addr;
>  	int purged = 0;
> +	int preloaded;
>  
>  	BUG_ON(!size);
>  	BUG_ON(offset_in_page(size));
> @@ -1056,6 +1123,12 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
>  	kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK);
>  
>  retry:
> +	/*
> +	 * Even if it fails we do not really care about that.
> +	 * Just proceed as it is. "overflow" path will refill
> +	 * the cache we allocate from.
> +	 */
> +	ne_fit_preload(&preloaded);
>  	spin_lock(&vmap_area_lock);
>  
>  	/*
> @@ -1063,6 +1136,8 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
>  	 * returned. Therefore trigger the overflow path.
>  	 */
>  	addr = __alloc_vmap_area(size, align, vstart, vend);
> +	ne_fit_preload_end(preloaded);
> +
>  	if (unlikely(addr == vend))
>  		goto overflow;
>  
> -- 
> 2.11.0
>  
Best Regards
Hillf



* Re: [PATCH 2/4] mm/vmap: preload a CPU with one object for split purpose
  2019-05-24 10:33 ` [PATCH 2/4] mm/vmap: preload a CPU with one object for split purpose Hillf Danton
@ 2019-05-24 14:14   ` Uladzislau Rezki
  0 siblings, 0 replies; 12+ messages in thread
From: Uladzislau Rezki @ 2019-05-24 14:14 UTC (permalink / raw)
  To: Hillf Danton
  Cc: Uladzislau Rezki (Sony),
	Andrew Morton, Roman Gushchin, Michal Hocko, Matthew Wilcox,
	linux-mm, LKML, Thomas Garnier, Oleksiy Avramchenko,
	Steven Rostedt, Joel Fernandes, Thomas Gleixner, Ingo Molnar,
	Tejun Heo

On Fri, May 24, 2019 at 06:33:16PM +0800, Hillf Danton wrote:
> 
> On Wed, 22 May 2019 17:09:37 +0200 Uladzislau Rezki (Sony) wrote:
> >  /*
> > + * Preload this CPU with one extra vmap_area object to ensure
> > + * that we have it available when fit type of free area is
> > + * NE_FIT_TYPE.
> > + *
> > + * The preload is done in non-atomic context thus, it allows us
> > + * to use more permissive allocation masks, therefore to be more
> > + * stable under low memory condition and high memory pressure.
> > + *
> > + * If success, it returns zero with preemption disabled. In case
> > + * of error, (-ENOMEM) is returned with preemption not disabled.
> > + * Note it has to be paired with alloc_vmap_area_preload_end().
> > + */
> > +static void
> > +ne_fit_preload(int *preloaded)
> > +{
> > +	preempt_disable();
> > +
> > +	if (!__this_cpu_read(ne_fit_preload_node)) {
> > +		struct vmap_area *node;
> > +
> > +		preempt_enable();
> > +		node = kmem_cache_alloc(vmap_area_cachep, GFP_KERNEL);
> 
> Alternatively, can you please take another look at the upside of using
> the memory node parameter of alloc_vmap_area() for the vmap_area slab
> allocation, given that this preload, unlike adjust_va_to_fit_type(),
> is invoked with the vmap_area_lock not acquired?
> 
Agree. That makes sense. I will upload a v2 where I address all
the comments.
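
Something along these lines, probably (just a sketch of the
direction, not the final v2):

	/* In alloc_vmap_area(), before taking vmap_area_lock: */
	preloaded = ne_fit_preload(node);

	/*
	 * Inside ne_fit_preload(), use the node-aware slab call,
	 * since no spinlock is held at this point:
	 */
	pva = kmem_cache_alloc_node(vmap_area_cachep, GFP_KERNEL, node);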

Thank you!

--
Vlad Rezki

