* [PATCH 0/5] Reduce a vmalloc internal lock contention preparation work
@ 2022-06-07  9:34 Uladzislau Rezki (Sony)
  2022-06-07  9:34 ` [PATCH 1/5] mm/vmalloc: Make link_va()/unlink_va() common to different rb_root Uladzislau Rezki (Sony)
                   ` (5 more replies)
  0 siblings, 6 replies; 20+ messages in thread
From: Uladzislau Rezki (Sony) @ 2022-06-07  9:34 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Uladzislau Rezki, Oleksiy Avramchenko

Hi.

This small series is preparation work to implement per-cpu vmalloc
allocation in order to reduce high internal lock contention. This
series does not introduce any functional changes; it is only about
preparation.

Uladzislau Rezki (Sony) (5):
  mm/vmalloc: Make link_va()/unlink_va() common to different rb_root
  mm/vmalloc: Extend __alloc_vmap_area() with extra arguments
  mm/vmalloc: Initialize VA's list node after unlink
  mm/vmalloc: Extend __find_vmap_area() with one more argument
  lib/test_vmalloc: Switch to prandom_u32()

 lib/test_vmalloc.c | 15 +++----
 mm/vmalloc.c       | 98 ++++++++++++++++++++++++++++++++--------------
 2 files changed, 76 insertions(+), 37 deletions(-)

-- 
2.30.2



* [PATCH 1/5] mm/vmalloc: Make link_va()/unlink_va() common to different rb_root
  2022-06-07  9:34 [PATCH 0/5] Reduce a vmalloc internal lock contention preparation work Uladzislau Rezki (Sony)
@ 2022-06-07  9:34 ` Uladzislau Rezki (Sony)
  2022-06-08  3:45   ` Baoquan He
  2022-06-07  9:34 ` [PATCH 2/5] mm/vmalloc: Extend __alloc_vmap_area() with extra arguments Uladzislau Rezki (Sony)
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Uladzislau Rezki (Sony) @ 2022-06-07  9:34 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Uladzislau Rezki, Oleksiy Avramchenko

Currently link_va() and unlink_va(), in order to figure out the tree
type, compare a passed root value with the global free_vmap_area_root
variable to distinguish the augmented rb-tree from a regular one. It
is hard-coded since such functions can only manipulate the specific
"free_vmap_area_root" tree that represents the global free vmap space.

Make them common by introducing "_augment" versions of both internal
functions, so it is possible to deal with different trees.

There is no functional change as a result of this patch.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 mm/vmalloc.c | 60 +++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 12 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index be8ed06804a5..0102d6d5fcdf 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -911,8 +911,9 @@ get_va_next_sibling(struct rb_node *parent, struct rb_node **link)
 }
 
 static __always_inline void
-link_va(struct vmap_area *va, struct rb_root *root,
-	struct rb_node *parent, struct rb_node **link, struct list_head *head)
+__link_va(struct vmap_area *va, struct rb_root *root,
+	struct rb_node *parent, struct rb_node **link,
+	struct list_head *head, bool augment)
 {
 	/*
 	 * VA is still not in the list, but we can
@@ -926,7 +927,7 @@ link_va(struct vmap_area *va, struct rb_root *root,
 
 	/* Insert to the rb-tree */
 	rb_link_node(&va->rb_node, parent, link);
-	if (root == &free_vmap_area_root) {
+	if (augment) {
 		/*
 		 * Some explanation here. Just perform simple insertion
 		 * to the tree. We do not set va->subtree_max_size to
@@ -950,12 +951,28 @@ link_va(struct vmap_area *va, struct rb_root *root,
 }
 
 static __always_inline void
-unlink_va(struct vmap_area *va, struct rb_root *root)
+link_va(struct vmap_area *va, struct rb_root *root,
+	struct rb_node *parent, struct rb_node **link,
+	struct list_head *head)
+{
+	__link_va(va, root, parent, link, head, false);
+}
+
+static __always_inline void
+link_va_augment(struct vmap_area *va, struct rb_root *root,
+	struct rb_node *parent, struct rb_node **link,
+	struct list_head *head)
+{
+	__link_va(va, root, parent, link, head, true);
+}
+
+static __always_inline void
+__unlink_va(struct vmap_area *va, struct rb_root *root, bool augment)
 {
 	if (WARN_ON(RB_EMPTY_NODE(&va->rb_node)))
 		return;
 
-	if (root == &free_vmap_area_root)
+	if (augment)
 		rb_erase_augmented(&va->rb_node,
 			root, &free_vmap_area_rb_augment_cb);
 	else
@@ -965,6 +982,18 @@ unlink_va(struct vmap_area *va, struct rb_root *root)
 	RB_CLEAR_NODE(&va->rb_node);
 }
 
+static __always_inline void
+unlink_va(struct vmap_area *va, struct rb_root *root)
+{
+	__unlink_va(va, root, false);
+}
+
+static __always_inline void
+unlink_va_augment(struct vmap_area *va, struct rb_root *root)
+{
+	__unlink_va(va, root, true);
+}
+
 #if DEBUG_AUGMENT_PROPAGATE_CHECK
 /*
  * Gets called when remove the node and rotate.
@@ -1060,7 +1089,7 @@ insert_vmap_area_augment(struct vmap_area *va,
 		link = find_va_links(va, root, NULL, &parent);
 
 	if (link) {
-		link_va(va, root, parent, link, head);
+		link_va_augment(va, root, parent, link, head);
 		augment_tree_propagate_from(va);
 	}
 }
@@ -1077,8 +1106,8 @@ insert_vmap_area_augment(struct vmap_area *va,
  * ongoing.
  */
 static __always_inline struct vmap_area *
-merge_or_add_vmap_area(struct vmap_area *va,
-	struct rb_root *root, struct list_head *head)
+__merge_or_add_vmap_area(struct vmap_area *va,
+	struct rb_root *root, struct list_head *head, bool augment)
 {
 	struct vmap_area *sibling;
 	struct list_head *next;
@@ -1140,7 +1169,7 @@ merge_or_add_vmap_area(struct vmap_area *va,
 			 * "normalized" because of rotation operations.
 			 */
 			if (merged)
-				unlink_va(va, root);
+				__unlink_va(va, root, augment);
 
 			sibling->va_end = va->va_end;
 
@@ -1155,16 +1184,23 @@ merge_or_add_vmap_area(struct vmap_area *va,
 
 insert:
 	if (!merged)
-		link_va(va, root, parent, link, head);
+		__link_va(va, root, parent, link, head, augment);
 
 	return va;
 }
 
+static __always_inline struct vmap_area *
+merge_or_add_vmap_area(struct vmap_area *va,
+	struct rb_root *root, struct list_head *head)
+{
+	return __merge_or_add_vmap_area(va, root, head, false);
+}
+
 static __always_inline struct vmap_area *
 merge_or_add_vmap_area_augment(struct vmap_area *va,
 	struct rb_root *root, struct list_head *head)
 {
-	va = merge_or_add_vmap_area(va, root, head);
+	va = __merge_or_add_vmap_area(va, root, head, true);
 	if (va)
 		augment_tree_propagate_from(va);
 
@@ -1348,7 +1384,7 @@ adjust_va_to_fit_type(struct vmap_area *va,
 		 * V      NVA      V
 		 * |---------------|
 		 */
-		unlink_va(va, &free_vmap_area_root);
+		unlink_va_augment(va, &free_vmap_area_root);
 		kmem_cache_free(vmap_area_cachep, va);
 	} else if (type == LE_FIT_TYPE) {
 		/*
-- 
2.30.2



* [PATCH 2/5] mm/vmalloc: Extend __alloc_vmap_area() with extra arguments
  2022-06-07  9:34 [PATCH 0/5] Reduce a vmalloc internal lock contention preparation work Uladzislau Rezki (Sony)
  2022-06-07  9:34 ` [PATCH 1/5] mm/vmalloc: Make link_va()/unlink_va() common to different rb_root Uladzislau Rezki (Sony)
@ 2022-06-07  9:34 ` Uladzislau Rezki (Sony)
  2022-06-07  9:49   ` Christoph Hellwig
  2022-06-08  3:46   ` Baoquan He
  2022-06-07  9:34 ` [PATCH 3/5] mm/vmalloc: Initialize VA's list node after unlink Uladzislau Rezki (Sony)
                   ` (3 subsequent siblings)
  5 siblings, 2 replies; 20+ messages in thread
From: Uladzislau Rezki (Sony) @ 2022-06-07  9:34 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Uladzislau Rezki, Oleksiy Avramchenko

Currently __alloc_vmap_area() allocates only from the global vmap
space, therefore the list head and rb-tree, which represent the
free vmap space, are not passed as parameters to this function and
are accessed directly from within it.

Extend __alloc_vmap_area() and other dependent functions so that
they can allocate from different trees, making the interface common
rather than specific.

There is no functional change as a result of this patch.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 mm/vmalloc.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0102d6d5fcdf..745e89eb6ca1 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1234,15 +1234,15 @@ is_within_this_va(struct vmap_area *va, unsigned long size,
  * overhead.
  */
 static __always_inline struct vmap_area *
-find_vmap_lowest_match(unsigned long size, unsigned long align,
-	unsigned long vstart, bool adjust_search_size)
+find_vmap_lowest_match(struct rb_root *root, unsigned long size,
+	unsigned long align, unsigned long vstart, bool adjust_search_size)
 {
 	struct vmap_area *va;
 	struct rb_node *node;
 	unsigned long length;
 
 	/* Start from the root. */
-	node = free_vmap_area_root.rb_node;
+	node = root->rb_node;
 
 	/* Adjust the search size for alignment overhead. */
 	length = adjust_search_size ? size + align - 1 : size;
@@ -1370,9 +1370,9 @@ classify_va_fit_type(struct vmap_area *va,
 }
 
 static __always_inline int
-adjust_va_to_fit_type(struct vmap_area *va,
-	unsigned long nva_start_addr, unsigned long size,
-	enum fit_type type)
+adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
+	struct vmap_area *va, unsigned long nva_start_addr,
+	unsigned long size, enum fit_type type)
 {
 	struct vmap_area *lva = NULL;
 
@@ -1384,7 +1384,7 @@ adjust_va_to_fit_type(struct vmap_area *va,
 		 * V      NVA      V
 		 * |---------------|
 		 */
-		unlink_va_augment(va, &free_vmap_area_root);
+		unlink_va_augment(va, root);
 		kmem_cache_free(vmap_area_cachep, va);
 	} else if (type == LE_FIT_TYPE) {
 		/*
@@ -1462,8 +1462,7 @@ adjust_va_to_fit_type(struct vmap_area *va,
 		augment_tree_propagate_from(va);
 
 		if (lva)	/* type == NE_FIT_TYPE */
-			insert_vmap_area_augment(lva, &va->rb_node,
-				&free_vmap_area_root, &free_vmap_area_list);
+			insert_vmap_area_augment(lva, &va->rb_node, root, head);
 	}
 
 	return 0;
@@ -1474,7 +1473,8 @@ adjust_va_to_fit_type(struct vmap_area *va,
  * Otherwise a vend is returned that indicates failure.
  */
 static __always_inline unsigned long
-__alloc_vmap_area(unsigned long size, unsigned long align,
+__alloc_vmap_area(struct rb_root *root, struct list_head *head,
+	unsigned long size, unsigned long align,
 	unsigned long vstart, unsigned long vend)
 {
 	bool adjust_search_size = true;
@@ -1495,7 +1495,7 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
 	if (align <= PAGE_SIZE || (align > PAGE_SIZE && (vend - vstart) == size))
 		adjust_search_size = false;
 
-	va = find_vmap_lowest_match(size, align, vstart, adjust_search_size);
+	va = find_vmap_lowest_match(root, size, align, vstart, adjust_search_size);
 	if (unlikely(!va))
 		return vend;
 
@@ -1514,7 +1514,7 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
 		return vend;
 
 	/* Update the free vmap_area. */
-	ret = adjust_va_to_fit_type(va, nva_start_addr, size, type);
+	ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size, type);
 	if (ret)
 		return vend;
 
@@ -1605,7 +1605,8 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 
 retry:
 	preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
-	addr = __alloc_vmap_area(size, align, vstart, vend);
+	addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
+		size, align, vstart, vend);
 	spin_unlock(&free_vmap_area_lock);
 
 	/*
@@ -3886,7 +3887,8 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 			/* It is a BUG(), but trigger recovery instead. */
 			goto recovery;
 
-		ret = adjust_va_to_fit_type(va, start, size, type);
+		ret = adjust_va_to_fit_type(&free_vmap_area_root,
+				&free_vmap_area_list, va, start, size, type);
 		if (unlikely(ret))
 			goto recovery;
 
-- 
2.30.2



* [PATCH 3/5] mm/vmalloc: Initialize VA's list node after unlink
  2022-06-07  9:34 [PATCH 0/5] Reduce a vmalloc internal lock contention preparation work Uladzislau Rezki (Sony)
  2022-06-07  9:34 ` [PATCH 1/5] mm/vmalloc: Make link_va()/unlink_va() common to different rb_root Uladzislau Rezki (Sony)
  2022-06-07  9:34 ` [PATCH 2/5] mm/vmalloc: Extend __alloc_vmap_area() with extra arguments Uladzislau Rezki (Sony)
@ 2022-06-07  9:34 ` Uladzislau Rezki (Sony)
  2022-06-08  3:19   ` Baoquan He
  2022-06-07  9:34 ` [PATCH 4/5] mm/vmalloc: Extend __find_vmap_area() with one more argument Uladzislau Rezki (Sony)
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Uladzislau Rezki (Sony) @ 2022-06-07  9:34 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Uladzislau Rezki, Oleksiy Avramchenko

A vmap_area can travel between different places, for example it can
be attached to or detached from different rb-trees. In order to
prevent subtle bugs, initialize a VA's list node after it is
removed from the list, so it pairs with the VA's rb_node, which
is also initialized.

There is no functional change as a result of this patch.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 mm/vmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 745e89eb6ca1..82771e555273 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -978,7 +978,7 @@ __unlink_va(struct vmap_area *va, struct rb_root *root, bool augment)
 	else
 		rb_erase(&va->rb_node, root);
 
-	list_del(&va->list);
+	list_del_init(&va->list);
 	RB_CLEAR_NODE(&va->rb_node);
 }
 
-- 
2.30.2



* [PATCH 4/5] mm/vmalloc: Extend __find_vmap_area() with one more argument
  2022-06-07  9:34 [PATCH 0/5] Reduce a vmalloc internal lock contention preparation work Uladzislau Rezki (Sony)
                   ` (2 preceding siblings ...)
  2022-06-07  9:34 ` [PATCH 3/5] mm/vmalloc: Initialize VA's list node after unlink Uladzislau Rezki (Sony)
@ 2022-06-07  9:34 ` Uladzislau Rezki (Sony)
  2022-06-07  9:47   ` Christoph Hellwig
  2022-06-08  3:47   ` Baoquan He
  2022-06-07  9:34 ` [PATCH 5/5] lib/test_vmalloc: Switch to prandom_u32() Uladzislau Rezki (Sony)
  2022-06-07 22:35 ` [PATCH 0/5] Reduce a vmalloc internal lock contention preparation work Andrew Morton
  5 siblings, 2 replies; 20+ messages in thread
From: Uladzislau Rezki (Sony) @ 2022-06-07  9:34 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Uladzislau Rezki, Oleksiy Avramchenko

__find_vmap_area() finds a "vmap_area" based on a passed address.
It scans the specific "vmap_area_root" rb-tree. Extend the function
with one extra argument, so that the tree in which the search has
to be done can be specified.

There is no functional change as a result of this patch.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 mm/vmalloc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 82771e555273..b60385101897 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -814,9 +814,9 @@ static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
 	return va;
 }
 
-static struct vmap_area *__find_vmap_area(unsigned long addr)
+static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
 {
-	struct rb_node *n = vmap_area_root.rb_node;
+	struct rb_node *n = root->rb_node;
 
 	addr = (unsigned long)kasan_reset_tag((void *)addr);
 
@@ -1840,7 +1840,7 @@ static struct vmap_area *find_vmap_area(unsigned long addr)
 	struct vmap_area *va;
 
 	spin_lock(&vmap_area_lock);
-	va = __find_vmap_area(addr);
+	va = __find_vmap_area(addr, &vmap_area_root);
 	spin_unlock(&vmap_area_lock);
 
 	return va;
@@ -2582,7 +2582,7 @@ struct vm_struct *remove_vm_area(const void *addr)
 	might_sleep();
 
 	spin_lock(&vmap_area_lock);
-	va = __find_vmap_area((unsigned long)addr);
+	va = __find_vmap_area((unsigned long)addr, &vmap_area_root);
 	if (va && va->vm) {
 		struct vm_struct *vm = va->vm;
 
-- 
2.30.2



* [PATCH 5/5] lib/test_vmalloc: Switch to prandom_u32()
  2022-06-07  9:34 [PATCH 0/5] Reduce a vmalloc internal lock contention preparation work Uladzislau Rezki (Sony)
                   ` (3 preceding siblings ...)
  2022-06-07  9:34 ` [PATCH 4/5] mm/vmalloc: Extend __find_vmap_area() with one more argument Uladzislau Rezki (Sony)
@ 2022-06-07  9:34 ` Uladzislau Rezki (Sony)
  2022-06-07 22:35 ` [PATCH 0/5] Reduce a vmalloc internal lock contention preparation work Andrew Morton
  5 siblings, 0 replies; 20+ messages in thread
From: Uladzislau Rezki (Sony) @ 2022-06-07  9:34 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Uladzislau Rezki, Oleksiy Avramchenko

The get_random_bytes() function can cause high contention if it is
called across CPUs simultaneously, because it shares one lock for
all CPUs:

<snip>
   class name     con-bounces  contentions   waittime-min   waittime-max waittime-total   waittime-avg    acq-bounces   acquisitions   holdtime-min   holdtime-max holdtime-total   holdtime-avg
   &crng->lock:   663145       665886        0.05           8.85         261966.66        0.39            7188152       13731279       0.04           11.89        2181582.30       0.16
   -----------
   &crng->lock    307835       [<00000000acba59cd>] _extract_crng+0x48/0x90
   &crng->lock    358051       [<00000000f0075abc>] _crng_backtrack_protect+0x32/0x90
   -----------
   &crng->lock    234241       [<00000000f0075abc>] _crng_backtrack_protect+0x32/0x90
   &crng->lock    431645       [<00000000acba59cd>] _extract_crng+0x48/0x90
<snip>

Switch from get_random_bytes() to prandom_u32(), which does not
have any internal contention, when a random value is needed for
the tests.

The reason is to keep the CPU cycles introduced by the test suite
itself out of the vmalloc performance metrics.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 lib/test_vmalloc.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c
index cf41fd6df42a..4f2f2d1bac56 100644
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -74,12 +74,13 @@ test_report_one_done(void)
 
 static int random_size_align_alloc_test(void)
 {
-	unsigned long size, align, rnd;
+	unsigned long size, align;
+	unsigned int rnd;
 	void *ptr;
 	int i;
 
 	for (i = 0; i < test_loop_count; i++) {
-		get_random_bytes(&rnd, sizeof(rnd));
+		rnd = prandom_u32();
 
 		/*
 		 * Maximum 1024 pages, if PAGE_SIZE is 4096.
@@ -150,7 +151,7 @@ static int random_size_alloc_test(void)
 	int i;
 
 	for (i = 0; i < test_loop_count; i++) {
-		get_random_bytes(&n, sizeof(i));
+		n = prandom_u32();
 		n = (n % 100) + 1;
 
 		p = vmalloc(n * PAGE_SIZE);
@@ -294,14 +295,14 @@ pcpu_alloc_test(void)
 	for (i = 0; i < 35000; i++) {
 		unsigned int r;
 
-		get_random_bytes(&r, sizeof(i));
+		r = prandom_u32();
 		size = (r % (PAGE_SIZE / 4)) + 1;
 
 		/*
 		 * Maximum PAGE_SIZE
 		 */
-		get_random_bytes(&r, sizeof(i));
-		align = 1 << ((i % 11) + 1);
+		r = prandom_u32();
+		align = 1 << ((r % 11) + 1);
 
 		pcpu[i] = __alloc_percpu(size, align);
 		if (!pcpu[i])
@@ -396,7 +397,7 @@ static void shuffle_array(int *arr, int n)
 	int i, j;
 
 	for (i = n - 1; i > 0; i--)  {
-		get_random_bytes(&rnd, sizeof(rnd));
+		rnd = prandom_u32();
 
 		/* Cut the range. */
 		j = rnd % i;
-- 
2.30.2



* Re: [PATCH 4/5] mm/vmalloc: Extend __find_vmap_area() with one more argument
  2022-06-07  9:34 ` [PATCH 4/5] mm/vmalloc: Extend __find_vmap_area() with one more argument Uladzislau Rezki (Sony)
@ 2022-06-07  9:47   ` Christoph Hellwig
  2022-06-07 13:03     ` Uladzislau Rezki
  2022-06-08  3:47   ` Baoquan He
  1 sibling, 1 reply; 20+ messages in thread
From: Christoph Hellwig @ 2022-06-07  9:47 UTC (permalink / raw)
  To: Uladzislau Rezki (Sony)
  Cc: Andrew Morton, linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Oleksiy Avramchenko

On Tue, Jun 07, 2022 at 11:34:48AM +0200, Uladzislau Rezki (Sony) wrote:
> __find_vmap_area() finds a "vmap_area" based on a passed address.
> It scans the specific "vmap_area_root" rb-tree. Extend the function
> with one extra argument, so that the tree in which the search has
> to be done can be specified.

Uhh, why?  This just adds a completely useless argument.


* Re: [PATCH 2/5] mm/vmalloc: Extend __alloc_vmap_area() with extra arguments
  2022-06-07  9:34 ` [PATCH 2/5] mm/vmalloc: Extend __alloc_vmap_area() with extra arguments Uladzislau Rezki (Sony)
@ 2022-06-07  9:49   ` Christoph Hellwig
  2022-06-07 13:02     ` Uladzislau Rezki
  2022-06-08  3:46   ` Baoquan He
  1 sibling, 1 reply; 20+ messages in thread
From: Christoph Hellwig @ 2022-06-07  9:49 UTC (permalink / raw)
  To: Uladzislau Rezki (Sony)
  Cc: Andrew Morton, linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Oleksiy Avramchenko

On Tue, Jun 07, 2022 at 11:34:46AM +0200, Uladzislau Rezki (Sony) wrote:
> Currently __alloc_vmap_area() allocates only from the global vmap
> space, therefore the list head and rb-tree, which represent the
> free vmap space, are not passed as parameters to this function and
> are accessed directly from within it.

Yes, which totally makes sense.

> Extend __alloc_vmap_area() and other dependent functions so that
> they can allocate from different trees, making the interface common
> rather than specific.

Which seems completely pointless.  Why add arguments that are always
passed the same values?


* Re: [PATCH 2/5] mm/vmalloc: Extend __alloc_vmap_area() with extra arguments
  2022-06-07  9:49   ` Christoph Hellwig
@ 2022-06-07 13:02     ` Uladzislau Rezki
  2022-06-10  8:18       ` Christoph Hellwig
  0 siblings, 1 reply; 20+ messages in thread
From: Uladzislau Rezki @ 2022-06-07 13:02 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Linux Memory Management List, LKML,
	Matthew Wilcox, Nicholas Piggin, Oleksiy Avramchenko

>
> On Tue, Jun 07, 2022 at 11:34:46AM +0200, Uladzislau Rezki (Sony) wrote:
> > Currently __alloc_vmap_area() allocates only from the global vmap
> > space, therefore the list head and rb-tree, which represent the
> > free vmap space, are not passed as parameters to this function and
> > are accessed directly from within it.
>
> Yes, which totally makes sense.
>
> > Extend __alloc_vmap_area() and other dependent functions so that
> > they can allocate from different trees, making the interface common
> > rather than specific.
>
> Which seems completely pointless.  Why add arguments that are always
> passed the same values?
>
I wrote about it in the cover letter. It is preparation work for
making vmalloc per-cpu. In that case the free/busy data are located
in different rb_roots, which is why those functions have to be
adapted to work with any tree.
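
As a very rough sketch, and only to illustrate the direction (the
cpu_vmap_zone structure and the helper below are made-up names, they
are not part of this series), the now-generic helpers could then be
pointed at a per-CPU tree/list pair instead of the global one:

<snip>
/* Illustrative only: a free-space tree/list pair owned by one CPU. */
struct cpu_vmap_zone {
	spinlock_t lock;
	struct rb_root free_root;
	struct list_head free_head;
};

static DEFINE_PER_CPU(struct cpu_vmap_zone, vmap_zone);

static unsigned long
zone_alloc_vmap_area(unsigned long size, unsigned long align,
	unsigned long vstart, unsigned long vend)
{
	struct cpu_vmap_zone *z = raw_cpu_ptr(&vmap_zone);
	unsigned long addr;

	/* Same helper as in patch 2, but a different tree/list than
	 * the global free_vmap_area_root/free_vmap_area_list.
	 */
	spin_lock(&z->lock);
	addr = __alloc_vmap_area(&z->free_root, &z->free_head,
			size, align, vstart, vend);
	spin_unlock(&z->lock);

	return addr;
}
<snip>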

-- 
Uladzislau Rezki


* Re: [PATCH 4/5] mm/vmalloc: Extend __find_vmap_area() with one more argument
  2022-06-07  9:47   ` Christoph Hellwig
@ 2022-06-07 13:03     ` Uladzislau Rezki
  0 siblings, 0 replies; 20+ messages in thread
From: Uladzislau Rezki @ 2022-06-07 13:03 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Linux Memory Management List, LKML,
	Matthew Wilcox, Nicholas Piggin, Oleksiy Avramchenko

On Tue, Jun 7, 2022 at 11:47 AM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Tue, Jun 07, 2022 at 11:34:48AM +0200, Uladzislau Rezki (Sony) wrote:
> > __find_vmap_area() finds a "vmap_area" based on a passed address.
> > It scans the specific "vmap_area_root" rb-tree. Extend the function
> > with one extra argument, so that the tree in which the search has
> > to be done can be specified.
>
> Uhh, why?  This just adds a completely useless argument.
>
I wrote about it in the cover letter. It is preparation work for
making vmalloc per-cpu. In that case the free/busy data are located
in different rb_roots, which is why those functions have to be
adapted to work with any tree.
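
As an illustration only (the helper below is hypothetical and not
part of this series), the lookup then works against any busy tree,
and today's global tree becomes just one instance of it:

<snip>
/* Sketch: the caller chooses which busy tree is searched. */
static struct vmap_area *
busy_tree_find_vmap_area(spinlock_t *lock, struct rb_root *root,
	unsigned long addr)
{
	struct vmap_area *va;

	spin_lock(lock);
	va = __find_vmap_area(addr, root);
	spin_unlock(lock);

	return va;
}

/* The current global case would simply be:
 *	busy_tree_find_vmap_area(&vmap_area_lock, &vmap_area_root, addr);
 */
<snip>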

-- 
Uladzislau Rezki


* Re: [PATCH 0/5] Reduce a vmalloc internal lock contention preparation work
  2022-06-07  9:34 [PATCH 0/5] Reduce a vmalloc internal lock contention preparation work Uladzislau Rezki (Sony)
                   ` (4 preceding siblings ...)
  2022-06-07  9:34 ` [PATCH 5/5] lib/test_vmalloc: Switch to prandom_u32() Uladzislau Rezki (Sony)
@ 2022-06-07 22:35 ` Andrew Morton
  2022-06-08 10:05   ` Uladzislau Rezki
  5 siblings, 1 reply; 20+ messages in thread
From: Andrew Morton @ 2022-06-07 22:35 UTC (permalink / raw)
  To: Uladzislau Rezki (Sony)
  Cc: linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Oleksiy Avramchenko

On Tue,  7 Jun 2022 11:34:44 +0200 "Uladzislau Rezki (Sony)" <urezki@gmail.com> wrote:

> This small series is preparation work to implement per-cpu vmalloc
> allocation in order to reduce high internal lock contention. This
> series does not introduce any functional changes; it is only about
> preparation.

I can toss it in for some runtime testing, but...

What lock are we talking about here, what is the magnitude of the
performance issues it is causing and what is the status of the patch
which uses all this preparation?



* Re: [PATCH 3/5] mm/vmalloc: Initialize VA's list node after unlink
  2022-06-07  9:34 ` [PATCH 3/5] mm/vmalloc: Initialize VA's list node after unlink Uladzislau Rezki (Sony)
@ 2022-06-08  3:19   ` Baoquan He
  2022-06-09 12:36     ` Uladzislau Rezki
  0 siblings, 1 reply; 20+ messages in thread
From: Baoquan He @ 2022-06-08  3:19 UTC (permalink / raw)
  To: Uladzislau Rezki (Sony)
  Cc: Andrew Morton, linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Oleksiy Avramchenko

On 06/07/22 at 11:34am, Uladzislau Rezki (Sony) wrote:
> A vmap_area can travel between different places, for example it can
> be attached to or detached from different rb-trees. In order to
> prevent subtle bugs, initialize a VA's list node after it is
> removed from the list, so it pairs with the VA's rb_node, which
> is also initialized.
> 
> There is no functional change as a result of this patch.
> 
> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> ---
>  mm/vmalloc.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 745e89eb6ca1..82771e555273 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -978,7 +978,7 @@ __unlink_va(struct vmap_area *va, struct rb_root *root, bool augment)
>  	else
>  		rb_erase(&va->rb_node, root);
>  
> -	list_del(&va->list);
> +	list_del_init(&va->list);

I don't object to this change, but list_del() poisons the members,
which is also not bad?

static inline void list_del(struct list_head *entry)                                     
{                                                                                        
        __list_del_entry(entry);                                                         
        entry->next = LIST_POISON1;                                                      
        entry->prev = LIST_POISON2;                                                      
}   


>  	RB_CLEAR_NODE(&va->rb_node);
>  }
>  
> -- 
> 2.30.2
> 
> 



* Re: [PATCH 1/5] mm/vmalloc: Make link_va()/unlink_va() common to different rb_root
  2022-06-07  9:34 ` [PATCH 1/5] mm/vmalloc: Make link_va()/unlink_va() common to different rb_root Uladzislau Rezki (Sony)
@ 2022-06-08  3:45   ` Baoquan He
  0 siblings, 0 replies; 20+ messages in thread
From: Baoquan He @ 2022-06-08  3:45 UTC (permalink / raw)
  To: Uladzislau Rezki (Sony)
  Cc: Andrew Morton, linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Oleksiy Avramchenko

On 06/07/22 at 11:34am, Uladzislau Rezki (Sony) wrote:
> Currently link_va() and unlink_va(), in order to figure out the tree
> type, compare a passed root value with the global free_vmap_area_root
> variable to distinguish the augmented rb-tree from a regular one. It
> is hard-coded since such functions can only manipulate the specific
> "free_vmap_area_root" tree that represents the global free vmap space.
> 
> Make them common by introducing "_augment" versions of both internal
> functions, so it is possible to deal with different trees.
> 
> There is no functional change as a result of this patch.

Patch looks good to me. Looking forward to seeing the later per-cpu
vmalloc allocation patches.

Reviewed-by: Baoquan He <bhe@redhat.com>

> 
> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> ---
>  mm/vmalloc.c | 60 +++++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 48 insertions(+), 12 deletions(-)
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index be8ed06804a5..0102d6d5fcdf 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -911,8 +911,9 @@ get_va_next_sibling(struct rb_node *parent, struct rb_node **link)
>  }
>  
>  static __always_inline void
> -link_va(struct vmap_area *va, struct rb_root *root,
> -	struct rb_node *parent, struct rb_node **link, struct list_head *head)
> +__link_va(struct vmap_area *va, struct rb_root *root,
> +	struct rb_node *parent, struct rb_node **link,
> +	struct list_head *head, bool augment)
>  {
>  	/*
>  	 * VA is still not in the list, but we can
> @@ -926,7 +927,7 @@ link_va(struct vmap_area *va, struct rb_root *root,
>  
>  	/* Insert to the rb-tree */
>  	rb_link_node(&va->rb_node, parent, link);
> -	if (root == &free_vmap_area_root) {
> +	if (augment) {
>  		/*
>  		 * Some explanation here. Just perform simple insertion
>  		 * to the tree. We do not set va->subtree_max_size to
> @@ -950,12 +951,28 @@ link_va(struct vmap_area *va, struct rb_root *root,
>  }
>  
>  static __always_inline void
> -unlink_va(struct vmap_area *va, struct rb_root *root)
> +link_va(struct vmap_area *va, struct rb_root *root,
> +	struct rb_node *parent, struct rb_node **link,
> +	struct list_head *head)
> +{
> +	__link_va(va, root, parent, link, head, false);
> +}
> +
> +static __always_inline void
> +link_va_augment(struct vmap_area *va, struct rb_root *root,
> +	struct rb_node *parent, struct rb_node **link,
> +	struct list_head *head)
> +{
> +	__link_va(va, root, parent, link, head, true);
> +}
> +
> +static __always_inline void
> +__unlink_va(struct vmap_area *va, struct rb_root *root, bool augment)
>  {
>  	if (WARN_ON(RB_EMPTY_NODE(&va->rb_node)))
>  		return;
>  
> -	if (root == &free_vmap_area_root)
> +	if (augment)
>  		rb_erase_augmented(&va->rb_node,
>  			root, &free_vmap_area_rb_augment_cb);
>  	else
> @@ -965,6 +982,18 @@ unlink_va(struct vmap_area *va, struct rb_root *root)
>  	RB_CLEAR_NODE(&va->rb_node);
>  }
>  
> +static __always_inline void
> +unlink_va(struct vmap_area *va, struct rb_root *root)
> +{
> +	__unlink_va(va, root, false);
> +}
> +
> +static __always_inline void
> +unlink_va_augment(struct vmap_area *va, struct rb_root *root)
> +{
> +	__unlink_va(va, root, true);
> +}
> +
>  #if DEBUG_AUGMENT_PROPAGATE_CHECK
>  /*
>   * Gets called when remove the node and rotate.
> @@ -1060,7 +1089,7 @@ insert_vmap_area_augment(struct vmap_area *va,
>  		link = find_va_links(va, root, NULL, &parent);
>  
>  	if (link) {
> -		link_va(va, root, parent, link, head);
> +		link_va_augment(va, root, parent, link, head);
>  		augment_tree_propagate_from(va);
>  	}
>  }
> @@ -1077,8 +1106,8 @@ insert_vmap_area_augment(struct vmap_area *va,
>   * ongoing.
>   */
>  static __always_inline struct vmap_area *
> -merge_or_add_vmap_area(struct vmap_area *va,
> -	struct rb_root *root, struct list_head *head)
> +__merge_or_add_vmap_area(struct vmap_area *va,
> +	struct rb_root *root, struct list_head *head, bool augment)
>  {
>  	struct vmap_area *sibling;
>  	struct list_head *next;
> @@ -1140,7 +1169,7 @@ merge_or_add_vmap_area(struct vmap_area *va,
>  			 * "normalized" because of rotation operations.
>  			 */
>  			if (merged)
> -				unlink_va(va, root);
> +				__unlink_va(va, root, augment);
>  
>  			sibling->va_end = va->va_end;
>  
> @@ -1155,16 +1184,23 @@ merge_or_add_vmap_area(struct vmap_area *va,
>  
>  insert:
>  	if (!merged)
> -		link_va(va, root, parent, link, head);
> +		__link_va(va, root, parent, link, head, augment);
>  
>  	return va;
>  }
>  
> +static __always_inline struct vmap_area *
> +merge_or_add_vmap_area(struct vmap_area *va,
> +	struct rb_root *root, struct list_head *head)
> +{
> +	return __merge_or_add_vmap_area(va, root, head, false);
> +}
> +
>  static __always_inline struct vmap_area *
>  merge_or_add_vmap_area_augment(struct vmap_area *va,
>  	struct rb_root *root, struct list_head *head)
>  {
> -	va = merge_or_add_vmap_area(va, root, head);
> +	va = __merge_or_add_vmap_area(va, root, head, true);
>  	if (va)
>  		augment_tree_propagate_from(va);
>  
> @@ -1348,7 +1384,7 @@ adjust_va_to_fit_type(struct vmap_area *va,
>  		 * V      NVA      V
>  		 * |---------------|
>  		 */
> -		unlink_va(va, &free_vmap_area_root);
> +		unlink_va_augment(va, &free_vmap_area_root);
>  		kmem_cache_free(vmap_area_cachep, va);
>  	} else if (type == LE_FIT_TYPE) {
>  		/*
> -- 
> 2.30.2
> 
> 



* Re: [PATCH 2/5] mm/vmalloc: Extend __alloc_vmap_area() with extra arguments
  2022-06-07  9:34 ` [PATCH 2/5] mm/vmalloc: Extend __alloc_vmap_area() with extra arguments Uladzislau Rezki (Sony)
  2022-06-07  9:49   ` Christoph Hellwig
@ 2022-06-08  3:46   ` Baoquan He
  1 sibling, 0 replies; 20+ messages in thread
From: Baoquan He @ 2022-06-08  3:46 UTC (permalink / raw)
  To: Uladzislau Rezki (Sony)
  Cc: Andrew Morton, linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Oleksiy Avramchenko

On 06/07/22 at 11:34am, Uladzislau Rezki (Sony) wrote:
> Currently __alloc_vmap_area() allocates only from the global vmap
> space, therefore the list head and rb-tree, which represent the
> free vmap space, are not passed as parameters to this function and
> are accessed directly from within it.
> 
> Extend __alloc_vmap_area() and other dependent functions so that
> they can allocate from different trees, making the interface common
> rather than specific.
> 
> There is no functional change as a result of this patch.

LGTM,

Reviewed-by: Baoquan He <bhe@redhat.com>

> 
> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> ---
>  mm/vmalloc.c | 30 ++++++++++++++++--------------
>  1 file changed, 16 insertions(+), 14 deletions(-)
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 0102d6d5fcdf..745e89eb6ca1 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -1234,15 +1234,15 @@ is_within_this_va(struct vmap_area *va, unsigned long size,
>   * overhead.
>   */
>  static __always_inline struct vmap_area *
> -find_vmap_lowest_match(unsigned long size, unsigned long align,
> -	unsigned long vstart, bool adjust_search_size)
> +find_vmap_lowest_match(struct rb_root *root, unsigned long size,
> +	unsigned long align, unsigned long vstart, bool adjust_search_size)
>  {
>  	struct vmap_area *va;
>  	struct rb_node *node;
>  	unsigned long length;
>  
>  	/* Start from the root. */
> -	node = free_vmap_area_root.rb_node;
> +	node = root->rb_node;
>  
>  	/* Adjust the search size for alignment overhead. */
>  	length = adjust_search_size ? size + align - 1 : size;
> @@ -1370,9 +1370,9 @@ classify_va_fit_type(struct vmap_area *va,
>  }
>  
>  static __always_inline int
> -adjust_va_to_fit_type(struct vmap_area *va,
> -	unsigned long nva_start_addr, unsigned long size,
> -	enum fit_type type)
> +adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
> +	struct vmap_area *va, unsigned long nva_start_addr,
> +	unsigned long size, enum fit_type type)
>  {
>  	struct vmap_area *lva = NULL;
>  
> @@ -1384,7 +1384,7 @@ adjust_va_to_fit_type(struct vmap_area *va,
>  		 * V      NVA      V
>  		 * |---------------|
>  		 */
> -		unlink_va_augment(va, &free_vmap_area_root);
> +		unlink_va_augment(va, root);
>  		kmem_cache_free(vmap_area_cachep, va);
>  	} else if (type == LE_FIT_TYPE) {
>  		/*
> @@ -1462,8 +1462,7 @@ adjust_va_to_fit_type(struct vmap_area *va,
>  		augment_tree_propagate_from(va);
>  
>  		if (lva)	/* type == NE_FIT_TYPE */
> -			insert_vmap_area_augment(lva, &va->rb_node,
> -				&free_vmap_area_root, &free_vmap_area_list);
> +			insert_vmap_area_augment(lva, &va->rb_node, root, head);
>  	}
>  
>  	return 0;
> @@ -1474,7 +1473,8 @@ adjust_va_to_fit_type(struct vmap_area *va,
>   * Otherwise a vend is returned that indicates failure.
>   */
>  static __always_inline unsigned long
> -__alloc_vmap_area(unsigned long size, unsigned long align,
> +__alloc_vmap_area(struct rb_root *root, struct list_head *head,
> +	unsigned long size, unsigned long align,
>  	unsigned long vstart, unsigned long vend)
>  {
>  	bool adjust_search_size = true;
> @@ -1495,7 +1495,7 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
>  	if (align <= PAGE_SIZE || (align > PAGE_SIZE && (vend - vstart) == size))
>  		adjust_search_size = false;
>  
> -	va = find_vmap_lowest_match(size, align, vstart, adjust_search_size);
> +	va = find_vmap_lowest_match(root, size, align, vstart, adjust_search_size);
>  	if (unlikely(!va))
>  		return vend;
>  
> @@ -1514,7 +1514,7 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
>  		return vend;
>  
>  	/* Update the free vmap_area. */
> -	ret = adjust_va_to_fit_type(va, nva_start_addr, size, type);
> +	ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size, type);
>  	if (ret)
>  		return vend;
>  
> @@ -1605,7 +1605,8 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
>  
>  retry:
>  	preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
> -	addr = __alloc_vmap_area(size, align, vstart, vend);
> +	addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
> +		size, align, vstart, vend);
>  	spin_unlock(&free_vmap_area_lock);
>  
>  	/*
> @@ -3886,7 +3887,8 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
>  			/* It is a BUG(), but trigger recovery instead. */
>  			goto recovery;
>  
> -		ret = adjust_va_to_fit_type(va, start, size, type);
> +		ret = adjust_va_to_fit_type(&free_vmap_area_root,
> +				&free_vmap_area_list, va, start, size, type);
>  		if (unlikely(ret))
>  			goto recovery;
>  
> -- 
> 2.30.2
> 
> 



* Re: [PATCH 4/5] mm/vmalloc: Extend __find_vmap_area() with one more argument
  2022-06-07  9:34 ` [PATCH 4/5] mm/vmalloc: Extend __find_vmap_area() with one more argument Uladzislau Rezki (Sony)
  2022-06-07  9:47   ` Christoph Hellwig
@ 2022-06-08  3:47   ` Baoquan He
  1 sibling, 0 replies; 20+ messages in thread
From: Baoquan He @ 2022-06-08  3:47 UTC (permalink / raw)
  To: Uladzislau Rezki (Sony)
  Cc: Andrew Morton, linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Oleksiy Avramchenko

On 06/07/22 at 11:34am, Uladzislau Rezki (Sony) wrote:
> __find_vmap_area() finds a "vmap_area" based on a passed address.
> It scans the specific "vmap_area_root" rb-tree. Extend the function
> with one extra argument, so that the tree in which the search has
> to be done can be specified.
> 
> There is no functional change as a result of this patch.

LGTM,
Reviewed-by: Baoquan He <bhe@redhat.com>

> 
> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> ---
>  mm/vmalloc.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 82771e555273..b60385101897 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -814,9 +814,9 @@ static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
>  	return va;
>  }
>  
> -static struct vmap_area *__find_vmap_area(unsigned long addr)
> +static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
>  {
> -	struct rb_node *n = vmap_area_root.rb_node;
> +	struct rb_node *n = root->rb_node;
>  
>  	addr = (unsigned long)kasan_reset_tag((void *)addr);
>  
> @@ -1840,7 +1840,7 @@ static struct vmap_area *find_vmap_area(unsigned long addr)
>  	struct vmap_area *va;
>  
>  	spin_lock(&vmap_area_lock);
> -	va = __find_vmap_area(addr);
> +	va = __find_vmap_area(addr, &vmap_area_root);
>  	spin_unlock(&vmap_area_lock);
>  
>  	return va;
> @@ -2582,7 +2582,7 @@ struct vm_struct *remove_vm_area(const void *addr)
>  	might_sleep();
>  
>  	spin_lock(&vmap_area_lock);
> -	va = __find_vmap_area((unsigned long)addr);
> +	va = __find_vmap_area((unsigned long)addr, &vmap_area_root);
>  	if (va && va->vm) {
>  		struct vm_struct *vm = va->vm;
>  
> -- 
> 2.30.2
> 
> 



* Re: [PATCH 0/5] Reduce a vmalloc internal lock contention preparation work
  2022-06-07 22:35 ` [PATCH 0/5] Reduce a vmalloc internal lock contention preparation work Andrew Morton
@ 2022-06-08 10:05   ` Uladzislau Rezki
  0 siblings, 0 replies; 20+ messages in thread
From: Uladzislau Rezki @ 2022-06-08 10:05 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Oleksiy Avramchenko

>
> I can toss it in for some runtime testing, but...
>
> What lock are we talking about here, what is the magnitude of the
> performance issues it is causing and what is the status of the patch
> which uses all this preparation?
>
1.
vmalloc still uses a global lock in order to access the global vmap
space. As for magnitude, it depends on the number of CPUs: the higher
the number, the higher the contention. It is a linear dependence (see
the snippet after this list for the path where every CPU takes the
same lock).

2.
I have not run into performance issues on my own setup. On the other
hand, there is a "per-CPU kva allocator" built on top of vmalloc, see
vm_map_ram()/vm_unmap_ram(). Having a per-CPU vmalloc we could get
rid of it.

It is used by XFS, f2fs and some drivers. The reason is that vmalloc
is costly due to its internal global lock, which is why those users
go with the "per-CPU kva allocator" to accelerate their workloads.

3.
My synthetic test shows a big difference between the per-CPU vmalloc
patches and the default variant. I have different prototypes based on
various ways of making it per-CPU. I do not yet have a full solution
that satisfies all the needs, and I am not sure one is possible due
to many constraints.

4.
This series is not tied to the future per-cpu-vmalloc patches; rather
it makes the vmalloc code more generic, and as a result of such
common code it would be easier to extend it to a per-CPU variant.

It means that if per-cpu does not land, this series does not need to
be reverted.

That is the status.
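
To illustrate point 1, this is the allocation path with this series
applied (as shown in patch 2); all CPUs still funnel through the one
global free_vmap_area_lock:

<snip>
	/* In alloc_vmap_area(): the lock and the free tree/list are
	 * still the single global instances.
	 */
	preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
	addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
		size, align, vstart, vend);
	spin_unlock(&free_vmap_area_lock);
<snip>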

-- 
Uladzislau Rezki


* Re: [PATCH 3/5] mm/vmalloc: Initialize VA's list node after unlink
  2022-06-08  3:19   ` Baoquan He
@ 2022-06-09 12:36     ` Uladzislau Rezki
  2022-06-09 13:30       ` Baoquan He
  0 siblings, 1 reply; 20+ messages in thread
From: Uladzislau Rezki @ 2022-06-09 12:36 UTC (permalink / raw)
  To: Baoquan He
  Cc: Andrew Morton, Linux Memory Management List, LKML,
	Christoph Hellwig, Matthew Wilcox, Nicholas Piggin,
	Oleksiy Avramchenko

>
> On 06/07/22 at 11:34am, Uladzislau Rezki (Sony) wrote:
> > A vmap_area can travel between different places, for example it can
> > be attached to or detached from different rb-trees. In order to
> > prevent subtle bugs, initialize a VA's list node after it is
> > removed from the list, so it pairs with the VA's rb_node, which
> > is also initialized.
> >
> > There is no functional change as a result of this patch.
> >
> > Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> > ---
> >  mm/vmalloc.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > index 745e89eb6ca1..82771e555273 100644
> > --- a/mm/vmalloc.c
> > +++ b/mm/vmalloc.c
> > @@ -978,7 +978,7 @@ __unlink_va(struct vmap_area *va, struct rb_root *root, bool augment)
> >       else
> >               rb_erase(&va->rb_node, root);
> >
> > -     list_del(&va->list);
> > +     list_del_init(&va->list);
>
> I don't object to this change, but list_del() poisons the members,
> which is also not bad?
>
It is not bad for sure. The main aim was to align with what
RB_CLEAR_NODE() does, i.e. initialize the VA when it is detached
and be safe with list manipulation while it is detached, for
example checking whether it is empty or not with list_empty(), etc.
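
For reference, list_del_init() (from include/linux/list.h)
reinitializes the entry instead of poisoning it, which is what keeps
list_empty() usable on a detached VA:

<snip>
static inline void list_del_init(struct list_head *entry)
{
	__list_del_entry(entry);
	INIT_LIST_HEAD(entry);
}
<snip>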

-- 
Uladzislau Rezki


* Re: [PATCH 3/5] mm/vmalloc: Initialize VA's list node after unlink
  2022-06-09 12:36     ` Uladzislau Rezki
@ 2022-06-09 13:30       ` Baoquan He
  2022-06-09 13:53         ` Uladzislau Rezki
  0 siblings, 1 reply; 20+ messages in thread
From: Baoquan He @ 2022-06-09 13:30 UTC (permalink / raw)
  To: Uladzislau Rezki
  Cc: Andrew Morton, Linux Memory Management List, LKML,
	Christoph Hellwig, Matthew Wilcox, Nicholas Piggin,
	Oleksiy Avramchenko

On 06/09/22 at 02:36pm, Uladzislau Rezki wrote:
> >
> > On 06/07/22 at 11:34am, Uladzislau Rezki (Sony) wrote:
> > > A vmap_area can travel between different places, for example it can
> > > be attached to or detached from different rb-trees. In order to
> > > prevent subtle bugs, initialize a VA's list node after it is
> > > removed from the list, so it pairs with the VA's rb_node, which
> > > is also initialized.
> > >
> > > There is no functional change as a result of this patch.
> > >
> > > Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> > > ---
> > >  mm/vmalloc.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > > index 745e89eb6ca1..82771e555273 100644
> > > --- a/mm/vmalloc.c
> > > +++ b/mm/vmalloc.c
> > > @@ -978,7 +978,7 @@ __unlink_va(struct vmap_area *va, struct rb_root *root, bool augment)
> > >       else
> > >               rb_erase(&va->rb_node, root);
> > >
> > > -     list_del(&va->list);
> > > +     list_del_init(&va->list);
> >
> > I don't object to this change, but list_del() poisons the members,
> > which is also not bad?
> >
> It is not bad for sure. The main aim was to align with what
> RB_CLEAR_NODE() does, i.e. initialize the VA when it is detached
> and be safe with list manipulation while it is detached, for
> example checking whether it is empty or not with list_empty(), etc.

Agree. list_del() can't make list_empty() work, and RB_CLEAR_NODE() has
done the clearing already.

Then this change looks reasonable to me, thanks.

Reviewed-by: Baoquan He <bhe@redhat.com>



* Re: [PATCH 3/5] mm/vmalloc: Initialize VA's list node after unlink
  2022-06-09 13:30       ` Baoquan He
@ 2022-06-09 13:53         ` Uladzislau Rezki
  0 siblings, 0 replies; 20+ messages in thread
From: Uladzislau Rezki @ 2022-06-09 13:53 UTC (permalink / raw)
  To: Baoquan He
  Cc: Andrew Morton, Linux Memory Management List, LKML,
	Christoph Hellwig, Matthew Wilcox, Nicholas Piggin,
	Oleksiy Avramchenko

On Thu, Jun 9, 2022 at 3:30 PM Baoquan He <bhe@redhat.com> wrote:
>
> On 06/09/22 at 02:36pm, Uladzislau Rezki wrote:
> > >
> > > On 06/07/22 at 11:34am, Uladzislau Rezki (Sony) wrote:
> > > > A vmap_area can travel between different places, for example it can
> > > > be attached to or detached from different rb-trees. In order to
> > > > prevent subtle bugs, initialize a VA's list node after it is
> > > > removed from the list, so it pairs with the VA's rb_node, which
> > > > is also initialized.
> > > >
> > > > There is no functional change as a result of this patch.
> > > >
> > > > Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> > > > ---
> > > >  mm/vmalloc.c | 2 +-
> > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > >
> > > > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > > > index 745e89eb6ca1..82771e555273 100644
> > > > --- a/mm/vmalloc.c
> > > > +++ b/mm/vmalloc.c
> > > > @@ -978,7 +978,7 @@ __unlink_va(struct vmap_area *va, struct rb_root *root, bool augment)
> > > >       else
> > > >               rb_erase(&va->rb_node, root);
> > > >
> > > > -     list_del(&va->list);
> > > > +     list_del_init(&va->list);
> > >
> > > I don't object to this change, but list_del() poisons the members,
> > > which is also not bad?
> > >
> > It is not bad for sure. The main aim was to align with what
> > RB_CLEAR_NODE() does, i.e. initialize the VA when it is detached
> > and be safe with list manipulation while it is detached, for
> > example checking whether it is empty or not with list_empty(), etc.
>
> Agree. list_del() can't make list_empty() work, and RB_CLEAR_NODE() has
> done the clearing already.
>
> Then this change looks reasonable to me, thanks.
>
> Reviewed-by: Baoquan He <bhe@redhat.com>
>
Thanks!

-- 
Uladzislau Rezki


* Re: [PATCH 2/5] mm/vmalloc: Extend __alloc_vmap_area() with extra arguments
  2022-06-07 13:02     ` Uladzislau Rezki
@ 2022-06-10  8:18       ` Christoph Hellwig
  0 siblings, 0 replies; 20+ messages in thread
From: Christoph Hellwig @ 2022-06-10  8:18 UTC (permalink / raw)
  To: Uladzislau Rezki
  Cc: Christoph Hellwig, Andrew Morton, Linux Memory Management List,
	LKML, Matthew Wilcox, Nicholas Piggin, Oleksiy Avramchenko

On Tue, Jun 07, 2022 at 03:02:42PM +0200, Uladzislau Rezki wrote:
> I wrote about it in the cover letter. It is preparation work for
> making vmalloc per-cpu.

Then:

 a) state this in the actual patch commit log.  Those need to be
    standalone and sufficiently describe what the patch is doing.
 b) do that in the actual series that makes use of them.  In fact
    for these kinds of changes the prep patch really seems like a bad
    idea to start with and it would be much easier to follow if the
    changes were done in the main patch.


