All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
To: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>
Cc: LKML <linux-kernel@vger.kernel.org>, Baoquan He <bhe@redhat.com>,
	Lorenzo Stoakes <lstoakes@gmail.com>,
	Christoph Hellwig <hch@infradead.org>,
	Matthew Wilcox <willy@infradead.org>,
	"Liam R . Howlett" <Liam.Howlett@oracle.com>,
	Dave Chinner <david@fromorbit.com>,
	"Paul E . McKenney" <paulmck@kernel.org>,
	Joel Fernandes <joel@joelfernandes.org>,
	Uladzislau Rezki <urezki@gmail.com>,
	Oleksiy Avramchenko <oleksiy.avramchenko@sony.com>
Subject: [PATCH v2 9/9] mm: vmalloc: Set nr_nodes/node_size based on CPU-cores
Date: Tue, 29 Aug 2023 10:11:42 +0200	[thread overview]
Message-ID: <20230829081142.3619-10-urezki@gmail.com> (raw)
In-Reply-To: <20230829081142.3619-1-urezki@gmail.com>

The density ratio is set to 2, i.e. two users per node.
For example, if there are 6 cores in a system, "nr_nodes"
is 3.

The "node_size" also depends on the number of physical cores.
A high-threshold limit is hard-coded to SZ_4M.

For 32-bit and single/dual-core systems, access to the global
vmap heap is not balanced. Such small systems do not suffer
from lock contention because of their limited number of CPU cores.

Test on AMD Ryzen Threadripper 3970X 32-Core Processor:
sudo ./test_vmalloc.sh run_test_mask=127 nr_threads=64

<default perf>
 94.17%     0.90%  [kernel]    [k] _raw_spin_lock
 93.27%    93.05%  [kernel]    [k] native_queued_spin_lock_slowpath
 74.69%     0.25%  [kernel]    [k] __vmalloc_node_range
 72.64%     0.01%  [kernel]    [k] __get_vm_area_node
 72.04%     0.89%  [kernel]    [k] alloc_vmap_area
 42.17%     0.00%  [kernel]    [k] vmalloc
 32.53%     0.00%  [kernel]    [k] __vmalloc_node
 24.91%     0.25%  [kernel]    [k] vfree
 24.32%     0.01%  [kernel]    [k] remove_vm_area
 22.63%     0.21%  [kernel]    [k] find_unlink_vmap_area
 15.51%     0.00%  [unknown]   [k] 0xffffffffc09a74ac
 14.35%     0.00%  [kernel]    [k] ret_from_fork_asm
 14.35%     0.00%  [kernel]    [k] ret_from_fork
 14.35%     0.00%  [kernel]    [k] kthread
<default perf>
   vs
<patch-series perf>
 74.32%     2.42%  [kernel]    [k] __vmalloc_node_range
 69.58%     0.01%  [kernel]    [k] vmalloc
 54.21%     1.17%  [kernel]    [k] __alloc_pages_bulk
 48.13%    47.91%  [kernel]    [k] clear_page_orig
 43.60%     0.01%  [unknown]   [k] 0xffffffffc082f16f
 32.06%     0.00%  [kernel]    [k] ret_from_fork_asm
 32.06%     0.00%  [kernel]    [k] ret_from_fork
 32.06%     0.00%  [kernel]    [k] kthread
 31.30%     0.00%  [unknown]   [k] 0xffffffffc082f889
 22.98%     4.16%  [kernel]    [k] vfree
 14.36%     0.28%  [kernel]    [k] __get_vm_area_node
 13.43%     3.35%  [kernel]    [k] alloc_vmap_area
 10.86%     0.04%  [kernel]    [k] remove_vm_area
  8.89%     2.75%  [kernel]    [k] _raw_spin_lock
  7.19%     0.00%  [unknown]   [k] 0xffffffffc082fba3
  6.65%     1.37%  [kernel]    [k] free_unref_page
  6.13%     6.11%  [kernel]    [k] native_queued_spin_lock_slowpath
<patch-series perf>

This confirms that the native_queued_spin_lock_slowpath bottleneck
can be considered negligible for the patch-series version.

The throughput is ~15x higher:

urezki@pc638:~$ time sudo ./test_vmalloc.sh run_test_mask=127 nr_threads=64
Run the test with following parameters: run_test_mask=127 nr_threads=64
Done.
Check the kernel ring buffer to see the summary.

real    24m3.305s
user    0m0.361s
sys     0m0.013s
urezki@pc638:~$

urezki@pc638:~$ time sudo ./test_vmalloc.sh run_test_mask=127 nr_threads=64
Run the test with following parameters: run_test_mask=127 nr_threads=64
Done.
Check the kernel ring buffer to see the summary.

real    1m28.382s
user    0m0.014s
sys     0m0.026s
urezki@pc638:~$

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 mm/vmalloc.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 9cce012aecdb..08990f630c21 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -796,6 +796,9 @@ struct vmap_node {
 	atomic_t fill_in_progress;
 };
 
+#define MAX_NODES U8_MAX
+#define MAX_NODE_SIZE SZ_4M
+
 static struct vmap_node *nodes, snode;
 static __read_mostly unsigned int nr_nodes = 1;
 static __read_mostly unsigned int node_size = 1;
@@ -4803,11 +4806,24 @@ static void vmap_init_free_space(void)
 	}
 }
 
+static unsigned int calculate_nr_nodes(void)
+{
+	unsigned int nr_cpus;
+
+	nr_cpus = num_present_cpus();
+	if (nr_cpus <= 1)
+		nr_cpus = num_possible_cpus();
+
+	/* Density factor: two CPUs per node, clamped to [1, MAX_NODES]. */
+	return clamp_t(unsigned int, nr_cpus >> 1, 1, MAX_NODES);
+}
+
 static void vmap_init_nodes(void)
 {
 	struct vmap_node *vn;
 	int i;
 
+	nr_nodes = calculate_nr_nodes();
 	nodes = &snode;
 
 	if (nr_nodes > 1) {
@@ -4830,6 +4846,16 @@ static void vmap_init_nodes(void)
 		INIT_LIST_HEAD(&vn->free.head);
 		spin_lock_init(&vn->free.lock);
 	}
+
+	/*
+	 * Scale a node size to number of CPUs. Each power of two
+	 * value doubles a node size. A high-threshold limit is set
+	 * to 4M.
+	 */
+#if BITS_PER_LONG == 64
+	if (nr_nodes > 1)
+		node_size = min(SZ_64K << fls(num_possible_cpus()), SZ_4M);
+#endif
 }
 
 void __init vmalloc_init(void)
-- 
2.30.2


  parent reply	other threads:[~2023-08-29  8:12 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-29  8:11 [PATCH v2 0/9] Mitigate a vmap lock contention v2 Uladzislau Rezki (Sony)
2023-08-29  8:11 ` [PATCH v2 1/9] mm: vmalloc: Add va_alloc() helper Uladzislau Rezki (Sony)
2023-09-06  5:51   ` Baoquan He
2023-09-06 15:06     ` Uladzislau Rezki
2023-08-29  8:11 ` [PATCH v2 2/9] mm: vmalloc: Rename adjust_va_to_fit_type() function Uladzislau Rezki (Sony)
2023-09-06  5:51   ` Baoquan He
2023-09-06 16:27     ` Uladzislau Rezki
2023-08-29  8:11 ` [PATCH v2 3/9] mm: vmalloc: Move vmap_init_free_space() down in vmalloc.c Uladzislau Rezki (Sony)
2023-09-06  5:52   ` Baoquan He
2023-09-06 16:29     ` Uladzislau Rezki
2023-08-29  8:11 ` [PATCH v2 4/9] mm: vmalloc: Remove global vmap_area_root rb-tree Uladzislau Rezki (Sony)
2023-08-29 14:30   ` kernel test robot
2023-08-30 14:48     ` Uladzislau Rezki
2023-09-07  2:17   ` Baoquan He
2023-09-07  2:17     ` Baoquan He
2023-09-07  9:38     ` Baoquan He
2023-09-07  9:38       ` Baoquan He
2023-09-07  9:40       ` Uladzislau Rezki
2023-09-07  9:40         ` Uladzislau Rezki
2023-09-07  9:39     ` Uladzislau Rezki
2023-09-07  9:39       ` Uladzislau Rezki
2023-09-07  9:58       ` Baoquan He
2023-09-07  9:58         ` Baoquan He
2023-09-08  1:51         ` HAGIO KAZUHITO(萩尾 一仁)
2023-09-08  1:51           ` HAGIO KAZUHITO(萩尾 一仁)
2023-09-08  4:43           ` Baoquan He
2023-09-08  4:43             ` Baoquan He
2023-09-08  5:01             ` HAGIO KAZUHITO(萩尾 一仁)
2023-09-08  5:01               ` HAGIO KAZUHITO(萩尾 一仁)
2023-09-08  6:44               ` Baoquan He
2023-09-08  6:44                 ` Baoquan He
2023-09-08 11:25                 ` Uladzislau Rezki
2023-09-08 11:25                   ` Uladzislau Rezki
2023-09-08 11:38                   ` Baoquan He
2023-09-08 11:38                     ` Baoquan He
2023-09-08 13:23                     ` Uladzislau Rezki
2023-09-08 13:23                       ` Uladzislau Rezki
2023-09-11  2:38   ` Baoquan He
2023-09-11 16:53     ` Uladzislau Rezki
2023-09-12 13:19       ` Baoquan He
2023-08-29  8:11 ` [PATCH v2 5/9] mm: vmalloc: Remove global purge_vmap_area_root rb-tree Uladzislau Rezki (Sony)
2023-09-11  2:57   ` Baoquan He
2023-09-11 17:00     ` Uladzislau Rezki
2023-08-29  8:11 ` [PATCH v2 6/9] mm: vmalloc: Offload free_vmap_area_lock lock Uladzislau Rezki (Sony)
2023-09-06  6:04   ` Baoquan He
2023-09-06 19:16     ` Uladzislau Rezki
2023-09-07  0:06       ` Baoquan He
2023-09-07  9:33         ` Uladzislau Rezki
2023-09-11  3:25   ` Baoquan He
2023-09-11 17:10     ` Uladzislau Rezki
2023-09-12 13:21       ` Baoquan He
2023-08-29  8:11 ` [PATCH v2 7/9] mm: vmalloc: Support multiple nodes in vread_iter Uladzislau Rezki (Sony)
2023-09-11  3:58   ` Baoquan He
2023-09-11 18:16     ` Uladzislau Rezki
2023-09-12 13:42       ` Baoquan He
2023-09-13 15:42         ` Uladzislau Rezki
2023-09-14  3:02           ` Baoquan He
2023-09-14  3:36           ` Baoquan He
2023-09-14  3:38             ` Baoquan He
2023-09-13 10:59       ` Baoquan He
2023-09-13 15:38         ` Uladzislau Rezki
2023-08-29  8:11 ` [PATCH v2 8/9] mm: vmalloc: Support multiple nodes in vmallocinfo Uladzislau Rezki (Sony)
2023-09-15 13:02   ` Baoquan He
2023-09-15 18:32     ` Uladzislau Rezki
2023-08-29  8:11 ` Uladzislau Rezki (Sony) [this message]
2023-09-15 13:03   ` [PATCH v2 9/9] mm: vmalloc: Set nr_nodes/node_size based on CPU-cores Baoquan He
2023-09-15 18:31     ` Uladzislau Rezki
2023-08-31  1:15 ` [PATCH v2 0/9] Mitigate a vmap lock contention v2 Baoquan He
2023-08-31 16:26   ` Uladzislau Rezki
2023-09-04 14:55 ` Uladzislau Rezki
2023-09-04 19:53   ` Andrew Morton
2023-09-05  6:53     ` Uladzislau Rezki
2023-09-06 20:04 ` Lorenzo Stoakes
2023-09-07  9:15   ` Uladzislau Rezki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230829081142.3619-10-urezki@gmail.com \
    --to=urezki@gmail.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=bhe@redhat.com \
    --cc=david@fromorbit.com \
    --cc=hch@infradead.org \
    --cc=joel@joelfernandes.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lstoakes@gmail.com \
    --cc=oleksiy.avramchenko@sony.com \
    --cc=paulmck@kernel.org \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.