All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Dumazet <eric.dumazet@gmail.com>
To: David Rientjes <rientjes@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Re: Linux 4.9-rc6
Date: Mon, 21 Nov 2016 05:51:37 -0800	[thread overview]
Message-ID: <1479736297.8455.405.camel@edumazet-glaptop3.roam.corp.google.com> (raw)
In-Reply-To: <1479735165.8455.400.camel@edumazet-glaptop3.roam.corp.google.com>

On Mon, 2016-11-21 at 05:32 -0800, Eric Dumazet wrote:

> 
> Oh, this was definitely my intent of course, thanks for noticing this
> typo ;)

V2 fixes this and brings back NUMA spreading
(e.g. alloc_large_system_hash() done at boot time):


lpaa24:~# grep alloc_large /proc/vmallocinfo 
0xffffc90000009000-0xffffc9000000c000   12288 alloc_large_system_hash+0x178/0x238 pages=2 vmalloc N0=1 N1=1
0xffffc9000000c000-0xffffc9000000f000   12288 alloc_large_system_hash+0x178/0x238 pages=2 vmalloc N0=1 N1=1
0xffffc9000001e000-0xffffc9000009f000  528384 alloc_large_system_hash+0x178/0x238 pages=128 vmalloc N0=64 N1=64
0xffffc9000009f000-0xffffc900000e0000  266240 alloc_large_system_hash+0x178/0x238 pages=64 vmalloc N0=32 N1=32
0xffffc900001d3000-0xffffc900101d4000 268439552 alloc_large_system_hash+0x178/0x238 pages=65536 vmalloc vpages N0=32768 N1=32768
0xffffc900101d4000-0xffffc900181d5000 134221824 alloc_large_system_hash+0x178/0x238 pages=32768 vmalloc vpages N0=16384 N1=16384
0xffffc900181d5000-0xffffc900185d6000 4198400 alloc_large_system_hash+0x178/0x238 pages=1024 vmalloc vpages N0=512 N1=512
0xffffc900185d6000-0xffffc900189d7000 4198400 alloc_large_system_hash+0x178/0x238 pages=1024 vmalloc vpages N0=512 N1=512
0xffffc9001b271000-0xffffc9001b672000 4198400 alloc_large_system_hash+0x178/0x238 pages=1024 vmalloc vpages N0=512 N1=512
0xffffc9001b672000-0xffffc9001b675000   12288 alloc_large_system_hash+0x178/0x238 pages=2 vmalloc N0=1 N1=1
0xffffc9001b675000-0xffffc9001b776000 1052672 alloc_large_system_hash+0x178/0x238 pages=256 vmalloc N0=128 N1=128
0xffffc9001b776000-0xffffc9001b977000 2101248 alloc_large_system_hash+0x178/0x238 pages=512 vmalloc N0=256 N1=256
0xffffc9001b977000-0xffffc9001bb78000 2101248 alloc_large_system_hash+0x178/0x238 pages=512 vmalloc N0=256 N1=256
0xffffc9001c075000-0xffffc9001c176000 1052672 alloc_large_system_hash+0x178/0x238 pages=256 vmalloc N0=128 N1=128


 mm/vmalloc.c |   47 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f2481cb4e6b2..f4b9c9238f86 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -21,6 +21,7 @@
 #include <linux/debugobjects.h>
 #include <linux/kallsyms.h>
 #include <linux/list.h>
+#include <linux/mempolicy.h>
 #include <linux/notifier.h>
 #include <linux/rbtree.h>
 #include <linux/radix-tree.h>
@@ -1602,9 +1603,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 pgprot_t prot, int node)
 {
 	struct page **pages;
-	unsigned int nr_pages, array_size, i;
+	unsigned int nr_pages, array_size, i, j;
 	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
 	const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
+	const gfp_t multi_alloc_mask = (alloc_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_NORETRY;
+	int max_node_order = MAX_ORDER - 1;
 
 	nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
 	array_size = (nr_pages * sizeof(struct page *));
@@ -1624,20 +1627,48 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		return NULL;
 	}
 
-	for (i = 0; i < area->nr_pages; i++) {
-		struct page *page;
+	if (IS_ENABLED(CONFIG_NUMA) && nr_online_nodes > 1) {
+		struct mempolicy *policy = current->mempolicy;
+		int pages_per_node;
 
-		if (node == NUMA_NO_NODE)
-			page = alloc_page(alloc_mask);
-		else
-			page = alloc_pages_node(node, alloc_mask, 0);
+		if (policy && policy->mode == MPOL_INTERLEAVE) {
+			pages_per_node = DIV_ROUND_UP(nr_pages,
+						      nr_online_nodes);
+			max_node_order = min(max_node_order,
+					     ilog2(pages_per_node));
+		}
+	}
+
+	for (i = 0; i < area->nr_pages;) {
+		unsigned int chunk_order = min(ilog2(area->nr_pages - i),
+					       max_node_order);
+		struct page *page = NULL;
+
+		while (chunk_order) {
+			if (node == NUMA_NO_NODE)
+				page = alloc_pages(multi_alloc_mask, chunk_order);
+			else
+				page = alloc_pages_node(node, multi_alloc_mask, chunk_order);
+			if (page) {
+				split_page(page, chunk_order);
+				break;
+			}
+			chunk_order--;
+		}
+		if (!page) {
+			if (node == NUMA_NO_NODE)
+				page = alloc_pages(alloc_mask, 0);
+			else
+				page = alloc_pages_node(node, alloc_mask, 0);
+		}
 
 		if (unlikely(!page)) {
 			/* Successfully allocated i pages, free them in __vunmap() */
 			area->nr_pages = i;
 			goto fail;
 		}
-		area->pages[i] = page;
+		for (j = 0; j < (1U << chunk_order); j++)
+			area->pages[i++] = page++;
 		if (gfpflags_allow_blocking(gfp_mask))
 			cond_resched();
 	}

  reply	other threads:[~2016-11-21 13:51 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-20 22:05 Linux 4.9-rc6 Linus Torvalds
2016-11-20 22:27 ` Eric Dumazet
2016-11-20 23:27   ` Linus Torvalds
2016-11-21  1:35     ` Al Viro
2016-11-21  4:59       ` Eric Dumazet
2016-11-21  8:34         ` David Rientjes
2016-11-21 13:32           ` Eric Dumazet
2016-11-21 13:51             ` Eric Dumazet [this message]
2016-11-21 16:49               ` Eric Dumazet
2016-12-04 10:43               ` Thorsten Leemhuis
     [not found]                 ` <CA+55aFzPiZW4FfWbvM-+AFraa0fkUHv4C1Y9SCzHdXEcUSPqdg@mail.gmail.com>
2016-12-04 17:17                   ` Eric Dumazet
2016-12-21 15:30                     ` Eric Dumazet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1479736297.8455.405.camel@edumazet-glaptop3.roam.corp.google.com \
    --to=eric.dumazet@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rientjes@google.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.