linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Matthew Wilcox <willy@infradead.org>
To: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Dave Hansen <dave.hansen@intel.com>,
	linux-kernel@vger.kernel.org,
	Christoph Hellwig <hch@infradead.org>,
	linux-mm@kvack.org, Andy Lutomirski <luto@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	"Kirill A. Shutemov" <kirill@shutemov.name>
Subject: Use higher-order pages in vmalloc
Date: Wed, 21 Feb 2018 07:42:14 -0800	[thread overview]
Message-ID: <20180221154214.GA4167@bombadil.infradead.org> (raw)
In-Reply-To: <151670493255.658225.2881484505285363395.stgit@buzz>

On Tue, Jan 23, 2018 at 01:55:32PM +0300, Konstantin Khlebnikov wrote:
> A virtually mapped stack has two bonuses: it eats order-0 pages and
> adds a guard page at the end. But it is slightly slower if the system
> has plenty of free high-order pages.
> 
> This patch adds an option to use a virtually mapped stack as a fallback
> for atomic allocation of a traditional high-order page.

This prompted me to write a patch I've been meaning to do for a while,
allocating large pages if they're available to satisfy vmalloc.  I thought
it would save on touching multiple struct pages, but it turns out that
the checking code we currently have in the free_pages path requires you
to have initialised all of the tail pages (maybe we can make that code
conditional ...)

It does save the buddy allocator the trouble of breaking down higher-order
pages into order-0 pages, only to allocate them again immediately.

(Um, I seem to have broken the patch while cleaning it up for submission.
Since it probably won't be accepted anyway, I'm not going to try to debug it.)

diff --git a/kernel/fork.c b/kernel/fork.c
index be8aa5b98666..2bc01071b6ae 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -319,12 +319,12 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
 	if (vm) {
 		int i;
 
-		BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
-
-		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
-			mod_zone_page_state(page_zone(vm->pages[i]),
+		for (i = 0; i < vm->nr_pages; i++) {
+			struct page *page = vm->pages[i];
+			unsigned int size = PAGE_SIZE << compound_order(page);
+			mod_zone_page_state(page_zone(page),
 					    NR_KERNEL_STACK_KB,
-					    PAGE_SIZE / 1024 * account);
+					    size / 1024 * account);
 		}
 
 		/* All stack pages belong to the same memcg. */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index b728c98f49cd..4bfc29b21bc1 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -134,6 +134,7 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
 static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
 		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
 {
+	unsigned int i;
 	pte_t *pte;
 
 	/*
@@ -151,9 +152,13 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
 			return -EBUSY;
 		if (WARN_ON(!page))
 			return -ENOMEM;
-		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
+		for (i = 0; i < (1UL << compound_order(page)); i++) {
+			set_pte_at(&init_mm, addr, pte++,
+					mk_pte(page + i, prot));
+			addr += PAGE_SIZE;
+		}
 		(*nr)++;
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (addr != end);
 	return 0;
 }
 
@@ -1530,14 +1535,14 @@ static void __vunmap(const void *addr, int deallocate_pages)
 	debug_check_no_obj_freed(addr, get_vm_area_size(area));
 
 	if (deallocate_pages) {
-		int i;
+		unsigned int i;
 
 		for (i = 0; i < area->nr_pages; i++) {
 			struct page *page = area->pages[i];
 
 			BUG_ON(!page);
 			__ClearPageVmalloc(page);
-			__free_pages(page, 0);
+			__free_pages(page, compound_order(page));
 		}
 
 		kvfree(area->pages);
@@ -1696,11 +1701,20 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
 	for (i = 0; i < area->nr_pages; i++) {
 		struct page *page;
-
-		if (node == NUMA_NO_NODE)
-			page = alloc_page(alloc_mask);
-		else
-			page = alloc_pages_node(node, alloc_mask, 0);
+		unsigned int j = ilog2(area->nr_pages - i) + 1;
+
+		do {
+			j--;
+			if (node == NUMA_NO_NODE)
+				page = alloc_pages(alloc_mask, j);
+			else
+				page = alloc_pages_node(node, alloc_mask, j);
+		} while (!page && j);
+
+		if (j) {
+			area->nr_pages -= (1UL << j) - 1;
+			prep_compound_page(page, j);
+		}
 
 		if (unlikely(!page)) {
 			/* Successfully allocated i pages, free them in __vunmap() */
@@ -1719,8 +1733,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
 fail:
 	warn_alloc(gfp_mask, NULL,
-			  "vmalloc: allocation failure, allocated %ld of %ld bytes",
-			  (area->nr_pages*PAGE_SIZE), area->size);
+		   "vmalloc: allocation failure, allocated %ld of %ld bytes",
+		   (nr_pages * PAGE_SIZE), get_vm_area_size(area));
 	vfree(area->addr);
 	return NULL;
 }

  reply	other threads:[~2018-02-21 15:42 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-23 10:55 [PATCH 1/4] vmalloc: add vm_flags argument to internal __vmalloc_node() Konstantin Khlebnikov
2018-01-23 10:55 ` [PATCH 2/4] vmalloc: add __vmalloc_area() Konstantin Khlebnikov
2018-01-23 10:55 ` [PATCH 3/4] kernel/fork: switch vmapped stack callation to __vmalloc_area() Konstantin Khlebnikov
2018-01-23 13:57   ` Konstantin Khlebnikov
2018-02-21  0:16     ` Andrew Morton
2018-02-21  7:23       ` Konstantin Khlebnikov
2018-02-21 16:35         ` Andy Lutomirski
2018-01-23 10:55 ` [PATCH 4/4] kernel/fork: add option to use virtually mapped stacks as fallback Konstantin Khlebnikov
2018-02-21 15:42   ` Matthew Wilcox [this message]
2018-02-21 16:11     ` Use higher-order pages in vmalloc Andy Lutomirski
2018-02-21 16:50       ` Matthew Wilcox
2018-02-21 16:16     ` Dave Hansen
2018-02-21 17:01       ` Matthew Wilcox
2018-02-22  6:59         ` Michal Hocko
2018-02-22 12:22           ` Matthew Wilcox
2018-02-22 13:36             ` Michal Hocko
2018-02-22 19:01               ` Andy Lutomirski
2018-02-22 19:19                 ` Dave Hansen
2018-02-22 19:27                   ` Andy Lutomirski
2018-02-22 19:36                     ` Dave Hansen
2018-02-23 12:13                 ` Michal Hocko
2018-03-01 18:16                   ` Eric Dumazet
2018-02-21 12:24 ` [PATCH 1/4] vmalloc: add vm_flags argument to internal __vmalloc_node() Matthew Wilcox
2018-02-21 12:39   ` Andrey Ryabinin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180221154214.GA4167@bombadil.infradead.org \
    --to=willy@infradead.org \
    --cc=akpm@linux-foundation.org \
    --cc=dave.hansen@intel.com \
    --cc=hch@infradead.org \
    --cc=khlebnikov@yandex-team.ru \
    --cc=kirill@shutemov.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).