All of lore.kernel.org
 help / color / mirror / Atom feed
From: Muchun Song <songmuchun@bytedance.com>
To: corbet@lwn.net, mike.kravetz@oracle.com, tglx@linutronix.de,
	mingo@redhat.com, bp@alien8.de, x86@kernel.org, hpa@zytor.com,
	dave.hansen@linux.intel.com, luto@kernel.org,
	peterz@infradead.org, viro@zeniv.linux.org.uk,
	akpm@linux-foundation.org, paulmck@kernel.org,
	mchehab+huawei@kernel.org, pawan.kumar.gupta@linux.intel.com,
	rdunlap@infradead.org, oneukum@suse.com,
	anshuman.khandual@arm.com, jroedel@suse.de,
	almasrymina@google.com, rientjes@google.com, willy@infradead.org,
	osalvador@suse.de, mhocko@suse.com, song.bao.hua@hisilicon.com
Cc: duanxiongchun@bytedance.com, linux-doc@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-fsdevel@vger.kernel.org,
	Muchun Song <songmuchun@bytedance.com>
Subject: [PATCH v6 10/16] mm/hugetlb: Allocate the vmemmap pages associated with each hugetlb page
Date: Tue, 24 Nov 2020 17:52:53 +0800	[thread overview]
Message-ID: <20201124095259.58755-11-songmuchun@bytedance.com> (raw)
In-Reply-To: <20201124095259.58755-1-songmuchun@bytedance.com>

When we free a hugetlb page to the buddy, we should allocate the vmemmap
pages associated with it. We can do that in the __free_hugepage().

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 mm/hugetlb.c         |   2 +
 mm/hugetlb_vmemmap.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/hugetlb_vmemmap.h |   5 +++
 3 files changed, 109 insertions(+)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 41056b4230f1..3fafa39fcac6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1382,6 +1382,8 @@ static void __free_hugepage(struct hstate *h, struct page *page)
 {
 	int i;
 
+	alloc_huge_page_vmemmap(h, page);
+
 	for (i = 0; i < pages_per_huge_page(h); i++) {
 		page[i].flags &= ~(1 << PG_locked | 1 << PG_error |
 				1 << PG_referenced | 1 << PG_dirty |
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index f6ba288966d4..d6a1b06c1322 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -95,6 +95,7 @@
 #define pr_fmt(fmt)	"HugeTLB vmemmap: " fmt
 
 #include <linux/bootmem_info.h>
+#include <linux/delay.h>
 #include "hugetlb_vmemmap.h"
 
 /*
@@ -108,6 +109,8 @@
 #define RESERVE_VMEMMAP_NR		2U
 #define RESERVE_VMEMMAP_SIZE		(RESERVE_VMEMMAP_NR << PAGE_SHIFT)
 #define TAIL_PAGE_REUSE			-1
+#define GFP_VMEMMAP_PAGE		\
+	(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_HIGH)
 
 #ifndef VMEMMAP_HPAGE_SHIFT
 #define VMEMMAP_HPAGE_SHIFT		HPAGE_SHIFT
@@ -159,6 +162,105 @@ static pmd_t *vmemmap_to_pmd(unsigned long page)
 	return pmd_offset(pud, page);
 }
 
+static void __remap_huge_page_pte_vmemmap(struct page *reuse, pte_t *ptep,
+					  unsigned long start,
+					  unsigned long end,
+					  struct list_head *remap_pages)
+{
+	pgprot_t pgprot = PAGE_KERNEL;
+	void *from = page_to_virt(reuse);
+	unsigned long addr;
+
+	for (addr = start; addr < end; addr += PAGE_SIZE) {
+		void *to;
+		struct page *page;
+		pte_t entry, old = *ptep;
+
+		page = list_first_entry(remap_pages, struct page, lru);
+		list_del(&page->lru);
+		to = page_to_virt(page);
+		copy_page(to, from);
+
+		/*
+		 * Make sure that any data that writes to the @to is made
+		 * visible to the physical page.
+		 */
+		flush_kernel_vmap_range(to, PAGE_SIZE);
+
+		prepare_vmemmap_page(page);
+
+		entry = mk_pte(page, pgprot);
+		set_pte_at(&init_mm, addr, ptep++, entry);
+
+		VM_BUG_ON(!pte_present(old) || pte_page(old) != reuse);
+	}
+}
+
+static void __remap_huge_page_pmd_vmemmap(pmd_t *pmd, unsigned long start,
+					  unsigned long end,
+					  struct list_head *vmemmap_pages)
+{
+	unsigned long next, addr = start;
+	struct page *reuse = NULL;
+
+	do {
+		pte_t *ptep;
+
+		ptep = pte_offset_kernel(pmd, addr);
+		if (!reuse)
+			reuse = pte_page(ptep[TAIL_PAGE_REUSE]);
+
+		next = vmemmap_hpage_addr_end(addr, end);
+		__remap_huge_page_pte_vmemmap(reuse, ptep, addr, next,
+					      vmemmap_pages);
+	} while (pmd++, addr = next, addr != end);
+
+	flush_tlb_kernel_range(start, end);
+}
+
+static inline void alloc_vmemmap_pages(struct hstate *h, struct list_head *list)
+{
+	unsigned int nr = free_vmemmap_pages_per_hpage(h);
+
+	while (nr--) {
+		struct page *page;
+
+retry:
+		page = alloc_page(GFP_VMEMMAP_PAGE);
+		if (unlikely(!page)) {
+			msleep(100);
+			/*
+			 * We should retry infinitely, because we cannot
+			 * handle allocation failures. Once we allocate
+			 * vmemmap pages successfully, then we can free
+			 * a HugeTLB page.
+			 */
+			goto retry;
+		}
+		list_add_tail(&page->lru, list);
+	}
+}
+
+void alloc_huge_page_vmemmap(struct hstate *h, struct page *head)
+{
+	pmd_t *pmd;
+	unsigned long start, end;
+	unsigned long vmemmap_addr = (unsigned long)head;
+	LIST_HEAD(map_pages);
+
+	if (!free_vmemmap_pages_per_hpage(h))
+		return;
+
+	alloc_vmemmap_pages(h, &map_pages);
+
+	pmd = vmemmap_to_pmd(vmemmap_addr);
+	BUG_ON(!pmd);
+
+	start = vmemmap_addr + RESERVE_VMEMMAP_SIZE;
+	end = vmemmap_addr + vmemmap_pages_size_per_hpage(h);
+	__remap_huge_page_pmd_vmemmap(pmd, start, end, &map_pages);
+}
+
 static inline void free_vmemmap_page_list(struct list_head *list)
 {
 	struct page *page, *next;
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index 293897b9f1d8..7887095488f4 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -12,6 +12,7 @@
 
 #ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
 void __init hugetlb_vmemmap_init(struct hstate *h);
+void alloc_huge_page_vmemmap(struct hstate *h, struct page *head);
 void free_huge_page_vmemmap(struct hstate *h, struct page *head);
 
 static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h)
@@ -23,6 +24,10 @@ static inline void hugetlb_vmemmap_init(struct hstate *h)
 {
 }
 
+static inline void alloc_huge_page_vmemmap(struct hstate *h, struct page *head)
+{
+}
+
 static inline void free_huge_page_vmemmap(struct hstate *h, struct page *head)
 {
 }
-- 
2.11.0


  parent reply	other threads:[~2020-11-24  9:59 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-24  9:52 [PATCH v6 00/16] Free some vmemmap pages of hugetlb page Muchun Song
2020-11-24  9:52 ` [PATCH v6 01/16] mm/memory_hotplug: Move bootmem info registration API to bootmem_info.c Muchun Song
2021-06-10 12:15   ` chengkaitao
2020-11-24  9:52 ` [PATCH v6 02/16] mm/memory_hotplug: Move {get,put}_page_bootmem() " Muchun Song
2020-11-24  9:52 ` [PATCH v6 03/16] mm/hugetlb: Introduce a new config HUGETLB_PAGE_FREE_VMEMMAP Muchun Song
2020-11-24  9:52 ` [PATCH v6 04/16] mm/hugetlb: Introduce nr_free_vmemmap_pages in the struct hstate Muchun Song
2020-11-24  9:52 ` [PATCH v6 05/16] mm/bootmem_info: Introduce {free,prepare}_vmemmap_page() Muchun Song
2020-11-24  9:52 ` [PATCH v6 06/16] mm/hugetlb: Disable freeing vmemmap if struct page size is not power of two Muchun Song
2020-11-24  9:52 ` [PATCH v6 07/16] x86/mm/64: Disable PMD page mapping of vmemmap Muchun Song
2020-11-24 10:24   ` Oscar Salvador
2020-11-24 10:31     ` Song Bao Hua (Barry Song)
2020-11-24 10:31       ` Song Bao Hua (Barry Song)
2020-11-24 11:13     ` [External] " Muchun Song
2020-11-24 11:13       ` Muchun Song
2020-11-24  9:52 ` [PATCH v6 08/16] mm/hugetlb: Free the vmemmap pages associated with each hugetlb page Muchun Song
2020-11-24  9:52 ` [PATCH v6 09/16] mm/hugetlb: Defer freeing of HugeTLB pages Muchun Song
2020-11-24 11:51   ` Michal Hocko
2020-11-24 12:45     ` [External] " Muchun Song
2020-11-24 12:45       ` Muchun Song
2020-11-24 13:14       ` Michal Hocko
2020-11-24  9:52 ` Muchun Song [this message]
2020-11-24  9:52 ` [PATCH v6 11/16] mm/hugetlb: Introduce remap_huge_page_pmd_vmemmap helper Muchun Song
2020-11-24  9:52 ` [PATCH v6 12/16] mm/hugetlb: Set the PageHWPoison to the raw error page Muchun Song
2020-11-24  9:52 ` [PATCH v6 13/16] mm/hugetlb: Flush work when dissolving hugetlb page Muchun Song
2020-11-24  9:52 ` [PATCH v6 14/16] mm/hugetlb: Add a kernel parameter hugetlb_free_vmemmap Muchun Song
2020-11-24 10:53   ` Song Bao Hua (Barry Song)
2020-11-24 10:53     ` Song Bao Hua (Barry Song)
2020-11-24 11:07     ` [External] " Muchun Song
2020-11-24 11:07       ` Muchun Song
2020-11-24  9:52 ` [PATCH v6 15/16] mm/hugetlb: Gather discrete indexes of tail page Muchun Song
2020-11-24  9:52 ` [PATCH v6 16/16] mm/hugetlb: Add BUILD_BUG_ON to catch invalid usage of tail struct page Muchun Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201124095259.58755-11-songmuchun@bytedance.com \
    --to=songmuchun@bytedance.com \
    --cc=akpm@linux-foundation.org \
    --cc=almasrymina@google.com \
    --cc=anshuman.khandual@arm.com \
    --cc=bp@alien8.de \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=duanxiongchun@bytedance.com \
    --cc=hpa@zytor.com \
    --cc=jroedel@suse.de \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mchehab+huawei@kernel.org \
    --cc=mhocko@suse.com \
    --cc=mike.kravetz@oracle.com \
    --cc=mingo@redhat.com \
    --cc=oneukum@suse.com \
    --cc=osalvador@suse.de \
    --cc=paulmck@kernel.org \
    --cc=pawan.kumar.gupta@linux.intel.com \
    --cc=peterz@infradead.org \
    --cc=rdunlap@infradead.org \
    --cc=rientjes@google.com \
    --cc=song.bao.hua@hisilicon.com \
    --cc=tglx@linutronix.de \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.