linux-mm.kvack.org archive mirror
From: Zi Yan <zi.yan@sent.com>
To: linux-mm@kvack.org
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	Roman Gushchin <guro@fb.com>, Rik van Riel <riel@surriel.com>,
	Matthew Wilcox <willy@infradead.org>,
	Shakeel Butt <shakeelb@google.com>,
	Yang Shi <shy828301@gmail.com>, Jason Gunthorpe <jgg@nvidia.com>,
	Mike Kravetz <mike.kravetz@oracle.com>,
	Michal Hocko <mhocko@suse.com>,
	David Hildenbrand <david@redhat.com>,
	William Kucharski <william.kucharski@oracle.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	John Hubbard <jhubbard@nvidia.com>,
	David Nellans <dnellans@nvidia.com>,
	linux-kernel@vger.kernel.org, Zi Yan <ziy@nvidia.com>
Subject: [RFC PATCH v2 29/30] mm: thp: use cma reservation for pud thp allocation.
Date: Mon, 28 Sep 2020 13:54:27 -0400	[thread overview]
Message-ID: <20200928175428.4110504-30-zi.yan@sent.com> (raw)
In-Reply-To: <20200928175428.4110504-1-zi.yan@sent.com>

From: Zi Yan <ziy@nvidia.com>

Share the hugepage_cma reservation with hugetlb for PUD THP allocation.
The reserved CMA regions can still be used for movable page allocations.
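
In code, the two new helpers boil down to the following (condensed from
the mm/huge_memory.c hunk below; CONFIG_CMA assumed enabled):

	struct page *alloc_thp_pud_page(int nid)
	{
		/* carve a 1GB page out of the shared per-node CMA area */
		return cma_alloc(hugepage_cma[nid], HPAGE_PUD_NR,
				 HPAGE_PUD_ORDER, true);
	}

	bool free_thp_pud_page(struct page *page, int order)
	{
		/* hand the range back to the CMA area it came from */
		return cma_release(hugepage_cma[page_to_nid(page)],
				   page, 1 << order);
	}

alloc_page_interleave() and alloc_pages_vma() call alloc_thp_pud_page()
for HPAGE_PUD_ORDER requests (see the mm/mempolicy.c hunk).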

During a 1GB page split, all subpages are cleared from the CMA bitmap,
since they are no longer part of a 1GB page and will be freed via the
normal path instead of cma_release().
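
On the free side (simplified from the mm/page_alloc.c hunk below), an
unsplit PUD THP is handed back to CMA, while subpages of a split PUD
THP, whose bitmap bits were cleared at split time, fall through to the
normal buddy free path:

	/* in __free_pages_ok() */
	if (order == HPAGE_PUD_ORDER) {
		bool freed;

		destroy_compound_gigantic_page(page, order);
		set_page_refcounted(page);
		freed = free_thp_pud_page(page, order);	/* cma_release() */
		VM_BUG_ON_PAGE(!freed, page);
		return;
	}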

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 include/linux/cma.h     |  3 +++
 include/linux/huge_mm.h | 10 ++++++++++
 mm/cma.c                | 31 +++++++++++++++++++++++++++++++
 mm/huge_memory.c        | 34 ++++++++++++++++++++++++++++++++++
 mm/hugetlb.c            | 21 +--------------------
 mm/mempolicy.c          | 14 +++++++++++++-
 mm/page_alloc.c         | 29 +++++++++++++++++++++++++++++
 7 files changed, 121 insertions(+), 21 deletions(-)

diff --git a/include/linux/cma.h b/include/linux/cma.h
index 9989d580c2a7..c299b62b3a7a 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -48,6 +48,9 @@ extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
 			      bool no_warn);
 extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count);
 
+extern bool cma_clear_bitmap_if_in_range(struct cma *cma, const struct page *page,
+					unsigned int count);
+
 extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data);
 
 extern void cma_reserve(int min_order, unsigned long requested_size,
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 0d0f9cf25aeb..163b244d9acd 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -24,6 +24,8 @@ extern struct page *follow_trans_huge_pud(struct vm_area_struct *vma,
 					  unsigned long addr,
 					  pud_t *pud,
 					  unsigned int flags);
+extern struct page *alloc_thp_pud_page(int nid);
+extern bool free_thp_pud_page(struct page *page, int order);
 #else
 static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 {
@@ -43,6 +45,14 @@ struct page *follow_trans_huge_pud(struct vm_area_struct *vma,
 {
 	return NULL;
 }
+static inline struct page *alloc_thp_pud_page(int nid)
+{
+	return NULL;
+}
+static inline bool free_thp_pud_page(struct page *page, int order)
+{
+	return false;
+}
 #endif
 
 extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
diff --git a/mm/cma.c b/mm/cma.c
index 1a9d997fa5ab..c595aad61f58 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -532,6 +532,37 @@ bool cma_release(struct cma *cma, const struct page *pages, unsigned int count)
 	return true;
 }
 
+/**
+ * cma_clear_bitmap_if_in_range() - clear the bitmap for the given pages
+ * @cma:   Contiguous memory region the pages were allocated from.
+ * @pages: Allocated pages.
+ * @count: Number of allocated pages.
+ *
+ * This function clears the CMA bitmap for memory allocated by cma_alloc().
+ * It returns false when the provided pages do not belong to the contiguous
+ * area and true otherwise.
+ */
+bool cma_clear_bitmap_if_in_range(struct cma *cma, const struct page *pages,
+				  unsigned int count)
+{
+	unsigned long pfn;
+
+	if (!cma || !pages)
+		return false;
+
+	pfn = page_to_pfn(pages);
+
+	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+		return false;
+
+	if (pfn + count > cma->base_pfn + cma->count)
+		return false;
+
+	cma_clear_bitmap(cma, pfn, count);
+
+	return true;
+}
+
 int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
 {
 	int i;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 20ecffc27396..910e51f35910 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -33,6 +33,7 @@
 #include <linux/oom.h>
 #include <linux/numa.h>
 #include <linux/page_owner.h>
+#include <linux/cma.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
@@ -62,6 +63,10 @@ static struct shrinker deferred_split_shrinker;
 static atomic_t huge_zero_refcount;
 struct page *huge_zero_page __read_mostly;
 
+#ifdef CONFIG_CMA
+extern struct cma *hugepage_cma[MAX_NUMNODES];
+#endif
+
 bool transparent_hugepage_enabled(struct vm_area_struct *vma)
 {
 	/* The addr is used to check if the vma size fits */
@@ -2498,6 +2503,17 @@ static void __split_huge_pud_page(struct page *page, struct list_head *list,
 	/* no file-back page support yet */
 	VM_BUG_ON(!PageAnon(page));
 
+	/*
+	 * Clear the CMA bitmap when we split a PUD page so that the
+	 * subpages can be freed as normal pages.
+	 */
+	if (IS_ENABLED(CONFIG_CMA)) {
+		struct cma *cma = hugepage_cma[page_to_nid(head)];
+
+		BUG_ON(!cma_clear_bitmap_if_in_range(cma, head,
+				thp_nr_pages(head)));
+	}
+
 	for (i = HPAGE_PUD_NR - HPAGE_PMD_NR; i >= 1; i -= HPAGE_PMD_NR)
 		__split_huge_pud_page_tail(head, i, lruvec, list);
 
@@ -3732,3 +3748,21 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
 	update_mmu_cache_pmd(vma, address, pvmw->pmd);
 }
 #endif
+
+struct page *alloc_thp_pud_page(int nid)
+{
+	struct page *page = NULL;
+#ifdef CONFIG_CMA
+	page = cma_alloc(hugepage_cma[nid], HPAGE_PUD_NR, HPAGE_PUD_ORDER, true);
+#endif
+	return page;
+}
+
+bool free_thp_pud_page(struct page *page, int order)
+{
+	bool ret = false;
+#ifdef CONFIG_CMA
+	ret = cma_release(hugepage_cma[page_to_nid(page)], page, 1<<order);
+#endif
+	return ret;
+}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 871f1c315c48..0282110c72b5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1200,26 +1200,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 		nr_nodes--)
 
 #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-static void destroy_compound_gigantic_page(struct page *page,
-					unsigned int order)
-{
-	int i;
-	int nr_pages = 1 << order;
-	struct page *p = page + 1;
-
-	atomic_set(compound_mapcount_ptr(page), 0);
-	if (hpage_pincount_available(page))
-		atomic_set(compound_pincount_ptr(page), 0);
-
-	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
-		clear_compound_head(p);
-		set_page_refcounted(p);
-	}
-
-	set_compound_order(page, 0);
-	__ClearPageHead(page);
-}
-
+extern void destroy_compound_gigantic_page(struct page *page, unsigned int order);
 static void free_gigantic_page(struct page *page, unsigned int order)
 {
 	/*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 731a7710395f..dc3d6371195f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2143,7 +2143,12 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 {
 	struct page *page;
 
-	page = __alloc_pages(gfp, order, nid);
+	if (order == HPAGE_PUD_ORDER) {
+		page = alloc_thp_pud_page(nid);
+		if (page && (gfp & __GFP_COMP))
+			prep_compound_page(page, order);
+	} else
+		page = __alloc_pages(gfp, order, nid);
 	/* skip NUMA_INTERLEAVE_HIT counter update if numa stats is disabled */
 	if (!static_branch_likely(&vm_numa_stat_key))
 		return page;
@@ -2217,6 +2222,13 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		nmask = policy_nodemask(gfp, pol);
 		if (!nmask || node_isset(hpage_node, *nmask)) {
 			mpol_cond_put(pol);
+
+			if (order == HPAGE_PUD_ORDER) {
+				page = alloc_thp_pud_page(hpage_node);
+				if (page && (gfp & __GFP_COMP))
+					prep_compound_page(page, order);
+				goto out;
+			}
 			/*
 			 * First, try to allocate THP only on local node, but
 			 * don't reclaim unnecessarily, just compact.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6bdb38a8fb48..5251ecb30465 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1481,6 +1481,25 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
 	}
 }
 
+void destroy_compound_gigantic_page(struct page *page, unsigned int order)
+{
+	int i;
+	int nr_pages = 1 << order;
+	struct page *p = page + 1;
+
+	atomic_set(compound_mapcount_ptr(page), 0);
+	if (hpage_pincount_available(page))
+		atomic_set(compound_pincount_ptr(page), 0);
+
+	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
+		clear_compound_head(p);
+		set_page_refcounted(p);
+	}
+
+	set_compound_order(page, 0);
+	__ClearPageHead(page);
+}
+
 static void __free_pages_ok(struct page *page, unsigned int order)
 {
 	unsigned long flags;
@@ -1490,6 +1509,16 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	if (!free_pages_prepare(page, order, true))
 		return;
 
+	if (order == HPAGE_PUD_ORDER) {
+		bool thp_pud_page_freed = false;
+
+		destroy_compound_gigantic_page(page, order);
+		set_page_refcounted(page);
+		thp_pud_page_freed = free_thp_pud_page(page, order);
+		VM_BUG_ON_PAGE(!thp_pud_page_freed, page);
+		return;
+	}
+
 	migratetype = get_pfnblock_migratetype(page, pfn);
 	local_irq_save(flags);
 	__count_vm_events(PGFREE, 1 << order);
-- 
2.28.0



Thread overview: 56+ messages
2020-09-28 17:53 [RFC PATCH v2 00/30] 1GB PUD THP support on x86_64 Zi Yan
2020-09-28 17:53 ` [RFC PATCH v2 01/30] mm/pagewalk: use READ_ONCE when reading the PUD entry unlocked Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 02/30] mm: pagewalk: use READ_ONCE when reading the PMD " Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 03/30] mm: thp: use single linked list for THP page table page deposit Zi Yan
2020-09-28 19:34   ` Matthew Wilcox
2020-09-28 20:34     ` Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 04/30] mm: add new helper functions to allocate one PMD page with 512 PTE pages Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 05/30] mm: thp: add page table deposit/withdraw functions for PUD THP Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 06/30] mm: change thp_order and thp_nr as we will have not just PMD THPs Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 07/30] mm: thp: add anonymous PUD THP page fault support without enabling it Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 08/30] mm: thp: add PUD THP support for copy_huge_pud Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 09/30] mm: thp: add PUD THP support to zap_huge_pud Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 10/30] fs: proc: add PUD THP kpageflag Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 11/30] mm: thp: handling PUD THP reference bit Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 12/30] mm: rmap: add mappped/unmapped page order to anonymous page rmap functions Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 13/30] mm: rmap: add map_order to page_remove_anon_compound_rmap Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 14/30] mm: thp: add PUD THP split_huge_pud_page() function Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 15/30] mm: thp: add PUD THP to deferred split list when PUD mapping is gone Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 16/30] mm: debug: adapt dump_page to PUD THP Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 17/30] mm: thp: PUD THP COW splits PUD page and falls back to PMD page Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 18/30] mm: thp: PUD THP follow_p*d_page() support Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 19/30] mm: stats: make smap stats understand PUD THPs Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 20/30] mm: page_vma_walk: teach it about PMD-mapped PUD THP Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 21/30] mm: thp: PUD THP support in try_to_unmap() Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 22/30] mm: thp: split PUD THPs at page reclaim Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 23/30] mm: support PUD THP pagemap support Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 24/30] mm: madvise: add page size options to MADV_HUGEPAGE and MADV_NOHUGEPAGE Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 25/30] mm: vma: add VM_HUGEPAGE_PUD to vm_flags at bit 37 Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 26/30] mm: thp: add a global knob to enable/disable PUD THPs Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 27/30] mm: thp: make PUD THP size public Zi Yan
2020-09-28 17:54 ` [RFC PATCH v2 28/30] hugetlb: cma: move cma reserve function to cma.c Zi Yan
2020-09-28 17:54 ` Zi Yan [this message]
2020-09-28 17:54 ` [RFC PATCH v2 30/30] mm: thp: enable anonymous PUD THP at page fault path Zi Yan
2020-09-30 11:55 ` [RFC PATCH v2 00/30] 1GB PUD THP support on x86_64 Michal Hocko
2020-10-01 15:14   ` Zi Yan
2020-10-02  7:32     ` Michal Hocko
2020-10-02  7:50       ` David Hildenbrand
2020-10-02  8:10         ` Michal Hocko
2020-10-02  8:30           ` David Hildenbrand
2020-10-05 15:03             ` Zi Yan
2020-10-05 15:55               ` Matthew Wilcox
2020-10-05 17:04                 ` Roman Gushchin
2020-10-05 19:12                 ` Zi Yan
2020-10-05 19:37                   ` Matthew Wilcox
2020-10-05 17:16               ` Roman Gushchin
2020-10-05 17:27                 ` David Hildenbrand
2020-10-05 18:25                   ` Roman Gushchin
2020-10-05 18:33                     ` David Hildenbrand
2020-10-05 19:11                       ` Roman Gushchin
2020-10-06  8:25                         ` David Hildenbrand
2020-10-05 17:39               ` David Hildenbrand
2020-10-05 18:05                 ` Zi Yan
2020-10-05 18:48                   ` David Hildenbrand
2020-10-06 11:59                   ` Michal Hocko
2020-10-05 15:34         ` Zi Yan
2020-10-05 17:30           ` David Hildenbrand
