All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Zach O'Keefe" <zokeefe@google.com>
To: Alex Shi <alex.shi@linux.alibaba.com>,
	David Hildenbrand <david@redhat.com>,
	 David Rientjes <rientjes@google.com>,
	Matthew Wilcox <willy@infradead.org>,
	 Michal Hocko <mhocko@suse.com>,
	Pasha Tatashin <pasha.tatashin@soleen.com>,
	 SeongJae Park <sj@kernel.org>, Song Liu <songliubraving@fb.com>,
	Vlastimil Babka <vbabka@suse.cz>,  Yang Shi <shy828301@gmail.com>,
	Zi Yan <ziy@nvidia.com>,
	linux-mm@kvack.org
Cc: Andrea Arcangeli <aarcange@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	 Arnd Bergmann <arnd@arndb.de>,
	Axel Rasmussen <axelrasmussen@google.com>,
	 Chris Kennelly <ckennelly@google.com>,
	Chris Zankel <chris@zankel.net>, Helge Deller <deller@gmx.de>,
	 Hugh Dickins <hughd@google.com>,
	Ivan Kokshaysky <ink@jurassic.park.msu.ru>,
	 "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>,
	Jens Axboe <axboe@kernel.dk>,
	 "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Matt Turner <mattst88@gmail.com>,
	 Max Filippov <jcmvbkbc@gmail.com>,
	Miaohe Lin <linmiaohe@huawei.com>,
	 Minchan Kim <minchan@kernel.org>,
	Patrick Xia <patrickx@google.com>,
	 Pavel Begunkov <asml.silence@gmail.com>,
	Peter Xu <peterx@redhat.com>,
	 Thomas Bogendoerfer <tsbogend@alpha.franken.de>,
	"Zach O'Keefe" <zokeefe@google.com>,
	 kernel test robot <lkp@intel.com>
Subject: [PATCH v2 03/12] mm/khugepaged: make hugepage allocation context-specific
Date: Thu, 14 Apr 2022 11:06:03 -0700	[thread overview]
Message-ID: <20220414180612.3844426-4-zokeefe@google.com> (raw)
In-Reply-To: <20220414180612.3844426-1-zokeefe@google.com>

Add hugepage allocation context to struct collapse_control, allowing
different collapse contexts to allocate hugepages differently.  For
example, khugepaged decides to allocate differently in NUMA and UMA
configurations, and other collapse contexts shouldn't be coupled to this
decision.

Additionally, move the [pre]allocated hugepage pointer into
struct collapse_control.

Signed-off-by: Zach O'Keefe <zokeefe@google.com>
Reported-by: kernel test robot <lkp@intel.com>
---
 mm/khugepaged.c | 96 ++++++++++++++++++++++++-------------------------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 25f45ac7f6bd..21c8436fa73c 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -92,6 +92,10 @@ struct collapse_control {
 
 	/* Last target selected in khugepaged_find_target_node() for this scan */
 	int last_target_node;
+
+	struct page *hpage;
+	struct page* (*alloc_hpage)(struct collapse_control *cc, gfp_t gfp,
+				    int node);
 };
 
 /**
@@ -877,21 +881,21 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
 	return true;
 }
 
-static struct page *
-khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
+static struct page *khugepaged_alloc_page(struct collapse_control *cc,
+					  gfp_t gfp, int node)
 {
-	VM_BUG_ON_PAGE(*hpage, *hpage);
+	VM_BUG_ON_PAGE(cc->hpage, cc->hpage);
 
-	*hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
-	if (unlikely(!*hpage)) {
+	cc->hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
+	if (unlikely(!cc->hpage)) {
 		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
-		*hpage = ERR_PTR(-ENOMEM);
+		cc->hpage = ERR_PTR(-ENOMEM);
 		return NULL;
 	}
 
-	prep_transhuge_page(*hpage);
+	prep_transhuge_page(cc->hpage);
 	count_vm_event(THP_COLLAPSE_ALLOC);
-	return *hpage;
+	return cc->hpage;
 }
 #else
 static int khugepaged_find_target_node(struct collapse_control *cc)
@@ -953,12 +957,12 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
 	return true;
 }
 
-static struct page *
-khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
+static struct page *khugepaged_alloc_page(struct collapse_control *cc,
+					  gfp_t gfp, int node)
 {
-	VM_BUG_ON(!*hpage);
+	VM_BUG_ON(!cc->hpage);
 
-	return  *hpage;
+	return cc->hpage;
 }
 #endif
 
@@ -1080,10 +1084,9 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 	return true;
 }
 
-static void collapse_huge_page(struct mm_struct *mm,
-				   unsigned long address,
-				   struct page **hpage,
-				   int node, int referenced, int unmapped)
+static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
+			       struct collapse_control *cc, int referenced,
+			       int unmapped)
 {
 	LIST_HEAD(compound_pagelist);
 	pmd_t *pmd, _pmd;
@@ -1096,6 +1099,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	struct mmu_notifier_range range;
 	gfp_t gfp;
 	const struct cpumask *cpumask;
+	int node;
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
@@ -1110,13 +1114,14 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 */
 	mmap_read_unlock(mm);
 
+	node = khugepaged_find_target_node(cc);
 	/* sched to specified node before huage page memory copy */
 	if (task_node(current) != node) {
 		cpumask = cpumask_of_node(node);
 		if (!cpumask_empty(cpumask))
 			set_cpus_allowed_ptr(current, cpumask);
 	}
-	new_page = khugepaged_alloc_page(hpage, gfp, node);
+	new_page = cc->alloc_hpage(cc, gfp, node);
 	if (!new_page) {
 		result = SCAN_ALLOC_HUGE_PAGE_FAIL;
 		goto out_nolock;
@@ -1238,15 +1243,15 @@ static void collapse_huge_page(struct mm_struct *mm,
 	update_mmu_cache_pmd(vma, address, pmd);
 	spin_unlock(pmd_ptl);
 
-	*hpage = NULL;
+	cc->hpage = NULL;
 
 	khugepaged_pages_collapsed++;
 	result = SCAN_SUCCEED;
 out_up_write:
 	mmap_write_unlock(mm);
 out_nolock:
-	if (!IS_ERR_OR_NULL(*hpage))
-		mem_cgroup_uncharge(page_folio(*hpage));
+	if (!IS_ERR_OR_NULL(cc->hpage))
+		mem_cgroup_uncharge(page_folio(cc->hpage));
 	trace_mm_collapse_huge_page(mm, isolated, result);
 	return;
 }
@@ -1254,7 +1259,6 @@ static void collapse_huge_page(struct mm_struct *mm,
 static int khugepaged_scan_pmd(struct mm_struct *mm,
 			       struct vm_area_struct *vma,
 			       unsigned long address,
-			       struct page **hpage,
 			       struct collapse_control *cc)
 {
 	pmd_t *pmd;
@@ -1399,10 +1403,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
 	if (ret) {
-		node = khugepaged_find_target_node(cc);
 		/* collapse_huge_page will return with the mmap_lock released */
-		collapse_huge_page(mm, address, hpage, node,
-				referenced, unmapped);
+		collapse_huge_page(mm, address, cc, referenced, unmapped);
 	}
 out:
 	trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
@@ -1667,8 +1669,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  * @mm: process address space where collapse happens
  * @file: file that collapse on
  * @start: collapse start address
- * @hpage: new allocated huge page for collapse
- * @node: appointed node the new huge page allocate from
+ * @cc: collapse context and scratchpad
  *
  * Basic scheme is simple, details are more complex:
  *  - allocate and lock a new huge page;
@@ -1686,8 +1687,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  *    + unlock and free huge page;
  */
 static void collapse_file(struct mm_struct *mm,
-		struct file *file, pgoff_t start,
-		struct page **hpage, int node)
+			  struct file *file, pgoff_t start,
+			  struct collapse_control *cc)
 {
 	struct address_space *mapping = file->f_mapping;
 	gfp_t gfp;
@@ -1697,15 +1698,16 @@ static void collapse_file(struct mm_struct *mm,
 	XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
 	int nr_none = 0, result = SCAN_SUCCEED;
 	bool is_shmem = shmem_file(file);
-	int nr;
+	int nr, node;
 
 	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
 	VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
 
 	/* Only allocate from the target node */
 	gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
+	node = khugepaged_find_target_node(cc);
 
-	new_page = khugepaged_alloc_page(hpage, gfp, node);
+	new_page = cc->alloc_hpage(cc, gfp, node);
 	if (!new_page) {
 		result = SCAN_ALLOC_HUGE_PAGE_FAIL;
 		goto out;
@@ -1998,7 +2000,7 @@ static void collapse_file(struct mm_struct *mm,
 		 * Remove pte page tables, so we can re-fault the page as huge.
 		 */
 		retract_page_tables(mapping, start);
-		*hpage = NULL;
+		cc->hpage = NULL;
 
 		khugepaged_pages_collapsed++;
 	} else {
@@ -2045,14 +2047,14 @@ static void collapse_file(struct mm_struct *mm,
 	unlock_page(new_page);
 out:
 	VM_BUG_ON(!list_empty(&pagelist));
-	if (!IS_ERR_OR_NULL(*hpage))
-		mem_cgroup_uncharge(page_folio(*hpage));
+	if (!IS_ERR_OR_NULL(cc->hpage))
+		mem_cgroup_uncharge(page_folio(cc->hpage));
 	/* TODO: tracepoints */
 }
 
 static void khugepaged_scan_file(struct mm_struct *mm,
-		struct file *file, pgoff_t start, struct page **hpage,
-		struct collapse_control *cc)
+				 struct file *file, pgoff_t start,
+				 struct collapse_control *cc)
 {
 	struct page *page = NULL;
 	struct address_space *mapping = file->f_mapping;
@@ -2125,8 +2127,7 @@ static void khugepaged_scan_file(struct mm_struct *mm,
 			result = SCAN_EXCEED_NONE_PTE;
 			count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
 		} else {
-			node = khugepaged_find_target_node(cc);
-			collapse_file(mm, file, start, hpage, node);
+			collapse_file(mm, file, start, cc);
 		}
 	}
 
@@ -2134,8 +2135,8 @@ static void khugepaged_scan_file(struct mm_struct *mm,
 }
 #else
 static void khugepaged_scan_file(struct mm_struct *mm,
-		struct file *file, pgoff_t start, struct page **hpage,
-		struct collapse_control *cc)
+				 struct file *file, pgoff_t start,
+				 struct collapse_control *cc)
 {
 	BUILD_BUG();
 }
@@ -2146,7 +2147,6 @@ static void khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
 #endif
 
 static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
-					    struct page **hpage,
 					    struct collapse_control *cc)
 	__releases(&khugepaged_mm_lock)
 	__acquires(&khugepaged_mm_lock)
@@ -2223,12 +2223,11 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 
 				mmap_read_unlock(mm);
 				ret = 1;
-				khugepaged_scan_file(mm, file, pgoff, hpage, cc);
+				khugepaged_scan_file(mm, file, pgoff, cc);
 				fput(file);
 			} else {
 				ret = khugepaged_scan_pmd(mm, vma,
-						khugepaged_scan.address,
-						hpage, cc);
+						khugepaged_scan.address, cc);
 			}
 			/* move to next address */
 			khugepaged_scan.address += HPAGE_PMD_SIZE;
@@ -2286,15 +2285,15 @@ static int khugepaged_wait_event(void)
 
 static void khugepaged_do_scan(struct collapse_control *cc)
 {
-	struct page *hpage = NULL;
 	unsigned int progress = 0, pass_through_head = 0;
 	unsigned int pages = READ_ONCE(khugepaged_pages_to_scan);
 	bool wait = true;
 
+	cc->hpage = NULL;
 	lru_add_drain_all();
 
 	while (progress < pages) {
-		if (!khugepaged_prealloc_page(&hpage, &wait))
+		if (!khugepaged_prealloc_page(&cc->hpage, &wait))
 			break;
 
 		cond_resched();
@@ -2308,14 +2307,14 @@ static void khugepaged_do_scan(struct collapse_control *cc)
 		if (khugepaged_has_work() &&
 		    pass_through_head < 2)
 			progress += khugepaged_scan_mm_slot(pages - progress,
-							    &hpage, cc);
+							    cc);
 		else
 			progress = pages;
 		spin_unlock(&khugepaged_mm_lock);
 	}
 
-	if (!IS_ERR_OR_NULL(hpage))
-		put_page(hpage);
+	if (!IS_ERR_OR_NULL(cc->hpage))
+		put_page(cc->hpage);
 }
 
 static bool khugepaged_should_wakeup(void)
@@ -2349,6 +2348,7 @@ static int khugepaged(void *none)
 	struct mm_slot *mm_slot;
 	struct collapse_control cc = {
 		.last_target_node = NUMA_NO_NODE,
+		.alloc_hpage = &khugepaged_alloc_page,
 	};
 
 	set_freezable();
-- 
2.36.0.rc0.470.gd361397f0d-goog



  parent reply	other threads:[~2022-04-14 18:06 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-14 18:06 [PATCH v2 00/12] mm: userspace hugepage collapse Zach O'Keefe
2022-04-14 18:06 ` [PATCH v2 01/12] mm/khugepaged: record SCAN_PMD_MAPPED when scan_pmd() finds THP Zach O'Keefe
2022-04-14 18:06 ` [PATCH v2 02/12] mm/khugepaged: add struct collapse_control Zach O'Keefe
2022-04-14 18:06 ` Zach O'Keefe [this message]
2022-04-14 18:06 ` [PATCH v2 04/12] mm/khugepaged: add struct collapse_result Zach O'Keefe
2022-04-14 18:06 ` [PATCH v2 05/12] mm/madvise: introduce MADV_COLLAPSE sync hugepage collapse Zach O'Keefe
2022-04-14 18:06 ` [PATCH v2 06/12] mm/khugepaged: remove khugepaged prefix from shared collapse functions Zach O'Keefe
2022-04-14 18:06 ` [PATCH v2 07/12] mm/khugepaged: add flag to ignore khugepaged_max_ptes_* Zach O'Keefe
2022-04-14 18:06 ` [PATCH v2 08/12] mm/khugepaged: add flag to ignore page young/referenced requirement Zach O'Keefe
2022-04-14 18:06 ` [PATCH v2 09/12] mm/madvise: add MADV_COLLAPSE to process_madvise() Zach O'Keefe
2022-04-14 18:06 ` [PATCH v2 10/12] selftests/vm: modularize collapse selftests Zach O'Keefe
2022-04-14 18:06 ` [PATCH v2 11/12] selftests/vm: add MADV_COLLAPSE collapse context to selftests Zach O'Keefe
2022-04-14 18:06 ` [PATCH v2 12/12] selftests/vm: add test to verify recollapse of THPs Zach O'Keefe
2022-04-15  0:04 ` [PATCH v2 00/12] mm: userspace hugepage collapse Peter Xu
2022-04-15  0:52   ` Zach O'Keefe
2022-04-15 13:39     ` Peter Xu
2022-04-15 20:04       ` Zach O'Keefe
2022-04-16 19:26         ` Peter Xu
2022-04-17 17:23           ` Zach O'Keefe
2022-04-19 14:33           ` David Hildenbrand
2022-04-19 15:55             ` Zach O'Keefe
2022-04-19 20:02               ` David Hildenbrand
2022-04-19 22:42                 ` Zach O'Keefe
2022-04-21  0:56                   ` Yang Shi
2022-04-21  1:02                     ` Zach O'Keefe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220414180612.3844426-4-zokeefe@google.com \
    --to=zokeefe@google.com \
    --cc=James.Bottomley@HansenPartnership.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=alex.shi@linux.alibaba.com \
    --cc=arnd@arndb.de \
    --cc=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=axelrasmussen@google.com \
    --cc=chris@zankel.net \
    --cc=ckennelly@google.com \
    --cc=david@redhat.com \
    --cc=deller@gmx.de \
    --cc=hughd@google.com \
    --cc=ink@jurassic.park.msu.ru \
    --cc=jcmvbkbc@gmail.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linmiaohe@huawei.com \
    --cc=linux-mm@kvack.org \
    --cc=lkp@intel.com \
    --cc=mattst88@gmail.com \
    --cc=mhocko@suse.com \
    --cc=minchan@kernel.org \
    --cc=pasha.tatashin@soleen.com \
    --cc=patrickx@google.com \
    --cc=peterx@redhat.com \
    --cc=rientjes@google.com \
    --cc=shy828301@gmail.com \
    --cc=sj@kernel.org \
    --cc=songliubraving@fb.com \
    --cc=tsbogend@alpha.franken.de \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.