From: Zi Yan <zi.yan@sent.com>
To: linux-mm@kvack.org, Roman Gushchin <guro@fb.com>
Cc: Rik van Riel <riel@surriel.com>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	Matthew Wilcox <willy@infradead.org>,
	Shakeel Butt <shakeelb@google.com>,
	Yang Shi <yang.shi@linux.alibaba.com>,
	David Nellans <dnellans@nvidia.com>,
	linux-kernel@vger.kernel.org, Zi Yan <ziy@nvidia.com>
Subject: [RFC PATCH 04/16] mm: thp: 1GB THP copy on write implementation.
Date: Wed,  2 Sep 2020 14:06:16 -0400
Message-ID: <20200902180628.4052244-5-zi.yan@sent.com>
In-Reply-To: <20200902180628.4052244-1-zi.yan@sent.com>

From: Zi Yan <ziy@nvidia.com>

COW on 1GB THPs falls back to 2MB THPs when the 1GB page cannot be
reused. On a write-protect fault, do_huge_pud_wp_page() keeps the
existing 1GB THP and marks it writable if reuse_swap_page() shows it is
exclusively mapped; otherwise it splits the PUD mapping and lets the
fault fall back to the existing 2MB THP and base page COW paths.

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
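A minimal userspace sketch of the scenario this handler covers is
included below for reviewers. It is illustrative only and not part of
the patch: it assumes anonymous PUD THPs are enabled (via the knob
added later in this series), that the fault path actually backed the
region with a 1GB THP, and the file name is made up.

/* thp1g_cow.c -- hypothetical test program, not part of this series. */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

#define SZ_1G (1UL << 30)

int main(void)
{
	/* Over-allocate so a 1GB-aligned start address can be carved out. */
	char *raw = mmap(NULL, 2 * SZ_1G, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	char *buf;

	if (raw == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	buf = (char *)(((unsigned long)raw + SZ_1G - 1) & ~(SZ_1G - 1));

	madvise(buf, SZ_1G, MADV_HUGEPAGE);
	memset(buf, 1, SZ_1G);	/* populate; ideally one PUD-mapped THP */

	if (fork() == 0) {
		/*
		 * Child: the first write after fork() hits a write-protected
		 * PUD entry and reaches wp_huge_pud() ->
		 * do_huge_pud_wp_page().  Since the parent still maps the
		 * page, reuse_swap_page() fails, the PUD is split, and COW
		 * proceeds at 2MB/4KB granularity.
		 */
		buf[0] = 2;
		_exit(0);
	}
	wait(NULL);
	return 0;
}
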
 arch/x86/include/asm/pgalloc.h |  9 ++++++
 include/linux/huge_mm.h        |  5 ++++
 mm/huge_memory.c               | 55 +++++++++++++++++++++++++++++++++++
 mm/memory.c                    |  2 +-
 mm/swapfile.c                  |  4 ++-
 5 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index fae13467d3e1..31221269c387 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -98,6 +98,15 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
+static inline void pud_populate_with_pgtable(struct mm_struct *mm, pud_t *pud,
+				struct page *pte)
+{
+	unsigned long pfn = page_to_pfn(pte);
+
+	paravirt_alloc_pmd(mm, pfn);
+	set_pud(pud, __pud(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
+}
+
 #if CONFIG_PGTABLE_LEVELS > 2
 static inline pmd_t *pmd_alloc_one_page_with_ptes(struct mm_struct *mm, unsigned long addr)
 {
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7528652400e4..0c20a8ea6911 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -19,6 +19,7 @@ extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
 extern int do_huge_pud_anonymous_page(struct vm_fault *vmf);
+extern vm_fault_t do_huge_pud_wp_page(struct vm_fault *vmf, pud_t orig_pud);
 #else
 static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 {
@@ -27,6 +28,10 @@ extern int do_huge_pud_anonymous_page(struct vm_fault *vmf)
 {
 	return VM_FAULT_FALLBACK;
 }
+static inline vm_fault_t do_huge_pud_wp_page(struct vm_fault *vmf, pud_t orig_pud)
+{
+	return VM_FAULT_FALLBACK;
+}
 #endif
 
 extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ec3847392208..6da9b02501b7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1334,6 +1334,61 @@ void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 unlock:
 	spin_unlock(vmf->ptl);
 }
+
+vm_fault_t do_huge_pud_wp_page(struct vm_fault *vmf, pud_t orig_pud)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct page *page = NULL;
+	unsigned long haddr = vmf->address & HPAGE_PUD_MASK;
+
+	vmf->ptl = pud_lockptr(vma->vm_mm, vmf->pud);
+	VM_BUG_ON_VMA(!vma->anon_vma, vma);
+
+	if (is_huge_zero_pud(orig_pud))
+		goto fallback;
+
+	spin_lock(vmf->ptl);
+
+	if (unlikely(!pud_same(*vmf->pud, orig_pud))) {
+		spin_unlock(vmf->ptl);
+		return 0;
+	}
+
+	page = pud_page(orig_pud);
+	VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
+
+	/* Lock page for reuse_swap_page() */
+	if (!trylock_page(page)) {
+		get_page(page);
+		spin_unlock(vmf->ptl);
+		lock_page(page);
+		spin_lock(vmf->ptl);
+		if (unlikely(!pud_same(*vmf->pud, orig_pud))) {
+			spin_unlock(vmf->ptl);
+			unlock_page(page);
+			put_page(page);
+			return 0;
+		}
+		put_page(page);
+	}
+	if (reuse_swap_page(page, NULL)) {
+		pud_t entry;
+
+		entry = pud_mkyoung(orig_pud);
+		entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma);
+		if (pudp_set_access_flags(vma, haddr, vmf->pud, entry, 1))
+			update_mmu_cache_pud(vma, vmf->address, vmf->pud);
+		unlock_page(page);
+		spin_unlock(vmf->ptl);
+		return VM_FAULT_WRITE;
+	}
+	unlock_page(page);
+	spin_unlock(vmf->ptl);
+fallback:
+	__split_huge_pud(vma, vmf->pud, vmf->address);
+	return VM_FAULT_FALLBACK;
+}
+
 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 
 void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
diff --git a/mm/memory.c b/mm/memory.c
index 6f86294438fd..b88587256bc1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4165,7 +4165,7 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/* No support for anonymous transparent PUD pages yet */
 	if (vma_is_anonymous(vmf->vma))
-		return VM_FAULT_FALLBACK;
+		return do_huge_pud_wp_page(vmf, orig_pud);
 	if (vmf->vma->vm_ops->huge_fault)
 		return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 20012c0c0252..e3f771c2ad83 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1635,7 +1635,9 @@ static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
 	/* hugetlbfs shouldn't call it */
 	VM_BUG_ON_PAGE(PageHuge(page), page);
 
-	if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!PageTransCompound(page))) {
+	if (!IS_ENABLED(CONFIG_THP_SWAP) ||
+	    unlikely(compound_order(compound_head(page)) == HPAGE_PUD_ORDER) ||
+	    likely(!PageTransCompound(page))) {
 		mapcount = page_trans_huge_mapcount(page, total_mapcount);
 		if (PageSwapCache(page))
 			swapcount = page_swapcount(page);
-- 
2.28.0


