From: Zi Yan <zi.yan@sent.com>
To: linux-mm@kvack.org, Roman Gushchin <guro@fb.com>
Cc: Rik van Riel <riel@surriel.com>,
"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
Matthew Wilcox <willy@infradead.org>,
Shakeel Butt <shakeelb@google.com>,
Yang Shi <yang.shi@linux.alibaba.com>,
David Nellans <dnellans@nvidia.com>,
linux-kernel@vger.kernel.org, Zi Yan <ziy@nvidia.com>
Subject: [RFC PATCH 04/16] mm: thp: 1GB THP copy on write implementation.
Date: Wed, 2 Sep 2020 14:06:16 -0400 [thread overview]
Message-ID: <20200902180628.4052244-5-zi.yan@sent.com> (raw)
In-Reply-To: <20200902180628.4052244-1-zi.yan@sent.com>
From: Zi Yan <ziy@nvidia.com>
COW on 1GB THPs will fall back to 2MB THPs if 1GB THP is not available.
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
arch/x86/include/asm/pgalloc.h | 9 ++++++
include/linux/huge_mm.h | 5 ++++
mm/huge_memory.c | 54 ++++++++++++++++++++++++++++++++++
mm/memory.c | 2 +-
mm/swapfile.c | 4 ++-
5 files changed, 72 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index fae13467d3e1..31221269c387 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -98,6 +98,15 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
#define pmd_pgtable(pmd) pmd_page(pmd)
+/*
+ * Install the page-table page @pte into the PUD slot @pud: call the
+ * paravirt_alloc_pmd() hook for the page's pfn, then write a PUD entry
+ * built from that pfn with _PAGE_TABLE set.
+ */
+static inline void pud_populate_with_pgtable(struct mm_struct *mm, pud_t *pud,
+					     struct page *pte)
+{
+	unsigned long table_pfn = page_to_pfn(pte);
+	pud_t entry;
+
+	paravirt_alloc_pmd(mm, table_pfn);
+	entry = __pud(((pteval_t)table_pfn << PAGE_SHIFT) | _PAGE_TABLE);
+	set_pud(pud, entry);
+}
+
#if CONFIG_PGTABLE_LEVELS > 2
static inline pmd_t *pmd_alloc_one_page_with_ptes(struct mm_struct *mm, unsigned long addr)
{
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7528652400e4..0c20a8ea6911 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -19,6 +19,7 @@ extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
extern int do_huge_pud_anonymous_page(struct vm_fault *vmf);
+extern vm_fault_t do_huge_pud_wp_page(struct vm_fault *vmf, pud_t orig_pud);
#else
static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
{
@@ -27,6 +28,10 @@ extern int do_huge_pud_anonymous_page(struct vm_fault *vmf)
{
return VM_FAULT_FALLBACK;
}
+/*
+ * Stub used when CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD is not set.
+ * It must be static inline: a plain extern definition in a header is
+ * emitted by every translation unit that includes it and breaks the link
+ * with duplicate symbols.
+ */
+static inline vm_fault_t do_huge_pud_wp_page(struct vm_fault *vmf,
+					      pud_t orig_pud)
+{
+	return VM_FAULT_FALLBACK;
+}
#endif
extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ec3847392208..6da9b02501b7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1334,6 +1334,60 @@ void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
unlock:
spin_unlock(vmf->ptl);
}
+
+/**
+ * do_huge_pud_wp_page - handle a write-protect fault on a PUD-mapped THP
+ * @vmf: fault descriptor; vmf->pud is the faulting entry and vmf->ptl is
+ *       set here to the lock returned by pud_lockptr() for that entry
+ * @orig_pud: PUD value the caller observed when it took the fault
+ *
+ * If reuse_swap_page() accepts the page, the existing entry is marked
+ * young and dirty, passed through maybe_pud_mkwrite() and written back in
+ * place.  Otherwise, and for the huge zero PUD, the mapping is split with
+ * __split_huge_pud() and VM_FAULT_FALLBACK is returned.
+ *
+ * Return: 0 if *vmf->pud no longer matches @orig_pud, VM_FAULT_WRITE if
+ * the entry was updated in place, VM_FAULT_FALLBACK after splitting.
+ * vmf->ptl is not held on return from any path.
+ */
+vm_fault_t do_huge_pud_wp_page(struct vm_fault *vmf, pud_t orig_pud)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct page *page = NULL;
+	unsigned long haddr = vmf->address & HPAGE_PUD_MASK;
+
+	vmf->ptl = pud_lockptr(vma->vm_mm, vmf->pud);
+	VM_BUG_ON_VMA(!vma->anon_vma, vma);
+
+	if (is_huge_zero_pud(orig_pud))
+		goto fallback;
+
+	spin_lock(vmf->ptl);
+
+	/* The entry changed since the caller sampled it: nothing to do here. */
+	if (unlikely(!pud_same(*vmf->pud, orig_pud))) {
+		spin_unlock(vmf->ptl);
+		return 0;
+	}
+
+	page = pud_page(orig_pud);
+	VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
+
+	/* Lock page for reuse_swap_page() */
+	if (!trylock_page(page)) {
+		/*
+		 * Drop the page-table lock while waiting for the page lock,
+		 * holding a page reference across the wait, then retake the
+		 * page-table lock and recheck the entry.
+		 */
+		get_page(page);
+		spin_unlock(vmf->ptl);
+		lock_page(page);
+		spin_lock(vmf->ptl);
+		if (unlikely(!pud_same(*vmf->pud, orig_pud))) {
+			/* ptl was retaken above; release it before bailing out */
+			spin_unlock(vmf->ptl);
+			unlock_page(page);
+			put_page(page);
+			return 0;
+		}
+		put_page(page);
+	}
+	if (reuse_swap_page(page, NULL)) {
+		pud_t entry;
+
+		entry = pud_mkyoung(orig_pud);
+		entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma);
+		if (pudp_set_access_flags(vma, haddr, vmf->pud, entry, 1))
+			update_mmu_cache_pud(vma, vmf->address, vmf->pud);
+		unlock_page(page);
+		spin_unlock(vmf->ptl);
+		return VM_FAULT_WRITE;
+	}
+	unlock_page(page);
+	spin_unlock(vmf->ptl);
+fallback:
+	__split_huge_pud(vma, vmf->pud, vmf->address);
+	return VM_FAULT_FALLBACK;
+}
+
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
diff --git a/mm/memory.c b/mm/memory.c
index 6f86294438fd..b88587256bc1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4165,7 +4165,7 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* No support for anonymous transparent PUD pages yet */
if (vma_is_anonymous(vmf->vma))
- return VM_FAULT_FALLBACK;
+ return do_huge_pud_wp_page(vmf, orig_pud);
if (vmf->vma->vm_ops->huge_fault)
return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 20012c0c0252..e3f771c2ad83 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1635,7 +1635,9 @@ static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
/* hugetlbfs shouldn't call it */
VM_BUG_ON_PAGE(PageHuge(page), page);
- if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!PageTransCompound(page))) {
+ if (!IS_ENABLED(CONFIG_THP_SWAP) ||
+ unlikely(compound_order(compound_head(page)) == HPAGE_PUD_ORDER) ||
+ likely(!PageTransCompound(page))) {
mapcount = page_trans_huge_mapcount(page, total_mapcount);
if (PageSwapCache(page))
swapcount = page_swapcount(page);
--
2.28.0
next prev parent reply other threads:[~2020-09-02 18:06 UTC|newest]
Thread overview: 82+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-02 18:06 [RFC PATCH 00/16] 1GB THP support on x86_64 Zi Yan
2020-09-02 18:06 ` [RFC PATCH 01/16] mm: add pagechain container for storing multiple pages Zi Yan
2020-09-02 20:29 ` Randy Dunlap
2020-09-02 20:48 ` Zi Yan
2020-09-03 3:15 ` Matthew Wilcox
2020-09-07 12:22 ` Kirill A. Shutemov
2020-09-07 15:11 ` Zi Yan
2020-09-09 13:46 ` Kirill A. Shutemov
2020-09-09 14:15 ` Zi Yan
2020-09-02 18:06 ` [RFC PATCH 02/16] mm: thp: 1GB anonymous page implementation Zi Yan
2020-09-02 18:06 ` [RFC PATCH 03/16] mm: proc: add 1GB THP kpageflag Zi Yan
2020-09-09 13:46 ` Kirill A. Shutemov
2020-09-02 18:06 ` Zi Yan [this message]
2020-09-02 18:06 ` [RFC PATCH 05/16] mm: thp: handling 1GB THP reference bit Zi Yan
2020-09-09 14:09 ` Kirill A. Shutemov
2020-09-09 14:36 ` Zi Yan
2020-09-02 18:06 ` [RFC PATCH 06/16] mm: thp: add 1GB THP split_huge_pud_page() function Zi Yan
2020-09-09 14:18 ` Kirill A. Shutemov
2020-09-09 14:19 ` Zi Yan
2020-09-02 18:06 ` [RFC PATCH 07/16] mm: stats: make smap stats understand PUD THPs Zi Yan
2020-09-02 18:06 ` [RFC PATCH 08/16] mm: page_vma_walk: teach it about PMD-mapped PUD THP Zi Yan
2020-09-02 18:06 ` [RFC PATCH 09/16] mm: thp: 1GB THP support in try_to_unmap() Zi Yan
2020-09-02 18:06 ` [RFC PATCH 10/16] mm: thp: split 1GB THPs at page reclaim Zi Yan
2020-09-02 18:06 ` [RFC PATCH 11/16] mm: thp: 1GB THP follow_p*d_page() support Zi Yan
2020-09-02 18:06 ` [RFC PATCH 12/16] mm: support 1GB THP pagemap support Zi Yan
2020-09-02 18:06 ` [RFC PATCH 13/16] mm: thp: add a knob to enable/disable 1GB THPs Zi Yan
2020-09-02 18:06 ` [RFC PATCH 14/16] mm: page_alloc: >=MAX_ORDER pages allocation an deallocation Zi Yan
2020-09-02 18:06 ` [RFC PATCH 15/16] hugetlb: cma: move cma reserve function to cma.c Zi Yan
2020-09-02 18:06 ` [RFC PATCH 16/16] mm: thp: use cma reservation for pud thp allocation Zi Yan
2020-09-02 18:40 ` [RFC PATCH 00/16] 1GB THP support on x86_64 Jason Gunthorpe
2020-09-02 18:45 ` Zi Yan
2020-09-02 18:48 ` Jason Gunthorpe
2020-09-02 19:05 ` Zi Yan
2020-09-02 19:57 ` Jason Gunthorpe
2020-09-02 20:29 ` Zi Yan
2020-09-03 16:40 ` Jason Gunthorpe
2020-09-03 16:55 ` Matthew Wilcox
2020-09-03 17:08 ` Jason Gunthorpe
2020-09-03 7:32 ` Michal Hocko
2020-09-03 16:25 ` Roman Gushchin
2020-09-03 16:50 ` Jason Gunthorpe
2020-09-03 17:01 ` Matthew Wilcox
2020-09-03 17:18 ` Jason Gunthorpe
2020-09-03 20:57 ` Mike Kravetz
2020-09-03 21:06 ` Roman Gushchin
2020-09-04 7:42 ` Michal Hocko
2020-09-04 21:10 ` Roman Gushchin
2020-09-07 7:20 ` Michal Hocko
2020-09-08 15:09 ` Zi Yan
2020-09-08 19:58 ` Roman Gushchin
2020-09-09 4:01 ` John Hubbard
2020-09-09 7:15 ` Michal Hocko
2020-09-03 14:23 ` Kirill A. Shutemov
2020-09-03 16:30 ` Roman Gushchin
2020-09-08 11:57 ` David Hildenbrand
2020-09-08 14:05 ` Zi Yan
2020-09-08 14:22 ` David Hildenbrand
2020-09-08 15:36 ` Zi Yan
2020-09-08 14:27 ` Matthew Wilcox
2020-09-08 15:50 ` Zi Yan
2020-09-09 12:11 ` Jason Gunthorpe
2020-09-09 12:32 ` Matthew Wilcox
2020-09-09 13:14 ` Jason Gunthorpe
2020-09-09 13:27 ` David Hildenbrand
2020-09-10 10:02 ` William Kucharski
2020-09-08 14:35 ` Michal Hocko
2020-09-08 14:41 ` Rik van Riel
2020-09-08 15:02 ` David Hildenbrand
2020-09-09 7:04 ` Michal Hocko
2020-09-09 13:19 ` Rik van Riel
2020-09-09 13:43 ` David Hildenbrand
2020-09-09 13:49 ` Rik van Riel
2020-09-09 13:54 ` David Hildenbrand
2020-09-10 7:32 ` Michal Hocko
2020-09-10 8:27 ` David Hildenbrand
2020-09-10 14:21 ` Zi Yan
2020-09-10 14:34 ` David Hildenbrand
2020-09-10 14:41 ` Zi Yan
2020-09-10 15:15 ` David Hildenbrand
2020-09-10 13:32 ` Rik van Riel
2020-09-10 14:30 ` Zi Yan
2020-09-09 13:59 ` Michal Hocko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200902180628.4052244-5-zi.yan@sent.com \
--to=zi.yan@sent.com \
--cc=dnellans@nvidia.com \
--cc=guro@fb.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=riel@surriel.com \
--cc=shakeelb@google.com \
--cc=willy@infradead.org \
--cc=yang.shi@linux.alibaba.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).