From: "Huang, Ying" <ying.huang@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Huang Ying <ying.huang@intel.com>,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Michal Hocko <mhocko@suse.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Shaohua Li <shli@kernel.org>, Hugh Dickins <hughd@google.com>,
	Minchan Kim <minchan@kernel.org>, Rik van Riel <riel@redhat.com>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>,
	Zi Yan <zi.yan@cs.rutgers.edu>
Subject: [PATCH -mm -V3 20/21] mm, THP, swap: create PMD swap mapping when unmap the THP
Date: Wed, 23 May 2018 16:26:24 +0800	[thread overview]
Message-ID: <20180523082625.6897-21-ying.huang@intel.com> (raw)
In-Reply-To: <20180523082625.6897-1-ying.huang@intel.com>

From: Huang Ying <ying.huang@intel.com>

This is the final step of the THP swapin support.  When reclaiming an
anonymous THP, after allocating a huge swap cluster and adding the THP
to the swap cache, the PMD page mapping is changed to a mapping to the
swap space.  Previously, the PMD page mapping was split before being
changed.  With this patch, the unmap code is enhanced to create a PMD
swap mapping to replace the PMD page mapping instead of splitting it.
So later, when the SWAP_HAS_CACHE flag is cleared in the last step of
swapout, the huge swap cluster is kept intact instead of being split,
and on swapin the huge swap cluster is read as a whole into a THP.
That is, the THP is not split during swapout/swapin.  This eliminates
the splitting/collapsing overhead, reduces the page fault count, etc.
More importantly, it greatly improves THP utilization: many more THPs
are kept intact when swapping is used, so that we can take full
advantage of THP, including its high swapout/swapin performance.
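
For reference, below is a rough sketch of the swapout flow after this
patch.  Only try_to_unmap_one() and set_pmd_swap_entry() are touched
by this patch; the surrounding callers are the existing reclaim path
in mm/vmscan.c, so treat the ordering as illustrative:

	/*
	 * shrink_page_list()
	 *   add_to_swap(thp)           allocate a huge swap cluster and
	 *                              add the THP to the swap cache
	 *   try_to_unmap(thp, flags)   TTU_SPLIT_HUGE_PMD no longer set
	 *     try_to_unmap_one()
	 *       set_pmd_swap_entry()   replace the PMD page mapping with
	 *                              a PMD swap mapping
	 *   pageout(thp)               write the THP out in one piece
	 *   __remove_mapping(thp)      remove from the swap cache; when
	 *                              SWAP_HAS_CACHE is cleared, the
	 *                              huge swap cluster is kept intact
	 */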

Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Zi Yan <zi.yan@cs.rutgers.edu>
---
 include/linux/huge_mm.h | 11 +++++++++++
 mm/huge_memory.c        | 30 ++++++++++++++++++++++++++++++
 mm/rmap.c               | 43 +++++++++++++++++++++++++++++++++++++++++--
 mm/vmscan.c             |  6 +-----
 4 files changed, 83 insertions(+), 7 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 5001c28b3d18..d03fcddcc42d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -404,6 +404,8 @@ static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+struct page_vma_mapped_walk;
+
 #ifdef CONFIG_THP_SWAP
 extern void __split_huge_swap_pmd(struct vm_area_struct *vma,
 				  unsigned long haddr,
@@ -411,6 +413,8 @@ extern void __split_huge_swap_pmd(struct vm_area_struct *vma,
 extern int split_huge_swap_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			       unsigned long address, pmd_t orig_pmd);
 extern int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd);
+extern bool set_pmd_swap_entry(struct page_vma_mapped_walk *pvmw,
+	struct page *page, unsigned long address, pmd_t pmdval);
 
 static inline bool transparent_hugepage_swapin_enabled(
 	struct vm_area_struct *vma)
@@ -452,6 +456,13 @@ static inline int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd)
 	return 0;
 }
 
+static inline bool set_pmd_swap_entry(struct page_vma_mapped_walk *pvmw,
+				      struct page *page, unsigned long address,
+				      pmd_t pmdval)
+{
+	return false;
+}
+
 static inline bool transparent_hugepage_swapin_enabled(
 	struct vm_area_struct *vma)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e80d03c2412a..88984e95b9b2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1876,6 +1876,36 @@ int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd)
 	count_vm_event(THP_SWPIN_FALLBACK);
 	goto fallback;
 }
+
+bool set_pmd_swap_entry(struct page_vma_mapped_walk *pvmw, struct page *page,
+			unsigned long address, pmd_t pmdval)
+{
+	struct vm_area_struct *vma = pvmw->vma;
+	struct mm_struct *mm = vma->vm_mm;
+	pmd_t swp_pmd;
+	swp_entry_t entry = { .val = page_private(page) };
+
+	if (swap_duplicate(&entry, true) < 0) {
+		set_pmd_at(mm, address, pvmw->pmd, pmdval);
+		return false;
+	}
+	if (list_empty(&mm->mmlist)) {
+		spin_lock(&mmlist_lock);
+		if (list_empty(&mm->mmlist))
+			list_add(&mm->mmlist, &init_mm.mmlist);
+		spin_unlock(&mmlist_lock);
+	}
+	add_mm_counter(mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+	add_mm_counter(mm, MM_SWAPENTS, HPAGE_PMD_NR);
+	swp_pmd = swp_entry_to_pmd(entry);
+	if (pmd_soft_dirty(pmdval))
+		swp_pmd = pmd_swp_mksoft_dirty(swp_pmd);
+	set_pmd_at(mm, address, pvmw->pmd, swp_pmd);
+
+	page_remove_rmap(page, true);
+	put_page(page);
+	return true;
+}
 #endif
 
 static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
diff --git a/mm/rmap.c b/mm/rmap.c
index 5f45d6325c40..4861b1a86e2a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1402,12 +1402,51 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 				continue;
 		}
 
+		address = pvmw.address;
+
+#ifdef CONFIG_THP_SWAP
+		/* PMD-mapped THP swap entry */
+		if (thp_swap_supported() && !pvmw.pte && PageAnon(page)) {
+			pmd_t pmdval;
+
+			VM_BUG_ON_PAGE(PageHuge(page) ||
+				       !PageTransCompound(page), page);
+
+			flush_cache_range(vma, address,
+					  address + HPAGE_PMD_SIZE);
+			mmu_notifier_invalidate_range_start(mm, address,
+					address + HPAGE_PMD_SIZE);
+			if (should_defer_flush(mm, flags)) {
+				/* check comments for PTE below */
+				pmdval = pmdp_huge_get_and_clear(mm, address,
+								 pvmw.pmd);
+				set_tlb_ubc_flush_pending(mm,
+							  pmd_dirty(pmdval));
+			} else
+				pmdval = pmdp_huge_clear_flush(vma, address,
+							       pvmw.pmd);
+
+			/*
+			 * Move the dirty bit to the page. Now the pmd
+			 * is gone.
+			 */
+			if (pmd_dirty(pmdval))
+				set_page_dirty(page);
+
+			/* Update high watermark before we lower rss */
+			update_hiwater_rss(mm);
+
+			ret = set_pmd_swap_entry(&pvmw, page, address, pmdval);
+			mmu_notifier_invalidate_range_end(mm, address,
+					address + HPAGE_PMD_SIZE);
+			continue;
+		}
+#endif
+
 		/* Unexpected PMD-mapped THP? */
 		VM_BUG_ON_PAGE(!pvmw.pte, page);
 
 		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
-		address = pvmw.address;
-
 
 		if (IS_ENABLED(CONFIG_MIGRATION) &&
 		    (flags & TTU_MIGRATION) &&
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 50055d72f294..9f46047d4dee 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1148,11 +1148,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 * processes. Try to unmap it here.
 		 */
 		if (page_mapped(page)) {
-			enum ttu_flags flags = ttu_flags | TTU_BATCH_FLUSH;
-
-			if (unlikely(PageTransHuge(page)))
-				flags |= TTU_SPLIT_HUGE_PMD;
-			if (!try_to_unmap(page, flags)) {
+			if (!try_to_unmap(page, ttu_flags | TTU_BATCH_FLUSH)) {
 				nr_unmap_fail++;
 				goto activate_locked;
 			}
-- 
2.16.1
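
For completeness, a sketch (not part of this patch) of how the swapin
side is expected to consume the PMD swap mapping created above, using
only the declarations from the include/linux/huge_mm.h hunk; the real
fault-handler wiring is added earlier in this series, so the control
flow here is an assumption for illustration:

	/* in the huge PMD page fault path, sketch only */
	if (is_swap_pmd(orig_pmd)) {
		if (thp_swap_supported() &&
		    transparent_hugepage_swapin_enabled(vma))
			/* swapin the huge swap cluster as a whole */
			return do_huge_pmd_swap_page(vmf, orig_pmd);
		/*
		 * Otherwise split the PMD swap mapping into PTE swap
		 * mappings and fall back to the normal swapin path.
		 */
		return split_huge_swap_pmd(vma, vmf->pmd,
					   vmf->address, orig_pmd);
	}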

Thread overview: 43+ messages
2018-05-23  8:26 [PATCH -mm -V3 00/21] mm, THP, swap: Swapout/swapin THP in one piece Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 01/21] mm, THP, swap: Enable PMD swap operations for CONFIG_THP_SWAP Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 02/21] mm, THP, swap: Make CONFIG_THP_SWAP depends on CONFIG_SWAP Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 03/21] mm, THP, swap: Support PMD swap mapping in swap_duplicate() Huang, Ying
2018-06-11 20:42   ` Daniel Jordan
2018-06-12  1:23     ` Huang, Ying
2018-06-12  3:15       ` Huang, Ying
2018-06-12 12:05         ` Daniel Jordan
2018-06-12 12:04       ` Daniel Jordan
2018-06-12 21:44       ` Daniel Jordan
2018-06-13  1:26         ` Huang, Ying
2018-06-13 11:49           ` Daniel Jordan
2018-05-23  8:26 ` [PATCH -mm -V3 04/21] mm, THP, swap: Support PMD swap mapping in swapcache_free_cluster() Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 05/21] mm, THP, swap: Support PMD swap mapping in free_swap_and_cache()/swap_free() Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 06/21] mm, THP, swap: Support PMD swap mapping when splitting huge PMD Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 07/21] mm, THP, swap: Support PMD swap mapping in split_swap_cluster() Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 08/21] mm, THP, swap: Support to read a huge swap cluster for swapin a THP Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 09/21] mm, THP, swap: Swapin a THP as a whole Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 10/21] mm, THP, swap: Support to count THP swapin and its fallback Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 11/21] mm, THP, swap: Add sysfs interface to configure THP swapin Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 12/21] mm, THP, swap: Support PMD swap mapping in swapoff Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 13/21] mm, THP, swap: Support PMD swap mapping in madvise_free() Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 14/21] mm, cgroup, THP, swap: Support to move swap account for PMD swap mapping Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 15/21] mm, THP, swap: Support to copy PMD swap mapping when fork() Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 16/21] mm, THP, swap: Free PMD swap mapping when zap_huge_pmd() Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 17/21] mm, THP, swap: Support PMD swap mapping for MADV_WILLNEED Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 18/21] mm, THP, swap: Support PMD swap mapping in mincore() Huang, Ying
2018-05-23  8:26 ` [PATCH -mm -V3 19/21] mm, THP, swap: Support PMD swap mapping in common path Huang, Ying
2018-05-23  8:26 ` Huang, Ying [this message]
2018-05-23  8:26 ` [PATCH -mm -V3 21/21] mm, THP: Avoid to split THP when reclaim MADV_FREE THP Huang, Ying
2018-06-01  6:11 ` [PATCH -mm -V3 00/21] mm, THP, swap: Swapout/swapin THP in one piece Naoya Horiguchi
2018-06-01  7:03   ` Huang, Ying
2018-06-04 18:06 ` Daniel Jordan
2018-06-05  4:30   ` Huang, Ying
2018-06-05 16:38     ` Daniel Jordan
