linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
To: linux-mm@kvack.org
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Hugh Dickins <hughd@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Dave Hansen <dave.hansen@intel.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Mel Gorman <mgorman@techsingularity.net>,
	Michal Hocko <mhocko@kernel.org>,
	Vlastimil Babka <vbabka@suse.cz>,
	Pavel Emelyanov <xemul@parallels.com>,
	Zi Yan <zi.yan@cs.rutgers.edu>,
	Balbir Singh <bsingharora@gmail.com>,
	linux-kernel@vger.kernel.org,
	Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>,
	Naoya Horiguchi <nao.horiguchi@gmail.com>
Subject: [PATCH v2 05/12] mm: thp: add core routines for thp/pmd migration
Date: Tue,  8 Nov 2016 08:31:50 +0900	[thread overview]
Message-ID: <1478561517-4317-6-git-send-email-n-horiguchi@ah.jp.nec.com> (raw)
In-Reply-To: <1478561517-4317-1-git-send-email-n-horiguchi@ah.jp.nec.com>

This patch prepares the core code for thp migration. The code stays dormant
until unmap_and_move() stops unconditionally splitting thps and get_new_page()
starts allocating destination thps.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
---
ChangeLog v1 -> v2:
- support pte-mapped thp, doubly-mapped thp
---
 arch/x86/include/asm/pgtable_64.h |   2 +
 include/linux/swapops.h           |  61 +++++++++++++++
 mm/huge_memory.c                  | 154 ++++++++++++++++++++++++++++++++++++++
 mm/migrate.c                      |  44 ++++++++++-
 mm/pgtable-generic.c              |   3 +-
 5 files changed, 262 insertions(+), 2 deletions(-)

diff --git v4.9-rc2-mmotm-2016-10-27-18-27/arch/x86/include/asm/pgtable_64.h v4.9-rc2-mmotm-2016-10-27-18-27_patched/arch/x86/include/asm/pgtable_64.h
index 1cc82ec..3a1b48e 100644
--- v4.9-rc2-mmotm-2016-10-27-18-27/arch/x86/include/asm/pgtable_64.h
+++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/arch/x86/include/asm/pgtable_64.h
@@ -167,7 +167,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 					 ((type) << (SWP_TYPE_FIRST_BIT)) \
 					 | ((offset) << SWP_OFFSET_FIRST_BIT) })
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
+#define __pmd_to_swp_entry(pmd)		((swp_entry_t) { pmd_val((pmd)) })
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
+#define __swp_entry_to_pmd(x)		((pmd_t) { .pmd = (x).val })
 
 extern int kern_addr_valid(unsigned long addr);
 extern void cleanup_highmap(void);
diff --git v4.9-rc2-mmotm-2016-10-27-18-27/include/linux/swapops.h v4.9-rc2-mmotm-2016-10-27-18-27_patched/include/linux/swapops.h
index 5c3a5f3..b6b22a2 100644
--- v4.9-rc2-mmotm-2016-10-27-18-27/include/linux/swapops.h
+++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/include/linux/swapops.h
@@ -163,6 +163,67 @@ static inline int is_write_migration_entry(swp_entry_t entry)
 
 #endif
 
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+/*
+ * Replace the mapping of @page at @address in @vma with migration entries
+ * (one pmd entry for a pmd-mapped thp, pte entries for a pte-mapped thp).
+ */
+extern void set_pmd_migration_entry(struct page *page,
+		struct vm_area_struct *vma, unsigned long address);
+
+/*
+ * Replace migration entries under @pmd that refer to @old with mappings of
+ * @new.
+ */
+extern int remove_migration_pmd(struct page *new, pmd_t *pmd,
+		struct vm_area_struct *vma, unsigned long addr, void *old);
+
+/*
+ * If @pmd holds a migration entry, wait until the page it refers to is
+ * unlocked.
+ */
+extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd);
+
+/*
+ * Decode the arch-level swap encoding stored in @pmd and rebuild it as a
+ * swp_entry_t via swp_entry().
+ */
+static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
+{
+	swp_entry_t raw = __pmd_to_swp_entry(pmd);
+
+	return swp_entry(__swp_type(raw), __swp_offset(raw));
+}
+
+/*
+ * Inverse of pmd_to_swp_entry(): re-encode @entry with __swp_entry() and wrap
+ * the result in a pmd_t.
+ */
+static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
+{
+	swp_entry_t raw = __swp_entry(swp_type(entry), swp_offset(entry));
+
+	return __swp_entry_to_pmd(raw);
+}
+
+/* Return 1 if @pmd is non-present and encodes a migration entry, else 0. */
+static inline int is_pmd_migration_entry(pmd_t pmd)
+{
+	if (pmd_present(pmd))
+		return 0;
+
+	return is_migration_entry(pmd_to_swp_entry(pmd)) != 0;
+}
+#else
+/*
+ * Stubs used when CONFIG_ARCH_ENABLE_THP_MIGRATION is not set: nothing is
+ * ever converted to a pmd migration entry, so the helpers below do nothing
+ * and the predicates report "not a migration entry".
+ */
+static inline void set_pmd_migration_entry(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
+{
+}
+
+/*
+ * NOTE(review): returns 0 here while the real implementation returns
+ * SWAP_AGAIN; confirm no caller depends on the return value.
+ */
+static inline int remove_migration_pmd(struct page *new, pmd_t *pmd,
+		struct vm_area_struct *vma, unsigned long addr, void *old)
+{
+	return 0;
+}
+
+static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { }
+
+/* Always yields the entry built by swp_entry(0, 0). */
+static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
+{
+	return swp_entry(0, 0);
+}
+
+/* Always yields an empty-initialized pmd, whatever @entry holds. */
+static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
+{
+	pmd_t pmd = {};
+
+	return pmd;
+}
+
+/* Without thp migration no pmd can be a migration entry. */
+static inline int is_pmd_migration_entry(pmd_t pmd)
+{
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_MEMORY_FAILURE
 
 extern atomic_long_t num_poisoned_pages __read_mostly;
diff --git v4.9-rc2-mmotm-2016-10-27-18-27/mm/huge_memory.c v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/huge_memory.c
index 0509d17..b3022b3 100644
--- v4.9-rc2-mmotm-2016-10-27-18-27/mm/huge_memory.c
+++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/huge_memory.c
@@ -2310,3 +2310,157 @@ static int __init split_huge_pages_debugfs(void)
 }
 late_initcall(split_huge_pages_debugfs);
 #endif
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+/*
+ * set_pmd_migration_entry - replace the mapping of a thp by migration entries
+ * @page: page being migrated
+ * @vma:  vma in which @page is mapped
+ * @addr: virtual address at which @page is mapped in @vma
+ *
+ * If the pmd covering @addr is a huge pmd that maps @page, it is replaced by
+ * a single pmd migration entry.  Otherwise the pmd is treated as pointing to
+ * a page table (pte-mapped thp) and every present pte in that table which
+ * maps a subpage of the compound page is replaced by a pte migration entry.
+ * For each replaced mapping the dirty bit is transferred to the page, the
+ * rmap is removed and one page reference is dropped.
+ *
+ * Returns silently when no present pgd/pud/pmd covers @addr.
+ */
+void set_pmd_migration_entry(struct page *page, struct vm_area_struct *vma,
+				unsigned long addr)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pmd_t pmdval;
+	swp_entry_t entry;
+	spinlock_t *ptl;
+
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		return;
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return;
+	pmd = pmd_offset(pud, addr);
+	pmdval = *pmd;
+	barrier();
+	if (!pmd_present(pmdval))
+		return;
+
+	/*
+	 * Both paths below rewrite entries inside the pmd-aligned range, so
+	 * notify for that range rather than for [addr, addr + HPAGE_PMD_SIZE).
+	 */
+	mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PMD_SIZE);
+	if (pmd_trans_huge(pmdval)) {
+		pmd_t pmdswp;
+
+		ptl = pmd_lock(mm, pmd);
+		/* pmdval was sampled without the lock; recheck under it. */
+		if (!pmd_present(*pmd))
+			goto unlock_pmd;
+		if (unlikely(!pmd_trans_huge(*pmd)))
+			goto unlock_pmd;
+		if (pmd_page(*pmd) != page)
+			goto unlock_pmd;
+
+		/*
+		 * Clear *and flush*: no CPU may keep a cached translation to
+		 * the old page once the migration entry is installed.
+		 */
+		pmdval = pmdp_huge_clear_flush(vma, haddr, pmd);
+		if (pmd_dirty(pmdval))
+			set_page_dirty(page);
+		entry = make_migration_entry(page, pmd_write(pmdval));
+		pmdswp = swp_entry_to_pmd(entry);
+		pmdswp = pmd_mkhuge(pmdswp);
+		set_pmd_at(mm, haddr, pmd, pmdswp);
+		page_remove_rmap(page, true);
+		put_page(page);
+unlock_pmd:
+		spin_unlock(ptl);
+	} else { /* pte-mapped thp */
+		pte_t *start_pte;
+		pte_t *pte;
+		pte_t pteval;
+		struct page *tmp = compound_head(page);
+		unsigned long address = haddr;
+		pte_t swp_pte;
+		int i;
+
+		start_pte = pte_offset_map(pmd, address);
+		ptl = pte_lockptr(mm, pmd);
+		spin_lock(ptl);
+		/* pte, subpage and virtual address advance in lockstep. */
+		for (i = 0, pte = start_pte; i < HPAGE_PMD_NR;
+		     i++, pte++, tmp++, address += PAGE_SIZE) {
+			if (!(pte_present(*pte) &&
+			      page_to_pfn(tmp) == pte_pfn(*pte)))
+				continue;
+			pteval = ptep_clear_flush(vma, address, pte);
+			if (pte_dirty(pteval))
+				set_page_dirty(tmp);
+			entry = make_migration_entry(tmp, pte_write(pteval));
+			swp_pte = swp_entry_to_pte(entry);
+			set_pte_at(mm, address, pte, swp_pte);
+			page_remove_rmap(tmp, false);
+			put_page(tmp);
+		}
+		/* Unmap what was mapped, not the pointer past the table. */
+		pte_unmap_unlock(start_pte, ptl);
+	}
+	mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE);
+}
+
+/*
+ * remove_migration_pmd - restore the mappings of a migrated thp
+ * @new:  destination page the restored mappings point to
+ * @pmd:  pmd entry covering @addr
+ * @vma:  vma the mapping belongs to
+ * @addr: virtual address inside the range covered by @pmd
+ * @old:  source page the migration entries refer to
+ *
+ * A non-present @pmd holding a migration entry for @old is replaced by a
+ * huge pmd mapping @new.  A present @pmd is treated as the page table of a
+ * pte-mapped thp: every pte migration entry referring to a subpage of @old
+ * is replaced by a pte mapping the corresponding subpage of @new.
+ *
+ * Always returns SWAP_AGAIN.
+ */
+int remove_migration_pmd(struct page *new, pmd_t *pmd,
+		struct vm_area_struct *vma, unsigned long addr, void *old)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	spinlock_t *ptl;
+	pmd_t pmde;
+	swp_entry_t entry;
+
+	pmde = *pmd;
+	barrier();
+
+	if (!pmd_present(pmde)) {
+		unsigned long mmun_start = addr & HPAGE_PMD_MASK;
+		unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
+
+		if (!is_migration_entry(pmd_to_swp_entry(pmde)))
+			return SWAP_AGAIN;
+
+		ptl = pmd_lock(mm, pmd);
+		/*
+		 * pmde was sampled without the lock; make sure the pmd still
+		 * is a migration entry before decoding a page from it.
+		 */
+		if (!is_pmd_migration_entry(*pmd))
+			goto unlock_ptl;
+		entry = pmd_to_swp_entry(*pmd);
+		if (migration_entry_to_page(entry) != old)
+			goto unlock_ptl;
+		get_page(new);
+		pmde = pmd_mkold(mk_huge_pmd(new, vma->vm_page_prot));
+		if (is_write_migration_entry(entry))
+			pmde = maybe_pmd_mkwrite(pmde, vma);
+		flush_cache_range(vma, mmun_start, mmun_end);
+		page_add_anon_rmap(new, vma, mmun_start, true);
+		pmdp_huge_clear_flush_notify(vma, mmun_start, pmd);
+		set_pmd_at(mm, mmun_start, pmd, pmde);
+		flush_tlb_range(vma, mmun_start, mmun_end);
+		if (vma->vm_flags & VM_LOCKED)
+			mlock_vma_page(new);
+		update_mmu_cache_pmd(vma, addr, pmd);
+unlock_ptl:
+		spin_unlock(ptl);
+	} else { /* pte-mapped thp */
+		pte_t *start_ptep;
+		pte_t *ptep;
+		pte_t pte;
+		int i;
+		struct page *tmpnew = compound_head(new);
+		struct page *tmpold = compound_head((struct page *)old);
+		unsigned long address = addr & HPAGE_PMD_MASK;
+
+		/*
+		 * Map the table at the aligned address so that ptep, address
+		 * and the subpage pointers all start at the first subpage and
+		 * the HPAGE_PMD_NR iterations stay inside the page table.
+		 */
+		start_ptep = pte_offset_map(pmd, address);
+		ptl = pte_lockptr(mm, pmd);
+		spin_lock(ptl);
+
+		for (i = 0, ptep = start_ptep; i < HPAGE_PMD_NR;
+		     i++, ptep++, tmpnew++, tmpold++, address += PAGE_SIZE) {
+			pte = *ptep;
+			if (!is_swap_pte(pte))
+				continue;
+			entry = pte_to_swp_entry(pte);
+			if (!is_migration_entry(entry) ||
+			    migration_entry_to_page(entry) != tmpold)
+				continue;
+			get_page(tmpnew);
+			pte = pte_mkold(mk_pte(tmpnew,
+					       READ_ONCE(vma->vm_page_prot)));
+			if (pte_swp_soft_dirty(*ptep))
+				pte = pte_mksoft_dirty(pte);
+			if (is_write_migration_entry(entry))
+				pte = maybe_mkwrite(pte, vma);
+			flush_dcache_page(tmpnew);
+			set_pte_at(mm, address, ptep, pte);
+			if (PageAnon(new))
+				page_add_anon_rmap(tmpnew, vma, address, false);
+			else
+				page_add_file_rmap(tmpnew, false);
+			update_mmu_cache(vma, address, ptep);
+		}
+		/* Unmap what was mapped, not the pointer past the table. */
+		pte_unmap_unlock(start_ptep, ptl);
+	}
+	return SWAP_AGAIN;
+}
+#endif
diff --git v4.9-rc2-mmotm-2016-10-27-18-27/mm/migrate.c v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/migrate.c
index 66ce6b4..54f2eb6 100644
--- v4.9-rc2-mmotm-2016-10-27-18-27/mm/migrate.c
+++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/migrate.c
@@ -198,6 +198,8 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	swp_entry_t entry;
+	pgd_t *pgd;
+	pud_t *pud;
  	pmd_t *pmd;
 	pte_t *ptep, pte;
  	spinlock_t *ptl;
@@ -208,10 +210,29 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 			goto out;
 		ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
 	} else {
-		pmd = mm_find_pmd(mm, addr);
+		pmd_t pmde;
+
+		pgd = pgd_offset(mm, addr);
+		if (!pgd_present(*pgd))
+			goto out;
+		pud = pud_offset(pgd, addr);
+		if (!pud_present(*pud))
+			goto out;
+		pmd = pmd_offset(pud, addr);
 		if (!pmd)
 			goto out;
 
+		if (PageTransCompound(new)) {
+			remove_migration_pmd(new, pmd, vma, addr, old);
+			goto out;
+		}
+
+		pmde = *pmd;
+		barrier();
+
+		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
+			goto out;
+
 		ptep = pte_offset_map(pmd, addr);
 
 		/*
@@ -344,6 +365,27 @@ void migration_entry_wait_huge(struct vm_area_struct *vma,
 	__migration_entry_wait(mm, pte, ptl);
 }
 
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+/*
+ * If @pmd currently holds a migration entry, take a reference on the page it
+ * refers to, drop the pmd lock and wait for that page to be unlocked.
+ * Returns without waiting when @pmd is not a migration entry or when the
+ * page's reference count is already zero.
+ */
+void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
+{
+	spinlock_t *ptl = pmd_lock(mm, pmd);
+	struct page *page;
+
+	if (is_pmd_migration_entry(*pmd)) {
+		page = migration_entry_to_page(pmd_to_swp_entry(*pmd));
+		if (get_page_unless_zero(page)) {
+			/* The reference keeps the page around while we sleep. */
+			spin_unlock(ptl);
+			wait_on_page_locked(page);
+			put_page(page);
+			return;
+		}
+	}
+
+	spin_unlock(ptl);
+}
+#endif
+
 #ifdef CONFIG_BLOCK
 /* Returns true if all buffers are successfully locked */
 static bool buffer_migrate_lock_buffers(struct buffer_head *head,
diff --git v4.9-rc2-mmotm-2016-10-27-18-27/mm/pgtable-generic.c v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/pgtable-generic.c
index 71c5f91..6012343 100644
--- v4.9-rc2-mmotm-2016-10-27-18-27/mm/pgtable-generic.c
+++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/pgtable-generic.c
@@ -118,7 +118,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
 {
 	pmd_t pmd;
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-	VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
+	VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
+		  !pmd_devmap(*pmdp));
 	pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
 	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 	return pmd;
-- 
2.7.0

  parent reply	other threads:[~2016-11-07 23:32 UTC|newest]

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-07 23:31 [PATCH v2 00/12] mm: page migration enhancement for thp Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 01/12] mm: x86: move _PAGE_SWP_SOFT_DIRTY from bit 7 to bit 6 Naoya Horiguchi
2016-11-10 23:29   ` Dave Hansen
2016-11-11  1:08     ` Naoya Horiguchi
2016-11-11 11:12     ` Kirill A. Shutemov
2016-11-07 23:31 ` [PATCH v2 02/12] mm: mempolicy: add queue_pages_node_check() Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 03/12] mm: thp: introduce separate TTU flag for thp freezing Naoya Horiguchi
2016-11-10  8:31   ` Anshuman Khandual
2016-11-10  9:09     ` Naoya Horiguchi
2016-11-11  3:18       ` Anshuman Khandual
2016-11-11 11:16   ` Kirill A. Shutemov
2016-11-07 23:31 ` [PATCH v2 04/12] mm: thp: introduce CONFIG_ARCH_ENABLE_THP_MIGRATION Naoya Horiguchi
2016-11-11 11:18   ` Kirill A. Shutemov
2016-11-15  2:05     ` Naoya Horiguchi
2016-11-28 14:21   ` Michal Hocko
2016-11-29  7:50     ` Naoya Horiguchi
2016-11-29  8:45       ` Michal Hocko
2016-11-07 23:31 ` Naoya Horiguchi [this message]
2016-11-08  3:05   ` [PATCH v2 05/12] mm: thp: add core routines for thp/pmd migration kbuild test robot
2016-11-08  6:46     ` Naoya Horiguchi
2016-11-08  8:13   ` Anshuman Khandual
2016-11-08  8:16     ` Naoya Horiguchi
2016-11-10  8:29   ` Anshuman Khandual
2016-11-10  9:43     ` Naoya Horiguchi
2016-11-14 11:45   ` Kirill A. Shutemov
2016-11-15  4:57     ` Naoya Horiguchi
2016-11-28 14:31   ` Michal Hocko
2016-11-29  7:57     ` Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 06/12] mm: thp: enable thp migration in generic path Naoya Horiguchi
2016-11-28 14:33   ` Michal Hocko
2016-11-29  8:16     ` Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 07/12] mm: thp: check pmd migration entry in common path Naoya Horiguchi
2016-11-08  0:23   ` kbuild test robot
2016-11-08  1:22     ` Naoya Horiguchi
2016-11-10  8:36   ` Anshuman Khandual
2016-11-10  9:12     ` Naoya Horiguchi
2016-11-10  9:08   ` Hillf Danton
2016-11-10  9:21     ` Naoya Horiguchi
2016-11-10  9:28       ` Hillf Danton
2016-11-10  9:34         ` Naoya Horiguchi
2016-11-17 23:56   ` Kirill A. Shutemov
2016-11-29  6:46     ` Naoya Horiguchi
2016-11-28 14:35   ` Michal Hocko
2016-11-07 23:31 ` [PATCH v2 08/12] mm: soft-dirty: keep soft-dirty bits over thp migration Naoya Horiguchi
2016-11-10  8:38   ` Anshuman Khandual
2016-11-10  9:13     ` Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 09/12] mm: hwpoison: soft offline supports " Naoya Horiguchi
2016-11-10 10:31   ` Balbir Singh
2016-11-10 23:58     ` Naoya Horiguchi
2016-11-14 23:22       ` Balbir Singh
2016-11-07 23:31 ` [PATCH v2 10/12] mm: mempolicy: mbind and migrate_pages support " Naoya Horiguchi
2016-11-25 12:27   ` Anshuman Khandual
2016-11-29  7:07     ` Naoya Horiguchi
2016-11-07 23:31 ` [PATCH v2 11/12] mm: migrate: move_pages() supports " Naoya Horiguchi
2016-11-18  0:01   ` Kirill A. Shutemov
2016-11-07 23:31 ` [PATCH v2 12/12] mm: memory_hotplug: memory hotremove " Naoya Horiguchi
2016-11-08  0:29   ` kbuild test robot
2016-11-08  0:30   ` kbuild test robot
2016-11-08  1:36     ` Naoya Horiguchi
2016-11-08  1:41       ` [PATCH] mm: fix unused variable warning Naoya Horiguchi
2016-11-09  2:32 ` [PATCH v2 00/12] mm: page migration enhancement for thp Balbir Singh
2016-11-09  4:59   ` Naoya Horiguchi
2016-11-09 21:28     ` Balbir Singh
2016-11-09 10:33 ` Anshuman Khandual
2016-11-09 23:52   ` Naoya Horiguchi
2016-11-10 14:01     ` Zi Yan
2016-11-11  3:48       ` Anshuman Khandual
2017-01-30 10:47 ` Anshuman Khandual
2017-01-31 19:54   ` Zi Yan
2017-02-03  3:12     ` Hillf Danton
2017-02-03  3:24       ` Zi Yan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1478561517-4317-6-git-send-email-n-horiguchi@ah.jp.nec.com \
    --to=n-horiguchi@ah.jp.nec.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=bsingharora@gmail.com \
    --cc=dave.hansen@intel.com \
    --cc=hughd@google.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@techsingularity.net \
    --cc=mhocko@kernel.org \
    --cc=nao.horiguchi@gmail.com \
    --cc=vbabka@suse.cz \
    --cc=xemul@parallels.com \
    --cc=zi.yan@cs.rutgers.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).