linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Jérôme Glisse" <jglisse@redhat.com>
To: akpm@linux-foundation.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Cc: "John Hubbard" <jhubbard@nvidia.com>,
	"Dan Williams" <dan.j.williams@intel.com>,
	"David Nellans" <dnellans@nvidia.com>,
	"Balbir Singh" <bsingharora@gmail.com>,
	"Jérôme Glisse" <jglisse@redhat.com>,
	"Evgeny Baskakov" <ebaskakov@nvidia.com>,
	"Mark Hairgrove" <mhairgrove@nvidia.com>,
	"Sherry Cheung" <SCheung@nvidia.com>,
	"Subhash Gutti" <sgutti@nvidia.com>
Subject: [HMM-v25 15/19] mm/migrate: migrate_vma() unmap page from vma while collecting pages
Date: Wed, 16 Aug 2017 20:05:44 -0400	[thread overview]
Message-ID: <20170817000548.32038-16-jglisse@redhat.com> (raw)
In-Reply-To: <20170817000548.32038-1-jglisse@redhat.com>

Common case for migration of virtual address range is page are map
only once inside the vma in which migration is taking place. Because
we already walk the CPU page table for that range we can directly do
the unmap there and setup special migration swap entry.

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Evgeny Baskakov <ebaskakov@nvidia.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Mark Hairgrove <mhairgrove@nvidia.com>
Signed-off-by: Sherry Cheung <SCheung@nvidia.com>
Signed-off-by: Subhash Gutti <sgutti@nvidia.com>
---
 mm/migrate.c | 141 +++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 112 insertions(+), 29 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 60e2f8369cd7..57d1fa7a8e62 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2160,7 +2160,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 	struct migrate_vma *migrate = walk->private;
 	struct vm_area_struct *vma = walk->vma;
 	struct mm_struct *mm = vma->vm_mm;
-	unsigned long addr = start;
+	unsigned long addr = start, unmapped = 0;
 	spinlock_t *ptl;
 	pte_t *ptep;
 
@@ -2205,9 +2205,12 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 		return migrate_vma_collect_hole(start, end, walk);
 
 	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+	arch_enter_lazy_mmu_mode();
+
 	for (; addr < end; addr += PAGE_SIZE, ptep++) {
 		unsigned long mpfn, pfn;
 		struct page *page;
+		swp_entry_t entry;
 		pte_t pte;
 
 		pte = *ptep;
@@ -2239,11 +2242,44 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 		mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
 		mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
 
+		/*
+		 * Optimize for the common case where page is only mapped once
+		 * in one process. If we can lock the page, then we can safely
+		 * set up a special migration page table entry now.
+		 */
+		if (trylock_page(page)) {
+			pte_t swp_pte;
+
+			mpfn |= MIGRATE_PFN_LOCKED;
+			ptep_get_and_clear(mm, addr, ptep);
+
+			/* Setup special migration page table entry */
+			entry = make_migration_entry(page, pte_write(pte));
+			swp_pte = swp_entry_to_pte(entry);
+			if (pte_soft_dirty(pte))
+				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			set_pte_at(mm, addr, ptep, swp_pte);
+
+			/*
+			 * This is like regular unmap: we remove the rmap and
+			 * drop page refcount. Page won't be freed, as we took
+			 * a reference just above.
+			 */
+			page_remove_rmap(page, false);
+			put_page(page);
+			unmapped++;
+		}
+
 next:
 		migrate->src[migrate->npages++] = mpfn;
 	}
+	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(ptep - 1, ptl);
 
+	/* Only flush the TLB if we actually modified any entries */
+	if (unmapped)
+		flush_tlb_range(walk->vma, start, end);
+
 	return 0;
 }
 
@@ -2268,7 +2304,13 @@ static void migrate_vma_collect(struct migrate_vma *migrate)
 	mm_walk.mm = migrate->vma->vm_mm;
 	mm_walk.private = migrate;
 
+	mmu_notifier_invalidate_range_start(mm_walk.mm,
+					    migrate->start,
+					    migrate->end);
 	walk_page_range(migrate->start, migrate->end, &mm_walk);
+	mmu_notifier_invalidate_range_end(mm_walk.mm,
+					  migrate->start,
+					  migrate->end);
 
 	migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
 }
@@ -2316,32 +2358,37 @@ static bool migrate_vma_check_page(struct page *page)
 static void migrate_vma_prepare(struct migrate_vma *migrate)
 {
 	const unsigned long npages = migrate->npages;
+	const unsigned long start = migrate->start;
+	unsigned long addr, i, restore = 0;
 	bool allow_drain = true;
-	unsigned long i;
 
 	lru_add_drain();
 
 	for (i = 0; (i < npages) && migrate->cpages; i++) {
 		struct page *page = migrate_pfn_to_page(migrate->src[i]);
+		bool remap = true;
 
 		if (!page)
 			continue;
 
-		/*
-		 * Because we are migrating several pages there can be
-		 * a deadlock between 2 concurrent migration where each
-		 * are waiting on each other page lock.
-		 *
-		 * Make migrate_vma() a best effort thing and backoff
-		 * for any page we can not lock right away.
-		 */
-		if (!trylock_page(page)) {
-			migrate->src[i] = 0;
-			migrate->cpages--;
-			put_page(page);
-			continue;
+		if (!(migrate->src[i] & MIGRATE_PFN_LOCKED)) {
+			/*
+			 * Because we are migrating several pages there can be
+			 * a deadlock between 2 concurrent migration where each
+			 * are waiting on each other page lock.
+			 *
+			 * Make migrate_vma() a best effort thing and backoff
+			 * for any page we can not lock right away.
+			 */
+			if (!trylock_page(page)) {
+				migrate->src[i] = 0;
+				migrate->cpages--;
+				put_page(page);
+				continue;
+			}
+			remap = false;
+			migrate->src[i] |= MIGRATE_PFN_LOCKED;
 		}
-		migrate->src[i] |= MIGRATE_PFN_LOCKED;
 
 		if (!PageLRU(page) && allow_drain) {
 			/* Drain CPU's pagevec */
@@ -2350,21 +2397,50 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
 		}
 
 		if (isolate_lru_page(page)) {
-			migrate->src[i] = 0;
-			unlock_page(page);
-			migrate->cpages--;
-			put_page(page);
+			if (remap) {
+				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+				migrate->cpages--;
+				restore++;
+			} else {
+				migrate->src[i] = 0;
+				unlock_page(page);
+				migrate->cpages--;
+				put_page(page);
+			}
 			continue;
 		}
 
 		if (!migrate_vma_check_page(page)) {
-			migrate->src[i] = 0;
-			unlock_page(page);
-			migrate->cpages--;
+			if (remap) {
+				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+				migrate->cpages--;
+				restore++;
 
-			putback_lru_page(page);
+				get_page(page);
+				putback_lru_page(page);
+			} else {
+				migrate->src[i] = 0;
+				unlock_page(page);
+				migrate->cpages--;
+
+				putback_lru_page(page);
+			}
 		}
 	}
+
+	for (i = 0, addr = start; i < npages && restore; i++, addr += PAGE_SIZE) {
+		struct page *page = migrate_pfn_to_page(migrate->src[i]);
+
+		if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
+			continue;
+
+		remove_migration_pte(page, migrate->vma, addr, page);
+
+		migrate->src[i] = 0;
+		unlock_page(page);
+		put_page(page);
+		restore--;
+	}
 }
 
 /*
@@ -2391,12 +2467,19 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
 		if (!page || !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
 			continue;
 
-		try_to_unmap(page, flags);
-		if (page_mapped(page) || !migrate_vma_check_page(page)) {
-			migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
-			migrate->cpages--;
-			restore++;
+		if (page_mapped(page)) {
+			try_to_unmap(page, flags);
+			if (page_mapped(page))
+				goto restore;
 		}
+
+		if (migrate_vma_check_page(page))
+			continue;
+
+restore:
+		migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+		migrate->cpages--;
+		restore++;
 	}
 
 	for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) {
-- 
2.13.4

  parent reply	other threads:[~2017-08-17  0:06 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-17  0:05 [HMM-v25 00/19] HMM (Heterogeneous Memory Management) v25 Jérôme Glisse
2017-08-17  0:05 ` [HMM-v25 01/19] hmm: heterogeneous memory management documentation v3 Jérôme Glisse
2017-08-17  0:05 ` [HMM-v25 02/19] mm/hmm: heterogeneous memory management (HMM for short) v5 Jérôme Glisse
2017-08-17  0:05 ` [HMM-v25 03/19] mm/hmm/mirror: mirror process address space on device with HMM helpers v3 Jérôme Glisse
2017-08-17  0:05 ` [HMM-v25 04/19] mm/hmm/mirror: helper to snapshot CPU page table v4 Jérôme Glisse
2017-08-17  0:05 ` [HMM-v25 05/19] mm/hmm/mirror: device page fault handler Jérôme Glisse
2017-08-17  0:05 ` [HMM-v25 06/19] mm/memory_hotplug: introduce add_pages Jérôme Glisse
2017-08-17  0:05 ` [HMM-v25 07/19] mm/ZONE_DEVICE: new type of ZONE_DEVICE for unaddressable memory v5 Jérôme Glisse
2018-12-20  8:33   ` Dan Williams
2018-12-20 16:15     ` Jerome Glisse
2018-12-20 16:47       ` Dan Williams
2018-12-20 16:57         ` Jerome Glisse
2017-08-17  0:05 ` [HMM-v25 08/19] mm/ZONE_DEVICE: special case put_page() for device private pages v4 Jérôme Glisse
2017-08-17  0:05 ` [HMM-v25 09/19] mm/memcontrol: allow to uncharge page without using page->lru field Jérôme Glisse
2017-08-17  0:05 ` [HMM-v25 10/19] mm/memcontrol: support MEMORY_DEVICE_PRIVATE v4 Jérôme Glisse
2017-09-05 17:13   ` Laurent Dufour
2017-09-05 17:21     ` Jerome Glisse
2017-08-17  0:05 ` [HMM-v25 11/19] mm/hmm/devmem: device memory hotplug using ZONE_DEVICE v7 Jérôme Glisse
2017-08-17  0:05 ` [HMM-v25 12/19] mm/hmm/devmem: dummy HMM device for ZONE_DEVICE memory v3 Jérôme Glisse
2017-08-17  0:05 ` [HMM-v25 13/19] mm/migrate: new migrate mode MIGRATE_SYNC_NO_COPY Jérôme Glisse
2017-08-17 21:12   ` Andrew Morton
2017-08-17 21:44     ` Jerome Glisse
2017-08-17  0:05 ` [HMM-v25 14/19] mm/migrate: new memory migration helper for use with device memory v5 Jérôme Glisse
2017-08-17  0:05 ` Jérôme Glisse [this message]
2017-08-17  0:05 ` [HMM-v25 16/19] mm/migrate: support un-addressable ZONE_DEVICE page in migration v3 Jérôme Glisse
2017-08-17  0:05 ` [HMM-v25 17/19] mm/migrate: allow migrate_vma() to alloc new page on empty entry v4 Jérôme Glisse
2017-08-17  0:05 ` [HMM-v25 18/19] mm/device-public-memory: device memory cache coherent with CPU v5 Jérôme Glisse
2017-08-17  0:05 ` [HMM-v25 19/19] mm/hmm: add new helper to hotplug CDM memory region v3 Jérôme Glisse
2017-09-04  3:09   ` Bob Liu
2017-09-04 15:51     ` Jerome Glisse
2017-09-05  1:13       ` Bob Liu
2017-09-05  2:38         ` Jerome Glisse
2017-09-05  3:50           ` Bob Liu
2017-09-05 13:50             ` Jerome Glisse
2017-09-05 16:18               ` Dan Williams
2017-09-05 19:00               ` Ross Zwisler
2017-09-05 19:20                 ` Jerome Glisse
2017-09-08 19:43                   ` Ross Zwisler
2017-09-08 20:29                     ` Jerome Glisse
2017-09-05 18:54           ` Ross Zwisler
2017-09-06  1:25             ` Bob Liu
2017-09-06  2:12               ` Jerome Glisse
2017-09-07  2:06                 ` Bob Liu
2017-09-07 17:00                   ` Jerome Glisse
2017-09-07 17:27                   ` Jerome Glisse
2017-09-08  1:59                     ` Bob Liu
2017-09-08 20:43                       ` Dan Williams
2017-11-17  3:47                         ` chetan L
2017-09-05  3:36       ` Balbir Singh
2017-08-17 21:39 ` [HMM-v25 00/19] HMM (Heterogeneous Memory Management) v25 Andrew Morton
2017-08-17 21:55   ` Jerome Glisse
2017-08-17 21:59     ` Dan Williams
2017-08-17 22:02       ` Jerome Glisse
2017-08-17 22:06         ` Dan Williams
2017-08-17 22:16       ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170817000548.32038-16-jglisse@redhat.com \
    --to=jglisse@redhat.com \
    --cc=SCheung@nvidia.com \
    --cc=akpm@linux-foundation.org \
    --cc=bsingharora@gmail.com \
    --cc=dan.j.williams@intel.com \
    --cc=dnellans@nvidia.com \
    --cc=ebaskakov@nvidia.com \
    --cc=jhubbard@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhairgrove@nvidia.com \
    --cc=sgutti@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).