Linux-mm Archive on lore.kernel.org
 help / Atom feed
From: Zi Yan <zi.yan@sent.com>
To: Dave Hansen <dave.hansen@linux.intel.com>,
	Yang Shi <yang.shi@linux.alibaba.com>,
	Keith Busch <keith.busch@intel.com>,
	Fengguang Wu <fengguang.wu@intel.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>,
	Michal Hocko <mhocko@kernel.org>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Vlastimil Babka <vbabka@suse.cz>,
	Mel Gorman <mgorman@techsingularity.net>,
	John Hubbard <jhubbard@nvidia.com>,
	Mark Hairgrove <mhairgrove@nvidia.com>,
	Nitin Gupta <nigupta@nvidia.com>,
	Javier Cabezas <jcabezas@nvidia.com>,
	David Nellans <dnellans@nvidia.com>, Zi Yan <ziy@nvidia.com>
Subject: [RFC PATCH 16/25] exchange page: Add THP exchange support.
Date: Wed,  3 Apr 2019 19:00:37 -0700
Message-ID: <20190404020046.32741-17-zi.yan@sent.com> (raw)
In-Reply-To: <20190404020046.32741-1-zi.yan@sent.com>

From: Zi Yan <ziy@nvidia.com>

Enable exchange THPs in the process. It also need to take care of
exchanging PTE-mapped THPs.

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 include/linux/exchange.h |  2 ++
 mm/exchange.c            | 73 +++++++++++++++++++++++++++++++++++++-----------
 mm/migrate.c             |  2 +-
 3 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/include/linux/exchange.h b/include/linux/exchange.h
index 20d2184..8785d08 100644
--- a/include/linux/exchange.h
+++ b/include/linux/exchange.h
@@ -14,6 +14,8 @@ struct exchange_page_info {
 	int from_page_was_mapped;
 	int to_page_was_mapped;
 
+	pgoff_t from_index, to_index;
+
 	struct list_head list;
 };
 
diff --git a/mm/exchange.c b/mm/exchange.c
index 555a72c..45c7013 100644
--- a/mm/exchange.c
+++ b/mm/exchange.c
@@ -51,7 +51,8 @@ struct page_flags {
 	unsigned int page_swapcache:1;
 	unsigned int page_writeback:1;
 	unsigned int page_private:1;
-	unsigned int __pad:3;
+	unsigned int page_doublemap:1;
+	unsigned int __pad:2;
 };
 
 
@@ -127,20 +128,23 @@ static void exchange_huge_page(struct page *dst, struct page *src)
 static void exchange_page_flags(struct page *to_page, struct page *from_page)
 {
 	int from_cpupid, to_cpupid;
-	struct page_flags from_page_flags, to_page_flags;
+	struct page_flags from_page_flags = {0}, to_page_flags = {0};
 	struct mem_cgroup *to_memcg = page_memcg(to_page),
 					  *from_memcg = page_memcg(from_page);
 
 	from_cpupid = page_cpupid_xchg_last(from_page, -1);
 
-	from_page_flags.page_error = TestClearPageError(from_page);
+	from_page_flags.page_error = PageError(from_page);
+	if (from_page_flags.page_error)
+		ClearPageError(from_page);
 	from_page_flags.page_referenced = TestClearPageReferenced(from_page);
 	from_page_flags.page_uptodate = PageUptodate(from_page);
 	ClearPageUptodate(from_page);
 	from_page_flags.page_active = TestClearPageActive(from_page);
 	from_page_flags.page_unevictable = TestClearPageUnevictable(from_page);
 	from_page_flags.page_checked = PageChecked(from_page);
-	ClearPageChecked(from_page);
+	if (from_page_flags.page_checked)
+		ClearPageChecked(from_page);
 	from_page_flags.page_mappedtodisk = PageMappedToDisk(from_page);
 	ClearPageMappedToDisk(from_page);
 	from_page_flags.page_dirty = PageDirty(from_page);
@@ -150,18 +154,22 @@ static void exchange_page_flags(struct page *to_page, struct page *from_page)
 	clear_page_idle(from_page);
 	from_page_flags.page_swapcache = PageSwapCache(from_page);
 	from_page_flags.page_writeback = test_clear_page_writeback(from_page);
+	from_page_flags.page_doublemap = PageDoubleMap(from_page);
 
 
 	to_cpupid = page_cpupid_xchg_last(to_page, -1);
 
-	to_page_flags.page_error = TestClearPageError(to_page);
+	to_page_flags.page_error = PageError(to_page);
+	if (to_page_flags.page_error)
+		ClearPageError(to_page);
 	to_page_flags.page_referenced = TestClearPageReferenced(to_page);
 	to_page_flags.page_uptodate = PageUptodate(to_page);
 	ClearPageUptodate(to_page);
 	to_page_flags.page_active = TestClearPageActive(to_page);
 	to_page_flags.page_unevictable = TestClearPageUnevictable(to_page);
 	to_page_flags.page_checked = PageChecked(to_page);
-	ClearPageChecked(to_page);
+	if (to_page_flags.page_checked)
+		ClearPageChecked(to_page);
 	to_page_flags.page_mappedtodisk = PageMappedToDisk(to_page);
 	ClearPageMappedToDisk(to_page);
 	to_page_flags.page_dirty = PageDirty(to_page);
@@ -171,6 +179,7 @@ static void exchange_page_flags(struct page *to_page, struct page *from_page)
 	clear_page_idle(to_page);
 	to_page_flags.page_swapcache = PageSwapCache(to_page);
 	to_page_flags.page_writeback = test_clear_page_writeback(to_page);
+	to_page_flags.page_doublemap = PageDoubleMap(to_page);
 
 	/* set to_page */
 	if (from_page_flags.page_error)
@@ -197,6 +206,8 @@ static void exchange_page_flags(struct page *to_page, struct page *from_page)
 		set_page_young(to_page);
 	if (from_page_flags.page_is_idle)
 		set_page_idle(to_page);
+	if (from_page_flags.page_doublemap)
+		SetPageDoubleMap(to_page);
 
 	/* set from_page */
 	if (to_page_flags.page_error)
@@ -223,6 +234,8 @@ static void exchange_page_flags(struct page *to_page, struct page *from_page)
 		set_page_young(from_page);
 	if (to_page_flags.page_is_idle)
 		set_page_idle(from_page);
+	if (to_page_flags.page_doublemap)
+		SetPageDoubleMap(from_page);
 
 	/*
 	 * Copy NUMA information to the new page, to prevent over-eager
@@ -599,7 +612,6 @@ static int unmap_and_exchange(struct page *from_page, struct page *to_page,
 
 	from_index = from_page->index;
 	to_index = to_page->index;
-
 	/*
 	 * Corner case handling:
 	 * 1. When a new swap-cache page is read into, it is added to the LRU
@@ -673,8 +685,6 @@ static int unmap_and_exchange(struct page *from_page, struct page *to_page,
 			swap(from_page->index, from_index);
 	}
 
-
-
 out_unlock_both:
 	if (to_anon_vma)
 		put_anon_vma(to_anon_vma);
@@ -689,6 +699,23 @@ static int unmap_and_exchange(struct page *from_page, struct page *to_page,
 	return rc;
 }
 
+static bool can_be_exchanged(struct page *from, struct page *to)
+{
+	if (PageCompound(from) != PageCompound(to))
+		return false;
+
+	if (PageHuge(from) != PageHuge(to))
+		return false;
+
+	if (PageHuge(from) || PageHuge(to))
+		return false;
+
+	if (compound_order(from) != compound_order(to))
+		return false;
+
+	return true;
+}
+
 /*
  * Exchange pages in the exchange_list
  *
@@ -745,7 +772,8 @@ int exchange_pages(struct list_head *exchange_list,
 		}
 
 		/* TODO: compound page not supported */
-		if (PageCompound(from_page) || page_mapping(from_page)) {
+		if (!can_be_exchanged(from_page, to_page) ||
+		    page_mapping(from_page)) {
 			++failed;
 			goto putback;
 		}
@@ -784,6 +812,8 @@ static int unmap_pair_pages_concur(struct exchange_page_info *one_pair,
 	struct page *from_page = one_pair->from_page;
 	struct page *to_page = one_pair->to_page;
 
+	one_pair->from_index = from_page->index;
+	one_pair->to_index = to_page->index;
 	/* from_page lock down  */
 	if (!trylock_page(from_page)) {
 		if (!force || ((mode & MIGRATE_MODE_MASK) == MIGRATE_ASYNC))
@@ -903,7 +933,6 @@ static int exchange_page_mapping_concur(struct list_head *unmapped_list_ptr,
 					   struct list_head *exchange_list_ptr,
 						enum migrate_mode mode)
 {
-	int rc = -EBUSY;
 	int nr_failed = 0;
 	struct address_space *to_page_mapping, *from_page_mapping;
 	struct exchange_page_info *one_pair, *one_pair2;
@@ -911,6 +940,7 @@ static int exchange_page_mapping_concur(struct list_head *unmapped_list_ptr,
 	list_for_each_entry_safe(one_pair, one_pair2, unmapped_list_ptr, list) {
 		struct page *from_page = one_pair->from_page;
 		struct page *to_page = one_pair->to_page;
+		int rc = -EBUSY;
 
 		VM_BUG_ON_PAGE(!PageLocked(from_page), from_page);
 		VM_BUG_ON_PAGE(!PageLocked(to_page), to_page);
@@ -926,8 +956,9 @@ static int exchange_page_mapping_concur(struct list_head *unmapped_list_ptr,
 		BUG_ON(PageWriteback(to_page));
 
 		/* actual page mapping exchange */
-		rc = exchange_page_move_mapping(to_page_mapping, from_page_mapping,
-							to_page, from_page, NULL, NULL, mode, 0, 0);
+		if (!page_mapped(from_page) && !page_mapped(to_page))
+			rc = exchange_page_move_mapping(to_page_mapping, from_page_mapping,
+								to_page, from_page, NULL, NULL, mode, 0, 0);
 
 		if (rc) {
 			if (one_pair->from_page_was_mapped)
@@ -954,7 +985,7 @@ static int exchange_page_mapping_concur(struct list_head *unmapped_list_ptr,
 			one_pair->from_page = NULL;
 			one_pair->to_page = NULL;
 
-			list_move(&one_pair->list, exchange_list_ptr);
+			list_del(&one_pair->list);
 			++nr_failed;
 		}
 	}
@@ -1026,8 +1057,18 @@ static int remove_migration_ptes_concur(struct list_head *unmapped_list_ptr)
 	struct exchange_page_info *iterator;
 
 	list_for_each_entry(iterator, unmapped_list_ptr, list) {
-		remove_migration_ptes(iterator->from_page, iterator->to_page, false);
-		remove_migration_ptes(iterator->to_page, iterator->from_page, false);
+		struct page *from_page = iterator->from_page;
+		struct page *to_page = iterator->to_page;
+
+		swap(from_page->index, iterator->from_index);
+		if (iterator->from_page_was_mapped)
+			remove_migration_ptes(iterator->from_page, iterator->to_page, false);
+		swap(from_page->index, iterator->from_index);
+
+		swap(to_page->index, iterator->to_index);
+		if (iterator->to_page_was_mapped)
+			remove_migration_ptes(iterator->to_page, iterator->from_page, false);
+		swap(to_page->index, iterator->to_index);
 
 
 		if (iterator->from_anon_vma)
diff --git a/mm/migrate.c b/mm/migrate.c
index a0ca817..da7af68 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -229,7 +229,7 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 		if (PageKsm(page))
 			new = page;
 		else
-			new = page - pvmw.page->index +
+			new = page - page->index +
 				linear_page_index(vma, pvmw.address);
 
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
-- 
2.7.4


  parent reply index

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-04  2:00 [RFC PATCH 00/25] Accelerate page migration and use memcg for PMEM management Zi Yan
2019-04-04  2:00 ` [RFC PATCH 01/25] mm: migrate: Change migrate_mode to support combination migration modes Zi Yan
2019-04-04  2:00 ` [RFC PATCH 02/25] mm: migrate: Add mode parameter to support future page copy routines Zi Yan
2019-04-04  2:00 ` [RFC PATCH 03/25] mm: migrate: Add a multi-threaded page migration function Zi Yan
2019-04-04  2:00 ` [RFC PATCH 04/25] mm: migrate: Add copy_page_multithread into migrate_pages Zi Yan
2019-04-04  2:00 ` [RFC PATCH 05/25] mm: migrate: Add vm.accel_page_copy in sysfs to control page copy acceleration Zi Yan
2019-04-04  2:00 ` [RFC PATCH 06/25] mm: migrate: Make the number of copy threads adjustable via sysctl Zi Yan
2019-04-04  2:00 ` [RFC PATCH 07/25] mm: migrate: Add copy_page_dma to use DMA Engine to copy pages Zi Yan
2019-04-04  2:00 ` [RFC PATCH 08/25] mm: migrate: Add copy_page_dma into migrate_page_copy Zi Yan
2019-04-04  2:00 ` [RFC PATCH 09/25] mm: migrate: Add copy_page_lists_dma_always to support copy a list of pages Zi Yan
2019-04-04  2:00 ` [RFC PATCH 10/25] mm: migrate: copy_page_lists_mt() to copy a page list using multi-threads Zi Yan
2019-04-04  2:00 ` [RFC PATCH 11/25] mm: migrate: Add concurrent page migration into move_pages syscall Zi Yan
2019-04-04  2:00 ` [RFC PATCH 12/25] exchange pages: new page migration mechanism: exchange_pages() Zi Yan
2019-04-04  2:00 ` [RFC PATCH 13/25] exchange pages: add multi-threaded exchange pages Zi Yan
2019-04-04  2:00 ` [RFC PATCH 14/25] exchange pages: concurrent " Zi Yan
2019-04-04  2:00 ` [RFC PATCH 15/25] exchange pages: exchange anonymous page and file-backed page Zi Yan
2019-04-04  2:00 ` Zi Yan [this message]
2019-04-04  2:00 ` [RFC PATCH 17/25] exchange page: Add exchange_page() syscall Zi Yan
2019-04-04  2:00 ` [RFC PATCH 18/25] memcg: Add per node memory usage&max stats in memcg Zi Yan
2019-04-04  2:00 ` [RFC PATCH 19/25] mempolicy: add MPOL_F_MEMCG flag, enforcing memcg memory limit Zi Yan
2019-04-04  2:00 ` [RFC PATCH 20/25] memory manage: Add memory manage syscall Zi Yan
2019-04-04  2:00 ` [RFC PATCH 21/25] mm: move update_lru_sizes() to mm_inline.h for broader use Zi Yan
2019-04-04  2:00 ` [RFC PATCH 22/25] memory manage: active/inactive page list manipulation in memcg Zi Yan
2019-04-04  2:00 ` [RFC PATCH 23/25] memory manage: page migration based page manipulation between NUMA nodes Zi Yan
2019-04-04  2:00 ` [RFC PATCH 24/25] memory manage: limit migration batch size Zi Yan
2019-04-04  2:00 ` [RFC PATCH 25/25] memory manage: use exchange pages to memory manage to improve throughput Zi Yan
2019-04-04  7:13 ` [RFC PATCH 00/25] Accelerate page migration and use memcg for PMEM management Michal Hocko
2019-04-05  0:32 ` Yang Shi
2019-04-05 17:20   ` Zi Yan

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190404020046.32741-17-zi.yan@sent.com \
    --to=zi.yan@sent.com \
    --cc=akpm@linux-foundation.org \
    --cc=daniel.m.jordan@oracle.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=dnellans@nvidia.com \
    --cc=fengguang.wu@intel.com \
    --cc=jcabezas@nvidia.com \
    --cc=jhubbard@nvidia.com \
    --cc=keith.busch@intel.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@techsingularity.net \
    --cc=mhairgrove@nvidia.com \
    --cc=mhocko@kernel.org \
    --cc=nigupta@nvidia.com \
    --cc=vbabka@suse.cz \
    --cc=yang.shi@linux.alibaba.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-mm Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-mm/0 linux-mm/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-mm linux-mm/ https://lore.kernel.org/linux-mm \
		linux-mm@kvack.org linux-mm@archiver.kernel.org
	public-inbox-index linux-mm


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kvack.linux-mm


AGPL code for this site: git clone https://public-inbox.org/ public-inbox