From: Zi Yan <zi.yan@sent.com>
To: Dave Hansen <dave.hansen@linux.intel.com>,
	Yang Shi <yang.shi@linux.alibaba.com>,
	Keith Busch <keith.busch@intel.com>,
	Fengguang Wu <fengguang.wu@intel.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>,
	Michal Hocko <mhocko@kernel.org>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Vlastimil Babka <vbabka@suse.cz>,
	Mel Gorman <mgorman@techsingularity.net>,
	John Hubbard <jhubbard@nvidia.com>,
	Mark Hairgrove <mhairgrove@nvidia.com>,
	Nitin Gupta <nigupta@nvidia.com>,
	Javier Cabezas <jcabezas@nvidia.com>,
	David Nellans <dnellans@nvidia.com>, Zi Yan <ziy@nvidia.com>
Subject: [RFC PATCH 14/25] exchange pages: concurrent exchange pages.
Date: Wed,  3 Apr 2019 19:00:35 -0700	[thread overview]
Message-ID: <20190404020046.32741-15-zi.yan@sent.com> (raw)
In-Reply-To: <20190404020046.32741-1-zi.yan@sent.com>

From: Zi Yan <ziy@nvidia.com>

Add exchange_pages_concur(): it unmaps the pages of each exchange pair,
exchanges their data as two page lists in exchange_page_lists_mthread(),
and finally remaps both pages of each pair.
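
In outline, the new call sequence looks like this (an illustrative sketch,
not verbatim code; all functions named below are added by this patch):

	/* phase 1: lock both pages of each pair and install migration PTEs */
	list_for_each_entry_safe(one_pair, one_pair2, exchange_list, list)
		rc = unmap_pair_pages_concur(one_pair, 1, mode);

	/* phase 2: exchange page->mapping of the successfully unmapped pairs */
	exchange_page_mapping_concur(&unmapped_list, exchange_list, mode);

	/* phase 3: copy page contents between the two page lists, multi-threaded */
	exchange_page_data_concur(&unmapped_list, mode);

	/* phase 4: remove migration PTEs, unlock and put back both pages */
	remove_migration_ptes_concur(&unmapped_list);

Pairs that cannot be exchanged concurrently (hugetlb or file-backed pages)
are moved to a serialized list and handled by the existing exchange_pages()
path.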

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 include/linux/exchange.h |   2 +
 mm/exchange.c            | 397 +++++++++++++++++++++++++++++++++++++++++++++++
 mm/exchange_page.c       |   1 -
 3 files changed, 399 insertions(+), 1 deletion(-)
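
For context, here is a minimal, hypothetical caller (not part of this patch).
The helper name exchange_one_pair() is made up for illustration; it assumes
both pages were already isolated from their LRU lists, relies on the combined
migrate_mode bits (MIGRATE_SYNC | MIGRATE_MT) added earlier in this series,
and uses MR_NUMA_MISPLACED only as a placeholder reason:

	static int exchange_one_pair(struct page *from_page, struct page *to_page)
	{
		struct exchange_page_info *info;
		LIST_HEAD(exchange_list);
		int err;

		info = kzalloc(sizeof(*info), GFP_KERNEL);
		if (!info)
			return -ENOMEM;

		info->from_page = from_page;	/* already isolated from LRU */
		info->to_page = to_page;	/* already isolated from LRU */
		list_add_tail(&info->list, &exchange_list);

		/* MIGRATE_MT requests the multi-threaded data copy path */
		err = exchange_pages_concur(&exchange_list,
					    MIGRATE_SYNC | MIGRATE_MT,
					    MR_NUMA_MISPLACED);

		kfree(info);
		return err;
	}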

diff --git a/include/linux/exchange.h b/include/linux/exchange.h
index 778068e..20d2184 100644
--- a/include/linux/exchange.h
+++ b/include/linux/exchange.h
@@ -20,4 +20,6 @@ struct exchange_page_info {
 int exchange_pages(struct list_head *exchange_list,
 			enum migrate_mode mode,
 			int reason);
+int exchange_pages_concur(struct list_head *exchange_list,
+		enum migrate_mode mode, int reason);
 #endif /* _LINUX_EXCHANGE_H */
diff --git a/mm/exchange.c b/mm/exchange.c
index ce2c899..bbada58 100644
--- a/mm/exchange.c
+++ b/mm/exchange.c
@@ -600,3 +600,400 @@ int exchange_pages(struct list_head *exchange_list,
 	}
 	return failed;
 }
+
+
+static int unmap_pair_pages_concur(struct exchange_page_info *one_pair,
+				int force, enum migrate_mode mode)
+{
+	int rc = -EAGAIN;
+	struct anon_vma *anon_vma_from_page = NULL, *anon_vma_to_page = NULL;
+	struct page *from_page = one_pair->from_page;
+	struct page *to_page = one_pair->to_page;
+
+	/* from_page lock down  */
+	if (!trylock_page(from_page)) {
+		if (!force || ((mode & MIGRATE_MODE_MASK) == MIGRATE_ASYNC))
+			goto out;
+
+		lock_page(from_page);
+	}
+
+	BUG_ON(PageWriteback(from_page));
+
+	/*
+	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
+	 * we cannot notice that anon_vma is freed while we migrate a page.
+	 * This get_anon_vma() delays freeing the anon_vma pointer until the end
+	 * of migration. File cache pages are no problem because of page_lock():
+	 * file caches may use write_page() or lock_page() in migration, so
+	 * only anon pages need care here.
+	 *
+	 * Only page_get_anon_vma() understands the subtleties of
+	 * getting a hold on an anon_vma from outside one of its mms.
+	 * But if we cannot get anon_vma, then we won't need it anyway,
+	 * because that implies that the anon page is no longer mapped
+	 * (and cannot be remapped so long as we hold the page lock).
+	 */
+	if (PageAnon(from_page) && !PageKsm(from_page))
+		one_pair->from_anon_vma = anon_vma_from_page
+					= page_get_anon_vma(from_page);
+
+	/* to_page lock down  */
+	if (!trylock_page(to_page)) {
+		if (!force || ((mode & MIGRATE_MODE_MASK) == MIGRATE_ASYNC))
+			goto out_unlock;
+
+		lock_page(to_page);
+	}
+
+	BUG_ON(PageWriteback(to_page));
+
+	/*
+	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
+	 * we cannot notice that anon_vma is freed while we migrate a page.
+	 * This get_anon_vma() delays freeing the anon_vma pointer until the end
+	 * of migration. File cache pages are no problem because of page_lock():
+	 * file caches may use write_page() or lock_page() in migration, so
+	 * only anon pages need care here.
+	 *
+	 * Only page_get_anon_vma() understands the subtleties of
+	 * getting a hold on an anon_vma from outside one of its mms.
+	 * But if we cannot get anon_vma, then we won't need it anyway,
+	 * because that implies that the anon page is no longer mapped
+	 * (and cannot be remapped so long as we hold the page lock).
+	 */
+	if (PageAnon(to_page) && !PageKsm(to_page))
+		one_pair->to_anon_vma = anon_vma_to_page = page_get_anon_vma(to_page);
+
+	/*
+	 * Corner case handling:
+	 * 1. When a new swap-cache page is read into, it is added to the LRU
+	 * and treated as swapcache but it has no rmap yet.
+	 * Calling try_to_unmap() against a page->mapping==NULL page will
+	 * trigger a BUG.  So handle it here.
+	 * 2. An orphaned page (see truncate_complete_page) might have
+	 * fs-private metadata. The page can be picked up due to memory
+	 * offlining.  Everywhere else except page reclaim, the page is
+	 * invisible to the vm, so the page can not be migrated.  So try to
+	 * free the metadata, so the page can be freed.
+	 */
+	if (!from_page->mapping) {
+		VM_BUG_ON_PAGE(PageAnon(from_page), from_page);
+		if (page_has_private(from_page)) {
+			try_to_free_buffers(from_page);
+			goto out_unlock_both;
+		}
+	} else if (page_mapped(from_page)) {
+		/* Establish migration ptes */
+		VM_BUG_ON_PAGE(PageAnon(from_page) && !PageKsm(from_page) &&
+					   !anon_vma_from_page, from_page);
+		try_to_unmap(from_page,
+			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+
+		one_pair->from_page_was_mapped = 1;
+	}
+
+	if (!to_page->mapping) {
+		VM_BUG_ON_PAGE(PageAnon(to_page), to_page);
+		if (page_has_private(to_page)) {
+			try_to_free_buffers(to_page);
+			goto out_unlock_both;
+		}
+	} else if (page_mapped(to_page)) {
+		/* Establish migration ptes */
+		VM_BUG_ON_PAGE(PageAnon(to_page) && !PageKsm(to_page) &&
+					   !anon_vma_to_page, to_page);
+		try_to_unmap(to_page,
+			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+
+		one_pair->to_page_was_mapped = 1;
+	}
+
+	return MIGRATEPAGE_SUCCESS;
+
+out_unlock_both:
+	if (anon_vma_to_page)
+		put_anon_vma(anon_vma_to_page);
+	unlock_page(to_page);
+out_unlock:
+	/* Drop an anon_vma reference if we took one */
+	if (anon_vma_from_page)
+		put_anon_vma(anon_vma_from_page);
+	unlock_page(from_page);
+out:
+
+	return rc;
+}
+
+static int exchange_page_mapping_concur(struct list_head *unmapped_list_ptr,
+					   struct list_head *exchange_list_ptr,
+						enum migrate_mode mode)
+{
+	int rc = -EBUSY;
+	int nr_failed = 0;
+	struct address_space *to_page_mapping, *from_page_mapping;
+	struct exchange_page_info *one_pair, *one_pair2;
+
+	list_for_each_entry_safe(one_pair, one_pair2, unmapped_list_ptr, list) {
+		struct page *from_page = one_pair->from_page;
+		struct page *to_page = one_pair->to_page;
+
+		VM_BUG_ON_PAGE(!PageLocked(from_page), from_page);
+		VM_BUG_ON_PAGE(!PageLocked(to_page), to_page);
+
+		/* only pages without a mapping reach here; assert that below  */
+		to_page_mapping = page_mapping(to_page);
+		from_page_mapping = page_mapping(from_page);
+
+		BUG_ON(from_page_mapping);
+		BUG_ON(to_page_mapping);
+
+		BUG_ON(PageWriteback(from_page));
+		BUG_ON(PageWriteback(to_page));
+
+		/* actual page mapping exchange */
+		rc = exchange_page_move_mapping(to_page_mapping, from_page_mapping,
+							to_page, from_page, mode, 0, 0);
+
+		if (rc) {
+			if (one_pair->from_page_was_mapped)
+				remove_migration_ptes(from_page, from_page, false);
+			if (one_pair->to_page_was_mapped)
+				remove_migration_ptes(to_page, to_page, false);
+
+			if (one_pair->from_anon_vma)
+				put_anon_vma(one_pair->from_anon_vma);
+			unlock_page(from_page);
+
+			if (one_pair->to_anon_vma)
+				put_anon_vma(one_pair->to_anon_vma);
+			unlock_page(to_page);
+
+			mod_node_page_state(page_pgdat(from_page), NR_ISOLATED_ANON +
+					page_is_file_cache(from_page), -hpage_nr_pages(from_page));
+			putback_lru_page(from_page);
+
+			mod_node_page_state(page_pgdat(to_page), NR_ISOLATED_ANON +
+					page_is_file_cache(to_page), -hpage_nr_pages(to_page));
+			putback_lru_page(to_page);
+
+			one_pair->from_page = NULL;
+			one_pair->to_page = NULL;
+
+			list_move(&one_pair->list, exchange_list_ptr);
+			++nr_failed;
+		}
+	}
+
+	return nr_failed;
+}
+
+static int exchange_page_data_concur(struct list_head *unmapped_list_ptr,
+									enum migrate_mode mode)
+{
+	struct exchange_page_info *one_pair;
+	int num_pages = 0, idx = 0;
+	struct page **src_page_list = NULL, **dst_page_list = NULL;
+	unsigned long size = 0;
+	int rc = -EFAULT;
+
+	if (list_empty(unmapped_list_ptr))
+		return 0;
+
+	/* form page list  */
+	list_for_each_entry(one_pair, unmapped_list_ptr, list) {
+		++num_pages;
+		size += PAGE_SIZE * hpage_nr_pages(one_pair->from_page);
+	}
+
+	src_page_list = kzalloc(sizeof(struct page *)*num_pages, GFP_KERNEL);
+	if (!src_page_list)
+		return -ENOMEM;
+	dst_page_list = kzalloc(sizeof(struct page *)*num_pages, GFP_KERNEL);
+	if (!dst_page_list) {
+		kfree(src_page_list);
+		return -ENOMEM;
+	}
+
+	list_for_each_entry(one_pair, unmapped_list_ptr, list) {
+		src_page_list[idx] = one_pair->from_page;
+		dst_page_list[idx] = one_pair->to_page;
+		++idx;
+	}
+
+	BUG_ON(idx != num_pages);
+
+
+	if (mode & MIGRATE_MT)
+		rc = exchange_page_lists_mthread(dst_page_list, src_page_list,
+				num_pages);
+
+	if (rc) {
+		list_for_each_entry(one_pair, unmapped_list_ptr, list) {
+			if (PageHuge(one_pair->from_page) ||
+				PageTransHuge(one_pair->from_page)) {
+				exchange_huge_page(one_pair->to_page, one_pair->from_page);
+			} else {
+				exchange_highpage(one_pair->to_page, one_pair->from_page);
+			}
+		}
+	}
+
+	kfree(src_page_list);
+	kfree(dst_page_list);
+
+	list_for_each_entry(one_pair, unmapped_list_ptr, list) {
+		exchange_page_flags(one_pair->to_page, one_pair->from_page);
+	}
+
+	return rc;
+}
+
+static int remove_migration_ptes_concur(struct list_head *unmapped_list_ptr)
+{
+	struct exchange_page_info *iterator;
+
+	list_for_each_entry(iterator, unmapped_list_ptr, list) {
+		remove_migration_ptes(iterator->from_page, iterator->to_page, false);
+		remove_migration_ptes(iterator->to_page, iterator->from_page, false);
+
+
+		if (iterator->from_anon_vma)
+			put_anon_vma(iterator->from_anon_vma);
+		unlock_page(iterator->from_page);
+
+
+		if (iterator->to_anon_vma)
+			put_anon_vma(iterator->to_anon_vma);
+		unlock_page(iterator->to_page);
+
+
+		putback_lru_page(iterator->from_page);
+		iterator->from_page = NULL;
+
+		putback_lru_page(iterator->to_page);
+		iterator->to_page = NULL;
+	}
+
+	return 0;
+}
+
+int exchange_pages_concur(struct list_head *exchange_list,
+		enum migrate_mode mode, int reason)
+{
+	struct exchange_page_info *one_pair, *one_pair2;
+	int pass = 0;
+	int retry = 1;
+	int nr_failed = 0;
+	int nr_succeeded = 0;
+	int rc = 0;
+	LIST_HEAD(serialized_list);
+	LIST_HEAD(unmapped_list);
+
+	for(pass = 0; pass < 1 && retry; pass++) {
+		retry = 0;
+
+		/* unmap each pair; only page_mapping(page) == NULL pages are handled here */
+		list_for_each_entry_safe(one_pair, one_pair2, exchange_list, list) {
+			struct page *from_page = one_pair->from_page;
+			struct page *to_page = one_pair->to_page;
+			cond_resched();
+
+			if (page_count(from_page) == 1) {
+				/* page was freed from under us. So we are done  */
+				ClearPageActive(from_page);
+				ClearPageUnevictable(from_page);
+
+				put_page(from_page);
+				dec_node_page_state(from_page, NR_ISOLATED_ANON +
+						page_is_file_cache(from_page));
+
+				if (page_count(to_page) == 1) {
+					ClearPageActive(to_page);
+					ClearPageUnevictable(to_page);
+					put_page(to_page);
+				} else {
+					mod_node_page_state(page_pgdat(to_page), NR_ISOLATED_ANON +
+							page_is_file_cache(to_page), -hpage_nr_pages(to_page));
+					putback_lru_page(to_page);
+				}
+				list_del(&one_pair->list);
+
+				continue;
+			}
+
+			if (page_count(to_page) == 1) {
+				/* page was freed from under us. So we are done  */
+				ClearPageActive(to_page);
+				ClearPageUnevictable(to_page);
+
+				put_page(to_page);
+
+				dec_node_page_state(to_page, NR_ISOLATED_ANON +
+						page_is_file_cache(to_page));
+
+				mod_node_page_state(page_pgdat(from_page), NR_ISOLATED_ANON +
+						page_is_file_cache(from_page), -hpage_nr_pages(from_page));
+				putback_lru_page(from_page);
+
+				list_del(&one_pair->list);
+				continue;
+			}
+			/* We do not exchange huge pages or file-backed pages concurrently */
+			if (PageHuge(one_pair->from_page) || PageHuge(one_pair->to_page))
+				rc = -ENODEV;
+			else if ((page_mapping(one_pair->from_page) != NULL) ||
+				 (page_mapping(one_pair->to_page) != NULL))
+				rc = -ENODEV;
+			else
+				rc = unmap_pair_pages_concur(one_pair, 1, mode);
+
+			switch(rc) {
+			case -ENODEV:
+				list_move(&one_pair->list, &serialized_list);
+				break;
+			case -ENOMEM:
+				goto out;
+			case -EAGAIN:
+				retry++;
+				break;
+			case MIGRATEPAGE_SUCCESS:
+				list_move(&one_pair->list, &unmapped_list);
+				nr_succeeded++;
+				break;
+			default:
+				/*
+				 * Permanent failure (-EBUSY, -ENOSYS, etc.):
+				 * unlike -EAGAIN case, the failed page is
+				 * removed from migration page list and not
+				 * retried in the next outer loop.
+				 */
+				list_move(&one_pair->list, &serialized_list);
+				nr_failed++;
+				break;
+			}
+		}
+
+		/* exchange page->mapping of each pair; failed pairs are put back  */
+		exchange_page_mapping_concur(&unmapped_list, exchange_list, mode);
+
+
+		/* copy pages in unmapped_list */
+		exchange_page_data_concur(&unmapped_list, mode);
+
+
+		/* remove migration ptes, unlock both pages, drop the anon_vma
+		 * references, and put both pages back on the LRU */
+		remove_migration_ptes_concur(&unmapped_list);
+	}
+
+	nr_failed += retry;
+	rc = nr_failed;
+
+	exchange_pages(&serialized_list, mode, reason);
+out:
+	list_splice(&unmapped_list, exchange_list);
+	list_splice(&serialized_list, exchange_list);
+
+	return nr_failed ? -EFAULT : 0;
+}
diff --git a/mm/exchange_page.c b/mm/exchange_page.c
index 6054697..5dba0a6 100644
--- a/mm/exchange_page.c
+++ b/mm/exchange_page.c
@@ -126,7 +126,6 @@ int exchange_page_lists_mthread(struct page **to, struct page **from, int nr_pag
 	int to_node = page_to_nid(*to);
 	int i;
 	struct copy_page_info *work_items;
-	int nr_pages_per_page = hpage_nr_pages(*from);
 	const struct cpumask *per_node_cpumask = cpumask_of_node(to_node);
 	int cpu_id_list[32] = {0};
 	int cpu;
-- 
2.7.4


Thread overview: 29+ messages
2019-04-04  2:00 [RFC PATCH 00/25] Accelerate page migration and use memcg for PMEM management Zi Yan
2019-04-04  2:00 ` [RFC PATCH 01/25] mm: migrate: Change migrate_mode to support combination migration modes Zi Yan
2019-04-04  2:00 ` [RFC PATCH 02/25] mm: migrate: Add mode parameter to support future page copy routines Zi Yan
2019-04-04  2:00 ` [RFC PATCH 03/25] mm: migrate: Add a multi-threaded page migration function Zi Yan
2019-04-04  2:00 ` [RFC PATCH 04/25] mm: migrate: Add copy_page_multithread into migrate_pages Zi Yan
2019-04-04  2:00 ` [RFC PATCH 05/25] mm: migrate: Add vm.accel_page_copy in sysfs to control page copy acceleration Zi Yan
2019-04-04  2:00 ` [RFC PATCH 06/25] mm: migrate: Make the number of copy threads adjustable via sysctl Zi Yan
2019-04-04  2:00 ` [RFC PATCH 07/25] mm: migrate: Add copy_page_dma to use DMA Engine to copy pages Zi Yan
2019-04-04  2:00 ` [RFC PATCH 08/25] mm: migrate: Add copy_page_dma into migrate_page_copy Zi Yan
2019-04-04  2:00 ` [RFC PATCH 09/25] mm: migrate: Add copy_page_lists_dma_always to support copy a list of pages Zi Yan
2019-04-04  2:00 ` [RFC PATCH 10/25] mm: migrate: copy_page_lists_mt() to copy a page list using multi-threads Zi Yan
2019-04-04  2:00 ` [RFC PATCH 11/25] mm: migrate: Add concurrent page migration into move_pages syscall Zi Yan
2019-04-04  2:00 ` [RFC PATCH 12/25] exchange pages: new page migration mechanism: exchange_pages() Zi Yan
2019-04-04  2:00 ` [RFC PATCH 13/25] exchange pages: add multi-threaded exchange pages Zi Yan
2019-04-04  2:00 ` [RFC PATCH 14/25] exchange pages: concurrent exchange pages Zi Yan [this message]
2019-04-04  2:00 ` [RFC PATCH 15/25] exchange pages: exchange anonymous page and file-backed page Zi Yan
2019-04-04  2:00 ` [RFC PATCH 16/25] exchange page: Add THP exchange support Zi Yan
2019-04-04  2:00 ` [RFC PATCH 17/25] exchange page: Add exchange_page() syscall Zi Yan
2019-04-04  2:00 ` [RFC PATCH 18/25] memcg: Add per node memory usage&max stats in memcg Zi Yan
2019-04-04  2:00 ` [RFC PATCH 19/25] mempolicy: add MPOL_F_MEMCG flag, enforcing memcg memory limit Zi Yan
2019-04-04  2:00 ` [RFC PATCH 20/25] memory manage: Add memory manage syscall Zi Yan
2019-04-04  2:00 ` [RFC PATCH 21/25] mm: move update_lru_sizes() to mm_inline.h for broader use Zi Yan
2019-04-04  2:00 ` [RFC PATCH 22/25] memory manage: active/inactive page list manipulation in memcg Zi Yan
2019-04-04  2:00 ` [RFC PATCH 23/25] memory manage: page migration based page manipulation between NUMA nodes Zi Yan
2019-04-04  2:00 ` [RFC PATCH 24/25] memory manage: limit migration batch size Zi Yan
2019-04-04  2:00 ` [RFC PATCH 25/25] memory manage: use exchange pages to memory manage to improve throughput Zi Yan
2019-04-04  7:13 ` [RFC PATCH 00/25] Accelerate page migration and use memcg for PMEM management Michal Hocko
2019-04-05  0:32 ` Yang Shi
2019-04-05 17:20   ` Zi Yan
