From: Zi Yan <zi.yan@sent.com>
To: Dave Hansen <dave.hansen@linux.intel.com>,
Yang Shi <yang.shi@linux.alibaba.com>,
Keith Busch <keith.busch@intel.com>,
Fengguang Wu <fengguang.wu@intel.com>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>,
Michal Hocko <mhocko@kernel.org>,
"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Vlastimil Babka <vbabka@suse.cz>,
Mel Gorman <mgorman@techsingularity.net>,
John Hubbard <jhubbard@nvidia.com>,
Mark Hairgrove <mhairgrove@nvidia.com>,
Nitin Gupta <nigupta@nvidia.com>,
Javier Cabezas <jcabezas@nvidia.com>,
David Nellans <dnellans@nvidia.com>, Zi Yan <ziy@nvidia.com>
Subject: [RFC PATCH 15/25] exchange pages: exchange anonymous page and file-backed page.
Date: Wed, 3 Apr 2019 19:00:36 -0700
Message-ID: <20190404020046.32741-16-zi.yan@sent.com>
In-Reply-To: <20190404020046.32741-1-zi.yan@sent.com>
From: Zi Yan <ziy@nvidia.com>

Exchanging an anonymous page with a file-backed page is only supported
in the basic exchange_pages() path: the concurrent exchange path might
need to lock multiple files at once, which could easily deadlock (a
sketch of the lock-ordering hazard follows the diffstat below).

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
mm/exchange.c | 284 ++++++++++++++++++++++++++++++++++++++++++++++------------
mm/internal.h | 9 ++
mm/migrate.c | 6 +-
3 files changed, 241 insertions(+), 58 deletions(-)
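
A minimal sketch of the lock-ordering hazard that keeps anon<->file
exchange out of the concurrent path; this is illustrative only, and
lock_two_pages_ordered() is a hypothetical helper, not something this
patch adds:

	/*
	 * If two tasks each exchange a pair of file-backed pages and take
	 * the page locks in arrival order, they can ABBA-deadlock:
	 *
	 *   task A: lock_page(pageX); lock_page(pageY);
	 *   task B: lock_page(pageY); lock_page(pageX);
	 *
	 * A consistent global order, e.g. by struct page address (the
	 * same trick double_pt_lock() uses for page table locks), would
	 * avoid this:
	 */
	static void lock_two_pages_ordered(struct page *p1, struct page *p2)
	{
		if (p1 > p2)
			swap(p1, p2);	/* always lock lower address first */
		lock_page(p1);
		if (p2 != p1)
			lock_page(p2);
	}

The basic exchange path works on one pair at a time and never holds more
than the two page locks taken in unmap_and_exchange(), so no such global
ordering is needed there.
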
diff --git a/mm/exchange.c b/mm/exchange.c
index bbada58..555a72c 100644
--- a/mm/exchange.c
+++ b/mm/exchange.c
@@ -20,6 +20,8 @@
#include <linux/memcontrol.h>
#include <linux/balloon_compaction.h>
#include <linux/buffer_head.h>
+#include <linux/fs.h> /* buffer_migrate_page */
+#include <linux/backing-dev.h>
#include "internal.h"
@@ -147,8 +149,6 @@ static void exchange_page_flags(struct page *to_page, struct page *from_page)
from_page_flags.page_is_idle = page_is_idle(from_page);
clear_page_idle(from_page);
from_page_flags.page_swapcache = PageSwapCache(from_page);
- from_page_flags.page_private = PagePrivate(from_page);
- ClearPagePrivate(from_page);
from_page_flags.page_writeback = test_clear_page_writeback(from_page);
@@ -170,8 +170,6 @@ static void exchange_page_flags(struct page *to_page, struct page *from_page)
to_page_flags.page_is_idle = page_is_idle(to_page);
clear_page_idle(to_page);
to_page_flags.page_swapcache = PageSwapCache(to_page);
- to_page_flags.page_private = PagePrivate(to_page);
- ClearPagePrivate(to_page);
to_page_flags.page_writeback = test_clear_page_writeback(to_page);
/* set to_page */
@@ -268,18 +266,22 @@ static void exchange_page_flags(struct page *to_page, struct page *from_page)
static int exchange_page_move_mapping(struct address_space *to_mapping,
struct address_space *from_mapping,
struct page *to_page, struct page *from_page,
+ struct buffer_head *to_head, struct buffer_head *from_head,
enum migrate_mode mode,
int to_extra_count, int from_extra_count)
{
- int to_expected_count = 1 + to_extra_count,
- from_expected_count = 1 + from_extra_count;
- unsigned long from_page_index = page_index(from_page),
- to_page_index = page_index(to_page);
+ int to_expected_count = expected_page_refs(to_mapping, to_page) + to_extra_count,
+ from_expected_count = expected_page_refs(from_mapping, from_page) + from_extra_count;
+ unsigned long from_page_index = from_page->index;
+ unsigned long to_page_index = to_page->index;
int to_swapbacked = PageSwapBacked(to_page),
from_swapbacked = PageSwapBacked(from_page);
- struct address_space *to_mapping_value = to_page->mapping,
- *from_mapping_value = from_page->mapping;
+ struct address_space *to_mapping_value = to_page->mapping;
+ struct address_space *from_mapping_value = from_page->mapping;
+ VM_BUG_ON_PAGE(to_mapping != page_mapping(to_page), to_page);
+ VM_BUG_ON_PAGE(from_mapping != page_mapping(from_page), from_page);
+ VM_BUG_ON(PageCompound(from_page) != PageCompound(to_page));
if (!to_mapping) {
/* Anonymous page without mapping */
@@ -293,26 +295,125 @@ static int exchange_page_move_mapping(struct address_space *to_mapping,
return -EAGAIN;
}
- /*
- * Now we know that no one else is looking at the page:
- * no turning back from here.
- */
- /* from_page */
- from_page->index = to_page_index;
- from_page->mapping = to_mapping_value;
+ /* both are anonymous pages */
+ if (!from_mapping && !to_mapping) {
+ /* from_page */
+ from_page->index = to_page_index;
+ from_page->mapping = to_mapping_value;
+
+ ClearPageSwapBacked(from_page);
+ if (to_swapbacked)
+ SetPageSwapBacked(from_page);
+
+
+ /* to_page */
+ to_page->index = from_page_index;
+ to_page->mapping = from_mapping_value;
+
+ ClearPageSwapBacked(to_page);
+ if (from_swapbacked)
+ SetPageSwapBacked(to_page);
+ } else if (!from_mapping && to_mapping) {
+ /* from is anonymous, to is file-backed */
+ XA_STATE(to_xas, &to_mapping->i_pages, page_index(to_page));
+ struct zone *from_zone, *to_zone;
+ int dirty;
+
+ from_zone = page_zone(from_page);
+ to_zone = page_zone(to_page);
+
+ xas_lock_irq(&to_xas);
+
+ if (page_count(to_page) != to_expected_count ||
+ xas_load(&to_xas) != to_page) {
+ xas_unlock_irq(&to_xas);
+ return -EAGAIN;
+ }
+
+ if (!page_ref_freeze(to_page, to_expected_count)) {
+ xas_unlock_irq(&to_xas);
+ pr_debug("cannot freeze page count\n");
+ return -EAGAIN;
+ }
+
+ if (!page_ref_freeze(from_page, from_expected_count)) {
+ page_ref_unfreeze(to_page, to_expected_count);
+ xas_unlock_irq(&to_xas);
+
+ return -EAGAIN;
+ }
+ /*
+ * Now we know that no one else is looking at the page:
+ * no turning back from here.
+ */
+ ClearPageSwapBacked(from_page);
+ ClearPageSwapBacked(to_page);
+
+ /* from_page */
+ from_page->index = to_page_index;
+ from_page->mapping = to_mapping_value;
+ /* to_page */
+ to_page->index = from_page_index;
+ to_page->mapping = from_mapping_value;
+
+ if (to_swapbacked)
+ __SetPageSwapBacked(from_page);
+ else
+ VM_BUG_ON_PAGE(PageSwapCache(to_page), to_page);
- ClearPageSwapBacked(from_page);
- if (to_swapbacked)
- SetPageSwapBacked(from_page);
+ if (from_swapbacked)
+ __SetPageSwapBacked(to_page);
+ else
+ VM_BUG_ON_PAGE(PageSwapCache(from_page), from_page);
+ dirty = PageDirty(to_page);
- /* to_page */
- to_page->index = from_page_index;
- to_page->mapping = from_mapping_value;
+ xas_store(&to_xas, from_page);
+ if (PageTransHuge(to_page)) {
+ int i;
+ for (i = 1; i < HPAGE_PMD_NR; i++) {
+ xas_next(&to_xas);
+ xas_store(&to_xas, from_page + i);
+ }
+ }
+
+ /* move cache reference */
+ page_ref_unfreeze(to_page, to_expected_count - hpage_nr_pages(to_page));
+ page_ref_unfreeze(from_page, from_expected_count + hpage_nr_pages(from_page));
+
+ xas_unlock(&to_xas);
+
+ /*
+ * If moved to a different zone then also account
+ * the page for that zone. Other VM counters will be
+ * taken care of when we establish references to the
+ * new page and drop references to the old page.
+ *
+ * Note that anonymous pages are accounted for
+ * via NR_FILE_PAGES and NR_ANON_MAPPED if they
+ * are mapped to swap space.
+ */
+ if (to_zone != from_zone) {
+ __dec_node_state(to_zone->zone_pgdat, NR_FILE_PAGES);
+ __inc_node_state(from_zone->zone_pgdat, NR_FILE_PAGES);
+ if (PageSwapBacked(to_page) && !PageSwapCache(to_page)) {
+ __dec_node_state(to_zone->zone_pgdat, NR_SHMEM);
+ __inc_node_state(from_zone->zone_pgdat, NR_SHMEM);
+ }
+ if (dirty && mapping_cap_account_dirty(to_mapping)) {
+ __dec_node_state(to_zone->zone_pgdat, NR_FILE_DIRTY);
+ __dec_zone_state(to_zone, NR_ZONE_WRITE_PENDING);
+ __inc_node_state(from_zone->zone_pgdat, NR_FILE_DIRTY);
+ __inc_zone_state(from_zone, NR_ZONE_WRITE_PENDING);
+ }
+ }
+ local_irq_enable();
- ClearPageSwapBacked(to_page);
- if (from_swapbacked)
- SetPageSwapBacked(to_page);
+ } else {
+ /* from is file-backed, to is anonymous: fold this into the case above */
+ /* both are file-backed */
+ VM_BUG_ON(1);
+ }
return MIGRATEPAGE_SUCCESS;
}
@@ -322,6 +423,7 @@ static int exchange_from_to_pages(struct page *to_page, struct page *from_page,
{
int rc = -EBUSY;
struct address_space *to_page_mapping, *from_page_mapping;
+ struct buffer_head *to_head = NULL, *to_bh = NULL;
VM_BUG_ON_PAGE(!PageLocked(from_page), from_page);
VM_BUG_ON_PAGE(!PageLocked(to_page), to_page);
@@ -330,15 +432,71 @@ static int exchange_from_to_pages(struct page *to_page, struct page *from_page,
to_page_mapping = page_mapping(to_page);
from_page_mapping = page_mapping(from_page);
+ /* from_page has to be an anonymous page */
BUG_ON(from_page_mapping);
- BUG_ON(to_page_mapping);
-
BUG_ON(PageWriteback(from_page));
+ /* writeback has to finish */
BUG_ON(PageWriteback(to_page));
- /* actual page mapping exchange */
- rc = exchange_page_move_mapping(to_page_mapping, from_page_mapping,
- to_page, from_page, mode, 0, 0);
+ /* to_page is anonymous */
+ if (!to_page_mapping) {
+exchange_mappings:
+ /* actual page mapping exchange */
+ rc = exchange_page_move_mapping(to_page_mapping, from_page_mapping,
+ to_page, from_page, NULL, NULL, mode, 0, 0);
+ } else {
+ if (to_page_mapping->a_ops->migratepage == buffer_migrate_page) {
+ if (!page_has_buffers(to_page))
+ goto exchange_mappings;
+
+ to_head = page_buffers(to_page);
+
+ rc = exchange_page_move_mapping(to_page_mapping,
+ from_page_mapping, to_page, from_page,
+ to_head, NULL, mode, 0, 0);
+
+ if (rc != MIGRATEPAGE_SUCCESS)
+ return rc;
+
+ /*
+ * In the async case, exchange_page_move_mapping locked the buffers
+ * with an IRQ-safe spinlock held. In the sync case, the buffers
+ * need to be locked now
+ */
+ if ((mode & MIGRATE_MODE_MASK) != MIGRATE_ASYNC)
+ BUG_ON(!buffer_migrate_lock_buffers(to_head, mode));
+
+ ClearPagePrivate(to_page);
+ set_page_private(from_page, page_private(to_page));
+ set_page_private(to_page, 0);
+ /* transfer private page count */
+ put_page(to_page);
+ get_page(from_page);
+
+ to_bh = to_head;
+ do {
+ set_bh_page(to_bh, from_page, bh_offset(to_bh));
+ to_bh = to_bh->b_this_page;
+
+ } while (to_bh != to_head);
+
+ SetPagePrivate(from_page);
+
+ to_bh = to_head;
+ } else if (!to_page_mapping->a_ops->migratepage) {
+ /* fallback_migrate_page */
+ if (PageDirty(to_page)) {
+ if ((mode & MIGRATE_MODE_MASK) != MIGRATE_SYNC)
+ return -EBUSY;
+ return writeout(to_page_mapping, to_page);
+ }
+ if (page_has_private(to_page) &&
+ !try_to_release_page(to_page, GFP_KERNEL))
+ return -EAGAIN;
+
+ goto exchange_mappings;
+ }
+ }
/* actual page data exchange */
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
@@ -356,8 +514,28 @@ static int exchange_from_to_pages(struct page *to_page, struct page *from_page,
rc = 0;
}
+ /*
+ * 1. buffer_migrate_page:
+ * private flag should be transferred from to_page to from_page
+ *
+ * 2. anon<->anon, fallback_migrate_page:
+ * both have no private flags, or to_page's is cleared.
+ */
+ VM_BUG_ON(!((page_has_private(from_page) && !page_has_private(to_page)) ||
+ (!page_has_private(from_page) && !page_has_private(to_page))));
+
exchange_page_flags(to_page, from_page);
+ if (to_bh) {
+ VM_BUG_ON(to_bh != to_head);
+ do {
+ unlock_buffer(to_bh);
+ put_bh(to_bh);
+ to_bh = to_bh->b_this_page;
+
+ } while (to_bh != to_head);
+ }
+
return rc;
}
@@ -369,34 +547,12 @@ static int unmap_and_exchange(struct page *from_page, struct page *to_page,
pgoff_t from_index, to_index;
struct anon_vma *from_anon_vma = NULL, *to_anon_vma = NULL;
- /* from_page lock down */
if (!trylock_page(from_page)) {
if ((mode & MIGRATE_MODE_MASK) == MIGRATE_ASYNC)
goto out;
-
lock_page(from_page);
}
- BUG_ON(PageWriteback(from_page));
-
- /*
- * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
- * we cannot notice that anon_vma is freed while we migrates a page.
- * This get_anon_vma() delays freeing anon_vma pointer until the end
- * of migration. File cache pages are no problem because of page_lock()
- * File Caches may use write_page() or lock_page() in migration, then,
- * just care Anon page here.
- *
- * Only page_get_anon_vma() understands the subtleties of
- * getting a hold on an anon_vma from outside one of its mms.
- * But if we cannot get anon_vma, then we won't need it anyway,
- * because that implies that the anon page is no longer mapped
- * (and cannot be remapped so long as we hold the page lock).
- */
- if (PageAnon(from_page) && !PageKsm(from_page))
- from_anon_vma = page_get_anon_vma(from_page);
-
- /* to_page lock down */
if (!trylock_page(to_page)) {
if ((mode & MIGRATE_MODE_MASK) == MIGRATE_ASYNC)
goto out_unlock;
@@ -404,7 +560,22 @@ static int unmap_and_exchange(struct page *from_page, struct page *to_page,
lock_page(to_page);
}
- BUG_ON(PageWriteback(to_page));
+ /* from_page is supposed to be an anonymous page */
+ VM_BUG_ON_PAGE(PageWriteback(from_page), from_page);
+
+ if (PageWriteback(to_page)) {
+ /*
+ * Only in the case of a full synchronous migration is it
+ * necessary to wait for PageWriteback. In the async case,
+ * the retry loop is too short and in the sync-light case,
+ * the overhead of stalling is too much
+ */
+ if ((mode & MIGRATE_MODE_MASK) != MIGRATE_SYNC) {
+ rc = -EBUSY;
+ goto out_unlock;
+ }
+ wait_on_page_writeback(to_page);
+ }
/*
* By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
@@ -420,6 +591,9 @@ static int unmap_and_exchange(struct page *from_page, struct page *to_page,
* because that implies that the anon page is no longer mapped
* (and cannot be remapped so long as we hold the page lock).
*/
+ if (PageAnon(from_page) && !PageKsm(from_page))
+ from_anon_vma = page_get_anon_vma(from_page);
+
if (PageAnon(to_page) && !PageKsm(to_page))
to_anon_vma = page_get_anon_vma(to_page);
@@ -753,7 +927,7 @@ static int exchange_page_mapping_concur(struct list_head *unmapped_list_ptr,
/* actual page mapping exchange */
rc = exchange_page_move_mapping(to_page_mapping, from_page_mapping,
- to_page, from_page, mode, 0, 0);
+ to_page, from_page, NULL, NULL, mode, 0, 0);
if (rc) {
if (one_pair->from_page_was_mapped)
diff --git a/mm/internal.h b/mm/internal.h
index a039459..cf63bf6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -566,4 +566,13 @@ extern int exchange_page_mthread(struct page *to, struct page *from,
extern int exchange_page_lists_mthread(struct page **to,
struct page **from,
int nr_pages);
+
+extern int exchange_two_pages(struct page *page1, struct page *page2);
+
+bool buffer_migrate_lock_buffers(struct buffer_head *head,
+ enum migrate_mode mode);
+int writeout(struct address_space *mapping, struct page *page);
+int expected_page_refs(struct address_space *mapping, struct page *page);
+
+
#endif /* __MM_INTERNAL_H */
diff --git a/mm/migrate.c b/mm/migrate.c
index ad02797..a0ca817 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -385,7 +385,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
}
#endif
-static int expected_page_refs(struct address_space *mapping, struct page *page)
+int expected_page_refs(struct address_space *mapping, struct page *page)
{
int expected_count = 1;
@@ -732,7 +732,7 @@ EXPORT_SYMBOL(migrate_page);
#ifdef CONFIG_BLOCK
/* Returns true if all buffers are successfully locked */
-static bool buffer_migrate_lock_buffers(struct buffer_head *head,
+bool buffer_migrate_lock_buffers(struct buffer_head *head,
enum migrate_mode mode)
{
struct buffer_head *bh = head;
@@ -880,7 +880,7 @@ int buffer_migrate_page_norefs(struct address_space *mapping,
/*
* Writeback a page to clean the dirty state
*/
-static int writeout(struct address_space *mapping, struct page *page)
+int writeout(struct address_space *mapping, struct page *page)
{
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
--
2.7.4
Thread overview: 29+ messages
2019-04-04 2:00 [RFC PATCH 00/25] Accelerate page migration and use memcg for PMEM management Zi Yan
2019-04-04 2:00 ` [RFC PATCH 01/25] mm: migrate: Change migrate_mode to support combination migration modes Zi Yan
2019-04-04 2:00 ` [RFC PATCH 02/25] mm: migrate: Add mode parameter to support future page copy routines Zi Yan
2019-04-04 2:00 ` [RFC PATCH 03/25] mm: migrate: Add a multi-threaded page migration function Zi Yan
2019-04-04 2:00 ` [RFC PATCH 04/25] mm: migrate: Add copy_page_multithread into migrate_pages Zi Yan
2019-04-04 2:00 ` [RFC PATCH 05/25] mm: migrate: Add vm.accel_page_copy in sysfs to control page copy acceleration Zi Yan
2019-04-04 2:00 ` [RFC PATCH 06/25] mm: migrate: Make the number of copy threads adjustable via sysctl Zi Yan
2019-04-04 2:00 ` [RFC PATCH 07/25] mm: migrate: Add copy_page_dma to use DMA Engine to copy pages Zi Yan
2019-04-04 2:00 ` [RFC PATCH 08/25] mm: migrate: Add copy_page_dma into migrate_page_copy Zi Yan
2019-04-04 2:00 ` [RFC PATCH 09/25] mm: migrate: Add copy_page_lists_dma_always to support copy a list of pages Zi Yan
2019-04-04 2:00 ` [RFC PATCH 10/25] mm: migrate: copy_page_lists_mt() to copy a page list using multi-threads Zi Yan
2019-04-04 2:00 ` [RFC PATCH 11/25] mm: migrate: Add concurrent page migration into move_pages syscall Zi Yan
2019-04-04 2:00 ` [RFC PATCH 12/25] exchange pages: new page migration mechanism: exchange_pages() Zi Yan
2019-04-04 2:00 ` [RFC PATCH 13/25] exchange pages: add multi-threaded exchange pages Zi Yan
2019-04-04 2:00 ` [RFC PATCH 14/25] exchange pages: concurrent " Zi Yan
2019-04-04 2:00 ` [RFC PATCH 15/25] exchange pages: exchange anonymous page and file-backed page Zi Yan [this message]
2019-04-04 2:00 ` [RFC PATCH 16/25] exchange page: Add THP exchange support Zi Yan
2019-04-04 2:00 ` [RFC PATCH 17/25] exchange page: Add exchange_page() syscall Zi Yan
2019-04-04 2:00 ` [RFC PATCH 18/25] memcg: Add per node memory usage&max stats in memcg Zi Yan
2019-04-04 2:00 ` [RFC PATCH 19/25] mempolicy: add MPOL_F_MEMCG flag, enforcing memcg memory limit Zi Yan
2019-04-04 2:00 ` [RFC PATCH 20/25] memory manage: Add memory manage syscall Zi Yan
2019-04-04 2:00 ` [RFC PATCH 21/25] mm: move update_lru_sizes() to mm_inline.h for broader use Zi Yan
2019-04-04 2:00 ` [RFC PATCH 22/25] memory manage: active/inactive page list manipulation in memcg Zi Yan
2019-04-04 2:00 ` [RFC PATCH 23/25] memory manage: page migration based page manipulation between NUMA nodes Zi Yan
2019-04-04 2:00 ` [RFC PATCH 24/25] memory manage: limit migration batch size Zi Yan
2019-04-04 2:00 ` [RFC PATCH 25/25] memory manage: use exchange pages to memory manage to improve throughput Zi Yan
2019-04-04 7:13 ` [RFC PATCH 00/25] Accelerate page migration and use memcg for PMEM management Michal Hocko
2019-04-05 0:32 ` Yang Shi
2019-04-05 17:20 ` Zi Yan