linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Huang Ying <ying.huang@intel.com>
To: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>,
	Huang Ying <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
	Yang Shi <shy828301@gmail.com>,
	Baolin Wang <baolin.wang@linux.alibaba.com>,
	Oscar Salvador <osalvador@suse.de>,
	Matthew Wilcox <willy@infradead.org>
Subject: [RFC 4/6] mm/migrate_pages: batch _unmap and _move
Date: Wed, 21 Sep 2022 14:06:14 +0800	[thread overview]
Message-ID: <20220921060616.73086-5-ying.huang@intel.com> (raw)
In-Reply-To: <20220921060616.73086-1-ying.huang@intel.com>

In this patch the _unmap and _move stages of the page migration are
batched.  That is, previously, it was,

  for each page
    _unmap()
    _move()

Now, it is,

  for each page
    _unmap()
  for each page
    _move()

Based on this, we can batch the TLB flushing and use a hardware
accelerator to copy pages between the batched _unmap and batched _move
stages.

Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Matthew Wilcox <willy@infradead.org>
---
 mm/migrate.c | 155 +++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 139 insertions(+), 16 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 1077af858e36..165cbbc834e2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -996,6 +996,32 @@ static void __migrate_page_extract(struct page *newpage,
 
 #define MIGRATEPAGE_UNMAP		1
 
+static void migrate_page_undo_page(struct page *page,
+				   int page_was_mapped,
+				   struct anon_vma *anon_vma,
+				   struct list_head *ret)
+{
+	struct folio *folio = page_folio(page);
+
+	if (page_was_mapped)
+		remove_migration_ptes(folio, folio, false);
+	if (anon_vma)
+		put_anon_vma(anon_vma);
+	unlock_page(page);
+	list_move_tail(&page->lru, ret);
+}
+
+static void migrate_page_undo_newpage(struct page *newpage,
+				      free_page_t put_new_page,
+				      unsigned long private)
+{
+	unlock_page(newpage);
+	if (put_new_page)
+		put_new_page(newpage, private);
+	else
+		put_page(newpage);
+}
+
 static int __migrate_page_unmap(struct page *page, struct page *newpage,
 				int force, enum migrate_mode mode)
 {
@@ -1140,6 +1166,8 @@ static int __migrate_page_move(struct page *page, struct page *newpage,
 
 	rc = move_to_new_folio(dst, folio, mode);
 
+	if (rc != -EAGAIN)
+		list_del(&newpage->lru);
 	/*
 	 * When successful, push newpage to LRU immediately: so that if it
 	 * turns out to be an mlocked page, remove_migration_ptes() will
@@ -1155,6 +1183,11 @@ static int __migrate_page_move(struct page *page, struct page *newpage,
 			lru_add_drain();
 	}
 
+	if (rc == -EAGAIN) {
+		__migrate_page_record(newpage, page_was_mapped, anon_vma);
+		return rc;
+	}
+
 	if (page_was_mapped)
 		remove_migration_ptes(folio,
 			rc == MIGRATEPAGE_SUCCESS ? dst : folio, false);
@@ -1220,6 +1253,7 @@ static int migrate_page_unmap(new_page_t get_new_page, free_page_t put_new_page,
 		return -ENOMEM;
 	*newpagep = newpage;
 
+	newpage->private = 0;
 	rc = __migrate_page_unmap(page, newpage, force, mode);
 	if (rc == MIGRATEPAGE_UNMAP)
 		return rc;
@@ -1258,7 +1292,7 @@ static int migrate_page_move(free_page_t put_new_page, unsigned long private,
 		 * removed and will be freed. A page that has not been
 		 * migrated will have kept its references and be restored.
 		 */
-		list_del(&page->lru);
+		list_del_init(&page->lru);
 	}
 
 	/*
@@ -1268,9 +1302,8 @@ static int migrate_page_move(free_page_t put_new_page, unsigned long private,
 	 */
 	if (rc == MIGRATEPAGE_SUCCESS) {
 		migrate_page_done(page, reason);
-	} else {
-		if (rc != -EAGAIN)
-			list_add_tail(&page->lru, ret);
+	} else if (rc != -EAGAIN) {
+		list_add_tail(&page->lru, ret);
 
 		if (put_new_page)
 			put_new_page(newpage, private);
@@ -1455,11 +1488,13 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 	int pass = 0;
 	bool is_thp = false;
 	struct page *page;
-	struct page *newpage = NULL;
+	struct page *newpage = NULL, *newpage2;
 	struct page *page2;
 	int rc, nr_subpages;
 	LIST_HEAD(ret_pages);
 	LIST_HEAD(thp_split_pages);
+	LIST_HEAD(unmap_pages);
+	LIST_HEAD(new_pages);
 	bool nosplit = (reason == MR_NUMA_MISPLACED);
 	bool no_subpage_counting = false;
 
@@ -1541,19 +1576,19 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 			nr_subpages = compound_nr(page);
 			cond_resched();
 
-			if (PageHuge(page))
+			if (PageHuge(page)) {
+				list_move_tail(&page->lru, &ret_pages);
 				continue;
+			}
 
 			rc = migrate_page_unmap(get_new_page, put_new_page, private,
 						page, &newpage, pass > 2, mode,
 						reason, &ret_pages);
-			if (rc == MIGRATEPAGE_UNMAP)
-				rc = migrate_page_move(put_new_page, private,
-						       page, newpage, mode,
-						       reason, &ret_pages);
 			/*
 			 * The rules are:
 			 *	Success: page will be freed
+			 *	Unmap: page will be put on unmap_pages list,
+			 *	       new page put on new_pages list
 			 *	-EAGAIN: stay on the from list
 			 *	-ENOMEM: stay on the from list
 			 *	-ENOSYS: stay on the from list
@@ -1589,7 +1624,7 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 			case -ENOMEM:
 				/*
 				 * When memory is low, don't bother to try to migrate
-				 * other pages, just exit.
+				 * other pages, move unmapped pages, then exit.
 				 */
 				if (is_thp) {
 					nr_thp_failed++;
@@ -1610,9 +1645,11 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 				 * the caller otherwise the page refcnt will be leaked.
 				 */
 				list_splice_init(&thp_split_pages, from);
-				/* nr_failed isn't updated for not used */
 				nr_thp_failed += thp_retry;
-				goto out;
+				if (list_empty(&unmap_pages))
+					goto out;
+				else
+					goto move;
 			case -EAGAIN:
 				if (is_thp)
 					thp_retry++;
@@ -1625,6 +1662,10 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 				if (is_thp)
 					nr_thp_succeeded++;
 				break;
+			case MIGRATEPAGE_UNMAP:
+				list_move_tail(&page->lru, &unmap_pages);
+				list_add_tail(&newpage->lru, &new_pages);
+				break;
 			default:
 				/*
 				 * Permanent failure (-EBUSY, etc.):
@@ -1645,12 +1686,96 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 	nr_failed += retry;
 	nr_thp_failed += thp_retry;
 	nr_failed_pages += nr_retry_pages;
+move:
+	retry = 1;
+	thp_retry = 1;
+	for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
+		retry = 0;
+		thp_retry = 0;
+		nr_retry_pages = 0;
+
+		newpage = list_first_entry(&new_pages, struct page, lru);
+		newpage2 = list_next_entry(newpage, lru);
+		list_for_each_entry_safe(page, page2, &unmap_pages, lru) {
+			/*
+			 * THP statistics is based on the source huge page.
+			 * Capture required information that might get lost
+			 * during migration.
+			 */
+			is_thp = PageTransHuge(page) && !PageHuge(page);
+			nr_subpages = compound_nr(page);
+			cond_resched();
+
+			rc = migrate_page_move(put_new_page, private,
+					       page, newpage, mode,
+					       reason, &ret_pages);
+			/*
+			 * The rules are:
+			 *	Success: page will be freed
+			 *	-EAGAIN: stay on the unmap_pages list
+			 *	Other errno: put on ret_pages list then splice to
+			 *		     from list
+			 */
+			switch(rc) {
+			case -EAGAIN:
+				if (is_thp)
+					thp_retry++;
+				else if (!no_subpage_counting)
+					retry++;
+				nr_retry_pages += nr_subpages;
+				break;
+			case MIGRATEPAGE_SUCCESS:
+				nr_succeeded += nr_subpages;
+				if (is_thp)
+					nr_thp_succeeded++;
+				break;
+			default:
+				/*
+				 * Permanent failure (-EBUSY, etc.):
+				 * unlike -EAGAIN case, the failed page is
+				 * removed from migration page list and not
+				 * retried in the next outer loop.
+				 */
+				if (is_thp)
+					nr_thp_failed++;
+				else if (!no_subpage_counting)
+					nr_failed++;
+
+				nr_failed_pages += nr_subpages;
+				break;
+			}
+			newpage = newpage2;
+			newpage2 = list_next_entry(newpage, lru);
+		}
+	}
+	nr_failed += retry;
+	nr_thp_failed += thp_retry;
+	nr_failed_pages += nr_retry_pages;
+
+	rc = nr_failed + nr_thp_failed;
+out:
+	/* Cleanup remaining pages */
+	newpage = list_first_entry(&new_pages, struct page, lru);
+	newpage2 = list_next_entry(newpage, lru);
+	list_for_each_entry_safe(page, page2, &unmap_pages, lru) {
+		int page_was_mapped = 0;
+		struct anon_vma *anon_vma = NULL;
+
+		__migrate_page_extract(newpage, &page_was_mapped, &anon_vma);
+		migrate_page_undo_page(page, page_was_mapped, anon_vma,
+				       &ret_pages);
+		list_del(&newpage->lru);
+		migrate_page_undo_newpage(newpage, put_new_page, private);
+		newpage = newpage2;
+		newpage2 = list_next_entry(newpage, lru);
+	}
+
 	/*
 	 * Try to migrate subpages of fail-to-migrate THPs, no nr_failed
 	 * counting in this round, since all subpages of a THP is counted
 	 * as 1 failure in the first round.
 	 */
-	if (!list_empty(&thp_split_pages)) {
+	if (rc >= 0 && !list_empty(&thp_split_pages)) {
 		/*
 		 * Move non-migrated pages (after 10 retries) to ret_pages
 		 * to avoid migrating them again.
@@ -1662,8 +1787,6 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 		goto thp_subpage_migration;
 	}
 
-	rc = nr_failed + nr_thp_failed;
-out:
 	/*
 	 * Put the permanent failure page back to migration list, they
 	 * will be put back to the right list by the caller.
-- 
2.35.1


  parent reply	other threads:[~2022-09-21  6:07 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-21  6:06 [RFC 0/6] migrate_pages(): batch TLB flushing Huang Ying
2022-09-21  6:06 ` [RFC 1/6] mm/migrate_pages: separate huge page and normal pages migration Huang Ying
2022-09-21 15:55   ` Zi Yan
2022-09-22  1:14     ` Huang, Ying
2022-09-22  6:03   ` Baolin Wang
2022-09-22  6:22     ` Huang, Ying
2022-09-21  6:06 ` [RFC 2/6] mm/migrate_pages: split unmap_and_move() to _unmap() and _move() Huang Ying
2022-09-21 16:08   ` Zi Yan
2022-09-22  1:15     ` Huang, Ying
2022-09-22  6:36   ` Baolin Wang
2022-09-26  9:28   ` Alistair Popple
2022-09-26 18:06     ` Yang Shi
2022-09-27  0:02       ` Alistair Popple
2022-09-27  1:51         ` Huang, Ying
2022-09-27 20:34           ` John Hubbard
2022-09-27 20:57             ` Yang Shi
2022-09-28  0:59               ` Alistair Popple
2022-09-28  1:41                 ` Huang, Ying
2022-09-28  1:44                   ` John Hubbard
2022-09-28  1:49                     ` Yang Shi
2022-09-28  1:56                       ` John Hubbard
2022-09-28  2:14                         ` Yang Shi
2022-09-28  2:57                           ` John Hubbard
2022-09-28  3:25                             ` Yang Shi
2022-09-28  3:39                               ` Yang Shi
2022-09-27 20:56           ` Yang Shi
2022-09-27 20:54         ` Yang Shi
2022-09-21  6:06 ` [RFC 3/6] mm/migrate_pages: restrict number of pages to migrate in batch Huang Ying
2022-09-21 16:10   ` Zi Yan
2022-09-21 16:15     ` Zi Yan
2022-09-22  1:15     ` Huang, Ying
2022-09-21  6:06 ` Huang Ying [this message]
2022-09-21  6:06 ` [RFC 5/6] mm/migrate_pages: share more code between _unmap and _move Huang Ying
2022-09-21  6:06 ` [RFC 6/6] mm/migrate_pages: batch flushing TLB Huang Ying
2022-09-21 15:47 ` [RFC 0/6] migrate_pages(): batch TLB flushing Zi Yan
2022-09-22  1:45   ` Huang, Ying
2022-09-22  3:47   ` haoxin
2022-09-22  4:36     ` Huang, Ying
2022-09-22 12:50 ` Bharata B Rao
2022-09-23  7:52   ` Huang, Ying
2022-09-27 10:46     ` Bharata B Rao
2022-09-28  1:46       ` Huang, Ying
2022-09-26  9:11 ` Alistair Popple
2022-09-27 11:21 ` haoxin
2022-09-28  2:01   ` Huang, Ying
2022-09-28  3:33     ` haoxin
2022-09-28  4:53       ` Huang, Ying
2022-11-01 14:49   ` Hesham Almatary
2022-11-02  3:14     ` Huang, Ying
2022-11-02 14:13       ` Hesham Almatary

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220921060616.73086-5-ying.huang@intel.com \
    --to=ying.huang@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=osalvador@suse.de \
    --cc=shy828301@gmail.com \
    --cc=willy@infradead.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).