Linux-mm Archive on lore.kernel.org
 help / color / Atom feed
From: Dave Hansen <dave.hansen@linux.intel.com>
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org,Dave Hansen
	<dave.hansen@linux.intel.com>,kbusch@kernel.org,yang.shi@linux.alibaba.com,rientjes@google.com,ying.huang@intel.com,dan.j.williams@intel.com
Subject: [RFC][PATCH 2/8] mm/migrate: Defer allocating new page until needed
Date: Mon, 29 Jun 2020 16:45:07 -0700
Message-ID: <20200629234507.CA0FDE19@viggo.jf.intel.com> (raw)
In-Reply-To: <20200629234503.749E5340@viggo.jf.intel.com>


From: Keith Busch <kbusch@kernel.org>

Migrating pages had been allocating the new page before it was actually
needed. Subsequent operations may still fail, and each of those failure
paths then had to clean up a newly allocated page that was never used.

Defer allocating the page until we are actually ready to make use of
it, after locking the original page. This simplifies error handling,
but should not result in any functional change. This is just
refactoring page migration so the main part can more easily be reused
by other code.

#Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Keith Busch <kbusch@kernel.org>
Cc: Yang Shi <yang.shi@linux.alibaba.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
---

 b/mm/migrate.c |  148 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 75 insertions(+), 73 deletions(-)

diff -puN mm/migrate.c~0007-mm-migrate-Defer-allocating-new-page-until-needed mm/migrate.c
--- a/mm/migrate.c~0007-mm-migrate-Defer-allocating-new-page-until-needed	2020-06-29 16:34:37.896312607 -0700
+++ b/mm/migrate.c	2020-06-29 16:34:37.900312607 -0700
@@ -1014,56 +1014,17 @@ out:
 	return rc;
 }
 
-static int __unmap_and_move(struct page *page, struct page *newpage,
-				int force, enum migrate_mode mode)
+static int __unmap_and_move(new_page_t get_new_page,
+			    free_page_t put_new_page,
+			    unsigned long private, struct page *page,
+			    enum migrate_mode mode,
+			    enum migrate_reason reason)
 {
 	int rc = -EAGAIN;
 	int page_was_mapped = 0;
 	struct anon_vma *anon_vma = NULL;
 	bool is_lru = !__PageMovable(page);
-
-	if (!trylock_page(page)) {
-		if (!force || mode == MIGRATE_ASYNC)
-			goto out;
-
-		/*
-		 * It's not safe for direct compaction to call lock_page.
-		 * For example, during page readahead pages are added locked
-		 * to the LRU. Later, when the IO completes the pages are
-		 * marked uptodate and unlocked. However, the queueing
-		 * could be merging multiple pages for one bio (e.g.
-		 * mpage_readpages). If an allocation happens for the
-		 * second or third page, the process can end up locking
-		 * the same page twice and deadlocking. Rather than
-		 * trying to be clever about what pages can be locked,
-		 * avoid the use of lock_page for direct compaction
-		 * altogether.
-		 */
-		if (current->flags & PF_MEMALLOC)
-			goto out;
-
-		lock_page(page);
-	}
-
-	if (PageWriteback(page)) {
-		/*
-		 * Only in the case of a full synchronous migration is it
-		 * necessary to wait for PageWriteback. In the async case,
-		 * the retry loop is too short and in the sync-light case,
-		 * the overhead of stalling is too much
-		 */
-		switch (mode) {
-		case MIGRATE_SYNC:
-		case MIGRATE_SYNC_NO_COPY:
-			break;
-		default:
-			rc = -EBUSY;
-			goto out_unlock;
-		}
-		if (!force)
-			goto out_unlock;
-		wait_on_page_writeback(page);
-	}
+	struct page *newpage;
 
 	/*
 	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
@@ -1082,6 +1043,12 @@ static int __unmap_and_move(struct page
 	if (PageAnon(page) && !PageKsm(page))
 		anon_vma = page_get_anon_vma(page);
 
+	newpage = get_new_page(page, private);
+	if (!newpage) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
 	/*
 	 * Block others from accessing the new page when we get around to
 	 * establishing additional references. We are usually the only one
@@ -1091,11 +1058,11 @@ static int __unmap_and_move(struct page
 	 * This is much like races on refcount of oldpage: just don't BUG().
 	 */
 	if (unlikely(!trylock_page(newpage)))
-		goto out_unlock;
+		goto out_put;
 
 	if (unlikely(!is_lru)) {
 		rc = move_to_new_page(newpage, page, mode);
-		goto out_unlock_both;
+		goto out_unlock;
 	}
 
 	/*
@@ -1114,7 +1081,7 @@ static int __unmap_and_move(struct page
 		VM_BUG_ON_PAGE(PageAnon(page), page);
 		if (page_has_private(page)) {
 			try_to_free_buffers(page);
-			goto out_unlock_both;
+			goto out_unlock;
 		}
 	} else if (page_mapped(page)) {
 		/* Establish migration ptes */
@@ -1131,15 +1098,9 @@ static int __unmap_and_move(struct page
 	if (page_was_mapped)
 		remove_migration_ptes(page,
 			rc == MIGRATEPAGE_SUCCESS ? newpage : page, false);
-
-out_unlock_both:
-	unlock_page(newpage);
 out_unlock:
-	/* Drop an anon_vma reference if we took one */
-	if (anon_vma)
-		put_anon_vma(anon_vma);
-	unlock_page(page);
-out:
+	unlock_page(newpage);
+out_put:
 	/*
 	 * If migration is successful, decrease refcount of the newpage
 	 * which will not free the page because new page owner increased
@@ -1150,12 +1111,20 @@ out:
 	 * state.
 	 */
 	if (rc == MIGRATEPAGE_SUCCESS) {
+		set_page_owner_migrate_reason(newpage, reason);
 		if (unlikely(!is_lru))
 			put_page(newpage);
 		else
 			putback_lru_page(newpage);
+	} else if (put_new_page) {
+		put_new_page(newpage, private);
+	} else {
+		put_page(newpage);
 	}
-
+out:
+	/* Drop an anon_vma reference if we took one */
+	if (anon_vma)
+		put_anon_vma(anon_vma);
 	return rc;
 }
 
@@ -1203,8 +1172,7 @@ static ICE_noinline int unmap_and_move(n
 				   int force, enum migrate_mode mode,
 				   enum migrate_reason reason)
 {
-	int rc = MIGRATEPAGE_SUCCESS;
-	struct page *newpage = NULL;
+	int rc = -EAGAIN;
 
 	if (!thp_migration_supported() && PageTransHuge(page))
 		return -ENOMEM;
@@ -1219,17 +1187,57 @@ static ICE_noinline int unmap_and_move(n
 				__ClearPageIsolated(page);
 			unlock_page(page);
 		}
+		rc = MIGRATEPAGE_SUCCESS;
 		goto out;
 	}
 
-	newpage = get_new_page(page, private);
-	if (!newpage)
-		return -ENOMEM;
+	if (!trylock_page(page)) {
+		if (!force || mode == MIGRATE_ASYNC)
+			return rc;
 
-	rc = __unmap_and_move(page, newpage, force, mode);
-	if (rc == MIGRATEPAGE_SUCCESS)
-		set_page_owner_migrate_reason(newpage, reason);
+		/*
+		 * It's not safe for direct compaction to call lock_page.
+		 * For example, during page readahead pages are added locked
+		 * to the LRU. Later, when the IO completes the pages are
+		 * marked uptodate and unlocked. However, the queueing
+		 * could be merging multiple pages for one bio (e.g.
+		 * mpage_readpages). If an allocation happens for the
+		 * second or third page, the process can end up locking
+		 * the same page twice and deadlocking. Rather than
+		 * trying to be clever about what pages can be locked,
+		 * avoid the use of lock_page for direct compaction
+		 * altogether.
+		 */
+		if (current->flags & PF_MEMALLOC)
+			return rc;
+
+		lock_page(page);
+	}
+
+	if (PageWriteback(page)) {
+		/*
+		 * Only in the case of a full synchronous migration is it
+		 * necessary to wait for PageWriteback. In the async case,
+		 * the retry loop is too short and in the sync-light case,
+		 * the overhead of stalling is too much
+		 */
+		switch (mode) {
+		case MIGRATE_SYNC:
+		case MIGRATE_SYNC_NO_COPY:
+			break;
+		default:
+			rc = -EBUSY;
+			goto out_unlock;
+		}
+		if (!force)
+			goto out_unlock;
+		wait_on_page_writeback(page);
+	}
+	rc = __unmap_and_move(get_new_page, put_new_page, private,
+			      page, mode, reason);
 
+out_unlock:
+	unlock_page(page);
 out:
 	if (rc != -EAGAIN) {
 		/*
@@ -1269,9 +1277,8 @@ out:
 		if (rc != -EAGAIN) {
 			if (likely(!__PageMovable(page))) {
 				putback_lru_page(page);
-				goto put_new;
+				goto done;
 			}
-
 			lock_page(page);
 			if (PageMovable(page))
 				putback_movable_page(page);
@@ -1280,13 +1287,8 @@ out:
 			unlock_page(page);
 			put_page(page);
 		}
-put_new:
-		if (put_new_page)
-			put_new_page(newpage, private);
-		else
-			put_page(newpage);
 	}
-
+done:
 	return rc;
 }
 
_


  parent reply index

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-29 23:45 [RFC][PATCH 0/8] Migrate Pages in lieu of discard Dave Hansen
2020-06-29 23:45 ` [RFC][PATCH 1/8] mm/numa: node demotion data structure and lookup Dave Hansen
2020-06-29 23:45 ` Dave Hansen [this message]
2020-07-01  8:47   ` [RFC][PATCH 2/8] mm/migrate: Defer allocating new page until needed Greg Thelen
2020-07-01 14:46     ` Dave Hansen
2020-07-01 18:32       ` Yang Shi
2020-06-29 23:45 ` [RFC][PATCH 3/8] mm/vmscan: Attempt to migrate page in lieu of discard Dave Hansen
2020-07-01  0:47   ` David Rientjes
2020-07-01  1:29     ` Yang Shi
2020-07-01  5:41       ` David Rientjes
2020-07-01  8:54         ` Huang, Ying
2020-07-01 18:20           ` Dave Hansen
2020-07-01 19:50             ` David Rientjes
2020-07-02  1:50               ` Huang, Ying
2020-07-01 15:15         ` Dave Hansen
2020-07-01 17:21         ` Yang Shi
2020-07-01 19:45           ` David Rientjes
2020-07-02 10:02             ` Jonathan Cameron
2020-07-01  1:40     ` Huang, Ying
2020-07-01 16:48     ` Dave Hansen
2020-07-01 19:25       ` David Rientjes
2020-07-02  5:02         ` Huang, Ying
2020-06-29 23:45 ` [RFC][PATCH 4/8] mm/vmscan: add page demotion counter Dave Hansen
2020-06-29 23:45 ` [RFC][PATCH 5/8] mm/numa: automatically generate node migration order Dave Hansen
2020-06-30  8:22   ` Huang, Ying
2020-07-01 18:23     ` Dave Hansen
2020-07-02  1:20       ` Huang, Ying
2020-06-29 23:45 ` [RFC][PATCH 6/8] mm/vmscan: Consider anonymous pages without swap Dave Hansen
2020-06-29 23:45 ` [RFC][PATCH 7/8] mm/vmscan: never demote for memcg reclaim Dave Hansen
2020-06-29 23:45 ` [RFC][PATCH 8/8] mm/numa: new reclaim mode to enable reclaim-based migration Dave Hansen
2020-06-30  7:23   ` Huang, Ying
2020-06-30 17:50     ` Yang Shi
2020-07-01  0:48       ` Huang, Ying
2020-07-01  1:12         ` Yang Shi
2020-07-01  1:28           ` Huang, Ying
2020-07-01 16:02       ` Dave Hansen
2020-07-03  9:30   ` Huang, Ying
2020-06-30 18:36 ` [RFC][PATCH 0/8] Migrate Pages in lieu of discard Shakeel Butt
2020-06-30 18:51   ` Dave Hansen
2020-06-30 19:25     ` Shakeel Butt
2020-06-30 19:31       ` Dave Hansen
2020-07-01 14:24         ` [RFC] [PATCH " Zi Yan
2020-07-01 14:32           ` Dave Hansen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200629234507.CA0FDE19@viggo.jf.intel.com \
    --to=dave.hansen@linux.intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=kbusch@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=rientjes@google.com \
    --cc=yang.shi@linux.alibaba.com \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-mm Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-mm/0 linux-mm/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-mm linux-mm/ https://lore.kernel.org/linux-mm \
		linux-mm@kvack.org
	public-inbox-index linux-mm

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kvack.linux-mm


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git