From: Johannes Weiner <hannes@cmpxchg.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Vlastimil Babka <vbabka@suse.cz>,
	Mel Gorman <mgorman@techsingularity.net>, Zi Yan <ziy@nvidia.com>,
	"Huang, Ying" <ying.huang@intel.com>,
	David Hildenbrand <david@redhat.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: [PATCH 07/10] mm: page_alloc: close migratetype race between freeing and stealing
Date: Wed, 20 Mar 2024 14:02:12 -0400
Message-ID: <20240320180429.678181-8-hannes@cmpxchg.org>
In-Reply-To: <20240320180429.678181-1-hannes@cmpxchg.org>

There are several freeing paths that read the page's migratetype
optimistically before grabbing the zone lock. When this races with
block stealing, those pages go on the wrong freelist; a minimal
userspace sketch of the race follows the list below.

The paths in question are:
- when freeing >costly orders that aren't THP
- when freeing pages to the buddy upon pcp lock contention
- when freeing pages that are isolated
- when freeing pages initially during boot
- when freeing the remainder in alloc_pages_exact()
- when "accepting" unaccepted VM host memory before first use
- when freeing pages during unpoisoning
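
For illustration, the window can be modeled in plain userspace C.
This is a deliberately simplified sketch (pthreads; the pageblock's
migratetype is reduced to a shared int, and "block stealing" to a
store under the lock), not kernel code:

/*
 * Simplified model of the race: one thread frees a "page" using a
 * migratetype read *before* taking the zone lock, while another
 * thread steals the block and changes its type. The optimistic
 * reader can file the page on a stale freelist; reading under the
 * lock cannot.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t zone_lock = PTHREAD_MUTEX_INITIALIZER;
static int block_mt;			/* 0 = MOVABLE, 1 = UNMOVABLE */

static void free_page_optimistic(void)
{
	int mt = block_mt;		/* racy: lock not held yet */

	pthread_mutex_lock(&zone_lock);
	/* the block may have been stolen in between; mt can be stale */
	printf("optimistic: freed to list %d, block is %d\n", mt, block_mt);
	pthread_mutex_unlock(&zone_lock);
}

static void free_page_locked(void)
{
	pthread_mutex_lock(&zone_lock);
	int mt = block_mt;		/* always matches the block */

	printf("locked:     freed to list %d, block is %d\n", mt, block_mt);
	pthread_mutex_unlock(&zone_lock);
}

static void *steal_block(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&zone_lock);
	block_mt = 1;			/* block stealing retypes the block */
	pthread_mutex_unlock(&zone_lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, steal_block, NULL);
	free_page_optimistic();		/* may pair mt=0 with a type-1 block */
	pthread_join(t, NULL);
	free_page_locked();		/* mt and block_mt always agree */
	return 0;
}

Whether the stale pairing actually shows up depends on scheduling,
but the window exists; looking the type up with the lock held closes
it by construction, which is what the patch below does.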

None of these paths are hot enough to need this optimization at the
cost of hampering defrag efforts, especially given that the most
common buddy freeing path, free_pcppages_bulk, already checks the
migratetype under the zone->lock just fine.

In addition, isolated pages need to look up the migratetype under the
lock anyway, which adds branches to the locked section and results in
a double lookup when the pages are in fact isolated.

Move the lookups into the lock.

Reported-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 mm/page_alloc.c | 52 ++++++++++++++++++-------------------------------
 1 file changed, 19 insertions(+), 33 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e7d0d4711bdd..3f65b565eaad 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1227,18 +1227,15 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
-static void free_one_page(struct zone *zone,
-				struct page *page, unsigned long pfn,
-				unsigned int order,
-				int migratetype, fpi_t fpi_flags)
+static void free_one_page(struct zone *zone, struct page *page,
+			  unsigned long pfn, unsigned int order,
+			  fpi_t fpi_flags)
 {
 	unsigned long flags;
+	int migratetype;
 
 	spin_lock_irqsave(&zone->lock, flags);
-	if (unlikely(has_isolate_pageblock(zone) ||
-		is_migrate_isolate(migratetype))) {
-		migratetype = get_pfnblock_migratetype(page, pfn);
-	}
+	migratetype = get_pfnblock_migratetype(page, pfn);
 	__free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
@@ -1246,21 +1243,13 @@ static void free_one_page(struct zone *zone,
 static void __free_pages_ok(struct page *page, unsigned int order,
 			    fpi_t fpi_flags)
 {
-	int migratetype;
 	unsigned long pfn = page_to_pfn(page);
 	struct zone *zone = page_zone(page);
 
 	if (!free_pages_prepare(page, order))
 		return;
 
-	/*
-	 * Calling get_pfnblock_migratetype() without spin_lock_irqsave() here
-	 * is used to avoid calling get_pfnblock_migratetype() under the lock.
-	 * This will reduce the lock holding time.
-	 */
-	migratetype = get_pfnblock_migratetype(page, pfn);
-
-	free_one_page(zone, page, pfn, order, migratetype, fpi_flags);
+	free_one_page(zone, page, pfn, order, fpi_flags);
 
 	__count_vm_events(PGFREE, 1 << order);
 }
@@ -2533,7 +2522,7 @@ void free_unref_page(struct page *page, unsigned int order)
 	struct per_cpu_pages *pcp;
 	struct zone *zone;
 	unsigned long pfn = page_to_pfn(page);
-	int migratetype, pcpmigratetype;
+	int migratetype;
 
 	if (!free_pages_prepare(page, order))
 		return;
@@ -2545,23 +2534,23 @@ void free_unref_page(struct page *page, unsigned int order)
 	 * get those areas back if necessary. Otherwise, we may have to free
 	 * excessively into the page allocator
 	 */
-	migratetype = pcpmigratetype = get_pfnblock_migratetype(page, pfn);
+	migratetype = get_pfnblock_migratetype(page, pfn);
 	if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
 		if (unlikely(is_migrate_isolate(migratetype))) {
-			free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
+			free_one_page(page_zone(page), page, pfn, order, FPI_NONE);
 			return;
 		}
-		pcpmigratetype = MIGRATE_MOVABLE;
+		migratetype = MIGRATE_MOVABLE;
 	}
 
 	zone = page_zone(page);
 	pcp_trylock_prepare(UP_flags);
 	pcp = pcp_spin_trylock(zone->per_cpu_pageset);
 	if (pcp) {
-		free_unref_page_commit(zone, pcp, page, pcpmigratetype, order);
+		free_unref_page_commit(zone, pcp, page, migratetype, order);
 		pcp_spin_unlock(pcp);
 	} else {
-		free_one_page(zone, page, pfn, order, migratetype, FPI_NONE);
+		free_one_page(zone, page, pfn, order, FPI_NONE);
 	}
 	pcp_trylock_finish(UP_flags);
 }
@@ -2591,12 +2580,8 @@ void free_unref_folios(struct folio_batch *folios)
 		 * allocator.
 		 */
 		if (!pcp_allowed_order(order)) {
-			int migratetype;
-
-			migratetype = get_pfnblock_migratetype(&folio->page,
-							       pfn);
-			free_one_page(folio_zone(folio), &folio->page, pfn,
-					order, migratetype, FPI_NONE);
+			free_one_page(folio_zone(folio), &folio->page,
+				      pfn, order, FPI_NONE);
 			continue;
 		}
 		folio->private = (void *)(unsigned long)order;
@@ -2632,7 +2617,7 @@ void free_unref_folios(struct folio_batch *folios)
 			 */
 			if (is_migrate_isolate(migratetype)) {
 				free_one_page(zone, &folio->page, pfn,
-					      order, migratetype, FPI_NONE);
+					      order, FPI_NONE);
 				continue;
 			}
 
@@ -2645,7 +2630,7 @@ void free_unref_folios(struct folio_batch *folios)
 			if (unlikely(!pcp)) {
 				pcp_trylock_finish(UP_flags);
 				free_one_page(zone, &folio->page, pfn,
-					      order, migratetype, FPI_NONE);
+					      order, FPI_NONE);
 				continue;
 			}
 			locked_zone = zone;
@@ -6823,13 +6808,14 @@ bool take_page_off_buddy(struct page *page)
 bool put_page_back_buddy(struct page *page)
 {
 	struct zone *zone = page_zone(page);
-	unsigned long pfn = page_to_pfn(page);
 	unsigned long flags;
-	int migratetype = get_pfnblock_migratetype(page, pfn);
 	bool ret = false;
 
 	spin_lock_irqsave(&zone->lock, flags);
 	if (put_page_testzero(page)) {
+		unsigned long pfn = page_to_pfn(page);
+		int migratetype = get_pfnblock_migratetype(page, pfn);
+
 		ClearPageHWPoisonTakenOff(page);
 		__free_one_page(page, pfn, zone, 0, migratetype, FPI_NONE);
 		if (TestClearPageHWPoison(page)) {
-- 
2.44.0



Thread overview: 56+ messages
2024-03-20 18:02 [PATCH V4 00/10] mm: page_alloc: freelist migratetype hygiene Johannes Weiner
2024-03-20 18:02 ` [PATCH 01/10] mm: page_alloc: remove pcppage migratetype caching Johannes Weiner
2024-03-20 18:02 ` [PATCH 02/10] mm: page_alloc: optimize free_unref_folios() Johannes Weiner
2024-03-25 15:56   ` Vlastimil Babka
2024-03-20 18:02 ` [PATCH 03/10] mm: page_alloc: fix up block types when merging compatible blocks Johannes Weiner
2024-03-20 18:02 ` [PATCH 04/10] mm: page_alloc: move free pages when converting block during isolation Johannes Weiner
2024-03-20 18:02 ` [PATCH 05/10] mm: page_alloc: fix move_freepages_block() range error Johannes Weiner
2024-03-25 16:22   ` Vlastimil Babka
2024-03-20 18:02 ` [PATCH 06/10] mm: page_alloc: fix freelist movement during block conversion Johannes Weiner
2024-03-26 11:28   ` Vlastimil Babka
2024-03-26 12:34     ` Johannes Weiner
2024-04-05 12:11   ` Baolin Wang
2024-04-05 16:56     ` Johannes Weiner
2024-04-07  6:58       ` Baolin Wang
2024-04-08  7:24       ` Vlastimil Babka
2024-04-09  6:21       ` Vlastimil Babka
2024-03-20 18:02 ` Johannes Weiner [this message]
2024-03-26 15:25   ` [PATCH 07/10] mm: page_alloc: close migratetype race between freeing and stealing Vlastimil Babka
2024-03-20 18:02 ` [PATCH 08/10] mm: page_alloc: set migratetype inside move_freepages() Johannes Weiner
2024-03-26 15:40   ` Vlastimil Babka
2024-03-20 18:02 ` [PATCH 09/10] mm: page_isolation: prepare for hygienic freelists Johannes Weiner
2024-03-21 13:13   ` kernel test robot
2024-03-21 14:24     ` Johannes Weiner
2024-03-21 15:03       ` Zi Yan
2024-03-27  8:06   ` Vlastimil Babka
2024-03-20 18:02 ` [PATCH 10/10] mm: page_alloc: consolidate free page accounting Johannes Weiner
2024-03-27  8:54   ` Vlastimil Babka
2024-03-27 14:32     ` Johannes Weiner
2024-03-27 18:57     ` [PATCH 1/3] mm: page_alloc: consolidate free page accounting fix Johannes Weiner
2024-03-27 18:58     ` [PATCH 2/3] mm: page_alloc: consolidate free page accounting fix 2 Johannes Weiner
2024-03-27 19:01     ` [PATCH 3/3] mm: page_alloc: batch vmstat updates in expand() Johannes Weiner
2024-03-27 20:35       ` Vlastimil Babka
2024-04-07 10:19   ` [PATCH 10/10] mm: page_alloc: consolidate free page accounting Baolin Wang
2024-04-08  7:38     ` Vlastimil Babka
2024-04-08  9:13       ` Baolin Wang
2024-04-08 14:23       ` Johannes Weiner
2024-04-09  6:23         ` Vlastimil Babka
2024-04-09  7:48           ` [PATCH] mm: page_alloc: consolidate free page accounting fix 3 Baolin Wang
2024-04-09 21:15             ` kernel test robot
2024-04-09 22:36               ` Johannes Weiner
2024-04-09 21:25             ` kernel test robot
2024-04-09  7:56           ` [PATCH 10/10] mm: page_alloc: consolidate free page accounting Baolin Wang
2024-04-09  8:41             ` Vlastimil Babka
2024-04-09  9:31         ` Baolin Wang
2024-04-09 14:46           ` Zi Yan
2024-04-10  8:49             ` Baolin Wang
2024-03-27  9:30 ` [PATCH V4 00/10] mm: page_alloc: freelist migratetype hygiene Vlastimil Babka
2024-03-27 13:10   ` Zi Yan
2024-03-27 14:29   ` Johannes Weiner
2024-04-08  9:30 ` Baolin Wang
2024-04-08 14:24   ` Johannes Weiner
2024-05-11  5:14 ` Yu Zhao
2024-05-13 16:03   ` Johannes Weiner
2024-05-13 18:10     ` Yu Zhao
2024-05-13 19:04       ` Johannes Weiner
  -- strict thread matches above, loose matches on Subject: below --
2024-03-06  4:08 [PATCH V3 01/10] " Johannes Weiner
2024-03-06  4:08 ` [PATCH 07/10] mm: page_alloc: close migratetype race between freeing and stealing Johannes Weiner
