* [PATCH] mm: ALLOC_HIGHATOMIC flag allocation issue
@ 2023-08-21  9:04 Zhiguo Jiang
  0 siblings, 0 replies; 7+ messages in thread
From: Zhiguo Jiang @ 2023-08-21  9:04 UTC (permalink / raw)
  To: Andrew Morton, linux-mm, linux-kernel; +Cc: opensource.kernel, Zhiguo Jiang

When alloc_flags contains ALLOC_HIGHATOMIC and the allocation order in
rmqueue() is 1, 2, 3 or 10, a page may be allocated successfully from
the pcplist cache, yet a free pageblock is still moved from the
allocated migratetype's freelist to the MIGRATE_HIGHATOMIC freelist,
instead of the allocation being served from the MIGRATE_HIGHATOMIC
freelist first. As a result, the number of pages on the
MIGRATE_HIGHATOMIC freelist keeps growing, the other migratetype
freelists shrink, and allocations from them are more likely to fail.

Currently the ALLOC_HIGHATOMIC allocation sequence is:
pcplist cache --> buddy rmqueue_bulk() refill (batch >> order pages)
from the allocation migratetype freelist --> buddy MIGRATE_HIGHATOMIC
freelist --> buddy allocation migratetype freelist.

Since requesting pages from the pcplist cache is faster than going to
the buddy allocator, this patch changes the ALLOC_HIGHATOMIC allocation
sequence to:
pcplist --> buddy MIGRATE_HIGHATOMIC freelist --> buddy allocation
migratetype freelist.

This fixes allocation failures for non-ALLOC_HIGHATOMIC requests caused
by excessive page reservations on the MIGRATE_HIGHATOMIC freelists.
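
An illustrative sketch of the change (a userspace sketch, not the
kernel code; helper names such as pcplist_alloc() and buddy_alloc()
are hypothetical stand-ins for rmqueue_pcplist()/rmqueue_buddy()): a
pcplist hit is recorded in the alloc flags, and the highatomic
pageblock reservation is performed only when the page actually came
from the buddy freelists.

#include <stdio.h>

/* Flag values mirror the patch; everything else is a stand-in. */
#define ALLOC_HIGHATOMIC	0x200
#define ALLOC_PCPLIST		0x1000

struct page { unsigned int order; };

/* Hypothetical pcplist path: only small orders are cached. */
static struct page *pcplist_alloc(unsigned int order)
{
	static struct page p;
	return order <= 3 ? &p : NULL;
}

/* Hypothetical buddy path. */
static struct page *buddy_alloc(unsigned int order)
{
	static struct page p;
	p.order = order;
	return &p;
}

static int reserved_pageblocks;

/* Sketch of rmqueue() after the patch: records where the page came from. */
static struct page *rmqueue_sketch(unsigned int order, unsigned int *alloc_flags)
{
	struct page *page = pcplist_alloc(order);

	if (page) {
		*alloc_flags |= ALLOC_PCPLIST;	/* served from the pcplist */
		return page;
	}
	return buddy_alloc(order);
}

/* Sketch of the caller's check in get_page_from_freelist(). */
static void maybe_reserve_highatomic(unsigned int alloc_flags)
{
	if ((alloc_flags & ALLOC_HIGHATOMIC) && !(alloc_flags & ALLOC_PCPLIST))
		reserved_pageblocks++;	/* reserve only for buddy-served pages */
}

int main(void)
{
	unsigned int flags = ALLOC_HIGHATOMIC;

	if (rmqueue_sketch(2, &flags))
		maybe_reserve_highatomic(flags);
	printf("reserved pageblocks: %d\n", reserved_pageblocks);	/* prints 0 */
	return 0;
}

With this, a pcplist hit no longer triggers reserve_highatomic_pageblock(),
so the MIGRATE_HIGHATOMIC freelist stops growing on every cached allocation.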

Signed-off-by: Zhiguo Jiang <justinjiang@vivo.com>
---
 mm/internal.h   |  1 +
 mm/page_alloc.c | 22 ++++++++++++++--------
 2 files changed, 15 insertions(+), 8 deletions(-)
 mode change 100644 => 100755 mm/internal.h
 mode change 100644 => 100755 mm/page_alloc.c

diff --git a/mm/internal.h b/mm/internal.h
index 7499b5ea1cf6..a3b3fdda8031
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -844,6 +844,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 #endif
 #define ALLOC_HIGHATOMIC	0x200 /* Allows access to MIGRATE_HIGHATOMIC */
 #define ALLOC_KSWAPD		0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
+#define ALLOC_PCPLIST		0x1000 /* page allocated from pcplist */
 
 /* Flags that allow allocations below the min watermark. */
 #define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 442c1b3480aa..afcd0f875c92
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2722,6 +2722,8 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 			int batch = READ_ONCE(pcp->batch);
 			int alloced;
 
+			if (alloc_flags & ALLOC_HIGHATOMIC)
+				goto out;
 			/*
 			 * Scale batch relative to order if batch implies
 			 * free pages can be stored on the PCP. Batch can
@@ -2736,6 +2738,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 					migratetype, alloc_flags);
 
 			pcp->count += alloced << order;
+out:
 			if (unlikely(list_empty(list)))
 				return NULL;
 		}
@@ -2798,7 +2801,7 @@ __no_sanitize_memory
 static inline
 struct page *rmqueue(struct zone *preferred_zone,
 			struct zone *zone, unsigned int order,
-			gfp_t gfp_flags, unsigned int alloc_flags,
+			gfp_t gfp_flags, unsigned int *alloc_flags,
 			int migratetype)
 {
 	struct page *page;
@@ -2814,21 +2817,23 @@ struct page *rmqueue(struct zone *preferred_zone,
 		 * MIGRATE_MOVABLE pcplist could have the pages on CMA area and
 		 * we need to skip it when CMA area isn't allowed.
 		 */
-		if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
+		if (!IS_ENABLED(CONFIG_CMA) || *alloc_flags & ALLOC_CMA ||
 				migratetype != MIGRATE_MOVABLE) {
 			page = rmqueue_pcplist(preferred_zone, zone, order,
-					migratetype, alloc_flags);
-			if (likely(page))
+					migratetype, *alloc_flags);
+			if (likely(page)) {
+				*alloc_flags |= ALLOC_PCPLIST;
 				goto out;
+			}
 		}
 	}
 
-	page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
+	page = rmqueue_buddy(preferred_zone, zone, order, *alloc_flags,
 							migratetype);
 
 out:
 	/* Separate test+clear to avoid unnecessary atomics */
-	if ((alloc_flags & ALLOC_KSWAPD) &&
+	if ((*alloc_flags & ALLOC_KSWAPD) &&
 	    unlikely(test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags))) {
 		clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
 		wakeup_kswapd(zone, 0, 0, zone_idx(zone));
@@ -3208,7 +3213,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 
 try_this_zone:
 		page = rmqueue(ac->preferred_zoneref->zone, zone, order,
-				gfp_mask, alloc_flags, ac->migratetype);
+				gfp_mask, &alloc_flags, ac->migratetype);
 		if (page) {
 			prep_new_page(page, order, gfp_mask, alloc_flags);
 
@@ -3216,7 +3221,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 			 * If this is a high-order atomic allocation then check
 			 * if the pageblock should be reserved for the future
 			 */
-			if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+			if (unlikely(alloc_flags & ALLOC_HIGHATOMIC)
+				&& unlikely(!(alloc_flags & ALLOC_PCPLIST)))
 				reserve_highatomic_pageblock(page, zone);
 
 			return page;
-- 
2.39.0



* [PATCH] mm: ALLOC_HIGHATOMIC flag allocation issue
@ 2023-12-01  7:25 Zhiguo Jiang
  0 siblings, 0 replies; 7+ messages in thread
From: Zhiguo Jiang @ 2023-12-01  7:25 UTC (permalink / raw)
  To: Andrew Morton, linux-mm, linux-kernel; +Cc: opensource.kernel, Zhiguo Jiang

When alloc_flags contains ALLOC_HIGHATOMIC and the allocation order in
rmqueue() is 1, 2, 3 or 10, a page may be allocated successfully from
the pcplist, yet a free pageblock is still moved from the allocated
migratetype's freelist to the MIGRATE_HIGHATOMIC freelist, instead of
the allocation being served from the MIGRATE_HIGHATOMIC freelist first.
This results in an ever-growing buddy highatomic freelist and an
increased risk of allocation failure on the other buddy migrate
freelists.

Currently the sequence of ALLOC_HIGHATOMIC allocation is:
pcplist --> rmqueue_bulk() --> rmqueue_buddy() MIGRATE_HIGHATOMIC -->
rmqueue_buddy() allocated migratetype.

Since requesting pages from the pcplist is faster than going to the
buddy allocator, the ALLOC_HIGHATOMIC allocation sequence is changed to:
pcplist --> rmqueue_buddy() MIGRATE_HIGHATOMIC --> rmqueue_buddy()
allocated migratetype.

This fixes failures when allocating pages of other migratetypes that
are caused by excessive page reservations on the MIGRATE_HIGHATOMIC
freelist.

In comparative testing (cat /proc/pagetypeinfo, whose columns are the
free-block counts at orders 0 through 10), the HighAtomic freelist is:
Test without this patch:
Node 0, zone Normal, type HighAtomic 2369 771 138 15 0 0 0 0 0 0 0
Test with this patch:
Node 0, zone Normal, type HighAtomic 206 82 4 2 1 0 0 0 0 0 0
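
To illustrate the effect of the goto in the hunk below, here is a
hedged userspace sketch (hypothetical names such as bulk_refill(); not
the kernel code) of the refill decision in __rmqueue_pcplist(): when
the per-CPU list is empty and the request carries ALLOC_HIGHATOMIC,
the bulk refill is skipped and NULL is returned, so the caller falls
through to rmqueue_buddy(), which tries the MIGRATE_HIGHATOMIC
freelist first.

#include <stddef.h>
#include <stdio.h>

#define ALLOC_HIGHATOMIC	0x200

struct page { unsigned int order; };

/* Hypothetical stand-in for rmqueue_bulk(): refills the per-CPU list
 * from the regular migratetype freelists. */
static int bulk_refill(struct page **list, unsigned int order)
{
	static struct page p;

	p.order = order;
	*list = &p;
	return 1;	/* number of pages added to the list */
}

/* Sketch of the patched __rmqueue_pcplist() refill decision. */
static struct page *pcplist_alloc_sketch(struct page **list,
					 unsigned int order,
					 unsigned int alloc_flags)
{
	if (*list == NULL) {
		/*
		 * Empty pcplist and ALLOC_HIGHATOMIC: skip the refill so
		 * the caller falls back to the buddy allocator, which
		 * tries the MIGRATE_HIGHATOMIC freelist first.
		 */
		if (alloc_flags & ALLOC_HIGHATOMIC)
			return NULL;
		if (!bulk_refill(list, order))
			return NULL;
	}
	return *list;	/* serve from the (re)filled list */
}

int main(void)
{
	struct page *list = NULL;

	/* Highatomic request on an empty list: miss, go to buddy. */
	if (!pcplist_alloc_sketch(&list, 2, ALLOC_HIGHATOMIC))
		printf("pcplist miss -> rmqueue_buddy(MIGRATE_HIGHATOMIC)\n");

	/* A normal request still triggers the bulk refill. */
	if (pcplist_alloc_sketch(&list, 2, 0))
		printf("pcplist refilled and served\n");
	return 0;
}

This also motivates the reservation change in the second hunk: the
highatomic pageblock is reserved only when the request reached the
buddy path (tracked by the *highatomic out-parameter), not on pcplist
hits.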

Signed-off-by: Zhiguo Jiang <justinjiang@vivo.com>
---
 mm/page_alloc.c | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)
 mode change 100644 => 100755 mm/page_alloc.c

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2f646773a934..95de1204e1e5
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2861,11 +2861,20 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 			int batch = nr_pcp_alloc(pcp, zone, order);
 			int alloced;
 
+			/*
+			 * If pcplist is empty and alloc_flags
+			 * contains ALLOC_HIGHATOMIC, alloc from
+			 * buddy highatomic freelist first.
+			 */
+			if (alloc_flags & ALLOC_HIGHATOMIC)
+				goto out;
+
 			alloced = rmqueue_bulk(zone, order,
 					batch, list,
 					migratetype, alloc_flags);
 
 			pcp->count += alloced << order;
+out:
 			if (unlikely(list_empty(list)))
 				return NULL;
 		}
@@ -2929,7 +2938,7 @@ static inline
 struct page *rmqueue(struct zone *preferred_zone,
 			struct zone *zone, unsigned int order,
 			gfp_t gfp_flags, unsigned int alloc_flags,
-			int migratetype)
+			int migratetype, bool *highatomic)
 {
 	struct page *page;
 
@@ -2949,6 +2958,24 @@ struct page *rmqueue(struct zone *preferred_zone,
 	page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
 							migratetype);
 
+	/*
+	 * The high-order atomic allocation pageblock reserved:
+	 *
+	 * If the high-order atomic page is allocated from pcplist,
+	 * the highatomic pageblock does not need to be reserved,
+	 * which can avoid migrating an increasing number of pages
+	 * into buddy highatomic freelist and leading to an increased
+	 * risk of allocation failure on other migrate freelists in
+	 * buddy.
+	 *
+	 * If the high-order atomic page is allocated from buddy
+	 * highatomic freelist, regardless of whether the allocation
+	 * is successful or not, the highatomic pageblock can try to
+	 * be reserved.
+	 */
+	if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+		*highatomic = true;
+
 out:
 	/* Separate test+clear to avoid unnecessary atomics */
 	if ((alloc_flags & ALLOC_KSWAPD) &&
@@ -3219,6 +3246,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 	struct pglist_data *last_pgdat = NULL;
 	bool last_pgdat_dirty_ok = false;
 	bool no_fallback;
+	bool highatomic = false;
 
 retry:
 	/*
@@ -3350,7 +3378,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 
 try_this_zone:
 		page = rmqueue(ac->preferred_zoneref->zone, zone, order,
-				gfp_mask, alloc_flags, ac->migratetype);
+				gfp_mask, alloc_flags, ac->migratetype, &highatomic);
 		if (page) {
 			prep_new_page(page, order, gfp_mask, alloc_flags);
 
@@ -3358,7 +3386,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 			 * If this is a high-order atomic allocation then check
 			 * if the pageblock should be reserved for the future
 			 */
-			if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+			if (unlikely(highatomic))
 				reserve_highatomic_pageblock(page, zone);
 
 			return page;
-- 
2.39.0



* [PATCH] mm: ALLOC_HIGHATOMIC flag allocation issue
@ 2023-11-23 14:50 Zhiguo Jiang
  0 siblings, 0 replies; 7+ messages in thread
From: Zhiguo Jiang @ 2023-11-23 14:50 UTC (permalink / raw)
  To: Andrew Morton, linux-mm, linux-kernel
  Cc: Matthew Wilcox, Johannes Weiner, opensource.kernel, Zhiguo Jiang

When alloc_flags contains ALLOC_HIGHATOMIC and the allocation order in
rmqueue() is 1, 2, 3 or 10, a page may be allocated successfully from
the pcplist, yet a free pageblock is still moved from the allocated
migratetype's freelist to the MIGRATE_HIGHATOMIC freelist, instead of
the allocation being served from the MIGRATE_HIGHATOMIC freelist first.
This results in an ever-growing buddy highatomic freelist and an
increased risk of allocation failure on the other buddy migrate
freelists.

Currently the sequence of ALLOC_HIGHATOMIC allocation is:
pcplist --> rmqueue_bulk() --> rmqueue_buddy() MIGRATE_HIGHATOMIC -->
rmqueue_buddy() allocated migratetype.

Since requesting pages from the pcplist is faster than going to the
buddy allocator, the ALLOC_HIGHATOMIC allocation sequence is changed to:
pcplist --> rmqueue_buddy() MIGRATE_HIGHATOMIC --> rmqueue_buddy()
allocated migratetype.

This fixes failures when allocating pages of other migratetypes that
are caused by excessive page reservations on the MIGRATE_HIGHATOMIC
freelist.

In comparative testing (cat /proc/pagetypeinfo), the HighAtomic
freelist size is:
Test without this patch:
Node 0, zone Normal, type HighAtomic 2369 771 138 15 0 0 0 0 0 0 0
Test with this patch:
Node 0, zone Normal, type HighAtomic 206 82 4 2 1 0 0 0 0 0 0

Signed-off-by: Zhiguo Jiang <justinjiang@vivo.com>
---
 mm/page_alloc.c | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)
 mode change 100644 => 100755 mm/page_alloc.c

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7a8dac0c1c74..693e86fc9850
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2850,11 +2850,20 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 			int batch = nr_pcp_alloc(pcp, zone, order);
 			int alloced;
 
+			/*
+			 * If pcplist is empty and alloc_flags contains
+			 * ALLOC_HIGHATOMIC, alloc from buddy highatomic
+			 * freelist first.
+			 */
+			if (alloc_flags & ALLOC_HIGHATOMIC)
+				goto out;
+
 			alloced = rmqueue_bulk(zone, order,
 					batch, list,
 					migratetype, alloc_flags);
 
 			pcp->count += alloced << order;
+out:
 			if (unlikely(list_empty(list)))
 				return NULL;
 		}
@@ -2918,7 +2927,7 @@ static inline
 struct page *rmqueue(struct zone *preferred_zone,
 			struct zone *zone, unsigned int order,
 			gfp_t gfp_flags, unsigned int alloc_flags,
-			int migratetype)
+			int migratetype, bool *highatomic)
 {
 	struct page *page;
 
@@ -2938,6 +2947,24 @@ struct page *rmqueue(struct zone *preferred_zone,
 	page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
 							migratetype);
 
+	/*
+	 * The high-order atomic allocation pageblock reserved:
+	 *
+	 * If the high-order atomic page is allocated from pcplist,
+	 * the highatomic pageblock does not need to be reserved,
+	 * which can avoid migrating an increasing number of pages
+	 * into buddy highatomic freelist and leading to an increased
+	 * risk of allocation failure on other migrate freelists in
+	 * buddy.
+	 *
+	 * If the high-order atomic page is allocated from buddy
+	 * highatomic freelist, regardless of whether the allocation
+	 * is successful or not, the highatomic pageblock can try to
+	 * be reserved.
+	 */
+	if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+		*highatomic = true;
+
 out:
 	/* Separate test+clear to avoid unnecessary atomics */
 	if ((alloc_flags & ALLOC_KSWAPD) &&
@@ -3208,6 +3235,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 	struct pglist_data *last_pgdat = NULL;
 	bool last_pgdat_dirty_ok = false;
 	bool no_fallback;
+	bool highatomic = false;
 
 retry:
 	/*
@@ -3339,7 +3367,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 
 try_this_zone:
 		page = rmqueue(ac->preferred_zoneref->zone, zone, order,
-				gfp_mask, alloc_flags, ac->migratetype);
+				gfp_mask, alloc_flags, ac->migratetype, &highatomic);
 		if (page) {
 			prep_new_page(page, order, gfp_mask, alloc_flags);
 
@@ -3347,7 +3375,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 			 * If this is a high-order atomic allocation then check
 			 * if the pageblock should be reserved for the future
 			 */
-			if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+			if (unlikely(highatomic))
 				reserve_highatomic_pageblock(page, zone);
 
 			return page;
-- 
2.39.0



* Re: [PATCH] mm: ALLOC_HIGHATOMIC flag allocation issue
  2023-11-20 17:29 ` Matthew Wilcox
@ 2023-11-21  2:26   ` zhiguojiang
  0 siblings, 0 replies; 7+ messages in thread
From: zhiguojiang @ 2023-11-21  2:26 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Andrew Morton, linux-mm, linux-kernel, Johannes Weiner,
	opensource.kernel


Thanks, I have updated the patch according to your suggestions in v2.

On 2023/11/21 1:29, Matthew Wilcox wrote:
> On Mon, Nov 20, 2023 at 10:35:36AM +0800, Zhiguo Jiang wrote:
>> +			/*
>> +			 * If pcplist is empty and alloc_flags is with ALLOC_HIGHATOMIC,
>> +			 * it should alloc from buddy highatomic migrate freelist firstly
>> +			 * to ensure quick and successful allocation.
> Assuming that all the serious questions have been dealt with, let's
> fix the less important problems ...
>
> 			 * If pcplist is empty and alloc_flags contains
> 			 * ALLOC_HIGHATOMIC, alloc from buddy highatomic
> 			 * freelist first.
>
>> @@ -2918,7 +2927,7 @@ static inline
>>   struct page *rmqueue(struct zone *preferred_zone,
>>   			struct zone *zone, unsigned int order,
>>   			gfp_t gfp_flags, unsigned int alloc_flags,
>> -			int migratetype)
>> +			int migratetype, bool *highatomc_allocation)
> bool *highatomic
>
>> +	/*
>> +	 * The high-order atomic allocation pageblock reserved conditions:
>> +	 *
>> +	 * If the high-order atomic allocation page is alloced from pcplist,
>> +	 * the highatomic pageblock does not need to be reserved, which can
>> +	 * void to migrate an increasing number of pages into buddy
> 	 * avoid migrating an increasing number of pages into buddy
>
>> +	 * MIGRATE_HIGHATOMIC freelist and lead to an increasing risk of
> "increased"
>
>> +	 * allocation failure on other buddy migrate freelists.
>> +	 *
>> +	 * If the high-order atomic allocation page is alloced from buddy
> "allocated"
>
>> @@ -3208,6 +3234,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>>   	struct pglist_data *last_pgdat = NULL;
>>   	bool last_pgdat_dirty_ok = false;
>>   	bool no_fallback;
>> +	bool highatomc_allocation = false;
> Again, just call this 'highatomic'.
>



* Re: [PATCH] mm: ALLOC_HIGHATOMIC flag allocation issue
  2023-11-20  2:35 Zhiguo Jiang
@ 2023-11-20 17:29 ` Matthew Wilcox
  2023-11-21  2:26   ` zhiguojiang
  0 siblings, 1 reply; 7+ messages in thread
From: Matthew Wilcox @ 2023-11-20 17:29 UTC (permalink / raw)
  To: Zhiguo Jiang
  Cc: Andrew Morton, linux-mm, linux-kernel, Johannes Weiner,
	opensource.kernel

On Mon, Nov 20, 2023 at 10:35:36AM +0800, Zhiguo Jiang wrote:
> +			/*
> +			 * If pcplist is empty and alloc_flags is with ALLOC_HIGHATOMIC,
> +			 * it should alloc from buddy highatomic migrate freelist firstly
> +			 * to ensure quick and successful allocation.

Assuming that all the serious questions have been dealt with, let's
fix the less important problems ...

			 * If pcplist is empty and alloc_flags contains
			 * ALLOC_HIGHATOMIC, alloc from buddy highatomic
			 * freelist first.

> @@ -2918,7 +2927,7 @@ static inline
>  struct page *rmqueue(struct zone *preferred_zone,
>  			struct zone *zone, unsigned int order,
>  			gfp_t gfp_flags, unsigned int alloc_flags,
> -			int migratetype)
> +			int migratetype, bool *highatomc_allocation)

bool *highatomic

> +	/*
> +	 * The high-order atomic allocation pageblock reserved conditions:
> +	 *
> +	 * If the high-order atomic allocation page is alloced from pcplist,
> +	 * the highatomic pageblock does not need to be reserved, which can
> +	 * void to migrate an increasing number of pages into buddy

	 * avoid migrating an increasing number of pages into buddy

> +	 * MIGRATE_HIGHATOMIC freelist and lead to an increasing risk of

"increased"

> +	 * allocation failure on other buddy migrate freelists.
> +	 *
> +	 * If the high-order atomic allocation page is alloced from buddy

"allocated"

> @@ -3208,6 +3234,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>  	struct pglist_data *last_pgdat = NULL;
>  	bool last_pgdat_dirty_ok = false;
>  	bool no_fallback;
> +	bool highatomc_allocation = false;

Again, just call this 'highatomic'.



* [PATCH] mm: ALLOC_HIGHATOMIC flag allocation issue
@ 2023-11-20  2:35 Zhiguo Jiang
  2023-11-20 17:29 ` Matthew Wilcox
  0 siblings, 1 reply; 7+ messages in thread
From: Zhiguo Jiang @ 2023-11-20  2:35 UTC (permalink / raw)
  To: Andrew Morton, linux-mm, linux-kernel
  Cc: Johannes Weiner, Matthew Wilcox, opensource.kernel, Zhiguo Jiang

When alloc_flags contains ALLOC_HIGHATOMIC and the allocation order in
rmqueue() is 1, 2, 3 or 10, a page may be allocated successfully from
the pcplist, yet a free pageblock is still moved from the allocated
migratetype's freelist to the MIGRATE_HIGHATOMIC freelist, instead of
the allocation being served from the MIGRATE_HIGHATOMIC freelist first.
As a result, the number of pages on the MIGRATE_HIGHATOMIC freelist
keeps growing, the other migratetype freelists shrink, and allocations
from them are more likely to fail.

Currently the sequence of ALLOC_HIGHATOMIC allocation is:
pcplist --> rmqueue_bulk() --> rmqueue_buddy() MIGRATE_HIGHATOMIC
--> rmqueue_buddy() allocation migratetype.

Since requesting pages from the pcplist is faster than going to the
buddy allocator, the ALLOC_HIGHATOMIC allocation sequence is changed to:
pcplist --> rmqueue_buddy() MIGRATE_HIGHATOMIC --> rmqueue_buddy()
allocated migratetype.

This fixes failures when allocating pages of other migratetypes that
are caused by excessive MIGRATE_HIGHATOMIC freelist reservations.

In comparative testing (cat /proc/pagetypeinfo), the HighAtomic
freelist size is:
Test without this patch:
Node 0, zone Normal, type HighAtomic 2369 771 138 15 0 0 0 0 0 0 0
Test with this patch:
Node 0, zone Normal, type HighAtomic 206 82 4 2 1 0 0 0 0 0 0

Signed-off-by: Zhiguo Jiang <justinjiang@vivo.com>
---
 mm/page_alloc.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)
 mode change 100644 => 100755 mm/page_alloc.c

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7a8dac0c1c74..49890d00cc3c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2850,11 +2850,20 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 			int batch = nr_pcp_alloc(pcp, zone, order);
 			int alloced;
 
+			/*
+			 * If pcplist is empty and alloc_flags is with ALLOC_HIGHATOMIC,
+			 * it should alloc from buddy highatomic migrate freelist firstly
+			 * to ensure quick and successful allocation.
+			 */
+			if (alloc_flags & ALLOC_HIGHATOMIC)
+				goto out;
+
 			alloced = rmqueue_bulk(zone, order,
 					batch, list,
 					migratetype, alloc_flags);
 
 			pcp->count += alloced << order;
+out:
 			if (unlikely(list_empty(list)))
 				return NULL;
 		}
@@ -2918,7 +2927,7 @@ static inline
 struct page *rmqueue(struct zone *preferred_zone,
 			struct zone *zone, unsigned int order,
 			gfp_t gfp_flags, unsigned int alloc_flags,
-			int migratetype)
+			int migratetype, bool *highatomc_allocation)
 {
 	struct page *page;
 
@@ -2938,6 +2947,23 @@ struct page *rmqueue(struct zone *preferred_zone,
 	page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
 							migratetype);
 
+	/*
+	 * The high-order atomic allocation pageblock reserved conditions:
+	 *
+	 * If the high-order atomic allocation page is alloced from pcplist,
+	 * the highatomic pageblock does not need to be reserved, which can
+	 * void to migrate an increasing number of pages into buddy
+	 * MIGRATE_HIGHATOMIC freelist and lead to an increasing risk of
+	 * allocation failure on other buddy migrate freelists.
+	 *
+	 * If the high-order atomic allocation page is alloced from buddy
+	 * highatomic migrate freelist, regardless of whether the allocation
+	 * is successful or not, the highatomic pageblock can try to be
+	 * reserved.
+	 */
+	if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+		*highatomc_allocation = true;
+
 out:
 	/* Separate test+clear to avoid unnecessary atomics */
 	if ((alloc_flags & ALLOC_KSWAPD) &&
@@ -3208,6 +3234,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 	struct pglist_data *last_pgdat = NULL;
 	bool last_pgdat_dirty_ok = false;
 	bool no_fallback;
+	bool highatomc_allocation = false;
 
 retry:
 	/*
@@ -3339,7 +3366,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 
 try_this_zone:
 		page = rmqueue(ac->preferred_zoneref->zone, zone, order,
-				gfp_mask, alloc_flags, ac->migratetype);
+				gfp_mask, alloc_flags, ac->migratetype, &highatomc_allocation);
 		if (page) {
 			prep_new_page(page, order, gfp_mask, alloc_flags);
 
@@ -3347,7 +3374,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 			 * If this is a high-order atomic allocation then check
 			 * if the pageblock should be reserved for the future
 			 */
-			if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+			if (unlikely(highatomc_allocation))
 				reserve_highatomic_pageblock(page, zone);
 
 			return page;
-- 
2.39.0



* [PATCH] mm: ALLOC_HIGHATOMIC flag allocation issue
@ 2023-11-13  3:03 Zhiguo Jiang
  0 siblings, 0 replies; 7+ messages in thread
From: Zhiguo Jiang @ 2023-11-13  3:03 UTC (permalink / raw)
  To: Andrew Morton, linux-mm, linux-kernel
  Cc: Matthew Wilcox, Johannes Weiner, opensource.kernel, Zhiguo Jiang

When alloc_flags contains ALLOC_HIGHATOMIC and the allocation order in
rmqueue() is 1, 2, 3 or 10, a page may be allocated successfully from
the pcplist, yet a free pageblock is still moved from the allocated
migratetype's freelist to the MIGRATE_HIGHATOMIC freelist, instead of
the allocation being served from the MIGRATE_HIGHATOMIC freelist first.
As a result, the number of pages on the MIGRATE_HIGHATOMIC freelist
keeps growing, the other migratetype freelists shrink, and allocations
from them are more likely to fail.

Currently the sequence of ALLOC_HIGHATOMIC allocation is:
pcplist --> rmqueue_bulk() --> rmqueue_buddy() MIGRATE_HIGHATOMIC
--> rmqueue_buddy() allocation migratetype.

Since requesting pages from the pcplist is faster than going to the
buddy allocator, the ALLOC_HIGHATOMIC allocation sequence is changed to:
pcplist --> rmqueue_buddy() MIGRATE_HIGHATOMIC --> rmqueue_buddy()
allocated migratetype.

This fixes failures when allocating pages of other migratetypes that
are caused by excessive MIGRATE_HIGHATOMIC freelist reservations.

In comparative testing (cat /proc/pagetypeinfo), the HighAtomic
freelist size is:
Test without this patch:
Node 0, zone Normal, type HighAtomic 2369 771 138 15 0 0 0 0 0 0 0
Test with this patch:
Node 0, zone Normal, type HighAtomic 206 82 4 2 1 0 0 0 0 0 0

Signed-off-by: Zhiguo Jiang <justinjiang@vivo.com>
---
 mm/page_alloc.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)
 mode change 100644 => 100755 mm/page_alloc.c

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7a8dac0c1c74..49890d00cc3c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2850,11 +2850,20 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 			int batch = nr_pcp_alloc(pcp, zone, order);
 			int alloced;
 
+			/*
+			 * If pcplist is empty and alloc_flags is with ALLOC_HIGHATOMIC,
+			 * it should alloc from buddy highatomic migrate freelist firstly
+			 * to ensure quick and successful allocation.
+			 */
+			if (alloc_flags & ALLOC_HIGHATOMIC)
+				goto out;
+
 			alloced = rmqueue_bulk(zone, order,
 					batch, list,
 					migratetype, alloc_flags);
 
 			pcp->count += alloced << order;
+out:
 			if (unlikely(list_empty(list)))
 				return NULL;
 		}
@@ -2918,7 +2927,7 @@ static inline
 struct page *rmqueue(struct zone *preferred_zone,
 			struct zone *zone, unsigned int order,
 			gfp_t gfp_flags, unsigned int alloc_flags,
-			int migratetype)
+			int migratetype, bool *highatomc_allocation)
 {
 	struct page *page;
 
@@ -2938,6 +2947,23 @@ struct page *rmqueue(struct zone *preferred_zone,
 	page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
 							migratetype);
 
+	/*
+	 * The high-order atomic allocation pageblock reserved conditions:
+	 *
+	 * If the high-order atomic allocation page is alloced from pcplist,
+	 * the highatomic pageblock does not need to be reserved, which can
+	 * void to migrate an increasing number of pages into buddy
+	 * MIGRATE_HIGHATOMIC freelist and lead to an increasing risk of
+	 * allocation failure on other buddy migrate freelists.
+	 *
+	 * If the high-order atomic allocation page is alloced from buddy
+	 * highatomic migrate freelist, regardless of whether the allocation
+	 * is successful or not, the highatomic pageblock can try to be
+	 * reserved.
+	 */
+	if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+		*highatomc_allocation = true;
+
 out:
 	/* Separate test+clear to avoid unnecessary atomics */
 	if ((alloc_flags & ALLOC_KSWAPD) &&
@@ -3208,6 +3234,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 	struct pglist_data *last_pgdat = NULL;
 	bool last_pgdat_dirty_ok = false;
 	bool no_fallback;
+	bool highatomc_allocation = false;
 
 retry:
 	/*
@@ -3339,7 +3366,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 
 try_this_zone:
 		page = rmqueue(ac->preferred_zoneref->zone, zone, order,
-				gfp_mask, alloc_flags, ac->migratetype);
+				gfp_mask, alloc_flags, ac->migratetype, &highatomc_allocation);
 		if (page) {
 			prep_new_page(page, order, gfp_mask, alloc_flags);
 
@@ -3347,7 +3374,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 			 * If this is a high-order atomic allocation then check
 			 * if the pageblock should be reserved for the future
 			 */
-			if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+			if (unlikely(highatomc_allocation))
 				reserve_highatomic_pageblock(page, zone);
 
 			return page;
-- 
2.39.0


