All of lore.kernel.org
 help / color / mirror / Atom feed
From: Baolin Wang <baolin.wang@linux.alibaba.com>
To: Johannes Weiner <hannes@cmpxchg.org>,
	Andrew Morton <akpm@linux-foundation.org>
Cc: Vlastimil Babka <vbabka@suse.cz>,
	Mel Gorman <mgorman@techsingularity.net>, Zi Yan <ziy@nvidia.com>,
	"Huang, Ying" <ying.huang@intel.com>,
	David Hildenbrand <david@redhat.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 10/10] mm: page_alloc: consolidate free page accounting
Date: Sun, 7 Apr 2024 18:19:28 +0800	[thread overview]
Message-ID: <7b3b7f2e-7109-4e72-b1cf-259cb56f3629@linux.alibaba.com> (raw)
In-Reply-To: <20240320180429.678181-11-hannes@cmpxchg.org>



On 2024/3/21 02:02, Johannes Weiner wrote:
> Free page accounting currently happens a bit too high up the call
> stack, where it has to deal with guard pages, compaction capturing,
> block stealing and even page isolation. This is subtle and fragile,
> and makes it difficult to hack on the code.
> 
> Now that type violations on the freelists have been fixed, push the
> accounting down to where pages enter and leave the freelist.
> 
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> ---
>   include/linux/mm.h     |  18 ++--
>   include/linux/vmstat.h |   8 --
>   mm/debug_page_alloc.c  |  12 +--
>   mm/internal.h          |   5 --
>   mm/page_alloc.c        | 194 +++++++++++++++++++++++------------------
>   mm/page_isolation.c    |   3 +-
>   6 files changed, 120 insertions(+), 120 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 8147b1302413..bd2e94391c7e 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3781,24 +3781,22 @@ static inline bool page_is_guard(struct page *page)
>   	return PageGuard(page);
>   }
>   
> -bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
> -		      int migratetype);
> +bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order);
>   static inline bool set_page_guard(struct zone *zone, struct page *page,
> -				  unsigned int order, int migratetype)
> +				  unsigned int order)
>   {
>   	if (!debug_guardpage_enabled())
>   		return false;
> -	return __set_page_guard(zone, page, order, migratetype);
> +	return __set_page_guard(zone, page, order);
>   }
>   
> -void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order,
> -			int migratetype);
> +void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order);
>   static inline void clear_page_guard(struct zone *zone, struct page *page,
> -				    unsigned int order, int migratetype)
> +				    unsigned int order)
>   {
>   	if (!debug_guardpage_enabled())
>   		return;
> -	__clear_page_guard(zone, page, order, migratetype);
> +	__clear_page_guard(zone, page, order);
>   }
>   
>   #else	/* CONFIG_DEBUG_PAGEALLOC */
> @@ -3808,9 +3806,9 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; }
>   static inline bool debug_guardpage_enabled(void) { return false; }
>   static inline bool page_is_guard(struct page *page) { return false; }
>   static inline bool set_page_guard(struct zone *zone, struct page *page,
> -			unsigned int order, int migratetype) { return false; }
> +			unsigned int order) { return false; }
>   static inline void clear_page_guard(struct zone *zone, struct page *page,
> -				unsigned int order, int migratetype) {}
> +				unsigned int order) {}
>   #endif	/* CONFIG_DEBUG_PAGEALLOC */
>   
>   #ifdef __HAVE_ARCH_GATE_AREA
> diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
> index 343906a98d6e..735eae6e272c 100644
> --- a/include/linux/vmstat.h
> +++ b/include/linux/vmstat.h
> @@ -487,14 +487,6 @@ static inline void node_stat_sub_folio(struct folio *folio,
>   	mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio));
>   }
>   
> -static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
> -					     int migratetype)
> -{
> -	__mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
> -	if (is_migrate_cma(migratetype))
> -		__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
> -}
> -
>   extern const char * const vmstat_text[];
>   
>   static inline const char *zone_stat_name(enum zone_stat_item item)
> diff --git a/mm/debug_page_alloc.c b/mm/debug_page_alloc.c
> index 6755f0c9d4a3..d46acf989dde 100644
> --- a/mm/debug_page_alloc.c
> +++ b/mm/debug_page_alloc.c
> @@ -32,8 +32,7 @@ static int __init debug_guardpage_minorder_setup(char *buf)
>   }
>   early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
>   
> -bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
> -		      int migratetype)
> +bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order)
>   {
>   	if (order >= debug_guardpage_minorder())
>   		return false;
> @@ -41,19 +40,12 @@ bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
>   	__SetPageGuard(page);
>   	INIT_LIST_HEAD(&page->buddy_list);
>   	set_page_private(page, order);
> -	/* Guard pages are not available for any usage */
> -	if (!is_migrate_isolate(migratetype))
> -		__mod_zone_freepage_state(zone, -(1 << order), migratetype);
>   
>   	return true;
>   }
>   
> -void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order,
> -		      int migratetype)
> +void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order)
>   {
>   	__ClearPageGuard(page);
> -
>   	set_page_private(page, 0);
> -	if (!is_migrate_isolate(migratetype))
> -		__mod_zone_freepage_state(zone, (1 << order), migratetype);
>   }
> diff --git a/mm/internal.h b/mm/internal.h
> index d6e6c7d9f04e..0a4007b03d0d 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1036,11 +1036,6 @@ static inline bool is_migrate_highatomic(enum migratetype migratetype)
>   	return migratetype == MIGRATE_HIGHATOMIC;
>   }
>   
> -static inline bool is_migrate_highatomic_page(struct page *page)
> -{
> -	return get_pageblock_migratetype(page) == MIGRATE_HIGHATOMIC;
> -}
> -
>   void setup_zone_pageset(struct zone *zone);
>   
>   struct migration_target_control {
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index efb2581ac142..c46491f83ac2 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -642,42 +642,72 @@ compaction_capture(struct capture_control *capc, struct page *page,
>   }
>   #endif /* CONFIG_COMPACTION */
>   
> -/* Used for pages not on another list */
> -static inline void add_to_free_list(struct page *page, struct zone *zone,
> -				    unsigned int order, int migratetype)
> +static inline void account_freepages(struct page *page, struct zone *zone,
> +				     int nr_pages, int migratetype)
>   {
> -	struct free_area *area = &zone->free_area[order];
> +	if (is_migrate_isolate(migratetype))
> +		return;
>   
> -	list_add(&page->buddy_list, &area->free_list[migratetype]);
> -	area->nr_free++;
> +	__mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
> +
> +	if (is_migrate_cma(migratetype))
> +		__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
>   }
>   
>   /* Used for pages not on another list */
> -static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
> -					 unsigned int order, int migratetype)
> +static inline void __add_to_free_list(struct page *page, struct zone *zone,
> +				      unsigned int order, int migratetype,
> +				      bool tail)
>   {
>   	struct free_area *area = &zone->free_area[order];
>   
> -	list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
> +	VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
> +		     "page type is %lu, passed migratetype is %d (nr=%d)\n",
> +		     get_pageblock_migratetype(page), migratetype, 1 << order);
> +
> +	if (tail)
> +		list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
> +	else
> +		list_add(&page->buddy_list, &area->free_list[migratetype]);
>   	area->nr_free++;
>   }
>   
> +static inline void add_to_free_list(struct page *page, struct zone *zone,
> +				    unsigned int order, int migratetype,
> +				    bool tail)
> +{
> +	__add_to_free_list(page, zone, order, migratetype, tail);
> +	account_freepages(page, zone, 1 << order, migratetype);
> +}
> +
>   /*
>    * Used for pages which are on another list. Move the pages to the tail
>    * of the list - so the moved pages won't immediately be considered for
>    * allocation again (e.g., optimization for memory onlining).
>    */
>   static inline void move_to_free_list(struct page *page, struct zone *zone,
> -				     unsigned int order, int migratetype)
> +				     unsigned int order, int old_mt, int new_mt)
>   {
>   	struct free_area *area = &zone->free_area[order];
>   
> -	list_move_tail(&page->buddy_list, &area->free_list[migratetype]);
> +	/* Free page moving can fail, so it happens before the type update */
> +	VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt,
> +		     "page type is %lu, passed migratetype is %d (nr=%d)\n",
> +		     get_pageblock_migratetype(page), old_mt, 1 << order);
> +
> +	list_move_tail(&page->buddy_list, &area->free_list[new_mt]);
> +
> +	account_freepages(page, zone, -(1 << order), old_mt);
> +	account_freepages(page, zone, 1 << order, new_mt);
>   }
>   
> -static inline void del_page_from_free_list(struct page *page, struct zone *zone,
> -					   unsigned int order)
> +static inline void __del_page_from_free_list(struct page *page, struct zone *zone,
> +					     unsigned int order, int migratetype)
>   {
> +        VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
> +		     "page type is %lu, passed migratetype is %d (nr=%d)\n",
> +		     get_pageblock_migratetype(page), migratetype, 1 << order);
> +
>   	/* clear reported state and update reported page count */
>   	if (page_reported(page))
>   		__ClearPageReported(page);
> @@ -688,6 +718,13 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone,
>   	zone->free_area[order].nr_free--;
>   }
>   
> +static inline void del_page_from_free_list(struct page *page, struct zone *zone,
> +					   unsigned int order, int migratetype)
> +{
> +	__del_page_from_free_list(page, zone, order, migratetype);
> +	account_freepages(page, zone, -(1 << order), migratetype);
> +}
> +
>   static inline struct page *get_page_from_free_area(struct free_area *area,
>   					    int migratetype)
>   {
> @@ -759,18 +796,16 @@ static inline void __free_one_page(struct page *page,
>   	VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
>   
>   	VM_BUG_ON(migratetype == -1);
> -	if (likely(!is_migrate_isolate(migratetype)))
> -		__mod_zone_freepage_state(zone, 1 << order, migratetype);
> -
>   	VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page);
>   	VM_BUG_ON_PAGE(bad_range(zone, page), page);
>   
> +	account_freepages(page, zone, 1 << order, migratetype);
> +
>   	while (order < MAX_PAGE_ORDER) {
> -		if (compaction_capture(capc, page, order, migratetype)) {
> -			__mod_zone_freepage_state(zone, -(1 << order),
> -								migratetype);
> +		int buddy_mt = migratetype;
> +
> +		if (compaction_capture(capc, page, order, migratetype))
>   			return;
> -		}

IIUC, if the released page is captured by compaction, then the 
statistics for free pages should be correspondingly decreased, 
otherwise, there will be a slight regression for my thpcompact benchmark.

thpcompact Percentage Faults Huge
                           k6.9-rc2-base        base + patch10 + 2 fixes	
Percentage huge-1        78.18 (   0.00%)       71.92 (  -8.01%)
Percentage huge-3        86.70 (   0.00%)       86.07 (  -0.73%)
Percentage huge-5        90.26 (   0.00%)       78.02 ( -13.57%)
Percentage huge-7        92.34 (   0.00%)       78.67 ( -14.81%)
Percentage huge-12       91.18 (   0.00%)       81.04 ( -11.12%)
Percentage huge-18       89.00 (   0.00%)       79.57 ( -10.60%)
Percentage huge-24       90.52 (   0.00%)       80.07 ( -11.54%)
Percentage huge-30       94.44 (   0.00%)       96.28 (   1.95%)
Percentage huge-32       93.09 (   0.00%)       99.39 (   6.77%)

I add below fix based on your fix 2, then the thpcompact Percentage 
looks good. How do you think for the fix?

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8330c5c2de6b..2facf844ef84 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -805,8 +805,10 @@ static inline void __free_one_page(struct page *page,
         while (order < MAX_PAGE_ORDER) {
                 int buddy_mt = migratetype;

-               if (compaction_capture(capc, page, order, migratetype))
+               if (compaction_capture(capc, page, order, migratetype)) {
+                       account_freepages(zone, -(1 << order), migratetype);
                         return;
+               }

                 buddy = find_buddy_page_pfn(page, pfn, order, &buddy_pfn);
                 if (!buddy)

With my fix, the THP percentage looks better:
                       k6.9-rc2-base          base + patch10 + 2 fixes	+ 
my fix
Percentage huge-1        78.18 (   0.00%)       82.83 (   5.94%)
Percentage huge-3        86.70 (   0.00%)       93.47 (   7.81%)
Percentage huge-5        90.26 (   0.00%)       94.73 (   4.95%)
Percentage huge-7        92.34 (   0.00%)       95.22 (   3.12%)
Percentage huge-12       91.18 (   0.00%)       92.40 (   1.34%)
Percentage huge-18       89.00 (   0.00%)       85.39 (  -4.06%)
Percentage huge-24       90.52 (   0.00%)       94.70 (   4.61%)
Percentage huge-30       94.44 (   0.00%)       97.00 (   2.71%)
Percentage huge-32       93.09 (   0.00%)       92.87 (  -0.24%)

  parent reply	other threads:[~2024-04-07 10:19 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-20 18:02 [PATCH V4 00/10] mm: page_alloc: freelist migratetype hygiene Johannes Weiner
2024-03-20 18:02 ` [PATCH 01/10] mm: page_alloc: remove pcppage migratetype caching Johannes Weiner
2024-03-20 18:02 ` [PATCH 02/10] mm: page_alloc: optimize free_unref_folios() Johannes Weiner
2024-03-25 15:56   ` Vlastimil Babka
2024-03-20 18:02 ` [PATCH 03/10] mm: page_alloc: fix up block types when merging compatible blocks Johannes Weiner
2024-03-20 18:02 ` [PATCH 04/10] mm: page_alloc: move free pages when converting block during isolation Johannes Weiner
2024-03-20 18:02 ` [PATCH 05/10] mm: page_alloc: fix move_freepages_block() range error Johannes Weiner
2024-03-25 16:22   ` Vlastimil Babka
2024-03-20 18:02 ` [PATCH 06/10] mm: page_alloc: fix freelist movement during block conversion Johannes Weiner
2024-03-26 11:28   ` Vlastimil Babka
2024-03-26 12:34     ` Johannes Weiner
2024-04-05 12:11   ` Baolin Wang
2024-04-05 16:56     ` Johannes Weiner
2024-04-07  6:58       ` Baolin Wang
2024-04-08  7:24       ` Vlastimil Babka
2024-04-09  6:21       ` Vlastimil Babka
2024-03-20 18:02 ` [PATCH 07/10] mm: page_alloc: close migratetype race between freeing and stealing Johannes Weiner
2024-03-26 15:25   ` Vlastimil Babka
2024-03-20 18:02 ` [PATCH 08/10] mm: page_alloc: set migratetype inside move_freepages() Johannes Weiner
2024-03-26 15:40   ` Vlastimil Babka
2024-03-20 18:02 ` [PATCH 09/10] mm: page_isolation: prepare for hygienic freelists Johannes Weiner
2024-03-21 13:13   ` kernel test robot
2024-03-21 14:24     ` Johannes Weiner
2024-03-21 15:03       ` Zi Yan
2024-03-27  8:06   ` Vlastimil Babka
2024-03-20 18:02 ` [PATCH 10/10] mm: page_alloc: consolidate free page accounting Johannes Weiner
2024-03-27  8:54   ` Vlastimil Babka
2024-03-27 14:32     ` Johannes Weiner
2024-03-27 18:57     ` [PATCH 1/3] mm: page_alloc: consolidate free page accounting fix Johannes Weiner
2024-03-27 18:58     ` [PATCH 2/3] mm: page_alloc: consolidate free page accounting fix 2 Johannes Weiner
2024-03-27 19:01     ` [PATCH 3/3] mm: page_alloc: batch vmstat updates in expand() Johannes Weiner
2024-03-27 20:35       ` Vlastimil Babka
2024-04-07 10:19   ` Baolin Wang [this message]
2024-04-08  7:38     ` [PATCH 10/10] mm: page_alloc: consolidate free page accounting Vlastimil Babka
2024-04-08  9:13       ` Baolin Wang
2024-04-08 14:23       ` Johannes Weiner
2024-04-09  6:23         ` Vlastimil Babka
2024-04-09  7:48           ` [PATCH] mm: page_alloc: consolidate free page accounting fix 3 Baolin Wang
2024-04-09 21:15             ` kernel test robot
2024-04-09 22:36               ` Johannes Weiner
2024-04-09 21:25             ` kernel test robot
2024-04-09  7:56           ` [PATCH 10/10] mm: page_alloc: consolidate free page accounting Baolin Wang
2024-04-09  8:41             ` Vlastimil Babka
2024-04-09  9:31         ` Baolin Wang
2024-04-09 14:46           ` Zi Yan
2024-04-10  8:49             ` Baolin Wang
2024-03-27  9:30 ` [PATCH V4 00/10] mm: page_alloc: freelist migratetype hygiene Vlastimil Babka
2024-03-27 13:10   ` Zi Yan
2024-03-27 14:29   ` Johannes Weiner
2024-04-08  9:30 ` Baolin Wang
2024-04-08 14:24   ` Johannes Weiner
2024-05-11  5:14 ` Yu Zhao
2024-05-13 16:03   ` Johannes Weiner
2024-05-13 18:10     ` Yu Zhao
2024-05-13 19:04       ` Johannes Weiner
  -- strict thread matches above, loose matches on Subject: below --
2024-03-06  4:08 [PATCH V3 01/10] " Johannes Weiner
2024-03-06  4:08 ` [PATCH 10/10] mm: page_alloc: consolidate free page accounting Johannes Weiner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7b3b7f2e-7109-4e72-b1cf-259cb56f3629@linux.alibaba.com \
    --to=baolin.wang@linux.alibaba.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@techsingularity.net \
    --cc=vbabka@suse.cz \
    --cc=ying.huang@intel.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.