linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 0/2] mm: Don't set and reset page count in MEMINIT_EARLY
@ 2023-09-28  8:33 Yajun Deng
  2023-09-28  8:33 ` [PATCH v4 1/2] mm: pass page count and reserved to __init_single_page Yajun Deng
  2023-09-28  8:33 ` [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY Yajun Deng
  0 siblings, 2 replies; 34+ messages in thread
From: Yajun Deng @ 2023-09-28  8:33 UTC (permalink / raw)
  To: akpm, rppt
  Cc: mike.kravetz, muchun.song, willy, david, linux-mm, linux-kernel,
	Yajun Deng

__init_single_page() sets the page count and __free_pages_core() resets
it. Many pages don't need this when in the MEMINIT_EARLY context. It's
unnecessary and time-consuming.

The 1st patch passes page count and reserved flags to __init_single_page().
It is in preparation for the 2nd patch and doesn't change any behavior.

The 2nd patch only sets the page count for the reserved regions, not all
of the regions.

Yajun Deng (2):
  mm: pass page count and reserved to __init_single_page
  mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY

 mm/hugetlb.c    |  2 +-
 mm/internal.h   |  8 +++++++-
 mm/mm_init.c    | 38 ++++++++++++++++++++++++--------------
 mm/page_alloc.c | 20 ++++++++++++--------
 4 files changed, 44 insertions(+), 24 deletions(-)

-- 
2.25.1



^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v4 1/2] mm: pass page count and reserved to __init_single_page
  2023-09-28  8:33 [PATCH v4 0/2] mm: Don't set and reset page count in MEMINIT_EARLY Yajun Deng
@ 2023-09-28  8:33 ` Yajun Deng
  2023-09-29  8:19   ` Mike Rapoport
  2023-09-28  8:33 ` [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY Yajun Deng
  1 sibling, 1 reply; 34+ messages in thread
From: Yajun Deng @ 2023-09-28  8:33 UTC (permalink / raw)
  To: akpm, rppt
  Cc: mike.kravetz, muchun.song, willy, david, linux-mm, linux-kernel,
	Yajun Deng

When we init a single page, we need to mark that page reserved if it is
a reserved page. And some pages need their page count initialized, such
as compound pages.

Introduce enum init_page_flags, the caller will init page count and mark
page reserved by passing INIT_PAGE_COUNT and INIT_PAGE_RESERVED.

Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
---
v4: move the changes of __init_zone_device_page().
v3: Introduce enum init_page_flags.
v2: Introduce INIT_PAGE_COUNT and INIT_PAGE_RESERVED.
v1: https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
---
 mm/hugetlb.c  |  2 +-
 mm/internal.h |  8 +++++++-
 mm/mm_init.c  | 24 +++++++++++++-----------
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a82dc37669b0..bb9c334a8392 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3196,7 +3196,7 @@ static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
 	for (pfn = head_pfn + start_page_number; pfn < end_pfn; pfn++) {
 		struct page *page = pfn_to_page(pfn);
 
-		__init_single_page(page, pfn, zone, nid);
+		__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
 		prep_compound_tail((struct page *)folio, pfn - head_pfn);
 		ret = page_ref_freeze(page, 1);
 		VM_BUG_ON(!ret);
diff --git a/mm/internal.h b/mm/internal.h
index d7916f1e9e98..449891ad7fdb 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1209,8 +1209,14 @@ struct vma_prepare {
 	struct vm_area_struct *remove2;
 };
 
+enum init_page_flags {
+	INIT_PAGE_COUNT    = (1 << 0),
+	INIT_PAGE_RESERVED = (1 << 1),
+};
+
 void __meminit __init_single_page(struct page *page, unsigned long pfn,
-				unsigned long zone, int nid);
+				  unsigned long zone, int nid,
+				  enum init_page_flags flags);
 
 /* shrinker related functions */
 unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 06a72c223bce..9716c8a7ade9 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -557,11 +557,11 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 }
 
 void __meminit __init_single_page(struct page *page, unsigned long pfn,
-				unsigned long zone, int nid)
+				  unsigned long zone, int nid,
+				  enum init_page_flags flags)
 {
 	mm_zero_struct_page(page);
 	set_page_links(page, zone, nid, pfn);
-	init_page_count(page);
 	page_mapcount_reset(page);
 	page_cpupid_reset_last(page);
 	page_kasan_tag_reset(page);
@@ -572,6 +572,10 @@ void __meminit __init_single_page(struct page *page, unsigned long pfn,
 	if (!is_highmem_idx(zone))
 		set_page_address(page, __va(pfn << PAGE_SHIFT));
 #endif
+	if (flags & INIT_PAGE_COUNT)
+		init_page_count(page);
+	if (flags & INIT_PAGE_RESERVED)
+		__SetPageReserved(page);
 }
 
 #ifdef CONFIG_NUMA
@@ -714,7 +718,7 @@ static void __meminit init_reserved_page(unsigned long pfn, int nid)
 		if (zone_spans_pfn(zone, pfn))
 			break;
 	}
-	__init_single_page(pfn_to_page(pfn), pfn, zid, nid);
+	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, INIT_PAGE_COUNT);
 }
 #else
 static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
@@ -821,8 +825,8 @@ static void __init init_unavailable_range(unsigned long spfn,
 			pfn = pageblock_end_pfn(pfn) - 1;
 			continue;
 		}
-		__init_single_page(pfn_to_page(pfn), pfn, zone, node);
-		__SetPageReserved(pfn_to_page(pfn));
+		__init_single_page(pfn_to_page(pfn), pfn, zone, node,
+				   INIT_PAGE_COUNT | INIT_PAGE_RESERVED);
 		pgcnt++;
 	}
 
@@ -884,7 +888,7 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
 		}
 
 		page = pfn_to_page(pfn);
-		__init_single_page(page, pfn, zone, nid);
+		__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
 		if (context == MEMINIT_HOTPLUG)
 			__SetPageReserved(page);
 
@@ -967,9 +971,6 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
 					  unsigned long zone_idx, int nid,
 					  struct dev_pagemap *pgmap)
 {
-
-	__init_single_page(page, pfn, zone_idx, nid);
-
 	/*
 	 * Mark page reserved as it will need to wait for onlining
 	 * phase for it to be fully associated with a zone.
@@ -977,7 +978,8 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
 	 * We can use the non-atomic __set_bit operation for setting
 	 * the flag as we are still initializing the pages.
 	 */
-	__SetPageReserved(page);
+	__init_single_page(page, pfn, zone_idx, nid,
+			   INIT_PAGE_COUNT | INIT_PAGE_RESERVED);
 
 	/*
 	 * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
@@ -2058,7 +2060,7 @@ static unsigned long  __init deferred_init_pages(struct zone *zone,
 		} else {
 			page++;
 		}
-		__init_single_page(page, pfn, zid, nid);
+		__init_single_page(page, pfn, zid, nid, INIT_PAGE_COUNT);
 		nr_pages++;
 	}
 	return (nr_pages);
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-09-28  8:33 [PATCH v4 0/2] mm: Don't set and reset page count in MEMINIT_EARLY Yajun Deng
  2023-09-28  8:33 ` [PATCH v4 1/2] mm: pass page count and reserved to __init_single_page Yajun Deng
@ 2023-09-28  8:33 ` Yajun Deng
  2023-09-29  8:30   ` Mike Rapoport
  1 sibling, 1 reply; 34+ messages in thread
From: Yajun Deng @ 2023-09-28  8:33 UTC (permalink / raw)
  To: akpm, rppt
  Cc: mike.kravetz, muchun.song, willy, david, linux-mm, linux-kernel,
	Yajun Deng

memmap_init_range() would init the page count of all pages, but the page
count of free pages would then be reset in __free_pages_core(). These
are opposite operations. It's unnecessary and time-consuming when in the
MEMINIT_EARLY context.

Init the page count in reserve_bootmem_region() when in the MEMINIT_EARLY
context, and check the page count before resetting it.

At the same time, the INIT_LIST_HEAD in reserve_bootmem_region() isn't
needed, as it is already done in __init_single_page().

The following data was tested on an x86 machine with 190GB of RAM.

before:
free_low_memory_core_early()    341ms

after:
free_low_memory_core_early()    285ms

Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
---
v4: same with v2.
v3: same with v2.
v2: check page count instead of check context before reset it.
v1: https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
---
 mm/mm_init.c    | 18 +++++++++++++-----
 mm/page_alloc.c | 20 ++++++++++++--------
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/mm/mm_init.c b/mm/mm_init.c
index 9716c8a7ade9..3ab8861e1ef3 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -718,7 +718,7 @@ static void __meminit init_reserved_page(unsigned long pfn, int nid)
 		if (zone_spans_pfn(zone, pfn))
 			break;
 	}
-	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, INIT_PAGE_COUNT);
+	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, 0);
 }
 #else
 static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
@@ -756,8 +756,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start,
 
 			init_reserved_page(start_pfn, nid);
 
-			/* Avoid false-positive PageTail() */
-			INIT_LIST_HEAD(&page->lru);
+			/* Init page count for reserved region */
+			init_page_count(page);
 
 			/*
 			 * no need for atomic set_bit because the struct
@@ -888,9 +888,17 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
 		}
 
 		page = pfn_to_page(pfn);
-		__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
-		if (context == MEMINIT_HOTPLUG)
+
+		/* If the context is MEMINIT_EARLY, we will init page count and
+		 * mark page reserved in reserve_bootmem_region, the free region
+		 * wouldn't have page count and we will check the pages count
+		 * in __free_pages_core.
+		 */
+		__init_single_page(page, pfn, zone, nid, 0);
+		if (context == MEMINIT_HOTPLUG) {
+			init_page_count(page);
 			__SetPageReserved(page);
+		}
 
 		/*
 		 * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 06be8821d833..b868caabe8dc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page, unsigned int order)
 	unsigned int loop;
 
 	/*
-	 * When initializing the memmap, __init_single_page() sets the refcount
-	 * of all pages to 1 ("allocated"/"not free"). We have to set the
-	 * refcount of all involved pages to 0.
+	 * When initializing the memmap, memmap_init_range sets the refcount
+	 * of all pages to 1 ("reserved" and "free") in hotplug context. We
+	 * have to set the refcount of all involved pages to 0. Otherwise,
+	 * we don't do it, as reserve_bootmem_region only set the refcount on
+	 * reserve region ("reserved") in early context.
 	 */
-	prefetchw(p);
-	for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
-		prefetchw(p + 1);
+	if (page_count(page)) {
+		prefetchw(p);
+		for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
+			prefetchw(p + 1);
+			__ClearPageReserved(p);
+			set_page_count(p, 0);
+		}
 		__ClearPageReserved(p);
 		set_page_count(p, 0);
 	}
-	__ClearPageReserved(p);
-	set_page_count(p, 0);
 
 	atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
 
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 1/2] mm: pass page count and reserved to __init_single_page
  2023-09-28  8:33 ` [PATCH v4 1/2] mm: pass page count and reserved to __init_single_page Yajun Deng
@ 2023-09-29  8:19   ` Mike Rapoport
  2023-09-29  9:37     ` Yajun Deng
  0 siblings, 1 reply; 34+ messages in thread
From: Mike Rapoport @ 2023-09-29  8:19 UTC (permalink / raw)
  To: Yajun Deng
  Cc: akpm, mike.kravetz, muchun.song, willy, david, linux-mm, linux-kernel

On Thu, Sep 28, 2023 at 04:33:01PM +0800, Yajun Deng wrote:
> Subject: mm: pass page count and reserved to __init_single_page

We add pass flags that tell __init_single_page() how to initialize page
count and PG_Reserved, I think a better subject would be:

mm: allow optional initialization of page count and PG_reserved flag

> When we init a single page, we need to mark that page reserved when it
> is reserved. And some pages need to reset page count, such as compound
> pages.
>
> Introduce enum init_page_flags, the caller will init page count and mark
> page reserved by passing INIT_PAGE_COUNT and INIT_PAGE_RESERVED.

This does not really describe why the change is needed. How about

__init_single_page() unconditionally resets page count which is unnecessary
for reserved pages. 

To allow skipping page count initialization and marking a page reserved in
one go add flags parameter to __init_single_page().

No functional changes.

> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
> ---
> v4: move the changes of __init_zone_device_page().
> v3: Introduce enum init_page_flags.
> v2: Introduce INIT_PAGE_COUNT and INIT_PAGE_RESERVED.
> v1: https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
> ---
>  mm/hugetlb.c  |  2 +-
>  mm/internal.h |  8 +++++++-
>  mm/mm_init.c  | 24 +++++++++++++-----------
>  3 files changed, 21 insertions(+), 13 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index a82dc37669b0..bb9c334a8392 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3196,7 +3196,7 @@ static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
>  	for (pfn = head_pfn + start_page_number; pfn < end_pfn; pfn++) {
>  		struct page *page = pfn_to_page(pfn);
>  
> -		__init_single_page(page, pfn, zone, nid);
> +		__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
>  		prep_compound_tail((struct page *)folio, pfn - head_pfn);
>  		ret = page_ref_freeze(page, 1);
>  		VM_BUG_ON(!ret);
> diff --git a/mm/internal.h b/mm/internal.h
> index d7916f1e9e98..449891ad7fdb 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1209,8 +1209,14 @@ struct vma_prepare {
>  	struct vm_area_struct *remove2;
>  };
>  
> +enum init_page_flags {

enum page_init_flags please

> +	INIT_PAGE_COUNT    = (1 << 0),
> +	INIT_PAGE_RESERVED = (1 << 1),
> +};
> +
>  void __meminit __init_single_page(struct page *page, unsigned long pfn,
> -				unsigned long zone, int nid);
> +				  unsigned long zone, int nid,
> +				  enum init_page_flags flags);
>  
>  /* shrinker related functions */
>  unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 06a72c223bce..9716c8a7ade9 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -557,11 +557,11 @@ static void __init find_zone_movable_pfns_for_nodes(void)
>  }
>  
>  void __meminit __init_single_page(struct page *page, unsigned long pfn,
> -				unsigned long zone, int nid)
> +				  unsigned long zone, int nid,
> +				  enum init_page_flags flags)
>  {
>  	mm_zero_struct_page(page);
>  	set_page_links(page, zone, nid, pfn);
> -	init_page_count(page);
>  	page_mapcount_reset(page);
>  	page_cpupid_reset_last(page);
>  	page_kasan_tag_reset(page);
> @@ -572,6 +572,10 @@ void __meminit __init_single_page(struct page *page, unsigned long pfn,
>  	if (!is_highmem_idx(zone))
>  		set_page_address(page, __va(pfn << PAGE_SHIFT));
>  #endif
> +	if (flags & INIT_PAGE_COUNT)
> +		init_page_count(page);
> +	if (flags & INIT_PAGE_RESERVED)
> +		__SetPageReserved(page);
>  }
>  
>  #ifdef CONFIG_NUMA
> @@ -714,7 +718,7 @@ static void __meminit init_reserved_page(unsigned long pfn, int nid)
>  		if (zone_spans_pfn(zone, pfn))
>  			break;
>  	}
> -	__init_single_page(pfn_to_page(pfn), pfn, zid, nid);
> +	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, INIT_PAGE_COUNT);

There is __SetPageReserved call a few lines below, it can be folded here.

>  }
>  #else
>  static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
> @@ -821,8 +825,8 @@ static void __init init_unavailable_range(unsigned long spfn,
>  			pfn = pageblock_end_pfn(pfn) - 1;
>  			continue;
>  		}
> -		__init_single_page(pfn_to_page(pfn), pfn, zone, node);
> -		__SetPageReserved(pfn_to_page(pfn));
> +		__init_single_page(pfn_to_page(pfn), pfn, zone, node,
> +				   INIT_PAGE_COUNT | INIT_PAGE_RESERVED);
>  		pgcnt++;
>  	}
>  
> @@ -884,7 +888,7 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
>  		}
>  
>  		page = pfn_to_page(pfn);
> -		__init_single_page(page, pfn, zone, nid);
> +		__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
>  		if (context == MEMINIT_HOTPLUG)
>  			__SetPageReserved(page);
>  
> @@ -967,9 +971,6 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
>  					  unsigned long zone_idx, int nid,
>  					  struct dev_pagemap *pgmap)
>  {
> -
> -	__init_single_page(page, pfn, zone_idx, nid);
> -
>  	/*
>  	 * Mark page reserved as it will need to wait for onlining
>  	 * phase for it to be fully associated with a zone.
> @@ -977,7 +978,8 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
>  	 * We can use the non-atomic __set_bit operation for setting
>  	 * the flag as we are still initializing the pages.
>  	 */
> -	__SetPageReserved(page);
> +	__init_single_page(page, pfn, zone_idx, nid,
> +			   INIT_PAGE_COUNT | INIT_PAGE_RESERVED);
>  
>  	/*
>  	 * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
> @@ -2058,7 +2060,7 @@ static unsigned long  __init deferred_init_pages(struct zone *zone,
>  		} else {
>  			page++;
>  		}
> -		__init_single_page(page, pfn, zid, nid);
> +		__init_single_page(page, pfn, zid, nid, INIT_PAGE_COUNT);
>  		nr_pages++;
>  	}
>  	return (nr_pages);
> -- 
> 2.25.1
> 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-09-28  8:33 ` [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY Yajun Deng
@ 2023-09-29  8:30   ` Mike Rapoport
  2023-09-29  9:50     ` Yajun Deng
  2023-10-02  8:30     ` David Hildenbrand
  0 siblings, 2 replies; 34+ messages in thread
From: Mike Rapoport @ 2023-09-29  8:30 UTC (permalink / raw)
  To: Yajun Deng
  Cc: akpm, mike.kravetz, muchun.song, willy, david, linux-mm, linux-kernel

On Thu, Sep 28, 2023 at 04:33:02PM +0800, Yajun Deng wrote:
> memmap_init_range() would init page count of all pages, but the free
> pages count would be reset in __free_pages_core(). There are opposite
> operations. It's unnecessary and time-consuming when it's MEMINIT_EARLY
> context.
> 
> Init page count in reserve_bootmem_region when in MEMINIT_EARLY context,
> and check the page count before reset it.
> 
> At the same time, the INIT_LIST_HEAD in reserve_bootmem_region isn't
> need, as it already done in __init_single_page.
> 
> The following data was tested on an x86 machine with 190GB of RAM.
> 
> before:
> free_low_memory_core_early()    341ms
> 
> after:
> free_low_memory_core_early()    285ms
> 
> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
> ---
> v4: same with v2.
> v3: same with v2.
> v2: check page count instead of check context before reset it.
> v1: https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
> ---
>  mm/mm_init.c    | 18 +++++++++++++-----
>  mm/page_alloc.c | 20 ++++++++++++--------
>  2 files changed, 25 insertions(+), 13 deletions(-)
> 
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 9716c8a7ade9..3ab8861e1ef3 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -718,7 +718,7 @@ static void __meminit init_reserved_page(unsigned long pfn, int nid)
>  		if (zone_spans_pfn(zone, pfn))
>  			break;
>  	}
> -	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, INIT_PAGE_COUNT);
> +	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, 0);
>  }
>  #else
>  static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
> @@ -756,8 +756,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start,
>  
>  			init_reserved_page(start_pfn, nid);
>  
> -			/* Avoid false-positive PageTail() */
> -			INIT_LIST_HEAD(&page->lru);
> +			/* Init page count for reserved region */

Please add a comment that describes _why_ we initialize the page count here.

> +			init_page_count(page);
>  
>  			/*
>  			 * no need for atomic set_bit because the struct
> @@ -888,9 +888,17 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
>  		}
>  
>  		page = pfn_to_page(pfn);
> -		__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
> -		if (context == MEMINIT_HOTPLUG)
> +
> +		/* If the context is MEMINIT_EARLY, we will init page count and
> +		 * mark page reserved in reserve_bootmem_region, the free region
> +		 * wouldn't have page count and we will check the pages count
> +		 * in __free_pages_core.
> +		 */
> +		__init_single_page(page, pfn, zone, nid, 0);
> +		if (context == MEMINIT_HOTPLUG) {
> +			init_page_count(page);
>  			__SetPageReserved(page);

Rather than calling init_page_count() and __SetPageReserved() for
MEMINIT_HOTPLUG you can set flags to INIT_PAGE_COUNT | INIT_PAGE_RESERVED
an call __init_single_page() after the check for MEMINIT_HOTPLUG.

But more generally, I wonder if we have to differentiate HOTPLUG here at all.
@David, can you comment please?

> +		}
>  
>  		/*
>  		 * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 06be8821d833..b868caabe8dc 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page, unsigned int order)
>  	unsigned int loop;
>  
>  	/*
> -	 * When initializing the memmap, __init_single_page() sets the refcount
> -	 * of all pages to 1 ("allocated"/"not free"). We have to set the
> -	 * refcount of all involved pages to 0.
> +	 * When initializing the memmap, memmap_init_range sets the refcount
> +	 * of all pages to 1 ("reserved" and "free") in hotplug context. We
> +	 * have to set the refcount of all involved pages to 0. Otherwise,
> +	 * we don't do it, as reserve_bootmem_region only set the refcount on
> +	 * reserve region ("reserved") in early context.
>  	 */

Again, why hotplug and early init should be different?

> -	prefetchw(p);
> -	for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
> -		prefetchw(p + 1);
> +	if (page_count(page)) {
> +		prefetchw(p);
> +		for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
> +			prefetchw(p + 1);
> +			__ClearPageReserved(p);
> +			set_page_count(p, 0);
> +		}
>  		__ClearPageReserved(p);
>  		set_page_count(p, 0);
>  	}
> -	__ClearPageReserved(p);
> -	set_page_count(p, 0);
>  
>  	atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
>  
> -- 
> 2.25.1
> 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 1/2] mm: pass page count and reserved to __init_single_page
  2023-09-29  8:19   ` Mike Rapoport
@ 2023-09-29  9:37     ` Yajun Deng
  0 siblings, 0 replies; 34+ messages in thread
From: Yajun Deng @ 2023-09-29  9:37 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: akpm, mike.kravetz, muchun.song, willy, david, linux-mm, linux-kernel


On 2023/9/29 16:19, Mike Rapoport wrote:
> On Thu, Sep 28, 2023 at 04:33:01PM +0800, Yajun Deng wrote:
>> Subject: mm: pass page count and reserved to __init_single_page
> We add pass flags that tell __init_single_page() how to initialize page
> count and PG_Reserved, I think a better subject would be:
>
> mm: allow optional initialization of page count and PG_reserved flag
Okay.
>> When we init a single page, we need to mark that page reserved when it
>> is reserved. And some pages need to reset page count, such as compound
>> pages.
>>
>> Introduce enum init_page_flags, the caller will init page count and mark
>> page reserved by passing INIT_PAGE_COUNT and INIT_PAGE_RESERVED.
> This does not really describe why the change is needed. How about
>
> __init_single_page() unconditionally resets page count which is unnecessary
> for reserved pages.
>
> To allow skipping page count initialization and marking a page reserved in
> one go add flags parameter to __init_single_page().
>
> No functional changes.
Okay.
>> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
>> ---
>> v4: move the changes of __init_zone_device_page().
>> v3: Introduce enum init_page_flags.
>> v2: Introduce INIT_PAGE_COUNT and INIT_PAGE_RESERVED.
>> v1: https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
>> ---
>>   mm/hugetlb.c  |  2 +-
>>   mm/internal.h |  8 +++++++-
>>   mm/mm_init.c  | 24 +++++++++++++-----------
>>   3 files changed, 21 insertions(+), 13 deletions(-)
>>
>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>> index a82dc37669b0..bb9c334a8392 100644
>> --- a/mm/hugetlb.c
>> +++ b/mm/hugetlb.c
>> @@ -3196,7 +3196,7 @@ static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
>>   	for (pfn = head_pfn + start_page_number; pfn < end_pfn; pfn++) {
>>   		struct page *page = pfn_to_page(pfn);
>>   
>> -		__init_single_page(page, pfn, zone, nid);
>> +		__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
>>   		prep_compound_tail((struct page *)folio, pfn - head_pfn);
>>   		ret = page_ref_freeze(page, 1);
>>   		VM_BUG_ON(!ret);
>> diff --git a/mm/internal.h b/mm/internal.h
>> index d7916f1e9e98..449891ad7fdb 100644
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -1209,8 +1209,14 @@ struct vma_prepare {
>>   	struct vm_area_struct *remove2;
>>   };
>>   
>> +enum init_page_flags {
> enum page_init_flags please
Okay.
>> +	INIT_PAGE_COUNT    = (1 << 0),
>> +	INIT_PAGE_RESERVED = (1 << 1),
>> +};
>> +
>>   void __meminit __init_single_page(struct page *page, unsigned long pfn,
>> -				unsigned long zone, int nid);
>> +				  unsigned long zone, int nid,
>> +				  enum init_page_flags flags);
>>   
>>   /* shrinker related functions */
>>   unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
>> diff --git a/mm/mm_init.c b/mm/mm_init.c
>> index 06a72c223bce..9716c8a7ade9 100644
>> --- a/mm/mm_init.c
>> +++ b/mm/mm_init.c
>> @@ -557,11 +557,11 @@ static void __init find_zone_movable_pfns_for_nodes(void)
>>   }
>>   
>>   void __meminit __init_single_page(struct page *page, unsigned long pfn,
>> -				unsigned long zone, int nid)
>> +				  unsigned long zone, int nid,
>> +				  enum init_page_flags flags)
>>   {
>>   	mm_zero_struct_page(page);
>>   	set_page_links(page, zone, nid, pfn);
>> -	init_page_count(page);
>>   	page_mapcount_reset(page);
>>   	page_cpupid_reset_last(page);
>>   	page_kasan_tag_reset(page);
>> @@ -572,6 +572,10 @@ void __meminit __init_single_page(struct page *page, unsigned long pfn,
>>   	if (!is_highmem_idx(zone))
>>   		set_page_address(page, __va(pfn << PAGE_SHIFT));
>>   #endif
>> +	if (flags & INIT_PAGE_COUNT)
>> +		init_page_count(page);
>> +	if (flags & INIT_PAGE_RESERVED)
>> +		__SetPageReserved(page);
>>   }
>>   
>>   #ifdef CONFIG_NUMA
>> @@ -714,7 +718,7 @@ static void __meminit init_reserved_page(unsigned long pfn, int nid)
>>   		if (zone_spans_pfn(zone, pfn))
>>   			break;
>>   	}
>> -	__init_single_page(pfn_to_page(pfn), pfn, zid, nid);
>> +	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, INIT_PAGE_COUNT);
> There is __SetPageReserved call a few lines below, it can be folded here.
>
No, there is an #ifdef in front of it. If so, I would need to add
__SetPageReserved to the other init_reserved_page().

And there is a return before __init_single_page.
I will change INIT_PAGE_COUNT to 0 in the next patch.

>>   }
>>   #else
>>   static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
>> @@ -821,8 +825,8 @@ static void __init init_unavailable_range(unsigned long spfn,
>>   			pfn = pageblock_end_pfn(pfn) - 1;
>>   			continue;
>>   		}
>> -		__init_single_page(pfn_to_page(pfn), pfn, zone, node);
>> -		__SetPageReserved(pfn_to_page(pfn));
>> +		__init_single_page(pfn_to_page(pfn), pfn, zone, node,
>> +				   INIT_PAGE_COUNT | INIT_PAGE_RESERVED);
>>   		pgcnt++;
>>   	}
>>   
>> @@ -884,7 +888,7 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
>>   		}
>>   
>>   		page = pfn_to_page(pfn);
>> -		__init_single_page(page, pfn, zone, nid);
>> +		__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
>>   		if (context == MEMINIT_HOTPLUG)
>>   			__SetPageReserved(page);
>>   
>> @@ -967,9 +971,6 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
>>   					  unsigned long zone_idx, int nid,
>>   					  struct dev_pagemap *pgmap)
>>   {
>> -
>> -	__init_single_page(page, pfn, zone_idx, nid);
>> -
>>   	/*
>>   	 * Mark page reserved as it will need to wait for onlining
>>   	 * phase for it to be fully associated with a zone.
>> @@ -977,7 +978,8 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
>>   	 * We can use the non-atomic __set_bit operation for setting
>>   	 * the flag as we are still initializing the pages.
>>   	 */
>> -	__SetPageReserved(page);
>> +	__init_single_page(page, pfn, zone_idx, nid,
>> +			   INIT_PAGE_COUNT | INIT_PAGE_RESERVED);
>>   
>>   	/*
>>   	 * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
>> @@ -2058,7 +2060,7 @@ static unsigned long  __init deferred_init_pages(struct zone *zone,
>>   		} else {
>>   			page++;
>>   		}
>> -		__init_single_page(page, pfn, zid, nid);
>> +		__init_single_page(page, pfn, zid, nid, INIT_PAGE_COUNT);
>>   		nr_pages++;
>>   	}
>>   	return (nr_pages);
>> -- 
>> 2.25.1
>>


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-09-29  8:30   ` Mike Rapoport
@ 2023-09-29  9:50     ` Yajun Deng
  2023-09-29 10:02       ` Mike Rapoport
  2023-10-02  8:30     ` David Hildenbrand
  1 sibling, 1 reply; 34+ messages in thread
From: Yajun Deng @ 2023-09-29  9:50 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: akpm, mike.kravetz, muchun.song, willy, david, linux-mm, linux-kernel


On 2023/9/29 16:30, Mike Rapoport wrote:
> On Thu, Sep 28, 2023 at 04:33:02PM +0800, Yajun Deng wrote:
>> memmap_init_range() would init page count of all pages, but the free
>> pages count would be reset in __free_pages_core(). There are opposite
>> operations. It's unnecessary and time-consuming when it's MEMINIT_EARLY
>> context.
>>
>> Init page count in reserve_bootmem_region when in MEMINIT_EARLY context,
>> and check the page count before reset it.
>>
>> At the same time, the INIT_LIST_HEAD in reserve_bootmem_region isn't
>> need, as it already done in __init_single_page.
>>
>> The following data was tested on an x86 machine with 190GB of RAM.
>>
>> before:
>> free_low_memory_core_early()    341ms
>>
>> after:
>> free_low_memory_core_early()    285ms
>>
>> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
>> ---
>> v4: same with v2.
>> v3: same with v2.
>> v2: check page count instead of check context before reset it.
>> v1: https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
>> ---
>>   mm/mm_init.c    | 18 +++++++++++++-----
>>   mm/page_alloc.c | 20 ++++++++++++--------
>>   2 files changed, 25 insertions(+), 13 deletions(-)
>>
>> diff --git a/mm/mm_init.c b/mm/mm_init.c
>> index 9716c8a7ade9..3ab8861e1ef3 100644
>> --- a/mm/mm_init.c
>> +++ b/mm/mm_init.c
>> @@ -718,7 +718,7 @@ static void __meminit init_reserved_page(unsigned long pfn, int nid)
>>   		if (zone_spans_pfn(zone, pfn))
>>   			break;
>>   	}
>> -	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, INIT_PAGE_COUNT);
>> +	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, 0);
>>   }
>>   #else
>>   static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
>> @@ -756,8 +756,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start,
>>   
>>   			init_reserved_page(start_pfn, nid);
>>   
>> -			/* Avoid false-positive PageTail() */
>> -			INIT_LIST_HEAD(&page->lru);
>> +			/* Init page count for reserved region */
> Please add a comment that describes _why_ we initialize the page count here.
Okay.
>
>> +			init_page_count(page);
>>   
>>   			/*
>>   			 * no need for atomic set_bit because the struct
>> @@ -888,9 +888,17 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
>>   		}
>>   
>>   		page = pfn_to_page(pfn);
>> -		__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
>> -		if (context == MEMINIT_HOTPLUG)
>> +
>> +		/* If the context is MEMINIT_EARLY, we will init page count and
>> +		 * mark page reserved in reserve_bootmem_region, the free region
>> +		 * wouldn't have page count and we will check the pages count
>> +		 * in __free_pages_core.
>> +		 */
>> +		__init_single_page(page, pfn, zone, nid, 0);
>> +		if (context == MEMINIT_HOTPLUG) {
>> +			init_page_count(page);
>>   			__SetPageReserved(page);
> Rather than calling init_page_count() and __SetPageReserved() for
> MEMINIT_HOTPLUG you can set flags to INIT_PAGE_COUNT | INIT_PAGE_RESERVED
> an call __init_single_page() after the check for MEMINIT_HOTPLUG.

No, the following code would cost more time than the current code in 
memmap_init().

if (context == MEMINIT_HOTPLUG)

	__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT | INIT_PAGE_RESERVED);
else
	
	__init_single_page(page, pfn, zone, nid, 0);

> But more generally, I wonder if we have to differentiate HOTPLUG here at all.
> @David, can you comment please?
>
>> +		}
>>   
>>   		/*
>>   		 * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 06be8821d833..b868caabe8dc 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page, unsigned int order)
>>   	unsigned int loop;
>>   
>>   	/*
>> -	 * When initializing the memmap, __init_single_page() sets the refcount
>> -	 * of all pages to 1 ("allocated"/"not free"). We have to set the
>> -	 * refcount of all involved pages to 0.
>> +	 * When initializing the memmap, memmap_init_range sets the refcount
>> +	 * of all pages to 1 ("reserved" and "free") in hotplug context. We
>> +	 * have to set the refcount of all involved pages to 0. Otherwise,
>> +	 * we don't do it, as reserve_bootmem_region only set the refcount on
>> +	 * reserve region ("reserved") in early context.
>>   	 */
> Again, why hotplug and early init should be different?
I will add a comment that describes it will save boot time.
>
>> -	prefetchw(p);
>> -	for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>> -		prefetchw(p + 1);
>> +	if (page_count(page)) {
>> +		prefetchw(p);
>> +		for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>> +			prefetchw(p + 1);
>> +			__ClearPageReserved(p);
>> +			set_page_count(p, 0);
>> +		}
>>   		__ClearPageReserved(p);
>>   		set_page_count(p, 0);
>>   	}
>> -	__ClearPageReserved(p);
>> -	set_page_count(p, 0);
>>   
>>   	atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
>>   
>> -- 
>> 2.25.1
>>


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-09-29  9:50     ` Yajun Deng
@ 2023-09-29 10:02       ` Mike Rapoport
  2023-09-29 10:27         ` Yajun Deng
  0 siblings, 1 reply; 34+ messages in thread
From: Mike Rapoport @ 2023-09-29 10:02 UTC (permalink / raw)
  To: Yajun Deng
  Cc: akpm, mike.kravetz, muchun.song, willy, david, linux-mm, linux-kernel

On Fri, Sep 29, 2023 at 05:50:40PM +0800, Yajun Deng wrote:
> 
> On 2023/9/29 16:30, Mike Rapoport wrote:
> > On Thu, Sep 28, 2023 at 04:33:02PM +0800, Yajun Deng wrote:
> > > memmap_init_range() would init page count of all pages, but the free
> > > pages count would be reset in __free_pages_core(). There are opposite
> > > operations. It's unnecessary and time-consuming when it's MEMINIT_EARLY
> > > context.
> > > 
> > > Init page count in reserve_bootmem_region when in MEMINIT_EARLY context,
> > > and check the page count before reset it.
> > > 
> > > At the same time, the INIT_LIST_HEAD in reserve_bootmem_region isn't
> > > need, as it already done in __init_single_page.
> > > 
> > > The following data was tested on an x86 machine with 190GB of RAM.
> > > 
> > > before:
> > > free_low_memory_core_early()    341ms
> > > 
> > > after:
> > > free_low_memory_core_early()    285ms
> > > 
> > > Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
> > > ---
> > > v4: same with v2.
> > > v3: same with v2.
> > > v2: check page count instead of check context before reset it.
> > > v1: https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
> > > ---
> > >   mm/mm_init.c    | 18 +++++++++++++-----
> > >   mm/page_alloc.c | 20 ++++++++++++--------
> > >   2 files changed, 25 insertions(+), 13 deletions(-)
> > > 
> > > diff --git a/mm/mm_init.c b/mm/mm_init.c
> > > index 9716c8a7ade9..3ab8861e1ef3 100644
> > > --- a/mm/mm_init.c
> > > +++ b/mm/mm_init.c
> > > @@ -718,7 +718,7 @@ static void __meminit init_reserved_page(unsigned long pfn, int nid)
> > >   		if (zone_spans_pfn(zone, pfn))
> > >   			break;
> > >   	}
> > > -	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, INIT_PAGE_COUNT);
> > > +	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, 0);
> > >   }
> > >   #else
> > >   static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
> > > @@ -756,8 +756,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start,
> > >   			init_reserved_page(start_pfn, nid);
> > > -			/* Avoid false-positive PageTail() */
> > > -			INIT_LIST_HEAD(&page->lru);
> > > +			/* Init page count for reserved region */
> > Please add a comment that describes _why_ we initialize the page count here.
> Okay.
> > 
> > > +			init_page_count(page);
> > >   			/*
> > >   			 * no need for atomic set_bit because the struct
> > > @@ -888,9 +888,17 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
> > >   		}
> > >   		page = pfn_to_page(pfn);
> > > -		__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
> > > -		if (context == MEMINIT_HOTPLUG)
> > > +
> > > +		/* If the context is MEMINIT_EARLY, we will init page count and
> > > +		 * mark page reserved in reserve_bootmem_region, the free region
> > > +		 * wouldn't have page count and we will check the pages count
> > > +		 * in __free_pages_core.
> > > +		 */
> > > +		__init_single_page(page, pfn, zone, nid, 0);
> > > +		if (context == MEMINIT_HOTPLUG) {
> > > +			init_page_count(page);
> > >   			__SetPageReserved(page);
> > Rather than calling init_page_count() and __SetPageReserved() for
> > MEMINIT_HOTPLUG you can set flags to INIT_PAGE_COUNT | INIT_PAGE_RESERVED
> > an call __init_single_page() after the check for MEMINIT_HOTPLUG.
> 
> No, the following code would cost more time than the current code in
> memmap_init().
> 
> if (context == MEMINIT_HOTPLUG)
> 
> 	__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT | INIT_PAGE_RESERVED);
> else
> 	
> 	__init_single_page(page, pfn, zone, nid, 0);

Sorry if I wasn't clear. What I meant was to have something along these lines:

	enum page_init_flags flags = 0;

	if (context == MEMINIT_HOTPLUG)
		flags = INIT_PAGE_COUNT | INIT_PAGE_RESERVED;
	__init_single_page(page, pfn, zone, nid, flags);

> > But more generally, I wonder if we have to differentiate HOTPLUG here at all.
> > @David, can you comment please?
> > 
> > > +		}
> > >   		/*
> > >   		 * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
> > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > > index 06be8821d833..b868caabe8dc 100644
> > > --- a/mm/page_alloc.c
> > > +++ b/mm/page_alloc.c
> > > @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page, unsigned int order)
> > >   	unsigned int loop;
> > >   	/*
> > > -	 * When initializing the memmap, __init_single_page() sets the refcount
> > > -	 * of all pages to 1 ("allocated"/"not free"). We have to set the
> > > -	 * refcount of all involved pages to 0.
> > > +	 * When initializing the memmap, memmap_init_range sets the refcount
> > > +	 * of all pages to 1 ("reserved" and "free") in hotplug context. We
> > > +	 * have to set the refcount of all involved pages to 0. Otherwise,
> > > +	 * we don't do it, as reserve_bootmem_region only set the refcount on
> > > +	 * reserve region ("reserved") in early context.
> > >   	 */
> > Again, why hotplug and early init should be different?
> I will add a comment that describes it will save boot time.

But why do we need to initialize struct pages differently at boot time vs
memory hotplug?
Is there a reason memory hotplug cannot have page count set to 0 just like
for pages reserved at boot time?
 
> > > -	prefetchw(p);
> > > -	for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
> > > -		prefetchw(p + 1);
> > > +	if (page_count(page)) {
> > > +		prefetchw(p);
> > > +		for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
> > > +			prefetchw(p + 1);
> > > +			__ClearPageReserved(p);
> > > +			set_page_count(p, 0);
> > > +		}
> > >   		__ClearPageReserved(p);
> > >   		set_page_count(p, 0);
> > >   	}
> > > -	__ClearPageReserved(p);
> > > -	set_page_count(p, 0);
> > >   	atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
> > > -- 
> > > 2.25.1
> > > 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-09-29 10:02       ` Mike Rapoport
@ 2023-09-29 10:27         ` Yajun Deng
  2023-10-01 18:59           ` Mike Rapoport
  0 siblings, 1 reply; 34+ messages in thread
From: Yajun Deng @ 2023-09-29 10:27 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: akpm, mike.kravetz, muchun.song, willy, david, linux-mm, linux-kernel


On 2023/9/29 18:02, Mike Rapoport wrote:
> On Fri, Sep 29, 2023 at 05:50:40PM +0800, Yajun Deng wrote:
>> On 2023/9/29 16:30, Mike Rapoport wrote:
>>> On Thu, Sep 28, 2023 at 04:33:02PM +0800, Yajun Deng wrote:
>>>> memmap_init_range() would init page count of all pages, but the free
>>>> pages count would be reset in __free_pages_core(). There are opposite
>>>> operations. It's unnecessary and time-consuming when it's MEMINIT_EARLY
>>>> context.
>>>>
>>>> Init page count in reserve_bootmem_region when in MEMINIT_EARLY context,
>>>> and check the page count before reset it.
>>>>
>>>> At the same time, the INIT_LIST_HEAD in reserve_bootmem_region isn't
>>>> need, as it already done in __init_single_page.
>>>>
>>>> The following data was tested on an x86 machine with 190GB of RAM.
>>>>
>>>> before:
>>>> free_low_memory_core_early()    341ms
>>>>
>>>> after:
>>>> free_low_memory_core_early()    285ms
>>>>
>>>> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
>>>> ---
>>>> v4: same with v2.
>>>> v3: same with v2.
>>>> v2: check page count instead of check context before reset it.
>>>> v1: https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
>>>> ---
>>>>    mm/mm_init.c    | 18 +++++++++++++-----
>>>>    mm/page_alloc.c | 20 ++++++++++++--------
>>>>    2 files changed, 25 insertions(+), 13 deletions(-)
>>>>
>>>> diff --git a/mm/mm_init.c b/mm/mm_init.c
>>>> index 9716c8a7ade9..3ab8861e1ef3 100644
>>>> --- a/mm/mm_init.c
>>>> +++ b/mm/mm_init.c
>>>> @@ -718,7 +718,7 @@ static void __meminit init_reserved_page(unsigned long pfn, int nid)
>>>>    		if (zone_spans_pfn(zone, pfn))
>>>>    			break;
>>>>    	}
>>>> -	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, INIT_PAGE_COUNT);
>>>> +	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, 0);
>>>>    }
>>>>    #else
>>>>    static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
>>>> @@ -756,8 +756,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start,
>>>>    			init_reserved_page(start_pfn, nid);
>>>> -			/* Avoid false-positive PageTail() */
>>>> -			INIT_LIST_HEAD(&page->lru);
>>>> +			/* Init page count for reserved region */
>>> Please add a comment that describes _why_ we initialize the page count here.
>> Okay.
>>>> +			init_page_count(page);
>>>>    			/*
>>>>    			 * no need for atomic set_bit because the struct
>>>> @@ -888,9 +888,17 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
>>>>    		}
>>>>    		page = pfn_to_page(pfn);
>>>> -		__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
>>>> -		if (context == MEMINIT_HOTPLUG)
>>>> +
>>>> +		/* If the context is MEMINIT_EARLY, we will init page count and
>>>> +		 * mark page reserved in reserve_bootmem_region, the free region
>>>> +		 * wouldn't have page count and we will check the pages count
>>>> +		 * in __free_pages_core.
>>>> +		 */
>>>> +		__init_single_page(page, pfn, zone, nid, 0);
>>>> +		if (context == MEMINIT_HOTPLUG) {
>>>> +			init_page_count(page);
>>>>    			__SetPageReserved(page);
>>> Rather than calling init_page_count() and __SetPageReserved() for
>>> MEMINIT_HOTPLUG you can set flags to INIT_PAGE_COUNT | INIT_PAGE_RESERVED
>>> an call __init_single_page() after the check for MEMINIT_HOTPLUG.
>> No, the following code would cost more time than the current code in
>> memmap_init().
>>
>> if (context == MEMINIT_HOTPLUG)
>>
>> 	__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT | INIT_PAGE_RESERVED);
>> else
>> 	
>> 	__init_single_page(page, pfn, zone, nid, 0);
> Sorry if I wasn't clear. What I meant was to have something along these lines:
>
> 	enum page_init_flags flags = 0;
>
> 	if (context == MEMINIT_HOTPLUG)
> 		flags = INIT_PAGE_COUNT | INIT_PAGE_RESERVED;
> 	__init_single_page(page, pfn, zone, nid, flags);
>
Okay, I'll test the time consumed in memmap_init().
>>> But more generally, I wonder if we have to differentiate HOTPLUG here at all.
>>> @David, can you comment please?
>>>
>>>> +		}
>>>>    		/*
>>>>    		 * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
>>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>>> index 06be8821d833..b868caabe8dc 100644
>>>> --- a/mm/page_alloc.c
>>>> +++ b/mm/page_alloc.c
>>>> @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page, unsigned int order)
>>>>    	unsigned int loop;
>>>>    	/*
>>>> -	 * When initializing the memmap, __init_single_page() sets the refcount
>>>> -	 * of all pages to 1 ("allocated"/"not free"). We have to set the
>>>> -	 * refcount of all involved pages to 0.
>>>> +	 * When initializing the memmap, memmap_init_range sets the refcount
>>>> +	 * of all pages to 1 ("reserved" and "free") in hotplug context. We
>>>> +	 * have to set the refcount of all involved pages to 0. Otherwise,
>>>> +	 * we don't do it, as reserve_bootmem_region only set the refcount on
>>>> +	 * reserve region ("reserved") in early context.
>>>>    	 */
>>> Again, why hotplug and early init should be different?
>> I will add a comment that describes it will save boot time.
> But why do we need initialize struct pages differently at boot time vs
> memory hotplug?
> Is there a reason memory hotplug cannot have page count set to 0 just like
> for pages reserved at boot time?
>   

This patch just saves boot time in MEMINIT_EARLY. If someone finds out 
that it can save time in

MEMINIT_HOTPLUG, I think it can be done in another patch later. I am 
just keeping it the same.

>>>> -	prefetchw(p);
>>>> -	for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>>>> -		prefetchw(p + 1);
>>>> +	if (page_count(page)) {
>>>> +		prefetchw(p);
>>>> +		for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>>>> +			prefetchw(p + 1);
>>>> +			__ClearPageReserved(p);
>>>> +			set_page_count(p, 0);
>>>> +		}
>>>>    		__ClearPageReserved(p);
>>>>    		set_page_count(p, 0);
>>>>    	}
>>>> -	__ClearPageReserved(p);
>>>> -	set_page_count(p, 0);
>>>>    	atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
>>>> -- 
>>>> 2.25.1
>>>>


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-09-29 10:27         ` Yajun Deng
@ 2023-10-01 18:59           ` Mike Rapoport
  2023-10-02  7:03             ` Yajun Deng
  0 siblings, 1 reply; 34+ messages in thread
From: Mike Rapoport @ 2023-10-01 18:59 UTC (permalink / raw)
  To: Yajun Deng
  Cc: akpm, mike.kravetz, muchun.song, willy, david, linux-mm, linux-kernel

On Fri, Sep 29, 2023 at 06:27:25PM +0800, Yajun Deng wrote:
> 
> On 2023/9/29 18:02, Mike Rapoport wrote:
> > On Fri, Sep 29, 2023 at 05:50:40PM +0800, Yajun Deng wrote:
> > > On 2023/9/29 16:30, Mike Rapoport wrote:
> > > > On Thu, Sep 28, 2023 at 04:33:02PM +0800, Yajun Deng wrote:
> > > > > memmap_init_range() would init page count of all pages, but the free
> > > > > pages count would be reset in __free_pages_core(). There are opposite
> > > > > operations. It's unnecessary and time-consuming when it's MEMINIT_EARLY
> > > > > context.
> > > > > 
> > > > > Init page count in reserve_bootmem_region when in MEMINIT_EARLY context,
> > > > > and check the page count before reset it.
> > > > > 
> > > > > At the same time, the INIT_LIST_HEAD in reserve_bootmem_region isn't
> > > > > need, as it already done in __init_single_page.
> > > > > 
> > > > > The following data was tested on an x86 machine with 190GB of RAM.
> > > > > 
> > > > > before:
> > > > > free_low_memory_core_early()    341ms
> > > > > 
> > > > > after:
> > > > > free_low_memory_core_early()    285ms
> > > > > 
> > > > > Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
> > > > > ---
> > > > > v4: same with v2.
> > > > > v3: same with v2.
> > > > > v2: check page count instead of check context before reset it.
> > > > > v1: https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
> > > > > ---
> > > > >    mm/mm_init.c    | 18 +++++++++++++-----
> > > > >    mm/page_alloc.c | 20 ++++++++++++--------
> > > > >    2 files changed, 25 insertions(+), 13 deletions(-)
> > > > > 
> > > > > diff --git a/mm/mm_init.c b/mm/mm_init.c
> > > > > index 9716c8a7ade9..3ab8861e1ef3 100644
> > > > > --- a/mm/mm_init.c
> > > > > +++ b/mm/mm_init.c
> > > > > @@ -718,7 +718,7 @@ static void __meminit init_reserved_page(unsigned long pfn, int nid)
> > > > >    		if (zone_spans_pfn(zone, pfn))
> > > > >    			break;
> > > > >    	}
> > > > > -	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, INIT_PAGE_COUNT);
> > > > > +	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, 0);
> > > > >    }
> > > > >    #else
> > > > >    static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
> > > > > @@ -756,8 +756,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start,
> > > > >    			init_reserved_page(start_pfn, nid);
> > > > > -			/* Avoid false-positive PageTail() */
> > > > > -			INIT_LIST_HEAD(&page->lru);
> > > > > +			/* Init page count for reserved region */
> > > > Please add a comment that describes _why_ we initialize the page count here.
> > > Okay.
> > > > > +			init_page_count(page);
> > > > >    			/*
> > > > >    			 * no need for atomic set_bit because the struct
> > > > > @@ -888,9 +888,17 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
> > > > >    		}
> > > > >    		page = pfn_to_page(pfn);
> > > > > -		__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
> > > > > -		if (context == MEMINIT_HOTPLUG)
> > > > > +
> > > > > +		/* If the context is MEMINIT_EARLY, we will init page count and
> > > > > +		 * mark page reserved in reserve_bootmem_region, the free region
> > > > > +		 * wouldn't have page count and we will check the pages count
> > > > > +		 * in __free_pages_core.
> > > > > +		 */
> > > > > +		__init_single_page(page, pfn, zone, nid, 0);
> > > > > +		if (context == MEMINIT_HOTPLUG) {
> > > > > +			init_page_count(page);
> > > > >    			__SetPageReserved(page);
> > > > Rather than calling init_page_count() and __SetPageReserved() for
> > > > MEMINIT_HOTPLUG you can set flags to INIT_PAGE_COUNT | INIT_PAGE_RESERVED
> > > > an call __init_single_page() after the check for MEMINIT_HOTPLUG.
> > > No, the following code would cost more time than the current code in
> > > memmap_init().
> > > 
> > > if (context == MEMINIT_HOTPLUG)
> > > 
> > > 	__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT | INIT_PAGE_RESERVED);
> > > else
> > > 	
> > > 	__init_single_page(page, pfn, zone, nid, 0);
> > Sorry if I wasn't clear. What I meant was to have something along these lines:
> > 
> > 	enum page_init_flags flags = 0;
> > 
> > 	if (context == MEMINIT_HOTPLUG)
> > 		flags = INIT_PAGE_COUNT | INIT_PAGE_RESERVED;
> > 	__init_single_page(page, pfn, zone, nid, flags);
> > 
> Okay, I'll test the time consumed in memmap_init().
> > > > But more generally, I wonder if we have to differentiate HOTPLUG here at all.
> > > > @David, can you comment please?
> > > > 
> > > > > +		}
> > > > >    		/*
> > > > >    		 * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
> > > > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > > > > index 06be8821d833..b868caabe8dc 100644
> > > > > --- a/mm/page_alloc.c
> > > > > +++ b/mm/page_alloc.c
> > > > > @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page, unsigned int order)
> > > > >    	unsigned int loop;
> > > > >    	/*
> > > > > -	 * When initializing the memmap, __init_single_page() sets the refcount
> > > > > -	 * of all pages to 1 ("allocated"/"not free"). We have to set the
> > > > > -	 * refcount of all involved pages to 0.
> > > > > +	 * When initializing the memmap, memmap_init_range sets the refcount
> > > > > +	 * of all pages to 1 ("reserved" and "free") in hotplug context. We
> > > > > +	 * have to set the refcount of all involved pages to 0. Otherwise,
> > > > > +	 * we don't do it, as reserve_bootmem_region only set the refcount on
> > > > > +	 * reserve region ("reserved") in early context.
> > > > >    	 */
> > > > Again, why hotplug and early init should be different?
> > > I will add a comment that describes it will save boot time.
> > But why do we need initialize struct pages differently at boot time vs
> > memory hotplug?
> > Is there a reason memory hotplug cannot have page count set to 0 just like
> > for pages reserved at boot time?
> 
> This patch just save boot time in MEMINIT_EARLY. If someone finds out that
> it can save time in
> 
> MEMINIT_HOTPLUG, I think it can be done in another patch later. I just
> keeping it in the same.

But it's not the same. It becomes slower after your patch and the code that
frees the pages for MEMINIT_EARLY and MEMINIT_HOTPLUG becomes non-uniform
for no apparent reason.

The if (page_count(page)) is really non-obvious...
 
> > > > > -	prefetchw(p);
> > > > > -	for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
> > > > > -		prefetchw(p + 1);
> > > > > +	if (page_count(page)) {
> > > > > +		prefetchw(p);
> > > > > +		for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
> > > > > +			prefetchw(p + 1);
> > > > > +			__ClearPageReserved(p);
> > > > > +			set_page_count(p, 0);
> > > > > +		}
> > > > >    		__ClearPageReserved(p);
> > > > >    		set_page_count(p, 0);
> > > > >    	}
> > > > > -	__ClearPageReserved(p);
> > > > > -	set_page_count(p, 0);
> > > > >    	atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
> > > > > -- 
> > > > > 2.25.1
> > > > > 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-01 18:59           ` Mike Rapoport
@ 2023-10-02  7:03             ` Yajun Deng
  2023-10-02  8:47               ` Mike Rapoport
  0 siblings, 1 reply; 34+ messages in thread
From: Yajun Deng @ 2023-10-02  7:03 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: akpm, mike.kravetz, muchun.song, willy, david, linux-mm, linux-kernel


On 2023/10/2 02:59, Mike Rapoport wrote:
> On Fri, Sep 29, 2023 at 06:27:25PM +0800, Yajun Deng wrote:
>> On 2023/9/29 18:02, Mike Rapoport wrote:
>>> On Fri, Sep 29, 2023 at 05:50:40PM +0800, Yajun Deng wrote:
>>>> On 2023/9/29 16:30, Mike Rapoport wrote:
>>>>> On Thu, Sep 28, 2023 at 04:33:02PM +0800, Yajun Deng wrote:
>>>>>> memmap_init_range() would init page count of all pages, but the free
>>>>>> pages count would be reset in __free_pages_core(). There are opposite
>>>>>> operations. It's unnecessary and time-consuming when it's MEMINIT_EARLY
>>>>>> context.
>>>>>>
>>>>>> Init page count in reserve_bootmem_region when in MEMINIT_EARLY context,
>>>>>> and check the page count before reset it.
>>>>>>
>>>>>> At the same time, the INIT_LIST_HEAD in reserve_bootmem_region isn't
>>>>>> need, as it already done in __init_single_page.
>>>>>>
>>>>>> The following data was tested on an x86 machine with 190GB of RAM.
>>>>>>
>>>>>> before:
>>>>>> free_low_memory_core_early()    341ms
>>>>>>
>>>>>> after:
>>>>>> free_low_memory_core_early()    285ms
>>>>>>
>>>>>> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
>>>>>> ---
>>>>>> v4: same with v2.
>>>>>> v3: same with v2.
>>>>>> v2: check page count instead of check context before reset it.
>>>>>> v1: https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
>>>>>> ---
>>>>>>     mm/mm_init.c    | 18 +++++++++++++-----
>>>>>>     mm/page_alloc.c | 20 ++++++++++++--------
>>>>>>     2 files changed, 25 insertions(+), 13 deletions(-)
>>>>>>
>>>>>> diff --git a/mm/mm_init.c b/mm/mm_init.c
>>>>>> index 9716c8a7ade9..3ab8861e1ef3 100644
>>>>>> --- a/mm/mm_init.c
>>>>>> +++ b/mm/mm_init.c
>>>>>> @@ -718,7 +718,7 @@ static void __meminit init_reserved_page(unsigned long pfn, int nid)
>>>>>>     		if (zone_spans_pfn(zone, pfn))
>>>>>>     			break;
>>>>>>     	}
>>>>>> -	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, INIT_PAGE_COUNT);
>>>>>> +	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, 0);
>>>>>>     }
>>>>>>     #else
>>>>>>     static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
>>>>>> @@ -756,8 +756,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start,
>>>>>>     			init_reserved_page(start_pfn, nid);
>>>>>> -			/* Avoid false-positive PageTail() */
>>>>>> -			INIT_LIST_HEAD(&page->lru);
>>>>>> +			/* Init page count for reserved region */
>>>>> Please add a comment that describes _why_ we initialize the page count here.
>>>> Okay.
>>>>>> +			init_page_count(page);
>>>>>>     			/*
>>>>>>     			 * no need for atomic set_bit because the struct
>>>>>> @@ -888,9 +888,17 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
>>>>>>     		}
>>>>>>     		page = pfn_to_page(pfn);
>>>>>> -		__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
>>>>>> -		if (context == MEMINIT_HOTPLUG)
>>>>>> +
>>>>>> +		/* If the context is MEMINIT_EARLY, we will init page count and
>>>>>> +		 * mark page reserved in reserve_bootmem_region, the free region
>>>>>> +		 * wouldn't have page count and we will check the pages count
>>>>>> +		 * in __free_pages_core.
>>>>>> +		 */
>>>>>> +		__init_single_page(page, pfn, zone, nid, 0);
>>>>>> +		if (context == MEMINIT_HOTPLUG) {
>>>>>> +			init_page_count(page);
>>>>>>     			__SetPageReserved(page);
>>>>> Rather than calling init_page_count() and __SetPageReserved() for
>>>>> MEMINIT_HOTPLUG you can set flags to INIT_PAGE_COUNT | INIT_PAGE_RESERVED
>>>>> an call __init_single_page() after the check for MEMINIT_HOTPLUG.
>>>> No, the following code would cost more time than the current code in
>>>> memmap_init().
>>>>
>>>> if (context == MEMINIT_HOTPLUG)
>>>>
>>>> 	__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT | INIT_PAGE_RESERVED);
>>>> else
>>>> 	
>>>> 	__init_single_page(page, pfn, zone, nid, 0);
>>> Sorry if I wasn't clear. What I meant was to have something along these lines:
>>>
>>> 	enum page_init_flags flags = 0;
>>>
>>> 	if (context == MEMINIT_HOTPLUG)
>>> 		flags = INIT_PAGE_COUNT | INIT_PAGE_RESERVED;
>>> 	__init_single_page(page, pfn, zone, nid, flags);
>>>
>> Okay, I'll test the time consumed in memmap_init().
>>>>> But more generally, I wonder if we have to differentiate HOTPLUG here at all.
>>>>> @David, can you comment please?
>>>>>
>>>>>> +		}
>>>>>>     		/*
>>>>>>     		 * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
>>>>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>>>>> index 06be8821d833..b868caabe8dc 100644
>>>>>> --- a/mm/page_alloc.c
>>>>>> +++ b/mm/page_alloc.c
>>>>>> @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page, unsigned int order)
>>>>>>     	unsigned int loop;
>>>>>>     	/*
>>>>>> -	 * When initializing the memmap, __init_single_page() sets the refcount
>>>>>> -	 * of all pages to 1 ("allocated"/"not free"). We have to set the
>>>>>> -	 * refcount of all involved pages to 0.
>>>>>> +	 * When initializing the memmap, memmap_init_range sets the refcount
>>>>>> +	 * of all pages to 1 ("reserved" and "free") in hotplug context. We
>>>>>> +	 * have to set the refcount of all involved pages to 0. Otherwise,
>>>>>> +	 * we don't do it, as reserve_bootmem_region only set the refcount on
>>>>>> +	 * reserve region ("reserved") in early context.
>>>>>>     	 */
>>>>> Again, why hotplug and early init should be different?
>>>> I will add a comment that describes it will save boot time.
>>> But why do we need initialize struct pages differently at boot time vs
>>> memory hotplug?
>>> Is there a reason memory hotplug cannot have page count set to 0 just like
>>> for pages reserved at boot time?
>> This patch just save boot time in MEMINIT_EARLY. If someone finds out that
>> it can save time in
>>
>> MEMINIT_HOTPLUG, I think it can be done in another patch later. I just
>> keeping it in the same.
> But it's not the same. It becomes slower after your patch and the code that
> frees the pages for MEMINIT_EARLY and MEMINIT_HOTPLUG becomes non-uniform
> for no apparent reason.

__free_pages_core will also be called by others, such as:
deferred_free_range, do_collection and memblock_free_late.

We can't remove 'if (page_count(page))' even if we set the page count to
0 when MEMINIT_HOTPLUG.

The following data is the time consumed in online_pages_range().

start_pfn   nr_pages    before   after
0x680000    0x80000     4.315ms  4.461ms
0x700000    0x80000     4.468ms  4.440ms
0x780000    0x80000     4.295ms  3.835ms
0x800000    0x80000     3.928ms  4.377ms
0x880000    0x80000     4.321ms  4.378ms
0x900000    0x80000     4.448ms  3.964ms
0x980000    0x80000     4.000ms  4.381ms
0xa00000    0x80000     4.459ms  4.255ms
sum                     34.234ms  34.091ms

As we can see, it doesn't become slower with 'if (page_count(page))'
when MEMINIT_HOTPLUG.

> The if (page_count(page)) is really non-obvious...
>   
>>>>>> -	prefetchw(p);
>>>>>> -	for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>>>>>> -		prefetchw(p + 1);
>>>>>> +	if (page_count(page)) {
>>>>>> +		prefetchw(p);
>>>>>> +		for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>>>>>> +			prefetchw(p + 1);
>>>>>> +			__ClearPageReserved(p);
>>>>>> +			set_page_count(p, 0);
>>>>>> +		}
>>>>>>     		__ClearPageReserved(p);
>>>>>>     		set_page_count(p, 0);
>>>>>>     	}
>>>>>> -	__ClearPageReserved(p);
>>>>>> -	set_page_count(p, 0);
>>>>>>     	atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
>>>>>> -- 
>>>>>> 2.25.1
>>>>>>


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-09-29  8:30   ` Mike Rapoport
  2023-09-29  9:50     ` Yajun Deng
@ 2023-10-02  8:30     ` David Hildenbrand
  2023-10-08  8:57       ` Yajun Deng
  1 sibling, 1 reply; 34+ messages in thread
From: David Hildenbrand @ 2023-10-02  8:30 UTC (permalink / raw)
  To: Mike Rapoport, Yajun Deng
  Cc: akpm, mike.kravetz, muchun.song, willy, linux-mm, linux-kernel

On 29.09.23 10:30, Mike Rapoport wrote:
> On Thu, Sep 28, 2023 at 04:33:02PM +0800, Yajun Deng wrote:
>> memmap_init_range() would init page count of all pages, but the free
>> pages count would be reset in __free_pages_core(). There are opposite
>> operations. It's unnecessary and time-consuming when it's MEMINIT_EARLY
>> context.
>>
>> Init page count in reserve_bootmem_region when in MEMINIT_EARLY context,
>> and check the page count before reset it.
>>
>> At the same time, the INIT_LIST_HEAD in reserve_bootmem_region isn't
>> need, as it already done in __init_single_page.
>>
>> The following data was tested on an x86 machine with 190GB of RAM.
>>
>> before:
>> free_low_memory_core_early()    341ms
>>
>> after:
>> free_low_memory_core_early()    285ms
>>
>> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
>> ---
>> v4: same with v2.
>> v3: same with v2.
>> v2: check page count instead of check context before reset it.
>> v1: https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
>> ---
>>   mm/mm_init.c    | 18 +++++++++++++-----
>>   mm/page_alloc.c | 20 ++++++++++++--------
>>   2 files changed, 25 insertions(+), 13 deletions(-)
>>
>> diff --git a/mm/mm_init.c b/mm/mm_init.c
>> index 9716c8a7ade9..3ab8861e1ef3 100644
>> --- a/mm/mm_init.c
>> +++ b/mm/mm_init.c
>> @@ -718,7 +718,7 @@ static void __meminit init_reserved_page(unsigned long pfn, int nid)
>>   		if (zone_spans_pfn(zone, pfn))
>>   			break;
>>   	}
>> -	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, INIT_PAGE_COUNT);
>> +	__init_single_page(pfn_to_page(pfn), pfn, zid, nid, 0);
>>   }
>>   #else
>>   static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
>> @@ -756,8 +756,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start,
>>   
>>   			init_reserved_page(start_pfn, nid);
>>   
>> -			/* Avoid false-positive PageTail() */
>> -			INIT_LIST_HEAD(&page->lru);
>> +			/* Init page count for reserved region */
> 
> Please add a comment that describes _why_ we initialize the page count here.
> 
>> +			init_page_count(page);
>>   
>>   			/*
>>   			 * no need for atomic set_bit because the struct
>> @@ -888,9 +888,17 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
>>   		}
>>   
>>   		page = pfn_to_page(pfn);
>> -		__init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
>> -		if (context == MEMINIT_HOTPLUG)
>> +
>> +		/* If the context is MEMINIT_EARLY, we will init page count and
>> +		 * mark page reserved in reserve_bootmem_region, the free region
>> +		 * wouldn't have page count and we will check the pages count
>> +		 * in __free_pages_core.
>> +		 */
>> +		__init_single_page(page, pfn, zone, nid, 0);
>> +		if (context == MEMINIT_HOTPLUG) {
>> +			init_page_count(page);
>>   			__SetPageReserved(page);
> 
> Rather than calling init_page_count() and __SetPageReserved() for
> MEMINIT_HOTPLUG you can set flags to INIT_PAGE_COUNT | INIT_PAGE_RESERVED
> an call __init_single_page() after the check for MEMINIT_HOTPLUG.
> 
> But more generally, I wonder if we have to differentiate HOTPLUG here at all.
> @David, can you comment please?

There are a lot of details to that, and I'll share some I can briefly think of.

1) __SetPageReserved()

I tried removing that a while ago, but there was a blocker (IIRC something about
ZONE_DEVICE). I still have the patches at [1] and I could probably take a look
if that blocker still exists (I recall that something changed at some point, but
I never had the time to follow up).

But once we stop setting the pages reserved, we might run into issues with ...


2) init_page_count()

virtio-mem, XEN balloon and HV-balloon add memory blocks that can contain holes.
set_online_page_callback() is used to intercept memory onlining and to expose
only the pages that are not holes to the buddy: calling generic_online_page() on !hole.

Holes are PageReserved but with an initialized page count. Memory offlining will fail on
PageReserved pages -- has_unmovable_pages().


At least virtio-mem clears the PageReserved flag of holes when onlining memory,
and currently relies in the page count to be reasonable (so memory offlining can work).

static void virtio_mem_set_fake_offline(unsigned long pfn,
					unsigned long nr_pages, bool onlined)
{
	page_offline_begin();
	for (; nr_pages--; pfn++) {
		struct page *page = pfn_to_page(pfn);

		__SetPageOffline(page);
		if (!onlined) {
			SetPageDirty(page);
			/* FIXME: remove after cleanups */
			ClearPageReserved(page);
		}
	}
	page_offline_end();
}


For virtio-mem, we could initialize the page count there instead. The other PV drivers
might require a bit more thought.


[1] https://github.com/davidhildenbrand/linux/tree/online_reserved_cleanup

> 
>> +		}
>>   
>>   		/*
>>   		 * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 06be8821d833..b868caabe8dc 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page, unsigned int order)
>>   	unsigned int loop;
>>   
>>   	/*
>> -	 * When initializing the memmap, __init_single_page() sets the refcount
>> -	 * of all pages to 1 ("allocated"/"not free"). We have to set the
>> -	 * refcount of all involved pages to 0.
>> +	 * When initializing the memmap, memmap_init_range sets the refcount
>> +	 * of all pages to 1 ("reserved" and "free") in hotplug context. We
>> +	 * have to set the refcount of all involved pages to 0. Otherwise,
>> +	 * we don't do it, as reserve_bootmem_region only set the refcount on
>> +	 * reserve region ("reserved") in early context.
>>   	 */
> 
> Again, why hotplug and early init should be different?
> 
>> -	prefetchw(p);
>> -	for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>> -		prefetchw(p + 1);
>> +	if (page_count(page)) {
>> +		prefetchw(p);
>> +		for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>> +			prefetchw(p + 1);
>> +			__ClearPageReserved(p);
>> +			set_page_count(p, 0);
>> +		}
>>   		__ClearPageReserved(p);
>>   		set_page_count(p, 0);

That looks wrong. if the page count would by pure luck be 0 already for hotplugged memory,
you wouldn't clear the reserved flag.

These changes make me a bit nervous.

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-02  7:03             ` Yajun Deng
@ 2023-10-02  8:47               ` Mike Rapoport
  2023-10-02  8:56                 ` David Hildenbrand
  0 siblings, 1 reply; 34+ messages in thread
From: Mike Rapoport @ 2023-10-02  8:47 UTC (permalink / raw)
  To: Yajun Deng
  Cc: akpm, mike.kravetz, muchun.song, willy, david, linux-mm, linux-kernel

On Mon, Oct 02, 2023 at 03:03:56PM +0800, Yajun Deng wrote:
> 
> On 2023/10/2 02:59, Mike Rapoport wrote:
> > On Fri, Sep 29, 2023 at 06:27:25PM +0800, Yajun Deng wrote:
> > > On 2023/9/29 18:02, Mike Rapoport wrote:
> > > > > > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > > > > > > index 06be8821d833..b868caabe8dc 100644
> > > > > > > --- a/mm/page_alloc.c
> > > > > > > +++ b/mm/page_alloc.c
> > > > > > > @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page, unsigned int order)
> > > > > > >     	unsigned int loop;
> > > > > > >     	/*
> > > > > > > -	 * When initializing the memmap, __init_single_page() sets the refcount
> > > > > > > -	 * of all pages to 1 ("allocated"/"not free"). We have to set the
> > > > > > > -	 * refcount of all involved pages to 0.
> > > > > > > +	 * When initializing the memmap, memmap_init_range sets the refcount
> > > > > > > +	 * of all pages to 1 ("reserved" and "free") in hotplug context. We
> > > > > > > +	 * have to set the refcount of all involved pages to 0. Otherwise,
> > > > > > > +	 * we don't do it, as reserve_bootmem_region only set the refcount on
> > > > > > > +	 * reserve region ("reserved") in early context.
> > > > > > >     	 */
> > > > > > Again, why hotplug and early init should be different?
> > > > > I will add a comment that describes it will save boot time.
> > > > But why do we need initialize struct pages differently at boot time vs
> > > > memory hotplug?
> > > > Is there a reason memory hotplug cannot have page count set to 0 just like
> > > > for pages reserved at boot time?
> > > This patch just save boot time in MEMINIT_EARLY. If someone finds out that
> > > it can save time in
> > > 
> > > MEMINIT_HOTPLUG, I think it can be done in another patch later. I just
> > > keeping it in the same.
> > But it's not the same. It becomes slower after your patch and the code that
> > frees the pages for MEMINIT_EARLY and MEMINIT_HOTPLUG becomes non-uniform
> > for no apparent reason.
> 
> __free_pages_core will also be called by others, such as:
> deferred_free_range, do_collection and memblock_free_late.
> 
> We couldn't remove  'if (page_count(page))' even if we set page count to 0
> when MEMINIT_HOTPLUG.

That 'if' breaks the invariant that __free_pages_core is always called for
pages with initialized page count. Adding it may lead to subtle bugs and
random memory corruption so we don't want to add it at the first place.

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-02  8:47               ` Mike Rapoport
@ 2023-10-02  8:56                 ` David Hildenbrand
  2023-10-02 11:10                   ` Mike Rapoport
  0 siblings, 1 reply; 34+ messages in thread
From: David Hildenbrand @ 2023-10-02  8:56 UTC (permalink / raw)
  To: Mike Rapoport, Yajun Deng
  Cc: akpm, mike.kravetz, muchun.song, willy, linux-mm, linux-kernel

On 02.10.23 10:47, Mike Rapoport wrote:
> On Mon, Oct 02, 2023 at 03:03:56PM +0800, Yajun Deng wrote:
>>
>> On 2023/10/2 02:59, Mike Rapoport wrote:
>>> On Fri, Sep 29, 2023 at 06:27:25PM +0800, Yajun Deng wrote:
>>>> On 2023/9/29 18:02, Mike Rapoport wrote:
>>>>>>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>>>>>>> index 06be8821d833..b868caabe8dc 100644
>>>>>>>> --- a/mm/page_alloc.c
>>>>>>>> +++ b/mm/page_alloc.c
>>>>>>>> @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page, unsigned int order)
>>>>>>>>      	unsigned int loop;
>>>>>>>>      	/*
>>>>>>>> -	 * When initializing the memmap, __init_single_page() sets the refcount
>>>>>>>> -	 * of all pages to 1 ("allocated"/"not free"). We have to set the
>>>>>>>> -	 * refcount of all involved pages to 0.
>>>>>>>> +	 * When initializing the memmap, memmap_init_range sets the refcount
>>>>>>>> +	 * of all pages to 1 ("reserved" and "free") in hotplug context. We
>>>>>>>> +	 * have to set the refcount of all involved pages to 0. Otherwise,
>>>>>>>> +	 * we don't do it, as reserve_bootmem_region only set the refcount on
>>>>>>>> +	 * reserve region ("reserved") in early context.
>>>>>>>>      	 */
>>>>>>> Again, why hotplug and early init should be different?
>>>>>> I will add a comment that describes it will save boot time.
>>>>> But why do we need initialize struct pages differently at boot time vs
>>>>> memory hotplug?
>>>>> Is there a reason memory hotplug cannot have page count set to 0 just like
>>>>> for pages reserved at boot time?
>>>> This patch just save boot time in MEMINIT_EARLY. If someone finds out that
>>>> it can save time in
>>>>
>>>> MEMINIT_HOTPLUG, I think it can be done in another patch later. I just
>>>> keeping it in the same.
>>> But it's not the same. It becomes slower after your patch and the code that
>>> frees the pages for MEMINIT_EARLY and MEMINIT_HOTPLUG becomes non-uniform
>>> for no apparent reason.
>>
>> __free_pages_core will also be called by others, such as:
>> deferred_free_range, do_collection and memblock_free_late.
>>
>> We couldn't remove  'if (page_count(page))' even if we set page count to 0
>> when MEMINIT_HOTPLUG.
> 
> That 'if' breaks the invariant that __free_pages_core is always called for
> pages with initialized page count. Adding it may lead to subtle bugs and
> random memory corruption so we don't want to add it at the first place.

As long as we have to special-case memory hotplug, we know that we are 
always coming via generic_online_page() in that case. We could either 
move some logic over there, or let __free_pages_core() know what it 
should do.

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-02  8:56                 ` David Hildenbrand
@ 2023-10-02 11:10                   ` Mike Rapoport
  2023-10-02 11:25                     ` David Hildenbrand
  0 siblings, 1 reply; 34+ messages in thread
From: Mike Rapoport @ 2023-10-02 11:10 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Yajun Deng, akpm, mike.kravetz, muchun.song, willy, linux-mm,
	linux-kernel

On Mon, Oct 02, 2023 at 10:56:51AM +0200, David Hildenbrand wrote:
> On 02.10.23 10:47, Mike Rapoport wrote:
> > On Mon, Oct 02, 2023 at 03:03:56PM +0800, Yajun Deng wrote:
> > > 
> > > On 2023/10/2 02:59, Mike Rapoport wrote:
> > > > On Fri, Sep 29, 2023 at 06:27:25PM +0800, Yajun Deng wrote:
> > > > > On 2023/9/29 18:02, Mike Rapoport wrote:
> > > > > > > > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > > > > > > > > index 06be8821d833..b868caabe8dc 100644
> > > > > > > > > --- a/mm/page_alloc.c
> > > > > > > > > +++ b/mm/page_alloc.c
> > > > > > > > > @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page, unsigned int order)
> > > > > > > > >      	unsigned int loop;
> > > > > > > > >      	/*
> > > > > > > > > -	 * When initializing the memmap, __init_single_page() sets the refcount
> > > > > > > > > -	 * of all pages to 1 ("allocated"/"not free"). We have to set the
> > > > > > > > > -	 * refcount of all involved pages to 0.
> > > > > > > > > +	 * When initializing the memmap, memmap_init_range sets the refcount
> > > > > > > > > +	 * of all pages to 1 ("reserved" and "free") in hotplug context. We
> > > > > > > > > +	 * have to set the refcount of all involved pages to 0. Otherwise,
> > > > > > > > > +	 * we don't do it, as reserve_bootmem_region only set the refcount on
> > > > > > > > > +	 * reserve region ("reserved") in early context.
> > > > > > > > >      	 */
> > > > > > > > Again, why hotplug and early init should be different?
> > > > > > > I will add a comment that describes it will save boot time.
> > > > > > But why do we need initialize struct pages differently at boot time vs
> > > > > > memory hotplug?
> > > > > > Is there a reason memory hotplug cannot have page count set to 0 just like
> > > > > > for pages reserved at boot time?
> > > > > This patch just save boot time in MEMINIT_EARLY. If someone finds out that
> > > > > it can save time in
> > > > > 
> > > > > MEMINIT_HOTPLUG, I think it can be done in another patch later. I just
> > > > > keeping it in the same.
> > > > But it's not the same. It becomes slower after your patch and the code that
> > > > frees the pages for MEMINIT_EARLY and MEMINIT_HOTPLUG becomes non-uniform
> > > > for no apparent reason.
> > > 
> > > __free_pages_core will also be called by others, such as:
> > > deferred_free_range, do_collection and memblock_free_late.
> > > 
> > > We couldn't remove  'if (page_count(page))' even if we set page count to 0
> > > when MEMINIT_HOTPLUG.
> > 
> > That 'if' breaks the invariant that __free_pages_core is always called for
> > pages with initialized page count. Adding it may lead to subtle bugs and
> > random memory corruption so we don't want to add it at the first place.
> 
> As long as we have to special-case memory hotplug, we know that we are
> always coming via generic_online_page() in that case. We could either move
> some logic over there, or let __free_pages_core() know what it should do.

Looks like the patch rather special cases MEMINIT_EARLY, although I didn't
thoroughly check the other code paths. 
Anyway, relying on page_count() to be correct in different ways for
different callers of __free_pages_core() does not sound right to me.
 
> -- 
> Cheers,
> 
> David / dhildenb
> 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-02 11:10                   ` Mike Rapoport
@ 2023-10-02 11:25                     ` David Hildenbrand
  2023-10-03 14:38                       ` Yajun Deng
  0 siblings, 1 reply; 34+ messages in thread
From: David Hildenbrand @ 2023-10-02 11:25 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: Yajun Deng, akpm, mike.kravetz, muchun.song, willy, linux-mm,
	linux-kernel

On 02.10.23 13:10, Mike Rapoport wrote:
> On Mon, Oct 02, 2023 at 10:56:51AM +0200, David Hildenbrand wrote:
>> On 02.10.23 10:47, Mike Rapoport wrote:
>>> On Mon, Oct 02, 2023 at 03:03:56PM +0800, Yajun Deng wrote:
>>>>
>>>> On 2023/10/2 02:59, Mike Rapoport wrote:
>>>>> On Fri, Sep 29, 2023 at 06:27:25PM +0800, Yajun Deng wrote:
>>>>>> On 2023/9/29 18:02, Mike Rapoport wrote:
>>>>>>>>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>>>>>>>>> index 06be8821d833..b868caabe8dc 100644
>>>>>>>>>> --- a/mm/page_alloc.c
>>>>>>>>>> +++ b/mm/page_alloc.c
>>>>>>>>>> @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page, unsigned int order)
>>>>>>>>>>       	unsigned int loop;
>>>>>>>>>>       	/*
>>>>>>>>>> -	 * When initializing the memmap, __init_single_page() sets the refcount
>>>>>>>>>> -	 * of all pages to 1 ("allocated"/"not free"). We have to set the
>>>>>>>>>> -	 * refcount of all involved pages to 0.
>>>>>>>>>> +	 * When initializing the memmap, memmap_init_range sets the refcount
>>>>>>>>>> +	 * of all pages to 1 ("reserved" and "free") in hotplug context. We
>>>>>>>>>> +	 * have to set the refcount of all involved pages to 0. Otherwise,
>>>>>>>>>> +	 * we don't do it, as reserve_bootmem_region only set the refcount on
>>>>>>>>>> +	 * reserve region ("reserved") in early context.
>>>>>>>>>>       	 */
>>>>>>>>> Again, why hotplug and early init should be different?
>>>>>>>> I will add a comment that describes it will save boot time.
>>>>>>> But why do we need initialize struct pages differently at boot time vs
>>>>>>> memory hotplug?
>>>>>>> Is there a reason memory hotplug cannot have page count set to 0 just like
>>>>>>> for pages reserved at boot time?
>>>>>> This patch just save boot time in MEMINIT_EARLY. If someone finds out that
>>>>>> it can save time in
>>>>>>
>>>>>> MEMINIT_HOTPLUG, I think it can be done in another patch later. I just
>>>>>> keeping it in the same.
>>>>> But it's not the same. It becomes slower after your patch and the code that
>>>>> frees the pages for MEMINIT_EARLY and MEMINIT_HOTPLUG becomes non-uniform
>>>>> for no apparent reason.
>>>>
>>>> __free_pages_core will also be called by others, such as:
>>>> deferred_free_range, do_collection and memblock_free_late.
>>>>
>>>> We couldn't remove  'if (page_count(page))' even if we set page count to 0
>>>> when MEMINIT_HOTPLUG.
>>>
>>> That 'if' breaks the invariant that __free_pages_core is always called for
>>> pages with initialized page count. Adding it may lead to subtle bugs and
>>> random memory corruption so we don't want to add it at the first place.
>>
>> As long as we have to special-case memory hotplug, we know that we are
>> always coming via generic_online_page() in that case. We could either move
>> some logic over there, or let __free_pages_core() know what it should do.
> 
> Looks like the patch rather special cases MEMINIT_EARLY, although I didn't
> check throughfully other code paths.
> Anyway, relying on page_count() to be correct in different ways for
> different callers of __free_pages_core() does not sound right to me.

Absolutely agreed.

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-02 11:25                     ` David Hildenbrand
@ 2023-10-03 14:38                       ` Yajun Deng
  2023-10-05  5:06                         ` Mike Rapoport
  0 siblings, 1 reply; 34+ messages in thread
From: Yajun Deng @ 2023-10-03 14:38 UTC (permalink / raw)
  To: David Hildenbrand, Mike Rapoport
  Cc: akpm, mike.kravetz, muchun.song, willy, linux-mm, linux-kernel


On 2023/10/2 19:25, David Hildenbrand wrote:
> On 02.10.23 13:10, Mike Rapoport wrote:
>> On Mon, Oct 02, 2023 at 10:56:51AM +0200, David Hildenbrand wrote:
>>> On 02.10.23 10:47, Mike Rapoport wrote:
>>>> On Mon, Oct 02, 2023 at 03:03:56PM +0800, Yajun Deng wrote:
>>>>>
>>>>> On 2023/10/2 02:59, Mike Rapoport wrote:
>>>>>> On Fri, Sep 29, 2023 at 06:27:25PM +0800, Yajun Deng wrote:
>>>>>>> On 2023/9/29 18:02, Mike Rapoport wrote:
>>>>>>>>>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>>>>>>>>>> index 06be8821d833..b868caabe8dc 100644
>>>>>>>>>>> --- a/mm/page_alloc.c
>>>>>>>>>>> +++ b/mm/page_alloc.c
>>>>>>>>>>> @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page 
>>>>>>>>>>> *page, unsigned int order)
>>>>>>>>>>>           unsigned int loop;
>>>>>>>>>>>           /*
>>>>>>>>>>> -     * When initializing the memmap, __init_single_page() 
>>>>>>>>>>> sets the refcount
>>>>>>>>>>> -     * of all pages to 1 ("allocated"/"not free"). We have 
>>>>>>>>>>> to set the
>>>>>>>>>>> -     * refcount of all involved pages to 0.
>>>>>>>>>>> +     * When initializing the memmap, memmap_init_range sets 
>>>>>>>>>>> the refcount
>>>>>>>>>>> +     * of all pages to 1 ("reserved" and "free") in hotplug 
>>>>>>>>>>> context. We
>>>>>>>>>>> +     * have to set the refcount of all involved pages to 0. 
>>>>>>>>>>> Otherwise,
>>>>>>>>>>> +     * we don't do it, as reserve_bootmem_region only set 
>>>>>>>>>>> the refcount on
>>>>>>>>>>> +     * reserve region ("reserved") in early context.
>>>>>>>>>>>            */
>>>>>>>>>> Again, why hotplug and early init should be different?
>>>>>>>>> I will add a comment that describes it will save boot time.
>>>>>>>> But why do we need initialize struct pages differently at boot 
>>>>>>>> time vs
>>>>>>>> memory hotplug?
>>>>>>>> Is there a reason memory hotplug cannot have page count set to 
>>>>>>>> 0 just like
>>>>>>>> for pages reserved at boot time?
>>>>>>> This patch just save boot time in MEMINIT_EARLY. If someone 
>>>>>>> finds out that
>>>>>>> it can save time in
>>>>>>>
>>>>>>> MEMINIT_HOTPLUG, I think it can be done in another patch later. 
>>>>>>> I just
>>>>>>> keeping it in the same.
>>>>>> But it's not the same. It becomes slower after your patch and the 
>>>>>> code that
>>>>>> frees the pages for MEMINIT_EARLY and MEMINIT_HOTPLUG becomes 
>>>>>> non-uniform
>>>>>> for no apparent reason.
>>>>>
>>>>> __free_pages_core will also be called by others, such as:
>>>>> deferred_free_range, do_collection and memblock_free_late.
>>>>>
>>>>> We couldn't remove  'if (page_count(page))' even if we set page 
>>>>> count to 0
>>>>> when MEMINIT_HOTPLUG.
>>>>
>>>> That 'if' breaks the invariant that __free_pages_core is always 
>>>> called for
>>>> pages with initialized page count. Adding it may lead to subtle 
>>>> bugs and
>>>> random memory corruption so we don't want to add it at the first 
>>>> place.
>>>
>>> As long as we have to special-case memory hotplug, we know that we are
>>> always coming via generic_online_page() in that case. We could 
>>> either move
>>> some logic over there, or let __free_pages_core() know what it 
>>> should do.
>>
>> Looks like the patch rather special cases MEMINIT_EARLY, although I 
>> didn't
>> check throughfully other code paths.
>> Anyway, relying on page_count() to be correct in different ways for
>> different callers of __free_pages_core() does not sound right to me.
>
> Absolutely agreed.
>
I already sent v5  a few days ago. Comments, please...




^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-03 14:38                       ` Yajun Deng
@ 2023-10-05  5:06                         ` Mike Rapoport
  2023-10-05 14:04                           ` Yajun Deng
  0 siblings, 1 reply; 34+ messages in thread
From: Mike Rapoport @ 2023-10-05  5:06 UTC (permalink / raw)
  To: Yajun Deng
  Cc: David Hildenbrand, akpm, mike.kravetz, muchun.song, willy,
	linux-mm, linux-kernel

On Tue, Oct 03, 2023 at 10:38:09PM +0800, Yajun Deng wrote:
> 
> On 2023/10/2 19:25, David Hildenbrand wrote:
> > On 02.10.23 13:10, Mike Rapoport wrote:
> > > > > 
> > > > > That 'if' breaks the invariant that __free_pages_core is
> > > > > always called for
> > > > > pages with initialized page count. Adding it may lead to
> > > > > subtle bugs and
> > > > > random memory corruption so we don't want to add it at the
> > > > > first place.
> > > > 
> > > > As long as we have to special-case memory hotplug, we know that we are
> > > > always coming via generic_online_page() in that case. We could
> > > > either move
> > > > some logic over there, or let __free_pages_core() know what it
> > > > should do.
> > > 
> > > Looks like the patch rather special cases MEMINIT_EARLY, although I
> > > didn't
> > > check throughfully other code paths.
> > > Anyway, relying on page_count() to be correct in different ways for
> > > different callers of __free_pages_core() does not sound right to me.
> > 
> > Absolutely agreed.
> > 
> I already sent v5  a few days ago. Comments, please...

Does it address all the feedback from this thread?

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-05  5:06                         ` Mike Rapoport
@ 2023-10-05 14:04                           ` Yajun Deng
  2023-10-12  9:19                             ` Mike Rapoport
  0 siblings, 1 reply; 34+ messages in thread
From: Yajun Deng @ 2023-10-05 14:04 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: David Hildenbrand, akpm, mike.kravetz, muchun.song, willy,
	linux-mm, linux-kernel


On 2023/10/5 13:06, Mike Rapoport wrote:
> On Tue, Oct 03, 2023 at 10:38:09PM +0800, Yajun Deng wrote:
>> On 2023/10/2 19:25, David Hildenbrand wrote:
>>> On 02.10.23 13:10, Mike Rapoport wrote:
>>>>>> That 'if' breaks the invariant that __free_pages_core is
>>>>>> always called for
>>>>>> pages with initialized page count. Adding it may lead to
>>>>>> subtle bugs and
>>>>>> random memory corruption so we don't want to add it at the
>>>>>> first place.
>>>>> As long as we have to special-case memory hotplug, we know that we are
>>>>> always coming via generic_online_page() in that case. We could
>>>>> either move
>>>>> some logic over there, or let __free_pages_core() know what it
>>>>> should do.
>>>> Looks like the patch rather special cases MEMINIT_EARLY, although I
>>>> didn't
>>>> check throughfully other code paths.
>>>> Anyway, relying on page_count() to be correct in different ways for
>>>> different callers of __free_pages_core() does not sound right to me.
>>> Absolutely agreed.
>>>
>> I already sent v5  a few days ago. Comments, please...
> Does it address all the feedback from this thread?
>

Except hotplug. As far as I know, we only clear the page count in 
MEMINIT_EARLY and for all tail pages in a compound page.

So adding 'if (page_count(page))' will have no actual effect for other 
cases. According to previous data, it didn't

become slower in hotplug.



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-02  8:30     ` David Hildenbrand
@ 2023-10-08  8:57       ` Yajun Deng
  2023-10-10  2:31         ` Yajun Deng
  0 siblings, 1 reply; 34+ messages in thread
From: Yajun Deng @ 2023-10-08  8:57 UTC (permalink / raw)
  To: David Hildenbrand, Mike Rapoport
  Cc: akpm, mike.kravetz, muchun.song, willy, linux-mm, linux-kernel


On 2023/10/2 16:30, David Hildenbrand wrote:
> On 29.09.23 10:30, Mike Rapoport wrote:
>> On Thu, Sep 28, 2023 at 04:33:02PM +0800, Yajun Deng wrote:
>>> memmap_init_range() would init page count of all pages, but the free
>>> pages count would be reset in __free_pages_core(). There are opposite
>>> operations. It's unnecessary and time-consuming when it's MEMINIT_EARLY
>>> context.
>>>
>>> Init page count in reserve_bootmem_region when in MEMINIT_EARLY 
>>> context,
>>> and check the page count before reset it.
>>>
>>> At the same time, the INIT_LIST_HEAD in reserve_bootmem_region isn't
>>> need, as it already done in __init_single_page.
>>>
>>> The following data was tested on an x86 machine with 190GB of RAM.
>>>
>>> before:
>>> free_low_memory_core_early()    341ms
>>>
>>> after:
>>> free_low_memory_core_early()    285ms
>>>
>>> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
>>> ---
>>> v4: same with v2.
>>> v3: same with v2.
>>> v2: check page count instead of check context before reset it.
>>> v1: 
>>> https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
>>> ---
>>>   mm/mm_init.c    | 18 +++++++++++++-----
>>>   mm/page_alloc.c | 20 ++++++++++++--------
>>>   2 files changed, 25 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/mm/mm_init.c b/mm/mm_init.c
>>> index 9716c8a7ade9..3ab8861e1ef3 100644
>>> --- a/mm/mm_init.c
>>> +++ b/mm/mm_init.c
>>> @@ -718,7 +718,7 @@ static void __meminit 
>>> init_reserved_page(unsigned long pfn, int nid)
>>>           if (zone_spans_pfn(zone, pfn))
>>>               break;
>>>       }
>>> -    __init_single_page(pfn_to_page(pfn), pfn, zid, nid, 
>>> INIT_PAGE_COUNT);
>>> +    __init_single_page(pfn_to_page(pfn), pfn, zid, nid, 0);
>>>   }
>>>   #else
>>>   static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
>>> @@ -756,8 +756,8 @@ void __meminit 
>>> reserve_bootmem_region(phys_addr_t start,
>>>                 init_reserved_page(start_pfn, nid);
>>>   -            /* Avoid false-positive PageTail() */
>>> -            INIT_LIST_HEAD(&page->lru);
>>> +            /* Init page count for reserved region */
>>
>> Please add a comment that describes _why_ we initialize the page 
>> count here.
>>
>>> +            init_page_count(page);
>>>                 /*
>>>                * no need for atomic set_bit because the struct
>>> @@ -888,9 +888,17 @@ void __meminit memmap_init_range(unsigned long 
>>> size, int nid, unsigned long zone
>>>           }
>>>             page = pfn_to_page(pfn);
>>> -        __init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
>>> -        if (context == MEMINIT_HOTPLUG)
>>> +
>>> +        /* If the context is MEMINIT_EARLY, we will init page count 
>>> and
>>> +         * mark page reserved in reserve_bootmem_region, the free 
>>> region
>>> +         * wouldn't have page count and we will check the pages count
>>> +         * in __free_pages_core.
>>> +         */
>>> +        __init_single_page(page, pfn, zone, nid, 0);
>>> +        if (context == MEMINIT_HOTPLUG) {
>>> +            init_page_count(page);
>>>               __SetPageReserved(page);
>>
>> Rather than calling init_page_count() and __SetPageReserved() for
>> MEMINIT_HOTPLUG you can set flags to INIT_PAGE_COUNT | 
>> INIT_PAGE_RESERVED
>> an call __init_single_page() after the check for MEMINIT_HOTPLUG.
>>
>> But more generally, I wonder if we have to differentiate HOTPLUG here 
>> at all.
>> @David, can you comment please?
>
> There are a lot of details to that, and I'll share some I can briefly 
> think of.
>
> 1) __SetPageReserved()
>
> I tried removing that a while ago, but there was a blocker (IIRC 
> something about
> ZONE_DEVICE). I still have the patches at [1] and I could probably 
> take a look
> if that blocker still exists (I recall that something changed at some 
> point, but
> I never had the time to follow up).
>
> But once we stop setting the pages reserved, we might run into issues 
> with ...
>
>
> 2) init_page_count()
>
> virtio-mem, XEN balloon and HV-balloon add memory blocks that can 
> contain holes.
> set_online_page_callback() is used to intercept memory onlining and to 
> expose
> only the pages that are not holes to the buddy: calling 
> generic_online_page() on !hole.
>
> Holes are PageReserved but with an initialized page count. Memory 
> offlining will fail on
> PageReserved pages -- has_unmovable_pages().
>
>
> At least virtio-mem clears the PageReserved flag of holes when 
> onlining memory,
> and currently relies in the page count to be reasonable (so memory 
> offlining can work).
>
> static void virtio_mem_set_fake_offline(unsigned long pfn,
>                     unsigned long nr_pages, bool onlined)
> {
>     page_offline_begin();
>     for (; nr_pages--; pfn++) {
>         struct page *page = pfn_to_page(pfn);
>
>         __SetPageOffline(page);
>         if (!onlined) {
>             SetPageDirty(page);
>             /* FIXME: remove after cleanups */
>             ClearPageReserved(page);
>         }
>     }
>     page_offline_end();
> }
>
>
> For virtio-mem, we could initialize the page count there instead. The 
> other PV drivers
> might require a bit more thought.
>
>
> [1] 
> https://github.com/davidhildenbrand/linux/tree/online_reserved_cleanup
>
>>
>>> +        }
>>>             /*
>>>            * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>> index 06be8821d833..b868caabe8dc 100644
>>> --- a/mm/page_alloc.c
>>> +++ b/mm/page_alloc.c
>>> @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page, 
>>> unsigned int order)
>>>       unsigned int loop;
>>>         /*
>>> -     * When initializing the memmap, __init_single_page() sets the 
>>> refcount
>>> -     * of all pages to 1 ("allocated"/"not free"). We have to set the
>>> -     * refcount of all involved pages to 0.
>>> +     * When initializing the memmap, memmap_init_range sets the 
>>> refcount
>>> +     * of all pages to 1 ("reserved" and "free") in hotplug 
>>> context. We
>>> +     * have to set the refcount of all involved pages to 0. Otherwise,
>>> +     * we don't do it, as reserve_bootmem_region only set the 
>>> refcount on
>>> +     * reserve region ("reserved") in early context.
>>>        */
>>
>> Again, why hotplug and early init should be different?
>>
>>> -    prefetchw(p);
>>> -    for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>>> -        prefetchw(p + 1);
>>> +    if (page_count(page)) {
>>> +        prefetchw(p);
>>> +        for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>>> +            prefetchw(p + 1);
>>> +            __ClearPageReserved(p);
>>> +            set_page_count(p, 0);
>>> +        }
>>>           __ClearPageReserved(p);
>>>           set_page_count(p, 0);
>
> That looks wrong. if the page count would by pure luck be 0 already 
> for hotplugged memory,
> you wouldn't clear the reserved flag.
>
> These changes make me a bit nervous.


Would 'if (page_count(page) || PageReserved(page))' be safer? Or do I need 
to do something else?



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-08  8:57       ` Yajun Deng
@ 2023-10-10  2:31         ` Yajun Deng
  2023-10-12  9:23           ` David Hildenbrand
  0 siblings, 1 reply; 34+ messages in thread
From: Yajun Deng @ 2023-10-10  2:31 UTC (permalink / raw)
  To: David Hildenbrand, Mike Rapoport
  Cc: akpm, mike.kravetz, muchun.song, willy, linux-mm, linux-kernel


On 2023/10/8 16:57, Yajun Deng wrote:
>
> On 2023/10/2 16:30, David Hildenbrand wrote:
>> On 29.09.23 10:30, Mike Rapoport wrote:
>>> On Thu, Sep 28, 2023 at 04:33:02PM +0800, Yajun Deng wrote:
>>>> memmap_init_range() would init page count of all pages, but the free
>>>> pages count would be reset in __free_pages_core(). There are opposite
>>>> operations. It's unnecessary and time-consuming when it's 
>>>> MEMINIT_EARLY
>>>> context.
>>>>
>>>> Init page count in reserve_bootmem_region when in MEMINIT_EARLY 
>>>> context,
>>>> and check the page count before reset it.
>>>>
>>>> At the same time, the INIT_LIST_HEAD in reserve_bootmem_region isn't
>>>> need, as it already done in __init_single_page.
>>>>
>>>> The following data was tested on an x86 machine with 190GB of RAM.
>>>>
>>>> before:
>>>> free_low_memory_core_early()    341ms
>>>>
>>>> after:
>>>> free_low_memory_core_early()    285ms
>>>>
>>>> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
>>>> ---
>>>> v4: same with v2.
>>>> v3: same with v2.
>>>> v2: check page count instead of check context before reset it.
>>>> v1: 
>>>> https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
>>>> ---
>>>>   mm/mm_init.c    | 18 +++++++++++++-----
>>>>   mm/page_alloc.c | 20 ++++++++++++--------
>>>>   2 files changed, 25 insertions(+), 13 deletions(-)
>>>>
>>>> diff --git a/mm/mm_init.c b/mm/mm_init.c
>>>> index 9716c8a7ade9..3ab8861e1ef3 100644
>>>> --- a/mm/mm_init.c
>>>> +++ b/mm/mm_init.c
>>>> @@ -718,7 +718,7 @@ static void __meminit 
>>>> init_reserved_page(unsigned long pfn, int nid)
>>>>           if (zone_spans_pfn(zone, pfn))
>>>>               break;
>>>>       }
>>>> -    __init_single_page(pfn_to_page(pfn), pfn, zid, nid, 
>>>> INIT_PAGE_COUNT);
>>>> +    __init_single_page(pfn_to_page(pfn), pfn, zid, nid, 0);
>>>>   }
>>>>   #else
>>>>   static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
>>>> @@ -756,8 +756,8 @@ void __meminit 
>>>> reserve_bootmem_region(phys_addr_t start,
>>>>                 init_reserved_page(start_pfn, nid);
>>>>   -            /* Avoid false-positive PageTail() */
>>>> -            INIT_LIST_HEAD(&page->lru);
>>>> +            /* Init page count for reserved region */
>>>
>>> Please add a comment that describes _why_ we initialize the page 
>>> count here.
>>>
>>>> +            init_page_count(page);
>>>>                 /*
>>>>                * no need for atomic set_bit because the struct
>>>> @@ -888,9 +888,17 @@ void __meminit memmap_init_range(unsigned long 
>>>> size, int nid, unsigned long zone
>>>>           }
>>>>             page = pfn_to_page(pfn);
>>>> -        __init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
>>>> -        if (context == MEMINIT_HOTPLUG)
>>>> +
>>>> +        /* If the context is MEMINIT_EARLY, we will init page 
>>>> count and
>>>> +         * mark page reserved in reserve_bootmem_region, the free 
>>>> region
>>>> +         * wouldn't have page count and we will check the pages count
>>>> +         * in __free_pages_core.
>>>> +         */
>>>> +        __init_single_page(page, pfn, zone, nid, 0);
>>>> +        if (context == MEMINIT_HOTPLUG) {
>>>> +            init_page_count(page);
>>>>               __SetPageReserved(page);
>>>
>>> Rather than calling init_page_count() and __SetPageReserved() for
>>> MEMINIT_HOTPLUG you can set flags to INIT_PAGE_COUNT | 
>>> INIT_PAGE_RESERVED
>>> an call __init_single_page() after the check for MEMINIT_HOTPLUG.
>>>
>>> But more generally, I wonder if we have to differentiate HOTPLUG 
>>> here at all.
>>> @David, can you comment please?
>>
>> There are a lot of details to that, and I'll share some I can briefly 
>> think of.
>>
>> 1) __SetPageReserved()
>>
>> I tried removing that a while ago, but there was a blocker (IIRC 
>> something about
>> ZONE_DEVICE). I still have the patches at [1] and I could probably 
>> take a look
>> if that blocker still exists (I recall that something changed at some 
>> point, but
>> I never had the time to follow up).
>>
>> But once we stop setting the pages reserved, we might run into issues 
>> with ...
>>
>>
>> 2) init_page_count()
>>
>> virtio-mem, XEN balloon and HV-balloon add memory blocks that can 
>> contain holes.
>> set_online_page_callback() is used to intercept memory onlining and 
>> to expose
>> only the pages that are not holes to the buddy: calling 
>> generic_online_page() on !hole.
>>
>> Holes are PageReserved but with an initialized page count. Memory 
>> offlining will fail on
>> PageReserved pages -- has_unmovable_pages().
>>
>>
>> At least virtio-mem clears the PageReserved flag of holes when 
>> onlining memory,
>> and currently relies in the page count to be reasonable (so memory 
>> offlining can work).
>>
>> static void virtio_mem_set_fake_offline(unsigned long pfn,
>>                     unsigned long nr_pages, bool onlined)
>> {
>>     page_offline_begin();
>>     for (; nr_pages--; pfn++) {
>>         struct page *page = pfn_to_page(pfn);
>>
>>         __SetPageOffline(page);
>>         if (!onlined) {
>>             SetPageDirty(page);
>>             /* FIXME: remove after cleanups */
>>             ClearPageReserved(page);
>>         }
>>     }
>>     page_offline_end();
>> }
>>
>>
>> For virtio-mem, we could initialize the page count there instead. The 
>> other PV drivers
>> might require a bit more thought.
>>
>>
>> [1] 
>> https://github.com/davidhildenbrand/linux/tree/online_reserved_cleanup
>>
>>>
>>>> +        }
>>>>             /*
>>>>            * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
>>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>>> index 06be8821d833..b868caabe8dc 100644
>>>> --- a/mm/page_alloc.c
>>>> +++ b/mm/page_alloc.c
>>>> @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page, 
>>>> unsigned int order)
>>>>       unsigned int loop;
>>>>         /*
>>>> -     * When initializing the memmap, __init_single_page() sets the 
>>>> refcount
>>>> -     * of all pages to 1 ("allocated"/"not free"). We have to set the
>>>> -     * refcount of all involved pages to 0.
>>>> +     * When initializing the memmap, memmap_init_range sets the 
>>>> refcount
>>>> +     * of all pages to 1 ("reserved" and "free") in hotplug 
>>>> context. We
>>>> +     * have to set the refcount of all involved pages to 0. 
>>>> Otherwise,
>>>> +     * we don't do it, as reserve_bootmem_region only set the 
>>>> refcount on
>>>> +     * reserve region ("reserved") in early context.
>>>>        */
>>>
>>> Again, why hotplug and early init should be different?
>>>
>>>> -    prefetchw(p);
>>>> -    for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>>>> -        prefetchw(p + 1);
>>>> +    if (page_count(page)) {
>>>> +        prefetchw(p);
>>>> +        for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>>>> +            prefetchw(p + 1);
>>>> +            __ClearPageReserved(p);
>>>> +            set_page_count(p, 0);
>>>> +        }
>>>>           __ClearPageReserved(p);
>>>>           set_page_count(p, 0);
>>
>> That looks wrong. if the page count would by pure luck be 0 already 
>> for hotplugged memory,
>> you wouldn't clear the reserved flag.
>>
>> These changes make me a bit nervous.
>
>
> Is 'if (page_count(page) || PageReserved(page))' be safer? Or do I 
> need to do something else?
>

How about the following if statement? But it needs more patches, 
like in v1 ([PATCH 2/4] mm: Introduce MEMINIT_LATE context).

It'll be safer, but more complex. Please comment...

	if (context != MEMINIT_EARLY || (page_count(page) || PageReserved(page))) {



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-05 14:04                           ` Yajun Deng
@ 2023-10-12  9:19                             ` Mike Rapoport
  2023-10-12  9:36                               ` Yajun Deng
  0 siblings, 1 reply; 34+ messages in thread
From: Mike Rapoport @ 2023-10-12  9:19 UTC (permalink / raw)
  To: Yajun Deng
  Cc: David Hildenbrand, akpm, mike.kravetz, muchun.song, willy,
	linux-mm, linux-kernel

On Thu, Oct 05, 2023 at 10:04:28PM +0800, Yajun Deng wrote:
> 
> > > > > > > That 'if' breaks the invariant that __free_pages_core is
> > > > > > > always called for pages with initialized page count. Adding
> > > > > > > it may lead to subtle bugs and random memory corruption so we
> > > > > > > don't want to add it at the first place.
> > > > > >
> > > > > > As long as we have to special-case memory hotplug, we know that
> > > > > > we are always coming via generic_online_page() in that case. We
> > > > > > could either move some logic over there, or let
> > > > > > __free_pages_core() know what it should do.
> > > > >
> > > > > Looks like the patch rather special cases MEMINIT_EARLY, although
> > > > > > I didn't check thoroughly other code paths.  Anyway, relying on
> > > > > page_count() to be correct in different ways for different
> > > > > callers of __free_pages_core() does not sound right to me.
> > > >
> > > > Absolutely agreed.
> > > > 
> > > I already sent v5  a few days ago. Comments, please...
> >
> > Does it address all the feedback from this thread?
> 
> Except hotplug. 

Please reread carefully the last comments from me and from David above.

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-10  2:31         ` Yajun Deng
@ 2023-10-12  9:23           ` David Hildenbrand
  2023-10-12  9:53             ` Yajun Deng
  0 siblings, 1 reply; 34+ messages in thread
From: David Hildenbrand @ 2023-10-12  9:23 UTC (permalink / raw)
  To: Yajun Deng, Mike Rapoport
  Cc: akpm, mike.kravetz, muchun.song, willy, linux-mm, linux-kernel

On 10.10.23 04:31, Yajun Deng wrote:
> 
> On 2023/10/8 16:57, Yajun Deng wrote:
>>
>> On 2023/10/2 16:30, David Hildenbrand wrote:
>>> On 29.09.23 10:30, Mike Rapoport wrote:
>>>> On Thu, Sep 28, 2023 at 04:33:02PM +0800, Yajun Deng wrote:
>>>>> memmap_init_range() would init page count of all pages, but the free
>>>>> pages count would be reset in __free_pages_core(). There are opposite
>>>>> operations. It's unnecessary and time-consuming when it's
>>>>> MEMINIT_EARLY
>>>>> context.
>>>>>
>>>>> Init page count in reserve_bootmem_region when in MEMINIT_EARLY
>>>>> context,
>>>>> and check the page count before reset it.
>>>>>
>>>>> At the same time, the INIT_LIST_HEAD in reserve_bootmem_region isn't
>>>>> need, as it already done in __init_single_page.
>>>>>
>>>>> The following data was tested on an x86 machine with 190GB of RAM.
>>>>>
>>>>> before:
>>>>> free_low_memory_core_early()    341ms
>>>>>
>>>>> after:
>>>>> free_low_memory_core_early()    285ms
>>>>>
>>>>> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
>>>>> ---
>>>>> v4: same with v2.
>>>>> v3: same with v2.
>>>>> v2: check page count instead of check context before reset it.
>>>>> v1:
>>>>> https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
>>>>> ---
>>>>>    mm/mm_init.c    | 18 +++++++++++++-----
>>>>>    mm/page_alloc.c | 20 ++++++++++++--------
>>>>>    2 files changed, 25 insertions(+), 13 deletions(-)
>>>>>
>>>>> diff --git a/mm/mm_init.c b/mm/mm_init.c
>>>>> index 9716c8a7ade9..3ab8861e1ef3 100644
>>>>> --- a/mm/mm_init.c
>>>>> +++ b/mm/mm_init.c
>>>>> @@ -718,7 +718,7 @@ static void __meminit
>>>>> init_reserved_page(unsigned long pfn, int nid)
>>>>>            if (zone_spans_pfn(zone, pfn))
>>>>>                break;
>>>>>        }
>>>>> -    __init_single_page(pfn_to_page(pfn), pfn, zid, nid,
>>>>> INIT_PAGE_COUNT);
>>>>> +    __init_single_page(pfn_to_page(pfn), pfn, zid, nid, 0);
>>>>>    }
>>>>>    #else
>>>>>    static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
>>>>> @@ -756,8 +756,8 @@ void __meminit
>>>>> reserve_bootmem_region(phys_addr_t start,
>>>>>                  init_reserved_page(start_pfn, nid);
>>>>>    -            /* Avoid false-positive PageTail() */
>>>>> -            INIT_LIST_HEAD(&page->lru);
>>>>> +            /* Init page count for reserved region */
>>>>
>>>> Please add a comment that describes _why_ we initialize the page
>>>> count here.
>>>>
>>>>> +            init_page_count(page);
>>>>>                  /*
>>>>>                 * no need for atomic set_bit because the struct
>>>>> @@ -888,9 +888,17 @@ void __meminit memmap_init_range(unsigned long
>>>>> size, int nid, unsigned long zone
>>>>>            }
>>>>>              page = pfn_to_page(pfn);
>>>>> -        __init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
>>>>> -        if (context == MEMINIT_HOTPLUG)
>>>>> +
>>>>> +        /* If the context is MEMINIT_EARLY, we will init page
>>>>> count and
>>>>> +         * mark page reserved in reserve_bootmem_region, the free
>>>>> region
>>>>> +         * wouldn't have page count and we will check the pages count
>>>>> +         * in __free_pages_core.
>>>>> +         */
>>>>> +        __init_single_page(page, pfn, zone, nid, 0);
>>>>> +        if (context == MEMINIT_HOTPLUG) {
>>>>> +            init_page_count(page);
>>>>>                __SetPageReserved(page);
>>>>
>>>> Rather than calling init_page_count() and __SetPageReserved() for
>>>> MEMINIT_HOTPLUG you can set flags to INIT_PAGE_COUNT |
>>>> INIT_PAGE_RESERVED
>>>> an call __init_single_page() after the check for MEMINIT_HOTPLUG.
>>>>
>>>> But more generally, I wonder if we have to differentiate HOTPLUG
>>>> here at all.
>>>> @David, can you comment please?
>>>
>>> There are a lot of details to that, and I'll share some I can briefly
>>> think of.
>>>
>>> 1) __SetPageReserved()
>>>
>>> I tried removing that a while ago, but there was a blocker (IIRC
>>> something about
>>> ZONE_DEVICE). I still have the patches at [1] and I could probably
>>> take a look
>>> if that blocker still exists (I recall that something changed at some
>>> point, but
>>> I never had the time to follow up).
>>>
>>> But once we stop setting the pages reserved, we might run into issues
>>> with ...
>>>
>>>
>>> 2) init_page_count()
>>>
>>> virtio-mem, XEN balloon and HV-balloon add memory blocks that can
>>> contain holes.
>>> set_online_page_callback() is used to intercept memory onlining and
>>> to expose
>>> only the pages that are not holes to the buddy: calling
>>> generic_online_page() on !hole.
>>>
>>> Holes are PageReserved but with an initialized page count. Memory
>>> offlining will fail on
>>> PageReserved pages -- has_unmovable_pages().
>>>
>>>
>>> At least virtio-mem clears the PageReserved flag of holes when
>>> onlining memory,
>>> and currently relies in the page count to be reasonable (so memory
>>> offlining can work).
>>>
>>> static void virtio_mem_set_fake_offline(unsigned long pfn,
>>>                      unsigned long nr_pages, bool onlined)
>>> {
>>>      page_offline_begin();
>>>      for (; nr_pages--; pfn++) {
>>>          struct page *page = pfn_to_page(pfn);
>>>
>>>          __SetPageOffline(page);
>>>          if (!onlined) {
>>>              SetPageDirty(page);
>>>              /* FIXME: remove after cleanups */
>>>              ClearPageReserved(page);
>>>          }
>>>      }
>>>      page_offline_end();
>>> }
>>>
>>>
>>> For virtio-mem, we could initialize the page count there instead. The
>>> other PV drivers
>>> might require a bit more thought.
>>>
>>>
>>> [1]
>>> https://github.com/davidhildenbrand/linux/tree/online_reserved_cleanup
>>>
>>>>
>>>>> +        }
>>>>>              /*
>>>>>             * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
>>>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>>>> index 06be8821d833..b868caabe8dc 100644
>>>>> --- a/mm/page_alloc.c
>>>>> +++ b/mm/page_alloc.c
>>>>> @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page,
>>>>> unsigned int order)
>>>>>        unsigned int loop;
>>>>>          /*
>>>>> -     * When initializing the memmap, __init_single_page() sets the
>>>>> refcount
>>>>> -     * of all pages to 1 ("allocated"/"not free"). We have to set the
>>>>> -     * refcount of all involved pages to 0.
>>>>> +     * When initializing the memmap, memmap_init_range sets the
>>>>> refcount
>>>>> +     * of all pages to 1 ("reserved" and "free") in hotplug
>>>>> context. We
>>>>> +     * have to set the refcount of all involved pages to 0.
>>>>> Otherwise,
>>>>> +     * we don't do it, as reserve_bootmem_region only set the
>>>>> refcount on
>>>>> +     * reserve region ("reserved") in early context.
>>>>>         */
>>>>
>>>> Again, why hotplug and early init should be different?
>>>>
>>>>> -    prefetchw(p);
>>>>> -    for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>>>>> -        prefetchw(p + 1);
>>>>> +    if (page_count(page)) {
>>>>> +        prefetchw(p);
>>>>> +        for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>>>>> +            prefetchw(p + 1);
>>>>> +            __ClearPageReserved(p);
>>>>> +            set_page_count(p, 0);
>>>>> +        }
>>>>>            __ClearPageReserved(p);
>>>>>            set_page_count(p, 0);
>>>
>>> That looks wrong. if the page count would by pure luck be 0 already
>>> for hotplugged memory,
>>> you wouldn't clear the reserved flag.
>>>
>>> These changes make me a bit nervous.
>>
>>
>> Is 'if (page_count(page) || PageReserved(page))' be safer? Or do I
>> need to do something else?
>>
> 
> How about the following if statement? But it needs to add more patch
> like v1 ([PATCH 2/4] mm: Introduce MEMINIT_LATE context).
> 
> It'll be safer, but more complex. Please comment...
> 
> 	if (context != MEMINIT_EARLY || (page_count(page) || PageReserved(page)) {
> 

Ideally we could make initialization only depend on the context, and not 
check for count or the reserved flag.

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-12  9:19                             ` Mike Rapoport
@ 2023-10-12  9:36                               ` Yajun Deng
  0 siblings, 0 replies; 34+ messages in thread
From: Yajun Deng @ 2023-10-12  9:36 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: David Hildenbrand, akpm, mike.kravetz, muchun.song, willy,
	linux-mm, linux-kernel


On 2023/10/12 17:19, Mike Rapoport wrote:
> On Thu, Oct 05, 2023 at 10:04:28PM +0800, Yajun Deng wrote:
>>>>>>>> That 'if' breaks the invariant that __free_pages_core is
>>>>>>>> always called for pages with initialized page count. Adding
>>>>>>>> it may lead to subtle bugs and random memory corruption so we
>>>>>>>> don't want to add it at the first place.
>>>>>>> As long as we have to special-case memory hotplug, we know that
>>>>>>> we are always coming via generic_online_page() in that case. We
>>>>>>> could either move some logic over there, or let
>>>>>>> __free_pages_core() know what it should do.
>>>>>> Looks like the patch rather special cases MEMINIT_EARLY, although
>>>>>> I didn't check throughfully other code paths.  Anyway, relying on
>>>>>> page_count() to be correct in different ways for different
>>>>>> callers of __free_pages_core() does not sound right to me.
>>>>> Absolutely agreed.
>>>>>
>>>> I already sent v5  a few days ago. Comments, please...
>>> Does it address all the feedback from this thread?
>> Except hotplug.
> Please reread carefully the last comments from me and from David above.
>

I replied in another thread about that 'if' statement. David just 
replied to me, let's discuss in another thread.



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-12  9:23           ` David Hildenbrand
@ 2023-10-12  9:53             ` Yajun Deng
  2023-10-13  8:48               ` Mike Rapoport
  0 siblings, 1 reply; 34+ messages in thread
From: Yajun Deng @ 2023-10-12  9:53 UTC (permalink / raw)
  To: David Hildenbrand, Mike Rapoport
  Cc: akpm, mike.kravetz, muchun.song, willy, linux-mm, linux-kernel


On 2023/10/12 17:23, David Hildenbrand wrote:
> On 10.10.23 04:31, Yajun Deng wrote:
>>
>> On 2023/10/8 16:57, Yajun Deng wrote:
>>>
>>> On 2023/10/2 16:30, David Hildenbrand wrote:
>>>> On 29.09.23 10:30, Mike Rapoport wrote:
>>>>> On Thu, Sep 28, 2023 at 04:33:02PM +0800, Yajun Deng wrote:
>>>>>> memmap_init_range() would init page count of all pages, but the free
>>>>>> pages count would be reset in __free_pages_core(). There are 
>>>>>> opposite
>>>>>> operations. It's unnecessary and time-consuming when it's
>>>>>> MEMINIT_EARLY
>>>>>> context.
>>>>>>
>>>>>> Init page count in reserve_bootmem_region when in MEMINIT_EARLY
>>>>>> context,
>>>>>> and check the page count before reset it.
>>>>>>
>>>>>> At the same time, the INIT_LIST_HEAD in reserve_bootmem_region isn't
>>>>>> need, as it already done in __init_single_page.
>>>>>>
>>>>>> The following data was tested on an x86 machine with 190GB of RAM.
>>>>>>
>>>>>> before:
>>>>>> free_low_memory_core_early()    341ms
>>>>>>
>>>>>> after:
>>>>>> free_low_memory_core_early()    285ms
>>>>>>
>>>>>> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
>>>>>> ---
>>>>>> v4: same with v2.
>>>>>> v3: same with v2.
>>>>>> v2: check page count instead of check context before reset it.
>>>>>> v1:
>>>>>> https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/ 
>>>>>>
>>>>>> ---
>>>>>>    mm/mm_init.c    | 18 +++++++++++++-----
>>>>>>    mm/page_alloc.c | 20 ++++++++++++--------
>>>>>>    2 files changed, 25 insertions(+), 13 deletions(-)
>>>>>>
>>>>>> diff --git a/mm/mm_init.c b/mm/mm_init.c
>>>>>> index 9716c8a7ade9..3ab8861e1ef3 100644
>>>>>> --- a/mm/mm_init.c
>>>>>> +++ b/mm/mm_init.c
>>>>>> @@ -718,7 +718,7 @@ static void __meminit
>>>>>> init_reserved_page(unsigned long pfn, int nid)
>>>>>>            if (zone_spans_pfn(zone, pfn))
>>>>>>                break;
>>>>>>        }
>>>>>> -    __init_single_page(pfn_to_page(pfn), pfn, zid, nid,
>>>>>> INIT_PAGE_COUNT);
>>>>>> +    __init_single_page(pfn_to_page(pfn), pfn, zid, nid, 0);
>>>>>>    }
>>>>>>    #else
>>>>>>    static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
>>>>>> @@ -756,8 +756,8 @@ void __meminit
>>>>>> reserve_bootmem_region(phys_addr_t start,
>>>>>>                  init_reserved_page(start_pfn, nid);
>>>>>>    -            /* Avoid false-positive PageTail() */
>>>>>> -            INIT_LIST_HEAD(&page->lru);
>>>>>> +            /* Init page count for reserved region */
>>>>>
>>>>> Please add a comment that describes _why_ we initialize the page
>>>>> count here.
>>>>>
>>>>>> + init_page_count(page);
>>>>>>                  /*
>>>>>>                 * no need for atomic set_bit because the struct
>>>>>> @@ -888,9 +888,17 @@ void __meminit memmap_init_range(unsigned long
>>>>>> size, int nid, unsigned long zone
>>>>>>            }
>>>>>>              page = pfn_to_page(pfn);
>>>>>> -        __init_single_page(page, pfn, zone, nid, INIT_PAGE_COUNT);
>>>>>> -        if (context == MEMINIT_HOTPLUG)
>>>>>> +
>>>>>> +        /* If the context is MEMINIT_EARLY, we will init page
>>>>>> count and
>>>>>> +         * mark page reserved in reserve_bootmem_region, the free
>>>>>> region
>>>>>> +         * wouldn't have page count and we will check the pages 
>>>>>> count
>>>>>> +         * in __free_pages_core.
>>>>>> +         */
>>>>>> +        __init_single_page(page, pfn, zone, nid, 0);
>>>>>> +        if (context == MEMINIT_HOTPLUG) {
>>>>>> +            init_page_count(page);
>>>>>>                __SetPageReserved(page);
>>>>>
>>>>> Rather than calling init_page_count() and __SetPageReserved() for
>>>>> MEMINIT_HOTPLUG you can set flags to INIT_PAGE_COUNT |
>>>>> INIT_PAGE_RESERVED
>>>>> an call __init_single_page() after the check for MEMINIT_HOTPLUG.
>>>>>
>>>>> But more generally, I wonder if we have to differentiate HOTPLUG
>>>>> here at all.
>>>>> @David, can you comment please?
>>>>
>>>> There are a lot of details to that, and I'll share some I can briefly
>>>> think of.
>>>>
>>>> 1) __SetPageReserved()
>>>>
>>>> I tried removing that a while ago, but there was a blocker (IIRC
>>>> something about
>>>> ZONE_DEVICE). I still have the patches at [1] and I could probably
>>>> take a look
>>>> if that blocker still exists (I recall that something changed at some
>>>> point, but
>>>> I never had the time to follow up).
>>>>
>>>> But once we stop setting the pages reserved, we might run into issues
>>>> with ...
>>>>
>>>>
>>>> 2) init_page_count()
>>>>
>>>> virtio-mem, XEN balloon and HV-balloon add memory blocks that can
>>>> contain holes.
>>>> set_online_page_callback() is used to intercept memory onlining and
>>>> to expose
>>>> only the pages that are not holes to the buddy: calling
>>>> generic_online_page() on !hole.
>>>>
>>>> Holes are PageReserved but with an initialized page count. Memory
>>>> offlining will fail on
>>>> PageReserved pages -- has_unmovable_pages().
>>>>
>>>>
>>>> At least virtio-mem clears the PageReserved flag of holes when
>>>> onlining memory,
>>>> and currently relies in the page count to be reasonable (so memory
>>>> offlining can work).
>>>>
>>>> static void virtio_mem_set_fake_offline(unsigned long pfn,
>>>>                      unsigned long nr_pages, bool onlined)
>>>> {
>>>>      page_offline_begin();
>>>>      for (; nr_pages--; pfn++) {
>>>>          struct page *page = pfn_to_page(pfn);
>>>>
>>>>          __SetPageOffline(page);
>>>>          if (!onlined) {
>>>>              SetPageDirty(page);
>>>>              /* FIXME: remove after cleanups */
>>>>              ClearPageReserved(page);
>>>>          }
>>>>      }
>>>>      page_offline_end();
>>>> }
>>>>
>>>>
>>>> For virtio-mem, we could initialize the page count there instead. The
>>>> other PV drivers
>>>> might require a bit more thought.
>>>>
>>>>
>>>> [1]
>>>> https://github.com/davidhildenbrand/linux/tree/online_reserved_cleanup
>>>>
>>>>>
>>>>>> +        }
>>>>>>              /*
>>>>>>             * Usually, we want to mark the pageblock 
>>>>>> MIGRATE_MOVABLE,
>>>>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>>>>> index 06be8821d833..b868caabe8dc 100644
>>>>>> --- a/mm/page_alloc.c
>>>>>> +++ b/mm/page_alloc.c
>>>>>> @@ -1285,18 +1285,22 @@ void __free_pages_core(struct page *page,
>>>>>> unsigned int order)
>>>>>>        unsigned int loop;
>>>>>>          /*
>>>>>> -     * When initializing the memmap, __init_single_page() sets the
>>>>>> refcount
>>>>>> -     * of all pages to 1 ("allocated"/"not free"). We have to 
>>>>>> set the
>>>>>> -     * refcount of all involved pages to 0.
>>>>>> +     * When initializing the memmap, memmap_init_range sets the
>>>>>> refcount
>>>>>> +     * of all pages to 1 ("reserved" and "free") in hotplug
>>>>>> context. We
>>>>>> +     * have to set the refcount of all involved pages to 0.
>>>>>> Otherwise,
>>>>>> +     * we don't do it, as reserve_bootmem_region only set the
>>>>>> refcount on
>>>>>> +     * reserve region ("reserved") in early context.
>>>>>>         */
>>>>>
>>>>> Again, why hotplug and early init should be different?
>>>>>
>>>>>> -    prefetchw(p);
>>>>>> -    for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>>>>>> -        prefetchw(p + 1);
>>>>>> +    if (page_count(page)) {
>>>>>> +        prefetchw(p);
>>>>>> +        for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
>>>>>> +            prefetchw(p + 1);
>>>>>> +            __ClearPageReserved(p);
>>>>>> +            set_page_count(p, 0);
>>>>>> +        }
>>>>>>            __ClearPageReserved(p);
>>>>>>            set_page_count(p, 0);
>>>>
>>>> That looks wrong. if the page count would by pure luck be 0 already
>>>> for hotplugged memory,
>>>> you wouldn't clear the reserved flag.
>>>>
>>>> These changes make me a bit nervous.
>>>
>>>
>>> Is 'if (page_count(page) || PageReserved(page))' be safer? Or do I
>>> need to do something else?
>>>
>>
>> How about the following if statement? But it needs to add more patch
>> like v1 ([PATCH 2/4] mm: Introduce MEMINIT_LATE context).
>>
>> It'll be safer, but more complex. Please comment...
>>
>>     if (context != MEMINIT_EARLY || (page_count(page) || 
>> PageReserved(page)) {
>>
>
> Ideally we could make initialization only depend on the context, and 
> not check for count or the reserved flag.
>

This link is v1, 
https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/

If we could make initialization only depend on the context, I'll modify 
it based on v1.

@Mike,  By the way,  this code will cost more time:

                 if (context == MEMINIT_HOTPLUG)
                         flags = INIT_PAGE_COUNT | INIT_PAGE_RESERVED;
                 __init_single_page(page, pfn, zone, nid, flags);


[    0.014999] On node 0, zone DMA32: 31679 pages in unavailable ranges
[    0.311560] ACPI: PM-Timer IO Port: 0x508


This code will cost less time:

                 __init_single_page(page, pfn, zone, nid, 0);
                 if (context == MEMINIT_HOTPLUG) {
                         init_page_count(page);
                         __SetPageReserved(page);

[    0.014299] On node 0, zone DMA32: 31679 pages in unavailable ranges
[    0.250223] ACPI: PM-Timer IO Port: 0x508



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-12  9:53             ` Yajun Deng
@ 2023-10-13  8:48               ` Mike Rapoport
  2023-10-13  9:29                 ` Yajun Deng
  0 siblings, 1 reply; 34+ messages in thread
From: Mike Rapoport @ 2023-10-13  8:48 UTC (permalink / raw)
  To: Yajun Deng
  Cc: David Hildenbrand, akpm, mike.kravetz, muchun.song, willy,
	linux-mm, linux-kernel

On Thu, Oct 12, 2023 at 05:53:22PM +0800, Yajun Deng wrote:
> 
> On 2023/10/12 17:23, David Hildenbrand wrote:
> > On 10.10.23 04:31, Yajun Deng wrote:
> > > 
> > > On 2023/10/8 16:57, Yajun Deng wrote:
> > > > > 
> > > > > That looks wrong. if the page count would by pure luck be 0
> > > > > already for hotplugged memory, you wouldn't clear the reserved
> > > > > flag.
> > > > > 
> > > > > These changes make me a bit nervous.
> > > > 
> > > > Is 'if (page_count(page) || PageReserved(page))' be safer? Or do I
> > > > need to do something else?
> > > > 
> > > 
> > > How about the following if statement? But it needs to add more patch
> > > like v1 ([PATCH 2/4] mm: Introduce MEMINIT_LATE context).
> > > 
> > > It'll be safer, but more complex. Please comment...
> > > 
> > >     if (context != MEMINIT_EARLY || (page_count(page) ||
> > > PageReserved(page)) {
> > > 
> > 
> > Ideally we could make initialization only depend on the context, and not
> > check for count or the reserved flag.
> > 
> 
> This link is v1,
> https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
> 
> If we could make initialization only depend on the context, I'll modify it
> based on v1.

Although ~20% improvement looks impressive, this is only optimization of a
fraction of the boot time, and realistically, how much 56 msec saves from
the total boot time when you boot a machine with 190G of RAM?

I still think the improvement does not justify the churn, added complexity
and special casing of different code paths of initialization of struct pages.
 
> @Mike,  By the way,  this code will cost more time:
> 
>                 if (context == MEMINIT_HOTPLUG)
>                         flags = INIT_PAGE_COUNT | INIT_PAGE_RESERVED;
>                 __init_single_page(page, pfn, zone, nid, flags);
> 
> 
> [    0.014999] On node 0, zone DMA32: 31679 pages in unavailable ranges
> [    0.311560] ACPI: PM-Timer IO Port: 0x508
> 
> 
> This code will cost less time:
> 
>                 __init_single_page(page, pfn, zone, nid, 0);
>                 if (context == MEMINIT_HOTPLUG) {
>                         init_page_count(page);
>                         __SetPageReserved(page);
> 
> [    0.014299] On node 0, zone DMA32: 31679 pages in unavailable ranges
> [    0.250223] ACPI: PM-Timer IO Port: 0x508
> 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-13  8:48               ` Mike Rapoport
@ 2023-10-13  9:29                 ` Yajun Deng
  2023-10-16  6:33                   ` Mike Rapoport
  0 siblings, 1 reply; 34+ messages in thread
From: Yajun Deng @ 2023-10-13  9:29 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: David Hildenbrand, akpm, mike.kravetz, muchun.song, willy,
	linux-mm, linux-kernel


On 2023/10/13 16:48, Mike Rapoport wrote:
> On Thu, Oct 12, 2023 at 05:53:22PM +0800, Yajun Deng wrote:
>> On 2023/10/12 17:23, David Hildenbrand wrote:
>>> On 10.10.23 04:31, Yajun Deng wrote:
>>>> On 2023/10/8 16:57, Yajun Deng wrote:
>>>>>> That looks wrong. if the page count would by pure luck be 0
>>>>>> already for hotplugged memory, you wouldn't clear the reserved
>>>>>> flag.
>>>>>>
>>>>>> These changes make me a bit nervous.
>>>>> Is 'if (page_count(page) || PageReserved(page))' be safer? Or do I
>>>>> need to do something else?
>>>>>
>>>> How about the following if statement? But it needs to add more patch
>>>> like v1 ([PATCH 2/4] mm: Introduce MEMINIT_LATE context).
>>>>
>>>> It'll be safer, but more complex. Please comment...
>>>>
>>>>      if (context != MEMINIT_EARLY || (page_count(page) ||
>>>> PageReserved(page)) {
>>>>
>>> Ideally we could make initialization only depend on the context, and not
>>> check for count or the reserved flag.
>>>
>> This link is v1,
>> https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
>>
>> If we could make initialization only depend on the context, I'll modify it
>> based on v1.
> Although ~20% improvement looks impressive, this is only optimization of a
> fraction of the boot time, and realistically, how much 56 msec saves from
> the total boot time when you boot a machine with 190G of RAM?


There are a lot of factors that can affect the total boot time. A 
saving of 56 msec may be insignificant.

But if we look at the boot log, we'll see there's a significant time jump.

before:

[    0.250334] ACPI: PM-Timer IO Port: 0x508

[    0.618994] Memory: 173413056K/199884452K available (18440K kernel 
code, 4204K rwdata, 5608K rodata, 3188K init, 17024K bss, 5499616K 
reserved, 20971520K cma-reserved)


after:

[    0.260229] software IO TLB: area num 32.

[    0.563497] Memory: 173413056K/199884452K available (18440K kernel 
code, 4204K rwdata, 5608K rodata, 3188K init, 17024K bss, 5499616K 
reserved, 20971520K cma-reserved)


Memory initialization is time consuming in the boot log.

> I still think the improvement does not justify the churn, added complexity
> and special casing of different code paths of initialization of struct pages.
>   


Because there is a loop, if the order is MAX_ORDER, the loop will run 
1024 times. The following 'if' would be safer:

'if (context != MEMINIT_EARLY || (page_count(page) || >> 
PageReserved(page)) {'

This is a foundation. We may change this when we are able to safely 
remove page init in the hotplug context one day.

So the case for the early context is only temporary.

>> @Mike,  By the way,  this code will cost more time:
>>
>>                  if (context == MEMINIT_HOTPLUG)
>>                          flags = INIT_PAGE_COUNT | INIT_PAGE_RESERVED;
>>                  __init_single_page(page, pfn, zone, nid, flags);
>>
>>
>> [    0.014999] On node 0, zone DMA32: 31679 pages in unavailable ranges
>> [    0.311560] ACPI: PM-Timer IO Port: 0x508
>>
>>
>> This code will cost less time:
>>
>>                  __init_single_page(page, pfn, zone, nid, 0);
>>                  if (context == MEMINIT_HOTPLUG) {
>>                          init_page_count(page);
>>                          __SetPageReserved(page);
>>
>> [    0.014299] On node 0, zone DMA32: 31679 pages in unavailable ranges
>> [    0.250223] ACPI: PM-Timer IO Port: 0x508
>>


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-13  9:29                 ` Yajun Deng
@ 2023-10-16  6:33                   ` Mike Rapoport
  2023-10-16  8:10                     ` Yajun Deng
  0 siblings, 1 reply; 34+ messages in thread
From: Mike Rapoport @ 2023-10-16  6:33 UTC (permalink / raw)
  To: Yajun Deng
  Cc: David Hildenbrand, akpm, mike.kravetz, muchun.song, willy,
	linux-mm, linux-kernel

On Fri, Oct 13, 2023 at 05:29:19PM +0800, Yajun Deng wrote:
> 
> On 2023/10/13 16:48, Mike Rapoport wrote:
> > On Thu, Oct 12, 2023 at 05:53:22PM +0800, Yajun Deng wrote:
> > > On 2023/10/12 17:23, David Hildenbrand wrote:
> > > > On 10.10.23 04:31, Yajun Deng wrote:
> > > > > On 2023/10/8 16:57, Yajun Deng wrote:
> > > > > > > That looks wrong. if the page count would by pure luck be 0
> > > > > > > already for hotplugged memory, you wouldn't clear the reserved
> > > > > > > flag.
> > > > > > > 
> > > > > > > These changes make me a bit nervous.
> > > > > > Is 'if (page_count(page) || PageReserved(page))' be safer? Or do I
> > > > > > need to do something else?
> > > > > > 
> > > > > How about the following if statement? But it needs to add more patch
> > > > > like v1 ([PATCH 2/4] mm: Introduce MEMINIT_LATE context).
> > > > > 
> > > > > It'll be safer, but more complex. Please comment...
> > > > > 
> > > > >      if (context != MEMINIT_EARLY || (page_count(page) ||
> > > > > PageReserved(page)) {
> > > > > 
> > > > Ideally we could make initialization only depend on the context, and not
> > > > check for count or the reserved flag.
> > > > 
> > > This link is v1,
> > > https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
> > > 
> > > If we could make initialization only depend on the context, I'll modify it
> > > based on v1.
> > Although ~20% improvement looks impressive, this is only optimization of a
> > fraction of the boot time, and realistically, how much 56 msec saves from
> > the total boot time when you boot a machine with 190G of RAM?
> 
> There are a lot of factors that can affect the total boot time. 56 msec
> saves may be insignificant.
> 
> But if we look at the boot log, we'll see there's a significant time jump.
> 
> before:
> 
> [    0.250334] ACPI: PM-Timer IO Port: 0x508
> [    0.618994] Memory: 173413056K/199884452K available (18440K kernel code,
> 
> after:
> 
> [    0.260229] software IO TLB: area num 32.
> [    0.563497] Memory: 173413056K/199884452K available (18440K kernel code,
> 
> Memory initialization is time consuming in the boot log.

You just confirmed that 56 msec is insignificant, and yet you again
cite the ~60 msec improvement in memory initialization.

What does this improvement gain in percentage of total boot time?
 
> > I still think the improvement does not justify the churn, added complexity
> > and special casing of different code paths of initialization of struct pages.
> 
> 
> Because there is a loop, if the order is MAX_ORDER, the loop will run 1024
> times. The following 'if' would be safer:
> 
> 'if (context != MEMINIT_EARLY || (page_count(page) || >> PageReserved(page))
> {'

No, it will not.

As a matter of fact, any condition here won't be 'safer' because it makes
the code more complex and less maintainable. 
Any future change in __free_pages_core() or one of it's callers will have
to reason what will happen with that condition after the change.

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-16  6:33                   ` Mike Rapoport
@ 2023-10-16  8:10                     ` Yajun Deng
  2023-10-16  8:16                       ` David Hildenbrand
  0 siblings, 1 reply; 34+ messages in thread
From: Yajun Deng @ 2023-10-16  8:10 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: David Hildenbrand, akpm, mike.kravetz, muchun.song, willy,
	linux-mm, linux-kernel


On 2023/10/16 14:33, Mike Rapoport wrote:
> On Fri, Oct 13, 2023 at 05:29:19PM +0800, Yajun Deng wrote:
>> On 2023/10/13 16:48, Mike Rapoport wrote:
>>> On Thu, Oct 12, 2023 at 05:53:22PM +0800, Yajun Deng wrote:
>>>> On 2023/10/12 17:23, David Hildenbrand wrote:
>>>>> On 10.10.23 04:31, Yajun Deng wrote:
>>>>>> On 2023/10/8 16:57, Yajun Deng wrote:
>>>>>>>> That looks wrong. if the page count would by pure luck be 0
>>>>>>>> already for hotplugged memory, you wouldn't clear the reserved
>>>>>>>> flag.
>>>>>>>>
>>>>>>>> These changes make me a bit nervous.
>>>>>>> Is 'if (page_count(page) || PageReserved(page))' be safer? Or do I
>>>>>>> need to do something else?
>>>>>>>
>>>>>> How about the following if statement? But it needs to add more patch
>>>>>> like v1 ([PATCH 2/4] mm: Introduce MEMINIT_LATE context).
>>>>>>
>>>>>> It'll be safer, but more complex. Please comment...
>>>>>>
>>>>>>       if (context != MEMINIT_EARLY || (page_count(page) ||
>>>>>> PageReserved(page)) {
>>>>>>
>>>>> Ideally we could make initialization only depend on the context, and not
>>>>> check for count or the reserved flag.
>>>>>
>>>> This link is v1,
>>>> https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
>>>>
>>>> If we could make initialization only depend on the context, I'll modify it
>>>> based on v1.
>>> Although ~20% improvement looks impressive, this is only optimization of a
>>> fraction of the boot time, and realistically, how much 56 msec saves from
>>> the total boot time when you boot a machine with 190G of RAM?
>> There are a lot of factors that can affect the total boot time. 56 msec
>> saves may be insignificant.
>>
>> But if we look at the boot log, we'll see there's a significant time jump.
>>
>> before:
>>
>> [    0.250334] ACPI: PM-Timer IO Port: 0x508
>> [    0.618994] Memory: 173413056K/199884452K available (18440K kernel code,
>>
>> after:
>>
>> [    0.260229] software IO TLB: area num 32.
>> [    0.563497] Memory: 173413056K/199884452K available (18440K kernel code,
>> Memory:
>> Memory initialization is time consuming in the boot log.
> You just confirmed that 56 msec is insignificant and then you send again
> the improvement of ~60 msec in memory initialization.
>
> What does this improvement gain in percentage of total boot time?


before:

[   10.692708] Run /init as init process


after:

[   10.666290] Run /init as init process


About 0.25%. The total boot time is variable, depending on how many 
drivers need to be initialized.


>   
>>> I still think the improvement does not justify the churn, added complexity
>>> and special casing of different code paths of initialization of struct pages.
>>
>> Because there is a loop, if the order is MAX_ORDER, the loop will run 1024
>> times. The following 'if' would be safer:
>>
>> 'if (context != MEMINIT_EARLY || (page_count(page) || >> PageReserved(page))
>> {'
> No, it will not.
>
> As the matter of fact any condition here won't be 'safer' because it makes
> the code more complex and less maintainable.
> Any future change in __free_pages_core() or one of it's callers will have
> to reason what will happen with that condition after the change.


To avoid introducing a MEMINIT_LATE context and to keep the code 
simpler, this might be a better option.

if (page_count(page) || PageReserved(page))



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-16  8:10                     ` Yajun Deng
@ 2023-10-16  8:16                       ` David Hildenbrand
  2023-10-16  8:32                         ` Yajun Deng
  0 siblings, 1 reply; 34+ messages in thread
From: David Hildenbrand @ 2023-10-16  8:16 UTC (permalink / raw)
  To: Yajun Deng, Mike Rapoport
  Cc: akpm, mike.kravetz, muchun.song, willy, linux-mm, linux-kernel

On 16.10.23 10:10, Yajun Deng wrote:
> 
> On 2023/10/16 14:33, Mike Rapoport wrote:
>> On Fri, Oct 13, 2023 at 05:29:19PM +0800, Yajun Deng wrote:
>>> On 2023/10/13 16:48, Mike Rapoport wrote:
>>>> On Thu, Oct 12, 2023 at 05:53:22PM +0800, Yajun Deng wrote:
>>>>> On 2023/10/12 17:23, David Hildenbrand wrote:
>>>>>> On 10.10.23 04:31, Yajun Deng wrote:
>>>>>>> On 2023/10/8 16:57, Yajun Deng wrote:
>>>>>>>>> That looks wrong. if the page count would by pure luck be 0
>>>>>>>>> already for hotplugged memory, you wouldn't clear the reserved
>>>>>>>>> flag.
>>>>>>>>>
>>>>>>>>> These changes make me a bit nervous.
>>>>>>>> Is 'if (page_count(page) || PageReserved(page))' be safer? Or do I
>>>>>>>> need to do something else?
>>>>>>>>
>>>>>>> How about the following if statement? But it needs to add more patch
>>>>>>> like v1 ([PATCH 2/4] mm: Introduce MEMINIT_LATE context).
>>>>>>>
>>>>>>> It'll be safer, but more complex. Please comment...
>>>>>>>
>>>>>>>        if (context != MEMINIT_EARLY || (page_count(page) ||
>>>>>>> PageReserved(page)) {
>>>>>>>
>>>>>> Ideally we could make initialization only depend on the context, and not
>>>>>> check for count or the reserved flag.
>>>>>>
>>>>> This link is v1,
>>>>> https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
>>>>>
>>>>> If we could make initialization only depend on the context, I'll modify it
>>>>> based on v1.
>>>> Although ~20% improvement looks impressive, this is only optimization of a
>>>> fraction of the boot time, and realistically, how much 56 msec saves from
>>>> the total boot time when you boot a machine with 190G of RAM?
>>> There are a lot of factors that can affect the total boot time. 56 msec
>>> saves may be insignificant.
>>>
>>> But if we look at the boot log, we'll see there's a significant time jump.
>>>
>>> before:
>>>
>>> [    0.250334] ACPI: PM-Timer IO Port: 0x508
>>> [    0.618994] Memory: 173413056K/199884452K available (18440K kernel code,
>>>
>>> after:
>>>
>>> [    0.260229] software IO TLB: area num 32.
>>> [    0.563497] Memory: 173413056K/199884452K available (18440K kernel code,
>>> Memory:
>>> Memory initialization is time consuming in the boot log.
>> You just confirmed that 56 msec is insignificant and then you send again
>> the improvement of ~60 msec in memory initialization.
>>
>> What does this improvement gain in percentage of total boot time?
> 
> 
> before:
> 
> [   10.692708] Run /init as init process
> 
> 
> after:
> 
> [   10.666290] Run /init as init process
> 
> 
> About 0.25%. The total boot time is variable, depending on how many
> drivers need to be initialized.
> 
> 
>>    
>>>> I still think the improvement does not justify the churn, added complexity
>>>> and special casing of different code paths of initialization of struct pages.
>>>
>>> Because there is a loop, if the order is MAX_ORDER, the loop will run 1024
>>> times. The following 'if' would be safer:
>>>
>>> 'if (context != MEMINIT_EARLY || (page_count(page) || >> PageReserved(page))
>>> {'
>> No, it will not.
>>
>> As the matter of fact any condition here won't be 'safer' because it makes
>> the code more complex and less maintainable.
>> Any future change in __free_pages_core() or one of it's callers will have
>> to reason what will happen with that condition after the change.
> 
> 
> To avoid introducing MEMINIT_LATE context and make code simpler. This
> might be a better option.
> 
> if (page_count(page) || PageReserved(page))

I'll have to side with Mike here; this change might not be worth it.

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-16  8:16                       ` David Hildenbrand
@ 2023-10-16  8:32                         ` Yajun Deng
  2023-10-16  8:36                           ` David Hildenbrand
  0 siblings, 1 reply; 34+ messages in thread
From: Yajun Deng @ 2023-10-16  8:32 UTC (permalink / raw)
  To: David Hildenbrand, Mike Rapoport
  Cc: akpm, mike.kravetz, muchun.song, willy, linux-mm, linux-kernel


On 2023/10/16 16:16, David Hildenbrand wrote:
> On 16.10.23 10:10, Yajun Deng wrote:
>>
>> On 2023/10/16 14:33, Mike Rapoport wrote:
>>> On Fri, Oct 13, 2023 at 05:29:19PM +0800, Yajun Deng wrote:
>>>> On 2023/10/13 16:48, Mike Rapoport wrote:
>>>>> On Thu, Oct 12, 2023 at 05:53:22PM +0800, Yajun Deng wrote:
>>>>>> On 2023/10/12 17:23, David Hildenbrand wrote:
>>>>>>> On 10.10.23 04:31, Yajun Deng wrote:
>>>>>>>> On 2023/10/8 16:57, Yajun Deng wrote:
>>>>>>>>>> That looks wrong. if the page count would by pure luck be 0
>>>>>>>>>> already for hotplugged memory, you wouldn't clear the reserved
>>>>>>>>>> flag.
>>>>>>>>>>
>>>>>>>>>> These changes make me a bit nervous.
>>>>>>>>> Is 'if (page_count(page) || PageReserved(page))' be safer? Or 
>>>>>>>>> do I
>>>>>>>>> need to do something else?
>>>>>>>>>
>>>>>>>> How about the following if statement? But it needs to add more 
>>>>>>>> patch
>>>>>>>> like v1 ([PATCH 2/4] mm: Introduce MEMINIT_LATE context).
>>>>>>>>
>>>>>>>> It'll be safer, but more complex. Please comment...
>>>>>>>>
>>>>>>>>        if (context != MEMINIT_EARLY || (page_count(page) ||
>>>>>>>> PageReserved(page)) {
>>>>>>>>
>>>>>>> Ideally we could make initialization only depend on the context, 
>>>>>>> and not
>>>>>>> check for count or the reserved flag.
>>>>>>>
>>>>>> This link is v1,
>>>>>> https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/ 
>>>>>>
>>>>>>
>>>>>> If we could make initialization only depend on the context, I'll 
>>>>>> modify it
>>>>>> based on v1.
>>>>> Although ~20% improvement looks impressive, this is only 
>>>>> optimization of a
>>>>> fraction of the boot time, and realistically, how much 56 msec 
>>>>> saves from
>>>>> the total boot time when you boot a machine with 190G of RAM?
>>>> There are a lot of factors that can affect the total boot time. 56 
>>>> msec
>>>> saves may be insignificant.
>>>>
>>>> But if we look at the boot log, we'll see there's a significant 
>>>> time jump.
>>>>
>>>> before:
>>>>
>>>> [    0.250334] ACPI: PM-Timer IO Port: 0x508
>>>> [    0.618994] Memory: 173413056K/199884452K available (18440K 
>>>> kernel code,
>>>>
>>>> after:
>>>>
>>>> [    0.260229] software IO TLB: area num 32.
>>>> [    0.563497] Memory: 173413056K/199884452K available (18440K 
>>>> kernel code,
>>>> Memory:
>>>> Memory initialization is time consuming in the boot log.
>>> You just confirmed that 56 msec is insignificant and then you send 
>>> again
>>> the improvement of ~60 msec in memory initialization.
>>>
>>> What does this improvement gain in percentage of total boot time?
>>
>>
>> before:
>>
>> [   10.692708] Run /init as init process
>>
>>
>> after:
>>
>> [   10.666290] Run /init as init process
>>
>>
>> About 0.25%. The total boot time is variable, depending on how many
>> drivers need to be initialized.
>>
>>
>>>>> I still think the improvement does not justify the churn, added 
>>>>> complexity
>>>>> and special casing of different code paths of initialization of 
>>>>> struct pages.
>>>>
>>>> Because there is a loop, if the order is MAX_ORDER, the loop will 
>>>> run 1024
>>>> times. The following 'if' would be safer:
>>>>
>>>> 'if (context != MEMINIT_EARLY || (page_count(page) || >> 
>>>> PageReserved(page))
>>>> {'
>>> No, it will not.
>>>
>>> As the matter of fact any condition here won't be 'safer' because it 
>>> makes
>>> the code more complex and less maintainable.
>>> Any future change in __free_pages_core() or one of it's callers will 
>>> have
>>> to reason what will happen with that condition after the change.
>>
>>
>> To avoid introducing MEMINIT_LATE context and make code simpler. This
>> might be a better option.
>>
>> if (page_count(page) || PageReserved(page))
>
> I'll have to side with Mike here; this change might not be worth it.
>

Okay, I got it. Thanks!



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-16  8:32                         ` Yajun Deng
@ 2023-10-16  8:36                           ` David Hildenbrand
  2023-10-16 10:17                             ` Yajun Deng
  0 siblings, 1 reply; 34+ messages in thread
From: David Hildenbrand @ 2023-10-16  8:36 UTC (permalink / raw)
  To: Yajun Deng, Mike Rapoport
  Cc: akpm, mike.kravetz, muchun.song, willy, linux-mm, linux-kernel

On 16.10.23 10:32, Yajun Deng wrote:
> 
> On 2023/10/16 16:16, David Hildenbrand wrote:
>> On 16.10.23 10:10, Yajun Deng wrote:
>>>
>>> On 2023/10/16 14:33, Mike Rapoport wrote:
>>>> On Fri, Oct 13, 2023 at 05:29:19PM +0800, Yajun Deng wrote:
>>>>> On 2023/10/13 16:48, Mike Rapoport wrote:
>>>>>> On Thu, Oct 12, 2023 at 05:53:22PM +0800, Yajun Deng wrote:
>>>>>>> On 2023/10/12 17:23, David Hildenbrand wrote:
>>>>>>>> On 10.10.23 04:31, Yajun Deng wrote:
>>>>>>>>> On 2023/10/8 16:57, Yajun Deng wrote:
>>>>>>>>>>> That looks wrong. if the page count would by pure luck be 0
>>>>>>>>>>> already for hotplugged memory, you wouldn't clear the reserved
>>>>>>>>>>> flag.
>>>>>>>>>>>
>>>>>>>>>>> These changes make me a bit nervous.
>>>>>>>>>> Is 'if (page_count(page) || PageReserved(page))' be safer? Or
>>>>>>>>>> do I
>>>>>>>>>> need to do something else?
>>>>>>>>>>
>>>>>>>>> How about the following if statement? But it needs to add more
>>>>>>>>> patch
>>>>>>>>> like v1 ([PATCH 2/4] mm: Introduce MEMINIT_LATE context).
>>>>>>>>>
>>>>>>>>> It'll be safer, but more complex. Please comment...
>>>>>>>>>
>>>>>>>>>         if (context != MEMINIT_EARLY || (page_count(page) ||
>>>>>>>>> PageReserved(page)) {
>>>>>>>>>
>>>>>>>> Ideally we could make initialization only depend on the context,
>>>>>>>> and not
>>>>>>>> check for count or the reserved flag.
>>>>>>>>
>>>>>>> This link is v1,
>>>>>>> https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/
>>>>>>>
>>>>>>>
>>>>>>> If we could make initialization only depend on the context, I'll
>>>>>>> modify it
>>>>>>> based on v1.
>>>>>> Although ~20% improvement looks impressive, this is only
>>>>>> optimization of a
>>>>>> fraction of the boot time, and realistically, how much 56 msec
>>>>>> saves from
>>>>>> the total boot time when you boot a machine with 190G of RAM?
>>>>> There are a lot of factors that can affect the total boot time. 56
>>>>> msec
>>>>> saves may be insignificant.
>>>>>
>>>>> But if we look at the boot log, we'll see there's a significant
>>>>> time jump.
>>>>>
>>>>> before:
>>>>>
>>>>> [    0.250334] ACPI: PM-Timer IO Port: 0x508
>>>>> [    0.618994] Memory: 173413056K/199884452K available (18440K
>>>>> kernel code,
>>>>>
>>>>> after:
>>>>>
>>>>> [    0.260229] software IO TLB: area num 32.
>>>>> [    0.563497] Memory: 173413056K/199884452K available (18440K
>>>>> kernel code,
>>>>> Memory:
>>>>> Memory initialization is time consuming in the boot log.
>>>> You just confirmed that 56 msec is insignificant and then you send
>>>> again
>>>> the improvement of ~60 msec in memory initialization.
>>>>
>>>> What does this improvement gain in percentage of total boot time?
>>>
>>>
>>> before:
>>>
>>> [   10.692708] Run /init as init process
>>>
>>>
>>> after:
>>>
>>> [   10.666290] Run /init as init process
>>>
>>>
>>> About 0.25%. The total boot time is variable, depending on how many
>>> drivers need to be initialized.
>>>
>>>
>>>>>> I still think the improvement does not justify the churn, added
>>>>>> complexity
>>>>>> and special casing of different code paths of initialization of
>>>>>> struct pages.
>>>>>
>>>>> Because there is a loop, if the order is MAX_ORDER, the loop will
>>>>> run 1024
>>>>> times. The following 'if' would be safer:
>>>>>
>>>>> 'if (context != MEMINIT_EARLY || (page_count(page) ||
>>>>> PageReserved(page))
>>>>> {'
>>>> No, it will not.
>>>>
>>>>> As a matter of fact any condition here won't be 'safer' because it
>>>>> makes
>>>>> the code more complex and less maintainable.
>>>>> Any future change in __free_pages_core() or one of its callers will
>>>>> have
>>>>> to reason what will happen with that condition after the change.
>>>
>>>
>>> To avoid introducing MEMINIT_LATE context and make code simpler. This
>>> might be a better option.
>>>
>>> if (page_count(page) || PageReserved(page))
>>
>> I'll have to side with Mike here; this change might not be worth it.
>>
> 
> Okay, I got it. Thanks!

IMHO instead of adding more checks to that code we should try to unify 
that handling such that we can just remove it. As expressed, at least 
from the memory hotplug perspective there are still reasons why we need 
that; I can provide some guidance on how to eventually achieve that, but 
it might end up being a bit of work ...

Anyhow, thanks for bringing up that topic; it reminded me that I still 
have pending cleanups to not rely on PageReserved on the memory hotplug 
path.

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-16  8:36                           ` David Hildenbrand
@ 2023-10-16 10:17                             ` Yajun Deng
  2023-10-17  9:58                               ` Yajun Deng
  0 siblings, 1 reply; 34+ messages in thread
From: Yajun Deng @ 2023-10-16 10:17 UTC (permalink / raw)
  To: David Hildenbrand, Mike Rapoport
  Cc: akpm, mike.kravetz, muchun.song, willy, linux-mm, linux-kernel


On 2023/10/16 16:36, David Hildenbrand wrote:
> On 16.10.23 10:32, Yajun Deng wrote:
>>
>> On 2023/10/16 16:16, David Hildenbrand wrote:
>>> On 16.10.23 10:10, Yajun Deng wrote:
>>>>
>>>> On 2023/10/16 14:33, Mike Rapoport wrote:
>>>>> On Fri, Oct 13, 2023 at 05:29:19PM +0800, Yajun Deng wrote:
>>>>>> On 2023/10/13 16:48, Mike Rapoport wrote:
>>>>>>> On Thu, Oct 12, 2023 at 05:53:22PM +0800, Yajun Deng wrote:
>>>>>>>> On 2023/10/12 17:23, David Hildenbrand wrote:
>>>>>>>>> On 10.10.23 04:31, Yajun Deng wrote:
>>>>>>>>>> On 2023/10/8 16:57, Yajun Deng wrote:
>>>>>>>>>>>> That looks wrong. if the page count would by pure luck be 0
>>>>>>>>>>>> already for hotplugged memory, you wouldn't clear the reserved
>>>>>>>>>>>> flag.
>>>>>>>>>>>>
>>>>>>>>>>>> These changes make me a bit nervous.
>>>>>>>>>>> Is 'if (page_count(page) || PageReserved(page))' be safer? Or
>>>>>>>>>>> do I
>>>>>>>>>>> need to do something else?
>>>>>>>>>>>
>>>>>>>>>> How about the following if statement? But it needs to add more
>>>>>>>>>> patch
>>>>>>>>>> like v1 ([PATCH 2/4] mm: Introduce MEMINIT_LATE context).
>>>>>>>>>>
>>>>>>>>>> It'll be safer, but more complex. Please comment...
>>>>>>>>>>
>>>>>>>>>>         if (context != MEMINIT_EARLY || (page_count(page) ||
>>>>>>>>>> PageReserved(page)) {
>>>>>>>>>>
>>>>>>>>> Ideally we could make initialization only depend on the context,
>>>>>>>>> and not
>>>>>>>>> check for count or the reserved flag.
>>>>>>>>>
>>>>>>>> This link is v1,
>>>>>>>> https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/ 
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> If we could make initialization only depend on the context, I'll
>>>>>>>> modify it
>>>>>>>> based on v1.
>>>>>>> Although ~20% improvement looks impressive, this is only
>>>>>>> optimization of a
>>>>>>> fraction of the boot time, and realistically, how much 56 msec
>>>>>>> saves from
>>>>>>> the total boot time when you boot a machine with 190G of RAM?
>>>>>> There are a lot of factors that can affect the total boot time. 56
>>>>>> msec
>>>>>> saves may be insignificant.
>>>>>>
>>>>>> But if we look at the boot log, we'll see there's a significant
>>>>>> time jump.
>>>>>>
>>>>>> before:
>>>>>>
>>>>>> [    0.250334] ACPI: PM-Timer IO Port: 0x508
>>>>>> [    0.618994] Memory: 173413056K/199884452K available (18440K
>>>>>> kernel code,
>>>>>>
>>>>>> after:
>>>>>>
>>>>>> [    0.260229] software IO TLB: area num 32.
>>>>>> [    0.563497] Memory: 173413056K/199884452K available (18440K
>>>>>> kernel code,
>>>>>> Memory:
>>>>>> Memory initialization is time consuming in the boot log.
>>>>> You just confirmed that 56 msec is insignificant and then you send
>>>>> again
>>>>> the improvement of ~60 msec in memory initialization.
>>>>>
>>>>> What does this improvement gain in percentage of total boot time?
>>>>
>>>>
>>>> before:
>>>>
>>>> [   10.692708] Run /init as init process
>>>>
>>>>
>>>> after:
>>>>
>>>> [   10.666290] Run /init as init process
>>>>
>>>>
>>>> About 0.25%. The total boot time is variable, depending on how many
>>>> drivers need to be initialized.
>>>>
>>>>
>>>>>>> I still think the improvement does not justify the churn, added
>>>>>>> complexity
>>>>>>> and special casing of different code paths of initialization of
>>>>>>> struct pages.
>>>>>>
>>>>>> Because there is a loop, if the order is MAX_ORDER, the loop will
>>>>>> run 1024
>>>>>> times. The following 'if' would be safer:
>>>>>>
>>>>>> 'if (context != MEMINIT_EARLY || (page_count(page) || >>
>>>>>> PageReserved(page))
>>>>>> {'
>>>>> No, it will not.
>>>>>
>>>>> As the matter of fact any condition here won't be 'safer' because it
>>>>> makes
>>>>> the code more complex and less maintainable.
>>>>> Any future change in __free_pages_core() or one of it's callers will
>>>>> have
>>>>> to reason what will happen with that condition after the change.
>>>>
>>>>
>>>> To avoid introducing MEMINIT_LATE context and make code simpler. This
>>>> might be a better option.
>>>>
>>>> if (page_count(page) || PageReserved(page))
>>>
>>> I'll have to side with Mike here; this change might not be worth it.
>>>
>>
>> Okay, I got it. Thanks!
>
> IMHO instead of adding more checks to that code we should try to unify 
> that handling such that we can just remove it. As expressed, at least 
> from the memory hotplug perspective there are still reasons why we 
> need that; I can provide some guidance on how to eventually achieve 
> that, but it might end up in a bit of work ...


Yes, we can't remove it right now. If we want to do that, we have to 
clean up the reliance on page count and PageReserved first.

>
> Anyhow, thanks for bringing up that topic; it reminded me that I still 
> have pending cleanups to not rely on PageReserved on the memory 
> hotplug path.
>


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY
  2023-10-16 10:17                             ` Yajun Deng
@ 2023-10-17  9:58                               ` Yajun Deng
  0 siblings, 0 replies; 34+ messages in thread
From: Yajun Deng @ 2023-10-17  9:58 UTC (permalink / raw)
  To: David Hildenbrand, Mike Rapoport
  Cc: akpm, mike.kravetz, muchun.song, willy, linux-mm, linux-kernel


On 2023/10/16 18:17, Yajun Deng wrote:
>
> On 2023/10/16 16:36, David Hildenbrand wrote:
>> On 16.10.23 10:32, Yajun Deng wrote:
>>>
>>> On 2023/10/16 16:16, David Hildenbrand wrote:
>>>> On 16.10.23 10:10, Yajun Deng wrote:
>>>>>
>>>>> On 2023/10/16 14:33, Mike Rapoport wrote:
>>>>>> On Fri, Oct 13, 2023 at 05:29:19PM +0800, Yajun Deng wrote:
>>>>>>> On 2023/10/13 16:48, Mike Rapoport wrote:
>>>>>>>> On Thu, Oct 12, 2023 at 05:53:22PM +0800, Yajun Deng wrote:
>>>>>>>>> On 2023/10/12 17:23, David Hildenbrand wrote:
>>>>>>>>>> On 10.10.23 04:31, Yajun Deng wrote:
>>>>>>>>>>> On 2023/10/8 16:57, Yajun Deng wrote:
>>>>>>>>>>>>> That looks wrong. if the page count would by pure luck be 0
>>>>>>>>>>>>> already for hotplugged memory, you wouldn't clear the 
>>>>>>>>>>>>> reserved
>>>>>>>>>>>>> flag.
>>>>>>>>>>>>>
>>>>>>>>>>>>> These changes make me a bit nervous.
>>>>>>>>>>>> Is 'if (page_count(page) || PageReserved(page))' be safer? Or
>>>>>>>>>>>> do I
>>>>>>>>>>>> need to do something else?
>>>>>>>>>>>>
>>>>>>>>>>> How about the following if statement? But it needs to add more
>>>>>>>>>>> patch
>>>>>>>>>>> like v1 ([PATCH 2/4] mm: Introduce MEMINIT_LATE context).
>>>>>>>>>>>
>>>>>>>>>>> It'll be safer, but more complex. Please comment...
>>>>>>>>>>>
>>>>>>>>>>>         if (context != MEMINIT_EARLY || (page_count(page) ||
>>>>>>>>>>> PageReserved(page)) {
>>>>>>>>>>>
>>>>>>>>>> Ideally we could make initialization only depend on the context,
>>>>>>>>>> and not
>>>>>>>>>> check for count or the reserved flag.
>>>>>>>>>>
>>>>>>>>> This link is v1,
>>>>>>>>> https://lore.kernel.org/all/20230922070923.355656-1-yajun.deng@linux.dev/ 
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> If we could make initialization only depend on the context, I'll
>>>>>>>>> modify it
>>>>>>>>> based on v1.
>>>>>>>> Although ~20% improvement looks impressive, this is only
>>>>>>>> optimization of a
>>>>>>>> fraction of the boot time, and realistically, how much 56 msec
>>>>>>>> saves from
>>>>>>>> the total boot time when you boot a machine with 190G of RAM?
>>>>>>> There are a lot of factors that can affect the total boot time. 56
>>>>>>> msec
>>>>>>> saves may be insignificant.
>>>>>>>
>>>>>>> But if we look at the boot log, we'll see there's a significant
>>>>>>> time jump.
>>>>>>>
>>>>>>> before:
>>>>>>>
>>>>>>> [    0.250334] ACPI: PM-Timer IO Port: 0x508
>>>>>>> [    0.618994] Memory: 173413056K/199884452K available (18440K
>>>>>>> kernel code,
>>>>>>>
>>>>>>> after:
>>>>>>>
>>>>>>> [    0.260229] software IO TLB: area num 32.
>>>>>>> [    0.563497] Memory: 173413056K/199884452K available (18440K
>>>>>>> kernel code,
>>>>>>> Memory:
>>>>>>> Memory initialization is time consuming in the boot log.
>>>>>> You just confirmed that 56 msec is insignificant and then you send
>>>>>> again
>>>>>> the improvement of ~60 msec in memory initialization.
>>>>>>
>>>>>> What does this improvement gain in percentage of total boot time?
>>>>>
>>>>>
>>>>> before:
>>>>>
>>>>> [   10.692708] Run /init as init process
>>>>>
>>>>>
>>>>> after:
>>>>>
>>>>> [   10.666290] Run /init as init process
>>>>>
>>>>>
>>>>> About 0.25%. The total boot time is variable, depending on how many
>>>>> drivers need to be initialized.
>>>>>
>>>>>
>>>>>>>> I still think the improvement does not justify the churn, added
>>>>>>>> complexity
>>>>>>>> and special casing of different code paths of initialization of
>>>>>>>> struct pages.
>>>>>>>
>>>>>>> Because there is a loop, if the order is MAX_ORDER, the loop will
>>>>>>> run 1024
>>>>>>> times. The following 'if' would be safer:
>>>>>>>
>>>>>>> 'if (context != MEMINIT_EARLY || (page_count(page) ||
>>>>>>> PageReserved(page))
>>>>>>> {'
>>>>>> No, it will not.
>>>>>>
>>>>>> As a matter of fact any condition here won't be 'safer' because it
>>>>>> makes
>>>>>> the code more complex and less maintainable.
>>>>>> Any future change in __free_pages_core() or one of its callers will
>>>>>> have
>>>>>> to reason what will happen with that condition after the change.
>>>>>
>>>>>
>>>>> To avoid introducing MEMINIT_LATE context and make code simpler. This
>>>>> might be a better option.
>>>>>
>>>>> if (page_count(page) || PageReserved(page))
>>>>
>>>> I'll have to side with Mike here; this change might not be worth it.
>>>>
>>>
>>> Okay, I got it. Thanks!
>>
>> IMHO instead of adding more checks to that code we should try to 
>> unify that handling such that we can just remove it. As expressed, at 
>> least from the memory hotplug perspective there are still reasons why 
>> we need that; I can provide some guidance on how to eventually 
>> achieve that, but it might end up in a bit of work ...
>
>
> Yes, we can't remove it right now. If we want to do that, we have to 
> clean up the reliance on page count and PageReserved first.


How about making __free_pages_core separate, like:

void __init __free_pages_core_early(struct page *page, unsigned int order)
{
         unsigned int nr_pages = 1 << order;

         atomic_long_add(nr_pages, &page_zone(page)->managed_pages);

         if (page_contains_unaccepted(page, order)) {
                 if (order == MAX_ORDER && __free_unaccepted(page))
                         return;

                 accept_page(page, order);
         }

         /*
          * Bypass PCP and place fresh pages right to the tail, primarily
          * relevant for memory onlining.
          */
         __free_pages_ok(page, order, FPI_TO_TAIL);
}

void __free_pages_core(struct page *page, unsigned int order)
{
         unsigned int nr_pages = 1 << order;
         struct page *p = page;
         unsigned int loop;

         /*
          * When initializing the memmap, __init_single_page() sets the 
refcount
          * of all pages to 1 ("allocated"/"not free"). We have to set the
          * refcount of all involved pages to 0.
          */
         prefetchw(p);
         for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
                 prefetchw(p + 1);
                 __ClearPageReserved(p);
                 set_page_count(p, 0);
         }
         __ClearPageReserved(p);
         set_page_count(p, 0);

         __free_pages_core_early(page, order);
}

We only change the callers that need __free_pages_core_early; it doesn't 
affect the other callers.

>
>>
>> Anyhow, thanks for bringing up that topic; it reminded me that I 
>> still have pending cleanups to not rely on PageReserved on the memory 
>> hotplug path.
>>


^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2023-10-17  9:58 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-28  8:33 [PATCH v4 0/2] mm: Don't set and reset page count in MEMINIT_EARLY Yajun Deng
2023-09-28  8:33 ` [PATCH v4 1/2] mm: pass page count and reserved to __init_single_page Yajun Deng
2023-09-29  8:19   ` Mike Rapoport
2023-09-29  9:37     ` Yajun Deng
2023-09-28  8:33 ` [PATCH v4 2/2] mm: Init page count in reserve_bootmem_region when MEMINIT_EARLY Yajun Deng
2023-09-29  8:30   ` Mike Rapoport
2023-09-29  9:50     ` Yajun Deng
2023-09-29 10:02       ` Mike Rapoport
2023-09-29 10:27         ` Yajun Deng
2023-10-01 18:59           ` Mike Rapoport
2023-10-02  7:03             ` Yajun Deng
2023-10-02  8:47               ` Mike Rapoport
2023-10-02  8:56                 ` David Hildenbrand
2023-10-02 11:10                   ` Mike Rapoport
2023-10-02 11:25                     ` David Hildenbrand
2023-10-03 14:38                       ` Yajun Deng
2023-10-05  5:06                         ` Mike Rapoport
2023-10-05 14:04                           ` Yajun Deng
2023-10-12  9:19                             ` Mike Rapoport
2023-10-12  9:36                               ` Yajun Deng
2023-10-02  8:30     ` David Hildenbrand
2023-10-08  8:57       ` Yajun Deng
2023-10-10  2:31         ` Yajun Deng
2023-10-12  9:23           ` David Hildenbrand
2023-10-12  9:53             ` Yajun Deng
2023-10-13  8:48               ` Mike Rapoport
2023-10-13  9:29                 ` Yajun Deng
2023-10-16  6:33                   ` Mike Rapoport
2023-10-16  8:10                     ` Yajun Deng
2023-10-16  8:16                       ` David Hildenbrand
2023-10-16  8:32                         ` Yajun Deng
2023-10-16  8:36                           ` David Hildenbrand
2023-10-16 10:17                             ` Yajun Deng
2023-10-17  9:58                               ` Yajun Deng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).