Re: [PATCH v10 2/5] mm: page_isolation: check specified range for unmovable pages

From: Zi Yan <ziy@nvidia.com>
To: David Hildenbrand <david@redhat.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	virtualization@lists.linux-foundation.org,
	Vlastimil Babka <vbabka@suse.cz>,
	Mel Gorman <mgorman@techsingularity.net>,
	Eric Ren <renzhengeek@gmail.com>, Mike Rapoport <rppt@kernel.org>,
	Oscar Salvador <osalvador@suse.de>,
	Christophe Leroy <christophe.leroy@csgroup.eu>
Subject: Re: [PATCH v10 2/5] mm: page_isolation: check specified range for unmovable pages
Date: Tue, 12 Apr 2022 11:01:36 -0400	[thread overview]
Message-ID: <B4336C2E-E728-49E2-B84B-5728B9A1EA69@nvidia.com> (raw)
In-Reply-To: <428828e1-6b59-8db7-62e0-1429863cb13f@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 16600 bytes --]

On 12 Apr 2022, at 10:49, David Hildenbrand wrote:

> On 12.04.22 16:07, Zi Yan wrote:
>> On 12 Apr 2022, at 9:10, David Hildenbrand wrote:
>>
>>> On 06.04.22 17:18, Zi Yan wrote:
>>>> From: Zi Yan <ziy@nvidia.com>
>>>>
>>>> Enable set_migratetype_isolate() to check specified sub-range for
>>>> unmovable pages during isolation. Page isolation is done
>>>> at MAX_ORDER_NR_PAEGS granularity, but not all pages within that
>>>> granularity are intended to be isolated. For example,
>>>> alloc_contig_range(), which uses page isolation, allows ranges without
>>>> alignment. This commit makes unmovable page check only look for
>>>> interesting pages, so that page isolation can succeed for any
>>>> non-overlapping ranges.
>>>>
>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>> ---
>>>
>>> [...]
>>>
>>>>  /*
>>>> - * This function checks whether pageblock includes unmovable pages or not.
>>>> + * This function checks whether the range [start_pfn, end_pfn) includes
>>>> + * unmovable pages or not. The range must fall into a single pageblock and
>>>> + * consequently belong to a single zone.
>>>>   *
>>>>   * PageLRU check without isolation or lru_lock could race so that
>>>>   * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable
>>>> @@ -28,12 +30,14 @@
>>>>   * cannot get removed (e.g., via memory unplug) concurrently.
>>>>   *
>>>>   */
>>>> -static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
>>>> -				 int migratetype, int flags)
>>>> +static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long end_pfn,
>>>> +				int migratetype, int flags)
>>>>  {
>>>> -	unsigned long iter = 0;
>>>> -	unsigned long pfn = page_to_pfn(page);
>>>> -	unsigned long offset = pfn % pageblock_nr_pages;
>>>> +	unsigned long pfn = start_pfn;
>>>> +	struct page *page = pfn_to_page(pfn);
>>>
>>>
>>> Just do
>>>
>>> struct page *page = pfn_to_page(start_pfn);
>>> struct zone *zone = page_zone(page);
>>>
>>> here. No need to lookup the zone again in the loop because, as you
>>> document "must ... belong to a single zone.".
>>>
>>> Then, there is also no need to initialize "pfn" here. In the loop header
>>> is sufficient.
>>>
>>
>> Sure.
>>
>>>> +
>>>> +	VM_BUG_ON(ALIGN_DOWN(start_pfn, pageblock_nr_pages) !=
>>>> +		  ALIGN_DOWN(end_pfn - 1, pageblock_nr_pages));
>>>>
>>>>  	if (is_migrate_cma_page(page)) {
>>>>  		/*
>>>> @@ -47,8 +51,11 @@ static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
>>>>  		return page;
>>>>  	}
>>>>
>>>> -	for (; iter < pageblock_nr_pages - offset; iter++) {
>>>> -		page = pfn_to_page(pfn + iter);
>>>> +	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
>>>> +		struct zone *zone;
>>>> +
>>>> +		page = pfn_to_page(pfn);
>>>> +		zone = page_zone(page);
>>>>
>>>>  		/*
>>>>  		 * Both, bootmem allocations and memory holes are marked
>>>> @@ -85,7 +92,7 @@ static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
>>>>  			}
>>>>
>>>>  			skip_pages = compound_nr(head) - (page - head);
>>>> -			iter += skip_pages - 1;
>>>> +			pfn += skip_pages - 1;
>>>>  			continue;
>>>>  		}
>>>>
>>>> @@ -97,7 +104,7 @@ static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
>>>>  		 */
>>>>  		if (!page_ref_count(page)) {
>>>>  			if (PageBuddy(page))
>>>> -				iter += (1 << buddy_order(page)) - 1;
>>>> +				pfn += (1 << buddy_order(page)) - 1;
>>>>  			continue;
>>>>  		}
>>>>
>>>> @@ -134,11 +141,18 @@ static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
>>>>  	return NULL;
>>>>  }
>>>>
>>>> -static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags)
>>>> +/*
>>>> + * This function set pageblock migratetype to isolate if no unmovable page is
>>>> + * present in [start_pfn, end_pfn). The pageblock must intersect with
>>>> + * [start_pfn, end_pfn).
>>>> + */
>>>> +static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags,
>>>> +			unsigned long start_pfn, unsigned long end_pfn)
>>>
>>> I think we might be able do better, eventually not passing start_pfn at
>>> all. Hmm.
>>
>> IMHO, having start_pfn and end_pfn in the parameter list would make the
>> interface easier to understand. Otherwise if we remove start_pfn,
>> the caller needs to adjust @page to be within the range of [start_pfn,
>> end_pfn)
>>
>>>
>>> I think we want to pull out the
>>> start_isolate_page_range()/undo_isolate_page_range() interface change
>>> into a separate patch.
>>
>> You mean a patch just adding
>>
>> unsigned long isolate_start = pfn_max_align_down(start_pfn);
>> unsigned long isolate_end = pfn_max_align_up(end_pfn);
>>
>> in start_isolate_page_range()/undo_isolate_page_range()?
>>
>> Yes I can do that.
>
> I think we have to be careful with memory onlining/offlining. There are
> corner cases where we get called with only pageblock alignment and
> must not adjust the range.

In the patch below, you added a new set of start_isolate_pageblocks()
and undo_isolate_pageblocks(), where start_isolate_pageblocks() still
calls set_migratetype_isolate() and noted their range should not be
adjusted. But in my patch, set_migratetype_isolate() adjusts
the range for has_unmovable_pages() check. Do you mean
start_isolate_pageblocks() should call a different version of
set_migratetype_isolate() without range adjustment? That can be done
with an additional parameter in start_isolate_page_range(), like
bool strict, right?

>
>
> Something like this as a base for the next cleanups/extensions:
>
>
> From 18d29b53600d6d0d6ac87cdc6b7517e989fa3dac Mon Sep 17 00:00:00 2001
> From: David Hildenbrand <david@redhat.com>
> Date: Tue, 12 Apr 2022 15:51:50 +0200
> Subject: [PATCH] mm: page-isolation: Move alignment logic into
>  start_isolate_page_range()/undo_isolate_page_range()
>
> For ordinary range allocations, we actually have to isolate all pageblocks
> in a MAX_ORDER - 1 range. Only memory onlining/offlining is special: it
> knows exactly which pageblocks to isolate/unisolate and we must not mess
> with the pageblocks to isolate (memory onlining/offlining alwayes passed
> MAX_ORDER - 1 - aligned ranges, unless we're dealing with vmemmap
> located on hotplugged memory, whereby the start pfn might only be
> pageblock aligned).
>
> Further, for ordinary allcoations, we'll want to know the exact range
> we want to allocate -- to check only that range for unmovable pages.
> Right now we lose that information.
>
> So let's move the alignment logic into start_isolate_page_range() /
> undo_isolate_page_range(), such that we have the actual range of
> interest available and the alignment logic contained in there.
>
> Provide start_isolate_pageblocks()/undo_isolate_pageblocks() for memory
> onlining/offlining.
>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  include/linux/page-isolation.h | 23 ++++-----
>  mm/memory_hotplug.c            |  8 ++--
>  mm/page_alloc.c                | 15 +-----
>  mm/page_isolation.c            | 85 ++++++++++++++++++++++++++--------
>  4 files changed, 81 insertions(+), 50 deletions(-)
>
> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
> index e14eddf6741a..8e9e9e80ba67 100644
> --- a/include/linux/page-isolation.h
> +++ b/include/linux/page-isolation.h
> @@ -37,20 +37,15 @@ void set_pageblock_migratetype(struct page *page, int migratetype);
>  int move_freepages_block(struct zone *zone, struct page *page,
>  				int migratetype, int *num_movable);
>
> -/*
> - * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE.
> - */
> -int
> -start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> -			 unsigned migratetype, int flags);
> -
> -/*
> - * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
> - * target range is [start_pfn, end_pfn)
> - */
> -void
> -undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> -			unsigned migratetype);
> +int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> +			     unsigned migratetype, int flags);
> +int start_isolate_pageblocks(unsigned long start_pfn, unsigned long end_pfn,
> +			     unsigned migratetype, int flags);
> +
> +void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> +			     unsigned migratetype);
> +void undo_isolate_pageblocks(unsigned long start_pfn, unsigned long end_pfn,
> +			     unsigned migratetype);
>
>  /*
>   * Test all pages in [start_pfn, end_pfn) are isolated or not.
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 416b38ca8def..fb7f63c800d1 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1089,7 +1089,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
>
>  	/*
>  	 * Fixup the number of isolated pageblocks before marking the sections
> -	 * onlining, such that undo_isolate_page_range() works correctly.
> +	 * onlining, such that undo_isolate_pageblocks() works correctly.
>  	 */
>  	spin_lock_irqsave(&zone->lock, flags);
>  	zone->nr_isolate_pageblock += nr_pages / pageblock_nr_pages;
> @@ -1113,7 +1113,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
>  		build_all_zonelists(NULL);
>
>  	/* Basic onlining is complete, allow allocation of onlined pages. */
> -	undo_isolate_page_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE);
> +	undo_isolate_pageblocks(pfn, pfn + nr_pages, MIGRATE_MOVABLE);
>
>  	/*
>  	 * Freshly onlined pages aren't shuffled (e.g., all pages are placed to
> @@ -1834,7 +1834,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
>  	lru_cache_disable();
>
>  	/* set above range as isolated */
> -	ret = start_isolate_page_range(start_pfn, end_pfn,
> +	ret = start_isolate_pageblocks(start_pfn, end_pfn,
>  				       MIGRATE_MOVABLE,
>  				       MEMORY_OFFLINE | REPORT_FAILURE);
>  	if (ret) {
> @@ -1937,7 +1937,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
>
>  failed_removal_isolated:
>  	/* pushback to free area */
> -	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
> +	undo_isolate_pageblocks(start_pfn, end_pfn, MIGRATE_MOVABLE);
>  	memory_notify(MEM_CANCEL_OFFLINE, &arg);
>  failed_removal_pcplists_disabled:
>  	lru_cache_enable();
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index badc05fc1b2f..76f8c19e37a2 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -8950,15 +8950,6 @@ void *__init alloc_large_system_hash(const char *tablename,
>  }
>
>  #ifdef CONFIG_CONTIG_ALLOC
> -static unsigned long pfn_max_align_down(unsigned long pfn)
> -{
> -	return ALIGN_DOWN(pfn, MAX_ORDER_NR_PAGES);
> -}
> -
> -static unsigned long pfn_max_align_up(unsigned long pfn)
> -{
> -	return ALIGN(pfn, MAX_ORDER_NR_PAGES);
> -}
>
>  #if defined(CONFIG_DYNAMIC_DEBUG) || \
>  	(defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
> @@ -9104,8 +9095,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>  	 * put back to page allocator so that buddy can use them.
>  	 */
>
> -	ret = start_isolate_page_range(pfn_max_align_down(start),
> -				       pfn_max_align_up(end), migratetype, 0);
> +	ret = start_isolate_page_range(start, end, migratetype, 0);
>  	if (ret)
>  		return ret;
>
> @@ -9186,8 +9176,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>  		free_contig_range(end, outer_end - end);
>
>  done:
> -	undo_isolate_page_range(pfn_max_align_down(start),
> -				pfn_max_align_up(end), migratetype);
> +	undo_isolate_page_range(start, end, migratetype);
>  	return ret;
>  }
>  EXPORT_SYMBOL(alloc_contig_range);
> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
> index b34f1310aeaa..7c1f7724c5bb 100644
> --- a/mm/page_isolation.c
> +++ b/mm/page_isolation.c
> @@ -15,6 +15,16 @@
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/page_isolation.h>
>
> +static unsigned long pfn_max_align_down(unsigned long pfn)
> +{
> +	return ALIGN_DOWN(pfn, MAX_ORDER_NR_PAGES);
> +}
> +
> +static unsigned long pfn_max_align_up(unsigned long pfn)
> +{
> +	return ALIGN(pfn, MAX_ORDER_NR_PAGES);
> +}
> +
>  /*
>   * This function checks whether pageblock includes unmovable pages or not.
>   *
> @@ -262,12 +272,40 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>  	return NULL;
>  }
>
> +/*
> + * Make page-allocation-type of pageblocks to be MIGRATE_ISOLATE.
> + *
> + * Most users should actually use start_isolate_page_range(). Only memory
> + * onlining/offlining that knows exactly what it's doing in regard to
> + * isolating only some pageblocks of MAX_ORDER - 1 pages (for the vmemmap)
> + * should use this interface.
> + */
> +int start_isolate_pageblocks(unsigned long start_pfn, unsigned long end_pfn,
> +			     unsigned migratetype, int flags)
> +{
> +	unsigned long pfn;
> +	struct page *page;
> +
> +	BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
> +	BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));
> +
> +	for (pfn = start_pfn;
> +	     pfn < end_pfn;
> +	     pfn += pageblock_nr_pages) {
> +		page = __first_valid_page(pfn, pageblock_nr_pages);
> +		if (page && set_migratetype_isolate(page, migratetype, flags)) {
> +			undo_isolate_pageblocks(start_pfn, pfn, migratetype);
> +			return -EBUSY;
> +		}
> +	}
> +	return 0;
> +}
> +
>  /**
>   * start_isolate_page_range() - make page-allocation-type of range of pages to
>   * be MIGRATE_ISOLATE.
>   * @start_pfn:		The lower PFN of the range to be isolated.
>   * @end_pfn:		The upper PFN of the range to be isolated.
> - *			start_pfn/end_pfn must be aligned to pageblock_order.
>   * @migratetype:	Migrate type to set in error recovery.
>   * @flags:		The following flags are allowed (they can be combined in
>   *			a bit mask)
> @@ -306,29 +344,23 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>  int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>  			     unsigned migratetype, int flags)
>  {
> -	unsigned long pfn;
> -	struct page *page;
> -
> -	BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
> -	BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));
> +	start_pfn = = pfn_max_align_down(start_pfn);
> +	end_pfn = pfn_max_align_up(end_pfn);
>
> -	for (pfn = start_pfn;
> -	     pfn < end_pfn;
> -	     pfn += pageblock_nr_pages) {
> -		page = __first_valid_page(pfn, pageblock_nr_pages);
> -		if (page && set_migratetype_isolate(page, migratetype, flags)) {
> -			undo_isolate_page_range(start_pfn, pfn, migratetype);
> -			return -EBUSY;
> -		}
> -	}
> -	return 0;
> +	return start_isolate_pageblocks(start_pfn, end_pfn, migratetype, flags);
>  }
>
>  /*
> - * Make isolated pages available again.
> + * Make isolated pageblocks, isolated via start_isolate_pageblocks, available
> + * again.
> + *
> + * Most users should actually use undo_isolate_page_range(). Only memory
> + * onlining/offlining that knows exactly what it's doing in regard to
> + * isolating only some pageblocks of MAX_ORDER - 1 pages (for the vmemmap)
> + * should use this interface.
>   */
> -void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> -			    unsigned migratetype)
> +void undo_isolate_pageblocks(unsigned long start_pfn, unsigned long end_pfn,
> +			     unsigned migratetype)
>  {
>  	unsigned long pfn;
>  	struct page *page;
> @@ -345,6 +377,21 @@ void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>  		unset_migratetype_isolate(page, migratetype);
>  	}
>  }
> +
> +/*
> + * Make isolated pageblocks, isolated via start_isolate_page_range(), available
> + * again. The pageblock isolation range will be extended just like for
> + * start_isolate_page_range().
> + */
> +void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> +			     unsigned migratetype)
> +{
> +	start_pfn = = pfn_max_align_down(start_pfn);
> +	end_pfn = pfn_max_align_up(end_pfn);
> +
> +	return undo_isolate_pageblocks(start_pfn, end_pfn, migratetype);
> +}
> +
>  /*
>   * Test all pages in the range is free(means isolated) or not.
>   * all pages in [start_pfn...end_pfn) must be in the same zone.
> -- 
> 2.35.1
>
>
>
> -- 
> Thanks,
>
> David / dhildenb

--
Best Regards,
Yan, Zi

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 854 bytes --]