* [PATCH v7 1/5] mm: page_isolation: move has_unmovable_pages() to mm/page_isolation.c
2022-03-11 18:36 [PATCH v7 0/5] Use pageblock_order for cma and alloc_contig_range alignment Zi Yan
@ 2022-03-11 18:36 ` Zi Yan
2022-03-14 17:03 ` David Hildenbrand
2022-03-11 18:36 ` [PATCH v7 2/5] mm: page_isolation: check specified range for unmovable pages Zi Yan
` (3 subsequent siblings)
4 siblings, 1 reply; 9+ messages in thread
From: Zi Yan @ 2022-03-11 18:36 UTC (permalink / raw)
To: David Hildenbrand, linux-mm
Cc: linux-kernel, virtualization, Vlastimil Babka, Mel Gorman,
Eric Ren, Mike Rapoport, Oscar Salvador, Christophe Leroy,
Zi Yan, Mike Rapoport
From: Zi Yan <ziy@nvidia.com>
has_unmovable_pages() is only used in mm/page_isolation.c. Move it from
mm/page_alloc.c and make it static.
Signed-off-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
---
include/linux/page-isolation.h | 2 -
mm/page_alloc.c | 119 ---------------------------------
mm/page_isolation.c | 119 +++++++++++++++++++++++++++++++++
3 files changed, 119 insertions(+), 121 deletions(-)
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 572458016331..e14eddf6741a 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -33,8 +33,6 @@ static inline bool is_migrate_isolate(int migratetype)
#define MEMORY_OFFLINE 0x1
#define REPORT_FAILURE 0x2
-struct page *has_unmovable_pages(struct zone *zone, struct page *page,
- int migratetype, int flags);
void set_pageblock_migratetype(struct page *page, int migratetype);
int move_freepages_block(struct zone *zone, struct page *page,
int migratetype, int *num_movable);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f648decfe39d..6de57d058d3d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8936,125 +8936,6 @@ void *__init alloc_large_system_hash(const char *tablename,
return table;
}
-/*
- * This function checks whether pageblock includes unmovable pages or not.
- *
- * PageLRU check without isolation or lru_lock could race so that
- * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable
- * check without lock_page also may miss some movable non-lru pages at
- * race condition. So you can't expect this function should be exact.
- *
- * Returns a page without holding a reference. If the caller wants to
- * dereference that page (e.g., dumping), it has to make sure that it
- * cannot get removed (e.g., via memory unplug) concurrently.
- *
- */
-struct page *has_unmovable_pages(struct zone *zone, struct page *page,
- int migratetype, int flags)
-{
- unsigned long iter = 0;
- unsigned long pfn = page_to_pfn(page);
- unsigned long offset = pfn % pageblock_nr_pages;
-
- if (is_migrate_cma_page(page)) {
- /*
- * CMA allocations (alloc_contig_range) really need to mark
- * isolate CMA pageblocks even when they are not movable in fact
- * so consider them movable here.
- */
- if (is_migrate_cma(migratetype))
- return NULL;
-
- return page;
- }
-
- for (; iter < pageblock_nr_pages - offset; iter++) {
- page = pfn_to_page(pfn + iter);
-
- /*
- * Both, bootmem allocations and memory holes are marked
- * PG_reserved and are unmovable. We can even have unmovable
- * allocations inside ZONE_MOVABLE, for example when
- * specifying "movablecore".
- */
- if (PageReserved(page))
- return page;
-
- /*
- * If the zone is movable and we have ruled out all reserved
- * pages then it should be reasonably safe to assume the rest
- * is movable.
- */
- if (zone_idx(zone) == ZONE_MOVABLE)
- continue;
-
- /*
- * Hugepages are not in LRU lists, but they're movable.
- * THPs are on the LRU, but need to be counted as #small pages.
- * We need not scan over tail pages because we don't
- * handle each tail page individually in migration.
- */
- if (PageHuge(page) || PageTransCompound(page)) {
- struct page *head = compound_head(page);
- unsigned int skip_pages;
-
- if (PageHuge(page)) {
- if (!hugepage_migration_supported(page_hstate(head)))
- return page;
- } else if (!PageLRU(head) && !__PageMovable(head)) {
- return page;
- }
-
- skip_pages = compound_nr(head) - (page - head);
- iter += skip_pages - 1;
- continue;
- }
-
- /*
- * We can't use page_count without pin a page
- * because another CPU can free compound page.
- * This check already skips compound tails of THP
- * because their page->_refcount is zero at all time.
- */
- if (!page_ref_count(page)) {
- if (PageBuddy(page))
- iter += (1 << buddy_order(page)) - 1;
- continue;
- }
-
- /*
- * The HWPoisoned page may be not in buddy system, and
- * page_count() is not 0.
- */
- if ((flags & MEMORY_OFFLINE) && PageHWPoison(page))
- continue;
-
- /*
- * We treat all PageOffline() pages as movable when offlining
- * to give drivers a chance to decrement their reference count
- * in MEM_GOING_OFFLINE in order to indicate that these pages
- * can be offlined as there are no direct references anymore.
- * For actually unmovable PageOffline() where the driver does
- * not support this, we will fail later when trying to actually
- * move these pages that still have a reference count > 0.
- * (false negatives in this function only)
- */
- if ((flags & MEMORY_OFFLINE) && PageOffline(page))
- continue;
-
- if (__PageMovable(page) || PageLRU(page))
- continue;
-
- /*
- * If there are RECLAIMABLE pages, we need to check
- * it. But now, memory offline itself doesn't call
- * shrink_node_slabs() and it still to be fixed.
- */
- return page;
- }
- return NULL;
-}
-
#ifdef CONFIG_CONTIG_ALLOC
static unsigned long pfn_max_align_down(unsigned long pfn)
{
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index f67c4c70f17f..b34f1310aeaa 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -15,6 +15,125 @@
#define CREATE_TRACE_POINTS
#include <trace/events/page_isolation.h>
+/*
+ * This function checks whether pageblock includes unmovable pages or not.
+ *
+ * PageLRU check without isolation or lru_lock could race so that
+ * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable
+ * check without lock_page also may miss some movable non-lru pages at
+ * race condition. So you can't expect this function should be exact.
+ *
+ * Returns a page without holding a reference. If the caller wants to
+ * dereference that page (e.g., dumping), it has to make sure that it
+ * cannot get removed (e.g., via memory unplug) concurrently.
+ *
+ */
+static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
+ int migratetype, int flags)
+{
+ unsigned long iter = 0;
+ unsigned long pfn = page_to_pfn(page);
+ unsigned long offset = pfn % pageblock_nr_pages;
+
+ if (is_migrate_cma_page(page)) {
+ /*
+ * CMA allocations (alloc_contig_range) really need to mark
+ * isolate CMA pageblocks even when they are not movable in fact
+ * so consider them movable here.
+ */
+ if (is_migrate_cma(migratetype))
+ return NULL;
+
+ return page;
+ }
+
+ for (; iter < pageblock_nr_pages - offset; iter++) {
+ page = pfn_to_page(pfn + iter);
+
+ /*
+ * Both, bootmem allocations and memory holes are marked
+ * PG_reserved and are unmovable. We can even have unmovable
+ * allocations inside ZONE_MOVABLE, for example when
+ * specifying "movablecore".
+ */
+ if (PageReserved(page))
+ return page;
+
+ /*
+ * If the zone is movable and we have ruled out all reserved
+ * pages then it should be reasonably safe to assume the rest
+ * is movable.
+ */
+ if (zone_idx(zone) == ZONE_MOVABLE)
+ continue;
+
+ /*
+ * Hugepages are not in LRU lists, but they're movable.
+ * THPs are on the LRU, but need to be counted as #small pages.
+ * We need not scan over tail pages because we don't
+ * handle each tail page individually in migration.
+ */
+ if (PageHuge(page) || PageTransCompound(page)) {
+ struct page *head = compound_head(page);
+ unsigned int skip_pages;
+
+ if (PageHuge(page)) {
+ if (!hugepage_migration_supported(page_hstate(head)))
+ return page;
+ } else if (!PageLRU(head) && !__PageMovable(head)) {
+ return page;
+ }
+
+ skip_pages = compound_nr(head) - (page - head);
+ iter += skip_pages - 1;
+ continue;
+ }
+
+ /*
+ * We can't use page_count without pin a page
+ * because another CPU can free compound page.
+ * This check already skips compound tails of THP
+ * because their page->_refcount is zero at all time.
+ */
+ if (!page_ref_count(page)) {
+ if (PageBuddy(page))
+ iter += (1 << buddy_order(page)) - 1;
+ continue;
+ }
+
+ /*
+ * The HWPoisoned page may be not in buddy system, and
+ * page_count() is not 0.
+ */
+ if ((flags & MEMORY_OFFLINE) && PageHWPoison(page))
+ continue;
+
+ /*
+ * We treat all PageOffline() pages as movable when offlining
+ * to give drivers a chance to decrement their reference count
+ * in MEM_GOING_OFFLINE in order to indicate that these pages
+ * can be offlined as there are no direct references anymore.
+ * For actually unmovable PageOffline() where the driver does
+ * not support this, we will fail later when trying to actually
+ * move these pages that still have a reference count > 0.
+ * (false negatives in this function only)
+ */
+ if ((flags & MEMORY_OFFLINE) && PageOffline(page))
+ continue;
+
+ if (__PageMovable(page) || PageLRU(page))
+ continue;
+
+ /*
+ * If there are RECLAIMABLE pages, we need to check
+ * it. But now, memory offline itself doesn't call
+ * shrink_node_slabs() and it still to be fixed.
+ */
+ return page;
+ }
+ return NULL;
+}
+
static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags)
{
struct zone *zone = page_zone(page);
--
2.35.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v7 1/5] mm: page_isolation: move has_unmovable_pages() to mm/page_isolation.c
2022-03-11 18:36 ` [PATCH v7 1/5] mm: page_isolation: move has_unmovable_pages() to mm/page_isolation.c Zi Yan
@ 2022-03-14 17:03 ` David Hildenbrand
0 siblings, 0 replies; 9+ messages in thread
From: David Hildenbrand @ 2022-03-14 17:03 UTC (permalink / raw)
To: Zi Yan, linux-mm
Cc: linux-kernel, virtualization, Vlastimil Babka, Mel Gorman,
Eric Ren, Mike Rapoport, Oscar Salvador, Christophe Leroy,
Mike Rapoport
On 11.03.22 19:36, Zi Yan wrote:
> From: Zi Yan <ziy@nvidia.com>
>
> has_unmovable_pages() is only used in mm/page_isolation.c. Move it from
> mm/page_alloc.c and make it static.
>
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> Reviewed-by: Oscar Salvador <osalvador@suse.de>
> Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v7 2/5] mm: page_isolation: check specified range for unmovable pages
2022-03-11 18:36 [PATCH v7 0/5] Use pageblock_order for cma and alloc_contig_range alignment Zi Yan
2022-03-11 18:36 ` [PATCH v7 1/5] mm: page_isolation: move has_unmovable_pages() to mm/page_isolation.c Zi Yan
@ 2022-03-11 18:36 ` Zi Yan
2022-03-14 17:13 ` David Hildenbrand
2022-03-11 18:36 ` [PATCH v7 3/5] mm: make alloc_contig_range work at pageblock granularity Zi Yan
` (2 subsequent siblings)
4 siblings, 1 reply; 9+ messages in thread
From: Zi Yan @ 2022-03-11 18:36 UTC (permalink / raw)
To: David Hildenbrand, linux-mm
Cc: linux-kernel, virtualization, Vlastimil Babka, Mel Gorman,
Eric Ren, Mike Rapoport, Oscar Salvador, Christophe Leroy,
Zi Yan
From: Zi Yan <ziy@nvidia.com>
Enable set_migratetype_isolate() to check specified sub-range for
unmovable pages during isolation. Page isolation is done
at max(MAX_ORDER_NR_PAGES, pageblock_nr_pages) granularity, but not all
pages within that granularity are intended to be isolated. For example,
alloc_contig_range(), which uses page isolation, allows ranges without
alignment. This commit makes unmovable page check only look for
interesting pages, so that page isolation can succeed for any
non-overlapping ranges.
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
include/linux/page-isolation.h | 10 ++++++++
mm/page_alloc.c | 13 +---------
mm/page_isolation.c | 47 +++++++++++++++++++++-------------
3 files changed, 40 insertions(+), 30 deletions(-)
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index e14eddf6741a..eb4a208fe907 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -15,6 +15,16 @@ static inline bool is_migrate_isolate(int migratetype)
{
return migratetype == MIGRATE_ISOLATE;
}
+static inline unsigned long pfn_max_align_down(unsigned long pfn)
+{
+ return ALIGN_DOWN(pfn, MAX_ORDER_NR_PAGES);
+}
+
+static inline unsigned long pfn_max_align_up(unsigned long pfn)
+{
+ return ALIGN(pfn, MAX_ORDER_NR_PAGES);
+}
+
#else
static inline bool has_isolate_pageblock(struct zone *zone)
{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6de57d058d3d..680580a40a35 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8937,16 +8937,6 @@ void *__init alloc_large_system_hash(const char *tablename,
}
#ifdef CONFIG_CONTIG_ALLOC
-static unsigned long pfn_max_align_down(unsigned long pfn)
-{
- return ALIGN_DOWN(pfn, MAX_ORDER_NR_PAGES);
-}
-
-static unsigned long pfn_max_align_up(unsigned long pfn)
-{
- return ALIGN(pfn, MAX_ORDER_NR_PAGES);
-}
-
#if defined(CONFIG_DYNAMIC_DEBUG) || \
(defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
/* Usage: See admin-guide/dynamic-debug-howto.rst */
@@ -9091,8 +9081,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
* put back to page allocator so that buddy can use them.
*/
- ret = start_isolate_page_range(pfn_max_align_down(start),
- pfn_max_align_up(end), migratetype, 0);
+ ret = start_isolate_page_range(start, end, migratetype, 0);
if (ret)
return ret;
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index b34f1310aeaa..e0afc3ee8cf9 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -16,7 +16,8 @@
#include <trace/events/page_isolation.h>
/*
- * This function checks whether pageblock includes unmovable pages or not.
+ * This function checks whether pageblock within [start_pfn, end_pfn) includes
+ * unmovable pages or not.
*
* PageLRU check without isolation or lru_lock could race so that
* MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable
@@ -29,11 +30,14 @@
*
*/
static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
- int migratetype, int flags)
+ int migratetype, int flags,
+ unsigned long start_pfn, unsigned long end_pfn)
{
- unsigned long iter = 0;
- unsigned long pfn = page_to_pfn(page);
- unsigned long offset = pfn % pageblock_nr_pages;
+ unsigned long first_pfn = max(page_to_pfn(page), start_pfn);
+ unsigned long pfn = first_pfn;
+ unsigned long last_pfn = min(ALIGN(pfn + 1, pageblock_nr_pages), end_pfn);
+
+ page = pfn_to_page(pfn);
if (is_migrate_cma_page(page)) {
/*
@@ -47,8 +51,8 @@ static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
return page;
}
- for (; iter < pageblock_nr_pages - offset; iter++) {
- page = pfn_to_page(pfn + iter);
+ for (pfn = first_pfn; pfn < last_pfn; pfn++) {
+ page = pfn_to_page(pfn);
/*
* Both, bootmem allocations and memory holes are marked
@@ -85,7 +89,7 @@ static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
}
skip_pages = compound_nr(head) - (page - head);
- iter += skip_pages - 1;
+ pfn += skip_pages - 1;
continue;
}
@@ -97,7 +101,7 @@ static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
*/
if (!page_ref_count(page)) {
if (PageBuddy(page))
- iter += (1 << buddy_order(page)) - 1;
+ pfn += (1 << buddy_order(page)) - 1;
continue;
}
@@ -134,7 +138,13 @@ static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
return NULL;
}
-static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags)
+/*
+ * This function sets pageblock migratetype to isolate if no unmovable page is
+ * present in [start_pfn, end_pfn). The pageblock must be within
+ * [start_pfn, end_pfn).
+ */
+static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags,
+ unsigned long start_pfn, unsigned long end_pfn)
{
struct zone *zone = page_zone(page);
struct page *unmovable;
@@ -156,7 +166,8 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
* FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
* We just check MOVABLE pages.
*/
- unmovable = has_unmovable_pages(zone, page, migratetype, isol_flags);
+ unmovable = has_unmovable_pages(zone, page, migratetype, isol_flags,
+ start_pfn, end_pfn);
if (!unmovable) {
unsigned long nr_pages;
int mt = get_pageblock_migratetype(page);
@@ -267,7 +278,6 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* be MIGRATE_ISOLATE.
* @start_pfn: The lower PFN of the range to be isolated.
* @end_pfn: The upper PFN of the range to be isolated.
- * start_pfn/end_pfn must be aligned to pageblock_order.
* @migratetype: Migrate type to set in error recovery.
* @flags: The following flags are allowed (they can be combined in
* a bit mask)
@@ -309,15 +319,16 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
unsigned long pfn;
struct page *page;
- BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
- BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));
+ unsigned long isolate_start = pfn_max_align_down(start_pfn);
+ unsigned long isolate_end = pfn_max_align_up(end_pfn);
- for (pfn = start_pfn;
- pfn < end_pfn;
+ for (pfn = isolate_start;
+ pfn < isolate_end;
pfn += pageblock_nr_pages) {
page = __first_valid_page(pfn, pageblock_nr_pages);
- if (page && set_migratetype_isolate(page, migratetype, flags)) {
- undo_isolate_page_range(start_pfn, pfn, migratetype);
+ if (page && set_migratetype_isolate(page, migratetype, flags,
+ start_pfn, end_pfn)) {
+ undo_isolate_page_range(isolate_start, pfn, migratetype);
return -EBUSY;
}
}
--
2.35.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v7 2/5] mm: page_isolation: check specified range for unmovable pages
2022-03-11 18:36 ` [PATCH v7 2/5] mm: page_isolation: check specified range for unmovable pages Zi Yan
@ 2022-03-14 17:13 ` David Hildenbrand
2022-03-14 17:58 ` Zi Yan
0 siblings, 1 reply; 9+ messages in thread
From: David Hildenbrand @ 2022-03-14 17:13 UTC (permalink / raw)
To: Zi Yan, linux-mm
Cc: linux-kernel, virtualization, Vlastimil Babka, Mel Gorman,
Eric Ren, Mike Rapoport, Oscar Salvador, Christophe Leroy
On 11.03.22 19:36, Zi Yan wrote:
> From: Zi Yan <ziy@nvidia.com>
>
> Enable set_migratetype_isolate() to check specified sub-range for
> unmovable pages during isolation. Page isolation is done
> at max(MAX_ORDER_NR_PAEGS, pageblock_nr_pages) granularity, but not all
> pages within that granularity are intended to be isolated. For example,
> alloc_contig_range(), which uses page isolation, allows ranges without
> alignment. This commit makes unmovable page check only look for
> interesting pages, so that page isolation can succeed for any
> non-overlapping ranges.
>
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
> include/linux/page-isolation.h | 10 ++++++++
> mm/page_alloc.c | 13 +---------
> mm/page_isolation.c | 47 +++++++++++++++++++++-------------
> 3 files changed, 40 insertions(+), 30 deletions(-)
>
> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
> index e14eddf6741a..eb4a208fe907 100644
> --- a/include/linux/page-isolation.h
> +++ b/include/linux/page-isolation.h
> @@ -15,6 +15,16 @@ static inline bool is_migrate_isolate(int migratetype)
> {
> return migratetype == MIGRATE_ISOLATE;
> }
> +static inline unsigned long pfn_max_align_down(unsigned long pfn)
> +{
> + return ALIGN_DOWN(pfn, MAX_ORDER_NR_PAGES);
> +}
> +
> +static inline unsigned long pfn_max_align_up(unsigned long pfn)
> +{
> + return ALIGN(pfn, MAX_ORDER_NR_PAGES);
> +}
> +
> #else
> static inline bool has_isolate_pageblock(struct zone *zone)
> {
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 6de57d058d3d..680580a40a35 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -8937,16 +8937,6 @@ void *__init alloc_large_system_hash(const char *tablename,
> }
>
> #ifdef CONFIG_CONTIG_ALLOC
> -static unsigned long pfn_max_align_down(unsigned long pfn)
> -{
> - return ALIGN_DOWN(pfn, MAX_ORDER_NR_PAGES);
> -}
> -
> -static unsigned long pfn_max_align_up(unsigned long pfn)
> -{
> - return ALIGN(pfn, MAX_ORDER_NR_PAGES);
> -}
> -
> #if defined(CONFIG_DYNAMIC_DEBUG) || \
> (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
> /* Usage: See admin-guide/dynamic-debug-howto.rst */
> @@ -9091,8 +9081,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
> * put back to page allocator so that buddy can use them.
> */
>
> - ret = start_isolate_page_range(pfn_max_align_down(start),
> - pfn_max_align_up(end), migratetype, 0);
> + ret = start_isolate_page_range(start, end, migratetype, 0);
> if (ret)
> return ret;
>
> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
> index b34f1310aeaa..e0afc3ee8cf9 100644
> --- a/mm/page_isolation.c
> +++ b/mm/page_isolation.c
> @@ -16,7 +16,8 @@
> #include <trace/events/page_isolation.h>
>
> /*
> - * This function checks whether pageblock includes unmovable pages or not.
> + * This function checks whether pageblock within [start_pfn, end_pfn) includes
> + * unmovable pages or not.
> *
> * PageLRU check without isolation or lru_lock could race so that
> * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable
> @@ -29,11 +30,14 @@
> *
> */
> static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
> - int migratetype, int flags)
> + int migratetype, int flags,
> + unsigned long start_pfn, unsigned long end_pfn)
> {
> - unsigned long iter = 0;
> - unsigned long pfn = page_to_pfn(page);
> - unsigned long offset = pfn % pageblock_nr_pages;
> + unsigned long first_pfn = max(page_to_pfn(page), start_pfn);
> + unsigned long pfn = first_pfn;
> + unsigned long last_pfn = min(ALIGN(pfn + 1, pageblock_nr_pages), end_pfn);
> +
> + page = pfn_to_page(pfn);
I think we should get rid of the page argument completely. The caller
should pass in a reasonable [start_pfn, end_pfn) range, and do any
necessary fixups to the range outside of this function.
The goal should be to have
pfn = start_pfn
and replacing last_pfn by end_pfn.
Ideally we'd end up with "This function checks whether the range
[start_pfn, end_pfn) contains unmovable pages or not."
What would be missing to achieve that?
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v7 2/5] mm: page_isolation: check specified range for unmovable pages
2022-03-14 17:13 ` David Hildenbrand
@ 2022-03-14 17:58 ` Zi Yan
0 siblings, 0 replies; 9+ messages in thread
From: Zi Yan @ 2022-03-14 17:58 UTC (permalink / raw)
To: David Hildenbrand
Cc: linux-mm, linux-kernel, virtualization, Vlastimil Babka,
Mel Gorman, Eric Ren, Mike Rapoport, Oscar Salvador,
Christophe Leroy
[-- Attachment #1: Type: text/plain, Size: 4788 bytes --]
On 14 Mar 2022, at 13:13, David Hildenbrand wrote:
> On 11.03.22 19:36, Zi Yan wrote:
>> From: Zi Yan <ziy@nvidia.com>
>>
>> Enable set_migratetype_isolate() to check specified sub-range for
>> unmovable pages during isolation. Page isolation is done
>> at max(MAX_ORDER_NR_PAEGS, pageblock_nr_pages) granularity, but not all
>> pages within that granularity are intended to be isolated. For example,
>> alloc_contig_range(), which uses page isolation, allows ranges without
>> alignment. This commit makes unmovable page check only look for
>> interesting pages, so that page isolation can succeed for any
>> non-overlapping ranges.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>> include/linux/page-isolation.h | 10 ++++++++
>> mm/page_alloc.c | 13 +---------
>> mm/page_isolation.c | 47 +++++++++++++++++++++-------------
>> 3 files changed, 40 insertions(+), 30 deletions(-)
>>
>> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
>> index e14eddf6741a..eb4a208fe907 100644
>> --- a/include/linux/page-isolation.h
>> +++ b/include/linux/page-isolation.h
>> @@ -15,6 +15,16 @@ static inline bool is_migrate_isolate(int migratetype)
>> {
>> return migratetype == MIGRATE_ISOLATE;
>> }
>> +static inline unsigned long pfn_max_align_down(unsigned long pfn)
>> +{
>> + return ALIGN_DOWN(pfn, MAX_ORDER_NR_PAGES);
>> +}
>> +
>> +static inline unsigned long pfn_max_align_up(unsigned long pfn)
>> +{
>> + return ALIGN(pfn, MAX_ORDER_NR_PAGES);
>> +}
>> +
>> #else
>> static inline bool has_isolate_pageblock(struct zone *zone)
>> {
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 6de57d058d3d..680580a40a35 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -8937,16 +8937,6 @@ void *__init alloc_large_system_hash(const char *tablename,
>> }
>>
>> #ifdef CONFIG_CONTIG_ALLOC
>> -static unsigned long pfn_max_align_down(unsigned long pfn)
>> -{
>> - return ALIGN_DOWN(pfn, MAX_ORDER_NR_PAGES);
>> -}
>> -
>> -static unsigned long pfn_max_align_up(unsigned long pfn)
>> -{
>> - return ALIGN(pfn, MAX_ORDER_NR_PAGES);
>> -}
>> -
>> #if defined(CONFIG_DYNAMIC_DEBUG) || \
>> (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
>> /* Usage: See admin-guide/dynamic-debug-howto.rst */
>> @@ -9091,8 +9081,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
>> * put back to page allocator so that buddy can use them.
>> */
>>
>> - ret = start_isolate_page_range(pfn_max_align_down(start),
>> - pfn_max_align_up(end), migratetype, 0);
>> + ret = start_isolate_page_range(start, end, migratetype, 0);
>> if (ret)
>> return ret;
>>
>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
>> index b34f1310aeaa..e0afc3ee8cf9 100644
>> --- a/mm/page_isolation.c
>> +++ b/mm/page_isolation.c
>> @@ -16,7 +16,8 @@
>> #include <trace/events/page_isolation.h>
>>
>> /*
>> - * This function checks whether pageblock includes unmovable pages or not.
>> + * This function checks whether pageblock within [start_pfn, end_pfn) includes
>> + * unmovable pages or not.
>> *
>> * PageLRU check without isolation or lru_lock could race so that
>> * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable
>> @@ -29,11 +30,14 @@
>> *
>> */
>> static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
>> - int migratetype, int flags)
>> + int migratetype, int flags,
>> + unsigned long start_pfn, unsigned long end_pfn)
>> {
>> - unsigned long iter = 0;
>> - unsigned long pfn = page_to_pfn(page);
>> - unsigned long offset = pfn % pageblock_nr_pages;
>> + unsigned long first_pfn = max(page_to_pfn(page), start_pfn);
>> + unsigned long pfn = first_pfn;
>> + unsigned long last_pfn = min(ALIGN(pfn + 1, pageblock_nr_pages), end_pfn);
>> +
>> + page = pfn_to_page(pfn);
>
> I think we should get rid of the page argument completely. The caller
> should pass in a reasonable [start_pfn, end_pfn) range, and to any
> necessary fixups to the range outside of this function.
>
> The goal should be to have
>
> pfn = start_pfn
>
> and replacing last_pfn by end_pfn.
>
>
> Ideally we'd end up with "This function checks whether the range
> [start_pfn, end_pfn) contains unmovable pages or not."
>
>
> What would be missing to achieve that?
>
Sure, I will do that in the next version. Thanks for pointing this out.
The intersection of the pageblock of the “page” and [start_pfn, end_pfn)
will be moved to set_migratetype_isolate() and the resulting range will
be passed into has_unmovable_pages() to avoid redundant unmovable page checks.
--
Best Regards,
Yan, Zi
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 854 bytes --]
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v7 3/5] mm: make alloc_contig_range work at pageblock granularity
2022-03-11 18:36 [PATCH v7 0/5] Use pageblock_order for cma and alloc_contig_range alignment Zi Yan
2022-03-11 18:36 ` [PATCH v7 1/5] mm: page_isolation: move has_unmovable_pages() to mm/page_isolation.c Zi Yan
2022-03-11 18:36 ` [PATCH v7 2/5] mm: page_isolation: check specified range for unmovable pages Zi Yan
@ 2022-03-11 18:36 ` Zi Yan
2022-03-11 18:36 ` [PATCH v7 4/5] mm: cma: use pageblock_order as the single alignment Zi Yan
2022-03-11 18:36 ` [PATCH v7 5/5] drivers: virtio_mem: use pageblock size as the minimum virtio_mem size Zi Yan
4 siblings, 0 replies; 9+ messages in thread
From: Zi Yan @ 2022-03-11 18:36 UTC (permalink / raw)
To: David Hildenbrand, linux-mm
Cc: linux-kernel, virtualization, Vlastimil Babka, Mel Gorman,
Eric Ren, Mike Rapoport, Oscar Salvador, Christophe Leroy,
Zi Yan, kernel test robot
From: Zi Yan <ziy@nvidia.com>
alloc_contig_range() worked at MAX_ORDER-1 granularity to avoid merging
pageblocks with different migratetypes. It might unnecessarily convert
extra pageblocks at the beginning and at the end of the range. Change
alloc_contig_range() to work at pageblock granularity.
Special handling is needed for free pages and in-use pages across the
boundaries of the range specified by alloc_contig_range(), because these
partially isolated pages cause free page accounting issues. The free
pages will be split and freed into separate migratetype lists; the
in-use pages will be migrated, then the freed pages will be handled.
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
include/linux/page-isolation.h | 2 +-
mm/internal.h | 6 ++
mm/memory_hotplug.c | 3 +-
mm/page_alloc.c | 112 ++++++++++++-----------
mm/page_isolation.c | 156 +++++++++++++++++++++++++++++++--
5 files changed, 214 insertions(+), 65 deletions(-)
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index eb4a208fe907..20ec9cad3882 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -52,7 +52,7 @@ int move_freepages_block(struct zone *zone, struct page *page,
*/
int
start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
- unsigned migratetype, int flags);
+ unsigned migratetype, int flags, gfp_t gfp_flags);
/*
* Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
diff --git a/mm/internal.h b/mm/internal.h
index 86277d90a5e2..6f86faad6b46 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -263,6 +263,9 @@ extern void *memmap_alloc(phys_addr_t size, phys_addr_t align,
phys_addr_t min_addr,
int nid, bool exact_nid);
+void split_free_page(struct page *free_page,
+ int order, unsigned long split_pfn_offset);
+
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
/*
@@ -326,6 +329,9 @@ isolate_freepages_range(struct compact_control *cc,
int
isolate_migratepages_range(struct compact_control *cc,
unsigned long low_pfn, unsigned long end_pfn);
+
+int __alloc_contig_migrate_range(struct compact_control *cc,
+ unsigned long start, unsigned long end);
#endif
int find_suitable_fallback(struct free_area *area, unsigned int order,
int migratetype, bool only_stealable, bool *can_steal);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 416b38ca8def..1cf4d4b60772 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1836,7 +1836,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
/* set above range as isolated */
ret = start_isolate_page_range(start_pfn, end_pfn,
MIGRATE_MOVABLE,
- MEMORY_OFFLINE | REPORT_FAILURE);
+ MEMORY_OFFLINE | REPORT_FAILURE,
+ GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL);
if (ret) {
reason = "failure to isolate range";
goto failed_removal_pcplists_disabled;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 680580a40a35..c2db271b874f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1138,6 +1138,43 @@ static inline void __free_one_page(struct page *page,
page_reporting_notify_free(order);
}
+/**
+ * split_free_page() -- split a free page at split_pfn_offset
+ * @free_page: the original free page
+ * @order: the order of the page
+ * @split_pfn_offset: split offset within the page
+ *
+ * It is used when the free page crosses two pageblocks with different migratetypes
+ * at split_pfn_offset within the page. The split free page will be put into
+ * separate migratetype lists afterwards. Otherwise, the function achieves
+ * nothing.
+ */
+void split_free_page(struct page *free_page,
+ int order, unsigned long split_pfn_offset)
+{
+ struct zone *zone = page_zone(free_page);
+ unsigned long free_page_pfn = page_to_pfn(free_page);
+ unsigned long pfn;
+ unsigned long flags;
+ int free_page_order;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ del_page_from_free_list(free_page, zone, order);
+ for (pfn = free_page_pfn;
+ pfn < free_page_pfn + (1UL << order);) {
+ int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
+
+ free_page_order = ffs(split_pfn_offset) - 1;
+ __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
+ mt, FPI_NONE);
+ pfn += 1UL << free_page_order;
+ split_pfn_offset -= (1UL << free_page_order);
+ /* we have done the first part, now switch to second part */
+ if (split_pfn_offset == 0)
+ split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+}
/*
* A bad page could be due to a number of fields. Instead of multiple branches,
* try and check multiple fields with one check. The caller must do a detailed
@@ -8959,7 +8996,7 @@ static inline void alloc_contig_dump_pages(struct list_head *page_list)
#endif
/* [start, end) must belong to a single zone. */
-static int __alloc_contig_migrate_range(struct compact_control *cc,
+int __alloc_contig_migrate_range(struct compact_control *cc,
unsigned long start, unsigned long end)
{
/* This function is based on compact_zone() from compaction.c. */
@@ -9017,6 +9054,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
return 0;
}
+
/**
* alloc_contig_range() -- tries to allocate given range of pages
* @start: start PFN to allocate
@@ -9041,8 +9079,9 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
int alloc_contig_range(unsigned long start, unsigned long end,
unsigned migratetype, gfp_t gfp_mask)
{
- unsigned long outer_start, outer_end;
- unsigned int order;
+ unsigned long outer_end;
+ unsigned long alloc_start = ALIGN_DOWN(start, pageblock_nr_pages);
+ unsigned long alloc_end = ALIGN(end, pageblock_nr_pages);
int ret = 0;
struct compact_control cc = {
@@ -9061,14 +9100,11 @@ int alloc_contig_range(unsigned long start, unsigned long end,
* What we do here is we mark all pageblocks in range as
* MIGRATE_ISOLATE. Because pageblock and max order pages may
* have different sizes, and due to the way page allocator
- * work, we align the range to biggest of the two pages so
- * that page allocator won't try to merge buddies from
- * different pageblocks and change MIGRATE_ISOLATE to some
- * other migration type.
+ * works, start_isolate_page_range() has special handling for this.
*
* Once the pageblocks are marked as MIGRATE_ISOLATE, we
* migrate the pages from an unaligned range (ie. pages that
- * we are interested in). This will put all the pages in
+ * we are interested in). This will put all the pages in
* range back to page allocator as MIGRATE_ISOLATE.
*
* When this is done, we take the pages in range from page
@@ -9081,9 +9117,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
* put back to page allocator so that buddy can use them.
*/
- ret = start_isolate_page_range(start, end, migratetype, 0);
+ ret = start_isolate_page_range(start, end, migratetype, 0, gfp_mask);
if (ret)
- return ret;
+ goto done;
drain_all_pages(cc.zone);
@@ -9102,68 +9138,28 @@ int alloc_contig_range(unsigned long start, unsigned long end,
goto done;
ret = 0;
- /*
- * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
- * aligned blocks that are marked as MIGRATE_ISOLATE. What's
- * more, all pages in [start, end) are free in page allocator.
- * What we are going to do is to allocate all pages from
- * [start, end) (that is remove them from page allocator).
- *
- * The only problem is that pages at the beginning and at the
- * end of interesting range may be not aligned with pages that
- * page allocator holds, ie. they can be part of higher order
- * pages. Because of this, we reserve the bigger range and
- * once this is done free the pages we are not interested in.
- *
- * We don't have to hold zone->lock here because the pages are
- * isolated thus they won't get removed from buddy.
- */
-
- order = 0;
- outer_start = start;
- while (!PageBuddy(pfn_to_page(outer_start))) {
- if (++order >= MAX_ORDER) {
- outer_start = start;
- break;
- }
- outer_start &= ~0UL << order;
- }
-
- if (outer_start != start) {
- order = buddy_order(pfn_to_page(outer_start));
-
- /*
- * outer_start page could be small order buddy page and
- * it doesn't include start page. Adjust outer_start
- * in this case to report failed page properly
- * on tracepoint in test_pages_isolated()
- */
- if (outer_start + (1UL << order) <= start)
- outer_start = start;
- }
-
/* Make sure the range is really isolated. */
- if (test_pages_isolated(outer_start, end, 0)) {
+ if (test_pages_isolated(alloc_start, alloc_end, 0)) {
ret = -EBUSY;
goto done;
}
/* Grab isolated pages from freelists. */
- outer_end = isolate_freepages_range(&cc, outer_start, end);
+ outer_end = isolate_freepages_range(&cc, alloc_start, alloc_end);
if (!outer_end) {
ret = -EBUSY;
goto done;
}
/* Free head and tail (if any) */
- if (start != outer_start)
- free_contig_range(outer_start, start - outer_start);
- if (end != outer_end)
- free_contig_range(end, outer_end - end);
+ if (start != alloc_start)
+ free_contig_range(alloc_start, start - alloc_start);
+ if (end != alloc_end)
+ free_contig_range(end, alloc_end - end);
done:
- undo_isolate_page_range(pfn_max_align_down(start),
- pfn_max_align_up(end), migratetype);
+ undo_isolate_page_range(alloc_start,
+ alloc_end, migratetype);
return ret;
}
EXPORT_SYMBOL(alloc_contig_range);
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index e0afc3ee8cf9..0d5ad2be50c3 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -273,6 +273,129 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
return NULL;
}
+/**
+ * isolate_single_pageblock() -- tries to isolate a pageblock that might be
+ * within a free or in-use page.
+ * @boundary_pfn: pageblock-aligned pfn that a page might cross
+ * @gfp_flags: GFP flags used for migrating pages
+ * @isolate_before: isolate the pageblock before the boundary_pfn
+ *
+ * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
+ * pageblock. When not all pageblocks within a page are isolated at the same
+ * time, free page accounting can go wrong. For example, in the case of
+ * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pageblocks.
+ * [ MAX_ORDER-1 ]
+ * [ pageblock0 | pageblock1 ]
+ * When either pageblock is isolated, if it is a free page, the page is not
+ * split into separate migratetype lists, as it is supposed to be; if it is an
+ * in-use page and freed later, __free_one_page() does not split the free page
+ * either. The function handles this by splitting the free page or migrating
+ * the in-use page then splitting the free page.
+ */
+static int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
+ bool isolate_before)
+{
+ unsigned char saved_mt;
+ /*
+ * scan at max(MAX_ORDER_NR_PAGES, pageblock_nr_pages) aligned range to
+ * avoid isolating pageblocks belonging to a bigger free or in-use page
+ */
+ unsigned long start_pfn = pfn_max_align_down(boundary_pfn);
+ unsigned long isolate_pageblock;
+ unsigned long pfn;
+
+ VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
+
+ if (isolate_before)
+ isolate_pageblock = boundary_pfn - pageblock_nr_pages;
+ else
+ isolate_pageblock = boundary_pfn;
+
+ saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
+ set_pageblock_migratetype(pfn_to_page(isolate_pageblock), MIGRATE_ISOLATE);
+
+ for (pfn = start_pfn; pfn < boundary_pfn;) {
+ struct page *page = __first_valid_page(pfn, boundary_pfn - start_pfn);
+
+ /*
+ * start_pfn is max(MAX_ORDER_NR_PAGES, pageblock_nr_pages)
+ * aligned; if there are any free pages in [start_pfn,
+ * boundary_pfn), their head pages will always be in the range.
+ */
+ if (PageBuddy(page)) {
+ int order = buddy_order(page);
+
+ if (pfn + (1UL << order) > boundary_pfn)
+ split_free_page(page, order, boundary_pfn - pfn);
+ pfn += (1UL << order);
+ continue;
+ }
+ /*
+ * migrate compound pages then let the free page handling code
+ * above do the rest. If migration is not enabled, just fail.
+ */
+ if (PageHuge(page) || PageTransCompound(page)) {
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+ unsigned long nr_pages = compound_nr(page);
+ int order = compound_order(page);
+ struct page *head = compound_head(page);
+ unsigned long head_pfn = page_to_pfn(head);
+ int ret;
+ struct compact_control cc = {
+ .nr_migratepages = 0,
+ .order = -1,
+ .zone = page_zone(pfn_to_page(head_pfn)),
+ .mode = MIGRATE_SYNC,
+ .ignore_skip_hint = true,
+ .no_set_skip_hint = true,
+ .gfp_mask = gfp_flags,
+ .alloc_contig = true,
+ };
+ INIT_LIST_HEAD(&cc.migratepages);
+
+ if (head_pfn + nr_pages < boundary_pfn) {
+ pfn += nr_pages;
+ continue;
+ }
+
+ ret = __alloc_contig_migrate_range(&cc, head_pfn,
+ head_pfn + nr_pages);
+
+ if (ret)
+ goto failed;
+ /*
+ * reset pfn, let the free page handling code above
+ * split the free page to the right migratetype list.
+ *
+ * head_pfn is not used here as a hugetlb page order
+ * can be bigger than MAX_ORDER-1, but after it is
+ * freed, the free page order is not. Use pfn within
+ * the range to find the head of the free page and
+ * reset order to 0 if a hugetlb page with
+ * >MAX_ORDER-1 order is encountered.
+ */
+ if (order > MAX_ORDER-1)
+ order = 0;
+ while (!PageBuddy(pfn_to_page(pfn))) {
+ order++;
+ pfn &= ~0UL << order;
+ }
+ continue;
+#else
+ goto failed;
+#endif
+ }
+
+ pfn++;
+ }
+ return 0;
+failed:
+ /* restore the original migratetype */
+ set_pageblock_migratetype(pfn_to_page(isolate_pageblock), saved_mt);
+ return -EBUSY;
+}
+
+
/**
* start_isolate_page_range() - make page-allocation-type of range of pages to
* be MIGRATE_ISOLATE.
@@ -286,6 +409,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* and PageOffline() pages.
* REPORT_FAILURE - report details about the failure to
* isolate the range
+ * @gfp_flags: GFP flags used for migrating pages that sit across the
+ * range boundaries.
*
* Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
* the range will never be allocated. Any free pages and pages freed in the
@@ -294,6 +419,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* pages in the range finally, the caller have to free all pages in the range.
* test_page_isolated() can be used for test it.
*
+ * The function first tries to isolate the pageblocks at the beginning and end
+ * of the range, since there might be pages across the range boundaries.
+ * Afterwards, it isolates the rest of the range.
+ *
* There is no high level synchronization mechanism that prevents two threads
* from trying to isolate overlapping ranges. If this happens, one thread
* will notice pageblocks in the overlapping range already set to isolate.
@@ -314,21 +443,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* Return: 0 on success and -EBUSY if any part of range cannot be isolated.
*/
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
- unsigned migratetype, int flags)
+ unsigned migratetype, int flags, gfp_t gfp_flags)
{
unsigned long pfn;
struct page *page;
+ /* isolation is done at page block granularity */
+ unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
+ unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
+ int ret;
- unsigned long isolate_start = pfn_max_align_down(start_pfn);
- unsigned long isolate_end = pfn_max_align_up(end_pfn);
+ /* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */
+ ret = isolate_single_pageblock(isolate_start, gfp_flags, false);
+ if (ret)
+ return ret;
+
+ /* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */
+ ret = isolate_single_pageblock(isolate_end, gfp_flags, true);
+ if (ret) {
+ unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
+ return ret;
+ }
- for (pfn = isolate_start;
- pfn < isolate_end;
+ /* skip isolated pageblocks at the beginning and end */
+ for (pfn = isolate_start + pageblock_nr_pages;
+ pfn < isolate_end - pageblock_nr_pages;
pfn += pageblock_nr_pages) {
page = __first_valid_page(pfn, pageblock_nr_pages);
if (page && set_migratetype_isolate(page, migratetype, flags,
start_pfn, end_pfn)) {
undo_isolate_page_range(isolate_start, pfn, migratetype);
+ unset_migratetype_isolate(
+ pfn_to_page(isolate_end - pageblock_nr_pages),
+ migratetype);
return -EBUSY;
}
}
--
2.35.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v7 4/5] mm: cma: use pageblock_order as the single alignment
2022-03-11 18:36 [PATCH v7 0/5] Use pageblock_order for cma and alloc_contig_range alignment Zi Yan
` (2 preceding siblings ...)
2022-03-11 18:36 ` [PATCH v7 3/5] mm: make alloc_contig_range work at pageblock granularity Zi Yan
@ 2022-03-11 18:36 ` Zi Yan
2022-03-11 18:36 ` [PATCH v7 5/5] drivers: virtio_mem: use pageblock size as the minimum virtio_mem size Zi Yan
4 siblings, 0 replies; 9+ messages in thread
From: Zi Yan @ 2022-03-11 18:36 UTC (permalink / raw)
To: David Hildenbrand, linux-mm
Cc: linux-kernel, virtualization, Vlastimil Babka, Mel Gorman,
Eric Ren, Mike Rapoport, Oscar Salvador, Christophe Leroy,
Zi Yan
From: Zi Yan <ziy@nvidia.com>
Now alloc_contig_range() works at pageblock granularity. Change CMA
allocation, which uses alloc_contig_range(), to use pageblock_order
alignment.
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
include/linux/cma.h | 4 ++--
include/linux/mmzone.h | 5 +----
mm/page_alloc.c | 4 ++--
3 files changed, 5 insertions(+), 8 deletions(-)
diff --git a/include/linux/cma.h b/include/linux/cma.h
index a6f637342740..63873b93deaa 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -17,11 +17,11 @@
#define CMA_MAX_NAME 64
/*
- * TODO: once the buddy -- especially pageblock merging and alloc_contig_range()
+ * Now that the buddy -- especially pageblock merging and alloc_contig_range()
* -- can deal with only some pageblocks of a higher-order page being
* MIGRATE_CMA, we can use pageblock_nr_pages.
*/
-#define CMA_MIN_ALIGNMENT_PAGES MAX_ORDER_NR_PAGES
+#define CMA_MIN_ALIGNMENT_PAGES pageblock_nr_pages
#define CMA_MIN_ALIGNMENT_BYTES (PAGE_SIZE * CMA_MIN_ALIGNMENT_PAGES)
struct cma;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 962b14d403e8..0725c50ca0cb 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -54,10 +54,7 @@ enum migratetype {
*
* The way to use it is to change migratetype of a range of
* pageblocks to MIGRATE_CMA which can be done by
- * __free_pageblock_cma() function. What is important though
- * is that a range of pageblocks must be aligned to
- * MAX_ORDER_NR_PAGES should biggest page be bigger than
- * a single pageblock.
+ * __free_pageblock_cma() function.
*/
MIGRATE_CMA,
#endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c2db271b874f..0f96bd57c258 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -9065,8 +9065,8 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
* be either of the two.
* @gfp_mask: GFP mask to use during compaction
*
- * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES
- * aligned. The PFN range must belong to a single zone.
+ * The PFN range does not have to be pageblock aligned. The PFN range must
+ * belong to a single zone.
*
* The first thing this routine does is attempt to MIGRATE_ISOLATE all
* pageblocks in the range. Once isolated, the pageblocks should not
--
2.35.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v7 5/5] drivers: virtio_mem: use pageblock size as the minimum virtio_mem size.
2022-03-11 18:36 [PATCH v7 0/5] Use pageblock_order for cma and alloc_contig_range alignment Zi Yan
` (3 preceding siblings ...)
2022-03-11 18:36 ` [PATCH v7 4/5] mm: cma: use pageblock_order as the single alignment Zi Yan
@ 2022-03-11 18:36 ` Zi Yan
4 siblings, 0 replies; 9+ messages in thread
From: Zi Yan @ 2022-03-11 18:36 UTC (permalink / raw)
To: David Hildenbrand, linux-mm
Cc: linux-kernel, virtualization, Vlastimil Babka, Mel Gorman,
Eric Ren, Mike Rapoport, Oscar Salvador, Christophe Leroy,
Zi Yan
From: Zi Yan <ziy@nvidia.com>
alloc_contig_range() now only needs to be aligned to pageblock_order, so
drop the virtio_mem size requirement that it needs to be the max of
pageblock_order and MAX_ORDER.
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
drivers/virtio/virtio_mem.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index e7d6b679596d..e07486f01999 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -2476,10 +2476,10 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm)
VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD);
/*
- * TODO: once alloc_contig_range() works reliably with pageblock
- * granularity on ZONE_NORMAL, use pageblock_nr_pages instead.
+ * alloc_contig_range() works reliably with pageblock
+ * granularity on ZONE_NORMAL, so use pageblock_nr_pages.
*/
- sb_size = PAGE_SIZE * MAX_ORDER_NR_PAGES;
+ sb_size = PAGE_SIZE * pageblock_nr_pages;
sb_size = max_t(uint64_t, vm->device_block_size, sb_size);
if (sb_size < memory_block_size_bytes() && !force_bbm) {
--
2.35.1
^ permalink raw reply related [flat|nested] 9+ messages in thread