All of lore.kernel.org
 help / color / mirror / Atom feed
From: Doug Berger <opendmb@gmail.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jonathan Corbet <corbet@lwn.net>, Mike Rapoport <rppt@kernel.org>,
	Borislav Petkov <bp@suse.de>,
	"Paul E. McKenney" <paulmck@kernel.org>,
	Neeraj Upadhyay <quic_neeraju@quicinc.com>,
	Randy Dunlap <rdunlap@infradead.org>,
	Damien Le Moal <damien.lemoal@opensource.wdc.com>,
	Muchun Song <songmuchun@bytedance.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@suse.com>,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	Mel Gorman <mgorman@suse.de>,
	Mike Kravetz <mike.kravetz@oracle.com>,
	Florian Fainelli <f.fainelli@gmail.com>,
	David Hildenbrand <david@redhat.com>,
	Oscar Salvador <osalvador@suse.de>,
	Joonsoo Kim <iamjoonsoo.kim@lge.com>,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, Doug Berger <opendmb@gmail.com>
Subject: [PATCH v3 7/9] mm/dmb: Introduce Designated Movable Blocks
Date: Thu, 20 Oct 2022 14:53:16 -0700	[thread overview]
Message-ID: <20221020215318.4193269-8-opendmb@gmail.com> (raw)
In-Reply-To: <20221020215318.4193269-1-opendmb@gmail.com>

Designated Movable Blocks are blocks of memory that are composed
of one or more adjacent memblocks that have the MEMBLOCK_MOVABLE
designation. These blocks must be reserved before receiving that
designation and will be located in the ZONE_MOVABLE zone rather
than any other zone that may span them.

Signed-off-by: Doug Berger <opendmb@gmail.com>
---
 include/linux/dmb.h | 29 ++++++++++++++
 mm/Kconfig          | 12 ++++++
 mm/Makefile         |  1 +
 mm/dmb.c            | 91 +++++++++++++++++++++++++++++++++++++++++++
 mm/memblock.c       |  6 ++-
 mm/page_alloc.c     | 95 ++++++++++++++++++++++++++++++++++++++-------
 6 files changed, 220 insertions(+), 14 deletions(-)
 create mode 100644 include/linux/dmb.h
 create mode 100644 mm/dmb.c

diff --git a/include/linux/dmb.h b/include/linux/dmb.h
new file mode 100644
index 000000000000..fa2976c0fa21
--- /dev/null
+++ b/include/linux/dmb.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DMB_H__
+#define __DMB_H__
+
+#include <linux/memblock.h>
+
+/*
+ * the buddy -- especially pageblock merging and alloc_contig_range()
+ * -- can deal with only some pageblocks of a higher-order page being
+ *  MIGRATE_MOVABLE, we can use pageblock_nr_pages.
+ */
+#define DMB_MIN_ALIGNMENT_PAGES pageblock_nr_pages
+#define DMB_MIN_ALIGNMENT_BYTES (PAGE_SIZE * DMB_MIN_ALIGNMENT_PAGES)
+
+enum {
+	DMB_DISJOINT = 0,
+	DMB_INTERSECTS,
+	DMB_MIXED,
+};
+
+struct dmb;
+
+extern int dmb_intersects(unsigned long spfn, unsigned long epfn);
+
+extern int dmb_reserve(phys_addr_t base, phys_addr_t size,
+		       struct dmb **res_dmb);
+extern void dmb_init_region(struct memblock_region *region);
+
+#endif
diff --git a/mm/Kconfig b/mm/Kconfig
index 57e1d8c5b505..3a058b34e77e 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -868,6 +868,18 @@ config CMA_AREAS
 
 	  If unsure, leave the default value "7" in UMA and "19" in NUMA.
 
+config DMB_COUNT
+	int "Maximum count of Designated Movable Blocks"
+	default 19 if NUMA
+	default 7
+	help
+	  Designated Movable Blocks are blocks of memory that can be used
+	  by the page allocator exclusively for movable pages. They are
+	  managed in ZONE_MOVABLE but may overlap with other zones. This
+	  parameter sets the maximum number of DMBs in the system.
+
+	  If unsure, leave the default value "7" in UMA and "19" in NUMA.
+
 config MEM_SOFT_DIRTY
 	bool "Track memory changes"
 	depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
diff --git a/mm/Makefile b/mm/Makefile
index 8e105e5b3e29..824be8fb11cd 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -67,6 +67,7 @@ obj-y += page-alloc.o
 obj-y += init-mm.o
 obj-y += memblock.o
 obj-y += $(memory-hotplug-y)
+obj-y += dmb.o
 
 ifdef CONFIG_MMU
 	obj-$(CONFIG_ADVISE_SYSCALLS)	+= madvise.o
diff --git a/mm/dmb.c b/mm/dmb.c
new file mode 100644
index 000000000000..f6c4e2662e0f
--- /dev/null
+++ b/mm/dmb.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Designated Movable Block
+ */
+
+#define pr_fmt(fmt) "dmb: " fmt
+
+#include <linux/dmb.h>
+
+struct dmb {
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+};
+
+static struct dmb dmb_areas[CONFIG_DMB_COUNT];
+static unsigned int dmb_area_count;
+
+int dmb_intersects(unsigned long spfn, unsigned long epfn)
+{
+	int i;
+	struct dmb *dmb;
+
+	if (spfn >= epfn)
+		return DMB_DISJOINT;
+
+	for (i = 0; i < dmb_area_count; i++) {
+		dmb = &dmb_areas[i];
+		if (spfn >= dmb->end_pfn)
+			continue;
+		if (epfn <= dmb->start_pfn)
+			return DMB_DISJOINT;
+		if (spfn >= dmb->start_pfn && epfn <= dmb->end_pfn)
+			return DMB_INTERSECTS;
+		else
+			return DMB_MIXED;
+	}
+
+	return DMB_DISJOINT;
+}
+EXPORT_SYMBOL(dmb_intersects);
+
+int __init dmb_reserve(phys_addr_t base, phys_addr_t size,
+		       struct dmb **res_dmb)
+{
+	struct dmb *dmb;
+
+	/* Sanity checks */
+	if (!size || !memblock_is_region_reserved(base, size))
+		return -EINVAL;
+
+	/* ensure minimal alignment required by mm core */
+	if (!IS_ALIGNED(base | size, DMB_MIN_ALIGNMENT_BYTES))
+		return -EINVAL;
+
+	if (dmb_area_count == ARRAY_SIZE(dmb_areas)) {
+		pr_warn("Not enough slots for DMB reserved regions!\n");
+		return -ENOSPC;
+	}
+
+	/*
+	 * Each reserved area must be initialised later, when more kernel
+	 * subsystems (like slab allocator) are available.
+	 */
+	dmb = &dmb_areas[dmb_area_count++];
+
+	dmb->start_pfn = PFN_DOWN(base);
+	dmb->end_pfn = PFN_DOWN(base + size);
+	if (res_dmb)
+		*res_dmb = dmb;
+
+	memblock_mark_movable(base, size);
+	return 0;
+}
+
+void __init dmb_init_region(struct memblock_region *region)
+{
+	unsigned long pfn;
+	int i;
+
+	for (pfn = memblock_region_memory_base_pfn(region);
+	     pfn < memblock_region_memory_end_pfn(region);
+	     pfn += pageblock_nr_pages) {
+		struct page *page = pfn_to_page(pfn);
+
+		for (i = 0; i < pageblock_nr_pages; i++)
+			set_page_zone(page + i, ZONE_MOVABLE);
+
+		/* free reserved pageblocks to page allocator */
+		init_reserved_pageblock(page);
+	}
+}
diff --git a/mm/memblock.c b/mm/memblock.c
index 46a4deb07d92..2a957337bd8b 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -16,6 +16,7 @@
 #include <linux/kmemleak.h>
 #include <linux/seq_file.h>
 #include <linux/memblock.h>
+#include <linux/dmb.h>
 
 #include <asm/sections.h>
 #include <linux/io.h>
@@ -2090,13 +2091,16 @@ static void __init memmap_init_reserved_pages(void)
 	for_each_reserved_mem_range(i, &start, &end)
 		reserve_bootmem_region(start, end);
 
-	/* and also treat struct pages for the NOMAP regions as PageReserved */
 	for_each_mem_region(region) {
+		/* treat struct pages for the NOMAP regions as PageReserved */
 		if (memblock_is_nomap(region)) {
 			start = region->base;
 			end = start + region->size;
 			reserve_bootmem_region(start, end);
 		}
+		/* move Designated Movable Block pages to ZONE_MOVABLE */
+		if (memblock_is_movable(region))
+			dmb_init_region(region);
 	}
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d7a5a05ead4b..82cad751e0b8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -76,6 +76,7 @@
 #include <linux/khugepaged.h>
 #include <linux/buffer_head.h>
 #include <linux/delayacct.h>
+#include <linux/dmb.h>
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -434,6 +435,8 @@ static unsigned long required_kernelcore __initdata;
 static unsigned long required_kernelcore_percent __initdata;
 static unsigned long required_movablecore __initdata;
 static unsigned long required_movablecore_percent __initdata;
+static unsigned long min_dmb_pfn[MAX_NUMNODES] __initdata;
+static unsigned long max_dmb_pfn[MAX_NUMNODES] __initdata;
 static unsigned long zone_movable_pfn[MAX_NUMNODES] __initdata;
 bool mirrored_kernelcore __initdata_memblock;
 
@@ -2171,7 +2174,7 @@ static int __init deferred_init_memmap(void *data)
 	}
 zone_empty:
 	/* Sanity check that the next zone really is unpopulated */
-	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
+	WARN_ON(++zid < ZONE_MOVABLE && populated_zone(++zone));
 
 	pr_info("node %d deferred pages initialised in %ums\n",
 		pgdat->node_id, jiffies_to_msecs(jiffies - start));
@@ -6978,6 +6981,10 @@ static void __init memmap_init_zone_range(struct zone *zone,
 	unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages;
 	int nid = zone_to_nid(zone), zone_id = zone_idx(zone);
 
+	/* Skip overlap of ZONE_MOVABLE */
+	if (zone_id == ZONE_MOVABLE && zone_start_pfn < *hole_pfn)
+		zone_start_pfn = *hole_pfn;
+
 	start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
 	end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn);
 
@@ -7438,6 +7445,12 @@ static unsigned long __init zone_spanned_pages_in_node(int nid,
 				node_start_pfn, node_end_pfn,
 				zone_start_pfn, zone_end_pfn);
 
+	if (zone_type == ZONE_MOVABLE && max_dmb_pfn[nid]) {
+		if (*zone_start_pfn == *zone_end_pfn)
+			*zone_end_pfn = max_dmb_pfn[nid];
+		*zone_start_pfn = min(*zone_start_pfn, min_dmb_pfn[nid]);
+	}
+
 	/* Check that this node has pages within the zone's required range */
 	if (*zone_end_pfn < node_start_pfn || *zone_start_pfn > node_end_pfn)
 		return 0;
@@ -7506,12 +7519,21 @@ static unsigned long __init zone_absent_pages_in_node(int nid,
 			&zone_start_pfn, &zone_end_pfn);
 	nr_absent = __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
 
+	if (zone_type == ZONE_MOVABLE && max_dmb_pfn[nid]) {
+		if (zone_start_pfn == zone_end_pfn)
+			zone_end_pfn = max_dmb_pfn[nid];
+		else
+			zone_end_pfn = zone_movable_pfn[nid];
+		zone_start_pfn = min(zone_start_pfn, min_dmb_pfn[nid]);
+		nr_absent += zone_end_pfn - zone_start_pfn;
+	}
+
 	/*
 	 * ZONE_MOVABLE handling.
-	 * Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages
+	 * Treat pages to be ZONE_MOVABLE in other zones as absent pages
 	 * and vice versa.
 	 */
-	if (mirrored_kernelcore && zone_movable_pfn[nid]) {
+	if (zone_movable_pfn[nid]) {
 		unsigned long start_pfn, end_pfn;
 		struct memblock_region *r;
 
@@ -7521,6 +7543,19 @@ static unsigned long __init zone_absent_pages_in_node(int nid,
 			end_pfn = clamp(memblock_region_memory_end_pfn(r),
 					zone_start_pfn, zone_end_pfn);
 
+			if (memblock_is_movable(r)) {
+				if (zone_type != ZONE_MOVABLE) {
+					nr_absent += end_pfn - start_pfn;
+					continue;
+				}
+
+				nr_absent -=  end_pfn - start_pfn;
+				continue;
+			}
+
+			if (!mirrored_kernelcore)
+				continue;
+
 			if (zone_type == ZONE_MOVABLE &&
 			    memblock_is_mirror(r))
 				nr_absent += end_pfn - start_pfn;
@@ -7540,18 +7575,27 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat,
 {
 	unsigned long totalpages = 0;
 	enum zone_type i;
+	int nid = pgdat->node_id;
+
+	/*
+	 * If Designated Movable Blocks are defined on this node, ensure that
+	 * zone_movable_pfn is also defined for this node.
+	 */
+	if (max_dmb_pfn[nid] && !zone_movable_pfn[nid])
+		zone_movable_pfn[nid] = min(node_end_pfn,
+				arch_zone_highest_possible_pfn[movable_zone]);
 
 	for (i = 0; i < MAX_NR_ZONES; i++) {
 		struct zone *zone = pgdat->node_zones + i;
 		unsigned long zone_start_pfn, zone_end_pfn;
 		unsigned long spanned, absent, size;
 
-		spanned = zone_spanned_pages_in_node(pgdat->node_id, i,
+		spanned = zone_spanned_pages_in_node(nid, i,
 						     node_start_pfn,
 						     node_end_pfn,
 						     &zone_start_pfn,
 						     &zone_end_pfn);
-		absent = zone_absent_pages_in_node(pgdat->node_id, i,
+		absent = zone_absent_pages_in_node(nid, i,
 						   node_start_pfn,
 						   node_end_pfn);
 
@@ -8002,15 +8046,27 @@ unsigned long __init node_map_pfn_alignment(void)
 static unsigned long __init early_calculate_totalpages(void)
 {
 	unsigned long totalpages = 0;
-	unsigned long start_pfn, end_pfn;
-	int i, nid;
+	struct memblock_region *r;
 
-	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
-		unsigned long pages = end_pfn - start_pfn;
+	for_each_mem_region(r) {
+		unsigned long start_pfn, end_pfn, pages;
+		int nid;
+
+		nid = memblock_get_region_node(r);
+		start_pfn = memblock_region_memory_base_pfn(r);
+		end_pfn = memblock_region_memory_end_pfn(r);
 
-		totalpages += pages;
-		if (pages)
+		pages = end_pfn - start_pfn;
+		if (pages) {
+			totalpages += pages;
 			node_set_state(nid, N_MEMORY);
+			if (memblock_is_movable(r)) {
+				if (start_pfn < min_dmb_pfn[nid])
+					min_dmb_pfn[nid] = start_pfn;
+				if (end_pfn > max_dmb_pfn[nid])
+					max_dmb_pfn[nid] = end_pfn;
+			}
+		}
 	}
 	return totalpages;
 }
@@ -8023,7 +8079,7 @@ static unsigned long __init early_calculate_totalpages(void)
  */
 static void __init find_zone_movable_pfns_for_nodes(void)
 {
-	int i, nid;
+	int nid;
 	unsigned long usable_startpfn;
 	unsigned long kernelcore_node, kernelcore_remaining;
 	/* save the state before borrow the nodemask */
@@ -8151,13 +8207,24 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 		kernelcore_remaining = kernelcore_node;
 
 		/* Go through each range of PFNs within this node */
-		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
+		for_each_mem_region(r) {
 			unsigned long size_pages;
 
+			if (memblock_get_region_node(r) != nid)
+				continue;
+
+			start_pfn = memblock_region_memory_base_pfn(r);
+			end_pfn = memblock_region_memory_end_pfn(r);
 			start_pfn = max(start_pfn, zone_movable_pfn[nid]);
 			if (start_pfn >= end_pfn)
 				continue;
 
+			/* Skip over Designated Movable Blocks */
+			if (memblock_is_movable(r)) {
+				zone_movable_pfn[nid] = end_pfn;
+				continue;
+			}
+
 			/* Account for what is only usable for kernelcore */
 			if (start_pfn < usable_startpfn) {
 				unsigned long kernel_pages;
@@ -8306,6 +8373,8 @@ void __init free_area_init(unsigned long *max_zone_pfn)
 	}
 
 	/* Find the PFNs that ZONE_MOVABLE begins at in each node */
+	memset(min_dmb_pfn, 0xff, sizeof(min_dmb_pfn));
+	memset(max_dmb_pfn, 0, sizeof(max_dmb_pfn));
 	memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
 	find_zone_movable_pfns_for_nodes();
 
-- 
2.25.1


  parent reply	other threads:[~2022-10-20 21:55 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-20 21:53 [PATCH v3 0/9] mm: introduce Designated Movable Blocks Doug Berger
2022-10-20 21:53 ` [PATCH v3 1/9] lib/show_mem.c: display MovableOnly Doug Berger
2022-10-20 21:53 ` [PATCH v3 2/9] mm/page_alloc: calculate node_spanned_pages from pfns Doug Berger
2022-10-20 21:53 ` [PATCH v3 3/9] mm/page_alloc: prevent creation of empty zones Doug Berger
2022-10-20 21:53 ` [PATCH v3 4/9] mm/page_alloc.c: allow oversized movablecore Doug Berger
2022-10-20 21:53 ` [PATCH v3 5/9] mm/page_alloc: introduce init_reserved_pageblock() Doug Berger
2022-10-20 21:53 ` [PATCH v3 6/9] memblock: introduce MEMBLOCK_MOVABLE flag Doug Berger
2022-10-20 21:53 ` Doug Berger [this message]
2022-10-20 21:53 ` [PATCH v3 8/9] mm/page_alloc: make alloc_contig_pages DMB aware Doug Berger
2022-10-20 21:53 ` [PATCH v3 9/9] mm/page_alloc: allow base for movablecore Doug Berger
2022-10-26 10:55 ` [PATCH v3 0/9] mm: introduce Designated Movable Blocks Mel Gorman
2022-10-26 11:11   ` David Hildenbrand
2022-10-26 12:02     ` Mel Gorman
2022-11-02 22:33   ` Doug Berger
2022-11-18 17:05     ` Mel Gorman
2022-12-15  0:17       ` Doug Berger
2023-01-04 15:43         ` Mel Gorman
2023-01-04 19:10           ` Florian Fainelli
2023-01-19 22:33           ` Doug Berger
2023-01-03 23:43 ` Florian Fainelli
2023-01-04 15:56   ` David Hildenbrand
2023-01-04 19:00     ` Florian Fainelli
2023-01-05 13:29       ` David Hildenbrand
2023-04-18 20:09         ` Florian Fainelli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221020215318.4193269-8-opendmb@gmail.com \
    --to=opendmb@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=bp@suse.de \
    --cc=corbet@lwn.net \
    --cc=damien.lemoal@opensource.wdc.com \
    --cc=david@redhat.com \
    --cc=f.fainelli@gmail.com \
    --cc=hannes@cmpxchg.org \
    --cc=iamjoonsoo.kim@lge.com \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.com \
    --cc=mike.kravetz@oracle.com \
    --cc=osalvador@suse.de \
    --cc=paulmck@kernel.org \
    --cc=quic_neeraju@quicinc.com \
    --cc=rdunlap@infradead.org \
    --cc=rppt@kernel.org \
    --cc=songmuchun@bytedance.com \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.