LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
To: akpm@linux-foundation.org, linux-mm@kvack.org
Cc: sparclinux@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-nvdimm@lists.01.org, davem@davemloft.net,
	pavel.tatashin@microsoft.com, mhocko@suse.com, mingo@kernel.org,
	kirill.shutemov@linux.intel.com, dan.j.williams@intel.com,
	dave.jiang@intel.com, alexander.h.duyck@linux.intel.com,
	rppt@linux.vnet.ibm.com, willy@infradead.org, vbabka@suse.cz,
	khalid.aziz@oracle.com, ldufour@linux.vnet.ibm.com,
	mgorman@techsingularity.net, yi.z.zhang@linux.intel.com,
	alexander.h.duyck@linux.intel.com
Subject: [mm PATCH v5 3/7] mm: Implement new zone specific memblock iterator
Date: Mon, 05 Nov 2018 13:19:40 -0800
Message-ID: <154145278071.30046.9022571960145979137.stgit@ahduyck-desk1.jf.intel.com> (raw)
In-Reply-To: <154145268025.30046.11742652345962594283.stgit@ahduyck-desk1.jf.intel.com>

This patch introduces a new iterator for_each_free_mem_pfn_range_in_zone.

This iterator will take care of making sure a given memory range provided
is in fact contained within a zone. It takes care of all the bounds checking
we were doing in deferred_grow_zone, and deferred_init_memmap. In addition
it should help to speed up the search a bit by iterating until the end of a
range is greater than the start of the zone pfn range, and will exit
completely if the start is beyond the end of the zone.

Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
---
 include/linux/memblock.h |   22 ++++++++++++++++
 mm/memblock.c            |   63 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c          |   31 +++++++++--------------
 3 files changed, 97 insertions(+), 19 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index aee299a6aa76..413623dc96a3 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -248,6 +248,28 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 	     i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
+				  unsigned long *out_spfn,
+				  unsigned long *out_epfn);
+/**
+ * for_each_free_mem_pfn_range_in_zone - iterate through zone specific free
+ * memblock areas
+ * @i: u64 used as loop variable
+ * @zone: zone in which all of the memory blocks reside
+ * @p_start: ptr to ulong for start pfn of the range, can be %NULL
+ * @p_end: ptr to ulong for end pfn of the range, can be %NULL
+ *
+ * Walks over free (memory && !reserved) areas of memblock in a specific
+ * zone. Available as soon as memblock is initialized.
+ */
+#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end)	\
+	for (i = 0,							\
+	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end);	\
+	     i != (u64)ULLONG_MAX;					\
+	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
 /**
  * for_each_free_mem_range - iterate through free memblock areas
  * @i: u64 used as loop variable
diff --git a/mm/memblock.c b/mm/memblock.c
index 7df468c8ebc8..f1d1fbfd1ae7 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1239,6 +1239,69 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
 	return 0;
 }
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+/**
+ * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
+ *
+ * @idx: pointer to u64 loop variable
+ * @zone: zone in which all of the memory blocks reside
+ * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL
+ * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL
+ *
+ * This function is meant to be a zone/pfn specific wrapper for the
+ * for_each_mem_range type iterators. Specifically they are used in the
+ * deferred memory init routines and as such we were duplicating much of
+ * this logic throughout the code. So instead of having it in multiple
+ * locations it seemed like it would make more sense to centralize this to
+ * one new iterator that does everything they need.
+ */
+void __init_memblock
+__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
+			     unsigned long *out_spfn, unsigned long *out_epfn)
+{
+	int zone_nid = zone_to_nid(zone);
+	phys_addr_t spa, epa;
+	int nid;
+
+	__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
+			 &memblock.memory, &memblock.reserved,
+			 &spa, &epa, &nid);
+
+	while (*idx != ULLONG_MAX) {
+		unsigned long epfn = PFN_DOWN(epa);
+		unsigned long spfn = PFN_UP(spa);
+
+		/*
+		 * Verify the end is at least past the start of the zone and
+		 * that we have at least one PFN to initialize.
+		 */
+		if (zone->zone_start_pfn < epfn && spfn < epfn) {
+			/* if we went too far just stop searching */
+			if (zone_end_pfn(zone) <= spfn)
+				break;
+
+			if (out_spfn)
+				*out_spfn = max(zone->zone_start_pfn, spfn);
+			if (out_epfn)
+				*out_epfn = min(zone_end_pfn(zone), epfn);
+
+			return;
+		}
+
+		__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
+				 &memblock.memory, &memblock.reserved,
+				 &spa, &epa, &nid);
+	}
+
+	/* signal end of iteration */
+	*idx = ULLONG_MAX;
+	if (out_spfn)
+		*out_spfn = ULONG_MAX;
+	if (out_epfn)
+		*out_epfn = 0;
+}
+
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
 					phys_addr_t align, phys_addr_t start,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index be1197c120a8..5cfd3ebe10d1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1516,11 +1516,9 @@ static unsigned long  __init deferred_init_pages(struct zone *zone,
 static int __init deferred_init_memmap(void *data)
 {
 	pg_data_t *pgdat = data;
-	int nid = pgdat->node_id;
 	unsigned long start = jiffies;
 	unsigned long nr_pages = 0;
 	unsigned long spfn, epfn, first_init_pfn, flags;
-	phys_addr_t spa, epa;
 	int zid;
 	struct zone *zone;
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
@@ -1557,14 +1555,12 @@ static int __init deferred_init_memmap(void *data)
 	 * freeing pages we can access pages that are ahead (computing buddy
 	 * page in __free_one_page()).
 	 */
-	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
-		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
-		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+	for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) {
+		spfn = max_t(unsigned long, first_init_pfn, spfn);
 		nr_pages += deferred_init_pages(zone, spfn, epfn);
 	}
-	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
-		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
-		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+	for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) {
+		spfn = max_t(unsigned long, first_init_pfn, spfn);
 		deferred_free_pages(spfn, epfn);
 	}
 	pgdat_resize_unlock(pgdat, &flags);
@@ -1572,8 +1568,8 @@ static int __init deferred_init_memmap(void *data)
 	/* Sanity check that the next zone really is unpopulated */
 	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
 
-	pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
-					jiffies_to_msecs(jiffies - start));
+	pr_info("node %d initialised, %lu pages in %ums\n",
+		pgdat->node_id,	nr_pages, jiffies_to_msecs(jiffies - start));
 
 	pgdat_init_report_one_done();
 	return 0;
@@ -1604,13 +1600,11 @@ static DEFINE_STATIC_KEY_TRUE(deferred_pages);
 static noinline bool __init
 deferred_grow_zone(struct zone *zone, unsigned int order)
 {
-	int nid = zone_to_nid(zone);
-	pg_data_t *pgdat = NODE_DATA(nid);
 	unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
+	pg_data_t *pgdat = zone->zone_pgdat;
 	unsigned long nr_pages = 0;
 	unsigned long first_init_pfn, spfn, epfn, t, flags;
 	unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
-	phys_addr_t spa, epa;
 	u64 i;
 
 	/* Only the last zone may have deferred pages */
@@ -1646,9 +1640,8 @@ deferred_grow_zone(struct zone *zone, unsigned int order)
 		return false;
 	}
 
-	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
-		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
-		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+	for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) {
+		spfn = max_t(unsigned long, first_init_pfn, spfn);
 
 		while (spfn < epfn && nr_pages < nr_pages_needed) {
 			t = ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION);
@@ -1662,9 +1655,9 @@ deferred_grow_zone(struct zone *zone, unsigned int order)
 			break;
 	}
 
-	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
-		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
-		epfn = min_t(unsigned long, first_deferred_pfn, PFN_DOWN(epa));
+	for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) {
+		spfn = max_t(unsigned long, first_init_pfn, spfn);
+		epfn = min_t(unsigned long, first_deferred_pfn, epfn);
 		deferred_free_pages(spfn, epfn);
 
 		if (first_deferred_pfn == epfn)


  parent reply index

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-05 21:19 [mm PATCH v5 0/7] Deferred page init improvements Alexander Duyck
2018-11-05 21:19 ` [mm PATCH v5 1/7] mm: Use mm_zero_struct_page from SPARC on all 64b architectures Alexander Duyck
2018-11-05 21:19 ` [mm PATCH v5 2/7] mm: Drop meminit_pfn_in_nid as it is redundant Alexander Duyck
2018-11-05 21:19 ` Alexander Duyck [this message]
2018-11-09 23:26   ` [mm PATCH v5 3/7] mm: Implement new zone specific memblock iterator Pavel Tatashin
2018-11-09 23:58     ` Alexander Duyck
2018-11-10  0:11       ` Pavel Tatashin
2018-11-05 21:19 ` [mm PATCH v5 4/7] mm: Initialize MAX_ORDER_NR_PAGES at a time instead of doing larger sections Alexander Duyck
2018-11-10  1:02   ` Pavel Tatashin
2018-11-19 18:53     ` Alexander Duyck
2018-11-05 21:19 ` [mm PATCH v5 5/7] mm: Move hot-plug specific memory init into separate functions and optimize Alexander Duyck
2018-11-10  2:07   ` Pavel Tatashin
2018-11-05 21:19 ` [mm PATCH v5 6/7] mm: Add reserved flag setting to set_page_links Alexander Duyck
2018-11-10  2:11   ` Pavel Tatashin
2018-11-05 21:20 ` [mm PATCH v5 7/7] mm: Use common iterator for deferred_init_pages and deferred_free_pages Alexander Duyck
2018-11-10  4:13   ` Pavel Tatashin
2018-11-12 15:12     ` Alexander Duyck
2018-11-09 21:15 ` [mm PATCH v5 0/7] Deferred page init improvements Pavel Tatashin
2018-11-09 23:14   ` Alexander Duyck
2018-11-10  0:00     ` Pavel Tatashin
2018-11-10  0:46       ` Alexander Duyck
2018-11-10  1:16         ` Pavel Tatashin
2018-11-12 19:10           ` Alexander Duyck
2018-11-12 20:37             ` Pavel Tatashin
2018-11-12 16:25       ` Daniel Jordan
2018-11-14 15:07 ` Michal Hocko
2018-11-14 19:12   ` Pavel Tatashin
2018-11-14 21:35     ` Michal Hocko
2018-11-15  0:50   ` Alexander Duyck
2018-11-15  1:55     ` Mike Rapoport
2018-11-15 19:09       ` Mike Rapoport
2018-11-15  8:10     ` Michal Hocko
2018-11-15 16:02       ` Alexander Duyck
2018-11-15 16:40         ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=154145278071.30046.9022571960145979137.stgit@ahduyck-desk1.jf.intel.com \
    --to=alexander.h.duyck@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=davem@davemloft.net \
    --cc=khalid.aziz@oracle.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=ldufour@linux.vnet.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=mgorman@techsingularity.net \
    --cc=mhocko@suse.com \
    --cc=mingo@kernel.org \
    --cc=pavel.tatashin@microsoft.com \
    --cc=rppt@linux.vnet.ibm.com \
    --cc=sparclinux@vger.kernel.org \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    --cc=yi.z.zhang@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git
	git clone --mirror https://lore.kernel.org/lkml/10 lkml/git/10.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git