From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
To: akpm@linux-foundation.org, linux-mm@kvack.org
Cc: pavel.tatashin@microsoft.com, mhocko@suse.com,
linux-nvdimm@lists.01.org, alexander.h.duyck@linux.intel.com,
linux-kernel@vger.kernel.org, willy@infradead.org,
mingo@kernel.org, khalid.aziz@oracle.com,
rppt@linux.vnet.ibm.com, vbabka@suse.cz,
sparclinux@vger.kernel.org, ldufour@linux.vnet.ibm.com,
mgorman@techsingularity.net, davem@davemloft.net,
kirill.shutemov@linux.intel.com
Subject: [mm PATCH v6 3/7] mm: Implement new zone specific memblock iterator
Date: Fri, 30 Nov 2018 13:53:03 -0800 [thread overview]
Message-ID: <154361478343.7497.6591693538181082582.stgit@ahduyck-desk1.amr.corp.intel.com> (raw)
In-Reply-To: <154361452447.7497.1348692079883153517.stgit@ahduyck-desk1.amr.corp.intel.com>
Introduce a new iterator for_each_free_mem_pfn_range_in_zone.
This iterator will take care of making sure a given memory range provided
is in fact contained within a zone. It takes care of all the bounds checking
we were doing in deferred_grow_zone, and deferred_init_memmap. In addition
it should help to speed up the search a bit by iterating until the end of a
range is greater than the start of the zone pfn range, and will exit
completely if the start is beyond the end of the zone.
Reviewed-by: Pavel Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
---
include/linux/memblock.h | 25 ++++++++++++++++++
mm/memblock.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 31 +++++++++-------------
3 files changed, 101 insertions(+), 19 deletions(-)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 64c41cf45590..95d1aaa3f412 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -247,6 +247,31 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
+ unsigned long *out_spfn,
+ unsigned long *out_epfn);
+/**
+ * for_each_free_mem_pfn_range_in_zone - iterate through zone specific free
+ * memblock areas
+ * @i: u64 used as loop variable
+ * @zone: zone in which all of the memory blocks reside
+ * @p_start: ptr to ulong for start pfn of the range, can be %NULL
+ * @p_end: ptr to ulong for end pfn of the range, can be %NULL
+ *
+ * Walks over free (memory && !reserved) areas of memblock in a specific
+ * zone. Available once memblock and an empty zone are initialized. The main
+ * assumption is that the zone start, end, and pgdat have been associated.
+ * This way we can use the zone to determine NUMA node, and if a given part
+ * of the memblock is valid for the zone.
+ */
+#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end) \
+ for (i = 0, \
+ __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end); \
+ i != U64_MAX; \
+ __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
/**
* for_each_free_mem_range - iterate through free memblock areas
* @i: u64 used as loop variable
diff --git a/mm/memblock.c b/mm/memblock.c
index 57298abc7d98..0e49382033dd 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1247,6 +1247,70 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
return 0;
}
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+/**
+ * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
+ *
+ * @idx: pointer to u64 loop variable
+ * @zone: zone in which all of the memory blocks reside
+ * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL
+ * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL
+ *
+ * This function is meant to be a zone/pfn specific wrapper for the
+ * for_each_mem_range type iterators. Specifically they are used in the
+ * deferred memory init routines and as such we were duplicating much of
+ * this logic throughout the code. So instead of having it in multiple
+ * locations it seemed like it would make more sense to centralize this to
+ * one new iterator that does everything they need.
+ */
+void __init_memblock
+__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
+ unsigned long *out_spfn, unsigned long *out_epfn)
+{
+ int zone_nid = zone_to_nid(zone);
+ phys_addr_t spa, epa;
+ int nid;
+
+ __next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
+ &memblock.memory, &memblock.reserved,
+ &spa, &epa, &nid);
+
+ while (*idx != U64_MAX) {
+ unsigned long epfn = PFN_DOWN(epa);
+ unsigned long spfn = PFN_UP(spa);
+
+ /*
+ * Verify the end is at least past the start of the zone and
+ * that we have at least one PFN to initialize.
+ */
+ if (zone->zone_start_pfn < epfn && spfn < epfn) {
+ /* if we went too far just stop searching */
+ if (zone_end_pfn(zone) <= spfn) {
+ *idx = U64_MAX;
+ break;
+ }
+
+ if (out_spfn)
+ *out_spfn = max(zone->zone_start_pfn, spfn);
+ if (out_epfn)
+ *out_epfn = min(zone_end_pfn(zone), epfn);
+
+ return;
+ }
+
+ __next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
+ &memblock.memory, &memblock.reserved,
+ &spa, &epa, &nid);
+ }
+
+ /* signal end of iteration */
+ if (out_spfn)
+ *out_spfn = ULONG_MAX;
+ if (out_epfn)
+ *out_epfn = 0;
+}
+
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
phys_addr_t align, phys_addr_t start,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 09969619ab48..72f9889e3866 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1523,11 +1523,9 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
static int __init deferred_init_memmap(void *data)
{
pg_data_t *pgdat = data;
- int nid = pgdat->node_id;
unsigned long start = jiffies;
unsigned long nr_pages = 0;
unsigned long spfn, epfn, first_init_pfn, flags;
- phys_addr_t spa, epa;
int zid;
struct zone *zone;
const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
@@ -1564,14 +1562,12 @@ static int __init deferred_init_memmap(void *data)
* freeing pages we can access pages that are ahead (computing buddy
* page in __free_one_page()).
*/
- for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
- spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
- epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+ for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) {
+ spfn = max_t(unsigned long, first_init_pfn, spfn);
nr_pages += deferred_init_pages(zone, spfn, epfn);
}
- for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
- spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
- epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+ for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) {
+ spfn = max_t(unsigned long, first_init_pfn, spfn);
deferred_free_pages(spfn, epfn);
}
pgdat_resize_unlock(pgdat, &flags);
@@ -1579,8 +1575,8 @@ static int __init deferred_init_memmap(void *data)
/* Sanity check that the next zone really is unpopulated */
WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
- pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
- jiffies_to_msecs(jiffies - start));
+ pr_info("node %d initialised, %lu pages in %ums\n",
+ pgdat->node_id, nr_pages, jiffies_to_msecs(jiffies - start));
pgdat_init_report_one_done();
return 0;
@@ -1611,13 +1607,11 @@ static DEFINE_STATIC_KEY_TRUE(deferred_pages);
static noinline bool __init
deferred_grow_zone(struct zone *zone, unsigned int order)
{
- int nid = zone_to_nid(zone);
- pg_data_t *pgdat = NODE_DATA(nid);
unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
+ pg_data_t *pgdat = zone->zone_pgdat;
unsigned long nr_pages = 0;
unsigned long first_init_pfn, spfn, epfn, t, flags;
unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
- phys_addr_t spa, epa;
u64 i;
/* Only the last zone may have deferred pages */
@@ -1653,9 +1647,8 @@ deferred_grow_zone(struct zone *zone, unsigned int order)
return false;
}
- for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
- spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
- epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+ for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) {
+ spfn = max_t(unsigned long, first_init_pfn, spfn);
while (spfn < epfn && nr_pages < nr_pages_needed) {
t = ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION);
@@ -1669,9 +1662,9 @@ deferred_grow_zone(struct zone *zone, unsigned int order)
break;
}
- for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
- spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
- epfn = min_t(unsigned long, first_deferred_pfn, PFN_DOWN(epa));
+ for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) {
+ spfn = max_t(unsigned long, first_init_pfn, spfn);
+ epfn = min_t(unsigned long, first_deferred_pfn, epfn);
deferred_free_pages(spfn, epfn);
if (first_deferred_pfn == epfn)
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm
next prev parent reply other threads:[~2018-11-30 21:53 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-11-30 21:52 [mm PATCH v6 0/7] Deferred page init improvements Alexander Duyck
2018-11-30 21:52 ` [mm PATCH v6 1/7] mm: Use mm_zero_struct_page from SPARC on all 64b architectures Alexander Duyck
2018-11-30 21:52 ` [mm PATCH v6 2/7] mm: Drop meminit_pfn_in_nid as it is redundant Alexander Duyck
2018-11-30 21:53 ` Alexander Duyck [this message]
2018-11-30 21:53 ` [mm PATCH v6 4/7] mm: Initialize MAX_ORDER_NR_PAGES at a time instead of doing larger sections Alexander Duyck
2018-11-30 21:53 ` [mm PATCH v6 5/7] mm: Move hot-plug specific memory init into separate functions and optimize Alexander Duyck
2018-11-30 21:53 ` [mm PATCH v6 6/7] mm: Add reserved flag setting to set_page_links Alexander Duyck
2018-12-05 17:22 ` Michal Hocko
2018-12-05 17:55 ` Alexander Duyck
2018-12-05 20:42 ` Michal Hocko
2019-03-12 22:07 ` Andrew Morton
2019-03-12 22:50 ` Alexander Duyck
2019-03-13 16:33 ` Andrew Morton
2019-03-13 17:07 ` Alexander Duyck
2018-11-30 21:53 ` [mm PATCH v6 7/7] mm: Use common iterator for deferred_init_pages and deferred_free_pages Alexander Duyck
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=154361478343.7497.6591693538181082582.stgit@ahduyck-desk1.amr.corp.intel.com \
--to=alexander.h.duyck@linux.intel.com \
--cc=akpm@linux-foundation.org \
--cc=davem@davemloft.net \
--cc=khalid.aziz@oracle.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=ldufour@linux.vnet.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-nvdimm@lists.01.org \
--cc=mgorman@techsingularity.net \
--cc=mhocko@suse.com \
--cc=mingo@kernel.org \
--cc=pavel.tatashin@microsoft.com \
--cc=rppt@linux.vnet.ibm.com \
--cc=sparclinux@vger.kernel.org \
--cc=vbabka@suse.cz \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).