From: Dan Williams <dan.j.williams@intel.com>
To: linux-nvdimm@lists.01.org
Cc: Mike Rapoport <rppt@linux.ibm.com>, Jia He <justin.he@arm.com>,
Will Deacon <will@kernel.org>,
David Hildenbrand <david@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>,
peterz@infradead.org, dave.hansen@linux.intel.com,
ard.biesheuvel@linaro.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org, linux-acpi@vger.kernel.org,
hch@lst.de, joao.m.martins@oracle.com
Subject: [PATCH v2 08/22] memblock: Introduce a generic phys_addr_to_target_node()
Date: Sun, 12 Jul 2020 09:26:48 -0700 [thread overview]
Message-ID: <159457120893.754248.7783260004248722175.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <159457116473.754248.7879464730875147365.stgit@dwillia2-desk3.amr.corp.intel.com>
Similar to how generic memory_add_physaddr_to_nid() interrogates
memblock data for numa information, introduce
get_reserved_pfn_range_from_nid() to enable the same operation for
reserved memory ranges. Example memory ranges that are reserved, but
still have associated numa-info are persistent memory or Soft Reserved
(EFI_MEMORY_SP) memory.
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Jia He <justin.he@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
include/linux/memblock.h | 4 +++
include/linux/mm.h | 2 +
include/linux/numa.h | 2 +
mm/memblock.c | 22 ++++++++++++++--
mm/page_alloc.c | 63 +++++++++++++++++++++++++++++++++++++++++++++-
5 files changed, 87 insertions(+), 6 deletions(-)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 017fae833d4a..0655e8376c72 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -234,6 +234,10 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
+#define for_each_reserved_pfn_range(i, nid, p_start, p_end, p_nid) \
+ for (i = -1, __next_reserved_pfn_range(&i, nid, p_start, p_end, p_nid); \
+ i >= 0; __next_reserved_pfn_range(&i, nid, p_start, p_end, p_nid))
+
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
unsigned long *out_spfn,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1e76ee5da20b..82dac9f42c46 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2438,6 +2438,8 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn,
extern void get_pfn_range_for_nid(unsigned int nid,
unsigned long *start_pfn, unsigned long *end_pfn);
+extern void get_reserved_pfn_range_for_nid(unsigned int nid,
+ unsigned long *start_pfn, unsigned long *end_pfn);
extern unsigned long find_min_pfn_with_active_regions(void);
extern void sparse_memory_present_with_active_regions(int nid);
diff --git a/include/linux/numa.h b/include/linux/numa.h
index 5d25c5de1322..52b2430bc759 100644
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -19,7 +19,7 @@ int numa_map_to_online_node(int node);
/*
* Optional architecture specific implementation, users need a "depends
- * on $ARCH"
+ * on $ARCH" or depends on CONFIG_MEMBLOCK_NUMA_INFO
*/
int phys_to_target_node(phys_addr_t addr);
#else
diff --git a/mm/memblock.c b/mm/memblock.c
index 39aceafc57f6..43c3abab705e 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1200,11 +1200,11 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
/*
* Common iterator interface used to define for_each_mem_pfn_range().
*/
-void __init_memblock __next_mem_pfn_range(int *idx, int nid,
+static void __init_memblock __next_memblock_pfn_range(int *idx, int nid,
unsigned long *out_start_pfn,
- unsigned long *out_end_pfn, int *out_nid)
+ unsigned long *out_end_pfn, int *out_nid,
+ struct memblock_type *type)
{
- struct memblock_type *type = &memblock.memory;
struct memblock_region *r;
int r_nid;
@@ -1230,6 +1230,22 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
*out_nid = r_nid;
}
+void __init_memblock __next_mem_pfn_range(int *idx, int nid,
+ unsigned long *out_start_pfn,
+ unsigned long *out_end_pfn, int *out_nid)
+{
+ __next_memblock_pfn_range(idx, nid, out_start_pfn, out_end_pfn, out_nid,
+ &memblock.memory);
+}
+
+void __init_memblock __next_reserved_pfn_range(int *idx, int nid,
+ unsigned long *out_start_pfn,
+ unsigned long *out_end_pfn, int *out_nid)
+{
+ __next_memblock_pfn_range(idx, nid, out_start_pfn, out_end_pfn, out_nid,
+ &memblock.reserved);
+}
+
/**
* memblock_set_node - set node ID on memblock regions
* @base: base of area to set node ID for
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index df8bd169dbb4..94ad77c0c338 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6369,12 +6369,39 @@ void __init_or_memblock get_pfn_range_for_nid(unsigned int nid,
*start_pfn = 0;
}
+/**
+ * get_reserved_pfn_range_for_nid - Return the start and end page frames for a node
+ * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
+ * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
+ * @end_pfn: Passed by reference. On return, it will have the node end_pfn.
+ *
+ * Mostly identical to get_pfn_range_for_nid() except it operates on
+ * reserved ranges rather than online memory.
+ */
+void __init_or_memblock get_reserved_pfn_range_for_nid(unsigned int nid,
+ unsigned long *start_pfn, unsigned long *end_pfn)
+{
+ unsigned long this_start_pfn, this_end_pfn;
+ int i;
+
+ *start_pfn = -1UL;
+ *end_pfn = 0;
+
+ for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) {
+ *start_pfn = min(*start_pfn, this_start_pfn);
+ *end_pfn = max(*end_pfn, this_end_pfn);
+ }
+
+ if (*start_pfn == -1UL)
+ *start_pfn = 0;
+}
+
/*
* Generic implementation of memory_add_physaddr_to_nid() depends on
* architecture using memblock data for numa information.
*/
#ifdef CONFIG_MEMBLOCK_NUMA_INFO
-int __init_or_memblock memory_add_physaddr_to_nid(u64 addr)
+static int __init_or_memblock __memory_add_physaddr_to_nid(u64 addr)
{
unsigned long start_pfn, end_pfn, pfn = PHYS_PFN(addr);
int nid;
@@ -6384,10 +6411,42 @@ int __init_or_memblock memory_add_physaddr_to_nid(u64 addr)
if (pfn >= start_pfn && pfn <= end_pfn)
return nid;
}
+ return NUMA_NO_NODE;
+}
+
+int __init_or_memblock memory_add_physaddr_to_nid(u64 addr)
+{
+ int nid = __memory_add_physaddr_to_nid(addr);
+
/* Default to node0 as not all callers are prepared for this to fail */
- return 0;
+ if (nid == NUMA_NO_NODE)
+ return 0;
+ return nid;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+
+int __init_or_memblock phys_to_target_node(u64 addr)
+{
+ unsigned long start_pfn, end_pfn, pfn = PHYS_PFN(addr);
+ int nid = __memory_add_physaddr_to_nid(addr);
+
+ if (nid != NUMA_NO_NODE)
+ return nid;
+
+ /*
+ * Search reserved memory ranges since the memory address does
+ * not appear to be online
+ */
+ for_each_possible_node(nid) {
+ if (node_online(nid))
+ continue;
+ get_reserved_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+ if (pfn >= start_pfn && pfn <= end_pfn)
+ return nid;
+ }
+ return NUMA_NO_NODE;
+}
+EXPORT_SYMBOL_GPL(phys_to_target_node);
#endif /* CONFIG_MEMBLOCK_NUMA_INFO */
/*
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
next prev parent reply other threads:[~2020-07-12 16:43 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-12 16:26 [PATCH v2 00/22] device-dax: Support sub-dividing soft-reserved ranges Dan Williams
2020-07-12 16:26 ` [PATCH v2 01/22] x86/numa: Cleanup configuration dependent command-line options Dan Williams
2020-07-12 16:26 ` [PATCH v2 02/22] x86/numa: Add 'nohmat' option Dan Williams
2020-07-12 16:58 ` Randy Dunlap
2020-07-12 16:26 ` [PATCH v2 03/22] efi/fake_mem: Arrange for a resource entry per efi_fake_mem instance Dan Williams
2020-07-12 16:26 ` [PATCH v2 04/22] ACPI: HMAT: Refactor hmat_register_target_device to hmem_register_device Dan Williams
2020-07-12 16:26 ` [PATCH v2 05/22] resource: Report parent to walk_iomem_res_desc() callback Dan Williams
2020-07-12 16:26 ` [PATCH v2 06/22] x86: Move NUMA_KEEP_MEMINFO and related definition to x86-internals Dan Williams
2020-07-12 16:26 ` [PATCH v2 07/22] numa: Introduce a generic memory_add_physaddr_to_nid() Dan Williams
2020-07-13 6:58 ` Mike Rapoport
2020-07-13 15:42 ` Dan Williams
2020-07-12 16:26 ` Dan Williams [this message]
2020-07-13 7:03 ` [PATCH v2 08/22] memblock: Introduce a generic phys_addr_to_target_node() Mike Rapoport
2020-07-13 15:48 ` Dan Williams
2020-07-14 1:36 ` Justin He
2020-07-12 16:26 ` [PATCH v2 09/22] arm64: Convert to generic memblock for numa-info Dan Williams
2020-07-12 16:26 ` [PATCH v2 10/22] ACPI: HMAT: Attach a device for each soft-reserved range Dan Williams
2020-07-12 16:27 ` [PATCH v2 11/22] device-dax: Drop the dax_region.pfn_flags attribute Dan Williams
2020-07-12 16:27 ` [PATCH v2 12/22] device-dax: Move instance creation parameters to 'struct dev_dax_data' Dan Williams
2020-07-12 16:27 ` [PATCH v2 13/22] device-dax: Make pgmap optional for instance creation Dan Williams
2020-07-12 16:27 ` [PATCH v2 14/22] device-dax: Kill dax_kmem_res Dan Williams
2020-07-12 16:27 ` [PATCH v2 15/22] device-dax: Add an allocation interface for device-dax instances Dan Williams
2020-07-12 16:27 ` [PATCH v2 16/22] device-dax: Introduce 'seed' devices Dan Williams
2020-07-12 16:27 ` [PATCH v2 17/22] drivers/base: Make device_find_child_by_name() compatible with sysfs inputs Dan Williams
2020-07-12 17:09 ` Greg Kroah-Hartman
2020-07-13 15:39 ` Dan Williams
2020-07-13 15:52 ` Greg Kroah-Hartman
2020-07-13 16:09 ` Dan Williams
2020-07-13 16:12 ` Greg Kroah-Hartman
2020-07-13 16:36 ` Dan Williams
2020-07-12 16:27 ` [PATCH v2 18/22] device-dax: Add resize support Dan Williams
2020-07-12 16:27 ` [PATCH v2 19/22] mm/memremap_pages: Convert to 'struct range' Dan Williams
2020-07-13 16:36 ` Ralph Campbell
2020-07-13 16:54 ` Dan Williams
2020-07-12 16:27 ` [PATCH v2 20/22] mm/memremap_pages: Support multiple ranges per invocation Dan Williams
2020-07-12 16:27 ` [PATCH v2 21/22] device-dax: Add dis-contiguous resource support Dan Williams
2020-07-12 16:28 ` [PATCH v2 22/22] device-dax: Introduce 'mapping' devices Dan Williams
2020-07-16 13:18 ` Joao Martins
2020-07-16 16:00 ` Dan Williams
2020-07-16 19:04 ` Joao Martins
2020-07-16 17:29 ` [PATCH v1 0/4] device-dax: Further improvements to subdivision Joao Martins
2020-07-16 17:29 ` [PATCH v1 1/4] device-dax: Make align a per-device property Joao Martins
2020-07-16 17:29 ` [PATCH v1 2/4] device-dax: Add an 'align' attribute Joao Martins
2020-07-31 14:52 ` Dan Williams
2020-07-16 17:29 ` [PATCH v1 3/4] dax/hmem: Introduce dax_hmem.region_idle parameter Joao Martins
2020-07-16 17:29 ` [PATCH v1 4/4] device-dax: Add a range mapping allocation attribute Joao Martins
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=159457120893.754248.7783260004248722175.stgit@dwillia2-desk3.amr.corp.intel.com \
--to=dan.j.williams@intel.com \
--cc=akpm@linux-foundation.org \
--cc=ard.biesheuvel@linaro.org \
--cc=dave.hansen@linux.intel.com \
--cc=david@redhat.com \
--cc=hch@lst.de \
--cc=joao.m.martins@oracle.com \
--cc=justin.he@arm.com \
--cc=linux-acpi@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-nvdimm@lists.01.org \
--cc=peterz@infradead.org \
--cc=rppt@linux.ibm.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).