Linux-ACPI Archive on lore.kernel.org
 help / color / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: linux-nvdimm@lists.01.org
Cc: Mike Rapoport <rppt@linux.ibm.com>, Jia He <justin.he@arm.com>,
	Will Deacon <will@kernel.org>,
	David Hildenbrand <david@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	peterz@infradead.org, vishal.l.verma@intel.com,
	dave.hansen@linux.intel.com, ard.biesheuvel@linaro.org,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	linux-acpi@vger.kernel.org, hch@lst.de,
	joao.m.martins@oracle.com
Subject: [PATCH v2 08/22] memblock: Introduce a generic phys_addr_to_target_node()
Date: Sun, 12 Jul 2020 09:26:48 -0700
Message-ID: <159457120893.754248.7783260004248722175.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <159457116473.754248.7879464730875147365.stgit@dwillia2-desk3.amr.corp.intel.com>

Similar to how generic memory_add_physaddr_to_nid() interrogates
memblock data for numa information, introduce
get_reserved_pfn_range_for_nid() to enable the same operation for
reserved memory ranges, and build a generic phys_to_target_node() on
top of it. Example memory ranges that are reserved, but still have
associated numa-info, are persistent memory or Soft Reserved
(EFI_MEMORY_SP) memory.

Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Jia He <justin.he@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/memblock.h |   11 ++++++++++
 include/linux/mm.h       |    2 +
 include/linux/numa.h     |    2 +
 mm/memblock.c            |   22 ++++++++++++++--
 mm/page_alloc.c          |   63 +++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 94 insertions(+), 6 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 017fae833d4a..0655e8376c72 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -234,6 +234,17 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 	for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
 	     i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
 
+/*
+ * Same as for_each_mem_pfn_range(), but walks memblock.reserved rather
+ * than memblock.memory.
+ */
+void __next_reserved_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
+			       unsigned long *out_end_pfn, int *out_nid);
+
+#define for_each_reserved_pfn_range(i, nid, p_start, p_end, p_nid)		\
+	for (i = -1, __next_reserved_pfn_range(&i, nid, p_start, p_end, p_nid); \
+	     i >= 0; __next_reserved_pfn_range(&i, nid, p_start, p_end, p_nid))
+
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
 				  unsigned long *out_spfn,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1e76ee5da20b..82dac9f42c46 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2438,6 +2438,8 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 
 extern void get_pfn_range_for_nid(unsigned int nid,
 			unsigned long *start_pfn, unsigned long *end_pfn);
+extern void get_reserved_pfn_range_for_nid(unsigned int nid,
+			unsigned long *start_pfn, unsigned long *end_pfn);
 extern unsigned long find_min_pfn_with_active_regions(void);
 extern void sparse_memory_present_with_active_regions(int nid);
 
diff --git a/include/linux/numa.h b/include/linux/numa.h
index 5d25c5de1322..52b2430bc759 100644
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -19,7 +19,7 @@ int numa_map_to_online_node(int node);
 
 /*
  * Optional architecture specific implementation, users need a "depends
- * on $ARCH"
+ * on $ARCH" or depends on CONFIG_MEMBLOCK_NUMA_INFO
  */
 int phys_to_target_node(phys_addr_t addr);
 #else
diff --git a/mm/memblock.c b/mm/memblock.c
index 39aceafc57f6..43c3abab705e 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1200,11 +1200,11 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
 /*
  * Common iterator interface used to define for_each_mem_pfn_range().
  */
-void __init_memblock __next_mem_pfn_range(int *idx, int nid,
+static void __init_memblock __next_memblock_pfn_range(int *idx, int nid,
 				unsigned long *out_start_pfn,
-				unsigned long *out_end_pfn, int *out_nid)
+				unsigned long *out_end_pfn, int *out_nid,
+				struct memblock_type *type)
 {
-	struct memblock_type *type = &memblock.memory;
 	struct memblock_region *r;
 	int r_nid;
 
@@ -1230,6 +1230,22 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
 		*out_nid = r_nid;
 }
 
+void __init_memblock __next_mem_pfn_range(int *idx, int nid,
+				unsigned long *out_start_pfn,
+				unsigned long *out_end_pfn, int *out_nid)
+{
+	__next_memblock_pfn_range(idx, nid, out_start_pfn, out_end_pfn, out_nid,
+				  &memblock.memory);
+}
+
+void __init_memblock __next_reserved_pfn_range(int *idx, int nid,
+				unsigned long *out_start_pfn,
+				unsigned long *out_end_pfn, int *out_nid)
+{
+	__next_memblock_pfn_range(idx, nid, out_start_pfn, out_end_pfn, out_nid,
+				  &memblock.reserved);
+}
+
 /**
  * memblock_set_node - set node ID on memblock regions
  * @base: base of area to set node ID for
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index df8bd169dbb4..94ad77c0c338 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6369,12 +6369,39 @@ void __init_or_memblock get_pfn_range_for_nid(unsigned int nid,
 		*start_pfn = 0;
 }
 
+/**
+ * get_reserved_pfn_range_for_nid - Return the span of reserved page frames for a node
+ * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
+ * @start_pfn: Passed by reference. On return, it will have the lowest reserved start_pfn.
+ * @end_pfn: Passed by reference. On return, it will have the highest reserved end_pfn.
+ *
+ * Mostly identical to get_pfn_range_for_nid() except it operates on reserved
+ * ranges rather than online memory. Both are 0 if @nid has no reserved range.
+ */
+void __init_or_memblock get_reserved_pfn_range_for_nid(unsigned int nid,
+			unsigned long *start_pfn, unsigned long *end_pfn)
+{
+	unsigned long this_start_pfn, this_end_pfn;
+	int i;
+
+	*start_pfn = -1UL;
+	*end_pfn = 0;
+
+	for_each_reserved_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) {
+		*start_pfn = min(*start_pfn, this_start_pfn);
+		*end_pfn = max(*end_pfn, this_end_pfn);
+	}
+
+	if (*start_pfn == -1UL)
+		*start_pfn = 0;
+}
+
 /*
  * Generic implementation of memory_add_physaddr_to_nid() depends on
  * architecture using memblock data for numa information.
  */
 #ifdef CONFIG_MEMBLOCK_NUMA_INFO
-int __init_or_memblock memory_add_physaddr_to_nid(u64 addr)
+static int __init_or_memblock __memory_add_physaddr_to_nid(u64 addr)
 {
 	unsigned long start_pfn, end_pfn, pfn = PHYS_PFN(addr);
 	int nid;
@@ -6384,10 +6411,42 @@ int __init_or_memblock memory_add_physaddr_to_nid(u64 addr)
 		if (pfn >= start_pfn && pfn <= end_pfn)
 			return nid;
 	}
+	return NUMA_NO_NODE;
+}
+
+int __init_or_memblock memory_add_physaddr_to_nid(u64 addr)
+{
+	int nid = __memory_add_physaddr_to_nid(addr);
+
 	/* Default to node0 as not all callers are prepared for this to fail */
-	return 0;
+	if (nid == NUMA_NO_NODE)
+		return 0;
+	return nid;
 }
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+
+int __init_or_memblock phys_to_target_node(phys_addr_t addr)
+{
+	unsigned long start_pfn, end_pfn, pfn = PHYS_PFN(addr);
+	int nid = __memory_add_physaddr_to_nid(addr);
+
+	if (nid != NUMA_NO_NODE)
+		return nid;
+
+	/*
+	 * Not online: search reserved ranges of offline nodes. An empty
+	 * node reports start == end == 0, so compare end_pfn exclusively.
+	 */
+	for_each_possible_node(nid) {
+		if (node_online(nid))
+			continue;
+		get_reserved_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+		if (pfn >= start_pfn && pfn < end_pfn)
+			return nid;
+	}
+	return NUMA_NO_NODE;
+}
+EXPORT_SYMBOL_GPL(phys_to_target_node);
 #endif /* CONFIG_MEMBLOCK_NUMA_INFO */
 
 /*


  parent reply index

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-12 16:26 [PATCH v2 00/22] device-dax: Support sub-dividing soft-reserved ranges Dan Williams
2020-07-12 16:26 ` [PATCH v2 01/22] x86/numa: Cleanup configuration dependent command-line options Dan Williams
2020-07-12 16:26 ` [PATCH v2 02/22] x86/numa: Add 'nohmat' option Dan Williams
2020-07-12 16:58   ` Randy Dunlap
2020-07-12 16:26 ` [PATCH v2 03/22] efi/fake_mem: Arrange for a resource entry per efi_fake_mem instance Dan Williams
2020-07-12 16:26 ` [PATCH v2 04/22] ACPI: HMAT: Refactor hmat_register_target_device to hmem_register_device Dan Williams
2020-07-12 16:26 ` [PATCH v2 05/22] resource: Report parent to walk_iomem_res_desc() callback Dan Williams
2020-07-12 16:26 ` [PATCH v2 06/22] x86: Move NUMA_KEEP_MEMINFO and related definition to x86-internals Dan Williams
2020-07-12 16:26 ` [PATCH v2 07/22] numa: Introduce a generic memory_add_physaddr_to_nid() Dan Williams
2020-07-13  6:58   ` Mike Rapoport
2020-07-13 15:42     ` Dan Williams
2020-07-12 16:26 ` Dan Williams [this message]
2020-07-13  7:03   ` [PATCH v2 08/22] memblock: Introduce a generic phys_addr_to_target_node() Mike Rapoport
2020-07-13 15:48     ` Dan Williams
2020-07-14  1:36       ` Justin He
2020-07-12 16:26 ` [PATCH v2 09/22] arm64: Convert to generic memblock for numa-info Dan Williams
2020-07-12 16:26 ` [PATCH v2 10/22] ACPI: HMAT: Attach a device for each soft-reserved range Dan Williams
2020-07-12 16:27 ` [PATCH v2 11/22] device-dax: Drop the dax_region.pfn_flags attribute Dan Williams
2020-07-12 16:27 ` [PATCH v2 12/22] device-dax: Move instance creation parameters to 'struct dev_dax_data' Dan Williams
2020-07-12 16:27 ` [PATCH v2 13/22] device-dax: Make pgmap optional for instance creation Dan Williams
2020-07-12 16:27 ` [PATCH v2 14/22] device-dax: Kill dax_kmem_res Dan Williams
2020-07-12 16:27 ` [PATCH v2 15/22] device-dax: Add an allocation interface for device-dax instances Dan Williams
2020-07-12 16:27 ` [PATCH v2 16/22] device-dax: Introduce 'seed' devices Dan Williams
2020-07-12 16:27 ` [PATCH v2 17/22] drivers/base: Make device_find_child_by_name() compatible with sysfs inputs Dan Williams
2020-07-12 17:09   ` Greg Kroah-Hartman
2020-07-13 15:39     ` Dan Williams
2020-07-13 15:52       ` Greg Kroah-Hartman
2020-07-13 16:09         ` Dan Williams
2020-07-13 16:12           ` Greg Kroah-Hartman
2020-07-13 16:36             ` Dan Williams
2020-07-12 16:27 ` [PATCH v2 18/22] device-dax: Add resize support Dan Williams
2020-07-12 16:27 ` [PATCH v2 19/22] mm/memremap_pages: Convert to 'struct range' Dan Williams
2020-07-13 16:36   ` Ralph Campbell
2020-07-13 16:54     ` Dan Williams
2020-07-12 16:27 ` [PATCH v2 20/22] mm/memremap_pages: Support multiple ranges per invocation Dan Williams
2020-07-12 16:27 ` [PATCH v2 21/22] device-dax: Add dis-contiguous resource support Dan Williams
2020-07-12 16:28 ` [PATCH v2 22/22] device-dax: Introduce 'mapping' devices Dan Williams
2020-07-16 13:18   ` Joao Martins
2020-07-16 16:00     ` Dan Williams
2020-07-16 19:04       ` Joao Martins

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=159457120893.754248.7783260004248722175.stgit@dwillia2-desk3.amr.corp.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=ard.biesheuvel@linaro.org \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=hch@lst.de \
    --cc=joao.m.martins@oracle.com \
    --cc=justin.he@arm.com \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=peterz@infradead.org \
    --cc=rppt@linux.ibm.com \
    --cc=vishal.l.verma@intel.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-ACPI Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-acpi/0 linux-acpi/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-acpi linux-acpi/ https://lore.kernel.org/linux-acpi \
		linux-acpi@vger.kernel.org
	public-inbox-index linux-acpi

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-acpi


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git