From: Dan Williams <dan.j.williams@intel.com>
To: akpm@linux-foundation.org
Cc: mhocko@suse.com, Pavel Tatashin <pasha.tatashin@soleen.com>,
	linux-nvdimm@lists.01.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, Vlastimil Babka <vbabka@suse.cz>,
	osalvador@suse.de
Subject: [PATCH v8 03/12] mm/sparsemem: Add helpers to track active portions of a section at boot
Date: Mon, 06 May 2019 16:39:42 -0700
Message-ID: <155718598213.130019.10989541248734713186.stgit@dwillia2-desk3.amr.corp.intel.com>
In-Reply-To: <155718596657.130019.17139634728875079809.stgit@dwillia2-desk3.amr.corp.intel.com>

Prepare for hot{plug,remove} of sub-ranges of a section by tracking a
sub-section active bitmask, with each bit representing a PMD_SIZE span
within the architecture's memory hotplug section.
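
To make the bit indexing concrete, here is a minimal standalone sketch
of the index math. It is not the kernel code itself: the constants
assume x86_64 defaults (4KiB pages, 2MiB PMDs, 128MiB sections), and
subsection_map_index() is transcribed from the mmzone.h hunk below.

	#include <stdio.h>

	#define PAGE_SHIFT		12	/* 4KiB pages */
	#define PMD_SHIFT		21	/* 2MiB span per map bit */
	#define SECTION_SIZE_BITS	27	/* 128MiB hotplug section */
	#define PAGES_PER_SECTION	(1UL << (SECTION_SIZE_BITS - PAGE_SHIFT))
	#define PAGES_PER_SUBSECTION	(1UL << (PMD_SHIFT - PAGE_SHIFT))
	#define PAGE_SECTION_MASK	(~(PAGES_PER_SECTION - 1))

	/* Same math as subsection_map_index() in the patch below */
	static int subsection_map_index(unsigned long pfn)
	{
		return (pfn & ~PAGE_SECTION_MASK) / PAGES_PER_SUBSECTION;
	}

	int main(void)
	{
		/* pfn 8704 = 17 * 512 pages into section 0 => bit 17 */
		printf("bit %d\n", subsection_map_index(8704));
		return 0;
	}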

The implication of a partially populated section is that pfn_valid()
needs to go beyond a valid_section() check and also read the
sub-section active ranges from the bitmask. The expectation is that
the bitmask (subsection_map) fits in the same cacheline as the
valid_section() data, so the incremental performance overhead to
pfn_valid() should be negligible.
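
For scale, under the same assumed x86_64 constants a 128MiB section
divided into 2MiB subsections yields 64 bits, so subsection_map is a
single unsigned long stored in struct mem_section_usage right next to
pageblock_flags. A compile-time sketch of that sizing claim (again an
illustration reusing the constants above, not kernel code):

	/* 2^(27 - 21) = 64 subsection bits per section */
	#define SUBSECTIONS_PER_SECTION	(1UL << (SECTION_SIZE_BITS - PMD_SHIFT))

	_Static_assert(SUBSECTIONS_PER_SECTION <= 8 * sizeof(unsigned long),
		       "subsection_map fits in one machine word");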

Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Tested-by: Jane Chu <jane.chu@oracle.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/mmzone.h |   29 ++++++++++++++++++++++++++++-
 mm/page_alloc.c        |    4 +++-
 mm/sparse.c            |   29 +++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ac163f2f274f..6dd52d544857 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1199,6 +1199,8 @@ struct mem_section_usage {
 	unsigned long pageblock_flags[0];
 };
 
+void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
+
 struct page;
 struct page_ext;
 struct mem_section {
@@ -1336,12 +1338,36 @@ static inline struct mem_section *__pfn_to_section(unsigned long pfn)
 
 extern int __highest_present_section_nr;
 
+static inline int subsection_map_index(unsigned long pfn)
+{
+	return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION;
+}
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
+{
+	int idx = subsection_map_index(pfn);
+
+	return test_bit(idx, ms->usage->subsection_map);
+}
+#else
+static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
+{
+	return 1;
+}
+#endif
+
 #ifndef CONFIG_HAVE_ARCH_PFN_VALID
 static inline int pfn_valid(unsigned long pfn)
 {
+	struct mem_section *ms;
+
 	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
 		return 0;
-	return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
+	ms = __nr_to_section(pfn_to_section_nr(pfn));
+	if (!valid_section(ms))
+		return 0;
+	return pfn_section_valid(ms, pfn);
 }
 #endif
 
@@ -1373,6 +1399,7 @@ void sparse_init(void);
 #define sparse_init()	do {} while (0)
 #define sparse_index_init(_sec, _nid)  do {} while (0)
 #define pfn_present pfn_valid
+#define subsection_map_init(_pfn, _nr_pages) do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 61c2b54a5b61..13816c5a51eb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7291,10 +7291,12 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
 	/* Print out the early node map */
 	pr_info("Early memory node ranges\n");
-	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
 		pr_info("  node %3d: [mem %#018Lx-%#018Lx]\n", nid,
 			(u64)start_pfn << PAGE_SHIFT,
 			((u64)end_pfn << PAGE_SHIFT) - 1);
+		subsection_map_init(start_pfn, end_pfn - start_pfn);
+	}
 
 	/* Initialise every node */
 	mminit_verify_pageflags_layout();
diff --git a/mm/sparse.c b/mm/sparse.c
index f87de7ad32c8..ac47a48050c7 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -210,6 +210,35 @@ static inline unsigned long first_present_section_nr(void)
 	return next_present_section_nr(-1);
 }
 
+void subsection_map_init(unsigned long pfn, unsigned long nr_pages)
+{
+	int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
+	int i, start_sec = pfn_to_section_nr(pfn);
+
+	if (!nr_pages)
+		return;
+
+	for (i = start_sec; i <= end_sec; i++) {
+		int idx, end;
+		unsigned long pfns;
+		struct mem_section *ms;
+
+		idx = subsection_map_index(pfn);
+		pfns = min(nr_pages, PAGES_PER_SECTION
+				- (pfn & ~PAGE_SECTION_MASK));
+		end = subsection_map_index(pfn + pfns - 1);
+
+		ms = __nr_to_section(i);
+		bitmap_set(ms->usage->subsection_map, idx, end - idx + 1);
+
+		pr_debug("%s: sec: %d pfns: %ld set(%d, %d)\n", __func__, i,
+				pfns, idx, end - idx + 1);
+
+		pfn += pfns;
+		nr_pages -= pfns;
+	}
+}
+
 /* Record a memory area against a node. */
 void __init memory_present(int nid, unsigned long start, unsigned long end)
 {
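
For intuition on how subsection_map_init() above walks a pfn range that
straddles a section boundary, here is a hedged userspace model. It
reuses the illustrative x86_64 constants from the sketches in the
changelog and replaces struct mem_section with a flat array of 64-bit
maps; it is a simplification, not the kernel implementation.

	#include <stdio.h>

	#define PAGE_SHIFT		12
	#define PMD_SHIFT		21
	#define SECTION_SIZE_BITS	27
	#define PAGES_PER_SECTION	(1UL << (SECTION_SIZE_BITS - PAGE_SHIFT))
	#define PAGES_PER_SUBSECTION	(1UL << (PMD_SHIFT - PAGE_SHIFT))
	#define PAGE_SECTION_MASK	(~(PAGES_PER_SECTION - 1))
	#define MAX_SECTIONS		4

	/* One 64-bit map per modeled section, standing in for ms->usage */
	static unsigned long subsection_map[MAX_SECTIONS];

	static int subsection_map_index(unsigned long pfn)
	{
		return (pfn & ~PAGE_SECTION_MASK) / PAGES_PER_SUBSECTION;
	}

	static void subsection_map_init(unsigned long pfn, unsigned long nr_pages)
	{
		unsigned long i, start_sec, end_sec;

		if (!nr_pages)
			return;
		start_sec = pfn / PAGES_PER_SECTION;
		end_sec = (pfn + nr_pages - 1) / PAGES_PER_SECTION;
		for (i = start_sec; i <= end_sec; i++) {
			unsigned long left, pfns;
			int b, idx, end;

			/* Pages remaining in this section, as in the patch */
			left = PAGES_PER_SECTION - (pfn & ~PAGE_SECTION_MASK);
			pfns = nr_pages < left ? nr_pages : left;
			idx = subsection_map_index(pfn);
			end = subsection_map_index(pfn + pfns - 1);

			for (b = idx; b <= end; b++)	/* bitmap_set() stand-in */
				subsection_map[i] |= 1UL << b;

			pfn += pfns;
			nr_pages -= pfns;
		}
	}

	int main(void)
	{
		/* 8MiB range: last 2 subsections of sec 0, first 2 of sec 1 */
		subsection_map_init(PAGES_PER_SECTION - 1024, 2048);
		printf("sec 0: %016lx\nsec 1: %016lx\n",
		       subsection_map[0], subsection_map[1]);
		return 0;
	}

This prints c000000000000000 for section 0 and 0000000000000003 for
section 1: the set bits track exactly the populated 2MiB spans on each
side of the section boundary.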
