LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
To: akpm@linux-foundation.org, linux-mm@kvack.org
Cc: sparclinux@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-nvdimm@lists.01.org, davem@davemloft.net,
	pavel.tatashin@microsoft.com, mhocko@suse.com, mingo@kernel.org,
	kirill.shutemov@linux.intel.com, dan.j.williams@intel.com,
	dave.jiang@intel.com, alexander.h.duyck@linux.intel.com,
	rppt@linux.vnet.ibm.com, willy@infradead.org, vbabka@suse.cz,
	khalid.aziz@oracle.com, ldufour@linux.vnet.ibm.com,
	mgorman@techsingularity.net, yi.z.zhang@linux.intel.com,
	alexander.h.duyck@linux.intel.com
Subject: [mm PATCH v5 6/7] mm: Add reserved flag setting to set_page_links
Date: Mon, 05 Nov 2018 13:19:56 -0800
Message-ID: <154145279604.30046.5646399488589213615.stgit@ahduyck-desk1.jf.intel.com> (raw)
In-Reply-To: <154145268025.30046.11742652345962594283.stgit@ahduyck-desk1.jf.intel.com>

This patch modifies the set_page_links function to include the setting of
the reserved flag via a simple AND and OR operation. The motivation for
this is the fact that the existing __set_bit call still seems to have
effects on performance as replacing the call with the AND and OR can reduce
initialization time.

Looking over the assembly code before and after the change the main
difference between the two is that the reserved bit is stored in a value
that is generated outside of the main initialization loop and is then
written with the other flags field values in one write to the page->flags
value. Previously the generated value was written and then then a btsq
instruction was issued.

On my x86_64 test system with 3TB of persistent memory per node I saw the
persistent memory initialization time on average drop from 23.49s to
19.12s per node.

Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
---
 include/linux/mm.h |    9 ++++++++-
 mm/page_alloc.c    |   29 +++++++++++++++++++----------
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 288c407c08fc..de6535a98e45 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1171,11 +1171,18 @@ static inline void set_page_node(struct page *page, unsigned long node)
 	page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
 }
 
+static inline void set_page_reserved(struct page *page, bool reserved)
+{
+	page->flags &= ~(1ul << PG_reserved);
+	page->flags |= (unsigned long)(!!reserved) << PG_reserved;
+}
+
 static inline void set_page_links(struct page *page, enum zone_type zone,
-	unsigned long node, unsigned long pfn)
+	unsigned long node, unsigned long pfn, bool reserved)
 {
 	set_page_zone(page, zone);
 	set_page_node(page, node);
+	set_page_reserved(page, reserved);
 #ifdef SECTION_IN_PAGE_FLAGS
 	set_page_section(page, pfn_to_section_nr(pfn));
 #endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dbe00c1a0e23..9eb993a9be99 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1179,7 +1179,7 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,
 				unsigned long zone, int nid)
 {
 	mm_zero_struct_page(page);
-	set_page_links(page, zone, nid, pfn);
+	set_page_links(page, zone, nid, pfn, false);
 	init_page_count(page);
 	page_mapcount_reset(page);
 	page_cpupid_reset_last(page);
@@ -1195,7 +1195,8 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,
 static void __meminit __init_pageblock(unsigned long start_pfn,
 				       unsigned long nr_pages,
 				       unsigned long zone, int nid,
-				       struct dev_pagemap *pgmap)
+				       struct dev_pagemap *pgmap,
+				       bool is_reserved)
 {
 	unsigned long nr_pgmask = pageblock_nr_pages - 1;
 	struct page *start_page = pfn_to_page(start_pfn);
@@ -1231,18 +1232,15 @@ static void __meminit __init_pageblock(unsigned long start_pfn,
 		 * call because of the fact that the pfn number is used to
 		 * get the section_nr and this function should not be
 		 * spanning more than a single section.
+		 *
+		 * We can use a non-atomic operation for setting the
+		 * PG_reserved flag as we are still initializing the pages.
 		 */
-		set_page_links(page, zone, nid, start_pfn);
+		set_page_links(page, zone, nid, start_pfn, is_reserved);
 		init_page_count(page);
 		page_mapcount_reset(page);
 		page_cpupid_reset_last(page);
 
-		/*
-		 * We can use the non-atomic __set_bit operation for setting
-		 * the flag as we are still initializing the pages.
-		 */
-		__SetPageReserved(page);
-
 		/*
 		 * ZONE_DEVICE pages union ->lru with a ->pgmap back
 		 * pointer and hmm_data.  It is a bug if a ZONE_DEVICE
@@ -5612,7 +5610,18 @@ static void __meminit __memmap_init_hotplug(unsigned long size, int nid,
 		pfn = max(ALIGN_DOWN(pfn - 1, pageblock_nr_pages), start_pfn);
 		stride -= pfn;
 
-		__init_pageblock(pfn, stride, zone, nid, pgmap);
+		/*
+		 * The last argument of __init_pageblock is a boolean
+		 * value indicating if the page will be marked as reserved.
+		 *
+		 * Mark page reserved as it will need to wait for onlining
+		 * phase for it to be fully associated with a zone.
+		 *
+		 * Under certain circumstances ZONE_DEVICE pages may not
+		 * need to be marked as reserved, however there is still
+		 * code that is depending on this being set for now.
+		 */
+		__init_pageblock(pfn, stride, zone, nid, pgmap, true);
 
 		cond_resched();
 	}


  parent reply index

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-05 21:19 [mm PATCH v5 0/7] Deferred page init improvements Alexander Duyck
2018-11-05 21:19 ` [mm PATCH v5 1/7] mm: Use mm_zero_struct_page from SPARC on all 64b architectures Alexander Duyck
2018-11-05 21:19 ` [mm PATCH v5 2/7] mm: Drop meminit_pfn_in_nid as it is redundant Alexander Duyck
2018-11-05 21:19 ` [mm PATCH v5 3/7] mm: Implement new zone specific memblock iterator Alexander Duyck
2018-11-09 23:26   ` Pavel Tatashin
2018-11-09 23:58     ` Alexander Duyck
2018-11-10  0:11       ` Pavel Tatashin
2018-11-05 21:19 ` [mm PATCH v5 4/7] mm: Initialize MAX_ORDER_NR_PAGES at a time instead of doing larger sections Alexander Duyck
2018-11-10  1:02   ` Pavel Tatashin
2018-11-19 18:53     ` Alexander Duyck
2018-11-05 21:19 ` [mm PATCH v5 5/7] mm: Move hot-plug specific memory init into separate functions and optimize Alexander Duyck
2018-11-10  2:07   ` Pavel Tatashin
2018-11-05 21:19 ` Alexander Duyck [this message]
2018-11-10  2:11   ` [mm PATCH v5 6/7] mm: Add reserved flag setting to set_page_links Pavel Tatashin
2018-11-05 21:20 ` [mm PATCH v5 7/7] mm: Use common iterator for deferred_init_pages and deferred_free_pages Alexander Duyck
2018-11-10  4:13   ` Pavel Tatashin
2018-11-12 15:12     ` Alexander Duyck
2018-11-09 21:15 ` [mm PATCH v5 0/7] Deferred page init improvements Pavel Tatashin
2018-11-09 23:14   ` Alexander Duyck
2018-11-10  0:00     ` Pavel Tatashin
2018-11-10  0:46       ` Alexander Duyck
2018-11-10  1:16         ` Pavel Tatashin
2018-11-12 19:10           ` Alexander Duyck
2018-11-12 20:37             ` Pavel Tatashin
2018-11-12 16:25       ` Daniel Jordan
2018-11-14 15:07 ` Michal Hocko
2018-11-14 19:12   ` Pavel Tatashin
2018-11-14 21:35     ` Michal Hocko
2018-11-15  0:50   ` Alexander Duyck
2018-11-15  1:55     ` Mike Rapoport
2018-11-15 19:09       ` Mike Rapoport
2018-11-15  8:10     ` Michal Hocko
2018-11-15 16:02       ` Alexander Duyck
2018-11-15 16:40         ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=154145279604.30046.5646399488589213615.stgit@ahduyck-desk1.jf.intel.com \
    --to=alexander.h.duyck@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=davem@davemloft.net \
    --cc=khalid.aziz@oracle.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=ldufour@linux.vnet.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=mgorman@techsingularity.net \
    --cc=mhocko@suse.com \
    --cc=mingo@kernel.org \
    --cc=pavel.tatashin@microsoft.com \
    --cc=rppt@linux.vnet.ibm.com \
    --cc=sparclinux@vger.kernel.org \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    --cc=yi.z.zhang@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git
	git clone --mirror https://lore.kernel.org/lkml/10 lkml/git/10.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git