linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Huaisheng HS1 Ye <yehs1@lenovo.com>
To: "akpm@linux-foundation.org" <akpm@linux-foundation.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>
Cc: "mhocko@suse.com" <mhocko@suse.com>,
	"willy@infradead.org" <willy@infradead.org>,
	"vbabka@suse.cz" <vbabka@suse.cz>,
	"mgorman@techsingularity.net" <mgorman@techsingularity.net>,
	"pasha.tatashin@oracle.com" <pasha.tatashin@oracle.com>,
	"alexander.levin@verizon.com" <alexander.levin@verizon.com>,
	"hannes@cmpxchg.org" <hannes@cmpxchg.org>,
	"penguin-kernel@I-love.SAKURA.ne.jp"
	<penguin-kernel@I-love.SAKURA.ne.jp>,
	"colyli@suse.de" <colyli@suse.de>,
	NingTing Cheng <chengnt@lenovo.com>,
	Ocean HY1 He <hehy1@lenovo.com>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-nvdimm@lists.01.org" <linux-nvdimm@lists.01.org>
Subject: [External]  [RFC PATCH v1 3/6] mm, zone_type: create ZONE_NVM and fill into GFP_ZONE_TABLE
Date: Tue, 8 May 2018 02:33:30 +0000	[thread overview]
Message-ID: <HK2PR03MB1684653383FFEDAE9B41A548929A0@HK2PR03MB1684.apcprd03.prod.outlook.com> (raw)
In-Reply-To: <1525746628-114136-4-git-send-email-yehs1@lenovo.com>

Add ZONE_NVM to enum zone_type, and create GFP_NVM,
which is the gfp_t flag for the NVM zone.

Because no low plain-integer GFP bit is left that could be
used for ___GFP_NVM, a workable approach is to take one slot from
GFP_ZONE_BAD and use it to place ZONE_NVM in GFP_ZONE_TABLE.

Signed-off-by: Huaisheng Ye <yehs1@lenovo.com>
Signed-off-by: Ocean He <hehy1@lenovo.com>
---
 include/linux/gfp.h    | 57 +++++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/mmzone.h |  3 +++
 mm/Kconfig             | 16 ++++++++++++++
 mm/page_alloc.c        |  3 +++
 4 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 1a4582b..9e4d867 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -39,6 +39,9 @@
 #define ___GFP_DIRECT_RECLAIM	0x400000u
 #define ___GFP_WRITE		0x800000u
 #define ___GFP_KSWAPD_RECLAIM	0x1000000u
+#ifdef CONFIG_ZONE_NVM
+#define ___GFP_NVM		0x4000000u
+#endif
 #ifdef CONFIG_LOCKDEP
 #define ___GFP_NOLOCKDEP	0x2000000u
 #else
@@ -57,7 +60,12 @@
 #define __GFP_HIGHMEM	((__force gfp_t)___GFP_HIGHMEM)
 #define __GFP_DMA32	((__force gfp_t)___GFP_DMA32)
 #define __GFP_MOVABLE	((__force gfp_t)___GFP_MOVABLE)  /* ZONE_MOVABLE allowed */
+#ifdef CONFIG_ZONE_NVM
+#define __GFP_NVM	((__force gfp_t)___GFP_NVM)  /* ZONE_NVM allowed */
+#define GFP_ZONEMASK	(__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE|__GFP_NVM)
+#else
 #define GFP_ZONEMASK	(__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
+#endif
 
 /*
  * Page mobility and placement hints
@@ -205,7 +213,8 @@
 #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
 
 /* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP))
+#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP) + \
+				(IS_ENABLED(CONFIG_ZONE_NVM) << 1))
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /*
@@ -283,6 +292,9 @@
 #define GFP_TRANSHUGE_LIGHT	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
 			 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
 #define GFP_TRANSHUGE	(GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
+#ifdef CONFIG_ZONE_NVM
+#define GFP_NVM		__GFP_NVM
+#endif
 
 /* Convert GFP flags to their corresponding migrate type */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
@@ -342,7 +354,7 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
  *       0x0    => NORMAL
  *       0x1    => DMA or NORMAL
  *       0x2    => HIGHMEM or NORMAL
- *       0x3    => BAD (DMA+HIGHMEM)
+ *       0x3    => NVM (DMA+HIGHMEM) with CONFIG_ZONE_NVM, otherwise BAD
  *       0x4    => DMA32 or DMA or NORMAL
  *       0x5    => BAD (DMA+DMA32)
  *       0x6    => BAD (HIGHMEM+DMA32)
@@ -370,6 +382,29 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
 #error GFP_ZONES_SHIFT too large to create GFP_ZONE_TABLE integer
 #endif
 
+#ifdef CONFIG_ZONE_NVM
+#define ___GFP_NVM_BIT (___GFP_DMA | ___GFP_HIGHMEM)
+#define GFP_ZONE_TABLE ( \
+	((__force unsigned long)ZONE_NORMAL <<				       \
+			0 * GFP_ZONES_SHIFT)				       \
+	| ((__force unsigned long)OPT_ZONE_DMA <<			       \
+			___GFP_DMA * GFP_ZONES_SHIFT)			       \
+	| ((__force unsigned long)OPT_ZONE_HIGHMEM <<			       \
+			___GFP_HIGHMEM * GFP_ZONES_SHIFT)		       \
+	| ((__force unsigned long)OPT_ZONE_DMA32 <<			       \
+			___GFP_DMA32 * GFP_ZONES_SHIFT)			       \
+	| ((__force unsigned long)ZONE_NORMAL <<			       \
+			___GFP_MOVABLE * GFP_ZONES_SHIFT)		       \
+	| ((__force unsigned long)OPT_ZONE_DMA <<			       \
+			(___GFP_MOVABLE | ___GFP_DMA) * GFP_ZONES_SHIFT)       \
+	| ((__force unsigned long)ZONE_MOVABLE <<			       \
+			(___GFP_MOVABLE | ___GFP_HIGHMEM) * GFP_ZONES_SHIFT)   \
+	| ((__force unsigned long)OPT_ZONE_DMA32 <<			       \
+			(___GFP_MOVABLE | ___GFP_DMA32) * GFP_ZONES_SHIFT)     \
+	| ((__force unsigned long)ZONE_NVM <<				       \
+			___GFP_NVM_BIT * GFP_ZONES_SHIFT)                      \
+)
+#else
 #define GFP_ZONE_TABLE ( \
 	(ZONE_NORMAL << 0 * GFP_ZONES_SHIFT)				       \
 	| (OPT_ZONE_DMA << ___GFP_DMA * GFP_ZONES_SHIFT)		       \
@@ -380,6 +415,7 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
 	| (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * GFP_ZONES_SHIFT)\
 	| (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * GFP_ZONES_SHIFT)\
 )
+#endif
 
 /*
  * GFP_ZONE_BAD is a bitmap for all combinations of __GFP_DMA, __GFP_DMA32
@@ -387,6 +423,17 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
  * entry starting with bit 0. Bit is set if the combination is not
  * allowed.
  */
+#ifdef CONFIG_ZONE_NVM
+#define GFP_ZONE_BAD ( \
+	1 << (___GFP_DMA | ___GFP_DMA32)				      \
+	| 1 << (___GFP_DMA32 | ___GFP_HIGHMEM)				      \
+	| 1 << (___GFP_DMA | ___GFP_DMA32 | ___GFP_HIGHMEM)		      \
+	| 1 << (___GFP_MOVABLE | ___GFP_HIGHMEM | ___GFP_DMA)		      \
+	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA)		      \
+	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_HIGHMEM)		      \
+	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM)  \
+)
+#else
 #define GFP_ZONE_BAD ( \
 	1 << (___GFP_DMA | ___GFP_HIGHMEM)				      \
 	| 1 << (___GFP_DMA | ___GFP_DMA32)				      \
@@ -397,12 +444,16 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
 	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_HIGHMEM)		      \
 	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM)  \
 )
+#endif
 
 static inline enum zone_type gfp_zone(gfp_t flags)
 {
 	enum zone_type z;
 	int bit = (__force int) (flags & GFP_ZONEMASK);
-
+#ifdef CONFIG_ZONE_NVM
+	if (bit & __GFP_NVM)
+		bit = (__force int)___GFP_NVM_BIT;
+#endif
 	z = (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) &
 					 ((1 << GFP_ZONES_SHIFT) - 1);
 	VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7522a69..f38e4a0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -345,6 +345,9 @@ enum zone_type {
 	 */
 	ZONE_HIGHMEM,
 #endif
+#ifdef CONFIG_ZONE_NVM
+	ZONE_NVM,
+#endif
 	ZONE_MOVABLE,
 #ifdef CONFIG_ZONE_DEVICE
 	ZONE_DEVICE,
diff --git a/mm/Kconfig b/mm/Kconfig
index c782e8f..5fe1f63 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -687,6 +687,22 @@ config ZONE_DEVICE
 
 	  If FS_DAX is enabled, then say Y.
 
+config ZONE_NVM
+	bool "Manage NVDIMM (pmem) by memory management (EXPERIMENTAL)"
+	depends on NUMA && X86_64
+	depends on HAVE_MEMBLOCK_NODE_MAP
+	depends on HAVE_MEMBLOCK
+	depends on !IA32_EMULATION
+	default n
+
+	help
+	  This option lets the memory management subsystem manage NVDIMM
+	  (pmem). With it, mm can arrange NVDIMMs into real physical zones
+	  like NORMAL and DMA32, so the buddy system and swap can be used
+	  directly on the NVDIMM zone. This helps recover dirty pages
+	  after a power failure or system crash by storing the write cache
+	  in the NVDIMM zone.
+
 config ARCH_HAS_HMM
 	bool
 	default y
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 266c065..d8bd20d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -228,6 +228,9 @@ bool pm_suspended_storage(void)
 	 "DMA32",
 #endif
 	 "Normal",
+#ifdef CONFIG_ZONE_NVM
+	 "NVM",
+#endif
 #ifdef CONFIG_HIGHMEM
 	 "HighMem",
 #endif
-- 
1.8.3.1

  parent reply	other threads:[~2018-05-08  2:33 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-08  2:30 [RFC PATCH v1 0/6] use mm to manage NVDIMM (pmem) zone Huaisheng Ye
     [not found] ` <1525746628-114136-2-git-send-email-yehs1@lenovo.com>
2018-05-08  2:30   ` [External] [RFC PATCH v1 1/6] mm/memblock: Expand definition of flags to support NVDIMM Huaisheng HS1 Ye
2018-05-08  2:30 ` [RFC PATCH v1 4/6] arch/x86/kernel: mark NVDIMM regions from e820_table Huaisheng Ye
     [not found] ` <1525746628-114136-3-git-send-email-yehs1@lenovo.com>
2018-05-08  2:32   ` [External] [RFC PATCH v1 2/6] mm/page_alloc.c: get pfn range with flags of memblock Huaisheng HS1 Ye
     [not found] ` <1525746628-114136-4-git-send-email-yehs1@lenovo.com>
2018-05-08  2:33   ` Huaisheng HS1 Ye [this message]
2018-05-08  4:43     ` [External] [RFC PATCH v1 3/6] mm, zone_type: create ZONE_NVM and fill into GFP_ZONE_TABLE Randy Dunlap
2018-05-09  4:22       ` Huaisheng HS1 Ye
2018-05-09 11:47         ` Michal Hocko
2018-05-09 14:04           ` Huaisheng HS1 Ye
2018-05-09 20:56             ` Michal Hocko
2018-05-10  3:53               ` Huaisheng HS1 Ye
     [not found] ` <1525746628-114136-6-git-send-email-yehs1@lenovo.com>
2018-05-08  2:34   ` [External] [RFC PATCH v1 5/6] mm: get zone spanned pages separately for DRAM and NVDIMM Huaisheng HS1 Ye
     [not found] ` <1525746628-114136-7-git-send-email-yehs1@lenovo.com>
2018-05-08  2:35   ` [External] [RFC PATCH v1 6/6] arch/x86/mm: create page table mapping for DRAM and NVDIMM both Huaisheng HS1 Ye
2018-05-10  7:57 ` [RFC PATCH v1 0/6] use mm to manage NVDIMM (pmem) zone Michal Hocko
2018-05-10  8:41   ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=HK2PR03MB1684653383FFEDAE9B41A548929A0@HK2PR03MB1684.apcprd03.prod.outlook.com \
    --to=yehs1@lenovo.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.levin@verizon.com \
    --cc=chengnt@lenovo.com \
    --cc=colyli@suse.de \
    --cc=hannes@cmpxchg.org \
    --cc=hehy1@lenovo.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=mgorman@techsingularity.net \
    --cc=mhocko@suse.com \
    --cc=pasha.tatashin@oracle.com \
    --cc=penguin-kernel@I-love.SAKURA.ne.jp \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).