linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Anthony Yznaga <anthony.yznaga@oracle.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: willy@infradead.org, corbet@lwn.net, tglx@linutronix.de,
	mingo@redhat.com, bp@alien8.de, x86@kernel.org, hpa@zytor.com,
	dave.hansen@linux.intel.com, luto@kernel.org,
	peterz@infradead.org, rppt@linux.ibm.com,
	akpm@linux-foundation.org, hughd@google.com,
	ebiederm@xmission.com, masahiroy@kernel.org, ardb@kernel.org,
	ndesaulniers@google.com, dima@golovin.in,
	daniel.kiper@oracle.com, nivedita@alum.mit.edu,
	rafael.j.wysocki@intel.com, dan.j.williams@intel.com,
	zhenzhong.duan@oracle.com, jroedel@suse.de, bhe@redhat.com,
	guro@fb.com, Thomas.Lendacky@amd.com,
	andriy.shevchenko@linux.intel.com, keescook@chromium.org,
	hannes@cmpxchg.org, minchan@kernel.org, mhocko@kernel.org,
	ying.huang@intel.com, yang.shi@linux.alibaba.com,
	gustavo@embeddedor.com, ziqian.lzq@antfin.com,
	vdavydov.dev@gmail.com, jason.zeng@intel.com,
	kevin.tian@intel.com, zhiyuan.lv@intel.com, lei.l.li@intel.com,
	paul.c.lai@intel.com, ashok.raj@intel.com,
	linux-fsdevel@vger.kernel.org, linux-doc@vger.kernel.org,
	kexec@lists.infradead.org
Subject: [RFC 08/43] mm: PKRAM: introduce super block
Date: Wed,  6 May 2020 17:41:34 -0700	[thread overview]
Message-ID: <1588812129-8596-9-git-send-email-anthony.yznaga@oracle.com> (raw)
In-Reply-To: <1588812129-8596-1-git-send-email-anthony.yznaga@oracle.com>

The PKRAM super block is the starting point for restoring preserved
memory. By providing the super block to the new kernel at boot time,
preserved memory can be reserved and made available to be restored.
To point the kernel to the location of the super block, one passes
its pfn via the 'pkram' boot param. For that purpose, the pkram super
block pfn is exported via /sys/kernel/pkram. If none is passed, any
preserved memory will not be kept, and a new super block will be
allocated.

Originally-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
---
 mm/pkram.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 94 insertions(+), 2 deletions(-)

diff --git a/mm/pkram.c b/mm/pkram.c
index 44fadb70acf6..70f2219e6218 100644
--- a/mm/pkram.c
+++ b/mm/pkram.c
@@ -5,15 +5,18 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/kobject.h>
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/notifier.h>
+#include <linux/pfn.h>
 #include <linux/pkram.h>
 #include <linux/reboot.h>
 #include <linux/sched.h>
 #include <linux/string.h>
+#include <linux/sysfs.h>
 #include <linux/types.h>
 
 #include "internal.h"
@@ -80,12 +83,38 @@ struct pkram_node {
 #define PKRAM_ACCMODE_MASK	3
 
 /*
+ * The PKRAM super block contains data needed to restore the preserved memory
+ * structure on boot. The pointer to it (pfn) should be passed via the 'pkram'
+ * boot param if one wants to restore preserved data saved by the previously
+ * executing kernel. For that purpose the kernel exports the pfn via
+ * /sys/kernel/pkram. If none is passed, any previously preserved memory is
+ * not kept, and a new clean page is allocated for the super block.
+ *
+ * The structure occupies a memory page.
+ */
+struct pkram_super_block {
+	__u64	node_pfn;		/* first element of the node list */
+};
+
+static unsigned long pkram_sb_pfn __initdata;
+static struct pkram_super_block *pkram_sb;
+
+/*
  * For convenience sake PKRAM nodes are kept in an auxiliary doubly-linked list
  * connected through the lru field of the page struct.
  */
 static LIST_HEAD(pkram_nodes);			/* linked through page::lru */
 static DEFINE_MUTEX(pkram_mutex);		/* serializes open/close */
 
+/*
+ * The PKRAM super block pfn, see above.
+ */
+static int __init parse_pkram_sb_pfn(char *arg)
+{
+	return kstrtoul(arg, 16, &pkram_sb_pfn);
+}
+early_param("pkram", parse_pkram_sb_pfn);
+
 static inline struct page *pkram_alloc_page(gfp_t gfp_mask)
 {
 	return alloc_page(gfp_mask);
@@ -209,6 +238,7 @@ static void pkram_stream_init_obj(struct pkram_stream *ps, struct pkram_obj *obj
  * @gfp_mask specifies the memory allocation mask to be used when saving data.
  *
  * Error values:
+ *	%ENODEV: PKRAM not available
  *	%ENAMETOOLONG: name len >= PKRAM_NAME_MAX
  *	%ENOMEM: insufficient memory available
  *	%EEXIST: node with specified name already exists
@@ -224,6 +254,9 @@ int pkram_prepare_save(struct pkram_stream *ps, const char *name, gfp_t gfp_mask
 	struct pkram_node *node;
 	int err = 0;
 
+	if (!pkram_sb)
+		return -ENODEV;
+
 	if (strlen(name) >= PKRAM_NAME_MAX)
 		return -ENAMETOOLONG;
 
@@ -333,6 +366,7 @@ void pkram_discard_save(struct pkram_stream *ps)
  * Returns 0 on success, -errno on failure.
  *
  * Error values:
+ *	%ENODEV: PKRAM not available
  *	%ENOENT: node with specified name does not exist
  *	%EBUSY: save to required node has not finished yet
  *
@@ -343,6 +377,9 @@ int pkram_prepare_load(struct pkram_stream *ps, const char *name)
 	struct pkram_node *node;
 	int err = 0;
 
+	if (!pkram_sb)
+		return -ENODEV;
+
 	mutex_lock(&pkram_mutex);
 	node = pkram_find_node(name);
 	if (!node) {
@@ -708,6 +745,7 @@ static void __pkram_reboot(void)
 		node->node_pfn = node_pfn;
 		node_pfn = page_to_pfn(page);
 	}
+	pkram_sb->node_pfn = node_pfn;
 }
 
 static int pkram_reboot(struct notifier_block *notifier,
@@ -715,7 +753,8 @@ static int pkram_reboot(struct notifier_block *notifier,
 {
 	if (val != SYS_RESTART)
 		return NOTIFY_DONE;
-	__pkram_reboot();
+	if (pkram_sb)
+		__pkram_reboot();
 	return NOTIFY_OK;
 }
 
@@ -723,9 +762,62 @@ static struct notifier_block pkram_reboot_notifier = {
 	.notifier_call = pkram_reboot,
 };
 
+static ssize_t show_pkram_sb_pfn(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	unsigned long pfn = pkram_sb ? PFN_DOWN(__pa(pkram_sb)) : 0;
+
+	return sprintf(buf, "%lx\n", pfn);
+}
+
+static struct kobj_attribute pkram_sb_pfn_attr =
+	__ATTR(pkram, 0444, show_pkram_sb_pfn, NULL);
+
+static struct attribute *pkram_attrs[] = {
+	&pkram_sb_pfn_attr.attr,
+	NULL,
+};
+
+static struct attribute_group pkram_attr_group = {
+	.attrs = pkram_attrs,
+};
+
+/* returns non-zero on success */
+static int __init pkram_init_sb(void)
+{
+	unsigned long pfn;
+	struct pkram_node *node;
+
+	if (!pkram_sb) {
+		struct page *page;
+
+		page = pkram_alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!page) {
+			pr_err("PKRAM: Failed to allocate super block\n");
+			return 0;
+		}
+		pkram_sb = page_address(page);
+	}
+
+	/*
+	 * Build auxiliary doubly-linked list of nodes connected through
+	 * page::lru for convenience sake.
+	 */
+	pfn = pkram_sb->node_pfn;
+	while (pfn) {
+		node = pfn_to_kaddr(pfn);
+		pkram_insert_node(node);
+		pfn = node->node_pfn;
+	}
+	return 1;
+}
+
 static int __init pkram_init(void)
 {
-	register_reboot_notifier(&pkram_reboot_notifier);
+	if (pkram_init_sb()) {
+		register_reboot_notifier(&pkram_reboot_notifier);
+		sysfs_update_group(kernel_kobj, &pkram_attr_group);
+	}
 	return 0;
 }
 module_init(pkram_init);
-- 
2.13.3



  parent reply	other threads:[~2020-05-07  0:43 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-07  0:41 [RFC 00/43] PKRAM: Preserved-over-Kexec RAM Anthony Yznaga
2020-05-07  0:41 ` [RFC 01/43] mm: add PKRAM API stubs and Kconfig Anthony Yznaga
2020-05-07  0:41 ` [RFC 02/43] mm: PKRAM: implement node load and save functions Anthony Yznaga
2020-05-07  0:41 ` [RFC 03/43] mm: PKRAM: implement object " Anthony Yznaga
2020-05-07  0:41 ` [RFC 04/43] mm: PKRAM: implement page stream operations Anthony Yznaga
2020-05-07  0:41 ` [RFC 05/43] mm: PKRAM: support preserving transparent hugepages Anthony Yznaga
2020-05-07  0:41 ` [RFC 06/43] mm: PKRAM: implement byte stream operations Anthony Yznaga
2020-05-07  0:41 ` [RFC 07/43] mm: PKRAM: link nodes by pfn before reboot Anthony Yznaga
2020-05-07  0:41 ` Anthony Yznaga [this message]
2020-05-07  0:41 ` [RFC 09/43] PKRAM: build a physical mapping pagetable of pages to be preserved Anthony Yznaga
2020-05-07  0:41 ` [RFC 10/43] PKRAM: add code for walking the preserved pages pagetable Anthony Yznaga
2020-05-07  0:41 ` [RFC 11/43] PKRAM: pass the preserved pages pagetable to the next kernel Anthony Yznaga
2020-05-07  0:41 ` [RFC 12/43] mm: PKRAM: reserve preserved memory at boot Anthony Yznaga
2020-05-07  0:41 ` [RFC 13/43] mm: PKRAM: free preserved pages pagetable Anthony Yznaga
2020-05-07  0:41 ` [RFC 14/43] mm: memblock: PKRAM: prevent memblock resize from clobbering preserved pages Anthony Yznaga
2020-05-11 13:57   ` Mike Rapoport
2020-05-11 23:29     ` Anthony Yznaga
2020-05-07  0:41 ` [RFC 15/43] PKRAM: provide a way to ban pages from use by PKRAM Anthony Yznaga
2020-05-07  0:41 ` [RFC 16/43] kexec: PKRAM: prevent kexec clobbering preserved pages in some cases Anthony Yznaga
2020-05-07  0:41 ` [RFC 17/43] PKRAM: provide a way to check if a memory range has preserved pages Anthony Yznaga
2020-05-07  0:41 ` [RFC 18/43] kexec: PKRAM: avoid clobbering already " Anthony Yznaga
2020-05-07  0:41 ` [RFC 19/43] mm: PKRAM: allow preserved memory to be freed from userspace Anthony Yznaga
2020-05-07  0:41 ` [RFC 20/43] PKRAM: disable feature when running the kdump kernel Anthony Yznaga
2020-05-07  0:41 ` [RFC 21/43] x86/KASLR: PKRAM: support physical kaslr Anthony Yznaga
2020-05-07 17:51   ` Kees Cook
2020-05-07 18:41     ` Anthony Yznaga
2020-05-07  0:41 ` [RFC 22/43] mm: shmem: introduce shmem_insert_page Anthony Yznaga
2020-05-07  0:41 ` [RFC 23/43] mm: shmem: enable saving to PKRAM Anthony Yznaga
2020-05-07  0:41 ` [RFC 24/43] mm: shmem: prevent swapping of PKRAM-enabled tmpfs pages Anthony Yznaga
2020-05-07  0:41 ` [RFC 25/43] mm: shmem: specify the mm to use when inserting pages Anthony Yznaga
2020-05-07  0:41 ` [RFC 26/43] mm: shmem: when inserting, handle pages already charged to a memcg Anthony Yznaga
2020-05-07  0:41 ` [RFC 27/43] x86/mm/numa: add numa_isolate_memblocks() Anthony Yznaga
2020-05-07  0:41 ` [RFC 28/43] PKRAM: ensure memblocks with preserved pages init'd for numa Anthony Yznaga
2020-05-07  0:41 ` [RFC 29/43] memblock: PKRAM: mark memblocks that contain preserved pages Anthony Yznaga
2020-05-07  0:41 ` [RFC 30/43] memblock: add for_each_reserved_mem_range() Anthony Yznaga
2020-05-07  0:41 ` [RFC 31/43] memblock, mm: defer initialization of preserved pages Anthony Yznaga
2020-05-07  0:41 ` [RFC 32/43] shmem: PKRAM: preserve shmem files a chunk at a time Anthony Yznaga
2020-05-07  0:41 ` [RFC 33/43] PKRAM: atomically add and remove link pages Anthony Yznaga
2020-05-07  0:42 ` [RFC 34/43] shmem: PKRAM: multithread preserving and restoring shmem pages Anthony Yznaga
2020-05-07 16:30   ` Randy Dunlap
2020-05-07 17:59     ` Anthony Yznaga
2020-05-07  0:42 ` [RFC 35/43] shmem: introduce shmem_insert_pages() Anthony Yznaga
2020-05-07  0:42 ` [RFC 36/43] PKRAM: add support for loading pages in bulk Anthony Yznaga
2020-05-07  0:42 ` [RFC 37/43] shmem: PKRAM: enable bulk loading of preserved pages into shmem Anthony Yznaga
2020-05-07  0:42 ` [RFC 38/43] mm: implement splicing a list of pages to the LRU Anthony Yznaga
2020-05-07  0:42 ` [RFC 39/43] shmem: optimize adding pages to the LRU in shmem_insert_pages() Anthony Yznaga
2020-05-07  0:42 ` [RFC 40/43] shmem: initial support for adding multiple pages to pagecache Anthony Yznaga
2020-05-07  0:42 ` [RFC 41/43] XArray: add xas_export_node() and xas_import_node() Anthony Yznaga
2020-05-07  0:42 ` [RFC 42/43] shmem: reduce time holding xa_lock when inserting pages Anthony Yznaga
2020-05-07  0:42 ` [RFC 43/43] PKRAM: improve index alignment of pkram_link entries Anthony Yznaga

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1588812129-8596-9-git-send-email-anthony.yznaga@oracle.com \
    --to=anthony.yznaga@oracle.com \
    --cc=Thomas.Lendacky@amd.com \
    --cc=akpm@linux-foundation.org \
    --cc=andriy.shevchenko@linux.intel.com \
    --cc=ardb@kernel.org \
    --cc=ashok.raj@intel.com \
    --cc=bhe@redhat.com \
    --cc=bp@alien8.de \
    --cc=corbet@lwn.net \
    --cc=dan.j.williams@intel.com \
    --cc=daniel.kiper@oracle.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=dima@golovin.in \
    --cc=ebiederm@xmission.com \
    --cc=guro@fb.com \
    --cc=gustavo@embeddedor.com \
    --cc=hannes@cmpxchg.org \
    --cc=hpa@zytor.com \
    --cc=hughd@google.com \
    --cc=jason.zeng@intel.com \
    --cc=jroedel@suse.de \
    --cc=keescook@chromium.org \
    --cc=kevin.tian@intel.com \
    --cc=kexec@lists.infradead.org \
    --cc=lei.l.li@intel.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=masahiroy@kernel.org \
    --cc=mhocko@kernel.org \
    --cc=minchan@kernel.org \
    --cc=mingo@redhat.com \
    --cc=ndesaulniers@google.com \
    --cc=nivedita@alum.mit.edu \
    --cc=paul.c.lai@intel.com \
    --cc=peterz@infradead.org \
    --cc=rafael.j.wysocki@intel.com \
    --cc=rppt@linux.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=vdavydov.dev@gmail.com \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    --cc=yang.shi@linux.alibaba.com \
    --cc=ying.huang@intel.com \
    --cc=zhenzhong.duan@oracle.com \
    --cc=zhiyuan.lv@intel.com \
    --cc=ziqian.lzq@antfin.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).