All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
To: tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, x86@kernel.org, hpa@zytor.com,
	ebiederm@xmission.com, akpm@linux-foundation.org,
	stanislav.kinsburskii@gmail.com, corbet@lwn.net,
	linux-kernel@vger.kernel.org, kexec@lists.infradead.org,
	linux-mm@kvack.org, kys@microsoft.com, jgowans@amazon.com,
	wei.liu@kernel.org, arnd@arndb.de, gregkh@linuxfoundation.org,
	graf@amazon.de, pbonzini@redhat.com
Subject: [RFC PATCH v2 4/7] pmpool: Introduce persistent memory pool
Date: Mon, 25 Sep 2023 14:28:09 -0700	[thread overview]
Message-ID: <169567728905.19708.12668538787634109172.stgit@skinsburskii.> (raw)
In-Reply-To: <169567722094.19708.3583735425859054859.stgit@skinsburskii.>

From: Stanislav Kinsburskii <stanislav.kinsburskii@gmail.com>

This patch introduces a memory allocator specifically tailored for
persistent memory within the kernel. The allocator maintains
kernel-specific states like DMA passthrough device states, IOMMU state, and
more across kexec.

The current implementation provides a foundation for custom solutions that
may be developed in the future. Although the design is kept concise and
straightforward to encourage discussion and feedback, it remains fully
functional.

The persistent memory pool builds upon the contiguous memory allocator
(CMA) and ensures CMA state persistency across kexec by incorporating the
CMA bitmap into the memory region.

Potential applications include:

  1. Enabling various in-kernel entities to allocate persistent pages from
     a unified memory pool, obviating the need for reserving multiple
     regions.

  2. For in-kernel components that need the allocation address to be
     retained on kernel kexec, this address can be exposed to user space
     and subsequently passed through the command line.

  3. Distinct subsystems or drivers can set aside their region, allocating
     a segment for their persistent memory pool, suitable for uses such as
     file systems, key-value stores, and other applications.

Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
---
 include/linux/pmpool.h |   22 +++++++++++
 mm/Kconfig             |    8 ++++
 mm/Makefile            |    1 
 mm/pmpool.c            |  100 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 131 insertions(+)
 create mode 100644 include/linux/pmpool.h
 create mode 100644 mm/pmpool.c

diff --git a/include/linux/pmpool.h b/include/linux/pmpool.h
new file mode 100644
index 000000000000..b41f16fa9660
--- /dev/null
+++ b/include/linux/pmpool.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _PMPOOL_H
+#define _PMPOOL_H
+
+#include <linux/types.h>	/* bool */
+
+struct page;
+
+#if defined(CONFIG_PMPOOL)
+/*
+ * pmpool_alloc() - allocate @count pages from the default persistent pool.
+ * Returns the first page, or NULL if there is no default pool or the
+ * allocation fails.
+ *
+ * pmpool_release() - give @count pages starting at @pages back to the pool.
+ * Returns false if there is no default pool or the release is rejected.
+ */
+struct page *pmpool_alloc(unsigned long count);
+bool pmpool_release(struct page *pages, unsigned long count);
+#else
+/* Stubs for !CONFIG_PMPOOL: allocation always fails, release reports false. */
+static inline struct page *pmpool_alloc(unsigned long count)
+{
+	return NULL;
+}
+static inline bool pmpool_release(struct page *pages, unsigned long count)
+{
+	return false;
+}
+#endif
+
+#endif /* _PMPOOL_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index 09130434e30d..e7c10094fb10 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -922,6 +922,14 @@ config CMA_AREAS
 
 	  If unsure, leave the default value "7" in UMA and "19" in NUMA.
 
+config PMPOOL
+	bool "Persistent memory pool support"
+	select CMA
+	help
+	  This option adds support for a CMA-based persistent memory pool,
+	  which provides page allocation and freeing from a set of
+	  persistent memory ranges deposited to the memory pool.
+
 config MEM_SOFT_DIRTY
 	bool "Track memory changes"
 	depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
diff --git a/mm/Makefile b/mm/Makefile
index 678530a07326..8d3579e58c2c 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -139,3 +139,4 @@ obj-$(CONFIG_IO_MAPPING) += io-mapping.o
 obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o
 obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o
 obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o
+obj-$(CONFIG_PMPOOL) += pmpool.o
diff --git a/mm/pmpool.c b/mm/pmpool.c
new file mode 100644
index 000000000000..12a8cac75558
--- /dev/null
+++ b/mm/pmpool.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt) "pmpool: " fmt
+
+#include <linux/bitmap.h>
+#include <linux/cma.h>
+#include <linux/io.h>
+#include <linux/kexec.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/pmpool.h>
+
+#include "cma.h"
+
+struct pmpool {
+	struct cma *cma;	/* CMA area that backs this pool */
+};
+
+static struct pmpool *default_pmpool;	/* set by parse_pmpool_opt() */
+
+/*
+ * Hand @count pages starting at @pages back to the default pool.
+ *
+ * Returns false when no default pool has been set up; otherwise returns
+ * whatever cma_release() reports for the pool's CMA area.
+ */
+bool pmpool_release(struct page *pages, unsigned long count)
+{
+	struct pmpool *pool = default_pmpool;
+
+	return pool ? cma_release(pool->cma, pages, count) : false;
+}
+
+/*
+ * Allocate @count pages from the default pool.
+ *
+ * Returns NULL when no default pool has been set up; otherwise returns the
+ * result of cma_alloc() on the pool's CMA area, called with 0 and true as
+ * its remaining arguments.
+ */
+struct page *pmpool_alloc(unsigned long count)
+{
+	struct pmpool *pool = default_pmpool;
+
+	return pool ? cma_alloc(pool->cma, count, 0, true) : NULL;
+}
+
+/*
+ * Move the allocation bitmap of @cma to the start of the CMA region itself.
+ *
+ * The bitmap currently attached to @cma is freed, the in-region bitmap is
+ * zeroed, and the leading bits that correspond to the pages now holding the
+ * bitmap are set so those pages are accounted as in use.
+ *
+ * NOTE(review): one bitmap bit is treated as one page here, which matches
+ * the order_per_bit of 0 passed to cma_init_reserved_mem() in
+ * parse_pmpool_opt(); revisit if a pool is ever created with another order.
+ */
+static void pmpool_fixup_cma(struct cma *cma)
+{
+	phys_addr_t bitmap_phys = PFN_PHYS(cma->base_pfn);
+	unsigned long bitmap_bytes, bitmap_pages;
+
+	bitmap_free(cma->bitmap);
+	cma->bitmap = phys_to_virt(bitmap_phys);
+
+	/* BITS_TO_LONGS() counts longs; memset() and PAGE_ALIGN() need bytes. */
+	bitmap_bytes = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long);
+	bitmap_pages = PAGE_ALIGN(bitmap_bytes) >> PAGE_SHIFT;
+
+	memset(cma->bitmap, 0, bitmap_bytes);
+	bitmap_set(cma->bitmap, 0, bitmap_pages);
+
+	/* %pa is the portable specifier for phys_addr_t (passed by reference). */
+	pr_info("CMA bitmap moved to %pa\n", &bitmap_phys);
+}
+
+/*
+ * postcore initcall: relocate the default pool's CMA bitmap via
+ * pmpool_fixup_cma().  Does nothing when no default pool was set up.
+ * Always returns 0.
+ */
+static int __init default_pmpool_fixup_cma(void)
+{
+	if (default_pmpool)
+		pmpool_fixup_cma(default_pmpool->cma);
+
+	return 0;
+}
+postcore_initcall(default_pmpool_fixup_cma);
+
+/*
+ * Handle the "pmpool=<base>,<size>" early parameter.
+ *
+ * Reserves [base, base + size) in memblock, registers the range as a CMA
+ * area named "pmpool" and publishes it as the default pool.  Every failure
+ * is logged and leaves default_pmpool unset.
+ *
+ * Always returns 0.
+ */
+static int __init parse_pmpool_opt(char *str)
+{
+	static struct pmpool pmpool;
+	phys_addr_t base, size, end;
+	int err;
+
+	/* A bare "pmpool" without "=<value>" arrives here as a NULL string. */
+	if (!str)
+		goto bad_format;
+
+	/* Format is pmpool=<base>,<size> */
+	base = memparse(str, &str);
+	/* Only step over the separator if it is really there. */
+	if (*str != ',')
+		goto bad_format;
+
+	size = memparse(str + 1, NULL);
+	if (!size)
+		goto bad_format;
+
+	err = memblock_is_region_reserved(base, size);
+	if (err) {
+		pr_err("memory block overlaps with another one: %d\n", err);
+		return 0;
+	}
+
+	err = memblock_reserve(base, size);
+	if (err) {
+		pr_err("failed to reserve memory block: %d\n", err);
+		return 0;
+	}
+
+	err = cma_init_reserved_mem(base, size, 0, "pmpool", &pmpool.cma);
+	if (err) {
+		pr_err("failed to initialize CMA: %d\n", err);
+		goto free_memblock;
+	}
+
+	/* %pa is the portable specifier for phys_addr_t (passed by reference). */
+	end = base + size;
+	pr_info("default memory pool is created: %pa-%pa\n", &base, &end);
+
+	default_pmpool = &pmpool;
+
+	return 0;
+
+free_memblock:
+	memblock_phys_free(base, size);
+	return 0;
+
+bad_format:
+	pr_err("invalid option, expected pmpool=<base>,<size>\n");
+	return 0;
+}
+early_param("pmpool", parse_pmpool_opt);




WARNING: multiple messages have this Message-ID (diff)
From: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
To: tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, x86@kernel.org, hpa@zytor.com,
	ebiederm@xmission.com, akpm@linux-foundation.org,
	stanislav.kinsburskii@gmail.com, corbet@lwn.net,
	linux-kernel@vger.kernel.org, kexec@lists.infradead.org,
	linux-mm@kvack.org, kys@microsoft.com, jgowans@amazon.com,
	wei.liu@kernel.org, arnd@arndb.de, gregkh@linuxfoundation.org,
	graf@amazon.de, pbonzini@redhat.com
Subject: [RFC PATCH v2 4/7] pmpool: Introduce persistent memory pool
Date: Mon, 25 Sep 2023 14:28:09 -0700	[thread overview]
Message-ID: <169567728905.19708.12668538787634109172.stgit@skinsburskii.> (raw)
In-Reply-To: <169567722094.19708.3583735425859054859.stgit@skinsburskii.>

From: Stanislav Kinsburskii <stanislav.kinsburskii@gmail.com>

This patch introduces a memory allocator specifically tailored for
persistent memory within the kernel. The allocator maintains
kernel-specific states like DMA passthrough device states, IOMMU state, and
more across kexec.

The current implementation provides a foundation for custom solutions that
may be developed in the future. Although the design is kept concise and
straightforward to encourage discussion and feedback, it remains fully
functional.

The persistent memory pool builds upon the contiguous memory allocator
(CMA) and ensures CMA state persistency across kexec by incorporating the
CMA bitmap into the memory region.

Potential applications include:

  1. Enabling various in-kernel entities to allocate persistent pages from
     a unified memory pool, obviating the need for reserving multiple
     regions.

  2. For in-kernel components that need the allocation address to be
     retained on kernel kexec, this address can be exposed to user space
     and subsequently passed through the command line.

  3. Distinct subsystems or drivers can set aside their region, allocating
     a segment for their persistent memory pool, suitable for uses such as
     file systems, key-value stores, and other applications.

Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
---
 include/linux/pmpool.h |   22 +++++++++++
 mm/Kconfig             |    8 ++++
 mm/Makefile            |    1 
 mm/pmpool.c            |  100 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 131 insertions(+)
 create mode 100644 include/linux/pmpool.h
 create mode 100644 mm/pmpool.c

diff --git a/include/linux/pmpool.h b/include/linux/pmpool.h
new file mode 100644
index 000000000000..b41f16fa9660
--- /dev/null
+++ b/include/linux/pmpool.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _PMPOOL_H
+#define _PMPOOL_H
+
+#include <linux/types.h>	/* bool */
+
+struct page;
+
+#if defined(CONFIG_PMPOOL)
+/*
+ * pmpool_alloc() - allocate @count pages from the default persistent pool.
+ * Returns the first page, or NULL if there is no default pool or the
+ * allocation fails.
+ *
+ * pmpool_release() - give @count pages starting at @pages back to the pool.
+ * Returns false if there is no default pool or the release is rejected.
+ */
+struct page *pmpool_alloc(unsigned long count);
+bool pmpool_release(struct page *pages, unsigned long count);
+#else
+/* Stubs for !CONFIG_PMPOOL: allocation always fails, release reports false. */
+static inline struct page *pmpool_alloc(unsigned long count)
+{
+	return NULL;
+}
+static inline bool pmpool_release(struct page *pages, unsigned long count)
+{
+	return false;
+}
+#endif
+
+#endif /* _PMPOOL_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index 09130434e30d..e7c10094fb10 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -922,6 +922,14 @@ config CMA_AREAS
 
 	  If unsure, leave the default value "7" in UMA and "19" in NUMA.
 
+config PMPOOL
+	bool "Persistent memory pool support"
+	select CMA
+	help
+	  This option adds support for a CMA-based persistent memory pool,
+	  which provides page allocation and freeing from a set of
+	  persistent memory ranges deposited to the memory pool.
+
 config MEM_SOFT_DIRTY
 	bool "Track memory changes"
 	depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
diff --git a/mm/Makefile b/mm/Makefile
index 678530a07326..8d3579e58c2c 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -139,3 +139,4 @@ obj-$(CONFIG_IO_MAPPING) += io-mapping.o
 obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o
 obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o
 obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o
+obj-$(CONFIG_PMPOOL) += pmpool.o
diff --git a/mm/pmpool.c b/mm/pmpool.c
new file mode 100644
index 000000000000..12a8cac75558
--- /dev/null
+++ b/mm/pmpool.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt) "pmpool: " fmt
+
+#include <linux/bitmap.h>
+#include <linux/cma.h>
+#include <linux/io.h>
+#include <linux/kexec.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/pmpool.h>
+
+#include "cma.h"
+
+struct pmpool {
+	struct cma *cma;	/* CMA area that backs this pool */
+};
+
+static struct pmpool *default_pmpool;	/* set by parse_pmpool_opt() */
+
+/*
+ * Hand @count pages starting at @pages back to the default pool.
+ *
+ * Returns false when no default pool has been set up; otherwise returns
+ * whatever cma_release() reports for the pool's CMA area.
+ */
+bool pmpool_release(struct page *pages, unsigned long count)
+{
+	struct pmpool *pool = default_pmpool;
+
+	return pool ? cma_release(pool->cma, pages, count) : false;
+}
+
+/*
+ * Allocate @count pages from the default pool.
+ *
+ * Returns NULL when no default pool has been set up; otherwise returns the
+ * result of cma_alloc() on the pool's CMA area, called with 0 and true as
+ * its remaining arguments.
+ */
+struct page *pmpool_alloc(unsigned long count)
+{
+	struct pmpool *pool = default_pmpool;
+
+	return pool ? cma_alloc(pool->cma, count, 0, true) : NULL;
+}
+
+/*
+ * Move the allocation bitmap of @cma to the start of the CMA region itself.
+ *
+ * The bitmap currently attached to @cma is freed, the in-region bitmap is
+ * zeroed, and the leading bits that correspond to the pages now holding the
+ * bitmap are set so those pages are accounted as in use.
+ *
+ * NOTE(review): one bitmap bit is treated as one page here, which matches
+ * the order_per_bit of 0 passed to cma_init_reserved_mem() in
+ * parse_pmpool_opt(); revisit if a pool is ever created with another order.
+ */
+static void pmpool_fixup_cma(struct cma *cma)
+{
+	phys_addr_t bitmap_phys = PFN_PHYS(cma->base_pfn);
+	unsigned long bitmap_bytes, bitmap_pages;
+
+	bitmap_free(cma->bitmap);
+	cma->bitmap = phys_to_virt(bitmap_phys);
+
+	/* BITS_TO_LONGS() counts longs; memset() and PAGE_ALIGN() need bytes. */
+	bitmap_bytes = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long);
+	bitmap_pages = PAGE_ALIGN(bitmap_bytes) >> PAGE_SHIFT;
+
+	memset(cma->bitmap, 0, bitmap_bytes);
+	bitmap_set(cma->bitmap, 0, bitmap_pages);
+
+	/* %pa is the portable specifier for phys_addr_t (passed by reference). */
+	pr_info("CMA bitmap moved to %pa\n", &bitmap_phys);
+}
+
+/*
+ * postcore initcall: relocate the default pool's CMA bitmap via
+ * pmpool_fixup_cma().  Does nothing when no default pool was set up.
+ * Always returns 0.
+ */
+static int __init default_pmpool_fixup_cma(void)
+{
+	if (default_pmpool)
+		pmpool_fixup_cma(default_pmpool->cma);
+
+	return 0;
+}
+postcore_initcall(default_pmpool_fixup_cma);
+
+/*
+ * Handle the "pmpool=<base>,<size>" early parameter.
+ *
+ * Reserves [base, base + size) in memblock, registers the range as a CMA
+ * area named "pmpool" and publishes it as the default pool.  Every failure
+ * is logged and leaves default_pmpool unset.
+ *
+ * Always returns 0.
+ */
+static int __init parse_pmpool_opt(char *str)
+{
+	static struct pmpool pmpool;
+	phys_addr_t base, size, end;
+	int err;
+
+	/* A bare "pmpool" without "=<value>" arrives here as a NULL string. */
+	if (!str)
+		goto bad_format;
+
+	/* Format is pmpool=<base>,<size> */
+	base = memparse(str, &str);
+	/* Only step over the separator if it is really there. */
+	if (*str != ',')
+		goto bad_format;
+
+	size = memparse(str + 1, NULL);
+	if (!size)
+		goto bad_format;
+
+	err = memblock_is_region_reserved(base, size);
+	if (err) {
+		pr_err("memory block overlaps with another one: %d\n", err);
+		return 0;
+	}
+
+	err = memblock_reserve(base, size);
+	if (err) {
+		pr_err("failed to reserve memory block: %d\n", err);
+		return 0;
+	}
+
+	err = cma_init_reserved_mem(base, size, 0, "pmpool", &pmpool.cma);
+	if (err) {
+		pr_err("failed to initialize CMA: %d\n", err);
+		goto free_memblock;
+	}
+
+	/* %pa is the portable specifier for phys_addr_t (passed by reference). */
+	end = base + size;
+	pr_info("default memory pool is created: %pa-%pa\n", &base, &end);
+
+	default_pmpool = &pmpool;
+
+	return 0;
+
+free_memblock:
+	memblock_phys_free(base, size);
+	return 0;
+
+bad_format:
+	pr_err("invalid option, expected pmpool=<base>,<size>\n");
+	return 0;
+}
+early_param("pmpool", parse_pmpool_opt);



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

  parent reply	other threads:[~2023-09-25 21:28 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-25 21:27 [RFC PATCH v2 0/7] Introduce persistent memory pool Stanislav Kinsburskii
2023-09-25 21:27 ` Stanislav Kinsburskii
2023-09-25 21:27 ` [RFC PATCH v2 1/7] kexec_file: Add fdt modification callback support Stanislav Kinsburskii
2023-09-25 21:27   ` Stanislav Kinsburskii
2023-09-25 21:27 ` [RFC PATCH v2 2/7] x86: kexec: Transfer existing fdt to the new kernel Stanislav Kinsburskii
2023-09-25 21:27   ` Stanislav Kinsburskii
2023-09-25 21:28 ` [RFC PATCH v2 3/7] x86: kexec: Enable fdt modification in callbacks Stanislav Kinsburskii
2023-09-25 21:28   ` Stanislav Kinsburskii
2023-09-25 21:28 ` Stanislav Kinsburskii [this message]
2023-09-25 21:28   ` [RFC PATCH v2 4/7] pmpool: Introduce persistent memory pool Stanislav Kinsburskii
2023-09-25 21:28 ` [RFC PATCH v2 5/7] pmpool: Update device tree on kexec Stanislav Kinsburskii
2023-09-25 21:28   ` Stanislav Kinsburskii
2023-09-25 21:28 ` [RFC PATCH v2 6/7] pmpool: Restore state from device tree post-kexec Stanislav Kinsburskii
2023-09-25 21:28   ` Stanislav Kinsburskii
2023-09-25 21:28 ` [RFC PATCH v2 7/7] Drivers: hv: Allocate persistent pages for root partition Stanislav Kinsburskii
2023-09-25 21:28   ` Stanislav Kinsburskii

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=169567728905.19708.12668538787634109172.stgit@skinsburskii. \
    --to=skinsburskii@linux.microsoft.com \
    --cc=akpm@linux-foundation.org \
    --cc=arnd@arndb.de \
    --cc=bp@alien8.de \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=ebiederm@xmission.com \
    --cc=graf@amazon.de \
    --cc=gregkh@linuxfoundation.org \
    --cc=hpa@zytor.com \
    --cc=jgowans@amazon.com \
    --cc=kexec@lists.infradead.org \
    --cc=kys@microsoft.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=stanislav.kinsburskii@gmail.com \
    --cc=tglx@linutronix.de \
    --cc=wei.liu@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.