From: madvenka@linux.microsoft.com
To: gregkh@linuxfoundation.org, pbonzini@redhat.com, rppt@kernel.org,
	jgowans@amazon.com, graf@amazon.de, arnd@arndb.de,
	keescook@chromium.org, stanislav.kinsburskii@gmail.com,
	anthony.yznaga@oracle.com, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, madvenka@linux.microsoft.com,
	jamorris@linux.microsoft.com
Subject: [RFC PATCH v1 06/10] mm/prmem: Implement persistent XArray (and Radix Tree)
Date: Mon, 16 Oct 2023 18:32:11 -0500
Message-ID: <20231016233215.13090-7-madvenka@linux.microsoft.com>
In-Reply-To: <20231016233215.13090-1-madvenka@linux.microsoft.com>

From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>

Consumers can persist their data structures by allocating persistent
memory for them.

Now, data structures are connected to one another using pointers, arrays,
linked lists, RB nodes, etc. These can all be persisted by allocating
memory for them from persistent memory. E.g., a linked list is persisted
if the data structures that embed the list head and the list nodes are
allocated from persistent memory. Ditto for RB trees.
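
For instance, here is a minimal sketch of a persisted linked list. The
structures pdata/pnode and the helper persist_example() are purely
illustrative; prmem_alloc() is the allocator interface introduced earlier
in this series.

	/* Hypothetical consumer code; needs <linux/list.h> and <linux/prmem.h>. */
	struct pdata {
		struct list_head head;		/* list head */
	};

	struct pnode {
		struct list_head link;		/* list node */
		unsigned long value;
	};

	static int persist_example(void)
	{
		struct pdata *pd;
		struct pnode *pn;

		pd = prmem_alloc(sizeof(*pd), GFP_KERNEL);
		if (!pd)
			return -ENOMEM;
		INIT_LIST_HEAD(&pd->head);

		pn = prmem_alloc(sizeof(*pn), GFP_KERNEL);
		if (!pn)
			return -ENOMEM;	/* pd stays allocated; cleanup elided in this sketch */

		pn->value = 42;
		list_add(&pn->link, &pd->head);
		/* Both objects live in prmem, so the whole list persists. */
		return 0;
	}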

One important exception is the XArray. The XArray itself can be embedded in
a persistent data structure. However, its XA nodes are allocated from the
kmem allocator, so the tree that holds the entries is not persistent.

Implement a persistent XArray. Introduce a new field, xa_persistent, in the
XArray. Implement an accessor function, xa_persistent(), to set the field.
If xa_persistent is true, allocate XA nodes from the prmem allocator instead
of the kmem allocator. This makes the whole XArray persistent.
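
A minimal usage sketch follows. The enclosing struct pstate is hypothetical;
xa_init(), xa_store() and xa_mk_value() are the existing XArray API, and
error handling is elided.

	/* Hypothetical consumer structure embedding a persistent XArray. */
	struct pstate {
		struct xarray xa;
	};

	static struct pstate *pstate_create(void)
	{
		struct pstate *ps = prmem_alloc(sizeof(*ps), GFP_KERNEL);

		if (!ps)
			return NULL;

		xa_init(&ps->xa);
		xa_persistent(&ps->xa);		/* XA nodes now come from prmem */

		/* Entries stored from here on remain reachable after kexec. */
		xa_store(&ps->xa, 0, xa_mk_value(1234), GFP_KERNEL);
		return ps;
	}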

Since Radix Trees (lib/radix-tree.c) are implemented on top of the XArray,
this also gives us persistent Radix Trees. The only difference is that
preloading is not supported for persistent Radix Tree nodes.
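
The equivalent sketch for a Radix Tree (the entry being inserted is
hypothetical and must itself be allocated from persistent memory; note
that radix_tree_preload() must not be used on this path):

	static int rtree_example(void *entry)
	{
		struct radix_tree_root *root;

		root = prmem_alloc(sizeof(*root), GFP_KERNEL);
		if (!root)
			return -ENOMEM;

		INIT_RADIX_TREE(root, GFP_KERNEL);
		PERSIST_RADIX_TREE(root);

		/* No radix_tree_preload()/radix_tree_preload_end() here. */
		return radix_tree_insert(root, 0, entry);
	}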

Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
---
 include/linux/radix-tree.h |  4 ++++
 include/linux/xarray.h     | 15 ++++++++++++
 lib/radix-tree.c           | 49 +++++++++++++++++++++++++++++++-------
 lib/xarray.c               | 11 +++++----
 4 files changed, 66 insertions(+), 13 deletions(-)

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index eae67015ce51..74f0bdc60bea 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -82,6 +82,7 @@ static inline bool radix_tree_is_internal_node(void *ptr)
 	struct radix_tree_root name = RADIX_TREE_INIT(name, mask)
 
 #define INIT_RADIX_TREE(root, mask) xa_init_flags(root, mask)
+#define PERSIST_RADIX_TREE(root) xa_persistent(root)
 
 static inline bool radix_tree_empty(const struct radix_tree_root *root)
 {
@@ -254,6 +255,9 @@ unsigned int radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *,
 		void __rcu ***results, unsigned long first_index,
 		unsigned int max_items, unsigned int tag);
 int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag);
+struct radix_tree_node *radix_node_alloc(struct radix_tree_root *root,
+		struct list_lru *lru, gfp_t gfp);
+void radix_node_free(struct radix_tree_node *node);
 
 static inline void radix_tree_preload_end(void)
 {
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 741703b45f61..3176a5f62caf 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -295,6 +295,7 @@ enum xa_lock_type {
  */
 struct xarray {
 	spinlock_t	xa_lock;
+	bool		xa_persistent;
 /* private: The rest of the data structure is not to be used directly. */
 	gfp_t		xa_flags;
 	void __rcu *	xa_head;
@@ -302,6 +303,7 @@ struct xarray {
 
 #define XARRAY_INIT(name, flags) {				\
 	.xa_lock = __SPIN_LOCK_UNLOCKED(name.xa_lock),		\
+	.xa_persistent = false,					\
 	.xa_flags = flags,					\
 	.xa_head = NULL,					\
 }
@@ -378,6 +380,7 @@ void xa_destroy(struct xarray *);
 static inline void xa_init_flags(struct xarray *xa, gfp_t flags)
 {
 	spin_lock_init(&xa->xa_lock);
+	xa->xa_persistent = false;
 	xa->xa_flags = flags;
 	xa->xa_head = NULL;
 }
@@ -395,6 +398,17 @@ static inline void xa_init(struct xarray *xa)
 	xa_init_flags(xa, 0);
 }
 
+/**
+ * xa_persistent() - Allocate the XA nodes from persistent memory.
+ * @xa: XArray.
+ *
+ * Context: Any context.
+ */
+static inline void xa_persistent(struct xarray *xa)
+{
+	xa->xa_persistent = true;
+}
+
 /**
  * xa_empty() - Determine if an array has any present entries.
  * @xa: XArray.
@@ -1142,6 +1156,7 @@ struct xa_node {
 	unsigned char	offset;		/* Slot offset in parent */
 	unsigned char	count;		/* Total entry count */
 	unsigned char	nr_values;	/* Value entry count */
+	bool		persistent;	/* Allocated from persistent memory. */
 	struct xa_node __rcu *parent;	/* NULL at top of tree */
 	struct xarray	*array;		/* The array we belong to */
 	union {
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 976b9bd02a1b..d3af6ff6c625 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -21,6 +21,7 @@
 #include <linux/kmemleak.h>
 #include <linux/percpu.h>
 #include <linux/preempt.h>		/* in_interrupt() */
+#include <linux/prmem.h>
 #include <linux/radix-tree.h>
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
@@ -225,6 +226,36 @@ static unsigned long next_index(unsigned long index,
 	return (index & ~node_maxindex(node)) + (offset << node->shift);
 }
 
+static void radix_tree_node_ctor(void *arg);
+
+struct radix_tree_node *
+radix_node_alloc(struct radix_tree_root *root, struct list_lru *lru, gfp_t gfp)
+{
+	struct radix_tree_node *node;
+
+	if (root && root->xa_persistent) {
+		node = prmem_alloc(sizeof(struct radix_tree_node), gfp);
+		if (node) {
+			radix_tree_node_ctor(node);
+			node->persistent = true;
+		}
+	} else {
+		node = kmem_cache_alloc_lru(radix_tree_node_cachep, lru, gfp);
+		if (node)
+			node->persistent = false;
+	}
+	return node;
+}
+
+void radix_node_free(struct radix_tree_node *node)
+{
+	if (node->persistent) {
+		prmem_free(node, sizeof(*node));
+		return;
+	}
+	kmem_cache_free(radix_tree_node_cachep, node);
+}
+
 /*
  * This assumes that the caller has performed appropriate preallocation, and
  * that the caller has pinned this thread of control to the current CPU.
@@ -241,8 +272,11 @@ radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
 	 * Preload code isn't irq safe and it doesn't make sense to use
 	 * preloading during an interrupt anyway as all the allocations have
 	 * to be atomic. So just do normal allocation when in interrupt.
+	 *
+	 * Also, there is no preloading for persistent trees.
 	 */
-	if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
+	if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt() &&
+	    !root->xa_persistent) {
 		struct radix_tree_preload *rtp;
 
 		/*
@@ -250,8 +284,7 @@ radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
 		 * cache first for the new node to get accounted to the memory
 		 * cgroup.
 		 */
-		ret = kmem_cache_alloc(radix_tree_node_cachep,
-				       gfp_mask | __GFP_NOWARN);
+		ret = radix_node_alloc(root, NULL, gfp_mask | __GFP_NOWARN);
 		if (ret)
 			goto out;
 
@@ -273,7 +306,7 @@ radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
 		kmemleak_update_trace(ret);
 		goto out;
 	}
-	ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
+	ret = radix_node_alloc(root, NULL, gfp_mask);
 out:
 	BUG_ON(radix_tree_is_internal_node(ret));
 	if (ret) {
@@ -301,7 +334,7 @@ void radix_tree_node_rcu_free(struct rcu_head *head)
 	memset(node->tags, 0, sizeof(node->tags));
 	INIT_LIST_HEAD(&node->private_list);
 
-	kmem_cache_free(radix_tree_node_cachep, node);
+	radix_node_free(node);
 }
 
 static inline void
@@ -335,7 +368,7 @@ static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
 	rtp = this_cpu_ptr(&radix_tree_preloads);
 	while (rtp->nr < nr) {
 		local_unlock(&radix_tree_preloads.lock);
-		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
+		node = radix_node_alloc(NULL, NULL, gfp_mask);
 		if (node == NULL)
 			goto out;
 		local_lock(&radix_tree_preloads.lock);
@@ -345,7 +378,7 @@ static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
 			rtp->nodes = node;
 			rtp->nr++;
 		} else {
-			kmem_cache_free(radix_tree_node_cachep, node);
+			radix_node_free(node);
 		}
 	}
 	ret = 0;
@@ -1585,7 +1618,7 @@ static int radix_tree_cpu_dead(unsigned int cpu)
 	while (rtp->nr) {
 		node = rtp->nodes;
 		rtp->nodes = node->parent;
-		kmem_cache_free(radix_tree_node_cachep, node);
+		radix_node_free(node);
 		rtp->nr--;
 	}
 	return 0;
diff --git a/lib/xarray.c b/lib/xarray.c
index 2071a3718f4e..33a74b713e6a 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -9,6 +9,7 @@
 #include <linux/bitmap.h>
 #include <linux/export.h>
 #include <linux/list.h>
+#include <linux/prmem.h>
 #include <linux/slab.h>
 #include <linux/xarray.h>
 
@@ -303,7 +304,7 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp)
 	}
 	if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
 		gfp |= __GFP_ACCOUNT;
-	xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
+	xas->xa_alloc = radix_node_alloc(xas->xa, xas->xa_lru, gfp);
 	if (!xas->xa_alloc)
 		return false;
 	xas->xa_alloc->parent = NULL;
@@ -335,10 +336,10 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
 		gfp |= __GFP_ACCOUNT;
 	if (gfpflags_allow_blocking(gfp)) {
 		xas_unlock_type(xas, lock_type);
-		xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
+		xas->xa_alloc = radix_node_alloc(xas->xa, xas->xa_lru, gfp);
 		xas_lock_type(xas, lock_type);
 	} else {
-		xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
+		xas->xa_alloc = radix_node_alloc(xas->xa, xas->xa_lru, gfp);
 	}
 	if (!xas->xa_alloc)
 		return false;
@@ -372,7 +373,7 @@ static void *xas_alloc(struct xa_state *xas, unsigned int shift)
 		if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
 			gfp |= __GFP_ACCOUNT;
 
-		node = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
+		node = radix_node_alloc(xas->xa, xas->xa_lru, gfp);
 		if (!node) {
 			xas_set_err(xas, -ENOMEM);
 			return NULL;
@@ -1017,7 +1018,7 @@ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order,
 		void *sibling = NULL;
 		struct xa_node *node;
 
-		node = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
+		node = radix_node_alloc(xas->xa, xas->xa_lru, gfp);
 		if (!node)
 			goto nomem;
 		node->array = xas->xa;
-- 
2.25.1

