linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Johannes Weiner <hannes@cmpxchg.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jan Kara <jack@suse.cz>,
	"Kirill A. Shutemov" <kirill@shutemov.name>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	kernel-team@fb.com
Subject: [PATCH 6/9] lib: radix-tree: add entry deletion support to __radix_tree_replace()
Date: Thu, 17 Nov 2016 14:30:58 -0500	[thread overview]
Message-ID: <20161117193058.GC23430@cmpxchg.org> (raw)
In-Reply-To: <20161117191138.22769-1-hannes@cmpxchg.org>

Page cache shadow entry handling will be a lot simpler when it can use
a single generic replacement function for pages, shadow entries, and
emptying slots.

Make __radix_tree_replace() properly account insertions and deletions
in node->count and garbage collect nodes as they become empty. Then
re-implement radix_tree_delete() on top of it.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 lib/radix-tree.c | 227 ++++++++++++++++++++++++++++---------------------------
 1 file changed, 116 insertions(+), 111 deletions(-)

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index f91d5b0af654..5d8930f3b3d8 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -539,6 +539,107 @@ static int radix_tree_extend(struct radix_tree_root *root,
 }
 
 /**
+ *	radix_tree_shrink    -    shrink radix tree to minimum height
+ *	@root		radix tree root
+ */
+static inline bool radix_tree_shrink(struct radix_tree_root *root)
+{
+	bool shrunk = false;
+
+	for (;;) {
+		struct radix_tree_node *node = root->rnode;
+		struct radix_tree_node *child;
+
+		if (!radix_tree_is_internal_node(node))
+			break;
+		node = entry_to_node(node);
+
+		/*
+		 * The candidate node has more than one child, or its child
+		 * is not at the leftmost slot, or the child is a multiorder
+		 * entry, we cannot shrink.
+		 */
+		if (node->count != 1)
+			break;
+		child = node->slots[0];
+		if (!child)
+			break;
+		if (!radix_tree_is_internal_node(child) && node->shift)
+			break;
+
+		if (radix_tree_is_internal_node(child))
+			entry_to_node(child)->parent = NULL;
+
+		/*
+		 * We don't need rcu_assign_pointer(), since we are simply
+		 * moving the node from one part of the tree to another: if it
+		 * was safe to dereference the old pointer to it
+		 * (node->slots[0]), it will be safe to dereference the new
+		 * one (root->rnode) as far as dependent read barriers go.
+		 */
+		root->rnode = child;
+
+		/*
+		 * We have a dilemma here. The node's slot[0] must not be
+		 * NULLed in case there are concurrent lookups expecting to
+		 * find the item. However if this was a bottom-level node,
+		 * then it may be subject to the slot pointer being visible
+		 * to callers dereferencing it. If item corresponding to
+		 * slot[0] is subsequently deleted, these callers would expect
+		 * their slot to become empty sooner or later.
+		 *
+		 * For example, lockless pagecache will look up a slot, deref
+		 * the page pointer, and if the page has 0 refcount it means it
+		 * was concurrently deleted from pagecache so try the deref
+		 * again. Fortunately there is already a requirement for logic
+		 * to retry the entire slot lookup -- the indirect pointer
+		 * problem (replacing direct root node with an indirect pointer
+		 * also results in a stale slot). So tag the slot as indirect
+		 * to force callers to retry.
+		 */
+		if (!radix_tree_is_internal_node(child))
+			node->slots[0] = RADIX_TREE_RETRY;
+
+		radix_tree_node_free(node);
+		shrunk = true;
+	}
+
+	return shrunk;
+}
+
+static bool delete_node(struct radix_tree_root *root,
+			struct radix_tree_node *node)
+{
+	bool deleted = false;
+
+	do {
+		struct radix_tree_node *parent;
+
+		if (node->count) {
+			if (node == entry_to_node(root->rnode))
+				deleted |= radix_tree_shrink(root);
+			return deleted;
+		}
+
+		parent = node->parent;
+		if (parent) {
+			parent->slots[node->offset] = NULL;
+			parent->count--;
+		} else {
+			root_tag_clear_all(root);
+			root->rnode = NULL;
+		}
+
+		radix_tree_node_free(node);
+		deleted = true;
+
+		node = parent;
+	} while (node);
+
+	return deleted;
+}
+
+/**
  *	__radix_tree_create	-	create a slot in a radix tree
  *	@root:		radix tree root
  *	@index:		index key
@@ -759,18 +860,20 @@ static void replace_slot(struct radix_tree_root *root,
 			 bool warn_typeswitch)
 {
 	void *old = rcu_dereference_raw(*slot);
-	int exceptional;
+	int count, exceptional;
 
 	WARN_ON_ONCE(radix_tree_is_internal_node(item));
-	WARN_ON_ONCE(!!item - !!old);
 
+	count = !!item - !!old;
 	exceptional = !!radix_tree_exceptional_entry(item) -
 		      !!radix_tree_exceptional_entry(old);
 
-	WARN_ON_ONCE(warn_typeswitch && exceptional);
+	WARN_ON_ONCE(warn_typeswitch && (count || exceptional));
 
-	if (node)
+	if (node) {
+		node->count += count;
 		node->exceptional += exceptional;
+	}
 
 	rcu_assign_pointer(*slot, item);
 }
@@ -790,12 +893,14 @@ void __radix_tree_replace(struct radix_tree_root *root,
 			  void **slot, void *item)
 {
 	/*
-	 * This function supports replacing exceptional entries, but
-	 * that needs accounting against the node unless the slot is
-	 * root->rnode.
+	 * This function supports replacing exceptional entries and
+	 * deleting entries, but that needs accounting against the
+	 * node unless the slot is root->rnode.
 	 */
 	replace_slot(root, node, slot, item,
 		     !node && slot != (void **)&root->rnode);
+
+	delete_node(root, node);
 }
 
 /**
@@ -810,8 +915,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
  *
  * NOTE: This cannot be used to switch between non-entries (empty slots),
  * regular entries, and exceptional entries, as that requires accounting
- * inside the radix tree node. When switching from one type of entry to
- * another, use __radix_tree_lookup() and __radix_tree_replace().
+ * inside the radix tree node. When switching from one type of entry or
+ * deleting, use __radix_tree_lookup() and __radix_tree_replace().
  */
 void radix_tree_replace_slot(struct radix_tree_root *root,
 			     void **slot, void *item)
@@ -1467,75 +1572,6 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
 #endif /* CONFIG_SHMEM && CONFIG_SWAP */
 
 /**
- *	radix_tree_shrink    -    shrink radix tree to minimum height
- *	@root		radix tree root
- */
-static inline bool radix_tree_shrink(struct radix_tree_root *root)
-{
-	bool shrunk = false;
-
-	for (;;) {
-		struct radix_tree_node *node = root->rnode;
-		struct radix_tree_node *child;
-
-		if (!radix_tree_is_internal_node(node))
-			break;
-		node = entry_to_node(node);
-
-		/*
-		 * The candidate node has more than one child, or its child
-		 * is not at the leftmost slot, or the child is a multiorder
-		 * entry, we cannot shrink.
-		 */
-		if (node->count != 1)
-			break;
-		child = node->slots[0];
-		if (!child)
-			break;
-		if (!radix_tree_is_internal_node(child) && node->shift)
-			break;
-
-		if (radix_tree_is_internal_node(child))
-			entry_to_node(child)->parent = NULL;
-
-		/*
-		 * We don't need rcu_assign_pointer(), since we are simply
-		 * moving the node from one part of the tree to another: if it
-		 * was safe to dereference the old pointer to it
-		 * (node->slots[0]), it will be safe to dereference the new
-		 * one (root->rnode) as far as dependent read barriers go.
-		 */
-		root->rnode = child;
-
-		/*
-		 * We have a dilemma here. The node's slot[0] must not be
-		 * NULLed in case there are concurrent lookups expecting to
-		 * find the item. However if this was a bottom-level node,
-		 * then it may be subject to the slot pointer being visible
-		 * to callers dereferencing it. If item corresponding to
-		 * slot[0] is subsequently deleted, these callers would expect
-		 * their slot to become empty sooner or later.
-		 *
-		 * For example, lockless pagecache will look up a slot, deref
-		 * the page pointer, and if the page has 0 refcount it means it
-		 * was concurrently deleted from pagecache so try the deref
-		 * again. Fortunately there is already a requirement for logic
-		 * to retry the entire slot lookup -- the indirect pointer
-		 * problem (replacing direct root node with an indirect pointer
-		 * also results in a stale slot). So tag the slot as indirect
-		 * to force callers to retry.
-		 */
-		if (!radix_tree_is_internal_node(child))
-			node->slots[0] = RADIX_TREE_RETRY;
-
-		radix_tree_node_free(node);
-		shrunk = true;
-	}
-
-	return shrunk;
-}
-
-/**
  *	__radix_tree_delete_node    -    try to free node after clearing a slot
  *	@root:		radix tree root
  *	@node:		node containing @index
@@ -1549,33 +1585,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root)
 bool __radix_tree_delete_node(struct radix_tree_root *root,
 			      struct radix_tree_node *node)
 {
-	bool deleted = false;
-
-	do {
-		struct radix_tree_node *parent;
-
-		if (node->count) {
-			if (node == entry_to_node(root->rnode))
-				deleted |= radix_tree_shrink(root);
-			return deleted;
-		}
-
-		parent = node->parent;
-		if (parent) {
-			parent->slots[node->offset] = NULL;
-			parent->count--;
-		} else {
-			root_tag_clear_all(root);
-			root->rnode = NULL;
-		}
-
-		radix_tree_node_free(node);
-		deleted = true;
-
-		node = parent;
-	} while (node);
-
-	return deleted;
+	return delete_node(root, node);
 }
 
 static inline void delete_sibling_entries(struct radix_tree_node *node,
@@ -1632,12 +1642,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
 		node_tag_clear(root, node, tag, offset);
 
 	delete_sibling_entries(node, node_to_entry(slot), offset);
-	node->slots[offset] = NULL;
-	node->count--;
-	if (radix_tree_exceptional_entry(entry))
-		node->exceptional--;
-
-	__radix_tree_delete_node(root, node);
+	__radix_tree_replace(root, node, slot, NULL);
 
 	return entry;
 }
-- 
2.10.2

  parent reply	other threads:[~2016-11-17 19:31 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-17 19:11 [PATCH 0/9] mm: workingset: radix tree subtleties & single-page file refaults v3 Johannes Weiner
2016-11-17 19:11 ` [PATCH 1/9] mm: khugepaged: close use-after-free race during shmem collapsing Johannes Weiner
2016-11-17 23:19   ` Kirill A. Shutemov
2016-11-18  7:29   ` Jan Kara
2016-11-17 19:11 ` [PATCH 2/9] mm: khugepaged: fix radix tree node leak in shmem collapse error path Johannes Weiner
2016-11-17 23:21   ` Kirill A. Shutemov
2016-11-18  7:30   ` Jan Kara
2016-11-17 19:11 ` [PATCH 3/9] mm: workingset: turn shadow node shrinker bugs into warnings Johannes Weiner
2016-11-18  7:32   ` Jan Kara
2016-11-17 19:29 ` [PATCH 4/9] lib: radix-tree: native accounting of exceptional entries Johannes Weiner
2016-11-18  7:39   ` Jan Kara
2016-11-17 19:30 ` [PATCH 5/9] lib: radix-tree: check accounting of existing slot replacement users Johannes Weiner
2016-11-18  7:46   ` Jan Kara
2016-11-17 19:30 ` Johannes Weiner [this message]
2016-11-18  8:13   ` [PATCH 6/9] lib: radix-tree: add entry deletion support to __radix_tree_replace() Jan Kara
2016-11-17 19:31 ` [PATCH 7/9] lib: radix-tree: update callback for changing leaf nodes Johannes Weiner
2016-11-18  8:26   ` Jan Kara
2016-11-17 19:32 ` [PATCH 8/9] mm: workingset: move shadow entry tracking to radix tree exceptional tracking Johannes Weiner
2016-11-18  8:29   ` Jan Kara
2016-11-17 19:32 ` [PATCH 9/9] mm: workingset: restore refault tracking for single-page files Johannes Weiner
2016-11-18  8:30   ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161117193058.GC23430@cmpxchg.org \
    --to=hannes@cmpxchg.org \
    --cc=akpm@linux-foundation.org \
    --cc=jack@suse.cz \
    --cc=kernel-team@fb.com \
    --cc=kirill@shutemov.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).