linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Johannes Weiner <hannes@cmpxchg.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jan Kara <jack@suse.cz>,
	"Kirill A. Shutemov" <kirill@shutemov.name>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	kernel-team@fb.com
Subject: [PATCH 5/9] lib: radix-tree: check accounting of existing slot replacement users
Date: Thu, 17 Nov 2016 14:30:21 -0500	[thread overview]
Message-ID: <20161117193021.GB23430@cmpxchg.org> (raw)
In-Reply-To: <20161117191138.22769-1-hannes@cmpxchg.org>

The bug in khugepaged fixed earlier in this series shows that radix
tree slot replacement is fragile; and it will become more so when not
only NULL<->!NULL transitions need to be caught but transitions from
and to exceptional entries as well. We need checks.

Re-implement radix_tree_replace_slot() on top of the sanity-checked
__radix_tree_replace(). This requires existing callers to also pass
the radix tree root, but it'll warn us when somebody replaces slots
with contents that need proper accounting (transitions between NULL
entries, real entries, exceptional entries) and where a replacement
through the slot pointer would corrupt the radix tree node counts.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 arch/s390/mm/gmap.c                   |  2 +-
 drivers/sh/intc/virq.c                |  2 +-
 fs/dax.c                              |  4 +--
 include/linux/radix-tree.h            | 16 ++-------
 lib/radix-tree.c                      | 63 +++++++++++++++++++++++++++--------
 mm/filemap.c                          |  4 +--
 mm/khugepaged.c                       |  5 +--
 mm/migrate.c                          |  4 +--
 mm/truncate.c                         |  2 +-
 tools/testing/radix-tree/multiorder.c |  2 +-
 10 files changed, 64 insertions(+), 40 deletions(-)

diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 3ba622702ce4..ec1f0dedb948 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -1015,7 +1015,7 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
 	if (slot) {
 		rmap->next = radix_tree_deref_slot_protected(slot,
 							&sg->guest_table_lock);
-		radix_tree_replace_slot(slot, rmap);
+		radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
 	} else {
 		rmap->next = NULL;
 		radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
diff --git a/drivers/sh/intc/virq.c b/drivers/sh/intc/virq.c
index e7899624aa0b..35bbe288ddb4 100644
--- a/drivers/sh/intc/virq.c
+++ b/drivers/sh/intc/virq.c
@@ -254,7 +254,7 @@ static void __init intc_subgroup_map(struct intc_desc_int *d)
 
 		radix_tree_tag_clear(&d->tree, entry->enum_id,
 				     INTC_TAG_VIRQ_NEEDS_ALLOC);
-		radix_tree_replace_slot((void **)entries[i],
+		radix_tree_replace_slot(&d->tree, (void **)entries[i],
 					&intc_irq_xlate[irq]);
 	}
 
diff --git a/fs/dax.c b/fs/dax.c
index db78bae0dc0f..85930c2a2749 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -342,7 +342,7 @@ static inline void *lock_slot(struct address_space *mapping, void **slot)
 		radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
 
 	entry |= RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(slot, (void *)entry);
+	radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
 	return (void *)entry;
 }
 
@@ -356,7 +356,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
 		radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
 
 	entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(slot, (void *)entry);
+	radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
 	return (void *)entry;
 }
 
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 7ced8a70cc8b..2d1b9b8be983 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -249,20 +249,6 @@ static inline int radix_tree_exception(void *arg)
 	return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK);
 }
 
-/**
- * radix_tree_replace_slot	- replace item in a slot
- * @pslot:	pointer to slot, returned by radix_tree_lookup_slot
- * @item:	new item to store in the slot.
- *
- * For use with radix_tree_lookup_slot().  Caller must hold tree write locked
- * across slot lookup and replacement.
- */
-static inline void radix_tree_replace_slot(void **pslot, void *item)
-{
-	BUG_ON(radix_tree_is_internal_node(item));
-	rcu_assign_pointer(*pslot, item);
-}
-
 int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
 			unsigned order, struct radix_tree_node **nodep,
 			void ***slotp);
@@ -280,6 +266,8 @@ void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
 void __radix_tree_replace(struct radix_tree_root *root,
 			  struct radix_tree_node *node,
 			  void **slot, void *item);
+void radix_tree_replace_slot(struct radix_tree_root *root,
+			     void **slot, void *item);
 bool __radix_tree_delete_node(struct radix_tree_root *root,
 			      struct radix_tree_node *node);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 7885796d35ae..f91d5b0af654 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -753,19 +753,10 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
 }
 EXPORT_SYMBOL(radix_tree_lookup);
 
-/**
- * __radix_tree_replace		- replace item in a slot
- * @root:	radix tree root
- * @node:	pointer to tree node
- * @slot:	pointer to slot in @node
- * @item:	new item to store in the slot.
- *
- * For use with __radix_tree_lookup().  Caller must hold tree write locked
- * across slot lookup and replacement.
- */
-void __radix_tree_replace(struct radix_tree_root *root,
-			  struct radix_tree_node *node,
-			  void **slot, void *item)
+static void replace_slot(struct radix_tree_root *root,
+			 struct radix_tree_node *node,
+			 void **slot, void *item,
+			 bool warn_typeswitch)
 {
 	void *old = rcu_dereference_raw(*slot);
 	int exceptional;
@@ -776,7 +767,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
 	exceptional = !!radix_tree_exceptional_entry(item) -
 		      !!radix_tree_exceptional_entry(old);
 
-	WARN_ON_ONCE(exceptional && !node && slot != (void **)&root->rnode);
+	WARN_ON_ONCE(warn_typeswitch && exceptional);
 
 	if (node)
 		node->exceptional += exceptional;
@@ -785,6 +776,50 @@ void __radix_tree_replace(struct radix_tree_root *root,
 }
 
 /**
+ * __radix_tree_replace		- replace item in a slot
+ * @root:	radix tree root
+ * @node:	pointer to tree node
+ * @slot:	pointer to slot in @node
+ * @item:	new item to store in the slot.
+ *
+ * For use with __radix_tree_lookup().  Caller must hold tree write locked
+ * across slot lookup and replacement.
+ */
+void __radix_tree_replace(struct radix_tree_root *root,
+			  struct radix_tree_node *node,
+			  void **slot, void *item)
+{
+	/*
+	 * This function supports replacing exceptional entries, but
+	 * that needs accounting against the node unless the slot is
+	 * root->rnode.
+	 */
+	replace_slot(root, node, slot, item,
+		     !node && slot != (void **)&root->rnode);
+}
+
+/**
+ * radix_tree_replace_slot	- replace item in a slot
+ * @root:	radix tree root
+ * @slot:	pointer to slot
+ * @item:	new item to store in the slot.
+ *
+ * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(),
+ * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
+ * across slot lookup and replacement.
+ *
+ * NOTE: This cannot be used to switch between non-entries (empty slots),
+ * regular entries, and exceptional entries, as that requires accounting
+ * inside the radix tree node. When switching from one type of entry to
+ * another, use __radix_tree_lookup() and __radix_tree_replace().
+ */
+void radix_tree_replace_slot(struct radix_tree_root *root,
+			     void **slot, void *item)
+{
+	replace_slot(root, NULL, slot, item, true);
+}
+
+/**
  *	radix_tree_tag_set - set a tag on a radix tree node
  *	@root:		radix tree root
  *	@index:		index key
diff --git a/mm/filemap.c b/mm/filemap.c
index c7fe2f16503f..eb463156f29a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -147,7 +147,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 						      false);
 		}
 	}
-	radix_tree_replace_slot(slot, page);
+	radix_tree_replace_slot(&mapping->page_tree, slot, page);
 	mapping->nrpages++;
 	if (node) {
 		workingset_node_pages_inc(node);
@@ -193,7 +193,7 @@ static void page_cache_tree_delete(struct address_space *mapping,
 			shadow = NULL;
 		}
 
-		radix_tree_replace_slot(slot, shadow);
+		radix_tree_replace_slot(&mapping->page_tree, slot, shadow);
 
 		if (!node)
 			break;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index d553c294de40..c2f015bd57cb 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1424,7 +1424,7 @@ static void collapse_shmem(struct mm_struct *mm,
 		list_add_tail(&page->lru, &pagelist);
 
 		/* Finally, replace with the new page. */
-		radix_tree_replace_slot(slot,
+		radix_tree_replace_slot(&mapping->page_tree, slot,
 				new_page + (index % HPAGE_PMD_NR));
 
 		slot = radix_tree_iter_next(&iter);
@@ -1536,7 +1536,8 @@ static void collapse_shmem(struct mm_struct *mm,
 			/* Unfreeze the page. */
 			list_del(&page->lru);
 			page_ref_unfreeze(page, 2);
-			radix_tree_replace_slot(slot, page);
+			radix_tree_replace_slot(&mapping->page_tree,
+						slot, page);
 			spin_unlock_irq(&mapping->tree_lock);
 			putback_lru_page(page);
 			unlock_page(page);
diff --git a/mm/migrate.c b/mm/migrate.c
index 99250aee1ac1..9b88e4e37d0a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -482,7 +482,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		SetPageDirty(newpage);
 	}
 
-	radix_tree_replace_slot(pslot, newpage);
+	radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
 
 	/*
 	 * Drop cache reference from old page by unfreezing
@@ -556,7 +556,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 	get_page(newpage);
 
-	radix_tree_replace_slot(pslot, newpage);
+	radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
 
 	page_ref_unfreeze(page, expected_count - 1);
 
diff --git a/mm/truncate.c b/mm/truncate.c
index a01cce450a26..6ae44571d4c7 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -49,7 +49,7 @@ static void clear_exceptional_entry(struct address_space *mapping,
 		goto unlock;
 	if (*slot != entry)
 		goto unlock;
-	radix_tree_replace_slot(slot, NULL);
+	radix_tree_replace_slot(&mapping->page_tree, slot, NULL);
 	mapping->nrexceptional--;
 	if (!node)
 		goto unlock;
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 05d7bc488971..d1be94667a30 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -146,7 +146,7 @@ static void multiorder_check(unsigned long index, int order)
 
 	slot = radix_tree_lookup_slot(&tree, index);
 	free(*slot);
-	radix_tree_replace_slot(slot, item2);
+	radix_tree_replace_slot(&tree, slot, item2);
 	for (i = min; i < max; i++) {
 		struct item *item = item_lookup(&tree, i);
 		assert(item != 0);
-- 
2.10.2

  parent reply	other threads:[~2016-11-17 19:30 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-17 19:11 [PATCH 0/9] mm: workingset: radix tree subtleties & single-page file refaults v3 Johannes Weiner
2016-11-17 19:11 ` [PATCH 1/9] mm: khugepaged: close use-after-free race during shmem collapsing Johannes Weiner
2016-11-17 23:19   ` Kirill A. Shutemov
2016-11-18  7:29   ` Jan Kara
2016-11-17 19:11 ` [PATCH 2/9] mm: khugepaged: fix radix tree node leak in shmem collapse error path Johannes Weiner
2016-11-17 23:21   ` Kirill A. Shutemov
2016-11-18  7:30   ` Jan Kara
2016-11-17 19:11 ` [PATCH 3/9] mm: workingset: turn shadow node shrinker bugs into warnings Johannes Weiner
2016-11-18  7:32   ` Jan Kara
2016-11-17 19:29 ` [PATCH 4/9] lib: radix-tree: native accounting of exceptional entries Johannes Weiner
2016-11-18  7:39   ` Jan Kara
2016-11-17 19:30 ` Johannes Weiner [this message]
2016-11-18  7:46   ` [PATCH 5/9] lib: radix-tree: check accounting of existing slot replacement users Jan Kara
2016-11-17 19:30 ` [PATCH 6/9] lib: radix-tree: add entry deletion support to __radix_tree_replace() Johannes Weiner
2016-11-18  8:13   ` Jan Kara
2016-11-17 19:31 ` [PATCH 7/9] lib: radix-tree: update callback for changing leaf nodes Johannes Weiner
2016-11-18  8:26   ` Jan Kara
2016-11-17 19:32 ` [PATCH 8/9] mm: workingset: move shadow entry tracking to radix tree exceptional tracking Johannes Weiner
2016-11-18  8:29   ` Jan Kara
2016-11-17 19:32 ` [PATCH 9/9] mm: workingset: restore refault tracking for single-page files Johannes Weiner
2016-11-18  8:30   ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161117193021.GB23430@cmpxchg.org \
    --to=hannes@cmpxchg.org \
    --cc=akpm@linux-foundation.org \
    --cc=jack@suse.cz \
    --cc=kernel-team@fb.com \
    --cc=kirill@shutemov.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).