linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Johannes Weiner <hannes@cmpxchg.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Jan Kara <jack@suse.cz>,
	"Kirill A. Shutemov" <kirill@shutemov.name>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	kernel-team@fb.com
Subject: [PATCH 5/6] mm: workingset: switch shadow entry tracking to radix tree exceptional counting
Date: Mon,  7 Nov 2016 14:07:40 -0500	[thread overview]
Message-ID: <20161107190741.3619-6-hannes@cmpxchg.org> (raw)
In-Reply-To: <20161107190741.3619-1-hannes@cmpxchg.org>

Currently, we track the shadow entries in the page cache in the upper
bits of the radix_tree_node->count, behind the back of the radix tree
implementation. Because the radix tree code has no awareness of them,
we rely on random subtleties throughout the implementation (such as
the node->count != 1 check in the shrinking code which is meant to
exclude multi-entry nodes, but also happens to skip nodes with only
one shadow entry since it's accounted in the upper bits). This is
error prone and has, in fact, caused the bug fixed in d3798ae8c6f3
("mm: filemap: don't plant shadow entries without radix tree node").

To remove these subtleties, this patch moves shadow entry tracking
from the upper bits of node->count to the existing counter for
exceptional entries. node->count goes back to being a simple counter
of valid entries in the tree node and can be shrunk to a single byte.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/radix-tree.h |  6 +-----
 include/linux/swap.h       | 32 --------------------------------
 mm/filemap.c               | 30 +++++++++++-------------------
 mm/truncate.c              |  4 +---
 mm/workingset.c            | 11 +++++++----
 5 files changed, 20 insertions(+), 63 deletions(-)

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 2d1b9b8be983..56619703ae7a 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -80,14 +80,10 @@ static inline bool radix_tree_is_internal_node(void *ptr)
 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
 					  RADIX_TREE_MAP_SHIFT))
 
-/* Internally used bits of node->count */
-#define RADIX_TREE_COUNT_SHIFT	(RADIX_TREE_MAP_SHIFT + 1)
-#define RADIX_TREE_COUNT_MASK	((1UL << RADIX_TREE_COUNT_SHIFT) - 1)
-
 struct radix_tree_node {
 	unsigned char	shift;		/* Bits remaining in each slot */
 	unsigned char	offset;		/* Slot offset in parent */
-	unsigned int	count;		/* Total entry count */
+	unsigned char	count;		/* Total entry count */
 	unsigned char	exceptional;	/* Exceptional entry count */
 	union {
 		struct {
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a56523cefb9b..660a11de0186 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -248,38 +248,6 @@ bool workingset_refault(void *shadow);
 void workingset_activation(struct page *page);
 extern struct list_lru workingset_shadow_nodes;
 
-static inline unsigned int workingset_node_pages(struct radix_tree_node *node)
-{
-	return node->count & RADIX_TREE_COUNT_MASK;
-}
-
-static inline void workingset_node_pages_inc(struct radix_tree_node *node)
-{
-	node->count++;
-}
-
-static inline void workingset_node_pages_dec(struct radix_tree_node *node)
-{
-	VM_WARN_ON_ONCE(!workingset_node_pages(node));
-	node->count--;
-}
-
-static inline unsigned int workingset_node_shadows(struct radix_tree_node *node)
-{
-	return node->count >> RADIX_TREE_COUNT_SHIFT;
-}
-
-static inline void workingset_node_shadows_inc(struct radix_tree_node *node)
-{
-	node->count += 1U << RADIX_TREE_COUNT_SHIFT;
-}
-
-static inline void workingset_node_shadows_dec(struct radix_tree_node *node)
-{
-	VM_WARN_ON_ONCE(!workingset_node_shadows(node));
-	node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
-}
-
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
diff --git a/mm/filemap.c b/mm/filemap.c
index eb463156f29a..438f0b54f8fd 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -132,25 +132,19 @@ static int page_cache_tree_insert(struct address_space *mapping,
 		if (!dax_mapping(mapping)) {
 			if (shadowp)
 				*shadowp = p;
-			if (node)
-				workingset_node_shadows_dec(node);
 		} else {
 			/* DAX can replace empty locked entry with a hole */
 			WARN_ON_ONCE(p !=
 				(void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
 					 RADIX_DAX_ENTRY_LOCK));
-			/* DAX accounts exceptional entries as normal pages */
-			if (node)
-				workingset_node_pages_dec(node);
 			/* Wakeup waiters for exceptional entry lock */
 			dax_wake_mapping_entry_waiter(mapping, page->index,
 						      false);
 		}
 	}
-	radix_tree_replace_slot(&mapping->page_tree, slot, page);
+	__radix_tree_replace(&mapping->page_tree, node, slot, page);
 	mapping->nrpages++;
 	if (node) {
-		workingset_node_pages_inc(node);
 		/*
 		 * Don't track node that contains actual pages.
 		 *
@@ -193,29 +187,27 @@ static void page_cache_tree_delete(struct address_space *mapping,
 			shadow = NULL;
 		}
 
-		radix_tree_replace_slot(&mapping->page_tree, slot, shadow);
+		__radix_tree_replace(&mapping->page_tree, node, slot, shadow);
 
 		if (!node)
 			break;
 
-		workingset_node_pages_dec(node);
-		if (shadow)
-			workingset_node_shadows_inc(node);
-		else
-			if (__radix_tree_delete_node(&mapping->page_tree, node))
-				continue;
+		if (!shadow &&
+		    __radix_tree_delete_node(&mapping->page_tree, node))
+			continue;
 
 		/*
-		 * Track node that only contains shadow entries. DAX mappings
-		 * contain no shadow entries and may contain other exceptional
-		 * entries so skip those.
+		 * Track node that only contains shadow entries. DAX and SHMEM
+		 * mappings contain no shadow entries and may contain other
+		 * exceptional entries so skip those.
 		 *
 		 * Avoid acquiring the list_lru lock if already tracked.
 		 * The list_empty() test is safe as node->private_list is
 		 * protected by mapping->tree_lock.
 		 */
-		if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
-				list_empty(&node->private_list)) {
+		if (!dax_mapping(mapping) && !shmem_mapping(mapping) &&
+		    node->count == node->exceptional &&
+		    list_empty(&node->private_list)) {
 			node->private_data = mapping;
 			list_lru_add(&workingset_shadow_nodes,
 					&node->private_list);
diff --git a/mm/truncate.c b/mm/truncate.c
index 6ae44571d4c7..d3ce5f261f47 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -53,7 +53,6 @@ static void clear_exceptional_entry(struct address_space *mapping,
 	mapping->nrexceptional--;
 	if (!node)
 		goto unlock;
-	workingset_node_shadows_dec(node);
 	/*
 	 * Don't track node without shadow entries.
 	 *
@@ -61,8 +60,7 @@ static void clear_exceptional_entry(struct address_space *mapping,
 	 * The list_empty() test is safe as node->private_list is
 	 * protected by mapping->tree_lock.
 	 */
-	if (!workingset_node_shadows(node) &&
-	    !list_empty(&node->private_list))
+	if (!node->exceptional && !list_empty(&node->private_list))
 		list_lru_del(&workingset_shadow_nodes,
 				&node->private_list);
 	__radix_tree_delete_node(&mapping->page_tree, node);
diff --git a/mm/workingset.c b/mm/workingset.c
index 3cfc61d84a52..ca92d0f70d9a 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -418,22 +418,25 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	 * no pages, so we expect to be able to remove them all and
 	 * delete and free the empty node afterwards.
 	 */
-	if (WARN_ON_ONCE(!workingset_node_shadows(node)))
+	if (WARN_ON_ONCE(!node->exceptional))
 		goto out_invalid;
-	if (WARN_ON_ONCE(workingset_node_pages(node)))
+	if (WARN_ON_ONCE(node->count != node->exceptional))
 		goto out_invalid;
 	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
 		if (node->slots[i]) {
 			if (WARN_ON_ONCE(!radix_tree_exceptional_entry(node->slots[i])))
 				goto out_invalid;
+			if (WARN_ON_ONCE(!node->exceptional))
+				goto out_invalid;
 			if (WARN_ON_ONCE(!mapping->nrexceptional))
 				goto out_invalid;
 			node->slots[i] = NULL;
-			workingset_node_shadows_dec(node);
+			node->exceptional--;
+			node->count--;
 			mapping->nrexceptional--;
 		}
 	}
-	if (WARN_ON_ONCE(workingset_node_shadows(node)))
+	if (WARN_ON_ONCE(node->exceptional))
 		goto out_invalid;
 	inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
 	__radix_tree_delete_node(&mapping->page_tree, node);
-- 
2.10.1

  parent reply	other threads:[~2016-11-07 19:08 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-07 19:07 [PATCH 0/6] mm: workingset: radix tree subtleties & single-page file refaults Johannes Weiner
2016-11-07 19:07 ` [PATCH 1/6] mm: khugepaged: fix radix tree node leak in shmem collapse error path Johannes Weiner
2016-11-08  9:53   ` Jan Kara
2016-11-08 16:12     ` Johannes Weiner
2016-11-09  7:41       ` Jan Kara
2016-11-11 10:59       ` Kirill A. Shutemov
2016-11-11 12:22         ` Jan Kara
2016-11-11 16:37           ` Kirill A. Shutemov
2016-11-14  8:07             ` Jan Kara
2016-11-14 14:29               ` Kirill A. Shutemov
2016-11-14 15:52                 ` Johannes Weiner
2016-11-14 16:48                   ` Johannes Weiner
2016-11-14 19:40                     ` Kirill A. Shutemov
2016-11-15 14:00                       ` Johannes Weiner
2016-11-07 19:07 ` [PATCH 2/6] mm: workingset: turn shadow node shrinker bugs into warnings Johannes Weiner
2016-11-08  9:57   ` Jan Kara
2016-11-07 19:07 ` [PATCH 3/6] lib: radix-tree: native accounting of exceptional entries Johannes Weiner
2016-11-08 10:08   ` Jan Kara
2016-11-07 19:07 ` [PATCH 4/6] lib: radix-tree: check accounting of existing slot replacement users Johannes Weiner
2016-11-08 10:12   ` Jan Kara
2016-11-07 19:07 ` Johannes Weiner [this message]
2016-11-08 10:27   ` [PATCH 5/6] mm: workingset: switch shadow entry tracking to radix tree exceptional counting Jan Kara
2016-11-08 19:30     ` Johannes Weiner
2016-11-07 19:07 ` [PATCH 6/6] mm: workingset: restore refault tracking for single-page files Johannes Weiner
2016-11-08 10:31   ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161107190741.3619-6-hannes@cmpxchg.org \
    --to=hannes@cmpxchg.org \
    --cc=akpm@linux-foundation.org \
    --cc=jack@suse.cz \
    --cc=kernel-team@fb.com \
    --cc=kirill@shutemov.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).