Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
From: Mel Gorman <mgorman@techsingularity.net>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Linux-MM <linux-mm@kvack.org>,
	Linux-FSDevel <linux-fsdevel@vger.kernel.org>,
	LKML <linux-kernel@vger.kernel.org>, Jan Kara <jack@suse.cz>,
	Andi Kleen <ak@linux.intel.com>,
	Dave Hansen <dave.hansen@intel.com>,
	Dave Chinner <david@fromorbit.com>,
	Mel Gorman <mgorman@techsingularity.net>
Subject: [PATCH 2/8] mm, truncate: Do not check mapping for every page being truncated
Date: Wed, 18 Oct 2017 08:59:46 +0100
Message-ID: <20171018075952.10627-3-mgorman@techsingularity.net> (raw)
In-Reply-To: <20171018075952.10627-1-mgorman@techsingularity.net>

During truncation, the mapping has already been checked for shmem and dax
so it's known that workingset_update_node is required. This patch avoids
the checks on mapping for each page being truncated. In all other cases,
a lookup helper is used to determine if workingset_update_node() needs
to be called. The one danger is that the API is slightly harder to use as
calling workingset_update_node directly without checking for dax or shmem
mappings could lead to surprises. However, the API rarely needs to be used
and hopefully the comment is enough to give people the hint.

sparsetruncate (tiny)
                              4.14.0-rc4             4.14.0-rc4
                             oneirq-v1r1        pickhelper-v1r1
Min          Time      141.00 (   0.00%)      140.00 (   0.71%)
1st-qrtle    Time      142.00 (   0.00%)      141.00 (   0.70%)
2nd-qrtle    Time      142.00 (   0.00%)      142.00 (   0.00%)
3rd-qrtle    Time      143.00 (   0.00%)      143.00 (   0.00%)
Max-90%      Time      144.00 (   0.00%)      144.00 (   0.00%)
Max-95%      Time      147.00 (   0.00%)      145.00 (   1.36%)
Max-99%      Time      195.00 (   0.00%)      191.00 (   2.05%)
Max          Time      230.00 (   0.00%)      205.00 (  10.87%)
Amean        Time      144.37 (   0.00%)      143.82 (   0.38%)
Stddev       Time       10.44 (   0.00%)        9.00 (  13.74%)
Coeff        Time        7.23 (   0.00%)        6.26 (  13.41%)
Best99%Amean Time      143.72 (   0.00%)      143.34 (   0.26%)
Best95%Amean Time      142.37 (   0.00%)      142.00 (   0.26%)
Best90%Amean Time      142.19 (   0.00%)      141.85 (   0.24%)
Best75%Amean Time      141.92 (   0.00%)      141.58 (   0.24%)
Best50%Amean Time      141.69 (   0.00%)      141.31 (   0.27%)
Best25%Amean Time      141.38 (   0.00%)      140.97 (   0.29%)

As you'd expect, the gain is marginal but it can be detected. The differences
in bonnie are all within the noise which is not surprising given the impact
on the microbenchmark.

radix_tree_update_node_t is a callback for some radix operations that
optionally passes in a private field. The only user of the callback is
workingset_update_node and as it no longer requires a mapping, the private
field is removed.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
---
 fs/dax.c                              |  2 +-
 include/linux/radix-tree.h            |  7 +++----
 include/linux/swap.h                  | 13 ++++++++++++-
 lib/idr.c                             |  2 +-
 lib/radix-tree.c                      | 30 +++++++++++++-----------------
 mm/filemap.c                          |  7 ++++---
 mm/shmem.c                            |  2 +-
 mm/truncate.c                         |  2 +-
 mm/workingset.c                       | 10 ++--------
 tools/testing/radix-tree/multiorder.c |  2 +-
 10 files changed, 39 insertions(+), 38 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index f001d8c72a06..3318ae9046e6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -565,7 +565,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 		ret = __radix_tree_lookup(page_tree, index, &node, &slot);
 		WARN_ON_ONCE(ret != entry);
 		__radix_tree_replace(page_tree, node, slot,
-				     new_entry, NULL, NULL);
+				     new_entry, NULL);
 		entry = new_entry;
 	}
 
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 567ebb5eaab0..0ca448c1cb42 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -301,18 +301,17 @@ void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index,
 void *radix_tree_lookup(const struct radix_tree_root *, unsigned long);
 void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *,
 					unsigned long index);
-typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *);
+typedef void (*radix_tree_update_node_t)(struct radix_tree_node *);
 void __radix_tree_replace(struct radix_tree_root *, struct radix_tree_node *,
 			  void __rcu **slot, void *entry,
-			  radix_tree_update_node_t update_node, void *private);
+			  radix_tree_update_node_t update_node);
 void radix_tree_iter_replace(struct radix_tree_root *,
 		const struct radix_tree_iter *, void __rcu **slot, void *entry);
 void radix_tree_replace_slot(struct radix_tree_root *,
 			     void __rcu **slot, void *entry);
 void __radix_tree_delete_node(struct radix_tree_root *,
 			      struct radix_tree_node *,
-			      radix_tree_update_node_t update_node,
-			      void *private);
+			      radix_tree_update_node_t update_node);
 void radix_tree_iter_delete(struct radix_tree_root *,
 			struct radix_tree_iter *iter, void __rcu **slot);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 8a807292037f..257c48a525c8 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -292,7 +292,18 @@ struct vma_swap_readahead {
 void *workingset_eviction(struct address_space *mapping, struct page *page);
 bool workingset_refault(void *shadow);
 void workingset_activation(struct page *page);
-void workingset_update_node(struct radix_tree_node *node, void *private);
+
+/* Do not use directly, use workingset_lookup_update */
+void workingset_update_node(struct radix_tree_node *node);
+
+/* Returns workingset_update_node() if the mapping has shadow entries. */
+#define workingset_lookup_update(mapping)				\
+({									\
+	radix_tree_update_node_t __helper = workingset_update_node;	\
+	if (dax_mapping(mapping) || shmem_mapping(mapping))		\
+		__helper = NULL;					\
+	__helper;							\
+})
 
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
diff --git a/lib/idr.c b/lib/idr.c
index edd9b2be1651..2593ce513a18 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -171,7 +171,7 @@ void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id)
 	if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
 		return ERR_PTR(-ENOENT);
 
-	__radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL, NULL);
+	__radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL);
 
 	return entry;
 }
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8b1feca1230a..c8d55565fafa 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -677,8 +677,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
  *	@root		radix tree root
  */
 static inline bool radix_tree_shrink(struct radix_tree_root *root,
-				     radix_tree_update_node_t update_node,
-				     void *private)
+				     radix_tree_update_node_t update_node)
 {
 	bool shrunk = false;
 
@@ -739,7 +738,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
 		if (!radix_tree_is_internal_node(child)) {
 			node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
 			if (update_node)
-				update_node(node, private);
+				update_node(node);
 		}
 
 		WARN_ON_ONCE(!list_empty(&node->private_list));
@@ -752,7 +751,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
 
 static bool delete_node(struct radix_tree_root *root,
 			struct radix_tree_node *node,
-			radix_tree_update_node_t update_node, void *private)
+			radix_tree_update_node_t update_node)
 {
 	bool deleted = false;
 
@@ -762,8 +761,8 @@ static bool delete_node(struct radix_tree_root *root,
 		if (node->count) {
 			if (node_to_entry(node) ==
 					rcu_dereference_raw(root->rnode))
-				deleted |= radix_tree_shrink(root, update_node,
-								private);
+				deleted |= radix_tree_shrink(root,
+								update_node);
 			return deleted;
 		}
 
@@ -1173,7 +1172,6 @@ static int calculate_count(struct radix_tree_root *root,
  * @slot:		pointer to slot in @node
  * @item:		new item to store in the slot.
  * @update_node:	callback for changing leaf nodes
- * @private:		private data to pass to @update_node
  *
  * For use with __radix_tree_lookup().  Caller must hold tree write locked
  * across slot lookup and replacement.
@@ -1181,7 +1179,7 @@ static int calculate_count(struct radix_tree_root *root,
 void __radix_tree_replace(struct radix_tree_root *root,
 			  struct radix_tree_node *node,
 			  void __rcu **slot, void *item,
-			  radix_tree_update_node_t update_node, void *private)
+			  radix_tree_update_node_t update_node)
 {
 	void *old = rcu_dereference_raw(*slot);
 	int exceptional = !!radix_tree_exceptional_entry(item) -
@@ -1201,9 +1199,9 @@ void __radix_tree_replace(struct radix_tree_root *root,
 		return;
 
 	if (update_node)
-		update_node(node, private);
+		update_node(node);
 
-	delete_node(root, node, update_node, private);
+	delete_node(root, node, update_node);
 }
 
 /**
@@ -1225,7 +1223,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
 void radix_tree_replace_slot(struct radix_tree_root *root,
 			     void __rcu **slot, void *item)
 {
-	__radix_tree_replace(root, NULL, slot, item, NULL, NULL);
+	__radix_tree_replace(root, NULL, slot, item, NULL);
 }
 EXPORT_SYMBOL(radix_tree_replace_slot);
 
@@ -1242,7 +1240,7 @@ void radix_tree_iter_replace(struct radix_tree_root *root,
 				const struct radix_tree_iter *iter,
 				void __rcu **slot, void *item)
 {
-	__radix_tree_replace(root, iter->node, slot, item, NULL, NULL);
+	__radix_tree_replace(root, iter->node, slot, item, NULL);
 }
 
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
@@ -1972,7 +1970,6 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
  *	@root:		radix tree root
  *	@node:		node containing @index
  *	@update_node:	callback for changing leaf nodes
- *	@private:	private data to pass to @update_node
  *
  *	After clearing the slot at @index in @node from radix tree
  *	rooted at @root, call this function to attempt freeing the
@@ -1980,10 +1977,9 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
  */
 void __radix_tree_delete_node(struct radix_tree_root *root,
 			      struct radix_tree_node *node,
-			      radix_tree_update_node_t update_node,
-			      void *private)
+			      radix_tree_update_node_t update_node)
 {
-	delete_node(root, node, update_node, private);
+	delete_node(root, node, update_node);
 }
 
 static bool __radix_tree_delete(struct radix_tree_root *root,
@@ -2001,7 +1997,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root,
 			node_tag_clear(root, node, tag, offset);
 
 	replace_slot(slot, NULL, node, -1, exceptional);
-	return node && delete_node(root, node, NULL, NULL);
+	return node && delete_node(root, node, NULL);
 }
 
 /**
diff --git a/mm/filemap.c b/mm/filemap.c
index dba68e1d9869..e59580feefd9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -35,6 +35,7 @@
 #include <linux/hugetlb.h>
 #include <linux/memcontrol.h>
 #include <linux/cleancache.h>
+#include <linux/shmem_fs.h>
 #include <linux/rmap.h>
 #include "internal.h"
 
@@ -134,7 +135,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 			*shadowp = p;
 	}
 	__radix_tree_replace(&mapping->page_tree, node, slot, page,
-			     workingset_update_node, mapping);
+			     workingset_lookup_update(mapping));
 	mapping->nrpages++;
 	return 0;
 }
@@ -162,7 +163,7 @@ static void page_cache_tree_delete(struct address_space *mapping,
 
 		radix_tree_clear_tags(&mapping->page_tree, node, slot);
 		__radix_tree_replace(&mapping->page_tree, node, slot, shadow,
-				     workingset_update_node, mapping);
+				workingset_lookup_update(mapping));
 	}
 
 	page->mapping = NULL;
@@ -360,7 +361,7 @@ page_cache_tree_delete_batch(struct address_space *mapping, int count,
 		}
 		radix_tree_clear_tags(&mapping->page_tree, iter.node, slot);
 		__radix_tree_replace(&mapping->page_tree, iter.node, slot, NULL,
-				     workingset_update_node, mapping);
+				workingset_lookup_update(mapping));
 		total_pages++;
 	}
 	mapping->nrpages -= total_pages;
diff --git a/mm/shmem.c b/mm/shmem.c
index 07a1d22807be..a72f68aee6a4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -338,7 +338,7 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
 	if (item != expected)
 		return -ENOENT;
 	__radix_tree_replace(&mapping->page_tree, node, pslot,
-			     replacement, NULL, NULL);
+			     replacement, NULL);
 	return 0;
 }
 
diff --git a/mm/truncate.c b/mm/truncate.c
index 3dfa2d5e642e..d578d542a6ee 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -42,7 +42,7 @@ static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
 	if (*slot != entry)
 		goto unlock;
 	__radix_tree_replace(&mapping->page_tree, node, slot, NULL,
-			     workingset_update_node, mapping);
+			     workingset_update_node);
 	mapping->nrexceptional--;
 unlock:
 	spin_unlock_irq(&mapping->tree_lock);
diff --git a/mm/workingset.c b/mm/workingset.c
index 7119cd745ace..0f7b4fb130e3 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -339,14 +339,8 @@ void workingset_activation(struct page *page)
 
 static struct list_lru shadow_nodes;
 
-void workingset_update_node(struct radix_tree_node *node, void *private)
+void workingset_update_node(struct radix_tree_node *node)
 {
-	struct address_space *mapping = private;
-
-	/* Only regular page cache has shadow entries */
-	if (dax_mapping(mapping) || shmem_mapping(mapping))
-		return;
-
 	/*
 	 * Track non-empty nodes that contain only shadow entries;
 	 * unlink those that contain pages or are being freed.
@@ -474,7 +468,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 		goto out_invalid;
 	inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
 	__radix_tree_delete_node(&mapping->page_tree, node,
-				 workingset_update_node, mapping);
+				 workingset_lookup_update(mapping));
 
 out_invalid:
 	spin_unlock(&mapping->tree_lock);
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 06c71178d07d..59245b3d587c 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -618,7 +618,7 @@ static void multiorder_account(void)
 	__radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12);
 	__radix_tree_lookup(&tree, 1 << 5, &node, &slot);
 	assert(node->count == node->exceptional * 2);
-	__radix_tree_replace(&tree, node, slot, NULL, NULL, NULL);
+	__radix_tree_replace(&tree, node, slot, NULL, NULL);
 	assert(node->exceptional == 0);
 
 	item_kill_tree(&tree);
-- 
2.14.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply index

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-18  7:59 [PATCH 0/8] Follow-up for speed up page cache truncation v2 Mel Gorman
2017-10-18  7:59 ` [PATCH 1/8] mm, page_alloc: Enable/disable IRQs once when freeing a list of pages Mel Gorman
2017-10-18  9:02   ` Vlastimil Babka
2017-10-18 10:15     ` Mel Gorman
2017-10-18  7:59 ` Mel Gorman [this message]
2017-10-19  8:11   ` [PATCH 2/8] mm, truncate: Do not check mapping for every page being truncated Jan Kara
2017-10-18  7:59 ` [PATCH 3/8] mm, truncate: Remove all exceptional entries from pagevec under one lock Mel Gorman
2017-10-18  7:59 ` [PATCH 4/8] mm: Only drain per-cpu pagevecs once per pagevec usage Mel Gorman
2017-10-19  9:12   ` Vlastimil Babka
2017-10-19  9:33     ` Mel Gorman
2017-10-19 13:21       ` Vlastimil Babka
2017-10-18  7:59 ` [PATCH 5/8] mm, pagevec: Remove cold parameter for pagevecs Mel Gorman
2017-10-19  9:24   ` Vlastimil Babka
2017-10-18  7:59 ` [PATCH 6/8] mm: Remove cold parameter for release_pages Mel Gorman
2017-10-19  9:26   ` Vlastimil Babka
2017-10-18  7:59 ` [PATCH 7/8] mm, Remove cold parameter from free_hot_cold_page* Mel Gorman
2017-10-19 13:12   ` Vlastimil Babka
2017-10-19 15:43     ` Mel Gorman
2017-10-18  7:59 ` [PATCH 8/8] mm: Remove __GFP_COLD Mel Gorman
2017-10-19 13:18   ` Vlastimil Babka
2017-10-19 13:42   ` Vlastimil Babka
2017-10-19 14:32     ` Mel Gorman
  -- strict thread matches above, loose matches on Subject: below --
2017-10-12  9:30 [PATCH 0/8] Follow-up for speed up page cache truncation Mel Gorman
2017-10-12  9:30 ` [PATCH 2/8] mm, truncate: Do not check mapping for every page being truncated Mel Gorman
2017-10-12 12:15   ` Jan Kara
2017-10-12 12:41     ` Mel Gorman
2017-10-12 19:11   ` Johannes Weiner

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171018075952.10627-3-mgorman@techsingularity.net \
    --to=mgorman@techsingularity.net \
    --cc=ak@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=dave.hansen@intel.com \
    --cc=david@fromorbit.com \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org linux-fsdevel@archiver.kernel.org
	public-inbox-index linux-fsdevel


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox