* [PATCH] mm: Refactor find_get_pages() & friends
From: Kent Overstreet @ 2016-03-09  1:16 UTC
  To: Al Viro; +Cc: linux-kernel, linux-mm, Andrew Morton

Collapse redundant implementations of the various gang pagecache lookups -
this is also prep work for the pagecache iterator work.
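
For illustration (this example is not part of the patch, and the helper name
is made up), a typical batched caller of the unchanged external interface
looks something like this - find_get_pages() takes a reference on every page
it returns, and it does not advance the index for us:

static void walk_mapping(struct address_space *mapping)
{
        struct page *pages[16];
        pgoff_t index = 0;
        unsigned i, nr;

        while ((nr = find_get_pages(mapping, index,
                                    ARRAY_SIZE(pages), pages))) {
                for (i = 0; i < nr; i++) {
                        /* remember where the next batch starts */
                        index = pages[i]->index + 1;

                        /* ... use pages[i] here ... */

                        /* drop the reference the lookup took */
                        page_cache_release(pages[i]);
                }
        }
}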

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/pagemap.h    | 142 +++++++++++++++++--
 include/linux/radix-tree.h |  49 ++-----
 mm/filemap.c               | 331 +++++----------------------------------------
 3 files changed, 174 insertions(+), 348 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 92395a0a7d..12cb653423 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -352,18 +352,136 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
 
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
 struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
-unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
-			  unsigned int nr_entries, struct page **entries,
-			  pgoff_t *indices);
-unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
-			unsigned int nr_pages, struct page **pages);
-unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
-			       unsigned int nr_pages, struct page **pages);
-unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
-			int tag, unsigned int nr_pages, struct page **pages);
-unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
-			int tag, unsigned int nr_entries,
-			struct page **entries, pgoff_t *indices);
+
+unsigned __find_get_pages(struct address_space *mapping,
+			  pgoff_t start, pgoff_t end,
+			  unsigned nr_entries, struct page **entries,
+			  pgoff_t *indices, unsigned flags);
+
+/**
+ * find_get_entries - gang pagecache lookup
+ * @mapping:	The address_space to search
+ * @start:	The starting page cache index
+ * @nr_entries:	The maximum number of entries
+ * @entries:	Where the resulting entries are placed
+ * @indices:	The cache indices corresponding to the entries in @entries
+ *
+ * find_get_entries() will search for and return a group of up to
+ * @nr_entries entries in the mapping.  The entries are placed at
+ * @entries.  find_get_entries() takes a reference against any actual
+ * pages it returns.
+ *
+ * The search returns a group of mapping-contiguous page cache entries
+ * with ascending indexes.  There may be holes in the indices due to
+ * not-present pages.
+ *
+ * Any shadow entries of evicted pages, or swap entries from
+ * shmem/tmpfs, are included in the returned array.
+ *
+ * find_get_entries() returns the number of pages and shadow entries
+ * which were found.
+ */
+static inline unsigned find_get_entries(struct address_space *mapping,
+			pgoff_t start, unsigned nr_entries,
+			struct page **entries, pgoff_t *indices)
+{
+	return __find_get_pages(mapping, start, ULONG_MAX,
+				nr_entries, entries, indices,
+				RADIX_TREE_ITER_EXCEPTIONAL);
+}
+
+/**
+ * find_get_pages - gang pagecache lookup
+ * @mapping:	The address_space to search
+ * @start:	The starting page index
+ * @nr_pages:	The maximum number of pages
+ * @pages:	Where the resulting pages are placed
+ *
+ * find_get_pages() will search for and return a group of up to
+ * @nr_pages pages in the mapping.  The pages are placed at @pages.
+ * find_get_pages() takes a reference against the returned pages.
+ *
+ * The search returns a group of mapping-contiguous pages with ascending
+ * indexes.  There may be holes in the indices due to not-present pages.
+ *
+ * find_get_pages() returns the number of pages which were found.
+ */
+static inline unsigned find_get_pages(struct address_space *mapping,
+			pgoff_t start, unsigned nr_pages,
+			struct page **pages)
+{
+	return __find_get_pages(mapping, start, ULONG_MAX,
+				nr_pages, pages, NULL, 0);
+}
+
+/**
+ * find_get_pages_contig - gang contiguous pagecache lookup
+ * @mapping:	The address_space to search
+ * @start:	The starting page index
+ * @nr_pages:	The maximum number of pages
+ * @pages:	Where the resulting pages are placed
+ *
+ * find_get_pages_contig() works exactly like find_get_pages(), except
+ * that the returned number of pages are guaranteed to be contiguous.
+ *
+ * find_get_pages_contig() returns the number of pages which were found.
+ */
+static inline unsigned find_get_pages_contig(struct address_space *mapping,
+			pgoff_t start, unsigned nr_pages,
+			struct page **pages)
+{
+	return __find_get_pages(mapping, start, ULONG_MAX,
+				nr_pages, pages, NULL,
+				RADIX_TREE_ITER_CONTIG);
+}
+
+/**
+ * find_get_pages_tag - find and return pages that match @tag
+ * @mapping:	the address_space to search
+ * @index:	the starting page index
+ * @tag:	the tag index
+ * @nr_pages:	the maximum number of pages
+ * @pages:	where the resulting pages are placed
+ *
+ * Like find_get_pages, except we only return pages which are tagged with
+ * @tag.   We update @index to index the next page for the traversal.
+ */
+static inline unsigned find_get_pages_tag(struct address_space *mapping,
+			pgoff_t *index, int tag,
+			unsigned nr_pages, struct page **pages)
+{
+	unsigned ret;
+
+	ret = __find_get_pages(mapping, *index, ULONG_MAX,
+			       nr_pages, pages, NULL,
+			       RADIX_TREE_ITER_TAGGED|tag);
+	if (ret)
+		*index = pages[ret - 1]->index + 1;
+
+	return ret;
+}
+
+/**
+ * find_get_entries_tag - find and return entries that match @tag
+ * @mapping:	the address_space to search
+ * @start:	the starting page cache index
+ * @tag:	the tag index
+ * @nr_entries:	the maximum number of entries
+ * @entries:	where the resulting entries are placed
+ * @indices:	the cache indices corresponding to the entries in @entries
+ *
+ * Like find_get_entries, except we only return entries which are tagged with
+ * @tag.
+ */
+static inline unsigned find_get_entries_tag(struct address_space *mapping,
+			pgoff_t start, int tag, unsigned nr_entries,
+			struct page **entries, pgoff_t *indices)
+{
+	return __find_get_pages(mapping, start, ULONG_MAX,
+				nr_entries, entries, indices,
+				RADIX_TREE_ITER_EXCEPTIONAL|
+				RADIX_TREE_ITER_TAGGED|tag);
+}
 
 struct page *grab_cache_page_write_begin(struct address_space *mapping,
 			pgoff_t index, unsigned flags);
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 7c88ad156a..c0c4dbf615 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -338,6 +338,8 @@ struct radix_tree_iter {
 #define RADIX_TREE_ITER_TAG_MASK	0x00FF	/* tag index in lower byte */
 #define RADIX_TREE_ITER_TAGGED		0x0100	/* lookup tagged slots */
 #define RADIX_TREE_ITER_CONTIG		0x0200	/* stop at first hole */
+#define RADIX_TREE_ITER_EXCEPTIONAL	0x0400	/* include exceptional entries */
+						/* used by __find_get_pages() */
 
 /**
  * radix_tree_iter_init - initialize radix tree iterator
@@ -435,33 +437,10 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
 	return NULL;
 }
 
-/**
- * radix_tree_for_each_chunk - iterate over chunks
- *
- * @slot:	the void** variable for pointer to chunk first slot
- * @root:	the struct radix_tree_root pointer
- * @iter:	the struct radix_tree_iter pointer
- * @start:	iteration starting index
- * @flags:	RADIX_TREE_ITER_* and tag index
- *
- * Locks can be released and reacquired between iterations.
- */
-#define radix_tree_for_each_chunk(slot, root, iter, start, flags)	\
+#define __radix_tree_for_each_slot(slot, root, iter, start, flags)	\
 	for (slot = radix_tree_iter_init(iter, start) ;			\
-	      (slot = radix_tree_next_chunk(root, iter, flags)) ;)
-
-/**
- * radix_tree_for_each_chunk_slot - iterate over slots in one chunk
- *
- * @slot:	the void** variable, at the beginning points to chunk first slot
- * @iter:	the struct radix_tree_iter pointer
- * @flags:	RADIX_TREE_ITER_*, should be constant
- *
- * This macro is designed to be nested inside radix_tree_for_each_chunk().
- * @slot points to the radix tree slot, @iter->index contains its index.
- */
-#define radix_tree_for_each_chunk_slot(slot, iter, flags)		\
-	for (; slot ; slot = radix_tree_next_slot(slot, iter, flags))
+	     slot || (slot = radix_tree_next_chunk(root, iter, flags));	\
+	     slot = radix_tree_next_slot(slot, iter, flags))
 
 /**
  * radix_tree_for_each_slot - iterate over non-empty slots
@@ -474,9 +453,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
  * @slot points to radix tree slot, @iter->index contains its index.
  */
 #define radix_tree_for_each_slot(slot, root, iter, start)		\
-	for (slot = radix_tree_iter_init(iter, start) ;			\
-	     slot || (slot = radix_tree_next_chunk(root, iter, 0)) ;	\
-	     slot = radix_tree_next_slot(slot, iter, 0))
+	__radix_tree_for_each_slot(slot, root, iter, start, 0)
 
 /**
  * radix_tree_for_each_contig - iterate over contiguous slots
@@ -489,11 +466,8 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
  * @slot points to radix tree slot, @iter->index contains its index.
  */
 #define radix_tree_for_each_contig(slot, root, iter, start)		\
-	for (slot = radix_tree_iter_init(iter, start) ;			\
-	     slot || (slot = radix_tree_next_chunk(root, iter,		\
-				RADIX_TREE_ITER_CONTIG)) ;		\
-	     slot = radix_tree_next_slot(slot, iter,			\
-				RADIX_TREE_ITER_CONTIG))
+	__radix_tree_for_each_slot(slot, root, iter, start,		\
+			      RADIX_TREE_ITER_CONTIG)
 
 /**
  * radix_tree_for_each_tagged - iterate over tagged slots
@@ -507,10 +481,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
  * @slot points to radix tree slot, @iter->index contains its index.
  */
 #define radix_tree_for_each_tagged(slot, root, iter, start, tag)	\
-	for (slot = radix_tree_iter_init(iter, start) ;			\
-	     slot || (slot = radix_tree_next_chunk(root, iter,		\
-			      RADIX_TREE_ITER_TAGGED | tag)) ;		\
-	     slot = radix_tree_next_slot(slot, iter,			\
-				RADIX_TREE_ITER_TAGGED))
+	__radix_tree_for_each_slot(slot, root, iter, start,		\
+			      RADIX_TREE_ITER_TAGGED|tag)
 
 #endif /* _LINUX_RADIX_TREE_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index bc943867d6..09dc7b493c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1211,109 +1211,48 @@ no_page:
 EXPORT_SYMBOL(pagecache_get_page);
 
 /**
- * find_get_entries - gang pagecache lookup
+ * __find_get_pages - gang pagecache lookup, internal mechanism
  * @mapping:	The address_space to search
  * @start:	The starting page cache index
+ * @end:	Page cache index to stop at (inclusive)
  * @nr_entries:	The maximum number of entries
  * @entries:	Where the resulting entries are placed
- * @indices:	The cache indices corresponding to the entries in @entries
+ * @indices:	If non NULL, indices of corresponding entries placed here
+ * @flags:	radix tree iter flags and tag (if supplied)
  *
- * find_get_entries() will search for and return a group of up to
- * @nr_entries entries in the mapping.  The entries are placed at
- * @entries.  find_get_entries() takes a reference against any actual
- * pages it returns.
+ * Don't use directly - see wrappers in pagemap.h
  *
- * The search returns a group of mapping-contiguous page cache entries
- * with ascending indexes.  There may be holes in the indices due to
- * not-present pages.
+ * Possible values for flags (may be used in combination):
  *
- * Any shadow entries of evicted pages, or swap entries from
- * shmem/tmpfs, are included in the returned array.
- *
- * find_get_entries() returns the number of pages and shadow entries
- * which were found.
+ * 0:				find_get_pages()
+ * RADIX_TREE_ITER_TAGGED|tag:	find_get_pages_tag()
+ * RADIX_TREE_ITER_CONTIG:	find_get_pages_contig()
+ * RADIX_TREE_ITER_EXCEPTIONAL:	find_get_entries()
  */
-unsigned find_get_entries(struct address_space *mapping,
-			  pgoff_t start, unsigned int nr_entries,
-			  struct page **entries, pgoff_t *indices)
-{
-	void **slot;
-	unsigned int ret = 0;
-	struct radix_tree_iter iter;
-
-	if (!nr_entries)
-		return 0;
-
-	rcu_read_lock();
-restart:
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		if (unlikely(!page))
-			continue;
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page))
-				goto restart;
-			/*
-			 * A shadow entry of a recently evicted page, a swap
-			 * entry from shmem/tmpfs or a DAX entry.  Return it
-			 * without attempting to raise page count.
-			 */
-			goto export;
-		}
-		if (!page_cache_get_speculative(page))
-			goto repeat;
-
-		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			page_cache_release(page);
-			goto repeat;
-		}
-export:
-		indices[ret] = iter.index;
-		entries[ret] = page;
-		if (++ret == nr_entries)
-			break;
-	}
-	rcu_read_unlock();
-	return ret;
-}
-
-/**
- * find_get_pages - gang pagecache lookup
- * @mapping:	The address_space to search
- * @start:	The starting page index
- * @nr_pages:	The maximum number of pages
- * @pages:	Where the resulting pages are placed
- *
- * find_get_pages() will search for and return a group of up to
- * @nr_pages pages in the mapping.  The pages are placed at @pages.
- * find_get_pages() takes a reference against the returned pages.
- *
- * The search returns a group of mapping-contiguous pages with ascending
- * indexes.  There may be holes in the indices due to not-present pages.
- *
- * find_get_pages() returns the number of pages which were found.
- */
-unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
-			    unsigned int nr_pages, struct page **pages)
+unsigned __find_get_pages(struct address_space *mapping,
+			  pgoff_t start, pgoff_t end,
+			  unsigned nr_entries, struct page **entries,
+			  pgoff_t *indices, unsigned flags)
 {
 	struct radix_tree_iter iter;
 	void **slot;
 	unsigned ret = 0;
 
-	if (unlikely(!nr_pages))
+	if (unlikely(!nr_entries || start > end))
 		return 0;
 
 	rcu_read_lock();
 restart:
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	__radix_tree_for_each_slot(slot, &mapping->page_tree,
+				   &iter, start, flags) {
 		struct page *page;
+
+		if (iter.index > end)
+			break;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
-			continue;
+			goto no_entry;
 
 		if (radix_tree_exception(page)) {
 			if (radix_tree_deref_retry(page)) {
@@ -1322,82 +1261,18 @@ repeat:
 				 * when entry at index 0 moves out of or back
 				 * to root: none yet gotten, safe to restart.
 				 */
-				WARN_ON(iter.index);
 				goto restart;
 			}
+
 			/*
 			 * A shadow entry of a recently evicted page,
 			 * or a swap entry from shmem/tmpfs.  Skip
 			 * over it.
 			 */
-			continue;
-		}
+			if (flags & RADIX_TREE_ITER_EXCEPTIONAL)
+				goto export;
 
-		if (!page_cache_get_speculative(page))
-			goto repeat;
-
-		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			page_cache_release(page);
-			goto repeat;
-		}
-
-		pages[ret] = page;
-		if (++ret == nr_pages)
-			break;
-	}
-
-	rcu_read_unlock();
-	return ret;
-}
-
-/**
- * find_get_pages_contig - gang contiguous pagecache lookup
- * @mapping:	The address_space to search
- * @index:	The starting page index
- * @nr_pages:	The maximum number of pages
- * @pages:	Where the resulting pages are placed
- *
- * find_get_pages_contig() works exactly like find_get_pages(), except
- * that the returned number of pages are guaranteed to be contiguous.
- *
- * find_get_pages_contig() returns the number of pages which were found.
- */
-unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
-			       unsigned int nr_pages, struct page **pages)
-{
-	struct radix_tree_iter iter;
-	void **slot;
-	unsigned int ret = 0;
-
-	if (unlikely(!nr_pages))
-		return 0;
-
-	rcu_read_lock();
-restart:
-	radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		/* The hole, there no reason to continue */
-		if (unlikely(!page))
-			break;
-
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				/*
-				 * Transient condition which can only trigger
-				 * when entry at index 0 moves out of or back
-				 * to root: none yet gotten, safe to restart.
-				 */
-				goto restart;
-			}
-			/*
-			 * A shadow entry of a recently evicted page,
-			 * or a swap entry from shmem/tmpfs.  Stop
-			 * looking for contiguous pages.
-			 */
-			break;
+			goto no_entry;
 		}
 
 		if (!page_cache_get_speculative(page))
@@ -1414,164 +1289,26 @@ repeat:
 		 * otherwise we can get both false positives and false
 		 * negatives, which is just confusing to the caller.
 		 */
-		if (page->mapping == NULL || page->index != iter.index) {
+		if ((flags & RADIX_TREE_ITER_CONTIG) &&
+		    (page->mapping == NULL || page->index != iter.index)) {
 			page_cache_release(page);
 			break;
 		}
-
-		pages[ret] = page;
-		if (++ret == nr_pages)
-			break;
-	}
-	rcu_read_unlock();
-	return ret;
-}
-EXPORT_SYMBOL(find_get_pages_contig);
-
-/**
- * find_get_pages_tag - find and return pages that match @tag
- * @mapping:	the address_space to search
- * @index:	the starting page index
- * @tag:	the tag index
- * @nr_pages:	the maximum number of pages
- * @pages:	where the resulting pages are placed
- *
- * Like find_get_pages, except we only return pages which are tagged with
- * @tag.   We update @index to index the next page for the traversal.
- */
-unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
-			int tag, unsigned int nr_pages, struct page **pages)
-{
-	struct radix_tree_iter iter;
-	void **slot;
-	unsigned ret = 0;
-
-	if (unlikely(!nr_pages))
-		return 0;
-
-	rcu_read_lock();
-restart:
-	radix_tree_for_each_tagged(slot, &mapping->page_tree,
-				   &iter, *index, tag) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		if (unlikely(!page))
-			continue;
-
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				/*
-				 * Transient condition which can only trigger
-				 * when entry at index 0 moves out of or back
-				 * to root: none yet gotten, safe to restart.
-				 */
-				goto restart;
-			}
-			/*
-			 * A shadow entry of a recently evicted page.
-			 *
-			 * Those entries should never be tagged, but
-			 * this tree walk is lockless and the tags are
-			 * looked up in bulk, one radix tree node at a
-			 * time, so there is a sizable window for page
-			 * reclaim to evict a page we saw tagged.
-			 *
-			 * Skip over it.
-			 */
-			continue;
-		}
-
-		if (!page_cache_get_speculative(page))
-			goto repeat;
-
-		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			page_cache_release(page);
-			goto repeat;
-		}
-
-		pages[ret] = page;
-		if (++ret == nr_pages)
-			break;
-	}
-
-	rcu_read_unlock();
-
-	if (ret)
-		*index = pages[ret - 1]->index + 1;
-
-	return ret;
-}
-EXPORT_SYMBOL(find_get_pages_tag);
-
-/**
- * find_get_entries_tag - find and return entries that match @tag
- * @mapping:	the address_space to search
- * @start:	the starting page cache index
- * @tag:	the tag index
- * @nr_entries:	the maximum number of entries
- * @entries:	where the resulting entries are placed
- * @indices:	the cache indices corresponding to the entries in @entries
- *
- * Like find_get_entries, except we only return entries which are tagged with
- * @tag.
- */
-unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
-			int tag, unsigned int nr_entries,
-			struct page **entries, pgoff_t *indices)
-{
-	void **slot;
-	unsigned int ret = 0;
-	struct radix_tree_iter iter;
-
-	if (!nr_entries)
-		return 0;
-
-	rcu_read_lock();
-restart:
-	radix_tree_for_each_tagged(slot, &mapping->page_tree,
-				   &iter, start, tag) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		if (unlikely(!page))
-			continue;
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				/*
-				 * Transient condition which can only trigger
-				 * when entry at index 0 moves out of or back
-				 * to root: none yet gotten, safe to restart.
-				 */
-				goto restart;
-			}
-
-			/*
-			 * A shadow entry of a recently evicted page, a swap
-			 * entry from shmem/tmpfs or a DAX entry.  Return it
-			 * without attempting to raise page count.
-			 */
-			goto export;
-		}
-		if (!page_cache_get_speculative(page))
-			goto repeat;
-
-		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			page_cache_release(page);
-			goto repeat;
-		}
 export:
-		indices[ret] = iter.index;
+		if (indices)
+			indices[ret] = iter.index;
 		entries[ret] = page;
 		if (++ret == nr_entries)
 			break;
+		continue;
+no_entry:
+		if (flags & RADIX_TREE_ITER_CONTIG)
+			break;
 	}
 	rcu_read_unlock();
 	return ret;
 }
-EXPORT_SYMBOL(find_get_entries_tag);
+EXPORT_SYMBOL(__find_get_pages);
 
 /*
  * CD/DVDs are error prone. When a medium error occurs, the driver may fail
-- 
2.7.0

* Re: [PATCH] mm: Refactor find_get_pages() & friends
From: Andrew Morton @ 2016-03-29 21:29 UTC
  To: Kent Overstreet; +Cc: Al Viro, linux-kernel, linux-mm

On Tue, 8 Mar 2016 16:16:43 -0900 Kent Overstreet <kent.overstreet@gmail.com> wrote:

> Collapse redundant implementations of the various gang pagecache lookups -
> this is also prep work for the pagecache iterator work.

Patch looks nice.  Unfortunately filemap.c has changed rather a lot
since 4.5.  Can you please redo the patch some time?

And a more informative changelog would be appropriate, although it's
all pretty obvious.  I don't know what "pagecache iterator work" is
and I doubt if many other readers do either, so some illumination there
wouldn't hurt.

* Re: [PATCH] mm: Refactor find_get_pages() & friends
From: Kent Overstreet @ 2016-04-01  2:35 UTC
  To: Andrew Morton; +Cc: Al Viro, linux-kernel, linux-mm

On Tue, Mar 29, 2016 at 02:29:11PM -0700, Andrew Morton wrote:
> On Tue, 8 Mar 2016 16:16:43 -0900 Kent Overstreet <kent.overstreet@gmail.com> wrote:
> 
> > Collapse redundant implementations of the various gang pagecache lookups -
> > this is also prep work for the pagecache iterator work.
> 
> Patch looks nice.  Unfortunately filemap.c has changed rather a lot
> since 4.5.  Can you please redo the patch some time?
> 
> And a more informative changelog would be appropriate, although it's
> all pretty obvious.  I don't know what "pagecache iterator work" is
> and I doubt if many other readers do either, so some illumination there
> wouldn't hurt.

Can do - sending you the pagecache iterator patch too:

Kent Overstreet (2):
  mm: Refactor find_get_pages() & friends
  mm: Real pagecache iterators

 fs/ext4/inode.c            | 261 +++++++++++++------------------
 include/linux/pagemap.h    | 142 +++++++++++++++--
 include/linux/pagevec.h    |  67 +++++++-
 include/linux/radix-tree.h |  49 ++----
 mm/filemap.c               | 376 ++++++++++-----------------------------------
 mm/page-writeback.c        | 148 ++++++++----------
 mm/swap.c                  |  33 +---
 mm/truncate.c              | 259 ++++++++++++-------------------
 8 files changed, 546 insertions(+), 789 deletions(-)

-- 
2.8.0.rc3

* [PATCH 1/2] mm: Refactor find_get_pages() & friends
From: Kent Overstreet @ 2016-04-01  2:38 UTC
  To: Andrew Morton; +Cc: Al Viro, linux-kernel, linux-mm, Kent Overstreet

Collapse redundant implementations of the various gang pagecache lookups -
this is also prep work for the pagecache iterators added in the next patch.
This gives us a single common interface (__find_get_pages()) that the
pagecache iterator will make use of.
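
The new core takes an inclusive [start, end] range plus a radix tree iterator
flags word, so each of the old lookups becomes a thin wrapper. As a sketch
(hypothetical - no such wrapper is added by this patch), a ranged plain-page
lookup would be a one-liner on top of it:

static inline unsigned find_get_pages_range(struct address_space *mapping,
                        pgoff_t start, pgoff_t end,
                        unsigned nr_pages, struct page **pages)
{
        /* flags == 0: plain page lookup, as in find_get_pages() */
        return __find_get_pages(mapping, start, end,
                                nr_pages, pages, NULL, 0);
}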

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/pagemap.h    | 142 ++++++++++++++++++--
 include/linux/radix-tree.h |  49 ++-----
 mm/filemap.c               | 316 +++++----------------------------------------
 3 files changed, 174 insertions(+), 333 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 1ebd65c914..e8ebf77407 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -339,18 +339,136 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
 
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
 struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
-unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
-			  unsigned int nr_entries, struct page **entries,
-			  pgoff_t *indices);
-unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
-			unsigned int nr_pages, struct page **pages);
-unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
-			       unsigned int nr_pages, struct page **pages);
-unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
-			int tag, unsigned int nr_pages, struct page **pages);
-unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
-			int tag, unsigned int nr_entries,
-			struct page **entries, pgoff_t *indices);
+
+unsigned __find_get_pages(struct address_space *mapping,
+			  pgoff_t start, pgoff_t end,
+			  unsigned nr_entries, struct page **entries,
+			  pgoff_t *indices, unsigned flags);
+
+/**
+ * find_get_entries - gang pagecache lookup
+ * @mapping:	The address_space to search
+ * @start:	The starting page cache index
+ * @nr_entries:	The maximum number of entries
+ * @entries:	Where the resulting entries are placed
+ * @indices:	The cache indices corresponding to the entries in @entries
+ *
+ * find_get_entries() will search for and return a group of up to
+ * @nr_entries entries in the mapping.  The entries are placed at
+ * @entries.  find_get_entries() takes a reference against any actual
+ * pages it returns.
+ *
+ * The search returns a group of mapping-contiguous page cache entries
+ * with ascending indexes.  There may be holes in the indices due to
+ * not-present pages.
+ *
+ * Any shadow entries of evicted pages, or swap entries from
+ * shmem/tmpfs, are included in the returned array.
+ *
+ * find_get_entries() returns the number of pages and shadow entries
+ * which were found.
+ */
+static inline unsigned find_get_entries(struct address_space *mapping,
+			pgoff_t start, unsigned nr_entries,
+			struct page **entries, pgoff_t *indices)
+{
+	return __find_get_pages(mapping, start, ULONG_MAX,
+				nr_entries, entries, indices,
+				RADIX_TREE_ITER_EXCEPTIONAL);
+}
+
+/**
+ * find_get_pages - gang pagecache lookup
+ * @mapping:	The address_space to search
+ * @start:	The starting page index
+ * @nr_pages:	The maximum number of pages
+ * @pages:	Where the resulting pages are placed
+ *
+ * find_get_pages() will search for and return a group of up to
+ * @nr_pages pages in the mapping.  The pages are placed at @pages.
+ * find_get_pages() takes a reference against the returned pages.
+ *
+ * The search returns a group of mapping-contiguous pages with ascending
+ * indexes.  There may be holes in the indices due to not-present pages.
+ *
+ * find_get_pages() returns the number of pages which were found.
+ */
+static inline unsigned find_get_pages(struct address_space *mapping,
+			pgoff_t start, unsigned nr_pages,
+			struct page **pages)
+{
+	return __find_get_pages(mapping, start, ULONG_MAX,
+				nr_pages, pages, NULL, 0);
+}
+
+/**
+ * find_get_pages_contig - gang contiguous pagecache lookup
+ * @mapping:	The address_space to search
+ * @start:	The starting page index
+ * @nr_pages:	The maximum number of pages
+ * @pages:	Where the resulting pages are placed
+ *
+ * find_get_pages_contig() works exactly like find_get_pages(), except
+ * that the returned number of pages are guaranteed to be contiguous.
+ *
+ * find_get_pages_contig() returns the number of pages which were found.
+ */
+static inline unsigned find_get_pages_contig(struct address_space *mapping,
+			pgoff_t start, unsigned nr_pages,
+			struct page **pages)
+{
+	return __find_get_pages(mapping, start, ULONG_MAX,
+				nr_pages, pages, NULL,
+				RADIX_TREE_ITER_CONTIG);
+}
+
+/**
+ * find_get_pages_tag - find and return pages that match @tag
+ * @mapping:	the address_space to search
+ * @index:	the starting page index
+ * @tag:	the tag index
+ * @nr_pages:	the maximum number of pages
+ * @pages:	where the resulting pages are placed
+ *
+ * Like find_get_pages, except we only return pages which are tagged with
+ * @tag.   We update @index to index the next page for the traversal.
+ */
+static inline unsigned find_get_pages_tag(struct address_space *mapping,
+			pgoff_t *index, int tag,
+			unsigned nr_pages, struct page **pages)
+{
+	unsigned ret;
+
+	ret = __find_get_pages(mapping, *index, ULONG_MAX,
+			       nr_pages, pages, NULL,
+			       RADIX_TREE_ITER_TAGGED|tag);
+	if (ret)
+		*index = pages[ret - 1]->index + 1;
+
+	return ret;
+}
+
+/**
+ * find_get_entries_tag - find and return entries that match @tag
+ * @mapping:	the address_space to search
+ * @start:	the starting page cache index
+ * @tag:	the tag index
+ * @nr_entries:	the maximum number of entries
+ * @entries:	where the resulting entries are placed
+ * @indices:	the cache indices corresponding to the entries in @entries
+ *
+ * Like find_get_entries, except we only return entries which are tagged with
+ * @tag.
+ */
+static inline unsigned find_get_entries_tag(struct address_space *mapping,
+			pgoff_t start, int tag, unsigned nr_entries,
+			struct page **entries, pgoff_t *indices)
+{
+	return __find_get_pages(mapping, start, ULONG_MAX,
+				nr_entries, entries, indices,
+				RADIX_TREE_ITER_EXCEPTIONAL|
+				RADIX_TREE_ITER_TAGGED|tag);
+}
 
 struct page *grab_cache_page_write_begin(struct address_space *mapping,
 			pgoff_t index, unsigned flags);
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 51a97ac8bf..b7539c94bc 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -346,6 +346,8 @@ struct radix_tree_iter {
 #define RADIX_TREE_ITER_TAG_MASK	0x00FF	/* tag index in lower byte */
 #define RADIX_TREE_ITER_TAGGED		0x0100	/* lookup tagged slots */
 #define RADIX_TREE_ITER_CONTIG		0x0200	/* stop at first hole */
+#define RADIX_TREE_ITER_EXCEPTIONAL	0x0400	/* include exceptional entries */
+						/* used by __find_get_pages() */
 
 /**
  * radix_tree_iter_init - initialize radix tree iterator
@@ -475,33 +477,10 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
 	return NULL;
 }
 
-/**
- * radix_tree_for_each_chunk - iterate over chunks
- *
- * @slot:	the void** variable for pointer to chunk first slot
- * @root:	the struct radix_tree_root pointer
- * @iter:	the struct radix_tree_iter pointer
- * @start:	iteration starting index
- * @flags:	RADIX_TREE_ITER_* and tag index
- *
- * Locks can be released and reacquired between iterations.
- */
-#define radix_tree_for_each_chunk(slot, root, iter, start, flags)	\
+#define __radix_tree_for_each_slot(slot, root, iter, start, flags)	\
 	for (slot = radix_tree_iter_init(iter, start) ;			\
-	      (slot = radix_tree_next_chunk(root, iter, flags)) ;)
-
-/**
- * radix_tree_for_each_chunk_slot - iterate over slots in one chunk
- *
- * @slot:	the void** variable, at the beginning points to chunk first slot
- * @iter:	the struct radix_tree_iter pointer
- * @flags:	RADIX_TREE_ITER_*, should be constant
- *
- * This macro is designed to be nested inside radix_tree_for_each_chunk().
- * @slot points to the radix tree slot, @iter->index contains its index.
- */
-#define radix_tree_for_each_chunk_slot(slot, iter, flags)		\
-	for (; slot ; slot = radix_tree_next_slot(slot, iter, flags))
+	     slot || (slot = radix_tree_next_chunk(root, iter, flags));	\
+	     slot = radix_tree_next_slot(slot, iter, flags))
 
 /**
  * radix_tree_for_each_slot - iterate over non-empty slots
@@ -514,9 +493,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
  * @slot points to radix tree slot, @iter->index contains its index.
  */
 #define radix_tree_for_each_slot(slot, root, iter, start)		\
-	for (slot = radix_tree_iter_init(iter, start) ;			\
-	     slot || (slot = radix_tree_next_chunk(root, iter, 0)) ;	\
-	     slot = radix_tree_next_slot(slot, iter, 0))
+	__radix_tree_for_each_slot(slot, root, iter, start, 0)
 
 /**
  * radix_tree_for_each_contig - iterate over contiguous slots
@@ -529,11 +506,8 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
  * @slot points to radix tree slot, @iter->index contains its index.
  */
 #define radix_tree_for_each_contig(slot, root, iter, start)		\
-	for (slot = radix_tree_iter_init(iter, start) ;			\
-	     slot || (slot = radix_tree_next_chunk(root, iter,		\
-				RADIX_TREE_ITER_CONTIG)) ;		\
-	     slot = radix_tree_next_slot(slot, iter,			\
-				RADIX_TREE_ITER_CONTIG))
+	__radix_tree_for_each_slot(slot, root, iter, start,		\
+			      RADIX_TREE_ITER_CONTIG)
 
 /**
  * radix_tree_for_each_tagged - iterate over tagged slots
@@ -547,10 +521,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
  * @slot points to radix tree slot, @iter->index contains its index.
  */
 #define radix_tree_for_each_tagged(slot, root, iter, start, tag)	\
-	for (slot = radix_tree_iter_init(iter, start) ;			\
-	     slot || (slot = radix_tree_next_chunk(root, iter,		\
-			      RADIX_TREE_ITER_TAGGED | tag)) ;		\
-	     slot = radix_tree_next_slot(slot, iter,			\
-				RADIX_TREE_ITER_TAGGED))
+	__radix_tree_for_each_slot(slot, root, iter, start,		\
+			      RADIX_TREE_ITER_TAGGED|tag)
 
 #endif /* _LINUX_RADIX_TREE_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index a8c69c8c0a..81ce03fbc1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1221,183 +1221,63 @@ no_page:
 EXPORT_SYMBOL(pagecache_get_page);
 
 /**
- * find_get_entries - gang pagecache lookup
+ * __find_get_pages - gang pagecache lookup, internal mechanism
  * @mapping:	The address_space to search
  * @start:	The starting page cache index
+ * @end:	Page cache index to stop at (inclusive)
  * @nr_entries:	The maximum number of entries
  * @entries:	Where the resulting entries are placed
- * @indices:	The cache indices corresponding to the entries in @entries
+ * @indices:	If non-NULL, indices of corresponding entries placed here
+ * @flags:	radix tree iter flags and tag (if supplied)
  *
- * find_get_entries() will search for and return a group of up to
- * @nr_entries entries in the mapping.  The entries are placed at
- * @entries.  find_get_entries() takes a reference against any actual
- * pages it returns.
+ * Don't use directly - see wrappers in pagemap.h
  *
- * The search returns a group of mapping-contiguous page cache entries
- * with ascending indexes.  There may be holes in the indices due to
- * not-present pages.
+ * Possible values for flags (may be used in combination):
  *
- * Any shadow entries of evicted pages, or swap entries from
- * shmem/tmpfs, are included in the returned array.
- *
- * find_get_entries() returns the number of pages and shadow entries
- * which were found.
+ * 0:				find_get_pages()
+ * RADIX_TREE_ITER_TAGGED|tag:	find_get_pages_tag()
+ * RADIX_TREE_ITER_CONTIG:	find_get_pages_contig()
+ * RADIX_TREE_ITER_EXCEPTIONAL:	find_get_entries()
  */
-unsigned find_get_entries(struct address_space *mapping,
-			  pgoff_t start, unsigned int nr_entries,
-			  struct page **entries, pgoff_t *indices)
-{
-	void **slot;
-	unsigned int ret = 0;
-	struct radix_tree_iter iter;
-
-	if (!nr_entries)
-		return 0;
-
-	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		if (unlikely(!page))
-			continue;
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-			/*
-			 * A shadow entry of a recently evicted page, a swap
-			 * entry from shmem/tmpfs or a DAX entry.  Return it
-			 * without attempting to raise page count.
-			 */
-			goto export;
-		}
-		if (!page_cache_get_speculative(page))
-			goto repeat;
-
-		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			page_cache_release(page);
-			goto repeat;
-		}
-export:
-		indices[ret] = iter.index;
-		entries[ret] = page;
-		if (++ret == nr_entries)
-			break;
-	}
-	rcu_read_unlock();
-	return ret;
-}
-
-/**
- * find_get_pages - gang pagecache lookup
- * @mapping:	The address_space to search
- * @start:	The starting page index
- * @nr_pages:	The maximum number of pages
- * @pages:	Where the resulting pages are placed
- *
- * find_get_pages() will search for and return a group of up to
- * @nr_pages pages in the mapping.  The pages are placed at @pages.
- * find_get_pages() takes a reference against the returned pages.
- *
- * The search returns a group of mapping-contiguous pages with ascending
- * indexes.  There may be holes in the indices due to not-present pages.
- *
- * find_get_pages() returns the number of pages which were found.
- */
-unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
-			    unsigned int nr_pages, struct page **pages)
+unsigned __find_get_pages(struct address_space *mapping,
+			  pgoff_t start, pgoff_t end,
+			  unsigned nr_entries, struct page **entries,
+			  pgoff_t *indices, unsigned flags)
 {
 	struct radix_tree_iter iter;
 	void **slot;
 	unsigned ret = 0;
 
-	if (unlikely(!nr_pages))
+	if (unlikely(!nr_entries || start > end))
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	__radix_tree_for_each_slot(slot, &mapping->page_tree,
+				   &iter, start, flags) {
 		struct page *page;
+
+		if (iter.index > end)
+			break;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
-			continue;
+			goto no_entry;
 
 		if (radix_tree_exception(page)) {
 			if (radix_tree_deref_retry(page)) {
 				slot = radix_tree_iter_retry(&iter);
 				continue;
 			}
+
 			/*
 			 * A shadow entry of a recently evicted page,
 			 * or a swap entry from shmem/tmpfs.  Skip
 			 * over it.
 			 */
-			continue;
-		}
-
-		if (!page_cache_get_speculative(page))
-			goto repeat;
+			if (flags & RADIX_TREE_ITER_EXCEPTIONAL)
+				goto export;
 
-		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			page_cache_release(page);
-			goto repeat;
-		}
-
-		pages[ret] = page;
-		if (++ret == nr_pages)
-			break;
-	}
-
-	rcu_read_unlock();
-	return ret;
-}
-
-/**
- * find_get_pages_contig - gang contiguous pagecache lookup
- * @mapping:	The address_space to search
- * @index:	The starting page index
- * @nr_pages:	The maximum number of pages
- * @pages:	Where the resulting pages are placed
- *
- * find_get_pages_contig() works exactly like find_get_pages(), except
- * that the returned number of pages are guaranteed to be contiguous.
- *
- * find_get_pages_contig() returns the number of pages which were found.
- */
-unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
-			       unsigned int nr_pages, struct page **pages)
-{
-	struct radix_tree_iter iter;
-	void **slot;
-	unsigned int ret = 0;
-
-	if (unlikely(!nr_pages))
-		return 0;
-
-	rcu_read_lock();
-	radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		/* The hole, there no reason to continue */
-		if (unlikely(!page))
-			break;
-
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-			/*
-			 * A shadow entry of a recently evicted page,
-			 * or a swap entry from shmem/tmpfs.  Stop
-			 * looking for contiguous pages.
-			 */
-			break;
+			goto no_entry;
 		}
 
 		if (!page_cache_get_speculative(page))
@@ -1414,154 +1294,26 @@ repeat:
 		 * otherwise we can get both false positives and false
 		 * negatives, which is just confusing to the caller.
 		 */
-		if (page->mapping == NULL || page->index != iter.index) {
+		if ((flags & RADIX_TREE_ITER_CONTIG) &&
+		    (page->mapping == NULL || page->index != iter.index)) {
 			page_cache_release(page);
 			break;
 		}
-
-		pages[ret] = page;
-		if (++ret == nr_pages)
-			break;
-	}
-	rcu_read_unlock();
-	return ret;
-}
-EXPORT_SYMBOL(find_get_pages_contig);
-
-/**
- * find_get_pages_tag - find and return pages that match @tag
- * @mapping:	the address_space to search
- * @index:	the starting page index
- * @tag:	the tag index
- * @nr_pages:	the maximum number of pages
- * @pages:	where the resulting pages are placed
- *
- * Like find_get_pages, except we only return pages which are tagged with
- * @tag.   We update @index to index the next page for the traversal.
- */
-unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
-			int tag, unsigned int nr_pages, struct page **pages)
-{
-	struct radix_tree_iter iter;
-	void **slot;
-	unsigned ret = 0;
-
-	if (unlikely(!nr_pages))
-		return 0;
-
-	rcu_read_lock();
-	radix_tree_for_each_tagged(slot, &mapping->page_tree,
-				   &iter, *index, tag) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		if (unlikely(!page))
-			continue;
-
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-			/*
-			 * A shadow entry of a recently evicted page.
-			 *
-			 * Those entries should never be tagged, but
-			 * this tree walk is lockless and the tags are
-			 * looked up in bulk, one radix tree node at a
-			 * time, so there is a sizable window for page
-			 * reclaim to evict a page we saw tagged.
-			 *
-			 * Skip over it.
-			 */
-			continue;
-		}
-
-		if (!page_cache_get_speculative(page))
-			goto repeat;
-
-		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			page_cache_release(page);
-			goto repeat;
-		}
-
-		pages[ret] = page;
-		if (++ret == nr_pages)
-			break;
-	}
-
-	rcu_read_unlock();
-
-	if (ret)
-		*index = pages[ret - 1]->index + 1;
-
-	return ret;
-}
-EXPORT_SYMBOL(find_get_pages_tag);
-
-/**
- * find_get_entries_tag - find and return entries that match @tag
- * @mapping:	the address_space to search
- * @start:	the starting page cache index
- * @tag:	the tag index
- * @nr_entries:	the maximum number of entries
- * @entries:	where the resulting entries are placed
- * @indices:	the cache indices corresponding to the entries in @entries
- *
- * Like find_get_entries, except we only return entries which are tagged with
- * @tag.
- */
-unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
-			int tag, unsigned int nr_entries,
-			struct page **entries, pgoff_t *indices)
-{
-	void **slot;
-	unsigned int ret = 0;
-	struct radix_tree_iter iter;
-
-	if (!nr_entries)
-		return 0;
-
-	rcu_read_lock();
-	radix_tree_for_each_tagged(slot, &mapping->page_tree,
-				   &iter, start, tag) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		if (unlikely(!page))
-			continue;
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-
-			/*
-			 * A shadow entry of a recently evicted page, a swap
-			 * entry from shmem/tmpfs or a DAX entry.  Return it
-			 * without attempting to raise page count.
-			 */
-			goto export;
-		}
-		if (!page_cache_get_speculative(page))
-			goto repeat;
-
-		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			page_cache_release(page);
-			goto repeat;
-		}
 export:
-		indices[ret] = iter.index;
+		if (indices)
+			indices[ret] = iter.index;
 		entries[ret] = page;
 		if (++ret == nr_entries)
 			break;
+		continue;
+no_entry:
+		if (flags & RADIX_TREE_ITER_CONTIG)
+			break;
 	}
 	rcu_read_unlock();
 	return ret;
 }
-EXPORT_SYMBOL(find_get_entries_tag);
+EXPORT_SYMBOL(__find_get_pages);
 
 /*
  * CD/DVDs are error prone. When a medium error occurs, the driver may fail
-- 
2.8.0.rc3

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 1/2] mm: Refactor find_get_pages() & friends
@ 2016-04-01  2:38       ` Kent Overstreet
  0 siblings, 0 replies; 16+ messages in thread
From: Kent Overstreet @ 2016-04-01  2:38 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Al Viro, linux-kernel, linux-mm, Kent Overstreet

Collapse redundant implementations of the various gang pagecache lookups - this
is also prep work for the pagecache iterators added in the next patch. This
gives us a single common interface (__find_get_pages()) that the pagecache
iterators will make use of.
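
(Not part of the patch - to make "single common interface" concrete, each of
the old entry points becomes a thin wrapper that only varies the flags word
passed to __find_get_pages(), e.g.:)

	/* find_get_pages(mapping, start, nr, pages) is now: */
	__find_get_pages(mapping, start, ULONG_MAX, nr, pages, NULL, 0);

	/* find_get_pages_tag(mapping, &index, tag, nr, pages) is the same
	 * call with RADIX_TREE_ITER_TAGGED|tag, plus the @index update
	 * done in the wrapper: */
	__find_get_pages(mapping, index, ULONG_MAX, nr, pages, NULL,
			 RADIX_TREE_ITER_TAGGED|tag);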

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/pagemap.h    | 142 ++++++++++++++++++--
 include/linux/radix-tree.h |  49 ++-----
 mm/filemap.c               | 316 +++++----------------------------------------
 3 files changed, 174 insertions(+), 333 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 1ebd65c914..e8ebf77407 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -339,18 +339,136 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
 
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
 struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
-unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
-			  unsigned int nr_entries, struct page **entries,
-			  pgoff_t *indices);
-unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
-			unsigned int nr_pages, struct page **pages);
-unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
-			       unsigned int nr_pages, struct page **pages);
-unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
-			int tag, unsigned int nr_pages, struct page **pages);
-unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
-			int tag, unsigned int nr_entries,
-			struct page **entries, pgoff_t *indices);
+
+unsigned __find_get_pages(struct address_space *mapping,
+			  pgoff_t start, pgoff_t end,
+			  unsigned nr_entries, struct page **entries,
+			  pgoff_t *indices, unsigned flags);
+
+/**
+ * find_get_entries - gang pagecache lookup
+ * @mapping:	The address_space to search
+ * @start:	The starting page cache index
+ * @nr_entries:	The maximum number of entries
+ * @entries:	Where the resulting entries are placed
+ * @indices:	The cache indices corresponding to the entries in @entries
+ *
+ * find_get_entries() will search for and return a group of up to
+ * @nr_entries entries in the mapping.  The entries are placed at
+ * @entries.  find_get_entries() takes a reference against any actual
+ * pages it returns.
+ *
+ * The search returns a group of mapping-contiguous page cache entries
+ * with ascending indexes.  There may be holes in the indices due to
+ * not-present pages.
+ *
+ * Any shadow entries of evicted pages, or swap entries from
+ * shmem/tmpfs, are included in the returned array.
+ *
+ * find_get_entries() returns the number of pages and shadow entries
+ * which were found.
+ */
+static inline unsigned find_get_entries(struct address_space *mapping,
+			pgoff_t start, unsigned nr_entries,
+			struct page **entries, pgoff_t *indices)
+{
+	return __find_get_pages(mapping, start, ULONG_MAX,
+				nr_entries, entries, indices,
+				RADIX_TREE_ITER_EXCEPTIONAL);
+}
+
+/**
+ * find_get_pages - gang pagecache lookup
+ * @mapping:	The address_space to search
+ * @start:	The starting page index
+ * @nr_pages:	The maximum number of pages
+ * @pages:	Where the resulting pages are placed
+ *
+ * find_get_pages() will search for and return a group of up to
+ * @nr_pages pages in the mapping.  The pages are placed at @pages.
+ * find_get_pages() takes a reference against the returned pages.
+ *
+ * The search returns a group of mapping-contiguous pages with ascending
+ * indexes.  There may be holes in the indices due to not-present pages.
+ *
+ * find_get_pages() returns the number of pages which were found.
+ */
+static inline unsigned find_get_pages(struct address_space *mapping,
+			pgoff_t start, unsigned nr_pages,
+			struct page **pages)
+{
+	return __find_get_pages(mapping, start, ULONG_MAX,
+				nr_pages, pages, NULL, 0);
+}
+
+/**
+ * find_get_pages_contig - gang contiguous pagecache lookup
+ * @mapping:	The address_space to search
+ * @start:	The starting page index
+ * @nr_pages:	The maximum number of pages
+ * @pages:	Where the resulting pages are placed
+ *
+ * find_get_pages_contig() works exactly like find_get_pages(), except
+ * that the returned number of pages are guaranteed to be contiguous.
+ *
+ * find_get_pages_contig() returns the number of pages which were found.
+ */
+static inline unsigned find_get_pages_contig(struct address_space *mapping,
+			pgoff_t start, unsigned nr_pages,
+			struct page **pages)
+{
+	return __find_get_pages(mapping, start, ULONG_MAX,
+				nr_pages, pages, NULL,
+				RADIX_TREE_ITER_CONTIG);
+}
+
+/**
+ * find_get_pages_tag - find and return pages that match @tag
+ * @mapping:	the address_space to search
+ * @index:	the starting page index
+ * @tag:	the tag index
+ * @nr_pages:	the maximum number of pages
+ * @pages:	where the resulting pages are placed
+ *
+ * Like find_get_pages, except we only return pages which are tagged with
+ * @tag.   We update @index to index the next page for the traversal.
+ */
+static inline unsigned find_get_pages_tag(struct address_space *mapping,
+			pgoff_t *index, int tag,
+			unsigned nr_pages, struct page **pages)
+{
+	unsigned ret;
+
+	ret = __find_get_pages(mapping, *index, ULONG_MAX,
+			       nr_pages, pages, NULL,
+			       RADIX_TREE_ITER_TAGGED|tag);
+	if (ret)
+		*index = pages[ret - 1]->index + 1;
+
+	return ret;
+}
+
+/**
+ * find_get_entries_tag - find and return entries that match @tag
+ * @mapping:	the address_space to search
+ * @start:	the starting page cache index
+ * @tag:	the tag index
+ * @nr_entries:	the maximum number of entries
+ * @entries:	where the resulting entries are placed
+ * @indices:	the cache indices corresponding to the entries in @entries
+ *
+ * Like find_get_entries, except we only return entries which are tagged with
+ * @tag.
+ */
+static inline unsigned find_get_entries_tag(struct address_space *mapping,
+			pgoff_t start, int tag, unsigned nr_entries,
+			struct page **entries, pgoff_t *indices)
+{
+	return __find_get_pages(mapping, start, ULONG_MAX,
+				nr_entries, entries, indices,
+				RADIX_TREE_ITER_EXCEPTIONAL|
+				RADIX_TREE_ITER_TAGGED|tag);
+}
 
 struct page *grab_cache_page_write_begin(struct address_space *mapping,
 			pgoff_t index, unsigned flags);
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 51a97ac8bf..b7539c94bc 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -346,6 +346,8 @@ struct radix_tree_iter {
 #define RADIX_TREE_ITER_TAG_MASK	0x00FF	/* tag index in lower byte */
 #define RADIX_TREE_ITER_TAGGED		0x0100	/* lookup tagged slots */
 #define RADIX_TREE_ITER_CONTIG		0x0200	/* stop at first hole */
+#define RADIX_TREE_ITER_EXCEPTIONAL	0x0400	/* include exceptional entries */
+						/* used by __find_get_pages() */
 
 /**
  * radix_tree_iter_init - initialize radix tree iterator
@@ -475,33 +477,10 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
 	return NULL;
 }
 
-/**
- * radix_tree_for_each_chunk - iterate over chunks
- *
- * @slot:	the void** variable for pointer to chunk first slot
- * @root:	the struct radix_tree_root pointer
- * @iter:	the struct radix_tree_iter pointer
- * @start:	iteration starting index
- * @flags:	RADIX_TREE_ITER_* and tag index
- *
- * Locks can be released and reacquired between iterations.
- */
-#define radix_tree_for_each_chunk(slot, root, iter, start, flags)	\
+#define __radix_tree_for_each_slot(slot, root, iter, start, flags)	\
 	for (slot = radix_tree_iter_init(iter, start) ;			\
-	      (slot = radix_tree_next_chunk(root, iter, flags)) ;)
-
-/**
- * radix_tree_for_each_chunk_slot - iterate over slots in one chunk
- *
- * @slot:	the void** variable, at the beginning points to chunk first slot
- * @iter:	the struct radix_tree_iter pointer
- * @flags:	RADIX_TREE_ITER_*, should be constant
- *
- * This macro is designed to be nested inside radix_tree_for_each_chunk().
- * @slot points to the radix tree slot, @iter->index contains its index.
- */
-#define radix_tree_for_each_chunk_slot(slot, iter, flags)		\
-	for (; slot ; slot = radix_tree_next_slot(slot, iter, flags))
+	     slot || (slot = radix_tree_next_chunk(root, iter, flags));	\
+	     slot = radix_tree_next_slot(slot, iter, flags))
 
 /**
  * radix_tree_for_each_slot - iterate over non-empty slots
@@ -514,9 +493,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
  * @slot points to radix tree slot, @iter->index contains its index.
  */
 #define radix_tree_for_each_slot(slot, root, iter, start)		\
-	for (slot = radix_tree_iter_init(iter, start) ;			\
-	     slot || (slot = radix_tree_next_chunk(root, iter, 0)) ;	\
-	     slot = radix_tree_next_slot(slot, iter, 0))
+	__radix_tree_for_each_slot(slot, root, iter, start, 0)
 
 /**
  * radix_tree_for_each_contig - iterate over contiguous slots
@@ -529,11 +506,8 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
  * @slot points to radix tree slot, @iter->index contains its index.
  */
 #define radix_tree_for_each_contig(slot, root, iter, start)		\
-	for (slot = radix_tree_iter_init(iter, start) ;			\
-	     slot || (slot = radix_tree_next_chunk(root, iter,		\
-				RADIX_TREE_ITER_CONTIG)) ;		\
-	     slot = radix_tree_next_slot(slot, iter,			\
-				RADIX_TREE_ITER_CONTIG))
+	__radix_tree_for_each_slot(slot, root, iter, start,		\
+			      RADIX_TREE_ITER_CONTIG)
 
 /**
  * radix_tree_for_each_tagged - iterate over tagged slots
@@ -547,10 +521,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
  * @slot points to radix tree slot, @iter->index contains its index.
  */
 #define radix_tree_for_each_tagged(slot, root, iter, start, tag)	\
-	for (slot = radix_tree_iter_init(iter, start) ;			\
-	     slot || (slot = radix_tree_next_chunk(root, iter,		\
-			      RADIX_TREE_ITER_TAGGED | tag)) ;		\
-	     slot = radix_tree_next_slot(slot, iter,			\
-				RADIX_TREE_ITER_TAGGED))
+	__radix_tree_for_each_slot(slot, root, iter, start,		\
+			      RADIX_TREE_ITER_TAGGED|tag)
 
 #endif /* _LINUX_RADIX_TREE_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index a8c69c8c0a..81ce03fbc1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1221,183 +1221,63 @@ no_page:
 EXPORT_SYMBOL(pagecache_get_page);
 
 /**
- * find_get_entries - gang pagecache lookup
+ * __find_get_pages - gang pagecache lookup, internal mechanism
  * @mapping:	The address_space to search
  * @start:	The starting page cache index
+ * @end:	Page cache index to stop at (inclusive)
  * @nr_entries:	The maximum number of entries
  * @entries:	Where the resulting entries are placed
- * @indices:	The cache indices corresponding to the entries in @entries
+ * @indices:	If non-NULL, indices of corresponding entries placed here
+ * @flags:	radix tree iter flags and tag (if supplied)
  *
- * find_get_entries() will search for and return a group of up to
- * @nr_entries entries in the mapping.  The entries are placed at
- * @entries.  find_get_entries() takes a reference against any actual
- * pages it returns.
+ * Don't use directly - see wrappers in pagemap.h
  *
- * The search returns a group of mapping-contiguous page cache entries
- * with ascending indexes.  There may be holes in the indices due to
- * not-present pages.
+ * Possible values for flags (may be used in combination):
  *
- * Any shadow entries of evicted pages, or swap entries from
- * shmem/tmpfs, are included in the returned array.
- *
- * find_get_entries() returns the number of pages and shadow entries
- * which were found.
+ * 0:				find_get_pages()
+ * RADIX_TREE_ITER_TAGGED|tag:	find_get_pages_tag()
+ * RADIX_TREE_ITER_CONTIG:	find_get_pages_contig()
+ * RADIX_TREE_ITER_EXCEPTIONAL:	find_get_entries()
  */
-unsigned find_get_entries(struct address_space *mapping,
-			  pgoff_t start, unsigned int nr_entries,
-			  struct page **entries, pgoff_t *indices)
-{
-	void **slot;
-	unsigned int ret = 0;
-	struct radix_tree_iter iter;
-
-	if (!nr_entries)
-		return 0;
-
-	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		if (unlikely(!page))
-			continue;
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-			/*
-			 * A shadow entry of a recently evicted page, a swap
-			 * entry from shmem/tmpfs or a DAX entry.  Return it
-			 * without attempting to raise page count.
-			 */
-			goto export;
-		}
-		if (!page_cache_get_speculative(page))
-			goto repeat;
-
-		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			page_cache_release(page);
-			goto repeat;
-		}
-export:
-		indices[ret] = iter.index;
-		entries[ret] = page;
-		if (++ret == nr_entries)
-			break;
-	}
-	rcu_read_unlock();
-	return ret;
-}
-
-/**
- * find_get_pages - gang pagecache lookup
- * @mapping:	The address_space to search
- * @start:	The starting page index
- * @nr_pages:	The maximum number of pages
- * @pages:	Where the resulting pages are placed
- *
- * find_get_pages() will search for and return a group of up to
- * @nr_pages pages in the mapping.  The pages are placed at @pages.
- * find_get_pages() takes a reference against the returned pages.
- *
- * The search returns a group of mapping-contiguous pages with ascending
- * indexes.  There may be holes in the indices due to not-present pages.
- *
- * find_get_pages() returns the number of pages which were found.
- */
-unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
-			    unsigned int nr_pages, struct page **pages)
+unsigned __find_get_pages(struct address_space *mapping,
+			  pgoff_t start, pgoff_t end,
+			  unsigned nr_entries, struct page **entries,
+			  pgoff_t *indices, unsigned flags)
 {
 	struct radix_tree_iter iter;
 	void **slot;
 	unsigned ret = 0;
 
-	if (unlikely(!nr_pages))
+	if (unlikely(!nr_entries || start > end))
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	__radix_tree_for_each_slot(slot, &mapping->page_tree,
+				   &iter, start, flags) {
 		struct page *page;
+
+		if (iter.index > end)
+			break;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
-			continue;
+			goto no_entry;
 
 		if (radix_tree_exception(page)) {
 			if (radix_tree_deref_retry(page)) {
 				slot = radix_tree_iter_retry(&iter);
 				continue;
 			}
+
 			/*
 			 * A shadow entry of a recently evicted page,
 			 * or a swap entry from shmem/tmpfs.  Skip
 			 * over it.
 			 */
-			continue;
-		}
-
-		if (!page_cache_get_speculative(page))
-			goto repeat;
+			if (flags & RADIX_TREE_ITER_EXCEPTIONAL)
+				goto export;
 
-		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			page_cache_release(page);
-			goto repeat;
-		}
-
-		pages[ret] = page;
-		if (++ret == nr_pages)
-			break;
-	}
-
-	rcu_read_unlock();
-	return ret;
-}
-
-/**
- * find_get_pages_contig - gang contiguous pagecache lookup
- * @mapping:	The address_space to search
- * @index:	The starting page index
- * @nr_pages:	The maximum number of pages
- * @pages:	Where the resulting pages are placed
- *
- * find_get_pages_contig() works exactly like find_get_pages(), except
- * that the returned number of pages are guaranteed to be contiguous.
- *
- * find_get_pages_contig() returns the number of pages which were found.
- */
-unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
-			       unsigned int nr_pages, struct page **pages)
-{
-	struct radix_tree_iter iter;
-	void **slot;
-	unsigned int ret = 0;
-
-	if (unlikely(!nr_pages))
-		return 0;
-
-	rcu_read_lock();
-	radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		/* The hole, there no reason to continue */
-		if (unlikely(!page))
-			break;
-
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-			/*
-			 * A shadow entry of a recently evicted page,
-			 * or a swap entry from shmem/tmpfs.  Stop
-			 * looking for contiguous pages.
-			 */
-			break;
+			goto no_entry;
 		}
 
 		if (!page_cache_get_speculative(page))
@@ -1414,154 +1294,26 @@ repeat:
 		 * otherwise we can get both false positives and false
 		 * negatives, which is just confusing to the caller.
 		 */
-		if (page->mapping == NULL || page->index != iter.index) {
+		if ((flags & RADIX_TREE_ITER_CONTIG) &&
+		    (page->mapping == NULL || page->index != iter.index)) {
 			page_cache_release(page);
 			break;
 		}
-
-		pages[ret] = page;
-		if (++ret == nr_pages)
-			break;
-	}
-	rcu_read_unlock();
-	return ret;
-}
-EXPORT_SYMBOL(find_get_pages_contig);
-
-/**
- * find_get_pages_tag - find and return pages that match @tag
- * @mapping:	the address_space to search
- * @index:	the starting page index
- * @tag:	the tag index
- * @nr_pages:	the maximum number of pages
- * @pages:	where the resulting pages are placed
- *
- * Like find_get_pages, except we only return pages which are tagged with
- * @tag.   We update @index to index the next page for the traversal.
- */
-unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
-			int tag, unsigned int nr_pages, struct page **pages)
-{
-	struct radix_tree_iter iter;
-	void **slot;
-	unsigned ret = 0;
-
-	if (unlikely(!nr_pages))
-		return 0;
-
-	rcu_read_lock();
-	radix_tree_for_each_tagged(slot, &mapping->page_tree,
-				   &iter, *index, tag) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		if (unlikely(!page))
-			continue;
-
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-			/*
-			 * A shadow entry of a recently evicted page.
-			 *
-			 * Those entries should never be tagged, but
-			 * this tree walk is lockless and the tags are
-			 * looked up in bulk, one radix tree node at a
-			 * time, so there is a sizable window for page
-			 * reclaim to evict a page we saw tagged.
-			 *
-			 * Skip over it.
-			 */
-			continue;
-		}
-
-		if (!page_cache_get_speculative(page))
-			goto repeat;
-
-		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			page_cache_release(page);
-			goto repeat;
-		}
-
-		pages[ret] = page;
-		if (++ret == nr_pages)
-			break;
-	}
-
-	rcu_read_unlock();
-
-	if (ret)
-		*index = pages[ret - 1]->index + 1;
-
-	return ret;
-}
-EXPORT_SYMBOL(find_get_pages_tag);
-
-/**
- * find_get_entries_tag - find and return entries that match @tag
- * @mapping:	the address_space to search
- * @start:	the starting page cache index
- * @tag:	the tag index
- * @nr_entries:	the maximum number of entries
- * @entries:	where the resulting entries are placed
- * @indices:	the cache indices corresponding to the entries in @entries
- *
- * Like find_get_entries, except we only return entries which are tagged with
- * @tag.
- */
-unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
-			int tag, unsigned int nr_entries,
-			struct page **entries, pgoff_t *indices)
-{
-	void **slot;
-	unsigned int ret = 0;
-	struct radix_tree_iter iter;
-
-	if (!nr_entries)
-		return 0;
-
-	rcu_read_lock();
-	radix_tree_for_each_tagged(slot, &mapping->page_tree,
-				   &iter, start, tag) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		if (unlikely(!page))
-			continue;
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-
-			/*
-			 * A shadow entry of a recently evicted page, a swap
-			 * entry from shmem/tmpfs or a DAX entry.  Return it
-			 * without attempting to raise page count.
-			 */
-			goto export;
-		}
-		if (!page_cache_get_speculative(page))
-			goto repeat;
-
-		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			page_cache_release(page);
-			goto repeat;
-		}
 export:
-		indices[ret] = iter.index;
+		if (indices)
+			indices[ret] = iter.index;
 		entries[ret] = page;
 		if (++ret == nr_entries)
 			break;
+		continue;
+no_entry:
+		if (flags & RADIX_TREE_ITER_CONTIG)
+			break;
 	}
 	rcu_read_unlock();
 	return ret;
 }
-EXPORT_SYMBOL(find_get_entries_tag);
+EXPORT_SYMBOL(__find_get_pages);
 
 /*
  * CD/DVDs are error prone. When a medium error occurs, the driver may fail
-- 
2.8.0.rc3


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/2] mm: Real pagecache iterators
  2016-04-01  2:38       ` Kent Overstreet
@ 2016-04-01  2:38         ` Kent Overstreet
  -1 siblings, 0 replies; 16+ messages in thread
From: Kent Overstreet @ 2016-04-01  2:38 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Al Viro, linux-kernel, linux-mm, Kent Overstreet

Introduce for_each_pagecache_page() and related macros, with the goal of
replacing most/all uses of pagevec_lookup().

For the most part this shouldn't be a functional change. The one functional
difference with the new macros is that they now take an @end parameter, so we're
able to avoid grabbing pages in __find_get_pages() that we'll never use.

This patch only does some of the conversions - the ones I was able to easily
test myself. The conversions are mechanical, but tricky enough that they
generally warrant testing.
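
(Not part of the patch - the shape of a typical conversion, with
walk_old()/walk_new() as made-up names and "per-page work" standing in for
whatever the caller does:)

static void walk_old(struct address_space *mapping, pgoff_t index, pgoff_t end)
{
	struct pagevec pvec;
	unsigned i, nr;

	pagevec_init(&pvec, 0);
	while (index <= end &&
	       (nr = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE))) {
		for (i = 0; i < nr; i++) {
			struct page *page = pvec.pages[i];

			if (page->index > end)
				break;
			/* per-page work */
		}
		index = pvec.pages[nr - 1]->index + 1;
		pagevec_release(&pvec);
	}
}

static void walk_new(struct address_space *mapping, pgoff_t start, pgoff_t end)
{
	struct pagecache_iter iter;
	struct page *page;

	/* batching, the @end check and the page refs are handled inside */
	for_each_pagecache_page(&iter, mapping, start, end, page) {
		/* per-page work */
	}
}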

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/inode.c         | 261 ++++++++++++++++++++----------------------------
 include/linux/pagevec.h |  67 ++++++++++++-
 mm/filemap.c            |  76 +++++++++-----
 mm/page-writeback.c     | 148 +++++++++++----------------
 mm/swap.c               |  33 +-----
 mm/truncate.c           | 259 +++++++++++++++++------------------------------
 6 files changed, 380 insertions(+), 464 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index dab84a2530..c4d73f67b5 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1605,11 +1605,10 @@ struct mpage_da_data {
 static void mpage_release_unused_pages(struct mpage_da_data *mpd,
 				       bool invalidate)
 {
-	int nr_pages, i;
 	pgoff_t index, end;
-	struct pagevec pvec;
+	struct pagecache_iter iter;
+	struct page *page;
 	struct inode *inode = mpd->inode;
-	struct address_space *mapping = inode->i_mapping;
 
 	/* This is necessary when next_page == 0. */
 	if (mpd->first_page >= mpd->next_page)
@@ -1624,25 +1623,14 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
 		ext4_es_remove_extent(inode, start, last - start + 1);
 	}
 
-	pagevec_init(&pvec, 0);
-	while (index <= end) {
-		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
-		if (nr_pages == 0)
-			break;
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
-			if (page->index > end)
-				break;
-			BUG_ON(!PageLocked(page));
-			BUG_ON(PageWriteback(page));
-			if (invalidate) {
-				block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
-				ClearPageUptodate(page);
-			}
-			unlock_page(page);
+	for_each_pagecache_page(&iter, inode->i_mapping, index, end, page) {
+		BUG_ON(!PageLocked(page));
+		BUG_ON(PageWriteback(page));
+		if (invalidate) {
+			block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+			ClearPageUptodate(page);
 		}
-		index = pvec.pages[nr_pages - 1]->index + 1;
-		pagevec_release(&pvec);
+		unlock_page(page);
 	}
 }
 
@@ -2209,8 +2197,8 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
  */
 static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
 {
-	struct pagevec pvec;
-	int nr_pages, i;
+	struct pagecache_iter iter;
+	struct page *page;
 	struct inode *inode = mpd->inode;
 	struct buffer_head *head, *bh;
 	int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
@@ -2224,67 +2212,55 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
 	lblk = start << bpp_bits;
 	pblock = mpd->map.m_pblk;
 
-	pagevec_init(&pvec, 0);
-	while (start <= end) {
-		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start,
-					  PAGEVEC_SIZE);
-		if (nr_pages == 0)
-			break;
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
-
-			if (page->index > end)
-				break;
-			/* Up to 'end' pages must be contiguous */
-			BUG_ON(page->index != start);
-			bh = head = page_buffers(page);
-			do {
-				if (lblk < mpd->map.m_lblk)
-					continue;
-				if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
-					/*
-					 * Buffer after end of mapped extent.
-					 * Find next buffer in the page to map.
-					 */
-					mpd->map.m_len = 0;
-					mpd->map.m_flags = 0;
-					/*
-					 * FIXME: If dioread_nolock supports
-					 * blocksize < pagesize, we need to make
-					 * sure we add size mapped so far to
-					 * io_end->size as the following call
-					 * can submit the page for IO.
-					 */
-					err = mpage_process_page_bufs(mpd, head,
-								      bh, lblk);
-					pagevec_release(&pvec);
-					if (err > 0)
-						err = 0;
-					return err;
-				}
-				if (buffer_delay(bh)) {
-					clear_buffer_delay(bh);
-					bh->b_blocknr = pblock++;
-				}
-				clear_buffer_unwritten(bh);
-			} while (lblk++, (bh = bh->b_this_page) != head);
-
-			/*
-			 * FIXME: This is going to break if dioread_nolock
-			 * supports blocksize < pagesize as we will try to
-			 * convert potentially unmapped parts of inode.
-			 */
-			mpd->io_submit.io_end->size += PAGE_CACHE_SIZE;
-			/* Page fully mapped - let IO run! */
-			err = mpage_submit_page(mpd, page);
-			if (err < 0) {
-				pagevec_release(&pvec);
+	for_each_pagecache_page(&iter, inode->i_mapping, start, end, page) {
+		/* Up to 'end' pages must be contiguous */
+		BUG_ON(page->index != start);
+		bh = head = page_buffers(page);
+		do {
+			if (lblk < mpd->map.m_lblk)
+				continue;
+			if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
+				/*
+				 * Buffer after end of mapped extent. Find next
+				 * buffer in the page to map.
+				 */
+				mpd->map.m_len = 0;
+				mpd->map.m_flags = 0;
+				/*
+				 * FIXME: If dioread_nolock supports blocksize <
+				 * pagesize, we need to make sure we add size
+				 * mapped so far to io_end->size as the
+				 * following call can submit the page for IO.
+				 */
+				err = mpage_process_page_bufs(mpd, head,
+							      bh, lblk);
+				pagecache_iter_release(&iter);
+				if (err > 0)
+					err = 0;
 				return err;
 			}
-			start++;
+			if (buffer_delay(bh)) {
+				clear_buffer_delay(bh);
+				bh->b_blocknr = pblock++;
+			}
+			clear_buffer_unwritten(bh);
+		} while (lblk++, (bh = bh->b_this_page) != head);
+
+		/*
+		 * FIXME: This is going to break if dioread_nolock supports
+		 * blocksize < pagesize as we will try to convert potentially
+		 * unmapped parts of inode.
+		 */
+		mpd->io_submit.io_end->size += PAGE_CACHE_SIZE;
+		/* Page fully mapped - let IO run! */
+		err = mpage_submit_page(mpd, page);
+		if (err < 0) {
+			pagecache_iter_release(&iter);
+			return err;
 		}
-		pagevec_release(&pvec);
+		start++;
 	}
+
 	/* Extent fully mapped and matches with page boundary. We are done. */
 	mpd->map.m_len = 0;
 	mpd->map.m_flags = 0;
@@ -2485,13 +2461,10 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
 {
 	struct address_space *mapping = mpd->inode->i_mapping;
-	struct pagevec pvec;
-	unsigned int nr_pages;
+	struct pagecache_iter iter;
+	struct page *page;
 	long left = mpd->wbc->nr_to_write;
-	pgoff_t index = mpd->first_page;
-	pgoff_t end = mpd->last_page;
-	int tag;
-	int i, err = 0;
+	int tag, err = 0;
 	int blkbits = mpd->inode->i_blkbits;
 	ext4_lblk_t lblk;
 	struct buffer_head *head;
@@ -2501,81 +2474,59 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
 	else
 		tag = PAGECACHE_TAG_DIRTY;
 
-	pagevec_init(&pvec, 0);
 	mpd->map.m_len = 0;
-	mpd->next_page = index;
-	while (index <= end) {
-		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
-			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
-		if (nr_pages == 0)
-			goto out;
-
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
+	mpd->next_page = mpd->first_page;
 
-			/*
-			 * At this point, the page may be truncated or
-			 * invalidated (changing page->mapping to NULL), or
-			 * even swizzled back from swapper_space to tmpfs file
-			 * mapping. However, page->index will not change
-			 * because we have a reference on the page.
-			 */
-			if (page->index > end)
-				goto out;
-
-			/*
-			 * Accumulated enough dirty pages? This doesn't apply
-			 * to WB_SYNC_ALL mode. For integrity sync we have to
-			 * keep going because someone may be concurrently
-			 * dirtying pages, and we might have synced a lot of
-			 * newly appeared dirty pages, but have not synced all
-			 * of the old dirty pages.
-			 */
-			if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0)
-				goto out;
-
-			/* If we can't merge this page, we are done. */
-			if (mpd->map.m_len > 0 && mpd->next_page != page->index)
-				goto out;
+	for_each_pagecache_tag(&iter, mapping, tag, mpd->first_page,
+			       mpd->last_page, page) {
+		/*
+		 * Accumulated enough dirty pages? This doesn't apply to
+		 * WB_SYNC_ALL mode. For integrity sync we have to keep going
+		 * because someone may be concurrently dirtying pages, and we
+		 * might have synced a lot of newly appeared dirty pages, but
+		 * have not synced all of the old dirty pages.
+		 */
+		if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0)
+			break;
 
-			lock_page(page);
-			/*
-			 * If the page is no longer dirty, or its mapping no
-			 * longer corresponds to inode we are writing (which
-			 * means it has been truncated or invalidated), or the
-			 * page is already under writeback and we are not doing
-			 * a data integrity writeback, skip the page
-			 */
-			if (!PageDirty(page) ||
-			    (PageWriteback(page) &&
-			     (mpd->wbc->sync_mode == WB_SYNC_NONE)) ||
-			    unlikely(page->mapping != mapping)) {
-				unlock_page(page);
-				continue;
-			}
+		/* If we can't merge this page, we are done. */
+		if (mpd->map.m_len > 0 && mpd->next_page != page->index)
+			break;
 
-			wait_on_page_writeback(page);
-			BUG_ON(PageWriteback(page));
-
-			if (mpd->map.m_len == 0)
-				mpd->first_page = page->index;
-			mpd->next_page = page->index + 1;
-			/* Add all dirty buffers to mpd */
-			lblk = ((ext4_lblk_t)page->index) <<
-				(PAGE_CACHE_SHIFT - blkbits);
-			head = page_buffers(page);
-			err = mpage_process_page_bufs(mpd, head, head, lblk);
-			if (err <= 0)
-				goto out;
-			err = 0;
-			left--;
+		lock_page(page);
+		/*
+		 * If the page is no longer dirty, or its mapping no longer
+		 * corresponds to inode we are writing (which means it has been
+		 * truncated or invalidated), or the page is already under
+		 * writeback and we are not doing a data integrity writeback,
+		 * skip the page
+		 */
+		if (!PageDirty(page) ||
+		    (PageWriteback(page) &&
+		     (mpd->wbc->sync_mode == WB_SYNC_NONE)) ||
+		    unlikely(page->mapping != mapping)) {
+			unlock_page(page);
+			continue;
 		}
-		pagevec_release(&pvec);
-		cond_resched();
+
+		wait_on_page_writeback(page);
+		BUG_ON(PageWriteback(page));
+
+		if (mpd->map.m_len == 0)
+			mpd->first_page = page->index;
+		mpd->next_page = page->index + 1;
+		/* Add all dirty buffers to mpd */
+		lblk = ((ext4_lblk_t)page->index) <<
+			(PAGE_CACHE_SHIFT - blkbits);
+		head = page_buffers(page);
+		err = mpage_process_page_bufs(mpd, head, head, lblk);
+		if (err <= 0)
+			break;
+		err = 0;
+		left--;
 	}
-	return 0;
-out:
-	pagevec_release(&pvec);
+	pagecache_iter_release(&iter);
+
 	return err;
 }
 
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index b45d391b45..e60d74148d 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -22,10 +22,6 @@ struct pagevec {
 
 void __pagevec_release(struct pagevec *pvec);
 void __pagevec_lru_add(struct pagevec *pvec);
-unsigned pagevec_lookup_entries(struct pagevec *pvec,
-				struct address_space *mapping,
-				pgoff_t start, unsigned nr_entries,
-				pgoff_t *indices);
 void pagevec_remove_exceptionals(struct pagevec *pvec);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
 		pgoff_t start, unsigned nr_pages);
@@ -69,4 +65,67 @@ static inline void pagevec_release(struct pagevec *pvec)
 		__pagevec_release(pvec);
 }
 
+struct pagecache_iter {
+	unsigned	nr;
+	unsigned	idx;
+	pgoff_t		index;
+	struct page	*pages[PAGEVEC_SIZE];
+	pgoff_t		indices[PAGEVEC_SIZE];
+};
+
+static inline void pagecache_iter_init(struct pagecache_iter *iter,
+				       pgoff_t start)
+{
+	iter->nr	= 0;
+	iter->idx	= 0;
+	iter->index	= start;
+}
+
+void __pagecache_iter_release(struct pagecache_iter *iter);
+
+/**
+ * pagecache_iter_release - release cached pages from pagecache_iter
+ *
+ * Must be called if breaking out of for_each_pagecache_page() etc. early - not
+ * needed if pagecache_iter_next() returned NULL and the loop terminated normally
+ */
+static inline void pagecache_iter_release(struct pagecache_iter *iter)
+{
+	if (iter->nr)
+		__pagecache_iter_release(iter);
+}
+
+struct page *pagecache_iter_next(struct pagecache_iter *iter,
+				 struct address_space *mapping,
+				 pgoff_t end, pgoff_t *index,
+				 unsigned flags);
+
+#define __pagecache_iter_for_each(_iter, _mapping, _start, _end,	\
+				  _page, _index, _flags)		\
+	for (pagecache_iter_init((_iter), (_start));			\
+	     ((_page) = pagecache_iter_next((_iter), (_mapping),	\
+			(_end), (_index), (_flags)));)
+
+#define for_each_pagecache_page(_iter, _mapping, _start, _end, _page)	\
+	__pagecache_iter_for_each((_iter), (_mapping), (_start), (_end),\
+			(_page), NULL, 0)
+
+#define for_each_pagecache_page_contig(_iter, _mapping, _start, _end, _page)\
+	__pagecache_iter_for_each((_iter), (_mapping), (_start), (_end),\
+			(_page), NULL, RADIX_TREE_ITER_CONTIG)
+
+#define for_each_pagecache_tag(_iter, _mapping, _tag, _start, _end, _page)\
+	__pagecache_iter_for_each((_iter), (_mapping), (_start), (_end),\
+			(_page), NULL, RADIX_TREE_ITER_TAGGED|(_tag))
+
+#define for_each_pagecache_entry(_iter, _mapping, _start, _end, _page, _index)\
+	__pagecache_iter_for_each((_iter), (_mapping), (_start), (_end),\
+			(_page), &(_index), RADIX_TREE_ITER_EXCEPTIONAL)
+
+#define for_each_pagecache_entry_tag(_iter, _mapping, _tag,		\
+				     _start, _end, _page, _index)	\
+	__pagecache_iter_for_each((_iter), (_mapping), (_start), (_end),\
+			(_page), &(_index), RADIX_TREE_ITER_EXCEPTIONAL|\
+			RADIX_TREE_ITER_TAGGED|(_tag))
+
 #endif /* _LINUX_PAGEVEC_H */
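
(Not part of the patch - the one rule the new macros impose: on early exit the
iterator still holds page references, which must be dropped by hand, as the
converted callers below do. A sketch, where process_page() is a hypothetical
per-page helper:)

static int walk_until_error(struct address_space *mapping,
			    pgoff_t start, pgoff_t end)
{
	struct pagecache_iter iter;
	struct page *page;
	int err = 0;

	for_each_pagecache_page(&iter, mapping, start, end, page) {
		err = process_page(page);	/* hypothetical */
		if (err)
			break;			/* iter still holds page refs */
	}
	pagecache_iter_release(&iter);	/* no-op after normal termination */
	return err;
}
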
diff --git a/mm/filemap.c b/mm/filemap.c
index 81ce03fbc1..11fbc97f8e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -352,35 +352,20 @@ EXPORT_SYMBOL(filemap_flush);
 static int __filemap_fdatawait_range(struct address_space *mapping,
 				     loff_t start_byte, loff_t end_byte)
 {
-	pgoff_t index = start_byte >> PAGE_CACHE_SHIFT;
+	pgoff_t start = start_byte >> PAGE_CACHE_SHIFT;
 	pgoff_t end = end_byte >> PAGE_CACHE_SHIFT;
-	struct pagevec pvec;
-	int nr_pages;
+	struct pagecache_iter iter;
+	struct page *page;
 	int ret = 0;
 
 	if (end_byte < start_byte)
 		goto out;
 
-	pagevec_init(&pvec, 0);
-	while ((index <= end) &&
-			(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			PAGECACHE_TAG_WRITEBACK,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
-		unsigned i;
-
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
-
-			/* until radix tree lookup accepts end_index */
-			if (page->index > end)
-				continue;
-
-			wait_on_page_writeback(page);
-			if (TestClearPageError(page))
-				ret = -EIO;
-		}
-		pagevec_release(&pvec);
-		cond_resched();
+	for_each_pagecache_tag(&iter, mapping, PAGECACHE_TAG_WRITEBACK,
+			       start, end, page) {
+		wait_on_page_writeback(page);
+		if (TestClearPageError(page))
+			ret = -EIO;
 	}
 out:
 	return ret;
@@ -1315,6 +1300,51 @@ no_entry:
 }
 EXPORT_SYMBOL(__find_get_pages);
 
+void __pagecache_iter_release(struct pagecache_iter *iter)
+{
+	lru_add_drain();
+	release_pages(iter->pages, iter->nr, 0);
+	iter->nr	= 0;
+	iter->idx	= 0;
+}
+EXPORT_SYMBOL(__pagecache_iter_release);
+
+/**
+ * pagecache_iter_next - get next page from pagecache iterator and advance
+ * iterator
+ * @iter:	The iterator to advance
+ * @mapping:	The address_space to search
+ * @end:	Page cache index to stop at (inclusive)
+ * @index:	if non-NULL, index of page or entry will be returned here
+ * @flags:	radix tree iter flags and tag for __find_get_pages()
+ */
+struct page *pagecache_iter_next(struct pagecache_iter *iter,
+				 struct address_space *mapping,
+				 pgoff_t end, pgoff_t *index,
+				 unsigned flags)
+{
+	struct page *page;
+
+	if (iter->idx >= iter->nr) {
+		pagecache_iter_release(iter);
+		cond_resched();
+
+		iter->nr = __find_get_pages(mapping, iter->index, end,
+					    PAGEVEC_SIZE, iter->pages,
+					    iter->indices, flags);
+		if (!iter->nr)
+			return NULL;
+	}
+
+	iter->index	= iter->indices[iter->idx] + 1;
+	if (index)
+		*index	= iter->indices[iter->idx];
+	page		= iter->pages[iter->idx];
+	iter->idx++;
+	return page;
+}
+EXPORT_SYMBOL(pagecache_iter_next);
+
 /*
  * CD/DVDs are error prone. When a medium error occurs, the driver may fail
  * a _large_ part of the i/o request. Imagine the worst scenario:
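
(Not part of the patch - the _entry variants also return exceptional entries
(shadow/swap), for which no page reference was taken, so callers check before
touching the struct page; the truncate.c conversion below follows this
pattern. scan_entries() is a made-up name:)

static void scan_entries(struct address_space *mapping,
			 pgoff_t start, pgoff_t end)
{
	struct pagecache_iter iter;
	struct page *page;
	pgoff_t index;

	for_each_pagecache_entry(&iter, mapping, start, end, page, index) {
		if (radix_tree_exceptional_entry(page))
			continue;	/* not a real page, no ref held */
		/* per-page work; @index is the entry's pagecache index */
	}
}
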
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 11ff8f7586..2eb2e93313 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2154,10 +2154,10 @@ int write_cache_pages(struct address_space *mapping,
 		      struct writeback_control *wbc, writepage_t writepage,
 		      void *data)
 {
+	struct pagecache_iter iter;
+	struct page *page;
 	int ret = 0;
 	int done = 0;
-	struct pagevec pvec;
-	int nr_pages;
 	pgoff_t uninitialized_var(writeback_index);
 	pgoff_t index;
 	pgoff_t end;		/* Inclusive */
@@ -2166,7 +2166,6 @@ int write_cache_pages(struct address_space *mapping,
 	int range_whole = 0;
 	int tag;
 
-	pagevec_init(&pvec, 0);
 	if (wbc->range_cyclic) {
 		writeback_index = mapping->writeback_index; /* prev offset */
 		index = writeback_index;
@@ -2189,105 +2188,80 @@ int write_cache_pages(struct address_space *mapping,
 retry:
 	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
 		tag_pages_for_writeback(mapping, index, end);
-	done_index = index;
-	while (!done && (index <= end)) {
-		int i;
-
-		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
-			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
-		if (nr_pages == 0)
-			break;
 
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
-
-			/*
-			 * At this point, the page may be truncated or
-			 * invalidated (changing page->mapping to NULL), or
-			 * even swizzled back from swapper_space to tmpfs file
-			 * mapping. However, page->index will not change
-			 * because we have a reference on the page.
-			 */
-			if (page->index > end) {
-				/*
-				 * can't be range_cyclic (1st pass) because
-				 * end == -1 in that case.
-				 */
-				done = 1;
-				break;
-			}
+	done_index = index;
 
-			done_index = page->index;
+	for_each_pagecache_tag(&iter, mapping, tag, index, end, page) {
+		done_index = page->index;
 
-			lock_page(page);
+		lock_page(page);
 
-			/*
-			 * Page truncated or invalidated. We can freely skip it
-			 * then, even for data integrity operations: the page
-			 * has disappeared concurrently, so there could be no
-			 * real expectation of this data interity operation
-			 * even if there is now a new, dirty page at the same
-			 * pagecache address.
-			 */
-			if (unlikely(page->mapping != mapping)) {
+		/*
+		 * Page truncated or invalidated. We can freely skip it
+		 * then, even for data integrity operations: the page
+		 * has disappeared concurrently, so there could be no
+		 * real expectation of this data integrity operation
+		 * even if there is now a new, dirty page at the same
+		 * pagecache address.
+		 */
+		if (unlikely(page->mapping != mapping)) {
 continue_unlock:
-				unlock_page(page);
-				continue;
-			}
-
-			if (!PageDirty(page)) {
-				/* someone wrote it for us */
-				goto continue_unlock;
-			}
+			unlock_page(page);
+			continue;
+		}
 
-			if (PageWriteback(page)) {
-				if (wbc->sync_mode != WB_SYNC_NONE)
-					wait_on_page_writeback(page);
-				else
-					goto continue_unlock;
-			}
+		if (!PageDirty(page)) {
+			/* someone wrote it for us */
+			goto continue_unlock;
+		}
 
-			BUG_ON(PageWriteback(page));
-			if (!clear_page_dirty_for_io(page))
+		if (PageWriteback(page)) {
+			if (wbc->sync_mode != WB_SYNC_NONE)
+				wait_on_page_writeback(page);
+			else
 				goto continue_unlock;
+		}
 
-			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
-			ret = (*writepage)(page, wbc, data);
-			if (unlikely(ret)) {
-				if (ret == AOP_WRITEPAGE_ACTIVATE) {
-					unlock_page(page);
-					ret = 0;
-				} else {
-					/*
-					 * done_index is set past this page,
-					 * so media errors will not choke
-					 * background writeout for the entire
-					 * file. This has consequences for
-					 * range_cyclic semantics (ie. it may
-					 * not be suitable for data integrity
-					 * writeout).
-					 */
-					done_index = page->index + 1;
-					done = 1;
-					break;
-				}
-			}
+		BUG_ON(PageWriteback(page));
+		if (!clear_page_dirty_for_io(page))
+			goto continue_unlock;
 
-			/*
-			 * We stop writing back only if we are not doing
-			 * integrity sync. In case of integrity sync we have to
-			 * keep going until we have written all the pages
-			 * we tagged for writeback prior to entering this loop.
-			 */
-			if (--wbc->nr_to_write <= 0 &&
-			    wbc->sync_mode == WB_SYNC_NONE) {
+		trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
+		ret = (*writepage)(page, wbc, data);
+		if (unlikely(ret)) {
+			if (ret == AOP_WRITEPAGE_ACTIVATE) {
+				unlock_page(page);
+				ret = 0;
+			} else {
+				/*
+				 * done_index is set past this page,
+				 * so media errors will not choke
+				 * background writeout for the entire
+				 * file. This has consequences for
+				 * range_cyclic semantics (ie. it may
+				 * not be suitable for data integrity
+				 * writeout).
+				 */
+				done_index = page->index + 1;
 				done = 1;
 				break;
 			}
 		}
-		pagevec_release(&pvec);
-		cond_resched();
+
+		/*
+		 * We stop writing back only if we are not doing
+		 * integrity sync. In case of integrity sync we have to
+		 * keep going until we have written all the pages
+		 * we tagged for writeback prior to entering this loop.
+		 */
+		if (--wbc->nr_to_write <= 0 &&
+		    wbc->sync_mode == WB_SYNC_NONE) {
+			done = 1;
+			break;
+		}
 	}
+	pagecache_iter_release(&iter);
+
 	if (!cycled && !done) {
 		/*
 		 * range_cyclic:
diff --git a/mm/swap.c b/mm/swap.c
index 09fe5e9771..f48cedeb1c 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -718,6 +718,9 @@ void release_pages(struct page **pages, int nr, bool cold)
 	for (i = 0; i < nr; i++) {
 		struct page *page = pages[i];
 
+		if (radix_tree_exceptional_entry(page))
+			continue;
+
 		/*
 		 * Make sure the IRQ-safe lock-holding time does not get
 		 * excessive with a continuous string of pages from the
@@ -857,36 +860,6 @@ void __pagevec_lru_add(struct pagevec *pvec)
 EXPORT_SYMBOL(__pagevec_lru_add);
 
 /**
- * pagevec_lookup_entries - gang pagecache lookup
- * @pvec:	Where the resulting entries are placed
- * @mapping:	The address_space to search
- * @start:	The starting entry index
- * @nr_entries:	The maximum number of entries
- * @indices:	The cache indices corresponding to the entries in @pvec
- *
- * pagevec_lookup_entries() will search for and return a group of up
- * to @nr_entries pages and shadow entries in the mapping.  All
- * entries are placed in @pvec.  pagevec_lookup_entries() takes a
- * reference against actual pages in @pvec.
- *
- * The search returns a group of mapping-contiguous entries with
- * ascending indexes.  There may be holes in the indices due to
- * not-present entries.
- *
- * pagevec_lookup_entries() returns the number of entries which were
- * found.
- */
-unsigned pagevec_lookup_entries(struct pagevec *pvec,
-				struct address_space *mapping,
-				pgoff_t start, unsigned nr_pages,
-				pgoff_t *indices)
-{
-	pvec->nr = find_get_entries(mapping, start, nr_pages,
-				    pvec->pages, indices);
-	return pagevec_count(pvec);
-}
-
-/**
  * pagevec_remove_exceptionals - pagevec exceptionals pruning
  * @pvec:	The pagevec to prune
  *
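
Related note: with pagevec_lookup_entries() gone, exceptional-entry handling
moves into the common path. for_each_pagecache_entry() stores shadow/swap
entries in iter->pages[] next to real pages, and __pagecache_iter_release()
feeds that whole array to release_pages(), which is why release_pages() above
learns to skip entries that are not refcounted pages. From the caller's side
(hypothetical helper, not in this patch):

#include <linux/pagevec.h>
#include <linux/radix-tree.h>

/* Hypothetical example: count shadow/swap entries in a mapping. */
static unsigned long count_exceptional_entries(struct address_space *mapping)
{
	struct pagecache_iter iter;
	struct page *page;
	pgoff_t index;
	unsigned long nr = 0;

	for_each_pagecache_entry(&iter, mapping, 0, ULONG_MAX, page, index) {
		if (radix_tree_exceptional_entry(page))
			nr++;	/* not a page: no reference was taken */
	}
	/* loop ended normally, so the iterator has already been drained */

	return nr;
}
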
diff --git a/mm/truncate.c b/mm/truncate.c
index 7598b552ae..dca55e4b97 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -231,10 +231,10 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	pgoff_t		end;		/* exclusive */
 	unsigned int	partial_start;	/* inclusive */
 	unsigned int	partial_end;	/* exclusive */
-	struct pagevec	pvec;
-	pgoff_t		indices[PAGEVEC_SIZE];
-	pgoff_t		index;
-	int		i;
+	struct pagecache_iter iter;
+	struct page *page;
+	pgoff_t index;
+	bool found;
 
 	cleancache_invalidate_inode(mapping);
 	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
@@ -250,51 +250,36 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	 * start of the range and 'partial_end' at the end of the range.
 	 * Note that 'end' is exclusive while 'lend' is inclusive.
 	 */
-	start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	start = round_up(lstart, PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT;
 	if (lend == -1)
 		/*
-		 * lend == -1 indicates end-of-file so we have to set 'end'
-		 * to the highest possible pgoff_t and since the type is
-		 * unsigned we're using -1.
+		 * lend == -1 indicates end-of-file so we have to set 'end' to
+		 * the highest possible pgoff_t
 		 */
-		end = -1;
+		end = ULONG_MAX;
 	else
 		end = (lend + 1) >> PAGE_CACHE_SHIFT;
 
-	pagevec_init(&pvec, 0);
-	index = start;
-	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE),
-			indices)) {
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
-
-			/* We rely upon deletion not changing page->index */
-			index = indices[i];
-			if (index >= end)
-				break;
+	if (start >= end)
+		goto do_partial;
 
-			if (radix_tree_exceptional_entry(page)) {
-				clear_exceptional_entry(mapping, index, page);
-				continue;
-			}
+	for_each_pagecache_entry(&iter, mapping, start, end - 1, page, index) {
+		if (radix_tree_exceptional_entry(page)) {
+			clear_exceptional_entry(mapping, index, page);
+			continue;
+		}
 
-			if (!trylock_page(page))
-				continue;
-			WARN_ON(page->index != index);
-			if (PageWriteback(page)) {
-				unlock_page(page);
-				continue;
-			}
-			truncate_inode_page(mapping, page);
+		if (!trylock_page(page))
+			continue;
+		WARN_ON(page->index != index);
+		if (PageWriteback(page)) {
 			unlock_page(page);
+			continue;
 		}
-		pagevec_remove_exceptionals(&pvec);
-		pagevec_release(&pvec);
-		cond_resched();
-		index++;
+		truncate_inode_page(mapping, page);
+		unlock_page(page);
 	}
-
+do_partial:
 	if (partial_start) {
 		struct page *page = find_lock_page(mapping, start - 1);
 		if (page) {
@@ -334,34 +319,12 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	if (start >= end)
 		return;
 
-	index = start;
-	for ( ; ; ) {
-		cond_resched();
-		if (!pagevec_lookup_entries(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) {
-			/* If all gone from start onwards, we're done */
-			if (index == start)
-				break;
-			/* Otherwise restart to make sure all gone */
-			index = start;
-			continue;
-		}
-		if (index == start && indices[0] >= end) {
-			/* All gone out of hole to be punched, we're done */
-			pagevec_remove_exceptionals(&pvec);
-			pagevec_release(&pvec);
-			break;
-		}
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
-
-			/* We rely upon deletion not changing page->index */
-			index = indices[i];
-			if (index >= end) {
-				/* Restart punch to make sure all gone */
-				index = start - 1;
-				break;
-			}
+	do {
+		found = false;
+
+		for_each_pagecache_entry(&iter, mapping, start,
+					 end - 1, page, index) {
+			found = true;
 
 			if (radix_tree_exceptional_entry(page)) {
 				clear_exceptional_entry(mapping, index, page);
@@ -374,10 +337,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			truncate_inode_page(mapping, page);
 			unlock_page(page);
 		}
-		pagevec_remove_exceptionals(&pvec);
-		pagevec_release(&pvec);
-		index++;
-	}
+	} while (found);
+
 	cleancache_invalidate_inode(mapping);
 }
 EXPORT_SYMBOL(truncate_inode_pages_range);
@@ -463,48 +424,32 @@ EXPORT_SYMBOL(truncate_inode_pages_final);
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t end)
 {
-	pgoff_t indices[PAGEVEC_SIZE];
-	struct pagevec pvec;
-	pgoff_t index = start;
+	struct pagecache_iter iter;
+	struct page *page;
+	pgoff_t index;
 	unsigned long ret;
 	unsigned long count = 0;
-	int i;
-
-	pagevec_init(&pvec, 0);
-	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
-			indices)) {
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
 
-			/* We rely upon deletion not changing page->index */
-			index = indices[i];
-			if (index > end)
-				break;
-
-			if (radix_tree_exceptional_entry(page)) {
-				clear_exceptional_entry(mapping, index, page);
-				continue;
-			}
-
-			if (!trylock_page(page))
-				continue;
-			WARN_ON(page->index != index);
-			ret = invalidate_inode_page(page);
-			unlock_page(page);
-			/*
-			 * Invalidation is a hint that the page is no longer
-			 * of interest and try to speed up its reclaim.
-			 */
-			if (!ret)
-				deactivate_file_page(page);
-			count += ret;
+	for_each_pagecache_entry(&iter, mapping, start, end, page, index) {
+		if (radix_tree_exceptional_entry(page)) {
+			clear_exceptional_entry(mapping, index, page);
+			continue;
 		}
-		pagevec_remove_exceptionals(&pvec);
-		pagevec_release(&pvec);
-		cond_resched();
-		index++;
+
+		if (!trylock_page(page))
+			continue;
+		WARN_ON(page->index != index);
+		ret = invalidate_inode_page(page);
+		unlock_page(page);
+		/*
+		 * Invalidation is a hint that the page is no longer
+		 * of interest, so try to speed up its reclaim.
+		 */
+		if (!ret)
+			deactivate_file_page(page);
+		count += ret;
 	}
+
 	return count;
 }
 EXPORT_SYMBOL(invalidate_mapping_pages);
@@ -568,75 +513,59 @@ static int do_launder_page(struct address_space *mapping, struct page *page)
 int invalidate_inode_pages2_range(struct address_space *mapping,
 				  pgoff_t start, pgoff_t end)
 {
-	pgoff_t indices[PAGEVEC_SIZE];
-	struct pagevec pvec;
+	struct pagecache_iter iter;
+	struct page *page;
 	pgoff_t index;
-	int i;
 	int ret = 0;
 	int ret2 = 0;
 	int did_range_unmap = 0;
 
 	cleancache_invalidate_inode(mapping);
-	pagevec_init(&pvec, 0);
-	index = start;
-	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
-			indices)) {
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
-
-			/* We rely upon deletion not changing page->index */
-			index = indices[i];
-			if (index > end)
-				break;
 
-			if (radix_tree_exceptional_entry(page)) {
-				clear_exceptional_entry(mapping, index, page);
-				continue;
-			}
+	for_each_pagecache_entry(&iter, mapping, start, end, page, index) {
+		if (radix_tree_exceptional_entry(page)) {
+			clear_exceptional_entry(mapping, index, page);
+			continue;
+		}
 
-			lock_page(page);
-			WARN_ON(page->index != index);
-			if (page->mapping != mapping) {
-				unlock_page(page);
-				continue;
-			}
-			wait_on_page_writeback(page);
-			if (page_mapped(page)) {
-				if (!did_range_unmap) {
-					/*
-					 * Zap the rest of the file in one hit.
-					 */
-					unmap_mapping_range(mapping,
-					   (loff_t)index << PAGE_CACHE_SHIFT,
-					   (loff_t)(1 + end - index)
-							 << PAGE_CACHE_SHIFT,
-					    0);
-					did_range_unmap = 1;
-				} else {
-					/*
-					 * Just zap this page
-					 */
-					unmap_mapping_range(mapping,
-					   (loff_t)index << PAGE_CACHE_SHIFT,
-					   PAGE_CACHE_SIZE, 0);
-				}
-			}
-			BUG_ON(page_mapped(page));
-			ret2 = do_launder_page(mapping, page);
-			if (ret2 == 0) {
-				if (!invalidate_complete_page2(mapping, page))
-					ret2 = -EBUSY;
-			}
-			if (ret2 < 0)
-				ret = ret2;
+		lock_page(page);
+		WARN_ON(page->index != index);
+		if (page->mapping != mapping) {
 			unlock_page(page);
+			continue;
 		}
-		pagevec_remove_exceptionals(&pvec);
-		pagevec_release(&pvec);
-		cond_resched();
-		index++;
+		wait_on_page_writeback(page);
+		if (page_mapped(page)) {
+			if (!did_range_unmap) {
+				/*
+				 * Zap the rest of the file in one hit.
+				 */
+				unmap_mapping_range(mapping,
+				   (loff_t)index << PAGE_CACHE_SHIFT,
+				   (loff_t)(1 + end - index)
+						 << PAGE_CACHE_SHIFT,
+				    0);
+				did_range_unmap = 1;
+			} else {
+				/*
+				 * Just zap this page
+				 */
+				unmap_mapping_range(mapping,
+				   (loff_t)index << PAGE_CACHE_SHIFT,
+				   PAGE_CACHE_SIZE, 0);
+			}
+		}
+		BUG_ON(page_mapped(page));
+		ret2 = do_launder_page(mapping, page);
+		if (ret2 == 0) {
+			if (!invalidate_complete_page2(mapping, page))
+				ret2 = -EBUSY;
+		}
+		if (ret2 < 0)
+			ret = ret2;
+		unlock_page(page);
 	}
+
 	cleancache_invalidate_inode(mapping);
 	return ret;
 }
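
For readers new to the macros, this is roughly what each loop above expands
to, eliding the flags plumbing (a sketch based on the pagevec.h hunk, not
code from this patch):

#include <linux/pagevec.h>

/* Hypothetical walker showing the expansion of for_each_pagecache_page(). */
static void walk_pagecache(struct address_space *mapping,
			   pgoff_t start, pgoff_t end)
{
	struct pagecache_iter iter;
	struct page *page;

	pagecache_iter_init(&iter, start);
	while ((page = pagecache_iter_next(&iter, mapping, end, NULL, 0))) {
		/*
		 * pagecache_iter_next() refills iter->pages[] in
		 * PAGEVEC_SIZE batches via __find_get_pages(), releasing
		 * the previous batch and doing cond_resched() between
		 * batches, so callers no longer juggle pagevecs by hand.
		 */
	}
}
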
-- 
2.8.0.rc3

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] mm: Real pagecache iterators
  2016-04-01  2:38         ` Kent Overstreet
@ 2016-04-01 22:57           ` Andrew Morton
  -1 siblings, 0 replies; 16+ messages in thread
From: Andrew Morton @ 2016-04-01 22:57 UTC (permalink / raw)
  To: Kent Overstreet; +Cc: Al Viro, linux-kernel, linux-mm

On Thu, 31 Mar 2016 18:38:11 -0800 Kent Overstreet <kent.overstreet@gmail.com> wrote:

> Introduce for_each_pagecache_page() and related macros, with the goal of
> replacing most/all uses of pagevec_lookup().
> 
> For the most part this shouldn't be a functional change. The one functional
> difference with the new macros is that they now take an @end parameter, so we're
> able to avoid grabbing pages in __find_get_pages() that we'll never use.
> 
> This patch only does some of the conversions, the ones I was able to easily test
> myself - the conversions are mechanical but tricky enough they generally warrant
> testing.

What is the reason for this change?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] mm: Real pagecache iterators
  2016-04-01 22:57           ` Andrew Morton
@ 2016-04-01 23:20             ` Kent Overstreet
  -1 siblings, 0 replies; 16+ messages in thread
From: Kent Overstreet @ 2016-04-01 23:20 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Al Viro, linux-kernel, linux-mm

On Fri, Apr 01, 2016 at 03:57:47PM -0700, Andrew Morton wrote:
> On Thu, 31 Mar 2016 18:38:11 -0800 Kent Overstreet <kent.overstreet@gmail.com> wrote:
> 
> > Introduce for_each_pagecache_page() and related macros, with the goal of
> > replacing most/all uses of pagevec_lookup().
> > 
> > For the most part this shouldn't be a functional change. The one functional
> > difference with the new macros is that they now take an @end parameter, so we're
> > able to avoid grabbing pages in __find_get_pages() that we'll never use.
> > 
> > This patch only does some of the conversions, the ones I was able to easily test
> > myself - the conversions are mechanical but tricky enough they generally warrant
> > testing.
> 
> What is the reason for this change?

I just got tired of code being hard to follow when I was trying to work on some
pagecache invalidation stuff.
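
To make that concrete, here is the shape of a typical caller before and after
conversion (abridged sketch with hypothetical helpers, not code lifted from
the patch):

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/pagevec.h>
#include <linux/sched.h>

/* Old style: manual pagevec management and index bookkeeping. */
static void walk_old(struct address_space *mapping, pgoff_t start, pgoff_t end)
{
	struct pagevec pvec;
	pgoff_t index = start;
	int i;

	pagevec_init(&pvec, 0);
	while (index <= end && pagevec_lookup(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			if (page->index > end)
				break;
			/* ... per-page work ... */
		}
		index = pvec.pages[pagevec_count(&pvec) - 1]->index + 1;
		pagevec_release(&pvec);
		cond_resched();
	}
}

/* New style: the iterator hides batching, bounds and releasing. */
static void walk_new(struct address_space *mapping, pgoff_t start, pgoff_t end)
{
	struct pagecache_iter iter;
	struct page *page;

	for_each_pagecache_page(&iter, mapping, start, end, page) {
		/* ... per-page work ... */
	}
}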

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] mm: Real pagecache iterators
  2016-04-01  2:38         ` Kent Overstreet
@ 2016-04-27 21:27           ` Andrew Morton
  -1 siblings, 0 replies; 16+ messages in thread
From: Andrew Morton @ 2016-04-27 21:27 UTC (permalink / raw)
  To: Kent Overstreet; +Cc: Al Viro, linux-kernel, linux-mm

On Thu, 31 Mar 2016 18:38:11 -0800 Kent Overstreet <kent.overstreet@gmail.com> wrote:

> Introduce for_each_pagecache_page() and related macros, with the goal of
> replacing most/all uses of pagevec_lookup().
> 
> For the most part this shouldn't be a functional change. The one functional
> difference with the new macros is that they now take an @end parameter, so we're
> able to avoid grabbing pages in __find_get_pages() that we'll never use.
> 
> This patch only does some of the conversions, the ones I was able to easily test
> myself - the conversions are mechanical but tricky enough they generally warrant
> testing.
> 
> Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Al Viro <viro@zeniv.linux.org.uk>
> ---
>  fs/ext4/inode.c         | 261 ++++++++++++++++++++----------------------------
>  include/linux/pagevec.h |  67 ++++++++++++-
>  mm/filemap.c            |  76 +++++++++-----
>  mm/page-writeback.c     | 148 +++++++++++----------------
>  mm/swap.c               |  33 +-----
>  mm/truncate.c           | 259 +++++++++++++++++------------------------------
>  6 files changed, 380 insertions(+), 464 deletions(-)

hm, it's a lot of churn in sensitive areas for an 80 line saving.  What
do others think?

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2016-04-27 21:27 UTC | newest]

Thread overview: 8 messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-09  1:16 [PATCH] mm: Refactor find_get_pages() & friends Kent Overstreet
2016-03-29 21:29 ` Andrew Morton
2016-04-01  2:35   ` Kent Overstreet
2016-04-01  2:38     ` [PATCH 1/2] " Kent Overstreet
2016-04-01  2:38       ` [PATCH 2/2] mm: Real pagecache iterators Kent Overstreet
2016-04-01 22:57         ` Andrew Morton
2016-04-01 23:20           ` Kent Overstreet
2016-04-27 21:27         ` Andrew Morton
