* [PATCH 0/7] tmpfs: simplify by splice instead of readpage
From: Hugh Dickins @ 2011-06-09 22:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, linux-mm

Here's a second patchset for mmotm, based on 3.0-rc2 plus the 14
tmpfs and truncation changes already in mm, which were preparation for this.

Add shmem_file_splice_read(), remove shmem_readpage(), and
simplify, before advancing to the interesting radix_tree
conversion in the final patchset, to follow in a few days.

1/7 tmpfs: clone shmem_file_splice_read
2/7 tmpfs: refine shmem_file_splice_read
3/7 tmpfs: pass gfp to shmem_getpage_gfp
4/7 tmpfs: remove shmem_readpage
5/7 tmpfs: simplify prealloc_page
6/7 tmpfs: simplify filepage/swappage
7/7 tmpfs: simplify unuse and writepage

 fs/splice.c            |    4 
 include/linux/splice.h |    2 
 mm/shmem.c             |  549 ++++++++++++++++++++-------------------
 3 files changed, 299 insertions(+), 256 deletions(-)
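
For context, a minimal userspace sketch of the path this series reworks
(illustrative only; the file path is hypothetical): splicing from a tmpfs
file into a pipe, which after this series reaches shmem_file_splice_read()
instead of generic_file_splice_read() plus ->readpage().

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
        int pipefd[2];
        loff_t off = 0;
        int fd = open("/dev/shm/example", O_RDONLY);    /* tmpfs-backed */

        if (fd < 0 || pipe(pipefd) < 0)
                return 1;
        if (splice(fd, &off, pipefd[1], NULL, 4096, 0) < 0)
                perror("splice");
        return 0;
}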

Hugh


* [PATCH 1/7] tmpfs: clone shmem_file_splice_read
From: Hugh Dickins @ 2011-06-09 22:32 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Jens Axboe, linux-kernel, linux-mm

Copy __generic_file_splice_read() and generic_file_splice_read()
from fs/splice.c to shmem_file_splice_read() in mm/shmem.c.  Make
page_cache_pipe_buf_ops and spd_release_page() accessible to it.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Jens Axboe <jaxboe@fusionio.com>
---
 fs/splice.c            |    4 
 include/linux/splice.h |    2 
 mm/shmem.c             |  218 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 221 insertions(+), 3 deletions(-)

--- linux.orig/fs/splice.c	2011-06-09 11:37:57.548770334 -0700
+++ linux/fs/splice.c	2011-06-09 11:38:05.228808404 -0700
@@ -132,7 +132,7 @@ error:
 	return err;
 }
 
-static const struct pipe_buf_operations page_cache_pipe_buf_ops = {
+const struct pipe_buf_operations page_cache_pipe_buf_ops = {
 	.can_merge = 0,
 	.map = generic_pipe_buf_map,
 	.unmap = generic_pipe_buf_unmap,
@@ -264,7 +264,7 @@ ssize_t splice_to_pipe(struct pipe_inode
 	return ret;
 }
 
-static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
+void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
 {
 	page_cache_release(spd->pages[i]);
 }
--- linux.orig/include/linux/splice.h	2011-06-09 11:37:57.548770334 -0700
+++ linux/include/linux/splice.h	2011-06-09 11:38:05.228808404 -0700
@@ -88,5 +88,7 @@ extern ssize_t splice_direct_to_actor(st
 extern int splice_grow_spd(struct pipe_inode_info *, struct splice_pipe_desc *);
 extern void splice_shrink_spd(struct pipe_inode_info *,
 				struct splice_pipe_desc *);
+extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
 
+extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
 #endif
--- linux.orig/mm/shmem.c	2011-06-09 11:37:57.552770354 -0700
+++ linux/mm/shmem.c	2011-06-09 11:38:05.232808448 -0700
@@ -51,6 +51,7 @@ static struct vfsmount *shm_mnt;
 #include <linux/shmem_fs.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
+#include <linux/splice.h>
 #include <linux/security.h>
 #include <linux/swapops.h>
 #include <linux/mempolicy.h>
@@ -1844,6 +1845,221 @@ static ssize_t shmem_file_aio_read(struc
 	return retval;
 }
 
+static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
+				struct pipe_inode_info *pipe, size_t len,
+				unsigned int flags)
+{
+	struct address_space *mapping = in->f_mapping;
+	unsigned int loff, nr_pages, req_pages;
+	struct page *pages[PIPE_DEF_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
+	struct page *page;
+	pgoff_t index, end_index;
+	loff_t isize, left;
+	int error, page_nr;
+	struct splice_pipe_desc spd = {
+		.pages = pages,
+		.partial = partial,
+		.flags = flags,
+		.ops = &page_cache_pipe_buf_ops,
+		.spd_release = spd_release_page,
+	};
+
+	isize = i_size_read(in->f_mapping->host);
+	if (unlikely(*ppos >= isize))
+		return 0;
+
+	left = isize - *ppos;
+	if (unlikely(left < len))
+		len = left;
+
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
+
+	index = *ppos >> PAGE_CACHE_SHIFT;
+	loff = *ppos & ~PAGE_CACHE_MASK;
+	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	nr_pages = min(req_pages, pipe->buffers);
+
+	/*
+	 * Lookup the (hopefully) full range of pages we need.
+	 */
+	spd.nr_pages = find_get_pages_contig(mapping, index,
+						nr_pages, spd.pages);
+	index += spd.nr_pages;
+
+	/*
+	 * If find_get_pages_contig() returned fewer pages than we needed,
+	 * readahead/allocate the rest and fill in the holes.
+	 */
+	if (spd.nr_pages < nr_pages)
+		page_cache_sync_readahead(mapping, &in->f_ra, in,
+				index, req_pages - spd.nr_pages);
+
+	error = 0;
+	while (spd.nr_pages < nr_pages) {
+		/*
+		 * Page could be there, find_get_pages_contig() breaks on
+		 * the first hole.
+		 */
+		page = find_get_page(mapping, index);
+		if (!page) {
+			/*
+			 * page didn't exist, allocate one.
+			 */
+			page = page_cache_alloc_cold(mapping);
+			if (!page)
+				break;
+
+			error = add_to_page_cache_lru(page, mapping, index,
+						GFP_KERNEL);
+			if (unlikely(error)) {
+				page_cache_release(page);
+				if (error == -EEXIST)
+					continue;
+				break;
+			}
+			/*
+			 * add_to_page_cache() locks the page, unlock it
+			 * to avoid convoluting the logic below even more.
+			 */
+			unlock_page(page);
+		}
+
+		spd.pages[spd.nr_pages++] = page;
+		index++;
+	}
+
+	/*
+	 * Now loop over the map and see if we need to start IO on any
+	 * pages, fill in the partial map, etc.
+	 */
+	index = *ppos >> PAGE_CACHE_SHIFT;
+	nr_pages = spd.nr_pages;
+	spd.nr_pages = 0;
+	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
+		unsigned int this_len;
+
+		if (!len)
+			break;
+
+		/*
+		 * this_len is the max we'll use from this page
+		 */
+		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
+		page = spd.pages[page_nr];
+
+		if (PageReadahead(page))
+			page_cache_async_readahead(mapping, &in->f_ra, in,
+					page, index, req_pages - page_nr);
+
+		/*
+		 * If the page isn't uptodate, we may need to start io on it
+		 */
+		if (!PageUptodate(page)) {
+			lock_page(page);
+
+			/*
+			 * Page was truncated, or invalidated by the
+			 * filesystem.  Redo the find/create, but this time the
+			 * page is kept locked, so there's no chance of another
+			 * race with truncate/invalidate.
+			 */
+			if (!page->mapping) {
+				unlock_page(page);
+				page = find_or_create_page(mapping, index,
+						mapping_gfp_mask(mapping));
+
+				if (!page) {
+					error = -ENOMEM;
+					break;
+				}
+				page_cache_release(spd.pages[page_nr]);
+				spd.pages[page_nr] = page;
+			}
+			/*
+			 * page was already under io and is now done, great
+			 */
+			if (PageUptodate(page)) {
+				unlock_page(page);
+				goto fill_it;
+			}
+
+			/*
+			 * need to read in the page
+			 */
+			error = mapping->a_ops->readpage(in, page);
+			if (unlikely(error)) {
+				/*
+				 * We really should re-lookup the page here,
+				 * but it complicates things a lot. Instead
+				 * lets just do what we already stored, and
+				 * we'll get it the next time we are called.
+				 */
+				if (error == AOP_TRUNCATED_PAGE)
+					error = 0;
+
+				break;
+			}
+		}
+fill_it:
+		/*
+		 * i_size must be checked after PageUptodate.
+		 */
+		isize = i_size_read(mapping->host);
+		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+		if (unlikely(!isize || index > end_index))
+			break;
+
+		/*
+		 * if this is the last page, see if we need to shrink
+		 * the length and stop
+		 */
+		if (end_index == index) {
+			unsigned int plen;
+
+			/*
+			 * max good bytes in this page
+			 */
+			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
+			if (plen <= loff)
+				break;
+
+			/*
+			 * force quit after adding this page
+			 */
+			this_len = min(this_len, plen - loff);
+			len = this_len;
+		}
+
+		spd.partial[page_nr].offset = loff;
+		spd.partial[page_nr].len = this_len;
+		len -= this_len;
+		loff = 0;
+		spd.nr_pages++;
+		index++;
+	}
+
+	/*
+	 * Release any pages at the end, if we quit early. 'page_nr' is how far
+	 * we got, 'nr_pages' is how many pages are in the map.
+	 */
+	while (page_nr < nr_pages)
+		page_cache_release(spd.pages[page_nr++]);
+	in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
+
+	if (spd.nr_pages)
+		error = splice_to_pipe(pipe, &spd);
+
+	splice_shrink_spd(pipe, &spd);
+
+	if (error > 0) {
+		*ppos += error;
+		file_accessed(in);
+	}
+	return error;
+}
+
 static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
@@ -2699,7 +2915,7 @@ static const struct file_operations shme
 	.aio_read	= shmem_file_aio_read,
 	.aio_write	= generic_file_aio_write,
 	.fsync		= noop_fsync,
-	.splice_read	= generic_file_splice_read,
+	.splice_read	= shmem_file_splice_read,
 	.splice_write	= generic_file_splice_write,
 #endif
 };


* [PATCH 2/7] tmpfs: refine shmem_file_splice_read
From: Hugh Dickins @ 2011-06-09 22:33 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, linux-mm

Tidy up shmem_file_splice_read():

Remove readahead: okay, we could implement shmem readahead on swap,
but have never done so before, swap being the slow exceptional path.

Use shmem_getpage() instead of find_or_create_page() plus ->readpage().

Remove several comments: sorry, I found them more distracting than
helpful, and this will not be the reference version of splice_read().

Signed-off-by: Hugh Dickins <hughd@google.com>
---
 mm/shmem.c |  138 +++++++--------------------------------------------
 1 file changed, 19 insertions(+), 119 deletions(-)

--- linux.orig/mm/shmem.c	2011-06-09 11:38:05.232808448 -0700
+++ linux/mm/shmem.c	2011-06-09 11:38:13.436849182 -0700
@@ -1850,6 +1850,7 @@ static ssize_t shmem_file_splice_read(st
 				unsigned int flags)
 {
 	struct address_space *mapping = in->f_mapping;
+	struct inode *inode = mapping->host;
 	unsigned int loff, nr_pages, req_pages;
 	struct page *pages[PIPE_DEF_BUFFERS];
 	struct partial_page partial[PIPE_DEF_BUFFERS];
@@ -1865,7 +1866,7 @@ static ssize_t shmem_file_splice_read(st
 		.spd_release = spd_release_page,
 	};
 
-	isize = i_size_read(in->f_mapping->host);
+	isize = i_size_read(inode);
 	if (unlikely(*ppos >= isize))
 		return 0;
 
@@ -1881,153 +1882,57 @@ static ssize_t shmem_file_splice_read(st
 	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	nr_pages = min(req_pages, pipe->buffers);
 
-	/*
-	 * Lookup the (hopefully) full range of pages we need.
-	 */
 	spd.nr_pages = find_get_pages_contig(mapping, index,
 						nr_pages, spd.pages);
 	index += spd.nr_pages;
-
-	/*
-	 * If find_get_pages_contig() returned fewer pages than we needed,
-	 * readahead/allocate the rest and fill in the holes.
-	 */
-	if (spd.nr_pages < nr_pages)
-		page_cache_sync_readahead(mapping, &in->f_ra, in,
-				index, req_pages - spd.nr_pages);
-
 	error = 0;
-	while (spd.nr_pages < nr_pages) {
-		/*
-		 * Page could be there, find_get_pages_contig() breaks on
-		 * the first hole.
-		 */
-		page = find_get_page(mapping, index);
-		if (!page) {
-			/*
-			 * page didn't exist, allocate one.
-			 */
-			page = page_cache_alloc_cold(mapping);
-			if (!page)
-				break;
-
-			error = add_to_page_cache_lru(page, mapping, index,
-						GFP_KERNEL);
-			if (unlikely(error)) {
-				page_cache_release(page);
-				if (error == -EEXIST)
-					continue;
-				break;
-			}
-			/*
-			 * add_to_page_cache() locks the page, unlock it
-			 * to avoid convoluting the logic below even more.
-			 */
-			unlock_page(page);
-		}
 
+	while (spd.nr_pages < nr_pages) {
+		page = NULL;
+		error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
+		if (error)
+			break;
+		unlock_page(page);
 		spd.pages[spd.nr_pages++] = page;
 		index++;
 	}
 
-	/*
-	 * Now loop over the map and see if we need to start IO on any
-	 * pages, fill in the partial map, etc.
-	 */
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	nr_pages = spd.nr_pages;
 	spd.nr_pages = 0;
+
 	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
 		unsigned int this_len;
 
 		if (!len)
 			break;
 
-		/*
-		 * this_len is the max we'll use from this page
-		 */
 		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
 		page = spd.pages[page_nr];
 
-		if (PageReadahead(page))
-			page_cache_async_readahead(mapping, &in->f_ra, in,
-					page, index, req_pages - page_nr);
-
-		/*
-		 * If the page isn't uptodate, we may need to start io on it
-		 */
-		if (!PageUptodate(page)) {
-			lock_page(page);
-
-			/*
-			 * Page was truncated, or invalidated by the
-			 * filesystem.  Redo the find/create, but this time the
-			 * page is kept locked, so there's no chance of another
-			 * race with truncate/invalidate.
-			 */
-			if (!page->mapping) {
-				unlock_page(page);
-				page = find_or_create_page(mapping, index,
-						mapping_gfp_mask(mapping));
-
-				if (!page) {
-					error = -ENOMEM;
-					break;
-				}
-				page_cache_release(spd.pages[page_nr]);
-				spd.pages[page_nr] = page;
-			}
-			/*
-			 * page was already under io and is now done, great
-			 */
-			if (PageUptodate(page)) {
-				unlock_page(page);
-				goto fill_it;
-			}
-
-			/*
-			 * need to read in the page
-			 */
-			error = mapping->a_ops->readpage(in, page);
-			if (unlikely(error)) {
-				/*
-				 * We really should re-lookup the page here,
-				 * but it complicates things a lot. Instead
-				 * lets just do what we already stored, and
-				 * we'll get it the next time we are called.
-				 */
-				if (error == AOP_TRUNCATED_PAGE)
-					error = 0;
-
+		if (!PageUptodate(page) || page->mapping != mapping) {
+			page = NULL;
+			error = shmem_getpage(inode, index, &page,
+							SGP_CACHE, NULL);
+			if (error)
 				break;
-			}
+			unlock_page(page);
+			page_cache_release(spd.pages[page_nr]);
+			spd.pages[page_nr] = page;
 		}
-fill_it:
-		/*
-		 * i_size must be checked after PageUptodate.
-		 */
-		isize = i_size_read(mapping->host);
+
+		isize = i_size_read(inode);
 		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
 		if (unlikely(!isize || index > end_index))
 			break;
 
-		/*
-		 * if this is the last page, see if we need to shrink
-		 * the length and stop
-		 */
 		if (end_index == index) {
 			unsigned int plen;
 
-			/*
-			 * max good bytes in this page
-			 */
 			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
 			if (plen <= loff)
 				break;
 
-			/*
-			 * force quit after adding this page
-			 */
 			this_len = min(this_len, plen - loff);
 			len = this_len;
 		}
@@ -2040,13 +1945,8 @@ fill_it:
 		index++;
 	}
 
-	/*
-	 * Release any pages at the end, if we quit early. 'page_nr' is how far
-	 * we got, 'nr_pages' is how many pages are in the map.
-	 */
 	while (page_nr < nr_pages)
 		page_cache_release(spd.pages[page_nr++]);
-	in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
 
 	if (spd.nr_pages)
 		error = splice_to_pipe(pipe, &spd);


* [PATCH 3/7] tmpfs: pass gfp to shmem_getpage_gfp
From: Hugh Dickins @ 2011-06-09 22:34 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, linux-mm

Make shmem_getpage() a wrapper, passing mapping_gfp_mask() down to
shmem_getpage_gfp(), which in turn passes gfp down to shmem_swp_alloc().

Change shmem_read_mapping_page_gfp() to use shmem_getpage_gfp() in the
CONFIG_SHMEM case; but leave tiny !SHMEM using read_cache_page_gfp().

Add a BUG_ON() in case anyone happens to call this on a non-shmem mapping;
though we might later want to let that case route to read_cache_page_gfp().

It annoys me to have these two almost-redundant args, gfp and fault_type:
I can't find a better way; but initialize fault_type only in shmem_fault().

Note that before, read_cache_page_gfp() was allocating i915_gem's pages
with __GFP_NORETRY as intended; but the corresponding swap vector pages
got allocated without it, leaving a small possibility of OOM.
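
For illustration, roughly how such a caller can use this, mixing
__GFP_NORETRY | __GFP_NOWARN into the mapping's gfp mask (a sketch only;
the function name is hypothetical):

static struct page *example_read_gem_page(struct address_space *mapping,
                                          pgoff_t index)
{
        gfp_t gfp = mapping_gfp_mask(mapping) | __GFP_NORETRY | __GFP_NOWARN;

        /* Returns the uptodate page, or ERR_PTR(-errno) on failure */
        return shmem_read_mapping_page_gfp(mapping, index, gfp);
}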

Signed-off-by: Hugh Dickins <hughd@google.com>
---
 mm/shmem.c |   67 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 23 deletions(-)

--- linux.orig/mm/shmem.c	2011-06-09 11:38:13.436849182 -0700
+++ linux/mm/shmem.c	2011-06-09 11:38:22.912896122 -0700
@@ -127,8 +127,15 @@ static unsigned long shmem_default_max_i
 }
 #endif
 
-static int shmem_getpage(struct inode *inode, unsigned long idx,
-			 struct page **pagep, enum sgp_type sgp, int *type);
+static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
+	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
+
+static inline int shmem_getpage(struct inode *inode, pgoff_t index,
+	struct page **pagep, enum sgp_type sgp, int *fault_type)
+{
+	return shmem_getpage_gfp(inode, index, pagep, sgp,
+			mapping_gfp_mask(inode->i_mapping), fault_type);
+}
 
 static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
 {
@@ -404,10 +411,12 @@ static void shmem_swp_set(struct shmem_i
  * @info:	info structure for the inode
  * @index:	index of the page to find
  * @sgp:	check and recheck i_size? skip allocation?
+ * @gfp:	gfp mask to use for any page allocation
  *
  * If the entry does not exist, allocate it.
  */
-static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
+static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info,
+			unsigned long index, enum sgp_type sgp, gfp_t gfp)
 {
 	struct inode *inode = &info->vfs_inode;
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
@@ -435,7 +444,7 @@ static swp_entry_t *shmem_swp_alloc(stru
 		}
 
 		spin_unlock(&info->lock);
-		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
+		page = shmem_dir_alloc(gfp);
 		spin_lock(&info->lock);
 
 		if (!page) {
@@ -1225,14 +1234,14 @@ static inline struct mempolicy *shmem_ge
 #endif
 
 /*
- * shmem_getpage - either get the page from swap or allocate a new one
+ * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
  *
  * If we allocate a new one we do not mark it dirty. That's up to the
  * vm. If we swap it in we mark it dirty since we also free the swap
  * entry since a page cannot live in both the swap and page cache
  */
-static int shmem_getpage(struct inode *inode, unsigned long idx,
-			struct page **pagep, enum sgp_type sgp, int *type)
+static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx,
+	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
@@ -1242,15 +1251,11 @@ static int shmem_getpage(struct inode *i
 	struct page *prealloc_page = NULL;
 	swp_entry_t *entry;
 	swp_entry_t swap;
-	gfp_t gfp;
 	int error;
 
 	if (idx >= SHMEM_MAX_INDEX)
 		return -EFBIG;
 
-	if (type)
-		*type = 0;
-
 	/*
 	 * Normally, filepage is NULL on entry, and either found
 	 * uptodate immediately, or allocated and zeroed, or read
@@ -1264,13 +1269,12 @@ repeat:
 		filepage = find_lock_page(mapping, idx);
 	if (filepage && PageUptodate(filepage))
 		goto done;
-	gfp = mapping_gfp_mask(mapping);
 	if (!filepage) {
 		/*
 		 * Try to preload while we can wait, to not make a habit of
 		 * draining atomic reserves; but don't latch on to this cpu.
 		 */
-		error = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
+		error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
 		if (error)
 			goto failed;
 		radix_tree_preload_end();
@@ -1290,7 +1294,7 @@ repeat:
 
 	spin_lock(&info->lock);
 	shmem_recalc_inode(inode);
-	entry = shmem_swp_alloc(info, idx, sgp);
+	entry = shmem_swp_alloc(info, idx, sgp, gfp);
 	if (IS_ERR(entry)) {
 		spin_unlock(&info->lock);
 		error = PTR_ERR(entry);
@@ -1305,12 +1309,12 @@ repeat:
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
 			/* here we actually do the io */
-			if (type)
-				*type |= VM_FAULT_MAJOR;
+			if (fault_type)
+				*fault_type |= VM_FAULT_MAJOR;
 			swappage = shmem_swapin(swap, gfp, info, idx);
 			if (!swappage) {
 				spin_lock(&info->lock);
-				entry = shmem_swp_alloc(info, idx, sgp);
+				entry = shmem_swp_alloc(info, idx, sgp, gfp);
 				if (IS_ERR(entry))
 					error = PTR_ERR(entry);
 				else {
@@ -1461,7 +1465,7 @@ repeat:
 				SetPageSwapBacked(filepage);
 			}
 
-			entry = shmem_swp_alloc(info, idx, sgp);
+			entry = shmem_swp_alloc(info, idx, sgp, gfp);
 			if (IS_ERR(entry))
 				error = PTR_ERR(entry);
 			else {
@@ -1539,7 +1543,7 @@ static int shmem_fault(struct vm_area_st
 {
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
 	int error;
-	int ret;
+	int ret = VM_FAULT_LOCKED;
 
 	if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
 		return VM_FAULT_SIGBUS;
@@ -1547,11 +1551,12 @@ static int shmem_fault(struct vm_area_st
 	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
 	if (error)
 		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
+
 	if (ret & VM_FAULT_MAJOR) {
 		count_vm_event(PGMAJFAULT);
 		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
 	}
-	return ret | VM_FAULT_LOCKED;
+	return ret;
 }
 
 #ifdef CONFIG_NUMA
@@ -3162,13 +3167,29 @@ int shmem_zero_setup(struct vm_area_stru
  * suit tmpfs, since it may have pages in swapcache, and needs to find those
  * for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
  *
- * Provide a stub for those callers to start using now, then later
- * flesh it out to call shmem_getpage() with additional gfp mask, when
- * shmem_file_splice_read() is added and shmem_readpage() is removed.
+ * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in
+ * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily.
  */
 struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 					 pgoff_t index, gfp_t gfp)
 {
+#ifdef CONFIG_SHMEM
+	struct inode *inode = mapping->host;
+	struct page *page = NULL;
+	int error;
+
+	BUG_ON(mapping->a_ops != &shmem_aops);
+	error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL);
+	if (error)
+		page = ERR_PTR(error);
+	else
+		unlock_page(page);
+	return page;
+#else
+	/*
+	 * The tiny !SHMEM case uses ramfs without swap
+	 */
 	return read_cache_page_gfp(mapping, index, gfp);
+#endif
 }
 EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);


* [PATCH 4/7] tmpfs: remove shmem_readpage
From: Hugh Dickins @ 2011-06-09 22:35 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, linux-mm

Remove that pernicious shmem_readpage() at last: the things we needed
it for (splice, loop, sendfile, i915 GEM) are now fully taken care of
by shmem_file_splice_read() and shmem_read_mapping_page_gfp().

This removal clears the way for a simpler shmem_getpage_gfp(), since
page is never passed in; but leave most of that cleanup until after.

sys_readahead() and sys_fadvise(POSIX_FADV_WILLNEED) will now return EINVAL,
instead of unexpectedly trying to read ahead on tmpfs: if that proves
to be an issue for someone, then we can either arrange for them to
return success instead, or try to implement async readahead on tmpfs.
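
To make that user-visible change concrete, a small userspace sketch
(illustrative only; the path is hypothetical):

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        int fd = open("/dev/shm/example", O_RDONLY);    /* tmpfs-backed */

        if (fd < 0)
                return 1;
        /* Expected to fail with EINVAL now that tmpfs has no ->readpage */
        if (readahead(fd, 0, 4096) < 0)
                printf("readahead failed: %s\n", strerror(errno));
        return 0;
}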

Signed-off-by: Hugh Dickins <hughd@google.com>
---
 mm/shmem.c |   40 ++++++----------------------------------
 1 file changed, 6 insertions(+), 34 deletions(-)

--- linux.orig/mm/shmem.c	2011-06-09 11:38:22.912896122 -0700
+++ linux/mm/shmem.c	2011-06-09 11:39:32.361240481 -0700
@@ -1246,7 +1246,7 @@ static int shmem_getpage_gfp(struct inod
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo;
-	struct page *filepage = *pagep;
+	struct page *filepage;
 	struct page *swappage;
 	struct page *prealloc_page = NULL;
 	swp_entry_t *entry;
@@ -1255,18 +1255,8 @@ static int shmem_getpage_gfp(struct inod
 
 	if (idx >= SHMEM_MAX_INDEX)
 		return -EFBIG;
-
-	/*
-	 * Normally, filepage is NULL on entry, and either found
-	 * uptodate immediately, or allocated and zeroed, or read
-	 * in under swappage, which is then assigned to filepage.
-	 * But shmem_readpage (required for splice) passes in a locked
-	 * filepage, which may be found not uptodate by other callers
-	 * too, and may need to be copied from the swappage read in.
-	 */
 repeat:
-	if (!filepage)
-		filepage = find_lock_page(mapping, idx);
+	filepage = find_lock_page(mapping, idx);
 	if (filepage && PageUptodate(filepage))
 		goto done;
 	if (!filepage) {
@@ -1513,8 +1503,7 @@ nospace:
 	 * Perhaps the page was brought in from swap between find_lock_page
 	 * and taking info->lock?  We allow for that at add_to_page_cache_lru,
 	 * but must also avoid reporting a spurious ENOSPC while working on a
-	 * full tmpfs.  (When filepage has been passed in to shmem_getpage, it
-	 * is already in page cache, which prevents this race from occurring.)
+	 * full tmpfs.
 	 */
 	if (!filepage) {
 		struct page *page = find_get_page(mapping, idx);
@@ -1527,7 +1516,7 @@ nospace:
 	spin_unlock(&info->lock);
 	error = -ENOSPC;
 failed:
-	if (*pagep != filepage) {
+	if (filepage) {
 		unlock_page(filepage);
 		page_cache_release(filepage);
 	}
@@ -1673,19 +1662,6 @@ static struct inode *shmem_get_inode(str
 static const struct inode_operations shmem_symlink_inode_operations;
 static const struct inode_operations shmem_symlink_inline_operations;
 
-/*
- * Normally tmpfs avoids the use of shmem_readpage and shmem_write_begin;
- * but providing them allows a tmpfs file to be used for splice, sendfile, and
- * below the loop driver, in the generic fashion that many filesystems support.
- */
-static int shmem_readpage(struct file *file, struct page *page)
-{
-	struct inode *inode = page->mapping->host;
-	int error = shmem_getpage(inode, page->index, &page, SGP_CACHE, NULL);
-	unlock_page(page);
-	return error;
-}
-
 static int
 shmem_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
@@ -1693,7 +1669,6 @@ shmem_write_begin(struct file *file, str
 {
 	struct inode *inode = mapping->host;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	*pagep = NULL;
 	return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
 }
 
@@ -1893,7 +1868,6 @@ static ssize_t shmem_file_splice_read(st
 	error = 0;
 
 	while (spd.nr_pages < nr_pages) {
-		page = NULL;
 		error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
 		if (error)
 			break;
@@ -1916,7 +1890,6 @@ static ssize_t shmem_file_splice_read(st
 		page = spd.pages[page_nr];
 
 		if (!PageUptodate(page) || page->mapping != mapping) {
-			page = NULL;
 			error = shmem_getpage(inode, index, &page,
 							SGP_CACHE, NULL);
 			if (error)
@@ -2125,7 +2098,7 @@ static int shmem_symlink(struct inode *d
 	int error;
 	int len;
 	struct inode *inode;
-	struct page *page = NULL;
+	struct page *page;
 	char *kaddr;
 	struct shmem_inode_info *info;
 
@@ -2803,7 +2776,6 @@ static const struct address_space_operat
 	.writepage	= shmem_writepage,
 	.set_page_dirty	= __set_page_dirty_no_writeback,
 #ifdef CONFIG_TMPFS
-	.readpage	= shmem_readpage,
 	.write_begin	= shmem_write_begin,
 	.write_end	= shmem_write_end,
 #endif
@@ -3175,7 +3147,7 @@ struct page *shmem_read_mapping_page_gfp
 {
 #ifdef CONFIG_SHMEM
 	struct inode *inode = mapping->host;
-	struct page *page = NULL;
+	struct page *page;
 	int error;
 
 	BUG_ON(mapping->a_ops != &shmem_aops);


* [PATCH 5/7] tmpfs: simplify prealloc_page
From: Hugh Dickins @ 2011-06-09 22:39 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Shaohua Li, Zhang, Yanmin, Tim Chen, linux-kernel, linux-mm

The prealloc_page handling in shmem_getpage_gfp() is unnecessarily
complicated: first simplify that before going on to filepage/swappage.

That's right, don't report ENOMEM when the preallocation fails: we may
or may not need the page.  But simply report ENOMEM once we find we do
need it, instead of dropping lock, repeating allocation, unwinding on
failure etc.  And leave the out label on the fast path, don't goto.

Fix something that looks like a bug but turns out not to be: set
PageSwapBacked on prealloc_page before its mem_cgroup_cache_charge(),
as the removed case was doing.  That's important before adding to LRU
(determines which LRU the page goes on), and does affect which path it
takes through memcontrol.c, but in the end MEM_CGROUP_CHARGE_TYPE_SHMEM
is handled no differently from CACHE.
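
In outline, the new shape is: preallocate opportunistically, tolerate
failure, and report ENOMEM only at the point the page is actually needed.
A sketch with hypothetical names (the real code is in the diff below):

static int example_getpage(bool will_need_new_page, struct page **pagep)
{
        struct page *prealloc = alloc_page(GFP_KERNEL); /* may fail: fine */

        if (will_need_new_page) {
                if (!prealloc)
                        return -ENOMEM; /* only now does failure matter */
                *pagep = prealloc;
                prealloc = NULL;
        }
        if (prealloc)
                page_cache_release(prealloc);   /* never needed it */
        return 0;
}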

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: "Zhang, Yanmin" <yanmin.zhang@intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
---
 mm/shmem.c |   59 ++++++++++++---------------------------------------
 1 file changed, 15 insertions(+), 44 deletions(-)

--- linux.orig/mm/shmem.c	2011-06-09 11:39:32.361240481 -0700
+++ linux/mm/shmem.c	2011-06-09 11:39:42.845292474 -0700
@@ -1269,9 +1269,9 @@ repeat:
 			goto failed;
 		radix_tree_preload_end();
 		if (sgp != SGP_READ && !prealloc_page) {
-			/* We don't care if this fails */
 			prealloc_page = shmem_alloc_page(gfp, info, idx);
 			if (prealloc_page) {
+				SetPageSwapBacked(prealloc_page);
 				if (mem_cgroup_cache_charge(prealloc_page,
 						current->mm, GFP_KERNEL)) {
 					page_cache_release(prealloc_page);
@@ -1403,7 +1403,8 @@ repeat:
 			goto repeat;
 		}
 		spin_unlock(&info->lock);
-	} else {
+
+	} else if (prealloc_page) {
 		shmem_swp_unmap(entry);
 		sbinfo = SHMEM_SB(inode->i_sb);
 		if (sbinfo->max_blocks) {
@@ -1419,41 +1420,8 @@ repeat:
 		if (!filepage) {
 			int ret;
 
-			if (!prealloc_page) {
-				spin_unlock(&info->lock);
-				filepage = shmem_alloc_page(gfp, info, idx);
-				if (!filepage) {
-					spin_lock(&info->lock);
-					shmem_unacct_blocks(info->flags, 1);
-					shmem_free_blocks(inode, 1);
-					spin_unlock(&info->lock);
-					error = -ENOMEM;
-					goto failed;
-				}
-				SetPageSwapBacked(filepage);
-
-				/*
-				 * Precharge page while we can wait, compensate
-				 * after
-				 */
-				error = mem_cgroup_cache_charge(filepage,
-					current->mm, GFP_KERNEL);
-				if (error) {
-					page_cache_release(filepage);
-					spin_lock(&info->lock);
-					shmem_unacct_blocks(info->flags, 1);
-					shmem_free_blocks(inode, 1);
-					spin_unlock(&info->lock);
-					filepage = NULL;
-					goto failed;
-				}
-
-				spin_lock(&info->lock);
-			} else {
-				filepage = prealloc_page;
-				prealloc_page = NULL;
-				SetPageSwapBacked(filepage);
-			}
+			filepage = prealloc_page;
+			prealloc_page = NULL;
 
 			entry = shmem_swp_alloc(info, idx, sgp, gfp);
 			if (IS_ERR(entry))
@@ -1492,11 +1460,19 @@ repeat:
 		SetPageUptodate(filepage);
 		if (sgp == SGP_DIRTY)
 			set_page_dirty(filepage);
+	} else {
+		error = -ENOMEM;
+		goto out;
 	}
 done:
 	*pagep = filepage;
 	error = 0;
-	goto out;
+out:
+	if (prealloc_page) {
+		mem_cgroup_uncharge_cache_page(prealloc_page);
+		page_cache_release(prealloc_page);
+	}
+	return error;
 
 nospace:
 	/*
@@ -1520,12 +1496,7 @@ failed:
 		unlock_page(filepage);
 		page_cache_release(filepage);
 	}
-out:
-	if (prealloc_page) {
-		mem_cgroup_uncharge_cache_page(prealloc_page);
-		page_cache_release(prealloc_page);
-	}
-	return error;
+	goto out;
 }
 
 static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)


* [PATCH 6/7] tmpfs: simplify filepage/swappage
From: Hugh Dickins @ 2011-06-09 22:40 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, linux-mm

We can now simplify shmem_getpage_gfp(): there is no longer a dilemma
of filepage passed in via shmem_readpage(), then swappage found, which
must then be copied over to it.

Although at first it's tempting to replace the **pagep arg by returning
struct page *, that makes a mess of IS_ERR_OR_NULL(page)s in all the
callers, so leave as is.

Insert BUG_ON(!PageUptodate) when we find and lock page: some of the
complication came from uninitialized pages inserted into filecache
prior to readpage; but now we're in control, and only release pagelock
on filecache once it's uptodate (if an error occurs in reading back
from swap, the page remains in swapcache, never moved to filecache).
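
The invariant in miniature (an illustrative sketch, not the patch itself):

static struct page *example_lookup_locked(struct address_space *mapping,
                                          pgoff_t index)
{
        struct page *page = find_lock_page(mapping, index);

        /*
         * A filecache page is only ever unlocked once uptodate, so any
         * page we manage to lock here must already be uptodate.
         */
        if (page)
                BUG_ON(!PageUptodate(page));
        return page;    /* locked, or NULL */
}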

Signed-off-by: Hugh Dickins <hughd@google.com>
---
 mm/shmem.c |  237 +++++++++++++++++++++++----------------------------
 1 file changed, 108 insertions(+), 129 deletions(-)

--- linux.orig/mm/shmem.c	2011-06-09 11:39:42.845292474 -0700
+++ linux/mm/shmem.c	2011-06-09 11:39:50.369329884 -0700
@@ -1246,41 +1246,47 @@ static int shmem_getpage_gfp(struct inod
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo;
-	struct page *filepage;
-	struct page *swappage;
+	struct page *page;
 	struct page *prealloc_page = NULL;
 	swp_entry_t *entry;
 	swp_entry_t swap;
 	int error;
+	int ret;
 
 	if (idx >= SHMEM_MAX_INDEX)
 		return -EFBIG;
 repeat:
-	filepage = find_lock_page(mapping, idx);
-	if (filepage && PageUptodate(filepage))
-		goto done;
-	if (!filepage) {
+	page = find_lock_page(mapping, idx);
+	if (page) {
 		/*
-		 * Try to preload while we can wait, to not make a habit of
-		 * draining atomic reserves; but don't latch on to this cpu.
+		 * Once we can get the page lock, it must be uptodate:
+		 * if there were an error in reading back from swap,
+		 * the page would not be inserted into the filecache.
 		 */
-		error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
-		if (error)
-			goto failed;
-		radix_tree_preload_end();
-		if (sgp != SGP_READ && !prealloc_page) {
-			prealloc_page = shmem_alloc_page(gfp, info, idx);
-			if (prealloc_page) {
-				SetPageSwapBacked(prealloc_page);
-				if (mem_cgroup_cache_charge(prealloc_page,
-						current->mm, GFP_KERNEL)) {
-					page_cache_release(prealloc_page);
-					prealloc_page = NULL;
-				}
+		BUG_ON(!PageUptodate(page));
+		goto done;
+	}
+
+	/*
+	 * Try to preload while we can wait, to not make a habit of
+	 * draining atomic reserves; but don't latch on to this cpu.
+	 */
+	error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+	if (error)
+		goto out;
+	radix_tree_preload_end();
+
+	if (sgp != SGP_READ && !prealloc_page) {
+		prealloc_page = shmem_alloc_page(gfp, info, idx);
+		if (prealloc_page) {
+			SetPageSwapBacked(prealloc_page);
+			if (mem_cgroup_cache_charge(prealloc_page,
+					current->mm, GFP_KERNEL)) {
+				page_cache_release(prealloc_page);
+				prealloc_page = NULL;
 			}
 		}
 	}
-	error = 0;
 
 	spin_lock(&info->lock);
 	shmem_recalc_inode(inode);
@@ -1288,21 +1294,21 @@ repeat:
 	if (IS_ERR(entry)) {
 		spin_unlock(&info->lock);
 		error = PTR_ERR(entry);
-		goto failed;
+		goto out;
 	}
 	swap = *entry;
 
 	if (swap.val) {
 		/* Look it up and read it in.. */
-		swappage = lookup_swap_cache(swap);
-		if (!swappage) {
+		page = lookup_swap_cache(swap);
+		if (!page) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
 			/* here we actually do the io */
 			if (fault_type)
 				*fault_type |= VM_FAULT_MAJOR;
-			swappage = shmem_swapin(swap, gfp, info, idx);
-			if (!swappage) {
+			page = shmem_swapin(swap, gfp, info, idx);
+			if (!page) {
 				spin_lock(&info->lock);
 				entry = shmem_swp_alloc(info, idx, sgp, gfp);
 				if (IS_ERR(entry))
@@ -1314,62 +1320,42 @@ repeat:
 				}
 				spin_unlock(&info->lock);
 				if (error)
-					goto failed;
+					goto out;
 				goto repeat;
 			}
-			wait_on_page_locked(swappage);
-			page_cache_release(swappage);
+			wait_on_page_locked(page);
+			page_cache_release(page);
 			goto repeat;
 		}
 
 		/* We have to do this with page locked to prevent races */
-		if (!trylock_page(swappage)) {
+		if (!trylock_page(page)) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
-			wait_on_page_locked(swappage);
-			page_cache_release(swappage);
+			wait_on_page_locked(page);
+			page_cache_release(page);
 			goto repeat;
 		}
-		if (PageWriteback(swappage)) {
+		if (PageWriteback(page)) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
-			wait_on_page_writeback(swappage);
-			unlock_page(swappage);
-			page_cache_release(swappage);
+			wait_on_page_writeback(page);
+			unlock_page(page);
+			page_cache_release(page);
 			goto repeat;
 		}
-		if (!PageUptodate(swappage)) {
+		if (!PageUptodate(page)) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
-			unlock_page(swappage);
-			page_cache_release(swappage);
+			unlock_page(page);
+			page_cache_release(page);
 			error = -EIO;
-			goto failed;
+			goto out;
 		}
 
-		if (filepage) {
-			shmem_swp_set(info, entry, 0);
-			shmem_swp_unmap(entry);
-			delete_from_swap_cache(swappage);
-			spin_unlock(&info->lock);
-			copy_highpage(filepage, swappage);
-			unlock_page(swappage);
-			page_cache_release(swappage);
-			flush_dcache_page(filepage);
-			SetPageUptodate(filepage);
-			set_page_dirty(filepage);
-			swap_free(swap);
-		} else if (!(error = add_to_page_cache_locked(swappage, mapping,
-					idx, GFP_NOWAIT))) {
-			info->flags |= SHMEM_PAGEIN;
-			shmem_swp_set(info, entry, 0);
-			shmem_swp_unmap(entry);
-			delete_from_swap_cache(swappage);
-			spin_unlock(&info->lock);
-			filepage = swappage;
-			set_page_dirty(filepage);
-			swap_free(swap);
-		} else {
+		error = add_to_page_cache_locked(page, mapping,
+						 idx, GFP_NOWAIT);
+		if (error) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
 			if (error == -ENOMEM) {
@@ -1378,28 +1364,33 @@ repeat:
 				 * call memcg's OOM if needed.
 				 */
 				error = mem_cgroup_shmem_charge_fallback(
-								swappage,
-								current->mm,
-								gfp);
+						page, current->mm, gfp);
 				if (error) {
-					unlock_page(swappage);
-					page_cache_release(swappage);
-					goto failed;
+					unlock_page(page);
+					page_cache_release(page);
+					goto out;
 				}
 			}
-			unlock_page(swappage);
-			page_cache_release(swappage);
+			unlock_page(page);
+			page_cache_release(page);
 			goto repeat;
 		}
-	} else if (sgp == SGP_READ && !filepage) {
+
+		info->flags |= SHMEM_PAGEIN;
+		shmem_swp_set(info, entry, 0);
+		shmem_swp_unmap(entry);
+		delete_from_swap_cache(page);
+		spin_unlock(&info->lock);
+		set_page_dirty(page);
+		swap_free(swap);
+
+	} else if (sgp == SGP_READ) {
 		shmem_swp_unmap(entry);
-		filepage = find_get_page(mapping, idx);
-		if (filepage &&
-		    (!PageUptodate(filepage) || !trylock_page(filepage))) {
+		page = find_get_page(mapping, idx);
+		if (page && !trylock_page(page)) {
 			spin_unlock(&info->lock);
-			wait_on_page_locked(filepage);
-			page_cache_release(filepage);
-			filepage = NULL;
+			wait_on_page_locked(page);
+			page_cache_release(page);
 			goto repeat;
 		}
 		spin_unlock(&info->lock);
@@ -1417,55 +1408,51 @@ repeat:
 		} else if (shmem_acct_block(info->flags))
 			goto nospace;
 
-		if (!filepage) {
-			int ret;
-
-			filepage = prealloc_page;
-			prealloc_page = NULL;
+		page = prealloc_page;
+		prealloc_page = NULL;
 
-			entry = shmem_swp_alloc(info, idx, sgp, gfp);
-			if (IS_ERR(entry))
-				error = PTR_ERR(entry);
-			else {
-				swap = *entry;
-				shmem_swp_unmap(entry);
-			}
-			ret = error || swap.val;
-			if (ret)
-				mem_cgroup_uncharge_cache_page(filepage);
-			else
-				ret = add_to_page_cache_lru(filepage, mapping,
+		entry = shmem_swp_alloc(info, idx, sgp, gfp);
+		if (IS_ERR(entry))
+			error = PTR_ERR(entry);
+		else {
+			swap = *entry;
+			shmem_swp_unmap(entry);
+		}
+		ret = error || swap.val;
+		if (ret)
+			mem_cgroup_uncharge_cache_page(page);
+		else
+			ret = add_to_page_cache_lru(page, mapping,
 						idx, GFP_NOWAIT);
-			/*
-			 * At add_to_page_cache_lru() failure, uncharge will
-			 * be done automatically.
-			 */
-			if (ret) {
-				shmem_unacct_blocks(info->flags, 1);
-				shmem_free_blocks(inode, 1);
-				spin_unlock(&info->lock);
-				page_cache_release(filepage);
-				filepage = NULL;
-				if (error)
-					goto failed;
-				goto repeat;
-			}
-			info->flags |= SHMEM_PAGEIN;
+		/*
+		 * At add_to_page_cache_lru() failure,
+		 * uncharge will be done automatically.
+		 */
+		if (ret) {
+			shmem_unacct_blocks(info->flags, 1);
+			shmem_free_blocks(inode, 1);
+			spin_unlock(&info->lock);
+			page_cache_release(page);
+			if (error)
+				goto out;
+			goto repeat;
 		}
 
+		info->flags |= SHMEM_PAGEIN;
 		info->alloced++;
 		spin_unlock(&info->lock);
-		clear_highpage(filepage);
-		flush_dcache_page(filepage);
-		SetPageUptodate(filepage);
+		clear_highpage(page);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
 		if (sgp == SGP_DIRTY)
-			set_page_dirty(filepage);
+			set_page_dirty(page);
+
 	} else {
 		error = -ENOMEM;
 		goto out;
 	}
 done:
-	*pagep = filepage;
+	*pagep = page;
 	error = 0;
 out:
 	if (prealloc_page) {
@@ -1481,21 +1468,13 @@ nospace:
 	 * but must also avoid reporting a spurious ENOSPC while working on a
 	 * full tmpfs.
 	 */
-	if (!filepage) {
-		struct page *page = find_get_page(mapping, idx);
-		if (page) {
-			spin_unlock(&info->lock);
-			page_cache_release(page);
-			goto repeat;
-		}
-	}
+	page = find_get_page(mapping, idx);
 	spin_unlock(&info->lock);
-	error = -ENOSPC;
-failed:
-	if (filepage) {
-		unlock_page(filepage);
-		page_cache_release(filepage);
+	if (page) {
+		page_cache_release(page);
+		goto repeat;
 	}
+	error = -ENOSPC;
 	goto out;
 }
 


* [PATCH 7/7] tmpfs: simplify unuse and writepage
From: Hugh Dickins @ 2011-06-09 22:42 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Erez Zadok, linux-kernel, linux-mm

shmem_unuse_inode() and shmem_writepage() contain a little code to
cope with pages inserted independently into the filecache, probably
by a filesystem stacked on top of tmpfs, then fed to its ->readpage()
or ->writepage().

Unionfs was indeed experimenting with working in that way three years
ago, but I find no current examples: nowadays the stacking filesystems
use vfs interfaces to the lower filesystem.

It's now illegal: remove most of that code, adding some WARN_ON_ONCEs.
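
For concreteness, the kind of now-illegal stacked call looks roughly
like this (hypothetical sketch; no in-tree filesystem does this today):

static int stacked_writepage(struct page *lower_page,
                             struct writeback_control *wbc)
{
        struct address_space *lower_mapping = lower_page->mapping;

        /*
         * Handing a page straight to the lower ->writepage(): on tmpfs
         * this now hits WARN_ON_ONCE and the page is redirtied, since
         * wbc->for_reclaim is typically not set for stacked writeback.
         */
        return lower_mapping->a_ops->writepage(lower_page, wbc);
}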

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Erez Zadok <ezk@fsl.cs.sunysb.edu>
---
 mm/shmem.c |   50 ++++++++++++++++----------------------------------
 1 file changed, 16 insertions(+), 34 deletions(-)

--- linux.orig/mm/shmem.c	2011-06-09 11:39:50.369329884 -0700
+++ linux/mm/shmem.c	2011-06-09 11:40:02.761391246 -0700
@@ -972,20 +972,7 @@ found:
 	error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
 	/* which does mem_cgroup_uncharge_cache_page on error */
 
-	if (error == -EEXIST) {
-		struct page *filepage = find_get_page(mapping, idx);
-		error = 1;
-		if (filepage) {
-			/*
-			 * There might be a more uptodate page coming down
-			 * from a stacked writepage: forget our swappage if so.
-			 */
-			if (PageUptodate(filepage))
-				error = 0;
-			page_cache_release(filepage);
-		}
-	}
-	if (!error) {
+	if (error != -ENOMEM) {
 		delete_from_swap_cache(page);
 		set_page_dirty(page);
 		info->flags |= SHMEM_PAGEIN;
@@ -1072,16 +1059,17 @@ static int shmem_writepage(struct page *
 	/*
 	 * shmem_backing_dev_info's capabilities prevent regular writeback or
 	 * sync from ever calling shmem_writepage; but a stacking filesystem
-	 * may use the ->writepage of its underlying filesystem, in which case
+	 * might use ->writepage of its underlying filesystem, in which case
 	 * tmpfs should write out to swap only in response to memory pressure,
-	 * and not for the writeback threads or sync.  However, in those cases,
-	 * we do still want to check if there's a redundant swappage to be
-	 * discarded.
+	 * and not for the writeback threads or sync.
 	 */
-	if (wbc->for_reclaim)
-		swap = get_swap_page();
-	else
-		swap.val = 0;
+	if (!wbc->for_reclaim) {
+		WARN_ON_ONCE(1);	/* Still happens? Tell us about it! */
+		goto redirty;
+	}
+	swap = get_swap_page();
+	if (!swap.val)
+		goto redirty;
 
 	/*
 	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
@@ -1092,15 +1080,12 @@ static int shmem_writepage(struct page *
 	 * we've taken the spinlock, because shmem_unuse_inode() will
 	 * prune a !swapped inode from the swaplist under both locks.
 	 */
-	if (swap.val) {
-		mutex_lock(&shmem_swaplist_mutex);
-		if (list_empty(&info->swaplist))
-			list_add_tail(&info->swaplist, &shmem_swaplist);
-	}
+	mutex_lock(&shmem_swaplist_mutex);
+	if (list_empty(&info->swaplist))
+		list_add_tail(&info->swaplist, &shmem_swaplist);
 
 	spin_lock(&info->lock);
-	if (swap.val)
-		mutex_unlock(&shmem_swaplist_mutex);
+	mutex_unlock(&shmem_swaplist_mutex);
 
 	if (index >= info->next_index) {
 		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
@@ -1108,16 +1093,13 @@ static int shmem_writepage(struct page *
 	}
 	entry = shmem_swp_entry(info, index, NULL);
 	if (entry->val) {
-		/*
-		 * The more uptodate page coming down from a stacked
-		 * writepage should replace our old swappage.
-		 */
+		WARN_ON_ONCE(1);	/* Still happens? Tell us about it! */
 		free_swap_and_cache(*entry);
 		shmem_swp_set(info, entry, 0);
 	}
 	shmem_recalc_inode(inode);
 
-	if (swap.val && add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
+	if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
 		delete_from_page_cache(page);
 		shmem_swp_set(info, entry, swap.val);
 		shmem_swp_unmap(entry);


* Re: [PATCH 5/7] tmpfs: simplify prealloc_page
From: Shaohua Li @ 2011-06-10  2:02 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, Zhang, Yanmin, Tim Chen, linux-kernel, linux-mm

On Fri, 2011-06-10 at 06:39 +0800, Hugh Dickins wrote:
> The prealloc_page handling in shmem_getpage_gfp() is unnecessarily
> complicated: first simplify that before going on to filepage/swappage.
> 
> That's right, don't report ENOMEM when the preallocation fails: we may
> or may not need the page.  But simply report ENOMEM once we find we do
> need it, instead of dropping lock, repeating allocation, unwinding on
> failure etc.  And leave the out label on the fast path, don't goto.
> 
> Fix something that looks like a bug but turns out not to be: set
> PageSwapBacked on prealloc_page before its mem_cgroup_cache_charge(),
> as the removed case was doing.  That's important before adding to LRU
> (determines which LRU the page goes on), and does affect which path it
> takes through memcontrol.c, but in the end MEM_CGROUP_CHARGE_TYPE_SHMEM
> is handled no differently from CACHE.
> 
> Signed-off-by: Hugh Dickins <hughd@google.com>
> Cc: Shaohua Li <shaohua.li@intel.com>
> Cc: "Zhang, Yanmin" <yanmin.zhang@intel.com>
> Cc: Tim Chen <tim.c.chen@linux.intel.com>
> ---
>  mm/shmem.c |   59 ++++++++++++---------------------------------------
>  1 file changed, 15 insertions(+), 44 deletions(-)
> 
> --- linux.orig/mm/shmem.c	2011-06-09 11:39:32.361240481 -0700
> +++ linux/mm/shmem.c	2011-06-09 11:39:42.845292474 -0700
> @@ -1269,9 +1269,9 @@ repeat:
>  			goto failed;
>  		radix_tree_preload_end();
>  		if (sgp != SGP_READ && !prealloc_page) {
> -			/* We don't care if this fails */
>  			prealloc_page = shmem_alloc_page(gfp, info, idx);
>  			if (prealloc_page) {
> +				SetPageSwapBacked(prealloc_page);
>  				if (mem_cgroup_cache_charge(prealloc_page,
>  						current->mm, GFP_KERNEL)) {
>  					page_cache_release(prealloc_page);
> @@ -1403,7 +1403,8 @@ repeat:
>  			goto repeat;
>  		}
>  		spin_unlock(&info->lock);
> -	} else {
> +
> +	} else if (prealloc_page) {
>  		shmem_swp_unmap(entry);
>  		sbinfo = SHMEM_SB(inode->i_sb);
>  		if (sbinfo->max_blocks) {
> @@ -1419,41 +1420,8 @@ repeat:
>  		if (!filepage) {
>  			int ret;
>  
> -			if (!prealloc_page) {
> -				spin_unlock(&info->lock);
> -				filepage = shmem_alloc_page(gfp, info, idx);
> -				if (!filepage) {
> -					spin_lock(&info->lock);
> -					shmem_unacct_blocks(info->flags, 1);
> -					shmem_free_blocks(inode, 1);
> -					spin_unlock(&info->lock);
> -					error = -ENOMEM;
> -					goto failed;
> -				}
> -				SetPageSwapBacked(filepage);
> -
> -				/*
> -				 * Precharge page while we can wait, compensate
> -				 * after
> -				 */
> -				error = mem_cgroup_cache_charge(filepage,
> -					current->mm, GFP_KERNEL);
> -				if (error) {
> -					page_cache_release(filepage);
> -					spin_lock(&info->lock);
> -					shmem_unacct_blocks(info->flags, 1);
> -					shmem_free_blocks(inode, 1);
> -					spin_unlock(&info->lock);
> -					filepage = NULL;
> -					goto failed;
> -				}
> -
> -				spin_lock(&info->lock);
> -			} else {
> -				filepage = prealloc_page;
> -				prealloc_page = NULL;
> -				SetPageSwapBacked(filepage);
> -			}
> +			filepage = prealloc_page;
> +			prealloc_page = NULL;
>  
>  			entry = shmem_swp_alloc(info, idx, sgp, gfp);
>  			if (IS_ERR(entry))
> @@ -1492,11 +1460,19 @@ repeat:
>  		SetPageUptodate(filepage);
>  		if (sgp == SGP_DIRTY)
>  			set_page_dirty(filepage);
> +	} else {
Looks like the info->lock unlock is missed here.
Otherwise looks good to me.

Thanks,
Shaohua



* Re: [PATCH 5/7] tmpfs: simplify prealloc_page
From: Hugh Dickins @ 2011-06-10  6:40 UTC (permalink / raw)
  To: Shaohua Li; +Cc: Andrew Morton, Zhang, Yanmin, Tim Chen, linux-kernel, linux-mm

On Fri, 10 Jun 2011, Shaohua Li wrote:
> On Fri, 2011-06-10 at 06:39 +0800, Hugh Dickins wrote:
> > @@ -1492,11 +1460,19 @@ repeat:
> >  		SetPageUptodate(filepage);
> >  		if (sgp == SGP_DIRTY)
> >  			set_page_dirty(filepage);
> > +	} else {
> Looks info->lock unlock is missed here.
> Otherwise looks good to me.

Many thanks for catching that!  Replacements for this patch,
and the next which then rejects, follow as replies.

Hugh


* [PATCH v2 5/7] tmpfs: simplify prealloc_page
From: Hugh Dickins @ 2011-06-10  6:44 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Shaohua Li, Zhang, Yanmin, Tim Chen, linux-kernel, linux-mm

The prealloc_page handling in shmem_getpage_gfp() is unnecessarily
complicated: first simplify that before going on to filepage/swappage.

That's right, don't report ENOMEM when the preallocation fails: we may
or may not need the page.  But simply report ENOMEM once we find we do
need it, instead of dropping lock, repeating allocation, unwinding on
failure etc.  And leave the out label on the fast path, don't goto.

Fix something that looks like a bug but turns out not to be: set
PageSwapBacked on prealloc_page before its mem_cgroup_cache_charge(),
as the removed case was doing.  That's important before adding to LRU
(determines which LRU the page goes on), and does affect which path it
takes through memcontrol.c, but in the end MEM_CGROUP_CHARGE_TYPE_SHMEM
is handled no differently from CACHE.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Shaohua Li <shaohua.li@intel.com>
Cc: "Zhang, Yanmin" <yanmin.zhang@intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
---
 mm/shmem.c |   60 +++++++++++++--------------------------------------
 1 file changed, 16 insertions(+), 44 deletions(-)

--- linux.orig/mm/shmem.c	2011-06-09 11:39:32.361240481 -0700
+++ linux/mm/shmem.c	2011-06-09 23:17:08.364060453 -0700
@@ -1269,9 +1269,9 @@ repeat:
 			goto failed;
 		radix_tree_preload_end();
 		if (sgp != SGP_READ && !prealloc_page) {
-			/* We don't care if this fails */
 			prealloc_page = shmem_alloc_page(gfp, info, idx);
 			if (prealloc_page) {
+				SetPageSwapBacked(prealloc_page);
 				if (mem_cgroup_cache_charge(prealloc_page,
 						current->mm, GFP_KERNEL)) {
 					page_cache_release(prealloc_page);
@@ -1403,7 +1403,8 @@ repeat:
 			goto repeat;
 		}
 		spin_unlock(&info->lock);
-	} else {
+
+	} else if (prealloc_page) {
 		shmem_swp_unmap(entry);
 		sbinfo = SHMEM_SB(inode->i_sb);
 		if (sbinfo->max_blocks) {
@@ -1419,41 +1420,8 @@ repeat:
 		if (!filepage) {
 			int ret;
 
-			if (!prealloc_page) {
-				spin_unlock(&info->lock);
-				filepage = shmem_alloc_page(gfp, info, idx);
-				if (!filepage) {
-					spin_lock(&info->lock);
-					shmem_unacct_blocks(info->flags, 1);
-					shmem_free_blocks(inode, 1);
-					spin_unlock(&info->lock);
-					error = -ENOMEM;
-					goto failed;
-				}
-				SetPageSwapBacked(filepage);
-
-				/*
-				 * Precharge page while we can wait, compensate
-				 * after
-				 */
-				error = mem_cgroup_cache_charge(filepage,
-					current->mm, GFP_KERNEL);
-				if (error) {
-					page_cache_release(filepage);
-					spin_lock(&info->lock);
-					shmem_unacct_blocks(info->flags, 1);
-					shmem_free_blocks(inode, 1);
-					spin_unlock(&info->lock);
-					filepage = NULL;
-					goto failed;
-				}
-
-				spin_lock(&info->lock);
-			} else {
-				filepage = prealloc_page;
-				prealloc_page = NULL;
-				SetPageSwapBacked(filepage);
-			}
+			filepage = prealloc_page;
+			prealloc_page = NULL;
 
 			entry = shmem_swp_alloc(info, idx, sgp, gfp);
 			if (IS_ERR(entry))
@@ -1492,11 +1460,20 @@ repeat:
 		SetPageUptodate(filepage);
 		if (sgp == SGP_DIRTY)
 			set_page_dirty(filepage);
+	} else {
+		spin_unlock(&info->lock);
+		error = -ENOMEM;
+		goto out;
 	}
 done:
 	*pagep = filepage;
 	error = 0;
-	goto out;
+out:
+	if (prealloc_page) {
+		mem_cgroup_uncharge_cache_page(prealloc_page);
+		page_cache_release(prealloc_page);
+	}
+	return error;
 
 nospace:
 	/*
@@ -1520,12 +1497,7 @@ failed:
 		unlock_page(filepage);
 		page_cache_release(filepage);
 	}
-out:
-	if (prealloc_page) {
-		mem_cgroup_uncharge_cache_page(prealloc_page);
-		page_cache_release(prealloc_page);
-	}
-	return error;
+	goto out;
 }
 
 static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 6/7] tmpfs: simplify filepage/swappage
  2011-06-09 22:40 ` [PATCH 6/7] tmpfs: simplify filepage/swappage Hugh Dickins
@ 2011-06-10  6:46   ` Hugh Dickins
  0 siblings, 0 replies; 14+ messages in thread
From: Hugh Dickins @ 2011-06-10  6:46 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, linux-mm

We can now simplify shmem_getpage_gfp(): there is no longer the dilemma
of a filepage passed in via shmem_readpage(), with a swappage then found
which must be copied over to it.

Although at first it's tempting to replace the **pagep arg by returning
struct page *, that makes a mess of IS_ERR_OR_NULL(page)s in all the
callers, so leave as is.

Insert BUG_ON(!PageUptodate) when we find and lock the page: some of
the complication came from uninitialized pages inserted into filecache
prior to readpage; but now we're in control, and only release the page
lock on a filecache page once it's uptodate (if an error occurs in
reading back from swap, the page remains in swapcache, never moved to
filecache).
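
In sketch form (matching the diff below), the lookup at the top of
shmem_getpage_gfp() then carries the invariant:

	page = find_lock_page(mapping, idx);
	if (page) {
		/*
		 * Getting the page lock implies the page was fully read
		 * before it was ever inserted into filecache.
		 */
		BUG_ON(!PageUptodate(page));
		goto done;
	}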

Signed-off-by: Hugh Dickins <hughd@google.com>
---
 mm/shmem.c |  237 +++++++++++++++++++++++----------------------------
 1 file changed, 108 insertions(+), 129 deletions(-)

--- linux.orig/mm/shmem.c	2011-06-09 23:17:08.000000000 -0700
+++ linux/mm/shmem.c	2011-06-09 23:31:44.056402814 -0700
@@ -1246,41 +1246,47 @@ static int shmem_getpage_gfp(struct inod
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo;
-	struct page *filepage;
-	struct page *swappage;
+	struct page *page;
 	struct page *prealloc_page = NULL;
 	swp_entry_t *entry;
 	swp_entry_t swap;
 	int error;
+	int ret;
 
 	if (idx >= SHMEM_MAX_INDEX)
 		return -EFBIG;
 repeat:
-	filepage = find_lock_page(mapping, idx);
-	if (filepage && PageUptodate(filepage))
-		goto done;
-	if (!filepage) {
+	page = find_lock_page(mapping, idx);
+	if (page) {
 		/*
-		 * Try to preload while we can wait, to not make a habit of
-		 * draining atomic reserves; but don't latch on to this cpu.
+		 * Once we can get the page lock, it must be uptodate:
+		 * if there were an error in reading back from swap,
+		 * the page would not be inserted into the filecache.
 		 */
-		error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
-		if (error)
-			goto failed;
-		radix_tree_preload_end();
-		if (sgp != SGP_READ && !prealloc_page) {
-			prealloc_page = shmem_alloc_page(gfp, info, idx);
-			if (prealloc_page) {
-				SetPageSwapBacked(prealloc_page);
-				if (mem_cgroup_cache_charge(prealloc_page,
-						current->mm, GFP_KERNEL)) {
-					page_cache_release(prealloc_page);
-					prealloc_page = NULL;
-				}
+		BUG_ON(!PageUptodate(page));
+		goto done;
+	}
+
+	/*
+	 * Try to preload while we can wait, to not make a habit of
+	 * draining atomic reserves; but don't latch on to this cpu.
+	 */
+	error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+	if (error)
+		goto out;
+	radix_tree_preload_end();
+
+	if (sgp != SGP_READ && !prealloc_page) {
+		prealloc_page = shmem_alloc_page(gfp, info, idx);
+		if (prealloc_page) {
+			SetPageSwapBacked(prealloc_page);
+			if (mem_cgroup_cache_charge(prealloc_page,
+					current->mm, GFP_KERNEL)) {
+				page_cache_release(prealloc_page);
+				prealloc_page = NULL;
 			}
 		}
 	}
-	error = 0;
 
 	spin_lock(&info->lock);
 	shmem_recalc_inode(inode);
@@ -1288,21 +1294,21 @@ repeat:
 	if (IS_ERR(entry)) {
 		spin_unlock(&info->lock);
 		error = PTR_ERR(entry);
-		goto failed;
+		goto out;
 	}
 	swap = *entry;
 
 	if (swap.val) {
 		/* Look it up and read it in.. */
-		swappage = lookup_swap_cache(swap);
-		if (!swappage) {
+		page = lookup_swap_cache(swap);
+		if (!page) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
 			/* here we actually do the io */
 			if (fault_type)
 				*fault_type |= VM_FAULT_MAJOR;
-			swappage = shmem_swapin(swap, gfp, info, idx);
-			if (!swappage) {
+			page = shmem_swapin(swap, gfp, info, idx);
+			if (!page) {
 				spin_lock(&info->lock);
 				entry = shmem_swp_alloc(info, idx, sgp, gfp);
 				if (IS_ERR(entry))
@@ -1314,62 +1320,42 @@ repeat:
 				}
 				spin_unlock(&info->lock);
 				if (error)
-					goto failed;
+					goto out;
 				goto repeat;
 			}
-			wait_on_page_locked(swappage);
-			page_cache_release(swappage);
+			wait_on_page_locked(page);
+			page_cache_release(page);
 			goto repeat;
 		}
 
 		/* We have to do this with page locked to prevent races */
-		if (!trylock_page(swappage)) {
+		if (!trylock_page(page)) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
-			wait_on_page_locked(swappage);
-			page_cache_release(swappage);
+			wait_on_page_locked(page);
+			page_cache_release(page);
 			goto repeat;
 		}
-		if (PageWriteback(swappage)) {
+		if (PageWriteback(page)) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
-			wait_on_page_writeback(swappage);
-			unlock_page(swappage);
-			page_cache_release(swappage);
+			wait_on_page_writeback(page);
+			unlock_page(page);
+			page_cache_release(page);
 			goto repeat;
 		}
-		if (!PageUptodate(swappage)) {
+		if (!PageUptodate(page)) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
-			unlock_page(swappage);
-			page_cache_release(swappage);
+			unlock_page(page);
+			page_cache_release(page);
 			error = -EIO;
-			goto failed;
+			goto out;
 		}
 
-		if (filepage) {
-			shmem_swp_set(info, entry, 0);
-			shmem_swp_unmap(entry);
-			delete_from_swap_cache(swappage);
-			spin_unlock(&info->lock);
-			copy_highpage(filepage, swappage);
-			unlock_page(swappage);
-			page_cache_release(swappage);
-			flush_dcache_page(filepage);
-			SetPageUptodate(filepage);
-			set_page_dirty(filepage);
-			swap_free(swap);
-		} else if (!(error = add_to_page_cache_locked(swappage, mapping,
-					idx, GFP_NOWAIT))) {
-			info->flags |= SHMEM_PAGEIN;
-			shmem_swp_set(info, entry, 0);
-			shmem_swp_unmap(entry);
-			delete_from_swap_cache(swappage);
-			spin_unlock(&info->lock);
-			filepage = swappage;
-			set_page_dirty(filepage);
-			swap_free(swap);
-		} else {
+		error = add_to_page_cache_locked(page, mapping,
+						 idx, GFP_NOWAIT);
+		if (error) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
 			if (error == -ENOMEM) {
@@ -1378,28 +1364,33 @@ repeat:
 				 * call memcg's OOM if needed.
 				 */
 				error = mem_cgroup_shmem_charge_fallback(
-								swappage,
-								current->mm,
-								gfp);
+						page, current->mm, gfp);
 				if (error) {
-					unlock_page(swappage);
-					page_cache_release(swappage);
-					goto failed;
+					unlock_page(page);
+					page_cache_release(page);
+					goto out;
 				}
 			}
-			unlock_page(swappage);
-			page_cache_release(swappage);
+			unlock_page(page);
+			page_cache_release(page);
 			goto repeat;
 		}
-	} else if (sgp == SGP_READ && !filepage) {
+
+		info->flags |= SHMEM_PAGEIN;
+		shmem_swp_set(info, entry, 0);
+		shmem_swp_unmap(entry);
+		delete_from_swap_cache(page);
+		spin_unlock(&info->lock);
+		set_page_dirty(page);
+		swap_free(swap);
+
+	} else if (sgp == SGP_READ) {
 		shmem_swp_unmap(entry);
-		filepage = find_get_page(mapping, idx);
-		if (filepage &&
-		    (!PageUptodate(filepage) || !trylock_page(filepage))) {
+		page = find_get_page(mapping, idx);
+		if (page && !trylock_page(page)) {
 			spin_unlock(&info->lock);
-			wait_on_page_locked(filepage);
-			page_cache_release(filepage);
-			filepage = NULL;
+			wait_on_page_locked(page);
+			page_cache_release(page);
 			goto repeat;
 		}
 		spin_unlock(&info->lock);
@@ -1417,56 +1408,52 @@ repeat:
 		} else if (shmem_acct_block(info->flags))
 			goto nospace;
 
-		if (!filepage) {
-			int ret;
-
-			filepage = prealloc_page;
-			prealloc_page = NULL;
+		page = prealloc_page;
+		prealloc_page = NULL;
 
-			entry = shmem_swp_alloc(info, idx, sgp, gfp);
-			if (IS_ERR(entry))
-				error = PTR_ERR(entry);
-			else {
-				swap = *entry;
-				shmem_swp_unmap(entry);
-			}
-			ret = error || swap.val;
-			if (ret)
-				mem_cgroup_uncharge_cache_page(filepage);
-			else
-				ret = add_to_page_cache_lru(filepage, mapping,
+		entry = shmem_swp_alloc(info, idx, sgp, gfp);
+		if (IS_ERR(entry))
+			error = PTR_ERR(entry);
+		else {
+			swap = *entry;
+			shmem_swp_unmap(entry);
+		}
+		ret = error || swap.val;
+		if (ret)
+			mem_cgroup_uncharge_cache_page(page);
+		else
+			ret = add_to_page_cache_lru(page, mapping,
 						idx, GFP_NOWAIT);
-			/*
-			 * At add_to_page_cache_lru() failure, uncharge will
-			 * be done automatically.
-			 */
-			if (ret) {
-				shmem_unacct_blocks(info->flags, 1);
-				shmem_free_blocks(inode, 1);
-				spin_unlock(&info->lock);
-				page_cache_release(filepage);
-				filepage = NULL;
-				if (error)
-					goto failed;
-				goto repeat;
-			}
-			info->flags |= SHMEM_PAGEIN;
+		/*
+		 * At add_to_page_cache_lru() failure,
+		 * uncharge will be done automatically.
+		 */
+		if (ret) {
+			shmem_unacct_blocks(info->flags, 1);
+			shmem_free_blocks(inode, 1);
+			spin_unlock(&info->lock);
+			page_cache_release(page);
+			if (error)
+				goto out;
+			goto repeat;
 		}
 
+		info->flags |= SHMEM_PAGEIN;
 		info->alloced++;
 		spin_unlock(&info->lock);
-		clear_highpage(filepage);
-		flush_dcache_page(filepage);
-		SetPageUptodate(filepage);
+		clear_highpage(page);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
 		if (sgp == SGP_DIRTY)
-			set_page_dirty(filepage);
+			set_page_dirty(page);
+
 	} else {
 		spin_unlock(&info->lock);
 		error = -ENOMEM;
 		goto out;
 	}
 done:
-	*pagep = filepage;
+	*pagep = page;
 	error = 0;
 out:
 	if (prealloc_page) {
@@ -1482,21 +1469,13 @@ nospace:
 	 * but must also avoid reporting a spurious ENOSPC while working on a
 	 * full tmpfs.
 	 */
-	if (!filepage) {
-		struct page *page = find_get_page(mapping, idx);
-		if (page) {
-			spin_unlock(&info->lock);
-			page_cache_release(page);
-			goto repeat;
-		}
-	}
+	page = find_get_page(mapping, idx);
 	spin_unlock(&info->lock);
-	error = -ENOSPC;
-failed:
-	if (filepage) {
-		unlock_page(filepage);
-		page_cache_release(filepage);
+	if (page) {
+		page_cache_release(page);
+		goto repeat;
 	}
+	error = -ENOSPC;
 	goto out;
 }
 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/7] tmpfs: clone shmem_file_splice_read
  2011-06-09 22:32 ` [PATCH 1/7] tmpfs: clone shmem_file_splice_read Hugh Dickins
@ 2011-06-10  9:19   ` Jens Axboe
  2011-06-10 20:01     ` Hugh Dickins
  0 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2011-06-10  9:19 UTC (permalink / raw)
  To: Hugh Dickins; +Cc: Andrew Morton, linux-kernel, linux-mm

On 2011-06-10 00:32, Hugh Dickins wrote:
> Copy __generic_file_splice_read() and generic_file_splice_read()
> from fs/splice.c to shmem_file_splice_read() in mm/shmem.c.  Make
> page_cache_pipe_buf_ops and spd_release_page() accessible to it.

That's a lot of fairly complicated and convoluted code to have
duplicated. Yes, I know, I know, it's already largely duplicated from the
normal file read, but still... Really no easy way to share this?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/7] tmpfs: clone shmem_file_splice_read
  2011-06-10  9:19   ` Jens Axboe
@ 2011-06-10 20:01     ` Hugh Dickins
  0 siblings, 0 replies; 14+ messages in thread
From: Hugh Dickins @ 2011-06-10 20:01 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Andrew Morton, linux-kernel, linux-mm

On Fri, 10 Jun 2011, Jens Axboe wrote:
> On 2011-06-10 00:32, Hugh Dickins wrote:
> > Copy __generic_file_splice_read() and generic_file_splice_read()
> > from fs/splice.c to shmem_file_splice_read() in mm/shmem.c.  Make
> > page_cache_pipe_buf_ops and spd_release_page() accessible to it.
> 
> That's a lot of fairly complicated and convoluted code to have
> duplicated. Yes, I know I know, it's already largely duplicated from the
> normal file read, but still... Really no easy way to share this?

I hadn't thought in that direction at all, to be honest: I imagined you
had already factored out what could be factored out, splice_to_pipe()
and the spd helpers.  I just copied the code over in this patch, then
trimmed it down for tmpfs in the next.

(I didn't Cc you on that second patch because it was local to shmem.c:
maybe you looked it up on the list, maybe you didn't - here it is below.
It would be very unfair to claim that it halves the size, since I took
out some comments; compiled size goes down from 1219 to 897 bytes.)

What tmpfs wants to avoid is outsiders inserting pages into its filecache
then calling ->readpage on them (because tmpfs may already have those
pages in swapcache - though I've another reason to avoid it coming soon).
Since the interesting uses of tmpfs go through its ->splice_read nowadays,
I just switched that over not to use ->readpage.

You ask, no easy way to share this?  I guess we could make a little
library-like function of the isize, this_len code at fill_it; but
that doesn't really seem worth much.
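
For concreteness, such a helper might look something like this (purely
a sketch: the name and signature are invented here, condensing the
isize/this_len logic the two copies share):

	/*
	 * Hypothetical helper: clamp the number of bytes to splice from
	 * the page at 'index', given offset 'loff' into that page and
	 * 'len' bytes still wanted, against the current i_size.
	 * Returns 0 when nothing from this page should be spliced.
	 */
	static unsigned int spd_clamp_len(struct inode *inode, pgoff_t index,
					  unsigned int loff, unsigned int len)
	{
		loff_t isize = i_size_read(inode);
		pgoff_t end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
		unsigned int this_len = min_t(unsigned int, len,
					      PAGE_CACHE_SIZE - loff);

		if (unlikely(!isize || index > end_index))
			return 0;
		if (end_index == index) {
			unsigned int plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
			if (plen <= loff)
				return 0;
			this_len = min(this_len, plen - loff);
		}
		return this_len;
	}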

I could drop shmem.c's find_get_pages_contig() and combine the other
two loops to make just a single shmem_getpage()ing loop; or even
resort to using default_file_splice_read() with its page allocation
and copying.  But crippling the shmem splice just seems a retrograde
step, which might be noticed by sendfile users: I don't think that's
what you intend.

Ever since birth, shmem had to have its own file_read and file_write:
your splice work with Nick's write_begin changes let it use generic
splice for writing, but I do need to avoid readpage in reading.

If I thought I had my finger on the pulse of what modern filesystems
want, I might propose a readpage replacement for all; but, frankly,
the notion that I have my finger on any pulse at all is laughable ;)

Any suggestions?

Hugh

[PATCH 2/7] tmpfs: refine shmem_file_splice_read

Tidy up shmem_file_splice_read():

Remove readahead: okay, we could implement shmem readahead on swap,
but we have never done so before, swap being the slow exceptional path.

Use shmem_getpage() instead of find_or_create_page() plus ->readpage().

Remove several comments: sorry, I found them more distracting than
helpful, and this will not be the reference version of splice_read().
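
With shmem_getpage() doing the work, the loop that fills holes after
find_get_pages_contig() reduces to (as the diff below shows):

	while (spd.nr_pages < nr_pages) {
		page = NULL;
		error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
		if (error)
			break;
		/* shmem_getpage() returns the page locked */
		unlock_page(page);
		spd.pages[spd.nr_pages++] = page;
		index++;
	}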

Signed-off-by: Hugh Dickins <hughd@google.com>
---
 mm/shmem.c |  138 +++++++--------------------------------------------
 1 file changed, 19 insertions(+), 119 deletions(-)

--- linux.orig/mm/shmem.c	2011-06-09 11:38:05.232808448 -0700
+++ linux/mm/shmem.c	2011-06-09 11:38:13.436849182 -0700
@@ -1850,6 +1850,7 @@ static ssize_t shmem_file_splice_read(st
 				unsigned int flags)
 {
 	struct address_space *mapping = in->f_mapping;
+	struct inode *inode = mapping->host;
 	unsigned int loff, nr_pages, req_pages;
 	struct page *pages[PIPE_DEF_BUFFERS];
 	struct partial_page partial[PIPE_DEF_BUFFERS];
@@ -1865,7 +1866,7 @@ static ssize_t shmem_file_splice_read(st
 		.spd_release = spd_release_page,
 	};
 
-	isize = i_size_read(in->f_mapping->host);
+	isize = i_size_read(inode);
 	if (unlikely(*ppos >= isize))
 		return 0;
 
@@ -1881,153 +1882,57 @@ static ssize_t shmem_file_splice_read(st
 	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	nr_pages = min(req_pages, pipe->buffers);
 
-	/*
-	 * Lookup the (hopefully) full range of pages we need.
-	 */
 	spd.nr_pages = find_get_pages_contig(mapping, index,
 						nr_pages, spd.pages);
 	index += spd.nr_pages;
-
-	/*
-	 * If find_get_pages_contig() returned fewer pages than we needed,
-	 * readahead/allocate the rest and fill in the holes.
-	 */
-	if (spd.nr_pages < nr_pages)
-		page_cache_sync_readahead(mapping, &in->f_ra, in,
-				index, req_pages - spd.nr_pages);
-
 	error = 0;
-	while (spd.nr_pages < nr_pages) {
-		/*
-		 * Page could be there, find_get_pages_contig() breaks on
-		 * the first hole.
-		 */
-		page = find_get_page(mapping, index);
-		if (!page) {
-			/*
-			 * page didn't exist, allocate one.
-			 */
-			page = page_cache_alloc_cold(mapping);
-			if (!page)
-				break;
-
-			error = add_to_page_cache_lru(page, mapping, index,
-						GFP_KERNEL);
-			if (unlikely(error)) {
-				page_cache_release(page);
-				if (error == -EEXIST)
-					continue;
-				break;
-			}
-			/*
-			 * add_to_page_cache() locks the page, unlock it
-			 * to avoid convoluting the logic below even more.
-			 */
-			unlock_page(page);
-		}
 
+	while (spd.nr_pages < nr_pages) {
+		page = NULL;
+		error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
+		if (error)
+			break;
+		unlock_page(page);
 		spd.pages[spd.nr_pages++] = page;
 		index++;
 	}
 
-	/*
-	 * Now loop over the map and see if we need to start IO on any
-	 * pages, fill in the partial map, etc.
-	 */
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	nr_pages = spd.nr_pages;
 	spd.nr_pages = 0;
+
 	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
 		unsigned int this_len;
 
 		if (!len)
 			break;
 
-		/*
-		 * this_len is the max we'll use from this page
-		 */
 		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
 		page = spd.pages[page_nr];
 
-		if (PageReadahead(page))
-			page_cache_async_readahead(mapping, &in->f_ra, in,
-					page, index, req_pages - page_nr);
-
-		/*
-		 * If the page isn't uptodate, we may need to start io on it
-		 */
-		if (!PageUptodate(page)) {
-			lock_page(page);
-
-			/*
-			 * Page was truncated, or invalidated by the
-			 * filesystem.  Redo the find/create, but this time the
-			 * page is kept locked, so there's no chance of another
-			 * race with truncate/invalidate.
-			 */
-			if (!page->mapping) {
-				unlock_page(page);
-				page = find_or_create_page(mapping, index,
-						mapping_gfp_mask(mapping));
-
-				if (!page) {
-					error = -ENOMEM;
-					break;
-				}
-				page_cache_release(spd.pages[page_nr]);
-				spd.pages[page_nr] = page;
-			}
-			/*
-			 * page was already under io and is now done, great
-			 */
-			if (PageUptodate(page)) {
-				unlock_page(page);
-				goto fill_it;
-			}
-
-			/*
-			 * need to read in the page
-			 */
-			error = mapping->a_ops->readpage(in, page);
-			if (unlikely(error)) {
-				/*
-				 * We really should re-lookup the page here,
-				 * but it complicates things a lot. Instead
-				 * lets just do what we already stored, and
-				 * we'll get it the next time we are called.
-				 */
-				if (error == AOP_TRUNCATED_PAGE)
-					error = 0;
-
+		if (!PageUptodate(page) || page->mapping != mapping) {
+			page = NULL;
+			error = shmem_getpage(inode, index, &page,
+							SGP_CACHE, NULL);
+			if (error)
 				break;
-			}
+			unlock_page(page);
+			page_cache_release(spd.pages[page_nr]);
+			spd.pages[page_nr] = page;
 		}
-fill_it:
-		/*
-		 * i_size must be checked after PageUptodate.
-		 */
-		isize = i_size_read(mapping->host);
+
+		isize = i_size_read(inode);
 		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
 		if (unlikely(!isize || index > end_index))
 			break;
 
-		/*
-		 * if this is the last page, see if we need to shrink
-		 * the length and stop
-		 */
 		if (end_index == index) {
 			unsigned int plen;
 
-			/*
-			 * max good bytes in this page
-			 */
 			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
 			if (plen <= loff)
 				break;
 
-			/*
-			 * force quit after adding this page
-			 */
 			this_len = min(this_len, plen - loff);
 			len = this_len;
 		}
@@ -2040,13 +1945,8 @@ fill_it:
 		index++;
 	}
 
-	/*
-	 * Release any pages at the end, if we quit early. 'page_nr' is how far
-	 * we got, 'nr_pages' is how many pages are in the map.
-	 */
 	while (page_nr < nr_pages)
 		page_cache_release(spd.pages[page_nr++]);
-	in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
 
 	if (spd.nr_pages)
 		error = splice_to_pipe(pipe, &spd);

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2011-06-10 20:01 UTC | newest]

Thread overview: 14+ messages
2011-06-09 22:30 [PATCH 0/7] tmpfs: simplify by splice instead of readpage Hugh Dickins
2011-06-09 22:32 ` [PATCH 1/7] tmpfs: clone shmem_file_splice_read Hugh Dickins
2011-06-10  9:19   ` Jens Axboe
2011-06-10 20:01     ` Hugh Dickins
2011-06-09 22:33 ` [PATCH 2/7] tmpfs: refine shmem_file_splice_read Hugh Dickins
2011-06-09 22:34 ` [PATCH 3/7] tmpfs: pass gfp to shmem_getpage_gfp Hugh Dickins
2011-06-09 22:35 ` [PATCH 4/7] tmpfs: remove shmem_readpage Hugh Dickins
2011-06-09 22:39 ` [PATCH 5/7] tmpfs: simplify prealloc_page Hugh Dickins
2011-06-10  2:02   ` Shaohua Li
2011-06-10  6:40     ` Hugh Dickins
2011-06-10  6:44     ` [PATCH v2 " Hugh Dickins
2011-06-09 22:40 ` [PATCH 6/7] tmpfs: simplify filepage/swappage Hugh Dickins
2011-06-10  6:46   ` [PATCH v2 " Hugh Dickins
2011-06-09 22:42 ` [PATCH 7/7] tmpfs: simplify unuse and writepage Hugh Dickins
