linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] ondemand readahead simplifications
       [not found] <20070613134902.132573123@mail.ustc.edu.cn>
@ 2007-06-13 13:49 ` Fengguang Wu
       [not found] ` <20070613135128.152095732@mail.ustc.edu.cn>
       [not found] ` <20070613135128.282288719@mail.ustc.edu.cn>
  2 siblings, 0 replies; 4+ messages in thread
From: Fengguang Wu @ 2007-06-13 13:49 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

Andrew,

These two patches makes the interface and data structure of ondemand readahead
more clear:

diffstat:
 fs/ext3/dir.c      |    4 -
 fs/ext4/dir.c      |    4 -
 fs/splice.c        |    6 -
 include/linux/fs.h |   61 +--------------
 include/linux/mm.h |   20 +++--
 mm/filemap.c       |   10 +-
 mm/readahead.c     |  165 +++++++++++++++++++++----------------------
 7 files changed, 116 insertions(+), 154 deletions(-)

Regards,
Fengguang

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] readahead: split ondemand readahead interface into two functions
       [not found] ` <20070613135128.152095732@mail.ustc.edu.cn>
@ 2007-06-13 13:49   ` Fengguang Wu
  0 siblings, 0 replies; 4+ messages in thread
From: Fengguang Wu @ 2007-06-13 13:49 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Rusty Russell

[-- Attachment #1: readahead-interface-rusty.patch --]
[-- Type: text/plain, Size: 9276 bytes --]

Split ondemand readahead interface into two functions.  I think this
makes it a little clearer for non-readahead experts (like Rusty).

Internally they both call ondemand_readahead(), but the page argument
is changed to an obvious boolean flag.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
---
 fs/ext3/dir.c      |    4 -
 fs/ext4/dir.c      |    4 -
 fs/splice.c        |    6 +-
 include/linux/mm.h |   20 ++++++---
 mm/filemap.c       |   10 ++--
 mm/readahead.c     |   95 ++++++++++++++++++++++++++-----------------
 6 files changed, 84 insertions(+), 55 deletions(-)

--- linux-2.6.22-rc4-mm2.orig/fs/ext3/dir.c
+++ linux-2.6.22-rc4-mm2/fs/ext3/dir.c
@@ -139,10 +139,10 @@ static int ext3_readdir(struct file * fi
 			pgoff_t index = map_bh.b_blocknr >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits);
 			if (!ra_has_index(&filp->f_ra, index))
-				page_cache_readahead_ondemand(
+				page_cache_sync_readahead(
 					sb->s_bdev->bd_inode->i_mapping,
 					&filp->f_ra, filp,
-					NULL, index, 1);
+					index, 1);
 			filp->f_ra.prev_index = index;
 			bh = ext3_bread(NULL, inode, blk, 0, &err);
 		}
--- linux-2.6.22-rc4-mm2.orig/fs/ext4/dir.c
+++ linux-2.6.22-rc4-mm2/fs/ext4/dir.c
@@ -138,10 +138,10 @@ static int ext4_readdir(struct file * fi
 			pgoff_t index = map_bh.b_blocknr >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits);
 			if (!ra_has_index(&filp->f_ra, index))
-				page_cache_readahead_ondemand(
+				page_cache_sync_readahead(
 					sb->s_bdev->bd_inode->i_mapping,
 					&filp->f_ra, filp,
-					NULL, index, 1);
+					index, 1);
 			filp->f_ra.prev_index = index;
 			bh = ext4_bread(NULL, inode, blk, 0, &err);
 		}
--- linux-2.6.22-rc4-mm2.orig/fs/splice.c
+++ linux-2.6.22-rc4-mm2/fs/splice.c
@@ -304,8 +304,8 @@ __generic_file_splice_read(struct file *
 	 * readahead/allocate the rest.
 	 */
 	if (spd.nr_pages < nr_pages)
-		page_cache_readahead_ondemand(mapping, &in->f_ra, in,
-				NULL, index, req_pages - spd.nr_pages);
+		page_cache_sync_readahead(mapping, &in->f_ra, in,
+				index, req_pages - spd.nr_pages);
 
 	while (spd.nr_pages < nr_pages) {
 		/*
@@ -360,7 +360,7 @@ __generic_file_splice_read(struct file *
 		page = pages[page_nr];
 
 		if (PageReadahead(page))
-			page_cache_readahead_ondemand(mapping, &in->f_ra, in,
+			page_cache_async_readahead(mapping, &in->f_ra, in,
 					page, index, req_pages - page_nr);
 
 		/*
--- linux-2.6.22-rc4-mm2.orig/include/linux/mm.h
+++ linux-2.6.22-rc4-mm2/include/linux/mm.h
@@ -1146,12 +1146,20 @@ int do_page_cache_readahead(struct addre
 			pgoff_t offset, unsigned long nr_to_read);
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
-unsigned long page_cache_readahead_ondemand(struct address_space *mapping,
-			  struct file_ra_state *ra,
-			  struct file *filp,
-			  struct page *page,
-			  pgoff_t offset,
-			  unsigned long size);
+
+void page_cache_sync_readahead(struct address_space *mapping,
+			       struct file_ra_state *ra,
+			       struct file *filp,
+			       pgoff_t offset,
+			       unsigned long size);
+
+void page_cache_async_readahead(struct address_space *mapping,
+				struct file_ra_state *ra,
+				struct file *filp,
+				struct page *pg,
+				pgoff_t offset,
+				unsigned long size);
+
 unsigned long max_sane_readahead(unsigned long nr);
 
 /* Do stack extension */
--- linux-2.6.22-rc4-mm2.orig/mm/filemap.c
+++ linux-2.6.22-rc4-mm2/mm/filemap.c
@@ -913,15 +913,15 @@ void do_generic_mapping_read(struct addr
 find_page:
 		page = find_get_page(mapping, index);
 		if (!page) {
-			page_cache_readahead_ondemand(mapping,
-					&ra, filp, page,
+			page_cache_sync_readahead(mapping,
+					&ra, filp,
 					index, last_index - index);
 			page = find_get_page(mapping, index);
 			if (unlikely(page == NULL))
 				goto no_cached_page;
 		}
 		if (PageReadahead(page)) {
-			page_cache_readahead_ondemand(mapping,
+			page_cache_async_readahead(mapping,
 					&ra, filp, page,
 					index, last_index - index);
 		}
@@ -1382,14 +1382,14 @@ retry_find:
 	 */
 	if (VM_SequentialReadHint(vma)) {
 		if (!page) {
-			page_cache_readahead_ondemand(mapping, ra, file, page,
+			page_cache_sync_readahead(mapping, ra, file,
 							   fdata->pgoff, 1);
 			page = find_lock_page(mapping, fdata->pgoff);
 			if (!page)
 				goto no_cached_page;
 		}
 		if (PageReadahead(page)) {
-			page_cache_readahead_ondemand(mapping, ra, file, page,
+			page_cache_async_readahead(mapping, ra, file, page,
 							   fdata->pgoff, 1);
 		}
 	}
--- linux-2.6.22-rc4-mm2.orig/mm/readahead.c
+++ linux-2.6.22-rc4-mm2/mm/readahead.c
@@ -351,7 +351,7 @@ static unsigned long get_next_ra_size(st
 static unsigned long
 ondemand_readahead(struct address_space *mapping,
 		   struct file_ra_state *ra, struct file *filp,
-		   struct page *page, pgoff_t offset,
+		   bool hit_readahead_marker, pgoff_t offset,
 		   unsigned long req_size)
 {
 	unsigned long max;	/* max readahead pages */
@@ -379,7 +379,7 @@ ondemand_readahead(struct address_space 
 	 * Standalone, small read.
 	 * Read as is, and do not pollute the readahead state.
 	 */
-	if (!page && !sequential) {
+	if (!hit_readahead_marker && !sequential) {
 		return __do_page_cache_readahead(mapping, filp,
 						offset, req_size, 0);
 	}
@@ -400,7 +400,7 @@ ondemand_readahead(struct address_space 
 	 * E.g. interleaved reads.
 	 * Not knowing its readahead pos/size, bet on the minimal possible one.
 	 */
-	if (page) {
+	if (hit_readahead_marker) {
 		ra_index++;
 		ra_size = min(4 * ra_size, max);
 	}
@@ -413,50 +413,71 @@ fill_ra:
 }
 
 /**
- * page_cache_readahead_ondemand - generic file readahead
+ * page_cache_sync_readahead - generic file readahead
  * @mapping: address_space which holds the pagecache and I/O vectors
  * @ra: file_ra_state which holds the readahead state
  * @filp: passed on to ->readpage() and ->readpages()
- * @page: the page at @offset, or NULL if non-present
- * @offset: start offset into @mapping, in PAGE_CACHE_SIZE units
+ * @offset: start offset into @mapping, in pagecache page-sized units
  * @req_size: hint: total size of the read which the caller is performing in
- *            PAGE_CACHE_SIZE units
+ *            pagecache pages
  *
- * page_cache_readahead_ondemand() is the entry point of readahead logic.
- * This function should be called when it is time to perform readahead:
- * 1) @page == NULL
- *    A cache miss happened, time for synchronous readahead.
- * 2) @page != NULL && PageReadahead(@page)
- *    A look-ahead hit occured, time for asynchronous readahead.
+ * page_cache_sync_readahead() should be called when a cache miss happened:
+ * it will submit the read.  The readahead logic may decide to piggyback more
+ * pages onto the read request if access patterns suggest it will improve
+ * performance.
  */
-unsigned long
-page_cache_readahead_ondemand(struct address_space *mapping,
-				struct file_ra_state *ra, struct file *filp,
-				struct page *page, pgoff_t offset,
-				unsigned long req_size)
+void page_cache_sync_readahead(struct address_space *mapping,
+			       struct file_ra_state *ra, struct file *filp,
+			       pgoff_t offset, unsigned long req_size)
 {
 	/* no read-ahead */
 	if (!ra->ra_pages)
-		return 0;
+		return;
 
-	if (page) {
-		/*
-		 * It can be PG_reclaim.
-		 */
-		if (PageWriteback(page))
-			return 0;
-
-		ClearPageReadahead(page);
-
-		/*
-		 * Defer asynchronous read-ahead on IO congestion.
-		 */
-		if (bdi_read_congested(mapping->backing_dev_info))
-			return 0;
-	}
+	/* do read-ahead */
+	ondemand_readahead(mapping, ra, filp, false, offset, req_size);
+}
+EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
+
+/**
+ * page_cache_async_readahead - file readahead for marked pages
+ * @mapping: address_space which holds the pagecache and I/O vectors
+ * @ra: file_ra_state which holds the readahead state
+ * @filp: passed on to ->readpage() and ->readpages()
+ * @page: the page at @offset which has the PG_readahead flag set
+ * @offset: start offset into @mapping, in pagecache page-sized units
+ * @req_size: hint: total size of the read which the caller is performing in
+ *            pagecache pages
+ *
+ * page_cache_async_ondemand() should be called when a page is used which
+ * has the PG_readahead flag: this is a marker to suggest that the application
+ * has used up enough of the readahead window that we should start pulling in
+ * more pages. */
+void
+page_cache_async_readahead(struct address_space *mapping,
+			   struct file_ra_state *ra, struct file *filp,
+			   struct page *page, pgoff_t offset,
+			   unsigned long req_size)
+{
+	/* no read-ahead */
+	if (!ra->ra_pages)
+		return;
+
+	/*
+	 * Same bit is used for PG_readahead and PG_reclaim.
+	 */
+	if (PageWriteback(page))
+		return;
+
+	ClearPageReadahead(page);
+
+	/*
+	 * Defer asynchronous read-ahead on IO congestion.
+	 */
+	if (bdi_read_congested(mapping->backing_dev_info))
+		return;
 
 	/* do read-ahead */
-	return ondemand_readahead(mapping, ra, filp, page,
-					offset, req_size);
+	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
 }
-EXPORT_SYMBOL_GPL(page_cache_readahead_ondemand);
+EXPORT_SYMBOL_GPL(page_cache_async_readahead);

--

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 2/2] readahead: sanify file_ra_state names
       [not found] ` <20070613135128.282288719@mail.ustc.edu.cn>
@ 2007-06-13 13:49   ` Fengguang Wu
  0 siblings, 0 replies; 4+ messages in thread
From: Fengguang Wu @ 2007-06-13 13:49 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

[-- Attachment #1: readahead-rename.patch --]
[-- Type: text/plain, Size: 8151 bytes --]

Rename some file_ra_state variables and remove some accessors.

It results in much simpler code.
Kudos to Rusty!

Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
---
 include/linux/fs.h |   61 +++-----------------------------------
 mm/readahead.c     |   68 +++++++++++++++----------------------------
 2 files changed, 31 insertions(+), 98 deletions(-)

--- linux-2.6.22-rc4-mm2.orig/include/linux/fs.h
+++ linux-2.6.22-rc4-mm2/include/linux/fs.h
@@ -768,16 +768,12 @@ struct fown_struct {
 
 /*
  * Track a single file's readahead state
- *
- *  ================#============|==================#==================|
- *                  ^            ^                  ^                  ^
- *  file_ra_state.la_index    .ra_index   .lookahead_index   .readahead_index
  */
 struct file_ra_state {
-	pgoff_t la_index;               /* enqueue time */
-	pgoff_t ra_index;               /* begin offset */
-	pgoff_t lookahead_index;        /* time to do next readahead */
-	pgoff_t readahead_index;        /* end offset */
+	pgoff_t start;                  /* where readahead started */
+	unsigned long size;             /* # of readahead pages */
+	unsigned long async_size;       /* do asynchronous readahead when
+					   there are only # of pages ahead */
 
 	unsigned long ra_pages;		/* Maximum readahead window */
 	unsigned long mmap_hit;		/* Cache hit stat for mmap accesses */
@@ -787,59 +783,14 @@ struct file_ra_state {
 };
 
 /*
- * Measuring read-ahead sizes.
- *
- *                  |----------- readahead size ------------>|
- *  ===#============|==================#=====================|
- *     |------- invoke interval ------>|-- lookahead size -->|
- */
-static inline unsigned long ra_readahead_size(struct file_ra_state *ra)
-{
-	return ra->readahead_index - ra->ra_index;
-}
-
-static inline unsigned long ra_lookahead_size(struct file_ra_state *ra)
-{
-	return ra->readahead_index - ra->lookahead_index;
-}
-
-static inline unsigned long ra_invoke_interval(struct file_ra_state *ra)
-{
-	return ra->lookahead_index - ra->la_index;
-}
-
-/*
  * Check if @index falls in the readahead windows.
  */
 static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
 {
-	return (index >= ra->la_index &&
-		index <  ra->readahead_index);
-}
-
-/*
- * Where is the old read-ahead and look-ahead?
- */
-static inline void ra_set_index(struct file_ra_state *ra,
-				pgoff_t la_index, pgoff_t ra_index)
-{
-	ra->la_index = la_index;
-	ra->ra_index = ra_index;
+	return (index >= ra->start &&
+		index <  ra->start + ra->size);
 }
 
-/*
- * Where is the new read-ahead and look-ahead?
- */
-static inline void ra_set_size(struct file_ra_state *ra,
-				unsigned long ra_size, unsigned long la_size)
-{
-	ra->readahead_index = ra->ra_index + ra_size;
-	ra->lookahead_index = ra->ra_index + ra_size - la_size;
-}
-
-unsigned long ra_submit(struct file_ra_state *ra,
-		       struct address_space *mapping, struct file *filp);
-
 struct file {
 	/*
 	 * fu_list becomes invalid after file_free is called and queued via
--- linux-2.6.22-rc4-mm2.orig/mm/readahead.c
+++ linux-2.6.22-rc4-mm2/mm/readahead.c
@@ -245,21 +245,16 @@ unsigned long max_sane_readahead(unsigne
 /*
  * Submit IO for the read-ahead request in file_ra_state.
  */
-unsigned long ra_submit(struct file_ra_state *ra,
+static unsigned long ra_submit(struct file_ra_state *ra,
 		       struct address_space *mapping, struct file *filp)
 {
-	unsigned long ra_size;
-	unsigned long la_size;
 	int actual;
 
-	ra_size = ra_readahead_size(ra);
-	la_size = ra_lookahead_size(ra);
 	actual = __do_page_cache_readahead(mapping, filp,
-					ra->ra_index, ra_size, la_size);
+					ra->start, ra->size, ra->async_size);
 
 	return actual;
 }
-EXPORT_SYMBOL_GPL(ra_submit);
 
 /*
  * Set the initial window size, round to next power of 2 and square
@@ -288,7 +283,7 @@ static unsigned long get_init_ra_size(un
 static unsigned long get_next_ra_size(struct file_ra_state *ra,
 						unsigned long max)
 {
-	unsigned long cur = ra->readahead_index - ra->ra_index;
+	unsigned long cur = ra->size;
 	unsigned long newsize;
 
 	if (cur < max / 16)
@@ -305,28 +300,21 @@ static unsigned long get_next_ra_size(st
  * The fields in struct file_ra_state represent the most-recently-executed
  * readahead attempt:
  *
- *                    |-------- last readahead window -------->|
- *       |-- application walking here -->|
- * ======#============|==================#=====================|
- *       ^la_index    ^ra_index          ^lookahead_index      ^readahead_index
- *
- * [ra_index, readahead_index) represents the last readahead window.
- *
- * [la_index, lookahead_index] is where the application would be walking(in
- * the common case of cache-cold sequential reads): the last window was
- * established when the application was at la_index, and the next window will
- * be bring in when the application reaches lookahead_index.
+ *                        |<----- async_size ---------|
+ *     |------------------- size -------------------->|
+ *     |==================#===========================|
+ *     ^start             ^page marked with PG_readahead
  *
  * To overlap application thinking time and disk I/O time, we do
  * `readahead pipelining': Do not wait until the application consumed all
  * readahead pages and stalled on the missing page at readahead_index;
- * Instead, submit an asynchronous readahead I/O as early as the application
- * reads on the page at lookahead_index. Normally lookahead_index will be
- * equal to ra_index, for maximum pipelining.
+ * Instead, submit an asynchronous readahead I/O as soon as there are
+ * only async_size pages left in the readahead window. Normally async_size
+ * will be equal to size, for maximum pipelining.
  *
  * In interleaved sequential reads, concurrent streams on the same fd can
  * be invalidating each other's readahead state. So we flag the new readahead
- * page at lookahead_index with PG_readahead, and use it as readahead
+ * page at (start+size-async_size) with PG_readahead, and use it as readahead
  * indicator. The flag won't be set on already cached pages, to avoid the
  * readahead-for-nothing fuss, saving pointless page cache lookups.
  *
@@ -355,24 +343,21 @@ ondemand_readahead(struct address_space 
 		   unsigned long req_size)
 {
 	unsigned long max;	/* max readahead pages */
-	pgoff_t ra_index;	/* readahead index */
-	unsigned long ra_size;	/* readahead size */
-	unsigned long la_size;	/* lookahead size */
 	int sequential;
 
 	max = ra->ra_pages;
 	sequential = (offset - ra->prev_index <= 1UL) || (req_size > max);
 
 	/*
-	 * Lookahead/readahead hit, assume sequential access.
+	 * It's the expected callback offset, assume sequential access.
 	 * Ramp up sizes, and push forward the readahead window.
 	 */
-	if (offset && (offset == ra->lookahead_index ||
-			offset == ra->readahead_index)) {
-		ra_index = ra->readahead_index;
-		ra_size = get_next_ra_size(ra, max);
-		la_size = ra_size;
-		goto fill_ra;
+	if (offset && (offset == (ra->start + ra->size - ra->async_size) ||
+			offset == (ra->start + ra->size))) {
+		ra->start += ra->size;
+		ra->size = get_next_ra_size(ra, max);
+		ra->async_size = ra->size;
+		goto readit;
 	}
 
 	/*
@@ -391,24 +376,21 @@ ondemand_readahead(struct address_space 
 	 * 	- oversize random read
 	 * Start readahead for it.
 	 */
-	ra_index = offset;
-	ra_size = get_init_ra_size(req_size, max);
-	la_size = ra_size > req_size ? ra_size - req_size : ra_size;
+	ra->start = offset;
+	ra->size = get_init_ra_size(req_size, max);
+	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
 
 	/*
-	 * Hit on a lookahead page without valid readahead state.
+	 * Hit on a marked page without valid readahead state.
 	 * E.g. interleaved reads.
 	 * Not knowing its readahead pos/size, bet on the minimal possible one.
 	 */
 	if (hit_readahead_marker) {
-		ra_index++;
-		ra_size = min(4 * ra_size, max);
+		ra->start++;
+		ra->size = min(4 * ra->size, max);
 	}
 
-fill_ra:
-	ra_set_index(ra, offset, ra_index);
-	ra_set_size(ra, ra_size, la_size);
-
+readit:
 	return ra_submit(ra, mapping, filp);
 }
 

--

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] readahead: split ondemand readahead interface into two functions
       [not found] ` <20070614062815.949361167@mail.ustc.edu.cn>
@ 2007-06-14  6:21   ` Fengguang Wu
  0 siblings, 0 replies; 4+ messages in thread
From: Fengguang Wu @ 2007-06-14  6:21 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Rusty Russell, linux-kernel

[-- Attachment #1: readahead-interface-rusty.patch --]
[-- Type: text/plain, Size: 9276 bytes --]

Split ondemand readahead interface into two functions.  I think this
makes it a little clearer for non-readahead experts (like Rusty).

Internally they both call ondemand_readahead(), but the page argument
is changed to an obvious boolean flag.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
---
 fs/ext3/dir.c      |    4 -
 fs/ext4/dir.c      |    4 -
 fs/splice.c        |    6 +-
 include/linux/mm.h |   20 ++++++---
 mm/filemap.c       |   10 ++--
 mm/readahead.c     |   95 ++++++++++++++++++++++++++-----------------
 6 files changed, 84 insertions(+), 55 deletions(-)

--- linux-2.6.22-rc4-mm2.orig/fs/ext3/dir.c
+++ linux-2.6.22-rc4-mm2/fs/ext3/dir.c
@@ -139,10 +139,10 @@ static int ext3_readdir(struct file * fi
 			pgoff_t index = map_bh.b_blocknr >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits);
 			if (!ra_has_index(&filp->f_ra, index))
-				page_cache_readahead_ondemand(
+				page_cache_sync_readahead(
 					sb->s_bdev->bd_inode->i_mapping,
 					&filp->f_ra, filp,
-					NULL, index, 1);
+					index, 1);
 			filp->f_ra.prev_index = index;
 			bh = ext3_bread(NULL, inode, blk, 0, &err);
 		}
--- linux-2.6.22-rc4-mm2.orig/fs/ext4/dir.c
+++ linux-2.6.22-rc4-mm2/fs/ext4/dir.c
@@ -138,10 +138,10 @@ static int ext4_readdir(struct file * fi
 			pgoff_t index = map_bh.b_blocknr >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits);
 			if (!ra_has_index(&filp->f_ra, index))
-				page_cache_readahead_ondemand(
+				page_cache_sync_readahead(
 					sb->s_bdev->bd_inode->i_mapping,
 					&filp->f_ra, filp,
-					NULL, index, 1);
+					index, 1);
 			filp->f_ra.prev_index = index;
 			bh = ext4_bread(NULL, inode, blk, 0, &err);
 		}
--- linux-2.6.22-rc4-mm2.orig/fs/splice.c
+++ linux-2.6.22-rc4-mm2/fs/splice.c
@@ -304,8 +304,8 @@ __generic_file_splice_read(struct file *
 	 * readahead/allocate the rest.
 	 */
 	if (spd.nr_pages < nr_pages)
-		page_cache_readahead_ondemand(mapping, &in->f_ra, in,
-				NULL, index, req_pages - spd.nr_pages);
+		page_cache_sync_readahead(mapping, &in->f_ra, in,
+				index, req_pages - spd.nr_pages);
 
 	while (spd.nr_pages < nr_pages) {
 		/*
@@ -360,7 +360,7 @@ __generic_file_splice_read(struct file *
 		page = pages[page_nr];
 
 		if (PageReadahead(page))
-			page_cache_readahead_ondemand(mapping, &in->f_ra, in,
+			page_cache_async_readahead(mapping, &in->f_ra, in,
 					page, index, req_pages - page_nr);
 
 		/*
--- linux-2.6.22-rc4-mm2.orig/include/linux/mm.h
+++ linux-2.6.22-rc4-mm2/include/linux/mm.h
@@ -1146,12 +1146,20 @@ int do_page_cache_readahead(struct addre
 			pgoff_t offset, unsigned long nr_to_read);
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
-unsigned long page_cache_readahead_ondemand(struct address_space *mapping,
-			  struct file_ra_state *ra,
-			  struct file *filp,
-			  struct page *page,
-			  pgoff_t offset,
-			  unsigned long size);
+
+void page_cache_sync_readahead(struct address_space *mapping,
+			       struct file_ra_state *ra,
+			       struct file *filp,
+			       pgoff_t offset,
+			       unsigned long size);
+
+void page_cache_async_readahead(struct address_space *mapping,
+				struct file_ra_state *ra,
+				struct file *filp,
+				struct page *pg,
+				pgoff_t offset,
+				unsigned long size);
+
 unsigned long max_sane_readahead(unsigned long nr);
 
 /* Do stack extension */
--- linux-2.6.22-rc4-mm2.orig/mm/filemap.c
+++ linux-2.6.22-rc4-mm2/mm/filemap.c
@@ -913,15 +913,15 @@ void do_generic_mapping_read(struct addr
 find_page:
 		page = find_get_page(mapping, index);
 		if (!page) {
-			page_cache_readahead_ondemand(mapping,
-					&ra, filp, page,
+			page_cache_sync_readahead(mapping,
+					&ra, filp,
 					index, last_index - index);
 			page = find_get_page(mapping, index);
 			if (unlikely(page == NULL))
 				goto no_cached_page;
 		}
 		if (PageReadahead(page)) {
-			page_cache_readahead_ondemand(mapping,
+			page_cache_async_readahead(mapping,
 					&ra, filp, page,
 					index, last_index - index);
 		}
@@ -1382,14 +1382,14 @@ retry_find:
 	 */
 	if (VM_SequentialReadHint(vma)) {
 		if (!page) {
-			page_cache_readahead_ondemand(mapping, ra, file, page,
+			page_cache_sync_readahead(mapping, ra, file,
 							   fdata->pgoff, 1);
 			page = find_lock_page(mapping, fdata->pgoff);
 			if (!page)
 				goto no_cached_page;
 		}
 		if (PageReadahead(page)) {
-			page_cache_readahead_ondemand(mapping, ra, file, page,
+			page_cache_async_readahead(mapping, ra, file, page,
 							   fdata->pgoff, 1);
 		}
 	}
--- linux-2.6.22-rc4-mm2.orig/mm/readahead.c
+++ linux-2.6.22-rc4-mm2/mm/readahead.c
@@ -351,7 +351,7 @@ static unsigned long get_next_ra_size(st
 static unsigned long
 ondemand_readahead(struct address_space *mapping,
 		   struct file_ra_state *ra, struct file *filp,
-		   struct page *page, pgoff_t offset,
+		   bool hit_readahead_marker, pgoff_t offset,
 		   unsigned long req_size)
 {
 	unsigned long max;	/* max readahead pages */
@@ -379,7 +379,7 @@ ondemand_readahead(struct address_space 
 	 * Standalone, small read.
 	 * Read as is, and do not pollute the readahead state.
 	 */
-	if (!page && !sequential) {
+	if (!hit_readahead_marker && !sequential) {
 		return __do_page_cache_readahead(mapping, filp,
 						offset, req_size, 0);
 	}
@@ -400,7 +400,7 @@ ondemand_readahead(struct address_space 
 	 * E.g. interleaved reads.
 	 * Not knowing its readahead pos/size, bet on the minimal possible one.
 	 */
-	if (page) {
+	if (hit_readahead_marker) {
 		ra_index++;
 		ra_size = min(4 * ra_size, max);
 	}
@@ -413,50 +413,71 @@ fill_ra:
 }
 
 /**
- * page_cache_readahead_ondemand - generic file readahead
+ * page_cache_sync_readahead - generic file readahead
  * @mapping: address_space which holds the pagecache and I/O vectors
  * @ra: file_ra_state which holds the readahead state
  * @filp: passed on to ->readpage() and ->readpages()
- * @page: the page at @offset, or NULL if non-present
- * @offset: start offset into @mapping, in PAGE_CACHE_SIZE units
+ * @offset: start offset into @mapping, in pagecache page-sized units
  * @req_size: hint: total size of the read which the caller is performing in
- *            PAGE_CACHE_SIZE units
+ *            pagecache pages
  *
- * page_cache_readahead_ondemand() is the entry point of readahead logic.
- * This function should be called when it is time to perform readahead:
- * 1) @page == NULL
- *    A cache miss happened, time for synchronous readahead.
- * 2) @page != NULL && PageReadahead(@page)
- *    A look-ahead hit occured, time for asynchronous readahead.
+ * page_cache_sync_readahead() should be called when a cache miss happened:
+ * it will submit the read.  The readahead logic may decide to piggyback more
+ * pages onto the read request if access patterns suggest it will improve
+ * performance.
  */
-unsigned long
-page_cache_readahead_ondemand(struct address_space *mapping,
-				struct file_ra_state *ra, struct file *filp,
-				struct page *page, pgoff_t offset,
-				unsigned long req_size)
+void page_cache_sync_readahead(struct address_space *mapping,
+			       struct file_ra_state *ra, struct file *filp,
+			       pgoff_t offset, unsigned long req_size)
 {
 	/* no read-ahead */
 	if (!ra->ra_pages)
-		return 0;
+		return;
 
-	if (page) {
-		/*
-		 * It can be PG_reclaim.
-		 */
-		if (PageWriteback(page))
-			return 0;
-
-		ClearPageReadahead(page);
-
-		/*
-		 * Defer asynchronous read-ahead on IO congestion.
-		 */
-		if (bdi_read_congested(mapping->backing_dev_info))
-			return 0;
-	}
+	/* do read-ahead */
+	ondemand_readahead(mapping, ra, filp, false, offset, req_size);
+}
+EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
+
+/**
+ * page_cache_async_readahead - file readahead for marked pages
+ * @mapping: address_space which holds the pagecache and I/O vectors
+ * @ra: file_ra_state which holds the readahead state
+ * @filp: passed on to ->readpage() and ->readpages()
+ * @page: the page at @offset which has the PG_readahead flag set
+ * @offset: start offset into @mapping, in pagecache page-sized units
+ * @req_size: hint: total size of the read which the caller is performing in
+ *            pagecache pages
+ *
+ * page_cache_async_ondemand() should be called when a page is used which
+ * has the PG_readahead flag: this is a marker to suggest that the application
+ * has used up enough of the readahead window that we should start pulling in
+ * more pages. */
+void
+page_cache_async_readahead(struct address_space *mapping,
+			   struct file_ra_state *ra, struct file *filp,
+			   struct page *page, pgoff_t offset,
+			   unsigned long req_size)
+{
+	/* no read-ahead */
+	if (!ra->ra_pages)
+		return;
+
+	/*
+	 * Same bit is used for PG_readahead and PG_reclaim.
+	 */
+	if (PageWriteback(page))
+		return;
+
+	ClearPageReadahead(page);
+
+	/*
+	 * Defer asynchronous read-ahead on IO congestion.
+	 */
+	if (bdi_read_congested(mapping->backing_dev_info))
+		return;
 
 	/* do read-ahead */
-	return ondemand_readahead(mapping, ra, filp, page,
-					offset, req_size);
+	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
 }
-EXPORT_SYMBOL_GPL(page_cache_readahead_ondemand);
+EXPORT_SYMBOL_GPL(page_cache_async_readahead);

--

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2007-06-14  6:28 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20070613134902.132573123@mail.ustc.edu.cn>
2007-06-13 13:49 ` [PATCH 0/2] ondemand readahead simplifications Fengguang Wu
     [not found] ` <20070613135128.152095732@mail.ustc.edu.cn>
2007-06-13 13:49   ` [PATCH 1/2] readahead: split ondemand readahead interface into two functions Fengguang Wu
     [not found] ` <20070613135128.282288719@mail.ustc.edu.cn>
2007-06-13 13:49   ` [PATCH 2/2] readahead: sanify file_ra_state names Fengguang Wu
     [not found] <20070614062447.058708836@mail.ustc.edu.cn>
     [not found] ` <20070614062815.949361167@mail.ustc.edu.cn>
2007-06-14  6:21   ` [PATCH 1/2] readahead: split ondemand readahead interface into two functions Fengguang Wu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).