linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Wu Fengguang <fengguang.wu@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Andi Kleen <andi@firstfloor.org>,
	Li Shaohua <shaohua.li@intel.com>, Jan Kara <jack@suse.cz>,
	Wu Fengguang <fengguang.wu@intel.com>
Cc: Linux Memory Management List <linux-mm@kvack.org>,
	<linux-fsdevel@vger.kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 7/9] readahead: basic support for backwards prefetching
Date: Fri, 27 Jan 2012 11:05:31 +0800	[thread overview]
Message-ID: <20120127031327.293145482@intel.com> (raw)
In-Reply-To: 20120127030524.854259561@intel.com

[-- Attachment #1: readahead-backwards.patch --]
[-- Type: text/plain, Size: 5588 bytes --]

Add basic support for backwards prefetching. The implementation is simple
because it does not support async prefetching or interleaved reads.

tail and tac are observed to have the reverse read pattern:

tail-3501  [006]   111.881191: readahead: readahead-random(bdi=0:16, ino=1548450, req=750+1, ra=750+1-0, async=0) = 1
tail-3501  [006]   111.881506: readahead: readahead-backwards(bdi=0:16, ino=1548450, req=748+2, ra=746+5-0, async=0) = 4
tail-3501  [006]   111.882021: readahead: readahead-backwards(bdi=0:16, ino=1548450, req=744+2, ra=726+25-0, async=0) = 20
tail-3501  [006]   111.883713: readahead: readahead-backwards(bdi=0:16, ino=1548450, req=724+2, ra=626+125-0, async=0) = 100

 tac-3528  [001]   118.671924: readahead: readahead-random(bdi=0:16, ino=1548445, req=750+1, ra=750+1-0, async=0) = 1
 tac-3528  [001]   118.672371: readahead: readahead-backwards(bdi=0:16, ino=1548445, req=748+2, ra=746+5-0, async=0) = 4
 tac-3528  [001]   118.673039: readahead: readahead-backwards(bdi=0:16, ino=1548445, req=744+2, ra=726+25-0, async=0) = 20

Here is the behavior with an 8-page read sequence from 10000 down to 0.
(The readahead size is a bit large since it's an NFS mount.)

readahead-random(dev=0:16, ino=3948605, req=10000+8, ra=10000+8-0, async=0) = 8
readahead-backwards(dev=0:16, ino=3948605, req=9992+8, ra=9968+32-0, async=0) = 32
readahead-backwards(dev=0:16, ino=3948605, req=9960+8, ra=9840+128-0, async=0) = 128
readahead-backwards(dev=0:16, ino=3948605, req=9832+8, ra=9584+256-0, async=0) = 256
readahead-backwards(dev=0:16, ino=3948605, req=9576+8, ra=9072+512-0, async=0) = 512
readahead-backwards(dev=0:16, ino=3948605, req=9064+8, ra=8048+1024-0, async=0) = 1024
readahead-backwards(dev=0:16, ino=3948605, req=8040+8, ra=6128+1920-0, async=0) = 1920
readahead-backwards(dev=0:16, ino=3948605, req=6120+8, ra=4208+1920-0, async=0) = 1920
readahead-backwards(dev=0:16, ino=3948605, req=4200+8, ra=2288+1920-0, async=0) = 1920
readahead-backwards(dev=0:16, ino=3948605, req=2280+8, ra=368+1920-0, async=0) = 1920
readahead-backwards(dev=0:16, ino=3948605, req=360+8, ra=0+368-0, async=0) = 368

And here is a simple 1-page read sequence from 10000 down to 0:

readahead-random(dev=0:16, ino=3948605, req=10000+1, ra=10000+1-0, async=0) = 1
readahead-backwards(dev=0:16, ino=3948605, req=9999+1, ra=9996+4-0, async=0) = 4
readahead-backwards(dev=0:16, ino=3948605, req=9995+1, ra=9980+16-0, async=0) = 16
readahead-backwards(dev=0:16, ino=3948605, req=9979+1, ra=9916+64-0, async=0) = 64
readahead-backwards(dev=0:16, ino=3948605, req=9915+1, ra=9660+256-0, async=0) = 256
readahead-backwards(dev=0:16, ino=3948605, req=9659+1, ra=9148+512-0, async=0) = 512
readahead-backwards(dev=0:16, ino=3948605, req=9147+1, ra=8124+1024-0, async=0) = 1024
readahead-backwards(dev=0:16, ino=3948605, req=8123+1, ra=6204+1920-0, async=0) = 1920
readahead-backwards(dev=0:16, ino=3948605, req=6203+1, ra=4284+1920-0, async=0) = 1920
readahead-backwards(dev=0:16, ino=3948605, req=4283+1, ra=2364+1920-0, async=0) = 1920
readahead-backwards(dev=0:16, ino=3948605, req=2363+1, ra=444+1920-0, async=0) = 1920
readahead-backwards(dev=0:16, ino=3948605, req=443+1, ra=0+444-0, async=0) = 444

CC: Andi Kleen <andi@firstfloor.org>
CC: Li Shaohua <shaohua.li@intel.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/fs.h         |    2 ++
 include/trace/events/vfs.h |    1 +
 mm/readahead.c             |   20 ++++++++++++++++++++
 3 files changed, 23 insertions(+)

--- linux-next.orig/include/linux/fs.h	2012-01-25 15:57:52.000000000 +0800
+++ linux-next/include/linux/fs.h	2012-01-25 15:57:57.000000000 +0800
@@ -975,6 +975,7 @@ struct file_ra_state {
  *				streams.
  * RA_PATTERN_MMAP_AROUND	read-around on mmap page faults
  *				(w/o any sequential/random hints)
+ * RA_PATTERN_BACKWARDS		reverse reading detected
  * RA_PATTERN_FADVISE		triggered by POSIX_FADV_WILLNEED or FMODE_RANDOM
  * RA_PATTERN_OVERSIZE		a random read larger than max readahead size,
  *				do max readahead to break down the read size
@@ -985,6 +986,7 @@ enum readahead_pattern {
 	RA_PATTERN_SUBSEQUENT,
 	RA_PATTERN_CONTEXT,
 	RA_PATTERN_MMAP_AROUND,
+	RA_PATTERN_BACKWARDS,
 	RA_PATTERN_FADVISE,
 	RA_PATTERN_OVERSIZE,
 	RA_PATTERN_RANDOM,
--- linux-next.orig/mm/readahead.c	2012-01-25 15:57:53.000000000 +0800
+++ linux-next/mm/readahead.c	2012-01-25 15:57:57.000000000 +0800
@@ -695,6 +695,26 @@ ondemand_readahead(struct address_space 
 	}
 
 	/*
+	 * backwards reading
+	 */
+	if (offset < ra->start && offset + req_size >= ra->start) {
+		ra->pattern = RA_PATTERN_BACKWARDS;
+		ra->size = get_next_ra_size(ra, max);
+		if (ra->size > ra->start) {
+			/*
+			 * ra->start may be concurrently set to some huge
+			 * value, the min() at least avoids submitting huge IO
+			 * in this race condition
+			 */
+			ra->size = min(ra->start, max);
+			ra->start = 0;
+		} else
+			ra->start -= ra->size;
+		ra->async_size = 0;
+		goto readit;
+	}
+
+	/*
 	 * Query the page cache and look for the traces(cached history pages)
 	 * that a sequential stream would leave behind.
 	 */
--- linux-next.orig/include/trace/events/vfs.h	2012-01-25 15:57:52.000000000 +0800
+++ linux-next/include/trace/events/vfs.h	2012-01-25 15:57:57.000000000 +0800
@@ -14,6 +14,7 @@
 			{ RA_PATTERN_SUBSEQUENT,	"subsequent"	}, \
 			{ RA_PATTERN_CONTEXT,		"context"	}, \
 			{ RA_PATTERN_MMAP_AROUND,	"around"	}, \
+			{ RA_PATTERN_BACKWARDS,		"backwards"	}, \
 			{ RA_PATTERN_FADVISE,		"fadvise"	}, \
 			{ RA_PATTERN_OVERSIZE,		"oversize"	}, \
 			{ RA_PATTERN_RANDOM,		"random"	}, \



  parent reply	other threads:[~2012-01-27  3:41 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-01-27  3:05 [PATCH 0/9] readahead stats/tracing, backwards prefetching and more (v4) Wu Fengguang
2012-01-27  3:05 ` [PATCH 1/9] readahead: make context readahead more conservative Wu Fengguang
2012-01-27  3:05 ` [PATCH 2/9] readahead: record readahead patterns Wu Fengguang
2012-01-27  3:05 ` [PATCH 3/9] readahead: tag mmap page fault call sites Wu Fengguang
2012-01-27  3:05 ` [PATCH 4/9] readahead: tag metadata " Wu Fengguang
2012-01-27  3:05 ` [PATCH 5/9] readahead: add vfs/readahead tracing event Wu Fengguang
2012-01-27  3:05 ` [PATCH 6/9] readahead: add /debug/readahead/stats Wu Fengguang
2012-01-27 16:21   ` Christoph Lameter
2012-01-27 20:15     ` Andrew Morton
2012-01-29  5:07       ` Wu Fengguang
2012-01-30  4:02       ` Dave Chinner
2012-01-27  3:05 ` Wu Fengguang [this message]
2012-01-27  3:05 ` [PATCH 8/9] readahead: dont do start-of-file readahead after lseek() Wu Fengguang
2012-01-27  3:05 ` [PATCH 9/9] readahead: snap readahead request to EOF Wu Fengguang
2012-02-09  3:22 ` [PATCH 6/9 update changelog] readahead: add /debug/readahead/stats Wu Fengguang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120127031327.293145482@intel.com \
    --to=fengguang.wu@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=jack@suse.cz \
    --cc=shaohua.li@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).