linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jan Kara <jack@suse.cz>
To: Wu Fengguang <fengguang.wu@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Andi Kleen <andi@firstfloor.org>, Ingo Molnar <mingo@elte.hu>,
	Jens Axboe <axboe@kernel.dk>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Rik van Riel <riel@redhat.com>,
	Linux Memory Management List <linux-mm@kvack.org>,
	linux-fsdevel@vger.kernel.org,
	LKML <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH 3/9] readahead: record readahead patterns
Date: Tue, 29 Nov 2011 15:40:34 +0100	[thread overview]
Message-ID: <20111129144034.GK5635@quack.suse.cz> (raw)
In-Reply-To: <20111129131456.278516066@intel.com>

On Tue 29-11-11 21:09:03, Wu Fengguang wrote:
> Record the readahead pattern in ra->pattern and extend the ra_submit()
> parameters, to be used by the next readahead tracing/stats patches.
> 
> 7 patterns are defined:
> 
>       	pattern			readahead for
> -----------------------------------------------------------
> 	RA_PATTERN_INITIAL	start-of-file read
> 	RA_PATTERN_SUBSEQUENT	trivial sequential read
> 	RA_PATTERN_CONTEXT	interleaved sequential read
> 	RA_PATTERN_OVERSIZE	oversize read
> 	RA_PATTERN_MMAP_AROUND	mmap fault
> 	RA_PATTERN_FADVISE	posix_fadvise()
> 	RA_PATTERN_RANDOM	random read
> 
> Note that random reads will now be recorded in file_ra_state.
> This won't make cache bouncing any worse, because the ra->prev_pos update
> in do_generic_file_read() already pollutes the data cache, and
> filemap_fault() will stop calling into us after MMAP_LOTSAMISS.
> 
> CC: Ingo Molnar <mingo@elte.hu>
> CC: Jens Axboe <axboe@kernel.dk>
> CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Acked-by: Rik van Riel <riel@redhat.com>
> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
  The patch looks OK. You can add:
Acked-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  include/linux/fs.h |   36 +++++++++++++++++++++++++++++++++++-
>  include/linux/mm.h |    4 +++-
>  mm/filemap.c       |    3 ++-
>  mm/readahead.c     |   29 ++++++++++++++++++++++-------
>  4 files changed, 62 insertions(+), 10 deletions(-)
> 
> --- linux-next.orig/include/linux/fs.h	2011-11-28 21:21:05.000000000 +0800
> +++ linux-next/include/linux/fs.h	2011-11-29 10:23:41.000000000 +0800
> @@ -945,11 +945,45 @@ struct file_ra_state {
>  					   there are only # of pages ahead */
>  
>  	unsigned int ra_pages;		/* Maximum readahead window */
> -	unsigned int mmap_miss;		/* Cache miss stat for mmap accesses */
> +	u16 mmap_miss;			/* Cache miss stat for mmap accesses */
> +	u8 pattern;			/* one of RA_PATTERN_* */
> +
>  	loff_t prev_pos;		/* Cache last read() position */
>  };
>  
>  /*
> + * Which policy made the decision to do the current read-ahead IO?
> + *
> + * RA_PATTERN_INITIAL		readahead window is initially opened,
> + *				normally when reading from start of file
> + * RA_PATTERN_SUBSEQUENT	readahead window is pushed forward
> + * RA_PATTERN_CONTEXT		no readahead window available, querying the
> + *				page cache to decide readahead start/size.
> + *				This typically happens on interleaved reads (eg.
> + *				reading pages 0, 1000, 1, 1001, 2, 1002, ...)
> + *				where one file_ra_state struct is not enough
> + *				for recording 2+ interleaved sequential read
> + *				streams.
> + * RA_PATTERN_MMAP_AROUND	read-around on mmap page faults
> + *				(w/o any sequential/random hints)
> + * RA_PATTERN_FADVISE		triggered by POSIX_FADV_WILLNEED or FMODE_RANDOM
> + * RA_PATTERN_OVERSIZE		a random read larger than max readahead size,
> + *				do max readahead to break down the read size
> + * RA_PATTERN_RANDOM		a small random read
> + */
> +enum readahead_pattern {
> +	RA_PATTERN_INITIAL,
> +	RA_PATTERN_SUBSEQUENT,
> +	RA_PATTERN_CONTEXT,
> +	RA_PATTERN_MMAP_AROUND,
> +	RA_PATTERN_FADVISE,
> +	RA_PATTERN_OVERSIZE,
> +	RA_PATTERN_RANDOM,
> +	RA_PATTERN_ALL,		/* for summary stats */
> +	RA_PATTERN_MAX
> +};
> +
> +/*
>   * Check if @index falls in the readahead windows.
>   */
>  static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
> --- linux-next.orig/mm/readahead.c	2011-11-28 22:24:16.000000000 +0800
> +++ linux-next/mm/readahead.c	2011-11-29 10:17:14.000000000 +0800
> @@ -249,7 +249,10 @@ unsigned long max_sane_readahead(unsigne
>   * Submit IO for the read-ahead request in file_ra_state.
>   */
>  unsigned long ra_submit(struct file_ra_state *ra,
> -		       struct address_space *mapping, struct file *filp)
> +			struct address_space *mapping,
> +			struct file *filp,
> +			pgoff_t offset,
> +			unsigned long req_size)
>  {
>  	pgoff_t eof = ((i_size_read(mapping->host)-1) >> PAGE_CACHE_SHIFT) + 1;
>  	pgoff_t start = ra->start;
> @@ -390,6 +393,7 @@ static int try_context_readahead(struct 
>  	if (size >= offset)
>  		size *= 2;
>  
> +	ra->pattern = RA_PATTERN_CONTEXT;
>  	ra->start = offset;
>  	ra->size = get_init_ra_size(size + req_size, max);
>  	ra->async_size = ra->size;
> @@ -411,8 +415,10 @@ ondemand_readahead(struct address_space 
>  	/*
>  	 * start of file
>  	 */
> -	if (!offset)
> +	if (!offset) {
> +		ra->pattern = RA_PATTERN_INITIAL;
>  		goto initial_readahead;
> +	}
>  
>  	/*
>  	 * It's the expected callback offset, assume sequential access.
> @@ -420,6 +426,7 @@ ondemand_readahead(struct address_space 
>  	 */
>  	if ((offset == (ra->start + ra->size - ra->async_size) ||
>  	     offset == (ra->start + ra->size))) {
> +		ra->pattern = RA_PATTERN_SUBSEQUENT;
>  		ra->start += ra->size;
>  		ra->size = get_next_ra_size(ra, max);
>  		ra->async_size = ra->size;
> @@ -442,6 +449,7 @@ ondemand_readahead(struct address_space 
>  		if (!start || start - offset > max)
>  			return 0;
>  
> +		ra->pattern = RA_PATTERN_CONTEXT;
>  		ra->start = start;
>  		ra->size = start - offset;	/* old async_size */
>  		ra->size += req_size;
> @@ -453,14 +461,18 @@ ondemand_readahead(struct address_space 
>  	/*
>  	 * oversize read
>  	 */
> -	if (req_size > max)
> +	if (req_size > max) {
> +		ra->pattern = RA_PATTERN_OVERSIZE;
>  		goto initial_readahead;
> +	}
>  
>  	/*
>  	 * sequential cache miss
>  	 */
> -	if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL)
> +	if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL) {
> +		ra->pattern = RA_PATTERN_INITIAL;
>  		goto initial_readahead;
> +	}
>  
>  	/*
>  	 * Query the page cache and look for the traces(cached history pages)
> @@ -471,9 +483,12 @@ ondemand_readahead(struct address_space 
>  
>  	/*
>  	 * standalone, small random read
> -	 * Read as is, and do not pollute the readahead state.
>  	 */
> -	return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);
> +	ra->pattern = RA_PATTERN_RANDOM;
> +	ra->start = offset;
> +	ra->size = req_size;
> +	ra->async_size = 0;
> +	goto readit;
>  
>  initial_readahead:
>  	ra->start = offset;
> @@ -491,7 +506,7 @@ readit:
>  		ra->size += ra->async_size;
>  	}
>  
> -	return ra_submit(ra, mapping, filp);
> +	return ra_submit(ra, mapping, filp, offset, req_size);
>  }
>  
>  /**
> --- linux-next.orig/include/linux/mm.h	2011-11-28 21:21:05.000000000 +0800
> +++ linux-next/include/linux/mm.h	2011-11-28 22:24:16.000000000 +0800
> @@ -1456,7 +1456,9 @@ void page_cache_async_readahead(struct a
>  unsigned long max_sane_readahead(unsigned long nr);
>  unsigned long ra_submit(struct file_ra_state *ra,
>  			struct address_space *mapping,
> -			struct file *filp);
> +			struct file *filp,
> +			pgoff_t offset,
> +			unsigned long req_size);
>  
>  /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
>  extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
> --- linux-next.orig/mm/filemap.c	2011-11-28 21:21:05.000000000 +0800
> +++ linux-next/mm/filemap.c	2011-11-29 10:17:14.000000000 +0800
> @@ -1611,11 +1611,12 @@ static void do_sync_mmap_readahead(struc
>  	/*
>  	 * mmap read-around
>  	 */
> +	ra->pattern = RA_PATTERN_MMAP_AROUND;
>  	ra_pages = max_sane_readahead(ra->ra_pages);
>  	ra->start = max_t(long, 0, offset - ra_pages / 2);
>  	ra->size = ra_pages;
>  	ra->async_size = ra_pages / 4;
> -	ra_submit(ra, mapping, file);
> +	ra_submit(ra, mapping, file, offset, 1);
>  }
>  
>  /*
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

  reply	other threads:[~2011-11-29 14:40 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-29 13:09 [PATCH 0/9] readahead stats/tracing, backwards prefetching and more (v2) Wu Fengguang
2011-11-29 13:09 ` [PATCH 1/9] block: limit default readahead size for small devices Wu Fengguang
2011-11-29 13:09 ` [PATCH 2/9] readahead: snap readahead request to EOF Wu Fengguang
2011-11-29 14:29   ` Jan Kara
2011-11-30  1:06     ` Wu Fengguang
2011-11-30 11:37       ` Jan Kara
2011-11-30 12:06         ` Wu Fengguang
2011-11-29 13:09 ` [PATCH 3/9] readahead: record readahead patterns Wu Fengguang
2011-11-29 14:40   ` Jan Kara [this message]
2011-11-29 17:57   ` Andi Kleen
2011-11-30  1:18     ` Wu Fengguang
2011-12-15  8:55     ` [PATCH] proc: show readahead state in fdinfo Wu Fengguang
2011-12-15  9:49       ` Ingo Molnar
2011-11-29 13:09 ` [PATCH 4/9] readahead: tag mmap page fault call sites Wu Fengguang
2011-11-29 14:41   ` Jan Kara
2011-11-29 13:09 ` [PATCH 5/9] readahead: tag metadata " Wu Fengguang
2011-11-29 14:45   ` Jan Kara
2011-11-29 13:09 ` [PATCH 6/9] readahead: add /debug/readahead/stats Wu Fengguang
2011-11-29 15:21   ` Jan Kara
2011-11-30  0:44     ` Wu Fengguang
2011-12-14  6:36     ` Wu Fengguang
2011-12-19 16:32       ` Jan Kara
2011-12-21  1:29         ` Wu Fengguang
2011-12-21  4:06           ` Dave Chinner
2011-12-23  3:33             ` Wu Fengguang
2011-12-23 11:16               ` Jan Kara
2011-11-29 13:09 ` [PATCH 7/9] readahead: add vfs/readahead tracing event Wu Fengguang
2011-11-29 15:22   ` Jan Kara
2011-11-30  0:42     ` Wu Fengguang
2011-11-30 11:44       ` Jan Kara
2011-11-30 12:06         ` Wu Fengguang
2011-12-06 15:30   ` Christoph Hellwig
2011-12-07  9:18     ` Wu Fengguang
2011-12-08  9:03     ` [PATCH] writeback: show writeback reason with __print_symbolic Wu Fengguang
2011-11-29 13:09 ` [PATCH 8/9] readahead: basic support for backwards prefetching Wu Fengguang
2011-11-29 15:35   ` Jan Kara
2011-11-29 16:37     ` Pádraig Brady
2011-11-30  0:24       ` Wu Fengguang
2011-11-30  0:37     ` Wu Fengguang
2011-11-30 11:21       ` Jan Kara
2011-11-29 13:09 ` [PATCH 9/9] readahead: dont do start-of-file readahead after lseek() Wu Fengguang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20111129144034.GK5635@quack.suse.cz \
    --to=jack@suse.cz \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=axboe@kernel.dk \
    --cc=fengguang.wu@intel.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).