From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755459Ab1K2N0W (ORCPT ); Tue, 29 Nov 2011 08:26:22 -0500 Received: from mga03.intel.com ([143.182.124.21]:46635 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755385Ab1K2N0R (ORCPT ); Tue, 29 Nov 2011 08:26:17 -0500 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.69,590,1315206000"; d="scan'208";a="80197589" Message-Id: <20111129131456.278516066@intel.com> User-Agent: quilt/0.48-1 Date: Tue, 29 Nov 2011 21:09:03 +0800 From: Wu Fengguang To: Andrew Morton cc: Andi Kleen , Ingo Molnar , Jens Axboe , Peter Zijlstra , Rik van Riel , Wu Fengguang cc: Linux Memory Management List , Cc: LKML Subject: [PATCH 3/9] readahead: record readahead patterns References: <20111129130900.628549879@intel.com> Content-Disposition: inline; filename=readahead-tracepoints.patch Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Record the readahead pattern in ra->pattern and extend the ra_submit() parameters, to be used by the next readahead tracing/stats patches. 7 patterns are defined: pattern readahead for ----------------------------------------------------------- RA_PATTERN_INITIAL start-of-file read RA_PATTERN_SUBSEQUENT trivial sequential read RA_PATTERN_CONTEXT interleaved sequential read RA_PATTERN_OVERSIZE oversize read RA_PATTERN_MMAP_AROUND mmap fault RA_PATTERN_FADVISE posix_fadvise() RA_PATTERN_RANDOM random read Note that random reads will be recorded in file_ra_state now. This won't worsen cache line bouncing because the ra->prev_pos update in do_generic_file_read() already pollutes the data cache, and filemap_fault() will stop calling into us after MMAP_LOTSAMISS. 
CC: Ingo Molnar CC: Jens Axboe CC: Peter Zijlstra Acked-by: Rik van Riel Signed-off-by: Wu Fengguang --- include/linux/fs.h | 36 +++++++++++++++++++++++++++++++++++- include/linux/mm.h | 4 +++- mm/filemap.c | 3 ++- mm/readahead.c | 29 ++++++++++++++++++++++------- 4 files changed, 62 insertions(+), 10 deletions(-) --- linux-next.orig/include/linux/fs.h 2011-11-28 21:21:05.000000000 +0800 +++ linux-next/include/linux/fs.h 2011-11-29 10:23:41.000000000 +0800 @@ -945,11 +945,45 @@ struct file_ra_state { there are only # of pages ahead */ unsigned int ra_pages; /* Maximum readahead window */ - unsigned int mmap_miss; /* Cache miss stat for mmap accesses */ + u16 mmap_miss; /* Cache miss stat for mmap accesses */ + u8 pattern; /* one of RA_PATTERN_* */ + loff_t prev_pos; /* Cache last read() position */ }; /* + * Which policy makes decision to do the current read-ahead IO? + * + * RA_PATTERN_INITIAL readahead window is initially opened, + * normally when reading from start of file + * RA_PATTERN_SUBSEQUENT readahead window is pushed forward + * RA_PATTERN_CONTEXT no readahead window available, querying the + * page cache to decide readahead start/size. + * This typically happens on interleaved reads (eg. + * reading pages 0, 1000, 1, 1001, 2, 1002, ...) + * where one file_ra_state struct is not enough + * for recording 2+ interleaved sequential read + * streams. 
+ * RA_PATTERN_MMAP_AROUND read-around on mmap page faults + * (w/o any sequential/random hints) + * RA_PATTERN_FADVISE triggered by POSIX_FADV_WILLNEED or FMODE_RANDOM + * RA_PATTERN_OVERSIZE a random read larger than max readahead size, + * do max readahead to break down the read size + * RA_PATTERN_RANDOM a small random read + */ +enum readahead_pattern { + RA_PATTERN_INITIAL, + RA_PATTERN_SUBSEQUENT, + RA_PATTERN_CONTEXT, + RA_PATTERN_MMAP_AROUND, + RA_PATTERN_FADVISE, + RA_PATTERN_OVERSIZE, + RA_PATTERN_RANDOM, + RA_PATTERN_ALL, /* for summary stats */ + RA_PATTERN_MAX +}; + +/* * Check if @index falls in the readahead windows. */ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) --- linux-next.orig/mm/readahead.c 2011-11-28 22:24:16.000000000 +0800 +++ linux-next/mm/readahead.c 2011-11-29 10:17:14.000000000 +0800 @@ -249,7 +249,10 @@ unsigned long max_sane_readahead(unsigne * Submit IO for the read-ahead request in file_ra_state. */ unsigned long ra_submit(struct file_ra_state *ra, - struct address_space *mapping, struct file *filp) + struct address_space *mapping, + struct file *filp, + pgoff_t offset, + unsigned long req_size) { pgoff_t eof = ((i_size_read(mapping->host)-1) >> PAGE_CACHE_SHIFT) + 1; pgoff_t start = ra->start; @@ -390,6 +393,7 @@ static int try_context_readahead(struct if (size >= offset) size *= 2; + ra->pattern = RA_PATTERN_CONTEXT; ra->start = offset; ra->size = get_init_ra_size(size + req_size, max); ra->async_size = ra->size; @@ -411,8 +415,10 @@ ondemand_readahead(struct address_space /* * start of file */ - if (!offset) + if (!offset) { + ra->pattern = RA_PATTERN_INITIAL; goto initial_readahead; + } /* * It's the expected callback offset, assume sequential access. 
@@ -420,6 +426,7 @@ ondemand_readahead(struct address_space */ if ((offset == (ra->start + ra->size - ra->async_size) || offset == (ra->start + ra->size))) { + ra->pattern = RA_PATTERN_SUBSEQUENT; ra->start += ra->size; ra->size = get_next_ra_size(ra, max); ra->async_size = ra->size; @@ -442,6 +449,7 @@ ondemand_readahead(struct address_space if (!start || start - offset > max) return 0; + ra->pattern = RA_PATTERN_CONTEXT; ra->start = start; ra->size = start - offset; /* old async_size */ ra->size += req_size; @@ -453,14 +461,18 @@ ondemand_readahead(struct address_space /* * oversize read */ - if (req_size > max) + if (req_size > max) { + ra->pattern = RA_PATTERN_OVERSIZE; goto initial_readahead; + } /* * sequential cache miss */ - if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL) + if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL) { + ra->pattern = RA_PATTERN_INITIAL; goto initial_readahead; + } /* * Query the page cache and look for the traces(cached history pages) @@ -471,9 +483,12 @@ ondemand_readahead(struct address_space /* * standalone, small random read - * Read as is, and do not pollute the readahead state. 
*/ - return __do_page_cache_readahead(mapping, filp, offset, req_size, 0); + ra->pattern = RA_PATTERN_RANDOM; + ra->start = offset; + ra->size = req_size; + ra->async_size = 0; + goto readit; initial_readahead: ra->start = offset; @@ -491,7 +506,7 @@ readit: ra->size += ra->async_size; } - return ra_submit(ra, mapping, filp); + return ra_submit(ra, mapping, filp, offset, req_size); } /** --- linux-next.orig/include/linux/mm.h 2011-11-28 21:21:05.000000000 +0800 +++ linux-next/include/linux/mm.h 2011-11-28 22:24:16.000000000 +0800 @@ -1456,7 +1456,9 @@ void page_cache_async_readahead(struct a unsigned long max_sane_readahead(unsigned long nr); unsigned long ra_submit(struct file_ra_state *ra, struct address_space *mapping, - struct file *filp); + struct file *filp, + pgoff_t offset, + unsigned long req_size); /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); --- linux-next.orig/mm/filemap.c 2011-11-28 21:21:05.000000000 +0800 +++ linux-next/mm/filemap.c 2011-11-29 10:17:14.000000000 +0800 @@ -1611,11 +1611,12 @@ static void do_sync_mmap_readahead(struc /* * mmap read-around */ + ra->pattern = RA_PATTERN_MMAP_AROUND; ra_pages = max_sane_readahead(ra->ra_pages); ra->start = max_t(long, 0, offset - ra_pages / 2); ra->size = ra_pages; ra->async_size = ra_pages / 4; - ra_submit(ra, mapping, file); + ra_submit(ra, mapping, file, offset, 1); } /*