Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
From: John Hubbard <jhubbard@nvidia.com>
To: Matthew Wilcox <willy@infradead.org>, <linux-fsdevel@vger.kernel.org>
Cc: <linux-mm@kvack.org>, <linux-kernel@vger.kernel.org>,
	<linux-btrfs@vger.kernel.org>, <linux-erofs@lists.ozlabs.org>,
	<linux-ext4@vger.kernel.org>,
	<linux-f2fs-devel@lists.sourceforge.net>,
	<cluster-devel@redhat.com>, <ocfs2-devel@oss.oracle.com>,
	<linux-xfs@vger.kernel.org>
Subject: Re: [PATCH v5 03/13] mm: Put readahead pages in cache earlier
Date: Thu, 13 Feb 2020 19:36:38 -0800
Message-ID: <b0cdd7b4-e103-a884-d8f7-2378905f7b3b@nvidia.com> (raw)
In-Reply-To: <20200211010348.6872-4-willy@infradead.org>

On 2/10/20 5:03 PM, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> 
> At allocation time, put the pages in the cache unless we're using
> ->readpages.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/readahead.c | 66 ++++++++++++++++++++++++++++++++------------------
>  1 file changed, 42 insertions(+), 24 deletions(-)
> 
> diff --git a/mm/readahead.c b/mm/readahead.c
> index fc77d13af556..96c6ca68a174 100644
> --- a/mm/readahead.c
> +++ b/mm/readahead.c
> @@ -114,10 +114,10 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
>  EXPORT_SYMBOL(read_cache_pages);
>  
>  static void read_pages(struct address_space *mapping, struct file *filp,
> -		struct list_head *pages, unsigned int nr_pages, gfp_t gfp)
> +		struct list_head *pages, pgoff_t start,
> +		unsigned int nr_pages)
>  {
>  	struct blk_plug plug;
> -	unsigned page_idx;
>  
>  	blk_start_plug(&plug);
>  
> @@ -125,18 +125,17 @@ static void read_pages(struct address_space *mapping, struct file *filp,
>  		mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
>  		/* Clean up the remaining pages */
>  		put_pages_list(pages);
> -		goto out;
> -	}
> +	} else {
> +		struct page *page;
> +		unsigned long index;
>  
> -	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
> -		struct page *page = lru_to_page(pages);
> -		list_del(&page->lru);
> -		if (!add_to_page_cache_lru(page, mapping, page->index, gfp))
> +		xa_for_each_range(&mapping->i_pages, index, page, start,
> +				start + nr_pages - 1) {
>  			mapping->a_ops->readpage(filp, page);
> -		put_page(page);
> +			put_page(page);
> +		}
>  	}
>  
> -out:
>  	blk_finish_plug(&plug);
>  }
>  
> @@ -149,17 +148,18 @@ static void read_pages(struct address_space *mapping, struct file *filp,
>   * Returns the number of pages requested, or the maximum amount of I/O allowed.
>   */
>  unsigned long __do_page_cache_readahead(struct address_space *mapping,
> -		struct file *filp, pgoff_t offset, unsigned long nr_to_read,
> +		struct file *filp, pgoff_t start, unsigned long nr_to_read,
>  		unsigned long lookahead_size)
>  {
>  	struct inode *inode = mapping->host;
> -	struct page *page;
>  	unsigned long end_index;	/* The last page we want to read */
>  	LIST_HEAD(page_pool);
>  	int page_idx;
> +	pgoff_t page_offset = start;
>  	unsigned long nr_pages = 0;
>  	loff_t isize = i_size_read(inode);
>  	gfp_t gfp_mask = readahead_gfp_mask(mapping);
> +	bool use_list = mapping->a_ops->readpages;
>  
>  	if (isize == 0)
>  		goto out;
> @@ -170,7 +170,7 @@ unsigned long __do_page_cache_readahead(struct address_space *mapping,
>  	 * Preallocate as many pages as we will need.
>  	 */
>  	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
> -		pgoff_t page_offset = offset + page_idx;
> +		struct page *page;

I see two distinct things happening in this patch, and I think they want to each be
in their own patch:

1) A significant refactoring of the page loop, and

2) Changing the place where the page is added to the page cache. (Only this one is 
   mentioned in the commit description.)

We'll be more likely to spot any errors if these are teased apart.


thanks,
-- 
John Hubbard
NVIDIA

>  
>  		if (page_offset > end_index)
>  			break;
> @@ -178,25 +178,43 @@ unsigned long __do_page_cache_readahead(struct address_space *mapping,
>  		page = xa_load(&mapping->i_pages, page_offset);
>  		if (page && !xa_is_value(page)) {
>  			/*
> -			 * Page already present?  Kick off the current batch of
> -			 * contiguous pages before continuing with the next
> -			 * batch.
> +			 * Page already present?  Kick off the current batch
> +			 * of contiguous pages before continuing with the
> +			 * next batch.
> +			 * It's possible this page is the page we should
> +			 * be marking with PageReadahead.  However, we
> +			 * don't have a stable ref to this page so it might
> +			 * be reallocated to another user before we can set
> +			 * the bit.  There's probably another page in the
> +			 * cache marked with PageReadahead from the other
> +			 * process which accessed this file.
>  			 */
> -			if (nr_pages)
> -				read_pages(mapping, filp, &page_pool, nr_pages,
> -						gfp_mask);
> -			nr_pages = 0;
> -			continue;
> +			goto skip;
>  		}
>  
>  		page = __page_cache_alloc(gfp_mask);
>  		if (!page)
>  			break;
> -		page->index = page_offset;
> -		list_add(&page->lru, &page_pool);
> +		if (use_list) {
> +			page->index = page_offset;
> +			list_add(&page->lru, &page_pool);
> +		} else if (add_to_page_cache_lru(page, mapping, page_offset,
> +					gfp_mask) < 0) {
> +			put_page(page);
> +			goto skip;
> +		}
> +
>  		if (page_idx == nr_to_read - lookahead_size)
>  			SetPageReadahead(page);
>  		nr_pages++;
> +		page_offset++;
> +		continue;
> +skip:
> +		if (nr_pages)
> +			read_pages(mapping, filp, &page_pool, start, nr_pages);
> +		nr_pages = 0;
> +		page_offset++;
> +		start = page_offset;
>  	}
>  
>  	/*
> @@ -205,7 +223,7 @@ unsigned long __do_page_cache_readahead(struct address_space *mapping,
>  	 * will then handle the error.
>  	 */
>  	if (nr_pages)
> -		read_pages(mapping, filp, &page_pool, nr_pages, gfp_mask);
> +		read_pages(mapping, filp, &page_pool, start, nr_pages);
>  	BUG_ON(!list_empty(&page_pool));
>  out:
>  	return nr_pages;
> 

  reply index

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-11  1:03 [PATCH v5 00/13] Change readahead API Matthew Wilcox
2020-02-11  1:03 ` [PATCH v5 01/13] mm: Fix the return type of __do_page_cache_readahead Matthew Wilcox
2020-02-11  8:19   ` Johannes Thumshirn
2020-02-11 12:34     ` Matthew Wilcox
2020-02-12 18:13   ` Christoph Hellwig
2020-02-14  3:19   ` John Hubbard
2020-02-14  4:21     ` Matthew Wilcox
2020-02-14  4:33       ` John Hubbard
2020-02-14 19:50   ` Matthew Wilcox
2020-02-11  1:03 ` [PATCH v5 02/13] mm: Ignore return value of ->readpages Matthew Wilcox
2020-02-12 18:13   ` Christoph Hellwig
2020-02-11  1:03 ` [PATCH v5 03/13] mm: Put readahead pages in cache earlier Matthew Wilcox
2020-02-14  3:36   ` John Hubbard [this message]
2020-02-15  1:15     ` Matthew Wilcox
2020-02-11  1:03 ` [PATCH v5 04/13] mm: Add readahead address space operation Matthew Wilcox
2020-02-11  4:52   ` Dave Chinner
2020-02-11 12:54     ` Matthew Wilcox
2020-02-11 20:08       ` Dave Chinner
2020-02-12 18:18   ` Christoph Hellwig
2020-02-14  5:36   ` John Hubbard
2020-02-15  1:15     ` Matthew Wilcox
2020-02-11  1:03 ` [PATCH v5 05/13] mm: Add page_cache_readahead_limit Matthew Wilcox
2020-02-11  1:03 ` [PATCH v5 06/13] fs: Convert mpage_readpages to mpage_readahead Matthew Wilcox
2020-02-13 22:09   ` Junxiao Bi
2020-02-11  1:03 ` [PATCH v5 07/13] btrfs: Convert from readpages to readahead Matthew Wilcox
2020-02-11  1:03 ` [PATCH v5 08/13] erofs: Convert uncompressed files " Matthew Wilcox
2020-02-11  1:03 ` [PATCH v5 09/13] erofs: Convert compressed " Matthew Wilcox
2020-02-11  1:03 ` [PATCH v5 10/13] ext4: Convert " Matthew Wilcox
2020-02-11  1:03 ` [PATCH v5 11/13] f2fs: " Matthew Wilcox
2020-02-11  1:03 ` [PATCH v5 12/13] fuse: " Matthew Wilcox
2020-02-11  1:03 ` [PATCH v5 13/13] iomap: " Matthew Wilcox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b0cdd7b4-e103-a884-d8f7-2378905f7b3b@nvidia.com \
    --to=jhubbard@nvidia.com \
    --cc=cluster-devel@redhat.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-erofs@lists.ozlabs.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-f2fs-devel@lists.sourceforge.net \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=ocfs2-devel@oss.oracle.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git