Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
From: Matthew Wilcox <willy@infradead.org>
To: linux-fsdevel@vger.kernel.org, linux-mm@kvack.org
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>,
	linux-kernel@vger.kernel.org
Subject: [PATCH v2 24/25] mm: Add large page readahead
Date: Tue, 11 Feb 2020 20:18:44 -0800
Message-ID: <20200212041845.25879-25-willy@infradead.org> (raw)
In-Reply-To: <20200212041845.25879-1-willy@infradead.org>

From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

If the filesystem supports large pages, allocate larger pages in the
readahead code when it seems worth doing.  The heuristic for choosing
larger page sizes will surely need some tuning, but this aggressive
ramp-up seems good for testing.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/readahead.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 93 insertions(+), 5 deletions(-)

diff --git a/mm/readahead.c b/mm/readahead.c
index 29ca25c8f01e..b582f09aa7e3 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -406,13 +406,96 @@ static int try_context_readahead(struct address_space *mapping,
 	return 1;
 }
 
+static inline int ra_alloc_page(struct address_space *mapping, pgoff_t offset,
+		pgoff_t mark, unsigned int order, gfp_t gfp)
+{
+	int err;
+	struct page *page = __page_cache_alloc_order(gfp, order);
+
+	if (!page)
+		return -ENOMEM;
+	if (mark - offset < (1UL << order))
+		SetPageReadahead(page);
+	err = add_to_page_cache_lru(page, mapping, offset, gfp);
+	if (err)
+		put_page(page);
+	return err;
+}
+
+#define PMD_ORDER	(PMD_SHIFT - PAGE_SHIFT)
+
+static unsigned long page_cache_readahead_order(struct address_space *mapping,
+		struct file_ra_state *ra, struct file *file, unsigned int order)
+{
+	struct readahead_control rac = {
+		.mapping = mapping,
+		.file = file,
+		.start = ra->start,
+		.nr_pages = 0,
+	};
+	unsigned int old_order = order;
+	pgoff_t offset = ra->start;
+	pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
+	pgoff_t mark = offset + ra->size - ra->async_size;
+	int err = 0;
+	gfp_t gfp = readahead_gfp_mask(mapping);
+
+	limit = min(limit, offset + ra->size - 1);
+
+	/* Grow page size up to PMD size */
+	if (order < PMD_ORDER) {
+		order += 2;
+		if (order > PMD_ORDER)
+			order = PMD_ORDER;
+		while ((1 << order) > ra->size)
+			order--;
+	}
+
+	/* If size is somehow misaligned, fill with order-0 pages */
+	while (!err && offset & ((1UL << old_order) - 1)) {
+		err = ra_alloc_page(mapping, offset++, mark, 0, gfp);
+		if (!err)
+			rac.nr_pages++;
+	}
+
+	while (!err && offset & ((1UL << order) - 1)) {
+		err = ra_alloc_page(mapping, offset, mark, old_order, gfp);
+		if (!err)
+			rac.nr_pages += 1UL << old_order;
+		offset += 1UL << old_order;
+	}
+
+	while (!err && offset <= limit) {
+		err = ra_alloc_page(mapping, offset, mark, order, gfp);
+		if (!err)
+			rac.nr_pages += 1UL << order;
+		offset += 1UL << order;
+	}
+
+	if (offset > limit) {
+		ra->size += offset - limit - 1;
+		ra->async_size += offset - limit - 1;
+	}
+
+	read_pages(&rac, NULL);
+
+	/*
+	 * If there were already pages in the page cache, then we may have
+	 * left some gaps.  Let the regular readahead code take care of this
+	 * situation.
+	 */
+	if (err)
+		return ra_submit(ra, mapping, file);
+	return 0;
+}
+
 /*
  * A minimal readahead algorithm for trivial sequential/random reads.
  */
 static unsigned long
 ondemand_readahead(struct address_space *mapping,
 		   struct file_ra_state *ra, struct file *filp,
-		   bool hit_readahead_marker, pgoff_t offset,
+		   struct page *page, pgoff_t offset,
 		   unsigned long req_size)
 {
 	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
@@ -451,7 +534,7 @@ ondemand_readahead(struct address_space *mapping,
 	 * Query the pagecache for async_size, which normally equals to
 	 * readahead size. Ramp it up and use it as the new readahead size.
 	 */
-	if (hit_readahead_marker) {
+	if (page) {
 		pgoff_t start;
 
 		rcu_read_lock();
@@ -520,7 +603,12 @@ ondemand_readahead(struct address_space *mapping,
 		}
 	}
 
-	return ra_submit(ra, mapping, filp);
+	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) || !page ||
+	    !(mapping->host->i_sb->s_type->fs_flags & FS_LARGE_PAGES))
+		return ra_submit(ra, mapping, filp);
+
+	return page_cache_readahead_order(mapping, ra, filp,
+						compound_order(page));
 }
 
 /**
@@ -555,7 +643,7 @@ void page_cache_sync_readahead(struct address_space *mapping,
 	}
 
 	/* do read-ahead */
-	ondemand_readahead(mapping, ra, filp, false, offset, req_size);
+	ondemand_readahead(mapping, ra, filp, NULL, offset, req_size);
 }
 EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
 
@@ -602,7 +690,7 @@ page_cache_async_readahead(struct address_space *mapping,
 		return;
 
 	/* do read-ahead */
-	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
+	ondemand_readahead(mapping, ra, filp, page, offset, req_size);
 }
 EXPORT_SYMBOL_GPL(page_cache_async_readahead);
 
-- 
2.25.0


  parent reply index

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-12  4:18 [PATCH v2 00/25] Large pages in the page cache Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 01/25] mm: Use vm_fault error code directly Matthew Wilcox
2020-02-12  7:34   ` Christoph Hellwig
2020-02-12  4:18 ` [PATCH v2 02/25] mm: Optimise find_subpage for !THP Matthew Wilcox
2020-02-12  7:41   ` Christoph Hellwig
2020-02-12 13:02     ` Matthew Wilcox
2020-02-12 17:52       ` Christoph Hellwig
2020-02-13 13:50       ` Kirill A. Shutemov
2020-02-12  4:18 ` [PATCH v2 03/25] mm: Use VM_BUG_ON_PAGE in clear_page_dirty_for_io Matthew Wilcox
2020-02-12  7:38   ` Christoph Hellwig
2020-02-13 13:50   ` Kirill A. Shutemov
2020-02-12  4:18 ` [PATCH v2 04/25] mm: Unexport find_get_entry Matthew Wilcox
2020-02-12  7:37   ` Christoph Hellwig
2020-02-13 13:51   ` Kirill A. Shutemov
2020-02-12  4:18 ` [PATCH v2 05/25] mm: Fix documentation of FGP flags Matthew Wilcox
2020-02-12  7:42   ` Christoph Hellwig
2020-02-12 19:11     ` Matthew Wilcox
2020-02-13 14:00       ` Kirill A. Shutemov
2020-02-13 13:59   ` Kirill A. Shutemov
2020-02-13 14:34     ` Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 06/25] mm: Allow hpages to be arbitrary order Matthew Wilcox
2020-02-13 14:11   ` Kirill A. Shutemov
2020-02-13 14:30     ` Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 07/25] mm: Introduce thp_size Matthew Wilcox
2020-02-13 14:19   ` Kirill A. Shutemov
2020-02-12  4:18 ` [PATCH v2 08/25] mm: Introduce thp_order Matthew Wilcox
2020-02-13 14:20   ` Kirill A. Shutemov
2020-02-12  4:18 ` [PATCH v2 09/25] fs: Add a filesystem flag for large pages Matthew Wilcox
2020-02-12  7:43   ` Christoph Hellwig
2020-02-12 14:59     ` Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 10/25] fs: Introduce i_blocks_per_page Matthew Wilcox
2020-02-12  7:44   ` Christoph Hellwig
2020-02-12 15:05     ` Matthew Wilcox
2020-02-12 17:54       ` Christoph Hellwig
2020-02-13 15:40   ` Kirill A. Shutemov
2020-02-13 16:07     ` Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 11/25] fs: Make page_mkwrite_check_truncate thp-aware Matthew Wilcox
2020-02-13 15:44   ` Kirill A. Shutemov
2020-02-13 16:26     ` Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 12/25] mm: Add file_offset_of_ helpers Matthew Wilcox
2020-02-12  7:46   ` Christoph Hellwig
2020-02-12  4:18 ` [PATCH v2 13/25] fs: Add zero_user_large Matthew Wilcox
2020-02-14 13:52   ` Kirill A. Shutemov
2020-02-14 16:03     ` Matthew Wilcox
2020-02-18 14:16       ` Kirill A. Shutemov
2020-02-18 16:13         ` Matthew Wilcox
2020-02-18 17:10           ` Kirill A. Shutemov
2020-02-18 18:07             ` Matthew Wilcox
2020-02-21 12:42               ` Kirill A. Shutemov
2020-02-12  4:18 ` [PATCH v2 14/25] iomap: Support arbitrarily many blocks per page Matthew Wilcox
2020-02-12  8:05   ` Christoph Hellwig
2020-02-12  4:18 ` [PATCH v2 15/25] iomap: Support large pages in iomap_adjust_read_range Matthew Wilcox
2020-02-12  8:11   ` Christoph Hellwig
2020-02-12  4:18 ` [PATCH v2 16/25] iomap: Support large pages in read paths Matthew Wilcox
2020-02-12  8:13   ` Christoph Hellwig
2020-02-12 17:45     ` Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 17/25] iomap: Support large pages in write paths Matthew Wilcox
2020-02-12  8:17   ` Christoph Hellwig
2020-02-12  4:18 ` [PATCH v2 18/25] iomap: Inline data shouldn't see large pages Matthew Wilcox
2020-02-12  8:05   ` Christoph Hellwig
2020-02-12  4:18 ` [PATCH v2 19/25] xfs: Support " Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 20/25] mm: Make prep_transhuge_page return its argument Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 21/25] mm: Add __page_cache_alloc_order Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 22/25] mm: Allow large pages to be added to the page cache Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 23/25] mm: Allow large pages to be removed from " Matthew Wilcox
2020-02-12  4:18 ` Matthew Wilcox [this message]
2020-02-12  4:18 ` [PATCH v2 25/25] mm: Align THP mappings for non-DAX Matthew Wilcox
2020-02-12  7:50   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200212041845.25879-25-willy@infradead.org \
    --to=willy@infradead.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git