All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/17] Refactor generic_file_buffered_read
@ 2020-11-02 18:42 Matthew Wilcox (Oracle)
  2020-11-02 18:42 ` [PATCH 01/17] mm/filemap: Rename generic_file_buffered_read subfunctions Matthew Wilcox (Oracle)
                   ` (17 more replies)
  0 siblings, 18 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:42 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

This is a combination of Christoph's work to refactor
generic_file_buffered_read() and my THP support on top of Kent's patches
which are currently in -mm.  I really like where this ended up.

Christoph Hellwig (2):
  mm/filemap: rename generic_file_buffered_read to filemap_read
  mm: simplify generic_file_read_iter

Matthew Wilcox (Oracle) (15):
  mm/filemap: Rename generic_file_buffered_read subfunctions
  mm/filemap: Use THPs in generic_file_buffered_read
  mm/filemap: Pass a sleep state to put_and_wait_on_page_locked
  mm/filemap: Support readpage splitting a page
  mm/filemap: Inline __wait_on_page_locked_async into caller
  mm/filemap: Don't call ->readpage if IOCB_WAITQ is set
  mm/filemap: Change filemap_read_page calling conventions
  mm/filemap: Change filemap_create_page arguments
  mm/filemap: Convert filemap_update_page to return an errno
  mm/filemap: Move the iocb checks into filemap_update_page
  mm/filemap: Add filemap_range_uptodate
  mm/filemap: Split filemap_readahead out of filemap_get_pages
  mm/filemap: Remove parameters from filemap_update_page()
  mm/filemap: Restructure filemap_get_pages
  mm/filemap: Don't relock the page after calling readpage

 fs/btrfs/file.c         |   2 +-
 include/linux/fs.h      |   4 +-
 include/linux/pagemap.h |   3 +-
 mm/filemap.c            | 493 +++++++++++++++++++---------------------
 mm/huge_memory.c        |   4 +-
 mm/migrate.c            |   4 +-
 6 files changed, 237 insertions(+), 273 deletions(-)

-- 
2.28.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [PATCH 01/17] mm/filemap: Rename generic_file_buffered_read subfunctions
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
@ 2020-11-02 18:42 ` Matthew Wilcox (Oracle)
  2020-11-02 18:53   ` Kent Overstreet
  2020-11-03  7:27   ` Christoph Hellwig
  2020-11-02 18:42 ` [PATCH 02/17] mm/filemap: Use THPs in generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (16 subsequent siblings)
  17 siblings, 2 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:42 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

The recent split of generic_file_buffered_read() created some very
long function names which are hard to distinguish from each other.
Rename as follows:

generic_file_buffered_read_readpage -> filemap_read_page
generic_file_buffered_read_pagenotuptodate -> filemap_update_page
generic_file_buffered_read_no_cached_page -> filemap_create_page
generic_file_buffered_read_get_pages -> filemap_get_pages

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 44 +++++++++++++++-----------------------------
 1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index a68516ddeddc..23e3781b3aef 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2176,11 +2176,8 @@ static int lock_page_for_iocb(struct kiocb *iocb, struct page *page)
 		return lock_page_killable(page);
 }
 
-static struct page *
-generic_file_buffered_read_readpage(struct kiocb *iocb,
-				    struct file *filp,
-				    struct address_space *mapping,
-				    struct page *page)
+static struct page *filemap_read_page(struct kiocb *iocb, struct file *filp,
+		struct address_space *mapping, struct page *page)
 {
 	struct file_ra_state *ra = &filp->f_ra;
 	int error;
@@ -2231,12 +2228,9 @@ generic_file_buffered_read_readpage(struct kiocb *iocb,
 	return page;
 }
 
-static struct page *
-generic_file_buffered_read_pagenotuptodate(struct kiocb *iocb,
-					   struct file *filp,
-					   struct iov_iter *iter,
-					   struct page *page,
-					   loff_t pos, loff_t count)
+static struct page *filemap_update_page(struct kiocb *iocb, struct file *filp,
+		struct iov_iter *iter, struct page *page, loff_t pos,
+		loff_t count)
 {
 	struct address_space *mapping = filp->f_mapping;
 	struct inode *inode = mapping->host;
@@ -2299,12 +2293,11 @@ generic_file_buffered_read_pagenotuptodate(struct kiocb *iocb,
 		return page;
 	}
 
-	return generic_file_buffered_read_readpage(iocb, filp, mapping, page);
+	return filemap_read_page(iocb, filp, mapping, page);
 }
 
-static struct page *
-generic_file_buffered_read_no_cached_page(struct kiocb *iocb,
-					  struct iov_iter *iter)
+static struct page *filemap_create_page(struct kiocb *iocb,
+		struct iov_iter *iter)
 {
 	struct file *filp = iocb->ki_filp;
 	struct address_space *mapping = filp->f_mapping;
@@ -2315,10 +2308,6 @@ generic_file_buffered_read_no_cached_page(struct kiocb *iocb,
 	if (iocb->ki_flags & IOCB_NOIO)
 		return ERR_PTR(-EAGAIN);
 
-	/*
-	 * Ok, it wasn't cached, so we need to create a new
-	 * page..
-	 */
 	page = page_cache_alloc(mapping);
 	if (!page)
 		return ERR_PTR(-ENOMEM);
@@ -2330,13 +2319,11 @@ generic_file_buffered_read_no_cached_page(struct kiocb *iocb,
 		return error != -EEXIST ? ERR_PTR(error) : NULL;
 	}
 
-	return generic_file_buffered_read_readpage(iocb, filp, mapping, page);
+	return filemap_read_page(iocb, filp, mapping, page);
 }
 
-static int generic_file_buffered_read_get_pages(struct kiocb *iocb,
-						struct iov_iter *iter,
-						struct page **pages,
-						unsigned int nr)
+static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
+		struct page **pages, unsigned int nr)
 {
 	struct file *filp = iocb->ki_filp;
 	struct address_space *mapping = filp->f_mapping;
@@ -2363,7 +2350,7 @@ static int generic_file_buffered_read_get_pages(struct kiocb *iocb,
 	if (nr_got)
 		goto got_pages;
 
-	pages[0] = generic_file_buffered_read_no_cached_page(iocb, iter);
+	pages[0] = filemap_create_page(iocb, iter);
 	err = PTR_ERR_OR_ZERO(pages[0]);
 	if (!IS_ERR_OR_NULL(pages[0]))
 		nr_got = 1;
@@ -2397,8 +2384,8 @@ static int generic_file_buffered_read_get_pages(struct kiocb *iocb,
 				break;
 			}
 
-			page = generic_file_buffered_read_pagenotuptodate(iocb,
-					filp, iter, page, pg_pos, pg_count);
+			page = filemap_update_page(iocb, filp, iter, page,
+					pg_pos, pg_count);
 			if (IS_ERR_OR_NULL(page)) {
 				for (j = i + 1; j < nr_got; j++)
 					put_page(pages[j]);
@@ -2474,8 +2461,7 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 			iocb->ki_flags |= IOCB_NOWAIT;
 
 		i = 0;
-		pg_nr = generic_file_buffered_read_get_pages(iocb, iter,
-							     pages, nr_pages);
+		pg_nr = filemap_get_pages(iocb, iter, pages, nr_pages);
 		if (pg_nr < 0) {
 			error = pg_nr;
 			break;
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 02/17] mm/filemap: Use THPs in generic_file_buffered_read
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
  2020-11-02 18:42 ` [PATCH 01/17] mm/filemap: Rename generic_file_buffered_read subfunctions Matthew Wilcox (Oracle)
@ 2020-11-02 18:42 ` Matthew Wilcox (Oracle)
  2020-11-02 18:55   ` Kent Overstreet
  2020-11-03  7:28   ` Christoph Hellwig
  2020-11-02 18:42 ` [PATCH 03/17] mm/filemap: Pass a sleep state to put_and_wait_on_page_locked Matthew Wilcox (Oracle)
                   ` (15 subsequent siblings)
  17 siblings, 2 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:42 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

Add mapping_get_read_thps() which returns the THPs which represent a
contiguous array of bytes in the file.  It also stops when encountering
a page marked as Readahead or !Uptodate (but does return that page)
so it can be handled appropriately by filemap_get_pages().  That lets us
remove the loop in filemap_get_pages() and check only the last page.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 96 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 70 insertions(+), 26 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 23e3781b3aef..d9636ccf87ff 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2176,6 +2176,48 @@ static int lock_page_for_iocb(struct kiocb *iocb, struct page *page)
 		return lock_page_killable(page);
 }
 
+static unsigned mapping_get_read_thps(struct address_space *mapping,
+		pgoff_t index, unsigned int nr_pages, struct page **pages)
+{
+	XA_STATE(xas, &mapping->i_pages, index);
+	struct page *head;
+	unsigned int ret = 0;
+
+	if (unlikely(!nr_pages))
+		return 0;
+
+	rcu_read_lock();
+	for (head = xas_load(&xas); head; head = xas_next(&xas)) {
+		if (xas_retry(&xas, head))
+			continue;
+		if (xa_is_value(head))
+			break;
+		if (!page_cache_get_speculative(head))
+			goto retry;
+
+		/* Has the page moved or been split? */
+		if (unlikely(head != xas_reload(&xas)))
+			goto put_page;
+
+		pages[ret++] = head;
+		if (ret == nr_pages)
+			break;
+		if (!PageUptodate(head))
+			break;
+		if (PageReadahead(head))
+			break;
+		xas.xa_index = head->index + thp_nr_pages(head) - 1;
+		xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK;
+		continue;
+put_page:
+		put_page(head);
+retry:
+		xas_reset(&xas);
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
 static struct page *filemap_read_page(struct kiocb *iocb, struct file *filp,
 		struct address_space *mapping, struct page *page)
 {
@@ -2330,14 +2372,14 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 	struct file_ra_state *ra = &filp->f_ra;
 	pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
 	pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
-	int i, j, nr_got, err = 0;
+	int nr_got, err = 0;
 
 	nr = min_t(unsigned long, last_index - index, nr);
 find_page:
 	if (fatal_signal_pending(current))
 		return -EINTR;
 
-	nr_got = find_get_pages_contig(mapping, index, nr, pages);
+	nr_got = mapping_get_read_thps(mapping, index, nr, pages);
 	if (nr_got)
 		goto got_pages;
 
@@ -2346,7 +2388,7 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 
 	page_cache_sync_readahead(mapping, ra, filp, index, last_index - index);
 
-	nr_got = find_get_pages_contig(mapping, index, nr, pages);
+	nr_got = mapping_get_read_thps(mapping, index, nr, pages);
 	if (nr_got)
 		goto got_pages;
 
@@ -2355,20 +2397,19 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 	if (!IS_ERR_OR_NULL(pages[0]))
 		nr_got = 1;
 got_pages:
-	for (i = 0; i < nr_got; i++) {
-		struct page *page = pages[i];
-		pgoff_t pg_index = index + i;
+	if (nr_got > 0) {
+		struct page *page = pages[nr_got - 1];
+		pgoff_t pg_index = page->index;
 		loff_t pg_pos = max(iocb->ki_pos,
 				    (loff_t) pg_index << PAGE_SHIFT);
 		loff_t pg_count = iocb->ki_pos + iter->count - pg_pos;
 
 		if (PageReadahead(page)) {
 			if (iocb->ki_flags & IOCB_NOIO) {
-				for (j = i; j < nr_got; j++)
-					put_page(pages[j]);
-				nr_got = i;
+				put_page(page);
+				nr_got--;
 				err = -EAGAIN;
-				break;
+				goto err;
 			}
 			page_cache_async_readahead(mapping, ra, filp, page,
 					pg_index, last_index - pg_index);
@@ -2376,26 +2417,23 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 
 		if (!PageUptodate(page)) {
 			if ((iocb->ki_flags & IOCB_NOWAIT) ||
-			    ((iocb->ki_flags & IOCB_WAITQ) && i)) {
-				for (j = i; j < nr_got; j++)
-					put_page(pages[j]);
-				nr_got = i;
+			    ((iocb->ki_flags & IOCB_WAITQ) && nr_got > 1)) {
+				put_page(page);
+				nr_got--;
 				err = -EAGAIN;
-				break;
+				goto err;
 			}
 
 			page = filemap_update_page(iocb, filp, iter, page,
 					pg_pos, pg_count);
 			if (IS_ERR_OR_NULL(page)) {
-				for (j = i + 1; j < nr_got; j++)
-					put_page(pages[j]);
-				nr_got = i;
+				nr_got--;
 				err = PTR_ERR_OR_ZERO(page);
-				break;
 			}
 		}
 	}
 
+err:
 	if (likely(nr_got))
 		return nr_got;
 	if (err)
@@ -2502,20 +2540,26 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 			mark_page_accessed(pages[i]);
 
 		for (i = 0; i < pg_nr; i++) {
-			unsigned int offset = iocb->ki_pos & ~PAGE_MASK;
-			unsigned int bytes = min_t(loff_t, end_offset - iocb->ki_pos,
-						   PAGE_SIZE - offset);
-			unsigned int copied;
+			struct page *page = pages[i];
+			size_t page_size = thp_size(page);
+			size_t offset = iocb->ki_pos & (page_size - 1);
+			size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,
+					     page_size - offset);
+			size_t copied;
 
 			/*
 			 * If users can be writing to this page using arbitrary
 			 * virtual addresses, take care about potential aliasing
 			 * before reading the page on the kernel side.
 			 */
-			if (writably_mapped)
-				flush_dcache_page(pages[i]);
+			if (writably_mapped) {
+				int j;
+
+				for (j = 0; j < thp_nr_pages(page); j++)
+					flush_dcache_page(page + j);
+			}
 
-			copied = copy_page_to_iter(pages[i], offset, bytes, iter);
+			copied = copy_page_to_iter(page, offset, bytes, iter);
 
 			written += copied;
 			iocb->ki_pos += copied;
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 03/17] mm/filemap: Pass a sleep state to put_and_wait_on_page_locked
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
  2020-11-02 18:42 ` [PATCH 01/17] mm/filemap: Rename generic_file_buffered_read subfunctions Matthew Wilcox (Oracle)
  2020-11-02 18:42 ` [PATCH 02/17] mm/filemap: Use THPs in generic_file_buffered_read Matthew Wilcox (Oracle)
@ 2020-11-02 18:42 ` Matthew Wilcox (Oracle)
  2020-11-02 18:56   ` Kent Overstreet
  2020-11-03  7:28   ` Christoph Hellwig
  2020-11-02 18:42 ` [PATCH 04/17] mm/filemap: Support readpage splitting a page Matthew Wilcox (Oracle)
                   ` (14 subsequent siblings)
  17 siblings, 2 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:42 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

This is prep work for the next patch, but I think at least one of the
current callers would prefer a killable sleep to an uninterruptible one.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/pagemap.h | 3 +--
 mm/filemap.c            | 7 +++++--
 mm/huge_memory.c        | 4 ++--
 mm/migrate.c            | 4 ++--
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 00288ed24698..71b36b275e4d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -681,8 +681,7 @@ static inline int wait_on_page_locked_killable(struct page *page)
 	return wait_on_page_bit_killable(compound_head(page), PG_locked);
 }
 
-extern void put_and_wait_on_page_locked(struct page *page);
-
+int put_and_wait_on_page_locked(struct page *page, int state);
 void wait_on_page_writeback(struct page *page);
 extern void end_page_writeback(struct page *page);
 void wait_for_stable_page(struct page *page);
diff --git a/mm/filemap.c b/mm/filemap.c
index d9636ccf87ff..709774a60379 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1358,20 +1358,23 @@ static int wait_on_page_locked_async(struct page *page,
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
+ * @state: The sleep state (TASK_KILLABLE, TASK_UNINTERRUPTIBLE, etc).
  *
  * The caller should hold a reference on @page.  They expect the page to
  * become unlocked relatively soon, but do not wish to hold up migration
  * (for example) by holding the reference while waiting for the page to
  * come unlocked.  After this function returns, the caller should not
  * dereference @page.
+ *
+ * Return: 0 if the page was unlocked or -EINTR if interrupted by a signal.
  */
-void put_and_wait_on_page_locked(struct page *page)
+int put_and_wait_on_page_locked(struct page *page, int state)
 {
 	wait_queue_head_t *q;
 
 	page = compound_head(page);
 	q = page_waitqueue(page);
-	wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, DROP);
+	return wait_on_page_bit_common(q, page, PG_locked, state, DROP);
 }
 
 /**
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 616102ba3682..ac114d265950 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1432,7 +1432,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 		if (!get_page_unless_zero(page))
 			goto out_unlock;
 		spin_unlock(vmf->ptl);
-		put_and_wait_on_page_locked(page);
+		put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
 		goto out;
 	}
 
@@ -1468,7 +1468,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 		if (!get_page_unless_zero(page))
 			goto out_unlock;
 		spin_unlock(vmf->ptl);
-		put_and_wait_on_page_locked(page);
+		put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
 		goto out;
 	}
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 39663dfbc273..a50bbb0e029b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -335,7 +335,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
 	if (!get_page_unless_zero(page))
 		goto out;
 	pte_unmap_unlock(ptep, ptl);
-	put_and_wait_on_page_locked(page);
+	put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
 	return;
 out:
 	pte_unmap_unlock(ptep, ptl);
@@ -369,7 +369,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
 	if (!get_page_unless_zero(page))
 		goto unlock;
 	spin_unlock(ptl);
-	put_and_wait_on_page_locked(page);
+	put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
 	return;
 unlock:
 	spin_unlock(ptl);
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 04/17] mm/filemap: Support readpage splitting a page
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (2 preceding siblings ...)
  2020-11-02 18:42 ` [PATCH 03/17] mm/filemap: Pass a sleep state to put_and_wait_on_page_locked Matthew Wilcox (Oracle)
@ 2020-11-02 18:42 ` Matthew Wilcox (Oracle)
  2020-11-02 19:00   ` Kent Overstreet
                     ` (2 more replies)
  2020-11-02 18:43 ` [PATCH 05/17] mm/filemap: Inline __wait_on_page_locked_async into caller Matthew Wilcox (Oracle)
                   ` (13 subsequent siblings)
  17 siblings, 3 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:42 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

For page splitting to succeed, the thread asking to split the
page has to be the only one with a reference to the page.  Calling
wait_on_page_locked() while holding a reference to the page will
effectively prevent this from happening with sufficient threads waiting
on the same page.  Use put_and_wait_on_page_locked() to sleep without
holding a reference to the page, then retry the page lookup after the
page is unlocked.

Since we now get the page lock a little earlier in filemap_update_page(),
we can eliminate a number of duplicate checks.  The original intent
(commit ebded02788b5 ("avoid unnecessary calls to lock_page when waiting
for IO to complete during a read")) behind getting the page lock later
was to avoid re-locking the page after it has been brought uptodate by
another thread.  We still avoid that because we go through the normal
lookup path again after the winning thread has brought the page uptodate.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 76 ++++++++++++++++------------------------------------
 1 file changed, 23 insertions(+), 53 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 709774a60379..550e023abb52 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1347,14 +1347,6 @@ static int __wait_on_page_locked_async(struct page *page,
 	return ret;
 }
 
-static int wait_on_page_locked_async(struct page *page,
-				     struct wait_page_queue *wait)
-{
-	if (!PageLocked(page))
-		return 0;
-	return __wait_on_page_locked_async(compound_head(page), wait, false);
-}
-
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
@@ -2281,64 +2273,42 @@ static struct page *filemap_update_page(struct kiocb *iocb, struct file *filp,
 	struct inode *inode = mapping->host;
 	int error;
 
-	/*
-	 * See comment in do_read_cache_page on why
-	 * wait_on_page_locked is used to avoid unnecessarily
-	 * serialisations and why it's safe.
-	 */
 	if (iocb->ki_flags & IOCB_WAITQ) {
-		error = wait_on_page_locked_async(page,
-						iocb->ki_waitq);
+		error = lock_page_async(page, iocb->ki_waitq);
+		if (error) {
+			put_page(page);
+			return ERR_PTR(error);
+		}
 	} else {
-		error = wait_on_page_locked_killable(page);
-	}
-	if (unlikely(error)) {
-		put_page(page);
-		return ERR_PTR(error);
+		if (!trylock_page(page)) {
+			put_and_wait_on_page_locked(page, TASK_KILLABLE);
+			return NULL;
+		}
 	}
-	if (PageUptodate(page))
-		return page;
 
+	if (!page->mapping)
+		goto truncated;
+	if (PageUptodate(page))
+		goto uptodate;
 	if (inode->i_blkbits == PAGE_SHIFT ||
 			!mapping->a_ops->is_partially_uptodate)
-		goto page_not_up_to_date;
+		goto readpage;
 	/* pipes can't handle partially uptodate pages */
 	if (unlikely(iov_iter_is_pipe(iter)))
-		goto page_not_up_to_date;
-	if (!trylock_page(page))
-		goto page_not_up_to_date;
-	/* Did it get truncated before we got the lock? */
-	if (!page->mapping)
-		goto page_not_up_to_date_locked;
+		goto readpage;
 	if (!mapping->a_ops->is_partially_uptodate(page,
-				pos & ~PAGE_MASK, count))
-		goto page_not_up_to_date_locked;
+				pos & (thp_size(page) - 1), count))
+		goto readpage;
+uptodate:
 	unlock_page(page);
 	return page;
 
-page_not_up_to_date:
-	/* Get exclusive access to the page ... */
-	error = lock_page_for_iocb(iocb, page);
-	if (unlikely(error)) {
-		put_page(page);
-		return ERR_PTR(error);
-	}
-
-page_not_up_to_date_locked:
-	/* Did it get truncated before we got the lock? */
-	if (!page->mapping) {
-		unlock_page(page);
-		put_page(page);
-		return NULL;
-	}
-
-	/* Did somebody else fill it already? */
-	if (PageUptodate(page)) {
-		unlock_page(page);
-		return page;
-	}
-
+readpage:
 	return filemap_read_page(iocb, filp, mapping, page);
+truncated:
+	unlock_page(page);
+	put_page(page);
+	return NULL;
 }
 
 static struct page *filemap_create_page(struct kiocb *iocb,
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 05/17] mm/filemap: Inline __wait_on_page_locked_async into caller
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (3 preceding siblings ...)
  2020-11-02 18:42 ` [PATCH 04/17] mm/filemap: Support readpage splitting a page Matthew Wilcox (Oracle)
@ 2020-11-02 18:43 ` Matthew Wilcox (Oracle)
  2020-11-02 19:00   ` Kent Overstreet
  2020-11-03  7:31   ` Christoph Hellwig
  2020-11-02 18:43 ` [PATCH 06/17] mm/filemap: Don't call ->readpage if IOCB_WAITQ is set Matthew Wilcox (Oracle)
                   ` (12 subsequent siblings)
  17 siblings, 2 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:43 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

The previous patch removed wait_on_page_locked_async(), so inline
__wait_on_page_locked_async into __lock_page_async().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 53 ++++++++++++++++++++++------------------------------
 1 file changed, 22 insertions(+), 31 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 550e023abb52..7a4101ceb106 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1317,36 +1317,6 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
 }
 EXPORT_SYMBOL(wait_on_page_bit_killable);
 
-static int __wait_on_page_locked_async(struct page *page,
-				       struct wait_page_queue *wait, bool set)
-{
-	struct wait_queue_head *q = page_waitqueue(page);
-	int ret = 0;
-
-	wait->page = page;
-	wait->bit_nr = PG_locked;
-
-	spin_lock_irq(&q->lock);
-	__add_wait_queue_entry_tail(q, &wait->wait);
-	SetPageWaiters(page);
-	if (set)
-		ret = !trylock_page(page);
-	else
-		ret = PageLocked(page);
-	/*
-	 * If we were succesful now, we know we're still on the
-	 * waitqueue as we're still under the lock. This means it's
-	 * safe to remove and return success, we know the callback
-	 * isn't going to trigger.
-	 */
-	if (!ret)
-		__remove_wait_queue(q, &wait->wait);
-	else
-		ret = -EIOCBQUEUED;
-	spin_unlock_irq(&q->lock);
-	return ret;
-}
-
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
@@ -1514,7 +1484,28 @@ EXPORT_SYMBOL_GPL(__lock_page_killable);
 
 int __lock_page_async(struct page *page, struct wait_page_queue *wait)
 {
-	return __wait_on_page_locked_async(page, wait, true);
+	struct wait_queue_head *q = page_waitqueue(page);
+	int ret = 0;
+
+	wait->page = page;
+	wait->bit_nr = PG_locked;
+
+	spin_lock_irq(&q->lock);
+	__add_wait_queue_entry_tail(q, &wait->wait);
+	SetPageWaiters(page);
+	ret = !trylock_page(page);
+	/*
+	 * If we were succesful now, we know we're still on the
+	 * waitqueue as we're still under the lock. This means it's
+	 * safe to remove and return success, we know the callback
+	 * isn't going to trigger.
+	 */
+	if (!ret)
+		__remove_wait_queue(q, &wait->wait);
+	else
+		ret = -EIOCBQUEUED;
+	spin_unlock_irq(&q->lock);
+	return ret;
 }
 
 /*
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 06/17] mm/filemap: Don't call ->readpage if IOCB_WAITQ is set
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (4 preceding siblings ...)
  2020-11-02 18:43 ` [PATCH 05/17] mm/filemap: Inline __wait_on_page_locked_async into caller Matthew Wilcox (Oracle)
@ 2020-11-02 18:43 ` Matthew Wilcox (Oracle)
  2020-11-02 19:17   ` Kent Overstreet
                     ` (2 more replies)
  2020-11-02 18:43 ` [PATCH 07/17] mm/filemap: Change filemap_read_page calling conventions Matthew Wilcox (Oracle)
                   ` (11 subsequent siblings)
  17 siblings, 3 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:43 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

The readpage operation can block in many (most?) filesystems, so we
should punt to a work queue instead of calling it.  This was the last
caller of lock_page_async(), so remove it.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 7a4101ceb106..5bafd2dc830c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2152,16 +2152,6 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra)
 	ra->ra_pages /= 4;
 }
 
-static int lock_page_for_iocb(struct kiocb *iocb, struct page *page)
-{
-	if (iocb->ki_flags & IOCB_WAITQ)
-		return lock_page_async(page, iocb->ki_waitq);
-	else if (iocb->ki_flags & IOCB_NOWAIT)
-		return trylock_page(page) ? 0 : -EAGAIN;
-	else
-		return lock_page_killable(page);
-}
-
 static unsigned mapping_get_read_thps(struct address_space *mapping,
 		pgoff_t index, unsigned int nr_pages, struct page **pages)
 {
@@ -2210,7 +2200,7 @@ static struct page *filemap_read_page(struct kiocb *iocb, struct file *filp,
 	struct file_ra_state *ra = &filp->f_ra;
 	int error;
 
-	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
+	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) {
 		unlock_page(page);
 		put_page(page);
 		return ERR_PTR(-EAGAIN);
@@ -2231,7 +2221,7 @@ static struct page *filemap_read_page(struct kiocb *iocb, struct file *filp,
 	}
 
 	if (!PageUptodate(page)) {
-		error = lock_page_for_iocb(iocb, page);
+		error = lock_page_killable(page);
 		if (unlikely(error)) {
 			put_page(page);
 			return ERR_PTR(error);
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 07/17] mm/filemap: Change filemap_read_page calling conventions
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (5 preceding siblings ...)
  2020-11-02 18:43 ` [PATCH 06/17] mm/filemap: Don't call ->readpage if IOCB_WAITQ is set Matthew Wilcox (Oracle)
@ 2020-11-02 18:43 ` Matthew Wilcox (Oracle)
  2020-11-02 19:37   ` Kent Overstreet
  2020-11-03  7:34   ` Christoph Hellwig
  2020-11-02 18:43 ` [PATCH 08/17] mm/filemap: Change filemap_create_page arguments Matthew Wilcox (Oracle)
                   ` (10 subsequent siblings)
  17 siblings, 2 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:43 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

Make this function more generic by passing the file instead of the iocb.
Check in the callers whether we should call readpage or not.  Also make
it return an errno / 0 / AOP_TRUNCATED_PAGE, and make calling put_page()
the caller's responsibility.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 89 +++++++++++++++++++++++++---------------------------
 1 file changed, 42 insertions(+), 47 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 5bafd2dc830c..171da5c592fa 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2194,56 +2194,38 @@ static unsigned mapping_get_read_thps(struct address_space *mapping,
 	return ret;
 }
 
-static struct page *filemap_read_page(struct kiocb *iocb, struct file *filp,
-		struct address_space *mapping, struct page *page)
+static int filemap_read_page(struct file *file, struct address_space *mapping,
+		struct page *page)
 {
-	struct file_ra_state *ra = &filp->f_ra;
 	int error;
 
-	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) {
-		unlock_page(page);
-		put_page(page);
-		return ERR_PTR(-EAGAIN);
-	}
-
 	/*
-	 * A previous I/O error may have been due to temporary
-	 * failures, eg. multipath errors.
-	 * PG_error will be set again if readpage fails.
+	 * A previous I/O error may have been due to temporary failures,
+	 * eg. multipath errors.  PG_error will be set again if readpage
+	 * fails.
 	 */
 	ClearPageError(page);
 	/* Start the actual read. The read will unlock the page. */
-	error = mapping->a_ops->readpage(filp, page);
-
-	if (unlikely(error)) {
-		put_page(page);
-		return error != AOP_TRUNCATED_PAGE ? ERR_PTR(error) : NULL;
-	}
+	error = mapping->a_ops->readpage(file, page);
+	if (error)
+		return error;
+	if (PageUptodate(page))
+		return 0;
 
+	error = lock_page_killable(page);
+	if (error)
+		return error;
 	if (!PageUptodate(page)) {
-		error = lock_page_killable(page);
-		if (unlikely(error)) {
-			put_page(page);
-			return ERR_PTR(error);
-		}
-		if (!PageUptodate(page)) {
-			if (page->mapping == NULL) {
-				/*
-				 * invalidate_mapping_pages got it
-				 */
-				unlock_page(page);
-				put_page(page);
-				return NULL;
-			}
-			unlock_page(page);
-			shrink_readahead_size_eio(ra);
-			put_page(page);
-			return ERR_PTR(-EIO);
+		if (page->mapping == NULL) {
+			/* page truncated */
+			error = AOP_TRUNCATED_PAGE;
+		} else {
+			shrink_readahead_size_eio(&file->f_ra);
+			error = -EIO;
 		}
-		unlock_page(page);
 	}
-
-	return page;
+	unlock_page(page);
+	return error;
 }
 
 static struct page *filemap_update_page(struct kiocb *iocb, struct file *filp,
@@ -2285,7 +2267,18 @@ static struct page *filemap_update_page(struct kiocb *iocb, struct file *filp,
 	return page;
 
 readpage:
-	return filemap_read_page(iocb, filp, mapping, page);
+	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) {
+		unlock_page(page);
+		put_page(page);
+		return ERR_PTR(-EAGAIN);
+	}
+	error = filemap_read_page(iocb->ki_filp, mapping, page);
+	if (!error)
+		return page;
+	put_page(page);
+	if (error == AOP_TRUNCATED_PAGE)
+		return NULL;
+	return ERR_PTR(error);
 truncated:
 	unlock_page(page);
 	put_page(page);
@@ -2301,7 +2294,7 @@ static struct page *filemap_create_page(struct kiocb *iocb,
 	struct page *page;
 	int error;
 
-	if (iocb->ki_flags & IOCB_NOIO)
+	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))
 		return ERR_PTR(-EAGAIN);
 
 	page = page_cache_alloc(mapping);
@@ -2310,12 +2303,14 @@ static struct page *filemap_create_page(struct kiocb *iocb,
 
 	error = add_to_page_cache_lru(page, mapping, index,
 				      mapping_gfp_constraint(mapping, GFP_KERNEL));
-	if (error) {
-		put_page(page);
-		return error != -EEXIST ? ERR_PTR(error) : NULL;
-	}
-
-	return filemap_read_page(iocb, filp, mapping, page);
+	if (!error)
+		error = filemap_read_page(iocb->ki_filp, mapping, page);
+	if (!error)
+		return page;
+	put_page(page);
+	if (error == -EEXIST || error == AOP_TRUNCATED_PAGE)
+		return NULL;
+	return ERR_PTR(error);
 }
 
 static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 08/17] mm/filemap: Change filemap_create_page arguments
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (6 preceding siblings ...)
  2020-11-02 18:43 ` [PATCH 07/17] mm/filemap: Change filemap_read_page calling conventions Matthew Wilcox (Oracle)
@ 2020-11-02 18:43 ` Matthew Wilcox (Oracle)
  2020-11-02 19:38   ` Kent Overstreet
  2020-11-03  7:35   ` Christoph Hellwig
  2020-11-02 18:43 ` [PATCH 09/17] mm/filemap: Convert filemap_update_page to return an errno Matthew Wilcox (Oracle)
                   ` (9 subsequent siblings)
  17 siblings, 2 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:43 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

filemap_create_page() doesn't need the iocb or the iter.  It just needs
the file and the index.  Move the iocb flag checks to the caller.  We can
skip checking GFP_NOIO as that's checked a few lines earlier.  There's no
need to fall through to filemap_update_page() -- if filemap_create_page
couldn't update it, filemap_update_page will not be able to.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 171da5c592fa..5527b239771c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2285,26 +2285,20 @@ static struct page *filemap_update_page(struct kiocb *iocb, struct file *filp,
 	return NULL;
 }
 
-static struct page *filemap_create_page(struct kiocb *iocb,
-		struct iov_iter *iter)
+static struct page *filemap_create_page(struct file *file,
+		struct address_space *mapping, pgoff_t index)
 {
-	struct file *filp = iocb->ki_filp;
-	struct address_space *mapping = filp->f_mapping;
-	pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
 	struct page *page;
 	int error;
 
-	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))
-		return ERR_PTR(-EAGAIN);
-
 	page = page_cache_alloc(mapping);
 	if (!page)
 		return ERR_PTR(-ENOMEM);
 
 	error = add_to_page_cache_lru(page, mapping, index,
-				      mapping_gfp_constraint(mapping, GFP_KERNEL));
+			mapping_gfp_constraint(mapping, GFP_KERNEL));
 	if (!error)
-		error = filemap_read_page(iocb->ki_filp, mapping, page);
+		error = filemap_read_page(file, mapping, page);
 	if (!error)
 		return page;
 	put_page(page);
@@ -2338,13 +2332,17 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 	page_cache_sync_readahead(mapping, ra, filp, index, last_index - index);
 
 	nr_got = mapping_get_read_thps(mapping, index, nr, pages);
-	if (nr_got)
-		goto got_pages;
-
-	pages[0] = filemap_create_page(iocb, iter);
-	err = PTR_ERR_OR_ZERO(pages[0]);
-	if (!IS_ERR_OR_NULL(pages[0]))
-		nr_got = 1;
+	if (!nr_got) {
+		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
+			return -EAGAIN;
+		pages[0] = filemap_create_page(filp, mapping,
+				iocb->ki_pos >> PAGE_SHIFT);
+		if (!pages[0])
+			goto find_page;
+		if (IS_ERR(pages[0]))
+			return PTR_ERR(pages[0]);
+		return 1;
+	}
 got_pages:
 	if (nr_got > 0) {
 		struct page *page = pages[nr_got - 1];
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 09/17] mm/filemap: Convert filemap_update_page to return an errno
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (7 preceding siblings ...)
  2020-11-02 18:43 ` [PATCH 08/17] mm/filemap: Change filemap_create_page arguments Matthew Wilcox (Oracle)
@ 2020-11-02 18:43 ` Matthew Wilcox (Oracle)
  2020-11-02 19:39   ` Kent Overstreet
  2020-11-03  7:36   ` Christoph Hellwig
  2020-11-02 18:43 ` [PATCH 10/17] mm/filemap: Move the iocb checks into filemap_update_page Matthew Wilcox (Oracle)
                   ` (8 subsequent siblings)
  17 siblings, 2 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:43 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

Use AOP_TRUNCATED_PAGE to indicate that no error occurred, but the
page we looked up is no longer valid.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 43 +++++++++++++++++++------------------------
 1 file changed, 19 insertions(+), 24 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 5527b239771c..ebb14fdec0cc 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2228,24 +2228,21 @@ static int filemap_read_page(struct file *file, struct address_space *mapping,
 	return error;
 }
 
-static struct page *filemap_update_page(struct kiocb *iocb, struct file *filp,
-		struct iov_iter *iter, struct page *page, loff_t pos,
-		loff_t count)
+static int filemap_update_page(struct kiocb *iocb,
+		struct address_space *mapping, struct iov_iter *iter,
+		struct page *page, loff_t pos, loff_t count)
 {
-	struct address_space *mapping = filp->f_mapping;
 	struct inode *inode = mapping->host;
 	int error;
 
 	if (iocb->ki_flags & IOCB_WAITQ) {
 		error = lock_page_async(page, iocb->ki_waitq);
-		if (error) {
-			put_page(page);
-			return ERR_PTR(error);
-		}
+		if (error)
+			goto error;
 	} else {
 		if (!trylock_page(page)) {
 			put_and_wait_on_page_locked(page, TASK_KILLABLE);
-			return NULL;
+			return AOP_TRUNCATED_PAGE;
 		}
 	}
 
@@ -2264,25 +2261,24 @@ static struct page *filemap_update_page(struct kiocb *iocb, struct file *filp,
 		goto readpage;
 uptodate:
 	unlock_page(page);
-	return page;
+	return 0;
 
 readpage:
 	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) {
 		unlock_page(page);
-		put_page(page);
-		return ERR_PTR(-EAGAIN);
+		error = -EAGAIN;
+	} else {
+		error = filemap_read_page(iocb->ki_filp, mapping, page);
+		if (!error)
+			return 0;
 	}
-	error = filemap_read_page(iocb->ki_filp, mapping, page);
-	if (!error)
-		return page;
+error:
 	put_page(page);
-	if (error == AOP_TRUNCATED_PAGE)
-		return NULL;
-	return ERR_PTR(error);
+	return error;
 truncated:
 	unlock_page(page);
 	put_page(page);
-	return NULL;
+	return AOP_TRUNCATED_PAGE;
 }
 
 static struct page *filemap_create_page(struct file *file,
@@ -2371,20 +2367,19 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 				goto err;
 			}
 
-			page = filemap_update_page(iocb, filp, iter, page,
+			err = filemap_update_page(iocb, mapping, iter, page,
 					pg_pos, pg_count);
-			if (IS_ERR_OR_NULL(page)) {
+			if (err)
 				nr_got--;
-				err = PTR_ERR_OR_ZERO(page);
-			}
 		}
 	}
 
 err:
 	if (likely(nr_got))
 		return nr_got;
-	if (err)
+	if (err < 0)
 		return err;
+	err = 0;
 	/*
 	 * No pages and no error means we raced and should retry:
 	 */
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 10/17] mm/filemap: Move the iocb checks into filemap_update_page
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (8 preceding siblings ...)
  2020-11-02 18:43 ` [PATCH 09/17] mm/filemap: Convert filemap_update_page to return an errno Matthew Wilcox (Oracle)
@ 2020-11-02 18:43 ` Matthew Wilcox (Oracle)
  2020-11-02 19:45   ` Kent Overstreet
  2020-11-03  7:41   ` Christoph Hellwig
  2020-11-02 18:43 ` [PATCH 11/17] mm/filemap: Add filemap_range_uptodate Matthew Wilcox (Oracle)
                   ` (7 subsequent siblings)
  17 siblings, 2 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:43 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

We don't need to give up when a special request sees a !Uptodate page.
We may be able to satisfy the read from a partially-uptodate page.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index ebb14fdec0cc..41b90243f4ee 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2230,17 +2230,21 @@ static int filemap_read_page(struct file *file, struct address_space *mapping,
 
 static int filemap_update_page(struct kiocb *iocb,
 		struct address_space *mapping, struct iov_iter *iter,
-		struct page *page, loff_t pos, loff_t count)
+		struct page *page, loff_t pos, loff_t count, bool first)
 {
 	struct inode *inode = mapping->host;
-	int error;
+	int error = -EAGAIN;
 
-	if (iocb->ki_flags & IOCB_WAITQ) {
-		error = lock_page_async(page, iocb->ki_waitq);
-		if (error)
+	if (!trylock_page(page)) {
+		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
 			goto error;
-	} else {
-		if (!trylock_page(page)) {
+		if (iocb->ki_flags & IOCB_WAITQ) {
+			if (!first)
+				goto error;
+			error = __lock_page_async(page, iocb->ki_waitq);
+			if (error)
+				goto error;
+		} else {
 			put_and_wait_on_page_locked(page, TASK_KILLABLE);
 			return AOP_TRUNCATED_PAGE;
 		}
@@ -2359,16 +2363,8 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 		}
 
 		if (!PageUptodate(page)) {
-			if ((iocb->ki_flags & IOCB_NOWAIT) ||
-			    ((iocb->ki_flags & IOCB_WAITQ) && nr_got > 1)) {
-				put_page(page);
-				nr_got--;
-				err = -EAGAIN;
-				goto err;
-			}
-
 			err = filemap_update_page(iocb, mapping, iter, page,
-					pg_pos, pg_count);
+					pg_pos, pg_count, nr_got == 1);
 			if (err)
 				nr_got--;
 		}
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 11/17] mm/filemap: Add filemap_range_uptodate
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (9 preceding siblings ...)
  2020-11-02 18:43 ` [PATCH 10/17] mm/filemap: Move the iocb checks into filemap_update_page Matthew Wilcox (Oracle)
@ 2020-11-02 18:43 ` Matthew Wilcox (Oracle)
  2020-11-02 19:50   ` Kent Overstreet
  2020-11-03  7:49   ` Christoph Hellwig
  2020-11-02 18:43 ` [PATCH 12/17] mm/filemap: Split filemap_readahead out of filemap_get_pages Matthew Wilcox (Oracle)
                   ` (6 subsequent siblings)
  17 siblings, 2 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:43 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

Move the complicated condition and the calculations out of
filemap_update_page() into its own function.
---
 mm/filemap.c | 49 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 41b90243f4ee..81b569d818a3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2228,11 +2228,39 @@ static int filemap_read_page(struct file *file, struct address_space *mapping,
 	return error;
 }
 
+static bool filemap_range_uptodate(struct kiocb *iocb,
+		struct address_space *mapping, struct iov_iter *iter,
+		struct page *page)
+{
+	loff_t pos;
+	int count;
+
+	if (PageUptodate(page))
+		return true;
+	/* pipes can't handle partially uptodate pages */
+	if (iov_iter_is_pipe(iter))
+		return false;
+	if (!mapping->a_ops->is_partially_uptodate)
+		return false;
+	if (mapping->host->i_blkbits >= (PAGE_SHIFT + thp_order(page)))
+		return false;
+
+	pos = (loff_t) page->index << PAGE_SHIFT;
+	if (pos > iocb->ki_pos) {
+		count = iocb->ki_pos + iter->count - pos;
+		pos = 0;
+	} else {
+		count = iter->count;
+		pos = iocb->ki_pos & (thp_size(page) - 1);
+	}
+
+	return mapping->a_ops->is_partially_uptodate(page, pos, count);
+}
+
 static int filemap_update_page(struct kiocb *iocb,
 		struct address_space *mapping, struct iov_iter *iter,
 		struct page *page, loff_t pos, loff_t count, bool first)
 {
-	struct inode *inode = mapping->host;
 	int error = -EAGAIN;
 
 	if (!trylock_page(page)) {
@@ -2252,22 +2280,11 @@ static int filemap_update_page(struct kiocb *iocb,
 
 	if (!page->mapping)
 		goto truncated;
-	if (PageUptodate(page))
-		goto uptodate;
-	if (inode->i_blkbits == PAGE_SHIFT ||
-			!mapping->a_ops->is_partially_uptodate)
-		goto readpage;
-	/* pipes can't handle partially uptodate pages */
-	if (unlikely(iov_iter_is_pipe(iter)))
-		goto readpage;
-	if (!mapping->a_ops->is_partially_uptodate(page,
-				pos & (thp_size(page) - 1), count))
-		goto readpage;
-uptodate:
-	unlock_page(page);
-	return 0;
+	if (filemap_range_uptodate(iocb, mapping, iter, page)) {
+		unlock_page(page);
+		return 0;
+	}
 
-readpage:
 	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) {
 		unlock_page(page);
 		error = -EAGAIN;
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 12/17] mm/filemap: Split filemap_readahead out of filemap_get_pages
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (10 preceding siblings ...)
  2020-11-02 18:43 ` [PATCH 11/17] mm/filemap: Add filemap_range_uptodate Matthew Wilcox (Oracle)
@ 2020-11-02 18:43 ` Matthew Wilcox (Oracle)
  2020-11-02 19:52   ` Kent Overstreet
  2020-11-02 18:43 ` [PATCH 13/17] mm/filemap: Remove parameters from filemap_update_page() Matthew Wilcox (Oracle)
                   ` (5 subsequent siblings)
  17 siblings, 1 reply; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:43 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

This simplifies the error handling.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 81b569d818a3..7c6380a3a871 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2324,6 +2324,17 @@ static struct page *filemap_create_page(struct file *file,
 	return ERR_PTR(error);
 }
 
+static int filemap_readahead(struct kiocb *iocb, struct file *file,
+		struct address_space *mapping, struct page *page,
+		pgoff_t last_index)
+{
+	if (iocb->ki_flags & IOCB_NOIO)
+		return -EAGAIN;
+	page_cache_async_readahead(mapping, ra, filp, page,
+			pg_index, last_index - pg_index);
+	return 0;
+}
+
 static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 		struct page **pages, unsigned int nr)
 {
@@ -2368,23 +2379,14 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 				    (loff_t) pg_index << PAGE_SHIFT);
 		loff_t pg_count = iocb->ki_pos + iter->count - pg_pos;
 
-		if (PageReadahead(page)) {
-			if (iocb->ki_flags & IOCB_NOIO) {
-				put_page(page);
-				nr_got--;
-				err = -EAGAIN;
-				goto err;
-			}
-			page_cache_async_readahead(mapping, ra, filp, page,
-					pg_index, last_index - pg_index);
-		}
-
-		if (!PageUptodate(page)) {
+		if (PageReadahead(page))
+			err = filemap_readahead(iocb, filp, mapping, page,
+					last_index);
+		if (!err && !PageUptodate(page))
 			err = filemap_update_page(iocb, mapping, iter, page,
 					pg_pos, pg_count, nr_got == 1);
-			if (err)
-				nr_got--;
-		}
+		if (err)
+			nr_got--;
 	}
 
 err:
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 13/17] mm/filemap: Remove parameters from filemap_update_page()
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (11 preceding siblings ...)
  2020-11-02 18:43 ` [PATCH 12/17] mm/filemap: Split filemap_readahead out of filemap_get_pages Matthew Wilcox (Oracle)
@ 2020-11-02 18:43 ` Matthew Wilcox (Oracle)
  2020-11-02 19:52   ` Kent Overstreet
  2020-11-03  7:50   ` Christoph Hellwig
  2020-11-02 18:43 ` [PATCH 14/17] mm/filemap: Restructure filemap_get_pages Matthew Wilcox (Oracle)
                   ` (4 subsequent siblings)
  17 siblings, 2 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:43 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

The 'pos' and 'count' params are no longer used in filemap_update_page()

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 7c6380a3a871..0ae8305ccb97 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2259,7 +2259,7 @@ static bool filemap_range_uptodate(struct kiocb *iocb,
 
 static int filemap_update_page(struct kiocb *iocb,
 		struct address_space *mapping, struct iov_iter *iter,
-		struct page *page, loff_t pos, loff_t count, bool first)
+		struct page *page, bool first)
 {
 	int error = -EAGAIN;
 
@@ -2330,8 +2330,8 @@ static int filemap_readahead(struct kiocb *iocb, struct file *file,
 {
 	if (iocb->ki_flags & IOCB_NOIO)
 		return -EAGAIN;
-	page_cache_async_readahead(mapping, ra, filp, page,
-			pg_index, last_index - pg_index);
+	page_cache_async_readahead(mapping, &file->f_ra, file, page,
+			page->index, last_index - page->index);
 	return 0;
 }
 
@@ -2374,22 +2374,17 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 got_pages:
 	if (nr_got > 0) {
 		struct page *page = pages[nr_got - 1];
-		pgoff_t pg_index = page->index;
-		loff_t pg_pos = max(iocb->ki_pos,
-				    (loff_t) pg_index << PAGE_SHIFT);
-		loff_t pg_count = iocb->ki_pos + iter->count - pg_pos;
 
 		if (PageReadahead(page))
 			err = filemap_readahead(iocb, filp, mapping, page,
 					last_index);
 		if (!err && !PageUptodate(page))
 			err = filemap_update_page(iocb, mapping, iter, page,
-					pg_pos, pg_count, nr_got == 1);
+					nr_got == 1);
 		if (err)
 			nr_got--;
 	}
 
-err:
 	if (likely(nr_got))
 		return nr_got;
 	if (err < 0)
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 14/17] mm/filemap: Restructure filemap_get_pages
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (12 preceding siblings ...)
  2020-11-02 18:43 ` [PATCH 13/17] mm/filemap: Remove parameters from filemap_update_page() Matthew Wilcox (Oracle)
@ 2020-11-02 18:43 ` Matthew Wilcox (Oracle)
  2020-11-02 20:05   ` Kent Overstreet
  2020-11-03  7:57   ` Christoph Hellwig
  2020-11-02 18:43 ` [PATCH 15/17] mm/filemap: Don't relock the page after calling readpage Matthew Wilcox (Oracle)
                   ` (3 subsequent siblings)
  17 siblings, 2 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:43 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

Avoid a goto, and by the time we get to calling filemap_update_page(),
we definitely have at least one page.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 37 ++++++++++++++++---------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 0ae8305ccb97..f16b1eb03bca 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2343,6 +2343,7 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 	struct file_ra_state *ra = &filp->f_ra;
 	pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
 	pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
+	struct page *page;
 	int nr_got, err = 0;
 
 	nr = min_t(unsigned long, last_index - index, nr);
@@ -2351,15 +2352,13 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 		return -EINTR;
 
 	nr_got = mapping_get_read_thps(mapping, index, nr, pages);
-	if (nr_got)
-		goto got_pages;
-
-	if (iocb->ki_flags & IOCB_NOIO)
-		return -EAGAIN;
-
-	page_cache_sync_readahead(mapping, ra, filp, index, last_index - index);
-
-	nr_got = mapping_get_read_thps(mapping, index, nr, pages);
+	if (!nr_got) {
+		if (iocb->ki_flags & IOCB_NOIO)
+			return -EAGAIN;
+		page_cache_sync_readahead(mapping, ra, filp, index,
+				last_index - index);
+		nr_got = mapping_get_read_thps(mapping, index, nr, pages);
+	}
 	if (!nr_got) {
 		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
 			return -EAGAIN;
@@ -2371,20 +2370,16 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 			return PTR_ERR(pages[0]);
 		return 1;
 	}
-got_pages:
-	if (nr_got > 0) {
-		struct page *page = pages[nr_got - 1];
 
-		if (PageReadahead(page))
-			err = filemap_readahead(iocb, filp, mapping, page,
-					last_index);
-		if (!err && !PageUptodate(page))
-			err = filemap_update_page(iocb, mapping, iter, page,
-					nr_got == 1);
-		if (err)
-			nr_got--;
-	}
+	page = pages[nr_got - 1];
+	if (PageReadahead(page))
+		err = filemap_readahead(iocb, filp, mapping, page, last_index);
+	if (!err && !PageUptodate(page))
+		err = filemap_update_page(iocb, mapping, iter, page,
+				nr_got == 1);
 
+	if (err)
+		nr_got--;
 	if (likely(nr_got))
 		return nr_got;
 	if (err < 0)
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 15/17] mm/filemap: Don't relock the page after calling readpage
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (13 preceding siblings ...)
  2020-11-02 18:43 ` [PATCH 14/17] mm/filemap: Restructure filemap_get_pages Matthew Wilcox (Oracle)
@ 2020-11-02 18:43 ` Matthew Wilcox (Oracle)
  2020-11-02 20:06   ` Kent Overstreet
  2020-11-03  8:00   ` Christoph Hellwig
  2020-11-02 18:43 ` [PATCH 16/17] mm/filemap: rename generic_file_buffered_read to filemap_read Matthew Wilcox (Oracle)
                   ` (2 subsequent siblings)
  17 siblings, 2 replies; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:43 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm; +Cc: Matthew Wilcox (Oracle), hch, kent.overstreet

We don't need to get the page lock again; we just need to wait for
the I/O to finish, so use wait_on_page_locked_killable() like the
other callers of ->readpage.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index f16b1eb03bca..f2de97d51441 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2209,23 +2209,16 @@ static int filemap_read_page(struct file *file, struct address_space *mapping,
 	error = mapping->a_ops->readpage(file, page);
 	if (error)
 		return error;
-	if (PageUptodate(page))
-		return 0;
 
-	error = lock_page_killable(page);
+	error = wait_on_page_locked_killable(page);
 	if (error)
 		return error;
-	if (!PageUptodate(page)) {
-		if (page->mapping == NULL) {
-			/* page truncated */
-			error = AOP_TRUNCATED_PAGE;
-		} else {
-			shrink_readahead_size_eio(&file->f_ra);
-			error = -EIO;
-		}
-	}
-	unlock_page(page);
-	return error;
+	if (PageUptodate(page))
+		return 0;
+	if (!page->mapping)	/* page truncated */
+		return AOP_TRUNCATED_PAGE;
+	shrink_readahead_size_eio(&file->f_ra);
+	return -EIO;
 }
 
 static bool filemap_range_uptodate(struct kiocb *iocb,
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 16/17] mm/filemap: rename generic_file_buffered_read to filemap_read
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (14 preceding siblings ...)
  2020-11-02 18:43 ` [PATCH 15/17] mm/filemap: Don't relock the page after calling readpage Matthew Wilcox (Oracle)
@ 2020-11-02 18:43 ` Matthew Wilcox (Oracle)
  2020-11-02 20:06   ` Kent Overstreet
  2020-11-02 18:43 ` [PATCH 17/17] mm: simplify generic_file_read_iter Matthew Wilcox (Oracle)
  2020-11-02 20:14 ` [PATCH 00/17] Refactor generic_file_buffered_read Kent Overstreet
  17 siblings, 1 reply; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:43 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm
  Cc: Christoph Hellwig, kent.overstreet, Matthew Wilcox

From: Christoph Hellwig <hch@lst.de>

Rename generic_file_buffered_read to match the naming of filemap_fault,
also update the written parameter to a more descriptive name and
improve the kerneldoc comment.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/btrfs/file.c    |  2 +-
 include/linux/fs.h |  4 ++--
 mm/filemap.c       | 35 ++++++++++++++++-------------------
 3 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 87355a38a654..1a4913e1fd12 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3633,7 +3633,7 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 			return ret;
 	}
 
-	return generic_file_buffered_read(iocb, to, ret);
+	return filemap_read(iocb, to, ret);
 }
 
 const struct file_operations btrfs_file_operations = {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4ccc879ae845..413e327fa1c6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2946,8 +2946,8 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
 extern int generic_write_check_limits(struct file *file, loff_t pos,
 		loff_t *count);
 extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
-extern ssize_t generic_file_buffered_read(struct kiocb *iocb,
-		struct iov_iter *to, ssize_t already_read);
+ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *to,
+		ssize_t already_read);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/mm/filemap.c b/mm/filemap.c
index f2de97d51441..92bb308029c3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2385,23 +2385,20 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 }
 
 /**
- * generic_file_buffered_read - generic file read routine
- * @iocb:	the iocb to read
- * @iter:	data destination
- * @written:	already copied
+ * filemap_read - Read data from the page cache.
+ * @iocb: The iocb to read.
+ * @iter: Destination for the data.
+ * @already_read: Number of bytes already read by the caller.
  *
- * This is a generic file read routine, and uses the
- * mapping->a_ops->readpage() function for the actual low-level stuff.
+ * Copies data from the page cache.  If the data is not currently present,
+ * uses the readahead and readpage address_space operations to fetch it.
  *
- * This is really ugly. But the goto's actually try to clarify some
- * of the logic when it comes to error handling etc.
- *
- * Return:
- * * total number of bytes copied, including those the were already @written
- * * negative error code if nothing was copied
+ * Return: Total number of bytes copied, including those already read by
+ * the caller.  If an error happens before any bytes are copied, returns
+ * a negative error number.
  */
-ssize_t generic_file_buffered_read(struct kiocb *iocb,
-		struct iov_iter *iter, ssize_t written)
+ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
+		ssize_t already_read)
 {
 	struct file *filp = iocb->ki_filp;
 	struct file_ra_state *ra = &filp->f_ra;
@@ -2435,7 +2432,7 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 		 * can no longer safely return -EIOCBQUEUED. Hence mark
 		 * an async read NOWAIT at that point.
 		 */
-		if ((iocb->ki_flags & IOCB_WAITQ) && written)
+		if ((iocb->ki_flags & IOCB_WAITQ) && already_read)
 			iocb->ki_flags |= IOCB_NOWAIT;
 
 		i = 0;
@@ -2501,7 +2498,7 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 
 			copied = copy_page_to_iter(page, offset, bytes, iter);
 
-			written += copied;
+			already_read += copied;
 			iocb->ki_pos += copied;
 			ra->prev_pos = iocb->ki_pos;
 
@@ -2520,9 +2517,9 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 	if (pages != pages_onstack)
 		kfree(pages);
 
-	return written ? written : error;
+	return already_read ? already_read : error;
 }
-EXPORT_SYMBOL_GPL(generic_file_buffered_read);
+EXPORT_SYMBOL_GPL(filemap_read);
 
 /**
  * generic_file_read_iter - generic filesystem read routine
@@ -2596,7 +2593,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 			goto out;
 	}
 
-	retval = generic_file_buffered_read(iocb, iter, retval);
+	retval = filemap_read(iocb, iter, retval);
 out:
 	return retval;
 }
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 17/17] mm: simplify generic_file_read_iter
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (15 preceding siblings ...)
  2020-11-02 18:43 ` [PATCH 16/17] mm/filemap: rename generic_file_buffered_read to filemap_read Matthew Wilcox (Oracle)
@ 2020-11-02 18:43 ` Matthew Wilcox (Oracle)
  2020-11-02 20:07   ` Kent Overstreet
  2020-11-02 20:14 ` [PATCH 00/17] Refactor generic_file_buffered_read Kent Overstreet
  17 siblings, 1 reply; 66+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-11-02 18:43 UTC (permalink / raw)
  To: linux-fsdevel, linux-mm
  Cc: Christoph Hellwig, kent.overstreet, Matthew Wilcox

From: Christoph Hellwig <hch@lst.de>

Avoid the pointless goto out just for returning retval.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 92bb308029c3..4676ce15a9a5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2549,7 +2549,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 	ssize_t retval = 0;
 
 	if (!count)
-		goto out; /* skip atime */
+		return 0; /* skip atime */
 
 	if (iocb->ki_flags & IOCB_DIRECT) {
 		struct file *file = iocb->ki_filp;
@@ -2567,7 +2567,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 						iocb->ki_pos,
 					        iocb->ki_pos + count - 1);
 			if (retval < 0)
-				goto out;
+				return retval;
 		}
 
 		file_accessed(file);
@@ -2590,12 +2590,10 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 		 */
 		if (retval < 0 || !count || iocb->ki_pos >= size ||
 		    IS_DAX(inode))
-			goto out;
+			return retval;
 	}
 
-	retval = filemap_read(iocb, iter, retval);
-out:
-	return retval;
+	return filemap_read(iocb, iter, retval);
 }
 EXPORT_SYMBOL(generic_file_read_iter);
 
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH 01/17] mm/filemap: Rename generic_file_buffered_read subfunctions
  2020-11-02 18:42 ` [PATCH 01/17] mm/filemap: Rename generic_file_buffered_read subfunctions Matthew Wilcox (Oracle)
@ 2020-11-02 18:53   ` Kent Overstreet
  2020-11-03  7:27   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 18:53 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:42:56PM +0000, Matthew Wilcox (Oracle) wrote:
> The recent split of generic_file_buffered_read() created some very
> long function names which are hard to distinguish from each other.
> Rename as follows:
> 
> generic_file_buffered_read_readpage -> filemap_read_page
> generic_file_buffered_read_pagenotuptodate -> filemap_update_page
> generic_file_buffered_read_no_cached_page -> filemap_create_page
> generic_file_buffered_read_get_pages -> filemap_get_pages
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 02/17] mm/filemap: Use THPs in generic_file_buffered_read
  2020-11-02 18:42 ` [PATCH 02/17] mm/filemap: Use THPs in generic_file_buffered_read Matthew Wilcox (Oracle)
@ 2020-11-02 18:55   ` Kent Overstreet
  2020-11-03  7:28   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 18:55 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:42:57PM +0000, Matthew Wilcox (Oracle) wrote:
> Add mapping_get_read_thps() which returns the THPs which represent a
> contiguous array of bytes in the file.  It also stops when encountering
> a page marked as Readahead or !Uptodate (but does return that page)
> so it can be handled appropriately by filemap_get_pages().  That lets us
> remove the loop in filemap_get_pages() and check only the last page.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 03/17] mm/filemap: Pass a sleep state to put_and_wait_on_page_locked
  2020-11-02 18:42 ` [PATCH 03/17] mm/filemap: Pass a sleep state to put_and_wait_on_page_locked Matthew Wilcox (Oracle)
@ 2020-11-02 18:56   ` Kent Overstreet
  2020-11-03  7:28   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 18:56 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:42:58PM +0000, Matthew Wilcox (Oracle) wrote:
> This is prep work for the next patch, but I think at least one of the
> current callers would prefer a killable sleep to an uninterruptible one.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 04/17] mm/filemap: Support readpage splitting a page
  2020-11-02 18:42 ` [PATCH 04/17] mm/filemap: Support readpage splitting a page Matthew Wilcox (Oracle)
@ 2020-11-02 19:00   ` Kent Overstreet
  2020-11-02 19:03     ` Matthew Wilcox
  2020-11-02 19:12   ` Kent Overstreet
  2020-11-03  7:31   ` Christoph Hellwig
  2 siblings, 1 reply; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 19:00 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:42:59PM +0000, Matthew Wilcox (Oracle) wrote:
> For page splitting to succeed, the thread asking to split the
> page has to be the only one with a reference to the page.  Calling
> wait_on_page_locked() while holding a reference to the page will
> effectively prevent this from happening with sufficient threads waiting
> on the same page.  Use put_and_wait_on_page_locked() to sleep without
> holding a reference to the page, then retry the page lookup after the
> page is unlocked.
> 
> Since we now get the page lock a little earlier in filemap_update_page(),
> we can eliminate a number of duplicate checks.  The original intent
> (commit ebded02788b5 ("avoid unnecessary calls to lock_page when waiting
> for IO to complete during a read")) behind getting the page lock later
> was to avoid re-locking the page after it has been brought uptodate by
> another thread.  We still avoid that because we go through the normal
> lookup path again after the winning thread has brought the page uptodate.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/filemap.c | 76 ++++++++++++++++------------------------------------
>  1 file changed, 23 insertions(+), 53 deletions(-)
> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 709774a60379..550e023abb52 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1347,14 +1347,6 @@ static int __wait_on_page_locked_async(struct page *page,
>  	return ret;
>  }
>  
> -static int wait_on_page_locked_async(struct page *page,
> -				     struct wait_page_queue *wait)
> -{
> -	if (!PageLocked(page))
> -		return 0;
> -	return __wait_on_page_locked_async(compound_head(page), wait, false);
> -}
> -
>  /**
>   * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
>   * @page: The page to wait for.
> @@ -2281,64 +2273,42 @@ static struct page *filemap_update_page(struct kiocb *iocb, struct file *filp,
>  	struct inode *inode = mapping->host;
>  	int error;
>  
> -	/*
> -	 * See comment in do_read_cache_page on why
> -	 * wait_on_page_locked is used to avoid unnecessarily
> -	 * serialisations and why it's safe.
> -	 */
>  	if (iocb->ki_flags & IOCB_WAITQ) {
> -		error = wait_on_page_locked_async(page,
> -						iocb->ki_waitq);
> +		error = lock_page_async(page, iocb->ki_waitq);
> +		if (error) {
> +			put_page(page);
> +			return ERR_PTR(error);
> +		}
>  	} else {
> -		error = wait_on_page_locked_killable(page);
> -	}
> -	if (unlikely(error)) {
> -		put_page(page);
> -		return ERR_PTR(error);
> +		if (!trylock_page(page)) {
> +			put_and_wait_on_page_locked(page, TASK_KILLABLE);
> +			return NULL;
> +		}
>  	}
> -	if (PageUptodate(page))
> -		return page;
>  
> +	if (!page->mapping)
> +		goto truncated;

Since we're dropping our ref to the page, it could potentially be truncated and
then reused, no? So we should be checking page->mapping == mapping &&
page->index == index (and stashing page->index before dropping our ref, or
passing it in).

> +	if (PageUptodate(page))
> +		goto uptodate;
>  	if (inode->i_blkbits == PAGE_SHIFT ||
>  			!mapping->a_ops->is_partially_uptodate)
> -		goto page_not_up_to_date;
> +		goto readpage;
>  	/* pipes can't handle partially uptodate pages */
>  	if (unlikely(iov_iter_is_pipe(iter)))
> -		goto page_not_up_to_date;
> -	if (!trylock_page(page))
> -		goto page_not_up_to_date;
> -	/* Did it get truncated before we got the lock? */
> -	if (!page->mapping)
> -		goto page_not_up_to_date_locked;
> +		goto readpage;
>  	if (!mapping->a_ops->is_partially_uptodate(page,
> -				pos & ~PAGE_MASK, count))
> -		goto page_not_up_to_date_locked;
> +				pos & (thp_size(page) - 1), count))
> +		goto readpage;
> +uptodate:
>  	unlock_page(page);
>  	return page;
>  
> -page_not_up_to_date:
> -	/* Get exclusive access to the page ... */
> -	error = lock_page_for_iocb(iocb, page);
> -	if (unlikely(error)) {
> -		put_page(page);
> -		return ERR_PTR(error);
> -	}
> -
> -page_not_up_to_date_locked:
> -	/* Did it get truncated before we got the lock? */
> -	if (!page->mapping) {
> -		unlock_page(page);
> -		put_page(page);
> -		return NULL;
> -	}
> -
> -	/* Did somebody else fill it already? */
> -	if (PageUptodate(page)) {
> -		unlock_page(page);
> -		return page;
> -	}
> -
> +readpage:
>  	return filemap_read_page(iocb, filp, mapping, page);
> +truncated:
> +	unlock_page(page);
> +	put_page(page);
> +	return NULL;
>  }
>  
>  static struct page *filemap_create_page(struct kiocb *iocb,
> -- 
> 2.28.0
> 

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 05/17] mm/filemap: Inline __wait_on_page_locked_async into caller
  2020-11-02 18:43 ` [PATCH 05/17] mm/filemap: Inline __wait_on_page_locked_async into caller Matthew Wilcox (Oracle)
@ 2020-11-02 19:00   ` Kent Overstreet
  2020-11-03  7:31   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 19:00 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:43:00PM +0000, Matthew Wilcox (Oracle) wrote:
> The previous patch removed wait_on_page_locked_async(), so inline
> __wait_on_page_locked_async into __lock_page_async().
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 04/17] mm/filemap: Support readpage splitting a page
  2020-11-02 19:00   ` Kent Overstreet
@ 2020-11-02 19:03     ` Matthew Wilcox
  0 siblings, 0 replies; 66+ messages in thread
From: Matthew Wilcox @ 2020-11-02 19:03 UTC (permalink / raw)
  To: Kent Overstreet; +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 02:00:08PM -0500, Kent Overstreet wrote:
(snipped the deleted lines for clarity)
> >  	if (iocb->ki_flags & IOCB_WAITQ) {
> > +		error = lock_page_async(page, iocb->ki_waitq);
> > +		if (error) {
> > +			put_page(page);
> > +			return ERR_PTR(error);
> > +		}
> >  	} else {
> > +		if (!trylock_page(page)) {
> > +			put_and_wait_on_page_locked(page, TASK_KILLABLE);
> > +			return NULL;
> > +		}
> >  	}
> >  
> > +	if (!page->mapping)
> > +		goto truncated;
> 
> Since we're dropping our ref to the page, it could potentially be truncated and
> then reused, no? So we should be checking page->mapping == mapping &&
> page->index == index (and stashing page->index before dropping our ref, or
> passing it in).

If we get to this point, then we _didn't_ drop our ref to the page.
All the paths above that call put_page() then return.


^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 04/17] mm/filemap: Support readpage splitting a page
  2020-11-02 18:42 ` [PATCH 04/17] mm/filemap: Support readpage splitting a page Matthew Wilcox (Oracle)
  2020-11-02 19:00   ` Kent Overstreet
@ 2020-11-02 19:12   ` Kent Overstreet
  2020-11-03  7:31   ` Christoph Hellwig
  2 siblings, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 19:12 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:42:59PM +0000, Matthew Wilcox (Oracle) wrote:
> For page splitting to succeed, the thread asking to split the
> page has to be the only one with a reference to the page.  Calling
> wait_on_page_locked() while holding a reference to the page will
> effectively prevent this from happening with sufficient threads waiting
> on the same page.  Use put_and_wait_on_page_locked() to sleep without
> holding a reference to the page, then retry the page lookup after the
> page is unlocked.
> 
> Since we now get the page lock a little earlier in filemap_update_page(),
> we can eliminate a number of duplicate checks.  The original intent
> (commit ebded02788b5 ("avoid unnecessary calls to lock_page when waiting
> for IO to complete during a read")) behind getting the page lock later
> was to avoid re-locking the page after it has been brought uptodate by
> another thread.  We still avoid that because we go through the normal
> lookup path again after the winning thread has brought the page uptodate.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 06/17] mm/filemap: Don't call ->readpage if IOCB_WAITQ is set
  2020-11-02 18:43 ` [PATCH 06/17] mm/filemap: Don't call ->readpage if IOCB_WAITQ is set Matthew Wilcox (Oracle)
@ 2020-11-02 19:17   ` Kent Overstreet
  2020-11-03  7:31   ` Christoph Hellwig
  2020-11-05  7:22   ` Nikolay Borisov
  2 siblings, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 19:17 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:43:01PM +0000, Matthew Wilcox (Oracle) wrote:
> The readpage operation can block in many (most?) filesystems, so we
> should punt to a work queue instead of calling it.  This was the last
> caller of lock_page_async(), so remove it.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 07/17] mm/filemap: Change filemap_read_page calling conventions
  2020-11-02 18:43 ` [PATCH 07/17] mm/filemap: Change filemap_read_page calling conventions Matthew Wilcox (Oracle)
@ 2020-11-02 19:37   ` Kent Overstreet
  2020-11-03  7:34   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 19:37 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:43:02PM +0000, Matthew Wilcox (Oracle) wrote:
> Make this function more generic by passing the file instead of the iocb.
> Check in the callers whether we should call readpage or not.  Also make
> it return an errno / 0 / AOP_TRUNCATED_PAGE, and make calling put_page()
> the caller's responsibility.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 08/17] mm/filemap: Change filemap_create_page arguments
  2020-11-02 18:43 ` [PATCH 08/17] mm/filemap: Change filemap_create_page arguments Matthew Wilcox (Oracle)
@ 2020-11-02 19:38   ` Kent Overstreet
  2020-11-03  7:35   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 19:38 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:43:03PM +0000, Matthew Wilcox (Oracle) wrote:
> filemap_create_page() doesn't need the iocb or the iter.  It just needs
> the file and the index.  Move the iocb flag checks to the caller.  We can
> skip checking GFP_NOIO as that's checked a few lines earlier.  There's no
> need to fall through to filemap_update_page() -- if filemap_create_page
> couldn't update it, filemap_update_page will not be able to.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 09/17] mm/filemap: Convert filemap_update_page to return an errno
  2020-11-02 18:43 ` [PATCH 09/17] mm/filemap: Convert filemap_update_page to return an errno Matthew Wilcox (Oracle)
@ 2020-11-02 19:39   ` Kent Overstreet
  2020-11-03  7:36   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 19:39 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:43:04PM +0000, Matthew Wilcox (Oracle) wrote:
> Use AOP_TRUNCATED_PAGE to indicate that no error occurred, but the
> page we looked up is no longer valid.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 10/17] mm/filemap: Move the iocb checks into filemap_update_page
  2020-11-02 18:43 ` [PATCH 10/17] mm/filemap: Move the iocb checks into filemap_update_page Matthew Wilcox (Oracle)
@ 2020-11-02 19:45   ` Kent Overstreet
  2020-11-03  7:41   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 19:45 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:43:05PM +0000, Matthew Wilcox (Oracle) wrote:
> We don't need to give up when a special request sees a !Uptodate page.
> We may be able to satisfy the read from a partially-uptodate page.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 11/17] mm/filemap: Add filemap_range_uptodate
  2020-11-02 18:43 ` [PATCH 11/17] mm/filemap: Add filemap_range_uptodate Matthew Wilcox (Oracle)
@ 2020-11-02 19:50   ` Kent Overstreet
  2020-11-02 20:09     ` Matthew Wilcox
  2020-11-03  7:49   ` Christoph Hellwig
  1 sibling, 1 reply; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 19:50 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:43:06PM +0000, Matthew Wilcox (Oracle) wrote:
> Move the complicated condition and the calculations out of
> filemap_update_page() into its own function.

You're missing a signed-off-by

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 12/17] mm/filemap: Split filemap_readahead out of filemap_get_pages
  2020-11-02 18:43 ` [PATCH 12/17] mm/filemap: Split filemap_readahead out of filemap_get_pages Matthew Wilcox (Oracle)
@ 2020-11-02 19:52   ` Kent Overstreet
  0 siblings, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 19:52 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:43:07PM +0000, Matthew Wilcox (Oracle) wrote:
> This simplifies the error handling.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 13/17] mm/filemap: Remove parameters from filemap_update_page()
  2020-11-02 18:43 ` [PATCH 13/17] mm/filemap: Remove parameters from filemap_update_page() Matthew Wilcox (Oracle)
@ 2020-11-02 19:52   ` Kent Overstreet
  2020-11-03  7:50   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 19:52 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:43:08PM +0000, Matthew Wilcox (Oracle) wrote:
> The 'pos' and 'count' params are no longer used in filemap_update_page()
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 14/17] mm/filemap: Restructure filemap_get_pages
  2020-11-02 18:43 ` [PATCH 14/17] mm/filemap: Restructure filemap_get_pages Matthew Wilcox (Oracle)
@ 2020-11-02 20:05   ` Kent Overstreet
  2020-11-03  7:57   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 20:05 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:43:09PM +0000, Matthew Wilcox (Oracle) wrote:
> Avoid a goto, and by the time we get to calling filemap_update_page(),
> we definitely have at least one page.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 15/17] mm/filemap: Don't relock the page after calling readpage
  2020-11-02 18:43 ` [PATCH 15/17] mm/filemap: Don't relock the page after calling readpage Matthew Wilcox (Oracle)
@ 2020-11-02 20:06   ` Kent Overstreet
  2020-11-03  8:00   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 20:06 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:43:10PM +0000, Matthew Wilcox (Oracle) wrote:
> We don't need to get the page lock again; we just need to wait for
> the I/O to finish, so use wait_on_page_locked_killable() like the
> other callers of ->readpage.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 16/17] mm/filemap: rename generic_file_buffered_read to filemap_read
  2020-11-02 18:43 ` [PATCH 16/17] mm/filemap: rename generic_file_buffered_read to filemap_read Matthew Wilcox (Oracle)
@ 2020-11-02 20:06   ` Kent Overstreet
  0 siblings, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 20:06 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, Christoph Hellwig

On Mon, Nov 02, 2020 at 06:43:11PM +0000, Matthew Wilcox (Oracle) wrote:
> From: Christoph Hellwig <hch@lst.de>
> 
> Rename generic_file_buffered_read to match the naming of filemap_fault,
> also update the written parameter to a more descriptive name and
> improve the kerneldoc comment.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 17/17] mm: simplify generic_file_read_iter
  2020-11-02 18:43 ` [PATCH 17/17] mm: simplify generic_file_read_iter Matthew Wilcox (Oracle)
@ 2020-11-02 20:07   ` Kent Overstreet
  0 siblings, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 20:07 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, Christoph Hellwig

On Mon, Nov 02, 2020 at 06:43:12PM +0000, Matthew Wilcox (Oracle) wrote:
> From: Christoph Hellwig <hch@lst.de>
> 
> Avoid the pointless goto out just for returning retval.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 11/17] mm/filemap: Add filemap_range_uptodate
  2020-11-02 19:50   ` Kent Overstreet
@ 2020-11-02 20:09     ` Matthew Wilcox
  0 siblings, 0 replies; 66+ messages in thread
From: Matthew Wilcox @ 2020-11-02 20:09 UTC (permalink / raw)
  To: Kent Overstreet; +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 02:50:56PM -0500, Kent Overstreet wrote:
> On Mon, Nov 02, 2020 at 06:43:06PM +0000, Matthew Wilcox (Oracle) wrote:
> > Move the complicated condition and the calculations out of
> > filemap_update_page() into its own function.
> 
> You're missing a signed-off-by
> 
> Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>

Oops.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 00/17] Refactor generic_file_buffered_read
  2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
                   ` (16 preceding siblings ...)
  2020-11-02 18:43 ` [PATCH 17/17] mm: simplify generic_file_read_iter Matthew Wilcox (Oracle)
@ 2020-11-02 20:14 ` Kent Overstreet
  17 siblings, 0 replies; 66+ messages in thread
From: Kent Overstreet @ 2020-11-02 20:14 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch

On Mon, Nov 02, 2020 at 06:42:55PM +0000, Matthew Wilcox (Oracle) wrote:
> This is a combination of Christoph's work to refactor
> generic_file_buffered_read() and my THP support on top of Kent's patches
> which are currently in -mm.  I really like where this ended up.

Nice patch series, I had a look at the results in your git tree and I really
like where things ended up too.

> 
> Christoph Hellwig (2):
>   mm/filemap: rename generic_file_buffered_read to filemap_read
>   mm: simplify generic_file_read_iter
> 
> Matthew Wilcox (Oracle) (15):
>   mm/filemap: Rename generic_file_buffered_read subfunctions
>   mm/filemap: Use THPs in generic_file_buffered_read
>   mm/filemap: Pass a sleep state to put_and_wait_on_page_locked
>   mm/filemap: Support readpage splitting a page
>   mm/filemap: Inline __wait_on_page_locked_async into caller
>   mm/filemap: Don't call ->readpage if IOCB_WAITQ is set
>   mm/filemap: Change filemap_read_page calling conventions
>   mm/filemap: Change filemap_create_page arguments
>   mm/filemap: Convert filemap_update_page to return an errno
>   mm/filemap: Move the iocb checks into filemap_update_page
>   mm/filemap: Add filemap_range_uptodate
>   mm/filemap: Split filemap_readahead out of filemap_get_pages
>   mm/filemap: Remove parameters from filemap_update_page()
>   mm/filemap: Restructure filemap_get_pages
>   mm/filemap: Don't relock the page after calling readpage
> 
>  fs/btrfs/file.c         |   2 +-
>  include/linux/fs.h      |   4 +-
>  include/linux/pagemap.h |   3 +-
>  mm/filemap.c            | 493 +++++++++++++++++++---------------------
>  mm/huge_memory.c        |   4 +-
>  mm/migrate.c            |   4 +-
>  6 files changed, 237 insertions(+), 273 deletions(-)
> 
> -- 
> 2.28.0
> 

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 01/17] mm/filemap: Rename generic_file_buffered_read subfunctions
  2020-11-02 18:42 ` [PATCH 01/17] mm/filemap: Rename generic_file_buffered_read subfunctions Matthew Wilcox (Oracle)
  2020-11-02 18:53   ` Kent Overstreet
@ 2020-11-03  7:27   ` Christoph Hellwig
  2020-11-03 14:52     ` Matthew Wilcox
  1 sibling, 1 reply; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03  7:27 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch, kent.overstreet

On Mon, Nov 02, 2020 at 06:42:56PM +0000, Matthew Wilcox (Oracle) wrote:
> The recent split of generic_file_buffered_read() created some very
> long function names which are hard to distinguish from each other.
> Rename as follows:
> 
> generic_file_buffered_read_readpage -> filemap_read_page
> generic_file_buffered_read_pagenotuptodate -> filemap_update_page
> generic_file_buffered_read_no_cached_page -> filemap_create_page
> generic_file_buffered_read_get_pages -> filemap_get_pages

Find with me, although I think filemap_find_get_pages would be a better
name for filemap_get_pages.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 02/17] mm/filemap: Use THPs in generic_file_buffered_read
  2020-11-02 18:42 ` [PATCH 02/17] mm/filemap: Use THPs in generic_file_buffered_read Matthew Wilcox (Oracle)
  2020-11-02 18:55   ` Kent Overstreet
@ 2020-11-03  7:28   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03  7:28 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch, kent.overstreet

On Mon, Nov 02, 2020 at 06:42:57PM +0000, Matthew Wilcox (Oracle) wrote:
> +static unsigned mapping_get_read_thps(struct address_space *mapping,
> +		pgoff_t index, unsigned int nr_pages, struct page **pages)

This function could really use a comment describing it, as the name
isn't overly descriptive.

> +{
> +	XA_STATE(xas, &mapping->i_pages, index);
> +	struct page *head;
> +	unsigned int ret = 0;

I'd rename ret to nr_found to be a little more descriptive.

Otherwise looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 03/17] mm/filemap: Pass a sleep state to put_and_wait_on_page_locked
  2020-11-02 18:42 ` [PATCH 03/17] mm/filemap: Pass a sleep state to put_and_wait_on_page_locked Matthew Wilcox (Oracle)
  2020-11-02 18:56   ` Kent Overstreet
@ 2020-11-03  7:28   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03  7:28 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch, kent.overstreet

On Mon, Nov 02, 2020 at 06:42:58PM +0000, Matthew Wilcox (Oracle) wrote:
> This is prep work for the next patch, but I think at least one of the
> current callers would prefer a killable sleep to an uninterruptible one.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 04/17] mm/filemap: Support readpage splitting a page
  2020-11-02 18:42 ` [PATCH 04/17] mm/filemap: Support readpage splitting a page Matthew Wilcox (Oracle)
  2020-11-02 19:00   ` Kent Overstreet
  2020-11-02 19:12   ` Kent Overstreet
@ 2020-11-03  7:31   ` Christoph Hellwig
  2 siblings, 0 replies; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03  7:31 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch, kent.overstreet

On Mon, Nov 02, 2020 at 06:42:59PM +0000, Matthew Wilcox (Oracle) wrote:
> For page splitting to succeed, the thread asking to split the
> page has to be the only one with a reference to the page.  Calling
> wait_on_page_locked() while holding a reference to the page will
> effectively prevent this from happening with sufficient threads waiting
> on the same page.  Use put_and_wait_on_page_locked() to sleep without
> holding a reference to the page, then retry the page lookup after the
> page is unlocked.
> 
> Since we now get the page lock a little earlier in filemap_update_page(),
> we can eliminate a number of duplicate checks.  The original intent
> (commit ebded02788b5 ("avoid unnecessary calls to lock_page when waiting
> for IO to complete during a read")) behind getting the page lock later
> was to avoid re-locking the page after it has been brought uptodate by
> another thread.  We still avoid that because we go through the normal
> lookup path again after the winning thread has brought the page uptodate.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 05/17] mm/filemap: Inline __wait_on_page_locked_async into caller
  2020-11-02 18:43 ` [PATCH 05/17] mm/filemap: Inline __wait_on_page_locked_async into caller Matthew Wilcox (Oracle)
  2020-11-02 19:00   ` Kent Overstreet
@ 2020-11-03  7:31   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03  7:31 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch, kent.overstreet

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 06/17] mm/filemap: Don't call ->readpage if IOCB_WAITQ is set
  2020-11-02 18:43 ` [PATCH 06/17] mm/filemap: Don't call ->readpage if IOCB_WAITQ is set Matthew Wilcox (Oracle)
  2020-11-02 19:17   ` Kent Overstreet
@ 2020-11-03  7:31   ` Christoph Hellwig
  2020-11-05  7:22   ` Nikolay Borisov
  2 siblings, 0 replies; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03  7:31 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch, kent.overstreet

On Mon, Nov 02, 2020 at 06:43:01PM +0000, Matthew Wilcox (Oracle) wrote:
> The readpage operation can block in many (most?) filesystems, so we
> should punt to a work queue instead of calling it.  This was the last
> caller of lock_page_async(), so remove it.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 07/17] mm/filemap: Change filemap_read_page calling conventions
  2020-11-02 18:43 ` [PATCH 07/17] mm/filemap: Change filemap_read_page calling conventions Matthew Wilcox (Oracle)
  2020-11-02 19:37   ` Kent Overstreet
@ 2020-11-03  7:34   ` Christoph Hellwig
  2020-11-03 15:11     ` Matthew Wilcox
  1 sibling, 1 reply; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03  7:34 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch, kent.overstreet

> +static int filemap_read_page(struct file *file, struct address_space *mapping,
> +		struct page *page)

I still think we should drop the mapping argument as well.

> +	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) {
> +		unlock_page(page);
> +		put_page(page);
> +		return ERR_PTR(-EAGAIN);
> +	}
> +	error = filemap_read_page(iocb->ki_filp, mapping, page);
> +	if (!error)
> +		return page;

I think a goto error for the error cases would be much more useful.
That would allow to also share the error put_page for the flag check
above and the truncated case below, but most importantly make the code
flow obvious vs the early return for the success case.

> -	return filemap_read_page(iocb, filp, mapping, page);
> +	if (!error)
> +		error = filemap_read_page(iocb->ki_filp, mapping, page);
> +	if (!error)
> +		return page;
> +	put_page(page);
> +	if (error == -EEXIST || error == AOP_TRUNCATED_PAGE)
> +		return NULL;
> +	return ERR_PTR(error);

Same here.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 08/17] mm/filemap: Change filemap_create_page arguments
  2020-11-02 18:43 ` [PATCH 08/17] mm/filemap: Change filemap_create_page arguments Matthew Wilcox (Oracle)
  2020-11-02 19:38   ` Kent Overstreet
@ 2020-11-03  7:35   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03  7:35 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch, kent.overstreet

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 09/17] mm/filemap: Convert filemap_update_page to return an errno
  2020-11-02 18:43 ` [PATCH 09/17] mm/filemap: Convert filemap_update_page to return an errno Matthew Wilcox (Oracle)
  2020-11-02 19:39   ` Kent Overstreet
@ 2020-11-03  7:36   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03  7:36 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch, kent.overstreet

> -	if (err)
> +	if (err < 0)

Please check for AOP_TRUNCATED_PAGE explicitly here.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 10/17] mm/filemap: Move the iocb checks into filemap_update_page
  2020-11-02 18:43 ` [PATCH 10/17] mm/filemap: Move the iocb checks into filemap_update_page Matthew Wilcox (Oracle)
  2020-11-02 19:45   ` Kent Overstreet
@ 2020-11-03  7:41   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03  7:41 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch, kent.overstreet

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 11/17] mm/filemap: Add filemap_range_uptodate
  2020-11-02 18:43 ` [PATCH 11/17] mm/filemap: Add filemap_range_uptodate Matthew Wilcox (Oracle)
  2020-11-02 19:50   ` Kent Overstreet
@ 2020-11-03  7:49   ` Christoph Hellwig
  2020-11-03 15:18     ` Matthew Wilcox
  1 sibling, 1 reply; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03  7:49 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch, kent.overstreet

On Mon, Nov 02, 2020 at 06:43:06PM +0000, Matthew Wilcox (Oracle) wrote:
> Move the complicated condition and the calculations out of
> filemap_update_page() into its own function.

The logic looks good, but the flow inside of filemap_update_page looks
odd.  The patch below relative to this commit shows how I'd do it:

diff --git a/mm/filemap.c b/mm/filemap.c
index 81b569d818a3f8..304180c022d38a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2266,40 +2266,39 @@ static int filemap_update_page(struct kiocb *iocb,
 	if (!trylock_page(page)) {
 		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
 			goto error;
-		if (iocb->ki_flags & IOCB_WAITQ) {
-			if (!first)
-				goto error;
-			error = __lock_page_async(page, iocb->ki_waitq);
-			if (error)
-				goto error;
-		} else {
+		if (!(iocb->ki_flags & IOCB_WAITQ)) {
 			put_and_wait_on_page_locked(page, TASK_KILLABLE);
 			return AOP_TRUNCATED_PAGE;
 		}
+		if (!first)
+			goto error;
+		error = __lock_page_async(page, iocb->ki_waitq);
+		if (error)
+			goto error;
 	}
 
+	error = AOP_TRUNCATED_PAGE;
 	if (!page->mapping)
-		goto truncated;
-	if (filemap_range_uptodate(iocb, mapping, iter, page)) {
-		unlock_page(page);
-		return 0;
-	}
+		goto error_unlock_page;
 
-	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) {
-		unlock_page(page);
+	if (!filemap_range_uptodate(iocb, mapping, iter, page)) {
 		error = -EAGAIN;
-	} else {
+		if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))
+			goto error_unlock_page;
 		error = filemap_read_page(iocb->ki_filp, mapping, page);
-		if (!error)
-			return 0;
+		if (error)
+			goto error;
+		return 0; /* filemap_read_page unlocks the page */
 	}
+
+	unlock_page(page);
+	return 0;
+
+error_unlock_page:
+	unlock_page(page);
 error:
 	put_page(page);
 	return error;
-truncated:
-	unlock_page(page);
-	put_page(page);
-	return AOP_TRUNCATED_PAGE;
 }
 
 static struct page *filemap_create_page(struct file *file,

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH 13/17] mm/filemap: Remove parameters from filemap_update_page()
  2020-11-02 18:43 ` [PATCH 13/17] mm/filemap: Remove parameters from filemap_update_page() Matthew Wilcox (Oracle)
  2020-11-02 19:52   ` Kent Overstreet
@ 2020-11-03  7:50   ` Christoph Hellwig
  1 sibling, 0 replies; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03  7:50 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch, kent.overstreet

On Mon, Nov 02, 2020 at 06:43:08PM +0000, Matthew Wilcox (Oracle) wrote:
> The 'pos' and 'count' params are no longer used in filemap_update_page()

Shouldn't this go into the patch that removes the usage of the parameters?

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 14/17] mm/filemap: Restructure filemap_get_pages
  2020-11-02 18:43 ` [PATCH 14/17] mm/filemap: Restructure filemap_get_pages Matthew Wilcox (Oracle)
  2020-11-02 20:05   ` Kent Overstreet
@ 2020-11-03  7:57   ` Christoph Hellwig
  2020-11-03 14:46     ` Matthew Wilcox
  1 sibling, 1 reply; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03  7:57 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch, kent.overstreet

On Mon, Nov 02, 2020 at 06:43:09PM +0000, Matthew Wilcox (Oracle) wrote:
> Avoid a goto, and by the time we get to calling filemap_update_page(),
> we definitely have at least one page.

I find the error handling flow hard to follow and the existing but
heavily touched naming of the nr_got variable and the find_pages label
not helpful.  I'd do the following on top of this patch:

diff --git a/mm/filemap.c b/mm/filemap.c
index f16b1eb03bcad0..3ef73a58ce9456 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2344,51 +2344,54 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
 	pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
 	pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
 	struct page *page;
-	int nr_got, err = 0;
+	int nr_pages, err = 0;
 
 	nr = min_t(unsigned long, last_index - index, nr);
-find_page:
+retry:
 	if (fatal_signal_pending(current))
 		return -EINTR;
 
-	nr_got = mapping_get_read_thps(mapping, index, nr, pages);
-	if (!nr_got) {
+	nr_pages = mapping_get_read_thps(mapping, index, nr, pages);
+	if (!nr_pages) {
 		if (iocb->ki_flags & IOCB_NOIO)
 			return -EAGAIN;
 		page_cache_sync_readahead(mapping, ra, filp, index,
 				last_index - index);
-		nr_got = mapping_get_read_thps(mapping, index, nr, pages);
+		nr_pages = mapping_get_read_thps(mapping, index, nr, pages);
 	}
-	if (!nr_got) {
+	if (!nr_pages) {
 		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
 			return -EAGAIN;
 		pages[0] = filemap_create_page(filp, mapping,
 				iocb->ki_pos >> PAGE_SHIFT);
 		if (!pages[0])
-			goto find_page;
+			goto retry;
 		if (IS_ERR(pages[0]))
 			return PTR_ERR(pages[0]);
 		return 1;
 	}
 
-	page = pages[nr_got - 1];
-	if (PageReadahead(page))
+	page = pages[nr_pages - 1];
+	if (PageReadahead(page)) {
 		err = filemap_readahead(iocb, filp, mapping, page, last_index);
-	if (!err && !PageUptodate(page))
+		if (err)
+			goto error;
+	}
+	if (!PageUptodate(page)) {
 		err = filemap_update_page(iocb, mapping, iter, page,
-				nr_got == 1);
+				nr_pages == 1);
+		if (err)
+			goto error;
+	}
 
-	if (err)
-		nr_got--;
-	if (likely(nr_got))
-		return nr_got;
-	if (err < 0)
-		return err;
-	err = 0;
-	/*
-	 * No pages and no error means we raced and should retry:
-	 */
-	goto find_page;
+	return nr_pages;
+
+error:
+	if (nr_pages > 1)
+		return nr_pages - 1;
+	if (err == AOP_TRUNCATED_PAGE)
+		goto retry;
+	return err;
 }
 
 /**

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH 15/17] mm/filemap: Don't relock the page after calling readpage
  2020-11-02 18:43 ` [PATCH 15/17] mm/filemap: Don't relock the page after calling readpage Matthew Wilcox (Oracle)
  2020-11-02 20:06   ` Kent Overstreet
@ 2020-11-03  8:00   ` Christoph Hellwig
  2020-11-03 15:24     ` Matthew Wilcox
  1 sibling, 1 reply; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03  8:00 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, hch, kent.overstreet

On Mon, Nov 02, 2020 at 06:43:10PM +0000, Matthew Wilcox (Oracle) wrote:
> We don't need to get the page lock again; we just need to wait for
> the I/O to finish, so use wait_on_page_locked_killable() like the
> other callers of ->readpage.

As that isn't entirely obvious, what about adding a comment next to
the wait_on_page_locked_killable call to document it?

>  
> +	error = wait_on_page_locked_killable(page);
>  	if (error)
>  		return error;
> +	if (PageUptodate(page))
> +		return 0;
> +	if (!page->mapping)	/* page truncated */
> +		return AOP_TRUNCATED_PAGE;
> +	shrink_readahead_size_eio(&file->f_ra);
> +	return -EIO;

You might notice a theme here, but I hate having the fast path exit
early without a good reason, so I'd be much happier with:

	if (!PageUptodate(page)) {
		if (!page->mapping)	/* page truncated */
			return AOP_TRUNCATED_PAGE;
		shrink_readahead_size_eio(&file->f_ra);
		return -EIO;
	}
	return 0;

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 14/17] mm/filemap: Restructure filemap_get_pages
  2020-11-03  7:57   ` Christoph Hellwig
@ 2020-11-03 14:46     ` Matthew Wilcox
  2020-11-03 15:29       ` Christoph Hellwig
  0 siblings, 1 reply; 66+ messages in thread
From: Matthew Wilcox @ 2020-11-03 14:46 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-fsdevel, linux-mm, kent.overstreet

On Tue, Nov 03, 2020 at 08:57:36AM +0100, Christoph Hellwig wrote:
> On Mon, Nov 02, 2020 at 06:43:09PM +0000, Matthew Wilcox (Oracle) wrote:
> > Avoid a goto, and by the time we get to calling filemap_update_page(),
> > we definitely have at least one page.
> 
> I find the error handling flow hard to follow and the existing but
> heavily touched naming of the nr_got variable and the find_pages label
> not helpful.  I'd do the following on top of this patch:

I've removed nr_got entirely in my current tree ...

diff --git a/mm/filemap.c b/mm/filemap.c
index 8264bcdb99f4..ea0cd0df638b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2166,21 +2166,17 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra)
 	ra->ra_pages /= 4;
 }
 
-static unsigned mapping_get_read_thps(struct address_space *mapping,
-		pgoff_t index, unsigned int nr_pages, struct page **pages)
+static void mapping_get_read_thps(struct address_space *mapping,
+		pgoff_t index, pgoff_t max, struct pagevec *pvec)
 {
 	XA_STATE(xas, &mapping->i_pages, index);
 	struct page *head;
-	unsigned int ret = 0;
-
-	if (unlikely(!nr_pages))
-		return 0;
 
 	rcu_read_lock();
 	for (head = xas_load(&xas); head; head = xas_next(&xas)) {
 		if (xas_retry(&xas, head))
 			continue;
-		if (xa_is_value(head))
+		if (xas.xa_index > max || xa_is_value(head))
 			break;
 		if (!page_cache_get_speculative(head))
 			goto retry;
@@ -2189,8 +2185,7 @@ static unsigned mapping_get_read_thps(struct address_space *mapping,
 		if (unlikely(head != xas_reload(&xas)))
 			goto put_page;
 
-		pages[ret++] = head;
-		if (ret == nr_pages)
+		if (!pagevec_add(pvec, head))
 			break;
 		if (!PageUptodate(head))
 			break;
@@ -2205,7 +2200,6 @@ static unsigned mapping_get_read_thps(struct address_space *mapping,
 		xas_reset(&xas);
 	}
 	rcu_read_unlock();
-	return ret;
 }
 
 static int filemap_read_page(struct file *file, struct address_space *mapping,
@@ -2343,52 +2337,53 @@ static int filemap_readahead(struct kiocb *iocb, struct file *file,
 }
 
 static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
-		struct page **pages, unsigned int nr)
+		struct pagevec *pvec)
 {
 	struct file *filp = iocb->ki_filp;
 	struct address_space *mapping = filp->f_mapping;
 	struct file_ra_state *ra = &filp->f_ra;
 	pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
-	pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
+	pgoff_t maxindex = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE);
 	struct page *page;
-	int nr_got, err = 0;
+	int err = 0;
 
-	nr = min_t(unsigned long, last_index - index, nr);
 find_page:
 	if (fatal_signal_pending(current))
 		return -EINTR;
 
-	nr_got = mapping_get_read_thps(mapping, index, nr, pages);
-	if (!nr_got) {
+	pagevec_init(pvec);
+	mapping_get_read_thps(mapping, index, maxindex, pvec);
+	if (!pagevec_count(pvec)) {
 		if (iocb->ki_flags & IOCB_NOIO)
 			return -EAGAIN;
 		page_cache_sync_readahead(mapping, ra, filp, index,
-				last_index - index);
-		nr_got = mapping_get_read_thps(mapping, index, nr, pages);
+				maxindex - index);
+		mapping_get_read_thps(mapping, index, maxindex, pvec);
 	}
-	if (!nr_got) {
+	if (!pagevec_count(pvec)) {
 		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
 			return -EAGAIN;
-		pages[0] = filemap_create_page(filp, mapping,
+		page = filemap_create_page(filp, mapping,
 				iocb->ki_pos >> PAGE_SHIFT);
-		if (!pages[0])
+		if (!page)
 			goto find_page;
-		if (IS_ERR(pages[0]))
-			return PTR_ERR(pages[0]);
-		return 1;
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+		pagevec_add(pvec, page);
+		return 0;
 	}
 
-	page = pages[nr_got - 1];
+	page = pvec->pages[pagevec_count(pvec) - 1];
 	if (PageReadahead(page))
-		err = filemap_readahead(iocb, filp, mapping, page, last_index);
+		err = filemap_readahead(iocb, filp, mapping, page, maxindex);
 	if (!err && !PageUptodate(page))
 		err = filemap_update_page(iocb, mapping, iter, page,
-				nr_got == 1);
+				pagevec_count(pvec) == 1);
 
 	if (err)
-		nr_got--;
-	if (likely(nr_got))
-		return nr_got;
+		pvec->nr--;
+	if (likely(pagevec_count(pvec)))
+		return 0;
 	if (err < 0)
 		return err;
 	err = 0;
@@ -2418,11 +2413,8 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
 	struct file_ra_state *ra = &filp->f_ra;
 	struct address_space *mapping = filp->f_mapping;
 	struct inode *inode = mapping->host;
-	struct page *pages_onstack[PAGEVEC_SIZE], **pages = NULL;
-	unsigned int nr_pages = min_t(unsigned int, 512,
-			((iocb->ki_pos + iter->count + PAGE_SIZE - 1) >> PAGE_SHIFT) -
-			(iocb->ki_pos >> PAGE_SHIFT));
-	int i, pg_nr, error = 0;
+	struct pagevec pvec;
+	int i, error = 0;
 	bool writably_mapped;
 	loff_t isize, end_offset;
 
@@ -2430,14 +2422,6 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
 		return 0;
 	iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
 
-	if (nr_pages > ARRAY_SIZE(pages_onstack))
-		pages = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL);
-
-	if (!pages) {
-		pages = pages_onstack;
-		nr_pages = min_t(unsigned int, nr_pages, ARRAY_SIZE(pages_onstack));
-	}
-
 	do {
 		cond_resched();
 
@@ -2449,12 +2433,9 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
 		if ((iocb->ki_flags & IOCB_WAITQ) && already_read)
 			iocb->ki_flags |= IOCB_NOWAIT;
 
-		i = 0;
-		pg_nr = filemap_get_pages(iocb, iter, pages, nr_pages);
-		if (pg_nr < 0) {
-			error = pg_nr;
+		error = filemap_get_pages(iocb, iter, &pvec);
+		if (error < 0)
 			break;
-		}
 
 		/*
 		 * i_size must be checked after we know the pages are Uptodate.
@@ -2467,13 +2448,8 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
 		isize = i_size_read(inode);
 		if (unlikely(iocb->ki_pos >= isize))
 			goto put_pages;
-
 		end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
 
-		while ((iocb->ki_pos >> PAGE_SHIFT) + pg_nr >
-		       (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT)
-			put_page(pages[--pg_nr]);
-
 		/*
 		 * Once we start copying data, we don't want to be touching any
 		 * cachelines that might be contended:
@@ -2486,18 +2462,20 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
 		 */
 		if (iocb->ki_pos >> PAGE_SHIFT !=
 		    ra->prev_pos >> PAGE_SHIFT)
-			mark_page_accessed(pages[0]);
-		for (i = 1; i < pg_nr; i++)
-			mark_page_accessed(pages[i]);
+			mark_page_accessed(pvec.pages[0]);
 
-		for (i = 0; i < pg_nr; i++) {
-			struct page *page = pages[i];
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
 			size_t page_size = thp_size(page);
 			size_t offset = iocb->ki_pos & (page_size - 1);
 			size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,
 					     page_size - offset);
 			size_t copied;
 
+			if (end_offset < page_offset(page))
+				break;
+			if (i > 0)
+				mark_page_accessed(page);
 			/*
 			 * If users can be writing to this page using arbitrary
 			 * virtual addresses, take care about potential aliasing
@@ -2522,15 +2500,11 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
 			}
 		}
 put_pages:
-		for (i = 0; i < pg_nr; i++)
-			put_page(pages[i]);
+		pagevec_release(&pvec);
 	} while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
 
 	file_accessed(filp);
 
-	if (pages != pages_onstack)
-		kfree(pages);
-
 	return already_read ? already_read : error;
 }
 EXPORT_SYMBOL_GPL(filemap_read);

I like a lot of the restructuring you did there.  I'll incorporate it.


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH 01/17] mm/filemap: Rename generic_file_buffered_read subfunctions
  2020-11-03  7:27   ` Christoph Hellwig
@ 2020-11-03 14:52     ` Matthew Wilcox
  2020-11-03 15:02         ` Amy Parker
  0 siblings, 1 reply; 66+ messages in thread
From: Matthew Wilcox @ 2020-11-03 14:52 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-fsdevel, linux-mm, kent.overstreet

On Tue, Nov 03, 2020 at 08:27:00AM +0100, Christoph Hellwig wrote:
> On Mon, Nov 02, 2020 at 06:42:56PM +0000, Matthew Wilcox (Oracle) wrote:
> > The recent split of generic_file_buffered_read() created some very
> > long function names which are hard to distinguish from each other.
> > Rename as follows:
> > 
> > generic_file_buffered_read_readpage -> filemap_read_page
> > generic_file_buffered_read_pagenotuptodate -> filemap_update_page
> > generic_file_buffered_read_no_cached_page -> filemap_create_page
> > generic_file_buffered_read_get_pages -> filemap_get_pages
> 
> Find with me, although I think filemap_find_get_pages would be a better
> name for filemap_get_pages.

To me, 'find' means 'starting from this position, search forward in the
array for the next page', but we don't want to do that, we just want to
get a batch of pages starting _at_ this index.  Arguably it'd be better
named filemap_get_or_create_batch().

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 01/17] mm/filemap: Rename generic_file_buffered_read subfunctions
  2020-11-03 14:52     ` Matthew Wilcox
@ 2020-11-03 15:02         ` Amy Parker
  0 siblings, 0 replies; 66+ messages in thread
From: Amy Parker @ 2020-11-03 15:02 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Christoph Hellwig, linux-fsdevel, linux-mm, kent.overstreet

On Tue, Nov 3, 2020 at 6:53 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Tue, Nov 03, 2020 at 08:27:00AM +0100, Christoph Hellwig wrote:
> > On Mon, Nov 02, 2020 at 06:42:56PM +0000, Matthew Wilcox (Oracle) wrote:
> > > The recent split of generic_file_buffered_read() created some very
> > > long function names which are hard to distinguish from each other.
> > > Rename as follows:
> > >
> > > generic_file_buffered_read_readpage -> filemap_read_page
> > > generic_file_buffered_read_pagenotuptodate -> filemap_update_page
> > > generic_file_buffered_read_no_cached_page -> filemap_create_page
> > > generic_file_buffered_read_get_pages -> filemap_get_pages
> >
> > Find with me, although I think filemap_find_get_pages would be a better
> > name for filemap_get_pages.
>
> To me, 'find' means 'starting from this position, search forward in the
> array for the next page', but we don't want to do that, we just want to
> get a batch of pages starting _at_ this index.  Arguably it'd be better
> named filemap_get_or_create_batch().

filemap_get_or_crerate_batch() would be a better name, as to me, find
entails "begin here and continue looking for the next instance" (in this
case, an instance would be a page).

Best regards,
Amy Parker
(they/them)

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 01/17] mm/filemap: Rename generic_file_buffered_read subfunctions
@ 2020-11-03 15:02         ` Amy Parker
  0 siblings, 0 replies; 66+ messages in thread
From: Amy Parker @ 2020-11-03 15:02 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Christoph Hellwig, linux-fsdevel, linux-mm, kent.overstreet

On Tue, Nov 3, 2020 at 6:53 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Tue, Nov 03, 2020 at 08:27:00AM +0100, Christoph Hellwig wrote:
> > On Mon, Nov 02, 2020 at 06:42:56PM +0000, Matthew Wilcox (Oracle) wrote:
> > > The recent split of generic_file_buffered_read() created some very
> > > long function names which are hard to distinguish from each other.
> > > Rename as follows:
> > >
> > > generic_file_buffered_read_readpage -> filemap_read_page
> > > generic_file_buffered_read_pagenotuptodate -> filemap_update_page
> > > generic_file_buffered_read_no_cached_page -> filemap_create_page
> > > generic_file_buffered_read_get_pages -> filemap_get_pages
> >
> > Find with me, although I think filemap_find_get_pages would be a better
> > name for filemap_get_pages.
>
> To me, 'find' means 'starting from this position, search forward in the
> array for the next page', but we don't want to do that, we just want to
> get a batch of pages starting _at_ this index.  Arguably it'd be better
> named filemap_get_or_create_batch().

filemap_get_or_crerate_batch() would be a better name, as to me, find
entails "begin here and continue looking for the next instance" (in this
case, an instance would be a page).

Best regards,
Amy Parker
(they/them)


^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 07/17] mm/filemap: Change filemap_read_page calling conventions
  2020-11-03  7:34   ` Christoph Hellwig
@ 2020-11-03 15:11     ` Matthew Wilcox
  0 siblings, 0 replies; 66+ messages in thread
From: Matthew Wilcox @ 2020-11-03 15:11 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-fsdevel, linux-mm, kent.overstreet

On Tue, Nov 03, 2020 at 08:34:27AM +0100, Christoph Hellwig wrote:
> > +static int filemap_read_page(struct file *file, struct address_space *mapping,
> > +		struct page *page)
> 
> I still think we should drop the mapping argument as well.

Feels a little weird to have it in the caller and not pass it in.

> > +	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) {
> > +		unlock_page(page);
> > +		put_page(page);
> > +		return ERR_PTR(-EAGAIN);
> > +	}
> > +	error = filemap_read_page(iocb->ki_filp, mapping, page);
> > +	if (!error)
> > +		return page;
> 
> I think a goto error for the error cases would be much more useful.
> That would allow to also share the error put_page for the flag check
> above and the truncated case below, but most importantly make the code
> flow obvious vs the early return for the success case.

In this patch, I'm trying to be obvious about the code motion between
the two functions.  This gets straightened out eventually:

        if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) {
                unlock_page(page);
                error = -EAGAIN;
        } else {
                error = filemap_read_page(iocb->ki_filp, mapping, page);
                if (!error)
                        return 0;
        }
error:
        put_page(page);
        return error;


^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 11/17] mm/filemap: Add filemap_range_uptodate
  2020-11-03  7:49   ` Christoph Hellwig
@ 2020-11-03 15:18     ` Matthew Wilcox
  2020-11-03 15:31       ` Christoph Hellwig
  0 siblings, 1 reply; 66+ messages in thread
From: Matthew Wilcox @ 2020-11-03 15:18 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-fsdevel, linux-mm, kent.overstreet

On Tue, Nov 03, 2020 at 08:49:44AM +0100, Christoph Hellwig wrote:
> On Mon, Nov 02, 2020 at 06:43:06PM +0000, Matthew Wilcox (Oracle) wrote:
> > Move the complicated condition and the calculations out of
> > filemap_update_page() into its own function.
> 
> The logic looks good, but the flow inside of filemap_update_page looks
> odd.  The patch below relative to this commit shows how I'd do it:

I have a simplification in mind that gets rid of the awkward 'first'
parameter.  In filemap_get_pages(), do:

                if ((iocb->ki_flags & IOCB_WAITQ) && (pagevec_count(pvec) > 1))
                        iocb->ki_flags |= IOCB_NOWAIT;

before calling filemap_update_page().  That matches what Kent did in
filemap_read():

                if ((iocb->ki_flags & IOCB_WAITQ) && already_read)
                        iocb->ki_flags |= IOCB_NOWAIT;

> diff --git a/mm/filemap.c b/mm/filemap.c
> index 81b569d818a3f8..304180c022d38a 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2266,40 +2266,39 @@ static int filemap_update_page(struct kiocb *iocb,
>  	if (!trylock_page(page)) {
>  		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
>  			goto error;
> -		if (iocb->ki_flags & IOCB_WAITQ) {
> -			if (!first)
> -				goto error;
> -			error = __lock_page_async(page, iocb->ki_waitq);
> -			if (error)
> -				goto error;
> -		} else {
> +		if (!(iocb->ki_flags & IOCB_WAITQ)) {
>  			put_and_wait_on_page_locked(page, TASK_KILLABLE);
>  			return AOP_TRUNCATED_PAGE;
>  		}
> +		if (!first)
> +			goto error;
> +		error = __lock_page_async(page, iocb->ki_waitq);
> +		if (error)
> +			goto error;
>  	}

I see where you're going there.  OK.

>  
> +	error = AOP_TRUNCATED_PAGE;
>  	if (!page->mapping)
> -		goto truncated;
> -	if (filemap_range_uptodate(iocb, mapping, iter, page)) {
> -		unlock_page(page);
> -		return 0;
> -	}
> +		goto error_unlock_page;
>  
> -	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) {
> -		unlock_page(page);
> +	if (!filemap_range_uptodate(iocb, mapping, iter, page)) {
>  		error = -EAGAIN;
> -	} else {
> +		if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))
> +			goto error_unlock_page;
>  		error = filemap_read_page(iocb->ki_filp, mapping, page);
> -		if (!error)
> -			return 0;
> +		if (error)
> +			goto error;
> +		return 0; /* filemap_read_page unlocks the page */
>  	}
> +
> +	unlock_page(page);
> +	return 0;
> +
> +error_unlock_page:
> +	unlock_page(page);
>  error:
>  	put_page(page);
>  	return error;
> -truncated:
> -	unlock_page(page);
> -	put_page(page);
> -	return AOP_TRUNCATED_PAGE;
>  }

There's something niggling at me about this change ... I'll play with
it a bit.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 15/17] mm/filemap: Don't relock the page after calling readpage
  2020-11-03  8:00   ` Christoph Hellwig
@ 2020-11-03 15:24     ` Matthew Wilcox
  2020-11-03 17:13       ` Christoph Hellwig
  0 siblings, 1 reply; 66+ messages in thread
From: Matthew Wilcox @ 2020-11-03 15:24 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-fsdevel, linux-mm, kent.overstreet

On Tue, Nov 03, 2020 at 09:00:45AM +0100, Christoph Hellwig wrote:
> On Mon, Nov 02, 2020 at 06:43:10PM +0000, Matthew Wilcox (Oracle) wrote:
> > We don't need to get the page lock again; we just need to wait for
> > the I/O to finish, so use wait_on_page_locked_killable() like the
> > other callers of ->readpage.
> 
> As that isn't entirely obvious, what about adding a comment next to
> the wait_on_page_locked_killable call to document it?

The other callers of ->readpage don't document that, so not sure why
we should here?

> > +	error = wait_on_page_locked_killable(page);
> >  	if (error)
> >  		return error;
> > +	if (PageUptodate(page))
> > +		return 0;
> > +	if (!page->mapping)	/* page truncated */
> > +		return AOP_TRUNCATED_PAGE;
> > +	shrink_readahead_size_eio(&file->f_ra);
> > +	return -EIO;
> 
> You might notice a theme here, but I hate having the fast path exit
> early without a good reason, so I'd be much happier with:
> 
> 	if (!PageUptodate(page)) {
> 		if (!page->mapping)	/* page truncated */
> 			return AOP_TRUNCATED_PAGE;
> 		shrink_readahead_size_eio(&file->f_ra);
> 		return -EIO;
> 	}
> 	return 0;

I'm just not a fan of unnecessary indentation.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 14/17] mm/filemap: Restructure filemap_get_pages
  2020-11-03 14:46     ` Matthew Wilcox
@ 2020-11-03 15:29       ` Christoph Hellwig
  0 siblings, 0 replies; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03 15:29 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Christoph Hellwig, linux-fsdevel, linux-mm, kent.overstreet

On Tue, Nov 03, 2020 at 02:46:19PM +0000, Matthew Wilcox wrote:
> On Tue, Nov 03, 2020 at 08:57:36AM +0100, Christoph Hellwig wrote:
> > On Mon, Nov 02, 2020 at 06:43:09PM +0000, Matthew Wilcox (Oracle) wrote:
> > > Avoid a goto, and by the time we get to calling filemap_update_page(),
> > > we definitely have at least one page.
> > 
> > I find the error handling flow hard to follow and the existing but
> > heavily touched naming of the nr_got variable and the find_pages label
> > not helpful.  I'd do the following on top of this patch:
> 
> I've removed nr_got entirely in my current tree ...

Even better.  The pagevec usage looks pretty nice!

> +static void mapping_get_read_thps(struct address_space *mapping,

Maybe call this pagevec_get_read_thps?

> +	pgoff_t maxindex = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE);
>  	struct page *page;
> +	int err = 0;
>  
>  find_page:
>  	if (fatal_signal_pending(current))
>  		return -EINTR;
>  
> +	pagevec_init(pvec);
> +	mapping_get_read_thps(mapping, index, maxindex, pvec);
> +	if (!pagevec_count(pvec)) {
>  		if (iocb->ki_flags & IOCB_NOIO)
>  			return -EAGAIN;
>  		page_cache_sync_readahead(mapping, ra, filp, index,
> +				maxindex - index);
> +		mapping_get_read_thps(mapping, index, maxindex, pvec);
>  	}
> +	if (!pagevec_count(pvec)) {
>  		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
>  			return -EAGAIN;
> +		page = filemap_create_page(filp, mapping,
>  				iocb->ki_pos >> PAGE_SHIFT);
> +		if (!page)
>  			goto find_page;
> +		if (IS_ERR(page))
> +			return PTR_ERR(page);
> +		pagevec_add(pvec, page);
> +		return 0;

I'd pass the pagevec to filemap_create_page and just return an errno,
which should be a little easier to follow.

> -	page = pages[nr_got - 1];
> +	page = pvec->pages[pagevec_count(pvec) - 1];

I wonder if a pagevec_last_page() helper would be neat for things
like this? (might be a bit of over engineering..)

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 11/17] mm/filemap: Add filemap_range_uptodate
  2020-11-03 15:18     ` Matthew Wilcox
@ 2020-11-03 15:31       ` Christoph Hellwig
  2020-11-03 15:42         ` Matthew Wilcox
  0 siblings, 1 reply; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03 15:31 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Christoph Hellwig, linux-fsdevel, linux-mm, kent.overstreet

On Tue, Nov 03, 2020 at 03:18:47PM +0000, Matthew Wilcox wrote:
> I have a simplification in mind that gets rid of the awkward 'first'
> parameter.  In filemap_get_pages(), do:
> 
>                 if ((iocb->ki_flags & IOCB_WAITQ) && (pagevec_count(pvec) > 1))
>                         iocb->ki_flags |= IOCB_NOWAIT;
> 
> before calling filemap_update_page().  That matches what Kent did in
> filemap_read():

Yes, that makes a lot of sense.  No need for the second pair of inner
braces, though :)

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 11/17] mm/filemap: Add filemap_range_uptodate
  2020-11-03 15:31       ` Christoph Hellwig
@ 2020-11-03 15:42         ` Matthew Wilcox
  0 siblings, 0 replies; 66+ messages in thread
From: Matthew Wilcox @ 2020-11-03 15:42 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-fsdevel, linux-mm, kent.overstreet

On Tue, Nov 03, 2020 at 04:31:18PM +0100, Christoph Hellwig wrote:
> On Tue, Nov 03, 2020 at 03:18:47PM +0000, Matthew Wilcox wrote:
> > I have a simplification in mind that gets rid of the awkward 'first'
> > parameter.  In filemap_get_pages(), do:
> > 
> >                 if ((iocb->ki_flags & IOCB_WAITQ) && (pagevec_count(pvec) > 1))
> >                         iocb->ki_flags |= IOCB_NOWAIT;
> > 
> > before calling filemap_update_page().  That matches what Kent did in
> > filemap_read():
> 
> Yes, that makes a lot of sense.  No need for the second pair of inner
> braces, though :)

I wrote it as
		if ((iocb->ki_flags & IOCB_WAITQ) && pagevec_count(pvec) > 1)
originally, and then talked myself into putting the unnecessary brackets
in to make it look more symmetric ;-)

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 15/17] mm/filemap: Don't relock the page after calling readpage
  2020-11-03 15:24     ` Matthew Wilcox
@ 2020-11-03 17:13       ` Christoph Hellwig
  2020-11-03 18:55         ` Matthew Wilcox
  0 siblings, 1 reply; 66+ messages in thread
From: Christoph Hellwig @ 2020-11-03 17:13 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Christoph Hellwig, linux-fsdevel, linux-mm, kent.overstreet

On Tue, Nov 03, 2020 at 03:24:36PM +0000, Matthew Wilcox wrote:
> On Tue, Nov 03, 2020 at 09:00:45AM +0100, Christoph Hellwig wrote:
> > On Mon, Nov 02, 2020 at 06:43:10PM +0000, Matthew Wilcox (Oracle) wrote:
> > > We don't need to get the page lock again; we just need to wait for
> > > the I/O to finish, so use wait_on_page_locked_killable() like the
> > > other callers of ->readpage.
> > 
> > As that isn't entirely obvious, what about adding a comment next to
> > the wait_on_page_locked_killable call to document it?
> 
> The other callers of ->readpage don't document that, so not sure why
> we should here?

The callers of ->readpage are a mess :(

Many use lock_page or trylock_page, one uses wait_on_page_locked directly,
another one uses the wait_on_page_read helper.  I think we need a good
helper here, and I think it looks a lot like filemap_read_page in your
tree..

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 15/17] mm/filemap: Don't relock the page after calling readpage
  2020-11-03 17:13       ` Christoph Hellwig
@ 2020-11-03 18:55         ` Matthew Wilcox
  0 siblings, 0 replies; 66+ messages in thread
From: Matthew Wilcox @ 2020-11-03 18:55 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-fsdevel, linux-mm, kent.overstreet

On Tue, Nov 03, 2020 at 06:13:56PM +0100, Christoph Hellwig wrote:
> On Tue, Nov 03, 2020 at 03:24:36PM +0000, Matthew Wilcox wrote:
> > On Tue, Nov 03, 2020 at 09:00:45AM +0100, Christoph Hellwig wrote:
> > > On Mon, Nov 02, 2020 at 06:43:10PM +0000, Matthew Wilcox (Oracle) wrote:
> > > > We don't need to get the page lock again; we just need to wait for
> > > > the I/O to finish, so use wait_on_page_locked_killable() like the
> > > > other callers of ->readpage.
> > > 
> > > As that isn't entirely obvious, what about adding a comment next to
> > > the wait_on_page_locked_killable call to document it?
> > 
> > The other callers of ->readpage don't document that, so not sure why
> > we should here?
> 
> The callers of ->readpage are a mess :(
> 
> Many use lock_page or trylock_page, one uses wait_on_page_locked directly,
> another one uses the wait_on_page_read helper.  I think we need a good
> helper here, and I think it looks a lot like filemap_read_page in your
> tree..

Oh, heh.  Turns out I wrote this in a patch series the other day ...

static int mapping_readpage(struct file *file, struct address_space *mapping,
                struct page *page, bool synchronous)
{
        struct readahead_control ractl = {
                .file = file,
                .mapping = mapping,
                ._index = page->index,
                ._nr_pages = 1,
        };
        int ret;

        if (!synchronous && mapping->a_ops->readahead) {
                mapping->a_ops->readahead(&ractl);
                return 0;
        }

        ret = mapping->a_ops->readpage(file, page);
        if (ret != AOP_UPDATED_PAGE)
                return ret;
        unlock_page(page);
        return 0;
}

(it's for swap_readpage, which is generally called in an async manner ...
and really doesn't handle errors at all well)

This does illustrate that we need the mapping argument.  Some of the
callers of ->readpage need to pass a NULL file pointer, so we can't
get it from file->f_mapping.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 06/17] mm/filemap: Don't call ->readpage if IOCB_WAITQ is set
  2020-11-02 18:43 ` [PATCH 06/17] mm/filemap: Don't call ->readpage if IOCB_WAITQ is set Matthew Wilcox (Oracle)
  2020-11-02 19:17   ` Kent Overstreet
  2020-11-03  7:31   ` Christoph Hellwig
@ 2020-11-05  7:22   ` Nikolay Borisov
  2 siblings, 0 replies; 66+ messages in thread
From: Nikolay Borisov @ 2020-11-05  7:22 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), linux-fsdevel, linux-mm; +Cc: hch, kent.overstreet



On 2.11.20 г. 20:43 ч., Matthew Wilcox (Oracle) wrote:
> The readpage operation can block in many (most?) filesystems, so we
> should punt to a work queue instead of calling it.  This was the last
> caller of lock_page_async(), so remove it.

You mention lock_page_async, yet you remove lock_page_for_iocb.

> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/filemap.c | 14 ++------------
>  1 file changed, 2 insertions(+), 12 deletions(-)
> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 7a4101ceb106..5bafd2dc830c 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2152,16 +2152,6 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra)
>  	ra->ra_pages /= 4;
>  }
>  
> -static int lock_page_for_iocb(struct kiocb *iocb, struct page *page)
> -{
> -	if (iocb->ki_flags & IOCB_WAITQ)
> -		return lock_page_async(page, iocb->ki_waitq);
> -	else if (iocb->ki_flags & IOCB_NOWAIT)
> -		return trylock_page(page) ? 0 : -EAGAIN;
> -	else
> -		return lock_page_killable(page);
> -}
> -
>  static unsigned mapping_get_read_thps(struct address_space *mapping,
>  		pgoff_t index, unsigned int nr_pages, struct page **pages)
>  {
> @@ -2210,7 +2200,7 @@ static struct page *filemap_read_page(struct kiocb *iocb, struct file *filp,
>  	struct file_ra_state *ra = &filp->f_ra;
>  	int error;
>  
> -	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
> +	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) {
>  		unlock_page(page);
>  		put_page(page);
>  		return ERR_PTR(-EAGAIN);
> @@ -2231,7 +2221,7 @@ static struct page *filemap_read_page(struct kiocb *iocb, struct file *filp,
>  	}
>  
>  	if (!PageUptodate(page)) {
> -		error = lock_page_for_iocb(iocb, page);
> +		error = lock_page_killable(page);
>  		if (unlikely(error)) {
>  			put_page(page);
>  			return ERR_PTR(error);
> 

^ permalink raw reply	[flat|nested] 66+ messages in thread

end of thread, other threads:[~2020-11-05  7:23 UTC | newest]

Thread overview: 66+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-02 18:42 [PATCH 00/17] Refactor generic_file_buffered_read Matthew Wilcox (Oracle)
2020-11-02 18:42 ` [PATCH 01/17] mm/filemap: Rename generic_file_buffered_read subfunctions Matthew Wilcox (Oracle)
2020-11-02 18:53   ` Kent Overstreet
2020-11-03  7:27   ` Christoph Hellwig
2020-11-03 14:52     ` Matthew Wilcox
2020-11-03 15:02       ` Amy Parker
2020-11-03 15:02         ` Amy Parker
2020-11-02 18:42 ` [PATCH 02/17] mm/filemap: Use THPs in generic_file_buffered_read Matthew Wilcox (Oracle)
2020-11-02 18:55   ` Kent Overstreet
2020-11-03  7:28   ` Christoph Hellwig
2020-11-02 18:42 ` [PATCH 03/17] mm/filemap: Pass a sleep state to put_and_wait_on_page_locked Matthew Wilcox (Oracle)
2020-11-02 18:56   ` Kent Overstreet
2020-11-03  7:28   ` Christoph Hellwig
2020-11-02 18:42 ` [PATCH 04/17] mm/filemap: Support readpage splitting a page Matthew Wilcox (Oracle)
2020-11-02 19:00   ` Kent Overstreet
2020-11-02 19:03     ` Matthew Wilcox
2020-11-02 19:12   ` Kent Overstreet
2020-11-03  7:31   ` Christoph Hellwig
2020-11-02 18:43 ` [PATCH 05/17] mm/filemap: Inline __wait_on_page_locked_async into caller Matthew Wilcox (Oracle)
2020-11-02 19:00   ` Kent Overstreet
2020-11-03  7:31   ` Christoph Hellwig
2020-11-02 18:43 ` [PATCH 06/17] mm/filemap: Don't call ->readpage if IOCB_WAITQ is set Matthew Wilcox (Oracle)
2020-11-02 19:17   ` Kent Overstreet
2020-11-03  7:31   ` Christoph Hellwig
2020-11-05  7:22   ` Nikolay Borisov
2020-11-02 18:43 ` [PATCH 07/17] mm/filemap: Change filemap_read_page calling conventions Matthew Wilcox (Oracle)
2020-11-02 19:37   ` Kent Overstreet
2020-11-03  7:34   ` Christoph Hellwig
2020-11-03 15:11     ` Matthew Wilcox
2020-11-02 18:43 ` [PATCH 08/17] mm/filemap: Change filemap_create_page arguments Matthew Wilcox (Oracle)
2020-11-02 19:38   ` Kent Overstreet
2020-11-03  7:35   ` Christoph Hellwig
2020-11-02 18:43 ` [PATCH 09/17] mm/filemap: Convert filemap_update_page to return an errno Matthew Wilcox (Oracle)
2020-11-02 19:39   ` Kent Overstreet
2020-11-03  7:36   ` Christoph Hellwig
2020-11-02 18:43 ` [PATCH 10/17] mm/filemap: Move the iocb checks into filemap_update_page Matthew Wilcox (Oracle)
2020-11-02 19:45   ` Kent Overstreet
2020-11-03  7:41   ` Christoph Hellwig
2020-11-02 18:43 ` [PATCH 11/17] mm/filemap: Add filemap_range_uptodate Matthew Wilcox (Oracle)
2020-11-02 19:50   ` Kent Overstreet
2020-11-02 20:09     ` Matthew Wilcox
2020-11-03  7:49   ` Christoph Hellwig
2020-11-03 15:18     ` Matthew Wilcox
2020-11-03 15:31       ` Christoph Hellwig
2020-11-03 15:42         ` Matthew Wilcox
2020-11-02 18:43 ` [PATCH 12/17] mm/filemap: Split filemap_readahead out of filemap_get_pages Matthew Wilcox (Oracle)
2020-11-02 19:52   ` Kent Overstreet
2020-11-02 18:43 ` [PATCH 13/17] mm/filemap: Remove parameters from filemap_update_page() Matthew Wilcox (Oracle)
2020-11-02 19:52   ` Kent Overstreet
2020-11-03  7:50   ` Christoph Hellwig
2020-11-02 18:43 ` [PATCH 14/17] mm/filemap: Restructure filemap_get_pages Matthew Wilcox (Oracle)
2020-11-02 20:05   ` Kent Overstreet
2020-11-03  7:57   ` Christoph Hellwig
2020-11-03 14:46     ` Matthew Wilcox
2020-11-03 15:29       ` Christoph Hellwig
2020-11-02 18:43 ` [PATCH 15/17] mm/filemap: Don't relock the page after calling readpage Matthew Wilcox (Oracle)
2020-11-02 20:06   ` Kent Overstreet
2020-11-03  8:00   ` Christoph Hellwig
2020-11-03 15:24     ` Matthew Wilcox
2020-11-03 17:13       ` Christoph Hellwig
2020-11-03 18:55         ` Matthew Wilcox
2020-11-02 18:43 ` [PATCH 16/17] mm/filemap: rename generic_file_buffered_read to filemap_read Matthew Wilcox (Oracle)
2020-11-02 20:06   ` Kent Overstreet
2020-11-02 18:43 ` [PATCH 17/17] mm: simplify generic_file_read_iter Matthew Wilcox (Oracle)
2020-11-02 20:07   ` Kent Overstreet
2020-11-02 20:14 ` [PATCH 00/17] Refactor generic_file_buffered_read Kent Overstreet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.