All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] Wait for I/O without holding a page reference
@ 2020-10-13  3:00 Matthew Wilcox (Oracle)
  2020-10-13  3:00 ` [PATCH 1/3] mm: Pass a sleep state to put_and_wait_on_page_locked Matthew Wilcox (Oracle)
                   ` (4 more replies)
  0 siblings, 5 replies; 7+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-13  3:00 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Matthew Wilcox (Oracle), linux-mm, Mel Gorman

The upcoming THP patchset keeps THPs Uptodate at all times unless we
hit an I/O error.  So I have a patch which induces I/O errors in 10%
of readahead I/Os in order to test the fallback path.  It hits a
problem with xfstests generic/273 which has 500 threads livelocking
trying to split the THP.  This patchset fixes that livelock and
takes 21 lines out of generic_file_buffered_read().

Matthew Wilcox (Oracle) (3):
  mm: Pass a sleep state to put_and_wait_on_page_locked
  mm/filemap: Don't hold a page reference while waiting for unlock
  mm: Inline __wait_on_page_locked_async into caller

 include/linux/pagemap.h |   3 +-
 mm/filemap.c            | 129 +++++++++++++++-------------------------
 mm/huge_memory.c        |   4 +-
 mm/migrate.c            |   4 +-
 4 files changed, 52 insertions(+), 88 deletions(-)

-- 
2.28.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] mm: Pass a sleep state to put_and_wait_on_page_locked
  2020-10-13  3:00 [PATCH 0/3] Wait for I/O without holding a page reference Matthew Wilcox (Oracle)
@ 2020-10-13  3:00 ` Matthew Wilcox (Oracle)
  2020-10-13  3:00 ` [PATCH 2/3] mm: Don't hold a page reference while waiting for unlock Matthew Wilcox (Oracle)
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-13  3:00 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Matthew Wilcox (Oracle), linux-mm, Mel Gorman

This is prep work for the next patch, but I think at least one of the
current callers would prefer a killable sleep to an uninterruptible one.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/pagemap.h | 3 +--
 mm/filemap.c            | 7 +++++--
 mm/huge_memory.c        | 4 ++--
 mm/migrate.c            | 4 ++--
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 853733286138..a9ed9ac86e0f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -618,8 +618,7 @@ static inline int wait_on_page_locked_killable(struct page *page)
 	return wait_on_page_bit_killable(compound_head(page), PG_locked);
 }
 
-extern void put_and_wait_on_page_locked(struct page *page);
-
+int put_and_wait_on_page_locked(struct page *page, int state);
 void wait_on_page_writeback(struct page *page);
 extern void end_page_writeback(struct page *page);
 void wait_for_stable_page(struct page *page);
diff --git a/mm/filemap.c b/mm/filemap.c
index 0ef06d515532..f70227941627 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1265,20 +1265,23 @@ static int wait_on_page_locked_async(struct page *page,
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
+ * @state: The sleep state (TASK_KILLABLE, TASK_UNINTERRUPTIBLE, etc).
  *
  * The caller should hold a reference on @page.  They expect the page to
  * become unlocked relatively soon, but do not wish to hold up migration
  * (for example) by holding the reference while waiting for the page to
  * come unlocked.  After this function returns, the caller should not
  * dereference @page.
+ *
+ * Return: 0 if the page was unlocked or -EINTR if interrupted by a signal.
  */
-void put_and_wait_on_page_locked(struct page *page)
+int put_and_wait_on_page_locked(struct page *page, int state)
 {
 	wait_queue_head_t *q;
 
 	page = compound_head(page);
 	q = page_waitqueue(page);
-	wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, DROP);
+	return wait_on_page_bit_common(q, page, PG_locked, state, DROP);
 }
 
 /**
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 7ff29cc3d55c..4b85729016b2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1398,7 +1398,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 		if (!get_page_unless_zero(page))
 			goto out_unlock;
 		spin_unlock(vmf->ptl);
-		put_and_wait_on_page_locked(page);
+		put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
 		goto out;
 	}
 
@@ -1434,7 +1434,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 		if (!get_page_unless_zero(page))
 			goto out_unlock;
 		spin_unlock(vmf->ptl);
-		put_and_wait_on_page_locked(page);
+		put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
 		goto out;
 	}
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 941b89383cf3..ac18bbbd8347 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -335,7 +335,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
 	if (!get_page_unless_zero(page))
 		goto out;
 	pte_unmap_unlock(ptep, ptl);
-	put_and_wait_on_page_locked(page);
+	put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
 	return;
 out:
 	pte_unmap_unlock(ptep, ptl);
@@ -369,7 +369,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
 	if (!get_page_unless_zero(page))
 		goto unlock;
 	spin_unlock(ptl);
-	put_and_wait_on_page_locked(page);
+	put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
 	return;
 unlock:
 	spin_unlock(ptl);
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] mm: Don't hold a page reference while waiting for unlock
  2020-10-13  3:00 [PATCH 0/3] Wait for I/O without holding a page reference Matthew Wilcox (Oracle)
  2020-10-13  3:00 ` [PATCH 1/3] mm: Pass a sleep state to put_and_wait_on_page_locked Matthew Wilcox (Oracle)
@ 2020-10-13  3:00 ` Matthew Wilcox (Oracle)
  2020-10-13 13:32   ` Matthew Wilcox
  2020-10-13  3:00 ` [PATCH 2/3] mm/filemap: " Matthew Wilcox (Oracle)
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 7+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-13  3:00 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Matthew Wilcox (Oracle), linux-mm

In the upcoming THP patch series, if we find a !Uptodate page, it
is because of a read error.  In this case, we want to split the THP
into smaller pages so we can handle the error in as granular a fashion
as possible.  But xfstests generic/273 defeats this strategy by having
500 threads all sleeping on the same page, each waiting for their turn
to split the page.  None of them will ever succeed because splitting a
page requires that you hold the only reference to it.

To fix this, use put_and_wait_on_page_locked() to sleep without holding
a reference.  Each of the readers will then go back and retry the
page lookup.

This requires a few changes since we now get the page lock a little
earlier in generic_file_buffered_read().  This is unlikely to affect any
normal workloads as pages in the page cache are generally uptodate and
will not hit this path.  With the THP patch set and the readahead error
injector, I see about a 25% performance improvement with this patch over
an alternate approach which moves the page locking down.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 51 ++++++++++++++-------------------------------------
 1 file changed, 14 insertions(+), 37 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index f70227941627..9916353f0f0d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1254,14 +1254,6 @@ static int __wait_on_page_locked_async(struct page *page,
 	return ret;
 }
 
-static int wait_on_page_locked_async(struct page *page,
-				     struct wait_page_queue *wait)
-{
-	if (!PageLocked(page))
-		return 0;
-	return __wait_on_page_locked_async(compound_head(page), wait, false);
-}
-
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
@@ -2128,19 +2120,21 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 					put_page(page);
 					goto out;
 				}
-				error = wait_on_page_locked_async(page,
-								iocb->ki_waitq);
+				error = lock_page_async(page, iocb->ki_waitq);
+				if (error)
+					goto readpage_error;
+			} else if (iocb->ki_flags & IOCB_NOWAIT) {
+				put_page(page);
+				goto would_block;
 			} else {
-				if (iocb->ki_flags & IOCB_NOWAIT) {
-					put_page(page);
-					goto would_block;
+				if (!trylock_page(page)) {
+					put_and_wait_on_page_locked(page,
+							TASK_KILLABLE);
+					goto find_page;
 				}
-				error = wait_on_page_locked_killable(page);
 			}
-			if (unlikely(error))
-				goto readpage_error;
 			if (PageUptodate(page))
-				goto page_ok;
+				goto uptodate;
 
 			if (inode->i_blkbits == PAGE_SHIFT ||
 					!mapping->a_ops->is_partially_uptodate)
@@ -2148,14 +2142,13 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 			/* pipes can't handle partially uptodate pages */
 			if (unlikely(iov_iter_is_pipe(iter)))
 				goto page_not_up_to_date;
-			if (!trylock_page(page))
-				goto page_not_up_to_date;
 			/* Did it get truncated before we got the lock? */
 			if (!page->mapping)
-				goto page_not_up_to_date_locked;
+				goto page_not_up_to_date;
 			if (!mapping->a_ops->is_partially_uptodate(page,
 							offset, iter->count))
-				goto page_not_up_to_date_locked;
+				goto page_not_up_to_date;
+uptodate:
 			unlock_page(page);
 		}
 page_ok:
@@ -2223,28 +2216,12 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 		continue;
 
 page_not_up_to_date:
-		/* Get exclusive access to the page ... */
-		if (iocb->ki_flags & IOCB_WAITQ)
-			error = lock_page_async(page, iocb->ki_waitq);
-		else
-			error = lock_page_killable(page);
-		if (unlikely(error))
-			goto readpage_error;
-
-page_not_up_to_date_locked:
 		/* Did it get truncated before we got the lock? */
 		if (!page->mapping) {
 			unlock_page(page);
 			put_page(page);
 			continue;
 		}
-
-		/* Did somebody else fill it already? */
-		if (PageUptodate(page)) {
-			unlock_page(page);
-			goto page_ok;
-		}
-
 readpage:
 		if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
 			unlock_page(page);
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] mm/filemap: Don't hold a page reference while waiting for unlock
  2020-10-13  3:00 [PATCH 0/3] Wait for I/O without holding a page reference Matthew Wilcox (Oracle)
  2020-10-13  3:00 ` [PATCH 1/3] mm: Pass a sleep state to put_and_wait_on_page_locked Matthew Wilcox (Oracle)
  2020-10-13  3:00 ` [PATCH 2/3] mm: Don't hold a page reference while waiting for unlock Matthew Wilcox (Oracle)
@ 2020-10-13  3:00 ` Matthew Wilcox (Oracle)
  2020-10-13  3:00 ` [PATCH 3/3] mm: Inline __wait_on_page_locked_async into caller Matthew Wilcox (Oracle)
  2020-10-13 13:32 ` [PATCH 0/3] Wait for I/O without holding a page reference William Kucharski
  4 siblings, 0 replies; 7+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-13  3:00 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Matthew Wilcox (Oracle), linux-mm, Mel Gorman

In the upcoming THP patch series, if we find a !Uptodate page, it
is because of a read error.  In this case, we want to split the THP
into smaller pages so we can handle the error in as granular a fashion
as possible.  But xfstests generic/273 defeats this strategy by having
500 threads all sleeping on the same page, each waiting for their turn
to split the page.  None of them will ever succeed because splitting a
page requires that you hold the only reference to it.

To fix this, use put_and_wait_on_page_locked() to sleep without holding a
reference to the page.  Each of the readers will then go back and retry
the page lookup after the page is unlocked.

Since we now get the page lock a little earlier in
generic_file_buffered_read(), we can eliminate a number of duplicate
checks.  The original intent (commit ebded02788b5 ("avoid unnecessary
calls to lock_page when waiting for IO to complete during a read"))
behind getting the page lock later was to avoid re-locking the page
after it has been brought uptodate by another thread.  We will still
avoid that because we go through the normal lookup path again after the
winning thread has brought the page uptodate.

Using the "fail 10% of readaheads" patch, which will induce the !Uptodate
case, I can detect no significant difference by applying this patch with
the generic/273 test.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 69 +++++++++++++++-------------------------------------
 1 file changed, 20 insertions(+), 49 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index f70227941627..ac2dfa857568 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1254,14 +1254,6 @@ static int __wait_on_page_locked_async(struct page *page,
 	return ret;
 }
 
-static int wait_on_page_locked_async(struct page *page,
-				     struct wait_page_queue *wait)
-{
-	if (!PageLocked(page))
-		return 0;
-	return __wait_on_page_locked_async(compound_head(page), wait, false);
-}
-
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
@@ -2128,34 +2120,37 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 					put_page(page);
 					goto out;
 				}
-				error = wait_on_page_locked_async(page,
-								iocb->ki_waitq);
+				error = lock_page_async(page, iocb->ki_waitq);
+				if (error)
+					goto readpage_error;
+			} else if (iocb->ki_flags & IOCB_NOWAIT) {
+				put_page(page);
+				goto would_block;
 			} else {
-				if (iocb->ki_flags & IOCB_NOWAIT) {
-					put_page(page);
-					goto would_block;
+				if (!trylock_page(page)) {
+					put_and_wait_on_page_locked(page,
+							TASK_KILLABLE);
+					continue;
 				}
-				error = wait_on_page_locked_killable(page);
 			}
-			if (unlikely(error))
-				goto readpage_error;
 			if (PageUptodate(page))
-				goto page_ok;
+				goto uptodate;
+			if (!page->mapping) {
+				unlock_page(page);
+				put_page(page);
+				continue;
+			}
 
 			if (inode->i_blkbits == PAGE_SHIFT ||
 					!mapping->a_ops->is_partially_uptodate)
-				goto page_not_up_to_date;
+				goto readpage;
 			/* pipes can't handle partially uptodate pages */
 			if (unlikely(iov_iter_is_pipe(iter)))
-				goto page_not_up_to_date;
-			if (!trylock_page(page))
-				goto page_not_up_to_date;
-			/* Did it get truncated before we got the lock? */
-			if (!page->mapping)
-				goto page_not_up_to_date_locked;
+				goto readpage;
 			if (!mapping->a_ops->is_partially_uptodate(page,
 							offset, iter->count))
-				goto page_not_up_to_date_locked;
+				goto readpage;
+uptodate:
 			unlock_page(page);
 		}
 page_ok:
@@ -2221,30 +2216,6 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 			goto out;
 		}
 		continue;
-
-page_not_up_to_date:
-		/* Get exclusive access to the page ... */
-		if (iocb->ki_flags & IOCB_WAITQ)
-			error = lock_page_async(page, iocb->ki_waitq);
-		else
-			error = lock_page_killable(page);
-		if (unlikely(error))
-			goto readpage_error;
-
-page_not_up_to_date_locked:
-		/* Did it get truncated before we got the lock? */
-		if (!page->mapping) {
-			unlock_page(page);
-			put_page(page);
-			continue;
-		}
-
-		/* Did somebody else fill it already? */
-		if (PageUptodate(page)) {
-			unlock_page(page);
-			goto page_ok;
-		}
-
 readpage:
 		if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
 			unlock_page(page);
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] mm: Inline __wait_on_page_locked_async into caller
  2020-10-13  3:00 [PATCH 0/3] Wait for I/O without holding a page reference Matthew Wilcox (Oracle)
                   ` (2 preceding siblings ...)
  2020-10-13  3:00 ` [PATCH 2/3] mm/filemap: " Matthew Wilcox (Oracle)
@ 2020-10-13  3:00 ` Matthew Wilcox (Oracle)
  2020-10-13 13:32 ` [PATCH 0/3] Wait for I/O without holding a page reference William Kucharski
  4 siblings, 0 replies; 7+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-13  3:00 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Matthew Wilcox (Oracle), linux-mm, Mel Gorman

The previous patch removed wait_on_page_locked_async(), so inline
__wait_on_page_locked_async() into __lock_page_async().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 53 ++++++++++++++++++++++------------------------------
 1 file changed, 22 insertions(+), 31 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index ac2dfa857568..a3c299d6a2b6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1224,36 +1224,6 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
 }
 EXPORT_SYMBOL(wait_on_page_bit_killable);
 
-static int __wait_on_page_locked_async(struct page *page,
-				       struct wait_page_queue *wait, bool set)
-{
-	struct wait_queue_head *q = page_waitqueue(page);
-	int ret = 0;
-
-	wait->page = page;
-	wait->bit_nr = PG_locked;
-
-	spin_lock_irq(&q->lock);
-	__add_wait_queue_entry_tail(q, &wait->wait);
-	SetPageWaiters(page);
-	if (set)
-		ret = !trylock_page(page);
-	else
-		ret = PageLocked(page);
-	/*
-	 * If we were succesful now, we know we're still on the
-	 * waitqueue as we're still under the lock. This means it's
-	 * safe to remove and return success, we know the callback
-	 * isn't going to trigger.
-	 */
-	if (!ret)
-		__remove_wait_queue(q, &wait->wait);
-	else
-		ret = -EIOCBQUEUED;
-	spin_unlock_irq(&q->lock);
-	return ret;
-}
-
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
@@ -1421,7 +1391,28 @@ EXPORT_SYMBOL_GPL(__lock_page_killable);
 
 int __lock_page_async(struct page *page, struct wait_page_queue *wait)
 {
-	return __wait_on_page_locked_async(page, wait, true);
+	struct wait_queue_head *q = page_waitqueue(page);
+	int ret = 0;
+
+	wait->page = page;
+	wait->bit_nr = PG_locked;
+
+	spin_lock_irq(&q->lock);
+	__add_wait_queue_entry_tail(q, &wait->wait);
+	SetPageWaiters(page);
+	ret = !trylock_page(page);
+	/*
+	 * If we were succesful now, we know we're still on the
+	 * waitqueue as we're still under the lock. This means it's
+	 * safe to remove and return success, we know the callback
+	 * isn't going to trigger.
+	 */
+	if (!ret)
+		__remove_wait_queue(q, &wait->wait);
+	else
+		ret = -EIOCBQUEUED;
+	spin_unlock_irq(&q->lock);
+	return ret;
 }
 
 /*
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 0/3] Wait for I/O without holding a page reference
  2020-10-13  3:00 [PATCH 0/3] Wait for I/O without holding a page reference Matthew Wilcox (Oracle)
                   ` (3 preceding siblings ...)
  2020-10-13  3:00 ` [PATCH 3/3] mm: Inline __wait_on_page_locked_async into caller Matthew Wilcox (Oracle)
@ 2020-10-13 13:32 ` William Kucharski
  4 siblings, 0 replies; 7+ messages in thread
From: William Kucharski @ 2020-10-13 13:32 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: linux-fsdevel, linux-mm, Mel Gorman



> On Oct 12, 2020, at 9:00 PM, Matthew Wilcox (Oracle) <willy@infradead.org> wrote:
> 
> The upcoming THP patchset keeps THPs Uptodate at all times unless we
> hit an I/O error.  So I have a patch which induces I/O errors in 10%
> of readahead I/Os in order to test the fallback path.  It hits a
> problem with xfstests generic/273 which has 500 threads livelocking
> trying to split the THP.  This patchset fixes that livelock and
> takes 21 lines out of generic_file_buffered_read().
> 
> Matthew Wilcox (Oracle) (3):
>  mm: Pass a sleep state to put_and_wait_on_page_locked
>  mm/filemap: Don't hold a page reference while waiting for unlock
>  mm: Inline __wait_on_page_locked_async into caller
> 
> include/linux/pagemap.h |   3 +-
> mm/filemap.c            | 129 +++++++++++++++-------------------------
> mm/huge_memory.c        |   4 +-
> mm/migrate.c            |   4 +-
> 4 files changed, 52 insertions(+), 88 deletions(-)
> 
> -- 
> 2.28.0

For the series:

Reviewed-by: William Kucharski <william.kucharski@oracle.com>


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] mm: Don't hold a page reference while waiting for unlock
  2020-10-13  3:00 ` [PATCH 2/3] mm: Don't hold a page reference while waiting for unlock Matthew Wilcox (Oracle)
@ 2020-10-13 13:32   ` Matthew Wilcox
  0 siblings, 0 replies; 7+ messages in thread
From: Matthew Wilcox @ 2020-10-13 13:32 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-mm


This patch was inadvertently included.  I blame my tools.  The actual
patch 2/3 is the other one.

On Tue, Oct 13, 2020 at 04:00:06AM +0100, Matthew Wilcox (Oracle) wrote:
> In the upcoming THP patch series, if we find a !Uptodate page, it
> is because of a read error.  In this case, we want to split the THP
> into smaller pages so we can handle the error in as granular a fashion
> as possible.  But xfstests generic/273 defeats this strategy by having
> 500 threads all sleeping on the same page, each waiting for their turn
> to split the page.  None of them will ever succeed because splitting a
> page requires that you hold the only reference to it.
> 
> To fix this, use put_and_wait_on_page_locked() to sleep without holding
> a reference.  Each of the readers will then go back and retry the
> page lookup.
> 
> This requires a few changes since we now get the page lock a little
> earlier in generic_file_buffered_read().  This is unlikely to affect any
> normal workloads as pages in the page cache are generally uptodate and
> will not hit this path.  With the THP patch set and the readahead error
> injector, I see about a 25% performance improvement with this patch over
> an alternate approach which moves the page locking down.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/filemap.c | 51 ++++++++++++++-------------------------------------
>  1 file changed, 14 insertions(+), 37 deletions(-)
> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index f70227941627..9916353f0f0d 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1254,14 +1254,6 @@ static int __wait_on_page_locked_async(struct page *page,
>  	return ret;
>  }
>  
> -static int wait_on_page_locked_async(struct page *page,
> -				     struct wait_page_queue *wait)
> -{
> -	if (!PageLocked(page))
> -		return 0;
> -	return __wait_on_page_locked_async(compound_head(page), wait, false);
> -}
> -
>  /**
>   * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
>   * @page: The page to wait for.
> @@ -2128,19 +2120,21 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
>  					put_page(page);
>  					goto out;
>  				}
> -				error = wait_on_page_locked_async(page,
> -								iocb->ki_waitq);
> +				error = lock_page_async(page, iocb->ki_waitq);
> +				if (error)
> +					goto readpage_error;
> +			} else if (iocb->ki_flags & IOCB_NOWAIT) {
> +				put_page(page);
> +				goto would_block;
>  			} else {
> -				if (iocb->ki_flags & IOCB_NOWAIT) {
> -					put_page(page);
> -					goto would_block;
> +				if (!trylock_page(page)) {
> +					put_and_wait_on_page_locked(page,
> +							TASK_KILLABLE);
> +					goto find_page;
>  				}
> -				error = wait_on_page_locked_killable(page);
>  			}
> -			if (unlikely(error))
> -				goto readpage_error;
>  			if (PageUptodate(page))
> -				goto page_ok;
> +				goto uptodate;
>  
>  			if (inode->i_blkbits == PAGE_SHIFT ||
>  					!mapping->a_ops->is_partially_uptodate)
> @@ -2148,14 +2142,13 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
>  			/* pipes can't handle partially uptodate pages */
>  			if (unlikely(iov_iter_is_pipe(iter)))
>  				goto page_not_up_to_date;
> -			if (!trylock_page(page))
> -				goto page_not_up_to_date;
>  			/* Did it get truncated before we got the lock? */
>  			if (!page->mapping)
> -				goto page_not_up_to_date_locked;
> +				goto page_not_up_to_date;
>  			if (!mapping->a_ops->is_partially_uptodate(page,
>  							offset, iter->count))
> -				goto page_not_up_to_date_locked;
> +				goto page_not_up_to_date;
> +uptodate:
>  			unlock_page(page);
>  		}
>  page_ok:
> @@ -2223,28 +2216,12 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
>  		continue;
>  
>  page_not_up_to_date:
> -		/* Get exclusive access to the page ... */
> -		if (iocb->ki_flags & IOCB_WAITQ)
> -			error = lock_page_async(page, iocb->ki_waitq);
> -		else
> -			error = lock_page_killable(page);
> -		if (unlikely(error))
> -			goto readpage_error;
> -
> -page_not_up_to_date_locked:
>  		/* Did it get truncated before we got the lock? */
>  		if (!page->mapping) {
>  			unlock_page(page);
>  			put_page(page);
>  			continue;
>  		}
> -
> -		/* Did somebody else fill it already? */
> -		if (PageUptodate(page)) {
> -			unlock_page(page);
> -			goto page_ok;
> -		}
> -
>  readpage:
>  		if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
>  			unlock_page(page);
> -- 
> 2.28.0
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-10-13 13:32 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-13  3:00 [PATCH 0/3] Wait for I/O without holding a page reference Matthew Wilcox (Oracle)
2020-10-13  3:00 ` [PATCH 1/3] mm: Pass a sleep state to put_and_wait_on_page_locked Matthew Wilcox (Oracle)
2020-10-13  3:00 ` [PATCH 2/3] mm: Don't hold a page reference while waiting for unlock Matthew Wilcox (Oracle)
2020-10-13 13:32   ` Matthew Wilcox
2020-10-13  3:00 ` [PATCH 2/3] mm/filemap: " Matthew Wilcox (Oracle)
2020-10-13  3:00 ` [PATCH 3/3] mm: Inline __wait_on_page_locked_async into caller Matthew Wilcox (Oracle)
2020-10-13 13:32 ` [PATCH 0/3] Wait for I/O without holding a page reference William Kucharski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.