All of lore.kernel.org
 help / color / mirror / Atom feed
From: Josef Bacik <josef@toxicpanda.com>
To: kernel-team@fb.com, linux-kernel@vger.kernel.org,
	hannes@cmpxchg.org, tj@kernel.org, linux-fsdevel@vger.kernel.org,
	akpm@linux-foundation.org, riel@redhat.com, linux-mm@kvack.org,
	linux-btrfs@vger.kernel.org
Subject: [PATCH 2/9] mm: drop mmap_sem for page cache read IO submission
Date: Wed, 26 Sep 2018 17:08:49 -0400	[thread overview]
Message-ID: <20180926210856.7895-3-josef@toxicpanda.com> (raw)
In-Reply-To: <20180926210856.7895-1-josef@toxicpanda.com>

From: Johannes Weiner <hannes@cmpxchg.org>

Reads can take a long time, and if anybody needs to take a write lock on
the mmap_sem it'll block any subsequent readers to the mmap_sem while
the read is outstanding, which could cause long delays.  Instead drop
the mmap_sem if we do any reads at all.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 mm/filemap.c | 119 ++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 90 insertions(+), 29 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 52517f28e6f4..1ed35cd99b2c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2366,6 +2366,18 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 EXPORT_SYMBOL(generic_file_read_iter);
 
 #ifdef CONFIG_MMU
+static struct file *maybe_unlock_mmap_for_io(struct vm_area_struct *vma, int flags)
+{
+	if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) == FAULT_FLAG_ALLOW_RETRY) {
+		struct file *file;
+
+		file = get_file(vma->vm_file);
+		up_read(&vma->vm_mm->mmap_sem);
+		return file;
+	}
+	return NULL;
+}
+
 /**
  * page_cache_read - adds requested page to the page cache if not already there
  * @file:	file to read
@@ -2405,23 +2417,28 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
  * Synchronous readahead happens when we don't even find
  * a page in the page cache at all.
  */
-static void do_sync_mmap_readahead(struct vm_area_struct *vma,
-				   struct file_ra_state *ra,
-				   struct file *file,
-				   pgoff_t offset)
+static int do_sync_mmap_readahead(struct vm_area_struct *vma,
+				  struct file_ra_state *ra,
+				  struct file *file,
+				  pgoff_t offset,
+				  int flags)
 {
 	struct address_space *mapping = file->f_mapping;
+	struct file *fpin;
 
 	/* If we don't want any read-ahead, don't bother */
 	if (vma->vm_flags & VM_RAND_READ)
-		return;
+		return 0;
 	if (!ra->ra_pages)
-		return;
+		return 0;
 
 	if (vma->vm_flags & VM_SEQ_READ) {
+		fpin = maybe_unlock_mmap_for_io(vma, flags);
 		page_cache_sync_readahead(mapping, ra, file, offset,
 					  ra->ra_pages);
-		return;
+		if (fpin)
+			fput(fpin);
+		return fpin ? -EAGAIN : 0;
 	}
 
 	/* Avoid banging the cache line if not needed */
@@ -2433,7 +2450,9 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma,
 	 * stop bothering with read-ahead. It will only hurt.
 	 */
 	if (ra->mmap_miss > MMAP_LOTSAMISS)
-		return;
+		return 0;
+
+	fpin = maybe_unlock_mmap_for_io(vma, flags);
 
 	/*
 	 * mmap read-around
@@ -2442,28 +2461,40 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma,
 	ra->size = ra->ra_pages;
 	ra->async_size = ra->ra_pages / 4;
 	ra_submit(ra, mapping, file);
+
+	if (fpin)
+		fput(fpin);
+
+	return fpin ? -EAGAIN : 0;
 }
 
 /*
  * Asynchronous readahead happens when we find the page and PG_readahead,
  * so we want to possibly extend the readahead further..
  */
-static void do_async_mmap_readahead(struct vm_area_struct *vma,
-				    struct file_ra_state *ra,
-				    struct file *file,
-				    struct page *page,
-				    pgoff_t offset)
+static int do_async_mmap_readahead(struct vm_area_struct *vma,
+				   struct file_ra_state *ra,
+				   struct file *file,
+				   struct page *page,
+				   pgoff_t offset,
+				   int flags)
 {
 	struct address_space *mapping = file->f_mapping;
+	struct file *fpin;
 
 	/* If we don't want any read-ahead, don't bother */
 	if (vma->vm_flags & VM_RAND_READ)
-		return;
+		return 0;
 	if (ra->mmap_miss > 0)
 		ra->mmap_miss--;
-	if (PageReadahead(page))
-		page_cache_async_readahead(mapping, ra, file,
-					   page, offset, ra->ra_pages);
+	if (!PageReadahead(page))
+		return 0;
+	fpin = maybe_unlock_mmap_for_io(vma, flags);
+	page_cache_async_readahead(mapping, ra, file,
+				   page, offset, ra->ra_pages);
+	if (fpin)
+		fput(fpin);
+	return fpin ? -EAGAIN : 0;
 }
 
 /**
@@ -2479,10 +2510,8 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
  *
  * vma->vm_mm->mmap_sem must be held on entry.
  *
- * If our return value has VM_FAULT_RETRY set, it's because
- * lock_page_or_retry() returned 0.
- * The mmap_sem has usually been released in this case.
- * See __lock_page_or_retry() for the exception.
+ * If our return value has VM_FAULT_RETRY set, the mmap_sem has
+ * usually been released.
  *
  * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
  * has not been released.
@@ -2492,11 +2521,13 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
 vm_fault_t filemap_fault(struct vm_fault *vmf)
 {
 	int error;
+	struct mm_struct *mm = vmf->vma->vm_mm;
 	struct file *file = vmf->vma->vm_file;
 	struct address_space *mapping = file->f_mapping;
 	struct file_ra_state *ra = &file->f_ra;
 	struct inode *inode = mapping->host;
 	pgoff_t offset = vmf->pgoff;
+	int flags = vmf->flags;
 	pgoff_t max_off;
 	struct page *page;
 	vm_fault_t ret = 0;
@@ -2509,27 +2540,44 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 	 * Do we have something in the page cache already?
 	 */
 	page = find_get_page(mapping, offset);
-	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
+	if (likely(page) && !(flags & FAULT_FLAG_TRIED)) {
 		/*
 		 * We found the page, so try async readahead before
 		 * waiting for the lock.
 		 */
-		do_async_mmap_readahead(vmf->vma, ra, file, page, offset);
+		error = do_async_mmap_readahead(vmf->vma, ra, file, page, offset, vmf->flags);
+		if (error == -EAGAIN)
+			goto out_retry_wait;
 	} else if (!page) {
 		/* No page in the page cache at all */
-		do_sync_mmap_readahead(vmf->vma, ra, file, offset);
-		count_vm_event(PGMAJFAULT);
-		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
 		ret = VM_FAULT_MAJOR;
+		count_vm_event(PGMAJFAULT);
+		count_memcg_event_mm(mm, PGMAJFAULT);
+		error = do_sync_mmap_readahead(vmf->vma, ra, file, offset, vmf->flags);
+		if (error == -EAGAIN)
+			goto out_retry_wait;
 retry_find:
 		page = find_get_page(mapping, offset);
 		if (!page)
 			goto no_cached_page;
 	}
 
-	if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
-		put_page(page);
-		return ret | VM_FAULT_RETRY;
+	if (!trylock_page(page)) {
+		if (flags & FAULT_FLAG_ALLOW_RETRY) {
+			if (flags & FAULT_FLAG_RETRY_NOWAIT)
+				goto out_retry;
+			up_read(&mm->mmap_sem);
+			goto out_retry_wait;
+		}
+		if (flags & FAULT_FLAG_KILLABLE) {
+			int ret = __lock_page_killable(page);
+
+			if (ret) {
+				up_read(&mm->mmap_sem);
+				goto out_retry;
+			}
+		} else
+			__lock_page(page);
 	}
 
 	/* Did it get truncated? */
@@ -2607,6 +2655,19 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 	/* Things didn't work out. Return zero to tell the mm layer so. */
 	shrink_readahead_size_eio(file, ra);
 	return VM_FAULT_SIGBUS;
+
+out_retry_wait:
+	if (page) {
+		if (flags & FAULT_FLAG_KILLABLE)
+			wait_on_page_locked_killable(page);
+		else
+			wait_on_page_locked(page);
+	}
+
+out_retry:
+	if (page)
+		put_page(page);
+	return ret | VM_FAULT_RETRY;
 }
 EXPORT_SYMBOL(filemap_fault);
 
-- 
2.14.3


  parent reply	other threads:[~2018-09-26 21:09 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-26 21:08 [RFC][PATCH 0/9][V2] drop the mmap_sem when doing IO in the fault path Josef Bacik
2018-09-26 21:08 ` [PATCH 1/9] mm: infrastructure for page fault page caching Josef Bacik
2018-09-27 16:24   ` Matthew Wilcox
2018-09-26 21:08 ` Josef Bacik [this message]
2018-09-26 21:08 ` [PATCH 3/9] mm: clean up swapcache lookup and creation function names Josef Bacik
2018-09-26 21:08 ` [PATCH 4/9] mm: drop mmap_sem for swap read IO submission Josef Bacik
2018-09-26 21:08 ` [PATCH 5/9] mm: drop the mmap_sem in all read fault cases Josef Bacik
2018-09-26 21:08 ` [PATCH 6/9] mm: use the cached page for filemap_fault Josef Bacik
2018-09-26 21:08 ` [PATCH 7/9] mm: add a flag to indicate we used a cached page Josef Bacik
2018-09-26 21:08 ` [PATCH 8/9] mm: allow ->page_mkwrite to do retries Josef Bacik
2018-09-26 21:08 ` [PATCH 9/9] btrfs: drop mmap_sem in mkwrite for btrfs Josef Bacik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180926210856.7895-3-josef@toxicpanda.com \
    --to=josef@toxicpanda.com \
    --cc=akpm@linux-foundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=kernel-team@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=riel@redhat.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.