linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andreas Gruenbacher <agruenba@redhat.com>
To: Theodore Ts'o <tytso@mit.edu>
Cc: Andreas Gruenbacher <agruenba@redhat.com>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Paul Mackerras <paulus@ozlabs.org>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Christoph Hellwig <hch@infradead.org>,
	"Darrick J. Wong" <djwong@kernel.org>, Jan Kara <jack@suse.cz>,
	Matthew Wilcox <willy@infradead.org>,
	cluster-devel@redhat.com, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, ocfs2-devel@oss.oracle.com,
	kvm-ppc@vger.kernel.org, linux-btrfs@vger.kernel.org
Subject: Re: [PATCH v8 00/17] gfs2: Fix mmap + page fault deadlocks
Date: Wed, 27 Oct 2021 23:21:38 +0200	[thread overview]
Message-ID: <20211027212138.3722977-1-agruenba@redhat.com> (raw)
In-Reply-To: <20211026094430.3669156-1-agruenba@redhat.com>

One of the arguments against Dave Hansen's patch that eliminates the
pre-faulting was that it doubles the number of page faults in the slow
case.  This can be avoided by using get_user_pages() to do the
"post-faulting", though.  For what it's worth, here's a patch for that
(on top of this series).

Andreas

--

fs: Avoid predictable page faults for sys_write() user buffer pages

Introduce a new fault_in_iov_iter_slow_readable() helper for faulting in
an iterator via get_user_pages() instead of triggering page faults.
This is slower than a simple memory read when the underlying pages are
resident, but avoids the page fault overhead when the underlying pages
need to be faulted in.

Use fault_in_iov_iter_slow_readable() in generic_perform_write() and
iomap_write_iter() when reading from the user buffer fails.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/iomap/buffered-io.c  |  2 +-
 include/linux/pagemap.h |  3 ++-
 include/linux/uio.h     | 17 ++++++++++++++++-
 lib/iov_iter.c          | 10 ++++++----
 mm/filemap.c            |  2 +-
 mm/gup.c                | 10 ++++++----
 6 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index d8809cd9ab31..15a0b4bb9528 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -770,7 +770,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 				bytes = copied;
 				goto again;
 			}
-			if (fault_in_iov_iter_readable(i, bytes) != bytes)
+			if (fault_in_iov_iter_slow_readable(i, bytes) != bytes)
 				goto again;
 			status = -EFAULT;
 			break;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2f7dd14083d9..43844ed5675f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -736,8 +736,9 @@ extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
  * Fault in userspace address range.
  */
 size_t fault_in_writeable(char __user *uaddr, size_t size);
-size_t fault_in_safe_writeable(const char __user *uaddr, size_t size);
 size_t fault_in_readable(const char __user *uaddr, size_t size);
+size_t __fault_in_slow(const char __user *uaddr, size_t size,
+		       unsigned int flags);
 
 int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 				pgoff_t index, gfp_t gfp_mask);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 6350354f97e9..b071f4445059 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <linux/thread_info.h>
 #include <uapi/linux/uio.h>
+#include <linux/mm.h>
 
 struct page;
 struct pipe_inode_info;
@@ -135,7 +136,21 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 void iov_iter_revert(struct iov_iter *i, size_t bytes);
 size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
-size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes);
+size_t __fault_in_iov_iter_slow(const struct iov_iter *i, size_t bytes,
+				unsigned int flags);
+
+static inline size_t fault_in_iov_iter_slow_readable(const struct iov_iter *i,
+						     size_t bytes)
+{
+	return __fault_in_iov_iter_slow(i, bytes, 0);
+}
+
+static inline size_t fault_in_iov_iter_writeable(const struct iov_iter *i,
+						 size_t bytes)
+{
+	return __fault_in_iov_iter_slow(i, bytes, FOLL_WRITE);
+}
+
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 66a740e6e153..73789a5409f6 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -468,9 +468,10 @@ size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
 EXPORT_SYMBOL(fault_in_iov_iter_readable);
 
 /*
- * fault_in_iov_iter_writeable - fault in iov iterator for writing
+ * __fault_in_iov_iter_slow - fault in iov iterator for reading/writing
  * @i: iterator
  * @size: maximum length
+ * @flags: FOLL_* flags (FOLL_WRITE for writing)
  *
  * Faults in the iterator using get_user_pages(), i.e., without triggering
  * hardware page faults.  This is primarily useful when we already know that
@@ -481,7 +482,8 @@ EXPORT_SYMBOL(fault_in_iov_iter_readable);
  *
  * Always returns 0 for non-user-space iterators.
  */
-size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
+size_t __fault_in_iov_iter_slow(const struct iov_iter *i, size_t size,
+				unsigned int flags)
 {
 	if (iter_is_iovec(i)) {
 		size_t count = min(size, iov_iter_count(i));
@@ -495,7 +497,7 @@ size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
 
 			if (unlikely(!len))
 				continue;
-			ret = fault_in_safe_writeable(p->iov_base + skip, len);
+			ret = __fault_in_slow(p->iov_base + skip, len, flags);
 			count -= len - ret;
 			if (ret)
 				break;
@@ -504,7 +506,7 @@ size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
 	}
 	return 0;
 }
-EXPORT_SYMBOL(fault_in_iov_iter_writeable);
+EXPORT_SYMBOL(__fault_in_iov_iter_slow);
 
 void iov_iter_init(struct iov_iter *i, unsigned int direction,
 			const struct iovec *iov, unsigned long nr_segs,
diff --git a/mm/filemap.c b/mm/filemap.c
index 467cdb7d086d..7ca76f4aa974 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3787,7 +3787,7 @@ ssize_t generic_perform_write(struct file *file,
 				bytes = copied;
 				goto again;
 			}
-			if (fault_in_iov_iter_readable(i, bytes) != bytes)
+			if (fault_in_iov_iter_slow_readable(i, bytes) != bytes)
 				goto again;
 			status = -EFAULT;
 			break;
diff --git a/mm/gup.c b/mm/gup.c
index e1c7e4bde11f..def9f478a621 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1694,9 +1694,10 @@ size_t fault_in_writeable(char __user *uaddr, size_t size)
 EXPORT_SYMBOL(fault_in_writeable);
 
 /*
- * fault_in_safe_writeable - fault in an address range for writing
+ * __fault_in_slow - fault in an address range for reading/writing
  * @uaddr: start of address range
  * @size: length of address range
+ * @flags: FOLL_* flags (FOLL_WRITE for writing)
  *
  * Faults in an address range using get_user_pages, i.e., without triggering
  * hardware page faults.  This is primarily useful when we already know that
@@ -1711,7 +1712,8 @@ EXPORT_SYMBOL(fault_in_writeable);
  * Returns the number of bytes not faulted in, like copy_to_user() and
  * copy_from_user().
  */
-size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
+size_t __fault_in_slow(const char __user *uaddr, size_t size,
+		       unsigned int flags)
 {
 	unsigned long start = (unsigned long)untagged_addr(uaddr);
 	unsigned long end, nstart, nend;
@@ -1743,7 +1745,7 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
 		nr_pages = (nend - nstart) / PAGE_SIZE;
 		ret = __get_user_pages_locked(mm, nstart, nr_pages,
 					      NULL, NULL, &locked,
-					      FOLL_TOUCH | FOLL_WRITE);
+					      FOLL_TOUCH | flags);
 		if (ret <= 0)
 			break;
 		nend = nstart + ret * PAGE_SIZE;
@@ -1754,7 +1756,7 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
 		return 0;
 	return size - min_t(size_t, nstart - start, size);
 }
-EXPORT_SYMBOL(fault_in_safe_writeable);
+EXPORT_SYMBOL(__fault_in_slow);
 
 /**
  * fault_in_readable - fault in userspace address range for reading
-- 
2.26.3


      parent reply	other threads:[~2021-10-27 21:23 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-19 13:41 [PATCH v8 00/17] gfs2: Fix mmap + page fault deadlocks Andreas Gruenbacher
2021-10-19 13:41 ` [PATCH v8 01/17] iov_iter: Fix iov_iter_get_pages{,_alloc} page fault return value Andreas Gruenbacher
2021-10-19 13:41 ` [PATCH v8 02/17] powerpc/kvm: Fix kvm_use_magic_page Andreas Gruenbacher
2021-10-19 13:41 ` [PATCH v8 03/17] gup: Turn fault_in_pages_{readable,writeable} into fault_in_{readable,writeable} Andreas Gruenbacher
2021-10-19 13:41 ` [PATCH v8 04/17] iov_iter: Turn iov_iter_fault_in_readable into fault_in_iov_iter_readable Andreas Gruenbacher
2021-10-19 13:41 ` [PATCH v8 05/17] iov_iter: Introduce fault_in_iov_iter_writeable Andreas Gruenbacher
2021-10-20 16:25   ` Catalin Marinas
2021-10-19 13:41 ` [PATCH v8 06/17] gfs2: Add wrapper for iomap_file_buffered_write Andreas Gruenbacher
2021-10-19 13:41 ` [PATCH v8 07/17] gfs2: Clean up function may_grant Andreas Gruenbacher
2021-10-19 13:41 ` [PATCH v8 08/17] gfs2: Introduce flag for glock holder auto-demotion Andreas Gruenbacher
2021-10-19 13:41 ` [PATCH v8 09/17] gfs2: Move the inode glock locking to gfs2_file_buffered_write Andreas Gruenbacher
2021-10-19 13:41 ` [PATCH v8 10/17] gfs2: Eliminate ip->i_gh Andreas Gruenbacher
2021-10-19 13:41 ` [PATCH v8 11/17] gfs2: Fix mmap + page fault deadlocks for buffered I/O Andreas Gruenbacher
2021-10-19 13:41 ` [PATCH v8 12/17] iomap: Fix iomap_dio_rw return value for user copies Andreas Gruenbacher
2021-10-19 13:42 ` [PATCH v8 13/17] iomap: Support partial direct I/O on user copy failures Andreas Gruenbacher
2021-10-19 13:42 ` [PATCH v8 14/17] iomap: Add done_before argument to iomap_dio_rw Andreas Gruenbacher
2021-10-19 15:51   ` Darrick J. Wong
2021-10-19 19:30     ` Andreas Gruenbacher
2021-10-20  1:57       ` Darrick J. Wong
2021-10-19 13:42 ` [PATCH v8 15/17] gup: Introduce FOLL_NOFAULT flag to disable page faults Andreas Gruenbacher
2021-10-19 13:42 ` [PATCH v8 16/17] iov_iter: Introduce nofault " Andreas Gruenbacher
2021-10-19 13:42 ` [PATCH v8 17/17] gfs2: Fix mmap + page fault deadlocks for direct I/O Andreas Gruenbacher
2021-10-19 15:40 ` [PATCH v8 00/17] gfs2: Fix mmap + page fault deadlocks Linus Torvalds
2021-10-19 16:00   ` Bob Peterson
2021-10-20 16:36   ` Catalin Marinas
2021-10-20 20:11     ` Linus Torvalds
2021-10-20 22:44       ` Catalin Marinas
2021-10-21  6:19         ` Linus Torvalds
2021-10-22 18:06           ` Catalin Marinas
2021-10-22 19:22             ` Linus Torvalds
2021-10-25 19:00               ` Andreas Gruenbacher
2021-10-26 18:24                 ` Catalin Marinas
2021-10-26 18:50                   ` Linus Torvalds
2021-10-26 19:18                     ` Linus Torvalds
2021-10-27 19:13                     ` Catalin Marinas
2021-10-27 21:14                       ` Linus Torvalds
2021-10-28 21:20                         ` Catalin Marinas
2021-10-28 21:40                           ` Catalin Marinas
2021-10-28 22:15                           ` Andreas Grünbacher
2021-10-29 12:50                             ` Catalin Marinas
2021-10-28 22:32                           ` Linus Torvalds
2021-10-29 17:50                             ` Catalin Marinas
2021-10-29 18:47                               ` Linus Torvalds
2021-10-25 18:24             ` Andreas Gruenbacher
2021-10-26  5:12               ` Theodore Ts'o
2021-10-26  9:44               ` Andreas Gruenbacher
2021-10-27 21:21               ` Andreas Gruenbacher [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211027212138.3722977-1-agruenba@redhat.com \
    --to=agruenba@redhat.com \
    --cc=catalin.marinas@arm.com \
    --cc=cluster-devel@redhat.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=djwong@kernel.org \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=kvm-ppc@vger.kernel.org \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ocfs2-devel@oss.oracle.com \
    --cc=paulus@ozlabs.org \
    --cc=torvalds@linux-foundation.org \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).