ocfs2-devel.oss.oracle.com archive mirror
 help / color / mirror / Atom feed
From: Andreas Gruenbacher <agruenba@redhat.com>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Christoph Hellwig <hch@infradead.org>,
	"Darrick J. Wong" <djwong@kernel.org>
Cc: Jan Kara <jack@suse.cz>,
	Andreas Gruenbacher <agruenba@redhat.com>,
	linux-kernel@vger.kernel.org, cluster-devel@redhat.com,
	linux-fsdevel@vger.kernel.org, ocfs2-devel@oss.oracle.com
Subject: [Ocfs2-devel] [PATCH v6 18/19] gfs2: Fix mmap + page fault deadlocks for direct I/O
Date: Thu, 19 Aug 2021 21:41:01 +0200	[thread overview]
Message-ID: <20210819194102.1491495-19-agruenba@redhat.com> (raw)
In-Reply-To: <20210819194102.1491495-1-agruenba@redhat.com>

Also disable page faults during direct I/O requests and implement the
same kind of retry logic as in the buffered I/O case.

Direct I/O requests differ from buffered I/O requests in that they use
bio_iov_iter_get_pages for grabbing page references and faulting in
pages instead of triggering physical page faults.  Those manual page
faults can be disabled with the new iocb->nofault flag.

This kind of locking problem in gfs2 was originally reported by Jan
Kara.  Linus came up with the proposal to disable page faults.  Many
thanks to Al Viro and Matthew Wilcox for their feedback.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/file.c | 99 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 87 insertions(+), 12 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 8ca6bdba907c..2cf3466b9dde 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -811,22 +811,64 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
 {
 	struct file *file = iocb->ki_filp;
 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
-	size_t count = iov_iter_count(to);
+	size_t prev_count = 0, window_size = 0;
+	size_t written = 0;
 	ssize_t ret;
 
-	if (!count)
+	/*
+	 * In this function, we disable page faults when we're holding the
+	 * inode glock while doing I/O.  If a page fault occurs, we drop the
+	 * inode glock, fault in the pages manually, and retry.
+	 *
+	 * Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger
+	 * physical as well as manual page faults, and we need to disable both
+	 * kinds.
+	 *
+	 * For direct I/O, gfs2 takes the inode glock in deferred mode.  This
+	 * locking mode is compatible with other deferred holders, so multiple
+	 * processes and nodes can do direct I/O to a file at the same time.
+	 * There's no guarantee that reads or writes will be atomic.  Any
+	 * coordination among readers and writers needs to happen externally.
+	 */
+
+	if (!iov_iter_count(to))
 		return 0; /* skip atime */
 
 	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
+retry:
 	ret = gfs2_glock_nq(gh);
 	if (ret)
 		goto out_uninit;
+retry_under_glock:
+	pagefault_disable();
+	to->nofault = true;
+	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
+			   IOMAP_DIO_PARTIAL, written);
+	to->nofault = false;
+	pagefault_enable();
+	if (ret > 0)
+		written = ret;
+
+	if (unlikely(iov_iter_count(to) && (ret > 0 || ret == -EFAULT)) &&
+	    should_fault_in_pages(to, &prev_count, &window_size)) {
+		size_t leftover;
 
-	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0, 0);
-	gfs2_glock_dq(gh);
+		gfs2_holder_allow_demote(gh);
+		leftover = fault_in_iov_iter_writeable(to, window_size);
+		gfs2_holder_disallow_demote(gh);
+		if (leftover != window_size) {
+			if (!gfs2_holder_queued(gh))
+				goto retry;
+			goto retry_under_glock;
+		}
+	}
+	if (gfs2_holder_queued(gh))
+		gfs2_glock_dq(gh);
 out_uninit:
 	gfs2_holder_uninit(gh);
-	return ret;
+	if (ret < 0)
+		return ret;
+	return written;
 }
 
 static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
@@ -835,10 +877,19 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	struct gfs2_inode *ip = GFS2_I(inode);
-	size_t len = iov_iter_count(from);
-	loff_t offset = iocb->ki_pos;
+	size_t prev_count = 0, window_size = 0;
+	size_t read = 0;
 	ssize_t ret;
 
+	/*
+	 * In this function, we disable page faults when we're holding the
+	 * inode glock while doing I/O.  If a page fault occurs, we drop the
+	 * inode glock, fault in the pages manually, and retry.
+	 *
+	 * For writes, iomap_dio_rw only triggers manual page faults, so we
+	 * don't need to disable physical ones.
+	 */
+
 	/*
 	 * Deferred lock, even if its a write, since we do no allocation on
 	 * this path. All we need to change is the atime, and this lock mode
@@ -848,22 +899,46 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 	 * VFS does.
 	 */
 	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
+retry:
 	ret = gfs2_glock_nq(gh);
 	if (ret)
 		goto out_uninit;
-
+retry_under_glock:
 	/* Silently fall back to buffered I/O when writing beyond EOF */
-	if (offset + len > i_size_read(&ip->i_inode))
+	if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode))
 		goto out;
 
-	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0, 0);
+	from->nofault = true;
+	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
+			   IOMAP_DIO_PARTIAL, read);
+	from->nofault = false;
+
 	if (ret == -ENOTBLK)
 		ret = 0;
+	if (ret > 0)
+		read = ret;
+
+	if (unlikely(iov_iter_count(from) && (ret > 0 || ret == -EFAULT)) &&
+	    should_fault_in_pages(from, &prev_count, &window_size)) {
+		size_t leftover;
+
+		gfs2_holder_allow_demote(gh);
+		leftover = fault_in_iov_iter_readable(from, window_size);
+		gfs2_holder_disallow_demote(gh);
+		if (leftover != window_size) {
+			if (!gfs2_holder_queued(gh))
+				goto retry;
+			goto retry_under_glock;
+		}
+	}
 out:
-	gfs2_glock_dq(gh);
+	if (gfs2_holder_queued(gh))
+		gfs2_glock_dq(gh);
 out_uninit:
 	gfs2_holder_uninit(gh);
-	return ret;
+	if (ret < 0)
+		return ret;
+	return read;
 }
 
 static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
-- 
2.26.3


_______________________________________________
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
https://oss.oracle.com/mailman/listinfo/ocfs2-devel

  parent reply	other threads:[~2021-08-19 19:42 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-19 19:40 [Ocfs2-devel] [PATCH v6 00/19] gfs2: Fix mmap + page fault deadlocks Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 01/19] iov_iter: Fix iov_iter_get_pages{, _alloc} page fault return value Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 02/19] powerpc/kvm: Fix kvm_use_magic_page Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 03/19] Turn fault_in_pages_{readable, writeable} into fault_in_{readable, writeable} Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 04/19] Turn iov_iter_fault_in_readable into fault_in_iov_iter_readable Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 05/19] iov_iter: Introduce fault_in_iov_iter_writeable Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 06/19] gfs2: Add wrapper for iomap_file_buffered_write Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 07/19] gfs2: Clean up function may_grant Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 08/19] gfs2: Eliminate vestigial HIF_FIRST Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 09/19] gfs2: Remove redundant check from gfs2_glock_dq Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 10/19] gfs2: Introduce flag for glock holder auto-demotion Andreas Gruenbacher
2021-08-20  9:35   ` [Ocfs2-devel] [Cluster-devel] " Steven Whitehouse
2021-08-20 13:11     ` Bob Peterson
2021-08-20 13:41       ` Steven Whitehouse
2021-08-20 15:22       ` Andreas Gruenbacher
2021-08-23  8:14         ` Steven Whitehouse
2021-08-23 15:18           ` Andreas Gruenbacher
2021-08-23 16:05             ` Matthew Wilcox
2021-08-23 16:36               ` Bob Peterson
2021-08-23 19:12               ` Andreas Gruenbacher
2021-08-24  7:59             ` Steven Whitehouse
2021-08-20 13:17     ` Andreas Gruenbacher
2021-08-20 13:47       ` Steven Whitehouse
2021-08-20 14:43         ` Andreas Grünbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 11/19] gfs2: Move the inode glock locking to gfs2_file_buffered_write Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 12/19] gfs2: Fix mmap + page fault deadlocks for buffered I/O Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 13/19] iomap: Fix iomap_dio_rw return value for user copies Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 14/19] iomap: Support partial direct I/O on user copy failures Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 15/19] iomap: Add done_before argument to iomap_dio_rw Andreas Gruenbacher
2021-08-19 19:40 ` [Ocfs2-devel] [PATCH v6 16/19] gup: Introduce FOLL_NOFAULT flag to disable page faults Andreas Gruenbacher
2021-08-19 19:41 ` [Ocfs2-devel] [PATCH v6 17/19] iov_iter: Introduce nofault " Andreas Gruenbacher
2021-08-19 19:41 ` Andreas Gruenbacher [this message]
2021-08-19 19:41 ` [Ocfs2-devel] [PATCH v6 19/19] gfs2: Eliminate ip->i_gh Andreas Gruenbacher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210819194102.1491495-19-agruenba@redhat.com \
    --to=agruenba@redhat.com \
    --cc=cluster-devel@redhat.com \
    --cc=djwong@kernel.org \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ocfs2-devel@oss.oracle.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    --subject='Re: [Ocfs2-devel] [PATCH v6 18/19] gfs2: Fix mmap + page fault deadlocks for direct I/O' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).