All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andreas Gruenbacher <agruenba@redhat.com>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Christoph Hellwig <hch@infradead.org>,
	"Darrick J. Wong" <djwong@kernel.org>
Cc: Jan Kara <jack@suse.cz>, Matthew Wilcox <willy@infradead.org>,
	cluster-devel@redhat.com, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, ocfs2-devel@oss.oracle.com,
	Andreas Gruenbacher <agruenba@redhat.com>
Subject: [PATCH v4 8/8] gfs2: Fix mmap + page fault deadlocks for direct I/O
Date: Sat, 24 Jul 2021 21:34:49 +0200	[thread overview]
Message-ID: <20210724193449.361667-9-agruenba@redhat.com> (raw)
In-Reply-To: <20210724193449.361667-1-agruenba@redhat.com>

Also disable page faults during direct I/O requests and implement the same
kind of retry logic as in the buffered I/O case.

Direct I/O requests differ from buffered I/O requests in that they use
bio_iov_iter_get_pages for grabbing page references and faulting in pages
instead of triggering physical page faults.  Those manual page faults can be
disabled with the new iocb->noio flag.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/file.c | 47 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index eea42cc94585..fbdee282185f 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -782,21 +782,47 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
 	struct file *file = iocb->ki_filp;
 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
 	size_t count = iov_iter_count(to);
+	size_t written = 0;
 	ssize_t ret;
 
+	/*
+	 * In this function, we disable page faults when we're holding the
+	 * inode glock while doing I/O.  If a page fault occurs, we drop the
+	 * inode glock, fault in the pages manually, and then we retry.
+	 *
+	 * Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger
+	 * physical as well as manual page faults, and we need to disable both
+	 * kinds.
+	 */
+
 	if (!count)
 		return 0; /* skip atime */
 
 	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
+retry:
 	ret = gfs2_glock_nq(gh);
 	if (ret)
 		goto out_uninit;
 
-	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0, 0);
+	pagefault_disable();
+	to->noio = true;
+	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
+			   IOMAP_DIO_FAULT_RETRY, written);
+	to->noio = false;
+	pagefault_enable();
+
 	gfs2_glock_dq(gh);
+	if (ret > 0)
+		written = ret;
+	if (unlikely(iov_iter_count(to) && (ret > 0 || ret == -EFAULT)) &&
+	    iter_is_iovec(to) &&
+	    iov_iter_fault_in_writeable(to, SIZE_MAX) == 0)
+		goto retry;
 out_uninit:
 	gfs2_holder_uninit(gh);
-	return ret;
+	if (ret < 0)
+		return ret;
+	return written;
 }
 
 static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
@@ -809,6 +835,15 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
+	/*
+	 * In this function, we disable page faults when we're holding the
+	 * inode glock while doing I/O.  If a page fault occurs, we drop the
+	 * inode glock, fault in the pages manually, and then we retry.
+	 *
+	 * For writes, iomap_dio_rw only triggers manual page faults, so we
+	 * don't need to disable physical ones.
+	 */
+
 	/*
 	 * Deferred lock, even if its a write, since we do no allocation on
 	 * this path. All we need to change is the atime, and this lock mode
@@ -818,6 +853,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 	 * VFS does.
 	 */
 	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
+retry:
 	ret = gfs2_glock_nq(gh);
 	if (ret)
 		goto out_uninit;
@@ -826,11 +862,18 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 	if (offset + len > i_size_read(&ip->i_inode))
 		goto out;
 
+	from->noio = true;
 	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0, 0);
+	from->noio = false;
+
 	if (ret == -ENOTBLK)
 		ret = 0;
 out:
 	gfs2_glock_dq(gh);
+	if (unlikely(ret == -EFAULT) &&
+	    iter_is_iovec(from) &&
+	    iov_iter_fault_in_readable(from, SIZE_MAX) == 0)
+		goto retry;
 out_uninit:
 	gfs2_holder_uninit(gh);
 	return ret;
-- 
2.26.3


WARNING: multiple messages have this Message-ID (diff)
From: Andreas Gruenbacher <agruenba@redhat.com>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Christoph Hellwig <hch@infradead.org>,
	"Darrick J. Wong" <djwong@kernel.org>
Cc: Jan Kara <jack@suse.cz>,
	Andreas Gruenbacher <agruenba@redhat.com>,
	linux-kernel@vger.kernel.org, cluster-devel@redhat.com,
	linux-fsdevel@vger.kernel.org, ocfs2-devel@oss.oracle.com
Subject: [Ocfs2-devel] [PATCH v4 8/8] gfs2: Fix mmap + page fault deadlocks for direct I/O
Date: Sat, 24 Jul 2021 21:34:49 +0200	[thread overview]
Message-ID: <20210724193449.361667-9-agruenba@redhat.com> (raw)
In-Reply-To: <20210724193449.361667-1-agruenba@redhat.com>

Also disable page faults during direct I/O requests and implement the same
kind of retry logic as in the buffered I/O case.

Direct I/O requests differ from buffered I/O requests in that they use
bio_iov_iter_get_pages for grabbing page references and faulting in pages
instead of triggering physical page faults.  Those manual page faults can be
disabled with the new iocb->noio flag.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/file.c | 47 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index eea42cc94585..fbdee282185f 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -782,21 +782,47 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
 	struct file *file = iocb->ki_filp;
 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
 	size_t count = iov_iter_count(to);
+	size_t written = 0;
 	ssize_t ret;
 
+	/*
+	 * In this function, we disable page faults when we're holding the
+	 * inode glock while doing I/O.  If a page fault occurs, we drop the
+	 * inode glock, fault in the pages manually, and then we retry.
+	 *
+	 * Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger
+	 * physical as well as manual page faults, and we need to disable both
+	 * kinds.
+	 */
+
 	if (!count)
 		return 0; /* skip atime */
 
 	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
+retry:
 	ret = gfs2_glock_nq(gh);
 	if (ret)
 		goto out_uninit;
 
-	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0, 0);
+	pagefault_disable();
+	to->noio = true;
+	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
+			   IOMAP_DIO_FAULT_RETRY, written);
+	to->noio = false;
+	pagefault_enable();
+
 	gfs2_glock_dq(gh);
+	if (ret > 0)
+		written = ret;
+	if (unlikely(iov_iter_count(to) && (ret > 0 || ret == -EFAULT)) &&
+	    iter_is_iovec(to) &&
+	    iov_iter_fault_in_writeable(to, SIZE_MAX) == 0)
+		goto retry;
 out_uninit:
 	gfs2_holder_uninit(gh);
-	return ret;
+	if (ret < 0)
+		return ret;
+	return written;
 }
 
 static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
@@ -809,6 +835,15 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
+	/*
+	 * In this function, we disable page faults when we're holding the
+	 * inode glock while doing I/O.  If a page fault occurs, we drop the
+	 * inode glock, fault in the pages manually, and then we retry.
+	 *
+	 * For writes, iomap_dio_rw only triggers manual page faults, so we
+	 * don't need to disable physical ones.
+	 */
+
 	/*
 	 * Deferred lock, even if its a write, since we do no allocation on
 	 * this path. All we need to change is the atime, and this lock mode
@@ -818,6 +853,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 	 * VFS does.
 	 */
 	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
+retry:
 	ret = gfs2_glock_nq(gh);
 	if (ret)
 		goto out_uninit;
@@ -826,11 +862,18 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 	if (offset + len > i_size_read(&ip->i_inode))
 		goto out;
 
+	from->noio = true;
 	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0, 0);
+	from->noio = false;
+
 	if (ret == -ENOTBLK)
 		ret = 0;
 out:
 	gfs2_glock_dq(gh);
+	if (unlikely(ret == -EFAULT) &&
+	    iter_is_iovec(from) &&
+	    iov_iter_fault_in_readable(from, SIZE_MAX) == 0)
+		goto retry;
 out_uninit:
 	gfs2_holder_uninit(gh);
 	return ret;
-- 
2.26.3


_______________________________________________
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
https://oss.oracle.com/mailman/listinfo/ocfs2-devel

WARNING: multiple messages have this Message-ID (diff)
From: Andreas Gruenbacher <agruenba@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH v4 8/8] gfs2: Fix mmap + page fault deadlocks for direct I/O
Date: Sat, 24 Jul 2021 21:34:49 +0200	[thread overview]
Message-ID: <20210724193449.361667-9-agruenba@redhat.com> (raw)
In-Reply-To: <20210724193449.361667-1-agruenba@redhat.com>

Also disable page faults during direct I/O requests and implement the same
kind of retry logic as in the buffered I/O case.

Direct I/O requests differ from buffered I/O requests in that they use
bio_iov_iter_get_pages for grabbing page references and faulting in pages
instead of triggering physical page faults.  Those manual page faults can be
disabled with the new iocb->noio flag.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/file.c | 47 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index eea42cc94585..fbdee282185f 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -782,21 +782,47 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
 	struct file *file = iocb->ki_filp;
 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
 	size_t count = iov_iter_count(to);
+	size_t written = 0;
 	ssize_t ret;
 
+	/*
+	 * In this function, we disable page faults when we're holding the
+	 * inode glock while doing I/O.  If a page fault occurs, we drop the
+	 * inode glock, fault in the pages manually, and then we retry.
+	 *
+	 * Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger
+	 * physical as well as manual page faults, and we need to disable both
+	 * kinds.
+	 */
+
 	if (!count)
 		return 0; /* skip atime */
 
 	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
+retry:
 	ret = gfs2_glock_nq(gh);
 	if (ret)
 		goto out_uninit;
 
-	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0, 0);
+	pagefault_disable();
+	to->noio = true;
+	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
+			   IOMAP_DIO_FAULT_RETRY, written);
+	to->noio = false;
+	pagefault_enable();
+
 	gfs2_glock_dq(gh);
+	if (ret > 0)
+		written = ret;
+	if (unlikely(iov_iter_count(to) && (ret > 0 || ret == -EFAULT)) &&
+	    iter_is_iovec(to) &&
+	    iov_iter_fault_in_writeable(to, SIZE_MAX) == 0)
+		goto retry;
 out_uninit:
 	gfs2_holder_uninit(gh);
-	return ret;
+	if (ret < 0)
+		return ret;
+	return written;
 }
 
 static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
@@ -809,6 +835,15 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
+	/*
+	 * In this function, we disable page faults when we're holding the
+	 * inode glock while doing I/O.  If a page fault occurs, we drop the
+	 * inode glock, fault in the pages manually, and then we retry.
+	 *
+	 * For writes, iomap_dio_rw only triggers manual page faults, so we
+	 * don't need to disable physical ones.
+	 */
+
 	/*
 	 * Deferred lock, even if its a write, since we do no allocation on
 	 * this path. All we need to change is the atime, and this lock mode
@@ -818,6 +853,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 	 * VFS does.
 	 */
 	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
+retry:
 	ret = gfs2_glock_nq(gh);
 	if (ret)
 		goto out_uninit;
@@ -826,11 +862,18 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 	if (offset + len > i_size_read(&ip->i_inode))
 		goto out;
 
+	from->noio = true;
 	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0, 0);
+	from->noio = false;
+
 	if (ret == -ENOTBLK)
 		ret = 0;
 out:
 	gfs2_glock_dq(gh);
+	if (unlikely(ret == -EFAULT) &&
+	    iter_is_iovec(from) &&
+	    iov_iter_fault_in_readable(from, SIZE_MAX) == 0)
+		goto retry;
 out_uninit:
 	gfs2_holder_uninit(gh);
 	return ret;
-- 
2.26.3



  parent reply	other threads:[~2021-07-24 19:35 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-24 19:34 [PATCH v4 0/8] gfs2: Fix mmap + page fault deadlocks Andreas Gruenbacher
2021-07-24 19:34 ` [Cluster-devel] " Andreas Gruenbacher
2021-07-24 19:34 ` [Ocfs2-devel] " Andreas Gruenbacher
2021-07-24 19:34 ` [PATCH v4 1/8] iov_iter: Introduce iov_iter_fault_in_writeable helper Andreas Gruenbacher
2021-07-24 19:34   ` [Cluster-devel] " Andreas Gruenbacher
2021-07-24 19:34   ` [Ocfs2-devel] " Andreas Gruenbacher
2021-07-24 19:52   ` Linus Torvalds
2021-07-24 19:52     ` [Cluster-devel] " Linus Torvalds
2021-07-24 19:52     ` [Ocfs2-devel] " Linus Torvalds
2021-07-24 20:24     ` Al Viro
2021-07-24 20:24       ` [Cluster-devel] " Al Viro
2021-07-24 20:24       ` [Ocfs2-devel] " Al Viro
2021-07-24 20:37       ` Linus Torvalds
2021-07-24 20:37         ` [Cluster-devel] " Linus Torvalds
2021-07-24 20:37         ` [Ocfs2-devel] " Linus Torvalds
2021-07-24 21:38       ` Andreas Gruenbacher
2021-07-24 21:38         ` [Cluster-devel] " Andreas Gruenbacher
2021-07-24 21:38         ` [Ocfs2-devel] " Andreas Gruenbacher
2021-07-24 21:57         ` Al Viro
2021-07-24 21:57           ` [Cluster-devel] " Al Viro
2021-07-24 21:57           ` [Ocfs2-devel] " Al Viro
2021-07-24 22:06           ` Andreas Gruenbacher
2021-07-24 22:06             ` [Cluster-devel] " Andreas Gruenbacher
2021-07-24 22:06             ` [Ocfs2-devel] " Andreas Gruenbacher
2021-07-24 23:39             ` Al Viro
2021-07-24 23:39               ` [Cluster-devel] " Al Viro
2021-07-24 23:39               ` [Ocfs2-devel] " Al Viro
2021-07-27  9:30     ` David Laight
2021-07-27  9:30       ` [Cluster-devel] " David Laight
2021-07-27  9:30       ` [Ocfs2-devel] " David Laight
2021-07-27 11:13       ` Andreas Gruenbacher
2021-07-27 11:13         ` [Cluster-devel] " Andreas Gruenbacher
2021-07-27 11:13         ` [Ocfs2-devel] " Andreas Gruenbacher
2021-07-27 17:51         ` Linus Torvalds
2021-07-27 17:51           ` [Cluster-devel] " Linus Torvalds
2021-07-27 17:51           ` [Ocfs2-devel] " Linus Torvalds
2021-07-24 19:34 ` [PATCH v4 2/8] gfs2: Add wrapper for iomap_file_buffered_write Andreas Gruenbacher
2021-07-24 19:34   ` [Cluster-devel] " Andreas Gruenbacher
2021-07-24 19:34   ` [Ocfs2-devel] " Andreas Gruenbacher
2021-07-24 19:34 ` [PATCH v4 3/8] gfs2: Fix mmap + page fault deadlocks for buffered I/O Andreas Gruenbacher
2021-07-24 19:34   ` [Cluster-devel] " Andreas Gruenbacher
2021-07-24 19:34   ` [Ocfs2-devel] " Andreas Gruenbacher
2021-07-24 19:34 ` [PATCH v4 4/8] iomap: Fix iomap_dio_rw return value for user copies Andreas Gruenbacher
2021-07-24 19:34   ` [Cluster-devel] " Andreas Gruenbacher
2021-07-24 19:34   ` [Ocfs2-devel] " Andreas Gruenbacher
2021-07-24 19:34 ` [PATCH v4 5/8] iomap: Add done_before argument to iomap_dio_rw Andreas Gruenbacher
2021-07-24 19:34   ` [Cluster-devel] " Andreas Gruenbacher
2021-07-24 19:34   ` [Ocfs2-devel] " Andreas Gruenbacher
2021-07-24 19:34 ` [PATCH v4 6/8] iomap: Support restarting direct I/O requests after user copy failures Andreas Gruenbacher
2021-07-24 19:34   ` [Cluster-devel] " Andreas Gruenbacher
2021-07-24 19:34   ` [Ocfs2-devel] " Andreas Gruenbacher
2021-07-24 19:34 ` [PATCH v4 7/8] iov_iter: Introduce noio flag to disable page faults Andreas Gruenbacher
2021-07-24 19:34   ` [Cluster-devel] " Andreas Gruenbacher
2021-07-24 19:34   ` [Ocfs2-devel] " Andreas Gruenbacher
2021-07-24 19:34 ` Andreas Gruenbacher [this message]
2021-07-24 19:34   ` [Cluster-devel] [PATCH v4 8/8] gfs2: Fix mmap + page fault deadlocks for direct I/O Andreas Gruenbacher
2021-07-24 19:34   ` [Ocfs2-devel] " Andreas Gruenbacher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210724193449.361667-9-agruenba@redhat.com \
    --to=agruenba@redhat.com \
    --cc=cluster-devel@redhat.com \
    --cc=djwong@kernel.org \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ocfs2-devel@oss.oracle.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.