All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: Dave Chinner <david@fromorbit.com>, Goldwyn Rodrigues <rgoldwyn@suse.de>
Cc: Damien Le Moal <damien.lemoal@wdc.com>,
	Naohiro Aota <naohiro.aota@wdc.com>,
	Johannes Thumshirn <jth@kernel.org>,
	Matthew Wilcox <willy@infradead.org>,
	linux-btrfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	cluster-devel@redhat.com, linux-ext4@vger.kernel.org,
	linux-xfs@vger.kernel.org
Subject: [PATCH 2/2] iomap: fall back to buffered writes for invalidation failures
Date: Mon, 13 Jul 2020 09:46:33 +0200	[thread overview]
Message-ID: <20200713074633.875946-3-hch@lst.de> (raw)
In-Reply-To: <20200713074633.875946-1-hch@lst.de>

Failing to invalidate the page cache means data is incoherent, which is
a very bad state for the system.  Always fall back to buffered I/O
through the page cache if we can't invalidate mappings.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/ext4/file.c       |  2 ++
 fs/gfs2/file.c       |  3 ++-
 fs/iomap/direct-io.c | 13 ++++++++-----
 fs/iomap/trace.h     |  1 +
 fs/xfs/xfs_file.c    |  4 ++--
 fs/zonefs/super.c    |  7 +++++--
 6 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2a01e31a032c4c..0da6c2a2c32c1e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -544,6 +544,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		iomap_ops = &ext4_iomap_overwrite_ops;
 	ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
 			   is_sync_kiocb(iocb) || unaligned_io || extend);
+	if (ret == -EREMCHG)
+		ret = 0;
 
 	if (extend)
 		ret = ext4_handle_inode_extension(inode, offset, ret, count);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index fe305e4bfd3734..c7907d40c61d17 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -814,7 +814,8 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
 
 	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
 			   is_sync_kiocb(iocb));
-
+	if (ret == -EREMCHG)
+		ret = 0;
 out:
 	gfs2_glock_dq(&gh);
 out_uninit:
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 190967e87b69e4..62626235cdbe8d 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -10,6 +10,7 @@
 #include <linux/backing-dev.h>
 #include <linux/uio.h>
 #include <linux/task_io_accounting_ops.h>
+#include "trace.h"
 
 #include "../internal.h"
 
@@ -478,13 +479,15 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	if (iov_iter_rw(iter) == WRITE) {
 		/*
 		 * Try to invalidate cache pages for the range we are writing.
-		 * If this invalidation fails, tough, the write will still work,
-		 * but racing two incompatible write paths is a pretty crazy
-		 * thing to do, so we don't support it 100%.
+		 * If this invalidation fails, let the caller fall back to
+		 * buffered I/O.
 		 */
 		if (invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
-				end >> PAGE_SHIFT))
-			dio_warn_stale_pagecache(iocb->ki_filp);
+				end >> PAGE_SHIFT)) {
+			trace_iomap_dio_invalidate_fail(inode, pos, count);
+			ret = -EREMCHG;
+			goto out_free_dio;
+		}
 
 		if (!wait_for_completion && !inode->i_sb->s_dio_done_wq) {
 			ret = sb_init_dio_done_wq(inode->i_sb);
diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
index 5693a39d52fb63..fdc7ae388476f5 100644
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@@ -74,6 +74,7 @@ DEFINE_EVENT(iomap_range_class, name,	\
 DEFINE_RANGE_EVENT(iomap_writepage);
 DEFINE_RANGE_EVENT(iomap_releasepage);
 DEFINE_RANGE_EVENT(iomap_invalidatepage);
+DEFINE_RANGE_EVENT(iomap_dio_invalidate_fail);
 
 #define IOMAP_TYPE_STRINGS \
 	{ IOMAP_HOLE,		"HOLE" }, \
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 00db81eac80d6c..551cca39fa3ba6 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -553,8 +553,8 @@ xfs_file_dio_aio_write(
 	xfs_iunlock(ip, iolock);
 
 	/*
-	 * No fallback to buffered IO on errors for XFS, direct IO will either
-	 * complete fully or fail.
+	 * No partial fallback to buffered IO on errors for XFS, direct IO will
+	 * either complete fully or fail.
 	 */
 	ASSERT(ret < 0 || ret == count);
 	return ret;
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 07bc42d62673ce..793850454b752f 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -786,8 +786,11 @@ static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size)
 		return -EFBIG;
 
-	if (iocb->ki_flags & IOCB_DIRECT)
-		return zonefs_file_dio_write(iocb, from);
+	if (iocb->ki_flags & IOCB_DIRECT) {
+		ret = zonefs_file_dio_write(iocb, from);
+		if (ret != -EREMCHG)
+			return ret;
+	}
 
 	return zonefs_file_buffered_write(iocb, from);
 }
-- 
2.26.2


WARNING: multiple messages have this Message-ID (diff)
From: Christoph Hellwig <hch@lst.de>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH 2/2] iomap: fall back to buffered writes for invalidation failures
Date: Mon, 13 Jul 2020 09:46:33 +0200	[thread overview]
Message-ID: <20200713074633.875946-3-hch@lst.de> (raw)
In-Reply-To: <20200713074633.875946-1-hch@lst.de>

Failing to invalidate the page cache means data is incoherent, which is
a very bad state for the system.  Always fall back to buffered I/O
through the page cache if we can't invalidate mappings.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/ext4/file.c       |  2 ++
 fs/gfs2/file.c       |  3 ++-
 fs/iomap/direct-io.c | 13 ++++++++-----
 fs/iomap/trace.h     |  1 +
 fs/xfs/xfs_file.c    |  4 ++--
 fs/zonefs/super.c    |  7 +++++--
 6 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2a01e31a032c4c..0da6c2a2c32c1e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -544,6 +544,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		iomap_ops = &ext4_iomap_overwrite_ops;
 	ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
 			   is_sync_kiocb(iocb) || unaligned_io || extend);
+	if (ret == -EREMCHG)
+		ret = 0;
 
 	if (extend)
 		ret = ext4_handle_inode_extension(inode, offset, ret, count);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index fe305e4bfd3734..c7907d40c61d17 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -814,7 +814,8 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
 
 	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
 			   is_sync_kiocb(iocb));
-
+	if (ret == -EREMCHG)
+		ret = 0;
 out:
 	gfs2_glock_dq(&gh);
 out_uninit:
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 190967e87b69e4..62626235cdbe8d 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -10,6 +10,7 @@
 #include <linux/backing-dev.h>
 #include <linux/uio.h>
 #include <linux/task_io_accounting_ops.h>
+#include "trace.h"
 
 #include "../internal.h"
 
@@ -478,13 +479,15 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	if (iov_iter_rw(iter) == WRITE) {
 		/*
 		 * Try to invalidate cache pages for the range we are writing.
-		 * If this invalidation fails, tough, the write will still work,
-		 * but racing two incompatible write paths is a pretty crazy
-		 * thing to do, so we don't support it 100%.
+		 * If this invalidation fails, let the caller fall back to
+		 * buffered I/O.
 		 */
 		if (invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
-				end >> PAGE_SHIFT))
-			dio_warn_stale_pagecache(iocb->ki_filp);
+				end >> PAGE_SHIFT)) {
+			trace_iomap_dio_invalidate_fail(inode, pos, count);
+			ret = -EREMCHG;
+			goto out_free_dio;
+		}
 
 		if (!wait_for_completion && !inode->i_sb->s_dio_done_wq) {
 			ret = sb_init_dio_done_wq(inode->i_sb);
diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
index 5693a39d52fb63..fdc7ae388476f5 100644
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@@ -74,6 +74,7 @@ DEFINE_EVENT(iomap_range_class, name,	\
 DEFINE_RANGE_EVENT(iomap_writepage);
 DEFINE_RANGE_EVENT(iomap_releasepage);
 DEFINE_RANGE_EVENT(iomap_invalidatepage);
+DEFINE_RANGE_EVENT(iomap_dio_invalidate_fail);
 
 #define IOMAP_TYPE_STRINGS \
 	{ IOMAP_HOLE,		"HOLE" }, \
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 00db81eac80d6c..551cca39fa3ba6 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -553,8 +553,8 @@ xfs_file_dio_aio_write(
 	xfs_iunlock(ip, iolock);
 
 	/*
-	 * No fallback to buffered IO on errors for XFS, direct IO will either
-	 * complete fully or fail.
+	 * No partial fallback to buffered IO on errors for XFS, direct IO will
+	 * either complete fully or fail.
 	 */
 	ASSERT(ret < 0 || ret == count);
 	return ret;
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 07bc42d62673ce..793850454b752f 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -786,8 +786,11 @@ static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size)
 		return -EFBIG;
 
-	if (iocb->ki_flags & IOCB_DIRECT)
-		return zonefs_file_dio_write(iocb, from);
+	if (iocb->ki_flags & IOCB_DIRECT) {
+		ret = zonefs_file_dio_write(iocb, from);
+		if (ret != -EREMCHG)
+			return ret;
+	}
 
 	return zonefs_file_buffered_write(iocb, from);
 }
-- 
2.26.2



  parent reply	other threads:[~2020-07-13  7:51 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-13  7:46 RFC: iomap write invalidation Christoph Hellwig
2020-07-13  7:46 ` [Cluster-devel] " Christoph Hellwig
2020-07-13  7:46 ` [PATCH 1/2] iomap: Only invalidate page cache pages on direct IO writes Christoph Hellwig
2020-07-13  7:46   ` [Cluster-devel] " Christoph Hellwig
2020-07-13  7:46 ` Christoph Hellwig [this message]
2020-07-13  7:46   ` [Cluster-devel] [PATCH 2/2] iomap: fall back to buffered writes for invalidation failures Christoph Hellwig
2020-07-13 11:55   ` Matthew Wilcox
2020-07-13 11:55     ` [Cluster-devel] " Matthew Wilcox
2020-07-14 11:00     ` Christoph Hellwig
2020-07-14 11:00       ` [Cluster-devel] " Christoph Hellwig
2020-07-13 12:20   ` Goldwyn Rodrigues
2020-07-13 12:20     ` [Cluster-devel] " Goldwyn Rodrigues
2020-07-13 16:09     ` David Sterba
2020-07-13 16:09       ` [Cluster-devel] " David Sterba
2020-07-13 15:39   ` Darrick J. Wong
2020-07-13 15:39     ` [Cluster-devel] " Darrick J. Wong
2020-07-14 11:00     ` Christoph Hellwig
2020-07-14 11:00       ` [Cluster-devel] " Christoph Hellwig
2020-07-14  1:41   ` Damien Le Moal
2020-07-14  1:41     ` [Cluster-devel] " Damien Le Moal
2020-07-15  1:47 ` RFC: iomap write invalidation Dave Chinner
2020-07-15  1:47   ` [Cluster-devel] " Dave Chinner
2020-07-20 21:51 ` Goldwyn Rodrigues
2020-07-20 21:51   ` [Cluster-devel] " Goldwyn Rodrigues
2020-07-21 14:53   ` Christoph Hellwig
2020-07-21 14:53     ` [Cluster-devel] " Christoph Hellwig
2020-07-21 14:59     ` Darrick J. Wong
2020-07-21 14:59       ` [Cluster-devel] " Darrick J. Wong
2020-07-21 15:04     ` Matthew Wilcox
2020-07-21 15:04       ` [Cluster-devel] " Matthew Wilcox
2020-07-21 15:06       ` Christoph Hellwig
2020-07-21 15:06         ` [Cluster-devel] " Christoph Hellwig
2020-07-21 15:14         ` Matthew Wilcox
2020-07-21 15:14           ` [Cluster-devel] " Matthew Wilcox
2020-07-21 15:16           ` Christoph Hellwig
2020-07-21 15:16             ` [Cluster-devel] " Christoph Hellwig
2020-07-21 15:27             ` Darrick J. Wong
2020-07-21 15:27               ` [Cluster-devel] " Darrick J. Wong
2020-07-21 15:41               ` Christoph Hellwig
2020-07-21 15:41                 ` [Cluster-devel] " Christoph Hellwig
2020-07-21 15:59                 ` Darrick J. Wong
2020-07-21 15:59                   ` [Cluster-devel] " Darrick J. Wong
2020-07-21 16:01                   ` Christoph Hellwig
2020-07-21 16:01                     ` [Cluster-devel] " Christoph Hellwig
2020-07-21 16:05                     ` Darrick J. Wong
2020-07-21 16:05                       ` [Cluster-devel] " Darrick J. Wong
2020-07-21 15:31             ` Matthew Wilcox
2020-07-21 15:31               ` [Cluster-devel] " Matthew Wilcox
2020-07-21 15:42               ` Christoph Hellwig
2020-07-21 15:42                 ` [Cluster-devel] " Christoph Hellwig
2020-07-21 15:52                 ` Matthew Wilcox
2020-07-21 15:52                   ` [Cluster-devel] " Matthew Wilcox
2020-07-21 16:03                   ` Darrick J. Wong
2020-07-21 16:03                     ` [Cluster-devel] " Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200713074633.875946-3-hch@lst.de \
    --to=hch@lst.de \
    --cc=cluster-devel@redhat.com \
    --cc=damien.lemoal@wdc.com \
    --cc=david@fromorbit.com \
    --cc=jth@kernel.org \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=naohiro.aota@wdc.com \
    --cc=rgoldwyn@suse.de \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.