From: "Darrick J. Wong" <djwong@kernel.org>
To: djwong@kernel.org
Cc: Dave Chinner <dchinner@redhat.com>,
linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
david@fromorbit.com, hch@infradead.org
Subject: [PATCH 06/14] xfs: use iomap_valid method to detect stale cached iomaps
Date: Wed, 09 Nov 2022 10:16:18 -0800
Message-ID: <166801777846.3992140.13450989888668636860.stgit@magnolia>
In-Reply-To: <166801774453.3992140.241667783932550826.stgit@magnolia>
From: Dave Chinner <dchinner@redhat.com>
Now that iomap supports a mechanism to validate cached iomaps for
buffered write operations, hook it up to the XFS buffered write ops
so that we can avoid data corruptions that result from stale cached
iomaps. See:
https://lore.kernel.org/linux-xfs/20220817093627.GZ3600936@dread.disaster.area/
or the ->iomap_valid() introduction commit for exact details of the
corruption vector.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_bmap.c | 4 +--
fs/xfs/xfs_aops.c | 2 +
fs/xfs/xfs_iomap.c | 69 +++++++++++++++++++++++++++++++++++-----------
fs/xfs/xfs_iomap.h | 4 +--
fs/xfs/xfs_pnfs.c | 5 ++-
5 files changed, 61 insertions(+), 23 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 49d0d4ea63fc..db225130618c 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4551,8 +4551,8 @@ xfs_bmapi_convert_delalloc(
* the extent. Just return the real extent at this offset.
*/
if (!isnullstartblock(bma.got.br_startblock)) {
- xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
*seq = READ_ONCE(ifp->if_seq);
+ xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, *seq);
goto out_trans_cancel;
}
@@ -4599,8 +4599,8 @@ xfs_bmapi_convert_delalloc(
XFS_STATS_INC(mp, xs_xstrat_quick);
ASSERT(!isnullstartblock(bma.got.br_startblock));
- xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
*seq = READ_ONCE(ifp->if_seq);
+ xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, *seq);
if (whichfork == XFS_COW_FORK)
xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 5d1a995b15f8..ca5a9e45a48c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -373,7 +373,7 @@ xfs_map_blocks(
isnullstartblock(imap.br_startblock))
goto allocate_blocks;
- xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0);
+ xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq);
trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
return 0;
allocate_blocks:
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2d48fcc7bd6f..5053ffcf10fe 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -54,7 +54,8 @@ xfs_bmbt_to_iomap(
struct iomap *iomap,
struct xfs_bmbt_irec *imap,
unsigned int mapping_flags,
- u16 iomap_flags)
+ u16 iomap_flags,
+ int sequence)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_buftarg *target = xfs_inode_buftarg(ip);
@@ -91,6 +92,9 @@ xfs_bmbt_to_iomap(
if (xfs_ipincount(ip) &&
(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
iomap->flags |= IOMAP_F_DIRTY;
+
+ /* The extent tree sequence is needed for iomap validity checking. */
+ *((int *)&iomap->private) = sequence;
return 0;
}
@@ -195,7 +199,8 @@ xfs_iomap_write_direct(
xfs_fileoff_t offset_fsb,
xfs_fileoff_t count_fsb,
unsigned int flags,
- struct xfs_bmbt_irec *imap)
+ struct xfs_bmbt_irec *imap,
+ int *seq)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
@@ -285,6 +290,8 @@ xfs_iomap_write_direct(
error = xfs_alert_fsblock_zero(ip, imap);
out_unlock:
+ if (seq)
+ *seq = READ_ONCE(ip->i_df.if_seq);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
@@ -743,6 +750,7 @@ xfs_direct_write_iomap_begin(
bool shared = false;
u16 iomap_flags = 0;
unsigned int lockmode = XFS_ILOCK_SHARED;
+ int seq;
ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));
@@ -811,9 +819,10 @@ xfs_direct_write_iomap_begin(
goto out_unlock;
}
+ seq = READ_ONCE(ip->i_df.if_seq);
xfs_iunlock(ip, lockmode);
trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq);
allocate_blocks:
error = -EAGAIN;
@@ -839,24 +848,25 @@ xfs_direct_write_iomap_begin(
xfs_iunlock(ip, lockmode);
error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
- flags, &imap);
+ flags, &imap, &seq);
if (error)
return error;
trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
- iomap_flags | IOMAP_F_NEW);
+ iomap_flags | IOMAP_F_NEW, seq);
out_found_cow:
+ seq = READ_ONCE(ip->i_df.if_seq);
xfs_iunlock(ip, lockmode);
length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
if (imap.br_startblock != HOLESTARTBLOCK) {
- error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
+ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
if (error)
return error;
}
- return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED);
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);
out_unlock:
if (lockmode)
@@ -915,6 +925,7 @@ xfs_buffered_write_iomap_begin(
int allocfork = XFS_DATA_FORK;
int error = 0;
unsigned int lockmode = XFS_ILOCK_EXCL;
+ int seq;
if (xfs_is_shutdown(mp))
return -EIO;
@@ -1094,26 +1105,29 @@ xfs_buffered_write_iomap_begin(
* Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
* them out if the write happens to fail.
*/
+ seq = READ_ONCE(ip->i_df.if_seq);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq);
found_imap:
+ seq = READ_ONCE(ip->i_df.if_seq);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
found_cow:
+ seq = READ_ONCE(ip->i_df.if_seq);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (imap.br_startoff <= offset_fsb) {
- error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
+ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
if (error)
return error;
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
- IOMAP_F_SHARED);
+ IOMAP_F_SHARED, seq);
}
xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
- return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0);
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq);
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1328,9 +1342,26 @@ xfs_buffered_write_iomap_end(
return 0;
}
+/*
+ * Check that the iomap passed to us is still valid: the sequence number
+ * stored in iomap->private must match the inode's current data fork if_seq.
+ */
+static bool
+xfs_buffered_write_iomap_valid(
+ struct inode *inode,
+ const struct iomap *iomap)
+{
+ int seq = *((int *)&iomap->private);
+
+ if (seq != READ_ONCE(XFS_I(inode)->i_df.if_seq))
+ return false;
+ return true;
+}
+
const struct iomap_ops xfs_buffered_write_iomap_ops = {
.iomap_begin = xfs_buffered_write_iomap_begin,
.iomap_end = xfs_buffered_write_iomap_end,
+ .iomap_valid = xfs_buffered_write_iomap_valid,
};
/*
@@ -1359,6 +1390,7 @@ xfs_read_iomap_begin(
int nimaps = 1, error = 0;
bool shared = false;
unsigned int lockmode = XFS_ILOCK_SHARED;
+ int seq;
ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO)));
@@ -1372,13 +1404,14 @@ xfs_read_iomap_begin(
&nimaps, 0);
if (!error && (flags & IOMAP_REPORT))
error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
+ seq = READ_ONCE(ip->i_df.if_seq);
xfs_iunlock(ip, lockmode);
if (error)
return error;
trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
- shared ? IOMAP_F_SHARED : 0);
+ shared ? IOMAP_F_SHARED : 0, seq);
}
const struct iomap_ops xfs_read_iomap_ops = {
@@ -1438,7 +1471,7 @@ xfs_seek_iomap_begin(
end_fsb = min(end_fsb, data_fsb);
xfs_trim_extent(&cmap, offset_fsb, end_fsb);
error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
- IOMAP_F_SHARED);
+ IOMAP_F_SHARED, READ_ONCE(ip->i_cowfp->if_seq));
/*
* This is a COW extent, so we must probe the page cache
* because there could be dirty page cache being backed
@@ -1460,7 +1493,8 @@ xfs_seek_iomap_begin(
imap.br_state = XFS_EXT_NORM;
done:
xfs_trim_extent(&imap, offset_fsb, end_fsb);
- error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
+ error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0,
+ READ_ONCE(ip->i_df.if_seq));
out_unlock:
xfs_iunlock(ip, lockmode);
return error;
@@ -1486,6 +1520,7 @@ xfs_xattr_iomap_begin(
struct xfs_bmbt_irec imap;
int nimaps = 1, error = 0;
unsigned lockmode;
+ int seq;
if (xfs_is_shutdown(mp))
return -EIO;
@@ -1502,12 +1537,14 @@ xfs_xattr_iomap_begin(
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, XFS_BMAPI_ATTRFORK);
out_unlock:
+
+ seq = READ_ONCE(ip->i_af.if_seq);
xfs_iunlock(ip, lockmode);
if (error)
return error;
ASSERT(nimaps);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
}
const struct iomap_ops xfs_xattr_iomap_ops = {
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 0f62ab633040..792fed2a9072 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -13,14 +13,14 @@ struct xfs_bmbt_irec;
int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb,
xfs_fileoff_t count_fsb, unsigned int flags,
- struct xfs_bmbt_irec *imap);
+ struct xfs_bmbt_irec *imap, int *sequence);
int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip,
xfs_fileoff_t end_fsb);
int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap,
struct xfs_bmbt_irec *imap, unsigned int mapping_flags,
- u16 iomap_flags);
+ u16 iomap_flags, int sequence);
int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len,
bool *did_zero);
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 37a24f0f7cd4..eea507a80c5c 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -125,6 +125,7 @@ xfs_fs_map_blocks(
int nimaps = 1;
uint lock_flags;
int error = 0;
+ int seq;
if (xfs_is_shutdown(mp))
return -EIO;
@@ -189,7 +190,7 @@ xfs_fs_map_blocks(
xfs_iunlock(ip, lock_flags);
error = xfs_iomap_write_direct(ip, offset_fsb,
- end_fsb - offset_fsb, 0, &imap);
+ end_fsb - offset_fsb, 0, &imap, &seq);
if (error)
goto out_unlock;
@@ -209,7 +210,7 @@ xfs_fs_map_blocks(
}
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0);
+ error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0, seq);
*device_generation = mp->m_generation;
return error;
out_unlock:
next prev parent reply other threads:[~2022-11-09 18:16 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-09 18:15 [PATCHSET RFCRAP v2 00/14] xfs, iomap: fix data corruption due to stale cached iomaps Darrick J. Wong
2022-11-09 18:15 ` [PATCH 01/14] xfs: write page faults in iomap are not buffered writes Darrick J. Wong
2022-11-09 18:15 ` [PATCH 02/14] xfs: punching delalloc extents on write failure is racy Darrick J. Wong
2022-11-09 18:16 ` [PATCH 03/14] xfs: use byte ranges for write cleanup ranges Darrick J. Wong
2022-11-09 18:16 ` [PATCH 04/14] xfs: buffered write failure should not truncate the page cache Darrick J. Wong
2022-11-15 8:32 ` Christoph Hellwig
2022-11-09 18:16 ` [PATCH 05/14] iomap: write iomap validity checks Darrick J. Wong
2022-11-09 18:16 ` Darrick J. Wong [this message]
2022-11-09 18:16 ` [PATCH 07/14] xfs: drop write error injection is unfixable, remove it Darrick J. Wong
2022-11-09 18:16 ` [PATCH 08/14] iomap: pass iter to ->iomap_begin implementations Darrick J. Wong
2022-11-15 8:36 ` Christoph Hellwig
2022-11-09 18:16 ` [PATCH 09/14] iomap: pass iter to ->iomap_end implementations Darrick J. Wong
2022-11-09 18:16 ` [PATCH 10/14] iomap: pass a private pointer to iomap_file_buffered_write Darrick J. Wong
2022-11-15 8:37 ` Christoph Hellwig
2022-11-09 18:16 ` [PATCH 11/14] xfs: move the seq counters for buffered writes to a private struct Darrick J. Wong
2022-11-15 8:38 ` Christoph Hellwig
2022-11-09 18:16 ` [PATCH 12/14] xfs: validate COW fork sequence counters during buffered writes Darrick J. Wong
2022-11-09 18:16 ` [PATCH 13/14] xfs: add debug knob to slow down writeback for fun Darrick J. Wong
2022-11-09 18:17 ` [PATCH 14/14] xfs: add debug knob to slow down write " Darrick J. Wong
2022-11-09 18:22 ` [PATCH 15/14] fstest: regression test for writeback corruption bug Darrick J. Wong
2022-11-09 18:23 ` [PATCH 16/14] fstest: regression test for writes racing with reclaim writeback Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=166801777846.3992140.13450989888668636860.stgit@magnolia \
--to=djwong@kernel.org \
--cc=david@fromorbit.com \
--cc=dchinner@redhat.com \
--cc=hch@infradead.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.