All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org, hch@infradead.org
Subject: [PATCH 2/2] xfs: relax unwritten writeback overhead under some circumstances
Date: Wed, 15 Jan 2020 22:15:58 -0800	[thread overview]
Message-ID: <157915535801.2406747.10502356876965505327.stgit@magnolia> (raw)
In-Reply-To: <157915534429.2406747.2688273938645013888.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

In the previous patch, we solved a stale disk contents exposure problem
by forcing the delalloc write path to create unwritten extents, write
the data, and convert the extents to written after writeback completes.

This is a pretty huge hammer to use, so we'll relax the delalloc write
strategy to go straight to written extents (as we once did) if someone
tells us to write the entire file to disk.  This reopens the exposure
window slightly, but we'll only be affected if writeback completes out
of order and the system crashes during writeback.

Because once again we can map written extents past EOF, we also
enlarge the writepages window downward if the window is beyond the
on-disk size and there are written extents after the EOF block.  This
ensures that speculative post-EOF preallocations are not left uncovered.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c |    8 ++++---
 fs/xfs/libxfs/xfs_bmap.h |    3 ++-
 fs/xfs/xfs_aops.c        |   52 +++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 58 insertions(+), 5 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 220ea1dc67ab..65b2bd12720e 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4545,7 +4545,8 @@ xfs_bmapi_convert_delalloc(
 	int			whichfork,
 	xfs_off_t		offset,
 	struct iomap		*iomap,
-	unsigned int		*seq)
+	unsigned int		*seq,
+	bool			full_writeback)
 {
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -4610,11 +4611,12 @@ xfs_bmapi_convert_delalloc(
 	 *
 	 * New data fork extents must be mapped in as unwritten and converted
 	 * to real extents after the write succeeds to avoid exposing stale
-	 * disk contents if we crash.
+	 * disk contents if we crash.  We relax this requirement if we've been
+	 * told to flush all data to disk.
 	 */
 	if (whichfork == XFS_COW_FORK)
 		bma.flags = XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
-	else
+	else if (!full_writeback)
 		bma.flags = XFS_BMAPI_PREALLOC;
 
 	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 14d25e0b7d9c..9d0b0ed83c9f 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -228,7 +228,8 @@ int	xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
 		struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
 		int eof);
 int	xfs_bmapi_convert_delalloc(struct xfs_inode *ip, int whichfork,
-		xfs_off_t offset, struct iomap *iomap, unsigned int *seq);
+		xfs_off_t offset, struct iomap *iomap, unsigned int *seq,
+		bool full_writeback);
 int	xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp,
 		struct xfs_inode *ip, int whichfork,
 		struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp,
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a688eb5c5ae..45174dfa0b7d 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -18,10 +18,13 @@
 #include "xfs_bmap_util.h"
 #include "xfs_reflink.h"
 
+#define XFS_WRITEPAGE_FULL_RANGE	(1 << 0)
+
 struct xfs_writepage_ctx {
 	struct iomap_writepage_ctx ctx;
 	unsigned int		data_seq;
 	unsigned int		cow_seq;
+	unsigned int		flags;
 };
 
 static inline struct xfs_writepage_ctx *
@@ -327,7 +330,8 @@ xfs_convert_blocks(
 	 */
 	do {
 		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
-				&wpc->iomap, seq);
+				&wpc->iomap, seq,
+				XFS_WPC(wpc)->flags & XFS_WRITEPAGE_FULL_RANGE);
 		if (error)
 			return error;
 	} while (wpc->iomap.offset + wpc->iomap.length <= offset);
@@ -567,6 +571,48 @@ xfs_vm_writepage(
 	return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops);
 }
 
+/*
+ * If we've been told to write a range of the file that is beyond the on-disk
+ * file size and there's a written extent beyond the EOF block, we conclude
+ * that we previously wrote a speculative post-EOF preallocation to disk (as
+ * written extents) and later extended the incore file size.
+ *
+ * To prevent exposure of the contents of those speculative preallocations
+ * after a crash, extend the writeback range all the way down to the old file
+ * size, and grow a bounded nr_to_write by the number of pages thus added.
+ */
+static void
+xfs_vm_adjust_posteof_writepages(
+	struct xfs_inode		*ip,
+	struct writeback_control	*wbc)
+{
+	struct xfs_iext_cursor		icur;
+	struct xfs_bmbt_irec		irec;
+
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	if (ip->i_d.di_size >= wbc->range_start)
+		goto out;
+
+	/* We're done if we can't find a real extent past EOF. */
+	if (!xfs_iext_lookup_extent(ip, XFS_IFORK_PTR(ip, XFS_DATA_FORK),
+			XFS_B_TO_FSB(ip->i_mount, ip->i_d.di_size), &icur,
+			&irec))
+		goto out;
+	if (irec.br_startblock == HOLESTARTBLOCK)
+		goto out;
+
+	/*
+	 * Grow the page budget by the number of pages we are adding, using
+	 * the old range_start before it is overwritten below.
+	 */
+	if (wbc->nr_to_write != LONG_MAX)
+		wbc->nr_to_write += (wbc->range_start >> PAGE_SHIFT) -
+				    (ip->i_d.di_size >> PAGE_SHIFT);
+	wbc->range_start = ip->i_d.di_size;
+out:
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+}
+
 STATIC int
 xfs_vm_writepages(
 	struct address_space	*mapping,
@@ -574,6 +620,10 @@ xfs_vm_writepages(
 {
 	struct xfs_writepage_ctx wpc = { };
 
+	xfs_vm_adjust_posteof_writepages(XFS_I(mapping->host), wbc);
+	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+		wpc.flags |= XFS_WRITEPAGE_FULL_RANGE;
+
 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
 	return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
 }


  parent reply	other threads:[~2020-01-16  6:16 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-16  6:15 [PATCH 0/2] xfs: fix stale disk exposure after crash Darrick J. Wong
2020-01-16  6:15 ` [PATCH 1/2] xfs: force writes to delalloc regions to unwritten Darrick J. Wong
2020-01-16 16:47   ` Christoph Hellwig
2020-01-16 23:16     ` Darrick J. Wong
2020-01-19 20:49   ` Dave Chinner
2020-02-03 20:14     ` Darrick J. Wong
2020-05-07 10:32       ` Brian Foster
2020-05-14 16:33         ` Darrick J. Wong
2020-05-14 17:44           ` Brian Foster
2020-05-17  7:48             ` Christoph Hellwig
2020-05-19  0:40               ` Darrick J. Wong
2020-05-20  1:03             ` Dave Chinner
2020-01-16  6:15 ` Darrick J. Wong [this message]
2020-01-16 16:49   ` [PATCH 2/2] xfs: relax unwritten writeback overhead under some circumstances Christoph Hellwig
2020-01-16 23:15     ` Darrick J. Wong
2020-01-16 16:49 ` [PATCH 0/2] xfs: fix stale disk exposure after crash Christoph Hellwig
2020-01-16 23:00   ` Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=157915535801.2406747.10502356876965505327.stgit@magnolia \
    --to=darrick.wong@oracle.com \
    --cc=hch@infradead.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.