All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: david@fromorbit.com, darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org, xfs@oss.sgi.com
Subject: [PATCH 71/71] xfs: implement swapext for rmap filesystems
Date: Thu, 25 Aug 2016 16:39:38 -0700	[thread overview]
Message-ID: <147216837842.867.8168149197321420871.stgit@birch.djwong.org> (raw)
In-Reply-To: <147216791538.867.12413509832420924168.stgit@birch.djwong.org>

Implement swapext for filesystems that have reverse mapping.  Back in
the reflink patches, we augmented the bmap code with a 'REMAP' flag
that updates only the bmbt and doesn't touch the allocator, and we
implemented log redo items for the map and unmap operations.  Now we
can rewrite extent swapping as a (looong) series of remap operations.

This is far less efficient than the fork swapping method implemented
in the past, so we only switch this on for rmap.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c |  164 +++++++++++++++++++++++++++++++++++++++++++++++-
 fs/xfs/xfs_trace.h     |    5 +
 2 files changed, 166 insertions(+), 3 deletions(-)


diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 3bd85aa..e9b48dc 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1559,6 +1559,13 @@ xfs_swap_extents_check_format(
 		return -EINVAL;
 
 	/*
+	 * If we have to use the (expensive) rmap swap method, we can
+	 * handle any number of extents and any format.
+	 */
+	if (xfs_sb_version_hasrmapbt(&ip->i_mount->m_sb))
+		return 0;
+
+	/*
 	 * if the target inode is in extent form and the temp inode is in btree
 	 * form then we will end up with the target inode in the wrong format
 	 * as we already know there are less extents in the temp inode.
@@ -1627,6 +1634,132 @@ xfs_swap_extent_flush(
 	return 0;
 }
 
+/*
+ * Exchange data fork mappings between ip and tip, when rmap is enabled.
+ */
+STATIC int
+xfs_swap_extent_rmap(
+	struct xfs_trans		**tpp,
+	struct xfs_inode		*ip,
+	struct xfs_inode		*tip)
+{
+	struct xfs_bmbt_irec		irec;
+	struct xfs_bmbt_irec		uirec;
+	struct xfs_bmbt_irec		tirec;
+	xfs_fileoff_t			offset_fsb;
+	xfs_fileoff_t			end_fsb;
+	xfs_filblks_t			count_fsb;
+	xfs_fsblock_t			firstfsb;
+	struct xfs_defer_ops		dfops;
+	int				done;
+	int				error;
+	xfs_filblks_t			ilen;
+	xfs_filblks_t			rlen;
+	int				nimaps;
+	__uint64_t			tip_flags2;
+
+	/*
+	 * If the source file has shared blocks, flag the donor file as
+	 * shared too so that we get the shared-block rmap functions when
+	 * fixing up the rmaps.  The donor's flags are restored on return;
+	 * the flags will be switched for real later.
+	 */
+	tip_flags2 = tip->i_d.di_flags2;
+	if (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK)
+		tip->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK;
+
+	offset_fsb = 0;
+	end_fsb = XFS_B_TO_FSB(ip->i_mount, i_size_read(VFS_I(ip)));
+	count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
+
+	while (count_fsb) {
+		/* Read extent from the donor file */
+		nimaps = 1;
+		error = xfs_bmapi_read(tip, offset_fsb, count_fsb, &tirec,
+				&nimaps, 0);
+		if (error)
+			goto out;
+		ASSERT(nimaps == 1);
+		ASSERT(tirec.br_startblock != DELAYSTARTBLOCK);
+
+		trace_xfs_swap_extent_rmap_remap(tip, &tirec);
+		ilen = tirec.br_blockcount;
+
+		/* Swap the donor extent into the source file piecewise. */
+		done = false;
+		while (tirec.br_blockcount) {
+			xfs_defer_init(&dfops, &firstfsb);
+			trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);
+
+			/* Read extent from the source file */
+			nimaps = 1;
+			error = xfs_bmapi_read(ip, tirec.br_startoff,
+					tirec.br_blockcount, &irec,
+					&nimaps, 0);
+			if (error)
+				goto out_defer;
+			ASSERT(nimaps == 1);
+			ASSERT(tirec.br_startoff == irec.br_startoff);
+			trace_xfs_swap_extent_rmap_remap_piece(ip, &irec);
+
+			/* Trim to the shorter of the two mappings. */
+			uirec = tirec;
+			uirec.br_blockcount = rlen = min_t(xfs_filblks_t,
+					tirec.br_blockcount,
+					irec.br_blockcount);
+			trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec);
+
+			/* Remove the mapping from the donor file. */
+			error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops,
+					tip, XFS_DATA_FORK, &uirec);
+			if (error)
+				goto out_defer;
+
+			/* Remove the mapping from the source file. */
+			error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops,
+					ip, XFS_DATA_FORK, &irec);
+			if (error)
+				goto out_defer;
+
+			/* Map the donor file's blocks into the source file. */
+			error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops,
+					ip, XFS_DATA_FORK, &uirec);
+			if (error)
+				goto out_defer;
+
+			/* Map the source file's blocks into the donor file. */
+			error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops,
+					tip, XFS_DATA_FORK, &irec);
+			if (error)
+				goto out_defer;
+
+			error = xfs_defer_finish(tpp, &dfops, ip);
+			if (error)
+				goto out_defer;
+
+			tirec.br_startoff += rlen;
+			if (tirec.br_startblock != HOLESTARTBLOCK &&
+			    tirec.br_startblock != DELAYSTARTBLOCK)
+				tirec.br_startblock += rlen;
+			tirec.br_blockcount -= rlen;
+		}
+
+		/* Advance past the donor extent just processed. */
+		count_fsb -= ilen;
+		offset_fsb += ilen;
+	}
+
+	tip->i_d.di_flags2 = tip_flags2;
+	return 0;
+
+out_defer:
+	xfs_defer_cancel(&dfops);
+out:
+	trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_);
+	tip->i_d.di_flags2 = tip_flags2;
+	return error;
+}
+
 /* Swap the extents of two files by swapping data forks. */
 STATIC int
 xfs_swap_extent_forks(
@@ -1777,6 +1910,7 @@ xfs_swap_extents(
 	int			lock_flags;
 	struct xfs_ifork	*cowfp;
 	__uint64_t		f;
+	int			resblks;
 
 	/*
 	 * Lock the inodes against other IO, page faults and truncate to
@@ -1807,7 +1941,28 @@ xfs_swap_extents(
 	if (error)
 		goto out_unlock;
 
-	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
+	/*
+	 * Extent "swapping" with rmap requires a permanent reservation and
+	 * a block reservation because it's really just a remap operation
+	 * performed with log redo items!
+	 */
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+		/*
+		 * Conceptually this shouldn't affect the shape of either
+		 * bmbt, but since we atomically move extents one by one,
+		 * we reserve enough space to rebuild both trees.
+		 */
+		resblks = XFS_NEXTENTADD_SPACE_RES(mp,
+				XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK),
+				XFS_DATA_FORK) +
+			  XFS_NEXTENTADD_SPACE_RES(mp,
+				XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK),
+				XFS_DATA_FORK);
+		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
+				0, 0, &tp);
+	} else
+		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0,
+				0, 0, &tp);
 	if (error)
 		goto out_unlock;
 
@@ -1866,8 +2021,11 @@ xfs_swap_extents(
 	src_log_flags = XFS_ILOG_CORE;
 	target_log_flags = XFS_ILOG_CORE;
 
-	error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags,
-			&target_log_flags);
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		error = xfs_swap_extent_rmap(&tp, ip, tip);
+	else
+		error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags,
+				&target_log_flags);
 	if (error)
 		goto out_trans_cancel;
 
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 8b1f803..f980cca 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3345,6 +3345,11 @@ DEFINE_INODE_EVENT(xfs_reflink_cancel_pending_cow);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cancel_cow);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_pending_cow_error);
 
+/* rmap swapext tracepoints */
+DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap);
+DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece);
+DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error);
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  parent reply	other threads:[~2016-08-25 23:39 UTC|newest]

Thread overview: 119+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-25 23:31 [PATCH v8 00/71] xfs: add reflink and dedupe support Darrick J. Wong
2016-08-25 23:32 ` [PATCH 01/71] xfs: remove xfs_btree_bigkey Darrick J. Wong
2016-09-05 15:04   ` Christoph Hellwig
2016-08-25 23:32 ` [PATCH 02/71] xfs: create a standard btree size calculator code Darrick J. Wong
2016-09-05 15:05   ` Christoph Hellwig
2016-08-25 23:32 ` [PATCH 03/71] xfs: count the blocks in a btree Darrick J. Wong
2016-09-05 15:05   ` Christoph Hellwig
2016-08-25 23:32 ` [PATCH 04/71] xfs: defer should allow ->finish_item to request a new transaction Darrick J. Wong
2016-09-06  6:38   ` Christoph Hellwig
2016-09-06 23:57     ` Darrick J. Wong
2016-08-25 23:32 ` [PATCH 05/71] xfs: introduce tracepoints for AG reservation code Darrick J. Wong
2016-09-06  6:38   ` Christoph Hellwig
2016-08-25 23:32 ` [PATCH 06/71] xfs: set up per-AG free space reservations Darrick J. Wong
2016-09-06 14:53   ` Christoph Hellwig
2016-09-06 17:31     ` Darrick J. Wong
2016-09-08 17:47       ` Darrick J. Wong
2016-08-25 23:32 ` [PATCH 07/71] xfs: define tracepoints for refcount btree activities Darrick J. Wong
2016-09-06 14:54   ` Christoph Hellwig
2016-09-08 18:20     ` Darrick J. Wong
2016-08-25 23:32 ` [PATCH 08/71] xfs: introduce refcount btree definitions Darrick J. Wong
2016-09-06 14:59   ` Christoph Hellwig
2016-09-06 17:13     ` Darrick J. Wong
2016-08-25 23:32 ` [PATCH 09/71] xfs: add refcount btree stats infrastructure Darrick J. Wong
2016-09-06 14:59   ` Christoph Hellwig
2016-08-25 23:33 ` [PATCH 10/71] xfs: refcount btree add more reserved blocks Darrick J. Wong
2016-09-06 15:00   ` Christoph Hellwig
2016-08-25 23:33 ` [PATCH 11/71] xfs: define the on-disk refcount btree format Darrick J. Wong
2016-09-06 15:06   ` Christoph Hellwig
2016-08-25 23:33 ` [PATCH 12/71] xfs: add refcount btree support to growfs Darrick J. Wong
2016-09-06 15:06   ` Christoph Hellwig
2016-08-25 23:33 ` [PATCH 13/71] xfs: account for the refcount btree in the alloc/free log reservation Darrick J. Wong
2016-08-25 23:33 ` [PATCH 14/71] xfs: add refcount btree operations Darrick J. Wong
2016-09-06 15:09   ` Christoph Hellwig
2016-08-25 23:33 ` [PATCH 15/71] xfs: create refcount update intent log items Darrick J. Wong
2016-09-06 15:16   ` Christoph Hellwig
2016-09-06 16:43     ` Darrick J. Wong
2016-09-06 17:03       ` Christoph Hellwig
2016-08-25 23:33 ` [PATCH 16/71] xfs: log refcount intent items Darrick J. Wong
2016-09-06 15:21   ` Christoph Hellwig
2016-09-08 19:14     ` Darrick J. Wong
2016-09-08 23:13       ` Dave Chinner
2016-09-08 23:16         ` Darrick J. Wong
2016-09-11 12:52           ` Christoph Hellwig
2016-09-12 18:40             ` Darrick J. Wong
2016-09-12 23:28               ` Dave Chinner
2016-08-25 23:33 ` [PATCH 17/71] xfs: adjust refcount of an extent of blocks in refcount btree Darrick J. Wong
2016-08-25 23:33 ` [PATCH 18/71] xfs: connect refcount adjust functions to upper layers Darrick J. Wong
2016-08-25 23:34 ` [PATCH 19/71] xfs: adjust refcount when unmapping file blocks Darrick J. Wong
2016-08-25 23:34 ` [PATCH 20/71] xfs: add refcount btree block detection to log recovery Darrick J. Wong
2016-08-25 23:34 ` [PATCH 21/71] xfs: refcount btree requires more reserved space Darrick J. Wong
2016-08-25 23:34 ` [PATCH 22/71] xfs: introduce reflink utility functions Darrick J. Wong
2016-08-25 23:34 ` [PATCH 23/71] xfs: create bmbt update intent log items Darrick J. Wong
2016-08-25 23:34 ` [PATCH 24/71] xfs: log bmap intent items Darrick J. Wong
2016-08-25 23:34 ` [PATCH 25/71] xfs: map an inode's offset to an exact physical block Darrick J. Wong
2016-08-25 23:34 ` [PATCH 26/71] xfs: pass bmapi flags through to bmap_del_extent Darrick J. Wong
2016-08-25 23:34 ` [PATCH 27/71] xfs: implement deferred bmbt map/unmap operations Darrick J. Wong
2016-08-25 23:35 ` [PATCH 28/71] xfs: when replaying bmap operations, don't let unlinked inodes get reaped Darrick J. Wong
2016-08-25 23:35 ` [PATCH 29/71] xfs: return work remaining at the end of a bunmapi operation Darrick J. Wong
2016-08-25 23:35 ` [PATCH 30/71] xfs: define tracepoints for reflink activities Darrick J. Wong
2016-08-25 23:35 ` [PATCH 31/71] xfs: add reflink feature flag to geometry Darrick J. Wong
2016-08-25 23:35 ` [PATCH 32/71] xfs: don't allow reflinked dir/dev/fifo/socket/pipe files Darrick J. Wong
2016-08-25 23:35 ` [PATCH 33/71] xfs: introduce the CoW fork Darrick J. Wong
2016-08-25 23:35 ` [PATCH 34/71] xfs: support bmapping delalloc extents in " Darrick J. Wong
2016-09-06 15:25   ` Christoph Hellwig
2016-09-06 16:34     ` Darrick J. Wong
2016-09-11 12:59       ` Christoph Hellwig
2016-09-06 23:40     ` Dave Chinner
2016-09-11 12:57       ` Christoph Hellwig
2016-08-25 23:35 ` [PATCH 35/71] xfs: create delalloc extents in " Darrick J. Wong
2016-08-25 23:35 ` [PATCH 36/71] xfs: support allocating delayed " Darrick J. Wong
2016-08-25 23:35 ` [PATCH 37/71] xfs: allocate " Darrick J. Wong
2016-08-25 23:36 ` [PATCH 38/71] xfs: support removing extents from " Darrick J. Wong
2016-08-25 23:36 ` [PATCH 39/71] xfs: move mappings from cow fork to data fork after copy-write Darrick J. Wong
2016-08-25 23:36 ` [PATCH 40/71] xfs: report shared extents through the iomap interface Darrick J. Wong
2016-08-25 23:36 ` [PATCH 41/71] xfs: implement CoW for directio writes Darrick J. Wong
2016-08-25 23:36 ` [PATCH 42/71] xfs: cancel CoW reservations and clear inode reflink flag when freeing blocks Darrick J. Wong
2016-08-25 23:36 ` [PATCH 43/71] xfs: cancel pending CoW reservations when destroying inodes Darrick J. Wong
2016-08-25 23:36 ` [PATCH 44/71] xfs: store in-progress CoW allocations in the refcount btree Darrick J. Wong
2016-08-25 23:36 ` [PATCH 45/71] xfs: reflink extents from one file to another Darrick J. Wong
2016-08-25 23:36 ` [PATCH 46/71] xfs: add clone file and clone range vfs functions Darrick J. Wong
2016-08-25 23:37 ` [PATCH 47/71] xfs: add dedupe range vfs function Darrick J. Wong
2016-08-25 23:37 ` [PATCH 48/71] xfs: teach get_bmapx about shared extents and the CoW fork Darrick J. Wong
2016-08-25 23:37 ` [PATCH 49/71] xfs: swap inode reflink flags when swapping inode extents Darrick J. Wong
2016-08-25 23:37 ` [PATCH 50/71] xfs: unshare a range of blocks via fallocate Darrick J. Wong
2016-08-25 23:37 ` [PATCH 51/71] xfs: CoW shared EOF block when truncating file Darrick J. Wong
2016-08-25 23:37 ` [PATCH 52/71] xfs: support FS_XFLAG_REFLINK on reflink filesystems Darrick J. Wong
2016-08-25 23:37 ` [PATCH 53/71] xfs: create a separate cow extent size hint for the allocator Darrick J. Wong
2016-08-25 23:37 ` [PATCH 54/71] xfs: preallocate blocks for worst-case btree expansion Darrick J. Wong
2016-08-25 23:37 ` [PATCH 55/71] xfs: don't allow reflink when the AG is low on space Darrick J. Wong
2016-08-25 23:38 ` [PATCH 56/71] xfs: try other AGs to allocate a BMBT block Darrick J. Wong
2016-08-25 23:38 ` [PATCH 57/71] xfs: promote buffered writes to CoW when cowextsz is set Darrick J. Wong
2016-08-25 23:38 ` [PATCH 58/71] xfs: garbage collect old cowextsz reservations Darrick J. Wong
2016-09-24 19:42   ` Christoph Hellwig
2016-09-26 21:52     ` Darrick J. Wong
2016-09-27 18:50       ` Christoph Hellwig
2016-09-27 19:29         ` Darrick J. Wong
2016-09-27 20:15       ` Christoph Hellwig
2016-09-27 20:25         ` Darrick J. Wong
2016-08-25 23:38 ` [PATCH 59/71] xfs: provide switch to force filesystem to copy-on-write all the time Darrick J. Wong
2016-08-25 23:38 ` [PATCH 60/71] xfs: increase log reservations for reflink Darrick J. Wong
2016-08-25 23:38 ` [PATCH 61/71] xfs: add shared rmap map/unmap/convert log item types Darrick J. Wong
2016-08-25 23:38 ` [PATCH 62/71] xfs: use interval query for rmap alloc operations on shared files Darrick J. Wong
2016-08-25 23:38 ` [PATCH 63/71] xfs: convert unwritten status of reverse mappings for " Darrick J. Wong
2016-08-25 23:38 ` [PATCH 64/71] xfs: set a default CoW extent size of 32 blocks Darrick J. Wong
2016-08-25 23:38 ` [PATCH 65/71] xfs: check for invalid inode reflink flags Darrick J. Wong
2016-08-25 23:39 ` [PATCH 66/71] xfs: don't mix reflink and DAX mode for now Darrick J. Wong
2016-08-25 23:39 ` [PATCH 67/71] xfs: fail ->bmap for reflink inodes Darrick J. Wong
2016-09-06 15:29   ` Christoph Hellwig
2016-09-06 16:26     ` Darrick J. Wong
2016-09-06 17:02       ` Christoph Hellwig
2016-08-25 23:39 ` [PATCH 68/71] xfs: recognize the reflink feature bit Darrick J. Wong
2016-08-25 23:39 ` [PATCH 69/71] xfs: various swapext cleanups Darrick J. Wong
2016-08-25 23:39 ` [PATCH 70/71] xfs: refactor swapext code Darrick J. Wong
2016-08-25 23:39 ` Darrick J. Wong [this message]
2016-08-26 12:56 ` [PATCH v8 00/71] xfs: add reflink and dedupe support Christoph Hellwig
2016-08-26 16:28   ` Darrick J. Wong
2016-08-26 18:42   ` Darrick J. Wong
2016-08-26 14:08 ` Brian Foster
2016-08-26 18:44 ` [PATCH 72/71] xfs: track log done items directly in the deferred pending work item Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=147216837842.867.8168149197321420871.stgit@birch.djwong.org \
    --to=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=linux-xfs@vger.kernel.org \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.