All of lore.kernel.org
 help / color / mirror / Atom feed
* reflink direct I/O improvements V3
@ 2017-02-06  7:47 Christoph Hellwig
  2017-02-06  7:47 ` [PATCH 1/4] xfs: introduce xfs_aligned_fsb_count Christoph Hellwig
                   ` (4 more replies)
  0 siblings, 5 replies; 18+ messages in thread
From: Christoph Hellwig @ 2017-02-06  7:47 UTC (permalink / raw)
  To: linux-xfs

This series improves the reflink direct I/O path, by avoiding a pointless
detour through delayed allocations and doing the allocations directly
from the iomap code.

Changes since V2:
 - handle the new scheme to use unwritten extents for preallocation
   properly
 - fix up tracing for bounced dio writes

Changes since V1:
 - align the alignment code with the regular direct I/O path
 - rebase on top the patches from Darrick to use unwritten extents
   in the direct I/O COW path

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/4] xfs: introduce xfs_aligned_fsb_count
  2017-02-06  7:47 reflink direct I/O improvements V3 Christoph Hellwig
@ 2017-02-06  7:47 ` Christoph Hellwig
  2017-02-06  7:47 ` [PATCH 2/4] xfs: return the converted extent in __xfs_reflink_convert_cow Christoph Hellwig
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2017-02-06  7:47 UTC (permalink / raw)
  To: linux-xfs

Factor a helper to calculate the extent-size aligned block out of the
iomap code, so that it can be reused by the upcoming reflink dio code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_iomap.c | 11 ++---------
 fs/xfs/xfs_iomap.h | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index d17aa3b9c6a5..9d811eb1a416 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -162,7 +162,7 @@ xfs_iomap_write_direct(
 	xfs_fileoff_t	last_fsb;
 	xfs_filblks_t	count_fsb, resaligned;
 	xfs_fsblock_t	firstfsb;
-	xfs_extlen_t	extsz, temp;
+	xfs_extlen_t	extsz;
 	int		nimaps;
 	int		quota_flag;
 	int		rt;
@@ -203,14 +203,7 @@ xfs_iomap_write_direct(
 	}
 	count_fsb = last_fsb - offset_fsb;
 	ASSERT(count_fsb > 0);
-
-	resaligned = count_fsb;
-	if (unlikely(extsz)) {
-		if ((temp = do_mod(offset_fsb, extsz)))
-			resaligned += temp;
-		if ((temp = do_mod(resaligned, extsz)))
-			resaligned += extsz - temp;
-	}
+	resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, extsz);
 
 	if (unlikely(rt)) {
 		resrtextents = qblocks = resaligned;
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 6d45cf01fcff..1fef4a5b71cb 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -33,6 +33,26 @@ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
 		struct xfs_bmbt_irec *);
 xfs_extlen_t xfs_eof_alignment(struct xfs_inode *ip, xfs_extlen_t extsize);
 
+static inline xfs_filblks_t
+xfs_aligned_fsb_count(
+	xfs_fileoff_t		offset_fsb,
+	xfs_filblks_t		count_fsb,
+	xfs_extlen_t		extsz)
+{
+	if (extsz) {
+		xfs_extlen_t	align;
+
+		align = do_mod(offset_fsb, extsz);
+		if (align)
+			count_fsb += align;
+		align = do_mod(count_fsb, extsz);
+		if (align)
+			count_fsb += extsz - align;
+	}
+
+	return count_fsb;
+}
+
 extern struct iomap_ops xfs_iomap_ops;
 extern struct iomap_ops xfs_xattr_iomap_ops;
 
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/4] xfs: return the converted extent in __xfs_reflink_convert_cow
  2017-02-06  7:47 reflink direct I/O improvements V3 Christoph Hellwig
  2017-02-06  7:47 ` [PATCH 1/4] xfs: introduce xfs_aligned_fsb_count Christoph Hellwig
@ 2017-02-06  7:47 ` Christoph Hellwig
  2017-02-06  7:47 ` [PATCH 3/4] xfs: go straight to real allocations for direct I/O COW writes Christoph Hellwig
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2017-02-06  7:47 UTC (permalink / raw)
  To: linux-xfs

We'll need it for the direct I/O code.  Also rename the function to
xfs_reflink_convert_cow_extent to describe it a bit better.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_reflink.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index ae9ba9b6d48f..e6b8af91e9a4 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -325,24 +325,23 @@ xfs_reflink_reserve_cow(
 
 /* Convert part of an unwritten CoW extent to a real one. */
 STATIC int
-__xfs_reflink_convert_cow(
+xfs_reflink_convert_cow_extent(
 	struct xfs_inode		*ip,
 	struct xfs_bmbt_irec		*imap,
 	xfs_fileoff_t			offset_fsb,
 	xfs_filblks_t			count_fsb,
 	struct xfs_defer_ops		*dfops)
 {
-	struct xfs_bmbt_irec		irec = *imap;
 	xfs_fsblock_t			first_block;
 	int				nimaps = 1;
 
-	xfs_trim_extent(&irec, offset_fsb, count_fsb);
-	trace_xfs_reflink_convert_cow(ip, &irec);
-	if (irec.br_blockcount == 0)
+	xfs_trim_extent(imap, offset_fsb, count_fsb);
+	trace_xfs_reflink_convert_cow(ip, imap);
+	if (imap->br_blockcount == 0)
 		return 0;
-	return xfs_bmapi_write(NULL, ip, irec.br_startoff, irec.br_blockcount,
+	return xfs_bmapi_write(NULL, ip, imap->br_startoff, imap->br_blockcount,
 			XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block,
-			0, &irec, &nimaps, dfops);
+			0, imap, &nimaps, dfops);
 }
 
 /* Convert all of the unwritten CoW extents in a file's range to real ones. */
@@ -372,7 +371,7 @@ xfs_reflink_convert_cow(
 	     found = xfs_iext_get_extent(ifp, ++idx, &got)) {
 		if (got.br_state == XFS_EXT_NORM)
 			continue;
-		error = __xfs_reflink_convert_cow(ip, &got, offset_fsb,
+		error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb,
 				end_fsb - offset_fsb, &dfops);
 		if (error)
 			break;
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/4] xfs: go straight to real allocations for direct I/O COW writes
  2017-02-06  7:47 reflink direct I/O improvements V3 Christoph Hellwig
  2017-02-06  7:47 ` [PATCH 1/4] xfs: introduce xfs_aligned_fsb_count Christoph Hellwig
  2017-02-06  7:47 ` [PATCH 2/4] xfs: return the converted extent in __xfs_reflink_convert_cow Christoph Hellwig
@ 2017-02-06  7:47 ` Christoph Hellwig
  2017-02-07  1:12   ` Darrick J. Wong
  2017-02-06  7:47 ` [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin Christoph Hellwig
  2017-02-06 18:41 ` reflink direct I/O improvements V3 Darrick J. Wong
  4 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2017-02-06  7:47 UTC (permalink / raw)
  To: linux-xfs

When we allocate COW fork blocks for direct I/O writes we currently first
create a delayed allocation, and then convert it to a real allocation
once we've got the delayed one.

As there is no good reason for that this patch instead makes use call
xfs_bmapi_write from the COW allocation path.  The only interesting bits
are a few tweaks the low-level allocator to allow for this, most notably
the need to remove the call to xfs_bmap_extsize_align for the cowextsize
in xfs_bmap_btalloc - for the existing convert case it's a no-op, but
for the direct allocation case it would blow up our block reservation
way beyond what we reserved for the transaction.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_bmap.c |  3 +-
 fs/xfs/xfs_reflink.c     | 94 +++++++++++++++++++++++++++++++++---------------
 2 files changed, 68 insertions(+), 29 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 1566e9d9fd7d..2b4ad8442ec0 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -2909,13 +2909,14 @@ xfs_bmap_add_extent_hole_real(
 	ASSERT(!isnullstartblock(new->br_startblock));
 	ASSERT(!bma->cur ||
 	       !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
-	ASSERT(whichfork != XFS_COW_FORK);
 
 	XFS_STATS_INC(mp, xs_add_exlist);
 
 	state = 0;
 	if (whichfork == XFS_ATTR_FORK)
 		state |= BMAP_ATTRFORK;
+	if (whichfork == XFS_COW_FORK)
+		state |= BMAP_COWFORK;
 
 	/*
 	 * Check and set flags if this segment has a left neighbor.
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index e6b8af91e9a4..e577f54beb2f 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -390,62 +390,100 @@ __xfs_reflink_allocate_cow(
 	xfs_fileoff_t		end_fsb)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_bmbt_irec	imap;
+	struct xfs_bmbt_irec	imap, got;
 	struct xfs_defer_ops	dfops;
 	struct xfs_trans	*tp;
 	xfs_fsblock_t		first_block;
-	int			nimaps = 1, error;
-	bool			shared;
+	int			nimaps, error, lockmode;
+	bool			shared, trimmed;
+	xfs_filblks_t		resaligned;
+	xfs_extlen_t		resblks;
+	xfs_extnum_t		idx;
 
-	xfs_defer_init(&dfops, &first_block);
+	resaligned = xfs_aligned_fsb_count(*offset_fsb, end_fsb - *offset_fsb,
+			xfs_get_cowextsz_hint(ip));
+	resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
 
-	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
-			XFS_TRANS_RESERVE, &tp);
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
 	if (error)
 		return error;
 
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	lockmode = XFS_ILOCK_EXCL;
+	xfs_ilock(ip, lockmode);
 
-	/* Read extent from the source file. */
-	nimaps = 1;
-	error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
-			&imap, &nimaps, 0);
-	if (error)
-		goto out_unlock;
-	ASSERT(nimaps == 1);
+	/*
+	 * Even if the extent is not shared we might have a preallocation for
+	 * it in the COW fork.  If so use it.
+	 */
+	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, *offset_fsb, &idx, &got) &&
+	    got.br_startoff <= *offset_fsb) {
+		/* If we have a real allocation in the COW fork we're done. */
+		if (!isnullstartblock(got.br_startblock)) {
+			xfs_trim_extent(&got, *offset_fsb,
+					end_fsb - *offset_fsb);
+			*offset_fsb = got.br_startoff + got.br_blockcount;
+			goto out_trans_cancel;
+		}
+	} else {
+		nimaps = 1;
+		error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
+				&imap, &nimaps, 0);
+		if (error)
+			goto out_trans_cancel;
+		ASSERT(nimaps == 1);
+
+		/* Trim the mapping to the nearest shared extent boundary. */
+		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
+				&trimmed);
+		if (error)
+			goto out_trans_cancel;
+
+		if (!shared) {
+			*offset_fsb = imap.br_startoff + imap.br_blockcount;
+			goto out_trans_cancel;
+		}
 
-	/* Make sure there's a CoW reservation for it. */
-	error = xfs_reflink_reserve_cow(ip, &imap, &shared);
+		*offset_fsb = imap.br_startoff;
+		end_fsb = imap.br_startoff + imap.br_blockcount;
+	}
+
+	error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
+			XFS_QMOPT_RES_REGBLKS);
 	if (error)
 		goto out_trans_cancel;
 
-	if (!shared) {
-		*offset_fsb = imap.br_startoff + imap.br_blockcount;
-		goto out_trans_cancel;
-	}
+	xfs_trans_ijoin(tp, ip, 0);
+
+	xfs_defer_init(&dfops, &first_block);
+	nimaps = 1;
 
 	/* Allocate the entire reservation as unwritten blocks. */
-	xfs_trans_ijoin(tp, ip, 0);
-	error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
+	error = xfs_bmapi_write(tp, ip, *offset_fsb, end_fsb - *offset_fsb,
 			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
-			XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
-			&imap, &nimaps, &dfops);
+			resblks, &imap, &nimaps, &dfops);
 	if (error)
-		goto out_trans_cancel;
+		goto out_bmap_cancel;
 
 	/* Finish up. */
 	error = xfs_defer_finish(&tp, &dfops, NULL);
 	if (error)
-		goto out_trans_cancel;
+		goto out_bmap_cancel;
 
 	error = xfs_trans_commit(tp);
+	if (error)
+		goto out_unlock;
 
 	*offset_fsb = imap.br_startoff + imap.br_blockcount;
+
 out_unlock:
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_iunlock(ip, lockmode);
 	return error;
-out_trans_cancel:
+
+out_bmap_cancel:
 	xfs_defer_cancel(&dfops);
+	xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
+			XFS_QMOPT_RES_REGBLKS);
+out_trans_cancel:
 	xfs_trans_cancel(tp);
 	goto out_unlock;
 }
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin
  2017-02-06  7:47 reflink direct I/O improvements V3 Christoph Hellwig
                   ` (2 preceding siblings ...)
  2017-02-06  7:47 ` [PATCH 3/4] xfs: go straight to real allocations for direct I/O COW writes Christoph Hellwig
@ 2017-02-06  7:47 ` Christoph Hellwig
  2017-02-07  1:41   ` Darrick J. Wong
  2017-02-06 18:41 ` reflink direct I/O improvements V3 Darrick J. Wong
  4 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2017-02-06  7:47 UTC (permalink / raw)
  To: linux-xfs

Instead of preallocating all the required COW blocks in the high-level
write code do it inside the iomap code, like we do for all other I/O.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c    |   8 ---
 fs/xfs/xfs_iomap.c   |  31 +++++------
 fs/xfs/xfs_reflink.c | 144 +++++++++++++++++++--------------------------------
 fs/xfs/xfs_reflink.h |   4 +-
 fs/xfs/xfs_trace.h   |   2 -
 5 files changed, 66 insertions(+), 123 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index b1f74f1d0b5f..1ff32d34514f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -561,14 +561,6 @@ xfs_file_dio_aio_write(
 	}
 
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
-
-	/* If this is a block-aligned directio CoW, remap immediately. */
-	if (xfs_is_reflink_inode(ip) && !unaligned_io) {
-		ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count);
-		if (ret)
-			goto out;
-	}
-
 	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
 out:
 	xfs_iunlock(ip, iolock);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 9d811eb1a416..0978a5f0b50c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -995,37 +995,31 @@ xfs_file_iomap_begin(
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	end_fsb = XFS_B_TO_FSB(mp, offset + length);
 
-	if (xfs_is_reflink_inode(ip) &&
-	    (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) {
-		shared = xfs_reflink_find_cow_mapping(ip, offset, &imap);
-		if (shared) {
-			xfs_iunlock(ip, lockmode);
-			goto alloc_done;
-		}
-		ASSERT(!isnullstartblock(imap.br_startblock));
-	}
-
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
 			       &nimaps, 0);
 	if (error)
 		goto out_unlock;
 
-	if ((flags & IOMAP_REPORT) ||
-	    (xfs_is_reflink_inode(ip) &&
-	     (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) {
+	if (flags & IOMAP_REPORT) {
 		/* Trim the mapping to the nearest shared extent boundary. */
 		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
 				&trimmed);
 		if (error)
 			goto out_unlock;
-
-		ASSERT((flags & IOMAP_REPORT) || !shared);
 	}
 
 	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
-		error = xfs_reflink_reserve_cow(ip, &imap, &shared);
-		if (error)
-			goto out_unlock;
+		if (flags & IOMAP_DIRECT) {
+			/* may drop and re-acquire the ilock */
+			error = xfs_reflink_allocate_cow(ip, &imap, &shared,
+					&lockmode);
+			if (error)
+				goto out_unlock;
+		} else {
+			error = xfs_reflink_reserve_cow(ip, &imap, &shared);
+			if (error)
+				goto out_unlock;
+		}
 
 		end_fsb = imap.br_startoff + imap.br_blockcount;
 		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
@@ -1054,7 +1048,6 @@ xfs_file_iomap_begin(
 		if (error)
 			return error;
 
-alloc_done:
 		iomap->flags = IOMAP_F_NEW;
 		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 	} else {
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index e577f54beb2f..68d0fbdd0929 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -383,74 +383,75 @@ xfs_reflink_convert_cow(
 }
 
 /* Allocate all CoW reservations covering a range of blocks in a file. */
-static int
-__xfs_reflink_allocate_cow(
+int
+xfs_reflink_allocate_cow(
 	struct xfs_inode	*ip,
-	xfs_fileoff_t		*offset_fsb,
-	xfs_fileoff_t		end_fsb)
+	struct xfs_bmbt_irec	*imap,
+	bool			*shared,
+	uint			*lockmode)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_bmbt_irec	imap, got;
+	xfs_fileoff_t		offset_fsb = imap->br_startoff;
+	xfs_filblks_t		count_fsb = imap->br_blockcount;
+	struct xfs_bmbt_irec	got;
 	struct xfs_defer_ops	dfops;
-	struct xfs_trans	*tp;
+	struct xfs_trans	*tp = NULL;
 	xfs_fsblock_t		first_block;
-	int			nimaps, error, lockmode;
-	bool			shared, trimmed;
+	int			nimaps, error = 0;
+	bool			trimmed;
 	xfs_filblks_t		resaligned;
-	xfs_extlen_t		resblks;
+	xfs_extlen_t		resblks = 0;
 	xfs_extnum_t		idx;
 
-	resaligned = xfs_aligned_fsb_count(*offset_fsb, end_fsb - *offset_fsb,
-			xfs_get_cowextsz_hint(ip));
-	resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
-
-	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
-	if (error)
-		return error;
-
-	lockmode = XFS_ILOCK_EXCL;
-	xfs_ilock(ip, lockmode);
+retry:
+	ASSERT(xfs_is_reflink_inode(ip));
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
 
 	/*
 	 * Even if the extent is not shared we might have a preallocation for
 	 * it in the COW fork.  If so use it.
 	 */
-	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, *offset_fsb, &idx, &got) &&
-	    got.br_startoff <= *offset_fsb) {
+	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) &&
+	    got.br_startoff <= offset_fsb) {
+		*shared = true;
+
 		/* If we have a real allocation in the COW fork we're done. */
 		if (!isnullstartblock(got.br_startblock)) {
-			xfs_trim_extent(&got, *offset_fsb,
-					end_fsb - *offset_fsb);
-			*offset_fsb = got.br_startoff + got.br_blockcount;
-			goto out_trans_cancel;
+			xfs_trim_extent(&got, offset_fsb, count_fsb);
+			*imap = got;
+			goto convert;
 		}
+
+		xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
 	} else {
-		nimaps = 1;
-		error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
-				&imap, &nimaps, 0);
-		if (error)
-			goto out_trans_cancel;
-		ASSERT(nimaps == 1);
+		error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
+		if (error || !*shared)
+			goto out;
+	}
 
-		/* Trim the mapping to the nearest shared extent boundary. */
-		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
-				&trimmed);
-		if (error)
-			goto out_trans_cancel;
+	if (!tp) {
+		resaligned = xfs_aligned_fsb_count(imap->br_startoff,
+			imap->br_blockcount, xfs_get_cowextsz_hint(ip));
+		resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
 
-		if (!shared) {
-			*offset_fsb = imap.br_startoff + imap.br_blockcount;
-			goto out_trans_cancel;
-		}
+		xfs_iunlock(ip, *lockmode);
+		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
+		*lockmode = XFS_ILOCK_EXCL;
+		xfs_ilock(ip, *lockmode);
+
+		if (error)
+			return error;
 
-		*offset_fsb = imap.br_startoff;
-		end_fsb = imap.br_startoff + imap.br_blockcount;
+		error = xfs_qm_dqattach_locked(ip, 0);
+		if (error)
+			goto out;
+		goto retry;
 	}
 
 	error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
 			XFS_QMOPT_RES_REGBLKS);
 	if (error)
-		goto out_trans_cancel;
+		goto out;
 
 	xfs_trans_ijoin(tp, ip, 0);
 
@@ -458,9 +459,9 @@ __xfs_reflink_allocate_cow(
 	nimaps = 1;
 
 	/* Allocate the entire reservation as unwritten blocks. */
-	error = xfs_bmapi_write(tp, ip, *offset_fsb, end_fsb - *offset_fsb,
+	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
 			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
-			resblks, &imap, &nimaps, &dfops);
+			resblks, imap, &nimaps, &dfops);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -471,58 +472,17 @@ __xfs_reflink_allocate_cow(
 
 	error = xfs_trans_commit(tp);
 	if (error)
-		goto out_unlock;
-
-	*offset_fsb = imap.br_startoff + imap.br_blockcount;
-
-out_unlock:
-	xfs_iunlock(ip, lockmode);
-	return error;
-
+		return error;
+convert:
+	return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb,
+			&dfops);
 out_bmap_cancel:
 	xfs_defer_cancel(&dfops);
 	xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
 			XFS_QMOPT_RES_REGBLKS);
-out_trans_cancel:
-	xfs_trans_cancel(tp);
-	goto out_unlock;
-}
-
-/* Allocate all CoW reservations covering a part of a file. */
-int
-xfs_reflink_allocate_cow_range(
-	struct xfs_inode	*ip,
-	xfs_off_t		offset,
-	xfs_off_t		count)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
-	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
-	int			error;
-
-	ASSERT(xfs_is_reflink_inode(ip));
-
-	trace_xfs_reflink_allocate_cow_range(ip, offset, count);
-
-	/*
-	 * Make sure that the dquots are there.
-	 */
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
-		return error;
-
-	while (offset_fsb < end_fsb) {
-		error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb);
-		if (error) {
-			trace_xfs_reflink_allocate_cow_range_error(ip, error,
-					_RET_IP_);
-			goto out;
-		}
-	}
-
-	/* Convert the CoW extents to regular. */
-	error = xfs_reflink_convert_cow(ip, offset, count);
 out:
+	if (tp)
+		xfs_trans_cancel(tp);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 1583c4727cb1..33ac9b8db683 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -28,8 +28,8 @@ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
 
 extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
 		struct xfs_bmbt_irec *imap, bool *shared);
-extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
-		xfs_off_t offset, xfs_off_t count);
+extern int xfs_reflink_allocate_cow(struct xfs_inode *ip,
+		struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode);
 extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
 extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 375c5e030e5b..32bad7bff840 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3248,7 +3248,6 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
 
 DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
-DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
 
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write);
 DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);
@@ -3258,7 +3257,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
 
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
 
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: reflink direct I/O improvements V3
  2017-02-06  7:47 reflink direct I/O improvements V3 Christoph Hellwig
                   ` (3 preceding siblings ...)
  2017-02-06  7:47 ` [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin Christoph Hellwig
@ 2017-02-06 18:41 ` Darrick J. Wong
  2017-02-06 20:55   ` Christoph Hellwig
  4 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2017-02-06 18:41 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Mon, Feb 06, 2017 at 08:47:34AM +0100, Christoph Hellwig wrote:
> This series improves the reflink direct I/O path, by avoiding a pointless
> detour through delayed allocations and doing the allocations directly
> from the iomap code.
> 
> Changes since V2:
>  - handle the new scheme to use unwritten extents for preallocation
>    properly
>  - fix up tracing for bounced dio writes

I think "xfs: reject all unaligned direct writes to reflinked files" is
missing from this patchset?  If so, you can send it as a reply to the
cover letter email.

--D

> Changes since V1:
>  - align the alignment code with the regular direct I/O path
>  - rebase on top the patches from Darrick to use unwritten extents
>    in the direct I/O COW path
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: reflink direct I/O improvements V3
  2017-02-06 18:41 ` reflink direct I/O improvements V3 Darrick J. Wong
@ 2017-02-06 20:55   ` Christoph Hellwig
  0 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2017-02-06 20:55 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, Feb 06, 2017 at 10:41:39AM -0800, Darrick J. Wong wrote:
> I think "xfs: reject all unaligned direct writes to reflinked files" is
> missing from this patchset?  If so, you can send it as a reply to the
> cover letter email.

I sent it out separately as I have no idea how to use git-send-email
to reply to an existing thread.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] xfs: go straight to real allocations for direct I/O COW writes
  2017-02-06  7:47 ` [PATCH 3/4] xfs: go straight to real allocations for direct I/O COW writes Christoph Hellwig
@ 2017-02-07  1:12   ` Darrick J. Wong
  0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2017-02-07  1:12 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Mon, Feb 06, 2017 at 08:47:37AM +0100, Christoph Hellwig wrote:
> When we allocate COW fork blocks for direct I/O writes we currently first
> create a delayed allocation, and then convert it to a real allocation
> once we've got the delayed one.
> 
> As there is no good reason for that this patch instead makes use call
> xfs_bmapi_write from the COW allocation path.  The only interesting bits
> are a few tweaks the low-level allocator to allow for this, most notably
> the need to remove the call to xfs_bmap_extsize_align for the cowextsize
> in xfs_bmap_btalloc - for the existing convert case it's a no-op, but
> for the direct allocation case it would blow up our block reservation
> way beyond what we reserved for the transaction.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks good,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/libxfs/xfs_bmap.c |  3 +-
>  fs/xfs/xfs_reflink.c     | 94 +++++++++++++++++++++++++++++++++---------------
>  2 files changed, 68 insertions(+), 29 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index 1566e9d9fd7d..2b4ad8442ec0 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -2909,13 +2909,14 @@ xfs_bmap_add_extent_hole_real(
>  	ASSERT(!isnullstartblock(new->br_startblock));
>  	ASSERT(!bma->cur ||
>  	       !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
> -	ASSERT(whichfork != XFS_COW_FORK);
>  
>  	XFS_STATS_INC(mp, xs_add_exlist);
>  
>  	state = 0;
>  	if (whichfork == XFS_ATTR_FORK)
>  		state |= BMAP_ATTRFORK;
> +	if (whichfork == XFS_COW_FORK)
> +		state |= BMAP_COWFORK;
>  
>  	/*
>  	 * Check and set flags if this segment has a left neighbor.
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index e6b8af91e9a4..e577f54beb2f 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -390,62 +390,100 @@ __xfs_reflink_allocate_cow(
>  	xfs_fileoff_t		end_fsb)
>  {
>  	struct xfs_mount	*mp = ip->i_mount;
> -	struct xfs_bmbt_irec	imap;
> +	struct xfs_bmbt_irec	imap, got;
>  	struct xfs_defer_ops	dfops;
>  	struct xfs_trans	*tp;
>  	xfs_fsblock_t		first_block;
> -	int			nimaps = 1, error;
> -	bool			shared;
> +	int			nimaps, error, lockmode;
> +	bool			shared, trimmed;
> +	xfs_filblks_t		resaligned;
> +	xfs_extlen_t		resblks;
> +	xfs_extnum_t		idx;
>  
> -	xfs_defer_init(&dfops, &first_block);
> +	resaligned = xfs_aligned_fsb_count(*offset_fsb, end_fsb - *offset_fsb,
> +			xfs_get_cowextsz_hint(ip));
> +	resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
>  
> -	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
> -			XFS_TRANS_RESERVE, &tp);
> +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
>  	if (error)
>  		return error;
>  
> -	xfs_ilock(ip, XFS_ILOCK_EXCL);
> +	lockmode = XFS_ILOCK_EXCL;
> +	xfs_ilock(ip, lockmode);
>  
> -	/* Read extent from the source file. */
> -	nimaps = 1;
> -	error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
> -			&imap, &nimaps, 0);
> -	if (error)
> -		goto out_unlock;
> -	ASSERT(nimaps == 1);
> +	/*
> +	 * Even if the extent is not shared we might have a preallocation for
> +	 * it in the COW fork.  If so use it.
> +	 */
> +	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, *offset_fsb, &idx, &got) &&
> +	    got.br_startoff <= *offset_fsb) {
> +		/* If we have a real allocation in the COW fork we're done. */
> +		if (!isnullstartblock(got.br_startblock)) {
> +			xfs_trim_extent(&got, *offset_fsb,
> +					end_fsb - *offset_fsb);
> +			*offset_fsb = got.br_startoff + got.br_blockcount;
> +			goto out_trans_cancel;
> +		}
> +	} else {
> +		nimaps = 1;
> +		error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
> +				&imap, &nimaps, 0);
> +		if (error)
> +			goto out_trans_cancel;
> +		ASSERT(nimaps == 1);
> +
> +		/* Trim the mapping to the nearest shared extent boundary. */
> +		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
> +				&trimmed);
> +		if (error)
> +			goto out_trans_cancel;
> +
> +		if (!shared) {
> +			*offset_fsb = imap.br_startoff + imap.br_blockcount;
> +			goto out_trans_cancel;
> +		}
>  
> -	/* Make sure there's a CoW reservation for it. */
> -	error = xfs_reflink_reserve_cow(ip, &imap, &shared);
> +		*offset_fsb = imap.br_startoff;
> +		end_fsb = imap.br_startoff + imap.br_blockcount;
> +	}
> +
> +	error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
> +			XFS_QMOPT_RES_REGBLKS);
>  	if (error)
>  		goto out_trans_cancel;
>  
> -	if (!shared) {
> -		*offset_fsb = imap.br_startoff + imap.br_blockcount;
> -		goto out_trans_cancel;
> -	}
> +	xfs_trans_ijoin(tp, ip, 0);
> +
> +	xfs_defer_init(&dfops, &first_block);
> +	nimaps = 1;
>  
>  	/* Allocate the entire reservation as unwritten blocks. */
> -	xfs_trans_ijoin(tp, ip, 0);
> -	error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
> +	error = xfs_bmapi_write(tp, ip, *offset_fsb, end_fsb - *offset_fsb,
>  			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
> -			XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
> -			&imap, &nimaps, &dfops);
> +			resblks, &imap, &nimaps, &dfops);
>  	if (error)
> -		goto out_trans_cancel;
> +		goto out_bmap_cancel;
>  
>  	/* Finish up. */
>  	error = xfs_defer_finish(&tp, &dfops, NULL);
>  	if (error)
> -		goto out_trans_cancel;
> +		goto out_bmap_cancel;
>  
>  	error = xfs_trans_commit(tp);
> +	if (error)
> +		goto out_unlock;
>  
>  	*offset_fsb = imap.br_startoff + imap.br_blockcount;
> +
>  out_unlock:
> -	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +	xfs_iunlock(ip, lockmode);
>  	return error;
> -out_trans_cancel:
> +
> +out_bmap_cancel:
>  	xfs_defer_cancel(&dfops);
> +	xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
> +			XFS_QMOPT_RES_REGBLKS);
> +out_trans_cancel:
>  	xfs_trans_cancel(tp);
>  	goto out_unlock;
>  }
> -- 
> 2.11.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin
  2017-02-06  7:47 ` [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin Christoph Hellwig
@ 2017-02-07  1:41   ` Darrick J. Wong
  2017-02-09  7:21     ` Christoph Hellwig
  0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2017-02-07  1:41 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Mon, Feb 06, 2017 at 08:47:38AM +0100, Christoph Hellwig wrote:
> Instead of preallocating all the required COW blocks in the high-level
> write code do it inside the iomap code, like we do for all other I/O.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Ok, got all the pieces together and they look ok to me.

I ran this through the dio reflink tests and they pass now.

I'm going to run this series (and all the other stuff I've collected for
4.11) overnight and if nothing screams then you can consider this series:

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_file.c    |   8 ---
>  fs/xfs/xfs_iomap.c   |  31 +++++------
>  fs/xfs/xfs_reflink.c | 144 +++++++++++++++++++--------------------------------
>  fs/xfs/xfs_reflink.h |   4 +-
>  fs/xfs/xfs_trace.h   |   2 -
>  5 files changed, 66 insertions(+), 123 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index b1f74f1d0b5f..1ff32d34514f 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -561,14 +561,6 @@ xfs_file_dio_aio_write(
>  	}
>  
>  	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
> -
> -	/* If this is a block-aligned directio CoW, remap immediately. */
> -	if (xfs_is_reflink_inode(ip) && !unaligned_io) {
> -		ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count);
> -		if (ret)
> -			goto out;
> -	}
> -
>  	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
>  out:
>  	xfs_iunlock(ip, iolock);
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 9d811eb1a416..0978a5f0b50c 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -995,37 +995,31 @@ xfs_file_iomap_begin(
>  	offset_fsb = XFS_B_TO_FSBT(mp, offset);
>  	end_fsb = XFS_B_TO_FSB(mp, offset + length);
>  
> -	if (xfs_is_reflink_inode(ip) &&
> -	    (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) {
> -		shared = xfs_reflink_find_cow_mapping(ip, offset, &imap);
> -		if (shared) {
> -			xfs_iunlock(ip, lockmode);
> -			goto alloc_done;
> -		}
> -		ASSERT(!isnullstartblock(imap.br_startblock));
> -	}
> -
>  	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
>  			       &nimaps, 0);
>  	if (error)
>  		goto out_unlock;
>  
> -	if ((flags & IOMAP_REPORT) ||
> -	    (xfs_is_reflink_inode(ip) &&
> -	     (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) {
> +	if (flags & IOMAP_REPORT) {
>  		/* Trim the mapping to the nearest shared extent boundary. */
>  		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
>  				&trimmed);
>  		if (error)
>  			goto out_unlock;
> -
> -		ASSERT((flags & IOMAP_REPORT) || !shared);
>  	}
>  
>  	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
> -		error = xfs_reflink_reserve_cow(ip, &imap, &shared);
> -		if (error)
> -			goto out_unlock;
> +		if (flags & IOMAP_DIRECT) {
> +			/* may drop and re-acquire the ilock */
> +			error = xfs_reflink_allocate_cow(ip, &imap, &shared,
> +					&lockmode);
> +			if (error)
> +				goto out_unlock;
> +		} else {
> +			error = xfs_reflink_reserve_cow(ip, &imap, &shared);
> +			if (error)
> +				goto out_unlock;
> +		}
>  
>  		end_fsb = imap.br_startoff + imap.br_blockcount;
>  		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
> @@ -1054,7 +1048,6 @@ xfs_file_iomap_begin(
>  		if (error)
>  			return error;
>  
> -alloc_done:
>  		iomap->flags = IOMAP_F_NEW;
>  		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
>  	} else {
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index e577f54beb2f..68d0fbdd0929 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -383,74 +383,75 @@ xfs_reflink_convert_cow(
>  }
>  
>  /* Allocate all CoW reservations covering a range of blocks in a file. */
> -static int
> -__xfs_reflink_allocate_cow(
> +int
> +xfs_reflink_allocate_cow(
>  	struct xfs_inode	*ip,
> -	xfs_fileoff_t		*offset_fsb,
> -	xfs_fileoff_t		end_fsb)
> +	struct xfs_bmbt_irec	*imap,
> +	bool			*shared,
> +	uint			*lockmode)
>  {
>  	struct xfs_mount	*mp = ip->i_mount;
> -	struct xfs_bmbt_irec	imap, got;
> +	xfs_fileoff_t		offset_fsb = imap->br_startoff;
> +	xfs_filblks_t		count_fsb = imap->br_blockcount;
> +	struct xfs_bmbt_irec	got;
>  	struct xfs_defer_ops	dfops;
> -	struct xfs_trans	*tp;
> +	struct xfs_trans	*tp = NULL;
>  	xfs_fsblock_t		first_block;
> -	int			nimaps, error, lockmode;
> -	bool			shared, trimmed;
> +	int			nimaps, error = 0;
> +	bool			trimmed;
>  	xfs_filblks_t		resaligned;
> -	xfs_extlen_t		resblks;
> +	xfs_extlen_t		resblks = 0;
>  	xfs_extnum_t		idx;
>  
> -	resaligned = xfs_aligned_fsb_count(*offset_fsb, end_fsb - *offset_fsb,
> -			xfs_get_cowextsz_hint(ip));
> -	resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
> -
> -	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
> -	if (error)
> -		return error;
> -
> -	lockmode = XFS_ILOCK_EXCL;
> -	xfs_ilock(ip, lockmode);
> +retry:
> +	ASSERT(xfs_is_reflink_inode(ip));
> +	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
>  
>  	/*
>  	 * Even if the extent is not shared we might have a preallocation for
>  	 * it in the COW fork.  If so use it.
>  	 */
> -	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, *offset_fsb, &idx, &got) &&
> -	    got.br_startoff <= *offset_fsb) {
> +	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) &&
> +	    got.br_startoff <= offset_fsb) {
> +		*shared = true;
> +
>  		/* If we have a real allocation in the COW fork we're done. */
>  		if (!isnullstartblock(got.br_startblock)) {
> -			xfs_trim_extent(&got, *offset_fsb,
> -					end_fsb - *offset_fsb);
> -			*offset_fsb = got.br_startoff + got.br_blockcount;
> -			goto out_trans_cancel;
> +			xfs_trim_extent(&got, offset_fsb, count_fsb);
> +			*imap = got;
> +			goto convert;
>  		}
> +
> +		xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
>  	} else {
> -		nimaps = 1;
> -		error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
> -				&imap, &nimaps, 0);
> -		if (error)
> -			goto out_trans_cancel;
> -		ASSERT(nimaps == 1);
> +		error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
> +		if (error || !*shared)
> +			goto out;
> +	}
>  
> -		/* Trim the mapping to the nearest shared extent boundary. */
> -		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
> -				&trimmed);
> -		if (error)
> -			goto out_trans_cancel;
> +	if (!tp) {
> +		resaligned = xfs_aligned_fsb_count(imap->br_startoff,
> +			imap->br_blockcount, xfs_get_cowextsz_hint(ip));
> +		resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
>  
> -		if (!shared) {
> -			*offset_fsb = imap.br_startoff + imap.br_blockcount;
> -			goto out_trans_cancel;
> -		}
> +		xfs_iunlock(ip, *lockmode);
> +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
> +		*lockmode = XFS_ILOCK_EXCL;
> +		xfs_ilock(ip, *lockmode);
> +
> +		if (error)
> +			return error;
>  
> -		*offset_fsb = imap.br_startoff;
> -		end_fsb = imap.br_startoff + imap.br_blockcount;
> +		error = xfs_qm_dqattach_locked(ip, 0);
> +		if (error)
> +			goto out;
> +		goto retry;
>  	}
>  
>  	error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
>  			XFS_QMOPT_RES_REGBLKS);
>  	if (error)
> -		goto out_trans_cancel;
> +		goto out;
>  
>  	xfs_trans_ijoin(tp, ip, 0);
>  
> @@ -458,9 +459,9 @@ __xfs_reflink_allocate_cow(
>  	nimaps = 1;
>  
>  	/* Allocate the entire reservation as unwritten blocks. */
> -	error = xfs_bmapi_write(tp, ip, *offset_fsb, end_fsb - *offset_fsb,
> +	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
>  			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
> -			resblks, &imap, &nimaps, &dfops);
> +			resblks, imap, &nimaps, &dfops);
>  	if (error)
>  		goto out_bmap_cancel;
>  
> @@ -471,58 +472,17 @@ __xfs_reflink_allocate_cow(
>  
>  	error = xfs_trans_commit(tp);
>  	if (error)
> -		goto out_unlock;
> -
> -	*offset_fsb = imap.br_startoff + imap.br_blockcount;
> -
> -out_unlock:
> -	xfs_iunlock(ip, lockmode);
> -	return error;
> -
> +		return error;
> +convert:
> +	return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb,
> +			&dfops);
>  out_bmap_cancel:
>  	xfs_defer_cancel(&dfops);
>  	xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
>  			XFS_QMOPT_RES_REGBLKS);
> -out_trans_cancel:
> -	xfs_trans_cancel(tp);
> -	goto out_unlock;
> -}
> -
> -/* Allocate all CoW reservations covering a part of a file. */
> -int
> -xfs_reflink_allocate_cow_range(
> -	struct xfs_inode	*ip,
> -	xfs_off_t		offset,
> -	xfs_off_t		count)
> -{
> -	struct xfs_mount	*mp = ip->i_mount;
> -	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
> -	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
> -	int			error;
> -
> -	ASSERT(xfs_is_reflink_inode(ip));
> -
> -	trace_xfs_reflink_allocate_cow_range(ip, offset, count);
> -
> -	/*
> -	 * Make sure that the dquots are there.
> -	 */
> -	error = xfs_qm_dqattach(ip, 0);
> -	if (error)
> -		return error;
> -
> -	while (offset_fsb < end_fsb) {
> -		error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb);
> -		if (error) {
> -			trace_xfs_reflink_allocate_cow_range_error(ip, error,
> -					_RET_IP_);
> -			goto out;
> -		}
> -	}
> -
> -	/* Convert the CoW extents to regular. */
> -	error = xfs_reflink_convert_cow(ip, offset, count);
>  out:
> +	if (tp)
> +		xfs_trans_cancel(tp);
>  	return error;
>  }
>  
> diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
> index 1583c4727cb1..33ac9b8db683 100644
> --- a/fs/xfs/xfs_reflink.h
> +++ b/fs/xfs/xfs_reflink.h
> @@ -28,8 +28,8 @@ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
>  
>  extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
>  		struct xfs_bmbt_irec *imap, bool *shared);
> -extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
> -		xfs_off_t offset, xfs_off_t count);
> +extern int xfs_reflink_allocate_cow(struct xfs_inode *ip,
> +		struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode);
>  extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
>  		xfs_off_t count);
>  extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 375c5e030e5b..32bad7bff840 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -3248,7 +3248,6 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
>  DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
>  
>  DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
> -DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
>  
>  DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write);
>  DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);
> @@ -3258,7 +3257,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
>  DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
>  DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
>  
> -DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
>  DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
>  DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
>  
> -- 
> 2.11.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin
  2017-02-07  1:41   ` Darrick J. Wong
@ 2017-02-09  7:21     ` Christoph Hellwig
  2017-02-09  7:53       ` Darrick J. Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2017-02-09  7:21 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs

On Mon, Feb 06, 2017 at 05:41:49PM -0800, Darrick J. Wong wrote:
> I'm going to run this series (and all the other stuff I've collected for
> 4.11) overnight and if nothing screams then you can consider this series:

Can you push your tree out?  I'd like to verify what made it before
heading off for a long weekend tonight. I'm especially curious if
the discard work made it.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin
  2017-02-09  7:21     ` Christoph Hellwig
@ 2017-02-09  7:53       ` Darrick J. Wong
  2017-02-09  7:58         ` Christoph Hellwig
  0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2017-02-09  7:53 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Feb 09, 2017 at 08:21:07AM +0100, Christoph Hellwig wrote:
> On Mon, Feb 06, 2017 at 05:41:49PM -0800, Darrick J. Wong wrote:
> > I'm going to run this series (and all the other stuff I've collected for
> > 4.11) overnight and if nothing screams then you can consider this series:
> 
> Can you push your tree out?  I'd like to verify what made it before
> heading off for a long weekend tonight. I'm especially curious if
> the discard work made it.

It's very late tonight, so all the shiny polish is missing, but here's
what's in my tree for 4.11 right now:
https://git.kernel.org/cgit/fs/xfs/xfs-linux.git/log/?h=xfs-4.11-merge-20170208

Testing isn't done yet, but xfs/222 seems to be blowing up at
ASSERT(!rwsem_is_locked(&inode->i_rwsem)) in xfs_super.c fairly
consistently with blocksize=1k.  I haven't been able to reproduce it
quickly (i.e. without running the whole test suite) so I can't tell if
that's a side effect of something else blowing up or what.  generic/300
seems to blow up periodically and then blows the same assert on umount,
also in the 1k case.  xfs/348 fuzzes the fs, causes "kernel memory
exposure!" BUGs and then asserts with the same i_rwsem thing.
 
The all-defaults 4k blocksize test runs w/ regular disk and pmem all
finished without any new fireworks, though.

(You'll note I didn't merge the duplicate "xfs: improve handling of busy
extents in the low-level allocator"; if you want that done, please let me
know.)

--D

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin
  2017-02-09  7:53       ` Darrick J. Wong
@ 2017-02-09  7:58         ` Christoph Hellwig
  2017-02-09  8:00           ` Christoph Hellwig
  2017-02-09 17:22           ` Christoph Hellwig
  0 siblings, 2 replies; 18+ messages in thread
From: Christoph Hellwig @ 2017-02-09  7:58 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs

On Wed, Feb 08, 2017 at 11:53:22PM -0800, Darrick J. Wong wrote:
> Testing isn't done yet, but xfs/222 seems to be blowing up at
> ASSERT(!rwsem_is_locked(&inode->i_rwsem)) in xfs_super.c fairly
> consistently with blocksize=1k.  I haven't been able to reproduce it
> quickly (i.e. without running the whole test suite) so I can't tell if
> that's a side effect of something else blowing up or what.  generic/300
> seems to blow up periodically and then blows the same assert on umount,
> also in the 1k case.  xfs/348 fuzzes the fs, causes "kernel memory
> exposure!" BUGs and then asserts with the same i_rwsem thing.

I'll take a look at the umount assert while you're asleep.  348 is
a pretty new test, so I doubt it's a regrewssion.

> (You'll note I didn't merge the duplicate "xfs: improve handling of busy
> extents in the low-level allocator"; if you want that done, please let me
> know.)

Yes, it should be folded into the first patch of that name and descriptions.
It contains the fixups that Brian requested.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin
  2017-02-09  7:58         ` Christoph Hellwig
@ 2017-02-09  8:00           ` Christoph Hellwig
  2017-02-09 17:22           ` Christoph Hellwig
  1 sibling, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2017-02-09  8:00 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs

On Thu, Feb 09, 2017 at 08:58:41AM +0100, Christoph Hellwig wrote:
> I'll take a look at the umount assert while you're asleep.  348 is
> a pretty new test, so I doubt it's a regrewssion.
> 
> > (You'll note I didn't merge the duplicate "xfs: improve handling of busy
> > extents in the low-level allocator"; if you want that done, please let me
> > know.)
> 
> Yes, it should be folded into the first patch of that name and descriptions.
> It contains the fixups that Brian requested.

Actually the tree seems to have both now that I'm actually reading
through it.  But if you happen to rebase again please fold the second
into the first.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin
  2017-02-09  7:58         ` Christoph Hellwig
  2017-02-09  8:00           ` Christoph Hellwig
@ 2017-02-09 17:22           ` Christoph Hellwig
  1 sibling, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2017-02-09 17:22 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs

On Thu, Feb 09, 2017 at 08:58:41AM +0100, Christoph Hellwig wrote:
> On Wed, Feb 08, 2017 at 11:53:22PM -0800, Darrick J. Wong wrote:
> > Testing isn't done yet, but xfs/222 seems to be blowing up at
> > ASSERT(!rwsem_is_locked(&inode->i_rwsem)) in xfs_super.c fairly
> > consistently with blocksize=1k.  I haven't been able to reproduce it
> > quickly (i.e. without running the whole test suite) so I can't tell if
> > that's a side effect of something else blowing up or what.  generic/300
> > seems to blow up periodically and then blows the same assert on umount,
> > also in the 1k case.  xfs/348 fuzzes the fs, causes "kernel memory
> > exposure!" BUGs and then asserts with the same i_rwsem thing.
> 
> I'll take a look at the umount assert while you're asleep.  348 is
> a pretty new test, so I doubt it's a regrewssion.

I could not reproduce the issue here.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin
  2017-02-03 10:53     ` Christoph Hellwig
@ 2017-02-05 20:13       ` Christoph Hellwig
  0 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2017-02-05 20:13 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs

On Fri, Feb 03, 2017 at 11:53:59AM +0100, Christoph Hellwig wrote:
> > I think this is incorrect -- we create an unwritten extent in the cow
> > fork above (BMAP_COWFORK | BMAP_PREALLOC) but we no longer convert the
> > part we're actually writing to a real extent.  This means that
> > _reflink_cow won't actually remap anything that we've written.
> 
> It's not correct and I messed up when rebasing - the version I tested
> still had it and worked.  Sigh..

Or rather I hadn't finished rebasing and the version I had was totally
b0rked.  Proper one in progress now..

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin
  2017-02-03  0:55   ` Darrick J. Wong
@ 2017-02-03 10:53     ` Christoph Hellwig
  2017-02-05 20:13       ` Christoph Hellwig
  0 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2017-02-03 10:53 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs

On Thu, Feb 02, 2017 at 04:55:23PM -0800, Darrick J. Wong wrote:
> On Wed, Feb 01, 2017 at 10:42:39PM +0100, Christoph Hellwig wrote:
> > Instead of preallocating all the required COW blocks in the high-level
> > write code do it inside the iomap code, like we do for all other I/O.
> > 
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > ---
> >  fs/xfs/xfs_file.c    |   8 ---
> >  fs/xfs/xfs_iomap.c   |  31 +++++------
> >  fs/xfs/xfs_reflink.c | 147 ++++++++++++++++++++-------------------------------
> >  fs/xfs/xfs_reflink.h |   5 +-
> >  fs/xfs/xfs_trace.h   |   2 -
> >  5 files changed, 71 insertions(+), 122 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > index 06236bf..56ac5b7 100644
> > --- a/fs/xfs/xfs_file.c
> > +++ b/fs/xfs/xfs_file.c
> > @@ -559,14 +559,6 @@ xfs_file_dio_aio_write(
> >  	}
> >  
> >  	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
> > -
> > -	/* If this is a block-aligned directio CoW, remap immediately. */
> > -	if (xfs_is_reflink_inode(ip) && !unaligned_io) {
> > -		ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count);
> > -		if (ret)
> > -			goto out;
> > -	}
> > -
> >  	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
> >  out:
> >  	xfs_iunlock(ip, iolock);
> > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> > index 9d811eb..de717e7 100644
> > --- a/fs/xfs/xfs_iomap.c
> > +++ b/fs/xfs/xfs_iomap.c
> > @@ -995,37 +995,31 @@ xfs_file_iomap_begin(
> >  	offset_fsb = XFS_B_TO_FSBT(mp, offset);
> >  	end_fsb = XFS_B_TO_FSB(mp, offset + length);
> >  
> > -	if (xfs_is_reflink_inode(ip) &&
> > -	    (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) {
> > -		shared = xfs_reflink_find_cow_mapping(ip, offset, &imap);
> > -		if (shared) {
> > -			xfs_iunlock(ip, lockmode);
> > -			goto alloc_done;
> > -		}
> > -		ASSERT(!isnullstartblock(imap.br_startblock));
> > -	}
> > -
> >  	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
> >  			       &nimaps, 0);
> >  	if (error)
> >  		goto out_unlock;
> >  
> > -	if ((flags & IOMAP_REPORT) ||
> > -	    (xfs_is_reflink_inode(ip) &&
> > -	     (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) {
> > +	if (flags & IOMAP_REPORT) {
> >  		/* Trim the mapping to the nearest shared extent boundary. */
> >  		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
> >  				&trimmed);
> >  		if (error)
> >  			goto out_unlock;
> > -
> > -		ASSERT((flags & IOMAP_REPORT) || !shared);
> >  	}
> >  
> >  	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
> > -		error = xfs_reflink_reserve_cow(ip, &imap, &shared);
> > -		if (error)
> > -			goto out_unlock;
> > +		if (flags & IOMAP_DIRECT) {
> > +			/* may drop and re-acquire the ilock */
> > +			error = xfs_reflink_allocate_cow(ip, offset_fsb, end_fsb,
> > +					&imap, &shared, &lockmode);
> > +			if (error)
> > +				goto out_unlock;
> > +		} else {
> > +			error = xfs_reflink_reserve_cow(ip, &imap, &shared);
> > +			if (error)
> > +				goto out_unlock;
> > +		}
> >  
> >  		end_fsb = imap.br_startoff + imap.br_blockcount;
> >  		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
> > @@ -1054,7 +1048,6 @@ xfs_file_iomap_begin(
> >  		if (error)
> >  			return error;
> >  
> > -alloc_done:
> >  		iomap->flags = IOMAP_F_NEW;
> >  		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
> >  	} else {
> > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> > index 1a96741..d5a2cf2 100644
> > --- a/fs/xfs/xfs_reflink.c
> > +++ b/fs/xfs/xfs_reflink.c
> > @@ -384,74 +384,84 @@ xfs_reflink_convert_cow(
> >  }
> >  
> >  /* Allocate all CoW reservations covering a range of blocks in a file. */
> > -static int
> > -__xfs_reflink_allocate_cow(
> > +int
> > +xfs_reflink_allocate_cow(
> >  	struct xfs_inode	*ip,
> > -	xfs_fileoff_t		*offset_fsb,
> > -	xfs_fileoff_t		end_fsb)
> > +	xfs_fileoff_t		offset_fsb,
> > +	xfs_fileoff_t		end_fsb,
> > +	struct xfs_bmbt_irec	*imap,
> > +	bool			*shared,
> > +	uint			*lockmode)
> >  {
> >  	struct xfs_mount	*mp = ip->i_mount;
> > -	struct xfs_bmbt_irec	imap, got;
> > +	struct xfs_bmbt_irec	got;
> >  	struct xfs_defer_ops	dfops;
> > -	struct xfs_trans	*tp;
> > +	struct xfs_trans	*tp = NULL;
> >  	xfs_fsblock_t		first_block;
> > -	int			nimaps, error, lockmode;
> > -	bool			shared, trimmed;
> > +	int			nimaps, error = 0;
> > +	bool			trimmed;
> >  	xfs_filblks_t		resaligned;
> > -	xfs_extlen_t		resblks;
> > +	xfs_extlen_t		resblks = 0;
> >  	xfs_extnum_t		idx;
> >  
> > -	resaligned = xfs_aligned_fsb_count(*offset_fsb, end_fsb - *offset_fsb,
> > -			xfs_get_cowextsz_hint(ip));
> > -	resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
> > -
> > -	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
> > -	if (error)
> > -		return error;
> > -
> > -	lockmode = XFS_ILOCK_EXCL;
> > -	xfs_ilock(ip, lockmode);
> > +retry:
> > +	ASSERT(xfs_is_reflink_inode(ip));
> > +	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
> >  
> >  	/*
> >  	 * Even if the extent is not shared we might have a preallocation for
> >  	 * it in the COW fork.  If so use it.
> >  	 */
> > -	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, *offset_fsb, &idx, &got) &&
> > -	    got.br_startoff <= *offset_fsb) {
> > +	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) &&
> > +	    got.br_startoff <= offset_fsb) {
> > +		*shared = true;
> > +
> >  		/* If we have a real allocation in the COW fork we're done. */
> >  		if (!isnullstartblock(got.br_startblock)) {
> > -			xfs_trim_extent(&got, *offset_fsb,
> > -					end_fsb - *offset_fsb);
> > -			*offset_fsb = got.br_startoff + got.br_blockcount;
> > -			goto out_trans_cancel;
> > +			xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
> > +			*imap = got;
> > +			goto out;
> >  		}
> > +
> > +		xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
> >  	} else {
> > -		nimaps = 1;
> > -		error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
> > -				&imap, &nimaps, 0);
> > +		error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
> > +		if (error || !*shared)
> > +			goto out;
> > +	}
> > +
> > +	if (!tp) {
> > +		resaligned = xfs_aligned_fsb_count(offset_fsb,
> > +			end_fsb - offset_fsb, xfs_get_cowextsz_hint(ip));
> > +		resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
> > +
> > +		xfs_iunlock(ip, *lockmode);
> > +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
> > +		*lockmode = XFS_ILOCK_EXCL;
> > +		xfs_ilock(ip, *lockmode);
> > +
> >  		if (error)
> > -			goto out_trans_cancel;
> > -		ASSERT(nimaps == 1);
> > +			return error;
> >  
> > -		/* Trim the mapping to the nearest shared extent boundary. */
> > -		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
> > -				&trimmed);
> > +		error = xfs_qm_dqattach_locked(ip, 0);
> >  		if (error)
> > -			goto out_trans_cancel;
> > +			goto out;
> >  
> > -		if (!shared) {
> > -			*offset_fsb = imap.br_startoff + imap.br_blockcount;
> > -			goto out_trans_cancel;
> > -		}
> > +		/* Read extent from the source file. */
> > +		nimaps = 1;
> > +		error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
> > +				imap, &nimaps, 0);
> > +		if (error)
> > +			goto out;
> > +		ASSERT(nimaps == 1);
> >  
> > -		*offset_fsb = imap.br_startoff;
> > -		end_fsb = imap.br_startoff + imap.br_blockcount;
> > +		goto retry;
> >  	}
> >  
> >  	error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
> >  			XFS_QMOPT_RES_REGBLKS);
> >  	if (error)
> > -		goto out_trans_cancel;
> > +		goto out;
> >  
> >  	xfs_trans_ijoin(tp, ip, 0);
> >  
> > @@ -459,9 +469,9 @@ __xfs_reflink_allocate_cow(
> >  	nimaps = 1;
> >  
> >  	/* Allocate the entire reservation as unwritten blocks. */
> > -	error = xfs_bmapi_write(tp, ip, *offset_fsb, end_fsb - *offset_fsb,
> > +	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
> >  			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
> > -			resblks, &imap, &nimaps, &dfops);
> > +			resblks, imap, &nimaps, &dfops);
> >  	if (error)
> >  		goto out_bmap_cancel;
> >  
> > @@ -470,60 +480,15 @@ __xfs_reflink_allocate_cow(
> >  	if (error)
> >  		goto out_bmap_cancel;
> >  
> > -	error = xfs_trans_commit(tp);
> > -	if (error)
> > -		goto out_unlock;
> > -
> > -	*offset_fsb = imap.br_startoff + imap.br_blockcount;
> > -
> > -out_unlock:
> > -	xfs_iunlock(ip, lockmode);
> > -	return error;
> > +	return xfs_trans_commit(tp);
> >  
> >  out_bmap_cancel:
> >  	xfs_defer_cancel(&dfops);
> >  	xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
> >  			XFS_QMOPT_RES_REGBLKS);
> > -out_trans_cancel:
> > -	xfs_trans_cancel(tp);
> > -	goto out_unlock;
> > -}
> > -
> > -/* Allocate all CoW reservations covering a part of a file. */
> > -int
> > -xfs_reflink_allocate_cow_range(
> > -	struct xfs_inode	*ip,
> > -	xfs_off_t		offset,
> > -	xfs_off_t		count)
> > -{
> > -	struct xfs_mount	*mp = ip->i_mount;
> > -	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
> > -	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
> > -	int			error;
> > -
> > -	ASSERT(xfs_is_reflink_inode(ip));
> > -
> > -	trace_xfs_reflink_allocate_cow_range(ip, offset, count);
> > -
> > -	/*
> > -	 * Make sure that the dquots are there.
> > -	 */
> > -	error = xfs_qm_dqattach(ip, 0);
> > -	if (error)
> > -		return error;
> > -
> > -	while (offset_fsb < end_fsb) {
> > -		error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb);
> > -		if (error) {
> > -			trace_xfs_reflink_allocate_cow_range_error(ip, error,
> > -					_RET_IP_);
> > -			goto out;
> > -		}
> > -	}
> > -
> > -	/* Convert the CoW extents to regular. */
> > -	error = xfs_reflink_convert_cow(ip, offset, count);
> 
> I think this is incorrect -- we create an unwritten extent in the cow
> fork above (BMAP_COWFORK | BMAP_PREALLOC) but we no longer convert the
> part we're actually writing to a real extent.  This means that
> _reflink_cow won't actually remap anything that we've written.

It's not correct and I messed up when rebasing - the version I tested
still had it and worked.  Sigh..

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin
  2017-02-01 21:42 ` [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin Christoph Hellwig
@ 2017-02-03  0:55   ` Darrick J. Wong
  2017-02-03 10:53     ` Christoph Hellwig
  0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2017-02-03  0:55 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, Feb 01, 2017 at 10:42:39PM +0100, Christoph Hellwig wrote:
> Instead of preallocating all the required COW blocks in the high-level
> write code do it inside the iomap code, like we do for all other I/O.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_file.c    |   8 ---
>  fs/xfs/xfs_iomap.c   |  31 +++++------
>  fs/xfs/xfs_reflink.c | 147 ++++++++++++++++++++-------------------------------
>  fs/xfs/xfs_reflink.h |   5 +-
>  fs/xfs/xfs_trace.h   |   2 -
>  5 files changed, 71 insertions(+), 122 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 06236bf..56ac5b7 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -559,14 +559,6 @@ xfs_file_dio_aio_write(
>  	}
>  
>  	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
> -
> -	/* If this is a block-aligned directio CoW, remap immediately. */
> -	if (xfs_is_reflink_inode(ip) && !unaligned_io) {
> -		ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count);
> -		if (ret)
> -			goto out;
> -	}
> -
>  	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
>  out:
>  	xfs_iunlock(ip, iolock);
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 9d811eb..de717e7 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -995,37 +995,31 @@ xfs_file_iomap_begin(
>  	offset_fsb = XFS_B_TO_FSBT(mp, offset);
>  	end_fsb = XFS_B_TO_FSB(mp, offset + length);
>  
> -	if (xfs_is_reflink_inode(ip) &&
> -	    (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) {
> -		shared = xfs_reflink_find_cow_mapping(ip, offset, &imap);
> -		if (shared) {
> -			xfs_iunlock(ip, lockmode);
> -			goto alloc_done;
> -		}
> -		ASSERT(!isnullstartblock(imap.br_startblock));
> -	}
> -
>  	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
>  			       &nimaps, 0);
>  	if (error)
>  		goto out_unlock;
>  
> -	if ((flags & IOMAP_REPORT) ||
> -	    (xfs_is_reflink_inode(ip) &&
> -	     (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) {
> +	if (flags & IOMAP_REPORT) {
>  		/* Trim the mapping to the nearest shared extent boundary. */
>  		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
>  				&trimmed);
>  		if (error)
>  			goto out_unlock;
> -
> -		ASSERT((flags & IOMAP_REPORT) || !shared);
>  	}
>  
>  	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
> -		error = xfs_reflink_reserve_cow(ip, &imap, &shared);
> -		if (error)
> -			goto out_unlock;
> +		if (flags & IOMAP_DIRECT) {
> +			/* may drop and re-acquire the ilock */
> +			error = xfs_reflink_allocate_cow(ip, offset_fsb, end_fsb,
> +					&imap, &shared, &lockmode);
> +			if (error)
> +				goto out_unlock;
> +		} else {
> +			error = xfs_reflink_reserve_cow(ip, &imap, &shared);
> +			if (error)
> +				goto out_unlock;
> +		}
>  
>  		end_fsb = imap.br_startoff + imap.br_blockcount;
>  		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
> @@ -1054,7 +1048,6 @@ xfs_file_iomap_begin(
>  		if (error)
>  			return error;
>  
> -alloc_done:
>  		iomap->flags = IOMAP_F_NEW;
>  		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
>  	} else {
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index 1a96741..d5a2cf2 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -384,74 +384,84 @@ xfs_reflink_convert_cow(
>  }
>  
>  /* Allocate all CoW reservations covering a range of blocks in a file. */
> -static int
> -__xfs_reflink_allocate_cow(
> +int
> +xfs_reflink_allocate_cow(
>  	struct xfs_inode	*ip,
> -	xfs_fileoff_t		*offset_fsb,
> -	xfs_fileoff_t		end_fsb)
> +	xfs_fileoff_t		offset_fsb,
> +	xfs_fileoff_t		end_fsb,
> +	struct xfs_bmbt_irec	*imap,
> +	bool			*shared,
> +	uint			*lockmode)
>  {
>  	struct xfs_mount	*mp = ip->i_mount;
> -	struct xfs_bmbt_irec	imap, got;
> +	struct xfs_bmbt_irec	got;
>  	struct xfs_defer_ops	dfops;
> -	struct xfs_trans	*tp;
> +	struct xfs_trans	*tp = NULL;
>  	xfs_fsblock_t		first_block;
> -	int			nimaps, error, lockmode;
> -	bool			shared, trimmed;
> +	int			nimaps, error = 0;
> +	bool			trimmed;
>  	xfs_filblks_t		resaligned;
> -	xfs_extlen_t		resblks;
> +	xfs_extlen_t		resblks = 0;
>  	xfs_extnum_t		idx;
>  
> -	resaligned = xfs_aligned_fsb_count(*offset_fsb, end_fsb - *offset_fsb,
> -			xfs_get_cowextsz_hint(ip));
> -	resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
> -
> -	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
> -	if (error)
> -		return error;
> -
> -	lockmode = XFS_ILOCK_EXCL;
> -	xfs_ilock(ip, lockmode);
> +retry:
> +	ASSERT(xfs_is_reflink_inode(ip));
> +	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
>  
>  	/*
>  	 * Even if the extent is not shared we might have a preallocation for
>  	 * it in the COW fork.  If so use it.
>  	 */
> -	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, *offset_fsb, &idx, &got) &&
> -	    got.br_startoff <= *offset_fsb) {
> +	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) &&
> +	    got.br_startoff <= offset_fsb) {
> +		*shared = true;
> +
>  		/* If we have a real allocation in the COW fork we're done. */
>  		if (!isnullstartblock(got.br_startblock)) {
> -			xfs_trim_extent(&got, *offset_fsb,
> -					end_fsb - *offset_fsb);
> -			*offset_fsb = got.br_startoff + got.br_blockcount;
> -			goto out_trans_cancel;
> +			xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
> +			*imap = got;
> +			goto out;
>  		}
> +
> +		xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
>  	} else {
> -		nimaps = 1;
> -		error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
> -				&imap, &nimaps, 0);
> +		error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
> +		if (error || !*shared)
> +			goto out;
> +	}
> +
> +	if (!tp) {
> +		resaligned = xfs_aligned_fsb_count(offset_fsb,
> +			end_fsb - offset_fsb, xfs_get_cowextsz_hint(ip));
> +		resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
> +
> +		xfs_iunlock(ip, *lockmode);
> +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
> +		*lockmode = XFS_ILOCK_EXCL;
> +		xfs_ilock(ip, *lockmode);
> +
>  		if (error)
> -			goto out_trans_cancel;
> -		ASSERT(nimaps == 1);
> +			return error;
>  
> -		/* Trim the mapping to the nearest shared extent boundary. */
> -		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
> -				&trimmed);
> +		error = xfs_qm_dqattach_locked(ip, 0);
>  		if (error)
> -			goto out_trans_cancel;
> +			goto out;
>  
> -		if (!shared) {
> -			*offset_fsb = imap.br_startoff + imap.br_blockcount;
> -			goto out_trans_cancel;
> -		}
> +		/* Read extent from the source file. */
> +		nimaps = 1;
> +		error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
> +				imap, &nimaps, 0);
> +		if (error)
> +			goto out;
> +		ASSERT(nimaps == 1);
>  
> -		*offset_fsb = imap.br_startoff;
> -		end_fsb = imap.br_startoff + imap.br_blockcount;
> +		goto retry;
>  	}
>  
>  	error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
>  			XFS_QMOPT_RES_REGBLKS);
>  	if (error)
> -		goto out_trans_cancel;
> +		goto out;
>  
>  	xfs_trans_ijoin(tp, ip, 0);
>  
> @@ -459,9 +469,9 @@ __xfs_reflink_allocate_cow(
>  	nimaps = 1;
>  
>  	/* Allocate the entire reservation as unwritten blocks. */
> -	error = xfs_bmapi_write(tp, ip, *offset_fsb, end_fsb - *offset_fsb,
> +	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
>  			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
> -			resblks, &imap, &nimaps, &dfops);
> +			resblks, imap, &nimaps, &dfops);
>  	if (error)
>  		goto out_bmap_cancel;
>  
> @@ -470,60 +480,15 @@ __xfs_reflink_allocate_cow(
>  	if (error)
>  		goto out_bmap_cancel;
>  
> -	error = xfs_trans_commit(tp);
> -	if (error)
> -		goto out_unlock;
> -
> -	*offset_fsb = imap.br_startoff + imap.br_blockcount;
> -
> -out_unlock:
> -	xfs_iunlock(ip, lockmode);
> -	return error;
> +	return xfs_trans_commit(tp);
>  
>  out_bmap_cancel:
>  	xfs_defer_cancel(&dfops);
>  	xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
>  			XFS_QMOPT_RES_REGBLKS);
> -out_trans_cancel:
> -	xfs_trans_cancel(tp);
> -	goto out_unlock;
> -}
> -
> -/* Allocate all CoW reservations covering a part of a file. */
> -int
> -xfs_reflink_allocate_cow_range(
> -	struct xfs_inode	*ip,
> -	xfs_off_t		offset,
> -	xfs_off_t		count)
> -{
> -	struct xfs_mount	*mp = ip->i_mount;
> -	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
> -	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
> -	int			error;
> -
> -	ASSERT(xfs_is_reflink_inode(ip));
> -
> -	trace_xfs_reflink_allocate_cow_range(ip, offset, count);
> -
> -	/*
> -	 * Make sure that the dquots are there.
> -	 */
> -	error = xfs_qm_dqattach(ip, 0);
> -	if (error)
> -		return error;
> -
> -	while (offset_fsb < end_fsb) {
> -		error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb);
> -		if (error) {
> -			trace_xfs_reflink_allocate_cow_range_error(ip, error,
> -					_RET_IP_);
> -			goto out;
> -		}
> -	}
> -
> -	/* Convert the CoW extents to regular. */
> -	error = xfs_reflink_convert_cow(ip, offset, count);

I think this is incorrect -- we create an unwritten extent in the cow
fork above (BMAP_COWFORK | BMAP_PREALLOC) but we no longer convert the
part we're actually writing to a real extent.  This means that
_reflink_cow won't actually remap anything that we've written.

I saw generic/{139,183,187,188} fail and then g/190 crashed.  There's
something incorrect going on for sure, though it's late and I won't
have time to pinpoint it tonight.

--D

>  out:
> +	if (tp)
> +		xfs_trans_cancel(tp);
>  	return error;
>  }
>  
> diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
> index 1583c47..08792a4 100644
> --- a/fs/xfs/xfs_reflink.h
> +++ b/fs/xfs/xfs_reflink.h
> @@ -28,8 +28,9 @@ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
>  
>  extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
>  		struct xfs_bmbt_irec *imap, bool *shared);
> -extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
> -		xfs_off_t offset, xfs_off_t count);
> +extern int xfs_reflink_allocate_cow(struct xfs_inode *ip,
> +		xfs_fileoff_t offset_fsb, xfs_fileoff_t end_fsb,
> +		struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode);
>  extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
>  		xfs_off_t count);
>  extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index d3d11905..5f0a0d6 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -3248,7 +3248,6 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
>  DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
>  
>  DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
> -DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
>  
>  DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
>  DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);
> @@ -3258,7 +3257,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
>  DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
>  DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
>  
> -DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
>  DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
>  DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
>  
> -- 
> 2.1.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin
  2017-02-01 21:42 reflink direct I/O improvements V2 Christoph Hellwig
@ 2017-02-01 21:42 ` Christoph Hellwig
  2017-02-03  0:55   ` Darrick J. Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2017-02-01 21:42 UTC (permalink / raw)
  To: linux-xfs

Instead of preallocating all the required COW blocks in the high-level
write code do it inside the iomap code, like we do for all other I/O.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c    |   8 ---
 fs/xfs/xfs_iomap.c   |  31 +++++------
 fs/xfs/xfs_reflink.c | 147 ++++++++++++++++++++-------------------------------
 fs/xfs/xfs_reflink.h |   5 +-
 fs/xfs/xfs_trace.h   |   2 -
 5 files changed, 71 insertions(+), 122 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 06236bf..56ac5b7 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -559,14 +559,6 @@ xfs_file_dio_aio_write(
 	}
 
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
-
-	/* If this is a block-aligned directio CoW, remap immediately. */
-	if (xfs_is_reflink_inode(ip) && !unaligned_io) {
-		ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count);
-		if (ret)
-			goto out;
-	}
-
 	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
 out:
 	xfs_iunlock(ip, iolock);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 9d811eb..de717e7 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -995,37 +995,31 @@ xfs_file_iomap_begin(
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	end_fsb = XFS_B_TO_FSB(mp, offset + length);
 
-	if (xfs_is_reflink_inode(ip) &&
-	    (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) {
-		shared = xfs_reflink_find_cow_mapping(ip, offset, &imap);
-		if (shared) {
-			xfs_iunlock(ip, lockmode);
-			goto alloc_done;
-		}
-		ASSERT(!isnullstartblock(imap.br_startblock));
-	}
-
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
 			       &nimaps, 0);
 	if (error)
 		goto out_unlock;
 
-	if ((flags & IOMAP_REPORT) ||
-	    (xfs_is_reflink_inode(ip) &&
-	     (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) {
+	if (flags & IOMAP_REPORT) {
 		/* Trim the mapping to the nearest shared extent boundary. */
 		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
 				&trimmed);
 		if (error)
 			goto out_unlock;
-
-		ASSERT((flags & IOMAP_REPORT) || !shared);
 	}
 
 	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
-		error = xfs_reflink_reserve_cow(ip, &imap, &shared);
-		if (error)
-			goto out_unlock;
+		if (flags & IOMAP_DIRECT) {
+			/* may drop and re-acquire the ilock */
+			error = xfs_reflink_allocate_cow(ip, offset_fsb, end_fsb,
+					&imap, &shared, &lockmode);
+			if (error)
+				goto out_unlock;
+		} else {
+			error = xfs_reflink_reserve_cow(ip, &imap, &shared);
+			if (error)
+				goto out_unlock;
+		}
 
 		end_fsb = imap.br_startoff + imap.br_blockcount;
 		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
@@ -1054,7 +1048,6 @@ xfs_file_iomap_begin(
 		if (error)
 			return error;
 
-alloc_done:
 		iomap->flags = IOMAP_F_NEW;
 		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 	} else {
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 1a96741..d5a2cf2 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -384,74 +384,84 @@ xfs_reflink_convert_cow(
 }
 
 /* Allocate all CoW reservations covering a range of blocks in a file. */
-static int
-__xfs_reflink_allocate_cow(
+int
+xfs_reflink_allocate_cow(
 	struct xfs_inode	*ip,
-	xfs_fileoff_t		*offset_fsb,
-	xfs_fileoff_t		end_fsb)
+	xfs_fileoff_t		offset_fsb,
+	xfs_fileoff_t		end_fsb,
+	struct xfs_bmbt_irec	*imap,
+	bool			*shared,
+	uint			*lockmode)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_bmbt_irec	imap, got;
+	struct xfs_bmbt_irec	got;
 	struct xfs_defer_ops	dfops;
-	struct xfs_trans	*tp;
+	struct xfs_trans	*tp = NULL;
 	xfs_fsblock_t		first_block;
-	int			nimaps, error, lockmode;
-	bool			shared, trimmed;
+	int			nimaps, error = 0;
+	bool			trimmed;
 	xfs_filblks_t		resaligned;
-	xfs_extlen_t		resblks;
+	xfs_extlen_t		resblks = 0;
 	xfs_extnum_t		idx;
 
-	resaligned = xfs_aligned_fsb_count(*offset_fsb, end_fsb - *offset_fsb,
-			xfs_get_cowextsz_hint(ip));
-	resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
-
-	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
-	if (error)
-		return error;
-
-	lockmode = XFS_ILOCK_EXCL;
-	xfs_ilock(ip, lockmode);
+retry:
+	ASSERT(xfs_is_reflink_inode(ip));
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
 
 	/*
 	 * Even if the extent is not shared we might have a preallocation for
 	 * it in the COW fork.  If so use it.
 	 */
-	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, *offset_fsb, &idx, &got) &&
-	    got.br_startoff <= *offset_fsb) {
+	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) &&
+	    got.br_startoff <= offset_fsb) {
+		*shared = true;
+
 		/* If we have a real allocation in the COW fork we're done. */
 		if (!isnullstartblock(got.br_startblock)) {
-			xfs_trim_extent(&got, *offset_fsb,
-					end_fsb - *offset_fsb);
-			*offset_fsb = got.br_startoff + got.br_blockcount;
-			goto out_trans_cancel;
+			xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
+			*imap = got;
+			goto out;
 		}
+
+		xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
 	} else {
-		nimaps = 1;
-		error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
-				&imap, &nimaps, 0);
+		error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
+		if (error || !*shared)
+			goto out;
+	}
+
+	if (!tp) {
+		resaligned = xfs_aligned_fsb_count(offset_fsb,
+			end_fsb - offset_fsb, xfs_get_cowextsz_hint(ip));
+		resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
+
+		xfs_iunlock(ip, *lockmode);
+		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
+		*lockmode = XFS_ILOCK_EXCL;
+		xfs_ilock(ip, *lockmode);
+
 		if (error)
-			goto out_trans_cancel;
-		ASSERT(nimaps == 1);
+			return error;
 
-		/* Trim the mapping to the nearest shared extent boundary. */
-		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
-				&trimmed);
+		error = xfs_qm_dqattach_locked(ip, 0);
 		if (error)
-			goto out_trans_cancel;
+			goto out;
 
-		if (!shared) {
-			*offset_fsb = imap.br_startoff + imap.br_blockcount;
-			goto out_trans_cancel;
-		}
+		/* Read extent from the source file. */
+		nimaps = 1;
+		error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+				imap, &nimaps, 0);
+		if (error)
+			goto out;
+		ASSERT(nimaps == 1);
 
-		*offset_fsb = imap.br_startoff;
-		end_fsb = imap.br_startoff + imap.br_blockcount;
+		goto retry;
 	}
 
 	error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
 			XFS_QMOPT_RES_REGBLKS);
 	if (error)
-		goto out_trans_cancel;
+		goto out;
 
 	xfs_trans_ijoin(tp, ip, 0);
 
@@ -459,9 +469,9 @@ __xfs_reflink_allocate_cow(
 	nimaps = 1;
 
 	/* Allocate the entire reservation as unwritten blocks. */
-	error = xfs_bmapi_write(tp, ip, *offset_fsb, end_fsb - *offset_fsb,
+	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
 			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
-			resblks, &imap, &nimaps, &dfops);
+			resblks, imap, &nimaps, &dfops);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -470,60 +480,15 @@ __xfs_reflink_allocate_cow(
 	if (error)
 		goto out_bmap_cancel;
 
-	error = xfs_trans_commit(tp);
-	if (error)
-		goto out_unlock;
-
-	*offset_fsb = imap.br_startoff + imap.br_blockcount;
-
-out_unlock:
-	xfs_iunlock(ip, lockmode);
-	return error;
+	return xfs_trans_commit(tp);
 
 out_bmap_cancel:
 	xfs_defer_cancel(&dfops);
 	xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
 			XFS_QMOPT_RES_REGBLKS);
-out_trans_cancel:
-	xfs_trans_cancel(tp);
-	goto out_unlock;
-}
-
-/* Allocate all CoW reservations covering a part of a file. */
-int
-xfs_reflink_allocate_cow_range(
-	struct xfs_inode	*ip,
-	xfs_off_t		offset,
-	xfs_off_t		count)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
-	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
-	int			error;
-
-	ASSERT(xfs_is_reflink_inode(ip));
-
-	trace_xfs_reflink_allocate_cow_range(ip, offset, count);
-
-	/*
-	 * Make sure that the dquots are there.
-	 */
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
-		return error;
-
-	while (offset_fsb < end_fsb) {
-		error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb);
-		if (error) {
-			trace_xfs_reflink_allocate_cow_range_error(ip, error,
-					_RET_IP_);
-			goto out;
-		}
-	}
-
-	/* Convert the CoW extents to regular. */
-	error = xfs_reflink_convert_cow(ip, offset, count);
 out:
+	if (tp)
+		xfs_trans_cancel(tp);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 1583c47..08792a4 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -28,8 +28,9 @@ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
 
 extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
 		struct xfs_bmbt_irec *imap, bool *shared);
-extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
-		xfs_off_t offset, xfs_off_t count);
+extern int xfs_reflink_allocate_cow(struct xfs_inode *ip,
+		xfs_fileoff_t offset_fsb, xfs_fileoff_t end_fsb,
+		struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode);
 extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
 extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index d3d11905..5f0a0d6 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3248,7 +3248,6 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
 
 DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
-DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
 
 DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
 DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);
@@ -3258,7 +3257,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
 
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2017-02-09 17:23 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-06  7:47 reflink direct I/O improvements V3 Christoph Hellwig
2017-02-06  7:47 ` [PATCH 1/4] xfs: introduce xfs_aligned_fsb_count Christoph Hellwig
2017-02-06  7:47 ` [PATCH 2/4] xfs: return the converted extent in __xfs_reflink_convert_cow Christoph Hellwig
2017-02-06  7:47 ` [PATCH 3/4] xfs: go straight to real allocations for direct I/O COW writes Christoph Hellwig
2017-02-07  1:12   ` Darrick J. Wong
2017-02-06  7:47 ` [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin Christoph Hellwig
2017-02-07  1:41   ` Darrick J. Wong
2017-02-09  7:21     ` Christoph Hellwig
2017-02-09  7:53       ` Darrick J. Wong
2017-02-09  7:58         ` Christoph Hellwig
2017-02-09  8:00           ` Christoph Hellwig
2017-02-09 17:22           ` Christoph Hellwig
2017-02-06 18:41 ` reflink direct I/O improvements V3 Darrick J. Wong
2017-02-06 20:55   ` Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2017-02-01 21:42 reflink direct I/O improvements V2 Christoph Hellwig
2017-02-01 21:42 ` [PATCH 4/4] xfs: allocate direct I/O COW blocks in iomap_begin Christoph Hellwig
2017-02-03  0:55   ` Darrick J. Wong
2017-02-03 10:53     ` Christoph Hellwig
2017-02-05 20:13       ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.