All of lore.kernel.org
 help / color / mirror / Atom feed
* small fixes and optimizations for delalloc v2
@ 2019-02-15 14:47 Christoph Hellwig
  2019-02-15 14:47 ` [PATCH 01/10] xfs: remove the io_type field from the writeback context and ioend Christoph Hellwig
                   ` (9 more replies)
  0 siblings, 10 replies; 23+ messages in thread
From: Christoph Hellwig @ 2019-02-15 14:47 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

Hi all,

this series contains my changes to fix up the delalloc and reflink
code to prepare for the always COW mode.  This sits on top of
the 'xfs: properly invalidate cached writeback mapping' series from
Brian.

To make reviewing easier I also have a git tree available here:

    git://git.infradead.org/users/hch/xfs.git xfs-mapping-validation.5

Gitweb:

    http://git.infradead.org/users/hch/xfs.git/shortlog/refs/heads/xfs-mapping-validation.5

Changes since v1:
 - fix the ilock hold time in delalloc conversions (Darrick)
 - switch handling of a 0 block allocation from an assert to a runtime
   error
 - fix an off by one false negative in an assert
 - move extent trimming from xfs_convert_blocks to the caller
 - update a few commit logs
 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 01/10] xfs: remove the io_type field from the writeback context and ioend
  2019-02-15 14:47 small fixes and optimizations for delalloc v2 Christoph Hellwig
@ 2019-02-15 14:47 ` Christoph Hellwig
  2019-02-15 14:47 ` [PATCH 02/10] xfs: remove the s_maxbytes checks in xfs_map_blocks Christoph Hellwig
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 23+ messages in thread
From: Christoph Hellwig @ 2019-02-15 14:47 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster, Darrick J . Wong

The io_type field contains what is basically a summary of information
from the inode fork and the imap.  But we can just as easily use that
information directly, simplifying a few bits here and there and
improving the trace points.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c    | 93 ++++++++++++++++++++------------------------
 fs/xfs/xfs_aops.h    | 24 +-----------
 fs/xfs/xfs_iomap.c   |  8 ++--
 fs/xfs/xfs_reflink.c |  2 +-
 fs/xfs/xfs_trace.h   | 34 +++++++---------
 5 files changed, 63 insertions(+), 98 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 515532f45beb..a3fa60d1d2df 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -28,7 +28,7 @@
  */
 struct xfs_writepage_ctx {
 	struct xfs_bmbt_irec    imap;
-	unsigned int		io_type;
+	int			fork;
 	unsigned int		data_seq;
 	unsigned int		cow_seq;
 	struct xfs_ioend	*ioend;
@@ -256,30 +256,20 @@ xfs_end_io(
 	 */
 	error = blk_status_to_errno(ioend->io_bio->bi_status);
 	if (unlikely(error)) {
-		switch (ioend->io_type) {
-		case XFS_IO_COW:
+		if (ioend->io_fork == XFS_COW_FORK)
 			xfs_reflink_cancel_cow_range(ip, offset, size, true);
-			break;
-		}
-
 		goto done;
 	}
 
 	/*
-	 * Success:  commit the COW or unwritten blocks if needed.
+	 * Success: commit the COW or unwritten blocks if needed.
 	 */
-	switch (ioend->io_type) {
-	case XFS_IO_COW:
+	if (ioend->io_fork == XFS_COW_FORK)
 		error = xfs_reflink_end_cow(ip, offset, size);
-		break;
-	case XFS_IO_UNWRITTEN:
-		/* writeback should never update isize */
+	else if (ioend->io_state == XFS_EXT_UNWRITTEN)
 		error = xfs_iomap_write_unwritten(ip, offset, size, false);
-		break;
-	default:
+	else
 		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
-		break;
-	}
 
 done:
 	if (ioend->io_append_trans)
@@ -294,7 +284,8 @@ xfs_end_bio(
 	struct xfs_ioend	*ioend = bio->bi_private;
 	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
 
-	if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW)
+	if (ioend->io_fork == XFS_COW_FORK ||
+	    ioend->io_state == XFS_EXT_UNWRITTEN)
 		queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
 	else if (ioend->io_append_trans)
 		queue_work(mp->m_data_workqueue, &ioend->io_work);
@@ -320,7 +311,7 @@ xfs_imap_valid(
 	 * covers the offset. Be careful to check this first because the caller
 	 * can revalidate a COW mapping without updating the data seqno.
 	 */
-	if (wpc->io_type == XFS_IO_COW)
+	if (wpc->fork == XFS_COW_FORK)
 		return true;
 
 	/*
@@ -350,7 +341,6 @@ xfs_map_blocks(
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
 	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
 	struct xfs_bmbt_irec	imap;
-	int			whichfork = XFS_DATA_FORK;
 	struct xfs_iext_cursor	icur;
 	int			error = 0;
 
@@ -400,6 +390,9 @@ xfs_map_blocks(
 	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
 		wpc->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+		wpc->fork = XFS_COW_FORK;
+
 		/*
 		 * Truncate can race with writeback since writeback doesn't
 		 * take the iolock and truncate decreases the file size before
@@ -412,11 +405,13 @@ xfs_map_blocks(
 		 * will kill the contents anyway.
 		 */
 		if (offset > i_size_read(inode)) {
-			wpc->io_type = XFS_IO_HOLE;
+			wpc->imap.br_blockcount = end_fsb - offset_fsb;
+			wpc->imap.br_startoff = offset_fsb;
+			wpc->imap.br_startblock = HOLESTARTBLOCK;
+			wpc->imap.br_state = XFS_EXT_NORM;
 			return 0;
 		}
-		whichfork = XFS_COW_FORK;
-		wpc->io_type = XFS_IO_COW;
+
 		goto allocate_blocks;
 	}
 
@@ -439,12 +434,14 @@ xfs_map_blocks(
 	wpc->data_seq = READ_ONCE(ip->i_df.if_seq);
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
+	wpc->fork = XFS_DATA_FORK;
+
 	if (imap.br_startoff > offset_fsb) {
 		/* landed in a hole or beyond EOF */
 		imap.br_blockcount = imap.br_startoff - offset_fsb;
 		imap.br_startoff = offset_fsb;
 		imap.br_startblock = HOLESTARTBLOCK;
-		wpc->io_type = XFS_IO_HOLE;
+		imap.br_state = XFS_EXT_NORM;
 	} else {
 		/*
 		 * Truncate to the next COW extent if there is one.  This is the
@@ -456,31 +453,24 @@ xfs_map_blocks(
 		    cow_fsb < imap.br_startoff + imap.br_blockcount)
 			imap.br_blockcount = cow_fsb - imap.br_startoff;
 
-		if (isnullstartblock(imap.br_startblock)) {
-			/* got a delalloc extent */
-			wpc->io_type = XFS_IO_DELALLOC;
+		/* got a delalloc extent? */
+		if (isnullstartblock(imap.br_startblock))
 			goto allocate_blocks;
-		}
-
-		if (imap.br_state == XFS_EXT_UNWRITTEN)
-			wpc->io_type = XFS_IO_UNWRITTEN;
-		else
-			wpc->io_type = XFS_IO_OVERWRITE;
 	}
 
 	wpc->imap = imap;
-	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
+	trace_xfs_map_blocks_found(ip, offset, count, wpc->fork, &imap);
 	return 0;
 allocate_blocks:
-	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
-			whichfork == XFS_COW_FORK ?
+	error = xfs_iomap_write_allocate(ip, wpc->fork, offset, &imap,
+			wpc->fork == XFS_COW_FORK ?
 					 &wpc->cow_seq : &wpc->data_seq);
 	if (error)
 		return error;
-	ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
+	ASSERT(wpc->fork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
 	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
 	wpc->imap = imap;
-	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
+	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->fork, &imap);
 	return 0;
 }
 
@@ -505,7 +495,7 @@ xfs_submit_ioend(
 	int			status)
 {
 	/* Convert CoW extents to regular */
-	if (!status && ioend->io_type == XFS_IO_COW) {
+	if (!status && ioend->io_fork == XFS_COW_FORK) {
 		/*
 		 * Yuk. This can do memory allocation, but is not a
 		 * transactional operation so everything is done in GFP_KERNEL
@@ -523,7 +513,8 @@ xfs_submit_ioend(
 
 	/* Reserve log space if we might write beyond the on-disk inode size. */
 	if (!status &&
-	    ioend->io_type != XFS_IO_UNWRITTEN &&
+	    (ioend->io_fork == XFS_COW_FORK ||
+	     ioend->io_state != XFS_EXT_UNWRITTEN) &&
 	    xfs_ioend_is_append(ioend) &&
 	    !ioend->io_append_trans)
 		status = xfs_setfilesize_trans_alloc(ioend);
@@ -552,7 +543,8 @@ xfs_submit_ioend(
 static struct xfs_ioend *
 xfs_alloc_ioend(
 	struct inode		*inode,
-	unsigned int		type,
+	int			fork,
+	xfs_exntst_t		state,
 	xfs_off_t		offset,
 	struct block_device	*bdev,
 	sector_t		sector)
@@ -566,7 +558,8 @@ xfs_alloc_ioend(
 
 	ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
 	INIT_LIST_HEAD(&ioend->io_list);
-	ioend->io_type = type;
+	ioend->io_fork = fork;
+	ioend->io_state = state;
 	ioend->io_inode = inode;
 	ioend->io_size = 0;
 	ioend->io_offset = offset;
@@ -627,13 +620,15 @@ xfs_add_to_ioend(
 	sector = xfs_fsb_to_db(ip, wpc->imap.br_startblock) +
 		((offset - XFS_FSB_TO_B(mp, wpc->imap.br_startoff)) >> 9);
 
-	if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
+	if (!wpc->ioend ||
+	    wpc->fork != wpc->ioend->io_fork ||
+	    wpc->imap.br_state != wpc->ioend->io_state ||
 	    sector != bio_end_sector(wpc->ioend->io_bio) ||
 	    offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
 		if (wpc->ioend)
 			list_add(&wpc->ioend->io_list, iolist);
-		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset,
-				bdev, sector);
+		wpc->ioend = xfs_alloc_ioend(inode, wpc->fork,
+				wpc->imap.br_state, offset, bdev, sector);
 	}
 
 	if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff)) {
@@ -742,7 +737,7 @@ xfs_writepage_map(
 		error = xfs_map_blocks(wpc, inode, file_offset);
 		if (error)
 			break;
-		if (wpc->io_type == XFS_IO_HOLE)
+		if (wpc->imap.br_startblock == HOLESTARTBLOCK)
 			continue;
 		xfs_add_to_ioend(inode, file_offset, page, iop, wpc, wbc,
 				 &submit_list);
@@ -937,9 +932,7 @@ xfs_vm_writepage(
 	struct page		*page,
 	struct writeback_control *wbc)
 {
-	struct xfs_writepage_ctx wpc = {
-		.io_type = XFS_IO_HOLE,
-	};
+	struct xfs_writepage_ctx wpc = { };
 	int			ret;
 
 	ret = xfs_do_writepage(page, wbc, &wpc);
@@ -953,9 +946,7 @@ xfs_vm_writepages(
 	struct address_space	*mapping,
 	struct writeback_control *wbc)
 {
-	struct xfs_writepage_ctx wpc = {
-		.io_type = XFS_IO_HOLE,
-	};
+	struct xfs_writepage_ctx wpc = { };
 	int			ret;
 
 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index e5c23948a8ab..6c2615b83c5d 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -8,33 +8,13 @@
 
 extern struct bio_set xfs_ioend_bioset;
 
-/*
- * Types of I/O for bmap clustering and I/O completion tracking.
- *
- * This enum is used in string mapping in xfs_trace.h; please keep the
- * TRACE_DEFINE_ENUMs for it up to date.
- */
-enum {
-	XFS_IO_HOLE,		/* covers region without any block allocation */
-	XFS_IO_DELALLOC,	/* covers delalloc region */
-	XFS_IO_UNWRITTEN,	/* covers allocated but uninitialized data */
-	XFS_IO_OVERWRITE,	/* covers already allocated extent */
-	XFS_IO_COW,		/* covers copy-on-write extent */
-};
-
-#define XFS_IO_TYPES \
-	{ XFS_IO_HOLE,			"hole" },	\
-	{ XFS_IO_DELALLOC,		"delalloc" },	\
-	{ XFS_IO_UNWRITTEN,		"unwritten" },	\
-	{ XFS_IO_OVERWRITE,		"overwrite" },	\
-	{ XFS_IO_COW,			"CoW" }
-
 /*
  * Structure for buffered I/O completions.
  */
 struct xfs_ioend {
 	struct list_head	io_list;	/* next ioend in chain */
-	unsigned int		io_type;	/* delalloc / unwritten */
+	int			io_fork;	/* inode fork written back */
+	xfs_exntst_t		io_state;	/* extent state */
 	struct inode		*io_inode;	/* file being written to */
 	size_t			io_size;	/* size of the extent */
 	xfs_off_t		io_offset;	/* offset in the file */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 6af1d3ec0a9c..fd3aacd4bf02 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -575,7 +575,7 @@ xfs_file_iomap_begin_delay(
 				goto out_unlock;
 		}
 
-		trace_xfs_iomap_found(ip, offset, count, 0, &got);
+		trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, &got);
 		goto done;
 	}
 
@@ -647,7 +647,7 @@ xfs_file_iomap_begin_delay(
 	 * them out if the write happens to fail.
 	 */
 	iomap->flags |= IOMAP_F_NEW;
-	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
+	trace_xfs_iomap_alloc(ip, offset, count, XFS_DATA_FORK, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
 		got.br_startblock = DELAYSTARTBLOCK;
@@ -1082,7 +1082,7 @@ xfs_file_iomap_begin(
 		return error;
 
 	iomap->flags |= IOMAP_F_NEW;
-	trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
+	trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
 
 out_finish:
 	if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields
@@ -1098,7 +1098,7 @@ xfs_file_iomap_begin(
 out_found:
 	ASSERT(nimaps);
 	xfs_iunlock(ip, lockmode);
-	trace_xfs_iomap_found(ip, offset, length, 0, &imap);
+	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
 	goto out_finish;
 
 out_unlock:
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index c5b4fa004ca4..2babc2cbe103 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1192,7 +1192,7 @@ xfs_reflink_remap_blocks(
 			break;
 		ASSERT(nimaps == 1);
 
-		trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE,
+		trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_DATA_FORK,
 				&imap);
 
 		/* Translate imap into the destination file. */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index c83ce022a355..f1e18ae8a209 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1218,23 +1218,17 @@ DEFINE_EVENT(xfs_readpage_class, name,	\
 DEFINE_READPAGE_EVENT(xfs_vm_readpage);
 DEFINE_READPAGE_EVENT(xfs_vm_readpages);
 
-TRACE_DEFINE_ENUM(XFS_IO_HOLE);
-TRACE_DEFINE_ENUM(XFS_IO_DELALLOC);
-TRACE_DEFINE_ENUM(XFS_IO_UNWRITTEN);
-TRACE_DEFINE_ENUM(XFS_IO_OVERWRITE);
-TRACE_DEFINE_ENUM(XFS_IO_COW);
-
 DECLARE_EVENT_CLASS(xfs_imap_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
-		 int type, struct xfs_bmbt_irec *irec),
-	TP_ARGS(ip, offset, count, type, irec),
+		 int whichfork, struct xfs_bmbt_irec *irec),
+	TP_ARGS(ip, offset, count, whichfork, irec),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
 		__field(loff_t, size)
 		__field(loff_t, offset)
 		__field(size_t, count)
-		__field(int, type)
+		__field(int, whichfork)
 		__field(xfs_fileoff_t, startoff)
 		__field(xfs_fsblock_t, startblock)
 		__field(xfs_filblks_t, blockcount)
@@ -1245,33 +1239,33 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
 		__entry->size = ip->i_d.di_size;
 		__entry->offset = offset;
 		__entry->count = count;
-		__entry->type = type;
+		__entry->whichfork = whichfork;
 		__entry->startoff = irec ? irec->br_startoff : 0;
 		__entry->startblock = irec ? irec->br_startblock : 0;
 		__entry->blockcount = irec ? irec->br_blockcount : 0;
 	),
 	TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count %zd "
-		  "type %s startoff 0x%llx startblock %lld blockcount 0x%llx",
+		  "fork %s startoff 0x%llx startblock %lld blockcount 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->size,
 		  __entry->offset,
 		  __entry->count,
-		  __print_symbolic(__entry->type, XFS_IO_TYPES),
+		  __entry->whichfork == XFS_COW_FORK ? "cow" : "data",
 		  __entry->startoff,
 		  (int64_t)__entry->startblock,
 		  __entry->blockcount)
 )
 
-#define DEFINE_IOMAP_EVENT(name)	\
+#define DEFINE_IMAP_EVENT(name)	\
 DEFINE_EVENT(xfs_imap_class, name,	\
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,	\
-		 int type, struct xfs_bmbt_irec *irec),		\
-	TP_ARGS(ip, offset, count, type, irec))
-DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
-DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
-DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
-DEFINE_IOMAP_EVENT(xfs_iomap_found);
+		 int whichfork, struct xfs_bmbt_irec *irec),		\
+	TP_ARGS(ip, offset, count, whichfork, irec))
+DEFINE_IMAP_EVENT(xfs_map_blocks_found);
+DEFINE_IMAP_EVENT(xfs_map_blocks_alloc);
+DEFINE_IMAP_EVENT(xfs_iomap_alloc);
+DEFINE_IMAP_EVENT(xfs_iomap_found);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -3078,7 +3072,7 @@ DEFINE_EVENT(xfs_inode_irec_class, name, \
 DEFINE_INODE_EVENT(xfs_reflink_set_inode_flag);
 DEFINE_INODE_EVENT(xfs_reflink_unset_inode_flag);
 DEFINE_ITRUNC_EVENT(xfs_reflink_update_inode_size);
-DEFINE_IOMAP_EVENT(xfs_reflink_remap_imap);
+DEFINE_IMAP_EVENT(xfs_reflink_remap_imap);
 TRACE_EVENT(xfs_reflink_remap_blocks_loop,
 	TP_PROTO(struct xfs_inode *src, xfs_fileoff_t soffset,
 		 xfs_filblks_t len, struct xfs_inode *dest,
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 02/10] xfs: remove the s_maxbytes checks in xfs_map_blocks
  2019-02-15 14:47 small fixes and optimizations for delalloc v2 Christoph Hellwig
  2019-02-15 14:47 ` [PATCH 01/10] xfs: remove the io_type field from the writeback context and ioend Christoph Hellwig
@ 2019-02-15 14:47 ` Christoph Hellwig
  2019-02-15 14:47 ` [PATCH 03/10] xfs: simplify the xfs_bmap_btree_to_extents calling conventions Christoph Hellwig
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 23+ messages in thread
From: Christoph Hellwig @ 2019-02-15 14:47 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster, Darrick J . Wong

We already ensure all data fits into s_maxbytes in the write / fault
path.  The only reason we have them here is that they were copy and
pasted from xfs_bmapi_read when we stopped using that function.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a3fa60d1d2df..8bfb62d8776f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -338,7 +338,8 @@ xfs_map_blocks(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			count = i_blocksize(inode);
-	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
+	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
 	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
 	struct xfs_bmbt_irec	imap;
 	struct xfs_iext_cursor	icur;
@@ -374,11 +375,6 @@ xfs_map_blocks(
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 	       (ip->i_df.if_flags & XFS_IFEXTENTS));
-	ASSERT(offset <= mp->m_super->s_maxbytes);
-
-	if (offset > mp->m_super->s_maxbytes - count)
-		count = mp->m_super->s_maxbytes - offset;
-	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
 
 	/*
 	 * Check if this is offset is covered by a COW extents, and if yes use
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 03/10] xfs: simplify the xfs_bmap_btree_to_extents calling conventions
  2019-02-15 14:47 small fixes and optimizations for delalloc v2 Christoph Hellwig
  2019-02-15 14:47 ` [PATCH 01/10] xfs: remove the io_type field from the writeback context and ioend Christoph Hellwig
  2019-02-15 14:47 ` [PATCH 02/10] xfs: remove the s_maxbytes checks in xfs_map_blocks Christoph Hellwig
@ 2019-02-15 14:47 ` Christoph Hellwig
  2019-02-15 16:41   ` Darrick J. Wong
  2019-02-15 14:47 ` [PATCH 04/10] xfs: factor out two helpers from xfs_bmapi_write Christoph Hellwig
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 23+ messages in thread
From: Christoph Hellwig @ 2019-02-15 14:47 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

Move boilerplate code from the callers into xfs_bmap_btree_to_extents:

 - exit early without failure if we don't need to convert to the
   extent format
 - assert that we have a btree cursor
 - don't reinitialize the passed in logflags argument

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 78 ++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 52 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index f4a65330a2a9..7fa454f71f46 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -577,42 +577,44 @@ __xfs_bmap_add_free(
  */
 
 /*
- * Transform a btree format file with only one leaf node, where the
- * extents list will fit in the inode, into an extents format file.
- * Since the file extents are already in-core, all we have to do is
- * give up the space for the btree root and pitch the leaf block.
+ * Convert the inode format to extent format if it currently is in btree format,
+ * but the extent list is small enough that it fits into the extent format.
+ 8
+ * Since the extents are already in-core, all we have to do is give up the space
+ * for the btree root and pitch the leaf block.
  */
 STATIC int				/* error */
 xfs_bmap_btree_to_extents(
-	xfs_trans_t		*tp,	/* transaction pointer */
-	xfs_inode_t		*ip,	/* incore inode pointer */
-	xfs_btree_cur_t		*cur,	/* btree cursor */
+	struct xfs_trans	*tp,	/* transaction pointer */
+	struct xfs_inode	*ip,	/* incore inode pointer */
+	struct xfs_btree_cur	*cur,	/* btree cursor */
 	int			*logflagsp, /* inode logging flags */
 	int			whichfork)  /* data or attr fork */
 {
-	/* REFERENCED */
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_btree_block	*rblock = ifp->if_broot;
 	struct xfs_btree_block	*cblock;/* child btree block */
 	xfs_fsblock_t		cbno;	/* child block number */
 	xfs_buf_t		*cbp;	/* child block's buffer */
 	int			error;	/* error return value */
-	struct xfs_ifork	*ifp;	/* inode fork data */
-	xfs_mount_t		*mp;	/* mount point structure */
 	__be64			*pp;	/* ptr to block address */
-	struct xfs_btree_block	*rblock;/* root btree block */
 	struct xfs_owner_info	oinfo;
 
-	mp = ip->i_mount;
-	ifp = XFS_IFORK_PTR(ip, whichfork);
+	/* check if we actually need the extent format first: */
+	if (!xfs_bmap_wants_extents(ip, whichfork))
+		return 0;
+
+	ASSERT(cur);
 	ASSERT(whichfork != XFS_COW_FORK);
 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
-	rblock = ifp->if_broot;
 	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
 	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
 	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
+
 	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
 	cbno = be64_to_cpu(*pp);
-	*logflagsp = 0;
 #ifdef DEBUG
 	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
 			xfs_btree_check_lptr(cur, cbno, 1));
@@ -635,7 +637,7 @@ xfs_bmap_btree_to_extents(
 	ASSERT(ifp->if_broot == NULL);
 	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
 	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
-	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
+	*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 	return 0;
 }
 
@@ -4424,19 +4426,10 @@ xfs_bmapi_write(
 	}
 	*nmap = n;
 
-	/*
-	 * Transform from btree to extents, give it cur.
-	 */
-	if (xfs_bmap_wants_extents(ip, whichfork)) {
-		int		tmp_logflags = 0;
-
-		ASSERT(bma.cur);
-		error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
-			&tmp_logflags, whichfork);
-		bma.logflags |= tmp_logflags;
-		if (error)
-			goto error0;
-	}
+	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
+			whichfork);
+	if (error)
+		goto error0;
 
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
 	       XFS_IFORK_NEXTENTS(ip, whichfork) >
@@ -4574,13 +4567,7 @@ xfs_bmapi_remap(
 	if (error)
 		goto error0;
 
-	if (xfs_bmap_wants_extents(ip, whichfork)) {
-		int		tmp_logflags = 0;
-
-		error = xfs_bmap_btree_to_extents(tp, ip, cur,
-			&tmp_logflags, whichfork);
-		logflags |= tmp_logflags;
-	}
+	error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
 
 error0:
 	if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS)
@@ -5444,24 +5431,11 @@ __xfs_bunmapi(
 		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
 				&tmp_logflags, whichfork);
 		logflags |= tmp_logflags;
-		if (error)
-			goto error0;
-	}
-	/*
-	 * transform from btree to extents, give it cur
-	 */
-	else if (xfs_bmap_wants_extents(ip, whichfork)) {
-		ASSERT(cur != NULL);
-		error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
+	} else {
+		error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
 			whichfork);
-		logflags |= tmp_logflags;
-		if (error)
-			goto error0;
 	}
-	/*
-	 * transform from extents to local?
-	 */
-	error = 0;
+
 error0:
 	/*
 	 * Log everything.  Do this after conversion, there's no point in
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 04/10] xfs: factor out two helpers from xfs_bmapi_write
  2019-02-15 14:47 small fixes and optimizations for delalloc v2 Christoph Hellwig
                   ` (2 preceding siblings ...)
  2019-02-15 14:47 ` [PATCH 03/10] xfs: simplify the xfs_bmap_btree_to_extents calling conventions Christoph Hellwig
@ 2019-02-15 14:47 ` Christoph Hellwig
  2019-02-15 22:37   ` Darrick J. Wong
  2019-02-15 14:47 ` [PATCH 05/10] xfs: split XFS_BMAPI_DELALLOC handling " Christoph Hellwig
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 23+ messages in thread
From: Christoph Hellwig @ 2019-02-15 14:47 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

We want to be able to reuse them for the upcoming dedidcated delalloc
convert routine.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 78 ++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 34 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7fa454f71f46..a9c9bd39d822 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4194,6 +4194,44 @@ xfs_bmapi_convert_unwritten(
 	return 0;
 }
 
+static inline xfs_extlen_t
+xfs_bmapi_minleft(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	int			fork)
+{
+	if (tp && tp->t_firstblock != NULLFSBLOCK)
+		return 0;
+	if (XFS_IFORK_FORMAT(ip, fork) != XFS_DINODE_FMT_BTREE)
+		return 1;
+	return be16_to_cpu(XFS_IFORK_PTR(ip, fork)->if_broot->bb_level) + 1;
+}
+
+/*
+ * Log whatever the flags say, even if error.  Otherwise we might miss detecting
+ * a case where the data is changed, there's an error, and it's not logged so we
+ * don't shutdown when we should.  Don't bother logging extents/btree changes if
+ * we converted to the other format.
+ */
+static void
+xfs_bmapi_finish(
+	struct xfs_bmalloca	*bma,
+	int			whichfork,
+	int			error)
+{
+	if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
+	    XFS_IFORK_FORMAT(bma->ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+		bma->logflags &= ~xfs_ilog_fext(whichfork);
+	else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
+		 XFS_IFORK_FORMAT(bma->ip, whichfork) != XFS_DINODE_FMT_BTREE)
+		bma->logflags &= ~xfs_ilog_fbroot(whichfork);
+
+	if (bma->logflags)
+		xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
+	if (bma->cur)
+		xfs_btree_del_cursor(bma->cur, error);
+}
+
 /*
  * Map file blocks to filesystem blocks, and allocate blocks or convert the
  * extent state if necessary.  Details behaviour is controlled by the flags
@@ -4273,15 +4311,6 @@ xfs_bmapi_write(
 
 	XFS_STATS_INC(mp, xs_blk_mapw);
 
-	if (!tp || tp->t_firstblock == NULLFSBLOCK) {
-		if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
-			bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
-		else
-			bma.minleft = 1;
-	} else {
-		bma.minleft = 0;
-	}
-
 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
 		error = xfs_iread_extents(tp, ip, whichfork);
 		if (error)
@@ -4296,6 +4325,7 @@ xfs_bmapi_write(
 	bma.ip = ip;
 	bma.total = total;
 	bma.datatype = 0;
+	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
 
 	/*
 	 * The delalloc flag means the caller wants to allocate the entire
@@ -4434,32 +4464,12 @@ xfs_bmapi_write(
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
 	       XFS_IFORK_NEXTENTS(ip, whichfork) >
 		XFS_IFORK_MAXEXT(ip, whichfork));
-	error = 0;
+	xfs_bmapi_finish(&bma, whichfork, 0);
+	xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
+		orig_nmap, *nmap);
+	return 0;
 error0:
-	/*
-	 * Log everything.  Do this after conversion, there's no point in
-	 * logging the extent records if we've converted to btree format.
-	 */
-	if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
-	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
-		bma.logflags &= ~xfs_ilog_fext(whichfork);
-	else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
-		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
-		bma.logflags &= ~xfs_ilog_fbroot(whichfork);
-	/*
-	 * Log whatever the flags say, even if error.  Otherwise we might miss
-	 * detecting a case where the data is changed, there's an error,
-	 * and it's not logged so we don't shutdown when we should.
-	 */
-	if (bma.logflags)
-		xfs_trans_log_inode(tp, ip, bma.logflags);
-
-	if (bma.cur) {
-		xfs_btree_del_cursor(bma.cur, error);
-	}
-	if (!error)
-		xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
-			orig_nmap, *nmap);
+	xfs_bmapi_finish(&bma, whichfork, error);
 	return error;
 }
 
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 05/10] xfs: split XFS_BMAPI_DELALLOC handling from xfs_bmapi_write
  2019-02-15 14:47 small fixes and optimizations for delalloc v2 Christoph Hellwig
                   ` (3 preceding siblings ...)
  2019-02-15 14:47 ` [PATCH 04/10] xfs: factor out two helpers from xfs_bmapi_write Christoph Hellwig
@ 2019-02-15 14:47 ` Christoph Hellwig
  2019-02-15 23:38   ` Darrick J. Wong
  2019-02-15 14:47 ` [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc Christoph Hellwig
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 23+ messages in thread
From: Christoph Hellwig @ 2019-02-15 14:47 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

Delalloc conversion has traditionally been part of our function to
allocate blocks on disk (first xfs_bmapi, then xfs_bmapi_write), but
delalloc conversion is a little special as we really do not want
to allocate blocks over holes, for which we don't have reservations.

Split the delalloc conversions into a separate helper to keep the
code simple and structured.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_bmap.c | 106 +++++++++++++++++++++------------------
 fs/xfs/libxfs/xfs_bmap.h |   4 --
 2 files changed, 58 insertions(+), 52 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index a9c9bd39d822..5fdfa9f55fde 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4327,22 +4327,6 @@ xfs_bmapi_write(
 	bma.datatype = 0;
 	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
 
-	/*
-	 * The delalloc flag means the caller wants to allocate the entire
-	 * delalloc extent backing bno where bno may not necessarily match the
-	 * startoff. Now that we've looked up the extent, reset the range to
-	 * map based on the extent in the file. If we're in a hole, this may be
-	 * an error so don't adjust anything.
-	 */
-	if ((flags & XFS_BMAPI_DELALLOC) &&
-	    !eof && bno >= bma.got.br_startoff) {
-		bno = bma.got.br_startoff;
-		len = bma.got.br_blockcount;
-#ifdef DEBUG
-		orig_bno = bno;
-		orig_len = len;
-#endif
-	}
 	n = 0;
 	end = bno + len;
 	obno = bno;
@@ -4359,26 +4343,7 @@ xfs_bmapi_write(
 			ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
 			         (flags & XFS_BMAPI_COWFORK)));
 
-			if (flags & XFS_BMAPI_DELALLOC) {
-				/*
-				 * For the COW fork we can reasonably get a
-				 * request for converting an extent that races
-				 * with other threads already having converted
-				 * part of it, as there converting COW to
-				 * regular blocks is not protected using the
-				 * IOLOCK.
-				 */
-				ASSERT(flags & XFS_BMAPI_COWFORK);
-				if (!(flags & XFS_BMAPI_COWFORK)) {
-					error = -EIO;
-					goto error0;
-				}
-
-				if (eof || bno >= end)
-					break;
-			} else {
-				need_alloc = true;
-			}
+			need_alloc = true;
 		} else if (isnullstartblock(bma.got.br_startblock)) {
 			wasdelay = true;
 		}
@@ -4487,23 +4452,68 @@ xfs_bmapi_convert_delalloc(
 	int			whichfork,
 	struct xfs_bmbt_irec	*imap)
 {
-	int			flags = XFS_BMAPI_DELALLOC;
-	int			nimaps = 1;
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_bmalloca	bma = { NULL };
 	int			error;
-	int			total = XFS_EXTENTADD_SPACE_RES(ip->i_mount,
-								XFS_DATA_FORK);
 
-	if (whichfork == XFS_COW_FORK)
-		flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
+	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
+	    bma.got.br_startoff > offset_fsb) {
+		/*
+		 * No extent found in the range we are trying to convert.  This
+		 * should only happen for the COW fork, where another thread
+		 * might have moved the extent to the data fork in the meantime.
+		 */
+		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
+		return -EAGAIN;
+	}
 
 	/*
-	 * The delalloc flag means to allocate the entire extent; pass a dummy
-	 * length of 1.
+	 * If we find a real extent here we raced with another thread converting
+	 * the extent.  Just return the real extent at this offset.
 	 */
-	error = xfs_bmapi_write(tp, ip, offset_fsb, 1, flags, total, imap,
-				&nimaps);
-	if (!error && !nimaps)
-		error = -EFSCORRUPTED;
+	if (!isnullstartblock(bma.got.br_startblock)) {
+		*imap = bma.got;
+		return 0;
+	}
+
+	bma.tp = tp;
+	bma.ip = ip;
+	bma.wasdel = true;
+	bma.offset = bma.got.br_startoff;
+	bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN);
+	bma.total = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
+	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
+	if (whichfork == XFS_COW_FORK)
+		bma.flags = XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
+
+	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
+		bma.prev.br_startoff = NULLFILEOFF;
+
+	error = xfs_bmapi_allocate(&bma);
+	if (error)
+		goto out_finish;
+
+	error = -ENOSPC;
+	if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
+		goto out_finish;
+	error = -EFSCORRUPTED;
+	if (WARN_ON_ONCE(!bma.got.br_startblock && !XFS_IS_REALTIME_INODE(ip)))
+		goto out_finish;
+
+	ASSERT(!isnullstartblock(bma.got.br_startblock));
+	*imap = bma.got;
+
+	if (whichfork == XFS_COW_FORK) {
+		error = xfs_refcount_alloc_cow_extent(tp, bma.blkno,
+				bma.length);
+		if (error)
+			goto out_finish;
+	}
+
+	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
+			whichfork);
+out_finish:
+	xfs_bmapi_finish(&bma, whichfork, error);
 	return error;
 }
 
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 4dc7d1a02b35..b5eca7a26949 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -95,9 +95,6 @@ struct xfs_extent_free_item
 /* Map something in the CoW fork. */
 #define XFS_BMAPI_COWFORK	0x200
 
-/* Only convert delalloc space, don't allocate entirely new extents */
-#define XFS_BMAPI_DELALLOC	0x400
-
 /* Only convert unwritten extents, don't allocate new blocks */
 #define XFS_BMAPI_CONVERT_ONLY	0x800
 
@@ -117,7 +114,6 @@ struct xfs_extent_free_item
 	{ XFS_BMAPI_ZERO,	"ZERO" }, \
 	{ XFS_BMAPI_REMAP,	"REMAP" }, \
 	{ XFS_BMAPI_COWFORK,	"COWFORK" }, \
-	{ XFS_BMAPI_DELALLOC,	"DELALLOC" }, \
 	{ XFS_BMAPI_CONVERT_ONLY, "CONVERT_ONLY" }, \
 	{ XFS_BMAPI_NODISCARD,	"NODISCARD" }, \
 	{ XFS_BMAPI_NORMAP,	"NORMAP" }
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc
  2019-02-15 14:47 small fixes and optimizations for delalloc v2 Christoph Hellwig
                   ` (4 preceding siblings ...)
  2019-02-15 14:47 ` [PATCH 05/10] xfs: split XFS_BMAPI_DELALLOC handling " Christoph Hellwig
@ 2019-02-15 14:47 ` Christoph Hellwig
  2019-02-15 22:49   ` Darrick J. Wong
  2019-02-15 14:47 ` [PATCH 07/10] xfs: move stat accounting " Christoph Hellwig
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 23+ messages in thread
From: Christoph Hellwig @ 2019-02-15 14:47 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

No need to deal with the transaction and the inode locking in the
caller. Note that we also switch to passing whichfork as the second
paramter, matching what most related functions do.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_bmap.c | 38 +++++++++++++++++++++++++++++++++-----
 fs/xfs/libxfs/xfs_bmap.h |  5 +++--
 fs/xfs/xfs_iomap.c       | 32 ++++----------------------------
 3 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 5fdfa9f55fde..fc4f1d3145c4 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4446,16 +4446,30 @@ xfs_bmapi_write(
  */
 int
 xfs_bmapi_convert_delalloc(
-	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
-	xfs_fileoff_t		offset_fsb,
 	int			whichfork,
-	struct xfs_bmbt_irec	*imap)
+	xfs_fileoff_t		offset_fsb,
+	struct xfs_bmbt_irec	*imap,
+	unsigned int		*seq)
 {
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_bmalloca	bma = { NULL };
+	struct xfs_trans	*tp;
 	int			error;
 
+	/*
+	 * Space for the extent and indirect blocks was reserved when the
+	 * delalloc extent was created so there's no need to do so here.
+	 */
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
+				XFS_TRANS_RESERVE, &tp);
+	if (error)
+		return error;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, 0);
+
 	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
 	    bma.got.br_startoff > offset_fsb) {
 		/*
@@ -4464,7 +4478,8 @@ xfs_bmapi_convert_delalloc(
 		 * might have moved the extent to the data fork in the meantime.
 		 */
 		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
-		return -EAGAIN;
+		error = -EAGAIN;
+		goto out_trans_cancel;
 	}
 
 	/*
@@ -4473,7 +4488,8 @@ xfs_bmapi_convert_delalloc(
 	 */
 	if (!isnullstartblock(bma.got.br_startblock)) {
 		*imap = bma.got;
-		return 0;
+		*seq = READ_ONCE(ifp->if_seq);
+		goto out_trans_cancel;
 	}
 
 	bma.tp = tp;
@@ -4502,6 +4518,7 @@ xfs_bmapi_convert_delalloc(
 
 	ASSERT(!isnullstartblock(bma.got.br_startblock));
 	*imap = bma.got;
+	*seq = READ_ONCE(ifp->if_seq);
 
 	if (whichfork == XFS_COW_FORK) {
 		error = xfs_refcount_alloc_cow_extent(tp, bma.blkno,
@@ -4512,8 +4529,19 @@ xfs_bmapi_convert_delalloc(
 
 	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
 			whichfork);
+	if (error)
+		goto out_finish;
+
+	xfs_bmapi_finish(&bma, whichfork, 0);
+	error = xfs_trans_commit(tp);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
+
 out_finish:
 	xfs_bmapi_finish(&bma, whichfork, error);
+out_trans_cancel:
+	xfs_trans_cancel(tp);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
 
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index b5eca7a26949..78b190b6e908 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -223,8 +223,9 @@ int	xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
 		xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
 		struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
 		int eof);
-int	xfs_bmapi_convert_delalloc(struct xfs_trans *, struct xfs_inode *,
-		xfs_fileoff_t, int, struct xfs_bmbt_irec *);
+int	xfs_bmapi_convert_delalloc(struct xfs_inode *ip, int whichfork,
+		xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap,
+		unsigned int *seq);
 
 static inline void
 xfs_bmap_add_free(
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index fd3aacd4bf02..39be741cac5a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -684,11 +684,9 @@ xfs_iomap_write_allocate(
 	unsigned int		*seq)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 	xfs_fileoff_t		offset_fsb;
 	xfs_fileoff_t		map_start_fsb;
 	xfs_extlen_t		map_count_fsb;
-	struct xfs_trans	*tp;
 	int			error = 0;
 
 	/*
@@ -716,17 +714,8 @@ xfs_iomap_write_allocate(
 		/*
 		 * Allocate in a loop because it may take several attempts to
 		 * allocate real blocks for a contiguous delalloc extent if free
-		 * space is sufficiently fragmented. Note that space for the
-		 * extent and indirect blocks was reserved when the delalloc
-		 * extent was created so there's no need to do so here.
+		 * space is sufficiently fragmented.
 		 */
-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
-					XFS_TRANS_RESERVE, &tp);
-		if (error)
-			return error;
-
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		xfs_trans_ijoin(tp, ip, 0);
 
 		/*
 		 * ilock was dropped since imap was populated which means it
@@ -737,17 +726,10 @@ xfs_iomap_write_allocate(
 		 * caller. We'll trim it down to the caller's most recently
 		 * validated range before we return.
 		 */
-		error = xfs_bmapi_convert_delalloc(tp, ip, offset_fsb,
-						   whichfork, imap);
-		if (error)
-			goto trans_cancel;
-
-		error = xfs_trans_commit(tp);
+		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset_fsb,
+				imap, seq);
 		if (error)
-			goto error0;
-
-		*seq = READ_ONCE(ifp->if_seq);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+			return error;
 
 		/*
 		 * See if we were able to allocate an extent that covers at
@@ -766,12 +748,6 @@ xfs_iomap_write_allocate(
 			return 0;
 		}
 	}
-
-trans_cancel:
-	xfs_trans_cancel(tp);
-error0:
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return error;
 }
 
 int
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 07/10] xfs: move stat accounting to xfs_bmapi_convert_delalloc
  2019-02-15 14:47 small fixes and optimizations for delalloc v2 Christoph Hellwig
                   ` (5 preceding siblings ...)
  2019-02-15 14:47 ` [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc Christoph Hellwig
@ 2019-02-15 14:47 ` Christoph Hellwig
  2019-02-15 22:47   ` Darrick J. Wong
  2019-02-15 14:47 ` [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c Christoph Hellwig
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 23+ messages in thread
From: Christoph Hellwig @ 2019-02-15 14:47 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

This way we can actually count how many bytes got converted and how many
calls we need, unlike in the caller which doesn't have the detailed
view.

Note that this includes a slight change in behavior as the
xs_xstrat_quick is now bumped for every allocation instead of just the
one covering the requested writeback offset, which makes a lot more
sense.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 3 +++
 fs/xfs/xfs_iomap.c       | 4 ----
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index fc4f1d3145c4..4cf83475f0d0 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4516,6 +4516,9 @@ xfs_bmapi_convert_delalloc(
 	if (WARN_ON_ONCE(!bma.got.br_startblock && !XFS_IS_REALTIME_INODE(ip)))
 		goto out_finish;
 
+	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
+	XFS_STATS_INC(mp, xs_xstrat_quick);
+
 	ASSERT(!isnullstartblock(bma.got.br_startblock));
 	*imap = bma.got;
 	*seq = READ_ONCE(ifp->if_seq);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 39be741cac5a..15da53b5fb53 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -707,9 +707,6 @@ xfs_iomap_write_allocate(
 	map_start_fsb = imap->br_startoff;
 	map_count_fsb = imap->br_blockcount;
 
-	XFS_STATS_ADD(mp, xs_xstrat_bytes,
-		      XFS_FSB_TO_B(mp, imap->br_blockcount));
-
 	while (true) {
 		/*
 		 * Allocate in a loop because it may take several attempts to
@@ -741,7 +738,6 @@ xfs_iomap_write_allocate(
 		if ((offset_fsb >= imap->br_startoff) &&
 		    (offset_fsb < (imap->br_startoff +
 				   imap->br_blockcount))) {
-			XFS_STATS_INC(mp, xs_xstrat_quick);
 			xfs_trim_extent(imap, map_start_fsb, map_count_fsb);
 			ASSERT(offset_fsb >= imap->br_startoff &&
 			       offset_fsb < imap->br_startoff + imap->br_blockcount);
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c
  2019-02-15 14:47 small fixes and optimizations for delalloc v2 Christoph Hellwig
                   ` (6 preceding siblings ...)
  2019-02-15 14:47 ` [PATCH 07/10] xfs: move stat accounting " Christoph Hellwig
@ 2019-02-15 14:47 ` Christoph Hellwig
  2019-02-15 23:40   ` Darrick J. Wong
  2019-02-15 14:47 ` [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks Christoph Hellwig
  2019-02-15 14:47 ` [PATCH 10/10] xfs: retry COW fork delalloc conversion when no Christoph Hellwig
  9 siblings, 1 reply; 23+ messages in thread
From: Christoph Hellwig @ 2019-02-15 14:47 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

This function is a small wrapper only used by the writeback code, so
move it together with the writeback code and simplify it down to the
glorified do { } while loop that is now is.

A few bits intentionally got lost here: no need to call xfs_qm_dqattach
because quotas are always attached when we create the delalloc
reservation, and no need for the imap->br_startblock == 0 check given
that xfs_bmapi_convert_delalloc already has a WARN_ON_ONCE for exactly
that condition.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c  | 51 +++++++++++++++++++++++++----
 fs/xfs/xfs_iomap.c | 81 ----------------------------------------------
 fs/xfs/xfs_iomap.h |  2 --
 3 files changed, 45 insertions(+), 89 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 8bfb62d8776f..42017ecf78ed 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -329,6 +329,38 @@ xfs_imap_valid(
 	return true;
 }
 
+/*
+ * Pass in a dellalloc extent and convert it to real extents, return the real
+ * extent that maps offset_fsb in wpc->imap.
+ *
+ * The current page is held locked so nothing could have removed the block
+ * backing offset_fsb.
+ */
+static int
+xfs_convert_blocks(
+	struct xfs_writepage_ctx *wpc,
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		offset_fsb)
+{
+	int			error;
+
+	/*
+	 * Attempt to allocate whatever delalloc extent currently backs
+	 * offset_fsb and put the result into wpc->imap.  Allocate in a loop
+	 * because it may take several attempts to allocate real blocks for a
+	 * contiguous delalloc extent if free space is sufficiently fragmented.
+	 */
+	do {
+		error = xfs_bmapi_convert_delalloc(ip, wpc->fork, offset_fsb,
+				&wpc->imap, wpc->fork == XFS_COW_FORK ?
+					&wpc->cow_seq : &wpc->data_seq);
+		if (error)
+			return error;
+	} while (wpc->imap.br_startoff + wpc->imap.br_blockcount <= offset_fsb);
+
+	return 0;
+}
+
 STATIC int
 xfs_map_blocks(
 	struct xfs_writepage_ctx *wpc,
@@ -458,14 +490,21 @@ xfs_map_blocks(
 	trace_xfs_map_blocks_found(ip, offset, count, wpc->fork, &imap);
 	return 0;
 allocate_blocks:
-	error = xfs_iomap_write_allocate(ip, wpc->fork, offset, &imap,
-			wpc->fork == XFS_COW_FORK ?
-					 &wpc->cow_seq : &wpc->data_seq);
+	error = xfs_convert_blocks(wpc, ip, offset_fsb);
 	if (error)
 		return error;
-	ASSERT(wpc->fork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
-	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
-	wpc->imap = imap;
+
+	/*
+	 * Due to merging the return real extent might be larger than the
+	 * original delalloc one.  Trim the return extent to the next COW
+	 * boundary again to force a re-lookup.
+	 */
+	if (wpc->fork != XFS_COW_FORK && cow_fsb != NULLFILEOFF &&
+	    cow_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount)
+		wpc->imap.br_blockcount = cow_fsb - wpc->imap.br_startoff;
+
+	ASSERT(wpc->imap.br_startoff <= offset_fsb);
+	ASSERT(wpc->imap.br_startoff + wpc->imap.br_blockcount > offset_fsb);
 	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->fork, &imap);
 	return 0;
 }
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 15da53b5fb53..361dfe7af783 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -665,87 +665,6 @@ xfs_file_iomap_begin_delay(
 	return error;
 }
 
-/*
- * Pass in a delayed allocate extent, convert it to real extents;
- * return to the caller the extent we create which maps on top of
- * the originating callers request.
- *
- * Called without a lock on the inode.
- *
- * We no longer bother to look at the incoming map - all we have to
- * guarantee is that whatever we allocate fills the required range.
- */
-int
-xfs_iomap_write_allocate(
-	struct xfs_inode	*ip,
-	int			whichfork,
-	xfs_off_t		offset,
-	struct xfs_bmbt_irec	*imap,
-	unsigned int		*seq)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		offset_fsb;
-	xfs_fileoff_t		map_start_fsb;
-	xfs_extlen_t		map_count_fsb;
-	int			error = 0;
-
-	/*
-	 * Make sure that the dquots are there.
-	 */
-	error = xfs_qm_dqattach(ip);
-	if (error)
-		return error;
-
-	/*
-	 * Store the file range the caller is interested in because it encodes
-	 * state such as potential overlap with COW fork blocks. We must trim
-	 * the allocated extent down to this range to maintain consistency with
-	 * what the caller expects. Revalidation of the range itself is the
-	 * responsibility of the caller.
-	 */
-	offset_fsb = XFS_B_TO_FSBT(mp, offset);
-	map_start_fsb = imap->br_startoff;
-	map_count_fsb = imap->br_blockcount;
-
-	while (true) {
-		/*
-		 * Allocate in a loop because it may take several attempts to
-		 * allocate real blocks for a contiguous delalloc extent if free
-		 * space is sufficiently fragmented.
-		 */
-
-		/*
-		 * ilock was dropped since imap was populated which means it
-		 * might no longer be valid. The current page is held locked so
-		 * nothing could have removed the block backing offset_fsb.
-		 * Attempt to allocate whatever delalloc extent currently backs
-		 * offset_fsb and put the result in the imap pointer from the
-		 * caller. We'll trim it down to the caller's most recently
-		 * validated range before we return.
-		 */
-		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset_fsb,
-				imap, seq);
-		if (error)
-			return error;
-
-		/*
-		 * See if we were able to allocate an extent that covers at
-		 * least part of the callers request.
-		 */
-		if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
-			return xfs_alert_fsblock_zero(ip, imap);
-
-		if ((offset_fsb >= imap->br_startoff) &&
-		    (offset_fsb < (imap->br_startoff +
-				   imap->br_blockcount))) {
-			xfs_trim_extent(imap, map_start_fsb, map_count_fsb);
-			ASSERT(offset_fsb >= imap->br_startoff &&
-			       offset_fsb < imap->br_startoff + imap->br_blockcount);
-			return 0;
-		}
-	}
-}
-
 int
 xfs_iomap_write_unwritten(
 	xfs_inode_t	*ip,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index c6170548831b..6b16243db0b7 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -13,8 +13,6 @@ struct xfs_bmbt_irec;
 
 int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 			struct xfs_bmbt_irec *, int);
-int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
-			struct xfs_bmbt_irec *, unsigned int *);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks
  2019-02-15 14:47 small fixes and optimizations for delalloc v2 Christoph Hellwig
                   ` (7 preceding siblings ...)
  2019-02-15 14:47 ` [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c Christoph Hellwig
@ 2019-02-15 14:47 ` Christoph Hellwig
  2019-02-15 23:37   ` Darrick J. Wong
  2019-02-15 14:47 ` [PATCH 10/10] xfs: retry COW fork delalloc conversion when no Christoph Hellwig
  9 siblings, 1 reply; 23+ messages in thread
From: Christoph Hellwig @ 2019-02-15 14:47 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

Now that we properly handle the race with truncate in the delalloc
allocator there is no need to short cut this exceptional case earlier
on.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/xfs_aops.c | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 42017ecf78ed..a6abb7125203 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -420,26 +420,6 @@ xfs_map_blocks(
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
 		wpc->fork = XFS_COW_FORK;
-
-		/*
-		 * Truncate can race with writeback since writeback doesn't
-		 * take the iolock and truncate decreases the file size before
-		 * it starts truncating the pages between new_size and old_size.
-		 * Therefore, we can end up in the situation where writeback
-		 * gets a CoW fork mapping but the truncate makes the mapping
-		 * invalid and we end up in here trying to get a new mapping.
-		 * bail out here so that we simply never get a valid mapping
-		 * and so we drop the write altogether.  The page truncation
-		 * will kill the contents anyway.
-		 */
-		if (offset > i_size_read(inode)) {
-			wpc->imap.br_blockcount = end_fsb - offset_fsb;
-			wpc->imap.br_startoff = offset_fsb;
-			wpc->imap.br_startblock = HOLESTARTBLOCK;
-			wpc->imap.br_state = XFS_EXT_NORM;
-			return 0;
-		}
-
 		goto allocate_blocks;
 	}
 
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 10/10] xfs: retry COW fork delalloc conversion when no
  2019-02-15 14:47 small fixes and optimizations for delalloc v2 Christoph Hellwig
                   ` (8 preceding siblings ...)
  2019-02-15 14:47 ` [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks Christoph Hellwig
@ 2019-02-15 14:47 ` Christoph Hellwig
  2019-02-15 23:32   ` Darrick J. Wong
  9 siblings, 1 reply; 23+ messages in thread
From: Christoph Hellwig @ 2019-02-15 14:47 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

While we can only truncate a block under the page lock for the current
page, there is no high-level synchronization for moving extents from the
COW to the data fork.  This means that for example we can have another
thread doing a direct I/O completion that moves extents from the COW to
the data fork race with writeback.  While this race is very hard to hit
the always_cow seems to reproduce it reasonably well, and it also exists
without that.  Because of that there is a chance that a delalloc
conversion for the COW fork might not find any extents to convert.  In
that case we should retry the whole block lookup and now find the blocks
in the data fork.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a6abb7125203..2ed8733eca49 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -334,7 +334,8 @@ xfs_imap_valid(
  * extent that maps offset_fsb in wpc->imap.
  *
  * The current page is held locked so nothing could have removed the block
- * backing offset_fsb.
+ * backing offset_fsb, although it could have moved from the COW to the data
+ * fork by another thread.
  */
 static int
 xfs_convert_blocks(
@@ -375,6 +376,7 @@ xfs_map_blocks(
 	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
 	struct xfs_bmbt_irec	imap;
 	struct xfs_iext_cursor	icur;
+	int			retries = 0;
 	int			error = 0;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
@@ -404,6 +406,7 @@ xfs_map_blocks(
 	 * into real extents.  If we return without a valid map, it means we
 	 * landed in a hole and we skip the block.
 	 */
+retry:
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 	       (ip->i_df.if_flags & XFS_IFEXTENTS));
@@ -471,8 +474,19 @@ xfs_map_blocks(
 	return 0;
 allocate_blocks:
 	error = xfs_convert_blocks(wpc, ip, offset_fsb);
-	if (error)
+	if (error) {
+		/*
+		 * If we failed to find the extent in the COW fork we might have
+		 * raced with a COW to data fork conversion or truncate.
+		 * Restart the lookup to catch the extent in the data fork for
+		 * the former case, but prevent additional retries to avoid
+		 * looping forever for the latter case.
+		 */
+		if (error == -EAGAIN && wpc->fork == XFS_COW_FORK && !retries++)
+			goto retry;
+		ASSERT(error != -EAGAIN);
 		return error;
+	}
 
 	/*
 	 * Due to merging the return real extent might be larger than the
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH 03/10] xfs: simplify the xfs_bmap_btree_to_extents calling conventions
  2019-02-15 14:47 ` [PATCH 03/10] xfs: simplify the xfs_bmap_btree_to_extents calling conventions Christoph Hellwig
@ 2019-02-15 16:41   ` Darrick J. Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Darrick J. Wong @ 2019-02-15 16:41 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, Brian Foster

On Fri, Feb 15, 2019 at 03:47:18PM +0100, Christoph Hellwig wrote:
> Move boilerplate code from the callers into xfs_bmap_btree_to_extents:
> 
>  - exit early without failure if we don't need to convert to the
>    extent format
>  - assert that we have a btree cursor
>  - don't reinitialize the passed in logflags argument
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Brian Foster <bfoster@redhat.com>
> ---
>  fs/xfs/libxfs/xfs_bmap.c | 78 ++++++++++++++--------------------------
>  1 file changed, 26 insertions(+), 52 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index f4a65330a2a9..7fa454f71f46 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -577,42 +577,44 @@ __xfs_bmap_add_free(
>   */
>  
>  /*
> - * Transform a btree format file with only one leaf node, where the
> - * extents list will fit in the inode, into an extents format file.
> - * Since the file extents are already in-core, all we have to do is
> - * give up the space for the btree root and pitch the leaf block.
> + * Convert the inode format to extent format if it currently is in btree format,
> + * but the extent list is small enough that it fits into the extent format.
> + 8

"*", not "8"; I'll fix that on import if I pull this series.

Otherwise looks ok to me,

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> + * Since the extents are already in-core, all we have to do is give up the space
> + * for the btree root and pitch the leaf block.
>   */
>  STATIC int				/* error */
>  xfs_bmap_btree_to_extents(
> -	xfs_trans_t		*tp,	/* transaction pointer */
> -	xfs_inode_t		*ip,	/* incore inode pointer */
> -	xfs_btree_cur_t		*cur,	/* btree cursor */
> +	struct xfs_trans	*tp,	/* transaction pointer */
> +	struct xfs_inode	*ip,	/* incore inode pointer */
> +	struct xfs_btree_cur	*cur,	/* btree cursor */
>  	int			*logflagsp, /* inode logging flags */
>  	int			whichfork)  /* data or attr fork */
>  {
> -	/* REFERENCED */
> +	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
> +	struct xfs_mount	*mp = ip->i_mount;
> +	struct xfs_btree_block	*rblock = ifp->if_broot;
>  	struct xfs_btree_block	*cblock;/* child btree block */
>  	xfs_fsblock_t		cbno;	/* child block number */
>  	xfs_buf_t		*cbp;	/* child block's buffer */
>  	int			error;	/* error return value */
> -	struct xfs_ifork	*ifp;	/* inode fork data */
> -	xfs_mount_t		*mp;	/* mount point structure */
>  	__be64			*pp;	/* ptr to block address */
> -	struct xfs_btree_block	*rblock;/* root btree block */
>  	struct xfs_owner_info	oinfo;
>  
> -	mp = ip->i_mount;
> -	ifp = XFS_IFORK_PTR(ip, whichfork);
> +	/* check if we actually need the extent format first: */
> +	if (!xfs_bmap_wants_extents(ip, whichfork))
> +		return 0;
> +
> +	ASSERT(cur);
>  	ASSERT(whichfork != XFS_COW_FORK);
>  	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
>  	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
> -	rblock = ifp->if_broot;
>  	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
>  	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
>  	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
> +
>  	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
>  	cbno = be64_to_cpu(*pp);
> -	*logflagsp = 0;
>  #ifdef DEBUG
>  	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
>  			xfs_btree_check_lptr(cur, cbno, 1));
> @@ -635,7 +637,7 @@ xfs_bmap_btree_to_extents(
>  	ASSERT(ifp->if_broot == NULL);
>  	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
>  	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
> -	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
> +	*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
>  	return 0;
>  }
>  
> @@ -4424,19 +4426,10 @@ xfs_bmapi_write(
>  	}
>  	*nmap = n;
>  
> -	/*
> -	 * Transform from btree to extents, give it cur.
> -	 */
> -	if (xfs_bmap_wants_extents(ip, whichfork)) {
> -		int		tmp_logflags = 0;
> -
> -		ASSERT(bma.cur);
> -		error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
> -			&tmp_logflags, whichfork);
> -		bma.logflags |= tmp_logflags;
> -		if (error)
> -			goto error0;
> -	}
> +	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
> +			whichfork);
> +	if (error)
> +		goto error0;
>  
>  	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
>  	       XFS_IFORK_NEXTENTS(ip, whichfork) >
> @@ -4574,13 +4567,7 @@ xfs_bmapi_remap(
>  	if (error)
>  		goto error0;
>  
> -	if (xfs_bmap_wants_extents(ip, whichfork)) {
> -		int		tmp_logflags = 0;
> -
> -		error = xfs_bmap_btree_to_extents(tp, ip, cur,
> -			&tmp_logflags, whichfork);
> -		logflags |= tmp_logflags;
> -	}
> +	error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
>  
>  error0:
>  	if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS)
> @@ -5444,24 +5431,11 @@ __xfs_bunmapi(
>  		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
>  				&tmp_logflags, whichfork);
>  		logflags |= tmp_logflags;
> -		if (error)
> -			goto error0;
> -	}
> -	/*
> -	 * transform from btree to extents, give it cur
> -	 */
> -	else if (xfs_bmap_wants_extents(ip, whichfork)) {
> -		ASSERT(cur != NULL);
> -		error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
> +	} else {
> +		error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
>  			whichfork);
> -		logflags |= tmp_logflags;
> -		if (error)
> -			goto error0;
>  	}
> -	/*
> -	 * transform from extents to local?
> -	 */
> -	error = 0;
> +
>  error0:
>  	/*
>  	 * Log everything.  Do this after conversion, there's no point in
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 04/10] xfs: factor out two helpers from xfs_bmapi_write
  2019-02-15 14:47 ` [PATCH 04/10] xfs: factor out two helpers from xfs_bmapi_write Christoph Hellwig
@ 2019-02-15 22:37   ` Darrick J. Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Darrick J. Wong @ 2019-02-15 22:37 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, Brian Foster

On Fri, Feb 15, 2019 at 03:47:19PM +0100, Christoph Hellwig wrote:
> We want to be able to reuse them for the upcoming dedidcated delalloc
> convert routine.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Brian Foster <bfoster@redhat.com>

Looks ok to me...
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/libxfs/xfs_bmap.c | 78 ++++++++++++++++++++++------------------
>  1 file changed, 44 insertions(+), 34 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index 7fa454f71f46..a9c9bd39d822 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -4194,6 +4194,44 @@ xfs_bmapi_convert_unwritten(
>  	return 0;
>  }
>  
> +static inline xfs_extlen_t
> +xfs_bmapi_minleft(
> +	struct xfs_trans	*tp,
> +	struct xfs_inode	*ip,
> +	int			fork)
> +{
> +	if (tp && tp->t_firstblock != NULLFSBLOCK)
> +		return 0;
> +	if (XFS_IFORK_FORMAT(ip, fork) != XFS_DINODE_FMT_BTREE)
> +		return 1;
> +	return be16_to_cpu(XFS_IFORK_PTR(ip, fork)->if_broot->bb_level) + 1;
> +}
> +
> +/*
> + * Log whatever the flags say, even if error.  Otherwise we might miss detecting
> + * a case where the data is changed, there's an error, and it's not logged so we
> + * don't shutdown when we should.  Don't bother logging extents/btree changes if
> + * we converted to the other format.
> + */
> +static void
> +xfs_bmapi_finish(
> +	struct xfs_bmalloca	*bma,
> +	int			whichfork,
> +	int			error)
> +{
> +	if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
> +	    XFS_IFORK_FORMAT(bma->ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
> +		bma->logflags &= ~xfs_ilog_fext(whichfork);
> +	else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
> +		 XFS_IFORK_FORMAT(bma->ip, whichfork) != XFS_DINODE_FMT_BTREE)
> +		bma->logflags &= ~xfs_ilog_fbroot(whichfork);
> +
> +	if (bma->logflags)
> +		xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
> +	if (bma->cur)
> +		xfs_btree_del_cursor(bma->cur, error);
> +}
> +
>  /*
>   * Map file blocks to filesystem blocks, and allocate blocks or convert the
>   * extent state if necessary.  Details behaviour is controlled by the flags
> @@ -4273,15 +4311,6 @@ xfs_bmapi_write(
>  
>  	XFS_STATS_INC(mp, xs_blk_mapw);
>  
> -	if (!tp || tp->t_firstblock == NULLFSBLOCK) {
> -		if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
> -			bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
> -		else
> -			bma.minleft = 1;
> -	} else {
> -		bma.minleft = 0;
> -	}
> -
>  	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
>  		error = xfs_iread_extents(tp, ip, whichfork);
>  		if (error)
> @@ -4296,6 +4325,7 @@ xfs_bmapi_write(
>  	bma.ip = ip;
>  	bma.total = total;
>  	bma.datatype = 0;
> +	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
>  
>  	/*
>  	 * The delalloc flag means the caller wants to allocate the entire
> @@ -4434,32 +4464,12 @@ xfs_bmapi_write(
>  	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
>  	       XFS_IFORK_NEXTENTS(ip, whichfork) >
>  		XFS_IFORK_MAXEXT(ip, whichfork));
> -	error = 0;
> +	xfs_bmapi_finish(&bma, whichfork, 0);
> +	xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
> +		orig_nmap, *nmap);
> +	return 0;
>  error0:
> -	/*
> -	 * Log everything.  Do this after conversion, there's no point in
> -	 * logging the extent records if we've converted to btree format.
> -	 */
> -	if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
> -	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
> -		bma.logflags &= ~xfs_ilog_fext(whichfork);
> -	else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
> -		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
> -		bma.logflags &= ~xfs_ilog_fbroot(whichfork);
> -	/*
> -	 * Log whatever the flags say, even if error.  Otherwise we might miss
> -	 * detecting a case where the data is changed, there's an error,
> -	 * and it's not logged so we don't shutdown when we should.
> -	 */
> -	if (bma.logflags)
> -		xfs_trans_log_inode(tp, ip, bma.logflags);
> -
> -	if (bma.cur) {
> -		xfs_btree_del_cursor(bma.cur, error);
> -	}
> -	if (!error)
> -		xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
> -			orig_nmap, *nmap);
> +	xfs_bmapi_finish(&bma, whichfork, error);
>  	return error;
>  }
>  
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 07/10] xfs: move stat accounting to xfs_bmapi_convert_delalloc
  2019-02-15 14:47 ` [PATCH 07/10] xfs: move stat accounting " Christoph Hellwig
@ 2019-02-15 22:47   ` Darrick J. Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Darrick J. Wong @ 2019-02-15 22:47 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, Brian Foster

On Fri, Feb 15, 2019 at 03:47:22PM +0100, Christoph Hellwig wrote:
> This way we can actually count how many bytes got converted and how many
> calls we need, unlike in the caller which doesn't have the detailed
> view.
> 
> Note that this includes a slight change in behavior as the
> xs_xstrat_quick is now bumped for every allocation instead of just the
> one covering the requested writeback offset, which makes a lot more
> sense.

/me wonders if it's really supposed to be the case that we never touch
xs_xstrat_split and it stays zero, but as it hasn't done anything since
the start of linux.git I'll just set that aside for now and say...

> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Brian Foster <bfoster@redhat.com>

...this by itself seems reasonable to me.

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/libxfs/xfs_bmap.c | 3 +++
>  fs/xfs/xfs_iomap.c       | 4 ----
>  2 files changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index fc4f1d3145c4..4cf83475f0d0 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -4516,6 +4516,9 @@ xfs_bmapi_convert_delalloc(
>  	if (WARN_ON_ONCE(!bma.got.br_startblock && !XFS_IS_REALTIME_INODE(ip)))
>  		goto out_finish;
>  
> +	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
> +	XFS_STATS_INC(mp, xs_xstrat_quick);
> +
>  	ASSERT(!isnullstartblock(bma.got.br_startblock));
>  	*imap = bma.got;
>  	*seq = READ_ONCE(ifp->if_seq);
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 39be741cac5a..15da53b5fb53 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -707,9 +707,6 @@ xfs_iomap_write_allocate(
>  	map_start_fsb = imap->br_startoff;
>  	map_count_fsb = imap->br_blockcount;
>  
> -	XFS_STATS_ADD(mp, xs_xstrat_bytes,
> -		      XFS_FSB_TO_B(mp, imap->br_blockcount));
> -
>  	while (true) {
>  		/*
>  		 * Allocate in a loop because it may take several attempts to
> @@ -741,7 +738,6 @@ xfs_iomap_write_allocate(
>  		if ((offset_fsb >= imap->br_startoff) &&
>  		    (offset_fsb < (imap->br_startoff +
>  				   imap->br_blockcount))) {
> -			XFS_STATS_INC(mp, xs_xstrat_quick);
>  			xfs_trim_extent(imap, map_start_fsb, map_count_fsb);
>  			ASSERT(offset_fsb >= imap->br_startoff &&
>  			       offset_fsb < imap->br_startoff + imap->br_blockcount);
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc
  2019-02-15 14:47 ` [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc Christoph Hellwig
@ 2019-02-15 22:49   ` Darrick J. Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Darrick J. Wong @ 2019-02-15 22:49 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, Brian Foster

On Fri, Feb 15, 2019 at 03:47:21PM +0100, Christoph Hellwig wrote:
> No need to deal with the transaction and the inode locking in the
> caller. Note that we also switch to passing whichfork as the second
> paramter, matching what most related functions do.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks ok,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/libxfs/xfs_bmap.c | 38 +++++++++++++++++++++++++++++++++-----
>  fs/xfs/libxfs/xfs_bmap.h |  5 +++--
>  fs/xfs/xfs_iomap.c       | 32 ++++----------------------------
>  3 files changed, 40 insertions(+), 35 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index 5fdfa9f55fde..fc4f1d3145c4 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -4446,16 +4446,30 @@ xfs_bmapi_write(
>   */
>  int
>  xfs_bmapi_convert_delalloc(
> -	struct xfs_trans	*tp,
>  	struct xfs_inode	*ip,
> -	xfs_fileoff_t		offset_fsb,
>  	int			whichfork,
> -	struct xfs_bmbt_irec	*imap)
> +	xfs_fileoff_t		offset_fsb,
> +	struct xfs_bmbt_irec	*imap,
> +	unsigned int		*seq)
>  {
>  	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
> +	struct xfs_mount	*mp = ip->i_mount;
>  	struct xfs_bmalloca	bma = { NULL };
> +	struct xfs_trans	*tp;
>  	int			error;
>  
> +	/*
> +	 * Space for the extent and indirect blocks was reserved when the
> +	 * delalloc extent was created so there's no need to do so here.
> +	 */
> +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
> +				XFS_TRANS_RESERVE, &tp);
> +	if (error)
> +		return error;
> +
> +	xfs_ilock(ip, XFS_ILOCK_EXCL);
> +	xfs_trans_ijoin(tp, ip, 0);
> +
>  	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
>  	    bma.got.br_startoff > offset_fsb) {
>  		/*
> @@ -4464,7 +4478,8 @@ xfs_bmapi_convert_delalloc(
>  		 * might have moved the extent to the data fork in the meantime.
>  		 */
>  		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
> -		return -EAGAIN;
> +		error = -EAGAIN;
> +		goto out_trans_cancel;
>  	}
>  
>  	/*
> @@ -4473,7 +4488,8 @@ xfs_bmapi_convert_delalloc(
>  	 */
>  	if (!isnullstartblock(bma.got.br_startblock)) {
>  		*imap = bma.got;
> -		return 0;
> +		*seq = READ_ONCE(ifp->if_seq);
> +		goto out_trans_cancel;
>  	}
>  
>  	bma.tp = tp;
> @@ -4502,6 +4518,7 @@ xfs_bmapi_convert_delalloc(
>  
>  	ASSERT(!isnullstartblock(bma.got.br_startblock));
>  	*imap = bma.got;
> +	*seq = READ_ONCE(ifp->if_seq);
>  
>  	if (whichfork == XFS_COW_FORK) {
>  		error = xfs_refcount_alloc_cow_extent(tp, bma.blkno,
> @@ -4512,8 +4529,19 @@ xfs_bmapi_convert_delalloc(
>  
>  	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
>  			whichfork);
> +	if (error)
> +		goto out_finish;
> +
> +	xfs_bmapi_finish(&bma, whichfork, 0);
> +	error = xfs_trans_commit(tp);
> +	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +	return error;
> +
>  out_finish:
>  	xfs_bmapi_finish(&bma, whichfork, error);
> +out_trans_cancel:
> +	xfs_trans_cancel(tp);
> +	xfs_iunlock(ip, XFS_ILOCK_EXCL);
>  	return error;
>  }
>  
> diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
> index b5eca7a26949..78b190b6e908 100644
> --- a/fs/xfs/libxfs/xfs_bmap.h
> +++ b/fs/xfs/libxfs/xfs_bmap.h
> @@ -223,8 +223,9 @@ int	xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
>  		xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
>  		struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
>  		int eof);
> -int	xfs_bmapi_convert_delalloc(struct xfs_trans *, struct xfs_inode *,
> -		xfs_fileoff_t, int, struct xfs_bmbt_irec *);
> +int	xfs_bmapi_convert_delalloc(struct xfs_inode *ip, int whichfork,
> +		xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap,
> +		unsigned int *seq);
>  
>  static inline void
>  xfs_bmap_add_free(
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index fd3aacd4bf02..39be741cac5a 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -684,11 +684,9 @@ xfs_iomap_write_allocate(
>  	unsigned int		*seq)
>  {
>  	struct xfs_mount	*mp = ip->i_mount;
> -	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
>  	xfs_fileoff_t		offset_fsb;
>  	xfs_fileoff_t		map_start_fsb;
>  	xfs_extlen_t		map_count_fsb;
> -	struct xfs_trans	*tp;
>  	int			error = 0;
>  
>  	/*
> @@ -716,17 +714,8 @@ xfs_iomap_write_allocate(
>  		/*
>  		 * Allocate in a loop because it may take several attempts to
>  		 * allocate real blocks for a contiguous delalloc extent if free
> -		 * space is sufficiently fragmented. Note that space for the
> -		 * extent and indirect blocks was reserved when the delalloc
> -		 * extent was created so there's no need to do so here.
> +		 * space is sufficiently fragmented.
>  		 */
> -		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
> -					XFS_TRANS_RESERVE, &tp);
> -		if (error)
> -			return error;
> -
> -		xfs_ilock(ip, XFS_ILOCK_EXCL);
> -		xfs_trans_ijoin(tp, ip, 0);
>  
>  		/*
>  		 * ilock was dropped since imap was populated which means it
> @@ -737,17 +726,10 @@ xfs_iomap_write_allocate(
>  		 * caller. We'll trim it down to the caller's most recently
>  		 * validated range before we return.
>  		 */
> -		error = xfs_bmapi_convert_delalloc(tp, ip, offset_fsb,
> -						   whichfork, imap);
> -		if (error)
> -			goto trans_cancel;
> -
> -		error = xfs_trans_commit(tp);
> +		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset_fsb,
> +				imap, seq);
>  		if (error)
> -			goto error0;
> -
> -		*seq = READ_ONCE(ifp->if_seq);
> -		xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +			return error;
>  
>  		/*
>  		 * See if we were able to allocate an extent that covers at
> @@ -766,12 +748,6 @@ xfs_iomap_write_allocate(
>  			return 0;
>  		}
>  	}
> -
> -trans_cancel:
> -	xfs_trans_cancel(tp);
> -error0:
> -	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> -	return error;
>  }
>  
>  int
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 10/10] xfs: retry COW fork delalloc conversion when no
  2019-02-15 14:47 ` [PATCH 10/10] xfs: retry COW fork delalloc conversion when no Christoph Hellwig
@ 2019-02-15 23:32   ` Darrick J. Wong
  2019-02-18  9:09     ` Christoph Hellwig
  0 siblings, 1 reply; 23+ messages in thread
From: Darrick J. Wong @ 2019-02-15 23:32 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, Brian Foster

On Fri, Feb 15, 2019 at 03:47:25PM +0100, Christoph Hellwig wrote:
> While we can only truncate a block under the page lock for the current
> page, there is no high-level synchronization for moving extents from the
> COW to the data fork.  This means that for example we can have another
> thread doing a direct I/O completion that moves extents from the COW to
> the data fork race with writeback.  While this race is very hard to hit
> the always_cow seems to reproduce it reasonably well, and it also exists
> without that.  Because of that there is a chance that a delalloc
> conversion for the COW fork might not find any extents to convert.  In
> that case we should retry the whole block lookup and now find the blocks
> in the data fork.

<thinking aloud mode>

I /think/ the way that this series (+ Brian's before that) solve the
truncate/writeback race is that we now only convert existing delalloc
reservations to real extents when we're preparing to do writeback;
_writepage_map only cares about the mapping of the offset_fsb that it
happens to be looping right now (because the page lock serializes with
page cache truncate/punch); and we use the new sequence counters for
both the data and cow forks to decide when our cached mapping might be
invalid and therefore we need to get a new mapping?

Therefore we don't need to keep calling _trim_extent_eof or checking
offset against i_size or any of those other games because writeback
won't go allocating blocks into holes that are being punched and now we
have an explicit mechanism to invalidate wpc->imap instead of the
scattered detection code we had before?

If that's true, then:

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_aops.c | 18 ++++++++++++++++--
>  1 file changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index a6abb7125203..2ed8733eca49 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -334,7 +334,8 @@ xfs_imap_valid(
>   * extent that maps offset_fsb in wpc->imap.
>   *
>   * The current page is held locked so nothing could have removed the block
> - * backing offset_fsb.
> + * backing offset_fsb, although it could have moved from the COW to the data
> + * fork by another thread.
>   */
>  static int
>  xfs_convert_blocks(
> @@ -375,6 +376,7 @@ xfs_map_blocks(
>  	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
>  	struct xfs_bmbt_irec	imap;
>  	struct xfs_iext_cursor	icur;
> +	int			retries = 0;
>  	int			error = 0;
>  
>  	if (XFS_FORCED_SHUTDOWN(mp))
> @@ -404,6 +406,7 @@ xfs_map_blocks(
>  	 * into real extents.  If we return without a valid map, it means we
>  	 * landed in a hole and we skip the block.
>  	 */
> +retry:
>  	xfs_ilock(ip, XFS_ILOCK_SHARED);
>  	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
>  	       (ip->i_df.if_flags & XFS_IFEXTENTS));
> @@ -471,8 +474,19 @@ xfs_map_blocks(
>  	return 0;
>  allocate_blocks:
>  	error = xfs_convert_blocks(wpc, ip, offset_fsb);
> -	if (error)
> +	if (error) {
> +		/*
> +		 * If we failed to find the extent in the COW fork we might have
> +		 * raced with a COW to data fork conversion or truncate.
> +		 * Restart the lookup to catch the extent in the data fork for
> +		 * the former case, but prevent additional retries to avoid
> +		 * looping forever for the latter case.
> +		 */
> +		if (error == -EAGAIN && wpc->fork == XFS_COW_FORK && !retries++)
> +			goto retry;
> +		ASSERT(error != -EAGAIN);
>  		return error;
> +	}
>  
>  	/*
>  	 * Due to merging the return real extent might be larger than the
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks
  2019-02-15 14:47 ` [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks Christoph Hellwig
@ 2019-02-15 23:37   ` Darrick J. Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Darrick J. Wong @ 2019-02-15 23:37 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, Brian Foster

On Fri, Feb 15, 2019 at 03:47:24PM +0100, Christoph Hellwig wrote:
> Now that we properly handle the race with truncate in the delalloc
> allocator there is no need to short cut this exceptional case earlier
> on.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Brian Foster <bfoster@redhat.com>

Looks ok modulo my comments in patch 10/10,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_aops.c | 20 --------------------
>  1 file changed, 20 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 42017ecf78ed..a6abb7125203 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -420,26 +420,6 @@ xfs_map_blocks(
>  		xfs_iunlock(ip, XFS_ILOCK_SHARED);
>  
>  		wpc->fork = XFS_COW_FORK;
> -
> -		/*
> -		 * Truncate can race with writeback since writeback doesn't
> -		 * take the iolock and truncate decreases the file size before
> -		 * it starts truncating the pages between new_size and old_size.
> -		 * Therefore, we can end up in the situation where writeback
> -		 * gets a CoW fork mapping but the truncate makes the mapping
> -		 * invalid and we end up in here trying to get a new mapping.
> -		 * bail out here so that we simply never get a valid mapping
> -		 * and so we drop the write altogether.  The page truncation
> -		 * will kill the contents anyway.
> -		 */
> -		if (offset > i_size_read(inode)) {
> -			wpc->imap.br_blockcount = end_fsb - offset_fsb;
> -			wpc->imap.br_startoff = offset_fsb;
> -			wpc->imap.br_startblock = HOLESTARTBLOCK;
> -			wpc->imap.br_state = XFS_EXT_NORM;
> -			return 0;
> -		}
> -
>  		goto allocate_blocks;
>  	}
>  
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 05/10] xfs: split XFS_BMAPI_DELALLOC handling from xfs_bmapi_write
  2019-02-15 14:47 ` [PATCH 05/10] xfs: split XFS_BMAPI_DELALLOC handling " Christoph Hellwig
@ 2019-02-15 23:38   ` Darrick J. Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Darrick J. Wong @ 2019-02-15 23:38 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, Brian Foster

On Fri, Feb 15, 2019 at 03:47:20PM +0100, Christoph Hellwig wrote:
> Delalloc conversion has traditionally been part of our function to
> allocate blocks on disk (first xfs_bmapi, then xfs_bmapi_write), but
> delalloc conversion is a little special as we really do not want
> to allocate blocks over holes, for which we don't have reservations.
> 
> Split the delalloc conversions into a separate helper to keep the
> code simple and structured.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks ok,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/libxfs/xfs_bmap.c | 106 +++++++++++++++++++++------------------
>  fs/xfs/libxfs/xfs_bmap.h |   4 --
>  2 files changed, 58 insertions(+), 52 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index a9c9bd39d822..5fdfa9f55fde 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -4327,22 +4327,6 @@ xfs_bmapi_write(
>  	bma.datatype = 0;
>  	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
>  
> -	/*
> -	 * The delalloc flag means the caller wants to allocate the entire
> -	 * delalloc extent backing bno where bno may not necessarily match the
> -	 * startoff. Now that we've looked up the extent, reset the range to
> -	 * map based on the extent in the file. If we're in a hole, this may be
> -	 * an error so don't adjust anything.
> -	 */
> -	if ((flags & XFS_BMAPI_DELALLOC) &&
> -	    !eof && bno >= bma.got.br_startoff) {
> -		bno = bma.got.br_startoff;
> -		len = bma.got.br_blockcount;
> -#ifdef DEBUG
> -		orig_bno = bno;
> -		orig_len = len;
> -#endif
> -	}
>  	n = 0;
>  	end = bno + len;
>  	obno = bno;
> @@ -4359,26 +4343,7 @@ xfs_bmapi_write(
>  			ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
>  			         (flags & XFS_BMAPI_COWFORK)));
>  
> -			if (flags & XFS_BMAPI_DELALLOC) {
> -				/*
> -				 * For the COW fork we can reasonably get a
> -				 * request for converting an extent that races
> -				 * with other threads already having converted
> -				 * part of it, as there converting COW to
> -				 * regular blocks is not protected using the
> -				 * IOLOCK.
> -				 */
> -				ASSERT(flags & XFS_BMAPI_COWFORK);
> -				if (!(flags & XFS_BMAPI_COWFORK)) {
> -					error = -EIO;
> -					goto error0;
> -				}
> -
> -				if (eof || bno >= end)
> -					break;
> -			} else {
> -				need_alloc = true;
> -			}
> +			need_alloc = true;
>  		} else if (isnullstartblock(bma.got.br_startblock)) {
>  			wasdelay = true;
>  		}
> @@ -4487,23 +4452,68 @@ xfs_bmapi_convert_delalloc(
>  	int			whichfork,
>  	struct xfs_bmbt_irec	*imap)
>  {
> -	int			flags = XFS_BMAPI_DELALLOC;
> -	int			nimaps = 1;
> +	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
> +	struct xfs_bmalloca	bma = { NULL };
>  	int			error;
> -	int			total = XFS_EXTENTADD_SPACE_RES(ip->i_mount,
> -								XFS_DATA_FORK);
>  
> -	if (whichfork == XFS_COW_FORK)
> -		flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
> +	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
> +	    bma.got.br_startoff > offset_fsb) {
> +		/*
> +		 * No extent found in the range we are trying to convert.  This
> +		 * should only happen for the COW fork, where another thread
> +		 * might have moved the extent to the data fork in the meantime.
> +		 */
> +		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
> +		return -EAGAIN;
> +	}
>  
>  	/*
> -	 * The delalloc flag means to allocate the entire extent; pass a dummy
> -	 * length of 1.
> +	 * If we find a real extent here we raced with another thread converting
> +	 * the extent.  Just return the real extent at this offset.
>  	 */
> -	error = xfs_bmapi_write(tp, ip, offset_fsb, 1, flags, total, imap,
> -				&nimaps);
> -	if (!error && !nimaps)
> -		error = -EFSCORRUPTED;
> +	if (!isnullstartblock(bma.got.br_startblock)) {
> +		*imap = bma.got;
> +		return 0;
> +	}
> +
> +	bma.tp = tp;
> +	bma.ip = ip;
> +	bma.wasdel = true;
> +	bma.offset = bma.got.br_startoff;
> +	bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN);
> +	bma.total = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
> +	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
> +	if (whichfork == XFS_COW_FORK)
> +		bma.flags = XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
> +
> +	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
> +		bma.prev.br_startoff = NULLFILEOFF;
> +
> +	error = xfs_bmapi_allocate(&bma);
> +	if (error)
> +		goto out_finish;
> +
> +	error = -ENOSPC;
> +	if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
> +		goto out_finish;
> +	error = -EFSCORRUPTED;
> +	if (WARN_ON_ONCE(!bma.got.br_startblock && !XFS_IS_REALTIME_INODE(ip)))
> +		goto out_finish;
> +
> +	ASSERT(!isnullstartblock(bma.got.br_startblock));
> +	*imap = bma.got;
> +
> +	if (whichfork == XFS_COW_FORK) {
> +		error = xfs_refcount_alloc_cow_extent(tp, bma.blkno,
> +				bma.length);
> +		if (error)
> +			goto out_finish;
> +	}
> +
> +	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
> +			whichfork);
> +out_finish:
> +	xfs_bmapi_finish(&bma, whichfork, error);
>  	return error;
>  }
>  
> diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
> index 4dc7d1a02b35..b5eca7a26949 100644
> --- a/fs/xfs/libxfs/xfs_bmap.h
> +++ b/fs/xfs/libxfs/xfs_bmap.h
> @@ -95,9 +95,6 @@ struct xfs_extent_free_item
>  /* Map something in the CoW fork. */
>  #define XFS_BMAPI_COWFORK	0x200
>  
> -/* Only convert delalloc space, don't allocate entirely new extents */
> -#define XFS_BMAPI_DELALLOC	0x400
> -
>  /* Only convert unwritten extents, don't allocate new blocks */
>  #define XFS_BMAPI_CONVERT_ONLY	0x800
>  
> @@ -117,7 +114,6 @@ struct xfs_extent_free_item
>  	{ XFS_BMAPI_ZERO,	"ZERO" }, \
>  	{ XFS_BMAPI_REMAP,	"REMAP" }, \
>  	{ XFS_BMAPI_COWFORK,	"COWFORK" }, \
> -	{ XFS_BMAPI_DELALLOC,	"DELALLOC" }, \
>  	{ XFS_BMAPI_CONVERT_ONLY, "CONVERT_ONLY" }, \
>  	{ XFS_BMAPI_NODISCARD,	"NODISCARD" }, \
>  	{ XFS_BMAPI_NORMAP,	"NORMAP" }
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c
  2019-02-15 14:47 ` [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c Christoph Hellwig
@ 2019-02-15 23:40   ` Darrick J. Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Darrick J. Wong @ 2019-02-15 23:40 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, Brian Foster

On Fri, Feb 15, 2019 at 03:47:23PM +0100, Christoph Hellwig wrote:
> This function is a small wrapper only used by the writeback code, so
> move it together with the writeback code and simplify it down to the
> glorified do { } while loop that is now is.
> 
> A few bits intentionally got lost here: no need to call xfs_qm_dqattach
> because quotas are always attached when we create the delalloc
> reservation, and no need for the imap->br_startblock == 0 check given
> that xfs_bmapi_convert_delalloc already has a WARN_ON_ONCE for exactly
> that condition.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks ok,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_aops.c  | 51 +++++++++++++++++++++++++----
>  fs/xfs/xfs_iomap.c | 81 ----------------------------------------------
>  fs/xfs/xfs_iomap.h |  2 --
>  3 files changed, 45 insertions(+), 89 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 8bfb62d8776f..42017ecf78ed 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -329,6 +329,38 @@ xfs_imap_valid(
>  	return true;
>  }
>  
> +/*
> + * Pass in a dellalloc extent and convert it to real extents, return the real
> + * extent that maps offset_fsb in wpc->imap.
> + *
> + * The current page is held locked so nothing could have removed the block
> + * backing offset_fsb.
> + */
> +static int
> +xfs_convert_blocks(
> +	struct xfs_writepage_ctx *wpc,
> +	struct xfs_inode	*ip,
> +	xfs_fileoff_t		offset_fsb)
> +{
> +	int			error;
> +
> +	/*
> +	 * Attempt to allocate whatever delalloc extent currently backs
> +	 * offset_fsb and put the result into wpc->imap.  Allocate in a loop
> +	 * because it may take several attempts to allocate real blocks for a
> +	 * contiguous delalloc extent if free space is sufficiently fragmented.
> +	 */
> +	do {
> +		error = xfs_bmapi_convert_delalloc(ip, wpc->fork, offset_fsb,
> +				&wpc->imap, wpc->fork == XFS_COW_FORK ?
> +					&wpc->cow_seq : &wpc->data_seq);
> +		if (error)
> +			return error;
> +	} while (wpc->imap.br_startoff + wpc->imap.br_blockcount <= offset_fsb);
> +
> +	return 0;
> +}
> +
>  STATIC int
>  xfs_map_blocks(
>  	struct xfs_writepage_ctx *wpc,
> @@ -458,14 +490,21 @@ xfs_map_blocks(
>  	trace_xfs_map_blocks_found(ip, offset, count, wpc->fork, &imap);
>  	return 0;
>  allocate_blocks:
> -	error = xfs_iomap_write_allocate(ip, wpc->fork, offset, &imap,
> -			wpc->fork == XFS_COW_FORK ?
> -					 &wpc->cow_seq : &wpc->data_seq);
> +	error = xfs_convert_blocks(wpc, ip, offset_fsb);
>  	if (error)
>  		return error;
> -	ASSERT(wpc->fork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
> -	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
> -	wpc->imap = imap;
> +
> +	/*
> +	 * Due to merging the return real extent might be larger than the
> +	 * original delalloc one.  Trim the return extent to the next COW
> +	 * boundary again to force a re-lookup.
> +	 */
> +	if (wpc->fork != XFS_COW_FORK && cow_fsb != NULLFILEOFF &&
> +	    cow_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount)
> +		wpc->imap.br_blockcount = cow_fsb - wpc->imap.br_startoff;
> +
> +	ASSERT(wpc->imap.br_startoff <= offset_fsb);
> +	ASSERT(wpc->imap.br_startoff + wpc->imap.br_blockcount > offset_fsb);
>  	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->fork, &imap);
>  	return 0;
>  }
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 15da53b5fb53..361dfe7af783 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -665,87 +665,6 @@ xfs_file_iomap_begin_delay(
>  	return error;
>  }
>  
> -/*
> - * Pass in a delayed allocate extent, convert it to real extents;
> - * return to the caller the extent we create which maps on top of
> - * the originating callers request.
> - *
> - * Called without a lock on the inode.
> - *
> - * We no longer bother to look at the incoming map - all we have to
> - * guarantee is that whatever we allocate fills the required range.
> - */
> -int
> -xfs_iomap_write_allocate(
> -	struct xfs_inode	*ip,
> -	int			whichfork,
> -	xfs_off_t		offset,
> -	struct xfs_bmbt_irec	*imap,
> -	unsigned int		*seq)
> -{
> -	struct xfs_mount	*mp = ip->i_mount;
> -	xfs_fileoff_t		offset_fsb;
> -	xfs_fileoff_t		map_start_fsb;
> -	xfs_extlen_t		map_count_fsb;
> -	int			error = 0;
> -
> -	/*
> -	 * Make sure that the dquots are there.
> -	 */
> -	error = xfs_qm_dqattach(ip);
> -	if (error)
> -		return error;
> -
> -	/*
> -	 * Store the file range the caller is interested in because it encodes
> -	 * state such as potential overlap with COW fork blocks. We must trim
> -	 * the allocated extent down to this range to maintain consistency with
> -	 * what the caller expects. Revalidation of the range itself is the
> -	 * responsibility of the caller.
> -	 */
> -	offset_fsb = XFS_B_TO_FSBT(mp, offset);
> -	map_start_fsb = imap->br_startoff;
> -	map_count_fsb = imap->br_blockcount;
> -
> -	while (true) {
> -		/*
> -		 * Allocate in a loop because it may take several attempts to
> -		 * allocate real blocks for a contiguous delalloc extent if free
> -		 * space is sufficiently fragmented.
> -		 */
> -
> -		/*
> -		 * ilock was dropped since imap was populated which means it
> -		 * might no longer be valid. The current page is held locked so
> -		 * nothing could have removed the block backing offset_fsb.
> -		 * Attempt to allocate whatever delalloc extent currently backs
> -		 * offset_fsb and put the result in the imap pointer from the
> -		 * caller. We'll trim it down to the caller's most recently
> -		 * validated range before we return.
> -		 */
> -		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset_fsb,
> -				imap, seq);
> -		if (error)
> -			return error;
> -
> -		/*
> -		 * See if we were able to allocate an extent that covers at
> -		 * least part of the callers request.
> -		 */
> -		if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
> -			return xfs_alert_fsblock_zero(ip, imap);
> -
> -		if ((offset_fsb >= imap->br_startoff) &&
> -		    (offset_fsb < (imap->br_startoff +
> -				   imap->br_blockcount))) {
> -			xfs_trim_extent(imap, map_start_fsb, map_count_fsb);
> -			ASSERT(offset_fsb >= imap->br_startoff &&
> -			       offset_fsb < imap->br_startoff + imap->br_blockcount);
> -			return 0;
> -		}
> -	}
> -}
> -
>  int
>  xfs_iomap_write_unwritten(
>  	xfs_inode_t	*ip,
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index c6170548831b..6b16243db0b7 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -13,8 +13,6 @@ struct xfs_bmbt_irec;
>  
>  int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
>  			struct xfs_bmbt_irec *, int);
> -int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
> -			struct xfs_bmbt_irec *, unsigned int *);
>  int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
>  
>  void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 10/10] xfs: retry COW fork delalloc conversion when no
  2019-02-15 23:32   ` Darrick J. Wong
@ 2019-02-18  9:09     ` Christoph Hellwig
  2019-02-19  5:19       ` Darrick J. Wong
  0 siblings, 1 reply; 23+ messages in thread
From: Christoph Hellwig @ 2019-02-18  9:09 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs, Brian Foster

On Fri, Feb 15, 2019 at 03:32:25PM -0800, Darrick J. Wong wrote:
> I /think/ the way that this series (+ Brian's before that) solve the
> truncate/writeback race is that we now only convert existing delalloc
> reservations to real extents when we're preparing to do writeback;
> _writepage_map only cares about the mapping of the offset_fsb that it
> happens to be looping right now (because the page lock serializes with
> page cache truncate/punch); and we use the new sequence counters for
> both the data and cow forks to decide when our cached mapping might be
> invalid and therefore we need to get a new mapping?

Well, we already tried to do that before, we just weren't all that good
at it.  The big difference is that the delalloc conversion now doesn't
blindly reuse the range looked up a long time before under a different
ilock critical section, but instead just uses that as a hint and
only converts the extent that the writeback offset falls into, and
only does so if it actually still is in delalloc state.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 10/10] xfs: retry COW fork delalloc conversion when no
  2019-02-18  9:09     ` Christoph Hellwig
@ 2019-02-19  5:19       ` Darrick J. Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Darrick J. Wong @ 2019-02-19  5:19 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs, Brian Foster

On Mon, Feb 18, 2019 at 10:09:42AM +0100, Christoph Hellwig wrote:
> On Fri, Feb 15, 2019 at 03:32:25PM -0800, Darrick J. Wong wrote:
> > I /think/ the way that this series (+ Brian's before that) solve the
> > truncate/writeback race is that we now only convert existing delalloc
> > reservations to real extents when we're preparing to do writeback;
> > _writepage_map only cares about the mapping of the offset_fsb that it
> > happens to be looping right now (because the page lock serializes with
> > page cache truncate/punch); and we use the new sequence counters for
> > both the data and cow forks to decide when our cached mapping might be
> > invalid and therefore we need to get a new mapping?
> 
> Well, we already tried to do that before, we just weren't all that good
> at it.  The big difference is that the delalloc conversion now doesn't
> blindly reuse the range looked up a long time before under a different
> ilock critical section, but instead just uses that as a hint and
> only converts the extent that the writeback offset falls into, and
> only does so if it actually still is in delalloc state.

Got it.  I'll pull this in if I don't hear any loud yelling. :)

(FWIW it tested ok over the weekend.)

--D

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks
  2019-02-11 12:54 ` [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks Christoph Hellwig
@ 2019-02-11 15:22   ` Brian Foster
  0 siblings, 0 replies; 23+ messages in thread
From: Brian Foster @ 2019-02-11 15:22 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Mon, Feb 11, 2019 at 01:54:26PM +0100, Christoph Hellwig wrote:
> Now that we properly handle the race with truncate in the delalloc
> allocator there is no need to short cut this exceptional case earlier
> on.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  fs/xfs/xfs_aops.c | 20 --------------------
>  1 file changed, 20 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 403df647c0e4..6a8937a833ad 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -426,26 +426,6 @@ xfs_map_blocks(
>  		xfs_iunlock(ip, XFS_ILOCK_SHARED);
>  
>  		wpc->fork = XFS_COW_FORK;
> -
> -		/*
> -		 * Truncate can race with writeback since writeback doesn't
> -		 * take the iolock and truncate decreases the file size before
> -		 * it starts truncating the pages between new_size and old_size.
> -		 * Therefore, we can end up in the situation where writeback
> -		 * gets a CoW fork mapping but the truncate makes the mapping
> -		 * invalid and we end up in here trying to get a new mapping.
> -		 * bail out here so that we simply never get a valid mapping
> -		 * and so we drop the write altogether.  The page truncation
> -		 * will kill the contents anyway.
> -		 */
> -		if (offset > i_size_read(inode)) {
> -			wpc->imap.br_blockcount = end_fsb - offset_fsb;
> -			wpc->imap.br_startoff = offset_fsb;
> -			wpc->imap.br_startblock = HOLESTARTBLOCK;
> -			wpc->imap.br_state = XFS_EXT_NORM;
> -			return 0;
> -		}
> -
>  		goto allocate_blocks;
>  	}
>  
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks
  2019-02-11 12:54 small fixes and optimizations for delalloc and reflink Christoph Hellwig
@ 2019-02-11 12:54 ` Christoph Hellwig
  2019-02-11 15:22   ` Brian Foster
  0 siblings, 1 reply; 23+ messages in thread
From: Christoph Hellwig @ 2019-02-11 12:54 UTC (permalink / raw)
  To: linux-xfs; +Cc: Brian Foster

Now that we properly handle the race with truncate in the delalloc
allocator there is no need to short cut this exceptional case earlier
on.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 403df647c0e4..6a8937a833ad 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -426,26 +426,6 @@ xfs_map_blocks(
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
 		wpc->fork = XFS_COW_FORK;
-
-		/*
-		 * Truncate can race with writeback since writeback doesn't
-		 * take the iolock and truncate decreases the file size before
-		 * it starts truncating the pages between new_size and old_size.
-		 * Therefore, we can end up in the situation where writeback
-		 * gets a CoW fork mapping but the truncate makes the mapping
-		 * invalid and we end up in here trying to get a new mapping.
-		 * bail out here so that we simply never get a valid mapping
-		 * and so we drop the write altogether.  The page truncation
-		 * will kill the contents anyway.
-		 */
-		if (offset > i_size_read(inode)) {
-			wpc->imap.br_blockcount = end_fsb - offset_fsb;
-			wpc->imap.br_startoff = offset_fsb;
-			wpc->imap.br_startblock = HOLESTARTBLOCK;
-			wpc->imap.br_state = XFS_EXT_NORM;
-			return 0;
-		}
-
 		goto allocate_blocks;
 	}
 
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2019-02-19  5:19 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-15 14:47 small fixes and optimizations for delalloc v2 Christoph Hellwig
2019-02-15 14:47 ` [PATCH 01/10] xfs: remove the io_type field from the writeback context and ioend Christoph Hellwig
2019-02-15 14:47 ` [PATCH 02/10] xfs: remove the s_maxbytes checks in xfs_map_blocks Christoph Hellwig
2019-02-15 14:47 ` [PATCH 03/10] xfs: simplify the xfs_bmap_btree_to_extents calling conventions Christoph Hellwig
2019-02-15 16:41   ` Darrick J. Wong
2019-02-15 14:47 ` [PATCH 04/10] xfs: factor out two helpers from xfs_bmapi_write Christoph Hellwig
2019-02-15 22:37   ` Darrick J. Wong
2019-02-15 14:47 ` [PATCH 05/10] xfs: split XFS_BMAPI_DELALLOC handling " Christoph Hellwig
2019-02-15 23:38   ` Darrick J. Wong
2019-02-15 14:47 ` [PATCH 06/10] xfs: move transaction handling to xfs_bmapi_convert_delalloc Christoph Hellwig
2019-02-15 22:49   ` Darrick J. Wong
2019-02-15 14:47 ` [PATCH 07/10] xfs: move stat accounting " Christoph Hellwig
2019-02-15 22:47   ` Darrick J. Wong
2019-02-15 14:47 ` [PATCH 08/10] xfs: move xfs_iomap_write_allocate to xfs_aops.c Christoph Hellwig
2019-02-15 23:40   ` Darrick J. Wong
2019-02-15 14:47 ` [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks Christoph Hellwig
2019-02-15 23:37   ` Darrick J. Wong
2019-02-15 14:47 ` [PATCH 10/10] xfs: retry COW fork delalloc conversion when no Christoph Hellwig
2019-02-15 23:32   ` Darrick J. Wong
2019-02-18  9:09     ` Christoph Hellwig
2019-02-19  5:19       ` Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2019-02-11 12:54 small fixes and optimizations for delalloc and reflink Christoph Hellwig
2019-02-11 12:54 ` [PATCH 09/10] xfs: remove the truncate short cut in xfs_map_blocks Christoph Hellwig
2019-02-11 15:22   ` Brian Foster

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.