All of lore.kernel.org
 help / color / mirror / Atom feed
* reduce lookups in the COW extent tree V3
@ 2018-07-17 23:23 Christoph Hellwig
  2018-07-17 23:24 ` [PATCH 1/6] xfs: remove if_real_bytes Christoph Hellwig
                   ` (5 more replies)
  0 siblings, 6 replies; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-17 23:23 UTC (permalink / raw)
  To: linux-xfs

This series (on top of the iomap series) reduce the number of lookups we
do in the COW tree by adding a sequence number that counts modifications
to the extent tree.

Changes since v2:
 - bump if_seq before modifying the extent tree

Changes since v1:
 - don't update wpc->cow_seq for non-COW allocations
 - update a comment
 - rebase to the lastest tree as baseline

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 1/6] xfs: remove if_real_bytes
  2018-07-17 23:23 reduce lookups in the COW extent tree V3 Christoph Hellwig
@ 2018-07-17 23:24 ` Christoph Hellwig
  2018-07-17 23:24 ` [PATCH 2/6] xfs: simplify xfs_idata_realloc Christoph Hellwig
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-17 23:24 UTC (permalink / raw)
  To: linux-xfs

The field is only used for asserts, and to track if we really need to do
realloc when growing the inode fork data.  But the krealloc function
already performs this check internally, so there is no need to keep track
of the real allocation size.

This will free space in the inode fork for keeping a sequence counter of
changes to the extent list.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_inode_fork.c | 19 ++++---------------
 fs/xfs/libxfs/xfs_inode_fork.h |  1 -
 fs/xfs/xfs_inode.c             |  3 +--
 fs/xfs/xfs_inode_item.c        |  4 ----
 4 files changed, 5 insertions(+), 22 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 183ec0cb8921..dee85b0f8846 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -158,7 +158,6 @@ xfs_init_local_fork(
 	}
 
 	ifp->if_bytes = size;
-	ifp->if_real_bytes = real_size;
 	ifp->if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
 	ifp->if_flags |= XFS_IFINLINE;
 }
@@ -226,7 +225,6 @@ xfs_iformat_extents(
 		return -EFSCORRUPTED;
 	}
 
-	ifp->if_real_bytes = 0;
 	ifp->if_bytes = 0;
 	ifp->if_u1.if_root = NULL;
 	ifp->if_height = 0;
@@ -317,7 +315,6 @@ xfs_iformat_btree(
 	ifp->if_flags &= ~XFS_IFEXTENTS;
 	ifp->if_flags |= XFS_IFBROOT;
 
-	ifp->if_real_bytes = 0;
 	ifp->if_bytes = 0;
 	ifp->if_u1.if_root = NULL;
 	ifp->if_height = 0;
@@ -501,7 +498,6 @@ xfs_idata_realloc(
 		 */
 		real_size = roundup(new_size, 4);
 		if (ifp->if_u1.if_data == NULL) {
-			ASSERT(ifp->if_real_bytes == 0);
 			ifp->if_u1.if_data = kmem_alloc(real_size,
 							KM_SLEEP | KM_NOFS);
 		} else {
@@ -509,15 +505,12 @@ xfs_idata_realloc(
 			 * Only do the realloc if the underlying size
 			 * is really changing.
 			 */
-			if (ifp->if_real_bytes != real_size) {
-				ifp->if_u1.if_data =
-					kmem_realloc(ifp->if_u1.if_data,
-							real_size,
-							KM_SLEEP | KM_NOFS);
-			}
+			ifp->if_u1.if_data =
+				kmem_realloc(ifp->if_u1.if_data,
+						real_size,
+						KM_SLEEP | KM_NOFS);
 		}
 	}
-	ifp->if_real_bytes = real_size;
 	ifp->if_bytes = new_size;
 	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
 }
@@ -543,17 +536,13 @@ xfs_idestroy_fork(
 	 */
 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
 		if (ifp->if_u1.if_data != NULL) {
-			ASSERT(ifp->if_real_bytes != 0);
 			kmem_free(ifp->if_u1.if_data);
 			ifp->if_u1.if_data = NULL;
-			ifp->if_real_bytes = 0;
 		}
 	} else if ((ifp->if_flags & XFS_IFEXTENTS) && ifp->if_height) {
 		xfs_iext_destroy(ifp);
 	}
 
-	ASSERT(ifp->if_real_bytes == 0);
-
 	if (whichfork == XFS_ATTR_FORK) {
 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
 		ip->i_afp = NULL;
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 781b1603df5e..46242052aad0 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -14,7 +14,6 @@ struct xfs_dinode;
  */
 typedef struct xfs_ifork {
 	int			if_bytes;	/* bytes in if_u1 */
-	int			if_real_bytes;	/* bytes allocated in if_u1 */
 	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
 	short			if_broot_bytes;	/* bytes allocated for root */
 	unsigned char		if_flags;	/* per-fork flags */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7b2694d3901a..d6cd545f5d1d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -927,7 +927,7 @@ xfs_ialloc(
 	case S_IFLNK:
 		ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 		ip->i_df.if_flags = XFS_IFEXTENTS;
-		ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
+		ip->i_df.if_bytes = 0;
 		ip->i_df.if_u1.if_root = NULL;
 		break;
 	default:
@@ -1872,7 +1872,6 @@ xfs_inactive(
 	 * to clean up here.
 	 */
 	if (VFS_I(ip)->i_mode == 0) {
-		ASSERT(ip->i_df.if_real_bytes == 0);
 		ASSERT(ip->i_df.if_broot_bytes == 0);
 		return;
 	}
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 2389c34c172d..fa1c4fe2ffbf 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -194,8 +194,6 @@ xfs_inode_item_format_data_fork(
 			 * to be there by xfs_idata_realloc().
 			 */
 			data_bytes = roundup(ip->i_df.if_bytes, 4);
-			ASSERT(ip->i_df.if_real_bytes == 0 ||
-			       ip->i_df.if_real_bytes >= data_bytes);
 			ASSERT(ip->i_df.if_u1.if_data != NULL);
 			ASSERT(ip->i_d.di_size > 0);
 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
@@ -280,8 +278,6 @@ xfs_inode_item_format_attr_fork(
 			 * to be there by xfs_idata_realloc().
 			 */
 			data_bytes = roundup(ip->i_afp->if_bytes, 4);
-			ASSERT(ip->i_afp->if_real_bytes == 0 ||
-			       ip->i_afp->if_real_bytes >= data_bytes);
 			ASSERT(ip->i_afp->if_u1.if_data != NULL);
 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
 					ip->i_afp->if_u1.if_data,
-- 
2.18.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 2/6] xfs: simplify xfs_idata_realloc
  2018-07-17 23:23 reduce lookups in the COW extent tree V3 Christoph Hellwig
  2018-07-17 23:24 ` [PATCH 1/6] xfs: remove if_real_bytes Christoph Hellwig
@ 2018-07-17 23:24 ` Christoph Hellwig
  2018-07-17 23:24 ` [PATCH 3/6] xfs: remove the xfs_ifork_t typedef Christoph Hellwig
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-17 23:24 UTC (permalink / raw)
  To: linux-xfs

Streamline the code and take advantage of the fact that kmem_realloc
through krealloc will be have like a normal allocation if passing in a
NULL old pointer.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_inode_fork.c | 55 ++++++++++++----------------------
 1 file changed, 19 insertions(+), 36 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index dee85b0f8846..a0e3fb804605 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -468,51 +468,34 @@ xfs_iroot_realloc(
  */
 void
 xfs_idata_realloc(
-	xfs_inode_t	*ip,
-	int		byte_diff,
-	int		whichfork)
+	struct xfs_inode	*ip,
+	int			byte_diff,
+	int			whichfork)
 {
-	xfs_ifork_t	*ifp;
-	int		new_size;
-	int		real_size;
-
-	if (byte_diff == 0) {
-		return;
-	}
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	int			new_size = (int)ifp->if_bytes + byte_diff;
 
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	new_size = (int)ifp->if_bytes + byte_diff;
 	ASSERT(new_size >= 0);
+	ASSERT(new_size <= XFS_IFORK_SIZE(ip, whichfork));
+
+	if (byte_diff == 0)
+		return;
 
 	if (new_size == 0) {
 		kmem_free(ifp->if_u1.if_data);
 		ifp->if_u1.if_data = NULL;
-		real_size = 0;
-	} else {
-		/*
-		 * Stuck with malloc/realloc.
-		 * For inline data, the underlying buffer must be
-		 * a multiple of 4 bytes in size so that it can be
-		 * logged and stay on word boundaries.  We enforce
-		 * that here.
-		 */
-		real_size = roundup(new_size, 4);
-		if (ifp->if_u1.if_data == NULL) {
-			ifp->if_u1.if_data = kmem_alloc(real_size,
-							KM_SLEEP | KM_NOFS);
-		} else {
-			/*
-			 * Only do the realloc if the underlying size
-			 * is really changing.
-			 */
-			ifp->if_u1.if_data =
-				kmem_realloc(ifp->if_u1.if_data,
-						real_size,
-						KM_SLEEP | KM_NOFS);
-		}
+		ifp->if_bytes = 0;
+		return;
 	}
+
+	/*
+	 * For inline data, the underlying buffer must be a multiple of 4 bytes
+	 * in size so that it can be logged and stay on word boundaries.
+	 * We enforce that here.
+	 */
+	ifp->if_u1.if_data = kmem_realloc(ifp->if_u1.if_data,
+			roundup(new_size, 4), KM_SLEEP | KM_NOFS);
 	ifp->if_bytes = new_size;
-	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
 }
 
 void
-- 
2.18.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 3/6] xfs: remove the xfs_ifork_t typedef
  2018-07-17 23:23 reduce lookups in the COW extent tree V3 Christoph Hellwig
  2018-07-17 23:24 ` [PATCH 1/6] xfs: remove if_real_bytes Christoph Hellwig
  2018-07-17 23:24 ` [PATCH 2/6] xfs: simplify xfs_idata_realloc Christoph Hellwig
@ 2018-07-17 23:24 ` Christoph Hellwig
  2018-07-17 23:24 ` [PATCH 4/6] xfs: introduce a new xfs_inode_has_cow_data helper Christoph Hellwig
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-17 23:24 UTC (permalink / raw)
  To: linux-xfs

We only have a few more callers left, so seize the opportunity and kill
it off.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr_leaf.c  |  6 +++---
 fs/xfs/libxfs/xfs_bmap.c       | 18 +++++++++---------
 fs/xfs/libxfs/xfs_inode_fork.c |  8 ++++----
 fs/xfs/libxfs/xfs_inode_fork.h |  4 ++--
 fs/xfs/xfs_icache.c            |  2 +-
 fs/xfs/xfs_inode.h             |  6 +++---
 fs/xfs/xfs_super.c             |  2 +-
 7 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 251304f3bc5d..bdf971c1fd77 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -506,7 +506,7 @@ xfs_attr_shortform_create(xfs_da_args_t *args)
 {
 	xfs_attr_sf_hdr_t *hdr;
 	xfs_inode_t *dp;
-	xfs_ifork_t *ifp;
+	struct xfs_ifork *ifp;
 
 	trace_xfs_attr_sf_create(args);
 
@@ -541,7 +541,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
 	int i, offset, size;
 	xfs_mount_t *mp;
 	xfs_inode_t *dp;
-	xfs_ifork_t *ifp;
+	struct xfs_ifork *ifp;
 
 	trace_xfs_attr_sf_add(args);
 
@@ -682,7 +682,7 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
 	xfs_attr_shortform_t *sf;
 	xfs_attr_sf_entry_t *sfe;
 	int i;
-	xfs_ifork_t *ifp;
+	struct xfs_ifork *ifp;
 
 	trace_xfs_attr_sf_lookup(args);
 
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7b93b1e16ad9..fa6a704ac1c0 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -326,7 +326,7 @@ xfs_bmap_check_leaf_extents(
 	xfs_buf_t		*bp;	/* buffer for "block" */
 	int			error;	/* error return value */
 	xfs_extnum_t		i=0, j;	/* index into the extents list */
-	xfs_ifork_t		*ifp;	/* fork structure */
+	struct xfs_ifork	*ifp;	/* fork structure */
 	int			level;	/* btree level, for checking */
 	xfs_mount_t		*mp;	/* file system mount structure */
 	__be64			*pp;	/* pointer to block address */
@@ -594,7 +594,7 @@ xfs_bmap_btree_to_extents(
 	xfs_fsblock_t		cbno;	/* child block number */
 	xfs_buf_t		*cbp;	/* child block's buffer */
 	int			error;	/* error return value */
-	xfs_ifork_t		*ifp;	/* inode fork data */
+	struct xfs_ifork	*ifp;	/* inode fork data */
 	xfs_mount_t		*mp;	/* mount point structure */
 	__be64			*pp;	/* ptr to block address */
 	struct xfs_btree_block	*rblock;/* root btree block */
@@ -817,7 +817,7 @@ xfs_bmap_local_to_extents(
 {
 	int		error = 0;
 	int		flags;		/* logging flags returned */
-	xfs_ifork_t	*ifp;		/* inode fork pointer */
+	struct xfs_ifork *ifp;		/* inode fork pointer */
 	xfs_alloc_arg_t	args;		/* allocation arguments */
 	xfs_buf_t	*bp;		/* buffer for extent block */
 	struct xfs_bmbt_irec rec;
@@ -1479,7 +1479,7 @@ xfs_bmap_one_block(
 	xfs_inode_t	*ip,		/* incore inode */
 	int		whichfork)	/* data or attr fork */
 {
-	xfs_ifork_t	*ifp;		/* inode fork pointer */
+	struct xfs_ifork *ifp;		/* inode fork pointer */
 	int		rval;		/* return value */
 	xfs_bmbt_irec_t	s;		/* internal version of extent */
 	struct xfs_iext_cursor icur;
@@ -1517,7 +1517,7 @@ xfs_bmap_add_extent_delay_real(
 	struct xfs_bmbt_irec	*new = &bma->got;
 	int			error;	/* error return value */
 	int			i;	/* temp state */
-	xfs_ifork_t		*ifp;	/* inode fork pointer */
+	struct xfs_ifork	*ifp;	/* inode fork pointer */
 	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
 	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
 					/* left is 0, right is 1, prev is 2 */
@@ -2026,7 +2026,7 @@ xfs_bmap_add_extent_unwritten_real(
 	xfs_btree_cur_t		*cur;	/* btree cursor */
 	int			error;	/* error return value */
 	int			i;	/* temp state */
-	xfs_ifork_t		*ifp;	/* inode fork pointer */
+	struct xfs_ifork	*ifp;	/* inode fork pointer */
 	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
 	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
 					/* left is 0, right is 1, prev is 2 */
@@ -2494,7 +2494,7 @@ xfs_bmap_add_extent_hole_delay(
 	struct xfs_iext_cursor	*icur,
 	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
 {
-	xfs_ifork_t		*ifp;	/* inode fork pointer */
+	struct xfs_ifork	*ifp;	/* inode fork pointer */
 	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
 	xfs_filblks_t		newlen=0;	/* new indirect size */
 	xfs_filblks_t		oldlen=0;	/* old indirect size */
@@ -4855,7 +4855,7 @@ xfs_bmap_del_extent_real(
 	struct xfs_bmbt_irec	got;	/* current extent entry */
 	xfs_fileoff_t		got_endoff;	/* first offset past got */
 	int			i;	/* temp state */
-	xfs_ifork_t		*ifp;	/* inode fork pointer */
+	struct xfs_ifork	*ifp;	/* inode fork pointer */
 	xfs_mount_t		*mp;	/* mount structure */
 	xfs_filblks_t		nblks;	/* quota/sb block count */
 	xfs_bmbt_irec_t		new;	/* new record to be inserted */
@@ -5103,7 +5103,7 @@ __xfs_bunmapi(
 	int			error;		/* error return value */
 	xfs_extnum_t		extno;		/* extent number in list */
 	struct xfs_bmbt_irec	got;		/* current extent record */
-	xfs_ifork_t		*ifp;		/* inode fork pointer */
+	struct xfs_ifork	*ifp;		/* inode fork pointer */
 	int			isrt;		/* freeing in rt area */
 	int			logflags;	/* transaction logging flags */
 	xfs_extlen_t		mod;		/* rt extent offset */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index a0e3fb804605..f9acf1d436f6 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -269,7 +269,7 @@ xfs_iformat_btree(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	xfs_bmdr_block_t	*dfp;
-	xfs_ifork_t		*ifp;
+	struct xfs_ifork	*ifp;
 	/* REFERENCED */
 	int			nrecs;
 	int			size;
@@ -347,7 +347,7 @@ xfs_iroot_realloc(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	int			cur_max;
-	xfs_ifork_t		*ifp;
+	struct xfs_ifork	*ifp;
 	struct xfs_btree_block	*new_broot;
 	int			new_max;
 	size_t			new_size;
@@ -503,7 +503,7 @@ xfs_idestroy_fork(
 	xfs_inode_t	*ip,
 	int		whichfork)
 {
-	xfs_ifork_t	*ifp;
+	struct xfs_ifork	*ifp;
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (ifp->if_broot != NULL) {
@@ -592,7 +592,7 @@ xfs_iflush_fork(
 	int			whichfork)
 {
 	char			*cp;
-	xfs_ifork_t		*ifp;
+	struct xfs_ifork	*ifp;
 	xfs_mount_t		*mp;
 	static const short	brootflag[2] =
 		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 46242052aad0..1492143371f3 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -12,7 +12,7 @@ struct xfs_dinode;
 /*
  * File incore extent information, present for each of data & attr forks.
  */
-typedef struct xfs_ifork {
+struct xfs_ifork {
 	int			if_bytes;	/* bytes in if_u1 */
 	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
 	short			if_broot_bytes;	/* bytes allocated for root */
@@ -22,7 +22,7 @@ typedef struct xfs_ifork {
 		void		*if_root;	/* extent tree root */
 		char		*if_data;	/* inline file data */
 	} if_u1;
-} xfs_ifork_t;
+};
 
 /*
  * Per-fork incore inode flags.
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 47f417d20a30..79f344fa8b14 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -66,7 +66,7 @@ xfs_inode_alloc(
 	ip->i_cowfp = NULL;
 	ip->i_cnextents = 0;
 	ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
-	memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
+	memset(&ip->i_df, 0, sizeof(ip->i_df));
 	ip->i_flags = 0;
 	ip->i_delayed_blks = 0;
 	memset(&ip->i_d, 0, sizeof(ip->i_d));
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index b1f0e8394f3b..51ae5d79de0d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -34,9 +34,9 @@ typedef struct xfs_inode {
 	struct xfs_imap		i_imap;		/* location for xfs_imap() */
 
 	/* Extent information. */
-	xfs_ifork_t		*i_afp;		/* attribute fork pointer */
-	xfs_ifork_t		*i_cowfp;	/* copy on write extents */
-	xfs_ifork_t		i_df;		/* data fork */
+	struct xfs_ifork	*i_afp;		/* attribute fork pointer */
+	struct xfs_ifork	*i_cowfp;	/* copy on write extents */
+	struct xfs_ifork	i_df;		/* data fork */
 
 	/* operations vectors */
 	const struct xfs_dir_ops *d_ops;		/* directory ops vector */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index f9f8dc490d3d..62312b4cef64 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1886,7 +1886,7 @@ xfs_init_zones(void)
 	if (!xfs_da_state_zone)
 		goto out_destroy_btree_cur_zone;
 
-	xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+	xfs_ifork_zone = kmem_zone_init(sizeof(struct xfs_ifork), "xfs_ifork");
 	if (!xfs_ifork_zone)
 		goto out_destroy_da_state_zone;
 
-- 
2.18.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 4/6] xfs: introduce a new xfs_inode_has_cow_data helper
  2018-07-17 23:23 reduce lookups in the COW extent tree V3 Christoph Hellwig
                   ` (2 preceding siblings ...)
  2018-07-17 23:24 ` [PATCH 3/6] xfs: remove the xfs_ifork_t typedef Christoph Hellwig
@ 2018-07-17 23:24 ` Christoph Hellwig
  2018-07-17 23:24 ` [PATCH 5/6] xfs: maintain a sequence count for inode fork manipulations Christoph Hellwig
  2018-07-17 23:24 ` [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change Christoph Hellwig
  5 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-17 23:24 UTC (permalink / raw)
  To: linux-xfs

We have a few places that already check if an inode has actual data in
the COW fork to avoid work on reflink inodes that do not actually have
outstanding COW blocks.  There are a few more places that can avoid
working if doing the same check, so add a documented helper for this
condition and use it in all places where it makes sense.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c      |  4 ++--
 fs/xfs/xfs_bmap_util.c |  2 +-
 fs/xfs/xfs_icache.c    | 10 ++++------
 fs/xfs/xfs_inode.c     |  3 +--
 fs/xfs/xfs_inode.h     |  9 +++++++++
 fs/xfs/xfs_reflink.c   |  2 +-
 6 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index f4d3252236c1..814100d27343 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -338,7 +338,7 @@ xfs_map_blocks(
 	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
 		     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
 	if (imap_valid &&
-	    (!xfs_is_reflink_inode(ip) || wpc->io_type == XFS_IO_COW))
+	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
 		return 0;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
@@ -363,7 +363,7 @@ xfs_map_blocks(
 	 * Check if this is offset is covered by a COW extents, and if yes use
 	 * it directly instead of looking up anything in the data fork.
 	 */
-	if (xfs_is_reflink_inode(ip) &&
+	if (xfs_inode_has_cow_data(ip) &&
 	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
 	    imap.br_startoff <= offset_fsb) {
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index d3a314fd721f..5c918e8bf496 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1277,7 +1277,7 @@ xfs_prepare_shift(
 	 * we've flushed all the dirty data out to disk to avoid having
 	 * CoW extents at the wrong offsets.
 	 */
-	if (xfs_is_reflink_inode(ip)) {
+	if (xfs_inode_has_cow_data(ip)) {
 		error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
 				true);
 		if (error)
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 79f344fa8b14..fdae4c2d461e 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1697,14 +1697,13 @@ xfs_inode_clear_eofblocks_tag(
  */
 static bool
 xfs_prep_free_cowblocks(
-	struct xfs_inode	*ip,
-	struct xfs_ifork	*ifp)
+	struct xfs_inode	*ip)
 {
 	/*
 	 * Just clear the tag if we have an empty cow fork or none at all. It's
 	 * possible the inode was fully unshared since it was originally tagged.
 	 */
-	if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) {
+	if (!xfs_inode_has_cow_data(ip)) {
 		trace_xfs_inode_free_cowblocks_invalid(ip);
 		xfs_inode_clear_cowblocks_tag(ip);
 		return false;
@@ -1742,11 +1741,10 @@ xfs_inode_free_cowblocks(
 	void			*args)
 {
 	struct xfs_eofblocks	*eofb = args;
-	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	int			match;
 	int			ret = 0;
 
-	if (!xfs_prep_free_cowblocks(ip, ifp))
+	if (!xfs_prep_free_cowblocks(ip))
 		return 0;
 
 	if (eofb) {
@@ -1771,7 +1769,7 @@ xfs_inode_free_cowblocks(
 	 * Check again, nobody else should be able to dirty blocks or change
 	 * the reflink iflag now that we have the first two locks held.
 	 */
-	if (xfs_prep_free_cowblocks(ip, ifp))
+	if (xfs_prep_free_cowblocks(ip))
 		ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
 
 	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d6cd545f5d1d..b5c6464b2307 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1863,7 +1863,6 @@ xfs_inactive(
 	xfs_inode_t	*ip)
 {
 	struct xfs_mount	*mp;
-	struct xfs_ifork	*cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	int			error;
 	int			truncate = 0;
 
@@ -1884,7 +1883,7 @@ xfs_inactive(
 		return;
 
 	/* Try to clean out the cow blocks if there are any. */
-	if (xfs_is_reflink_inode(ip) && cow_ifp->if_bytes > 0)
+	if (xfs_inode_has_cow_data(ip))
 		xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
 
 	if (VFS_I(ip)->i_nlink != 0) {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 51ae5d79de0d..9cd02852d193 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -198,6 +198,15 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
 	return ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
 }
 
+/*
+ * Check if an inode has any data in the COW fork.  This might be often false
+ * even for inodes with the reflink flag when there is no pending COW operation.
+ */
+static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
+{
+	return ip->i_cowfp && ip->i_cowfp->if_bytes;
+}
+
 /*
  * In-core inode flags.
  */
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 3143889097f1..1c59814d8bb2 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -487,7 +487,7 @@ xfs_reflink_cancel_cow_blocks(
 	struct xfs_defer_ops		*odfops = (*tpp)->t_dfops;
 	int				error = 0;
 
-	if (!xfs_is_reflink_inode(ip))
+	if (!xfs_inode_has_cow_data(ip))
 		return 0;
 	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
 		return 0;
-- 
2.18.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 5/6] xfs: maintain a sequence count for inode fork manipulations
  2018-07-17 23:23 reduce lookups in the COW extent tree V3 Christoph Hellwig
                   ` (3 preceding siblings ...)
  2018-07-17 23:24 ` [PATCH 4/6] xfs: introduce a new xfs_inode_has_cow_data helper Christoph Hellwig
@ 2018-07-17 23:24 ` Christoph Hellwig
  2018-07-18 14:40   ` Carlos Maiolino
  2018-07-17 23:24 ` [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change Christoph Hellwig
  5 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-17 23:24 UTC (permalink / raw)
  To: linux-xfs

Add a simple 32-bit unsigned integer as the sequence count for
modifications to the extent list in the inode fork.  This will be
used to optimize away extent list lookups in the writeback code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_iext_tree.c  | 6 ++++++
 fs/xfs/libxfs/xfs_inode_fork.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index b80c63faace2..8a7aea041ee1 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -624,6 +624,8 @@ xfs_iext_insert(
 	struct xfs_iext_leaf	*new = NULL;
 	int			nr_entries, i;
 
+	ifp->if_seq++;
+
 	if (ifp->if_height == 0)
 		xfs_iext_alloc_root(ifp, cur);
 	else if (ifp->if_height == 1)
@@ -864,6 +866,8 @@ xfs_iext_remove(
 	ASSERT(ifp->if_u1.if_root != NULL);
 	ASSERT(xfs_iext_valid(ifp, cur));
 
+	ifp->if_seq++;
+
 	nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1;
 	for (i = cur->pos; i < nr_entries; i++)
 		leaf->recs[i] = leaf->recs[i + 1];
@@ -970,6 +974,8 @@ xfs_iext_update_extent(
 {
 	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
 
+	ifp->if_seq++;
+
 	if (cur->pos == 0) {
 		struct xfs_bmbt_irec	old;
 
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 1492143371f3..f20b2468ca35 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -14,6 +14,7 @@ struct xfs_dinode;
  */
 struct xfs_ifork {
 	int			if_bytes;	/* bytes in if_u1 */
+	unsigned int		if_seq;
 	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
 	short			if_broot_bytes;	/* bytes allocated for root */
 	unsigned char		if_flags;	/* per-fork flags */
-- 
2.18.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-17 23:23 reduce lookups in the COW extent tree V3 Christoph Hellwig
                   ` (4 preceding siblings ...)
  2018-07-17 23:24 ` [PATCH 5/6] xfs: maintain a sequence count for inode fork manipulations Christoph Hellwig
@ 2018-07-17 23:24 ` Christoph Hellwig
  2018-07-18 14:51   ` Carlos Maiolino
  2018-07-21 23:23   ` Dave Chinner
  5 siblings, 2 replies; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-17 23:24 UTC (permalink / raw)
  To: linux-xfs

Used the per-fork sequence counter to avoid lookups in the writeback code
unless the COW fork actually changed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c  | 30 +++++++++++++++++++++++++-----
 fs/xfs/xfs_iomap.c |  5 ++++-
 fs/xfs/xfs_iomap.h |  2 +-
 3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 814100d27343..aff9d44fa338 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -29,6 +29,7 @@
 struct xfs_writepage_ctx {
 	struct xfs_bmbt_irec    imap;
 	unsigned int		io_type;
+	unsigned int		cow_seq;
 	struct xfs_ioend	*ioend;
 };
 
@@ -310,6 +311,7 @@ xfs_map_blocks(
 	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			count = i_blocksize(inode);
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
+	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
 	struct xfs_bmbt_irec	imap;
 	int			whichfork = XFS_DATA_FORK;
 	struct xfs_iext_cursor	icur;
@@ -333,12 +335,15 @@ xfs_map_blocks(
 	 * COW fork blocks can overlap data fork blocks even if the blocks
 	 * aren't shared.  COW I/O always takes precedent, so we must always
 	 * check for overlap on reflink inodes unless the mapping is already a
-	 * COW one.
+	 * COW one, or the COW fork hasn't changed from the last time we looked
+	 * at it.
 	 */
 	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
 		     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
 	if (imap_valid &&
-	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
+	    (!xfs_inode_has_cow_data(ip) ||
+	     wpc->io_type == XFS_IO_COW ||
+	     wpc->cow_seq == ip->i_cowfp->if_seq))
 		return 0;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
@@ -364,8 +369,10 @@ xfs_map_blocks(
 	 * it directly instead of looking up anything in the data fork.
 	 */
 	if (xfs_inode_has_cow_data(ip) &&
-	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
-	    imap.br_startoff <= offset_fsb) {
+	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
+		cow_fsb = imap.br_startoff;
+	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
+		wpc->cow_seq = ip->i_cowfp->if_seq;
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 		/*
 		 * Truncate can race with writeback since writeback doesn't
@@ -411,6 +418,16 @@ xfs_map_blocks(
 		imap.br_startblock = HOLESTARTBLOCK;
 		wpc->io_type = XFS_IO_HOLE;
 	} else {
+		/*
+		 * Truncate to the next COW extent if there is one.  This is the
+		 * only opportunity to do this because we can skip COW fork
+		 * lookups for the subsequent blocks in the mapping; however,
+		 * the requirement to treat the COW range separately remains.
+		 */
+		if (cow_fsb != NULLFILEOFF &&
+		    cow_fsb < imap.br_startoff + imap.br_blockcount)
+			imap.br_blockcount = cow_fsb - imap.br_startoff;
+
 		if (isnullstartblock(imap.br_startblock)) {
 			/* got a delalloc extent */
 			wpc->io_type = XFS_IO_DELALLOC;
@@ -427,9 +444,12 @@ xfs_map_blocks(
 	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
 	return 0;
 allocate_blocks:
-	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap);
+	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
+			&wpc->cow_seq);
 	if (error)
 		return error;
+	ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
+	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
 	wpc->imap = imap;
 	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
 	return 0;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 756694219f77..43a7ff7f450c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -658,7 +658,8 @@ xfs_iomap_write_allocate(
 	xfs_inode_t	*ip,
 	int		whichfork,
 	xfs_off_t	offset,
-	xfs_bmbt_irec_t *imap)
+	xfs_bmbt_irec_t *imap,
+	unsigned int	*cow_seq)
 {
 	xfs_mount_t	*mp = ip->i_mount;
 	xfs_fileoff_t	offset_fsb, last_block;
@@ -780,6 +781,8 @@ xfs_iomap_write_allocate(
 			if (error)
 				goto error0;
 
+			if (whichfork == XFS_COW_FORK)
+				*cow_seq = XFS_IFORK_PTR(ip, whichfork)->if_seq;
 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		}
 
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 83474c9cede9..c6170548831b 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -14,7 +14,7 @@ struct xfs_bmbt_irec;
 int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 			struct xfs_bmbt_irec *, int);
 int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
-			struct xfs_bmbt_irec *);
+			struct xfs_bmbt_irec *, unsigned int *);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
-- 
2.18.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/6] xfs: maintain a sequence count for inode fork manipulations
  2018-07-17 23:24 ` [PATCH 5/6] xfs: maintain a sequence count for inode fork manipulations Christoph Hellwig
@ 2018-07-18 14:40   ` Carlos Maiolino
  2018-07-19 16:32     ` Christoph Hellwig
  0 siblings, 1 reply; 28+ messages in thread
From: Carlos Maiolino @ 2018-07-18 14:40 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs



>  struct xfs_ifork {
>  	int			if_bytes;	/* bytes in if_u1 */
> +	unsigned int		if_seq;

I wonder if a comment here wouldn't be helpful in the future, like: /* ext list modification counter */

It's just a suggestion though, any way you can add:

Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>




>  	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
>  	short			if_broot_bytes;	/* bytes allocated for root */
>  	unsigned char		if_flags;	/* per-fork flags */

> -- 
> 2.18.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Carlos

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-17 23:24 ` [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change Christoph Hellwig
@ 2018-07-18 14:51   ` Carlos Maiolino
  2018-07-21 23:23   ` Dave Chinner
  1 sibling, 0 replies; 28+ messages in thread
From: Carlos Maiolino @ 2018-07-18 14:51 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Tue, Jul 17, 2018 at 04:24:05PM -0700, Christoph Hellwig wrote:
> Used the per-fork sequence counter to avoid lookups in the writeback code
> unless the COW fork actually changed.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_aops.c  | 30 +++++++++++++++++++++++++-----
>  fs/xfs/xfs_iomap.c |  5 ++++-
>  fs/xfs/xfs_iomap.h |  2 +-
>  3 files changed, 30 insertions(+), 7 deletions(-)
> 

Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>

> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 814100d27343..aff9d44fa338 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -29,6 +29,7 @@
>  struct xfs_writepage_ctx {
>  	struct xfs_bmbt_irec    imap;
>  	unsigned int		io_type;
> +	unsigned int		cow_seq;
>  	struct xfs_ioend	*ioend;
>  };
>  
> @@ -310,6 +311,7 @@ xfs_map_blocks(
>  	struct xfs_mount	*mp = ip->i_mount;
>  	ssize_t			count = i_blocksize(inode);
>  	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
> +	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
>  	struct xfs_bmbt_irec	imap;
>  	int			whichfork = XFS_DATA_FORK;
>  	struct xfs_iext_cursor	icur;
> @@ -333,12 +335,15 @@ xfs_map_blocks(
>  	 * COW fork blocks can overlap data fork blocks even if the blocks
>  	 * aren't shared.  COW I/O always takes precedent, so we must always
>  	 * check for overlap on reflink inodes unless the mapping is already a
> -	 * COW one.
> +	 * COW one, or the COW fork hasn't changed from the last time we looked
> +	 * at it.
>  	 */
>  	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
>  		     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
>  	if (imap_valid &&
> -	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
> +	    (!xfs_inode_has_cow_data(ip) ||
> +	     wpc->io_type == XFS_IO_COW ||
> +	     wpc->cow_seq == ip->i_cowfp->if_seq))
>  		return 0;
>  
>  	if (XFS_FORCED_SHUTDOWN(mp))
> @@ -364,8 +369,10 @@ xfs_map_blocks(
>  	 * it directly instead of looking up anything in the data fork.
>  	 */
>  	if (xfs_inode_has_cow_data(ip) &&
> -	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
> -	    imap.br_startoff <= offset_fsb) {
> +	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
> +		cow_fsb = imap.br_startoff;
> +	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
> +		wpc->cow_seq = ip->i_cowfp->if_seq;
>  		xfs_iunlock(ip, XFS_ILOCK_SHARED);
>  		/*
>  		 * Truncate can race with writeback since writeback doesn't
> @@ -411,6 +418,16 @@ xfs_map_blocks(
>  		imap.br_startblock = HOLESTARTBLOCK;
>  		wpc->io_type = XFS_IO_HOLE;
>  	} else {
> +		/*
> +		 * Truncate to the next COW extent if there is one.  This is the
> +		 * only opportunity to do this because we can skip COW fork
> +		 * lookups for the subsequent blocks in the mapping; however,
> +		 * the requirement to treat the COW range separately remains.
> +		 */
> +		if (cow_fsb != NULLFILEOFF &&
> +		    cow_fsb < imap.br_startoff + imap.br_blockcount)
> +			imap.br_blockcount = cow_fsb - imap.br_startoff;
> +
>  		if (isnullstartblock(imap.br_startblock)) {
>  			/* got a delalloc extent */
>  			wpc->io_type = XFS_IO_DELALLOC;
> @@ -427,9 +444,12 @@ xfs_map_blocks(
>  	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
>  	return 0;
>  allocate_blocks:
> -	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap);
> +	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
> +			&wpc->cow_seq);
>  	if (error)
>  		return error;
> +	ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
> +	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
>  	wpc->imap = imap;
>  	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
>  	return 0;
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 756694219f77..43a7ff7f450c 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -658,7 +658,8 @@ xfs_iomap_write_allocate(
>  	xfs_inode_t	*ip,
>  	int		whichfork,
>  	xfs_off_t	offset,
> -	xfs_bmbt_irec_t *imap)
> +	xfs_bmbt_irec_t *imap,
> +	unsigned int	*cow_seq)
>  {
>  	xfs_mount_t	*mp = ip->i_mount;
>  	xfs_fileoff_t	offset_fsb, last_block;
> @@ -780,6 +781,8 @@ xfs_iomap_write_allocate(
>  			if (error)
>  				goto error0;
>  
> +			if (whichfork == XFS_COW_FORK)
> +				*cow_seq = XFS_IFORK_PTR(ip, whichfork)->if_seq;
>  			xfs_iunlock(ip, XFS_ILOCK_EXCL);
>  		}
>  
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index 83474c9cede9..c6170548831b 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -14,7 +14,7 @@ struct xfs_bmbt_irec;
>  int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
>  			struct xfs_bmbt_irec *, int);
>  int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
> -			struct xfs_bmbt_irec *);
> +			struct xfs_bmbt_irec *, unsigned int *);
>  int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
>  
>  void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
> -- 
> 2.18.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Carlos

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/6] xfs: maintain a sequence count for inode fork manipulations
  2018-07-18 14:40   ` Carlos Maiolino
@ 2018-07-19 16:32     ` Christoph Hellwig
  2018-07-19 18:27       ` Darrick J. Wong
  0 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-19 16:32 UTC (permalink / raw)
  To: Christoph Hellwig, linux-xfs

On Wed, Jul 18, 2018 at 04:40:04PM +0200, Carlos Maiolino wrote:
> 
> 
> >  struct xfs_ifork {
> >  	int			if_bytes;	/* bytes in if_u1 */
> > +	unsigned int		if_seq;
> 
> I wonder if a comment here wouldn't be helpful in the future, like: /* ext list modification counter */

Does that really add more value?  If so I'm happy having it added
when applying.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/6] xfs: maintain a sequence count for inode fork manipulations
  2018-07-19 16:32     ` Christoph Hellwig
@ 2018-07-19 18:27       ` Darrick J. Wong
  2018-07-23 12:11         ` Carlos Maiolino
  0 siblings, 1 reply; 28+ messages in thread
From: Darrick J. Wong @ 2018-07-19 18:27 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 19, 2018 at 06:32:33PM +0200, Christoph Hellwig wrote:
> On Wed, Jul 18, 2018 at 04:40:04PM +0200, Carlos Maiolino wrote:
> > 
> > 
> > >  struct xfs_ifork {
> > >  	int			if_bytes;	/* bytes in if_u1 */
> > > +	unsigned int		if_seq;
> > 
> > I wonder if a comment here wouldn't be helpful in the future, like: /* ext list modification counter */
> 
> Does that really add more value?  If so I'm happy having it added
> when applying.

Seeing as we have anecdotal evidence that more work will be needed if we
ever want to use it to track change count in the data fork, I think I'll
add the following comment:

	unsigned int		if_seq;		/* cow fork mod count */

--D

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-17 23:24 ` [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change Christoph Hellwig
  2018-07-18 14:51   ` Carlos Maiolino
@ 2018-07-21 23:23   ` Dave Chinner
  2018-07-23  7:49     ` Christoph Hellwig
  1 sibling, 1 reply; 28+ messages in thread
From: Dave Chinner @ 2018-07-21 23:23 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Tue, Jul 17, 2018 at 04:24:05PM -0700, Christoph Hellwig wrote:
> @@ -333,12 +335,15 @@ xfs_map_blocks(
>  	 * COW fork blocks can overlap data fork blocks even if the blocks
>  	 * aren't shared.  COW I/O always takes precedent, so we must always
>  	 * check for overlap on reflink inodes unless the mapping is already a
> -	 * COW one.
> +	 * COW one, or the COW fork hasn't changed from the last time we looked
> +	 * at it.
>  	 */
>  	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
>  		     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
>  	if (imap_valid &&
> -	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
> +	    (!xfs_inode_has_cow_data(ip) ||
> +	     wpc->io_type == XFS_IO_COW ||
> +	     wpc->cow_seq == ip->i_cowfp->if_seq))
>  		return 0;

This seqno check is still racy against concurrent extent tree
modification. We aren't holding the ILOCK here at all, the sequence
numbers aren't atomic and there are no memory barriers to serialise
cross-cpu load/store operations...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-21 23:23   ` Dave Chinner
@ 2018-07-23  7:49     ` Christoph Hellwig
  2018-07-24 22:35       ` Darrick J. Wong
  0 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-23  7:49 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, linux-xfs

On Sun, Jul 22, 2018 at 09:23:11AM +1000, Dave Chinner wrote:
> On Tue, Jul 17, 2018 at 04:24:05PM -0700, Christoph Hellwig wrote:
> > @@ -333,12 +335,15 @@ xfs_map_blocks(
> >  	 * COW fork blocks can overlap data fork blocks even if the blocks
> >  	 * aren't shared.  COW I/O always takes precedent, so we must always
> >  	 * check for overlap on reflink inodes unless the mapping is already a
> > -	 * COW one.
> > +	 * COW one, or the COW fork hasn't changed from the last time we looked
> > +	 * at it.
> >  	 */
> >  	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
> >  		     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
> >  	if (imap_valid &&
> > -	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
> > +	    (!xfs_inode_has_cow_data(ip) ||
> > +	     wpc->io_type == XFS_IO_COW ||
> > +	     wpc->cow_seq == ip->i_cowfp->if_seq))
> >  		return 0;
> 
> This seqno check is still racy against concurrent extent tree
> modification. We aren't holding the ILOCK here at all, the sequence
> numbers aren't atomic and there are no memory barriers to serialise
> cross-cpu load/store operations...

mutex_unlock contains a barrier, and the sequence is a single register
read.  There is nothing holding a lock here would help us with.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/6] xfs: maintain a sequence count for inode fork manipulations
  2018-07-19 18:27       ` Darrick J. Wong
@ 2018-07-23 12:11         ` Carlos Maiolino
  0 siblings, 0 replies; 28+ messages in thread
From: Carlos Maiolino @ 2018-07-23 12:11 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs

On Thu, Jul 19, 2018 at 11:27:16AM -0700, Darrick J. Wong wrote:
> On Thu, Jul 19, 2018 at 06:32:33PM +0200, Christoph Hellwig wrote:
> > On Wed, Jul 18, 2018 at 04:40:04PM +0200, Carlos Maiolino wrote:
> > > 
> > > 
> > > >  struct xfs_ifork {
> > > >  	int			if_bytes;	/* bytes in if_u1 */
> > > > +	unsigned int		if_seq;
> > > 
> > > I wonder if a comment here wouldn't be helpful in the future, like: /* ext list modification counter */
> > 
> > Does that really add more value?  If so I'm happy having it added
> > when applying.
> 

For me at least it adds, IMHO, it's simpler to see a small comment in the struct
definition other than search what exactly it's supposed to mean in the
implementation, but well, this is my view.

> Seeing as we have anecdotal evidence that more work will be needed if we
> ever want to use it to track change count in the data fork, I think I'll
> add the following comment:
> 
> 	unsigned int		if_seq;		/* cow fork mod count */
>

/me didn't understand, but *shurg*
 
> --D
> 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Carlos

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-23  7:49     ` Christoph Hellwig
@ 2018-07-24 22:35       ` Darrick J. Wong
  2018-07-27 15:10         ` Christoph Hellwig
  0 siblings, 1 reply; 28+ messages in thread
From: Darrick J. Wong @ 2018-07-24 22:35 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Dave Chinner, linux-xfs

On Mon, Jul 23, 2018 at 09:49:41AM +0200, Christoph Hellwig wrote:
> On Sun, Jul 22, 2018 at 09:23:11AM +1000, Dave Chinner wrote:
> > On Tue, Jul 17, 2018 at 04:24:05PM -0700, Christoph Hellwig wrote:
> > > @@ -333,12 +335,15 @@ xfs_map_blocks(
> > >  	 * COW fork blocks can overlap data fork blocks even if the blocks
> > >  	 * aren't shared.  COW I/O always takes precedent, so we must always
> > >  	 * check for overlap on reflink inodes unless the mapping is already a
> > > -	 * COW one.
> > > +	 * COW one, or the COW fork hasn't changed from the last time we looked
> > > +	 * at it.
> > >  	 */
> > >  	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
> > >  		     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
> > >  	if (imap_valid &&
> > > -	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
> > > +	    (!xfs_inode_has_cow_data(ip) ||
> > > +	     wpc->io_type == XFS_IO_COW ||
> > > +	     wpc->cow_seq == ip->i_cowfp->if_seq))
> > >  		return 0;
> > 
> > This seqno check is still racy against concurrent extent tree
> > modification. We aren't holding the ILOCK here at all, the sequence
> > numbers aren't atomic and there are no memory barriers to serialise
> > cross-cpu load/store operations...
> 
> mutex_unlock contains a barrier, and the sequence is a single register
> read.  There is nothing holding a lock here would help us with.

Which mutex_unlock is that?  I took a second look at the i_cowfp access
and realized that we don't take ILOCK_SHARED until after the comparison.
Writeback doesn't take any of the other inode locks AFAIK... so either
there's some locking subtlety here that ought to be explained in a
comment, or is this a bug waiting to happen?

--D

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-24 22:35       ` Darrick J. Wong
@ 2018-07-27 15:10         ` Christoph Hellwig
  2018-08-06  2:37           ` Dave Chinner
  0 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-27 15:10 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, Dave Chinner, linux-xfs

On Tue, Jul 24, 2018 at 03:35:23PM -0700, Darrick J. Wong wrote:
> > mutex_unlock contains a barrier, and the sequence is a single register
> > read.  There is nothing holding a lock here would help us with.
> 
> Which mutex_unlock is that?

Actually an up_write (on i_lock), but the result is the same.

> I took a second look at the i_cowfp access
> and realized that we don't take ILOCK_SHARED until after the comparison.
> Writeback doesn't take any of the other inode locks AFAIK... so either
> there's some locking subtlety here that ought to be explained in a
> comment, or is this a bug waiting to happen?

What would you want the lock to protect?  As said above the if_seq
field itself doesn't need a lock, it can be read atomic because it
is a register or less.

And for the actual imap we don't have a lock anywhere in the writeback
code - as soon as we did the lookup or allocation we drop the i_lock
and keep using it.  That is generally fine because we only ever remove
extents from it after previously waiting for writeback and using
the iolock to prevent new writeback from being started.  The only new
bit with reflink support is that a valid data fork block mapping might
now be shadowed by a new reflink mapping, and that is exactly what the
sequence count protects against (instead of doing a lookup in the
cowfp and then instanctly dropping the lock before)

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-27 15:10         ` Christoph Hellwig
@ 2018-08-06  2:37           ` Dave Chinner
  2018-08-06 16:45             ` Christoph Hellwig
  0 siblings, 1 reply; 28+ messages in thread
From: Dave Chinner @ 2018-08-06  2:37 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Darrick J. Wong, linux-xfs

On Fri, Jul 27, 2018 at 05:10:39PM +0200, Christoph Hellwig wrote:
> On Tue, Jul 24, 2018 at 03:35:23PM -0700, Darrick J. Wong wrote:
> > > mutex_unlock contains a barrier, and the sequence is a single register
> > > read.  There is nothing holding a lock here would help us with.
> > 
> > Which mutex_unlock is that?
> 
> Actually an up_write (on i_lock), but the result is the same.

I'm not sure it is - I've lost count of the number of times I've
heard the phrase "unlock does not provide a memory barrier, only
unlock to lock provides a full memory barrier".  As such, I've
always treated unlock as a store (release) barrier to keep stores
inside the critical section, and lock as the correspending load
barrier to keep loads inside the critical section.

I think that's irrelevant, anyway, because there is no memory
barrier implied until the ILOCK is dropped. i.e. we can change the
value before making the extent modification, but there's no
guarantee that it's visible to other CPUs until the store memory
barrier occurs when the ILOCK is dropped. i.e. after the extent map
has actually been changed.

And, FWIW, without read-side memory barriers before checking the
value, the load can be re-ordered and perhaps even elided by the
optimising compiler.

IOWs, I can't see any reliable access serialisation being provided
by holding the ILOCK while modifying the value. Perhaps this should
use WRITE_ONCE/READ_ONCE to ensure the correct memory barriers are
used to serialise tthe sequence count against itself rather than the
wider inode extent modification process?

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-08-06  2:37           ` Dave Chinner
@ 2018-08-06 16:45             ` Christoph Hellwig
  0 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2018-08-06 16:45 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, Darrick J. Wong, linux-xfs

On Mon, Aug 06, 2018 at 12:37:38PM +1000, Dave Chinner wrote:
> IOWs, I can't see any reliable access serialisation being provided
> by holding the ILOCK while modifying the value. Perhaps this should
> use WRITE_ONCE/READ_ONCE to ensure the correct memory barriers are
> used to serialise tthe sequence count against itself rather than the
> wider inode extent modification process?

Yes, I think we need a barrier before the later actual modification
of the extent tree and thus should use WRITE_ONCE/READ_ONCE.

I've got a patch that is undergoing testing at the moment.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-17 13:37     ` Christoph Hellwig
@ 2018-07-17 23:13       ` Dave Chinner
  0 siblings, 0 replies; 28+ messages in thread
From: Dave Chinner @ 2018-07-17 23:13 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Tue, Jul 17, 2018 at 03:37:10PM +0200, Christoph Hellwig wrote:
> On Sat, Jul 14, 2018 at 10:03:01AM +1000, Dave Chinner wrote:
> > >  	if (imap_valid &&
> > > -	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
> > > +	    (!xfs_inode_has_cow_data(ip) ||
> > > +	     wpc->io_type == XFS_IO_COW ||
> > > +	     wpc->cow_seq == ip->i_cowfp->if_seq))
> > >  		return 0;
> > 
> > Isn't this racy? It's not an atomic variable, we hold no locks,
> > there are no memory barriers, etc. Hence we can miss changes made by
> > concurrent mapping changes...
> > 
> > Which makes me ask - is the sequence number bumped before or after
> > we modify the extent map? It seems to me that it needs to be done
> > before we make a modification so that we invalidate the current map
> > before we change the extent map to avoid issuing IO to an extent map
> > we are know we changing, right?
> 
> Right now they are bumped later, but they really should be first.
> 
> I've got a series that bumps them first now.

Cool.

> That being said at least for the COW fork I don't think we care
> about the races too much.  All the actual updates to the COW fork
> are under the page lock, so for a given page we are synchronized
> already.  And for the bigger picture we either convert a COW
> fork to a real fork, in which case the change doesn't matter, or
> we drop it, in which case we already have higher level synchronization.

Ok.

> 
> For the data fork things might be more nasty, and that could explain
> why my trivial extension to that just didn't work at all..

Yeah, my patch was catching data fork modifications as well.
IIRC I ended up putting the seqno bump and checks under the
ip->i_flags_lock as a quick method of serialising updates, and that
made the update vs check races go away. It didn't make all the
problems go away - just the ones that were easy to reproduce - but
those remaining bugs may have been in other parts of the patchset
that I never got to the bottom of...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-14  0:03   ` Dave Chinner
@ 2018-07-17 13:37     ` Christoph Hellwig
  2018-07-17 23:13       ` Dave Chinner
  0 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-17 13:37 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, linux-xfs

On Sat, Jul 14, 2018 at 10:03:01AM +1000, Dave Chinner wrote:
> >  	if (imap_valid &&
> > -	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
> > +	    (!xfs_inode_has_cow_data(ip) ||
> > +	     wpc->io_type == XFS_IO_COW ||
> > +	     wpc->cow_seq == ip->i_cowfp->if_seq))
> >  		return 0;
> 
> Isn't this racy? It's not an atomic variable, we hold no locks,
> there are no memory barriers, etc. Hence we can miss changes made by
> concurrent mapping changes...
> 
> Which makes me ask - is the sequence number bumped before or after
> we modify the extent map? It seems to me that it needs to be done
> before we make a modification so that we invalidate the current map
> before we change the extent map to avoid issuing IO to an extent map
> we are know we changing, right?

Right now they are bumped later, but they really should be first.

I've got a series that bumps them first now.

That being said at least for the COW fork I don't think we care
about the races too much.  All the actual updates to the COW fork
are under the page lock, so for a given page we are synchronized
already.  And for the bigger picture we either convert a COW
fork to a real fork, in which case the change doesn't matter, or
we drop it, in which case we already have higher level synchronization.

For the data fork things might be more nasty, and that could explain
why my trivial extension to that just didn't work at all..

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-12 13:49 ` [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change Christoph Hellwig
  2018-07-13 22:51   ` Darrick J. Wong
@ 2018-07-14  0:03   ` Dave Chinner
  2018-07-17 13:37     ` Christoph Hellwig
  1 sibling, 1 reply; 28+ messages in thread
From: Dave Chinner @ 2018-07-14  0:03 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 12, 2018 at 03:49:10PM +0200, Christoph Hellwig wrote:
> @@ -333,12 +335,15 @@ xfs_map_blocks(
>  	 * COW fork blocks can overlap data fork blocks even if the blocks
>  	 * aren't shared.  COW I/O always takes precedent, so we must always
>  	 * check for overlap on reflink inodes unless the mapping is already a
> -	 * COW one.
> +	 * COW one, or the COW fork hasn't changed from the last time we looked
> +	 * at it.
>  	 */
>  	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
>  		     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
>  	if (imap_valid &&
> -	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
> +	    (!xfs_inode_has_cow_data(ip) ||
> +	     wpc->io_type == XFS_IO_COW ||
> +	     wpc->cow_seq == ip->i_cowfp->if_seq))
>  		return 0;

Isn't this racy? It's not an atomic variable, we hold no locks,
there are no memory barriers, etc. Hence we can miss changes made by
concurrent mapping changes...

Which makes me ask - is the sequence number bumped before or after
we modify the extent map? It seems to me that it needs to be done
before we make a modification so that we invalidate the current map
before we change the extent map to avoid issuing IO to an extent map
we are know we changing, right?

/me had prototype seqno patches like this from way back and kept
hitting races because of these "imap_valid checks are done without
locks" issues...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-12 13:49 ` [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change Christoph Hellwig
@ 2018-07-13 22:51   ` Darrick J. Wong
  2018-07-14  0:03   ` Dave Chinner
  1 sibling, 0 replies; 28+ messages in thread
From: Darrick J. Wong @ 2018-07-13 22:51 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 12, 2018 at 03:49:10PM +0200, Christoph Hellwig wrote:
> Used the per-fork sequence counter to avoid lookups in the writeback code
> unless the COW fork actually changed.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks ok,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_aops.c  | 30 +++++++++++++++++++++++++-----
>  fs/xfs/xfs_iomap.c |  5 ++++-
>  fs/xfs/xfs_iomap.h |  2 +-
>  3 files changed, 30 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 814100d27343..aff9d44fa338 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -29,6 +29,7 @@
>  struct xfs_writepage_ctx {
>  	struct xfs_bmbt_irec    imap;
>  	unsigned int		io_type;
> +	unsigned int		cow_seq;
>  	struct xfs_ioend	*ioend;
>  };
>  
> @@ -310,6 +311,7 @@ xfs_map_blocks(
>  	struct xfs_mount	*mp = ip->i_mount;
>  	ssize_t			count = i_blocksize(inode);
>  	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
> +	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
>  	struct xfs_bmbt_irec	imap;
>  	int			whichfork = XFS_DATA_FORK;
>  	struct xfs_iext_cursor	icur;
> @@ -333,12 +335,15 @@ xfs_map_blocks(
>  	 * COW fork blocks can overlap data fork blocks even if the blocks
>  	 * aren't shared.  COW I/O always takes precedent, so we must always
>  	 * check for overlap on reflink inodes unless the mapping is already a
> -	 * COW one.
> +	 * COW one, or the COW fork hasn't changed from the last time we looked
> +	 * at it.
>  	 */
>  	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
>  		     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
>  	if (imap_valid &&
> -	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
> +	    (!xfs_inode_has_cow_data(ip) ||
> +	     wpc->io_type == XFS_IO_COW ||
> +	     wpc->cow_seq == ip->i_cowfp->if_seq))
>  		return 0;
>  
>  	if (XFS_FORCED_SHUTDOWN(mp))
> @@ -364,8 +369,10 @@ xfs_map_blocks(
>  	 * it directly instead of looking up anything in the data fork.
>  	 */
>  	if (xfs_inode_has_cow_data(ip) &&
> -	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
> -	    imap.br_startoff <= offset_fsb) {
> +	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
> +		cow_fsb = imap.br_startoff;
> +	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
> +		wpc->cow_seq = ip->i_cowfp->if_seq;
>  		xfs_iunlock(ip, XFS_ILOCK_SHARED);
>  		/*
>  		 * Truncate can race with writeback since writeback doesn't
> @@ -411,6 +418,16 @@ xfs_map_blocks(
>  		imap.br_startblock = HOLESTARTBLOCK;
>  		wpc->io_type = XFS_IO_HOLE;
>  	} else {
> +		/*
> +		 * Truncate to the next COW extent if there is one.  This is the
> +		 * only opportunity to do this because we can skip COW fork
> +		 * lookups for the subsequent blocks in the mapping; however,
> +		 * the requirement to treat the COW range separately remains.
> +		 */
> +		if (cow_fsb != NULLFILEOFF &&
> +		    cow_fsb < imap.br_startoff + imap.br_blockcount)
> +			imap.br_blockcount = cow_fsb - imap.br_startoff;
> +
>  		if (isnullstartblock(imap.br_startblock)) {
>  			/* got a delalloc extent */
>  			wpc->io_type = XFS_IO_DELALLOC;
> @@ -427,9 +444,12 @@ xfs_map_blocks(
>  	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
>  	return 0;
>  allocate_blocks:
> -	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap);
> +	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
> +			&wpc->cow_seq);
>  	if (error)
>  		return error;
> +	ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
> +	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
>  	wpc->imap = imap;
>  	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
>  	return 0;
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 756694219f77..43a7ff7f450c 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -658,7 +658,8 @@ xfs_iomap_write_allocate(
>  	xfs_inode_t	*ip,
>  	int		whichfork,
>  	xfs_off_t	offset,
> -	xfs_bmbt_irec_t *imap)
> +	xfs_bmbt_irec_t *imap,
> +	unsigned int	*cow_seq)
>  {
>  	xfs_mount_t	*mp = ip->i_mount;
>  	xfs_fileoff_t	offset_fsb, last_block;
> @@ -780,6 +781,8 @@ xfs_iomap_write_allocate(
>  			if (error)
>  				goto error0;
>  
> +			if (whichfork == XFS_COW_FORK)
> +				*cow_seq = XFS_IFORK_PTR(ip, whichfork)->if_seq;
>  			xfs_iunlock(ip, XFS_ILOCK_EXCL);
>  		}
>  
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index 83474c9cede9..c6170548831b 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -14,7 +14,7 @@ struct xfs_bmbt_irec;
>  int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
>  			struct xfs_bmbt_irec *, int);
>  int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
> -			struct xfs_bmbt_irec *);
> +			struct xfs_bmbt_irec *, unsigned int *);
>  int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
>  
>  void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
> -- 
> 2.18.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-12 13:49 reduce lookups in the COW extent tree V2 Christoph Hellwig
@ 2018-07-12 13:49 ` Christoph Hellwig
  2018-07-13 22:51   ` Darrick J. Wong
  2018-07-14  0:03   ` Dave Chinner
  0 siblings, 2 replies; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-12 13:49 UTC (permalink / raw)
  To: linux-xfs

Used the per-fork sequence counter to avoid lookups in the writeback code
unless the COW fork actually changed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c  | 30 +++++++++++++++++++++++++-----
 fs/xfs/xfs_iomap.c |  5 ++++-
 fs/xfs/xfs_iomap.h |  2 +-
 3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 814100d27343..aff9d44fa338 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -29,6 +29,7 @@
 struct xfs_writepage_ctx {
 	struct xfs_bmbt_irec    imap;
 	unsigned int		io_type;
+	unsigned int		cow_seq;
 	struct xfs_ioend	*ioend;
 };
 
@@ -310,6 +311,7 @@ xfs_map_blocks(
 	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			count = i_blocksize(inode);
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
+	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
 	struct xfs_bmbt_irec	imap;
 	int			whichfork = XFS_DATA_FORK;
 	struct xfs_iext_cursor	icur;
@@ -333,12 +335,15 @@ xfs_map_blocks(
 	 * COW fork blocks can overlap data fork blocks even if the blocks
 	 * aren't shared.  COW I/O always takes precedent, so we must always
 	 * check for overlap on reflink inodes unless the mapping is already a
-	 * COW one.
+	 * COW one, or the COW fork hasn't changed from the last time we looked
+	 * at it.
 	 */
 	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
 		     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
 	if (imap_valid &&
-	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
+	    (!xfs_inode_has_cow_data(ip) ||
+	     wpc->io_type == XFS_IO_COW ||
+	     wpc->cow_seq == ip->i_cowfp->if_seq))
 		return 0;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
@@ -364,8 +369,10 @@ xfs_map_blocks(
 	 * it directly instead of looking up anything in the data fork.
 	 */
 	if (xfs_inode_has_cow_data(ip) &&
-	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
-	    imap.br_startoff <= offset_fsb) {
+	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
+		cow_fsb = imap.br_startoff;
+	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
+		wpc->cow_seq = ip->i_cowfp->if_seq;
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 		/*
 		 * Truncate can race with writeback since writeback doesn't
@@ -411,6 +418,16 @@ xfs_map_blocks(
 		imap.br_startblock = HOLESTARTBLOCK;
 		wpc->io_type = XFS_IO_HOLE;
 	} else {
+		/*
+		 * Truncate to the next COW extent if there is one.  This is the
+		 * only opportunity to do this because we can skip COW fork
+		 * lookups for the subsequent blocks in the mapping; however,
+		 * the requirement to treat the COW range separately remains.
+		 */
+		if (cow_fsb != NULLFILEOFF &&
+		    cow_fsb < imap.br_startoff + imap.br_blockcount)
+			imap.br_blockcount = cow_fsb - imap.br_startoff;
+
 		if (isnullstartblock(imap.br_startblock)) {
 			/* got a delalloc extent */
 			wpc->io_type = XFS_IO_DELALLOC;
@@ -427,9 +444,12 @@ xfs_map_blocks(
 	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
 	return 0;
 allocate_blocks:
-	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap);
+	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
+			&wpc->cow_seq);
 	if (error)
 		return error;
+	ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
+	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
 	wpc->imap = imap;
 	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
 	return 0;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 756694219f77..43a7ff7f450c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -658,7 +658,8 @@ xfs_iomap_write_allocate(
 	xfs_inode_t	*ip,
 	int		whichfork,
 	xfs_off_t	offset,
-	xfs_bmbt_irec_t *imap)
+	xfs_bmbt_irec_t *imap,
+	unsigned int	*cow_seq)
 {
 	xfs_mount_t	*mp = ip->i_mount;
 	xfs_fileoff_t	offset_fsb, last_block;
@@ -780,6 +781,8 @@ xfs_iomap_write_allocate(
 			if (error)
 				goto error0;
 
+			if (whichfork == XFS_COW_FORK)
+				*cow_seq = XFS_IFORK_PTR(ip, whichfork)->if_seq;
 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		}
 
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 83474c9cede9..c6170548831b 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -14,7 +14,7 @@ struct xfs_bmbt_irec;
 int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 			struct xfs_bmbt_irec *, int);
 int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
-			struct xfs_bmbt_irec *);
+			struct xfs_bmbt_irec *, unsigned int *);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
-- 
2.18.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-11 17:31       ` Darrick J. Wong
@ 2018-07-11 17:35         ` Christoph Hellwig
  0 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-11 17:35 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs

On Wed, Jul 11, 2018 at 10:31:52AM -0700, Darrick J. Wong wrote:
> On Wed, Jul 11, 2018 at 07:20:32PM +0200, Christoph Hellwig wrote:
> > On Wed, Jul 11, 2018 at 10:15:51AM -0700, Darrick J. Wong wrote:
> > > Hmm, this troubles me slightly -- a short time ago you removed the "trim
> > > this data fork mapping to the first overlap with a cow fork mapping"
> > > (i.e. xfs_reflink_trim_irec_to_next_cow) behavior on the grounds that
> > > _writepage_map calls _map_blocks for every block in the page and so it
> > > was unnecessary.  But this seems to put that back.  Why is that?
> > 
> > Because back then map_blocks did a lookup in the COW fork everytime
> > (unless we are already on a COW mapping), and this patch wants to
> > only look it up when the COW fork actually changed, so the trim is
> > required now.
> 
> Ah, ok.   Mind if I change the comment to:
> 
> /*
>  * Truncate to the next COW extent if there is one.  This is the only
>  * opportunity to do this because we can skip COW fork lookups for the
>  * subsequent blocks in the mapping; however, the requirement to treat
>  * the COW range separately remains.
>  */
> 
> I also wonder why we don't need to do the same for holes, but I think
> the answer is that any dirty page with a cow fork mapping must also have
> a data fork mapping (even if it's merely a delalloc reservation) and so
> a hole will never overlap with a cow fork mapping.

I'll update the comment.  I'll need to resend against the 4.19-merge
tree anyway, and I also found a little buglet in this patch (it updates
wpc->cow_seq for all allocations and not just COW ones, with that fixed
we should do even less lookups)

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-11 17:20     ` Christoph Hellwig
@ 2018-07-11 17:31       ` Darrick J. Wong
  2018-07-11 17:35         ` Christoph Hellwig
  0 siblings, 1 reply; 28+ messages in thread
From: Darrick J. Wong @ 2018-07-11 17:31 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, Jul 11, 2018 at 07:20:32PM +0200, Christoph Hellwig wrote:
> On Wed, Jul 11, 2018 at 10:15:51AM -0700, Darrick J. Wong wrote:
> > Hmm, this troubles me slightly -- a short time ago you removed the "trim
> > this data fork mapping to the first overlap with a cow fork mapping"
> > (i.e. xfs_reflink_trim_irec_to_next_cow) behavior on the grounds that
> > _writepage_map calls _map_blocks for every block in the page and so it
> > was unnecessary.  But this seems to put that back.  Why is that?
> 
> Because back then map_blocks did a lookup in the COW fork everytime
> (unless we are already on a COW mapping), and this patch wants to
> only look it up when the COW fork actually changed, so the trim is
> required now.

Ah, ok.   Mind if I change the comment to:

/*
 * Truncate to the next COW extent if there is one.  This is the only
 * opportunity to do this because we can skip COW fork lookups for the
 * subsequent blocks in the mapping; however, the requirement to treat
 * the COW range separately remains.
 */

I also wonder why we don't need to do the same for holes, but I think
the answer is that any dirty page with a cow fork mapping must also have
a data fork mapping (even if it's merely a delalloc reservation) and so
a hole will never overlap with a cow fork mapping.

--D

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-11 17:15   ` Darrick J. Wong
@ 2018-07-11 17:20     ` Christoph Hellwig
  2018-07-11 17:31       ` Darrick J. Wong
  0 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-11 17:20 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs

On Wed, Jul 11, 2018 at 10:15:51AM -0700, Darrick J. Wong wrote:
> Hmm, this troubles me slightly -- a short time ago you removed the "trim
> this data fork mapping to the first overlap with a cow fork mapping"
> (i.e. xfs_reflink_trim_irec_to_next_cow) behavior on the grounds that
> _writepage_map calls _map_blocks for every block in the page and so it
> was unnecessary.  But this seems to put that back.  Why is that?

Because back then map_blocks did a lookup in the COW fork everytime
(unless we are already on a COW mapping), and this patch wants to
only look it up when the COW fork actually changed, so the trim is
required now.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-10  6:05 ` [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change Christoph Hellwig
@ 2018-07-11 17:15   ` Darrick J. Wong
  2018-07-11 17:20     ` Christoph Hellwig
  0 siblings, 1 reply; 28+ messages in thread
From: Darrick J. Wong @ 2018-07-11 17:15 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Tue, Jul 10, 2018 at 08:05:28AM +0200, Christoph Hellwig wrote:
> Used the per-fork sequence counter to avoid lookups in the writeback code
> unless the COW fork actually changed.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_aops.c  | 28 +++++++++++++++++++++++-----
>  fs/xfs/xfs_iomap.c |  4 +++-
>  fs/xfs/xfs_iomap.h |  2 +-
>  3 files changed, 27 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 814100d27343..495d5e74fd00 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -29,6 +29,7 @@
>  struct xfs_writepage_ctx {
>  	struct xfs_bmbt_irec    imap;
>  	unsigned int		io_type;
> +	unsigned int		cow_seq;
>  	struct xfs_ioend	*ioend;
>  };
>  
> @@ -310,6 +311,7 @@ xfs_map_blocks(
>  	struct xfs_mount	*mp = ip->i_mount;
>  	ssize_t			count = i_blocksize(inode);
>  	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
> +	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
>  	struct xfs_bmbt_irec	imap;
>  	int			whichfork = XFS_DATA_FORK;
>  	struct xfs_iext_cursor	icur;
> @@ -333,12 +335,15 @@ xfs_map_blocks(
>  	 * COW fork blocks can overlap data fork blocks even if the blocks
>  	 * aren't shared.  COW I/O always takes precedent, so we must always
>  	 * check for overlap on reflink inodes unless the mapping is already a
> -	 * COW one.
> +	 * COW one, or the COW fork hasn't changed from the last time we looked
> +	 * at it.
>  	 */
>  	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
>  		     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
>  	if (imap_valid &&
> -	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
> +	    (!xfs_inode_has_cow_data(ip) ||
> +	     wpc->io_type == XFS_IO_COW ||
> +	     wpc->cow_seq == ip->i_cowfp->if_seq))
>  		return 0;
>  
>  	if (XFS_FORCED_SHUTDOWN(mp))
> @@ -364,8 +369,10 @@ xfs_map_blocks(
>  	 * it directly instead of looking up anything in the data fork.
>  	 */
>  	if (xfs_inode_has_cow_data(ip) &&
> -	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
> -	    imap.br_startoff <= offset_fsb) {
> +	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
> +		cow_fsb = imap.br_startoff;
> +	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
> +		wpc->cow_seq = ip->i_cowfp->if_seq;
>  		xfs_iunlock(ip, XFS_ILOCK_SHARED);
>  		/*
>  		 * Truncate can race with writeback since writeback doesn't
> @@ -411,6 +418,14 @@ xfs_map_blocks(
>  		imap.br_startblock = HOLESTARTBLOCK;
>  		wpc->io_type = XFS_IO_HOLE;
>  	} else {
> +		/*
> +		 * Truncate to the next COW extent if there is one.  We'll
> +		 * need to treat the COW range separately.
> +		 */
> +		if (cow_fsb != NULLFILEOFF &&
> +		    cow_fsb < imap.br_startoff + imap.br_blockcount)
> +			imap.br_blockcount = cow_fsb - imap.br_startoff;

Hmm, this troubles me slightly -- a short time ago you removed the "trim
this data fork mapping to the first overlap with a cow fork mapping"
(i.e. xfs_reflink_trim_irec_to_next_cow) behavior on the grounds that
_writepage_map calls _map_blocks for every block in the page and so it
was unnecessary.  But this seems to put that back.  Why is that?

--D

> +
>  		if (isnullstartblock(imap.br_startblock)) {
>  			/* got a delalloc extent */
>  			wpc->io_type = XFS_IO_DELALLOC;
> @@ -427,9 +442,12 @@ xfs_map_blocks(
>  	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
>  	return 0;
>  allocate_blocks:
> -	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap);
> +	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
> +			&wpc->cow_seq);
>  	if (error)
>  		return error;
> +	ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
> +	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
>  	wpc->imap = imap;
>  	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
>  	return 0;
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index fb9746cc7338..a843766bf7c5 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -660,7 +660,8 @@ xfs_iomap_write_allocate(
>  	xfs_inode_t	*ip,
>  	int		whichfork,
>  	xfs_off_t	offset,
> -	xfs_bmbt_irec_t *imap)
> +	xfs_bmbt_irec_t *imap,
> +	unsigned int	*cow_seq)
>  {
>  	xfs_mount_t	*mp = ip->i_mount;
>  	xfs_fileoff_t	offset_fsb, last_block;
> @@ -784,6 +785,7 @@ xfs_iomap_write_allocate(
>  			if (error)
>  				goto error0;
>  
> +			*cow_seq = XFS_IFORK_PTR(ip, whichfork)->if_seq;
>  			xfs_iunlock(ip, XFS_ILOCK_EXCL);
>  		}
>  
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index 83474c9cede9..c6170548831b 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -14,7 +14,7 @@ struct xfs_bmbt_irec;
>  int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
>  			struct xfs_bmbt_irec *, int);
>  int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
> -			struct xfs_bmbt_irec *);
> +			struct xfs_bmbt_irec *, unsigned int *);
>  int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
>  
>  void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
> -- 
> 2.18.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
  2018-07-10  6:05 reduce lookups in the COW extent tree Christoph Hellwig
@ 2018-07-10  6:05 ` Christoph Hellwig
  2018-07-11 17:15   ` Darrick J. Wong
  0 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2018-07-10  6:05 UTC (permalink / raw)
  To: linux-xfs

Used the per-fork sequence counter to avoid lookups in the writeback code
unless the COW fork actually changed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c  | 28 +++++++++++++++++++++++-----
 fs/xfs/xfs_iomap.c |  4 +++-
 fs/xfs/xfs_iomap.h |  2 +-
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 814100d27343..495d5e74fd00 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -29,6 +29,7 @@
 struct xfs_writepage_ctx {
 	struct xfs_bmbt_irec    imap;
 	unsigned int		io_type;
+	unsigned int		cow_seq;
 	struct xfs_ioend	*ioend;
 };
 
@@ -310,6 +311,7 @@ xfs_map_blocks(
 	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			count = i_blocksize(inode);
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
+	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
 	struct xfs_bmbt_irec	imap;
 	int			whichfork = XFS_DATA_FORK;
 	struct xfs_iext_cursor	icur;
@@ -333,12 +335,15 @@ xfs_map_blocks(
 	 * COW fork blocks can overlap data fork blocks even if the blocks
 	 * aren't shared.  COW I/O always takes precedent, so we must always
 	 * check for overlap on reflink inodes unless the mapping is already a
-	 * COW one.
+	 * COW one, or the COW fork hasn't changed from the last time we looked
+	 * at it.
 	 */
 	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
 		     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
 	if (imap_valid &&
-	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
+	    (!xfs_inode_has_cow_data(ip) ||
+	     wpc->io_type == XFS_IO_COW ||
+	     wpc->cow_seq == ip->i_cowfp->if_seq))
 		return 0;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
@@ -364,8 +369,10 @@ xfs_map_blocks(
 	 * it directly instead of looking up anything in the data fork.
 	 */
 	if (xfs_inode_has_cow_data(ip) &&
-	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
-	    imap.br_startoff <= offset_fsb) {
+	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
+		cow_fsb = imap.br_startoff;
+	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
+		wpc->cow_seq = ip->i_cowfp->if_seq;
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 		/*
 		 * Truncate can race with writeback since writeback doesn't
@@ -411,6 +418,14 @@ xfs_map_blocks(
 		imap.br_startblock = HOLESTARTBLOCK;
 		wpc->io_type = XFS_IO_HOLE;
 	} else {
+		/*
+		 * Truncate to the next COW extent if there is one.  We'll
+		 * need to treat the COW range separately.
+		 */
+		if (cow_fsb != NULLFILEOFF &&
+		    cow_fsb < imap.br_startoff + imap.br_blockcount)
+			imap.br_blockcount = cow_fsb - imap.br_startoff;
+
 		if (isnullstartblock(imap.br_startblock)) {
 			/* got a delalloc extent */
 			wpc->io_type = XFS_IO_DELALLOC;
@@ -427,9 +442,12 @@ xfs_map_blocks(
 	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
 	return 0;
 allocate_blocks:
-	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap);
+	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
+			&wpc->cow_seq);
 	if (error)
 		return error;
+	ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
+	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
 	wpc->imap = imap;
 	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
 	return 0;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index fb9746cc7338..a843766bf7c5 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -660,7 +660,8 @@ xfs_iomap_write_allocate(
 	xfs_inode_t	*ip,
 	int		whichfork,
 	xfs_off_t	offset,
-	xfs_bmbt_irec_t *imap)
+	xfs_bmbt_irec_t *imap,
+	unsigned int	*cow_seq)
 {
 	xfs_mount_t	*mp = ip->i_mount;
 	xfs_fileoff_t	offset_fsb, last_block;
@@ -784,6 +785,7 @@ xfs_iomap_write_allocate(
 			if (error)
 				goto error0;
 
+			*cow_seq = XFS_IFORK_PTR(ip, whichfork)->if_seq;
 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		}
 
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 83474c9cede9..c6170548831b 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -14,7 +14,7 @@ struct xfs_bmbt_irec;
 int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 			struct xfs_bmbt_irec *, int);
 int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
-			struct xfs_bmbt_irec *);
+			struct xfs_bmbt_irec *, unsigned int *);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
-- 
2.18.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2018-08-06 18:49 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-17 23:23 reduce lookups in the COW extent tree V3 Christoph Hellwig
2018-07-17 23:24 ` [PATCH 1/6] xfs: remove if_real_bytes Christoph Hellwig
2018-07-17 23:24 ` [PATCH 2/6] xfs: simplify xfs_idata_realloc Christoph Hellwig
2018-07-17 23:24 ` [PATCH 3/6] xfs: remove the xfs_ifork_t typedef Christoph Hellwig
2018-07-17 23:24 ` [PATCH 4/6] xfs: introduce a new xfs_inode_has_cow_data helper Christoph Hellwig
2018-07-17 23:24 ` [PATCH 5/6] xfs: maintain a sequence count for inode fork manipulations Christoph Hellwig
2018-07-18 14:40   ` Carlos Maiolino
2018-07-19 16:32     ` Christoph Hellwig
2018-07-19 18:27       ` Darrick J. Wong
2018-07-23 12:11         ` Carlos Maiolino
2018-07-17 23:24 ` [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change Christoph Hellwig
2018-07-18 14:51   ` Carlos Maiolino
2018-07-21 23:23   ` Dave Chinner
2018-07-23  7:49     ` Christoph Hellwig
2018-07-24 22:35       ` Darrick J. Wong
2018-07-27 15:10         ` Christoph Hellwig
2018-08-06  2:37           ` Dave Chinner
2018-08-06 16:45             ` Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2018-07-12 13:49 reduce lookups in the COW extent tree V2 Christoph Hellwig
2018-07-12 13:49 ` [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change Christoph Hellwig
2018-07-13 22:51   ` Darrick J. Wong
2018-07-14  0:03   ` Dave Chinner
2018-07-17 13:37     ` Christoph Hellwig
2018-07-17 23:13       ` Dave Chinner
2018-07-10  6:05 reduce lookups in the COW extent tree Christoph Hellwig
2018-07-10  6:05 ` [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change Christoph Hellwig
2018-07-11 17:15   ` Darrick J. Wong
2018-07-11 17:20     ` Christoph Hellwig
2018-07-11 17:31       ` Darrick J. Wong
2018-07-11 17:35         ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.