linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4] xfs: fixes for 5.4
@ 2019-08-26 21:48 Darrick J. Wong
  2019-08-26 21:48 ` [PATCH 1/4] xfs: bmap scrub should only scrub records once Darrick J. Wong
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:48 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, bfoster

Hi all,

Various fixes for 5.4.

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=xfs-5.4-fixes

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=xfs-5.4-fixes

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/4] xfs: bmap scrub should only scrub records once
  2019-08-26 21:48 [PATCH 0/4] xfs: fixes for 5.4 Darrick J. Wong
@ 2019-08-26 21:48 ` Darrick J. Wong
  2019-08-26 23:08   ` Dave Chinner
  2019-08-27 13:14   ` Brian Foster
  2019-08-26 21:48 ` [PATCH 2/4] xfs: fix maxicount division by zero error Darrick J. Wong
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 16+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:48 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, bfoster

From: Darrick J. Wong <darrick.wong@oracle.com>

The inode block mapping scrub function does more work for btree format
extent maps than is absolutely necessary -- first it will walk the bmbt
and check all the entries, and then it will load the incore tree and
check every entry in that tree, possibly for a second time.

Simplify the code and decrease check runtime by separating the two
responsibilities.  The bmbt walk will make sure the incore extent
mappings are loaded, check the shape of the bmap btree (via xchk_btree)
and check that every bmbt record has a corresponding incore extent map;
and the incore extent map walk takes all the responsibility for checking
the mapping records and cross referencing them with other AG metadata.

This enables us to clean up some messy parameter handling and reduce
redundant code.  Rename a few functions to make the split of
responsibilities clearer.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/scrub/bmap.c |   76 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 45 insertions(+), 31 deletions(-)


diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 1bd29fdc2ab5..f6ed6eb133a6 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -75,6 +75,7 @@ struct xchk_bmap_info {
 	xfs_fileoff_t		lastoff;
 	bool			is_rt;
 	bool			is_shared;
+	bool			was_loaded;
 	int			whichfork;
 };
 
@@ -213,25 +214,20 @@ xchk_bmap_xref_rmap(
 
 /* Cross-reference a single rtdev extent record. */
 STATIC void
-xchk_bmap_rt_extent_xref(
-	struct xchk_bmap_info	*info,
+xchk_bmap_rt_iextent_xref(
 	struct xfs_inode	*ip,
-	struct xfs_btree_cur	*cur,
+	struct xchk_bmap_info	*info,
 	struct xfs_bmbt_irec	*irec)
 {
-	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
-		return;
-
 	xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
 			irec->br_blockcount);
 }
 
 /* Cross-reference a single datadev extent record. */
 STATIC void
-xchk_bmap_extent_xref(
-	struct xchk_bmap_info	*info,
+xchk_bmap_iextent_xref(
 	struct xfs_inode	*ip,
-	struct xfs_btree_cur	*cur,
+	struct xchk_bmap_info	*info,
 	struct xfs_bmbt_irec	*irec)
 {
 	struct xfs_mount	*mp = info->sc->mp;
@@ -240,9 +236,6 @@ xchk_bmap_extent_xref(
 	xfs_extlen_t		len;
 	int			error;
 
-	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
-		return;
-
 	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
 	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
 	len = irec->br_blockcount;
@@ -300,20 +293,15 @@ xchk_bmap_dirattr_extent(
 
 /* Scrub a single extent record. */
 STATIC int
-xchk_bmap_extent(
+xchk_bmap_iextent(
 	struct xfs_inode	*ip,
-	struct xfs_btree_cur	*cur,
 	struct xchk_bmap_info	*info,
 	struct xfs_bmbt_irec	*irec)
 {
 	struct xfs_mount	*mp = info->sc->mp;
-	struct xfs_buf		*bp = NULL;
 	xfs_filblks_t		end;
 	int			error = 0;
 
-	if (cur)
-		xfs_btree_get_block(cur, 0, &bp);
-
 	/*
 	 * Check for out-of-order extents.  This record could have come
 	 * from the incore list, for which there is no ordering check.
@@ -364,10 +352,13 @@ xchk_bmap_extent(
 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 
+	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		return 0;
+
 	if (info->is_rt)
-		xchk_bmap_rt_extent_xref(info, ip, cur, irec);
+		xchk_bmap_rt_iextent_xref(ip, info, irec);
 	else
-		xchk_bmap_extent_xref(info, ip, cur, irec);
+		xchk_bmap_iextent_xref(ip, info, irec);
 
 	info->lastoff = irec->br_startoff + irec->br_blockcount;
 	return error;
@@ -380,10 +371,13 @@ xchk_bmapbt_rec(
 	union xfs_btree_rec	*rec)
 {
 	struct xfs_bmbt_irec	irec;
+	struct xfs_bmbt_irec	iext_irec;
+	struct xfs_iext_cursor	icur;
 	struct xchk_bmap_info	*info = bs->private;
 	struct xfs_inode	*ip = bs->cur->bc_private.b.ip;
 	struct xfs_buf		*bp = NULL;
 	struct xfs_btree_block	*block;
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, info->whichfork);
 	uint64_t		owner;
 	int			i;
 
@@ -402,9 +396,25 @@ xchk_bmapbt_rec(
 		}
 	}
 
-	/* Set up the in-core record and scrub it. */
+	/*
+	 * Check that the incore extent tree contains an extent that matches
+	 * this one exactly.  We validate those cached bmaps later, so we don't
+	 * need to check them here.  If the extent tree was freshly loaded by
+	 * the scrubber then we skip the check entirely.
+	 */
+	if (info->was_loaded)
+		return 0;
+
 	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
-	return xchk_bmap_extent(ip, bs->cur, info, &irec);
+	if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
+				&iext_irec) ||
+	    irec.br_startoff != iext_irec.br_startoff ||
+	    irec.br_startblock != iext_irec.br_startblock ||
+	    irec.br_blockcount != iext_irec.br_blockcount ||
+	    irec.br_state != iext_irec.br_state)
+		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
+				irec.br_startoff);
+	return 0;
 }
 
 /* Scan the btree records. */
@@ -415,15 +425,26 @@ xchk_bmap_btree(
 	struct xchk_bmap_info	*info)
 {
 	struct xfs_owner_info	oinfo;
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(sc->ip, whichfork);
 	struct xfs_mount	*mp = sc->mp;
 	struct xfs_inode	*ip = sc->ip;
 	struct xfs_btree_cur	*cur;
 	int			error;
 
+	/* Load the incore bmap cache if it's not loaded. */
+	info->was_loaded = ifp->if_flags & XFS_IFEXTENTS;
+	if (!info->was_loaded) {
+		error = xfs_iread_extents(sc->tp, ip, whichfork);
+		if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
+			goto out;
+	}
+
+	/* Check the btree structure. */
 	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
 	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
 	error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
 	xfs_btree_del_cursor(cur, error);
+out:
 	return error;
 }
 
@@ -671,13 +692,6 @@ xchk_bmap(
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		goto out;
 
-	/* Now try to scrub the in-memory extent list. */
-        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-		error = xfs_iread_extents(sc->tp, ip, whichfork);
-		if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
-			goto out;
-	}
-
 	/* Find the offset of the last extent in the mapping. */
 	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
 	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
@@ -689,7 +703,7 @@ xchk_bmap(
 	for_each_xfs_iext(ifp, &icur, &irec) {
 		if (xchk_should_terminate(sc, &error) ||
 		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
-			break;
+			goto out;
 		if (isnullstartblock(irec.br_startblock))
 			continue;
 		if (irec.br_startoff >= endoff) {
@@ -697,7 +711,7 @@ xchk_bmap(
 					irec.br_startoff);
 			goto out;
 		}
-		error = xchk_bmap_extent(ip, NULL, &info, &irec);
+		error = xchk_bmap_iextent(ip, &info, &irec);
 		if (error)
 			goto out;
 	}


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/4] xfs: fix maxicount division by zero error
  2019-08-26 21:48 [PATCH 0/4] xfs: fixes for 5.4 Darrick J. Wong
  2019-08-26 21:48 ` [PATCH 1/4] xfs: bmap scrub should only scrub records once Darrick J. Wong
@ 2019-08-26 21:48 ` Darrick J. Wong
  2019-08-26 23:09   ` Dave Chinner
  2019-08-26 21:49 ` [PATCH 3/4] xfs: don't return _QUERY_ABORT from xfs_rmap_has_other_keys Darrick J. Wong
  2019-08-26 21:49 ` [PATCH 4/4] xfs: fix sign handling problem in xfs_bmbt_diff_two_keys Darrick J. Wong
  3 siblings, 1 reply; 16+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:48 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, bfoster

From: Darrick J. Wong <darrick.wong@oracle.com>

In xfs_ialloc_setup_geometry, it's possible for a malicious/corrupt fs
image to set an unreasonably large value for sb_inopblog which will
cause ialloc_blks to be zero.  If sb_imax_pct is also set, this results
in a division by zero error in the second do_div call.  Therefore, force
maxicount to zero if ialloc_blks is zero.

Note that the kernel metadata verifiers will catch the garbage inopblog
value and abort the fs mount long before it tries to set up the inode
geometry; this is needed to avoid a crash in xfs_db while setting up the
xfs_mount structure.

Found by fuzzing sb_inopblog to 122 in xfs/350.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_ialloc.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 04377ab75863..aa190a502326 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2788,7 +2788,7 @@ xfs_ialloc_setup_geometry(
 			inodes);
 
 	/* Set the maximum inode count for this filesystem. */
-	if (sbp->sb_imax_pct) {
+	if (sbp->sb_imax_pct && igeo->ialloc_blks) {
 		/*
 		 * Make sure the maximum inode count is a multiple
 		 * of the units we allocate inodes in.


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/4] xfs: don't return _QUERY_ABORT from xfs_rmap_has_other_keys
  2019-08-26 21:48 [PATCH 0/4] xfs: fixes for 5.4 Darrick J. Wong
  2019-08-26 21:48 ` [PATCH 1/4] xfs: bmap scrub should only scrub records once Darrick J. Wong
  2019-08-26 21:48 ` [PATCH 2/4] xfs: fix maxicount division by zero error Darrick J. Wong
@ 2019-08-26 21:49 ` Darrick J. Wong
  2019-08-26 23:15   ` Dave Chinner
  2019-08-26 21:49 ` [PATCH 4/4] xfs: fix sign handling problem in xfs_bmbt_diff_two_keys Darrick J. Wong
  3 siblings, 1 reply; 16+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:49 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, bfoster

From: Darrick J. Wong <darrick.wong@oracle.com>

The xfs_rmap_has_other_keys helper aborts the iteration as soon as it
has an answer.  Don't let this abort leak out to callers.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_rmap.c |    5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)


diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index e6aeb390b2fb..819693ef852c 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2540,8 +2540,11 @@ xfs_rmap_has_other_keys(
 
 	error = xfs_rmap_query_range(cur, &low, &high,
 			xfs_rmap_has_other_keys_helper, &rks);
+	if (error < 0)
+		return error;
+
 	*has_rmap = rks.has_rmap;
-	return error;
+	return 0;
 }
 
 const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE = {


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 4/4] xfs: fix sign handling problem in xfs_bmbt_diff_two_keys
  2019-08-26 21:48 [PATCH 0/4] xfs: fixes for 5.4 Darrick J. Wong
                   ` (2 preceding siblings ...)
  2019-08-26 21:49 ` [PATCH 3/4] xfs: don't return _QUERY_ABORT from xfs_rmap_has_other_keys Darrick J. Wong
@ 2019-08-26 21:49 ` Darrick J. Wong
  2019-08-26 23:15   ` Dave Chinner
  3 siblings, 1 reply; 16+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:49 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, bfoster

From: Darrick J. Wong <darrick.wong@oracle.com>

In xfs_bmbt_diff_two_keys, we perform a signed int64_t subtraction with
two unsigned 64-bit quantities.  If the second quantity is actually the
"maximum" key (all ones) as used in _query_all, the subtraction
effectively becomes addition of two positive numbers and the function
returns incorrect results.  Fix this with explicit comparisons of the
unsigned values.  Nobody needs this now, but the online repair patches
will need this to work properly.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap_btree.c |   16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index fbb18ba5d905..3c1a805b3775 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -400,8 +400,20 @@ xfs_bmbt_diff_two_keys(
 	union xfs_btree_key	*k1,
 	union xfs_btree_key	*k2)
 {
-	return (int64_t)be64_to_cpu(k1->bmbt.br_startoff) -
-			  be64_to_cpu(k2->bmbt.br_startoff);
+	uint64_t		a = be64_to_cpu(k1->bmbt.br_startoff);
+	uint64_t		b = be64_to_cpu(k2->bmbt.br_startoff);
+
+	/*
+	 * Note: This routine previously casted a and b to int64 and subtracted
+	 * them to generate a result.  This lead to problems if b was the
+	 * "maximum" key value (all ones) being signed incorrectly, hence this
+	 * somewhat less efficient version.
+	 */
+	if (a > b)
+		return 1;
+	else if (b > a)
+		return -1;
+	return 0;
 }
 
 static xfs_failaddr_t


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/4] xfs: bmap scrub should only scrub records once
  2019-08-26 21:48 ` [PATCH 1/4] xfs: bmap scrub should only scrub records once Darrick J. Wong
@ 2019-08-26 23:08   ` Dave Chinner
  2019-08-27 13:14   ` Brian Foster
  1 sibling, 0 replies; 16+ messages in thread
From: Dave Chinner @ 2019-08-26 23:08 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, bfoster

On Mon, Aug 26, 2019 at 02:48:49PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> The inode block mapping scrub function does more work for btree format
> extent maps than is absolutely necessary -- first it will walk the bmbt
> and check all the entries, and then it will load the incore tree and
> check every entry in that tree, possibly for a second time.
> 
> Simplify the code and decrease check runtime by separating the two
> responsibilities.  The bmbt walk will make sure the incore extent
> mappings are loaded, check the shape of the bmap btree (via xchk_btree)
> and check that every bmbt record has a corresponding incore extent map;
> and the incore extent map walk takes all the responsibility for checking
> the mapping records and cross referencing them with other AG metadata.
> 
> This enables us to clean up some messy parameter handling and reduce
> redundant code.  Rename a few functions to make the split of
> responsibilities clearer.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Looks fine.

Reviewed-by: Dave Chinner <dchinner@redhat.com>
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] xfs: fix maxicount division by zero error
  2019-08-26 21:48 ` [PATCH 2/4] xfs: fix maxicount division by zero error Darrick J. Wong
@ 2019-08-26 23:09   ` Dave Chinner
  0 siblings, 0 replies; 16+ messages in thread
From: Dave Chinner @ 2019-08-26 23:09 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, bfoster

On Mon, Aug 26, 2019 at 02:48:56PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> In xfs_ialloc_setup_geometry, it's possible for a malicious/corrupt fs
> image to set an unreasonably large value for sb_inopblog which will
> cause ialloc_blks to be zero.  If sb_imax_pct is also set, this results
> in a division by zero error in the second do_div call.  Therefore, force
> maxicount to zero if ialloc_blks is zero.
> 
> Note that the kernel metadata verifiers will catch the garbage inopblog
> value and abort the fs mount long before it tries to set up the inode
> geometry; this is needed to avoid a crash in xfs_db while setting up the
> xfs_mount structure.
> 
> Found by fuzzing sb_inopblog to 122 in xfs/350.

Harmless for the kernel, makes sense for shared code.

Reviewed-by: Dave Chinner <dchinner@redhat.com>
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] xfs: don't return _QUERY_ABORT from xfs_rmap_has_other_keys
  2019-08-26 21:49 ` [PATCH 3/4] xfs: don't return _QUERY_ABORT from xfs_rmap_has_other_keys Darrick J. Wong
@ 2019-08-26 23:15   ` Dave Chinner
  0 siblings, 0 replies; 16+ messages in thread
From: Dave Chinner @ 2019-08-26 23:15 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, bfoster

On Mon, Aug 26, 2019 at 02:49:02PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> The xfs_rmap_has_other_keys helper aborts the iteration as soon as it
> has an answer.  Don't let this abort leak out to callers.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_rmap.c |    5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
> index e6aeb390b2fb..819693ef852c 100644
> --- a/fs/xfs/libxfs/xfs_rmap.c
> +++ b/fs/xfs/libxfs/xfs_rmap.c
> @@ -2540,8 +2540,11 @@ xfs_rmap_has_other_keys(
>  
>  	error = xfs_rmap_query_range(cur, &low, &high,
>  			xfs_rmap_has_other_keys_helper, &rks);
> +	if (error < 0)
> +		return error;
> +
>  	*has_rmap = rks.has_rmap;
> -	return error;
> +	return 0;
>  }

Yup, makes sense - XFS_BTREE_QUERY_RANGE_ABORT has a positive value,
so this makes success have a return value of 0 and that fixes
the spurious error detection in xrep_reap_block().

Reviewed-by: Dave Chinner <dchinner@redhat.com>

-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] xfs: fix sign handling problem in xfs_bmbt_diff_two_keys
  2019-08-26 21:49 ` [PATCH 4/4] xfs: fix sign handling problem in xfs_bmbt_diff_two_keys Darrick J. Wong
@ 2019-08-26 23:15   ` Dave Chinner
  2019-08-27 13:01     ` Eric Sandeen
  0 siblings, 1 reply; 16+ messages in thread
From: Dave Chinner @ 2019-08-26 23:15 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, bfoster

On Mon, Aug 26, 2019 at 02:49:09PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> In xfs_bmbt_diff_two_keys, we perform a signed int64_t subtraction with
> two unsigned 64-bit quantities.  If the second quantity is actually the
> "maximum" key (all ones) as used in _query_all, the subtraction
> effectively becomes addition of two positive numbers and the function
> returns incorrect results.  Fix this with explicit comparisons of the
> unsigned values.  Nobody needs this now, but the online repair patches
> will need this to work properly.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_bmap_btree.c |   16 ++++++++++++++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
> index fbb18ba5d905..3c1a805b3775 100644
> --- a/fs/xfs/libxfs/xfs_bmap_btree.c
> +++ b/fs/xfs/libxfs/xfs_bmap_btree.c
> @@ -400,8 +400,20 @@ xfs_bmbt_diff_two_keys(
>  	union xfs_btree_key	*k1,
>  	union xfs_btree_key	*k2)
>  {
> -	return (int64_t)be64_to_cpu(k1->bmbt.br_startoff) -
> -			  be64_to_cpu(k2->bmbt.br_startoff);
> +	uint64_t		a = be64_to_cpu(k1->bmbt.br_startoff);
> +	uint64_t		b = be64_to_cpu(k2->bmbt.br_startoff);
> +
> +	/*
> +	 * Note: This routine previously casted a and b to int64 and subtracted
> +	 * them to generate a result.  This lead to problems if b was the
> +	 * "maximum" key value (all ones) being signed incorrectly, hence this
> +	 * somewhat less efficient version.
> +	 */
> +	if (a > b)
> +		return 1;
> +	else if (b > a)
> +		return -1;

No need for an else here, but otherwise OK.

Reviewed-by: Dave Chinner <dchinner@redhat.com>

-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] xfs: fix sign handling problem in xfs_bmbt_diff_two_keys
  2019-08-26 23:15   ` Dave Chinner
@ 2019-08-27 13:01     ` Eric Sandeen
  2019-08-27 15:19       ` Darrick J. Wong
  0 siblings, 1 reply; 16+ messages in thread
From: Eric Sandeen @ 2019-08-27 13:01 UTC (permalink / raw)
  To: Dave Chinner, Darrick J. Wong; +Cc: linux-xfs, bfoster

On 8/26/19 6:15 PM, Dave Chinner wrote:
> On Mon, Aug 26, 2019 at 02:49:09PM -0700, Darrick J. Wong wrote:
>> From: Darrick J. Wong <darrick.wong@oracle.com>
>>
>> In xfs_bmbt_diff_two_keys, we perform a signed int64_t subtraction with
>> two unsigned 64-bit quantities.  If the second quantity is actually the
>> "maximum" key (all ones) as used in _query_all, the subtraction
>> effectively becomes addition of two positive numbers and the function
>> returns incorrect results.  Fix this with explicit comparisons of the
>> unsigned values.  Nobody needs this now, but the online repair patches
>> will need this to work properly.
>>
>> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
>> ---
>>  fs/xfs/libxfs/xfs_bmap_btree.c |   16 ++++++++++++++--
>>  1 file changed, 14 insertions(+), 2 deletions(-)
>>
>>
>> diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
>> index fbb18ba5d905..3c1a805b3775 100644
>> --- a/fs/xfs/libxfs/xfs_bmap_btree.c
>> +++ b/fs/xfs/libxfs/xfs_bmap_btree.c
>> @@ -400,8 +400,20 @@ xfs_bmbt_diff_two_keys(
>>  	union xfs_btree_key	*k1,
>>  	union xfs_btree_key	*k2)
>>  {
>> -	return (int64_t)be64_to_cpu(k1->bmbt.br_startoff) -
>> -			  be64_to_cpu(k2->bmbt.br_startoff);
>> +	uint64_t		a = be64_to_cpu(k1->bmbt.br_startoff);
>> +	uint64_t		b = be64_to_cpu(k2->bmbt.br_startoff);
>> +
>> +	/*
>> +	 * Note: This routine previously casted a and b to int64 and subtracted
>> +	 * them to generate a result.  This lead to problems if b was the
>> +	 * "maximum" key value (all ones) being signed incorrectly, hence this
>> +	 * somewhat less efficient version.
>> +	 */
>> +	if (a > b)
>> +		return 1;
>> +	else if (b > a)
>> +		return -1;
> 
> No need for an else here, but otherwise OK.
> 
> Reviewed-by: Dave Chinner <dchinner@redhat.com>

In fact having the else means the a == b case isn't handled, even if it
should never happen, so might a static checker eventually complain about
reaching the end of a non-void function?

-Eric


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/4] xfs: bmap scrub should only scrub records once
  2019-08-26 21:48 ` [PATCH 1/4] xfs: bmap scrub should only scrub records once Darrick J. Wong
  2019-08-26 23:08   ` Dave Chinner
@ 2019-08-27 13:14   ` Brian Foster
  2019-08-27 15:18     ` Darrick J. Wong
  1 sibling, 1 reply; 16+ messages in thread
From: Brian Foster @ 2019-08-27 13:14 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, Aug 26, 2019 at 02:48:49PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> The inode block mapping scrub function does more work for btree format
> extent maps than is absolutely necessary -- first it will walk the bmbt
> and check all the entries, and then it will load the incore tree and
> check every entry in that tree, possibly for a second time.
> 
> Simplify the code and decrease check runtime by separating the two
> responsibilities.  The bmbt walk will make sure the incore extent
> mappings are loaded, check the shape of the bmap btree (via xchk_btree)
> and check that every bmbt record has a corresponding incore extent map;
> and the incore extent map walk takes all the responsibility for checking
> the mapping records and cross referencing them with other AG metadata.
> 
> This enables us to clean up some messy parameter handling and reduce
> redundant code.  Rename a few functions to make the split of
> responsibilities clearer.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/scrub/bmap.c |   76 ++++++++++++++++++++++++++++++---------------------
>  1 file changed, 45 insertions(+), 31 deletions(-)
> 
> 
> diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
> index 1bd29fdc2ab5..f6ed6eb133a6 100644
> --- a/fs/xfs/scrub/bmap.c
> +++ b/fs/xfs/scrub/bmap.c
...
> @@ -402,9 +396,25 @@ xchk_bmapbt_rec(
>  		}
>  	}
>  
> -	/* Set up the in-core record and scrub it. */
> +	/*
> +	 * Check that the incore extent tree contains an extent that matches
> +	 * this one exactly.  We validate those cached bmaps later, so we don't
> +	 * need to check them here.  If the extent tree was freshly loaded by
> +	 * the scrubber then we skip the check entirely.
> +	 */
> +	if (info->was_loaded)
> +		return 0;
> +

This all looks fine to me except that I don't follow the reasoning for
skipping the lookup from the comment. Are we just saying that if we
loaded the extent tree, then we can assume it reflects what is on disk?
If so, can we fix up the last sentence in the comment to explain?

Brian

>  	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
> -	return xchk_bmap_extent(ip, bs->cur, info, &irec);
> +	if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
> +				&iext_irec) ||
> +	    irec.br_startoff != iext_irec.br_startoff ||
> +	    irec.br_startblock != iext_irec.br_startblock ||
> +	    irec.br_blockcount != iext_irec.br_blockcount ||
> +	    irec.br_state != iext_irec.br_state)
> +		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
> +				irec.br_startoff);
> +	return 0;
>  }
>  
>  /* Scan the btree records. */
> @@ -415,15 +425,26 @@ xchk_bmap_btree(
>  	struct xchk_bmap_info	*info)
>  {
>  	struct xfs_owner_info	oinfo;
> +	struct xfs_ifork	*ifp = XFS_IFORK_PTR(sc->ip, whichfork);
>  	struct xfs_mount	*mp = sc->mp;
>  	struct xfs_inode	*ip = sc->ip;
>  	struct xfs_btree_cur	*cur;
>  	int			error;
>  
> +	/* Load the incore bmap cache if it's not loaded. */
> +	info->was_loaded = ifp->if_flags & XFS_IFEXTENTS;
> +	if (!info->was_loaded) {
> +		error = xfs_iread_extents(sc->tp, ip, whichfork);
> +		if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
> +			goto out;
> +	}
> +
> +	/* Check the btree structure. */
>  	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
>  	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
>  	error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
>  	xfs_btree_del_cursor(cur, error);
> +out:
>  	return error;
>  }
>  
> @@ -671,13 +692,6 @@ xchk_bmap(
>  	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
>  		goto out;
>  
> -	/* Now try to scrub the in-memory extent list. */
> -        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> -		error = xfs_iread_extents(sc->tp, ip, whichfork);
> -		if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
> -			goto out;
> -	}
> -
>  	/* Find the offset of the last extent in the mapping. */
>  	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
>  	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
> @@ -689,7 +703,7 @@ xchk_bmap(
>  	for_each_xfs_iext(ifp, &icur, &irec) {
>  		if (xchk_should_terminate(sc, &error) ||
>  		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
> -			break;
> +			goto out;
>  		if (isnullstartblock(irec.br_startblock))
>  			continue;
>  		if (irec.br_startoff >= endoff) {
> @@ -697,7 +711,7 @@ xchk_bmap(
>  					irec.br_startoff);
>  			goto out;
>  		}
> -		error = xchk_bmap_extent(ip, NULL, &info, &irec);
> +		error = xchk_bmap_iextent(ip, &info, &irec);
>  		if (error)
>  			goto out;
>  	}
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/4] xfs: bmap scrub should only scrub records once
  2019-08-27 13:14   ` Brian Foster
@ 2019-08-27 15:18     ` Darrick J. Wong
  2019-08-27 15:21       ` Brian Foster
  0 siblings, 1 reply; 16+ messages in thread
From: Darrick J. Wong @ 2019-08-27 15:18 UTC (permalink / raw)
  To: Brian Foster; +Cc: linux-xfs

On Tue, Aug 27, 2019 at 09:14:03AM -0400, Brian Foster wrote:
> On Mon, Aug 26, 2019 at 02:48:49PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > The inode block mapping scrub function does more work for btree format
> > extent maps than is absolutely necessary -- first it will walk the bmbt
> > and check all the entries, and then it will load the incore tree and
> > check every entry in that tree, possibly for a second time.
> > 
> > Simplify the code and decrease check runtime by separating the two
> > responsibilities.  The bmbt walk will make sure the incore extent
> > mappings are loaded, check the shape of the bmap btree (via xchk_btree)
> > and check that every bmbt record has a corresponding incore extent map;
> > and the incore extent map walk takes all the responsibility for checking
> > the mapping records and cross referencing them with other AG metadata.
> > 
> > This enables us to clean up some messy parameter handling and reduce
> > redundant code.  Rename a few functions to make the split of
> > responsibilities clearer.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/scrub/bmap.c |   76 ++++++++++++++++++++++++++++++---------------------
> >  1 file changed, 45 insertions(+), 31 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
> > index 1bd29fdc2ab5..f6ed6eb133a6 100644
> > --- a/fs/xfs/scrub/bmap.c
> > +++ b/fs/xfs/scrub/bmap.c
> ...
> > @@ -402,9 +396,25 @@ xchk_bmapbt_rec(
> >  		}
> >  	}
> >  
> > -	/* Set up the in-core record and scrub it. */
> > +	/*
> > +	 * Check that the incore extent tree contains an extent that matches
> > +	 * this one exactly.  We validate those cached bmaps later, so we don't
> > +	 * need to check them here.  If the extent tree was freshly loaded by
> > +	 * the scrubber then we skip the check entirely.
> > +	 */
> > +	if (info->was_loaded)
> > +		return 0;
> > +
> 
> This all looks fine to me except that I don't follow the reasoning for
> skipping the lookup from the comment. Are we just saying that if we
> loaded the extent tree, then we can assume it reflects what is on disk?
> If so, can we fix up the last sentence in the comment to explain?

Yes.

"If the incore extent tree was just loaded from disk by the scrubber,
we assume that its contents match what's on disk and skip the
equivalence check." ?

--D

> Brian
> 
> >  	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
> > -	return xchk_bmap_extent(ip, bs->cur, info, &irec);
> > +	if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
> > +				&iext_irec) ||
> > +	    irec.br_startoff != iext_irec.br_startoff ||
> > +	    irec.br_startblock != iext_irec.br_startblock ||
> > +	    irec.br_blockcount != iext_irec.br_blockcount ||
> > +	    irec.br_state != iext_irec.br_state)
> > +		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
> > +				irec.br_startoff);
> > +	return 0;
> >  }
> >  
> >  /* Scan the btree records. */
> > @@ -415,15 +425,26 @@ xchk_bmap_btree(
> >  	struct xchk_bmap_info	*info)
> >  {
> >  	struct xfs_owner_info	oinfo;
> > +	struct xfs_ifork	*ifp = XFS_IFORK_PTR(sc->ip, whichfork);
> >  	struct xfs_mount	*mp = sc->mp;
> >  	struct xfs_inode	*ip = sc->ip;
> >  	struct xfs_btree_cur	*cur;
> >  	int			error;
> >  
> > +	/* Load the incore bmap cache if it's not loaded. */
> > +	info->was_loaded = ifp->if_flags & XFS_IFEXTENTS;
> > +	if (!info->was_loaded) {
> > +		error = xfs_iread_extents(sc->tp, ip, whichfork);
> > +		if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
> > +			goto out;
> > +	}
> > +
> > +	/* Check the btree structure. */
> >  	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
> >  	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
> >  	error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
> >  	xfs_btree_del_cursor(cur, error);
> > +out:
> >  	return error;
> >  }
> >  
> > @@ -671,13 +692,6 @@ xchk_bmap(
> >  	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
> >  		goto out;
> >  
> > -	/* Now try to scrub the in-memory extent list. */
> > -        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> > -		error = xfs_iread_extents(sc->tp, ip, whichfork);
> > -		if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
> > -			goto out;
> > -	}
> > -
> >  	/* Find the offset of the last extent in the mapping. */
> >  	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
> >  	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
> > @@ -689,7 +703,7 @@ xchk_bmap(
> >  	for_each_xfs_iext(ifp, &icur, &irec) {
> >  		if (xchk_should_terminate(sc, &error) ||
> >  		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
> > -			break;
> > +			goto out;
> >  		if (isnullstartblock(irec.br_startblock))
> >  			continue;
> >  		if (irec.br_startoff >= endoff) {
> > @@ -697,7 +711,7 @@ xchk_bmap(
> >  					irec.br_startoff);
> >  			goto out;
> >  		}
> > -		error = xchk_bmap_extent(ip, NULL, &info, &irec);
> > +		error = xchk_bmap_iextent(ip, &info, &irec);
> >  		if (error)
> >  			goto out;
> >  	}
> > 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] xfs: fix sign handling problem in xfs_bmbt_diff_two_keys
  2019-08-27 13:01     ` Eric Sandeen
@ 2019-08-27 15:19       ` Darrick J. Wong
  2019-08-27 15:20         ` Eric Sandeen
  0 siblings, 1 reply; 16+ messages in thread
From: Darrick J. Wong @ 2019-08-27 15:19 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: Dave Chinner, linux-xfs, bfoster

On Tue, Aug 27, 2019 at 08:01:16AM -0500, Eric Sandeen wrote:
> On 8/26/19 6:15 PM, Dave Chinner wrote:
> > On Mon, Aug 26, 2019 at 02:49:09PM -0700, Darrick J. Wong wrote:
> >> From: Darrick J. Wong <darrick.wong@oracle.com>
> >>
> >> In xfs_bmbt_diff_two_keys, we perform a signed int64_t subtraction with
> >> two unsigned 64-bit quantities.  If the second quantity is actually the
> >> "maximum" key (all ones) as used in _query_all, the subtraction
> >> effectively becomes addition of two positive numbers and the function
> >> returns incorrect results.  Fix this with explicit comparisons of the
> >> unsigned values.  Nobody needs this now, but the online repair patches
> >> will need this to work properly.
> >>
> >> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> >> ---
> >>  fs/xfs/libxfs/xfs_bmap_btree.c |   16 ++++++++++++++--
> >>  1 file changed, 14 insertions(+), 2 deletions(-)
> >>
> >>
> >> diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
> >> index fbb18ba5d905..3c1a805b3775 100644
> >> --- a/fs/xfs/libxfs/xfs_bmap_btree.c
> >> +++ b/fs/xfs/libxfs/xfs_bmap_btree.c
> >> @@ -400,8 +400,20 @@ xfs_bmbt_diff_two_keys(
> >>  	union xfs_btree_key	*k1,
> >>  	union xfs_btree_key	*k2)
> >>  {
> >> -	return (int64_t)be64_to_cpu(k1->bmbt.br_startoff) -
> >> -			  be64_to_cpu(k2->bmbt.br_startoff);
> >> +	uint64_t		a = be64_to_cpu(k1->bmbt.br_startoff);
> >> +	uint64_t		b = be64_to_cpu(k2->bmbt.br_startoff);
> >> +
> >> +	/*
> >> +	 * Note: This routine previously casted a and b to int64 and subtracted
> >> +	 * them to generate a result.  This lead to problems if b was the
> >> +	 * "maximum" key value (all ones) being signed incorrectly, hence this
> >> +	 * somewhat less efficient version.
> >> +	 */
> >> +	if (a > b)
> >> +		return 1;
> >> +	else if (b > a)
> >> +		return -1;
> > 
> > No need for an else here, but otherwise OK.
> > 
> > Reviewed-by: Dave Chinner <dchinner@redhat.com>
> 
> In fact having the else means the a == b case isn't handled, even if it
> should never happen, so might a static checker eventually complain about
> reaching the end of a non-void function?

Hmm?  There's a return 0 after that which Dave's reply clipped.

--D

> -Eric
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] xfs: fix sign handling problem in xfs_bmbt_diff_two_keys
  2019-08-27 15:19       ` Darrick J. Wong
@ 2019-08-27 15:20         ` Eric Sandeen
  0 siblings, 0 replies; 16+ messages in thread
From: Eric Sandeen @ 2019-08-27 15:20 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Dave Chinner, linux-xfs, bfoster

On 8/27/19 10:19 AM, Darrick J. Wong wrote:
> On Tue, Aug 27, 2019 at 08:01:16AM -0500, Eric Sandeen wrote:
>> On 8/26/19 6:15 PM, Dave Chinner wrote:
>>> On Mon, Aug 26, 2019 at 02:49:09PM -0700, Darrick J. Wong wrote:
>>>> From: Darrick J. Wong <darrick.wong@oracle.com>
>>>>
>>>> In xfs_bmbt_diff_two_keys, we perform a signed int64_t subtraction with
>>>> two unsigned 64-bit quantities.  If the second quantity is actually the
>>>> "maximum" key (all ones) as used in _query_all, the subtraction
>>>> effectively becomes addition of two positive numbers and the function
>>>> returns incorrect results.  Fix this with explicit comparisons of the
>>>> unsigned values.  Nobody needs this now, but the online repair patches
>>>> will need this to work properly.
>>>>
>>>> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
>>>> ---
>>>>  fs/xfs/libxfs/xfs_bmap_btree.c |   16 ++++++++++++++--
>>>>  1 file changed, 14 insertions(+), 2 deletions(-)
>>>>
>>>>
>>>> diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
>>>> index fbb18ba5d905..3c1a805b3775 100644
>>>> --- a/fs/xfs/libxfs/xfs_bmap_btree.c
>>>> +++ b/fs/xfs/libxfs/xfs_bmap_btree.c
>>>> @@ -400,8 +400,20 @@ xfs_bmbt_diff_two_keys(
>>>>  	union xfs_btree_key	*k1,
>>>>  	union xfs_btree_key	*k2)
>>>>  {
>>>> -	return (int64_t)be64_to_cpu(k1->bmbt.br_startoff) -
>>>> -			  be64_to_cpu(k2->bmbt.br_startoff);
>>>> +	uint64_t		a = be64_to_cpu(k1->bmbt.br_startoff);
>>>> +	uint64_t		b = be64_to_cpu(k2->bmbt.br_startoff);
>>>> +
>>>> +	/*
>>>> +	 * Note: This routine previously casted a and b to int64 and subtracted
>>>> +	 * them to generate a result.  This lead to problems if b was the
>>>> +	 * "maximum" key value (all ones) being signed incorrectly, hence this
>>>> +	 * somewhat less efficient version.
>>>> +	 */
>>>> +	if (a > b)
>>>> +		return 1;
>>>> +	else if (b > a)
>>>> +		return -1;
>>>
>>> No need for an else here, but otherwise OK.
>>>
>>> Reviewed-by: Dave Chinner <dchinner@redhat.com>
>>
>> In fact having the else means the a == b case isn't handled, even if it
>> should never happen, so might a static checker eventually complain about
>> reaching the end of a non-void function?
> 
> Hmm?  There's a return 0 after that which Dave's reply clipped.

Oh sorry, patch/thread reading skills lacking, too early in the AM.

-Eric

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/4] xfs: bmap scrub should only scrub records once
  2019-08-27 15:18     ` Darrick J. Wong
@ 2019-08-27 15:21       ` Brian Foster
  2019-08-28 16:01         ` Darrick J. Wong
  0 siblings, 1 reply; 16+ messages in thread
From: Brian Foster @ 2019-08-27 15:21 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tue, Aug 27, 2019 at 08:18:12AM -0700, Darrick J. Wong wrote:
> On Tue, Aug 27, 2019 at 09:14:03AM -0400, Brian Foster wrote:
> > On Mon, Aug 26, 2019 at 02:48:49PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > The inode block mapping scrub function does more work for btree format
> > > extent maps than is absolutely necessary -- first it will walk the bmbt
> > > and check all the entries, and then it will load the incore tree and
> > > check every entry in that tree, possibly for a second time.
> > > 
> > > Simplify the code and decrease check runtime by separating the two
> > > responsibilities.  The bmbt walk will make sure the incore extent
> > > mappings are loaded, check the shape of the bmap btree (via xchk_btree)
> > > and check that every bmbt record has a corresponding incore extent map;
> > > and the incore extent map walk takes all the responsibility for checking
> > > the mapping records and cross referencing them with other AG metadata.
> > > 
> > > This enables us to clean up some messy parameter handling and reduce
> > > redundant code.  Rename a few functions to make the split of
> > > responsibilities clearer.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > >  fs/xfs/scrub/bmap.c |   76 ++++++++++++++++++++++++++++++---------------------
> > >  1 file changed, 45 insertions(+), 31 deletions(-)
> > > 
> > > 
> > > diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
> > > index 1bd29fdc2ab5..f6ed6eb133a6 100644
> > > --- a/fs/xfs/scrub/bmap.c
> > > +++ b/fs/xfs/scrub/bmap.c
> > ...
> > > @@ -402,9 +396,25 @@ xchk_bmapbt_rec(
> > >  		}
> > >  	}
> > >  
> > > -	/* Set up the in-core record and scrub it. */
> > > +	/*
> > > +	 * Check that the incore extent tree contains an extent that matches
> > > +	 * this one exactly.  We validate those cached bmaps later, so we don't
> > > +	 * need to check them here.  If the extent tree was freshly loaded by
> > > +	 * the scrubber then we skip the check entirely.
> > > +	 */
> > > +	if (info->was_loaded)
> > > +		return 0;
> > > +
> > 
> > This all looks fine to me except that I don't follow the reasoning for
> > skipping the lookup from the comment. Are we just saying that if we
> > loaded the extent tree, then we can assume it reflects what is on disk?
> > If so, can we fix up the last sentence in the comment to explain?
> 
> Yes.
> 

Ok.

> "If the incore extent tree was just loaded from disk by the scrubber,
> we assume that its contents match what's on disk and skip the
> equivalence check." ?
> 

Sounds good. With that tweak:

Reviewed-by: Brian Foster <bfoster@redhat.com>

> --D
> 
> > Brian
> > 
> > >  	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
> > > -	return xchk_bmap_extent(ip, bs->cur, info, &irec);
> > > +	if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
> > > +				&iext_irec) ||
> > > +	    irec.br_startoff != iext_irec.br_startoff ||
> > > +	    irec.br_startblock != iext_irec.br_startblock ||
> > > +	    irec.br_blockcount != iext_irec.br_blockcount ||
> > > +	    irec.br_state != iext_irec.br_state)
> > > +		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
> > > +				irec.br_startoff);
> > > +	return 0;
> > >  }
> > >  
> > >  /* Scan the btree records. */
> > > @@ -415,15 +425,26 @@ xchk_bmap_btree(
> > >  	struct xchk_bmap_info	*info)
> > >  {
> > >  	struct xfs_owner_info	oinfo;
> > > +	struct xfs_ifork	*ifp = XFS_IFORK_PTR(sc->ip, whichfork);
> > >  	struct xfs_mount	*mp = sc->mp;
> > >  	struct xfs_inode	*ip = sc->ip;
> > >  	struct xfs_btree_cur	*cur;
> > >  	int			error;
> > >  
> > > +	/* Load the incore bmap cache if it's not loaded. */
> > > +	info->was_loaded = ifp->if_flags & XFS_IFEXTENTS;
> > > +	if (!info->was_loaded) {
> > > +		error = xfs_iread_extents(sc->tp, ip, whichfork);
> > > +		if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
> > > +			goto out;
> > > +	}
> > > +
> > > +	/* Check the btree structure. */
> > >  	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
> > >  	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
> > >  	error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
> > >  	xfs_btree_del_cursor(cur, error);
> > > +out:
> > >  	return error;
> > >  }
> > >  
> > > @@ -671,13 +692,6 @@ xchk_bmap(
> > >  	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
> > >  		goto out;
> > >  
> > > -	/* Now try to scrub the in-memory extent list. */
> > > -        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> > > -		error = xfs_iread_extents(sc->tp, ip, whichfork);
> > > -		if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
> > > -			goto out;
> > > -	}
> > > -
> > >  	/* Find the offset of the last extent in the mapping. */
> > >  	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
> > >  	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
> > > @@ -689,7 +703,7 @@ xchk_bmap(
> > >  	for_each_xfs_iext(ifp, &icur, &irec) {
> > >  		if (xchk_should_terminate(sc, &error) ||
> > >  		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
> > > -			break;
> > > +			goto out;
> > >  		if (isnullstartblock(irec.br_startblock))
> > >  			continue;
> > >  		if (irec.br_startoff >= endoff) {
> > > @@ -697,7 +711,7 @@ xchk_bmap(
> > >  					irec.br_startoff);
> > >  			goto out;
> > >  		}
> > > -		error = xchk_bmap_extent(ip, NULL, &info, &irec);
> > > +		error = xchk_bmap_iextent(ip, &info, &irec);
> > >  		if (error)
> > >  			goto out;
> > >  	}
> > > 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/4] xfs: bmap scrub should only scrub records once
  2019-08-27 15:21       ` Brian Foster
@ 2019-08-28 16:01         ` Darrick J. Wong
  0 siblings, 0 replies; 16+ messages in thread
From: Darrick J. Wong @ 2019-08-28 16:01 UTC (permalink / raw)
  To: Brian Foster; +Cc: linux-xfs

On Tue, Aug 27, 2019 at 11:21:47AM -0400, Brian Foster wrote:
> On Tue, Aug 27, 2019 at 08:18:12AM -0700, Darrick J. Wong wrote:
> > On Tue, Aug 27, 2019 at 09:14:03AM -0400, Brian Foster wrote:
> > > On Mon, Aug 26, 2019 at 02:48:49PM -0700, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > 
> > > > The inode block mapping scrub function does more work for btree format
> > > > extent maps than is absolutely necessary -- first it will walk the bmbt
> > > > and check all the entries, and then it will load the incore tree and
> > > > check every entry in that tree, possibly for a second time.
> > > > 
> > > > Simplify the code and decrease check runtime by separating the two
> > > > responsibilities.  The bmbt walk will make sure the incore extent
> > > > mappings are loaded, check the shape of the bmap btree (via xchk_btree)
> > > > and check that every bmbt record has a corresponding incore extent map;
> > > > and the incore extent map walk takes all the responsibility for checking
> > > > the mapping records and cross referencing them with other AG metadata.
> > > > 
> > > > This enables us to clean up some messy parameter handling and reduce
> > > > redundant code.  Rename a few functions to make the split of
> > > > responsibilities clearer.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > ---
> > > >  fs/xfs/scrub/bmap.c |   76 ++++++++++++++++++++++++++++++---------------------
> > > >  1 file changed, 45 insertions(+), 31 deletions(-)
> > > > 
> > > > 
> > > > diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
> > > > index 1bd29fdc2ab5..f6ed6eb133a6 100644
> > > > --- a/fs/xfs/scrub/bmap.c
> > > > +++ b/fs/xfs/scrub/bmap.c
> > > ...
> > > > @@ -402,9 +396,25 @@ xchk_bmapbt_rec(
> > > >  		}
> > > >  	}
> > > >  
> > > > -	/* Set up the in-core record and scrub it. */
> > > > +	/*
> > > > +	 * Check that the incore extent tree contains an extent that matches
> > > > +	 * this one exactly.  We validate those cached bmaps later, so we don't
> > > > +	 * need to check them here.  If the extent tree was freshly loaded by
> > > > +	 * the scrubber then we skip the check entirely.
> > > > +	 */
> > > > +	if (info->was_loaded)

Heh, I didn't even spot the backwards logic here.

If the bmap scrub just loaded the extent tree then was_loaded will be
false, so this needs to be "if (!info->was_loaded) return 0;".

I fixed it before I sent it to for-next.

--D

> > > > +		return 0;
> > > > +
> > > 
> > > This all looks fine to me except that I don't follow the reasoning for
> > > skipping the lookup from the comment. Are we just saying that if we
> > > loaded the extent tree, then we can assume it reflects what is on disk?
> > > If so, can we fix up the last sentence in the comment to explain?
> > 
> > Yes.
> > 
> 
> Ok.
> 
> > "If the incore extent tree was just loaded from disk by the scrubber,
> > we assume that its contents match what's on disk and skip the
> > equivalence check." ?
> > 
> 
> Sounds good. With that tweak:
> 
> Reviewed-by: Brian Foster <bfoster@redhat.com>
> 
> > --D
> > 
> > > Brian
> > > 
> > > >  	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
> > > > -	return xchk_bmap_extent(ip, bs->cur, info, &irec);
> > > > +	if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
> > > > +				&iext_irec) ||
> > > > +	    irec.br_startoff != iext_irec.br_startoff ||
> > > > +	    irec.br_startblock != iext_irec.br_startblock ||
> > > > +	    irec.br_blockcount != iext_irec.br_blockcount ||
> > > > +	    irec.br_state != iext_irec.br_state)
> > > > +		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
> > > > +				irec.br_startoff);
> > > > +	return 0;
> > > >  }
> > > >  
> > > >  /* Scan the btree records. */
> > > > @@ -415,15 +425,26 @@ xchk_bmap_btree(
> > > >  	struct xchk_bmap_info	*info)
> > > >  {
> > > >  	struct xfs_owner_info	oinfo;
> > > > +	struct xfs_ifork	*ifp = XFS_IFORK_PTR(sc->ip, whichfork);
> > > >  	struct xfs_mount	*mp = sc->mp;
> > > >  	struct xfs_inode	*ip = sc->ip;
> > > >  	struct xfs_btree_cur	*cur;
> > > >  	int			error;
> > > >  
> > > > +	/* Load the incore bmap cache if it's not loaded. */
> > > > +	info->was_loaded = ifp->if_flags & XFS_IFEXTENTS;
> > > > +	if (!info->was_loaded) {
> > > > +		error = xfs_iread_extents(sc->tp, ip, whichfork);
> > > > +		if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
> > > > +			goto out;
> > > > +	}
> > > > +
> > > > +	/* Check the btree structure. */
> > > >  	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
> > > >  	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
> > > >  	error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
> > > >  	xfs_btree_del_cursor(cur, error);
> > > > +out:
> > > >  	return error;
> > > >  }
> > > >  
> > > > @@ -671,13 +692,6 @@ xchk_bmap(
> > > >  	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
> > > >  		goto out;
> > > >  
> > > > -	/* Now try to scrub the in-memory extent list. */
> > > > -        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> > > > -		error = xfs_iread_extents(sc->tp, ip, whichfork);
> > > > -		if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
> > > > -			goto out;
> > > > -	}
> > > > -
> > > >  	/* Find the offset of the last extent in the mapping. */
> > > >  	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
> > > >  	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
> > > > @@ -689,7 +703,7 @@ xchk_bmap(
> > > >  	for_each_xfs_iext(ifp, &icur, &irec) {
> > > >  		if (xchk_should_terminate(sc, &error) ||
> > > >  		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
> > > > -			break;
> > > > +			goto out;
> > > >  		if (isnullstartblock(irec.br_startblock))
> > > >  			continue;
> > > >  		if (irec.br_startoff >= endoff) {
> > > > @@ -697,7 +711,7 @@ xchk_bmap(
> > > >  					irec.br_startoff);
> > > >  			goto out;
> > > >  		}
> > > > -		error = xchk_bmap_extent(ip, NULL, &info, &irec);
> > > > +		error = xchk_bmap_iextent(ip, &info, &irec);
> > > >  		if (error)
> > > >  			goto out;
> > > >  	}
> > > > 

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2019-08-28 16:03 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-26 21:48 [PATCH 0/4] xfs: fixes for 5.4 Darrick J. Wong
2019-08-26 21:48 ` [PATCH 1/4] xfs: bmap scrub should only scrub records once Darrick J. Wong
2019-08-26 23:08   ` Dave Chinner
2019-08-27 13:14   ` Brian Foster
2019-08-27 15:18     ` Darrick J. Wong
2019-08-27 15:21       ` Brian Foster
2019-08-28 16:01         ` Darrick J. Wong
2019-08-26 21:48 ` [PATCH 2/4] xfs: fix maxicount division by zero error Darrick J. Wong
2019-08-26 23:09   ` Dave Chinner
2019-08-26 21:49 ` [PATCH 3/4] xfs: don't return _QUERY_ABORT from xfs_rmap_has_other_keys Darrick J. Wong
2019-08-26 23:15   ` Dave Chinner
2019-08-26 21:49 ` [PATCH 4/4] xfs: fix sign handling problem in xfs_bmbt_diff_two_keys Darrick J. Wong
2019-08-26 23:15   ` Dave Chinner
2019-08-27 13:01     ` Eric Sandeen
2019-08-27 15:19       ` Darrick J. Wong
2019-08-27 15:20         ` Eric Sandeen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).