linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCHSET v2 0/2] xfs: add the ability to flag a fs for repair
@ 2021-01-16  1:24 Darrick J. Wong
  2021-01-16  1:24 ` [PATCH 1/2] xfs_db: support the needsrepair feature flag in the version command Darrick J. Wong
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Darrick J. Wong @ 2021-01-16  1:24 UTC (permalink / raw)
  To: sandeen, djwong; +Cc: linux-xfs

Hi all,

This series adds a new incompat feature flag so that we can force a
sysadmin to run xfs_repair on a filesystem before mounting.  This will
be used in conjunction with v5 feature upgrades.

v2: tweak the "can't upgrade" behavior and repair messages

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=needsrepair

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=needsrepair

fstests git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfstests-dev.git/log/?h=needsrepair
---
 db/check.c           |    5 ++
 db/sb.c              |  153 ++++++++++++++++++++++++++++++++++++++++++++++++--
 db/xfs_admin.sh      |   10 ++-
 man/man8/xfs_admin.8 |   15 +++++
 man/man8/xfs_db.8    |    5 ++
 repair/agheader.c    |   15 +++++
 6 files changed, 193 insertions(+), 10 deletions(-)


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/2] xfs_db: support the needsrepair feature flag in the version command
  2021-01-16  1:24 [PATCHSET v2 0/2] xfs: add the ability to flag a fs for repair Darrick J. Wong
@ 2021-01-16  1:24 ` Darrick J. Wong
  2021-01-19 14:37   ` Brian Foster
  2021-01-16  1:24 ` [PATCH 2/2] xfs_repair: clear the needsrepair flag Darrick J. Wong
  2021-01-20  4:31 ` [PATCH 3/2] xfs_repair: fix unmount error message to have a newline Darrick J. Wong
  2 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2021-01-16  1:24 UTC (permalink / raw)
  To: sandeen, djwong; +Cc: linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

Teach the xfs_db version command about the 'needsrepair' flag, which can
be used to force the system administrator to repair the filesystem with
xfs_repair.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 db/check.c           |    5 ++
 db/sb.c              |  153 ++++++++++++++++++++++++++++++++++++++++++++++++--
 db/xfs_admin.sh      |   10 ++-
 man/man8/xfs_admin.8 |   15 +++++
 man/man8/xfs_db.8    |    5 ++
 5 files changed, 178 insertions(+), 10 deletions(-)


diff --git a/db/check.c b/db/check.c
index 33736e33..485e855e 100644
--- a/db/check.c
+++ b/db/check.c
@@ -3970,6 +3970,11 @@ scan_ag(
 			dbprintf(_("mkfs not completed successfully\n"));
 		error++;
 	}
+	if (xfs_sb_version_needsrepair(sb)) {
+		if (!sflag)
+			dbprintf(_("filesystem needs xfs_repair\n"));
+		error++;
+	}
 	set_dbmap(agno, XFS_SB_BLOCK(mp), 1, DBM_SB, agno, XFS_SB_BLOCK(mp));
 	if (sb->sb_logstart && XFS_FSB_TO_AGNO(mp, sb->sb_logstart) == agno)
 		set_dbmap(agno, XFS_FSB_TO_AGBNO(mp, sb->sb_logstart),
diff --git a/db/sb.c b/db/sb.c
index d09f653d..fcc2a0ed 100644
--- a/db/sb.c
+++ b/db/sb.c
@@ -379,6 +379,11 @@ uuid_f(
 				progname);
 			return 0;
 		}
+		if (xfs_sb_version_needsrepair(&mp->m_sb)) {
+			dbprintf(_("%s: filesystem needs xfs_repair\n"),
+				progname);
+			return 0;
+		}
 
 		if (!strcasecmp(argv[1], "generate")) {
 			platform_uuid_generate(&uu);
@@ -501,6 +506,7 @@ do_label(xfs_agnumber_t agno, char *label)
 		memcpy(&lbl[0], &tsb.sb_fname, sizeof(tsb.sb_fname));
 		return &lbl[0];
 	}
+
 	/* set label */
 	if ((len = strlen(label)) > sizeof(tsb.sb_fname)) {
 		if (agno == 0)
@@ -543,6 +549,12 @@ label_f(
 			return 0;
 		}
 
+		if (xfs_sb_version_needsrepair(&mp->m_sb)) {
+			dbprintf(_("%s: filesystem needs xfs_repair\n"),
+				progname);
+			return 0;
+		}
+
 		dbprintf(_("writing all SBs\n"));
 		for (ag = 0; ag < mp->m_sb.sb_agcount; ag++)
 			if ((p = do_label(ag, argv[1])) == NULL) {
@@ -584,6 +596,7 @@ version_help(void)
 " 'version attr1'    - enable v1 inline extended attributes\n"
 " 'version attr2'    - enable v2 inline extended attributes\n"
 " 'version log2'     - enable v2 log format\n"
+" 'version needsrepair' - flag filesystem as requiring repair\n"
 "\n"
 "The version function prints currently enabled features for a filesystem\n"
 "according to the version field of its primary superblock.\n"
@@ -620,6 +633,112 @@ do_version(xfs_agnumber_t agno, uint16_t version, uint32_t features)
 	return 1;
 }
 
+struct v5feat {
+	uint32_t		compat;
+	uint32_t		ro_compat;
+	uint32_t		incompat;
+	uint32_t		log_incompat;
+};
+
+static void
+get_v5_features(
+	struct v5feat		*feat,
+	const struct xfs_sb	*sbp)
+{
+	feat->compat = sbp->sb_features_compat;
+	feat->ro_compat = sbp->sb_features_ro_compat;
+	feat->incompat = sbp->sb_features_incompat;
+	feat->log_incompat = sbp->sb_features_log_incompat;
+}
+
+static void
+set_v5_features(
+	struct xfs_sb		*sbp,
+	const struct v5feat	*feat)
+{
+	sbp->sb_features_compat = feat->compat;
+	sbp->sb_features_ro_compat = feat->ro_compat;
+	sbp->sb_features_incompat = feat->incompat;
+	sbp->sb_features_log_incompat = feat->log_incompat;
+}
+
+static bool
+upgrade_v5_features(
+	struct xfs_mount	*mp,
+	const struct v5feat	*upgrade)
+{
+	struct xfs_sb		tsb;
+	struct v5feat		old;
+	xfs_agnumber_t		agno = 0;
+	xfs_agnumber_t		revert_agno = 0;
+
+	if (upgrade->compat == mp->m_sb.sb_features_compat &&
+	    upgrade->ro_compat == mp->m_sb.sb_features_ro_compat &&
+	    upgrade->incompat == mp->m_sb.sb_features_incompat &&
+	    upgrade->log_incompat == mp->m_sb.sb_features_log_incompat)
+		return true;
+
+	/* Upgrade primary superblock. */
+	if (!get_sb(agno, &tsb))
+		goto fail;
+
+	dbprintf(_("Upgrading V5 filesystem\n"));
+
+	/* Save the old primary super features in case we revert. */
+	get_v5_features(&old, &tsb);
+
+	/* Update features and force user to run repair before mounting. */
+	set_v5_features(&tsb, upgrade);
+	tsb.sb_features_incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
+
+	/* Write new primary superblock */
+	libxfs_sb_to_disk(iocur_top->data, &tsb);
+	write_cur();
+	if (!iocur_top->bp || iocur_top->bp->b_error)
+		goto fail;
+
+	/* Update the secondary superblocks, or revert. */
+	for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) {
+		if (!get_sb(agno, &tsb)) {
+			agno--;
+			goto revert;
+		}
+
+		/* Set features and write secondary super. */
+		set_v5_features(&tsb, upgrade);
+		libxfs_sb_to_disk(iocur_top->data, &tsb);
+		write_cur();
+
+		/* Write or abort. */
+		if (!iocur_top->bp || iocur_top->bp->b_error)
+			goto revert;
+	}
+
+	/* All superblocks updated, update the incore values. */
+	set_v5_features(&mp->m_sb, upgrade);
+	dbprintf(_("Upgraded V5 filesystem.  Please run xfs_repair.\n"));
+	return true;
+
+revert:
+	/*
+	 * Try to revert feature flag changes, and don't worry if we fail.
+	 * We're probably in a mess anyhow, and the admin will have to run
+	 * repair anyways.
+	 */
+	for (revert_agno = 0; revert_agno <= agno; revert_agno++) {
+		if (!get_sb(revert_agno, &tsb))
+			continue;
+
+		set_v5_features(&tsb, &old);
+		libxfs_sb_to_disk(iocur_top->data, &tsb);
+		write_cur();
+	}
+fail:
+	dbprintf(
+_("Failed to upgrade V5 filesystem at AG %d, please run xfs_repair.\n"), agno);
+	return false;
+}
+
 static char *
 version_string(
 	xfs_sb_t	*sbp)
@@ -691,15 +810,12 @@ version_string(
 		strcat(s, ",INOBTCNT");
 	if (xfs_sb_version_hasbigtime(sbp))
 		strcat(s, ",BIGTIME");
+	if (xfs_sb_version_needsrepair(sbp))
+		strcat(s, ",NEEDSREPAIR");
 	return s;
 }
 
-/*
- * XXX: this only supports reading and writing to version 4 superblock fields.
- * V5 superblocks always define certain V4 feature bits - they are blocked from
- * being changed if a V5 sb is detected, but otherwise v5 superblock features
- * are not handled here.
- */
+/* Upgrade a superblock to support a feature. */
 static int
 version_f(
 	int		argc,
@@ -710,6 +826,9 @@ version_f(
 	xfs_agnumber_t	ag;
 
 	if (argc == 2) {	/* WRITE VERSION */
+		struct v5feat	v5features;
+
+		get_v5_features(&v5features, &mp->m_sb);
 
 		if ((x.isreadonly & LIBXFS_ISREADONLY) || !expert_mode) {
 			dbprintf(_("%s: not in expert mode, writing disabled\n"),
@@ -717,8 +836,23 @@ version_f(
 			return 0;
 		}
 
+		if (xfs_sb_version_needsrepair(&mp->m_sb)) {
+			dbprintf(_("%s: filesystem needs xfs_repair\n"),
+				progname);
+			return 0;
+		}
+
 		/* Logic here derived from the IRIX xfs_chver(1M) script. */
-		if (!strcasecmp(argv[1], "extflg")) {
+		if (!strcasecmp(argv[1], "needsrepair")) {
+			if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+				dbprintf(
+		_("needsrepair flag cannot be enabled on pre-V5 filesystems\n"));
+				exitcode = 2;
+				return 1;
+			}
+
+			v5features.incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
+		} else if (!strcasecmp(argv[1], "extflg")) {
 			switch (XFS_SB_VERSION_NUM(&mp->m_sb)) {
 			case XFS_SB_VERSION_1:
 				version = 0x0004 | XFS_SB_VERSION_EXTFLGBIT;
@@ -809,6 +943,11 @@ version_f(
 			mp->m_sb.sb_versionnum = version;
 			mp->m_sb.sb_features2 = features;
 		}
+
+		if (!upgrade_v5_features(mp, &v5features)) {
+			exitcode = 1;
+			return 1;
+		}
 	}
 
 	if (argc == 3) {	/* VERSIONNUM + FEATURES2 */
diff --git a/db/xfs_admin.sh b/db/xfs_admin.sh
index bd325da2..0e79bbf9 100755
--- a/db/xfs_admin.sh
+++ b/db/xfs_admin.sh
@@ -7,9 +7,9 @@
 status=0
 DB_OPTS=""
 REPAIR_OPTS=""
-USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] device [logdev]"
+USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] [-O v5_feature] device [logdev]"
 
-while getopts "efjlpuc:L:U:V" c
+while getopts "efjlpuc:L:O:U:V" c
 do
 	case $c in
 	c)	REPAIR_OPTS=$REPAIR_OPTS" -c lazycount="$OPTARG;;
@@ -19,6 +19,9 @@ do
 	l)	DB_OPTS=$DB_OPTS" -r -c label";;
 	L)	DB_OPTS=$DB_OPTS" -c 'label "$OPTARG"'";;
 	p)	DB_OPTS=$DB_OPTS" -c 'version projid32bit'";;
+	O)	DB_OPTS=$DB_OPTS" -c 'version "$OPTARG"'";
+		# Force repair to run by adding a single space to REPAIR_OPTS
+		REPAIR_OPTS="$REPAIR_OPTS ";;
 	u)	DB_OPTS=$DB_OPTS" -r -c uuid";;
 	U)	DB_OPTS=$DB_OPTS" -c 'uuid "$OPTARG"'";;
 	V)	xfs_db -p xfs_admin -V
@@ -34,6 +37,7 @@ set -- extra $@
 shift $OPTIND
 case $# in
 	1|2)
+		status=0
 		# Pick up the log device, if present
 		if [ -n "$2" ]; then
 			DB_OPTS=$DB_OPTS" -l '$2'"
@@ -46,7 +50,7 @@ case $# in
 			eval xfs_db -x -p xfs_admin $DB_OPTS $1
 			status=$?
 		fi
-		if [ -n "$REPAIR_OPTS" ]
+		if [ -n "$REPAIR_OPTS" ] && [ $status -ne 2 ]
 		then
 			# Hide normal repair output which is sent to stderr
 			# assuming the filesystem is fine when a user is
diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8
index 8afc873f..b423981d 100644
--- a/man/man8/xfs_admin.8
+++ b/man/man8/xfs_admin.8
@@ -6,6 +6,8 @@ xfs_admin \- change parameters of an XFS filesystem
 [
 .B \-eflpu
 ] [
+.BI \-O " feature"
+] [
 .BR "\-c 0" | 1
 ] [
 .B \-L
@@ -103,6 +105,19 @@ The filesystem label can be cleared using the special "\c
 " value for
 .IR label .
 .TP
+.BI \-O " feature"
+Add a new feature to the filesystem.
+Only one feature can be specified at a time.
+Features are as follows:
+.RS 0.7i
+.TP
+.B needsrepair
+If this is a V5 filesystem, flag the filesystem as needing repairs.
+Until
+.BR xfs_repair (8)
+is run, the filesystem will not be mountable.
+.RE
+.TP
 .BI \-U " uuid"
 Set the UUID of the filesystem to
 .IR uuid .
diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8
index 58727495..7331cf19 100644
--- a/man/man8/xfs_db.8
+++ b/man/man8/xfs_db.8
@@ -971,6 +971,11 @@ may toggle between
 and
 .B attr2
 at will (older kernels may not support the newer version).
+The filesystem can be flagged as requiring a run through
+.BR xfs_repair (8)
+if the
+.B needsrepair
+option is specified and the filesystem is formatted with the V5 format.
 .IP
 If no argument is given, the current version and feature bits are printed.
 With one argument, this command will write the updated version number


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/2] xfs_repair: clear the needsrepair flag
  2021-01-16  1:24 [PATCHSET v2 0/2] xfs: add the ability to flag a fs for repair Darrick J. Wong
  2021-01-16  1:24 ` [PATCH 1/2] xfs_db: support the needsrepair feature flag in the version command Darrick J. Wong
@ 2021-01-16  1:24 ` Darrick J. Wong
  2021-01-19 14:37   ` Brian Foster
  2021-01-20  4:31   ` [PATCH v2.1 " Darrick J. Wong
  2021-01-20  4:31 ` [PATCH 3/2] xfs_repair: fix unmount error message to have a newline Darrick J. Wong
  2 siblings, 2 replies; 18+ messages in thread
From: Darrick J. Wong @ 2021-01-16  1:24 UTC (permalink / raw)
  To: sandeen, djwong; +Cc: linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

Clear the needsrepair flag, since it's used to prevent mounting of an
inconsistent filesystem.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 repair/agheader.c |   15 +++++++++++++++
 1 file changed, 15 insertions(+)


diff --git a/repair/agheader.c b/repair/agheader.c
index 8bb99489..d9b72d3a 100644
--- a/repair/agheader.c
+++ b/repair/agheader.c
@@ -452,6 +452,21 @@ secondary_sb_whack(
 			rval |= XR_AG_SB_SEC;
 	}
 
+	if (xfs_sb_version_needsrepair(sb)) {
+		if (!no_modify)
+			sb->sb_features_incompat &=
+					~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
+		if (i == 0) {
+			if (!no_modify)
+				do_warn(
+	_("clearing needsrepair flag and regenerating metadata\n"));
+			else
+				do_warn(
+	_("would clear needsrepair flag and regenerate metadata\n"));
+		}
+		rval |= XR_AG_SB_SEC;
+	}
+
 	return(rval);
 }
 


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/2] xfs_db: support the needsrepair feature flag in the version command
  2021-01-16  1:24 ` [PATCH 1/2] xfs_db: support the needsrepair feature flag in the version command Darrick J. Wong
@ 2021-01-19 14:37   ` Brian Foster
  2021-02-02 21:09     ` Darrick J. Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Brian Foster @ 2021-01-19 14:37 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Fri, Jan 15, 2021 at 05:24:47PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Teach the xfs_db version command about the 'needsrepair' flag, which can
> be used to force the system administrator to repair the filesystem with
> xfs_repair.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  db/check.c           |    5 ++
>  db/sb.c              |  153 ++++++++++++++++++++++++++++++++++++++++++++++++--
>  db/xfs_admin.sh      |   10 ++-
>  man/man8/xfs_admin.8 |   15 +++++
>  man/man8/xfs_db.8    |    5 ++
>  5 files changed, 178 insertions(+), 10 deletions(-)
> 
> 
> diff --git a/db/check.c b/db/check.c
> index 33736e33..485e855e 100644
> --- a/db/check.c
> +++ b/db/check.c
> @@ -3970,6 +3970,11 @@ scan_ag(
>  			dbprintf(_("mkfs not completed successfully\n"));
>  		error++;
>  	}
> +	if (xfs_sb_version_needsrepair(sb)) {
> +		if (!sflag)
> +			dbprintf(_("filesystem needs xfs_repair\n"));
> +		error++;
> +	}
>  	set_dbmap(agno, XFS_SB_BLOCK(mp), 1, DBM_SB, agno, XFS_SB_BLOCK(mp));
>  	if (sb->sb_logstart && XFS_FSB_TO_AGNO(mp, sb->sb_logstart) == agno)
>  		set_dbmap(agno, XFS_FSB_TO_AGBNO(mp, sb->sb_logstart),
> diff --git a/db/sb.c b/db/sb.c
> index d09f653d..fcc2a0ed 100644
> --- a/db/sb.c
> +++ b/db/sb.c
...
> @@ -620,6 +633,112 @@ do_version(xfs_agnumber_t agno, uint16_t version, uint32_t features)
>  	return 1;
>  }
>  
...
> +static bool
> +upgrade_v5_features(
> +	struct xfs_mount	*mp,
> +	const struct v5feat	*upgrade)
> +{
> +	struct xfs_sb		tsb;
> +	struct v5feat		old;
> +	xfs_agnumber_t		agno = 0;
> +	xfs_agnumber_t		revert_agno = 0;
> +
> +	if (upgrade->compat == mp->m_sb.sb_features_compat &&
> +	    upgrade->ro_compat == mp->m_sb.sb_features_ro_compat &&
> +	    upgrade->incompat == mp->m_sb.sb_features_incompat &&
> +	    upgrade->log_incompat == mp->m_sb.sb_features_log_incompat)
> +		return true;
> +
> +	/* Upgrade primary superblock. */
> +	if (!get_sb(agno, &tsb))
> +		goto fail;
> +
> +	dbprintf(_("Upgrading V5 filesystem\n"));
> +
> +	/* Save the old primary super features in case we revert. */
> +	get_v5_features(&old, &tsb);
> +
> +	/* Update features and force user to run repair before mounting. */
> +	set_v5_features(&tsb, upgrade);
> +	tsb.sb_features_incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> +

So we explicitly update the primary sb with NEEDSREPAIR...

> +	/* Write new primary superblock */
> +	libxfs_sb_to_disk(iocur_top->data, &tsb);
> +	write_cur();
> +	if (!iocur_top->bp || iocur_top->bp->b_error)
> +		goto fail;
> +
> +	/* Update the secondary superblocks, or revert. */
> +	for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) {
> +		if (!get_sb(agno, &tsb)) {
> +			agno--;
> +			goto revert;
> +		}
> +
> +		/* Set features and write secondary super. */
> +		set_v5_features(&tsb, upgrade);
> +		libxfs_sb_to_disk(iocur_top->data, &tsb);
> +		write_cur();

... but only set NEEDSREPAIR on secondaries if it was set in upgrade by
the caller?

> +
> +		/* Write or abort. */
> +		if (!iocur_top->bp || iocur_top->bp->b_error)
> +			goto revert;
> +	}
> +
> +	/* All superblocks updated, update the incore values. */
> +	set_v5_features(&mp->m_sb, upgrade);

This also looks like it has the same behavior. I.e., on a 'version
bigtime' upgrade wouldn't have NEEDSREPAIR set.

> +	dbprintf(_("Upgraded V5 filesystem.  Please run xfs_repair.\n"));
> +	return true;
> +
> +revert:
> +	/*
> +	 * Try to revert feature flag changes, and don't worry if we fail.
> +	 * We're probably in a mess anyhow, and the admin will have to run
> +	 * repair anyways.
> +	 */
> +	for (revert_agno = 0; revert_agno <= agno; revert_agno++) {
> +		if (!get_sb(revert_agno, &tsb))
> +			continue;
> +
> +		set_v5_features(&tsb, &old);
> +		libxfs_sb_to_disk(iocur_top->data, &tsb);
> +		write_cur();
> +	}
> +fail:
> +	dbprintf(
> +_("Failed to upgrade V5 filesystem at AG %d, please run xfs_repair.\n"), agno);
> +	return false;
> +}
> +
>  static char *
>  version_string(
>  	xfs_sb_t	*sbp)
...
> @@ -717,8 +836,23 @@ version_f(
>  			return 0;
>  		}
>  
> +		if (xfs_sb_version_needsrepair(&mp->m_sb)) {
> +			dbprintf(_("%s: filesystem needs xfs_repair\n"),
> +				progname);
> +			return 0;
> +		}
> +
>  		/* Logic here derived from the IRIX xfs_chver(1M) script. */
> -		if (!strcasecmp(argv[1], "extflg")) {
> +		if (!strcasecmp(argv[1], "needsrepair")) {
> +			if (!xfs_sb_version_hascrc(&mp->m_sb)) {
> +				dbprintf(
> +		_("needsrepair flag cannot be enabled on pre-V5 filesystems\n"));
> +				exitcode = 2;
> +				return 1;

Hmm.. I see that exitcode 1 means error && xfs_repair while exitcode 2
means error && !xfs_repair, but I'm still not sure I follow the high
level error semantics, particularly if we happen to fail updating
secondary supers. I wonder if it would be more straightforward to have
xfs_db only return an error when an update attempt occurs and fails and
then let xfs_admin run xfs_repair if status == 0 && NEEDSREPAIR is set.
I suppose the various other ".. bit already set" or "v5 super required"
conditions don't really need to be errors and thus repair would only run
in those cases if NEEDSREPAIR was still set on the fs. Otherwise if
xfs_db fails we dump an error message and encourage the user to run
xfs_repair themselves.

There are still corner cases I guess, but that does _seem_ a bit more
elegant to me. Otherwise I suppose a comment somewhere that explains
when/why to use which error code would be helpful.

Brian

> +			}
> +
> +			v5features.incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> +		} else if (!strcasecmp(argv[1], "extflg")) {
>  			switch (XFS_SB_VERSION_NUM(&mp->m_sb)) {
>  			case XFS_SB_VERSION_1:
>  				version = 0x0004 | XFS_SB_VERSION_EXTFLGBIT;
> @@ -809,6 +943,11 @@ version_f(
>  			mp->m_sb.sb_versionnum = version;
>  			mp->m_sb.sb_features2 = features;
>  		}
> +
> +		if (!upgrade_v5_features(mp, &v5features)) {
> +			exitcode = 1;
> +			return 1;
> +		}
>  	}
>  
>  	if (argc == 3) {	/* VERSIONNUM + FEATURES2 */
> diff --git a/db/xfs_admin.sh b/db/xfs_admin.sh
> index bd325da2..0e79bbf9 100755
> --- a/db/xfs_admin.sh
> +++ b/db/xfs_admin.sh
> @@ -7,9 +7,9 @@
>  status=0
>  DB_OPTS=""
>  REPAIR_OPTS=""
> -USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] device [logdev]"
> +USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] [-O v5_feature] device [logdev]"
>  
> -while getopts "efjlpuc:L:U:V" c
> +while getopts "efjlpuc:L:O:U:V" c
>  do
>  	case $c in
>  	c)	REPAIR_OPTS=$REPAIR_OPTS" -c lazycount="$OPTARG;;
> @@ -19,6 +19,9 @@ do
>  	l)	DB_OPTS=$DB_OPTS" -r -c label";;
>  	L)	DB_OPTS=$DB_OPTS" -c 'label "$OPTARG"'";;
>  	p)	DB_OPTS=$DB_OPTS" -c 'version projid32bit'";;
> +	O)	DB_OPTS=$DB_OPTS" -c 'version "$OPTARG"'";
> +		# Force repair to run by adding a single space to REPAIR_OPTS
> +		REPAIR_OPTS="$REPAIR_OPTS ";;
>  	u)	DB_OPTS=$DB_OPTS" -r -c uuid";;
>  	U)	DB_OPTS=$DB_OPTS" -c 'uuid "$OPTARG"'";;
>  	V)	xfs_db -p xfs_admin -V
> @@ -34,6 +37,7 @@ set -- extra $@
>  shift $OPTIND
>  case $# in
>  	1|2)
> +		status=0
>  		# Pick up the log device, if present
>  		if [ -n "$2" ]; then
>  			DB_OPTS=$DB_OPTS" -l '$2'"
> @@ -46,7 +50,7 @@ case $# in
>  			eval xfs_db -x -p xfs_admin $DB_OPTS $1
>  			status=$?
>  		fi
> -		if [ -n "$REPAIR_OPTS" ]
> +		if [ -n "$REPAIR_OPTS" ] && [ $status -ne 2 ]
>  		then
>  			# Hide normal repair output which is sent to stderr
>  			# assuming the filesystem is fine when a user is
> diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8
> index 8afc873f..b423981d 100644
> --- a/man/man8/xfs_admin.8
> +++ b/man/man8/xfs_admin.8
> @@ -6,6 +6,8 @@ xfs_admin \- change parameters of an XFS filesystem
>  [
>  .B \-eflpu
>  ] [
> +.BI \-O " feature"
> +] [
>  .BR "\-c 0" | 1
>  ] [
>  .B \-L
> @@ -103,6 +105,19 @@ The filesystem label can be cleared using the special "\c
>  " value for
>  .IR label .
>  .TP
> +.BI \-O " feature"
> +Add a new feature to the filesystem.
> +Only one feature can be specified at a time.
> +Features are as follows:
> +.RS 0.7i
> +.TP
> +.B needsrepair
> +If this is a V5 filesystem, flag the filesystem as needing repairs.
> +Until
> +.BR xfs_repair (8)
> +is run, the filesystem will not be mountable.
> +.RE
> +.TP
>  .BI \-U " uuid"
>  Set the UUID of the filesystem to
>  .IR uuid .
> diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8
> index 58727495..7331cf19 100644
> --- a/man/man8/xfs_db.8
> +++ b/man/man8/xfs_db.8
> @@ -971,6 +971,11 @@ may toggle between
>  and
>  .B attr2
>  at will (older kernels may not support the newer version).
> +The filesystem can be flagged as requiring a run through
> +.BR xfs_repair (8)
> +if the
> +.B needsrepair
> +option is specified and the filesystem is formatted with the V5 format.
>  .IP
>  If no argument is given, the current version and feature bits are printed.
>  With one argument, this command will write the updated version number
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] xfs_repair: clear the needsrepair flag
  2021-01-16  1:24 ` [PATCH 2/2] xfs_repair: clear the needsrepair flag Darrick J. Wong
@ 2021-01-19 14:37   ` Brian Foster
  2021-01-19 18:15     ` Darrick J. Wong
  2021-01-20  4:31   ` [PATCH v2.1 " Darrick J. Wong
  1 sibling, 1 reply; 18+ messages in thread
From: Brian Foster @ 2021-01-19 14:37 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Fri, Jan 15, 2021 at 05:24:53PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Clear the needsrepair flag, since it's used to prevent mounting of an
> inconsistent filesystem.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---

Code/errors look much cleaner. Though looking at the repair code again,
I wonder... if we clear the needsrepair bit and dirty/write the sb in
phase 2 and then xfs_repair happens to crash, do we risk clearing the
bit and thus allowing a potential mount before whatever requisite
metadata updates have been made?

Brian

>  repair/agheader.c |   15 +++++++++++++++
>  1 file changed, 15 insertions(+)
> 
> 
> diff --git a/repair/agheader.c b/repair/agheader.c
> index 8bb99489..d9b72d3a 100644
> --- a/repair/agheader.c
> +++ b/repair/agheader.c
> @@ -452,6 +452,21 @@ secondary_sb_whack(
>  			rval |= XR_AG_SB_SEC;
>  	}
>  
> +	if (xfs_sb_version_needsrepair(sb)) {
> +		if (!no_modify)
> +			sb->sb_features_incompat &=
> +					~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> +		if (i == 0) {
> +			if (!no_modify)
> +				do_warn(
> +	_("clearing needsrepair flag and regenerating metadata\n"));
> +			else
> +				do_warn(
> +	_("would clear needsrepair flag and regenerate metadata\n"));
> +		}
> +		rval |= XR_AG_SB_SEC;
> +	}
> +
>  	return(rval);
>  }
>  
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] xfs_repair: clear the needsrepair flag
  2021-01-19 14:37   ` Brian Foster
@ 2021-01-19 18:15     ` Darrick J. Wong
  2021-01-19 19:44       ` Brian Foster
  0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2021-01-19 18:15 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Tue, Jan 19, 2021 at 09:37:54AM -0500, Brian Foster wrote:
> On Fri, Jan 15, 2021 at 05:24:53PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Clear the needsrepair flag, since it's used to prevent mounting of an
> > inconsistent filesystem.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> 
> Code/errors look much cleaner. Though looking at the repair code again,
> I wonder... if we clear the needsrepair bit and dirty/write the sb in
> phase 2 and then xfs_repair happens to crash, do we risk clearing the
> bit and thus allowing a potential mount before whatever requisite
> metadata updates have been made?

[Oh good, now mail.kernel.org is having problems...]

Yes, though I think that falls into the realm of "sysadmins should be
sufficiently self-aware not to expect mount to work after repair
fails/system crashes during an upgrade".

I've thought about how to solve the general problem of preventing people
from mounting filesystems if repair doesn't run to completion.  I think
xfs_repair could be modified so that once it finds the primary super, it
writes it back out with NEEDSREPAIR set (V5) or inprogress set (V4).
Once we've finished the buffer cache flush at the end of repair, we
clear needsrepair/inprogress and write the primary super again.

An optimization on that would be to find a way to avoid that first super
write until we flush the first dirty buffer.

Another way to make repair more "transactional" would be to do it would
be to fiddle with the buffer manager so that writes are sent to a
metadump file which could be mdrestore'd if repair completes
successfully.  But that's a short-circuit around the even bigger project
of porting the kernel logging code to userspace and use that in repair.

--D

> Brian
> 
> >  repair/agheader.c |   15 +++++++++++++++
> >  1 file changed, 15 insertions(+)
> > 
> > 
> > diff --git a/repair/agheader.c b/repair/agheader.c
> > index 8bb99489..d9b72d3a 100644
> > --- a/repair/agheader.c
> > +++ b/repair/agheader.c
> > @@ -452,6 +452,21 @@ secondary_sb_whack(
> >  			rval |= XR_AG_SB_SEC;
> >  	}
> >  
> > +	if (xfs_sb_version_needsrepair(sb)) {
> > +		if (!no_modify)
> > +			sb->sb_features_incompat &=
> > +					~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> > +		if (i == 0) {
> > +			if (!no_modify)
> > +				do_warn(
> > +	_("clearing needsrepair flag and regenerating metadata\n"));
> > +			else
> > +				do_warn(
> > +	_("would clear needsrepair flag and regenerate metadata\n"));
> > +		}
> > +		rval |= XR_AG_SB_SEC;
> > +	}
> > +
> >  	return(rval);
> >  }
> >  
> > 
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] xfs_repair: clear the needsrepair flag
  2021-01-19 18:15     ` Darrick J. Wong
@ 2021-01-19 19:44       ` Brian Foster
  2021-01-19 20:31         ` Darrick J. Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Brian Foster @ 2021-01-19 19:44 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Tue, Jan 19, 2021 at 10:15:49AM -0800, Darrick J. Wong wrote:
> On Tue, Jan 19, 2021 at 09:37:54AM -0500, Brian Foster wrote:
> > On Fri, Jan 15, 2021 at 05:24:53PM -0800, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <djwong@kernel.org>
> > > 
> > > Clear the needsrepair flag, since it's used to prevent mounting of an
> > > inconsistent filesystem.
> > > 
> > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > ---
> > 
> > Code/errors look much cleaner. Though looking at the repair code again,
> > I wonder... if we clear the needsrepair bit and dirty/write the sb in
> > phase 2 and then xfs_repair happens to crash, do we risk clearing the
> > bit and thus allowing a potential mount before whatever requisite
> > metadata updates have been made?
> 
> [Oh good, now mail.kernel.org is having problems...]
> 
> Yes, though I think that falls into the realm of "sysadmins should be
> sufficiently self-aware not to expect mount to work after repair
> fails/system crashes during an upgrade".
> 
> I've thought about how to solve the general problem of preventing people
> from mounting filesystems if repair doesn't run to completion.  I think
> xfs_repair could be modified so that once it finds the primary super, it
> writes it back out with NEEDSREPAIR set (V5) or inprogress set (V4).
> Once we've finished the buffer cache flush at the end of repair, we
> clear needsrepair/inprogress and write the primary super again.
> 

That's kind of what I was thinking.. set a global flag if/when we come
across the bit set on disk and clear it as a final step.

> An optimization on that would be to find a way to avoid that first super
> write until we flush the first dirty buffer.
> 
> Another way to make repair more "transactional" would be to do it would
> be to fiddle with the buffer manager so that writes are sent to a
> metadump file which could be mdrestore'd if repair completes
> successfully.  But that's a short-circuit around the even bigger project
> of porting the kernel logging code to userspace and use that in repair.
> 

Yeah, I wouldn't want to have clearing a feature bit depend on such a
significant rework of core functionality. The bit is not going to be set
in most cases, so I'd suspect an extra superblock write at the end of
repair wouldn't be much of a problem. In fact, it looks like main()
already has an unconditional sb write before the final libxfs_unmount()
call. Perhaps we could just hitch onto that for the primary super
update (and continue to clear the secondaries earlier as the current
patch does)..?

Brian

> --D
> 
> > Brian
> > 
> > >  repair/agheader.c |   15 +++++++++++++++
> > >  1 file changed, 15 insertions(+)
> > > 
> > > 
> > > diff --git a/repair/agheader.c b/repair/agheader.c
> > > index 8bb99489..d9b72d3a 100644
> > > --- a/repair/agheader.c
> > > +++ b/repair/agheader.c
> > > @@ -452,6 +452,21 @@ secondary_sb_whack(
> > >  			rval |= XR_AG_SB_SEC;
> > >  	}
> > >  
> > > +	if (xfs_sb_version_needsrepair(sb)) {
> > > +		if (!no_modify)
> > > +			sb->sb_features_incompat &=
> > > +					~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> > > +		if (i == 0) {
> > > +			if (!no_modify)
> > > +				do_warn(
> > > +	_("clearing needsrepair flag and regenerating metadata\n"));
> > > +			else
> > > +				do_warn(
> > > +	_("would clear needsrepair flag and regenerate metadata\n"));
> > > +		}
> > > +		rval |= XR_AG_SB_SEC;
> > > +	}
> > > +
> > >  	return(rval);
> > >  }
> > >  
> > > 
> > 
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] xfs_repair: clear the needsrepair flag
  2021-01-19 19:44       ` Brian Foster
@ 2021-01-19 20:31         ` Darrick J. Wong
  2021-01-19 23:49           ` Darrick J. Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2021-01-19 20:31 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Tue, Jan 19, 2021 at 02:44:06PM -0500, Brian Foster wrote:
> On Tue, Jan 19, 2021 at 10:15:49AM -0800, Darrick J. Wong wrote:
> > On Tue, Jan 19, 2021 at 09:37:54AM -0500, Brian Foster wrote:
> > > On Fri, Jan 15, 2021 at 05:24:53PM -0800, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <djwong@kernel.org>
> > > > 
> > > > Clear the needsrepair flag, since it's used to prevent mounting of an
> > > > inconsistent filesystem.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > > ---
> > > 
> > > Code/errors look much cleaner. Though looking at the repair code again,
> > > I wonder... if we clear the needsrepair bit and dirty/write the sb in
> > > phase 2 and then xfs_repair happens to crash, do we risk clearing the
> > > bit and thus allowing a potential mount before whatever requisite
> > > metadata updates have been made?
> > 
> > [Oh good, now mail.kernel.org is having problems...]
> > 
> > Yes, though I think that falls into the realm of "sysadmins should be
> > sufficiently self-aware not to expect mount to work after repair
> > fails/system crashes during an upgrade".
> > 
> > I've thought about how to solve the general problem of preventing people
> > from mounting filesystems if repair doesn't run to completion.  I think
> > xfs_repair could be modified so that once it finds the primary super, it
> > writes it back out with NEEDSREPAIR set (V5) or inprogress set (V4).
> > Once we've finished the buffer cache flush at the end of repair, we
> > clear needsrepair/inprogress and write the primary super again.
> > 
> 
> That's kind of what I was thinking.. set a global flag if/when we come
> across the bit set on disk and clear it as a final step.

I think if I found it set on the primary sb I would leave it alone, and
if the bit wasn't set then I'd set it and bwrite the buffer immediately.
Probably we're talking about more or less the same thing...

> > An optimization on that would be to find a way to avoid that first super
> > write until we flush the first dirty buffer.
> > 
> > Another way to make repair more "transactional" would be to do it would
> > be to fiddle with the buffer manager so that writes are sent to a
> > metadump file which could be mdrestore'd if repair completes
> > successfully.  But that's a short-circuit around the even bigger project
> > of porting the kernel logging code to userspace and use that in repair.
> > 
> 
> Yeah, I wouldn't want to have clearing a feature bit depend on such a
> significant rework of core functionality. The bit is not going to be set
> in most cases, so I'd suspect an extra superblock write at the end of
> repair wouldn't be much of a problem. In fact, it looks like main()
> already has an unconditional sb write before the final libxfs_unmount()
> call. Perhaps we could just hitch onto that for the primary super
> update (and continue to clear the secondaries earlier as the current
> patch does)..?

Clearing the bit has to happen after the bcache purge and disk flush,
because we need to make sure that everything else we wrote actually made
it to stable storage.

Hm, I guess we could export libxfs_flush_mount so that repair could call
that, clear the fields in the sb, and then go ahead with the
libxfs_umount.

--D

> Brian
> 
> > --D
> > 
> > > Brian
> > > 
> > > >  repair/agheader.c |   15 +++++++++++++++
> > > >  1 file changed, 15 insertions(+)
> > > > 
> > > > 
> > > > diff --git a/repair/agheader.c b/repair/agheader.c
> > > > index 8bb99489..d9b72d3a 100644
> > > > --- a/repair/agheader.c
> > > > +++ b/repair/agheader.c
> > > > @@ -452,6 +452,21 @@ secondary_sb_whack(
> > > >  			rval |= XR_AG_SB_SEC;
> > > >  	}
> > > >  
> > > > +	if (xfs_sb_version_needsrepair(sb)) {
> > > > +		if (!no_modify)
> > > > +			sb->sb_features_incompat &=
> > > > +					~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> > > > +		if (i == 0) {
> > > > +			if (!no_modify)
> > > > +				do_warn(
> > > > +	_("clearing needsrepair flag and regenerating metadata\n"));
> > > > +			else
> > > > +				do_warn(
> > > > +	_("would clear needsrepair flag and regenerate metadata\n"));
> > > > +		}
> > > > +		rval |= XR_AG_SB_SEC;
> > > > +	}
> > > > +
> > > >  	return(rval);
> > > >  }
> > > >  
> > > > 
> > > 
> > 
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] xfs_repair: clear the needsrepair flag
  2021-01-19 20:31         ` Darrick J. Wong
@ 2021-01-19 23:49           ` Darrick J. Wong
  0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2021-01-19 23:49 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Tue, Jan 19, 2021 at 12:31:10PM -0800, Darrick J. Wong wrote:
> On Tue, Jan 19, 2021 at 02:44:06PM -0500, Brian Foster wrote:
> > On Tue, Jan 19, 2021 at 10:15:49AM -0800, Darrick J. Wong wrote:
> > > On Tue, Jan 19, 2021 at 09:37:54AM -0500, Brian Foster wrote:
> > > > On Fri, Jan 15, 2021 at 05:24:53PM -0800, Darrick J. Wong wrote:
> > > > > From: Darrick J. Wong <djwong@kernel.org>
> > > > > 
> > > > > Clear the needsrepair flag, since it's used to prevent mounting of an
> > > > > inconsistent filesystem.
> > > > > 
> > > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > > > ---
> > > > 
> > > > Code/errors look much cleaner. Though looking at the repair code again,
> > > > I wonder... if we clear the needsrepair bit and dirty/write the sb in
> > > > phase 2 and then xfs_repair happens to crash, do we risk clearing the
> > > > bit and thus allowing a potential mount before whatever requisite
> > > > metadata updates have been made?
> > > 
> > > [Oh good, now mail.kernel.org is having problems...]
> > > 
> > > Yes, though I think that falls into the realm of "sysadmins should be
> > > sufficiently self-aware not to expect mount to work after repair
> > > fails/system crashes during an upgrade".
> > > 
> > > I've thought about how to solve the general problem of preventing people
> > > from mounting filesystems if repair doesn't run to completion.  I think
> > > xfs_repair could be modified so that once it finds the primary super, it
> > > writes it back out with NEEDSREPAIR set (V5) or inprogress set (V4).
> > > Once we've finished the buffer cache flush at the end of repair, we
> > > clear needsrepair/inprogress and write the primary super again.
> > > 
> > 
> > That's kind of what I was thinking.. set a global flag if/when we come
> > across the bit set on disk and clear it as a final step.
> 
> I think if I found it set on the primary sb I would leave it alone, and
> if the bit wasn't set then I'd set it and bwrite the buffer immediately.
> Probably we're talking about more or less the same thing...

...though after talking to Eric some more, I think I should change this
patch to clear needsrepair at the end of repair, like you said.

--D

> > > An optimization on that would be to find a way to avoid that first super
> > > write until we flush the first dirty buffer.
> > > 
> > > Another way to make repair more "transactional" would be to do it would
> > > be to fiddle with the buffer manager so that writes are sent to a
> > > metadump file which could be mdrestore'd if repair completes
> > > successfully.  But that's a short-circuit around the even bigger project
> > > of porting the kernel logging code to userspace and use that in repair.
> > > 
> > 
> > Yeah, I wouldn't want to have clearing a feature bit depend on such a
> > significant rework of core functionality. The bit is not going to be set
> > in most cases, so I'd suspect an extra superblock write at the end of
> > repair wouldn't be much of a problem. In fact, it looks like main()
> > already has an unconditional sb write before the final libxfs_unmount()
> > call. Perhaps we could just hitch onto that for the primary super
> > update (and continue to clear the secondaries earlier as the current
> > patch does)..?
> 
> Clearing the bit has to happen after the bcache purge and disk flush,
> because we need to make sure that everything else we wrote actually made
> it to stable storage.
> 
> Hm, I guess we could export libxfs_flush_mount so that repair could call
> that, clear the fields in the sb, and then go ahead with the
> libxfs_umount.
> 
> --D
> 
> > Brian
> > 
> > > --D
> > > 
> > > > Brian
> > > > 
> > > > >  repair/agheader.c |   15 +++++++++++++++
> > > > >  1 file changed, 15 insertions(+)
> > > > > 
> > > > > 
> > > > > diff --git a/repair/agheader.c b/repair/agheader.c
> > > > > index 8bb99489..d9b72d3a 100644
> > > > > --- a/repair/agheader.c
> > > > > +++ b/repair/agheader.c
> > > > > @@ -452,6 +452,21 @@ secondary_sb_whack(
> > > > >  			rval |= XR_AG_SB_SEC;
> > > > >  	}
> > > > >  
> > > > > +	if (xfs_sb_version_needsrepair(sb)) {
> > > > > +		if (!no_modify)
> > > > > +			sb->sb_features_incompat &=
> > > > > +					~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> > > > > +		if (i == 0) {
> > > > > +			if (!no_modify)
> > > > > +				do_warn(
> > > > > +	_("clearing needsrepair flag and regenerating metadata\n"));
> > > > > +			else
> > > > > +				do_warn(
> > > > > +	_("would clear needsrepair flag and regenerate metadata\n"));
> > > > > +		}
> > > > > +		rval |= XR_AG_SB_SEC;
> > > > > +	}
> > > > > +
> > > > >  	return(rval);
> > > > >  }
> > > > >  
> > > > > 
> > > > 
> > > 
> > 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2.1 2/2] xfs_repair: clear the needsrepair flag
  2021-01-16  1:24 ` [PATCH 2/2] xfs_repair: clear the needsrepair flag Darrick J. Wong
  2021-01-19 14:37   ` Brian Foster
@ 2021-01-20  4:31   ` Darrick J. Wong
  2021-01-20 17:38     ` Brian Foster
  1 sibling, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2021-01-20  4:31 UTC (permalink / raw)
  To: sandeen; +Cc: linux-xfs, Brian Foster

From: Darrick J. Wong <djwong@kernel.org>

Clear the needsrepair flag, since it's used to prevent mounting of an
inconsistent filesystem.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
v2.1: only remove needsrepair at the end of the xfs_repair run
---
 include/xfs_mount.h |    1 +
 libxfs/init.c       |    2 +-
 repair/agheader.c   |   55 +++++++++++++++++++++++++++++++++++++++++++++++++++
 repair/agheader.h   |    2 ++
 repair/xfs_repair.c |    4 +++-
 5 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/include/xfs_mount.h b/include/xfs_mount.h
index 36594643..75230ca5 100644
--- a/include/xfs_mount.h
+++ b/include/xfs_mount.h
@@ -181,6 +181,7 @@ xfs_perag_resv(
 
 extern xfs_mount_t	*libxfs_mount (xfs_mount_t *, xfs_sb_t *,
 				dev_t, dev_t, dev_t, int);
+int libxfs_flush_mount(struct xfs_mount *mp);
 int		libxfs_umount(struct xfs_mount *mp);
 extern void	libxfs_rtmount_destroy (xfs_mount_t *);
 
diff --git a/libxfs/init.c b/libxfs/init.c
index 9fe13b8d..083060bf 100644
--- a/libxfs/init.c
+++ b/libxfs/init.c
@@ -870,7 +870,7 @@ _("%s: Flushing the %s failed, err=%d!\n"),
  * Flush all dirty buffers to stable storage and report on writes that didn't
  * make it to stable storage.
  */
-static int
+int
 libxfs_flush_mount(
 	struct xfs_mount	*mp)
 {
diff --git a/repair/agheader.c b/repair/agheader.c
index 8bb99489..56a7f45c 100644
--- a/repair/agheader.c
+++ b/repair/agheader.c
@@ -220,6 +220,40 @@ compare_sb(xfs_mount_t *mp, xfs_sb_t *sb)
 	return(XR_OK);
 }
 
+/* Clear needsrepair after a successful repair run. */
+int
+clear_needsrepair(
+	struct xfs_mount	*mp)
+{
+	struct xfs_buf		*bp;
+	int			error;
+
+	if (!xfs_sb_version_needsrepair(&mp->m_sb) || no_modify)
+		return 0;
+
+	/* We must succeed at flushing all dirty buffers to disk. */
+	error = -libxfs_flush_mount(mp);
+	if (error)
+		return error;
+
+	/* Clear needsrepair from the superblock. */
+	bp = libxfs_getsb(mp);
+	if (!bp)
+		return ENOMEM;
+	if (bp->b_error) {
+		error = bp->b_error;
+		libxfs_buf_relse(bp);
+		return -error;
+	}
+
+	mp->m_sb.sb_features_incompat &= ~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
+
+	libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);
+	libxfs_buf_mark_dirty(bp);
+	libxfs_buf_relse(bp);
+	return 0;
+}
+
 /*
  * Possible fields that may have been set at mkfs time,
  * sb_inoalignmt, sb_unit, sb_width and sb_dirblklog.
@@ -452,6 +486,27 @@ secondary_sb_whack(
 			rval |= XR_AG_SB_SEC;
 	}
 
+	if (xfs_sb_version_needsrepair(sb)) {
+		if (!no_modify)
+			sb->sb_features_incompat &=
+					~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
+		if (i == 0) {
+			if (!no_modify)
+				do_warn(
+	_("clearing needsrepair flag and regenerating metadata\n"));
+			else
+				do_warn(
+	_("would clear needsrepair flag and regenerate metadata\n"));
+		} else {
+			/*
+			 * Quietly clear needsrepair on the secondary supers as
+			 * part of ensuring them.  If needsrepair is set on the
+			 * primary, it will be done at the very end of repair.
+			 */
+			rval |= XR_AG_SB_SEC;
+		}
+	}
+
 	return(rval);
 }
 
diff --git a/repair/agheader.h b/repair/agheader.h
index a63827c8..552c1f70 100644
--- a/repair/agheader.h
+++ b/repair/agheader.h
@@ -82,3 +82,5 @@ typedef struct fs_geo_list  {
 #define XR_AG_AGF	0x2
 #define XR_AG_AGI	0x4
 #define XR_AG_SB_SEC	0x8
+
+int clear_needsrepair(struct xfs_mount *mp);
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 9409f0d8..d36c5a21 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -1133,7 +1133,9 @@ _("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\
 	format_log_max_lsn(mp);
 
 	/* Report failure if anything failed to get written to our fs. */
-	error = -libxfs_umount(mp);
+	error = clear_needsrepair(mp);
+	if (!error)
+		error = -libxfs_umount(mp);
 	if (error)
 		do_error(
 	_("File system metadata writeout failed, err=%d.  Re-run xfs_repair.\n"),

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/2] xfs_repair: fix unmount error message to have a newline
  2021-01-16  1:24 [PATCHSET v2 0/2] xfs: add the ability to flag a fs for repair Darrick J. Wong
  2021-01-16  1:24 ` [PATCH 1/2] xfs_db: support the needsrepair feature flag in the version command Darrick J. Wong
  2021-01-16  1:24 ` [PATCH 2/2] xfs_repair: clear the needsrepair flag Darrick J. Wong
@ 2021-01-20  4:31 ` Darrick J. Wong
  2021-01-20 17:38   ` Brian Foster
  2 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2021-01-20  4:31 UTC (permalink / raw)
  To: sandeen; +Cc: linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

Add a newline so that this is consistent with the other error messages.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 repair/xfs_repair.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 724661d8..9409f0d8 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -1136,7 +1136,7 @@ _("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\
 	error = -libxfs_umount(mp);
 	if (error)
 		do_error(
-	_("File system metadata writeout failed, err=%d.  Re-run xfs_repair."),
+	_("File system metadata writeout failed, err=%d.  Re-run xfs_repair.\n"),
 				error);
 
 	libxfs_destroy(&x);

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v2.1 2/2] xfs_repair: clear the needsrepair flag
  2021-01-20  4:31   ` [PATCH v2.1 " Darrick J. Wong
@ 2021-01-20 17:38     ` Brian Foster
  2021-02-02 22:22       ` Darrick J. Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Brian Foster @ 2021-01-20 17:38 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Tue, Jan 19, 2021 at 08:31:28PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Clear the needsrepair flag, since it's used to prevent mounting of an
> inconsistent filesystem.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> v2.1: only remove needsrepair at the end of the xfs_repair run
> ---
>  include/xfs_mount.h |    1 +
>  libxfs/init.c       |    2 +-
>  repair/agheader.c   |   55 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  repair/agheader.h   |    2 ++
>  repair/xfs_repair.c |    4 +++-
>  5 files changed, 62 insertions(+), 2 deletions(-)
> 
...
> diff --git a/repair/agheader.c b/repair/agheader.c
> index 8bb99489..56a7f45c 100644
> --- a/repair/agheader.c
> +++ b/repair/agheader.c
> @@ -220,6 +220,40 @@ compare_sb(xfs_mount_t *mp, xfs_sb_t *sb)
>  	return(XR_OK);
>  }
>  
> +/* Clear needsrepair after a successful repair run. */
> +int
> +clear_needsrepair(
> +	struct xfs_mount	*mp)
> +{
> +	struct xfs_buf		*bp;
> +	int			error;
> +
> +	if (!xfs_sb_version_needsrepair(&mp->m_sb) || no_modify)
> +		return 0;
> +
> +	/* We must succeed at flushing all dirty buffers to disk. */
> +	error = -libxfs_flush_mount(mp);
> +	if (error)
> +		return error;
> +

Do we really need a new helper and buf get/relse cycle just to
incorporate the above flush? ISTM we could either lift the
libxfs_bcache_flush() call above the superblock update in the caller,
insert the libxfs_flush_mount() right after that, and just do:

	dsb->sb_features_incompat &= ~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;

... in the hunk that also updates the quota flags.

Though perhaps cleaner would be to keep the helper, but genericize it a
bit to something like final_sb_update() and fold in the qflags update
and whatever flush/ordering is required for the feature bit.

> +	/* Clear needsrepair from the superblock. */
> +	bp = libxfs_getsb(mp);
> +	if (!bp)
> +		return ENOMEM;
> +	if (bp->b_error) {
> +		error = bp->b_error;
> +		libxfs_buf_relse(bp);
> +		return -error;
> +	}
> +
> +	mp->m_sb.sb_features_incompat &= ~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> +
> +	libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);
> +	libxfs_buf_mark_dirty(bp);
> +	libxfs_buf_relse(bp);
> +	return 0;
> +}
> +
>  /*
>   * Possible fields that may have been set at mkfs time,
>   * sb_inoalignmt, sb_unit, sb_width and sb_dirblklog.
> @@ -452,6 +486,27 @@ secondary_sb_whack(
>  			rval |= XR_AG_SB_SEC;
>  	}
>  
> +	if (xfs_sb_version_needsrepair(sb)) {
> +		if (!no_modify)
> +			sb->sb_features_incompat &=
> +					~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;

I suspect this should be folded into the check below so we don't modify
the primary sb by accident (should some other check dirty it).

Brian

> +		if (i == 0) {
> +			if (!no_modify)
> +				do_warn(
> +	_("clearing needsrepair flag and regenerating metadata\n"));
> +			else
> +				do_warn(
> +	_("would clear needsrepair flag and regenerate metadata\n"));
> +		} else {
> +			/*
> +			 * Quietly clear needsrepair on the secondary supers as
> +			 * part of ensuring them.  If needsrepair is set on the
> +			 * primary, it will be done at the very end of repair.
> +			 */
> +			rval |= XR_AG_SB_SEC;
> +		}
> +	}
> +
>  	return(rval);
>  }
>  
> diff --git a/repair/agheader.h b/repair/agheader.h
> index a63827c8..552c1f70 100644
> --- a/repair/agheader.h
> +++ b/repair/agheader.h
> @@ -82,3 +82,5 @@ typedef struct fs_geo_list  {
>  #define XR_AG_AGF	0x2
>  #define XR_AG_AGI	0x4
>  #define XR_AG_SB_SEC	0x8
> +
> +int clear_needsrepair(struct xfs_mount *mp);
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index 9409f0d8..d36c5a21 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
> @@ -1133,7 +1133,9 @@ _("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\
>  	format_log_max_lsn(mp);
>  
>  	/* Report failure if anything failed to get written to our fs. */
> -	error = -libxfs_umount(mp);
> +	error = clear_needsrepair(mp);
> +	if (!error)
> +		error = -libxfs_umount(mp);
>  	if (error)
>  		do_error(
>  	_("File system metadata writeout failed, err=%d.  Re-run xfs_repair.\n"),
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/2] xfs_repair: fix unmount error message to have a newline
  2021-01-20  4:31 ` [PATCH 3/2] xfs_repair: fix unmount error message to have a newline Darrick J. Wong
@ 2021-01-20 17:38   ` Brian Foster
  0 siblings, 0 replies; 18+ messages in thread
From: Brian Foster @ 2021-01-20 17:38 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Tue, Jan 19, 2021 at 08:31:57PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Add a newline so that this is consistent with the other error messages.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  repair/xfs_repair.c |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index 724661d8..9409f0d8 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
> @@ -1136,7 +1136,7 @@ _("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\
>  	error = -libxfs_umount(mp);
>  	if (error)
>  		do_error(
> -	_("File system metadata writeout failed, err=%d.  Re-run xfs_repair."),
> +	_("File system metadata writeout failed, err=%d.  Re-run xfs_repair.\n"),
>  				error);
>  
>  	libxfs_destroy(&x);
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/2] xfs_db: support the needsrepair feature flag in the version command
  2021-01-19 14:37   ` Brian Foster
@ 2021-02-02 21:09     ` Darrick J. Wong
  2021-02-03 13:07       ` Brian Foster
  0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2021-02-02 21:09 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Tue, Jan 19, 2021 at 09:37:14AM -0500, Brian Foster wrote:
> On Fri, Jan 15, 2021 at 05:24:47PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Teach the xfs_db version command about the 'needsrepair' flag, which can
> > be used to force the system administrator to repair the filesystem with
> > xfs_repair.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  db/check.c           |    5 ++
> >  db/sb.c              |  153 ++++++++++++++++++++++++++++++++++++++++++++++++--
> >  db/xfs_admin.sh      |   10 ++-
> >  man/man8/xfs_admin.8 |   15 +++++
> >  man/man8/xfs_db.8    |    5 ++
> >  5 files changed, 178 insertions(+), 10 deletions(-)
> > 
> > 
> > diff --git a/db/check.c b/db/check.c
> > index 33736e33..485e855e 100644
> > --- a/db/check.c
> > +++ b/db/check.c
> > @@ -3970,6 +3970,11 @@ scan_ag(
> >  			dbprintf(_("mkfs not completed successfully\n"));
> >  		error++;
> >  	}
> > +	if (xfs_sb_version_needsrepair(sb)) {
> > +		if (!sflag)
> > +			dbprintf(_("filesystem needs xfs_repair\n"));
> > +		error++;
> > +	}
> >  	set_dbmap(agno, XFS_SB_BLOCK(mp), 1, DBM_SB, agno, XFS_SB_BLOCK(mp));
> >  	if (sb->sb_logstart && XFS_FSB_TO_AGNO(mp, sb->sb_logstart) == agno)
> >  		set_dbmap(agno, XFS_FSB_TO_AGBNO(mp, sb->sb_logstart),
> > diff --git a/db/sb.c b/db/sb.c
> > index d09f653d..fcc2a0ed 100644
> > --- a/db/sb.c
> > +++ b/db/sb.c
> ...
> > @@ -620,6 +633,112 @@ do_version(xfs_agnumber_t agno, uint16_t version, uint32_t features)
> >  	return 1;
> >  }
> >  
> ...
> > +static bool
> > +upgrade_v5_features(
> > +	struct xfs_mount	*mp,
> > +	const struct v5feat	*upgrade)
> > +{
> > +	struct xfs_sb		tsb;
> > +	struct v5feat		old;
> > +	xfs_agnumber_t		agno = 0;
> > +	xfs_agnumber_t		revert_agno = 0;
> > +
> > +	if (upgrade->compat == mp->m_sb.sb_features_compat &&
> > +	    upgrade->ro_compat == mp->m_sb.sb_features_ro_compat &&
> > +	    upgrade->incompat == mp->m_sb.sb_features_incompat &&
> > +	    upgrade->log_incompat == mp->m_sb.sb_features_log_incompat)
> > +		return true;
> > +
> > +	/* Upgrade primary superblock. */
> > +	if (!get_sb(agno, &tsb))
> > +		goto fail;
> > +
> > +	dbprintf(_("Upgrading V5 filesystem\n"));
> > +
> > +	/* Save the old primary super features in case we revert. */
> > +	get_v5_features(&old, &tsb);
> > +
> > +	/* Update features and force user to run repair before mounting. */
> > +	set_v5_features(&tsb, upgrade);
> > +	tsb.sb_features_incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> > +
> 
> So we explicitly update the primary sb with NEEDSREPAIR...
> 
> > +	/* Write new primary superblock */
> > +	libxfs_sb_to_disk(iocur_top->data, &tsb);
> > +	write_cur();
> > +	if (!iocur_top->bp || iocur_top->bp->b_error)
> > +		goto fail;
> > +
> > +	/* Update the secondary superblocks, or revert. */
> > +	for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) {
> > +		if (!get_sb(agno, &tsb)) {
> > +			agno--;
> > +			goto revert;
> > +		}
> > +
> > +		/* Set features and write secondary super. */
> > +		set_v5_features(&tsb, upgrade);
> > +		libxfs_sb_to_disk(iocur_top->data, &tsb);
> > +		write_cur();
> 
> ... but only set NEEDSREPAIR on secondaries if it was set in upgrade by
> the caller?

I didn't think it was necessary to set needsrepair on the secondaries
since xfs_repair is the only program that reads them.  However, I don't
mind doing that for consistency sake.

> > +
> > +		/* Write or abort. */
> > +		if (!iocur_top->bp || iocur_top->bp->b_error)
> > +			goto revert;
> > +	}
> > +
> > +	/* All superblocks updated, update the incore values. */
> > +	set_v5_features(&mp->m_sb, upgrade);
> 
> This also looks like it has the same behavior. I.e., on a 'version
> bigtime' upgrade wouldn't have NEEDSREPAIR set.

Oops.  Good catch, will fix.

> > +	dbprintf(_("Upgraded V5 filesystem.  Please run xfs_repair.\n"));
> > +	return true;
> > +
> > +revert:
> > +	/*
> > +	 * Try to revert feature flag changes, and don't worry if we fail.
> > +	 * We're probably in a mess anyhow, and the admin will have to run
> > +	 * repair anyways.
> > +	 */
> > +	for (revert_agno = 0; revert_agno <= agno; revert_agno++) {
> > +		if (!get_sb(revert_agno, &tsb))
> > +			continue;
> > +
> > +		set_v5_features(&tsb, &old);
> > +		libxfs_sb_to_disk(iocur_top->data, &tsb);
> > +		write_cur();
> > +	}
> > +fail:
> > +	dbprintf(
> > +_("Failed to upgrade V5 filesystem at AG %d, please run xfs_repair.\n"), agno);
> > +	return false;
> > +}
> > +
> >  static char *
> >  version_string(
> >  	xfs_sb_t	*sbp)
> ...
> > @@ -717,8 +836,23 @@ version_f(
> >  			return 0;
> >  		}
> >  
> > +		if (xfs_sb_version_needsrepair(&mp->m_sb)) {
> > +			dbprintf(_("%s: filesystem needs xfs_repair\n"),
> > +				progname);
> > +			return 0;
> > +		}
> > +
> >  		/* Logic here derived from the IRIX xfs_chver(1M) script. */
> > -		if (!strcasecmp(argv[1], "extflg")) {
> > +		if (!strcasecmp(argv[1], "needsrepair")) {
> > +			if (!xfs_sb_version_hascrc(&mp->m_sb)) {
> > +				dbprintf(
> > +		_("needsrepair flag cannot be enabled on pre-V5 filesystems\n"));
> > +				exitcode = 2;
> > +				return 1;
> 
> Hmm.. I see that exitcode 1 means error && xfs_repair while exitcode 2
> means error && !xfs_repair, but I'm still not sure I follow the high
> level error semantics, particularly if we happen to fail updating
> secondary supers. I wonder if it would be more straightforward to have
> xfs_db only return an error when an update attempt occurs and fails and
> then let xfs_admin run xfs_repair if status == 0 && NEEDSREPAIR is set.

Hm.  I'd be even more tempted to make it run if xfs_db just failed.

> I suppose the various other ".. bit already set" or "v5 super required"
> conditions don't really need to be errors and thus repair would only run
> in those cases if NEEDSREPAIR was still set on the fs. Otherwise if
> xfs_db fails we dump an error message and encourage the user to run
> xfs_repair themselves.

Yeah, that does seem more reasonable.  I'll change xfs_admin to force a
run through repair if the NEEDSREPAIR feature is set or if the xfs_db
command failed, since that probably means something's wrong with the fs.

> There are still corner cases I guess, but that does _seem_ a bit more
> elegant to me. Otherwise I suppose a comment somewhere that explains
> when/why to use which error code would be helpful.

<nod> I'll put them in.

--D

> Brian
> 
> > +			}
> > +
> > +			v5features.incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> > +		} else if (!strcasecmp(argv[1], "extflg")) {
> >  			switch (XFS_SB_VERSION_NUM(&mp->m_sb)) {
> >  			case XFS_SB_VERSION_1:
> >  				version = 0x0004 | XFS_SB_VERSION_EXTFLGBIT;
> > @@ -809,6 +943,11 @@ version_f(
> >  			mp->m_sb.sb_versionnum = version;
> >  			mp->m_sb.sb_features2 = features;
> >  		}
> > +
> > +		if (!upgrade_v5_features(mp, &v5features)) {
> > +			exitcode = 1;
> > +			return 1;
> > +		}
> >  	}
> >  
> >  	if (argc == 3) {	/* VERSIONNUM + FEATURES2 */
> > diff --git a/db/xfs_admin.sh b/db/xfs_admin.sh
> > index bd325da2..0e79bbf9 100755
> > --- a/db/xfs_admin.sh
> > +++ b/db/xfs_admin.sh
> > @@ -7,9 +7,9 @@
> >  status=0
> >  DB_OPTS=""
> >  REPAIR_OPTS=""
> > -USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] device [logdev]"
> > +USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] [-O v5_feature] device [logdev]"
> >  
> > -while getopts "efjlpuc:L:U:V" c
> > +while getopts "efjlpuc:L:O:U:V" c
> >  do
> >  	case $c in
> >  	c)	REPAIR_OPTS=$REPAIR_OPTS" -c lazycount="$OPTARG;;
> > @@ -19,6 +19,9 @@ do
> >  	l)	DB_OPTS=$DB_OPTS" -r -c label";;
> >  	L)	DB_OPTS=$DB_OPTS" -c 'label "$OPTARG"'";;
> >  	p)	DB_OPTS=$DB_OPTS" -c 'version projid32bit'";;
> > +	O)	DB_OPTS=$DB_OPTS" -c 'version "$OPTARG"'";
> > +		# Force repair to run by adding a single space to REPAIR_OPTS
> > +		REPAIR_OPTS="$REPAIR_OPTS ";;
> >  	u)	DB_OPTS=$DB_OPTS" -r -c uuid";;
> >  	U)	DB_OPTS=$DB_OPTS" -c 'uuid "$OPTARG"'";;
> >  	V)	xfs_db -p xfs_admin -V
> > @@ -34,6 +37,7 @@ set -- extra $@
> >  shift $OPTIND
> >  case $# in
> >  	1|2)
> > +		status=0
> >  		# Pick up the log device, if present
> >  		if [ -n "$2" ]; then
> >  			DB_OPTS=$DB_OPTS" -l '$2'"
> > @@ -46,7 +50,7 @@ case $# in
> >  			eval xfs_db -x -p xfs_admin $DB_OPTS $1
> >  			status=$?
> >  		fi
> > -		if [ -n "$REPAIR_OPTS" ]
> > +		if [ -n "$REPAIR_OPTS" ] && [ $status -ne 2 ]
> >  		then
> >  			# Hide normal repair output which is sent to stderr
> >  			# assuming the filesystem is fine when a user is
> > diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8
> > index 8afc873f..b423981d 100644
> > --- a/man/man8/xfs_admin.8
> > +++ b/man/man8/xfs_admin.8
> > @@ -6,6 +6,8 @@ xfs_admin \- change parameters of an XFS filesystem
> >  [
> >  .B \-eflpu
> >  ] [
> > +.BI \-O " feature"
> > +] [
> >  .BR "\-c 0" | 1
> >  ] [
> >  .B \-L
> > @@ -103,6 +105,19 @@ The filesystem label can be cleared using the special "\c
> >  " value for
> >  .IR label .
> >  .TP
> > +.BI \-O " feature"
> > +Add a new feature to the filesystem.
> > +Only one feature can be specified at a time.
> > +Features are as follows:
> > +.RS 0.7i
> > +.TP
> > +.B needsrepair
> > +If this is a V5 filesystem, flag the filesystem as needing repairs.
> > +Until
> > +.BR xfs_repair (8)
> > +is run, the filesystem will not be mountable.
> > +.RE
> > +.TP
> >  .BI \-U " uuid"
> >  Set the UUID of the filesystem to
> >  .IR uuid .
> > diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8
> > index 58727495..7331cf19 100644
> > --- a/man/man8/xfs_db.8
> > +++ b/man/man8/xfs_db.8
> > @@ -971,6 +971,11 @@ may toggle between
> >  and
> >  .B attr2
> >  at will (older kernels may not support the newer version).
> > +The filesystem can be flagged as requiring a run through
> > +.BR xfs_repair (8)
> > +if the
> > +.B needsrepair
> > +option is specified and the filesystem is formatted with the V5 format.
> >  .IP
> >  If no argument is given, the current version and feature bits are printed.
> >  With one argument, this command will write the updated version number
> > 
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2.1 2/2] xfs_repair: clear the needsrepair flag
  2021-01-20 17:38     ` Brian Foster
@ 2021-02-02 22:22       ` Darrick J. Wong
  0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2021-02-02 22:22 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Wed, Jan 20, 2021 at 12:38:20PM -0500, Brian Foster wrote:
> On Tue, Jan 19, 2021 at 08:31:28PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Clear the needsrepair flag, since it's used to prevent mounting of an
> > inconsistent filesystem.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> > v2.1: only remove needsrepair at the end of the xfs_repair run
> > ---
> >  include/xfs_mount.h |    1 +
> >  libxfs/init.c       |    2 +-
> >  repair/agheader.c   |   55 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  repair/agheader.h   |    2 ++
> >  repair/xfs_repair.c |    4 +++-
> >  5 files changed, 62 insertions(+), 2 deletions(-)
> > 
> ...
> > diff --git a/repair/agheader.c b/repair/agheader.c
> > index 8bb99489..56a7f45c 100644
> > --- a/repair/agheader.c
> > +++ b/repair/agheader.c
> > @@ -220,6 +220,40 @@ compare_sb(xfs_mount_t *mp, xfs_sb_t *sb)
> >  	return(XR_OK);
> >  }
> >  
> > +/* Clear needsrepair after a successful repair run. */
> > +int
> > +clear_needsrepair(
> > +	struct xfs_mount	*mp)
> > +{
> > +	struct xfs_buf		*bp;
> > +	int			error;
> > +
> > +	if (!xfs_sb_version_needsrepair(&mp->m_sb) || no_modify)
> > +		return 0;
> > +
> > +	/* We must succeed at flushing all dirty buffers to disk. */
> > +	error = -libxfs_flush_mount(mp);
> > +	if (error)
> > +		return error;
> > +
> 
> Do we really need a new helper and buf get/relse cycle just to
> incorporate the above flush? ISTM we could either lift the

Yes.  If quotacheck thinks the dquots are bad, we always want to try to
clear the CHKD flags in the primary superblock, even if other disk
writes fail due to IO errors or write verifiers failing, etc.  Note that
libxfs_bcache_flush only pwrite()s the dirty buffers to disk, it doesn't
force the disks to persist to stable media.

However, if NEEDSREPAIR was set and /any/ part of persisting the dirty
buffers fails (write verifier trips, media errors, etc.) then we don't
even want to try to clear NEEDSREPAIR.

Because of that requirement, the two writes have to be separate steps,
separated by a big flush.

--D

> libxfs_bcache_flush() call above the superblock update in the caller,
> insert the libxfs_flush_mount() right after that, and just do:
> 
> 	dsb->sb_features_incompat &= ~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> 
> ... in the hunk that also updates the quota flags.
> 
> Though perhaps cleaner would be to keep the helper, but genericize it a
> bit to something like final_sb_update() and fold in the qflags update
> and whatever flush/ordering is required for the feature bit.
> 
> > +	/* Clear needsrepair from the superblock. */
> > +	bp = libxfs_getsb(mp);
> > +	if (!bp)
> > +		return ENOMEM;
> > +	if (bp->b_error) {
> > +		error = bp->b_error;
> > +		libxfs_buf_relse(bp);
> > +		return -error;
> > +	}
> > +
> > +	mp->m_sb.sb_features_incompat &= ~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> > +
> > +	libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);
> > +	libxfs_buf_mark_dirty(bp);
> > +	libxfs_buf_relse(bp);
> > +	return 0;
> > +}
> > +
> >  /*
> >   * Possible fields that may have been set at mkfs time,
> >   * sb_inoalignmt, sb_unit, sb_width and sb_dirblklog.
> > @@ -452,6 +486,27 @@ secondary_sb_whack(
> >  			rval |= XR_AG_SB_SEC;
> >  	}
> >  
> > +	if (xfs_sb_version_needsrepair(sb)) {
> > +		if (!no_modify)
> > +			sb->sb_features_incompat &=
> > +					~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> 
> I suspect this should be folded into the check below so we don't modify
> the primary sb by accident (should some other check dirty it).

Yup.  Fixed.

--D

> 
> Brian
> 
> > +		if (i == 0) {
> > +			if (!no_modify)
> > +				do_warn(
> > +	_("clearing needsrepair flag and regenerating metadata\n"));
> > +			else
> > +				do_warn(
> > +	_("would clear needsrepair flag and regenerate metadata\n"));
> > +		} else {
> > +			/*
> > +			 * Quietly clear needsrepair on the secondary supers as
> > +			 * part of ensuring them.  If needsrepair is set on the
> > +			 * primary, it will be done at the very end of repair.
> > +			 */
> > +			rval |= XR_AG_SB_SEC;
> > +		}
> > +	}
> > +
> >  	return(rval);
> >  }
> >  
> > diff --git a/repair/agheader.h b/repair/agheader.h
> > index a63827c8..552c1f70 100644
> > --- a/repair/agheader.h
> > +++ b/repair/agheader.h
> > @@ -82,3 +82,5 @@ typedef struct fs_geo_list  {
> >  #define XR_AG_AGF	0x2
> >  #define XR_AG_AGI	0x4
> >  #define XR_AG_SB_SEC	0x8
> > +
> > +int clear_needsrepair(struct xfs_mount *mp);
> > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > index 9409f0d8..d36c5a21 100644
> > --- a/repair/xfs_repair.c
> > +++ b/repair/xfs_repair.c
> > @@ -1133,7 +1133,9 @@ _("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\
> >  	format_log_max_lsn(mp);
> >  
> >  	/* Report failure if anything failed to get written to our fs. */
> > -	error = -libxfs_umount(mp);
> > +	error = clear_needsrepair(mp);
> > +	if (!error)
> > +		error = -libxfs_umount(mp);
> >  	if (error)
> >  		do_error(
> >  	_("File system metadata writeout failed, err=%d.  Re-run xfs_repair.\n"),
> > 
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/2] xfs_db: support the needsrepair feature flag in the version command
  2021-02-02 21:09     ` Darrick J. Wong
@ 2021-02-03 13:07       ` Brian Foster
  2021-02-03 18:30         ` Darrick J. Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Brian Foster @ 2021-02-03 13:07 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Tue, Feb 02, 2021 at 01:09:15PM -0800, Darrick J. Wong wrote:
> On Tue, Jan 19, 2021 at 09:37:14AM -0500, Brian Foster wrote:
> > On Fri, Jan 15, 2021 at 05:24:47PM -0800, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <djwong@kernel.org>
> > > 
> > > Teach the xfs_db version command about the 'needsrepair' flag, which can
> > > be used to force the system administrator to repair the filesystem with
> > > xfs_repair.
> > > 
> > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > ---
> > >  db/check.c           |    5 ++
> > >  db/sb.c              |  153 ++++++++++++++++++++++++++++++++++++++++++++++++--
> > >  db/xfs_admin.sh      |   10 ++-
> > >  man/man8/xfs_admin.8 |   15 +++++
> > >  man/man8/xfs_db.8    |    5 ++
> > >  5 files changed, 178 insertions(+), 10 deletions(-)
> > > 
> > > 
...
> > > diff --git a/db/sb.c b/db/sb.c
> > > index d09f653d..fcc2a0ed 100644
> > > --- a/db/sb.c
> > > +++ b/db/sb.c
...
> > > @@ -717,8 +836,23 @@ version_f(
> > >  			return 0;
> > >  		}
> > >  
> > > +		if (xfs_sb_version_needsrepair(&mp->m_sb)) {
> > > +			dbprintf(_("%s: filesystem needs xfs_repair\n"),
> > > +				progname);
> > > +			return 0;
> > > +		}
> > > +
> > >  		/* Logic here derived from the IRIX xfs_chver(1M) script. */
> > > -		if (!strcasecmp(argv[1], "extflg")) {
> > > +		if (!strcasecmp(argv[1], "needsrepair")) {
> > > +			if (!xfs_sb_version_hascrc(&mp->m_sb)) {
> > > +				dbprintf(
> > > +		_("needsrepair flag cannot be enabled on pre-V5 filesystems\n"));
> > > +				exitcode = 2;
> > > +				return 1;
> > 
> > Hmm.. I see that exitcode 1 means error && xfs_repair while exitcode 2
> > means error && !xfs_repair, but I'm still not sure I follow the high
> > level error semantics, particularly if we happen to fail updating
> > secondary supers. I wonder if it would be more straightforward to have
> > xfs_db only return an error when an update attempt occurs and fails and
> > then let xfs_admin run xfs_repair if status == 0 && NEEDSREPAIR is set.
> 
> Hm.  I'd be even more tempted to make it run if xfs_db just failed.
> 
> > I suppose the various other ".. bit already set" or "v5 super required"
> > conditions don't really need to be errors and thus repair would only run
> > in those cases if NEEDSREPAIR was still set on the fs. Otherwise if
> > xfs_db fails we dump an error message and encourage the user to run
> > xfs_repair themselves.
> 
> Yeah, that does seem more reasonable.  I'll change xfs_admin to force a
> run through repair if the NEEDSREPAIR feature is set or if the xfs_db
> command failed, since that probably means something's wrong with the fs.
> 

Perhaps, or the storage if an I/O happens to fail or something. I'm not
sure we should go that route where if the version upgrade happens to
fail we do a "well this operation failed, but something is probably
wrong so let me try and repair that for you." I'd personally be kind of
annoyed by that if I didn't have an opportunity to analyze the problem
before making the decision to run a (potentially destructive) repair
operation. I agree it's an unlikely situation, but IMO the automatically
invoked repair should be isolated to the specific case that warrants it
and everything else should probably just bail out.

The user really doesn't need to know or care that a repair is involved
in the first place, so ISTM that either "upgrade operation succeeded" or
"upgrade operation failed, fs has problems" is fairly expected failure
handling behavior (as opposed to allowing things like "upgrade operation
failed, we ran repair anyways and fixed some other stuff, maybe try that
upgrade again since the last one basically just invoked xfs_repair?").
If we really wanted to be careful, we could even run a repair -n first
and require it succeed before attempting to touch the superblock. That
might be annoying in cases where repair takes forever, but at least if
the user bails on it it would likely be before we've modified anything.

Brian

> > There are still corner cases I guess, but that does _seem_ a bit more
> > elegant to me. Otherwise I suppose a comment somewhere that explains
> > when/why to use which error code would be helpful.
> 
> <nod> I'll put them in.
> 
> --D
> 
> > Brian
> > 
> > > +			}
> > > +
> > > +			v5features.incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> > > +		} else if (!strcasecmp(argv[1], "extflg")) {
> > >  			switch (XFS_SB_VERSION_NUM(&mp->m_sb)) {
> > >  			case XFS_SB_VERSION_1:
> > >  				version = 0x0004 | XFS_SB_VERSION_EXTFLGBIT;
> > > @@ -809,6 +943,11 @@ version_f(
> > >  			mp->m_sb.sb_versionnum = version;
> > >  			mp->m_sb.sb_features2 = features;
> > >  		}
> > > +
> > > +		if (!upgrade_v5_features(mp, &v5features)) {
> > > +			exitcode = 1;
> > > +			return 1;
> > > +		}
> > >  	}
> > >  
> > >  	if (argc == 3) {	/* VERSIONNUM + FEATURES2 */
> > > diff --git a/db/xfs_admin.sh b/db/xfs_admin.sh
> > > index bd325da2..0e79bbf9 100755
> > > --- a/db/xfs_admin.sh
> > > +++ b/db/xfs_admin.sh
> > > @@ -7,9 +7,9 @@
> > >  status=0
> > >  DB_OPTS=""
> > >  REPAIR_OPTS=""
> > > -USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] device [logdev]"
> > > +USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] [-O v5_feature] device [logdev]"
> > >  
> > > -while getopts "efjlpuc:L:U:V" c
> > > +while getopts "efjlpuc:L:O:U:V" c
> > >  do
> > >  	case $c in
> > >  	c)	REPAIR_OPTS=$REPAIR_OPTS" -c lazycount="$OPTARG;;
> > > @@ -19,6 +19,9 @@ do
> > >  	l)	DB_OPTS=$DB_OPTS" -r -c label";;
> > >  	L)	DB_OPTS=$DB_OPTS" -c 'label "$OPTARG"'";;
> > >  	p)	DB_OPTS=$DB_OPTS" -c 'version projid32bit'";;
> > > +	O)	DB_OPTS=$DB_OPTS" -c 'version "$OPTARG"'";
> > > +		# Force repair to run by adding a single space to REPAIR_OPTS
> > > +		REPAIR_OPTS="$REPAIR_OPTS ";;
> > >  	u)	DB_OPTS=$DB_OPTS" -r -c uuid";;
> > >  	U)	DB_OPTS=$DB_OPTS" -c 'uuid "$OPTARG"'";;
> > >  	V)	xfs_db -p xfs_admin -V
> > > @@ -34,6 +37,7 @@ set -- extra $@
> > >  shift $OPTIND
> > >  case $# in
> > >  	1|2)
> > > +		status=0
> > >  		# Pick up the log device, if present
> > >  		if [ -n "$2" ]; then
> > >  			DB_OPTS=$DB_OPTS" -l '$2'"
> > > @@ -46,7 +50,7 @@ case $# in
> > >  			eval xfs_db -x -p xfs_admin $DB_OPTS $1
> > >  			status=$?
> > >  		fi
> > > -		if [ -n "$REPAIR_OPTS" ]
> > > +		if [ -n "$REPAIR_OPTS" ] && [ $status -ne 2 ]
> > >  		then
> > >  			# Hide normal repair output which is sent to stderr
> > >  			# assuming the filesystem is fine when a user is
> > > diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8
> > > index 8afc873f..b423981d 100644
> > > --- a/man/man8/xfs_admin.8
> > > +++ b/man/man8/xfs_admin.8
> > > @@ -6,6 +6,8 @@ xfs_admin \- change parameters of an XFS filesystem
> > >  [
> > >  .B \-eflpu
> > >  ] [
> > > +.BI \-O " feature"
> > > +] [
> > >  .BR "\-c 0" | 1
> > >  ] [
> > >  .B \-L
> > > @@ -103,6 +105,19 @@ The filesystem label can be cleared using the special "\c
> > >  " value for
> > >  .IR label .
> > >  .TP
> > > +.BI \-O " feature"
> > > +Add a new feature to the filesystem.
> > > +Only one feature can be specified at a time.
> > > +Features are as follows:
> > > +.RS 0.7i
> > > +.TP
> > > +.B needsrepair
> > > +If this is a V5 filesystem, flag the filesystem as needing repairs.
> > > +Until
> > > +.BR xfs_repair (8)
> > > +is run, the filesystem will not be mountable.
> > > +.RE
> > > +.TP
> > >  .BI \-U " uuid"
> > >  Set the UUID of the filesystem to
> > >  .IR uuid .
> > > diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8
> > > index 58727495..7331cf19 100644
> > > --- a/man/man8/xfs_db.8
> > > +++ b/man/man8/xfs_db.8
> > > @@ -971,6 +971,11 @@ may toggle between
> > >  and
> > >  .B attr2
> > >  at will (older kernels may not support the newer version).
> > > +The filesystem can be flagged as requiring a run through
> > > +.BR xfs_repair (8)
> > > +if the
> > > +.B needsrepair
> > > +option is specified and the filesystem is formatted with the V5 format.
> > >  .IP
> > >  If no argument is given, the current version and feature bits are printed.
> > >  With one argument, this command will write the updated version number
> > > 
> > 
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/2] xfs_db: support the needsrepair feature flag in the version command
  2021-02-03 13:07       ` Brian Foster
@ 2021-02-03 18:30         ` Darrick J. Wong
  2021-02-03 19:09           ` Brian Foster
  0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2021-02-03 18:30 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Wed, Feb 03, 2021 at 08:07:21AM -0500, Brian Foster wrote:
> On Tue, Feb 02, 2021 at 01:09:15PM -0800, Darrick J. Wong wrote:
> > On Tue, Jan 19, 2021 at 09:37:14AM -0500, Brian Foster wrote:
> > > On Fri, Jan 15, 2021 at 05:24:47PM -0800, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <djwong@kernel.org>
> > > > 
> > > > Teach the xfs_db version command about the 'needsrepair' flag, which can
> > > > be used to force the system administrator to repair the filesystem with
> > > > xfs_repair.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > > ---
> > > >  db/check.c           |    5 ++
> > > >  db/sb.c              |  153 ++++++++++++++++++++++++++++++++++++++++++++++++--
> > > >  db/xfs_admin.sh      |   10 ++-
> > > >  man/man8/xfs_admin.8 |   15 +++++
> > > >  man/man8/xfs_db.8    |    5 ++
> > > >  5 files changed, 178 insertions(+), 10 deletions(-)
> > > > 
> > > > 
> ...
> > > > diff --git a/db/sb.c b/db/sb.c
> > > > index d09f653d..fcc2a0ed 100644
> > > > --- a/db/sb.c
> > > > +++ b/db/sb.c
> ...
> > > > @@ -717,8 +836,23 @@ version_f(
> > > >  			return 0;
> > > >  		}
> > > >  
> > > > +		if (xfs_sb_version_needsrepair(&mp->m_sb)) {
> > > > +			dbprintf(_("%s: filesystem needs xfs_repair\n"),
> > > > +				progname);
> > > > +			return 0;
> > > > +		}
> > > > +
> > > >  		/* Logic here derived from the IRIX xfs_chver(1M) script. */
> > > > -		if (!strcasecmp(argv[1], "extflg")) {
> > > > +		if (!strcasecmp(argv[1], "needsrepair")) {
> > > > +			if (!xfs_sb_version_hascrc(&mp->m_sb)) {
> > > > +				dbprintf(
> > > > +		_("needsrepair flag cannot be enabled on pre-V5 filesystems\n"));
> > > > +				exitcode = 2;
> > > > +				return 1;
> > > 
> > > Hmm.. I see that exitcode 1 means error && xfs_repair while exitcode 2
> > > means error && !xfs_repair, but I'm still not sure I follow the high
> > > level error semantics, particularly if we happen to fail updating
> > > secondary supers. I wonder if it would be more straightforward to have
> > > xfs_db only return an error when an update attempt occurs and fails and
> > > then let xfs_admin run xfs_repair if status == 0 && NEEDSREPAIR is set.
> > 
> > Hm.  I'd be even more tempted to make it run if xfs_db just failed.
> > 
> > > I suppose the various other ".. bit already set" or "v5 super required"
> > > conditions don't really need to be errors and thus repair would only run
> > > in those cases if NEEDSREPAIR was still set on the fs. Otherwise if
> > > xfs_db fails we dump an error message and encourage the user to run
> > > xfs_repair themselves.
> > 
> > Yeah, that does seem more reasonable.  I'll change xfs_admin to force a
> > run through repair if the NEEDSREPAIR feature is set or if the xfs_db
> > command failed, since that probably means something's wrong with the fs.
> > 
> 
> Perhaps, or the storage if an I/O happens to fail or something. I'm not
> sure we should go that route where if the version upgrade happens to
> fail we do a "well this operation failed, but something is probably
> wrong so let me try and repair that for you." I'd personally be kind of
> annoyed by that if I didn't have an opportunity to analyze the problem
> before making the decision to run a (potentially destructive) repair
> operation. I agree it's an unlikely situation, but IMO the automatically
> invoked repair should be isolated to the specific case that warrants it
> and everything else should probably just bail out.

Hm, good point, considering how long it can take to run repair.  I'm
sure there's some sysadmin out there who would actually prefer to nuke
the whole fs/node if the upgrade fails.

> The user really doesn't need to know or care that a repair is involved
> in the first place, so ISTM that either "upgrade operation succeeded" or
> "upgrade operation failed, fs has problems" is fairly expected failure
> handling behavior (as opposed to allowing things like "upgrade operation
> failed, we ran repair anyways and fixed some other stuff, maybe try that
> upgrade again since the last one basically just invoked xfs_repair?").
> If we really wanted to be careful, we could even run a repair -n first
> and require it succeed before attempting to touch the superblock. That
> might be annoying in cases where repair takes forever, but at least if
> the user bails on it it would likely be before we've modified anything.

...or make it more likely that the user ^Cs and never uses our
scurrilous upgrader tool. :)

I dunno, I've a slight preference for /knowing/ that the fs isn't a
crazy tangle of crap before we try to upgrade it.  Though as I've
mentioned before, resize2fs has weird heuristics to guesstimate if a
filesystem is clean "enough", and ext4 even tracks the last fsck and
mount times.  I don't really want to go down that path.

Perhaps we should simply document xfs_repair -n as a preparation step?

--D

> 
> Brian
> 
> > > There are still corner cases I guess, but that does _seem_ a bit more
> > > elegant to me. Otherwise I suppose a comment somewhere that explains
> > > when/why to use which error code would be helpful.
> > 
> > <nod> I'll put them in.
> > 
> > --D
> > 
> > > Brian
> > > 
> > > > +			}
> > > > +
> > > > +			v5features.incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> > > > +		} else if (!strcasecmp(argv[1], "extflg")) {
> > > >  			switch (XFS_SB_VERSION_NUM(&mp->m_sb)) {
> > > >  			case XFS_SB_VERSION_1:
> > > >  				version = 0x0004 | XFS_SB_VERSION_EXTFLGBIT;
> > > > @@ -809,6 +943,11 @@ version_f(
> > > >  			mp->m_sb.sb_versionnum = version;
> > > >  			mp->m_sb.sb_features2 = features;
> > > >  		}
> > > > +
> > > > +		if (!upgrade_v5_features(mp, &v5features)) {
> > > > +			exitcode = 1;
> > > > +			return 1;
> > > > +		}
> > > >  	}
> > > >  
> > > >  	if (argc == 3) {	/* VERSIONNUM + FEATURES2 */
> > > > diff --git a/db/xfs_admin.sh b/db/xfs_admin.sh
> > > > index bd325da2..0e79bbf9 100755
> > > > --- a/db/xfs_admin.sh
> > > > +++ b/db/xfs_admin.sh
> > > > @@ -7,9 +7,9 @@
> > > >  status=0
> > > >  DB_OPTS=""
> > > >  REPAIR_OPTS=""
> > > > -USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] device [logdev]"
> > > > +USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] [-O v5_feature] device [logdev]"
> > > >  
> > > > -while getopts "efjlpuc:L:U:V" c
> > > > +while getopts "efjlpuc:L:O:U:V" c
> > > >  do
> > > >  	case $c in
> > > >  	c)	REPAIR_OPTS=$REPAIR_OPTS" -c lazycount="$OPTARG;;
> > > > @@ -19,6 +19,9 @@ do
> > > >  	l)	DB_OPTS=$DB_OPTS" -r -c label";;
> > > >  	L)	DB_OPTS=$DB_OPTS" -c 'label "$OPTARG"'";;
> > > >  	p)	DB_OPTS=$DB_OPTS" -c 'version projid32bit'";;
> > > > +	O)	DB_OPTS=$DB_OPTS" -c 'version "$OPTARG"'";
> > > > +		# Force repair to run by adding a single space to REPAIR_OPTS
> > > > +		REPAIR_OPTS="$REPAIR_OPTS ";;
> > > >  	u)	DB_OPTS=$DB_OPTS" -r -c uuid";;
> > > >  	U)	DB_OPTS=$DB_OPTS" -c 'uuid "$OPTARG"'";;
> > > >  	V)	xfs_db -p xfs_admin -V
> > > > @@ -34,6 +37,7 @@ set -- extra $@
> > > >  shift $OPTIND
> > > >  case $# in
> > > >  	1|2)
> > > > +		status=0
> > > >  		# Pick up the log device, if present
> > > >  		if [ -n "$2" ]; then
> > > >  			DB_OPTS=$DB_OPTS" -l '$2'"
> > > > @@ -46,7 +50,7 @@ case $# in
> > > >  			eval xfs_db -x -p xfs_admin $DB_OPTS $1
> > > >  			status=$?
> > > >  		fi
> > > > -		if [ -n "$REPAIR_OPTS" ]
> > > > +		if [ -n "$REPAIR_OPTS" ] && [ $status -ne 2 ]
> > > >  		then
> > > >  			# Hide normal repair output which is sent to stderr
> > > >  			# assuming the filesystem is fine when a user is
> > > > diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8
> > > > index 8afc873f..b423981d 100644
> > > > --- a/man/man8/xfs_admin.8
> > > > +++ b/man/man8/xfs_admin.8
> > > > @@ -6,6 +6,8 @@ xfs_admin \- change parameters of an XFS filesystem
> > > >  [
> > > >  .B \-eflpu
> > > >  ] [
> > > > +.BI \-O " feature"
> > > > +] [
> > > >  .BR "\-c 0" | 1
> > > >  ] [
> > > >  .B \-L
> > > > @@ -103,6 +105,19 @@ The filesystem label can be cleared using the special "\c
> > > >  " value for
> > > >  .IR label .
> > > >  .TP
> > > > +.BI \-O " feature"
> > > > +Add a new feature to the filesystem.
> > > > +Only one feature can be specified at a time.
> > > > +Features are as follows:
> > > > +.RS 0.7i
> > > > +.TP
> > > > +.B needsrepair
> > > > +If this is a V5 filesystem, flag the filesystem as needing repairs.
> > > > +Until
> > > > +.BR xfs_repair (8)
> > > > +is run, the filesystem will not be mountable.
> > > > +.RE
> > > > +.TP
> > > >  .BI \-U " uuid"
> > > >  Set the UUID of the filesystem to
> > > >  .IR uuid .
> > > > diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8
> > > > index 58727495..7331cf19 100644
> > > > --- a/man/man8/xfs_db.8
> > > > +++ b/man/man8/xfs_db.8
> > > > @@ -971,6 +971,11 @@ may toggle between
> > > >  and
> > > >  .B attr2
> > > >  at will (older kernels may not support the newer version).
> > > > +The filesystem can be flagged as requiring a run through
> > > > +.BR xfs_repair (8)
> > > > +if the
> > > > +.B needsrepair
> > > > +option is specified and the filesystem is formatted with the V5 format.
> > > >  .IP
> > > >  If no argument is given, the current version and feature bits are printed.
> > > >  With one argument, this command will write the updated version number
> > > > 
> > > 
> > 
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/2] xfs_db: support the needsrepair feature flag in the version command
  2021-02-03 18:30         ` Darrick J. Wong
@ 2021-02-03 19:09           ` Brian Foster
  0 siblings, 0 replies; 18+ messages in thread
From: Brian Foster @ 2021-02-03 19:09 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Wed, Feb 03, 2021 at 10:30:57AM -0800, Darrick J. Wong wrote:
> On Wed, Feb 03, 2021 at 08:07:21AM -0500, Brian Foster wrote:
> > On Tue, Feb 02, 2021 at 01:09:15PM -0800, Darrick J. Wong wrote:
> > > On Tue, Jan 19, 2021 at 09:37:14AM -0500, Brian Foster wrote:
> > > > On Fri, Jan 15, 2021 at 05:24:47PM -0800, Darrick J. Wong wrote:
> > > > > From: Darrick J. Wong <djwong@kernel.org>
> > > > > 
> > > > > Teach the xfs_db version command about the 'needsrepair' flag, which can
> > > > > be used to force the system administrator to repair the filesystem with
> > > > > xfs_repair.
> > > > > 
> > > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > > > ---
> > > > >  db/check.c           |    5 ++
> > > > >  db/sb.c              |  153 ++++++++++++++++++++++++++++++++++++++++++++++++--
> > > > >  db/xfs_admin.sh      |   10 ++-
> > > > >  man/man8/xfs_admin.8 |   15 +++++
> > > > >  man/man8/xfs_db.8    |    5 ++
> > > > >  5 files changed, 178 insertions(+), 10 deletions(-)
> > > > > 
> > > > > 
> > ...
> > > > > diff --git a/db/sb.c b/db/sb.c
> > > > > index d09f653d..fcc2a0ed 100644
> > > > > --- a/db/sb.c
> > > > > +++ b/db/sb.c
> > ...
> > > > > @@ -717,8 +836,23 @@ version_f(
> > > > >  			return 0;
> > > > >  		}
> > > > >  
> > > > > +		if (xfs_sb_version_needsrepair(&mp->m_sb)) {
> > > > > +			dbprintf(_("%s: filesystem needs xfs_repair\n"),
> > > > > +				progname);
> > > > > +			return 0;
> > > > > +		}
> > > > > +
> > > > >  		/* Logic here derived from the IRIX xfs_chver(1M) script. */
> > > > > -		if (!strcasecmp(argv[1], "extflg")) {
> > > > > +		if (!strcasecmp(argv[1], "needsrepair")) {
> > > > > +			if (!xfs_sb_version_hascrc(&mp->m_sb)) {
> > > > > +				dbprintf(
> > > > > +		_("needsrepair flag cannot be enabled on pre-V5 filesystems\n"));
> > > > > +				exitcode = 2;
> > > > > +				return 1;
> > > > 
> > > > Hmm.. I see that exitcode 1 means error && xfs_repair while exitcode 2
> > > > means error && !xfs_repair, but I'm still not sure I follow the high
> > > > level error semantics, particularly if we happen to fail updating
> > > > secondary supers. I wonder if it would be more straightforward to have
> > > > xfs_db only return an error when an update attempt occurs and fails and
> > > > then let xfs_admin run xfs_repair if status == 0 && NEEDSREPAIR is set.
> > > 
> > > Hm.  I'd be even more tempted to make it run if xfs_db just failed.
> > > 
> > > > I suppose the various other ".. bit already set" or "v5 super required"
> > > > conditions don't really need to be errors and thus repair would only run
> > > > in those cases if NEEDSREPAIR was still set on the fs. Otherwise if
> > > > xfs_db fails we dump an error message and encourage the user to run
> > > > xfs_repair themselves.
> > > 
> > > Yeah, that does seem more reasonable.  I'll change xfs_admin to force a
> > > run through repair if the NEEDSREPAIR feature is set or if the xfs_db
> > > command failed, since that probably means something's wrong with the fs.
> > > 
> > 
> > Perhaps, or the storage if an I/O happens to fail or something. I'm not
> > sure we should go that route where if the version upgrade happens to
> > fail we do a "well this operation failed, but something is probably
> > wrong so let me try and repair that for you." I'd personally be kind of
> > annoyed by that if I didn't have an opportunity to analyze the problem
> > before making the decision to run a (potentially destructive) repair
> > operation. I agree it's an unlikely situation, but IMO the automatically
> > invoked repair should be isolated to the specific case that warrants it
> > and everything else should probably just bail out.
> 
> Hm, good point, considering how long it can take to run repair.  I'm
> sure there's some sysadmin out there who would actually prefer to nuke
> the whole fs/node if the upgrade fails.
> 

I don't know if there are any likely scenarios where a user might throw
a feature upgrade attempt at a borked fs only to find it falling into
repair for some other reason, but I'd rather not hear about those cases
from a support perspective, whether it be complaints about feature
upgrade taking forever, confusion over why a feature upgrade failed but
then spit out some other repair related errors, losing the ability to
gather repair -n output, a metadump, etc. We do rarely have cases where
users are surprised a repair throws out a bunch of directory entries or
makes other big and unexpected changes to the fs because they don't
quite understand that repair != data recovery. With those types of
support cases in mind, I just think less can go wrong or down an
unexpected path if the upgrade repair logic is very explicitly tied to
feature bit upgrade success && needsrepair.

> > The user really doesn't need to know or care that a repair is involved
> > in the first place, so ISTM that either "upgrade operation succeeded" or
> > "upgrade operation failed, fs has problems" is fairly expected failure
> > handling behavior (as opposed to allowing things like "upgrade operation
> > failed, we ran repair anyways and fixed some other stuff, maybe try that
> > upgrade again since the last one basically just invoked xfs_repair?").
> > If we really wanted to be careful, we could even run a repair -n first
> > and require it succeed before attempting to touch the superblock. That
> > might be annoying in cases where repair takes forever, but at least if
> > the user bails on it it would likely be before we've modified anything.
> 
> ...or make it more likely that the user ^Cs and never uses our
> scurrilous upgrader tool. :)
> 

Heh. :P

> I dunno, I've a slight preference for /knowing/ that the fs isn't a
> crazy tangle of crap before we try to upgrade it.  Though as I've
> mentioned before, resize2fs has weird heuristics to guesstimate if a
> filesystem is clean "enough", and ext4 even tracks the last fsck and
> mount times.  I don't really want to go down that path.
> 
> Perhaps we should simply document xfs_repair -n as a preparation step?
> 

Yeah, it's probably a good idea to put that in writing. I don't have a
strong opinion on doing the pre-update repair thing. I was just throwing
it out there as a thought. Users may not be aware that a repair is part
of the normal procedure in the first place, so documenting this might be
helpful for those responsible enough to follow best practices.

Brian

> --D
> 
> > 
> > Brian
> > 
> > > > There are still corner cases I guess, but that does _seem_ a bit more
> > > > elegant to me. Otherwise I suppose a comment somewhere that explains
> > > > when/why to use which error code would be helpful.
> > > 
> > > <nod> I'll put them in.
> > > 
> > > --D
> > > 
> > > > Brian
> > > > 
> > > > > +			}
> > > > > +
> > > > > +			v5features.incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> > > > > +		} else if (!strcasecmp(argv[1], "extflg")) {
> > > > >  			switch (XFS_SB_VERSION_NUM(&mp->m_sb)) {
> > > > >  			case XFS_SB_VERSION_1:
> > > > >  				version = 0x0004 | XFS_SB_VERSION_EXTFLGBIT;
> > > > > @@ -809,6 +943,11 @@ version_f(
> > > > >  			mp->m_sb.sb_versionnum = version;
> > > > >  			mp->m_sb.sb_features2 = features;
> > > > >  		}
> > > > > +
> > > > > +		if (!upgrade_v5_features(mp, &v5features)) {
> > > > > +			exitcode = 1;
> > > > > +			return 1;
> > > > > +		}
> > > > >  	}
> > > > >  
> > > > >  	if (argc == 3) {	/* VERSIONNUM + FEATURES2 */
> > > > > diff --git a/db/xfs_admin.sh b/db/xfs_admin.sh
> > > > > index bd325da2..0e79bbf9 100755
> > > > > --- a/db/xfs_admin.sh
> > > > > +++ b/db/xfs_admin.sh
> > > > > @@ -7,9 +7,9 @@
> > > > >  status=0
> > > > >  DB_OPTS=""
> > > > >  REPAIR_OPTS=""
> > > > > -USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] device [logdev]"
> > > > > +USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] [-O v5_feature] device [logdev]"
> > > > >  
> > > > > -while getopts "efjlpuc:L:U:V" c
> > > > > +while getopts "efjlpuc:L:O:U:V" c
> > > > >  do
> > > > >  	case $c in
> > > > >  	c)	REPAIR_OPTS=$REPAIR_OPTS" -c lazycount="$OPTARG;;
> > > > > @@ -19,6 +19,9 @@ do
> > > > >  	l)	DB_OPTS=$DB_OPTS" -r -c label";;
> > > > >  	L)	DB_OPTS=$DB_OPTS" -c 'label "$OPTARG"'";;
> > > > >  	p)	DB_OPTS=$DB_OPTS" -c 'version projid32bit'";;
> > > > > +	O)	DB_OPTS=$DB_OPTS" -c 'version "$OPTARG"'";
> > > > > +		# Force repair to run by adding a single space to REPAIR_OPTS
> > > > > +		REPAIR_OPTS="$REPAIR_OPTS ";;
> > > > >  	u)	DB_OPTS=$DB_OPTS" -r -c uuid";;
> > > > >  	U)	DB_OPTS=$DB_OPTS" -c 'uuid "$OPTARG"'";;
> > > > >  	V)	xfs_db -p xfs_admin -V
> > > > > @@ -34,6 +37,7 @@ set -- extra $@
> > > > >  shift $OPTIND
> > > > >  case $# in
> > > > >  	1|2)
> > > > > +		status=0
> > > > >  		# Pick up the log device, if present
> > > > >  		if [ -n "$2" ]; then
> > > > >  			DB_OPTS=$DB_OPTS" -l '$2'"
> > > > > @@ -46,7 +50,7 @@ case $# in
> > > > >  			eval xfs_db -x -p xfs_admin $DB_OPTS $1
> > > > >  			status=$?
> > > > >  		fi
> > > > > -		if [ -n "$REPAIR_OPTS" ]
> > > > > +		if [ -n "$REPAIR_OPTS" ] && [ $status -ne 2 ]
> > > > >  		then
> > > > >  			# Hide normal repair output which is sent to stderr
> > > > >  			# assuming the filesystem is fine when a user is
> > > > > diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8
> > > > > index 8afc873f..b423981d 100644
> > > > > --- a/man/man8/xfs_admin.8
> > > > > +++ b/man/man8/xfs_admin.8
> > > > > @@ -6,6 +6,8 @@ xfs_admin \- change parameters of an XFS filesystem
> > > > >  [
> > > > >  .B \-eflpu
> > > > >  ] [
> > > > > +.BI \-O " feature"
> > > > > +] [
> > > > >  .BR "\-c 0" | 1
> > > > >  ] [
> > > > >  .B \-L
> > > > > @@ -103,6 +105,19 @@ The filesystem label can be cleared using the special "\c
> > > > >  " value for
> > > > >  .IR label .
> > > > >  .TP
> > > > > +.BI \-O " feature"
> > > > > +Add a new feature to the filesystem.
> > > > > +Only one feature can be specified at a time.
> > > > > +Features are as follows:
> > > > > +.RS 0.7i
> > > > > +.TP
> > > > > +.B needsrepair
> > > > > +If this is a V5 filesystem, flag the filesystem as needing repairs.
> > > > > +Until
> > > > > +.BR xfs_repair (8)
> > > > > +is run, the filesystem will not be mountable.
> > > > > +.RE
> > > > > +.TP
> > > > >  .BI \-U " uuid"
> > > > >  Set the UUID of the filesystem to
> > > > >  .IR uuid .
> > > > > diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8
> > > > > index 58727495..7331cf19 100644
> > > > > --- a/man/man8/xfs_db.8
> > > > > +++ b/man/man8/xfs_db.8
> > > > > @@ -971,6 +971,11 @@ may toggle between
> > > > >  and
> > > > >  .B attr2
> > > > >  at will (older kernels may not support the newer version).
> > > > > +The filesystem can be flagged as requiring a run through
> > > > > +.BR xfs_repair (8)
> > > > > +if the
> > > > > +.B needsrepair
> > > > > +option is specified and the filesystem is formatted with the V5 format.
> > > > >  .IP
> > > > >  If no argument is given, the current version and feature bits are printed.
> > > > >  With one argument, this command will write the updated version number
> > > > > 
> > > > 
> > > 
> > 
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2021-02-03 19:11 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-16  1:24 [PATCHSET v2 0/2] xfs: add the ability to flag a fs for repair Darrick J. Wong
2021-01-16  1:24 ` [PATCH 1/2] xfs_db: support the needsrepair feature flag in the version command Darrick J. Wong
2021-01-19 14:37   ` Brian Foster
2021-02-02 21:09     ` Darrick J. Wong
2021-02-03 13:07       ` Brian Foster
2021-02-03 18:30         ` Darrick J. Wong
2021-02-03 19:09           ` Brian Foster
2021-01-16  1:24 ` [PATCH 2/2] xfs_repair: clear the needsrepair flag Darrick J. Wong
2021-01-19 14:37   ` Brian Foster
2021-01-19 18:15     ` Darrick J. Wong
2021-01-19 19:44       ` Brian Foster
2021-01-19 20:31         ` Darrick J. Wong
2021-01-19 23:49           ` Darrick J. Wong
2021-01-20  4:31   ` [PATCH v2.1 " Darrick J. Wong
2021-01-20 17:38     ` Brian Foster
2021-02-02 22:22       ` Darrick J. Wong
2021-01-20  4:31 ` [PATCH 3/2] xfs_repair: fix unmount error message to have a newline Darrick J. Wong
2021-01-20 17:38   ` Brian Foster

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).