Linux-XFS Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 0/1] xfsprogs: online health tracking support
@ 2019-09-06  3:33 Darrick J. Wong
  2019-09-06  3:33 ` [PATCH 1/1] xfs_spaceman: report health problems Darrick J. Wong
  0 siblings, 1 reply; 7+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:33 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

Hi all,

This series adds online health tracking capabilities to XFS, which
enables userspace to discover if any metadata corruptions have been
found (and not fixed) within a given class of metadata.

Reporting to userspace is handled by three ioctl modifications:
enhancements of the existing fs geometry ioctl to include a health
field; enhancement of the existing bulkstat ioctl to report health, and
a totally new ioctl to report allocation group geometry and status.

At this point we've merged everything but the changes to spaceman.

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=health-tracking

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=health-tracking

fstests git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfstests-dev.git/log/?h=health-tracking

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/1] xfs_spaceman: report health problems
  2019-09-06  3:33 [PATCH 0/1] xfsprogs: online health tracking support Darrick J. Wong
@ 2019-09-06  3:33 ` Darrick J. Wong
  2019-09-09 23:28   ` Dave Chinner
  2019-09-16 19:29   ` [PATCH v2 " Darrick J. Wong
  0 siblings, 2 replies; 7+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:33 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Use the fs and ag geometry ioctls to report health problems to users.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libfrog/fsgeom.c        |   16 ++
 libfrog/fsgeom.h        |    1 
 man/man8/xfs_spaceman.8 |   28 +++
 spaceman/Makefile       |    2 
 spaceman/health.c       |  459 +++++++++++++++++++++++++++++++++++++++++++++++
 spaceman/init.c         |    1 
 spaceman/space.h        |    1 
 7 files changed, 507 insertions(+), 1 deletion(-)
 create mode 100644 spaceman/health.c


diff --git a/libfrog/fsgeom.c b/libfrog/fsgeom.c
index 631286cd..3ea91e3f 100644
--- a/libfrog/fsgeom.c
+++ b/libfrog/fsgeom.c
@@ -159,3 +159,19 @@ xfd_close(
 
 	return 0;
 }
+
+/* Try to obtain an AG's geometry.  Returns zero or a positive error code. */
+int
+xfrog_ag_geometry(
+	int			fd,
+	unsigned int		agno,
+	struct xfs_ag_geometry	*ageo)
+{
+	int			ret;
+
+	ageo->ag_number = agno;
+	ret = ioctl(fd, XFS_IOC_AG_GEOMETRY, ageo);
+	if (ret)
+		return errno;
+	return 0;
+}
diff --git a/libfrog/fsgeom.h b/libfrog/fsgeom.h
index 5dcfc1bb..55b14c2b 100644
--- a/libfrog/fsgeom.h
+++ b/libfrog/fsgeom.h
@@ -8,6 +8,7 @@
 void xfs_report_geom(struct xfs_fsop_geom *geo, const char *mntpoint,
 		const char *logname, const char *rtname);
 int xfrog_geometry(int fd, struct xfs_fsop_geom *fsgeo);
+int xfrog_ag_geometry(int fd, unsigned int agno, struct xfs_ag_geometry *ageo);
 
 /*
  * Structure for recording whatever observations we want about the level of
diff --git a/man/man8/xfs_spaceman.8 b/man/man8/xfs_spaceman.8
index 12dd04e4..ece840d7 100644
--- a/man/man8/xfs_spaceman.8
+++ b/man/man8/xfs_spaceman.8
@@ -91,6 +91,34 @@ The output will have the same format that
 .BR "xfs_info" "(8)"
 prints when querying a filesystem.
 .TP
+.BI "health [ \-a agno] [ \-c ] [ \-f ] [ \-i inum ] [ \-q ] [ paths ]"
+Reports the health of the given group of filesystem metadata.
+.RS 1.0i
+.PD 0
+.TP 0.4i
+.B \-a agno
+Report on the health of the given allocation group.
+.TP
+.B \-c
+Scan all inodes in the filesystem and report each file's health status.
+If the
+.B \-a
+option is given, scan only the inodes in that AG.
+.TP
+.B \-f
+Report on the health of metadata that affect the entire filesystem.
+.TP
+.B \-i inum
+Report on the health of a specific inode.
+.TP
+.B \-q
+Report only unhealthy metadata.
+.TP
+.B paths
+Report on the health of the files at the given path.
+.PD
+.RE
+.TP
 .BR "help [ " command " ]"
 Display a brief description of one or all commands.
 .TP
diff --git a/spaceman/Makefile b/spaceman/Makefile
index b1c1b16d..d01aa74a 100644
--- a/spaceman/Makefile
+++ b/spaceman/Makefile
@@ -7,7 +7,7 @@ include $(TOPDIR)/include/builddefs
 
 LTCOMMAND = xfs_spaceman
 HFILES = init.h space.h
-CFILES = info.c init.c file.c prealloc.c trim.c
+CFILES = info.c init.c file.c health.c prealloc.c trim.c
 LSRCFILES = xfs_info.sh
 
 LLDLIBS = $(LIBXCMD) $(LIBFROG)
diff --git a/spaceman/health.c b/spaceman/health.c
new file mode 100644
index 00000000..c3575b8e
--- /dev/null
+++ b/spaceman/health.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2019 Oracle.
+ * All Rights Reserved.
+ */
+#include "platform_defs.h"
+#include "libxfs.h"
+#include "command.h"
+#include "init.h"
+#include "input.h"
+#include "libfrog/paths.h"
+#include "libfrog/fsgeom.h"
+#include "libfrog/bulkstat.h"
+#include "space.h"
+
+static cmdinfo_t health_cmd;
+static unsigned long long reported;
+static bool comprehensive;
+static bool quiet;
+
+static bool has_realtime(const struct xfs_fsop_geom *g)
+{
+	return g->rtblocks > 0;
+}
+
+static bool has_finobt(const struct xfs_fsop_geom *g)
+{
+	return g->flags & XFS_FSOP_GEOM_FLAGS_FINOBT;
+}
+
+static bool has_rmapbt(const struct xfs_fsop_geom *g)
+{
+	return g->flags & XFS_FSOP_GEOM_FLAGS_RMAPBT;
+}
+
+static bool has_reflink(const struct xfs_fsop_geom *g)
+{
+	return g->flags & XFS_FSOP_GEOM_FLAGS_REFLINK;
+}
+
+struct flag_map {
+	unsigned int		mask;
+	bool			(*has_fn)(const struct xfs_fsop_geom *g);
+	const char		*descr;
+};
+
+static const struct flag_map fs_flags[] = {
+	{
+		.mask = XFS_FSOP_GEOM_SICK_COUNTERS,
+		.descr = "summary counters",
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_UQUOTA,
+		.descr = "user quota",
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_GQUOTA,
+		.descr = "group quota",
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_PQUOTA,
+		.descr = "project quota",
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_RT_BITMAP,
+		.descr = "realtime bitmap",
+		.has_fn = has_realtime,
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_RT_SUMMARY,
+		.descr = "realtime summary",
+		.has_fn = has_realtime,
+	},
+	{0},
+};
+
+static const struct flag_map ag_flags[] = {
+	{
+		.mask = XFS_AG_GEOM_SICK_SB,
+		.descr = "superblock",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_AGF,
+		.descr = "AGF header",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_AGFL,
+		.descr = "AGFL header",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_AGI,
+		.descr = "AGI header",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_BNOBT,
+		.descr = "free space by block btree",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_CNTBT,
+		.descr = "free space by length btree",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_INOBT,
+		.descr = "inode btree",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_FINOBT,
+		.descr = "free inode btree",
+		.has_fn = has_finobt,
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_RMAPBT,
+		.descr = "reverse mappings btree",
+		.has_fn = has_rmapbt,
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_REFCNTBT,
+		.descr = "reference count btree",
+		.has_fn = has_reflink,
+	},
+	{0},
+};
+
+static const struct flag_map inode_flags[] = {
+	{
+		.mask = XFS_BS_SICK_INODE,
+		.descr = "inode core",
+	},
+	{
+		.mask = XFS_BS_SICK_BMBTD,
+		.descr = "data fork",
+	},
+	{
+		.mask = XFS_BS_SICK_BMBTA,
+		.descr = "extended attribute fork",
+	},
+	{
+		.mask = XFS_BS_SICK_BMBTC,
+		.descr = "copy on write fork",
+	},
+	{
+		.mask = XFS_BS_SICK_DIR,
+		.descr = "directory",
+	},
+	{
+		.mask = XFS_BS_SICK_XATTR,
+		.descr = "extended attributes",
+	},
+	{
+		.mask = XFS_BS_SICK_SYMLINK,
+		.descr = "symbolic link target",
+	},
+	{
+		.mask = XFS_BS_SICK_PARENT,
+		.descr = "parent pointers",
+	},
+	{0},
+};
+
+/* Convert a flag mask to a report. */
+static void
+report_sick(
+	const char			*descr,
+	const struct flag_map		*maps,
+	unsigned int			sick,
+	unsigned int			checked)
+{
+	const struct flag_map		*f;
+	bool				bad;
+
+	for (f = maps; f->mask != 0; f++) {
+		if (f->has_fn && !f->has_fn(&file->xfd.fsgeom))
+			continue;
+		if (!(checked & f->mask))
+			continue;
+		reported++;
+		bad = sick & f->mask;
+		if (!bad && quiet)
+			continue;
+		printf("%s %s: %s\n", descr, _(f->descr),
+				bad ? _("unhealthy") : _("ok"));
+	}
+}
+
+/* Report on an AG's health. */
+static int
+report_ag_sick(
+	xfs_agnumber_t		agno)
+{
+	struct xfs_ag_geometry	ageo = { 0 };
+	char			descr[256];
+	int			ret;
+
+	ret = xfrog_ag_geometry(file->xfd.fd, agno, &ageo);
+	if (ret) {
+		errno = ret;
+		perror("ag_geometry");
+		return 1;
+	}
+	snprintf(descr, sizeof(descr) - 1, _("AG %u"), agno);
+	report_sick(descr, ag_flags, ageo.ag_sick, ageo.ag_checked);
+	return 0;
+}
+
+/* Report on an inode's health. */
+static int
+report_inode_health(
+	unsigned long long	ino,
+	const char		*descr)
+{
+	struct xfs_bstat	bs;
+	char			d[256];
+	int			ret;
+
+	if (!descr) {
+		snprintf(d, sizeof(d) - 1, _("inode %llu"), ino);
+		descr = d;
+	}
+
+	ret = xfrog_bulkstat_single(&file->xfd, ino, &bs);
+	if (ret) {
+		errno = ret;
+		perror(descr);
+		return 1;
+	}
+
+	report_sick(descr, inode_flags, bs.bs_sick, bs.bs_checked);
+	return 0;
+}
+
+/* Report on a file's health. */
+static int
+report_file_health(
+	const char	*path)
+{
+	struct stat	stata, statb;
+	int		ret;
+
+	ret = lstat(path, &statb);
+	if (ret) {
+		perror(path);
+		return 1;
+	}
+
+	ret = fstat(file->xfd.fd, &stata);
+	if (ret) {
+		perror(file->name);
+		return 1;
+	}
+
+	if (stata.st_dev != statb.st_dev) {
+		fprintf(stderr, _("%s: not on the open filesystem"), path);
+		return 1;
+	}
+
+	return report_inode_health(statb.st_ino, path);
+}
+
+#define BULKSTAT_NR		(128)
+
+/*
+ * Report on all files' health for a given @agno.  If @agno is NULLAGNUMBER,
+ * report on all files in the filesystem.
+ */
+static int
+report_bulkstat_health(
+	xfs_agnumber_t		agno)
+{
+	struct xfs_bstat	bstat[BULKSTAT_NR];
+	char			descr[256];
+	uint64_t		startino = 0;
+	uint64_t		lastino = -1ULL;
+	uint32_t		ocount;
+	uint32_t		i;
+	int			error;
+
+	if (agno != NULLAGNUMBER) {
+		startino = cvt_agino_to_ino(&file->xfd, agno, 0);
+		lastino = cvt_agino_to_ino(&file->xfd, agno + 1, 0) - 1;
+	}
+
+	while ((error = xfrog_bulkstat(&file->xfd, &startino, BULKSTAT_NR,
+			bstat, &ocount) == 0) && ocount > 0) {
+		for (i = 0; i < ocount; i++) {
+			if (bstat[i].bs_ino > lastino)
+				goto out;
+			snprintf(descr, sizeof(descr) - 1, _("inode %llu"),
+					bstat[i].bs_ino);
+			report_sick(descr, inode_flags, bstat[i].bs_sick,
+					bstat[i].bs_checked);
+		}
+	}
+	if (error) {
+		errno = error;
+		perror("bulkstat");
+	}
+out:
+	return error;
+}
+
+#define OPT_STRING ("a:cfi:q")
+
+/* Report on health problems in XFS filesystem. */
+static int
+health_f(
+	int			argc,
+	char			**argv)
+{
+	unsigned long long	x;
+	xfs_agnumber_t		agno;
+	bool			default_report = true;
+	int			c;
+	int			ret;
+
+	reported = 0;
+
+	if (file->xfd.fsgeom.version != XFS_FSOP_GEOM_VERSION_V5) {
+		perror("health");
+		return 1;
+	}
+
+	/* Set our reporting options appropriately in the first pass. */
+	while ((c = getopt(argc, argv, OPT_STRING)) != EOF) {
+		switch (c) {
+		case 'a':
+			default_report = false;
+			errno = 0;
+			x = strtoll(optarg, NULL, 10);
+			if (!errno && x >= NULLAGNUMBER)
+				errno = ERANGE;
+			if (errno) {
+				perror("ag health");
+				return 1;
+			}
+			break;
+		case 'c':
+			comprehensive = true;
+			break;
+		case 'f':
+			default_report = false;
+			break;
+		case 'i':
+			default_report = false;
+			errno = 0;
+			x = strtoll(optarg, NULL, 10);
+			if (errno) {
+				perror("inode health");
+				return 1;
+			}
+			break;
+		case 'q':
+			quiet = true;
+			break;
+		default:
+			return command_usage(&health_cmd);
+		}
+	}
+	if (optind < argc)
+		default_report = false;
+
+	/* Reparse arguments, this time for reporting actions. */
+	optind = 1;
+	while ((c = getopt(argc, argv, OPT_STRING)) != EOF) {
+		switch (c) {
+		case 'a':
+			agno = strtoll(optarg, NULL, 10);
+			ret = report_ag_sick(agno);
+			if (!ret && comprehensive)
+				ret = report_bulkstat_health(agno);
+			if (ret)
+				return 1;
+			break;
+		case 'f':
+			report_sick(_("filesystem"), fs_flags,
+					file->xfd.fsgeom.sick,
+					file->xfd.fsgeom.checked);
+			if (comprehensive) {
+				ret = report_bulkstat_health(NULLAGNUMBER);
+				if (ret)
+					return 1;
+			}
+			break;
+		case 'i':
+			x = strtoll(optarg, NULL, 10);
+			ret = report_inode_health(x, NULL);
+			if (ret)
+				return 1;
+			break;
+		default:
+			break;
+		}
+	}
+
+	for (c = optind; c < argc; c++) {
+		ret = report_file_health(argv[c]);
+		if (ret)
+			return 1;
+	}
+
+	/* No arguments gets us a summary of fs state. */
+	if (default_report) {
+		report_sick(_("filesystem"), fs_flags, file->xfd.fsgeom.sick,
+				file->xfd.fsgeom.checked);
+
+		for (agno = 0; agno < file->xfd.fsgeom.agcount; agno++) {
+			ret = report_ag_sick(agno);
+			if (ret)
+				return 1;
+		}
+		if (comprehensive) {
+			ret = report_bulkstat_health(NULLAGNUMBER);
+			if (ret)
+				return 1;
+		}
+	}
+
+	if (!reported) {
+		fprintf(stderr,
+_("Health status has not been collected for this filesystem.\n"));
+		fprintf(stderr,
+_("Please run xfs_scrub(8) to remedy this situation.\n"));
+	}
+
+	return 0;
+}
+
+static void
+health_help(void)
+{
+	printf(_(
+"\n"
+"Report all observed filesystem health problems.\n"
+"\n"
+" -a agno  -- Report health of the given allocation group.\n"
+" -c       -- Report on the health of all inodes.\n"
+" -f       -- Report health of the overall filesystem.\n"
+" -i inum  -- Report health of a given inode number.\n"
+" -q       -- Only report unhealthy metadata.\n"
+" paths    -- Report health of the given file path.\n"
+"\n"));
+
+}
+
+static cmdinfo_t health_cmd = {
+	.name = "health",
+	.cfunc = health_f,
+	.argmin = 0,
+	.argmax = -1,
+	.args = "[-a agno] [-c] [-f] [-i inum] [-q] [paths]",
+	.flags = CMD_FLAG_ONESHOT,
+	.help = health_help,
+};
+
+void
+health_init(void)
+{
+	health_cmd.oneline = _("Report observed XFS health problems."),
+	add_command(&health_cmd);
+}
diff --git a/spaceman/init.c b/spaceman/init.c
index 4afdb386..cf1ff3cb 100644
--- a/spaceman/init.c
+++ b/spaceman/init.c
@@ -34,6 +34,7 @@ init_commands(void)
 	quit_init();
 	trim_init();
 	freesp_init();
+	health_init();
 }
 
 static int
diff --git a/spaceman/space.h b/spaceman/space.h
index 2c26884a..723209ed 100644
--- a/spaceman/space.h
+++ b/spaceman/space.h
@@ -32,5 +32,6 @@ extern void	freesp_init(void);
 # define freesp_init()	do { } while (0)
 #endif
 extern void	info_init(void);
+extern void	health_init(void);
 
 #endif /* XFS_SPACEMAN_SPACE_H_ */


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/1] xfs_spaceman: report health problems
  2019-09-06  3:33 ` [PATCH 1/1] xfs_spaceman: report health problems Darrick J. Wong
@ 2019-09-09 23:28   ` Dave Chinner
  2019-09-16 19:29   ` [PATCH v2 " Darrick J. Wong
  1 sibling, 0 replies; 7+ messages in thread
From: Dave Chinner @ 2019-09-09 23:28 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Thu, Sep 05, 2019 at 08:33:17PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Use the fs and ag geometry ioctls to report health problems to users.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  libfrog/fsgeom.c        |   16 ++
>  libfrog/fsgeom.h        |    1 
>  man/man8/xfs_spaceman.8 |   28 +++
>  spaceman/Makefile       |    2 
>  spaceman/health.c       |  459 +++++++++++++++++++++++++++++++++++++++++++++++
>  spaceman/init.c         |    1 
>  spaceman/space.h        |    1 
>  7 files changed, 507 insertions(+), 1 deletion(-)
>  create mode 100644 spaceman/health.c

Looks good. Minor nit below, but otherwise:

Reviewed-by: Dave Chinner <dchinner@redhat.com>

> +static int
> +report_bulkstat_health(
> +	xfs_agnumber_t		agno)
> +{
> +	struct xfs_bstat	bstat[BULKSTAT_NR];
> +	char			descr[256];
> +	uint64_t		startino = 0;
> +	uint64_t		lastino = -1ULL;
> +	uint32_t		ocount;
> +	uint32_t		i;
> +	int			error;
> +
> +	if (agno != NULLAGNUMBER) {
> +		startino = cvt_agino_to_ino(&file->xfd, agno, 0);
> +		lastino = cvt_agino_to_ino(&file->xfd, agno + 1, 0) - 1;
> +	}
> +
> +	while ((error = xfrog_bulkstat(&file->xfd, &startino, BULKSTAT_NR,
> +			bstat, &ocount) == 0) && ocount > 0) {
> +		for (i = 0; i < ocount; i++) {
> +			if (bstat[i].bs_ino > lastino)
> +				goto out;
> +			snprintf(descr, sizeof(descr) - 1, _("inode %llu"),
> +					bstat[i].bs_ino);
> +			report_sick(descr, inode_flags, bstat[i].bs_sick,
> +					bstat[i].bs_checked);
> +		}
> +	}

This could be done as a do { } while loop:

	do {
		error = xfrog_bulkstat(&file->xfd, &startino, BULKSTAT_NR,
					bstat, &ocount);
		if (error)
			break;
		for (i = 0; i < ocount; i++) {
			[....]
		}
	} while (ocount > 0);

This could be done as a followup patch as it's not critical, just
a little bit of cleanup...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2 1/1] xfs_spaceman: report health problems
  2019-09-06  3:33 ` [PATCH 1/1] xfs_spaceman: report health problems Darrick J. Wong
  2019-09-09 23:28   ` Dave Chinner
@ 2019-09-16 19:29   ` " Darrick J. Wong
  1 sibling, 0 replies; 7+ messages in thread
From: Darrick J. Wong @ 2019-09-16 19:29 UTC (permalink / raw)
  To: sandeen, Dave Chinner; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Use the fs and ag geometry ioctls to report health problems to users.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
v2: refactor the bulkstat loop per dchinner suggestion
---
 libfrog/fsgeom.c        |   16 ++
 libfrog/fsgeom.h        |    1 
 man/man8/xfs_spaceman.8 |   28 +++
 spaceman/Makefile       |    2 
 spaceman/health.c       |  463 +++++++++++++++++++++++++++++++++++++++++++++++
 spaceman/init.c         |    1 
 spaceman/space.h        |    1 
 7 files changed, 511 insertions(+), 1 deletion(-)
 create mode 100644 spaceman/health.c

diff --git a/libfrog/fsgeom.c b/libfrog/fsgeom.c
index 631286cd..3ea91e3f 100644
--- a/libfrog/fsgeom.c
+++ b/libfrog/fsgeom.c
@@ -159,3 +159,19 @@ xfd_close(
 
 	return 0;
 }
+
+/* Try to obtain an AG's geometry.  Returns zero or a positive error code. */
+int
+xfrog_ag_geometry(
+	int			fd,
+	unsigned int		agno,
+	struct xfs_ag_geometry	*ageo)
+{
+	int			ret;
+
+	ageo->ag_number = agno;
+	ret = ioctl(fd, XFS_IOC_AG_GEOMETRY, ageo);
+	if (ret)
+		return errno;
+	return 0;
+}
diff --git a/libfrog/fsgeom.h b/libfrog/fsgeom.h
index 5dcfc1bb..55b14c2b 100644
--- a/libfrog/fsgeom.h
+++ b/libfrog/fsgeom.h
@@ -8,6 +8,7 @@
 void xfs_report_geom(struct xfs_fsop_geom *geo, const char *mntpoint,
 		const char *logname, const char *rtname);
 int xfrog_geometry(int fd, struct xfs_fsop_geom *fsgeo);
+int xfrog_ag_geometry(int fd, unsigned int agno, struct xfs_ag_geometry *ageo);
 
 /*
  * Structure for recording whatever observations we want about the level of
diff --git a/man/man8/xfs_spaceman.8 b/man/man8/xfs_spaceman.8
index 12dd04e4..ece840d7 100644
--- a/man/man8/xfs_spaceman.8
+++ b/man/man8/xfs_spaceman.8
@@ -91,6 +91,34 @@ The output will have the same format that
 .BR "xfs_info" "(8)"
 prints when querying a filesystem.
 .TP
+.BI "health [ \-a agno] [ \-c ] [ \-f ] [ \-i inum ] [ \-q ] [ paths ]"
+Reports the health of the given group of filesystem metadata.
+.RS 1.0i
+.PD 0
+.TP 0.4i
+.B \-a agno
+Report on the health of the given allocation group.
+.TP
+.B \-c
+Scan all inodes in the filesystem and report each file's health status.
+If the
+.B \-a
+option is given, scan only the inodes in that AG.
+.TP
+.B \-f
+Report on the health of metadata that affect the entire filesystem.
+.TP
+.B \-i inum
+Report on the health of a specific inode.
+.TP
+.B \-q
+Report only unhealthy metadata.
+.TP
+.B paths
+Report on the health of the files at the given path.
+.PD
+.RE
+.TP
 .BR "help [ " command " ]"
 Display a brief description of one or all commands.
 .TP
diff --git a/spaceman/Makefile b/spaceman/Makefile
index b1c1b16d..d01aa74a 100644
--- a/spaceman/Makefile
+++ b/spaceman/Makefile
@@ -7,7 +7,7 @@ include $(TOPDIR)/include/builddefs
 
 LTCOMMAND = xfs_spaceman
 HFILES = init.h space.h
-CFILES = info.c init.c file.c prealloc.c trim.c
+CFILES = info.c init.c file.c health.c prealloc.c trim.c
 LSRCFILES = xfs_info.sh
 
 LLDLIBS = $(LIBXCMD) $(LIBFROG)
diff --git a/spaceman/health.c b/spaceman/health.c
new file mode 100644
index 00000000..a8bd3f3e
--- /dev/null
+++ b/spaceman/health.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2019 Oracle.
+ * All Rights Reserved.
+ */
+#include "platform_defs.h"
+#include "libxfs.h"
+#include "command.h"
+#include "init.h"
+#include "input.h"
+#include "libfrog/paths.h"
+#include "libfrog/fsgeom.h"
+#include "libfrog/bulkstat.h"
+#include "space.h"
+
+static cmdinfo_t health_cmd;
+static unsigned long long reported;
+static bool comprehensive;
+static bool quiet;
+
+static bool has_realtime(const struct xfs_fsop_geom *g)
+{
+	return g->rtblocks > 0;
+}
+
+static bool has_finobt(const struct xfs_fsop_geom *g)
+{
+	return g->flags & XFS_FSOP_GEOM_FLAGS_FINOBT;
+}
+
+static bool has_rmapbt(const struct xfs_fsop_geom *g)
+{
+	return g->flags & XFS_FSOP_GEOM_FLAGS_RMAPBT;
+}
+
+static bool has_reflink(const struct xfs_fsop_geom *g)
+{
+	return g->flags & XFS_FSOP_GEOM_FLAGS_REFLINK;
+}
+
+struct flag_map {
+	unsigned int		mask;
+	bool			(*has_fn)(const struct xfs_fsop_geom *g);
+	const char		*descr;
+};
+
+static const struct flag_map fs_flags[] = {
+	{
+		.mask = XFS_FSOP_GEOM_SICK_COUNTERS,
+		.descr = "summary counters",
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_UQUOTA,
+		.descr = "user quota",
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_GQUOTA,
+		.descr = "group quota",
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_PQUOTA,
+		.descr = "project quota",
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_RT_BITMAP,
+		.descr = "realtime bitmap",
+		.has_fn = has_realtime,
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_RT_SUMMARY,
+		.descr = "realtime summary",
+		.has_fn = has_realtime,
+	},
+	{0},
+};
+
+static const struct flag_map ag_flags[] = {
+	{
+		.mask = XFS_AG_GEOM_SICK_SB,
+		.descr = "superblock",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_AGF,
+		.descr = "AGF header",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_AGFL,
+		.descr = "AGFL header",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_AGI,
+		.descr = "AGI header",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_BNOBT,
+		.descr = "free space by block btree",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_CNTBT,
+		.descr = "free space by length btree",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_INOBT,
+		.descr = "inode btree",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_FINOBT,
+		.descr = "free inode btree",
+		.has_fn = has_finobt,
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_RMAPBT,
+		.descr = "reverse mappings btree",
+		.has_fn = has_rmapbt,
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_REFCNTBT,
+		.descr = "reference count btree",
+		.has_fn = has_reflink,
+	},
+	{0},
+};
+
+static const struct flag_map inode_flags[] = {
+	{
+		.mask = XFS_BS_SICK_INODE,
+		.descr = "inode core",
+	},
+	{
+		.mask = XFS_BS_SICK_BMBTD,
+		.descr = "data fork",
+	},
+	{
+		.mask = XFS_BS_SICK_BMBTA,
+		.descr = "extended attribute fork",
+	},
+	{
+		.mask = XFS_BS_SICK_BMBTC,
+		.descr = "copy on write fork",
+	},
+	{
+		.mask = XFS_BS_SICK_DIR,
+		.descr = "directory",
+	},
+	{
+		.mask = XFS_BS_SICK_XATTR,
+		.descr = "extended attributes",
+	},
+	{
+		.mask = XFS_BS_SICK_SYMLINK,
+		.descr = "symbolic link target",
+	},
+	{
+		.mask = XFS_BS_SICK_PARENT,
+		.descr = "parent pointers",
+	},
+	{0},
+};
+
+/* Convert a flag mask to a report. */
+static void
+report_sick(
+	const char			*descr,
+	const struct flag_map		*maps,
+	unsigned int			sick,
+	unsigned int			checked)
+{
+	const struct flag_map		*f;
+	bool				bad;
+
+	for (f = maps; f->mask != 0; f++) {
+		if (f->has_fn && !f->has_fn(&file->xfd.fsgeom))
+			continue;
+		if (!(checked & f->mask))
+			continue;
+		reported++;
+		bad = sick & f->mask;
+		if (!bad && quiet)
+			continue;
+		printf("%s %s: %s\n", descr, _(f->descr),
+				bad ? _("unhealthy") : _("ok"));
+	}
+}
+
+/* Report on an AG's health. */
+static int
+report_ag_sick(
+	xfs_agnumber_t		agno)
+{
+	struct xfs_ag_geometry	ageo = { 0 };
+	char			descr[256];
+	int			ret;
+
+	ret = xfrog_ag_geometry(file->xfd.fd, agno, &ageo);
+	if (ret) {
+		errno = ret;
+		perror("ag_geometry");
+		return 1;
+	}
+	snprintf(descr, sizeof(descr) - 1, _("AG %u"), agno);
+	report_sick(descr, ag_flags, ageo.ag_sick, ageo.ag_checked);
+	return 0;
+}
+
+/* Report on an inode's health. */
+static int
+report_inode_health(
+	unsigned long long	ino,
+	const char		*descr)
+{
+	struct xfs_bstat	bs;
+	char			d[256];
+	int			ret;
+
+	if (!descr) {
+		snprintf(d, sizeof(d) - 1, _("inode %llu"), ino);
+		descr = d;
+	}
+
+	ret = xfrog_bulkstat_single(&file->xfd, ino, &bs);
+	if (ret) {
+		errno = ret;
+		perror(descr);
+		return 1;
+	}
+
+	report_sick(descr, inode_flags, bs.bs_sick, bs.bs_checked);
+	return 0;
+}
+
+/* Report on a file's health. */
+static int
+report_file_health(
+	const char	*path)
+{
+	struct stat	stata, statb;
+	int		ret;
+
+	ret = lstat(path, &statb);
+	if (ret) {
+		perror(path);
+		return 1;
+	}
+
+	ret = fstat(file->xfd.fd, &stata);
+	if (ret) {
+		perror(file->name);
+		return 1;
+	}
+
+	if (stata.st_dev != statb.st_dev) {
+		fprintf(stderr, _("%s: not on the open filesystem"), path);
+		return 1;
+	}
+
+	return report_inode_health(statb.st_ino, path);
+}
+
+#define BULKSTAT_NR		(128)
+
+/*
+ * Report on all files' health for a given @agno.  If @agno is NULLAGNUMBER,
+ * report on all files in the filesystem.
+ */
+static int
+report_bulkstat_health(
+	xfs_agnumber_t		agno)
+{
+	struct xfs_bstat	bstat[BULKSTAT_NR];
+	char			descr[256];
+	uint64_t		startino = 0;
+	uint64_t		lastino = -1ULL;
+	uint32_t		ocount;
+	uint32_t		i;
+	int			error;
+
+	if (agno != NULLAGNUMBER) {
+		startino = cvt_agino_to_ino(&file->xfd, agno, 0);
+		lastino = cvt_agino_to_ino(&file->xfd, agno + 1, 0) - 1;
+	}
+
+	do {
+		error = xfrog_bulkstat(&file->xfd, &startino, BULKSTAT_NR,
+				bstat, &ocount);
+		if (error)
+			break;
+		for (i = 0; i < ocount; i++) {
+			if (bstat[i].bs_ino > lastino)
+				goto out;
+			snprintf(descr, sizeof(descr) - 1, _("inode %llu"),
+					bstat[i].bs_ino);
+			report_sick(descr, inode_flags, bstat[i].bs_sick,
+					bstat[i].bs_checked);
+		}
+	} while (ocount > 0);
+
+	if (error) {
+		errno = error;
+		perror("bulkstat");
+	}
+out:
+	return error;
+}
+
+#define OPT_STRING ("a:cfi:q")
+
+/* Report on health problems in XFS filesystem. */
+static int
+health_f(
+	int			argc,
+	char			**argv)
+{
+	unsigned long long	x;
+	xfs_agnumber_t		agno;
+	bool			default_report = true;
+	int			c;
+	int			ret;
+
+	reported = 0;
+
+	if (file->xfd.fsgeom.version != XFS_FSOP_GEOM_VERSION_V5) {
+		perror("health");
+		return 1;
+	}
+
+	/* Set our reporting options appropriately in the first pass. */
+	while ((c = getopt(argc, argv, OPT_STRING)) != EOF) {
+		switch (c) {
+		case 'a':
+			default_report = false;
+			errno = 0;
+			x = strtoll(optarg, NULL, 10);
+			if (!errno && x >= NULLAGNUMBER)
+				errno = ERANGE;
+			if (errno) {
+				perror("ag health");
+				return 1;
+			}
+			break;
+		case 'c':
+			comprehensive = true;
+			break;
+		case 'f':
+			default_report = false;
+			break;
+		case 'i':
+			default_report = false;
+			errno = 0;
+			x = strtoll(optarg, NULL, 10);
+			if (errno) {
+				perror("inode health");
+				return 1;
+			}
+			break;
+		case 'q':
+			quiet = true;
+			break;
+		default:
+			return command_usage(&health_cmd);
+		}
+	}
+	if (optind < argc)
+		default_report = false;
+
+	/* Reparse arguments, this time for reporting actions. */
+	optind = 1;
+	while ((c = getopt(argc, argv, OPT_STRING)) != EOF) {
+		switch (c) {
+		case 'a':
+			agno = strtoll(optarg, NULL, 10);
+			ret = report_ag_sick(agno);
+			if (!ret && comprehensive)
+				ret = report_bulkstat_health(agno);
+			if (ret)
+				return 1;
+			break;
+		case 'f':
+			report_sick(_("filesystem"), fs_flags,
+					file->xfd.fsgeom.sick,
+					file->xfd.fsgeom.checked);
+			if (comprehensive) {
+				ret = report_bulkstat_health(NULLAGNUMBER);
+				if (ret)
+					return 1;
+			}
+			break;
+		case 'i':
+			x = strtoll(optarg, NULL, 10);
+			ret = report_inode_health(x, NULL);
+			if (ret)
+				return 1;
+			break;
+		default:
+			break;
+		}
+	}
+
+	for (c = optind; c < argc; c++) {
+		ret = report_file_health(argv[c]);
+		if (ret)
+			return 1;
+	}
+
+	/* No arguments gets us a summary of fs state. */
+	if (default_report) {
+		report_sick(_("filesystem"), fs_flags, file->xfd.fsgeom.sick,
+				file->xfd.fsgeom.checked);
+
+		for (agno = 0; agno < file->xfd.fsgeom.agcount; agno++) {
+			ret = report_ag_sick(agno);
+			if (ret)
+				return 1;
+		}
+		if (comprehensive) {
+			ret = report_bulkstat_health(NULLAGNUMBER);
+			if (ret)
+				return 1;
+		}
+	}
+
+	if (!reported) {
+		fprintf(stderr,
+_("Health status has not been collected for this filesystem.\n"));
+		fprintf(stderr,
+_("Please run xfs_scrub(8) to remedy this situation.\n"));
+	}
+
+	return 0;
+}
+
+static void
+health_help(void)
+{
+	printf(_(
+"\n"
+"Report all observed filesystem health problems.\n"
+"\n"
+" -a agno  -- Report health of the given allocation group.\n"
+" -c       -- Report on the health of all inodes.\n"
+" -f       -- Report health of the overall filesystem.\n"
+" -i inum  -- Report health of a given inode number.\n"
+" -q       -- Only report unhealthy metadata.\n"
+" paths    -- Report health of the given file path.\n"
+"\n"));
+
+}
+
+static cmdinfo_t health_cmd = {
+	.name = "health",
+	.cfunc = health_f,
+	.argmin = 0,
+	.argmax = -1,
+	.args = "[-a agno] [-c] [-f] [-i inum] [-q] [paths]",
+	.flags = CMD_FLAG_ONESHOT,
+	.help = health_help,
+};
+
+void
+health_init(void)
+{
+	health_cmd.oneline = _("Report observed XFS health problems."),
+	add_command(&health_cmd);
+}
diff --git a/spaceman/init.c b/spaceman/init.c
index 4afdb386..cf1ff3cb 100644
--- a/spaceman/init.c
+++ b/spaceman/init.c
@@ -34,6 +34,7 @@ init_commands(void)
 	quit_init();
 	trim_init();
 	freesp_init();
+	health_init();
 }
 
 static int
diff --git a/spaceman/space.h b/spaceman/space.h
index 2c26884a..723209ed 100644
--- a/spaceman/space.h
+++ b/spaceman/space.h
@@ -32,5 +32,6 @@ extern void	freesp_init(void);
 # define freesp_init()	do { } while (0)
 #endif
 extern void	info_init(void);
+extern void	health_init(void);
 
 #endif /* XFS_SPACEMAN_SPACE_H_ */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/1] xfs_spaceman: report health problems
  2019-09-04  4:52   ` Dave Chinner
@ 2019-09-04 14:51     ` Darrick J. Wong
  0 siblings, 0 replies; 7+ messages in thread
From: Darrick J. Wong @ 2019-09-04 14:51 UTC (permalink / raw)
  To: Dave Chinner; +Cc: sandeen, linux-xfs

On Wed, Sep 04, 2019 at 02:52:40PM +1000, Dave Chinner wrote:
> On Mon, Aug 26, 2019 at 02:20:54PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Use the fs and ag geometry ioctls to report health problems to users.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  include/xfrog.h         |    2 
> >  libfrog/fsgeom.c        |   11 +
> >  man/man8/xfs_spaceman.8 |   28 +++
> >  spaceman/Makefile       |    2 
> >  spaceman/health.c       |  432 +++++++++++++++++++++++++++++++++++++++++++++++
> >  spaceman/init.c         |    1 
> >  spaceman/space.h        |    1 
> >  7 files changed, 476 insertions(+), 1 deletion(-)
> >  create mode 100644 spaceman/health.c
> > 
> > 
> > diff --git a/include/xfrog.h b/include/xfrog.h
> > index 5748e967..3a43a403 100644
> > --- a/include/xfrog.h
> > +++ b/include/xfrog.h
> > @@ -177,4 +177,6 @@ struct xfs_inogrp;
> >  int xfrog_inumbers(struct xfs_fd *xfd, uint64_t *lastino, uint32_t icount,
> >  		struct xfs_inogrp *ubuffer, uint32_t *ocount);
> >  
> > +int xfrog_ag_geometry(int fd, unsigned int agno, struct xfs_ag_geometry *ageo);
> > +
> >  #endif	/* __XFROG_H__ */
> > diff --git a/libfrog/fsgeom.c b/libfrog/fsgeom.c
> > index 17479e4a..cddb5a39 100644
> > --- a/libfrog/fsgeom.c
> > +++ b/libfrog/fsgeom.c
> > @@ -131,3 +131,14 @@ xfrog_close(
> >  	xfd->fd = -1;
> >  	return ret;
> >  }
> > +
> > +/* Try to obtain an AG's geometry. */
> > +int
> > +xfrog_ag_geometry(
> > +	int			fd,
> > +	unsigned int		agno,
> > +	struct xfs_ag_geometry	*ageo)
> > +{
> > +	ageo->ag_number = agno;
> > +	return ioctl(fd, XFS_IOC_AG_GEOMETRY, ageo);
> 
> Does it need this first:
> 
> 	memset(ageo, 0, sizeof(*ageo));
> 
> Because I don't think the callers zero it....

Yep, and the caller was fixed up when I, er, twiddled the AG geometry
ioctl definition last week.

> > +static const struct flag_map inode_flags[] = {
> > +	{
> > +		.mask = XFS_BS_SICK_INODE,
> > +		.descr = "inode core",
> > +	},
> > +	{
> > +		.mask = XFS_BS_SICK_BMBTD,
> > +		.descr = "data fork",
> > +	},
> > +	{
> > +		.mask = XFS_BS_SICK_BMBTA,
> > +		.descr = "extended attribute fork",
> > +	},
> > +	{
> > +		.mask = XFS_BS_SICK_BMBTC,
> > +		.descr = "copy on write fork",
> > +	},
> > +	{
> > +		.mask = XFS_BS_SICK_DIR,
> > +		.descr = "directory",
> > +	},
> > +	{
> > +		.mask = XFS_BS_SICK_XATTR,
> > +		.descr = "extended attributes",
> > +	},
> > +	{
> > +		.mask = XFS_BS_SICK_SYMLINK,
> > +		.descr = "symbolic link target",
> > +	},
> > +	{
> > +		.mask = XFS_BS_SICK_PARENT,
> > +		.descr = "parent pointers",
> 
> This needs a "has_parent_pointers" feature check function,
> doesn't it? Or is this already a valid status for directory inodes?

It's already a valid status for directories, since xscrub checks that
a directory's '..' points to another valid directory that points back.

> > +static int
> > +report_bulkstat_health(
> > +	xfs_agnumber_t		agno)
> > +{
> > +	struct xfs_bstat	bstat[128];
> > +	char			descr[256];
> > +	uint64_t		startino = 0;
> > +	uint64_t		lastino = -1ULL;
> > +	uint32_t		ocount;
> > +	uint32_t		i;
> > +	int			error;
> > +
> > +	if (agno != NULLAGNUMBER) {
> > +		startino = xfrog_agino_to_ino(&file->xfd, agno, 0);
> > +		lastino = xfrog_agino_to_ino(&file->xfd, agno + 1, 0) - 1;
> > +	}
> > +
> > +	while ((error = xfrog_bulkstat(&file->xfd, &startino, 128, bstat,
> 
> Nit: use a define for the number of inodes to bulkstat.

Ok.  IIRC the xfrog_bulkstat conversion later on will zap most of this.

> > +health_f(
> > +	int			argc,
> > +	char			**argv)
> > +{
> > +	unsigned long long	x;
> > +	xfs_agnumber_t		agno;
> > +	bool			default_report = true;
> > +	int			c;
> > +	int			ret;
> > +
> > +	reported = 0;
> > +
> > +	if (file->xfd.fsgeom.version != XFS_FSOP_GEOM_VERSION_V5) {
> > +		perror("health");
> > +		return 1;
> > +	}
> > +
> > +	while ((c = getopt(argc, argv, "a:cfi:q")) != EOF) {
> > +		switch (c) {
> > +		case 'a':
> > +			default_report = false;
> > +			errno = 0;
> > +			x = strtoll(optarg, NULL, 10);
> > +			if (!errno && x >= NULLAGNUMBER)
> > +				errno = ERANGE;
> > +			if (errno) {
> > +				perror("ag health");
> > +				return 1;
> > +			}
> > +			agno = x;
> > +			ret = report_ag_sick(agno);
> > +			if (!ret && comprehensive)
> > +				ret = report_bulkstat_health(agno);
> > +			if (ret)
> > +				return 1;
> > +			break;
> > +		case 'c':
> > +			comprehensive = true;
> > +			break;
> 
> There's a command line ordering problem here - - "-a" and "-f" 
> check the comprehensive flag and do additional stuff based on it.
> 
> So I think the -a and -f processing need to be done outside
> the args processing loop, or we need two loops to extract the
> -c first...
> 
> Otherwise looks OK.

Ok, will do.  Thanks for the review. :)

--D

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/1] xfs_spaceman: report health problems
  2019-08-26 21:20 ` [PATCH 1/1] xfs_spaceman: report health problems Darrick J. Wong
@ 2019-09-04  4:52   ` Dave Chinner
  2019-09-04 14:51     ` Darrick J. Wong
  0 siblings, 1 reply; 7+ messages in thread
From: Dave Chinner @ 2019-09-04  4:52 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Mon, Aug 26, 2019 at 02:20:54PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Use the fs and ag geometry ioctls to report health problems to users.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  include/xfrog.h         |    2 
>  libfrog/fsgeom.c        |   11 +
>  man/man8/xfs_spaceman.8 |   28 +++
>  spaceman/Makefile       |    2 
>  spaceman/health.c       |  432 +++++++++++++++++++++++++++++++++++++++++++++++
>  spaceman/init.c         |    1 
>  spaceman/space.h        |    1 
>  7 files changed, 476 insertions(+), 1 deletion(-)
>  create mode 100644 spaceman/health.c
> 
> 
> diff --git a/include/xfrog.h b/include/xfrog.h
> index 5748e967..3a43a403 100644
> --- a/include/xfrog.h
> +++ b/include/xfrog.h
> @@ -177,4 +177,6 @@ struct xfs_inogrp;
>  int xfrog_inumbers(struct xfs_fd *xfd, uint64_t *lastino, uint32_t icount,
>  		struct xfs_inogrp *ubuffer, uint32_t *ocount);
>  
> +int xfrog_ag_geometry(int fd, unsigned int agno, struct xfs_ag_geometry *ageo);
> +
>  #endif	/* __XFROG_H__ */
> diff --git a/libfrog/fsgeom.c b/libfrog/fsgeom.c
> index 17479e4a..cddb5a39 100644
> --- a/libfrog/fsgeom.c
> +++ b/libfrog/fsgeom.c
> @@ -131,3 +131,14 @@ xfrog_close(
>  	xfd->fd = -1;
>  	return ret;
>  }
> +
> +/* Try to obtain an AG's geometry. */
> +int
> +xfrog_ag_geometry(
> +	int			fd,
> +	unsigned int		agno,
> +	struct xfs_ag_geometry	*ageo)
> +{
> +	ageo->ag_number = agno;
> +	return ioctl(fd, XFS_IOC_AG_GEOMETRY, ageo);

Does it need this first:

	memset(ageo, 0, sizeof(*ageo));

Because I don't think the callers zero it....

> +static const struct flag_map inode_flags[] = {
> +	{
> +		.mask = XFS_BS_SICK_INODE,
> +		.descr = "inode core",
> +	},
> +	{
> +		.mask = XFS_BS_SICK_BMBTD,
> +		.descr = "data fork",
> +	},
> +	{
> +		.mask = XFS_BS_SICK_BMBTA,
> +		.descr = "extended attribute fork",
> +	},
> +	{
> +		.mask = XFS_BS_SICK_BMBTC,
> +		.descr = "copy on write fork",
> +	},
> +	{
> +		.mask = XFS_BS_SICK_DIR,
> +		.descr = "directory",
> +	},
> +	{
> +		.mask = XFS_BS_SICK_XATTR,
> +		.descr = "extended attributes",
> +	},
> +	{
> +		.mask = XFS_BS_SICK_SYMLINK,
> +		.descr = "symbolic link target",
> +	},
> +	{
> +		.mask = XFS_BS_SICK_PARENT,
> +		.descr = "parent pointers",

This needs a "has_parent_pointers" feature check function,
doesn't it? Or is this already a valid status for directory inodes?

> +static int
> +report_bulkstat_health(
> +	xfs_agnumber_t		agno)
> +{
> +	struct xfs_bstat	bstat[128];
> +	char			descr[256];
> +	uint64_t		startino = 0;
> +	uint64_t		lastino = -1ULL;
> +	uint32_t		ocount;
> +	uint32_t		i;
> +	int			error;
> +
> +	if (agno != NULLAGNUMBER) {
> +		startino = xfrog_agino_to_ino(&file->xfd, agno, 0);
> +		lastino = xfrog_agino_to_ino(&file->xfd, agno + 1, 0) - 1;
> +	}
> +
> +	while ((error = xfrog_bulkstat(&file->xfd, &startino, 128, bstat,

Nit: use a define for the number of inodes to bulkstat.

> +health_f(
> +	int			argc,
> +	char			**argv)
> +{
> +	unsigned long long	x;
> +	xfs_agnumber_t		agno;
> +	bool			default_report = true;
> +	int			c;
> +	int			ret;
> +
> +	reported = 0;
> +
> +	if (file->xfd.fsgeom.version != XFS_FSOP_GEOM_VERSION_V5) {
> +		perror("health");
> +		return 1;
> +	}
> +
> +	while ((c = getopt(argc, argv, "a:cfi:q")) != EOF) {
> +		switch (c) {
> +		case 'a':
> +			default_report = false;
> +			errno = 0;
> +			x = strtoll(optarg, NULL, 10);
> +			if (!errno && x >= NULLAGNUMBER)
> +				errno = ERANGE;
> +			if (errno) {
> +				perror("ag health");
> +				return 1;
> +			}
> +			agno = x;
> +			ret = report_ag_sick(agno);
> +			if (!ret && comprehensive)
> +				ret = report_bulkstat_health(agno);
> +			if (ret)
> +				return 1;
> +			break;
> +		case 'c':
> +			comprehensive = true;
> +			break;

There's a command line ordering problem here - - "-a" and "-f" 
check the comprehensive flag and do additional stuff based on it.

So I think the -a and -f processing need to be done outside
the args processing loop, or we need two loops to extract the
-c first...

Otherwise looks OK.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/1] xfs_spaceman: report health problems
  2019-08-26 21:20 [PATCH 0/1] xfsprogs: online health tracking support Darrick J. Wong
@ 2019-08-26 21:20 ` Darrick J. Wong
  2019-09-04  4:52   ` Dave Chinner
  0 siblings, 1 reply; 7+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:20 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Use the fs and ag geometry ioctls to report health problems to users.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 include/xfrog.h         |    2 
 libfrog/fsgeom.c        |   11 +
 man/man8/xfs_spaceman.8 |   28 +++
 spaceman/Makefile       |    2 
 spaceman/health.c       |  432 +++++++++++++++++++++++++++++++++++++++++++++++
 spaceman/init.c         |    1 
 spaceman/space.h        |    1 
 7 files changed, 476 insertions(+), 1 deletion(-)
 create mode 100644 spaceman/health.c


diff --git a/include/xfrog.h b/include/xfrog.h
index 5748e967..3a43a403 100644
--- a/include/xfrog.h
+++ b/include/xfrog.h
@@ -177,4 +177,6 @@ struct xfs_inogrp;
 int xfrog_inumbers(struct xfs_fd *xfd, uint64_t *lastino, uint32_t icount,
 		struct xfs_inogrp *ubuffer, uint32_t *ocount);
 
+int xfrog_ag_geometry(int fd, unsigned int agno, struct xfs_ag_geometry *ageo);
+
 #endif	/* __XFROG_H__ */
diff --git a/libfrog/fsgeom.c b/libfrog/fsgeom.c
index 17479e4a..cddb5a39 100644
--- a/libfrog/fsgeom.c
+++ b/libfrog/fsgeom.c
@@ -131,3 +131,14 @@ xfrog_close(
 	xfd->fd = -1;
 	return ret;
 }
+
+/* Try to obtain an AG's geometry. */
+int
+xfrog_ag_geometry(
+	int			fd,
+	unsigned int		agno,
+	struct xfs_ag_geometry	*ageo)
+{
+	ageo->ag_number = agno;
+	return ioctl(fd, XFS_IOC_AG_GEOMETRY, ageo);
+}
diff --git a/man/man8/xfs_spaceman.8 b/man/man8/xfs_spaceman.8
index 12dd04e4..ece840d7 100644
--- a/man/man8/xfs_spaceman.8
+++ b/man/man8/xfs_spaceman.8
@@ -91,6 +91,34 @@ The output will have the same format that
 .BR "xfs_info" "(8)"
 prints when querying a filesystem.
 .TP
+.BI "health [ \-a agno] [ \-c ] [ \-f ] [ \-i inum ] [ \-q ] [ paths ]"
+Reports the health of the given group of filesystem metadata.
+.RS 1.0i
+.PD 0
+.TP 0.4i
+.B \-a agno
+Report on the health of the given allocation group.
+.TP
+.B \-c
+Scan all inodes in the filesystem and report each file's health status.
+If the
+.B \-a
+option is given, scan only the inodes in that AG.
+.TP
+.B \-f
+Report on the health of metadata that affect the entire filesystem.
+.TP
+.B \-i inum
+Report on the health of a specific inode.
+.TP
+.B \-q
+Report only unhealthy metadata.
+.TP
+.B paths
+Report on the health of the files at the given path.
+.PD
+.RE
+.TP
 .BR "help [ " command " ]"
 Display a brief description of one or all commands.
 .TP
diff --git a/spaceman/Makefile b/spaceman/Makefile
index b1c1b16d..d01aa74a 100644
--- a/spaceman/Makefile
+++ b/spaceman/Makefile
@@ -7,7 +7,7 @@ include $(TOPDIR)/include/builddefs
 
 LTCOMMAND = xfs_spaceman
 HFILES = init.h space.h
-CFILES = info.c init.c file.c prealloc.c trim.c
+CFILES = info.c init.c file.c health.c prealloc.c trim.c
 LSRCFILES = xfs_info.sh
 
 LLDLIBS = $(LIBXCMD) $(LIBFROG)
diff --git a/spaceman/health.c b/spaceman/health.c
new file mode 100644
index 00000000..6c9c75a1
--- /dev/null
+++ b/spaceman/health.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2019 Oracle.
+ * All Rights Reserved.
+ */
+#include "platform_defs.h"
+#include "libxfs.h"
+#include "xfrog.h"
+#include "command.h"
+#include "init.h"
+#include "path.h"
+#include "space.h"
+#include "input.h"
+#include "fsgeom.h"
+#include "libfrog.h"
+
+static cmdinfo_t health_cmd;
+static unsigned long long reported;
+static bool comprehensive;
+static bool quiet;
+
+static bool has_realtime(const struct xfs_fsop_geom *g)
+{
+	return g->rtblocks > 0;
+}
+
+static bool has_finobt(const struct xfs_fsop_geom *g)
+{
+	return g->flags & XFS_FSOP_GEOM_FLAGS_FINOBT;
+}
+
+static bool has_rmapbt(const struct xfs_fsop_geom *g)
+{
+	return g->flags & XFS_FSOP_GEOM_FLAGS_RMAPBT;
+}
+
+static bool has_reflink(const struct xfs_fsop_geom *g)
+{
+	return g->flags & XFS_FSOP_GEOM_FLAGS_REFLINK;
+}
+
+struct flag_map {
+	unsigned int		mask;
+	bool			(*has_fn)(const struct xfs_fsop_geom *g);
+	const char		*descr;
+};
+
+static const struct flag_map fs_flags[] = {
+	{
+		.mask = XFS_FSOP_GEOM_SICK_COUNTERS,
+		.descr = "summary counters",
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_UQUOTA,
+		.descr = "user quota",
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_GQUOTA,
+		.descr = "group quota",
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_PQUOTA,
+		.descr = "project quota",
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_RT_BITMAP,
+		.descr = "realtime bitmap",
+		.has_fn = has_realtime,
+	},
+	{
+		.mask = XFS_FSOP_GEOM_SICK_RT_SUMMARY,
+		.descr = "realtime summary",
+		.has_fn = has_realtime,
+	},
+	{0},
+};
+
+static const struct flag_map ag_flags[] = {
+	{
+		.mask = XFS_AG_GEOM_SICK_SB,
+		.descr = "superblock",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_AGF,
+		.descr = "AGF header",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_AGFL,
+		.descr = "AGFL header",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_AGI,
+		.descr = "AGI header",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_BNOBT,
+		.descr = "free space by block btree",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_CNTBT,
+		.descr = "free space by length btree",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_INOBT,
+		.descr = "inode btree",
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_FINOBT,
+		.descr = "free inode btree",
+		.has_fn = has_finobt,
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_RMAPBT,
+		.descr = "reverse mappings btree",
+		.has_fn = has_rmapbt,
+	},
+	{
+		.mask = XFS_AG_GEOM_SICK_REFCNTBT,
+		.descr = "reference count btree",
+		.has_fn = has_reflink,
+	},
+	{0},
+};
+
+static const struct flag_map inode_flags[] = {
+	{
+		.mask = XFS_BS_SICK_INODE,
+		.descr = "inode core",
+	},
+	{
+		.mask = XFS_BS_SICK_BMBTD,
+		.descr = "data fork",
+	},
+	{
+		.mask = XFS_BS_SICK_BMBTA,
+		.descr = "extended attribute fork",
+	},
+	{
+		.mask = XFS_BS_SICK_BMBTC,
+		.descr = "copy on write fork",
+	},
+	{
+		.mask = XFS_BS_SICK_DIR,
+		.descr = "directory",
+	},
+	{
+		.mask = XFS_BS_SICK_XATTR,
+		.descr = "extended attributes",
+	},
+	{
+		.mask = XFS_BS_SICK_SYMLINK,
+		.descr = "symbolic link target",
+	},
+	{
+		.mask = XFS_BS_SICK_PARENT,
+		.descr = "parent pointers",
+	},
+	{0},
+};
+
+/* Convert a flag mask to a report. */
+static void
+report_sick(
+	const char			*descr,
+	const struct flag_map		*maps,
+	unsigned int			sick,
+	unsigned int			checked)
+{
+	const struct flag_map		*f;
+	bool				bad;
+
+	for (f = maps; f->mask != 0; f++) {
+		if (f->has_fn && !f->has_fn(&file->xfd.fsgeom))
+			continue;
+		if (!(checked & f->mask))
+			continue;
+		reported++;
+		bad = sick & f->mask;
+		if (!bad && quiet)
+			continue;
+		printf("%s %s: %s\n", descr, _(f->descr),
+				bad ? _("unhealthy") : _("ok"));
+	}
+}
+
+/* Report on an AG's health. */
+static int
+report_ag_sick(
+	xfs_agnumber_t		agno)
+{
+	struct xfs_ag_geometry	ageo;
+	char			descr[256];
+	int			ret;
+
+	ret = xfrog_ag_geometry(file->xfd.fd, agno, &ageo);
+	if (ret) {
+		perror("ag_geometry");
+		return 1;
+	}
+	snprintf(descr, sizeof(descr) - 1, _("AG %u"), agno);
+	report_sick(descr, ag_flags, ageo.ag_sick, ageo.ag_checked);
+	return 0;
+}
+
+/* Report on an inode's health. */
+static int
+report_inode_health(
+	unsigned long long	ino,
+	const char		*descr)
+{
+	struct xfs_bstat	bs;
+	char			d[256];
+	int			ret;
+
+	if (!descr) {
+		snprintf(d, sizeof(d) - 1, _("inode %llu"), ino);
+		descr = d;
+	}
+
+	ret = xfrog_bulkstat_single(&file->xfd, ino, &bs);
+	if (ret) {
+		perror(descr);
+		return 1;
+	}
+
+	report_sick(descr, inode_flags, bs.bs_sick, bs.bs_checked);
+	return 0;
+}
+
+/* Report on a file's health. */
+static int
+report_file_health(
+	const char	*path)
+{
+	struct stat	stata, statb;
+	int		ret;
+
+	ret = lstat(path, &statb);
+	if (ret) {
+		perror(path);
+		return 1;
+	}
+
+	ret = fstat(file->xfd.fd, &stata);
+	if (ret) {
+		perror(file->name);
+		return 1;
+	}
+
+	if (stata.st_dev != statb.st_dev) {
+		fprintf(stderr, _("%s: not on the open filesystem"), path);
+		return 1;
+	}
+
+	return report_inode_health(statb.st_ino, path);
+}
+
+/*
+ * Report on all files' health for a given @agno.  If @agno is NULLAGNUMBER,
+ * report on all files in the filesystem.
+ */
+static int
+report_bulkstat_health(
+	xfs_agnumber_t		agno)
+{
+	struct xfs_bstat	bstat[128];
+	char			descr[256];
+	uint64_t		startino = 0;
+	uint64_t		lastino = -1ULL;
+	uint32_t		ocount;
+	uint32_t		i;
+	int			error;
+
+	if (agno != NULLAGNUMBER) {
+		startino = xfrog_agino_to_ino(&file->xfd, agno, 0);
+		lastino = xfrog_agino_to_ino(&file->xfd, agno + 1, 0) - 1;
+	}
+
+	while ((error = xfrog_bulkstat(&file->xfd, &startino, 128, bstat,
+			&ocount) == 0) && ocount > 0) {
+		for (i = 0; i < ocount; i++) {
+			if (bstat[i].bs_ino > lastino)
+				goto out;
+			snprintf(descr, sizeof(descr) - 1, _("inode %llu"),
+					bstat[i].bs_ino);
+			report_sick(descr, inode_flags, bstat[i].bs_sick,
+					bstat[i].bs_checked);
+		}
+	}
+out:
+	return error;
+}
+
+/* Report on health problems in XFS filesystem. */
+static int
+health_f(
+	int			argc,
+	char			**argv)
+{
+	unsigned long long	x;
+	xfs_agnumber_t		agno;
+	bool			default_report = true;
+	int			c;
+	int			ret;
+
+	reported = 0;
+
+	if (file->xfd.fsgeom.version != XFS_FSOP_GEOM_VERSION_V5) {
+		perror("health");
+		return 1;
+	}
+
+	while ((c = getopt(argc, argv, "a:cfi:q")) != EOF) {
+		switch (c) {
+		case 'a':
+			default_report = false;
+			errno = 0;
+			x = strtoll(optarg, NULL, 10);
+			if (!errno && x >= NULLAGNUMBER)
+				errno = ERANGE;
+			if (errno) {
+				perror("ag health");
+				return 1;
+			}
+			agno = x;
+			ret = report_ag_sick(agno);
+			if (!ret && comprehensive)
+				ret = report_bulkstat_health(agno);
+			if (ret)
+				return 1;
+			break;
+		case 'c':
+			comprehensive = true;
+			break;
+		case 'f':
+			default_report = false;
+			report_sick(_("filesystem"), fs_flags,
+					file->xfd.fsgeom.sick,
+					file->xfd.fsgeom.checked);
+			if (comprehensive) {
+				ret = report_bulkstat_health(NULLAGNUMBER);
+				if (ret)
+					return 1;
+			}
+			break;
+		case 'i':
+			default_report = false;
+			errno = 0;
+			x = strtoll(optarg, NULL, 10);
+			if (errno) {
+				perror("inode health");
+				return 1;
+			}
+			ret = report_inode_health(x, NULL);
+			if (ret)
+				return 1;
+			break;
+		case 'q':
+			quiet = true;
+			break;
+		default:
+			return command_usage(&health_cmd);
+		}
+	}
+
+	for (c = optind; c < argc; c++) {
+		default_report = false;
+		ret = report_file_health(argv[c]);
+		if (ret)
+			return 1;
+	}
+
+	/* No arguments gets us a summary of fs state. */
+	if (default_report) {
+		report_sick(_("filesystem"), fs_flags, file->xfd.fsgeom.sick,
+				file->xfd.fsgeom.checked);
+
+		for (agno = 0; agno < file->xfd.fsgeom.agcount; agno++) {
+			ret = report_ag_sick(agno);
+			if (ret)
+				return 1;
+		}
+		if (comprehensive) {
+			ret = report_bulkstat_health(NULLAGNUMBER);
+			if (ret)
+				return 1;
+		}
+	}
+
+	if (!reported) {
+		fprintf(stderr,
+_("Health status has not been collected for this filesystem.\n"));
+		fprintf(stderr,
+_("Please run xfs_scrub(8) to remedy this situation.\n"));
+	}
+
+	return 0;
+}
+
+static void
+health_help(void)
+{
+	printf(_(
+"\n"
+"Report all observed filesystem health problems.\n"
+"\n"
+" -a agno  -- Report health of the given allocation group.\n"
+" -c       -- Report on the health of all inodes.\n"
+" -f       -- Report health of the overall filesystem.\n"
+" -i inum  -- Report health of a given inode number.\n"
+" -q       -- Only report unhealthy metadata.\n"
+" paths    -- Report health of the given file path.\n"
+"\n"));
+
+}
+
+static cmdinfo_t health_cmd = {
+	.name = "health",
+	.cfunc = health_f,
+	.argmin = 0,
+	.argmax = -1,
+	.args = "[-a agno] [-c] [-f] [-i inum] [-q] [paths]",
+	.flags = CMD_FLAG_ONESHOT,
+	.help = health_help,
+};
+
+void
+health_init(void)
+{
+	health_cmd.oneline = _("Report observed XFS health problems."),
+	add_command(&health_cmd);
+}
diff --git a/spaceman/init.c b/spaceman/init.c
index 2698f420..80740cda 100644
--- a/spaceman/init.c
+++ b/spaceman/init.c
@@ -34,6 +34,7 @@ init_commands(void)
 	quit_init();
 	trim_init();
 	freesp_init();
+	health_init();
 }
 
 static int
diff --git a/spaceman/space.h b/spaceman/space.h
index 2c26884a..723209ed 100644
--- a/spaceman/space.h
+++ b/spaceman/space.h
@@ -32,5 +32,6 @@ extern void	freesp_init(void);
 # define freesp_init()	do { } while (0)
 #endif
 extern void	info_init(void);
+extern void	health_init(void);
 
 #endif /* XFS_SPACEMAN_SPACE_H_ */


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, back to index

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-06  3:33 [PATCH 0/1] xfsprogs: online health tracking support Darrick J. Wong
2019-09-06  3:33 ` [PATCH 1/1] xfs_spaceman: report health problems Darrick J. Wong
2019-09-09 23:28   ` Dave Chinner
2019-09-16 19:29   ` [PATCH v2 " Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2019-08-26 21:20 [PATCH 0/1] xfsprogs: online health tracking support Darrick J. Wong
2019-08-26 21:20 ` [PATCH 1/1] xfs_spaceman: report health problems Darrick J. Wong
2019-09-04  4:52   ` Dave Chinner
2019-09-04 14:51     ` Darrick J. Wong

Linux-XFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-xfs/0 linux-xfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-xfs linux-xfs/ https://lore.kernel.org/linux-xfs \
		linux-xfs@vger.kernel.org linux-xfs@archiver.kernel.org
	public-inbox-index linux-xfs


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-xfs


AGPL code for this site: git clone https://public-inbox.org/ public-inbox