All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: cem@kernel.org, kent.overstreet@linux.dev, djwong@kernel.org
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 4/7] xfs_io: monitor filesystem health events
Date: Fri, 23 Feb 2024 17:35:13 -0800	[thread overview]
Message-ID: <170873836612.1902540.13429166309518341696.stgit@frogsfrogsfrogs> (raw)
In-Reply-To: <170873836546.1902540.13109376239205481967.stgit@frogsfrogsfrogs>

From: Darrick J. Wong <djwong@kernel.org>

Create a subcommand to monitor for health events generated by the kernel.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 io/Makefile       |    1 
 io/healthmon.c    |  172 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 io/init.c         |    1 
 io/io.h           |    1 
 man/man8/xfs_io.8 |   22 +++++++
 5 files changed, 197 insertions(+)
 create mode 100644 io/healthmon.c


diff --git a/io/Makefile b/io/Makefile
index 787027fe10ed..b1f9cebd63b0 100644
--- a/io/Makefile
+++ b/io/Makefile
@@ -24,6 +24,7 @@ CFILES = \
 	fsuuid.c \
 	fsync.c \
 	getrusage.c \
+	healthmon.c \
 	imap.c \
 	inject.c \
 	label.c \
diff --git a/io/healthmon.c b/io/healthmon.c
new file mode 100644
index 000000000000..7db8c52c96c0
--- /dev/null
+++ b/io/healthmon.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2024 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "libxfs.h"
+#include "libfrog/fsgeom.h"
+#include "libfrog/paths.h"
+#include "command.h"
+#include "init.h"
+#include "io.h"
+
+/* Print the option summary for the healthmon command. */
+static void
+healthmon_help(void)
+{
+	printf(_(
+"Monitor filesystem health events\n"
+"-c             Replace the open file with the monitor file.\n"
+"-d delay_ms    Sleep this many milliseconds between reads.\n"
+"-p             Only probe for the existence of the ioctl.\n"
+"-v             Request all events.\n\n"));
+}
+
+/*
+ * Pause for the requested number of milliseconds.
+ *
+ * @delay_ms: how long to sleep, in milliseconds; zero means do not sleep.
+ *
+ * Returns 0 if no sleep was requested, otherwise whatever nanosleep() returns.
+ */
+static inline int
+monitor_sleep(
+	int			delay_ms)
+{
+	if (delay_ms) {
+		/* Split the delay into whole seconds and leftover nanoseconds. */
+		struct timespec	req = {
+			.tv_sec		= delay_ms / 1000,
+			.tv_nsec	= (delay_ms % 1000) * 1000000,
+		};
+
+		return nanosleep(&req, NULL);
+	}
+
+	return 0;
+}
+
+#define BUFSIZE			(4096)
+
+/*
+ * Open a health monitor file via XFS_IOC_HEALTH_MONITOR on the current file
+ * and copy everything read from it to stdout until read() returns zero or
+ * fails.
+ *
+ * @consume:	close the currently open file and store the monitor fd in
+ *		file->fd in its place.
+ * @delay_ms:	milliseconds to sleep before each read; zero disables sleeping.
+ * @verbose:	set XFS_HEALTH_MONITOR_ALL in the ioctl flags.
+ * @only_probe:	return as soon as the ioctl has succeeded, without reading.
+ *
+ * Returns 0 on success, or 1 if the ioctl, the buffer allocation, a read, or
+ * a write failed.
+ */
+static int
+monitor(
+	bool			consume,
+	int			delay_ms,
+	bool			verbose,
+	bool			only_probe)
+{
+	struct xfs_health_monitor	hmo = {
+		.format		= XFS_HEALTH_MONITOR_FMT_JSON,
+	};
+	char			*buf;
+	ssize_t			bytes_read;
+	int			mon_fd;
+	bool			close_mon_fd = true;
+	int			ret = 1;
+
+	if (verbose)
+		hmo.flags |= XFS_HEALTH_MONITOR_ALL;
+
+	mon_fd = ioctl(file->fd, XFS_IOC_HEALTH_MONITOR, &hmo);
+	if (mon_fd < 0) {
+		perror("XFS_IOC_HEALTH_MONITOR");
+		return 1;
+	}
+
+	if (only_probe) {
+		ret = 0;
+		goto out_mon;
+	}
+
+	buf = malloc(BUFSIZE);
+	if (!buf) {
+		perror("malloc");
+		goto out_mon;
+	}
+
+	if (consume) {
+		/*
+		 * From here on file->fd is the monitor fd, so it must stay
+		 * open when we return.  Closing it on the way out would leave
+		 * file->fd naming a closed (or later reused) descriptor.
+		 */
+		close(file->fd);
+		file->fd = mon_fd;
+		close_mon_fd = false;
+	}
+
+	monitor_sleep(delay_ms);
+	while ((bytes_read = read(mon_fd, buf, BUFSIZE)) > 0) {
+		char		*write_ptr = buf;
+		size_t		to_write = bytes_read;
+
+		/*
+		 * write() may accept only part of the chunk, so keep going
+		 * until it is all out.  Stop as soon as nothing is left rather
+		 * than issuing a zero-length write, whose result is
+		 * unspecified when stdout is not a regular file.
+		 */
+		while (to_write > 0) {
+			ssize_t	bytes_written;
+
+			bytes_written = write(STDOUT_FILENO, write_ptr,
+					to_write);
+			if (bytes_written < 0) {
+				/*
+				 * NOTE(review): this prefix differs from the
+				 * "healthmon" used for read errors below; it
+				 * is left unchanged here.
+				 */
+				perror("healthdump");
+				goto out_buf;
+			}
+			if (bytes_written == 0)
+				break;
+
+			write_ptr += bytes_written;
+			to_write -= bytes_written;
+		}
+
+		monitor_sleep(delay_ms);
+	}
+	if (bytes_read < 0) {
+		perror("healthmon");
+		goto out_buf;
+	}
+
+	ret = 0;
+
+out_buf:
+	free(buf);
+out_mon:
+	if (close_mon_fd)
+		close(mon_fd);
+	return ret;
+}
+
+/*
+ * Parse the healthmon command line and run the monitor.
+ *
+ * Options: -c (replace the open file with the monitor file), -d delay_ms,
+ * -p (probe only), -v (request all events).  An unknown option or an invalid
+ * delay sets exitcode to 1 and returns 0; otherwise the return value of
+ * monitor() is passed back to the caller.
+ */
+static int
+healthmon_f(
+	int			argc,
+	char			**argv)
+{
+	bool			consume = false;
+	bool			verbose = false;
+	bool			only_probe = false;
+	int			delay_ms = 0;
+	int			c;
+
+	while ((c = getopt(argc, argv, "cd:pv")) != EOF) {
+		char		*endp;
+		long		val;
+
+		switch (c) {
+		case 'c':
+			consume = true;
+			break;
+		case 'd':
+			/*
+			 * strtol reports overflow through errno and exposes
+			 * where parsing stopped, so empty strings, trailing
+			 * junk and out-of-range values can all be rejected.
+			 * atoi can detect none of these.
+			 */
+			errno = 0;
+			val = strtol(optarg, &endp, 10);
+			delay_ms = (int)val;
+			if (errno || endp == optarg || *endp != '\0' ||
+			    val < 0 || (long)delay_ms != val) {
+				printf("%s: delay must be positive msecs\n",
+						optarg);
+				exitcode = 1;
+				return 0;
+			}
+			break;
+		case 'p':
+			only_probe = true;
+			break;
+		case 'v':
+			verbose = true;
+			break;
+		default:
+			exitcode = 1;
+			healthmon_help();
+			return 0;
+		}
+	}
+
+	/*
+	 * NOTE(review): a failed monitor() returns 1 from here without setting
+	 * exitcode, unlike the option-parsing failures above.  Confirm this is
+	 * the intended way to report the error.
+	 */
+	return monitor(consume, delay_ms, verbose, only_probe);
+}
+
+/*
+ * Command description for "healthmon".  The .oneline field is assigned in
+ * healthmon_init().  The .args string lists every option that healthmon_f()
+ * passes to getopt().
+ */
+static struct cmdinfo healthmon_cmd = {
+	.name		= "healthmon",
+	.cfunc		= healthmon_f,
+	.argmin		= 0,
+	.argmax		= -1,
+	.flags		= CMD_FLAG_ONESHOT | CMD_NOMAP_OK,
+	.args		= "[-c] [-d delay_ms] [-p] [-v]",
+	.help		= healthmon_help,
+};
+
+/*
+ * Fill in the one-line summary of the healthmon command (passed through _())
+ * and hand the command to add_command().
+ */
+void
+healthmon_init(void)
+{
+	struct cmdinfo		*cmd = &healthmon_cmd;
+
+	cmd->oneline = _("monitor filesystem health events");
+	add_command(cmd);
+}
diff --git a/io/init.c b/io/init.c
index 452f4cfc898c..ef32e74bc744 100644
--- a/io/init.c
+++ b/io/init.c
@@ -91,6 +91,7 @@ init_commands(void)
 	utimes_init();
 	crc32cselftest_init();
 	exchrange_init();
+	healthmon_init();
 }
 
 /*
diff --git a/io/io.h b/io/io.h
index 06a8ae1db496..b8bed3b66171 100644
--- a/io/io.h
+++ b/io/io.h
@@ -192,3 +192,4 @@ extern void		bulkstat_init(void);
 extern void		exchrange_init(void);
 extern void		aginfo_init(void);
 extern void		fsrefcounts_init(void);
+extern void		healthmon_init(void);
diff --git a/man/man8/xfs_io.8 b/man/man8/xfs_io.8
index 93a4f0790d8e..9f00d26a0b49 100644
--- a/man/man8/xfs_io.8
+++ b/man/man8/xfs_io.8
@@ -1407,6 +1407,28 @@ flag.
 .RE
 .PD
 
+.TP
+.BI "healthmon [ \-c ] [ \-d " delay_ms " ] [ \-p ] [ \-v ]"
+Watch for filesystem health events and write them to standard output.
+.RE
+.RS 1.0i
+.PD 0
+.TP
+.BI \-c
+Close the open file and replace it with the monitor file.
+.TP
+.BI "\-d " delay_ms
+Sleep for this many milliseconds between read attempts.
+.TP
+.B \-p
+Probe for the existence of the functionality by opening the monitoring fd and
+closing it immediately.
+.TP
+.BI \-v
+Request all health events, even if nothing changed.
+.PD
+.RE
+
 .TP
 .BI "inject [ " tag " ]"
 Inject errors into a filesystem to observe filesystem behavior at


  parent reply	other threads:[~2024-02-24  1:35 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-24  1:00 [PATCHBOMB] time_stats, thread_with_file: lifting generic code to lib Darrick J. Wong
2024-02-24  1:07 ` [PATCHSET 1/6] time_stats: promote to lib/ Darrick J. Wong
2024-02-24  1:09   ` [PATCH 1/4] mean and variance: Promote to lib/math Darrick J. Wong
2024-02-24  1:09   ` [PATCH 2/4] eytzinger: Promote to include/linux/ Darrick J. Wong
2024-02-24  1:09   ` [PATCH 3/4] bcachefs: bch2_time_stats_to_seq_buf() Darrick J. Wong
2024-02-24  1:10   ` [PATCH 4/4] time_stats: Promote to lib/ Darrick J. Wong
2024-02-24  1:08 ` [PATCHSET 2/6] time_stats: cleanups and fixes Darrick J. Wong
2024-02-24  1:10   ` [PATCH 01/10] time_stats: report lifetime of the stats object Darrick J. Wong
2024-02-24  1:10   ` [PATCH 02/10] time_stats: split stats-with-quantiles into a separate structure Darrick J. Wong
2024-02-24  1:10   ` [PATCH 03/10] time_stats: fix struct layout bloat Darrick J. Wong
2024-02-24  1:11   ` [PATCH 04/10] time_stats: add larger units Darrick J. Wong
2024-02-24  1:11   ` [PATCH 05/10] time_stats: don't print any output if event count is zero Darrick J. Wong
2024-02-24  1:11   ` [PATCH 06/10] time_stats: allow custom epoch names Darrick J. Wong
2024-02-24  1:11   ` [PATCH 07/10] mean_and_variance: put struct mean_and_variance_weighted on a diet Darrick J. Wong
2024-02-24  1:12   ` [PATCH 08/10] time_stats: shrink time_stat_buffer for better alignment Darrick J. Wong
2024-02-24  1:12   ` [PATCH 09/10] time_stats: report information in json format Darrick J. Wong
2024-02-24  4:15     ` Darrick J. Wong
2024-02-24  5:10       ` Kent Overstreet
2024-02-24  6:02         ` Darrick J. Wong
2024-02-24  1:12   ` [PATCH 10/10] time_stats: Kill TIME_STATS_HAVE_QUANTILES Darrick J. Wong
2024-02-24  1:08 ` [PATCHSET RFC 3/6] xfs: capture statistics about wait times Darrick J. Wong
2024-02-24  1:12   ` [PATCH 1/4] xfs: present wait time statistics Darrick J. Wong
2024-02-24  1:13   ` [PATCH 2/4] xfs: present time stats for scrubbers Darrick J. Wong
2024-02-24  1:13   ` [PATCH 3/4] xfs: present timestats in json format Darrick J. Wong
2024-02-24  1:13   ` [PATCH 4/4] xfs: create debugfs uuid aliases Darrick J. Wong
2024-02-24  1:08 ` [PATCHSET 4/6] thread_with_file: promote to lib/ Darrick J. Wong
2024-02-24  1:14   ` [PATCH 01/10] bcachefs: thread_with_stdio: eliminate double buffering Darrick J. Wong
2024-02-24  1:14   ` [PATCH 02/10] bcachefs: thread_with_stdio: convert to darray Darrick J. Wong
2024-02-24  1:14   ` [PATCH 03/10] bcachefs: thread_with_stdio: kill thread_with_stdio_done() Darrick J. Wong
2024-02-24  1:14   ` [PATCH 04/10] bcachefs: thread_with_stdio: fix bch2_stdio_redirect_readline() Darrick J. Wong
2024-02-24  1:15   ` [PATCH 05/10] bcachefs: Thread with file documentation Darrick J. Wong
2024-02-24  1:15   ` [PATCH 06/10] darray: lift from bcachefs Darrick J. Wong
2024-02-24  1:15   ` [PATCH 07/10] thread_with_file: Lift " Darrick J. Wong
2024-02-24  1:15   ` [PATCH 08/10] thread_with_stdio: Mark completed in ->release() Darrick J. Wong
2024-02-24  1:16   ` [PATCH 09/10] kernel/hung_task.c: export sysctl_hung_task_timeout_secs Darrick J. Wong
2024-02-24  1:16   ` [PATCH 10/10] thread_with_stdio: suppress hung task warning Darrick J. Wong
2024-02-24  1:08 ` [PATCHSET 5/6] thread_with_file: cleanups and fixes Darrick J. Wong
2024-02-24  1:16   ` [PATCH 1/5] thread_with_file: allow creation of readonly files Darrick J. Wong
2024-02-24  1:16   ` [PATCH 2/5] thread_with_file: fix various printf problems Darrick J. Wong
2024-02-24  1:17   ` [PATCH 3/5] thread_with_file: create ops structure for thread_with_stdio Darrick J. Wong
2024-02-24  1:17   ` [PATCH 4/5] thread_with_file: allow ioctls against these files Darrick J. Wong
2024-02-24  1:17   ` [PATCH 5/5] thread_with_file: Fix missing va_end() Darrick J. Wong
2024-02-24  1:09 ` [PATCHSET RFC 6/6] xfs: live health monitoring of filesystems Darrick J. Wong
2024-02-24  1:17   ` [PATCH 1/8] xfs: use thread_with_file to create a monitoring file Darrick J. Wong
2024-02-24  1:18   ` [PATCH 2/8] xfs: create hooks for monitoring health updates Darrick J. Wong
2024-02-24  1:18   ` [PATCH 3/8] xfs: create a filesystem shutdown hook Darrick J. Wong
2024-02-24  1:18   ` [PATCH 4/8] xfs: report shutdown events through healthmon Darrick J. Wong
2024-02-24  1:18   ` [PATCH 5/8] xfs: report metadata health " Darrick J. Wong
2024-02-24  1:19   ` [PATCH 6/8] xfs: report media errors " Darrick J. Wong
2024-02-24  1:19   ` [PATCH 7/8] xfs: allow reconfiguration of the health monitoring device Darrick J. Wong
2024-02-24  1:19   ` [PATCH 8/8] xfs: send uevents when mounting and unmounting a filesystem Darrick J. Wong
2024-02-24  1:34 ` [PATCHSET RFC] xfsprogs: live health monitoring of filesystems Darrick J. Wong
2024-02-24  1:34   ` [PATCH 1/7] xfs: use thread_with_file to create a monitoring file Darrick J. Wong
2024-02-24  1:34   ` [PATCH 2/7] xfs: create hooks for monitoring health updates Darrick J. Wong
2024-02-24  1:34   ` [PATCH 3/7] xfs: report shutdown events through healthmon Darrick J. Wong
2024-02-24  1:35   ` Darrick J. Wong [this message]
2024-02-24  1:35   ` [PATCH 5/7] xfs_scrubbed: create daemon to listen for health events Darrick J. Wong
2024-02-24  1:35   ` [PATCH 6/7] xfs_scrubbed: enable repairing filesystems Darrick J. Wong
2024-02-24  1:36   ` [PATCH 7/7] xfs_scrubbed: create a background monitoring service Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=170873836612.1902540.13429166309518341696.stgit@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=cem@kernel.org \
    --cc=kent.overstreet@linux.dev \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.