linux-nvme.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: mwilck@suse.com
To: Sagi Grimberg <sagi@grimberg.me>, Hannes Reinecke <hare@suse.de>,
	Keith Busch <kbusch@kernel.org>
Cc: Chaitanya Kulkarni <Chaitanya.Kulkarni@wdc.com>,
	linux-nvme@lists.infradead.org,
	Enzo Matsumiya <ematsumiya@suse.de>,
	Martin Wilck <mwilck@suse.com>
Subject: [PATCH v2 04/16] monitor: implement uevent handling
Date: Sat,  6 Mar 2021 01:36:47 +0100	[thread overview]
Message-ID: <20210306003659.21207-5-mwilck@suse.com> (raw)
In-Reply-To: <20210306003659.21207-1-mwilck@suse.com>

From: Martin Wilck <mwilck@suse.com>

This patch implements handling of events received via NVMe-FC
(fc_udev_device, detection of FC remote ports with NVMe support)
and AEN events from persistent discovery controller connections.

For actual discovery, we fork and basically run a "nvme connect-all"
process for the newly detected discovery controller.
The reason for forking discovery tasks is twofold: Firstly, we'd
otherwise be forced to make all discovery connections sequentially,
which would be slow, as connecting controllers can block on
the order of seconds even in successful cases.

Secondly, this allows us to use some global variables like
fabrics_cfg and tracked_ctrls in the discovery code path. Without forking,
we'd have to re-write much more code in fabrics.c. In general,
the alternative to forking would be creating threads, but
that would require a large rewrite of our code base.

A single-threaded server that forks off the actual discovery makes
most sense at this point.

All options known from "nvme connect-all" can be passed to the discovery
processes as usual, with the exception of the "persistent" option, which
is always enabled: The monitor must try to create persistent discovery
connections in order to monitor them.
---
 monitor.c | 282 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 277 insertions(+), 5 deletions(-)

diff --git a/monitor.c b/monitor.c
index 32f53a3..f544319 100644
--- a/monitor.c
+++ b/monitor.c
@@ -17,18 +17,24 @@
 
 #include <stddef.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <unistd.h>
 #include <errno.h>
 #include <libudev.h>
 #include <signal.h>
 #include <time.h>
 #include <syslog.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
 #include <sys/epoll.h>
 
 #include "nvme-status.h"
+#include "nvme.h"
 #include "util/argconfig.h"
 #include "util/cleanup.h"
 #include "common.h"
+#include "fabrics.h"
 #include "monitor.h"
 #define LOG_FUNCNAME 1
 #include "util/log.h"
@@ -87,12 +93,19 @@ static int create_udev_monitor(struct udev *udev, struct udev_monitor **pmon)
 }
 
 static sig_atomic_t must_exit;
+static sig_atomic_t got_sigchld;
+static sigset_t orig_sigmask;
 
 static void monitor_int_handler(int sig)
 {
 	must_exit = 1;
 }
 
+/*
+ * SIGCHLD handler: only records that the signal arrived by setting
+ * got_sigchld. The flag is evaluated and cleared in handle_epoll_err().
+ */
+static void monitor_chld_handler(int sig)
+{
+	(void)sig;
+	got_sigchld = 1;
+}
+
 static int monitor_init_signals(sigset_t *wait_mask)
 {
 	sigset_t mask;
@@ -103,26 +116,228 @@ static int monitor_init_signals(sigset_t *wait_mask)
 	 * for events.
 	 */
 	sigfillset(&mask);
-	if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1)
+	if (sigprocmask(SIG_BLOCK, &mask, &orig_sigmask) == -1)
 		return -errno;
 	if (sigaction(SIGTERM, &sa, NULL) == -1)
 		return -errno;
 	if (sigaction(SIGINT, &sa, NULL) == -1)
 		return -errno;
 
+	sa.sa_handler = monitor_chld_handler;
+	if (sigaction(SIGCHLD, &sa, NULL) == -1)
+		return -errno;
+
 	/* signal mask to be used in epoll_pwait() */
 	sigfillset(wait_mask);
 	sigdelset(wait_mask, SIGTERM);
 	sigdelset(wait_mask, SIGINT);
+	sigdelset(wait_mask, SIGCHLD);
 
 	return 0;
 }
 
+/*
+ * Undo the monitor's signal setup: restore the default disposition of
+ * SIGTERM, SIGINT and SIGCHLD and reinstate the signal mask stored in
+ * orig_sigmask.
+ *
+ * All steps are attempted even if one of them fails. Returns 0 on
+ * success, otherwise the negated errno of the first failing call.
+ */
+static int child_reset_signals(void)
+{
+	static const int signals[] = { SIGTERM, SIGINT, SIGCHLD };
+	struct sigaction dfl = { .sa_handler = SIG_DFL, };
+	int first_err = 0;
+	size_t i;
+
+	for (i = 0; i < sizeof(signals) / sizeof(signals[0]); i++) {
+		if (sigaction(signals[i], &dfl, NULL) == -1 && !first_err)
+			first_err = errno;
+	}
+
+	if (sigprocmask(SIG_SETMASK, &orig_sigmask, NULL) == -1 && !first_err)
+		first_err = errno;
+
+	if (first_err)
+		msg(LOG_ERR, "error resetting signal handlers and mask\n");
+	return -first_err;
+}
+
+/*
+ * Extract the NVMe-FC discovery addresses from an "fc" subsystem uevent.
+ *
+ * Walks the property list of @ud looking for FC_EVENT=nvmediscovery,
+ * NVMEFC_TRADDR and NVMEFC_HOST_TRADDR, and copies the two addresses
+ * into the caller's buffers.
+ *
+ * @ud:          udev device the uevent was received for
+ * @traddr:      output buffer for the value of NVMEFC_TRADDR
+ * @tra_sz:      size of @traddr in bytes
+ * @host_traddr: output buffer for the value of NVMEFC_HOST_TRADDR
+ * @htra_sz:     size of @host_traddr in bytes
+ *
+ * Returns 0 on success, -ENOENT if the device has no properties,
+ * -EINVAL if FC_EVENT is not "nvmediscovery" or an address property is
+ * missing, and -ENAMETOOLONG if an address does not fit its buffer.
+ * The output buffers are only valid when 0 is returned.
+ */
+static int monitor_get_fc_uev_props(struct udev_device *ud,
+				    char *traddr, size_t tra_sz,
+				    char *host_traddr, size_t htra_sz)
+{
+	const char *sysname = udev_device_get_sysname(ud);
+	const char *tra = NULL, *host_tra = NULL;
+	bool fc_event_seen = false;
+	struct udev_list_entry *entry;
+
+	entry = udev_device_get_properties_list_entry(ud);
+	if (!entry) {
+		msg(LOG_NOTICE, "%s: empty properties list\n", sysname);
+		return -ENOENT;
+	}
+
+	for (; entry; entry = udev_list_entry_get_next(entry)) {
+		const char *name = udev_list_entry_get_name(entry);
+
+		if (!strcmp(name, "FC_EVENT") &&
+		    !strcmp(udev_list_entry_get_value(entry), "nvmediscovery"))
+			fc_event_seen = true;
+		else if (!strcmp(name, "NVMEFC_HOST_TRADDR"))
+			host_tra = udev_list_entry_get_value(entry);
+		else if (!strcmp(name, "NVMEFC_TRADDR"))
+			tra = udev_list_entry_get_value(entry);
+	}
+	if (!fc_event_seen) {
+		msg(LOG_DEBUG, "%s: FC_EVENT property missing or unsupported\n",
+		    sysname);
+		return -EINVAL;
+	}
+	if (!tra || !host_tra) {
+		msg(LOG_WARNING, "%s: transport properties missing\n", sysname);
+		return -EINVAL;
+	}
+
+	/*
+	 * memccpy() returns NULL if no terminating '\0' was copied within
+	 * the given size. The destination is then not NUL-terminated (and
+	 * host_traddr may not have been written at all), so report the
+	 * lengths of the source strings rather than of the destinations.
+	 */
+	if (!memccpy(traddr, tra, '\0', tra_sz) ||
+	    !memccpy(host_traddr, host_tra, '\0', htra_sz)) {
+		msg(LOG_ERR, "traddr (%zu) or host_traddr (%zu) overflow\n",
+		    strlen(tra), strlen(host_tra));
+		return -ENAMETOOLONG;
+	}
+
+	return 0;
+}
+
+/*
+ * Fork a child process that runs discovery against one discovery controller.
+ *
+ * The parent returns right after fork(). The child resets its signal
+ * setup, calls free_dispatcher() on mon_dsp, fills in fabrics_cfg for a
+ * persistent connection to the discovery subsystem, builds the option
+ * string and calls do_discover(). The child never returns from this
+ * function; it terminates via exit().
+ *
+ * @transport, @traddr, @trsvcid, @host_traddr: stored in the fabrics_cfg
+ * fields of the same name; the pointers must stay valid in the child.
+ *
+ * Returns 0 in the parent if the child was started, or a negative errno
+ * value if fork() failed.
+ */
+static int monitor_discovery(char *transport, char *traddr, char *trsvcid,
+			     char *host_traddr)
+{
+	char argstr[BUF_SIZE];
+	pid_t pid;
+	int rc;
+
+	pid = fork();
+	if (pid == -1) {
+		/* save errno first: logging may overwrite it */
+		rc = -errno;
+		msg(LOG_ERR, "failed to fork discovery task: %m\n");
+		return rc;
+	} else if (pid > 0) {
+		msg(LOG_DEBUG, "started discovery task %ld\n", (long)pid);
+		return 0;
+	}
+
+	/* child process from here on */
+	child_reset_signals();
+	free_dispatcher(mon_dsp);
+
+	msg(LOG_NOTICE, "starting discovery\n");
+	fabrics_cfg.nqn = NVME_DISC_SUBSYS_NAME;
+	fabrics_cfg.transport = transport;
+	fabrics_cfg.traddr = traddr;
+	fabrics_cfg.trsvcid = trsvcid;
+	fabrics_cfg.host_traddr = host_traddr;
+	/* Without the following, the kernel returns EINVAL */
+	fabrics_cfg.tos = -1;
+	fabrics_cfg.persistent = true;
+
+	rc = build_options(argstr, sizeof(argstr), true);
+	if (rc) {
+		/*
+		 * Don't start discovery with an option string that could
+		 * not be built. NOTE(review): assumes build_options()
+		 * returns a negative errno-style value on failure, like
+		 * the do_discover() result handled below - confirm.
+		 */
+		msg(LOG_ERR, "failed to build discovery options\n");
+		exit(-rc);
+	}
+	msg(LOG_DEBUG, "%s\n", argstr);
+	rc = do_discover(argstr, mon_cfg.autoconnect, NORMAL);
+
+	/* the parent decodes the exit status with strerror() */
+	exit(-rc);
+	/* not reached */
+	return rc;
+}
+
+/*
+ * Handle a uevent from the "fc" subsystem.
+ *
+ * Only "change" events on the device named "fc_udev_device" are
+ * considered. If the required properties can be extracted, a discovery
+ * task is started for transport "fc" with the reported traddr and
+ * host_traddr and without a trsvcid.
+ */
+static void monitor_handle_fc_uev(struct udev_device *ud)
+{
+	char traddr[NVMF_TRADDR_SIZE], host_traddr[NVMF_TRADDR_SIZE];
+	const char *action = udev_device_get_action(ud);
+	const char *sysname = udev_device_get_sysname(ud);
+	int rc;
+
+	if (strcmp(action, "change") != 0)
+		return;
+	if (strcmp(sysname, "fc_udev_device") != 0)
+		return;
+
+	rc = monitor_get_fc_uev_props(ud, traddr, sizeof(traddr),
+				      host_traddr, sizeof(host_traddr));
+	if (rc != 0)
+		return;
+
+	monitor_discovery("fc", traddr, NULL, host_traddr);
+}
+
+/*
+ * Extract the connection parameters from an "nvme" subsystem uevent.
+ *
+ * Scans the property list of @ud for NVME_AEN, NVME_TRTYPE, NVME_TRADDR,
+ * NVME_TRSVCID and NVME_HOST_TRADDR. The output buffers are set to the
+ * empty string before scanning, so a property that is absent from the
+ * event yields "".
+ *
+ * @ud:          udev device the uevent was received for
+ * @transport:   output buffer for NVME_TRTYPE (@tr_sz bytes)
+ * @traddr:      output buffer for NVME_TRADDR (@tra_sz bytes)
+ * @trsvcid:     output buffer for NVME_TRSVCID (@trs_sz bytes)
+ * @host_traddr: output buffer for NVME_HOST_TRADDR (@htra_sz bytes)
+ *
+ * Returns 0 on success, -ENOENT if the device has no properties,
+ * -EINVAL if NVME_AEN is not 0x70f002 ("discovery log changed") or if
+ * traddr or transport is missing, and -ENAMETOOLONG if a value does not
+ * fit its buffer. The output buffers are only valid when 0 is returned.
+ */
+static int monitor_get_nvme_uev_props(struct udev_device *ud,
+				      char *transport, size_t tr_sz,
+				      char *traddr, size_t tra_sz,
+				      char *trsvcid, size_t trs_sz,
+				      char *host_traddr, size_t htra_sz)
+{
+	const char *sysname = udev_device_get_sysname(ud);
+	bool aen_disc = false, overflow = false;
+	struct udev_list_entry *entry;
+
+	entry = udev_device_get_properties_list_entry(ud);
+	if (!entry) {
+		msg(LOG_NOTICE, "%s: empty properties list\n", sysname);
+		return -ENOENT;
+	}
+
+	*transport = *traddr = *trsvcid = *host_traddr = '\0';
+	for (; entry; entry = udev_list_entry_get_next(entry)) {
+		const char *name = udev_list_entry_get_name(entry);
+		char *dst;
+		size_t dst_sz;
+
+		if (!strcmp(name, "NVME_AEN")) {
+			if (!strcmp(udev_list_entry_get_value(entry),
+				    "0x70f002"))
+				aen_disc = true;
+			continue;
+		}
+
+		/* pair every property with its own buffer and buffer size */
+		if (!strcmp(name, "NVME_TRTYPE")) {
+			dst = transport;
+			dst_sz = tr_sz;
+		} else if (!strcmp(name, "NVME_TRADDR")) {
+			dst = traddr;
+			dst_sz = tra_sz;
+		} else if (!strcmp(name, "NVME_TRSVCID")) {
+			dst = trsvcid;
+			dst_sz = trs_sz;
+		} else if (!strcmp(name, "NVME_HOST_TRADDR")) {
+			dst = host_traddr;
+			dst_sz = htra_sz;
+		} else {
+			continue;
+		}
+
+		/*
+		 * memccpy() returns NULL if the value including its '\0'
+		 * did not fit; dst is then not NUL-terminated and must
+		 * not be used as a string.
+		 */
+		if (!memccpy(dst, udev_list_entry_get_value(entry),
+			     '\0', dst_sz))
+			overflow = true;
+	}
+	if (!aen_disc) {
+		msg(LOG_DEBUG, "%s: not a \"discovery log changed\" AEN, ignoring event\n",
+		    sysname);
+		return -EINVAL;
+	}
+
+	if (overflow) {
+		msg(LOG_ERR, "%s: transport property value too long\n",
+		    sysname);
+		return -ENAMETOOLONG;
+	}
+
+	if (!*traddr || !*transport) {
+		msg(LOG_WARNING, "%s: transport properties missing\n", sysname);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Handle a uevent from the "nvme" subsystem.
+ *
+ * Only "change" events are considered. If the connection parameters can
+ * be extracted from the event, a discovery task is started with them.
+ * A trsvcid of "none" is passed on as NULL.
+ */
+static void monitor_handle_nvme_uev(struct udev_device *ud)
+{
+	char transport[5], trsvcid[NVMF_TRSVCID_SIZE];
+	char traddr[NVMF_TRADDR_SIZE], host_traddr[NVMF_TRADDR_SIZE];
+	char *svcid = trsvcid;
+	int rc;
+
+	if (strcmp(udev_device_get_action(ud), "change") != 0)
+		return;
+
+	rc = monitor_get_nvme_uev_props(ud, transport, sizeof(transport),
+					traddr, sizeof(traddr),
+					trsvcid, sizeof(trsvcid),
+					host_traddr, sizeof(host_traddr));
+	if (rc != 0)
+		return;
+
+	if (!strcmp(trsvcid, "none"))
+		svcid = NULL;
+
+	monitor_discovery(transport, traddr, svcid, host_traddr);
+}
+
 static void monitor_handle_udevice(struct udev_device *ud)
 {
-	msg(LOG_INFO, "uevent: %s %s\n",
-		udev_device_get_action(ud),
-		udev_device_get_sysname(ud));
+	const char *subsys  = udev_device_get_subsystem(ud);
+
+	if (log_level >= LOG_INFO) {
+		const char *action = udev_device_get_action(ud);
+		const char *syspath = udev_device_get_syspath(ud);
+
+		msg(LOG_INFO, "%s %s\n", action, syspath);
+	}
+	if (!strcmp(subsys, "fc"))
+		monitor_handle_fc_uev(ud);
+	else if (!strcmp(subsys, "nvme"))
+		monitor_handle_nvme_uev(ud);
 }
 
 struct udev_monitor_event {
@@ -147,6 +362,49 @@ static int monitor_handle_uevents(struct event *ev,
 	return EVENTCB_CONTINUE;
 }
 
+/*
+ * Error callback passed to event_loop().
+ *
+ * Any @errcode other than -EINTR is returned unchanged. For -EINTR:
+ *  - if must_exit is set, ELOOP_QUIT is returned;
+ *  - if got_sigchld is not set, the interruption is logged and ignored;
+ *  - otherwise got_sigchld is cleared and all children that have
+ *    terminated are reaped without blocking, logging how each one ended.
+ * In the latter two cases ELOOP_CONTINUE is returned.
+ */
+static int handle_epoll_err(int errcode)
+{
+	if (errcode != -EINTR)
+		return errcode;
+
+	if (must_exit) {
+		msg(LOG_NOTICE, "monitor: exit signal received\n");
+		return ELOOP_QUIT;
+	}
+
+	if (!got_sigchld) {
+		msg(LOG_WARNING, "monitor: unexpected interruption, ignoring\n");
+		return ELOOP_CONTINUE;
+	}
+
+	got_sigchld = 0;
+	for (;;) {
+		int status;
+		pid_t child = waitpid(-1, &status, WNOHANG);
+
+		/* 0: children exist, but none has changed state yet */
+		if (child == 0)
+			break;
+		if (child == -1) {
+			/* ECHILD just means there are no children left */
+			if (errno != ECHILD)
+				msg(LOG_ERR, "error in waitpid: %m\n");
+			break;
+		}
+
+		if (!WIFEXITED(status))
+			msg(LOG_WARNING, "child %ld didn't exit normally\n",
+			    (long)child);
+		else if (WEXITSTATUS(status) != 0)
+			msg(LOG_NOTICE, "child %ld exited with status \"%s\"\n",
+			    (long)child, strerror(WEXITSTATUS(status)));
+		else
+			msg(LOG_DEBUG, "child %ld exited normally\n",
+			    (long)child);
+	}
+
+	/* tell event_loop() to continue */
+	return ELOOP_CONTINUE;
+}
+
 static int monitor_parse_opts(const char *desc, int argc, char **argv)
 {
 	bool quiet = false;
@@ -156,6 +414,19 @@ static int monitor_parse_opts(const char *desc, int argc, char **argv)
 	int ret;
 	OPT_ARGS(opts) = {
 		OPT_FLAG("no-connect",     'N', &noauto,              "dry run, do not autoconnect to discovered controllers"),
+		OPT_LIST("hostnqn",        'q', &fabrics_cfg.hostnqn,         "user-defined hostnqn (if default not used)"),
+		OPT_LIST("hostid",         'I', &fabrics_cfg.hostid,          "user-defined hostid (if default not used)"),
+		OPT_INT("keep-alive-tmo",  'k', &fabrics_cfg.keep_alive_tmo,  "keep alive timeout period in seconds"),
+		OPT_INT("reconnect-delay", 'c', &fabrics_cfg.reconnect_delay, "reconnect timeout period in seconds"),
+		OPT_INT("ctrl-loss-tmo",   'l', &fabrics_cfg.ctrl_loss_tmo,   "controller loss timeout period in seconds"),
+		OPT_INT("tos",             'T', &fabrics_cfg.tos,             "type of service"),
+		OPT_FLAG("hdr_digest",     'g', &fabrics_cfg.hdr_digest,      "enable transport protocol header digest (TCP transport)"),
+		OPT_FLAG("data_digest",    'G', &fabrics_cfg.data_digest,     "enable transport protocol data digest (TCP transport)"),
+		OPT_INT("nr-io-queues",    'i', &fabrics_cfg.nr_io_queues,    "number of io queues to use (default is core count)"),
+		OPT_INT("nr-write-queues", 'W', &fabrics_cfg.nr_write_queues, "number of write queues to use (default 0)"),
+		OPT_INT("nr-poll-queues",  'P', &fabrics_cfg.nr_poll_queues,  "number of poll queues to use (default 0)"),
+		OPT_INT("queue-size",      'Q', &fabrics_cfg.queue_size,      "number of io queue elements to use (default 128)"),
+		OPT_FLAG("matching",       'm', &fabrics_cfg.matching_only,   "connect only records matching the traddr"),
 		OPT_FLAG("silent",         'S', &quiet,               "log level: silent"),
 		OPT_FLAG("verbose",        'v', &verbose,             "log level: verbose"),
 		OPT_FLAG("debug",          'D', &debug,               "log level: debug"),
@@ -163,6 +434,7 @@ static int monitor_parse_opts(const char *desc, int argc, char **argv)
 		OPT_END()
 	};
 
+	log_pid = true;
 	ret = argconfig_parse(argc, argv, desc, opts);
 	if (ret)
 		return ret;
@@ -238,7 +510,7 @@ int aen_monitor(const char *desc, int argc, char **argv)
 		goto out;
 	}
 
-	ret = event_loop(mon_dsp, &wait_mask, NULL);
+	ret = event_loop(mon_dsp, &wait_mask, handle_epoll_err);
 
 out:
 	free_dispatcher(mon_dsp);
-- 
2.29.2


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

  parent reply	other threads:[~2021-03-06  0:40 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-06  0:36 [PATCH v2 00/16] nvme-cli: add "nvme monitor" subcommand mwilck
2021-03-06  0:36 ` [PATCH v2 01/16] fabrics: export symbols required for monitor functionality mwilck
2021-03-06  0:36 ` [PATCH v2 02/16] nvme-cli: add code for event and timeout handling mwilck
2021-03-17  0:32   ` Martin Wilck
2021-03-19 16:42     ` Martin Wilck
2021-03-30 22:06       ` Martin Wilck
2021-03-06  0:36 ` [PATCH v2 03/16] monitor: add basic "nvme monitor" functionality mwilck
2021-03-06  0:36 ` mwilck [this message]
2021-03-06  0:36 ` [PATCH v2 05/16] conn-db: add simple connection registry mwilck
2021-03-06  0:36 ` [PATCH v2 06/16] monitor: monitor_discovery(): try to reuse existing controllers mwilck
2021-03-06  0:36 ` [PATCH v2 07/16] monitor: kill running discovery tasks on exit mwilck
2021-03-06  0:36 ` [PATCH v2 08/16] monitor: add option --cleanup / -C mwilck
2021-03-06  0:36 ` [PATCH v2 09/16] monitor: handling of add/remove uevents for nvme controllers mwilck
2021-03-06  0:36 ` [PATCH v2 10/16] monitor: discover from conf file on startup mwilck
2021-03-06  0:36 ` [PATCH v2 11/16] monitor: watch discovery.conf with inotify mwilck
2021-03-06  0:36 ` [PATCH v2 12/16] monitor: add parent/child messaging and "notify" message exchange mwilck
2021-03-06  0:36 ` [PATCH v2 13/16] monitor: add "query device" " mwilck
2021-03-06  0:36 ` [PATCH v2 14/16] completions: add completions for nvme monitor mwilck
2021-03-06  0:36 ` [PATCH v2 15/16] nvmf-autoconnect: add unit file for nvme-monitor.service mwilck
2021-03-06  0:36 ` [PATCH v2 16/16] nvme-monitor(1): add man page for nvme-monitor mwilck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210306003659.21207-5-mwilck@suse.com \
    --to=mwilck@suse.com \
    --cc=Chaitanya.Kulkarni@wdc.com \
    --cc=ematsumiya@suse.de \
    --cc=hare@suse.de \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).