All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] multipath -u: test socket connection in non-blocking mode
@ 2019-04-23 19:32 Martin Wilck
  2019-04-24  5:48 ` Hannes Reinecke
  0 siblings, 1 reply; 3+ messages in thread
From: Martin Wilck @ 2019-04-23 19:32 UTC (permalink / raw)
  To: Christophe Varoqui, Benjamin Marzinski; +Cc: dm-devel, Martin Wilck

Since commit d7188fcd "multipathd: start daemon after udev trigger",
multipathd startup is delayed during boot until after "udev settle"
terminates. But "multipath -u" is run by udev workers for storage devices,
and attempts to connect to the multipathd socket. This causes a start job
for multipathd to be scheduled by systemd, but that job won't be started
until "udev settle" finishes. This is not a problem on systems with 129 or
less storage units, because the connect() call of "multipath -u" will
succeed anyway. But on larger systems, the listen backlog of the systemd
socket can be exceeded, which causes connect() calls for the socket to
block until multipathd starts up and begins calling accept(). This creates
a deadlock situation, because "multipath -u" (called by udev workers)
blocks, and thus "udev settle" doesn't finish, delaying multipathd
startup. This situation then persists until either the workers or "udev
settle" time out. In the former case, path devices might be misclassified
as non-multipath devices by "multipath -u".

Fix this by using a non-blocking socket fd for connect() and interpret the
errno appropriately.

This patch reverts most of the changes from commit 8cdf6661 "multipath:
check on multipathd without starting it". Instead, "multipath -u" does
access the socket and start multipath again (which is what we want IMO),
but it is now able to detect and handle the "full backlog" situation.
---
 libmpathcmd/mpath_cmd.c | 33 +++++++++++++++++---
 libmpathcmd/mpath_cmd.h | 15 +++++++++
 multipath/main.c        | 67 ++++++++++++-----------------------------
 3 files changed, 64 insertions(+), 51 deletions(-)

diff --git a/libmpathcmd/mpath_cmd.c b/libmpathcmd/mpath_cmd.c
index df4ca541..265af1fb 100644
--- a/libmpathcmd/mpath_cmd.c
+++ b/libmpathcmd/mpath_cmd.c
@@ -26,6 +26,7 @@
 #include <poll.h>
 #include <string.h>
 #include <errno.h>
+#include <fcntl.h>
 
 #include "mpath_cmd.h"
 
@@ -93,10 +94,11 @@ static size_t write_all(int fd, const void *buf, size_t len)
 /*
  * connect to a unix domain socket
  */
-int mpath_connect(void)
+int __mpath_connect(int nonblocking)
 {
-	int fd, len;
+	int fd, len, err;
 	struct sockaddr_un addr;
+	int flags = 0;
 
 	memset(&addr, 0, sizeof(addr));
 	addr.sun_family = AF_LOCAL;
@@ -106,16 +108,39 @@ int mpath_connect(void)
 
 	fd = socket(AF_LOCAL, SOCK_STREAM, 0);
 	if (fd == -1)
-		return -1;
+		return -errno;
+
+	if (nonblocking) {
+		flags = fcntl(fd, F_GETFL, 0);
+		if (flags != -1)
+			(void)fcntl(fd, F_SETFL, flags|O_NONBLOCK);
+	}
 
 	if (connect(fd, (struct sockaddr *)&addr, len) == -1) {
+		err = -errno;
 		close(fd);
-		return -1;
+		return err;
 	}
 
+	if (nonblocking && flags != -1)
+		(void)fcntl(fd, F_SETFL, flags);
+
 	return fd;
 }
 
+/*
+ * connect to a unix domain socket
+ */
+int mpath_connect(void)
+{
+	int fd = __mpath_connect(0);
+
+	if (fd >= 0)
+		return fd;
+	errno = -fd;
+	return -1;
+}
+
 int mpath_disconnect(int fd)
 {
 	return close(fd);
diff --git a/libmpathcmd/mpath_cmd.h b/libmpathcmd/mpath_cmd.h
index 15aeb067..0d4c45cd 100644
--- a/libmpathcmd/mpath_cmd.h
+++ b/libmpathcmd/mpath_cmd.h
@@ -34,6 +34,21 @@ extern "C" {
 #define DEFAULT_REPLY_TIMEOUT	4000
 
 
+/*
+ * DESCRIPTION:
+ *	Same as mpath_connect() (see below) except for the error return code
+ *	and the "nonblocking" parameter.
+ *	If "nonblocking" is set, connects in non-blocking mode. This is
+ *	useful to avoid blocking if the listening socket's backlog is
+ *	exceeded. In this case, -EAGAIN will be returned.
+ *	Even with "nonblocking" set, the returned file descriptor is in
+ *	blocking mode in case of success.
+ *
+ * RETURNS:
+ *	A file descriptor (>= 0) on success. -errno on failure.
+ */
+int __mpath_connect(int nonblocking);
+
 /*
  * DESCRIPTION:
  *	Connect to the running multipathd daemon. On systems with the
diff --git a/multipath/main.c b/multipath/main.c
index 008e3d3f..52220dbf 100644
--- a/multipath/main.c
+++ b/multipath/main.c
@@ -850,55 +850,28 @@ out:
 	return r;
 }
 
-int is_multipathd_running(void)
+static int test_multipathd_socket(void)
 {
-	FILE *f = NULL;
-	char buf[16];
-	char path[PATH_MAX];
-	int pid;
-	char *end;
+	int fd;
+	/*
+	 * "multipath -u" may be run before the daemon is started. In this
+	 * case, systemd might own the socket but might delay multipathd
+	 * startup until some other unit (udev settle!)  has finished
+	 * starting. With many LUNs, the listen backlog may be exceeded, which
+	 * would cause connect() to block. This causes udev workers calling
+	 * "multipath -u" to hang, and thus creates a deadlock, until "udev
+	 * settle" times out.  To avoid this, call connect() in non-blocking
+	 * mode here, and take EAGAIN as indication for a filled-up systemd
+	 * backlog.
+	 */
 
-	f = fopen(DEFAULT_PIDFILE, "r");
-	if (!f) {
-		if (errno != ENOENT)
-			condlog(4, "can't open " DEFAULT_PIDFILE ": %s",
-				strerror(errno));
-		return 0;
-	}
-	if (!fgets(buf, sizeof(buf), f)) {
-		if (ferror(f))
-			condlog(4, "read of " DEFAULT_PIDFILE " failed: %s",
-				strerror(errno));
-		fclose(f);
-		return 0;
-	}
-	fclose(f);
-	errno = 0;
-	strchop(buf);
-	pid = strtol(buf, &end, 10);
-	if (errno != 0 || pid <= 0 || *end != '\0') {
-		condlog(4, "invalid contents in " DEFAULT_PIDFILE ": '%s'",
-			buf);
-		return 0;
-	}
-	snprintf(path, sizeof(path), "/proc/%d/comm", pid);
-	f = fopen(path, "r");
-	if (!f) {
-		if (errno != ENOENT)
-			condlog(4, "can't open %s: %s", path, strerror(errno));
-		return 0;
-	}
-	if (!fgets(buf, sizeof(buf), f)) {
-		if (ferror(f))
-			condlog(4, "read of %s failed: %s", path,
-				strerror(errno));
-		fclose(f);
-		return 0;
-	}
-	fclose(f);
-	strchop(buf);
-	if (strcmp(buf, "multipathd") != 0)
+	fd = __mpath_connect(1);
+	if (fd == -EAGAIN)
+		condlog(3, "daemon backlog exceeded");
+	else if (fd < 0)
 		return 0;
+	else
+		close(fd);
 	return 1;
 }
 
@@ -1080,7 +1053,7 @@ main (int argc, char *argv[])
 	}
 	if (cmd == CMD_VALID_PATH &&
 	    dev_type == DEV_UEVENT) {
-		if (!is_multipathd_running()) {
+		if (!test_multipathd_socket()) {
 			condlog(3, "%s: daemon is not running", dev);
 			if (!systemd_service_enabled(dev)) {
 				r = print_cmd_valid(RTVL_NO, NULL, conf);
-- 
2.21.0

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] multipath -u: test socket connection in non-blocking mode
  2019-04-23 19:32 [PATCH] multipath -u: test socket connection in non-blocking mode Martin Wilck
@ 2019-04-24  5:48 ` Hannes Reinecke
  2019-04-24  8:14   ` Martin Wilck
  0 siblings, 1 reply; 3+ messages in thread
From: Hannes Reinecke @ 2019-04-24  5:48 UTC (permalink / raw)
  To: Martin Wilck, Christophe Varoqui, Benjamin Marzinski; +Cc: dm-devel

On 4/23/19 9:32 PM, Martin Wilck wrote:
> Since commit d7188fcd "multipathd: start daemon after udev trigger",
> multipathd startup is delayed during boot until after "udev settle"
> terminates. But "multipath -u" is run by udev workers for storage devices,
> and attempts to connect to the multipathd socket. This causes a start job
> for multipathd to be scheduled by systemd, but that job won't be started
> until "udev settle" finishes. This is not a problem on systems with 129 or
> less storage units, because the connect() call of "multipath -u" will
> succeed anyway. But on larger systems, the listen backlog of the systemd
> socket can be exceeded, which causes connect() calls for the socket to
> block until multipathd starts up and begins calling accept(). This creates
> a deadlock situation, because "multipath -u" (called by udev workers)
> blocks, and thus "udev settle" doesn't finish, delaying multipathd
> startup. This situation then persists until either the workers or "udev
> settle" time out. In the former case, path devices might be misclassified
> as non-multipath devices by "multipath -u".
> 
> Fix this by using a non-blocking socket fd for connect() and interpret the
> errno appropriately.
> 
> This patch reverts most of the changes from commit 8cdf6661 "multipath:
> check on multipathd without starting it". Instead, "multipath -u" does
> access the socket and start multipath again (which is what we want IMO),
> but it is now able to detect and handle the "full backlog" situation.
> ---
>   libmpathcmd/mpath_cmd.c | 33 +++++++++++++++++---
>   libmpathcmd/mpath_cmd.h | 15 +++++++++
>   multipath/main.c        | 67 ++++++++++++-----------------------------
>   3 files changed, 64 insertions(+), 51 deletions(-)
> 
> diff --git a/libmpathcmd/mpath_cmd.c b/libmpathcmd/mpath_cmd.c
> index df4ca541..265af1fb 100644
> --- a/libmpathcmd/mpath_cmd.c
> +++ b/libmpathcmd/mpath_cmd.c
> @@ -26,6 +26,7 @@
>   #include <poll.h>
>   #include <string.h>
>   #include <errno.h>
> +#include <fcntl.h>
>   
>   #include "mpath_cmd.h"
>   
> @@ -93,10 +94,11 @@ static size_t write_all(int fd, const void *buf, size_t len)
>   /*
>    * connect to a unix domain socket
>    */
> -int mpath_connect(void)
> +int __mpath_connect(int nonblocking)
>   {
> -	int fd, len;
> +	int fd, len, err;
>   	struct sockaddr_un addr;
> +	int flags = 0;
>   
>   	memset(&addr, 0, sizeof(addr));
>   	addr.sun_family = AF_LOCAL;
> @@ -106,16 +108,39 @@ int mpath_connect(void)
>   
>   	fd = socket(AF_LOCAL, SOCK_STREAM, 0);
>   	if (fd == -1)
> -		return -1;
> +		return -errno;
> +
> +	if (nonblocking) {
> +		flags = fcntl(fd, F_GETFL, 0);
> +		if (flags != -1)
> +			(void)fcntl(fd, F_SETFL, flags|O_NONBLOCK);
> +	}
>   
>   	if (connect(fd, (struct sockaddr *)&addr, len) == -1) {
> +		err = -errno;
>   		close(fd);
> -		return -1;
> +		return err;
>   	}
>   
> +	if (nonblocking && flags != -1)
> +		(void)fcntl(fd, F_SETFL, flags);
> +
>   	return fd;
>   }
>   
> +/*
> + * connect to a unix domain socket
> + */
> +int mpath_connect(void)
> +{
> +	int fd = __mpath_connect(0);
> +
> +	if (fd >= 0)
> +		return fd;
> +	errno = -fd;
> +	return -1;
> +}
> +
>   int mpath_disconnect(int fd)
>   {
>   	return close(fd);
Please do away with the wrapper.
No point in having it; rather use a consistent return value
(ie either -1 and setting errno, or returning -errno directly).

> diff --git a/libmpathcmd/mpath_cmd.h b/libmpathcmd/mpath_cmd.h
> index 15aeb067..0d4c45cd 100644
> --- a/libmpathcmd/mpath_cmd.h
> +++ b/libmpathcmd/mpath_cmd.h
> @@ -34,6 +34,21 @@ extern "C" {
>   #define DEFAULT_REPLY_TIMEOUT	4000
>   
>   
> +/*
> + * DESCRIPTION:
> + *	Same as mpath_connect() (see below) except for the error return code
> + *	and the "nonblocking" parameter.
> + *	If "nonblocking" is set, connects in non-blocking mode. This is
> + *	useful to avoid blocking if the listening socket's backlog is
> + *	exceeded. In this case, -EAGAIN will be returned.
> + *	Even with "nonblocking" set, the returned file descriptor is in
> + *	blocking mode in case of success.
> + *
> + * RETURNS:
> + *	A file descriptor (>= 0) on success. -errno on failure.
> + */
> +int __mpath_connect(int nonblocking);
> +
>   /*
>    * DESCRIPTION:
>    *	Connect to the running multipathd daemon. On systems with the
> diff --git a/multipath/main.c b/multipath/main.c
> index 008e3d3f..52220dbf 100644
> --- a/multipath/main.c
> +++ b/multipath/main.c
> @@ -850,55 +850,28 @@ out:
>   	return r;
>   }
>   
> -int is_multipathd_running(void)
> +static int test_multipathd_socket(void)
>   {
> -	FILE *f = NULL;
> -	char buf[16];
> -	char path[PATH_MAX];
> -	int pid;
> -	char *end;
> +	int fd;
> +	/*
> +	 * "multipath -u" may be run before the daemon is started. In this
> +	 * case, systemd might own the socket but might delay multipathd
> +	 * startup until some other unit (udev settle!)  has finished
> +	 * starting. With many LUNs, the listen backlog may be exceeded, which
> +	 * would cause connect() to block. This causes udev workers calling
> +	 * "multipath -u" to hang, and thus creates a deadlock, until "udev
> +	 * settle" times out.  To avoid this, call connect() in non-blocking
> +	 * mode here, and take EAGAIN as indication for a filled-up systemd
> +	 * backlog.
> +	 */
>   
> -	f = fopen(DEFAULT_PIDFILE, "r");
> -	if (!f) {
> -		if (errno != ENOENT)
> -			condlog(4, "can't open " DEFAULT_PIDFILE ": %s",
> -				strerror(errno));
> -		return 0;
> -	}
> -	if (!fgets(buf, sizeof(buf), f)) {
> -		if (ferror(f))
> -			condlog(4, "read of " DEFAULT_PIDFILE " failed: %s",
> -				strerror(errno));
> -		fclose(f);
> -		return 0;
> -	}
> -	fclose(f);
> -	errno = 0;
> -	strchop(buf);
> -	pid = strtol(buf, &end, 10);
> -	if (errno != 0 || pid <= 0 || *end != '\0') {
> -		condlog(4, "invalid contents in " DEFAULT_PIDFILE ": '%s'",
> -			buf);
> -		return 0;
> -	}
> -	snprintf(path, sizeof(path), "/proc/%d/comm", pid);
> -	f = fopen(path, "r");
> -	if (!f) {
> -		if (errno != ENOENT)
> -			condlog(4, "can't open %s: %s", path, strerror(errno));
> -		return 0;
> -	}
> -	if (!fgets(buf, sizeof(buf), f)) {
> -		if (ferror(f))
> -			condlog(4, "read of %s failed: %s", path,
> -				strerror(errno));
> -		fclose(f);
> -		return 0;
> -	}
> -	fclose(f);
> -	strchop(buf);
> -	if (strcmp(buf, "multipathd") != 0)
> +	fd = __mpath_connect(1);
> +	if (fd == -EAGAIN)
> +		condlog(3, "daemon backlog exceeded");
> +	else if (fd < 0)
>   		return 0;
> +	else
> +		close(fd);
>   	return 1;
>   }
>   
> @@ -1080,7 +1053,7 @@ main (int argc, char *argv[])
>   	}
>   	if (cmd == CMD_VALID_PATH &&
>   	    dev_type == DEV_UEVENT) {
> -		if (!is_multipathd_running()) {
> +		if (!test_multipathd_socket()) {
>   			condlog(3, "%s: daemon is not running", dev);
>   			if (!systemd_service_enabled(dev)) {
>   				r = print_cmd_valid(RTVL_NO, NULL, conf);
> 
Remainder looks okay.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah
HRB 21284 (AG Nürnberg)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] multipath -u: test socket connection in non-blocking mode
  2019-04-24  5:48 ` Hannes Reinecke
@ 2019-04-24  8:14   ` Martin Wilck
  0 siblings, 0 replies; 3+ messages in thread
From: Martin Wilck @ 2019-04-24  8:14 UTC (permalink / raw)
  To: Hannes Reinecke, Christophe Varoqui, Benjamin Marzinski, mwilck+gmail
  Cc: dm-devel

On Wed, 2019-04-24 at 07:48 +0200, Hannes Reinecke wrote:
> On 4/23/19 9:32 PM, Martin Wilck wrote:
> >   
> > +/*
> > + * connect to a unix domain socket
> > + */
> > +int mpath_connect(void)
> > +{
> > +	int fd = __mpath_connect(0);
> > +
> > +	if (fd >= 0)
> > +		return fd;
> > +	errno = -fd;
> > +	return -1;
> > +}
> > +
> >   int mpath_disconnect(int fd)
> >   {
> >   	return close(fd);
> Please do away with the wrapper.
> No point in having it; rather use a consistent return value
> (ie either -1 and setting errno, or returning -errno directly).

I can't easily do away with the wrapper, as it's part of the public
libmpathcmd API, external commands may rely on its behavior. I'll
repost with -1 / errno semantics.

Martin

-- 
Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)


--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-04-24  8:14 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-23 19:32 [PATCH] multipath -u: test socket connection in non-blocking mode Martin Wilck
2019-04-24  5:48 ` Hannes Reinecke
2019-04-24  8:14   ` Martin Wilck

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.