All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] libmultipath: simplify failed wwid code
@ 2020-05-01 22:39 Benjamin Marzinski
  2020-05-08  9:15 ` Martin Wilck
  0 siblings, 1 reply; 4+ messages in thread
From: Benjamin Marzinski @ 2020-05-01 22:39 UTC (permalink / raw)
  To: Christophe Varoqui; +Cc: device-mapper development, Martin Wilck

The (is|mark|unmark)_failed_wwid code is needlessly complicated.
Locking a file is necssary if multiple processes could otherwise be
writing to it at the same time. That is not the case with the
failed_wwids files. They can simply be empty files in a directory.  Even
with all the locking in place, two processes accessing or modifying a
file at the same time will still race. And even without the locking, if
two processes try to access or modify a file at the same time, they will
both see a reasonable result, and will leave the files in a valid state
afterwards.

Instead of doing all the locking work (which made it necessary to write
a file, even just to check if a file existed), simply check for files
with lstat(), create them with open(), and remove them with unlink().

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
 libmultipath/wwids.c | 131 ++++++++++++++++++-------------------------
 1 file changed, 56 insertions(+), 75 deletions(-)

diff --git a/libmultipath/wwids.c b/libmultipath/wwids.c
index 28a2150d..b15b146b 100644
--- a/libmultipath/wwids.c
+++ b/libmultipath/wwids.c
@@ -6,6 +6,7 @@
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <fcntl.h>
 
 #include "util.h"
 #include "checkers.h"
@@ -348,109 +349,89 @@ remember_wwid(char *wwid)
 }
 
 static const char shm_dir[] = MULTIPATH_SHM_BASE "failed_wwids";
-static const char shm_lock[] = ".lock";
-static const char shm_header[] = "multipath shm lock file, don't edit";
-static char _shm_lock_path[sizeof(shm_dir)+sizeof(shm_lock)];
-static const char *shm_lock_path = &_shm_lock_path[0];
 
-static void init_shm_paths(void)
+static void print_failed_wwid_result(const char * msg, const char *wwid, int r)
 {
-	snprintf(_shm_lock_path, sizeof(_shm_lock_path),
-		 "%s/%s", shm_dir, shm_lock);
+	switch(r) {
+	case WWID_FAILED_ERROR:
+		condlog(1, "%s: %s: %m", msg, wwid);
+		return;
+	case WWID_IS_FAILED:
+	case WWID_IS_NOT_FAILED:
+		condlog(4, "%s: %s is %s", msg, wwid,
+			r == WWID_IS_FAILED ? "failed" : "good");
+		return;
+	case WWID_FAILED_CHANGED:
+		condlog(3, "%s: %s", msg, wwid);
+	}
 }
 
-static pthread_once_t shm_path_once = PTHREAD_ONCE_INIT;
-
-static int multipath_shm_open(bool rw)
+int is_failed_wwid(const char *wwid)
 {
-	int fd;
-	int can_write;
-
-	pthread_once(&shm_path_once, init_shm_paths);
-	fd = open_file(shm_lock_path, &can_write, shm_header);
+	struct stat st;
+	char path[PATH_MAX];
+	int r;
 
-	if (fd >= 0 && rw && !can_write) {
-		close(fd);
-		condlog(1, "failed to open %s for writing", shm_dir);
+	if (safe_sprintf(path, "%s/%s", shm_dir, wwid)) {
+		condlog(1, "%s: path name overflow", __func__);
 		return -1;
 	}
 
-	return fd;
-}
-
-static void multipath_shm_close(void *arg)
-{
-	long fd = (long)arg;
+	if (lstat(path, &st) == 0)
+		r = WWID_IS_FAILED;
+	else if (errno == ENOENT)
+		r = WWID_IS_NOT_FAILED;
+	else
+		r = WWID_FAILED_ERROR;
 
-	close(fd);
-	unlink(shm_lock_path);
+	print_failed_wwid_result("is_failed", wwid, r);
+	return r;
 }
 
-static int _failed_wwid_op(const char *wwid, bool rw,
-			   int (*func)(const char *), const char *msg)
+int mark_failed_wwid(const char *wwid)
 {
 	char path[PATH_MAX];
-	long lockfd;
-	int r = -1;
+	int r, fd;
 
 	if (safe_sprintf(path, "%s/%s", shm_dir, wwid)) {
 		condlog(1, "%s: path name overflow", __func__);
 		return -1;
 	}
-
-	lockfd = multipath_shm_open(rw);
-	if (lockfd == -1)
+	if (ensure_directories_exist(path, 0700) < 0) {
+		condlog(1, "%s: can't setup directories", __func__);
 		return -1;
+	}
 
-	pthread_cleanup_push(multipath_shm_close, (void *)lockfd);
-	r = func(path);
-	pthread_cleanup_pop(1);
-
-	if (r == WWID_FAILED_ERROR)
-		condlog(1, "%s: %s: %s", msg, wwid, strerror(errno));
-	else if (r == WWID_FAILED_CHANGED)
-		condlog(3, "%s: %s", msg, wwid);
-	else if (!rw)
-		condlog(4, "%s: %s is %s", msg, wwid,
-			r == WWID_IS_FAILED ? "failed" : "good");
+	fd = open(path, O_RDONLY | O_CREAT | O_EXCL, S_IRUSR);
+	if (fd >= 0) {
+		close(fd);
+		r = WWID_FAILED_CHANGED;
+	} else if (errno == EEXIST)
+		r = WWID_FAILED_UNCHANGED;
+	else
+		r = WWID_FAILED_ERROR;
 
+	print_failed_wwid_result("mark_failed", wwid, r);
 	return r;
 }
 
-static int _is_failed(const char *path)
+int unmark_failed_wwid(const char *wwid)
 {
-	struct stat st;
+	char path[PATH_MAX];
+	int r;
 
-	if (lstat(path, &st) == 0)
-		return WWID_IS_FAILED;
+	if (safe_sprintf(path, "%s/%s", shm_dir, wwid)) {
+		condlog(1, "%s: path name overflow", __func__);
+		return -1;
+	}
+
+	if (unlink(path) == 0)
+		r = WWID_FAILED_CHANGED;
 	else if (errno == ENOENT)
-		return WWID_IS_NOT_FAILED;
+		r = WWID_FAILED_UNCHANGED;
 	else
-		return WWID_FAILED_ERROR;
-}
-
-static int _mark_failed(const char *path)
-{
-	/* Called from _failed_wwid_op: we know that shm_lock_path exists */
-	if (_is_failed(path) == WWID_IS_FAILED)
-		return WWID_FAILED_UNCHANGED;
-	return (link(shm_lock_path, path) == 0 ? WWID_FAILED_CHANGED :
-		WWID_FAILED_ERROR);
-}
+		r = WWID_FAILED_ERROR;
 
-static int _unmark_failed(const char *path)
-{
-	if (_is_failed(path) == WWID_IS_NOT_FAILED)
-		return WWID_FAILED_UNCHANGED;
-	return (unlink(path) == 0 ? WWID_FAILED_CHANGED : WWID_FAILED_ERROR);
-}
-
-#define declare_failed_wwid_op(op, rw) \
-int op ## _wwid(const char *wwid) \
-{ \
-	return _failed_wwid_op(wwid, (rw), _ ## op, #op); \
+	print_failed_wwid_result("unmark_failed", wwid, r);
+	return r;
 }
-
-declare_failed_wwid_op(is_failed, false)
-declare_failed_wwid_op(mark_failed, true)
-declare_failed_wwid_op(unmark_failed, true)
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] libmultipath: simplify failed wwid code
  2020-05-01 22:39 [PATCH] libmultipath: simplify failed wwid code Benjamin Marzinski
@ 2020-05-08  9:15 ` Martin Wilck
  2020-05-08 16:32   ` Benjamin Marzinski
  0 siblings, 1 reply; 4+ messages in thread
From: Martin Wilck @ 2020-05-08  9:15 UTC (permalink / raw)
  To: bmarzins, christophe.varoqui; +Cc: dm-devel

[-- Attachment #1: Type: text/plain, Size: 1563 bytes --]

On Fri, 2020-05-01 at 17:39 -0500, Benjamin Marzinski wrote:
> The (is|mark|unmark)_failed_wwid code is needlessly complicated.
> Locking a file is necssary if multiple processes could otherwise be
> writing to it at the same time. That is not the case with the
> failed_wwids files. They can simply be empty files in a
> directory.  Even
> with all the locking in place, two processes accessing or modifying a
> file at the same time will still race. And even without the locking,
> if
> two processes try to access or modify a file at the same time, they
> will
> both see a reasonable result, and will leave the files in a valid
> state
> afterwards.
> 
> Instead of doing all the locking work (which made it necessary to
> write
> a file, even just to check if a file existed), simply check for files
> with lstat(), create them with open(), and remove them with unlink().
> 
> Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> ---
>  libmultipath/wwids.c | 131 ++++++++++++++++++-----------------------
> --
>  1 file changed, 56 insertions(+), 75 deletions(-)

I agree, almost :-)

Please consider adding the attached patch on top, which switches back
to atomic creation of the failed_wwids file, with just a little bit
more compexity. I prefer the creation of the file to be atomic on the
file system level. IMO that's how "flag" files like this should be
handled in principle, and doing it correctly makes me feel better, even
though I have to concur that an actual race is hardly possible.

Regards,
Martin


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-libmultipath-use-atomic-linkat-in-mark_failed_wwid.patch --]
[-- Type: text/x-patch; name="0001-libmultipath-use-atomic-linkat-in-mark_failed_wwid.patch", Size: 2840 bytes --]

From d0e4669c92863f98cdbe7b41a8a9b64223958bec Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Fri, 8 May 2020 00:51:50 +0200
Subject: [PATCH] libmultipath: use atomic linkat() in mark_failed_wwid()

This keeps (almost) the simplicity of the previous patch, while
making sure that the return value of mark_failed_wwid()
(WWID_FAILED_CHANGED vs. WWID_FAILED_UNCHANGED) is correct, even
if several processes access this WWID at the same time.

Signed-off-by: Martin Wilck <mwilck@suse.com>
---
 libmultipath/wwids.c | 42 +++++++++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/libmultipath/wwids.c b/libmultipath/wwids.c
index b15b146b..ccdd13d2 100644
--- a/libmultipath/wwids.c
+++ b/libmultipath/wwids.c
@@ -374,7 +374,7 @@ int is_failed_wwid(const char *wwid)
 
 	if (safe_sprintf(path, "%s/%s", shm_dir, wwid)) {
 		condlog(1, "%s: path name overflow", __func__);
-		return -1;
+		return WWID_FAILED_ERROR;
 	}
 
 	if (lstat(path, &st) == 0)
@@ -390,27 +390,43 @@ int is_failed_wwid(const char *wwid)
 
 int mark_failed_wwid(const char *wwid)
 {
-	char path[PATH_MAX];
-	int r, fd;
+	char tmpfile[WWID_SIZE + 2 * sizeof(long) + 1];
+	int r = WWID_FAILED_ERROR, fd, dfd;
 
-	if (safe_sprintf(path, "%s/%s", shm_dir, wwid)) {
-		condlog(1, "%s: path name overflow", __func__);
-		return -1;
+	dfd = open(shm_dir, O_RDONLY|O_DIRECTORY);
+	if (dfd == -1 && errno == ENOENT) {
+		char path[sizeof(shm_dir) + 2];
+
+		/* arg for ensure_directories_exist() must not end with "/" */
+		safe_sprintf(path, "%s/_", shm_dir);
+		ensure_directories_exist(path, 0700);
+		dfd = open(shm_dir, O_RDONLY|O_DIRECTORY);
 	}
-	if (ensure_directories_exist(path, 0700) < 0) {
-		condlog(1, "%s: can't setup directories", __func__);
-		return -1;
+	if (dfd == -1) {
+		condlog(1, "%s: can't setup %s: %m", __func__, shm_dir);
+		return WWID_FAILED_ERROR;
 	}
 
-	fd = open(path, O_RDONLY | O_CREAT | O_EXCL, S_IRUSR);
-	if (fd >= 0) {
+	safe_sprintf(tmpfile, "%s.%lx", wwid, (long)getpid());
+	fd = openat(dfd, tmpfile, O_RDONLY | O_CREAT | O_EXCL, S_IRUSR);
+	if (fd >= 0)
 		close(fd);
+	else
+		goto out_closedir;
+
+	if (linkat(dfd, tmpfile, dfd, wwid, 0) == 0)
 		r = WWID_FAILED_CHANGED;
-	} else if (errno == EEXIST)
+	else if (errno == EEXIST)
 		r = WWID_FAILED_UNCHANGED;
 	else
 		r = WWID_FAILED_ERROR;
 
+	if (unlinkat(dfd, tmpfile, 0) == -1)
+		condlog(2, "%s: failed to unlink %s/%s: %m",
+			__func__, shm_dir, tmpfile);
+
+out_closedir:
+	close(dfd);
 	print_failed_wwid_result("mark_failed", wwid, r);
 	return r;
 }
@@ -422,7 +438,7 @@ int unmark_failed_wwid(const char *wwid)
 
 	if (safe_sprintf(path, "%s/%s", shm_dir, wwid)) {
 		condlog(1, "%s: path name overflow", __func__);
-		return -1;
+		return WWID_FAILED_ERROR;
 	}
 
 	if (unlink(path) == 0)
-- 
2.26.2


[-- Attachment #3: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] libmultipath: simplify failed wwid code
  2020-05-08  9:15 ` Martin Wilck
@ 2020-05-08 16:32   ` Benjamin Marzinski
  2020-05-08 19:25     ` Martin Wilck
  0 siblings, 1 reply; 4+ messages in thread
From: Benjamin Marzinski @ 2020-05-08 16:32 UTC (permalink / raw)
  To: Martin Wilck; +Cc: dm-devel

On Fri, May 08, 2020 at 09:15:32AM +0000, Martin Wilck wrote:
> On Fri, 2020-05-01 at 17:39 -0500, Benjamin Marzinski wrote:
> > The (is|mark|unmark)_failed_wwid code is needlessly complicated.
> > Locking a file is necssary if multiple processes could otherwise be
> > writing to it at the same time. That is not the case with the
> > failed_wwids files. They can simply be empty files in a
> > directory.  Even
> > with all the locking in place, two processes accessing or modifying a
> > file at the same time will still race. And even without the locking,
> > if
> > two processes try to access or modify a file at the same time, they
> > will
> > both see a reasonable result, and will leave the files in a valid
> > state
> > afterwards.
> > 
> > Instead of doing all the locking work (which made it necessary to
> > write
> > a file, even just to check if a file existed), simply check for files
> > with lstat(), create them with open(), and remove them with unlink().
> > 
> > Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> > ---
> >  libmultipath/wwids.c | 131 ++++++++++++++++++-----------------------
> > --
> >  1 file changed, 56 insertions(+), 75 deletions(-)
> 
> I agree, almost :-)
> 
> Please consider adding the attached patch on top, which switches back
> to atomic creation of the failed_wwids file, with just a little bit
> more compexity. I prefer the creation of the file to be atomic on the
> file system level. IMO that's how "flag" files like this should be
> handled in principle, and doing it correctly makes me feel better, even
> though I have to concur that an actual race is hardly possible.

Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>

Or where you looking for me to respin with this included? Either way is
fine.
 
> Regards,
> Martin
> 

> From d0e4669c92863f98cdbe7b41a8a9b64223958bec Mon Sep 17 00:00:00 2001
> From: Martin Wilck <mwilck@suse.com>
> Date: Fri, 8 May 2020 00:51:50 +0200
> Subject: [PATCH] libmultipath: use atomic linkat() in mark_failed_wwid()
> 
> This keeps (almost) the simplicity of the previous patch, while
> making sure that the return value of mark_failed_wwid()
> (WWID_FAILED_CHANGED vs. WWID_FAILED_UNCHANGED) is correct, even
> if several processes access this WWID at the same time.
> 
> Signed-off-by: Martin Wilck <mwilck@suse.com>
> ---
>  libmultipath/wwids.c | 42 +++++++++++++++++++++++++++++-------------
>  1 file changed, 29 insertions(+), 13 deletions(-)
> 
> diff --git a/libmultipath/wwids.c b/libmultipath/wwids.c
> index b15b146b..ccdd13d2 100644
> --- a/libmultipath/wwids.c
> +++ b/libmultipath/wwids.c
> @@ -374,7 +374,7 @@ int is_failed_wwid(const char *wwid)
>  
>  	if (safe_sprintf(path, "%s/%s", shm_dir, wwid)) {
>  		condlog(1, "%s: path name overflow", __func__);
> -		return -1;
> +		return WWID_FAILED_ERROR;
>  	}
>  
>  	if (lstat(path, &st) == 0)
> @@ -390,27 +390,43 @@ int is_failed_wwid(const char *wwid)
>  
>  int mark_failed_wwid(const char *wwid)
>  {
> -	char path[PATH_MAX];
> -	int r, fd;
> +	char tmpfile[WWID_SIZE + 2 * sizeof(long) + 1];
> +	int r = WWID_FAILED_ERROR, fd, dfd;
>  
> -	if (safe_sprintf(path, "%s/%s", shm_dir, wwid)) {
> -		condlog(1, "%s: path name overflow", __func__);
> -		return -1;
> +	dfd = open(shm_dir, O_RDONLY|O_DIRECTORY);
> +	if (dfd == -1 && errno == ENOENT) {
> +		char path[sizeof(shm_dir) + 2];
> +
> +		/* arg for ensure_directories_exist() must not end with "/" */
> +		safe_sprintf(path, "%s/_", shm_dir);
> +		ensure_directories_exist(path, 0700);
> +		dfd = open(shm_dir, O_RDONLY|O_DIRECTORY);
>  	}
> -	if (ensure_directories_exist(path, 0700) < 0) {
> -		condlog(1, "%s: can't setup directories", __func__);
> -		return -1;
> +	if (dfd == -1) {
> +		condlog(1, "%s: can't setup %s: %m", __func__, shm_dir);
> +		return WWID_FAILED_ERROR;
>  	}
>  
> -	fd = open(path, O_RDONLY | O_CREAT | O_EXCL, S_IRUSR);
> -	if (fd >= 0) {
> +	safe_sprintf(tmpfile, "%s.%lx", wwid, (long)getpid());
> +	fd = openat(dfd, tmpfile, O_RDONLY | O_CREAT | O_EXCL, S_IRUSR);
> +	if (fd >= 0)
>  		close(fd);
> +	else
> +		goto out_closedir;
> +
> +	if (linkat(dfd, tmpfile, dfd, wwid, 0) == 0)
>  		r = WWID_FAILED_CHANGED;
> -	} else if (errno == EEXIST)
> +	else if (errno == EEXIST)
>  		r = WWID_FAILED_UNCHANGED;
>  	else
>  		r = WWID_FAILED_ERROR;
>  
> +	if (unlinkat(dfd, tmpfile, 0) == -1)
> +		condlog(2, "%s: failed to unlink %s/%s: %m",
> +			__func__, shm_dir, tmpfile);
> +
> +out_closedir:
> +	close(dfd);
>  	print_failed_wwid_result("mark_failed", wwid, r);
>  	return r;
>  }
> @@ -422,7 +438,7 @@ int unmark_failed_wwid(const char *wwid)
>  
>  	if (safe_sprintf(path, "%s/%s", shm_dir, wwid)) {
>  		condlog(1, "%s: path name overflow", __func__);
> -		return -1;
> +		return WWID_FAILED_ERROR;
>  	}
>  
>  	if (unlink(path) == 0)
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] libmultipath: simplify failed wwid code
  2020-05-08 16:32   ` Benjamin Marzinski
@ 2020-05-08 19:25     ` Martin Wilck
  0 siblings, 0 replies; 4+ messages in thread
From: Martin Wilck @ 2020-05-08 19:25 UTC (permalink / raw)
  To: bmarzins; +Cc: dm-devel

On Fri, 2020-05-08 at 11:32 -0500, Benjamin Marzinski wrote:
> On Fri, May 08, 2020 at 09:15:32AM +0000, Martin Wilck wrote:
> > 
> > Please consider adding the attached patch on top, which switches
> > back
> > to atomic creation of the failed_wwids file, with just a little bit
> > more compexity. I prefer the creation of the file to be atomic on
> > the
> > file system level. IMO that's how "flag" files like this should be
> > handled in principle, and doing it correctly makes me feel better,
> > even
> > though I have to concur that an actual race is hardly possible.
> 
> Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
> 
> Or where you looking for me to respin with this included? Either way
> is
> fine.

Please respin, to have a clean patch set on the ML.

Regards,
Martin

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-05-08 19:25 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-01 22:39 [PATCH] libmultipath: simplify failed wwid code Benjamin Marzinski
2020-05-08  9:15 ` Martin Wilck
2020-05-08 16:32   ` Benjamin Marzinski
2020-05-08 19:25     ` Martin Wilck

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.