All of lore.kernel.org
 help / color / mirror / Atom feed
* fsck -l: fix racing between unlock/unlink and open
@ 2016-04-07 21:59 Yuriy M. Kaminskiy
  2016-04-18 10:22 ` Karel Zak
  2016-04-22  9:09 ` Karel Zak
  0 siblings, 2 replies; 5+ messages in thread
From: Yuriy M. Kaminskiy @ 2016-04-07 21:59 UTC (permalink / raw)
  To: util-linux

[-- Attachment #1: Type: text/plain, Size: 57 bytes --]

Probably, there are better solutions, but I prefer KISS.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-fsck-fix-racing-between-unlock-and-unlink.patch --]
[-- Type: text/x-diff, Size: 1016 bytes --]

>From fcb97ddcbc336bc8860828c47cdaf21c7b1ca655 Mon Sep 17 00:00:00 2001
From: "Yuriy M. Kaminskiy" <yumkam@gmail.com>
Date: Fri, 8 Apr 2016 00:38:56 +0300
Subject: [PATCH] fsck: fix racing between unlock/unlink and open

Process A	Process B	Process C
open()
[creates file]
lock()
[succeed]
		open()
		[open existing]
		lock()...
running()
close()
		[...succeed]
unlink()
		running()
				open()
				[creates file] {BAD!}
				lock()
				[succeed] {BAD!}
				running() {BAD!}
		close()

Cons: leaves empty (unlocked/harmless) .lock files in /run/fsck/
Signed-off-by: Yuriy M. Kaminskiy <yumkam@gmail.com>
---
 disk-utils/fsck.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/disk-utils/fsck.c b/disk-utils/fsck.c
index 05cfbc4..84d2dcc 100644
--- a/disk-utils/fsck.c
+++ b/disk-utils/fsck.c
@@ -410,7 +410,6 @@ static void unlock_disk(struct fsck_instance *inst)
 		printf(_("Unlocking %s.\n"), inst->lockpath);
 
 	close(inst->lock);			/* unlock */
-	unlink(inst->lockpath);
 
 	free(inst->lockpath);
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: fsck -l: fix racing between unlock/unlink and open
  2016-04-07 21:59 fsck -l: fix racing between unlock/unlink and open Yuriy M. Kaminskiy
@ 2016-04-18 10:22 ` Karel Zak
  2016-04-19 14:58   ` Yuriy M. Kaminskiy
  2016-04-20 10:13   ` Nir Soffer
  2016-04-22  9:09 ` Karel Zak
  1 sibling, 2 replies; 5+ messages in thread
From: Karel Zak @ 2016-04-18 10:22 UTC (permalink / raw)
  To: Yuriy M. Kaminskiy; +Cc: util-linux

On Fri, Apr 08, 2016 at 12:59:29AM +0300, Yuriy M. Kaminskiy wrote:
> Probably, there are better solutions, but I prefer KISS.

> From fcb97ddcbc336bc8860828c47cdaf21c7b1ca655 Mon Sep 17 00:00:00 2001
> From: "Yuriy M. Kaminskiy" <yumkam@gmail.com>
> Date: Fri, 8 Apr 2016 00:38:56 +0300
> Subject: [PATCH] fsck: fix racing between unlock/unlink and open
> 
> Process A	Process B	Process C
> open()
> [creates file]
> lock()
> [succeed]
> 		open()
> 		[open existing]
> 		lock()...
> running()
> close()
> 		[...succeed]
> unlink()
> 		running()
> 				open()
> 				[creates file] {BAD!}
> 				lock()
> 				[succeed] {BAD!}
> 				running() {BAD!}
> 		close()
> 

It would be better to check if the file still exist on the filesystem
with the same inode number, if not then re-open. It means that on race
you will create a new lock file. (You have to unlock after unlink.)
See http://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition

Simple example:

#include <stdio.h>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>

int main(int argc, char **argv)
{
	const char *lockpath = "test.lock";
	int fd;
	struct stat st_lock, st_file;

	while (1) {
		fd = open(lockpath, O_RDONLY|O_CREAT|O_CLOEXEC,
				    S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH);
		fstat(fd, &st_lock);
		flock(fd, LOCK_EX);

		errno = 0;
		if (stat(lockpath, &st_file) == 0 && st_file.st_ino == st_lock.st_ino)
			break;
		if (errno)
			fprintf(stderr, "%d: stat failed: %m\n", getpid());
		else
			fprintf(stderr, "%d: ignore %lu (fs has:%lu)\n", getpid(), st_lock.st_ino, st_file.st_ino);
		close(fd);
	}

	fprintf(stderr, "%d: -->locked ino: %lu\n", getpid(), st_lock.st_ino);
	sleep(10);
	unlink(lockpath);
	close(fd);
	return 0;
}


See untested fsck patch below. Comments?

    Karel


diff --git a/disk-utils/fsck.c b/disk-utils/fsck.c
index 05cfbc4..0a7dbc8 100644
--- a/disk-utils/fsck.c
+++ b/disk-utils/fsck.c
@@ -370,23 +370,38 @@ static void lock_disk(struct fsck_instance *inst)
 	if (verbose)
 		printf(_("Locking disk by %s ... "), inst->lockpath);
 
-	inst->lock = open(inst->lockpath, O_RDONLY|O_CREAT|O_CLOEXEC,
-				    S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH);
-	if (inst->lock >= 0) {
-		int rc = -1;
-
-		/* inform users that we're waiting on the lock */
-		if (verbose &&
-		    (rc = flock(inst->lock, LOCK_EX | LOCK_NB)) != 0 &&
-		    errno == EWOULDBLOCK)
-			printf(_("(waiting) "));
-
-		if (rc != 0 && flock(inst->lock, LOCK_EX) != 0) {
-			close(inst->lock);			/* failed */
-			inst->lock = -1;
+	while (1) {
+		struct stat st_lock, st_file;
+
+		inst->lock = open(inst->lockpath, O_RDONLY|O_CREAT|O_CLOEXEC,
+					    S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH);
+		if (inst->lock >= 0) {
+			int rc = -1;
+
+			/* inform users that we're waiting on the lock */
+			if (verbose &&
+			    (rc = flock(inst->lock, LOCK_EX | LOCK_NB)) != 0 &&
+			    errno == EWOULDBLOCK)
+				printf(_("(waiting) "));
+
+			if (rc != 0 && flock(inst->lock, LOCK_EX) != 0) {
+				close(inst->lock);			/* failed */
+				inst->lock = -1;
+			}
 		}
+
+		/*
+		 * Make sure the locked lockfile really exist on the filesystem
+		 * to avoid race between lock and unlink (see unlock_disk())
+		 */
+		fstat(inst->lock, &st_lock);
+		if (stat(inst->lockpath, &st_file) == 0
+		    && st_file.st_ino == st_lock.st_ino)
+			break;				/* success */
+		close(inst->lock);
 	}
 
+
 	if (verbose)
 		/* TRANSLATORS: These are followups to "Locking disk...". */
 		printf("%s.\n", inst->lock >= 0 ? _("succeeded") : _("failed"));
@@ -409,8 +424,8 @@ static void unlock_disk(struct fsck_instance *inst)
 	if (verbose)
 		printf(_("Unlocking %s.\n"), inst->lockpath);
 
-	close(inst->lock);			/* unlock */
 	unlink(inst->lockpath);
+	close(inst->lock);			/* unlock */
 
 	free(inst->lockpath);
 
 

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: fsck -l: fix racing between unlock/unlink and open
  2016-04-18 10:22 ` Karel Zak
@ 2016-04-19 14:58   ` Yuriy M. Kaminskiy
  2016-04-20 10:13   ` Nir Soffer
  1 sibling, 0 replies; 5+ messages in thread
From: Yuriy M. Kaminskiy @ 2016-04-19 14:58 UTC (permalink / raw)
  To: util-linux

Karel Zak <kzak@redhat.com> writes:

> On Fri, Apr 08, 2016 at 12:59:29AM +0300, Yuriy M. Kaminskiy wrote:
>> Probably, there are better solutions, but I prefer KISS.
>
>> From fcb97ddcbc336bc8860828c47cdaf21c7b1ca655 Mon Sep 17 00:00:00 2001
>> From: "Yuriy M. Kaminskiy" <yumkam@gmail.com>
>> Date: Fri, 8 Apr 2016 00:38:56 +0300
>> Subject: [PATCH] fsck: fix racing between unlock/unlink and open
>> 
>> Process A	Process B	Process C
>> open()
>> [creates file]
>> lock()
>> [succeed]
>> 		open()
>> 		[open existing]
>> 		lock()...
>> running()
>> close()
>> 		[...succeed]
>> unlink()
>> 		running()
>> 				open()
>> 				[creates file] {BAD!}
>> 				lock()
>> 				[succeed] {BAD!}
>> 				running() {BAD!}
>> 		close()
>> 
>
> It would be better to check if the file still exist on the filesystem
> with the same inode number, if not then re-open. It means that on race
> you will create a new lock file. (You have to unlock after unlink.)
> See http://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
>
> Simple example:
>
> #include <stdio.h>
> #include <sys/file.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <fcntl.h>
> #include <unistd.h>
> #include <errno.h>
>
> int main(int argc, char **argv)
> {
> 	const char *lockpath = "test.lock";
> 	int fd;
> 	struct stat st_lock, st_file;
>
> 	while (1) {
> 		fd = open(lockpath, O_RDONLY|O_CREAT|O_CLOEXEC,
> 				    S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH);
> 		fstat(fd, &st_lock);
> 		flock(fd, LOCK_EX);
>
> 		errno = 0;
> 		if (stat(lockpath, &st_file) == 0 && st_file.st_ino == st_lock.st_ino)
> 			break;

Requires stable inode numbers (well, probably acceptable assumption for {/var,}/run).

> 		if (errno)
> 			fprintf(stderr, "%d: stat failed: %m\n", getpid());
> 		else
> 			fprintf(stderr, "%d: ignore %lu (fs has:%lu)\n", getpid(), st_lock.st_ino, st_file.st_ino);
> 		close(fd);
> 	}
>
> 	fprintf(stderr, "%d: -->locked ino: %lu\n", getpid(), st_lock.st_ino);
> 	sleep(10);
> 	unlink(lockpath);
> 	close(fd);
> 	return 0;
> }
>
>
> See untested fsck patch below. Comments?

I still prefer KISS. (And I bet it will eat more resources than just
leaving those empty files around.)
(But either way, bug will be fixed, so I do as you like).

[...]


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: fsck -l: fix racing between unlock/unlink and open
  2016-04-18 10:22 ` Karel Zak
  2016-04-19 14:58   ` Yuriy M. Kaminskiy
@ 2016-04-20 10:13   ` Nir Soffer
  1 sibling, 0 replies; 5+ messages in thread
From: Nir Soffer @ 2016-04-20 10:13 UTC (permalink / raw)
  To: Karel Zak; +Cc: Yuriy M. Kaminskiy, util-linux

On Mon, Apr 18, 2016 at 1:22 PM, Karel Zak <kzak@redhat.com> wrote:
>
> On Fri, Apr 08, 2016 at 12:59:29AM +0300, Yuriy M. Kaminskiy wrote:
> > Probably, there are better solutions, but I prefer KISS.
>
> > From fcb97ddcbc336bc8860828c47cdaf21c7b1ca655 Mon Sep 17 00:00:00 2001
> > From: "Yuriy M. Kaminskiy" <yumkam@gmail.com>
> > Date: Fri, 8 Apr 2016 00:38:56 +0300
> > Subject: [PATCH] fsck: fix racing between unlock/unlink and open
> >
> > Process A     Process B       Process C
> > open()
> > [creates file]
> > lock()
> > [succeed]
> >               open()
> >               [open existing]
> >               lock()...
> > running()
> > close()
> >               [...succeed]
> > unlink()
> >               running()
> >                               open()
> >                               [creates file] {BAD!}
> >                               lock()
> >                               [succeed] {BAD!}
> >                               running() {BAD!}
> >               close()
> >
>
> It would be better to check if the file still exist on the filesystem
> with the same inode number, if not then re-open. It means that on race
> you will create a new lock file. (You have to unlock after unlink.)
> See http://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
>
> Simple example:
>
> #include <stdio.h>
> #include <sys/file.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <fcntl.h>
> #include <unistd.h>
> #include <errno.h>
>
> int main(int argc, char **argv)
> {
>         const char *lockpath = "test.lock";
>         int fd;
>         struct stat st_lock, st_file;
>
>         while (1) {
>                 fd = open(lockpath, O_RDONLY|O_CREAT|O_CLOEXEC,
>                                     S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH);
>                 fstat(fd, &st_lock);
>                 flock(fd, LOCK_EX);
>
>                 errno = 0;
>                 if (stat(lockpath, &st_file) == 0 && st_file.st_ino == st_lock.st_ino)
>                         break;
>                 if (errno)
>                         fprintf(stderr, "%d: stat failed: %m\n", getpid());
>                 else
>                         fprintf(stderr, "%d: ignore %lu (fs has:%lu)\n", getpid(), st_lock.st_ino, st_file.st_ino);
>                 close(fd);
>         }
>
>         fprintf(stderr, "%d: -->locked ino: %lu\n", getpid(), st_lock.st_ino);
>         sleep(10);
>         unlink(lockpath);
>         close(fd);
>         return 0;
> }
> See untested fsck patch below. Comments?

When multiple processes blocked on the same lock file, this
ensure that all of them will fail to validate the lock when flock
return, and will have to try again, but this is probably fine for
this use case.

>
>     Karel
>
>
> diff --git a/disk-utils/fsck.c b/disk-utils/fsck.c
> index 05cfbc4..0a7dbc8 100644
> --- a/disk-utils/fsck.c
> +++ b/disk-utils/fsck.c
> @@ -370,23 +370,38 @@ static void lock_disk(struct fsck_instance *inst)
>         if (verbose)
>                 printf(_("Locking disk by %s ... "), inst->lockpath);
>
> -       inst->lock = open(inst->lockpath, O_RDONLY|O_CREAT|O_CLOEXEC,
> -                                   S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH);
> -       if (inst->lock >= 0) {
> -               int rc = -1;
> -
> -               /* inform users that we're waiting on the lock */
> -               if (verbose &&
> -                   (rc = flock(inst->lock, LOCK_EX | LOCK_NB)) != 0 &&
> -                   errno == EWOULDBLOCK)
> -                       printf(_("(waiting) "));
> -
> -               if (rc != 0 && flock(inst->lock, LOCK_EX) != 0) {
> -                       close(inst->lock);                      /* failed */
> -                       inst->lock = -1;
> +       while (1) {
> +               struct stat st_lock, st_file;

I would rename st_file to st_lockpath.

> +
> +               inst->lock = open(inst->lockpath, O_RDONLY|O_CREAT|O_CLOEXEC,
> +                                           S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH);
> +               if (inst->lock >= 0) {

We can replace this with

    if (inst->lock == -1)
        break;  /* failed */

Simplifying the normal flow, and eliminating the issue of
validating a lock after open failed.

> +                       int rc = -1;
> +
> +                       /* inform users that we're waiting on the lock */
> +                       if (verbose &&
> +                           (rc = flock(inst->lock, LOCK_EX | LOCK_NB)) != 0 &&
> +                           errno == EWOULDBLOCK)
> +                               printf(_("(waiting) "));
> +
> +                       if (rc != 0 && flock(inst->lock, LOCK_EX) != 0) {
> +                               close(inst->lock);                      /* failed */
> +                               inst->lock = -1;
> +                       }
>                 }
> +
> +               /*
> +                * Make sure the locked lockfile really exist on the filesystem
> +                * to avoid race between lock and unlink (see unlock_disk())
> +                */
> +               fstat(inst->lock, &st_lock);

This is going to fail when inst->lock == -1. Then the contents of
st_lock are random junk from the stack.

> +               if (stat(inst->lockpath, &st_file) == 0
> +                   && st_file.st_ino == st_lock.st_ino)

This check is relevant only if inst->lock >= 0

> +                       break;                          /* success */
> +               close(inst->lock);
>         }
>
> +
>         if (verbose)
>                 /* TRANSLATORS: These are followups to "Locking disk...". */
>                 printf("%s.\n", inst->lock >= 0 ? _("succeeded") : _("failed"));
> @@ -409,8 +424,8 @@ static void unlock_disk(struct fsck_instance *inst)
>         if (verbose)
>                 printf(_("Unlocking %s.\n"), inst->lockpath);
>
> -       close(inst->lock);                      /* unlock */

Here we should add a comment about the order and link to
lock_disk() depending on this order.

>         unlink(inst->lockpath);
> +       close(inst->lock);                      /* unlock */
>
>         free(inst->lockpath);

Nir

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: fsck -l: fix racing between unlock/unlink and open
  2016-04-07 21:59 fsck -l: fix racing between unlock/unlink and open Yuriy M. Kaminskiy
  2016-04-18 10:22 ` Karel Zak
@ 2016-04-22  9:09 ` Karel Zak
  1 sibling, 0 replies; 5+ messages in thread
From: Karel Zak @ 2016-04-22  9:09 UTC (permalink / raw)
  To: Yuriy M. Kaminskiy; +Cc: util-linux

On Fri, Apr 08, 2016 at 12:59:29AM +0300, Yuriy M. Kaminskiy wrote:
> Cons: leaves empty (unlocked/harmless) .lock files in /run/fsck/
> Signed-off-by: Yuriy M. Kaminskiy <yumkam@gmail.com>
> ---
>  disk-utils/fsck.c | 1 -
>  1 file changed, 1 deletion(-)

I have applied this KISS solution.

    Karel

-- 
 Karel Zak  <kzak@redhat.com>
 http://karelzak.blogspot.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2016-04-22  9:09 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-07 21:59 fsck -l: fix racing between unlock/unlink and open Yuriy M. Kaminskiy
2016-04-18 10:22 ` Karel Zak
2016-04-19 14:58   ` Yuriy M. Kaminskiy
2016-04-20 10:13   ` Nir Soffer
2016-04-22  9:09 ` Karel Zak

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.