All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] CIFS: Fix VFS lock usage for oplocked files
@ 2012-03-27 11:36 Pavel Shilovsky
       [not found] ` <1332848175-6441-1-git-send-email-piastry-7qunaywFIewox3rIn2DAYQ@public.gmane.org>
  0 siblings, 1 reply; 5+ messages in thread
From: Pavel Shilovsky @ 2012-03-27 11:36 UTC (permalink / raw)
  To: linux-cifs-u79uwXL29TY76Z2rM5mHXA; +Cc: stable-DgEjT+Ai2ygdnm+yROfE0A

We can deadlock if we have a write oplock and two processes
use the same file handle. In this case the first process can't
unlock its lock if another process blocked on the lock in the
same time.

Fix this by removing lock_mutex protection from waiting on a
blocked lock and protect only posix_lock_file call.

Cc: stable-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
Signed-off-by: Pavel Shilovsky <piastry-7qunaywFIewox3rIn2DAYQ@public.gmane.org>
---
 fs/cifs/file.c |   56 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 159fcc5..2bf04e9 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -671,6 +671,21 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock)
 	}
 }
 
+/*
+ * Copied from fs/locks.c with small changes.
+ * Remove waiter from blocker's block list.
+ * When blocker ends up pointing to itself then the list is empty.
+ */
+static void
+cifs_locks_delete_block(struct file_lock *waiter)
+{
+	lock_flocks();
+	list_del_init(&waiter->fl_block);
+	list_del_init(&waiter->fl_link);
+	waiter->fl_next = NULL;
+	unlock_flocks();
+}
+
 static bool
 __cifs_find_lock_conflict(struct cifsInodeInfo *cinode, __u64 offset,
 			__u64 length, __u8 type, __u16 netfid,
@@ -820,6 +835,39 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
 	return rc;
 }
 
+/* Called with locked lock_mutex, return with unlocked. */
+static int
+cifs_posix_lock_file_wait_locked(struct file *file, struct file_lock *flock)
+{
+	struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
+	int rc;
+
+	while (true) {
+		rc = posix_lock_file(file, flock, NULL);
+		mutex_unlock(&cinode->lock_mutex);
+		if (rc != FILE_LOCK_DEFERRED)
+			break;
+		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
+		if (!rc) {
+			mutex_lock(&cinode->lock_mutex);
+			continue;
+		}
+		cifs_locks_delete_block(flock);
+		break;
+	}
+	return rc;
+}
+
+static int
+cifs_posix_lock_file_wait(struct file *file, struct file_lock *flock)
+{
+	struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
+
+	mutex_lock(&cinode->lock_mutex);
+	/* lock_mutex will be released by the function below */
+	return cifs_posix_lock_file_wait_locked(file, flock);
+}
+
 /*
  * Set the byte-range lock (posix style). Returns:
  * 1) 0, if we set the lock and don't need to request to the server;
@@ -840,9 +888,9 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
 		mutex_unlock(&cinode->lock_mutex);
 		return rc;
 	}
-	rc = posix_lock_file_wait(file, flock);
-	mutex_unlock(&cinode->lock_mutex);
-	return rc;
+
+	/* lock_mutex will be released by the function below */
+	return cifs_posix_lock_file_wait_locked(file, flock);
 }
 
 static int
@@ -1338,7 +1386,7 @@ cifs_setlk(struct file *file,  struct file_lock *flock, __u8 type,
 
 out:
 	if (flock->fl_flags & FL_POSIX)
-		posix_lock_file_wait(file, flock);
+		cifs_posix_lock_file_wait(file, flock);
 	return rc;
 }
 
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] CIFS: Fix VFS lock usage for oplocked files
       [not found] ` <1332848175-6441-1-git-send-email-piastry-7qunaywFIewox3rIn2DAYQ@public.gmane.org>
@ 2012-03-27 18:02   ` Pavel Shilovsky
  2012-03-27 21:30   ` Jeff Layton
  1 sibling, 0 replies; 5+ messages in thread
From: Pavel Shilovsky @ 2012-03-27 18:02 UTC (permalink / raw)
  To: linux-cifs-u79uwXL29TY76Z2rM5mHXA; +Cc: stable-DgEjT+Ai2ygdnm+yROfE0A

[-- Attachment #1: Type: text/plain, Size: 3897 bytes --]

27 марта 2012 г. 15:36 пользователь Pavel Shilovsky
<piastry-7qunaywFIewox3rIn2DAYQ@public.gmane.org> написал:
> We can deadlock if we have a write oplock and two processes
> use the same file handle. In this case the first process can't
> unlock its lock if another process blocked on the lock in the
> same time.
>
> Fix this by removing lock_mutex protection from waiting on a
> blocked lock and protect only posix_lock_file call.
>
> Cc: stable-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
> Signed-off-by: Pavel Shilovsky <piastry-7qunaywFIewox3rIn2DAYQ@public.gmane.org>
> ---
>  fs/cifs/file.c |   56 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 files changed, 52 insertions(+), 4 deletions(-)
>
> diff --git a/fs/cifs/file.c b/fs/cifs/file.c
> index 159fcc5..2bf04e9 100644
> --- a/fs/cifs/file.c
> +++ b/fs/cifs/file.c
> @@ -671,6 +671,21 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock)
>        }
>  }
>
> +/*
> + * Copied from fs/locks.c with small changes.
> + * Remove waiter from blocker's block list.
> + * When blocker ends up pointing to itself then the list is empty.
> + */
> +static void
> +cifs_locks_delete_block(struct file_lock *waiter)
> +{
> +       lock_flocks();
> +       list_del_init(&waiter->fl_block);
> +       list_del_init(&waiter->fl_link);
> +       waiter->fl_next = NULL;
> +       unlock_flocks();
> +}
> +
>  static bool
>  __cifs_find_lock_conflict(struct cifsInodeInfo *cinode, __u64 offset,
>                        __u64 length, __u8 type, __u16 netfid,
> @@ -820,6 +835,39 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
>        return rc;
>  }
>
> +/* Called with locked lock_mutex, return with unlocked. */
> +static int
> +cifs_posix_lock_file_wait_locked(struct file *file, struct file_lock *flock)
> +{
> +       struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
> +       int rc;
> +
> +       while (true) {
> +               rc = posix_lock_file(file, flock, NULL);
> +               mutex_unlock(&cinode->lock_mutex);
> +               if (rc != FILE_LOCK_DEFERRED)
> +                       break;
> +               rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
> +               if (!rc) {
> +                       mutex_lock(&cinode->lock_mutex);
> +                       continue;
> +               }
> +               cifs_locks_delete_block(flock);
> +               break;
> +       }
> +       return rc;
> +}
> +
> +static int
> +cifs_posix_lock_file_wait(struct file *file, struct file_lock *flock)
> +{
> +       struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
> +
> +       mutex_lock(&cinode->lock_mutex);
> +       /* lock_mutex will be released by the function below */
> +       return cifs_posix_lock_file_wait_locked(file, flock);
> +}
> +
>  /*
>  * Set the byte-range lock (posix style). Returns:
>  * 1) 0, if we set the lock and don't need to request to the server;
> @@ -840,9 +888,9 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
>                mutex_unlock(&cinode->lock_mutex);
>                return rc;
>        }
> -       rc = posix_lock_file_wait(file, flock);
> -       mutex_unlock(&cinode->lock_mutex);
> -       return rc;
> +
> +       /* lock_mutex will be released by the function below */
> +       return cifs_posix_lock_file_wait_locked(file, flock);
>  }
>
>  static int
> @@ -1338,7 +1386,7 @@ cifs_setlk(struct file *file,  struct file_lock *flock, __u8 type,
>
>  out:
>        if (flock->fl_flags & FL_POSIX)
> -               posix_lock_file_wait(file, flock);
> +               cifs_posix_lock_file_wait(file, flock);
>        return rc;
>  }
>
> --
> 1.7.1
>

Include the posix_lock_bug.c file that reproduces the problem.

-- 
Best regards,
Pavel Shilovsky.

[-- Attachment #2: posix_lock_bug.c --]
[-- Type: text/x-csrc, Size: 967 bytes --]

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>

int main(int args, char **argv) {
	int fd, rc, pid;
	struct flock flock;
	char *name;

	if (args == 2)
		name = argv[1];
	else {
		perror("no file name\n");
		return 1;
	}

	fd = open(name, O_CREAT | O_RDWR);
	if (!fd) {
		perror("can't open a file\n");
		return 1;
	}

	flock.l_start = 0;
	flock.l_len = 1;
	flock.l_type = F_WRLCK;
	flock.l_whence = 0;

	rc = fcntl(fd, F_SETLK, &flock);
	if (rc) {
		perror("can't lock\n");
		return 1;
	}

	pid = fork();
	if (pid) {
		/* parent process */
		printf("parent process started\n");
		sleep(2);
		flock.l_type = F_UNLCK;
		rc = fcntl(fd, F_SETLK, &flock);
		if (rc) {
			perror("can't lock\n");
			return 1;
		}
		printf("parent process ended\n");
	} else {
		/* child process */
		printf("child process started\n");
		rc = fcntl(fd, F_SETLKW, &flock);
		if (rc) {
			perror("can't lock\n");
			return 1;
		}
		printf("child process ended\n");
	}

	return 0;
}

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] CIFS: Fix VFS lock usage for oplocked files
       [not found] ` <1332848175-6441-1-git-send-email-piastry-7qunaywFIewox3rIn2DAYQ@public.gmane.org>
  2012-03-27 18:02   ` Pavel Shilovsky
@ 2012-03-27 21:30   ` Jeff Layton
       [not found]     ` <20120327173032.5bd6b60d-9yPaYZwiELC+kQycOl6kW4xkIHaj4LzF@public.gmane.org>
  1 sibling, 1 reply; 5+ messages in thread
From: Jeff Layton @ 2012-03-27 21:30 UTC (permalink / raw)
  To: Pavel Shilovsky
  Cc: linux-cifs-u79uwXL29TY76Z2rM5mHXA, stable-DgEjT+Ai2ygdnm+yROfE0A

On Tue, 27 Mar 2012 15:36:15 +0400
Pavel Shilovsky <piastry-7qunaywFIewox3rIn2DAYQ@public.gmane.org> wrote:

> We can deadlock if we have a write oplock and two processes
> use the same file handle. In this case the first process can't
> unlock its lock if another process blocked on the lock in the
> same time.
> 
> Fix this by removing lock_mutex protection from waiting on a
> blocked lock and protect only posix_lock_file call.
> 

Perhaps this means that the model of using a giant mutex around all of
this code needs a fundamental rethink?

Any time you wrap a large section of code under a giant lock (like the
lock_mutex here), then you invite a whole host of problems --
scalability issues for one, and there's also the problem of ensuring
that you understand what the lock is intended to protect. Witness the
pain and agony that accompanied the BKL removal effort for the last
decade...

> Cc: stable-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
> Signed-off-by: Pavel Shilovsky <piastry-7qunaywFIewox3rIn2DAYQ@public.gmane.org>
> ---
>  fs/cifs/file.c |   56 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 files changed, 52 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/cifs/file.c b/fs/cifs/file.c
> index 159fcc5..2bf04e9 100644
> --- a/fs/cifs/file.c
> +++ b/fs/cifs/file.c
> @@ -671,6 +671,21 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock)
>  	}
>  }
>  
> +/*
> + * Copied from fs/locks.c with small changes.
> + * Remove waiter from blocker's block list.
> + * When blocker ends up pointing to itself then the list is empty.
> + */
> +static void
> +cifs_locks_delete_block(struct file_lock *waiter)
> +{
> +	lock_flocks();
> +	list_del_init(&waiter->fl_block);
> +	list_del_init(&waiter->fl_link);
> +	waiter->fl_next = NULL;
> +	unlock_flocks();
> +}
> +

This is the same exact function as locks_delete_block. What is the
point of copying it here instead of using that? It will of course need
to be exported...

>  static bool
>  __cifs_find_lock_conflict(struct cifsInodeInfo *cinode, __u64 offset,
>  			__u64 length, __u8 type, __u16 netfid,
> @@ -820,6 +835,39 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
>  	return rc;
>  }
>  
> +/* Called with locked lock_mutex, return with unlocked. */
> +static int
> +cifs_posix_lock_file_wait_locked(struct file *file, struct file_lock *flock)
> +{
> +	struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
> +	int rc;
> +
> +	while (true) {
> +		rc = posix_lock_file(file, flock, NULL);
> +		mutex_unlock(&cinode->lock_mutex);
> +		if (rc != FILE_LOCK_DEFERRED)
> +			break;
> +		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
> +		if (!rc) {
> +			mutex_lock(&cinode->lock_mutex);
> +			continue;
> +		}
> +		cifs_locks_delete_block(flock);
> +		break;
> +	}
> +	return rc;
> +}
> +
> +static int
> +cifs_posix_lock_file_wait(struct file *file, struct file_lock *flock)
> +{
> +	struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
> +
> +	mutex_lock(&cinode->lock_mutex);
> +	/* lock_mutex will be released by the function below */
> +	return cifs_posix_lock_file_wait_locked(file, flock);
> +}
> +

lock handling that crosses function boundaries is almost always a red
flag that something is not well-designed and will end up broken or
being rewritten at some point in the future. I'd urge you not to
establish this sort of API.

>  /*
>   * Set the byte-range lock (posix style). Returns:
>   * 1) 0, if we set the lock and don't need to request to the server;
> @@ -840,9 +888,9 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
>  		mutex_unlock(&cinode->lock_mutex);
>  		return rc;
>  	}
> -	rc = posix_lock_file_wait(file, flock);
> -	mutex_unlock(&cinode->lock_mutex);
> -	return rc;

Ok, so the original bug was here. When one thread is blocked here and
waiting for the lock then you can't get the mutex in order to release
it...

This patch looks like it'll "fix" the immediate problem, but I'm
concerned that the purpose of the lock_mutex is not entirely clear.

What data structures does it protect? A better solution probably will
involve moving more of this code outside of it by determining which
data structures are protected by it.

> +
> +	/* lock_mutex will be released by the function below */
> +	return cifs_posix_lock_file_wait_locked(file, flock);
>  }
>  
>  static int
> @@ -1338,7 +1386,7 @@ cifs_setlk(struct file *file,  struct file_lock *flock, __u8 type,
>  
>  out:
>  	if (flock->fl_flags & FL_POSIX)
> -		posix_lock_file_wait(file, flock);
> +		cifs_posix_lock_file_wait(file, flock);
>  	return rc;
>  }
>  


-- 
Jeff Layton <jlayton-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] CIFS: Fix VFS lock usage for oplocked files
       [not found]     ` <20120327173032.5bd6b60d-9yPaYZwiELC+kQycOl6kW4xkIHaj4LzF@public.gmane.org>
@ 2012-03-28  5:33       ` Pavel Shilovsky
       [not found]         ` <CAKywueSvhFT=PQSLZ=AFs6v3+Bz1-vT=9WGz5SJs8j9dajpMKg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 5+ messages in thread
From: Pavel Shilovsky @ 2012-03-28  5:33 UTC (permalink / raw)
  To: Jeff Layton
  Cc: linux-cifs-u79uwXL29TY76Z2rM5mHXA, stable-DgEjT+Ai2ygdnm+yROfE0A

28 марта 2012 г. 1:30 пользователь Jeff Layton <jlayton-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> написал:
> On Tue, 27 Mar 2012 15:36:15 +0400
> Pavel Shilovsky <piastry-7qunaywFIewox3rIn2DAYQ@public.gmane.org> wrote:
>
>> We can deadlock if we have a write oplock and two processes
>> use the same file handle. In this case the first process can't
>> unlock its lock if another process blocked on the lock in the
>> same time.
>>
>> Fix this by removing lock_mutex protection from waiting on a
>> blocked lock and protect only posix_lock_file call.
>>
>
> Perhaps this means that the model of using a giant mutex around all of
> this code needs a fundamental rethink?
>
> Any time you wrap a large section of code under a giant lock (like the
> lock_mutex here), then you invite a whole host of problems --
> scalability issues for one, and there's also the problem of ensuring
> that you understand what the lock is intended to protect. Witness the
> pain and agony that accompanied the BKL removal effort for the last
> decade...
>
>> Cc: stable-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
>> Signed-off-by: Pavel Shilovsky <piastry-7qunaywFIewox3rIn2DAYQ@public.gmane.org>
>> ---
>>  fs/cifs/file.c |   56 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
>>  1 files changed, 52 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/cifs/file.c b/fs/cifs/file.c
>> index 159fcc5..2bf04e9 100644
>> --- a/fs/cifs/file.c
>> +++ b/fs/cifs/file.c
>> @@ -671,6 +671,21 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock)
>>       }
>>  }
>>
>> +/*
>> + * Copied from fs/locks.c with small changes.
>> + * Remove waiter from blocker's block list.
>> + * When blocker ends up pointing to itself then the list is empty.
>> + */
>> +static void
>> +cifs_locks_delete_block(struct file_lock *waiter)
>> +{
>> +     lock_flocks();
>> +     list_del_init(&waiter->fl_block);
>> +     list_del_init(&waiter->fl_link);
>> +     waiter->fl_next = NULL;
>> +     unlock_flocks();
>> +}
>> +
>
> This is the same exact function as locks_delete_block. What is the
> point of copying it here instead of using that? It will of course need
> to be exported...
>
>>  static bool
>>  __cifs_find_lock_conflict(struct cifsInodeInfo *cinode, __u64 offset,
>>                       __u64 length, __u8 type, __u16 netfid,
>> @@ -820,6 +835,39 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
>>       return rc;
>>  }
>>
>> +/* Called with locked lock_mutex, return with unlocked. */
>> +static int
>> +cifs_posix_lock_file_wait_locked(struct file *file, struct file_lock *flock)
>> +{
>> +     struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
>> +     int rc;
>> +
>> +     while (true) {
>> +             rc = posix_lock_file(file, flock, NULL);
>> +             mutex_unlock(&cinode->lock_mutex);
>> +             if (rc != FILE_LOCK_DEFERRED)
>> +                     break;
>> +             rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
>> +             if (!rc) {
>> +                     mutex_lock(&cinode->lock_mutex);
>> +                     continue;
>> +             }
>> +             cifs_locks_delete_block(flock);
>> +             break;
>> +     }
>> +     return rc;
>> +}
>> +
>> +static int
>> +cifs_posix_lock_file_wait(struct file *file, struct file_lock *flock)
>> +{
>> +     struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
>> +
>> +     mutex_lock(&cinode->lock_mutex);
>> +     /* lock_mutex will be released by the function below */
>> +     return cifs_posix_lock_file_wait_locked(file, flock);
>> +}
>> +
>
> lock handling that crosses function boundaries is almost always a red
> flag that something is not well-designed and will end up broken or
> being rewritten at some point in the future. I'd urge you not to
> establish this sort of API.
>
>>  /*
>>   * Set the byte-range lock (posix style). Returns:
>>   * 1) 0, if we set the lock and don't need to request to the server;
>> @@ -840,9 +888,9 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
>>               mutex_unlock(&cinode->lock_mutex);
>>               return rc;
>>       }
>> -     rc = posix_lock_file_wait(file, flock);
>> -     mutex_unlock(&cinode->lock_mutex);
>> -     return rc;
>
> Ok, so the original bug was here. When one thread is blocked here and
> waiting for the lock then you can't get the mutex in order to release
> it...
>
> This patch looks like it'll "fix" the immediate problem, but I'm
> concerned that the purpose of the lock_mutex is not entirely clear.
>
> What data structures does it protect? A better solution probably will
> involve moving more of this code outside of it by determining which
> data structures are protected by it.
>
>> +
>> +     /* lock_mutex will be released by the function below */
>> +     return cifs_posix_lock_file_wait_locked(file, flock);
>>  }
>>
>>  static int
>> @@ -1338,7 +1386,7 @@ cifs_setlk(struct file *file,  struct file_lock *flock, __u8 type,
>>
>>  out:
>>       if (flock->fl_flags & FL_POSIX)
>> -             posix_lock_file_wait(file, flock);
>> +             cifs_posix_lock_file_wait(file, flock);
>>       return rc;
>>  }
>>
>
>
> --
> Jeff Layton <jlayton-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>


So, to be clear:

the main purpose of lock_mutex is protecting _all_ lock operations on
the inode as well as can_cache_brlocks flag. We have to deal with
simultaneous oplock break and fcntl requests to be sure that:

we push all the lock we have in the cache when oplock break comes. All
other locks that comes after that will be sent to the server.

This is the main problem I tried to solve when designing brlock cache
- races with oplock breaks.

The posix locking is slightly more complex than mandatory one because
it has its own protection - lock/unlock_flocks(). But I really don't
think we have problems using two protection mechanism at the same
time: lock_mutex is always surrounds flocks.

As for the bug that this patch fixes, it was my fault of not
understanding fluently how posix_lock_file_wait works - not a design
problem.

As for cross-locking function, I suggest to drop this change from the
patch because it is not directly related to the problem it solves (I
made this change to be more safe, but seems it needs more thoughts):
>>       if (flock->fl_flags & FL_POSIX)
>> -             posix_lock_file_wait(file, flock);
>> +             cifs_posix_lock_file_wait(file, flock);

in this case we don't need cifs_posix_lock_file_wait/locked variants
and the code can be moved into
cifs_posix_lock_set function that not introduces new API.

-- 
Best regards,
Pavel Shilovsky.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] CIFS: Fix VFS lock usage for oplocked files
       [not found]         ` <CAKywueSvhFT=PQSLZ=AFs6v3+Bz1-vT=9WGz5SJs8j9dajpMKg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2012-03-28 17:41           ` Jeff Layton
  0 siblings, 0 replies; 5+ messages in thread
From: Jeff Layton @ 2012-03-28 17:41 UTC (permalink / raw)
  To: Pavel Shilovsky
  Cc: linux-cifs-u79uwXL29TY76Z2rM5mHXA, stable-DgEjT+Ai2ygdnm+yROfE0A

On Wed, 28 Mar 2012 09:33:47 +0400
Pavel Shilovsky <piastry-7qunaywFIewox3rIn2DAYQ@public.gmane.org> wrote:

> 28 марта 2012 г. 1:30 пользователь Jeff Layton <jlayton-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> написал:
> > On Tue, 27 Mar 2012 15:36:15 +0400
> > Pavel Shilovsky <piastry-7qunaywFIewox3rIn2DAYQ@public.gmane.org> wrote:
> >
> >> We can deadlock if we have a write oplock and two processes
> >> use the same file handle. In this case the first process can't
> >> unlock its lock if another process blocked on the lock in the
> >> same time.
> >>
> >> Fix this by removing lock_mutex protection from waiting on a
> >> blocked lock and protect only posix_lock_file call.
> >>
> >
> > Perhaps this means that the model of using a giant mutex around all of
> > this code needs a fundamental rethink?
> >
> > Any time you wrap a large section of code under a giant lock (like the
> > lock_mutex here), then you invite a whole host of problems --
> > scalability issues for one, and there's also the problem of ensuring
> > that you understand what the lock is intended to protect. Witness the
> > pain and agony that accompanied the BKL removal effort for the last
> > decade...
> >
> >> Cc: stable-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
> >> Signed-off-by: Pavel Shilovsky <piastry-7qunaywFIewox3rIn2DAYQ@public.gmane.org>
> >> ---
> >>  fs/cifs/file.c |   56 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
> >>  1 files changed, 52 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/fs/cifs/file.c b/fs/cifs/file.c
> >> index 159fcc5..2bf04e9 100644
> >> --- a/fs/cifs/file.c
> >> +++ b/fs/cifs/file.c
> >> @@ -671,6 +671,21 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock)
> >>       }
> >>  }
> >>
> >> +/*
> >> + * Copied from fs/locks.c with small changes.
> >> + * Remove waiter from blocker's block list.
> >> + * When blocker ends up pointing to itself then the list is empty.
> >> + */
> >> +static void
> >> +cifs_locks_delete_block(struct file_lock *waiter)
> >> +{
> >> +     lock_flocks();
> >> +     list_del_init(&waiter->fl_block);
> >> +     list_del_init(&waiter->fl_link);
> >> +     waiter->fl_next = NULL;
> >> +     unlock_flocks();
> >> +}
> >> +
> >
> > This is the same exact function as locks_delete_block. What is the
> > point of copying it here instead of using that? It will of course need
> > to be exported...
> >
> >>  static bool
> >>  __cifs_find_lock_conflict(struct cifsInodeInfo *cinode, __u64 offset,
> >>                       __u64 length, __u8 type, __u16 netfid,
> >> @@ -820,6 +835,39 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
> >>       return rc;
> >>  }
> >>
> >> +/* Called with locked lock_mutex, return with unlocked. */
> >> +static int
> >> +cifs_posix_lock_file_wait_locked(struct file *file, struct file_lock *flock)
> >> +{
> >> +     struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
> >> +     int rc;
> >> +
> >> +     while (true) {
> >> +             rc = posix_lock_file(file, flock, NULL);
> >> +             mutex_unlock(&cinode->lock_mutex);
> >> +             if (rc != FILE_LOCK_DEFERRED)
> >> +                     break;
> >> +             rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
> >> +             if (!rc) {
> >> +                     mutex_lock(&cinode->lock_mutex);
> >> +                     continue;
> >> +             }
> >> +             cifs_locks_delete_block(flock);
> >> +             break;
> >> +     }
> >> +     return rc;
> >> +}
> >> +
> >> +static int
> >> +cifs_posix_lock_file_wait(struct file *file, struct file_lock *flock)
> >> +{
> >> +     struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
> >> +
> >> +     mutex_lock(&cinode->lock_mutex);
> >> +     /* lock_mutex will be released by the function below */
> >> +     return cifs_posix_lock_file_wait_locked(file, flock);
> >> +}
> >> +
> >
> > lock handling that crosses function boundaries is almost always a red
> > flag that something is not well-designed and will end up broken or
> > being rewritten at some point in the future. I'd urge you not to
> > establish this sort of API.
> >
> >>  /*
> >>   * Set the byte-range lock (posix style). Returns:
> >>   * 1) 0, if we set the lock and don't need to request to the server;
> >> @@ -840,9 +888,9 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
> >>               mutex_unlock(&cinode->lock_mutex);
> >>               return rc;
> >>       }
> >> -     rc = posix_lock_file_wait(file, flock);
> >> -     mutex_unlock(&cinode->lock_mutex);
> >> -     return rc;
> >
> > Ok, so the original bug was here. When one thread is blocked here and
> > waiting for the lock then you can't get the mutex in order to release
> > it...
> >
> > This patch looks like it'll "fix" the immediate problem, but I'm
> > concerned that the purpose of the lock_mutex is not entirely clear.
> >
> > What data structures does it protect? A better solution probably will
> > involve moving more of this code outside of it by determining which
> > data structures are protected by it.
> >
> >> +
> >> +     /* lock_mutex will be released by the function below */
> >> +     return cifs_posix_lock_file_wait_locked(file, flock);
> >>  }
> >>
> >>  static int
> >> @@ -1338,7 +1386,7 @@ cifs_setlk(struct file *file,  struct file_lock *flock, __u8 type,
> >>
> >>  out:
> >>       if (flock->fl_flags & FL_POSIX)
> >> -             posix_lock_file_wait(file, flock);
> >> +             cifs_posix_lock_file_wait(file, flock);
> >>       return rc;
> >>  }
> >>
> >
> >
> > --
> > Jeff Layton <jlayton-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
> 
> 
> So, to be clear:
> 
> the main purpose of lock_mutex is protecting _all_ lock operations on
> the inode as well as can_cache_brlocks flag. We have to deal with
> simultaneous oplock break and fcntl requests to be sure that:
> 

> we push all the lock we have in the cache when oplock break comes. All
> other locks that comes after that will be sent to the server.
> 
> This is the main problem I tried to solve when designing brlock cache
> - races with oplock breaks.
> 
> The posix locking is slightly more complex than mandatory one because
> it has its own protection - lock/unlock_flocks(). But I really don't
> think we have problems using two protection mechanism at the same
> time: lock_mutex is always surrounds flocks.
> 
> As for the bug that this patch fixes, it was my fault of not
> understanding fluently how posix_lock_file_wait works - not a design
> problem.
> 
> As for cross-locking function, I suggest to drop this change from the
> patch because it is not directly related to the problem it solves (I
> made this change to be more safe, but seems it needs more thoughts):
> >>       if (flock->fl_flags & FL_POSIX)
> >> -             posix_lock_file_wait(file, flock);
> >> +             cifs_posix_lock_file_wait(file, flock);
> 
> in this case we don't need cifs_posix_lock_file_wait/locked variants
> and the code can be moved into
> cifs_posix_lock_set function that not introduces new API.
> 


That's exactly what I'm talking about. You're using the lock to
serialize a vague set of "lock operations", instead of using it to
protect data. That's almost always a recipe for problems as it's almost
impossible to get it provably correct.

Sometimes doing that sort of thing is justified. The mutex that
serializes access to the socket that we use to communicate with the
server for example. In that case we have a single stream of data that
to and from the server, and access to it must be serialized. We also
use it to protect the signature. Even that lock surrounds more code
than I'm comfortable with, but I don't see a better way to handle it.

The generic VFS level locking code does something similar with
lock_flocks(). Why? Because it was originally protected by the BKL, and
*no one* understood how the locking was intended to work well enough to
do anything but wrap it in a similar spinlock in order to finish up the
BKL removal.

Here, you're repeating many of those same mistakes and it's already
proving to be problematic. I think you really ought to take a step back
and determine what _data_ this lock is intended to protect and ensure
that the locking does what you expect.

The patch you're proposing may be an expedient fix in the interim, but
you're almost certainly going to have to revisit this locking in the
future.

-- 
Jeff Layton <jlayton-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2012-03-28 17:41 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-03-27 11:36 [PATCH] CIFS: Fix VFS lock usage for oplocked files Pavel Shilovsky
     [not found] ` <1332848175-6441-1-git-send-email-piastry-7qunaywFIewox3rIn2DAYQ@public.gmane.org>
2012-03-27 18:02   ` Pavel Shilovsky
2012-03-27 21:30   ` Jeff Layton
     [not found]     ` <20120327173032.5bd6b60d-9yPaYZwiELC+kQycOl6kW4xkIHaj4LzF@public.gmane.org>
2012-03-28  5:33       ` Pavel Shilovsky
     [not found]         ` <CAKywueSvhFT=PQSLZ=AFs6v3+Bz1-vT=9WGz5SJs8j9dajpMKg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2012-03-28 17:41           ` Jeff Layton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.