All of lore.kernel.org
 help / color / mirror / Atom feed
* fs: WARNING in locks_free_lock_context()
@ 2015-12-23 10:37 Dmitry Vyukov
  2015-12-23 13:54 ` Jeff Layton
  2016-01-08  2:22 ` [PATCH] locks: fix unlock when fcntl_setlk races with a close Jeff Layton
  0 siblings, 2 replies; 20+ messages in thread
From: Dmitry Vyukov @ 2015-12-23 10:37 UTC (permalink / raw)
  To: Jeff Layton, J. Bruce Fields, Alexander Viro, linux-fsdevel, LKML
  Cc: syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

Hello,

The following program triggers
WARN_ON_ONCE(!list_empty(&ctx->flc_posix)) warning in
locks_free_lock_context (run it in a loop):

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include <unistd.h>
#include <sys/syscall.h>
#include <string.h>
#include <stdint.h>
#include <pthread.h>

#ifndef SYS_memfd_create
#define SYS_memfd_create 319
#endif

/*
 * r[]    - result slots for the syscalls issued by thr(); main() fills the
 *          whole array with 0xff bytes before starting any thread. Only
 *          slots 0, 2-5 and 11-14 are written by the code in this program.
 * done[] - per-case completion flags: thr() sets done[arg] = 1 when it
 *          finishes and main() polls them. Only indexes 0..6 are used here.
 * Both arrays are plain (non-atomic) globals shared by all threads.
 */
long r[15];
long done[14];

/*
 * Thread body: issues the one raw syscall selected by arg (0..6), stores the
 * result in the shared r[] array and then sets done[arg].
 *
 * arg is an integer case index smuggled through the void * parameter. All
 * memory operands are fixed addresses inside the region that case 0 asks
 * mmap to place at 0x20000000, so the other cases rely on case 0 having
 * succeeded. Always returns 0 (a null pointer).
 */
void *thr(void *arg)
{
        /* Randomly delay the start by 100 us to vary thread interleavings. */
        if (rand()%2)
                usleep(100);

        switch ((long)arg) {
        case 0:
                /*
                 * Map 0x5000 bytes at 0x20000000 with an all-ones fd argument
                 * and offset 0. NOTE(review): prot 0x3 / flags 0x32 are
                 * presumably PROT_READ|PROT_WRITE and
                 * MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS on Linux - confirm.
                 */
                r[0] = syscall(SYS_mmap, 0x20000000ul, 0x5000ul,
0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul);
                break;
        case 1:
                /*
                 * Place a 15-byte, NUL-terminated name at 0x20000c49 and
                 * create a memfd with flags 0x3; the new fd lands in r[2].
                 */
                memcpy((void*)0x20000c49,
"\xb6\x70\x70\x70\x31\x73\x65\x63\x75\x72\x69\x74\x79\x9e\x00", 15);
                r[2] = syscall(SYS_memfd_create, 0x20000c49ul, 0x3ul,
0, 0, 0, 0);
                break;
        case 2:
                /*
                 * socketpair() writes its two descriptors to 0x20001000;
                 * on success they are copied into r[4] and r[5].
                 * NOTE(review): domain 0x1 / type 0x1 are presumably
                 * AF_UNIX / SOCK_STREAM - confirm.
                 */
                r[3] = syscall(SYS_socketpair, 0x1ul, 0x1ul, 0x0ul,
0x20001000ul, 0, 0);
                if (r[3] != -1)
                                r[4] = *(uint32_t*)0x20001000;
                if (r[3] != -1)
                                r[5] = *(uint32_t*)0x20001004;
                break;
        case 3:
                /*
                 * Build the fcntl argument in place at 0x20000000 and issue
                 * fcntl command 0x7 on r[5].
                 * NOTE(review): the stores look like a struct flock with
                 * l_type=0, l_whence=1, l_start=6, l_len=0xad, l_pid=0, and
                 * 0x7 is presumably F_SETLKW on x86-64 - verify against the
                 * target ABI.
                 */
                *(uint16_t*)0x20000000 = (uint16_t)0x0;
                *(uint16_t*)0x20000002 = (uint16_t)0x1;
                *(uint64_t*)0x20000008 = (uint64_t)0x6;
                *(uint64_t*)0x20000010 = (uint64_t)0xad;
                *(uint32_t*)0x20000018 = (uint32_t)0x0;
                r[11] = syscall(SYS_fcntl, r[5], 0x7ul, 0x20000000ul, 0, 0, 0);
                break;
        case 4:
                /* Write 0x1000 bytes from 0x200006cb to the fd in r[5]. */
                r[12] = syscall(SYS_write, r[5], 0x200006cbul,
0x1000ul, 0, 0, 0);
                break;
        case 5:
                /* Close the fd in r[5], the same fd cases 3 and 4 operate on. */
                r[13] = syscall(SYS_close, r[5], 0, 0, 0, 0, 0);
                break;
        case 6:
                /* Duplicate the memfd (r[2]) onto the descriptor number in r[4]. */
                r[14] = syscall(SYS_dup2, r[2], r[4], 0, 0, 0, 0);
                break;
        }
        /* Tell main()'s polling loop that this case has finished. */
        done[(long)arg] = 1;
        return 0;
}

/*
 * Driver: runs the seven thr() cases twice, each case in its own thread.
 *
 * Round one starts the cases in order and, after each start, waits for that
 * case's done flag (at most 10 polls, 100 us apart). Round two starts them
 * again but randomly skips the wait, so later cases may run while earlier
 * ones are still in flight. Threads are never joined; the process returns
 * after one final 100 us sleep.
 *
 * NOTE(review): rand(), srand() and time() are used here, but <stdlib.h> and
 * <time.h> are not among the headers this program includes.
 */
int main()
{
        long i, j;
        pthread_t th[14];

        srand(time(0)+getpid());
        /* Fill every byte of r[] with 0xff, so each slot starts out as -1. */
        memset(r, -1, sizeof(r));
        /* Round one: start each case, then give it a bounded time to finish. */
        for (i = 0; i < 7; i++) {
                pthread_create(&th[i], 0, thr, (void*)i);
                for (j = 0; j < 10; j++) {
                        if (done[i])
                                break;
                        usleep(100);
                }
        }
        /* Clear the completion flags before the second round. */
        for (i = 0; i < 7; i++)
                done[i] = 0;
        /* Round two: same cases, new threads stored in th[7..13]. */
        for (i = 0; i < 7; i++) {
                pthread_create(&th[7+i], 0, thr, (void*)i);
                /* Half of the time, start the next case without waiting. */
                if (rand()%2)
                        continue;
                for (j = 0; j < 10; j++) {
                        if (done[i])
                                break;
                        usleep(100);
                }
        }
        /* Short grace period only; outstanding threads are not joined. */
        usleep(100);
        return 0;
}


------------[ cut here ]------------
WARNING: CPU: 3 PID: 1975 at fs/locks.c:241
locks_free_lock_context+0x118/0x180()
Modules linked in:
CPU: 3 PID: 1975 Comm: a.out Not tainted 4.4.0-rc6+ #173
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
 00000000ffffffff ffff880068e67bf8 ffffffff82899ffd 0000000000000000
 ffff88006130af00 ffffffff85e17d60 ffff880068e67c38 ffffffff812ebbb9
 ffffffff818162d8 ffffffff85e17d60 00000000000000f1 ffff8800685c2828
Call Trace:
 [<     inline     >] __dump_stack lib/dump_stack.c:15
 [<ffffffff82899ffd>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50
 [<ffffffff812ebbb9>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:460
 [<ffffffff812ebde9>] warn_slowpath_null+0x29/0x30 kernel/panic.c:493
 [<ffffffff818162d8>] locks_free_lock_context+0x118/0x180 fs/locks.c:241
 [<ffffffff81765783>] __destroy_inode+0x1d3/0x4d0 fs/inode.c:228
 [<ffffffff81765acb>] destroy_inode+0x4b/0x120 fs/inode.c:253
 [<ffffffff81765ec0>] evict+0x320/0x4f0 fs/inode.c:559
 [<     inline     >] iput_final fs/inode.c:1477
 [<ffffffff817665dc>] iput+0x45c/0x850 fs/inode.c:1504
 [<     inline     >] dentry_iput fs/dcache.c:358
 [<ffffffff81757237>] __dentry_kill+0x457/0x620 fs/dcache.c:543
 [<     inline     >] dentry_kill fs/dcache.c:587
 [<ffffffff8175c499>] dput+0x659/0x740 fs/dcache.c:796
 [<ffffffff817162fc>] __fput+0x42c/0x780 fs/file_table.c:226
 [<ffffffff817166d5>] ____fput+0x15/0x20 fs/file_table.c:244
 [<ffffffff8134679b>] task_work_run+0x16b/0x200 kernel/task_work.c:115
 [<     inline     >] tracehook_notify_resume include/linux/tracehook.h:191
 [<ffffffff81003990>] exit_to_usermode_loop+0x180/0x1a0
arch/x86/entry/common.c:251
 [<     inline     >] prepare_exit_to_usermode arch/x86/entry/common.c:282
 [<ffffffff8100631f>] syscall_return_slowpath+0x19f/0x210
arch/x86/entry/common.c:344
 [<ffffffff85ccea22>] int_ret_from_sys_call+0x25/0x9f
arch/x86/entry/entry_64.S:281
---[ end trace 2dde0624dd974a19 ]---


On commit 4ef7675344d687a0ef5b0d7c0cee12da005870c0 (Dec 20).

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: fs: WARNING in locks_free_lock_context()
  2015-12-23 10:37 fs: WARNING in locks_free_lock_context() Dmitry Vyukov
@ 2015-12-23 13:54 ` Jeff Layton
  2016-02-03 18:19   ` William Dauchy
  2016-01-08  2:22 ` [PATCH] locks: fix unlock when fcntl_setlk races with a close Jeff Layton
  1 sibling, 1 reply; 20+ messages in thread
From: Jeff Layton @ 2015-12-23 13:54 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: J. Bruce Fields, Alexander Viro, linux-fsdevel, LKML, syzkaller,
	Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

On Wed, 23 Dec 2015 11:37:39 +0100
Dmitry Vyukov <dvyukov@google.com> wrote:

> Hello,
> 
> The following program triggers
> WARN_ON_ONCE(!list_empty(&ctx->flc_posix)) warning in
> locks_free_lock_context (run it in a loop):
> 
> // autogenerated by syzkaller (http://github.com/google/syzkaller)
> #include <unistd.h>
> #include <sys/syscall.h>
> #include <string.h>
> #include <stdint.h>
> #include <pthread.h>
> 
> #ifndef SYS_memfd_create
> #define SYS_memfd_create 319
> #endif
> 
> long r[15];
> long done[14];
> 
> void *thr(void *arg)
> {
>         if (rand()%2)
>                 usleep(100);
> 
>         switch ((long)arg) {
>         case 0:
>                 r[0] = syscall(SYS_mmap, 0x20000000ul, 0x5000ul,
> 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul);
>                 break;
>         case 1:
>                 memcpy((void*)0x20000c49,
> "\xb6\x70\x70\x70\x31\x73\x65\x63\x75\x72\x69\x74\x79\x9e\x00", 15);
>                 r[2] = syscall(SYS_memfd_create, 0x20000c49ul, 0x3ul,
> 0, 0, 0, 0);
>                 break;
>         case 2:
>                 r[3] = syscall(SYS_socketpair, 0x1ul, 0x1ul, 0x0ul,
> 0x20001000ul, 0, 0);
>                 if (r[3] != -1)
>                                 r[4] = *(uint32_t*)0x20001000;
>                 if (r[3] != -1)
>                                 r[5] = *(uint32_t*)0x20001004;
>                 break;
>         case 3:
>                 *(uint16_t*)0x20000000 = (uint16_t)0x0;
>                 *(uint16_t*)0x20000002 = (uint16_t)0x1;
>                 *(uint64_t*)0x20000008 = (uint64_t)0x6;
>                 *(uint64_t*)0x20000010 = (uint64_t)0xad;
>                 *(uint32_t*)0x20000018 = (uint32_t)0x0;
>                 r[11] = syscall(SYS_fcntl, r[5], 0x7ul, 0x20000000ul, 0, 0, 0);
>                 break;
>         case 4:
>                 r[12] = syscall(SYS_write, r[5], 0x200006cbul,
> 0x1000ul, 0, 0, 0);
>                 break;
>         case 5:
>                 r[13] = syscall(SYS_close, r[5], 0, 0, 0, 0, 0);
>                 break;
>         case 6:
>                 r[14] = syscall(SYS_dup2, r[2], r[4], 0, 0, 0, 0);
>                 break;
>         }
>         done[(long)arg] = 1;
>         return 0;
> }
> 
> int main()
> {
>         long i, j;
>         pthread_t th[14];
> 
>         srand(time(0)+getpid());
>         memset(r, -1, sizeof(r));
>         for (i = 0; i < 7; i++) {
>                 pthread_create(&th[i], 0, thr, (void*)i);
>                 for (j = 0; j < 10; j++) {
>                         if (done[i])
>                                 break;
>                         usleep(100);
>                 }
>         }
>         for (i = 0; i < 7; i++)
>                 done[i] = 0;
>         for (i = 0; i < 7; i++) {
>                 pthread_create(&th[7+i], 0, thr, (void*)i);
>                 if (rand()%2)
>                         continue;
>                 for (j = 0; j < 10; j++) {
>                         if (done[i])
>                                 break;
>                         usleep(100);
>                 }
>         }
>         usleep(100);
>         return 0;
> }
> 
> 
> ------------[ cut here ]------------
> WARNING: CPU: 3 PID: 1975 at fs/locks.c:241
> locks_free_lock_context+0x118/0x180()
> Modules linked in:
> CPU: 3 PID: 1975 Comm: a.out Not tainted 4.4.0-rc6+ #173
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>  00000000ffffffff ffff880068e67bf8 ffffffff82899ffd 0000000000000000
>  ffff88006130af00 ffffffff85e17d60 ffff880068e67c38 ffffffff812ebbb9
>  ffffffff818162d8 ffffffff85e17d60 00000000000000f1 ffff8800685c2828
> Call Trace:
>  [<     inline     >] __dump_stack lib/dump_stack.c:15
>  [<ffffffff82899ffd>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50
>  [<ffffffff812ebbb9>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:460
>  [<ffffffff812ebde9>] warn_slowpath_null+0x29/0x30 kernel/panic.c:493
>  [<ffffffff818162d8>] locks_free_lock_context+0x118/0x180 fs/locks.c:241
>  [<ffffffff81765783>] __destroy_inode+0x1d3/0x4d0 fs/inode.c:228
>  [<ffffffff81765acb>] destroy_inode+0x4b/0x120 fs/inode.c:253
>  [<ffffffff81765ec0>] evict+0x320/0x4f0 fs/inode.c:559
>  [<     inline     >] iput_final fs/inode.c:1477
>  [<ffffffff817665dc>] iput+0x45c/0x850 fs/inode.c:1504
>  [<     inline     >] dentry_iput fs/dcache.c:358
>  [<ffffffff81757237>] __dentry_kill+0x457/0x620 fs/dcache.c:543
>  [<     inline     >] dentry_kill fs/dcache.c:587
>  [<ffffffff8175c499>] dput+0x659/0x740 fs/dcache.c:796
>  [<ffffffff817162fc>] __fput+0x42c/0x780 fs/file_table.c:226
>  [<ffffffff817166d5>] ____fput+0x15/0x20 fs/file_table.c:244
>  [<ffffffff8134679b>] task_work_run+0x16b/0x200 kernel/task_work.c:115
>  [<     inline     >] tracehook_notify_resume include/linux/tracehook.h:191
>  [<ffffffff81003990>] exit_to_usermode_loop+0x180/0x1a0
> arch/x86/entry/common.c:251
>  [<     inline     >] prepare_exit_to_usermode arch/x86/entry/common.c:282
>  [<ffffffff8100631f>] syscall_return_slowpath+0x19f/0x210
> arch/x86/entry/common.c:344
>  [<ffffffff85ccea22>] int_ret_from_sys_call+0x25/0x9f
> arch/x86/entry/entry_64.S:281
> ---[ end trace 2dde0624dd974a19 ]---
> 
> 
> On commit 4ef7675344d687a0ef5b0d7c0cee12da005870c0 (Dec 20).

Ooh, nice catch...and just in time for Christmas.

filp_close does this after the fd has been detached from the file table
in __close_fd:

        if (likely(!(filp->f_mode & FMODE_PATH))) {
                dnotify_flush(filp, id);
                locks_remove_posix(filp, id);
        }
        fput(filp);

...and fcntl_setlk does this:

        /*
         * Attempt to detect a close/fcntl race and recover by
         * releasing the lock that was just acquired.
         */
        /*
         * we need that spin_lock here - it prevents reordering between
         * update of i_flctx->flc_posix and check for it done in close().
         * rcu_read_lock() wouldn't do.
         */
        spin_lock(&current->files->file_lock);
        f = fcheck(fd);
        spin_unlock(&current->files->file_lock);
        if (!error && f != filp && flock.l_type != F_UNLCK) {
                flock.l_type = F_UNLCK;
                goto again;
        }

...so in principle that should keep new locks from racing onto the list
just after we call filp_close. Hmm...I'll see if I can reproduce and
figure out how this could happen.

Thanks,
-- 
Jeff Layton <jlayton@poochiereds.net>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] locks: fix unlock when fcntl_setlk races with a close
  2015-12-23 10:37 fs: WARNING in locks_free_lock_context() Dmitry Vyukov
  2015-12-23 13:54 ` Jeff Layton
@ 2016-01-08  2:22 ` Jeff Layton
  2016-01-08 12:48   ` Jeff Layton
  2016-01-08 13:50   ` [PATCH v2 0/6] locks: better debugging and fix for setlk/close race handling Jeff Layton
  1 sibling, 2 replies; 20+ messages in thread
From: Jeff Layton @ 2016-01-08  2:22 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel
  Cc: Dmitry Vyukov, J. Bruce Fields, Alexander Viro, syzkaller,
	Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

Dmitry reported that he was able to reproduce the WARN_ON_ONCE that
fires in locks_free_lock_context when the flc_posix list isn't empty.

The problem turns out to be that we're basically rebuilding the
file_lock from scratch in fcntl_setlk when we discover that the setlk
has raced with a close. If the l_whence field is SEEK_CUR or SEEK_END,
then we may end up with fl_start and fl_end values that differ from
when the lock was initially set, if the file position or length of the
file has changed in the interim.

Fix this by just reusing the same lock request structure, and simply
override fl_type value with F_UNLCK as appropriate. That ensures that
we really are unlocking the lock that was initially set.

While we're there, make sure that we do pop a WARN_ON_ONCE if the
removal ever fails. Also return -EBADF in this event, since that's
what we would have returned if the close had happened earlier.

Cc: <stable@vger.kernel.org>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
---
 fs/locks.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 593dca300b29..0db640e4ced4 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2181,7 +2181,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		goto out;
 	}
 
-again:
 	error = flock_to_posix_lock(filp, file_lock, &flock);
 	if (error)
 		goto out;
@@ -2231,9 +2230,11 @@ again:
 	spin_lock(&current->files->file_lock);
 	f = fcheck(fd);
 	spin_unlock(&current->files->file_lock);
-	if (!error && f != filp && flock.l_type != F_UNLCK) {
-		flock.l_type = F_UNLCK;
-		goto again;
+	if (!error && f != filp && file_lock->fl_type != F_UNLCK) {
+		file_lock->fl_type = F_UNLCK;
+		error = do_lock_file_wait(filp, cmd, file_lock);
+		WARN_ON_ONCE(error);
+		error = -EBADF;
 	}
 
 out:
@@ -2321,7 +2322,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 		goto out;
 	}
 
-again:
 	error = flock64_to_posix_lock(filp, file_lock, &flock);
 	if (error)
 		goto out;
@@ -2366,11 +2366,12 @@ again:
 	spin_lock(&current->files->file_lock);
 	f = fcheck(fd);
 	spin_unlock(&current->files->file_lock);
-	if (!error && f != filp && flock.l_type != F_UNLCK) {
-		flock.l_type = F_UNLCK;
-		goto again;
+	if (!error && f != filp && file_lock->fl_type != F_UNLCK) {
+		file_lock->fl_type = F_UNLCK;
+		error = do_lock_file_wait(filp, cmd, file_lock);
+		WARN_ON_ONCE(error);
+		error = -EBADF;
 	}
-
 out:
 	locks_free_lock(file_lock);
 	return error;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH] locks: fix unlock when fcntl_setlk races with a close
  2016-01-08  2:22 ` [PATCH] locks: fix unlock when fcntl_setlk races with a close Jeff Layton
@ 2016-01-08 12:48   ` Jeff Layton
  2016-01-08 16:16     ` J. Bruce Fields
  2016-01-08 13:50   ` [PATCH v2 0/6] locks: better debugging and fix for setlk/close race handling Jeff Layton
  1 sibling, 1 reply; 20+ messages in thread
From: Jeff Layton @ 2016-01-08 12:48 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel
  Cc: Dmitry Vyukov, J. Bruce Fields, Alexander Viro, syzkaller,
	Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

On Thu,  7 Jan 2016 21:22:22 -0500
Jeff Layton <jlayton@poochiereds.net> wrote:

> Dmitry reported that he was able to reproduce the WARN_ON_ONCE that
> fires in locks_free_lock_context when the flc_posix list isn't empty.
> 
> The problem turns out to be that we're basically rebuilding the
> file_lock from scratch in fcntl_setlk when we discover that the setlk
> has raced with a close. If the l_whence field is SEEK_CUR or SEEK_END,
> then we may end up with fl_start and fl_end values that differ from
> when the lock was initially set, if the file position or length of the
> file has changed in the interim.
> 
> Fix this by just reusing the same lock request structure, and simply
> override fl_type value with F_UNLCK as appropriate. That ensures that
> we really are unlocking the lock that was initially set.
> 
> While we're there, make sure that we do pop a WARN_ON_ONCE if the
> removal ever fails. Also return -EBADF in this event, since that's
> what we would have returned if the close had happened earlier.
> 
> Cc: <stable@vger.kernel.org>
> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
> ---
>  fs/locks.c | 19 ++++++++++---------
>  1 file changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/locks.c b/fs/locks.c
> index 593dca300b29..0db640e4ced4 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -2181,7 +2181,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
>  		goto out;
>  	}
>  
> -again:
>  	error = flock_to_posix_lock(filp, file_lock, &flock);
>  	if (error)
>  		goto out;
> @@ -2231,9 +2230,11 @@ again:
>  	spin_lock(&current->files->file_lock);
>  	f = fcheck(fd);
>  	spin_unlock(&current->files->file_lock);
> -	if (!error && f != filp && flock.l_type != F_UNLCK) {
> -		flock.l_type = F_UNLCK;
> -		goto again;
> +	if (!error && f != filp && file_lock->fl_type != F_UNLCK) {
> +		file_lock->fl_type = F_UNLCK;
> +		error = do_lock_file_wait(filp, cmd, file_lock);
> +		WARN_ON_ONCE(error);
> +		error = -EBADF;
>  	}
>  
>  out:
> @@ -2321,7 +2322,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
>  		goto out;
>  	}
>  
> -again:
>  	error = flock64_to_posix_lock(filp, file_lock, &flock);
>  	if (error)
>  		goto out;
> @@ -2366,11 +2366,12 @@ again:
>  	spin_lock(&current->files->file_lock);
>  	f = fcheck(fd);
>  	spin_unlock(&current->files->file_lock);
> -	if (!error && f != filp && flock.l_type != F_UNLCK) {
> -		flock.l_type = F_UNLCK;
> -		goto again;
> +	if (!error && f != filp && file_lock->fl_type != F_UNLCK) {
> +		file_lock->fl_type = F_UNLCK;
> +		error = do_lock_file_wait(filp, cmd, file_lock);
> +		WARN_ON_ONCE(error);
> +		error = -EBADF;
>  	}
> -
>  out:
>  	locks_free_lock(file_lock);
>  	return error;

While this does fix Dmitry's reproducer, I think the basic concept of
removing locks like this after they are set is racy. Consider where we
have two threads:

Thread1				Thread2
----------------------------------------------------------------------------
fd1 = memfd_create(...);
fd2 = dup(fd1);
				fcntl(fd2, F_SETLK);
				(Here we call fcntl, and lock is set, but
				 task gets scheduled out before fcheck)
close(fd2)
fcntl(fd1, F_SETLK...);

				Task scheduled back in, does fcheck for fd2
				and finds that it's gone. Removes the lock
				that Thread1 just set.

So that seems wrong...in the face of the race above we can end up with
no lock set on the file, even though Thread1 thinks it has one. It is a
pretty unlikely race, but I don't see anything that prevents it.

The fix for filesystems that do not define their own ->lock op would be
pretty simple. We could do a fcheck after taking the flc_lock, but
before setting the lock on the file. The flc_lock should be enough to
prevent that race (though we may need to revisit some of the lockless
checks in locks_remove_posix). That wouldn't work for filesystems that
do set ->lock though, and I think we really do need a more general
solution there.

The good news is that OFD locks should be exempt from that fcheck
altogether. I'll spin up another patch for that, so we can at least
ensure that they aren't subject to that race.

Any thoughts on how to fix the above for traditional POSIX locks though?
-- 
Jeff Layton <jlayton@poochiereds.net>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2 0/6] locks: better debugging and fix for setlk/close race handling
  2016-01-08  2:22 ` [PATCH] locks: fix unlock when fcntl_setlk races with a close Jeff Layton
  2016-01-08 12:48   ` Jeff Layton
@ 2016-01-08 13:50   ` Jeff Layton
  2016-01-08 13:50     ` [PATCH v2 1/6] locks: fix unlock when fcntl_setlk races with a close Jeff Layton
                       ` (5 more replies)
  1 sibling, 6 replies; 20+ messages in thread
From: Jeff Layton @ 2016-01-08 13:50 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel
  Cc: Dmitry Vyukov, J. Bruce Fields, Alexander Viro, syzkaller,
	Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

The first patch in this patchset fixes a very long-standing bug in the
handling of races between setlk and close. That one should be appropriate
for all stable kernels and should apply to most kernels as-is.

The second patch exempts OFD locks from setlk/close race handling since
they shouldn't need it anyway. The rest of the patches add some better
debugging for these problems and do a little function name cleanup.

I'm planning to go ahead and put these into linux-next today and send
them to Linus for the 4.5 merge, unless there are any objections...

Jeff Layton (6):
  locks: fix unlock when fcntl_setlk races with a close
  locks: don't check for race with close when setting OFD lock
  locks: sprinkle some tracepoints around the file locking code
  locks: pass inode pointer to locks_free_lock_context
  locks: prink more detail when there are leaked locks
  locks: rename __posix_lock_file to posix_lock_inode

 fs/inode.c                      |   2 +-
 fs/locks.c                      | 123 +++++++++++++++++++++++++++-------------
 include/linux/fs.h              |   4 +-
 include/trace/events/filelock.h |  77 +++++++++++++++++++++++++
 4 files changed, 165 insertions(+), 41 deletions(-)

-- 
2.5.0

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2 1/6] locks: fix unlock when fcntl_setlk races with a close
  2016-01-08 13:50   ` [PATCH v2 0/6] locks: better debugging and fix for setlk/close race handling Jeff Layton
@ 2016-01-08 13:50     ` Jeff Layton
  2016-01-08 15:55       ` J. Bruce Fields
  2016-01-08 13:50     ` [PATCH v2 2/6] locks: don't check for race with close when setting OFD lock Jeff Layton
                       ` (4 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Jeff Layton @ 2016-01-08 13:50 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel
  Cc: Dmitry Vyukov, J. Bruce Fields, Alexander Viro, syzkaller,
	Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

Dmitry reported that he was able to reproduce the WARN_ON_ONCE that
fires in locks_free_lock_context when the flc_posix list isn't empty.

The problem turns out to be that we're basically rebuilding the
file_lock from scratch in fcntl_setlk when we discover that the setlk
has raced with a close. If the l_whence field is SEEK_CUR or SEEK_END,
then we may end up with fl_start and fl_end values that differ from
when the lock was initially set, if the file position or length of the
file has changed in the interim.

Fix this by just reusing the same lock request structure, and simply
override fl_type value with F_UNLCK as appropriate. That ensures that
we really are unlocking the lock that was initially set.

While we're there, make sure that we do pop a WARN_ON_ONCE if the
removal ever fails. Also return -EBADF in this event, since that's
what we would have returned if the close had happened earlier.

Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Fixes: c293621bbf67 (stale POSIX lock handling)
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
---
 fs/locks.c | 51 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 21 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 593dca300b29..c263aff793bc 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2181,7 +2181,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		goto out;
 	}
 
-again:
 	error = flock_to_posix_lock(filp, file_lock, &flock);
 	if (error)
 		goto out;
@@ -2223,19 +2222,22 @@ again:
 	 * Attempt to detect a close/fcntl race and recover by
 	 * releasing the lock that was just acquired.
 	 */
-	/*
-	 * we need that spin_lock here - it prevents reordering between
-	 * update of i_flctx->flc_posix and check for it done in close().
-	 * rcu_read_lock() wouldn't do.
-	 */
-	spin_lock(&current->files->file_lock);
-	f = fcheck(fd);
-	spin_unlock(&current->files->file_lock);
-	if (!error && f != filp && flock.l_type != F_UNLCK) {
-		flock.l_type = F_UNLCK;
-		goto again;
+	if (!error && file_lock->fl_type != F_UNLCK) {
+		/*
+		 * We need that spin_lock here - it prevents reordering between
+		 * update of i_flctx->flc_posix and check for it done in
+		 * close(). rcu_read_lock() wouldn't do.
+		 */
+		spin_lock(&current->files->file_lock);
+		f = fcheck(fd);
+		spin_unlock(&current->files->file_lock);
+		if (f != filp) {
+			file_lock->fl_type = F_UNLCK;
+			error = do_lock_file_wait(filp, cmd, file_lock);
+			WARN_ON_ONCE(error);
+			error = -EBADF;
+		}
 	}
-
 out:
 	locks_free_lock(file_lock);
 	return error;
@@ -2321,7 +2323,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 		goto out;
 	}
 
-again:
 	error = flock64_to_posix_lock(filp, file_lock, &flock);
 	if (error)
 		goto out;
@@ -2363,14 +2364,22 @@ again:
 	 * Attempt to detect a close/fcntl race and recover by
 	 * releasing the lock that was just acquired.
 	 */
-	spin_lock(&current->files->file_lock);
-	f = fcheck(fd);
-	spin_unlock(&current->files->file_lock);
-	if (!error && f != filp && flock.l_type != F_UNLCK) {
-		flock.l_type = F_UNLCK;
-		goto again;
+	if (!error && file_lock->fl_type != F_UNLCK) {
+		/*
+		 * We need that spin_lock here - it prevents reordering between
+		 * update of i_flctx->flc_posix and check for it done in
+		 * close(). rcu_read_lock() wouldn't do.
+		 */
+		spin_lock(&current->files->file_lock);
+		f = fcheck(fd);
+		spin_unlock(&current->files->file_lock);
+		if (f != filp) {
+			file_lock->fl_type = F_UNLCK;
+			error = do_lock_file_wait(filp, cmd, file_lock);
+			WARN_ON_ONCE(error);
+			error = -EBADF;
+		}
 	}
-
 out:
 	locks_free_lock(file_lock);
 	return error;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 2/6] locks: don't check for race with close when setting OFD lock
  2016-01-08 13:50   ` [PATCH v2 0/6] locks: better debugging and fix for setlk/close race handling Jeff Layton
  2016-01-08 13:50     ` [PATCH v2 1/6] locks: fix unlock when fcntl_setlk races with a close Jeff Layton
@ 2016-01-08 13:50     ` Jeff Layton
  2016-01-08 13:50     ` [PATCH v2 3/6] locks: sprinkle some tracepoints around the file locking code Jeff Layton
                       ` (3 subsequent siblings)
  5 siblings, 0 replies; 20+ messages in thread
From: Jeff Layton @ 2016-01-08 13:50 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel
  Cc: Dmitry Vyukov, J. Bruce Fields, Alexander Viro, syzkaller,
	Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

We don't clean out OFD locks on close(), so there's no need to check
for a race with them here. They'll get cleaned out at the same time
that flock locks are.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
---
 fs/locks.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index c263aff793bc..e72077d5a664 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2219,10 +2219,12 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	error = do_lock_file_wait(filp, cmd, file_lock);
 
 	/*
-	 * Attempt to detect a close/fcntl race and recover by
-	 * releasing the lock that was just acquired.
+	 * Attempt to detect a close/fcntl race and recover by releasing the
+	 * lock that was just acquired. There is no need to do that when we're
+	 * unlocking though, or for OFD locks.
 	 */
-	if (!error && file_lock->fl_type != F_UNLCK) {
+	if (!error && file_lock->fl_type != F_UNLCK &&
+	    !(file_lock->fl_flags & FL_OFDLCK)) {
 		/*
 		 * We need that spin_lock here - it prevents reordering between
 		 * update of i_flctx->flc_posix and check for it done in
@@ -2361,10 +2363,12 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 	error = do_lock_file_wait(filp, cmd, file_lock);
 
 	/*
-	 * Attempt to detect a close/fcntl race and recover by
-	 * releasing the lock that was just acquired.
+	 * Attempt to detect a close/fcntl race and recover by releasing the
+	 * lock that was just acquired. There is no need to do that when we're
+	 * unlocking though, or for OFD locks.
 	 */
-	if (!error && file_lock->fl_type != F_UNLCK) {
+	if (!error && file_lock->fl_type != F_UNLCK &&
+	    !(file_lock->fl_flags & FL_OFDLCK)) {
 		/*
 		 * We need that spin_lock here - it prevents reordering between
 		 * update of i_flctx->flc_posix and check for it done in
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 3/6] locks: sprinkle some tracepoints around the file locking code
  2016-01-08 13:50   ` [PATCH v2 0/6] locks: better debugging and fix for setlk/close race handling Jeff Layton
  2016-01-08 13:50     ` [PATCH v2 1/6] locks: fix unlock when fcntl_setlk races with a close Jeff Layton
  2016-01-08 13:50     ` [PATCH v2 2/6] locks: don't check for race with close when setting OFD lock Jeff Layton
@ 2016-01-08 13:50     ` Jeff Layton
  2016-01-08 13:50     ` [PATCH v2 4/6] locks: pass inode pointer to locks_free_lock_context Jeff Layton
                       ` (2 subsequent siblings)
  5 siblings, 0 replies; 20+ messages in thread
From: Jeff Layton @ 2016-01-08 13:50 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel
  Cc: Dmitry Vyukov, J. Bruce Fields, Alexander Viro, syzkaller,
	Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

Add some tracepoints around the POSIX locking code. These were useful
when tracking down problems when handling the race between setlk and
close.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
---
 fs/locks.c                      | 12 +++++--
 include/trace/events/filelock.h | 77 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index e72077d5a664..0af2387bd91e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -229,6 +229,7 @@ locks_get_lock_context(struct inode *inode, int type)
 		ctx = smp_load_acquire(&inode->i_flctx);
 	}
 out:
+	trace_locks_get_lock_context(inode, type, ctx);
 	return ctx;
 }
 
@@ -1141,6 +1142,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
 	if (new_fl2)
 		locks_free_lock(new_fl2);
 	locks_dispose_list(&dispose);
+	trace_posix_lock_inode(inode, request, error);
+
 	return error;
 }
 
@@ -2164,6 +2167,8 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	if (file_lock == NULL)
 		return -ENOLCK;
 
+	inode = file_inode(filp);
+
 	/*
 	 * This might block, so we do it before checking the inode.
 	 */
@@ -2171,8 +2176,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	if (copy_from_user(&flock, l, sizeof(flock)))
 		goto out;
 
-	inode = file_inode(filp);
-
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
 	 */
@@ -2241,6 +2244,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		}
 	}
 out:
+	trace_fcntl_setlk(inode, file_lock, error);
 	locks_free_lock(file_lock);
 	return error;
 }
@@ -2397,6 +2401,7 @@ out:
  */
 void locks_remove_posix(struct file *filp, fl_owner_t owner)
 {
+	int error;
 	struct file_lock lock;
 	struct file_lock_context *ctx;
 
@@ -2419,10 +2424,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
 	lock.fl_ops = NULL;
 	lock.fl_lmops = NULL;
 
-	vfs_lock_file(filp, F_SETLK, &lock, NULL);
+	error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
 
 	if (lock.fl_ops && lock.fl_ops->fl_release_private)
 		lock.fl_ops->fl_release_private(&lock);
+	trace_locks_remove_posix(file_inode(filp), &lock, error);
 }
 
 EXPORT_SYMBOL(locks_remove_posix);
diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index c72f2dc01d0b..63a7680347cb 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -34,6 +34,83 @@
 			{ F_WRLCK, "F_WRLCK" },		\
 			{ F_UNLCK, "F_UNLCK" })
 
+TRACE_EVENT(locks_get_lock_context,
+	TP_PROTO(struct inode *inode, int type, struct file_lock_context *ctx),
+
+	TP_ARGS(inode, type, ctx),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, i_ino)
+		__field(dev_t, s_dev)
+		__field(unsigned char, type)
+		__field(struct file_lock_context *, ctx)
+	),
+
+	TP_fast_assign(
+		__entry->s_dev = inode->i_sb->s_dev;
+		__entry->i_ino = inode->i_ino;
+		__entry->type = type;
+		__entry->ctx = ctx;
+	),
+
+	TP_printk("dev=0x%x:0x%x ino=0x%lx type=%s ctx=%p",
+		  MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+		  __entry->i_ino, show_fl_type(__entry->type), __entry->ctx)
+);
+
+DECLARE_EVENT_CLASS(filelock_lock,
+	TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+
+	TP_ARGS(inode, fl, ret),
+
+	TP_STRUCT__entry(
+		__field(struct file_lock *, fl)
+		__field(unsigned long, i_ino)
+		__field(dev_t, s_dev)
+		__field(struct file_lock *, fl_next)
+		__field(fl_owner_t, fl_owner)
+		__field(unsigned int, fl_pid)
+		__field(unsigned int, fl_flags)
+		__field(unsigned char, fl_type)
+		__field(loff_t, fl_start)
+		__field(loff_t, fl_end)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		__entry->fl = fl ? fl : NULL;
+		__entry->s_dev = inode->i_sb->s_dev;
+		__entry->i_ino = inode->i_ino;
+		__entry->fl_next = fl ? fl->fl_next : NULL;
+		__entry->fl_owner = fl ? fl->fl_owner : NULL;
+		__entry->fl_pid = fl ? fl->fl_pid : 0;
+		__entry->fl_flags = fl ? fl->fl_flags : 0;
+		__entry->fl_type = fl ? fl->fl_type : 0;
+		__entry->fl_start = fl ? fl->fl_start : 0;
+		__entry->fl_end = fl ? fl->fl_end : 0;
+		__entry->ret = ret;
+	),
+
+	TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d",
+		__entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+		__entry->i_ino, __entry->fl_next, __entry->fl_owner,
+		__entry->fl_pid, show_fl_flags(__entry->fl_flags),
+		show_fl_type(__entry->fl_type),
+		__entry->fl_start, __entry->fl_end, __entry->ret)
+);
+
+DEFINE_EVENT(filelock_lock, posix_lock_inode,
+		TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+		TP_ARGS(inode, fl, ret));
+
+DEFINE_EVENT(filelock_lock, fcntl_setlk,
+		TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+		TP_ARGS(inode, fl, ret));
+
+DEFINE_EVENT(filelock_lock, locks_remove_posix,
+		TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+		TP_ARGS(inode, fl, ret));
+
 DECLARE_EVENT_CLASS(filelock_lease,
 
 	TP_PROTO(struct inode *inode, struct file_lock *fl),
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 4/6] locks: pass inode pointer to locks_free_lock_context
  2016-01-08 13:50   ` [PATCH v2 0/6] locks: better debugging and fix for setlk/close race handling Jeff Layton
                       ` (2 preceding siblings ...)
  2016-01-08 13:50     ` [PATCH v2 3/6] locks: sprinkle some tracepoints around the file locking code Jeff Layton
@ 2016-01-08 13:50     ` Jeff Layton
  2016-01-08 13:50     ` [PATCH v2 5/6] locks: print more detail when there are leaked locks Jeff Layton
  2016-01-08 13:50     ` [PATCH v2 6/6] locks: rename __posix_lock_file to posix_lock_inode Jeff Layton
  5 siblings, 0 replies; 20+ messages in thread
From: Jeff Layton @ 2016-01-08 13:50 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel
  Cc: Dmitry Vyukov, J. Bruce Fields, Alexander Viro, syzkaller,
	Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

...so we can print information about it if there are leaked locks.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
---
 fs/inode.c         | 2 +-
 fs/locks.c         | 4 +++-
 include/linux/fs.h | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 1be5f9003eb3..ab6c84159f9d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -225,7 +225,7 @@ void __destroy_inode(struct inode *inode)
 	inode_detach_wb(inode);
 	security_inode_free(inode);
 	fsnotify_inode_delete(inode);
-	locks_free_lock_context(inode->i_flctx);
+	locks_free_lock_context(inode);
 	if (!inode->i_nlink) {
 		WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
 		atomic_long_dec(&inode->i_sb->s_remove_count);
diff --git a/fs/locks.c b/fs/locks.c
index 0af2387bd91e..ed9ab930d093 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -234,8 +234,10 @@ out:
 }
 
 void
-locks_free_lock_context(struct file_lock_context *ctx)
+locks_free_lock_context(struct inode *inode)
 {
+	struct file_lock_context *ctx = inode->i_flctx;
+
 	if (ctx) {
 		WARN_ON_ONCE(!list_empty(&ctx->flc_flock));
 		WARN_ON_ONCE(!list_empty(&ctx->flc_posix));
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cbf08d5c246e..6c4983aceb02 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1042,7 +1042,7 @@ extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
 extern int fcntl_getlease(struct file *filp);
 
 /* fs/locks.c */
-void locks_free_lock_context(struct file_lock_context *ctx);
+void locks_free_lock_context(struct inode *inode);
 void locks_free_lock(struct file_lock *fl);
 extern void locks_init_lock(struct file_lock *);
 extern struct file_lock * locks_alloc_lock(void);
@@ -1103,7 +1103,7 @@ static inline int fcntl_getlease(struct file *filp)
 }
 
 static inline void
-locks_free_lock_context(struct file_lock_context *ctx)
+locks_free_lock_context(struct inode *inode)
 {
 }
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 5/6] locks: print more detail when there are leaked locks
  2016-01-08 13:50   ` [PATCH v2 0/6] locks: better debugging and fix for setlk/close race handling Jeff Layton
                       ` (3 preceding siblings ...)
  2016-01-08 13:50     ` [PATCH v2 4/6] locks: pass inode pointer to locks_free_lock_context Jeff Layton
@ 2016-01-08 13:50     ` Jeff Layton
  2016-01-08 13:50     ` [PATCH v2 6/6] locks: rename __posix_lock_file to posix_lock_inode Jeff Layton
  5 siblings, 0 replies; 20+ messages in thread
From: Jeff Layton @ 2016-01-08 13:50 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel
  Cc: Dmitry Vyukov, J. Bruce Fields, Alexander Viro, syzkaller,
	Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

Right now, we just get WARN_ON_ONCE, which is not particularly helpful.
Have it dump some info about the locks and the inode to make it easier
to track down leaked locks in the future.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
---
 fs/locks.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index ed9ab930d093..ca272eb63c30 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -233,15 +233,40 @@ out:
 	return ctx;
 }
 
+static void
+locks_dump_ctx_list(struct list_head *list, char *list_type)
+{
+	struct file_lock *fl;
+
+	list_for_each_entry(fl, list, fl_list) {
+		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
+	}
+}
+
+static void
+locks_check_ctx_lists(struct inode *inode)
+{
+	struct file_lock_context *ctx = inode->i_flctx;
+
+	if (unlikely(!list_empty(&ctx->flc_flock) ||
+		     !list_empty(&ctx->flc_posix) ||
+		     !list_empty(&ctx->flc_lease))) {
+		pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
+			MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
+			inode->i_ino);
+		locks_dump_ctx_list(&ctx->flc_flock, "FLOCK");
+		locks_dump_ctx_list(&ctx->flc_posix, "POSIX");
+		locks_dump_ctx_list(&ctx->flc_lease, "LEASE");
+	}
+}
+
 void
 locks_free_lock_context(struct inode *inode)
 {
 	struct file_lock_context *ctx = inode->i_flctx;
 
-	if (ctx) {
-		WARN_ON_ONCE(!list_empty(&ctx->flc_flock));
-		WARN_ON_ONCE(!list_empty(&ctx->flc_posix));
-		WARN_ON_ONCE(!list_empty(&ctx->flc_lease));
+	if (unlikely(ctx)) {
+		locks_check_ctx_lists(inode);
 		kmem_cache_free(flctx_cache, ctx);
 	}
 }
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 6/6] locks: rename __posix_lock_file to posix_lock_inode
  2016-01-08 13:50   ` [PATCH v2 0/6] locks: better debugging and fix for setlk/close race handling Jeff Layton
                       ` (4 preceding siblings ...)
  2016-01-08 13:50     ` [PATCH v2 5/6] locks: print more detail when there are leaked locks Jeff Layton
@ 2016-01-08 13:50     ` Jeff Layton
  5 siblings, 0 replies; 20+ messages in thread
From: Jeff Layton @ 2016-01-08 13:50 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel
  Cc: Dmitry Vyukov, J. Bruce Fields, Alexander Viro, syzkaller,
	Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

...a more descriptive name and we can drop the double underscore prefix.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
---
 fs/locks.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index ca272eb63c30..a91f4ab00a90 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -961,7 +961,8 @@ out:
 	return error;
 }
 
-static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
+static int posix_lock_inode(struct inode *inode, struct file_lock *request,
+			    struct file_lock *conflock)
 {
 	struct file_lock *fl, *tmp;
 	struct file_lock *new_fl = NULL;
@@ -1191,7 +1192,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
 int posix_lock_file(struct file *filp, struct file_lock *fl,
 			struct file_lock *conflock)
 {
-	return __posix_lock_file(file_inode(filp), fl, conflock);
+	return posix_lock_inode(file_inode(filp), fl, conflock);
 }
 EXPORT_SYMBOL(posix_lock_file);
 
@@ -1207,7 +1208,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
 	int error;
 	might_sleep ();
 	for (;;) {
-		error = __posix_lock_file(inode, fl, NULL);
+		error = posix_lock_inode(inode, fl, NULL);
 		if (error != FILE_LOCK_DEFERRED)
 			break;
 		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1290,7 +1291,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
 		if (filp) {
 			fl.fl_owner = filp;
 			fl.fl_flags &= ~FL_SLEEP;
-			error = __posix_lock_file(inode, &fl, NULL);
+			error = posix_lock_inode(inode, &fl, NULL);
 			if (!error)
 				break;
 		}
@@ -1298,7 +1299,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
 		if (sleep)
 			fl.fl_flags |= FL_SLEEP;
 		fl.fl_owner = current->files;
-		error = __posix_lock_file(inode, &fl, NULL);
+		error = posix_lock_inode(inode, &fl, NULL);
 		if (error != FILE_LOCK_DEFERRED)
 			break;
 		error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 1/6] locks: fix unlock when fcntl_setlk races with a close
  2016-01-08 13:50     ` [PATCH v2 1/6] locks: fix unlock when fcntl_setlk races with a close Jeff Layton
@ 2016-01-08 15:55       ` J. Bruce Fields
  2016-01-08 16:11         ` Jeff Layton
  0 siblings, 1 reply; 20+ messages in thread
From: J. Bruce Fields @ 2016-01-08 15:55 UTC (permalink / raw)
  To: Jeff Layton
  Cc: linux-fsdevel, linux-kernel, Dmitry Vyukov, Alexander Viro,
	syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

On Fri, Jan 08, 2016 at 08:50:09AM -0500, Jeff Layton wrote:
> Dmitry reported that he was able to reproduce the WARN_ON_ONCE that
> fires in locks_free_lock_context when the flc_posix list isn't empty.
> 
> The problem turns out to be that we're basically rebuilding the
> file_lock from scratch in fcntl_setlk when we discover that the setlk
> has raced with a close. If the l_whence field is SEEK_CUR or SEEK_END,
> then we may end up with fl_start and fl_end values that differ from
> when the lock was initially set, if the file position or length of the
> file has changed in the interim.
> 
> Fix this by just reusing the same lock request structure, and simply
> override fl_type value with F_UNLCK as appropriate. That ensures that
> we really are unlocking the lock that was initially set.

You could also just do a whole-file unlock, couldn't you?  That would
seem less confusing to me.  But maybe I'm missing something.

--b.

> 
> While we're there, make sure that we do pop a WARN_ON_ONCE if the
> removal ever fails. Also return -EBADF in this event, since that's
> what we would have returned if the close had happened earlier.
> 
> Cc: "J. Bruce Fields" <bfields@fieldses.org>
> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: <stable@vger.kernel.org>
> Fixes: c293621bbf67 (stale POSIX lock handling)
> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
> ---
>  fs/locks.c | 51 ++++++++++++++++++++++++++++++---------------------
>  1 file changed, 30 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/locks.c b/fs/locks.c
> index 593dca300b29..c263aff793bc 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -2181,7 +2181,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
>  		goto out;
>  	}
>  
> -again:
>  	error = flock_to_posix_lock(filp, file_lock, &flock);
>  	if (error)
>  		goto out;
> @@ -2223,19 +2222,22 @@ again:
>  	 * Attempt to detect a close/fcntl race and recover by
>  	 * releasing the lock that was just acquired.
>  	 */
> -	/*
> -	 * we need that spin_lock here - it prevents reordering between
> -	 * update of i_flctx->flc_posix and check for it done in close().
> -	 * rcu_read_lock() wouldn't do.
> -	 */
> -	spin_lock(&current->files->file_lock);
> -	f = fcheck(fd);
> -	spin_unlock(&current->files->file_lock);
> -	if (!error && f != filp && flock.l_type != F_UNLCK) {
> -		flock.l_type = F_UNLCK;
> -		goto again;
> +	if (!error && file_lock->fl_type != F_UNLCK) {
> +		/*
> +		 * We need that spin_lock here - it prevents reordering between
> +		 * update of i_flctx->flc_posix and check for it done in
> +		 * close(). rcu_read_lock() wouldn't do.
> +		 */
> +		spin_lock(&current->files->file_lock);
> +		f = fcheck(fd);
> +		spin_unlock(&current->files->file_lock);
> +		if (f != filp) {
> +			file_lock->fl_type = F_UNLCK;
> +			error = do_lock_file_wait(filp, cmd, file_lock);
> +			WARN_ON_ONCE(error);
> +			error = -EBADF;
> +		}
>  	}
> -
>  out:
>  	locks_free_lock(file_lock);
>  	return error;
> @@ -2321,7 +2323,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
>  		goto out;
>  	}
>  
> -again:
>  	error = flock64_to_posix_lock(filp, file_lock, &flock);
>  	if (error)
>  		goto out;
> @@ -2363,14 +2364,22 @@ again:
>  	 * Attempt to detect a close/fcntl race and recover by
>  	 * releasing the lock that was just acquired.
>  	 */
> -	spin_lock(&current->files->file_lock);
> -	f = fcheck(fd);
> -	spin_unlock(&current->files->file_lock);
> -	if (!error && f != filp && flock.l_type != F_UNLCK) {
> -		flock.l_type = F_UNLCK;
> -		goto again;
> +	if (!error && file_lock->fl_type != F_UNLCK) {
> +		/*
> +		 * We need that spin_lock here - it prevents reordering between
> +		 * update of i_flctx->flc_posix and check for it done in
> +		 * close(). rcu_read_lock() wouldn't do.
> +		 */
> +		spin_lock(&current->files->file_lock);
> +		f = fcheck(fd);
> +		spin_unlock(&current->files->file_lock);
> +		if (f != filp) {
> +			file_lock->fl_type = F_UNLCK;
> +			error = do_lock_file_wait(filp, cmd, file_lock);
> +			WARN_ON_ONCE(error);
> +			error = -EBADF;
> +		}
>  	}
> -
>  out:
>  	locks_free_lock(file_lock);
>  	return error;
> -- 
> 2.5.0

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 1/6] locks: fix unlock when fcntl_setlk races with a close
  2016-01-08 15:55       ` J. Bruce Fields
@ 2016-01-08 16:11         ` Jeff Layton
  2016-01-08 16:21           ` J. Bruce Fields
  0 siblings, 1 reply; 20+ messages in thread
From: Jeff Layton @ 2016-01-08 16:11 UTC (permalink / raw)
  To: J. Bruce Fields
  Cc: linux-fsdevel, linux-kernel, Dmitry Vyukov, Alexander Viro,
	syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

On Fri, 8 Jan 2016 10:55:33 -0500
"J. Bruce Fields" <bfields@fieldses.org> wrote:

> On Fri, Jan 08, 2016 at 08:50:09AM -0500, Jeff Layton wrote:
> > Dmitry reported that he was able to reproduce the WARN_ON_ONCE that
> > fires in locks_free_lock_context when the flc_posix list isn't empty.
> > 
> > The problem turns out to be that we're basically rebuilding the
> > file_lock from scratch in fcntl_setlk when we discover that the setlk
> > has raced with a close. If the l_whence field is SEEK_CUR or SEEK_END,
> > then we may end up with fl_start and fl_end values that differ from
> > when the lock was initially set, if the file position or length of the
> > file has changed in the interim.
> > 
> > Fix this by just reusing the same lock request structure, and simply
> > override fl_type value with F_UNLCK as appropriate. That ensures that
> > we really are unlocking the lock that was initially set.  
> 
> You could also just do a whole-file unlock, couldn't you?  That would
> seem less confusing to me.  But maybe I'm missing something.
> 
> --b.
> 

I considered that too...but I was thinking that might make things even
worse. Consider:

Thread1				Thread2
----------------------------------------------------------------------------
fd1 = open(...);
fd2 = dup(fd1);
				fcntl(fd2, F_SETLK);
				(Here we call fcntl, and lock is set, but
				 task gets scheduled out before fcheck)
close(fd2)
fcntl(fd1, F_SETLK...);
				Task scheduled back in, does fcheck for fd2
				and finds that it's gone. Removes the lock
				that Thread1 just set.

If we just unlock the range that was set then Thread1 won't be affected
if his lock doesn't overlap Thread2's.

Is that better or worse? :)

TBH, I guess all of this is somewhat academic. If you're playing with
traditional POSIX locks and threads like this, then you really are
playing with fire.

We should try to fix that if we can though...

> > 
> > While we're there, make sure that we do pop a WARN_ON_ONCE if the
> > removal ever fails. Also return -EBADF in this event, since that's
> > what we would have returned if the close had happened earlier.
> > 
> > Cc: "J. Bruce Fields" <bfields@fieldses.org>
> > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > Cc: <stable@vger.kernel.org>
> > Fixes: c293621bbf67 (stale POSIX lock handling)
> > Reported-by: Dmitry Vyukov <dvyukov@google.com>
> > Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
> > ---
> >  fs/locks.c | 51 ++++++++++++++++++++++++++++++---------------------
> >  1 file changed, 30 insertions(+), 21 deletions(-)
> > 
> > diff --git a/fs/locks.c b/fs/locks.c
> > index 593dca300b29..c263aff793bc 100644
> > --- a/fs/locks.c
> > +++ b/fs/locks.c
> > @@ -2181,7 +2181,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
> >  		goto out;
> >  	}
> >  
> > -again:
> >  	error = flock_to_posix_lock(filp, file_lock, &flock);
> >  	if (error)
> >  		goto out;
> > @@ -2223,19 +2222,22 @@ again:
> >  	 * Attempt to detect a close/fcntl race and recover by
> >  	 * releasing the lock that was just acquired.
> >  	 */
> > -	/*
> > -	 * we need that spin_lock here - it prevents reordering between
> > -	 * update of i_flctx->flc_posix and check for it done in close().
> > -	 * rcu_read_lock() wouldn't do.
> > -	 */
> > -	spin_lock(&current->files->file_lock);
> > -	f = fcheck(fd);
> > -	spin_unlock(&current->files->file_lock);
> > -	if (!error && f != filp && flock.l_type != F_UNLCK) {
> > -		flock.l_type = F_UNLCK;
> > -		goto again;
> > +	if (!error && file_lock->fl_type != F_UNLCK) {
> > +		/*
> > +		 * We need that spin_lock here - it prevents reordering between
> > +		 * update of i_flctx->flc_posix and check for it done in
> > +		 * close(). rcu_read_lock() wouldn't do.
> > +		 */
> > +		spin_lock(&current->files->file_lock);
> > +		f = fcheck(fd);
> > +		spin_unlock(&current->files->file_lock);
> > +		if (f != filp) {
> > +			file_lock->fl_type = F_UNLCK;
> > +			error = do_lock_file_wait(filp, cmd, file_lock);
> > +			WARN_ON_ONCE(error);
> > +			error = -EBADF;
> > +		}
> >  	}
> > -
> >  out:
> >  	locks_free_lock(file_lock);
> >  	return error;
> > @@ -2321,7 +2323,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
> >  		goto out;
> >  	}
> >  
> > -again:
> >  	error = flock64_to_posix_lock(filp, file_lock, &flock);
> >  	if (error)
> >  		goto out;
> > @@ -2363,14 +2364,22 @@ again:
> >  	 * Attempt to detect a close/fcntl race and recover by
> >  	 * releasing the lock that was just acquired.
> >  	 */
> > -	spin_lock(&current->files->file_lock);
> > -	f = fcheck(fd);
> > -	spin_unlock(&current->files->file_lock);
> > -	if (!error && f != filp && flock.l_type != F_UNLCK) {
> > -		flock.l_type = F_UNLCK;
> > -		goto again;
> > +	if (!error && file_lock->fl_type != F_UNLCK) {
> > +		/*
> > +		 * We need that spin_lock here - it prevents reordering between
> > +		 * update of i_flctx->flc_posix and check for it done in
> > +		 * close(). rcu_read_lock() wouldn't do.
> > +		 */
> > +		spin_lock(&current->files->file_lock);
> > +		f = fcheck(fd);
> > +		spin_unlock(&current->files->file_lock);
> > +		if (f != filp) {
> > +			file_lock->fl_type = F_UNLCK;
> > +			error = do_lock_file_wait(filp, cmd, file_lock);
> > +			WARN_ON_ONCE(error);
> > +			error = -EBADF;
> > +		}
> >  	}
> > -
> >  out:
> >  	locks_free_lock(file_lock);
> >  	return error;
> > -- 
> > 2.5.0  


-- 
Jeff Layton <jlayton@poochiereds.net>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] locks: fix unlock when fcntl_setlk races with a close
  2016-01-08 12:48   ` Jeff Layton
@ 2016-01-08 16:16     ` J. Bruce Fields
  0 siblings, 0 replies; 20+ messages in thread
From: J. Bruce Fields @ 2016-01-08 16:16 UTC (permalink / raw)
  To: Jeff Layton
  Cc: linux-fsdevel, linux-kernel, Dmitry Vyukov, Alexander Viro,
	syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

On Fri, Jan 08, 2016 at 07:48:04AM -0500, Jeff Layton wrote:
> On Thu,  7 Jan 2016 21:22:22 -0500
> Jeff Layton <jlayton@poochiereds.net> wrote:
> 
> > Dmitry reported that he was able to reproduce the WARN_ON_ONCE that
> > fires in locks_free_lock_context when the flc_posix list isn't empty.
> > 
> > The problem turns out to be that we're basically rebuilding the
> > file_lock from scratch in fcntl_setlk when we discover that the setlk
> > has raced with a close. If the l_whence field is SEEK_CUR or SEEK_END,
> > then we may end up with fl_start and fl_end values that differ from
> > when the lock was initially set, if the file position or length of the
> > file has changed in the interim.
> > 
> > Fix this by just reusing the same lock request structure, and simply
> > override fl_type value with F_UNLCK as appropriate. That ensures that
> > we really are unlocking the lock that was initially set.
> > 
> > While we're there, make sure that we do pop a WARN_ON_ONCE if the
> > removal ever fails. Also return -EBADF in this event, since that's
> > what we would have returned if the close had happened earlier.
> > 
> > Cc: <stable@vger.kernel.org>
> > Reported-by: Dmitry Vyukov <dvyukov@google.com>
> > Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
> > ---
> >  fs/locks.c | 19 ++++++++++---------
> >  1 file changed, 10 insertions(+), 9 deletions(-)
> > 
> > diff --git a/fs/locks.c b/fs/locks.c
> > index 593dca300b29..0db640e4ced4 100644
> > --- a/fs/locks.c
> > +++ b/fs/locks.c
> > @@ -2181,7 +2181,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
> >  		goto out;
> >  	}
> >  
> > -again:
> >  	error = flock_to_posix_lock(filp, file_lock, &flock);
> >  	if (error)
> >  		goto out;
> > @@ -2231,9 +2230,11 @@ again:
> >  	spin_lock(&current->files->file_lock);
> >  	f = fcheck(fd);
> >  	spin_unlock(&current->files->file_lock);
> > -	if (!error && f != filp && flock.l_type != F_UNLCK) {
> > -		flock.l_type = F_UNLCK;
> > -		goto again;
> > +	if (!error && f != filp && file_lock->fl_type != F_UNLCK) {
> > +		file_lock->fl_type = F_UNLCK;
> > +		error = do_lock_file_wait(filp, cmd, file_lock);
> > +		WARN_ON_ONCE(error);
> > +		error = -EBADF;
> >  	}
> >  
> >  out:
> > @@ -2321,7 +2322,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
> >  		goto out;
> >  	}
> >  
> > -again:
> >  	error = flock64_to_posix_lock(filp, file_lock, &flock);
> >  	if (error)
> >  		goto out;
> > @@ -2366,11 +2366,12 @@ again:
> >  	spin_lock(&current->files->file_lock);
> >  	f = fcheck(fd);
> >  	spin_unlock(&current->files->file_lock);
> > -	if (!error && f != filp && flock.l_type != F_UNLCK) {
> > -		flock.l_type = F_UNLCK;
> > -		goto again;
> > +	if (!error && f != filp && file_lock->fl_type != F_UNLCK) {
> > +		file_lock->fl_type = F_UNLCK;
> > +		error = do_lock_file_wait(filp, cmd, file_lock);
> > +		WARN_ON_ONCE(error);
> > +		error = -EBADF;
> >  	}
> > -
> >  out:
> >  	locks_free_lock(file_lock);
> >  	return error;
> 
> While this does fix Dmitry's reproducer, I think the basic concept of
> removing locks like this after they are set is racy. Consider where we
> have two threads:
> 
> Thread1				Thread2
> ----------------------------------------------------------------------------
> fd1 = memfd_create(...);
> fd2 = dup(fd1);
> 				fcntl(fd2, F_SETLK);
> 				(Here we call fcntl, and lock is set, but
> 				 task gets scheduled out before fcheck)
> close(fd2)
> fcntl(fd1, F_SETLK...);
> 
> 				Task scheduled back in, does fcheck for fd2
> 				and finds that it's gone. Removes the lock
> 				that Thread1 just set.
> 
> So that seems wrong...in the face of the race above we can end up with
> no lock set on the file, even though Thread1 thinks it has one. It is a
> pretty unlikely race, but I don't see anything that prevents it.
> 
> The fix for filesystems that do not define their own ->lock op would be
> pretty simple. We could do a fcheck after taking the flc_lock, but
> before setting the lock on the file. The flc_lock should be enough to
> prevent that race (though we may need to revisit some of the lockless
> checks in locks_remove_posix). That wouldn't work for filesystems that
> do set ->lock though, and I think we really do need a more general
> solution there.
> 
> The good news is that OFD locks should be exempt from that fcheck
> altogether. I'll spin up another patch for that, so we can at least
> ensure that they aren't subject to that race.
> 
> Any thoughts on how to fix the above for traditional POSIX locks though?

This logic seems to fall into a common trap by assuming that the result
of a posix lock followed by an unlock is a no-op.  The assumption is
false because the region, or parts of it, may have been locked by the
same owner before the initial lock.  You might think you're free of that
logic since closing is a scorched-earth "remove every lock owned by this
owner" event, except that as you point out the lock+unlock isn't atomic
here....

OK, I'm just repeating what you've said really.  I don't know how to fix
it.

--b.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 1/6] locks: fix unlock when fcntl_setlk races with a close
  2016-01-08 16:11         ` Jeff Layton
@ 2016-01-08 16:21           ` J. Bruce Fields
  2016-01-08 16:22             ` J. Bruce Fields
  0 siblings, 1 reply; 20+ messages in thread
From: J. Bruce Fields @ 2016-01-08 16:21 UTC (permalink / raw)
  To: Jeff Layton
  Cc: linux-fsdevel, linux-kernel, Dmitry Vyukov, Alexander Viro,
	syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

On Fri, Jan 08, 2016 at 11:11:54AM -0500, Jeff Layton wrote:
> On Fri, 8 Jan 2016 10:55:33 -0500
> "J. Bruce Fields" <bfields@fieldses.org> wrote:
> 
> > On Fri, Jan 08, 2016 at 08:50:09AM -0500, Jeff Layton wrote:
> > > Dmitry reported that he was able to reproduce the WARN_ON_ONCE that
> > > fires in locks_free_lock_context when the flc_posix list isn't empty.
> > > 
> > > The problem turns out to be that we're basically rebuilding the
> > > file_lock from scratch in fcntl_setlk when we discover that the setlk
> > > has raced with a close. If the l_whence field is SEEK_CUR or SEEK_END,
> > > then we may end up with fl_start and fl_end values that differ from
> > > when the lock was initially set, if the file position or length of the
> > > file has changed in the interim.
> > > 
> > > Fix this by just reusing the same lock request structure, and simply
> > > override fl_type value with F_UNLCK as appropriate. That ensures that
> > > we really are unlocking the lock that was initially set.  
> > 
> > You could also just do a whole-file unlock, couldn't you?  That would
> > seem less confusing to me.  But maybe I'm missing something.
> > 
> > --b.
> > 
> 
> I considered that too...but I was thinking that might make things even
> worse. Consider:
> 
> Thread1				Thread2
> ----------------------------------------------------------------------------
> fd1 = open(...);
> fd2 = dup(fd1);
> 				fcntl(fd2, F_SETLK);
> 				(Here we call fcntl, and lock is set, but
> 				 task gets scheduled out before fcheck)
> close(fd2)
> fcntl(fd1, F_SETLK...);
> 				Task scheduled back in, does fcheck for fd2
> 				and finds that it's gone. Removes the lock
> 				that Thread1 just set.
> 
> If we just unlock the range that was set then Thread1 won't be affected
> if his lock doesn't overlap Thread2's.
> 
> Is that better or worse? :)
> 
> TBH, I guess all of this is somewhat academic. If you're playing with
> traditional POSIX locks and threads like this, then you really are
> playing with fire.
> 
> We should try to fix that if we can though...

Yeah.  I almost think an OK interim solution would be just to document
the race in the appropriate man page and tell people that if they really
want to use posix locks in an application with lots of threads sharing
file descriptors then they should consider OFD locks.

But maybe there's a fix.

--b.

> 
> > > 
> > > While we're there, make sure that we do pop a WARN_ON_ONCE if the
> > > removal ever fails. Also return -EBADF in this event, since that's
> > > what we would have returned if the close had happened earlier.
> > > 
> > > Cc: "J. Bruce Fields" <bfields@fieldses.org>
> > > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > > Cc: <stable@vger.kernel.org>
> > > Fixes: c293621bbf67 (stale POSIX lock handling)
> > > Reported-by: Dmitry Vyukov <dvyukov@google.com>
> > > Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
> > > ---
> > >  fs/locks.c | 51 ++++++++++++++++++++++++++++++---------------------
> > >  1 file changed, 30 insertions(+), 21 deletions(-)
> > > 
> > > diff --git a/fs/locks.c b/fs/locks.c
> > > index 593dca300b29..c263aff793bc 100644
> > > --- a/fs/locks.c
> > > +++ b/fs/locks.c
> > > @@ -2181,7 +2181,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
> > >  		goto out;
> > >  	}
> > >  
> > > -again:
> > >  	error = flock_to_posix_lock(filp, file_lock, &flock);
> > >  	if (error)
> > >  		goto out;
> > > @@ -2223,19 +2222,22 @@ again:
> > >  	 * Attempt to detect a close/fcntl race and recover by
> > >  	 * releasing the lock that was just acquired.
> > >  	 */
> > > -	/*
> > > -	 * we need that spin_lock here - it prevents reordering between
> > > -	 * update of i_flctx->flc_posix and check for it done in close().
> > > -	 * rcu_read_lock() wouldn't do.
> > > -	 */
> > > -	spin_lock(&current->files->file_lock);
> > > -	f = fcheck(fd);
> > > -	spin_unlock(&current->files->file_lock);
> > > -	if (!error && f != filp && flock.l_type != F_UNLCK) {
> > > -		flock.l_type = F_UNLCK;
> > > -		goto again;
> > > +	if (!error && file_lock->fl_type != F_UNLCK) {
> > > +		/*
> > > +		 * We need that spin_lock here - it prevents reordering between
> > > +		 * update of i_flctx->flc_posix and check for it done in
> > > +		 * close(). rcu_read_lock() wouldn't do.
> > > +		 */
> > > +		spin_lock(&current->files->file_lock);
> > > +		f = fcheck(fd);
> > > +		spin_unlock(&current->files->file_lock);
> > > +		if (f != filp) {
> > > +			file_lock->fl_type = F_UNLCK;
> > > +			error = do_lock_file_wait(filp, cmd, file_lock);
> > > +			WARN_ON_ONCE(error);
> > > +			error = -EBADF;
> > > +		}
> > >  	}
> > > -
> > >  out:
> > >  	locks_free_lock(file_lock);
> > >  	return error;
> > > @@ -2321,7 +2323,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
> > >  		goto out;
> > >  	}
> > >  
> > > -again:
> > >  	error = flock64_to_posix_lock(filp, file_lock, &flock);
> > >  	if (error)
> > >  		goto out;
> > > @@ -2363,14 +2364,22 @@ again:
> > >  	 * Attempt to detect a close/fcntl race and recover by
> > >  	 * releasing the lock that was just acquired.
> > >  	 */
> > > -	spin_lock(&current->files->file_lock);
> > > -	f = fcheck(fd);
> > > -	spin_unlock(&current->files->file_lock);
> > > -	if (!error && f != filp && flock.l_type != F_UNLCK) {
> > > -		flock.l_type = F_UNLCK;
> > > -		goto again;
> > > +	if (!error && file_lock->fl_type != F_UNLCK) {
> > > +		/*
> > > +		 * We need that spin_lock here - it prevents reordering between
> > > +		 * update of i_flctx->flc_posix and check for it done in
> > > +		 * close(). rcu_read_lock() wouldn't do.
> > > +		 */
> > > +		spin_lock(&current->files->file_lock);
> > > +		f = fcheck(fd);
> > > +		spin_unlock(&current->files->file_lock);
> > > +		if (f != filp) {
> > > +			file_lock->fl_type = F_UNLCK;
> > > +			error = do_lock_file_wait(filp, cmd, file_lock);
> > > +			WARN_ON_ONCE(error);
> > > +			error = -EBADF;
> > > +		}
> > >  	}
> > > -
> > >  out:
> > >  	locks_free_lock(file_lock);
> > >  	return error;
> > > -- 
> > > 2.5.0  
> 
> 
> -- 
> Jeff Layton <jlayton@poochiereds.net>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 1/6] locks: fix unlock when fcntl_setlk races with a close
  2016-01-08 16:21           ` J. Bruce Fields
@ 2016-01-08 16:22             ` J. Bruce Fields
  2016-01-08 16:26               ` J. Bruce Fields
  0 siblings, 1 reply; 20+ messages in thread
From: J. Bruce Fields @ 2016-01-08 16:22 UTC (permalink / raw)
  To: Jeff Layton
  Cc: linux-fsdevel, linux-kernel, Dmitry Vyukov, Alexander Viro,
	syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

On Fri, Jan 08, 2016 at 11:21:01AM -0500, J. Bruce Fields wrote:
> On Fri, Jan 08, 2016 at 11:11:54AM -0500, Jeff Layton wrote:
> > On Fri, 8 Jan 2016 10:55:33 -0500
> > "J. Bruce Fields" <bfields@fieldses.org> wrote:
> > 
> > > On Fri, Jan 08, 2016 at 08:50:09AM -0500, Jeff Layton wrote:
> > > > Dmitry reported that he was able to reproduce the WARN_ON_ONCE that
> > > > fires in locks_free_lock_context when the flc_posix list isn't empty.
> > > > 
> > > > The problem turns out to be that we're basically rebuilding the
> > > > file_lock from scratch in fcntl_setlk when we discover that the setlk
> > > > has raced with a close. If the l_whence field is SEEK_CUR or SEEK_END,
> > > > then we may end up with fl_start and fl_end values that differ from
> > > > when the lock was initially set, if the file position or length of the
> > > > file has changed in the interim.
> > > > 
> > > > Fix this by just reusing the same lock request structure, and simply
> > > > override fl_type value with F_UNLCK as appropriate. That ensures that
> > > > we really are unlocking the lock that was initially set.  
> > > 
> > > You could also just do a whole-file unlock, couldn't you?  That would
> > > seem less confusing to me.  But maybe I'm missing something.
> > > 
> > > --b.
> > > 
> > 
> > I considered that too...but I was thinking that might make things even
> > worse. Consider:
> > 
> > Thread1				Thread2
> > ----------------------------------------------------------------------------
> > fd1 = open(...);
> > fd2 = dup(fd1);
> > 				fcntl(fd2, F_SETLK);
> > 				(Here we call fcntl, and lock is set, but
> > 				 task gets scheduled out before fcheck)
> > close(fd2)
> > fcntl(fd1, F_SETLK...);
> > 				Task scheduled back in, does fcheck for fd2
> > 				and finds that it's gone. Removes the lock
> > 				that Thread1 just set.
> > 
> > If we just unlock the range that was set then Thread1 won't be affected
> > if his lock doesn't overlap Thread2's.
> > 
> > Is that better or worse? :)
> > 
> > TBH, I guess all of this is somewhat academic. If you're playing with
> > traditional POSIX locks and threads like this, then you really are
> > playing with fire.
> > 
> > We should try to fix that if we can though...
> 
> Yeah.  I almost think an OK interim solution would be just to document
> the race in the appropriate man page and tell people that if they really
> want to use posix locks in an application with lots of threads sharing
> file descriptors then they should consider OFD locks.

(Especially if this race has always existed.)

--b.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 1/6] locks: fix unlock when fcntl_setlk races with a close
  2016-01-08 16:22             ` J. Bruce Fields
@ 2016-01-08 16:26               ` J. Bruce Fields
  0 siblings, 0 replies; 20+ messages in thread
From: J. Bruce Fields @ 2016-01-08 16:26 UTC (permalink / raw)
  To: Jeff Layton
  Cc: linux-fsdevel, linux-kernel, Dmitry Vyukov, Alexander Viro,
	syzkaller, Kostya Serebryany, Alexander Potapenko, Sasha Levin,
	Eric Dumazet

On Fri, Jan 08, 2016 at 11:22:04AM -0500, J. Bruce Fields wrote:
> On Fri, Jan 08, 2016 at 11:21:01AM -0500, J. Bruce Fields wrote:
> > Yeah.  I almost think an OK interim solution would be just to document
> > the race in the appropriate man page and tell people that if they really
> > want to use posix locks in an application with lots of threads sharing
> > file descriptors then they should consider OFD locks.
> 
> (Especially if this race has always existed.)

Oh but note I'm only talking about this additional race that you've
identified.

I agree that the problem addressed by these patches needs fixing, and
gave them a quick read and they look OK to me--feel free to add my ACK.

--b.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: fs: WARNING in locks_free_lock_context()
  2015-12-23 13:54 ` Jeff Layton
@ 2016-02-03 18:19   ` William Dauchy
  2016-02-03 18:26     ` Jeff Layton
  0 siblings, 1 reply; 20+ messages in thread
From: William Dauchy @ 2016-02-03 18:19 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Dmitry Vyukov, J. Bruce Fields, Alexander Viro, linux-fsdevel,
	LKML, syzkaller, Kostya Serebryany, Alexander Potapenko,
	Sasha Levin, Eric Dumazet

Hello Jeff,

On Wed, Dec 23, 2015 at 2:54 PM, Jeff Layton <jlayton@poochiereds.net> wrote:
> Ooh, nice catch...and just in time for Christmas.
>
> filp_close does this after the fd has been detached from the file table
> in __close_fd:
>
>         if (likely(!(filp->f_mode & FMODE_PATH))) {
>                 dnotify_flush(filp, id);
>                 locks_remove_posix(filp, id);
>         }
>         fput(filp);
>
> ...and fcntl_setlk does this:
>
>         /*
>          * Attempt to detect a close/fcntl race and recover by
>          * releasing the lock that was just acquired.
>          */
>         /*
>          * we need that spin_lock here - it prevents reordering between
>          * update of i_flctx->flc_posix and check for it done in close().
>          * rcu_read_lock() wouldn't do.
>          */
>         spin_lock(&current->files->file_lock);
>         f = fcheck(fd);
>         spin_unlock(&current->files->file_lock);
>         if (!error && f != filp && flock.l_type != F_UNLCK) {
>                 flock.l_type = F_UNLCK;
>                 goto again;
>         }
>
> ...so in principle that should keep new locks from racing onto the list
> just after we call filp_close. Hmm...I'll see if I can reproduce and
> figure out how this could happen.

Just wondering if you had the time to figure out this warning?

Thanks,
-- 
William

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: fs: WARNING in locks_free_lock_context()
  2016-02-03 18:19   ` William Dauchy
@ 2016-02-03 18:26     ` Jeff Layton
  2016-02-03 18:28       ` William Dauchy
  0 siblings, 1 reply; 20+ messages in thread
From: Jeff Layton @ 2016-02-03 18:26 UTC (permalink / raw)
  To: William Dauchy
  Cc: Dmitry Vyukov, J. Bruce Fields, Alexander Viro, linux-fsdevel,
	LKML, syzkaller, Kostya Serebryany, Alexander Potapenko,
	Sasha Levin, Eric Dumazet

On Wed, 3 Feb 2016 19:19:37 +0100
William Dauchy <wdauchy@gmail.com> wrote:

> Hello Jeff,
> 
> On Wed, Dec 23, 2015 at 2:54 PM, Jeff Layton <jlayton@poochiereds.net> wrote:
> > Ooh, nice catch...and just in time for Christmas.
> >
> > filp_close does this after the fd has been detached from the file table
> > in __close_fd:
> >
> >         if (likely(!(filp->f_mode & FMODE_PATH))) {
> >                 dnotify_flush(filp, id);
> >                 locks_remove_posix(filp, id);
> >         }
> >         fput(filp);
> >
> > ...and fcntl_setlk does this:
> >
> >         /*
> >          * Attempt to detect a close/fcntl race and recover by
> >          * releasing the lock that was just acquired.
> >          */
> >         /*
> >          * we need that spin_lock here - it prevents reordering between
> >          * update of i_flctx->flc_posix and check for it done in close().
> >          * rcu_read_lock() wouldn't do.
> >          */
> >         spin_lock(&current->files->file_lock);
> >         f = fcheck(fd);
> >         spin_unlock(&current->files->file_lock);
> >         if (!error && f != filp && flock.l_type != F_UNLCK) {
> >                 flock.l_type = F_UNLCK;
> >                 goto again;
> >         }
> >
> > ...so in principle that should keep new locks from racing onto the list
> > just after we call filp_close. Hmm...I'll see if I can reproduce and
> > figure out how this could happen.  
> 
> Just wondering if you had the time to figure out this warning?
> 
> Thanks,

Yes...this commit in mainline fixes it:

commit 7f3697e24dc3820b10f445a4a7d914fc356012d1
Author: Jeff Layton <jeff.layton@primarydata.com>
Date:   Thu Jan 7 16:38:10 2016 -0500

    locks: fix unlock when fcntl_setlk races with a close


...and the patch is applicable to all kernels currently in circulation.
The original bug is very old (from 2005).

-- 
Jeff Layton <jlayton@poochiereds.net>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: fs: WARNING in locks_free_lock_context()
  2016-02-03 18:26     ` Jeff Layton
@ 2016-02-03 18:28       ` William Dauchy
  0 siblings, 0 replies; 20+ messages in thread
From: William Dauchy @ 2016-02-03 18:28 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Dmitry Vyukov, J. Bruce Fields, Alexander Viro, linux-fsdevel,
	LKML, syzkaller, Kostya Serebryany, Alexander Potapenko,
	Sasha Levin, Eric Dumazet

On Wed, Feb 3, 2016 at 7:26 PM, Jeff Layton <jlayton@poochiereds.net> wrote:
> Yes...this commit in mainline fixes it:

Thanks Jeff, I missed it.

-- 
William

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2016-02-03 18:28 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-23 10:37 fs: WARNING in locks_free_lock_context() Dmitry Vyukov
2015-12-23 13:54 ` Jeff Layton
2016-02-03 18:19   ` William Dauchy
2016-02-03 18:26     ` Jeff Layton
2016-02-03 18:28       ` William Dauchy
2016-01-08  2:22 ` [PATCH] locks: fix unlock when fcntl_setlk races with a close Jeff Layton
2016-01-08 12:48   ` Jeff Layton
2016-01-08 16:16     ` J. Bruce Fields
2016-01-08 13:50   ` [PATCH v2 0/6] locks: better debugging and fix for setlk/close race handling Jeff Layton
2016-01-08 13:50     ` [PATCH v2 1/6] locks: fix unlock when fcntl_setlk races with a close Jeff Layton
2016-01-08 15:55       ` J. Bruce Fields
2016-01-08 16:11         ` Jeff Layton
2016-01-08 16:21           ` J. Bruce Fields
2016-01-08 16:22             ` J. Bruce Fields
2016-01-08 16:26               ` J. Bruce Fields
2016-01-08 13:50     ` [PATCH v2 2/6] locks: don't check for race with close when setting OFD lock Jeff Layton
2016-01-08 13:50     ` [PATCH v2 3/6] locks: sprinkle some tracepoints around the file locking code Jeff Layton
2016-01-08 13:50     ` [PATCH v2 4/6] locks: pass inode pointer to locks_free_lock_context Jeff Layton
2016-01-08 13:50     ` [PATCH v2 5/6] locks: prink more detail when there are leaked locks Jeff Layton
2016-01-08 13:50     ` [PATCH v2 6/6] locks: rename __posix_lock_file to posix_lock_inode Jeff Layton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.