linux-fsdevel.vger.kernel.org archive mirror
* [PATCH] aio: Fix locking in aio_poll()
@ 2019-02-04 17:45 Bart Van Assche
  2019-02-04 17:49 ` Christoph Hellwig
  0 siblings, 1 reply; 12+ messages in thread
From: Bart Van Assche @ 2019-02-04 17:45 UTC (permalink / raw)
  To: Alexander Viro
  Cc: linux-fsdevel, linux-aio, Bart Van Assche, syzbot,
	Christoph Hellwig, Avi Kivity, Eric Dumazet, stable

Since kioctx.ctx_lock may be acquired from IRQ context, all code that
acquires that lock from thread context must disable interrupts. This
patch fixes the following lockdep complaint:

=====================================================
WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
5.0.0-rc4-next-20190131 #23 Not tainted
-----------------------------------------------------
syz-executor2/13779 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
0000000098ac1230 (&fiq->waitq){+.+.}, at: spin_lock include/linux/spinlock.h:329 [inline]
0000000098ac1230 (&fiq->waitq){+.+.}, at: aio_poll fs/aio.c:1772 [inline]
0000000098ac1230 (&fiq->waitq){+.+.}, at: __io_submit_one fs/aio.c:1875 [inline]
0000000098ac1230 (&fiq->waitq){+.+.}, at: io_submit_one+0xedf/0x1cf0 fs/aio.c:1908

and this task is already holding:
000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: spin_lock_irq include/linux/spinlock.h:354 [inline]
000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: aio_poll fs/aio.c:1771 [inline]
000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: __io_submit_one fs/aio.c:1875 [inline]
000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: io_submit_one+0xeb6/0x1cf0 fs/aio.c:1908
which would create a new lock dependency:
 (&(&ctx->ctx_lock)->rlock){..-.} -> (&fiq->waitq){+.+.}

but this new dependency connects a SOFTIRQ-irq-safe lock:
 (&(&ctx->ctx_lock)->rlock){..-.}

... which became SOFTIRQ-irq-safe at:
  lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
  __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline]
  _raw_spin_lock_irq+0x60/0x80 kernel/locking/spinlock.c:160
  spin_lock_irq include/linux/spinlock.h:354 [inline]
  free_ioctx_users+0x2d/0x4a0 fs/aio.c:610
  percpu_ref_put_many include/linux/percpu-refcount.h:285 [inline]
  percpu_ref_put include/linux/percpu-refcount.h:301 [inline]
  percpu_ref_call_confirm_rcu lib/percpu-refcount.c:123 [inline]
  percpu_ref_switch_to_atomic_rcu+0x3e7/0x520 lib/percpu-refcount.c:158
  __rcu_reclaim kernel/rcu/rcu.h:240 [inline]
  rcu_do_batch kernel/rcu/tree.c:2486 [inline]
  invoke_rcu_callbacks kernel/rcu/tree.c:2799 [inline]
  rcu_core+0x928/0x1390 kernel/rcu/tree.c:2780
  __do_softirq+0x266/0x95a kernel/softirq.c:292
  run_ksoftirqd kernel/softirq.c:654 [inline]
  run_ksoftirqd+0x8e/0x110 kernel/softirq.c:646
  smpboot_thread_fn+0x6ab/0xa10 kernel/smpboot.c:164
  kthread+0x357/0x430 kernel/kthread.c:247
  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352

to a SOFTIRQ-irq-unsafe lock:
 (&fiq->waitq){+.+.}

... which became SOFTIRQ-irq-unsafe at:
...
  lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
  __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
  _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
  spin_lock include/linux/spinlock.h:329 [inline]
  flush_bg_queue+0x1f3/0x3c0 fs/fuse/dev.c:415
  fuse_request_queue_background+0x2d1/0x580 fs/fuse/dev.c:676
  fuse_request_send_background+0x58/0x120 fs/fuse/dev.c:687
  fuse_send_init fs/fuse/inode.c:989 [inline]
  fuse_fill_super+0x13bb/0x1730 fs/fuse/inode.c:1214
  mount_nodev+0x68/0x110 fs/super.c:1392
  fuse_mount+0x2d/0x40 fs/fuse/inode.c:1239
  legacy_get_tree+0xf2/0x200 fs/fs_context.c:590
  vfs_get_tree+0x123/0x450 fs/super.c:1481
  do_new_mount fs/namespace.c:2610 [inline]
  do_mount+0x1436/0x2c40 fs/namespace.c:2932
  ksys_mount+0xdb/0x150 fs/namespace.c:3148
  __do_sys_mount fs/namespace.c:3162 [inline]
  __se_sys_mount fs/namespace.c:3159 [inline]
  __x64_sys_mount+0xbe/0x150 fs/namespace.c:3159
  do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
  entry_SYSCALL_64_after_hwframe+0x49/0xbe

other info that might help us debug this:

 Possible interrupt unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&fiq->waitq);
                               local_irq_disable();
                               lock(&(&ctx->ctx_lock)->rlock);
                               lock(&fiq->waitq);
  <Interrupt>
    lock(&(&ctx->ctx_lock)->rlock);

 *** DEADLOCK ***

1 lock held by syz-executor2/13779:
 #0: 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: spin_lock_irq include/linux/spinlock.h:354 [inline]
 #0: 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: aio_poll fs/aio.c:1771 [inline]
 #0: 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: __io_submit_one fs/aio.c:1875 [inline]
 #0: 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: io_submit_one+0xeb6/0x1cf0 fs/aio.c:1908

the dependencies between SOFTIRQ-irq-safe lock and the holding lock:
-> (&(&ctx->ctx_lock)->rlock){..-.} {
   IN-SOFTIRQ-W at:
                    lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
                    __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline]
                    _raw_spin_lock_irq+0x60/0x80 kernel/locking/spinlock.c:160
                    spin_lock_irq include/linux/spinlock.h:354 [inline]
                    free_ioctx_users+0x2d/0x4a0 fs/aio.c:610
                    percpu_ref_put_many include/linux/percpu-refcount.h:285 [inline]
                    percpu_ref_put include/linux/percpu-refcount.h:301 [inline]
                    percpu_ref_call_confirm_rcu lib/percpu-refcount.c:123 [inline]
                    percpu_ref_switch_to_atomic_rcu+0x3e7/0x520 lib/percpu-refcount.c:158
                    __rcu_reclaim kernel/rcu/rcu.h:240 [inline]
                    rcu_do_batch kernel/rcu/tree.c:2486 [inline]
                    invoke_rcu_callbacks kernel/rcu/tree.c:2799 [inline]
                    rcu_core+0x928/0x1390 kernel/rcu/tree.c:2780
                    __do_softirq+0x266/0x95a kernel/softirq.c:292
                    run_ksoftirqd kernel/softirq.c:654 [inline]
                    run_ksoftirqd+0x8e/0x110 kernel/softirq.c:646
                    smpboot_thread_fn+0x6ab/0xa10 kernel/smpboot.c:164
                    kthread+0x357/0x430 kernel/kthread.c:247
                    ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352
   INITIAL USE at:
                   lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
                   __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline]
                   _raw_spin_lock_irq+0x60/0x80 kernel/locking/spinlock.c:160
                   spin_lock_irq include/linux/spinlock.h:354 [inline]
                   __do_sys_io_cancel fs/aio.c:2052 [inline]
                   __se_sys_io_cancel fs/aio.c:2035 [inline]
                   __x64_sys_io_cancel+0xd5/0x5a0 fs/aio.c:2035
                   do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
                   entry_SYSCALL_64_after_hwframe+0x49/0xbe
 }
 ... key      at: [<ffffffff8a574140>] __key.52370+0x0/0x40
 ... acquired at:
   lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
   __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
   _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
   spin_lock include/linux/spinlock.h:329 [inline]
   aio_poll fs/aio.c:1772 [inline]
   __io_submit_one fs/aio.c:1875 [inline]
   io_submit_one+0xedf/0x1cf0 fs/aio.c:1908
   __do_sys_io_submit fs/aio.c:1953 [inline]
   __se_sys_io_submit fs/aio.c:1923 [inline]
   __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923
   do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

the dependencies between the lock to be acquired
 and SOFTIRQ-irq-unsafe lock:
-> (&fiq->waitq){+.+.} {
   HARDIRQ-ON-W at:
                    lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
                    __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
                    _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
                    spin_lock include/linux/spinlock.h:329 [inline]
                    flush_bg_queue+0x1f3/0x3c0 fs/fuse/dev.c:415
                    fuse_request_queue_background+0x2d1/0x580 fs/fuse/dev.c:676
                    fuse_request_send_background+0x58/0x120 fs/fuse/dev.c:687
                    fuse_send_init fs/fuse/inode.c:989 [inline]
                    fuse_fill_super+0x13bb/0x1730 fs/fuse/inode.c:1214
                    mount_nodev+0x68/0x110 fs/super.c:1392
                    fuse_mount+0x2d/0x40 fs/fuse/inode.c:1239
                    legacy_get_tree+0xf2/0x200 fs/fs_context.c:590
                    vfs_get_tree+0x123/0x450 fs/super.c:1481
                    do_new_mount fs/namespace.c:2610 [inline]
                    do_mount+0x1436/0x2c40 fs/namespace.c:2932
                    ksys_mount+0xdb/0x150 fs/namespace.c:3148
                    __do_sys_mount fs/namespace.c:3162 [inline]
                    __se_sys_mount fs/namespace.c:3159 [inline]
                    __x64_sys_mount+0xbe/0x150 fs/namespace.c:3159
                    do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
                    entry_SYSCALL_64_after_hwframe+0x49/0xbe
   SOFTIRQ-ON-W at:
                    lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
                    __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
                    _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
                    spin_lock include/linux/spinlock.h:329 [inline]
                    flush_bg_queue+0x1f3/0x3c0 fs/fuse/dev.c:415
                    fuse_request_queue_background+0x2d1/0x580 fs/fuse/dev.c:676
                    fuse_request_send_background+0x58/0x120 fs/fuse/dev.c:687
                    fuse_send_init fs/fuse/inode.c:989 [inline]
                    fuse_fill_super+0x13bb/0x1730 fs/fuse/inode.c:1214
                    mount_nodev+0x68/0x110 fs/super.c:1392
                    fuse_mount+0x2d/0x40 fs/fuse/inode.c:1239
                    legacy_get_tree+0xf2/0x200 fs/fs_context.c:590
                    vfs_get_tree+0x123/0x450 fs/super.c:1481
                    do_new_mount fs/namespace.c:2610 [inline]
                    do_mount+0x1436/0x2c40 fs/namespace.c:2932
                    ksys_mount+0xdb/0x150 fs/namespace.c:3148
                    __do_sys_mount fs/namespace.c:3162 [inline]
                    __se_sys_mount fs/namespace.c:3159 [inline]
                    __x64_sys_mount+0xbe/0x150 fs/namespace.c:3159
                    do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
                    entry_SYSCALL_64_after_hwframe+0x49/0xbe
   INITIAL USE at:
                   lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
                   __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
                   _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
                   spin_lock include/linux/spinlock.h:329 [inline]
                   flush_bg_queue+0x1f3/0x3c0 fs/fuse/dev.c:415
                   fuse_request_queue_background+0x2d1/0x580 fs/fuse/dev.c:676
                   fuse_request_send_background+0x58/0x120 fs/fuse/dev.c:687
                   fuse_send_init fs/fuse/inode.c:989 [inline]
                   fuse_fill_super+0x13bb/0x1730 fs/fuse/inode.c:1214
                   mount_nodev+0x68/0x110 fs/super.c:1392
                   fuse_mount+0x2d/0x40 fs/fuse/inode.c:1239
                   legacy_get_tree+0xf2/0x200 fs/fs_context.c:590
                   vfs_get_tree+0x123/0x450 fs/super.c:1481
                   do_new_mount fs/namespace.c:2610 [inline]
                   do_mount+0x1436/0x2c40 fs/namespace.c:2932
                   ksys_mount+0xdb/0x150 fs/namespace.c:3148
                   __do_sys_mount fs/namespace.c:3162 [inline]
                   __se_sys_mount fs/namespace.c:3159 [inline]
                   __x64_sys_mount+0xbe/0x150 fs/namespace.c:3159
                   do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
                   entry_SYSCALL_64_after_hwframe+0x49/0xbe
 }
 ... key      at: [<ffffffff8a60dec0>] __key.43450+0x0/0x40
 ... acquired at:
   lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
   __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
   _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
   spin_lock include/linux/spinlock.h:329 [inline]
   aio_poll fs/aio.c:1772 [inline]
   __io_submit_one fs/aio.c:1875 [inline]
   io_submit_one+0xedf/0x1cf0 fs/aio.c:1908
   __do_sys_io_submit fs/aio.c:1953 [inline]
   __se_sys_io_submit fs/aio.c:1923 [inline]
   __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923
   do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

stack backtrace:
CPU: 0 PID: 13779 Comm: syz-executor2 Not tainted 5.0.0-rc4-next-20190131 #23
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x172/0x1f0 lib/dump_stack.c:113
 print_bad_irq_dependency kernel/locking/lockdep.c:1573 [inline]
 check_usage.cold+0x60f/0x940 kernel/locking/lockdep.c:1605
 check_irq_usage kernel/locking/lockdep.c:1650 [inline]
 check_prev_add_irq kernel/locking/lockdep_states.h:8 [inline]
 check_prev_add kernel/locking/lockdep.c:1860 [inline]
 check_prevs_add kernel/locking/lockdep.c:1968 [inline]
 validate_chain kernel/locking/lockdep.c:2339 [inline]
 __lock_acquire+0x1f12/0x4790 kernel/locking/lockdep.c:3320
 lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
 _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
 spin_lock include/linux/spinlock.h:329 [inline]
 aio_poll fs/aio.c:1772 [inline]
 __io_submit_one fs/aio.c:1875 [inline]
 io_submit_one+0xedf/0x1cf0 fs/aio.c:1908
 __do_sys_io_submit fs/aio.c:1953 [inline]
 __se_sys_io_submit fs/aio.c:1923 [inline]
 __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: <stable@vger.kernel.org>
Fixes: e8693bcfa0b4 ("aio: allow direct aio poll comletions for keyed wakeups") # v4.19
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 fs/aio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index b906ff70c90f..41bb7114678e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1688,9 +1688,9 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 			return 0;
 
 		/* try to complete the iocb inline if we can: */
-		if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
+		if (spin_trylock_irq(&iocb->ki_ctx->ctx_lock)) {
 			list_del(&iocb->ki_list);
-			spin_unlock(&iocb->ki_ctx->ctx_lock);
+			spin_unlock_irq(&iocb->ki_ctx->ctx_lock);
 
 			list_del_init(&req->wait.entry);
 			aio_poll_complete(iocb, mask);
-- 
2.20.1.611.gfbb209baf1-goog



* Re: [PATCH] aio: Fix locking in aio_poll()
  2019-02-04 17:45 [PATCH] aio: Fix locking in aio_poll() Bart Van Assche
@ 2019-02-04 17:49 ` Christoph Hellwig
  2019-02-05  8:12   ` Miklos Szeredi
  0 siblings, 1 reply; 12+ messages in thread
From: Christoph Hellwig @ 2019-02-04 17:49 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Alexander Viro, linux-fsdevel, linux-aio, syzbot,
	Christoph Hellwig, Avi Kivity, Eric Dumazet, stable

On Mon, Feb 04, 2019 at 09:45:55AM -0800, Bart Van Assche wrote:
> Since kioctx.ctx_lock may be acquired from IRQ context, all code that
> acquires that lock from thread context must disable interrupts. This
> patch fixes the following lockdep complaint:

But this breaks the real-life users of this interface :(

aio_poll_wake is assigned as the wake function to a waitqueue,
and the waitqueue interface requires that function to be called
with irqs disabled.  It looks like the fuse code is breaking that
contract, so we need to fix that instead of disabling irqs.
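
For reference, here is a minimal sketch of how such a wake callback is
wired up and where its irq state comes from (illustrative only; the
demo_* names are made up and this is not code from this thread):

#include <linux/wait.h>
#include <linux/spinlock.h>

/* Illustrative callback, similar in shape to aio_poll_wake(). */
static int demo_wake(struct wait_queue_entry *wait, unsigned mode,
		     int sync, void *key)
{
	/*
	 * Invoked under wq_head->lock.  wake_up()/wake_up_interruptible()
	 * take that lock with spin_lock_irqsave(), so the callback then
	 * runs with irqs off; wake_up_locked() leaves the locking, and
	 * hence the irq state, entirely to the caller.
	 */
	return 1;
}

static void demo_register(struct wait_queue_head *wq,
			  struct wait_queue_entry *entry)
{
	init_waitqueue_func_entry(entry, demo_wake);
	add_wait_queue(wq, entry);
}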


* Re: [PATCH] aio: Fix locking in aio_poll()
  2019-02-04 17:49 ` Christoph Hellwig
@ 2019-02-05  8:12   ` Miklos Szeredi
  2019-02-06  0:53     ` Bart Van Assche
  0 siblings, 1 reply; 12+ messages in thread
From: Miklos Szeredi @ 2019-02-05  8:12 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Bart Van Assche, Alexander Viro, linux-fsdevel, linux-aio,
	syzbot, Avi Kivity, Eric Dumazet, stable

On Mon, Feb 4, 2019 at 6:49 PM Christoph Hellwig <hch@lst.de> wrote:
>
> On Mon, Feb 04, 2019 at 09:45:55AM -0800, Bart Van Assche wrote:
> > Since kioctx.ctx_lock may be acquired from IRQ context, all code that
> > acquires that lock from thread context must disable interrupts. This
> > patch fixes the following lockdep complaint:
>
> But this breaks the real-life users of this interface :(
>
> aio_poll_wake is assigned as the wake function to a waitqueue,
> and the waitqueue interface requires that function to be called
> with irqs disabled.  It looks like the fuse code is breaking that
> contract, so we need to fix that instead of disabling irqs.

Not all variants of the waitqueue interface require irqs to be
disabled, and fuse has nothing whatsoever to do with irqs, so there's
no sane reason to disable them.

Also, AFAICS, the fuse device does not support asynchronous IO.  I
just don't get what this is about...

Thanks,
Miklos


* Re: [PATCH] aio: Fix locking in aio_poll()
  2019-02-05  8:12   ` Miklos Szeredi
@ 2019-02-06  0:53     ` Bart Van Assche
  2019-02-06  8:36       ` Miklos Szeredi
  2019-02-06 13:39       ` Christoph Hellwig
  0 siblings, 2 replies; 12+ messages in thread
From: Bart Van Assche @ 2019-02-06  0:53 UTC (permalink / raw)
  To: Miklos Szeredi, Christoph Hellwig
  Cc: Alexander Viro, linux-fsdevel, linux-aio, syzbot, Avi Kivity,
	Eric Dumazet, stable

On Tue, 2019-02-05 at 09:12 +0100, Miklos Szeredi wrote:
> Not all variants of the waitqueue interface require irqs to be
> disabled, and fuse has nothing whatsoever to do with irqs, so there's
> no sane reason to disable them.
> 
> Also, AFAICS, the fuse device does not support asynchronous IO.  I
> just don't get what this is about...

Hi Miklos,

Could this be what happens?

aio_poll() calls vfs_poll()
  vfs_poll() calls fuse_dev_poll()
    fuse_dev_poll() calls poll_wait(file, &fiq->waitq, wait)
      poll_wait() calls aio_poll_queue_proc(file, &fiq->waitq, wait)
        aio_poll_queue_proc() stores &fiq->waitq in pt->iocb->poll.head
aio_poll() calls spin_lock_irq(&ctx->ctx_lock)
aio_poll() calls spin_lock(&req->head->lock) (req == &pt->iocb->poll).

I think the lockdep complaint is about the FUSE fiq->waitq lock not being
IRQ-safe and about aio_poll() creating a dependency between an IRQ-safe lock
(ctx->ctx_lock) and a lock that is not IRQ-safe (fiq->waitq).
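
A self-contained sketch of that lock dependency, using stand-in locks
(demo_* names are invented; demo_ctx_lock plays the role of
kioctx.ctx_lock and demo_waitq_lock the role of fiq->waitq.lock):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_ctx_lock);   /* stands in for kioctx.ctx_lock */
static DEFINE_SPINLOCK(demo_waitq_lock); /* stands in for fiq->waitq.lock */

/*
 * Submission path, mirroring the aio_poll() sequence above: the
 * waitqueue lock is taken while the IRQ-safe ctx_lock is held.
 */
static void demo_submit_side(void)
{
	spin_lock_irq(&demo_ctx_lock);
	spin_lock(&demo_waitq_lock);
	spin_unlock(&demo_waitq_lock);
	spin_unlock_irq(&demo_ctx_lock);
}

/*
 * The fuse paths take the same waitqueue lock with irqs left enabled,
 * which is why lockdep treats it as IRQ-unsafe.
 */
static void demo_fuse_side(void)
{
	spin_lock(&demo_waitq_lock);
	spin_unlock(&demo_waitq_lock);
}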

Thanks,

Bart.


* Re: [PATCH] aio: Fix locking in aio_poll()
  2019-02-06  0:53     ` Bart Van Assche
@ 2019-02-06  8:36       ` Miklos Szeredi
  2019-02-06 13:47         ` Christoph Hellwig
  2019-02-06 13:39       ` Christoph Hellwig
  1 sibling, 1 reply; 12+ messages in thread
From: Miklos Szeredi @ 2019-02-06  8:36 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Christoph Hellwig, Alexander Viro, linux-fsdevel, linux-aio,
	syzbot, Avi Kivity, Eric Dumazet, stable

On Tue, Feb 05, 2019 at 04:53:04PM -0800, Bart Van Assche wrote:
> On Tue, 2019-02-05 at 09:12 +0100, Miklos Szeredi wrote:
> > Not all variants of the waitqueue interface require irqs to be
> > disabled, and fuse has nothing whatsoever to do with irqs, so there's
> > no sane reason to disable them.
> > 
> > Also, AFAICS, the fuse device does not support asynchronous IO.  I
> > just don't get what this is about...
> 
> Hi Miklos,
> 
> Could this be what happens?
> 
> aio_poll() calls vfs_poll()
>   vfs_poll() calls fuse_dev_poll()
>     fuse_dev_poll() calls poll_wait(file, &fiq->waitq, wait)
>       poll_wait() calls aio_poll_queue_proc(file, &fiq->waitq, wait)
>         aio_poll_queue_proc() stores &fiq->waitq in pt->iocb->poll.head
> aio_poll() calls spin_lock_irq(&ctx->ctx_lock)
> aio_poll() calls spin_lock(&req->head->lock) (req == &pt->iocb->poll).
> 
> I think the lockdep complaint is about the FUSE fiq->waitq lock not being
> IRQ-safe and about aio_poll() creating a dependency between an IRQ-safe lock
> (ctx->ctx_lock) and a lock that is not IRQ-safe (fiq->waitq).

Yep.  And is AIO doing anything in irq context?  If so, why?

Would it make sense to just disable AIO on file descriptors where it makes zero
sense?

E.g. this patch:

Thanks,
Miklos

---
diff --git a/fs/aio.c b/fs/aio.c
index b906ff70c90f..6f5edd28a83c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1435,6 +1435,9 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
 	req->ki_filp = fget(iocb->aio_fildes);
 	if (unlikely(!req->ki_filp))
 		return -EBADF;
+	ret = -EINVAL;
+	if (req->ki_filp->f_mode & FMODE_NOAIO)
+		goto out_fput;
 	req->ki_complete = aio_complete_rw;
 	req->ki_pos = iocb->aio_offset;
 	req->ki_flags = iocb_flags(req->ki_filp);
@@ -1607,7 +1610,8 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
 	req->file = fget(iocb->aio_fildes);
 	if (unlikely(!req->file))
 		return -EBADF;
-	if (unlikely(!req->file->f_op->fsync)) {
+	if (unlikely(req->file->f_mode & FMODE_NOAIO) ||
+	    unlikely(!req->file->f_op->fsync)) {
 		fput(req->file);
 		return -EINVAL;
 	}
@@ -1755,6 +1759,9 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 	apt.iocb = aiocb;
 	apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
 
+	if (req->file->f_mode & FMODE_NOAIO)
+		goto out;
+
 	/* initialized the list so that we can do list_empty checks */
 	INIT_LIST_HEAD(&req->wait.entry);
 	init_waitqueue_func_entry(&req->wait, aio_poll_wake);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index a5e516a40e7a..c00aa39ec7f0 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1397,6 +1397,7 @@ static int fuse_dev_open(struct inode *inode, struct file *file)
 	 * keep track of whether the file has been mounted already.
 	 */
 	file->private_data = NULL;
+	file->f_mode |= FMODE_NOAIO;
 	return 0;
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 29d8e2cfed0e..7cda153baad3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -162,6 +162,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File does not contribute to nr_files count */
 #define FMODE_NOACCOUNT	((__force fmode_t)0x20000000)
 
+/* File does not support AIO ops */
+#define FMODE_NOAIO	((__force fmode_t)0x40000000)
+
 /*
  * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
  * that indicates that they should check the contents of the iovec are


* Re: [PATCH] aio: Fix locking in aio_poll()
  2019-02-06  0:53     ` Bart Van Assche
  2019-02-06  8:36       ` Miklos Szeredi
@ 2019-02-06 13:39       ` Christoph Hellwig
  1 sibling, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2019-02-06 13:39 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Miklos Szeredi, Christoph Hellwig, Alexander Viro, linux-fsdevel,
	linux-aio, syzbot, Avi Kivity, Eric Dumazet, stable

On Tue, Feb 05, 2019 at 04:53:04PM -0800, Bart Van Assche wrote:
> Could this be what happens?
> 
> aio_poll() calls vfs_poll()
>   vfs_poll() calls fuse_dev_poll()
>     fuse_dev_poll() calls poll_wait(file, &fiq->waitq, wait)
>       poll_wait() calls aio_poll_queue_proc(file, &fiq->waitq, wait)
>         aio_poll_queue_proc() stores &fiq->waitq in pt->iocb->poll.head
> aio_poll() calls spin_lock_irq(&ctx->ctx_lock)
> aio_poll() calls spin_lock(&req->head->lock) (req == &pt->iocb->poll).
> 
> I think the lockdep complaint is about the FUSE fiq->waitq lock not being
> IRQ-safe and about aio_poll() creating a dependency between an IRQ-safe lock
> (ctx->ctx_lock) and a lock that is not IRQ-safe (fiq->waitq).

That is exactly the scenario.  And the ->wake routine assumes irqs
are disabled - you really have to bypass the proper APIs to end up
calling it with irqs enabled.
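
As a rough illustration of that point (demo_wq is an invented example,
not fuse or aio code): wake_up() disables irqs itself before running
the wake callbacks, while wake_up_locked() relies on whatever locking
the caller has already done.

#include <linux/wait.h>
#include <linux/spinlock.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wq);

static void demo_wake_normal(void)
{
	/*
	 * __wake_up() takes demo_wq.lock with spin_lock_irqsave(),
	 * so the wake callbacks see irqs disabled.
	 */
	wake_up(&demo_wq);
}

static void demo_wake_locked(void)
{
	/*
	 * The caller picks the locking; with a plain spin_lock() (as
	 * fuse does), wake callbacks can run with irqs still enabled.
	 */
	spin_lock(&demo_wq.lock);
	wake_up_locked(&demo_wq);
	spin_unlock(&demo_wq.lock);
}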


* Re: [PATCH] aio: Fix locking in aio_poll()
  2019-02-06  8:36       ` Miklos Szeredi
@ 2019-02-06 13:47         ` Christoph Hellwig
  2019-02-06 14:31           ` Miklos Szeredi
  0 siblings, 1 reply; 12+ messages in thread
From: Christoph Hellwig @ 2019-02-06 13:47 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Bart Van Assche, Christoph Hellwig, Alexander Viro,
	linux-fsdevel, linux-aio, syzbot, Avi Kivity, Eric Dumazet,
	stable

On Wed, Feb 06, 2019 at 09:36:26AM +0100, Miklos Szeredi wrote:
> Yep.  And is AIO doing anything in irq context?  If so, why?

Because all waitqueues can be woken, and often are woken from irq
context.

> 
> Would it make sense to just disable AIO on file descriptors where it makes zero
> sense?

Using aio poll makes sense on every fd, just as you can use epoll
or the upcoming io_uring poll on every fd.

Just use the waitqueue API properly in fuse and we are done:

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index a5e516a40e7a..1c693bb6339a 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -387,7 +387,7 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 	forget->forget_one.nodeid = nodeid;
 	forget->forget_one.nlookup = nlookup;
 
-	spin_lock(&fiq->waitq.lock);
+	spin_lock_irq(&fiq->waitq.lock);
 	if (fiq->connected) {
 		fiq->forget_list_tail->next = forget;
 		fiq->forget_list_tail = forget;
@@ -396,7 +396,7 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 	} else {
 		kfree(forget);
 	}
-	spin_unlock(&fiq->waitq.lock);
+	spin_unlock_irq(&fiq->waitq.lock);
 }
 
 static void flush_bg_queue(struct fuse_conn *fc)
@@ -410,10 +410,10 @@ static void flush_bg_queue(struct fuse_conn *fc)
 		req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
 		list_del(&req->list);
 		fc->active_background++;
-		spin_lock(&fiq->waitq.lock);
+		spin_lock_irq(&fiq->waitq.lock);
 		req->in.h.unique = fuse_get_unique(fiq);
 		queue_request(fiq, req);
-		spin_unlock(&fiq->waitq.lock);
+		spin_unlock_irq(&fiq->waitq.lock);
 	}
 }
 
@@ -472,7 +472,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 
 static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
 {
-	spin_lock(&fiq->waitq.lock);
+	spin_lock_irq(&fiq->waitq.lock);
 	if (test_bit(FR_FINISHED, &req->flags)) {
 		spin_unlock(&fiq->waitq.lock);
 		return;
@@ -481,7 +481,7 @@ static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
 		list_add_tail(&req->intr_entry, &fiq->interrupts);
 		wake_up_locked(&fiq->waitq);
 	}
-	spin_unlock(&fiq->waitq.lock);
+	spin_unlock_irq(&fiq->waitq.lock);
 	kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
 }
 
@@ -535,9 +535,9 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 	struct fuse_iqueue *fiq = &fc->iq;
 
 	BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
-	spin_lock(&fiq->waitq.lock);
+	spin_lock_irq(&fiq->waitq.lock);
 	if (!fiq->connected) {
-		spin_unlock(&fiq->waitq.lock);
+		spin_unlock_irq(&fiq->waitq.lock);
 		req->out.h.error = -ENOTCONN;
 	} else {
 		req->in.h.unique = fuse_get_unique(fiq);
@@ -545,7 +545,7 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 		/* acquire extra reference, since request is still needed
 		   after request_end() */
 		__fuse_get_request(req);
-		spin_unlock(&fiq->waitq.lock);
+		spin_unlock_irq(&fiq->waitq.lock);
 
 		request_wait_answer(fc, req);
 		/* Pairs with smp_wmb() in request_end() */
@@ -676,12 +676,12 @@ static int fuse_request_send_notify_reply(struct fuse_conn *fc,
 
 	__clear_bit(FR_ISREPLY, &req->flags);
 	req->in.h.unique = unique;
-	spin_lock(&fiq->waitq.lock);
+	spin_lock_irq(&fiq->waitq.lock);
 	if (fiq->connected) {
 		queue_request(fiq, req);
 		err = 0;
 	}
-	spin_unlock(&fiq->waitq.lock);
+	spin_unlock_irq(&fiq->waitq.lock);
 
 	return err;
 }


* Re: [PATCH] aio: Fix locking in aio_poll()
  2019-02-06 13:47         ` Christoph Hellwig
@ 2019-02-06 14:31           ` Miklos Szeredi
  0 siblings, 0 replies; 12+ messages in thread
From: Miklos Szeredi @ 2019-02-06 14:31 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Bart Van Assche, Alexander Viro, linux-fsdevel, linux-aio,
	syzbot, Avi Kivity, Eric Dumazet, stable

On Wed, Feb 06, 2019 at 02:47:58PM +0100, Christoph Hellwig wrote:

> Just use the waitqueue API properly in fuse and we are done:

No, the waitqueue implementation is not assuming disabled irqs; only AIO is.

How about this?

Thanks,
Miklos
---

diff --git a/fs/aio.c b/fs/aio.c
index b906ff70c90f..18b1d3947592 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1679,6 +1679,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
 	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
 	__poll_t mask = key_to_poll(key);
+	unsigned long flags;
 
 	req->woken = true;
 
@@ -1688,9 +1689,9 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 			return 0;
 
 		/* try to complete the iocb inline if we can: */
-		if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
+		if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
 			list_del(&iocb->ki_list);
-			spin_unlock(&iocb->ki_ctx->ctx_lock);
+			spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
 
 			list_del_init(&req->wait.entry);
 			aio_poll_complete(iocb, mask);


* Re: [PATCH] aio: Fix locking in aio_poll()
  2019-02-21 22:28   ` Bart Van Assche
@ 2019-02-22  3:17     ` Al Viro
  0 siblings, 0 replies; 12+ messages in thread
From: Al Viro @ 2019-02-22  3:17 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Christoph Hellwig, linux-fsdevel, linux-aio, syzbot, Avi Kivity,
	Miklos Szeredi, stable

On Thu, Feb 21, 2019 at 02:28:59PM -0800, Bart Van Assche wrote:
> On Tue, 2019-02-12 at 08:56 +0100, Christoph Hellwig wrote:
> > Hi Bart,
> > 
> > this seems to survive my testing, even if this probably just means
> > the "creative" abusers of the wait queue API will run into other
> > problems down the road..
> > 
> > Reluctantly-Acked-by: Christoph Hellwig <hch@lst.de>
> 
> Thank you Christoph for the ack.
> 
> Al, do you need anything additional from me before this patch can be queued?

Applied.


* Re: [PATCH] aio: Fix locking in aio_poll()
  2019-02-12  7:56 ` Christoph Hellwig
@ 2019-02-21 22:28   ` Bart Van Assche
  2019-02-22  3:17     ` Al Viro
  0 siblings, 1 reply; 12+ messages in thread
From: Bart Van Assche @ 2019-02-21 22:28 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Alexander Viro, linux-fsdevel, linux-aio, syzbot, Avi Kivity,
	Miklos Szeredi, stable

On Tue, 2019-02-12 at 08:56 +0100, Christoph Hellwig wrote:
> Hi Bart,
> 
> this seems to survive my testing, even if this probably just means
> the "creative" abusers of the wait queue API will run into other
> problems down the road..
> 
> Reluctantly-Acked-by: Christoph Hellwig <hch@lst.de>

Thank you Christoph for the ack.

Al, do you need anything additional from me before this patch can be queued?

Thanks,

Bart.


* Re: [PATCH] aio: Fix locking in aio_poll()
  2019-02-09  0:59 Bart Van Assche
@ 2019-02-12  7:56 ` Christoph Hellwig
  2019-02-21 22:28   ` Bart Van Assche
  0 siblings, 1 reply; 12+ messages in thread
From: Christoph Hellwig @ 2019-02-12  7:56 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Alexander Viro, linux-fsdevel, linux-aio, syzbot,
	Christoph Hellwig, Avi Kivity, Miklos Szeredi, stable

Hi Bart,

this seems to survive my testing, even if this probably just means
the "creative" abusers of the wait queue API will run into other
problems down the road..

Reluctantly-Acked-by: Christoph Hellwig <hch@lst.de>


* [PATCH] aio: Fix locking in aio_poll()
@ 2019-02-09  0:59 Bart Van Assche
  2019-02-12  7:56 ` Christoph Hellwig
  0 siblings, 1 reply; 12+ messages in thread
From: Bart Van Assche @ 2019-02-09  0:59 UTC (permalink / raw)
  To: Alexander Viro
  Cc: linux-fsdevel, linux-aio, Bart Van Assche, syzbot,
	Christoph Hellwig, Avi Kivity, Miklos Szeredi, stable

wake_up_locked() may, but does not have to, be called with interrupts
disabled. Since the fuse filesystem calls wake_up_locked() without
disabling interrupts, aio_poll_wake() may be called with interrupts
enabled. Since the kioctx.ctx_lock may be acquired from IRQ context,
all code that acquires that lock from thread context must disable
interrupts. Hence change the spin_trylock() call in aio_poll_wake()
into a spin_trylock_irqsave() call. This patch fixes the following
lockdep complaint:

=====================================================
WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
5.0.0-rc4-next-20190131 #23 Not tainted
-----------------------------------------------------
syz-executor2/13779 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
0000000098ac1230 (&fiq->waitq){+.+.}, at: spin_lock include/linux/spinlock.h:329 [inline]
0000000098ac1230 (&fiq->waitq){+.+.}, at: aio_poll fs/aio.c:1772 [inline]
0000000098ac1230 (&fiq->waitq){+.+.}, at: __io_submit_one fs/aio.c:1875 [inline]
0000000098ac1230 (&fiq->waitq){+.+.}, at: io_submit_one+0xedf/0x1cf0 fs/aio.c:1908

and this task is already holding:
000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: spin_lock_irq include/linux/spinlock.h:354 [inline]
000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: aio_poll fs/aio.c:1771 [inline]
000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: __io_submit_one fs/aio.c:1875 [inline]
000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: io_submit_one+0xeb6/0x1cf0 fs/aio.c:1908
which would create a new lock dependency:
 (&(&ctx->ctx_lock)->rlock){..-.} -> (&fiq->waitq){+.+.}

but this new dependency connects a SOFTIRQ-irq-safe lock:
 (&(&ctx->ctx_lock)->rlock){..-.}

... which became SOFTIRQ-irq-safe at:
  lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
  __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline]
  _raw_spin_lock_irq+0x60/0x80 kernel/locking/spinlock.c:160
  spin_lock_irq include/linux/spinlock.h:354 [inline]
  free_ioctx_users+0x2d/0x4a0 fs/aio.c:610
  percpu_ref_put_many include/linux/percpu-refcount.h:285 [inline]
  percpu_ref_put include/linux/percpu-refcount.h:301 [inline]
  percpu_ref_call_confirm_rcu lib/percpu-refcount.c:123 [inline]
  percpu_ref_switch_to_atomic_rcu+0x3e7/0x520 lib/percpu-refcount.c:158
  __rcu_reclaim kernel/rcu/rcu.h:240 [inline]
  rcu_do_batch kernel/rcu/tree.c:2486 [inline]
  invoke_rcu_callbacks kernel/rcu/tree.c:2799 [inline]
  rcu_core+0x928/0x1390 kernel/rcu/tree.c:2780
  __do_softirq+0x266/0x95a kernel/softirq.c:292
  run_ksoftirqd kernel/softirq.c:654 [inline]
  run_ksoftirqd+0x8e/0x110 kernel/softirq.c:646
  smpboot_thread_fn+0x6ab/0xa10 kernel/smpboot.c:164
  kthread+0x357/0x430 kernel/kthread.c:247
  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352

to a SOFTIRQ-irq-unsafe lock:
 (&fiq->waitq){+.+.}

... which became SOFTIRQ-irq-unsafe at:
...
  lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
  __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
  _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
  spin_lock include/linux/spinlock.h:329 [inline]
  flush_bg_queue+0x1f3/0x3c0 fs/fuse/dev.c:415
  fuse_request_queue_background+0x2d1/0x580 fs/fuse/dev.c:676
  fuse_request_send_background+0x58/0x120 fs/fuse/dev.c:687
  fuse_send_init fs/fuse/inode.c:989 [inline]
  fuse_fill_super+0x13bb/0x1730 fs/fuse/inode.c:1214
  mount_nodev+0x68/0x110 fs/super.c:1392
  fuse_mount+0x2d/0x40 fs/fuse/inode.c:1239
  legacy_get_tree+0xf2/0x200 fs/fs_context.c:590
  vfs_get_tree+0x123/0x450 fs/super.c:1481
  do_new_mount fs/namespace.c:2610 [inline]
  do_mount+0x1436/0x2c40 fs/namespace.c:2932
  ksys_mount+0xdb/0x150 fs/namespace.c:3148
  __do_sys_mount fs/namespace.c:3162 [inline]
  __se_sys_mount fs/namespace.c:3159 [inline]
  __x64_sys_mount+0xbe/0x150 fs/namespace.c:3159
  do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
  entry_SYSCALL_64_after_hwframe+0x49/0xbe

other info that might help us debug this:

 Possible interrupt unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&fiq->waitq);
                               local_irq_disable();
                               lock(&(&ctx->ctx_lock)->rlock);
                               lock(&fiq->waitq);
  <Interrupt>
    lock(&(&ctx->ctx_lock)->rlock);

 *** DEADLOCK ***

1 lock held by syz-executor2/13779:
 #0: 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: spin_lock_irq include/linux/spinlock.h:354 [inline]
 #0: 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: aio_poll fs/aio.c:1771 [inline]
 #0: 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: __io_submit_one fs/aio.c:1875 [inline]
 #0: 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: io_submit_one+0xeb6/0x1cf0 fs/aio.c:1908

the dependencies between SOFTIRQ-irq-safe lock and the holding lock:
-> (&(&ctx->ctx_lock)->rlock){..-.} {
   IN-SOFTIRQ-W at:
                    lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
                    __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline]
                    _raw_spin_lock_irq+0x60/0x80 kernel/locking/spinlock.c:160
                    spin_lock_irq include/linux/spinlock.h:354 [inline]
                    free_ioctx_users+0x2d/0x4a0 fs/aio.c:610
                    percpu_ref_put_many include/linux/percpu-refcount.h:285 [inline]
                    percpu_ref_put include/linux/percpu-refcount.h:301 [inline]
                    percpu_ref_call_confirm_rcu lib/percpu-refcount.c:123 [inline]
                    percpu_ref_switch_to_atomic_rcu+0x3e7/0x520 lib/percpu-refcount.c:158
                    __rcu_reclaim kernel/rcu/rcu.h:240 [inline]
                    rcu_do_batch kernel/rcu/tree.c:2486 [inline]
                    invoke_rcu_callbacks kernel/rcu/tree.c:2799 [inline]
                    rcu_core+0x928/0x1390 kernel/rcu/tree.c:2780
                    __do_softirq+0x266/0x95a kernel/softirq.c:292
                    run_ksoftirqd kernel/softirq.c:654 [inline]
                    run_ksoftirqd+0x8e/0x110 kernel/softirq.c:646
                    smpboot_thread_fn+0x6ab/0xa10 kernel/smpboot.c:164
                    kthread+0x357/0x430 kernel/kthread.c:247
                    ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352
   INITIAL USE at:
                   lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
                   __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline]
                   _raw_spin_lock_irq+0x60/0x80 kernel/locking/spinlock.c:160
                   spin_lock_irq include/linux/spinlock.h:354 [inline]
                   __do_sys_io_cancel fs/aio.c:2052 [inline]
                   __se_sys_io_cancel fs/aio.c:2035 [inline]
                   __x64_sys_io_cancel+0xd5/0x5a0 fs/aio.c:2035
                   do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
                   entry_SYSCALL_64_after_hwframe+0x49/0xbe
 }
 ... key      at: [<ffffffff8a574140>] __key.52370+0x0/0x40
 ... acquired at:
   lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
   __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
   _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
   spin_lock include/linux/spinlock.h:329 [inline]
   aio_poll fs/aio.c:1772 [inline]
   __io_submit_one fs/aio.c:1875 [inline]
   io_submit_one+0xedf/0x1cf0 fs/aio.c:1908
   __do_sys_io_submit fs/aio.c:1953 [inline]
   __se_sys_io_submit fs/aio.c:1923 [inline]
   __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923
   do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

the dependencies between the lock to be acquired
 and SOFTIRQ-irq-unsafe lock:
-> (&fiq->waitq){+.+.} {
   HARDIRQ-ON-W at:
                    lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
                    __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
                    _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
                    spin_lock include/linux/spinlock.h:329 [inline]
                    flush_bg_queue+0x1f3/0x3c0 fs/fuse/dev.c:415
                    fuse_request_queue_background+0x2d1/0x580 fs/fuse/dev.c:676
                    fuse_request_send_background+0x58/0x120 fs/fuse/dev.c:687
                    fuse_send_init fs/fuse/inode.c:989 [inline]
                    fuse_fill_super+0x13bb/0x1730 fs/fuse/inode.c:1214
                    mount_nodev+0x68/0x110 fs/super.c:1392
                    fuse_mount+0x2d/0x40 fs/fuse/inode.c:1239
                    legacy_get_tree+0xf2/0x200 fs/fs_context.c:590
                    vfs_get_tree+0x123/0x450 fs/super.c:1481
                    do_new_mount fs/namespace.c:2610 [inline]
                    do_mount+0x1436/0x2c40 fs/namespace.c:2932
                    ksys_mount+0xdb/0x150 fs/namespace.c:3148
                    __do_sys_mount fs/namespace.c:3162 [inline]
                    __se_sys_mount fs/namespace.c:3159 [inline]
                    __x64_sys_mount+0xbe/0x150 fs/namespace.c:3159
                    do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
                    entry_SYSCALL_64_after_hwframe+0x49/0xbe
   SOFTIRQ-ON-W at:
                    lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
                    __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
                    _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
                    spin_lock include/linux/spinlock.h:329 [inline]
                    flush_bg_queue+0x1f3/0x3c0 fs/fuse/dev.c:415
                    fuse_request_queue_background+0x2d1/0x580 fs/fuse/dev.c:676
                    fuse_request_send_background+0x58/0x120 fs/fuse/dev.c:687
                    fuse_send_init fs/fuse/inode.c:989 [inline]
                    fuse_fill_super+0x13bb/0x1730 fs/fuse/inode.c:1214
                    mount_nodev+0x68/0x110 fs/super.c:1392
                    fuse_mount+0x2d/0x40 fs/fuse/inode.c:1239
                    legacy_get_tree+0xf2/0x200 fs/fs_context.c:590
                    vfs_get_tree+0x123/0x450 fs/super.c:1481
                    do_new_mount fs/namespace.c:2610 [inline]
                    do_mount+0x1436/0x2c40 fs/namespace.c:2932
                    ksys_mount+0xdb/0x150 fs/namespace.c:3148
                    __do_sys_mount fs/namespace.c:3162 [inline]
                    __se_sys_mount fs/namespace.c:3159 [inline]
                    __x64_sys_mount+0xbe/0x150 fs/namespace.c:3159
                    do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
                    entry_SYSCALL_64_after_hwframe+0x49/0xbe
   INITIAL USE at:
                   lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
                   __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
                   _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
                   spin_lock include/linux/spinlock.h:329 [inline]
                   flush_bg_queue+0x1f3/0x3c0 fs/fuse/dev.c:415
                   fuse_request_queue_background+0x2d1/0x580 fs/fuse/dev.c:676
                   fuse_request_send_background+0x58/0x120 fs/fuse/dev.c:687
                   fuse_send_init fs/fuse/inode.c:989 [inline]
                   fuse_fill_super+0x13bb/0x1730 fs/fuse/inode.c:1214
                   mount_nodev+0x68/0x110 fs/super.c:1392
                   fuse_mount+0x2d/0x40 fs/fuse/inode.c:1239
                   legacy_get_tree+0xf2/0x200 fs/fs_context.c:590
                   vfs_get_tree+0x123/0x450 fs/super.c:1481
                   do_new_mount fs/namespace.c:2610 [inline]
                   do_mount+0x1436/0x2c40 fs/namespace.c:2932
                   ksys_mount+0xdb/0x150 fs/namespace.c:3148
                   __do_sys_mount fs/namespace.c:3162 [inline]
                   __se_sys_mount fs/namespace.c:3159 [inline]
                   __x64_sys_mount+0xbe/0x150 fs/namespace.c:3159
                   do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
                   entry_SYSCALL_64_after_hwframe+0x49/0xbe
 }
 ... key      at: [<ffffffff8a60dec0>] __key.43450+0x0/0x40
 ... acquired at:
   lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
   __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
   _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
   spin_lock include/linux/spinlock.h:329 [inline]
   aio_poll fs/aio.c:1772 [inline]
   __io_submit_one fs/aio.c:1875 [inline]
   io_submit_one+0xedf/0x1cf0 fs/aio.c:1908
   __do_sys_io_submit fs/aio.c:1953 [inline]
   __se_sys_io_submit fs/aio.c:1923 [inline]
   __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923
   do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

stack backtrace:
CPU: 0 PID: 13779 Comm: syz-executor2 Not tainted 5.0.0-rc4-next-20190131 #23
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x172/0x1f0 lib/dump_stack.c:113
 print_bad_irq_dependency kernel/locking/lockdep.c:1573 [inline]
 check_usage.cold+0x60f/0x940 kernel/locking/lockdep.c:1605
 check_irq_usage kernel/locking/lockdep.c:1650 [inline]
 check_prev_add_irq kernel/locking/lockdep_states.h:8 [inline]
 check_prev_add kernel/locking/lockdep.c:1860 [inline]
 check_prevs_add kernel/locking/lockdep.c:1968 [inline]
 validate_chain kernel/locking/lockdep.c:2339 [inline]
 __lock_acquire+0x1f12/0x4790 kernel/locking/lockdep.c:3320
 lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826
 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
 _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
 spin_lock include/linux/spinlock.h:329 [inline]
 aio_poll fs/aio.c:1772 [inline]
 __io_submit_one fs/aio.c:1875 [inline]
 io_submit_one+0xedf/0x1cf0 fs/aio.c:1908
 __do_sys_io_submit fs/aio.c:1953 [inline]
 __se_sys_io_submit fs/aio.c:1923 [inline]
 __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: <stable@vger.kernel.org>
Fixes: e8693bcfa0b4 ("aio: allow direct aio poll comletions for keyed wakeups") # v4.19
Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
[ bvanassche: added a comment ]
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 fs/aio.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index b906ff70c90f..3083180a54c8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1679,6 +1679,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
 	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
 	__poll_t mask = key_to_poll(key);
+	unsigned long flags;
 
 	req->woken = true;
 
@@ -1687,10 +1688,15 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 		if (!(mask & req->events))
 			return 0;
 
-		/* try to complete the iocb inline if we can: */
-		if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
+		/*
+		 * Try to complete the iocb inline if we can. Use
+		 * irqsave/irqrestore because not all filesystems (e.g. fuse)
+		 * call this function with IRQs disabled and because IRQs
+		 * have to be disabled before ctx_lock is obtained.
+		 */
+		if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
 			list_del(&iocb->ki_list);
-			spin_unlock(&iocb->ki_ctx->ctx_lock);
+			spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
 
 			list_del_init(&req->wait.entry);
 			aio_poll_complete(iocb, mask);
-- 
2.20.1.791.gb4d0f1c61a-goog



Thread overview: 12+ messages
2019-02-04 17:45 [PATCH] aio: Fix locking in aio_poll() Bart Van Assche
2019-02-04 17:49 ` Christoph Hellwig
2019-02-05  8:12   ` Miklos Szeredi
2019-02-06  0:53     ` Bart Van Assche
2019-02-06  8:36       ` Miklos Szeredi
2019-02-06 13:47         ` Christoph Hellwig
2019-02-06 14:31           ` Miklos Szeredi
2019-02-06 13:39       ` Christoph Hellwig
2019-02-09  0:59 Bart Van Assche
2019-02-12  7:56 ` Christoph Hellwig
2019-02-21 22:28   ` Bart Van Assche
2019-02-22  3:17     ` Al Viro
