* possible deadlock in io_submit_one
@ 2019-02-05  2:03 syzbot
  2019-03-11 13:23 ` syzbot
  2019-06-12 19:48 ` Eric Biggers
  0 siblings, 2 replies; 4+ messages in thread
From: syzbot @ 2019-02-05  2:03 UTC (permalink / raw)
  To: bcrl, linux-aio, linux-fsdevel, linux-kernel, syzkaller-bugs, viro

Hello,

syzbot found the following crash on:

HEAD commit:    5eeb63359b1e Merge tag 'for-linus' of git://git.kernel.org..
git tree:       upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=17906f64c00000
kernel config:  https://syzkaller.appspot.com/x/.config?x=2e0064f906afee10
dashboard link: https://syzkaller.appspot.com/bug?extid=a3accb352f9c22041cfa
compiler:       gcc (GCC) 9.0.0 20181231 (experimental)
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=156479f8c00000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=128c75c4c00000

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+a3accb352f9c22041cfa@syzkaller.appspotmail.com

=====================================================
WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
5.0.0-rc4+ #56 Not tainted
-----------------------------------------------------
syz-executor263/8874 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
00000000c469f622 (&ctx->fd_wqh){....}, at: spin_lock include/linux/spinlock.h:329 [inline]
00000000c469f622 (&ctx->fd_wqh){....}, at: aio_poll fs/aio.c:1772 [inline]
00000000c469f622 (&ctx->fd_wqh){....}, at: __io_submit_one fs/aio.c:1875 [inline]
00000000c469f622 (&ctx->fd_wqh){....}, at: io_submit_one+0xedf/0x1cf0 fs/aio.c:1908

and this task is already holding:
00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: spin_lock_irq include/linux/spinlock.h:354 [inline]
00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: aio_poll fs/aio.c:1771 [inline]
00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: __io_submit_one fs/aio.c:1875 [inline]
00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: io_submit_one+0xeb6/0x1cf0 fs/aio.c:1908
which would create a new lock dependency:
  (&(&ctx->ctx_lock)->rlock){..-.} -> (&ctx->fd_wqh){....}

but this new dependency connects a SOFTIRQ-irq-safe lock:
  (&(&ctx->ctx_lock)->rlock){..-.}

... which became SOFTIRQ-irq-safe at:
   lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
   __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline]
   _raw_spin_lock_irq+0x60/0x80 kernel/locking/spinlock.c:160
   spin_lock_irq include/linux/spinlock.h:354 [inline]
   free_ioctx_users+0x2d/0x4a0 fs/aio.c:610
   percpu_ref_put_many include/linux/percpu-refcount.h:285 [inline]
   percpu_ref_put include/linux/percpu-refcount.h:301 [inline]
   percpu_ref_call_confirm_rcu lib/percpu-refcount.c:123 [inline]
   percpu_ref_switch_to_atomic_rcu+0x3e7/0x520 lib/percpu-refcount.c:158
   __rcu_reclaim kernel/rcu/rcu.h:240 [inline]
   rcu_do_batch kernel/rcu/tree.c:2452 [inline]
   invoke_rcu_callbacks kernel/rcu/tree.c:2773 [inline]
   rcu_process_callbacks+0x928/0x1390 kernel/rcu/tree.c:2754
   __do_softirq+0x266/0x95a kernel/softirq.c:292
   invoke_softirq kernel/softirq.c:373 [inline]
   irq_exit+0x180/0x1d0 kernel/softirq.c:413
   exiting_irq arch/x86/include/asm/apic.h:536 [inline]
   smp_apic_timer_interrupt+0x14a/0x570 arch/x86/kernel/apic/apic.c:1062
   apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807
   native_safe_halt+0x2/0x10 arch/x86/include/asm/irqflags.h:57
   arch_cpu_idle+0x10/0x20 arch/x86/kernel/process.c:555
   default_idle_call+0x36/0x90 kernel/sched/idle.c:93
   cpuidle_idle_call kernel/sched/idle.c:153 [inline]
   do_idle+0x386/0x570 kernel/sched/idle.c:262
   cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:353
   rest_init+0x245/0x37b init/main.c:442
   arch_call_rest_init+0xe/0x1b
   start_kernel+0x808/0x841 init/main.c:740
   x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:470
   x86_64_start_kernel+0x77/0x7b arch/x86/kernel/head64.c:451
   secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:243

to a SOFTIRQ-irq-unsafe lock:
  (&ctx->fault_pending_wqh){+.+.}

... which became SOFTIRQ-irq-unsafe at:
...
   lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
   __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
   _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
   spin_lock include/linux/spinlock.h:329 [inline]
   userfaultfd_release+0x497/0x6d0 fs/userfaultfd.c:916
   __fput+0x2df/0x8d0 fs/file_table.c:278
   ____fput+0x16/0x20 fs/file_table.c:309
   task_work_run+0x14a/0x1c0 kernel/task_work.c:113
   tracehook_notify_resume include/linux/tracehook.h:188 [inline]
   exit_to_usermode_loop+0x273/0x2c0 arch/x86/entry/common.c:166
   prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
   syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
   do_syscall_64+0x52d/0x610 arch/x86/entry/common.c:293
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

other info that might help us debug this:

Chain exists of:
   &(&ctx->ctx_lock)->rlock --> &ctx->fd_wqh --> &ctx->fault_pending_wqh

  Possible interrupt unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(&ctx->fault_pending_wqh);
                                local_irq_disable();
                                lock(&(&ctx->ctx_lock)->rlock);
                                lock(&ctx->fd_wqh);
   <Interrupt>
     lock(&(&ctx->ctx_lock)->rlock);

  *** DEADLOCK ***

1 lock held by syz-executor263/8874:
  #0: 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: spin_lock_irq include/linux/spinlock.h:354 [inline]
  #0: 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: aio_poll fs/aio.c:1771 [inline]
  #0: 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: __io_submit_one fs/aio.c:1875 [inline]
  #0: 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: io_submit_one+0xeb6/0x1cf0 fs/aio.c:1908

the dependencies between SOFTIRQ-irq-safe lock and the holding lock:
-> (&(&ctx->ctx_lock)->rlock){..-.} {
    IN-SOFTIRQ-W at:
                     lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
                     __raw_spin_lock_irq  
include/linux/spinlock_api_smp.h:128 [inline]
                     _raw_spin_lock_irq+0x60/0x80  
kernel/locking/spinlock.c:160
                     spin_lock_irq include/linux/spinlock.h:354 [inline]
                     free_ioctx_users+0x2d/0x4a0 fs/aio.c:610
                     percpu_ref_put_many include/linux/percpu-refcount.h:285  
[inline]
                     percpu_ref_put include/linux/percpu-refcount.h:301  
[inline]
                     percpu_ref_call_confirm_rcu lib/percpu-refcount.c:123  
[inline]
                     percpu_ref_switch_to_atomic_rcu+0x3e7/0x520  
lib/percpu-refcount.c:158
                     __rcu_reclaim kernel/rcu/rcu.h:240 [inline]
                     rcu_do_batch kernel/rcu/tree.c:2452 [inline]
                     invoke_rcu_callbacks kernel/rcu/tree.c:2773 [inline]
                     rcu_process_callbacks+0x928/0x1390  
kernel/rcu/tree.c:2754
                     __do_softirq+0x266/0x95a kernel/softirq.c:292
                     invoke_softirq kernel/softirq.c:373 [inline]
                     irq_exit+0x180/0x1d0 kernel/softirq.c:413
                     exiting_irq arch/x86/include/asm/apic.h:536 [inline]
                     smp_apic_timer_interrupt+0x14a/0x570  
arch/x86/kernel/apic/apic.c:1062
                     apic_timer_interrupt+0xf/0x20  
arch/x86/entry/entry_64.S:807
                     native_safe_halt+0x2/0x10  
arch/x86/include/asm/irqflags.h:57
                     arch_cpu_idle+0x10/0x20 arch/x86/kernel/process.c:555
                     default_idle_call+0x36/0x90 kernel/sched/idle.c:93
                     cpuidle_idle_call kernel/sched/idle.c:153 [inline]
                     do_idle+0x386/0x570 kernel/sched/idle.c:262
                     cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:353
                     rest_init+0x245/0x37b init/main.c:442
                     arch_call_rest_init+0xe/0x1b
                     start_kernel+0x808/0x841 init/main.c:740
                     x86_64_start_reservations+0x29/0x2b  
arch/x86/kernel/head64.c:470
                     x86_64_start_kernel+0x77/0x7b  
arch/x86/kernel/head64.c:451
                     secondary_startup_64+0xa4/0xb0  
arch/x86/kernel/head_64.S:243
    INITIAL USE at:
                    lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
                    __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128  
[inline]
                    _raw_spin_lock_irq+0x60/0x80  
kernel/locking/spinlock.c:160
                    spin_lock_irq include/linux/spinlock.h:354 [inline]
                    free_ioctx_users+0x2d/0x4a0 fs/aio.c:610
                    percpu_ref_put_many include/linux/percpu-refcount.h:285  
[inline]
                    percpu_ref_put include/linux/percpu-refcount.h:301  
[inline]
                    percpu_ref_call_confirm_rcu lib/percpu-refcount.c:123  
[inline]
                    percpu_ref_switch_to_atomic_rcu+0x3e7/0x520  
lib/percpu-refcount.c:158
                    __rcu_reclaim kernel/rcu/rcu.h:240 [inline]
                    rcu_do_batch kernel/rcu/tree.c:2452 [inline]
                    invoke_rcu_callbacks kernel/rcu/tree.c:2773 [inline]
                    rcu_process_callbacks+0x928/0x1390 kernel/rcu/tree.c:2754
                    __do_softirq+0x266/0x95a kernel/softirq.c:292
                    invoke_softirq kernel/softirq.c:373 [inline]
                    irq_exit+0x180/0x1d0 kernel/softirq.c:413
                    exiting_irq arch/x86/include/asm/apic.h:536 [inline]
                    smp_apic_timer_interrupt+0x14a/0x570  
arch/x86/kernel/apic/apic.c:1062
                    apic_timer_interrupt+0xf/0x20  
arch/x86/entry/entry_64.S:807
                    native_safe_halt+0x2/0x10  
arch/x86/include/asm/irqflags.h:57
                    arch_cpu_idle+0x10/0x20 arch/x86/kernel/process.c:555
                    default_idle_call+0x36/0x90 kernel/sched/idle.c:93
                    cpuidle_idle_call kernel/sched/idle.c:153 [inline]
                    do_idle+0x386/0x570 kernel/sched/idle.c:262
                    cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:353
                    start_secondary+0x404/0x5c0 arch/x86/kernel/smpboot.c:271
                    secondary_startup_64+0xa4/0xb0  
arch/x86/kernel/head_64.S:243
  }
  ... key      at: [<ffffffff8a5760a0>] __key.51972+0x0/0x40
  ... acquired at:
    lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
    __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
    _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
    spin_lock include/linux/spinlock.h:329 [inline]
    aio_poll fs/aio.c:1772 [inline]
    __io_submit_one fs/aio.c:1875 [inline]
    io_submit_one+0xedf/0x1cf0 fs/aio.c:1908
    __do_sys_io_submit fs/aio.c:1953 [inline]
    __se_sys_io_submit fs/aio.c:1923 [inline]
    __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923
    do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
    entry_SYSCALL_64_after_hwframe+0x49/0xbe


the dependencies between the lock to be acquired
  and SOFTIRQ-irq-unsafe lock:
  -> (&ctx->fault_pending_wqh){+.+.} {
     HARDIRQ-ON-W at:
                       lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
                       __raw_spin_lock include/linux/spinlock_api_smp.h:142  
[inline]
                       _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
                       spin_lock include/linux/spinlock.h:329 [inline]
                       userfaultfd_release+0x497/0x6d0 fs/userfaultfd.c:916
                       __fput+0x2df/0x8d0 fs/file_table.c:278
                       ____fput+0x16/0x20 fs/file_table.c:309
                       task_work_run+0x14a/0x1c0 kernel/task_work.c:113
                       tracehook_notify_resume include/linux/tracehook.h:188  
[inline]
                       exit_to_usermode_loop+0x273/0x2c0  
arch/x86/entry/common.c:166
                       prepare_exit_to_usermode arch/x86/entry/common.c:197  
[inline]
                       syscall_return_slowpath arch/x86/entry/common.c:268  
[inline]
                       do_syscall_64+0x52d/0x610 arch/x86/entry/common.c:293
                       entry_SYSCALL_64_after_hwframe+0x49/0xbe
     SOFTIRQ-ON-W at:
                       lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
                       __raw_spin_lock include/linux/spinlock_api_smp.h:142  
[inline]
                       _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
                       spin_lock include/linux/spinlock.h:329 [inline]
                       userfaultfd_release+0x497/0x6d0 fs/userfaultfd.c:916
                       __fput+0x2df/0x8d0 fs/file_table.c:278
                       ____fput+0x16/0x20 fs/file_table.c:309
                       task_work_run+0x14a/0x1c0 kernel/task_work.c:113
                       tracehook_notify_resume include/linux/tracehook.h:188  
[inline]
                       exit_to_usermode_loop+0x273/0x2c0  
arch/x86/entry/common.c:166
                       prepare_exit_to_usermode arch/x86/entry/common.c:197  
[inline]
                       syscall_return_slowpath arch/x86/entry/common.c:268  
[inline]
                       do_syscall_64+0x52d/0x610 arch/x86/entry/common.c:293
                       entry_SYSCALL_64_after_hwframe+0x49/0xbe
     INITIAL USE at:
                      lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
                      __raw_spin_lock include/linux/spinlock_api_smp.h:142  
[inline]
                      _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
                      spin_lock include/linux/spinlock.h:329 [inline]
                      userfaultfd_ctx_read fs/userfaultfd.c:1040 [inline]
                      userfaultfd_read+0x540/0x1940 fs/userfaultfd.c:1198
                      __vfs_read+0x116/0x8c0 fs/read_write.c:416
                      vfs_read+0x194/0x3e0 fs/read_write.c:452
                      ksys_read+0xea/0x1f0 fs/read_write.c:578
                      __do_sys_read fs/read_write.c:588 [inline]
                      __se_sys_read fs/read_write.c:586 [inline]
                      __x64_sys_read+0x73/0xb0 fs/read_write.c:586
                      do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
                      entry_SYSCALL_64_after_hwframe+0x49/0xbe
   }
   ... key      at: [<ffffffff8a575ee0>] __key.44851+0x0/0x40
   ... acquired at:
    __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
    _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
    spin_lock include/linux/spinlock.h:329 [inline]
    userfaultfd_ctx_read fs/userfaultfd.c:1040 [inline]
    userfaultfd_read+0x540/0x1940 fs/userfaultfd.c:1198
    __vfs_read+0x116/0x8c0 fs/read_write.c:416
    vfs_read+0x194/0x3e0 fs/read_write.c:452
    ksys_read+0xea/0x1f0 fs/read_write.c:578
    __do_sys_read fs/read_write.c:588 [inline]
    __se_sys_read fs/read_write.c:586 [inline]
    __x64_sys_read+0x73/0xb0 fs/read_write.c:586
    do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
    entry_SYSCALL_64_after_hwframe+0x49/0xbe

-> (&ctx->fd_wqh){....} {
    INITIAL USE at:
                    lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
                    __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128  
[inline]
                    _raw_spin_lock_irq+0x60/0x80  
kernel/locking/spinlock.c:160
                    spin_lock_irq include/linux/spinlock.h:354 [inline]
                    userfaultfd_ctx_read fs/userfaultfd.c:1036 [inline]
                    userfaultfd_read+0x27a/0x1940 fs/userfaultfd.c:1198
                    __vfs_read+0x116/0x8c0 fs/read_write.c:416
                    vfs_read+0x194/0x3e0 fs/read_write.c:452
                    ksys_read+0xea/0x1f0 fs/read_write.c:578
                    __do_sys_read fs/read_write.c:588 [inline]
                    __se_sys_read fs/read_write.c:586 [inline]
                    __x64_sys_read+0x73/0xb0 fs/read_write.c:586
                    do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
                    entry_SYSCALL_64_after_hwframe+0x49/0xbe
  }
  ... key      at: [<ffffffff8a575e20>] __key.44854+0x0/0x40
  ... acquired at:
    lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
    __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
    _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
    spin_lock include/linux/spinlock.h:329 [inline]
    aio_poll fs/aio.c:1772 [inline]
    __io_submit_one fs/aio.c:1875 [inline]
    io_submit_one+0xedf/0x1cf0 fs/aio.c:1908
    __do_sys_io_submit fs/aio.c:1953 [inline]
    __se_sys_io_submit fs/aio.c:1923 [inline]
    __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923
    do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
    entry_SYSCALL_64_after_hwframe+0x49/0xbe


stack backtrace:
CPU: 1 PID: 8874 Comm: syz-executor263 Not tainted 5.0.0-rc4+ #56
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x172/0x1f0 lib/dump_stack.c:113
  print_bad_irq_dependency kernel/locking/lockdep.c:1573 [inline]
  check_usage.cold+0x60f/0x940 kernel/locking/lockdep.c:1605
  check_irq_usage kernel/locking/lockdep.c:1661 [inline]
  check_prev_add_irq kernel/locking/lockdep_states.h:8 [inline]
  check_prev_add kernel/locking/lockdep.c:1871 [inline]
  check_prevs_add kernel/locking/lockdep.c:1979 [inline]
  validate_chain kernel/locking/lockdep.c:2350 [inline]
  __lock_acquire+0x1f47/0x4700 kernel/locking/lockdep.c:3338
  lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
  __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
  _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
  spin_lock include/linux/spinlock.h:329 [inline]
  aio_poll fs/aio.c:1772 [inline]
  __io_submit_one fs/aio.c:1875 [inline]
  io_submit_one+0xedf/0x1cf0 fs/aio.c:1908
  __do_sys_io_submit fs/aio.c:1953 [inline]
  __se_sys_io_submit fs/aio.c:1923 [inline]
  __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#bug-status-tracking for how to communicate with syzbot.
syzbot can test patches for this bug, for details see:
https://goo.gl/tpsmEJ#testing-patches
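
Judging from the report above, the reproducer exercises both an IOCB_CMD_POLL
aio submission on a userfaultfd and a read() on the same fd.  The following is
a hypothetical, heavily simplified userspace sketch of those ingredients; it is
not the actual reproducer behind the URL, error handling is omitted, and a
lockdep-enabled kernel is needed to see the warning:

    #include <fcntl.h>
    #include <linux/aio_abi.h>
    #include <linux/userfaultfd.h>
    #include <poll.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            /* The userfaultfd whose fd_wqh/fault_pending_wqh locks appear in the report. */
            int uffd = syscall(__NR_userfaultfd, O_NONBLOCK);
            struct uffdio_api api = { .api = UFFD_API };
            ioctl(uffd, UFFDIO_API, &api);

            /* read() takes fd_wqh.lock and then fault_pending_wqh.lock (returns -EAGAIN here). */
            struct uffd_msg msg;
            read(uffd, &msg, sizeof(msg));

            /* io_submit(IOCB_CMD_POLL) takes ctx_lock with IRQs disabled, then fd_wqh.lock. */
            aio_context_t aio_ctx = 0;
            syscall(__NR_io_setup, 32, &aio_ctx);

            struct iocb cb;
            memset(&cb, 0, sizeof(cb));
            cb.aio_fildes = uffd;
            cb.aio_lio_opcode = IOCB_CMD_POLL;
            cb.aio_buf = POLLIN;            /* poll events are passed in aio_buf */

            struct iocb *cbs[1] = { &cb };
            syscall(__NR_io_submit, aio_ctx, 1, cbs);

            syscall(__NR_io_destroy, aio_ctx);
            close(uffd);
            return 0;
    }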


* Re: possible deadlock in io_submit_one
  2019-02-05  2:03 possible deadlock in io_submit_one syzbot
@ 2019-03-11 13:23 ` syzbot
  2019-06-12 19:48 ` Eric Biggers
  1 sibling, 0 replies; 4+ messages in thread
From: syzbot @ 2019-03-11 13:23 UTC (permalink / raw)
  To: adam.manzanares, avi, bcrl, darrick.wong, gregkh, hch, jmoyer,
	linux-aio, linux-fsdevel, linux-kernel, syzkaller-bugs, viro

syzbot has bisected this bug to:

commit bfe4037e722ec672c9dafd5730d9132afeeb76e9
Author: Christoph Hellwig <hch@lst.de>
Date:   Mon Jul 16 07:08:20 2018 +0000

     aio: implement IOCB_CMD_POLL

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=108886bd200000
start commit:   bfe4037e aio: implement IOCB_CMD_POLL
git tree:       upstream
final crash:    https://syzkaller.appspot.com/x/report.txt?x=128886bd200000
console output: https://syzkaller.appspot.com/x/log.txt?x=148886bd200000
kernel config:  https://syzkaller.appspot.com/x/.config?x=2e0064f906afee10
dashboard link: https://syzkaller.appspot.com/bug?extid=a3accb352f9c22041cfa
userspace arch: amd64
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=156479f8c00000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=128c75c4c00000

Reported-by: syzbot+a3accb352f9c22041cfa@syzkaller.appspotmail.com
Fixes: bfe4037e ("aio: implement IOCB_CMD_POLL")


* Re: possible deadlock in io_submit_one
  2019-02-05  2:03 possible deadlock in io_submit_one syzbot
  2019-03-11 13:23 ` syzbot
@ 2019-06-12 19:48 ` Eric Biggers
  2019-06-27  7:50   ` [PATCH] userfaultfd: disable irqs for fault_pending and event locks Eric Biggers
  1 sibling, 1 reply; 4+ messages in thread
From: Eric Biggers @ 2019-06-12 19:48 UTC (permalink / raw)
  To: Bart Van Assche, Christoph Hellwig
  Cc: syzbot, bcrl, linux-aio, linux-fsdevel, linux-kernel,
	syzkaller-bugs, viro

Hi Bart and Christoph,

On Mon, Feb 04, 2019 at 06:03:04PM -0800, syzbot wrote:
> Hello,
> 
> syzbot found the following crash on:
> 
> HEAD commit:    5eeb63359b1e Merge tag 'for-linus' of git://git.kernel.org..
> git tree:       upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=17906f64c00000
> kernel config:  https://syzkaller.appspot.com/x/.config?x=2e0064f906afee10
> dashboard link: https://syzkaller.appspot.com/bug?extid=a3accb352f9c22041cfa
> compiler:       gcc (GCC) 9.0.0 20181231 (experimental)
> syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=156479f8c00000
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=128c75c4c00000
> 
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+a3accb352f9c22041cfa@syzkaller.appspotmail.com
> 
> =====================================================
> WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
> 5.0.0-rc4+ #56 Not tainted
> -----------------------------------------------------
> syz-executor263/8874 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
> 00000000c469f622 (&ctx->fd_wqh){....}, at: spin_lock
> include/linux/spinlock.h:329 [inline]
> 00000000c469f622 (&ctx->fd_wqh){....}, at: aio_poll fs/aio.c:1772 [inline]
> 00000000c469f622 (&ctx->fd_wqh){....}, at: __io_submit_one fs/aio.c:1875
> [inline]
> 00000000c469f622 (&ctx->fd_wqh){....}, at: io_submit_one+0xedf/0x1cf0
> fs/aio.c:1908
> 
> and this task is already holding:
> 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: spin_lock_irq
> include/linux/spinlock.h:354 [inline]
> 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: aio_poll
> fs/aio.c:1771 [inline]
> 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: __io_submit_one
> fs/aio.c:1875 [inline]
> 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at:
> io_submit_one+0xeb6/0x1cf0 fs/aio.c:1908
> which would create a new lock dependency:
>  (&(&ctx->ctx_lock)->rlock){..-.} -> (&ctx->fd_wqh){....}
> 

This is still happening.  See
https://syzkaller.appspot.com/text?tag=CrashReport&x=129eb971a00000 for a report
on Linus' tree from 5 days ago.

I see that a few months ago there was a commit

	commit d3d6a18d7d351cbcc9b33dbedf710e65f8ce1595
	Author: Bart Van Assche <bvanassche@acm.org>
	Date:   Fri Feb 8 16:59:49 2019 -0800

	    aio: Fix locking in aio_poll()

but apparently it didn't fully fix the problem.

- Eric
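
Restating the reported dependency chain compactly: below is an illustrative
sketch distilled from the lockdep output, with names and bodies heavily
simplified (kctx standing for the aio kioctx, uctx for the userfaultfd_ctx);
it is not the actual fs/aio.c or fs/userfaultfd.c code.

    /* (1) io_submit(IOCB_CMD_POLL) on a userfaultfd (fs/aio.c:1771-1772 in the
     *     report): ctx_lock -> fd_wqh.  ctx_lock is also taken from softirq
     *     context via free_ioctx_users(), so it is SOFTIRQ-safe.
     */
    spin_lock_irq(&kctx->ctx_lock);
    spin_lock(&uctx->fd_wqh.lock);
    /* ... queue the poll waiter ... */
    spin_unlock(&uctx->fd_wqh.lock);
    spin_unlock_irq(&kctx->ctx_lock);

    /* (2) read() on the userfaultfd (fs/userfaultfd.c:1036/1040 in the report):
     *     fd_wqh -> fault_pending_wqh.
     */
    spin_lock_irq(&uctx->fd_wqh.lock);
    spin_lock(&uctx->fault_pending_wqh.lock);
    /* ... dequeue a pending fault ... */
    spin_unlock(&uctx->fault_pending_wqh.lock);
    spin_unlock_irq(&uctx->fd_wqh.lock);

    /* (3) handle_userfault() / userfaultfd_release(): fault_pending_wqh taken
     *     with IRQs (and hence softirqs) enabled, i.e. SOFTIRQ-unsafe.
     */
    spin_lock(&uctx->fault_pending_wqh.lock);
    /* ... */
    spin_unlock(&uctx->fault_pending_wqh.lock);

    /*
     * (1)+(2) give ctx_lock -> fd_wqh -> fault_pending_wqh while ctx_lock can
     * be taken from a softirq.  A CPU holding fault_pending_wqh as in (3) can
     * be interrupted by the RCU softirq that wants ctx_lock, while another CPU
     * holding ctx_lock is transitively waiting on fault_pending_wqh -- the
     * SOFTIRQ-safe -> SOFTIRQ-unsafe inversion lockdep warns about.
     */

The patch posted later in this thread breaks the cycle by making every
fault_pending_wqh/event_wqh acquisition disable IRQs.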


* [PATCH] userfaultfd: disable irqs for fault_pending and event locks
  2019-06-12 19:48 ` Eric Biggers
@ 2019-06-27  7:50   ` Eric Biggers
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Biggers @ 2019-06-27  7:50 UTC (permalink / raw)
  To: linux-mm, Andrew Morton
  Cc: linux-aio, linux-fsdevel, syzkaller-bugs, Christoph Hellwig,
	Andrea Arcangeli

From: Eric Biggers <ebiggers@google.com>

When IOCB_CMD_POLL is used on a userfaultfd, aio_poll() disables IRQs
and takes kioctx::ctx_lock, then userfaultfd_ctx::fd_wqh.lock.  This may
have to wait for userfaultfd_ctx::fd_wqh.lock to be released by
userfaultfd_ctx_read(), which can be waiting for
userfaultfd_ctx::fault_pending_wqh.lock or
userfaultfd_ctx::event_wqh.lock.  But elsewhere the fault_pending_wqh
and event_wqh locks are taken with IRQs enabled.  Since the IRQ handler
may take kioctx::ctx_lock, lockdep reports that a deadlock is possible.

Fix it by always disabling IRQs when taking the fault_pending_wqh and
event_wqh locks.

Commit ae62c16e105a ("userfaultfd: disable irqs when taking the
waitqueue lock") didn't fix this because it only accounted for the
fd_wqh lock, not the other locks nested inside it.

Reported-by: syzbot+fab6de82892b6b9c6191@syzkaller.appspotmail.com
Reported-by: syzbot+53c0b767f7ca0dc0c451@syzkaller.appspotmail.com
Reported-by: syzbot+a3accb352f9c22041cfa@syzkaller.appspotmail.com
Fixes: bfe4037e722e ("aio: implement IOCB_CMD_POLL")
Cc: <stable@vger.kernel.org> # v4.19+
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/userfaultfd.c | 42 ++++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ae0b8b5f69e6..ccbdbd62f0d8 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -40,6 +40,16 @@ enum userfaultfd_state {
 /*
  * Start with fault_pending_wqh and fault_wqh so they're more likely
  * to be in the same cacheline.
+ *
+ * Locking order:
+ *	fd_wqh.lock
+ *		fault_pending_wqh.lock
+ *			fault_wqh.lock
+ *		event_wqh.lock
+ *
+ * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
+ * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
+ * also taken in IRQ context.
  */
 struct userfaultfd_ctx {
 	/* waitqueue head for the pending (i.e. not read) userfaults */
@@ -458,7 +468,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
 			 TASK_KILLABLE;
 
-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 	/*
 	 * After the __add_wait_queue the uwq is visible to userland
 	 * through poll/read().
@@ -470,7 +480,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 * __add_wait_queue.
 	 */
 	set_current_state(blocking_state);
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 
 	if (!is_vm_hugetlb_page(vmf->vma))
 		must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -552,13 +562,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 * kernel stack can be released after the list_del_init.
 	 */
 	if (!list_empty_careful(&uwq.wq.entry)) {
-		spin_lock(&ctx->fault_pending_wqh.lock);
+		spin_lock_irq(&ctx->fault_pending_wqh.lock);
 		/*
 		 * No need of list_del_init(), the uwq on the stack
 		 * will be freed shortly anyway.
 		 */
 		list_del(&uwq.wq.entry);
-		spin_unlock(&ctx->fault_pending_wqh.lock);
+		spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 	}
 
 	/*
@@ -583,7 +593,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 	init_waitqueue_entry(&ewq->wq, current);
 	release_new_ctx = NULL;
 
-	spin_lock(&ctx->event_wqh.lock);
+	spin_lock_irq(&ctx->event_wqh.lock);
 	/*
 	 * After the __add_wait_queue the uwq is visible to userland
 	 * through poll/read().
@@ -613,15 +623,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 			break;
 		}
 
-		spin_unlock(&ctx->event_wqh.lock);
+		spin_unlock_irq(&ctx->event_wqh.lock);
 
 		wake_up_poll(&ctx->fd_wqh, EPOLLIN);
 		schedule();
 
-		spin_lock(&ctx->event_wqh.lock);
+		spin_lock_irq(&ctx->event_wqh.lock);
 	}
 	__set_current_state(TASK_RUNNING);
-	spin_unlock(&ctx->event_wqh.lock);
+	spin_unlock_irq(&ctx->event_wqh.lock);
 
 	if (release_new_ctx) {
 		struct vm_area_struct *vma;
@@ -918,10 +928,10 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 	 * the last page faults that may have been already waiting on
 	 * the fault_*wqh.
 	 */
-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 	__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
 	__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 
 	/* Flush pending events that may still wait on event_wqh */
 	wake_up_all(&ctx->event_wqh);
@@ -1134,7 +1144,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 
 	if (!ret && msg->event == UFFD_EVENT_FORK) {
 		ret = resolve_userfault_fork(ctx, fork_nctx, msg);
-		spin_lock(&ctx->event_wqh.lock);
+		spin_lock_irq(&ctx->event_wqh.lock);
 		if (!list_empty(&fork_event)) {
 			/*
 			 * The fork thread didn't abort, so we can
@@ -1180,7 +1190,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 			if (ret)
 				userfaultfd_ctx_put(fork_nctx);
 		}
-		spin_unlock(&ctx->event_wqh.lock);
+		spin_unlock_irq(&ctx->event_wqh.lock);
 	}
 
 	return ret;
@@ -1219,14 +1229,14 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
 static void __wake_userfault(struct userfaultfd_ctx *ctx,
 			     struct userfaultfd_wake_range *range)
 {
-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 	/* wake all in the range and autoremove */
 	if (waitqueue_active(&ctx->fault_pending_wqh))
 		__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
 				     range);
 	if (waitqueue_active(&ctx->fault_wqh))
 		__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range);
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 }
 
 static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
@@ -1881,7 +1891,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
 	wait_queue_entry_t *wq;
 	unsigned long pending = 0, total = 0;
 
-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 	list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {
 		pending++;
 		total++;
@@ -1889,7 +1899,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
 	list_for_each_entry(wq, &ctx->fault_wqh.head, entry) {
 		total++;
 	}
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 
 	/*
 	 * If more protocols will be added, there will be all shown
-- 
2.22.0


