* possible deadlock in io_submit_one
@ 2019-02-05  2:03 syzbot
  2019-03-11 13:23 ` syzbot
  2019-06-12 19:48 ` Eric Biggers
  0 siblings, 2 replies; 4+ messages in thread
From: syzbot @ 2019-02-05  2:03 UTC (permalink / raw)
  To: bcrl, linux-aio, linux-fsdevel, linux-kernel, syzkaller-bugs, viro

Hello,

syzbot found the following crash on:

HEAD commit:    5eeb63359b1e Merge tag 'for-linus' of git://git.kernel.org..
git tree:       upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=17906f64c00000
kernel config:  https://syzkaller.appspot.com/x/.config?x=2e0064f906afee10
dashboard link: https://syzkaller.appspot.com/bug?extid=a3accb352f9c22041cfa
compiler:       gcc (GCC) 9.0.0 20181231 (experimental)
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=156479f8c00000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=128c75c4c00000

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+a3accb352f9c22041cfa@syzkaller.appspotmail.com

=====================================================
WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
5.0.0-rc4+ #56 Not tainted
-----------------------------------------------------
syz-executor263/8874 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
00000000c469f622 (&ctx->fd_wqh){....}, at: spin_lock include/linux/spinlock.h:329 [inline]
00000000c469f622 (&ctx->fd_wqh){....}, at: aio_poll fs/aio.c:1772 [inline]
00000000c469f622 (&ctx->fd_wqh){....}, at: __io_submit_one fs/aio.c:1875 [inline]
00000000c469f622 (&ctx->fd_wqh){....}, at: io_submit_one+0xedf/0x1cf0 fs/aio.c:1908

and this task is already holding:
00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: spin_lock_irq include/linux/spinlock.h:354 [inline]
00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: aio_poll fs/aio.c:1771 [inline]
00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: __io_submit_one fs/aio.c:1875 [inline]
00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: io_submit_one+0xeb6/0x1cf0 fs/aio.c:1908
which would create a new lock dependency:
  (&(&ctx->ctx_lock)->rlock){..-.} -> (&ctx->fd_wqh){....}

but this new dependency connects a SOFTIRQ-irq-safe lock:
  (&(&ctx->ctx_lock)->rlock){..-.}

... which became SOFTIRQ-irq-safe at:
   lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
   __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline]
   _raw_spin_lock_irq+0x60/0x80 kernel/locking/spinlock.c:160
   spin_lock_irq include/linux/spinlock.h:354 [inline]
   free_ioctx_users+0x2d/0x4a0 fs/aio.c:610
   percpu_ref_put_many include/linux/percpu-refcount.h:285 [inline]
   percpu_ref_put include/linux/percpu-refcount.h:301 [inline]
   percpu_ref_call_confirm_rcu lib/percpu-refcount.c:123 [inline]
   percpu_ref_switch_to_atomic_rcu+0x3e7/0x520 lib/percpu-refcount.c:158
   __rcu_reclaim kernel/rcu/rcu.h:240 [inline]
   rcu_do_batch kernel/rcu/tree.c:2452 [inline]
   invoke_rcu_callbacks kernel/rcu/tree.c:2773 [inline]
   rcu_process_callbacks+0x928/0x1390 kernel/rcu/tree.c:2754
   __do_softirq+0x266/0x95a kernel/softirq.c:292
   invoke_softirq kernel/softirq.c:373 [inline]
   irq_exit+0x180/0x1d0 kernel/softirq.c:413
   exiting_irq arch/x86/include/asm/apic.h:536 [inline]
   smp_apic_timer_interrupt+0x14a/0x570 arch/x86/kernel/apic/apic.c:1062
   apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807
   native_safe_halt+0x2/0x10 arch/x86/include/asm/irqflags.h:57
   arch_cpu_idle+0x10/0x20 arch/x86/kernel/process.c:555
   default_idle_call+0x36/0x90 kernel/sched/idle.c:93
   cpuidle_idle_call kernel/sched/idle.c:153 [inline]
   do_idle+0x386/0x570 kernel/sched/idle.c:262
   cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:353
   rest_init+0x245/0x37b init/main.c:442
   arch_call_rest_init+0xe/0x1b
   start_kernel+0x808/0x841 init/main.c:740
   x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:470
   x86_64_start_kernel+0x77/0x7b arch/x86/kernel/head64.c:451
   secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:243

to a SOFTIRQ-irq-unsafe lock:
  (&ctx->fault_pending_wqh){+.+.}

... which became SOFTIRQ-irq-unsafe at:
...
   lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
   __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
   _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
   spin_lock include/linux/spinlock.h:329 [inline]
   userfaultfd_release+0x497/0x6d0 fs/userfaultfd.c:916
   __fput+0x2df/0x8d0 fs/file_table.c:278
   ____fput+0x16/0x20 fs/file_table.c:309
   task_work_run+0x14a/0x1c0 kernel/task_work.c:113
   tracehook_notify_resume include/linux/tracehook.h:188 [inline]
   exit_to_usermode_loop+0x273/0x2c0 arch/x86/entry/common.c:166
   prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
   syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
   do_syscall_64+0x52d/0x610 arch/x86/entry/common.c:293
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

other info that might help us debug this:

Chain exists of:
   &(&ctx->ctx_lock)->rlock --> &ctx->fd_wqh --> &ctx->fault_pending_wqh

  Possible interrupt unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(&ctx->fault_pending_wqh);
                                local_irq_disable();
                                lock(&(&ctx->ctx_lock)->rlock);
                                lock(&ctx->fd_wqh);
   <Interrupt>
     lock(&(&ctx->ctx_lock)->rlock);

  *** DEADLOCK ***
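
To restate the scenario lockdep prints above: CPU0 holds fault_pending_wqh with IRQs enabled when softirq work needing ctx_lock arrives, while CPU1 already holds ctx_lock and is following the dependency chain that ends at fault_pending_wqh, so neither side can make progress. Below is a minimal userspace analog of that circular wait, a sketch with hypothetical names: pthreads stand in for the two CPUs, the interrupt is modeled as thread 0's second lock acquisition, and the sleeps only widen the race window. Compile with -pthread.

/*
 * Most runs hang with each thread blocked on the other thread's
 * lock, mirroring the two-CPU scenario lockdep prints above.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t fault_pending_wqh = PTHREAD_MUTEX_INITIALIZER;

static void *cpu0(void *arg)
{
	pthread_mutex_lock(&fault_pending_wqh);	/* userfaultfd side, "IRQs on" */
	usleep(100000);				/* widen the race window */
	pthread_mutex_lock(&ctx_lock);		/* the "interrupt" needs ctx_lock */
	pthread_mutex_unlock(&ctx_lock);
	pthread_mutex_unlock(&fault_pending_wqh);
	return NULL;
}

static void *cpu1(void *arg)
{
	pthread_mutex_lock(&ctx_lock);		/* aio_poll() side */
	usleep(100000);
	pthread_mutex_lock(&fault_pending_wqh);	/* end of the fd_wqh chain */
	pthread_mutex_unlock(&fault_pending_wqh);
	pthread_mutex_unlock(&ctx_lock);
	return NULL;
}

int main(void)
{
	pthread_t t0, t1;

	pthread_create(&t0, NULL, cpu0, NULL);
	pthread_create(&t1, NULL, cpu1, NULL);
	pthread_join(t0, NULL);	/* never returns once the deadlock hits */
	pthread_join(t1, NULL);
	puts("lucky interleaving, no deadlock this run");
	return 0;
}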

1 lock held by syz-executor263/8874:
 #0: 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: spin_lock_irq include/linux/spinlock.h:354 [inline]
 #0: 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: aio_poll fs/aio.c:1771 [inline]
 #0: 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: __io_submit_one fs/aio.c:1875 [inline]
 #0: 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: io_submit_one+0xeb6/0x1cf0 fs/aio.c:1908

the dependencies between SOFTIRQ-irq-safe lock and the holding lock:
-> (&(&ctx->ctx_lock)->rlock){..-.} {
    IN-SOFTIRQ-W at:
                     lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
                     __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline]
                     _raw_spin_lock_irq+0x60/0x80 kernel/locking/spinlock.c:160
                     spin_lock_irq include/linux/spinlock.h:354 [inline]
                     free_ioctx_users+0x2d/0x4a0 fs/aio.c:610
                     percpu_ref_put_many include/linux/percpu-refcount.h:285 [inline]
                     percpu_ref_put include/linux/percpu-refcount.h:301 [inline]
                     percpu_ref_call_confirm_rcu lib/percpu-refcount.c:123 [inline]
                     percpu_ref_switch_to_atomic_rcu+0x3e7/0x520 lib/percpu-refcount.c:158
                     __rcu_reclaim kernel/rcu/rcu.h:240 [inline]
                     rcu_do_batch kernel/rcu/tree.c:2452 [inline]
                     invoke_rcu_callbacks kernel/rcu/tree.c:2773 [inline]
                     rcu_process_callbacks+0x928/0x1390 kernel/rcu/tree.c:2754
                     __do_softirq+0x266/0x95a kernel/softirq.c:292
                     invoke_softirq kernel/softirq.c:373 [inline]
                     irq_exit+0x180/0x1d0 kernel/softirq.c:413
                     exiting_irq arch/x86/include/asm/apic.h:536 [inline]
                     smp_apic_timer_interrupt+0x14a/0x570 arch/x86/kernel/apic/apic.c:1062
                     apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807
                     native_safe_halt+0x2/0x10 arch/x86/include/asm/irqflags.h:57
                     arch_cpu_idle+0x10/0x20 arch/x86/kernel/process.c:555
                     default_idle_call+0x36/0x90 kernel/sched/idle.c:93
                     cpuidle_idle_call kernel/sched/idle.c:153 [inline]
                     do_idle+0x386/0x570 kernel/sched/idle.c:262
                     cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:353
                     rest_init+0x245/0x37b init/main.c:442
                     arch_call_rest_init+0xe/0x1b
                     start_kernel+0x808/0x841 init/main.c:740
                     x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:470
                     x86_64_start_kernel+0x77/0x7b arch/x86/kernel/head64.c:451
                     secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:243
    INITIAL USE at:
                    lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
                    __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline]
                    _raw_spin_lock_irq+0x60/0x80 kernel/locking/spinlock.c:160
                    spin_lock_irq include/linux/spinlock.h:354 [inline]
                    free_ioctx_users+0x2d/0x4a0 fs/aio.c:610
                    percpu_ref_put_many include/linux/percpu-refcount.h:285 [inline]
                    percpu_ref_put include/linux/percpu-refcount.h:301 [inline]
                    percpu_ref_call_confirm_rcu lib/percpu-refcount.c:123 [inline]
                    percpu_ref_switch_to_atomic_rcu+0x3e7/0x520 lib/percpu-refcount.c:158
                    __rcu_reclaim kernel/rcu/rcu.h:240 [inline]
                    rcu_do_batch kernel/rcu/tree.c:2452 [inline]
                    invoke_rcu_callbacks kernel/rcu/tree.c:2773 [inline]
                    rcu_process_callbacks+0x928/0x1390 kernel/rcu/tree.c:2754
                    __do_softirq+0x266/0x95a kernel/softirq.c:292
                    invoke_softirq kernel/softirq.c:373 [inline]
                    irq_exit+0x180/0x1d0 kernel/softirq.c:413
                    exiting_irq arch/x86/include/asm/apic.h:536 [inline]
                    smp_apic_timer_interrupt+0x14a/0x570 arch/x86/kernel/apic/apic.c:1062
                    apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807
                    native_safe_halt+0x2/0x10 arch/x86/include/asm/irqflags.h:57
                    arch_cpu_idle+0x10/0x20 arch/x86/kernel/process.c:555
                    default_idle_call+0x36/0x90 kernel/sched/idle.c:93
                    cpuidle_idle_call kernel/sched/idle.c:153 [inline]
                    do_idle+0x386/0x570 kernel/sched/idle.c:262
                    cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:353
                    start_secondary+0x404/0x5c0 arch/x86/kernel/smpboot.c:271
                    secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:243
  }
  ... key      at: [<ffffffff8a5760a0>] __key.51972+0x0/0x40
  ... acquired at:
    lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
    __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
    _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
    spin_lock include/linux/spinlock.h:329 [inline]
    aio_poll fs/aio.c:1772 [inline]
    __io_submit_one fs/aio.c:1875 [inline]
    io_submit_one+0xedf/0x1cf0 fs/aio.c:1908
    __do_sys_io_submit fs/aio.c:1953 [inline]
    __se_sys_io_submit fs/aio.c:1923 [inline]
    __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923
    do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
    entry_SYSCALL_64_after_hwframe+0x49/0xbe


the dependencies between the lock to be acquired
  and SOFTIRQ-irq-unsafe lock:
  -> (&ctx->fault_pending_wqh){+.+.} {
     HARDIRQ-ON-W at:
                       lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
                       __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
                       _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
                       spin_lock include/linux/spinlock.h:329 [inline]
                       userfaultfd_release+0x497/0x6d0 fs/userfaultfd.c:916
                       __fput+0x2df/0x8d0 fs/file_table.c:278
                       ____fput+0x16/0x20 fs/file_table.c:309
                       task_work_run+0x14a/0x1c0 kernel/task_work.c:113
                       tracehook_notify_resume include/linux/tracehook.h:188 [inline]
                       exit_to_usermode_loop+0x273/0x2c0 arch/x86/entry/common.c:166
                       prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
                       syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
                       do_syscall_64+0x52d/0x610 arch/x86/entry/common.c:293
                       entry_SYSCALL_64_after_hwframe+0x49/0xbe
     SOFTIRQ-ON-W at:
                       lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
                       __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
                       _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
                       spin_lock include/linux/spinlock.h:329 [inline]
                       userfaultfd_release+0x497/0x6d0 fs/userfaultfd.c:916
                       __fput+0x2df/0x8d0 fs/file_table.c:278
                       ____fput+0x16/0x20 fs/file_table.c:309
                       task_work_run+0x14a/0x1c0 kernel/task_work.c:113
                       tracehook_notify_resume include/linux/tracehook.h:188 [inline]
                       exit_to_usermode_loop+0x273/0x2c0 arch/x86/entry/common.c:166
                       prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
                       syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
                       do_syscall_64+0x52d/0x610 arch/x86/entry/common.c:293
                       entry_SYSCALL_64_after_hwframe+0x49/0xbe
     INITIAL USE at:
                      lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
                      __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
                      _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
                      spin_lock include/linux/spinlock.h:329 [inline]
                      userfaultfd_ctx_read fs/userfaultfd.c:1040 [inline]
                      userfaultfd_read+0x540/0x1940 fs/userfaultfd.c:1198
                      __vfs_read+0x116/0x8c0 fs/read_write.c:416
                      vfs_read+0x194/0x3e0 fs/read_write.c:452
                      ksys_read+0xea/0x1f0 fs/read_write.c:578
                      __do_sys_read fs/read_write.c:588 [inline]
                      __se_sys_read fs/read_write.c:586 [inline]
                      __x64_sys_read+0x73/0xb0 fs/read_write.c:586
                      do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
                      entry_SYSCALL_64_after_hwframe+0x49/0xbe
   }
   ... key      at: [<ffffffff8a575ee0>] __key.44851+0x0/0x40
   ... acquired at:
    __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
    _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
    spin_lock include/linux/spinlock.h:329 [inline]
    userfaultfd_ctx_read fs/userfaultfd.c:1040 [inline]
    userfaultfd_read+0x540/0x1940 fs/userfaultfd.c:1198
    __vfs_read+0x116/0x8c0 fs/read_write.c:416
    vfs_read+0x194/0x3e0 fs/read_write.c:452
    ksys_read+0xea/0x1f0 fs/read_write.c:578
    __do_sys_read fs/read_write.c:588 [inline]
    __se_sys_read fs/read_write.c:586 [inline]
    __x64_sys_read+0x73/0xb0 fs/read_write.c:586
    do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
    entry_SYSCALL_64_after_hwframe+0x49/0xbe

-> (&ctx->fd_wqh){....} {
    INITIAL USE at:
                    lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
                    __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline]
                    _raw_spin_lock_irq+0x60/0x80 kernel/locking/spinlock.c:160
                    spin_lock_irq include/linux/spinlock.h:354 [inline]
                    userfaultfd_ctx_read fs/userfaultfd.c:1036 [inline]
                    userfaultfd_read+0x27a/0x1940 fs/userfaultfd.c:1198
                    __vfs_read+0x116/0x8c0 fs/read_write.c:416
                    vfs_read+0x194/0x3e0 fs/read_write.c:452
                    ksys_read+0xea/0x1f0 fs/read_write.c:578
                    __do_sys_read fs/read_write.c:588 [inline]
                    __se_sys_read fs/read_write.c:586 [inline]
                    __x64_sys_read+0x73/0xb0 fs/read_write.c:586
                    do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
                    entry_SYSCALL_64_after_hwframe+0x49/0xbe
  }
  ... key      at: [<ffffffff8a575e20>] __key.44854+0x0/0x40
  ... acquired at:
    lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
    __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
    _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
    spin_lock include/linux/spinlock.h:329 [inline]
    aio_poll fs/aio.c:1772 [inline]
    __io_submit_one fs/aio.c:1875 [inline]
    io_submit_one+0xedf/0x1cf0 fs/aio.c:1908
    __do_sys_io_submit fs/aio.c:1953 [inline]
    __se_sys_io_submit fs/aio.c:1923 [inline]
    __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923
    do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
    entry_SYSCALL_64_after_hwframe+0x49/0xbe


stack backtrace:
CPU: 1 PID: 8874 Comm: syz-executor263 Not tainted 5.0.0-rc4+ #56
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x172/0x1f0 lib/dump_stack.c:113
  print_bad_irq_dependency kernel/locking/lockdep.c:1573 [inline]
  check_usage.cold+0x60f/0x940 kernel/locking/lockdep.c:1605
  check_irq_usage kernel/locking/lockdep.c:1661 [inline]
  check_prev_add_irq kernel/locking/lockdep_states.h:8 [inline]
  check_prev_add kernel/locking/lockdep.c:1871 [inline]
  check_prevs_add kernel/locking/lockdep.c:1979 [inline]
  validate_chain kernel/locking/lockdep.c:2350 [inline]
  __lock_acquire+0x1f47/0x4700 kernel/locking/lockdep.c:3338
  lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841
  __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
  _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144
  spin_lock include/linux/spinlock.h:329 [inline]
  aio_poll fs/aio.c:1772 [inline]
  __io_submit_one fs/aio.c:1875 [inline]
  io_submit_one+0xedf/0x1cf0 fs/aio.c:1908
  __do_sys_io_submit fs/aio.c:1953 [inline]
  __se_sys_io_submit fs/aio.c:1923 [inline]
  __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#bug-status-tracking for how to communicate with syzbot.
syzbot can test patches for this bug, for details see:
https://goo.gl/tpsmEJ#testing-patches


* Re: possible deadlock in io_submit_one
  2019-02-05  2:03 possible deadlock in io_submit_one syzbot
@ 2019-03-11 13:23 ` syzbot
  2019-06-12 19:48 ` Eric Biggers
  1 sibling, 0 replies; 4+ messages in thread
From: syzbot @ 2019-03-11 13:23 UTC (permalink / raw)
  To: adam.manzanares, avi, bcrl, darrick.wong, gregkh, hch, jmoyer,
	linux-aio, linux-fsdevel, linux-kernel, syzkaller-bugs, viro

syzbot has bisected this bug to:

commit bfe4037e722ec672c9dafd5730d9132afeeb76e9
Author: Christoph Hellwig <hch@lst.de>
Date:   Mon Jul 16 07:08:20 2018 +0000

     aio: implement IOCB_CMD_POLL

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=108886bd200000
start commit:   bfe4037e aio: implement IOCB_CMD_POLL
git tree:       upstream
final crash:    https://syzkaller.appspot.com/x/report.txt?x=128886bd200000
console output: https://syzkaller.appspot.com/x/log.txt?x=148886bd200000
kernel config:  https://syzkaller.appspot.com/x/.config?x=2e0064f906afee10
dashboard link: https://syzkaller.appspot.com/bug?extid=a3accb352f9c22041cfa
userspace arch: amd64
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=156479f8c00000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=128c75c4c00000

Reported-by: syzbot+a3accb352f9c22041cfa@syzkaller.appspotmail.com
Fixes: bfe4037e ("aio: implement IOCB_CMD_POLL")


* Re: possible deadlock in io_submit_one
  2019-02-05  2:03 possible deadlock in io_submit_one syzbot
  2019-03-11 13:23 ` syzbot
@ 2019-06-12 19:48 ` Eric Biggers
  2019-06-27  7:50   ` [PATCH] userfaultfd: disable irqs for fault_pending and event locks Eric Biggers
  1 sibling, 1 reply; 4+ messages in thread
From: Eric Biggers @ 2019-06-12 19:48 UTC (permalink / raw)
  To: Bart Van Assche, Christoph Hellwig
  Cc: syzbot, bcrl, linux-aio, linux-fsdevel, linux-kernel,
	syzkaller-bugs, viro

Hi Bart and Christoph,

On Mon, Feb 04, 2019 at 06:03:04PM -0800, syzbot wrote:
> Hello,
> 
> syzbot found the following crash on:
> 
> HEAD commit:    5eeb63359b1e Merge tag 'for-linus' of git://git.kernel.org..
> git tree:       upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=17906f64c00000
> kernel config:  https://syzkaller.appspot.com/x/.config?x=2e0064f906afee10
> dashboard link: https://syzkaller.appspot.com/bug?extid=a3accb352f9c22041cfa
> compiler:       gcc (GCC) 9.0.0 20181231 (experimental)
> syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=156479f8c00000
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=128c75c4c00000
> 
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+a3accb352f9c22041cfa@syzkaller.appspotmail.com
> 
> =====================================================
> WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
> 5.0.0-rc4+ #56 Not tainted
> -----------------------------------------------------
> syz-executor263/8874 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
> 00000000c469f622 (&ctx->fd_wqh){....}, at: spin_lock include/linux/spinlock.h:329 [inline]
> 00000000c469f622 (&ctx->fd_wqh){....}, at: aio_poll fs/aio.c:1772 [inline]
> 00000000c469f622 (&ctx->fd_wqh){....}, at: __io_submit_one fs/aio.c:1875 [inline]
> 00000000c469f622 (&ctx->fd_wqh){....}, at: io_submit_one+0xedf/0x1cf0 fs/aio.c:1908
> 
> and this task is already holding:
> 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: spin_lock_irq include/linux/spinlock.h:354 [inline]
> 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: aio_poll fs/aio.c:1771 [inline]
> 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: __io_submit_one fs/aio.c:1875 [inline]
> 00000000829de875 (&(&ctx->ctx_lock)->rlock){..-.}, at: io_submit_one+0xeb6/0x1cf0 fs/aio.c:1908
> which would create a new lock dependency:
>  (&(&ctx->ctx_lock)->rlock){..-.} -> (&ctx->fd_wqh){....}
> 

This is still happening.  See
https://syzkaller.appspot.com/text?tag=CrashReport&x=129eb971a00000 for a report
on Linus' tree from 5 days ago.

I see that a few months ago there was a commit

	commit d3d6a18d7d351cbcc9b33dbedf710e65f8ce1595
	Author: Bart Van Assche <bvanassche@acm.org>
	Date:   Fri Feb 8 16:59:49 2019 -0800

	    aio: Fix locking in aio_poll()

but apparently it didn't fully fix the problem.

- Eric


* [PATCH] userfaultfd: disable irqs for fault_pending and event locks
  2019-06-12 19:48 ` Eric Biggers
@ 2019-06-27  7:50   ` Eric Biggers
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Biggers @ 2019-06-27  7:50 UTC (permalink / raw)
  To: linux-mm, Andrew Morton
  Cc: linux-aio, linux-fsdevel, syzkaller-bugs, Christoph Hellwig,
	Andrea Arcangeli

From: Eric Biggers <ebiggers@google.com>

When IOCB_CMD_POLL is used on a userfaultfd, aio_poll() disables IRQs
and takes kioctx::ctx_lock, then userfaultfd_ctx::fd_wqh.lock.  This may
have to wait for userfaultfd_ctx::fd_wqh.lock to be released by
userfaultfd_ctx_read(), which can be waiting for
userfaultfd_ctx::fault_pending_wqh.lock or
userfaultfd_ctx::event_wqh.lock.  But elsewhere the fault_pending_wqh
and event_wqh locks are taken with IRQs enabled.  Since the IRQ handler
may take kioctx::ctx_lock, lockdep reports that a deadlock is possible.

Fix it by always disabling IRQs when taking the fault_pending_wqh and
event_wqh locks.

Commit ae62c16e105a ("userfaultfd: disable irqs when taking the
waitqueue lock") didn't fix this because it only accounted for the
fd_wqh lock, not the other locks nested inside it.
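
The resulting pattern, shown as a kernel-style sketch rather than standalone-compilable code (the diff below is the authoritative change):

	/* before: IRQs stay enabled, so a softirq that needs the IRQ-safe
	 * kioctx::ctx_lock can fire while this lock is held */
	spin_lock(&ctx->fault_pending_wqh.lock);
	...
	spin_unlock(&ctx->fault_pending_wqh.lock);

	/* after: IRQs disabled across the critical section, removing the
	 * SOFTIRQ-safe -> SOFTIRQ-unsafe ordering that lockdep flagged */
	spin_lock_irq(&ctx->fault_pending_wqh.lock);
	...
	spin_unlock_irq(&ctx->fault_pending_wqh.lock);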

Reported-by: syzbot+fab6de82892b6b9c6191@syzkaller.appspotmail.com
Reported-by: syzbot+53c0b767f7ca0dc0c451@syzkaller.appspotmail.com
Reported-by: syzbot+a3accb352f9c22041cfa@syzkaller.appspotmail.com
Fixes: bfe4037e722e ("aio: implement IOCB_CMD_POLL")
Cc: <stable@vger.kernel.org> # v4.19+
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/userfaultfd.c | 42 ++++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ae0b8b5f69e6..ccbdbd62f0d8 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -40,6 +40,16 @@ enum userfaultfd_state {
 /*
  * Start with fault_pending_wqh and fault_wqh so they're more likely
  * to be in the same cacheline.
+ *
+ * Locking order:
+ *	fd_wqh.lock
+ *		fault_pending_wqh.lock
+ *			fault_wqh.lock
+ *		event_wqh.lock
+ *
+ * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
+ * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
+ * also taken in IRQ context.
  */
 struct userfaultfd_ctx {
 	/* waitqueue head for the pending (i.e. not read) userfaults */
@@ -458,7 +468,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
 			 TASK_KILLABLE;
 
-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 	/*
 	 * After the __add_wait_queue the uwq is visible to userland
 	 * through poll/read().
@@ -470,7 +480,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 * __add_wait_queue.
 	 */
 	set_current_state(blocking_state);
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 
 	if (!is_vm_hugetlb_page(vmf->vma))
 		must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -552,13 +562,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 * kernel stack can be released after the list_del_init.
 	 */
 	if (!list_empty_careful(&uwq.wq.entry)) {
-		spin_lock(&ctx->fault_pending_wqh.lock);
+		spin_lock_irq(&ctx->fault_pending_wqh.lock);
 		/*
 		 * No need of list_del_init(), the uwq on the stack
 		 * will be freed shortly anyway.
 		 */
 		list_del(&uwq.wq.entry);
-		spin_unlock(&ctx->fault_pending_wqh.lock);
+		spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 	}
 
 	/*
@@ -583,7 +593,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 	init_waitqueue_entry(&ewq->wq, current);
 	release_new_ctx = NULL;
 
-	spin_lock(&ctx->event_wqh.lock);
+	spin_lock_irq(&ctx->event_wqh.lock);
 	/*
 	 * After the __add_wait_queue the uwq is visible to userland
 	 * through poll/read().
@@ -613,15 +623,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 			break;
 		}
 
-		spin_unlock(&ctx->event_wqh.lock);
+		spin_unlock_irq(&ctx->event_wqh.lock);
 
 		wake_up_poll(&ctx->fd_wqh, EPOLLIN);
 		schedule();
 
-		spin_lock(&ctx->event_wqh.lock);
+		spin_lock_irq(&ctx->event_wqh.lock);
 	}
 	__set_current_state(TASK_RUNNING);
-	spin_unlock(&ctx->event_wqh.lock);
+	spin_unlock_irq(&ctx->event_wqh.lock);
 
 	if (release_new_ctx) {
 		struct vm_area_struct *vma;
@@ -918,10 +928,10 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 	 * the last page faults that may have been already waiting on
 	 * the fault_*wqh.
 	 */
-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 	__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
 	__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 
 	/* Flush pending events that may still wait on event_wqh */
 	wake_up_all(&ctx->event_wqh);
@@ -1134,7 +1144,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 
 	if (!ret && msg->event == UFFD_EVENT_FORK) {
 		ret = resolve_userfault_fork(ctx, fork_nctx, msg);
-		spin_lock(&ctx->event_wqh.lock);
+		spin_lock_irq(&ctx->event_wqh.lock);
 		if (!list_empty(&fork_event)) {
 			/*
 			 * The fork thread didn't abort, so we can
@@ -1180,7 +1190,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 			if (ret)
 				userfaultfd_ctx_put(fork_nctx);
 		}
-		spin_unlock(&ctx->event_wqh.lock);
+		spin_unlock_irq(&ctx->event_wqh.lock);
 	}
 
 	return ret;
@@ -1219,14 +1229,14 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
 static void __wake_userfault(struct userfaultfd_ctx *ctx,
 			     struct userfaultfd_wake_range *range)
 {
-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 	/* wake all in the range and autoremove */
 	if (waitqueue_active(&ctx->fault_pending_wqh))
 		__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
 				     range);
 	if (waitqueue_active(&ctx->fault_wqh))
 		__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range);
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 }
 
 static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
@@ -1881,7 +1891,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
 	wait_queue_entry_t *wq;
 	unsigned long pending = 0, total = 0;
 
-	spin_lock(&ctx->fault_pending_wqh.lock);
+	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 	list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {
 		pending++;
 		total++;
@@ -1889,7 +1899,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
 	list_for_each_entry(wq, &ctx->fault_wqh.head, entry) {
 		total++;
 	}
-	spin_unlock(&ctx->fault_pending_wqh.lock);
+	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 
 	/*
 	 * If more protocols will be added, there will be all shown
-- 
2.22.0


