linux-kernel.vger.kernel.org archive mirror
* possible deadlock in send_sigurg
@ 2020-04-03  6:15 syzbot
  2020-04-03  9:11 ` Oleg Nesterov
  0 siblings, 1 reply; 6+ messages in thread
From: syzbot @ 2020-04-03  6:15 UTC (permalink / raw)
  To: adobriyan, akpm, allison, areber, aubrey.li, avagin, bfields,
	christian, cyphar, ebiederm, gregkh, guro, jlayton, joel,
	keescook, linmiaohe, linux-fsdevel, linux-kernel, mhocko, mingo,
	oleg, peterz, sargun, syzkaller-bugs, tglx, viro

Hello,

syzbot found the following crash on:

HEAD commit:    7be97138 Merge tag 'xfs-5.7-merge-8' of git://git.kernel.o..
git tree:       upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=14952b6de00000
kernel config:  https://syzkaller.appspot.com/x/.config?x=d6a1e2f9a9986236
dashboard link: https://syzkaller.appspot.com/bug?extid=f675f964019f884dbd0f
compiler:       gcc (GCC) 9.0.0 20181231 (experimental)
userspace arch: i386
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=1643bf2fe00000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=13ef5733e00000

The bug was bisected to:

commit 7bc3e6e55acf065500a24621f3b313e7e5998acf
Author: Eric W. Biederman <ebiederm@xmission.com>
Date:   Thu Feb 20 00:22:26 2020 +0000

    proc: Use a list of inodes to flush from proc

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=11aa9747e00000
final crash:    https://syzkaller.appspot.com/x/report.txt?x=13aa9747e00000
console output: https://syzkaller.appspot.com/x/log.txt?x=15aa9747e00000

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+f675f964019f884dbd0f@syzkaller.appspotmail.com
Fixes: 7bc3e6e55acf ("proc: Use a list of inodes to flush from proc")

========================================================
WARNING: possible irq lock inversion dependency detected
5.6.0-syzkaller #0 Not tainted
--------------------------------------------------------
swapper/1/0 just changed the state of lock:
ffffffff898090d8 (tasklist_lock){.+.?}-{2:2}, at: send_sigurg+0x9f/0x320 fs/fcntl.c:840
but this lock took another, SOFTIRQ-unsafe lock in the past:
 (&pid->wait_pidfd){+.+.}-{2:2}


and interrupts could create inverse lock ordering between them.


other info that might help us debug this:
 Possible interrupt unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&pid->wait_pidfd);
                               local_irq_disable();
                               lock(tasklist_lock);
                               lock(&pid->wait_pidfd);
  <Interrupt>
    lock(tasklist_lock);

 *** DEADLOCK ***

4 locks held by swapper/1/0:
 #0: ffffffff899bacc0 (rcu_read_lock){....}-{1:2}, at: __write_once_size include/linux/compiler.h:226 [inline]
 #0: ffffffff899bacc0 (rcu_read_lock){....}-{1:2}, at: __skb_unlink include/linux/skbuff.h:2078 [inline]
 #0: ffffffff899bacc0 (rcu_read_lock){....}-{1:2}, at: __skb_dequeue include/linux/skbuff.h:2093 [inline]
 #0: ffffffff899bacc0 (rcu_read_lock){....}-{1:2}, at: process_backlog+0x1ad/0x7a0 net/core/dev.c:6131
 #1: ffffffff899bacc0 (rcu_read_lock){....}-{1:2}, at: __skb_pull include/linux/skbuff.h:2309 [inline]
 #1: ffffffff899bacc0 (rcu_read_lock){....}-{1:2}, at: ip_local_deliver_finish+0x124/0x360 net/ipv4/ip_input.c:228
 #2: ffff888093e42de0 (slock-AF_INET/1){+.-.}-{2:2}, at: tcp_v4_rcv+0x2d09/0x39c0 net/ipv4/tcp_ipv4.c:1997
 #3: ffff8880950c23b8 (&f->f_owner.lock){.+.?}-{2:2}, at: send_sigurg+0x1a/0x320 fs/fcntl.c:824

the shortest dependencies between 2nd lock and 1st lock:
 -> (&pid->wait_pidfd){+.+.}-{2:2} {
    HARDIRQ-ON-W at:
                      lock_acquire+0x1f2/0x8f0 kernel/locking/lockdep.c:4923
                      __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
                      _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151
                      spin_lock include/linux/spinlock.h:353 [inline]
                      proc_pid_make_inode+0x1f9/0x3c0 fs/proc/base.c:1880
                      proc_pid_instantiate+0x51/0x150 fs/proc/base.c:3285
                      proc_pid_lookup+0x1da/0x340 fs/proc/base.c:3320
                      proc_root_lookup+0x20/0x60 fs/proc/root.c:243
                      __lookup_slow+0x256/0x490 fs/namei.c:1530
                      lookup_slow fs/namei.c:1547 [inline]
                      walk_component+0x418/0x6a0 fs/namei.c:1846
                      link_path_walk.part.0+0x4f1/0xb50 fs/namei.c:2166
                      link_path_walk fs/namei.c:2098 [inline]
                      path_openat+0x25a/0x27b0 fs/namei.c:3342
                      do_filp_open+0x203/0x260 fs/namei.c:3375
                      do_sys_openat2+0x585/0x770 fs/open.c:1148
                      do_sys_open+0xc3/0x140 fs/open.c:1164
                      do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295
                      entry_SYSCALL_64_after_hwframe+0x49/0xb3
    SOFTIRQ-ON-W at:
                      lock_acquire+0x1f2/0x8f0 kernel/locking/lockdep.c:4923
                      __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
                      _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151
                      spin_lock include/linux/spinlock.h:353 [inline]
                      proc_pid_make_inode+0x1f9/0x3c0 fs/proc/base.c:1880
                      proc_pid_instantiate+0x51/0x150 fs/proc/base.c:3285
                      proc_pid_lookup+0x1da/0x340 fs/proc/base.c:3320
                      proc_root_lookup+0x20/0x60 fs/proc/root.c:243
                      __lookup_slow+0x256/0x490 fs/namei.c:1530
                      lookup_slow fs/namei.c:1547 [inline]
                      walk_component+0x418/0x6a0 fs/namei.c:1846
                      link_path_walk.part.0+0x4f1/0xb50 fs/namei.c:2166
                      link_path_walk fs/namei.c:2098 [inline]
                      path_openat+0x25a/0x27b0 fs/namei.c:3342
                      do_filp_open+0x203/0x260 fs/namei.c:3375
                      do_sys_openat2+0x585/0x770 fs/open.c:1148
                      do_sys_open+0xc3/0x140 fs/open.c:1164
                      do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295
                      entry_SYSCALL_64_after_hwframe+0x49/0xb3
    INITIAL USE at:
                     lock_acquire+0x1f2/0x8f0 kernel/locking/lockdep.c:4923
                     __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
                     _raw_spin_lock_irqsave+0x8c/0xbf kernel/locking/spinlock.c:159
                     __wake_up_common_lock+0xb4/0x130 kernel/sched/wait.c:122
                     do_notify_pidfd kernel/signal.c:1900 [inline]
                     do_notify_parent+0x19e/0xe60 kernel/signal.c:1927
                     exit_notify kernel/exit.c:660 [inline]
                     do_exit+0x238f/0x2dd0 kernel/exit.c:816
                     call_usermodehelper_exec_async+0x507/0x710 kernel/umh.c:125
                     ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352
  }
  ... key      at: [<ffffffff8bba4680>] __key.53714+0x0/0x40
  ... acquired at:
   __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
   _raw_spin_lock_irqsave+0x8c/0xbf kernel/locking/spinlock.c:159
   __wake_up_common_lock+0xb4/0x130 kernel/sched/wait.c:122
   do_notify_pidfd kernel/signal.c:1900 [inline]
   do_notify_parent+0x19e/0xe60 kernel/signal.c:1927
   exit_notify kernel/exit.c:660 [inline]
   do_exit+0x238f/0x2dd0 kernel/exit.c:816
   call_usermodehelper_exec_async+0x507/0x710 kernel/umh.c:125
   ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352

-> (tasklist_lock){.+.?}-{2:2} {
   HARDIRQ-ON-R at:
                    lock_acquire+0x1f2/0x8f0 kernel/locking/lockdep.c:4923
                    __raw_read_lock include/linux/rwlock_api_smp.h:149 [inline]
                    _raw_read_lock+0x2d/0x40 kernel/locking/spinlock.c:223
                    do_wait+0x3b9/0xa00 kernel/exit.c:1436
                    kernel_wait4+0x14c/0x260 kernel/exit.c:1611
                    call_usermodehelper_exec_sync kernel/umh.c:150 [inline]
                    call_usermodehelper_exec_work+0x172/0x260 kernel/umh.c:187
                    process_one_work+0x965/0x16a0 kernel/workqueue.c:2266
                    worker_thread+0x96/0xe20 kernel/workqueue.c:2412
                    kthread+0x388/0x470 kernel/kthread.c:268
                    ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352
   IN-SOFTIRQ-R at:
                    lock_acquire+0x1f2/0x8f0 kernel/locking/lockdep.c:4923
                    __raw_read_lock include/linux/rwlock_api_smp.h:149 [inline]
                    _raw_read_lock+0x2d/0x40 kernel/locking/spinlock.c:223
                    send_sigurg+0x9f/0x320 fs/fcntl.c:840
                    sk_send_sigurg+0x76/0x300 net/core/sock.c:2855
                    tcp_check_urg net/ipv4/tcp_input.c:5353 [inline]
                    tcp_urg+0x38c/0xb80 net/ipv4/tcp_input.c:5394
                    tcp_rcv_established+0x8f3/0x1d90 net/ipv4/tcp_input.c:5724
                    tcp_v4_do_rcv+0x605/0x8b0 net/ipv4/tcp_ipv4.c:1621
                    tcp_v4_rcv+0x2f60/0x39c0 net/ipv4/tcp_ipv4.c:2003
                    ip_protocol_deliver_rcu+0x57/0x880 net/ipv4/ip_input.c:204
                    ip_local_deliver_finish+0x220/0x360 net/ipv4/ip_input.c:231
                    NF_HOOK include/linux/netfilter.h:307 [inline]
                    NF_HOOK include/linux/netfilter.h:301 [inline]
                    ip_local_deliver+0x1c8/0x4e0 net/ipv4/ip_input.c:252
                    dst_input include/net/dst.h:441 [inline]
                    ip_rcv_finish+0x1da/0x2f0 net/ipv4/ip_input.c:428
                    NF_HOOK include/linux/netfilter.h:307 [inline]
                    NF_HOOK include/linux/netfilter.h:301 [inline]
                    ip_rcv+0xd0/0x3c0 net/ipv4/ip_input.c:539
                    __netif_receive_skb_one_core+0xf5/0x160 net/core/dev.c:5187
                    __netif_receive_skb+0x27/0x1c0 net/core/dev.c:5301
                    process_backlog+0x21e/0x7a0 net/core/dev.c:6133
                    napi_poll net/core/dev.c:6571 [inline]
                    net_rx_action+0x4c2/0x1070 net/core/dev.c:6639
                    __do_softirq+0x26c/0x9f7 kernel/softirq.c:292
                    invoke_softirq kernel/softirq.c:373 [inline]
                    irq_exit+0x192/0x1d0 kernel/softirq.c:413
                    exiting_irq arch/x86/include/asm/apic.h:546 [inline]
                    smp_apic_timer_interrupt+0x19e/0x600 arch/x86/kernel/apic/apic.c:1140
                    apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:829
                    native_safe_halt+0xe/0x10 arch/x86/include/asm/irqflags.h:60
                    arch_safe_halt arch/x86/include/asm/paravirt.h:144 [inline]
                    default_idle+0x49/0x350 arch/x86/kernel/process.c:697
                    cpuidle_idle_call kernel/sched/idle.c:154 [inline]
                    do_idle+0x393/0x690 kernel/sched/idle.c:269
                    cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:361
                    start_secondary+0x2f3/0x400 arch/x86/kernel/smpboot.c:268
                    secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:242
   SOFTIRQ-ON-R at:
                    lock_acquire+0x1f2/0x8f0 kernel/locking/lockdep.c:4923
                    __raw_read_lock include/linux/rwlock_api_smp.h:149 [inline]
                    _raw_read_lock+0x2d/0x40 kernel/locking/spinlock.c:223
                    do_wait+0x3b9/0xa00 kernel/exit.c:1436
                    kernel_wait4+0x14c/0x260 kernel/exit.c:1611
                    call_usermodehelper_exec_sync kernel/umh.c:150 [inline]
                    call_usermodehelper_exec_work+0x172/0x260 kernel/umh.c:187
                    process_one_work+0x965/0x16a0 kernel/workqueue.c:2266
                    worker_thread+0x96/0xe20 kernel/workqueue.c:2412
                    kthread+0x388/0x470 kernel/kthread.c:268
                    ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352
   INITIAL USE at:
                   lock_acquire+0x1f2/0x8f0 kernel/locking/lockdep.c:4923
                   __raw_write_lock_irq include/linux/rwlock_api_smp.h:196 [inline]
                   _raw_write_lock_irq+0x5b/0x80 kernel/locking/spinlock.c:311
                   copy_process+0x3430/0x72c0 kernel/fork.c:2204
                   _do_fork+0x12d/0x1010 kernel/fork.c:2431
                   kernel_thread+0xb1/0xf0 kernel/fork.c:2518
                   rest_init+0x23/0x365 init/main.c:626
                   start_kernel+0x867/0x8a1 init/main.c:998
                   secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:242
 }
 ... key      at: [<ffffffff898090d8>] tasklist_lock+0x18/0x40
 ... acquired at:
   mark_lock_irq kernel/locking/lockdep.c:3585 [inline]
   mark_lock+0x624/0xf10 kernel/locking/lockdep.c:3935
   mark_usage kernel/locking/lockdep.c:3826 [inline]
   __lock_acquire+0x1ed9/0x4e00 kernel/locking/lockdep.c:4298
   lock_acquire+0x1f2/0x8f0 kernel/locking/lockdep.c:4923
   __raw_read_lock include/linux/rwlock_api_smp.h:149 [inline]
   _raw_read_lock+0x2d/0x40 kernel/locking/spinlock.c:223
   send_sigurg+0x9f/0x320 fs/fcntl.c:840
   sk_send_sigurg+0x76/0x300 net/core/sock.c:2855
   tcp_check_urg net/ipv4/tcp_input.c:5353 [inline]
   tcp_urg+0x38c/0xb80 net/ipv4/tcp_input.c:5394
   tcp_rcv_established+0x8f3/0x1d90 net/ipv4/tcp_input.c:5724
   tcp_v4_do_rcv+0x605/0x8b0 net/ipv4/tcp_ipv4.c:1621
   tcp_v4_rcv+0x2f60/0x39c0 net/ipv4/tcp_ipv4.c:2003
   ip_protocol_deliver_rcu+0x57/0x880 net/ipv4/ip_input.c:204
   ip_local_deliver_finish+0x220/0x360 net/ipv4/ip_input.c:231
   NF_HOOK include/linux/netfilter.h:307 [inline]
   NF_HOOK include/linux/netfilter.h:301 [inline]
   ip_local_deliver+0x1c8/0x4e0 net/ipv4/ip_input.c:252
   dst_input include/net/dst.h:441 [inline]
   ip_rcv_finish+0x1da/0x2f0 net/ipv4/ip_input.c:428
   NF_HOOK include/linux/netfilter.h:307 [inline]
   NF_HOOK include/linux/netfilter.h:301 [inline]
   ip_rcv+0xd0/0x3c0 net/ipv4/ip_input.c:539
   __netif_receive_skb_one_core+0xf5/0x160 net/core/dev.c:5187
   __netif_receive_skb+0x27/0x1c0 net/core/dev.c:5301
   process_backlog+0x21e/0x7a0 net/core/dev.c:6133
   napi_poll net/core/dev.c:6571 [inline]
   net_rx_action+0x4c2/0x1070 net/core/dev.c:6639
   __do_softirq+0x26c/0x9f7 kernel/softirq.c:292
   invoke_softirq kernel/softirq.c:373 [inline]
   irq_exit+0x192/0x1d0 kernel/softirq.c:413
   exiting_irq arch/x86/include/asm/apic.h:546 [inline]
   smp_apic_timer_interrupt+0x19e/0x600 arch/x86/kernel/apic/apic.c:1140
   apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:829
   native_safe_halt+0xe/0x10 arch/x86/include/asm/irqflags.h:60
   arch_safe_halt arch/x86/include/asm/paravirt.h:144 [inline]
   default_idle+0x49/0x350 arch/x86/kernel/process.c:697
   cpuidle_idle_call kernel/sched/idle.c:154 [inline]
   do_idle+0x393/0x690 kernel/sched/idle.c:269
   cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:361
   start_secondary+0x2f3/0x400 arch/x86/kernel/smpboot.c:268
   secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:242


stack backtrace:
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.6.0-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x188/0x20d lib/dump_stack.c:118
 print_irq_inversion_bug kernel/locking/lockdep.c:3448 [inline]
 check_usage_forwards.cold+0x20/0x29 kernel/locking/lockdep.c:3472
 mark_lock_irq kernel/locking/lockdep.c:3585 [inline]
 mark_lock+0x624/0xf10 kernel/locking/lockdep.c:3935
 mark_usage kernel/locking/lockdep.c:3826 [inline]
 __lock_acquire+0x1ed9/0x4e00 kernel/locking/lockdep.c:4298
 lock_acquire+0x1f2/0x8f0 kernel/locking/lockdep.c:4923
 __raw_read_lock include/linux/rwlock_api_smp.h:149 [inline]
 _raw_read_lock+0x2d/0x40 kernel/locking/spinlock.c:223
 send_sigurg+0x9f/0x320 fs/fcntl.c:840
 sk_send_sigurg+0x76/0x300 net/core/sock.c:2855
 tcp_check_urg net/ipv4/tcp_input.c:5353 [inline]
 tcp_urg+0x38c/0xb80 net/ipv4/tcp_input.c:5394
 tcp_rcv_established+0x8f3/0x1d90 net/ipv4/tcp_input.c:5724
 tcp_v4_do_rcv+0x605/0x8b0 net/ipv4/tcp_ipv4.c:1621
 tcp_v4_rcv+0x2f60/0x39c0 net/ipv4/tcp_ipv4.c:2003
 ip_protocol_deliver_rcu+0x57/0x880 net/ipv4/ip_input.c:204
 ip_local_deliver_finish+0x220/0x360 net/ipv4/ip_input.c:231
 NF_HOOK include/linux/netfilter.h:307 [inline]
 NF_HOOK include/linux/netfilter.h:301 [inline]
 ip_local_deliver+0x1c8/0x4e0 net/ipv4/ip_input.c:252
 dst_input include/net/dst.h:441 [inline]
 ip_rcv_finish+0x1da/0x2f0 net/ipv4/ip_input.c:428
 NF_HOOK include/linux/netfilter.h:307 [inline]
 NF_HOOK include/linux/netfilter.h:301 [inline]
 ip_rcv+0xd0/0x3c0 net/ipv4/ip_input.c:539
 __netif_receive_skb_one_core+0xf5/0x160 net/core/dev.c:5187
 __netif_receive_skb+0x27/0x1c0 net/core/dev.c:5301
 process_backlog+0x21e/0x7a0 net/core/dev.c:6133
 napi_poll net/core/dev.c:6571 [inline]
 net_rx_action+0x4c2/0x1070 net/core/dev.c:6639
 __do_softirq+0x26c/0x9f7 kernel/softirq.c:292
 invoke_softirq kernel/softirq.c:373 [inline]
 irq_exit+0x192/0x1d0 kernel/softirq.c:413
 exiting_irq arch/x86/include/asm/apic.h:546 [inline]
 smp_apic_timer_interrupt+0x19e/0x600 arch/x86/kernel/apic/apic.c:1140
 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:829
 </IRQ>
RIP: 0010:native_safe_halt+0xe/0x10 arch/x86/include/asm/irqflags.h:61
Code: cc cc cc cc cc cc cc cc cc cc cc cc e9 07 00 00 00 0f 00 2d 44 ae 5e 00 f4 c3 66 90 e9 07 00 00 00 0f 00 2d 34 ae 5e 00 fb f4 <c3> cc 41 56 41 55 41 54 55 53 e8 c3 07 97 f9 e8 9e 72 cb fb 0f 1f
RSP: 0018:ffffc90000d3fdb8 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff13
RAX: 1ffffffff13291af RBX: ffff8880a95f2340 RCX: 0000000000000000
RDX: dffffc0000000000 RSI: 0000000000000006 RDI: ffff8880a95f2c04
RBP: dffffc0000000000 R08: ffff8880a95f2340 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffffed10152be468
R13: 0000000000000001 R14: ffffffff8a883540 R15: 0000000000000000
 arch_safe_halt arch/x86/include/asm/paravirt.h:144 [inline]
 default_idle+0x49/0x350 arch/x86/kernel/process.c:697
 cpuidle_idle_call kernel/sched/idle.c:154 [inline]
 do_idle+0x393/0x690 kernel/sched/idle.c:269
 cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:361
 start_secondary+0x2f3/0x400 arch/x86/kernel/smpboot.c:268
 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:242


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
For information about bisection process see: https://goo.gl/tpsmEJ#bisection
syzbot can test patches for this bug, for details see:
https://goo.gl/tpsmEJ#testing-patches
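
To make the lockdep report above easier to follow, here is a schematic
restatement of the inversion in C.  This is an illustration only, not code
from the thread and not a compilable unit; the function and lock names are
taken from the 5.6 sources referenced in the traces.

	/* CPU0: process context, irqs enabled -- proc_pid_make_inode() */
	spin_lock(&pid->wait_pidfd.lock);   /* plain spin_lock, irqs stay enabled */
	/*
	 * <interrupt> a softirq handling TCP urgent data runs on this CPU:
	 *   tcp_urg() -> sk_send_sigurg() -> send_sigurg()
	 *     read_lock(&tasklist_lock);      <- spins, because of CPU1 below
	 */
	spin_unlock(&pid->wait_pidfd.lock);

	/* CPU1: exit path -- exit_notify() */
	write_lock_irq(&tasklist_lock);     /* irqs off, tasklist_lock write-held */
	do_notify_parent(tsk, sig);         /* -> do_notify_pidfd()
	                                     *    -> __wake_up(&pid->wait_pidfd, ...)
	                                     *       -> spin_lock_irqsave(
	                                     *              &pid->wait_pidfd.lock)
	                                     *          <- spins on the lock that
	                                     *             CPU0 still holds       */
	write_unlock_irq(&tasklist_lock);

Neither CPU can make progress: CPU0's softirq spins on tasklist_lock while
the task that owns wait_pidfd.lock sits interrupted underneath it, and CPU1
spins on wait_pidfd.lock while holding tasklist_lock with irqs disabled.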


* Re: possible deadlock in send_sigurg
  2020-04-03  6:15 possible deadlock in send_sigurg syzbot
@ 2020-04-03  9:11 ` Oleg Nesterov
  2020-04-03  9:36   ` Christian Brauner
  2020-04-08 20:28   ` [PATCH] proc: Use a dedicated lock in struct pid Eric W. Biederman
  0 siblings, 2 replies; 6+ messages in thread
From: Oleg Nesterov @ 2020-04-03  9:11 UTC (permalink / raw)
  To: syzbot
  Cc: adobriyan, akpm, allison, areber, aubrey.li, avagin, bfields,
	christian, cyphar, ebiederm, gregkh, guro, jlayton, joel,
	keescook, linmiaohe, linux-fsdevel, linux-kernel, mhocko, mingo,
	peterz, sargun, syzkaller-bugs, tglx, viro

On 04/02, syzbot wrote:
>
>                       lock_acquire+0x1f2/0x8f0 kernel/locking/lockdep.c:4923
>                       __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
>                       _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151
>                       spin_lock include/linux/spinlock.h:353 [inline]
>                       proc_pid_make_inode+0x1f9/0x3c0 fs/proc/base.c:1880

Yes, spin_lock(wait_pidfd.lock) is not safe...

Eric, at first glance the fix is simple.

Oleg.


diff --git a/fs/proc/base.c b/fs/proc/base.c
index 74f948a6b621..9ec8c114aa60 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1839,9 +1839,9 @@ void proc_pid_evict_inode(struct proc_inode *ei)
 	struct pid *pid = ei->pid;
 
 	if (S_ISDIR(ei->vfs_inode.i_mode)) {
-		spin_lock(&pid->wait_pidfd.lock);
+		spin_lock_irq(&pid->wait_pidfd.lock);
 		hlist_del_init_rcu(&ei->sibling_inodes);
-		spin_unlock(&pid->wait_pidfd.lock);
+		spin_unlock_irq(&pid->wait_pidfd.lock);
 	}
 
 	put_pid(pid);
@@ -1877,9 +1877,9 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
 	/* Let the pid remember us for quick removal */
 	ei->pid = pid;
 	if (S_ISDIR(mode)) {
-		spin_lock(&pid->wait_pidfd.lock);
+		spin_lock_irq(&pid->wait_pidfd.lock);
 		hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
-		spin_unlock(&pid->wait_pidfd.lock);
+		spin_unlock_irq(&pid->wait_pidfd.lock);
 	}
 
 	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 1e730ea1dcd6..6b7ee76e1b36 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -123,9 +123,9 @@ void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock
 		if (!node)
 			break;
 		ei = hlist_entry(node, struct proc_inode, sibling_inodes);
-		spin_lock(lock);
+		spin_lock_irq(lock);
 		hlist_del_init_rcu(&ei->sibling_inodes);
-		spin_unlock(lock);
+		spin_unlock_irq(lock);
 
 		inode = &ei->vfs_inode;
 		sb = inode->i_sb;
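
As an aside on why the _irq variants are enough here (this is an
interpretation, not part of Oleg's patch): all three call sites run in
process context with interrupts enabled, so the unconditional
disable/enable pair suffices, whereas a caller that might already have
interrupts disabled would need the irqsave form.  A minimal sketch of the
difference, assuming the standard kernel spinlock API:

	/* Process context, irqs known to be enabled on entry (the proc paths): */
	spin_lock_irq(&pid->wait_pidfd.lock);     /* disable irqs, take the lock */
	hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
	spin_unlock_irq(&pid->wait_pidfd.lock);   /* drop lock, re-enable irqs   */

	/* Context where irqs may already be off (e.g. under another _irq lock): */
	unsigned long flags;
	spin_lock_irqsave(&pid->wait_pidfd.lock, flags);      /* save + disable  */
	hlist_del_init_rcu(&ei->sibling_inodes);
	spin_unlock_irqrestore(&pid->wait_pidfd.lock, flags); /* restore state   */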



* Re: possible deadlock in send_sigurg
  2020-04-03  9:11 ` Oleg Nesterov
@ 2020-04-03  9:36   ` Christian Brauner
  2020-04-03 12:57     ` Eric W. Biederman
  2020-04-08 20:28   ` [PATCH] proc: Use a dedicated lock in struct pid Eric W. Biederman
  1 sibling, 1 reply; 6+ messages in thread
From: Christian Brauner @ 2020-04-03  9:36 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: syzbot, adobriyan, akpm, allison, areber, aubrey.li, avagin,
	bfields, christian, cyphar, ebiederm, gregkh, guro, jlayton,
	joel, keescook, linmiaohe, linux-fsdevel, linux-kernel, mhocko,
	mingo, peterz, sargun, syzkaller-bugs, tglx, viro

On Fri, Apr 03, 2020 at 11:11:35AM +0200, Oleg Nesterov wrote:
> On 04/02, syzbot wrote:
> >
> >                       lock_acquire+0x1f2/0x8f0 kernel/locking/lockdep.c:4923
> >                       __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
> >                       _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151
> >                       spin_lock include/linux/spinlock.h:353 [inline]
> >                       proc_pid_make_inode+0x1f9/0x3c0 fs/proc/base.c:1880
> 
> Yes, spin_lock(wait_pidfd.lock) is not safe...
> 
> Eric, at first glance the fix is simple.
> 
> Oleg.
> 
> 
> diff --git a/fs/proc/base.c b/fs/proc/base.c

Um, when did this lock get added to proc/base.c in the first place and
why has it been abused for this?
People just recently complained loudly, in the cred_guard_mutex thread,
that abusing locks for things they weren't intended for is a bad idea...

> index 74f948a6b621..9ec8c114aa60 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -1839,9 +1839,9 @@ void proc_pid_evict_inode(struct proc_inode *ei)
>  	struct pid *pid = ei->pid;
>  
>  	if (S_ISDIR(ei->vfs_inode.i_mode)) {
> -		spin_lock(&pid->wait_pidfd.lock);
> +		spin_lock_irq(&pid->wait_pidfd.lock);
>  		hlist_del_init_rcu(&ei->sibling_inodes);
> -		spin_unlock(&pid->wait_pidfd.lock);
> +		spin_unlock_irq(&pid->wait_pidfd.lock);
>  	}
>  
>  	put_pid(pid);
> @@ -1877,9 +1877,9 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
>  	/* Let the pid remember us for quick removal */
>  	ei->pid = pid;
>  	if (S_ISDIR(mode)) {
> -		spin_lock(&pid->wait_pidfd.lock);
> +		spin_lock_irq(&pid->wait_pidfd.lock);
>  		hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
> -		spin_unlock(&pid->wait_pidfd.lock);
> +		spin_unlock_irq(&pid->wait_pidfd.lock);
>  	}
>  
>  	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
> diff --git a/fs/proc/inode.c b/fs/proc/inode.c
> index 1e730ea1dcd6..6b7ee76e1b36 100644
> --- a/fs/proc/inode.c
> +++ b/fs/proc/inode.c
> @@ -123,9 +123,9 @@ void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock
>  		if (!node)
>  			break;
>  		ei = hlist_entry(node, struct proc_inode, sibling_inodes);
> -		spin_lock(lock);
> +		spin_lock_irq(lock);
>  		hlist_del_init_rcu(&ei->sibling_inodes);
> -		spin_unlock(lock);
> +		spin_unlock_irq(lock);
>  
>  		inode = &ei->vfs_inode;
>  		sb = inode->i_sb;
> 


* Re: possible deadlock in send_sigurg
  2020-04-03  9:36   ` Christian Brauner
@ 2020-04-03 12:57     ` Eric W. Biederman
  0 siblings, 0 replies; 6+ messages in thread
From: Eric W. Biederman @ 2020-04-03 12:57 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Oleg Nesterov, syzbot, adobriyan, akpm, allison, areber,
	aubrey.li, avagin, bfields, christian, cyphar, gregkh, guro,
	jlayton, joel, keescook, linmiaohe, linux-fsdevel, linux-kernel,
	mhocko, mingo, peterz, sargun, syzkaller-bugs, tglx, viro

Christian Brauner <christian.brauner@ubuntu.com> writes:

> On Fri, Apr 03, 2020 at 11:11:35AM +0200, Oleg Nesterov wrote:
>> On 04/02, syzbot wrote:
>> >
>> >                       lock_acquire+0x1f2/0x8f0 kernel/locking/lockdep.c:4923
>> >                       __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
>> >                       _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151
>> >                       spin_lock include/linux/spinlock.h:353 [inline]
>> >                       proc_pid_make_inode+0x1f9/0x3c0 fs/proc/base.c:1880
>> 
>> Yes, spin_lock(wait_pidfd.lock) is not safe...
>> 
>> Eric, at first glance the fix is simple.
>> 
>> Oleg.
>> 
>> 
>> diff --git a/fs/proc/base.c b/fs/proc/base.c
>
> Um, when did this lock get added to proc/base.c in the first place and
> why has it been abused for this?

Because struct pid is too bloated already.

> People just recently complained loudly, in the cred_guard_mutex thread,
> that abusing locks for things they weren't intended for is a bad idea...

The problem there is/was holding locks over code they shouldn't be held
over.  It looks like I made an equally dumb mistake with struct pid.

That said, can you take a look at putting the do_notify_pidfd call
someplace sane?  I can't see how it makes sense to call that in
the same set of circumstances where we notify the parent.

Reparenting should not be a concern, nor should ptracing, which I think
means that do_notify_pidfd can potentially get called many more times
than it needs to be.

Not to mention it is being called a bit too soon when called from
do_notify_parent, which I saw earlier is causing problems.  Signal
sending can call do_notify_parent early because everything just queues
up and no action is taken.  Wake-ups, on the other hand, trigger more
immediate action.

There is no connection to the current bug except that this discussion
just reminded me about do_notify_pidfd and I figured I should say
something before I forget again.

Eric


* [PATCH] proc: Use a dedicated lock in struct pid
  2020-04-03  9:11 ` Oleg Nesterov
  2020-04-03  9:36   ` Christian Brauner
@ 2020-04-08 20:28   ` Eric W. Biederman
  2020-04-09  8:35     ` Christian Brauner
  1 sibling, 1 reply; 6+ messages in thread
From: Eric W. Biederman @ 2020-04-08 20:28 UTC (permalink / raw)
  To: linux-kernel
  Cc: Oleg Nesterov, syzbot, adobriyan, akpm, allison, areber,
	aubrey.li, avagin, bfields, christian, cyphar, gregkh, guro,
	jlayton, joel, keescook, linmiaohe, linux-fsdevel, mhocko, mingo,
	peterz, sargun, syzkaller-bugs, tglx, viro


syzbot wrote:
> ========================================================
> WARNING: possible irq lock inversion dependency detected
> 5.6.0-syzkaller #0 Not tainted
> --------------------------------------------------------
> swapper/1/0 just changed the state of lock:
> ffffffff898090d8 (tasklist_lock){.+.?}-{2:2}, at: send_sigurg+0x9f/0x320 fs/fcntl.c:840
> but this lock took another, SOFTIRQ-unsafe lock in the past:
>  (&pid->wait_pidfd){+.+.}-{2:2}
>
>
> and interrupts could create inverse lock ordering between them.
>
>
> other info that might help us debug this:
>  Possible interrupt unsafe locking scenario:
>
>        CPU0                    CPU1
>        ----                    ----
>   lock(&pid->wait_pidfd);
>                                local_irq_disable();
>                                lock(tasklist_lock);
>                                lock(&pid->wait_pidfd);
>   <Interrupt>
>     lock(tasklist_lock);
>
>  *** DEADLOCK ***
>
> 4 locks held by swapper/1/0:

The problem is that wait_pidfd.lock is taken under the tasklist lock.
It must always be taken with irqs disabled, because tasklist_lock can be
taken from interrupt context, and if wait_pidfd.lock was already held
this would create a lock order inversion.

Oleg suggested just disabling irqs where I have added the extra uses of
wait_pidfd.lock.  That should be safe and I think the code will
eventually do that.  It was rightly pointed out by Christian that
sharing the wait_pidfd.lock was a premature optimization.

It is also true that my pre-merge window testing was insufficient.  So
remove the premature optimization and give struct pid a dedicated lock
of its own for struct pid things.  I have verified that lockdep sees all
3 paths where we take the new pid->lock and lockdep does not complain.

It is my current daydream that one day pid->lock can be used to guard
the task lists as well, and then the tasklist_lock won't need to be held
to deliver signals.  That will require taking pid->lock with irqs
disabled.

Link: https://lore.kernel.org/lkml/00000000000011d66805a25cd73f@google.com/
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Reported-by: syzbot+343f75cdeea091340956@syzkaller.appspotmail.com
Reported-by: syzbot+832aabf700bc3ec920b9@syzkaller.appspotmail.com
Reported-by: syzbot+f675f964019f884dbd0f@syzkaller.appspotmail.com
Reported-by: syzbot+a9fb1457d720a55d6dc5@syzkaller.appspotmail.com
Fixes: 7bc3e6e55acf ("proc: Use a list of inodes to flush from proc")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---

If anyone sees an issue please holler; otherwise I plan on sending
this fix to Linus.

 fs/proc/base.c      | 10 +++++-----
 include/linux/pid.h |  1 +
 kernel/pid.c        |  1 +
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 74f948a6b621..6042b646ab27 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1839,9 +1839,9 @@ void proc_pid_evict_inode(struct proc_inode *ei)
 	struct pid *pid = ei->pid;
 
 	if (S_ISDIR(ei->vfs_inode.i_mode)) {
-		spin_lock(&pid->wait_pidfd.lock);
+		spin_lock(&pid->lock);
 		hlist_del_init_rcu(&ei->sibling_inodes);
-		spin_unlock(&pid->wait_pidfd.lock);
+		spin_unlock(&pid->lock);
 	}
 
 	put_pid(pid);
@@ -1877,9 +1877,9 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
 	/* Let the pid remember us for quick removal */
 	ei->pid = pid;
 	if (S_ISDIR(mode)) {
-		spin_lock(&pid->wait_pidfd.lock);
+		spin_lock(&pid->lock);
 		hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
-		spin_unlock(&pid->wait_pidfd.lock);
+		spin_unlock(&pid->lock);
 	}
 
 	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
@@ -3273,7 +3273,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
 
 void proc_flush_pid(struct pid *pid)
 {
-	proc_invalidate_siblings_dcache(&pid->inodes, &pid->wait_pidfd.lock);
+	proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock);
 	put_pid(pid);
 }
 
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 01a0d4e28506..cc896f0fc4e3 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -60,6 +60,7 @@ struct pid
 {
 	refcount_t count;
 	unsigned int level;
+	spinlock_t lock;
 	/* lists of tasks that use this pid */
 	struct hlist_head tasks[PIDTYPE_MAX];
 	struct hlist_head inodes;
diff --git a/kernel/pid.c b/kernel/pid.c
index efd34874b3d1..517d0855d4cf 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -246,6 +246,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 
 	get_pid_ns(ns);
 	refcount_set(&pid->count, 1);
+	spin_lock_init(&pid->lock);
 	for (type = 0; type < PIDTYPE_MAX; ++type)
 		INIT_HLIST_HEAD(&pid->tasks[type]);
 
-- 
2.20.1

Eric
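
As a side note on the design choice (a reading of the patch, not text from
the thread): wait_pidfd.lock goes back to being only the pidfd wait-queue
lock, taken through the waitqueue helpers that disable irqs themselves,
while the new pid->lock is taken only from process context and is never
nested inside tasklist_lock, so a plain spin_lock() cannot participate in
the reported inversion.  Schematically, under those assumptions:

	/*
	 * After the patch (illustration, not code from the patch):
	 *
	 *   pid->wait_pidfd.lock  only the wait-queue lock, reached via
	 *                         __wake_up()/add_wait_queue(), which use
	 *                         spin_lock_irqsave() internally.
	 *
	 *   pid->lock             taken in proc_pid_make_inode(),
	 *                         proc_pid_evict_inode() and proc_flush_pid(),
	 *                         all process context, never under
	 *                         tasklist_lock and never from an interrupt.
	 */
	spin_lock(&pid->lock);
	hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
	spin_unlock(&pid->lock);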


* Re: [PATCH] proc: Use a dedicated lock in struct pid
  2020-04-08 20:28   ` [PATCH] proc: Use a dedicated lock in struct pid Eric W. Biederman
@ 2020-04-09  8:35     ` Christian Brauner
  0 siblings, 0 replies; 6+ messages in thread
From: Christian Brauner @ 2020-04-09  8:35 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: linux-kernel, Oleg Nesterov, syzbot, adobriyan, akpm, allison,
	areber, aubrey.li, avagin, bfields, christian, cyphar, gregkh,
	guro, jlayton, joel, keescook, linmiaohe, linux-fsdevel, mhocko,
	mingo, peterz, sargun, syzkaller-bugs, tglx, viro

On Wed, Apr 08, 2020 at 03:28:40PM -0500, Eric W. Biederman wrote:
> 
> syzbot wrote:
> > ========================================================
> > WARNING: possible irq lock inversion dependency detected
> > 5.6.0-syzkaller #0 Not tainted
> > --------------------------------------------------------
> > swapper/1/0 just changed the state of lock:
> > ffffffff898090d8 (tasklist_lock){.+.?}-{2:2}, at: send_sigurg+0x9f/0x320 fs/fcntl.c:840
> > but this lock took another, SOFTIRQ-unsafe lock in the past:
> >  (&pid->wait_pidfd){+.+.}-{2:2}
> >
> >
> > and interrupts could create inverse lock ordering between them.
> >
> >
> > other info that might help us debug this:
> >  Possible interrupt unsafe locking scenario:
> >
> >        CPU0                    CPU1
> >        ----                    ----
> >   lock(&pid->wait_pidfd);
> >                                local_irq_disable();
> >                                lock(tasklist_lock);
> >                                lock(&pid->wait_pidfd);
> >   <Interrupt>
> >     lock(tasklist_lock);
> >
> >  *** DEADLOCK ***
> >
> > 4 locks held by swapper/1/0:
> 
> The problem is that wait_pidfd.lock is taken under the tasklist lock.
> It must always be taken with irqs disabled, because tasklist_lock can be
> taken from interrupt context, and if wait_pidfd.lock was already held
> this would create a lock order inversion.
> 
> Oleg suggested just disabling irqs where I have added the extra uses of
> wait_pidfd.lock.  That should be safe and I think the code will
> eventually do that.  It was rightly pointed out by Christian that
> sharing the wait_pidfd.lock was a premature optimization.
> 
> It is also true that my pre-merge window testing was insufficient.  So
> remove the premature optimization and give struct pid a dedicated lock
> of its own for struct pid things.  I have verified that lockdep sees all
> 3 paths where we take the new pid->lock and lockdep does not complain.
> 
> It is my current daydream that one day pid->lock can be used to guard
> the task lists as well, and then the tasklist_lock won't need to be held
> to deliver signals.  That will require taking pid->lock with irqs
> disabled.
> 
> Link: https://lore.kernel.org/lkml/00000000000011d66805a25cd73f@google.com/
> Cc: Oleg Nesterov <oleg@redhat.com>
> Cc: Christian Brauner <christian.brauner@ubuntu.com>
> Reported-by: syzbot+343f75cdeea091340956@syzkaller.appspotmail.com
> Reported-by: syzbot+832aabf700bc3ec920b9@syzkaller.appspotmail.com
> Reported-by: syzbot+f675f964019f884dbd0f@syzkaller.appspotmail.com
> Reported-by: syzbot+a9fb1457d720a55d6dc5@syzkaller.appspotmail.com
> Fixes: 7bc3e6e55acf ("proc: Use a list of inodes to flush from proc")
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

Thanks, Eric.
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>

Christian

> ---
> 
> If anyone sees an issue please holler; otherwise I plan on sending
> this fix to Linus.
> 
>  fs/proc/base.c      | 10 +++++-----
>  include/linux/pid.h |  1 +
>  kernel/pid.c        |  1 +
>  3 files changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index 74f948a6b621..6042b646ab27 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -1839,9 +1839,9 @@ void proc_pid_evict_inode(struct proc_inode *ei)
>  	struct pid *pid = ei->pid;
>  
>  	if (S_ISDIR(ei->vfs_inode.i_mode)) {
> -		spin_lock(&pid->wait_pidfd.lock);
> +		spin_lock(&pid->lock);
>  		hlist_del_init_rcu(&ei->sibling_inodes);
> -		spin_unlock(&pid->wait_pidfd.lock);
> +		spin_unlock(&pid->lock);
>  	}
>  
>  	put_pid(pid);
> @@ -1877,9 +1877,9 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
>  	/* Let the pid remember us for quick removal */
>  	ei->pid = pid;
>  	if (S_ISDIR(mode)) {
> -		spin_lock(&pid->wait_pidfd.lock);
> +		spin_lock(&pid->lock);
>  		hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
> -		spin_unlock(&pid->wait_pidfd.lock);
> +		spin_unlock(&pid->lock);
>  	}
>  
>  	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
> @@ -3273,7 +3273,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
>  
>  void proc_flush_pid(struct pid *pid)
>  {
> -	proc_invalidate_siblings_dcache(&pid->inodes, &pid->wait_pidfd.lock);
> +	proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock);
>  	put_pid(pid);
>  }
>  
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index 01a0d4e28506..cc896f0fc4e3 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -60,6 +60,7 @@ struct pid
>  {
>  	refcount_t count;
>  	unsigned int level;
> +	spinlock_t lock;
>  	/* lists of tasks that use this pid */
>  	struct hlist_head tasks[PIDTYPE_MAX];
>  	struct hlist_head inodes;
> diff --git a/kernel/pid.c b/kernel/pid.c
> index efd34874b3d1..517d0855d4cf 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -246,6 +246,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
>  
>  	get_pid_ns(ns);
>  	refcount_set(&pid->count, 1);
> +	spin_lock_init(&pid->lock);
>  	for (type = 0; type < PIDTYPE_MAX; ++type)
>  		INIT_HLIST_HEAD(&pid->tasks[type]);
>  
> -- 
> 2.20.1
> 
> Eric

