linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [v4.6-rc7-183-g1410b74e4061]
       [not found] <CA+icZUXF9DkymNNGwP8zzmKY9PRAPGFZ+7p9En1xy80FcX_RRw@mail.gmail.com>
@ 2016-05-14 16:46 ` Eric Dumazet
  2016-05-16 10:50 ` [v4.6-rc7-183-g1410b74e4061] Ingo Molnar
  1 sibling, 0 replies; 6+ messages in thread
From: Eric Dumazet @ 2016-05-14 16:46 UTC (permalink / raw)
  To: sedat.dilek
  Cc: David Miller, Peter Zijlstra, Paul E. McKenney, netdev,
	the arch/x86 maintainers, LKML, Linus Torvalds,
	Alfredo Alvarez Fernandez

On Sat, May 14, 2016 at 2:22 AM, Sedat Dilek <sedat.dilek@gmail.com> wrote:
> Hi,
>
> as Linux v4.6 is very near, I decided to write this bug report (only
> drunk one coffee).
>
> First, I am not absolutely sure if this is a real issue as...
> #1: This is only a (lockdep) warning.
> #2: I have not a "vanilla" Linux v4.6-rc7+ here (see P.S. and attached patch)
>
> For a more helpful feedback I should test a...
> #1: vanilla v4.6-rc7-183-g1410b74e4061
> #2: net.git#master on top of #1
>
> What I am seeing is this while surfing with a UMTS/HSPA internet-stick
> (using PPP) and running Firefox on Ubuntu/precise AMD64...
>
> [  423.484105] ------------[ cut here ]------------
> [  423.484119] WARNING: CPU: 2 PID: 2392 at
> kernel/locking/lockdep.c:2098 __lock_acquire
> [  423.484123] DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base
> [  423.484125] Modules linked in: btrfs xor raid6_pq ntfs xfs
> libcrc32c ppp_deflate bsd_comp ppp_async crc_ccitt option usb_wwan
> cdc_ether usbserial usbnet snd_hda_codec_hdmi i915
> snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel
> snd_hda_codec arc4 uvcvideo iwldvm snd_hwdep joydev mac80211
> videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_core
> videodev rfcomm bnep kvm_intel kvm usb_storage btusb i2c_algo_bit
> iwlwifi btrtl snd_hda_core drm_kms_helper btbcm snd_pcm parport_pc
> btintel syscopyarea irqbypass ppdev snd_seq_midi bluetooth sysfillrect
> psmouse snd_seq_midi_event sysimgblt snd_rawmidi fb_sys_fops cfg80211
> snd_seq drm serio_raw snd_timer samsung_laptop snd_seq_device snd
> soundcore wmi mac_hid video intel_rst lpc_ich lp parport binfmt_misc
> hid_generic usbhid hid r8169 mii
> [  423.484241] CPU: 2 PID: 2392 Comm: firefox Not tainted
> 4.6.0-rc7-183.1-iniza-small #1
> [  423.484244] Hardware name: SAMSUNG ELECTRONICS CO., LTD.
> 530U3BI/530U4BI/530U4BH/530U3BI/530U4BI/530U4BH, BIOS 13XK 03/28/2013
> [  423.484247]  0000000000000000 ffff88011fa83910 ffffffff81413825
> ffff88011fa83960
> [  423.484253]  0000000000000000 ffff88011fa83950 ffffffff81083ea1
> 0000083282f34ec0
> [  423.484259]  0000000000000005 ffff880082f34540 0000000000000000
> 0000000000000027
> [  423.484265] Call Trace:
> [  423.484268]  <IRQ>  [<ffffffff81413825>] dump_stack
> [  423.484280]  [<ffffffff81083ea1>] __warn
> [  423.484285]  [<ffffffff81083f0f>] warn_slowpath_fmt
> [  423.484289]  [<ffffffff810dee38>] __lock_acquire
> [  423.484294]  [<ffffffff810de294>] ? __lock_acquire
> [  423.484298]  [<ffffffff810de294>] ? __lock_acquire
> [  423.484302]  [<ffffffff810e0869>] lock_acquire
> [  423.484307]  [<ffffffff81711c3b>] ? __dev_queue_xmit
> [  423.484313]  [<ffffffff81823ce8>] _raw_spin_lock
> [  423.484317]  [<ffffffff81711c3b>] ? __dev_queue_xmit
> [  423.484321]  [<ffffffff81711c3b>] __dev_queue_xmit
> [  423.484326]  [<ffffffff81711640>] ? __dev_queue_xmit
> [  423.484330]  [<ffffffff81711e30>] dev_queue_xmit
> [  423.484334]  [<ffffffff8171a291>] neigh_direct_output
> [  423.484339]  [<ffffffff8175566c>] ip_finish_output2
> [  423.484344]  [<ffffffff817554ff>] ? ip_finish_output2
> [  423.484349]  [<ffffffff817565ba>] ip_finish_output
> [  423.484353]  [<ffffffff81757e81>] ip_output
> [  423.484357]  [<ffffffff810dbbb9>] ? __lock_is_held
> [  423.484362]  [<ffffffff8175737d>] ip_local_out
> [  423.484366]  [<ffffffff817577a5>] ip_queue_xmit
> [  423.484371]  [<ffffffff817575c5>] ? ip_queue_xmit
> [  423.484376]  [<ffffffff81771296>] tcp_transmit_skb
> [  423.484380]  [<ffffffff817737ea>] __tcp_retransmit_skb
> [  423.484385]  [<ffffffff81773e97>] tcp_retransmit_skb
> [  423.484389]  [<ffffffff817754b7>] tcp_retransmit_timer
> [  423.484394]  [<ffffffff81775bd0>] ? tcp_write_timer_handler
> [  423.484398]  [<ffffffff81775a7c>] tcp_write_timer_handler
> [  423.484402]  [<ffffffff81775c50>] tcp_write_timer
> [  423.484407]  [<ffffffff81104f84>] call_timer_fn
> [  423.484411]  [<ffffffff81104ef5>] ? call_timer_fn
> [  423.484416]  [<ffffffff81775bd0>] ? tcp_write_timer_handler
> [  423.484419]  [<ffffffff811058f5>] run_timer_softirq
> [  423.484424]  [<ffffffff81827632>] __do_softirq
> [  423.484428]  [<ffffffff8108b2fe>] irq_exit
> [  423.484432]  [<ffffffff818272c2>] smp_apic_timer_interrupt
> [  423.484437]  [<ffffffff818253c6>] apic_timer_interrupt
> [  423.484439]  <EOI>
> [  423.484443] ---[ end trace a29d8ee0ef420d5c ]---
> [  423.484446]
> [  423.484447] ======================
> [  423.484449] [chain_key collision ]
> [  423.484452] 4.6.0-rc7-183.1-iniza-small #1 Tainted: G        W
> [  423.484454] ----------------------
> [  423.484457] firefox/2392: Hash chain already cached but the
> contents don't match!
> [  423.484460] Held locks:depth: 6
> [  423.484463]  class_idx:1993 -> chain_key:00000000000007c9
> (((&icsk->icsk_retransmit_timer))){
> [  423.484473]  class_idx:1334 -> chain_key:0000000000f92536 (slock-AF_INET){
> [  423.484482]  class_idx:33 -> chain_key:0000001f24a6c021
> (rcu_read_lock){......}, at: [<ffffffff817575c5>] ip_queue_xmit
> [  423.484492]  class_idx:1005 -> chain_key:0003e494d80423ed
> (rcu_read_lock_bh){......}, at: [<ffffffff817554ff>] ip_finish_output2
> [  423.484500]  class_idx:1005 -> chain_key:7c929b00847da3ed
> (rcu_read_lock_bh){......}, at: [<ffffffff81711640>] __dev_queue_xmit
> [  423.484509]  class_idx:1996 -> chain_key:5360108fb47da85e
> (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){
> [  423.484517] Locks in cached chain:depth: 6
> [  423.484520]  class_idx:1998 -> chain_key:00000000000007ce
> (((&sk->sk_timer))){
> [  423.484525]  class_idx:1334 -> chain_key:0000000000f9c536 (slock-AF_INET){
> [  423.484531]  class_idx:33 -> chain_key:0000001f38a6c021
> (rcu_read_lock){......}
> [  423.484536]  class_idx:1005 -> chain_key:0003e714d80423ed
> (rcu_read_lock_bh){......}
> [  423.484541]  class_idx:1005 -> chain_key:7ce29b00847da3ed
> (rcu_read_lock_bh){......}
> [  423.484547]  class_idx:1986 -> chain_key:5360108fb47da85e (_xmit_PPP#2){
> [  423.484553]
> [  423.484553] stack backtrace:
> [  423.484558] CPU: 2 PID: 2392 Comm: firefox Tainted: G        W
>  4.6.0-rc7-183.1-iniza-small #1
> [  423.484561] Hardware name: SAMSUNG ELECTRONICS CO., LTD.
> 530U3BI/530U4BI/530U4BH/530U3BI/530U4BI/530U4BH, BIOS 13XK 03/28/2013
> [  423.484563]  0000000000000000 ffff88011fa83968 ffffffff81413825
> 0000000000000006
> [  423.484570]  ffffffff82189c20 ffff88011fa839b0 ffffffff811a8190
> 000000041fa83980
> [  423.484575]  ffff880082f34e48 0000000000000005 ffff880082f34540
> 0000000000000000
> [  423.484581] Call Trace:
> [  423.484584]  <IRQ>  [<ffffffff81413825>] dump_stack
> [  423.484592]  [<ffffffff811a8190>] print_collision
> [  423.484597]  [<ffffffff810dee4d>] __lock_acquire
> [  423.484601]  [<ffffffff810de294>] ? __lock_acquire
> [  423.484606]  [<ffffffff810de294>] ? __lock_acquire
> [  423.484610]  [<ffffffff810e0869>] lock_acquire
> [  423.484615]  [<ffffffff81711c3b>] ? __dev_queue_xmit
> [  423.484619]  [<ffffffff81823ce8>] _raw_spin_lock
> [  423.484624]  [<ffffffff81711c3b>] ? __dev_queue_xmit
> [  423.484628]  [<ffffffff81711c3b>] __dev_queue_xmit
> [  423.484632]  [<ffffffff81711640>] ? __dev_queue_xmit
> [  423.484636]  [<ffffffff81711e30>] dev_queue_xmit
> [  423.484640]  [<ffffffff8171a291>] neigh_direct_output
> [  423.484644]  [<ffffffff8175566c>] ip_finish_output2
> [  423.484648]  [<ffffffff817554ff>] ? ip_finish_output2
> [  423.484653]  [<ffffffff817565ba>] ip_finish_output
> [  423.484657]  [<ffffffff81757e81>] ip_output
> [  423.484661]  [<ffffffff810dbbb9>] ? __lock_is_held
> [  423.484666]  [<ffffffff8175737d>] ip_local_out
> [  423.484670]  [<ffffffff817577a5>] ip_queue_xmit
> [  423.484675]  [<ffffffff817575c5>] ? ip_queue_xmit
> [  423.484679]  [<ffffffff81771296>] tcp_transmit_skb
> [  423.484683]  [<ffffffff817737ea>] __tcp_retransmit_skb
> [  423.484688]  [<ffffffff81773e97>] tcp_retransmit_skb
> [  423.484692]  [<ffffffff817754b7>] tcp_retransmit_timer
> [  423.484697]  [<ffffffff81775bd0>] ? tcp_write_timer_handler
> [  423.484701]  [<ffffffff81775a7c>] tcp_write_timer_handler
> [  423.484705]  [<ffffffff81775c50>] tcp_write_timer
> [  423.484709]  [<ffffffff81104f84>] call_timer_fn
> [  423.484713]  [<ffffffff81104ef5>] ? call_timer_fn
> [  423.484718]  [<ffffffff81775bd0>] ? tcp_write_timer_handler
> [  423.484730]  [<ffffffff811058f5>] run_timer_softirq
> [  423.484734]  [<ffffffff81827632>] __do_softirq
> [  423.484738]  [<ffffffff8108b2fe>] irq_exit
> [  423.484742]  [<ffffffff818272c2>] smp_apic_timer_interrupt
> [  423.484746]  [<ffffffff818253c6>] apic_timer_interrupt
>
> NOTE: I realized this call-trace 1st after v4.6-rc7.
>
> The only commit I see which might have an effect is...
>
> 10a81980fc47 tcp: refresh skb timestamp at retransmit time
>
> ...but I may be wrong.
>
> So help in...
> "No more darkness, no more night." ---> " I see the light, I see the light."
>
> Attached is my linux-config, dmesg output and "customized" patch.
>
> Pleas give some clear instructions to get more light in debugging this.
>
> I'm really in a hurry, so sorry if this BR is unsatisfying and incomplete.
>
> Regards,
> - Sedat -
>
> P.S.: For more details see attached patch on top of Linux v4.6-rc7.
>
> Sedat Dilek (5):
>       Merge branch 'wb-buf-throttle' of
> git://git.kernel.org/.../axboe/linux-block into
> for-4.8/wb-buf-throttle-20160510
>       Merge branch 'for-4.6/vfs-fixes' into 4.6.0-rc7-183.1-iniza-small
>       Merge branch 'for-4.6/net-fixes' into 4.6.0-rc7-183.1-iniza-small
>       Merge branch 'for-4.7/pm-next-20160513' into 4.6.0-rc7-183.1-iniza-small
>       Merge branch 'for-4.8/wb-buf-throttle-20160510' into
> 4.6.0-rc7-183.1-iniza-small
>
> P.S.S: Investigating net/ changes since Linux v4.6-rc7.
>
> $ git log --oneline v4.6-rc7.. net/
> e271c7b4420d gre: do not keep the GRE header around in collect medata mode
> 16ec3d4fbb96 openvswitch: Fix cached ct with helper.
> 4e8c86155010 net sched: ife action fix late binding
> 5e1567aeb7fe net sched: skbedit action fix late binding
> 0e5538ab2b59 net sched: simple action fix late binding
> 87dfbdc6c747 net sched: mirred action fix late binding
> a57f19d30b2d net sched: ipt action fix late binding
> 5026c9b1bafc net sched: vlan action fix late binding
> 10a81980fc47 tcp: refresh skb timestamp at retransmit time
> adc0a8bfdc52 Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
> 79e48650320e net: fix a kernel infoleak in x25 module
> 229740c63169 udp_offload: Set encapsulation before inner completes.
> 43b8448cd7b4 udp_tunnel: Remove redundant udp_tunnel_gro_complete().
> 1d2f7b2d956e net: ipv6: tcp reset, icmp need to consider L3 domain
> 856ce5d083e1 bridge: fix igmp / mld query parsing
> 31ca0458a61a net: bridge: fix old ioctl unlocked net device walk
> dedc58e067d8 VSOCK: do not disconnect socket when peer has shutdown SEND only
> eda3fc50daa9 netfilter: nfnetlink_acct: validate NFACCT_QUOTA parameter
> 32b583a0cb9b Merge branch 'master' of
> git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
> 5f8e44741f9f net: fix infoleak in rtnetlink
> b8670c09f37b net: fix infoleak in llc
> cec5913c1515 netfilter: IDLETIMER: fix race condition when destroy the target
> 70d72b7e060e netfilter: conntrack: init all_locks to avoid debug warning
> d6af1a31cc72 vti: Add pmtu handling to vti_xmit.
> 215276c0147e xfrm: Reset encapsulation field of the skb before transformation
> 6ad3122a08e3 flowcache: Avoid OOM condition under preasure


This looks like a hash collision lockdep report, exposed by a recent commit

I have no idea how to proceed at this point.

commit 39e2e173fb1f900959d3a25c21c65fa88b06c6ee
Author: Alfredo Alvarez Fernandez <alfredoalvarezfernandez@gmail.com>
Date:   Wed Mar 30 19:03:36 2016 +0200

    locking/lockdep: Print chain_key collision information

    A sequence of pairs [class_idx -> corresponding chain_key iteration]
    is printed for both the current held_lock chain and the cached chain.

    That exposes the two different class_idx sequences that led to that
    particular hash value.

    This helps with debugging hash chain collision reports.

    Signed-off-by: Alfredo Alvarez Fernandez <alfredoalvarezfernandez@gmail.com>
    Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
    Cc: Linus Torvalds <torvalds@linux-foundation.org>
    Cc: Peter Zijlstra <peterz@infradead.org>
    Cc: Thomas Gleixner <tglx@linutronix.de>
    Cc: linux-fsdevel@vger.kernel.org
    Cc: sedat.dilek@gmail.com
    Cc: tytso@mit.edu
    Link: http://lkml.kernel.org/r/1459357416-19190-1-git-send-email-alfredoalvarezernandez@gmail.com
    Signed-off-by: Ingo Molnar <mingo@kernel.org>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [v4.6-rc7-183-g1410b74e4061]
       [not found] <CA+icZUXF9DkymNNGwP8zzmKY9PRAPGFZ+7p9En1xy80FcX_RRw@mail.gmail.com>
  2016-05-14 16:46 ` [v4.6-rc7-183-g1410b74e4061] Eric Dumazet
@ 2016-05-16 10:50 ` Ingo Molnar
  2016-05-16 17:42   ` [v4.6-rc7-183-g1410b74e4061] Sedat Dilek
  1 sibling, 1 reply; 6+ messages in thread
From: Ingo Molnar @ 2016-05-16 10:50 UTC (permalink / raw)
  To: Sedat Dilek
  Cc: David Miller, Eric Dumazet, Peter Zijlstra, Paul E. McKenney,
	netdev, the arch/x86 maintainers, LKML, Linus Torvalds


* Sedat Dilek <sedat.dilek@gmail.com> wrote:

> Hi,
> 
> as Linux v4.6 is very near, I decided to write this bug report (only
> drunk one coffee).
> 
> First, I am not absolutely sure if this is a real issue as...
> #1: This is only a (lockdep) warning.
> #2: I have not a "vanilla" Linux v4.6-rc7+ here (see P.S. and attached patch)

Having such a kernel base is certainly not helpful to the quality of your report, 
please try to reproduce under the vanilla kernel.

> For a more helpful feedback I should test a...
> #1: vanilla v4.6-rc7-183-g1410b74e4061

I'd suggest v4.6 plus this debug patch from Peter:

  https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1142352.html

... to see whether something fishy is going on.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [v4.6-rc7-183-g1410b74e4061]
  2016-05-16 10:50 ` [v4.6-rc7-183-g1410b74e4061] Ingo Molnar
@ 2016-05-16 17:42   ` Sedat Dilek
  2016-05-16 17:57     ` [v4.6-rc7-183-g1410b74e4061] Peter Zijlstra
  0 siblings, 1 reply; 6+ messages in thread
From: Sedat Dilek @ 2016-05-16 17:42 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: David Miller, Eric Dumazet, Peter Zijlstra, Paul E. McKenney,
	netdev, the arch/x86 maintainers, LKML, Linus Torvalds

On 5/16/16, Ingo Molnar <mingo@kernel.org> wrote:
>
> * Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
>> Hi,
>>
>> as Linux v4.6 is very near, I decided to write this bug report (only
>> drunk one coffee).
>>
>> First, I am not absolutely sure if this is a real issue as...
>> #1: This is only a (lockdep) warning.
>> #2: I have not a "vanilla" Linux v4.6-rc7+ here (see P.S. and attached
>> patch)
>
> Having such a kernel base is certainly not helpful to the quality of your
> report,
> please try to reproduce under the vanilla kernel.
>
>> For a more helpful feedback I should test a...
>> #1: vanilla v4.6-rc7-183-g1410b74e4061
>
> I'd suggest v4.6 plus this debug patch from Peter:
>
>   https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1142352.html
>
> ... to see whether something fishy is going on.
>
> Thanks,
>

Unfortunately, I could not reproduce this again with none of my 183-kernels.
When I first hit a "chain_key collision" issue, it was hard to redproduce, so.
Any idea, how I can "force" this?

- Sedat -

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [v4.6-rc7-183-g1410b74e4061]
  2016-05-16 17:42   ` [v4.6-rc7-183-g1410b74e4061] Sedat Dilek
@ 2016-05-16 17:57     ` Peter Zijlstra
  2016-05-16 18:11       ` [v4.6-rc7-183-g1410b74e4061] Sedat Dilek
  0 siblings, 1 reply; 6+ messages in thread
From: Peter Zijlstra @ 2016-05-16 17:57 UTC (permalink / raw)
  To: Sedat Dilek
  Cc: Ingo Molnar, David Miller, Eric Dumazet, Paul E. McKenney,
	netdev, the arch/x86 maintainers, LKML, Linus Torvalds

On Mon, May 16, 2016 at 07:42:35PM +0200, Sedat Dilek wrote:

> Unfortunately, I could not reproduce this again with none of my 183-kernels.
> When I first hit a "chain_key collision" issue, it was hard to redproduce, so.
> Any idea, how I can "force" this?

Nope; I wish I knew, that'd be so much easier to work with :/

I'm hoping someone will report a reproducer, even something that
triggers once every 5-10 runs would be awesome.

In any case, like I've explained before, nothing regressed as such, we
only added this new warning under DEBUG_LOCKDEP because we want to
better understand the condition that triggers it.

If it bothers you, just turn off DEBUG_LOCKDEP and know that your kernel
is as reliable as it was before. OTOH, if you do keep it on, please
let me know if you can (semi) reliably trigger this, as I'd really like
to have a better understanding.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [v4.6-rc7-183-g1410b74e4061]
  2016-05-16 17:57     ` [v4.6-rc7-183-g1410b74e4061] Peter Zijlstra
@ 2016-05-16 18:11       ` Sedat Dilek
  2016-05-22  9:16         ` [v4.6-rc7-183-g1410b74e4061] Sedat Dilek
  0 siblings, 1 reply; 6+ messages in thread
From: Sedat Dilek @ 2016-05-16 18:11 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, David Miller, Eric Dumazet, Paul E. McKenney,
	netdev, the arch/x86 maintainers, LKML, Linus Torvalds

[-- Attachment #1: Type: text/plain, Size: 3411 bytes --]

On 5/16/16, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, May 16, 2016 at 07:42:35PM +0200, Sedat Dilek wrote:
>
>> Unfortunately, I could not reproduce this again with none of my
>> 183-kernels.
>> When I first hit a "chain_key collision" issue, it was hard to redproduce,
>> so.
>> Any idea, how I can "force" this?
>
> Nope; I wish I knew, that'd be so much easier to work with :/
>
> I'm hoping someone will report a reproducer, even something that
> triggers once every 5-10 runs would be awesome.
>
> In any case, like I've explained before, nothing regressed as such, we
> only added this new warning under DEBUG_LOCKDEP because we want to
> better understand the condition that triggers it.
>
> If it bothers you, just turn off DEBUG_LOCKDEP and know that your kernel
> is as reliable as it was before. OTOH, if you do keep it on, please
> let me know if you can (semi) reliably trigger this, as I'd really like
> to have a better understanding.
>

OK, I keep checking my logs.

I refreshed your patch Ingo pointed me to.

But it fails like this (on top of Linux v4.6 final)...
[...]
  if [ "" = "-pg" ]; then if [ kernel/locking/mutex-debug.o !=
"scripts/mod/empty.o" ]; then ./scripts/recordmcount
"kernel/locking/mutex-debug.o"; fi; fi;
  mycompiler -Wp,-MD,kernel/locking/.lockdep.o.d  -nostdinc -isystem
/usr/lib/gcc/x86_64-linux-gnu/4.9/include -nostdinc -isystem
/usr/lib/gcc/x86_64-linux-gnu/4.9/include -I./arch/x86/include
-Iarch/x86/include/generated/uapi -Iarch/x86/include/generated
-Iinclude -I./arch/x86/include/uapi -Iarch/x86/include/generated/uapi
-I./include/uapi -Iinclude/generated/uapi -include
./include/linux/kconfig.h -D__KERNEL__ -Wall -Wundef
-Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common
-Werror-implicit-function-declaration -Wno-format-security -std=gnu89
-mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -falign-jumps=1
-falign-loops=1 -mno-80387 -mno-fp-ret-in-387
-mpreferred-stack-boundary=3 -mtune=generic -mno-red-zone
-mcmodel=kernel -funit-at-a-time -maccumulate-outgoing-args
-DCONFIG_X86_X32_ABI -DCONFIG_AS_CFI=1 -DCONFIG_AS_CFI_SIGNAL_FRAME=1
-DCONFIG_AS_CFI_SECTIONS=1 -DCONFIG_AS_FXSAVEQ=1 -DCONFIG_AS_SSSE3=1
-DCONFIG_AS_CRC32=1 -DCONFIG_AS_AVX=1 -DCONFIG_AS_AVX2=1 -pipe
-Wno-sign-compare -fno-asynchronous-unwind-tables
-fno-delete-null-pointer-checks -O2 --param=allow-store-data-races=0
-Wframe-larger-than=1024 -fno-stack-protector
-Wno-unused-but-set-variable -fno-omit-frame-pointer
-fno-optimize-sibling-calls -fno-var-tracking-assignments -mfentry
-DCC_USING_FENTRY -Wdeclaration-after-statement -Wno-pointer-sign
-fno-strict-overflow -fconserve-stack -Werror=implicit-int
-Werror=strict-prototypes -Werror=date-time -DCC_HAVE_ASM_GOTO
-D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(lockdep)"
-D"KBUILD_MODNAME=KBUILD_STR(lockdep)" -c -o
kernel/locking/.tmp_lockdep.o kernel/locking/lockdep.c
kernel/locking/lockdep.c: In function 'print_chain_keys_held_locks':
kernel/locking/lockdep.c:2034:2: error: too few arguments to function
'print_chain_key_iteration'
  print_chain_key_iteration(hlock_next->class_idx, chain_key);
  ^
kernel/locking/lockdep.c:2006:12: note: declared here
 static u64 print_chain_key_iteration(int class_idx, u64 chain_key,
u64 prev_key)
            ^
make[4]: *** [kernel/locking/lockdep.o] Error 1
make[3]: *** [kernel/locking] Error 2
make[2]: *** [kernel] Error 2
[...]

- Sedat -

[-- Attachment #2: 0001-locking-lockdep-Some-more-additional-chain_key-colli.patch --]
[-- Type: text/x-diff, Size: 4975 bytes --]

From b953be255bfb46970c75950e297be836577bc525 Mon Sep 17 00:00:00 2001
From: Sedat Dilek <sedat.dilek@gmail.com>
Date: Mon, 16 May 2016 15:51:04 +0200
Subject: [PATCH] locking/lockdep: Some more additional chain_key collision
 information

From: Peter Zijlstra <peterz@infradead.org>

For more details see thread "[v4.6-rc7-183-g1410b74e4061]" at LKML [1].

Patch for testing from Peter Zijlstra see [2] and [3].

[1] http://marc.info/?t=146321784300002&r=1&w=2
[2] http://marc.info/?l=linux-kernel&m=146339587506110&w=2
[3] https://lkml.org/lkml/2016/5/10/214

Cc: Wanpeng Li <wanpeng.li@linux.intel.com>
Cc: Alfredo Alvarez Fernandez <alfredoalvarezfernandez@gmail.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Ted Tso <tytso@mit.edu>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: the arch/x86 maintainers <x86@kernel.org>
Cc: linux-fsdevel@vger.kernel.org
---
 kernel/locking/lockdep.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 78c1c0ee6dc1..5dc21eb101b0 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2003,13 +2003,14 @@ static inline int get_first_held_lock(struct task_struct *curr,
 /*
  * Returns the next chain_key iteration
  */
-static u64 print_chain_key_iteration(int class_idx, u64 chain_key)
+static u64 print_chain_key_iteration(int class_idx, u64 chain_key, u64 prev_key)
 {
 	u64 new_chain_key = iterate_chain_key(chain_key, class_idx);
 
-	printk(" class_idx:%d -> chain_key:%016Lx",
+	printk(" class_idx: %d -> chain_key: %016Lx (%016Lx)",
 		class_idx,
-		(unsigned long long)new_chain_key);
+		(unsigned long long)new_chain_key,
+		(unsigned long long)prev_key);
 	return new_chain_key;
 }
 
@@ -2024,7 +2025,8 @@ print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_ne
 	printk("depth: %u\n", depth + 1);
 	for (i = get_first_held_lock(curr, hlock_next); i < depth; i++) {
 		hlock = curr->held_locks + i;
-		chain_key = print_chain_key_iteration(hlock->class_idx, chain_key);
+		chain_key = print_chain_key_iteration(hlock->class_idx, chain_key,
+						      hlock->prev_chain_key);
 
 		print_lock(hlock);
 	}
@@ -2042,7 +2044,7 @@ static void print_chain_keys_chain(struct lock_chain *chain)
 	printk("depth: %u\n", chain->depth);
 	for (i = 0; i < chain->depth; i++) {
 		class_id = chain_hlocks[chain->base + i];
-		chain_key = print_chain_key_iteration(class_id + 1, chain_key);
+		chain_key = print_chain_key_iteration(class_id + 1, chain_key, 0);
 
 		print_lock_name(lock_classes + class_id);
 		printk("\n");
@@ -2051,7 +2053,8 @@ static void print_chain_keys_chain(struct lock_chain *chain)
 
 static void print_collision(struct task_struct *curr,
 			struct held_lock *hlock_next,
-			struct lock_chain *chain)
+			struct lock_chain *chain,
+			u64 chain_key)
 {
 	printk("\n");
 	printk("======================\n");
@@ -2060,11 +2063,14 @@ static void print_collision(struct task_struct *curr,
 	printk("----------------------\n");
 	printk("%s/%d: ", current->comm, task_pid_nr(current));
 	printk("Hash chain already cached but the contents don't match!\n");
+	printk("chain->chain_key: %016Lx chain_key: %016Lx\n",
+			(unsigned long long)chain->chain_key,
+			(unsigned long long)chain_key);
 
-	printk("Held locks:");
+	printk("Held locks: ");
 	print_chain_keys_held_locks(curr, hlock_next);
 
-	printk("Locks in cached chain:");
+	printk("Locks in cached chain: ");
 	print_chain_keys_chain(chain);
 
 	printk("\nstack backtrace:\n");
@@ -2080,7 +2086,8 @@ static void print_collision(struct task_struct *curr,
  */
 static int check_no_collision(struct task_struct *curr,
 			struct held_lock *hlock,
-			struct lock_chain *chain)
+			struct lock_chain *chain,
+			u64 chain_key)
 {
 #ifdef CONFIG_DEBUG_LOCKDEP
 	int i, j, id;
@@ -2088,7 +2095,7 @@ static int check_no_collision(struct task_struct *curr,
 	i = get_first_held_lock(curr, hlock);
 
 	if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1))) {
-		print_collision(curr, hlock, chain);
+		print_collision(curr, hlock, chain, chain_key);
 		return 0;
 	}
 
@@ -2096,7 +2103,7 @@ static int check_no_collision(struct task_struct *curr,
 		id = curr->held_locks[i].class_idx - 1;
 
 		if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) {
-			print_collision(curr, hlock, chain);
+			print_collision(curr, hlock, chain, chain_key);
 			return 0;
 		}
 	}
@@ -2134,7 +2141,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
 		if (chain->chain_key == chain_key) {
 cache_hit:
 			debug_atomic_inc(chain_lookup_hits);
-			if (!check_no_collision(curr, hlock, chain))
+			if (!check_no_collision(curr, hlock, chain, chain_key))
 				return 0;
 
 			if (very_verbose(class))
-- 
2.8.2


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [v4.6-rc7-183-g1410b74e4061]
  2016-05-16 18:11       ` [v4.6-rc7-183-g1410b74e4061] Sedat Dilek
@ 2016-05-22  9:16         ` Sedat Dilek
  0 siblings, 0 replies; 6+ messages in thread
From: Sedat Dilek @ 2016-05-22  9:16 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, David Miller, Eric Dumazet, Paul E. McKenney,
	netdev, the arch/x86 maintainers, LKML, Linus Torvalds

[-- Attachment #1: Type: text/plain, Size: 3728 bytes --]

On 5/16/16, Sedat Dilek <sedat.dilek@gmail.com> wrote:
> On 5/16/16, Peter Zijlstra <peterz@infradead.org> wrote:
>> On Mon, May 16, 2016 at 07:42:35PM +0200, Sedat Dilek wrote:
>>
>>> Unfortunately, I could not reproduce this again with none of my
>>> 183-kernels.
>>> When I first hit a "chain_key collision" issue, it was hard to
>>> redproduce,
>>> so.
>>> Any idea, how I can "force" this?
>>
>> Nope; I wish I knew, that'd be so much easier to work with :/
>>
>> I'm hoping someone will report a reproducer, even something that
>> triggers once every 5-10 runs would be awesome.
>>
>> In any case, like I've explained before, nothing regressed as such, we
>> only added this new warning under DEBUG_LOCKDEP because we want to
>> better understand the condition that triggers it.
>>
>> If it bothers you, just turn off DEBUG_LOCKDEP and know that your kernel
>> is as reliable as it was before. OTOH, if you do keep it on, please
>> let me know if you can (semi) reliably trigger this, as I'd really like
>> to have a better understanding.
>>
>
> OK, I keep checking my logs.
>
> I refreshed your patch Ingo pointed me to.
>
> But it fails like this (on top of Linux v4.6 final)...
> [...]
>   if [ "" = "-pg" ]; then if [ kernel/locking/mutex-debug.o !=
> "scripts/mod/empty.o" ]; then ./scripts/recordmcount
> "kernel/locking/mutex-debug.o"; fi; fi;
>   mycompiler -Wp,-MD,kernel/locking/.lockdep.o.d  -nostdinc -isystem
> /usr/lib/gcc/x86_64-linux-gnu/4.9/include -nostdinc -isystem
> /usr/lib/gcc/x86_64-linux-gnu/4.9/include -I./arch/x86/include
> -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated
> -Iinclude -I./arch/x86/include/uapi -Iarch/x86/include/generated/uapi
> -I./include/uapi -Iinclude/generated/uapi -include
> ./include/linux/kconfig.h -D__KERNEL__ -Wall -Wundef
> -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common
> -Werror-implicit-function-declaration -Wno-format-security -std=gnu89
> -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -falign-jumps=1
> -falign-loops=1 -mno-80387 -mno-fp-ret-in-387
> -mpreferred-stack-boundary=3 -mtune=generic -mno-red-zone
> -mcmodel=kernel -funit-at-a-time -maccumulate-outgoing-args
> -DCONFIG_X86_X32_ABI -DCONFIG_AS_CFI=1 -DCONFIG_AS_CFI_SIGNAL_FRAME=1
> -DCONFIG_AS_CFI_SECTIONS=1 -DCONFIG_AS_FXSAVEQ=1 -DCONFIG_AS_SSSE3=1
> -DCONFIG_AS_CRC32=1 -DCONFIG_AS_AVX=1 -DCONFIG_AS_AVX2=1 -pipe
> -Wno-sign-compare -fno-asynchronous-unwind-tables
> -fno-delete-null-pointer-checks -O2 --param=allow-store-data-races=0
> -Wframe-larger-than=1024 -fno-stack-protector
> -Wno-unused-but-set-variable -fno-omit-frame-pointer
> -fno-optimize-sibling-calls -fno-var-tracking-assignments -mfentry
> -DCC_USING_FENTRY -Wdeclaration-after-statement -Wno-pointer-sign
> -fno-strict-overflow -fconserve-stack -Werror=implicit-int
> -Werror=strict-prototypes -Werror=date-time -DCC_HAVE_ASM_GOTO
> -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(lockdep)"
> -D"KBUILD_MODNAME=KBUILD_STR(lockdep)" -c -o
> kernel/locking/.tmp_lockdep.o kernel/locking/lockdep.c
> kernel/locking/lockdep.c: In function 'print_chain_keys_held_locks':
> kernel/locking/lockdep.c:2034:2: error: too few arguments to function
> 'print_chain_key_iteration'
>   print_chain_key_iteration(hlock_next->class_idx, chain_key);
>   ^
> kernel/locking/lockdep.c:2006:12: note: declared here
>  static u64 print_chain_key_iteration(int class_idx, u64 chain_key,
> u64 prev_key)
>             ^
> make[4]: *** [kernel/locking/lockdep.o] Error 1
> make[3]: *** [kernel/locking] Error 2
> make[2]: *** [kernel] Error 2
> [...]
>

Is the attached fix correct?

- Sedat -

P.S.: Attached is a refreshed version of your original proposal patch
which does not compile correctly.

[-- Attachment #2: lockdep-testing-peterz-20160516-fixes-4-6.diff --]
[-- Type: text/plain, Size: 485 bytes --]

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 5dc21eb101b0..b771a691b5e8 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2031,7 +2031,8 @@ print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_ne
 		print_lock(hlock);
 	}
 
-	print_chain_key_iteration(hlock_next->class_idx, chain_key);
+	print_chain_key_iteration(hlock_next->class_idx, chain_key,
+				  hlock->prev_chain_key);
 	print_lock(hlock_next);
 }
 

[-- Attachment #3: 0001-locking-lockdep-Some-more-additional-chain_key-colli.patch --]
[-- Type: application/octet-stream, Size: 4975 bytes --]

From b953be255bfb46970c75950e297be836577bc525 Mon Sep 17 00:00:00 2001
From: Sedat Dilek <sedat.dilek@gmail.com>
Date: Mon, 16 May 2016 15:51:04 +0200
Subject: [PATCH] locking/lockdep: Some more additional chain_key collision
 information

From: Peter Zijlstra <peterz@infradead.org>

For more details see thread "[v4.6-rc7-183-g1410b74e4061]" at LKML [1].

Patch for testing from Peter Zijlstra see [2] and [3].

[1] http://marc.info/?t=146321784300002&r=1&w=2
[2] http://marc.info/?l=linux-kernel&m=146339587506110&w=2
[3] https://lkml.org/lkml/2016/5/10/214

Cc: Wanpeng Li <wanpeng.li@linux.intel.com>
Cc: Alfredo Alvarez Fernandez <alfredoalvarezfernandez@gmail.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Ted Tso <tytso@mit.edu>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: the arch/x86 maintainers <x86@kernel.org>
Cc: linux-fsdevel@vger.kernel.org
---
 kernel/locking/lockdep.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 78c1c0ee6dc1..5dc21eb101b0 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2003,13 +2003,14 @@ static inline int get_first_held_lock(struct task_struct *curr,
 /*
  * Returns the next chain_key iteration
  */
-static u64 print_chain_key_iteration(int class_idx, u64 chain_key)
+static u64 print_chain_key_iteration(int class_idx, u64 chain_key, u64 prev_key)
 {
 	u64 new_chain_key = iterate_chain_key(chain_key, class_idx);
 
-	printk(" class_idx:%d -> chain_key:%016Lx",
+	printk(" class_idx: %d -> chain_key: %016Lx (%016Lx)",
 		class_idx,
-		(unsigned long long)new_chain_key);
+		(unsigned long long)new_chain_key,
+		(unsigned long long)prev_key);
 	return new_chain_key;
 }
 
@@ -2024,7 +2025,8 @@ print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_ne
 	printk("depth: %u\n", depth + 1);
 	for (i = get_first_held_lock(curr, hlock_next); i < depth; i++) {
 		hlock = curr->held_locks + i;
-		chain_key = print_chain_key_iteration(hlock->class_idx, chain_key);
+		chain_key = print_chain_key_iteration(hlock->class_idx, chain_key,
+						      hlock->prev_chain_key);
 
 		print_lock(hlock);
 	}
@@ -2042,7 +2044,7 @@ static void print_chain_keys_chain(struct lock_chain *chain)
 	printk("depth: %u\n", chain->depth);
 	for (i = 0; i < chain->depth; i++) {
 		class_id = chain_hlocks[chain->base + i];
-		chain_key = print_chain_key_iteration(class_id + 1, chain_key);
+		chain_key = print_chain_key_iteration(class_id + 1, chain_key, 0);
 
 		print_lock_name(lock_classes + class_id);
 		printk("\n");
@@ -2051,7 +2053,8 @@ static void print_chain_keys_chain(struct lock_chain *chain)
 
 static void print_collision(struct task_struct *curr,
 			struct held_lock *hlock_next,
-			struct lock_chain *chain)
+			struct lock_chain *chain,
+			u64 chain_key)
 {
 	printk("\n");
 	printk("======================\n");
@@ -2060,11 +2063,14 @@ static void print_collision(struct task_struct *curr,
 	printk("----------------------\n");
 	printk("%s/%d: ", current->comm, task_pid_nr(current));
 	printk("Hash chain already cached but the contents don't match!\n");
+	printk("chain->chain_key: %016Lx chain_key: %016Lx\n",
+			(unsigned long long)chain->chain_key,
+			(unsigned long long)chain_key);
 
-	printk("Held locks:");
+	printk("Held locks: ");
 	print_chain_keys_held_locks(curr, hlock_next);
 
-	printk("Locks in cached chain:");
+	printk("Locks in cached chain: ");
 	print_chain_keys_chain(chain);
 
 	printk("\nstack backtrace:\n");
@@ -2080,7 +2086,8 @@ static void print_collision(struct task_struct *curr,
  */
 static int check_no_collision(struct task_struct *curr,
 			struct held_lock *hlock,
-			struct lock_chain *chain)
+			struct lock_chain *chain,
+			u64 chain_key)
 {
 #ifdef CONFIG_DEBUG_LOCKDEP
 	int i, j, id;
@@ -2088,7 +2095,7 @@ static int check_no_collision(struct task_struct *curr,
 	i = get_first_held_lock(curr, hlock);
 
 	if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1))) {
-		print_collision(curr, hlock, chain);
+		print_collision(curr, hlock, chain, chain_key);
 		return 0;
 	}
 
@@ -2096,7 +2103,7 @@ static int check_no_collision(struct task_struct *curr,
 		id = curr->held_locks[i].class_idx - 1;
 
 		if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) {
-			print_collision(curr, hlock, chain);
+			print_collision(curr, hlock, chain, chain_key);
 			return 0;
 		}
 	}
@@ -2134,7 +2141,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
 		if (chain->chain_key == chain_key) {
 cache_hit:
 			debug_atomic_inc(chain_lookup_hits);
-			if (!check_no_collision(curr, hlock, chain))
+			if (!check_no_collision(curr, hlock, chain, chain_key))
 				return 0;
 
 			if (very_verbose(class))
-- 
2.8.2


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2016-05-22  9:16 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CA+icZUXF9DkymNNGwP8zzmKY9PRAPGFZ+7p9En1xy80FcX_RRw@mail.gmail.com>
2016-05-14 16:46 ` [v4.6-rc7-183-g1410b74e4061] Eric Dumazet
2016-05-16 10:50 ` [v4.6-rc7-183-g1410b74e4061] Ingo Molnar
2016-05-16 17:42   ` [v4.6-rc7-183-g1410b74e4061] Sedat Dilek
2016-05-16 17:57     ` [v4.6-rc7-183-g1410b74e4061] Peter Zijlstra
2016-05-16 18:11       ` [v4.6-rc7-183-g1410b74e4061] Sedat Dilek
2016-05-22  9:16         ` [v4.6-rc7-183-g1410b74e4061] Sedat Dilek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).