linux-pci.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Lockdep warning in pciehp (v5.0-based kernel)
@ 2019-04-02  2:19 Theodore Ts'o
  2019-04-02  8:32 ` Lukas Wunner
  0 siblings, 1 reply; 2+ messages in thread
From: Theodore Ts'o @ 2019-04-02  2:19 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: linux-pci

[-- Attachment #1: Type: text/plain, Size: 8662 bytes --]

Hi, I got the following lockdep warning while booting a Dell XPS13.
Is this a known issue?

[   13.836367] pciehp 0000:3b:04.0:pcie204: Slot #4 AttnBtn- PwrCtrl- MRL- AttnInd- PwrInd- HotPlug+ Surprise+ Interlock- NoCompl+ LLActRep+

[   13.837149] ============================================
[   13.837149] WARNING: possible recursive locking detected
[   13.837150] 5.0.0-00034-gcaae0ec7396f #54 Not tainted
[   13.837151] --------------------------------------------
[   13.837152] irq/125-pciehp/165 is trying to acquire lock:
[   13.837153] 000000000f8f89cc (&ctrl->reset_lock){.+.+}, at: pciehp_check_presence+0x1b/0x72
[   13.837159] 
               but task is already holding lock:
[   13.837160] 000000001b052053 (&ctrl->reset_lock){.+.+}, at: pciehp_ist+0x10a/0x164
[   13.837162] 
               other info that might help us debug this:
[   13.837162]  Possible unsafe locking scenario:

[   13.837163]        CPU0
[   13.837163]        ----
[   13.837164]   lock(&ctrl->reset_lock);
[   13.837164]   lock(&ctrl->reset_lock);
[   13.837165] 
                *** DEADLOCK ***

[   13.837165]  May be due to missing lock nesting notation

[   13.837166] 4 locks held by irq/125-pciehp/165:
[   13.837167]  #0: 000000001b052053 (&ctrl->reset_lock){.+.+}, at: pciehp_ist+0x10a/0x164
[   13.837169]  #1: 0000000087e07843 (pci_rescan_remove_lock){+.+.}, at: pciehp_configure_device+0x1e/0xfd
[   13.837171]  #2: 00000000e9b570d4 (&dev->mutex){....}, at: __device_attach+0x28/0x12d
[   13.837174]  #3: 000000004add66ea (&dev->mutex){....}, at: __device_attach+0x28/0x12d
[   13.837176] 
               stack backtrace:
[   13.837178] CPU: 7 PID: 165 Comm: irq/125-pciehp Not tainted 5.0.0-00034-gcaae0ec7396f #54
[   13.837178] Hardware name: Dell Inc. XPS 13 9380/0KTW76, BIOS 1.2.1 02/14/2019
[   13.837179] Call Trace:
[   13.837183]  dump_stack+0x67/0x8e
[   13.837186]  __lock_acquire+0x9b2/0xddc
[   13.837189]  ? pci_hp_add+0x18a/0x1ee
[   13.837190]  ? pci_hp_add+0x18a/0x1ee
[   13.837192]  ? find_held_lock+0x2b/0x6e
[   13.837194]  lock_acquire+0x147/0x172
[   13.837196]  ? pciehp_check_presence+0x1b/0x72
[   13.837199]  down_read+0x44/0x87
[   13.837201]  ? pciehp_check_presence+0x1b/0x72
[   13.837203]  pciehp_check_presence+0x1b/0x72
[   13.837205]  pciehp_probe+0x229/0x24d
[   13.837207]  pcie_port_probe_service+0x38/0x4b
[   13.837209]  really_probe+0x1a5/0x372
[   13.837210]  ? driver_allows_async_probing+0x2c/0x2c
[   13.837211]  driver_probe_device+0xcf/0xff
[   13.837213]  ? driver_allows_async_probing+0x2c/0x2c
[   13.837215]  bus_for_each_drv+0x84/0xa8
[   13.837217]  __device_attach+0x9d/0x12d
[   13.837219]  bus_probe_device+0x31/0x9e
[   13.837221]  device_add+0x1c1/0x5bd
[   13.837223]  ? __init_waitqueue_head+0x36/0x47
[   13.837225]  pcie_port_device_register+0x3b9/0x41e
[   13.837228]  ? irq_thread_check_affinity+0x7b/0x7b
[   13.837230]  pcie_portdrv_probe+0x37/0xa4
[   13.837232]  pci_device_probe+0xbe/0x130
[   13.837234]  really_probe+0x1a5/0x372
[   13.837236]  ? driver_allows_async_probing+0x2c/0x2c
[   13.837237]  ? irq_thread+0x82/0x193
[   13.837238]  driver_probe_device+0xcf/0xff
[   13.837239]  ? driver_allows_async_probing+0x2c/0x2c
[   13.837241]  bus_for_each_drv+0x84/0xa8
[   13.837242]  __device_attach+0x9d/0x12d
[   13.837245]  pci_bus_add_device+0x4a/0x83
[   13.837247]  pci_bus_add_devices+0x2c/0x5d
[   13.837248]  pci_bus_add_devices+0x53/0x5d
[   13.837250]  pciehp_configure_device+0xef/0xfd
[   13.837253]  pciehp_handle_presence_or_link_change+0x28d/0x37b
[   13.837254]  pciehp_ist+0x12c/0x164
[   13.837256]  ? irq_finalize_oneshot+0x9e/0x9e
[   13.837257]  irq_thread_fn+0x1e/0x41
[   13.837259]  irq_thread+0x138/0x193
[   13.837261]  ? wake_threads_waitq+0x27/0x27
[   13.837263]  ? irq_thread_check_affinity+0x7b/0x7b
[   13.837265]  kthread+0xf7/0xfc
[   13.837266]  ? kthread_cancel_delayed_work_sync+0xf/0xf
[   13.837269]  ret_from_fork+0x3a/0x50
[   13.837321] ------------[ cut here ]------------

It's followed by another warning which might be an unrelated issue; at
least, the stack trace looks unrelated.

I've attached a compressed copy of the full dmesg.out in case it's
helpful.

Thanks!!

						- Ted

[   13.837321] downgrading a read lock
[   13.837327] WARNING: CPU: 2 PID: 741 at kernel/locking/lockdep.c:3553 lock_downgrade+0xe7/0x17f
[   13.837328] Modules linked in: nls_iso8859_1 nls_cp437 vfat fat snd_hda_codec_realtek snd_hda_codec_generic dell_laptop ledtrig_audio dell_wmi dell_smbios ath10k_pci joydev hid_multitouch(+) dcdbas intel_wmi_thunderbolt dell_wmi_descriptor intel_rapl x86_pkg_temp_thermal ath10k_core intel_powerclamp coretemp ath serio_raw kvm_intel snd_soc_skl btusb mac80211 snd_soc_core btrtl snd_soc_acpi_intel_match kvm btbcm snd_soc_acpi snd_soc_skl_ipc btintel snd_soc_sst_ipc irqbypass snd_soc_sst_dsp bluetooth snd_hda_ext_core uvcvideo cfg80211 videobuf2_vmalloc ecdh_generic videobuf2_memops snd_hda_intel i915 videobuf2_v4l2 videobuf2_common pcspkr snd_hda_codec snd_hwdep drm_kms_helper snd_hda_core idma64 drm mei_me processor_thermal_device intel_gtt intel_soc_dts_iosf fb_sys_fops mei syscopyarea sysfillrect intel_pch_thermal sysimgblt ucsi_acpi typec_ucsi int3403_thermal intel_hid int3400_thermal int340x_thermal_zone acpi_thermal_rel sparse_keymap acpi_pad pcc_cpufreq nfsd auth_rpcgss nfs_acl lockd
[   13.837356]  grace parport_pc ppdev lp sunrpc parport ip_tables x_tables autofs4 btrfs zstd_decompress zstd_compress dm_thin_pool dm_persistent_data dm_bio_prison algif_skcipher af_alg raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear hid_generic i2c_designware_platform i2c_designware_core crct10dif_pclmul crc32_pclmul rtsx_pci_sdmmc ghash_clmulni_intel xhci_pci i2c_i801 thunderbolt intel_lpss_pci rtsx_pci intel_lpss xhci_hcd mfd_core i2c_hid hid
[   13.837374] CPU: 2 PID: 741 Comm: run-parts Not tainted 5.0.0-00034-gcaae0ec7396f #54
[   13.837374] Hardware name: Dell Inc. XPS 13 9380/0KTW76, BIOS 1.2.1 02/14/2019
[   13.837375] RIP: 0010:lock_downgrade+0xe7/0x17f
[   13.837376] Code: 00 00 00 8b 44 24 04 89 83 80 08 00 00 48 8b 45 00 48 89 83 78 08 00 00 f6 45 32 03 74 0e 48 c7 c7 eb 6e e7 ab e8 34 81 fb ff <0f> 0b 8a 45 32 4c 89 6d 08 44 89 e6 48 89 df 83 e0 fc 83 c8 01 88
[   13.837377] RSP: 0018:ffff9dcbcf027e28 EFLAGS: 00010082
[   13.837378] RAX: 0000000000000000 RBX: ffff9dcbcef3a9c0 RCX: 0000000000000000
[   13.837379] RDX: ffff9dcbcef3a9c0 RSI: 0000000000000001 RDI: ffffffffab10ec18
[   13.837379] RBP: ffff9dcbcef3b248 R08: 0000000000000017 R09: 00000000001e1e80
[   13.837380] R10: ffff9dcbcef3a9c0 R11: 0000000a2851b6dd R12: 0000000000000001
[   13.837380] R13: ffffffffab205904 R14: 0000000000000246 R15: ffff9dcbcef49b28
[   13.837381] FS:  00007f6b961d7580(0000) GS:ffff9dcbdda00000(0000) knlGS:0000000000000000
[   13.837382] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   13.837383] CR2: 000055a3940d7000 CR3: 0000000494962005 CR4: 00000000003606e0
[   13.837383] Call Trace:
[   13.837387]  downgrade_write+0x17/0x80
[   13.837392]  __do_munmap+0x26f/0x2fe
[   13.837394]  __vm_munmap+0x74/0xbf
[   13.837396]  __x64_sys_munmap+0x27/0x2c
[   13.837398]  do_syscall_64+0x5e/0x6c
[   13.837400]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[   13.837401] RIP: 0033:0x7f6b96214717
[   13.837402] Code: ff ff ff f7 d8 89 05 48 ea 00 00 48 c7 c0 ff ff ff ff eb 8a 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 0b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8d 0d 19 ea 00 00 f7 d8 89 01 48 83
[   13.837403] RSP: 002b:00007fffbd93f668 EFLAGS: 00000202 ORIG_RAX: 000000000000000b
[   13.837403] RAX: ffffffffffffffda RBX: 0000000a2851756f RCX: 00007f6b96214717
[   13.837406] RDX: 0000000a2851a714 RSI: 0000000000021557 RDI: 00007f6b961d8000
[   13.837406] RBP: 00007fffbd93f860 R08: 00007f6b9601a1f0 R09: 0000000000000007
[   13.837407] R10: 00007f6b962229f0 R11: 0000000000000202 R12: 0000000000000000
[   13.837407] R13: 00007f6b96223190 R14: 00007f6b961d7580 R15: 00007f6b96223190
[   13.837409] irq event stamp: 719
[   13.837413] hardirqs last  enabled at (719): [<ffffffffab0486c9>] do_user_addr_fault+0xf3/0x396
[   13.837414] hardirqs last disabled at (718): [<ffffffffab0019d6>] trace_hardirqs_off_thunk+0x1a/0x1c
[   13.837416] softirqs last  enabled at (0): [<ffffffffab0b4ead>] copy_process.part.43+0x452/0x1af8
[   13.837421] softirqs last disabled at (0): [<0000000000000000>]           (null)
[   13.837422] ---[ end trace 0c20cc7bb894cd38 ]---

[-- Attachment #2: dmesg.out.gz --]
[-- Type: application/gzip, Size: 25106 bytes --]

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: Lockdep warning in pciehp (v5.0-based kernel)
  2019-04-02  2:19 Lockdep warning in pciehp (v5.0-based kernel) Theodore Ts'o
@ 2019-04-02  8:32 ` Lukas Wunner
  0 siblings, 0 replies; 2+ messages in thread
From: Lukas Wunner @ 2019-04-02  8:32 UTC (permalink / raw)
  To: Theodore Ts'o; +Cc: Bjorn Helgaas, linux-pci

On Mon, Apr 01, 2019 at 10:19:33PM -0400, Theodore Ts'o wrote:
> Hi, I got the following lockdep warning while booting a Dell XPS13.
> Is this a known issue?

No, it's not a known issue.  I just double-checked the code.
It looks fine to me, so this appears to be a false positive.

Your machine has a JHL6540 Thunderbolt controller built-in (Alpine Ridge,
C-step) and you've attached a device with a DSL6540 Thunderbolt controller
(Alpine Ridge).  Whenever you chain Thunderbolt devices together you get
cascaded PCIe hotplug ports.  Each pciehp controller struct has a
reset_lock.

The lockdep splat happens when the upstream pciehp controller's IRQ thread
handles a hotplug event (upon discovering the attached device on boot).
It's holding the upstream controller's reset_lock while handling the event
to prevent the user from concurrently initiating a reset via sysfs.

Upon enumerating the downstream pciehp controller, its probe routine
acquires that second controller's reset_lock for the same reason.

So these are different locks and in addition they're of type rw_semaphore
and both the IRQ thread and the probe routine only acquire the locks for
reading, hence wouldn't block each other even if it was the same lock.

Not really being familiar with the intricacies of lockdep, I'm wondering
if it's just not smart enough to recognize that these are different locks?
And why is it not recognizing that both functions only acquire the locks
for reading?  How can the code be annotated to avoid the false positive?

The only other explanation I can think of is that 5.0.0-00034-gcaae0ec7396f
is not a plain-vanilla 5.0 kernel and contains Google-specific additions
to the pciehp code which might cause the lockdep splat.

HTH,

Lukas

> [   13.836367] pciehp 0000:3b:04.0:pcie204: Slot #4 AttnBtn- PwrCtrl- MRL- AttnInd- PwrInd- HotPlug+ Surprise+ Interlock- NoCompl+ LLActRep+
> 
> [   13.837149] ============================================
> [   13.837149] WARNING: possible recursive locking detected
> [   13.837150] 5.0.0-00034-gcaae0ec7396f #54 Not tainted
> [   13.837151] --------------------------------------------
> [   13.837152] irq/125-pciehp/165 is trying to acquire lock:
> [   13.837153] 000000000f8f89cc (&ctrl->reset_lock){.+.+}, at: pciehp_check_presence+0x1b/0x72
> [   13.837159] 
>                but task is already holding lock:
> [   13.837160] 000000001b052053 (&ctrl->reset_lock){.+.+}, at: pciehp_ist+0x10a/0x164
> [   13.837162] 
>                other info that might help us debug this:
> [   13.837162]  Possible unsafe locking scenario:
> 
> [   13.837163]        CPU0
> [   13.837163]        ----
> [   13.837164]   lock(&ctrl->reset_lock);
> [   13.837164]   lock(&ctrl->reset_lock);
> [   13.837165] 
>                 *** DEADLOCK ***
> 
> [   13.837165]  May be due to missing lock nesting notation
> 
> [   13.837166] 4 locks held by irq/125-pciehp/165:
> [   13.837167]  #0: 000000001b052053 (&ctrl->reset_lock){.+.+}, at: pciehp_ist+0x10a/0x164
> [   13.837169]  #1: 0000000087e07843 (pci_rescan_remove_lock){+.+.}, at: pciehp_configure_device+0x1e/0xfd
> [   13.837171]  #2: 00000000e9b570d4 (&dev->mutex){....}, at: __device_attach+0x28/0x12d
> [   13.837174]  #3: 000000004add66ea (&dev->mutex){....}, at: __device_attach+0x28/0x12d
> [   13.837176] 
>                stack backtrace:
> [   13.837178] CPU: 7 PID: 165 Comm: irq/125-pciehp Not tainted 5.0.0-00034-gcaae0ec7396f #54
> [   13.837178] Hardware name: Dell Inc. XPS 13 9380/0KTW76, BIOS 1.2.1 02/14/2019
> [   13.837179] Call Trace:
> [   13.837183]  dump_stack+0x67/0x8e
> [   13.837186]  __lock_acquire+0x9b2/0xddc
> [   13.837189]  ? pci_hp_add+0x18a/0x1ee
> [   13.837190]  ? pci_hp_add+0x18a/0x1ee
> [   13.837192]  ? find_held_lock+0x2b/0x6e
> [   13.837194]  lock_acquire+0x147/0x172
> [   13.837196]  ? pciehp_check_presence+0x1b/0x72
> [   13.837199]  down_read+0x44/0x87
> [   13.837201]  ? pciehp_check_presence+0x1b/0x72
> [   13.837203]  pciehp_check_presence+0x1b/0x72
> [   13.837205]  pciehp_probe+0x229/0x24d
> [   13.837207]  pcie_port_probe_service+0x38/0x4b
> [   13.837209]  really_probe+0x1a5/0x372
> [   13.837210]  ? driver_allows_async_probing+0x2c/0x2c
> [   13.837211]  driver_probe_device+0xcf/0xff
> [   13.837213]  ? driver_allows_async_probing+0x2c/0x2c
> [   13.837215]  bus_for_each_drv+0x84/0xa8
> [   13.837217]  __device_attach+0x9d/0x12d
> [   13.837219]  bus_probe_device+0x31/0x9e
> [   13.837221]  device_add+0x1c1/0x5bd
> [   13.837223]  ? __init_waitqueue_head+0x36/0x47
> [   13.837225]  pcie_port_device_register+0x3b9/0x41e
> [   13.837228]  ? irq_thread_check_affinity+0x7b/0x7b
> [   13.837230]  pcie_portdrv_probe+0x37/0xa4
> [   13.837232]  pci_device_probe+0xbe/0x130
> [   13.837234]  really_probe+0x1a5/0x372
> [   13.837236]  ? driver_allows_async_probing+0x2c/0x2c
> [   13.837237]  ? irq_thread+0x82/0x193
> [   13.837238]  driver_probe_device+0xcf/0xff
> [   13.837239]  ? driver_allows_async_probing+0x2c/0x2c
> [   13.837241]  bus_for_each_drv+0x84/0xa8
> [   13.837242]  __device_attach+0x9d/0x12d
> [   13.837245]  pci_bus_add_device+0x4a/0x83
> [   13.837247]  pci_bus_add_devices+0x2c/0x5d
> [   13.837248]  pci_bus_add_devices+0x53/0x5d
> [   13.837250]  pciehp_configure_device+0xef/0xfd
> [   13.837253]  pciehp_handle_presence_or_link_change+0x28d/0x37b
> [   13.837254]  pciehp_ist+0x12c/0x164
> [   13.837256]  ? irq_finalize_oneshot+0x9e/0x9e
> [   13.837257]  irq_thread_fn+0x1e/0x41
> [   13.837259]  irq_thread+0x138/0x193
> [   13.837261]  ? wake_threads_waitq+0x27/0x27
> [   13.837263]  ? irq_thread_check_affinity+0x7b/0x7b
> [   13.837265]  kthread+0xf7/0xfc
> [   13.837266]  ? kthread_cancel_delayed_work_sync+0xf/0xf
> [   13.837269]  ret_from_fork+0x3a/0x50
> [   13.837321] ------------[ cut here ]------------

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-04-02  8:33 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-02  2:19 Lockdep warning in pciehp (v5.0-based kernel) Theodore Ts'o
2019-04-02  8:32 ` Lukas Wunner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).