On 1/11/21 5:37 PM, Keith Busch wrote: > On Mon, Jan 11, 2021 at 02:39:20PM +0100, Hinko Kocevar wrote: >> Testing this patch a bit more (without the 5/5) resulted in the same CPU >> lockup: >> >> watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [irq/122-aerdrv:128] >> >> as I initially reported with the 5/5 of this patch included. >> >> It seems more infrequent, though. For example, after reboot this is not >> observed and the recovery process is successful, whereas when 5/5 is also >> used every recovery resulted in CPU lockup. > > I am assuming this soft lockup is still when restoring the downstream > port's virtual channel capability. Your initial sighting indicates that > it doesn't appear to be a deadlock, but the stack trace never exited > pci_restore_vc_state() either. I did not find any obvious issues here > just from code inspection, so if you could try applying the following > patch and send the kernel messages output, that would help. > > --- > diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c > index 5fc59ac31145..4834af7eb582 100644 > --- a/drivers/pci/vc.c > +++ b/drivers/pci/vc.c > @@ -28,6 +28,7 @@ static void pci_vc_save_restore_dwords(struct pci_dev *dev, int pos, > { > int i; > > + pci_warn(dev, "%s: pos:%d dwords:%d\n", __func__, pos, dwords); > for (i = 0; i < dwords; i++, buf++) { > if (save) > pci_read_config_dword(dev, pos + (i * 4), buf); > @@ -110,6 +111,8 @@ static void pci_vc_enable(struct pci_dev *dev, int pos, int res) > if (!pci_is_pcie(dev) || !pcie_downstream_port(dev)) > return; > > + pci_warn(dev, "%s: pos:%d res:%d\n", __func__, pos, res); > + > ctrl_pos = pos + PCI_VC_RES_CTRL + (res * PCI_CAP_VC_PER_VC_SIZEOF); > status_pos = pos + PCI_VC_RES_STATUS + (res * PCI_CAP_VC_PER_VC_SIZEOF); > > @@ -165,6 +168,8 @@ static void pci_vc_enable(struct pci_dev *dev, int pos, int res) > if (link && !pci_wait_for_pending(link, status_pos2, > PCI_VC_RES_STATUS_NEGO)) > pci_err(link, "VC%d negotiation stuck pending\n", id); > + > + 
pci_warn(dev, "%s: pos:%d res:%d return\n", __func__, pos, res); > } > > /** > @@ -190,6 +195,7 @@ static int pci_vc_do_save_buffer(struct pci_dev *dev, int pos, > int i, len = 0; > u8 *buf = save_state ? (u8 *)save_state->cap.data : NULL; > > + pci_warn(dev, "%s: buf:%d pos:%d\n", __func__, buf != NULL, pos); > /* Sanity check buffer size for save/restore */ > if (buf && save_state->cap.size != > pci_vc_do_save_buffer(dev, pos, NULL, save)) { > @@ -278,6 +284,8 @@ static int pci_vc_do_save_buffer(struct pci_dev *dev, int pos, > pci_read_config_dword(dev, pos + PCI_VC_RES_CAP + > (i * PCI_CAP_VC_PER_VC_SIZEOF), &cap); > parb_offset = ((cap & PCI_VC_RES_CAP_ARB_OFF) >> 24) * 16; > + pci_warn(dev, "%s: i:%d evcc:%d parb_offset:%d\n", __func__, i, > + evcc, parb_offset); > if (parb_offset) { > int size, parb_phases = 0; > > @@ -332,6 +340,7 @@ static int pci_vc_do_save_buffer(struct pci_dev *dev, int pos, > len += 4; > } > > + pci_warn(dev, "%s: len:%d\n", __func__, len); > return buf ? 0 : len; > } > > @@ -399,6 +408,7 @@ void pci_restore_vc_state(struct pci_dev *dev) > if (!save_state || !pos) > continue; > > + pci_warn(dev, "%s: i:%d pos:%d\n", __func__, i, pos); > pci_vc_do_save_buffer(dev, pos, save_state, false); > } > } > -- > Attached are the messages. Thanks!