On Tue, Sep 03, 2019 at 08:15:53PM +1000, Oliver O'Halloran wrote: > When hot-adding devices we rely on the hotplug driver to create pci_dn's > for the devices under the hotplug slot. Converse, when hot-removing the > driver will remove the pci_dn's that it created. This is a problem because > the pci_dev is still live until it's refcount drops to zero. This can > happen if the driver is slow to tear down it's internal state. Ideally, the > driver would not attempt to perform any config accesses to the device once > it's been marked as removed, but sometimes it happens. As a result, we > might attempt to access the pci_dn for a device that has been torn down and > the kernel may crash as a result. > > To fix this, don't free the pci_dn unless the corresponding pci_dev has > been released. If the pci_dev is still live, then we mark the pci_dn with > a flag that indicates the pci_dev's release function should free it. > > Signed-off-by: Oliver O'Halloran If I understand this, it works because when pci_remove_device_node_info() calls pci_get_domain_bus_and_slot(), it either: a) returns null, meaning the pci_dev is already gone, the release handler is already called, and the PDN can be removed there, or b) returns non-null and atomically increases the refcount and the release handler won't be called until after we've set the DEAD flag and released our reference. Looks good to me, Reviewed-by: Sam Bobroff > --- > arch/powerpc/include/asm/pci-bridge.h | 1 + > arch/powerpc/kernel/pci-hotplug.c | 7 +++++++ > arch/powerpc/kernel/pci_dn.c | 21 +++++++++++++++++++-- > 3 files changed, 27 insertions(+), 2 deletions(-) > > diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h > index 54a9de01c2a3..69f4cb3b7c56 100644 > --- a/arch/powerpc/include/asm/pci-bridge.h > +++ b/arch/powerpc/include/asm/pci-bridge.h > @@ -183,6 +183,7 @@ struct iommu_table; > struct pci_dn { > int flags; > #define PCI_DN_FLAG_IOV_VF 0x01 > +#define PCI_DN_FLAG_DEAD 0x02 /* Device has been hot-removed */ > > int busno; /* pci bus number */ > int devfn; /* pci device and function number */ > diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c > index 0b0cf8168b47..fc62c4bc47b1 100644 > --- a/arch/powerpc/kernel/pci-hotplug.c > +++ b/arch/powerpc/kernel/pci-hotplug.c > @@ -55,11 +55,18 @@ EXPORT_SYMBOL_GPL(pci_find_bus_by_node); > void pcibios_release_device(struct pci_dev *dev) > { > struct pci_controller *phb = pci_bus_to_host(dev->bus); > + struct pci_dn *pdn = pci_get_pdn(dev); > > eeh_remove_device(dev); > > if (phb->controller_ops.release_device) > phb->controller_ops.release_device(dev); > + > + /* free()ing the pci_dn has been deferred to us, do it now */ > + if (pdn && (pdn->flags & PCI_DN_FLAG_DEAD)) { > + pci_dbg(dev, "freeing dead pdn\n"); > + kfree(pdn); > + } > } > > /** > diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c > index 5614ca0940ca..4e654df55969 100644 > --- a/arch/powerpc/kernel/pci_dn.c > +++ b/arch/powerpc/kernel/pci_dn.c > @@ -320,6 +320,7 @@ void pci_remove_device_node_info(struct device_node *dn) > { > struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL; > struct device_node *parent; > + struct pci_dev *pdev; > #ifdef CONFIG_EEH > struct eeh_dev *edev = pdn_to_eeh_dev(pdn); > > @@ -333,12 +334,28 @@ void pci_remove_device_node_info(struct device_node *dn) > WARN_ON(!list_empty(&pdn->child_list)); > list_del(&pdn->list); > > + /* Drop the parent pci_dn's ref to our backing dt node */ > parent = of_get_parent(dn); > if (parent) > of_node_put(parent); > > - dn->data = NULL; > - kfree(pdn); > + /* > + * At this point we *might* still have a pci_dev that was > + * instantiated from this pci_dn. So defer free()ing it until > + * the pci_dev's release function is called. > + */ > + pdev = pci_get_domain_bus_and_slot(pdn->phb->global_number, > + pdn->busno, pdn->devfn); > + if (pdev) { > + /* NB: pdev has a ref to dn */ > + pci_dbg(pdev, "marked pdn (from %pOF) as dead\n", dn); > + pdn->flags |= PCI_DN_FLAG_DEAD; > + } else { > + dn->data = NULL; > + kfree(pdn); > + } > + > + pci_dev_put(pdev); > } > EXPORT_SYMBOL_GPL(pci_remove_device_node_info); > > -- > 2.21.0 >