[tentative] PCI / ACPI: Rework PCI host bridge removal to avoid sysfs warnings
diff mbox series

Message ID 37552283.kG1L4S8Daa@vostro.rjw.lan
State New, archived
Headers show
Series
  • [tentative] PCI / ACPI: Rework PCI host bridge removal to avoid sysfs warnings
Related show

Commit Message

Rafael J. Wysocki Dec. 28, 2013, 11:20 p.m. UTC
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

The device_del(&host_bridge->dev) in pci_stop_root_bus() is
problematic, because it causes all sysfs directories below
the host bridge to be removed recursively and when
pci_remove_root_bus() attempts to remove devices on the root
bus (whose sysfs directories are gone now along with all their
subdirectories), it causes warnings similar to this one to be
printed:

WARNING: CPU: 0 PID: 6 at fs/sysfs/group.c:214 sysfs_remove_group+0xc6/0xd0()
sysfs group ffffffff819ac5c0 not found for kobject '0001:ff:10.2'
Modules linked in: <irrelevant list>
CPU: 0 PID: 6 Comm: kworker/u512:0 Tainted: G        W    3.13.0-rc5+ #11
Hardware name:
Workqueue: kacpi_hotplug acpi_hotplug_work_fn
 0000000000000009 ffff8808738d3bd8 ffffffff815d84ea ffff8808738d3c20
 ffff8808738d3c10 ffffffff8106594d 0000000000000000 ffffffff819ac5c0
 ffff880871b9d0a8 ffff8a07d1895000 0000000000000103 ffff8808738d3c70
Call Trace:
 [<ffffffff815d84ea>] dump_stack+0x45/0x56
 [<ffffffff8106594d>] warn_slowpath_common+0x7d/0xa0
 [<ffffffff810659bc>] warn_slowpath_fmt+0x4c/0x50
 [<ffffffff8122b52e>] ? sysfs_get_dirent_ns+0x4e/0x70
 [<ffffffff8122c806>] sysfs_remove_group+0xc6/0xd0
 [<ffffffff813b83f3>] dpm_sysfs_remove+0x43/0x50
 [<ffffffff813ae105>] device_del+0x45/0x1c0
 [<ffffffff812e51f6>] pci_remove_bus_device+0x66/0xd0
 [<ffffffff812e5363>] pci_remove_root_bus+0x73/0x80
 [<ffffffff813276ab>] acpi_pci_root_remove+0x42/0x4f
 [<ffffffff81323070>] acpi_bus_trim+0x56/0x89
 [<ffffffff81323052>] acpi_bus_trim+0x38/0x89
 [<ffffffff813245df>] acpi_device_hotplug+0x137/0x33b
 [<ffffffff8131efba>] acpi_hotplug_work_fn+0x1c/0x27
 [<ffffffff81080f1b>] process_one_work+0x17b/0x460
 [<ffffffff81081ccb>] worker_thread+0x11b/0x400
 [<ffffffff81081bb0>] ? rescuer_thread+0x3e0/0x3e0
 [<ffffffff81088a12>] kthread+0xd2/0xf0
 [<ffffffff81088940>] ? kthread_create_on_node+0x180/0x180
 [<ffffffff815e823c>] ret_from_fork+0x7c/0xb0
 [<ffffffff81088940>] ? kthread_create_on_node+0x180/0x180

To avoid that, the host bridge device has to be deleted after all of
its children, so merge pci_stop_root_bus() and pci_remove_root_bus()
into one function, pci_stop_and_remove_root_bus(), that first will
use pci_stop_and_remove_bus_device() to stop and remove all devices
on the root bus and then will delete the host bridge device, remove
its bus and drop the final reference to it.

Reported-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

Hi,

I can't really test this patch, but I don't know how it can break anything.

The only user of pci_stop_root_bus() and pci_remove_root_bus() is
acpi_pci_root_remove() and the code ordering there seems to be somewhat
arbitrary.  If you are aware of any reason why it may not work, please let
me know. :-)

Thanks,
Rafael

---
 drivers/acpi/pci_root.c |    4 +---
 drivers/pci/remove.c    |   23 ++++-------------------
 include/linux/pci.h     |    3 +--
 3 files changed, 6 insertions(+), 24 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Comments

Greg KH Dec. 29, 2013, 3:59 a.m. UTC | #1
On Sun, Dec 29, 2013 at 12:20:22AM +0100, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> The device_del(&host_bridge->dev) in pci_stop_root_bus() is
> problematic, because it causes all sysfs directories below
> the host bridge to be removed recursively and when
> pci_remove_root_bus() attempts to remove devices on the root
> bus (whose sysfs directories are gone now along with all their
> subdirectories), it causes warnings similar to this one to be
> printed:
> 
> WARNING: CPU: 0 PID: 6 at fs/sysfs/group.c:214 sysfs_remove_group+0xc6/0xd0()
> sysfs group ffffffff819ac5c0 not found for kobject '0001:ff:10.2'
> Modules linked in: <irrelevant list>
> CPU: 0 PID: 6 Comm: kworker/u512:0 Tainted: G        W    3.13.0-rc5+ #11
> Hardware name:
> Workqueue: kacpi_hotplug acpi_hotplug_work_fn
>  0000000000000009 ffff8808738d3bd8 ffffffff815d84ea ffff8808738d3c20
>  ffff8808738d3c10 ffffffff8106594d 0000000000000000 ffffffff819ac5c0
>  ffff880871b9d0a8 ffff8a07d1895000 0000000000000103 ffff8808738d3c70
> Call Trace:
>  [<ffffffff815d84ea>] dump_stack+0x45/0x56
>  [<ffffffff8106594d>] warn_slowpath_common+0x7d/0xa0
>  [<ffffffff810659bc>] warn_slowpath_fmt+0x4c/0x50
>  [<ffffffff8122b52e>] ? sysfs_get_dirent_ns+0x4e/0x70
>  [<ffffffff8122c806>] sysfs_remove_group+0xc6/0xd0
>  [<ffffffff813b83f3>] dpm_sysfs_remove+0x43/0x50
>  [<ffffffff813ae105>] device_del+0x45/0x1c0
>  [<ffffffff812e51f6>] pci_remove_bus_device+0x66/0xd0
>  [<ffffffff812e5363>] pci_remove_root_bus+0x73/0x80
>  [<ffffffff813276ab>] acpi_pci_root_remove+0x42/0x4f
>  [<ffffffff81323070>] acpi_bus_trim+0x56/0x89
>  [<ffffffff81323052>] acpi_bus_trim+0x38/0x89
>  [<ffffffff813245df>] acpi_device_hotplug+0x137/0x33b
>  [<ffffffff8131efba>] acpi_hotplug_work_fn+0x1c/0x27
>  [<ffffffff81080f1b>] process_one_work+0x17b/0x460
>  [<ffffffff81081ccb>] worker_thread+0x11b/0x400
>  [<ffffffff81081bb0>] ? rescuer_thread+0x3e0/0x3e0
>  [<ffffffff81088a12>] kthread+0xd2/0xf0
>  [<ffffffff81088940>] ? kthread_create_on_node+0x180/0x180
>  [<ffffffff815e823c>] ret_from_fork+0x7c/0xb0
>  [<ffffffff81088940>] ? kthread_create_on_node+0x180/0x180
> 
> To avoid that, the host bridge device has to be deleted after all of
> its children, so merge pci_stop_root_bus() and pci_remove_root_bus()
> into one function, pci_stop_and_remove_root_bus(), that first will
> use pci_stop_and_remove_bus_device() to stop and remove all devices
> on the root bus and then will delete the host bridge device, remove
> its bus and drop the final reference to it.
> 
> Reported-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
> 
> Hi,
> 
> I can't really test this patch, but I don't know how it can break anything.
> 
> The only user of pci_stop_root_bus() and pci_remove_root_bus() is
> acpi_pci_root_remove() and the code ordering there seems to be somewhat
> arbitrary.  If you are aware of any reason why it may not work, please let
> me know. :-)

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Yinghai Lu Dec. 30, 2013, 3:30 a.m. UTC | #2
On Sat, Dec 28, 2013 at 3:20 PM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>
> The device_del(&host_bridge->dev) in pci_stop_root_bus() is
> problematic, because it causes all sysfs directories below
> the host bridge to be removed recursively and when
> pci_remove_root_bus() attempts to remove devices on the root
> bus (whose sysfs directories are gone now along with all their
> subdirectories), it causes warnings similar to this one to be
> printed:
>
> WARNING: CPU: 0 PID: 6 at fs/sysfs/group.c:214 sysfs_remove_group+0xc6/0xd0()
> sysfs group ffffffff819ac5c0 not found for kobject '0001:ff:10.2'
> Modules linked in: <irrelevant list>
> CPU: 0 PID: 6 Comm: kworker/u512:0 Tainted: G        W    3.13.0-rc5+ #11
> Hardware name:
> Workqueue: kacpi_hotplug acpi_hotplug_work_fn
>  0000000000000009 ffff8808738d3bd8 ffffffff815d84ea ffff8808738d3c20
>  ffff8808738d3c10 ffffffff8106594d 0000000000000000 ffffffff819ac5c0
>  ffff880871b9d0a8 ffff8a07d1895000 0000000000000103 ffff8808738d3c70
> Call Trace:
>  [<ffffffff815d84ea>] dump_stack+0x45/0x56
>  [<ffffffff8106594d>] warn_slowpath_common+0x7d/0xa0
>  [<ffffffff810659bc>] warn_slowpath_fmt+0x4c/0x50
>  [<ffffffff8122b52e>] ? sysfs_get_dirent_ns+0x4e/0x70
>  [<ffffffff8122c806>] sysfs_remove_group+0xc6/0xd0
>  [<ffffffff813b83f3>] dpm_sysfs_remove+0x43/0x50
>  [<ffffffff813ae105>] device_del+0x45/0x1c0
>  [<ffffffff812e51f6>] pci_remove_bus_device+0x66/0xd0
>  [<ffffffff812e5363>] pci_remove_root_bus+0x73/0x80
>  [<ffffffff813276ab>] acpi_pci_root_remove+0x42/0x4f
>  [<ffffffff81323070>] acpi_bus_trim+0x56/0x89
>  [<ffffffff81323052>] acpi_bus_trim+0x38/0x89
>  [<ffffffff813245df>] acpi_device_hotplug+0x137/0x33b
>  [<ffffffff8131efba>] acpi_hotplug_work_fn+0x1c/0x27
>  [<ffffffff81080f1b>] process_one_work+0x17b/0x460
>  [<ffffffff81081ccb>] worker_thread+0x11b/0x400
>  [<ffffffff81081bb0>] ? rescuer_thread+0x3e0/0x3e0
>  [<ffffffff81088a12>] kthread+0xd2/0xf0
>  [<ffffffff81088940>] ? kthread_create_on_node+0x180/0x180
>  [<ffffffff815e823c>] ret_from_fork+0x7c/0xb0
>  [<ffffffff81088940>] ? kthread_create_on_node+0x180/0x180
>
> To avoid that, the host bridge device has to be deleted after all of
> its children, so merge pci_stop_root_bus() and pci_remove_root_bus()
> into one function, pci_stop_and_remove_root_bus(), that first will
> use pci_stop_and_remove_bus_device() to stop and remove all devices
> on the root bus and then will delete the host bridge device, remove
> its bus and drop the final reference to it.
>
> Reported-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
>
> Hi,
>
> I can't really test this patch, but I don't know how it can break anything.
>
> The only user of pci_stop_root_bus() and pci_remove_root_bus() is
> acpi_pci_root_remove() and the code ordering there seems to be somewhat
> arbitrary.  If you are aware of any reason why it may not work, please let
> me know. :-)
>
> Thanks,
> Rafael
>
> ---
>  drivers/acpi/pci_root.c |    4 +---
>  drivers/pci/remove.c    |   23 ++++-------------------
>  include/linux/pci.h     |    3 +--
>  3 files changed, 6 insertions(+), 24 deletions(-)
>
> Index: linux-pm/drivers/acpi/pci_root.c
> ===================================================================
> --- linux-pm.orig/drivers/acpi/pci_root.c
> +++ linux-pm/drivers/acpi/pci_root.c
> @@ -611,12 +611,10 @@ static void acpi_pci_root_remove(struct
>  {
>         struct acpi_pci_root *root = acpi_driver_data(device);
>
> -       pci_stop_root_bus(root->bus);
> -
>         device_set_run_wake(root->bus->bridge, false);
>         pci_acpi_remove_bus_pm_notifier(device);
>
> -       pci_remove_root_bus(root->bus);
> +       pci_stop_and_remove_root_bus(root->bus);
>
>         kfree(root);
>  }


We have patches that need to stop ioapic and iommu between
pci_stop_root_bus and pci_remove_root_bus.

Please check if the problem still happen after

http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/patch/?id=c4a0a5d964e90b93eb4101c3927b788e083e530f

http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/patch/?id=e3b439e1d315aff59c1b4f0fc43c5cd2d82b4138

or just try:

git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git pci/deletion

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Rafael J. Wysocki Dec. 30, 2013, 12:51 p.m. UTC | #3
On Sunday, December 29, 2013 07:30:18 PM Yinghai Lu wrote:
> On Sat, Dec 28, 2013 at 3:20 PM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > The device_del(&host_bridge->dev) in pci_stop_root_bus() is
> > problematic, because it causes all sysfs directories below
> > the host bridge to be removed recursively and when
> > pci_remove_root_bus() attempts to remove devices on the root
> > bus (whose sysfs directories are gone now along with all their
> > subdirectories), it causes warnings similar to this one to be
> > printed:
> >
> > WARNING: CPU: 0 PID: 6 at fs/sysfs/group.c:214 sysfs_remove_group+0xc6/0xd0()
> > sysfs group ffffffff819ac5c0 not found for kobject '0001:ff:10.2'
> > Modules linked in: <irrelevant list>
> > CPU: 0 PID: 6 Comm: kworker/u512:0 Tainted: G        W    3.13.0-rc5+ #11
> > Hardware name:
> > Workqueue: kacpi_hotplug acpi_hotplug_work_fn
> >  0000000000000009 ffff8808738d3bd8 ffffffff815d84ea ffff8808738d3c20
> >  ffff8808738d3c10 ffffffff8106594d 0000000000000000 ffffffff819ac5c0
> >  ffff880871b9d0a8 ffff8a07d1895000 0000000000000103 ffff8808738d3c70
> > Call Trace:
> >  [<ffffffff815d84ea>] dump_stack+0x45/0x56
> >  [<ffffffff8106594d>] warn_slowpath_common+0x7d/0xa0
> >  [<ffffffff810659bc>] warn_slowpath_fmt+0x4c/0x50
> >  [<ffffffff8122b52e>] ? sysfs_get_dirent_ns+0x4e/0x70
> >  [<ffffffff8122c806>] sysfs_remove_group+0xc6/0xd0
> >  [<ffffffff813b83f3>] dpm_sysfs_remove+0x43/0x50
> >  [<ffffffff813ae105>] device_del+0x45/0x1c0
> >  [<ffffffff812e51f6>] pci_remove_bus_device+0x66/0xd0
> >  [<ffffffff812e5363>] pci_remove_root_bus+0x73/0x80
> >  [<ffffffff813276ab>] acpi_pci_root_remove+0x42/0x4f
> >  [<ffffffff81323070>] acpi_bus_trim+0x56/0x89
> >  [<ffffffff81323052>] acpi_bus_trim+0x38/0x89
> >  [<ffffffff813245df>] acpi_device_hotplug+0x137/0x33b
> >  [<ffffffff8131efba>] acpi_hotplug_work_fn+0x1c/0x27
> >  [<ffffffff81080f1b>] process_one_work+0x17b/0x460
> >  [<ffffffff81081ccb>] worker_thread+0x11b/0x400
> >  [<ffffffff81081bb0>] ? rescuer_thread+0x3e0/0x3e0
> >  [<ffffffff81088a12>] kthread+0xd2/0xf0
> >  [<ffffffff81088940>] ? kthread_create_on_node+0x180/0x180
> >  [<ffffffff815e823c>] ret_from_fork+0x7c/0xb0
> >  [<ffffffff81088940>] ? kthread_create_on_node+0x180/0x180
> >
> > To avoid that, the host bridge device has to be deleted after all of
> > its children, so merge pci_stop_root_bus() and pci_remove_root_bus()
> > into one function, pci_stop_and_remove_root_bus(), that first will
> > use pci_stop_and_remove_bus_device() to stop and remove all devices
> > on the root bus and then will delete the host bridge device, remove
> > its bus and drop the final reference to it.
> >
> > Reported-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > ---
> >
> > Hi,
> >
> > I can't really test this patch, but I don't know how it can break anything.
> >
> > The only user of pci_stop_root_bus() and pci_remove_root_bus() is
> > acpi_pci_root_remove() and the code ordering there seems to be somewhat
> > arbitrary.  If you are aware of any reason why it may not work, please let
> > me know. :-)
> >
> > Thanks,
> > Rafael
> >
> > ---
> >  drivers/acpi/pci_root.c |    4 +---
> >  drivers/pci/remove.c    |   23 ++++-------------------
> >  include/linux/pci.h     |    3 +--
> >  3 files changed, 6 insertions(+), 24 deletions(-)
> >
> > Index: linux-pm/drivers/acpi/pci_root.c
> > ===================================================================
> > --- linux-pm.orig/drivers/acpi/pci_root.c
> > +++ linux-pm/drivers/acpi/pci_root.c
> > @@ -611,12 +611,10 @@ static void acpi_pci_root_remove(struct
> >  {
> >         struct acpi_pci_root *root = acpi_driver_data(device);
> >
> > -       pci_stop_root_bus(root->bus);
> > -
> >         device_set_run_wake(root->bus->bridge, false);
> >         pci_acpi_remove_bus_pm_notifier(device);
> >
> > -       pci_remove_root_bus(root->bus);
> > +       pci_stop_and_remove_root_bus(root->bus);
> >
> >         kfree(root);
> >  }
> 
> 
> We have patches that need to stop ioapic and iommu between
> pci_stop_root_bus and pci_remove_root_bus.
> 
> Please check if the problem still happen after
> 
> http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/patch/?id=c4a0a5d964e90b93eb4101c3927b788e083e530f
> 
> http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/patch/?id=e3b439e1d315aff59c1b4f0fc43c5cd2d82b4138

The second one should fix the problem.

Thanks,
Rafael

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Rafael J. Wysocki Dec. 30, 2013, 1:15 p.m. UTC | #4
On Monday, December 30, 2013 01:51:28 PM Rafael J. Wysocki wrote:
> On Sunday, December 29, 2013 07:30:18 PM Yinghai Lu wrote:
> > On Sat, Dec 28, 2013 at 3:20 PM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> > > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > >
> > > The device_del(&host_bridge->dev) in pci_stop_root_bus() is
> > > problematic, because it causes all sysfs directories below
> > > the host bridge to be removed recursively and when
> > > pci_remove_root_bus() attempts to remove devices on the root
> > > bus (whose sysfs directories are gone now along with all their
> > > subdirectories), it causes warnings similar to this one to be
> > > printed:
> > >
> > > WARNING: CPU: 0 PID: 6 at fs/sysfs/group.c:214 sysfs_remove_group+0xc6/0xd0()
> > > sysfs group ffffffff819ac5c0 not found for kobject '0001:ff:10.2'
> > > Modules linked in: <irrelevant list>
> > > CPU: 0 PID: 6 Comm: kworker/u512:0 Tainted: G        W    3.13.0-rc5+ #11
> > > Hardware name:
> > > Workqueue: kacpi_hotplug acpi_hotplug_work_fn
> > >  0000000000000009 ffff8808738d3bd8 ffffffff815d84ea ffff8808738d3c20
> > >  ffff8808738d3c10 ffffffff8106594d 0000000000000000 ffffffff819ac5c0
> > >  ffff880871b9d0a8 ffff8a07d1895000 0000000000000103 ffff8808738d3c70
> > > Call Trace:
> > >  [<ffffffff815d84ea>] dump_stack+0x45/0x56
> > >  [<ffffffff8106594d>] warn_slowpath_common+0x7d/0xa0
> > >  [<ffffffff810659bc>] warn_slowpath_fmt+0x4c/0x50
> > >  [<ffffffff8122b52e>] ? sysfs_get_dirent_ns+0x4e/0x70
> > >  [<ffffffff8122c806>] sysfs_remove_group+0xc6/0xd0
> > >  [<ffffffff813b83f3>] dpm_sysfs_remove+0x43/0x50
> > >  [<ffffffff813ae105>] device_del+0x45/0x1c0
> > >  [<ffffffff812e51f6>] pci_remove_bus_device+0x66/0xd0
> > >  [<ffffffff812e5363>] pci_remove_root_bus+0x73/0x80
> > >  [<ffffffff813276ab>] acpi_pci_root_remove+0x42/0x4f
> > >  [<ffffffff81323070>] acpi_bus_trim+0x56/0x89
> > >  [<ffffffff81323052>] acpi_bus_trim+0x38/0x89
> > >  [<ffffffff813245df>] acpi_device_hotplug+0x137/0x33b
> > >  [<ffffffff8131efba>] acpi_hotplug_work_fn+0x1c/0x27
> > >  [<ffffffff81080f1b>] process_one_work+0x17b/0x460
> > >  [<ffffffff81081ccb>] worker_thread+0x11b/0x400
> > >  [<ffffffff81081bb0>] ? rescuer_thread+0x3e0/0x3e0
> > >  [<ffffffff81088a12>] kthread+0xd2/0xf0
> > >  [<ffffffff81088940>] ? kthread_create_on_node+0x180/0x180
> > >  [<ffffffff815e823c>] ret_from_fork+0x7c/0xb0
> > >  [<ffffffff81088940>] ? kthread_create_on_node+0x180/0x180
> > >
> > > To avoid that, the host bridge device has to be deleted after all of
> > > its children, so merge pci_stop_root_bus() and pci_remove_root_bus()
> > > into one function, pci_stop_and_remove_root_bus(), that first will
> > > use pci_stop_and_remove_bus_device() to stop and remove all devices
> > > on the root bus and then will delete the host bridge device, remove
> > > its bus and drop the final reference to it.
> > >
> > > Reported-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > > ---
> > >
> > > Hi,
> > >
> > > I can't really test this patch, but I don't know how it can break anything.
> > >
> > > The only user of pci_stop_root_bus() and pci_remove_root_bus() is
> > > acpi_pci_root_remove() and the code ordering there seems to be somewhat
> > > arbitrary.  If you are aware of any reason why it may not work, please let
> > > me know. :-)
> > >
> > > Thanks,
> > > Rafael
> > >
> > > ---
> > >  drivers/acpi/pci_root.c |    4 +---
> > >  drivers/pci/remove.c    |   23 ++++-------------------
> > >  include/linux/pci.h     |    3 +--
> > >  3 files changed, 6 insertions(+), 24 deletions(-)
> > >
> > > Index: linux-pm/drivers/acpi/pci_root.c
> > > ===================================================================
> > > --- linux-pm.orig/drivers/acpi/pci_root.c
> > > +++ linux-pm/drivers/acpi/pci_root.c
> > > @@ -611,12 +611,10 @@ static void acpi_pci_root_remove(struct
> > >  {
> > >         struct acpi_pci_root *root = acpi_driver_data(device);
> > >
> > > -       pci_stop_root_bus(root->bus);
> > > -
> > >         device_set_run_wake(root->bus->bridge, false);
> > >         pci_acpi_remove_bus_pm_notifier(device);
> > >
> > > -       pci_remove_root_bus(root->bus);
> > > +       pci_stop_and_remove_root_bus(root->bus);
> > >
> > >         kfree(root);
> > >  }
> > 
> > 
> > We have patches that need to stop ioapic and iommu between
> > pci_stop_root_bus and pci_remove_root_bus.

BTW, what *exactly* do they need to be stopped between?  After these two patches:

> > Please check if the problem still happen after
> > 
> > http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/patch/?id=c4a0a5d964e90b93eb4101c3927b788e083e530f
> > 
> > http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/patch/?id=e3b439e1d315aff59c1b4f0fc43c5cd2d82b4138

pci_stop_root_bus() is just a walk over devices on the root bus stopping
them and pci_remove_root_bus() starts with the removal of those devices.

Surely, those two list walks can be combined into one?

Rafael

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Yinghai Lu Dec. 31, 2013, 6:45 p.m. UTC | #5
On Mon, Dec 30, 2013 at 5:15 AM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> On Monday, December 30, 2013 01:51:28 PM Rafael J. Wysocki wrote:
>> > We have patches that need to stop ioapic and iommu between
>> > pci_stop_root_bus and pci_remove_root_bus.
>
> BTW, what *exactly* do they need to be stopped between?  After these two patches:

need to stop regular pci drivers before stop "driver" for ioapic/dmar.

>
>> > Please check if the problem still happen after
>> >
>> > http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/patch/?id=c4a0a5d964e90b93eb4101c3927b788e083e530f
>> >
>> > http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/patch/?id=e3b439e1d315aff59c1b4f0fc43c5cd2d82b4138
>
> pci_stop_root_bus() is just a walk over devices on the root bus stopping
> them and pci_remove_root_bus() starts with the removal of those devices.
>
> Surely, those two list walks can be combined into one?

maybe ok, but we have to problem to make sure stop pci drivers before
"driver" for ioapic/dmar.

also stop all first and remove all make it much cleaner.

and add path is two steps too. Add them all, and then attach driver for all.

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Rafael J. Wysocki Dec. 31, 2013, 9:03 p.m. UTC | #6
On Tuesday, December 31, 2013 10:45:46 AM Yinghai Lu wrote:
> On Mon, Dec 30, 2013 at 5:15 AM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> > On Monday, December 30, 2013 01:51:28 PM Rafael J. Wysocki wrote:
> >> > We have patches that need to stop ioapic and iommu between
> >> > pci_stop_root_bus and pci_remove_root_bus.
> >
> > BTW, what *exactly* do they need to be stopped between?  After these two patches:
> 
> need to stop regular pci drivers before stop "driver" for ioapic/dmar.
> 
> >
> >> > Please check if the problem still happen after
> >> >
> >> > http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/patch/?id=c4a0a5d964e90b93eb4101c3927b788e083e530f
> >> >
> >> > http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/patch/?id=e3b439e1d315aff59c1b4f0fc43c5cd2d82b4138
> >
> > pci_stop_root_bus() is just a walk over devices on the root bus stopping
> > them and pci_remove_root_bus() starts with the removal of those devices.
> >
> > Surely, those two list walks can be combined into one?
> 
> maybe ok, but we have to problem to make sure stop pci drivers before
> "driver" for ioapic/dmar.

That's fine, but ioapic/dmar stopping need not happen between the stopping of
drivers and removing of devices on the root bus I suppose?

Actually, I think that the ioapic/dmar stopping should be carried out after
removing all of the root bus devices, or it can be racy with respect to a
driver reload.  Isn't that the case?

> also stop all first and remove all make it much cleaner.

Well, I don't think that's worth special casing, though, because device removal
may be triggered via sysfs anyway for any PCI device.  In my opinion it would
be cleaner to use pci_stop_and_remove_bus_device() everywhere for PCI device
removal.

> and add path is two steps too. Add them all, and then attach driver for all.

The removal need not mirror the probing ...

Thanks,
Rafael

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Yinghai Lu Jan. 2, 2014, 10:47 p.m. UTC | #7
On Tue, Dec 31, 2013 at 1:03 PM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> On Tuesday, December 31, 2013 10:45:46 AM Yinghai Lu wrote:
>> On Mon, Dec 30, 2013 at 5:15 AM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
>> > On Monday, December 30, 2013 01:51:28 PM Rafael J. Wysocki wrote:
>> >> > We have patches that need to stop ioapic and iommu between
>> >> > pci_stop_root_bus and pci_remove_root_bus.
>> >
>> > BTW, what *exactly* do they need to be stopped between?  After these two patches:
>>
>> need to stop regular pci drivers before stop "driver" for ioapic/dmar.
>>
>> >
>> >> > Please check if the problem still happen after
>> >> >
>> >> > http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/patch/?id=c4a0a5d964e90b93eb4101c3927b788e083e530f
>> >> >
>> >> > http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/patch/?id=e3b439e1d315aff59c1b4f0fc43c5cd2d82b4138
>> >
>> > pci_stop_root_bus() is just a walk over devices on the root bus stopping
>> > them and pci_remove_root_bus() starts with the removal of those devices.
>> >
>> > Surely, those two list walks can be combined into one?
>>
>> maybe ok, but we have to problem to make sure stop pci drivers before
>> "driver" for ioapic/dmar.
>
> That's fine, but ioapic/dmar stopping need not happen between the stopping of
> drivers and removing of devices on the root bus I suppose?
>
> Actually, I think that the ioapic/dmar stopping should be carried out after
> removing all of the root bus devices, or it can be racy with respect to a
> driver reload.  Isn't that the case?

No. It should be before removing all root bus devices.
as they need to access the pci devices during stop ioapic and dmar.

Also ioapic itself could one one pci device.

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Rafael J. Wysocki Jan. 3, 2014, 12:45 a.m. UTC | #8
On Thursday, January 02, 2014 02:47:04 PM Yinghai Lu wrote:
> On Tue, Dec 31, 2013 at 1:03 PM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> > On Tuesday, December 31, 2013 10:45:46 AM Yinghai Lu wrote:
> >> On Mon, Dec 30, 2013 at 5:15 AM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> >> > On Monday, December 30, 2013 01:51:28 PM Rafael J. Wysocki wrote:
> >> >> > We have patches that need to stop ioapic and iommu between
> >> >> > pci_stop_root_bus and pci_remove_root_bus.
> >> >
> >> > BTW, what *exactly* do they need to be stopped between?  After these two patches:
> >>
> >> need to stop regular pci drivers before stop "driver" for ioapic/dmar.
> >>
> >> >
> >> >> > Please check if the problem still happen after
> >> >> >
> >> >> > http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/patch/?id=c4a0a5d964e90b93eb4101c3927b788e083e530f
> >> >> >
> >> >> > http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/patch/?id=e3b439e1d315aff59c1b4f0fc43c5cd2d82b4138
> >> >
> >> > pci_stop_root_bus() is just a walk over devices on the root bus stopping
> >> > them and pci_remove_root_bus() starts with the removal of those devices.
> >> >
> >> > Surely, those two list walks can be combined into one?
> >>
> >> maybe ok, but we have to problem to make sure stop pci drivers before
> >> "driver" for ioapic/dmar.
> >
> > That's fine, but ioapic/dmar stopping need not happen between the stopping of
> > drivers and removing of devices on the root bus I suppose?
> >
> > Actually, I think that the ioapic/dmar stopping should be carried out after
> > removing all of the root bus devices, or it can be racy with respect to a
> > driver reload.  Isn't that the case?
> 
> No. It should be before removing all root bus devices.
> as they need to access the pci devices during stop ioapic and dmar.
> 
> Also ioapic itself could one one pci device.

Well, if we stop drivers first, then stop ioapic/dmar and *then* remove
devices, it is possible to rebind a driver to a device after ioapic/dmar has
been stopped, which I guess will not lead to anything nice?

Rafael

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Yinghai Lu Jan. 6, 2014, 7:28 p.m. UTC | #9
On Thu, Jan 2, 2014 at 4:45 PM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> On Thursday, January 02, 2014 02:47:04 PM Yinghai Lu wrote:
>>
>> No. It should be before removing all root bus devices.
>> as they need to access the pci devices during stop ioapic and dmar.
>>
>> Also ioapic itself could one one pci device.
>
> Well, if we stop drivers first, then stop ioapic/dmar and *then* remove
> devices, it is possible to rebind a driver to a device after ioapic/dmar has
> been stopped, which I guess will not lead to anything nice?

Not sure how that could happen.

If it would really happen, we could set dev->match_driver to 0 in pci_stop_dev.

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Rafael J. Wysocki Jan. 6, 2014, 8:41 p.m. UTC | #10
On Monday, January 06, 2014 11:28:43 AM Yinghai Lu wrote:
> On Thu, Jan 2, 2014 at 4:45 PM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> > On Thursday, January 02, 2014 02:47:04 PM Yinghai Lu wrote:
> >>
> >> No. It should be before removing all root bus devices.
> >> as they need to access the pci devices during stop ioapic and dmar.
> >>
> >> Also ioapic itself could one one pci device.
> >
> > Well, if we stop drivers first, then stop ioapic/dmar and *then* remove
> > devices, it is possible to rebind a driver to a device after ioapic/dmar has
> > been stopped, which I guess will not lead to anything nice?
> 
> Not sure how that could happen.
> 
> If it would really happen, we could set dev->match_driver to 0 in pci_stop_dev.

Simply, run "modprobe -r driver && modprobe driver" in a loop and
remove the PCI host bridge the given device is on in parallel to that.  Chances
are, you'll see some nice breakage.

Also what happens if somebody uses the "remove" sysfs attribute on a device
needed by ioapic/dmar?

Rafael

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Yinghai Lu Jan. 8, 2014, 11:41 p.m. UTC | #11
On Mon, Jan 6, 2014 at 12:41 PM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:

>> Not sure how that could happen.
>>
>> If it would really happen, we could set dev->match_driver to 0 in pci_stop_dev.
>
> Simply, run "modprobe -r driver && modprobe driver" in a loop and
> remove the PCI host bridge the given device is on in parallel to that.  Chances
> are, you'll see some nice breakage.

I would suggest using match_driver prevent driver from attaching again.

---
 drivers/pci/remove.c |    1 +
 1 file changed, 1 insertion(+)

Index: linux-2.6/drivers/pci/remove.c
===================================================================
--- linux-2.6.orig/drivers/pci/remove.c
+++ linux-2.6/drivers/pci/remove.c
@@ -11,6 +11,7 @@ static void pci_stop_dev(struct pci_dev
         pci_proc_detach_device(dev);
         pci_remove_sysfs_dev_files(dev);
         device_release_driver(&dev->dev);
+        dev->match_driver = false;
         dev->is_added = 0;
     }


>
> Also what happens if somebody uses the "remove" sysfs attribute on a device
> needed by ioapic/dmar?

Good question, we will have problem in that case.
To make it simple, we may hide the "remove" in sysfs for ioapic pci device ?

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Rafael J. Wysocki Jan. 9, 2014, 12:10 a.m. UTC | #12
On Wednesday, January 08, 2014 03:41:52 PM Yinghai Lu wrote:
> On Mon, Jan 6, 2014 at 12:41 PM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> 
> >> Not sure how that could happen.
> >>
> >> If it would really happen, we could set dev->match_driver to 0 in pci_stop_dev.
> >
> > Simply, run "modprobe -r driver && modprobe driver" in a loop and
> > remove the PCI host bridge the given device is on in parallel to that.  Chances
> > are, you'll see some nice breakage.
> 
> I would suggest using match_driver prevent driver from attaching again.

Yes, we can do that.  Some locking is needed for it to be non-racy, however.

Anyway, we still have the problem with race conditions between different PCI
removal/rescan code paths.  And I'm still going to prepare a patch to use
the remove-rescan mutex to address those race conditions and that patch should
help here too.

> 
> ---
>  drivers/pci/remove.c |    1 +
>  1 file changed, 1 insertion(+)
> 
> Index: linux-2.6/drivers/pci/remove.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/remove.c
> +++ linux-2.6/drivers/pci/remove.c
> @@ -11,6 +11,7 @@ static void pci_stop_dev(struct pci_dev
>          pci_proc_detach_device(dev);
>          pci_remove_sysfs_dev_files(dev);
>          device_release_driver(&dev->dev);
> +        dev->match_driver = false;
>          dev->is_added = 0;
>      }
> 
> 
> >
> > Also what happens if somebody uses the "remove" sysfs attribute on a device
> > needed by ioapic/dmar?
> 
> Good question, we will have problem in that case.
> To make it simple, we may hide the "remove" in sysfs for ioapic pci device ?

Yeah, we need to do that if using that attribute may lead to problems.

Thanks,
Rafael

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Patch
diff mbox series

Index: linux-pm/drivers/acpi/pci_root.c
===================================================================
--- linux-pm.orig/drivers/acpi/pci_root.c
+++ linux-pm/drivers/acpi/pci_root.c
@@ -611,12 +611,10 @@  static void acpi_pci_root_remove(struct
 {
 	struct acpi_pci_root *root = acpi_driver_data(device);
 
-	pci_stop_root_bus(root->bus);
-
 	device_set_run_wake(root->bus->bridge, false);
 	pci_acpi_remove_bus_pm_notifier(device);
 
-	pci_remove_root_bus(root->bus);
+	pci_stop_and_remove_root_bus(root->bus);
 
 	kfree(root);
 }
Index: linux-pm/drivers/pci/remove.c
===================================================================
--- linux-pm.orig/drivers/pci/remove.c
+++ linux-pm/drivers/pci/remove.c
@@ -114,7 +114,7 @@  void pci_stop_and_remove_bus_device(stru
 }
 EXPORT_SYMBOL(pci_stop_and_remove_bus_device);
 
-void pci_stop_root_bus(struct pci_bus *bus)
+void pci_stop_and_remove_root_bus(struct pci_bus *bus)
 {
 	struct pci_dev *child, *tmp;
 	struct pci_host_bridge *host_bridge;
@@ -123,29 +123,14 @@  void pci_stop_root_bus(struct pci_bus *b
 		return;
 
 	host_bridge = to_pci_host_bridge(bus->bridge);
-	list_for_each_entry_safe_reverse(child, tmp,
-					 &bus->devices, bus_list)
-		pci_stop_bus_device(child);
+	list_for_each_entry_safe_reverse(child, tmp, &bus->devices, bus_list)
+		pci_stop_and_remove_bus_device(child);
 
 	/* stop the host bridge */
 	device_del(&host_bridge->dev);
-}
-
-void pci_remove_root_bus(struct pci_bus *bus)
-{
-	struct pci_dev *child, *tmp;
-	struct pci_host_bridge *host_bridge;
-
-	if (!pci_is_root_bus(bus))
-		return;
-
-	host_bridge = to_pci_host_bridge(bus->bridge);
-	list_for_each_entry_safe(child, tmp,
-				 &bus->devices, bus_list)
-		pci_remove_bus_device(child);
+	/* remove the root bus */
 	pci_remove_bus(bus);
 	host_bridge->bus = NULL;
-
 	/* remove the host bridge */
 	put_device(&host_bridge->dev);
 }
Index: linux-pm/include/linux/pci.h
===================================================================
--- linux-pm.orig/include/linux/pci.h
+++ linux-pm/include/linux/pci.h
@@ -779,8 +779,7 @@  struct pci_dev *pci_dev_get(struct pci_d
 void pci_dev_put(struct pci_dev *dev);
 void pci_remove_bus(struct pci_bus *b);
 void pci_stop_and_remove_bus_device(struct pci_dev *dev);
-void pci_stop_root_bus(struct pci_bus *bus);
-void pci_remove_root_bus(struct pci_bus *bus);
+void pci_stop_and_remove_root_bus(struct pci_bus *bus);
 void pci_setup_cardbus(struct pci_bus *bus);
 void pci_sort_breadthfirst(void);
 #define dev_is_pci(d) ((d)->bus == &pci_bus_type)