All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] mlx4: prevent the device from being removed concurrently
@ 2012-02-28 18:36 Thadeu Lima de Souza Cascardo
       [not found] ` <1330454176-17768-1-git-send-email-cascardo-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: Thadeu Lima de Souza Cascardo @ 2012-02-28 18:36 UTC (permalink / raw)
  To: yevgenyp; +Cc: netdev, linux-rdma, davem, roland, Thadeu Lima de Souza Cascardo

When a EEH happens, the catas poll code will try to restart the device,
removing it and adding it back again. The EEH code will try to do the
same. One of the threads ends up accessing memory that was freed by the
other thread and we get a crash.

The EEH backtrace:

<4>Call Trace:
<4>[c00000007fff3ae0] [c000000000015374] .show_stack+0x74/0x1c0 (unreliable)
<4>[c00000007fff3b90] [c00000000005d6d4] .eeh_dn_check_failure+0x2f4/0x320
<4>[c00000007fff3c50] [c00000000005d76c] .eeh_check_failure+0x6c/0x100
<4>[c00000007fff3cd0] [d00000000335165c] .poll_catas+0x25c/0x280 [mlx4_core]
<4>[c00000007fff3d70] [c00000000009add0] .run_timer_softirq+0x1b0/0x450
<4>[c00000007fff3ea0] [c000000000090c80] .__do_softirq+0x110/0x2a0
<4>[c00000007fff3f90] [c000000000021ca8] .call_do_softirq+0x14/0x24
<4>[c000000000afb910] [c000000000011288] .do_softirq+0xf8/0x130
<4>[c000000000afb9b0] [c000000000090914] .irq_exit+0xb4/0xc0
<4>[c000000000afba30] [c00000000001e024] .timer_interrupt+0x124/0x290
<4>[c000000000afbad0] [c0000000000039a4] decrementer_common+0x124/0x180
<4>--- Exception: 901 at .arch_local_irq_restore+0x54/0x60
<4>    LR = .cpu_idle+0x170/0x210
<4>[c000000000afbdc0] [c000000000017874] .cpu_idle+0x164/0x210 (unreliable)
<4>[c000000000afbe70] [c00000000000b2a8] .rest_init+0x88/0xa0
<4>[c000000000afbef0] [c000000000970ae8] .start_kernel+0x458/0x478
<4>[c000000000afbf90] [c000000000009670] .start_here_common+0x1c/0x2c
<3>mlx4_core 0000:01:00.0: Internal error detected:
<3>mlx4_core 0000:01:00.0:   buf[00]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[01]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[02]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[03]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[04]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[05]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[06]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[07]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[08]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[09]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[0a]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[0b]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[0c]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[0d]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[0e]: ffffffff
<3>mlx4_core 0000:01:00.0:   buf[0f]: ffffffff
<6>EEH: Detected PCI bus error on device 0000:01:00.0
<4>EEH: This PCI device has failed 1 times in the last hour:
<4>EEH: Bus location=U78AB.001.WZSGL60-P1-C4-T1 driver=mlx4_core pci addr=0000:01:00.0
<4>EEH: Device location=U78AB.001.WZSGL60-P1-C4-T1 driver=mlx4_core pci addr=0000:01:00.0

The crash stack trace:

Unable to handle kernel paging request for data at address 0x00000000
Faulting instruction address: 0xc000000000176a54
[c000000072d835c0] c000000000176c48 .__vunmap+0x38/0x120
[c000000072d83660] c00000000003f4e8 .__iounmap+0x38/0x60
[c000000072d836d0] d00000000335124c .mlx4_stop_catas_poll+0x3c/0xd0 [mlx4_core]
[c000000072d83760] d0000000033572dc .mlx4_unregister_device+0x2c/0xe0 [mlx4_core]
[c000000072d83800] d000000003357b68 .mlx4_remove_one+0x48/0x1f0 [mlx4_core]
[c000000072d838a0] c0000000003d3228 .pci_device_remove+0x48/0x90
[c000000072d83920] c0000000004731a0 .__device_release_driver+0x80/0x100
[c000000072d839b0] c0000000004733a0 .device_release_driver+0x30/0x60
[c000000072d83a40] c000000000472228 .bus_remove_device+0x128/0x180
[c000000072d83ad0] c00000000046fd84 .device_del+0x154/0x240
[c000000072d83b70] c00000000046fe88 .device_unregister+0x18/0x30
[c000000072d83bf0] c0000000003ccac0 .pci_stop_bus_device+0xc0/0xe0
[c000000072d83c80] c0000000003ccbbc .pci_remove_bus_device+0x2c/0x120
[c000000072d83d20] c00000000005fb68 .pcibios_remove_pci_devices+0x88/0xc0
[c000000072d83db0] c00000000005e388 .eeh_reset_device+0x48/0x180
[c000000072d83e50] c00000000005e790 .handle_eeh_events+0x2d0/0x440
[c000000072d83f00] c00000000005ee78 .eeh_event_handler+0x138/0x1c0
[c000000072d83f90] c000000000021e6c .kernel_thread+0x54/0x70

Adding a mutex in the remove code will prevent this crash.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com>
---
 drivers/net/ethernet/mellanox/mlx4/main.c |   13 +++++++++++--
 1 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 678558b..28279dc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -136,6 +136,8 @@ module_param_array(port_type_array, int, &arr_argc, 0444);
 MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default "
 				"1 for IB, 2 for Ethernet");
 
+static DEFINE_MUTEX(remove_mutex);
+
 struct mlx4_port_config {
 	struct list_head list;
 	enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
@@ -1939,10 +1941,15 @@ static int __devinit mlx4_init_one(struct pci_dev *pdev,
 
 static void mlx4_remove_one(struct pci_dev *pdev)
 {
-	struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
-	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_dev *dev;
+	struct mlx4_priv *priv;
 	int p;
 
+	mutex_lock(&remove_mutex);
+
+	dev  = pci_get_drvdata(pdev);
+	priv = mlx4_priv(dev);
+
 	if (dev) {
 		/* in SRIOV it is not allowed to unload the pf's
 		 * driver while there are alive vf's */
@@ -1999,6 +2006,8 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 		pci_disable_device(pdev);
 		pci_set_drvdata(pdev, NULL);
 	}
+
+	mutex_unlock(&remove_mutex);
 }
 
 int mlx4_restart_one(struct pci_dev *pdev)
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] mlx4: prevent the device from being removed concurrently
       [not found] ` <1330454176-17768-1-git-send-email-cascardo-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2012-02-28 19:30   ` David Miller
       [not found]     ` <20120228.143051.352474620462899753.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: David Miller @ 2012-02-28 19:30 UTC (permalink / raw)
  To: cascardo-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
  Cc: yevgenyp-VPRAkNaXOzVS1MOuV/RT9w, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, roland-BHEL68pLQRGGvPXPguhicg

From: Thadeu Lima de Souza Cascardo <cascardo-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Tue, 28 Feb 2012 15:36:16 -0300

> When a EEH happens, the catas poll code will try to restart the device,
> removing it and adding it back again. The EEH code will try to do the
> same. One of the threads ends up accessing memory that was freed by the
> other thread and we get a crash.

Stop adding bandaids to the locking.

If the EEH infrastructure doesn't synchronize parallel operations
on the same device, that is the real bug, and that's where the real
fix belongs.

I refuse to apply this patch.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mlx4: prevent the device from being removed concurrently
       [not found]     ` <20120228.143051.352474620462899753.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
@ 2012-02-28 20:34       ` Thadeu Lima de Souza Cascardo
       [not found]         ` <20120228203438.GA12028-/9mL1TZGaJOu3CHPIDa7bVaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: Thadeu Lima de Souza Cascardo @ 2012-02-28 20:34 UTC (permalink / raw)
  To: David Miller
  Cc: yevgenyp-VPRAkNaXOzVS1MOuV/RT9w, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, roland-BHEL68pLQRGGvPXPguhicg,
	jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb

On Tue, Feb 28, 2012 at 02:30:51PM -0500, David Miller wrote:
> From: Thadeu Lima de Souza Cascardo <cascardo-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Tue, 28 Feb 2012 15:36:16 -0300
> 
> > When a EEH happens, the catas poll code will try to restart the device,
> > removing it and adding it back again. The EEH code will try to do the
> > same. One of the threads ends up accessing memory that was freed by the
> > other thread and we get a crash.
> 
> Stop adding bandaids to the locking.
> 
> If the EEH infrastructure doesn't synchronize parallel operations
> on the same device, that is the real bug, and that's where the real
> fix belongs.
> 
> I refuse to apply this patch.
> 

It's not EEH that does not synchronize removal. The problem is that the
driver itself calls the driver remove function through mlx4_restart_one.

>From catas.c:

 88 static void catas_reset(struct work_struct *work)
...
103                 ret = mlx4_restart_one(priv->dev.pdev);


>From main.c:

2013 int mlx4_restart_one(struct pci_dev *pdev)
...
2015         mlx4_remove_one(pdev);

2067 static struct pci_driver mlx4_driver = {
...
2071         .remove         = __devexit_p(mlx4_remove_one)


Real EEH support in this driver will have to do something similar to
reset the device. And this either requires that we remove or rewrite the
catas code, or that we implement some kind of locking.

Probably, what we should do here is rewrite catas code so that is
resilient to races with any code removing the device, be it EEH or
anything else. It's just that EEH will trigger the catas code and makes
it a lot easier to test this problem.

Regards.
Cascardo.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mlx4: prevent the device from being removed concurrently
       [not found]         ` <20120228203438.GA12028-/9mL1TZGaJOu3CHPIDa7bVaTQe2KTcn/@public.gmane.org>
@ 2012-02-28 20:46           ` David Miller
       [not found]             ` <20120228.154657.1817512578346429850.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: David Miller @ 2012-02-28 20:46 UTC (permalink / raw)
  To: cascardo-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
  Cc: yevgenyp-VPRAkNaXOzVS1MOuV/RT9w, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, roland-BHEL68pLQRGGvPXPguhicg,
	jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb

From: Thadeu Lima de Souza Cascardo <cascardo-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Tue, 28 Feb 2012 17:34:38 -0300

> On Tue, Feb 28, 2012 at 02:30:51PM -0500, David Miller wrote:
>> From: Thadeu Lima de Souza Cascardo <cascardo-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
>> Date: Tue, 28 Feb 2012 15:36:16 -0300
>> 
>> > When a EEH happens, the catas poll code will try to restart the device,
>> > removing it and adding it back again. The EEH code will try to do the
>> > same. One of the threads ends up accessing memory that was freed by the
>> > other thread and we get a crash.
>> 
>> Stop adding bandaids to the locking.
>> 
>> If the EEH infrastructure doesn't synchronize parallel operations
>> on the same device, that is the real bug, and that's where the real
>> fix belongs.
>> 
>> I refuse to apply this patch.
>> 
> 
> It's not EEH that does not synchronize removal. The problem is that the
> driver itself calls the driver remove function through mlx4_restart_one.

Then reuse the existing intf_mutex this driver has, export it to
main.c and add a new __mlx4_unregister_device that can be called
with the intf_mutex held already.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mlx4: prevent the device from being removed concurrently
       [not found]             ` <20120228.154657.1817512578346429850.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
@ 2012-02-29 14:47               ` Jack Morgenstein
       [not found]                 ` <201202291647.53161.jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: Jack Morgenstein @ 2012-02-29 14:47 UTC (permalink / raw)
  To: David Miller, cascardo-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	yevgenyp-VPRAkNaXOzVS1MOuV/RT9w, roland-BHEL68pLQRGGvPXPguhicg
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Tuesday 28 February 2012 22:46, David Miller wrote:
> From: Thadeu Lima de Souza Cascardo <cascardo-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Tue, 28 Feb 2012 17:34:38 -0300
> 
> > On Tue, Feb 28, 2012 at 02:30:51PM -0500, David Miller wrote:
> >> From: Thadeu Lima de Souza Cascardo <cascardo-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> >> Date: Tue, 28 Feb 2012 15:36:16 -0300
> >> 
> >> > When a EEH happens, the catas poll code will try to restart the device,
> >> > removing it and adding it back again. The EEH code will try to do the
> >> > same. One of the threads ends up accessing memory that was freed by the
> >> > other thread and we get a crash.
> >> 
> >> Stop adding bandaids to the locking.
> >> 
> >> If the EEH infrastructure doesn't synchronize parallel operations
> >> on the same device, that is the real bug, and that's where the real
> >> fix belongs.
> >> 
> >> I refuse to apply this patch.
> >> 
> > 
> > It's not EEH that does not synchronize removal. The problem is that the
> > driver itself calls the driver remove function through mlx4_restart_one.
> 
> Then reuse the existing intf_mutex this driver has, export it to
> main.c and add a new __mlx4_unregister_device that can be called
> with the intf_mutex held already.
> 
Some comments.

1. Mr Cascardo's solution is only partial, and does not cover all the problem cases. He
   simply uncovered one of several examples of what lack-of-sync will do when removing a device.
   Mr. Cascardo found the kernel Oops that happens when a catastrophic error occurs during device
   removal. What if we receive a catas error while doing "init_one"?  What if we are in the middle
   of catas error recovery (in the init_one stage), and we get a remove_one request from higher up?

   There is a solution for this precise problem in the mthca driver (infiniband/hw/mthca/mthca_main.c
   infiniband/hw/mthca/mthca_catas.c). In the mthca driver, we DO in fact use an "mthca_device_mutex"
   for precisely the reason given in a. above.  I see no reason not to do the same thing here.

   This requires:
	1. mlx4_init_one(), mlx4_remove_one() and mlx4_restart_one all grab an mlx4_device_mutex.
        2. new procedure __mlx4_remove_one(), which does not grab the mutex.

   Note that it is NOT enough to simply protect the removal operation.  The protection must wrap the
   ENTIRE restart operation (both removal and init), because allowing a remove in the middle of init_one
   or restart_one would probably also cause a kernel Oops.

2. The intf_mutex is used with mlx4_un/register_device and mlx4_un/register_interface. unregister_device is
   used both in remove_one and in mlx4_change_port_types.  I would hesitate to grab that mutex for a more
   global use.  I think it is cleaner to add a device mutex (mlx4_device_mutex) for initializing/removing/
   restarting the device.

-Jack
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mlx4: prevent the device from being removed concurrently
       [not found]                 ` <201202291647.53161.jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2012-02-29 15:19                   ` Jack Morgenstein
       [not found]                     ` <201202291719.50764.jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: Jack Morgenstein @ 2012-02-29 15:19 UTC (permalink / raw)
  To: David Miller, cascardo-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	yevgenyp-VPRAkNaXOzVS1MOuV/RT9w, roland-BHEL68pLQRGGvPXPguhicg
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Wednesday 29 February 2012 16:47, Jack Morgenstein wrote:
> Some comments.
> 
> 1. Mr Cascardo's solution is only partial, and does not cover all the problem cases. He
>    simply uncovered one of several examples of what lack-of-sync will do when removing a device.
>    Mr. Cascardo found the kernel Oops that happens when a catastrophic error occurs during device
>    removal. What if we receive a catas error while doing "init_one"?  What if we are in the middle
>    of catas error recovery (in the init_one stage), and we get a remove_one request from higher up?
> 
>    There is a solution for this precise problem in the mthca driver (infiniband/hw/mthca/mthca_main.c
>    infiniband/hw/mthca/mthca_catas.c). In the mthca driver, we DO in fact use an "mthca_device_mutex"
>    for precisely the reason given in a. above.  I see no reason not to do the same thing here.
> 
>    This requires:
> 	1. mlx4_init_one(), mlx4_remove_one() and mlx4_restart_one all grab an mlx4_device_mutex.
>         2. new procedure __mlx4_remove_one(), which does not grab the mutex.
> 
>    Note that it is NOT enough to simply protect the removal operation.  The protection must wrap the
>    ENTIRE restart operation (both removal and init), because allowing a remove in the middle of init_one
>    or restart_one would probably also cause a kernel Oops.
> 
> 2. The intf_mutex is used with mlx4_un/register_device and mlx4_un/register_interface. unregister_device is
>    used both in remove_one and in mlx4_change_port_types.  I would hesitate to grab that mutex for a more
>    global use.  I think it is cleaner to add a device mutex (mlx4_device_mutex) for initializing/removing/
>    restarting the device.
> 
> -Jack
> 
Another thing -- what about the desired final state of the device?  Say we do a remove one, and in the middle
of this, we get a catas restart.  The catas restart will wait until the remove-in-progress completes, and
then will do its remove/init -- with the net result that the device is UP rather than DOWN.

This implies that we need some sort of state machine here.

-Jack
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mlx4: prevent the device from being removed concurrently
       [not found]                     ` <201202291719.50764.jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2012-03-01  7:51                       ` Jack Morgenstein
  0 siblings, 0 replies; 7+ messages in thread
From: Jack Morgenstein @ 2012-03-01  7:51 UTC (permalink / raw)
  To: cascardo-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
  Cc: David Miller, yevgenyp-VPRAkNaXOzVS1MOuV/RT9w,
	roland-BHEL68pLQRGGvPXPguhicg, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

Mr Cascardo,

If this is at a customer site, there is a workaround available for you.

In file /etc/modprobe.d/mlx4_core.conf, enter a line:
options mlx4_core internal_err_reset=0

(do "modinfo mlx4_core" to see a description of the module parameter).

Setting this parameter to 0 in the modprobe conf file will cause the driver to simply report the
internal error failure (in /var/log/messages), but the device will not be restarted.

This will avoid your Oops -- the EEH flow will then be the only flow restarting the device.

(Note that even if you implement my suggestion, you will still have a situation where the device
will be reset TWICE).

Again, this is a workaround for current users.  The issue needs to be fixed so that even when the
internal error does cause a reset, the kernel will not crash.

Unfortunately, I do not have the time now to generate a patch.

-Jack

On Wednesday 29 February 2012 17:19, Jack Morgenstein wrote:
> On Wednesday 29 February 2012 16:47, Jack Morgenstein wrote:
> > Some comments.
> > 
> > 1. Mr Cascardo's solution is only partial, and does not cover all the problem cases. He
> >    simply uncovered one of several examples of what lack-of-sync will do when removing a device.
> >    Mr. Cascardo found the kernel Oops that happens when a catastrophic error occurs during device
> >    removal. What if we receive a catas error while doing "init_one"?  What if we are in the middle
> >    of catas error recovery (in the init_one stage), and we get a remove_one request from higher up?
> > 
> >    There is a solution for this precise problem in the mthca driver (infiniband/hw/mthca/mthca_main.c
> >    infiniband/hw/mthca/mthca_catas.c). In the mthca driver, we DO in fact use an "mthca_device_mutex"
> >    for precisely the reason given in a. above.  I see no reason not to do the same thing here.
> > 
> >    This requires:
> > 	1. mlx4_init_one(), mlx4_remove_one() and mlx4_restart_one all grab an mlx4_device_mutex.
> >         2. new procedure __mlx4_remove_one(), which does not grab the mutex.
> > 
> >    Note that it is NOT enough to simply protect the removal operation.  The protection must wrap the
> >    ENTIRE restart operation (both removal and init), because allowing a remove in the middle of init_one
> >    or restart_one would probably also cause a kernel Oops.
> > 
> > 2. The intf_mutex is used with mlx4_un/register_device and mlx4_un/register_interface. unregister_device is
> >    used both in remove_one and in mlx4_change_port_types.  I would hesitate to grab that mutex for a more
> >    global use.  I think it is cleaner to add a device mutex (mlx4_device_mutex) for initializing/removing/
> >    restarting the device.
> > 
> > -Jack
> > 
> Another thing -- what about the desired final state of the device?  Say we do a remove one, and in the middle
> of this, we get a catas restart.  The catas restart will wait until the remove-in-progress completes, and
> then will do its remove/init -- with the net result that the device is UP rather than DOWN.
> 
> This implies that we need some sort of state machine here.
> 
> -Jack
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2012-03-01  7:51 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-02-28 18:36 [PATCH] mlx4: prevent the device from being removed concurrently Thadeu Lima de Souza Cascardo
     [not found] ` <1330454176-17768-1-git-send-email-cascardo-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2012-02-28 19:30   ` David Miller
     [not found]     ` <20120228.143051.352474620462899753.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
2012-02-28 20:34       ` Thadeu Lima de Souza Cascardo
     [not found]         ` <20120228203438.GA12028-/9mL1TZGaJOu3CHPIDa7bVaTQe2KTcn/@public.gmane.org>
2012-02-28 20:46           ` David Miller
     [not found]             ` <20120228.154657.1817512578346429850.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
2012-02-29 14:47               ` Jack Morgenstein
     [not found]                 ` <201202291647.53161.jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2012-02-29 15:19                   ` Jack Morgenstein
     [not found]                     ` <201202291719.50764.jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2012-03-01  7:51                       ` Jack Morgenstein

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.