netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
@ 2013-05-08  0:08 Frank Li
  2013-05-08 20:15 ` David Miller
  2013-05-13  4:57 ` Shawn Guo
  0 siblings, 2 replies; 10+ messages in thread
From: Frank Li @ 2013-05-08  0:08 UTC (permalink / raw)
  To: romieu, r.schwebel, davem, l.stach, netdev, festevam, shawn.guo, lznuaa
  Cc: Frank Li

reproduce steps
 1. flood ping from other machine
 	ping -f -s 41000 IP
 2. run below script
    while [ 1 ]; do ethtool -s eth0 autoneg off;
    sleep 3;ethtool -s eth0 autoneg on; sleep 4; done;

You can see oops in one hour.

The reason is fec_restart clear BD but NAPI may use it.
The solution is disable NAPI and stop xmit when reset BD.
disable NAPI may sleep, so fec_restart can't be call in
atomic context.

Signed-off-by: Frank Li <Frank.Li@freescale.com>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Tested-by: Lucas Stach <l.stach@pengutronix.de>
---
Change from v1 to v2
 * Add netif_tx_lock(ndev) to avoid xmit runing when reset hardware
Change from v2 to v3
 * Move put real statements after function variable declarations according to David's comments
 * Remove lock in adjust_link according to Lucas Stach's comments
Change from v3 to v4
 * rebase to latest net/master
 * remove hw_lock because not used again
 * reduce delay work to 0
 * group delay work related feild to one structure
 * call netif_device_detach() in fec_restart
Change from v4 to v5
 * use true/false instead of 1/0

 drivers/net/ethernet/freescale/fec.h      |   10 ++++--
 drivers/net/ethernet/freescale/fec_main.c |   44 +++++++++++++++++++++-------
 2 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index d44f65b..afa7b5b 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -198,6 +198,11 @@ struct bufdesc_ex {
 #define FLAG_RX_CSUM_ENABLED	(BD_ENET_RX_ICE | BD_ENET_RX_PCR)
 #define FLAG_RX_CSUM_ERROR	(BD_ENET_RX_ICE | BD_ENET_RX_PCR)
 
+struct fec_enet_delayed_work {
+	struct delayed_work delay_work;
+	bool timeout;
+};
+
 /* The FEC buffer descriptors track the ring buffers.  The rx_bd_base and
  * tx_bd_base always point to the base of the buffer descriptors.  The
  * cur_rx and cur_tx point to the currently available buffer.
@@ -231,9 +236,6 @@ struct fec_enet_private {
 	/* The ring entries to be free()ed */
 	struct bufdesc	*dirty_tx;
 
-	/* hold while accessing the HW like ringbuffer for tx/rx but not MAC */
-	spinlock_t hw_lock;
-
 	struct	platform_device *pdev;
 
 	int	opened;
@@ -268,7 +270,7 @@ struct fec_enet_private {
 	int hwts_rx_en;
 	int hwts_tx_en;
 	struct timer_list time_keep;
-
+	struct fec_enet_delayed_work delay_work;
 };
 
 void fec_ptp_init(struct net_device *ndev, struct platform_device *pdev);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index b9748f1..d028830 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -445,6 +445,13 @@ fec_restart(struct net_device *ndev, int duplex)
 	u32 rcntl = OPT_FRAME_SIZE | 0x04;
 	u32 ecntl = 0x2; /* ETHEREN */
 
+	if (netif_running(ndev)) {
+		netif_device_detach(ndev);
+		napi_disable(&fep->napi);
+		netif_stop_queue(ndev);
+		netif_tx_lock(ndev);
+	}
+
 	/* Whack a reset.  We should wait for this. */
 	writel(1, fep->hwp + FEC_ECNTRL);
 	udelay(10);
@@ -605,6 +612,13 @@ fec_restart(struct net_device *ndev, int duplex)
 
 	/* Enable interrupts we wish to service */
 	writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
+
+	if (netif_running(ndev)) {
+		netif_device_attach(ndev);
+		napi_enable(&fep->napi);
+		netif_wake_queue(ndev);
+		netif_tx_unlock(ndev);
+	}
 }
 
 static void
@@ -644,8 +658,22 @@ fec_timeout(struct net_device *ndev)
 
 	ndev->stats.tx_errors++;
 
-	fec_restart(ndev, fep->full_duplex);
-	netif_wake_queue(ndev);
+	fep->delay_work.timeout = true;
+	schedule_delayed_work(&(fep->delay_work.delay_work), 0);
+}
+
+static void fec_enet_work(struct work_struct *work)
+{
+	struct fec_enet_private *fep =
+		container_of(work,
+			     struct fec_enet_private,
+			     delay_work.delay_work.work);
+
+	if (fep->delay_work.timeout) {
+		fep->delay_work.timeout = false;
+		fec_restart(fep->netdev, fep->full_duplex);
+		netif_wake_queue(fep->netdev);
+	}
 }
 
 static void
@@ -1024,16 +1052,12 @@ static void fec_enet_adjust_link(struct net_device *ndev)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	struct phy_device *phy_dev = fep->phy_dev;
-	unsigned long flags;
-
 	int status_change = 0;
 
-	spin_lock_irqsave(&fep->hw_lock, flags);
-
 	/* Prevent a state halted on mii error */
 	if (fep->mii_timeout && phy_dev->state == PHY_HALTED) {
 		phy_dev->state = PHY_RESUMING;
-		goto spin_unlock;
+		return;
 	}
 
 	if (phy_dev->link) {
@@ -1061,9 +1085,6 @@ static void fec_enet_adjust_link(struct net_device *ndev)
 		}
 	}
 
-spin_unlock:
-	spin_unlock_irqrestore(&fep->hw_lock, flags);
-
 	if (status_change)
 		phy_print_status(phy_dev);
 }
@@ -1732,7 +1753,6 @@ static int fec_enet_init(struct net_device *ndev)
 		return -ENOMEM;
 
 	memset(cbd_base, 0, PAGE_SIZE);
-	spin_lock_init(&fep->hw_lock);
 
 	fep->netdev = ndev;
 
@@ -1947,6 +1967,7 @@ fec_probe(struct platform_device *pdev)
 	if (fep->bufdesc_ex && fep->ptp_clock)
 		netdev_info(ndev, "registered PHC device %d\n", fep->dev_id);
 
+	INIT_DELAYED_WORK(&(fep->delay_work.delay_work), fec_enet_work);
 	return 0;
 
 failed_register:
@@ -1979,6 +2000,7 @@ fec_drv_remove(struct platform_device *pdev)
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	int i;
 
+	cancel_delayed_work_sync(&(fep->delay_work.delay_work));
 	unregister_netdev(ndev);
 	fec_enet_mii_remove(fep);
 	del_timer_sync(&fep->time_keep);
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
  2013-05-08  0:08 [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times Frank Li
@ 2013-05-08 20:15 ` David Miller
  2013-05-13  4:57 ` Shawn Guo
  1 sibling, 0 replies; 10+ messages in thread
From: David Miller @ 2013-05-08 20:15 UTC (permalink / raw)
  To: Frank.Li; +Cc: romieu, r.schwebel, l.stach, netdev, festevam, shawn.guo, lznuaa

From: Frank Li <Frank.Li@freescale.com>
Date: Wed, 8 May 2013 08:08:44 +0800

> reproduce steps
>  1. flood ping from other machine
>  	ping -f -s 41000 IP
>  2. run below script
>     while [ 1 ]; do ethtool -s eth0 autoneg off;
>     sleep 3;ethtool -s eth0 autoneg on; sleep 4; done;
> 
> You can see oops in one hour.
> 
> The reason is fec_restart clear BD but NAPI may use it.
> The solution is disable NAPI and stop xmit when reset BD.
> disable NAPI may sleep, so fec_restart can't be call in
> atomic context.
> 
> Signed-off-by: Frank Li <Frank.Li@freescale.com>
> Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
> Tested-by: Lucas Stach <l.stach@pengutronix.de>

Applied and queued up for -stable.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
  2013-05-08  0:08 [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times Frank Li
  2013-05-08 20:15 ` David Miller
@ 2013-05-13  4:57 ` Shawn Guo
  2013-05-13  5:07   ` Shawn Guo
                     ` (2 more replies)
  1 sibling, 3 replies; 10+ messages in thread
From: Shawn Guo @ 2013-05-13  4:57 UTC (permalink / raw)
  To: Frank Li; +Cc: romieu, r.schwebel, davem, l.stach, netdev, festevam, lznuaa

Hi Frank,

On Wed, May 08, 2013 at 08:08:44AM +0800, Frank Li wrote:
> reproduce steps
>  1. flood ping from other machine
>  	ping -f -s 41000 IP
>  2. run below script
>     while [ 1 ]; do ethtool -s eth0 autoneg off;
>     sleep 3;ethtool -s eth0 autoneg on; sleep 4; done;
> 
> You can see oops in one hour.
> 
> The reason is fec_restart clear BD but NAPI may use it.
> The solution is disable NAPI and stop xmit when reset BD.
> disable NAPI may sleep, so fec_restart can't be call in
> atomic context.
> 
> Signed-off-by: Frank Li <Frank.Li@freescale.com>
> Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
> Tested-by: Lucas Stach <l.stach@pengutronix.de>

The patch has landed on 3.10-rc1.  Seems that it introduces a lock
warning as below.  Turn on CONFIG_PROVE_LOCKING and you will be able
to see it.

Shawn

libphy: 2188000.ethernet:01 - Link is Up - 100/Full
IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready

=================================
[ INFO: inconsistent lock state ]
3.10.0-rc1+ #1 Not tainted
---------------------------------
inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
swapper/0/0 [HC0[0]:SC1[5]:HE1:SE0] takes:
 (_xmit_ETHER#2){+.?...}, at: [<8047a7b4>] sch_direct_xmit+0xb4/0x2fc
{SOFTIRQ-ON-W} state was registered at:
  [<80068a04>] mark_lock+0x150/0x6d0
  [<800695a8>] __lock_acquire+0x624/0x1ce0
  [<8006b16c>] lock_acquire+0x68/0x7c
  [<8054d1a4>] _raw_spin_lock+0x34/0x44
  [<80380788>] fec_restart+0x614/0x694
  [<803811f4>] fec_enet_adjust_link+0x84/0xbc
  [<8037d0ac>] phy_state_machine+0x184/0x39c
  [<8003dbfc>] process_one_work+0x1a4/0x42c
  [<8003e298>] worker_thread+0x138/0x3a8
  [<8004462c>] kthread+0xac/0xb8
  [<8000e688>] ret_from_fork+0x14/0x2c
irq event stamp: 9252
hardirqs last  enabled at (9252): [<8002bc08>] local_bh_enable_ip+0x84/0xec
hardirqs last disabled at (9251): [<8002bbc8>] local_bh_enable_ip+0x44/0xec
softirqs last  enabled at (9202): [<8002b8b0>] _local_bh_enable+0x14/0x18
softirqs last disabled at (9203): [<8002be80>] irq_exit+0xac/0xe8

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(_xmit_ETHER#2);
  <Interrupt>
    lock(_xmit_ETHER#2);

 *** DEADLOCK ***

4 locks held by swapper/0/0:
 #0:  (((&idev->mc_ifc_timer))){+.-...}, at: [<80031384>] call_timer_fn+0x0/0xf0
 #1:  (rcu_read_lock){.+.+..}, at: [<80501d58>] mld_sendpack+0x0/0x55c
 #2:  (rcu_read_lock_bh){.+....}, at: [<804e1e54>] ip6_finish_output2+0x44/0x748
 #3:  (rcu_read_lock_bh){.+....}, at: [<804641e4>] dev_queue_xmit+0x0/0x658

stack backtrace:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.10.0-rc1+ #1
Backtrace:
[<80011ebc>] (dump_backtrace+0x0/0x10c) from [<8001205c>] (show_stack+0x18/0x1c)
 r6:80763fc8 r5:80763c08 r4:808a0358 r3:00000000
[<80012044>] (show_stack+0x0/0x1c) from [<80549188>] (dump_stack+0x20/0x28)
[<80549168>] (dump_stack+0x0/0x28) from [<805471f4>] (print_usage_bug+0x26c/0x2dc)
[<80546f88>] (print_usage_bug+0x0/0x2dc) from [<80068f0c>] (mark_lock+0x658/0x6d0)
[<800688b4>] (mark_lock+0x0/0x6d0) from [<80069560>] (__lock_acquire+0x5dc/0x1ce0)
[<80068f84>] (__lock_acquire+0x0/0x1ce0) from [<8006b16c>] (lock_acquire+0x68/0x7c)
[<8006b104>] (lock_acquire+0x0/0x7c) from [<8054d1a4>] (_raw_spin_lock+0x34/0x44)
 r7:00000000 r6:bf8e0800 r5:00000002 r4:bf8b5440
[<8054d170>] (_raw_spin_lock+0x0/0x44) from [<8047a7b4>] (sch_direct_xmit+0xb4/0x2fc)
 r5:bf8b5400 r4:bfbe7600
[<8047a700>] (sch_direct_xmit+0x0/0x2fc) from [<80464474>] (dev_queue_xmit+0x290/0x658)
[<804641e4>] (dev_queue_xmit+0x0/0x658) from [<8046caf4>] (neigh_resolve_output+0x114/0x218)
[<8046c9e0>] (neigh_resolve_output+0x0/0x218) from [<804e21d8>] (ip6_finish_output2+0x3c8/0x748)
[<804e1e10>] (ip6_finish_output2+0x0/0x748) from [<804e5798>] (ip6_finish_output+0xb0/0x1dc)
[<804e56e8>] (ip6_finish_output+0x0/0x1dc) from [<804e5964>] (ip6_output+0xa0/0x244)
 r5:bf8e0800 r4:bf022480
[<804e58c4>] (ip6_output+0x0/0x244) from [<805021f0>] (mld_sendpack+0x498/0x55c)
 r7:00000000 r6:bfbe5400 r5:0000004c r4:bf022480
[<80501d58>] (mld_sendpack+0x0/0x55c) from [<805028f4>] (mld_ifc_timer_expire+0x1d4/0x2b0)
[<80502720>] (mld_ifc_timer_expire+0x0/0x2b0) from [<800313f8>] (call_timer_fn+0x74/0xf0)
[<80031384>] (call_timer_fn+0x0/0xf0) from [<80031668>] (run_timer_softirq+0x1f4/0x210)
 r8:807e5d70 r7:00000000 r6:8075a0c0 r5:807e5540 r4:80759e38
[<80031474>] (run_timer_softirq+0x0/0x210) from [<8002b9b4>] (__do_softirq+0x100/0x1f4)
[<8002b8b4>] (__do_softirq+0x0/0x1f4) from [<8002be80>] (irq_exit+0xac/0xe8)
[<8002bdd4>] (irq_exit+0x0/0xe8) from [<8000eef8>] (handle_IRQ+0x58/0xb4)
 r4:80760c78 r3:00000220
[<8000eea0>] (handle_IRQ+0x0/0xb4) from [<80008604>] (gic_handle_irq+0x30/0x64)
 r8:807a2f6d r7:f4000100 r6:80759f18 r5:80760dbc r4:f400010c
r3:00000000
[<800085d4>] (gic_handle_irq+0x0/0x64) from [<8000e124>] (__irq_svc+0x44/0x5c)
Exception stack(0x80759f18 to 0x80759f60)
9f00:                                                       00000001 00000001
9f20: 00000000 80763c08 80758000 00000000 807a2f6d 00000001 807a2f6d 8055042c
9f40: 8076092c 80759f6c 80759f30 80759f60 8006badc 8000f2a4 20000113 ffffffff
 r7:80759f4c r6:ffffffff r5:20000113 r4:8000f2a4
[<8000f27c>] (arch_cpu_idle+0x0/0x44) from [<8005badc>] (cpu_startup_entry+0x74/0x160)
[<8005ba68>] (cpu_startup_entry+0x0/0x160) from [<80540e34>] (rest_init+0xb0/0xd8)
 r7:81512400
[<80540d84>] (rest_init+0x0/0xd8) from [<80712848>] (start_kernel+0x2c4/0x31c)
 r6:807493e8 r5:807a3080 r4:80760990
[<80712584>] (start_kernel+0x0/0x31c) from [<10008074>] (0x10008074)
 r7:80764fec r6:807493e4 r5:807608d0 r4:10c53c7d

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
  2013-05-13  4:57 ` Shawn Guo
@ 2013-05-13  5:07   ` Shawn Guo
  2013-05-14  2:29   ` Li Frank-B20596
  2013-05-14 19:27   ` Fabio Estevam
  2 siblings, 0 replies; 10+ messages in thread
From: Shawn Guo @ 2013-05-13  5:07 UTC (permalink / raw)
  To: Frank Li
  Cc: romieu, Robert Schwebel, David Miller, l.stach,
	Linux Netdev List, Fabio Estevam, lznuaa

On 13 May 2013 12:57, Shawn Guo <shawn.guo@linaro.org> wrote:
> Hi Frank,
>
> On Wed, May 08, 2013 at 08:08:44AM +0800, Frank Li wrote:
>> reproduce steps
>>  1. flood ping from other machine
>>       ping -f -s 41000 IP
>>  2. run below script
>>     while [ 1 ]; do ethtool -s eth0 autoneg off;
>>     sleep 3;ethtool -s eth0 autoneg on; sleep 4; done;
>>
>> You can see oops in one hour.
>>
>> The reason is fec_restart clear BD but NAPI may use it.
>> The solution is disable NAPI and stop xmit when reset BD.
>> disable NAPI may sleep, so fec_restart can't be call in
>> atomic context.
>>
>> Signed-off-by: Frank Li <Frank.Li@freescale.com>
>> Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
>> Tested-by: Lucas Stach <l.stach@pengutronix.de>
>
> The patch has landed on 3.10-rc1.  Seems that it introduces a lock
> warning as below.  Turn on CONFIG_PROVE_LOCKING and you will be able
> to see it.

The warning message looks a little different in one of my imx28 boot tests.

Shawn

[    4.749454] =================================
[    4.753829] [ INFO: inconsistent lock state ]
[    4.758207] 3.10.0-rc1-00013-gc5f5ad9 #77 Not tainted
[    4.763270] ---------------------------------
[    4.767641] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
[    4.773669] kworker/0:1/20 [HC0[0]:SC1[3]:HE1:SE0] takes:
[    4.779080]  (_xmit_ETHER#2){+.?...}, at: [<c03c5644>]
sch_direct_xmit+0x9c/0x2d0
[    4.786686] {SOFTIRQ-ON-W} state was registered at:
[    4.791579]   [<c005a784>] __lock_acquire+0x5fc/0x1a90
[    4.796767]   [<c005c188>] lock_acquire+0x9c/0x124
[    4.801598]   [<c04449cc>] _raw_spin_lock+0x2c/0x3c
[    4.806532]   [<c02fd3e8>] fec_restart+0x5bc/0x654
[    4.811370]   [<c02fddfc>] fec_enet_adjust_link+0x7c/0xb4
[    4.816806]   [<c02fac20>] phy_state_machine+0x17c/0x394
[    4.822153]   [<c0035e08>] process_one_work+0x1bc/0x4c4
[    4.827422]   [<c00364e0>] worker_thread+0x134/0x3a0
[    4.832420]   [<c003c274>] kthread+0xa4/0xb0
[    4.836726]   [<c000e9c0>] ret_from_fork+0x14/0x34
[    4.841564] irq event stamp: 828
[    4.844806] hardirqs last  enabled at (828): [<c0022f80>]
local_bh_enable_ip+0x84/0xf0
[    4.852773] hardirqs last disabled at (827): [<c0022f40>]
local_bh_enable_ip+0x44/0xf0
[    4.860727] softirqs last  enabled at (808): [<c0022d24>]
__do_softirq+0x180/0x284
[    4.868333] softirqs last disabled at (811): [<c00231e0>] irq_exit+0x9c/0xd8
[    4.875416]
[    4.875416] other info that might help us debug this:
[    4.881959]  Possible unsafe locking scenario:
[    4.881959]
[    4.887891]        CPU0
[    4.890345]        ----
[    4.892799]   lock(_xmit_ETHER#2);
[    4.896247]   <Interrupt>
[    4.898874]     lock(_xmit_ETHER#2);
[    4.902497]
[    4.902497]  *** DEADLOCK ***
[    4.902497]
[    4.908444] 7 locks held by kworker/0:1/20:
[    4.912637]  #0:  (rpciod){.+.+.+}, at: [<c0035d84>]
process_one_work+0x138/0x4c4
[    4.920217]  #1:  ((&(&transport->connect_worker)->work)){+.+.+.},
at: [<c0035d84>] process_one_work+0x138/0x4c4
[    4.930482]  #2:  (sk_lock-AF_INET-RPC){+.+...}, at: [<c04010ec>]
inet_stream_connect+0x20/0x48
[    4.939293]  #3:  (rcu_read_lock){.+.+..}, at: [<c03d435c>]
ip_queue_xmit+0x0/0x3f8
[    4.947043]  #4:  (rcu_read_lock){.+.+..}, at: [<c03abb18>]
__netif_receive_skb_core+0x34/0x5b0
[    4.955834]  #5:  (rcu_read_lock){.+.+..}, at: [<c03b9508>]
neigh_update+0x2a4/0x50c
[    4.963667]  #6:  (rcu_read_lock_bh){.+....}, at: [<c03b0870>]
dev_queue_xmit+0x0/0x684
[    4.971767]
[    4.971767] stack backtrace:
[    4.976162] CPU: 0 PID: 20 Comm: kworker/0:1 Not tainted
3.10.0-rc1-00013-gc5f5ad9 #77
[    4.984122] Workqueue: rpciod xs_tcp_setup_socket
[    4.988909] [<c0013958>] (unwind_backtrace+0x0/0xf0) from
[<c001173c>] (show_stack+0x10/0x14)
[    4.997491] [<c001173c>] (show_stack+0x10/0x14) from [<c043e26c>]
(print_usage_bug.part.28+0x218/0x280)
[    5.006937] [<c043e26c>] (print_usage_bug.part.28+0x218/0x280) from
[<c005a048>] (mark_lock+0x528/0x668)
[    5.016461] [<c005a048>] (mark_lock+0x528/0x668) from [<c005a73c>]
(__lock_acquire+0x5b4/0x1a90)
[    5.025289] [<c005a73c>] (__lock_acquire+0x5b4/0x1a90) from
[<c005c188>] (lock_acquire+0x9c/0x124)
[    5.034298] [<c005c188>] (lock_acquire+0x9c/0x124) from
[<c04449cc>] (_raw_spin_lock+0x2c/0x3c)
[    5.043050] [<c04449cc>] (_raw_spin_lock+0x2c/0x3c) from
[<c03c5644>] (sch_direct_xmit+0x9c/0x2d0)
[    5.052055] [<c03c5644>] (sch_direct_xmit+0x9c/0x2d0) from
[<c03b0b18>] (dev_queue_xmit+0x2a8/0x684)
[    5.061234] [<c03b0b18>] (dev_queue_xmit+0x2a8/0x684) from
[<c03b9478>] (neigh_update+0x214/0x50c)
[    5.070241] [<c03b9478>] (neigh_update+0x214/0x50c) from
[<c03fa8b8>] (arp_process+0x264/0x62c)
[    5.078985] [<c03fa8b8>] (arp_process+0x264/0x62c) from
[<c03abd7c>] (__netif_receive_skb_core+0x298/0x5b0)
[    5.088770] [<c03abd7c>] (__netif_receive_skb_core+0x298/0x5b0)
from [<c03aeecc>] (napi_gro_receive+0x74/0xa0)
[    5.098824] [<c03aeecc>] (napi_gro_receive+0x74/0xa0) from
[<c02fe084>] (fec_enet_rx_napi+0x250/0x5d8)
[    5.108182] [<c02fe084>] (fec_enet_rx_napi+0x250/0x5d8) from
[<c03aeb90>] (net_rx_action+0xc0/0x238)
[    5.117364] [<c03aeb90>] (net_rx_action+0xc0/0x238) from
[<c0022c90>] (__do_softirq+0xec/0x284)
[    5.126110] [<c0022c90>] (__do_softirq+0xec/0x284) from
[<c00231e0>] (irq_exit+0x9c/0xd8)
[    5.134335] [<c00231e0>] (irq_exit+0x9c/0xd8) from [<c000f804>]
(handle_IRQ+0x34/0x84)
[    5.142296] [<c000f804>] (handle_IRQ+0x34/0x84) from [<c00086fc>]
(icoll_handle_irq+0x34/0x4c)
[    5.150949] [<c00086fc>] (icoll_handle_irq+0x34/0x4c) from
[<c000e544>] (__irq_svc+0x44/0x54)
[    5.159494] Exception stack(0xc7561c78 to 0xc7561cc0)
[    5.164571] 1c60:
    00000001 c7520338
[    5.172783] 1c80: 00000000 20000013 c7560000 c03d3e84 c76d6d00
c7736400 c77582c0 00000000
[    5.180994] 1ca0: 00000000 00000000 04228060 c7561cc0 c005c944
c002307c 20000013 ffffffff
[    5.189217] [<c000e544>] (__irq_svc+0x44/0x54) from [<c002307c>]
(local_bh_enable+0x90/0xf0)
[    5.197700] [<c002307c>] (local_bh_enable+0x90/0xf0) from
[<c03d3e84>] (ip_finish_output+0x21c/0x504)
[    5.206960] [<c03d3e84>] (ip_finish_output+0x21c/0x504) from
[<c03d42f8>] (ip_local_out+0x30/0x94)
[    5.215955] [<c03d42f8>] (ip_local_out+0x30/0x94) from [<c03d44a0>]
(ip_queue_xmit+0x144/0x3f8)
[    5.224707] [<c03d44a0>] (ip_queue_xmit+0x144/0x3f8) from
[<c03e9308>] (tcp_transmit_skb+0x3cc/0x80c)
[    5.233981] [<c03e9308>] (tcp_transmit_skb+0x3cc/0x80c) from
[<c03ebc40>] (tcp_connect+0x57c/0x5d4)
[    5.243081] [<c03ebc40>] (tcp_connect+0x57c/0x5d4) from
[<c03ef960>] (tcp_v4_connect+0x288/0x3a8)
[    5.252010] [<c03ef960>] (tcp_v4_connect+0x288/0x3a8) from
[<c0401048>] (__inet_stream_connect+0x258/0x2dc)
[    5.261799] [<c0401048>] (__inet_stream_connect+0x258/0x2dc) from
[<c0401100>] (inet_stream_connect+0x34/0x48)
[    5.271863] [<c0401100>] (inet_stream_connect+0x34/0x48) from
[<c039aaf8>] (kernel_connect+0x10/0x14)
[    5.281139] [<c039aaf8>] (kernel_connect+0x10/0x14) from
[<c041e384>] (xs_tcp_setup_socket+0x10c/0x360)
[    5.290587] [<c041e384>] (xs_tcp_setup_socket+0x10c/0x360) from
[<c0035e08>] (process_one_work+0x1bc/0x4c4)
[    5.300372] [<c0035e08>] (process_one_work+0x1bc/0x4c4) from
[<c00364e0>] (worker_thread+0x134/0x3a0)
[    5.309632] [<c00364e0>] (worker_thread+0x134/0x3a0) from
[<c003c274>] (kthread+0xa4/0xb0)
[    5.317944] [<c003c274>] (kthread+0xa4/0xb0) from [<c000e9c0>]
(ret_from_fork+0x14/0x34)

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
  2013-05-13  4:57 ` Shawn Guo
  2013-05-13  5:07   ` Shawn Guo
@ 2013-05-14  2:29   ` Li Frank-B20596
  2013-05-14 19:27   ` Fabio Estevam
  2 siblings, 0 replies; 10+ messages in thread
From: Li Frank-B20596 @ 2013-05-14  2:29 UTC (permalink / raw)
  To: Shawn Guo; +Cc: romieu, r.schwebel, davem, l.stach, netdev, festevam, lznuaa

> 
> Hi Frank,
> 
> On Wed, May 08, 2013 at 08:08:44AM +0800, Frank Li wrote:
> > reproduce steps
> >  1. flood ping from other machine
> >  	ping -f -s 41000 IP
> >  2. run below script
> >     while [ 1 ]; do ethtool -s eth0 autoneg off;
> >     sleep 3;ethtool -s eth0 autoneg on; sleep 4; done;
> >
> > You can see oops in one hour.
> >
> > The reason is fec_restart clear BD but NAPI may use it.
> > The solution is disable NAPI and stop xmit when reset BD.
> > disable NAPI may sleep, so fec_restart can't be call in atomic
> > context.
> >
> > Signed-off-by: Frank Li <Frank.Li@freescale.com>
> > Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
> > Tested-by: Lucas Stach <l.stach@pengutronix.de>
> 
> The patch has landed on 3.10-rc1.  Seems that it introduces a lock warning as
> below.  Turn on CONFIG_PROVE_LOCKING and you will be able to see it.

I will investigate it. 

> 
> Shawn
> 
> libphy: 2188000.ethernet:01 - Link is Up - 100/Full
> IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
> 
> =================================
> [ INFO: inconsistent lock state ]
> 3.10.0-rc1+ #1 Not tainted
> ---------------------------------
> inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
> swapper/0/0 [HC0[0]:SC1[5]:HE1:SE0] takes:
>  (_xmit_ETHER#2){+.?...}, at: [<8047a7b4>] sch_direct_xmit+0xb4/0x2fc
> {SOFTIRQ-ON-W} state was registered at:
>   [<80068a04>] mark_lock+0x150/0x6d0
>   [<800695a8>] __lock_acquire+0x624/0x1ce0
>   [<8006b16c>] lock_acquire+0x68/0x7c
>   [<8054d1a4>] _raw_spin_lock+0x34/0x44
>   [<80380788>] fec_restart+0x614/0x694
>   [<803811f4>] fec_enet_adjust_link+0x84/0xbc
>   [<8037d0ac>] phy_state_machine+0x184/0x39c
>   [<8003dbfc>] process_one_work+0x1a4/0x42c
>   [<8003e298>] worker_thread+0x138/0x3a8
>   [<8004462c>] kthread+0xac/0xb8
>   [<8000e688>] ret_from_fork+0x14/0x2c
> irq event stamp: 9252
> hardirqs last  enabled at (9252): [<8002bc08>] local_bh_enable_ip+0x84/0xec
> hardirqs last disabled at (9251): [<8002bbc8>] local_bh_enable_ip+0x44/0xec
> softirqs last  enabled at (9202): [<8002b8b0>] _local_bh_enable+0x14/0x18
> softirqs last disabled at (9203): [<8002be80>] irq_exit+0xac/0xe8
> 
> other info that might help us debug this:
>  Possible unsafe locking scenario:
> 
>        CPU0
>        ----
>   lock(_xmit_ETHER#2);
>   <Interrupt>
>     lock(_xmit_ETHER#2);
> 
>  *** DEADLOCK ***
> 
> 4 locks held by swapper/0/0:
>  #0:  (((&idev->mc_ifc_timer))){+.-...}, at: [<80031384>]
> call_timer_fn+0x0/0xf0
>  #1:  (rcu_read_lock){.+.+..}, at: [<80501d58>] mld_sendpack+0x0/0x55c
>  #2:  (rcu_read_lock_bh){.+....}, at: [<804e1e54>]
> ip6_finish_output2+0x44/0x748
>  #3:  (rcu_read_lock_bh){.+....}, at: [<804641e4>] dev_queue_xmit+0x0/0x658
> 
> stack backtrace:
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.10.0-rc1+ #1
> Backtrace:
> [<80011ebc>] (dump_backtrace+0x0/0x10c) from [<8001205c>]
> (show_stack+0x18/0x1c)
>  r6:80763fc8 r5:80763c08 r4:808a0358 r3:00000000 [<80012044>]
> (show_stack+0x0/0x1c) from [<80549188>] (dump_stack+0x20/0x28) [<80549168>]
> (dump_stack+0x0/0x28) from [<805471f4>] (print_usage_bug+0x26c/0x2dc)
> [<80546f88>] (print_usage_bug+0x0/0x2dc) from [<80068f0c>]
> (mark_lock+0x658/0x6d0) [<800688b4>] (mark_lock+0x0/0x6d0) from [<80069560>]
> (__lock_acquire+0x5dc/0x1ce0) [<80068f84>] (__lock_acquire+0x0/0x1ce0) from
> [<8006b16c>] (lock_acquire+0x68/0x7c) [<8006b104>] (lock_acquire+0x0/0x7c)
> from [<8054d1a4>] (_raw_spin_lock+0x34/0x44)  r7:00000000 r6:bf8e0800
> r5:00000002 r4:bf8b5440 [<8054d170>] (_raw_spin_lock+0x0/0x44) from
> [<8047a7b4>] (sch_direct_xmit+0xb4/0x2fc)  r5:bf8b5400 r4:bfbe7600
> [<8047a700>] (sch_direct_xmit+0x0/0x2fc) from [<80464474>]
> (dev_queue_xmit+0x290/0x658) [<804641e4>] (dev_queue_xmit+0x0/0x658) from
> [<8046caf4>] (neigh_resolve_output+0x114/0x218)
> [<8046c9e0>] (neigh_resolve_output+0x0/0x218) from [<804e21d8>]
> (ip6_finish_output2+0x3c8/0x748) [<804e1e10>] (ip6_finish_output2+0x0/0x748)
> from [<804e5798>] (ip6_finish_output+0xb0/0x1dc) [<804e56e8>]
> (ip6_finish_output+0x0/0x1dc) from [<804e5964>] (ip6_output+0xa0/0x244)
> r5:bf8e0800 r4:bf022480 [<804e58c4>] (ip6_output+0x0/0x244) from [<805021f0>]
> (mld_sendpack+0x498/0x55c)  r7:00000000 r6:bfbe5400 r5:0000004c r4:bf022480
> [<80501d58>] (mld_sendpack+0x0/0x55c) from [<805028f4>]
> (mld_ifc_timer_expire+0x1d4/0x2b0)
> [<80502720>] (mld_ifc_timer_expire+0x0/0x2b0) from [<800313f8>]
> (call_timer_fn+0x74/0xf0) [<80031384>] (call_timer_fn+0x0/0xf0) from
> [<80031668>] (run_timer_softirq+0x1f4/0x210)  r8:807e5d70 r7:00000000
> r6:8075a0c0 r5:807e5540 r4:80759e38 [<80031474>] (run_timer_softirq+0x0/0x210)
> from [<8002b9b4>] (__do_softirq+0x100/0x1f4) [<8002b8b4>]
> (__do_softirq+0x0/0x1f4) from [<8002be80>] (irq_exit+0xac/0xe8) [<8002bdd4>]
> (irq_exit+0x0/0xe8) from [<8000eef8>] (handle_IRQ+0x58/0xb4)
>  r4:80760c78 r3:00000220
> [<8000eea0>] (handle_IRQ+0x0/0xb4) from [<80008604>]
> (gic_handle_irq+0x30/0x64)  r8:807a2f6d r7:f4000100 r6:80759f18 r5:80760dbc
> r4:f400010c r3:00000000 [<800085d4>] (gic_handle_irq+0x0/0x64) from
> [<8000e124>] (__irq_svc+0x44/0x5c) Exception stack(0x80759f18 to 0x80759f60)
> 9f00:                                                       00000001 00000001
> 9f20: 00000000 80763c08 80758000 00000000 807a2f6d 00000001 807a2f6d 8055042c
> 9f40: 8076092c 80759f6c 80759f30 80759f60 8006badc 8000f2a4 20000113 ffffffff
> r7:80759f4c r6:ffffffff r5:20000113 r4:8000f2a4 [<8000f27c>]
> (arch_cpu_idle+0x0/0x44) from [<8005badc>] (cpu_startup_entry+0x74/0x160)
> [<8005ba68>] (cpu_startup_entry+0x0/0x160) from [<80540e34>]
> (rest_init+0xb0/0xd8)  r7:81512400 [<80540d84>] (rest_init+0x0/0xd8) from
> [<80712848>] (start_kernel+0x2c4/0x31c)
>  r6:807493e8 r5:807a3080 r4:80760990
> [<80712584>] (start_kernel+0x0/0x31c) from [<10008074>] (0x10008074)
> r7:80764fec r6:807493e4 r5:807608d0 r4:10c53c7d

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
  2013-05-13  4:57 ` Shawn Guo
  2013-05-13  5:07   ` Shawn Guo
  2013-05-14  2:29   ` Li Frank-B20596
@ 2013-05-14 19:27   ` Fabio Estevam
  2013-05-14 19:30     ` Eric Dumazet
  2 siblings, 1 reply; 10+ messages in thread
From: Fabio Estevam @ 2013-05-14 19:27 UTC (permalink / raw)
  To: Shawn Guo; +Cc: Frank Li, romieu, r.schwebel, davem, l.stach, netdev, lznuaa

On Mon, May 13, 2013 at 1:57 AM, Shawn Guo <shawn.guo@linaro.org> wrote:

> The patch has landed on 3.10-rc1.  Seems that it introduces a lock
> warning as below.  Turn on CONFIG_PROVE_LOCKING and you will be able
> to see it.

This makes the warning goes away on mx28:

--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -451,7 +451,6 @@ fec_restart(struct net_device *ndev, int duplex)
                netif_device_detach(ndev);
                napi_disable(&fep->napi);
                netif_stop_queue(ndev);
-               netif_tx_lock(ndev);
        }

        /* Whack a reset.  We should wait for this. */
@@ -619,7 +618,6 @@ fec_restart(struct net_device *ndev, int duplex)
                netif_device_attach(ndev);
                napi_enable(&fep->napi);
                netif_wake_queue(ndev);
-               netif_tx_unlock(ndev);
        }
 }

,but not sure if it looks OK.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
  2013-05-14 19:27   ` Fabio Estevam
@ 2013-05-14 19:30     ` Eric Dumazet
  2013-05-14 19:40       ` Fabio Estevam
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2013-05-14 19:30 UTC (permalink / raw)
  To: Fabio Estevam
  Cc: Shawn Guo, Frank Li, romieu, r.schwebel, davem, l.stach, netdev, lznuaa

On Tue, 2013-05-14 at 16:27 -0300, Fabio Estevam wrote:
> On Mon, May 13, 2013 at 1:57 AM, Shawn Guo <shawn.guo@linaro.org> wrote:
> 
> > The patch has landed on 3.10-rc1.  Seems that it introduces a lock
> > warning as below.  Turn on CONFIG_PROVE_LOCKING and you will be able
> > to see it.
> 
> This makes the warning goes away on mx28:
> 
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -451,7 +451,6 @@ fec_restart(struct net_device *ndev, int duplex)
>                 netif_device_detach(ndev);
>                 napi_disable(&fep->napi);
>                 netif_stop_queue(ndev);
> -               netif_tx_lock(ndev);
>         }
> 
>         /* Whack a reset.  We should wait for this. */
> @@ -619,7 +618,6 @@ fec_restart(struct net_device *ndev, int duplex)
>                 netif_device_attach(ndev);
>                 napi_enable(&fep->napi);
>                 netif_wake_queue(ndev);
> -               netif_tx_unlock(ndev);
>         }
>  }
> 
> ,but not sure if it looks OK.
> --

Thats because netif_tx_lock_bh() should be used.

And btw, the order of the unlocks should probably be reversed :

              netif_tx_unlock_bh(ndev);
              netif_wake_queue(ndev);
              napi_enable(&fep->napi);
              netif_device_attach(ndev);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
  2013-05-14 19:30     ` Eric Dumazet
@ 2013-05-14 19:40       ` Fabio Estevam
  2013-05-15  1:56         ` Frank Li
  0 siblings, 1 reply; 10+ messages in thread
From: Fabio Estevam @ 2013-05-14 19:40 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Shawn Guo, Frank Li, romieu, r.schwebel, davem, l.stach, netdev, lznuaa

Hi Eric,

On Tue, May 14, 2013 at 4:30 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:

> Thats because netif_tx_lock_bh() should be used.

Thanks for your advise.

> And btw, the order of the unlocks should probably be reversed :
>
>               netif_tx_unlock_bh(ndev);
>               netif_wake_queue(ndev);
>               napi_enable(&fep->napi);
>               netif_device_attach(ndev);

I tried the change below and do not see the original warning on mx28:

--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -451,7 +451,7 @@ fec_restart(struct net_device *ndev, int duplex)
                netif_device_detach(ndev);
                napi_disable(&fep->napi);
                netif_stop_queue(ndev);
-               netif_tx_lock(ndev);
+               netif_tx_lock_bh(ndev);
        }

        /* Whack a reset.  We should wait for this. */
@@ -616,10 +616,10 @@ fec_restart(struct net_device *ndev, int duplex)
        writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);

        if (netif_running(ndev)) {
-               netif_device_attach(ndev);
-               napi_enable(&fep->napi);
+               netif_tx_unlock_bh(ndev);
                netif_wake_queue(ndev);
-               netif_tx_unlock(ndev);
+               napi_enable(&fep->napi);
+               netif_device_attach(ndev);
        }
 }

Could someone try it on mx6, please?

If it works I can submit the patch tomorrow.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
  2013-05-14 19:40       ` Fabio Estevam
@ 2013-05-15  1:56         ` Frank Li
  2013-05-15  2:42           ` Fabio Estevam
  0 siblings, 1 reply; 10+ messages in thread
From: Frank Li @ 2013-05-15  1:56 UTC (permalink / raw)
  To: Fabio Estevam
  Cc: Eric Dumazet, Shawn Guo, Frank Li, Francois Romieu,
	Robert Schwebel, David Miller, Lucas Stach, netdev

2013/5/15 Fabio Estevam <festevam@gmail.com>:
> Hi Eric,
>
> On Tue, May 14, 2013 at 4:30 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>> Thats because netif_tx_lock_bh() should be used.
>
> Thanks for your advise.
>
>> And btw, the order of the unlocks should probably be reversed :
>>
>>               netif_tx_unlock_bh(ndev);
>>               netif_wake_queue(ndev);
>>               napi_enable(&fep->napi);
>>               netif_device_attach(ndev);
>
> I tried the change below and do not see the original warning on mx28:
>
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -451,7 +451,7 @@ fec_restart(struct net_device *ndev, int duplex)
>                 netif_device_detach(ndev);
>                 napi_disable(&fep->napi);
>                 netif_stop_queue(ndev);
> -               netif_tx_lock(ndev);
> +               netif_tx_lock_bh(ndev);
>         }
>
>         /* Whack a reset.  We should wait for this. */
> @@ -616,10 +616,10 @@ fec_restart(struct net_device *ndev, int duplex)
>         writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
>
>         if (netif_running(ndev)) {
> -               netif_device_attach(ndev);
> -               napi_enable(&fep->napi);
> +               netif_tx_unlock_bh(ndev);
>                 netif_wake_queue(ndev);
> -               netif_tx_unlock(ndev);
> +               napi_enable(&fep->napi);
> +               netif_device_attach(ndev);
>         }
>  }
>
> Could someone try it on mx6, please?
>
> If it works I can submit the patch tomorrow.

I also find the same solution yesterdays. I tested on MX6. Shawn
tested on MX6 and MX28.
You can submit patch.

best regards
Frank Li

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
  2013-05-15  1:56         ` Frank Li
@ 2013-05-15  2:42           ` Fabio Estevam
  0 siblings, 0 replies; 10+ messages in thread
From: Fabio Estevam @ 2013-05-15  2:42 UTC (permalink / raw)
  To: Frank Li
  Cc: Eric Dumazet, Shawn Guo, Frank Li, Francois Romieu,
	Robert Schwebel, David Miller, Lucas Stach, netdev

On Tue, May 14, 2013 at 10:56 PM, Frank Li <lznuaa@gmail.com> wrote:

> I also find the same solution yesterdays. I tested on MX6. Shawn
> tested on MX6 and MX28.
> You can submit patch.

Thanks, Frank. Will submit it tomorrow.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2013-05-15  2:42 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-05-08  0:08 [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times Frank Li
2013-05-08 20:15 ` David Miller
2013-05-13  4:57 ` Shawn Guo
2013-05-13  5:07   ` Shawn Guo
2013-05-14  2:29   ` Li Frank-B20596
2013-05-14 19:27   ` Fabio Estevam
2013-05-14 19:30     ` Eric Dumazet
2013-05-14 19:40       ` Fabio Estevam
2013-05-15  1:56         ` Frank Li
2013-05-15  2:42           ` Fabio Estevam

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).