* [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
@ 2013-05-08 0:08 Frank Li
2013-05-08 20:15 ` David Miller
2013-05-13 4:57 ` Shawn Guo
0 siblings, 2 replies; 10+ messages in thread
From: Frank Li @ 2013-05-08 0:08 UTC (permalink / raw)
To: romieu, r.schwebel, davem, l.stach, netdev, festevam, shawn.guo, lznuaa
Cc: Frank Li
reproduce steps
1. flood ping from other machine
ping -f -s 41000 IP
2. run below script
while [ 1 ]; do ethtool -s eth0 autoneg off;
sleep 3;ethtool -s eth0 autoneg on; sleep 4; done;
You can see oops in one hour.
The reason is fec_restart clear BD but NAPI may use it.
The solution is disable NAPI and stop xmit when reset BD.
disable NAPI may sleep, so fec_restart can't be call in
atomic context.
Signed-off-by: Frank Li <Frank.Li@freescale.com>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Tested-by: Lucas Stach <l.stach@pengutronix.de>
---
Change from v1 to v2
* Add netif_tx_lock(ndev) to avoid xmit runing when reset hardware
Change from v2 to v3
* Move put real statements after function variable declarations according to David's comments
* Remove lock in adjust_link according to Lucas Stach's comments
Change from v3 to v4
* rebase to latest net/master
* remove hw_lock because not used again
* reduce delay work to 0
* group delay work related feild to one structure
* call netif_device_detach() in fec_restart
Change from v4 to v5
* use true/false instead of 1/0
drivers/net/ethernet/freescale/fec.h | 10 ++++--
drivers/net/ethernet/freescale/fec_main.c | 44 +++++++++++++++++++++-------
2 files changed, 39 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index d44f65b..afa7b5b 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -198,6 +198,11 @@ struct bufdesc_ex {
#define FLAG_RX_CSUM_ENABLED (BD_ENET_RX_ICE | BD_ENET_RX_PCR)
#define FLAG_RX_CSUM_ERROR (BD_ENET_RX_ICE | BD_ENET_RX_PCR)
+struct fec_enet_delayed_work {
+ struct delayed_work delay_work;
+ bool timeout;
+};
+
/* The FEC buffer descriptors track the ring buffers. The rx_bd_base and
* tx_bd_base always point to the base of the buffer descriptors. The
* cur_rx and cur_tx point to the currently available buffer.
@@ -231,9 +236,6 @@ struct fec_enet_private {
/* The ring entries to be free()ed */
struct bufdesc *dirty_tx;
- /* hold while accessing the HW like ringbuffer for tx/rx but not MAC */
- spinlock_t hw_lock;
-
struct platform_device *pdev;
int opened;
@@ -268,7 +270,7 @@ struct fec_enet_private {
int hwts_rx_en;
int hwts_tx_en;
struct timer_list time_keep;
-
+ struct fec_enet_delayed_work delay_work;
};
void fec_ptp_init(struct net_device *ndev, struct platform_device *pdev);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index b9748f1..d028830 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -445,6 +445,13 @@ fec_restart(struct net_device *ndev, int duplex)
u32 rcntl = OPT_FRAME_SIZE | 0x04;
u32 ecntl = 0x2; /* ETHEREN */
+ if (netif_running(ndev)) {
+ netif_device_detach(ndev);
+ napi_disable(&fep->napi);
+ netif_stop_queue(ndev);
+ netif_tx_lock(ndev);
+ }
+
/* Whack a reset. We should wait for this. */
writel(1, fep->hwp + FEC_ECNTRL);
udelay(10);
@@ -605,6 +612,13 @@ fec_restart(struct net_device *ndev, int duplex)
/* Enable interrupts we wish to service */
writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
+
+ if (netif_running(ndev)) {
+ netif_device_attach(ndev);
+ napi_enable(&fep->napi);
+ netif_wake_queue(ndev);
+ netif_tx_unlock(ndev);
+ }
}
static void
@@ -644,8 +658,22 @@ fec_timeout(struct net_device *ndev)
ndev->stats.tx_errors++;
- fec_restart(ndev, fep->full_duplex);
- netif_wake_queue(ndev);
+ fep->delay_work.timeout = true;
+ schedule_delayed_work(&(fep->delay_work.delay_work), 0);
+}
+
+static void fec_enet_work(struct work_struct *work)
+{
+ struct fec_enet_private *fep =
+ container_of(work,
+ struct fec_enet_private,
+ delay_work.delay_work.work);
+
+ if (fep->delay_work.timeout) {
+ fep->delay_work.timeout = false;
+ fec_restart(fep->netdev, fep->full_duplex);
+ netif_wake_queue(fep->netdev);
+ }
}
static void
@@ -1024,16 +1052,12 @@ static void fec_enet_adjust_link(struct net_device *ndev)
{
struct fec_enet_private *fep = netdev_priv(ndev);
struct phy_device *phy_dev = fep->phy_dev;
- unsigned long flags;
-
int status_change = 0;
- spin_lock_irqsave(&fep->hw_lock, flags);
-
/* Prevent a state halted on mii error */
if (fep->mii_timeout && phy_dev->state == PHY_HALTED) {
phy_dev->state = PHY_RESUMING;
- goto spin_unlock;
+ return;
}
if (phy_dev->link) {
@@ -1061,9 +1085,6 @@ static void fec_enet_adjust_link(struct net_device *ndev)
}
}
-spin_unlock:
- spin_unlock_irqrestore(&fep->hw_lock, flags);
-
if (status_change)
phy_print_status(phy_dev);
}
@@ -1732,7 +1753,6 @@ static int fec_enet_init(struct net_device *ndev)
return -ENOMEM;
memset(cbd_base, 0, PAGE_SIZE);
- spin_lock_init(&fep->hw_lock);
fep->netdev = ndev;
@@ -1947,6 +1967,7 @@ fec_probe(struct platform_device *pdev)
if (fep->bufdesc_ex && fep->ptp_clock)
netdev_info(ndev, "registered PHC device %d\n", fep->dev_id);
+ INIT_DELAYED_WORK(&(fep->delay_work.delay_work), fec_enet_work);
return 0;
failed_register:
@@ -1979,6 +2000,7 @@ fec_drv_remove(struct platform_device *pdev)
struct fec_enet_private *fep = netdev_priv(ndev);
int i;
+ cancel_delayed_work_sync(&(fep->delay_work.delay_work));
unregister_netdev(ndev);
fec_enet_mii_remove(fep);
del_timer_sync(&fep->time_keep);
--
1.7.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
2013-05-08 0:08 [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times Frank Li
@ 2013-05-08 20:15 ` David Miller
2013-05-13 4:57 ` Shawn Guo
1 sibling, 0 replies; 10+ messages in thread
From: David Miller @ 2013-05-08 20:15 UTC (permalink / raw)
To: Frank.Li; +Cc: romieu, r.schwebel, l.stach, netdev, festevam, shawn.guo, lznuaa
From: Frank Li <Frank.Li@freescale.com>
Date: Wed, 8 May 2013 08:08:44 +0800
> reproduce steps
> 1. flood ping from other machine
> ping -f -s 41000 IP
> 2. run below script
> while [ 1 ]; do ethtool -s eth0 autoneg off;
> sleep 3;ethtool -s eth0 autoneg on; sleep 4; done;
>
> You can see oops in one hour.
>
> The reason is fec_restart clear BD but NAPI may use it.
> The solution is disable NAPI and stop xmit when reset BD.
> disable NAPI may sleep, so fec_restart can't be call in
> atomic context.
>
> Signed-off-by: Frank Li <Frank.Li@freescale.com>
> Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
> Tested-by: Lucas Stach <l.stach@pengutronix.de>
Applied and queued up for -stable.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
2013-05-08 0:08 [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times Frank Li
2013-05-08 20:15 ` David Miller
@ 2013-05-13 4:57 ` Shawn Guo
2013-05-13 5:07 ` Shawn Guo
` (2 more replies)
1 sibling, 3 replies; 10+ messages in thread
From: Shawn Guo @ 2013-05-13 4:57 UTC (permalink / raw)
To: Frank Li; +Cc: romieu, r.schwebel, davem, l.stach, netdev, festevam, lznuaa
Hi Frank,
On Wed, May 08, 2013 at 08:08:44AM +0800, Frank Li wrote:
> reproduce steps
> 1. flood ping from other machine
> ping -f -s 41000 IP
> 2. run below script
> while [ 1 ]; do ethtool -s eth0 autoneg off;
> sleep 3;ethtool -s eth0 autoneg on; sleep 4; done;
>
> You can see oops in one hour.
>
> The reason is fec_restart clear BD but NAPI may use it.
> The solution is disable NAPI and stop xmit when reset BD.
> disable NAPI may sleep, so fec_restart can't be call in
> atomic context.
>
> Signed-off-by: Frank Li <Frank.Li@freescale.com>
> Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
> Tested-by: Lucas Stach <l.stach@pengutronix.de>
The patch has landed on 3.10-rc1. Seems that it introduces a lock
warning as below. Turn on CONFIG_PROVE_LOCKING and you will be able
to see it.
Shawn
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
=================================
[ INFO: inconsistent lock state ]
3.10.0-rc1+ #1 Not tainted
---------------------------------
inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
swapper/0/0 [HC0[0]:SC1[5]:HE1:SE0] takes:
(_xmit_ETHER#2){+.?...}, at: [<8047a7b4>] sch_direct_xmit+0xb4/0x2fc
{SOFTIRQ-ON-W} state was registered at:
[<80068a04>] mark_lock+0x150/0x6d0
[<800695a8>] __lock_acquire+0x624/0x1ce0
[<8006b16c>] lock_acquire+0x68/0x7c
[<8054d1a4>] _raw_spin_lock+0x34/0x44
[<80380788>] fec_restart+0x614/0x694
[<803811f4>] fec_enet_adjust_link+0x84/0xbc
[<8037d0ac>] phy_state_machine+0x184/0x39c
[<8003dbfc>] process_one_work+0x1a4/0x42c
[<8003e298>] worker_thread+0x138/0x3a8
[<8004462c>] kthread+0xac/0xb8
[<8000e688>] ret_from_fork+0x14/0x2c
irq event stamp: 9252
hardirqs last enabled at (9252): [<8002bc08>] local_bh_enable_ip+0x84/0xec
hardirqs last disabled at (9251): [<8002bbc8>] local_bh_enable_ip+0x44/0xec
softirqs last enabled at (9202): [<8002b8b0>] _local_bh_enable+0x14/0x18
softirqs last disabled at (9203): [<8002be80>] irq_exit+0xac/0xe8
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(_xmit_ETHER#2);
<Interrupt>
lock(_xmit_ETHER#2);
*** DEADLOCK ***
4 locks held by swapper/0/0:
#0: (((&idev->mc_ifc_timer))){+.-...}, at: [<80031384>] call_timer_fn+0x0/0xf0
#1: (rcu_read_lock){.+.+..}, at: [<80501d58>] mld_sendpack+0x0/0x55c
#2: (rcu_read_lock_bh){.+....}, at: [<804e1e54>] ip6_finish_output2+0x44/0x748
#3: (rcu_read_lock_bh){.+....}, at: [<804641e4>] dev_queue_xmit+0x0/0x658
stack backtrace:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.10.0-rc1+ #1
Backtrace:
[<80011ebc>] (dump_backtrace+0x0/0x10c) from [<8001205c>] (show_stack+0x18/0x1c)
r6:80763fc8 r5:80763c08 r4:808a0358 r3:00000000
[<80012044>] (show_stack+0x0/0x1c) from [<80549188>] (dump_stack+0x20/0x28)
[<80549168>] (dump_stack+0x0/0x28) from [<805471f4>] (print_usage_bug+0x26c/0x2dc)
[<80546f88>] (print_usage_bug+0x0/0x2dc) from [<80068f0c>] (mark_lock+0x658/0x6d0)
[<800688b4>] (mark_lock+0x0/0x6d0) from [<80069560>] (__lock_acquire+0x5dc/0x1ce0)
[<80068f84>] (__lock_acquire+0x0/0x1ce0) from [<8006b16c>] (lock_acquire+0x68/0x7c)
[<8006b104>] (lock_acquire+0x0/0x7c) from [<8054d1a4>] (_raw_spin_lock+0x34/0x44)
r7:00000000 r6:bf8e0800 r5:00000002 r4:bf8b5440
[<8054d170>] (_raw_spin_lock+0x0/0x44) from [<8047a7b4>] (sch_direct_xmit+0xb4/0x2fc)
r5:bf8b5400 r4:bfbe7600
[<8047a700>] (sch_direct_xmit+0x0/0x2fc) from [<80464474>] (dev_queue_xmit+0x290/0x658)
[<804641e4>] (dev_queue_xmit+0x0/0x658) from [<8046caf4>] (neigh_resolve_output+0x114/0x218)
[<8046c9e0>] (neigh_resolve_output+0x0/0x218) from [<804e21d8>] (ip6_finish_output2+0x3c8/0x748)
[<804e1e10>] (ip6_finish_output2+0x0/0x748) from [<804e5798>] (ip6_finish_output+0xb0/0x1dc)
[<804e56e8>] (ip6_finish_output+0x0/0x1dc) from [<804e5964>] (ip6_output+0xa0/0x244)
r5:bf8e0800 r4:bf022480
[<804e58c4>] (ip6_output+0x0/0x244) from [<805021f0>] (mld_sendpack+0x498/0x55c)
r7:00000000 r6:bfbe5400 r5:0000004c r4:bf022480
[<80501d58>] (mld_sendpack+0x0/0x55c) from [<805028f4>] (mld_ifc_timer_expire+0x1d4/0x2b0)
[<80502720>] (mld_ifc_timer_expire+0x0/0x2b0) from [<800313f8>] (call_timer_fn+0x74/0xf0)
[<80031384>] (call_timer_fn+0x0/0xf0) from [<80031668>] (run_timer_softirq+0x1f4/0x210)
r8:807e5d70 r7:00000000 r6:8075a0c0 r5:807e5540 r4:80759e38
[<80031474>] (run_timer_softirq+0x0/0x210) from [<8002b9b4>] (__do_softirq+0x100/0x1f4)
[<8002b8b4>] (__do_softirq+0x0/0x1f4) from [<8002be80>] (irq_exit+0xac/0xe8)
[<8002bdd4>] (irq_exit+0x0/0xe8) from [<8000eef8>] (handle_IRQ+0x58/0xb4)
r4:80760c78 r3:00000220
[<8000eea0>] (handle_IRQ+0x0/0xb4) from [<80008604>] (gic_handle_irq+0x30/0x64)
r8:807a2f6d r7:f4000100 r6:80759f18 r5:80760dbc r4:f400010c
r3:00000000
[<800085d4>] (gic_handle_irq+0x0/0x64) from [<8000e124>] (__irq_svc+0x44/0x5c)
Exception stack(0x80759f18 to 0x80759f60)
9f00: 00000001 00000001
9f20: 00000000 80763c08 80758000 00000000 807a2f6d 00000001 807a2f6d 8055042c
9f40: 8076092c 80759f6c 80759f30 80759f60 8006badc 8000f2a4 20000113 ffffffff
r7:80759f4c r6:ffffffff r5:20000113 r4:8000f2a4
[<8000f27c>] (arch_cpu_idle+0x0/0x44) from [<8005badc>] (cpu_startup_entry+0x74/0x160)
[<8005ba68>] (cpu_startup_entry+0x0/0x160) from [<80540e34>] (rest_init+0xb0/0xd8)
r7:81512400
[<80540d84>] (rest_init+0x0/0xd8) from [<80712848>] (start_kernel+0x2c4/0x31c)
r6:807493e8 r5:807a3080 r4:80760990
[<80712584>] (start_kernel+0x0/0x31c) from [<10008074>] (0x10008074)
r7:80764fec r6:807493e4 r5:807608d0 r4:10c53c7d
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
2013-05-13 4:57 ` Shawn Guo
@ 2013-05-13 5:07 ` Shawn Guo
2013-05-14 2:29 ` Li Frank-B20596
2013-05-14 19:27 ` Fabio Estevam
2 siblings, 0 replies; 10+ messages in thread
From: Shawn Guo @ 2013-05-13 5:07 UTC (permalink / raw)
To: Frank Li
Cc: romieu, Robert Schwebel, David Miller, l.stach,
Linux Netdev List, Fabio Estevam, lznuaa
On 13 May 2013 12:57, Shawn Guo <shawn.guo@linaro.org> wrote:
> Hi Frank,
>
> On Wed, May 08, 2013 at 08:08:44AM +0800, Frank Li wrote:
>> reproduce steps
>> 1. flood ping from other machine
>> ping -f -s 41000 IP
>> 2. run below script
>> while [ 1 ]; do ethtool -s eth0 autoneg off;
>> sleep 3;ethtool -s eth0 autoneg on; sleep 4; done;
>>
>> You can see oops in one hour.
>>
>> The reason is fec_restart clear BD but NAPI may use it.
>> The solution is disable NAPI and stop xmit when reset BD.
>> disable NAPI may sleep, so fec_restart can't be call in
>> atomic context.
>>
>> Signed-off-by: Frank Li <Frank.Li@freescale.com>
>> Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
>> Tested-by: Lucas Stach <l.stach@pengutronix.de>
>
> The patch has landed on 3.10-rc1. Seems that it introduces a lock
> warning as below. Turn on CONFIG_PROVE_LOCKING and you will be able
> to see it.
The warning message looks a little different in one of my imx28 boot tests.
Shawn
[ 4.749454] =================================
[ 4.753829] [ INFO: inconsistent lock state ]
[ 4.758207] 3.10.0-rc1-00013-gc5f5ad9 #77 Not tainted
[ 4.763270] ---------------------------------
[ 4.767641] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
[ 4.773669] kworker/0:1/20 [HC0[0]:SC1[3]:HE1:SE0] takes:
[ 4.779080] (_xmit_ETHER#2){+.?...}, at: [<c03c5644>]
sch_direct_xmit+0x9c/0x2d0
[ 4.786686] {SOFTIRQ-ON-W} state was registered at:
[ 4.791579] [<c005a784>] __lock_acquire+0x5fc/0x1a90
[ 4.796767] [<c005c188>] lock_acquire+0x9c/0x124
[ 4.801598] [<c04449cc>] _raw_spin_lock+0x2c/0x3c
[ 4.806532] [<c02fd3e8>] fec_restart+0x5bc/0x654
[ 4.811370] [<c02fddfc>] fec_enet_adjust_link+0x7c/0xb4
[ 4.816806] [<c02fac20>] phy_state_machine+0x17c/0x394
[ 4.822153] [<c0035e08>] process_one_work+0x1bc/0x4c4
[ 4.827422] [<c00364e0>] worker_thread+0x134/0x3a0
[ 4.832420] [<c003c274>] kthread+0xa4/0xb0
[ 4.836726] [<c000e9c0>] ret_from_fork+0x14/0x34
[ 4.841564] irq event stamp: 828
[ 4.844806] hardirqs last enabled at (828): [<c0022f80>]
local_bh_enable_ip+0x84/0xf0
[ 4.852773] hardirqs last disabled at (827): [<c0022f40>]
local_bh_enable_ip+0x44/0xf0
[ 4.860727] softirqs last enabled at (808): [<c0022d24>]
__do_softirq+0x180/0x284
[ 4.868333] softirqs last disabled at (811): [<c00231e0>] irq_exit+0x9c/0xd8
[ 4.875416]
[ 4.875416] other info that might help us debug this:
[ 4.881959] Possible unsafe locking scenario:
[ 4.881959]
[ 4.887891] CPU0
[ 4.890345] ----
[ 4.892799] lock(_xmit_ETHER#2);
[ 4.896247] <Interrupt>
[ 4.898874] lock(_xmit_ETHER#2);
[ 4.902497]
[ 4.902497] *** DEADLOCK ***
[ 4.902497]
[ 4.908444] 7 locks held by kworker/0:1/20:
[ 4.912637] #0: (rpciod){.+.+.+}, at: [<c0035d84>]
process_one_work+0x138/0x4c4
[ 4.920217] #1: ((&(&transport->connect_worker)->work)){+.+.+.},
at: [<c0035d84>] process_one_work+0x138/0x4c4
[ 4.930482] #2: (sk_lock-AF_INET-RPC){+.+...}, at: [<c04010ec>]
inet_stream_connect+0x20/0x48
[ 4.939293] #3: (rcu_read_lock){.+.+..}, at: [<c03d435c>]
ip_queue_xmit+0x0/0x3f8
[ 4.947043] #4: (rcu_read_lock){.+.+..}, at: [<c03abb18>]
__netif_receive_skb_core+0x34/0x5b0
[ 4.955834] #5: (rcu_read_lock){.+.+..}, at: [<c03b9508>]
neigh_update+0x2a4/0x50c
[ 4.963667] #6: (rcu_read_lock_bh){.+....}, at: [<c03b0870>]
dev_queue_xmit+0x0/0x684
[ 4.971767]
[ 4.971767] stack backtrace:
[ 4.976162] CPU: 0 PID: 20 Comm: kworker/0:1 Not tainted
3.10.0-rc1-00013-gc5f5ad9 #77
[ 4.984122] Workqueue: rpciod xs_tcp_setup_socket
[ 4.988909] [<c0013958>] (unwind_backtrace+0x0/0xf0) from
[<c001173c>] (show_stack+0x10/0x14)
[ 4.997491] [<c001173c>] (show_stack+0x10/0x14) from [<c043e26c>]
(print_usage_bug.part.28+0x218/0x280)
[ 5.006937] [<c043e26c>] (print_usage_bug.part.28+0x218/0x280) from
[<c005a048>] (mark_lock+0x528/0x668)
[ 5.016461] [<c005a048>] (mark_lock+0x528/0x668) from [<c005a73c>]
(__lock_acquire+0x5b4/0x1a90)
[ 5.025289] [<c005a73c>] (__lock_acquire+0x5b4/0x1a90) from
[<c005c188>] (lock_acquire+0x9c/0x124)
[ 5.034298] [<c005c188>] (lock_acquire+0x9c/0x124) from
[<c04449cc>] (_raw_spin_lock+0x2c/0x3c)
[ 5.043050] [<c04449cc>] (_raw_spin_lock+0x2c/0x3c) from
[<c03c5644>] (sch_direct_xmit+0x9c/0x2d0)
[ 5.052055] [<c03c5644>] (sch_direct_xmit+0x9c/0x2d0) from
[<c03b0b18>] (dev_queue_xmit+0x2a8/0x684)
[ 5.061234] [<c03b0b18>] (dev_queue_xmit+0x2a8/0x684) from
[<c03b9478>] (neigh_update+0x214/0x50c)
[ 5.070241] [<c03b9478>] (neigh_update+0x214/0x50c) from
[<c03fa8b8>] (arp_process+0x264/0x62c)
[ 5.078985] [<c03fa8b8>] (arp_process+0x264/0x62c) from
[<c03abd7c>] (__netif_receive_skb_core+0x298/0x5b0)
[ 5.088770] [<c03abd7c>] (__netif_receive_skb_core+0x298/0x5b0)
from [<c03aeecc>] (napi_gro_receive+0x74/0xa0)
[ 5.098824] [<c03aeecc>] (napi_gro_receive+0x74/0xa0) from
[<c02fe084>] (fec_enet_rx_napi+0x250/0x5d8)
[ 5.108182] [<c02fe084>] (fec_enet_rx_napi+0x250/0x5d8) from
[<c03aeb90>] (net_rx_action+0xc0/0x238)
[ 5.117364] [<c03aeb90>] (net_rx_action+0xc0/0x238) from
[<c0022c90>] (__do_softirq+0xec/0x284)
[ 5.126110] [<c0022c90>] (__do_softirq+0xec/0x284) from
[<c00231e0>] (irq_exit+0x9c/0xd8)
[ 5.134335] [<c00231e0>] (irq_exit+0x9c/0xd8) from [<c000f804>]
(handle_IRQ+0x34/0x84)
[ 5.142296] [<c000f804>] (handle_IRQ+0x34/0x84) from [<c00086fc>]
(icoll_handle_irq+0x34/0x4c)
[ 5.150949] [<c00086fc>] (icoll_handle_irq+0x34/0x4c) from
[<c000e544>] (__irq_svc+0x44/0x54)
[ 5.159494] Exception stack(0xc7561c78 to 0xc7561cc0)
[ 5.164571] 1c60:
00000001 c7520338
[ 5.172783] 1c80: 00000000 20000013 c7560000 c03d3e84 c76d6d00
c7736400 c77582c0 00000000
[ 5.180994] 1ca0: 00000000 00000000 04228060 c7561cc0 c005c944
c002307c 20000013 ffffffff
[ 5.189217] [<c000e544>] (__irq_svc+0x44/0x54) from [<c002307c>]
(local_bh_enable+0x90/0xf0)
[ 5.197700] [<c002307c>] (local_bh_enable+0x90/0xf0) from
[<c03d3e84>] (ip_finish_output+0x21c/0x504)
[ 5.206960] [<c03d3e84>] (ip_finish_output+0x21c/0x504) from
[<c03d42f8>] (ip_local_out+0x30/0x94)
[ 5.215955] [<c03d42f8>] (ip_local_out+0x30/0x94) from [<c03d44a0>]
(ip_queue_xmit+0x144/0x3f8)
[ 5.224707] [<c03d44a0>] (ip_queue_xmit+0x144/0x3f8) from
[<c03e9308>] (tcp_transmit_skb+0x3cc/0x80c)
[ 5.233981] [<c03e9308>] (tcp_transmit_skb+0x3cc/0x80c) from
[<c03ebc40>] (tcp_connect+0x57c/0x5d4)
[ 5.243081] [<c03ebc40>] (tcp_connect+0x57c/0x5d4) from
[<c03ef960>] (tcp_v4_connect+0x288/0x3a8)
[ 5.252010] [<c03ef960>] (tcp_v4_connect+0x288/0x3a8) from
[<c0401048>] (__inet_stream_connect+0x258/0x2dc)
[ 5.261799] [<c0401048>] (__inet_stream_connect+0x258/0x2dc) from
[<c0401100>] (inet_stream_connect+0x34/0x48)
[ 5.271863] [<c0401100>] (inet_stream_connect+0x34/0x48) from
[<c039aaf8>] (kernel_connect+0x10/0x14)
[ 5.281139] [<c039aaf8>] (kernel_connect+0x10/0x14) from
[<c041e384>] (xs_tcp_setup_socket+0x10c/0x360)
[ 5.290587] [<c041e384>] (xs_tcp_setup_socket+0x10c/0x360) from
[<c0035e08>] (process_one_work+0x1bc/0x4c4)
[ 5.300372] [<c0035e08>] (process_one_work+0x1bc/0x4c4) from
[<c00364e0>] (worker_thread+0x134/0x3a0)
[ 5.309632] [<c00364e0>] (worker_thread+0x134/0x3a0) from
[<c003c274>] (kthread+0xa4/0xb0)
[ 5.317944] [<c003c274>] (kthread+0xa4/0xb0) from [<c000e9c0>]
(ret_from_fork+0x14/0x34)
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
2013-05-13 4:57 ` Shawn Guo
2013-05-13 5:07 ` Shawn Guo
@ 2013-05-14 2:29 ` Li Frank-B20596
2013-05-14 19:27 ` Fabio Estevam
2 siblings, 0 replies; 10+ messages in thread
From: Li Frank-B20596 @ 2013-05-14 2:29 UTC (permalink / raw)
To: Shawn Guo; +Cc: romieu, r.schwebel, davem, l.stach, netdev, festevam, lznuaa
>
> Hi Frank,
>
> On Wed, May 08, 2013 at 08:08:44AM +0800, Frank Li wrote:
> > reproduce steps
> > 1. flood ping from other machine
> > ping -f -s 41000 IP
> > 2. run below script
> > while [ 1 ]; do ethtool -s eth0 autoneg off;
> > sleep 3;ethtool -s eth0 autoneg on; sleep 4; done;
> >
> > You can see oops in one hour.
> >
> > The reason is fec_restart clear BD but NAPI may use it.
> > The solution is disable NAPI and stop xmit when reset BD.
> > disable NAPI may sleep, so fec_restart can't be call in atomic
> > context.
> >
> > Signed-off-by: Frank Li <Frank.Li@freescale.com>
> > Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
> > Tested-by: Lucas Stach <l.stach@pengutronix.de>
>
> The patch has landed on 3.10-rc1. Seems that it introduces a lock warning as
> below. Turn on CONFIG_PROVE_LOCKING and you will be able to see it.
I will investigate it.
>
> Shawn
>
> libphy: 2188000.ethernet:01 - Link is Up - 100/Full
> IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
>
> =================================
> [ INFO: inconsistent lock state ]
> 3.10.0-rc1+ #1 Not tainted
> ---------------------------------
> inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
> swapper/0/0 [HC0[0]:SC1[5]:HE1:SE0] takes:
> (_xmit_ETHER#2){+.?...}, at: [<8047a7b4>] sch_direct_xmit+0xb4/0x2fc
> {SOFTIRQ-ON-W} state was registered at:
> [<80068a04>] mark_lock+0x150/0x6d0
> [<800695a8>] __lock_acquire+0x624/0x1ce0
> [<8006b16c>] lock_acquire+0x68/0x7c
> [<8054d1a4>] _raw_spin_lock+0x34/0x44
> [<80380788>] fec_restart+0x614/0x694
> [<803811f4>] fec_enet_adjust_link+0x84/0xbc
> [<8037d0ac>] phy_state_machine+0x184/0x39c
> [<8003dbfc>] process_one_work+0x1a4/0x42c
> [<8003e298>] worker_thread+0x138/0x3a8
> [<8004462c>] kthread+0xac/0xb8
> [<8000e688>] ret_from_fork+0x14/0x2c
> irq event stamp: 9252
> hardirqs last enabled at (9252): [<8002bc08>] local_bh_enable_ip+0x84/0xec
> hardirqs last disabled at (9251): [<8002bbc8>] local_bh_enable_ip+0x44/0xec
> softirqs last enabled at (9202): [<8002b8b0>] _local_bh_enable+0x14/0x18
> softirqs last disabled at (9203): [<8002be80>] irq_exit+0xac/0xe8
>
> other info that might help us debug this:
> Possible unsafe locking scenario:
>
> CPU0
> ----
> lock(_xmit_ETHER#2);
> <Interrupt>
> lock(_xmit_ETHER#2);
>
> *** DEADLOCK ***
>
> 4 locks held by swapper/0/0:
> #0: (((&idev->mc_ifc_timer))){+.-...}, at: [<80031384>]
> call_timer_fn+0x0/0xf0
> #1: (rcu_read_lock){.+.+..}, at: [<80501d58>] mld_sendpack+0x0/0x55c
> #2: (rcu_read_lock_bh){.+....}, at: [<804e1e54>]
> ip6_finish_output2+0x44/0x748
> #3: (rcu_read_lock_bh){.+....}, at: [<804641e4>] dev_queue_xmit+0x0/0x658
>
> stack backtrace:
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.10.0-rc1+ #1
> Backtrace:
> [<80011ebc>] (dump_backtrace+0x0/0x10c) from [<8001205c>]
> (show_stack+0x18/0x1c)
> r6:80763fc8 r5:80763c08 r4:808a0358 r3:00000000 [<80012044>]
> (show_stack+0x0/0x1c) from [<80549188>] (dump_stack+0x20/0x28) [<80549168>]
> (dump_stack+0x0/0x28) from [<805471f4>] (print_usage_bug+0x26c/0x2dc)
> [<80546f88>] (print_usage_bug+0x0/0x2dc) from [<80068f0c>]
> (mark_lock+0x658/0x6d0) [<800688b4>] (mark_lock+0x0/0x6d0) from [<80069560>]
> (__lock_acquire+0x5dc/0x1ce0) [<80068f84>] (__lock_acquire+0x0/0x1ce0) from
> [<8006b16c>] (lock_acquire+0x68/0x7c) [<8006b104>] (lock_acquire+0x0/0x7c)
> from [<8054d1a4>] (_raw_spin_lock+0x34/0x44) r7:00000000 r6:bf8e0800
> r5:00000002 r4:bf8b5440 [<8054d170>] (_raw_spin_lock+0x0/0x44) from
> [<8047a7b4>] (sch_direct_xmit+0xb4/0x2fc) r5:bf8b5400 r4:bfbe7600
> [<8047a700>] (sch_direct_xmit+0x0/0x2fc) from [<80464474>]
> (dev_queue_xmit+0x290/0x658) [<804641e4>] (dev_queue_xmit+0x0/0x658) from
> [<8046caf4>] (neigh_resolve_output+0x114/0x218)
> [<8046c9e0>] (neigh_resolve_output+0x0/0x218) from [<804e21d8>]
> (ip6_finish_output2+0x3c8/0x748) [<804e1e10>] (ip6_finish_output2+0x0/0x748)
> from [<804e5798>] (ip6_finish_output+0xb0/0x1dc) [<804e56e8>]
> (ip6_finish_output+0x0/0x1dc) from [<804e5964>] (ip6_output+0xa0/0x244)
> r5:bf8e0800 r4:bf022480 [<804e58c4>] (ip6_output+0x0/0x244) from [<805021f0>]
> (mld_sendpack+0x498/0x55c) r7:00000000 r6:bfbe5400 r5:0000004c r4:bf022480
> [<80501d58>] (mld_sendpack+0x0/0x55c) from [<805028f4>]
> (mld_ifc_timer_expire+0x1d4/0x2b0)
> [<80502720>] (mld_ifc_timer_expire+0x0/0x2b0) from [<800313f8>]
> (call_timer_fn+0x74/0xf0) [<80031384>] (call_timer_fn+0x0/0xf0) from
> [<80031668>] (run_timer_softirq+0x1f4/0x210) r8:807e5d70 r7:00000000
> r6:8075a0c0 r5:807e5540 r4:80759e38 [<80031474>] (run_timer_softirq+0x0/0x210)
> from [<8002b9b4>] (__do_softirq+0x100/0x1f4) [<8002b8b4>]
> (__do_softirq+0x0/0x1f4) from [<8002be80>] (irq_exit+0xac/0xe8) [<8002bdd4>]
> (irq_exit+0x0/0xe8) from [<8000eef8>] (handle_IRQ+0x58/0xb4)
> r4:80760c78 r3:00000220
> [<8000eea0>] (handle_IRQ+0x0/0xb4) from [<80008604>]
> (gic_handle_irq+0x30/0x64) r8:807a2f6d r7:f4000100 r6:80759f18 r5:80760dbc
> r4:f400010c r3:00000000 [<800085d4>] (gic_handle_irq+0x0/0x64) from
> [<8000e124>] (__irq_svc+0x44/0x5c) Exception stack(0x80759f18 to 0x80759f60)
> 9f00: 00000001 00000001
> 9f20: 00000000 80763c08 80758000 00000000 807a2f6d 00000001 807a2f6d 8055042c
> 9f40: 8076092c 80759f6c 80759f30 80759f60 8006badc 8000f2a4 20000113 ffffffff
> r7:80759f4c r6:ffffffff r5:20000113 r4:8000f2a4 [<8000f27c>]
> (arch_cpu_idle+0x0/0x44) from [<8005badc>] (cpu_startup_entry+0x74/0x160)
> [<8005ba68>] (cpu_startup_entry+0x0/0x160) from [<80540e34>]
> (rest_init+0xb0/0xd8) r7:81512400 [<80540d84>] (rest_init+0x0/0xd8) from
> [<80712848>] (start_kernel+0x2c4/0x31c)
> r6:807493e8 r5:807a3080 r4:80760990
> [<80712584>] (start_kernel+0x0/0x31c) from [<10008074>] (0x10008074)
> r7:80764fec r6:807493e4 r5:807608d0 r4:10c53c7d
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
2013-05-13 4:57 ` Shawn Guo
2013-05-13 5:07 ` Shawn Guo
2013-05-14 2:29 ` Li Frank-B20596
@ 2013-05-14 19:27 ` Fabio Estevam
2013-05-14 19:30 ` Eric Dumazet
2 siblings, 1 reply; 10+ messages in thread
From: Fabio Estevam @ 2013-05-14 19:27 UTC (permalink / raw)
To: Shawn Guo; +Cc: Frank Li, romieu, r.schwebel, davem, l.stach, netdev, lznuaa
On Mon, May 13, 2013 at 1:57 AM, Shawn Guo <shawn.guo@linaro.org> wrote:
> The patch has landed on 3.10-rc1. Seems that it introduces a lock
> warning as below. Turn on CONFIG_PROVE_LOCKING and you will be able
> to see it.
This makes the warning goes away on mx28:
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -451,7 +451,6 @@ fec_restart(struct net_device *ndev, int duplex)
netif_device_detach(ndev);
napi_disable(&fep->napi);
netif_stop_queue(ndev);
- netif_tx_lock(ndev);
}
/* Whack a reset. We should wait for this. */
@@ -619,7 +618,6 @@ fec_restart(struct net_device *ndev, int duplex)
netif_device_attach(ndev);
napi_enable(&fep->napi);
netif_wake_queue(ndev);
- netif_tx_unlock(ndev);
}
}
,but not sure if it looks OK.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
2013-05-14 19:27 ` Fabio Estevam
@ 2013-05-14 19:30 ` Eric Dumazet
2013-05-14 19:40 ` Fabio Estevam
0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2013-05-14 19:30 UTC (permalink / raw)
To: Fabio Estevam
Cc: Shawn Guo, Frank Li, romieu, r.schwebel, davem, l.stach, netdev, lznuaa
On Tue, 2013-05-14 at 16:27 -0300, Fabio Estevam wrote:
> On Mon, May 13, 2013 at 1:57 AM, Shawn Guo <shawn.guo@linaro.org> wrote:
>
> > The patch has landed on 3.10-rc1. Seems that it introduces a lock
> > warning as below. Turn on CONFIG_PROVE_LOCKING and you will be able
> > to see it.
>
> This makes the warning goes away on mx28:
>
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -451,7 +451,6 @@ fec_restart(struct net_device *ndev, int duplex)
> netif_device_detach(ndev);
> napi_disable(&fep->napi);
> netif_stop_queue(ndev);
> - netif_tx_lock(ndev);
> }
>
> /* Whack a reset. We should wait for this. */
> @@ -619,7 +618,6 @@ fec_restart(struct net_device *ndev, int duplex)
> netif_device_attach(ndev);
> napi_enable(&fep->napi);
> netif_wake_queue(ndev);
> - netif_tx_unlock(ndev);
> }
> }
>
> ,but not sure if it looks OK.
> --
Thats because netif_tx_lock_bh() should be used.
And btw, the order of the unlocks should probably be reversed :
netif_tx_unlock_bh(ndev);
netif_wake_queue(ndev);
napi_enable(&fep->napi);
netif_device_attach(ndev);
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
2013-05-14 19:30 ` Eric Dumazet
@ 2013-05-14 19:40 ` Fabio Estevam
2013-05-15 1:56 ` Frank Li
0 siblings, 1 reply; 10+ messages in thread
From: Fabio Estevam @ 2013-05-14 19:40 UTC (permalink / raw)
To: Eric Dumazet
Cc: Shawn Guo, Frank Li, romieu, r.schwebel, davem, l.stach, netdev, lznuaa
Hi Eric,
On Tue, May 14, 2013 at 4:30 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Thats because netif_tx_lock_bh() should be used.
Thanks for your advise.
> And btw, the order of the unlocks should probably be reversed :
>
> netif_tx_unlock_bh(ndev);
> netif_wake_queue(ndev);
> napi_enable(&fep->napi);
> netif_device_attach(ndev);
I tried the change below and do not see the original warning on mx28:
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -451,7 +451,7 @@ fec_restart(struct net_device *ndev, int duplex)
netif_device_detach(ndev);
napi_disable(&fep->napi);
netif_stop_queue(ndev);
- netif_tx_lock(ndev);
+ netif_tx_lock_bh(ndev);
}
/* Whack a reset. We should wait for this. */
@@ -616,10 +616,10 @@ fec_restart(struct net_device *ndev, int duplex)
writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
if (netif_running(ndev)) {
- netif_device_attach(ndev);
- napi_enable(&fep->napi);
+ netif_tx_unlock_bh(ndev);
netif_wake_queue(ndev);
- netif_tx_unlock(ndev);
+ napi_enable(&fep->napi);
+ netif_device_attach(ndev);
}
}
Could someone try it on mx6, please?
If it works I can submit the patch tomorrow.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
2013-05-14 19:40 ` Fabio Estevam
@ 2013-05-15 1:56 ` Frank Li
2013-05-15 2:42 ` Fabio Estevam
0 siblings, 1 reply; 10+ messages in thread
From: Frank Li @ 2013-05-15 1:56 UTC (permalink / raw)
To: Fabio Estevam
Cc: Eric Dumazet, Shawn Guo, Frank Li, Francois Romieu,
Robert Schwebel, David Miller, Lucas Stach, netdev
2013/5/15 Fabio Estevam <festevam@gmail.com>:
> Hi Eric,
>
> On Tue, May 14, 2013 at 4:30 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>> Thats because netif_tx_lock_bh() should be used.
>
> Thanks for your advise.
>
>> And btw, the order of the unlocks should probably be reversed :
>>
>> netif_tx_unlock_bh(ndev);
>> netif_wake_queue(ndev);
>> napi_enable(&fep->napi);
>> netif_device_attach(ndev);
>
> I tried the change below and do not see the original warning on mx28:
>
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -451,7 +451,7 @@ fec_restart(struct net_device *ndev, int duplex)
> netif_device_detach(ndev);
> napi_disable(&fep->napi);
> netif_stop_queue(ndev);
> - netif_tx_lock(ndev);
> + netif_tx_lock_bh(ndev);
> }
>
> /* Whack a reset. We should wait for this. */
> @@ -616,10 +616,10 @@ fec_restart(struct net_device *ndev, int duplex)
> writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
>
> if (netif_running(ndev)) {
> - netif_device_attach(ndev);
> - napi_enable(&fep->napi);
> + netif_tx_unlock_bh(ndev);
> netif_wake_queue(ndev);
> - netif_tx_unlock(ndev);
> + napi_enable(&fep->napi);
> + netif_device_attach(ndev);
> }
> }
>
> Could someone try it on mx6, please?
>
> If it works I can submit the patch tomorrow.
I also find the same solution yesterdays. I tested on MX6. Shawn
tested on MX6 and MX28.
You can submit patch.
best regards
Frank Li
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times
2013-05-15 1:56 ` Frank Li
@ 2013-05-15 2:42 ` Fabio Estevam
0 siblings, 0 replies; 10+ messages in thread
From: Fabio Estevam @ 2013-05-15 2:42 UTC (permalink / raw)
To: Frank Li
Cc: Eric Dumazet, Shawn Guo, Frank Li, Francois Romieu,
Robert Schwebel, David Miller, Lucas Stach, netdev
On Tue, May 14, 2013 at 10:56 PM, Frank Li <lznuaa@gmail.com> wrote:
> I also find the same solution yesterdays. I tested on MX6. Shawn
> tested on MX6 and MX28.
> You can submit patch.
Thanks, Frank. Will submit it tomorrow.
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2013-05-15 2:42 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-05-08 0:08 [PATCH v5 1/1 net] net: fec: fix kernel oops when plug/unplug cable many times Frank Li
2013-05-08 20:15 ` David Miller
2013-05-13 4:57 ` Shawn Guo
2013-05-13 5:07 ` Shawn Guo
2013-05-14 2:29 ` Li Frank-B20596
2013-05-14 19:27 ` Fabio Estevam
2013-05-14 19:30 ` Eric Dumazet
2013-05-14 19:40 ` Fabio Estevam
2013-05-15 1:56 ` Frank Li
2013-05-15 2:42 ` Fabio Estevam
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).