All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] sky2: Fix a race between sky2_down and sky2_poll
@ 2009-06-09 14:03 Mike McCormack
  2009-06-10 14:50 ` Stephen Hemminger
  2009-06-11 17:04 ` Stephen Hemminger
  0 siblings, 2 replies; 6+ messages in thread
From: Mike McCormack @ 2009-06-09 14:03 UTC (permalink / raw)
  To: shemminger; +Cc: netdev

Hi Stephen,

This patch fixes a crash in the sky2 driver when doing "ifconfig eth1
down" and receiving a constant stream of ethernet packets on my
mac-mini with Linux 2.6.29.4.  This is what I can see of the crash on
the console:

do_IRQ+0x64/0x77
common_interrupt+0x27/0x2c
kfree+0x78/0x95
__kfree_skb+0xf/0x6e
sky2_rx_clean+0x35/0x48
sky2_down+0x367/0x486
dev_close+0x63/0x85
dev_change_flags+0xa2/0x153
devinet_ioctl+0x22a/0x532
sock_ioctl+0x1ad/0x1d1
sock_ioctl+0x0/0x1d1
vfs_ioctl+0x1c/0x5f
do_vfs_ioctl+0x456/0x491
net_tx_action+0xc6/0x123
__do_soft_irq+0x8c/0x115
sys_ioctl+0x4/0x58
sysenter_do_call+0x12/0x2f
Code: 68 cd f0 93 de e9 0 02 00 00 8b 4c 24 24 0f b7 ...
EIP: sky2_poll+0x856/0xb49 [sky2]
kernel panic - no syncing: fatal exception in interrupt

This was tested by adding a USB ethernet dongle to the same machine,
and creating a program that spews raw packets back to the sky2 port.

I have update my analysis and tweaked the patch slightly.

I had a look at the callers of sky2_down, and can't see any immediate
trouble that could be caused by this patch.  If you have any
suggestions for further improvements, please let me know what they are
and I will do my best to address them.

thanks,

Mike


---

If sky2_down was called between an interrupt and the corresponding sky2_poll,
rx_ring will have been free'd and we'll crash.

This may happen because not all sources of interrupts are masked in sky2_down,
but a single interrupt from any source will cause all sources to be
checked, regardless of whether they are masked or not.

Signed-off-by: Mike McCormack <mikem@ring3k.org>
---
 drivers/net/sky2.c |   14 ++++++++++++--
 1 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index a2ff9cb..d0d4840 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1822,8 +1822,8 @@ static int sky2_down(struct net_device *dev)
 	ctrl &= ~(GM_GPCR_TX_ENA | GM_GPCR_RX_ENA);
 	gma_write16(hw, port, GM_GP_CTRL, ctrl);

-	/* Make sure no packets are pending */
-	napi_synchronize(&hw->napi);
+	/* disable soft interrupts */
+	napi_disable(&hw->napi);

 	sky2_write8(hw, SK_REG(port, GPHY_CTRL), GPC_RST_SET);

@@ -1878,6 +1878,9 @@ static int sky2_down(struct net_device *dev)
 	sky2->rx_ring = NULL;
 	sky2->tx_ring = NULL;

+	/* re-enable soft interrupts */
+	napi_enable(&hw->napi);
+
 	return 0;
 }

@@ -2372,6 +2375,13 @@ static int sky2_status_intr(struct sky2_hw *hw,
int to_do, u16 idx)
 		length = le16_to_cpu(le->length);
 		status = le32_to_cpu(le->status);

+		/* rx_ring may have been free'd in sky2_down */
+		if (unlikely(!sky2->rx_ring)) {
+			printk(KERN_INFO "sky2_status_intr: rx_ring NULL opcode %02x\n", opcode);
+			work_done++;
+			break;
+		}
+
 		le->opcode = 0;
 		switch (opcode & ~HW_OWNER) {
 		case OP_RXSTAT:
-- 
1.5.6.5

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] sky2: Fix a race between sky2_down and sky2_poll
  2009-06-09 14:03 [PATCH] sky2: Fix a race between sky2_down and sky2_poll Mike McCormack
@ 2009-06-10 14:50 ` Stephen Hemminger
  2009-06-11 17:04 ` Stephen Hemminger
  1 sibling, 0 replies; 6+ messages in thread
From: Stephen Hemminger @ 2009-06-10 14:50 UTC (permalink / raw)
  To: Mike McCormack; +Cc: netdev

On Tue, 9 Jun 2009 23:03:52 +0900
Mike McCormack <mikem@ring3k.org> wrote:

> Hi Stephen,
> 
> This patch fixes a crash in the sky2 driver when doing "ifconfig eth1
> down" and receiving a constant stream of ethernet packets on my
> mac-mini with Linux 2.6.29.4.  This is what I can see of the crash on
> the console:
> 
> do_IRQ+0x64/0x77
> common_interrupt+0x27/0x2c
> kfree+0x78/0x95
> __kfree_skb+0xf/0x6e
> sky2_rx_clean+0x35/0x48
> sky2_down+0x367/0x486
> dev_close+0x63/0x85
> dev_change_flags+0xa2/0x153
> devinet_ioctl+0x22a/0x532
> sock_ioctl+0x1ad/0x1d1
> sock_ioctl+0x0/0x1d1
> vfs_ioctl+0x1c/0x5f
> do_vfs_ioctl+0x456/0x491
> net_tx_action+0xc6/0x123
> __do_soft_irq+0x8c/0x115
> sys_ioctl+0x4/0x58
> sysenter_do_call+0x12/0x2f
> Code: 68 cd f0 93 de e9 0 02 00 00 8b 4c 24 24 0f b7 ...
> EIP: sky2_poll+0x856/0xb49 [sky2]
> kernel panic - no syncing: fatal exception in interrupt
> 
> This was tested by adding a USB ethernet dongle to the same machine,
> and creating a program that spews raw packets back to the sky2 port.
> 
> I have update my analysis and tweaked the patch slightly.
> 
> I had a look at the callers of sky2_down, and can't see any immediate
> trouble that could be caused by this patch.  If you have any
> suggestions for further improvements, please let me know what they are
> and I will do my best to address them.
> 
> thanks,
> 
> Mike
> 
> 
> ---
> 
> If sky2_down was called between an interrupt and the corresponding sky2_poll,
> rx_ring will have been free'd and we'll crash.
> 
> This may happen because not all sources of interrupts are masked in sky2_down,
> but a single interrupt from any source will cause all sources to be
> checked, regardless of whether they are masked or not.
> 
> Signed-off-by: Mike McCormack <mikem@ring3k.org>
> ---
>  drivers/net/sky2.c |   14 ++++++++++++--
>  1 files changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
> index a2ff9cb..d0d4840 100644
> --- a/drivers/net/sky2.c
> +++ b/drivers/net/sky2.c
> @@ -1822,8 +1822,8 @@ static int sky2_down(struct net_device *dev)
>  	ctrl &= ~(GM_GPCR_TX_ENA | GM_GPCR_RX_ENA);
>  	gma_write16(hw, port, GM_GP_CTRL, ctrl);
> 
> -	/* Make sure no packets are pending */
> -	napi_synchronize(&hw->napi);
> +	/* disable soft interrupts */
> +	napi_disable(&hw->napi);
> 
>  	sky2_write8(hw, SK_REG(port, GPHY_CTRL), GPC_RST_SET);
> 
> @@ -1878,6 +1878,9 @@ static int sky2_down(struct net_device *dev)
>  	sky2->rx_ring = NULL;
>  	sky2->tx_ring = NULL;
> 
> +	/* re-enable soft interrupts */
> +	napi_enable(&hw->napi);
> +
>  	return 0;
>  }
> 
> @@ -2372,6 +2375,13 @@ static int sky2_status_intr(struct sky2_hw *hw,
> int to_do, u16 idx)
>  		length = le16_to_cpu(le->length);
>  		status = le32_to_cpu(le->status);
> 
> +		/* rx_ring may have been free'd in sky2_down */
> +		if (unlikely(!sky2->rx_ring)) {
> +			printk(KERN_INFO "sky2_status_intr: rx_ring NULL opcode %02x\n", opcode);
> +			work_done++;
> +			break;
> +		}
> +

The first part is okay, but the second part is not. It wallpapers
over a real race. It is important that it should not be possible
to get to the sky2_status_intr (called from sky2_poll)
after down has both disabled IRQ in hardware and disabled NAPI.
If it can make to there then there is some bug in NAPI logic,
or hardware that needs attention. Remember sky2_down might be
running on a different CPU than the receiver.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] sky2: Fix a race between sky2_down and sky2_poll
  2009-06-09 14:03 [PATCH] sky2: Fix a race between sky2_down and sky2_poll Mike McCormack
  2009-06-10 14:50 ` Stephen Hemminger
@ 2009-06-11 17:04 ` Stephen Hemminger
  2009-06-12 13:16   ` Mike McCormack
  1 sibling, 1 reply; 6+ messages in thread
From: Stephen Hemminger @ 2009-06-11 17:04 UTC (permalink / raw)
  To: Mike McCormack; +Cc: netdev

Does the following fix the problem?
------------------------------
Subject: sky2: more careful shutdown

The code to shutdown hardware needs to be moer careful.
  * block napi from re-enabling
  * wait for status queue to drain before dropping rx (and tx) area
  * make sure no PCI posting bugs (synchronize does pci read)
  * add more reset pokes (from vendor sk98lin)

Rearrange exist tx/rx stop code for clarity

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>


--- a/drivers/net/sky2.c	2009-06-11 08:39:06.429163804 -0700
+++ b/drivers/net/sky2.c	2009-06-11 09:26:05.420060719 -0700
@@ -148,6 +148,7 @@ static const unsigned rxqaddr[] = { Q_R1
 static const u32 portirq_msk[] = { Y2_IS_PORT_1, Y2_IS_PORT_2 };
 
 static void sky2_set_multicast(struct net_device *dev);
+static void sky2_synchronize(struct sky2_hw *hw);
 
 /* Access to PHY via serial interconnect */
 static int gm_phy_write(struct sky2_hw *hw, unsigned port, u16 reg, u16 val)
@@ -1151,7 +1152,12 @@ stopped:
 
 	/* reset the Rx prefetch unit */
 	sky2_write32(hw, Y2_QADDR(rxq, PREF_UNIT_CTRL), PREF_UNIT_RST_SET);
-	mmiowb();
+
+	/* Reset the RAM Buffer receive queue */
+	sky2_write8(hw, RB_ADDR(rxq, RB_CTRL), RB_RST_SET);
+
+	/* Reset Rx MAC FIFO */
+	sky2_write8(hw, SK_REG(sky2->port, RX_GMF_CTRL_T), GMF_RST_SET);
 }
 
 /* Clean out receive buffer area, assumes receiver hardware stopped */
@@ -1792,6 +1798,51 @@ static void sky2_tx_complete(struct sky2
 		netif_wake_queue(dev);
 }
 
+/* Stop transmitter */
+static void sky2_tx_stop(struct sky2_port *sky2)
+{
+	struct sky2_hw *hw = sky2->hw;
+	unsigned port = sky2->port;
+	unsigned txq = txqaddr[port];
+	u16 ctrl;
+
+	sky2_write32(hw, Q_ADDR(txq, Q_CSR), BMU_STOP);
+	sky2_read32(hw, Q_ADDR(txq, Q_CSR));
+
+	sky2_write32(hw, RB_ADDR(txq, RB_CTRL), RB_RST_SET | RB_DIS_OP_MD);
+
+	ctrl = gma_read16(hw, port, GM_GP_CTRL);
+	ctrl &= ~(GM_GPCR_TX_ENA | GM_GPCR_RX_ENA);
+	gma_write16(hw, port, GM_GP_CTRL, ctrl);
+
+	sky2_write8(hw, SK_REG(port, GPHY_CTRL), GPC_RST_SET);
+
+	/* Workaround shared GMAC reset */
+	if (!(hw->chip_id == CHIP_ID_YUKON_XL && hw->chip_rev == 0
+	      && port == 0 && hw->dev[1] && netif_running(hw->dev[1])))
+		sky2_write8(hw, SK_REG(port, GMAC_CTRL), GMC_RST_SET);
+
+	/* Disable Force Sync bit and Enable Alloc bit */
+	sky2_write8(hw, SK_REG(port, TXA_CTRL),
+		    TXA_DIS_FSYNC | TXA_DIS_ALLOC | TXA_STOP_RC);
+
+	/* Stop Interval Timer and Limit Counter of Tx Arbiter */
+	sky2_write32(hw, SK_REG(port, TXA_ITI_INI), 0L);
+	sky2_write32(hw, SK_REG(port, TXA_LIM_INI), 0L);
+
+	/* Reset the PCI FIFO of the async Tx queue */
+	sky2_write32(hw, Q_ADDR(txq, Q_CSR), BMU_RST_SET | BMU_FIFO_RST);
+
+	/* Reset the Tx prefetch units */
+	sky2_write32(hw, Y2_QADDR(txq, PREF_UNIT_CTRL), PREF_UNIT_RST_SET);
+
+	sky2_write32(hw, RB_ADDR(txq, RB_CTRL), RB_RST_SET);
+	sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);
+
+	/* set Pause Off */
+	sky2_write8(hw, SK_REG(port, GMAC_CTRL), GMC_PAUSE_OFF);
+}
+
 /* Cleanup all untransmitted buffers, assume transmitter not running */
 static void sky2_tx_clean(struct net_device *dev)
 {
@@ -1808,7 +1859,6 @@ static int sky2_down(struct net_device *
 	struct sky2_port *sky2 = netdev_priv(dev);
 	struct sky2_hw *hw = sky2->hw;
 	unsigned port = sky2->port;
-	u16 ctrl;
 	u32 imask;
 
 	/* Never really got started! */
@@ -1825,51 +1875,14 @@ static int sky2_down(struct net_device *
 
 	synchronize_irq(hw->pdev->irq);
 
-	sky2_gmac_reset(hw, port);
-
-	/* Stop transmitter */
-	sky2_write32(hw, Q_ADDR(txqaddr[port], Q_CSR), BMU_STOP);
-	sky2_read32(hw, Q_ADDR(txqaddr[port], Q_CSR));
-
-	sky2_write32(hw, RB_ADDR(txqaddr[port], RB_CTRL),
-		     RB_RST_SET | RB_DIS_OP_MD);
-
-	ctrl = gma_read16(hw, port, GM_GP_CTRL);
-	ctrl &= ~(GM_GPCR_TX_ENA | GM_GPCR_RX_ENA);
-	gma_write16(hw, port, GM_GP_CTRL, ctrl);
-
-	/* Make sure no packets are pending */
-	napi_synchronize(&hw->napi);
-
-	sky2_write8(hw, SK_REG(port, GPHY_CTRL), GPC_RST_SET);
-
-	/* Workaround shared GMAC reset */
-	if (!(hw->chip_id == CHIP_ID_YUKON_XL && hw->chip_rev == 0
-	      && port == 0 && hw->dev[1] && netif_running(hw->dev[1])))
-		sky2_write8(hw, SK_REG(port, GMAC_CTRL), GMC_RST_SET);
-
-	/* Disable Force Sync bit and Enable Alloc bit */
-	sky2_write8(hw, SK_REG(port, TXA_CTRL),
-		    TXA_DIS_FSYNC | TXA_DIS_ALLOC | TXA_STOP_RC);
-
-	/* Stop Interval Timer and Limit Counter of Tx Arbiter */
-	sky2_write32(hw, SK_REG(port, TXA_ITI_INI), 0L);
-	sky2_write32(hw, SK_REG(port, TXA_LIM_INI), 0L);
-
-	/* Reset the PCI FIFO of the async Tx queue */
-	sky2_write32(hw, Q_ADDR(txqaddr[port], Q_CSR),
-		     BMU_RST_SET | BMU_FIFO_RST);
-
-	/* Reset the Tx prefetch units */
-	sky2_write32(hw, Y2_QADDR(txqaddr[port], PREF_UNIT_CTRL),
-		     PREF_UNIT_RST_SET);
+	napi_disable(&hw->napi);
 
-	sky2_write32(hw, RB_ADDR(txqaddr[port], RB_CTRL), RB_RST_SET);
+	sky2_gmac_reset(hw, port);
 
+	sky2_tx_stop(sky2);
 	sky2_rx_stop(sky2);
 
-	sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET);
-	sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);
+	sky2_synchronize(hw);
 
 	sky2_phy_power_down(hw, port);
 
@@ -1894,6 +1907,8 @@ static int sky2_down(struct net_device *
 	sky2->rx_ring = NULL;
 	sky2->tx_ring = NULL;
 
+	napi_enable(&hw->napi);
+
 	return 0;
 }
 
@@ -2782,6 +2797,16 @@ done:
 	return work_done;
 }
 
+/* Process all pending status entries */
+static void sky2_synchronize(struct sky2_hw *hw)
+{
+	u16 idx;
+
+	while ((idx = sky2_read16(hw, STAT_PUT_IDX)) != hw->st_idx) {
+		sky2_status_intr(hw, NAPI_WEIGHT, idx);
+	}
+}
+
 static irqreturn_t sky2_intr(int irq, void *dev_id)
 {
 	struct sky2_hw *hw = dev_id;

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] sky2: Fix a race between sky2_down and sky2_poll
  2009-06-11 17:04 ` Stephen Hemminger
@ 2009-06-12 13:16   ` Mike McCormack
  2009-06-12 14:16     ` Stephen Hemminger
  0 siblings, 1 reply; 6+ messages in thread
From: Mike McCormack @ 2009-06-12 13:16 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev

2009/6/12 Stephen Hemminger <shemminger@linux-foundation.org>

> Does the following fix the problem?

I tried your patch out, but it still got a crash.

The patch below does fix the problem. Does this look better?

thanks,

Mike

-----------------------------

Subject: [PATCH] sky2: Shutdown receive path cleanly in sky2_down

If sky2_down was called while receiving a packet, receive interrupts
would still occur after the receive buffer was free'd causing a crash in
sky2_status_intr.

To avoid further receive interrupts after sky2_down:

* make sure NAPI and interrupts are fully disabled during shutdown
* call sky2_rx_stop after shutting down the receiver
* clear received packets after shutdown

Signed-off-by: Mike McCormack <mikem@ring3k.org>
---
 drivers/net/sky2.c |   21 +++++++++++++++------
 1 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index a2ff9cb..136b362 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1805,10 +1805,13 @@ static int sky2_down(struct net_device *dev)
 	/* Disable port IRQ */
 	imask = sky2_read32(hw, B0_IMSK);
 	imask &= ~portirq_msk[port];
-	sky2_write32(hw, B0_IMSK, imask);
+	sky2_write32(hw, B0_IMSK, 0);

 	synchronize_irq(hw->pdev->irq);

+	/* disable soft interrupts */
+	napi_disable(&hw->napi);
+
 	sky2_gmac_reset(hw, port);

 	/* Stop transmitter */
@@ -1822,9 +1825,6 @@ static int sky2_down(struct net_device *dev)
 	ctrl &= ~(GM_GPCR_TX_ENA | GM_GPCR_RX_ENA);
 	gma_write16(hw, port, GM_GP_CTRL, ctrl);

-	/* Make sure no packets are pending */
-	napi_synchronize(&hw->napi);
-
 	sky2_write8(hw, SK_REG(port, GPHY_CTRL), GPC_RST_SET);

 	/* Workaround shared GMAC reset */
@@ -1850,11 +1850,11 @@ static int sky2_down(struct net_device *dev)

 	sky2_write32(hw, RB_ADDR(txqaddr[port], RB_CTRL), RB_RST_SET);

-	sky2_rx_stop(sky2);
-
 	sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET);
 	sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);

+	sky2_rx_stop(sky2);
+
 	sky2_phy_power_down(hw, port);

 	/* turn off LED's */
@@ -1878,6 +1878,15 @@ static int sky2_down(struct net_device *dev)
 	sky2->rx_ring = NULL;
 	sky2->tx_ring = NULL;

+	/* clear receive interrupt condition and IRQ */
+	hw->st_idx = sky2_read16(hw, STAT_PUT_IDX);
+
+	/* re-enable interrupts */
+	sky2_write32(hw, B0_IMSK, imask);
+
+	/* re-enable soft interrupts */
+	napi_enable(&hw->napi);
+
 	return 0;
 }

-- 
1.5.6.5

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] sky2: Fix a race between sky2_down and sky2_poll
  2009-06-12 13:16   ` Mike McCormack
@ 2009-06-12 14:16     ` Stephen Hemminger
  2009-06-16  5:54       ` Mike McCormack
  0 siblings, 1 reply; 6+ messages in thread
From: Stephen Hemminger @ 2009-06-12 14:16 UTC (permalink / raw)
  To: Mike McCormack; +Cc: netdev

On Fri, 12 Jun 2009 22:16:42 +0900
Mike McCormack <mikem@ring3k.org> wrote:

> 2009/6/12 Stephen Hemminger <shemminger@linux-foundation.org>
> 
> > Does the following fix the problem?
> 
> I tried your patch out, but it still got a crash.
> 
> The patch below does fix the problem. Does this look better?
> 
> thanks,
> 
> Mike
> 
> -----------------------------
> 
> Subject: [PATCH] sky2: Shutdown receive path cleanly in sky2_down
> 
> If sky2_down was called while receiving a packet, receive interrupts
> would still occur after the receive buffer was free'd causing a crash in
> sky2_status_intr.
> 
> To avoid further receive interrupts after sky2_down:
> 
> * make sure NAPI and interrupts are fully disabled during shutdown
> * call sky2_rx_stop after shutting down the receiver
> * clear received packets after shutdown
> 
> Signed-off-by: Mike McCormack <mikem@ring3k.org>
> ---
>  drivers/net/sky2.c |   21 +++++++++++++++------
>  1 files changed, 15 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
> index a2ff9cb..136b362 100644
> --- a/drivers/net/sky2.c
> +++ b/drivers/net/sky2.c
> @@ -1805,10 +1805,13 @@ static int sky2_down(struct net_device *dev)
>  	/* Disable port IRQ */
>  	imask = sky2_read32(hw, B0_IMSK);
>  	imask &= ~portirq_msk[port];
> -	sky2_write32(hw, B0_IMSK, imask);
> +	sky2_write32(hw, B0_IMSK, 0);
> 

This breaks on 2 port boards. On dual port boards, both ports share
the same IRQ. 


-- 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] sky2: Fix a race between sky2_down and sky2_poll
  2009-06-12 14:16     ` Stephen Hemminger
@ 2009-06-16  5:54       ` Mike McCormack
  0 siblings, 0 replies; 6+ messages in thread
From: Mike McCormack @ 2009-06-16  5:54 UTC (permalink / raw)
  To: Stephen Hemminger, netdev

2009/6/12 Stephen Hemminger <shemminger@linux-foundation.org>:

> This breaks on 2 port boards. On dual port boards, both ports share
> the same IRQ.

Hi Stephen,

OK, I can see the status ring is shared, and my previous patch will
interfere with the second port's operation.

As far as I can tell from testing here, the status buffer is updated
after the Rx shutdown procedure is complete. i.e. After sky2_rx_stop()
is complete and after the Rx chain is reset, I check that the status
buffer is empty (and it is), but at some time later see an
sky2_status_intr reporting status updates for the port that we've shut
down.

Does the following patch look better?

The msleep() could be removed if there's a way to make sure any
pending status updates have been flushed to the status ring buffer,
but that doesn't seem to be the case with the current code.

Calling sky2_status_intr() directly seems to hang the receiver in a
way that is unrecoverable by doing ifup/ifdown, so I have refactored
sky2_status_intr into a function that doesn't do the SC_STAT_CLR_IRQ.

thanks,

Mike



------------------

Subject: [PATCH] sky2: Fix race condition in sky2_down

Empty the status ring after shutting down the receiver in sky2_down.
Refactor sky2_status_intr into sky2_process_status to allow calling
from outside an interrupt context.
---
 drivers/net/sky2.c |  254 +++++++++++++++++++++++++++++-----------------------
 1 files changed, 142 insertions(+), 112 deletions(-)

diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 6b5946f..670cf52 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -148,6 +148,7 @@ static const unsigned rxqaddr[] = { Q_R1, Q_R2 };
 static const u32 portirq_msk[] = { Y2_IS_PORT_1, Y2_IS_PORT_2 };

 static void sky2_set_multicast(struct net_device *dev);
+static void sky2_synchronize(struct sky2_hw *hw);

 /* Access to PHY via serial interconnect */
 static int gm_phy_write(struct sky2_hw *hw, unsigned port, u16 reg, u16 val)
@@ -1806,7 +1807,8 @@ static int sky2_down(struct net_device *dev)
 	imask &= ~portirq_msk[port];
 	sky2_write32(hw, B0_IMSK, imask);

-	synchronize_irq(hw->pdev->irq);
+	/* disable soft interrupts */
+	napi_disable(&hw->napi);

 	sky2_gmac_reset(hw, port);

@@ -1821,9 +1823,6 @@ static int sky2_down(struct net_device *dev)
 	ctrl &= ~(GM_GPCR_TX_ENA | GM_GPCR_RX_ENA);
 	gma_write16(hw, port, GM_GP_CTRL, ctrl);

-	/* Make sure no packets are pending */
-	napi_synchronize(&hw->napi);
-
 	sky2_write8(hw, SK_REG(port, GPHY_CTRL), GPC_RST_SET);

 	/* Workaround shared GMAC reset */
@@ -1859,6 +1858,10 @@ static int sky2_down(struct net_device *dev)
 	/* turn off LED's */
 	sky2_write16(hw, B0_Y2LED, LED_STAT_OFF);

+	/* flush the status ring */
+	msleep(1);
+	sky2_synchronize(hw);
+
 	sky2_tx_clean(dev);
 	sky2_rx_clean(sky2);

@@ -1877,6 +1880,9 @@ static int sky2_down(struct net_device *dev)
 	sky2->rx_ring = NULL;
 	sky2->tx_ring = NULL;

+	/* re-enable soft interrupts */
+	napi_enable(&hw->napi);
+
 	return 0;
 }

@@ -2344,135 +2350,150 @@ static inline void sky2_tx_done(struct
net_device *dev, u16 last)
 }

 /* Process status response ring */
-static int sky2_status_intr(struct sky2_hw *hw, int to_do, u16 idx)
+static int sky2_process_status(struct sky2_hw *hw, unsigned *rx)
 {
+	struct sky2_port *sky2;
+	struct sky2_status_le *le  = hw->st_le + hw->st_idx;
+	unsigned port;
+	struct net_device *dev;
+	struct sk_buff *skb;
+	u32 status;
+	u16 length;
+	u8 opcode = le->opcode;
 	int work_done = 0;
-	unsigned rx[2] = { 0, 0 };

-	rmb();
-	do {
-		struct sky2_port *sky2;
-		struct sky2_status_le *le  = hw->st_le + hw->st_idx;
-		unsigned port;
-		struct net_device *dev;
-		struct sk_buff *skb;
-		u32 status;
-		u16 length;
-		u8 opcode = le->opcode;
-
-		if (!(opcode & HW_OWNER))
-			break;
+	hw->st_idx = RING_NEXT(hw->st_idx, STATUS_RING_SIZE);

-		hw->st_idx = RING_NEXT(hw->st_idx, STATUS_RING_SIZE);
-
-		port = le->css & CSS_LINK_BIT;
-		dev = hw->dev[port];
-		sky2 = netdev_priv(dev);
-		length = le16_to_cpu(le->length);
-		status = le32_to_cpu(le->status);
-
-		le->opcode = 0;
-		switch (opcode & ~HW_OWNER) {
-		case OP_RXSTAT:
-			++rx[port];
-			skb = sky2_receive(dev, length, status);
-			if (unlikely(!skb)) {
-				dev->stats.rx_dropped++;
-				break;
-			}
+	if (!(opcode & HW_OWNER))
+		goto error;

-			/* This chip reports checksum status differently */
-			if (hw->flags & SKY2_HW_NEW_LE) {
-				if (sky2->rx_csum &&
-				    (le->css & (CSS_ISIPV4 | CSS_ISIPV6)) &&
-				    (le->css & CSS_TCPUDPCSOK))
-					skb->ip_summed = CHECKSUM_UNNECESSARY;
-				else
-					skb->ip_summed = CHECKSUM_NONE;
-			}
+	port = le->css & CSS_LINK_BIT;
+	dev = hw->dev[port];
+	sky2 = netdev_priv(dev);
+	length = le16_to_cpu(le->length);
+	status = le32_to_cpu(le->status);

-			skb->protocol = eth_type_trans(skb, dev);
-			dev->stats.rx_packets++;
-			dev->stats.rx_bytes += skb->len;
-			dev->last_rx = jiffies;
+	/* rx_ring may have been free'd in sky2_down */
+	if (unlikely(!sky2->rx_ring)) {
+		printk(KERN_INFO "sky2_status_intr: rx_ring NULL opcode %02x\n", opcode);
+		goto error;
+	}
+
+	le->opcode = 0;
+	switch (opcode & ~HW_OWNER) {
+	case OP_RXSTAT:
+		++rx[port];
+		skb = sky2_receive(dev, length, status);
+		if (unlikely(!skb)) {
+			dev->stats.rx_dropped++;
+			break;
+		}
+
+		/* This chip reports checksum status differently */
+		if (hw->flags & SKY2_HW_NEW_LE) {
+			if (sky2->rx_csum &&
+			    (le->css & (CSS_ISIPV4 | CSS_ISIPV6)) &&
+			    (le->css & CSS_TCPUDPCSOK))
+				skb->ip_summed = CHECKSUM_UNNECESSARY;
+			else
+				skb->ip_summed = CHECKSUM_NONE;
+		}
+
+		skb->protocol = eth_type_trans(skb, dev);
+		dev->stats.rx_packets++;
+		dev->stats.rx_bytes += skb->len;
+		dev->last_rx = jiffies;

 #ifdef SKY2_VLAN_TAG_USED
-			if (sky2->vlgrp && (status & GMR_FS_VLAN)) {
-				vlan_hwaccel_receive_skb(skb,
-							 sky2->vlgrp,
-							 be16_to_cpu(sky2->rx_tag));
-			} else
+		if (sky2->vlgrp && (status & GMR_FS_VLAN)) {
+			vlan_hwaccel_receive_skb(skb,
+						 sky2->vlgrp,
+						 be16_to_cpu(sky2->rx_tag));
+		} else
 #endif
-				netif_receive_skb(skb);
+			netif_receive_skb(skb);

-			/* Stop after net poll weight */
-			if (++work_done >= to_do)
-				goto exit_loop;
-			break;
+		/* Stop after net poll weight */
+		work_done++;
+		break;

 #ifdef SKY2_VLAN_TAG_USED
-		case OP_RXVLAN:
-			sky2->rx_tag = length;
-			break;
+	case OP_RXVLAN:
+		sky2->rx_tag = length;
+		break;

-		case OP_RXCHKSVLAN:
-			sky2->rx_tag = length;
-			/* fall through */
+	case OP_RXCHKSVLAN:
+		sky2->rx_tag = length;
+		/* fall through */
 #endif
-		case OP_RXCHKS:
-			if (!sky2->rx_csum)
-				break;
-
-			/* If this happens then driver assuming wrong format */
-			if (unlikely(hw->flags & SKY2_HW_NEW_LE)) {
-				if (net_ratelimit())
-					printk(KERN_NOTICE "%s: unexpected"
-					       " checksum status\n",
-					       dev->name);
-				break;
-			}
-
-			/* Both checksum counters are programmed to start at
-			 * the same offset, so unless there is a problem they
-			 * should match. This failure is an early indication that
-			 * hardware receive checksumming won't work.
-			 */
-			if (likely(status >> 16 == (status & 0xffff))) {
-				skb = sky2->rx_ring[sky2->rx_next].skb;
-				skb->ip_summed = CHECKSUM_COMPLETE;
-				skb->csum = status & 0xffff;
-			} else {
-				printk(KERN_NOTICE PFX "%s: hardware receive "
-				       "checksum problem (status = %#x)\n",
-				       dev->name, status);
-				sky2->rx_csum = 0;
-				sky2_write32(sky2->hw,
-					     Q_ADDR(rxqaddr[port], Q_CSR),
-					     BMU_DIS_RX_CHKSUM);
-			}
+	case OP_RXCHKS:
+		if (!sky2->rx_csum)
 			break;

-		case OP_TXINDEXLE:
-			/* TX index reports status for both ports */
-			BUILD_BUG_ON(TX_RING_SIZE > 0x1000);
-			sky2_tx_done(hw->dev[0], status & 0xfff);
-			if (hw->dev[1])
-				sky2_tx_done(hw->dev[1],
-				     ((status >> 24) & 0xff)
-					     | (u16)(length & 0xf) << 8);
+		/* If this happens then driver assuming wrong format */
+		if (unlikely(hw->flags & SKY2_HW_NEW_LE)) {
+			if (net_ratelimit())
+				printk(KERN_NOTICE "%s: unexpected"
+				       " checksum status\n",
+				       dev->name);
 			break;
+		}

-		default:
-			if (net_ratelimit())
-				printk(KERN_WARNING PFX
-				       "unknown status opcode 0x%x\n", opcode);
+		/* Both checksum counters are programmed to start at
+		 * the same offset, so unless there is a problem they
+		 * should match. This failure is an early indication that
+		 * hardware receive checksumming won't work.
+		 */
+		if (likely(status >> 16 == (status & 0xffff))) {
+			skb = sky2->rx_ring[sky2->rx_next].skb;
+			skb->ip_summed = CHECKSUM_COMPLETE;
+			skb->csum = status & 0xffff;
+		} else {
+			printk(KERN_NOTICE PFX "%s: hardware receive "
+			       "checksum problem (status = %#x)\n",
+			       dev->name, status);
+			sky2->rx_csum = 0;
+			sky2_write32(sky2->hw,
+				     Q_ADDR(rxqaddr[port], Q_CSR),
+				     BMU_DIS_RX_CHKSUM);
 		}
-	} while (hw->st_idx != idx);
+		break;

-	/* Fully processed status ring so clear irq */
-	sky2_write32(hw, STAT_CTRL, SC_STAT_CLR_IRQ);
+	case OP_TXINDEXLE:
+		/* TX index reports status for both ports */
+		BUILD_BUG_ON(TX_RING_SIZE > 0x1000);
+		sky2_tx_done(hw->dev[0], status & 0xfff);
+		if (hw->dev[1])
+			sky2_tx_done(hw->dev[1],
+			     ((status >> 24) & 0xff)
+				     | (u16)(length & 0xf) << 8);
+		break;
+
+	default:
+		if (net_ratelimit())
+			printk(KERN_WARNING PFX
+			       "unknown status opcode 0x%x\n", opcode);
+	}
+error:
+	return work_done;
+}
+
+static int sky2_status_intr(struct sky2_hw *hw, int to_do, u16 idx)
+{
+	int work_done = 0;
+	unsigned rx[2] = { 0, 0 };
+
+	rmb();
+
+	while (work_done < to_do) {
+		work_done += sky2_process_status(hw, rx);
+		if (hw->st_idx == idx) {
+			/* Fully processed status ring so clear irq */
+			sky2_write32(hw, STAT_CTRL, SC_STAT_CLR_IRQ);
+			break;
+		}
+	}

-exit_loop:
 	if (rx[0])
 		sky2_rx_update(netdev_priv(hw->dev[0]), Q_R1);

@@ -2741,6 +2762,15 @@ done:
 	return work_done;
 }

+/* Process all pending status entries */
+static void sky2_synchronize(struct sky2_hw *hw)
+{
+	unsigned rx[2];
+	while (sky2_read16(hw, STAT_PUT_IDX) != hw->st_idx) {
+		sky2_process_status(hw, rx);
+	}
+}
+
 static irqreturn_t sky2_intr(int irq, void *dev_id)
 {
 	struct sky2_hw *hw = dev_id;
-- 
1.5.6.5

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2009-06-16  5:54 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-06-09 14:03 [PATCH] sky2: Fix a race between sky2_down and sky2_poll Mike McCormack
2009-06-10 14:50 ` Stephen Hemminger
2009-06-11 17:04 ` Stephen Hemminger
2009-06-12 13:16   ` Mike McCormack
2009-06-12 14:16     ` Stephen Hemminger
2009-06-16  5:54       ` Mike McCormack

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.