* [patch sungem] improved locking
@ 2006-11-09 21:33 Eric Lemoine
  2006-11-09 23:04 ` David Miller
  0 siblings, 1 reply; 31+ messages in thread
From: Eric Lemoine @ 2006-11-09 21:33 UTC (permalink / raw)
  To: netdev, Benjamin Herrenschmidt, David S. Miller

[-- Attachment #1: Type: text/plain, Size: 763 bytes --]

The attached patch improves locking in the sungem driver:

- a single lock is used in the driver
- gem_start_xmit, gem_poll, and gem_interrupt are lockless

The new locking design is based on what's in tg3.c.
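
For reviewers who don't have tg3.c in mind, the idea is roughly the
following (a sketch built from the helpers this patch introduces; see
the patch itself for the real code):

	/* Slow paths that reconfigure the chip take the single lock
	 * and, when needed, quiesce the interrupt handler:
	 */
	gem_full_lock(gp, 1);	/* spin_lock_bh() + gem_irq_quiesce() */
	gem_disable_ints(gp);
	/* ... reset/reconfigure the chip ... */
	gem_enable_ints(gp);	/* clears irq_sync, then unmasks */
	gem_full_unlock(gp);

	/* The hot paths (gem_start_xmit, gem_poll, gem_interrupt) take
	 * no spinlock; the handler masks chip interrupts first, then
	 * checks gem_irq_sync(gp) and backs off if a slow path is
	 * reconfiguring the chip.
	 */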

The patch runs smoothly on my ibook (with CONFIG_SMP set), but it will
need extensive testing on a multi-cpu box.

The patch includes two implementations of gem_interrupt(): one is
lockless, the other uses a spinlock. The spinlock version
is there because I was not sure the lockless version would work with
net_poll_controller. One of the two versions must be removed in the
final patch.
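
(The worry with net_poll_controller is that netpoll calls the handler
directly, outside the normal interrupt path; from memory the current
code does roughly:

	static void gem_poll_controller(struct net_device *dev)
	{
		/* gem_interrupt() is safe against re-entrance, no
		 * need to disable_irq() here
		 */
		gem_interrupt(dev->irq, dev);
	}

so the lockless handler must remain correct when invoked that way.)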

Patch applies to current git net-2.6.

Please review, and test if possible.

Thanks,

Signed-off-by: Eric Lemoine <eric.lemoine@gmail.com>

-- 
Eric

[-- Attachment #2: sungem-locking.patch --]
[-- Type: application/octet-stream, Size: 24938 bytes --]

--- drivers/net/sungem.c.orig	2006-11-07 21:08:43.000000000 +0100
+++ drivers/net/sungem.c	2006-11-09 21:29:45.000000000 +0100
@@ -9,26 +9,6 @@
  *
  * NAPI and NETPOLL support
  * (C) 2004 by Eric Lemoine (eric.lemoine@gmail.com)
- *
- * TODO:
- *  - Now that the driver was significantly simplified, I need to rework
- *    the locking. I'm sure we don't need _2_ spinlocks, and we probably
- *    can avoid taking most of them for so long period of time (and schedule
- *    instead). The main issues at this point are caused by the netdev layer
- *    though:
- *
- *    gem_change_mtu() and gem_set_multicast() are called with a read_lock()
- *    help by net/core/dev.c, thus they can't schedule. That means they can't
- *    call netif_poll_disable() neither, thus force gem_poll() to keep a spinlock
- *    where it could have been dropped. change_mtu especially would love also to
- *    be able to msleep instead of horrid locked delays when resetting the HW,
- *    but that read_lock() makes it impossible, unless I defer it's action to
- *    the reset task, which means it'll be asynchronous (won't take effect until
- *    the system schedules a bit).
- *
- *    Also, it would probably be possible to also remove most of the long-life
- *    locking in open/resume code path (gem_reinit_chip) by beeing more careful
- *    about when we can start taking interrupts or get xmit() called...
  */
 
 #include <linux/module.h>
@@ -206,11 +186,18 @@
 	__phy_write(gp, gp->mii_phy_addr, reg, val);
 }
 
-static inline void gem_enable_ints(struct gem *gp)
+static inline void __gem_enable_ints(struct gem *gp)
 {
 	/* Enable all interrupts but TXDONE */
 	writel(GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }
+	
+static inline void gem_enable_ints(struct gem *gp)
+{
+	gp->irq_sync = 0;
+	wmb();
+	__gem_enable_ints(gp);
+}
 
 static inline void gem_disable_ints(struct gem *gp)
 {
@@ -218,6 +205,59 @@
 	writel(GREG_STAT_NAPI | GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }
 
+#if GEM_INTERRUPT_LOCKLESS
+static inline u32 gem_read_status2(struct gem *gp)
+{
+	return readl(gp->regs + GREG_STAT2);
+}
+#else
+static inline u32 gem_read_status(struct gem *gp)
+{
+	return readl(gp->regs + GREG_STAT);
+}
+#endif
+
+static inline void gem_ack_int(struct gem *gp)
+{
+	writel(0x3f, gp->regs + GREG_IACK);
+}
+
+static inline void gem_netif_stop(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	dev->trans_start = jiffies; /* prevent tx timeout */
+	netif_poll_disable(dev);
+	netif_tx_disable(dev);
+}
+
+static inline void gem_netif_start(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	/* Unconditionally waking the queue is ok so long as the caller has
+	 * freed tx slots, which is done in gem_reinit_chip().
+	 */
+	netif_wake_queue(dev);
+	netif_poll_enable(dev);
+}
+
+static inline void gem_schedule_reset_task(struct gem *gp)
+{
+	gp->reset_task_pending = 1;
+	smp_mb();
+	schedule_work(&gp->reset_task);
+}
+
+static inline void gem_wait_reset_task(struct gem *gp)
+{
+	mb();
+	while (gp->reset_task_pending) {
+		yield();
+		mb();
+	}
+}
+
 static void gem_get_cell(struct gem *gp)
 {
 	BUG_ON(gp->cell_enabled < 0);
@@ -658,12 +698,20 @@
 	return 0;
 
 do_reset:
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
+	gem_schedule_reset_task(gp);
 
 	return 1;
 }
 
+static inline u32 gem_tx_avail(struct gem *gp)
+{
+	smp_mb();
+	return (gp->tx_old <= gp->tx_new) ?
+		gp->tx_old + (TX_RING_SIZE - 1) - gp->tx_new :
+		gp->tx_old - gp->tx_new - 1;
+}
+
+
 static __inline__ void gem_tx(struct net_device *dev, struct gem *gp, u32 gem_status)
 {
 	int entry, limit;
@@ -717,11 +765,20 @@
 		gp->net_stats.tx_packets++;
 		dev_kfree_skb_irq(skb);
 	}
+
 	gp->tx_old = entry;
 
-	if (netif_queue_stopped(dev) &&
-	    TX_BUFFS_AVAIL(gp) > (MAX_SKB_FRAGS + 1))
-		netif_wake_queue(dev);
+	/* Need to make tx_old update visible to gem_start_xmit() */
+	smp_mb();
+
+	if (unlikely(netif_queue_stopped(dev) &&
+	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))) {
+		netif_tx_lock(dev);
+		if (netif_queue_stopped(dev) &&
+	    	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+		netif_tx_unlock(dev);
+	}
 }
 
 static __inline__ void gem_post_rxds(struct gem *gp, int limit)
@@ -882,12 +939,6 @@
 static int gem_poll(struct net_device *dev, int *budget)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	/*
-	 * NAPI locking nightmare: See comment at head of driver
-	 */
-	spin_lock_irqsave(&gp->lock, flags);
 
 	do {
 		int work_to_do, work_done;
@@ -899,19 +950,10 @@
 		}
 
 		/* Run TX completion thread */
-		spin_lock(&gp->tx_lock);
 		gem_tx(dev, gp, gp->status);
-		spin_unlock(&gp->tx_lock);
-
-		spin_unlock_irqrestore(&gp->lock, flags);
 
-		/* Run RX thread. We don't use any locking here,
-		 * code willing to do bad things - like cleaning the
-		 * rx ring - must call netif_poll_disable(), which
-		 * schedule_timeout()'s if polling is already disabled.
-		 */
+		/* Run RX thread */
 		work_to_do = min(*budget, dev->quota);
-
 		work_done = gem_rx(gp, work_to_do);
 
 		*budget -= work_done;
@@ -920,23 +962,57 @@
 		if (work_done >= work_to_do)
 			return 1;
 
-		spin_lock_irqsave(&gp->lock, flags);
-
 		gp->status = readl(gp->regs + GREG_STAT);
 	} while (gp->status & GREG_STAT_NAPI);
 
 	__netif_rx_complete(dev);
-	gem_enable_ints(gp);
+	__gem_enable_ints(gp);
 
-	spin_unlock_irqrestore(&gp->lock, flags);
 	return 0;
 }
 
+static void gem_irq_quiesce(struct gem *gp)
+{
+	BUG_ON(gp->irq_sync);
+
+	gp->irq_sync = 1;
+	smp_mb();
+
+	synchronize_irq(gp->pdev->irq);
+}
+
+static inline int gem_irq_sync(struct gem *gp)
+{
+	return gp->irq_sync;
+}
+
+static inline void gem_full_lock(struct gem *gp, int irq_sync)
+{
+	spin_lock_bh(&gp->lock);
+	if (irq_sync)
+		gem_irq_quiesce(gp);
+}
+
+static inline void gem_full_unlock(struct gem *gp)
+{
+	spin_unlock_bh(&gp->lock);
+}
+
+#if GEM_INTERRUPT_LOCKLESS
+
+/* Lockless implementation of IRQ handler.
+ *
+ * The downside of this implementation is that it requires three PIO
+ * operations: read status, disable interrupts and ack interrupt.
+ *
+ * Also, this implementation may not work with NET_POLL_CONTROLLER. 
+ * TODO: need further investigation.
+ */
 static irqreturn_t gem_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
 	struct gem *gp = dev->priv;
-	unsigned long flags;
+	u32 gem_status;
 
 	/* Swallow interrupts when shutting the chip down, though
 	 * that shouldn't happen, we should have done free_irq() at
@@ -945,29 +1021,71 @@
 	if (!gp->running)
 		return IRQ_HANDLED;
 
-	spin_lock_irqsave(&gp->lock, flags);
+	gem_status = gem_read_status2(gp);
+	if (unlikely(!gem_status)) /* shared irq? */
+		return IRQ_NONE;
 
 	if (netif_rx_schedule_prep(dev)) {
-		u32 gem_status = readl(gp->regs + GREG_STAT);
+		gem_disable_ints(gp);
+		gem_ack_int(gp);
 
-		if (gem_status == 0) {
+		if (gem_irq_sync(gp)) {
+			/* We are sure that *we* disabled polling, so we can
+			 * safely reenable it before returning.
+			 */
 			netif_poll_enable(dev);
-			spin_unlock_irqrestore(&gp->lock, flags);
-			return IRQ_NONE;
+			return IRQ_HANDLED;
 		}
+
 		gp->status = gem_status;
-		gem_disable_ints(gp);
 		__netif_rx_schedule(dev);
 	}
 
-	spin_unlock_irqrestore(&gp->lock, flags);
+	return IRQ_HANDLED;
+}
+#else
+static irqreturn_t gem_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct gem *gp = dev->priv;
+	unsigned int handled = 1;
+	unsigned long flags;
 
-	/* If polling was disabled at the time we received that
-	 * interrupt, we may return IRQ_HANDLED here while we
-	 * should return IRQ_NONE. No big deal...
+	/* Swallow interrupts when shutting the chip down, though
+	 * that shouldn't happen, we should have done free_irq() at
+	 * this point...
 	 */
+	if (!gp->running)
+		return IRQ_HANDLED;
+
+	spin_lock_irqsave(&gp->int_lock, flags);
+
+	if (netif_rx_schedule_prep(dev)) {
+		u32 gem_status = gem_read_status(gp);
+
+		if (unlikely(!gem_status)) {
+			netif_poll_enable(dev);
+			handled = 0;
+			goto out;
+		}
+
+		gem_disable_ints(gp);
+		gem_ack_int(gp);
+
+		if (gem_irq_sync(gp)) {
+			netif_poll_enable(dev);
+			goto out;
+		}
+
+		gp->status = gem_status;
+		__netif_rx_schedule(dev);
+	}
+
+out:
+	spin_unlock_irqrestore(&gp->int_lock, flags);
 	return IRQ_HANDLED;
 }
+#endif
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void gem_poll_controller(struct net_device *dev)
@@ -999,14 +1117,8 @@
 	       readl(gp->regs + MAC_RXSTAT),
 	       readl(gp->regs + MAC_RXCFG));
 
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
-
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	printk(KERN_INFO "%s: gem_tx_timeout\n", dev->name);
+	gem_schedule_reset_task(gp);
 }
 
 static __inline__ int gem_intme(int entry)
@@ -1023,7 +1135,6 @@
 	struct gem *gp = dev->priv;
 	int entry;
 	u64 ctrl;
-	unsigned long flags;
 
 	ctrl = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1037,24 +1148,13 @@
 			(csum_stuff_off << 21));
 	}
 
-	local_irq_save(flags);
-	if (!spin_trylock(&gp->tx_lock)) {
-		/* Tell upper layer to requeue */
-		local_irq_restore(flags);
-		return NETDEV_TX_LOCKED;
-	}
-	/* We raced with gem_do_stop() */
-	if (!gp->running) {
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		return NETDEV_TX_BUSY;
-	}
-
 	/* This is a hard error, log it. */
-	if (TX_BUFFS_AVAIL(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
-		netif_stop_queue(dev);
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
-		       dev->name);
+	if (gem_tx_avail(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
+		if (!netif_queue_stopped(dev)) {
+			netif_stop_queue(dev);
+			printk(KERN_ERR PFX "%s: BUG! Tx Ring full when "
+			       "queue awake!\n", dev->name);
+		}
 		return NETDEV_TX_BUSY;
 	}
 
@@ -1131,15 +1231,17 @@
 	}
 
 	gp->tx_new = entry;
-	if (TX_BUFFS_AVAIL(gp) <= (MAX_SKB_FRAGS + 1))
+	if (unlikely(gem_tx_avail(gp) <= (MAX_SKB_FRAGS + 1))) {
 		netif_stop_queue(dev);
+		if (gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+	}
 
 	if (netif_msg_tx_queued(gp))
 		printk(KERN_DEBUG "%s: tx queued, slot %d, skblen %d\n",
 		       dev->name, entry, skb->len);
 	mb();
 	writel(gp->tx_new, gp->regs + TXDMA_KICK);
-	spin_unlock_irqrestore(&gp->tx_lock, flags);
 
 	dev->trans_start = jiffies;
 
@@ -1148,7 +1250,6 @@
 
 #define STOP_TRIES 32
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reset(struct gem *gp)
 {
 	int limit;
@@ -1174,7 +1275,6 @@
 		printk(KERN_ERR "%s: SW reset is ghetto.\n", gp->dev->name);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_start_dma(struct gem *gp)
 {
 	u32 val;
@@ -1197,9 +1297,7 @@
 	writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. DMA won't be
- * actually stopped before about 4ms tho ...
- */
+/* DMA won't be actually stopped before about 4ms tho ... */
 static void gem_stop_dma(struct gem *gp)
 {
 	u32 val;
@@ -1220,8 +1318,7 @@
 }
 
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
-// XXX dbl check what that function should do when called on PCS PHY
+/* XXX dbl check what that function should do when called on PCS PHY */
 static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep)
 {
 	u32 advertise, features;
@@ -1306,8 +1403,6 @@
 
 /* A link-up condition has occurred, initialize and enable the
  * rest of the chip.
- *
- * Must be invoked under gp->lock and gp->tx_lock.
  */
 static int gem_set_link_modes(struct gem *gp)
 {
@@ -1417,7 +1512,6 @@
 	return 0;
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static int gem_mdio_link_not_up(struct gem *gp)
 {
 	switch (gp->lstate) {
@@ -1474,13 +1568,13 @@
 	if (gp->asleep)
 		return;
 
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 
 	/* If the reset task is still pending, we just
 	 * reschedule the link timer
 	 */
+	smp_mb();
 	if (gp->reset_task_pending)
 		goto restart;
 
@@ -1528,8 +1622,7 @@
 				printk(KERN_INFO "%s: Link down\n",
 					gp->dev->name);
 			netif_carrier_off(gp->dev);
-			gp->reset_task_pending = 1;
-			schedule_work(&gp->reset_task);
+			gem_schedule_reset_task(gp);
 			restart_aneg = 1;
 		} else if (++gp->timer_ticks > 10) {
 			if (found_mii_phy(gp))
@@ -1546,11 +1639,9 @@
 	mod_timer(&gp->link_timer, jiffies + ((12 * HZ) / 10));
 out_unlock:
 	gem_put_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_clean_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1601,7 +1692,6 @@
 	}
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1775,12 +1865,9 @@
 	netif_carrier_off(gp->dev);
 
 	/* Can I advertise gigabit here ? I'd need BCM PHY docs... */
-	spin_lock_irq(&gp->lock);
 	gem_begin_auto_negotiation(gp, NULL);
-	spin_unlock_irq(&gp->lock);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_dma(struct gem *gp)
 {
 	u64 desc_dma = (u64) gp->gblock_dvma;
@@ -1818,7 +1905,6 @@
 		       gp->regs + RXDMA_BLANK);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static u32 gem_setup_multicast(struct gem *gp)
 {
 	u32 rxcfg = 0;
@@ -1860,7 +1946,6 @@
 	return rxcfg;
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_mac(struct gem *gp)
 {
 	unsigned char *e = &gp->dev->dev_addr[0];
@@ -1943,7 +2028,6 @@
 		writel(0, gp->regs + WOL_WAKECSR);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_pause_thresholds(struct gem *gp)
 {
        	u32 cfg;
@@ -2096,7 +2180,6 @@
 	return 0;
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reinit_chip(struct gem *gp)
 {
 	/* Reset the chip */
@@ -2116,12 +2199,10 @@
 	gem_init_mac(gp);
 }
 
-
 /* Must be invoked with no lock held. */
 static void gem_stop_phy(struct gem *gp, int wol)
 {
 	u32 mifcfg;
-	unsigned long flags;
 
 	/* Let the chip settle down a bit, it seems that helps
 	 * for sleep mode on some models
@@ -2167,13 +2248,13 @@
 	writel(0, gp->regs + RXDMA_CFG);
 
 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);
+
 		gem_reset(gp);
 		writel(MAC_TXRST_CMD, gp->regs + MAC_TXRST);
 		writel(MAC_RXRST_CMD, gp->regs + MAC_RXRST);
-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+
+		gem_full_unlock(gp);
 
 		/* No need to take the lock here */
 
@@ -2192,14 +2273,11 @@
 	}
 }
 
-
 static int gem_do_start(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
 
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 
 	/* Enable the cell */
 	gem_get_cell(gp);
@@ -2211,28 +2289,25 @@
 
 	if (gp->lstate == link_up) {
 		netif_carrier_on(gp->dev);
+		/* gem_set_link_modes starts DMA and enables ints */
 		gem_set_link_modes(gp);
 	}
 
-	netif_wake_queue(gp->dev);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	if (request_irq(gp->pdev->irq, gem_interrupt,
-				   IRQF_SHARED, dev->name, (void *)dev)) {
+		        IRQF_SHARED, dev->name, (void *)dev)) {
+
 		printk(KERN_ERR "%s: failed to request irq !\n", gp->dev->name);
 
-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);
 
 		gp->running =  0;
 		gem_reset(gp);
 		gem_clean_rings(gp);
 		gem_put_cell(gp);
 
-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);
 
 		return -EAGAIN;
 	}
@@ -2243,22 +2318,14 @@
 static void gem_do_stop(struct net_device *dev, int wol)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
-
-	gp->running = 0;
-
-	/* Stop netif queue */
-	netif_stop_queue(dev);
 
-	/* Make sure ints are disabled */
+	gem_full_lock(gp, 1);
 	gem_disable_ints(gp);
+	
+	gp->running = 0;
 
-	/* We can drop the lock now */
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	/* We can drop the lock now that running is set to 0 */
+	gem_full_unlock(gp);
 
 	/* If we are going to sleep with WOL */
 	gem_stop_dma(gp);
@@ -2275,9 +2342,9 @@
 
 	/* Cell not needed neither if no WOL */
 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
+		gem_full_lock(gp, 0);
 		gem_put_cell(gp);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);
 	}
 }
 
@@ -2287,30 +2354,26 @@
 
 	mutex_lock(&gp->pm_mutex);
 
-	netif_poll_disable(gp->dev);
-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);
 
 	if (gp->running == 0)
 		goto not_running;
 
-	if (gp->running) {
-		netif_stop_queue(gp->dev);
+	gem_netif_stop(gp);
 
-		/* Reset the chip & rings */
-		gem_reinit_chip(gp);
-		if (gp->lstate == link_up)
-			gem_set_link_modes(gp);
-		netif_wake_queue(gp->dev);
-	}
- not_running:
-	gp->reset_task_pending = 0;
+	/* Reset the chip & rings */
+	gem_reinit_chip(gp);
+	if (gp->lstate == link_up)
+		gem_set_link_modes(gp);
 
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_netif_start(gp);
 
-	netif_poll_enable(gp->dev);
+not_running:
+	gp->reset_task_pending = 0;
+
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);
 
 	mutex_unlock(&gp->pm_mutex);
 }
@@ -2324,8 +2387,12 @@
 	mutex_lock(&gp->pm_mutex);
 
 	/* We need the cell enabled */
-	if (!gp->asleep)
+	if (!gp->asleep) {
 		rc = gem_do_start(dev);
+		if (rc == 0)
+			netif_start_queue(dev);
+	}
+
 	gp->opened = (rc == 0);
 
 	mutex_unlock(&gp->pm_mutex);
@@ -2337,15 +2404,14 @@
 {
 	struct gem *gp = dev->priv;
 
-	/* Note: we don't need to call netif_poll_disable() here because
-	 * our caller (dev_close) already did it for us
-	 */
-
 	mutex_lock(&gp->pm_mutex);
 
 	gp->opened = 0;
-	if (!gp->asleep)
+	if (!gp->asleep) {
+		/* Upper layer took care of disabling polling */
+		netif_stop_queue(dev);
 		gem_do_stop(dev, 0);
+	}
 
 	mutex_unlock(&gp->pm_mutex);
 
@@ -2357,22 +2423,17 @@
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;
 
 	mutex_lock(&gp->pm_mutex);
 
-	netif_poll_disable(dev);
-
 	printk(KERN_INFO "%s: suspending, WakeOnLan %s\n",
 	       dev->name,
 	       (gp->wake_on_lan && gp->opened) ? "enabled" : "disabled");
 
 	/* Keep the cell enabled during the entire operation */
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	/* If the driver is opened, we stop the MAC */
 	if (gp->opened) {
@@ -2381,6 +2442,7 @@
 
 		/* Switch off MAC, remember WOL setting */
 		gp->asleep_wol = gp->wake_on_lan;
+		gem_netif_stop(gp);
 		gem_do_stop(dev, gp->asleep_wol);
 	} else
 		gp->asleep_wol = 0;
@@ -2399,8 +2461,7 @@
 	mutex_unlock(&gp->pm_mutex);
 
 	/* Wait for a pending reset task to complete */
-	while (gp->reset_task_pending)
-		yield();
+	gem_wait_reset_task(gp);
 	flush_scheduled_work();
 
 	/* Shut the PHY down eventually and setup WOL */
@@ -2421,7 +2482,6 @@
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;
 
 	printk(KERN_INFO "%s: resuming\n", dev->name);
 
@@ -2465,11 +2525,9 @@
 
 		/* Re-attach net device */
 		netif_device_attach(dev);
-
 	}
 
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 
 	/* If we had WOL enabled, the cell clock was never turned off during
 	 * sleep, so we end up beeing unbalanced. Fix that here
@@ -2482,10 +2540,9 @@
 	 */
 	gem_put_cell(gp);
 
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
-	netif_poll_enable(dev);
+	gem_netif_start(gp);
 
 	mutex_unlock(&gp->pm_mutex);
 
@@ -2498,8 +2555,8 @@
 	struct gem *gp = dev->priv;
 	struct net_device_stats *stats = &gp->net_stats;
 
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 0);
 
 	/* I have seen this being called while the PM was in progress,
 	 * so we shield against this
@@ -2522,27 +2579,26 @@
 		writel(0, gp->regs + MAC_LCOLL);
 	}
 
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
+	gem_netif_start(gp);
 
 	return &gp->net_stats;
 }
 
+/* gem_set_multicast is called with netif_tx_lock held. Thus, it
+ * cannot sleep.
+ */
 static void gem_set_multicast(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
 	u32 rxcfg, rxcfg_new;
 	int limit = 10000;
 
-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 
 	if (!gp->running)
 		goto bail;
 
-	netif_stop_queue(dev);
-
 	rxcfg = readl(gp->regs + MAC_RXCFG);
 	rxcfg_new = gem_setup_multicast(gp);
 #ifdef STRIP_FCS
@@ -2562,11 +2618,8 @@
 
 	writel(rxcfg, gp->regs + MAC_RXCFG);
 
-	netif_wake_queue(dev);
-
  bail:
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }
 
 /* Jumbo-grams don't seem to work :-( */
@@ -2593,16 +2646,22 @@
 	}
 
 	mutex_lock(&gp->pm_mutex);
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);
+	
 	dev->mtu = new_mtu;
 	if (gp->running) {
 		gem_reinit_chip(gp);
 		if (gp->lstate == link_up)
 			gem_set_link_modes(gp);
 	}
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+
+	gem_netif_start(gp);
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);
+
 	mutex_unlock(&gp->pm_mutex);
 
 	return 0;
@@ -2635,7 +2694,7 @@
 		cmd->phy_address = 0; /* XXX fixed PHYAD */
 
 		/* Return current PHY settings */
-		spin_lock_irq(&gp->lock);
+		gem_full_lock(gp, 0);
 		cmd->autoneg = gp->want_autoneg;
 		cmd->speed = gp->phy_mii.speed;
 		cmd->duplex = gp->phy_mii.duplex;
@@ -2647,7 +2706,7 @@
 		 */
 		if (cmd->advertising == 0)
 			cmd->advertising = cmd->supported;
-		spin_unlock_irq(&gp->lock);
+		gem_full_unlock(gp);
 	} else { // XXX PCS ?
 		cmd->supported =
 			(SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
@@ -2685,11 +2744,11 @@
 		return -EINVAL;
 
 	/* Apply settings and restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, cmd);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 
 	return 0;
 }
@@ -2702,11 +2761,11 @@
 		return -EINVAL;
 
 	/* Restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, NULL);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 
 	return 0;
 }
@@ -2770,16 +2829,15 @@
 	struct gem *gp = dev->priv;
 	struct mii_ioctl_data *data = if_mii(ifr);
 	int rc = -EOPNOTSUPP;
-	unsigned long flags;
 
 	/* Hold the PM mutex while doing ioctl's or we may collide
 	 * with power management.
 	 */
 	mutex_lock(&gp->pm_mutex);
 
-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	switch (cmd) {
 	case SIOCGMIIPHY:		/* Get address of MII PHY in use. */
@@ -2809,9 +2867,9 @@
 		break;
 	};
 
-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_put_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	mutex_unlock(&gp->pm_mutex);
 
@@ -2918,6 +2976,7 @@
 	if (dev) {
 		struct gem *gp = dev->priv;
 
+		/* unregister_netdev will close the device if it's open */
 		unregister_netdev(dev);
 
 		/* Stop the link timer */
@@ -2927,8 +2986,7 @@
 		gem_get_cell(gp);
 
 		/* Wait for a pending reset task to complete */
-		while (gp->reset_task_pending)
-			yield();
+		gem_wait_reset_task(gp);
 		flush_scheduled_work();
 
 		/* Shut the PHY down */
@@ -3035,8 +3093,11 @@
 
 	gp->msg_enable = DEFAULT_MSG;
 
+	gp->irq_sync = 0;
 	spin_lock_init(&gp->lock);
-	spin_lock_init(&gp->tx_lock);
+#if !GEM_INTERRUPT_LOCKLESS
+	spin_lock_init(&gp->int_lock);
+#endif
 	mutex_init(&gp->pm_mutex);
 
 	init_timer(&gp->link_timer);
@@ -3132,9 +3193,7 @@
 	 */
 	gem_init_phy(gp);
 
-	spin_lock_irq(&gp->lock);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
 
 	/* Register with kernel */
 	if (register_netdev(dev)) {
@@ -3156,8 +3215,7 @@
 		printk(KERN_INFO "%s: Found %s PHY\n", dev->name,
 			gp->phy_mii.def ? gp->phy_mii.def->name : "no");
 
-	/* GEM can do it all... */
-	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_LLTX;
+	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
 	if (pci_using_dac)
 		dev->features |= NETIF_F_HIGHDMA;
 
@@ -3177,7 +3235,6 @@
 err_disable_device:
 	pci_disable_device(pdev);
 	return err;
-
 }
 
 


* Re: [patch sungem] improved locking
  2006-11-09 21:33 [patch sungem] improved locking Eric Lemoine
@ 2006-11-09 23:04 ` David Miller
  2006-11-10 13:28   ` Eric Lemoine
  0 siblings, 1 reply; 31+ messages in thread
From: David Miller @ 2006-11-09 23:04 UTC (permalink / raw)
  To: eric.lemoine; +Cc: netdev, benh


Please use GREG_STAT_* instead of magic constants for the
interrupt mask and ACK register writes.  In fact, there are
some questionable values you use, in particular this one:

+static inline void gem_ack_int(struct gem *gp)
+{
+	writel(0x3f, gp->regs + GREG_IACK);
+}

There is no bit defined in GREG_STAT_* for 0x08, but you
set it in this magic bitmask.  It is another reason not
to use magic constants like this :-)
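
For reference, if I read sungem.h right, the bits you are covering
with 0x3f are:

	GREG_STAT_TXINTME	0x00000001
	GREG_STAT_TXALL		0x00000002
	GREG_STAT_TXDONE	0x00000004
	/* 0x00000008: no GREG_STAT_* bit defined */
	GREG_STAT_RXDONE	0x00000010
	GREG_STAT_RXNOBUF	0x00000020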

Also, if you need to use an attachment to get the tabbing
right, that's fine, but please also provide a copy inline
so that it is easy to quote the patch for review purposes.
It's truly a pain in the rear to quote things when you use
a binary attachment.

I'd like these very simple and straightforward issues to
be worked out before I even begin to review the actual
locking change itself.


* Re: [patch sungem] improved locking
  2006-11-09 23:04 ` David Miller
@ 2006-11-10 13:28   ` Eric Lemoine
  2006-11-10 20:37     ` Benjamin Herrenschmidt
  2006-11-10 20:42     ` David Miller
  0 siblings, 2 replies; 31+ messages in thread
From: Eric Lemoine @ 2006-11-10 13:28 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, benh

On 11/10/06, David Miller <davem@davemloft.net> wrote:
>
> Please use GREG_STAT_* instead of magic constants for the
> interrupt mask and ACK register writes.  In fact, there are
> some questionable values you use, in particular this one:
>
> +static inline void gem_ack_int(struct gem *gp)
> +{
> +       writel(0x3f, gp->regs + GREG_IACK);
> +}

This code clears bits 0 through 5, which is what GREG_IACK is for.

> There is no bit defined in GREG_STAT_* for 0x08, but you
> set it in this magic bitmask.  It is another reason not
> to use magic constants like this :-)

Yes, the 0x08 bit isn't used, but I assumed clearing it shouldn't be a problem.

Would you prefer that:

#define GREG_STAT_IACK 0x3f

static inline void gem_ack_int(struct gem *gp)
{
       writel(GREG_STAT_IACK, gp->regs + GREG_IACK);
}

or that:

#define GREG_STAT_IACK (GREG_STAT_TXINTME | GREG_STAT_TXALL | \
                                         GREG_STAT_RXDONE | GREG_STAT_RXNOBUF)

to avoid the unused 0x08 bit and TX_DONE, which is masked.

Personally, I like the first way best.

>
> Also, if you need to use an attachment to get the tabbing
> right, that's fine, but please also provide a copy inline
> so that it is easy to quote the patch for review purposes.
> It's a truly a pain in the rear to quote things when you use
> a binary attachment.

I will.

>
> I'd like these very simple and straightforward issues to
> be worked out before I even begin to review the actual
> locking change itself.

Ok.

I'll wait for your decision regarding gem_ack_int() and send out another patch.

Thanks,


-- 
Eric


* Re: [patch sungem] improved locking
  2006-11-10 13:28   ` Eric Lemoine
@ 2006-11-10 20:37     ` Benjamin Herrenschmidt
  2006-11-10 20:42     ` David Miller
  1 sibling, 0 replies; 31+ messages in thread
From: Benjamin Herrenschmidt @ 2006-11-10 20:37 UTC (permalink / raw)
  To: Eric Lemoine; +Cc: David Miller, netdev


> Yes, the 0x08 bit isn't used, but I assumed clearing it shouldn't be a problem.

Always leave reserved bits alone ... I agree it's very unlikely in this
case that it could cause a problem but I've seen stranger things....

Ben.




* Re: [patch sungem] improved locking
  2006-11-10 13:28   ` Eric Lemoine
  2006-11-10 20:37     ` Benjamin Herrenschmidt
@ 2006-11-10 20:42     ` David Miller
  2006-11-12 23:11       ` Eric Lemoine
  1 sibling, 1 reply; 31+ messages in thread
From: David Miller @ 2006-11-10 20:42 UTC (permalink / raw)
  To: eric.lemoine; +Cc: netdev, benh

From: "Eric Lemoine" <eric.lemoine@gmail.com>
Date: Fri, 10 Nov 2006 14:28:41 +0100

> On 11/10/06, David Miller <davem@davemloft.net> wrote:
> >
> > Please use GREG_STAT_* instead of magic constants for the
> > interrupt mask and ACK register writes.  In fact, there are
> > some questionable values you use, in particular this one:
> >
> > +static inline void gem_ack_int(struct gem *gp)
> > +{
> > +       writel(0x3f, gp->regs + GREG_IACK);
> > +}
> 
> This code clears bits 0 through 5, which is what GREG_IACK is for.
> 
> > There is no bit defined in GREG_STAT_* for 0x08, but you
> > set it in this magic bitmask.  It is another reason not
> > to use magic constants like this :-)
> 
> Yes, the 0x08 bit isn't used, but I assumed clearing it shouldn't be a problem.
> 
> Would you prefer that:
> 
> #define GREG_STAT_IACK 0x3f
> 
> static inline void gem_ack_int(struct gem *gp)
> {
>        writel(GREG_STAT_IACK, gp->regs + GREG_IACK);
> }
> 
> or that:
> 
> #define GREG_STAT_IACK (GREG_STAT_TXINTME | GREG_STAT_TXALL | \
>                                          GREG_STAT_RXDONE | GREG_STAT_RXNOBUF)
> 
> to avoid the unused 0x08 bit and TX_DONE, which is masked.
> 
> Personnaly, I like the first way best.

I think the second way is better because it clearly states
your intentions.  The GREG_STAT_IACK 0x3f macro is still
"magic" because it doesn't say what the bits actually mean.

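I.e. something like:

	#define GREG_STAT_IACK	(GREG_STAT_TXINTME | GREG_STAT_TXALL | \
				 GREG_STAT_RXDONE | GREG_STAT_RXNOBUF)

	static inline void gem_ack_int(struct gem *gp)
	{
		writel(GREG_STAT_IACK, gp->regs + GREG_IACK);
	}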

* Re: [patch sungem] improved locking
  2006-11-10 20:42     ` David Miller
@ 2006-11-12 23:11       ` Eric Lemoine
  2006-11-14  0:46         ` David Miller
  0 siblings, 1 reply; 31+ messages in thread
From: Eric Lemoine @ 2006-11-12 23:11 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, benh

[-- Attachment #1: Type: text/plain, Size: 29019 bytes --]

> I think the second way is better because it clearly states
> your intentions.  The GREG_STAT_IACK 0x3f macro is still
> "magic" because it doesn't say what the bits actually mean.

New patch attached, and provided inline below so people can comment.

Signed-off-by: Eric Lemoine <eric.lemoine@gmail.com>

diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 253e96e..31bd90d 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -9,26 +9,6 @@
  *
  * NAPI and NETPOLL support
  * (C) 2004 by Eric Lemoine (eric.lemoine@gmail.com)
- *
- * TODO:
- *  - Now that the driver was significantly simplified, I need to rework
- *    the locking. I'm sure we don't need _2_ spinlocks, and we probably
- *    can avoid taking most of them for so long period of time (and schedule
- *    instead). The main issues at this point are caused by the netdev layer
- *    though:
- *
- *    gem_change_mtu() and gem_set_multicast() are called with a read_lock()
- *    help by net/core/dev.c, thus they can't schedule. That means they can't
- *    call netif_poll_disable() neither, thus force gem_poll() to keep a spinlock
- *    where it could have been dropped. change_mtu especially would love also to
- *    be able to msleep instead of horrid locked delays when resetting the HW,
- *    but that read_lock() makes it impossible, unless I defer it's action to
- *    the reset task, which means it'll be asynchronous (won't take effect until
- *    the system schedules a bit).
- *
- *    Also, it would probably be possible to also remove most of the long-life
- *    locking in open/resume code path (gem_reinit_chip) by beeing more careful
- *    about when we can start taking interrupts or get xmit() called...
  */

 #include <linux/module.h>
@@ -206,11 +186,18 @@ static inline void phy_write(struct gem
 	__phy_write(gp, gp->mii_phy_addr, reg, val);
 }

-static inline void gem_enable_ints(struct gem *gp)
+static inline void __gem_enable_ints(struct gem *gp)
 {
-	/* Enable all interrupts but TXDONE */
+	/* Enable all interrupts except TXDONE */
 	writel(GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }
+	
+static inline void gem_enable_ints(struct gem *gp)
+{
+	gp->irq_sync = 0;
+	wmb();
+	__gem_enable_ints(gp);
+}

 static inline void gem_disable_ints(struct gem *gp)
 {
@@ -218,6 +205,60 @@ static inline void gem_disable_ints(stru
 	writel(GREG_STAT_NAPI | GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }

+#if GEM_INTERRUPT_LOCKLESS
+static inline u32 gem_read_status2(struct gem *gp)
+{
+	return readl(gp->regs + GREG_STAT2);
+}
+
+static inline void gem_ack_int(struct gem *gp)
+{
+	writel(GREG_STAT_IACK, gp->regs + GREG_IACK);
+}
+
+#else
+static inline u32 gem_read_status(struct gem *gp)
+{
+	return readl(gp->regs + GREG_STAT);
+}
+#endif
+
+static inline void gem_netif_stop(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	dev->trans_start = jiffies; /* prevent tx timeout */
+	netif_poll_disable(dev);
+	netif_tx_disable(dev);
+}
+
+static inline void gem_netif_start(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	/* Unconditionally waking the queue is ok so long as the caller has
+	 * freed tx slots, which is done in gem_reinit_chip().
+	 */
+	netif_wake_queue(dev);
+	netif_poll_enable(dev);
+}
+
+static inline void gem_schedule_reset_task(struct gem *gp)
+{
+	gp->reset_task_pending = 1;
+	smp_mb();
+	schedule_work(&gp->reset_task);
+}
+
+static inline void gem_wait_reset_task(struct gem *gp)
+{
+	mb();
+	while (gp->reset_task_pending) {
+		yield();
+		mb();
+	}
+}
+
 static void gem_get_cell(struct gem *gp)
 {
 	BUG_ON(gp->cell_enabled < 0);
@@ -658,12 +699,20 @@ static int gem_abnormal_irq(struct net_d
 	return 0;

 do_reset:
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
+	gem_schedule_reset_task(gp);

 	return 1;
 }

+static inline u32 gem_tx_avail(struct gem *gp)
+{
+	smp_mb();
+	return (gp->tx_old <= gp->tx_new) ?
+		gp->tx_old + (TX_RING_SIZE - 1) - gp->tx_new :
+		gp->tx_old - gp->tx_new - 1;
+}
+
+
 static __inline__ void gem_tx(struct net_device *dev, struct gem *gp, u32 gem_status)
 {
 	int entry, limit;
@@ -717,11 +766,20 @@ static __inline__ void gem_tx(struct net
 		gp->net_stats.tx_packets++;
 		dev_kfree_skb_irq(skb);
 	}
+
 	gp->tx_old = entry;

-	if (netif_queue_stopped(dev) &&
-	    TX_BUFFS_AVAIL(gp) > (MAX_SKB_FRAGS + 1))
-		netif_wake_queue(dev);
+	/* Need to make tx_old update visible to gem_start_xmit() */
+	smp_mb();
+
+	if (unlikely(netif_queue_stopped(dev) &&
+	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))) {
+		netif_tx_lock(dev);
+		if (netif_queue_stopped(dev) &&
+	    	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+		netif_tx_unlock(dev);
+	}
 }

 static __inline__ void gem_post_rxds(struct gem *gp, int limit)
@@ -882,12 +940,6 @@ static int gem_rx(struct gem *gp, int wo
 static int gem_poll(struct net_device *dev, int *budget)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	/*
-	 * NAPI locking nightmare: See comment at head of driver
-	 */
-	spin_lock_irqsave(&gp->lock, flags);

 	do {
 		int work_to_do, work_done;
@@ -899,19 +951,10 @@ static int gem_poll(struct net_device *d
 		}

 		/* Run TX completion thread */
-		spin_lock(&gp->tx_lock);
 		gem_tx(dev, gp, gp->status);
-		spin_unlock(&gp->tx_lock);
-
-		spin_unlock_irqrestore(&gp->lock, flags);

-		/* Run RX thread. We don't use any locking here,
-		 * code willing to do bad things - like cleaning the
-		 * rx ring - must call netif_poll_disable(), which
-		 * schedule_timeout()'s if polling is already disabled.
-		 */
+		/* Run RX thread */
 		work_to_do = min(*budget, dev->quota);
-
 		work_done = gem_rx(gp, work_to_do);

 		*budget -= work_done;
@@ -920,23 +963,57 @@ static int gem_poll(struct net_device *d
 		if (work_done >= work_to_do)
 			return 1;

-		spin_lock_irqsave(&gp->lock, flags);
-
 		gp->status = readl(gp->regs + GREG_STAT);
 	} while (gp->status & GREG_STAT_NAPI);

 	__netif_rx_complete(dev);
-	gem_enable_ints(gp);
+	__gem_enable_ints(gp);

-	spin_unlock_irqrestore(&gp->lock, flags);
 	return 0;
 }

+static void gem_irq_quiesce(struct gem *gp)
+{
+	BUG_ON(gp->irq_sync);
+
+	gp->irq_sync = 1;
+	smp_mb();
+
+	synchronize_irq(gp->pdev->irq);
+}
+
+static inline int gem_irq_sync(struct gem *gp)
+{
+	return gp->irq_sync;
+}
+
+static inline void gem_full_lock(struct gem *gp, int irq_sync)
+{
+	spin_lock_bh(&gp->lock);
+	if (irq_sync)
+		gem_irq_quiesce(gp);
+}
+
+static inline void gem_full_unlock(struct gem *gp)
+{
+	spin_unlock_bh(&gp->lock);
+}
+
+#if GEM_INTERRUPT_LOCKLESS
+
+/* Lockless implementation of IRQ handler.
+ *
+ * The downside of this implementation is that it requires three PIO
+ * operations: read status, disable interrupts and ack interrupt.
+ *
+ * Also, this implementation may not work with NET_POLL_CONTROLLER.
+ * TODO: need further investigation.
+ */
 static irqreturn_t gem_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
 	struct gem *gp = dev->priv;
-	unsigned long flags;
+	u32 gem_status;

 	/* Swallow interrupts when shutting the chip down, though
 	 * that shouldn't happen, we should have done free_irq() at
@@ -945,29 +1022,70 @@ static irqreturn_t gem_interrupt(int irq
 	if (!gp->running)
 		return IRQ_HANDLED;

-	spin_lock_irqsave(&gp->lock, flags);
+	gem_status = gem_read_status2(gp);
+	if (unlikely(!gem_status)) /* shared irq? */
+		return IRQ_NONE;

 	if (netif_rx_schedule_prep(dev)) {
-		u32 gem_status = readl(gp->regs + GREG_STAT);
+		gem_disable_ints(gp);
+		gem_ack_int(gp);

-		if (gem_status == 0) {
+		if (gem_irq_sync(gp)) {
+			/* We are sure that *we* disabled polling, so we can
+			 * safely reenable it before returning.
+			 */
 			netif_poll_enable(dev);
-			spin_unlock_irqrestore(&gp->lock, flags);
-			return IRQ_NONE;
+			return IRQ_HANDLED;
 		}
+
 		gp->status = gem_status;
-		gem_disable_ints(gp);
 		__netif_rx_schedule(dev);
 	}

-	spin_unlock_irqrestore(&gp->lock, flags);
+	return IRQ_HANDLED;
+}
+#else
+static irqreturn_t gem_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct gem *gp = dev->priv;
+	unsigned int handled = 1;
+	unsigned long flags;

-	/* If polling was disabled at the time we received that
-	 * interrupt, we may return IRQ_HANDLED here while we
-	 * should return IRQ_NONE. No big deal...
+	/* Swallow interrupts when shutting the chip down, though
+	 * that shouldn't happen, we should have done free_irq() at
+	 * this point...
 	 */
+	if (!gp->running)
+		return IRQ_HANDLED;
+
+	spin_lock_irqsave(&gp->int_lock, flags);
+
+	if (netif_rx_schedule_prep(dev)) {
+		u32 gem_status = gem_read_status(gp);
+
+		if (unlikely(!gem_status)) {
+			netif_poll_enable(dev);
+			handled = 0;
+			goto out;
+		}
+
+		gem_disable_ints(gp);
+
+		if (gem_irq_sync(gp)) {
+			netif_poll_enable(dev);
+			goto out;
+		}
+
+		gp->status = gem_status;
+		__netif_rx_schedule(dev);
+	}
+
+out:
+	spin_unlock_irqrestore(&gp->int_lock, flags);
 	return IRQ_HANDLED;
 }
+#endif

 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void gem_poll_controller(struct net_device *dev)
@@ -999,14 +1117,8 @@ static void gem_tx_timeout(struct net_de
 	       readl(gp->regs + MAC_RXSTAT),
 	       readl(gp->regs + MAC_RXCFG));

-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
-
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	printk(KERN_INFO "%s: gem_tx_timeout\n", dev->name);
+	gem_schedule_reset_task(gp);
 }

 static __inline__ int gem_intme(int entry)
@@ -1023,7 +1135,6 @@ static int gem_start_xmit(struct sk_buff
 	struct gem *gp = dev->priv;
 	int entry;
 	u64 ctrl;
-	unsigned long flags;

 	ctrl = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1037,24 +1148,13 @@ static int gem_start_xmit(struct sk_buff
 			(csum_stuff_off << 21));
 	}

-	local_irq_save(flags);
-	if (!spin_trylock(&gp->tx_lock)) {
-		/* Tell upper layer to requeue */
-		local_irq_restore(flags);
-		return NETDEV_TX_LOCKED;
-	}
-	/* We raced with gem_do_stop() */
-	if (!gp->running) {
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		return NETDEV_TX_BUSY;
-	}
-
 	/* This is a hard error, log it. */
-	if (TX_BUFFS_AVAIL(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
-		netif_stop_queue(dev);
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
-		       dev->name);
+	if (gem_tx_avail(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
+		if (!netif_queue_stopped(dev)) {
+			netif_stop_queue(dev);
+			printk(KERN_ERR PFX "%s: BUG! Tx Ring full when "
+			       "queue awake!\n", dev->name);
+		}
 		return NETDEV_TX_BUSY;
 	}

@@ -1131,15 +1231,17 @@ static int gem_start_xmit(struct sk_buff
 	}

 	gp->tx_new = entry;
-	if (TX_BUFFS_AVAIL(gp) <= (MAX_SKB_FRAGS + 1))
+	if (unlikely(gem_tx_avail(gp) <= (MAX_SKB_FRAGS + 1))) {
 		netif_stop_queue(dev);
+		if (gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+	}

 	if (netif_msg_tx_queued(gp))
 		printk(KERN_DEBUG "%s: tx queued, slot %d, skblen %d\n",
 		       dev->name, entry, skb->len);
 	mb();
 	writel(gp->tx_new, gp->regs + TXDMA_KICK);
-	spin_unlock_irqrestore(&gp->tx_lock, flags);

 	dev->trans_start = jiffies;

@@ -1148,7 +1250,6 @@ static int gem_start_xmit(struct sk_buff

 #define STOP_TRIES 32

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reset(struct gem *gp)
 {
 	int limit;
@@ -1174,7 +1275,6 @@ static void gem_reset(struct gem *gp)
 		printk(KERN_ERR "%s: SW reset is ghetto.\n", gp->dev->name);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_start_dma(struct gem *gp)
 {
 	u32 val;
@@ -1197,9 +1297,7 @@ static void gem_start_dma(struct gem *gp
 	writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. DMA won't be
- * actually stopped before about 4ms tho ...
- */
+/* DMA won't be actually stopped before about 4ms tho ... */
 static void gem_stop_dma(struct gem *gp)
 {
 	u32 val;
@@ -1220,8 +1318,7 @@ static void gem_stop_dma(struct gem *gp)
 }


-/* Must be invoked under gp->lock and gp->tx_lock. */
-// XXX dbl check what that function should do when called on PCS PHY
+/* XXX dbl check what that function should do when called on PCS PHY */
 static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep)
 {
 	u32 advertise, features;
@@ -1306,8 +1403,6 @@ non_mii:

 /* A link-up condition has occurred, initialize and enable the
  * rest of the chip.
- *
- * Must be invoked under gp->lock and gp->tx_lock.
  */
 static int gem_set_link_modes(struct gem *gp)
 {
@@ -1417,7 +1512,6 @@ static int gem_set_link_modes(struct gem
 	return 0;
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static int gem_mdio_link_not_up(struct gem *gp)
 {
 	switch (gp->lstate) {
@@ -1474,13 +1568,13 @@ static void gem_link_timer(unsigned long
 	if (gp->asleep)
 		return;

-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);

 	/* If the reset task is still pending, we just
 	 * reschedule the link timer
 	 */
+	smp_mb();
 	if (gp->reset_task_pending)
 		goto restart;

@@ -1528,8 +1622,7 @@ static void gem_link_timer(unsigned long
 				printk(KERN_INFO "%s: Link down\n",
 					gp->dev->name);
 			netif_carrier_off(gp->dev);
-			gp->reset_task_pending = 1;
-			schedule_work(&gp->reset_task);
+			gem_schedule_reset_task(gp);
 			restart_aneg = 1;
 		} else if (++gp->timer_ticks > 10) {
 			if (found_mii_phy(gp))
@@ -1546,11 +1639,9 @@ restart:
 	mod_timer(&gp->link_timer, jiffies + ((12 * HZ) / 10));
 out_unlock:
 	gem_put_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_clean_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1601,7 +1692,6 @@ static void gem_clean_rings(struct gem *
 	}
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1775,12 +1865,9 @@ static void gem_init_phy(struct gem *gp)
 	netif_carrier_off(gp->dev);

 	/* Can I advertise gigabit here ? I'd need BCM PHY docs... */
-	spin_lock_irq(&gp->lock);
 	gem_begin_auto_negotiation(gp, NULL);
-	spin_unlock_irq(&gp->lock);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_dma(struct gem *gp)
 {
 	u64 desc_dma = (u64) gp->gblock_dvma;
@@ -1818,7 +1905,6 @@ static void gem_init_dma(struct gem *gp)
 		       gp->regs + RXDMA_BLANK);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static u32 gem_setup_multicast(struct gem *gp)
 {
 	u32 rxcfg = 0;
@@ -1860,7 +1946,6 @@ static u32 gem_setup_multicast(struct ge
 	return rxcfg;
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_mac(struct gem *gp)
 {
 	unsigned char *e = &gp->dev->dev_addr[0];
@@ -1943,7 +2028,6 @@ static void gem_init_mac(struct gem *gp)
 		writel(0, gp->regs + WOL_WAKECSR);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_pause_thresholds(struct gem *gp)
 {
        	u32 cfg;
@@ -2096,7 +2180,6 @@ static int gem_check_invariants(struct g
 	return 0;
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reinit_chip(struct gem *gp)
 {
 	/* Reset the chip */
@@ -2116,12 +2199,10 @@ static void gem_reinit_chip(struct gem *
 	gem_init_mac(gp);
 }

-
 /* Must be invoked with no lock held. */
 static void gem_stop_phy(struct gem *gp, int wol)
 {
 	u32 mifcfg;
-	unsigned long flags;

 	/* Let the chip settle down a bit, it seems that helps
 	 * for sleep mode on some models
@@ -2167,13 +2248,13 @@ static void gem_stop_phy(struct gem *gp,
 	writel(0, gp->regs + RXDMA_CFG);

 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);
+
 		gem_reset(gp);
 		writel(MAC_TXRST_CMD, gp->regs + MAC_TXRST);
 		writel(MAC_RXRST_CMD, gp->regs + MAC_RXRST);
-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+
+		gem_full_unlock(gp);

 		/* No need to take the lock here */

@@ -2192,14 +2273,11 @@ static void gem_stop_phy(struct gem *gp,
 	}
 }

-
 static int gem_do_start(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;

-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);

 	/* Enable the cell */
 	gem_get_cell(gp);
@@ -2211,28 +2289,25 @@ static int gem_do_start(struct net_devic

 	if (gp->lstate == link_up) {
 		netif_carrier_on(gp->dev);
+		/* gem_set_link_modes starts DMA and enables ints */
 		gem_set_link_modes(gp);
 	}

-	netif_wake_queue(gp->dev);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

 	if (request_irq(gp->pdev->irq, gem_interrupt,
-				   IRQF_SHARED, dev->name, (void *)dev)) {
+		        IRQF_SHARED, dev->name, (void *)dev)) {
+
 		printk(KERN_ERR "%s: failed to request irq !\n", gp->dev->name);

-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);

 		gp->running =  0;
 		gem_reset(gp);
 		gem_clean_rings(gp);
 		gem_put_cell(gp);

-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);

 		return -EAGAIN;
 	}
@@ -2243,22 +2318,14 @@ static int gem_do_start(struct net_devic
 static void gem_do_stop(struct net_device *dev, int wol)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
-
-	gp->running = 0;
-
-	/* Stop netif queue */
-	netif_stop_queue(dev);

-	/* Make sure ints are disabled */
+	gem_full_lock(gp, 1);
 	gem_disable_ints(gp);
+	
+	gp->running = 0;

-	/* We can drop the lock now */
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	/* We can drop the lock now that running is set to 0 */
+	gem_full_unlock(gp);

 	/* If we are going to sleep with WOL */
 	gem_stop_dma(gp);
@@ -2275,9 +2342,9 @@ static void gem_do_stop(struct net_devic

 	/* Cell not needed neither if no WOL */
 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
+		gem_full_lock(gp, 0);
 		gem_put_cell(gp);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);
 	}
 }

@@ -2287,30 +2354,26 @@ static void gem_reset_task(void *data)

 	mutex_lock(&gp->pm_mutex);

-	netif_poll_disable(gp->dev);
-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);

 	if (gp->running == 0)
 		goto not_running;

-	if (gp->running) {
-		netif_stop_queue(gp->dev);
+	gem_netif_stop(gp);

-		/* Reset the chip & rings */
-		gem_reinit_chip(gp);
-		if (gp->lstate == link_up)
-			gem_set_link_modes(gp);
-		netif_wake_queue(gp->dev);
-	}
- not_running:
-	gp->reset_task_pending = 0;
+	/* Reset the chip & rings */
+	gem_reinit_chip(gp);
+	if (gp->lstate == link_up)
+		gem_set_link_modes(gp);

-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_netif_start(gp);

-	netif_poll_enable(gp->dev);
+not_running:
+	gp->reset_task_pending = 0;
+
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);

 	mutex_unlock(&gp->pm_mutex);
 }
@@ -2324,8 +2387,12 @@ static int gem_open(struct net_device *d
 	mutex_lock(&gp->pm_mutex);

 	/* We need the cell enabled */
-	if (!gp->asleep)
+	if (!gp->asleep) {
 		rc = gem_do_start(dev);
+		if (rc == 0)
+			netif_start_queue(dev);
+	}
+
 	gp->opened = (rc == 0);

 	mutex_unlock(&gp->pm_mutex);
@@ -2337,15 +2404,14 @@ static int gem_close(struct net_device *
 {
 	struct gem *gp = dev->priv;

-	/* Note: we don't need to call netif_poll_disable() here because
-	 * our caller (dev_close) already did it for us
-	 */
-
 	mutex_lock(&gp->pm_mutex);

 	gp->opened = 0;
-	if (!gp->asleep)
+	if (!gp->asleep) {
+		/* Upper layer took care of disabling polling */
+		netif_stop_queue(dev);
 		gem_do_stop(dev, 0);
+	}

 	mutex_unlock(&gp->pm_mutex);

@@ -2357,22 +2423,17 @@ static int gem_suspend(struct pci_dev *p
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;

 	mutex_lock(&gp->pm_mutex);

-	netif_poll_disable(dev);
-
 	printk(KERN_INFO "%s: suspending, WakeOnLan %s\n",
 	       dev->name,
 	       (gp->wake_on_lan && gp->opened) ? "enabled" : "disabled");

 	/* Keep the cell enabled during the entire operation */
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

 	/* If the driver is opened, we stop the MAC */
 	if (gp->opened) {
@@ -2381,6 +2442,7 @@ static int gem_suspend(struct pci_dev *p

 		/* Switch off MAC, remember WOL setting */
 		gp->asleep_wol = gp->wake_on_lan;
+		gem_netif_stop(gp);
 		gem_do_stop(dev, gp->asleep_wol);
 	} else
 		gp->asleep_wol = 0;
@@ -2399,8 +2461,7 @@ static int gem_suspend(struct pci_dev *p
 	mutex_unlock(&gp->pm_mutex);

 	/* Wait for a pending reset task to complete */
-	while (gp->reset_task_pending)
-		yield();
+	gem_wait_reset_task(gp);
 	flush_scheduled_work();

 	/* Shut the PHY down eventually and setup WOL */
@@ -2421,7 +2482,6 @@ static int gem_resume(struct pci_dev *pd
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;

 	printk(KERN_INFO "%s: resuming\n", dev->name);

@@ -2465,11 +2525,9 @@ static int gem_resume(struct pci_dev *pd

 		/* Re-attach net device */
 		netif_device_attach(dev);
-
 	}

-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);

 	/* If we had WOL enabled, the cell clock was never turned off during
 	 * sleep, so we end up beeing unbalanced. Fix that here
@@ -2482,10 +2540,9 @@ static int gem_resume(struct pci_dev *pd
 	 */
 	gem_put_cell(gp);

-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

-	netif_poll_enable(dev);
+	gem_netif_start(gp);

 	mutex_unlock(&gp->pm_mutex);

@@ -2498,8 +2555,8 @@ static struct net_device_stats *gem_get_
 	struct gem *gp = dev->priv;
 	struct net_device_stats *stats = &gp->net_stats;

-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 0);

 	/* I have seen this being called while the PM was in progress,
 	 * so we shield against this
@@ -2522,27 +2579,26 @@ static struct net_device_stats *gem_get_
 		writel(0, gp->regs + MAC_LCOLL);
 	}

-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
+	gem_netif_start(gp);

 	return &gp->net_stats;
 }

+/* gem_set_multicast is called with netif_tx_lock held. Thus, it
+ * cannot sleep.
+ */
 static void gem_set_multicast(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
 	u32 rxcfg, rxcfg_new;
 	int limit = 10000;

-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);

 	if (!gp->running)
 		goto bail;

-	netif_stop_queue(dev);
-
 	rxcfg = readl(gp->regs + MAC_RXCFG);
 	rxcfg_new = gem_setup_multicast(gp);
 #ifdef STRIP_FCS
@@ -2562,11 +2618,8 @@ static void gem_set_multicast(struct net

 	writel(rxcfg, gp->regs + MAC_RXCFG);

-	netif_wake_queue(dev);
-
  bail:
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }

 /* Jumbo-grams don't seem to work :-( */
@@ -2593,16 +2646,22 @@ static int gem_change_mtu(struct net_dev
 	}

 	mutex_lock(&gp->pm_mutex);
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);
+	
 	dev->mtu = new_mtu;
 	if (gp->running) {
 		gem_reinit_chip(gp);
 		if (gp->lstate == link_up)
 			gem_set_link_modes(gp);
 	}
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+
+	gem_netif_start(gp);
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);
+
 	mutex_unlock(&gp->pm_mutex);

 	return 0;
@@ -2635,7 +2694,7 @@ static int gem_get_settings(struct net_d
 		cmd->phy_address = 0; /* XXX fixed PHYAD */

 		/* Return current PHY settings */
-		spin_lock_irq(&gp->lock);
+		gem_full_lock(gp, 0);
 		cmd->autoneg = gp->want_autoneg;
 		cmd->speed = gp->phy_mii.speed;
 		cmd->duplex = gp->phy_mii.duplex;
@@ -2647,7 +2706,7 @@ static int gem_get_settings(struct net_d
 		 */
 		if (cmd->advertising == 0)
 			cmd->advertising = cmd->supported;
-		spin_unlock_irq(&gp->lock);
+		gem_full_unlock(gp);
 	} else { // XXX PCS ?
 		cmd->supported =
 			(SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
@@ -2685,11 +2744,11 @@ static int gem_set_settings(struct net_d
 		return -EINVAL;

 	/* Apply settings and restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, cmd);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);

 	return 0;
 }
@@ -2702,11 +2761,11 @@ static int gem_nway_reset(struct net_dev
 		return -EINVAL;

 	/* Restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, NULL);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);

 	return 0;
 }
@@ -2770,16 +2829,15 @@ static int gem_ioctl(struct net_device *
 	struct gem *gp = dev->priv;
 	struct mii_ioctl_data *data = if_mii(ifr);
 	int rc = -EOPNOTSUPP;
-	unsigned long flags;

 	/* Hold the PM mutex while doing ioctl's or we may collide
 	 * with power management.
 	 */
 	mutex_lock(&gp->pm_mutex);

-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

 	switch (cmd) {
 	case SIOCGMIIPHY:		/* Get address of MII PHY in use. */
@@ -2809,9 +2867,9 @@ static int gem_ioctl(struct net_device *
 		break;
 	};

-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_put_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

 	mutex_unlock(&gp->pm_mutex);

@@ -2918,6 +2976,7 @@ static void gem_remove_one(struct pci_de
 	if (dev) {
 		struct gem *gp = dev->priv;

+		/* unregister_netdev will close the device if it's open */
 		unregister_netdev(dev);

 		/* Stop the link timer */
@@ -2927,8 +2986,7 @@ static void gem_remove_one(struct pci_de
 		gem_get_cell(gp);

 		/* Wait for a pending reset task to complete */
-		while (gp->reset_task_pending)
-			yield();
+		gem_wait_reset_task(gp);
 		flush_scheduled_work();

 		/* Shut the PHY down */
@@ -3035,8 +3093,11 @@ static int __devinit gem_init_one(struct

 	gp->msg_enable = DEFAULT_MSG;

+	gp->irq_sync = 0;
 	spin_lock_init(&gp->lock);
-	spin_lock_init(&gp->tx_lock);
+#if !GEM_INTERRUPT_LOCKLESS
+	spin_lock_init(&gp->int_lock);
+#endif
 	mutex_init(&gp->pm_mutex);

 	init_timer(&gp->link_timer);
@@ -3132,9 +3193,7 @@ static int __devinit gem_init_one(struct
 	 */
 	gem_init_phy(gp);

-	spin_lock_irq(&gp->lock);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);

 	/* Register with kernel */
 	if (register_netdev(dev)) {
@@ -3156,8 +3215,7 @@ static int __devinit gem_init_one(struct
 		printk(KERN_INFO "%s: Found %s PHY\n", dev->name,
 			gp->phy_mii.def ? gp->phy_mii.def->name : "no");

-	/* GEM can do it all... */
-	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_LLTX;
+	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
 	if (pci_using_dac)
 		dev->features |= NETIF_F_HIGHDMA;

@@ -3177,7 +3235,6 @@ err_out_free_netdev:
 err_disable_device:
 	pci_disable_device(pdev);
 	return err;
-
 }


diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h
index a70067c..c04a320 100644
--- a/drivers/net/sungem.h
+++ b/drivers/net/sungem.h
@@ -7,6 +7,8 @@
 #ifndef _SUNGEM_H
 #define _SUNGEM_H

+#define GEM_INTERRUPT_LOCKLESS 1 /* TODO */
+
 /* Global Registers */
 #define GREG_SEBSTATE	0x0000UL	/* SEB State Register		*/
 #define GREG_CFG	0x0004UL	/* Configuration Register	*/
@@ -60,6 +62,10 @@
 				 GREG_STAT_PCS | GREG_STAT_TXMAC | GREG_STAT_RXMAC | \
 				 GREG_STAT_MAC | GREG_STAT_MIF | GREG_STAT_PCIERR)

+/* Bitmask representing the interrupt conditions the driver handles. The TXDONE
+ * interrupt condition isn't handled as we want to avoid having one interrupt
+ * per transmitted frame.
+ */
 #define GREG_STAT_NAPI		(GREG_STAT_TXALL  | GREG_STAT_TXINTME | \
 				 GREG_STAT_RXDONE | GREG_STAT_ABNORMAL)

@@ -71,6 +77,15 @@
  * on GREG_STAT.
  */

+#if GEM_INTERRUPT_LOCKLESS
+
+/* Bitmask representing the interrupt conditions that we clear using GREG_IACK.
+ * We clear all the top-level interrupt conditions (bits 0 through 6) that we
+ * handle.
+ */
+#define GREG_STAT_IACK		(GREG_STAT_NAPI & 0x3f)
+#endif
+
 /* Global PCI Error Status Register */
 #define GREG_PCIESTAT_BADACK	0x00000001	/* No ACK64# during ABS64 cycle	*/
 #define GREG_PCIESTAT_DTRTO	0x00000002	/* Delayed transaction timeout	*/
@@ -973,8 +988,11 @@ enum link_state {
 };

 struct gem {
+	int			irq_sync;
 	spinlock_t		lock;
-	spinlock_t		tx_lock;
+#if !GEM_INTERRUPT_LOCKLESS
+	spinlock_t		int_lock;
+#endif
 	void __iomem		*regs;
 	int			rx_new, rx_old;
 	int			tx_new, tx_old;


-- 
Eric

[-- Attachment #2: sungem-locking.patch --]
[-- Type: application/octet-stream, Size: 28852 bytes --]

diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 253e96e..31bd90d 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -9,26 +9,6 @@
  *
  * NAPI and NETPOLL support
  * (C) 2004 by Eric Lemoine (eric.lemoine@gmail.com)
- *
- * TODO:
- *  - Now that the driver was significantly simplified, I need to rework
- *    the locking. I'm sure we don't need _2_ spinlocks, and we probably
- *    can avoid taking most of them for so long period of time (and schedule
- *    instead). The main issues at this point are caused by the netdev layer
- *    though:
- *
- *    gem_change_mtu() and gem_set_multicast() are called with a read_lock()
- *    help by net/core/dev.c, thus they can't schedule. That means they can't
- *    call netif_poll_disable() neither, thus force gem_poll() to keep a spinlock
- *    where it could have been dropped. change_mtu especially would love also to
- *    be able to msleep instead of horrid locked delays when resetting the HW,
- *    but that read_lock() makes it impossible, unless I defer it's action to
- *    the reset task, which means it'll be asynchronous (won't take effect until
- *    the system schedules a bit).
- *
- *    Also, it would probably be possible to also remove most of the long-life
- *    locking in open/resume code path (gem_reinit_chip) by beeing more careful
- *    about when we can start taking interrupts or get xmit() called...
  */
 
 #include <linux/module.h>
@@ -206,11 +186,18 @@ static inline void phy_write(struct gem 
 	__phy_write(gp, gp->mii_phy_addr, reg, val);
 }
 
-static inline void gem_enable_ints(struct gem *gp)
+static inline void __gem_enable_ints(struct gem *gp)
 {
-	/* Enable all interrupts but TXDONE */
+	/* Enable all interrupts except TXDONE */
 	writel(GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }
+	
+static inline void gem_enable_ints(struct gem *gp)
+{
+	gp->irq_sync = 0;
+	wmb();
+	__gem_enable_ints(gp);
+}
 
 static inline void gem_disable_ints(struct gem *gp)
 {
@@ -218,6 +205,60 @@ static inline void gem_disable_ints(stru
 	writel(GREG_STAT_NAPI | GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }
 
+#if GEM_INTERRUPT_LOCKLESS
+static inline u32 gem_read_status2(struct gem *gp)
+{
+	return readl(gp->regs + GREG_STAT2);
+}
+
+static inline void gem_ack_int(struct gem *gp)
+{
+	writel(GREG_STAT_IACK, gp->regs + GREG_IACK);
+}
+
+#else
+static inline u32 gem_read_status(struct gem *gp)
+{
+	return readl(gp->regs + GREG_STAT);
+}
+#endif
+
+static inline void gem_netif_stop(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	dev->trans_start = jiffies; /* prevent tx timeout */
+	netif_poll_disable(dev);
+	netif_tx_disable(dev);
+}
+
+static inline void gem_netif_start(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	/* An unconditional netif_wake_queue is ok so long as the caller
+	 * has freed tx slots, which is done in gem_reinit_chip().
+	 */
+	netif_wake_queue(dev);
+	netif_poll_enable(dev);
+}
+
+static inline void gem_schedule_reset_task(struct gem *gp)
+{
+	gp->reset_task_pending = 1;
+	smp_mb();
+	schedule_work(&gp->reset_task);
+}
+
+static inline void gem_wait_reset_task(struct gem *gp)
+{
+	mb();
+	while (gp->reset_task_pending) {
+		yield();
+		mb();
+	}
+}
+
 static void gem_get_cell(struct gem *gp)
 {
 	BUG_ON(gp->cell_enabled < 0);
@@ -658,12 +699,20 @@ static int gem_abnormal_irq(struct net_d
 	return 0;
 
 do_reset:
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
+	gem_schedule_reset_task(gp);
 
 	return 1;
 }
 
+static inline u32 gem_tx_avail(struct gem *gp)
+{
+	smp_mb();
+	return (gp->tx_old <= gp->tx_new) ?
+		gp->tx_old + (TX_RING_SIZE - 1) - gp->tx_new :
+		gp->tx_old - gp->tx_new - 1;
+}
+
+
 static __inline__ void gem_tx(struct net_device *dev, struct gem *gp, u32 gem_status)
 {
 	int entry, limit;
@@ -717,11 +766,20 @@ static __inline__ void gem_tx(struct net
 		gp->net_stats.tx_packets++;
 		dev_kfree_skb_irq(skb);
 	}
+
 	gp->tx_old = entry;
 
-	if (netif_queue_stopped(dev) &&
-	    TX_BUFFS_AVAIL(gp) > (MAX_SKB_FRAGS + 1))
-		netif_wake_queue(dev);
+	/* Need to make tx_old update visible to gem_start_xmit() */
+	smp_mb();
+
+	if (unlikely(netif_queue_stopped(dev) &&
+	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))) {
+		netif_tx_lock(dev);
+		if (netif_queue_stopped(dev) &&
+	    	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+		netif_tx_unlock(dev);
+	}
 }
 
 static __inline__ void gem_post_rxds(struct gem *gp, int limit)
@@ -882,12 +940,6 @@ static int gem_rx(struct gem *gp, int wo
 static int gem_poll(struct net_device *dev, int *budget)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	/*
-	 * NAPI locking nightmare: See comment at head of driver
-	 */
-	spin_lock_irqsave(&gp->lock, flags);
 
 	do {
 		int work_to_do, work_done;
@@ -899,19 +951,10 @@ static int gem_poll(struct net_device *d
 		}
 
 		/* Run TX completion thread */
-		spin_lock(&gp->tx_lock);
 		gem_tx(dev, gp, gp->status);
-		spin_unlock(&gp->tx_lock);
-
-		spin_unlock_irqrestore(&gp->lock, flags);
 
-		/* Run RX thread. We don't use any locking here,
-		 * code willing to do bad things - like cleaning the
-		 * rx ring - must call netif_poll_disable(), which
-		 * schedule_timeout()'s if polling is already disabled.
-		 */
+		/* Run RX thread */
 		work_to_do = min(*budget, dev->quota);
-
 		work_done = gem_rx(gp, work_to_do);
 
 		*budget -= work_done;
@@ -920,23 +963,57 @@ static int gem_poll(struct net_device *d
 		if (work_done >= work_to_do)
 			return 1;
 
-		spin_lock_irqsave(&gp->lock, flags);
-
 		gp->status = readl(gp->regs + GREG_STAT);
 	} while (gp->status & GREG_STAT_NAPI);
 
 	__netif_rx_complete(dev);
-	gem_enable_ints(gp);
+	__gem_enable_ints(gp);
 
-	spin_unlock_irqrestore(&gp->lock, flags);
 	return 0;
 }
 
+static void gem_irq_quiesce(struct gem *gp)
+{
+	BUG_ON(gp->irq_sync);
+
+	gp->irq_sync = 1;
+	smp_mb();
+
+	synchronize_irq(gp->pdev->irq);
+}
+
+static inline int gem_irq_sync(struct gem *gp)
+{
+	return gp->irq_sync;
+}
+
+static inline void gem_full_lock(struct gem *gp, int irq_sync)
+{
+	spin_lock_bh(&gp->lock);
+	if (irq_sync)
+		gem_irq_quiesce(gp);
+}
+
+static inline void gem_full_unlock(struct gem *gp)
+{
+	spin_unlock_bh(&gp->lock);
+}
+
+#if GEM_INTERRUPT_LOCKLESS
+
+/* Lockless implementation of IRQ handler.
+ *
+ * The downside of this implementation is that it requires three PIO
+ * operations: read status, disable interrupts and ack interrupt.
+ *
+ * Also, this implementation may not work with NET_POLL_CONTROLLER.
+ * TODO: needs further investigation.
+ */
 static irqreturn_t gem_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
 	struct gem *gp = dev->priv;
-	unsigned long flags;
+	u32 gem_status;
 
 	/* Swallow interrupts when shutting the chip down, though
 	 * that shouldn't happen, we should have done free_irq() at
@@ -945,29 +1022,70 @@ static irqreturn_t gem_interrupt(int irq
 	if (!gp->running)
 		return IRQ_HANDLED;
 
-	spin_lock_irqsave(&gp->lock, flags);
+	gem_status = gem_read_status2(gp);
+	if (unlikely(!gem_status)) /* shared irq? */
+		return IRQ_NONE;
 
 	if (netif_rx_schedule_prep(dev)) {
-		u32 gem_status = readl(gp->regs + GREG_STAT);
+		gem_disable_ints(gp);
+		gem_ack_int(gp);
 
-		if (gem_status == 0) {
+		if (gem_irq_sync(gp)) {
+			/* We are sure that *we* disabled polling, so we can
+			 * safely reenable it before returning.
+			 */
 			netif_poll_enable(dev);
-			spin_unlock_irqrestore(&gp->lock, flags);
-			return IRQ_NONE;
+			return IRQ_HANDLED;
 		}
+
 		gp->status = gem_status;
-		gem_disable_ints(gp);
 		__netif_rx_schedule(dev);
 	}
 
-	spin_unlock_irqrestore(&gp->lock, flags);
+	return IRQ_HANDLED;
+}
+#else
+static irqreturn_t gem_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct gem *gp = dev->priv;
+	unsigned int handled = 1;
+	unsigned long flags;
 
-	/* If polling was disabled at the time we received that
-	 * interrupt, we may return IRQ_HANDLED here while we
-	 * should return IRQ_NONE. No big deal...
+	/* Swallow interrupts when shutting the chip down, though
+	 * that shouldn't happen, we should have done free_irq() at
+	 * this point...
 	 */
+	if (!gp->running)
+		return IRQ_HANDLED;
+
+	spin_lock_irqsave(&gp->int_lock, flags);
+
+	if (netif_rx_schedule_prep(dev)) {
+		u32 gem_status = gem_read_status(gp);
+
+		if (unlikely(!gem_status)) {
+			netif_poll_enable(dev);
+			handled = 0;
+			goto out;
+		}
+
+		gem_disable_ints(gp);
+
+		if (gem_irq_sync(gp)) {
+			netif_poll_enable(dev);
+			goto out;
+		}
+
+		gp->status = gem_status;
+		__netif_rx_schedule(dev);
+	}
+
+out:
+	spin_unlock_irqrestore(&gp->int_lock, flags);
 	return IRQ_HANDLED;
 }
+#endif
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void gem_poll_controller(struct net_device *dev)
@@ -999,14 +1117,8 @@ static void gem_tx_timeout(struct net_de
 	       readl(gp->regs + MAC_RXSTAT),
 	       readl(gp->regs + MAC_RXCFG));
 
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
-
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	printk(KERN_INFO "%s: gem_tx_timeout\n", dev->name);
+	gem_schedule_reset_task(gp);
 }
 
 static __inline__ int gem_intme(int entry)
@@ -1023,7 +1135,6 @@ static int gem_start_xmit(struct sk_buff
 	struct gem *gp = dev->priv;
 	int entry;
 	u64 ctrl;
-	unsigned long flags;
 
 	ctrl = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1037,24 +1148,13 @@ static int gem_start_xmit(struct sk_buff
 			(csum_stuff_off << 21));
 	}
 
-	local_irq_save(flags);
-	if (!spin_trylock(&gp->tx_lock)) {
-		/* Tell upper layer to requeue */
-		local_irq_restore(flags);
-		return NETDEV_TX_LOCKED;
-	}
-	/* We raced with gem_do_stop() */
-	if (!gp->running) {
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		return NETDEV_TX_BUSY;
-	}
-
 	/* This is a hard error, log it. */
-	if (TX_BUFFS_AVAIL(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
-		netif_stop_queue(dev);
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
-		       dev->name);
+	if (gem_tx_avail(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
+		if (!netif_queue_stopped(dev)) {
+			netif_stop_queue(dev);
+			printk(KERN_ERR PFX "%s: BUG! Tx Ring full when "
+			       "queue awake!\n", dev->name);
+		}
 		return NETDEV_TX_BUSY;
 	}
 
@@ -1131,15 +1231,17 @@ static int gem_start_xmit(struct sk_buff
 	}
 
 	gp->tx_new = entry;
-	if (TX_BUFFS_AVAIL(gp) <= (MAX_SKB_FRAGS + 1))
+	if (unlikely(gem_tx_avail(gp) <= (MAX_SKB_FRAGS + 1))) {
 		netif_stop_queue(dev);
+		if (gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+	}
 
 	if (netif_msg_tx_queued(gp))
 		printk(KERN_DEBUG "%s: tx queued, slot %d, skblen %d\n",
 		       dev->name, entry, skb->len);
 	mb();
 	writel(gp->tx_new, gp->regs + TXDMA_KICK);
-	spin_unlock_irqrestore(&gp->tx_lock, flags);
 
 	dev->trans_start = jiffies;
 
@@ -1148,7 +1250,6 @@ static int gem_start_xmit(struct sk_buff
 
 #define STOP_TRIES 32
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reset(struct gem *gp)
 {
 	int limit;
@@ -1174,7 +1275,6 @@ static void gem_reset(struct gem *gp)
 		printk(KERN_ERR "%s: SW reset is ghetto.\n", gp->dev->name);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_start_dma(struct gem *gp)
 {
 	u32 val;
@@ -1197,9 +1297,7 @@ static void gem_start_dma(struct gem *gp
 	writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. DMA won't be
- * actually stopped before about 4ms tho ...
- */
+/* DMA won't be actually stopped before about 4ms tho ... */
 static void gem_stop_dma(struct gem *gp)
 {
 	u32 val;
@@ -1220,8 +1318,7 @@ static void gem_stop_dma(struct gem *gp)
 }
 
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
-// XXX dbl check what that function should do when called on PCS PHY
+/* XXX dbl check what that function should do when called on PCS PHY */
 static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep)
 {
 	u32 advertise, features;
@@ -1306,8 +1403,6 @@ non_mii:
 
 /* A link-up condition has occurred, initialize and enable the
  * rest of the chip.
- *
- * Must be invoked under gp->lock and gp->tx_lock.
  */
 static int gem_set_link_modes(struct gem *gp)
 {
@@ -1417,7 +1512,6 @@ static int gem_set_link_modes(struct gem
 	return 0;
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static int gem_mdio_link_not_up(struct gem *gp)
 {
 	switch (gp->lstate) {
@@ -1474,13 +1568,13 @@ static void gem_link_timer(unsigned long
 	if (gp->asleep)
 		return;
 
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 
 	/* If the reset task is still pending, we just
 	 * reschedule the link timer
 	 */
+	smp_mb();
 	if (gp->reset_task_pending)
 		goto restart;
 
@@ -1528,8 +1622,7 @@ static void gem_link_timer(unsigned long
 				printk(KERN_INFO "%s: Link down\n",
 					gp->dev->name);
 			netif_carrier_off(gp->dev);
-			gp->reset_task_pending = 1;
-			schedule_work(&gp->reset_task);
+			gem_schedule_reset_task(gp);
 			restart_aneg = 1;
 		} else if (++gp->timer_ticks > 10) {
 			if (found_mii_phy(gp))
@@ -1546,11 +1639,9 @@ restart:
 	mod_timer(&gp->link_timer, jiffies + ((12 * HZ) / 10));
 out_unlock:
 	gem_put_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_clean_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1601,7 +1692,6 @@ static void gem_clean_rings(struct gem *
 	}
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1775,12 +1865,9 @@ static void gem_init_phy(struct gem *gp)
 	netif_carrier_off(gp->dev);
 
 	/* Can I advertise gigabit here ? I'd need BCM PHY docs... */
-	spin_lock_irq(&gp->lock);
 	gem_begin_auto_negotiation(gp, NULL);
-	spin_unlock_irq(&gp->lock);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_dma(struct gem *gp)
 {
 	u64 desc_dma = (u64) gp->gblock_dvma;
@@ -1818,7 +1905,6 @@ static void gem_init_dma(struct gem *gp)
 		       gp->regs + RXDMA_BLANK);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static u32 gem_setup_multicast(struct gem *gp)
 {
 	u32 rxcfg = 0;
@@ -1860,7 +1946,6 @@ static u32 gem_setup_multicast(struct ge
 	return rxcfg;
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_mac(struct gem *gp)
 {
 	unsigned char *e = &gp->dev->dev_addr[0];
@@ -1943,7 +2028,6 @@ static void gem_init_mac(struct gem *gp)
 		writel(0, gp->regs + WOL_WAKECSR);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_pause_thresholds(struct gem *gp)
 {
        	u32 cfg;
@@ -2096,7 +2180,6 @@ static int gem_check_invariants(struct g
 	return 0;
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reinit_chip(struct gem *gp)
 {
 	/* Reset the chip */
@@ -2116,12 +2199,10 @@ static void gem_reinit_chip(struct gem *
 	gem_init_mac(gp);
 }
 
-
 /* Must be invoked with no lock held. */
 static void gem_stop_phy(struct gem *gp, int wol)
 {
 	u32 mifcfg;
-	unsigned long flags;
 
 	/* Let the chip settle down a bit, it seems that helps
 	 * for sleep mode on some models
@@ -2167,13 +2248,13 @@ static void gem_stop_phy(struct gem *gp,
 	writel(0, gp->regs + RXDMA_CFG);
 
 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);
+
 		gem_reset(gp);
 		writel(MAC_TXRST_CMD, gp->regs + MAC_TXRST);
 		writel(MAC_RXRST_CMD, gp->regs + MAC_RXRST);
-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+
+		gem_full_unlock(gp);
 
 		/* No need to take the lock here */
 
@@ -2192,14 +2273,11 @@ static void gem_stop_phy(struct gem *gp,
 	}
 }
 
-
 static int gem_do_start(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
 
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 
 	/* Enable the cell */
 	gem_get_cell(gp);
@@ -2211,28 +2289,25 @@ static int gem_do_start(struct net_devic
 
 	if (gp->lstate == link_up) {
 		netif_carrier_on(gp->dev);
+		/* gem_set_link_modes starts DMA and enables ints */
 		gem_set_link_modes(gp);
 	}
 
-	netif_wake_queue(gp->dev);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	if (request_irq(gp->pdev->irq, gem_interrupt,
-				   IRQF_SHARED, dev->name, (void *)dev)) {
+		        IRQF_SHARED, dev->name, (void *)dev)) {
+
 		printk(KERN_ERR "%s: failed to request irq !\n", gp->dev->name);
 
-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);
 
 		gp->running =  0;
 		gem_reset(gp);
 		gem_clean_rings(gp);
 		gem_put_cell(gp);
 
-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);
 
 		return -EAGAIN;
 	}
@@ -2243,22 +2318,14 @@ static int gem_do_start(struct net_devic
 static void gem_do_stop(struct net_device *dev, int wol)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
-
-	gp->running = 0;
-
-	/* Stop netif queue */
-	netif_stop_queue(dev);
 
-	/* Make sure ints are disabled */
+	gem_full_lock(gp, 1);
 	gem_disable_ints(gp);
+	
+	gp->running = 0;
 
-	/* We can drop the lock now */
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	/* We can drop the lock now that running is set to 0 */
+	gem_full_unlock(gp);
 
 	/* If we are going to sleep with WOL */
 	gem_stop_dma(gp);
@@ -2275,9 +2342,9 @@ static void gem_do_stop(struct net_devic
 
 	/* Cell not needed neither if no WOL */
 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
+		gem_full_lock(gp, 0);
 		gem_put_cell(gp);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);
 	}
 }
 
@@ -2287,30 +2354,26 @@ static void gem_reset_task(void *data)
 
 	mutex_lock(&gp->pm_mutex);
 
-	netif_poll_disable(gp->dev);
-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);
 
 	if (gp->running == 0)
 		goto not_running;
 
-	if (gp->running) {
-		netif_stop_queue(gp->dev);
+	gem_netif_stop(gp);
 
-		/* Reset the chip & rings */
-		gem_reinit_chip(gp);
-		if (gp->lstate == link_up)
-			gem_set_link_modes(gp);
-		netif_wake_queue(gp->dev);
-	}
- not_running:
-	gp->reset_task_pending = 0;
+	/* Reset the chip & rings */
+	gem_reinit_chip(gp);
+	if (gp->lstate == link_up)
+		gem_set_link_modes(gp);
 
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_netif_start(gp);
 
-	netif_poll_enable(gp->dev);
+not_running:
+	gp->reset_task_pending = 0;
+
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);
 
 	mutex_unlock(&gp->pm_mutex);
 }
@@ -2324,8 +2387,12 @@ static int gem_open(struct net_device *d
 	mutex_lock(&gp->pm_mutex);
 
 	/* We need the cell enabled */
-	if (!gp->asleep)
+	if (!gp->asleep) {
 		rc = gem_do_start(dev);
+		if (rc == 0)
+			netif_start_queue(dev);
+	}
+
 	gp->opened = (rc == 0);
 
 	mutex_unlock(&gp->pm_mutex);
@@ -2337,15 +2404,14 @@ static int gem_close(struct net_device *
 {
 	struct gem *gp = dev->priv;
 
-	/* Note: we don't need to call netif_poll_disable() here because
-	 * our caller (dev_close) already did it for us
-	 */
-
 	mutex_lock(&gp->pm_mutex);
 
 	gp->opened = 0;
-	if (!gp->asleep)
+	if (!gp->asleep) {
+		/* Upper layer took care of disabling polling */
+		netif_stop_queue(dev);
 		gem_do_stop(dev, 0);
+	}
 
 	mutex_unlock(&gp->pm_mutex);
 
@@ -2357,22 +2423,17 @@ static int gem_suspend(struct pci_dev *p
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;
 
 	mutex_lock(&gp->pm_mutex);
 
-	netif_poll_disable(dev);
-
 	printk(KERN_INFO "%s: suspending, WakeOnLan %s\n",
 	       dev->name,
 	       (gp->wake_on_lan && gp->opened) ? "enabled" : "disabled");
 
 	/* Keep the cell enabled during the entire operation */
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	/* If the driver is opened, we stop the MAC */
 	if (gp->opened) {
@@ -2381,6 +2442,7 @@ static int gem_suspend(struct pci_dev *p
 
 		/* Switch off MAC, remember WOL setting */
 		gp->asleep_wol = gp->wake_on_lan;
+		gem_netif_stop(gp);
 		gem_do_stop(dev, gp->asleep_wol);
 	} else
 		gp->asleep_wol = 0;
@@ -2399,8 +2461,7 @@ static int gem_suspend(struct pci_dev *p
 	mutex_unlock(&gp->pm_mutex);
 
 	/* Wait for a pending reset task to complete */
-	while (gp->reset_task_pending)
-		yield();
+	gem_wait_reset_task(gp);
 	flush_scheduled_work();
 
 	/* Shut the PHY down eventually and setup WOL */
@@ -2421,7 +2482,6 @@ static int gem_resume(struct pci_dev *pd
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;
 
 	printk(KERN_INFO "%s: resuming\n", dev->name);
 
@@ -2465,11 +2525,9 @@ static int gem_resume(struct pci_dev *pd
 
 		/* Re-attach net device */
 		netif_device_attach(dev);
-
 	}
 
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 
 	/* If we had WOL enabled, the cell clock was never turned off during
 	 * sleep, so we end up beeing unbalanced. Fix that here
@@ -2482,10 +2540,9 @@ static int gem_resume(struct pci_dev *pd
 	 */
 	gem_put_cell(gp);
 
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
-	netif_poll_enable(dev);
+	gem_netif_start(gp);
 
 	mutex_unlock(&gp->pm_mutex);
 
@@ -2498,8 +2555,8 @@ static struct net_device_stats *gem_get_
 	struct gem *gp = dev->priv;
 	struct net_device_stats *stats = &gp->net_stats;
 
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 0);
 
 	/* I have seen this being called while the PM was in progress,
 	 * so we shield against this
@@ -2522,27 +2579,26 @@ static struct net_device_stats *gem_get_
 		writel(0, gp->regs + MAC_LCOLL);
 	}
 
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
+	gem_netif_start(gp);
 
 	return &gp->net_stats;
 }
 
+/* gem_set_multicast is called with netif_tx_lock held. Thus, it
+ * cannot sleep.
+ */
 static void gem_set_multicast(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
 	u32 rxcfg, rxcfg_new;
 	int limit = 10000;
 
-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 
 	if (!gp->running)
 		goto bail;
 
-	netif_stop_queue(dev);
-
 	rxcfg = readl(gp->regs + MAC_RXCFG);
 	rxcfg_new = gem_setup_multicast(gp);
 #ifdef STRIP_FCS
@@ -2562,11 +2618,8 @@ static void gem_set_multicast(struct net
 
 	writel(rxcfg, gp->regs + MAC_RXCFG);
 
-	netif_wake_queue(dev);
-
  bail:
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }
 
 /* Jumbo-grams don't seem to work :-( */
@@ -2593,16 +2646,22 @@ static int gem_change_mtu(struct net_dev
 	}
 
 	mutex_lock(&gp->pm_mutex);
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);
+	
 	dev->mtu = new_mtu;
 	if (gp->running) {
 		gem_reinit_chip(gp);
 		if (gp->lstate == link_up)
 			gem_set_link_modes(gp);
 	}
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+
+	gem_netif_start(gp);
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);
+
 	mutex_unlock(&gp->pm_mutex);
 
 	return 0;
@@ -2635,7 +2694,7 @@ static int gem_get_settings(struct net_d
 		cmd->phy_address = 0; /* XXX fixed PHYAD */
 
 		/* Return current PHY settings */
-		spin_lock_irq(&gp->lock);
+		gem_full_lock(gp, 0);
 		cmd->autoneg = gp->want_autoneg;
 		cmd->speed = gp->phy_mii.speed;
 		cmd->duplex = gp->phy_mii.duplex;
@@ -2647,7 +2706,7 @@ static int gem_get_settings(struct net_d
 		 */
 		if (cmd->advertising == 0)
 			cmd->advertising = cmd->supported;
-		spin_unlock_irq(&gp->lock);
+		gem_full_unlock(gp);
 	} else { // XXX PCS ?
 		cmd->supported =
 			(SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
@@ -2685,11 +2744,11 @@ static int gem_set_settings(struct net_d
 		return -EINVAL;
 
 	/* Apply settings and restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, cmd);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 
 	return 0;
 }
@@ -2702,11 +2761,11 @@ static int gem_nway_reset(struct net_dev
 		return -EINVAL;
 
 	/* Restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, NULL);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 
 	return 0;
 }
@@ -2770,16 +2829,15 @@ static int gem_ioctl(struct net_device *
 	struct gem *gp = dev->priv;
 	struct mii_ioctl_data *data = if_mii(ifr);
 	int rc = -EOPNOTSUPP;
-	unsigned long flags;
 
 	/* Hold the PM mutex while doing ioctl's or we may collide
 	 * with power management.
 	 */
 	mutex_lock(&gp->pm_mutex);
 
-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	switch (cmd) {
 	case SIOCGMIIPHY:		/* Get address of MII PHY in use. */
@@ -2809,9 +2867,9 @@ static int gem_ioctl(struct net_device *
 		break;
 	};
 
-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_put_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	mutex_unlock(&gp->pm_mutex);
 
@@ -2918,6 +2976,7 @@ static void gem_remove_one(struct pci_de
 	if (dev) {
 		struct gem *gp = dev->priv;
 
+		/* unregister_netdev will close the device if it's open */
 		unregister_netdev(dev);
 
 		/* Stop the link timer */
@@ -2927,8 +2986,7 @@ static void gem_remove_one(struct pci_de
 		gem_get_cell(gp);
 
 		/* Wait for a pending reset task to complete */
-		while (gp->reset_task_pending)
-			yield();
+		gem_wait_reset_task(gp);
 		flush_scheduled_work();
 
 		/* Shut the PHY down */
@@ -3035,8 +3093,11 @@ static int __devinit gem_init_one(struct
 
 	gp->msg_enable = DEFAULT_MSG;
 
+	gp->irq_sync = 0;
 	spin_lock_init(&gp->lock);
-	spin_lock_init(&gp->tx_lock);
+#if !GEM_INTERRUPT_LOCKLESS
+	spin_lock_init(&gp->int_lock);
+#endif
 	mutex_init(&gp->pm_mutex);
 
 	init_timer(&gp->link_timer);
@@ -3132,9 +3193,7 @@ static int __devinit gem_init_one(struct
 	 */
 	gem_init_phy(gp);
 
-	spin_lock_irq(&gp->lock);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
 
 	/* Register with kernel */
 	if (register_netdev(dev)) {
@@ -3156,8 +3215,7 @@ static int __devinit gem_init_one(struct
 		printk(KERN_INFO "%s: Found %s PHY\n", dev->name,
 			gp->phy_mii.def ? gp->phy_mii.def->name : "no");
 
-	/* GEM can do it all... */
-	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_LLTX;
+	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
 	if (pci_using_dac)
 		dev->features |= NETIF_F_HIGHDMA;
 
@@ -3177,7 +3235,6 @@ err_out_free_netdev:
 err_disable_device:
 	pci_disable_device(pdev);
 	return err;
-
 }
 
 
diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h
index a70067c..c04a320 100644
--- a/drivers/net/sungem.h
+++ b/drivers/net/sungem.h
@@ -7,6 +7,8 @@
 #ifndef _SUNGEM_H
 #define _SUNGEM_H
 
+#define GEM_INTERRUPT_LOCKLESS 1 /* TODO */
+
 /* Global Registers */
 #define GREG_SEBSTATE	0x0000UL	/* SEB State Register		*/
 #define GREG_CFG	0x0004UL	/* Configuration Register	*/
@@ -60,6 +62,10 @@
 				 GREG_STAT_PCS | GREG_STAT_TXMAC | GREG_STAT_RXMAC | \
 				 GREG_STAT_MAC | GREG_STAT_MIF | GREG_STAT_PCIERR)
 
+/* Bitmask representing the interrupt conditions the driver handles. The TXDONE
+ * interrupt condition isn't handled as we want to avoid having one interrupt
+ * per transmitted frame.
+ */
 #define GREG_STAT_NAPI		(GREG_STAT_TXALL  | GREG_STAT_TXINTME | \
 				 GREG_STAT_RXDONE | GREG_STAT_ABNORMAL)
 
@@ -71,6 +77,15 @@
  * on GREG_STAT.
  */
 
+#if GEM_INTERRUPT_LOCKLESS
+
+/* Bitmask representing the interrupt conditions that we clear using GREG_IACK.
+ * We clear all the top-level interrupt conditions (bits 0 through 6) that we
+ * handle.
+ */
+#define GREG_STAT_IACK		(GREG_STAT_NAPI & 0x3f)
+#endif
+
 /* Global PCI Error Status Register */
 #define GREG_PCIESTAT_BADACK	0x00000001	/* No ACK64# during ABS64 cycle	*/
 #define GREG_PCIESTAT_DTRTO	0x00000002	/* Delayed transaction timeout	*/
@@ -973,8 +988,11 @@ enum link_state {
 };
 
 struct gem {
+	int			irq_sync;
 	spinlock_t		lock;
-	spinlock_t		tx_lock;
+#if !GEM_INTERRUPT_LOCKLESS
+	spinlock_t		int_lock;
+#endif
 	void __iomem		*regs;
 	int			rx_new, rx_old;
 	int			tx_new, tx_old;

* Re: [patch sungem] improved locking
  2006-11-12 23:11       ` Eric Lemoine
@ 2006-11-14  0:46         ` David Miller
  2006-11-14  7:28           ` Eric Lemoine
  0 siblings, 1 reply; 31+ messages in thread
From: David Miller @ 2006-11-14  0:46 UTC (permalink / raw)
  To: eric.lemoine; +Cc: netdev, benh

From: "Eric Lemoine" <eric.lemoine@gmail.com>
Date: Mon, 13 Nov 2006 00:11:49 +0100

> +#if GEM_INTERRUPT_LOCKLESS
> +
> +/* Bitmask representing the interrupt conditions that we clear using GREG_IACK.
> + * We clear all the top-level interrupt conditions (bits 0 through 6) that we
> + * handle.
> + */
> +#define GREG_STAT_IACK		(GREG_STAT_NAPI & 0x3f)
> +#endif

I'm asking for this a second time...  and to be honest I'm a little
bit ticked off.

Please spell out the explicit bits using the existing defines to
create this mask.  Do not use this 0x3f magic number, which
contains bits that are undefined; obtain this mask (as I asked the
first time) like this:

	(GREG_STAT_TXINTME | GREG_STAT_TXALL |
	 GREG_STAT_TXDONE | GREG_STAT_RXDONE |
	 GREG_STAT_RXNOBUF)

I will not go through the effort a third time, instead I will simply
ignore your patch submissions, it's that simple.  If you ignore me, I
ignore you.

You want me to review this patch and the locking bits, but you're
never going to get to that point if you continually ignore the
most simple requests I'm making of you. :-(

* Re: [patch sungem] improved locking
  2006-11-14  0:46         ` David Miller
@ 2006-11-14  7:28           ` Eric Lemoine
  2006-11-14  7:44             ` David Miller
  0 siblings, 1 reply; 31+ messages in thread
From: Eric Lemoine @ 2006-11-14  7:28 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, benh

On 11/14/06, David Miller <davem@davemloft.net> wrote:
> From: "Eric Lemoine" <eric.lemoine@gmail.com>
> Date: Mon, 13 Nov 2006 00:11:49 +0100
>
> > +#if GEM_INTERRUPT_LOCKLESS
> > +
> > +/* Bitmask representing the interrupt conditions that we clear using GREG_IACK.
> > + * We clear all the top-level interrupt conditions (bits 0 through 6) that we
> > + * handle.
> > + */
> > +#define GREG_STAT_IACK               (GREG_STAT_NAPI & 0x3f)
> > +#endif
>
> I'm asking for this a second time...  and to be honest I'm a little
> bit ticked off.
>
> Please spell out the explicit bits using the existing defines to
> create this mask.  Do not use this 0x3f magic number, which
> contains bits that are undefined; obtain this mask (as I asked the
> first time) like this:
>
>         (GREG_STAT_TXINTME | GREG_STAT_TXALL |
>          GREG_STAT_TXDONE | GREG_STAT_RXDONE |
>          GREG_STAT_RXNOBUF)
>
> I will not go through the effort a third time, instead I will simply
> ignore your patch submissions, it's that simple.  If you ignore me, I
> ignore you.

Dave, please believe me, I'm not ignoring you at all! I'm just trying
to make things better.

I had thought that:

(GREG_STAT_NAPI & 0x3f)

was better than

(GREG_STAT_TXINTME | GREG_STAT_TXALL |
 GREG_STAT_TXDONE | GREG_STAT_RXDONE |
 GREG_STAT_RXNOBUF)

because it makes it explicit that only bits 0 through 6 are taken into
account when writing the IACK register. In addition, GREG_STAT_IACK
doesn't need to be changed if GREG_STAT_NAPI is changed. That's why I
did it that way. And unlike in the previous patch, this new macro no
longer sets bits that don't exist.
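
For reference, the two candidate masks are easy to compare
numerically. Below is a quick userspace sketch; it is not part of the
patch, the GREG_STAT_* values are assumed from sungem.h, and the high
GREG_STAT_ABNORMAL bits (PCS, MAC, MIF, PCIERR and so on) are omitted
since the & 0x3f clears them anyway:

#include <stdio.h>

/* Bit values assumed from drivers/net/sungem.h; note there is no
 * define for bit 3 (0x08), which is reserved in the hardware.
 */
#define GREG_STAT_TXINTME	0x00000001
#define GREG_STAT_TXALL		0x00000002
#define GREG_STAT_TXDONE	0x00000004
#define GREG_STAT_RXDONE	0x00000010
#define GREG_STAT_RXNOBUF	0x00000020
#define GREG_STAT_RXTAGERR	0x00000040

int main(void)
{
	/* Low bits of GREG_STAT_NAPI: TXALL | TXINTME | RXDONE plus
	 * the low GREG_STAT_ABNORMAL bits.
	 */
	unsigned int napi_low = GREG_STAT_TXALL | GREG_STAT_TXINTME |
				GREG_STAT_RXDONE | GREG_STAT_RXNOBUF |
				GREG_STAT_RXTAGERR;

	/* The explicit list Dave asked for. */
	unsigned int spelled = GREG_STAT_TXINTME | GREG_STAT_TXALL |
			       GREG_STAT_TXDONE | GREG_STAT_RXDONE |
			       GREG_STAT_RXNOBUF;

	printf("GREG_STAT_NAPI & 0x3f: 0x%02x\n", napi_low & 0x3f);
	printf("explicit bit list:     0x%02x\n", spelled);
	return 0;
}

If those assumed values are right, the two forms don't even expand to
the same mask: the explicit list also covers TXDONE, which
GREG_STAT_NAPI leaves out on purpose, so that difference needs
settling whichever form is kept.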

If you don't like it, let's discuss, and I can change my mind. But
again, please, do not think I'm ignoring your comments.

Thanks,

-- 
Eric

* Re: [patch sungem] improved locking
  2006-11-14  7:28           ` Eric Lemoine
@ 2006-11-14  7:44             ` David Miller
  2006-11-14 21:54               ` Eric Lemoine
  0 siblings, 1 reply; 31+ messages in thread
From: David Miller @ 2006-11-14  7:44 UTC (permalink / raw)
  To: eric.lemoine; +Cc: netdev, benh

From: "Eric Lemoine" <eric.lemoine@gmail.com>
Date: Tue, 14 Nov 2006 08:28:42 +0100

> because it makes it explicit that only bits 0 through 6 are taken into
> account when writing the IACK register.

The phrase "bits 0 through 6" doesn't make any sense when bit 3 DOES
NOT EXIST in the hardware, it's reserved, it's not there, so including
it only confuses people and obfuscates the code.

Please use the explicit bit mask composed of existing macros, which
not only makes sure that the mask has meaning, but also makes sure
that reserved and non-existing bits are never referenced.
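
In sungem.h terms, the define being asked for would look something
like this (a sketch built from the existing GREG_STAT_* macros; the
final version is not shown in this thread):

/* Interrupt conditions cleared via GREG_IACK, spelled out with the
 * existing defines so that no reserved bit is ever referenced.
 */
#define GREG_STAT_IACK		(GREG_STAT_TXINTME | GREG_STAT_TXALL | \
				 GREG_STAT_TXDONE | GREG_STAT_RXDONE | \
				 GREG_STAT_RXNOBUF)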

* Re: [patch sungem] improved locking
  2006-11-14  7:44             ` David Miller
@ 2006-11-14 21:54               ` Eric Lemoine
  2006-11-28 22:49                 ` David Miller
  0 siblings, 1 reply; 31+ messages in thread
From: Eric Lemoine @ 2006-11-14 21:54 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, benh

[-- Attachment #1: Type: text/plain, Size: 28960 bytes --]

On 11/14/06, David Miller <davem@davemloft.net> wrote:
> From: "Eric Lemoine" <eric.lemoine@gmail.com>
> Date: Tue, 14 Nov 2006 08:28:42 +0100
>
> > because it makes it explicit that only bits 0 through 6 are taken into
> > account when writing the IACK register.
>
> The phrase "bits 0 through 6" doesn't make any sense when bit 3 DOES
> NOT EXIST in the hardware, it's reserved, it's not there, so including
> it only confuses people and obfuscates the code.
>
> Please use the explicit bit mask composed of existing macros, which
> not only makes sure that the mask has meaning, but it also makes sure
> that reserved and non-existing bits are never referenced.

Patch attached.

Remember, GEM_INTERRUPT_LOCKLESS isn't there to stay. It's currently
there because I'm not sure the lockless implementation of
gem_interrupt() will work with net_poll_controller.
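
For context, with CONFIG_NET_POLL_CONTROLLER the handler is not
entered only from the hard irq path; netpoll can call it directly
through the driver's poll_controller hook, roughly like this (a
sketch from memory, not part of this patch):

#ifdef CONFIG_NET_POLL_CONTROLLER
static void gem_poll_controller(struct net_device *dev)
{
	/* netpoll may invoke the interrupt handler directly, so a
	 * lockless gem_interrupt() must tolerate being reentered
	 * concurrently with the real hard irq.
	 */
	gem_interrupt(dev->irq, dev);
}
#endif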

Thanks,

Signed-off-by: Eric Lemoine <eric.lemoine@gmail.com>

diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 253e96e..31bd90d 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -9,26 +9,6 @@
  *
  * NAPI and NETPOLL support
  * (C) 2004 by Eric Lemoine (eric.lemoine@gmail.com)
- *
- * TODO:
- *  - Now that the driver was significantly simplified, I need to rework
- *    the locking. I'm sure we don't need _2_ spinlocks, and we probably
- *    can avoid taking most of them for so long period of time (and schedule
- *    instead). The main issues at this point are caused by the netdev layer
- *    though:
- *
- *    gem_change_mtu() and gem_set_multicast() are called with a read_lock()
- *    help by net/core/dev.c, thus they can't schedule. That means they can't
- *    call netif_poll_disable() neither, thus force gem_poll() to keep a spinlock
- *    where it could have been dropped. change_mtu especially would love also to
- *    be able to msleep instead of horrid locked delays when resetting the HW,
- *    but that read_lock() makes it impossible, unless I defer it's action to
- *    the reset task, which means it'll be asynchronous (won't take effect until
- *    the system schedules a bit).
- *
- *    Also, it would probably be possible to also remove most of the long-life
- *    locking in open/resume code path (gem_reinit_chip) by beeing more careful
- *    about when we can start taking interrupts or get xmit() called...
  */

 #include <linux/module.h>
@@ -206,11 +186,18 @@ static inline void phy_write(struct gem
 	__phy_write(gp, gp->mii_phy_addr, reg, val);
 }

-static inline void gem_enable_ints(struct gem *gp)
+static inline void __gem_enable_ints(struct gem *gp)
 {
-	/* Enable all interrupts but TXDONE */
+	/* Enable all interrupts except TXDONE */
 	writel(GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }
+	
+static inline void gem_enable_ints(struct gem *gp)
+{
+	gp->irq_sync = 0;
+	wmb();
+	__gem_enable_ints(gp);
+}

 static inline void gem_disable_ints(struct gem *gp)
 {
@@ -218,6 +205,60 @@ static inline void gem_disable_ints(stru
 	writel(GREG_STAT_NAPI | GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }

+#if GEM_INTERRUPT_LOCKLESS
+static inline u32 gem_read_status2(struct gem *gp)
+{
+	return readl(gp->regs + GREG_STAT2);
+}
+
+static inline void gem_ack_int(struct gem *gp)
+{
+	writel(GREG_STAT_IACK, gp->regs + GREG_IACK);
+}
+
+#else
+static inline u32 gem_read_status(struct gem *gp)
+{
+	return readl(gp->regs + GREG_STAT);
+}
+#endif
+
+static inline void gem_netif_stop(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	dev->trans_start = jiffies; /* prevent tx timeout */
+	netif_poll_disable(dev);
+	netif_tx_disable(dev);
+}
+
+static inline void gem_netif_start(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	/* An unconditional netif_wake_queue is ok so long as the caller
+	 * has freed tx slots, which is done in gem_reinit_chip().
+	 */
+	netif_wake_queue(dev);
+	netif_poll_enable(dev);
+}
+
+static inline void gem_schedule_reset_task(struct gem *gp)
+{
+	gp->reset_task_pending = 1;
+	smp_mb();
+	schedule_work(&gp->reset_task);
+}
+
+static inline void gem_wait_reset_task(struct gem *gp)
+{
+	mb();
+	while (gp->reset_task_pending) {
+		yield();
+		mb();
+	}
+}
+
 static void gem_get_cell(struct gem *gp)
 {
 	BUG_ON(gp->cell_enabled < 0);
@@ -658,12 +699,20 @@ static int gem_abnormal_irq(struct net_d
 	return 0;

 do_reset:
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
+	gem_schedule_reset_task(gp);

 	return 1;
 }

+static inline u32 gem_tx_avail(struct gem *gp)
+{
+	smp_mb();
+	return (gp->tx_old <= gp->tx_new) ?
+		gp->tx_old + (TX_RING_SIZE - 1) - gp->tx_new :
+		gp->tx_old - gp->tx_new - 1;
+}
+
+
 static __inline__ void gem_tx(struct net_device *dev, struct gem *gp, u32 gem_status)
 {
 	int entry, limit;
@@ -717,11 +766,20 @@ static __inline__ void gem_tx(struct net
 		gp->net_stats.tx_packets++;
 		dev_kfree_skb_irq(skb);
 	}
+
 	gp->tx_old = entry;

-	if (netif_queue_stopped(dev) &&
-	    TX_BUFFS_AVAIL(gp) > (MAX_SKB_FRAGS + 1))
-		netif_wake_queue(dev);
+	/* Need to make tx_old update visible to gem_start_xmit() */
+	smp_mb();
+
+	if (unlikely(netif_queue_stopped(dev) &&
+	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))) {
+		netif_tx_lock(dev);
+		if (netif_queue_stopped(dev) &&
+	    	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+		netif_tx_unlock(dev);
+	}
 }

 static __inline__ void gem_post_rxds(struct gem *gp, int limit)
@@ -882,12 +940,6 @@ static int gem_rx(struct gem *gp, int wo
 static int gem_poll(struct net_device *dev, int *budget)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	/*
-	 * NAPI locking nightmare: See comment at head of driver
-	 */
-	spin_lock_irqsave(&gp->lock, flags);

 	do {
 		int work_to_do, work_done;
@@ -899,19 +951,10 @@ static int gem_poll(struct net_device *d
 		}

 		/* Run TX completion thread */
-		spin_lock(&gp->tx_lock);
 		gem_tx(dev, gp, gp->status);
-		spin_unlock(&gp->tx_lock);
-
-		spin_unlock_irqrestore(&gp->lock, flags);

-		/* Run RX thread. We don't use any locking here,
-		 * code willing to do bad things - like cleaning the
-		 * rx ring - must call netif_poll_disable(), which
-		 * schedule_timeout()'s if polling is already disabled.
-		 */
+		/* Run RX thread */
 		work_to_do = min(*budget, dev->quota);
-
 		work_done = gem_rx(gp, work_to_do);

 		*budget -= work_done;
@@ -920,23 +963,57 @@ static int gem_poll(struct net_device *d
 		if (work_done >= work_to_do)
 			return 1;

-		spin_lock_irqsave(&gp->lock, flags);
-
 		gp->status = readl(gp->regs + GREG_STAT);
 	} while (gp->status & GREG_STAT_NAPI);

 	__netif_rx_complete(dev);
-	gem_enable_ints(gp);
+	__gem_enable_ints(gp);

-	spin_unlock_irqrestore(&gp->lock, flags);
 	return 0;
 }

+static void gem_irq_quiesce(struct gem *gp)
+{
+	BUG_ON(gp->irq_sync);
+
+	gp->irq_sync = 1;
+	smp_mb();
+
+	synchronize_irq(gp->pdev->irq);
+}
+
+static inline int gem_irq_sync(struct gem *gp)
+{
+	return gp->irq_sync;
+}
+
+static inline void gem_full_lock(struct gem *gp, int irq_sync)
+{
+	spin_lock_bh(&gp->lock);
+	if (irq_sync)
+		gem_irq_quiesce(gp);
+}
+
+static inline void gem_full_unlock(struct gem *gp)
+{
+	spin_unlock_bh(&gp->lock);
+}
+
+#if GEM_INTERRUPT_LOCKLESS
+
+/* Lockless implementation of IRQ handler.
+ *
+ * The downside of this implementation is that it requires three PIO
+ * operations: read status, disable interrupts and ack interrupt.
+ *
+ * Also, this implementation may not work with NET_POLL_CONTROLLER.
+ * TODO: needs further investigation.
+ */
 static irqreturn_t gem_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
 	struct gem *gp = dev->priv;
-	unsigned long flags;
+	u32 gem_status;

 	/* Swallow interrupts when shutting the chip down, though
 	 * that shouldn't happen, we should have done free_irq() at
@@ -945,29 +1022,70 @@ static irqreturn_t gem_interrupt(int irq
 	if (!gp->running)
 		return IRQ_HANDLED;

-	spin_lock_irqsave(&gp->lock, flags);
+	gem_status = gem_read_status2(gp);
+	if (unlikely(!gem_status)) /* shared irq? */
+		return IRQ_NONE;

 	if (netif_rx_schedule_prep(dev)) {
-		u32 gem_status = readl(gp->regs + GREG_STAT);
+		gem_disable_ints(gp);
+		gem_ack_int(gp);

-		if (gem_status == 0) {
+		if (gem_irq_sync(gp)) {
+			/* We are sure that *we* disabled polling, so we can
+			 * safely reenable it before returning.
+			 */
 			netif_poll_enable(dev);
-			spin_unlock_irqrestore(&gp->lock, flags);
-			return IRQ_NONE;
+			return IRQ_HANDLED;
 		}
+
 		gp->status = gem_status;
-		gem_disable_ints(gp);
 		__netif_rx_schedule(dev);
 	}

-	spin_unlock_irqrestore(&gp->lock, flags);
+	return IRQ_HANDLED;
+}
+#else
+static irqreturn_t gem_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct gem *gp = dev->priv;
+	unsigned int handled = 1;
+	unsigned long flags;

-	/* If polling was disabled at the time we received that
-	 * interrupt, we may return IRQ_HANDLED here while we
-	 * should return IRQ_NONE. No big deal...
+	/* Swallow interrupts when shutting the chip down, though
+	 * that shouldn't happen, we should have done free_irq() at
+	 * this point...
 	 */
+	if (!gp->running)
+		return IRQ_HANDLED;
+
+	spin_lock_irqsave(&gp->int_lock, flags);
+
+	if (netif_rx_schedule_prep(dev)) {
+		u32 gem_status = gem_read_status(gp);
+
+		if (unlikely(!gem_status)) {
+			netif_poll_enable(dev);
+			handled = 0;
+			goto out;
+		}
+
+		gem_disable_ints(gp);
+
+		if (gem_irq_sync(gp)) {
+			netif_poll_enable(dev);
+			goto out;
+		}
+
+		gp->status = gem_status;
+		__netif_rx_schedule(dev);
+	}
+
+out:
+	spin_unlock_irqrestore(&gp->int_lock, flags);
 	return IRQ_HANDLED;
 }
+#endif

 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void gem_poll_controller(struct net_device *dev)
@@ -999,14 +1117,8 @@ static void gem_tx_timeout(struct net_de
 	       readl(gp->regs + MAC_RXSTAT),
 	       readl(gp->regs + MAC_RXCFG));

-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
-
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	printk(KERN_INFO "%s: gem_tx_timeout\n", dev->name);
+	gem_schedule_reset_task(gp);
 }

 static __inline__ int gem_intme(int entry)
@@ -1023,7 +1135,6 @@ static int gem_start_xmit(struct sk_buff
 	struct gem *gp = dev->priv;
 	int entry;
 	u64 ctrl;
-	unsigned long flags;

 	ctrl = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1037,24 +1148,13 @@ static int gem_start_xmit(struct sk_buff
 			(csum_stuff_off << 21));
 	}

-	local_irq_save(flags);
-	if (!spin_trylock(&gp->tx_lock)) {
-		/* Tell upper layer to requeue */
-		local_irq_restore(flags);
-		return NETDEV_TX_LOCKED;
-	}
-	/* We raced with gem_do_stop() */
-	if (!gp->running) {
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		return NETDEV_TX_BUSY;
-	}
-
 	/* This is a hard error, log it. */
-	if (TX_BUFFS_AVAIL(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
-		netif_stop_queue(dev);
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
-		       dev->name);
+	if (gem_tx_avail(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
+		if (!netif_queue_stopped(dev)) {
+			netif_stop_queue(dev);
+			printk(KERN_ERR PFX "%s: BUG! Tx Ring full when "
+			       "queue awake!\n", dev->name);
+		}
 		return NETDEV_TX_BUSY;
 	}

@@ -1131,15 +1231,17 @@ static int gem_start_xmit(struct sk_buff
 	}

 	gp->tx_new = entry;
-	if (TX_BUFFS_AVAIL(gp) <= (MAX_SKB_FRAGS + 1))
+	if (unlikely(gem_tx_avail(gp) <= (MAX_SKB_FRAGS + 1))) {
 		netif_stop_queue(dev);
+		if (gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+	}

 	if (netif_msg_tx_queued(gp))
 		printk(KERN_DEBUG "%s: tx queued, slot %d, skblen %d\n",
 		       dev->name, entry, skb->len);
 	mb();
 	writel(gp->tx_new, gp->regs + TXDMA_KICK);
-	spin_unlock_irqrestore(&gp->tx_lock, flags);

 	dev->trans_start = jiffies;

@@ -1148,7 +1250,6 @@ static int gem_start_xmit(struct sk_buff

 #define STOP_TRIES 32

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reset(struct gem *gp)
 {
 	int limit;
@@ -1174,7 +1275,6 @@ static void gem_reset(struct gem *gp)
 		printk(KERN_ERR "%s: SW reset is ghetto.\n", gp->dev->name);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_start_dma(struct gem *gp)
 {
 	u32 val;
@@ -1197,9 +1297,7 @@ static void gem_start_dma(struct gem *gp
 	writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. DMA won't be
- * actually stopped before about 4ms tho ...
- */
+/* DMA won't be actually stopped before about 4ms tho ... */
 static void gem_stop_dma(struct gem *gp)
 {
 	u32 val;
@@ -1220,8 +1318,7 @@ static void gem_stop_dma(struct gem *gp)
 }


-/* Must be invoked under gp->lock and gp->tx_lock. */
-// XXX dbl check what that function should do when called on PCS PHY
+/* XXX dbl check what that function should do when called on PCS PHY */
 static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep)
 {
 	u32 advertise, features;
@@ -1306,8 +1403,6 @@ non_mii:

 /* A link-up condition has occurred, initialize and enable the
  * rest of the chip.
- *
- * Must be invoked under gp->lock and gp->tx_lock.
  */
 static int gem_set_link_modes(struct gem *gp)
 {
@@ -1417,7 +1512,6 @@ static int gem_set_link_modes(struct gem
 	return 0;
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static int gem_mdio_link_not_up(struct gem *gp)
 {
 	switch (gp->lstate) {
@@ -1474,13 +1568,13 @@ static void gem_link_timer(unsigned long
 	if (gp->asleep)
 		return;

-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);

 	/* If the reset task is still pending, we just
 	 * reschedule the link timer
 	 */
+	smp_mb();
 	if (gp->reset_task_pending)
 		goto restart;

@@ -1528,8 +1622,7 @@ static void gem_link_timer(unsigned long
 				printk(KERN_INFO "%s: Link down\n",
 					gp->dev->name);
 			netif_carrier_off(gp->dev);
-			gp->reset_task_pending = 1;
-			schedule_work(&gp->reset_task);
+			gem_schedule_reset_task(gp);
 			restart_aneg = 1;
 		} else if (++gp->timer_ticks > 10) {
 			if (found_mii_phy(gp))
@@ -1546,11 +1639,9 @@ restart:
 	mod_timer(&gp->link_timer, jiffies + ((12 * HZ) / 10));
 out_unlock:
 	gem_put_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_clean_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1601,7 +1692,6 @@ static void gem_clean_rings(struct gem *
 	}
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1775,12 +1865,9 @@ #endif
 	netif_carrier_off(gp->dev);

 	/* Can I advertise gigabit here ? I'd need BCM PHY docs... */
-	spin_lock_irq(&gp->lock);
 	gem_begin_auto_negotiation(gp, NULL);
-	spin_unlock_irq(&gp->lock);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_dma(struct gem *gp)
 {
 	u64 desc_dma = (u64) gp->gblock_dvma;
@@ -1818,7 +1905,6 @@ static void gem_init_dma(struct gem *gp)
 		       gp->regs + RXDMA_BLANK);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static u32 gem_setup_multicast(struct gem *gp)
 {
 	u32 rxcfg = 0;
@@ -1860,7 +1946,6 @@ static u32 gem_setup_multicast(struct ge
 	return rxcfg;
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_mac(struct gem *gp)
 {
 	unsigned char *e = &gp->dev->dev_addr[0];
@@ -1943,7 +2028,6 @@ #endif
 		writel(0, gp->regs + WOL_WAKECSR);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_pause_thresholds(struct gem *gp)
 {
        	u32 cfg;
@@ -2096,7 +2180,6 @@ static int gem_check_invariants(struct g
 	return 0;
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reinit_chip(struct gem *gp)
 {
 	/* Reset the chip */
@@ -2116,12 +2199,10 @@ static void gem_reinit_chip(struct gem *
 	gem_init_mac(gp);
 }

-
 /* Must be invoked with no lock held. */
 static void gem_stop_phy(struct gem *gp, int wol)
 {
 	u32 mifcfg;
-	unsigned long flags;

 	/* Let the chip settle down a bit, it seems that helps
 	 * for sleep mode on some models
@@ -2167,13 +2248,13 @@ static void gem_stop_phy(struct gem *gp,
 	writel(0, gp->regs + RXDMA_CFG);

 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);
+
 		gem_reset(gp);
 		writel(MAC_TXRST_CMD, gp->regs + MAC_TXRST);
 		writel(MAC_RXRST_CMD, gp->regs + MAC_RXRST);
-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+
+		gem_full_unlock(gp);

 		/* No need to take the lock here */

@@ -2192,14 +2273,11 @@ static void gem_stop_phy(struct gem *gp,
 	}
 }

-
 static int gem_do_start(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;

-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);

 	/* Enable the cell */
 	gem_get_cell(gp);
@@ -2211,28 +2289,25 @@ static int gem_do_start(struct net_devic

 	if (gp->lstate == link_up) {
 		netif_carrier_on(gp->dev);
+		/* gem_set_link_modes starts DMA and enables ints */
 		gem_set_link_modes(gp);
 	}

-	netif_wake_queue(gp->dev);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

 	if (request_irq(gp->pdev->irq, gem_interrupt,
-				   IRQF_SHARED, dev->name, (void *)dev)) {
+		        IRQF_SHARED, dev->name, (void *)dev)) {
+
 		printk(KERN_ERR "%s: failed to request irq !\n", gp->dev->name);

-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);

 		gp->running =  0;
 		gem_reset(gp);
 		gem_clean_rings(gp);
 		gem_put_cell(gp);

-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);

 		return -EAGAIN;
 	}
@@ -2243,22 +2318,14 @@ static int gem_do_start(struct net_devic
 static void gem_do_stop(struct net_device *dev, int wol)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
-
-	gp->running = 0;
-
-	/* Stop netif queue */
-	netif_stop_queue(dev);

-	/* Make sure ints are disabled */
+	gem_full_lock(gp, 1);
 	gem_disable_ints(gp);
+	
+	gp->running = 0;

-	/* We can drop the lock now */
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	/* We can drop the lock now that running is set to 0 */
+	gem_full_unlock(gp);

 	/* If we are going to sleep with WOL */
 	gem_stop_dma(gp);
@@ -2275,9 +2342,9 @@ static void gem_do_stop(struct net_devic

 	/* Cell not needed neither if no WOL */
 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
+		gem_full_lock(gp, 0);
 		gem_put_cell(gp);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);
 	}
 }

@@ -2287,30 +2354,26 @@ static void gem_reset_task(void *data)

 	mutex_lock(&gp->pm_mutex);

-	netif_poll_disable(gp->dev);
-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);

 	if (gp->running == 0)
 		goto not_running;

-	if (gp->running) {
-		netif_stop_queue(gp->dev);
+	gem_netif_stop(gp);

-		/* Reset the chip & rings */
-		gem_reinit_chip(gp);
-		if (gp->lstate == link_up)
-			gem_set_link_modes(gp);
-		netif_wake_queue(gp->dev);
-	}
- not_running:
-	gp->reset_task_pending = 0;
+	/* Reset the chip & rings */
+	gem_reinit_chip(gp);
+	if (gp->lstate == link_up)
+		gem_set_link_modes(gp);

-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_netif_start(gp);

-	netif_poll_enable(gp->dev);
+not_running:
+	gp->reset_task_pending = 0;
+
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);

 	mutex_unlock(&gp->pm_mutex);
 }
@@ -2324,8 +2387,12 @@ static int gem_open(struct net_device *d
 	mutex_lock(&gp->pm_mutex);

 	/* We need the cell enabled */
-	if (!gp->asleep)
+	if (!gp->asleep) {
 		rc = gem_do_start(dev);
+		if (rc == 0)
+			netif_start_queue(dev);
+	}
+
 	gp->opened = (rc == 0);

 	mutex_unlock(&gp->pm_mutex);
@@ -2337,15 +2404,14 @@ static int gem_close(struct net_device *
 {
 	struct gem *gp = dev->priv;

-	/* Note: we don't need to call netif_poll_disable() here because
-	 * our caller (dev_close) already did it for us
-	 */
-
 	mutex_lock(&gp->pm_mutex);

 	gp->opened = 0;
-	if (!gp->asleep)
+	if (!gp->asleep) {
+		/* Upper layer took care of disabling polling */
+		netif_stop_queue(dev);
 		gem_do_stop(dev, 0);
+	}

 	mutex_unlock(&gp->pm_mutex);

@@ -2357,22 +2423,17 @@ static int gem_suspend(struct pci_dev *p
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;

 	mutex_lock(&gp->pm_mutex);

-	netif_poll_disable(dev);
-
 	printk(KERN_INFO "%s: suspending, WakeOnLan %s\n",
 	       dev->name,
 	       (gp->wake_on_lan && gp->opened) ? "enabled" : "disabled");

 	/* Keep the cell enabled during the entire operation */
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

 	/* If the driver is opened, we stop the MAC */
 	if (gp->opened) {
@@ -2381,6 +2442,7 @@ static int gem_suspend(struct pci_dev *p

 		/* Switch off MAC, remember WOL setting */
 		gp->asleep_wol = gp->wake_on_lan;
+		gem_netif_stop(gp);
 		gem_do_stop(dev, gp->asleep_wol);
 	} else
 		gp->asleep_wol = 0;
@@ -2399,8 +2461,7 @@ static int gem_suspend(struct pci_dev *p
 	mutex_unlock(&gp->pm_mutex);

 	/* Wait for a pending reset task to complete */
-	while (gp->reset_task_pending)
-		yield();
+	gem_wait_reset_task(gp);
 	flush_scheduled_work();

 	/* Shut the PHY down eventually and setup WOL */
@@ -2421,7 +2482,6 @@ static int gem_resume(struct pci_dev *pd
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;

 	printk(KERN_INFO "%s: resuming\n", dev->name);

@@ -2465,11 +2525,9 @@ static int gem_resume(struct pci_dev *pd

 		/* Re-attach net device */
 		netif_device_attach(dev);
-
 	}

-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);

 	/* If we had WOL enabled, the cell clock was never turned off during
 	 * sleep, so we end up beeing unbalanced. Fix that here
@@ -2482,10 +2540,9 @@ static int gem_resume(struct pci_dev *pd
 	 */
 	gem_put_cell(gp);

-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

-	netif_poll_enable(dev);
+	gem_netif_start(gp);

 	mutex_unlock(&gp->pm_mutex);

@@ -2498,8 +2555,8 @@ static struct net_device_stats *gem_get_
 	struct gem *gp = dev->priv;
 	struct net_device_stats *stats = &gp->net_stats;

-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 0);

 	/* I have seen this being called while the PM was in progress,
 	 * so we shield against this
@@ -2522,27 +2579,26 @@ static struct net_device_stats *gem_get_
 		writel(0, gp->regs + MAC_LCOLL);
 	}

-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
+	gem_netif_start(gp);

 	return &gp->net_stats;
 }

+/* gem_set_multicast is called with netif_tx_lock held. Thus, it
+ * cannot sleep.
+ */
 static void gem_set_multicast(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
 	u32 rxcfg, rxcfg_new;
 	int limit = 10000;

-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);

 	if (!gp->running)
 		goto bail;

-	netif_stop_queue(dev);
-
 	rxcfg = readl(gp->regs + MAC_RXCFG);
 	rxcfg_new = gem_setup_multicast(gp);
 #ifdef STRIP_FCS
@@ -2562,11 +2618,8 @@ #endif

 	writel(rxcfg, gp->regs + MAC_RXCFG);

-	netif_wake_queue(dev);
-
  bail:
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }

 /* Jumbo-grams don't seem to work :-( */
@@ -2593,16 +2646,22 @@ static int gem_change_mtu(struct net_dev
 	}

 	mutex_lock(&gp->pm_mutex);
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);
+	
 	dev->mtu = new_mtu;
 	if (gp->running) {
 		gem_reinit_chip(gp);
 		if (gp->lstate == link_up)
 			gem_set_link_modes(gp);
 	}
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+
+	gem_netif_start(gp);
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);
+
 	mutex_unlock(&gp->pm_mutex);

 	return 0;
@@ -2635,7 +2694,7 @@ static int gem_get_settings(struct net_d
 		cmd->phy_address = 0; /* XXX fixed PHYAD */

 		/* Return current PHY settings */
-		spin_lock_irq(&gp->lock);
+		gem_full_lock(gp, 0);
 		cmd->autoneg = gp->want_autoneg;
 		cmd->speed = gp->phy_mii.speed;
 		cmd->duplex = gp->phy_mii.duplex;
@@ -2647,7 +2706,7 @@ static int gem_get_settings(struct net_d
 		 */
 		if (cmd->advertising == 0)
 			cmd->advertising = cmd->supported;
-		spin_unlock_irq(&gp->lock);
+		gem_full_unlock(gp);
 	} else { // XXX PCS ?
 		cmd->supported =
 			(SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
@@ -2685,11 +2744,11 @@ static int gem_set_settings(struct net_d
 		return -EINVAL;

 	/* Apply settings and restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, cmd);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);

 	return 0;
 }
@@ -2702,11 +2761,11 @@ static int gem_nway_reset(struct net_dev
 		return -EINVAL;

 	/* Restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, NULL);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);

 	return 0;
 }
@@ -2770,16 +2829,15 @@ static int gem_ioctl(struct net_device *
 	struct gem *gp = dev->priv;
 	struct mii_ioctl_data *data = if_mii(ifr);
 	int rc = -EOPNOTSUPP;
-	unsigned long flags;

 	/* Hold the PM mutex while doing ioctl's or we may collide
 	 * with power management.
 	 */
 	mutex_lock(&gp->pm_mutex);

-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

 	switch (cmd) {
 	case SIOCGMIIPHY:		/* Get address of MII PHY in use. */
@@ -2809,9 +2867,9 @@ static int gem_ioctl(struct net_device *
 		break;
 	};

-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_put_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

 	mutex_unlock(&gp->pm_mutex);

@@ -2918,6 +2976,7 @@ static void gem_remove_one(struct pci_de
 	if (dev) {
 		struct gem *gp = dev->priv;

+		/* unregister_netdev will close the device if it's open */
 		unregister_netdev(dev);

 		/* Stop the link timer */
@@ -2927,8 +2986,7 @@ static void gem_remove_one(struct pci_de
 		gem_get_cell(gp);

 		/* Wait for a pending reset task to complete */
-		while (gp->reset_task_pending)
-			yield();
+		gem_wait_reset_task(gp);
 		flush_scheduled_work();

 		/* Shut the PHY down */
@@ -3035,8 +3093,11 @@ static int __devinit gem_init_one(struct

 	gp->msg_enable = DEFAULT_MSG;

+	gp->irq_sync = 0;
 	spin_lock_init(&gp->lock);
-	spin_lock_init(&gp->tx_lock);
+#if !GEM_INTERRUPT_LOCKLESS
+	spin_lock_init(&gp->int_lock);
+#endif
 	mutex_init(&gp->pm_mutex);

 	init_timer(&gp->link_timer);
@@ -3132,9 +3193,7 @@ #endif
 	 */
 	gem_init_phy(gp);

-	spin_lock_irq(&gp->lock);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);

 	/* Register with kernel */
 	if (register_netdev(dev)) {
@@ -3156,8 +3215,7 @@ #endif
 		printk(KERN_INFO "%s: Found %s PHY\n", dev->name,
 			gp->phy_mii.def ? gp->phy_mii.def->name : "no");

-	/* GEM can do it all... */
-	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_LLTX;
+	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
 	if (pci_using_dac)
 		dev->features |= NETIF_F_HIGHDMA;

@@ -3177,7 +3235,6 @@ err_out_free_netdev:
 err_disable_device:
 	pci_disable_device(pdev);
 	return err;
-
 }


diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h
index a70067c..49d3aa5 100644
--- a/drivers/net/sungem.h
+++ b/drivers/net/sungem.h
@@ -7,6 +7,8 @@
 #ifndef _SUNGEM_H
 #define _SUNGEM_H

+#define GEM_INTERRUPT_LOCKLESS 1 /* TODO */
+
 /* Global Registers */
 #define GREG_SEBSTATE	0x0000UL	/* SEB State Register		*/
 #define GREG_CFG	0x0004UL	/* Configuration Register	*/
@@ -71,6 +73,11 @@ #define GREG_STAT_NAPI		(GREG_STAT_TXALL
  * on GREG_STAT.
  */

+#if GEM_INTERRUPT_LOCKLESS
+#define GREG_STAT_IACK		(GREG_STAT_TXALL  | GREG_STAT_TXINTME | \
+				 GREG_STAT_RXDONE | GREG_STAT_RXNOBUF)
+#endif
+
 /* Global PCI Error Status Register */
 #define GREG_PCIESTAT_BADACK	0x00000001	/* No ACK64# during ABS64 cycle	*/
 #define GREG_PCIESTAT_DTRTO	0x00000002	/* Delayed transaction timeout	*/
@@ -973,8 +980,11 @@ enum link_state {
 };

 struct gem {
+	int			irq_sync;
 	spinlock_t		lock;
-	spinlock_t		tx_lock;
+#if !GEM_INTERRUPT_LOCKLESS
+	spinlock_t		int_lock;
+#endif
 	void __iomem		*regs;
 	int			rx_new, rx_old;
 	int			tx_new, tx_old;



--
Eric

[-- Attachment #2: sungem-locking.patch --]
[-- Type: text/x-patch, Size: 28163 bytes --]

diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 253e96e..31bd90d 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -9,26 +9,6 @@
  *
  * NAPI and NETPOLL support
  * (C) 2004 by Eric Lemoine (eric.lemoine@gmail.com)
- *
- * TODO:
- *  - Now that the driver was significantly simplified, I need to rework
- *    the locking. I'm sure we don't need _2_ spinlocks, and we probably
- *    can avoid taking most of them for so long period of time (and schedule
- *    instead). The main issues at this point are caused by the netdev layer
- *    though:
- *
- *    gem_change_mtu() and gem_set_multicast() are called with a read_lock()
- *    help by net/core/dev.c, thus they can't schedule. That means they can't
- *    call netif_poll_disable() neither, thus force gem_poll() to keep a spinlock
- *    where it could have been dropped. change_mtu especially would love also to
- *    be able to msleep instead of horrid locked delays when resetting the HW,
- *    but that read_lock() makes it impossible, unless I defer it's action to
- *    the reset task, which means it'll be asynchronous (won't take effect until
- *    the system schedules a bit).
- *
- *    Also, it would probably be possible to also remove most of the long-life
- *    locking in open/resume code path (gem_reinit_chip) by beeing more careful
- *    about when we can start taking interrupts or get xmit() called...
  */
 
 #include <linux/module.h>
@@ -206,11 +186,18 @@ static inline void phy_write(struct gem 
 	__phy_write(gp, gp->mii_phy_addr, reg, val);
 }
 
-static inline void gem_enable_ints(struct gem *gp)
+static inline void __gem_enable_ints(struct gem *gp)
 {
-	/* Enable all interrupts but TXDONE */
+	/* Enable all interrupts, but TXDONE */
 	writel(GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }
+	
+static inline void gem_enable_ints(struct gem *gp)
+{
+	gp->irq_sync = 0;
+	wmb();
+	__gem_enable_ints(gp);
+}
 
 static inline void gem_disable_ints(struct gem *gp)
 {
@@ -218,6 +205,60 @@ static inline void gem_disable_ints(stru
 	writel(GREG_STAT_NAPI | GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }
 
+#if GEM_INTERRUPT_LOCKLESS
+static inline u32 gem_read_status2(struct gem *gp)
+{
+	return readl(gp->regs + GREG_STAT2);
+}
+
+static inline void gem_ack_int(struct gem *gp)
+{
+	writel(GREG_STAT_IACK, gp->regs + GREG_IACK);
+}
+
+#else
+static inline u32 gem_read_status(struct gem *gp)
+{
+	return readl(gp->regs + GREG_STAT);
+}
+#endif
+
+static inline void gem_netif_stop(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	dev->trans_start = jiffies; /* prevent tx timeout */
+	netif_poll_disable(dev);
+	netif_tx_disable(dev);
+}
+
+static inline void gem_netif_start(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	/* Unconditionally calling netif_wake_queue is ok so long as the
+	 * caller has freed tx slots, which is done in gem_reinit_chip().
+	 */
+	netif_wake_queue(dev);
+	netif_poll_enable(dev);
+}
+
+static inline void gem_schedule_reset_task(struct gem *gp)
+{
+	gp->reset_task_pending = 1;
+	smp_mb();
+	schedule_work(&gp->reset_task);
+}
+
+static inline void gem_wait_reset_task(struct gem *gp)
+{
+	mb();
+	while (gp->reset_task_pending) {
+		yield();
+		mb();
+	}
+}
+
 static void gem_get_cell(struct gem *gp)
 {
 	BUG_ON(gp->cell_enabled < 0);
@@ -658,12 +699,20 @@ static int gem_abnormal_irq(struct net_d
 	return 0;
 
 do_reset:
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
+	gem_schedule_reset_task(gp);
 
 	return 1;
 }
 
+static inline u32 gem_tx_avail(struct gem *gp)
+{
+	smp_mb();
+	return (gp->tx_old <= gp->tx_new) ?
+		gp->tx_old + (TX_RING_SIZE - 1) - gp->tx_new :
+		gp->tx_old - gp->tx_new - 1;
+}
+
+
 static __inline__ void gem_tx(struct net_device *dev, struct gem *gp, u32 gem_status)
 {
 	int entry, limit;
@@ -717,11 +766,20 @@ static __inline__ void gem_tx(struct net
 		gp->net_stats.tx_packets++;
 		dev_kfree_skb_irq(skb);
 	}
+
 	gp->tx_old = entry;
 
-	if (netif_queue_stopped(dev) &&
-	    TX_BUFFS_AVAIL(gp) > (MAX_SKB_FRAGS + 1))
-		netif_wake_queue(dev);
+	/* Need to make tx_old update visible to gem_start_xmit() */
+	smp_mb();
+
+	if (unlikely(netif_queue_stopped(dev) &&
+	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))) {
+		netif_tx_lock(dev);
+		if (netif_queue_stopped(dev) &&
+	    	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+		netif_tx_unlock(dev);
+	}
 }
 
 static __inline__ void gem_post_rxds(struct gem *gp, int limit)
@@ -882,12 +940,6 @@ static int gem_rx(struct gem *gp, int wo
 static int gem_poll(struct net_device *dev, int *budget)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	/*
-	 * NAPI locking nightmare: See comment at head of driver
-	 */
-	spin_lock_irqsave(&gp->lock, flags);
 
 	do {
 		int work_to_do, work_done;
@@ -899,19 +951,10 @@ static int gem_poll(struct net_device *d
 		}
 
 		/* Run TX completion thread */
-		spin_lock(&gp->tx_lock);
 		gem_tx(dev, gp, gp->status);
-		spin_unlock(&gp->tx_lock);
-
-		spin_unlock_irqrestore(&gp->lock, flags);
 
-		/* Run RX thread. We don't use any locking here,
-		 * code willing to do bad things - like cleaning the
-		 * rx ring - must call netif_poll_disable(), which
-		 * schedule_timeout()'s if polling is already disabled.
-		 */
+		/* Run RX thread */
 		work_to_do = min(*budget, dev->quota);
-
 		work_done = gem_rx(gp, work_to_do);
 
 		*budget -= work_done;
@@ -920,23 +963,57 @@ static int gem_poll(struct net_device *d
 		if (work_done >= work_to_do)
 			return 1;
 
-		spin_lock_irqsave(&gp->lock, flags);
-
 		gp->status = readl(gp->regs + GREG_STAT);
 	} while (gp->status & GREG_STAT_NAPI);
 
 	__netif_rx_complete(dev);
-	gem_enable_ints(gp);
+	__gem_enable_ints(gp);
 
-	spin_unlock_irqrestore(&gp->lock, flags);
 	return 0;
 }
 
+static void gem_irq_quiesce(struct gem *gp)
+{
+	BUG_ON(gp->irq_sync);
+
+	gp->irq_sync = 1;
+	smp_mb();
+
+	synchronize_irq(gp->pdev->irq);
+}
+
+static inline int gem_irq_sync(struct gem *gp)
+{
+	return gp->irq_sync;
+}
+
+static inline void gem_full_lock(struct gem *gp, int irq_sync)
+{
+	spin_lock_bh(&gp->lock);
+	if (irq_sync)
+		gem_irq_quiesce(gp);
+}
+
+static inline void gem_full_unlock(struct gem *gp)
+{
+	spin_unlock_bh(&gp->lock);
+}
+
+#if GEM_INTERRUPT_LOCKLESS
+
+/* Lockless implementation of IRQ handler.
+ *
+ * The downside of this implementation is that it requires three PIO
+ * operations: read status, disable interrupts and ack interrupt.
+ *
+ * Also, this implementation may not work with NET_POLL_CONTROLLER. 
+ * TODO: need further investigation.
+ */
 static irqreturn_t gem_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
 	struct gem *gp = dev->priv;
-	unsigned long flags;
+	u32 gem_status;
 
 	/* Swallow interrupts when shutting the chip down, though
 	 * that shouldn't happen, we should have done free_irq() at
@@ -945,29 +1022,70 @@ static irqreturn_t gem_interrupt(int irq
 	if (!gp->running)
 		return IRQ_HANDLED;
 
-	spin_lock_irqsave(&gp->lock, flags);
+	gem_status = gem_read_status2(gp);
+	if (unlikely(!gem_status)) /* shared irq? */
+		return IRQ_NONE;
 
 	if (netif_rx_schedule_prep(dev)) {
-		u32 gem_status = readl(gp->regs + GREG_STAT);
+		gem_disable_ints(gp);
+		gem_ack_int(gp);
 
-		if (gem_status == 0) {
+		if (gem_irq_sync(gp)) {
+			/* We are sure that *we* disabled polling, so we can
+			 * safely reenable it before returning.
+			 */
 			netif_poll_enable(dev);
-			spin_unlock_irqrestore(&gp->lock, flags);
-			return IRQ_NONE;
+			return IRQ_HANDLED;
 		}
+
 		gp->status = gem_status;
-		gem_disable_ints(gp);
 		__netif_rx_schedule(dev);
 	}
 
-	spin_unlock_irqrestore(&gp->lock, flags);
+	return IRQ_HANDLED;
+}
+#else
+static irqreturn_t gem_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct gem *gp = dev->priv;
+	unsigned int handled = 1;
+	unsigned long flags;
 
-	/* If polling was disabled at the time we received that
-	 * interrupt, we may return IRQ_HANDLED here while we
-	 * should return IRQ_NONE. No big deal...
+	/* Swallow interrupts when shutting the chip down, though
+	 * that shouldn't happen, we should have done free_irq() at
+	 * this point...
 	 */
+	if (!gp->running)
+		return IRQ_HANDLED;
+
+	spin_lock_irqsave(&gp->int_lock, flags);
+
+	if (netif_rx_schedule_prep(dev)) {
+		u32 gem_status = gem_read_status(gp);
+
+		if (unlikely(!gem_status)) {
+			netif_poll_enable(dev);
+			handled = 0;
+			goto out;
+		}
+
+		gem_disable_ints(gp);
+
+		if (gem_irq_sync(gp)) {
+			netif_poll_enable(dev);
+			goto out;
+		}
+
+		gp->status = gem_status;
+		__netif_rx_schedule(dev);
+	}
+
+out:
+	spin_unlock_irqrestore(&gp->int_lock, flags);
 	return IRQ_HANDLED;
 }
+#endif
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void gem_poll_controller(struct net_device *dev)
@@ -999,14 +1117,8 @@ static void gem_tx_timeout(struct net_de
 	       readl(gp->regs + MAC_RXSTAT),
 	       readl(gp->regs + MAC_RXCFG));
 
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
-
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	printk(KERN_INFO "%s: gem_tx_timeout\n", dev->name);
+	gem_schedule_reset_task(gp);
 }
 
 static __inline__ int gem_intme(int entry)
@@ -1023,7 +1135,6 @@ static int gem_start_xmit(struct sk_buff
 	struct gem *gp = dev->priv;
 	int entry;
 	u64 ctrl;
-	unsigned long flags;
 
 	ctrl = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1037,24 +1148,13 @@ static int gem_start_xmit(struct sk_buff
 			(csum_stuff_off << 21));
 	}
 
-	local_irq_save(flags);
-	if (!spin_trylock(&gp->tx_lock)) {
-		/* Tell upper layer to requeue */
-		local_irq_restore(flags);
-		return NETDEV_TX_LOCKED;
-	}
-	/* We raced with gem_do_stop() */
-	if (!gp->running) {
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		return NETDEV_TX_BUSY;
-	}
-
 	/* This is a hard error, log it. */
-	if (TX_BUFFS_AVAIL(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
-		netif_stop_queue(dev);
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
-		       dev->name);
+	if (gem_tx_avail(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
+		if (!netif_queue_stopped(dev)) {
+			netif_stop_queue(dev);
+			printk(KERN_ERR PFX "%s: BUG! Tx Ring full when "
+			       "queue awake!\n", dev->name);
+		}
 		return NETDEV_TX_BUSY;
 	}
 
@@ -1131,15 +1231,17 @@ static int gem_start_xmit(struct sk_buff
 	}
 
 	gp->tx_new = entry;
-	if (TX_BUFFS_AVAIL(gp) <= (MAX_SKB_FRAGS + 1))
+	if (unlikely(gem_tx_avail(gp) <= (MAX_SKB_FRAGS + 1))) {
 		netif_stop_queue(dev);
+		if (gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+	}
 
 	if (netif_msg_tx_queued(gp))
 		printk(KERN_DEBUG "%s: tx queued, slot %d, skblen %d\n",
 		       dev->name, entry, skb->len);
 	mb();
 	writel(gp->tx_new, gp->regs + TXDMA_KICK);
-	spin_unlock_irqrestore(&gp->tx_lock, flags);
 
 	dev->trans_start = jiffies;
 
@@ -1148,7 +1250,6 @@ static int gem_start_xmit(struct sk_buff
 
 #define STOP_TRIES 32
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reset(struct gem *gp)
 {
 	int limit;
@@ -1174,7 +1275,6 @@ static void gem_reset(struct gem *gp)
 		printk(KERN_ERR "%s: SW reset is ghetto.\n", gp->dev->name);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_start_dma(struct gem *gp)
 {
 	u32 val;
@@ -1197,9 +1297,7 @@ static void gem_start_dma(struct gem *gp
 	writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. DMA won't be
- * actually stopped before about 4ms tho ...
- */
+/* DMA won't be actually stopped before about 4ms tho ... */
 static void gem_stop_dma(struct gem *gp)
 {
 	u32 val;
@@ -1220,8 +1318,7 @@ static void gem_stop_dma(struct gem *gp)
 }
 
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
-// XXX dbl check what that function should do when called on PCS PHY
+/* XXX dbl check what that function should do when called on PCS PHY */
 static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep)
 {
 	u32 advertise, features;
@@ -1306,8 +1403,6 @@ non_mii:
 
 /* A link-up condition has occurred, initialize and enable the
  * rest of the chip.
- *
- * Must be invoked under gp->lock and gp->tx_lock.
  */
 static int gem_set_link_modes(struct gem *gp)
 {
@@ -1417,7 +1512,6 @@ static int gem_set_link_modes(struct gem
 	return 0;
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static int gem_mdio_link_not_up(struct gem *gp)
 {
 	switch (gp->lstate) {
@@ -1474,13 +1568,13 @@ static void gem_link_timer(unsigned long
 	if (gp->asleep)
 		return;
 
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 
 	/* If the reset task is still pending, we just
 	 * reschedule the link timer
 	 */
+	smp_mb();
 	if (gp->reset_task_pending)
 		goto restart;
 
@@ -1528,8 +1622,7 @@ static void gem_link_timer(unsigned long
 				printk(KERN_INFO "%s: Link down\n",
 					gp->dev->name);
 			netif_carrier_off(gp->dev);
-			gp->reset_task_pending = 1;
-			schedule_work(&gp->reset_task);
+			gem_schedule_reset_task(gp);
 			restart_aneg = 1;
 		} else if (++gp->timer_ticks > 10) {
 			if (found_mii_phy(gp))
@@ -1546,11 +1639,9 @@ restart:
 	mod_timer(&gp->link_timer, jiffies + ((12 * HZ) / 10));
 out_unlock:
 	gem_put_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_clean_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1601,7 +1692,6 @@ static void gem_clean_rings(struct gem *
 	}
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1775,12 +1865,9 @@ #endif
 	netif_carrier_off(gp->dev);
 
 	/* Can I advertise gigabit here ? I'd need BCM PHY docs... */
-	spin_lock_irq(&gp->lock);
 	gem_begin_auto_negotiation(gp, NULL);
-	spin_unlock_irq(&gp->lock);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_dma(struct gem *gp)
 {
 	u64 desc_dma = (u64) gp->gblock_dvma;
@@ -1818,7 +1905,6 @@ static void gem_init_dma(struct gem *gp)
 		       gp->regs + RXDMA_BLANK);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static u32 gem_setup_multicast(struct gem *gp)
 {
 	u32 rxcfg = 0;
@@ -1860,7 +1946,6 @@ static u32 gem_setup_multicast(struct ge
 	return rxcfg;
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_mac(struct gem *gp)
 {
 	unsigned char *e = &gp->dev->dev_addr[0];
@@ -1943,7 +2028,6 @@ #endif
 		writel(0, gp->regs + WOL_WAKECSR);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_pause_thresholds(struct gem *gp)
 {
        	u32 cfg;
@@ -2096,7 +2180,6 @@ static int gem_check_invariants(struct g
 	return 0;
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reinit_chip(struct gem *gp)
 {
 	/* Reset the chip */
@@ -2116,12 +2199,10 @@ static void gem_reinit_chip(struct gem *
 	gem_init_mac(gp);
 }
 
-
 /* Must be invoked with no lock held. */
 static void gem_stop_phy(struct gem *gp, int wol)
 {
 	u32 mifcfg;
-	unsigned long flags;
 
 	/* Let the chip settle down a bit, it seems that helps
 	 * for sleep mode on some models
@@ -2167,13 +2248,13 @@ static void gem_stop_phy(struct gem *gp,
 	writel(0, gp->regs + RXDMA_CFG);
 
 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);
+
 		gem_reset(gp);
 		writel(MAC_TXRST_CMD, gp->regs + MAC_TXRST);
 		writel(MAC_RXRST_CMD, gp->regs + MAC_RXRST);
-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+
+		gem_full_unlock(gp);
 
 		/* No need to take the lock here */
 
@@ -2192,14 +2273,11 @@ static void gem_stop_phy(struct gem *gp,
 	}
 }
 
-
 static int gem_do_start(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
 
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 
 	/* Enable the cell */
 	gem_get_cell(gp);
@@ -2211,28 +2289,25 @@ static int gem_do_start(struct net_devic
 
 	if (gp->lstate == link_up) {
 		netif_carrier_on(gp->dev);
+		/* gem_set_link_modes starts DMA and enables ints */
 		gem_set_link_modes(gp);
 	}
 
-	netif_wake_queue(gp->dev);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	if (request_irq(gp->pdev->irq, gem_interrupt,
-				   IRQF_SHARED, dev->name, (void *)dev)) {
+		        IRQF_SHARED, dev->name, (void *)dev)) {
+
 		printk(KERN_ERR "%s: failed to request irq !\n", gp->dev->name);
 
-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);
 
 		gp->running =  0;
 		gem_reset(gp);
 		gem_clean_rings(gp);
 		gem_put_cell(gp);
 
-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);
 
 		return -EAGAIN;
 	}
@@ -2243,22 +2318,14 @@ static int gem_do_start(struct net_devic
 static void gem_do_stop(struct net_device *dev, int wol)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
-
-	gp->running = 0;
-
-	/* Stop netif queue */
-	netif_stop_queue(dev);
 
-	/* Make sure ints are disabled */
+	gem_full_lock(gp, 1);
 	gem_disable_ints(gp);
+	
+	gp->running = 0;
 
-	/* We can drop the lock now */
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	/* We can drop the lock now that running is set to 0 */
+	gem_full_unlock(gp);
 
 	/* If we are going to sleep with WOL */
 	gem_stop_dma(gp);
@@ -2275,9 +2342,9 @@ static void gem_do_stop(struct net_devic
 
 	/* Cell not needed neither if no WOL */
 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
+		gem_full_lock(gp, 0);
 		gem_put_cell(gp);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);
 	}
 }
 
@@ -2287,30 +2354,26 @@ static void gem_reset_task(void *data)
 
 	mutex_lock(&gp->pm_mutex);
 
-	netif_poll_disable(gp->dev);
-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);
 
 	if (gp->running == 0)
 		goto not_running;
 
-	if (gp->running) {
-		netif_stop_queue(gp->dev);
+	gem_netif_stop(gp);
 
-		/* Reset the chip & rings */
-		gem_reinit_chip(gp);
-		if (gp->lstate == link_up)
-			gem_set_link_modes(gp);
-		netif_wake_queue(gp->dev);
-	}
- not_running:
-	gp->reset_task_pending = 0;
+	/* Reset the chip & rings */
+	gem_reinit_chip(gp);
+	if (gp->lstate == link_up)
+		gem_set_link_modes(gp);
 
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_netif_start(gp);
 
-	netif_poll_enable(gp->dev);
+not_running:
+	gp->reset_task_pending = 0;
+
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);
 
 	mutex_unlock(&gp->pm_mutex);
 }
@@ -2324,8 +2387,12 @@ static int gem_open(struct net_device *d
 	mutex_lock(&gp->pm_mutex);
 
 	/* We need the cell enabled */
-	if (!gp->asleep)
+	if (!gp->asleep) {
 		rc = gem_do_start(dev);
+		if (rc == 0)
+			netif_start_queue(dev);
+	}
+
 	gp->opened = (rc == 0);
 
 	mutex_unlock(&gp->pm_mutex);
@@ -2337,15 +2404,14 @@ static int gem_close(struct net_device *
 {
 	struct gem *gp = dev->priv;
 
-	/* Note: we don't need to call netif_poll_disable() here because
-	 * our caller (dev_close) already did it for us
-	 */
-
 	mutex_lock(&gp->pm_mutex);
 
 	gp->opened = 0;
-	if (!gp->asleep)
+	if (!gp->asleep) {
+		/* Upper layer took care of disabling polling */
+		netif_stop_queue(dev);
 		gem_do_stop(dev, 0);
+	}
 
 	mutex_unlock(&gp->pm_mutex);
 
@@ -2357,22 +2423,17 @@ static int gem_suspend(struct pci_dev *p
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;
 
 	mutex_lock(&gp->pm_mutex);
 
-	netif_poll_disable(dev);
-
 	printk(KERN_INFO "%s: suspending, WakeOnLan %s\n",
 	       dev->name,
 	       (gp->wake_on_lan && gp->opened) ? "enabled" : "disabled");
 
 	/* Keep the cell enabled during the entire operation */
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	/* If the driver is opened, we stop the MAC */
 	if (gp->opened) {
@@ -2381,6 +2442,7 @@ static int gem_suspend(struct pci_dev *p
 
 		/* Switch off MAC, remember WOL setting */
 		gp->asleep_wol = gp->wake_on_lan;
+		gem_netif_stop(gp);
 		gem_do_stop(dev, gp->asleep_wol);
 	} else
 		gp->asleep_wol = 0;
@@ -2399,8 +2461,7 @@ static int gem_suspend(struct pci_dev *p
 	mutex_unlock(&gp->pm_mutex);
 
 	/* Wait for a pending reset task to complete */
-	while (gp->reset_task_pending)
-		yield();
+	gem_wait_reset_task(gp);
 	flush_scheduled_work();
 
 	/* Shut the PHY down eventually and setup WOL */
@@ -2421,7 +2482,6 @@ static int gem_resume(struct pci_dev *pd
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;
 
 	printk(KERN_INFO "%s: resuming\n", dev->name);
 
@@ -2465,11 +2525,9 @@ static int gem_resume(struct pci_dev *pd
 
 		/* Re-attach net device */
 		netif_device_attach(dev);
-
 	}
 
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 
 	/* If we had WOL enabled, the cell clock was never turned off during
 	 * sleep, so we end up beeing unbalanced. Fix that here
@@ -2482,10 +2540,9 @@ static int gem_resume(struct pci_dev *pd
 	 */
 	gem_put_cell(gp);
 
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
-	netif_poll_enable(dev);
+	gem_netif_start(gp);
 
 	mutex_unlock(&gp->pm_mutex);
 
@@ -2498,8 +2555,8 @@ static struct net_device_stats *gem_get_
 	struct gem *gp = dev->priv;
 	struct net_device_stats *stats = &gp->net_stats;
 
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 0);
 
 	/* I have seen this being called while the PM was in progress,
 	 * so we shield against this
@@ -2522,27 +2579,26 @@ static struct net_device_stats *gem_get_
 		writel(0, gp->regs + MAC_LCOLL);
 	}
 
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
+	gem_netif_start(gp);
 
 	return &gp->net_stats;
 }
 
+/* gem_set_multicast is called with netif_tx_lock held. Thus, it
+ * cannot sleep.
+ */
 static void gem_set_multicast(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
 	u32 rxcfg, rxcfg_new;
 	int limit = 10000;
 
-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 
 	if (!gp->running)
 		goto bail;
 
-	netif_stop_queue(dev);
-
 	rxcfg = readl(gp->regs + MAC_RXCFG);
 	rxcfg_new = gem_setup_multicast(gp);
 #ifdef STRIP_FCS
@@ -2562,11 +2618,8 @@ #endif
 
 	writel(rxcfg, gp->regs + MAC_RXCFG);
 
-	netif_wake_queue(dev);
-
  bail:
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }
 
 /* Jumbo-grams don't seem to work :-( */
@@ -2593,16 +2646,22 @@ static int gem_change_mtu(struct net_dev
 	}
 
 	mutex_lock(&gp->pm_mutex);
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);
+	
 	dev->mtu = new_mtu;
 	if (gp->running) {
 		gem_reinit_chip(gp);
 		if (gp->lstate == link_up)
 			gem_set_link_modes(gp);
 	}
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+
+	gem_netif_start(gp);
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);
+
 	mutex_unlock(&gp->pm_mutex);
 
 	return 0;
@@ -2635,7 +2694,7 @@ static int gem_get_settings(struct net_d
 		cmd->phy_address = 0; /* XXX fixed PHYAD */
 
 		/* Return current PHY settings */
-		spin_lock_irq(&gp->lock);
+		gem_full_lock(gp, 0);
 		cmd->autoneg = gp->want_autoneg;
 		cmd->speed = gp->phy_mii.speed;
 		cmd->duplex = gp->phy_mii.duplex;
@@ -2647,7 +2706,7 @@ static int gem_get_settings(struct net_d
 		 */
 		if (cmd->advertising == 0)
 			cmd->advertising = cmd->supported;
-		spin_unlock_irq(&gp->lock);
+		gem_full_unlock(gp);
 	} else { // XXX PCS ?
 		cmd->supported =
 			(SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
@@ -2685,11 +2744,11 @@ static int gem_set_settings(struct net_d
 		return -EINVAL;
 
 	/* Apply settings and restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, cmd);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 
 	return 0;
 }
@@ -2702,11 +2761,11 @@ static int gem_nway_reset(struct net_dev
 		return -EINVAL;
 
 	/* Restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, NULL);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 
 	return 0;
 }
@@ -2770,16 +2829,15 @@ static int gem_ioctl(struct net_device *
 	struct gem *gp = dev->priv;
 	struct mii_ioctl_data *data = if_mii(ifr);
 	int rc = -EOPNOTSUPP;
-	unsigned long flags;
 
 	/* Hold the PM mutex while doing ioctl's or we may collide
 	 * with power management.
 	 */
 	mutex_lock(&gp->pm_mutex);
 
-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	switch (cmd) {
 	case SIOCGMIIPHY:		/* Get address of MII PHY in use. */
@@ -2809,9 +2867,9 @@ static int gem_ioctl(struct net_device *
 		break;
 	};
 
-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_put_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	mutex_unlock(&gp->pm_mutex);
 
@@ -2918,6 +2976,7 @@ static void gem_remove_one(struct pci_de
 	if (dev) {
 		struct gem *gp = dev->priv;
 
+		/* unregister_netdev will close the device if it's open */
 		unregister_netdev(dev);
 
 		/* Stop the link timer */
@@ -2927,8 +2986,7 @@ static void gem_remove_one(struct pci_de
 		gem_get_cell(gp);
 
 		/* Wait for a pending reset task to complete */
-		while (gp->reset_task_pending)
-			yield();
+		gem_wait_reset_task(gp);
 		flush_scheduled_work();
 
 		/* Shut the PHY down */
@@ -3035,8 +3093,11 @@ static int __devinit gem_init_one(struct
 
 	gp->msg_enable = DEFAULT_MSG;
 
+	gp->irq_sync = 0;
 	spin_lock_init(&gp->lock);
-	spin_lock_init(&gp->tx_lock);
+#if !GEM_INTERRUPT_LOCKLESS
+	spin_lock_init(&gp->int_lock);
+#endif
 	mutex_init(&gp->pm_mutex);
 
 	init_timer(&gp->link_timer);
@@ -3132,9 +3193,7 @@ #endif
 	 */
 	gem_init_phy(gp);
 
-	spin_lock_irq(&gp->lock);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
 
 	/* Register with kernel */
 	if (register_netdev(dev)) {
@@ -3156,8 +3215,7 @@ #endif
 		printk(KERN_INFO "%s: Found %s PHY\n", dev->name,
 			gp->phy_mii.def ? gp->phy_mii.def->name : "no");
 
-	/* GEM can do it all... */
-	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_LLTX;
+	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
 	if (pci_using_dac)
 		dev->features |= NETIF_F_HIGHDMA;
 
@@ -3177,7 +3235,6 @@ err_out_free_netdev:
 err_disable_device:
 	pci_disable_device(pdev);
 	return err;
-
 }
 
 
diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h
index a70067c..49d3aa5 100644
--- a/drivers/net/sungem.h
+++ b/drivers/net/sungem.h
@@ -7,6 +7,8 @@
 #ifndef _SUNGEM_H
 #define _SUNGEM_H
 
+#define GEM_INTERRUPT_LOCKLESS 1 /* TODO */
+
 /* Global Registers */
 #define GREG_SEBSTATE	0x0000UL	/* SEB State Register		*/
 #define GREG_CFG	0x0004UL	/* Configuration Register	*/
@@ -71,6 +73,11 @@ #define GREG_STAT_NAPI		(GREG_STAT_TXALL
  * on GREG_STAT.
  */
 
+#if GEM_INTERRUPT_LOCKLESS
+#define GREG_STAT_IACK		(GREG_STAT_TXALL  | GREG_STAT_TXINTME | \
+				 GREG_STAT_RXDONE | GREG_STAT_RXNOBUF)
+#endif
+
 /* Global PCI Error Status Register */
 #define GREG_PCIESTAT_BADACK	0x00000001	/* No ACK64# during ABS64 cycle	*/
 #define GREG_PCIESTAT_DTRTO	0x00000002	/* Delayed transaction timeout	*/
@@ -973,8 +980,11 @@ enum link_state {
 };
 
 struct gem {
+	int			irq_sync;
 	spinlock_t		lock;
-	spinlock_t		tx_lock;
+#if !GEM_INTERRUPT_LOCKLESS
+	spinlock_t		int_lock;
+#endif
 	void __iomem		*regs;
 	int			rx_new, rx_old;
 	int			tx_new, tx_old;


* Re: [patch sungem] improved locking
  2006-11-14 21:54               ` Eric Lemoine
@ 2006-11-28 22:49                 ` David Miller
  2006-11-28 22:57                   ` Benjamin Herrenschmidt
  2006-11-29 10:56                   ` Eric Lemoine
  0 siblings, 2 replies; 31+ messages in thread
From: David Miller @ 2006-11-28 22:49 UTC (permalink / raw)
  To: eric.lemoine; +Cc: netdev, benh

From: "Eric Lemoine" <eric.lemoine@gmail.com>
Date: Tue, 14 Nov 2006 22:54:40 +0100

> On 11/14/06, David Miller <davem@davemloft.net> wrote:
> > From: "Eric Lemoine" <eric.lemoine@gmail.com>
> > Date: Tue, 14 Nov 2006 08:28:42 +0100
> >
> > > because it makes it explicit that only bits 0 through 6 are taken into
> > > account when writing the IACK register.
> >
> > The phrase "bits 0 through 6" doesn't make any sense when bit 3 DOES
> > NOT EXIST in the hardware, it's reserved, it's not there, so including
> > it only confuses people and obfuscates the code.
> >
> > Please use the explicit bit mask composed of existing macros, which
> > not only makes sure that the mask has meaning, but it also makes sure
> > that reserved and non-existing bits are never referenced.
> 
> Patch attached.
> 
> Remember, the GEM_INTERRUPT_LOCKLESS isn't there to stay. It's
> currently there because I'm not sure the lockless implementation of
> gem_interrupt() will work with poll_net_controller.
> 
> Thanks,
> 
> Signed-off-by: Eric Lemoine <eric.lemoine@gmail.com>

This looks mostly fine.

I was thinking about the lockless stuff, and I wonder if there
is a clever way you can get it back down to one PIO on the
GREG_STAT register.

I think you'd need to have the ->poll() clear gp->status, then
do a smp_wmb(), right before it re-enables interrupts.

Then in the interrupt handler, you need to find a way to safely
OR-in any unset bits in gp->status in a race-free manner.



* Re: [patch sungem] improved locking
  2006-11-28 22:49                 ` David Miller
@ 2006-11-28 22:57                   ` Benjamin Herrenschmidt
  2006-11-28 23:43                     ` David Miller
  2006-11-29 10:56                   ` Eric Lemoine
  1 sibling, 1 reply; 31+ messages in thread
From: Benjamin Herrenschmidt @ 2006-11-28 22:57 UTC (permalink / raw)
  To: David Miller; +Cc: eric.lemoine, netdev


> This looks mostly fine.
> 
> I was thinking about the lockless stuff, and I wonder if there
> is a clever way you can get it back down to one PIO on the
> GREG_STAT register.
> 
> I think you'd need to have the ->poll() clear gp->status, then
> do a smp_wmb(), right before it re-enables interrupts.
> 
> Then in the interrupt handler, you need to find a way to safely
> OR-in any unset bits in gp->status in a race-free manner.

Having it atomic might work at a slightly smaller cost than a lock,
though atomics don't have strong ordering requirements so you'd still
have to be a bit careful.
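
For example (sketch only, with gp->status turned into an atomic_t and
gem_status freshly read from GREG_STAT):

	int old, new;

	do {
		old = atomic_read(&gp->status);
		new = old | gem_status;
	} while (atomic_cmpxchg(&gp->status, old, new) != old);

atomic_cmpxchg() returns the old value and, like the other
value-returning atomics, implies full memory barriers, so this
particular construct should be safe ordering-wise.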

Ben.




* Re: [patch sungem] improved locking
  2006-11-28 22:57                   ` Benjamin Herrenschmidt
@ 2006-11-28 23:43                     ` David Miller
  2006-11-29  0:19                       ` Benjamin Herrenschmidt
  2006-11-29 10:16                       ` Eric Lemoine
  0 siblings, 2 replies; 31+ messages in thread
From: David Miller @ 2006-11-28 23:43 UTC (permalink / raw)
  To: benh; +Cc: eric.lemoine, netdev

From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 29 Nov 2006 09:57:24 +1100

> 
> > This looks mostly fine.
> > 
> > I was thinking about the lockless stuff, and I wonder if there
> > is a clever way you can get it back down to one PIO on the
> > GREG_STAT register.
> > 
> > I think you'd need to have the ->poll() clear gp->status, then
> > do a smp_wmb(), right before it re-enables interrupts.
> > 
> > Then in the interrupt handler, you need to find a way to safely
> > OR-in any unset bits in gp->status in a race-free manner.
> 
> Having it atomic might work at a slightly smaller cost than a lock,
> though atomics don't have strong ordering requirements so you'd still
> have to be a bit careful.

At least in theory the atomic + any necessary memory barriers
would be cheaper than the extra PIO read we need otherwise.


* Re: [patch sungem] improved locking
  2006-11-28 23:43                     ` David Miller
@ 2006-11-29  0:19                       ` Benjamin Herrenschmidt
  2006-11-29 10:16                       ` Eric Lemoine
  1 sibling, 0 replies; 31+ messages in thread
From: Benjamin Herrenschmidt @ 2006-11-29  0:19 UTC (permalink / raw)
  To: David Miller; +Cc: eric.lemoine, netdev

On Tue, 2006-11-28 at 15:43 -0800, David Miller wrote:

> At least in theory the atomic + any necessary memory barriers
> would be cheaper than the extra PIO read we need otherwise.

Yes, IO reads are generally the worst-case scenario, even on machines
with fairly slow locks.

Ben.




* Re: [patch sungem] improved locking
  2006-11-28 23:43                     ` David Miller
  2006-11-29  0:19                       ` Benjamin Herrenschmidt
@ 2006-11-29 10:16                       ` Eric Lemoine
  1 sibling, 0 replies; 31+ messages in thread
From: Eric Lemoine @ 2006-11-29 10:16 UTC (permalink / raw)
  To: David Miller; +Cc: benh, netdev

On 11/29/06, David Miller <davem@davemloft.net> wrote:
> From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Date: Wed, 29 Nov 2006 09:57:24 +1100
>
> >
> > > This looks mostly fine.
> > >
> > > I was thinking about the lockless stuff, and I wonder if there
> > > is a clever way you can get it back down to one PIO on the
> > > GREG_STAT register.
> > >
> > > I think you'd need to have the ->poll() clear gp->status, then
> > > do a smp_wmb(), right before it re-enables interrupts.
> > >
> > > Then in the interrupt handler, you need to find a way to safely
> > > OR-in any unset bits in gp->status in a race-free manner.
> >
> > Having it atomic might work at a slightly smaller cost than a lock,
> > though atomics don't have strong ordering requirements so you'd still
> > have to be a bit careful.
>
> At least in theory the atomic + any necessary memory barriers
> would be cheaper than the extra PIO read we need otherwise.

Just a remark: the lockless impl doesn't add a PIO read; it adds a PIO
write (to the IACK reg). FYI, I'm currently checking whether I can get
rid of this extra PIO write, based on David's suggestion...


-- 
Eric


* Re: [patch sungem] improved locking
  2006-11-28 22:49                 ` David Miller
  2006-11-28 22:57                   ` Benjamin Herrenschmidt
@ 2006-11-29 10:56                   ` Eric Lemoine
  2006-11-29 22:37                     ` Eric Lemoine
  2006-12-12  1:43                     ` David Miller
  1 sibling, 2 replies; 31+ messages in thread
From: Eric Lemoine @ 2006-11-29 10:56 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, benh

On 11/28/06, David Miller <davem@davemloft.net> wrote:
> From: "Eric Lemoine" <eric.lemoine@gmail.com>
> Date: Tue, 14 Nov 2006 22:54:40 +0100
>
> > On 11/14/06, David Miller <davem@davemloft.net> wrote:
> > > From: "Eric Lemoine" <eric.lemoine@gmail.com>
> > > Date: Tue, 14 Nov 2006 08:28:42 +0100
> > >
> > > > because it makes it explicit that only bits 0 through 6 are taken into
> > > > account when writing the IACK register.
> > >
> > > The phrase "bits 0 through 6" doesn't make any sense when bit 3 DOES
> > > NOT EXIST in the hardware, it's reserved, it's not there, so including
> > > it only confuses people and obfuscates the code.
> > >
> > > Please use the explicit bit mask composed of existing macros, which
> > > not only makes sure that the mask has meaning, but it also makes sure
> > > that reserved and non-existing bits are never referenced.
> >
> > Patch attached.
> >
> > Remember, the GEM_INTERRUPT_LOCKLESS isn't there to stay. It's
> > currently there because I'm not sure the lockless implementation of
> > gem_interrupt() will work with poll_net_controller.
> >
> > Thanks,
> >
> > Signed-off-by: Eric Lemoine <eric.lemoine@gmail.com>
>
> This looks mostly fine.
>
> I was thinking about the lockless stuff, and I wonder if there
> is a clever way you can get it back down to one PIO on the
> GREG_STAT register.
>
> I think you'd need to have the ->poll() clear gp->status, then
> do a smp_wmb(), right before it re-enables interrupts.
>
> Then in the interrupt handler, you need to find a way to safely
> OR-in any unset bits in gp->status in a race-free manner.

I don't understand why we'd need all this.

I think the following code for gem_interrupt should do the trick:

static irqreturn_t gem_interrupt(int irq, void *dev_id)
{
    struct net_device *dev = dev_id;
    struct gem *gp = dev->priv;
    unsigned int handled = 1;

    if (!gp->running)
        goto out;

    if (netif_rx_schedule_prep(dev)) {
        u32 gem_status = gem_read_status(gp);
        gem_disable_ints(gp);

        if (unlikely(!gem_status))
            handled = 0;

        if (gem_irq_sync(gp)) {
            netif_poll_enable(dev);
            goto out;
        }

        gp->status = gem_status;
        __netif_rx_schedule(dev);
    }

out:
    return IRQ_RETVAL(handled);
}

The important thing is: we __netif_rx_schedule even if gem_status is 0
(shared irq case) because we don't want to miss events should the
following scenario occur:

CPU0                                  CPU1
gem interrupt
prepare rx schedule
                                      gem_interrupt
                                      rx is already scheduled
shared irq -> (we can miss NIC events
if we don't __netif_rx_schedule
before returning)

Thanks,
-- 
Eric


* Re: [patch sungem] improved locking
  2006-11-29 10:56                   ` Eric Lemoine
@ 2006-11-29 22:37                     ` Eric Lemoine
  2006-12-13  3:12                       ` Benjamin Herrenschmidt
  2006-12-12  1:43                     ` David Miller
  1 sibling, 1 reply; 31+ messages in thread
From: Eric Lemoine @ 2006-11-29 22:37 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, benh

[-- Attachment #1: Type: text/plain, Size: 28162 bytes --]

On 11/29/06, Eric Lemoine <eric.lemoine@gmail.com> wrote:
> I think the following code for gem_interrupt should do the trick:
>
> static irqreturn_t gem_interrupt(int irq, void *dev_id)
> {
>     struct net_device *dev = dev_id;
>     struct gem *gp = dev->priv;
>     unsigned int handled = 1;
>
>     if (!gp->running)
>         goto out;
>
>     if (netif_rx_schedule_prep(dev)) {
>         u32 gem_status = gem_read_status(gp);
>         gem_disable_ints(gp);
>
>         if (unlikely(!gem_status))
>             handled = 0;
>
>         if (gem_irq_sync(gp)) {
>             netif_poll_enable(dev);
>             goto out;
>         }
>
>         gp->status = gem_status;
>         __netif_rx_schedule(dev);
>     }
>
> out:
>     return IRQ_RETVAL(handled);
> }
>
> The important thing is: we __netif_rx_schedule even if gem_status is 0
> (shared irq case) because we don't want to miss events should the
> following scenario occur:
>
> CPU0                                  CPU1
> gem interrupt
> prepare rx schedule
>                                       gem_interrupt
>                                       rx is already scheduled
> shared irq -> (we can miss NIC events
> if we don't __netif_rx_schedule
> before returning)

This new patch implements the above, and fixes a bug that was in the
previous patch. This bug made BUG_ON() in gem_irq_quiesce() trigger
when putting the link down while the device was closed.

Patch applies to current git net-2.6 (2.6.19-rc6). Dave, do you still
use that tree, or is net-2.6.20 your only working tree at this time?

Thanks,

--
Eric


Signed-off-by: Eric Lemoine <eric.lemoine@gmail.com>

diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 253e96e..1085c3d 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -9,26 +9,6 @@
  *
  * NAPI and NETPOLL support
  * (C) 2004 by Eric Lemoine (eric.lemoine@gmail.com)
- *
- * TODO:
- *  - Now that the driver was significantly simplified, I need to rework
- *    the locking. I'm sure we don't need _2_ spinlocks, and we probably
- *    can avoid taking most of them for so long period of time (and schedule
- *    instead). The main issues at this point are caused by the netdev layer
- *    though:
- *
- *    gem_change_mtu() and gem_set_multicast() are called with a read_lock()
- *    help by net/core/dev.c, thus they can't schedule. That means they can't
- *    call netif_poll_disable() neither, thus force gem_poll() to keep a spinlock
- *    where it could have been dropped. change_mtu especially would love also to
- *    be able to msleep instead of horrid locked delays when resetting the HW,
- *    but that read_lock() makes it impossible, unless I defer it's action to
- *    the reset task, which means it'll be asynchronous (won't take effect until
- *    the system schedules a bit).
- *
- *    Also, it would probably be possible to also remove most of the long-life
- *    locking in open/resume code path (gem_reinit_chip) by beeing more careful
- *    about when we can start taking interrupts or get xmit() called...
  */

 #include <linux/module.h>
@@ -206,18 +186,66 @@ static inline void phy_write(struct gem
 	__phy_write(gp, gp->mii_phy_addr, reg, val);
 }

-static inline void gem_enable_ints(struct gem *gp)
+static inline void __gem_enable_ints(struct gem *gp)
 {
-	/* Enable all interrupts but TXDONE */
+	/* Enable all interrupts, but TXDONE */
 	writel(GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }

+static inline void gem_enable_ints(struct gem *gp)
+{
+	gp->irq_sync = 0;
+	wmb();
+	__gem_enable_ints(gp);
+}
+
 static inline void gem_disable_ints(struct gem *gp)
 {
 	/* Disable all interrupts, including TXDONE */
 	writel(GREG_STAT_NAPI | GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }

+static inline u32 gem_read_status(struct gem *gp)
+{
+	return readl(gp->regs + GREG_STAT);
+}
+
+static inline void gem_netif_stop(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	dev->trans_start = jiffies; /* prevent tx timeout */
+	netif_poll_disable(dev);
+	netif_tx_disable(dev);
+}
+
+static inline void gem_netif_start(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	/* Unconditionally calling netif_wake_queue is ok so long as the
+	 * caller has freed tx slots, which is done in gem_reinit_chip().
+	 */
+	netif_wake_queue(dev);
+	netif_poll_enable(dev);
+}
+
+static inline void gem_schedule_reset_task(struct gem *gp)
+{
+	gp->reset_task_pending = 1;
+	smp_mb();
+	schedule_work(&gp->reset_task);
+}
+
+static inline void gem_wait_reset_task(struct gem *gp)
+{
+	mb();
+	while (gp->reset_task_pending) {
+		yield();
+		mb();
+	}
+}
+
 static void gem_get_cell(struct gem *gp)
 {
 	BUG_ON(gp->cell_enabled < 0);
@@ -658,12 +686,20 @@ static int gem_abnormal_irq(struct net_d
 	return 0;

 do_reset:
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
+	gem_schedule_reset_task(gp);

 	return 1;
 }

+static inline u32 gem_tx_avail(struct gem *gp)
+{
+	smp_mb();
+	return (gp->tx_old <= gp->tx_new) ?
+		gp->tx_old + (TX_RING_SIZE - 1) - gp->tx_new :
+		gp->tx_old - gp->tx_new - 1;
+}
+
+
 static __inline__ void gem_tx(struct net_device *dev, struct gem *gp, u32 gem_status)
 {
 	int entry, limit;
@@ -717,11 +753,20 @@ static __inline__ void gem_tx(struct net
 		gp->net_stats.tx_packets++;
 		dev_kfree_skb_irq(skb);
 	}
+
 	gp->tx_old = entry;

-	if (netif_queue_stopped(dev) &&
-	    TX_BUFFS_AVAIL(gp) > (MAX_SKB_FRAGS + 1))
-		netif_wake_queue(dev);
+	/* Need to make tx_old update visible to gem_start_xmit() */
+	smp_mb();
+
+	if (unlikely(netif_queue_stopped(dev) &&
+	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))) {
+		netif_tx_lock(dev);
+		if (netif_queue_stopped(dev) &&
+	    	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+		netif_tx_unlock(dev);
+	}
 }

 static __inline__ void gem_post_rxds(struct gem *gp, int limit)
@@ -882,12 +927,6 @@ static int gem_rx(struct gem *gp, int wo
 static int gem_poll(struct net_device *dev, int *budget)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	/*
-	 * NAPI locking nightmare: See comment at head of driver
-	 */
-	spin_lock_irqsave(&gp->lock, flags);

 	do {
 		int work_to_do, work_done;
@@ -899,19 +938,10 @@ static int gem_poll(struct net_device *d
 		}

 		/* Run TX completion thread */
-		spin_lock(&gp->tx_lock);
 		gem_tx(dev, gp, gp->status);
-		spin_unlock(&gp->tx_lock);

-		spin_unlock_irqrestore(&gp->lock, flags);
-
-		/* Run RX thread. We don't use any locking here,
-		 * code willing to do bad things - like cleaning the
-		 * rx ring - must call netif_poll_disable(), which
-		 * schedule_timeout()'s if polling is already disabled.
-		 */
+		/* Run RX thread */
 		work_to_do = min(*budget, dev->quota);
-
 		work_done = gem_rx(gp, work_to_do);

 		*budget -= work_done;
@@ -920,53 +950,90 @@ static int gem_poll(struct net_device *d
 		if (work_done >= work_to_do)
 			return 1;

-		spin_lock_irqsave(&gp->lock, flags);
-
-		gp->status = readl(gp->regs + GREG_STAT);
+		gp->status = gem_read_status(gp);
 	} while (gp->status & GREG_STAT_NAPI);

 	__netif_rx_complete(dev);
-	gem_enable_ints(gp);
+	__gem_enable_ints(gp);

-	spin_unlock_irqrestore(&gp->lock, flags);
 	return 0;
 }

+static void gem_irq_quiesce(struct gem *gp)
+{
+	BUG_ON(gp->irq_sync);
+
+	gp->irq_sync = 1;
+	smp_mb();
+
+	synchronize_irq(gp->pdev->irq);
+}
+
+static inline int gem_irq_sync(struct gem *gp)
+{
+	return gp->irq_sync;
+}
+
+static inline void gem_full_lock(struct gem *gp, int irq_sync)
+{
+	spin_lock_bh(&gp->lock);
+	if (irq_sync)
+		gem_irq_quiesce(gp);
+}
+
+static inline void gem_full_unlock(struct gem *gp)
+{
+	spin_unlock_bh(&gp->lock);
+}
+
 static irqreturn_t gem_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
 	struct gem *gp = dev->priv;
-	unsigned long flags;
+	unsigned int handled = 1;

 	/* Swallow interrupts when shutting the chip down, though
 	 * that shouldn't happen, we should have done free_irq() at
 	 * this point...
 	 */
 	if (!gp->running)
-		return IRQ_HANDLED;
-
-	spin_lock_irqsave(&gp->lock, flags);
+		goto out;

 	if (netif_rx_schedule_prep(dev)) {
-		u32 gem_status = readl(gp->regs + GREG_STAT);
+		u32 gem_status;
+		
+		gem_status = gem_read_status(gp);
+		if (unlikely(!gem_status)) {
+			/* Shared IRQ. We still want to __netif_rx_schedule
+			 * here, because we don't want to miss NIC events
+			 * should the following scenario occur:
+			 *
+			 * CPU0				CPU1
+			 * interrupt
+			 * prepare rx schedule
+			 * 				interrupt
+			 * 				rx is already scheduled
+			 * gem_status is 0 (we can
+			 * miss NIC events if we
+			 * don't __netif_rx_schedule
+			 * and return right away here)
+			 */
+			handled = 0;
+		}
+
+		gem_disable_ints(gp);

-		if (gem_status == 0) {
+		if (gem_irq_sync(gp)) {
 			netif_poll_enable(dev);
-			spin_unlock_irqrestore(&gp->lock, flags);
-			return IRQ_NONE;
+			goto out;
 		}
+
 		gp->status = gem_status;
-		gem_disable_ints(gp);
 		__netif_rx_schedule(dev);
 	}

-	spin_unlock_irqrestore(&gp->lock, flags);
-
-	/* If polling was disabled at the time we received that
-	 * interrupt, we may return IRQ_HANDLED here while we
-	 * should return IRQ_NONE. No big deal...
-	 */
-	return IRQ_HANDLED;
+out:
+	return IRQ_RETVAL(handled);
 }

 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -999,14 +1066,7 @@ static void gem_tx_timeout(struct net_de
 	       readl(gp->regs + MAC_RXSTAT),
 	       readl(gp->regs + MAC_RXCFG));

-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
-
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_schedule_reset_task(gp);
 }

 static __inline__ int gem_intme(int entry)
@@ -1023,7 +1083,6 @@ static int gem_start_xmit(struct sk_buff
 	struct gem *gp = dev->priv;
 	int entry;
 	u64 ctrl;
-	unsigned long flags;

 	ctrl = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1037,24 +1096,13 @@ static int gem_start_xmit(struct sk_buff
 			(csum_stuff_off << 21));
 	}

-	local_irq_save(flags);
-	if (!spin_trylock(&gp->tx_lock)) {
-		/* Tell upper layer to requeue */
-		local_irq_restore(flags);
-		return NETDEV_TX_LOCKED;
-	}
-	/* We raced with gem_do_stop() */
-	if (!gp->running) {
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		return NETDEV_TX_BUSY;
-	}
-
 	/* This is a hard error, log it. */
-	if (TX_BUFFS_AVAIL(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
-		netif_stop_queue(dev);
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
-		       dev->name);
+	if (gem_tx_avail(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
+		if (!netif_queue_stopped(dev)) {
+			netif_stop_queue(dev);
+			printk(KERN_ERR PFX "%s: BUG! Tx Ring full when "
+			       "queue awake!\n", dev->name);
+		}
 		return NETDEV_TX_BUSY;
 	}

@@ -1131,15 +1179,17 @@ static int gem_start_xmit(struct sk_buff
 	}

 	gp->tx_new = entry;
-	if (TX_BUFFS_AVAIL(gp) <= (MAX_SKB_FRAGS + 1))
+	if (unlikely(gem_tx_avail(gp) <= (MAX_SKB_FRAGS + 1))) {
 		netif_stop_queue(dev);
+		if (gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+	}

 	if (netif_msg_tx_queued(gp))
 		printk(KERN_DEBUG "%s: tx queued, slot %d, skblen %d\n",
 		       dev->name, entry, skb->len);
 	mb();
 	writel(gp->tx_new, gp->regs + TXDMA_KICK);
-	spin_unlock_irqrestore(&gp->tx_lock, flags);

 	dev->trans_start = jiffies;

@@ -1148,7 +1198,6 @@ static int gem_start_xmit(struct sk_buff

 #define STOP_TRIES 32

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reset(struct gem *gp)
 {
 	int limit;
@@ -1174,7 +1223,6 @@ static void gem_reset(struct gem *gp)
 		printk(KERN_ERR "%s: SW reset is ghetto.\n", gp->dev->name);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_start_dma(struct gem *gp)
 {
 	u32 val;
@@ -1197,9 +1245,7 @@ static void gem_start_dma(struct gem *gp
 	writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. DMA won't be
- * actually stopped before about 4ms tho ...
- */
+/* DMA won't be actually stopped before about 4ms tho ... */
 static void gem_stop_dma(struct gem *gp)
 {
 	u32 val;
@@ -1220,8 +1266,7 @@ static void gem_stop_dma(struct gem *gp)
 }


-/* Must be invoked under gp->lock and gp->tx_lock. */
-// XXX dbl check what that function should do when called on PCS PHY
+/* XXX dbl check what that function should do when called on PCS PHY */
 static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep)
 {
 	u32 advertise, features;
@@ -1306,8 +1351,6 @@ non_mii:

 /* A link-up condition has occurred, initialize and enable the
  * rest of the chip.
- *
- * Must be invoked under gp->lock and gp->tx_lock.
  */
 static int gem_set_link_modes(struct gem *gp)
 {
@@ -1417,7 +1460,6 @@ static int gem_set_link_modes(struct gem
 	return 0;
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static int gem_mdio_link_not_up(struct gem *gp)
 {
 	switch (gp->lstate) {
@@ -1474,13 +1516,13 @@ static void gem_link_timer(unsigned long
 	if (gp->asleep)
 		return;

-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);

 	/* If the reset task is still pending, we just
 	 * reschedule the link timer
 	 */
+	smp_mb();
 	if (gp->reset_task_pending)
 		goto restart;

@@ -1528,8 +1570,7 @@ static void gem_link_timer(unsigned long
 				printk(KERN_INFO "%s: Link down\n",
 					gp->dev->name);
 			netif_carrier_off(gp->dev);
-			gp->reset_task_pending = 1;
-			schedule_work(&gp->reset_task);
+			gem_schedule_reset_task(gp);
 			restart_aneg = 1;
 		} else if (++gp->timer_ticks > 10) {
 			if (found_mii_phy(gp))
@@ -1546,11 +1587,9 @@ restart:
 	mod_timer(&gp->link_timer, jiffies + ((12 * HZ) / 10));
 out_unlock:
 	gem_put_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_clean_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1601,7 +1640,6 @@ static void gem_clean_rings(struct gem *
 	}
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1775,12 +1813,9 @@ #endif
 	netif_carrier_off(gp->dev);

 	/* Can I advertise gigabit here ? I'd need BCM PHY docs... */
-	spin_lock_irq(&gp->lock);
 	gem_begin_auto_negotiation(gp, NULL);
-	spin_unlock_irq(&gp->lock);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_dma(struct gem *gp)
 {
 	u64 desc_dma = (u64) gp->gblock_dvma;
@@ -1818,7 +1853,6 @@ static void gem_init_dma(struct gem *gp)
 		       gp->regs + RXDMA_BLANK);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static u32 gem_setup_multicast(struct gem *gp)
 {
 	u32 rxcfg = 0;
@@ -1860,7 +1894,6 @@ static u32 gem_setup_multicast(struct ge
 	return rxcfg;
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_mac(struct gem *gp)
 {
 	unsigned char *e = &gp->dev->dev_addr[0];
@@ -1943,7 +1976,6 @@ #endif
 		writel(0, gp->regs + WOL_WAKECSR);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_pause_thresholds(struct gem *gp)
 {
        	u32 cfg;
@@ -2096,7 +2128,6 @@ static int gem_check_invariants(struct g
 	return 0;
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reinit_chip(struct gem *gp)
 {
 	/* Reset the chip */
@@ -2116,12 +2147,10 @@ static void gem_reinit_chip(struct gem *
 	gem_init_mac(gp);
 }

-
 /* Must be invoked with no lock held. */
 static void gem_stop_phy(struct gem *gp, int wol)
 {
 	u32 mifcfg;
-	unsigned long flags;

 	/* Let the chip settle down a bit, it seems that helps
 	 * for sleep mode on some models
@@ -2167,13 +2196,13 @@ static void gem_stop_phy(struct gem *gp,
 	writel(0, gp->regs + RXDMA_CFG);

 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);
+
 		gem_reset(gp);
 		writel(MAC_TXRST_CMD, gp->regs + MAC_TXRST);
 		writel(MAC_RXRST_CMD, gp->regs + MAC_RXRST);
-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+
+		gem_full_unlock(gp);

 		/* No need to take the lock here */

@@ -2192,14 +2221,11 @@ static void gem_stop_phy(struct gem *gp,
 	}
 }

-
 static int gem_do_start(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;

-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);

 	/* Enable the cell */
 	gem_get_cell(gp);
@@ -2211,28 +2237,26 @@ static int gem_do_start(struct net_devic

 	if (gp->lstate == link_up) {
 		netif_carrier_on(gp->dev);
+		/* gem_set_link_modes starts DMA and enables ints */
 		gem_set_link_modes(gp);
 	}

-	netif_wake_queue(gp->dev);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gp->irq_sync = 0;
+	gem_full_unlock(gp);

 	if (request_irq(gp->pdev->irq, gem_interrupt,
-				   IRQF_SHARED, dev->name, (void *)dev)) {
+		        IRQF_SHARED, dev->name, (void *)dev)) {
+
 		printk(KERN_ERR "%s: failed to request irq !\n", gp->dev->name);

-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);

 		gp->running =  0;
 		gem_reset(gp);
 		gem_clean_rings(gp);
 		gem_put_cell(gp);

-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);

 		return -EAGAIN;
 	}
@@ -2243,22 +2267,14 @@ static int gem_do_start(struct net_devic
 static void gem_do_stop(struct net_device *dev, int wol)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
-
-	gp->running = 0;

-	/* Stop netif queue */
-	netif_stop_queue(dev);
-
-	/* Make sure ints are disabled */
+	gem_full_lock(gp, 1);
 	gem_disable_ints(gp);
+	
+	gp->running = 0;

-	/* We can drop the lock now */
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	/* We can drop the lock now that running is set to 0 */
+	gem_full_unlock(gp);

 	/* If we are going to sleep with WOL */
 	gem_stop_dma(gp);
@@ -2275,9 +2291,9 @@ static void gem_do_stop(struct net_devic

 	/* Cell not needed neither if no WOL */
 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
+		gem_full_lock(gp, 0);
 		gem_put_cell(gp);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);
 	}
 }

@@ -2287,31 +2303,26 @@ static void gem_reset_task(void *data)

 	mutex_lock(&gp->pm_mutex);

-	netif_poll_disable(gp->dev);
-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
-
 	if (gp->running == 0)
 		goto not_running;

-	if (gp->running) {
-		netif_stop_queue(gp->dev);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);

-		/* Reset the chip & rings */
-		gem_reinit_chip(gp);
-		if (gp->lstate == link_up)
-			gem_set_link_modes(gp);
-		netif_wake_queue(gp->dev);
-	}
- not_running:
-	gp->reset_task_pending = 0;
+	gem_netif_stop(gp);
+
+	/* Reset the chip & rings */
+	gem_reinit_chip(gp);
+	if (gp->lstate == link_up)
+		gem_set_link_modes(gp);

-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_netif_start(gp);

-	netif_poll_enable(gp->dev);
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);

+not_running:
+	gp->reset_task_pending = 0;
 	mutex_unlock(&gp->pm_mutex);
 }

@@ -2324,8 +2335,12 @@ static int gem_open(struct net_device *d
 	mutex_lock(&gp->pm_mutex);

 	/* We need the cell enabled */
-	if (!gp->asleep)
+	if (!gp->asleep) {
 		rc = gem_do_start(dev);
+		if (rc == 0)
+			netif_start_queue(dev);
+	}
+
 	gp->opened = (rc == 0);

 	mutex_unlock(&gp->pm_mutex);
@@ -2337,15 +2352,14 @@ static int gem_close(struct net_device *
 {
 	struct gem *gp = dev->priv;

-	/* Note: we don't need to call netif_poll_disable() here because
-	 * our caller (dev_close) already did it for us
-	 */
-
 	mutex_lock(&gp->pm_mutex);

 	gp->opened = 0;
-	if (!gp->asleep)
+	if (!gp->asleep) {
+		/* Upper layer took care of disabling polling */
+		netif_stop_queue(dev);
 		gem_do_stop(dev, 0);
+	}

 	mutex_unlock(&gp->pm_mutex);

@@ -2357,22 +2371,17 @@ static int gem_suspend(struct pci_dev *p
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;

 	mutex_lock(&gp->pm_mutex);

-	netif_poll_disable(dev);
-
 	printk(KERN_INFO "%s: suspending, WakeOnLan %s\n",
 	       dev->name,
 	       (gp->wake_on_lan && gp->opened) ? "enabled" : "disabled");

 	/* Keep the cell enabled during the entire operation */
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

 	/* If the driver is opened, we stop the MAC */
 	if (gp->opened) {
@@ -2381,6 +2390,7 @@ static int gem_suspend(struct pci_dev *p

 		/* Switch off MAC, remember WOL setting */
 		gp->asleep_wol = gp->wake_on_lan;
+		gem_netif_stop(gp);
 		gem_do_stop(dev, gp->asleep_wol);
 	} else
 		gp->asleep_wol = 0;
@@ -2399,8 +2409,7 @@ static int gem_suspend(struct pci_dev *p
 	mutex_unlock(&gp->pm_mutex);

 	/* Wait for a pending reset task to complete */
-	while (gp->reset_task_pending)
-		yield();
+	gem_wait_reset_task(gp);
 	flush_scheduled_work();

 	/* Shut the PHY down eventually and setup WOL */
@@ -2421,7 +2430,6 @@ static int gem_resume(struct pci_dev *pd
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;

 	printk(KERN_INFO "%s: resuming\n", dev->name);

@@ -2465,11 +2473,9 @@ static int gem_resume(struct pci_dev *pd

 		/* Re-attach net device */
 		netif_device_attach(dev);
-
 	}

-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);

 	/* If we had WOL enabled, the cell clock was never turned off during
 	 * sleep, so we end up beeing unbalanced. Fix that here
@@ -2482,10 +2488,9 @@ static int gem_resume(struct pci_dev *pd
 	 */
 	gem_put_cell(gp);

-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

-	netif_poll_enable(dev);
+	gem_netif_start(gp);

 	mutex_unlock(&gp->pm_mutex);

@@ -2498,8 +2503,8 @@ static struct net_device_stats *gem_get_
 	struct gem *gp = dev->priv;
 	struct net_device_stats *stats = &gp->net_stats;

-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 0);

 	/* I have seen this being called while the PM was in progress,
 	 * so we shield against this
@@ -2522,27 +2527,26 @@ static struct net_device_stats *gem_get_
 		writel(0, gp->regs + MAC_LCOLL);
 	}

-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
+	gem_netif_start(gp);

 	return &gp->net_stats;
 }

+/* gem_set_multicast is called with netif_tx_lock held. Thus, it
+ * cannot sleep.
+ */
 static void gem_set_multicast(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
 	u32 rxcfg, rxcfg_new;
 	int limit = 10000;

-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);

 	if (!gp->running)
 		goto bail;

-	netif_stop_queue(dev);
-
 	rxcfg = readl(gp->regs + MAC_RXCFG);
 	rxcfg_new = gem_setup_multicast(gp);
 #ifdef STRIP_FCS
@@ -2562,11 +2566,8 @@ #endif

 	writel(rxcfg, gp->regs + MAC_RXCFG);

-	netif_wake_queue(dev);
-
  bail:
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }

 /* Jumbo-grams don't seem to work :-( */
@@ -2593,16 +2594,22 @@ static int gem_change_mtu(struct net_dev
 	}

 	mutex_lock(&gp->pm_mutex);
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);
+	
 	dev->mtu = new_mtu;
 	if (gp->running) {
 		gem_reinit_chip(gp);
 		if (gp->lstate == link_up)
 			gem_set_link_modes(gp);
 	}
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+
+	gem_netif_start(gp);
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);
+
 	mutex_unlock(&gp->pm_mutex);

 	return 0;
@@ -2635,7 +2642,7 @@ static int gem_get_settings(struct net_d
 		cmd->phy_address = 0; /* XXX fixed PHYAD */

 		/* Return current PHY settings */
-		spin_lock_irq(&gp->lock);
+		gem_full_lock(gp, 0);
 		cmd->autoneg = gp->want_autoneg;
 		cmd->speed = gp->phy_mii.speed;
 		cmd->duplex = gp->phy_mii.duplex;
@@ -2647,7 +2654,7 @@ static int gem_get_settings(struct net_d
 		 */
 		if (cmd->advertising == 0)
 			cmd->advertising = cmd->supported;
-		spin_unlock_irq(&gp->lock);
+		gem_full_unlock(gp);
 	} else { // XXX PCS ?
 		cmd->supported =
 			(SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
@@ -2685,11 +2692,11 @@ static int gem_set_settings(struct net_d
 		return -EINVAL;

 	/* Apply settings and restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, cmd);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);

 	return 0;
 }
@@ -2702,11 +2709,11 @@ static int gem_nway_reset(struct net_dev
 		return -EINVAL;

 	/* Restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, NULL);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);

 	return 0;
 }
@@ -2770,16 +2777,15 @@ static int gem_ioctl(struct net_device *
 	struct gem *gp = dev->priv;
 	struct mii_ioctl_data *data = if_mii(ifr);
 	int rc = -EOPNOTSUPP;
-	unsigned long flags;

 	/* Hold the PM mutex while doing ioctl's or we may collide
 	 * with power management.
 	 */
 	mutex_lock(&gp->pm_mutex);

-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

 	switch (cmd) {
 	case SIOCGMIIPHY:		/* Get address of MII PHY in use. */
@@ -2809,9 +2815,9 @@ static int gem_ioctl(struct net_device *
 		break;
 	};

-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_put_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

 	mutex_unlock(&gp->pm_mutex);

@@ -2918,6 +2924,7 @@ static void gem_remove_one(struct pci_de
 	if (dev) {
 		struct gem *gp = dev->priv;

+		/* unregister_netdev will close the device if it's open */
 		unregister_netdev(dev);

 		/* Stop the link timer */
@@ -2927,8 +2934,7 @@ static void gem_remove_one(struct pci_de
 		gem_get_cell(gp);

 		/* Wait for a pending reset task to complete */
-		while (gp->reset_task_pending)
-			yield();
+		gem_wait_reset_task(gp);
 		flush_scheduled_work();

 		/* Shut the PHY down */
@@ -3035,8 +3041,8 @@ static int __devinit gem_init_one(struct

 	gp->msg_enable = DEFAULT_MSG;

+	gp->irq_sync = 0;
 	spin_lock_init(&gp->lock);
-	spin_lock_init(&gp->tx_lock);
 	mutex_init(&gp->pm_mutex);

 	init_timer(&gp->link_timer);
@@ -3132,9 +3138,7 @@ #endif
 	 */
 	gem_init_phy(gp);

-	spin_lock_irq(&gp->lock);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);

 	/* Register with kernel */
 	if (register_netdev(dev)) {
@@ -3156,8 +3160,7 @@ #endif
 		printk(KERN_INFO "%s: Found %s PHY\n", dev->name,
 			gp->phy_mii.def ? gp->phy_mii.def->name : "no");

-	/* GEM can do it all... */
-	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_LLTX;
+	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
 	if (pci_using_dac)
 		dev->features |= NETIF_F_HIGHDMA;

@@ -3177,7 +3180,6 @@ err_out_free_netdev:
 err_disable_device:
 	pci_disable_device(pdev);
 	return err;
-
 }


diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h
index a70067c..5869818 100644
--- a/drivers/net/sungem.h
+++ b/drivers/net/sungem.h
@@ -929,11 +929,6 @@ #endif
 #define NEXT_TX(N)	(((N) + 1) & (TX_RING_SIZE - 1))
 #define NEXT_RX(N)	(((N) + 1) & (RX_RING_SIZE - 1))

-#define TX_BUFFS_AVAIL(GP)					\
-	(((GP)->tx_old <= (GP)->tx_new) ?			\
-	  (GP)->tx_old + (TX_RING_SIZE - 1) - (GP)->tx_new :	\
-	  (GP)->tx_old - (GP)->tx_new - 1)
-
 #define RX_OFFSET          2
 #define RX_BUF_ALLOC_SIZE(gp)	((gp)->rx_buf_sz + 28 + RX_OFFSET + 64)

@@ -973,8 +968,8 @@ enum link_state {
 };

 struct gem {
+	int			irq_sync;
 	spinlock_t		lock;
-	spinlock_t		tx_lock;
 	void __iomem		*regs;
 	int			rx_new, rx_old;
 	int			tx_new, tx_old;

[-- Attachment #2: sungem-locking-take2.patch --]
[-- Type: text/x-patch, Size: 26595 bytes --]

diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 253e96e..1085c3d 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -9,26 +9,6 @@
  *
  * NAPI and NETPOLL support
  * (C) 2004 by Eric Lemoine (eric.lemoine@gmail.com)
- *
- * TODO:
- *  - Now that the driver was significantly simplified, I need to rework
- *    the locking. I'm sure we don't need _2_ spinlocks, and we probably
- *    can avoid taking most of them for so long period of time (and schedule
- *    instead). The main issues at this point are caused by the netdev layer
- *    though:
- *
- *    gem_change_mtu() and gem_set_multicast() are called with a read_lock()
- *    help by net/core/dev.c, thus they can't schedule. That means they can't
- *    call netif_poll_disable() neither, thus force gem_poll() to keep a spinlock
- *    where it could have been dropped. change_mtu especially would love also to
- *    be able to msleep instead of horrid locked delays when resetting the HW,
- *    but that read_lock() makes it impossible, unless I defer it's action to
- *    the reset task, which means it'll be asynchronous (won't take effect until
- *    the system schedules a bit).
- *
- *    Also, it would probably be possible to also remove most of the long-life
- *    locking in open/resume code path (gem_reinit_chip) by beeing more careful
- *    about when we can start taking interrupts or get xmit() called...
  */
 
 #include <linux/module.h>
@@ -206,18 +186,66 @@ static inline void phy_write(struct gem 
 	__phy_write(gp, gp->mii_phy_addr, reg, val);
 }
 
-static inline void gem_enable_ints(struct gem *gp)
+static inline void __gem_enable_ints(struct gem *gp)
 {
-	/* Enable all interrupts but TXDONE */
+	/* Enable all interrupts, but TXDONE */
 	writel(GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }
 
+static inline void gem_enable_ints(struct gem *gp)
+{
+	gp->irq_sync = 0;
+	wmb();
+	__gem_enable_ints(gp);
+}
+
 static inline void gem_disable_ints(struct gem *gp)
 {
 	/* Disable all interrupts, including TXDONE */
 	writel(GREG_STAT_NAPI | GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }
 
+static inline u32 gem_read_status(struct gem *gp)
+{
+	return readl(gp->regs + GREG_STAT);
+}
+
+static inline void gem_netif_stop(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	dev->trans_start = jiffies; /* prevent tx timeout */
+	netif_poll_disable(dev);
+	netif_tx_disable(dev);
+}
+
+static inline void gem_netif_start(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	/* Unconditionally calling netif_wake_queue is ok so long as the
+	 * caller has freed tx slots, which is done in gem_reinit_chip().
+	 */
+	netif_wake_queue(dev);
+	netif_poll_enable(dev);
+}
+
+static inline void gem_schedule_reset_task(struct gem *gp)
+{
+	gp->reset_task_pending = 1;
+	smp_mb();
+	schedule_work(&gp->reset_task);
+}
+
+static inline void gem_wait_reset_task(struct gem *gp)
+{
+	mb();
+	while (gp->reset_task_pending) {
+		yield();
+		mb();
+	}
+}
+
 static void gem_get_cell(struct gem *gp)
 {
 	BUG_ON(gp->cell_enabled < 0);
@@ -658,12 +686,20 @@ static int gem_abnormal_irq(struct net_d
 	return 0;
 
 do_reset:
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
+	gem_schedule_reset_task(gp);
 
 	return 1;
 }
 
+static inline u32 gem_tx_avail(struct gem *gp)
+{
+	smp_mb();
+	return (gp->tx_old <= gp->tx_new) ?
+		gp->tx_old + (TX_RING_SIZE - 1) - gp->tx_new :
+		gp->tx_old - gp->tx_new - 1;
+}
+
+
 static __inline__ void gem_tx(struct net_device *dev, struct gem *gp, u32 gem_status)
 {
 	int entry, limit;
@@ -717,11 +753,20 @@ static __inline__ void gem_tx(struct net
 		gp->net_stats.tx_packets++;
 		dev_kfree_skb_irq(skb);
 	}
+
 	gp->tx_old = entry;
 
-	if (netif_queue_stopped(dev) &&
-	    TX_BUFFS_AVAIL(gp) > (MAX_SKB_FRAGS + 1))
-		netif_wake_queue(dev);
+	/* Need to make tx_old update visible to gem_start_xmit() */
+	smp_mb();
+
+	if (unlikely(netif_queue_stopped(dev) &&
+	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))) {
+		netif_tx_lock(dev);
+		if (netif_queue_stopped(dev) &&
+	    	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+		netif_tx_unlock(dev);
+	}
 }
 
 static __inline__ void gem_post_rxds(struct gem *gp, int limit)
@@ -882,12 +927,6 @@ static int gem_rx(struct gem *gp, int wo
 static int gem_poll(struct net_device *dev, int *budget)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	/*
-	 * NAPI locking nightmare: See comment at head of driver
-	 */
-	spin_lock_irqsave(&gp->lock, flags);
 
 	do {
 		int work_to_do, work_done;
@@ -899,19 +938,10 @@ static int gem_poll(struct net_device *d
 		}
 
 		/* Run TX completion thread */
-		spin_lock(&gp->tx_lock);
 		gem_tx(dev, gp, gp->status);
-		spin_unlock(&gp->tx_lock);
 
-		spin_unlock_irqrestore(&gp->lock, flags);
-
-		/* Run RX thread. We don't use any locking here,
-		 * code willing to do bad things - like cleaning the
-		 * rx ring - must call netif_poll_disable(), which
-		 * schedule_timeout()'s if polling is already disabled.
-		 */
+		/* Run RX thread */
 		work_to_do = min(*budget, dev->quota);
-
 		work_done = gem_rx(gp, work_to_do);
 
 		*budget -= work_done;
@@ -920,53 +950,90 @@ static int gem_poll(struct net_device *d
 		if (work_done >= work_to_do)
 			return 1;
 
-		spin_lock_irqsave(&gp->lock, flags);
-
-		gp->status = readl(gp->regs + GREG_STAT);
+		gp->status = gem_read_status(gp);
 	} while (gp->status & GREG_STAT_NAPI);
 
 	__netif_rx_complete(dev);
-	gem_enable_ints(gp);
+	__gem_enable_ints(gp);
 
-	spin_unlock_irqrestore(&gp->lock, flags);
 	return 0;
 }
 
+static void gem_irq_quiesce(struct gem *gp)
+{
+	BUG_ON(gp->irq_sync);
+
+	gp->irq_sync = 1;
+	smp_mb();
+
+	synchronize_irq(gp->pdev->irq);
+}
+
+static inline int gem_irq_sync(struct gem *gp)
+{
+	return gp->irq_sync;
+}
+
+static inline void gem_full_lock(struct gem *gp, int irq_sync)
+{
+	spin_lock_bh(&gp->lock);
+	if (irq_sync)
+		gem_irq_quiesce(gp);
+}
+
+static inline void gem_full_unlock(struct gem *gp)
+{
+	spin_unlock_bh(&gp->lock);
+}
+
 static irqreturn_t gem_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
 	struct gem *gp = dev->priv;
-	unsigned long flags;
+	unsigned int handled = 1;
 
 	/* Swallow interrupts when shutting the chip down, though
 	 * that shouldn't happen, we should have done free_irq() at
 	 * this point...
 	 */
 	if (!gp->running)
-		return IRQ_HANDLED;
-
-	spin_lock_irqsave(&gp->lock, flags);
+		goto out;
 
 	if (netif_rx_schedule_prep(dev)) {
-		u32 gem_status = readl(gp->regs + GREG_STAT);
+		u32 gem_status;
+		
+		gem_status = gem_read_status(gp);
+		if (unlikely(!gem_status)) {
+			/* Shared IRQ. We still want to __netif_rx_schedule
+			 * here, because we don't want to miss NIC events
+			 * should the following scenario occur:
+			 *
+			 * CPU0				CPU1
+			 * interrupt
+			 * prepare rx schedule
+			 * 				interrupt
+			 * 				rx is already scheduled
+			 * gem_status is 0 (we can
+			 * miss NIC events if we 
+			 * don't __netif_rx_schedule
+			 * and return right away here)
+			 */
+			handled = 0;
+		}
+
+		gem_disable_ints(gp);
 
-		if (gem_status == 0) {
+		if (gem_irq_sync(gp)) {
 			netif_poll_enable(dev);
-			spin_unlock_irqrestore(&gp->lock, flags);
-			return IRQ_NONE;
+			goto out;
 		}
+
 		gp->status = gem_status;
-		gem_disable_ints(gp);
 		__netif_rx_schedule(dev);
 	}
 
-	spin_unlock_irqrestore(&gp->lock, flags);
-
-	/* If polling was disabled at the time we received that
-	 * interrupt, we may return IRQ_HANDLED here while we
-	 * should return IRQ_NONE. No big deal...
-	 */
-	return IRQ_HANDLED;
+out:
+	return IRQ_RETVAL(handled);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -999,14 +1066,7 @@ static void gem_tx_timeout(struct net_de
 	       readl(gp->regs + MAC_RXSTAT),
 	       readl(gp->regs + MAC_RXCFG));
 
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
-
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_schedule_reset_task(gp);
 }
 
 static __inline__ int gem_intme(int entry)
@@ -1023,7 +1083,6 @@ static int gem_start_xmit(struct sk_buff
 	struct gem *gp = dev->priv;
 	int entry;
 	u64 ctrl;
-	unsigned long flags;
 
 	ctrl = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1037,24 +1096,13 @@ static int gem_start_xmit(struct sk_buff
 			(csum_stuff_off << 21));
 	}
 
-	local_irq_save(flags);
-	if (!spin_trylock(&gp->tx_lock)) {
-		/* Tell upper layer to requeue */
-		local_irq_restore(flags);
-		return NETDEV_TX_LOCKED;
-	}
-	/* We raced with gem_do_stop() */
-	if (!gp->running) {
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		return NETDEV_TX_BUSY;
-	}
-
 	/* This is a hard error, log it. */
-	if (TX_BUFFS_AVAIL(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
-		netif_stop_queue(dev);
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
-		       dev->name);
+	if (gem_tx_avail(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
+		if (!netif_queue_stopped(dev)) {
+			netif_stop_queue(dev);
+			printk(KERN_ERR PFX "%s: BUG! Tx Ring full when "
+			       "queue awake!\n", dev->name);
+		}
 		return NETDEV_TX_BUSY;
 	}
 
@@ -1131,15 +1179,17 @@ static int gem_start_xmit(struct sk_buff
 	}
 
 	gp->tx_new = entry;
-	if (TX_BUFFS_AVAIL(gp) <= (MAX_SKB_FRAGS + 1))
+	if (unlikely(gem_tx_avail(gp) <= (MAX_SKB_FRAGS + 1))) {
 		netif_stop_queue(dev);
+		if (gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+	}
 
 	if (netif_msg_tx_queued(gp))
 		printk(KERN_DEBUG "%s: tx queued, slot %d, skblen %d\n",
 		       dev->name, entry, skb->len);
 	mb();
 	writel(gp->tx_new, gp->regs + TXDMA_KICK);
-	spin_unlock_irqrestore(&gp->tx_lock, flags);
 
 	dev->trans_start = jiffies;
 
@@ -1148,7 +1198,6 @@ static int gem_start_xmit(struct sk_buff
 
 #define STOP_TRIES 32
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reset(struct gem *gp)
 {
 	int limit;
@@ -1174,7 +1223,6 @@ static void gem_reset(struct gem *gp)
 		printk(KERN_ERR "%s: SW reset is ghetto.\n", gp->dev->name);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_start_dma(struct gem *gp)
 {
 	u32 val;
@@ -1197,9 +1245,7 @@ static void gem_start_dma(struct gem *gp
 	writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. DMA won't be
- * actually stopped before about 4ms tho ...
- */
+/* DMA won't be actually stopped before about 4ms tho ... */
 static void gem_stop_dma(struct gem *gp)
 {
 	u32 val;
@@ -1220,8 +1266,7 @@ static void gem_stop_dma(struct gem *gp)
 }
 
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
-// XXX dbl check what that function should do when called on PCS PHY
+/* XXX dbl check what that function should do when called on PCS PHY */
 static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep)
 {
 	u32 advertise, features;
@@ -1306,8 +1351,6 @@ non_mii:
 
 /* A link-up condition has occurred, initialize and enable the
  * rest of the chip.
- *
- * Must be invoked under gp->lock and gp->tx_lock.
  */
 static int gem_set_link_modes(struct gem *gp)
 {
@@ -1417,7 +1460,6 @@ static int gem_set_link_modes(struct gem
 	return 0;
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static int gem_mdio_link_not_up(struct gem *gp)
 {
 	switch (gp->lstate) {
@@ -1474,13 +1516,13 @@ static void gem_link_timer(unsigned long
 	if (gp->asleep)
 		return;
 
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 
 	/* If the reset task is still pending, we just
 	 * reschedule the link timer
 	 */
+	smp_mb();
 	if (gp->reset_task_pending)
 		goto restart;
 
@@ -1528,8 +1570,7 @@ static void gem_link_timer(unsigned long
 				printk(KERN_INFO "%s: Link down\n",
 					gp->dev->name);
 			netif_carrier_off(gp->dev);
-			gp->reset_task_pending = 1;
-			schedule_work(&gp->reset_task);
+			gem_schedule_reset_task(gp);
 			restart_aneg = 1;
 		} else if (++gp->timer_ticks > 10) {
 			if (found_mii_phy(gp))
@@ -1546,11 +1587,9 @@ restart:
 	mod_timer(&gp->link_timer, jiffies + ((12 * HZ) / 10));
 out_unlock:
 	gem_put_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_clean_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1601,7 +1640,6 @@ static void gem_clean_rings(struct gem *
 	}
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1775,12 +1813,9 @@ #endif
 	netif_carrier_off(gp->dev);
 
 	/* Can I advertise gigabit here ? I'd need BCM PHY docs... */
-	spin_lock_irq(&gp->lock);
 	gem_begin_auto_negotiation(gp, NULL);
-	spin_unlock_irq(&gp->lock);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_dma(struct gem *gp)
 {
 	u64 desc_dma = (u64) gp->gblock_dvma;
@@ -1818,7 +1853,6 @@ static void gem_init_dma(struct gem *gp)
 		       gp->regs + RXDMA_BLANK);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static u32 gem_setup_multicast(struct gem *gp)
 {
 	u32 rxcfg = 0;
@@ -1860,7 +1894,6 @@ static u32 gem_setup_multicast(struct ge
 	return rxcfg;
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_mac(struct gem *gp)
 {
 	unsigned char *e = &gp->dev->dev_addr[0];
@@ -1943,7 +1976,6 @@ #endif
 		writel(0, gp->regs + WOL_WAKECSR);
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_pause_thresholds(struct gem *gp)
 {
        	u32 cfg;
@@ -2096,7 +2128,6 @@ static int gem_check_invariants(struct g
 	return 0;
 }
 
-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reinit_chip(struct gem *gp)
 {
 	/* Reset the chip */
@@ -2116,12 +2147,10 @@ static void gem_reinit_chip(struct gem *
 	gem_init_mac(gp);
 }
 
-
 /* Must be invoked with no lock held. */
 static void gem_stop_phy(struct gem *gp, int wol)
 {
 	u32 mifcfg;
-	unsigned long flags;
 
 	/* Let the chip settle down a bit, it seems that helps
 	 * for sleep mode on some models
@@ -2167,13 +2196,13 @@ static void gem_stop_phy(struct gem *gp,
 	writel(0, gp->regs + RXDMA_CFG);
 
 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);
+
 		gem_reset(gp);
 		writel(MAC_TXRST_CMD, gp->regs + MAC_TXRST);
 		writel(MAC_RXRST_CMD, gp->regs + MAC_RXRST);
-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+
+		gem_full_unlock(gp);
 
 		/* No need to take the lock here */
 
@@ -2192,14 +2221,11 @@ static void gem_stop_phy(struct gem *gp,
 	}
 }
 
-
 static int gem_do_start(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
 
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 
 	/* Enable the cell */
 	gem_get_cell(gp);
@@ -2211,28 +2237,26 @@ static int gem_do_start(struct net_devic
 
 	if (gp->lstate == link_up) {
 		netif_carrier_on(gp->dev);
+		/* gem_set_link_modes starts DMA and enables ints */
 		gem_set_link_modes(gp);
 	}
 
-	netif_wake_queue(gp->dev);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gp->irq_sync = 0;
+	gem_full_unlock(gp);
 
 	if (request_irq(gp->pdev->irq, gem_interrupt,
-				   IRQF_SHARED, dev->name, (void *)dev)) {
+		        IRQF_SHARED, dev->name, (void *)dev)) {
+
 		printk(KERN_ERR "%s: failed to request irq !\n", gp->dev->name);
 
-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);
 
 		gp->running =  0;
 		gem_reset(gp);
 		gem_clean_rings(gp);
 		gem_put_cell(gp);
 
-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);
 
 		return -EAGAIN;
 	}
@@ -2243,22 +2267,14 @@ static int gem_do_start(struct net_devic
 static void gem_do_stop(struct net_device *dev, int wol)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
-
-	gp->running = 0;
 
-	/* Stop netif queue */
-	netif_stop_queue(dev);
-
-	/* Make sure ints are disabled */
+	gem_full_lock(gp, 1);
 	gem_disable_ints(gp);
+	
+	gp->running = 0;
 
-	/* We can drop the lock now */
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	/* We can drop the lock now that running is set to 0 */
+	gem_full_unlock(gp);
 
 	/* If we are going to sleep with WOL */
 	gem_stop_dma(gp);
@@ -2275,9 +2291,9 @@ static void gem_do_stop(struct net_devic
 
 	/* Cell not needed neither if no WOL */
 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
+		gem_full_lock(gp, 0);
 		gem_put_cell(gp);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);
 	}
 }
 
@@ -2287,31 +2303,26 @@ static void gem_reset_task(void *data)
 
 	mutex_lock(&gp->pm_mutex);
 
-	netif_poll_disable(gp->dev);
-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
-
 	if (gp->running == 0)
 		goto not_running;
 
-	if (gp->running) {
-		netif_stop_queue(gp->dev);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);
 
-		/* Reset the chip & rings */
-		gem_reinit_chip(gp);
-		if (gp->lstate == link_up)
-			gem_set_link_modes(gp);
-		netif_wake_queue(gp->dev);
-	}
- not_running:
-	gp->reset_task_pending = 0;
+	gem_netif_stop(gp);
+
+	/* Reset the chip & rings */
+	gem_reinit_chip(gp);
+	if (gp->lstate == link_up)
+		gem_set_link_modes(gp);
 
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_netif_start(gp);
 
-	netif_poll_enable(gp->dev);
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);
 
+not_running:
+	gp->reset_task_pending = 0;
 	mutex_unlock(&gp->pm_mutex);
 }
 
@@ -2324,8 +2335,12 @@ static int gem_open(struct net_device *d
 	mutex_lock(&gp->pm_mutex);
 
 	/* We need the cell enabled */
-	if (!gp->asleep)
+	if (!gp->asleep) {
 		rc = gem_do_start(dev);
+		if (rc == 0)
+			netif_start_queue(dev);
+	}
+
 	gp->opened = (rc == 0);
 
 	mutex_unlock(&gp->pm_mutex);
@@ -2337,15 +2352,14 @@ static int gem_close(struct net_device *
 {
 	struct gem *gp = dev->priv;
 
-	/* Note: we don't need to call netif_poll_disable() here because
-	 * our caller (dev_close) already did it for us
-	 */
-
 	mutex_lock(&gp->pm_mutex);
 
 	gp->opened = 0;
-	if (!gp->asleep)
+	if (!gp->asleep) {
+		/* Upper layer took care of disabling polling */
+		netif_stop_queue(dev);
 		gem_do_stop(dev, 0);
+	}
 
 	mutex_unlock(&gp->pm_mutex);
 
@@ -2357,22 +2371,17 @@ static int gem_suspend(struct pci_dev *p
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;
 
 	mutex_lock(&gp->pm_mutex);
 
-	netif_poll_disable(dev);
-
 	printk(KERN_INFO "%s: suspending, WakeOnLan %s\n",
 	       dev->name,
 	       (gp->wake_on_lan && gp->opened) ? "enabled" : "disabled");
 
 	/* Keep the cell enabled during the entire operation */
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	/* If the driver is opened, we stop the MAC */
 	if (gp->opened) {
@@ -2381,6 +2390,7 @@ static int gem_suspend(struct pci_dev *p
 
 		/* Switch off MAC, remember WOL setting */
 		gp->asleep_wol = gp->wake_on_lan;
+		gem_netif_stop(gp);
 		gem_do_stop(dev, gp->asleep_wol);
 	} else
 		gp->asleep_wol = 0;
@@ -2399,8 +2409,7 @@ static int gem_suspend(struct pci_dev *p
 	mutex_unlock(&gp->pm_mutex);
 
 	/* Wait for a pending reset task to complete */
-	while (gp->reset_task_pending)
-		yield();
+	gem_wait_reset_task(gp);
 	flush_scheduled_work();
 
 	/* Shut the PHY down eventually and setup WOL */
@@ -2421,7 +2430,6 @@ static int gem_resume(struct pci_dev *pd
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;
 
 	printk(KERN_INFO "%s: resuming\n", dev->name);
 
@@ -2465,11 +2473,9 @@ static int gem_resume(struct pci_dev *pd
 
 		/* Re-attach net device */
 		netif_device_attach(dev);
-
 	}
 
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 
 	/* If we had WOL enabled, the cell clock was never turned off during
 	 * sleep, so we end up beeing unbalanced. Fix that here
@@ -2482,10 +2488,9 @@ static int gem_resume(struct pci_dev *pd
 	 */
 	gem_put_cell(gp);
 
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
-	netif_poll_enable(dev);
+	gem_netif_start(gp);
 
 	mutex_unlock(&gp->pm_mutex);
 
@@ -2498,8 +2503,8 @@ static struct net_device_stats *gem_get_
 	struct gem *gp = dev->priv;
 	struct net_device_stats *stats = &gp->net_stats;
 
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 0);
 
 	/* I have seen this being called while the PM was in progress,
 	 * so we shield against this
@@ -2522,27 +2527,26 @@ static struct net_device_stats *gem_get_
 		writel(0, gp->regs + MAC_LCOLL);
 	}
 
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
+	gem_netif_start(gp);
 
 	return &gp->net_stats;
 }
 
+/* gem_set_multicast is called with netif_tx_lock held. Thus, it
+ * cannot sleep.
+ */
 static void gem_set_multicast(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
 	u32 rxcfg, rxcfg_new;
 	int limit = 10000;
 
-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 
 	if (!gp->running)
 		goto bail;
 
-	netif_stop_queue(dev);
-
 	rxcfg = readl(gp->regs + MAC_RXCFG);
 	rxcfg_new = gem_setup_multicast(gp);
 #ifdef STRIP_FCS
@@ -2562,11 +2566,8 @@ #endif
 
 	writel(rxcfg, gp->regs + MAC_RXCFG);
 
-	netif_wake_queue(dev);
-
  bail:
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }
 
 /* Jumbo-grams don't seem to work :-( */
@@ -2593,16 +2594,22 @@ static int gem_change_mtu(struct net_dev
 	}
 
 	mutex_lock(&gp->pm_mutex);
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);
+	
 	dev->mtu = new_mtu;
 	if (gp->running) {
 		gem_reinit_chip(gp);
 		if (gp->lstate == link_up)
 			gem_set_link_modes(gp);
 	}
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+
+	gem_netif_start(gp);
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);
+
 	mutex_unlock(&gp->pm_mutex);
 
 	return 0;
@@ -2635,7 +2642,7 @@ static int gem_get_settings(struct net_d
 		cmd->phy_address = 0; /* XXX fixed PHYAD */
 
 		/* Return current PHY settings */
-		spin_lock_irq(&gp->lock);
+		gem_full_lock(gp, 0);
 		cmd->autoneg = gp->want_autoneg;
 		cmd->speed = gp->phy_mii.speed;
 		cmd->duplex = gp->phy_mii.duplex;
@@ -2647,7 +2654,7 @@ static int gem_get_settings(struct net_d
 		 */
 		if (cmd->advertising == 0)
 			cmd->advertising = cmd->supported;
-		spin_unlock_irq(&gp->lock);
+		gem_full_unlock(gp);
 	} else { // XXX PCS ?
 		cmd->supported =
 			(SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
@@ -2685,11 +2692,11 @@ static int gem_set_settings(struct net_d
 		return -EINVAL;
 
 	/* Apply settings and restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, cmd);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 
 	return 0;
 }
@@ -2702,11 +2709,11 @@ static int gem_nway_reset(struct net_dev
 		return -EINVAL;
 
 	/* Restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, NULL);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 
 	return 0;
 }
@@ -2770,16 +2777,15 @@ static int gem_ioctl(struct net_device *
 	struct gem *gp = dev->priv;
 	struct mii_ioctl_data *data = if_mii(ifr);
 	int rc = -EOPNOTSUPP;
-	unsigned long flags;
 
 	/* Hold the PM mutex while doing ioctl's or we may collide
 	 * with power management.
 	 */
 	mutex_lock(&gp->pm_mutex);
 
-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	switch (cmd) {
 	case SIOCGMIIPHY:		/* Get address of MII PHY in use. */
@@ -2809,9 +2815,9 @@ static int gem_ioctl(struct net_device *
 		break;
 	};
 
-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_put_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);
 
 	mutex_unlock(&gp->pm_mutex);
 
@@ -2918,6 +2924,7 @@ static void gem_remove_one(struct pci_de
 	if (dev) {
 		struct gem *gp = dev->priv;
 
+		/* unregister_netdev will close the device if it's open */
 		unregister_netdev(dev);
 
 		/* Stop the link timer */
@@ -2927,8 +2934,7 @@ static void gem_remove_one(struct pci_de
 		gem_get_cell(gp);
 
 		/* Wait for a pending reset task to complete */
-		while (gp->reset_task_pending)
-			yield();
+		gem_wait_reset_task(gp);
 		flush_scheduled_work();
 
 		/* Shut the PHY down */
@@ -3035,8 +3041,8 @@ static int __devinit gem_init_one(struct
 
 	gp->msg_enable = DEFAULT_MSG;
 
+	gp->irq_sync = 0;
 	spin_lock_init(&gp->lock);
-	spin_lock_init(&gp->tx_lock);
 	mutex_init(&gp->pm_mutex);
 
 	init_timer(&gp->link_timer);
@@ -3132,9 +3138,7 @@ #endif
 	 */
 	gem_init_phy(gp);
 
-	spin_lock_irq(&gp->lock);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
 
 	/* Register with kernel */
 	if (register_netdev(dev)) {
@@ -3156,8 +3160,7 @@ #endif
 		printk(KERN_INFO "%s: Found %s PHY\n", dev->name,
 			gp->phy_mii.def ? gp->phy_mii.def->name : "no");
 
-	/* GEM can do it all... */
-	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_LLTX;
+	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
 	if (pci_using_dac)
 		dev->features |= NETIF_F_HIGHDMA;
 
@@ -3177,7 +3180,6 @@ err_out_free_netdev:
 err_disable_device:
 	pci_disable_device(pdev);
 	return err;
-
 }
 
 
diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h
index a70067c..5869818 100644
--- a/drivers/net/sungem.h
+++ b/drivers/net/sungem.h
@@ -929,11 +929,6 @@ #endif
 #define NEXT_TX(N)	(((N) + 1) & (TX_RING_SIZE - 1))
 #define NEXT_RX(N)	(((N) + 1) & (RX_RING_SIZE - 1))
 
-#define TX_BUFFS_AVAIL(GP)					\
-	(((GP)->tx_old <= (GP)->tx_new) ?			\
-	  (GP)->tx_old + (TX_RING_SIZE - 1) - (GP)->tx_new :	\
-	  (GP)->tx_old - (GP)->tx_new - 1)
-
 #define RX_OFFSET          2
 #define RX_BUF_ALLOC_SIZE(gp)	((gp)->rx_buf_sz + 28 + RX_OFFSET + 64)
 
@@ -973,8 +968,8 @@ enum link_state {
 };
 
 struct gem {
+	int			irq_sync;
 	spinlock_t		lock;
-	spinlock_t		tx_lock;
 	void __iomem		*regs;
 	int			rx_new, rx_old;
 	int			tx_new, tx_old;

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [patch sungem] improved locking
  2006-11-29 10:56                   ` Eric Lemoine
  2006-11-29 22:37                     ` Eric Lemoine
@ 2006-12-12  1:43                     ` David Miller
  2006-12-12  5:33                       ` Eric Lemoine
  1 sibling, 1 reply; 31+ messages in thread
From: David Miller @ 2006-12-12  1:43 UTC (permalink / raw)
  To: eric.lemoine; +Cc: netdev, benh

From: "Eric Lemoine" <eric.lemoine@gmail.com>
Date: Wed, 29 Nov 2006 11:56:30 +0100

> I don't understand why we'd need all this.
> 
> I think the following code for gem_interrupt should do the trick:
...
> The important thing is: we __netif_rx_schedule even if gem_status is 0
> (shared irq case) because we don't want to miss events should the
> following scenario occur:

I see, I missed the bit where we're reading the status register
also in the ->poll() loop.

Reading the status register at least twice every interrupt is
really expensive.

I hope there is a lesson being learned, and nobody out there is
making modern networking NICs that put the interrupt status
in a register.

It should always be in a DMA'd status block, without any exception.
This way it can be polled in the cheapest manner possible.  When the
status block is in main memory, the CPU only takes the "cost" of a
read when the value actually changes.
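
To illustrate the idea -- a rough sketch only, not sungem or tg3 code,
with made-up struct and function names:

	/* Hypothetical NIC that DMAs its status word into coherent host
	 * memory. The ISR then polls plain RAM instead of doing an
	 * expensive MMIO read of a status register on every interrupt.
	 */
	struct nic_status_block {
		u32	status;		/* written by the NIC via DMA */
	};

	static irqreturn_t nic_interrupt(int irq, void *dev_id)
	{
		struct nic *np = dev_id;
		struct nic_status_block *sb = np->status_blk;
		u32 status;

		rmb();			/* observe the NIC's DMA write */
		status = sb->status;
		if (!status)
			return IRQ_NONE; /* shared IRQ, not us: cheap to tell */
		sb->status = 0;		/* re-arm for the next update */
		/* ... stash status, disable ints, schedule ->poll() ... */
		return IRQ_HANDLED;
	}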

Anyways, Eric your changes look fine as far as I can tell, can you
give them a really good testing on some SMP boxes?  Then we can
think about putting it into 2.6.21 or similar, I don't want to
put any more major networking changes into 2.6.20 at this time.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [patch sungem] improved locking
  2006-12-12  1:43                     ` David Miller
@ 2006-12-12  5:33                       ` Eric Lemoine
  2006-12-12  5:36                         ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 31+ messages in thread
From: Eric Lemoine @ 2006-12-12  5:33 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, benh

On 12/12/06, David Miller <davem@davemloft.net> wrote:
> [...]
> Anyways, Eric your changes look fine as far as I can tell, can you
> give them a really good testing on some SMP boxes?

Unfortunately I can't, I don't have the hardware (only an old ibook here).

-- 
Eric

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [patch sungem] improved locking
  2006-12-12  5:33                       ` Eric Lemoine
@ 2006-12-12  5:36                         ` Benjamin Herrenschmidt
  2006-12-12  5:49                           ` Eric Lemoine
  0 siblings, 1 reply; 31+ messages in thread
From: Benjamin Herrenschmidt @ 2006-12-12  5:36 UTC (permalink / raw)
  To: Eric Lemoine; +Cc: David Miller, netdev

On Tue, 2006-12-12 at 06:33 +0100, Eric Lemoine wrote:
> On 12/12/06, David Miller <davem@davemloft.net> wrote:
> > [...]
> > Anyways, Eric your changes look fine as far as I can tell, can you
> > give them a really good testing on some SMP boxes?
> 
> Unfortunately I can't, I don't have the hardware (only an old ibook here).

I do however, I'll give it a beating on a dual G5 as soon as I get a
chance. I'm pretty swamped at the moment and the box is used by somebody
else today.

Ben.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [patch sungem] improved locking
  2006-12-12  5:36                         ` Benjamin Herrenschmidt
@ 2006-12-12  5:49                           ` Eric Lemoine
  2006-12-15  0:59                             ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 31+ messages in thread
From: Eric Lemoine @ 2006-12-12  5:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: David Miller, netdev

On 12/12/06, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> On Tue, 2006-12-12 at 06:33 +0100, Eric Lemoine wrote:
> > On 12/12/06, David Miller <davem@davemloft.net> wrote:
> > > [...]
> > > Anyways, Eric your changes look fine as far as I can tell, can you
> > > give them a really good testing on some SMP boxes?
> >
> > Unfortunately I can't, I don't have the hardware (only an old ibook here).
>
> I do however, I'll give it a beating on a dual G5 as soon as I get a
> chance. I'm pretty swamped at the moment and the box is used by somebody
> else today.

Ok, thanks a lot Benjamin.
-- 
Eric

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [patch sungem] improved locking
  2006-11-29 22:37                     ` Eric Lemoine
@ 2006-12-13  3:12                       ` Benjamin Herrenschmidt
  2006-12-13  3:24                         ` Benjamin Herrenschmidt
  2006-12-13  4:03                         ` David Miller
  0 siblings, 2 replies; 31+ messages in thread
From: Benjamin Herrenschmidt @ 2006-12-13  3:12 UTC (permalink / raw)
  To: Eric Lemoine; +Cc: David Miller, netdev

Been hitting it with raw throughput in both directions plus a few other things
on a dual G5 and the driver didn't crash :-)

I'm seeing a problem though but I'm not sure it's related to your patch,
I'll have to test without it.

Basically, if I use a slightly modified version of tridge's socklib (raw
xput test: basically a tcp connection with one side pushing a known
pattern as fast as it can and the other one just receiving and verifying
the data integrity), it works fine when running only one side (either rx
or tx, doesn't matter).
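
(For the curious, roughly what such a test boils down to -- illustrative
userspace C, not tridge's actual socklib; fd is an already-connected
TCP socket:)

	#include <stdint.h>
	#include <sys/types.h>
	#include <unistd.h>

	#define CHUNK 65536

	/* Sender: push a position-derived byte pattern as fast as possible. */
	static void send_pattern(int fd)
	{
		uint8_t buf[CHUNK];
		uint64_t off = 0;

		for (;;) {
			for (size_t i = 0; i < CHUNK; i++)
				buf[i] = (uint8_t)(off + i);
			ssize_t n = write(fd, buf, CHUNK);
			if (n <= 0)
				break;
			off += n;	/* partial writes keep the pattern aligned */
		}
	}

	/* Receiver: verify every byte against its stream position. */
	static int recv_verify(int fd)
	{
		uint8_t buf[CHUNK];
		uint64_t off = 0;
		ssize_t n;

		while ((n = read(fd, buf, sizeof(buf))) > 0) {
			for (ssize_t i = 0; i < n; i++)
				if (buf[i] != (uint8_t)(off + i))
					return -1;	/* data corruption */
			off += n;
		}
		return 0;
	}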

But if I start it both ways (that is, both a receiver and a sender on the
GMAC box) and the other end is a tg3 (quad g5), then I'm getting a lot
of "eth0: RX MAC fifo overflow smac[02045822]" errors (with other numbers
in place of that one every now and then) on the sungem side.

David, could that be the pause stuff not working properly?

The link is gigabit and the tg3 side doesn't complain about anything.

Ben.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [patch sungem] improved locking
  2006-12-13  3:12                       ` Benjamin Herrenschmidt
@ 2006-12-13  3:24                         ` Benjamin Herrenschmidt
  2006-12-13  4:03                         ` David Miller
  1 sibling, 0 replies; 31+ messages in thread
From: Benjamin Herrenschmidt @ 2006-12-13  3:24 UTC (permalink / raw)
  To: Eric Lemoine; +Cc: David Miller, netdev

On Wed, 2006-12-13 at 14:12 +1100, Benjamin Herrenschmidt wrote:
> Been hitting it with raw throughput in both directions plus a few other things
> on a dual G5 and the driver didn't crash :-)
> 
> I'm seeing a problem though but I'm not sure it's related to your patch,
> I'll have to test without it.
> 
> Basically, if I use a slightly modified version of tridge's socklib (raw
> xput test: basically a tcp connection with one side pushing a known
> pattern as fast as it can and the other one just receiving and verifying
> the data integrity), it works fine when running only one side (either rx
> or tx, doesn't matter).
> 
> But if I start it both ways (that is, both a receiver and a sender on the
> GMAC box) and the other end is a tg3 (quad g5), then I'm getting a lot
> of "eth0: RX MAC fifo overflow smac[02045822]" errors (with other numbers
> in place of that one every now and then) on the sungem side.
> 
> David, could that be the pause stuff not working properly?
> 
> The link is gigabit and the tg3 side doesn't complain about anything.

I've verified that the problem isn't related to Eric's patch and is
there without it... I suppose I just never noticed it before :-)

So far, the patch looks solid. I'll let it run overnight and will holler
tomorrow if anything went wrong.

Cheers,
Ben.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [patch sungem] improved locking
  2006-12-13  3:12                       ` Benjamin Herrenschmidt
  2006-12-13  3:24                         ` Benjamin Herrenschmidt
@ 2006-12-13  4:03                         ` David Miller
  2006-12-13  4:07                           ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 31+ messages in thread
From: David Miller @ 2006-12-13  4:03 UTC (permalink / raw)
  To: benh; +Cc: eric.lemoine, netdev

From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 13 Dec 2006 14:12:13 +1100

> David, could that be the pause stuff not working properly?

It could be, but if the tg3 says flow control is up in the kernel logs
things ought to be OK.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [patch sungem] improved locking
  2006-12-13  4:03                         ` David Miller
@ 2006-12-13  4:07                           ` Benjamin Herrenschmidt
  2006-12-29  5:05                             ` David Miller
  0 siblings, 1 reply; 31+ messages in thread
From: Benjamin Herrenschmidt @ 2006-12-13  4:07 UTC (permalink / raw)
  To: David Miller; +Cc: eric.lemoine, netdev

On Tue, 2006-12-12 at 20:03 -0800, David Miller wrote:
> From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Date: Wed, 13 Dec 2006 14:12:13 +1100
> 
> > David, could that be the pause stuff not working properly?
> 
> It could be, but if the tg3 says flow control is up in the kernel logs
> things ought to be OK.

tg3 says

tg3: eth0: Link is up at 1000 Mbps, full duplex.
tg3: eth0: Flow control is on for TX and on for RX.

but sungem says

eth0: Link is up at 1000 Mbps, full-duplex.
eth0: Pause is disabled

Hrm... I suppose I need to dig more. No time to do that today though.

Ben.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [patch sungem] improved locking
  2006-12-12  5:49                           ` Eric Lemoine
@ 2006-12-15  0:59                             ` Benjamin Herrenschmidt
  2006-12-18  1:15                               ` David Miller
  0 siblings, 1 reply; 31+ messages in thread
From: Benjamin Herrenschmidt @ 2006-12-15  0:59 UTC (permalink / raw)
  To: Eric Lemoine; +Cc: David Miller, netdev

On Tue, 2006-12-12 at 06:49 +0100, Eric Lemoine wrote:
> On 12/12/06, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> > On Tue, 2006-12-12 at 06:33 +0100, Eric Lemoine wrote:
> > > On 12/12/06, David Miller <davem@davemloft.net> wrote:
> > > > [...]
> > > > Anyways, Eric your changes look fine as far as I can tell, can you
> > > > give them a really good testing on some SMP boxes?
> > >
> > > Unfortunately I can't, I don't have the hardware (only an old ibook here).
> >
> > I do, however. I'll give it a beating on a dual G5 as soon as I get a
> > chance. I'm pretty swamped at the moment and the box is used by somebody
> > else today.
> 
> Ok, thanks a lot Benjamin.

Patched driver's been running fine for a couple of days & nights with
constant beating... just those RX MAC fifo overflows every now and then
(though they cause no data corruption and no big hit on driver
performance either). I suppose it's still worth investigating when I
have a bit of time; I must have done something stupid with the pause
settings.

In the meantime, Eric's patch is all good.

Ben.




* Re: [patch sungem] improved locking
  2006-12-15  0:59                             ` Benjamin Herrenschmidt
@ 2006-12-18  1:15                               ` David Miller
  2006-12-18  1:41                                 ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 31+ messages in thread
From: David Miller @ 2006-12-18  1:15 UTC (permalink / raw)
  To: benh; +Cc: eric.lemoine, netdev

From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 15 Dec 2006 11:59:06 +1100

> Patched driver's been running fine for a couple of days & nights with
> constant beating... just those RX MAC fifo overflows every now and then
> (though they cause no data corruption and no big hit on driver
> performance either). I suppose it's still worth investigating when I
> have a bit of time; I must have done something stupid with the pause
> settings.
> 
> In the meantime, Eric's patch is all good.

Thanks for testing this stuff.

I'll take a look at the pause-enabling issue in the sungem
driver, then work on integrating Eric's patch.

Probably we'll need to put this in post-2.6.20 as the merge
window is closed.


* Re: [patch sungem] improved locking
  2006-12-18  1:15                               ` David Miller
@ 2006-12-18  1:41                                 ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 31+ messages in thread
From: Benjamin Herrenschmidt @ 2006-12-18  1:41 UTC (permalink / raw)
  To: David Miller; +Cc: eric.lemoine, netdev


> Thanks for testing this stuff.
> 
> I'll take a look at the pause-enabling issue in the sungem
> driver, then work on integrating Eric's patch.

Ok, thanks.

> Probably we'll need to put this in post-2.6.20 as the merge
> window is closed.

Yup, no hurry there.

Ben.




* Re: [patch sungem] improved locking
  2006-12-13  4:07                           ` Benjamin Herrenschmidt
@ 2006-12-29  5:05                             ` David Miller
  2006-12-29 21:36                               ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 31+ messages in thread
From: David Miller @ 2006-12-29  5:05 UTC (permalink / raw)
  To: benh; +Cc: eric.lemoine, netdev

From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 13 Dec 2006 15:07:24 +1100

> tg3 says
> 
> tg3: eth0: Link is up at 1000 Mbps, full duplex.
> tg3: eth0: Flow control is on for TX and on for RX.
> 
> but sungem says
> 
> eth0: Link is up at 1000 Mbps, full-duplex.
> eth0: Pause is disabled
> 
> Hrm... I suppose I need to dig more. No time to do that today though.

I was about to try and debug this, and noticed immediately that I
didn't recognize any of the code.

Could you look into this? You rewrote all of this stuff, and this
looks like an added regression, because I know this pause stuff
used to work perfectly when I wrote the original GEM driver. :-)


* Re: [patch sungem] improved locking
  2006-12-29  5:05                             ` David Miller
@ 2006-12-29 21:36                               ` Benjamin Herrenschmidt
  2006-12-29 23:20                                 ` David Miller
  0 siblings, 1 reply; 31+ messages in thread
From: Benjamin Herrenschmidt @ 2006-12-29 21:36 UTC (permalink / raw)
  To: David Miller; +Cc: eric.lemoine, netdev

On Thu, 2006-12-28 at 21:05 -0800, David Miller wrote:
> From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Date: Wed, 13 Dec 2006 15:07:24 +1100
> 
> > tg3 says
> > 
> > tg3: eth0: Link is up at 1000 Mbps, full duplex.
> > tg3: eth0: Flow control is on for TX and on for RX.
> > 
> > but sungem says
> > 
> > eth0: Link is up at 1000 Mbps, full-duplex.
> > eth0: Pause is disabled
> > 
> > Hrm... I suppose I need to dig more. No time to do that today though.
> 
> I was about to try and debug this, and noticed immediately that I
> didn't recognize any of the code.
> 
> Could you look into this? You rewrote all of this stuff, and this
> looks like an added regression, because I know this pause stuff
> used to work perfectly when I wrote the original GEM driver. :-)

Heh, it's indeed very possible it's a regression I added. I'll try to
have a look next week. Do you know of anybody who can verify on non-MII
hardware, or is pause irrelevant there?

Ben.




* Re: [patch sungem] improved locking
  2006-12-29 21:36                               ` Benjamin Herrenschmidt
@ 2006-12-29 23:20                                 ` David Miller
  0 siblings, 0 replies; 31+ messages in thread
From: David Miller @ 2006-12-29 23:20 UTC (permalink / raw)
  To: benh; +Cc: eric.lemoine, netdev

From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Sat, 30 Dec 2006 08:36:07 +1100

> Heh, it's indeed very possible it's a regression I added. I'll try to
> have a look next week. Do you know of anybody who can verify on non-MII
> hardware, or is pause irrelevant there?

For PCS-based PHYs, most of the logic is taken care of internally,
and the pause-present signal is just sampled from one of the
PCS registers.

I think I have one here using non-MII that I could check at
some point :)
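
In driver terms that sampling is just a register read and a bit test: the
PCS link-partner ability register carries symmetric and asymmetric pause
bits. A hedged sketch, assuming the PCS_MIILP register and the
PCS_MIIADV_SP/PCS_MIIADV_AP bit names from the sungem header (the helper
name is illustrative):

/* Pause-present on a serialink/serdes PHY: true when the partner
 * advertised symmetric or asymmetric pause in the PCS LP register. */
static int gem_pcs_pause_present(struct gem *gp)
{
        u32 pcs_lpa = readl(gp->regs + PCS_MIILP);

        return (pcs_lpa & (PCS_MIIADV_SP | PCS_MIIADV_AP)) != 0;
}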



Thread overview: 31+ messages
2006-11-09 21:33 [patch sungem] improved locking Eric Lemoine
2006-11-09 23:04 ` David Miller
2006-11-10 13:28   ` Eric Lemoine
2006-11-10 20:37     ` Benjamin Herrenschmidt
2006-11-10 20:42     ` David Miller
2006-11-12 23:11       ` Eric Lemoine
2006-11-14  0:46         ` David Miller
2006-11-14  7:28           ` Eric Lemoine
2006-11-14  7:44             ` David Miller
2006-11-14 21:54               ` Eric Lemoine
2006-11-28 22:49                 ` David Miller
2006-11-28 22:57                   ` Benjamin Herrenschmidt
2006-11-28 23:43                     ` David Miller
2006-11-29  0:19                       ` Benjamin Herrenschmidt
2006-11-29 10:16                       ` Eric Lemoine
2006-11-29 10:56                   ` Eric Lemoine
2006-11-29 22:37                     ` Eric Lemoine
2006-12-13  3:12                       ` Benjamin Herrenschmidt
2006-12-13  3:24                         ` Benjamin Herrenschmidt
2006-12-13  4:03                         ` David Miller
2006-12-13  4:07                           ` Benjamin Herrenschmidt
2006-12-29  5:05                             ` David Miller
2006-12-29 21:36                               ` Benjamin Herrenschmidt
2006-12-29 23:20                                 ` David Miller
2006-12-12  1:43                     ` David Miller
2006-12-12  5:33                       ` Eric Lemoine
2006-12-12  5:36                         ` Benjamin Herrenschmidt
2006-12-12  5:49                           ` Eric Lemoine
2006-12-15  0:59                             ` Benjamin Herrenschmidt
2006-12-18  1:15                               ` David Miller
2006-12-18  1:41                                 ` Benjamin Herrenschmidt
