* [PATCH net v6 0/4] tg3: tx_pending fixes
@ 2014-09-05  1:30 Benjamin Poirier
From: Benjamin Poirier @ 2014-09-05  1:30 UTC (permalink / raw)
  To: Prashant Sreedharan, Michael Chan; +Cc: netdev, linux-kernel


Extra info regarding patch 4:
This version of the series calls gso_segment() without NETIF_F_SG. This avoids
the need for the desc_cnt_est estimate that tg3_tso_bug() used in previous
versions of this patch series. Since Michael had previously raised concerns
about gso_segment without SG, I ran some netperf throughput tests, using a
small patch to force tg3_tso_bug() to be called even when it is not needed [1].

root@linux-y64m:~# perf stat -r10 -ad netperf -H 192.168.9.30 -l60 -T 0,0 -t omni -- -d send

* original tg3_tso_bug() (ie. without patch 4/4)
  781±2 10^6bits/s
  6.60 cycle/bit
* gso_segment() without SG (current series)
  801.0±0.9 10^6bits/s
  5.79 cycle/bit
* gso_segment() with SG (alternate patch 4/4 [2])
  783±2 10^6bits/s
  7.25 cycle/bit

(For reference, with the original tg3_tso_bug() implementation but without
forcing it to be called, the throughput I get is 822±1 10^6bits/s @ 3.82
cycle/bit with 0 invocations of tg3_tso_bug)
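
(The cycle/bit figure can be read as roughly

	cycle/bit ~= cycles counted system-wide by perf -a over the run
	             / (throughput in bits/s * 60 s test length)

i.e. it charges all CPU activity during the run to the transfer; this is my
reading of the methodology, not something stated in the run output.)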

[1] fault injection patch

---
 drivers/net/ethernet/broadcom/tg3.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index cb77ae9..f9144dc 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -47,6 +47,7 @@
 #include <linux/ssb/ssb_driver_gige.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
+#include <linux/debugfs.h>
 
 #include <net/checksum.h>
 #include <net/ip.h>
@@ -468,6 +469,27 @@ static const struct {
 #define TG3_NUM_TEST	ARRAY_SIZE(ethtool_test_keys)
 
 
+/* debugging stuff */
+static u32 tg3_do_mangle;
+static struct dentry *tg3_mangle_debugfs;
+
+static int __init tg3_mod_init(void)
+{
+	tg3_mangle_debugfs = debugfs_create_u32("tg3_do_mangle", S_IRUGO |
+						S_IWUSR, NULL,
+						&tg3_do_mangle);
+
+	return 0;
+}
+module_init(tg3_mod_init);
+
+static void __exit tg3_mod_exit(void)
+{
+	debugfs_remove(tg3_mangle_debugfs);
+}
+module_exit(tg3_mod_exit);
+/* --- */
+
 static void tg3_write32(struct tg3 *tp, u32 off, u32 val)
 {
 	writel(val, tp->regs + off);
@@ -8048,6 +8070,11 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 				would_hit_hwbug = 1;
 				break;
 			}
+
+			if (tg3_do_mangle > 0) {
+				would_hit_hwbug = 4;
+				break;
+			}
 		}
 	}
 
-- 
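
Usage sketch for the knob above (assuming debugfs is mounted at the usual
/sys/kernel/debug; the path follows from the debugfs_create_u32() call with a
NULL parent):

	# echo 1 > /sys/kernel/debug/tg3_do_mangle   # force the tg3_tso_bug() path
	# echo 0 > /sys/kernel/debug/tg3_do_mangle   # back to normal behavior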

[2] alternate patch 4

call gso_segment() with SG (i.e. without masking NETIF_F_SG out of the feature flags)

---
 drivers/net/ethernet/broadcom/tg3.c | 80 +++++++++++++++++++++++++++----------
 1 file changed, 59 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index ee93b51..1ecb393 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -205,6 +205,9 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits)
 /* minimum number of free TX descriptors required to wake up TX process */
 #define TG3_TX_WAKEUP_THRESH(tnapi)	max_t(u32, (tnapi)->tx_pending / 4, \
 					      MAX_SKB_FRAGS + 1)
+/* estimate a certain number of descriptors per gso segment */
+#define TG3_TX_DESC_PER_SEG(seg_nb)	((seg_nb) * 3)
+
 #define TG3_TX_BD_DMA_MAX_2K		2048
 #define TG3_TX_BD_DMA_MAX_4K		4096
 
@@ -7852,6 +7855,8 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi,
 }
 
 static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
+static netdev_tx_t __tg3_start_xmit(struct sk_buff *, struct net_device *,
+				    u32);
 
 /* Returns true if the queue has been stopped. Note that it may have been
  * restarted since.
@@ -7888,27 +7893,56 @@ static inline bool tg3_maybe_stop_txq(struct tg3_napi *tnapi,
 static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
 		       struct netdev_queue *txq, struct sk_buff *skb)
 {
-	struct sk_buff *segs, *nskb;
-	u32 frag_cnt_est = skb_shinfo(skb)->gso_segs * 3;
+	unsigned int segs_remaining = skb_shinfo(skb)->gso_segs;
+	u32 desc_cnt_est = TG3_TX_DESC_PER_SEG(segs_remaining);
 
-	/* Estimate the number of fragments in the worst case */
-	tg3_maybe_stop_txq(tnapi, txq, frag_cnt_est, frag_cnt_est);
-	if (netif_tx_queue_stopped(txq))
-		return NETDEV_TX_BUSY;
+	if (unlikely(tg3_tx_avail(tnapi) <= desc_cnt_est)) {
+		if (!skb_is_nonlinear(skb) || skb_linearize(skb))
+			goto tg3_tso_bug_drop;
+		tg3_start_xmit(skb, tp->dev);
+	} else {
+		struct sk_buff *segs, *nskb;
 
-	segs = skb_gso_segment(skb, tp->dev->features &
-				    ~(NETIF_F_TSO | NETIF_F_TSO6));
-	if (IS_ERR(segs) || !segs)
-		goto tg3_tso_bug_end;
+		segs = skb_gso_segment(skb, tp->dev->features &
+				       ~(NETIF_F_TSO | NETIF_F_TSO6));
+		if (IS_ERR(segs) || !segs)
+			goto tg3_tso_bug_drop;
 
-	do {
-		nskb = segs;
-		segs = segs->next;
-		nskb->next = NULL;
-		tg3_start_xmit(nskb, tp->dev);
-	} while (segs);
+		do {
+			unsigned int desc_cnt = skb_shinfo(segs)->nr_frags + 1;
+
+			nskb = segs;
+			segs = segs->next;
+			nskb->next = NULL;
+
+			if (tg3_tx_avail(tnapi) <= segs_remaining - 1 +
+			    desc_cnt && skb_linearize(nskb)) {
+				nskb->next = segs;
+				segs = nskb;
+				do {
+					nskb = segs->next;
+
+					dev_kfree_skb_any(segs);
+					segs = nskb;
+				} while (segs);
+				tg3_maybe_stop_txq(tnapi, txq, MAX_SKB_FRAGS + 1,
+						   TG3_TX_WAKEUP_THRESH(tnapi));
+
+				goto tg3_tso_bug_drop;
+			}
+			if (--segs_remaining)
+				__tg3_start_xmit(nskb, tp->dev, segs_remaining);
+			else
+				tg3_start_xmit(nskb, tp->dev);
+		} while (segs);
 
-tg3_tso_bug_end:
+		dev_kfree_skb_any(skb);
+	}
+
+	return NETDEV_TX_OK;
+
+tg3_tso_bug_drop:
+	tp->tx_dropped++;
 	dev_kfree_skb_any(skb);
 
 	return NETDEV_TX_OK;
@@ -7917,6 +7951,12 @@ tg3_tso_bug_end:
 /* hard_start_xmit for all devices */
 static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	return __tg3_start_xmit(skb, dev, MAX_SKB_FRAGS + 1);
+}
+
+static netdev_tx_t __tg3_start_xmit(struct sk_buff *skb,
+				    struct net_device *dev, u32 stop_thresh)
+{
 	struct tg3 *tp = netdev_priv(dev);
 	u32 len, entry, base_flags, mss, vlan = 0;
 	u32 budget;
@@ -8129,7 +8169,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	tw32_tx_mbox(tnapi->prodmbox, entry);
 
 	tnapi->tx_prod = entry;
-	tg3_maybe_stop_txq(tnapi, txq, MAX_SKB_FRAGS + 1,
+	tg3_maybe_stop_txq(tnapi, txq, stop_thresh,
 			   TG3_TX_WAKEUP_THRESH(tnapi));
 
 	mmiowb();
@@ -12363,9 +12403,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e
 	if ((ering->rx_pending > tp->rx_std_ring_mask) ||
 	    (ering->rx_jumbo_pending > tp->rx_jmb_ring_mask) ||
 	    (ering->tx_pending > TG3_TX_RING_SIZE - 1) ||
-	    (ering->tx_pending <= MAX_SKB_FRAGS + 1) ||
-	    (tg3_flag(tp, TSO_BUG) &&
-	     (ering->tx_pending <= (MAX_SKB_FRAGS * 3))))
+	    (ering->tx_pending <= MAX_SKB_FRAGS + 1))
 		return -EINVAL;
 
 	if (netif_running(dev)) {
-- 


* [PATCH net v6 1/4] tg3: Limit minimum tx queue wakeup threshold
@ 2014-09-05  1:30 ` Benjamin Poirier
From: Benjamin Poirier @ 2014-09-05  1:30 UTC (permalink / raw)
  To: Prashant Sreedharan, Michael Chan; +Cc: netdev, linux-kernel

tx_pending may be set by the user (via ethtool -G) to a value low enough that
TG3_TX_WAKEUP_THRESH becomes smaller than MAX_SKB_FRAGS + 1. This may cause
the tx queue to be woken when there are in fact not enough descriptors to
handle an skb with max frags. This in turn causes tg3_start_xmit() to return
NETDEV_TX_BUSY and print error messages. Fix the problem by limiting how low
TG3_TX_WAKEUP_THRESH can go.

Signed-off-by: Benjamin Poirier <bpoirier@suse.de>

---

I noticed the problem in a 3.0 kernel when setting `ethtool -G eth0 tx 50` and
running a netperf TCP_STREAM test. The console fills up with
[10597.596155] tg3 0000:06:00.0: eth0: BUG! Tx Ring full when queue awake!
The problem remains in current kernels, though it does not reproduce as easily
since commit 5640f76 ("net: use a per task frag allocator", v3.7-rc1). I
reproduced it on current kernels by using the fail_page_alloc fault injection
mechanism to force the creation of skbs with many order-0 frags. Note that the
following script may also trigger another bug (a NETDEV WATCHDOG transmit
timeout), which is fixed in the next patch.

$ cat /tmp/doit.sh

F="/sys/kernel/debug/fail_page_alloc"

echo -1 > "$F/times"
echo 0 > "$F/verbose"
echo 0 > "$F/ignore-gfp-wait"
echo 1 > "$F/task-filter"
echo 100 > "$F/probability"

netperf -H 192.168.9.30 -l100 -t omni -- -d send &

n=$!

sleep 0.3
echo 1 > "/proc/$n/make-it-fail"
sleep 10

kill "$n"
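
To make the threshold arithmetic concrete (assuming the usual
MAX_SKB_FRAGS = 17; the example values are mine):

	old: TG3_TX_WAKEUP_THRESH = 50 / 4              = 12  (< 18 needed)
	new: TG3_TX_WAKEUP_THRESH = max(50 / 4, 17 + 1) = 18

i.e. with `ethtool -G eth0 tx 50`, the old threshold could wake the queue
with only 12 free descriptors while a max-frags skb needs 18.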
---
 drivers/net/ethernet/broadcom/tg3.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index cb77ae9..81b3a57 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -202,7 +202,8 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits)
 #endif
 
 /* minimum number of free TX descriptors required to wake up TX process */
-#define TG3_TX_WAKEUP_THRESH(tnapi)		((tnapi)->tx_pending / 4)
+#define TG3_TX_WAKEUP_THRESH(tnapi)	max_t(u32, (tnapi)->tx_pending / 4, \
+					      MAX_SKB_FRAGS + 1)
 #define TG3_TX_BD_DMA_MAX_2K		2048
 #define TG3_TX_BD_DMA_MAX_4K		4096
 
-- 
1.8.4.5



* [PATCH net v6 2/4] tg3: Fix tx_pending check for MAX_SKB_FRAGS
@ 2014-09-05  1:30 ` Benjamin Poirier
From: Benjamin Poirier @ 2014-09-05  1:30 UTC (permalink / raw)
  To: Prashant Sreedharan, Michael Chan; +Cc: netdev, linux-kernel

The rest of the driver assumes at least one free descriptor in the tx ring.
Therefore, since an skb with max frags takes up (MAX_SKB_FRAGS + 1)
descriptors, tx_pending must be > (MAX_SKB_FRAGS + 1).

Signed-off-by: Benjamin Poirier <bpoirier@suse.de>

---

Changes v1->v2
Moved ahead in the series from 3/3 to 2/3, no functional change

I reproduced this bug using the same approach explained in patch 1.
The bug reproduces with tx_pending = 18
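
With the usual MAX_SKB_FRAGS = 17 (my assumption for the example), an skb
with max frags occupies 17 + 1 = 18 descriptors; keeping one descriptor free
on top of that requires tx_pending >= 19, i.e. tx_pending > MAX_SKB_FRAGS + 1,
which matches the reproducer failing at exactly tx_pending = 18.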
---
 drivers/net/ethernet/broadcom/tg3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 81b3a57..c5061c3 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12331,7 +12331,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e
 	if ((ering->rx_pending > tp->rx_std_ring_mask) ||
 	    (ering->rx_jumbo_pending > tp->rx_jmb_ring_mask) ||
 	    (ering->tx_pending > TG3_TX_RING_SIZE - 1) ||
-	    (ering->tx_pending <= MAX_SKB_FRAGS) ||
+	    (ering->tx_pending <= MAX_SKB_FRAGS + 1) ||
 	    (tg3_flag(tp, TSO_BUG) &&
 	     (ering->tx_pending <= (MAX_SKB_FRAGS * 3))))
 		return -EINVAL;
-- 
1.8.4.5



* [PATCH net v6 3/4] tg3: Move tx queue stop logic to its own function
@ 2014-09-05  1:30 ` Benjamin Poirier
From: Benjamin Poirier @ 2014-09-05  1:30 UTC (permalink / raw)
  To: Prashant Sreedharan, Michael Chan; +Cc: netdev, linux-kernel

It is duplicated. Also, the first instance in tg3_start_xmit() is racy.
Consider this interleaving (left column: tg3_start_xmit(); right column:
tg3_tx() on another CPU):

tg3_start_xmit()
	if budget <= ...
				tg3_tx()
					(free up the entire ring)
					tx_cons = ...
					smp_mb()
					if queue_stopped and tx_avail:
					false, so no wakeup
		if !queue_stopped
			stop queue
		return NETDEV_TX_BUSY

... tx queue stopped forever

Signed-off-by: Benjamin Poirier <bpoirier@suse.de>

---

Changes v2->v3
* new patch to avoid repeatedly open coding this block in the next patch.

Changes v3->v4
* added a comment to clarify the return value, as suggested
* replaced the BUG_ON with netdev_err(). No need to be so dramatic; this
  situation will trigger a netdev watchdog anyway.
---
 drivers/net/ethernet/broadcom/tg3.c | 75 ++++++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index c5061c3..6e6b07c 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -7831,6 +7831,35 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi,
 
 static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
 
+/* Returns true if the queue has been stopped. Note that it may have been
+ * restarted since.
+ */
+static inline bool tg3_maybe_stop_txq(struct tg3_napi *tnapi,
+				      struct netdev_queue *txq,
+				      u32 stop_thresh, u32 wakeup_thresh)
+{
+	bool stopped = false;
+
+	if (unlikely(tg3_tx_avail(tnapi) <= stop_thresh)) {
+		if (!netif_tx_queue_stopped(txq)) {
+			stopped = true;
+			netif_tx_stop_queue(txq);
+			if (wakeup_thresh >= tnapi->tx_pending)
+				netdev_err(tnapi->tp->dev,
+					   "BUG! wakeup_thresh too large (%u >= %u)\n",
+					   wakeup_thresh, tnapi->tx_pending);
+		}
+		/* netif_tx_stop_queue() must be done before checking tx index
+		 * in tg3_tx_avail(), because in tg3_tx(), we update tx index
+		 * before checking for netif_tx_queue_stopped().
+		 */
+		smp_mb();
+		if (tg3_tx_avail(tnapi) > wakeup_thresh)
+			netif_tx_wake_queue(txq);
+	}
+	return stopped;
+}
+
 /* Use GSO to workaround all TSO packets that meet HW bug conditions
  * indicated in tg3_tx_frag_set()
  */
@@ -7841,20 +7870,9 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
 	u32 frag_cnt_est = skb_shinfo(skb)->gso_segs * 3;
 
 	/* Estimate the number of fragments in the worst case */
-	if (unlikely(tg3_tx_avail(tnapi) <= frag_cnt_est)) {
-		netif_tx_stop_queue(txq);
-
-		/* netif_tx_stop_queue() must be done before checking
-		 * checking tx index in tg3_tx_avail() below, because in
-		 * tg3_tx(), we update tx index before checking for
-		 * netif_tx_queue_stopped().
-		 */
-		smp_mb();
-		if (tg3_tx_avail(tnapi) <= frag_cnt_est)
-			return NETDEV_TX_BUSY;
-
-		netif_tx_wake_queue(txq);
-	}
+	tg3_maybe_stop_txq(tnapi, txq, frag_cnt_est, frag_cnt_est);
+	if (netif_tx_queue_stopped(txq))
+		return NETDEV_TX_BUSY;
 
 	segs = skb_gso_segment(skb, tp->dev->features &
 				    ~(NETIF_F_TSO | NETIF_F_TSO6));
@@ -7902,16 +7920,13 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * interrupt.  Furthermore, IRQ processing runs lockless so we have
 	 * no IRQ context deadlocks to worry about either.  Rejoice!
 	 */
-	if (unlikely(budget <= (skb_shinfo(skb)->nr_frags + 1))) {
-		if (!netif_tx_queue_stopped(txq)) {
-			netif_tx_stop_queue(txq);
-
-			/* This is a hard error, log it. */
-			netdev_err(dev,
-				   "BUG! Tx Ring full when queue awake!\n");
-		}
-		return NETDEV_TX_BUSY;
+	if (tg3_maybe_stop_txq(tnapi, txq, skb_shinfo(skb)->nr_frags + 1,
+			       TG3_TX_WAKEUP_THRESH(tnapi))) {
+		/* This is a hard error, log it. */
+		netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
 	}
+	if (netif_tx_queue_stopped(txq))
+		return NETDEV_TX_BUSY;
 
 	entry = tnapi->tx_prod;
 	base_flags = 0;
@@ -8087,18 +8102,8 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	tw32_tx_mbox(tnapi->prodmbox, entry);
 
 	tnapi->tx_prod = entry;
-	if (unlikely(tg3_tx_avail(tnapi) <= (MAX_SKB_FRAGS + 1))) {
-		netif_tx_stop_queue(txq);
-
-		/* netif_tx_stop_queue() must be done before checking
-		 * checking tx index in tg3_tx_avail() below, because in
-		 * tg3_tx(), we update tx index before checking for
-		 * netif_tx_queue_stopped().
-		 */
-		smp_mb();
-		if (tg3_tx_avail(tnapi) > TG3_TX_WAKEUP_THRESH(tnapi))
-			netif_tx_wake_queue(txq);
-	}
+	tg3_maybe_stop_txq(tnapi, txq, MAX_SKB_FRAGS + 1,
+			   TG3_TX_WAKEUP_THRESH(tnapi));
 
 	mmiowb();
 	return NETDEV_TX_OK;
-- 
1.8.4.5



* [PATCH net v6 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
@ 2014-09-05  1:30 ` Benjamin Poirier
From: Benjamin Poirier @ 2014-09-05  1:30 UTC (permalink / raw)
  To: Prashant Sreedharan, Michael Chan; +Cc: netdev, linux-kernel

In tg3_set_ringparam(), the tx_pending test meant to cover the cases where
tg3_tso_bug() is entered has two problems:
1) the check is only done for certain hardware, whereas the workaround
is now used more broadly. IOW, the check may not be performed when it
is needed.
2) the check is too optimistic.

For example, with a 5761 (SHORT_DMA_BUG), tg3_set_ringparam() skips over the
"tx_pending <= (MAX_SKB_FRAGS * 3)" check because TSO_BUG is false. Even if it
did do the check, with a full sized skb, frag_cnt_est = 135 but the check is
for <= MAX_SKB_FRAGS * 3 (= 17 * 3 = 51). So the check is insufficient. This
leads to the following situation: by setting, e.g., tx_pending = 100, there can
be an skb that triggers tg3_tso_bug() and that is large enough to cause
tg3_tso_bug() to stop the queue even when it is empty. We then end up with a
netdev watchdog transmit timeout.

Given that 1) some of the conditions tested for in tg3_tx_frag_set() apply
regardless of the chipset flags and that 2) it is difficult to estimate ahead
of time the max possible number of frames that a large skb may be split into
by gso, this patch changes tg3_set_ringparam() to ignore the requirements of
tg3_tso_bug(). Those requirements are instead checked in tg3_tso_bug() itself
and if there is not a sufficient number of descriptors available in the tx
queue, the skb is linearized.

This patch also removes the current scheme in tg3_tso_bug() where the number
of descriptors required to transmit an skb is estimated. Instead,
skb_gso_segment() is called without NETIF_F_SG, which yields predictable,
linear skbs.

Signed-off-by: Benjamin Poirier <bpoirier@suse.de>

---

Changes v1->v2
* in tg3_set_ringparam(), reduce gso_max_segs further to budget 3 descriptors
  per gso seg instead of only 1 as in v1
* in tg3_tso_bug(), check that this estimation (3 desc/seg) holds, otherwise
  linearize some skbs as needed
* in tg3_start_xmit(), make the queue stop threshold a parameter, for the
  reason explained in the commit description

Changes v2->v3
* use tg3_maybe_stop_txq() instead of repeatedly open coding it
* add the requested tp->tx_dropped++ stat increase in tg3_tso_bug() if
  skb_linearize() fails and we must abort
* in the same code block, add an additional check to stop the queue with the
  default threshold. Otherwise, the netdev_err message at the start of
  __tg3_start_xmit() could be triggered when the next frame is transmitted.
  That is because the previous calls to __tg3_start_xmit() in tg3_tso_bug()
  may have been using a stop_thresh=segs_remaining that is < MAX_SKB_FRAGS +
  1.

Changes v3->v4
* in tg3_set_ringparam(), make sure that wakeup_thresh does not end up being
  >= tx_pending. Identified by Prashant.

Changes v4->v5
* in tg3_set_ringparam(), use TG3_TX_WAKEUP_THRESH() and tp->txq_cnt instead
  of tp->irq_max. Identified by Prashant.

Changes v5->v6
* avoid changing gso_max_segs and making the tx queue wakeup threshold
  dynamic. Instead of stopping the queue when there are not enough descriptors
  available, the skb is linearized.

I reproduced this bug using the same approach explained in patch 1.
The bug reproduces with tx_pending <= 135
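
For reference, the 135 figure decomposes as follows (the MSS value is my
illustration, not from the original analysis):

	gso_segs     ~= 45  (a full ~64 KB GSO skb at a 1448-byte MSS)
	frag_cnt_est  = gso_segs * 3 = 135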
---
 drivers/net/ethernet/broadcom/tg3.c | 59 ++++++++++++++++++++++++-------------
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 6e6b07c..a9787a1 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -7830,6 +7830,8 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi,
 }
 
 static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
+static netdev_tx_t __tg3_start_xmit(struct sk_buff *, struct net_device *,
+				    u32);
 
 /* Returns true if the queue has been stopped. Note that it may have been
  * restarted since.
@@ -7866,27 +7868,38 @@ static inline bool tg3_maybe_stop_txq(struct tg3_napi *tnapi,
 static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
 		       struct netdev_queue *txq, struct sk_buff *skb)
 {
-	struct sk_buff *segs, *nskb;
-	u32 frag_cnt_est = skb_shinfo(skb)->gso_segs * 3;
+	unsigned int segs_remaining = skb_shinfo(skb)->gso_segs;
 
-	/* Estimate the number of fragments in the worst case */
-	tg3_maybe_stop_txq(tnapi, txq, frag_cnt_est, frag_cnt_est);
-	if (netif_tx_queue_stopped(txq))
-		return NETDEV_TX_BUSY;
+	if (unlikely(tg3_tx_avail(tnapi) <= segs_remaining)) {
+		if (!skb_is_nonlinear(skb) || skb_linearize(skb))
+			goto tg3_tso_bug_drop;
+		tg3_start_xmit(skb, tp->dev);
+	} else {
+		struct sk_buff *segs, *nskb;
 
-	segs = skb_gso_segment(skb, tp->dev->features &
-				    ~(NETIF_F_TSO | NETIF_F_TSO6));
-	if (IS_ERR(segs) || !segs)
-		goto tg3_tso_bug_end;
+		segs = skb_gso_segment(skb, tp->dev->features &
+				       ~(NETIF_F_TSO | NETIF_F_TSO6 |
+					 NETIF_F_SG));
+		if (IS_ERR(segs) || !segs)
+			goto tg3_tso_bug_drop;
 
-	do {
-		nskb = segs;
-		segs = segs->next;
-		nskb->next = NULL;
-		tg3_start_xmit(nskb, tp->dev);
-	} while (segs);
+		do {
+			nskb = segs;
+			segs = segs->next;
+			nskb->next = NULL;
+			if (--segs_remaining)
+				__tg3_start_xmit(nskb, tp->dev, segs_remaining);
+			else
+				tg3_start_xmit(nskb, tp->dev);
+		} while (segs);
 
-tg3_tso_bug_end:
+		dev_kfree_skb_any(skb);
+	}
+
+	return NETDEV_TX_OK;
+
+tg3_tso_bug_drop:
+	tp->tx_dropped++;
 	dev_kfree_skb_any(skb);
 
 	return NETDEV_TX_OK;
@@ -7895,6 +7908,12 @@ tg3_tso_bug_end:
 /* hard_start_xmit for all devices */
 static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	return __tg3_start_xmit(skb, dev, MAX_SKB_FRAGS + 1);
+}
+
+static netdev_tx_t __tg3_start_xmit(struct sk_buff *skb,
+				    struct net_device *dev, u32 stop_thresh)
+{
 	struct tg3 *tp = netdev_priv(dev);
 	u32 len, entry, base_flags, mss, vlan = 0;
 	u32 budget;
@@ -8102,7 +8121,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	tw32_tx_mbox(tnapi->prodmbox, entry);
 
 	tnapi->tx_prod = entry;
-	tg3_maybe_stop_txq(tnapi, txq, MAX_SKB_FRAGS + 1,
+	tg3_maybe_stop_txq(tnapi, txq, stop_thresh,
 			   TG3_TX_WAKEUP_THRESH(tnapi));
 
 	mmiowb();
@@ -12336,9 +12355,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e
 	if ((ering->rx_pending > tp->rx_std_ring_mask) ||
 	    (ering->rx_jumbo_pending > tp->rx_jmb_ring_mask) ||
 	    (ering->tx_pending > TG3_TX_RING_SIZE - 1) ||
-	    (ering->tx_pending <= MAX_SKB_FRAGS + 1) ||
-	    (tg3_flag(tp, TSO_BUG) &&
-	     (ering->tx_pending <= (MAX_SKB_FRAGS * 3))))
+	    (ering->tx_pending <= MAX_SKB_FRAGS + 1))
 		return -EINVAL;
 
 	if (netif_running(dev)) {
-- 
1.8.4.5



* Re: [PATCH net v6 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
@ 2014-09-05 23:35   ` Prashant Sreedharan
From: Prashant Sreedharan @ 2014-09-05 23:35 UTC (permalink / raw)
  To: Benjamin Poirier; +Cc: Michael Chan, netdev, linux-kernel


>  static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
>  		       struct netdev_queue *txq, struct sk_buff *skb)
>  {
> -	struct sk_buff *segs, *nskb;
> -	u32 frag_cnt_est = skb_shinfo(skb)->gso_segs * 3;
> +	unsigned int segs_remaining = skb_shinfo(skb)->gso_segs;
>  
> -	/* Estimate the number of fragments in the worst case */
> -	tg3_maybe_stop_txq(tnapi, txq, frag_cnt_est, frag_cnt_est);
> -	if (netif_tx_queue_stopped(txq))
> -		return NETDEV_TX_BUSY;
> +	if (unlikely(tg3_tx_avail(tnapi) <= segs_remaining)) {
> +		if (!skb_is_nonlinear(skb) || skb_linearize(skb))
> +			goto tg3_tso_bug_drop;
> +		tg3_start_xmit(skb, tp->dev);

fyi.. Initially the driver was doing a skb_copy()
(tigon3_dma_hwbug_workaround()) for LSO skbs that met the HW bug conditions,
but users started reporting page allocation failures due to the copying of
large LSO skbs. To avoid this, commit 4caab52eb102f1 ("tg3: Prevent page
allocation failure during TSO workaround") changed the driver logic to do
skb_gso_segment() for LSO skbs that meet the HW bug conditions. With
skb_linearize() we might again end up with memory allocation failures for
large LSO skbs, though at a much lower frequency (i.e. when the TX queue is
almost full).

Also, some of the chips tg3 supports, like the 5719 and 57766, have DMA
limits of 4k and 2k respectively. So if a linearized LSO skb is larger than
the DMA limit, tg3_tx_frag_set() will consume more descriptors, and if budget
reaches 0 in tg3_tx_frag_set() we end up calling tg3_tso_bug() again and
eventually dropping the skb if descriptors still have not been freed. Instead,
the skb could be dropped as soon as we know there are not enough descriptors
to handle it on these chip versions.

> [... remainder of the patch snipped ...]




* Re: [PATCH net v6 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
@ 2014-09-06  0:03     ` Eric Dumazet
From: Eric Dumazet @ 2014-09-06  0:03 UTC (permalink / raw)
  To: Prashant Sreedharan; +Cc: Benjamin Poirier, Michael Chan, netdev, linux-kernel

On Fri, 2014-09-05 at 16:35 -0700, Prashant Sreedharan wrote:

> fyi.. Initially the driver was doing a skb_copy()
> (tigon3_dma_hwbug_workaround()) for LSO skbs that met the HW bug conditions,
> but users started reporting page allocation failures due to the copying of
> large LSO skbs. To avoid this, commit 4caab52eb102f1 ("tg3: Prevent page
> allocation failure during TSO workaround") changed the driver logic to do
> skb_gso_segment() for LSO skbs that meet the HW bug conditions. With
> skb_linearize() we might again end up with memory allocation failures for
> large LSO skbs, though at a much lower frequency (i.e. when the TX queue is
> almost full).

Note that the TCP stack has an skb collapse feature, currently limited to
allocating linear skbs that fit in a whole page.

Instead of this private helper (and pretty limited one btw), we could
add a core function, that would build skbs with order-0 fragments.

Instead of skb_linearize(), I guess many call sites could instead use
this new helper.

Because as you said, skb_linearize() of one 64KB GSO packet can ask
order-5 allocations, and this generally does not work reliably.




* Re: [PATCH net v6 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
@ 2014-09-06  0:13       ` David Miller
From: David Miller @ 2014-09-06  0:13 UTC (permalink / raw)
  To: eric.dumazet; +Cc: prashant, bpoirier, mchan, netdev, linux-kernel

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 05 Sep 2014 17:03:30 -0700

> Instead of this private helper (and pretty limited one btw), we could
> add a core function, that would build skbs with order-0 fragments.
> 
> Instead of skb_linearize(), I guess many call sites could instead use
> this new helper.
> 
> Because as you said, skb_linearize() of one 64KB GSO packet can ask
> order-5 allocations, and this generally does not work reliably.

xen-netback could make use of this helper too.


* Re: [PATCH net v6 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
@ 2014-09-06  4:39         ` David Miller
From: David Miller @ 2014-09-06  4:39 UTC (permalink / raw)
  To: eric.dumazet; +Cc: prashant, bpoirier, mchan, netdev, linux-kernel

From: David Miller <davem@davemloft.net>
Date: Fri, 05 Sep 2014 17:13:06 -0700 (PDT)

> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Fri, 05 Sep 2014 17:03:30 -0700
> 
>> Instead of this private helper (and pretty limited one btw), we could
>> add a core function, that would build skbs with order-0 fragments.
>> 
>> Instead of skb_linearize(), I guess many call sites could instead use
>> this new helper.
>> 
>> Because as you said, skb_linearize() of one 64KB GSO packet can ask
>> order-5 allocations, and this generally does not work reliably.
> 
> xen-netback could make use of this helper too.

I was curious what it might look like so I cobbled the following
completely untested patch together :-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index da1378a..eba0ad6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -955,6 +955,67 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 EXPORT_SYMBOL(skb_copy);
 
 /**
+ *	skb_copy_pskb	-	copy sk_buff into a paged skb
+ *	@oskb: buffer to copy
+ *	@gfp_mask: allocation priority
+ *
+ *	Normalize a paged skb into one that maximally uses order
+ *	zero pages in it's fragment array.  This is used to canonicalize
+ *	spaghetti SKBs that use the page array inefficiently (f.e. only
+ *	one byte per page frag).
+ */
+
+struct sk_buff *skb_copy_pskb(const struct sk_buff *oskb, gfp_t gfp_mask)
+{
+	unsigned int data_len = oskb->data_len;
+	int offset, npages, i;
+	struct sk_buff *skb;
+
+	npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	if (npages > MAX_SKB_FRAGS)
+		return NULL;
+
+	skb = __alloc_skb(skb_end_offset(oskb), gfp_mask,
+			  skb_alloc_rx_flag(oskb), NUMA_NO_NODE);
+	if (!skb)
+		return NULL;
+
+	skb_reserve(skb, skb_headroom(oskb));
+	skb_put(skb, skb_headlen(oskb));
+	skb_copy_from_linear_data(oskb, skb->data, skb->len);
+
+	copy_skb_header(skb, oskb);
+
+	skb->truesize += data_len;
+	offset = skb_headlen(oskb);
+	for (i = 0; i < npages; i++) {
+		struct page *page = alloc_page(gfp_mask);
+		unsigned int chunk;
+		u8 *vaddr;
+
+		if (!page) {
+			kfree_skb(skb); /* also frees frags added so far */
+			skb = NULL;
+			break;
+		}
+
+		chunk = min_t(unsigned int, data_len, PAGE_SIZE);
+		skb_fill_page_desc(skb, i, page, 0, chunk);
+
+		vaddr = kmap_atomic(page);
+		skb_copy_bits(oskb, offset, vaddr, chunk);
+		kunmap_atomic(vaddr);
+
+		offset += chunk;
+		data_len -= chunk;
+		skb->data_len += chunk;
+	}
+
+	return skb;
+}
+EXPORT_SYMBOL(skb_copy_pskb);
+
+/**
  *	__pskb_copy_fclone	-  create copy of an sk_buff with private head.
  *	@skb: buffer to copy
  *	@headroom: headroom of new skb

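A hypothetical call-site sketch for the helper above (the drop label and
surrounding names are illustrative, not from tg3):

	/* Rebuild a spaghetti skb into order-0 page frags instead of
	 * linearizing it into one high-order allocation.
	 */
	struct sk_buff *nskb = skb_copy_pskb(skb, GFP_ATOMIC);

	if (!nskb)
		goto drop;	/* no memory: drop, as a failed skb_linearize() would */
	dev_kfree_skb_any(skb);	/* the original is no longer needed */
	skb = nskb;		/* continue down the normal TX path */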

* Re: [PATCH net v6 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
@ 2014-10-01  3:14           ` Prashant
From: Prashant @ 2014-10-01  3:14 UTC (permalink / raw)
  To: David Miller, eric.dumazet; +Cc: bpoirier, mchan, netdev, linux-kernel



On 9/5/2014 9:39 PM, David Miller wrote:
> From: David Miller <davem@davemloft.net>
> Date: Fri, 05 Sep 2014 17:13:06 -0700 (PDT)
>
>> From: Eric Dumazet <eric.dumazet@gmail.com>
>> Date: Fri, 05 Sep 2014 17:03:30 -0700
>>
>>> Instead of this private helper (and pretty limited one btw), we could
>>> add a core function, that would build skbs with order-0 fragments.
>>>
>>> Instead of skb_linearize(), I guess many call sites could instead use
>>> this new helper.
>>>
>>> Because as you said, skb_linearize() of one 64KB GSO packet can ask
>>> order-5 allocations, and this generally does not work reliably.
>>
>> xen-netback could make use of this helper too.
>
> I was curious what it might look like so I cobbled the following
> completely untested patch together :-)
>
> [... skb_copy_pskb() patch snipped; quoted in full earlier in the thread ...]
>
Sorry about the late reply. Out of all the HW bug conditions checked in
tg3_tx_frag_set(), the most frequently hit is the short 8-byte DMA bug, where
the chip cannot handle TX descriptors whose data buffer is 8 bytes or less.
Most of the LSO skbs given to the driver have their fragments filled up to
PAGE_SIZE (except the last fragment, depending on skb->len). And if such an
LSO skb's last fragment meets the 8-byte HW bug condition, the above routine
will not help work around this particular case.


* Re: [PATCH net v6 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
@ 2014-10-01  4:24             ` Eric Dumazet
From: Eric Dumazet @ 2014-10-01  4:24 UTC (permalink / raw)
  To: Prashant; +Cc: David Miller, bpoirier, mchan, netdev, linux-kernel

On Tue, 2014-09-30 at 20:14 -0700, Prashant wrote:

> Sorry about the late reply. Out of all the HW bug conditions checked in
> tg3_tx_frag_set(), the most frequently hit is the short 8-byte DMA bug,
> where the chip cannot handle TX descriptors whose data buffer is 8 bytes
> or less. Most of the LSO skbs given to the driver have their fragments
> filled up to PAGE_SIZE (except the last fragment, depending on skb->len).
> And if such an LSO skb's last fragment meets the 8-byte HW bug condition,
> the above routine will not help work around this particular case.

That's pretty easy to work around.

Say the rebuilt skb has N frags (N > 1, given your description).

They are numbered 0, ..., N-2, N-1.

Instead of filling frag N-2 completely, fill it to PAGE_SIZE - 8, so that the
last frag has at least 8 bytes in it.

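A minimal sketch of that tweak, written against the fill loop of the
skb_copy_pskb() draft earlier in the thread (illustrative and untested; the
8-byte constant is the tg3 short-DMA limit):

	chunk = min_t(unsigned int, data_len, PAGE_SIZE);
	/* If taking a full page would leave a tail of 1..7 bytes, hold
	 * back 8 bytes so the last fragment gets at least 8 bytes.
	 */
	if (data_len > chunk && data_len - chunk < 8)
		chunk = PAGE_SIZE - 8;
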
Also take a look at commit 2e4e44107176d552f8bb1bb76053e850e3809841
("net: add alloc_skb_with_frags() helper")





* Re: [PATCH net v6 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
@ 2014-10-01 18:29               ` Prashant
From: Prashant @ 2014-10-01 18:29 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, bpoirier, mchan, netdev, linux-kernel



On 9/30/2014 9:24 PM, Eric Dumazet wrote:
> On Tue, 2014-09-30 at 20:14 -0700, Prashant wrote:
>
>> Sorry about the late reply. Out of all the HW bug conditions checked in
>> tg3_tx_frag_set(), the most frequently hit is the short 8-byte DMA bug,
>> where the chip cannot handle TX descriptors whose data buffer is 8 bytes
>> or less. Most of the LSO skbs given to the driver have their fragments
>> filled up to PAGE_SIZE (except the last fragment, depending on skb->len).
>> And if such an LSO skb's last fragment meets the 8-byte HW bug condition,
>> the above routine will not help work around this particular case.
>
> That's pretty easy to work around.
>
> Say the rebuilt skb has N frags (N > 1, given your description).
>
> They are numbered 0, ..., N-2, N-1.
>
> Instead of filling frag N-2 completely, fill it to PAGE_SIZE - 8, so that
> the last frag has at least 8 bytes in it.

Definitely, it can be tweaked to match what is needed with additional
workarounds.

>
> Also take a look at commit 2e4e44107176d552f8bb1bb76053e850e3809841
> ("net: add alloc_skb_with_frags() helper")
>
>
This helper is much more modular and flexible than the initially proposed
one. Thanks.

