All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/12] ftgmac100: Rework batch 3 - TX path
@ 2017-04-07  3:30 Benjamin Herrenschmidt
  2017-04-07  3:30 ` [PATCH 01/12] ftgmac100: Add a tx timeout handler Benjamin Herrenschmidt
                   ` (11 more replies)
  0 siblings, 12 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07  3:30 UTC (permalink / raw)
  To: netdev

This is the third batch of updates to the ftgmac100 driver.

This one tackles the TX path of the driver. This provides the
bulk of the performance improvements by adding support for
fragmented sends along with a bunch of cleanups.

Subsequent batches will add various features (ethtool functions,
vlan offlan, ...) and cleanups.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 01/12] ftgmac100: Add a tx timeout handler
  2017-04-07  3:30 [PATCH 00/12] ftgmac100: Rework batch 3 - TX path Benjamin Herrenschmidt
@ 2017-04-07  3:30 ` Benjamin Herrenschmidt
  2017-04-07  3:30 ` [PATCH 02/12] ftgmac100: Move ftgmac100_hard_start_xmit() around Benjamin Herrenschmidt
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07  3:30 UTC (permalink / raw)
  To: netdev; +Cc: Benjamin Herrenschmidt

We have a reset task to reset our chip, use it.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 9f18e5e..f4b719214 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1248,6 +1248,17 @@ static int ftgmac100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int
 	return phy_mii_ioctl(netdev->phydev, ifr, cmd);
 }
 
+static void ftgmac100_tx_timeout(struct net_device *netdev)
+{
+	struct ftgmac100 *priv = netdev_priv(netdev);
+
+	/* Disable all interrupts */
+	iowrite32(0, priv->base + FTGMAC100_OFFSET_IER);
+
+	/* Do the reset outside of interrupt context */
+	schedule_work(&priv->reset_task);
+}
+
 static const struct net_device_ops ftgmac100_netdev_ops = {
 	.ndo_open		= ftgmac100_open,
 	.ndo_stop		= ftgmac100_stop,
@@ -1255,6 +1266,7 @@ static const struct net_device_ops ftgmac100_netdev_ops = {
 	.ndo_set_mac_address	= ftgmac100_set_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_do_ioctl		= ftgmac100_do_ioctl,
+	.ndo_tx_timeout		= ftgmac100_tx_timeout,
 };
 
 static int ftgmac100_setup_mdio(struct net_device *netdev)
@@ -1359,6 +1371,7 @@ static int ftgmac100_probe(struct platform_device *pdev)
 
 	netdev->ethtool_ops = &ftgmac100_ethtool_ops;
 	netdev->netdev_ops = &ftgmac100_netdev_ops;
+	netdev->watchdog_timeo = 5 * HZ;
 
 	platform_set_drvdata(pdev, netdev);
 
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 02/12] ftgmac100: Move ftgmac100_hard_start_xmit() around
  2017-04-07  3:30 [PATCH 00/12] ftgmac100: Rework batch 3 - TX path Benjamin Herrenschmidt
  2017-04-07  3:30 ` [PATCH 01/12] ftgmac100: Add a tx timeout handler Benjamin Herrenschmidt
@ 2017-04-07  3:30 ` Benjamin Herrenschmidt
  2017-04-07 10:09   ` Sergei Shtylyov
  2017-04-07 12:49   ` David Miller
  2017-04-07  3:30 ` [PATCH 03/12] ftgmac100: Merge ftgmac100_xmit() into ftgmac100_hard_start_xmit() Benjamin Herrenschmidt
                   ` (9 subsequent siblings)
  11 siblings, 2 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07  3:30 UTC (permalink / raw)
  To: netdev; +Cc: Benjamin Herrenschmidt

Move it below ftgmac100_xmit() and the rest of the tx path

No code change.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 59 ++++++++++++++++----------------
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index f4b719214..3966c7a 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -63,6 +63,7 @@ struct ftgmac100 {
 	u32 rxdes0_edorr_mask;
 
 	/* Tx ring */
+	struct sk_buff *tx_skbs[TX_QUEUE_ENTRIES];
 	unsigned int tx_clean_pointer;
 	unsigned int tx_pointer;
 	unsigned int tx_pending;
@@ -673,6 +674,35 @@ static int ftgmac100_xmit(struct ftgmac100 *priv, struct sk_buff *skb,
 	return NETDEV_TX_OK;
 }
 
+static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
+				     struct net_device *netdev)
+{
+	struct ftgmac100 *priv = netdev_priv(netdev);
+	dma_addr_t map;
+
+	if (unlikely(skb->len > MAX_PKT_SIZE)) {
+		if (net_ratelimit())
+			netdev_dbg(netdev, "tx packet too big\n");
+
+		netdev->stats.tx_dropped++;
+		kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+
+	map = dma_map_single(priv->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(priv->dev, map))) {
+		/* drop packet */
+		if (net_ratelimit())
+			netdev_err(netdev, "map socket buffer failed\n");
+
+		netdev->stats.tx_dropped++;
+		kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+
+	return ftgmac100_xmit(priv, skb, map);
+}
+
 static void ftgmac100_free_buffers(struct ftgmac100 *priv)
 {
 	int i;
@@ -1210,35 +1240,6 @@ static int ftgmac100_stop(struct net_device *netdev)
 	return 0;
 }
 
-static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
-				     struct net_device *netdev)
-{
-	struct ftgmac100 *priv = netdev_priv(netdev);
-	dma_addr_t map;
-
-	if (unlikely(skb->len > MAX_PKT_SIZE)) {
-		if (net_ratelimit())
-			netdev_dbg(netdev, "tx packet too big\n");
-
-		netdev->stats.tx_dropped++;
-		kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-
-	map = dma_map_single(priv->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(priv->dev, map))) {
-		/* drop packet */
-		if (net_ratelimit())
-			netdev_err(netdev, "map socket buffer failed\n");
-
-		netdev->stats.tx_dropped++;
-		kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-
-	return ftgmac100_xmit(priv, skb, map);
-}
-
 /* optional */
 static int ftgmac100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 {
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 03/12] ftgmac100: Merge ftgmac100_xmit() into ftgmac100_hard_start_xmit()
  2017-04-07  3:30 [PATCH 00/12] ftgmac100: Rework batch 3 - TX path Benjamin Herrenschmidt
  2017-04-07  3:30 ` [PATCH 01/12] ftgmac100: Add a tx timeout handler Benjamin Herrenschmidt
  2017-04-07  3:30 ` [PATCH 02/12] ftgmac100: Move ftgmac100_hard_start_xmit() around Benjamin Herrenschmidt
@ 2017-04-07  3:30 ` Benjamin Herrenschmidt
  2017-04-07  3:30 ` [PATCH 04/12] ftgmac100: Factor tx packet dropping path Benjamin Herrenschmidt
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07  3:30 UTC (permalink / raw)
  To: netdev; +Cc: Benjamin Herrenschmidt

This will make subsequent rework of the tx path simpler

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 58 ++++++++++++++------------------
 1 file changed, 25 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 3966c7a..6fa070a 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -628,12 +628,33 @@ static void ftgmac100_tx_complete(struct ftgmac100 *priv)
 		;
 }
 
-static int ftgmac100_xmit(struct ftgmac100 *priv, struct sk_buff *skb,
-			  dma_addr_t map)
+static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
+				     struct net_device *netdev)
 {
-	struct net_device *netdev = priv->netdev;
-	struct ftgmac100_txdes *txdes;
 	unsigned int len = (skb->len < ETH_ZLEN) ? ETH_ZLEN : skb->len;
+	struct ftgmac100 *priv = netdev_priv(netdev);
+	struct ftgmac100_txdes *txdes;
+	dma_addr_t map;
+
+	if (unlikely(skb->len > MAX_PKT_SIZE)) {
+		if (net_ratelimit())
+			netdev_dbg(netdev, "tx packet too big\n");
+
+		netdev->stats.tx_dropped++;
+		kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+
+	map = dma_map_single(priv->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(priv->dev, map))) {
+		/* drop packet */
+		if (net_ratelimit())
+			netdev_err(netdev, "map socket buffer failed\n");
+
+		netdev->stats.tx_dropped++;
+		kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
 
 	txdes = ftgmac100_current_txdes(priv);
 	ftgmac100_tx_pointer_advance(priv);
@@ -674,35 +695,6 @@ static int ftgmac100_xmit(struct ftgmac100 *priv, struct sk_buff *skb,
 	return NETDEV_TX_OK;
 }
 
-static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
-				     struct net_device *netdev)
-{
-	struct ftgmac100 *priv = netdev_priv(netdev);
-	dma_addr_t map;
-
-	if (unlikely(skb->len > MAX_PKT_SIZE)) {
-		if (net_ratelimit())
-			netdev_dbg(netdev, "tx packet too big\n");
-
-		netdev->stats.tx_dropped++;
-		kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-
-	map = dma_map_single(priv->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(priv->dev, map))) {
-		/* drop packet */
-		if (net_ratelimit())
-			netdev_err(netdev, "map socket buffer failed\n");
-
-		netdev->stats.tx_dropped++;
-		kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-
-	return ftgmac100_xmit(priv, skb, map);
-}
-
 static void ftgmac100_free_buffers(struct ftgmac100 *priv)
 {
 	int i;
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 04/12] ftgmac100: Factor tx packet dropping path
  2017-04-07  3:30 [PATCH 00/12] ftgmac100: Rework batch 3 - TX path Benjamin Herrenschmidt
                   ` (2 preceding siblings ...)
  2017-04-07  3:30 ` [PATCH 03/12] ftgmac100: Merge ftgmac100_xmit() into ftgmac100_hard_start_xmit() Benjamin Herrenschmidt
@ 2017-04-07  3:30 ` Benjamin Herrenschmidt
  2017-04-07  3:30 ` [PATCH 05/12] ftgmac100: Pad small frames properly Benjamin Herrenschmidt
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07  3:30 UTC (permalink / raw)
  To: netdev; +Cc: Benjamin Herrenschmidt

Use a simple goto to a drop path at the tail of the function,
it will be used in a few more cases soon

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 6fa070a..6c71726 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -639,10 +639,7 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 	if (unlikely(skb->len > MAX_PKT_SIZE)) {
 		if (net_ratelimit())
 			netdev_dbg(netdev, "tx packet too big\n");
-
-		netdev->stats.tx_dropped++;
-		kfree_skb(skb);
-		return NETDEV_TX_OK;
+		goto drop;
 	}
 
 	map = dma_map_single(priv->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
@@ -650,10 +647,7 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 		/* drop packet */
 		if (net_ratelimit())
 			netdev_err(netdev, "map socket buffer failed\n");
-
-		netdev->stats.tx_dropped++;
-		kfree_skb(skb);
-		return NETDEV_TX_OK;
+		goto drop;
 	}
 
 	txdes = ftgmac100_current_txdes(priv);
@@ -693,6 +687,13 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 	ftgmac100_txdma_normal_prio_start_polling(priv);
 
 	return NETDEV_TX_OK;
+
+ drop:
+	/* Drop the packet */
+	dev_kfree_skb_any(skb);
+	netdev->stats.tx_dropped++;
+
+	return NETDEV_TX_OK;
 }
 
 static void ftgmac100_free_buffers(struct ftgmac100 *priv)
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 05/12] ftgmac100: Pad small frames properly
  2017-04-07  3:30 [PATCH 00/12] ftgmac100: Rework batch 3 - TX path Benjamin Herrenschmidt
                   ` (3 preceding siblings ...)
  2017-04-07  3:30 ` [PATCH 04/12] ftgmac100: Factor tx packet dropping path Benjamin Herrenschmidt
@ 2017-04-07  3:30 ` Benjamin Herrenschmidt
  2017-04-07 13:05   ` Florian Fainelli
  2017-04-07  3:30 ` [PATCH 06/12] ftgmac100: Store tx skbs in a separate array Benjamin Herrenschmidt
                   ` (6 subsequent siblings)
  11 siblings, 1 reply; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07  3:30 UTC (permalink / raw)
  To: netdev; +Cc: Benjamin Herrenschmidt

Rather than just transmitting garbage past the end of the small
packet.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 6c71726..3f2172f 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -636,6 +636,13 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 	struct ftgmac100_txdes *txdes;
 	dma_addr_t map;
 
+	/* The HW doesn't pad small frames */
+	if (skb_padto(skb, ETH_ZLEN) < 0) {
+		netdev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
+
+	/* Reject oversize packets */
 	if (unlikely(skb->len > MAX_PKT_SIZE)) {
 		if (net_ratelimit())
 			netdev_dbg(netdev, "tx packet too big\n");
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 06/12] ftgmac100: Store tx skbs in a separate array
  2017-04-07  3:30 [PATCH 00/12] ftgmac100: Rework batch 3 - TX path Benjamin Herrenschmidt
                   ` (4 preceding siblings ...)
  2017-04-07  3:30 ` [PATCH 05/12] ftgmac100: Pad small frames properly Benjamin Herrenschmidt
@ 2017-04-07  3:30 ` Benjamin Herrenschmidt
  2017-04-07  3:31 ` [PATCH 07/12] ftgmac100: Cleanup tx queue handling Benjamin Herrenschmidt
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07  3:30 UTC (permalink / raw)
  To: netdev; +Cc: Benjamin Herrenschmidt

Rather than in the descriptor. The descriptor is mapped non-cachable
and rather slow to access.

Since to do that we need to keep track of the tx "pointer" we also
have no use of all the accesors to manipulate it, just open code
it, it's as clear and will help when adding fragmented sends.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 58 +++++++++-----------------------
 1 file changed, 15 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 3f2172f..ff01dfb 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -544,63 +544,29 @@ static dma_addr_t ftgmac100_txdes_get_dma_addr(struct ftgmac100_txdes *txdes)
 	return le32_to_cpu(txdes->txdes3);
 }
 
-/*
- * txdes2 is not used by hardware. We use it to keep track of socket buffer.
- * Since hardware does not touch it, we can skip cpu_to_le32()/le32_to_cpu().
- */
-static void ftgmac100_txdes_set_skb(struct ftgmac100_txdes *txdes,
-				    struct sk_buff *skb)
-{
-	txdes->txdes2 = (unsigned int)skb;
-}
-
-static struct sk_buff *ftgmac100_txdes_get_skb(struct ftgmac100_txdes *txdes)
-{
-	return (struct sk_buff *)txdes->txdes2;
-}
-
 static int ftgmac100_next_tx_pointer(int pointer)
 {
 	return (pointer + 1) & (TX_QUEUE_ENTRIES - 1);
 }
 
-static void ftgmac100_tx_pointer_advance(struct ftgmac100 *priv)
-{
-	priv->tx_pointer = ftgmac100_next_tx_pointer(priv->tx_pointer);
-}
-
-static void ftgmac100_tx_clean_pointer_advance(struct ftgmac100 *priv)
-{
-	priv->tx_clean_pointer = ftgmac100_next_tx_pointer(priv->tx_clean_pointer);
-}
-
-static struct ftgmac100_txdes *ftgmac100_current_txdes(struct ftgmac100 *priv)
-{
-	return &priv->descs->txdes[priv->tx_pointer];
-}
-
-static struct ftgmac100_txdes *
-ftgmac100_current_clean_txdes(struct ftgmac100 *priv)
-{
-	return &priv->descs->txdes[priv->tx_clean_pointer];
-}
-
 static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv)
 {
 	struct net_device *netdev = priv->netdev;
 	struct ftgmac100_txdes *txdes;
+	unsigned int pointer;
 	struct sk_buff *skb;
 	dma_addr_t map;
 
 	if (priv->tx_pending == 0)
 		return false;
 
-	txdes = ftgmac100_current_clean_txdes(priv);
+	pointer = priv->tx_clean_pointer;
+	txdes = &priv->descs->txdes[pointer];
 
 	if (ftgmac100_txdes_owned_by_dma(txdes))
 		return false;
 
-	skb = ftgmac100_txdes_get_skb(txdes);
+	skb = priv->tx_skbs[pointer];
 	map = ftgmac100_txdes_get_dma_addr(txdes);
 
 	netdev->stats.tx_packets++;
@@ -609,10 +575,11 @@ static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv)
 	dma_unmap_single(priv->dev, map, skb_headlen(skb), DMA_TO_DEVICE);
 
 	dev_kfree_skb(skb);
+	priv->tx_skbs[pointer] = NULL;
 
 	ftgmac100_txdes_reset(priv, txdes);
 
-	ftgmac100_tx_clean_pointer_advance(priv);
+	priv->tx_clean_pointer = ftgmac100_next_tx_pointer(pointer);
 
 	spin_lock(&priv->tx_lock);
 	priv->tx_pending--;
@@ -634,6 +601,7 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 	unsigned int len = (skb->len < ETH_ZLEN) ? ETH_ZLEN : skb->len;
 	struct ftgmac100 *priv = netdev_priv(netdev);
 	struct ftgmac100_txdes *txdes;
+	unsigned int pointer;
 	dma_addr_t map;
 
 	/* The HW doesn't pad small frames */
@@ -657,11 +625,12 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 		goto drop;
 	}
 
-	txdes = ftgmac100_current_txdes(priv);
-	ftgmac100_tx_pointer_advance(priv);
+	/* Grab the next free tx descriptor */
+	pointer = priv->tx_pointer;
+	txdes = &priv->descs->txdes[pointer];
 
 	/* setup TX descriptor */
-	ftgmac100_txdes_set_skb(txdes, skb);
+	priv->tx_skbs[pointer] = skb;
 	ftgmac100_txdes_set_dma_addr(txdes, map);
 	ftgmac100_txdes_set_buffer_size(txdes, len);
 
@@ -682,6 +651,9 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 		}
 	}
 
+	/* Update next TX pointer */
+	priv->tx_pointer = ftgmac100_next_tx_pointer(pointer);
+
 	spin_lock(&priv->tx_lock);
 	priv->tx_pending++;
 	if (priv->tx_pending == TX_QUEUE_ENTRIES)
@@ -724,7 +696,7 @@ static void ftgmac100_free_buffers(struct ftgmac100 *priv)
 	/* Free all TX buffers */
 	for (i = 0; i < TX_QUEUE_ENTRIES; i++) {
 		struct ftgmac100_txdes *txdes = &priv->descs->txdes[i];
-		struct sk_buff *skb = ftgmac100_txdes_get_skb(txdes);
+		struct sk_buff *skb = priv->tx_skbs[i];
 		dma_addr_t map = ftgmac100_txdes_get_dma_addr(txdes);
 
 		if (!skb)
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 07/12] ftgmac100: Cleanup tx queue handling
  2017-04-07  3:30 [PATCH 00/12] ftgmac100: Rework batch 3 - TX path Benjamin Herrenschmidt
                   ` (5 preceding siblings ...)
  2017-04-07  3:30 ` [PATCH 06/12] ftgmac100: Store tx skbs in a separate array Benjamin Herrenschmidt
@ 2017-04-07  3:31 ` Benjamin Herrenschmidt
  2017-04-07  3:31 ` [PATCH 08/12] ftgmac100: Move the barrier out of ftgmac100_txdes_set_dma_own() Benjamin Herrenschmidt
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07  3:31 UTC (permalink / raw)
  To: netdev; +Cc: Benjamin Herrenschmidt

We have a private lock which isn't terribly useful, and we maintain
a "tx_pending" counter for information that's already available
via a trivial arithmetic operation. Then we unconditionaly wake
the queue even when not stopped. Finally our code in tx isn't
really safe vs. a concurrent reclaim. The aspeed chips aren't SMP
today but I prefer the code being right and future proof.

So rip that out and replace it with more "standard" queue handling,
currently with a threshold of 1 queue element, which will be
increased when we implement fragmented sends.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 103 ++++++++++++++++++++-----------
 1 file changed, 68 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index ff01dfb..ccf0fcd 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -44,6 +44,9 @@
 #define MAX_PKT_SIZE		1536
 #define RX_BUF_SIZE		MAX_PKT_SIZE	/* must be smaller than 0x3fff */
 
+/* Min number of tx ring entries before stopping queue */
+#define TX_THRESHOLD		(1)
+
 struct ftgmac100_descs {
 	struct ftgmac100_rxdes rxdes[RX_QUEUE_ENTRIES];
 	struct ftgmac100_txdes txdes[TX_QUEUE_ENTRIES];
@@ -66,9 +69,7 @@ struct ftgmac100 {
 	struct sk_buff *tx_skbs[TX_QUEUE_ENTRIES];
 	unsigned int tx_clean_pointer;
 	unsigned int tx_pointer;
-	unsigned int tx_pending;
 	u32 txdes0_edotr_mask;
-	spinlock_t tx_lock;
 
 	/* Scratch page to use when rx skb alloc fails */
 	void *rx_scratch;
@@ -163,7 +164,6 @@ static int ftgmac100_reset_and_config_mac(struct ftgmac100 *priv)
 	priv->rx_pointer = 0;
 	priv->tx_clean_pointer = 0;
 	priv->tx_pointer = 0;
-	priv->tx_pending = 0;
 
 	/* The doc says reset twice with 10us interval */
 	if (ftgmac100_reset_mac(priv, maccr))
@@ -549,6 +549,23 @@ static int ftgmac100_next_tx_pointer(int pointer)
 	return (pointer + 1) & (TX_QUEUE_ENTRIES - 1);
 }
 
+static u32 ftgmac100_tx_buf_avail(struct ftgmac100 *priv)
+{
+	/* Returns the number of available slots in the TX queue
+	 *
+	 * This always leaves one free slot so we don't have to
+	 * worry about empty vs. full, and this simplifies the
+	 * test for ftgmac100_tx_buf_cleanable() below
+	 */
+	return (priv->tx_clean_pointer - priv->tx_pointer - 1) &
+		(TX_QUEUE_ENTRIES - 1);
+}
+
+static bool ftgmac100_tx_buf_cleanable(struct ftgmac100 *priv)
+{
+	return priv->tx_pointer != priv->tx_clean_pointer;
+}
+
 static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv)
 {
 	struct net_device *netdev = priv->netdev;
@@ -557,9 +574,6 @@ static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv)
 	struct sk_buff *skb;
 	dma_addr_t map;
 
-	if (priv->tx_pending == 0)
-		return false;
-
 	pointer = priv->tx_clean_pointer;
 	txdes = &priv->descs->txdes[pointer];
 
@@ -581,18 +595,31 @@ static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv)
 
 	priv->tx_clean_pointer = ftgmac100_next_tx_pointer(pointer);
 
-	spin_lock(&priv->tx_lock);
-	priv->tx_pending--;
-	spin_unlock(&priv->tx_lock);
-	netif_wake_queue(netdev);
-
 	return true;
 }
 
 static void ftgmac100_tx_complete(struct ftgmac100 *priv)
 {
-	while (ftgmac100_tx_complete_packet(priv))
+	struct net_device *netdev = priv->netdev;
+
+	/* Process all completed packets */
+	while (ftgmac100_tx_buf_cleanable(priv) &&
+	       ftgmac100_tx_complete_packet(priv))
 		;
+
+	/* Restart queue if needed */
+	smp_mb();
+	if (unlikely(netif_queue_stopped(netdev) &&
+		     ftgmac100_tx_buf_avail(priv) >= TX_THRESHOLD)) {
+		struct netdev_queue *txq;
+
+		txq = netdev_get_tx_queue(netdev, 0);
+		__netif_tx_lock(txq, smp_processor_id());
+		if (netif_queue_stopped(netdev) &&
+		    ftgmac100_tx_buf_avail(priv) >= TX_THRESHOLD)
+			netif_wake_queue(netdev);
+		__netif_tx_unlock(txq);
+	}
 }
 
 static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
@@ -651,17 +678,22 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 		}
 	}
 
+	ftgmac100_txdes_set_dma_own(txdes);
+
 	/* Update next TX pointer */
 	priv->tx_pointer = ftgmac100_next_tx_pointer(pointer);
 
-	spin_lock(&priv->tx_lock);
-	priv->tx_pending++;
-	if (priv->tx_pending == TX_QUEUE_ENTRIES)
+	/* If there isn't enough room for all the fragments of a new packet
+	 * in the TX ring, stop the queue. The sequence below is race free
+	 * vs. a concurrent restart in ftgmac100_poll()
+	 */
+	if (unlikely(ftgmac100_tx_buf_avail(priv) < TX_THRESHOLD)) {
 		netif_stop_queue(netdev);
-
-	/* start transmit */
-	ftgmac100_txdes_set_dma_own(txdes);
-	spin_unlock(&priv->tx_lock);
+		/* Order the queue stop with the test below */
+		smp_mb();
+		if (ftgmac100_tx_buf_avail(priv) >= TX_THRESHOLD)
+			netif_wake_queue(netdev);
+	}
 
 	ftgmac100_txdma_normal_prio_start_polling(priv);
 
@@ -979,17 +1011,17 @@ static bool ftgmac100_check_rx(struct ftgmac100 *priv)
 static int ftgmac100_poll(struct napi_struct *napi, int budget)
 {
 	struct ftgmac100 *priv = container_of(napi, struct ftgmac100, napi);
-	bool more, completed = true;
-	int rx = 0;
+	int work_done = 0;
+	bool more;
 
-	ftgmac100_tx_complete(priv);
+	/* Handle TX completions */
+	if (ftgmac100_tx_buf_cleanable(priv))
+		ftgmac100_tx_complete(priv);
 
+	/* Handle RX packets */
 	do {
-		more = ftgmac100_rx_packet(priv, &rx);
-	} while (more && rx < budget);
-
-	if (more && rx == budget)
-		completed = false;
+		more = ftgmac100_rx_packet(priv, &work_done);
+	} while (more && work_done < budget);
 
 
 	/* The interrupt is telling us to kick the MAC back to life
@@ -1003,11 +1035,13 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget)
 			  priv->base + FTGMAC100_OFFSET_IER);
 	}
 
-	/* Keep NAPI going if we have still packets to reclaim */
-	if (priv->tx_pending)
-		return budget;
+	/* As long as we are waiting for transmit packets to be
+	 * completed we keep NAPI going
+	 */
+	if (ftgmac100_tx_buf_cleanable(priv))
+		work_done = budget;
 
-	if (completed) {
+	if (work_done < budget) {
 		/* We are about to re-enable all interrupts. However
 		 * the HW has been latching RX/TX packet interrupts while
 		 * they were masked. So we clear them first, then we need
@@ -1015,7 +1049,8 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget)
 		 */
 		iowrite32(FTGMAC100_INT_RXTX,
 			  priv->base + FTGMAC100_OFFSET_ISR);
-		if (ftgmac100_check_rx(priv) || priv->tx_pending)
+		if (ftgmac100_check_rx(priv) ||
+		    ftgmac100_tx_buf_cleanable(priv))
 			return budget;
 
 		/* deschedule NAPI */
@@ -1026,7 +1061,7 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget)
 			  priv->base + FTGMAC100_OFFSET_IER);
 	}
 
-	return rx;
+	return work_done;
 }
 
 static int ftgmac100_init_all(struct ftgmac100 *priv, bool ignore_alloc_err)
@@ -1354,8 +1389,6 @@ static int ftgmac100_probe(struct platform_device *pdev)
 	priv->dev = &pdev->dev;
 	INIT_WORK(&priv->reset_task, ftgmac100_reset_task);
 
-	spin_lock_init(&priv->tx_lock);
-
 	/* map io memory */
 	priv->res = request_mem_region(res->start, resource_size(res),
 				       dev_name(&pdev->dev));
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 08/12] ftgmac100: Move the barrier out of ftgmac100_txdes_set_dma_own()
  2017-04-07  3:30 [PATCH 00/12] ftgmac100: Rework batch 3 - TX path Benjamin Herrenschmidt
                   ` (6 preceding siblings ...)
  2017-04-07  3:31 ` [PATCH 07/12] ftgmac100: Cleanup tx queue handling Benjamin Herrenschmidt
@ 2017-04-07  3:31 ` Benjamin Herrenschmidt
  2017-04-07  3:31 ` [PATCH 09/12] ftgmac100: Split tx packet freeing from ftgmac100_tx_complete_packet() Benjamin Herrenschmidt
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07  3:31 UTC (permalink / raw)
  To: netdev; +Cc: Benjamin Herrenschmidt

We'll use variants of this accessor without barriers when
building series of descriptors for fragmented sends

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index ccf0fcd..31fbb75 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -483,11 +483,6 @@ static bool ftgmac100_txdes_owned_by_dma(struct ftgmac100_txdes *txdes)
 
 static void ftgmac100_txdes_set_dma_own(struct ftgmac100_txdes *txdes)
 {
-	/*
-	 * Make sure dma own bit will not be set before any other
-	 * descriptor fields.
-	 */
-	wmb();
 	txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_TXDMA_OWN);
 }
 
@@ -678,6 +673,10 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 		}
 	}
 
+	/* Order the previous packet and descriptor udpates
+	 * before setting the OWN bit.
+	 */
+	dma_wmb();
 	ftgmac100_txdes_set_dma_own(txdes);
 
 	/* Update next TX pointer */
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 09/12] ftgmac100: Split tx packet freeing from ftgmac100_tx_complete_packet()
  2017-04-07  3:30 [PATCH 00/12] ftgmac100: Rework batch 3 - TX path Benjamin Herrenschmidt
                   ` (7 preceding siblings ...)
  2017-04-07  3:31 ` [PATCH 08/12] ftgmac100: Move the barrier out of ftgmac100_txdes_set_dma_own() Benjamin Herrenschmidt
@ 2017-04-07  3:31 ` Benjamin Herrenschmidt
  2017-04-07  3:31 ` [PATCH 10/12] ftgmac100: Don't clear tx desc fields unnecessarily Benjamin Herrenschmidt
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07  3:31 UTC (permalink / raw)
  To: netdev; +Cc: Benjamin Herrenschmidt

This moves the packet freeing to a separate function
which is also used by ftgmac100_free_buffers() and will
be used more in the error path of fragmented sends.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 38 ++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 31fbb75..22edd01 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -561,13 +561,29 @@ static bool ftgmac100_tx_buf_cleanable(struct ftgmac100 *priv)
 	return priv->tx_pointer != priv->tx_clean_pointer;
 }
 
+static void ftgmac100_free_tx_packet(struct ftgmac100 *priv,
+				     unsigned int pointer,
+				     struct sk_buff *skb,
+				     struct ftgmac100_txdes *txdes)
+{
+	dma_addr_t map;
+
+	map = ftgmac100_txdes_get_dma_addr(txdes);
+
+	dma_unmap_single(priv->dev, map, skb_headlen(skb), DMA_TO_DEVICE);
+
+	dev_kfree_skb(skb);
+	priv->tx_skbs[pointer] = NULL;
+
+	ftgmac100_txdes_reset(priv, txdes);
+}
+
 static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv)
 {
 	struct net_device *netdev = priv->netdev;
 	struct ftgmac100_txdes *txdes;
-	unsigned int pointer;
 	struct sk_buff *skb;
-	dma_addr_t map;
+	unsigned int pointer;
 
 	pointer = priv->tx_clean_pointer;
 	txdes = &priv->descs->txdes[pointer];
@@ -576,17 +592,9 @@ static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv)
 		return false;
 
 	skb = priv->tx_skbs[pointer];
-	map = ftgmac100_txdes_get_dma_addr(txdes);
-
 	netdev->stats.tx_packets++;
 	netdev->stats.tx_bytes += skb->len;
-
-	dma_unmap_single(priv->dev, map, skb_headlen(skb), DMA_TO_DEVICE);
-
-	dev_kfree_skb(skb);
-	priv->tx_skbs[pointer] = NULL;
-
-	ftgmac100_txdes_reset(priv, txdes);
+	ftgmac100_free_tx_packet(priv, pointer, skb, txdes);
 
 	priv->tx_clean_pointer = ftgmac100_next_tx_pointer(pointer);
 
@@ -728,13 +736,9 @@ static void ftgmac100_free_buffers(struct ftgmac100 *priv)
 	for (i = 0; i < TX_QUEUE_ENTRIES; i++) {
 		struct ftgmac100_txdes *txdes = &priv->descs->txdes[i];
 		struct sk_buff *skb = priv->tx_skbs[i];
-		dma_addr_t map = ftgmac100_txdes_get_dma_addr(txdes);
-
-		if (!skb)
-			continue;
 
-		dma_unmap_single(priv->dev, map, skb_headlen(skb), DMA_TO_DEVICE);
-		kfree_skb(skb);
+		if (skb)
+			ftgmac100_free_tx_packet(priv, i, skb, txdes);
 	}
 }
 
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 10/12] ftgmac100: Don't clear tx desc fields unnecessarily
  2017-04-07  3:30 [PATCH 00/12] ftgmac100: Rework batch 3 - TX path Benjamin Herrenschmidt
                   ` (8 preceding siblings ...)
  2017-04-07  3:31 ` [PATCH 09/12] ftgmac100: Split tx packet freeing from ftgmac100_tx_complete_packet() Benjamin Herrenschmidt
@ 2017-04-07  3:31 ` Benjamin Herrenschmidt
  2017-04-07  3:31 ` [PATCH 11/12] ftgmac100: Add support for fragmented tx Benjamin Herrenschmidt
  2017-04-07  3:31 ` [PATCH 12/12] ftgmac100: Remove tx descriptor accessors Benjamin Herrenschmidt
  11 siblings, 0 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07  3:31 UTC (permalink / raw)
  To: netdev; +Cc: Benjamin Herrenschmidt

Those are non-cachable stores, let's avoid those we don't need. Remove
the helper, it's not particularly helpful and since it uses "priv"
I can't move it to the header file.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 22edd01..a68a7c4 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -466,16 +466,6 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed)
 	return true;
 }
 
-static void ftgmac100_txdes_reset(const struct ftgmac100 *priv,
-				  struct ftgmac100_txdes *txdes)
-{
-	/* clear all except end of ring bit */
-	txdes->txdes0 &= cpu_to_le32(priv->txdes0_edotr_mask);
-	txdes->txdes1 = 0;
-	txdes->txdes2 = 0;
-	txdes->txdes3 = 0;
-}
-
 static bool ftgmac100_txdes_owned_by_dma(struct ftgmac100_txdes *txdes)
 {
 	return txdes->txdes0 & cpu_to_le32(FTGMAC100_TXDES0_TXDMA_OWN);
@@ -575,7 +565,12 @@ static void ftgmac100_free_tx_packet(struct ftgmac100 *priv,
 	dev_kfree_skb(skb);
 	priv->tx_skbs[pointer] = NULL;
 
-	ftgmac100_txdes_reset(priv, txdes);
+	/* Clear txdes0 except end of ring bit, clear txdes1 as we
+	 * only "OR" into it, leave 2 and 3 alone as 2 is unused
+	 * and 3 will be overwritten entirely
+	 */
+	txdes->txdes0 &= cpu_to_le32(priv->txdes0_edotr_mask);
+	txdes->txdes1 = 0;
 }
 
 static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv)
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 11/12] ftgmac100: Add support for fragmented tx
  2017-04-07  3:30 [PATCH 00/12] ftgmac100: Rework batch 3 - TX path Benjamin Herrenschmidt
                   ` (9 preceding siblings ...)
  2017-04-07  3:31 ` [PATCH 10/12] ftgmac100: Don't clear tx desc fields unnecessarily Benjamin Herrenschmidt
@ 2017-04-07  3:31 ` Benjamin Herrenschmidt
  2017-04-07 13:26   ` Florian Fainelli
  2017-04-07  3:31 ` [PATCH 12/12] ftgmac100: Remove tx descriptor accessors Benjamin Herrenschmidt
  11 siblings, 1 reply; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07  3:31 UTC (permalink / raw)
  To: netdev; +Cc: Benjamin Herrenschmidt

Add NETIF_F_SG and create multiple TX ring entries for skb fragments.

On reclaim, the skb is only freed on the segment marked as "last".

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

# Conflicts:
#	drivers/net/ethernet/faraday/ftgmac100.c
---
 drivers/net/ethernet/faraday/ftgmac100.c | 121 +++++++++++++++++++++++++------
 1 file changed, 97 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index a68a7c4..1496141 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -45,7 +45,7 @@
 #define RX_BUF_SIZE		MAX_PKT_SIZE	/* must be smaller than 0x3fff */
 
 /* Min number of tx ring entries before stopping queue */
-#define TX_THRESHOLD		(1)
+#define TX_THRESHOLD		(MAX_SKB_FRAGS + 1)
 
 struct ftgmac100_descs {
 	struct ftgmac100_rxdes rxdes[RX_QUEUE_ENTRIES];
@@ -487,20 +487,30 @@ static void ftgmac100_txdes_set_first_segment(struct ftgmac100_txdes *txdes)
 	txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_FTS);
 }
 
+static inline bool ftgmac100_txdes_get_first_segment(struct ftgmac100_txdes *txdes)
+{
+	return (txdes->txdes0 & cpu_to_le32(FTGMAC100_TXDES0_FTS)) != 0;
+}
+
 static void ftgmac100_txdes_set_last_segment(struct ftgmac100_txdes *txdes)
 {
 	txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_LTS);
 }
 
+static inline bool ftgmac100_txdes_get_last_segment(struct ftgmac100_txdes *txdes)
+{
+	return (txdes->txdes0 & cpu_to_le32(FTGMAC100_TXDES0_LTS)) != 0;
+}
+
 static void ftgmac100_txdes_set_buffer_size(struct ftgmac100_txdes *txdes,
 					    unsigned int len)
 {
 	txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_TXBUF_SIZE(len));
 }
 
-static void ftgmac100_txdes_set_txint(struct ftgmac100_txdes *txdes)
+static inline unsigned int ftgmac100_txdes_get_buffer_size(struct ftgmac100_txdes *txdes)
 {
-	txdes->txdes1 |= cpu_to_le32(FTGMAC100_TXDES1_TXIC);
+	return FTGMAC100_TXDES0_TXBUF_SIZE(cpu_to_le32(txdes->txdes0));
 }
 
 static void ftgmac100_txdes_set_tcpcs(struct ftgmac100_txdes *txdes)
@@ -526,7 +536,7 @@ static void ftgmac100_txdes_set_dma_addr(struct ftgmac100_txdes *txdes,
 
 static dma_addr_t ftgmac100_txdes_get_dma_addr(struct ftgmac100_txdes *txdes)
 {
-	return le32_to_cpu(txdes->txdes3);
+	return (dma_addr_t)le32_to_cpu(txdes->txdes3);
 }
 
 static int ftgmac100_next_tx_pointer(int pointer)
@@ -556,13 +566,22 @@ static void ftgmac100_free_tx_packet(struct ftgmac100 *priv,
 				     struct sk_buff *skb,
 				     struct ftgmac100_txdes *txdes)
 {
-	dma_addr_t map;
+	dma_addr_t map = ftgmac100_txdes_get_dma_addr(txdes);
 
-	map = ftgmac100_txdes_get_dma_addr(txdes);
+	if (ftgmac100_txdes_get_first_segment(txdes)) {
+		size_t len = skb_headlen(skb);
 
-	dma_unmap_single(priv->dev, map, skb_headlen(skb), DMA_TO_DEVICE);
+		if (skb_shinfo(skb)->nr_frags == 0 && len < ETH_ZLEN)
+			len = ETH_ZLEN;
+		dma_unmap_single(priv->dev, map, len, DMA_TO_DEVICE);
+	} else {
+		dma_unmap_page(priv->dev, map,
+			       ftgmac100_txdes_get_buffer_size(txdes),
+			       DMA_TO_DEVICE);
+	}
 
-	dev_kfree_skb(skb);
+	if (ftgmac100_txdes_get_last_segment(txdes))
+		dev_kfree_skb(skb);
 	priv->tx_skbs[pointer] = NULL;
 
 	/* Clear txdes0 except end of ring bit, clear txdes1 as we
@@ -623,10 +642,9 @@ static void ftgmac100_tx_complete(struct ftgmac100 *priv)
 static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 				     struct net_device *netdev)
 {
-	unsigned int len = (skb->len < ETH_ZLEN) ? ETH_ZLEN : skb->len;
 	struct ftgmac100 *priv = netdev_priv(netdev);
-	struct ftgmac100_txdes *txdes;
-	unsigned int pointer;
+	struct ftgmac100_txdes *txdes, *first;
+	unsigned int pointer, nfrags, len, i, j;
 	dma_addr_t map;
 
 	/* The HW doesn't pad small frames */
@@ -642,26 +660,35 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 		goto drop;
 	}
 
-	map = dma_map_single(priv->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(priv->dev, map))) {
-		/* drop packet */
+	/* Do we have a limit on #fragments ? I yet have to get a reply
+	 * from Aspeed. If there's one I haven't hit it.
+	 */
+	nfrags = skb_shinfo(skb)->nr_frags;
+
+	/* Get header len and pad for non-fragmented packets */
+	len = skb_headlen(skb);
+	if (nfrags == 0 && len < ETH_ZLEN)
+		len = ETH_ZLEN;
+
+	/* Map the packet head */
+	map = dma_map_single(priv->dev, skb->data, len, DMA_TO_DEVICE);
+	if (dma_mapping_error(priv->dev, map)) {
 		if (net_ratelimit())
-			netdev_err(netdev, "map socket buffer failed\n");
+			netdev_err(netdev, "map tx packet head failed\n");
 		goto drop;
 	}
 
 	/* Grab the next free tx descriptor */
 	pointer = priv->tx_pointer;
-	txdes = &priv->descs->txdes[pointer];
+	txdes = first = &priv->descs->txdes[pointer];
 
-	/* setup TX descriptor */
+	/* Setup it up with the packet head. We don't set the OWN bit yet. */
 	priv->tx_skbs[pointer] = skb;
 	ftgmac100_txdes_set_dma_addr(txdes, map);
 	ftgmac100_txdes_set_buffer_size(txdes, len);
-
 	ftgmac100_txdes_set_first_segment(txdes);
-	ftgmac100_txdes_set_last_segment(txdes);
-	ftgmac100_txdes_set_txint(txdes);
+
+	/* Setup HW checksumming */
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		__be16 protocol = skb->protocol;
 
@@ -676,14 +703,41 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 		}
 	}
 
+	/* Next descriptor */
+	pointer = ftgmac100_next_tx_pointer(pointer);
+
+	/* Add the fragments */
+	for (i = 0; i < nfrags; i++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		len = frag->size;
+
+		/* Map it */
+		map = skb_frag_dma_map(priv->dev, frag, 0, len,
+				       DMA_TO_DEVICE);
+		if (dma_mapping_error(priv->dev, map))
+			goto dma_err;
+
+		/* Setup descriptor */
+		priv->tx_skbs[pointer] = skb;
+		txdes = &priv->descs->txdes[pointer];
+		ftgmac100_txdes_set_dma_addr(txdes, map);
+		ftgmac100_txdes_set_buffer_size(txdes, len);
+		ftgmac100_txdes_set_dma_own(txdes);
+		pointer = ftgmac100_next_tx_pointer(pointer);
+	}
+
+	/* Tag last fragment */
+	ftgmac100_txdes_set_last_segment(txdes);
+
 	/* Order the previous packet and descriptor udpates
 	 * before setting the OWN bit.
 	 */
 	dma_wmb();
-	ftgmac100_txdes_set_dma_own(txdes);
+	ftgmac100_txdes_set_dma_own(first);
 
 	/* Update next TX pointer */
-	priv->tx_pointer = ftgmac100_next_tx_pointer(pointer);
+	priv->tx_pointer = pointer;
 
 	/* If there isn't enough room for all the fragments of a new packet
 	 * in the TX ring, stop the queue. The sequence below is race free
@@ -701,6 +755,25 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 
 	return NETDEV_TX_OK;
 
+ dma_err:
+	if (net_ratelimit())
+		netdev_err(netdev, "map tx fragment failed\n");
+
+	/* Free head */
+	pointer = priv->tx_pointer;
+	ftgmac100_free_tx_packet(priv, pointer, skb, first);
+
+	/* Then all fragments */
+	for (j = 0; j < i; j++) {
+		pointer = ftgmac100_next_tx_pointer(pointer);
+		txdes = &priv->descs->txdes[pointer];
+		ftgmac100_free_tx_packet(priv, pointer, skb, txdes);
+	}
+
+	/* This cannot be reached if we successfully mapped the
+	 * last fragment, so we know ftgmac100_free_tx_packet()
+	 * hasn't freed the skb yet.
+	 */
  drop:
 	/* Drop the packet */
 	dev_kfree_skb_any(skb);
@@ -1440,12 +1513,12 @@ static int ftgmac100_probe(struct platform_device *pdev)
 	 * when NCSI is enabled on the interface. It doesn't work
 	 * in that case.
 	 */
-	netdev->features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_GRO;
+	netdev->features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
+		NETIF_F_GRO | NETIF_F_SG;
 	if (priv->use_ncsi &&
 	    of_get_property(pdev->dev.of_node, "no-hw-checksum", NULL))
 		netdev->features &= ~NETIF_F_IP_CSUM;
 
-
 	/* register network device */
 	err = register_netdev(netdev);
 	if (err) {
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 12/12] ftgmac100: Remove tx descriptor accessors
  2017-04-07  3:30 [PATCH 00/12] ftgmac100: Rework batch 3 - TX path Benjamin Herrenschmidt
                   ` (10 preceding siblings ...)
  2017-04-07  3:31 ` [PATCH 11/12] ftgmac100: Add support for fragmented tx Benjamin Herrenschmidt
@ 2017-04-07  3:31 ` Benjamin Herrenschmidt
  11 siblings, 0 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07  3:31 UTC (permalink / raw)
  To: netdev; +Cc: Benjamin Herrenschmidt

Directly access the fields when needed. The accessors add clutter
not clarity and in some cases cause unnecessary read-modify-write
type access on the slow (uncached) descriptor memory.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 174 ++++++++++++-------------------
 drivers/net/ethernet/faraday/ftgmac100.h |   8 +-
 2 files changed, 69 insertions(+), 113 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 1496141..bea12a6 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -466,77 +466,13 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed)
 	return true;
 }
 
-static bool ftgmac100_txdes_owned_by_dma(struct ftgmac100_txdes *txdes)
+static u32 ftgmac100_base_tx_ctlstat(struct ftgmac100 *priv,
+				     unsigned int index)
 {
-	return txdes->txdes0 & cpu_to_le32(FTGMAC100_TXDES0_TXDMA_OWN);
-}
-
-static void ftgmac100_txdes_set_dma_own(struct ftgmac100_txdes *txdes)
-{
-	txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_TXDMA_OWN);
-}
-
-static void ftgmac100_txdes_set_end_of_ring(const struct ftgmac100 *priv,
-					    struct ftgmac100_txdes *txdes)
-{
-	txdes->txdes0 |= cpu_to_le32(priv->txdes0_edotr_mask);
-}
-
-static void ftgmac100_txdes_set_first_segment(struct ftgmac100_txdes *txdes)
-{
-	txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_FTS);
-}
-
-static inline bool ftgmac100_txdes_get_first_segment(struct ftgmac100_txdes *txdes)
-{
-	return (txdes->txdes0 & cpu_to_le32(FTGMAC100_TXDES0_FTS)) != 0;
-}
-
-static void ftgmac100_txdes_set_last_segment(struct ftgmac100_txdes *txdes)
-{
-	txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_LTS);
-}
-
-static inline bool ftgmac100_txdes_get_last_segment(struct ftgmac100_txdes *txdes)
-{
-	return (txdes->txdes0 & cpu_to_le32(FTGMAC100_TXDES0_LTS)) != 0;
-}
-
-static void ftgmac100_txdes_set_buffer_size(struct ftgmac100_txdes *txdes,
-					    unsigned int len)
-{
-	txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_TXBUF_SIZE(len));
-}
-
-static inline unsigned int ftgmac100_txdes_get_buffer_size(struct ftgmac100_txdes *txdes)
-{
-	return FTGMAC100_TXDES0_TXBUF_SIZE(cpu_to_le32(txdes->txdes0));
-}
-
-static void ftgmac100_txdes_set_tcpcs(struct ftgmac100_txdes *txdes)
-{
-	txdes->txdes1 |= cpu_to_le32(FTGMAC100_TXDES1_TCP_CHKSUM);
-}
-
-static void ftgmac100_txdes_set_udpcs(struct ftgmac100_txdes *txdes)
-{
-	txdes->txdes1 |= cpu_to_le32(FTGMAC100_TXDES1_UDP_CHKSUM);
-}
-
-static void ftgmac100_txdes_set_ipcs(struct ftgmac100_txdes *txdes)
-{
-	txdes->txdes1 |= cpu_to_le32(FTGMAC100_TXDES1_IP_CHKSUM);
-}
-
-static void ftgmac100_txdes_set_dma_addr(struct ftgmac100_txdes *txdes,
-					 dma_addr_t addr)
-{
-	txdes->txdes3 = cpu_to_le32(addr);
-}
-
-static dma_addr_t ftgmac100_txdes_get_dma_addr(struct ftgmac100_txdes *txdes)
-{
-	return (dma_addr_t)le32_to_cpu(txdes->txdes3);
+	if (index == (TX_QUEUE_ENTRIES - 1))
+		return priv->txdes0_edotr_mask;
+	else
+		return 0;
 }
 
 static int ftgmac100_next_tx_pointer(int pointer)
@@ -564,32 +500,28 @@ static bool ftgmac100_tx_buf_cleanable(struct ftgmac100 *priv)
 static void ftgmac100_free_tx_packet(struct ftgmac100 *priv,
 				     unsigned int pointer,
 				     struct sk_buff *skb,
-				     struct ftgmac100_txdes *txdes)
+				     struct ftgmac100_txdes *txdes,
+				     u32 ctl_stat)
 {
-	dma_addr_t map = ftgmac100_txdes_get_dma_addr(txdes);
+	dma_addr_t map = le32_to_cpu(txdes->txdes3);
+	size_t len;
 
-	if (ftgmac100_txdes_get_first_segment(txdes)) {
-		size_t len = skb_headlen(skb);
+	if (ctl_stat & FTGMAC100_TXDES0_FTS) {
+		len = skb_headlen(skb);
 
 		if (skb_shinfo(skb)->nr_frags == 0 && len < ETH_ZLEN)
 			len = ETH_ZLEN;
 		dma_unmap_single(priv->dev, map, len, DMA_TO_DEVICE);
 	} else {
-		dma_unmap_page(priv->dev, map,
-			       ftgmac100_txdes_get_buffer_size(txdes),
-			       DMA_TO_DEVICE);
+		len = FTGMAC100_TXDES0_TXBUF_SIZE(ctl_stat);
+
+		dma_unmap_page(priv->dev, map, len, DMA_TO_DEVICE);
 	}
 
-	if (ftgmac100_txdes_get_last_segment(txdes))
+	/* Free SKB on last segment */
+	if (ctl_stat & FTGMAC100_TXDES0_LTS)
 		dev_kfree_skb(skb);
 	priv->tx_skbs[pointer] = NULL;
-
-	/* Clear txdes0 except end of ring bit, clear txdes1 as we
-	 * only "OR" into it, leave 2 and 3 alone as 2 is unused
-	 * and 3 will be overwritten entirely
-	 */
-	txdes->txdes0 &= cpu_to_le32(priv->txdes0_edotr_mask);
-	txdes->txdes1 = 0;
 }
 
 static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv)
@@ -598,17 +530,20 @@ static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv)
 	struct ftgmac100_txdes *txdes;
 	struct sk_buff *skb;
 	unsigned int pointer;
+	u32 ctl_stat;
 
 	pointer = priv->tx_clean_pointer;
 	txdes = &priv->descs->txdes[pointer];
 
-	if (ftgmac100_txdes_owned_by_dma(txdes))
+	ctl_stat = le32_to_cpu(txdes->txdes0);
+	if (ctl_stat & FTGMAC100_TXDES0_TXDMA_OWN)
 		return false;
 
 	skb = priv->tx_skbs[pointer];
 	netdev->stats.tx_packets++;
 	netdev->stats.tx_bytes += skb->len;
-	ftgmac100_free_tx_packet(priv, pointer, skb, txdes);
+	ftgmac100_free_tx_packet(priv, pointer, skb, txdes, ctl_stat);
+	txdes->txdes0 = cpu_to_le32(ctl_stat & priv->txdes0_edotr_mask);
 
 	priv->tx_clean_pointer = ftgmac100_next_tx_pointer(pointer);
 
@@ -645,6 +580,7 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 	struct ftgmac100 *priv = netdev_priv(netdev);
 	struct ftgmac100_txdes *txdes, *first;
 	unsigned int pointer, nfrags, len, i, j;
+	u32 f_ctl_stat, ctl_stat, csum_vlan;
 	dma_addr_t map;
 
 	/* The HW doesn't pad small frames */
@@ -682,26 +618,34 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 	pointer = priv->tx_pointer;
 	txdes = first = &priv->descs->txdes[pointer];
 
-	/* Setup it up with the packet head. We don't set the OWN bit yet. */
+	/* Setup it up with the packet head. Don't write the head to the
+	 * ring just yet
+	 */
 	priv->tx_skbs[pointer] = skb;
-	ftgmac100_txdes_set_dma_addr(txdes, map);
-	ftgmac100_txdes_set_buffer_size(txdes, len);
-	ftgmac100_txdes_set_first_segment(txdes);
+	f_ctl_stat = ftgmac100_base_tx_ctlstat(priv, pointer);
+	f_ctl_stat |= FTGMAC100_TXDES0_TXDMA_OWN;
+	f_ctl_stat |= FTGMAC100_TXDES0_TXBUF_SIZE(len);
+	f_ctl_stat |= FTGMAC100_TXDES0_FTS;
+	if (nfrags == 0)
+		f_ctl_stat |= FTGMAC100_TXDES0_LTS;
+	txdes->txdes3 = cpu_to_le32(map);
 
 	/* Setup HW checksumming */
+	csum_vlan = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		__be16 protocol = skb->protocol;
 
 		if (protocol == cpu_to_be16(ETH_P_IP)) {
 			u8 ip_proto = ip_hdr(skb)->protocol;
 
-			ftgmac100_txdes_set_ipcs(txdes);
+			csum_vlan |= FTGMAC100_TXDES1_IP_CHKSUM;
 			if (ip_proto == IPPROTO_TCP)
-				ftgmac100_txdes_set_tcpcs(txdes);
+				csum_vlan |= FTGMAC100_TXDES1_TCP_CHKSUM;
 			else if (ip_proto == IPPROTO_UDP)
-				ftgmac100_txdes_set_udpcs(txdes);
+				csum_vlan |= FTGMAC100_TXDES1_UDP_CHKSUM;
 		}
 	}
+	txdes->txdes1 = cpu_to_le32(csum_vlan);
 
 	/* Next descriptor */
 	pointer = ftgmac100_next_tx_pointer(pointer);
@@ -721,20 +665,24 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 		/* Setup descriptor */
 		priv->tx_skbs[pointer] = skb;
 		txdes = &priv->descs->txdes[pointer];
-		ftgmac100_txdes_set_dma_addr(txdes, map);
-		ftgmac100_txdes_set_buffer_size(txdes, len);
-		ftgmac100_txdes_set_dma_own(txdes);
+		ctl_stat = ftgmac100_base_tx_ctlstat(priv, pointer);
+		ctl_stat |= FTGMAC100_TXDES0_TXDMA_OWN;
+		ctl_stat |= FTGMAC100_TXDES0_TXBUF_SIZE(len);
+		if (i == (nfrags - 1))
+			ctl_stat |= FTGMAC100_TXDES0_LTS;
+		txdes->txdes0 = cpu_to_le32(ctl_stat);
+		txdes->txdes1 = 0;
+		txdes->txdes3 = cpu_to_le32(map);
+
+		/* Next one */
 		pointer = ftgmac100_next_tx_pointer(pointer);
 	}
 
-	/* Tag last fragment */
-	ftgmac100_txdes_set_last_segment(txdes);
-
 	/* Order the previous packet and descriptor udpates
-	 * before setting the OWN bit.
+	 * before setting the OWN bit on the first descriptor.
 	 */
 	dma_wmb();
-	ftgmac100_txdes_set_dma_own(first);
+	first->txdes0 = cpu_to_le32(f_ctl_stat);
 
 	/* Update next TX pointer */
 	priv->tx_pointer = pointer;
@@ -761,13 +709,16 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
 
 	/* Free head */
 	pointer = priv->tx_pointer;
-	ftgmac100_free_tx_packet(priv, pointer, skb, first);
+	ftgmac100_free_tx_packet(priv, pointer, skb, first, f_ctl_stat);
+	first->txdes0 = cpu_to_le32(f_ctl_stat & priv->txdes0_edotr_mask);
 
 	/* Then all fragments */
 	for (j = 0; j < i; j++) {
 		pointer = ftgmac100_next_tx_pointer(pointer);
 		txdes = &priv->descs->txdes[pointer];
-		ftgmac100_free_tx_packet(priv, pointer, skb, txdes);
+		ctl_stat = le32_to_cpu(txdes->txdes0);
+		ftgmac100_free_tx_packet(priv, pointer, skb, txdes, ctl_stat);
+		txdes->txdes0 = cpu_to_le32(ctl_stat & priv->txdes0_edotr_mask);
 	}
 
 	/* This cannot be reached if we successfully mapped the
@@ -805,8 +756,10 @@ static void ftgmac100_free_buffers(struct ftgmac100 *priv)
 		struct ftgmac100_txdes *txdes = &priv->descs->txdes[i];
 		struct sk_buff *skb = priv->tx_skbs[i];
 
-		if (skb)
-			ftgmac100_free_tx_packet(priv, i, skb, txdes);
+		if (!skb)
+			continue;
+		ftgmac100_free_tx_packet(priv, i, skb, txdes,
+					 le32_to_cpu(txdes->txdes0));
 	}
 }
 
@@ -846,6 +799,7 @@ static int ftgmac100_alloc_rings(struct ftgmac100 *priv)
 static void ftgmac100_init_rings(struct ftgmac100 *priv)
 {
 	struct ftgmac100_rxdes *rxdes;
+	struct ftgmac100_txdes *txdes;
 	int i;
 
 	/* Initialize RX ring */
@@ -858,9 +812,11 @@ static void ftgmac100_init_rings(struct ftgmac100 *priv)
 	rxdes->rxdes0 |= cpu_to_le32(priv->rxdes0_edorr_mask);
 
 	/* Initialize TX ring */
-	for (i = 0; i < TX_QUEUE_ENTRIES; i++)
-		priv->descs->txdes[i].txdes0 = 0;
-	ftgmac100_txdes_set_end_of_ring(priv, &priv->descs->txdes[i -1]);
+	for (i = 0; i < TX_QUEUE_ENTRIES; i++) {
+		txdes = &priv->descs->txdes[i];
+		txdes->txdes0 = 0;
+	}
+	txdes->txdes0 |= cpu_to_le32(priv->txdes0_edotr_mask);
 }
 
 static int ftgmac100_alloc_rx_buffers(struct ftgmac100 *priv)
diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h
index 9124785..97912c4 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.h
+++ b/drivers/net/ethernet/faraday/ftgmac100.h
@@ -202,10 +202,10 @@
  * Transmit descriptor, aligned to 16 bytes
  */
 struct ftgmac100_txdes {
-	unsigned int	txdes0;
-	unsigned int	txdes1;
-	unsigned int	txdes2;	/* not used by HW */
-	unsigned int	txdes3;	/* TXBUF_BADR */
+	__le32	txdes0; /* Control & status bits */
+	__le32	txdes1; /* Irq, checksum and vlan control */
+	__le32	txdes2; /* Reserved */
+	__le32	txdes3; /* DMA buffer address */
 } __attribute__ ((aligned(16)));
 
 #define FTGMAC100_TXDES0_TXBUF_SIZE(x)	((x) & 0x3fff)
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 02/12] ftgmac100: Move ftgmac100_hard_start_xmit() around
  2017-04-07  3:30 ` [PATCH 02/12] ftgmac100: Move ftgmac100_hard_start_xmit() around Benjamin Herrenschmidt
@ 2017-04-07 10:09   ` Sergei Shtylyov
  2017-04-07 23:14     ` Benjamin Herrenschmidt
  2017-04-07 12:49   ` David Miller
  1 sibling, 1 reply; 21+ messages in thread
From: Sergei Shtylyov @ 2017-04-07 10:09 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, netdev

Hello!

On 4/7/2017 6:30 AM, Benjamin Herrenschmidt wrote:

> Move it below ftgmac100_xmit() and the rest of the tx path
>
> No code change.
>
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  drivers/net/ethernet/faraday/ftgmac100.c | 59 ++++++++++++++++----------------
>  1 file changed, 30 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
> index f4b719214..3966c7a 100644
> --- a/drivers/net/ethernet/faraday/ftgmac100.c
> +++ b/drivers/net/ethernet/faraday/ftgmac100.c
> @@ -63,6 +63,7 @@ struct ftgmac100 {
>  	u32 rxdes0_edorr_mask;
>
>  	/* Tx ring */
> +	struct sk_buff *tx_skbs[TX_QUEUE_ENTRIES];

    "No code change", really?

>  	unsigned int tx_clean_pointer;
>  	unsigned int tx_pointer;
>  	unsigned int tx_pending;
[...]

MBR, Sergei

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 02/12] ftgmac100: Move ftgmac100_hard_start_xmit() around
  2017-04-07  3:30 ` [PATCH 02/12] ftgmac100: Move ftgmac100_hard_start_xmit() around Benjamin Herrenschmidt
  2017-04-07 10:09   ` Sergei Shtylyov
@ 2017-04-07 12:49   ` David Miller
  2017-04-07 23:19     ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 21+ messages in thread
From: David Miller @ 2017-04-07 12:49 UTC (permalink / raw)
  To: benh; +Cc: netdev

From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri,  7 Apr 2017 13:30:55 +1000

> Move it below ftgmac100_xmit() and the rest of the tx path
> 
> No code change.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  drivers/net/ethernet/faraday/ftgmac100.c | 59 ++++++++++++++++----------------
>  1 file changed, 30 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
> index f4b719214..3966c7a 100644
> --- a/drivers/net/ethernet/faraday/ftgmac100.c
> +++ b/drivers/net/ethernet/faraday/ftgmac100.c
> @@ -63,6 +63,7 @@ struct ftgmac100 {
>  	u32 rxdes0_edorr_mask;
>  
>  	/* Tx ring */
> +	struct sk_buff *tx_skbs[TX_QUEUE_ENTRIES];
>  	unsigned int tx_clean_pointer;
>  	unsigned int tx_pointer;
>  	unsigned int tx_pending;

Add this only in patch #6 or wherever it is that you start using it, not
this patch

Thanks.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 05/12] ftgmac100: Pad small frames properly
  2017-04-07  3:30 ` [PATCH 05/12] ftgmac100: Pad small frames properly Benjamin Herrenschmidt
@ 2017-04-07 13:05   ` Florian Fainelli
  2017-04-07 23:15     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 21+ messages in thread
From: Florian Fainelli @ 2017-04-07 13:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, netdev



On 04/06/2017 08:30 PM, Benjamin Herrenschmidt wrote:
> Rather than just transmitting garbage past the end of the small
> packet.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  drivers/net/ethernet/faraday/ftgmac100.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
> index 6c71726..3f2172f 100644
> --- a/drivers/net/ethernet/faraday/ftgmac100.c
> +++ b/drivers/net/ethernet/faraday/ftgmac100.c
> @@ -636,6 +636,13 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
>  	struct ftgmac100_txdes *txdes;
>  	dma_addr_t map;
>  
> +	/* The HW doesn't pad small frames */
> +	if (skb_padto(skb, ETH_ZLEN) < 0) {
> +		netdev->stats.tx_dropped++;
> +		return NETDEV_TX_OK;
> +	}

You may want to use skb_put_padto() which also takes care of bumping
skb->len accordingly, in case that makes a difference for the ftgmac100
hardware.
-- 
Florian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 11/12] ftgmac100: Add support for fragmented tx
  2017-04-07  3:31 ` [PATCH 11/12] ftgmac100: Add support for fragmented tx Benjamin Herrenschmidt
@ 2017-04-07 13:26   ` Florian Fainelli
  2017-04-07 23:19     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 21+ messages in thread
From: Florian Fainelli @ 2017-04-07 13:26 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, netdev



On 04/06/2017 08:31 PM, Benjamin Herrenschmidt wrote:
> Add NETIF_F_SG and create multiple TX ring entries for skb fragments.
> 
> On reclaim, the skb is only freed on the segment marked as "last".
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> 
[snip]
>  
> -	dma_unmap_single(priv->dev, map, skb_headlen(skb), DMA_TO_DEVICE);
> +		if (skb_shinfo(skb)->nr_frags == 0 && len < ETH_ZLEN)
> +			len = ETH_ZLEN;

This is where skb_put_padto() would help you eliminate this test since
you'd be dealing skb->len >= ETH_ZLEN.

> +		dma_unmap_single(priv->dev, map, len, DMA_TO_DEVICE);
> +	} else {
> +		dma_unmap_page(priv->dev, map,
> +			       ftgmac100_txdes_get_buffer_size(txdes),
> +			       DMA_TO_DEVICE);
> +	}
>  
> -	dev_kfree_skb(skb);
> +	if (ftgmac100_txdes_get_last_segment(txdes))
> +		dev_kfree_skb(skb);

This makes you do an uncached access to the descriptor, right? is there
a way you could use bookeeping information to free the last fragment?

>  	priv->tx_skbs[pointer] = NULL;
>  
>  	/* Clear txdes0 except end of ring bit, clear txdes1 as we
> @@ -623,10 +642,9 @@ static void ftgmac100_tx_complete(struct ftgmac100 *priv)
>  static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
>  				     struct net_device *netdev)
>  {
> -	unsigned int len = (skb->len < ETH_ZLEN) ? ETH_ZLEN : skb->len;
>  	struct ftgmac100 *priv = netdev_priv(netdev);
> -	struct ftgmac100_txdes *txdes;
> -	unsigned int pointer;
> +	struct ftgmac100_txdes *txdes, *first;
> +	unsigned int pointer, nfrags, len, i, j;
>  	dma_addr_t map;
>  
>  	/* The HW doesn't pad small frames */
> @@ -642,26 +660,35 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
>  		goto drop;
>  	}
>  
> -	map = dma_map_single(priv->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
> -	if (unlikely(dma_mapping_error(priv->dev, map))) {
> -		/* drop packet */
> +	/* Do we have a limit on #fragments ? I yet have to get a reply
> +	 * from Aspeed. If there's one I haven't hit it.
> +	 */
> +	nfrags = skb_shinfo(skb)->nr_frags;
> +
> +	/* Get header len and pad for non-fragmented packets */
> +	len = skb_headlen(skb);
> +	if (nfrags == 0 && len < ETH_ZLEN)
> +		len = ETH_ZLEN;

Same here skb_put_padto() would eliminate the test.

[snip]

>  
> + dma_err:
> +	if (net_ratelimit())
> +		netdev_err(netdev, "map tx fragment failed\n");

You may consider adding a software counter that tracks mapping failures
(few drivers do that) in a subsequent set of changes.
-- 
Florian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 02/12] ftgmac100: Move ftgmac100_hard_start_xmit() around
  2017-04-07 10:09   ` Sergei Shtylyov
@ 2017-04-07 23:14     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07 23:14 UTC (permalink / raw)
  To: Sergei Shtylyov, netdev

On Fri, 2017-04-07 at 13:09 +0300, Sergei Shtylyov wrote:
> Hello!
> 
> On 4/7/2017 6:30 AM, Benjamin Herrenschmidt wrote:
> 
> > Move it below ftgmac100_xmit() and the rest of the tx path
> > 
> > No code change.
> > 
> > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > ---
> >  drivers/net/ethernet/faraday/ftgmac100.c | 59 ++++++++++++++++--
> > --------------
> >  1 file changed, 30 insertions(+), 29 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/faraday/ftgmac100.c
> > b/drivers/net/ethernet/faraday/ftgmac100.c
> > index f4b719214..3966c7a 100644
> > --- a/drivers/net/ethernet/faraday/ftgmac100.c
> > +++ b/drivers/net/ethernet/faraday/ftgmac100.c
> > @@ -63,6 +63,7 @@ struct ftgmac100 {
> >  	u32 rxdes0_edorr_mask;
> > 
> >  	/* Tx ring */
> > +	struct sk_buff *tx_skbs[TX_QUEUE_ENTRIES];
> 
>     "No code change", really?

Patch splitting accident, thanks for pointing it out. I'll fix it up in
the next round.

> 
> >  	unsigned int tx_clean_pointer;
> >  	unsigned int tx_pointer;
> >  	unsigned int tx_pending;
> 
> [...]
> 
> MBR, Sergei

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 05/12] ftgmac100: Pad small frames properly
  2017-04-07 13:05   ` Florian Fainelli
@ 2017-04-07 23:15     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07 23:15 UTC (permalink / raw)
  To: Florian Fainelli, netdev

On Fri, 2017-04-07 at 06:05 -0700, Florian Fainelli wrote:
> You may want to use skb_put_padto() which also takes care of bumping
> skb->len accordingly, in case that makes a difference for the
> ftgmac100 hardware.

Subsequent patch adds fragments, so it needs to bump headlen. I'll have
a look this week-end, thanks.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 11/12] ftgmac100: Add support for fragmented tx
  2017-04-07 13:26   ` Florian Fainelli
@ 2017-04-07 23:19     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07 23:19 UTC (permalink / raw)
  To: Florian Fainelli, netdev

On Fri, 2017-04-07 at 06:26 -0700, Florian Fainelli wrote:
> 
> On 04/06/2017 08:31 PM, Benjamin Herrenschmidt wrote:
> > Add NETIF_F_SG and create multiple TX ring entries for skb fragments.
> > 
> > On reclaim, the skb is only freed on the segment marked as "last".
> > 
> > > > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > 
> 
> [snip]
> >  
> > > > -	dma_unmap_single(priv->dev, map, skb_headlen(skb), DMA_TO_DEVICE);
> > > > +		if (skb_shinfo(skb)->nr_frags == 0 && len < ETH_ZLEN)
> > +			len = ETH_ZLEN;
> 
> This is where skb_put_padto() would help you eliminate this test since
> you'd be dealing skb->len >= ETH_ZLEN.

Ok, thanks.

> > +		dma_unmap_single(priv->dev, map, len, DMA_TO_DEVICE);
> > > > +	} else {
> > > > +		dma_unmap_page(priv->dev, map,
> > > > +			       ftgmac100_txdes_get_buffer_size(txdes),
> > > > +			       DMA_TO_DEVICE);
> > > > +	}
> >  
> > > > -	dev_kfree_skb(skb);
> > > > +	if (ftgmac100_txdes_get_last_segment(txdes))
> > +		dev_kfree_skb(skb);
> 
> This makes you do an uncached access to the descriptor, right? is there
> a way you could use bookeeping information to free the last fragment?

Not a big deal, it's handled in a subsequent patch. I have to read the
descriptor first word anyway to know the packet has been completed, 
a further patch I just pass that info along to ftgmac100_free_tx_packet

> >  	priv->tx_skbs[pointer] = NULL;
> >  
> > > >  	/* Clear txdes0 except end of ring bit, clear txdes1 as we
> > @@ -623,10 +642,9 @@ static void ftgmac100_tx_complete(struct ftgmac100 *priv)
> >  static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
> > > >  				     struct net_device *netdev)
> >  {
> > > > -	unsigned int len = (skb->len < ETH_ZLEN) ? ETH_ZLEN : skb->len;
> > > >  	struct ftgmac100 *priv = netdev_priv(netdev);
> > > > -	struct ftgmac100_txdes *txdes;
> > > > -	unsigned int pointer;
> > > > +	struct ftgmac100_txdes *txdes, *first;
> > > > +	unsigned int pointer, nfrags, len, i, j;
> > > >  	dma_addr_t map;
> >  
> > > >  	/* The HW doesn't pad small frames */
> > @@ -642,26 +660,35 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
> > > >  		goto drop;
> > > >  	}
> >  
> > > > -	map = dma_map_single(priv->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
> > > > -	if (unlikely(dma_mapping_error(priv->dev, map))) {
> > > > -		/* drop packet */
> > > > +	/* Do we have a limit on #fragments ? I yet have to get a reply
> > > > +	 * from Aspeed. If there's one I haven't hit it.
> > > > +	 */
> > > > +	nfrags = skb_shinfo(skb)->nr_frags;
> > +
> > > > +	/* Get header len and pad for non-fragmented packets */
> > > > +	len = skb_headlen(skb);
> > > > +	if (nfrags == 0 && len < ETH_ZLEN)
> > +		len = ETH_ZLEN;
> 
> Same here skb_put_padto() would eliminate the test.

Yup, I'll fix that, thx.

> [snip]
> 
> >  
> > + dma_err:
> > > > +	if (net_ratelimit())
> > +		netdev_err(netdev, "map tx fragment failed\n");
> 
> You may consider adding a software counter that tracks mapping failures
> (few drivers do that) in a subsequent set of changes.

Ok. I want to add a bunch of SW counters for other things too so
I'll add to the list.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 02/12] ftgmac100: Move ftgmac100_hard_start_xmit() around
  2017-04-07 12:49   ` David Miller
@ 2017-04-07 23:19     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 21+ messages in thread
From: Benjamin Herrenschmidt @ 2017-04-07 23:19 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Fri, 2017-04-07 at 05:49 -0700, David Miller wrote:
> >        /* Tx ring */
> > +     struct sk_buff *tx_skbs[TX_QUEUE_ENTRIES];
> >        unsigned int tx_clean_pointer;
> >        unsigned int tx_pointer;
> >        unsigned int tx_pending;
> 
> Add this only in patch #6 or wherever it is that you start using it,
> not this patch

Yup, just an accident, funny that I didn't notice it (I did review my
own series ;-)

Thanks !

Ben.

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2017-04-07 23:20 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-07  3:30 [PATCH 00/12] ftgmac100: Rework batch 3 - TX path Benjamin Herrenschmidt
2017-04-07  3:30 ` [PATCH 01/12] ftgmac100: Add a tx timeout handler Benjamin Herrenschmidt
2017-04-07  3:30 ` [PATCH 02/12] ftgmac100: Move ftgmac100_hard_start_xmit() around Benjamin Herrenschmidt
2017-04-07 10:09   ` Sergei Shtylyov
2017-04-07 23:14     ` Benjamin Herrenschmidt
2017-04-07 12:49   ` David Miller
2017-04-07 23:19     ` Benjamin Herrenschmidt
2017-04-07  3:30 ` [PATCH 03/12] ftgmac100: Merge ftgmac100_xmit() into ftgmac100_hard_start_xmit() Benjamin Herrenschmidt
2017-04-07  3:30 ` [PATCH 04/12] ftgmac100: Factor tx packet dropping path Benjamin Herrenschmidt
2017-04-07  3:30 ` [PATCH 05/12] ftgmac100: Pad small frames properly Benjamin Herrenschmidt
2017-04-07 13:05   ` Florian Fainelli
2017-04-07 23:15     ` Benjamin Herrenschmidt
2017-04-07  3:30 ` [PATCH 06/12] ftgmac100: Store tx skbs in a separate array Benjamin Herrenschmidt
2017-04-07  3:31 ` [PATCH 07/12] ftgmac100: Cleanup tx queue handling Benjamin Herrenschmidt
2017-04-07  3:31 ` [PATCH 08/12] ftgmac100: Move the barrier out of ftgmac100_txdes_set_dma_own() Benjamin Herrenschmidt
2017-04-07  3:31 ` [PATCH 09/12] ftgmac100: Split tx packet freeing from ftgmac100_tx_complete_packet() Benjamin Herrenschmidt
2017-04-07  3:31 ` [PATCH 10/12] ftgmac100: Don't clear tx desc fields unnecessarily Benjamin Herrenschmidt
2017-04-07  3:31 ` [PATCH 11/12] ftgmac100: Add support for fragmented tx Benjamin Herrenschmidt
2017-04-07 13:26   ` Florian Fainelli
2017-04-07 23:19     ` Benjamin Herrenschmidt
2017-04-07  3:31 ` [PATCH 12/12] ftgmac100: Remove tx descriptor accessors Benjamin Herrenschmidt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.