All of lore.kernel.org
 help / color / mirror / Atom feed
From: Fugang Duan <b38611@freescale.com>
To: <davem@davemloft.net>
Cc: <netdev@vger.kernel.org>, <b38611@freescale.com>,
	<ezequiel.garcia@free-electrons.com>, <b20596@freescale.com>,
	<David.Laight@ACULAB.COM>, <eric.dumazet@gmail.com>
Subject: [PATCH v5 6/6] net: fec: Add software TSO support
Date: Thu, 12 Jun 2014 08:16:23 +0800	[thread overview]
Message-ID: <1402532183-14910-7-git-send-email-b38611@freescale.com> (raw)
In-Reply-To: <1402532183-14910-1-git-send-email-b38611@freescale.com>

Add software TSO support for FEC.
This feature improves outbound throughput performance.

Tested on an imx6dl sabresd board; running iperf TCP tests shows:
- 16.2% improvement compared with the FEC SG patch
- 82% improvement compared with no SG & TSO patches

$ ethtool -K eth0 tso on
$ iperf -c 10.192.242.167 -t 3 &
[  3] local 10.192.242.108 port 35388 connected with 10.192.242.167 port 5001
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 3.0 sec   181 MBytes   506 Mbits/sec

During the testing, the CPU load was 30%.
Since the imx6dl FEC bandwidth is limited by the SoC system bus bandwidth, the
performance with SW TSO is a milestone.

CC: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: David Laight <David.Laight@ACULAB.COM>
CC: Li Frank <B20596@freescale.com>
Signed-off-by: Fugang Duan <B38611@freescale.com>
---
 drivers/net/ethernet/freescale/fec.h      |    6 +
 drivers/net/ethernet/freescale/fec_main.c |  255 ++++++++++++++++++++++++++---
 2 files changed, 238 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index e7ce14d..671d080 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -299,6 +299,12 @@ struct fec_enet_private {
 	unsigned short bufdesc_size;
 	unsigned short tx_ring_size;
 	unsigned short rx_ring_size;
+	unsigned short tx_stop_threshold;
+	unsigned short tx_wake_threshold;
+
+	/* Software TSO */
+	char *tso_hdrs;
+	dma_addr_t tso_hdrs_dma;
 
 	struct	platform_device *pdev;
 
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 6bf8d47..d76099d 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -36,6 +36,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <net/ip.h>
+#include <net/tso.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/icmp.h>
@@ -228,6 +229,15 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 #define FEC_PAUSE_FLAG_AUTONEG	0x1
 #define FEC_PAUSE_FLAG_ENABLE	0x2
 
+#define TSO_HEADER_SIZE		128
+/* Max number of allowed TCP segments for software TSO */
+#define FEC_MAX_TSO_SEGS	100
+#define FEC_MAX_SKB_DESCS	(FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
+
+#define IS_TSO_HEADER(txq, addr) \
+	((addr >= txq->tso_hdrs_dma) && \
+	(addr < txq->tso_hdrs_dma + txq->tx_ring_size * TSO_HEADER_SIZE))
+
 static int mii_cnt;
 
 static inline
@@ -439,8 +449,17 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
 	unsigned short buflen;
 	unsigned int estatus = 0;
 	unsigned int index;
+	int entries_free;
 	int ret;
 
+	entries_free = fec_enet_get_free_txdesc_num(fep);
+	if (entries_free < MAX_SKB_FRAGS + 1) {
+		dev_kfree_skb_any(skb);
+		if (net_ratelimit())
+			netdev_err(ndev, "NOT enough BD for SG!\n");
+		return NETDEV_TX_OK;
+	}
+
 	/* Protocol checksum off-load for TCP and UDP. */
 	if (fec_enet_clear_csum(skb, ndev)) {
 		dev_kfree_skb_any(skb);
@@ -535,35 +554,210 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
 	return 0;
 }
 
-static netdev_tx_t
-fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static int
+fec_enet_txq_put_data_tso(struct sk_buff *skb, struct net_device *ndev,
+			struct bufdesc *bdp, int index, char *data,
+			int size, bool last_tcp, bool is_last)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
-	struct bufdesc *bdp;
-	unsigned short	status;
-	int entries_free;
-	int ret;
-
-	/* Fill in a Tx ring entry */
-	bdp = fep->cur_tx;
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(fep->pdev);
+	struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+	unsigned short status;
+	unsigned int estatus = 0;
 
 	status = bdp->cbd_sc;
+	status &= ~BD_ENET_TX_STATS;
 
-	if (status & BD_ENET_TX_READY) {
-		/* Ooops.  All transmit buffers are full.  Bail out.
-		 * This should not happen, since ndev->tbusy should be set.
-		 */
+	status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+	bdp->cbd_datlen = size;
+
+	if (((unsigned long) data) & FEC_ALIGNMENT ||
+		id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+		memcpy(fep->tx_bounce[index], data, size);
+		data = fep->tx_bounce[index];
+
+		if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+			swap_buffer(data, size);
+	}
+
+	bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data,
+					size, DMA_TO_DEVICE);
+	if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+		dev_kfree_skb_any(skb);
 		if (net_ratelimit())
-			netdev_err(ndev, "tx queue full!\n");
+			netdev_err(ndev, "Tx DMA memory map failed\n");
 		return NETDEV_TX_BUSY;
 	}
 
-	ret = fec_enet_txq_submit_skb(skb, ndev);
+	if (fep->bufdesc_ex) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+		ebdp->cbd_bdu = 0;
+		ebdp->cbd_esc = estatus;
+	}
+
+	/* Handle the last BD specially */
+	if (last_tcp)
+		status |= (BD_ENET_TX_LAST | BD_ENET_TX_TC);
+	if (is_last) {
+		status |= BD_ENET_TX_INTR;
+		if (fep->bufdesc_ex)
+			ebdp->cbd_esc |= BD_ENET_TX_INT;
+	}
+
+	bdp->cbd_sc = status;
+
+	return 0;
+}
+
+static int
+fec_enet_txq_put_hdr_tso(struct sk_buff *skb, struct net_device *ndev,
+			struct bufdesc *bdp, int index)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(fep->pdev);
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+	void *bufaddr;
+	unsigned long dmabuf;
+	unsigned short status;
+	unsigned int estatus = 0;
+
+	status = bdp->cbd_sc;
+	status &= ~BD_ENET_TX_STATS;
+	status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+
+	bufaddr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+	dmabuf = fep->tso_hdrs_dma + index * TSO_HEADER_SIZE;
+	if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+		id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+		memcpy(fep->tx_bounce[index], skb->data, hdr_len);
+		bufaddr = fep->tx_bounce[index];
+
+		if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+			swap_buffer(bufaddr, hdr_len);
+
+		dmabuf = dma_map_single(&fep->pdev->dev, bufaddr,
+					hdr_len, DMA_TO_DEVICE);
+		if (dma_mapping_error(&fep->pdev->dev, dmabuf)) {
+			dev_kfree_skb_any(skb);
+			if (net_ratelimit())
+				netdev_err(ndev, "Tx DMA memory map failed\n");
+			return NETDEV_TX_BUSY;
+		}
+	}
+
+	bdp->cbd_bufaddr = dmabuf;
+	bdp->cbd_datlen = hdr_len;
+
+	if (fep->bufdesc_ex) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+		ebdp->cbd_bdu = 0;
+		ebdp->cbd_esc = estatus;
+	}
+
+	bdp->cbd_sc = status;
+
+	return 0;
+}
+
+static int fec_enet_txq_submit_tso(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	int total_len, data_left;
+	struct bufdesc *bdp = fep->cur_tx;
+	struct tso_t tso;
+	unsigned int index = 0;
+	int ret;
+
+	if (tso_count_descs(skb) >= fec_enet_get_free_txdesc_num(fep)) {
+		dev_kfree_skb_any(skb);
+		if (net_ratelimit())
+			netdev_err(ndev, "NOT enough BD for TSO!\n");
+		return NETDEV_TX_OK;
+	}
+
+	/* Protocol checksum off-load for TCP and UDP. */
+	if (fec_enet_clear_csum(skb, ndev)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* Initialize the TSO handler, and prepare the first payload */
+	tso_start(skb, &tso);
+
+	total_len = skb->len - hdr_len;
+	while (total_len > 0) {
+		char *hdr;
+
+		index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+		total_len -= data_left;
+
+		/* prepare packet headers: MAC + IP + TCP */
+		hdr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+		tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
+		ret = fec_enet_txq_put_hdr_tso(skb, ndev, bdp, index);
+		if (ret)
+			goto err_release;
+
+		while (data_left > 0) {
+			int size;
+
+			size = min_t(int, tso.size, data_left);
+			bdp = fec_enet_get_nextdesc(bdp, fep);
+			index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+			ret = fec_enet_txq_put_data_tso(skb, ndev, bdp, index, tso.data,
+							size, size == data_left,
+							total_len == 0);
+			if (ret)
+				goto err_release;
+
+			data_left -= size;
+			tso_build_data(skb, &tso, size);
+		}
+
+		bdp = fec_enet_get_nextdesc(bdp, fep);
+	}
+
+	/* Save skb pointer */
+	fep->tx_skbuff[index] = skb;
+
+	fec_enet_submit_work(bdp, fep);
+
+	skb_tx_timestamp(skb);
+	fep->cur_tx = bdp;
+
+	/* Trigger transmission start */
+	writel(0, fep->hwp + FEC_X_DES_ACTIVE);
+
+	return 0;
+
+err_release:
+	/* TODO: Release all used data descriptors for TSO */
+	return ret;
+}
+
+static netdev_tx_t
+fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	int entries_free;
+	int ret;
+
+	if (skb_is_gso(skb))
+		ret = fec_enet_txq_submit_tso(skb, ndev);
+	else
+		ret = fec_enet_txq_submit_skb(skb, ndev);
 	if (ret)
 		return ret;
 
 	entries_free = fec_enet_get_free_txdesc_num(fep);
-	if (entries_free < MAX_SKB_FRAGS + 1)
+	if (entries_free <= fep->tx_stop_threshold)
 		netif_stop_queue(ndev);
 
 	return NETDEV_TX_OK;
@@ -884,7 +1078,7 @@ fec_enet_tx(struct net_device *ndev)
 	unsigned short status;
 	struct	sk_buff	*skb;
 	int	index = 0;
-	int	entries;
+	int	entries_free;
 
 	fep = netdev_priv(ndev);
 	bdp = fep->dirty_tx;
@@ -901,8 +1095,9 @@ fec_enet_tx(struct net_device *ndev)
 		index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
 
 		skb = fep->tx_skbuff[index];
-		dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, bdp->cbd_datlen,
-				DMA_TO_DEVICE);
+		if (!IS_TSO_HEADER(fep, bdp->cbd_bufaddr))
+			dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+					bdp->cbd_datlen, DMA_TO_DEVICE);
 		bdp->cbd_bufaddr = 0;
 		if (!skb) {
 			bdp = fec_enet_get_nextdesc(bdp, fep);
@@ -963,9 +1158,11 @@ fec_enet_tx(struct net_device *ndev)
 
 		/* Since we have freed up a buffer, the ring is no longer full
 		 */
-		entries = fec_enet_get_free_txdesc_num(fep);
-		if (entries >= MAX_SKB_FRAGS + 1 && netif_queue_stopped(ndev))
-			netif_wake_queue(ndev);
+		if (netif_queue_stopped(ndev)) {
+			entries_free = fec_enet_get_free_txdesc_num(fep);
+			if (entries_free >= fep->tx_wake_threshold)
+				netif_wake_queue(ndev);
+		}
 	}
 	return;
 }
@@ -2167,6 +2364,9 @@ static int fec_enet_init(struct net_device *ndev)
 	fep->tx_ring_size = TX_RING_SIZE;
 	fep->rx_ring_size = RX_RING_SIZE;
 
+	fep->tx_stop_threshold = FEC_MAX_SKB_DESCS;
+	fep->tx_wake_threshold = (fep->tx_ring_size - fep->tx_stop_threshold) / 2;
+
 	if (fep->bufdesc_ex)
 		fep->bufdesc_size = sizeof(struct bufdesc_ex);
 	else
@@ -2180,6 +2380,13 @@ static int fec_enet_init(struct net_device *ndev)
 	if (!cbd_base)
 		return -ENOMEM;
 
+	fep->tso_hdrs = dma_alloc_coherent(NULL, fep->tx_ring_size * TSO_HEADER_SIZE,
+						&fep->tso_hdrs_dma, GFP_KERNEL);
+	if (!fep->tso_hdrs) {
+		dma_free_coherent(NULL, bd_size, cbd_base, fep->bd_dma);
+		return -ENOMEM;
+	}
+
 	memset(cbd_base, 0, PAGE_SIZE);
 
 	fep->netdev = ndev;
@@ -2210,9 +2417,11 @@ static int fec_enet_init(struct net_device *ndev)
 		ndev->features |= NETIF_F_HW_VLAN_CTAG_RX;
 
 	if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
+		ndev->gso_max_segs = FEC_MAX_TSO_SEGS;
+
 		/* enable hw accelerator */
 		ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
-				| NETIF_F_RXCSUM | NETIF_F_SG);
+				| NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO);
 		fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
 	}
 
-- 
1.7.8

  parent reply	other threads:[~2014-06-12  1:42 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-06-12  0:16 [PATCH v5 0/6] net: fec: Enable Software TSO to improve the tx performance Fugang Duan
2014-06-12  0:16 ` [PATCH v5 1/6] net: fec: Factorize the .xmit transmit function Fugang Duan
2014-06-12  0:16 ` [PATCH v5 2/6] net: fec: Enable IP header hardware checksum Fugang Duan
2014-06-12  0:16 ` [PATCH v5 3/6] net: fec: Factorize feature setting Fugang Duan
2014-06-12  0:16 ` [PATCH v5 4/6] net: fec: Increase buffer descriptor entry number Fugang Duan
2014-06-12  0:16 ` [PATCH v5 5/6] net: fec: Add Scatter/gather support Fugang Duan
2014-06-12  0:16 ` Fugang Duan [this message]
2014-06-12 18:02 ` [PATCH v5 0/6] net: fec: Enable Software TSO to improve the tx performance David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1402532183-14910-7-git-send-email-b38611@freescale.com \
    --to=b38611@freescale.com \
    --cc=David.Laight@ACULAB.COM \
    --cc=b20596@freescale.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=ezequiel.garcia@free-electrons.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.