* [PATCH net-next 0/7] net: hns3: updates for -next
@ 2021-06-16  6:36 Guangbin Huang
  2021-06-16  6:36 ` [PATCH net-next 1/7] net: hns3: minor refactor related to desc_cb handling Guangbin Huang
                   ` (6 more replies)
  0 siblings, 7 replies; 9+ messages in thread
From: Guangbin Huang @ 2021-06-16  6:36 UTC (permalink / raw)
  To: davem, kuba; +Cc: netdev, linux-kernel, salil.mehta, lipeng321, huangguangbin2

This series includes some optimizations in the IO path for the HNS3
ethernet driver.

Huazhong Tan (1):
  net: hns3: add support to query tx spare buffer size for pf

Yunsheng Lin (6):
  net: hns3: minor refactor related to desc_cb handling
  net: hns3: refactor for hns3_fill_desc() function
  net: hns3: use tx bounce buffer for small packets
  net: hns3: support dma_map_sg() for multi frags skb
  net: hns3: optimize the rx page reuse handling process
  net: hns3: use bounce buffer when rx page can not be reused

 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |   8 +-
 drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c |  54 ++
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 575 ++++++++++++++++++---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |  58 ++-
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |  66 +++
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |   2 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    |  14 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |   2 +
 8 files changed, 680 insertions(+), 99 deletions(-)

-- 
2.8.1



* [PATCH net-next 1/7] net: hns3: minor refactor related to desc_cb handling
  2021-06-16  6:36 [PATCH net-next 0/7] net: hns3: updates for -next Guangbin Huang
@ 2021-06-16  6:36 ` Guangbin Huang
  2021-06-16  6:36 ` [PATCH net-next 2/7] net: hns3: refactor for hns3_fill_desc() function Guangbin Huang
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Guangbin Huang @ 2021-06-16  6:36 UTC (permalink / raw)
  To: davem, kuba; +Cc: netdev, linux-kernel, salil.mehta, lipeng321, huangguangbin2

From: Yunsheng Lin <linyunsheng@huawei.com>

desc_cb is used to store the mapping and freeing info for the
corresponding desc, which is used in the cleaning process.
More desc_cb types will be added when supporting the tx bounce
buffer, so change the desc_cb type to a bit-wise value in order
to reduce the desc_cb type checking operations in the data path.

Also move the desc_cb type definition to hns3_enet.h because it
is only used in hns3_enet.c, and declare a local desc_cb variable
in hns3_clear_desc() to reduce lines of code.
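
As a small before/after illustration (taken from the hns3_unmap_buffer()
hunk below), the bit-wise values let one mask test replace chained
equality checks:

	/* before: two compares per descriptor */
	if (cb->type == DESC_TYPE_SKB || cb->type == DESC_TYPE_FRAGLIST_SKB)
		dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length,
				 ring_to_dma_dir(ring));

	/* after: one mask test covers both cases */
	if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB))
		dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length,
				 ring_to_dma_dir(ring));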

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h     |  7 -----
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 40 +++++++++++--------------
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h |  7 +++++
 3 files changed, 25 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index ba883b0a19f0..5822fc06f767 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -159,13 +159,6 @@ enum HNAE3_PF_CAP_BITS {
 #define ring_ptr_move_bw(ring, p) \
 	((ring)->p = ((ring)->p - 1 + (ring)->desc_num) % (ring)->desc_num)
 
-enum hns_desc_type {
-	DESC_TYPE_UNKNOWN,
-	DESC_TYPE_SKB,
-	DESC_TYPE_FRAGLIST_SKB,
-	DESC_TYPE_PAGE,
-};
-
 struct hnae3_handle;
 
 struct hnae3_queue {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 9a45f3cde6a2..f03a7a962eb0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1413,7 +1413,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
 }
 
 static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
-			  unsigned int size, enum hns_desc_type type)
+			  unsigned int size, unsigned int type)
 {
 #define HNS3_LIKELY_BD_NUM	1
 
@@ -1425,8 +1425,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 	int k, sizeoflast;
 	dma_addr_t dma;
 
-	if (type == DESC_TYPE_FRAGLIST_SKB ||
-	    type == DESC_TYPE_SKB) {
+	if (type & (DESC_TYPE_FRAGLIST_SKB | DESC_TYPE_SKB)) {
 		struct sk_buff *skb = (struct sk_buff *)priv;
 
 		dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
@@ -1704,6 +1703,7 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig)
 
 	for (i = 0; i < ring->desc_num; i++) {
 		struct hns3_desc *desc = &ring->desc[ring->next_to_use];
+		struct hns3_desc_cb *desc_cb;
 
 		memset(desc, 0, sizeof(*desc));
 
@@ -1714,31 +1714,27 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig)
 		/* rollback one */
 		ring_ptr_move_bw(ring, next_to_use);
 
-		if (!ring->desc_cb[ring->next_to_use].dma)
+		desc_cb = &ring->desc_cb[ring->next_to_use];
+
+		if (!desc_cb->dma)
 			continue;
 
 		/* unmap the descriptor dma address */
-		if (ring->desc_cb[ring->next_to_use].type == DESC_TYPE_SKB ||
-		    ring->desc_cb[ring->next_to_use].type ==
-		    DESC_TYPE_FRAGLIST_SKB)
-			dma_unmap_single(dev,
-					 ring->desc_cb[ring->next_to_use].dma,
-					ring->desc_cb[ring->next_to_use].length,
-					DMA_TO_DEVICE);
-		else if (ring->desc_cb[ring->next_to_use].length)
-			dma_unmap_page(dev,
-				       ring->desc_cb[ring->next_to_use].dma,
-				       ring->desc_cb[ring->next_to_use].length,
+		if (desc_cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB))
+			dma_unmap_single(dev, desc_cb->dma, desc_cb->length,
+					 DMA_TO_DEVICE);
+		else if (desc_cb->length)
+			dma_unmap_page(dev, desc_cb->dma, desc_cb->length,
 				       DMA_TO_DEVICE);
 
-		ring->desc_cb[ring->next_to_use].length = 0;
-		ring->desc_cb[ring->next_to_use].dma = 0;
-		ring->desc_cb[ring->next_to_use].type = DESC_TYPE_UNKNOWN;
+		desc_cb->length = 0;
+		desc_cb->dma = 0;
+		desc_cb->type = DESC_TYPE_UNKNOWN;
 	}
 }
 
 static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
-				 struct sk_buff *skb, enum hns_desc_type type)
+				 struct sk_buff *skb, unsigned int type)
 {
 	unsigned int size = skb_headlen(skb);
 	struct sk_buff *frag_skb;
@@ -2859,7 +2855,7 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
 static void hns3_free_buffer(struct hns3_enet_ring *ring,
 			     struct hns3_desc_cb *cb, int budget)
 {
-	if (cb->type == DESC_TYPE_SKB)
+	if (cb->type & DESC_TYPE_SKB)
 		napi_consume_skb(cb->priv, budget);
 	else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
 		__page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
@@ -2880,7 +2876,7 @@ static int hns3_map_buffer(struct hns3_enet_ring *ring, struct hns3_desc_cb *cb)
 static void hns3_unmap_buffer(struct hns3_enet_ring *ring,
 			      struct hns3_desc_cb *cb)
 {
-	if (cb->type == DESC_TYPE_SKB || cb->type == DESC_TYPE_FRAGLIST_SKB)
+	if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB))
 		dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length,
 				 ring_to_dma_dir(ring));
 	else if (cb->length)
@@ -3037,7 +3033,7 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
 
 		desc_cb = &ring->desc_cb[ntc];
 
-		if (desc_cb->type == DESC_TYPE_SKB) {
+		if (desc_cb->type & DESC_TYPE_SKB) {
 			(*pkts)++;
 			(*bytes) += desc_cb->send_bytes;
 		}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 79821c7bdc16..9d18b9430b54 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -299,6 +299,13 @@ struct __packed hns3_desc {
 	};
 };
 
+enum hns3_desc_type {
+	DESC_TYPE_UNKNOWN		= 0,
+	DESC_TYPE_SKB			= 1 << 0,
+	DESC_TYPE_FRAGLIST_SKB		= 1 << 1,
+	DESC_TYPE_PAGE			= 1 << 2,
+};
+
 struct hns3_desc_cb {
 	dma_addr_t dma; /* dma address of this desc */
 	void *buf;      /* cpu addr for a desc */
-- 
2.8.1



* [PATCH net-next 2/7] net: hns3: refactor for hns3_fill_desc() function
  2021-06-16  6:36 [PATCH net-next 0/7] net: hns3: updates for -next Guangbin Huang
  2021-06-16  6:36 ` [PATCH net-next 1/7] net: hns3: minor refactor related to desc_cb handling Guangbin Huang
@ 2021-06-16  6:36 ` Guangbin Huang
  2021-06-16  6:36 ` [PATCH net-next 3/7] net: hns3: use tx bounce buffer for small packets Guangbin Huang
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Guangbin Huang @ 2021-06-16  6:36 UTC (permalink / raw)
  To: davem, kuba; +Cc: netdev, linux-kernel, salil.mehta, lipeng321, huangguangbin2

From: Yunsheng Lin <linyunsheng@huawei.com>

Factor out the descriptor filling part of hns3_fill_desc() so that it
can be reused when adding tx bounce buffer support, and move the dma
mapping into a separate hns3_map_and_fill_desc() helper.
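
The resulting call shape, sketched from the diff below: the new
hns3_map_and_fill_desc() helper does the dma mapping and records it in
the desc_cb, then hands only the dma address and size to
hns3_fill_desc(), which fills the hardware BD(s):

	dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
	...
	desc_cb->priv = priv;
	desc_cb->length = size;
	desc_cb->dma = dma;
	desc_cb->type = type;

	return hns3_fill_desc(ring, dma, size);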

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 87 ++++++++++++++-----------
 1 file changed, 48 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index f03a7a962eb0..6fa1ed5c4098 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1412,39 +1412,14 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
 	return 0;
 }
 
-static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
-			  unsigned int size, unsigned int type)
+static int hns3_fill_desc(struct hns3_enet_ring *ring, dma_addr_t dma,
+			  unsigned int size)
 {
 #define HNS3_LIKELY_BD_NUM	1
 
-	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
 	struct hns3_desc *desc = &ring->desc[ring->next_to_use];
-	struct device *dev = ring_to_dev(ring);
-	skb_frag_t *frag;
 	unsigned int frag_buf_num;
 	int k, sizeoflast;
-	dma_addr_t dma;
-
-	if (type & (DESC_TYPE_FRAGLIST_SKB | DESC_TYPE_SKB)) {
-		struct sk_buff *skb = (struct sk_buff *)priv;
-
-		dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
-	} else {
-		frag = (skb_frag_t *)priv;
-		dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
-	}
-
-	if (unlikely(dma_mapping_error(dev, dma))) {
-		u64_stats_update_begin(&ring->syncp);
-		ring->stats.sw_err_cnt++;
-		u64_stats_update_end(&ring->syncp);
-		return -ENOMEM;
-	}
-
-	desc_cb->priv = priv;
-	desc_cb->length = size;
-	desc_cb->dma = dma;
-	desc_cb->type = type;
 
 	if (likely(size <= HNS3_MAX_BD_SIZE)) {
 		desc->addr = cpu_to_le64(dma);
@@ -1480,6 +1455,47 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 	return frag_buf_num;
 }
 
+static int hns3_map_and_fill_desc(struct hns3_enet_ring *ring, void *priv,
+				  unsigned int type)
+{
+	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
+	struct device *dev = ring_to_dev(ring);
+	unsigned int size;
+	dma_addr_t dma;
+
+	if (type & (DESC_TYPE_FRAGLIST_SKB | DESC_TYPE_SKB)) {
+		struct sk_buff *skb = (struct sk_buff *)priv;
+
+		size = skb_headlen(skb);
+		if (!size)
+			return 0;
+
+		dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
+	} else {
+		skb_frag_t *frag = (skb_frag_t *)priv;
+
+		size = skb_frag_size(frag);
+		if (!size)
+			return 0;
+
+		dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
+	}
+
+	if (unlikely(dma_mapping_error(dev, dma))) {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.sw_err_cnt++;
+		u64_stats_update_end(&ring->syncp);
+		return -ENOMEM;
+	}
+
+	desc_cb->priv = priv;
+	desc_cb->length = size;
+	desc_cb->dma = dma;
+	desc_cb->type = type;
+
+	return hns3_fill_desc(ring, dma, size);
+}
+
 static unsigned int hns3_skb_bd_num(struct sk_buff *skb, unsigned int *bd_size,
 				    unsigned int bd_num)
 {
@@ -1736,26 +1752,19 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig)
 static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
 				 struct sk_buff *skb, unsigned int type)
 {
-	unsigned int size = skb_headlen(skb);
 	struct sk_buff *frag_skb;
 	int i, ret, bd_num = 0;
 
-	if (size) {
-		ret = hns3_fill_desc(ring, skb, size, type);
-		if (unlikely(ret < 0))
-			return ret;
+	ret = hns3_map_and_fill_desc(ring, skb, type);
+	if (unlikely(ret < 0))
+		return ret;
 
-		bd_num += ret;
-	}
+	bd_num += ret;
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
-		size = skb_frag_size(frag);
-		if (!size)
-			continue;
-
-		ret = hns3_fill_desc(ring, frag, size, DESC_TYPE_PAGE);
+		ret = hns3_map_and_fill_desc(ring, frag, DESC_TYPE_PAGE);
 		if (unlikely(ret < 0))
 			return ret;
 
-- 
2.8.1



* [PATCH net-next 3/7] net: hns3: use tx bounce buffer for small packets
  2021-06-16  6:36 [PATCH net-next 0/7] net: hns3: updates for -next Guangbin Huang
  2021-06-16  6:36 ` [PATCH net-next 1/7] net: hns3: minor refactor related to desc_cb handling Guangbin Huang
  2021-06-16  6:36 ` [PATCH net-next 2/7] net: hns3: refactor for hns3_fill_desc() function Guangbin Huang
@ 2021-06-16  6:36 ` Guangbin Huang
  2021-06-16  6:36 ` [PATCH net-next 4/7] net: hns3: add support to query tx spare buffer size for pf Guangbin Huang
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Guangbin Huang @ 2021-06-16  6:36 UTC (permalink / raw)
  To: davem, kuba; +Cc: netdev, linux-kernel, salil.mehta, lipeng321, huangguangbin2

From: Yunsheng Lin <linyunsheng@huawei.com>

When the packet or frag size is small, it causes both security and
performance issues. As dma can't map a sub-page buffer, some extra
kernel data is visible to devices. On the other hand, the overhead
of dma map and unmap is huge when the IOMMU is on.

So add a queue-based shared tx bounce buffer and memcpy the small
packet into it when the len of the xmitted skb is below tx_copybreak.
Add a tx_spare_buf_size module param to set the size of the tx spare
buffer, and add set/get_tunable to set or query the tx_copybreak.

The throughput improves from 30 Gbps to 90+ Gbps when running 16
netperf threads with 32KB UDP message size when the IOMMU is in
strict mode (tx_copybreak = 2000 and mtu = 1500).
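
A rough sketch of the xmit-path decision (conceptual only; the real
logic is hns3_handle_desc_filling()/hns3_handle_tx_bounce() in the
diff below):

	if (ring->tx_spare &&
	    hns3_can_use_tx_bounce(ring, skb, hns3_tx_spare_space(ring)))
		/* memcpy the small skb into the pre-mapped spare buffer */
		return hns3_handle_tx_bounce(ring, skb);

	/* otherwise map the skb head and frags individually */
	return hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB);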

Suggested-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c |  52 ++++
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 289 ++++++++++++++++++++-
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |  43 ++-
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |  51 ++++
 4 files changed, 420 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index c512a63c423b..a24a75c47cad 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -392,6 +392,56 @@ static void hns3_dbg_fill_content(char *content, u16 len,
 	*pos++ = '\0';
 }
 
+static const struct hns3_dbg_item tx_spare_info_items[] = {
+	{ "QUEUE_ID", 2 },
+	{ "COPYBREAK", 2 },
+	{ "LEN", 7 },
+	{ "NTU", 4 },
+	{ "NTC", 4 },
+	{ "LTC", 4 },
+	{ "DMA", 17 },
+};
+
+static void hns3_dbg_tx_spare_info(struct hns3_enet_ring *ring, char *buf,
+				   int len, u32 ring_num, int *pos)
+{
+	char data_str[ARRAY_SIZE(tx_spare_info_items)][HNS3_DBG_DATA_STR_LEN];
+	struct hns3_tx_spare *tx_spare = ring->tx_spare;
+	char *result[ARRAY_SIZE(tx_spare_info_items)];
+	char content[HNS3_DBG_INFO_LEN];
+	u32 i, j;
+
+	if (!tx_spare) {
+		*pos += scnprintf(buf + *pos, len - *pos,
+				  "tx spare buffer is not enabled\n");
+		return;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tx_spare_info_items); i++)
+		result[i] = &data_str[i][0];
+
+	*pos += scnprintf(buf + *pos, len - *pos, "tx spare buffer info\n");
+	hns3_dbg_fill_content(content, sizeof(content), tx_spare_info_items,
+			      NULL, ARRAY_SIZE(tx_spare_info_items));
+	*pos += scnprintf(buf + *pos, len - *pos, "%s", content);
+
+	for (i = 0; i < ring_num; i++) {
+		j = 0;
+		sprintf(result[j++], "%8u", i);
+		sprintf(result[j++], "%9u", ring->tx_copybreak);
+		sprintf(result[j++], "%3u", tx_spare->len);
+		sprintf(result[j++], "%3u", tx_spare->next_to_use);
+		sprintf(result[j++], "%3u", tx_spare->next_to_clean);
+		sprintf(result[j++], "%3u", tx_spare->last_to_clean);
+		sprintf(result[j++], "%pad", &tx_spare->dma);
+		hns3_dbg_fill_content(content, sizeof(content),
+				      tx_spare_info_items,
+				      (const char **)result,
+				      ARRAY_SIZE(tx_spare_info_items));
+		*pos += scnprintf(buf + *pos, len - *pos, "%s", content);
+	}
+}
+
 static const struct hns3_dbg_item rx_queue_info_items[] = {
 	{ "QUEUE_ID", 2 },
 	{ "BD_NUM", 2 },
@@ -593,6 +643,8 @@ static int hns3_dbg_tx_queue_info(struct hnae3_handle *h,
 		pos += scnprintf(buf + pos, len - pos, "%s", content);
 	}
 
+	hns3_dbg_tx_spare_info(ring, buf, len, h->kinfo.num_tqps, &pos);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 6fa1ed5c4098..e5466daac1c4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -53,6 +53,10 @@ static int debug = -1;
 module_param(debug, int, 0);
 MODULE_PARM_DESC(debug, " Network interface message level setting");
 
+static unsigned int tx_spare_buf_size;
+module_param(tx_spare_buf_size, uint, 0400);
+MODULE_PARM_DESC(tx_spare_buf_size, "Size used to allocate tx spare buffer");
+
 #define DEFAULT_MSG_LEVEL (NETIF_MSG_PROBE | NETIF_MSG_LINK | \
 			   NETIF_MSG_IFDOWN | NETIF_MSG_IFUP)
 
@@ -941,6 +945,177 @@ void hns3_request_update_promisc_mode(struct hnae3_handle *handle)
 		ops->request_update_promisc_mode(handle);
 }
 
+static u32 hns3_tx_spare_space(struct hns3_enet_ring *ring)
+{
+	struct hns3_tx_spare *tx_spare = ring->tx_spare;
+	u32 ntc, ntu;
+
+	/* This smp_load_acquire() pairs with smp_store_release() in
+	 * hns3_tx_spare_update() called in tx desc cleaning process.
+	 */
+	ntc = smp_load_acquire(&tx_spare->last_to_clean);
+	ntu = tx_spare->next_to_use;
+
+	if (ntc > ntu)
+		return ntc - ntu - 1;
+
+	/* The free tx buffer is divided into two parts, so pick the
+	 * larger one.
+	 */
+	return (ntc > (tx_spare->len - ntu) ? ntc :
+			(tx_spare->len - ntu)) - 1;
+}
+
+static void hns3_tx_spare_update(struct hns3_enet_ring *ring)
+{
+	struct hns3_tx_spare *tx_spare = ring->tx_spare;
+
+	if (!tx_spare ||
+	    tx_spare->last_to_clean == tx_spare->next_to_clean)
+		return;
+
+	/* This smp_store_release() pairs with smp_load_acquire() in
+	 * hns3_tx_spare_space() called in xmit process.
+	 */
+	smp_store_release(&tx_spare->last_to_clean,
+			  tx_spare->next_to_clean);
+}
+
+static bool hns3_can_use_tx_bounce(struct hns3_enet_ring *ring,
+				   struct sk_buff *skb,
+				   u32 space)
+{
+	u32 len = skb->len <= ring->tx_copybreak ? skb->len :
+				skb_headlen(skb);
+
+	if (len > ring->tx_copybreak)
+		return false;
+
+	if (ALIGN(len, dma_get_cache_alignment()) > space) {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.tx_spare_full++;
+		u64_stats_update_end(&ring->syncp);
+		return false;
+	}
+
+	return true;
+}
+
+static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring)
+{
+	struct hns3_tx_spare *tx_spare;
+	struct page *page;
+	dma_addr_t dma;
+	int order;
+
+	if (!tx_spare_buf_size)
+		return;
+
+	order = get_order(tx_spare_buf_size);
+	tx_spare = devm_kzalloc(ring_to_dev(ring), sizeof(*tx_spare),
+				GFP_KERNEL);
+	if (!tx_spare) {
+		/* The driver still works without the tx spare buffer */
+		dev_warn(ring_to_dev(ring), "failed to allocate hns3_tx_spare\n");
+		return;
+	}
+
+	page = alloc_pages_node(dev_to_node(ring_to_dev(ring)),
+				GFP_KERNEL, order);
+	if (!page) {
+		dev_warn(ring_to_dev(ring), "failed to allocate tx spare pages\n");
+		devm_kfree(ring_to_dev(ring), tx_spare);
+		return;
+	}
+
+	dma = dma_map_page(ring_to_dev(ring), page, 0,
+			   PAGE_SIZE << order, DMA_TO_DEVICE);
+	if (dma_mapping_error(ring_to_dev(ring), dma)) {
+		dev_warn(ring_to_dev(ring), "failed to map pages for tx spare\n");
+		put_page(page);
+		devm_kfree(ring_to_dev(ring), tx_spare);
+		return;
+	}
+
+	tx_spare->dma = dma;
+	tx_spare->buf = page_address(page);
+	tx_spare->len = PAGE_SIZE << order;
+	ring->tx_spare = tx_spare;
+}
+
+/* Use hns3_tx_spare_space() to make sure there is enough buffer
+ * before calling below function to allocate tx buffer.
+ */
+static void *hns3_tx_spare_alloc(struct hns3_enet_ring *ring,
+				 unsigned int size, dma_addr_t *dma,
+				 u32 *cb_len)
+{
+	struct hns3_tx_spare *tx_spare = ring->tx_spare;
+	u32 ntu = tx_spare->next_to_use;
+
+	size = ALIGN(size, dma_get_cache_alignment());
+	*cb_len = size;
+
+	/* Tx spare buffer wraps back here because the end of
+	 * freed tx buffer is not enough.
+	 */
+	if (ntu + size > tx_spare->len) {
+		*cb_len += (tx_spare->len - ntu);
+		ntu = 0;
+	}
+
+	tx_spare->next_to_use = ntu + size;
+	if (tx_spare->next_to_use == tx_spare->len)
+		tx_spare->next_to_use = 0;
+
+	*dma = tx_spare->dma + ntu;
+
+	return tx_spare->buf + ntu;
+}
+
+static void hns3_tx_spare_rollback(struct hns3_enet_ring *ring, u32 len)
+{
+	struct hns3_tx_spare *tx_spare = ring->tx_spare;
+
+	if (len > tx_spare->next_to_use) {
+		len -= tx_spare->next_to_use;
+		tx_spare->next_to_use = tx_spare->len - len;
+	} else {
+		tx_spare->next_to_use -= len;
+	}
+}
+
+static void hns3_tx_spare_reclaim_cb(struct hns3_enet_ring *ring,
+				     struct hns3_desc_cb *cb)
+{
+	struct hns3_tx_spare *tx_spare = ring->tx_spare;
+	u32 ntc = tx_spare->next_to_clean;
+	u32 len = cb->length;
+
+	tx_spare->next_to_clean += len;
+
+	if (tx_spare->next_to_clean >= tx_spare->len) {
+		tx_spare->next_to_clean -= tx_spare->len;
+
+		if (tx_spare->next_to_clean) {
+			ntc = 0;
+			len = tx_spare->next_to_clean;
+		}
+	}
+
+	/* This tx spare buffer is only really reclaimed after calling
+	 * hns3_tx_spare_update(), so it is still safe to use the info in
+	 * the tx buffer to do the dma sync after tx_spare->next_to_clean
+	 * is moved forward.
+	 */
+	if (cb->type & (DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL)) {
+		dma_addr_t dma = tx_spare->dma + ntc;
+
+		dma_sync_single_for_cpu(ring_to_dev(ring), dma, len,
+					DMA_TO_DEVICE);
+	}
+}
+
 static int hns3_set_tso(struct sk_buff *skb, u32 *paylen_fdop_ol4cs,
 			u16 *mss, u32 *type_cs_vlan_tso, u32 *send_bytes)
 {
@@ -1471,6 +1646,11 @@ static int hns3_map_and_fill_desc(struct hns3_enet_ring *ring, void *priv,
 			return 0;
 
 		dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
+	} else if (type & DESC_TYPE_BOUNCE_HEAD) {
+		/* Head data has been filled in hns3_handle_tx_bounce(),
+		 * just return 0 here.
+		 */
+		return 0;
 	} else {
 		skb_frag_t *frag = (skb_frag_t *)priv;
 
@@ -1739,6 +1919,9 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig)
 		if (desc_cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB))
 			dma_unmap_single(dev, desc_cb->dma, desc_cb->length,
 					 DMA_TO_DEVICE);
+		else if (desc_cb->type &
+			 (DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL))
+			hns3_tx_spare_rollback(ring, desc_cb->length);
 		else if (desc_cb->length)
 			dma_unmap_page(dev, desc_cb->dma, desc_cb->length,
 				       DMA_TO_DEVICE);
@@ -1816,6 +1999,79 @@ static void hns3_tsyn(struct net_device *netdev, struct sk_buff *skb,
 	desc->tx.bdtp_fe_sc_vld_ra_ri |= cpu_to_le16(BIT(HNS3_TXD_TSYN_B));
 }
 
+static int hns3_handle_tx_bounce(struct hns3_enet_ring *ring,
+				 struct sk_buff *skb)
+{
+	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
+	unsigned int type = DESC_TYPE_BOUNCE_HEAD;
+	unsigned int size = skb_headlen(skb);
+	dma_addr_t dma;
+	int bd_num = 0;
+	u32 cb_len;
+	void *buf;
+	int ret;
+
+	if (skb->len <= ring->tx_copybreak) {
+		size = skb->len;
+		type = DESC_TYPE_BOUNCE_ALL;
+	}
+
+	/* hns3_can_use_tx_bounce() is called to ensure the below
+	 * function can always return the tx buffer.
+	 */
+	buf = hns3_tx_spare_alloc(ring, size, &dma, &cb_len);
+
+	ret = skb_copy_bits(skb, 0, buf, size);
+	if (unlikely(ret < 0)) {
+		hns3_tx_spare_rollback(ring, cb_len);
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.copy_bits_err++;
+		u64_stats_update_end(&ring->syncp);
+		return ret;
+	}
+
+	desc_cb->priv = skb;
+	desc_cb->length = cb_len;
+	desc_cb->dma = dma;
+	desc_cb->type = type;
+
+	bd_num += hns3_fill_desc(ring, dma, size);
+
+	if (type == DESC_TYPE_BOUNCE_HEAD) {
+		ret = hns3_fill_skb_to_desc(ring, skb,
+					    DESC_TYPE_BOUNCE_HEAD);
+		if (unlikely(ret < 0))
+			return ret;
+
+		bd_num += ret;
+	}
+
+	dma_sync_single_for_device(ring_to_dev(ring), dma, size,
+				   DMA_TO_DEVICE);
+
+	u64_stats_update_begin(&ring->syncp);
+	ring->stats.tx_bounce++;
+	u64_stats_update_end(&ring->syncp);
+	return bd_num;
+}
+
+static int hns3_handle_desc_filling(struct hns3_enet_ring *ring,
+				    struct sk_buff *skb)
+{
+	u32 space;
+
+	if (!ring->tx_spare)
+		goto out;
+
+	space = hns3_tx_spare_space(ring);
+
+	if (hns3_can_use_tx_bounce(ring, skb, space))
+		return hns3_handle_tx_bounce(ring, skb);
+
+out:
+	return hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB);
+}
+
 netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -1862,7 +2118,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 	 * zero, which is unlikely, and 'ret > 0' means how many tx desc
 	 * need to be notified to the hw.
 	 */
-	ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB);
+	ret = hns3_handle_desc_filling(ring, skb);
 	if (unlikely(ret <= 0))
 		goto fill_err;
 
@@ -2064,6 +2320,7 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
 			tx_drop += ring->stats.tx_tso_err;
 			tx_drop += ring->stats.over_max_recursion;
 			tx_drop += ring->stats.hw_limitation;
+			tx_drop += ring->stats.copy_bits_err;
 			tx_errors += ring->stats.sw_err_cnt;
 			tx_errors += ring->stats.tx_vlan_err;
 			tx_errors += ring->stats.tx_l4_proto_err;
@@ -2071,6 +2328,7 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
 			tx_errors += ring->stats.tx_tso_err;
 			tx_errors += ring->stats.over_max_recursion;
 			tx_errors += ring->stats.hw_limitation;
+			tx_errors += ring->stats.copy_bits_err;
 		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
 
 		/* fetch the rx stats */
@@ -2864,7 +3122,8 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
 static void hns3_free_buffer(struct hns3_enet_ring *ring,
 			     struct hns3_desc_cb *cb, int budget)
 {
-	if (cb->type & DESC_TYPE_SKB)
+	if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD |
+			DESC_TYPE_BOUNCE_ALL))
 		napi_consume_skb(cb->priv, budget);
 	else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
 		__page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
@@ -2888,9 +3147,11 @@ static void hns3_unmap_buffer(struct hns3_enet_ring *ring,
 	if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB))
 		dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length,
 				 ring_to_dma_dir(ring));
-	else if (cb->length)
+	else if ((cb->type & DESC_TYPE_PAGE) && cb->length)
 		dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length,
 			       ring_to_dma_dir(ring));
+	else if (cb->type & (DESC_TYPE_BOUNCE_ALL | DESC_TYPE_BOUNCE_HEAD))
+		hns3_tx_spare_reclaim_cb(ring, cb);
 }
 
 static void hns3_buffer_detach(struct hns3_enet_ring *ring, int i)
@@ -3042,7 +3303,8 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
 
 		desc_cb = &ring->desc_cb[ntc];
 
-		if (desc_cb->type & DESC_TYPE_SKB) {
+		if (desc_cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_ALL |
+				     DESC_TYPE_BOUNCE_HEAD)) {
 			(*pkts)++;
 			(*bytes) += desc_cb->send_bytes;
 		}
@@ -3065,6 +3327,9 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
 	 * ring_space called by hns3_nic_net_xmit.
 	 */
 	smp_store_release(&ring->next_to_clean, ntc);
+
+	hns3_tx_spare_update(ring);
+
 	return true;
 }
 
@@ -4245,6 +4510,8 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
 		ring = &priv->ring[q->tqp_index];
 		desc_num = priv->ae_handle->kinfo.num_tx_desc;
 		ring->queue_index = q->tqp_index;
+		ring->tx_copybreak = priv->tx_copybreak;
+		ring->last_to_use = 0;
 	} else {
 		ring = &priv->ring[q->tqp_index + queue_num];
 		desc_num = priv->ae_handle->kinfo.num_rx_desc;
@@ -4262,7 +4529,6 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
 	ring->desc_num = desc_num;
 	ring->next_to_use = 0;
 	ring->next_to_clean = 0;
-	ring->last_to_use = 0;
 }
 
 static void hns3_queue_to_ring(struct hnae3_queue *tqp,
@@ -4322,6 +4588,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
 		ret = hns3_alloc_ring_buffers(ring);
 		if (ret)
 			goto out_with_desc;
+	} else {
+		hns3_init_tx_spare_buffer(ring);
 	}
 
 	return 0;
@@ -4344,9 +4612,18 @@ void hns3_fini_ring(struct hns3_enet_ring *ring)
 	ring->next_to_use = 0;
 	ring->last_to_use = 0;
 	ring->pending_buf = 0;
-	if (ring->skb) {
+	if (!HNAE3_IS_TX_RING(ring) && ring->skb) {
 		dev_kfree_skb_any(ring->skb);
 		ring->skb = NULL;
+	} else if (HNAE3_IS_TX_RING(ring) && ring->tx_spare) {
+		struct hns3_tx_spare *tx_spare = ring->tx_spare;
+
+		dma_unmap_page(ring_to_dev(ring), tx_spare->dma, tx_spare->len,
+			       DMA_TO_DEVICE);
+		free_pages((unsigned long)tx_spare->buf,
+			   get_order(tx_spare->len));
+		devm_kfree(ring_to_dev(ring), tx_spare);
+		ring->tx_spare = NULL;
 	}
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 9d18b9430b54..8d147c1dab2c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -304,6 +304,8 @@ enum hns3_desc_type {
 	DESC_TYPE_SKB			= 1 << 0,
 	DESC_TYPE_FRAGLIST_SKB		= 1 << 1,
 	DESC_TYPE_PAGE			= 1 << 2,
+	DESC_TYPE_BOUNCE_ALL		= 1 << 3,
+	DESC_TYPE_BOUNCE_HEAD		= 1 << 4,
 };
 
 struct hns3_desc_cb {
@@ -405,6 +407,9 @@ struct ring_stats {
 			u64 tx_tso_err;
 			u64 over_max_recursion;
 			u64 hw_limitation;
+			u64 tx_bounce;
+			u64 tx_spare_full;
+			u64 copy_bits_err;
 		};
 		struct {
 			u64 rx_pkts;
@@ -423,6 +428,15 @@ struct ring_stats {
 	};
 };
 
+struct hns3_tx_spare {
+	dma_addr_t dma;
+	void *buf;
+	u32 next_to_use;
+	u32 next_to_clean;
+	u32 last_to_clean;
+	u32 len;
+};
+
 struct hns3_enet_ring {
 	struct hns3_desc *desc; /* dma map address space */
 	struct hns3_desc_cb *desc_cb;
@@ -445,18 +459,28 @@ struct hns3_enet_ring {
 	 * next_to_use
 	 */
 	int next_to_clean;
-	union {
-		int last_to_use;	/* last idx used by xmit */
-		u32 pull_len;		/* memcpy len for current rx packet */
-	};
-	u32 frag_num;
-	void *va; /* first buffer address for current packet */
-
 	u32 flag;          /* ring attribute */
 
 	int pending_buf;
-	struct sk_buff *skb;
-	struct sk_buff *tail_skb;
+	union {
+		/* for Tx ring */
+		struct {
+			u32 fd_qb_tx_sample;
+			int last_to_use;        /* last idx used by xmit */
+			u32 tx_copybreak;
+			struct hns3_tx_spare *tx_spare;
+		};
+
+		/* for Rx ring */
+		struct {
+			u32 pull_len;   /* memcpy len for current rx packet */
+			u32 frag_num;
+			/* first buffer address for current packet */
+			unsigned char *va;
+			struct sk_buff *skb;
+			struct sk_buff *tail_skb;
+		};
+	};
 } ____cacheline_internodealigned_in_smp;
 
 enum hns3_flow_level_range {
@@ -540,6 +564,7 @@ struct hns3_nic_priv {
 
 	struct hns3_enet_coalesce tx_coal;
 	struct hns3_enet_coalesce rx_coal;
+	u32 tx_copybreak;
 };
 
 union l3_hdr_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index acef5435d7b7..f306de16d73f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -46,6 +46,9 @@ static const struct hns3_stats hns3_txq_stats[] = {
 	HNS3_TQP_STAT("tso_err", tx_tso_err),
 	HNS3_TQP_STAT("over_max_recursion", over_max_recursion),
 	HNS3_TQP_STAT("hw_limitation", hw_limitation),
+	HNS3_TQP_STAT("bounce", tx_bounce),
+	HNS3_TQP_STAT("spare_full", tx_spare_full),
+	HNS3_TQP_STAT("copy_bits_err", copy_bits_err),
 };
 
 #define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats)
@@ -1592,6 +1595,50 @@ static int hns3_set_priv_flags(struct net_device *netdev, u32 pflags)
 	return 0;
 }
 
+static int hns3_get_tunable(struct net_device *netdev,
+			    const struct ethtool_tunable *tuna,
+			    void *data)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	int ret = 0;
+
+	switch (tuna->id) {
+	case ETHTOOL_TX_COPYBREAK:
+		/* all the tx rings have the same tx_copybreak */
+		*(u32 *)data = priv->tx_copybreak;
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
+static int hns3_set_tunable(struct net_device *netdev,
+			    const struct ethtool_tunable *tuna,
+			    const void *data)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	int i, ret = 0;
+
+	switch (tuna->id) {
+	case ETHTOOL_TX_COPYBREAK:
+		priv->tx_copybreak = *(u32 *)data;
+
+		for (i = 0; i < h->kinfo.num_tqps; i++)
+			priv->ring[i].tx_copybreak = priv->tx_copybreak;
+
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
 #define HNS3_ETHTOOL_COALESCE	(ETHTOOL_COALESCE_USECS |		\
 				 ETHTOOL_COALESCE_USE_ADAPTIVE |	\
 				 ETHTOOL_COALESCE_RX_USECS_HIGH |	\
@@ -1635,6 +1682,8 @@ static const struct ethtool_ops hns3vf_ethtool_ops = {
 	.set_msglevel = hns3_set_msglevel,
 	.get_priv_flags = hns3_get_priv_flags,
 	.set_priv_flags = hns3_set_priv_flags,
+	.get_tunable = hns3_get_tunable,
+	.set_tunable = hns3_set_tunable,
 };
 
 static const struct ethtool_ops hns3_ethtool_ops = {
@@ -1674,6 +1723,8 @@ static const struct ethtool_ops hns3_ethtool_ops = {
 	.get_priv_flags = hns3_get_priv_flags,
 	.set_priv_flags = hns3_set_priv_flags,
 	.get_ts_info = hns3_get_ts_info,
+	.get_tunable = hns3_get_tunable,
+	.set_tunable = hns3_set_tunable,
 };
 
 void hns3_ethtool_set_ops(struct net_device *netdev)
-- 
2.8.1



* [PATCH net-next 4/7] net: hns3: add support to query tx spare buffer size for pf
  2021-06-16  6:36 [PATCH net-next 0/7] net: hns3: updates for -next Guangbin Huang
                   ` (2 preceding siblings ...)
  2021-06-16  6:36 ` [PATCH net-next 3/7] net: hns3: use tx bounce buffer for small packets Guangbin Huang
@ 2021-06-16  6:36 ` Guangbin Huang
  2021-06-16  6:36 ` [PATCH net-next 5/7] net: hns3: support dma_map_sg() for multi frags skb Guangbin Huang
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Guangbin Huang @ 2021-06-16  6:36 UTC (permalink / raw)
  To: davem, kuba; +Cc: netdev, linux-kernel, salil.mehta, lipeng321, huangguangbin2

From: Huazhong Tan <tanhuazhong@huawei.com>

Add support to query the tx spare buffer size from the configuration
file, and use this info to do the spare buffer initialization when
the module parameter 'tx_spare_buf_size' is not specified.
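
A small sketch of the resulting precedence (taken from
hns3_init_tx_spare_buffer() in the diff below): the module parameter,
when set, overrides the size queried from the configuration file, which
firmware reports in 4096-byte units (e.g. a raw field value of 4 means
4 * 4096 = 16384 bytes):

	alloc_size = tx_spare_buf_size ? tx_spare_buf_size :
		     ring->tqp->handle->kinfo.tx_spare_buf_size;
	if (!alloc_size)
		return;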

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h             |  1 +
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c         |  7 +++++--
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h  |  2 ++
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 14 ++++++++++++++
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h |  2 ++
 5 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 5822fc06f767..0b202f4def83 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -760,6 +760,7 @@ struct hnae3_knic_private_info {
 	u16 rx_buf_len;
 	u16 num_tx_desc;
 	u16 num_rx_desc;
+	u32 tx_spare_buf_size;
 
 	struct hnae3_tc_info tc_info;
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index e5466daac1c4..d86b3735aa9f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1005,13 +1005,16 @@ static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring)
 {
 	struct hns3_tx_spare *tx_spare;
 	struct page *page;
+	u32 alloc_size;
 	dma_addr_t dma;
 	int order;
 
-	if (!tx_spare_buf_size)
+	alloc_size = tx_spare_buf_size ? tx_spare_buf_size :
+		     ring->tqp->handle->kinfo.tx_spare_buf_size;
+	if (!alloc_size)
 		return;
 
-	order = get_order(tx_spare_buf_size);
+	order = get_order(alloc_size);
 	tx_spare = devm_kzalloc(ring_to_dev(ring), sizeof(*tx_spare),
 				GFP_KERNEL);
 	if (!tx_spare) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 51be76f1795e..a322dfeba5cf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -542,6 +542,8 @@ struct hclge_pf_res_cmd {
 #define HCLGE_CFG_UMV_TBL_SPACE_M	GENMASK(31, 16)
 #define HCLGE_CFG_PF_RSS_SIZE_S		0
 #define HCLGE_CFG_PF_RSS_SIZE_M		GENMASK(3, 0)
+#define HCLGE_CFG_TX_SPARE_BUF_SIZE_S	4
+#define HCLGE_CFG_TX_SPARE_BUF_SIZE_M	GENMASK(15, 4)
 
 #define HCLGE_CFG_CMD_CNT		4
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index f6fdf93c8cad..f3e482ab3c71 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1279,6 +1279,7 @@ static u32 hclge_get_max_speed(u16 speed_ability)
 
 static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 {
+#define HCLGE_TX_SPARE_SIZE_UNIT		4096
 #define SPEED_ABILITY_EXT_SHIFT			8
 
 	struct hclge_cfg_param_cmd *req;
@@ -1358,6 +1359,15 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 	cfg->pf_rss_size_max = cfg->pf_rss_size_max ?
 			       1U << cfg->pf_rss_size_max :
 			       cfg->vf_rss_size_max;
+
+	/* The unit of the tx spare buffer size queried from configuration
+	 * file is HCLGE_TX_SPARE_SIZE_UNIT(4096) bytes, so a conversion is
+	 * needed here.
+	 */
+	cfg->tx_spare_buf_size = hnae3_get_field(__le32_to_cpu(req->param[2]),
+						 HCLGE_CFG_TX_SPARE_BUF_SIZE_M,
+						 HCLGE_CFG_TX_SPARE_BUF_SIZE_S);
+	cfg->tx_spare_buf_size *= HCLGE_TX_SPARE_SIZE_UNIT;
 }
 
 /* hclge_get_cfg: query the static parameter from flash
@@ -1539,6 +1549,7 @@ static int hclge_configure(struct hclge_dev *hdev)
 	hdev->tc_max = cfg.tc_num;
 	hdev->tm_info.hw_pfc_map = 0;
 	hdev->wanted_umv_size = cfg.umv_space;
+	hdev->tx_spare_buf_size = cfg.tx_spare_buf_size;
 	if (cfg.vlan_fliter_cap == HCLGE_VLAN_FLTR_CAN_MDF)
 		set_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps);
 
@@ -1736,6 +1747,7 @@ static int hclge_knic_setup(struct hclge_vport *vport, u16 num_tqps,
 	kinfo->num_rx_desc = num_rx_desc;
 
 	kinfo->rx_buf_len = hdev->rx_buf_len;
+	kinfo->tx_spare_buf_size = hdev->tx_spare_buf_size;
 
 	kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, num_tqps,
 				  sizeof(struct hnae3_queue *), GFP_KERNEL);
@@ -11059,6 +11071,8 @@ static void hclge_info_show(struct hclge_dev *hdev)
 		 hdev->flag & HCLGE_FLAG_DCB_ENABLE ? "enable" : "disable");
 	dev_info(dev, "MQPRIO %s\n",
 		 hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE ? "enable" : "disable");
+	dev_info(dev, "Default tx spare buffer size: %u\n",
+		 hdev->tx_spare_buf_size);
 
 	dev_info(dev, "PF info end.\n");
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 02852738ce21..3d3352491dba 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -384,6 +384,7 @@ struct hclge_cfg {
 	u8 mac_addr[ETH_ALEN];
 	u8 default_speed;
 	u32 numa_node_map;
+	u32 tx_spare_buf_size;
 	u16 speed_ability;
 	u16 umv_space;
 };
@@ -848,6 +849,7 @@ struct hclge_dev {
 	u16 alloc_rss_size;		/* Allocated RSS task queue */
 	u16 vf_rss_size_max;		/* HW defined VF max RSS task queue */
 	u16 pf_rss_size_max;		/* HW defined PF max RSS task queue */
+	u32 tx_spare_buf_size;		/* HW defined TX spare buffer size */
 
 	u16 fdir_pf_filter_count; /* Num of guaranteed filters for this PF */
 	u16 num_alloc_vport;		/* Num vports this driver supports */
-- 
2.8.1



* [PATCH net-next 5/7] net: hns3: support dma_map_sg() for multi frags skb
  2021-06-16  6:36 [PATCH net-next 0/7] net: hns3: updates for -next Guangbin Huang
                   ` (3 preceding siblings ...)
  2021-06-16  6:36 ` [PATCH net-next 4/7] net: hns3: add support to query tx spare buffer size for pf Guangbin Huang
@ 2021-06-16  6:36 ` Guangbin Huang
  2021-06-16  6:36 ` [PATCH net-next 6/7] net: hns3: optimize the rx page reuse handling process Guangbin Huang
  2021-06-16  6:36 ` [PATCH net-next 7/7] net: hns3: use bounce buffer when rx page can not be reused Guangbin Huang
  6 siblings, 0 replies; 9+ messages in thread
From: Guangbin Huang @ 2021-06-16  6:36 UTC (permalink / raw)
  To: davem, kuba; +Cc: netdev, linux-kernel, salil.mehta, lipeng321, huangguangbin2

From: Yunsheng Lin <linyunsheng@huawei.com>

Using the queue-based tx buffer, it is also possible to allocate a
sgl buffer and use skb_to_sgvec() to convert the skb to a sgvec, so
that dma_map_sg() can be used to decrease the overhead of IOMMU
mapping and unmapping.

Firstly, it reduces the number of buffers. For example, a tcp skb
may have a 66-byte header and 3 fragments of 4328, 32768, and 28064
bytes. With this patch, dma_map_sg() combines them into two buffers
by using the IOMMU: the 66-byte header and one 65160-byte fragment.

Secondly, it reduces the number of dma mappings and unmappings. All
the original 4 buffers are mapped only once rather than 4 times.

The throughput improves by more than 10% when running a single iperf
TCP thread with the IOMMU in strict mode.
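
A minimal usage sketch of the skb -> sgvec -> dma_map_sg() pattern
(simplified error handling; the driver version is hns3_handle_tx_sgl()
in the diff below):

	sg_init_table(sgt->sgl, nfrag);
	nents = skb_to_sgvec(skb, sgt->sgl, 0, skb->len);
	if (nents < 0)
		return nents;

	sgt->orig_nents = nents;
	sgt->nents = dma_map_sg(dev, sgt->sgl, sgt->orig_nents,
				DMA_TO_DEVICE);
	if (!sgt->nents)
		return -ENOMEM;

	/* the IOMMU may merge physically adjacent entries, so the mapped
	 * count sgt->nents can be smaller than sgt->orig_nents
	 */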

Suggested-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 111 ++++++++++++++++++++-
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |   4 +
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |   3 +
 3 files changed, 113 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index d86b3735aa9f..f60a344a6a9f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -57,6 +57,15 @@ static unsigned int tx_spare_buf_size;
 module_param(tx_spare_buf_size, uint, 0400);
 MODULE_PARM_DESC(tx_spare_buf_size, "Size used to allocate tx spare buffer");
 
+static unsigned int tx_sgl = 1;
+module_param(tx_sgl, uint, 0600);
+MODULE_PARM_DESC(tx_sgl, "Minimum number of frags when using dma_map_sg() to optimize the IOMMU mapping");
+
+#define HNS3_SGL_SIZE(nfrag)	(sizeof(struct scatterlist) * (nfrag) +	\
+				 sizeof(struct sg_table))
+#define HNS3_MAX_SGL_SIZE	ALIGN(HNS3_SGL_SIZE(HNS3_MAX_TSO_BD_NUM),\
+				      dma_get_cache_alignment())
+
 #define DEFAULT_MSG_LEVEL (NETIF_MSG_PROBE | NETIF_MSG_LINK | \
 			   NETIF_MSG_IFDOWN | NETIF_MSG_IFUP)
 
@@ -1001,6 +1010,25 @@ static bool hns3_can_use_tx_bounce(struct hns3_enet_ring *ring,
 	return true;
 }
 
+static bool hns3_can_use_tx_sgl(struct hns3_enet_ring *ring,
+				struct sk_buff *skb,
+				u32 space)
+{
+	if (skb->len <= ring->tx_copybreak || !tx_sgl ||
+	    (!skb_has_frag_list(skb) &&
+	     skb_shinfo(skb)->nr_frags < tx_sgl))
+		return false;
+
+	if (space < HNS3_MAX_SGL_SIZE) {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.tx_spare_full++;
+		u64_stats_update_end(&ring->syncp);
+		return false;
+	}
+
+	return true;
+}
+
 static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring)
 {
 	struct hns3_tx_spare *tx_spare;
@@ -1108,14 +1136,19 @@ static void hns3_tx_spare_reclaim_cb(struct hns3_enet_ring *ring,
 
 	/* This tx spare buffer is only really reclaimed after calling
 	 * hns3_tx_spare_update(), so it is still safe to use the info in
-	 * the tx buffer to do the dma sync after tx_spare->next_to_clean
-	 * is moved forward.
+	 * the tx buffer to do the dma sync or sg unmapping after
+	 * tx_spare->next_to_clean is moved forward.
 	 */
 	if (cb->type & (DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL)) {
 		dma_addr_t dma = tx_spare->dma + ntc;
 
 		dma_sync_single_for_cpu(ring_to_dev(ring), dma, len,
 					DMA_TO_DEVICE);
+	} else {
+		struct sg_table *sgt = tx_spare->buf + ntc;
+
+		dma_unmap_sg(ring_to_dev(ring), sgt->sgl, sgt->orig_nents,
+			     DMA_TO_DEVICE);
 	}
 }
 
@@ -2058,6 +2091,65 @@ static int hns3_handle_tx_bounce(struct hns3_enet_ring *ring,
 	return bd_num;
 }
 
+static int hns3_handle_tx_sgl(struct hns3_enet_ring *ring,
+			      struct sk_buff *skb)
+{
+	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
+	u32 nfrag = skb_shinfo(skb)->nr_frags + 1;
+	struct sg_table *sgt;
+	int i, bd_num = 0;
+	dma_addr_t dma;
+	u32 cb_len;
+	int nents;
+
+	if (skb_has_frag_list(skb))
+		nfrag = HNS3_MAX_TSO_BD_NUM;
+
+	/* hns3_can_use_tx_sgl() is called to ensure the below
+	 * function can always return the tx buffer.
+	 */
+	sgt = hns3_tx_spare_alloc(ring, HNS3_SGL_SIZE(nfrag),
+				  &dma, &cb_len);
+
+	/* scatterlist follows by the sg table */
+	sgt->sgl = (struct scatterlist *)(sgt + 1);
+	sg_init_table(sgt->sgl, nfrag);
+	nents = skb_to_sgvec(skb, sgt->sgl, 0, skb->len);
+	if (unlikely(nents < 0)) {
+		hns3_tx_spare_rollback(ring, cb_len);
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.skb2sgl_err++;
+		u64_stats_update_end(&ring->syncp);
+		return -ENOMEM;
+	}
+
+	sgt->orig_nents = nents;
+	sgt->nents = dma_map_sg(ring_to_dev(ring), sgt->sgl, sgt->orig_nents,
+				DMA_TO_DEVICE);
+	if (unlikely(!sgt->nents)) {
+		hns3_tx_spare_rollback(ring, cb_len);
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.map_sg_err++;
+		u64_stats_update_end(&ring->syncp);
+		return -ENOMEM;
+	}
+
+	desc_cb->priv = skb;
+	desc_cb->length = cb_len;
+	desc_cb->dma = dma;
+	desc_cb->type = DESC_TYPE_SGL_SKB;
+
+	for (i = 0; i < sgt->nents; i++)
+		bd_num += hns3_fill_desc(ring, sg_dma_address(sgt->sgl + i),
+					 sg_dma_len(sgt->sgl + i));
+
+	u64_stats_update_begin(&ring->syncp);
+	ring->stats.tx_sgl++;
+	u64_stats_update_end(&ring->syncp);
+
+	return bd_num;
+}
+
 static int hns3_handle_desc_filling(struct hns3_enet_ring *ring,
 				    struct sk_buff *skb)
 {
@@ -2068,6 +2160,9 @@ static int hns3_handle_desc_filling(struct hns3_enet_ring *ring,
 
 	space = hns3_tx_spare_space(ring);
 
+	if (hns3_can_use_tx_sgl(ring, skb, space))
+		return hns3_handle_tx_sgl(ring, skb);
+
 	if (hns3_can_use_tx_bounce(ring, skb, space))
 		return hns3_handle_tx_bounce(ring, skb);
 
@@ -2324,6 +2419,8 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
 			tx_drop += ring->stats.over_max_recursion;
 			tx_drop += ring->stats.hw_limitation;
 			tx_drop += ring->stats.copy_bits_err;
+			tx_drop += ring->stats.skb2sgl_err;
+			tx_drop += ring->stats.map_sg_err;
 			tx_errors += ring->stats.sw_err_cnt;
 			tx_errors += ring->stats.tx_vlan_err;
 			tx_errors += ring->stats.tx_l4_proto_err;
@@ -2332,6 +2429,8 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
 			tx_errors += ring->stats.over_max_recursion;
 			tx_errors += ring->stats.hw_limitation;
 			tx_errors += ring->stats.copy_bits_err;
+			tx_errors += ring->stats.skb2sgl_err;
+			tx_errors += ring->stats.map_sg_err;
 		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
 
 		/* fetch the rx stats */
@@ -3126,7 +3225,7 @@ static void hns3_free_buffer(struct hns3_enet_ring *ring,
 			     struct hns3_desc_cb *cb, int budget)
 {
 	if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD |
-			DESC_TYPE_BOUNCE_ALL))
+			DESC_TYPE_BOUNCE_ALL | DESC_TYPE_SGL_SKB))
 		napi_consume_skb(cb->priv, budget);
 	else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
 		__page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
@@ -3153,7 +3252,8 @@ static void hns3_unmap_buffer(struct hns3_enet_ring *ring,
 	else if ((cb->type & DESC_TYPE_PAGE) && cb->length)
 		dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length,
 			       ring_to_dma_dir(ring));
-	else if (cb->type & (DESC_TYPE_BOUNCE_ALL | DESC_TYPE_BOUNCE_HEAD))
+	else if (cb->type & (DESC_TYPE_BOUNCE_ALL | DESC_TYPE_BOUNCE_HEAD |
+			     DESC_TYPE_SGL_SKB))
 		hns3_tx_spare_reclaim_cb(ring, cb);
 }
 
@@ -3307,7 +3407,8 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
 		desc_cb = &ring->desc_cb[ntc];
 
 		if (desc_cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_ALL |
-				     DESC_TYPE_BOUNCE_HEAD)) {
+				     DESC_TYPE_BOUNCE_HEAD |
+				     DESC_TYPE_SGL_SKB)) {
 			(*pkts)++;
 			(*bytes) += desc_cb->send_bytes;
 		}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 8d147c1dab2c..22ae291471aa 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -306,6 +306,7 @@ enum hns3_desc_type {
 	DESC_TYPE_PAGE			= 1 << 2,
 	DESC_TYPE_BOUNCE_ALL		= 1 << 3,
 	DESC_TYPE_BOUNCE_HEAD		= 1 << 4,
+	DESC_TYPE_SGL_SKB		= 1 << 5,
 };
 
 struct hns3_desc_cb {
@@ -410,6 +411,9 @@ struct ring_stats {
 			u64 tx_bounce;
 			u64 tx_spare_full;
 			u64 copy_bits_err;
+			u64 tx_sgl;
+			u64 skb2sgl_err;
+			u64 map_sg_err;
 		};
 		struct {
 			u64 rx_pkts;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index f306de16d73f..d7852716aaad 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -49,6 +49,9 @@ static const struct hns3_stats hns3_txq_stats[] = {
 	HNS3_TQP_STAT("bounce", tx_bounce),
 	HNS3_TQP_STAT("spare_full", tx_spare_full),
 	HNS3_TQP_STAT("copy_bits_err", copy_bits_err),
+	HNS3_TQP_STAT("sgl", tx_sgl),
+	HNS3_TQP_STAT("skb2sgl_err", skb2sgl_err),
+	HNS3_TQP_STAT("map_sg_err", map_sg_err),
 };
 
 #define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats)
-- 
2.8.1



* [PATCH net-next 6/7] net: hns3: optimize the rx page reuse handling process
  2021-06-16  6:36 [PATCH net-next 0/7] net: hns3: updates for -next Guangbin Huang
                   ` (4 preceding siblings ...)
  2021-06-16  6:36 ` [PATCH net-next 5/7] net: hns3: support dma_map_sg() for multi frags skb Guangbin Huang
@ 2021-06-16  6:36 ` Guangbin Huang
  2021-06-16  8:47   ` Yunsheng Lin
  2021-06-16  6:36 ` [PATCH net-next 7/7] net: hns3: use bounce buffer when rx page can not be reused Guangbin Huang
  6 siblings, 1 reply; 9+ messages in thread
From: Guangbin Huang @ 2021-06-16  6:36 UTC (permalink / raw)
  To: davem, kuba; +Cc: netdev, linux-kernel, salil.mehta, lipeng321, huangguangbin2

From: Yunsheng Lin <linyunsheng@huawei.com>

Currently the rx page offset is only reset to zero when all the
below conditions are satisfied:
1. the rx page is only owned by the driver.
2. the rx page is reusable.
3. the page offset that is about to be given to the stack has
reached the end of the page.

If the page offset is over the hns3_buf_size(), it means the
buffer below the offset of the page is usable when the above
conditions 1 & 2 are satisfied, so the page offset can be reset
to zero instead of being increased. We may then be able to always
reuse the first 4K buffer of a 64K page, which limits the hot
buffer size as much as possible.

The above optimization is a side effect of refactoring the
rx page reuse handling in order to support the rx copybreak.
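
Worked example (assuming a 64K page and 4K rx buffers, mirroring the
reworked hns3_nic_reuse_page() below): the offset advances 0 -> 4K ->
8K ... while the stack still holds references; as soon as the page is
back to driver-only ownership (page_count == pagecnt_bias) and
page_offset is non-zero, the offset is reset to 0 instead of advancing,
so the same first 4K chunk keeps being reused:

	if (desc_cb->page_offset && hns3_can_reuse_page(desc_cb)) {
		desc_cb->page_offset = 0;
		desc_cb->reuse_flag = 1;
	} else if (desc_cb->page_offset + truesize * 2 <=
		   hns3_page_size(ring)) {
		desc_cb->page_offset += truesize;
		desc_cb->reuse_flag = 1;
	}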

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 44 ++++++++++++-------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index f60a344a6a9f..98e8a548edb8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3525,7 +3525,7 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring,
 
 static bool hns3_can_reuse_page(struct hns3_desc_cb *cb)
 {
-	return (page_count(cb->priv) - cb->pagecnt_bias) == 1;
+	return page_count(cb->priv) == cb->pagecnt_bias;
 }
 
 static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
@@ -3533,40 +3533,40 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
 				struct hns3_desc_cb *desc_cb)
 {
 	struct hns3_desc *desc = &ring->desc[ring->next_to_clean];
+	u32 frag_offset = desc_cb->page_offset + pull_len;
 	int size = le16_to_cpu(desc->rx.size);
 	u32 truesize = hns3_buf_size(ring);
+	u32 frag_size = size - pull_len;
 
-	desc_cb->pagecnt_bias--;
-	skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
-			size - pull_len, truesize);
+	/* Avoid re-using remote or pfmem page */
+	if (unlikely(!dev_page_is_reusable(desc_cb->priv)))
+		goto out;
 
-	/* Avoid re-using remote and pfmemalloc pages, or the stack is still
-	 * using the page when page_offset rollback to zero, flag default
-	 * unreuse
+	/* Stack is not using and current page_offset is non-zero, we can
+	 * reuse from the zero offset.
 	 */
-	if (!dev_page_is_reusable(desc_cb->priv) ||
-	    (!desc_cb->page_offset && !hns3_can_reuse_page(desc_cb))) {
-		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
-		return;
-	}
-
-	/* Move offset up to the next cache line */
-	desc_cb->page_offset += truesize;
-
-	if (desc_cb->page_offset + truesize <= hns3_page_size(ring)) {
+	if (desc_cb->page_offset && hns3_can_reuse_page(desc_cb)) {
+		desc_cb->page_offset = 0;
 		desc_cb->reuse_flag = 1;
-	} else if (hns3_can_reuse_page(desc_cb)) {
+	} else if (desc_cb->page_offset + truesize * 2 <=
+		   hns3_page_size(ring)) {
+		desc_cb->page_offset += truesize;
 		desc_cb->reuse_flag = 1;
-		desc_cb->page_offset = 0;
-	} else if (desc_cb->pagecnt_bias) {
-		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
-		return;
 	}
 
+out:
+	desc_cb->pagecnt_bias--;
+
 	if (unlikely(!desc_cb->pagecnt_bias)) {
 		page_ref_add(desc_cb->priv, USHRT_MAX);
 		desc_cb->pagecnt_bias = USHRT_MAX;
 	}
+
+	skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset,
+			frag_size, truesize);
+
+	if (unlikely(!desc_cb->reuse_flag))
+		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
 }
 
 static int hns3_gro_complete(struct sk_buff *skb, u32 l234info)
-- 
2.8.1



* [PATCH net-next 7/7] net: hns3: use bounce buffer when rx page can not be reused
  2021-06-16  6:36 [PATCH net-next 0/7] net: hns3: updates for -next Guangbin Huang
                   ` (5 preceding siblings ...)
  2021-06-16  6:36 ` [PATCH net-next 6/7] net: hns3: optimize the rx page reuse handling process Guangbin Huang
@ 2021-06-16  6:36 ` Guangbin Huang
  6 siblings, 0 replies; 9+ messages in thread
From: Guangbin Huang @ 2021-06-16  6:36 UTC (permalink / raw)
  To: davem, kuba; +Cc: netdev, linux-kernel, salil.mehta, lipeng321, huangguangbin2

From: Yunsheng Lin <linyunsheng@huawei.com>

Currently the rx page will be reused to receive future packets when
the stack releases the previous skb quickly. If the old page can
not be reused, a new page will be allocated and mapped, which
consumes a lot of cpu when the IOMMU is in strict mode, especially
when the application and irq/NAPI happen to run on the same cpu.

So allocate a new frag to memcpy the data into in order to avoid the
costly IOMMU unmapping/mapping operation, and add "frag_alloc_err"
and "frag_alloc" stats to the "ethtool -S ethX" output.

The throughput improves by more than 50% when running a single iperf
TCP thread with the IOMMU in strict mode and iperf sharing the same
cpu with irq/NAPI (rx_copybreak = 2048 and mtu = 1500).
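
A minimal sketch of the rx copybreak path (the driver version is in
hns3_nic_reuse_page() in the diff below): small frags are copied into a
freshly allocated napi frag so the original rx page stays with the
driver and needs no IOMMU unmap/map cycle:

	if (frag_size <= ring->rx_copybreak) {
		void *frag = napi_alloc_frag(frag_size);

		if (frag) {
			memcpy(frag, desc_cb->buf + frag_offset, frag_size);
			skb_add_rx_frag(skb, i, virt_to_page(frag),
					offset_in_page(frag), frag_size,
					frag_size);
			desc_cb->reuse_flag = 1;
		}
	}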

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c |  2 ++
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 23 ++++++++++++++++++++++
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |  4 ++++
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 12 +++++++++++
 4 files changed, 41 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index a24a75c47cad..34b6cd904a1a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -450,6 +450,7 @@ static const struct hns3_dbg_item rx_queue_info_items[] = {
 	{ "HEAD", 2 },
 	{ "FBDNUM", 2 },
 	{ "PKTNUM", 2 },
+	{ "COPYBREAK", 2 },
 	{ "RING_EN", 2 },
 	{ "RX_RING_EN", 2 },
 	{ "BASE_ADDR", 10 },
@@ -481,6 +482,7 @@ static void hns3_dump_rx_queue_info(struct hns3_enet_ring *ring,
 
 	sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
 		HNS3_RING_RX_RING_PKTNUM_RECORD_REG));
+	sprintf(result[j++], "%9u", ring->rx_copybreak);
 
 	sprintf(result[j++], "%7s", readl_relaxed(ring->tqp->io_base +
 		HNS3_RING_EN_REG) ? "on" : "off");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 98e8a548edb8..51bbf5f760c5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3552,6 +3552,28 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
 		   hns3_page_size(ring)) {
 		desc_cb->page_offset += truesize;
 		desc_cb->reuse_flag = 1;
+	} else if (frag_size <= ring->rx_copybreak) {
+		void *frag = napi_alloc_frag(frag_size);
+
+		if (unlikely(!frag)) {
+			u64_stats_update_begin(&ring->syncp);
+			ring->stats.frag_alloc_err++;
+			u64_stats_update_end(&ring->syncp);
+
+			hns3_rl_err(ring_to_netdev(ring),
+				    "failed to allocate rx frag\n");
+			goto out;
+		}
+
+		desc_cb->reuse_flag = 1;
+		memcpy(frag, desc_cb->buf + frag_offset, frag_size);
+		skb_add_rx_frag(skb, i, virt_to_page(frag),
+				offset_in_page(frag), frag_size, frag_size);
+
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.frag_alloc++;
+		u64_stats_update_end(&ring->syncp);
+		return;
 	}
 
 out:
@@ -4620,6 +4642,7 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
 		ring = &priv->ring[q->tqp_index + queue_num];
 		desc_num = priv->ae_handle->kinfo.num_rx_desc;
 		ring->queue_index = q->tqp_index;
+		ring->rx_copybreak = priv->rx_copybreak;
 	}
 
 	hnae3_set_bit(ring->flag, HNAE3_RING_TYPE_B, ring_type);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 22ae291471aa..15af3d93857b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -427,6 +427,8 @@ struct ring_stats {
 			u64 csum_complete;
 			u64 rx_multicast;
 			u64 non_reuse_pg;
+			u64 frag_alloc_err;
+			u64 frag_alloc;
 		};
 		__le16 csum;
 	};
@@ -478,6 +480,7 @@ struct hns3_enet_ring {
 		/* for Rx ring */
 		struct {
 			u32 pull_len;   /* memcpy len for current rx packet */
+			u32 rx_copybreak;
 			u32 frag_num;
 			/* first buffer address for current packet */
 			unsigned char *va;
@@ -569,6 +572,7 @@ struct hns3_nic_priv {
 	struct hns3_enet_coalesce tx_coal;
 	struct hns3_enet_coalesce rx_coal;
 	u32 tx_copybreak;
+	u32 rx_copybreak;
 };
 
 union l3_hdr_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index d7852716aaad..82061ab6930f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -71,6 +71,8 @@ static const struct hns3_stats hns3_rxq_stats[] = {
 	HNS3_TQP_STAT("csum_complete", csum_complete),
 	HNS3_TQP_STAT("multicast", rx_multicast),
 	HNS3_TQP_STAT("non_reuse_pg", non_reuse_pg),
+	HNS3_TQP_STAT("frag_alloc_err", frag_alloc_err),
+	HNS3_TQP_STAT("frag_alloc", frag_alloc),
 };
 
 #define HNS3_PRIV_FLAGS_LEN ARRAY_SIZE(hns3_priv_flags)
@@ -1610,6 +1612,9 @@ static int hns3_get_tunable(struct net_device *netdev,
 		/* all the tx rings have the same tx_copybreak */
 		*(u32 *)data = priv->tx_copybreak;
 		break;
+	case ETHTOOL_RX_COPYBREAK:
+		*(u32 *)data = priv->rx_copybreak;
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
@@ -1634,6 +1639,13 @@ static int hns3_set_tunable(struct net_device *netdev,
 			priv->ring[i].tx_copybreak = priv->tx_copybreak;
 
 		break;
+	case ETHTOOL_RX_COPYBREAK:
+		priv->rx_copybreak = *(u32 *)data;
+
+		for (i = h->kinfo.num_tqps; i < h->kinfo.num_tqps * 2; i++)
+			priv->ring[i].rx_copybreak = priv->rx_copybreak;
+
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
-- 
2.8.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next 6/7] net: hns3: optimize the rx page reuse handling process
  2021-06-16  6:36 ` [PATCH net-next 6/7] net: hns3: optimize the rx page reuse handling process Guangbin Huang
@ 2021-06-16  8:47   ` Yunsheng Lin
  0 siblings, 0 replies; 9+ messages in thread
From: Yunsheng Lin @ 2021-06-16  8:47 UTC (permalink / raw)
  To: Guangbin Huang, davem, kuba; +Cc: netdev, linux-kernel, salil.mehta, lipeng321

On 2021/6/16 14:36, Guangbin Huang wrote:
> From: Yunsheng Lin <linyunsheng@huawei.com>
> 
> Currently the rx page offset is only reset to zero when all the below
> conditions are satisfied:
> 1. rx page is only owned by driver.
> 2. rx page is reusable.
> 3. the page offset that is about to be given to the stack has
> reached the end of the page.
> 
> If the page offset is over the hns3_buf_size(), it means the
> buffer below the offset of the page is usable when the above
> condition 1 & 2 are satisfied, so page offset can be reset to
> zero instead of increasing the offset. We may be able to always
> reuse the first 4K buffer of a 64K page, which means we can
> limit the hot buffer size as much as possible.
> 
> The above optimization is a side effect of refactoring the
> rx page reuse handling in order to support the rx copybreak.
> 
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
> ---
>  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 44 ++++++++++++-------------
>  1 file changed, 22 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> index f60a344a6a9f..98e8a548edb8 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> @@ -3525,7 +3525,7 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring,
>  
>  static bool hns3_can_reuse_page(struct hns3_desc_cb *cb)
>  {
> -	return (page_count(cb->priv) - cb->pagecnt_bias) == 1;
> +	return page_count(cb->priv) == cb->pagecnt_bias;
>  }
>  
>  static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
> @@ -3533,40 +3533,40 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
>  				struct hns3_desc_cb *desc_cb)
>  {
>  	struct hns3_desc *desc = &ring->desc[ring->next_to_clean];
> +	u32 frag_offset = desc_cb->page_offset + pull_len;
>  	int size = le16_to_cpu(desc->rx.size);
>  	u32 truesize = hns3_buf_size(ring);
> +	u32 frag_size = size - pull_len;
>  
> -	desc_cb->pagecnt_bias--;
> -	skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
> -			size - pull_len, truesize);
> +	/* Avoid re-using remote or pfmem page */
> +	if (unlikely(!dev_page_is_reusable(desc_cb->priv)))
> +		goto out;
>  
> -	/* Avoid re-using remote and pfmemalloc pages, or the stack is still
> -	 * using the page when page_offset rollback to zero, flag default
> -	 * unreuse
> +	/* Stack is not using and current page_offset is non-zero, we can
> +	 * reuse from the zero offset.
>  	 */
> -	if (!dev_page_is_reusable(desc_cb->priv) ||
> -	    (!desc_cb->page_offset && !hns3_can_reuse_page(desc_cb))) {
> -		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
> -		return;
> -	}
> -
> -	/* Move offset up to the next cache line */
> -	desc_cb->page_offset += truesize;
> -
> -	if (desc_cb->page_offset + truesize <= hns3_page_size(ring)) {
> +	if (desc_cb->page_offset && hns3_can_reuse_page(desc_cb)) {
> +		desc_cb->page_offset = 0;
>  		desc_cb->reuse_flag = 1;
> -	} else if (hns3_can_reuse_page(desc_cb)) {
> +	} else if (desc_cb->page_offset + truesize * 2 <=
> +		   hns3_page_size(ring)) {

The above assumption is wrong: we need to check whether the page
is only owned by the driver both at the beginning and at the end of
a page, to make sure there is no reuse conflict between the driver
and the stack when desc_cb->page_offset is rolled back to zero or
incremented.

The fix for the above problem is pending internally; it was supposed
to be merged together with this patch when upstreaming.

It seems davem has already merged this patch, so I will send out the
fix later. Sorry for the inconvenience.
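
Just to illustrate the kind of check meant here (a sketch only, not
the actual pending fix), the early bail-out above could additionally
refuse reuse when page_offset has just rolled back to zero while the
stack may still hold part of the page:

	/* Illustrative sketch only: besides remote/pfmemalloc pages,
	 * also refuse to reuse when page_offset is zero but the driver
	 * is not the sole owner of the page, so a buffer still held by
	 * the stack is never handed out again.
	 */
	if (unlikely(!dev_page_is_reusable(desc_cb->priv)) ||
	    (!desc_cb->page_offset && !hns3_can_reuse_page(desc_cb)))
		goto out;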


> +		desc_cb->page_offset += truesize;
>  		desc_cb->reuse_flag = 1;
> -		desc_cb->page_offset = 0;
> -	} else if (desc_cb->pagecnt_bias) {
> -		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
> -		return;
>  	}
>  
> +out:
> +	desc_cb->pagecnt_bias--;
> +
>  	if (unlikely(!desc_cb->pagecnt_bias)) {
>  		page_ref_add(desc_cb->priv, USHRT_MAX);
>  		desc_cb->pagecnt_bias = USHRT_MAX;
>  	}
> +
> +	skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset,
> +			frag_size, truesize);
> +
> +	if (unlikely(!desc_cb->reuse_flag))
> +		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
>  }
>  
>  static int hns3_gro_complete(struct sk_buff *skb, u32 l234info)
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-06-16  8:47 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-16  6:36 [PATCH net-next 0/7] net: hns3: updates for -next Guangbin Huang
2021-06-16  6:36 ` [PATCH net-next 1/7] net: hns3: minor refactor related to desc_cb handling Guangbin Huang
2021-06-16  6:36 ` [PATCH net-next 2/7] net: hns3: refactor for hns3_fill_desc() function Guangbin Huang
2021-06-16  6:36 ` [PATCH net-next 3/7] net: hns3: use tx bounce buffer for small packets Guangbin Huang
2021-06-16  6:36 ` [PATCH net-next 4/7] net: hns3: add support to query tx spare buffer size for pf Guangbin Huang
2021-06-16  6:36 ` [PATCH net-next 5/7] net: hns3: support dma_map_sg() for multi frags skb Guangbin Huang
2021-06-16  6:36 ` [PATCH net-next 6/7] net: hns3: optimize the rx page reuse handling process Guangbin Huang
2021-06-16  8:47   ` Yunsheng Lin
2021-06-16  6:36 ` [PATCH net-next 7/7] net: hns3: use bounce buffer when rx page can not be reused Guangbin Huang
