linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 0/6] net: hns3: updates for -next
@ 2020-09-14 12:06 Huazhong Tan
  2020-09-14 12:06 ` [PATCH net-next 1/6] net: hns3: batch the page reference count updates Huazhong Tan
                   ` (5 more replies)
  0 siblings, 6 replies; 14+ messages in thread
From: Huazhong Tan @ 2020-09-14 12:06 UTC (permalink / raw)
  To: davem
  Cc: netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm, kuba,
	Huazhong Tan

There are some optimizations related to IO path.

Yunsheng Lin (6):
  net: hns3: batch the page reference count updates
  net: hns3: batch tx doorbell operation
  net: hns3: optimize the tx clean process
  net: hns3: optimize the rx clean process
  net: hns3: use writel() to optimize the barrier operation
  net: hns3: use napi_consume_skb() when cleaning tx desc

 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 225 ++++++++++++---------
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |  20 +-
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |   8 +-
 3 files changed, 141 insertions(+), 112 deletions(-)

-- 
2.7.4


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH net-next 1/6] net: hns3: batch the page reference count updates
  2020-09-14 12:06 [PATCH net-next 0/6] net: hns3: updates for -next Huazhong Tan
@ 2020-09-14 12:06 ` Huazhong Tan
  2020-09-14 12:06 ` [PATCH net-next 2/6] net: hns3: batch tx doorbell operation Huazhong Tan
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Huazhong Tan @ 2020-09-14 12:06 UTC (permalink / raw)
  To: davem
  Cc: netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm, kuba,
	Yunsheng Lin, Huazhong Tan

From: Yunsheng Lin <linyunsheng@huawei.com>

Batch the page reference count updates instead of doing them
one at a time. By doing this we can improve the overall receive
performance by avoid some atomic increment operations when the
rx page is reused.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 32 ++++++++++++++++++-------
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h |  1 +
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 93825a4..3762142 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2302,6 +2302,8 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
 	cb->buf  = page_address(p);
 	cb->length = hns3_page_size(ring);
 	cb->type = DESC_TYPE_PAGE;
+	page_ref_add(p, USHRT_MAX - 1);
+	cb->pagecnt_bias = USHRT_MAX;
 
 	return 0;
 }
@@ -2311,8 +2313,8 @@ static void hns3_free_buffer(struct hns3_enet_ring *ring,
 {
 	if (cb->type == DESC_TYPE_SKB)
 		dev_kfree_skb_any((struct sk_buff *)cb->priv);
-	else if (!HNAE3_IS_TX_RING(ring))
-		put_page((struct page *)cb->priv);
+	else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
+		__page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
 	memset(cb, 0, sizeof(*cb));
 }
 
@@ -2610,6 +2612,11 @@ static bool hns3_page_is_reusable(struct page *page)
 		!page_is_pfmemalloc(page);
 }
 
+static bool hns3_can_reuse_page(struct hns3_desc_cb *cb)
+{
+	return (page_count(cb->priv) - cb->pagecnt_bias) == 1;
+}
+
 static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
 				struct hns3_enet_ring *ring, int pull_len,
 				struct hns3_desc_cb *desc_cb)
@@ -2618,6 +2625,7 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
 	int size = le16_to_cpu(desc->rx.size);
 	u32 truesize = hns3_buf_size(ring);
 
+	desc_cb->pagecnt_bias--;
 	skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
 			size - pull_len, truesize);
 
@@ -2625,20 +2633,27 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
 	 * when page_offset rollback to zero, flag default unreuse
 	 */
 	if (unlikely(!hns3_page_is_reusable(desc_cb->priv)) ||
-	    (!desc_cb->page_offset && page_count(desc_cb->priv) > 1))
+	    (!desc_cb->page_offset && !hns3_can_reuse_page(desc_cb))) {
+		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
 		return;
+	}
 
 	/* Move offset up to the next cache line */
 	desc_cb->page_offset += truesize;
 
 	if (desc_cb->page_offset + truesize <= hns3_page_size(ring)) {
 		desc_cb->reuse_flag = 1;
-		/* Bump ref count on page before it is given */
-		get_page(desc_cb->priv);
-	} else if (page_count(desc_cb->priv) == 1) {
+	} else if (hns3_can_reuse_page(desc_cb)) {
 		desc_cb->reuse_flag = 1;
 		desc_cb->page_offset = 0;
-		get_page(desc_cb->priv);
+	} else if (desc_cb->pagecnt_bias) {
+		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
+		return;
+	}
+
+	if (unlikely(!desc_cb->pagecnt_bias)) {
+		page_ref_add(desc_cb->priv, USHRT_MAX);
+		desc_cb->pagecnt_bias = USHRT_MAX;
 	}
 }
 
@@ -2846,7 +2861,8 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
 		if (likely(hns3_page_is_reusable(desc_cb->priv)))
 			desc_cb->reuse_flag = 1;
 		else /* This page cannot be reused so discard it */
-			put_page(desc_cb->priv);
+			__page_frag_cache_drain(desc_cb->priv,
+						desc_cb->pagecnt_bias);
 
 		ring_ptr_move_fw(ring, next_to_clean);
 		return 0;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 98ca6ea..8f784094 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -287,6 +287,7 @@ struct hns3_desc_cb {
 
 	/* desc type, used by the ring user to mark the type of the priv data */
 	u16 type;
+	u16 pagecnt_bias;
 };
 
 enum hns3_pkt_l3type {
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH net-next 2/6] net: hns3: batch tx doorbell operation
  2020-09-14 12:06 [PATCH net-next 0/6] net: hns3: updates for -next Huazhong Tan
  2020-09-14 12:06 ` [PATCH net-next 1/6] net: hns3: batch the page reference count updates Huazhong Tan
@ 2020-09-14 12:06 ` Huazhong Tan
  2020-09-14 12:06 ` [PATCH net-next 3/6] net: hns3: optimize the tx clean process Huazhong Tan
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Huazhong Tan @ 2020-09-14 12:06 UTC (permalink / raw)
  To: davem
  Cc: netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm, kuba,
	Yunsheng Lin, Huazhong Tan

From: Yunsheng Lin <linyunsheng@huawei.com>

Use netdev_xmit_more() to defer the tx doorbell operation when
the skb is passed to the driver continuously. By doing this we
can improve the overall xmit performance by avoid some doorbell
operations.

Also, the tx_err_cnt stat is not used, so rename it to tx_more
stat.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 47 +++++++++++++++++-----
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |  2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |  2 +-
 3 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 3762142..6a57c0d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1383,6 +1383,27 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
 	return bd_num;
 }
 
+static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
+			     bool doorbell)
+{
+	ring->pending_buf += num;
+
+	if (!doorbell) {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.tx_more++;
+		u64_stats_update_end(&ring->syncp);
+		return;
+	}
+
+	if (!ring->pending_buf)
+		return;
+
+	wmb(); /* Commit all data before submit */
+
+	hnae3_queue_xmit(ring->tqp, ring->pending_buf);
+	ring->pending_buf = 0;
+}
+
 netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -1391,11 +1412,14 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 	int pre_ntu, next_to_use_head;
 	struct sk_buff *frag_skb;
 	int bd_num = 0;
+	bool doorbell;
 	int ret;
 
 	/* Hardware can only handle short frames above 32 bytes */
-	if (skb_put_padto(skb, HNS3_MIN_TX_LEN))
+	if (skb_put_padto(skb, HNS3_MIN_TX_LEN)) {
+		hns3_tx_doorbell(ring, 0, !netdev_xmit_more());
 		return NETDEV_TX_OK;
+	}
 
 	/* Prefetch the data used later */
 	prefetch(skb->data);
@@ -1406,6 +1430,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 			u64_stats_update_begin(&ring->syncp);
 			ring->stats.tx_busy++;
 			u64_stats_update_end(&ring->syncp);
+			hns3_tx_doorbell(ring, 0, true);
 			return NETDEV_TX_BUSY;
 		} else if (ret == -ENOMEM) {
 			u64_stats_update_begin(&ring->syncp);
@@ -1446,11 +1471,9 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	/* Complete translate all packets */
 	dev_queue = netdev_get_tx_queue(netdev, ring->queue_index);
-	netdev_tx_sent_queue(dev_queue, skb->len);
-
-	wmb(); /* Commit all data before submit */
-
-	hnae3_queue_xmit(ring->tqp, bd_num);
+	doorbell = __netdev_tx_sent_queue(dev_queue, skb->len,
+					  netdev_xmit_more());
+	hns3_tx_doorbell(ring, bd_num, doorbell);
 
 	return NETDEV_TX_OK;
 
@@ -1459,6 +1482,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 out_err_tx_ok:
 	dev_kfree_skb_any(skb);
+	hns3_tx_doorbell(ring, 0, !netdev_xmit_more());
 	return NETDEV_TX_OK;
 }
 
@@ -1839,13 +1863,14 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
 		    tx_ring->next_to_clean, napi->state);
 
 	netdev_info(ndev,
-		    "tx_pkts: %llu, tx_bytes: %llu, io_err_cnt: %llu, sw_err_cnt: %llu\n",
+		    "tx_pkts: %llu, tx_bytes: %llu, io_err_cnt: %llu, sw_err_cnt: %llu, tx_pending: %d\n",
 		    tx_ring->stats.tx_pkts, tx_ring->stats.tx_bytes,
-		    tx_ring->stats.io_err_cnt, tx_ring->stats.sw_err_cnt);
+		    tx_ring->stats.io_err_cnt, tx_ring->stats.sw_err_cnt,
+		    tx_ring->pending_buf);
 
 	netdev_info(ndev,
-		    "seg_pkt_cnt: %llu, tx_err_cnt: %llu, restart_queue: %llu, tx_busy: %llu\n",
-		    tx_ring->stats.seg_pkt_cnt, tx_ring->stats.tx_err_cnt,
+		    "seg_pkt_cnt: %llu, tx_more: %llu, restart_queue: %llu, tx_busy: %llu\n",
+		    tx_ring->stats.seg_pkt_cnt, tx_ring->stats.tx_more,
 		    tx_ring->stats.restart_queue, tx_ring->stats.tx_busy);
 
 	/* When mac received many pause frames continuous, it's unable to send
@@ -4181,6 +4206,8 @@ static void hns3_clear_tx_ring(struct hns3_enet_ring *ring)
 		hns3_free_buffer_detach(ring, ring->next_to_clean);
 		ring_ptr_move_fw(ring, next_to_clean);
 	}
+
+	ring->pending_buf = 0;
 }
 
 static int hns3_clear_rx_ring(struct hns3_enet_ring *ring)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 8f784094..f40738c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -351,7 +351,7 @@ struct ring_stats {
 		struct {
 			u64 tx_pkts;
 			u64 tx_bytes;
-			u64 tx_err_cnt;
+			u64 tx_more;
 			u64 restart_queue;
 			u64 tx_busy;
 			u64 tx_copy;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 2622e04..97ad68b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -32,7 +32,7 @@ static const struct hns3_stats hns3_txq_stats[] = {
 	HNS3_TQP_STAT("seg_pkt_cnt", seg_pkt_cnt),
 	HNS3_TQP_STAT("packets", tx_pkts),
 	HNS3_TQP_STAT("bytes", tx_bytes),
-	HNS3_TQP_STAT("errors", tx_err_cnt),
+	HNS3_TQP_STAT("more", tx_more),
 	HNS3_TQP_STAT("wake", restart_queue),
 	HNS3_TQP_STAT("busy", tx_busy),
 	HNS3_TQP_STAT("copy", tx_copy),
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH net-next 3/6] net: hns3: optimize the tx clean process
  2020-09-14 12:06 [PATCH net-next 0/6] net: hns3: updates for -next Huazhong Tan
  2020-09-14 12:06 ` [PATCH net-next 1/6] net: hns3: batch the page reference count updates Huazhong Tan
  2020-09-14 12:06 ` [PATCH net-next 2/6] net: hns3: batch tx doorbell operation Huazhong Tan
@ 2020-09-14 12:06 ` Huazhong Tan
  2020-09-14 12:06 ` [PATCH net-next 4/6] net: hns3: optimize the rx " Huazhong Tan
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Huazhong Tan @ 2020-09-14 12:06 UTC (permalink / raw)
  To: davem
  Cc: netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm, kuba,
	Yunsheng Lin, Huazhong Tan

From: Yunsheng Lin <linyunsheng@huawei.com>

Currently HNS3_RING_TX_RING_HEAD_REG register is read to determine
how many tx desc can be cleaned. To avoid the register read operation
in the critical data path, use the valid bit in the tx desc to determine
if a specific tx desc can be cleaned.

The hns3 driver sets valid bit in the tx desc before ringing a doorbell
to the hw, and hw will only clear the valid bit of the tx desc after
corresponding packet is sent out to the wire. And because next_to_use
for tx ring is a changing variable when the driver is filling the tx
desc, so reuse the pull_len for rx ring to record the tx desc that has
notified to the hw, so that hns3_nic_reclaim_desc() can decide how many
tx desc's valid bit need checking when reclaiming tx desc.

And io_err_cnt stat is also removed for it is not used anymore.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 64 ++++++++++------------
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    | 12 ++--
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |  2 -
 3 files changed, 33 insertions(+), 45 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 6a57c0d..2db6c03 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1402,6 +1402,7 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
 
 	hnae3_queue_xmit(ring->tqp, ring->pending_buf);
 	ring->pending_buf = 0;
+	WRITE_ONCE(ring->last_to_use, ring->next_to_use);
 }
 
 netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
@@ -1863,10 +1864,9 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
 		    tx_ring->next_to_clean, napi->state);
 
 	netdev_info(ndev,
-		    "tx_pkts: %llu, tx_bytes: %llu, io_err_cnt: %llu, sw_err_cnt: %llu, tx_pending: %d\n",
+		    "tx_pkts: %llu, tx_bytes: %llu, sw_err_cnt: %llu, tx_pending: %d\n",
 		    tx_ring->stats.tx_pkts, tx_ring->stats.tx_bytes,
-		    tx_ring->stats.io_err_cnt, tx_ring->stats.sw_err_cnt,
-		    tx_ring->pending_buf);
+		    tx_ring->stats.sw_err_cnt, tx_ring->pending_buf);
 
 	netdev_info(ndev,
 		    "seg_pkt_cnt: %llu, tx_more: %llu, restart_queue: %llu, tx_busy: %llu\n",
@@ -2491,13 +2491,26 @@ static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i)
 			DMA_FROM_DEVICE);
 }
 
-static void hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, int head,
+static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
 				  int *bytes, int *pkts)
 {
+	/* pair with ring->last_to_use update in hns3_tx_doorbell(),
+	 * smp_store_release() is not used in hns3_tx_doorbell() because
+	 * the doorbell operation already have the needed barrier operation.
+	 */
+	int ltu = smp_load_acquire(&ring->last_to_use);
 	int ntc = ring->next_to_clean;
 	struct hns3_desc_cb *desc_cb;
+	bool reclaimed = false;
+	struct hns3_desc *desc;
+
+	while (ltu != ntc) {
+		desc = &ring->desc[ntc];
+
+		if (le16_to_cpu(desc->tx.bdtp_fe_sc_vld_ra_ri) &
+				BIT(HNS3_TXD_VLD_B))
+			break;
 
-	while (head != ntc) {
 		desc_cb = &ring->desc_cb[ntc];
 		(*pkts) += (desc_cb->type == DESC_TYPE_SKB);
 		(*bytes) += desc_cb->length;
@@ -2509,23 +2522,17 @@ static void hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, int head,
 
 		/* Issue prefetch for next Tx descriptor */
 		prefetch(&ring->desc_cb[ntc]);
+		reclaimed = true;
 	}
 
+	if (unlikely(!reclaimed))
+		return false;
+
 	/* This smp_store_release() pairs with smp_load_acquire() in
 	 * ring_space called by hns3_nic_net_xmit.
 	 */
 	smp_store_release(&ring->next_to_clean, ntc);
-}
-
-static int is_valid_clean_head(struct hns3_enet_ring *ring, int h)
-{
-	int u = ring->next_to_use;
-	int c = ring->next_to_clean;
-
-	if (unlikely(h > ring->desc_num))
-		return 0;
-
-	return u > c ? (h > c && h <= u) : (h > c || h <= u);
+	return true;
 }
 
 void hns3_clean_tx_ring(struct hns3_enet_ring *ring)
@@ -2534,28 +2541,12 @@ void hns3_clean_tx_ring(struct hns3_enet_ring *ring)
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	struct netdev_queue *dev_queue;
 	int bytes, pkts;
-	int head;
-
-	head = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_HEAD_REG);
-
-	if (is_ring_empty(ring) || head == ring->next_to_clean)
-		return; /* no data to poll */
-
-	rmb(); /* Make sure head is ready before touch any data */
-
-	if (unlikely(!is_valid_clean_head(ring, head))) {
-		hns3_rl_err(netdev, "wrong head (%d, %d-%d)\n", head,
-			    ring->next_to_use, ring->next_to_clean);
-
-		u64_stats_update_begin(&ring->syncp);
-		ring->stats.io_err_cnt++;
-		u64_stats_update_end(&ring->syncp);
-		return;
-	}
 
 	bytes = 0;
 	pkts = 0;
-	hns3_nic_reclaim_desc(ring, head, &bytes, &pkts);
+
+	if (unlikely(!hns3_nic_reclaim_desc(ring, &bytes, &pkts)))
+		return;
 
 	ring->tqp_vector->tx_group.total_bytes += bytes;
 	ring->tqp_vector->tx_group.total_packets += pkts;
@@ -3714,6 +3705,7 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
 	ring->desc_num = desc_num;
 	ring->next_to_use = 0;
 	ring->next_to_clean = 0;
+	ring->last_to_use = 0;
 }
 
 static void hns3_queue_to_ring(struct hnae3_queue *tqp,
@@ -3793,6 +3785,7 @@ void hns3_fini_ring(struct hns3_enet_ring *ring)
 	ring->desc_cb = NULL;
 	ring->next_to_clean = 0;
 	ring->next_to_use = 0;
+	ring->last_to_use = 0;
 	ring->pending_buf = 0;
 	if (ring->skb) {
 		dev_kfree_skb_any(ring->skb);
@@ -4310,6 +4303,7 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h)
 		hns3_clear_tx_ring(&priv->ring[i]);
 		priv->ring[i].next_to_clean = 0;
 		priv->ring[i].next_to_use = 0;
+		priv->ring[i].last_to_use = 0;
 
 		rx_ring = &priv->ring[i + h->kinfo.num_tqps];
 		hns3_init_ring_hw(rx_ring);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index f40738c..876dc09 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -344,7 +344,6 @@ enum hns3_pkt_ol4type {
 };
 
 struct ring_stats {
-	u64 io_err_cnt;
 	u64 sw_err_cnt;
 	u64 seg_pkt_cnt;
 	union {
@@ -397,8 +396,10 @@ struct hns3_enet_ring {
 	 * next_to_use
 	 */
 	int next_to_clean;
-
-	u32 pull_len; /* head length for current packet */
+	union {
+		int last_to_use;	/* last idx used by xmit */
+		u32 pull_len;		/* memcpy len for current rx packet */
+	};
 	u32 frag_num;
 	void *va; /* first buffer address for current packet */
 
@@ -513,11 +514,6 @@ static inline int ring_space(struct hns3_enet_ring *ring)
 			(begin - end)) - 1;
 }
 
-static inline int is_ring_empty(struct hns3_enet_ring *ring)
-{
-	return ring->next_to_use == ring->next_to_clean;
-}
-
 static inline u32 hns3_read_reg(void __iomem *base, u32 reg)
 {
 	return readl(base + reg);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 97ad68b..f402c39 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -27,7 +27,6 @@ struct hns3_sfp_type {
 
 static const struct hns3_stats hns3_txq_stats[] = {
 	/* Tx per-queue statistics */
-	HNS3_TQP_STAT("io_err_cnt", io_err_cnt),
 	HNS3_TQP_STAT("dropped", sw_err_cnt),
 	HNS3_TQP_STAT("seg_pkt_cnt", seg_pkt_cnt),
 	HNS3_TQP_STAT("packets", tx_pkts),
@@ -46,7 +45,6 @@ static const struct hns3_stats hns3_txq_stats[] = {
 
 static const struct hns3_stats hns3_rxq_stats[] = {
 	/* Rx per-queue statistics */
-	HNS3_TQP_STAT("io_err_cnt", io_err_cnt),
 	HNS3_TQP_STAT("dropped", sw_err_cnt),
 	HNS3_TQP_STAT("seg_pkt_cnt", seg_pkt_cnt),
 	HNS3_TQP_STAT("packets", rx_pkts),
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH net-next 4/6] net: hns3: optimize the rx clean process
  2020-09-14 12:06 [PATCH net-next 0/6] net: hns3: updates for -next Huazhong Tan
                   ` (2 preceding siblings ...)
  2020-09-14 12:06 ` [PATCH net-next 3/6] net: hns3: optimize the tx clean process Huazhong Tan
@ 2020-09-14 12:06 ` Huazhong Tan
  2020-09-14 12:06 ` [PATCH net-next 5/6] net: hns3: use writel() to optimize the barrier operation Huazhong Tan
  2020-09-14 12:06 ` [PATCH net-next 6/6] net: hns3: use napi_consume_skb() when cleaning tx desc Huazhong Tan
  5 siblings, 0 replies; 14+ messages in thread
From: Huazhong Tan @ 2020-09-14 12:06 UTC (permalink / raw)
  To: davem
  Cc: netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm, kuba,
	Yunsheng Lin, Huazhong Tan

From: Yunsheng Lin <linyunsheng@huawei.com>

Currently HNS3_RING_RX_RING_FBDNUM_REG register is read to determine
how many rx desc can be cleaned. To avoid the register read operation
in the critical data path, use the valid bit in the rx desc to determine
if a specific rx desc can be cleaned.

The hns3 driver clear valid bit in the rx desc before notifying the
rx desc to the hw, and hw will only set the valid bit of the rx desc
after corresponding buffer is filled with packet data and other field
in the rx desc is set accordingly.

Add hns3_rx_ring_move_fw() function to clear the valid bit in the rx
desc before moving rx ring's next_to_clean forward to avoid double
cleaning a rx desc, also add a dma_rmb() barrier in hns3_handle_rx_bd()
to make sure valid bit is set before reading other field in the rx desc.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 61 +++++++++++++------------
 1 file changed, 31 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 2db6c03..8490754 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2845,6 +2845,16 @@ static bool hns3_parse_vlan_tag(struct hns3_enet_ring *ring,
 	}
 }
 
+static void hns3_rx_ring_move_fw(struct hns3_enet_ring *ring)
+{
+	ring->desc[ring->next_to_clean].rx.bd_base_info &=
+		cpu_to_le32(~BIT(HNS3_RXD_VLD_B));
+	ring->next_to_clean += 1;
+
+	if (unlikely(ring->next_to_clean == ring->desc_num))
+		ring->next_to_clean = 0;
+}
+
 static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
 			  unsigned char *va)
 {
@@ -2880,7 +2890,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
 			__page_frag_cache_drain(desc_cb->priv,
 						desc_cb->pagecnt_bias);
 
-		ring_ptr_move_fw(ring, next_to_clean);
+		hns3_rx_ring_move_fw(ring);
 		return 0;
 	}
 	u64_stats_update_begin(&ring->syncp);
@@ -2891,7 +2901,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
 	__skb_put(skb, ring->pull_len);
 	hns3_nic_reuse_page(skb, ring->frag_num++, ring, ring->pull_len,
 			    desc_cb);
-	ring_ptr_move_fw(ring, next_to_clean);
+	hns3_rx_ring_move_fw(ring);
 
 	return 0;
 }
@@ -2946,7 +2956,7 @@ static int hns3_add_frag(struct hns3_enet_ring *ring)
 
 		hns3_nic_reuse_page(skb, ring->frag_num++, ring, 0, desc_cb);
 		trace_hns3_rx_desc(ring);
-		ring_ptr_move_fw(ring, next_to_clean);
+		hns3_rx_ring_move_fw(ring);
 		ring->pending_buf++;
 	} while (!(bd_base_info & BIT(HNS3_RXD_FE_B)));
 
@@ -3088,32 +3098,32 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring)
 
 	prefetch(desc);
 
-	length = le16_to_cpu(desc->rx.size);
-	bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
+	if (!skb) {
+		bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
+
+		/* Check valid BD */
+		if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B))))
+			return -ENXIO;
 
-	/* Check valid BD */
-	if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B))))
-		return -ENXIO;
+		dma_rmb();
+		length = le16_to_cpu(desc->rx.size);
 
-	if (!skb) {
 		ring->va = desc_cb->buf + desc_cb->page_offset;
 
 		dma_sync_single_for_cpu(ring_to_dev(ring),
 				desc_cb->dma + desc_cb->page_offset,
 				hns3_buf_size(ring),
 				DMA_FROM_DEVICE);
-	}
 
-	/* Prefetch first cache line of first page
-	 * Idea is to cache few bytes of the header of the packet. Our L1 Cache
-	 * line size is 64B so need to prefetch twice to make it 128B. But in
-	 * actual we can have greater size of caches with 128B Level 1 cache
-	 * lines. In such a case, single fetch would suffice to cache in the
-	 * relevant part of the header.
-	 */
-	net_prefetch(ring->va);
+		/* Prefetch first cache line of first page.
+		 * Idea is to cache few bytes of the header of the packet.
+		 * Our L1 Cache line size is 64B so need to prefetch twice to make
+		 * it 128B. But in actual we can have greater size of caches with
+		 * 128B Level 1 cache lines. In such a case, single fetch would
+		 * suffice to cache in the relevant part of the header.
+		 */
+		net_prefetch(ring->va);
 
-	if (!skb) {
 		ret = hns3_alloc_skb(ring, length, ring->va);
 		skb = ring->skb;
 
@@ -3153,19 +3163,11 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget,
 #define RCB_NOF_ALLOC_RX_BUFF_ONCE 16
 	int unused_count = hns3_desc_unused(ring);
 	int recv_pkts = 0;
-	int recv_bds = 0;
-	int err, num;
+	int err;
 
-	num = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_FBDNUM_REG);
-	num -= unused_count;
 	unused_count -= ring->pending_buf;
 
-	if (num <= 0)
-		goto out;
-
-	rmb(); /* Make sure num taken effect before the other data is touched */
-
-	while (recv_pkts < budget && recv_bds < num) {
+	while (recv_pkts < budget) {
 		/* Reuse or realloc buffers */
 		if (unused_count >= RCB_NOF_ALLOC_RX_BUFF_ONCE) {
 			hns3_nic_alloc_rx_buffers(ring, unused_count);
@@ -3183,7 +3185,6 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget,
 			recv_pkts++;
 		}
 
-		recv_bds += ring->pending_buf;
 		unused_count += ring->pending_buf;
 		ring->skb = NULL;
 		ring->pending_buf = 0;
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH net-next 5/6] net: hns3: use writel() to optimize the barrier operation
  2020-09-14 12:06 [PATCH net-next 0/6] net: hns3: updates for -next Huazhong Tan
                   ` (3 preceding siblings ...)
  2020-09-14 12:06 ` [PATCH net-next 4/6] net: hns3: optimize the rx " Huazhong Tan
@ 2020-09-14 12:06 ` Huazhong Tan
  2020-09-14 21:45   ` Jakub Kicinski
  2020-09-14 12:06 ` [PATCH net-next 6/6] net: hns3: use napi_consume_skb() when cleaning tx desc Huazhong Tan
  5 siblings, 1 reply; 14+ messages in thread
From: Huazhong Tan @ 2020-09-14 12:06 UTC (permalink / raw)
  To: davem
  Cc: netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm, kuba,
	Yunsheng Lin, Huazhong Tan

From: Yunsheng Lin <linyunsheng@huawei.com>

writel() can be used to order I/O vs memory by default when
writing portable drivers. Use writel() to replace wmb() +
writel_relaxed(), and writel() is dma_wmb() + writel_relaxed()
for ARM64, so there is an optimization here because dma_wmb()
is a lighter barrier than wmb().

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 8 +++-----
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 3 ---
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 8490754..4a49a76 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1398,9 +1398,8 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
 	if (!ring->pending_buf)
 		return;
 
-	wmb(); /* Commit all data before submit */
-
-	hnae3_queue_xmit(ring->tqp, ring->pending_buf);
+	writel(ring->pending_buf,
+	       ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG);
 	ring->pending_buf = 0;
 	WRITE_ONCE(ring->last_to_use, ring->next_to_use);
 }
@@ -2618,8 +2617,7 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring,
 		ring_ptr_move_fw(ring, next_to_use);
 	}
 
-	wmb(); /* Make all data has been write before submit */
-	writel_relaxed(i, ring->tqp->io_base + HNS3_RING_RX_RING_HEAD_REG);
+	writel(i, ring->tqp->io_base + HNS3_RING_RX_RING_HEAD_REG);
 }
 
 static bool hns3_page_is_reusable(struct page *page)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 876dc09..20e62ce 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -539,9 +539,6 @@ static inline bool hns3_nic_resetting(struct net_device *netdev)
 #define hns3_write_dev(a, reg, value) \
 	hns3_write_reg((a)->io_base, (reg), (value))
 
-#define hnae3_queue_xmit(tqp, buf_num) writel_relaxed(buf_num, \
-		(tqp)->io_base + HNS3_RING_TX_RING_TAIL_REG)
-
 #define ring_to_dev(ring) ((ring)->dev)
 
 #define ring_to_netdev(ring)	((ring)->tqp_vector->napi.dev)
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH net-next 6/6] net: hns3: use napi_consume_skb() when cleaning tx desc
  2020-09-14 12:06 [PATCH net-next 0/6] net: hns3: updates for -next Huazhong Tan
                   ` (4 preceding siblings ...)
  2020-09-14 12:06 ` [PATCH net-next 5/6] net: hns3: use writel() to optimize the barrier operation Huazhong Tan
@ 2020-09-14 12:06 ` Huazhong Tan
  2020-09-15  5:09   ` Saeed Mahameed
  5 siblings, 1 reply; 14+ messages in thread
From: Huazhong Tan @ 2020-09-14 12:06 UTC (permalink / raw)
  To: davem
  Cc: netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm, kuba,
	Yunsheng Lin, Huazhong Tan

From: Yunsheng Lin <linyunsheng@huawei.com>

Use napi_consume_skb() to batch consuming skb when cleaning
tx desc in NAPI polling.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 27 +++++++++++-----------
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |  2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |  4 ++--
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 4a49a76..feeaf75 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2333,10 +2333,10 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
 }
 
 static void hns3_free_buffer(struct hns3_enet_ring *ring,
-			     struct hns3_desc_cb *cb)
+			     struct hns3_desc_cb *cb, int budget)
 {
 	if (cb->type == DESC_TYPE_SKB)
-		dev_kfree_skb_any((struct sk_buff *)cb->priv);
+		napi_consume_skb(cb->priv, budget);
 	else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
 		__page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
 	memset(cb, 0, sizeof(*cb));
@@ -2370,7 +2370,8 @@ static void hns3_buffer_detach(struct hns3_enet_ring *ring, int i)
 	ring->desc[i].addr = 0;
 }
 
-static void hns3_free_buffer_detach(struct hns3_enet_ring *ring, int i)
+static void hns3_free_buffer_detach(struct hns3_enet_ring *ring, int i,
+				    int budget)
 {
 	struct hns3_desc_cb *cb = &ring->desc_cb[i];
 
@@ -2378,7 +2379,7 @@ static void hns3_free_buffer_detach(struct hns3_enet_ring *ring, int i)
 		return;
 
 	hns3_buffer_detach(ring, i);
-	hns3_free_buffer(ring, cb);
+	hns3_free_buffer(ring, cb, budget);
 }
 
 static void hns3_free_buffers(struct hns3_enet_ring *ring)
@@ -2386,7 +2387,7 @@ static void hns3_free_buffers(struct hns3_enet_ring *ring)
 	int i;
 
 	for (i = 0; i < ring->desc_num; i++)
-		hns3_free_buffer_detach(ring, i);
+		hns3_free_buffer_detach(ring, i, 0);
 }
 
 /* free desc along with its attached buffer */
@@ -2431,7 +2432,7 @@ static int hns3_alloc_and_map_buffer(struct hns3_enet_ring *ring,
 	return 0;
 
 out_with_buf:
-	hns3_free_buffer(ring, cb);
+	hns3_free_buffer(ring, cb, 0);
 out:
 	return ret;
 }
@@ -2463,7 +2464,7 @@ static int hns3_alloc_ring_buffers(struct hns3_enet_ring *ring)
 
 out_buffer_fail:
 	for (j = i - 1; j >= 0; j--)
-		hns3_free_buffer_detach(ring, j);
+		hns3_free_buffer_detach(ring, j, 0);
 	return ret;
 }
 
@@ -2491,7 +2492,7 @@ static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i)
 }
 
 static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
-				  int *bytes, int *pkts)
+				  int *bytes, int *pkts, int budget)
 {
 	/* pair with ring->last_to_use update in hns3_tx_doorbell(),
 	 * smp_store_release() is not used in hns3_tx_doorbell() because
@@ -2514,7 +2515,7 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
 		(*pkts) += (desc_cb->type == DESC_TYPE_SKB);
 		(*bytes) += desc_cb->length;
 		/* desc_cb will be cleaned, after hnae3_free_buffer_detach */
-		hns3_free_buffer_detach(ring, ntc);
+		hns3_free_buffer_detach(ring, ntc, budget);
 
 		if (++ntc == ring->desc_num)
 			ntc = 0;
@@ -2534,7 +2535,7 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
 	return true;
 }
 
-void hns3_clean_tx_ring(struct hns3_enet_ring *ring)
+void hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget)
 {
 	struct net_device *netdev = ring_to_netdev(ring);
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -2544,7 +2545,7 @@ void hns3_clean_tx_ring(struct hns3_enet_ring *ring)
 	bytes = 0;
 	pkts = 0;
 
-	if (unlikely(!hns3_nic_reclaim_desc(ring, &bytes, &pkts)))
+	if (unlikely(!hns3_nic_reclaim_desc(ring, &bytes, &pkts, budget)))
 		return;
 
 	ring->tqp_vector->tx_group.total_bytes += bytes;
@@ -3351,7 +3352,7 @@ static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
 	 * budget and be more aggressive about cleaning up the Tx descriptors.
 	 */
 	hns3_for_each_ring(ring, tqp_vector->tx_group)
-		hns3_clean_tx_ring(ring);
+		hns3_clean_tx_ring(ring, budget);
 
 	/* make sure rx ring budget not smaller than 1 */
 	if (tqp_vector->num_tqps > 1)
@@ -4195,7 +4196,7 @@ static void hns3_clear_tx_ring(struct hns3_enet_ring *ring)
 {
 	while (ring->next_to_clean != ring->next_to_use) {
 		ring->desc[ring->next_to_clean].tx.bdtp_fe_sc_vld_ra_ri = 0;
-		hns3_free_buffer_detach(ring, ring->next_to_clean);
+		hns3_free_buffer_detach(ring, ring->next_to_clean, 0);
 		ring_ptr_move_fw(ring, next_to_clean);
 	}
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 20e62ce..8a6c8ba 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -576,7 +576,7 @@ void hns3_ethtool_set_ops(struct net_device *netdev);
 int hns3_set_channels(struct net_device *netdev,
 		      struct ethtool_channels *ch);
 
-void hns3_clean_tx_ring(struct hns3_enet_ring *ring);
+void hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget);
 int hns3_init_all_ring(struct hns3_nic_priv *priv);
 int hns3_uninit_all_ring(struct hns3_nic_priv *priv);
 int hns3_nic_reset_all_ring(struct hnae3_handle *h);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index f402c39..afa1656 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -223,14 +223,14 @@ static u32 hns3_lb_check_rx_ring(struct hns3_nic_priv *priv, u32 budget)
 }
 
 static void hns3_lb_clear_tx_ring(struct hns3_nic_priv *priv, u32 start_ringid,
-				  u32 end_ringid, u32 budget)
+				  u32 end_ringid, int budget)
 {
 	u32 i;
 
 	for (i = start_ringid; i <= end_ringid; i++) {
 		struct hns3_enet_ring *ring = &priv->ring[i];
 
-		hns3_clean_tx_ring(ring);
+		hns3_clean_tx_ring(ring, budget);
 	}
 }
 
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 5/6] net: hns3: use writel() to optimize the barrier operation
  2020-09-14 12:06 ` [PATCH net-next 5/6] net: hns3: use writel() to optimize the barrier operation Huazhong Tan
@ 2020-09-14 21:45   ` Jakub Kicinski
  2020-09-15  1:41     ` Yunsheng Lin
  0 siblings, 1 reply; 14+ messages in thread
From: Jakub Kicinski @ 2020-09-14 21:45 UTC (permalink / raw)
  To: Huazhong Tan
  Cc: davem, netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm,
	Yunsheng Lin

On Mon, 14 Sep 2020 20:06:56 +0800 Huazhong Tan wrote:
> From: Yunsheng Lin <linyunsheng@huawei.com>
> 
> writel() can be used to order I/O vs memory by default when
> writing portable drivers. Use writel() to replace wmb() +
> writel_relaxed(), and writel() is dma_wmb() + writel_relaxed()
> for ARM64, so there is an optimization here because dma_wmb()
> is a lighter barrier than wmb().

Cool, although lots of drivers will need a change like this now. 

And looks like memory-barriers.txt is slightly, eh, not coherent there,
between the documentation of writeX() and dma_wmb() :S

	3. A writeX() by a CPU thread to the peripheral will first wait for the
	   completion of all prior writes to memory either issued by, or
	   propagated to, the same thread. This ensures that writes by the CPU
	   to an outbound DMA buffer allocated by dma_alloc_coherent() will be
	   visible to a DMA engine when the CPU writes to its MMIO control
	   register to trigger the transfer.



 (*) dma_wmb();
 (*) dma_rmb();

     These are for use with consistent memory to guarantee the ordering
     of writes or reads of shared memory accessible to both the CPU and a
     DMA capable device.

     For example, consider a device driver that shares memory with a device
     and uses a descriptor status value to indicate if the descriptor belongs
     to the device or the CPU, and a doorbell to notify it when new
     descriptors are available:

	if (desc->status != DEVICE_OWN) {
		/* do not read data until we own descriptor */
		dma_rmb();

		/* read/modify data */
		read_data = desc->data;
		desc->data = write_data;

		/* flush modifications before status update */
		dma_wmb();

		/* assign ownership */
		desc->status = DEVICE_OWN;

		/* notify device of new descriptors */
		writel(DESC_NOTIFY, doorbell);
	}

     The dma_rmb() allows us guarantee the device has released ownership
     before we read the data from the descriptor, and the dma_wmb() allows
     us to guarantee the data is written to the descriptor before the device
     can see it now has ownership.  Note that, when using writel(), a prior
     wmb() is not needed to guarantee that the cache coherent memory writes
     have completed before writing to the MMIO region.  The cheaper
     writel_relaxed() does not provide this guarantee and must not be used
     here.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 5/6] net: hns3: use writel() to optimize the barrier operation
  2020-09-14 21:45   ` Jakub Kicinski
@ 2020-09-15  1:41     ` Yunsheng Lin
  0 siblings, 0 replies; 14+ messages in thread
From: Yunsheng Lin @ 2020-09-15  1:41 UTC (permalink / raw)
  To: Jakub Kicinski, Huazhong Tan
  Cc: davem, netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm

On 2020/9/15 5:45, Jakub Kicinski wrote:
> On Mon, 14 Sep 2020 20:06:56 +0800 Huazhong Tan wrote:
>> From: Yunsheng Lin <linyunsheng@huawei.com>
>>
>> writel() can be used to order I/O vs memory by default when
>> writing portable drivers. Use writel() to replace wmb() +
>> writel_relaxed(), and writel() is dma_wmb() + writel_relaxed()
>> for ARM64, so there is an optimization here because dma_wmb()
>> is a lighter barrier than wmb().
> 
> Cool, although lots of drivers will need a change like this now. 
> 
> And looks like memory-barriers.txt is slightly, eh, not coherent there,
> between the documentation of writeX() and dma_wmb() :S
> 
> 	3. A writeX() by a CPU thread to the peripheral will first wait for the
> 	   completion of all prior writes to memory either issued by, or

"wait for the completion of all prior writes to memory" seems to match the semantics
of writel() here?

> 	   propagated to, the same thread. This ensures that writes by the CPU
> 	   to an outbound DMA buffer allocated by dma_alloc_coherent() will be

"outbound DMA buffer" mapped by the streaming API can also be ordered by the
writel(), Is that what you meant by "not coherent"?


> 	   visible to a DMA engine when the CPU writes to its MMIO control
> 	   register to trigger the transfer.
> 
> 
> 
>  (*) dma_wmb();
>  (*) dma_rmb();
> 
>      These are for use with consistent memory to guarantee the ordering
>      of writes or reads of shared memory accessible to both the CPU and a
>      DMA capable device.
> 
>      For example, consider a device driver that shares memory with a device
>      and uses a descriptor status value to indicate if the descriptor belongs
>      to the device or the CPU, and a doorbell to notify it when new
>      descriptors are available:
> 
> 	if (desc->status != DEVICE_OWN) {
> 		/* do not read data until we own descriptor */
> 		dma_rmb();
> 
> 		/* read/modify data */
> 		read_data = desc->data;
> 		desc->data = write_data;
> 
> 		/* flush modifications before status update */
> 		dma_wmb();
> 
> 		/* assign ownership */
> 		desc->status = DEVICE_OWN;
> 
> 		/* notify device of new descriptors */
> 		writel(DESC_NOTIFY, doorbell);
> 	}
> 
>      The dma_rmb() allows us guarantee the device has released ownership
>      before we read the data from the descriptor, and the dma_wmb() allows
>      us to guarantee the data is written to the descriptor before the device
>      can see it now has ownership.  Note that, when using writel(), a prior
>      wmb() is not needed to guarantee that the cache coherent memory writes
>      have completed before writing to the MMIO region.  The cheaper
>      writel_relaxed() does not provide this guarantee and must not be used
>      here.

I am not sure writel() has any implication here. My interpretation to the above
doc is that dma_wmb() is more appropriate when only coherent/consistent memory
need to be ordered.

If writel() is used, then dma_wmb() or wmb() is unnecessary, see:

commit: 5846581e3563 ("locking/memory-barriers.txt: Fix broken DMA vs. MMIO ordering example")


> .
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 6/6] net: hns3: use napi_consume_skb() when cleaning tx desc
  2020-09-14 12:06 ` [PATCH net-next 6/6] net: hns3: use napi_consume_skb() when cleaning tx desc Huazhong Tan
@ 2020-09-15  5:09   ` Saeed Mahameed
  2020-09-15  7:04     ` Yunsheng Lin
  0 siblings, 1 reply; 14+ messages in thread
From: Saeed Mahameed @ 2020-09-15  5:09 UTC (permalink / raw)
  To: tanhuazhong, davem
  Cc: yisen.zhuang, salil.mehta, linuxarm, netdev, linux-kernel,
	linyunsheng, kuba

On Mon, 2020-09-14 at 20:06 +0800, Huazhong Tan wrote:
> From: Yunsheng Lin <linyunsheng@huawei.com>
> 
> Use napi_consume_skb() to batch consuming skb when cleaning
> tx desc in NAPI polling.
> 
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
> ---
>  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 27 +++++++++++-
> ----------
>  drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |  2 +-
>  drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |  4 ++--
>  3 files changed, 17 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> index 4a49a76..feeaf75 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> @@ -2333,10 +2333,10 @@ static int hns3_alloc_buffer(struct
> hns3_enet_ring *ring,
>  }
>  
>  static void hns3_free_buffer(struct hns3_enet_ring *ring,
> -			     struct hns3_desc_cb *cb)
> +			     struct hns3_desc_cb *cb, int budget)
>  {
>  	if (cb->type == DESC_TYPE_SKB)
> -		dev_kfree_skb_any((struct sk_buff *)cb->priv);
> +		napi_consume_skb(cb->priv, budget);

This code can be reached from hns3_lb_clear_tx_ring() below which is
your loopback test and called with non-zero budget, I am not sure you
are allowed to call napi_consume_skb() with non-zero budget outside
napi context, perhaps the cb->type for loopback test is different in lb
test case ? Idk.. , please double check other code paths.

[...]

>  static void hns3_lb_clear_tx_ring(struct hns3_nic_priv *priv, u32
> start_ringid,
> -				  u32 end_ringid, u32 budget)
> +				  u32 end_ringid, int budget)
>  {
>  	u32 i;
>  
>  	for (i = start_ringid; i <= end_ringid; i++) {
>  		struct hns3_enet_ring *ring = &priv->ring[i];
>  
> -		hns3_clean_tx_ring(ring);
> +		hns3_clean_tx_ring(ring, budget);
>  	}
>  }
>  

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 6/6] net: hns3: use napi_consume_skb() when cleaning tx desc
  2020-09-15  5:09   ` Saeed Mahameed
@ 2020-09-15  7:04     ` Yunsheng Lin
  2020-09-16  8:33       ` Saeed Mahameed
  0 siblings, 1 reply; 14+ messages in thread
From: Yunsheng Lin @ 2020-09-15  7:04 UTC (permalink / raw)
  To: Saeed Mahameed, tanhuazhong, davem
  Cc: yisen.zhuang, salil.mehta, linuxarm, netdev, linux-kernel, kuba

On 2020/9/15 13:09, Saeed Mahameed wrote:
> On Mon, 2020-09-14 at 20:06 +0800, Huazhong Tan wrote:
>> From: Yunsheng Lin <linyunsheng@huawei.com>
>>
>> Use napi_consume_skb() to batch consuming skb when cleaning
>> tx desc in NAPI polling.
>>
>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
>> Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
>> ---
>>  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 27 +++++++++++-
>> ----------
>>  drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |  2 +-
>>  drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |  4 ++--
>>  3 files changed, 17 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
>> b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
>> index 4a49a76..feeaf75 100644
>> --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
>> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
>> @@ -2333,10 +2333,10 @@ static int hns3_alloc_buffer(struct
>> hns3_enet_ring *ring,
>>  }
>>  
>>  static void hns3_free_buffer(struct hns3_enet_ring *ring,
>> -			     struct hns3_desc_cb *cb)
>> +			     struct hns3_desc_cb *cb, int budget)
>>  {
>>  	if (cb->type == DESC_TYPE_SKB)
>> -		dev_kfree_skb_any((struct sk_buff *)cb->priv);
>> +		napi_consume_skb(cb->priv, budget);
> 
> This code can be reached from hns3_lb_clear_tx_ring() below which is
> your loopback test and called with non-zero budget, I am not sure you
> are allowed to call napi_consume_skb() with non-zero budget outside
> napi context, perhaps the cb->type for loopback test is different in lb
> test case ? Idk.. , please double check other code paths.

Yes, loopback test may call napi_consume_skb() with non-zero budget outside
napi context. Thanks for pointing out this case.

How about add the below WARN_ONCE() in napi_consume_skb() to catch this
kind of error?

WARN_ONCE(!in_serving_softirq(), "napi_consume_skb() is called with non-zero budget outside napi context");

> 
> [...]
> 
>>  static void hns3_lb_clear_tx_ring(struct hns3_nic_priv *priv, u32
>> start_ringid,
>> -				  u32 end_ringid, u32 budget)
>> +				  u32 end_ringid, int budget)
>>  {
>>  	u32 i;
>>  
>>  	for (i = start_ringid; i <= end_ringid; i++) {
>>  		struct hns3_enet_ring *ring = &priv->ring[i];
>>  
>> -		hns3_clean_tx_ring(ring);
>> +		hns3_clean_tx_ring(ring, budget);
>>  	}
>>  }
>>  

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 6/6] net: hns3: use napi_consume_skb() when cleaning tx desc
  2020-09-15  7:04     ` Yunsheng Lin
@ 2020-09-16  8:33       ` Saeed Mahameed
  2020-09-16  8:38         ` Eric Dumazet
  0 siblings, 1 reply; 14+ messages in thread
From: Saeed Mahameed @ 2020-09-16  8:33 UTC (permalink / raw)
  To: Yunsheng Lin, Saeed Mahameed, tanhuazhong, davem, Eric Dumazet
  Cc: yisen.zhuang, salil.mehta, linuxarm, netdev, linux-kernel, kuba

On Tue, 2020-09-15 at 15:04 +0800, Yunsheng Lin wrote:
> On 2020/9/15 13:09, Saeed Mahameed wrote:
> > On Mon, 2020-09-14 at 20:06 +0800, Huazhong Tan wrote:
> > > From: Yunsheng Lin <linyunsheng@huawei.com>
> > > 
> > > Use napi_consume_skb() to batch consuming skb when cleaning
> > > tx desc in NAPI polling.
> > > 
> > > Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> > > Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
> > > ---
> > >  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 27
> > > +++++++++++-
> > > ----------
> > >  drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |  2 +-
> > >  drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |  4 ++--
> > >  3 files changed, 17 insertions(+), 16 deletions(-)
> > > 
> > > diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> > > b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> > > index 4a49a76..feeaf75 100644
> > > --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> > > +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> > > @@ -2333,10 +2333,10 @@ static int hns3_alloc_buffer(struct
> > > hns3_enet_ring *ring,
> > >  }
> > >  
> > >  static void hns3_free_buffer(struct hns3_enet_ring *ring,
> > > -			     struct hns3_desc_cb *cb)
> > > +			     struct hns3_desc_cb *cb, int budget)
> > >  {
> > >  	if (cb->type == DESC_TYPE_SKB)
> > > -		dev_kfree_skb_any((struct sk_buff *)cb->priv);
> > > +		napi_consume_skb(cb->priv, budget);
> > 
> > This code can be reached from hns3_lb_clear_tx_ring() below which
> > is
> > your loopback test and called with non-zero budget, I am not sure
> > you
> > are allowed to call napi_consume_skb() with non-zero budget outside
> > napi context, perhaps the cb->type for loopback test is different
> > in lb
> > test case ? Idk.. , please double check other code paths.
> 
> Yes, loopback test may call napi_consume_skb() with non-zero budget
> outside
> napi context. Thanks for pointing out this case.
> 
> How about add the below WARN_ONCE() in napi_consume_skb() to catch
> this
> kind of error?
> 
> WARN_ONCE(!in_serving_softirq(), "napi_consume_skb() is called with
> non-zero budget outside napi context");
> 

Cc: Eric

I don't know, need to check performance impact.
And in_serving_softirq() doesn't necessarily mean in napi
but looking at _kfree_skb_defer(), i think it shouldn't care if napi or
not as long as it runs in soft irq it will push the skb to that
particular cpu napi_alloc_cache, which should be fine.

Maybe instead of the WARN_ONCE just remove the budget condition and
replace it with

if (!in_serving_softirq())
      dev_consume_skb_any(skb);



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 6/6] net: hns3: use napi_consume_skb() when cleaning tx desc
  2020-09-16  8:33       ` Saeed Mahameed
@ 2020-09-16  8:38         ` Eric Dumazet
  2020-09-17  6:52           ` Yunsheng Lin
  0 siblings, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2020-09-16  8:38 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: Yunsheng Lin, Saeed Mahameed, tanhuazhong, davem, yisen.zhuang,
	salil.mehta, linuxarm, netdev, linux-kernel, kuba

On Wed, Sep 16, 2020 at 10:33 AM Saeed Mahameed <saeed@kernel.org> wrote:
>
> On Tue, 2020-09-15 at 15:04 +0800, Yunsheng Lin wrote:
> > On 2020/9/15 13:09, Saeed Mahameed wrote:
> > > On Mon, 2020-09-14 at 20:06 +0800, Huazhong Tan wrote:
> > > > From: Yunsheng Lin <linyunsheng@huawei.com>
> > > >
> > > > Use napi_consume_skb() to batch consuming skb when cleaning
> > > > tx desc in NAPI polling.
> > > >
> > > > Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> > > > Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
> > > > ---
> > > >  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 27
> > > > +++++++++++-
> > > > ----------
> > > >  drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |  2 +-
> > > >  drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |  4 ++--
> > > >  3 files changed, 17 insertions(+), 16 deletions(-)
> > > >
> > > > diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> > > > b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> > > > index 4a49a76..feeaf75 100644
> > > > --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> > > > +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> > > > @@ -2333,10 +2333,10 @@ static int hns3_alloc_buffer(struct
> > > > hns3_enet_ring *ring,
> > > >  }
> > > >
> > > >  static void hns3_free_buffer(struct hns3_enet_ring *ring,
> > > > -                      struct hns3_desc_cb *cb)
> > > > +                      struct hns3_desc_cb *cb, int budget)
> > > >  {
> > > >   if (cb->type == DESC_TYPE_SKB)
> > > > -         dev_kfree_skb_any((struct sk_buff *)cb->priv);
> > > > +         napi_consume_skb(cb->priv, budget);
> > >
> > > This code can be reached from hns3_lb_clear_tx_ring() below which
> > > is
> > > your loopback test and called with non-zero budget, I am not sure
> > > you
> > > are allowed to call napi_consume_skb() with non-zero budget outside
> > > napi context, perhaps the cb->type for loopback test is different
> > > in lb
> > > test case ? Idk.. , please double check other code paths.
> >
> > Yes, loopback test may call napi_consume_skb() with non-zero budget
> > outside
> > napi context. Thanks for pointing out this case.
> >
> > How about add the below WARN_ONCE() in napi_consume_skb() to catch
> > this
> > kind of error?
> >
> > WARN_ONCE(!in_serving_softirq(), "napi_consume_skb() is called with
> > non-zero budget outside napi context");
> >
>
> Cc: Eric
>
> I don't know, need to check performance impact.
> And in_serving_softirq() doesn't necessarily mean in napi
> but looking at _kfree_skb_defer(), i think it shouldn't care if napi or
> not as long as it runs in soft irq it will push the skb to that
> particular cpu napi_alloc_cache, which should be fine.
>
> Maybe instead of the WARN_ONCE just remove the budget condition and
> replace it with
>
> if (!in_serving_softirq())
>       dev_consume_skb_any(skb);
>

I think we need to keep costs small.

So lets add a CONFIG_DEBUG_NET option so that developers can add
various DEBUG_NET() clauses.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 6/6] net: hns3: use napi_consume_skb() when cleaning tx desc
  2020-09-16  8:38         ` Eric Dumazet
@ 2020-09-17  6:52           ` Yunsheng Lin
  0 siblings, 0 replies; 14+ messages in thread
From: Yunsheng Lin @ 2020-09-17  6:52 UTC (permalink / raw)
  To: Eric Dumazet, Saeed Mahameed
  Cc: Saeed Mahameed, tanhuazhong, davem, yisen.zhuang, salil.mehta,
	linuxarm, netdev, linux-kernel, kuba

On 2020/9/16 16:38, Eric Dumazet wrote:
> On Wed, Sep 16, 2020 at 10:33 AM Saeed Mahameed <saeed@kernel.org> wrote:
>>
>> On Tue, 2020-09-15 at 15:04 +0800, Yunsheng Lin wrote:
>>> On 2020/9/15 13:09, Saeed Mahameed wrote:
>>>> On Mon, 2020-09-14 at 20:06 +0800, Huazhong Tan wrote:
>>>>> From: Yunsheng Lin <linyunsheng@huawei.com>
>>>>>
>>>>> Use napi_consume_skb() to batch consuming skb when cleaning
>>>>> tx desc in NAPI polling.
>>>>>
>>>>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
>>>>> Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
>>>>> ---
>>>>>  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 27
>>>>> +++++++++++-
>>>>> ----------
>>>>>  drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |  2 +-
>>>>>  drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |  4 ++--
>>>>>  3 files changed, 17 insertions(+), 16 deletions(-)
>>>>>
>>>>> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
>>>>> b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
>>>>> index 4a49a76..feeaf75 100644
>>>>> --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
>>>>> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
>>>>> @@ -2333,10 +2333,10 @@ static int hns3_alloc_buffer(struct
>>>>> hns3_enet_ring *ring,
>>>>>  }
>>>>>
>>>>>  static void hns3_free_buffer(struct hns3_enet_ring *ring,
>>>>> -                      struct hns3_desc_cb *cb)
>>>>> +                      struct hns3_desc_cb *cb, int budget)
>>>>>  {
>>>>>   if (cb->type == DESC_TYPE_SKB)
>>>>> -         dev_kfree_skb_any((struct sk_buff *)cb->priv);
>>>>> +         napi_consume_skb(cb->priv, budget);
>>>>
>>>> This code can be reached from hns3_lb_clear_tx_ring() below which
>>>> is
>>>> your loopback test and called with non-zero budget, I am not sure
>>>> you
>>>> are allowed to call napi_consume_skb() with non-zero budget outside
>>>> napi context, perhaps the cb->type for loopback test is different
>>>> in lb
>>>> test case ? Idk.. , please double check other code paths.
>>>
>>> Yes, loopback test may call napi_consume_skb() with non-zero budget
>>> outside
>>> napi context. Thanks for pointing out this case.
>>>
>>> How about add the below WARN_ONCE() in napi_consume_skb() to catch
>>> this
>>> kind of error?
>>>
>>> WARN_ONCE(!in_serving_softirq(), "napi_consume_skb() is called with
>>> non-zero budget outside napi context");
>>>
>>
>> Cc: Eric
>>
>> I don't know, need to check performance impact.
>> And in_serving_softirq() doesn't necessarily mean in napi
>> but looking at _kfree_skb_defer(), i think it shouldn't care if napi or
>> not as long as it runs in soft irq it will push the skb to that
>> particular cpu napi_alloc_cache, which should be fine.

Yes, we only need to ensure _kfree_skb_defer() runs with automic context.

And it seems NAPI polling can be in thread context with BH disabled in below
patch, so in_softirq() checking should be more future-proof?

* in_softirq()   - We have BH disabled, or are processing softirqs

net: add support for threaded NAPI polling
https://www.mail-archive.com/netdev@vger.kernel.org/msg348491.html


>>
>> Maybe instead of the WARN_ONCE just remove the budget condition and
>> replace it with
>>
>> if (!in_serving_softirq())
>>       dev_consume_skb_any(skb);

Yes, that is good idea, _kfree_skb_defer() is only called in softirq or
BH disabled context, dev_consume_skb_any(skb) is called in other context,
so driver author do not need to worry about the calling context of the
napi_consume_skb().

>>
> 
> I think we need to keep costs small.
> 
> So lets add a CONFIG_DEBUG_NET option so that developers can add
> various DEBUG_NET() clauses.

Do you means something like below:

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 157e024..61a6a62 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -5104,6 +5104,15 @@ do {								\
 })
 #endif

+#if defined(CONFIG_DEBUG_NET)
+#define DEBUG_NET_WARN(condition, format...)				\
+	do {								\
+		WARN(condition, ##__VA_ARGS__);
+	} while (0)
+#else
+#define DEBUG_NET_WARN(condition, format...)
+#endif
+
 /*
  *	The list of packet types we will receive (as opposed to discard)
  *	and the routines to invoke.
diff --git a/net/Kconfig b/net/Kconfig
index 3831206..f59ea4b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -473,3 +473,9 @@ config HAVE_CBPF_JIT
 # Extended BPF JIT (eBPF)
 config HAVE_EBPF_JIT
 	bool
+
+config DEBUG_NET
+	bool
+	depends on DEBUG_KERNEL
+	help
+	  Say Y here to add some extra checks and diagnostics to networking.
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bfd7483..10547db 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -904,6 +904,9 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
 		return;
 	}

+	DEBUG_NET_WARN(!in_serving_softirq(),
+		        "napi_consume_skb() is called with non-zero budget outside softirq context");
+
 	if (!skb_unref(skb))
 		return;


> .
> 

^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2020-09-17  7:09 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-14 12:06 [PATCH net-next 0/6] net: hns3: updates for -next Huazhong Tan
2020-09-14 12:06 ` [PATCH net-next 1/6] net: hns3: batch the page reference count updates Huazhong Tan
2020-09-14 12:06 ` [PATCH net-next 2/6] net: hns3: batch tx doorbell operation Huazhong Tan
2020-09-14 12:06 ` [PATCH net-next 3/6] net: hns3: optimize the tx clean process Huazhong Tan
2020-09-14 12:06 ` [PATCH net-next 4/6] net: hns3: optimize the rx " Huazhong Tan
2020-09-14 12:06 ` [PATCH net-next 5/6] net: hns3: use writel() to optimize the barrier operation Huazhong Tan
2020-09-14 21:45   ` Jakub Kicinski
2020-09-15  1:41     ` Yunsheng Lin
2020-09-14 12:06 ` [PATCH net-next 6/6] net: hns3: use napi_consume_skb() when cleaning tx desc Huazhong Tan
2020-09-15  5:09   ` Saeed Mahameed
2020-09-15  7:04     ` Yunsheng Lin
2020-09-16  8:33       ` Saeed Mahameed
2020-09-16  8:38         ` Eric Dumazet
2020-09-17  6:52           ` Yunsheng Lin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).