* [PATCH net-next 1/7] gve: Switch to use napi_complete_done
@ 2021-10-07 16:25 Jeroen de Borst
  2021-10-07 16:25 ` [PATCH net-next 2/7] gve: Add rx buffer pagecnt bias Jeroen de Borst
                   ` (6 more replies)
  0 siblings, 7 replies; 10+ messages in thread
From: Jeroen de Borst @ 2021-10-07 16:25 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, Yangchun Fu, Catherine Sullivan, David Awogbemila

From: Yangchun Fu <yangchun@google.com>

Use napi_complete_done to allow for the use of gro_flush_timeout.

Fixes: f5cedc84a30d2 ("gve: Add transmit and receive support")
Signed-off-by: Yangchun Fu <yangchun@google.com>
Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David Awogbemila <awogbemila@google.com>
---
 drivers/net/ethernet/google/gve/gve.h      |  5 ++-
 drivers/net/ethernet/google/gve/gve_main.c | 38 +++++++++++++---------
 drivers/net/ethernet/google/gve/gve_rx.c   | 37 +++++++++++----------
 3 files changed, 43 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 85bf825606e8..59c525800e5d 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -825,11 +825,10 @@ __be32 gve_tx_load_event_counter(struct gve_priv *priv,
 				 struct gve_tx_ring *tx);
 /* rx handling */
 void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx);
-bool gve_rx_poll(struct gve_notify_block *block, int budget);
+int gve_rx_poll(struct gve_notify_block *block, int budget);
+bool gve_rx_work_pending(struct gve_rx_ring *rx);
 int gve_rx_alloc_rings(struct gve_priv *priv);
 void gve_rx_free_rings_gqi(struct gve_priv *priv);
-bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
-		       netdev_features_t feat);
 /* Reset */
 void gve_schedule_reset(struct gve_priv *priv);
 int gve_reset(struct gve_priv *priv, bool attempt_teardown);
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index cd9df68cc01e..388262c61b8d 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -181,34 +181,40 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
 	__be32 __iomem *irq_doorbell;
 	bool reschedule = false;
 	struct gve_priv *priv;
+	int work_done = 0;
 
 	block = container_of(napi, struct gve_notify_block, napi);
 	priv = block->priv;
 
 	if (block->tx)
 		reschedule |= gve_tx_poll(block, budget);
-	if (block->rx)
-		reschedule |= gve_rx_poll(block, budget);
+	if (block->rx) {
+		work_done = gve_rx_poll(block, budget);
+		reschedule |= work_done == budget;
+	}
 
 	if (reschedule)
 		return budget;
 
-	napi_complete(napi);
-	irq_doorbell = gve_irq_doorbell(priv, block);
-	iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
+	/* Complete processing - don't unmask irq if busy polling is enabled */
+	if (likely(napi_complete_done(napi, work_done))) {
+		irq_doorbell = gve_irq_doorbell(priv, block);
+		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
 
-	/* Double check we have no extra work.
-	 * Ensure unmask synchronizes with checking for work.
-	 */
-	mb();
-	if (block->tx)
-		reschedule |= gve_tx_poll(block, -1);
-	if (block->rx)
-		reschedule |= gve_rx_poll(block, -1);
-	if (reschedule && napi_reschedule(napi))
-		iowrite32be(GVE_IRQ_MASK, irq_doorbell);
+		/* Double check we have no extra work.
+		 * Ensure unmask synchronizes with checking for work.
+		 */
+		mb();
 
-	return 0;
+		if (block->tx)
+			reschedule |= gve_tx_poll(block, -1);
+		if (block->rx)
+			reschedule |= gve_rx_work_pending(block->rx);
+
+		if (reschedule && napi_reschedule(napi))
+			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
+	}
+	return work_done;
 }
 
 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index bb8261368250..bb9fc456416b 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -450,7 +450,7 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
 	return true;
 }
 
-static bool gve_rx_work_pending(struct gve_rx_ring *rx)
+bool gve_rx_work_pending(struct gve_rx_ring *rx)
 {
 	struct gve_rx_desc *desc;
 	__be16 flags_seq;
@@ -518,8 +518,8 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
 	return true;
 }
 
-bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
-		       netdev_features_t feat)
+int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
+		      netdev_features_t feat)
 {
 	struct gve_priv *priv = rx->gve;
 	u32 work_done = 0, packets = 0;
@@ -553,13 +553,15 @@ bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
 	}
 
 	if (!work_done && rx->fill_cnt - cnt > rx->db_threshold)
-		return false;
+		return 0;
 
-	u64_stats_update_begin(&rx->statss);
-	rx->rpackets += packets;
-	rx->rbytes += bytes;
-	u64_stats_update_end(&rx->statss);
-	rx->cnt = cnt;
+	if (work_done) {
+		u64_stats_update_begin(&rx->statss);
+		rx->rpackets += packets;
+		rx->rbytes += bytes;
+		u64_stats_update_end(&rx->statss);
+		rx->cnt = cnt;
+	}
 
 	/* restock ring slots */
 	if (!rx->data.raw_addressing) {
@@ -570,26 +572,26 @@ bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
 		 * falls below a threshold.
 		 */
 		if (!gve_rx_refill_buffers(priv, rx))
-			return false;
+			return 0;
 
 		/* If we were not able to completely refill buffers, we'll want
 		 * to schedule this queue for work again to refill buffers.
 		 */
 		if (rx->fill_cnt - cnt <= rx->db_threshold) {
 			gve_rx_write_doorbell(priv, rx);
-			return true;
+			return budget;
 		}
 	}
 
 	gve_rx_write_doorbell(priv, rx);
-	return gve_rx_work_pending(rx);
+	return work_done;
 }
 
-bool gve_rx_poll(struct gve_notify_block *block, int budget)
+int gve_rx_poll(struct gve_notify_block *block, int budget)
 {
 	struct gve_rx_ring *rx = block->rx;
 	netdev_features_t feat;
-	bool repoll = false;
+	int work_done = 0;
 
 	feat = block->napi.dev->features;
 
@@ -598,8 +600,7 @@ bool gve_rx_poll(struct gve_notify_block *block, int budget)
 		budget = INT_MAX;
 
 	if (budget > 0)
-		repoll |= gve_clean_rx_done(rx, budget, feat);
-	else
-		repoll |= gve_rx_work_pending(rx);
-	return repoll;
+		work_done = gve_clean_rx_done(rx, budget, feat);
+
+	return work_done;
 }
-- 
2.33.0.800.g4c38ced690-goog



* [PATCH net-next 2/7] gve: Add rx buffer pagecnt bias
  2021-10-07 16:25 [PATCH net-next 1/7] gve: Switch to use napi_complete_done Jeroen de Borst
@ 2021-10-07 16:25 ` Jeroen de Borst
  2021-10-08  2:31   ` Jakub Kicinski
  2021-10-07 16:25 ` [PATCH net-next 3/7] gve: Do lazy cleanup in TX path Jeroen de Borst
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 10+ messages in thread
From: Jeroen de Borst @ 2021-10-07 16:25 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, Catherine Sullivan, Yanchun Fu, Nathan Lewis,
	David Awogbemila

From: Catherine Sullivan <csully@google.com>

Add a pagecnt bias field to the rx buffer info struct so that the
atomic page ref count does not have to be incremented on every pass
through the rx hotpath.

Also prefetch two packet pages ahead.

Fixes: ede3fcf5ec67f ("gve: Add support for raw addressing to the rx path")
Signed-off-by: Yanchun Fu <yangchun@google.com>
Signed-off-by: Nathan Lewis <npl@google.com>
Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David Awogbemila <awogbemila@google.com>
---
 drivers/net/ethernet/google/gve/gve_rx.c | 52 +++++++++++++++++-------
 1 file changed, 37 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index bb9fc456416b..ecf5a396290b 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -16,19 +16,23 @@ static void gve_rx_free_buffer(struct device *dev,
 	dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
 				      GVE_DATA_SLOT_ADDR_PAGE_MASK);
 
+	page_ref_sub(page_info->page, page_info->pagecnt_bias - 1);
 	gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
 }
 
 static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
 {
-	if (rx->data.raw_addressing) {
-		u32 slots = rx->mask + 1;
-		int i;
+	u32 slots = rx->mask + 1;
+	int i;
 
+	if (rx->data.raw_addressing) {
 		for (i = 0; i < slots; i++)
 			gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
 					   &rx->data.data_ring[i]);
 	} else {
+		for (i = 0; i < slots; i++)
+			page_ref_sub(rx->data.page_info[i].page,
+				     rx->data.page_info[i].pagecnt_bias - 1);
 		gve_unassign_qpl(priv, rx->data.qpl->id);
 		rx->data.qpl = NULL;
 	}
@@ -69,6 +73,9 @@ static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
 	page_info->page_offset = 0;
 	page_info->page_address = page_address(page);
 	*slot_addr = cpu_to_be64(addr);
+	/* The page already has 1 ref */
+	page_ref_add(page, INT_MAX - 1);
+	page_info->pagecnt_bias = INT_MAX;
 }
 
 static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
@@ -293,17 +300,18 @@ static bool gve_rx_can_flip_buffers(struct net_device *netdev)
 		? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false;
 }
 
-static int gve_rx_can_recycle_buffer(struct page *page)
+static int gve_rx_can_recycle_buffer(struct gve_rx_slot_page_info *page_info)
 {
-	int pagecount = page_count(page);
+	int pagecount = page_count(page_info->page);
 
 	/* This page is not being used by any SKBs - reuse */
-	if (pagecount == 1)
+	if (pagecount == page_info->pagecnt_bias)
 		return 1;
 	/* This page is still being used by an SKB - we can't reuse */
-	else if (pagecount >= 2)
+	else if (pagecount > page_info->pagecnt_bias)
 		return 0;
-	WARN(pagecount < 1, "Pagecount should never be < 1");
+	WARN(pagecount < page_info->pagecnt_bias,
+	     "Pagecount should never be less than the bias.");
 	return -1;
 }
 
@@ -319,11 +327,11 @@ gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
 	if (!skb)
 		return NULL;
 
-	/* Optimistically stop the kernel from freeing the page by increasing
-	 * the page bias. We will check the refcount in refill to determine if
-	 * we need to alloc a new page.
+	/* Optimistically stop the kernel from freeing the page.
+	 * We will check again in refill to determine if we need to alloc a
+	 * new page.
 	 */
-	get_page(page_info->page);
+	gve_dec_pagecnt_bias(page_info);
 
 	return skb;
 }
@@ -346,7 +354,7 @@ gve_rx_qpl(struct device *dev, struct net_device *netdev,
 		/* No point in recycling if we didn't get the skb */
 		if (skb) {
 			/* Make sure that the page isn't freed. */
-			get_page(page_info->page);
+			gve_dec_pagecnt_bias(page_info);
 			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
 		}
 	} else {
@@ -370,8 +378,18 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
 	union gve_rx_data_slot *data_slot;
 	struct sk_buff *skb = NULL;
 	dma_addr_t page_bus;
+	void *va;
 	u16 len;
 
+	/* Prefetch two packet pages ahead, we will need it soon. */
+	page_info = &rx->data.page_info[(idx + 2) & rx->mask];
+	va = page_info->page_address + GVE_RX_PAD +
+		page_info->page_offset;
+
+	prefetch(page_info->page); /* Kernel page struct. */
+	prefetch(va);              /* Packet header. */
+	prefetch(va + 64);         /* Next cacheline too. */
+
 	/* drop this packet */
 	if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
 		u64_stats_update_begin(&rx->statss);
@@ -402,7 +420,7 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
 		int recycle = 0;
 
 		if (can_flip) {
-			recycle = gve_rx_can_recycle_buffer(page_info->page);
+			recycle = gve_rx_can_recycle_buffer(page_info);
 			if (recycle < 0) {
 				if (!rx->data.raw_addressing)
 					gve_schedule_reset(priv);
@@ -493,7 +511,7 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
 			 * owns half the page it is impossible to tell which half. Either
 			 * the whole page is free or it needs to be replaced.
 			 */
-			int recycle = gve_rx_can_recycle_buffer(page_info->page);
+			int recycle = gve_rx_can_recycle_buffer(page_info);
 
 			if (recycle < 0) {
 				if (!rx->data.raw_addressing)
@@ -540,6 +558,10 @@ int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
 			   "[%d] seqno=%d rx->desc.seqno=%d\n",
 			   rx->q_num, GVE_SEQNO(desc->flags_seq),
 			   rx->desc.seqno);
+
+		/* prefetch two descriptors ahead */
+		prefetch(rx->desc.desc_ring + ((cnt + 2) & rx->mask));
+
 		dropped = !gve_rx(rx, desc, feat, idx);
 		if (!dropped) {
 			bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
-- 
2.33.0.800.g4c38ced690-goog



* [PATCH net-next 3/7] gve: Do lazy cleanup in TX path
  2021-10-07 16:25 [PATCH net-next 1/7] gve: Switch to use napi_complete_done Jeroen de Borst
  2021-10-07 16:25 ` [PATCH net-next 2/7] gve: Add rx buffer pagecnt bias Jeroen de Borst
@ 2021-10-07 16:25 ` Jeroen de Borst
  2021-10-07 19:57   ` Eric Dumazet
  2021-10-07 16:25 ` [PATCH net-next 4/7] gve: Recover from queue stall due to missed IRQ Jeroen de Borst
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 10+ messages in thread
From: Jeroen de Borst @ 2021-10-07 16:25 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, Tao Liu, Catherine Sullivan

From: Tao Liu <xliutaox@google.com>

When the TX queue is full, attempt to process enough TX completions
to avoid stalling the queue.

Fixes: f5cedc84a30d2 ("gve: Add transmit and receive support")
Signed-off-by: Tao Liu <xliutaox@google.com>
Signed-off-by: Catherine Sullivan <csully@google.com>
---
 drivers/net/ethernet/google/gve/gve.h         |  9 +-
 drivers/net/ethernet/google/gve/gve_ethtool.c |  3 +-
 drivers/net/ethernet/google/gve/gve_main.c    |  6 +-
 drivers/net/ethernet/google/gve/gve_tx.c      | 94 +++++++++++--------
 4 files changed, 62 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 59c525800e5d..003b30b91c6d 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -341,8 +341,8 @@ struct gve_tx_ring {
 	union {
 		/* GQI fields */
 		struct {
-			/* NIC tail pointer */
-			__be32 last_nic_done;
+			/* Spinlock for when cleanup in progress */
+			spinlock_t clean_lock;
 		};
 
 		/* DQO fields. */
@@ -821,8 +821,9 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev);
 bool gve_tx_poll(struct gve_notify_block *block, int budget);
 int gve_tx_alloc_rings(struct gve_priv *priv);
 void gve_tx_free_rings_gqi(struct gve_priv *priv);
-__be32 gve_tx_load_event_counter(struct gve_priv *priv,
-				 struct gve_tx_ring *tx);
+u32 gve_tx_load_event_counter(struct gve_priv *priv,
+			      struct gve_tx_ring *tx);
+bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx);
 /* rx handling */
 void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx);
 int gve_rx_poll(struct gve_notify_block *block, int budget);
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index 716e6240305d..618a3e1d858e 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -330,8 +330,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
 			data[i++] = tmp_tx_bytes;
 			data[i++] = tx->wake_queue;
 			data[i++] = tx->stop_queue;
-			data[i++] = be32_to_cpu(gve_tx_load_event_counter(priv,
-									  tx));
+			data[i++] = gve_tx_load_event_counter(priv, tx);
 			data[i++] = tx->dma_mapping_error;
 			/* stats from NIC */
 			if (skip_nic_stats) {
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 388262c61b8d..74e35a87ec38 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -201,13 +201,13 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
 		irq_doorbell = gve_irq_doorbell(priv, block);
 		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
 
-		/* Double check we have no extra work.
-		 * Ensure unmask synchronizes with checking for work.
+		/* Ensure IRQ ACK is visible before we check pending work.
+		 * If queue had issued updates, it would be truly visible.
 		 */
 		mb();
 
 		if (block->tx)
-			reschedule |= gve_tx_poll(block, -1);
+			reschedule |= gve_tx_clean_pending(priv, block->tx);
 		if (block->rx)
 			reschedule |= gve_rx_work_pending(block->rx);
 
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 9922ce46a635..a9cb241fedf4 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -144,7 +144,7 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx)
 
 	gve_tx_remove_from_block(priv, idx);
 	slots = tx->mask + 1;
-	gve_clean_tx_done(priv, tx, tx->req, false);
+	gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
 	netdev_tx_reset_queue(tx->netdev_txq);
 
 	dma_free_coherent(hdev, sizeof(*tx->q_resources),
@@ -176,6 +176,7 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
 
 	/* Make sure everything is zeroed to start */
 	memset(tx, 0, sizeof(*tx));
+	spin_lock_init(&tx->clean_lock);
 	tx->q_num = idx;
 
 	tx->mask = slots - 1;
@@ -328,10 +329,16 @@ static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
 	return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && can_alloc);
 }
 
+static_assert(NAPI_POLL_WEIGHT >= MAX_TX_DESC_NEEDED);
+
 /* Stops the queue if the skb cannot be transmitted. */
-static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb)
+static int gve_maybe_stop_tx(struct gve_priv *priv, struct gve_tx_ring *tx,
+			     struct sk_buff *skb)
 {
 	int bytes_required = 0;
+	u32 nic_done;
+	u32 to_do;
+	int ret;
 
 	if (!tx->raw_addressing)
 		bytes_required = gve_skb_fifo_bytes_required(tx, skb);
@@ -339,29 +346,28 @@ static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb)
 	if (likely(gve_can_tx(tx, bytes_required)))
 		return 0;
 
-	/* No space, so stop the queue */
-	tx->stop_queue++;
-	netif_tx_stop_queue(tx->netdev_txq);
-	smp_mb();	/* sync with restarting queue in gve_clean_tx_done() */
-
-	/* Now check for resources again, in case gve_clean_tx_done() freed
-	 * resources after we checked and we stopped the queue after
-	 * gve_clean_tx_done() checked.
-	 *
-	 * gve_maybe_stop_tx()			gve_clean_tx_done()
-	 *   nsegs/can_alloc test failed
-	 *					  gve_tx_free_fifo()
-	 *					  if (tx queue stopped)
-	 *					    netif_tx_queue_wake()
-	 *   netif_tx_stop_queue()
-	 *   Need to check again for space here!
-	 */
-	if (likely(!gve_can_tx(tx, bytes_required)))
-		return -EBUSY;
+	ret = -EBUSY;
+	spin_lock(&tx->clean_lock);
+	nic_done = gve_tx_load_event_counter(priv, tx);
+	to_do = nic_done - tx->done;
 
-	netif_tx_start_queue(tx->netdev_txq);
-	tx->wake_queue++;
-	return 0;
+	/* Only try to clean if there is hope for TX */
+	if (to_do + gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED) {
+		if (to_do > 0) {
+			to_do = min_t(u32, to_do, NAPI_POLL_WEIGHT);
+			gve_clean_tx_done(priv, tx, to_do, false);
+		}
+		if (likely(gve_can_tx(tx, bytes_required)))
+			ret = 0;
+	}
+	if (ret) {
+		/* No space, so stop the queue */
+		tx->stop_queue++;
+		netif_tx_stop_queue(tx->netdev_txq);
+	}
+	spin_unlock(&tx->clean_lock);
+
+	return ret;
 }
 
 static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc,
@@ -576,7 +582,7 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
 	WARN(skb_get_queue_mapping(skb) >= priv->tx_cfg.num_queues,
 	     "skb queue index out of range");
 	tx = &priv->tx[skb_get_queue_mapping(skb)];
-	if (unlikely(gve_maybe_stop_tx(tx, skb))) {
+	if (unlikely(gve_maybe_stop_tx(priv, tx, skb))) {
 		/* We need to ring the txq doorbell -- we have stopped the Tx
 		 * queue for want of resources, but prior calls to gve_tx()
 		 * may have added descriptors without ringing the doorbell.
@@ -672,19 +678,19 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
 	return pkts;
 }
 
-__be32 gve_tx_load_event_counter(struct gve_priv *priv,
-				 struct gve_tx_ring *tx)
+u32 gve_tx_load_event_counter(struct gve_priv *priv,
+			      struct gve_tx_ring *tx)
 {
-	u32 counter_index = be32_to_cpu((tx->q_resources->counter_index));
+	u32 counter_index = be32_to_cpu(tx->q_resources->counter_index);
+	__be32 counter = READ_ONCE(priv->counter_array[counter_index]);
 
-	return READ_ONCE(priv->counter_array[counter_index]);
+	return be32_to_cpu(counter);
 }
 
 bool gve_tx_poll(struct gve_notify_block *block, int budget)
 {
 	struct gve_priv *priv = block->priv;
 	struct gve_tx_ring *tx = block->tx;
-	bool repoll = false;
 	u32 nic_done;
 	u32 to_do;
 
@@ -692,17 +698,23 @@ bool gve_tx_poll(struct gve_notify_block *block, int budget)
 	if (budget == 0)
 		budget = INT_MAX;
 
+	/* In TX path, it may try to clean completed pkts in order to xmit,
+	 * to avoid cleaning conflict, use spin_lock(), it yields better
+	 * concurrency between xmit/clean than netif's lock.
+	 */
+	spin_lock(&tx->clean_lock);
 	/* Find out how much work there is to be done */
-	tx->last_nic_done = gve_tx_load_event_counter(priv, tx);
-	nic_done = be32_to_cpu(tx->last_nic_done);
-	if (budget > 0) {
-		/* Do as much work as we have that the budget will
-		 * allow
-		 */
-		to_do = min_t(u32, (nic_done - tx->done), budget);
-		gve_clean_tx_done(priv, tx, to_do, true);
-	}
+	nic_done = gve_tx_load_event_counter(priv, tx);
+	to_do = min_t(u32, (nic_done - tx->done), budget);
+	gve_clean_tx_done(priv, tx, to_do, true);
+	spin_unlock(&tx->clean_lock);
 	/* If we still have work we want to repoll */
-	repoll |= (nic_done != tx->done);
-	return repoll;
+	return nic_done != tx->done;
+}
+
+bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx)
+{
+	u32 nic_done = gve_tx_load_event_counter(priv, tx);
+
+	return nic_done != tx->done;
 }
-- 
2.33.0.800.g4c38ced690-goog



* [PATCH net-next 4/7] gve: Recover from queue stall due to missed IRQ
  2021-10-07 16:25 [PATCH net-next 1/7] gve: Switch to use napi_complete_done Jeroen de Borst
  2021-10-07 16:25 ` [PATCH net-next 2/7] gve: Add rx buffer pagecnt bias Jeroen de Borst
  2021-10-07 16:25 ` [PATCH net-next 3/7] gve: Do lazy cleanup in TX path Jeroen de Borst
@ 2021-10-07 16:25 ` Jeroen de Borst
  2021-10-07 16:25 ` [PATCH net-next 5/7] gve: Add netif_set_xps_queue call Jeroen de Borst
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Jeroen de Borst @ 2021-10-07 16:25 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, John Fraker, David Awogbemila

From: John Fraker <jfraker@google.com>

Don't always reset the driver on a TX timeout. Attempt to
recover by kicking the queue in case an IRQ was missed.

Fixes: 9e5f7d26a4c08 ("gve: Add workqueue and reset support")
Signed-off-by: John Fraker <jfraker@google.com>
Signed-off-by: David Awogbemila <awogbemila@google.com>
---
 drivers/net/ethernet/google/gve/gve.h        |  4 +-
 drivers/net/ethernet/google/gve/gve_adminq.h |  1 +
 drivers/net/ethernet/google/gve/gve_main.c   | 48 +++++++++++++++++++-
 3 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 003b30b91c6d..b8d46adb9c1a 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -30,7 +30,7 @@
 #define GVE_MIN_MSIX 3
 
 /* Numbers of gve tx/rx stats in stats report. */
-#define GVE_TX_STATS_REPORT_NUM	5
+#define GVE_TX_STATS_REPORT_NUM	6
 #define GVE_RX_STATS_REPORT_NUM	2
 
 /* Interval to schedule a stats report update, 20000ms. */
@@ -413,7 +413,9 @@ struct gve_tx_ring {
 	u32 q_num ____cacheline_aligned; /* queue idx */
 	u32 stop_queue; /* count of queue stops */
 	u32 wake_queue; /* count of queue wakes */
+	u32 queue_timeout; /* count of queue timeouts */
 	u32 ntfy_id; /* notification block index */
+	u32 last_kick_msec; /* Last time the queue was kicked */
 	dma_addr_t bus; /* dma address of the descr ring */
 	dma_addr_t q_resources_bus; /* dma address of the queue resources */
 	dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index 47c3d8f313fc..3953f6f7a427 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -270,6 +270,7 @@ enum gve_stat_names {
 	TX_LAST_COMPLETION_PROCESSED	= 5,
 	RX_NEXT_EXPECTED_SEQUENCE	= 6,
 	RX_BUFFERS_POSTED		= 7,
+	TX_TIMEOUT_CNT			= 8,
 	// stats from NIC
 	RX_QUEUE_DROP_CNT		= 65,
 	RX_NO_BUFFERS_POSTED		= 66,
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 74e35a87ec38..d969040deab6 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -24,6 +24,9 @@
 #define GVE_VERSION		"1.0.0"
 #define GVE_VERSION_PREFIX	"GVE-"
 
+// Minimum amount of time between queue kicks in msec (10 seconds)
+#define MIN_TX_TIMEOUT_GAP (1000 * 10)
+
 const char gve_version_str[] = GVE_VERSION;
 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
 
@@ -1109,9 +1112,47 @@ static void gve_turnup(struct gve_priv *priv)
 
 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
 {
-	struct gve_priv *priv = netdev_priv(dev);
+	struct gve_notify_block *block;
+	struct gve_tx_ring *tx = NULL;
+	struct gve_priv *priv;
+	u32 last_nic_done;
+	u32 current_time;
+	u32 ntfy_idx;
+
+	netdev_info(dev, "Timeout on tx queue, %d", txqueue);
+	priv = netdev_priv(dev);
+	if (txqueue > priv->tx_cfg.num_queues)
+		goto reset;
+
+	ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
+	if (ntfy_idx > priv->num_ntfy_blks)
+		goto reset;
+
+	block = &priv->ntfy_blocks[ntfy_idx];
+	tx = block->tx;
 
+	current_time = jiffies_to_msecs(jiffies);
+	if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
+		goto reset;
+
+	/* Check to see if there are missed completions, which will allow us to
+	 * kick the queue.
+	 */
+	last_nic_done = gve_tx_load_event_counter(priv, tx);
+	if (last_nic_done - tx->done) {
+		netdev_info(dev, "Kicking queue %d", txqueue);
+		iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
+		napi_schedule(&block->napi);
+		tx->last_kick_msec = current_time;
+		goto out;
+	} // Else reset.
+
+reset:
 	gve_schedule_reset(priv);
+
+out:
+	if (tx)
+		tx->queue_timeout++;
 	priv->tx_timeo_cnt++;
 }
 
@@ -1239,6 +1280,11 @@ void gve_handle_report_stats(struct gve_priv *priv)
 				.value = cpu_to_be64(last_completion),
 				.queue_id = cpu_to_be32(idx),
 			};
+			stats[stats_idx++] = (struct stats) {
+				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
+				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
+				.queue_id = cpu_to_be32(idx),
+			};
 		}
 	}
 	/* rx stats */
-- 
2.33.0.800.g4c38ced690-goog



* [PATCH net-next 5/7] gve: Add netif_set_xps_queue call
  2021-10-07 16:25 [PATCH net-next 1/7] gve: Switch to use napi_complete_done Jeroen de Borst
                   ` (2 preceding siblings ...)
  2021-10-07 16:25 ` [PATCH net-next 4/7] gve: Recover from queue stall due to missed IRQ Jeroen de Borst
@ 2021-10-07 16:25 ` Jeroen de Borst
  2021-10-07 16:25 ` [PATCH net-next 6/7] gve: Allow pageflips on larger pages Jeroen de Borst
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Jeroen de Borst @ 2021-10-07 16:25 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, Catherine Sullivan, David Awogbemila

From: Catherine Sullivan <csully@google.com>

Configure XPS when adding tx queues to the notification blocks.

Fixes: dbdaa67540512 ("gve: Move some static functions to a common file")
Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David Awogbemila <awogbemila@google.com>
---
 drivers/net/ethernet/google/gve/gve_utils.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c
index 93f3dcbeeea9..45ff7a9ab5f9 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.c
+++ b/drivers/net/ethernet/google/gve/gve_utils.c
@@ -18,12 +18,16 @@ void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx)
 
 void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx)
 {
+	unsigned int active_cpus = min_t(int, priv->num_ntfy_blks / 2,
+					 num_online_cpus());
 	int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx);
 	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 	struct gve_tx_ring *tx = &priv->tx[queue_idx];
 
 	block->tx = tx;
 	tx->ntfy_id = ntfy_idx;
+	netif_set_xps_queue(priv->dev, get_cpu_mask(ntfy_idx % active_cpus),
+			    queue_idx);
 }
 
 void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
-- 
2.33.0.800.g4c38ced690-goog



* [PATCH net-next 6/7] gve: Allow pageflips on larger pages
  2021-10-07 16:25 [PATCH net-next 1/7] gve: Switch to use napi_complete_done Jeroen de Borst
                   ` (3 preceding siblings ...)
  2021-10-07 16:25 ` [PATCH net-next 5/7] gve: Add netif_set_xps_queue call Jeroen de Borst
@ 2021-10-07 16:25 ` Jeroen de Borst
  2021-10-07 16:25 ` [PATCH net-next 7/7] gve: Track RX buffer allocation failures Jeroen de Borst
  2021-10-08 10:21 ` [PATCH net-next 1/7] gve: Switch to use napi_complete_done David Miller
  6 siblings, 0 replies; 10+ messages in thread
From: Jeroen de Borst @ 2021-10-07 16:25 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, Jordan Kim, Jeroen de Borst

From: Jordan Kim <jrkim@google.com>

Half pages are only used for packets that are small enough to fit in
them. This change allows that optimization to also apply on systems
with pages larger than 4 KB.

Fixes: 02b0e0c18ba75 ("gve: Rx Buffer Recycling")
Signed-off-by: Jordan Kim <jrkim@google.com>
Signed-off-by: Jeroen de Borst <jeroendb@google.com>
---
 drivers/net/ethernet/google/gve/gve_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index ecf5a396290b..c6e95e1409a9 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -296,7 +296,7 @@ static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *sl
 
 static bool gve_rx_can_flip_buffers(struct net_device *netdev)
 {
-	return PAGE_SIZE == 4096
+	return PAGE_SIZE >= 4096
 		? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false;
 }
 
-- 
2.33.0.800.g4c38ced690-goog



* [PATCH net-next 7/7] gve: Track RX buffer allocation failures
  2021-10-07 16:25 [PATCH net-next 1/7] gve: Switch to use napi_complete_done Jeroen de Borst
                   ` (4 preceding siblings ...)
  2021-10-07 16:25 ` [PATCH net-next 6/7] gve: Allow pageflips on larger pages Jeroen de Borst
@ 2021-10-07 16:25 ` Jeroen de Borst
  2021-10-08 10:21 ` [PATCH net-next 1/7] gve: Switch to use napi_complete_done David Miller
  6 siblings, 0 replies; 10+ messages in thread
From: Jeroen de Borst @ 2021-10-07 16:25 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, Catherine Sullivan, Jeroen de Borst

From: Catherine Sullivan <csully@google.com>

The rx_buf_alloc_fail counter wasn't getting updated.

Fixes: 433e274b8f7b0 ("gve: Add stats for gve.")
Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: Jeroen de Borst <jeroendb@google.com>
---
 drivers/net/ethernet/google/gve/gve_rx.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index c6e95e1409a9..69f6db9ffdfc 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -526,8 +526,13 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
 
 				gve_rx_free_buffer(dev, page_info, data_slot);
 				page_info->page = NULL;
-				if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot))
+				if (gve_rx_alloc_buffer(priv, dev, page_info,
+							data_slot)) {
+					u64_stats_update_begin(&rx->statss);
+					rx->rx_buf_alloc_fail++;
+					u64_stats_update_end(&rx->statss);
 					break;
+				}
 			}
 		}
 		fill_cnt++;
-- 
2.33.0.800.g4c38ced690-goog



* Re: [PATCH net-next 3/7] gve: Do lazy cleanup in TX path
  2021-10-07 16:25 ` [PATCH net-next 3/7] gve: Do lazy cleanup in TX path Jeroen de Borst
@ 2021-10-07 19:57   ` Eric Dumazet
  0 siblings, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2021-10-07 19:57 UTC (permalink / raw)
  To: Jeroen de Borst, netdev; +Cc: davem, kuba, Tao Liu, Catherine Sullivan



On 10/7/21 9:25 AM, Jeroen de Borst wrote:
> From: Tao Liu <xliutaox@google.com>
> 
> When the TX queue is full, attempt to process enough TX completions
> to avoid stalling the queue.
> 
> Fixes: f5cedc84a30d2 ("gve: Add transmit and receive support")
> Signed-off-by: Tao Liu <xliutaox@google.com>
> Signed-off-by: Catherine Sullivan <csully@google.com>
> ---
>  drivers/net/ethernet/google/gve/gve.h         |  9 +-
>  drivers/net/ethernet/google/gve/gve_ethtool.c |  3 +-
>  drivers/net/ethernet/google/gve/gve_main.c    |  6 +-
>  drivers/net/ethernet/google/gve/gve_tx.c      | 94 +++++++++++--------
>  4 files changed, 62 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
> index 59c525800e5d..003b30b91c6d 100644
> --- a/drivers/net/ethernet/google/gve/gve.h
> +++ b/drivers/net/ethernet/google/gve/gve.h
> @@ -341,8 +341,8 @@ struct gve_tx_ring {
>  	union {
>  		/* GQI fields */
>  		struct {
> -			/* NIC tail pointer */
> -			__be32 last_nic_done;
> +			/* Spinlock for when cleanup in progress */
> +			spinlock_t clean_lock;
>  		};
>

This is adding yet another spinlock in tx completion path.

Normally, BQL should kick in and you should not fill the queue completely.

Something is not right.

tx completion can take a lot of time, it seems odd to block an
innocent thread in ndo_start_xmit().

Please provide more details in your changelog ?
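
For context, BQL (Byte Queue Limits) lets a driver report how many bytes are
in flight on a tx queue so the core stops the queue well before the ring
fills. A minimal sketch of that contract follows; the foo_* names are
hypothetical and this is not gve's actual code:

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	struct foo_tx_ring {
		struct netdev_queue *netdev_txq; /* stack-visible tx queue */
	};

	/* xmit path: after descriptors for @skb are posted, report the
	 * bytes queued so the core can cap in-flight data.
	 */
	static void foo_tx_account_sent(struct foo_tx_ring *tx,
					struct sk_buff *skb)
	{
		netdev_tx_sent_queue(tx->netdev_txq, skb->len);
	}

	/* completion path: report what finished; once enough bytes have
	 * completed, the core wakes the queue again.
	 */
	static void foo_tx_account_done(struct foo_tx_ring *tx,
					u32 pkts, u32 bytes)
	{
		netdev_tx_completed_queue(tx->netdev_txq, pkts, bytes);
	}

netdev_tx_reset_queue(), already called in gve_tx_free_ring() above, resets
this accounting when a ring is torn down.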


* Re: [PATCH net-next 2/7] gve: Add rx buffer pagecnt bias
  2021-10-07 16:25 ` [PATCH net-next 2/7] gve: Add rx buffer pagecnt bias Jeroen de Borst
@ 2021-10-08  2:31   ` Jakub Kicinski
  0 siblings, 0 replies; 10+ messages in thread
From: Jakub Kicinski @ 2021-10-08  2:31 UTC (permalink / raw)
  To: Jeroen de Borst
  Cc: netdev, davem, Catherine Sullivan, Yanchun Fu, Nathan Lewis,
	David Awogbemila

On Thu,  7 Oct 2021 09:25:29 -0700 Jeroen de Borst wrote:
> From: Catherine Sullivan <csully@google.com>
> 
> Add a pagecnt bias field to the rx buffer info struct so that the
> atomic page ref count does not have to be incremented on every pass
> through the rx hotpath.
> 
> Also prefetch two packet pages ahead.
> 
> Fixes: ede3fcf5ec67f ("gve: Add support for raw addressing to the rx path")
> Signed-off-by: Yanchun Fu <yangchun@google.com>
> Signed-off-by: Nathan Lewis <npl@google.com>
> Signed-off-by: Catherine Sullivan <csully@google.com>
> Signed-off-by: David Awogbemila <awogbemila@google.com>

drivers/net/ethernet/google/gve/gve_rx.c:521:5: warning: no previous prototype for ‘gve_clean_rx_done’ [-Wmissing-prototypes]
  521 | int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
      |     ^~~~~~~~~~~~~~~~~
drivers/net/ethernet/google/gve/gve_rx.c:521:5: warning: symbol 'gve_clean_rx_done' was not declared. Should it be static?
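
This is the usual missing-prototype complaint: patch 1 removed the
gve_clean_rx_done() declaration from gve.h while the definition in gve_rx.c
kept external linkage. Two conventional ways to resolve it, sketched here
without implying which one a respin actually used:

	/* (a) Restore a prototype in gve.h, matching the new return type: */
	int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
			      netdev_features_t feat);

	/* (b) Or, since the series only calls the function from within
	 *     gve_rx.c, give the definition internal linkage:
	 *
	 *     static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
	 *                                  netdev_features_t feat)
	 */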


* Re: [PATCH net-next 1/7] gve: Switch to use napi_complete_done
  2021-10-07 16:25 [PATCH net-next 1/7] gve: Switch to use napi_complete_done Jeroen de Borst
                   ` (5 preceding siblings ...)
  2021-10-07 16:25 ` [PATCH net-next 7/7] gve: Track RX buffer allocation failures Jeroen de Borst
@ 2021-10-08 10:21 ` David Miller
  6 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2021-10-08 10:21 UTC (permalink / raw)
  To: jeroendb; +Cc: netdev, kuba, yangchun, csully, awogbemila

Please post a patch series with a proper [PATCH 00/NN] header posting
explaining what the series does, how it does it, and why it does it
that way.

Thank you.

