linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V2,net-next, 0/3] net: mana: Add support for EQ sharing
@ 2021-08-24 16:45 Haiyang Zhang
  2021-08-24 16:45 ` [PATCH V2,net-next, 1/3] net: mana: Move NAPI from EQ to CQ Haiyang Zhang
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Haiyang Zhang @ 2021-08-24 16:45 UTC (permalink / raw)
  To: linux-hyperv, netdev
  Cc: haiyangz, kys, sthemmin, paulros, shacharr, olaf, vkuznets,
	davem, linux-kernel

The existing code uses (1 + #vPorts * #Queues) MSIXs, which may exceed
the device limit.

Support EQ sharing, so that multiple vPorts can share the same set of
MSIXs.

Haiyang Zhang (3):
  net: mana: Move NAPI from EQ to CQ
  net: mana: Add support for EQ sharing
  net: mana: Add WARN_ON_ONCE in case of CQE read overflow

 drivers/net/ethernet/microsoft/mana/gdma.h    |  32 ++--
 .../net/ethernet/microsoft/mana/gdma_main.c   |  88 +++-------
 .../net/ethernet/microsoft/mana/hw_channel.c  |   2 +-
 drivers/net/ethernet/microsoft/mana/mana.h    |  29 ++--
 drivers/net/ethernet/microsoft/mana/mana_en.c | 162 ++++++++++--------
 5 files changed, 153 insertions(+), 160 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH V2,net-next, 1/3] net: mana: Move NAPI from EQ to CQ
  2021-08-24 16:45 [PATCH V2,net-next, 0/3] net: mana: Add support for EQ sharing Haiyang Zhang
@ 2021-08-24 16:45 ` Haiyang Zhang
  2021-08-24 16:46 ` [PATCH V2,net-next, 2/3] net: mana: Add support for EQ sharing Haiyang Zhang
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Haiyang Zhang @ 2021-08-24 16:45 UTC (permalink / raw)
  To: linux-hyperv, netdev
  Cc: haiyangz, kys, sthemmin, paulros, shacharr, olaf, vkuznets,
	davem, linux-kernel

The existing code has NAPI threads polling on EQ directly. To prepare
for EQ sharing among vPorts, move NAPI from EQ to CQ so that one EQ
can serve multiple CQs from different vPorts.

The "arm bit" is only set when CQ processing is completed to reduce
the number of EQ entries, which in turn reduces the number of interrupts
on EQ.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>

---
v2: Updates suggested by Dexuan Cui <decui@microsoft.com>
---
 drivers/net/ethernet/microsoft/mana/gdma.h    |   9 +-
 .../net/ethernet/microsoft/mana/gdma_main.c   |  55 +---------
 .../net/ethernet/microsoft/mana/hw_channel.c  |   2 +-
 drivers/net/ethernet/microsoft/mana/mana.h    |  11 +-
 drivers/net/ethernet/microsoft/mana/mana_en.c | 100 +++++++++++-------
 5 files changed, 74 insertions(+), 103 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h
index 33e53d32e891..ddbca64bab07 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma.h
+++ b/drivers/net/ethernet/microsoft/mana/gdma.h
@@ -239,10 +239,8 @@ struct gdma_event {
 
 struct gdma_queue;
 
-#define CQE_POLLING_BUFFER 512
 struct mana_eq {
 	struct gdma_queue *eq;
-	struct gdma_comp cqe_poll[CQE_POLLING_BUFFER];
 };
 
 typedef void gdma_eq_callback(void *context, struct gdma_queue *q,
@@ -291,11 +289,6 @@ struct gdma_queue {
 			unsigned int msix_index;
 
 			u32 log2_throttle_limit;
-
-			/* NAPI data */
-			struct napi_struct napi;
-			int work_done;
-			int budget;
 		} eq;
 
 		struct {
@@ -406,7 +399,7 @@ void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue);
 
 int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe);
 
-void mana_gd_arm_cq(struct gdma_queue *cq);
+void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit);
 
 struct gdma_wqe {
 	u32 reserved	:24;
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 2f87bf90f8ec..560472fa2d00 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -267,7 +267,7 @@ void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue)
 			      queue->id, queue->head * GDMA_WQE_BU_SIZE, 1);
 }
 
-void mana_gd_arm_cq(struct gdma_queue *cq)
+void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
 {
 	struct gdma_context *gc = cq->gdma_dev->gdma_context;
 
@@ -276,7 +276,7 @@ void mana_gd_arm_cq(struct gdma_queue *cq)
 	u32 head = cq->head % (num_cqe << GDMA_CQE_OWNER_BITS);
 
 	mana_gd_ring_doorbell(gc, cq->gdma_dev->doorbell, cq->type, cq->id,
-			      head, SET_ARM_BIT);
+			      head, arm_bit);
 }
 
 static void mana_gd_process_eqe(struct gdma_queue *eq)
@@ -339,7 +339,6 @@ static void mana_gd_process_eq_events(void *arg)
 	struct gdma_queue *eq = arg;
 	struct gdma_context *gc;
 	struct gdma_eqe *eqe;
-	unsigned int arm_bit;
 	u32 head, num_eqe;
 	int i;
 
@@ -370,48 +369,16 @@ static void mana_gd_process_eq_events(void *arg)
 		eq->head++;
 	}
 
-	/* Always rearm the EQ for HWC. For MANA, rearm it when NAPI is done. */
-	if (mana_gd_is_hwc(eq->gdma_dev)) {
-		arm_bit = SET_ARM_BIT;
-	} else if (eq->eq.work_done < eq->eq.budget &&
-		   napi_complete_done(&eq->eq.napi, eq->eq.work_done)) {
-		arm_bit = SET_ARM_BIT;
-	} else {
-		arm_bit = 0;
-	}
-
 	head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS);
 
 	mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id,
-			      head, arm_bit);
-}
-
-static int mana_poll(struct napi_struct *napi, int budget)
-{
-	struct gdma_queue *eq = container_of(napi, struct gdma_queue, eq.napi);
-
-	eq->eq.work_done = 0;
-	eq->eq.budget = budget;
-
-	mana_gd_process_eq_events(eq);
-
-	return min(eq->eq.work_done, budget);
-}
-
-static void mana_gd_schedule_napi(void *arg)
-{
-	struct gdma_queue *eq = arg;
-	struct napi_struct *napi;
-
-	napi = &eq->eq.napi;
-	napi_schedule_irqoff(napi);
+			      head, SET_ARM_BIT);
 }
 
 static int mana_gd_register_irq(struct gdma_queue *queue,
 				const struct gdma_queue_spec *spec)
 {
 	struct gdma_dev *gd = queue->gdma_dev;
-	bool is_mana = mana_gd_is_mana(gd);
 	struct gdma_irq_context *gic;
 	struct gdma_context *gc;
 	struct gdma_resource *r;
@@ -442,20 +409,11 @@ static int mana_gd_register_irq(struct gdma_queue *queue,
 
 	gic = &gc->irq_contexts[msi_index];
 
-	if (is_mana) {
-		netif_napi_add(spec->eq.ndev, &queue->eq.napi, mana_poll,
-			       NAPI_POLL_WEIGHT);
-		napi_enable(&queue->eq.napi);
-	}
-
 	WARN_ON(gic->handler || gic->arg);
 
 	gic->arg = queue;
 
-	if (is_mana)
-		gic->handler = mana_gd_schedule_napi;
-	else
-		gic->handler = mana_gd_process_eq_events;
+	gic->handler = mana_gd_process_eq_events;
 
 	return 0;
 }
@@ -549,11 +507,6 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets,
 
 	mana_gd_deregiser_irq(queue);
 
-	if (mana_gd_is_mana(queue->gdma_dev)) {
-		napi_disable(&queue->eq.napi);
-		netif_napi_del(&queue->eq.napi);
-	}
-
 	if (queue->eq.disable_needed)
 		mana_gd_disable_queue(queue);
 }
diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
index 1a923fd99990..c1310ea1c216 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -304,7 +304,7 @@ static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self)
 						&comp_data);
 	}
 
-	mana_gd_arm_cq(q_self);
+	mana_gd_ring_cq(q_self, SET_ARM_BIT);
 }
 
 static void mana_hwc_destroy_cq(struct gdma_context *gc, struct hwc_cq *hwc_cq)
diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h
index a2c3f826f022..5341dbdb726e 100644
--- a/drivers/net/ethernet/microsoft/mana/mana.h
+++ b/drivers/net/ethernet/microsoft/mana/mana.h
@@ -225,6 +225,8 @@ struct mana_tx_comp_oob {
 
 struct mana_rxq;
 
+#define CQE_POLLING_BUFFER 512
+
 struct mana_cq {
 	struct gdma_queue *gdma_cq;
 
@@ -244,8 +246,13 @@ struct mana_cq {
 	 */
 	struct mana_txq *txq;
 
-	/* Pointer to a buffer which the CQ handler can copy the CQE's into. */
-	struct gdma_comp *gdma_comp_buf;
+	/* Buffer which the CQ handler can copy the CQE's into. */
+	struct gdma_comp gdma_comp_buf[CQE_POLLING_BUFFER];
+
+	/* NAPI data */
+	struct napi_struct napi;
+	int work_done;
+	int budget;
 };
 
 #define GDMA_MAX_RQE_SGES 15
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 83eb28c132f3..8643d8cf1d5a 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -696,14 +696,6 @@ static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
 			   resp.hdr.status);
 }
 
-static void mana_init_cqe_poll_buf(struct gdma_comp *cqe_poll_buf)
-{
-	int i;
-
-	for (i = 0; i < CQE_POLLING_BUFFER; i++)
-		memset(&cqe_poll_buf[i], 0, sizeof(struct gdma_comp));
-}
-
 static void mana_destroy_eq(struct gdma_context *gc,
 			    struct mana_port_context *apc)
 {
@@ -746,8 +738,6 @@ static int mana_create_eq(struct mana_port_context *apc)
 	spec.eq.ndev = apc->ndev;
 
 	for (i = 0; i < apc->num_queues; i++) {
-		mana_init_cqe_poll_buf(apc->eqs[i].cqe_poll);
-
 		err = mana_gd_create_mana_eq(gd, &spec, &apc->eqs[i].eq);
 		if (err)
 			goto out;
@@ -790,7 +780,6 @@ static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
 
 static void mana_poll_tx_cq(struct mana_cq *cq)
 {
-	struct gdma_queue *gdma_eq = cq->gdma_cq->cq.parent;
 	struct gdma_comp *completions = cq->gdma_comp_buf;
 	struct gdma_posted_wqe_info *wqe_info;
 	unsigned int pkt_transmitted = 0;
@@ -812,6 +801,9 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
 	comp_read = mana_gd_poll_cq(cq->gdma_cq, completions,
 				    CQE_POLLING_BUFFER);
 
+	if (comp_read < 1)
+		return;
+
 	for (i = 0; i < comp_read; i++) {
 		struct mana_tx_comp_oob *cqe_oob;
 
@@ -863,7 +855,7 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
 
 		mana_unmap_skb(skb, apc);
 
-		napi_consume_skb(skb, gdma_eq->eq.budget);
+		napi_consume_skb(skb, cq->budget);
 
 		pkt_transmitted++;
 	}
@@ -892,6 +884,8 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
 
 	if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0)
 		WARN_ON_ONCE(1);
+
+	cq->work_done = pkt_transmitted;
 }
 
 static void mana_post_pkt_rxq(struct mana_rxq *rxq)
@@ -920,17 +914,13 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
 	struct mana_stats *rx_stats = &rxq->stats;
 	struct net_device *ndev = rxq->ndev;
 	uint pkt_len = cqe->ppi[0].pkt_len;
-	struct mana_port_context *apc;
 	u16 rxq_idx = rxq->rxq_idx;
 	struct napi_struct *napi;
-	struct gdma_queue *eq;
 	struct sk_buff *skb;
 	u32 hash_value;
 
-	apc = netdev_priv(ndev);
-	eq = apc->eqs[rxq_idx].eq;
-	eq->eq.work_done++;
-	napi = &eq->eq.napi;
+	rxq->rx_cq.work_done++;
+	napi = &rxq->rx_cq.napi;
 
 	if (!buf_va) {
 		++ndev->stats.rx_dropped;
@@ -1083,6 +1073,7 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
 static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
 {
 	struct mana_cq *cq = context;
+	u8 arm_bit;
 
 	WARN_ON_ONCE(cq->gdma_cq != gdma_queue);
 
@@ -1091,7 +1082,33 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
 	else
 		mana_poll_tx_cq(cq);
 
-	mana_gd_arm_cq(gdma_queue);
+	if (cq->work_done < cq->budget &&
+	    napi_complete_done(&cq->napi, cq->work_done)) {
+		arm_bit = SET_ARM_BIT;
+	} else {
+		arm_bit = 0;
+	}
+
+	mana_gd_ring_cq(gdma_queue, arm_bit);
+}
+
+static int mana_poll(struct napi_struct *napi, int budget)
+{
+	struct mana_cq *cq = container_of(napi, struct mana_cq, napi);
+
+	cq->work_done = 0;
+	cq->budget = budget;
+
+	mana_cq_handler(cq, cq->gdma_cq);
+
+	return min(cq->work_done, budget);
+}
+
+static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue)
+{
+	struct mana_cq *cq = context;
+
+	napi_schedule_irqoff(&cq->napi);
 }
 
 static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq)
@@ -1116,12 +1133,18 @@ static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
 
 static void mana_destroy_txq(struct mana_port_context *apc)
 {
+	struct napi_struct *napi;
 	int i;
 
 	if (!apc->tx_qp)
 		return;
 
 	for (i = 0; i < apc->num_queues; i++) {
+		napi = &apc->tx_qp[i].tx_cq.napi;
+		napi_synchronize(napi);
+		napi_disable(napi);
+		netif_napi_del(napi);
+
 		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
 
 		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
@@ -1188,7 +1211,6 @@ static int mana_create_txq(struct mana_port_context *apc,
 
 		/* Create SQ's CQ */
 		cq = &apc->tx_qp[i].tx_cq;
-		cq->gdma_comp_buf = apc->eqs[i].cqe_poll;
 		cq->type = MANA_CQ_TYPE_TX;
 
 		cq->txq = txq;
@@ -1197,7 +1219,7 @@ static int mana_create_txq(struct mana_port_context *apc,
 		spec.type = GDMA_CQ;
 		spec.monitor_avl_buf = false;
 		spec.queue_size = cq_size;
-		spec.cq.callback = mana_cq_handler;
+		spec.cq.callback = mana_schedule_napi;
 		spec.cq.parent_eq = apc->eqs[i].eq;
 		spec.cq.context = cq;
 		err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
@@ -1239,7 +1261,10 @@ static int mana_create_txq(struct mana_port_context *apc,
 
 		gc->cq_table[cq->gdma_id] = cq->gdma_cq;
 
-		mana_gd_arm_cq(cq->gdma_cq);
+		netif_tx_napi_add(net, &cq->napi, mana_poll, NAPI_POLL_WEIGHT);
+		napi_enable(&cq->napi);
+
+		mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
 	}
 
 	return 0;
@@ -1248,21 +1273,6 @@ static int mana_create_txq(struct mana_port_context *apc,
 	return err;
 }
 
-static void mana_napi_sync_for_rx(struct mana_rxq *rxq)
-{
-	struct net_device *ndev = rxq->ndev;
-	struct mana_port_context *apc;
-	u16 rxq_idx = rxq->rxq_idx;
-	struct napi_struct *napi;
-	struct gdma_queue *eq;
-
-	apc = netdev_priv(ndev);
-	eq = apc->eqs[rxq_idx].eq;
-	napi = &eq->eq.napi;
-
-	napi_synchronize(napi);
-}
-
 static void mana_destroy_rxq(struct mana_port_context *apc,
 			     struct mana_rxq *rxq, bool validate_state)
 
@@ -1270,13 +1280,19 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
 	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
 	struct mana_recv_buf_oob *rx_oob;
 	struct device *dev = gc->dev;
+	struct napi_struct *napi;
 	int i;
 
 	if (!rxq)
 		return;
 
+	napi = &rxq->rx_cq.napi;
+
 	if (validate_state)
-		mana_napi_sync_for_rx(rxq);
+		napi_synchronize(napi);
+
+	napi_disable(napi);
+	netif_napi_del(napi);
 
 	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
 
@@ -1420,7 +1436,6 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
 	/* Create RQ's CQ */
 	cq = &rxq->rx_cq;
-	cq->gdma_comp_buf = eq->cqe_poll;
 	cq->type = MANA_CQ_TYPE_RX;
 	cq->rxq = rxq;
 
@@ -1428,7 +1443,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 	spec.type = GDMA_CQ;
 	spec.monitor_avl_buf = false;
 	spec.queue_size = cq_size;
-	spec.cq.callback = mana_cq_handler;
+	spec.cq.callback = mana_schedule_napi;
 	spec.cq.parent_eq = eq->eq;
 	spec.cq.context = cq;
 	err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
@@ -1468,7 +1483,10 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
 	gc->cq_table[cq->gdma_id] = cq->gdma_cq;
 
-	mana_gd_arm_cq(cq->gdma_cq);
+	netif_napi_add(ndev, &cq->napi, mana_poll, 1);
+	napi_enable(&cq->napi);
+
+	mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
 out:
 	if (!err)
 		return rxq;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH V2,net-next, 2/3] net: mana: Add support for EQ sharing
  2021-08-24 16:45 [PATCH V2,net-next, 0/3] net: mana: Add support for EQ sharing Haiyang Zhang
  2021-08-24 16:45 ` [PATCH V2,net-next, 1/3] net: mana: Move NAPI from EQ to CQ Haiyang Zhang
@ 2021-08-24 16:46 ` Haiyang Zhang
  2021-08-24 16:46 ` [PATCH V2,net-next, 3/3] net: mana: Add WARN_ON_ONCE in case of CQE read overflow Haiyang Zhang
  2021-08-25 10:20 ` [PATCH V2,net-next, 0/3] net: mana: Add support for EQ sharing patchwork-bot+netdevbpf
  3 siblings, 0 replies; 5+ messages in thread
From: Haiyang Zhang @ 2021-08-24 16:46 UTC (permalink / raw)
  To: linux-hyperv, netdev
  Cc: haiyangz, kys, sthemmin, paulros, shacharr, olaf, vkuznets,
	davem, linux-kernel

The existing code uses (1 + #vPorts * #Queues) MSIXs, which may exceed
the device limit.

Support EQ sharing, so that multiple vPorts (NICs) can share the same
set of MSIXs.

And, report the EQ-sharing capability bit to the host, which means the
host can potentially offer more vPorts and queues to the VM.

Also update the resource limit checking and error handling for better
robustness.

Now, we support up to 256 virtual ports per VF (it was 16/VF), and
support up to 64 queues per vPort (it was 16).

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>

---
v2: Updates suggested by Dexuan Cui <decui@microsoft.com>
  Limit max_num_queues to (gc->num_msix_usable - 1)
  Add BUILD_BUG_ON(MAX_PORTS_IN_MANA_DEV * 2 * GDMA_EQE_SIZE > EQ_SIZE)
  More detailed descriptions
---
 drivers/net/ethernet/microsoft/mana/gdma.h    | 23 ++++---
 .../net/ethernet/microsoft/mana/gdma_main.c   | 31 +++++++---
 drivers/net/ethernet/microsoft/mana/mana.h    | 18 +++---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 62 +++++++++----------
 4 files changed, 78 insertions(+), 56 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h
index ddbca64bab07..41ecd156e95f 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma.h
+++ b/drivers/net/ethernet/microsoft/mana/gdma.h
@@ -312,9 +312,6 @@ struct gdma_queue_spec {
 			void *context;
 
 			unsigned long log2_throttle_limit;
-
-			/* Only used by the MANA device. */
-			struct net_device *ndev;
 		} eq;
 
 		struct {
@@ -489,16 +486,28 @@ enum {
 	GDMA_PROTOCOL_LAST	= GDMA_PROTOCOL_V1,
 };
 
+#define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0)
+
+#define GDMA_DRV_CAP_FLAGS1 GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT
+
+#define GDMA_DRV_CAP_FLAGS2 0
+
+#define GDMA_DRV_CAP_FLAGS3 0
+
+#define GDMA_DRV_CAP_FLAGS4 0
+
 struct gdma_verify_ver_req {
 	struct gdma_req_hdr hdr;
 
 	/* Mandatory fields required for protocol establishment */
 	u64 protocol_ver_min;
 	u64 protocol_ver_max;
-	u64 drv_cap_flags1;
-	u64 drv_cap_flags2;
-	u64 drv_cap_flags3;
-	u64 drv_cap_flags4;
+
+	/* Gdma Driver Capability Flags */
+	u64 gd_drv_cap_flags1;
+	u64 gd_drv_cap_flags2;
+	u64 gd_drv_cap_flags3;
+	u64 gd_drv_cap_flags4;
 
 	/* Advisory fields */
 	u64 drv_ver;
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 560472fa2d00..798099d64202 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -67,6 +67,10 @@ static int mana_gd_query_max_resources(struct pci_dev *pdev)
 	if (gc->max_num_queues > resp.max_rq)
 		gc->max_num_queues = resp.max_rq;
 
+	/* The Hardware Channel (HWC) used 1 MSI-X */
+	if (gc->max_num_queues > gc->num_msix_usable - 1)
+		gc->max_num_queues = gc->num_msix_usable - 1;
+
 	return 0;
 }
 
@@ -384,28 +388,31 @@ static int mana_gd_register_irq(struct gdma_queue *queue,
 	struct gdma_resource *r;
 	unsigned int msi_index;
 	unsigned long flags;
-	int err;
+	struct device *dev;
+	int err = 0;
 
 	gc = gd->gdma_context;
 	r = &gc->msix_resource;
+	dev = gc->dev;
 
 	spin_lock_irqsave(&r->lock, flags);
 
 	msi_index = find_first_zero_bit(r->map, r->size);
-	if (msi_index >= r->size) {
+	if (msi_index >= r->size || msi_index >= gc->num_msix_usable) {
 		err = -ENOSPC;
 	} else {
 		bitmap_set(r->map, msi_index, 1);
 		queue->eq.msix_index = msi_index;
-		err = 0;
 	}
 
 	spin_unlock_irqrestore(&r->lock, flags);
 
-	if (err)
-		return err;
+	if (err) {
+		dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u",
+			err, msi_index, r->size, gc->num_msix_usable);
 
-	WARN_ON(msi_index >= gc->num_msix_usable);
+		return err;
+	}
 
 	gic = &gc->irq_contexts[msi_index];
 
@@ -836,6 +843,11 @@ int mana_gd_verify_vf_version(struct pci_dev *pdev)
 	req.protocol_ver_min = GDMA_PROTOCOL_FIRST;
 	req.protocol_ver_max = GDMA_PROTOCOL_LAST;
 
+	req.gd_drv_cap_flags1 = GDMA_DRV_CAP_FLAGS1;
+	req.gd_drv_cap_flags2 = GDMA_DRV_CAP_FLAGS2;
+	req.gd_drv_cap_flags3 = GDMA_DRV_CAP_FLAGS3;
+	req.gd_drv_cap_flags4 = GDMA_DRV_CAP_FLAGS4;
+
 	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
 	if (err || resp.hdr.status) {
 		dev_err(gc->dev, "VfVerifyVersionOutput: %d, status=0x%x\n",
@@ -1154,10 +1166,8 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev)
 	if (max_queues_per_port > MANA_MAX_NUM_QUEUES)
 		max_queues_per_port = MANA_MAX_NUM_QUEUES;
 
-	max_irqs = max_queues_per_port * MAX_PORTS_IN_MANA_DEV;
-
 	/* Need 1 interrupt for the Hardware communication Channel (HWC) */
-	max_irqs++;
+	max_irqs = max_queues_per_port + 1;
 
 	nvec = pci_alloc_irq_vectors(pdev, 2, max_irqs, PCI_IRQ_MSIX);
 	if (nvec < 0)
@@ -1244,6 +1254,9 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	int bar = 0;
 	int err;
 
+	/* Each port has 2 CQs, each CQ has at most 1 EQE at a time */
+	BUILD_BUG_ON(2 * MAX_PORTS_IN_MANA_DEV * GDMA_EQE_SIZE > EQ_SIZE);
+
 	err = pci_enable_device(pdev);
 	if (err)
 		return -ENXIO;
diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h
index 5341dbdb726e..fc98a5ba5ed0 100644
--- a/drivers/net/ethernet/microsoft/mana/mana.h
+++ b/drivers/net/ethernet/microsoft/mana/mana.h
@@ -46,7 +46,7 @@ enum TRI_STATE {
 #define EQ_SIZE (8 * PAGE_SIZE)
 #define LOG2_EQ_THROTTLE 3
 
-#define MAX_PORTS_IN_MANA_DEV 16
+#define MAX_PORTS_IN_MANA_DEV 256
 
 struct mana_stats {
 	u64 packets;
@@ -322,6 +322,8 @@ struct mana_context {
 
 	u16 num_ports;
 
+	struct mana_eq *eqs;
+
 	struct net_device *ports[MAX_PORTS_IN_MANA_DEV];
 };
 
@@ -331,8 +333,6 @@ struct mana_port_context {
 
 	u8 mac_addr[ETH_ALEN];
 
-	struct mana_eq *eqs;
-
 	enum TRI_STATE rss_state;
 
 	mana_handle_t default_rxobj;
@@ -402,11 +402,11 @@ enum mana_command_code {
 struct mana_query_device_cfg_req {
 	struct gdma_req_hdr hdr;
 
-	/* Driver Capability flags */
-	u64 drv_cap_flags1;
-	u64 drv_cap_flags2;
-	u64 drv_cap_flags3;
-	u64 drv_cap_flags4;
+	/* MANA Nic Driver Capability flags */
+	u64 mn_drv_cap_flags1;
+	u64 mn_drv_cap_flags2;
+	u64 mn_drv_cap_flags3;
+	u64 mn_drv_cap_flags4;
 
 	u32 proto_major_ver;
 	u32 proto_minor_ver;
@@ -523,7 +523,7 @@ struct mana_cfg_rx_steer_resp {
 	struct gdma_resp_hdr hdr;
 }; /* HW DATA */
 
-#define MANA_MAX_NUM_QUEUES 16
+#define MANA_MAX_NUM_QUEUES 64
 
 #define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1)
 
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 8643d8cf1d5a..a41a7e7b2bd3 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -696,56 +696,56 @@ static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
 			   resp.hdr.status);
 }
 
-static void mana_destroy_eq(struct gdma_context *gc,
-			    struct mana_port_context *apc)
+static void mana_destroy_eq(struct mana_context *ac)
 {
+	struct gdma_context *gc = ac->gdma_dev->gdma_context;
 	struct gdma_queue *eq;
 	int i;
 
-	if (!apc->eqs)
+	if (!ac->eqs)
 		return;
 
-	for (i = 0; i < apc->num_queues; i++) {
-		eq = apc->eqs[i].eq;
+	for (i = 0; i < gc->max_num_queues; i++) {
+		eq = ac->eqs[i].eq;
 		if (!eq)
 			continue;
 
 		mana_gd_destroy_queue(gc, eq);
 	}
 
-	kfree(apc->eqs);
-	apc->eqs = NULL;
+	kfree(ac->eqs);
+	ac->eqs = NULL;
 }
 
-static int mana_create_eq(struct mana_port_context *apc)
+static int mana_create_eq(struct mana_context *ac)
 {
-	struct gdma_dev *gd = apc->ac->gdma_dev;
+	struct gdma_dev *gd = ac->gdma_dev;
+	struct gdma_context *gc = gd->gdma_context;
 	struct gdma_queue_spec spec = {};
 	int err;
 	int i;
 
-	apc->eqs = kcalloc(apc->num_queues, sizeof(struct mana_eq),
-			   GFP_KERNEL);
-	if (!apc->eqs)
+	ac->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq),
+			  GFP_KERNEL);
+	if (!ac->eqs)
 		return -ENOMEM;
 
 	spec.type = GDMA_EQ;
 	spec.monitor_avl_buf = false;
 	spec.queue_size = EQ_SIZE;
 	spec.eq.callback = NULL;
-	spec.eq.context = apc->eqs;
+	spec.eq.context = ac->eqs;
 	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
-	spec.eq.ndev = apc->ndev;
 
-	for (i = 0; i < apc->num_queues; i++) {
-		err = mana_gd_create_mana_eq(gd, &spec, &apc->eqs[i].eq);
+	for (i = 0; i < gc->max_num_queues; i++) {
+		err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
 		if (err)
 			goto out;
 	}
 
 	return 0;
 out:
-	mana_destroy_eq(gd->gdma_context, apc);
+	mana_destroy_eq(ac);
 	return err;
 }
 
@@ -1159,7 +1159,8 @@ static void mana_destroy_txq(struct mana_port_context *apc)
 static int mana_create_txq(struct mana_port_context *apc,
 			   struct net_device *net)
 {
-	struct gdma_dev *gd = apc->ac->gdma_dev;
+	struct mana_context *ac = apc->ac;
+	struct gdma_dev *gd = ac->gdma_dev;
 	struct mana_obj_spec wq_spec;
 	struct mana_obj_spec cq_spec;
 	struct gdma_queue_spec spec;
@@ -1220,7 +1221,7 @@ static int mana_create_txq(struct mana_port_context *apc,
 		spec.monitor_avl_buf = false;
 		spec.queue_size = cq_size;
 		spec.cq.callback = mana_schedule_napi;
-		spec.cq.parent_eq = apc->eqs[i].eq;
+		spec.cq.parent_eq = ac->eqs[i].eq;
 		spec.cq.context = cq;
 		err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
 		if (err)
@@ -1504,12 +1505,13 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 static int mana_add_rx_queues(struct mana_port_context *apc,
 			      struct net_device *ndev)
 {
+	struct mana_context *ac = apc->ac;
 	struct mana_rxq *rxq;
 	int err = 0;
 	int i;
 
 	for (i = 0; i < apc->num_queues; i++) {
-		rxq = mana_create_rxq(apc, i, &apc->eqs[i], ndev);
+		rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev);
 		if (!rxq) {
 			err = -ENOMEM;
 			goto out;
@@ -1621,16 +1623,11 @@ static int mana_init_port(struct net_device *ndev)
 int mana_alloc_queues(struct net_device *ndev)
 {
 	struct mana_port_context *apc = netdev_priv(ndev);
-	struct gdma_dev *gd = apc->ac->gdma_dev;
 	int err;
 
-	err = mana_create_eq(apc);
-	if (err)
-		return err;
-
 	err = mana_create_vport(apc, ndev);
 	if (err)
-		goto destroy_eq;
+		return err;
 
 	err = netif_set_real_num_tx_queues(ndev, apc->num_queues);
 	if (err)
@@ -1656,8 +1653,6 @@ int mana_alloc_queues(struct net_device *ndev)
 
 destroy_vport:
 	mana_destroy_vport(apc);
-destroy_eq:
-	mana_destroy_eq(gd->gdma_context, apc);
 	return err;
 }
 
@@ -1734,8 +1729,6 @@ static int mana_dealloc_queues(struct net_device *ndev)
 
 	mana_destroy_vport(apc);
 
-	mana_destroy_eq(apc->ac->gdma_dev->gdma_context, apc);
-
 	return 0;
 }
 
@@ -1788,7 +1781,7 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
 	apc->ac = ac;
 	apc->ndev = ndev;
 	apc->max_queues = gc->max_num_queues;
-	apc->num_queues = min_t(uint, gc->max_num_queues, MANA_MAX_NUM_QUEUES);
+	apc->num_queues = gc->max_num_queues;
 	apc->port_handle = INVALID_MANA_HANDLE;
 	apc->port_idx = port_idx;
 
@@ -1859,6 +1852,10 @@ int mana_probe(struct gdma_dev *gd)
 	ac->num_ports = 1;
 	gd->driver_data = ac;
 
+	err = mana_create_eq(ac);
+	if (err)
+		goto out;
+
 	err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
 				    MANA_MICRO_VERSION, &ac->num_ports);
 	if (err)
@@ -1908,6 +1905,9 @@ void mana_remove(struct gdma_dev *gd)
 
 		free_netdev(ndev);
 	}
+
+	mana_destroy_eq(ac);
+
 out:
 	mana_gd_deregister_device(gd);
 	gd->driver_data = NULL;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH V2,net-next, 3/3] net: mana: Add WARN_ON_ONCE in case of CQE read overflow
  2021-08-24 16:45 [PATCH V2,net-next, 0/3] net: mana: Add support for EQ sharing Haiyang Zhang
  2021-08-24 16:45 ` [PATCH V2,net-next, 1/3] net: mana: Move NAPI from EQ to CQ Haiyang Zhang
  2021-08-24 16:46 ` [PATCH V2,net-next, 2/3] net: mana: Add support for EQ sharing Haiyang Zhang
@ 2021-08-24 16:46 ` Haiyang Zhang
  2021-08-25 10:20 ` [PATCH V2,net-next, 0/3] net: mana: Add support for EQ sharing patchwork-bot+netdevbpf
  3 siblings, 0 replies; 5+ messages in thread
From: Haiyang Zhang @ 2021-08-24 16:46 UTC (permalink / raw)
  To: linux-hyperv, netdev
  Cc: haiyangz, kys, sthemmin, paulros, shacharr, olaf, vkuznets,
	davem, linux-kernel

A CQE read overflow is not expected in normal operation.
Add WARN_ON_ONCE so that it is reported if it does happen, instead of
failing silently.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>

---
v2: Add this warning suggested by Dexuan Cui <decui@microsoft.com>
---
 drivers/net/ethernet/microsoft/mana/gdma_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 798099d64202..cee75b561f59 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -1093,7 +1093,7 @@ static int mana_gd_read_cqe(struct gdma_queue *cq, struct gdma_comp *comp)
 
 	new_bits = (cq->head / num_cqe) & GDMA_CQE_OWNER_MASK;
 	/* Return -1 if overflow detected. */
-	if (owner_bits != new_bits)
+	if (WARN_ON_ONCE(owner_bits != new_bits))
 		return -1;
 
 	comp->wq_num = cqe->cqe_info.wq_num;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH V2,net-next, 0/3] net: mana: Add support for EQ sharing
  2021-08-24 16:45 [PATCH V2,net-next, 0/3] net: mana: Add support for EQ sharing Haiyang Zhang
                   ` (2 preceding siblings ...)
  2021-08-24 16:46 ` [PATCH V2,net-next, 3/3] net: mana: Add WARN_ON_ONCE in case of CQE read overflow Haiyang Zhang
@ 2021-08-25 10:20 ` patchwork-bot+netdevbpf
  3 siblings, 0 replies; 5+ messages in thread
From: patchwork-bot+netdevbpf @ 2021-08-25 10:20 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: linux-hyperv, netdev, kys, sthemmin, paulros, shacharr, olaf,
	vkuznets, davem, linux-kernel

Hello:

This series was applied to netdev/net-next.git (refs/heads/master):

On Tue, 24 Aug 2021 09:45:58 -0700 you wrote:
> The existing code uses (1 + #vPorts * #Queues) MSIXs, which may exceed
> the device limit.
> 
> Support EQ sharing, so that multiple vPorts can share the same set of
> MSIXs.
> 
> Haiyang Zhang (3):
>   net: mana: Move NAPI from EQ to CQ
>   net: mana: Add support for EQ sharing
>   net: mana: Add WARN_ON_ONCE in case of CQE read overflow
> 
> [...]

Here is the summary with links:
  - [V2,net-next,1/3] net: mana: Move NAPI from EQ to CQ
    https://git.kernel.org/netdev/net-next/c/e1b5683ff62e
  - [V2,net-next,2/3] net: mana: Add support for EQ sharing
    https://git.kernel.org/netdev/net-next/c/1e2d0824a9c3
  - [V2,net-next,3/3] net: mana: Add WARN_ON_ONCE in case of CQE read overflow
    https://git.kernel.org/netdev/net-next/c/c1a3e9f98dde

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-08-25 10:20 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-24 16:45 [PATCH V2,net-next, 0/3] net: mana: Add support for EQ sharing Haiyang Zhang
2021-08-24 16:45 ` [PATCH V2,net-next, 1/3] net: mana: Move NAPI from EQ to CQ Haiyang Zhang
2021-08-24 16:46 ` [PATCH V2,net-next, 2/3] net: mana: Add support for EQ sharing Haiyang Zhang
2021-08-24 16:46 ` [PATCH V2,net-next, 3/3] net: mana: Add WARN_ON_ONCE in case of CQE read overflow Haiyang Zhang
2021-08-25 10:20 ` [PATCH V2,net-next, 0/3] net: mana: Add support for EQ sharing patchwork-bot+netdevbpf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).