From: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
To: jgg@ziepe.ca, dledford@redhat.com
Cc: linux-rdma@vger.kernel.org,
	Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
Subject: [PATCH for-next 4/6] IB/hfi1: Tune netdev xmit cachelines
Date: Mon, 13 Sep 2021 09:28:42 -0400	[thread overview]
Message-ID: <20210913132842.131370.15636.stgit@awfm-01.cornelisnetworks.com> (raw)
In-Reply-To: <20210913132317.131370.54825.stgit@awfm-01.cornelisnetworks.com>

From: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>

This patch rearranges the fields in the ring buffer struct, giving
the producer and the consumer each a dedicated cacheline.
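
For illustration only, a simplified sketch of the layout idea (a
generic ring, not the hfi1 structure itself):

struct ring_sketch {
	void **items;		/* read-mostly, shared by both sides */
	u32 max_items;		/* power of two */
	/* xmit-side hot fields: one cacheline */
	u32 ____cacheline_aligned_in_smp tail;
	u32 avail;
	/* completion-side hot fields: another cacheline */
	u32 ____cacheline_aligned_in_smp head;
};

____cacheline_aligned_in_smp pads and aligns each group to a
cacheline boundary, so stores from one side no longer invalidate the
line the other side is reading.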

It also adds a consumer-side variable that caches the ring's
available entry count, so the code doesn't have to read the other
cacheline to get a count for every packet. The other cacheline is
now read only when the cached avail count drops to 0.
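
For illustration only, a simplified sketch of the caching scheme
(generic code, not the hfi1 xmit path; CIRC_SPACE() comes from
<linux/circ_buf.h> and assumes a power-of-two ring size):

/* xmit side: claim one ring entry, reading the completion side's
 * cacheline only when the cached count has run out
 */
static int ring_claim(struct ring_sketch *r)
{
	if (!r->avail) {
		/* pairs with smp_store_release(&r->head, ...) done
		 * by the completion side after reaping entries
		 */
		u32 head = smp_load_acquire(&r->head);

		r->avail = CIRC_SPACE(r->tail, head, r->max_items);
		if (!r->avail)
			return -ENOMEM;
	}
	r->avail--;
	r->tail = (r->tail + 1) & (r->max_items - 1);
	return 0;
}

In the steady state the refill branch is not taken and the xmit path
touches only its own cacheline.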

Fixes: d99dc602e2a5 ("IB/hfi1: Add functions to transmit datagram ipoib packets")
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
---
 drivers/infiniband/hw/hfi1/ipoib.h    |   39 ++++++++++--------
 drivers/infiniband/hw/hfi1/ipoib_tx.c |   71 +++++++++++++++++++--------------
 drivers/infiniband/hw/hfi1/trace_tx.h |    8 ++--
 3 files changed, 66 insertions(+), 52 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h
index 71b102d..8d9a03a 100644
--- a/drivers/infiniband/hw/hfi1/ipoib.h
+++ b/drivers/infiniband/hw/hfi1/ipoib.h
@@ -46,18 +46,31 @@
 /**
  * struct hfi1_ipoib_circ_buf - List of items to be processed
  * @items: ring of items each a power of two size
- * @head: ring head
- * @tail: ring tail
  * @max_items: max items + 1 that the ring can contain
  * @shift: log2 of size for getting txreq
+ * @sent_txreqs: count of txreqs posted to sdma
+ * @tail: ring tail
+ * @stops: count of stops of queue
+ * @ring_full: ring has been filled
+ * @no_desc: descriptor shortage seen
+ * @complete_txreqs: count of txreqs completed by sdma
+ * @head: ring head
  */
 struct ipoib_txreq;
 struct hfi1_ipoib_circ_buf {
 	void *items;
-	u32 head;
-	u32 tail;
 	u32 max_items;
 	u32 shift;
+	/* consumer cache line */
+	u64 ____cacheline_aligned_in_smp sent_txreqs;
+	u32 avail;
+	u32 tail;
+	atomic_t stops;
+	atomic_t ring_full;
+	atomic_t no_desc;
+	/* producer cache line */
+	atomic64_t ____cacheline_aligned_in_smp complete_txreqs;
+	u32 head;
 };
 
 /**
@@ -66,14 +79,10 @@ struct hfi1_ipoib_circ_buf {
  * @sde: sdma engine
  * @tx_list: tx request list
  * @sent_txreqs: count of txreqs posted to sdma
- * @stops: count of stops of queue
- * @ring_full: ring has been filled
- * @no_desc: descriptor shortage seen
  * @flow: tracks when list needs to be flushed for a flow change
  * @q_idx: ipoib Tx queue index
  * @pkts_sent: indicator packets have been sent from this queue
  * @wait: iowait structure
- * @complete_txreqs: count of txreqs completed by sdma
  * @napi: pointer to tx napi interface
  * @tx_ring: ring of ipoib txreqs to be reaped by napi callback
  */
@@ -82,17 +91,12 @@ struct hfi1_ipoib_txq {
 	struct hfi1_ipoib_dev_priv *priv;
 	struct sdma_engine *sde;
 	struct list_head tx_list;
-	u64 sent_txreqs;
-	atomic_t stops;
-	atomic_t ring_full;
-	atomic_t no_desc;
 	union hfi1_ipoib_flow flow;
 	u8 q_idx;
 	bool pkts_sent;
 	struct iowait wait;
 
-	atomic64_t ____cacheline_aligned_in_smp complete_txreqs;
-	struct hfi1_ipoib_circ_buf tx_ring;
+	struct hfi1_ipoib_circ_buf ____cacheline_aligned_in_smp tx_ring;
 };
 
 struct hfi1_ipoib_dev_priv {
@@ -100,13 +104,12 @@ struct hfi1_ipoib_dev_priv {
 	struct net_device   *netdev;
 	struct ib_device    *device;
 	struct hfi1_ipoib_txq *txqs;
+	const struct net_device_ops *netdev_ops;
+	struct rvt_qp *qp;
+	u32 qkey;
 	u16 pkey;
 	u16 pkey_index;
-	u32 qkey;
 	u8 port_num;
-
-	const struct net_device_ops *netdev_ops;
-	struct rvt_qp *qp;
 };
 
 /* hfi1 ipoib rdma netdev's private data structure */
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
index 734b91d..c3e43da 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_tx.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -66,21 +66,21 @@ static u32 hfi1_ipoib_txreqs(const u64 sent, const u64 completed)
 
 static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq)
 {
-	return hfi1_ipoib_txreqs(txq->sent_txreqs,
-				 atomic64_read(&txq->complete_txreqs));
+	return hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs,
+				 atomic64_read(&txq->tx_ring.complete_txreqs));
 }
 
 static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq)
 {
 	trace_hfi1_txq_stop(txq);
-	if (atomic_inc_return(&txq->stops) == 1)
+	if (atomic_inc_return(&txq->tx_ring.stops) == 1)
 		netif_stop_subqueue(txq->priv->netdev, txq->q_idx);
 }
 
 static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq)
 {
 	trace_hfi1_txq_wake(txq);
-	if (atomic_dec_and_test(&txq->stops))
+	if (atomic_dec_and_test(&txq->tx_ring.stops))
 		netif_wake_subqueue(txq->priv->netdev, txq->q_idx);
 }
 
@@ -98,9 +98,9 @@ static uint hfi1_ipoib_ring_lwat(struct hfi1_ipoib_txq *txq)
 
 static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq)
 {
-	++txq->sent_txreqs;
+	++txq->tx_ring.sent_txreqs;
 	if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) &&
-	    !atomic_xchg(&txq->ring_full, 1)) {
+	    !atomic_xchg(&txq->tx_ring.ring_full, 1)) {
 		trace_hfi1_txq_full(txq);
 		hfi1_ipoib_stop_txq(txq);
 	}
@@ -125,7 +125,7 @@ static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq)
 	 * to protect against ring overflow.
 	 */
 	if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) &&
-	    atomic_xchg(&txq->ring_full, 0)) {
+	    atomic_xchg(&txq->tx_ring.ring_full, 0)) {
 		trace_hfi1_txq_xmit_unstopped(txq);
 		hfi1_ipoib_wake_txq(txq);
 	}
@@ -168,6 +168,7 @@ static void hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq)
 	tx_ring->tail = 0;
-	atomic64_set(&txq->complete_txreqs, 0);
-	txq->sent_txreqs = 0;
+	atomic64_set(&tx_ring->complete_txreqs, 0);
+	tx_ring->sent_txreqs = 0;
+	tx_ring->avail = hfi1_ipoib_ring_hwat(txq);
 }
 
 static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget)
@@ -190,7 +191,7 @@ static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget)
 		head = CIRC_NEXT(head, max_tx);
 		tx =  hfi1_txreq_from_idx(tx_ring, head);
 	}
-	atomic64_add(work_done, &txq->complete_txreqs);
+	atomic64_add(work_done, &txq->tx_ring.complete_txreqs);
 
 	/* Finished freeing tx items so store the head value. */
 	smp_store_release(&tx_ring->head, head);
@@ -344,7 +345,7 @@ static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
 
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	ohdr->bth[1] = cpu_to_be32(txp->dqpn);
-	ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->sent_txreqs));
+	ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->tx_ring.sent_txreqs));
 
 	/* Build the deth */
 	ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey);
@@ -369,16 +370,25 @@ static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev,
 	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
 	struct hfi1_ipoib_txq *txq = txp->txq;
 	struct ipoib_txreq *tx;
-	struct hfi1_ipoib_circ_buf *tx_ring;
-	u32 tail;
+	struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
+	u32 tail = tx_ring->tail;
 	int ret;
 
-	if (unlikely(hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq)))
-		/* This shouldn't happen with a stopped queue */
-		return ERR_PTR(-ENOMEM);
-	tx_ring = &txq->tx_ring;
-	tail = tx_ring->tail;
-	tx = hfi1_txreq_from_idx(tx_ring, tx_ring->tail);
+	if (unlikely(!tx_ring->avail)) {
+		u32 head;
+
+		if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq))
+			/* This shouldn't happen with a stopped queue */
+			return ERR_PTR(-ENOMEM);
+		/* See hfi1_ipoib_poll_tx_ring() */
+		head = smp_load_acquire(&tx_ring->head);
+		tx_ring->avail =
+			min_t(u32, hfi1_ipoib_ring_hwat(txq),
+			      CIRC_CNT(head, tail, tx_ring->max_items));
+	} else {
+		tx_ring->avail--;
+	}
+	tx = hfi1_txreq_from_idx(tx_ring, tail);
 	trace_hfi1_txq_alloc_tx(txq);
 
 	/* so that we can test if the sdma descriptors are there */
@@ -639,7 +649,7 @@ static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde,
 		if (list_empty(&txq->wait.list)) {
 			struct hfi1_ibport *ibp = &sde->ppd->ibport_data;
 
-			if (!atomic_xchg(&txq->no_desc, 1)) {
+			if (!atomic_xchg(&txq->tx_ring.no_desc, 1)) {
 				trace_hfi1_txq_queued(txq);
 				hfi1_ipoib_stop_txq(txq);
 			}
@@ -682,7 +692,7 @@ static void hfi1_ipoib_flush_txq(struct work_struct *work)
 
 	if (likely(dev->reg_state == NETREG_REGISTERED) &&
 	    likely(!hfi1_ipoib_flush_tx_list(dev, txq)))
-		if (atomic_xchg(&txq->no_desc, 0))
+		if (atomic_xchg(&txq->tx_ring.no_desc, 0))
 			hfi1_ipoib_wake_txq(txq);
 }
 
@@ -720,10 +730,10 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
 		txq->priv = priv;
 		txq->sde = NULL;
 		INIT_LIST_HEAD(&txq->tx_list);
-		atomic64_set(&txq->complete_txreqs, 0);
-		atomic_set(&txq->stops, 0);
-		atomic_set(&txq->ring_full, 0);
-		atomic_set(&txq->no_desc, 0);
+		atomic64_set(&txq->tx_ring.complete_txreqs, 0);
+		atomic_set(&txq->tx_ring.stops, 0);
+		atomic_set(&txq->tx_ring.ring_full, 0);
+		atomic_set(&txq->tx_ring.no_desc, 0);
 		txq->q_idx = i;
 		txq->flow.tx_queue = 0xff;
 		txq->flow.sc5 = 0xff;
@@ -740,6 +750,7 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
 
 		txq->tx_ring.max_items = tx_ring_size;
 		txq->tx_ring.shift = ilog2(tx_ring_size);
+		txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq);
 
 		netif_tx_napi_add(dev, &txq->napi,
 				  hfi1_ipoib_poll_tx_ring,
@@ -765,7 +776,7 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
 {
 	struct sdma_txreq *txreq;
 	struct sdma_txreq *txreq_tmp;
-	atomic64_t *complete_txreqs = &txq->complete_txreqs;
+	atomic64_t *complete_txreqs = &txq->tx_ring.complete_txreqs;
 
 	list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) {
 		struct ipoib_txreq *tx =
@@ -782,7 +793,7 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
 		dd_dev_warn(txq->priv->dd,
 			    "txq %d not empty found %u requests\n",
 			    txq->q_idx,
-			    hfi1_ipoib_txreqs(txq->sent_txreqs,
+			    hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs,
 					      atomic64_read(complete_txreqs)));
 }
 
@@ -834,20 +845,20 @@ void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q)
 {
 	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
 	struct hfi1_ipoib_txq *txq = &priv->txqs[q];
-	u64 completed = atomic64_read(&txq->complete_txreqs);
+	u64 completed = atomic64_read(&txq->tx_ring.complete_txreqs);
 
 	dd_dev_info(priv->dd, "timeout txq %llx q %u stopped %u stops %d no_desc %d ring_full %d\n",
 		    (unsigned long long)txq, q,
 		    __netif_subqueue_stopped(dev, txq->q_idx),
-		    atomic_read(&txq->stops),
-		    atomic_read(&txq->no_desc),
-		    atomic_read(&txq->ring_full));
+		    atomic_read(&txq->tx_ring.stops),
+		    atomic_read(&txq->tx_ring.no_desc),
+		    atomic_read(&txq->tx_ring.ring_full));
 	dd_dev_info(priv->dd, "sde %llx engine %u\n",
 		    (unsigned long long)txq->sde,
 		    txq->sde ? txq->sde->this_idx : 0);
 	dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int);
 	dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n",
-		    txq->sent_txreqs, completed, hfi1_ipoib_used(txq));
+		    txq->tx_ring.sent_txreqs, completed, hfi1_ipoib_used(txq));
 	dd_dev_info(priv->dd, "tx_queue_len %u max_items %u\n",
 		    dev->tx_queue_len, txq->tx_ring.max_items);
 	dd_dev_info(priv->dd, "head %u tail %u\n",
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index 7318aa6..c9b1cd0 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -917,11 +917,11 @@
 		__entry->tail = txq->tx_ring.tail;
 		__entry->idx = txq->q_idx;
 		__entry->used =
-			txq->sent_txreqs -
-			atomic64_read(&txq->complete_txreqs);
+			txq->tx_ring.sent_txreqs -
+			atomic64_read(&txq->tx_ring.complete_txreqs);
 		__entry->flow = txq->flow.as_int;
-		__entry->stops = atomic_read(&txq->stops);
-		__entry->no_desc = atomic_read(&txq->no_desc);
+		__entry->stops = atomic_read(&txq->tx_ring.stops);
+		__entry->no_desc = atomic_read(&txq->tx_ring.no_desc);
 		__entry->stopped =
 		 __netif_subqueue_stopped(txq->priv->netdev, txq->q_idx);
 	),



Thread overview: 8+ messages
2021-09-13 13:28 [PATCH for-next 0/6] Perf and debug fixes for hfi Dennis Dalessandro
2021-09-13 13:28 ` [PATCH for-next 1/6] IB/hfi1: Remove cache and embed txreq in ring Dennis Dalessandro
2021-09-13 13:28 ` [PATCH for-next 2/6] IB/hfi1: Get rid of hot path divide Dennis Dalessandro
2021-09-13 13:28 ` [PATCH for-next 3/6] IB/hfi1: Get rid of tx priv backpointer Dennis Dalessandro
2021-09-13 13:28 ` Dennis Dalessandro [this message]
2021-09-13 13:28 ` [PATCH for-next 5/6] IB/hfi1: Remove atomic completion count Dennis Dalessandro
2021-09-13 13:28 ` [PATCH for-next 6/6] IB/hfi1: Add ring consumer and producers traces Dennis Dalessandro
2021-09-27 23:15 ` [PATCH for-next 0/6] Perf and debug fixes for hfi Jason Gunthorpe
