From: Matan Azrad <matan@mellanox.com>
To: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Cc: dev@dpdk.org, Ophir Munk <ophirmu@mellanox.com>
Subject: [PATCH v4 6/8] net/mlx4: separate Tx segment cases
Date: Tue, 31 Oct 2017 18:21:31 +0000
Message-ID: <1509474093-31388-7-git-send-email-matan@mellanox.com>
In-Reply-To: <1509474093-31388-1-git-send-email-matan@mellanox.com>

Optimize the single-segment case by processing it in a dedicated block,
avoiding the checks, calculations and barriers that are relevant only to
the multi-segment case.

Call a dedicated function to handle the multi-segment case.

Signed-off-by: Matan Azrad <matan@mellanox.com>
Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
---
 drivers/net/mlx4/mlx4_rxtx.c | 236 +++++++++++++++++++++++++++----------------
 1 file changed, 151 insertions(+), 85 deletions(-)
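
For illustration only (not part of this patch): a minimal standalone C
sketch of the sizing arithmetic behind the new single-segment fast path.
The TXBB, control segment and data segment sizes below are assumptions
based on common mlx4 values, not taken from this patch. It shows why a
one-segment WQE always fits in exactly one TXBB, which is what lets the
fast path hardcode nr_txbbs = 1 and derive fence_size directly from
WQE_ONE_DATA_SEG_SIZE.

#include <stdio.h>

/* Assumed sizes, for illustration only. */
#define TXBB_SIZE     64 /* Tx basic block, assumed 64 bytes. */
#define CTRL_SEG_SIZE 16 /* sizeof(struct mlx4_wqe_ctrl_seg), assumed. */
#define DATA_SEG_SIZE 16 /* sizeof(struct mlx4_wqe_data_seg), assumed. */

int main(void)
{
	/* One control segment plus a single data segment. */
	int wqe_size = CTRL_SEG_SIZE + 1 * DATA_SEG_SIZE;
	/* Round up to whole TXBBs, as MLX4_SIZE_TO_TXBBS() does. */
	int nr_txbbs = (wqe_size + TXBB_SIZE - 1) / TXBB_SIZE;
	/* fence_size counts 16-byte units, masked to 6 bits. */
	int fence_size = (wqe_size >> 4) & 0x3f;

	/* Prints: wqe=32B txbbs=1 fence=2 */
	printf("wqe=%dB txbbs=%d fence=%d\n", wqe_size, nr_txbbs, fence_size);
	return 0;
}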

diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 0a6cbf1..176000f 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -62,6 +62,9 @@
 #include "mlx4_rxtx.h"
 #include "mlx4_utils.h"
 
+#define WQE_ONE_DATA_SEG_SIZE \
+	(sizeof(struct mlx4_wqe_ctrl_seg) + sizeof(struct mlx4_wqe_data_seg))
+
 /**
  * Pointer-value pair structure used in tx_post_send for saving the first
  * DWORD (32 bit) of a TXBB.
@@ -141,21 +144,18 @@ struct pv {
  *   0 on success, -1 on failure.
  */
 static int
-mlx4_txq_complete(struct txq *txq)
+mlx4_txq_complete(struct txq *txq, const unsigned int elts_n,
+				  struct mlx4_sq *sq)
 {
 	unsigned int elts_comp = txq->elts_comp;
 	unsigned int elts_tail = txq->elts_tail;
-	const unsigned int elts_n = txq->elts_n;
 	struct mlx4_cq *cq = &txq->mcq;
-	struct mlx4_sq *sq = &txq->msq;
 	struct mlx4_cqe *cqe;
 	uint32_t cons_index = cq->cons_index;
 	uint16_t new_index;
 	uint16_t nr_txbbs = 0;
 	int pkts = 0;
 
-	if (unlikely(elts_comp == 0))
-		return 0;
 	/*
 	 * Traverse over all CQ entries reported and handle each WQ entry
 	 * reported by them.
@@ -238,6 +238,119 @@ struct pv {
 	return buf->pool;
 }
 
+static int
+mlx4_tx_burst_segs(struct rte_mbuf *buf, struct txq *txq,
+			       struct mlx4_wqe_ctrl_seg **pctrl)
+{
+	int wqe_real_size;
+	int nr_txbbs;
+	struct pv *pv = (struct pv *)txq->bounce_buf;
+	struct mlx4_sq *sq = &txq->msq;
+	uint32_t head_idx = sq->head & sq->txbb_cnt_mask;
+	struct mlx4_wqe_ctrl_seg *ctrl;
+	struct mlx4_wqe_data_seg *dseg;
+	struct rte_mbuf *sbuf;
+	uint32_t lkey;
+	uintptr_t addr;
+	uint32_t byte_count;
+	int pv_counter = 0;
+
+	/* Calculate the needed work queue entry size for this packet. */
+	wqe_real_size = sizeof(struct mlx4_wqe_ctrl_seg) +
+		buf->nb_segs * sizeof(struct mlx4_wqe_data_seg);
+	nr_txbbs = MLX4_SIZE_TO_TXBBS(wqe_real_size);
+	/*
+	 * Check that there is room for this WQE in the send queue and that
+	 * the WQE size is legal.
+	 */
+	if (((sq->head - sq->tail) + nr_txbbs +
+				sq->headroom_txbbs) >= sq->txbb_cnt ||
+			nr_txbbs > MLX4_MAX_WQE_TXBBS) {
+		return -1;
+	}
+	/* Get the control and data entries of the WQE. */
+	ctrl = (struct mlx4_wqe_ctrl_seg *)mlx4_get_send_wqe(sq, head_idx);
+	dseg = (struct mlx4_wqe_data_seg *)((uintptr_t)ctrl +
+			sizeof(struct mlx4_wqe_ctrl_seg));
+	*pctrl = ctrl;
+	/* Fill the data segments with buffer information. */
+	for (sbuf = buf; sbuf != NULL; sbuf = sbuf->next, dseg++) {
+		addr = rte_pktmbuf_mtod(sbuf, uintptr_t);
+		rte_prefetch0((volatile void *)addr);
+		/* Handle WQE wraparound. */
+		if (dseg >= (struct mlx4_wqe_data_seg *)sq->eob)
+			dseg = (struct mlx4_wqe_data_seg *)sq->buf;
+		dseg->addr = rte_cpu_to_be_64(addr);
+		/* Memory region key (big endian) for this memory pool. */
+		lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(sbuf));
+		dseg->lkey = rte_cpu_to_be_32(lkey);
+#ifndef NDEBUG
+		/* Check that the memory region lookup succeeded. */
+		if (unlikely(dseg->lkey == rte_cpu_to_be_32((uint32_t)-1))) {
+			/* MR does not exist. */
+			DEBUG("%p: unable to get MP <-> MR association",
+					(void *)txq);
+			/*
+			 * Restamp entry in case of failure.
+			 * Make sure that the size is written correctly.
+			 * Note that we give ownership to the SW, not the HW.
+			 */
+			wqe_real_size = sizeof(struct mlx4_wqe_ctrl_seg) +
+				buf->nb_segs * sizeof(struct mlx4_wqe_data_seg);
+			ctrl->fence_size = (wqe_real_size >> 4) & 0x3f;
+			mlx4_txq_stamp_freed_wqe(sq, head_idx,
+					(sq->head & sq->txbb_cnt) ? 0 : 1);
+			return -1;
+		}
+#endif /* NDEBUG */
+		if (likely(sbuf->data_len)) {
+			byte_count = rte_cpu_to_be_32(sbuf->data_len);
+		} else {
+			/*
+			 * Zero length segment is treated as inline segment
+			 * with zero data.
+			 */
+			byte_count = RTE_BE32(0x80000000);
+		}
+		/*
+		 * If the data segment is not at the beginning of a
+		 * Tx basic block (TXBB) then write the byte count,
+		 * else postpone the writing to just before updating the
+		 * control segment.
+		 */
+		if ((uintptr_t)dseg & (uintptr_t)(MLX4_TXBB_SIZE - 1)) {
+			/*
+			 * Need a barrier here before writing the byte_count
+			 * fields to make sure that all the data is visible
+			 * before the byte_count field is set.
+			 * Otherwise, if the segment begins a new cacheline,
+			 * the HCA prefetcher could grab the 64-byte chunk and
+			 * get a valid (!= 0xffffffff) byte count but stale
+			 * data, and end up sending the wrong data.
+			 */
+			rte_io_wmb();
+			dseg->byte_count = byte_count;
+		} else {
+			/*
+			 * This data segment starts at the beginning of a new
+			 * TXBB, so we need to postpone its byte_count writing
+			 * for later.
+			 */
+			pv[pv_counter].dseg = dseg;
+			pv[pv_counter++].val = byte_count;
+		}
+	}
+	/* Write the first DWORD of each TXBB saved earlier. */
+	if (pv_counter) {
+		/* Need a barrier here before writing the byte_count. */
+		rte_io_wmb();
+		for (--pv_counter; pv_counter >= 0; pv_counter--)
+			pv[pv_counter].dseg->byte_count = pv[pv_counter].val;
+	}
+	/* Fill the control parameters for this packet. */
+	ctrl->fence_size = (wqe_real_size >> 4) & 0x3f;
+	return nr_txbbs;
+}
+
 /**
  * DPDK callback for Tx.
  *
@@ -261,10 +374,11 @@ struct pv {
 	unsigned int i;
 	unsigned int max;
 	struct mlx4_sq *sq = &txq->msq;
-	struct pv *pv = (struct pv *)txq->bounce_buf;
+	int nr_txbbs;
 
 	assert(txq->elts_comp_cd != 0);
-	mlx4_txq_complete(txq);
+	if (likely(txq->elts_comp != 0))
+		mlx4_txq_complete(txq, elts_n, sq);
 	max = (elts_n - (elts_head - txq->elts_tail));
 	if (max > elts_n)
 		max -= elts_n;
@@ -283,7 +397,6 @@ struct pv {
 		uint32_t owner_opcode = MLX4_OPCODE_SEND;
 		struct mlx4_wqe_ctrl_seg *ctrl;
 		struct mlx4_wqe_data_seg *dseg;
-		struct rte_mbuf *sbuf;
 		union {
 			uint32_t flags;
 			uint16_t flags16[2];
@@ -291,10 +404,6 @@ struct pv {
 		uint32_t head_idx = sq->head & sq->txbb_cnt_mask;
 		uint32_t lkey;
 		uintptr_t addr;
-		uint32_t byte_count;
-		int wqe_real_size;
-		int nr_txbbs;
-		int pv_counter = 0;
 
 		/* Clean up old buffer. */
 		if (likely(elt->buf != NULL)) {
@@ -313,38 +422,29 @@ struct pv {
 			} while (tmp != NULL);
 		}
 		RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
-		/*
-		 * Calculate the needed work queue entry size
-		 * for this packet.
-		 */
-		wqe_real_size = sizeof(struct mlx4_wqe_ctrl_seg) +
-				buf->nb_segs * sizeof(struct mlx4_wqe_data_seg);
-		nr_txbbs = MLX4_SIZE_TO_TXBBS(wqe_real_size);
-		/*
-		 * Check that there is room for this WQE in the send
-		 * queue and that the WQE size is legal.
-		 */
-		if (((sq->head - sq->tail) + nr_txbbs +
-		     sq->headroom_txbbs) >= sq->txbb_cnt ||
-		    nr_txbbs > MLX4_MAX_WQE_TXBBS) {
-			elt->buf = NULL;
-			break;
-		}
-		/* Get the control and data entries of the WQE. */
-		ctrl = (struct mlx4_wqe_ctrl_seg *)
-				mlx4_get_send_wqe(sq, head_idx);
-		dseg = (struct mlx4_wqe_data_seg *)((uintptr_t)ctrl +
-				sizeof(struct mlx4_wqe_ctrl_seg));
-		/* Fill the data segments with buffer information. */
-		for (sbuf = buf; sbuf != NULL; sbuf = sbuf->next, dseg++) {
-			addr = rte_pktmbuf_mtod(sbuf, uintptr_t);
+		if (buf->nb_segs == 1) {
+			/*
+			 * Check that there is room for this WQE in the send
+			 * queue and that the WQE size is legal.
+			 */
+			if (((sq->head - sq->tail) + 1 + sq->headroom_txbbs) >=
+			     sq->txbb_cnt || 1 > MLX4_MAX_WQE_TXBBS) {
+				elt->buf = NULL;
+				break;
+			}
+			/* Get the control and data entries of the WQE. */
+			ctrl = (struct mlx4_wqe_ctrl_seg *)
+					mlx4_get_send_wqe(sq, head_idx);
+			dseg = (struct mlx4_wqe_data_seg *)((uintptr_t)ctrl +
+					sizeof(struct mlx4_wqe_ctrl_seg));
+			addr = rte_pktmbuf_mtod(buf, uintptr_t);
 			rte_prefetch0((volatile void *)addr);
 			/* Handle WQE wraparound. */
 			if (dseg >= (struct mlx4_wqe_data_seg *)sq->eob)
 				dseg = (struct mlx4_wqe_data_seg *)sq->buf;
 			dseg->addr = rte_cpu_to_be_64(addr);
 			/* Memory region key (big endian). */
-			lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(sbuf));
+			lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(buf));
 			dseg->lkey = rte_cpu_to_be_32(lkey);
 #ifndef NDEBUG
 			if (unlikely(dseg->lkey ==
@@ -358,61 +458,27 @@ struct pv {
 				 * Note that we give ownership to the SW,
 				 * not the HW.
 				 */
-				ctrl->fence_size = (wqe_real_size >> 4) & 0x3f;
+				ctrl->fence_size =
+					(WQE_ONE_DATA_SEG_SIZE >> 4) & 0x3f;
 				mlx4_txq_stamp_freed_wqe(sq, head_idx,
 					     (sq->head & sq->txbb_cnt) ? 0 : 1);
 				elt->buf = NULL;
 				break;
 			}
 #endif /* NDEBUG */
-			if (likely(sbuf->data_len)) {
-				byte_count = rte_cpu_to_be_32(sbuf->data_len);
-			} else {
-				/*
-				 * Zero length segment is treated as inline
-				 * segment with zero data.
-				 */
-				byte_count = RTE_BE32(0x80000000);
-			}
-			/*
-			 * If the data segment is not at the beginning
-			 * of a Tx basic block (TXBB) then write the
-			 * byte count, else postpone the writing to
-			 * just before updating the control segment.
-			 */
-			if ((uintptr_t)dseg & (uintptr_t)(MLX4_TXBB_SIZE - 1)) {
-				/*
-				 * Need a barrier here before writing the
-				 * byte_count fields to make sure that all the
-				 * data is visible before the byte_count field
-				 * is set. otherwise, if the segment begins a
-				 * new cacheline, the HCA prefetcher could grab
-				 * the 64-byte chunk and get a valid
-				 * (!= 0xffffffff) byte count but stale data,
-				 * and end up sending the wrong data.
-				 */
-				rte_io_wmb();
-				dseg->byte_count = byte_count;
-			} else {
-				/*
-				 * This data segment starts at the beginning of
-				 * a new TXBB, so we need to postpone its
-				 * byte_count writing for later.
-				 */
-				pv[pv_counter].dseg = dseg;
-				pv[pv_counter++].val = byte_count;
-			}
-		}
-		/* Write the first DWORD of each TXBB save earlier. */
-		if (pv_counter) {
-			/* Need a barrier before writing the byte_count. */
+			/* Need a barrier here before byte count store. */
 			rte_io_wmb();
-			for (--pv_counter; pv_counter  >= 0; pv_counter--)
-				pv[pv_counter].dseg->byte_count =
-						pv[pv_counter].val;
+			dseg->byte_count = rte_cpu_to_be_32(buf->data_len);
+			/* Fill the control parameters for this packet. */
+			ctrl->fence_size = (WQE_ONE_DATA_SEG_SIZE >> 4) & 0x3f;
+			nr_txbbs = 1;
+		} else {
+			nr_txbbs = mlx4_tx_burst_segs(buf, txq, &ctrl);
+			if (nr_txbbs < 0) {
+				elt->buf = NULL;
+				break;
+			}
 		}
-		/* Fill the control parameters for this packet. */
-		ctrl->fence_size = (wqe_real_size >> 4) & 0x3f;
 		/*
 		 * For raw Ethernet, the SOLICIT flag is used to indicate
 		 * that no ICRC should be calculated.
-- 
1.8.3.1
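
For illustration only (not part of this patch): a self-contained sketch
of the postponed byte_count writes performed by mlx4_tx_burst_segs()
above. The structure definitions and the io_wmb() macro are simplified
stand-ins for the driver's types and for rte_io_wmb(); only the control
flow mirrors the patch. A data segment that begins a fresh TXBB must not
receive a valid byte_count before the rest of its contents are visible
to the HCA, so those writes are saved in a bounce buffer and flushed
together after a single write barrier.

#include <stdint.h>

#define TXBB_SIZE 64 /* Tx basic block, assumed 64 bytes. */

/* Simplified stand-ins for the mlx4 data segment and struct pv. */
struct dseg { uint64_t addr; uint32_t lkey; uint32_t byte_count; };
struct pv { struct dseg *dseg; uint32_t val; };

/* Stand-in for rte_io_wmb(); a plain compiler barrier here. */
#define io_wmb() __asm__ volatile ("" ::: "memory")

static void
write_byte_counts(struct dseg *dseg, const uint32_t *counts, int n,
		  struct pv *pv)
{
	int pv_counter = 0;
	int i;

	for (i = 0; i < n; i++, dseg++) {
		if ((uintptr_t)dseg & (uintptr_t)(TXBB_SIZE - 1)) {
			/*
			 * Not the first segment of its TXBB: write the
			 * byte count now, after a barrier.
			 */
			io_wmb();
			dseg->byte_count = counts[i];
		} else {
			/* First segment of a TXBB: postpone the write. */
			pv[pv_counter].dseg = dseg;
			pv[pv_counter++].val = counts[i];
		}
	}
	if (pv_counter) {
		/* One barrier covers every postponed write. */
		io_wmb();
		for (--pv_counter; pv_counter >= 0; pv_counter--)
			pv[pv_counter].dseg->byte_count = pv[pv_counter].val;
	}
}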
