From: Nithin Dabilpuram <nithind1988@gmail.com>
To: pbhagavatula@marvell.com
Cc: jerinj@marvell.com, Kiran Kumar K <kirankumark@marvell.com>,
	Sunil Kumar Kori <skori@marvell.com>,
	Satha Rao <skoteshwar@marvell.com>,
	dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH v4 6/6] net/cnxk: add multi seg Tx vector routine
Date: Tue, 29 Jun 2021 12:55:31 +0530
Message-ID: <YNrK0IlF/0/Sx+Ck@gmail.com>
In-Reply-To: <20210628194144.637-6-pbhagavatula@marvell.com>

On Tue, Jun 29, 2021 at 01:11:43AM +0530, pbhagavatula@marvell.com wrote:
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Add multi segment Tx vector routine.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
>  drivers/net/cnxk/cn10k_tx.c          |  20 +-
>  drivers/net/cnxk/cn10k_tx.h          | 388 +++++++++++++++++++++++++--
>  drivers/net/cnxk/cn10k_tx_vec_mseg.c |  24 ++
>  drivers/net/cnxk/cn9k_tx.c           |  20 +-
>  drivers/net/cnxk/cn9k_tx.h           | 272 ++++++++++++++++++-
>  drivers/net/cnxk/cn9k_tx_vec_mseg.c  |  24 ++
>  drivers/net/cnxk/meson.build         |   6 +-
>  7 files changed, 709 insertions(+), 45 deletions(-)
>  create mode 100644 drivers/net/cnxk/cn10k_tx_vec_mseg.c
>  create mode 100644 drivers/net/cnxk/cn9k_tx_vec_mseg.c
> 
> diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
> index d06879163..1f30bab59 100644
> --- a/drivers/net/cnxk/cn10k_tx.c
> +++ b/drivers/net/cnxk/cn10k_tx.c
> @@ -67,13 +67,23 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
>  #undef T
>  	};
>  
> -	if (dev->scalar_ena)
> +	const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = {
> +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
> +	[f5][f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_vec_mseg_##name,
> +
> +		NIX_TX_FASTPATH_MODES
> +#undef T
> +	};
> +
> +	if (dev->scalar_ena) {
>  		pick_tx_func(eth_dev, nix_eth_tx_burst);
> -	else
> +		if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
> +			pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
> +	} else {
>  		pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
> -
> -	if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
> -		pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
> +		if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
> +			pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg);
> +	}
>  
>  	rte_mb();
>  }
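
A side note for readers following the dispatch logic: each of these
tables is indexed by the six Tx offload flag bits (f5..f0 of the T()
macro), so the matching specialized routine is selected in O(1). A
hypothetical lookup, assuming f0 is the L3/L4 checksum bit as the mode
names in the table suggest:

    /* Illustration only -- index order is [f5][f4][f3][f2][f1][f0]. */
    const eth_tx_burst_t fn = nix_eth_tx_vec_burst_mseg[0][0][0][0][0][1];
    /* fn == cn10k_nix_xmit_pkts_vec_mseg_l3l4csum: the multi-seg
     * vector routine with only L3/L4 checksum offload enabled.
     */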
> diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
> index 26797581e..532b53b31 100644
> --- a/drivers/net/cnxk/cn10k_tx.h
> +++ b/drivers/net/cnxk/cn10k_tx.h
> @@ -42,6 +42,13 @@
>  		}                                                              \
>  	} while (0)
>  
> +/* Encoded number of segments to number of dwords macro, each value of nb_segs
> + * is encoded as 4bits.
> + */
> +#define NIX_SEGDW_MAGIC 0x76654432210ULL
> +
> +#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
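
The magic constant reads as a nibble-wise lookup table: nibble n holds
the number of 16B dwords needed by the SG subdescriptors for n
segments (one SEND_SG header covers up to 3 segments, and the pointer
list is padded to 16B). A standalone sanity check of that reading:

    #include <assert.h>

    #define NIX_SEGDW_MAGIC 0x76654432210ULL
    #define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)

    int main(void)
    {
        /* nb_segs 1..9 map to 1, 2, 2, 3, 4, 4, 5, 6, 6 dwords */
        assert(NIX_NB_SEGS_TO_SEGDW(1) == 1);
        assert(NIX_NB_SEGS_TO_SEGDW(3) == 2); /* SG hdr + 3 ptrs */
        assert(NIX_NB_SEGS_TO_SEGDW(4) == 3); /* second SG subdesc */
        return 0;
    }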
> +
>  #define LMT_OFF(lmt_addr, lmt_num, offset)                                     \
>  	(void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))
>  
> @@ -102,6 +109,14 @@ cn10k_nix_tx_steor_data(const uint16_t flags)
>  	return data;
>  }
>  
> +static __rte_always_inline uint8_t
> +cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
> +{
> +	return ((flags & NIX_TX_NEED_EXT_HDR) ?
> +			      (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
> +			      4);
> +}
> +
>  static __rte_always_inline uint64_t
>  cn10k_nix_tx_steor_vec_data(const uint16_t flags)
>  {
> @@ -729,7 +744,244 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
>  	}
>  }
>  
> +static __rte_always_inline void
> +cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
> +				union nix_send_hdr_w0_u *sh,
> +				union nix_send_sg_s *sg, const uint32_t flags)
> +{
> +	struct rte_mbuf *m_next;
> +	uint64_t *slist, sg_u;
> +	uint16_t nb_segs;
> +	int i = 1;
> +
> +	sh->total = m->pkt_len;
> +	/* Clear sg->u header before use */
> +	sg->u &= 0xFC00000000000000;
> +	sg_u = sg->u;
> +	slist = &cmd[0];
> +
> +	sg_u = sg_u | ((uint64_t)m->data_len);
> +
> +	nb_segs = m->nb_segs - 1;
> +	m_next = m->next;
> +
> +	/* Set invert df if buffer is not to be freed by H/W */
> +	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
> +		sg_u |= (cnxk_nix_prefree_seg(m) << 55);
> +		/* Mark mempool object as "put" since it is freed by NIX */
> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> +	if (!(sg_u & (1ULL << 55)))
> +		__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
> +	rte_io_wmb();
> +#endif
> +
> +	m = m_next;
> +	/* Fill mbuf segments */
> +	do {
> +		m_next = m->next;
> +		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
> +		*slist = rte_mbuf_data_iova(m);
> +		/* Set invert df if buffer is not to be freed by H/W */
> +		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
> +			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
> +			/* Mark mempool object as "put" since it is freed by NIX
> +			 */
> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> +		if (!(sg_u & (1ULL << (i + 55))))
> +			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
> +		rte_io_wmb();
> +#endif
> +		slist++;
> +		i++;
> +		nb_segs--;
> +		if (i > 2 && nb_segs) {
> +			i = 0;
> +			/* Next SG subdesc */
> +			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
> +			sg->u = sg_u;
> +			sg->segs = 3;
> +			sg = (union nix_send_sg_s *)slist;
> +			sg_u = sg->u;
> +			slist++;
> +		}
> +		m = m_next;
> +	} while (nb_segs);
> +
> +	sg->u = sg_u;
> +	sg->segs = i;
> +}
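
For readers of this list builder, the sg_u word packs per-segment
state at fixed offsets (my reading of the code above, sketched here,
not quoted from the HRM):

    /* Slot i = 0..2 within one SEND_SG subdescriptor:
     *   size      : data_len << (i << 4)  -> bits [15:0]/[31:16]/[47:32]
     *   invert-DF : prefree << (i + 55)   -> bits 55..57; set when the
     *               mbuf must NOT be freed by hardware
     * The 0xFC00000000000000 mask keeps only the subdescriptor header
     * bits while clearing stale sizes, and a fresh subdescriptor is
     * chained every 3 segments via sg->segs = 3.
     */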
> +
> +static __rte_always_inline void
> +cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
> +			   uint64x2_t *cmd1, const uint8_t segdw,
> +			   const uint32_t flags)
> +{
> +	union nix_send_hdr_w0_u sh;
> +	union nix_send_sg_s sg;
> +
> +	if (m->nb_segs == 1) {
> +		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
> +			sg.u = vgetq_lane_u64(cmd1[0], 0);
> +			sg.u |= (cnxk_nix_prefree_seg(m) << 55);
> +			cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
> +		}
> +
> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> +		sg.u = vgetq_lane_u64(cmd1[0], 0);
> +		if (!(sg.u & (1ULL << 55)))
> +			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
> +		rte_io_wmb();
> +#endif
> +		return;
> +	}
> +
> +	sh.u = vgetq_lane_u64(cmd0[0], 0);
> +	sg.u = vgetq_lane_u64(cmd1[0], 0);
> +
> +	cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
> +
> +	sh.sizem1 = segdw - 1;
> +	cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
> +	cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
> +}
> +
>  #define NIX_DESCS_PER_LOOP 4
> +
> +static __rte_always_inline uint8_t
> +cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
> +			       uint64x2_t *cmd1, uint64x2_t *cmd2,
> +			       uint64x2_t *cmd3, uint8_t *segdw,
> +			       uint64_t *lmt_addr, __uint128_t *data128,
> +			       uint8_t *shift, const uint16_t flags)
> +{
> +	uint8_t j, off, lmt_used;
> +
> +	if (!(flags & NIX_TX_NEED_EXT_HDR) &&
> +	    !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
> +		/* No segments in 4 consecutive packets. */
> +		if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
> +			for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
> +				cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
> +							   &cmd0[j], &cmd1[j],
> +							   segdw[j], flags);
> +			vst1q_u64(lmt_addr, cmd0[0]);
> +			vst1q_u64(lmt_addr + 2, cmd1[0]);
> +			vst1q_u64(lmt_addr + 4, cmd0[1]);
> +			vst1q_u64(lmt_addr + 6, cmd1[1]);
> +			vst1q_u64(lmt_addr + 8, cmd0[2]);
> +			vst1q_u64(lmt_addr + 10, cmd1[2]);
> +			vst1q_u64(lmt_addr + 12, cmd0[3]);
> +			vst1q_u64(lmt_addr + 14, cmd1[3]);
> +
> +			*data128 |= ((__uint128_t)7) << *shift;
> +			shift += 3;
> +
> +			return 1;
> +		}
> +	}
> +
> +	lmt_used = 0;
> +	for (j = 0; j < NIX_DESCS_PER_LOOP;) {
> +		/* Fit consecutive packets in same LMTLINE. */
> +		if ((segdw[j] + segdw[j + 1]) <= 8) {
> +			if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
> +				cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
> +							   &cmd0[j], &cmd1[j],
> +							   segdw[j], flags);
> +				cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
> +							   &cmd0[j + 1],
> +							   &cmd1[j + 1],
> +							   segdw[j + 1], flags);
> +				/* TSTAMP takes 4 each, no segs. */
> +				vst1q_u64(lmt_addr, cmd0[j]);
> +				vst1q_u64(lmt_addr + 2, cmd2[j]);
> +				vst1q_u64(lmt_addr + 4, cmd1[j]);
> +				vst1q_u64(lmt_addr + 6, cmd3[j]);
> +
> +				vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
> +				vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
> +				vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
> +				vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
> +			} else if (flags & NIX_TX_NEED_EXT_HDR) {
> +				/* EXT header take 3 each, space for 2 segs.*/
> +				cn10k_nix_prepare_mseg_vec(mbufs[j],
> +							   lmt_addr + 6,
> +							   &cmd0[j], &cmd1[j],
> +							   segdw[j], flags);
> +				vst1q_u64(lmt_addr, cmd0[j]);
> +				vst1q_u64(lmt_addr + 2, cmd2[j]);
> +				vst1q_u64(lmt_addr + 4, cmd1[j]);
> +				off = segdw[j] - 3;
> +				off <<= 1;
> +				cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
> +							   lmt_addr + 12 + off,
> +							   &cmd0[j + 1],
> +							   &cmd1[j + 1],
> +							   segdw[j + 1], flags);
> +				vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
> +				vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
> +				vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
> +			} else {
> +				cn10k_nix_prepare_mseg_vec(mbufs[j],
> +							   lmt_addr + 4,
> +							   &cmd0[j], &cmd1[j],
> +							   segdw[j], flags);
> +				vst1q_u64(lmt_addr, cmd0[j]);
> +				vst1q_u64(lmt_addr + 2, cmd1[j]);
> +				off = segdw[j] - 2;
> +				off <<= 1;
> +				cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
> +							   lmt_addr + 8 + off,
> +							   &cmd0[j + 1],
> +							   &cmd1[j + 1],
> +							   segdw[j + 1], flags);
> +				vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
> +				vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
> +			}
> +			*data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
> +				    << *shift;
> +			*shift += 3;
> +			j += 2;
> +		} else {
> +			if ((flags & NIX_TX_NEED_EXT_HDR) &&
> +			    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
> +				cn10k_nix_prepare_mseg_vec(mbufs[j],
> +							   lmt_addr + 6,
> +							   &cmd0[j], &cmd1[j],
> +							   segdw[j], flags);
> +				vst1q_u64(lmt_addr, cmd0[j]);
> +				vst1q_u64(lmt_addr + 2, cmd2[j]);
> +				vst1q_u64(lmt_addr + 4, cmd1[j]);
> +				off = segdw[j] - 4;
> +				off <<= 1;
> +				vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
> +			} else if (flags & NIX_TX_NEED_EXT_HDR) {
> +				cn10k_nix_prepare_mseg_vec(mbufs[j],
> +							   lmt_addr + 6,
> +							   &cmd0[j], &cmd1[j],
> +							   segdw[j], flags);
> +				vst1q_u64(lmt_addr, cmd0[j]);
> +				vst1q_u64(lmt_addr + 2, cmd2[j]);
> +				vst1q_u64(lmt_addr + 4, cmd1[j]);
> +			} else {
> +				cn10k_nix_prepare_mseg_vec(mbufs[j],
> +							   lmt_addr + 4,
> +							   &cmd0[j], &cmd1[j],
> +							   segdw[j], flags);
> +				vst1q_u64(lmt_addr, cmd0[j]);
> +				vst1q_u64(lmt_addr + 2, cmd1[j]);
> +			}
> +			*data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
> +			*shift += 3;
> +			j++;
> +		}
> +		lmt_used++;
> +		lmt_addr += 16;
> +	}
> +
> +	return lmt_used;
> +}
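
Two conventions in this helper are worth spelling out (my
understanding of the cn10k LMTST scheme, treat it as a sketch): a
LMTLINE is 128B, i.e. 8 dwords, so two packets share one line only
when segdw[j] + segdw[j + 1] <= 8, and every line's size is recorded
as a 3-bit "dwords - 1" field packed into *data128 at *shift:

    /* e.g. segdw = {3, 4, 6, 2}:
     *   j = 0: 3 + 4 <= 8 -> packets 0,1 share line 0, field = 6
     *   j = 2: 6 + 2 <= 8 -> packets 2,3 share line 1, field = 7
     * so the function returns lmt_used = 2 and *data128 gains two
     * 3-bit size fields.
     */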
> +
>  static __rte_always_inline uint16_t
>  cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  			   uint16_t pkts, uint64_t *cmd, const uint16_t flags)
> @@ -738,7 +990,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  	uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
>  	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
>  		cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
> -	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
> +	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
>  	uint64x2_t senddesc01_w0, senddesc23_w0;
>  	uint64x2_t senddesc01_w1, senddesc23_w1;
>  	uint16_t left, scalar, burst, i, lmt_id;
> @@ -746,6 +998,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  	uint64x2_t sendext01_w1, sendext23_w1;
>  	uint64x2_t sendmem01_w0, sendmem23_w0;
>  	uint64x2_t sendmem01_w1, sendmem23_w1;
> +	uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
>  	uint64x2_t sgdesc01_w0, sgdesc23_w0;
>  	uint64x2_t sgdesc01_w1, sgdesc23_w1;
>  	struct cn10k_eth_txq *txq = tx_queue;
> @@ -754,7 +1007,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  	uint64x2_t ltypes01, ltypes23;
>  	uint64x2_t xtmp128, ytmp128;
>  	uint64x2_t xmask01, xmask23;
> -	uint8_t lnum;
> +	uint8_t lnum, shift;
> +	union wdata {
> +		__uint128_t data128;
> +		uint64_t data[2];
> +	} wd;
>  
>  	NIX_XMIT_FC_OR_RETURN(txq, pkts);
>  
> @@ -798,8 +1055,43 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  	burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
>  			      cn10k_nix_pkts_per_vec_brst(flags) :
>  			      left;
> +	if (flags & NIX_TX_MULTI_SEG_F) {
> +		wd.data128 = 0;
> +		shift = 16;
> +	}
>  	lnum = 0;
> +
>  	for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
> +		if (flags & NIX_TX_MULTI_SEG_F) {
> +			struct rte_mbuf *m = tx_pkts[j];
> +			uint8_t j;
[Nithin] The "struct rte_mbuf *m = tx_pkts[j];" line uses j before it
is declared and initialized; I guess it moved out of the below loop
while rebasing.
With this fixed,

Series-acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com>

> +
> +			for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
> +				/* Get dwords based on nb_segs. */
> +				segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
> +				/* Add dwords based on offloads. */
> +				segdw[j] += 1 + /* SEND HDR */
> +					    !!(flags & NIX_TX_NEED_EXT_HDR) +
> +					    !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
> +			}
> +
> +			/* Check if there are enough LMTLINES for this loop */
> +			if (lnum + 4 > 32) {
> +				uint8_t ldwords_con = 0, lneeded = 0;
> +				for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
> +					ldwords_con += segdw[j];
> +					if (ldwords_con > 8) {
> +						lneeded += 1;
> +						ldwords_con = segdw[j];
> +					}
> +				}
> +				lneeded += 1;
> +				if (lnum + lneeded > 32) {
> +					burst = i;
> +					break;
> +				}
> +			}
> +		}
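
To make the bound concrete: the burst window has 32 LMTLINEs, and the
worst case for the next four packets is one line each, hence the cheap
lnum + 4 > 32 pre-check before the exact count. Working the exact path
with a hypothetical segdw = {5, 5, 5, 5}:

    /* ldwords_con: 5, then 10 > 8 -> lneeded = 1, restart at 5; the
     * same happens for j = 2 and j = 3, so lneeded = 3, plus 1 for
     * the still-open line = 4. If lnum + 4 > 32, burst is trimmed to
     * i and the remainder is sent in the next "again" round.
     */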
>  		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
>  		senddesc01_w0 =
>  			vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
> @@ -1527,7 +1819,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  			sendext23_w0 = vld1q_u64(sx_w0 + 2);
>  		}
>  
> -		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
> +		if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
> +		    !(flags & NIX_TX_MULTI_SEG_F)) {
>  			/* Set don't free bit if reference count > 1 */
>  			xmask01 = vdupq_n_u64(0);
>  			xmask23 = xmask01;
> @@ -1567,7 +1860,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  					(void **)&mbuf3, 1, 0);
>  			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
>  			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
> -		} else {
> +		} else if (!(flags & NIX_TX_MULTI_SEG_F)) {
>  			/* Move mbufs to iova */
>  			mbuf0 = (uint64_t *)tx_pkts[0];
>  			mbuf1 = (uint64_t *)tx_pkts[1];
> @@ -1612,7 +1905,19 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
>  		}
>  
> -		if (flags & NIX_TX_NEED_EXT_HDR) {
> +		if (flags & NIX_TX_MULTI_SEG_F) {
> +			uint8_t j;
> +
> +			segdw[4] = 8;
> +			j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
> +							  cmd2, cmd3, segdw,
> +							  (uint64_t *)
> +							  LMT_OFF(laddr, lnum,
> +								  0),
> +							  &wd.data128, &shift,
> +							  flags);
> +			lnum += j;
> +		} else if (flags & NIX_TX_NEED_EXT_HDR) {
>  			/* Store the prepared send desc to LMT lines */
>  			if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
>  				vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
> @@ -1664,34 +1969,55 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
>  	}
>  
> +	if (flags & NIX_TX_MULTI_SEG_F)
> +		wd.data[0] >>= 16;
> +
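
The >>= 16 above is the counterpart of starting shift at 16 in the
fill loop: it brings the first line's "dwords - 1" into bits [2:0] so
it can be folded into pa below, and the <<= 16 in the branches
restores the remaining per-line size fields before the line count and
LMT ID are ORed in. The resulting STEORL argument, as I read the code
(bit positions are my inference, not the HRM's wording):

    /* wd.data[0] just before roc_lmt_submit_steorl():
     *   low bits : LMT ID of the first line
     *   [15:12]  : number of LMT lines - 1 (15 -> 16 lines)
     *   [19+]    : 3-bit "dwords - 1" field per remaining line
     */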
>  	/* Trigger LMTST */
>  	if (lnum > 16) {
> -		data = cn10k_nix_tx_steor_vec_data(flags);
> -		pa = io_addr | (data & 0x7) << 4;
> -		data &= ~0x7ULL;
> -		data |= (15ULL << 12);
> -		data |= (uint64_t)lmt_id;
> +		if (!(flags & NIX_TX_MULTI_SEG_F))
> +			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
> +
> +		pa = io_addr | (wd.data[0] & 0x7) << 4;
> +		wd.data[0] &= ~0x7ULL;
> +
> +		if (flags & NIX_TX_MULTI_SEG_F)
> +			wd.data[0] <<= 16;
> +
> +		wd.data[0] |= (15ULL << 12);
> +		wd.data[0] |= (uint64_t)lmt_id;
>  
>  		/* STEOR0 */
> -		roc_lmt_submit_steorl(data, pa);
> +		roc_lmt_submit_steorl(wd.data[0], pa);
>  
> -		data = cn10k_nix_tx_steor_vec_data(flags);
> -		pa = io_addr | (data & 0x7) << 4;
> -		data &= ~0x7ULL;
> -		data |= ((uint64_t)(lnum - 17)) << 12;
> -		data |= (uint64_t)(lmt_id + 16);
> +		if (!(flags & NIX_TX_MULTI_SEG_F))
> +			wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
> +
> +		pa = io_addr | (wd.data[1] & 0x7) << 4;
> +		wd.data[1] &= ~0x7ULL;
> +
> +		if (flags & NIX_TX_MULTI_SEG_F)
> +			wd.data[1] <<= 16;
> +
> +		wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
> +		wd.data[1] |= (uint64_t)(lmt_id + 16);
>  
>  		/* STEOR1 */
> -		roc_lmt_submit_steorl(data, pa);
> +		roc_lmt_submit_steorl(wd.data[1], pa);
>  	} else if (lnum) {
> -		data = cn10k_nix_tx_steor_vec_data(flags);
> -		pa = io_addr | (data & 0x7) << 4;
> -		data &= ~0x7ULL;
> -		data |= ((uint64_t)(lnum - 1)) << 12;
> -		data |= lmt_id;
> +		if (!(flags & NIX_TX_MULTI_SEG_F))
> +			wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
> +
> +		pa = io_addr | (wd.data[0] & 0x7) << 4;
> +		wd.data[0] &= ~0x7ULL;
> +
> +		if (flags & NIX_TX_MULTI_SEG_F)
> +			wd.data[0] <<= 16;
> +
> +		wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
> +		wd.data[0] |= lmt_id;
>  
>  		/* STEOR0 */
> -		roc_lmt_submit_steorl(data, pa);
> +		roc_lmt_submit_steorl(wd.data[0], pa);
>  	}
>  
>  	left -= burst;
> @@ -1699,9 +2025,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  	if (left)
>  		goto again;
>  
> -	if (unlikely(scalar))
> -		pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd,
> -					    flags);
> +	if (unlikely(scalar)) {
> +		if (flags & NIX_TX_MULTI_SEG_F)
> +			pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
> +							 scalar, cmd, flags);
> +		else
> +			pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
> +						    cmd, flags);
> +	}
>  
>  	return pkts;
>  }
> @@ -1866,7 +2197,10 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum,	1, 1, 1, 1, 1, 1,	8,	\
>  		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
>  									       \
>  	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
> -		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
> +		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
> +									       \
> +	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
> +		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
>  
>  NIX_TX_FASTPATH_MODES
>  #undef T
> diff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
> new file mode 100644
> index 000000000..1fad81dba
> --- /dev/null
> +++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c
> @@ -0,0 +1,24 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2021 Marvell.
> + */
> +
> +#include "cn10k_ethdev.h"
> +#include "cn10k_tx.h"
> +
> +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
> +	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
> +		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
> +	{                                                                      \
> +		uint64_t cmd[sz];                                              \
> +									       \
> +		/* For TSO inner checksum is a must */                         \
> +		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
> +		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
> +			return 0;                                              \
> +		return cn10k_nix_xmit_pkts_vector(                             \
> +			tx_queue, tx_pkts, pkts, cmd,                          \
> +			(flags) | NIX_TX_MULTI_SEG_F);                         \
> +	}
> +
> +NIX_TX_FASTPATH_MODES
> +#undef T
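
For reference, each T() instantiation is mechanical. Taking what I
believe is the simplest entry of NIX_TX_FASTPATH_MODES (the no-offload
mode with sz = 4 and flags = NIX_TX_OFFLOAD_NONE), the macro expands
roughly to:

    uint16_t __rte_noinline __rte_hot
    cn10k_nix_xmit_pkts_vec_mseg_no_offload(void *tx_queue,
                                            struct rte_mbuf **tx_pkts,
                                            uint16_t pkts)
    {
        uint64_t cmd[4];

        /* The TSO guard compiles out since the TSO flag is clear, so
         * this is a tail call with the multi-seg flag folded in.
         */
        return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,
                                          NIX_TX_OFFLOAD_NONE |
                                          NIX_TX_MULTI_SEG_F);
    }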
> diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
> index 735e21cc6..763f9a14f 100644
> --- a/drivers/net/cnxk/cn9k_tx.c
> +++ b/drivers/net/cnxk/cn9k_tx.c
> @@ -66,13 +66,23 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
>  #undef T
>  	};
>  
> -	if (dev->scalar_ena)
> +	const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = {
> +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)			       \
> +	[f5][f4][f3][f2][f1][f0] = cn9k_nix_xmit_pkts_vec_mseg_##name,
> +
> +		NIX_TX_FASTPATH_MODES
> +#undef T
> +	};
> +
> +	if (dev->scalar_ena) {
>  		pick_tx_func(eth_dev, nix_eth_tx_burst);
> -	else
> +		if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
> +			pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
> +	} else {
>  		pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
> -
> -	if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
> -		pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
> +		if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
> +			pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg);
> +	}
>  
>  	rte_mb();
>  }
> diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
> index dca732a9f..ed65cd351 100644
> --- a/drivers/net/cnxk/cn9k_tx.h
> +++ b/drivers/net/cnxk/cn9k_tx.h
> @@ -582,7 +582,238 @@ cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
>  	}
>  }
>  
> +static __rte_always_inline uint8_t
> +cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
> +			       union nix_send_hdr_w0_u *sh,
> +			       union nix_send_sg_s *sg, const uint32_t flags)
> +{
> +	struct rte_mbuf *m_next;
> +	uint64_t *slist, sg_u;
> +	uint16_t nb_segs;
> +	uint64_t segdw;
> +	int i = 1;
> +
> +	sh->total = m->pkt_len;
> +	/* Clear sg->u header before use */
> +	sg->u &= 0xFC00000000000000;
> +	sg_u = sg->u;
> +	slist = &cmd[0];
> +
> +	sg_u = sg_u | ((uint64_t)m->data_len);
> +
> +	nb_segs = m->nb_segs - 1;
> +	m_next = m->next;
> +
> +	/* Set invert df if buffer is not to be freed by H/W */
> +	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
> +		sg_u |= (cnxk_nix_prefree_seg(m) << 55);
> +		/* Mark mempool object as "put" since it is freed by NIX */
> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> +	if (!(sg_u & (1ULL << 55)))
> +		__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
> +	rte_io_wmb();
> +#endif
> +
> +	m = m_next;
> +	/* Fill mbuf segments */
> +	do {
> +		m_next = m->next;
> +		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
> +		*slist = rte_mbuf_data_iova(m);
> +		/* Set invert df if buffer is not to be freed by H/W */
> +		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
> +			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
> +			/* Mark mempool object as "put" since it is freed by NIX
> +			 */
> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> +		if (!(sg_u & (1ULL << (i + 55))))
> +			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
> +		rte_io_wmb();
> +#endif
> +		slist++;
> +		i++;
> +		nb_segs--;
> +		if (i > 2 && nb_segs) {
> +			i = 0;
> +			/* Next SG subdesc */
> +			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
> +			sg->u = sg_u;
> +			sg->segs = 3;
> +			sg = (union nix_send_sg_s *)slist;
> +			sg_u = sg->u;
> +			slist++;
> +		}
> +		m = m_next;
> +	} while (nb_segs);
> +
> +	sg->u = sg_u;
> +	sg->segs = i;
> +	segdw = (uint64_t *)slist - (uint64_t *)&cmd[0];
> +
> +	segdw += 2;
> +	/* Roundup extra dwords to multiple of 2 */
> +	segdw = (segdw >> 1) + (segdw & 0x1);
> +	/* Default dwords */
> +	segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +
> +		 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
> +	sh->sizem1 = segdw - 1;
> +
> +	return segdw;
> +}
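
A worked example of the dword math above, assuming no ext header or
timestamp: for nb_segs = 4 the loop writes two pointers, one chained
SG header and a final pointer into slist, so:

    /* segdw = slist - &cmd[0] = 4 u64s
     *        + 2   (first SEND_SG header + first pointer)
     *        = 6 u64s -> (6 >> 1) + (6 & 1) = 3 16B dwords
     *        + 1   (SEND_HDR)
     *        = 4, which matches NIX_NB_SEGS_TO_SEGDW(4) + 1 on cn10k.
     */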
> +
> +static __rte_always_inline uint8_t
> +cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
> +			  uint64x2_t *cmd1, const uint32_t flags)
> +{
> +	union nix_send_hdr_w0_u sh;
> +	union nix_send_sg_s sg;
> +	uint8_t ret;
> +
> +	if (m->nb_segs == 1) {
> +		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
> +			sg.u = vgetq_lane_u64(cmd1[0], 0);
> +			sg.u |= (cnxk_nix_prefree_seg(m) << 55);
> +			cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
> +		}
> +
> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> +		sg.u = vgetq_lane_u64(cmd1[0], 0);
> +		if (!(sg.u & (1ULL << 55)))
> +			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
> +		rte_io_wmb();
> +#endif
> +		return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) +
> +		       !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
> +	}
> +
> +	sh.u = vgetq_lane_u64(cmd0[0], 0);
> +	sg.u = vgetq_lane_u64(cmd1[0], 0);
> +
> +	ret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
> +
> +	cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
> +	cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
> +	return ret;
> +}
> +
>  #define NIX_DESCS_PER_LOOP 4
> +
> +static __rte_always_inline void
> +cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,
> +			       uint64x2_t *cmd2, uint64x2_t *cmd3,
> +			       uint8_t *segdw,
> +			       uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2],
> +			       uint64_t *lmt_addr, rte_iova_t io_addr,
> +			       const uint32_t flags)
> +{
> +	uint64_t lmt_status;
> +	uint8_t j, off;
> +
> +	if (!(flags & NIX_TX_NEED_EXT_HDR) &&
> +	    !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
> +		/* No segments in 4 consecutive packets. */
> +		if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
> +			do {
> +				vst1q_u64(lmt_addr, cmd0[0]);
> +				vst1q_u64(lmt_addr + 2, cmd1[0]);
> +				vst1q_u64(lmt_addr + 4, cmd0[1]);
> +				vst1q_u64(lmt_addr + 6, cmd1[1]);
> +				vst1q_u64(lmt_addr + 8, cmd0[2]);
> +				vst1q_u64(lmt_addr + 10, cmd1[2]);
> +				vst1q_u64(lmt_addr + 12, cmd0[3]);
> +				vst1q_u64(lmt_addr + 14, cmd1[3]);
> +				lmt_status = roc_lmt_submit_ldeor(io_addr);
> +			} while (lmt_status == 0);
> +
> +			return;
> +		}
> +	}
> +
> +	for (j = 0; j < NIX_DESCS_PER_LOOP;) {
> +		/* Fit consecutive packets in same LMTLINE. */
> +		if ((segdw[j] + segdw[j + 1]) <= 8) {
> +again0:
> +			if ((flags & NIX_TX_NEED_EXT_HDR) &&
> +			    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
> +				vst1q_u64(lmt_addr, cmd0[j]);
> +				vst1q_u64(lmt_addr + 2, cmd2[j]);
> +				vst1q_u64(lmt_addr + 4, cmd1[j]);
> +				/* Copy segs */
> +				off = segdw[j] - 4;
> +				roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
> +				off <<= 1;
> +				vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
> +
> +				vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]);
> +				vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]);
> +				vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]);
> +				roc_lmt_mov_seg(lmt_addr + 14 + off,
> +						slist[j + 1], segdw[j + 1] - 4);
> +				off += ((segdw[j + 1] - 4) << 1);
> +				vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]);
> +			} else if (flags & NIX_TX_NEED_EXT_HDR) {
> +				vst1q_u64(lmt_addr, cmd0[j]);
> +				vst1q_u64(lmt_addr + 2, cmd2[j]);
> +				vst1q_u64(lmt_addr + 4, cmd1[j]);
> +				/* Copy segs */
> +				off = segdw[j] - 3;
> +				roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
> +				off <<= 1;
> +				vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
> +				vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
> +				vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
> +				roc_lmt_mov_seg(lmt_addr + 12 + off,
> +						slist[j + 1], segdw[j + 1] - 3);
> +			} else {
> +				vst1q_u64(lmt_addr, cmd0[j]);
> +				vst1q_u64(lmt_addr + 2, cmd1[j]);
> +				/* Copy segs */
> +				off = segdw[j] - 2;
> +				roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
> +				off <<= 1;
> +				vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
> +				vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
> +				roc_lmt_mov_seg(lmt_addr + 8 + off,
> +						slist[j + 1], segdw[j + 1] - 2);
> +			}
> +			lmt_status = roc_lmt_submit_ldeor(io_addr);
> +			if (lmt_status == 0)
> +				goto again0;
> +			j += 2;
> +		} else {
> +again1:
> +			if ((flags & NIX_TX_NEED_EXT_HDR) &&
> +			    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
> +				vst1q_u64(lmt_addr, cmd0[j]);
> +				vst1q_u64(lmt_addr + 2, cmd2[j]);
> +				vst1q_u64(lmt_addr + 4, cmd1[j]);
> +				/* Copy segs */
> +				off = segdw[j] - 4;
> +				roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
> +				off <<= 1;
> +				vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
> +			} else if (flags & NIX_TX_NEED_EXT_HDR) {
> +				vst1q_u64(lmt_addr, cmd0[j]);
> +				vst1q_u64(lmt_addr + 2, cmd2[j]);
> +				vst1q_u64(lmt_addr + 4, cmd1[j]);
> +				/* Copy segs */
> +				off = segdw[j] - 3;
> +				roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
> +			} else {
> +				vst1q_u64(lmt_addr, cmd0[j]);
> +				vst1q_u64(lmt_addr + 2, cmd1[j]);
> +				/* Copy segs */
> +				off = segdw[j] - 2;
> +				roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
> +			}
> +			lmt_status = roc_lmt_submit_ldeor(io_addr);
> +			if (lmt_status == 0)
> +				goto again1;
> +			j += 1;
> +		}
> +	}
> +}
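
Worth noting when diffing against the cn10k version: cn9k, as I
understand it, works with a single 128B LMTLINE, so each assembled
line is submitted immediately with LDEOR and retried until the store
sticks, instead of cn10k's batch of up to 32 lines released by one
STEORL. The retry idiom used throughout the function, in isolation:

    uint64_t lmt_status;

    do {
        /* (Re)copy the descriptors each attempt: a failed LDEOR does
         * not guarantee the LMT region contents were consumed.
         */
        vst1q_u64(lmt_addr, cmd0[j]);
        vst1q_u64(lmt_addr + 2, cmd1[j]);
        lmt_status = roc_lmt_submit_ldeor(io_addr);
    } while (lmt_status == 0);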
> +
>  static __rte_always_inline uint16_t
>  cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  			  uint16_t pkts, uint64_t *cmd, const uint16_t flags)
> @@ -1380,7 +1611,8 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  			sendext23_w0 = vld1q_u64(sx_w0 + 2);
>  		}
>  
> -		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
> +		if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
> +		    !(flags & NIX_TX_MULTI_SEG_F)) {
>  			/* Set don't free bit if reference count > 1 */
>  			xmask01 = vdupq_n_u64(0);
>  			xmask23 = xmask01;
> @@ -1424,7 +1656,7 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  			 * cnxk_nix_prefree_seg are written before LMTST.
>  			 */
>  			rte_io_wmb();
> -		} else {
> +		} else if (!(flags & NIX_TX_MULTI_SEG_F)) {
>  			/* Move mbufs to iova */
>  			mbuf0 = (uint64_t *)tx_pkts[0];
>  			mbuf1 = (uint64_t *)tx_pkts[1];
> @@ -1472,7 +1704,27 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
>  		}
>  
> -		if (flags & NIX_TX_NEED_EXT_HDR) {
> +		if (flags & NIX_TX_MULTI_SEG_F) {
> +			uint64_t seg_list[NIX_DESCS_PER_LOOP]
> +					 [CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
> +			uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1];
> +
> +			/* Build mseg list for each packet individually. */
> +			for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
> +				segdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j],
> +							seg_list[j], &cmd0[j],
> +							&cmd1[j], flags);
> +			segdw[4] = 8;
> +
> +			/* Commit all changes to mbuf before LMTST. */
> +			if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
> +				rte_io_wmb();
> +
> +			cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3,
> +						       segdw, seg_list,
> +						       lmt_addr, io_addr,
> +						       flags);
> +		} else if (flags & NIX_TX_NEED_EXT_HDR) {
>  			/* With ext header in the command we can no longer send
>  			 * all 4 packets together since LMTLINE is 128bytes.
>  			 * Split and Tx twice.
> @@ -1534,9 +1786,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
>  	}
>  
> -	if (unlikely(pkts_left))
> -		pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd,
> -					   flags);
> +	if (unlikely(pkts_left)) {
> +		if (flags & NIX_TX_MULTI_SEG_F)
> +			pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
> +							pkts_left, cmd, flags);
> +		else
> +			pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left,
> +						   cmd, flags);
> +	}
>  
>  	return pkts;
>  }
> @@ -1701,6 +1958,9 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum,	1, 1, 1, 1, 1, 1,	8,	       \
>  		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
>  									       \
>  	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name(       \
> +		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
> +									       \
> +	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name(  \
>  		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
>  
>  NIX_TX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn9k_tx_vec_mseg.c b/drivers/net/cnxk/cn9k_tx_vec_mseg.c
> new file mode 100644
> index 000000000..0256efd45
> --- /dev/null
> +++ b/drivers/net/cnxk/cn9k_tx_vec_mseg.c
> @@ -0,0 +1,24 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2021 Marvell.
> + */
> +
> +#include "cn9k_ethdev.h"
> +#include "cn9k_tx.h"
> +
> +#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
> +	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name(  \
> +		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
> +	{                                                                      \
> +		uint64_t cmd[sz];                                              \
> +									       \
> +		/* For TSO inner checksum is a must */                         \
> +		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
> +		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
> +			return 0;                                              \
> +		return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
> +						 (flags) |                     \
> +							 NIX_TX_MULTI_SEG_F);  \
> +	}
> +
> +NIX_TX_FASTPATH_MODES
> +#undef T
> diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
> index aa8c7253f..361f7ce84 100644
> --- a/drivers/net/cnxk/meson.build
> +++ b/drivers/net/cnxk/meson.build
> @@ -26,7 +26,8 @@ sources += files('cn9k_ethdev.c',
>  		 'cn9k_rx_vec_mseg.c',
>  		 'cn9k_tx.c',
>  		 'cn9k_tx_mseg.c',
> -		 'cn9k_tx_vec.c')
> +		 'cn9k_tx_vec.c',
> +		 'cn9k_tx_vec_mseg.c')
>  # CN10K
>  sources += files('cn10k_ethdev.c',
>  		 'cn10k_rte_flow.c',
> @@ -36,7 +37,8 @@ sources += files('cn10k_ethdev.c',
>  		 'cn10k_rx_vec_mseg.c',
>  		 'cn10k_tx.c',
>  		 'cn10k_tx_mseg.c',
> -		 'cn10k_tx_vec.c')
> +		 'cn10k_tx_vec.c',
> +		 'cn10k_tx_vec_mseg.c')
>  
>  deps += ['bus_pci', 'cryptodev', 'eventdev', 'security']
>  deps += ['common_cnxk', 'mempool_cnxk']
> -- 
> 2.17.1
> 
