All of lore.kernel.org
 help / color / mirror / Atom feed
From: <pbhagavatula@marvell.com>
To: <jerinj@marvell.com>, Nithin Dabilpuram <ndabilpuram@marvell.com>,
	"Kiran Kumar K" <kirankumark@marvell.com>,
	Sunil Kumar Kori <skori@marvell.com>,
	Satha Rao <skoteshwar@marvell.com>,
	Pavan Nikhilesh <pbhagavatula@marvell.com>,
	Shijith Thotton <sthotton@marvell.com>
Cc: <dev@dpdk.org>
Subject: [PATCH v4 1/3] event/cnxk: store and reuse workslot status
Date: Thu, 10 Feb 2022 18:50:44 +0530	[thread overview]
Message-ID: <20220210132047.2429-1-pbhagavatula@marvell.com> (raw)
In-Reply-To: <20220210101940.1669-1-pbhagavatula@marvell.com>

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Store and reuse workslot status for TT, GRP and HEAD status
instead of reading from GWC as reading from GWC imposes
additional latency.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 Depends-on: 21590

 v4 Changes:
 - Update commit title for 3/3

 v3 Changes:
 - Split and rebase patches.

 v2 Changes:
 - Rebase.
 - Fix incorrect use of RoC API

 drivers/common/cnxk/roc_sso.h      | 14 ++++++++------
 drivers/event/cnxk/cn10k_worker.h  | 16 +++++++++-------
 drivers/event/cnxk/cn9k_worker.h   |  6 +++---
 drivers/event/cnxk/cnxk_eventdev.h |  2 ++
 drivers/event/cnxk/cnxk_worker.h   | 11 +++++++----
 drivers/net/cnxk/cn10k_tx.h        | 12 ++++++------
 6 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 27d49c6c68..ab7cee1c60 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -54,12 +54,13 @@ struct roc_sso {
 	uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
 } __plt_cache_aligned;

-static __plt_always_inline void
-roc_sso_hws_head_wait(uintptr_t tag_op)
+static __plt_always_inline uint64_t
+roc_sso_hws_head_wait(uintptr_t base)
 {
-#ifdef RTE_ARCH_ARM64
+	uintptr_t tag_op = base + SSOW_LF_GWS_TAG;
 	uint64_t tag;

+#if defined(__aarch64__)
 	asm volatile(PLT_CPU_FEATURE_PREAMBLE
 		     "		ldr %[tag], [%[tag_op]]	\n"
 		     "		tbnz %[tag], 35, done%=		\n"
@@ -71,10 +72,11 @@ roc_sso_hws_head_wait(uintptr_t tag_op)
 		     : [tag] "=&r"(tag)
 		     : [tag_op] "r"(tag_op));
 #else
-	/* Wait for the SWTAG/SWTAG_FULL operation */
-	while (!(plt_read64(tag_op) & BIT_ULL(35)))
-		;
+	do {
+		tag = plt_read64(tag_op);
+	} while (!(tag & BIT_ULL(35)));
 #endif
+	return tag;
 }

 /* SSO device initialization */
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index ff08b2d974..ada230ea1d 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -40,8 +40,7 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
 {
 	const uint32_t tag = (uint32_t)ev->event;
 	const uint8_t new_tt = ev->sched_type;
-	const uint8_t cur_tt =
-		CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0));
+	const uint8_t cur_tt = CNXK_TT_FROM_TAG(ws->gw_rdata);

 	/* CNXK model
 	 * cur_tt/new_tt     SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
@@ -81,7 +80,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
 	const uint8_t grp = ev->queue_id;

 	/* Group hasn't changed, Use SWTAG to forward the event */
-	if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp)
+	if (CNXK_GRP_FROM_TAG(ws->gw_rdata) == grp)
 		cn10k_sso_hws_fwd_swtag(ws, ev);
 	else
 		/*
@@ -211,6 +210,7 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
 	} while (gw.u64[0] & BIT_ULL(63));
 	mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
 #endif
+	ws->gw_rdata = gw.u64[0];
 	gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
 		    (gw.u64[0] & (0x3FFull << 36)) << 4 |
 		    (gw.u64[0] & 0xffffffff);
@@ -405,7 +405,8 @@ NIX_RX_FASTPATH_MODES
 		RTE_SET_USED(timeout_ticks);                                   \
 		if (ws->swtag_req) {                                           \
 			ws->swtag_req = 0;                                     \
-			cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);  \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(                \
+				ws->base + SSOW_LF_GWS_WQE0);                  \
 			return 1;                                              \
 		}                                                              \
 		return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem);  \
@@ -424,7 +425,8 @@ NIX_RX_FASTPATH_MODES
 		uint64_t iter;                                                 \
 		if (ws->swtag_req) {                                           \
 			ws->swtag_req = 0;                                     \
-			cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0);  \
+			ws->gw_rdata = cnxk_sso_hws_swtag_wait(                \
+				ws->base + SSOW_LF_GWS_WQE0);                  \
 			return ret;                                            \
 		}                                                              \
 		ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem);   \
@@ -507,8 +509,8 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
 	else
 		pa = txq->io_addr | ((segdw - 1) << 4);

-	if (!sched_type)
-		roc_sso_hws_head_wait(ws->base + SSOW_LF_GWS_TAG);
+	if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
+		ws->gw_rdata = roc_sso_hws_head_wait(ws->base);

 	roc_lmt_submit_steorl(lmt_id, pa);
 }
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 303b04c215..8455272005 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -700,7 +700,7 @@ cn9k_sso_hws_xmit_sec_one(const struct cn9k_eth_txq *txq, uint64_t base,

 	/* Head wait if needed */
 	if (base)
-		roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+		roc_sso_hws_head_wait(base);

 	/* ESN */
 	outb_priv = roc_nix_inl_onf_ipsec_outb_sa_sw_rsvd((void *)sa);
@@ -793,7 +793,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
 					     flags);
 		if (!CNXK_TT_FROM_EVENT(ev->event)) {
 			cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
-			roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+			roc_sso_hws_head_wait(base);
 			cn9k_sso_txq_fc_wait(txq);
 			if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
 				cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
@@ -806,7 +806,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
 		cn9k_nix_xmit_prepare_tstamp(txq, cmd, m->ol_flags, 4, flags);
 		if (!CNXK_TT_FROM_EVENT(ev->event)) {
 			cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
-			roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+			roc_sso_hws_head_wait(base);
 			cn9k_sso_txq_fc_wait(txq);
 			if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
 				cn9k_nix_xmit_one(cmd, txq->lmt_addr,
diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
index b26df58588..ab58508590 100644
--- a/drivers/event/cnxk/cnxk_eventdev.h
+++ b/drivers/event/cnxk/cnxk_eventdev.h
@@ -47,6 +47,7 @@
 #define CNXK_CLR_SUB_EVENT(x)	    (~(0xffu << 20) & x)
 #define CNXK_GRP_FROM_TAG(x)	    (((x) >> 36) & 0x3ff)
 #define CNXK_SWTAG_PEND(x)	    (BIT_ULL(62) & x)
+#define CNXK_TAG_IS_HEAD(x)	    (BIT_ULL(35) & x)

 #define CN9K_SSOW_GET_BASE_ADDR(_GW) ((_GW)-SSOW_LF_GWS_OP_GET_WORK0)

@@ -123,6 +124,7 @@ struct cnxk_sso_evdev {

 struct cn10k_sso_hws {
 	uint64_t base;
+	uint64_t gw_rdata;
 	/* PTP timestamp */
 	struct cnxk_timesync_info *tstamp;
 	void *lookup_mem;
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 9f9ceab8a1..7de03f3fbb 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -52,11 +52,11 @@ cnxk_sso_hws_swtag_flush(uint64_t tag_op, uint64_t flush_op)
 	plt_write64(0, flush_op);
 }

-static __rte_always_inline void
+static __rte_always_inline uint64_t
 cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
 {
-#ifdef RTE_ARCH_ARM64
 	uint64_t swtp;
+#ifdef RTE_ARCH_ARM64

 	asm volatile(PLT_CPU_FEATURE_PREAMBLE
 		     "		ldr %[swtb], [%[swtp_loc]]	\n"
@@ -70,9 +70,12 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
 		     : [swtp_loc] "r"(tag_op));
 #else
 	/* Wait for the SWTAG/SWTAG_FULL operation */
-	while (plt_read64(tag_op) & BIT_ULL(62))
-		;
+	do {
+		swtp = plt_read64(tag_op);
+	} while (swtp & BIT_ULL(62));
 #endif
+
+	return swtp;
 }

 #endif
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 4ae6bbf517..ec6366168c 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -905,8 +905,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
 			lnum++;
 	}

-	if (flags & NIX_TX_VWQE_F)
-		roc_sso_hws_head_wait(ws[0]);
+	if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
+		ws[1] = roc_sso_hws_head_wait(ws[0]);

 	left -= burst;
 	tx_pkts += burst;
@@ -1041,8 +1041,8 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
 		}
 	}

-	if (flags & NIX_TX_VWQE_F)
-		roc_sso_hws_head_wait(ws[0]);
+	if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
+		ws[1] = roc_sso_hws_head_wait(ws[0]);

 	left -= burst;
 	tx_pkts += burst;
@@ -2582,8 +2582,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 	if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
 		wd.data[0] >>= 16;

-	if (flags & NIX_TX_VWQE_F)
-		roc_sso_hws_head_wait(ws[0]);
+	if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
+		ws[1] = roc_sso_hws_head_wait(ws[0]);

 	left -= burst;

--
2.17.1


  parent reply	other threads:[~2022-02-10 13:20 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-19  7:13 [PATCH v2 1/4] net/cnxk: avoid command copy from Tx queue pbhagavatula
2022-01-19  7:13 ` [PATCH v2 2/4] event/cnxk: store and reuse workslot status pbhagavatula
2022-01-19  7:13 ` [PATCH v2 3/4] event/cnxk: disable default wait time for dequeue pbhagavatula
2022-01-19  7:13 ` [PATCH v2 4/4] net/cnxk: improve Rx performance pbhagavatula
2022-02-07 14:03 ` [PATCH v2 1/4] net/cnxk: avoid command copy from Tx queue Jerin Jacob
2022-02-10 10:13 ` [PATCH v3] " pbhagavatula
2022-02-10 10:19   ` Jerin Jacob
2022-02-10 13:15   ` [PATCH v4] " pbhagavatula
2022-02-11 10:27     ` Jerin Jacob
2022-02-10 10:19 ` [PATCH v3 1/3] event/cnxk: store and reuse workslot status pbhagavatula
2022-02-10 10:19   ` [PATCH v3 2/3] event/cnxk: disable default wait time for dequeue pbhagavatula
2022-02-10 10:19   ` [PATCH v3 3/3] net/cnxk: improve Rx performance pbhagavatula
2022-02-10 13:20   ` pbhagavatula [this message]
2022-02-10 13:20     ` [PATCH v4 2/3] event/cnxk: disable default wait time for dequeue pbhagavatula
2022-02-10 13:20     ` [PATCH v4 3/3] event/cnxk: improve Rx performance pbhagavatula
2022-02-14  9:29     ` [PATCH v4 1/3] event/cnxk: store and reuse workslot status Jerin Jacob

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220210132047.2429-1-pbhagavatula@marvell.com \
    --to=pbhagavatula@marvell.com \
    --cc=dev@dpdk.org \
    --cc=jerinj@marvell.com \
    --cc=kirankumark@marvell.com \
    --cc=ndabilpuram@marvell.com \
    --cc=skori@marvell.com \
    --cc=skoteshwar@marvell.com \
    --cc=sthotton@marvell.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.