All of lore.kernel.org
 help / color / mirror / Atom feed
From: Simei Su <simei.su@intel.com>
To: qi.z.zhang@intel.com
Cc: dev@dpdk.org, harry.van.haaren@intel.com, wenjun1.wu@intel.com,
	Simei Su <simei.su@intel.com>
Subject: [dpdk-dev] [PATCH v2] net/ice: fix performance issue for Rx timestamp
Date: Thu, 28 Oct 2021 22:42:18 +0800	[thread overview]
Message-ID: <20211028144218.354233-1-simei.su@intel.com> (raw)
In-Reply-To: <20211028075822.330220-1-simei.su@intel.com>

In Rx data path, it reads hardware registers per packet, resulting in
big performance drop. This patch improves performance from two aspects:
(1) replace per packet hardware register read by per burst.
(2) reduce hardware register read time from 3 to 2 when the low value of
    time is not close to overflow.

Meanwhile, this patch refines "ice_timesync_read_rx_timestamp" and
"ice_timesync_read_tx_timestamp" API in which "ice_tstamp_convert_32b_64b"
is also used.

Fixes: 953e74e6b73a ("net/ice: enable Rx timestamp on flex descriptor")
Fixes: 646dcbe6c701 ("net/ice: support IEEE 1588 PTP")

Suggested-by: Harry van Haaren <harry.van.haaren@intel.com>
Signed-off-by: Simei Su <simei.su@intel.com>
---
v2:
* Add "guard band" of checking wrap-around.

 drivers/net/ice/ice_ethdev.c |  4 +--
 drivers/net/ice/ice_ethdev.h |  1 +
 drivers/net/ice/ice_rxtx.c   | 59 ++++++++++++++++++++++++++------------------
 drivers/net/ice/ice_rxtx.h   | 34 +++++++++++++++----------
 4 files changed, 59 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index ef6ee1c..13a7a97 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -5560,7 +5560,7 @@ ice_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
 	rxq = dev->data->rx_queues[flags];
 
 	ts_high = rxq->time_high;
-	ts_ns = ice_tstamp_convert_32b_64b(hw, ts_high);
+	ts_ns = ice_tstamp_convert_32b_64b(hw, ad, 1, ts_high);
 	ns = rte_timecounter_update(&ad->rx_tstamp_tc, ts_ns);
 	*timestamp = rte_ns_to_timespec(ns);
 
@@ -5587,7 +5587,7 @@ ice_timesync_read_tx_timestamp(struct rte_eth_dev *dev,
 		return -1;
 	}
 
-	ts_ns = ice_tstamp_convert_32b_64b(hw, (tstamp >> 8) & mask);
+	ts_ns = ice_tstamp_convert_32b_64b(hw, ad, 1, (tstamp >> 8) & mask);
 	ns = rte_timecounter_update(&ad->tx_tstamp_tc, ts_ns);
 	*timestamp = rte_ns_to_timespec(ns);
 
diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
index 599e002..0e42c4c 100644
--- a/drivers/net/ice/ice_ethdev.h
+++ b/drivers/net/ice/ice_ethdev.h
@@ -509,6 +509,7 @@ struct ice_adapter {
 	struct rte_timecounter rx_tstamp_tc;
 	struct rte_timecounter tx_tstamp_tc;
 	bool ptp_ena;
+	uint64_t time_hw;
 #ifdef RTE_ARCH_X86
 	bool rx_use_avx2;
 	bool rx_use_avx512;
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index c3cad2f..2d771ea 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -1581,6 +1581,9 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 	if (!(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)))
 		return 0;
 
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		rxq->hw_register_set = 1;
+
 	/**
 	 * Scan LOOK_AHEAD descriptors at a time to determine which
 	 * descriptors reference packets that are ready to be received.
@@ -1614,15 +1617,15 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 			ice_rxd_to_vlan_tci(mb, &rxdp[j]);
 			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-			if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
-				ts_ns = ice_tstamp_convert_32b_64b(hw,
+			if (ice_timestamp_dynflag > 0) {
+				ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
+					rxq->hw_register_set,
 					rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high));
-				if (ice_timestamp_dynflag > 0) {
-					*RTE_MBUF_DYNFIELD(mb,
-						ice_timestamp_dynfield_offset,
-						rte_mbuf_timestamp_t *) = ts_ns;
-					mb->ol_flags |= ice_timestamp_dynflag;
-				}
+				rxq->hw_register_set = 0;
+				*RTE_MBUF_DYNFIELD(mb,
+					ice_timestamp_dynfield_offset,
+					rte_mbuf_timestamp_t *) = ts_ns;
+				mb->ol_flags |= ice_timestamp_dynflag;
 			}
 
 			if (ad->ptp_ena && ((mb->packet_type &
@@ -1822,6 +1825,10 @@ ice_recv_scattered_pkts(void *rx_queue,
 	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
 #endif
+
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		rxq->hw_register_set = 1;
+
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
 		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -1932,15 +1939,15 @@ ice_recv_scattered_pkts(void *rx_queue,
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd);
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-		if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
-			ts_ns = ice_tstamp_convert_32b_64b(hw,
+		if (ice_timestamp_dynflag > 0) {
+			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
+				rxq->hw_register_set,
 				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
-			if (ice_timestamp_dynflag > 0) {
-				*RTE_MBUF_DYNFIELD(first_seg,
-					ice_timestamp_dynfield_offset,
-					rte_mbuf_timestamp_t *) = ts_ns;
-				first_seg->ol_flags |= ice_timestamp_dynflag;
-			}
+			rxq->hw_register_set = 0;
+			*RTE_MBUF_DYNFIELD(first_seg,
+				ice_timestamp_dynfield_offset,
+				rte_mbuf_timestamp_t *) = ts_ns;
+			first_seg->ol_flags |= ice_timestamp_dynflag;
 		}
 
 		if (ad->ptp_ena && ((first_seg->packet_type & RTE_PTYPE_L2_MASK)
@@ -2312,6 +2319,10 @@ ice_recv_pkts(void *rx_queue,
 	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
 #endif
+
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		rxq->hw_register_set = 1;
+
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
 		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -2363,15 +2374,15 @@ ice_recv_pkts(void *rx_queue,
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd);
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-		if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
-			ts_ns = ice_tstamp_convert_32b_64b(hw,
+		if (ice_timestamp_dynflag > 0) {
+			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
+				rxq->hw_register_set,
 				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
-			if (ice_timestamp_dynflag > 0) {
-				*RTE_MBUF_DYNFIELD(rxm,
-					ice_timestamp_dynfield_offset,
-					rte_mbuf_timestamp_t *) = ts_ns;
-				rxm->ol_flags |= ice_timestamp_dynflag;
-			}
+			rxq->hw_register_set = 0;
+			*RTE_MBUF_DYNFIELD(rxm,
+				ice_timestamp_dynfield_offset,
+				rte_mbuf_timestamp_t *) = ts_ns;
+			rxm->ol_flags |= ice_timestamp_dynflag;
 		}
 
 		if (ad->ptp_ena && ((rxm->packet_type & RTE_PTYPE_L2_MASK) ==
diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
index 146dc1f..02223c5 100644
--- a/drivers/net/ice/ice_rxtx.h
+++ b/drivers/net/ice/ice_rxtx.h
@@ -93,6 +93,7 @@ struct ice_rx_queue {
 	ice_rx_release_mbufs_t rx_rel_mbufs;
 	uint64_t offloads;
 	uint32_t time_high;
+	uint32_t hw_register_set;
 	const struct rte_memzone *mz;
 };
 
@@ -321,29 +322,36 @@ void ice_fdir_rx_parsing_enable(struct ice_adapter *ad, bool on)
 
 /* Helper function to convert a 32b nanoseconds timestamp to 64b. */
 static inline
-uint64_t ice_tstamp_convert_32b_64b(struct ice_hw *hw, uint32_t in_timestamp)
+uint64_t ice_tstamp_convert_32b_64b(struct ice_hw *hw, struct ice_adapter *ad,
+				    uint32_t flag, uint32_t in_timestamp)
 {
 	const uint64_t mask = 0xFFFFFFFF;
 	uint32_t hi, lo, lo2, delta;
-	uint64_t time, ns;
+	uint64_t ns;
 
-	lo = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
-	hi = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
-	lo2 = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
-
-	if (lo2 < lo) {
+	if (flag) {
 		lo = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
 		hi = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
-	}
 
-	time = ((uint64_t)hi << 32) | lo;
+		if (lo > (UINT32_MAX - 10000))
+			lo2 = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
+		else
+			lo2 = lo;
+
+		if (lo2 < lo) {
+			lo = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
+			hi = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
+		}
+
+		ad->time_hw = ((uint64_t)hi << 32) | lo;
+	}
 
-	delta = (in_timestamp - (uint32_t)(time & mask));
+	delta = (in_timestamp - (uint32_t)(ad->time_hw & mask));
 	if (delta > (mask / 2)) {
-		delta = ((uint32_t)(time & mask) - in_timestamp);
-		ns = time - delta;
+		delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp);
+		ns = ad->time_hw - delta;
 	} else {
-		ns = time + delta;
+		ns = ad->time_hw + delta;
 	}
 
 	return ns;
-- 
2.9.5


  parent reply	other threads:[~2021-10-28 14:48 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-28  7:58 [dpdk-dev] [PATCH] net/ice: fix performance issue for Rx timestamp Simei Su
2021-10-28  8:49 ` Zhang, Qi Z
2021-10-28 14:42 ` Simei Su [this message]
2021-10-29  2:24   ` [dpdk-dev] [PATCH v2] " Zhang, Qi Z
2021-10-29 10:15   ` [dpdk-dev] [PATCH v3] " Simei Su
2021-10-29 12:56     ` [dpdk-dev] [PATCH v4] " Simei Su
2021-11-01  1:20       ` Zhang, Qi Z

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211028144218.354233-1-simei.su@intel.com \
    --to=simei.su@intel.com \
    --cc=dev@dpdk.org \
    --cc=harry.van.haaren@intel.com \
    --cc=qi.z.zhang@intel.com \
    --cc=wenjun1.wu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.