From: "Zhang, Qi Z" <qi.z.zhang@intel.com>
To: "Su, Simei" <simei.su@intel.com>
Cc: "dev@dpdk.org" <dev@dpdk.org>,
	"Van Haaren, Harry" <harry.van.haaren@intel.com>,
	"Wu, Wenjun1" <wenjun1.wu@intel.com>
Subject: Re: [dpdk-dev] [PATCH] net/ice: fix performance issue for Rx timestamp
Date: Thu, 28 Oct 2021 08:49:31 +0000	[thread overview]
Message-ID: <1269c978d11f4844ace2602529d5f246@intel.com> (raw)
In-Reply-To: <20211028075822.330220-1-simei.su@intel.com>



> -----Original Message-----
> From: Su, Simei <simei.su@intel.com>
> Sent: Thursday, October 28, 2021 3:58 PM
> To: Zhang, Qi Z <qi.z.zhang@intel.com>
> Cc: dev@dpdk.org; Van Haaren, Harry <harry.van.haaren@intel.com>; Wu,
> Wenjun1 <wenjun1.wu@intel.com>; Su, Simei <simei.su@intel.com>
> Subject: [PATCH] net/ice: fix performance issue for Rx timestamp
> 
> In the Rx data path, the driver reads hardware registers for every packet,
> which causes a big performance drop. This patch improves performance in two
> ways:
> (1) replace the per-packet hardware register read with one read per burst.
> (2) reduce the number of hardware register reads from 3 to 2 when the low
>     32 bits of the time value are not close to overflow.
> 
> Meanwhile, this patch refines the "ice_timesync_read_rx_timestamp" and
> "ice_timesync_read_tx_timestamp" APIs, which also use
> "ice_tstamp_convert_32b_64b".
> 
> Fixes: 953e74e6b73a ("net/ice: enable Rx timestamp on flex descriptor")
> Fixes: 646dcbe6c701 ("net/ice: support IEEE 1588 PTP")
> 
> Suggested-by: Harry van Haaren <harry.van.haaren@intel.com>
> Signed-off-by: Simei Su <simei.su@intel.com>
> ---
>  drivers/net/ice/ice_ethdev.c |  4 +--
>  drivers/net/ice/ice_ethdev.h |  1 +
>  drivers/net/ice/ice_rxtx.c   | 59 ++++++++++++++++++++++++++------------------
>  drivers/net/ice/ice_rxtx.h   | 34 +++++++++++++++----------
>  4 files changed, 59 insertions(+), 39 deletions(-)
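
To summarize the per-burst scheme for other reviewers, here is a condensed,
illustrative sketch (it rearranges the hunks below and is not the literal
driver code; the body of ice_tstamp_convert_32b_64b is only partly quoted
here, so the caching into ad->time_hw is inferred rather than quoted):

/* At the start of each Rx burst: request one fresh PHC register read. */
if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
	rxq->hw_register_set = 1;

/* Inside the burst loop: only the first timestamped packet passes
 * flag == 1, so GLTSYN_TIME_L/H are read at most once per burst; the
 * helper presumably caches the 64-bit value in ad->time_hw, and later
 * packets (flag == 0) extend their 32-bit descriptor timestamp against
 * that cached value. */
if (ice_timestamp_dynflag > 0) {
	ts_ns = ice_tstamp_convert_32b_64b(hw, ad, rxq->hw_register_set,
			rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
	rxq->hw_register_set = 0;
	*RTE_MBUF_DYNFIELD(rxm, ice_timestamp_dynfield_offset,
			rte_mbuf_timestamp_t *) = ts_ns;
	rxm->ol_flags |= ice_timestamp_dynflag;
}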
> 
> diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
> index ef6ee1c..13a7a97 100644
> --- a/drivers/net/ice/ice_ethdev.c
> +++ b/drivers/net/ice/ice_ethdev.c
> @@ -5560,7 +5560,7 @@ ice_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
>  	rxq = dev->data->rx_queues[flags];
> 
>  	ts_high = rxq->time_high;
> -	ts_ns = ice_tstamp_convert_32b_64b(hw, ts_high);
> +	ts_ns = ice_tstamp_convert_32b_64b(hw, ad, 1, ts_high);
>  	ns = rte_timecounter_update(&ad->rx_tstamp_tc, ts_ns);
>  	*timestamp = rte_ns_to_timespec(ns);
> 
> @@ -5587,7 +5587,7 @@ ice_timesync_read_tx_timestamp(struct rte_eth_dev *dev,
>  		return -1;
>  	}
> 
> -	ts_ns = ice_tstamp_convert_32b_64b(hw, (tstamp >> 8) & mask);
> +	ts_ns = ice_tstamp_convert_32b_64b(hw, ad, 1, (tstamp >> 8) & mask);
>  	ns = rte_timecounter_update(&ad->tx_tstamp_tc, ts_ns);
>  	*timestamp = rte_ns_to_timespec(ns);
> 
> diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
> index 599e002..0e42c4c 100644
> --- a/drivers/net/ice/ice_ethdev.h
> +++ b/drivers/net/ice/ice_ethdev.h
> @@ -509,6 +509,7 @@ struct ice_adapter {
>  	struct rte_timecounter rx_tstamp_tc;
>  	struct rte_timecounter tx_tstamp_tc;
>  	bool ptp_ena;
> +	uint64_t time_hw;
>  #ifdef RTE_ARCH_X86
>  	bool rx_use_avx2;
>  	bool rx_use_avx512;
> diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
> index c3cad2f..2d771ea 100644
> --- a/drivers/net/ice/ice_rxtx.c
> +++ b/drivers/net/ice/ice_rxtx.c
> @@ -1581,6 +1581,9 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
>  	if (!(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)))
>  		return 0;
> 
> +	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
> +		rxq->hw_register_set = 1;
> +
>  	/**
>  	 * Scan LOOK_AHEAD descriptors at a time to determine which
>  	 * descriptors reference packets that are ready to be received.
> @@ -1614,15 +1617,15 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
>  			ice_rxd_to_vlan_tci(mb, &rxdp[j]);
>  			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
>  #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
> -			if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
> -				ts_ns = ice_tstamp_convert_32b_64b(hw,
> +			if (ice_timestamp_dynflag > 0) {
> +				ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
> +					rxq->hw_register_set,
>  					rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high));
> -				if (ice_timestamp_dynflag > 0) {
> -					*RTE_MBUF_DYNFIELD(mb,
> -						ice_timestamp_dynfield_offset,
> -						rte_mbuf_timestamp_t *) = ts_ns;
> -					mb->ol_flags |= ice_timestamp_dynflag;
> -				}
> +				rxq->hw_register_set = 0;
> +				*RTE_MBUF_DYNFIELD(mb,
> +					ice_timestamp_dynfield_offset,
> +					rte_mbuf_timestamp_t *) = ts_ns;
> +				mb->ol_flags |= ice_timestamp_dynflag;
>  			}
> 
>  			if (ad->ptp_ena && ((mb->packet_type &
> @@ -1822,6 +1825,10 @@ ice_recv_scattered_pkts(void *rx_queue,
>  	uint64_t ts_ns;
>  	struct ice_adapter *ad = rxq->vsi->adapter;
>  #endif
> +
> +	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
> +		rxq->hw_register_set = 1;
> +
>  	while (nb_rx < nb_pkts) {
>  		rxdp = &rx_ring[rx_id];
>  		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
> @@ -1932,15 +1939,15 @@ ice_recv_scattered_pkts(void *rx_queue,
>  		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd);
>  		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
>  #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
> -		if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
> -			ts_ns = ice_tstamp_convert_32b_64b(hw,
> +		if (ice_timestamp_dynflag > 0) {
> +			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
> +				rxq->hw_register_set,
>  				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
> -			if (ice_timestamp_dynflag > 0) {
> -				*RTE_MBUF_DYNFIELD(first_seg,
> -					ice_timestamp_dynfield_offset,
> -					rte_mbuf_timestamp_t *) = ts_ns;
> -				first_seg->ol_flags |= ice_timestamp_dynflag;
> -			}
> +			rxq->hw_register_set = 0;
> +			*RTE_MBUF_DYNFIELD(first_seg,
> +				ice_timestamp_dynfield_offset,
> +				rte_mbuf_timestamp_t *) = ts_ns;
> +			first_seg->ol_flags |= ice_timestamp_dynflag;
>  		}
> 
>  		if (ad->ptp_ena && ((first_seg->packet_type & RTE_PTYPE_L2_MASK)
> @@ -2312,6 +2319,10 @@ ice_recv_pkts(void *rx_queue,
>  	uint64_t ts_ns;
>  	struct ice_adapter *ad = rxq->vsi->adapter;
>  #endif
> +
> +	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
> +		rxq->hw_register_set = 1;
> +
>  	while (nb_rx < nb_pkts) {
>  		rxdp = &rx_ring[rx_id];
>  		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
> @@ -2363,15 +2374,15 @@ ice_recv_pkts(void *rx_queue,
>  		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd);
>  		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
>  #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
> -		if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
> -			ts_ns = ice_tstamp_convert_32b_64b(hw,
> +		if (ice_timestamp_dynflag > 0) {
> +			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
> +				rxq->hw_register_set,
>  				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
> -			if (ice_timestamp_dynflag > 0) {
> -				*RTE_MBUF_DYNFIELD(rxm,
> -					ice_timestamp_dynfield_offset,
> -					rte_mbuf_timestamp_t *) = ts_ns;
> -				rxm->ol_flags |= ice_timestamp_dynflag;
> -			}
> +			rxq->hw_register_set = 0;
> +			*RTE_MBUF_DYNFIELD(rxm,
> +				ice_timestamp_dynfield_offset,
> +				rte_mbuf_timestamp_t *) = ts_ns;
> +			rxm->ol_flags |= ice_timestamp_dynflag;
>  		}
> 
>  		if (ad->ptp_ena && ((rxm->packet_type & RTE_PTYPE_L2_MASK) ==
> diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
> index 146dc1f..ef58c0d 100644
> --- a/drivers/net/ice/ice_rxtx.h
> +++ b/drivers/net/ice/ice_rxtx.h
> @@ -93,6 +93,7 @@ struct ice_rx_queue {
>  	ice_rx_release_mbufs_t rx_rel_mbufs;
>  	uint64_t offloads;
>  	uint32_t time_high;
> +	uint32_t hw_register_set;
>  	const struct rte_memzone *mz;
>  };
> 
> @@ -321,29 +322,36 @@ void ice_fdir_rx_parsing_enable(struct ice_adapter *ad, bool on)
> 
>  /* Helper function to convert a 32b nanoseconds timestamp to 64b. */
>  static inline
> -uint64_t ice_tstamp_convert_32b_64b(struct ice_hw *hw, uint32_t in_timestamp)
> +uint64_t ice_tstamp_convert_32b_64b(struct ice_hw *hw, struct ice_adapter *ad,
> +				    uint32_t flag, uint32_t in_timestamp)
>  {
>  	const uint64_t mask = 0xFFFFFFFF;
>  	uint32_t hi, lo, lo2, delta;
> -	uint64_t time, ns;
> +	uint64_t ns;
> 
> -	lo = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
> -	hi = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
> -	lo2 = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
> -
> -	if (lo2 < lo) {
> +	if (flag) {
>  		lo = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
>  		hi = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
> -	}
> 
> -	time = ((uint64_t)hi << 32) | lo;
> +		if (lo > UINT32_MAX)

'lo' is of type uint32_t, so this check will always be false.
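
Something like the following would actually catch the wrap (illustrative
only; the guard-band constant name and value are assumptions for this
example, not taken from the patch):

/* Hypothetical guard band just below the 32-bit wrap point. */
#define ICE_TIMESYNC_REG_WRAP_GUARD_BAND 10000

lo = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
hi = ICE_READ_REG(hw, GLTSYN_TIME_H(0));

/* "lo > UINT32_MAX" can never be true for a uint32_t.  A check that can
 * fire compares lo against a threshold near the wrap point and, only in
 * that window, re-reads the low register to detect a roll-over between
 * the two reads. */
if (lo > (UINT32_MAX - ICE_TIMESYNC_REG_WRAP_GUARD_BAND)) {
	lo2 = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
	if (lo2 < lo) {
		/* The low word wrapped between the reads: read both halves
		 * again so hi and lo belong to the same counter value. */
		lo = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
		hi = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
	}
}

ad->time_hw = ((uint64_t)hi << 32) | lo;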




Thread overview: 7+ messages
2021-10-28  7:58 [dpdk-dev] [PATCH] net/ice: fix performance issue for Rx timestamp Simei Su
2021-10-28  8:49 ` Zhang, Qi Z [this message]
2021-10-28 14:42 ` [dpdk-dev] [PATCH v2] " Simei Su
2021-10-29  2:24   ` Zhang, Qi Z
2021-10-29 10:15   ` [dpdk-dev] [PATCH v3] " Simei Su
2021-10-29 12:56     ` [dpdk-dev] [PATCH v4] " Simei Su
2021-11-01  1:20       ` Zhang, Qi Z
