All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net 0/2] mlx5: bugfixes for ptp fifo queue
@ 2023-01-22 16:16 Vadim Fedorenko
  2023-01-22 16:16 ` [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow Vadim Fedorenko
  2023-01-22 16:16 ` [PATCH net 2/2] mlx5: fix skb leak while fifo resync Vadim Fedorenko
  0 siblings, 2 replies; 16+ messages in thread
From: Vadim Fedorenko @ 2023-01-22 16:16 UTC (permalink / raw)
  To: Aya Levin, Saeed Mahameed, Jakub Kicinski; +Cc: Vadim Fedorenko, netdev

Simple FIFO implementation for PTP queue has several bug which lead to
use-after-free and skb leaks. This series fixes the issues and adds new
counters of out-of-order CQEs for this queue.

Vadim Fedorenko (2):
  mlx5: fix possible ptp queue fifo overflow
  mlx5: fix skb leak while fifo resync

 .../net/ethernet/mellanox/mlx5/core/en/ptp.c  | 29 ++++++++++++++-----
 .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  6 +++-
 .../ethernet/mellanox/mlx5/core/en_stats.c    |  2 ++
 .../ethernet/mellanox/mlx5/core/en_stats.h    |  2 ++
 4 files changed, 31 insertions(+), 8 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow
  2023-01-22 16:16 [PATCH net 0/2] mlx5: bugfixes for ptp fifo queue Vadim Fedorenko
@ 2023-01-22 16:16 ` Vadim Fedorenko
  2023-01-23  7:20   ` Leon Romanovsky
                     ` (2 more replies)
  2023-01-22 16:16 ` [PATCH net 2/2] mlx5: fix skb leak while fifo resync Vadim Fedorenko
  1 sibling, 3 replies; 16+ messages in thread
From: Vadim Fedorenko @ 2023-01-22 16:16 UTC (permalink / raw)
  To: Aya Levin, Saeed Mahameed, Jakub Kicinski; +Cc: Vadim Fedorenko, netdev

Fifo pointers are not checked for overflow and this could potentially
lead to overflow and double free under heavy PTP traffic.

Also there were accidental OOO cqe which lead to absolutely broken fifo.
Add checks to workaround OOO cqe and add counters to show the amount of
such events.

Fixes: 19b43a432e3e ("net/mlx5e: Extend SKB room check to include PTP-SQ")
Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/ptp.c  | 28 ++++++++++++++-----
 .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  6 +++-
 .../ethernet/mellanox/mlx5/core/en_stats.c    |  2 ++
 .../ethernet/mellanox/mlx5/core/en_stats.h    |  2 ++
 4 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 903de88bab53..11a99e0f00c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -86,20 +86,31 @@ static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb
 	return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
 }
 
-static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
+static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
 {
 	struct skb_shared_hwtstamps hwts = {};
 	struct sk_buff *skb;
 
 	ptpsq->cq_stats->resync_event++;
 
-	while (skb_cc != skb_id) {
-		skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+	if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id) {
+		ptpsq->cq_stats->ooo_cqe++;
+		return false;
+	}
+
+	while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
 		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
 		skb_tstamp_tx(skb, &hwts);
 		ptpsq->cq_stats->resync_cqe++;
 		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
 	}
+
+	if (!skb) {
+		ptpsq->cq_stats->fifo_empty++;
+		return false;
+	}
+
+	return true;
 }
 
 static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
@@ -109,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
 	u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
 	u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
 	struct mlx5e_txqsq *sq = &ptpsq->txqsq;
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
 	ktime_t hwtstamp;
 
 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
@@ -118,8 +129,10 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
 		goto out;
 	}
 
-	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
-		mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id);
+	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id) &&
+	    !mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id)) {
+		goto out;
+	}
 
 	skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
 	hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
@@ -128,7 +141,8 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
 	ptpsq->cq_stats->cqe++;
 
 out:
-	napi_consume_skb(skb, budget);
+	if (skb)
+		napi_consume_skb(skb, budget);
 }
 
 static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index aeed165a2dec..0bd2dd694f04 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -81,7 +81,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
 static inline bool
 mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
 {
-	return (*fifo->pc - *fifo->cc) < fifo->mask;
+	return (u16)(*fifo->pc - *fifo->cc) < fifo->mask;
 }
 
 static inline bool
@@ -291,12 +291,16 @@ void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
 {
 	struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
 
+	WARN_ONCE((u16)(*fifo->pc - *fifo->cc) > fifo->mask, "%s overflow", __func__);
 	*skb_item = skb;
 }
 
 static inline
 struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_skb_fifo *fifo)
 {
+	if (*fifo->pc == *fifo->cc)
+		return NULL;
+
 	return *mlx5e_skb_fifo_get(fifo, (*fifo->cc)++);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 7c5b17c917a5..dffcba42af83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -2119,6 +2119,8 @@ static const struct counter_desc ptp_cq_stats_desc[] = {
 	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) },
 	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_cqe) },
 	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_event) },
+	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, ooo_cqe) },
+	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, fifo_empty) },
 };
 
 static const struct counter_desc ptp_rq_stats_desc[] = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index edbf177f3c69..4e58ab49017d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -457,6 +457,8 @@ struct mlx5e_ptp_cq_stats {
 	u64 abort_abs_diff_ns;
 	u64 resync_cqe;
 	u64 resync_event;
+	u64 ooo_cqe;
+	u64 fifo_empty;
 };
 
 struct mlx5e_stats {
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net 2/2] mlx5: fix skb leak while fifo resync
  2023-01-22 16:16 [PATCH net 0/2] mlx5: bugfixes for ptp fifo queue Vadim Fedorenko
  2023-01-22 16:16 ` [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow Vadim Fedorenko
@ 2023-01-22 16:16 ` Vadim Fedorenko
  2023-01-23 12:38   ` Gal Pressman
  1 sibling, 1 reply; 16+ messages in thread
From: Vadim Fedorenko @ 2023-01-22 16:16 UTC (permalink / raw)
  To: Aya Levin, Saeed Mahameed, Jakub Kicinski; +Cc: Vadim Fedorenko, netdev

During ptp resync operation SKBs were poped from the fifo but were never
freed neither by napi_consume nor by dev_kfree_skb_any. Add call to
napi_consume_skb to properly free SKBs.

Fixes: 19b43a432e3e ("net/mlx5e: Extend SKB room check to include PTP-SQ")
Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 11a99e0f00c6..d60bb997c53b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -102,6 +102,7 @@ static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_
 		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
 		skb_tstamp_tx(skb, &hwts);
 		ptpsq->cq_stats->resync_cqe++;
+		napi_consume_skb(skb, 1);
 		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
 	}
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow
  2023-01-22 16:16 ` [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow Vadim Fedorenko
@ 2023-01-23  7:20   ` Leon Romanovsky
  2023-01-23 14:19     ` Vadim Fedorenko
  2023-01-23 12:32   ` Gal Pressman
  2023-01-23 17:24   ` Vadim Fedorenko
  2 siblings, 1 reply; 16+ messages in thread
From: Leon Romanovsky @ 2023-01-23  7:20 UTC (permalink / raw)
  To: Vadim Fedorenko; +Cc: Aya Levin, Saeed Mahameed, Jakub Kicinski, netdev

On Sun, Jan 22, 2023 at 08:16:01AM -0800, Vadim Fedorenko wrote:
> Fifo pointers are not checked for overflow and this could potentially
> lead to overflow and double free under heavy PTP traffic.
> 
> Also there were accidental OOO cqe which lead to absolutely broken fifo.
> Add checks to workaround OOO cqe and add counters to show the amount of
> such events.
> 
> Fixes: 19b43a432e3e ("net/mlx5e: Extend SKB room check to include PTP-SQ")
> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
> ---
>  .../net/ethernet/mellanox/mlx5/core/en/ptp.c  | 28 ++++++++++++++-----
>  .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  6 +++-
>  .../ethernet/mellanox/mlx5/core/en_stats.c    |  2 ++
>  .../ethernet/mellanox/mlx5/core/en_stats.h    |  2 ++
>  4 files changed, 30 insertions(+), 8 deletions(-)

<...>

> @@ -291,12 +291,16 @@ void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
>  {
>  	struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>  
> +	WARN_ONCE((u16)(*fifo->pc - *fifo->cc) > fifo->mask, "%s overflow", __func__);

nit, ""%s overflow", __func__" is not needed as call trace already includes function name.

Thanks

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow
  2023-01-22 16:16 ` [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow Vadim Fedorenko
  2023-01-23  7:20   ` Leon Romanovsky
@ 2023-01-23 12:32   ` Gal Pressman
  2023-01-23 23:49     ` Vadim Fedorenko
  2023-01-23 17:24   ` Vadim Fedorenko
  2 siblings, 1 reply; 16+ messages in thread
From: Gal Pressman @ 2023-01-23 12:32 UTC (permalink / raw)
  To: Vadim Fedorenko, Aya Levin, Saeed Mahameed, Jakub Kicinski; +Cc: netdev

Hi Vadim,

On 22/01/2023 18:16, Vadim Fedorenko wrote:
> Fifo pointers are not checked for overflow and this could potentially
> lead to overflow and double free under heavy PTP traffic.
> 
> Also there were accidental OOO cqe which lead to absolutely broken fifo.
> Add checks to workaround OOO cqe and add counters to show the amount of
> such events.
> 
> Fixes: 19b43a432e3e ("net/mlx5e: Extend SKB room check to include PTP-SQ")

Isn't 58a518948f60 ("net/mlx5e: Add resiliency for PTP TX port
timestamp") more appropriate?

> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
> ---
>  .../net/ethernet/mellanox/mlx5/core/en/ptp.c  | 28 ++++++++++++++-----
>  .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  6 +++-
>  .../ethernet/mellanox/mlx5/core/en_stats.c    |  2 ++
>  .../ethernet/mellanox/mlx5/core/en_stats.h    |  2 ++
>  4 files changed, 30 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> index 903de88bab53..11a99e0f00c6 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> @@ -86,20 +86,31 @@ static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb
>  	return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
>  }
>  
> -static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
> +static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
>  {
>  	struct skb_shared_hwtstamps hwts = {};
>  	struct sk_buff *skb;
>  
>  	ptpsq->cq_stats->resync_event++;
>  
> -	while (skb_cc != skb_id) {
> -		skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
> +	if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id) {
> +		ptpsq->cq_stats->ooo_cqe++;
> +		return false;
> +	}

I honestly don't understand how this could happen, can you please
provide more information about your issue? Did you actually witness ooo
completions or is it a theoretical issue?
We know ptp CQEs can be dropped in some rare cases (that's the reason we
implemented this resync flow), but completions should always arrive
in-order.

> +
> +	while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
>  		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
>  		skb_tstamp_tx(skb, &hwts);
>  		ptpsq->cq_stats->resync_cqe++;
>  		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>  	}
> +
> +	if (!skb) {
> +		ptpsq->cq_stats->fifo_empty++;

Hmm, for this to happen you need _all_ ptp CQEs to drop and wraparound
the SQ?

> +		return false;> +	}
> +
> +	return true;
>  }
>  
>  static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
> @@ -109,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>  	u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
>  	u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>  	struct mlx5e_txqsq *sq = &ptpsq->txqsq;
> -	struct sk_buff *skb;
> +	struct sk_buff *skb = NULL;
>  	ktime_t hwtstamp;
>  
>  	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
> @@ -118,8 +129,10 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>  		goto out;
>  	}
>  
> -	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
> -		mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id);
> +	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id) &&
> +	    !mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id)) {
> +		goto out;
> +	}
>  
>  	skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>  	hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
> @@ -128,7 +141,8 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>  	ptpsq->cq_stats->cqe++;
>  
>  out:
> -	napi_consume_skb(skb, budget);
> +	if (skb)
> +		napi_consume_skb(skb, budget);
>  }
>  
>  static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> index aeed165a2dec..0bd2dd694f04 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> @@ -81,7 +81,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
>  static inline bool
>  mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
>  {
> -	return (*fifo->pc - *fifo->cc) < fifo->mask;
> +	return (u16)(*fifo->pc - *fifo->cc) < fifo->mask;

What is this cast for?

>  }
>  
>  static inline bool
> @@ -291,12 +291,16 @@ void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
>  {
>  	struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>  
> +	WARN_ONCE((u16)(*fifo->pc - *fifo->cc) > fifo->mask, "%s overflow", __func__);

The fifo is the same size of the SQ, how can it overflow?

>  	*skb_item = skb;
>  }

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net 2/2] mlx5: fix skb leak while fifo resync
  2023-01-22 16:16 ` [PATCH net 2/2] mlx5: fix skb leak while fifo resync Vadim Fedorenko
@ 2023-01-23 12:38   ` Gal Pressman
  2023-01-23 16:52     ` Vadim Fedorenko
  2023-01-24  2:03     ` Jakub Kicinski
  0 siblings, 2 replies; 16+ messages in thread
From: Gal Pressman @ 2023-01-23 12:38 UTC (permalink / raw)
  To: Vadim Fedorenko, Aya Levin, Saeed Mahameed, Jakub Kicinski; +Cc: netdev

On 22/01/2023 18:16, Vadim Fedorenko wrote:
> During ptp resync operation SKBs were poped from the fifo but were never
> freed neither by napi_consume nor by dev_kfree_skb_any. Add call to
> napi_consume_skb to properly free SKBs.
> 
> Fixes: 19b43a432e3e ("net/mlx5e: Extend SKB room check to include PTP-SQ")

Same comment as previous patch?

> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> index 11a99e0f00c6..d60bb997c53b 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> @@ -102,6 +102,7 @@ static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_
>  		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
>  		skb_tstamp_tx(skb, &hwts);
>  		ptpsq->cq_stats->resync_cqe++;
> +		napi_consume_skb(skb, 1);

Was wondering whether we should pass the actual budget here instead of
1, but looking at napi_consume_skb() it doesn't really matter..

Anyway:
Reviewed-by: Gal Pressman <gal@nvidia.com>

>  		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>  	}
>  

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow
  2023-01-23  7:20   ` Leon Romanovsky
@ 2023-01-23 14:19     ` Vadim Fedorenko
  0 siblings, 0 replies; 16+ messages in thread
From: Vadim Fedorenko @ 2023-01-23 14:19 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: Aya Levin, Saeed Mahameed, Jakub Kicinski, netdev

On 23/01/2023 07:20, Leon Romanovsky wrote:
> 
> On Sun, Jan 22, 2023 at 08:16:01AM -0800, Vadim Fedorenko wrote:
>> Fifo pointers are not checked for overflow and this could potentially
>> lead to overflow and double free under heavy PTP traffic.
>>
>> Also there were accidental OOO cqe which lead to absolutely broken fifo.
>> Add checks to workaround OOO cqe and add counters to show the amount of
>> such events.
>>
>> Fixes: 19b43a432e3e ("net/mlx5e: Extend SKB room check to include PTP-SQ")
>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>> ---
>>   .../net/ethernet/mellanox/mlx5/core/en/ptp.c  | 28 ++++++++++++++-----
>>   .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  6 +++-
>>   .../ethernet/mellanox/mlx5/core/en_stats.c    |  2 ++
>>   .../ethernet/mellanox/mlx5/core/en_stats.h    |  2 ++
>>   4 files changed, 30 insertions(+), 8 deletions(-)
> 
> <...>
> 
>> @@ -291,12 +291,16 @@ void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
>>   {
>>   	struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>>   
>> +	WARN_ONCE((u16)(*fifo->pc - *fifo->cc) > fifo->mask, "%s overflow", __func__);
> 
> nit, ""%s overflow", __func__" is not needed as call trace already includes function name.

yep, sure. But I still think we would like to have small tip about 
reasons. And also some other functions in mlx5 have the same debug info 
with WARN_ONCE.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net 2/2] mlx5: fix skb leak while fifo resync
  2023-01-23 12:38   ` Gal Pressman
@ 2023-01-23 16:52     ` Vadim Fedorenko
  2023-01-24  2:03     ` Jakub Kicinski
  1 sibling, 0 replies; 16+ messages in thread
From: Vadim Fedorenko @ 2023-01-23 16:52 UTC (permalink / raw)
  To: Gal Pressman, Aya Levin, Saeed Mahameed, Jakub Kicinski; +Cc: netdev

On 23/01/2023 12:38, Gal Pressman wrote:
> On 22/01/2023 18:16, Vadim Fedorenko wrote:
>> During ptp resync operation SKBs were poped from the fifo but were never
>> freed neither by napi_consume nor by dev_kfree_skb_any. Add call to
>> napi_consume_skb to properly free SKBs.
>>
>> Fixes: 19b43a432e3e ("net/mlx5e: Extend SKB room check to include PTP-SQ")
> 
> Same comment as previous patch?

Yeah, and it's correct for this patch too. Commit mentioned in Fixes 
introduced several bugs apart from architectural problem. The first bug 
of wrong checks and possible overflow/underflow of FIFO is fixed by 
previous patch. This patch fixes another issue of leaking SKBs from FIFO 
during synchronisation process.

> 
>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>> ---
>>   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 1 +
>>   1 file changed, 1 insertion(+)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> index 11a99e0f00c6..d60bb997c53b 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> @@ -102,6 +102,7 @@ static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_
>>   		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
>>   		skb_tstamp_tx(skb, &hwts);
>>   		ptpsq->cq_stats->resync_cqe++;
>> +		napi_consume_skb(skb, 1);
> 
> Was wondering whether we should pass the actual budget here instead of
> 1, but looking at napi_consume_skb() it doesn't really matter..
> 
> Anyway:
> Reviewed-by: Gal Pressman <gal@nvidia.com>
> 
>>   		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>>   	}
>>   


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow
  2023-01-22 16:16 ` [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow Vadim Fedorenko
  2023-01-23  7:20   ` Leon Romanovsky
  2023-01-23 12:32   ` Gal Pressman
@ 2023-01-23 17:24   ` Vadim Fedorenko
  2023-01-24 14:39     ` Gal Pressman
  2 siblings, 1 reply; 16+ messages in thread
From: Vadim Fedorenko @ 2023-01-23 17:24 UTC (permalink / raw)
  To: Gal Pressman; +Cc: netdev, Aya Levin, Saeed Mahameed, Jakub Kicinski

 > Hi Vadim,
 >

Hi Gal!
Your mail didn't show up in my mailbox for some reasons, so I tried to 
construct it back from mailing list. This may end up with some side 
effects, but I did my best to avoid it.

 > On 22/01/2023 18:16, Vadim Fedorenko wrote:
 >> Fifo pointers are not checked for overflow and this could potentially
 >> lead to overflow and double free under heavy PTP traffic.
 >>
 >> Also there were accidental OOO cqe which lead to absolutely broken fifo.
 >> Add checks to workaround OOO cqe and add counters to show the amount of
 >> such events.
 >>
 >> Fixes: 19b43a432e3e ("net/mlx5e: Extend SKB room check to include 
PTP-SQ")
 >
 > Isn't 58a518948f60 ("net/mlx5e: Add resiliency for PTP TX port
 > timestamp") more appropriate?

It looks like the bugs were actually introduced by the commit in Fixes 
even though the commit you mentioned introduced the feature itself. But 
I might be wrong, I'll recheck it.

 >> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
 >> ---
 >>  .../net/ethernet/mellanox/mlx5/core/en/ptp.c  | 28 ++++++++++++++-----
 >>  .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  6 +++-
 >>  .../ethernet/mellanox/mlx5/core/en_stats.c    |  2 ++
 >>  .../ethernet/mellanox/mlx5/core/en_stats.h    |  2 ++
 >>  4 files changed, 30 insertions(+), 8 deletions(-)
 >>
 >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
 >> index 903de88bab53..11a99e0f00c6 100644
 >> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
 >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
 >> @@ -86,20 +86,31 @@ static bool mlx5e_ptp_ts_cqe_drop(struct 
mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb
 >>  	return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
 >>  }
 >>
 >> -static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq 
*ptpsq, u16 skb_cc, u16 skb_id)
 >> +static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq 
*ptpsq, u16 skb_cc, u16 skb_id)
 >>  {
 >>  	struct skb_shared_hwtstamps hwts = {};
 >>  	struct sk_buff *skb;
 >>
 >>  	ptpsq->cq_stats->resync_event++;
 >>
 >> -	while (skb_cc != skb_id) {
 >> -		skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
 >> +	if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id) {
 >> +		ptpsq->cq_stats->ooo_cqe++;
 >> +		return false;
 >> +	}
 >
 >I honestly don't understand how this could happen, can you please
 >provide more information about your issue? Did you actually witness ooo
 >completions or is it a theoretical issue?
 >We know ptp CQEs can be dropped in some rare cases (that's the reason we
 >implemented this resync flow), but completions should always arrive
 >in-order.

I was also surprised to see OOO completions but it's the reality. With a 
little bit of debug I found this issue:

[65578.231710] FIFO drop found, skb_cc = 141, skb_id = 140
[65578.293358] FIFO drop found, skb_cc = 141, skb_id = 143
[65578.301240] FIFO drop found, skb_cc = 145, skb_id = 142
[65578.365277] FIFO drop found, skb_cc = 173, skb_id = 141
[65578.426681] FIFO drop found, skb_cc = 173, skb_id = 145
[65578.488089] FIFO drop found, skb_cc = 173, skb_id = 146
[65578.549489] FIFO drop found, skb_cc = 173, skb_id = 147
[65578.610897] FIFO drop found, skb_cc = 173, skb_id = 148
[65578.672301] FIFO drop found, skb_cc = 173, skb_id = 149

It really shows that CQE are coming OOO sometimes.

 >> +
 >> +	while (skb_cc != skb_id && (skb = 
mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
 >>  		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
 >>  		skb_tstamp_tx(skb, &hwts);
 >>  		ptpsq->cq_stats->resync_cqe++;
 >>  		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
 >>  	}
 >> +
 >> +	if (!skb) {
 >> +		ptpsq->cq_stats->fifo_empty++;
 >
 >Hmm, for this to happen you need _all_ ptp CQEs to drop and wraparound
 >the SQ?

Yep, and that's what I've seen before I fixed mlx5e_ptp_ts_cqe_drop() 
check. I added this counter just to be sure I won't happen again.

 >> +		return false;
 >> +	}
 >> +
 >> +	return true;
 >>  }
 >>
 >>  static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
 >> @@ -109,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct 
mlx5e_ptpsq *ptpsq,
 >>  	u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
 >>  	u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
 >>  	struct mlx5e_txqsq *sq = &ptpsq->txqsq;
 >> -	struct sk_buff *skb;
 >> +	struct sk_buff *skb = NULL;
 >>  	ktime_t hwtstamp;
 >>
 >>  	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
 >> @@ -118,8 +129,10 @@ static void mlx5e_ptp_handle_ts_cqe(struct 
mlx5e_ptpsq *ptpsq,
 >>  		goto out;
 >>  	}
 >>
 >> -	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
 >> -		mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id);
 >> +	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id) &&
 >> +	    !mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id)) {
 >> +		goto out;
 >> +	}
 >>
 >>  	skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
 >>  	hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, 
get_cqe_ts(cqe));
 >> @@ -128,7 +141,8 @@ static void mlx5e_ptp_handle_ts_cqe(struct 
mlx5e_ptpsq *ptpsq,
 >>  	ptpsq->cq_stats->cqe++;
 >>
 >>  out:
 >> -	napi_consume_skb(skb, budget);
 >> +	if (skb)
 >> +		napi_consume_skb(skb, budget);
 >>  }
 >>
 >>  static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
 >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
 >> index aeed165a2dec..0bd2dd694f04 100644
 >> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
 >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
 >> @@ -81,7 +81,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
 >>  static inline bool
 >>  mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
 >>  {
 >> -	return (*fifo->pc - *fifo->cc) < fifo->mask;
 >> +	return (u16)(*fifo->pc - *fifo->cc) < fifo->mask;
 >
 >What is this cast for?

To properly check u16 overflow cases. (*fifo->pc - *fifo->cc) is casted 
to int if we don't put explicit cast here. And it easily ends up with 
negative value which we be less than mask until fifo->cc overflows too.

 >>  }
 >>
 >>  static inline bool
 >> @@ -291,12 +291,16 @@ void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo 
*fifo, struct sk_buff *skb)
 >>  {
 >>  	struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
 >>
 >> +	WARN_ONCE((u16)(*fifo->pc - *fifo->cc) > fifo->mask, "%s 
overflow", __func__);
 >
 >The fifo is the same size of the SQ, how can it overflow?
 >

There is one fifo_push call in mlx5e_txwqe_complete before 
mlx5e_skb_fifo_has_room() is checked, so it can potentially overflow.

 >>  	*skb_item = skb;
 >>  }

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow
  2023-01-23 12:32   ` Gal Pressman
@ 2023-01-23 23:49     ` Vadim Fedorenko
  0 siblings, 0 replies; 16+ messages in thread
From: Vadim Fedorenko @ 2023-01-23 23:49 UTC (permalink / raw)
  To: Gal Pressman, Vadim Fedorenko, Aya Levin, Saeed Mahameed, Jakub Kicinski
  Cc: netdev

On 23.01.2023 12:32, Gal Pressman wrote:
> Hi Vadim,
> 
> On 22/01/2023 18:16, Vadim Fedorenko wrote:
>> Fifo pointers are not checked for overflow and this could potentially
>> lead to overflow and double free under heavy PTP traffic.
>>
>> Also there were accidental OOO cqe which lead to absolutely broken fifo.
>> Add checks to workaround OOO cqe and add counters to show the amount of
>> such events.
>>
>> Fixes: 19b43a432e3e ("net/mlx5e: Extend SKB room check to include PTP-SQ")
> 
> Isn't 58a518948f60 ("net/mlx5e: Add resiliency for PTP TX port
> timestamp") more appropriate?
> 
I re-checked the log and realised that you are absolutely right, I'll update it 
in v2, thanks!


>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>> ---
>>   .../net/ethernet/mellanox/mlx5/core/en/ptp.c  | 28 ++++++++++++++-----
>>   .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  6 +++-
>>   .../ethernet/mellanox/mlx5/core/en_stats.c    |  2 ++
>>   .../ethernet/mellanox/mlx5/core/en_stats.h    |  2 ++
>>   4 files changed, 30 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> index 903de88bab53..11a99e0f00c6 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> @@ -86,20 +86,31 @@ static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb
>>   	return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
>>   }
>>   
>> -static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
>> +static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
>>   {
>>   	struct skb_shared_hwtstamps hwts = {};
>>   	struct sk_buff *skb;
>>   
>>   	ptpsq->cq_stats->resync_event++;
>>   
>> -	while (skb_cc != skb_id) {
>> -		skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>> +	if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id) {
>> +		ptpsq->cq_stats->ooo_cqe++;
>> +		return false;
>> +	}
> 
> I honestly don't understand how this could happen, can you please
> provide more information about your issue? Did you actually witness ooo
> completions or is it a theoretical issue?
> We know ptp CQEs can be dropped in some rare cases (that's the reason we
> implemented this resync flow), but completions should always arrive
> in-order.
> 
>> +
>> +	while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
>>   		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
>>   		skb_tstamp_tx(skb, &hwts);
>>   		ptpsq->cq_stats->resync_cqe++;
>>   		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>>   	}
>> +
>> +	if (!skb) {
>> +		ptpsq->cq_stats->fifo_empty++;
> 
> Hmm, for this to happen you need _all_ ptp CQEs to drop and wraparound
> the SQ?
> 
>> +		return false;> +	}
>> +
>> +	return true;
>>   }
>>   
>>   static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>> @@ -109,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>>   	u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
>>   	u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>>   	struct mlx5e_txqsq *sq = &ptpsq->txqsq;
>> -	struct sk_buff *skb;
>> +	struct sk_buff *skb = NULL;
>>   	ktime_t hwtstamp;
>>   
>>   	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
>> @@ -118,8 +129,10 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>>   		goto out;
>>   	}
>>   
>> -	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
>> -		mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id);
>> +	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id) &&
>> +	    !mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id)) {
>> +		goto out;
>> +	}
>>   
>>   	skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>>   	hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
>> @@ -128,7 +141,8 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>>   	ptpsq->cq_stats->cqe++;
>>   
>>   out:
>> -	napi_consume_skb(skb, budget);
>> +	if (skb)
>> +		napi_consume_skb(skb, budget);
>>   }
>>   
>>   static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> index aeed165a2dec..0bd2dd694f04 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> @@ -81,7 +81,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
>>   static inline bool
>>   mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
>>   {
>> -	return (*fifo->pc - *fifo->cc) < fifo->mask;
>> +	return (u16)(*fifo->pc - *fifo->cc) < fifo->mask;
> 
> What is this cast for?
> 
>>   }
>>   
>>   static inline bool
>> @@ -291,12 +291,16 @@ void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
>>   {
>>   	struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>>   
>> +	WARN_ONCE((u16)(*fifo->pc - *fifo->cc) > fifo->mask, "%s overflow", __func__);
> 
> The fifo is the same size of the SQ, how can it overflow?
> 
>>   	*skb_item = skb;
>>   }


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net 2/2] mlx5: fix skb leak while fifo resync
  2023-01-23 12:38   ` Gal Pressman
  2023-01-23 16:52     ` Vadim Fedorenko
@ 2023-01-24  2:03     ` Jakub Kicinski
  1 sibling, 0 replies; 16+ messages in thread
From: Jakub Kicinski @ 2023-01-24  2:03 UTC (permalink / raw)
  To: Gal Pressman; +Cc: Vadim Fedorenko, Aya Levin, Saeed Mahameed, netdev

On Mon, 23 Jan 2023 14:38:35 +0200 Gal Pressman wrote:
> > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> > index 11a99e0f00c6..d60bb997c53b 100644
> > --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> > @@ -102,6 +102,7 @@ static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_
> >  		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
> >  		skb_tstamp_tx(skb, &hwts);
> >  		ptpsq->cq_stats->resync_cqe++;
> > +		napi_consume_skb(skb, 1);  
> 
> Was wondering whether we should pass the actual budget here instead of
> 1, but looking at napi_consume_skb() it doesn't really matter..

We should pass the real budget in. The exact value does not matter, 
but it could matter whether it's zero or not. Budget is zero when
NAPI gets polled from an IRQ context.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow
  2023-01-23 17:24   ` Vadim Fedorenko
@ 2023-01-24 14:39     ` Gal Pressman
  2023-01-24 15:09       ` Vadim Fedorenko
  2023-01-25 14:42       ` Vadim Fedorenko
  0 siblings, 2 replies; 16+ messages in thread
From: Gal Pressman @ 2023-01-24 14:39 UTC (permalink / raw)
  To: Vadim Fedorenko; +Cc: netdev, Aya Levin, Saeed Mahameed, Jakub Kicinski

On 23/01/2023 19:24, Vadim Fedorenko wrote:
>  > Hi Vadim,
>  >
> 
> Hi Gal!
> Your mail didn't show up in my mailbox for some reasons, so I tried to 
> construct it back from mailing list. This may end up with some side 
> effects, but I did my best to avoid it.
> 
>  > On 22/01/2023 18:16, Vadim Fedorenko wrote:
>  >> Fifo pointers are not checked for overflow and this could potentially
>  >> lead to overflow and double free under heavy PTP traffic.
>  >>
>  >> Also there were accidental OOO cqe which lead to absolutely broken fifo.
>  >> Add checks to workaround OOO cqe and add counters to show the amount of
>  >> such events.
>  >>
>  >> Fixes: 19b43a432e3e ("net/mlx5e: Extend SKB room check to include 
> PTP-SQ")
>  >
>  > Isn't 58a518948f60 ("net/mlx5e: Add resiliency for PTP TX port
>  > timestamp") more appropriate?
> 
> It looks like the bugs were actually introduced by the commit in Fixes 
> even though the commit you mentioned introduced the feature itself. But 
> I might be wrong, I'll recheck it.
> 
>  >> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>  >> ---
>  >>  .../net/ethernet/mellanox/mlx5/core/en/ptp.c  | 28 ++++++++++++++-----
>  >>  .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  6 +++-
>  >>  .../ethernet/mellanox/mlx5/core/en_stats.c    |  2 ++
>  >>  .../ethernet/mellanox/mlx5/core/en_stats.h    |  2 ++
>  >>  4 files changed, 30 insertions(+), 8 deletions(-)
>  >>
>  >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c 
> b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>  >> index 903de88bab53..11a99e0f00c6 100644
>  >> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>  >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>  >> @@ -86,20 +86,31 @@ static bool mlx5e_ptp_ts_cqe_drop(struct 
> mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb
>  >>  	return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
>  >>  }
>  >>
>  >> -static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq 
> *ptpsq, u16 skb_cc, u16 skb_id)
>  >> +static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq 
> *ptpsq, u16 skb_cc, u16 skb_id)
>  >>  {
>  >>  	struct skb_shared_hwtstamps hwts = {};
>  >>  	struct sk_buff *skb;
>  >>
>  >>  	ptpsq->cq_stats->resync_event++;
>  >>
>  >> -	while (skb_cc != skb_id) {
>  >> -		skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>  >> +	if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id) {

Can you explain how this checks for ooo?

>  >> +		ptpsq->cq_stats->ooo_cqe++;
>  >> +		return false;
>  >> +	}
>  >
>  >I honestly don't understand how this could happen, can you please
>  >provide more information about your issue? Did you actually witness ooo
>  >completions or is it a theoretical issue?
>  >We know ptp CQEs can be dropped in some rare cases (that's the reason we
>  >implemented this resync flow), but completions should always arrive
>  >in-order.
> 
> I was also surprised to see OOO completions but it's the reality. With a 
> little bit of debug I found this issue:

Where are these prints added? I assume inside the 'if
(mlx5e_ptp_ts_cqe_drop())' statement?

> 
> [65578.231710] FIFO drop found, skb_cc = 141, skb_id = 140

Is this the first drop? In order for skb_cc to reach 141 it means it has
already seen skb_id 140 (and consumed it). But here we see skb_id 140
again? Is it a duplicate completion? Or is it a full wraparound?
I'm now realising that the naming of the variables is very confusing,
skb_cc isn't really the consumer counter, it is the cosumer index
(masked value).

> [65578.293358] FIFO drop found, skb_cc = 141, skb_id = 143

How come we see the same skb_cc twice? When a drop is found we increment it.

> [65578.301240] FIFO drop found, skb_cc = 145, skb_id = 142
> [65578.365277] FIFO drop found, skb_cc = 173, skb_id = 141
> [65578.426681] FIFO drop found, skb_cc = 173, skb_id = 145
> [65578.488089] FIFO drop found, skb_cc = 173, skb_id = 146
> [65578.549489] FIFO drop found, skb_cc = 173, skb_id = 147
> [65578.610897] FIFO drop found, skb_cc = 173, skb_id = 148
> [65578.672301] FIFO drop found, skb_cc = 173, skb_id = 149

Confusing :S, did you manage to make sense out of these prints? We need
prints when !dropped as well, otherwise it's impossible to tell when a
wraparound occurred.

Anyway, I'd like to zoom out for a second, the whole fifo was designed
under the assumption that completions are in-order (this is a guarantee
for all SQs, not just ptp ones!), this fix seems more of a bandage that
potentially hides a more severe issue.

> 
> It really shows that CQE are coming OOO sometimes.

Can we reproduce it somehow?
Can you please try to update your firmware version? I'm quite confident
that this issue is fixed already.

> 
>  >> +
>  >> +	while (skb_cc != skb_id && (skb = 
> mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
>  >>  		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
>  >>  		skb_tstamp_tx(skb, &hwts);
>  >>  		ptpsq->cq_stats->resync_cqe++;
>  >>  		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>  >>  	}
>  >> +
>  >> +	if (!skb) {
>  >> +		ptpsq->cq_stats->fifo_empty++;
>  >
>  >Hmm, for this to happen you need _all_ ptp CQEs to drop and wraparound
>  >the SQ?
> 
> Yep, and that's what I've seen before I fixed mlx5e_ptp_ts_cqe_drop() 
> check. I added this counter just to be sure I won't happen again.
> 
>  >> +		return false;
>  >> +	}
>  >> +
>  >> +	return true;
>  >>  }
>  >>
>  >>  static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>  >> @@ -109,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct 
> mlx5e_ptpsq *ptpsq,
>  >>  	u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
>  >>  	u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>  >>  	struct mlx5e_txqsq *sq = &ptpsq->txqsq;
>  >> -	struct sk_buff *skb;
>  >> +	struct sk_buff *skb = NULL;
>  >>  	ktime_t hwtstamp;
>  >>
>  >>  	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
>  >> @@ -118,8 +129,10 @@ static void mlx5e_ptp_handle_ts_cqe(struct 
> mlx5e_ptpsq *ptpsq,
>  >>  		goto out;
>  >>  	}
>  >>
>  >> -	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
>  >> -		mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id);
>  >> +	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id) &&
>  >> +	    !mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id)) {
>  >> +		goto out;
>  >> +	}
>  >>
>  >>  	skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>  >>  	hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, 
> get_cqe_ts(cqe));
>  >> @@ -128,7 +141,8 @@ static void mlx5e_ptp_handle_ts_cqe(struct 
> mlx5e_ptpsq *ptpsq,
>  >>  	ptpsq->cq_stats->cqe++;
>  >>
>  >>  out:
>  >> -	napi_consume_skb(skb, budget);
>  >> +	if (skb)
>  >> +		napi_consume_skb(skb, budget);
>  >>  }
>  >>
>  >>  static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
>  >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h 
> b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>  >> index aeed165a2dec..0bd2dd694f04 100644
>  >> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>  >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>  >> @@ -81,7 +81,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
>  >>  static inline bool
>  >>  mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
>  >>  {
>  >> -	return (*fifo->pc - *fifo->cc) < fifo->mask;
>  >> +	return (u16)(*fifo->pc - *fifo->cc) < fifo->mask;
>  >
>  >What is this cast for?
> 
> To properly check u16 overflow cases. (*fifo->pc - *fifo->cc) is casted 
> to int if we don't put explicit cast here. And it easily ends up with 
> negative value which we be less than mask until fifo->cc overflows too.

Ack.

> 
>  >>  }
>  >>
>  >>  static inline bool
>  >> @@ -291,12 +291,16 @@ void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo 
> *fifo, struct sk_buff *skb)
>  >>  {
>  >>  	struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>  >>
>  >> +	WARN_ONCE((u16)(*fifo->pc - *fifo->cc) > fifo->mask, "%s 
> overflow", __func__);
>  >
>  >The fifo is the same size of the SQ, how can it overflow?
>  >
> 
> There is one fifo_push call in mlx5e_txwqe_complete before 
> mlx5e_skb_fifo_has_room() is checked, so it can potentially overflow.
> 
>  >>  	*skb_item = skb;
>  >>  }

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow
  2023-01-24 14:39     ` Gal Pressman
@ 2023-01-24 15:09       ` Vadim Fedorenko
  2023-01-25 14:42       ` Vadim Fedorenko
  1 sibling, 0 replies; 16+ messages in thread
From: Vadim Fedorenko @ 2023-01-24 15:09 UTC (permalink / raw)
  To: Gal Pressman; +Cc: netdev, Aya Levin, Saeed Mahameed, Jakub Kicinski

On 24/01/2023 14:39, Gal Pressman wrote:
> On 23/01/2023 19:24, Vadim Fedorenko wrote:
>>   > Hi Vadim,
>>   >
>>
>> Hi Gal!
>> Your mail didn't show up in my mailbox for some reasons, so I tried to
>> construct it back from mailing list. This may end up with some side
>> effects, but I did my best to avoid it.
>>
>>   > On 22/01/2023 18:16, Vadim Fedorenko wrote:
>>   >> Fifo pointers are not checked for overflow and this could potentially
>>   >> lead to overflow and double free under heavy PTP traffic.
>>   >>
>>   >> Also there were accidental OOO cqe which lead to absolutely broken fifo.
>>   >> Add checks to workaround OOO cqe and add counters to show the amount of
>>   >> such events.
>>   >>
>>   >> Fixes: 19b43a432e3e ("net/mlx5e: Extend SKB room check to include
>> PTP-SQ")
>>   >
>>   > Isn't 58a518948f60 ("net/mlx5e: Add resiliency for PTP TX port
>>   > timestamp") more appropriate?
>>
>> It looks like the bugs were actually introduced by the commit in Fixes
>> even though the commit you mentioned introduced the feature itself. But
>> I might be wrong, I'll recheck it.
>>
>>   >> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>>   >> ---
>>   >>  .../net/ethernet/mellanox/mlx5/core/en/ptp.c  | 28 ++++++++++++++-----
>>   >>  .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  6 +++-
>>   >>  .../ethernet/mellanox/mlx5/core/en_stats.c    |  2 ++
>>   >>  .../ethernet/mellanox/mlx5/core/en_stats.h    |  2 ++
>>   >>  4 files changed, 30 insertions(+), 8 deletions(-)
>>   >>
>>   >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>>   >> index 903de88bab53..11a99e0f00c6 100644
>>   >> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>>   >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>>   >> @@ -86,20 +86,31 @@ static bool mlx5e_ptp_ts_cqe_drop(struct
>> mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb
>>   >>  	return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
>>   >>  }
>>   >>
>>   >> -static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq
>> *ptpsq, u16 skb_cc, u16 skb_id)
>>   >> +static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq
>> *ptpsq, u16 skb_cc, u16 skb_id)
>>   >>  {
>>   >>  	struct skb_shared_hwtstamps hwts = {};
>>   >>  	struct sk_buff *skb;
>>   >>
>>   >>  	ptpsq->cq_stats->resync_event++;
>>   >>
>>   >> -	while (skb_cc != skb_id) {
>>   >> -		skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>>   >> +	if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id) {
> 
> Can you explain how this checks for ooo?

That means that if the consumer index is pointing to the skb index which 
is after the index of received skb then FIFO was resynced and drained 
some of skbs and received cqe came out of order. The second check is for 
the situation when counters were wrapped around.

> 
>>   >> +		ptpsq->cq_stats->ooo_cqe++;
>>   >> +		return false;
>>   >> +	}
>>   >
>>   >I honestly don't understand how this could happen, can you please
>>   >provide more information about your issue? Did you actually witness ooo
>>   >completions or is it a theoretical issue?
>>   >We know ptp CQEs can be dropped in some rare cases (that's the reason we
>>   >implemented this resync flow), but completions should always arrive
>>   >in-order.
>>
>> I was also surprised to see OOO completions but it's the reality. With a
>> little bit of debug I found this issue:
> 
> Where are these prints added? I assume inside the 'if
> (mlx5e_ptp_ts_cqe_drop())' statement?
> 

Yes, they were added to mlx5e_ptp_ts_cqe_drop() function but effectively 
the same as if added in the if-statement.

>>
>> [65578.231710] FIFO drop found, skb_cc = 141, skb_id = 140
> 
> Is this the first drop? In order for skb_cc to reach 141 it means it has
> already seen skb_id 140 (and consumed it). But here we see skb_id 140
> again? Is it a duplicate completion? Or is it a full wraparound?
> I'm now realising that the naming of the variables is very confusing,
> skb_cc isn't really the consumer counter, it is the cosumer index
> (masked value).
> 

Well, if I remember correctly it wasn't the first drop. That's what I 
was ably to recover from my console's log. But it's better to examine 
next lines of the log.

>> [65578.293358] FIFO drop found, skb_cc = 141, skb_id = 143
> 
> How come we see the same skb_cc twice? When a drop is found we increment it.

First of all I fixed FIFO code not to drain the whole queue once it 
receives ooo cqe. Once it received skb_id lower than skb_cc, it was 
simply dropping cqe because we know than ids in the queue are ascending 
and there is no need to search for lower values. Then skb_id 143 came, 
but skb_cc was not changed and was pointing to 141. The queue was 
resynced to skb_cc 143 at that point. Then skb_id 144 came and it was fine.

>> [65578.301240] FIFO drop found, skb_cc = 145, skb_id = 142

Then apparently skb_id 142 comes. The queue was drained (re-synced) to 
143 before and skb_cc 142 was dropped from the queue during this event.

>> [65578.365277] FIFO drop found, skb_cc = 173, skb_id = 141
>> [65578.426681] FIFO drop found, skb_cc = 173, skb_id = 145
>> [65578.488089] FIFO drop found, skb_cc = 173, skb_id = 146
>> [65578.549489] FIFO drop found, skb_cc = 173, skb_id = 147
>> [65578.610897] FIFO drop found, skb_cc = 173, skb_id = 148
>> [65578.672301] FIFO drop found, skb_cc = 173, skb_id = 149

Not sure what happend then to move skb_cc to 173, but we can see that 
missed cqe are coming now and dropped again

> 
> Confusing :S, did you manage to make sense out of these prints? We need
> prints when !dropped as well, otherwise it's impossible to tell when a
> wraparound occurred.

Printig of !dropped events creates a lot of mess in our setup as we are 
running with this issue on PTP grand master machine.

> 
> Anyway, I'd like to zoom out for a second, the whole fifo was designed
> under the assumption that completions are in-order (this is a guarantee
> for all SQs, not just ptp ones!), this fix seems more of a bandage that
> potentially hides a more severe issue.
> 

I do agree that it looks like a bandage but it's better to have a 
mitigation with possible drops of timestamps instead of kernel crashes 
because of use-after-free and memory corruptions.

>>
>> It really shows that CQE are coming OOO sometimes.
> 
> Can we reproduce it somehow?
> Can you please try to update your firmware version? I'm quite confident
> that this issue is fixed already.
> 

The firmware is pretty new - 22.35.1012 (MT_0000000500), don't believe 
there is a newer GA release to test.
I can reproduce it easily on a couple of machines now.

>>
>>   >> +
>>   >> +	while (skb_cc != skb_id && (skb =
>> mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
>>   >>  		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
>>   >>  		skb_tstamp_tx(skb, &hwts);
>>   >>  		ptpsq->cq_stats->resync_cqe++;
>>   >>  		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>>   >>  	}
>>   >> +
>>   >> +	if (!skb) {
>>   >> +		ptpsq->cq_stats->fifo_empty++;
>>   >
>>   >Hmm, for this to happen you need _all_ ptp CQEs to drop and wraparound
>>   >the SQ?
>>
>> Yep, and that's what I've seen before I fixed mlx5e_ptp_ts_cqe_drop()
>> check. I added this counter just to be sure I won't happen again.
>>
>>   >> +		return false;
>>   >> +	}
>>   >> +
>>   >> +	return true;
>>   >>  }
>>   >>
>>   >>  static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>>   >> @@ -109,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct
>> mlx5e_ptpsq *ptpsq,
>>   >>  	u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
>>   >>  	u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>>   >>  	struct mlx5e_txqsq *sq = &ptpsq->txqsq;
>>   >> -	struct sk_buff *skb;
>>   >> +	struct sk_buff *skb = NULL;
>>   >>  	ktime_t hwtstamp;
>>   >>
>>   >>  	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
>>   >> @@ -118,8 +129,10 @@ static void mlx5e_ptp_handle_ts_cqe(struct
>> mlx5e_ptpsq *ptpsq,
>>   >>  		goto out;
>>   >>  	}
>>   >>
>>   >> -	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
>>   >> -		mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id);
>>   >> +	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id) &&
>>   >> +	    !mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id)) {
>>   >> +		goto out;
>>   >> +	}
>>   >>
>>   >>  	skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>>   >>  	hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock,
>> get_cqe_ts(cqe));
>>   >> @@ -128,7 +141,8 @@ static void mlx5e_ptp_handle_ts_cqe(struct
>> mlx5e_ptpsq *ptpsq,
>>   >>  	ptpsq->cq_stats->cqe++;
>>   >>
>>   >>  out:
>>   >> -	napi_consume_skb(skb, budget);
>>   >> +	if (skb)
>>   >> +		napi_consume_skb(skb, budget);
>>   >>  }
>>   >>
>>   >>  static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
>>   >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>>   >> index aeed165a2dec..0bd2dd694f04 100644
>>   >> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>>   >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>>   >> @@ -81,7 +81,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
>>   >>  static inline bool
>>   >>  mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
>>   >>  {
>>   >> -	return (*fifo->pc - *fifo->cc) < fifo->mask;
>>   >> +	return (u16)(*fifo->pc - *fifo->cc) < fifo->mask;
>>   >
>>   >What is this cast for?
>>
>> To properly check u16 overflow cases. (*fifo->pc - *fifo->cc) is casted
>> to int if we don't put explicit cast here. And it easily ends up with
>> negative value which we be less than mask until fifo->cc overflows too.
> 
> Ack.
> 
>>
>>   >>  }
>>   >>
>>   >>  static inline bool
>>   >> @@ -291,12 +291,16 @@ void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo
>> *fifo, struct sk_buff *skb)
>>   >>  {
>>   >>  	struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>>   >>
>>   >> +	WARN_ONCE((u16)(*fifo->pc - *fifo->cc) > fifo->mask, "%s
>> overflow", __func__);
>>   >
>>   >The fifo is the same size of the SQ, how can it overflow?
>>   >
>>
>> There is one fifo_push call in mlx5e_txwqe_complete before
>> mlx5e_skb_fifo_has_room() is checked, so it can potentially overflow.
>>
>>   >>  	*skb_item = skb;
>>   >>  }


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow
  2023-01-24 14:39     ` Gal Pressman
  2023-01-24 15:09       ` Vadim Fedorenko
@ 2023-01-25 14:42       ` Vadim Fedorenko
  2023-01-25 20:33         ` Saeed Mahameed
  1 sibling, 1 reply; 16+ messages in thread
From: Vadim Fedorenko @ 2023-01-25 14:42 UTC (permalink / raw)
  To: Gal Pressman; +Cc: netdev, Aya Levin, Saeed Mahameed, Jakub Kicinski

On 24/01/2023 14:39, Gal Pressman wrote:
> On 23/01/2023 19:24, Vadim Fedorenko wrote:
>>   > Hi Vadim,
>>   >
>>
>>   >> +		ptpsq->cq_stats->ooo_cqe++;
>>   >> +		return false;
>>   >> +	}
>>   >
>>   >I honestly don't understand how this could happen, can you please
>>   >provide more information about your issue? Did you actually witness ooo
>>   >completions or is it a theoretical issue?
>>   >We know ptp CQEs can be dropped in some rare cases (that's the reason we
>>   >implemented this resync flow), but completions should always arrive
>>   >in-order.
>>
>> I was also surprised to see OOO completions but it's the reality. With a
>> little bit of debug I found this issue:
> 
> Where are these prints added? I assume inside the 'if
> (mlx5e_ptp_ts_cqe_drop())' statement?
> 
>>
>> [65578.231710] FIFO drop found, skb_cc = 141, skb_id = 140
> 
> Is this the first drop? In order for skb_cc to reach 141 it means it has
> already seen skb_id 140 (and consumed it). But here we see skb_id 140
> again? Is it a duplicate completion? Or is it a full wraparound?
> I'm now realising that the naming of the variables is very confusing,
> skb_cc isn't really the consumer counter, it is the cosumer index
> (masked value).
> 
>> [65578.293358] FIFO drop found, skb_cc = 141, skb_id = 143
> 
> How come we see the same skb_cc twice? When a drop is found we increment it.
> 
>> [65578.301240] FIFO drop found, skb_cc = 145, skb_id = 142
>> [65578.365277] FIFO drop found, skb_cc = 173, skb_id = 141
>> [65578.426681] FIFO drop found, skb_cc = 173, skb_id = 145
>> [65578.488089] FIFO drop found, skb_cc = 173, skb_id = 146
>> [65578.549489] FIFO drop found, skb_cc = 173, skb_id = 147
>> [65578.610897] FIFO drop found, skb_cc = 173, skb_id = 148
>> [65578.672301] FIFO drop found, skb_cc = 173, skb_id = 149
> 
> Confusing :S, did you manage to make sense out of these prints? We need
> prints when !dropped as well, otherwise it's impossible to tell when a
> wraparound occurred.
> 
> Anyway, I'd like to zoom out for a second, the whole fifo was designed
> under the assumption that completions are in-order (this is a guarantee
> for all SQs, not just ptp ones!), this fix seems more of a bandage that
> potentially hides a more severe issue.
> 
>>
>> It really shows that CQE are coming OOO sometimes.
> 
> Can we reproduce it somehow?
> Can you please try to update your firmware version? I'm quite confident
> that this issue is fixed already.
> 
I added some debug prints on top of the patches to show skb_cc and 
skb_id for every packet that is found by mlx5e_ptp_ts_cqe_drop() and 10 
packets after. The output is in https://dpaste.org/rMybA/raw

It clearly shows that some reordering is happening in CQE.
I'm open to add more debug info if you need it.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow
  2023-01-25 14:42       ` Vadim Fedorenko
@ 2023-01-25 20:33         ` Saeed Mahameed
  2023-01-25 21:24           ` Vadim Fedorenko
  0 siblings, 1 reply; 16+ messages in thread
From: Saeed Mahameed @ 2023-01-25 20:33 UTC (permalink / raw)
  To: Vadim Fedorenko
  Cc: Gal Pressman, netdev, Aya Levin, Saeed Mahameed, Jakub Kicinski

On 25 Jan 14:42, Vadim Fedorenko wrote:
>On 24/01/2023 14:39, Gal Pressman wrote:
>> Anyway, I'd like to zoom out for a second, the whole fifo was designed
>> under the assumption that completions are in-order (this is a guarantee
>> for all SQs, not just ptp ones!), this fix seems more of a bandage that
>> potentially hides a more severe issue.
>>
>>>
>>> It really shows that CQE are coming OOO sometimes.
>>
>> Can we reproduce it somehow?
>> Can you please try to update your firmware version? I'm quite confident
>> that this issue is fixed already.
>>

Hi Vadim, 

As Gal pointed out above,
we shouldn't be seeing OOO on TX data path, otherwise, what's the point
of the fifo ? Also you can't have a proper reseliency since it seems when
this OOO happen the skb_cc, which is derived from the we_counter seems to
fall out of range which makes me think it can be a completely random
value, so we can't really be protected from all OOO scenarios.

This is clearly a FW bug and we will get to the bottom of
this internally, Can you please create a bug request ?

For the SKB leak, I will take the 2nd patch as is and improve it as
necessary if that's ok with you.

Thanks,
Saeed.



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow
  2023-01-25 20:33         ` Saeed Mahameed
@ 2023-01-25 21:24           ` Vadim Fedorenko
  0 siblings, 0 replies; 16+ messages in thread
From: Vadim Fedorenko @ 2023-01-25 21:24 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: Gal Pressman, netdev, Aya Levin, Saeed Mahameed, Jakub Kicinski

On 25/01/2023 20:33, Saeed Mahameed wrote:
> On 25 Jan 14:42, Vadim Fedorenko wrote:
>> On 24/01/2023 14:39, Gal Pressman wrote:
>>> Anyway, I'd like to zoom out for a second, the whole fifo was designed
>>> under the assumption that completions are in-order (this is a guarantee
>>> for all SQs, not just ptp ones!), this fix seems more of a bandage that
>>> potentially hides a more severe issue.
>>>
>>>>
>>>> It really shows that CQE are coming OOO sometimes.
>>>
>>> Can we reproduce it somehow?
>>> Can you please try to update your firmware version? I'm quite confident
>>> that this issue is fixed already.
>>>
> 
> Hi Vadim,

Hi Saeed,
Thanks for taking a look at the issue.

> As Gal pointed out above,
> we shouldn't be seeing OOO on TX data path, otherwise, what's the point
> of the fifo ? Also you can't have a proper reseliency since it seems when
> this OOO happen the skb_cc, which is derived from the we_counter seems to
> fall out of range which makes me think it can be a completely random
> value, so we can't really be protected from all OOO scenarios.
>

Well, from my log I haven't seen any random values actually, just real 
1-2 cqe out of the order, and with my patches these CQEs are simply 
dropped and CQE timestamp is returned instead of port timestamp. Which 
is good enough.

> This is clearly a FW bug and we will get to the bottom of
> this internally, Can you please create a bug request ?
> 

Thanks for confirming FW bug, let's work on it.

> For the SKB leak, I will take the 2nd patch as is and improve it as
> necessary if that's ok with you.
> 
I have already prepared v3 with the changes that Jakub suggested. I'll 
reorder patches to have fix for has_room() and SKB leak fix in the first 
(or even separate) patch and OOO fixes in the second one and we can 
review them separately.

Thanks,
Vadim


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2023-01-25 21:24 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-22 16:16 [PATCH net 0/2] mlx5: bugfixes for ptp fifo queue Vadim Fedorenko
2023-01-22 16:16 ` [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow Vadim Fedorenko
2023-01-23  7:20   ` Leon Romanovsky
2023-01-23 14:19     ` Vadim Fedorenko
2023-01-23 12:32   ` Gal Pressman
2023-01-23 23:49     ` Vadim Fedorenko
2023-01-23 17:24   ` Vadim Fedorenko
2023-01-24 14:39     ` Gal Pressman
2023-01-24 15:09       ` Vadim Fedorenko
2023-01-25 14:42       ` Vadim Fedorenko
2023-01-25 20:33         ` Saeed Mahameed
2023-01-25 21:24           ` Vadim Fedorenko
2023-01-22 16:16 ` [PATCH net 2/2] mlx5: fix skb leak while fifo resync Vadim Fedorenko
2023-01-23 12:38   ` Gal Pressman
2023-01-23 16:52     ` Vadim Fedorenko
2023-01-24  2:03     ` Jakub Kicinski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.