All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1 1/1] Drop Tx network packet when Tx TmFIFO is full
@ 2024-01-04 15:04 Liming Sun
  2024-01-04 15:14 ` [PATCH v2 " Liming Sun
  2024-01-11 17:31 ` [PATCH v3] " Liming Sun
  0 siblings, 2 replies; 11+ messages in thread
From: Liming Sun @ 2024-01-04 15:04 UTC (permalink / raw)
  To: Vadim Pasternak, David Thompson, Hans de Goede, Mark Gross,
	Dan Carpenter
  Cc: Liming Sun, platform-driver-x86, linux-kernel

Starting from Linux 5.16 kernel, Tx timeout mechanism was added
into the virtio_net driver which prints the "Tx timeout" message
when a packet is stuck in Tx queue for too long which could happen
when external host driver is stuck or stopped and failed to read
the FIFO.

Below is an example of the reported message:

"[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
last trans: 3079892256".

To avoid such "Tx timeout" messages, this commit adds a timeout
mechanism to drop and release the pending Tx packet if not able to
transmit for two seconds due to Tx FIFO full.

This commit also handles the special case that the packet is half-
transmitted into the Tx FIFO. In such case, the packet is discarded
with remaining length stored in vring->rem_padding. So paddings with
zeros can be sent out when Tx space is available to maintain the
integrity of the packet format. The padded packet will be dropped on
the receiving side.

Signed-off-by: Liming Sun <limings@nvidia.com>
---
 drivers/platform/mellanox/mlxbf-tmfifo.c | 67 ++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
index 5c683b4eaf10..47ed2a6027a6 100644
--- a/drivers/platform/mellanox/mlxbf-tmfifo.c
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -47,6 +47,9 @@
 /* Message with data needs at least two words (for header & data). */
 #define MLXBF_TMFIFO_DATA_MIN_WORDS		2
 
+/* Tx timeout in milliseconds. */
+#define TMFIFO_TX_TIMEOUT			2000
+
 /* ACPI UID for BlueField-3. */
 #define TMFIFO_BF3_UID				1
 
@@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
  * @drop_desc: dummy desc for packet dropping
  * @cur_len: processed length of the current descriptor
  * @rem_len: remaining length of the pending packet
+ * @rem_padding: remaining bytes to send as paddings
  * @pkt_len: total length of the pending packet
  * @next_avail: next avail descriptor id
  * @num: vring size (number of descriptors)
  * @align: vring alignment size
  * @index: vring index
  * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
+ * @tx_timeout: expire time of last tx packet
  * @fifo: pointer to the tmfifo structure
  */
 struct mlxbf_tmfifo_vring {
@@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
 	struct vring_desc drop_desc;
 	int cur_len;
 	int rem_len;
+	int rem_padding;
 	u32 pkt_len;
 	u16 next_avail;
 	int num;
 	int align;
 	int index;
 	int vdev_id;
+	unsigned long tx_timeout;
 	struct mlxbf_tmfifo *fifo;
 };
 
@@ -819,6 +826,50 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
 	return true;
 }
 
+static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring *vring)
+{
+	unsigned long flags;
+
+	/* Only handle Tx timeout for network vdev. */
+	if (vring->vdev_id != VIRTIO_ID_NET)
+		return;
+
+	/* Initialize the timeout or return if not expired. */
+	if (!vring->tx_timeout) {
+		/* Initialize the timeout. */
+		vring->tx_timeout = jiffies +
+			msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
+		return;
+	} else if (time_before(jiffies, vring->tx_timeout)) {
+		/* Return if not timeout yet. */
+		return;
+	}
+
+	/*
+         * Drop the packet after timeout. The outstanding packet is
+         * released and the remaining bytes will be sent with padding byte 0x00
+         * as a recovery. On the peer(host) side, the padding bytes 0x00 will be
+         * either dropped directly, or appended into existing outstanding packet
+         * thus dropped as corrupted network packet.
+         */
+	vring->rem_padding = round_up(vring->rem_len, sizeof(u64));
+	mlxbf_tmfifo_release_pkt(vring);
+	vring->cur_len = 0;
+	vring->rem_len = 0;
+	vring->fifo->vring[0] = NULL;
+
+	/*
+	 * Make sure the load/store are in order before
+	 * returning back to virtio.
+	 */
+	virtio_mb(false);
+
+	/* Notify upper layer. */
+	spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
+	vring_interrupt(0, vring->vq);
+	spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
+}
+
 /* Rx & Tx processing of a queue. */
 static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
 {
@@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
 		return;
 
 	do {
+retry:
 		/* Get available FIFO space. */
 		if (avail == 0) {
 			if (is_rx)
@@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
 				break;
 		}
 
+		/* Insert paddings for discarded Tx packet. */
+		if (!is_rx) {
+			vring->tx_timeout = 0;
+			while (vring->rem_padding >= sizeof(u64)) {
+				writeq(0, vring->fifo->tx.data);
+				vring->rem_padding -= sizeof(u64);
+				if (--avail == 0)
+					goto retry;
+			}
+		}
+
 		/* Console output always comes from the Tx buffer. */
 		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
 			mlxbf_tmfifo_console_tx(fifo, avail);
@@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
 		/* Handle one descriptor. */
 		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
 	} while (more);
+
+	/* Check Tx timeout. */
+	if (avail <= 0 && !is_rx)
+		mlxbf_tmfifo_check_tx_timeout(vring);
 }
 
 /* Handle Rx or Tx queues. */
-- 
2.30.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
  2024-01-04 15:04 [PATCH v1 1/1] Drop Tx network packet when Tx TmFIFO is full Liming Sun
@ 2024-01-04 15:14 ` Liming Sun
  2024-01-04 17:38   ` Ilpo Järvinen
  2024-01-11 17:31 ` [PATCH v3] " Liming Sun
  1 sibling, 1 reply; 11+ messages in thread
From: Liming Sun @ 2024-01-04 15:14 UTC (permalink / raw)
  To: Vadim Pasternak, David Thompson, Hans de Goede, Mark Gross,
	Dan Carpenter
  Cc: Liming Sun, platform-driver-x86, linux-kernel

Starting from Linux 5.16 kernel, Tx timeout mechanism was added
into the virtio_net driver which prints the "Tx timeout" message
when a packet is stuck in Tx queue for too long which could happen
when external host driver is stuck or stopped and failed to read
the FIFO.

Below is an example of the reported message:

"[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
last trans: 3079892256".

To avoid such "Tx timeout" messages, this commit adds a timeout
mechanism to drop and release the pending Tx packet if not able to
transmit for two seconds due to Tx FIFO full.

This commit also handles the special case that the packet is half-
transmitted into the Tx FIFO. In such case, the packet is discarded
with remaining length stored in vring->rem_padding. So paddings with
zeros can be sent out when Tx space is available to maintain the
integrity of the packet format. The padded packet will be dropped on
the receiving side.

Signed-off-by: Liming Sun <limings@nvidia.com>
---
 drivers/platform/mellanox/mlxbf-tmfifo.c | 67 ++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
index 5c683b4eaf10..f39b7b9d2bfe 100644
--- a/drivers/platform/mellanox/mlxbf-tmfifo.c
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -47,6 +47,9 @@
 /* Message with data needs at least two words (for header & data). */
 #define MLXBF_TMFIFO_DATA_MIN_WORDS		2
 
+/* Tx timeout in milliseconds. */
+#define TMFIFO_TX_TIMEOUT			2000
+
 /* ACPI UID for BlueField-3. */
 #define TMFIFO_BF3_UID				1
 
@@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
  * @drop_desc: dummy desc for packet dropping
  * @cur_len: processed length of the current descriptor
  * @rem_len: remaining length of the pending packet
+ * @rem_padding: remaining bytes to send as paddings
  * @pkt_len: total length of the pending packet
  * @next_avail: next avail descriptor id
  * @num: vring size (number of descriptors)
  * @align: vring alignment size
  * @index: vring index
  * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
+ * @tx_timeout: expire time of last tx packet
  * @fifo: pointer to the tmfifo structure
  */
 struct mlxbf_tmfifo_vring {
@@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
 	struct vring_desc drop_desc;
 	int cur_len;
 	int rem_len;
+	int rem_padding;
 	u32 pkt_len;
 	u16 next_avail;
 	int num;
 	int align;
 	int index;
 	int vdev_id;
+	unsigned long tx_timeout;
 	struct mlxbf_tmfifo *fifo;
 };
 
@@ -819,6 +826,50 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
 	return true;
 }
 
+static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring *vring)
+{
+	unsigned long flags;
+
+	/* Only handle Tx timeout for network vdev. */
+	if (vring->vdev_id != VIRTIO_ID_NET)
+		return;
+
+	/* Initialize the timeout or return if not expired. */
+	if (!vring->tx_timeout) {
+		/* Initialize the timeout. */
+		vring->tx_timeout = jiffies +
+			msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
+		return;
+	} else if (time_before(jiffies, vring->tx_timeout)) {
+		/* Return if not timeout yet. */
+		return;
+	}
+
+	/*
+	 * Drop the packet after timeout. The outstanding packet is
+	 * released and the remaining bytes will be sent with padding byte 0x00
+	 * as a recovery. On the peer(host) side, the padding bytes 0x00 will be
+	 * either dropped directly, or appended into existing outstanding packet
+	 * thus dropped as corrupted network packet.
+	 */
+	vring->rem_padding = round_up(vring->rem_len, sizeof(u64));
+	mlxbf_tmfifo_release_pkt(vring);
+	vring->cur_len = 0;
+	vring->rem_len = 0;
+	vring->fifo->vring[0] = NULL;
+
+	/*
+	 * Make sure the load/store are in order before
+	 * returning back to virtio.
+	 */
+	virtio_mb(false);
+
+	/* Notify upper layer. */
+	spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
+	vring_interrupt(0, vring->vq);
+	spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
+}
+
 /* Rx & Tx processing of a queue. */
 static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
 {
@@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
 		return;
 
 	do {
+retry:
 		/* Get available FIFO space. */
 		if (avail == 0) {
 			if (is_rx)
@@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
 				break;
 		}
 
+		/* Insert paddings for discarded Tx packet. */
+		if (!is_rx) {
+			vring->tx_timeout = 0;
+			while (vring->rem_padding >= sizeof(u64)) {
+				writeq(0, vring->fifo->tx.data);
+				vring->rem_padding -= sizeof(u64);
+				if (--avail == 0)
+					goto retry;
+			}
+		}
+
 		/* Console output always comes from the Tx buffer. */
 		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
 			mlxbf_tmfifo_console_tx(fifo, avail);
@@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
 		/* Handle one descriptor. */
 		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
 	} while (more);
+
+	/* Check Tx timeout. */
+	if (avail <= 0 && !is_rx)
+		mlxbf_tmfifo_check_tx_timeout(vring);
 }
 
 /* Handle Rx or Tx queues. */
-- 
2.30.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
  2024-01-04 15:14 ` [PATCH v2 " Liming Sun
@ 2024-01-04 17:38   ` Ilpo Järvinen
  2024-01-05 17:40     ` Liming Sun
  0 siblings, 1 reply; 11+ messages in thread
From: Ilpo Järvinen @ 2024-01-04 17:38 UTC (permalink / raw)
  To: Liming Sun
  Cc: Vadim Pasternak, David Thompson, Hans de Goede, Mark Gross,
	Dan Carpenter, platform-driver-x86, LKML

[-- Attachment #1: Type: text/plain, Size: 5777 bytes --]

On Thu, 4 Jan 2024, Liming Sun wrote:

> Starting from Linux 5.16 kernel, Tx timeout mechanism was added
> into the virtio_net driver which prints the "Tx timeout" message
> when a packet is stuck in Tx queue for too long which could happen
> when external host driver is stuck or stopped and failed to read
> the FIFO.
> 
> Below is an example of the reported message:
> 
> "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
> queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
> last trans: 3079892256".
> 
> To avoid such "Tx timeout" messages, this commit adds a timeout
> mechanism to drop and release the pending Tx packet if not able to
> transmit for two seconds due to Tx FIFO full.
> 
> This commit also handles the special case that the packet is half-
> transmitted into the Tx FIFO. In such case, the packet is discarded
> with remaining length stored in vring->rem_padding. So paddings with
> zeros can be sent out when Tx space is available to maintain the
> integrity of the packet format. The padded packet will be dropped on
> the receiving side.
> 
> Signed-off-by: Liming Sun <limings@nvidia.com>

This doesn't really explain how it helps (other than avoiding the 
message which sounds like just hiding the issue). That is, how this helps 
to resume Tx? Or does Tx resume? There's nothing to indicate either way.

-- 
 i.


> ---
>  drivers/platform/mellanox/mlxbf-tmfifo.c | 67 ++++++++++++++++++++++++
>  1 file changed, 67 insertions(+)
> 
> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
> index 5c683b4eaf10..f39b7b9d2bfe 100644
> --- a/drivers/platform/mellanox/mlxbf-tmfifo.c
> +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
> @@ -47,6 +47,9 @@
>  /* Message with data needs at least two words (for header & data). */
>  #define MLXBF_TMFIFO_DATA_MIN_WORDS		2
>  
> +/* Tx timeout in milliseconds. */
> +#define TMFIFO_TX_TIMEOUT			2000
> +
>  /* ACPI UID for BlueField-3. */
>  #define TMFIFO_BF3_UID				1
>  
> @@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
>   * @drop_desc: dummy desc for packet dropping
>   * @cur_len: processed length of the current descriptor
>   * @rem_len: remaining length of the pending packet
> + * @rem_padding: remaining bytes to send as paddings
>   * @pkt_len: total length of the pending packet
>   * @next_avail: next avail descriptor id
>   * @num: vring size (number of descriptors)
>   * @align: vring alignment size
>   * @index: vring index
>   * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
> + * @tx_timeout: expire time of last tx packet
>   * @fifo: pointer to the tmfifo structure
>   */
>  struct mlxbf_tmfifo_vring {
> @@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
>  	struct vring_desc drop_desc;
>  	int cur_len;
>  	int rem_len;
> +	int rem_padding;
>  	u32 pkt_len;
>  	u16 next_avail;
>  	int num;
>  	int align;
>  	int index;
>  	int vdev_id;
> +	unsigned long tx_timeout;
>  	struct mlxbf_tmfifo *fifo;
>  };
>  
> @@ -819,6 +826,50 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
>  	return true;
>  }
>  
> +static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring *vring)
> +{
> +	unsigned long flags;
> +
> +	/* Only handle Tx timeout for network vdev. */
> +	if (vring->vdev_id != VIRTIO_ID_NET)
> +		return;
> +
> +	/* Initialize the timeout or return if not expired. */
> +	if (!vring->tx_timeout) {
> +		/* Initialize the timeout. */
> +		vring->tx_timeout = jiffies +
> +			msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
> +		return;
> +	} else if (time_before(jiffies, vring->tx_timeout)) {
> +		/* Return if not timeout yet. */
> +		return;
> +	}
> +
> +	/*
> +	 * Drop the packet after timeout. The outstanding packet is
> +	 * released and the remaining bytes will be sent with padding byte 0x00
> +	 * as a recovery. On the peer(host) side, the padding bytes 0x00 will be
> +	 * either dropped directly, or appended into existing outstanding packet
> +	 * thus dropped as corrupted network packet.
> +	 */
> +	vring->rem_padding = round_up(vring->rem_len, sizeof(u64));
> +	mlxbf_tmfifo_release_pkt(vring);
> +	vring->cur_len = 0;
> +	vring->rem_len = 0;
> +	vring->fifo->vring[0] = NULL;
> +
> +	/*
> +	 * Make sure the load/store are in order before
> +	 * returning back to virtio.
> +	 */
> +	virtio_mb(false);
> +
> +	/* Notify upper layer. */
> +	spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
> +	vring_interrupt(0, vring->vq);
> +	spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
> +}
> +
>  /* Rx & Tx processing of a queue. */
>  static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
>  {
> @@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
>  		return;
>  
>  	do {
> +retry:
>  		/* Get available FIFO space. */
>  		if (avail == 0) {
>  			if (is_rx)
> @@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
>  				break;
>  		}
>  
> +		/* Insert paddings for discarded Tx packet. */
> +		if (!is_rx) {
> +			vring->tx_timeout = 0;
> +			while (vring->rem_padding >= sizeof(u64)) {
> +				writeq(0, vring->fifo->tx.data);
> +				vring->rem_padding -= sizeof(u64);
> +				if (--avail == 0)
> +					goto retry;
> +			}
> +		}
> +
>  		/* Console output always comes from the Tx buffer. */
>  		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
>  			mlxbf_tmfifo_console_tx(fifo, avail);
> @@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
>  		/* Handle one descriptor. */
>  		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
>  	} while (more);
> +
> +	/* Check Tx timeout. */
> +	if (avail <= 0 && !is_rx)
> +		mlxbf_tmfifo_check_tx_timeout(vring);
>  }
>  
>  /* Handle Rx or Tx queues. */
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
  2024-01-04 17:38   ` Ilpo Järvinen
@ 2024-01-05 17:40     ` Liming Sun
  2024-01-08 14:23       ` Hans de Goede
  0 siblings, 1 reply; 11+ messages in thread
From: Liming Sun @ 2024-01-05 17:40 UTC (permalink / raw)
  To: Ilpo Järvinen
  Cc: Vadim Pasternak, David Thompson, Hans de Goede, Mark Gross,
	Dan Carpenter, platform-driver-x86, LKML



> -----Original Message-----
> From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> Sent: Thursday, January 4, 2024 12:39 PM
> To: Liming Sun <limings@nvidia.com>
> Cc: Vadim Pasternak <vadimp@nvidia.com>; David Thompson
> <davthompson@nvidia.com>; Hans de Goede <hdegoede@redhat.com>;
> Mark Gross <markgross@kernel.org>; Dan Carpenter
> <dan.carpenter@linaro.org>; platform-driver-x86@vger.kernel.org; LKML
> <linux-kernel@vger.kernel.org>
> Subject: Re: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
> 
> On Thu, 4 Jan 2024, Liming Sun wrote:
> 
> > Starting from Linux 5.16 kernel, Tx timeout mechanism was added
> > into the virtio_net driver which prints the "Tx timeout" message
> > when a packet is stuck in Tx queue for too long which could happen
> > when external host driver is stuck or stopped and failed to read
> > the FIFO.
> >
> > Below is an example of the reported message:
> >
> > "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
> > queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
> > last trans: 3079892256".
> >
> > To avoid such "Tx timeout" messages, this commit adds a timeout
> > mechanism to drop and release the pending Tx packet if not able to
> > transmit for two seconds due to Tx FIFO full.
> >
> > This commit also handles the special case that the packet is half-
> > transmitted into the Tx FIFO. In such case, the packet is discarded
> > with remaining length stored in vring->rem_padding. So paddings with
> > zeros can be sent out when Tx space is available to maintain the
> > integrity of the packet format. The padded packet will be dropped on
> > the receiving side.
> >
> > Signed-off-by: Liming Sun <limings@nvidia.com>
> 
> This doesn't really explain how it helps (other than avoiding the
> message which sounds like just hiding the issue). That is, how this helps
> to resume Tx? Or does Tx resume? There's nothing to indicate either way.

As the commit message mentioned, the expired packet is discarded and the
packet buffer is released (see changes of calling mlxbf_tmfifo_release_pkt()).
The Tx will resume automatically once the FIFO space is available, such as when
external host driver starts to drain the TMFIFO. No need for any other logic.

> 
> --
>  i.
> 
> 
> > ---
> >  drivers/platform/mellanox/mlxbf-tmfifo.c | 67
> ++++++++++++++++++++++++
> >  1 file changed, 67 insertions(+)
> >
> > diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c
> b/drivers/platform/mellanox/mlxbf-tmfifo.c
> > index 5c683b4eaf10..f39b7b9d2bfe 100644
> > --- a/drivers/platform/mellanox/mlxbf-tmfifo.c
> > +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
> > @@ -47,6 +47,9 @@
> >  /* Message with data needs at least two words (for header & data). */
> >  #define MLXBF_TMFIFO_DATA_MIN_WORDS		2
> >
> > +/* Tx timeout in milliseconds. */
> > +#define TMFIFO_TX_TIMEOUT			2000
> > +
> >  /* ACPI UID for BlueField-3. */
> >  #define TMFIFO_BF3_UID				1
> >
> > @@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
> >   * @drop_desc: dummy desc for packet dropping
> >   * @cur_len: processed length of the current descriptor
> >   * @rem_len: remaining length of the pending packet
> > + * @rem_padding: remaining bytes to send as paddings
> >   * @pkt_len: total length of the pending packet
> >   * @next_avail: next avail descriptor id
> >   * @num: vring size (number of descriptors)
> >   * @align: vring alignment size
> >   * @index: vring index
> >   * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
> > + * @tx_timeout: expire time of last tx packet
> >   * @fifo: pointer to the tmfifo structure
> >   */
> >  struct mlxbf_tmfifo_vring {
> > @@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
> >  	struct vring_desc drop_desc;
> >  	int cur_len;
> >  	int rem_len;
> > +	int rem_padding;
> >  	u32 pkt_len;
> >  	u16 next_avail;
> >  	int num;
> >  	int align;
> >  	int index;
> >  	int vdev_id;
> > +	unsigned long tx_timeout;
> >  	struct mlxbf_tmfifo *fifo;
> >  };
> >
> > @@ -819,6 +826,50 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct
> mlxbf_tmfifo_vring *vring,
> >  	return true;
> >  }
> >
> > +static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring
> *vring)
> > +{
> > +	unsigned long flags;
> > +
> > +	/* Only handle Tx timeout for network vdev. */
> > +	if (vring->vdev_id != VIRTIO_ID_NET)
> > +		return;
> > +
> > +	/* Initialize the timeout or return if not expired. */
> > +	if (!vring->tx_timeout) {
> > +		/* Initialize the timeout. */
> > +		vring->tx_timeout = jiffies +
> > +			msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
> > +		return;
> > +	} else if (time_before(jiffies, vring->tx_timeout)) {
> > +		/* Return if not timeout yet. */
> > +		return;
> > +	}
> > +
> > +	/*
> > +	 * Drop the packet after timeout. The outstanding packet is
> > +	 * released and the remaining bytes will be sent with padding byte
> 0x00
> > +	 * as a recovery. On the peer(host) side, the padding bytes 0x00 will be
> > +	 * either dropped directly, or appended into existing outstanding
> packet
> > +	 * thus dropped as corrupted network packet.
> > +	 */
> > +	vring->rem_padding = round_up(vring->rem_len, sizeof(u64));
> > +	mlxbf_tmfifo_release_pkt(vring);
> > +	vring->cur_len = 0;
> > +	vring->rem_len = 0;
> > +	vring->fifo->vring[0] = NULL;
> > +
> > +	/*
> > +	 * Make sure the load/store are in order before
> > +	 * returning back to virtio.
> > +	 */
> > +	virtio_mb(false);
> > +
> > +	/* Notify upper layer. */
> > +	spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
> > +	vring_interrupt(0, vring->vq);
> > +	spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
> > +}
> > +
> >  /* Rx & Tx processing of a queue. */
> >  static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
> >  {
> > @@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct
> mlxbf_tmfifo_vring *vring, bool is_rx)
> >  		return;
> >
> >  	do {
> > +retry:
> >  		/* Get available FIFO space. */
> >  		if (avail == 0) {
> >  			if (is_rx)
> > @@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct
> mlxbf_tmfifo_vring *vring, bool is_rx)
> >  				break;
> >  		}
> >
> > +		/* Insert paddings for discarded Tx packet. */
> > +		if (!is_rx) {
> > +			vring->tx_timeout = 0;
> > +			while (vring->rem_padding >= sizeof(u64)) {
> > +				writeq(0, vring->fifo->tx.data);
> > +				vring->rem_padding -= sizeof(u64);
> > +				if (--avail == 0)
> > +					goto retry;
> > +			}
> > +		}
> > +
> >  		/* Console output always comes from the Tx buffer. */
> >  		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
> >  			mlxbf_tmfifo_console_tx(fifo, avail);
> > @@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct
> mlxbf_tmfifo_vring *vring, bool is_rx)
> >  		/* Handle one descriptor. */
> >  		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
> >  	} while (more);
> > +
> > +	/* Check Tx timeout. */
> > +	if (avail <= 0 && !is_rx)
> > +		mlxbf_tmfifo_check_tx_timeout(vring);
> >  }
> >
> >  /* Handle Rx or Tx queues. */
> >

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
  2024-01-05 17:40     ` Liming Sun
@ 2024-01-08 14:23       ` Hans de Goede
  2024-01-08 17:27         ` Liming Sun
  0 siblings, 1 reply; 11+ messages in thread
From: Hans de Goede @ 2024-01-08 14:23 UTC (permalink / raw)
  To: Liming Sun, Ilpo Järvinen
  Cc: Vadim Pasternak, David Thompson, Mark Gross, Dan Carpenter,
	platform-driver-x86, LKML

Hi,

On 1/5/24 18:40, Liming Sun wrote:
> 
> 
>> -----Original Message-----
>> From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
>> Sent: Thursday, January 4, 2024 12:39 PM
>> To: Liming Sun <limings@nvidia.com>
>> Cc: Vadim Pasternak <vadimp@nvidia.com>; David Thompson
>> <davthompson@nvidia.com>; Hans de Goede <hdegoede@redhat.com>;
>> Mark Gross <markgross@kernel.org>; Dan Carpenter
>> <dan.carpenter@linaro.org>; platform-driver-x86@vger.kernel.org; LKML
>> <linux-kernel@vger.kernel.org>
>> Subject: Re: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
>>
>> On Thu, 4 Jan 2024, Liming Sun wrote:
>>
>>> Starting from Linux 5.16 kernel, Tx timeout mechanism was added
>>> into the virtio_net driver which prints the "Tx timeout" message
>>> when a packet is stuck in Tx queue for too long which could happen
>>> when external host driver is stuck or stopped and failed to read
>>> the FIFO.
>>>
>>> Below is an example of the reported message:
>>>
>>> "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
>>> queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
>>> last trans: 3079892256".
>>>
>>> To avoid such "Tx timeout" messages, this commit adds a timeout
>>> mechanism to drop and release the pending Tx packet if not able to
>>> transmit for two seconds due to Tx FIFO full.
>>>
>>> This commit also handles the special case that the packet is half-
>>> transmitted into the Tx FIFO. In such case, the packet is discarded
>>> with remaining length stored in vring->rem_padding. So paddings with
>>> zeros can be sent out when Tx space is available to maintain the
>>> integrity of the packet format. The padded packet will be dropped on
>>> the receiving side.
>>>
>>> Signed-off-by: Liming Sun <limings@nvidia.com>
>>
>> This doesn't really explain how it helps (other than avoiding the
>> message which sounds like just hiding the issue). That is, how this helps
>> to resume Tx? Or does Tx resume? There's nothing to indicate either way.
> 
> As the commit message mentioned, the expired packet is discarded and the
> packet buffer is released (see changes of calling mlxbf_tmfifo_release_pkt()).
> The Tx will resume automatically once the FIFO space is available, such as when
> external host driver starts to drain the TMFIFO. No need for any other logic.

Hmm, it seems to me that the same (resuming on FIFO space available)
will happen without this patch ?

So as Ilpo mentioned the only purpose here seems to be to avoid the warning
getting logged? And things work properly without this too ?

I guess the advantage of this patch is that during a blocked FIFO packets
get discarded rather the piling up ?

Regards,

Hans




>>> ---
>>>  drivers/platform/mellanox/mlxbf-tmfifo.c | 67
>> ++++++++++++++++++++++++
>>>  1 file changed, 67 insertions(+)
>>>
>>> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c
>> b/drivers/platform/mellanox/mlxbf-tmfifo.c
>>> index 5c683b4eaf10..f39b7b9d2bfe 100644
>>> --- a/drivers/platform/mellanox/mlxbf-tmfifo.c
>>> +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
>>> @@ -47,6 +47,9 @@
>>>  /* Message with data needs at least two words (for header & data). */
>>>  #define MLXBF_TMFIFO_DATA_MIN_WORDS		2
>>>
>>> +/* Tx timeout in milliseconds. */
>>> +#define TMFIFO_TX_TIMEOUT			2000
>>> +
>>>  /* ACPI UID for BlueField-3. */
>>>  #define TMFIFO_BF3_UID				1
>>>
>>> @@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
>>>   * @drop_desc: dummy desc for packet dropping
>>>   * @cur_len: processed length of the current descriptor
>>>   * @rem_len: remaining length of the pending packet
>>> + * @rem_padding: remaining bytes to send as paddings
>>>   * @pkt_len: total length of the pending packet
>>>   * @next_avail: next avail descriptor id
>>>   * @num: vring size (number of descriptors)
>>>   * @align: vring alignment size
>>>   * @index: vring index
>>>   * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
>>> + * @tx_timeout: expire time of last tx packet
>>>   * @fifo: pointer to the tmfifo structure
>>>   */
>>>  struct mlxbf_tmfifo_vring {
>>> @@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
>>>  	struct vring_desc drop_desc;
>>>  	int cur_len;
>>>  	int rem_len;
>>> +	int rem_padding;
>>>  	u32 pkt_len;
>>>  	u16 next_avail;
>>>  	int num;
>>>  	int align;
>>>  	int index;
>>>  	int vdev_id;
>>> +	unsigned long tx_timeout;
>>>  	struct mlxbf_tmfifo *fifo;
>>>  };
>>>
>>> @@ -819,6 +826,50 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct
>> mlxbf_tmfifo_vring *vring,
>>>  	return true;
>>>  }
>>>
>>> +static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring
>> *vring)
>>> +{
>>> +	unsigned long flags;
>>> +
>>> +	/* Only handle Tx timeout for network vdev. */
>>> +	if (vring->vdev_id != VIRTIO_ID_NET)
>>> +		return;
>>> +
>>> +	/* Initialize the timeout or return if not expired. */
>>> +	if (!vring->tx_timeout) {
>>> +		/* Initialize the timeout. */
>>> +		vring->tx_timeout = jiffies +
>>> +			msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
>>> +		return;
>>> +	} else if (time_before(jiffies, vring->tx_timeout)) {
>>> +		/* Return if not timeout yet. */
>>> +		return;
>>> +	}
>>> +
>>> +	/*
>>> +	 * Drop the packet after timeout. The outstanding packet is
>>> +	 * released and the remaining bytes will be sent with padding byte
>> 0x00
>>> +	 * as a recovery. On the peer(host) side, the padding bytes 0x00 will be
>>> +	 * either dropped directly, or appended into existing outstanding
>> packet
>>> +	 * thus dropped as corrupted network packet.
>>> +	 */
>>> +	vring->rem_padding = round_up(vring->rem_len, sizeof(u64));
>>> +	mlxbf_tmfifo_release_pkt(vring);
>>> +	vring->cur_len = 0;
>>> +	vring->rem_len = 0;
>>> +	vring->fifo->vring[0] = NULL;
>>> +
>>> +	/*
>>> +	 * Make sure the load/store are in order before
>>> +	 * returning back to virtio.
>>> +	 */
>>> +	virtio_mb(false);
>>> +
>>> +	/* Notify upper layer. */
>>> +	spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
>>> +	vring_interrupt(0, vring->vq);
>>> +	spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
>>> +}
>>> +
>>>  /* Rx & Tx processing of a queue. */
>>>  static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
>>>  {
>>> @@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct
>> mlxbf_tmfifo_vring *vring, bool is_rx)
>>>  		return;
>>>
>>>  	do {
>>> +retry:
>>>  		/* Get available FIFO space. */
>>>  		if (avail == 0) {
>>>  			if (is_rx)
>>> @@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct
>> mlxbf_tmfifo_vring *vring, bool is_rx)
>>>  				break;
>>>  		}
>>>
>>> +		/* Insert paddings for discarded Tx packet. */
>>> +		if (!is_rx) {
>>> +			vring->tx_timeout = 0;
>>> +			while (vring->rem_padding >= sizeof(u64)) {
>>> +				writeq(0, vring->fifo->tx.data);
>>> +				vring->rem_padding -= sizeof(u64);
>>> +				if (--avail == 0)
>>> +					goto retry;
>>> +			}
>>> +		}
>>> +
>>>  		/* Console output always comes from the Tx buffer. */
>>>  		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
>>>  			mlxbf_tmfifo_console_tx(fifo, avail);
>>> @@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct
>> mlxbf_tmfifo_vring *vring, bool is_rx)
>>>  		/* Handle one descriptor. */
>>>  		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
>>>  	} while (more);
>>> +
>>> +	/* Check Tx timeout. */
>>> +	if (avail <= 0 && !is_rx)
>>> +		mlxbf_tmfifo_check_tx_timeout(vring);
>>>  }
>>>
>>>  /* Handle Rx or Tx queues. */
>>>


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
  2024-01-08 14:23       ` Hans de Goede
@ 2024-01-08 17:27         ` Liming Sun
  2024-01-08 19:03           ` Ilpo Järvinen
  0 siblings, 1 reply; 11+ messages in thread
From: Liming Sun @ 2024-01-08 17:27 UTC (permalink / raw)
  To: Hans de Goede, Ilpo Järvinen
  Cc: Vadim Pasternak, David Thompson, Mark Gross, Dan Carpenter,
	platform-driver-x86, LKML



> -----Original Message-----
> From: Hans de Goede <hdegoede@redhat.com>
> Sent: Monday, January 8, 2024 9:23 AM
> To: Liming Sun <limings@nvidia.com>; Ilpo Järvinen
> <ilpo.jarvinen@linux.intel.com>
> Cc: Vadim Pasternak <vadimp@nvidia.com>; David Thompson
> <davthompson@nvidia.com>; Mark Gross <markgross@kernel.org>; Dan
> Carpenter <dan.carpenter@linaro.org>; platform-driver-x86@vger.kernel.org;
> LKML <linux-kernel@vger.kernel.org>
> Subject: Re: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
> 
> Hi,
> 
> On 1/5/24 18:40, Liming Sun wrote:
> >
> >
> >> -----Original Message-----
> >> From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> >> Sent: Thursday, January 4, 2024 12:39 PM
> >> To: Liming Sun <limings@nvidia.com>
> >> Cc: Vadim Pasternak <vadimp@nvidia.com>; David Thompson
> >> <davthompson@nvidia.com>; Hans de Goede <hdegoede@redhat.com>;
> >> Mark Gross <markgross@kernel.org>; Dan Carpenter
> >> <dan.carpenter@linaro.org>; platform-driver-x86@vger.kernel.org; LKML
> >> <linux-kernel@vger.kernel.org>
> >> Subject: Re: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
> >>
> >> On Thu, 4 Jan 2024, Liming Sun wrote:
> >>
> >>> Starting from Linux 5.16 kernel, Tx timeout mechanism was added
> >>> into the virtio_net driver which prints the "Tx timeout" message
> >>> when a packet is stuck in Tx queue for too long which could happen
> >>> when external host driver is stuck or stopped and failed to read
> >>> the FIFO.
> >>>
> >>> Below is an example of the reported message:
> >>>
> >>> "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
> >>> queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
> >>> last trans: 3079892256".
> >>>
> >>> To avoid such "Tx timeout" messages, this commit adds a timeout
> >>> mechanism to drop and release the pending Tx packet if not able to
> >>> transmit for two seconds due to Tx FIFO full.
> >>>
> >>> This commit also handles the special case that the packet is half-
> >>> transmitted into the Tx FIFO. In such case, the packet is discarded
> >>> with remaining length stored in vring->rem_padding. So paddings with
> >>> zeros can be sent out when Tx space is available to maintain the
> >>> integrity of the packet format. The padded packet will be dropped on
> >>> the receiving side.
> >>>
> >>> Signed-off-by: Liming Sun <limings@nvidia.com>
> >>
> >> This doesn't really explain how it helps (other than avoiding the
> >> message which sounds like just hiding the issue). That is, how this helps
> >> to resume Tx? Or does Tx resume? There's nothing to indicate either way.
> >
> > As the commit message mentioned, the expired packet is discarded and the
> > packet buffer is released (see changes of calling mlxbf_tmfifo_release_pkt()).
> > The Tx will resume automatically once the FIFO space is available, such as
> when
> > external host driver starts to drain the TMFIFO. No need for any other logic.
> 
> Hmm, it seems to me that the same (resuming on FIFO space available)
> will happen without this patch ?

Yes, it's true.

> 
> So as Ilpo mentioned the only purpose here seems to be to avoid the warning
> getting logged? And things work properly without this too ?
> 
> I guess the advantage of this patch is that during a blocked FIFO packets
> get discarded rather the piling up ?
> 
> Regards,
> 
> Hans

Probably I misunderstood Ilpo's comments.
Yes, the only purpose is to avoid the warning messages (since users and QA doesn't like these messages).
It is only for networking packet, which seems reasonable to drop it if not able to complete the transmission in seconds.

> 
> 
> 
> 
> >>> ---
> >>>  drivers/platform/mellanox/mlxbf-tmfifo.c | 67
> >> ++++++++++++++++++++++++
> >>>  1 file changed, 67 insertions(+)
> >>>
> >>> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c
> >> b/drivers/platform/mellanox/mlxbf-tmfifo.c
> >>> index 5c683b4eaf10..f39b7b9d2bfe 100644
> >>> --- a/drivers/platform/mellanox/mlxbf-tmfifo.c
> >>> +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
> >>> @@ -47,6 +47,9 @@
> >>>  /* Message with data needs at least two words (for header & data). */
> >>>  #define MLXBF_TMFIFO_DATA_MIN_WORDS		2
> >>>
> >>> +/* Tx timeout in milliseconds. */
> >>> +#define TMFIFO_TX_TIMEOUT			2000
> >>> +
> >>>  /* ACPI UID for BlueField-3. */
> >>>  #define TMFIFO_BF3_UID				1
> >>>
> >>> @@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
> >>>   * @drop_desc: dummy desc for packet dropping
> >>>   * @cur_len: processed length of the current descriptor
> >>>   * @rem_len: remaining length of the pending packet
> >>> + * @rem_padding: remaining bytes to send as paddings
> >>>   * @pkt_len: total length of the pending packet
> >>>   * @next_avail: next avail descriptor id
> >>>   * @num: vring size (number of descriptors)
> >>>   * @align: vring alignment size
> >>>   * @index: vring index
> >>>   * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
> >>> + * @tx_timeout: expire time of last tx packet
> >>>   * @fifo: pointer to the tmfifo structure
> >>>   */
> >>>  struct mlxbf_tmfifo_vring {
> >>> @@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
> >>>  	struct vring_desc drop_desc;
> >>>  	int cur_len;
> >>>  	int rem_len;
> >>> +	int rem_padding;
> >>>  	u32 pkt_len;
> >>>  	u16 next_avail;
> >>>  	int num;
> >>>  	int align;
> >>>  	int index;
> >>>  	int vdev_id;
> >>> +	unsigned long tx_timeout;
> >>>  	struct mlxbf_tmfifo *fifo;
> >>>  };
> >>>
> >>> @@ -819,6 +826,50 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct
> >> mlxbf_tmfifo_vring *vring,
> >>>  	return true;
> >>>  }
> >>>
> >>> +static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring
> >> *vring)
> >>> +{
> >>> +	unsigned long flags;
> >>> +
> >>> +	/* Only handle Tx timeout for network vdev. */
> >>> +	if (vring->vdev_id != VIRTIO_ID_NET)
> >>> +		return;
> >>> +
> >>> +	/* Initialize the timeout or return if not expired. */
> >>> +	if (!vring->tx_timeout) {
> >>> +		/* Initialize the timeout. */
> >>> +		vring->tx_timeout = jiffies +
> >>> +			msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
> >>> +		return;
> >>> +	} else if (time_before(jiffies, vring->tx_timeout)) {
> >>> +		/* Return if not timeout yet. */
> >>> +		return;
> >>> +	}
> >>> +
> >>> +	/*
> >>> +	 * Drop the packet after timeout. The outstanding packet is
> >>> +	 * released and the remaining bytes will be sent with padding byte
> >> 0x00
> >>> +	 * as a recovery. On the peer(host) side, the padding bytes 0x00 will be
> >>> +	 * either dropped directly, or appended into existing outstanding
> >> packet
> >>> +	 * thus dropped as corrupted network packet.
> >>> +	 */
> >>> +	vring->rem_padding = round_up(vring->rem_len, sizeof(u64));
> >>> +	mlxbf_tmfifo_release_pkt(vring);
> >>> +	vring->cur_len = 0;
> >>> +	vring->rem_len = 0;
> >>> +	vring->fifo->vring[0] = NULL;
> >>> +
> >>> +	/*
> >>> +	 * Make sure the load/store are in order before
> >>> +	 * returning back to virtio.
> >>> +	 */
> >>> +	virtio_mb(false);
> >>> +
> >>> +	/* Notify upper layer. */
> >>> +	spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
> >>> +	vring_interrupt(0, vring->vq);
> >>> +	spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
> >>> +}
> >>> +
> >>>  /* Rx & Tx processing of a queue. */
> >>>  static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
> >>>  {
> >>> @@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct
> >> mlxbf_tmfifo_vring *vring, bool is_rx)
> >>>  		return;
> >>>
> >>>  	do {
> >>> +retry:
> >>>  		/* Get available FIFO space. */
> >>>  		if (avail == 0) {
> >>>  			if (is_rx)
> >>> @@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct
> >> mlxbf_tmfifo_vring *vring, bool is_rx)
> >>>  				break;
> >>>  		}
> >>>
> >>> +		/* Insert paddings for discarded Tx packet. */
> >>> +		if (!is_rx) {
> >>> +			vring->tx_timeout = 0;
> >>> +			while (vring->rem_padding >= sizeof(u64)) {
> >>> +				writeq(0, vring->fifo->tx.data);
> >>> +				vring->rem_padding -= sizeof(u64);
> >>> +				if (--avail == 0)
> >>> +					goto retry;
> >>> +			}
> >>> +		}
> >>> +
> >>>  		/* Console output always comes from the Tx buffer. */
> >>>  		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
> >>>  			mlxbf_tmfifo_console_tx(fifo, avail);
> >>> @@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct
> >> mlxbf_tmfifo_vring *vring, bool is_rx)
> >>>  		/* Handle one descriptor. */
> >>>  		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
> >>>  	} while (more);
> >>> +
> >>> +	/* Check Tx timeout. */
> >>> +	if (avail <= 0 && !is_rx)
> >>> +		mlxbf_tmfifo_check_tx_timeout(vring);
> >>>  }
> >>>
> >>>  /* Handle Rx or Tx queues. */
> >>>


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
  2024-01-08 17:27         ` Liming Sun
@ 2024-01-08 19:03           ` Ilpo Järvinen
  2024-01-08 20:02             ` Liming Sun
  0 siblings, 1 reply; 11+ messages in thread
From: Ilpo Järvinen @ 2024-01-08 19:03 UTC (permalink / raw)
  To: Liming Sun
  Cc: Hans de Goede, Vadim Pasternak, David Thompson, Mark Gross,
	Dan Carpenter, platform-driver-x86, LKML

[-- Attachment #1: Type: text/plain, Size: 9687 bytes --]

On Mon, 8 Jan 2024, Liming Sun wrote:
> > -----Original Message-----
> > From: Hans de Goede <hdegoede@redhat.com>
> > Sent: Monday, January 8, 2024 9:23 AM
> > To: Liming Sun <limings@nvidia.com>; Ilpo Järvinen
> > <ilpo.jarvinen@linux.intel.com>
> > Cc: Vadim Pasternak <vadimp@nvidia.com>; David Thompson
> > <davthompson@nvidia.com>; Mark Gross <markgross@kernel.org>; Dan
> > Carpenter <dan.carpenter@linaro.org>; platform-driver-x86@vger.kernel.org;
> > LKML <linux-kernel@vger.kernel.org>
> > Subject: Re: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
> > 
> > Hi,
> > 
> > On 1/5/24 18:40, Liming Sun wrote:
> > >
> > >
> > >> -----Original Message-----
> > >> From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> > >> Sent: Thursday, January 4, 2024 12:39 PM
> > >> To: Liming Sun <limings@nvidia.com>
> > >> Cc: Vadim Pasternak <vadimp@nvidia.com>; David Thompson
> > >> <davthompson@nvidia.com>; Hans de Goede <hdegoede@redhat.com>;
> > >> Mark Gross <markgross@kernel.org>; Dan Carpenter
> > >> <dan.carpenter@linaro.org>; platform-driver-x86@vger.kernel.org; LKML
> > >> <linux-kernel@vger.kernel.org>
> > >> Subject: Re: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
> > >>
> > >> On Thu, 4 Jan 2024, Liming Sun wrote:
> > >>
> > >>> Starting from Linux 5.16 kernel, Tx timeout mechanism was added
> > >>> into the virtio_net driver which prints the "Tx timeout" message
> > >>> when a packet is stuck in Tx queue for too long which could happen
> > >>> when external host driver is stuck or stopped and failed to read
> > >>> the FIFO.
> > >>>
> > >>> Below is an example of the reported message:
> > >>>
> > >>> "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
> > >>> queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
> > >>> last trans: 3079892256".
> > >>>
> > >>> To avoid such "Tx timeout" messages, this commit adds a timeout
> > >>> mechanism to drop and release the pending Tx packet if not able to
> > >>> transmit for two seconds due to Tx FIFO full.
> > >>>
> > >>> This commit also handles the special case that the packet is half-
> > >>> transmitted into the Tx FIFO. In such case, the packet is discarded
> > >>> with remaining length stored in vring->rem_padding. So paddings with
> > >>> zeros can be sent out when Tx space is available to maintain the
> > >>> integrity of the packet format. The padded packet will be dropped on
> > >>> the receiving side.
> > >>>
> > >>> Signed-off-by: Liming Sun <limings@nvidia.com>
> > >>
> > >> This doesn't really explain how it helps (other than avoiding the
> > >> message which sounds like just hiding the issue). That is, how this helps
> > >> to resume Tx? Or does Tx resume? There's nothing to indicate either way.
> > >
> > > As the commit message mentioned, the expired packet is discarded and the
> > > packet buffer is released (see changes of calling mlxbf_tmfifo_release_pkt()).
> > > The Tx will resume automatically once the FIFO space is available, such as
> > when
> > > external host driver starts to drain the TMFIFO. No need for any other logic.
> > 
> > Hmm, it seems to me that the same (resuming on FIFO space available)
> > will happen without this patch ?
> 
> Yes, it's true.
> 
> > 
> > So as Ilpo mentioned the only purpose here seems to be to avoid the warning
> > getting logged? And things work properly without this too ?
> > 
> > I guess the advantage of this patch is that during a blocked FIFO packets
> > get discarded rather the piling up ?
> > 
> > Regards,
> > 
> > Hans
> 
> Probably I misunderstood Ilpo's comments.
> Yes, the only purpose is to avoid the warning messages (since users and QA doesn't like these messages).
> It is only for networking packet, which seems reasonable to drop it if not able to complete the transmission in seconds.

Okay, makes much more sense now as I couldn't see anything that would have 
helped to resume from "stuck" or "failed" state.

I think the first paragraph of the commit message misleads the reader to 
think something is "stuck" or "failed". And since this doesn't exactly fix 
such an issue that is not there in the first place it leads to confusion.

Perhaps it would be good to try to rephrase the first paragraph such that 
it wouldn't hint something is stuck when there is only a delay spike in 
FIFO's consumer side (if I now understand the problem space correctly?).

-- 
 i.

> > >>> ---
> > >>>  drivers/platform/mellanox/mlxbf-tmfifo.c | 67
> > >> ++++++++++++++++++++++++
> > >>>  1 file changed, 67 insertions(+)
> > >>>
> > >>> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c
> > >> b/drivers/platform/mellanox/mlxbf-tmfifo.c
> > >>> index 5c683b4eaf10..f39b7b9d2bfe 100644
> > >>> --- a/drivers/platform/mellanox/mlxbf-tmfifo.c
> > >>> +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
> > >>> @@ -47,6 +47,9 @@
> > >>>  /* Message with data needs at least two words (for header & data). */
> > >>>  #define MLXBF_TMFIFO_DATA_MIN_WORDS		2
> > >>>
> > >>> +/* Tx timeout in milliseconds. */
> > >>> +#define TMFIFO_TX_TIMEOUT			2000
> > >>> +
> > >>>  /* ACPI UID for BlueField-3. */
> > >>>  #define TMFIFO_BF3_UID				1
> > >>>
> > >>> @@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
> > >>>   * @drop_desc: dummy desc for packet dropping
> > >>>   * @cur_len: processed length of the current descriptor
> > >>>   * @rem_len: remaining length of the pending packet
> > >>> + * @rem_padding: remaining bytes to send as paddings
> > >>>   * @pkt_len: total length of the pending packet
> > >>>   * @next_avail: next avail descriptor id
> > >>>   * @num: vring size (number of descriptors)
> > >>>   * @align: vring alignment size
> > >>>   * @index: vring index
> > >>>   * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
> > >>> + * @tx_timeout: expire time of last tx packet
> > >>>   * @fifo: pointer to the tmfifo structure
> > >>>   */
> > >>>  struct mlxbf_tmfifo_vring {
> > >>> @@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
> > >>>  	struct vring_desc drop_desc;
> > >>>  	int cur_len;
> > >>>  	int rem_len;
> > >>> +	int rem_padding;
> > >>>  	u32 pkt_len;
> > >>>  	u16 next_avail;
> > >>>  	int num;
> > >>>  	int align;
> > >>>  	int index;
> > >>>  	int vdev_id;
> > >>> +	unsigned long tx_timeout;
> > >>>  	struct mlxbf_tmfifo *fifo;
> > >>>  };
> > >>>
> > >>> @@ -819,6 +826,50 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct
> > >> mlxbf_tmfifo_vring *vring,
> > >>>  	return true;
> > >>>  }
> > >>>
> > >>> +static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring
> > >> *vring)
> > >>> +{
> > >>> +	unsigned long flags;
> > >>> +
> > >>> +	/* Only handle Tx timeout for network vdev. */
> > >>> +	if (vring->vdev_id != VIRTIO_ID_NET)
> > >>> +		return;
> > >>> +
> > >>> +	/* Initialize the timeout or return if not expired. */
> > >>> +	if (!vring->tx_timeout) {
> > >>> +		/* Initialize the timeout. */
> > >>> +		vring->tx_timeout = jiffies +
> > >>> +			msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
> > >>> +		return;
> > >>> +	} else if (time_before(jiffies, vring->tx_timeout)) {
> > >>> +		/* Return if not timeout yet. */
> > >>> +		return;
> > >>> +	}
> > >>> +
> > >>> +	/*
> > >>> +	 * Drop the packet after timeout. The outstanding packet is
> > >>> +	 * released and the remaining bytes will be sent with padding byte
> > >> 0x00
> > >>> +	 * as a recovery. On the peer(host) side, the padding bytes 0x00 will be
> > >>> +	 * either dropped directly, or appended into existing outstanding
> > >> packet
> > >>> +	 * thus dropped as corrupted network packet.
> > >>> +	 */
> > >>> +	vring->rem_padding = round_up(vring->rem_len, sizeof(u64));
> > >>> +	mlxbf_tmfifo_release_pkt(vring);
> > >>> +	vring->cur_len = 0;
> > >>> +	vring->rem_len = 0;
> > >>> +	vring->fifo->vring[0] = NULL;
> > >>> +
> > >>> +	/*
> > >>> +	 * Make sure the load/store are in order before
> > >>> +	 * returning back to virtio.
> > >>> +	 */
> > >>> +	virtio_mb(false);
> > >>> +
> > >>> +	/* Notify upper layer. */
> > >>> +	spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
> > >>> +	vring_interrupt(0, vring->vq);
> > >>> +	spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
> > >>> +}
> > >>> +
> > >>>  /* Rx & Tx processing of a queue. */
> > >>>  static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
> > >>>  {
> > >>> @@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct
> > >> mlxbf_tmfifo_vring *vring, bool is_rx)
> > >>>  		return;
> > >>>
> > >>>  	do {
> > >>> +retry:
> > >>>  		/* Get available FIFO space. */
> > >>>  		if (avail == 0) {
> > >>>  			if (is_rx)
> > >>> @@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct
> > >> mlxbf_tmfifo_vring *vring, bool is_rx)
> > >>>  				break;
> > >>>  		}
> > >>>
> > >>> +		/* Insert paddings for discarded Tx packet. */
> > >>> +		if (!is_rx) {
> > >>> +			vring->tx_timeout = 0;
> > >>> +			while (vring->rem_padding >= sizeof(u64)) {
> > >>> +				writeq(0, vring->fifo->tx.data);
> > >>> +				vring->rem_padding -= sizeof(u64);
> > >>> +				if (--avail == 0)
> > >>> +					goto retry;
> > >>> +			}
> > >>> +		}
> > >>> +
> > >>>  		/* Console output always comes from the Tx buffer. */
> > >>>  		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
> > >>>  			mlxbf_tmfifo_console_tx(fifo, avail);
> > >>> @@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct
> > >> mlxbf_tmfifo_vring *vring, bool is_rx)
> > >>>  		/* Handle one descriptor. */
> > >>>  		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
> > >>>  	} while (more);
> > >>> +
> > >>> +	/* Check Tx timeout. */
> > >>> +	if (avail <= 0 && !is_rx)
> > >>> +		mlxbf_tmfifo_check_tx_timeout(vring);
> > >>>  }
> > >>>
> > >>>  /* Handle Rx or Tx queues. */
> > >>>
> 
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
  2024-01-08 19:03           ` Ilpo Järvinen
@ 2024-01-08 20:02             ` Liming Sun
  0 siblings, 0 replies; 11+ messages in thread
From: Liming Sun @ 2024-01-08 20:02 UTC (permalink / raw)
  To: Ilpo Järvinen
  Cc: Hans de Goede, Vadim Pasternak, David Thompson, Mark Gross,
	Dan Carpenter, platform-driver-x86, LKML



> -----Original Message-----
> From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> Sent: Monday, January 8, 2024 2:03 PM
> To: Liming Sun <limings@nvidia.com>
> Cc: Hans de Goede <hdegoede@redhat.com>; Vadim Pasternak
> <vadimp@nvidia.com>; David Thompson <davthompson@nvidia.com>; Mark
> Gross <markgross@kernel.org>; Dan Carpenter <dan.carpenter@linaro.org>;
> platform-driver-x86@vger.kernel.org; LKML <linux-kernel@vger.kernel.org>
> Subject: RE: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
> 
> On Mon, 8 Jan 2024, Liming Sun wrote:
> > > -----Original Message-----
> > > From: Hans de Goede <hdegoede@redhat.com>
> > > Sent: Monday, January 8, 2024 9:23 AM
> > > To: Liming Sun <limings@nvidia.com>; Ilpo Järvinen
> > > <ilpo.jarvinen@linux.intel.com>
> > > Cc: Vadim Pasternak <vadimp@nvidia.com>; David Thompson
> > > <davthompson@nvidia.com>; Mark Gross <markgross@kernel.org>; Dan
> > > Carpenter <dan.carpenter@linaro.org>; platform-driver-
> x86@vger.kernel.org;
> > > LKML <linux-kernel@vger.kernel.org>
> > > Subject: Re: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is full
> > >
> > > Hi,
> > >
> > > On 1/5/24 18:40, Liming Sun wrote:
> > > >
> > > >
> > > >> -----Original Message-----
> > > >> From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> > > >> Sent: Thursday, January 4, 2024 12:39 PM
> > > >> To: Liming Sun <limings@nvidia.com>
> > > >> Cc: Vadim Pasternak <vadimp@nvidia.com>; David Thompson
> > > >> <davthompson@nvidia.com>; Hans de Goede
> <hdegoede@redhat.com>;
> > > >> Mark Gross <markgross@kernel.org>; Dan Carpenter
> > > >> <dan.carpenter@linaro.org>; platform-driver-x86@vger.kernel.org;
> LKML
> > > >> <linux-kernel@vger.kernel.org>
> > > >> Subject: Re: [PATCH v2 1/1] Drop Tx network packet when Tx TmFIFO is
> full
> > > >>
> > > >> On Thu, 4 Jan 2024, Liming Sun wrote:
> > > >>
> > > >>> Starting from Linux 5.16 kernel, Tx timeout mechanism was added
> > > >>> into the virtio_net driver which prints the "Tx timeout" message
> > > >>> when a packet is stuck in Tx queue for too long which could happen
> > > >>> when external host driver is stuck or stopped and failed to read
> > > >>> the FIFO.
> > > >>>
> > > >>> Below is an example of the reported message:
> > > >>>
> > > >>> "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
> > > >>> queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
> > > >>> last trans: 3079892256".
> > > >>>
> > > >>> To avoid such "Tx timeout" messages, this commit adds a timeout
> > > >>> mechanism to drop and release the pending Tx packet if not able to
> > > >>> transmit for two seconds due to Tx FIFO full.
> > > >>>
> > > >>> This commit also handles the special case that the packet is half-
> > > >>> transmitted into the Tx FIFO. In such case, the packet is discarded
> > > >>> with remaining length stored in vring->rem_padding. So paddings with
> > > >>> zeros can be sent out when Tx space is available to maintain the
> > > >>> integrity of the packet format. The padded packet will be dropped on
> > > >>> the receiving side.
> > > >>>
> > > >>> Signed-off-by: Liming Sun <limings@nvidia.com>
> > > >>
> > > >> This doesn't really explain how it helps (other than avoiding the
> > > >> message which sounds like just hiding the issue). That is, how this helps
> > > >> to resume Tx? Or does Tx resume? There's nothing to indicate either
> way.
> > > >
> > > > As the commit message mentioned, the expired packet is discarded and
> the
> > > > packet buffer is released (see changes of calling
> mlxbf_tmfifo_release_pkt()).
> > > > The Tx will resume automatically once the FIFO space is available, such as
> > > when
> > > > external host driver starts to drain the TMFIFO. No need for any other
> logic.
> > >
> > > Hmm, it seems to me that the same (resuming on FIFO space available)
> > > will happen without this patch ?
> >
> > Yes, it's true.
> >
> > >
> > > So as Ilpo mentioned the only purpose here seems to be to avoid the
> warning
> > > getting logged? And things work properly without this too ?
> > >
> > > I guess the advantage of this patch is that during a blocked FIFO packets
> > > get discarded rather the piling up ?
> > >
> > > Regards,
> > >
> > > Hans
> >
> > Probably I misunderstood Ilpo's comments.
> > Yes, the only purpose is to avoid the warning messages (since users and QA
> doesn't like these messages).
> > It is only for networking packet, which seems reasonable to drop it if not able
> to complete the transmission in seconds.
> 
> Okay, makes much more sense now as I couldn't see anything that would have
> helped to resume from "stuck" or "failed" state.
> 
> I think the first paragraph of the commit message misleads the reader to
> think something is "stuck" or "failed". And since this doesn't exactly fix
> such an issue that is not there in the first place it leads to confusion.
> 
> Perhaps it would be good to try to rephrase the first paragraph such that
> it wouldn't hint something is stuck when there is only a delay spike in
> FIFO's consumer side (if I now understand the problem space correctly?).

This problem could happen in several conditions:

1. The host driver (the consumer side of the FIFO) is slow.
2. The host driver (the consumer side of the FIFO) is stuck or stopped (not running).

There is no keepalive mechanism yet (which could be added later). So for case #2,
it'll be packet stuck in the FIFO.

Sure, I could rephrase the first paragraph to avoid the 'stuck' related misleading.

> 
> --
>  i.
> 
> > > >>> ---
> > > >>>  drivers/platform/mellanox/mlxbf-tmfifo.c | 67
> > > >> ++++++++++++++++++++++++
> > > >>>  1 file changed, 67 insertions(+)
> > > >>>
> > > >>> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c
> > > >> b/drivers/platform/mellanox/mlxbf-tmfifo.c
> > > >>> index 5c683b4eaf10..f39b7b9d2bfe 100644
> > > >>> --- a/drivers/platform/mellanox/mlxbf-tmfifo.c
> > > >>> +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
> > > >>> @@ -47,6 +47,9 @@
> > > >>>  /* Message with data needs at least two words (for header & data). */
> > > >>>  #define MLXBF_TMFIFO_DATA_MIN_WORDS		2
> > > >>>
> > > >>> +/* Tx timeout in milliseconds. */
> > > >>> +#define TMFIFO_TX_TIMEOUT			2000
> > > >>> +
> > > >>>  /* ACPI UID for BlueField-3. */
> > > >>>  #define TMFIFO_BF3_UID				1
> > > >>>
> > > >>> @@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
> > > >>>   * @drop_desc: dummy desc for packet dropping
> > > >>>   * @cur_len: processed length of the current descriptor
> > > >>>   * @rem_len: remaining length of the pending packet
> > > >>> + * @rem_padding: remaining bytes to send as paddings
> > > >>>   * @pkt_len: total length of the pending packet
> > > >>>   * @next_avail: next avail descriptor id
> > > >>>   * @num: vring size (number of descriptors)
> > > >>>   * @align: vring alignment size
> > > >>>   * @index: vring index
> > > >>>   * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
> > > >>> + * @tx_timeout: expire time of last tx packet
> > > >>>   * @fifo: pointer to the tmfifo structure
> > > >>>   */
> > > >>>  struct mlxbf_tmfifo_vring {
> > > >>> @@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
> > > >>>  	struct vring_desc drop_desc;
> > > >>>  	int cur_len;
> > > >>>  	int rem_len;
> > > >>> +	int rem_padding;
> > > >>>  	u32 pkt_len;
> > > >>>  	u16 next_avail;
> > > >>>  	int num;
> > > >>>  	int align;
> > > >>>  	int index;
> > > >>>  	int vdev_id;
> > > >>> +	unsigned long tx_timeout;
> > > >>>  	struct mlxbf_tmfifo *fifo;
> > > >>>  };
> > > >>>
> > > >>> @@ -819,6 +826,50 @@ static bool
> mlxbf_tmfifo_rxtx_one_desc(struct
> > > >> mlxbf_tmfifo_vring *vring,
> > > >>>  	return true;
> > > >>>  }
> > > >>>
> > > >>> +static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring
> > > >> *vring)
> > > >>> +{
> > > >>> +	unsigned long flags;
> > > >>> +
> > > >>> +	/* Only handle Tx timeout for network vdev. */
> > > >>> +	if (vring->vdev_id != VIRTIO_ID_NET)
> > > >>> +		return;
> > > >>> +
> > > >>> +	/* Initialize the timeout or return if not expired. */
> > > >>> +	if (!vring->tx_timeout) {
> > > >>> +		/* Initialize the timeout. */
> > > >>> +		vring->tx_timeout = jiffies +
> > > >>> +			msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
> > > >>> +		return;
> > > >>> +	} else if (time_before(jiffies, vring->tx_timeout)) {
> > > >>> +		/* Return if not timeout yet. */
> > > >>> +		return;
> > > >>> +	}
> > > >>> +
> > > >>> +	/*
> > > >>> +	 * Drop the packet after timeout. The outstanding packet is
> > > >>> +	 * released and the remaining bytes will be sent with padding
> byte
> > > >> 0x00
> > > >>> +	 * as a recovery. On the peer(host) side, the padding bytes
> 0x00 will be
> > > >>> +	 * either dropped directly, or appended into existing
> outstanding
> > > >> packet
> > > >>> +	 * thus dropped as corrupted network packet.
> > > >>> +	 */
> > > >>> +	vring->rem_padding = round_up(vring->rem_len,
> sizeof(u64));
> > > >>> +	mlxbf_tmfifo_release_pkt(vring);
> > > >>> +	vring->cur_len = 0;
> > > >>> +	vring->rem_len = 0;
> > > >>> +	vring->fifo->vring[0] = NULL;
> > > >>> +
> > > >>> +	/*
> > > >>> +	 * Make sure the load/store are in order before
> > > >>> +	 * returning back to virtio.
> > > >>> +	 */
> > > >>> +	virtio_mb(false);
> > > >>> +
> > > >>> +	/* Notify upper layer. */
> > > >>> +	spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
> > > >>> +	vring_interrupt(0, vring->vq);
> > > >>> +	spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
> > > >>> +}
> > > >>> +
> > > >>>  /* Rx & Tx processing of a queue. */
> > > >>>  static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool
> is_rx)
> > > >>>  {
> > > >>> @@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct
> > > >> mlxbf_tmfifo_vring *vring, bool is_rx)
> > > >>>  		return;
> > > >>>
> > > >>>  	do {
> > > >>> +retry:
> > > >>>  		/* Get available FIFO space. */
> > > >>>  		if (avail == 0) {
> > > >>>  			if (is_rx)
> > > >>> @@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct
> > > >> mlxbf_tmfifo_vring *vring, bool is_rx)
> > > >>>  				break;
> > > >>>  		}
> > > >>>
> > > >>> +		/* Insert paddings for discarded Tx packet. */
> > > >>> +		if (!is_rx) {
> > > >>> +			vring->tx_timeout = 0;
> > > >>> +			while (vring->rem_padding >= sizeof(u64)) {
> > > >>> +				writeq(0, vring->fifo->tx.data);
> > > >>> +				vring->rem_padding -= sizeof(u64);
> > > >>> +				if (--avail == 0)
> > > >>> +					goto retry;
> > > >>> +			}
> > > >>> +		}
> > > >>> +
> > > >>>  		/* Console output always comes from the Tx buffer. */
> > > >>>  		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
> > > >>>  			mlxbf_tmfifo_console_tx(fifo, avail);
> > > >>> @@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct
> > > >> mlxbf_tmfifo_vring *vring, bool is_rx)
> > > >>>  		/* Handle one descriptor. */
> > > >>>  		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx,
> &avail);
> > > >>>  	} while (more);
> > > >>> +
> > > >>> +	/* Check Tx timeout. */
> > > >>> +	if (avail <= 0 && !is_rx)
> > > >>> +		mlxbf_tmfifo_check_tx_timeout(vring);
> > > >>>  }
> > > >>>
> > > >>>  /* Handle Rx or Tx queues. */
> > > >>>
> >
> >

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v3] Drop Tx network packet when Tx TmFIFO is full
  2024-01-04 15:04 [PATCH v1 1/1] Drop Tx network packet when Tx TmFIFO is full Liming Sun
  2024-01-04 15:14 ` [PATCH v2 " Liming Sun
@ 2024-01-11 17:31 ` Liming Sun
  2024-01-12 15:21   ` Ilpo Järvinen
  2024-01-22 11:24   ` Hans de Goede
  1 sibling, 2 replies; 11+ messages in thread
From: Liming Sun @ 2024-01-11 17:31 UTC (permalink / raw)
  To: Vadim Pasternak, David Thompson, Hans de Goede, Mark Gross,
	Dan Carpenter
  Cc: Liming Sun, platform-driver-x86, linux-kernel

Starting from Linux 5.16 kernel, Tx timeout mechanism was added
in the virtio_net driver which prints the "Tx timeout" warning
message when a packet stays in Tx queue for too long. Below is an
example of the reported message:

"[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
last trans: 3079892256".

This issue could happen when external host driver which drains the
FIFO is restared, stopped or upgraded. To avoid such confusing
"Tx timeout" messages, this commit adds logic to drop the outstanding
Tx packet if it's not able to transmit in two seconds due to Tx FIFO
full, which can be considered as congestion or out-of-resource drop.

This commit also handles the special case that the packet is half-
transmitted into the Tx FIFO. In such case, the packet is discarded
with remaining length stored in vring->rem_padding. So paddings with
zeros can be sent out when Tx space is available to maintain the
integrity of the packet format. The padded packet will be dropped on
the receiving side.

Signed-off-by: Liming Sun <limings@nvidia.com>
---
v2->v3:
  Updates for Ilpo's comments:
  - Revises commit message to avoid confusion.
v2: Fixed formatting warning
v1: Initial version
---
 drivers/platform/mellanox/mlxbf-tmfifo.c | 67 ++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
index 5c683b4eaf10..f39b7b9d2bfe 100644
--- a/drivers/platform/mellanox/mlxbf-tmfifo.c
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -47,6 +47,9 @@
 /* Message with data needs at least two words (for header & data). */
 #define MLXBF_TMFIFO_DATA_MIN_WORDS		2
 
+/* Tx timeout in milliseconds. */
+#define TMFIFO_TX_TIMEOUT			2000
+
 /* ACPI UID for BlueField-3. */
 #define TMFIFO_BF3_UID				1
 
@@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
  * @drop_desc: dummy desc for packet dropping
  * @cur_len: processed length of the current descriptor
  * @rem_len: remaining length of the pending packet
+ * @rem_padding: remaining bytes to send as paddings
  * @pkt_len: total length of the pending packet
  * @next_avail: next avail descriptor id
  * @num: vring size (number of descriptors)
  * @align: vring alignment size
  * @index: vring index
  * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
+ * @tx_timeout: expire time of last tx packet
  * @fifo: pointer to the tmfifo structure
  */
 struct mlxbf_tmfifo_vring {
@@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
 	struct vring_desc drop_desc;
 	int cur_len;
 	int rem_len;
+	int rem_padding;
 	u32 pkt_len;
 	u16 next_avail;
 	int num;
 	int align;
 	int index;
 	int vdev_id;
+	unsigned long tx_timeout;
 	struct mlxbf_tmfifo *fifo;
 };
 
@@ -819,6 +826,50 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
 	return true;
 }
 
+static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring *vring)
+{
+	unsigned long flags;
+
+	/* Only handle Tx timeout for network vdev. */
+	if (vring->vdev_id != VIRTIO_ID_NET)
+		return;
+
+	/* Initialize the timeout or return if not expired. */
+	if (!vring->tx_timeout) {
+		/* Initialize the timeout. */
+		vring->tx_timeout = jiffies +
+			msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
+		return;
+	} else if (time_before(jiffies, vring->tx_timeout)) {
+		/* Return if not timeout yet. */
+		return;
+	}
+
+	/*
+	 * Drop the packet after timeout. The outstanding packet is
+	 * released and the remaining bytes will be sent with padding byte 0x00
+	 * as a recovery. On the peer(host) side, the padding bytes 0x00 will be
+	 * either dropped directly, or appended into existing outstanding packet
+	 * thus dropped as corrupted network packet.
+	 */
+	vring->rem_padding = round_up(vring->rem_len, sizeof(u64));
+	mlxbf_tmfifo_release_pkt(vring);
+	vring->cur_len = 0;
+	vring->rem_len = 0;
+	vring->fifo->vring[0] = NULL;
+
+	/*
+	 * Make sure the load/store are in order before
+	 * returning back to virtio.
+	 */
+	virtio_mb(false);
+
+	/* Notify upper layer. */
+	spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
+	vring_interrupt(0, vring->vq);
+	spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
+}
+
 /* Rx & Tx processing of a queue. */
 static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
 {
@@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
 		return;
 
 	do {
+retry:
 		/* Get available FIFO space. */
 		if (avail == 0) {
 			if (is_rx)
@@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
 				break;
 		}
 
+		/* Insert paddings for discarded Tx packet. */
+		if (!is_rx) {
+			vring->tx_timeout = 0;
+			while (vring->rem_padding >= sizeof(u64)) {
+				writeq(0, vring->fifo->tx.data);
+				vring->rem_padding -= sizeof(u64);
+				if (--avail == 0)
+					goto retry;
+			}
+		}
+
 		/* Console output always comes from the Tx buffer. */
 		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
 			mlxbf_tmfifo_console_tx(fifo, avail);
@@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
 		/* Handle one descriptor. */
 		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
 	} while (more);
+
+	/* Check Tx timeout. */
+	if (avail <= 0 && !is_rx)
+		mlxbf_tmfifo_check_tx_timeout(vring);
 }
 
 /* Handle Rx or Tx queues. */
-- 
2.30.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v3] Drop Tx network packet when Tx TmFIFO is full
  2024-01-11 17:31 ` [PATCH v3] " Liming Sun
@ 2024-01-12 15:21   ` Ilpo Järvinen
  2024-01-22 11:24   ` Hans de Goede
  1 sibling, 0 replies; 11+ messages in thread
From: Ilpo Järvinen @ 2024-01-12 15:21 UTC (permalink / raw)
  To: Liming Sun
  Cc: Vadim Pasternak, David Thompson, Hans de Goede, Mark Gross,
	Dan Carpenter, platform-driver-x86, LKML

[-- Attachment #1: Type: text/plain, Size: 1523 bytes --]

On Thu, 11 Jan 2024, Liming Sun wrote:

> Starting from Linux 5.16 kernel, Tx timeout mechanism was added
> in the virtio_net driver which prints the "Tx timeout" warning
> message when a packet stays in Tx queue for too long. Below is an
> example of the reported message:
> 
> "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
> queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
> last trans: 3079892256".
> 
> This issue could happen when external host driver which drains the
> FIFO is restared, stopped or upgraded. To avoid such confusing
> "Tx timeout" messages, this commit adds logic to drop the outstanding
> Tx packet if it's not able to transmit in two seconds due to Tx FIFO
> full, which can be considered as congestion or out-of-resource drop.
> 
> This commit also handles the special case that the packet is half-
> transmitted into the Tx FIFO. In such case, the packet is discarded
> with remaining length stored in vring->rem_padding. So paddings with
> zeros can be sent out when Tx space is available to maintain the
> integrity of the packet format. The padded packet will be dropped on
> the receiving side.
> 
> Signed-off-by: Liming Sun <limings@nvidia.com>
> ---
> v2->v3:
>   Updates for Ilpo's comments:
>   - Revises commit message to avoid confusion.
> v2: Fixed formatting warning
> v1: Initial version

Thanks, the commit message makes much more sense now!

Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>

-- 
 i.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3] Drop Tx network packet when Tx TmFIFO is full
  2024-01-11 17:31 ` [PATCH v3] " Liming Sun
  2024-01-12 15:21   ` Ilpo Järvinen
@ 2024-01-22 11:24   ` Hans de Goede
  1 sibling, 0 replies; 11+ messages in thread
From: Hans de Goede @ 2024-01-22 11:24 UTC (permalink / raw)
  To: Liming Sun, Vadim Pasternak, David Thompson, Mark Gross, Dan Carpenter
  Cc: platform-driver-x86, linux-kernel

Hi,

On 1/11/24 18:31, Liming Sun wrote:
> Starting from Linux 5.16 kernel, Tx timeout mechanism was added
> in the virtio_net driver which prints the "Tx timeout" warning
> message when a packet stays in Tx queue for too long. Below is an
> example of the reported message:
> 
> "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
> queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
> last trans: 3079892256".
> 
> This issue could happen when external host driver which drains the
> FIFO is restared, stopped or upgraded. To avoid such confusing
> "Tx timeout" messages, this commit adds logic to drop the outstanding
> Tx packet if it's not able to transmit in two seconds due to Tx FIFO
> full, which can be considered as congestion or out-of-resource drop.
> 
> This commit also handles the special case that the packet is half-
> transmitted into the Tx FIFO. In such case, the packet is discarded
> with remaining length stored in vring->rem_padding. So paddings with
> zeros can be sent out when Tx space is available to maintain the
> integrity of the packet format. The padded packet will be dropped on
> the receiving side.
> 
> Signed-off-by: Liming Sun <limings@nvidia.com>

Thank you for your patch/series, I've applied this patch
(series) to my review-hans branch:
https://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git/log/?h=review-hans

Note it will show up in the pdx86 review-hans branch once I've
pushed my local branch there, which might take a while.

I will include this patch in my next fixes pull-req to Linus
for the current kernel development cycle.

Regards,

Hans




> ---
> v2->v3:
>   Updates for Ilpo's comments:
>   - Revises commit message to avoid confusion.
> v2: Fixed formatting warning
> v1: Initial version
> ---
>  drivers/platform/mellanox/mlxbf-tmfifo.c | 67 ++++++++++++++++++++++++
>  1 file changed, 67 insertions(+)
> 
> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
> index 5c683b4eaf10..f39b7b9d2bfe 100644
> --- a/drivers/platform/mellanox/mlxbf-tmfifo.c
> +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
> @@ -47,6 +47,9 @@
>  /* Message with data needs at least two words (for header & data). */
>  #define MLXBF_TMFIFO_DATA_MIN_WORDS		2
>  
> +/* Tx timeout in milliseconds. */
> +#define TMFIFO_TX_TIMEOUT			2000
> +
>  /* ACPI UID for BlueField-3. */
>  #define TMFIFO_BF3_UID				1
>  
> @@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
>   * @drop_desc: dummy desc for packet dropping
>   * @cur_len: processed length of the current descriptor
>   * @rem_len: remaining length of the pending packet
> + * @rem_padding: remaining bytes to send as paddings
>   * @pkt_len: total length of the pending packet
>   * @next_avail: next avail descriptor id
>   * @num: vring size (number of descriptors)
>   * @align: vring alignment size
>   * @index: vring index
>   * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
> + * @tx_timeout: expire time of last tx packet
>   * @fifo: pointer to the tmfifo structure
>   */
>  struct mlxbf_tmfifo_vring {
> @@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
>  	struct vring_desc drop_desc;
>  	int cur_len;
>  	int rem_len;
> +	int rem_padding;
>  	u32 pkt_len;
>  	u16 next_avail;
>  	int num;
>  	int align;
>  	int index;
>  	int vdev_id;
> +	unsigned long tx_timeout;
>  	struct mlxbf_tmfifo *fifo;
>  };
>  
> @@ -819,6 +826,50 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
>  	return true;
>  }
>  
> +static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring *vring)
> +{
> +	unsigned long flags;
> +
> +	/* Only handle Tx timeout for network vdev. */
> +	if (vring->vdev_id != VIRTIO_ID_NET)
> +		return;
> +
> +	/* Initialize the timeout or return if not expired. */
> +	if (!vring->tx_timeout) {
> +		/* Initialize the timeout. */
> +		vring->tx_timeout = jiffies +
> +			msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
> +		return;
> +	} else if (time_before(jiffies, vring->tx_timeout)) {
> +		/* Return if not timeout yet. */
> +		return;
> +	}
> +
> +	/*
> +	 * Drop the packet after timeout. The outstanding packet is
> +	 * released and the remaining bytes will be sent with padding byte 0x00
> +	 * as a recovery. On the peer(host) side, the padding bytes 0x00 will be
> +	 * either dropped directly, or appended into existing outstanding packet
> +	 * thus dropped as corrupted network packet.
> +	 */
> +	vring->rem_padding = round_up(vring->rem_len, sizeof(u64));
> +	mlxbf_tmfifo_release_pkt(vring);
> +	vring->cur_len = 0;
> +	vring->rem_len = 0;
> +	vring->fifo->vring[0] = NULL;
> +
> +	/*
> +	 * Make sure the load/store are in order before
> +	 * returning back to virtio.
> +	 */
> +	virtio_mb(false);
> +
> +	/* Notify upper layer. */
> +	spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
> +	vring_interrupt(0, vring->vq);
> +	spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
> +}
> +
>  /* Rx & Tx processing of a queue. */
>  static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
>  {
> @@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
>  		return;
>  
>  	do {
> +retry:
>  		/* Get available FIFO space. */
>  		if (avail == 0) {
>  			if (is_rx)
> @@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
>  				break;
>  		}
>  
> +		/* Insert paddings for discarded Tx packet. */
> +		if (!is_rx) {
> +			vring->tx_timeout = 0;
> +			while (vring->rem_padding >= sizeof(u64)) {
> +				writeq(0, vring->fifo->tx.data);
> +				vring->rem_padding -= sizeof(u64);
> +				if (--avail == 0)
> +					goto retry;
> +			}
> +		}
> +
>  		/* Console output always comes from the Tx buffer. */
>  		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
>  			mlxbf_tmfifo_console_tx(fifo, avail);
> @@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
>  		/* Handle one descriptor. */
>  		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
>  	} while (more);
> +
> +	/* Check Tx timeout. */
> +	if (avail <= 0 && !is_rx)
> +		mlxbf_tmfifo_check_tx_timeout(vring);
>  }
>  
>  /* Handle Rx or Tx queues. */


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2024-01-22 11:24 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-04 15:04 [PATCH v1 1/1] Drop Tx network packet when Tx TmFIFO is full Liming Sun
2024-01-04 15:14 ` [PATCH v2 " Liming Sun
2024-01-04 17:38   ` Ilpo Järvinen
2024-01-05 17:40     ` Liming Sun
2024-01-08 14:23       ` Hans de Goede
2024-01-08 17:27         ` Liming Sun
2024-01-08 19:03           ` Ilpo Järvinen
2024-01-08 20:02             ` Liming Sun
2024-01-11 17:31 ` [PATCH v3] " Liming Sun
2024-01-12 15:21   ` Ilpo Järvinen
2024-01-22 11:24   ` Hans de Goede

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.