* [PATCH] nvme-tcp: strict pdu pacing to avoid send stalls on TLS
@ 2024-04-17 15:39 Hannes Reinecke
From: Hannes Reinecke @ 2024-04-17 15:39 UTC
  To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke

TLS requires strict PDU pacing via MSG_EOR to signal the end
of a record and to trigger encryption of that record. If we do
not set MSG_EOR at the end of a sequence the record is never
closed, encryption never starts, and we end up with a send stall
as the message is never passed on to the TCP layer.
So do not check the queue status when deciding whether MSG_MORE
should be set, but rather make it depend on the current command
only.

Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
 drivers/nvme/host/tcp.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 2b821cbbdf1f..b460ebf72a1a 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1049,7 +1049,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 		int req_data_sent = req->data_sent;
 		int ret;
 
-		if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
+		if (last && !queue->data_digest)
 			msg.msg_flags |= MSG_EOR;
 		else
 			msg.msg_flags |= MSG_MORE;
@@ -1105,7 +1105,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 	int len = sizeof(*pdu) + hdgst - req->offset;
 	int ret;
 
-	if (inline_data || nvme_tcp_queue_more(queue))
+	if (inline_data)
 		msg.msg_flags |= MSG_MORE;
 	else
 		msg.msg_flags |= MSG_EOR;
@@ -1175,17 +1175,12 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
 	size_t offset = req->offset;
 	u32 h2cdata_left = req->h2cdata_left;
 	int ret;
-	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
+	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
 	struct kvec iov = {
 		.iov_base = (u8 *)&req->ddgst + req->offset,
 		.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
 	};
 
-	if (nvme_tcp_queue_more(queue))
-		msg.msg_flags |= MSG_MORE;
-	else
-		msg.msg_flags |= MSG_EOR;
-
 	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
 	if (unlikely(ret <= 0))
 		return ret;
-- 
2.35.3
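
For context, the flag semantics the patch relies on: on a kTLS socket,
MSG_MORE keeps the current TLS record open, while MSG_EOR closes it so
the record can be encrypted and handed to TCP. A minimal sketch of that
pattern follows; the send_pdu_fragment() helper and its socket/buffer
arguments are hypothetical and not part of the patch, only the flag
handling mirrors what the patch does in nvme_tcp_try_send_*():
--
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/uio.h>

/*
 * Illustrative only: how MSG_MORE vs MSG_EOR drives TLS record framing.
 * The helper is hypothetical; only the flag handling reflects the patch.
 */
static int send_pdu_fragment(struct socket *sock, void *buf, size_t len,
			     bool last_fragment)
{
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
	struct kvec iov = { .iov_base = buf, .iov_len = len };

	if (last_fragment)
		msg.msg_flags |= MSG_EOR;	/* close the record, start encryption */
	else
		msg.msg_flags |= MSG_MORE;	/* keep the record open; nothing reaches TCP yet */

	return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
}
--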




* Re: [PATCH] nvme-tcp: strict pdu pacing to avoid send stalls on TLS
@ 2024-04-18  8:01 Sagi Grimberg
From: Sagi Grimberg @ 2024-04-18  8:01 UTC
  To: Hannes Reinecke, Christoph Hellwig; +Cc: Keith Busch, linux-nvme



On 17/04/2024 18:39, Hannes Reinecke wrote:
> TLS requires strict PDU pacing via MSG_EOR to signal the end
> of a record and to trigger encryption of that record. If we do
> not set MSG_EOR at the end of a sequence the record is never
> closed, encryption never starts, and we end up with a send stall
> as the message is never passed on to the TCP layer.
> So do not check the queue status when deciding whether MSG_MORE
> should be set, but rather make it depend on the current command
> only.

How about making nvme_tcp_queue_more() take nvme_tcp_tls() into
account, so that we preserve the existing behavior without TLS?

i.e. something like:
--
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 0ba62fc647b3..bbffc67f8a1e 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -360,12 +360,18 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
         } while (ret > 0);
  }

-static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
+static inline bool nvme_tcp_queue_has_pending(struct nvme_tcp_queue *queue)
  {
         return !list_empty(&queue->send_list) ||
                 !llist_empty(&queue->req_list);
  }

+static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
+{
+       return !nvme_tcp_tls(queue->ctrl) &&
+               nvme_tcp_queue_has_pending(queue);
+}
+
  static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
                 bool sync, bool last)
  {
@@ -386,7 +392,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
                 mutex_unlock(&queue->send_mutex);
         }

-       if (last && nvme_tcp_queue_more(queue))
+       if (last && nvme_tcp_queue_has_pending(queue))
                 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
  }
--
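
For illustration, the net effect of this refactor at the data-send call
site: nvme_tcp_queue_more() now always returns false on a TLS queue, so
the unmodified check degrades to MSG_EOR on the last fragment, while
non-TLS queues keep batching across queued commands. A sketch follows;
the nvme_tcp_data_msg_flags() helper is hypothetical (it does not exist
in the driver), but the condition matches the existing code in
nvme_tcp_try_send_data():
--
/*
 * Sketch only: how the unmodified call site behaves once
 * nvme_tcp_queue_more() is TLS-aware. Field names follow
 * drivers/nvme/host/tcp.c; the helper itself is illustrative.
 */
static inline int nvme_tcp_data_msg_flags(struct nvme_tcp_queue *queue,
					  bool last)
{
	int flags = MSG_DONTWAIT;

	if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
		flags |= MSG_EOR;	/* TLS always lands here on the last fragment */
	else
		flags |= MSG_MORE;	/* non-TLS may still batch with pending requests */

	return flags;
}
--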



* Re: [PATCH] nvme-tcp: strict pdu pacing to avoid send stalls on TLS
@ 2024-04-18  9:05 Hannes Reinecke
From: Hannes Reinecke @ 2024-04-18  9:05 UTC
  To: Sagi Grimberg, Hannes Reinecke, Christoph Hellwig; +Cc: Keith Busch, linux-nvme

On 4/18/24 10:01, Sagi Grimberg wrote:
> 
> 
> On 17/04/2024 18:39, Hannes Reinecke wrote:
>> TLS requires strict PDU pacing via MSG_EOR to signal the end
>> of a record and to trigger encryption of that record. If we do
>> not set MSG_EOR at the end of a sequence the record is never
>> closed, encryption never starts, and we end up with a send stall
>> as the message is never passed on to the TCP layer.
>> So do not check the queue status when deciding whether MSG_MORE
>> should be set, but rather make it depend on the current command
>> only.
> 
> How about making nvme_tcp_queue_more() take nvme_tcp_tls() into
> account, so that we preserve the existing behavior without TLS?
> 
> i.e. something like:
> -- 
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 0ba62fc647b3..bbffc67f8a1e 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -360,12 +360,18 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
>          } while (ret > 0);
>   }
> 
> -static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
> +static inline bool nvme_tcp_queue_has_pending(struct nvme_tcp_queue *queue)
>   {
>          return !list_empty(&queue->send_list) ||
>                  !llist_empty(&queue->req_list);
>   }
> 
> +static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
> +{
> +       return !nvme_tcp_tls(queue->ctrl) &&
> +               nvme_tcp_queue_has_pending(queue);
> +}
> +
>   static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
>                  bool sync, bool last)
>   {
> @@ -386,7 +392,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
>                  mutex_unlock(&queue->send_mutex);
>          }
> 
> -       if (last && nvme_tcp_queue_more(queue))
> +       if (last && nvme_tcp_queue_has_pending(queue))
>                 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
>   }
> -- 
> 
Would work as well, I guess.
I'll give it a go.

Cheers,

Hannes


