kernel-tls-handshake.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
From: Hannes Reinecke <hare@suse.de>
To: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>, Keith Busch <kbusch@kernel.org>,
	linux-nvme@lists.infradead.org, Chuck Lever <cel@kernel.org>,
	kernel-tls-handshake@lists.linux.dev,
	Hannes Reinecke <hare@suse.de>
Subject: [PATCH 05/18] nvme-tcp: implement recvmsg rx flow for TLS
Date: Tue, 21 Mar 2023 13:43:12 +0100	[thread overview]
Message-ID: <20230321124325.77385-6-hare@suse.de> (raw)
In-Reply-To: <20230321124325.77385-1-hare@suse.de>

TLS offload only implements recvmsg(), so implement the receive
side using recvmsg() instead of read_sock().

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/host/tcp.c | 156 ++++++++++++++++++++--------------------
 1 file changed, 77 insertions(+), 79 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 42c0598c31f2..0e14b1b90855 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -529,7 +529,7 @@ static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
 	queue->pdu_remaining = sizeof(struct nvme_tcp_rsp_pdu) +
 				nvme_tcp_hdgst_len(queue);
 	queue->pdu_offset = 0;
-	queue->data_remaining = -1;
+	queue->data_remaining = 0;
 	queue->ddgst_remaining = 0;
 }
 
@@ -707,25 +707,32 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
 	return 0;
 }
 
-static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
-		unsigned int *offset, size_t *len)
+static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, bool pending)
 {
 	struct nvme_tcp_hdr *hdr;
-	char *pdu = queue->pdu;
-	size_t rcv_len = min_t(size_t, *len, queue->pdu_remaining);
+	size_t rcv_len = queue->pdu_remaining;
+	struct msghdr msg = {
+		.msg_flags = pending ? 0 : MSG_DONTWAIT,
+	};
+	struct kvec iov = {
+		.iov_base = (u8 *)queue->pdu + queue->pdu_offset,
+		.iov_len = rcv_len,
+	};
 	int ret;
 
-	ret = skb_copy_bits(skb, *offset,
-		&pdu[queue->pdu_offset], rcv_len);
-	if (unlikely(ret))
+	if (nvme_tcp_recv_state(queue) != NVME_TCP_RECV_PDU)
+		return 0;
+
+	ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
+			     iov.iov_len, msg.msg_flags);
+	if (ret <= 0)
 		return ret;
 
+	rcv_len = ret;
 	queue->pdu_remaining -= rcv_len;
 	queue->pdu_offset += rcv_len;
-	*offset += rcv_len;
-	*len -= rcv_len;
 	if (queue->pdu_remaining)
-		return 0;
+		return queue->pdu_remaining;
 
 	hdr = queue->pdu;
 	if (queue->hdr_digest) {
@@ -734,7 +741,6 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 			return ret;
 	}
 
-
 	if (queue->data_digest) {
 		ret = nvme_tcp_check_ddgst(queue, queue->pdu);
 		if (unlikely(ret))
@@ -765,19 +771,21 @@ static inline void nvme_tcp_end_request(struct request *rq, u16 status)
 		nvme_complete_rq(rq);
 }
 
-static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
-			      unsigned int *offset, size_t *len)
+static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue)
 {
 	struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
 	struct request *rq =
 		nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
 	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
 
+	if (nvme_tcp_recv_state(queue) != NVME_TCP_RECV_DATA)
+		return 0;
+
 	while (true) {
-		int recv_len, ret;
+		struct msghdr msg;
+		int ret;
 
-		recv_len = min_t(size_t, *len, queue->data_remaining);
-		if (!recv_len)
+		if (!queue->data_remaining)
 			break;
 
 		if (!iov_iter_count(&req->iter)) {
@@ -798,25 +806,20 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 		}
 
 		/* we can read only from what is left in this bio */
-		recv_len = min_t(size_t, recv_len,
-				iov_iter_count(&req->iter));
+		memset(&msg, 0, sizeof(msg));
+		msg.msg_iter = req->iter;
 
-		if (queue->data_digest)
-			ret = skb_copy_and_hash_datagram_iter(skb, *offset,
-				&req->iter, recv_len, queue->rcv_hash);
-		else
-			ret = skb_copy_datagram_iter(skb, *offset,
-					&req->iter, recv_len);
-		if (ret) {
+		ret = sock_recvmsg(queue->sock, &msg, 0);
+		if (ret <= 0) {
 			dev_err(queue->ctrl->ctrl.device,
-				"queue %d failed to copy request %#x data",
+				"queue %d failed to receive request %#x data",
 				nvme_tcp_queue_id(queue), rq->tag);
 			return ret;
 		}
 
-		*len -= recv_len;
-		*offset += recv_len;
-		queue->data_remaining -= recv_len;
+		queue->data_remaining -= ret;
+		if (queue->data_remaining)
+			nvme_tcp_advance_req(req, ret);
 	}
 
 	if (!queue->data_remaining) {
@@ -833,27 +836,36 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 		}
 	}
 
-	return 0;
+	return queue->data_remaining;
 }
 
-static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
-		struct sk_buff *skb, unsigned int *offset, size_t *len)
+static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue)
 {
 	struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
 	char *ddgst = (char *)&queue->recv_ddgst;
-	size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
+	size_t recv_len = queue->ddgst_remaining;
 	off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
+	struct msghdr msg = {
+		.msg_flags = 0,
+	};
+	struct kvec iov = {
+		.iov_base = (u8 *)ddgst + off,
+		.iov_len = recv_len,
+	};
 	int ret;
 
-	ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
-	if (unlikely(ret))
+	if (nvme_tcp_recv_state(queue) != NVME_TCP_RECV_DDGST)
+		return 0;
+
+	ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, iov.iov_len,
+			     msg.msg_flags);
+	if (ret <= 0)
 		return ret;
 
+	recv_len = ret;
 	queue->ddgst_remaining -= recv_len;
-	*offset += recv_len;
-	*len -= recv_len;
 	if (queue->ddgst_remaining)
-		return 0;
+		return queue->ddgst_remaining;
 
 	if (queue->recv_ddgst != queue->exp_ddgst) {
 		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
@@ -881,37 +893,41 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 	return 0;
 }
 
-static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
-			     unsigned int offset, size_t len)
+static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue, bool pending)
 {
-	struct nvme_tcp_queue *queue = desc->arg.data;
-	size_t consumed = len;
 	int result;
+	int nr_cqe = queue->nr_cqe;
 
-	while (len) {
+	do {
 		switch (nvme_tcp_recv_state(queue)) {
 		case NVME_TCP_RECV_PDU:
-			result = nvme_tcp_recv_pdu(queue, skb, &offset, &len);
-			break;
+			result = nvme_tcp_recv_pdu(queue, pending);
+			if (result)
+				break;
+			fallthrough;
 		case NVME_TCP_RECV_DATA:
-			result = nvme_tcp_recv_data(queue, skb, &offset, &len);
-			break;
+			result = nvme_tcp_recv_data(queue);
+			if (result)
+				break;
+			fallthrough;
 		case NVME_TCP_RECV_DDGST:
-			result = nvme_tcp_recv_ddgst(queue, skb, &offset, &len);
+			result = nvme_tcp_recv_ddgst(queue);
 			break;
 		default:
 			result = -EFAULT;
 		}
-		if (result) {
-			dev_err(queue->ctrl->ctrl.device,
-				"receive failed:  %d\n", result);
-			queue->rd_enabled = false;
-			nvme_tcp_error_recovery(&queue->ctrl->ctrl);
-			return result;
-		}
+		if (nr_cqe != queue->nr_cqe)
+			break;
+	} while (result >= 0);
+	if (result < 0 && result != -EAGAIN) {
+		dev_err(queue->ctrl->ctrl.device,
+			"receive failed: %d state %d %s\n",
+			result, nvme_tcp_recv_state(queue),
+			pending ? "pending" : "");
+		queue->rd_enabled = false;
+		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
 	}
-
-	return consumed;
+	return result < 0 ? result : (queue->nr_cqe - nr_cqe);
 }
 
 static void nvme_tcp_data_ready(struct sock *sk)
@@ -1203,22 +1219,6 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
 	return ret;
 }
 
-static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
-{
-	struct socket *sock = queue->sock;
-	struct sock *sk = sock->sk;
-	read_descriptor_t rd_desc;
-	int consumed;
-
-	rd_desc.arg.data = queue;
-	rd_desc.count = 1;
-	lock_sock(sk);
-	queue->nr_cqe = 0;
-	consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
-	release_sock(sk);
-	return consumed;
-}
-
 static void nvme_tcp_io_work(struct work_struct *w)
 {
 	struct nvme_tcp_queue *queue =
@@ -1232,13 +1232,11 @@ static void nvme_tcp_io_work(struct work_struct *w)
 		if (mutex_trylock(&queue->send_mutex)) {
 			result = nvme_tcp_try_send(queue);
 			mutex_unlock(&queue->send_mutex);
-			if (result > 0)
-				pending = true;
-			else if (unlikely(result < 0))
+			if (unlikely(result < 0))
 				break;
 		}
 
-		result = nvme_tcp_try_recv(queue);
+		result = nvme_tcp_try_recv(queue, pending);
 		if (result > 0)
 			pending = true;
 		else if (unlikely(result < 0))
@@ -2491,7 +2489,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
 	set_bit(NVME_TCP_Q_POLLING, &queue->flags);
 	if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
 		sk_busy_loop(sk, true);
-	nvme_tcp_try_recv(queue);
+	nvme_tcp_try_recv(queue, false);
 	clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
 	return queue->nr_cqe;
 }
-- 
2.35.3


  parent reply	other threads:[~2023-03-21 12:44 UTC|newest]

Thread overview: 87+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-21 12:43 [RFC PATCH 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
2023-03-21 12:43 ` [PATCH 01/18] nvme-keyring: register '.nvme' keyring Hannes Reinecke
2023-03-21 13:50   ` Sagi Grimberg
2023-03-21 14:11     ` Hannes Reinecke
2023-03-21 12:43 ` [PATCH 02/18] nvme-keyring: define a 'psk' keytype Hannes Reinecke
2023-03-22  8:29   ` Sagi Grimberg
2023-03-22  8:38     ` Hannes Reinecke
2023-03-22  8:49       ` Sagi Grimberg
2023-03-21 12:43 ` [PATCH 03/18] nvme: add TCP TSAS definitions Hannes Reinecke
2023-03-21 13:46   ` Sagi Grimberg
2023-03-21 12:43 ` [PATCH 04/18] nvme-tcp: add definitions for TLS cipher suites Hannes Reinecke
2023-03-22  8:18   ` Sagi Grimberg
2023-03-21 12:43 ` Hannes Reinecke [this message]
2023-03-21 13:39   ` [PATCH 05/18] nvme-tcp: implement recvmsg rx flow for TLS Sagi Grimberg
2023-03-21 13:59     ` Hannes Reinecke
2023-03-22  8:01       ` Sagi Grimberg
2023-03-21 12:43 ` [PATCH 06/18] nvme-tcp: call 'queue->data_ready()' in nvme_tcp_data_ready() Hannes Reinecke
2023-03-21 13:44   ` Sagi Grimberg
2023-03-21 14:09     ` Hannes Reinecke
2023-03-22  0:18       ` Chris Leech
2023-03-22  6:59         ` Hannes Reinecke
2023-03-22  8:12           ` Sagi Grimberg
2023-03-22  8:08       ` Sagi Grimberg
2023-03-22  8:26         ` Hannes Reinecke
2023-03-22 10:13           ` Sagi Grimberg
2023-03-21 12:43 ` [PATCH 07/18] nvme/tcp: allocate socket file Hannes Reinecke
2023-03-21 13:52   ` Sagi Grimberg
2023-03-21 12:43 ` [PATCH 08/18] nvme-tcp: enable TLS handshake upcall Hannes Reinecke
2023-03-22  8:45   ` Sagi Grimberg
2023-03-22  9:12     ` Hannes Reinecke
2023-03-22 10:56       ` Sagi Grimberg
2023-03-22 12:54         ` Hannes Reinecke
2023-03-22 13:16           ` Sagi Grimberg
2023-03-21 12:43 ` [PATCH 09/18] nvme-tcp: add connect option 'tls' Hannes Reinecke
2023-03-22  9:24   ` Sagi Grimberg
2023-03-22  9:59     ` Hannes Reinecke
2023-03-22 10:09       ` Sagi Grimberg
2023-03-21 12:43 ` [PATCH 10/18] nvme-tcp: fixup send workflow for kTLS Hannes Reinecke
2023-03-22  9:31   ` Sagi Grimberg
2023-03-22 10:08     ` Hannes Reinecke
2023-03-22 11:18       ` Sagi Grimberg
2023-03-21 12:43 ` [PATCH 11/18] nvme-tcp: control message handling for recvmsg() Hannes Reinecke
2023-03-22 11:33   ` Sagi Grimberg
2023-03-22 11:48     ` Hannes Reinecke
2023-03-22 11:50       ` Sagi Grimberg
2023-03-22 12:17         ` Hannes Reinecke
2023-03-22 12:29           ` Sagi Grimberg
2023-03-21 12:43 ` [PATCH 12/18] nvmet: make TCP sectype settable via configfs Hannes Reinecke
2023-03-22 11:38   ` Sagi Grimberg
2023-03-21 12:43 ` [PATCH 13/18] nvmet-tcp: allocate socket file Hannes Reinecke
2023-03-22 11:46   ` Sagi Grimberg
2023-03-22 12:07     ` Hannes Reinecke
2023-03-21 12:43 ` [PATCH 14/18] security/keys: export key_lookup() Hannes Reinecke
2023-03-21 12:43 ` [PATCH 15/18] nvmet-tcp: enable TLS handshake upcall Hannes Reinecke
2023-03-22 12:13   ` Sagi Grimberg
2023-03-22 12:34     ` Hannes Reinecke
2023-03-22 12:51       ` Sagi Grimberg
2023-03-22 13:47         ` Hannes Reinecke
2023-03-22 15:42           ` Sagi Grimberg
2023-03-22 16:43             ` Hannes Reinecke
2023-03-22 16:49               ` Chuck Lever III
2023-03-23  7:21                 ` Sagi Grimberg
2023-03-24 11:29                   ` Hannes Reinecke
2023-03-26  7:18                     ` Sagi Grimberg
2023-03-27  6:20                       ` Hannes Reinecke
2023-03-28  8:44                         ` Sagi Grimberg
2023-03-28  9:20                           ` Hannes Reinecke
2023-03-28  9:43                             ` Sagi Grimberg
2023-03-28 10:04                               ` Hannes Reinecke
2023-03-28 13:22                           ` Chuck Lever III
2023-03-28 15:29                             ` Sagi Grimberg
2023-03-28 15:56                               ` Chuck Lever III
2023-03-29  6:33                                 ` Sagi Grimberg
2023-03-23  7:44               ` Sagi Grimberg
2023-03-21 12:43 ` [PATCH 16/18] nvmet-tcp: rework sendpage for kTLS Hannes Reinecke
2023-03-22 12:16   ` Sagi Grimberg
2023-03-21 12:43 ` [PATCH 17/18] nvmet-tcp: control messages for recvmsg() Hannes Reinecke
2023-03-21 12:43 ` [PATCH 18/18] nvmet-tcp: peek icreq before starting TLS Hannes Reinecke
2023-03-22 12:24   ` Sagi Grimberg
2023-03-22 12:38     ` Hannes Reinecke
2023-03-21 13:12 ` [RFC PATCH 00/18] nvme: In-kernel TLS support for TCP Sagi Grimberg
2023-03-21 13:30   ` Hannes Reinecke
2023-03-22  8:16     ` Sagi Grimberg
2023-03-22  8:28       ` Hannes Reinecke
2023-03-22 12:53         ` Sagi Grimberg
2023-03-22 15:10           ` Hannes Reinecke
2023-03-22 15:43             ` Sagi Grimberg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230321124325.77385-6-hare@suse.de \
    --to=hare@suse.de \
    --cc=cel@kernel.org \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=kernel-tls-handshake@lists.linux.dev \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).