* [PATCH 1/2] nvme-tcp: fix opcode reporting in the timeout handler
@ 2023-03-13  8:56 Sagi Grimberg
  2023-03-13  8:56 ` [PATCH 2/2] nvme-tcp: add nvme-tcp pdu size build protection Sagi Grimberg
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Sagi Grimberg @ 2023-03-13  8:56 UTC (permalink / raw)
  To: linux-nvme
  Cc: Christoph Hellwig, Keith Busch, Chaitanya Kulkarni, Akinobu Mita

For non in-capsule writes we reuse the request pdu space for an h2cdata
pdu in order to avoid over-allocating space (either preallocated or
allocated dynamically upon receiving an r2t pdu). However, if the request
times out, the core expects to find the opcode at the start of the
request pdu, which we overwrite.

To prevent that, without sacrificing an additional 24 bytes per
request, we simply use the tail of the command pdu space instead (the
last 24 bytes of the 72-byte command pdu). That keeps the command
opcode always available, and we avoid allocating more space.

If in the future we need the last 24 bytes of the nvme command
available, we will need to allocate dedicated space for it in the
request, but until then we can avoid doing so.
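
As a quick illustration of why this works, here is a minimal standalone
userspace sketch of the same pointer arithmetic (the 72/24-byte sizes and
the 8-byte header offset are assumptions taken from the NVMe/TCP wire
format, not the kernel headers themselves):

#include <assert.h>

/* assumed wire sizes: 8-byte common hdr + 64-byte nvme command = 72-byte
 * command pdu; the h2cdata (data) pdu is 24 bytes */
#define CMD_PDU_SIZE	72
#define DATA_PDU_SIZE	24

/* mirrors the arithmetic in nvme_tcp_req_data_pdu(): place the data pdu
 * in the tail of the command pdu space */
static void *req_data_pdu(void *req_pdu)
{
	return (char *)req_pdu + CMD_PDU_SIZE - DATA_PDU_SIZE;
}

int main(void)
{
	char pdu[CMD_PDU_SIZE];

	/* the data pdu starts at offset 48, so the command opcode (first
	 * byte of the nvme command, at offset 8) is never overwritten */
	assert((char *)req_data_pdu(pdu) - pdu == 48);
	return 0;
}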

Reported-by: Akinobu Mita <akinobu.mita@gmail.com>
Tested-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/tcp.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index ef27122acce6..c73811907afa 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -207,6 +207,18 @@ static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
 	return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
 }
 
+static inline void *nvme_tcp_req_cmd_pdu(struct nvme_tcp_request *req)
+{
+	return req->pdu;
+}
+
+static inline void *nvme_tcp_req_data_pdu(struct nvme_tcp_request *req)
+{
+	/* use the pdu space in the back for the data pdu */
+	return req->pdu + sizeof(struct nvme_tcp_cmd_pdu) -
+		sizeof(struct nvme_tcp_data_pdu);
+}
+
 static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_request *req)
 {
 	if (nvme_is_fabrics(req->req.cmd))
@@ -613,7 +625,7 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
 
 static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req)
 {
-	struct nvme_tcp_data_pdu *data = req->pdu;
+	struct nvme_tcp_data_pdu *data = nvme_tcp_req_data_pdu(req);
 	struct nvme_tcp_queue *queue = req->queue;
 	struct request *rq = blk_mq_rq_from_pdu(req);
 	u32 h2cdata_sent = req->pdu_len;
@@ -1035,7 +1047,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 {
 	struct nvme_tcp_queue *queue = req->queue;
-	struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+	struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
 	bool inline_data = nvme_tcp_has_inline_data(req);
 	u8 hdgst = nvme_tcp_hdgst_len(queue);
 	int len = sizeof(*pdu) + hdgst - req->offset;
@@ -1074,7 +1086,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
 {
 	struct nvme_tcp_queue *queue = req->queue;
-	struct nvme_tcp_data_pdu *pdu = req->pdu;
+	struct nvme_tcp_data_pdu *pdu = nvme_tcp_req_data_pdu(req);
 	u8 hdgst = nvme_tcp_hdgst_len(queue);
 	int len = sizeof(*pdu) - req->offset + hdgst;
 	int ret;
@@ -2281,7 +2293,7 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
 {
 	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
-	struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+	struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
 	u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype;
 	int qid = nvme_tcp_queue_id(req->queue);
 
@@ -2320,7 +2332,7 @@ static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
 			struct request *rq)
 {
 	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
-	struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+	struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
 	struct nvme_command *c = &pdu->cmd;
 
 	c->common.flags |= NVME_CMD_SGL_METABUF;
@@ -2340,7 +2352,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
 		struct request *rq)
 {
 	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
-	struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+	struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
 	struct nvme_tcp_queue *queue = req->queue;
 	u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0;
 	blk_status_t ret;
-- 
2.34.1




* [PATCH 2/2] nvme-tcp: add nvme-tcp pdu size build protection
  2023-03-13  8:56 [PATCH 1/2] nvme-tcp: fix opcode reporting in the timeout handler Sagi Grimberg
@ 2023-03-13  8:56 ` Sagi Grimberg
  2023-03-15  5:15   ` Chaitanya Kulkarni
  2023-03-15  5:14 ` [PATCH 1/2] nvme-tcp: fix opcode reporting in the timeout handler Chaitanya Kulkarni
  2023-03-15 13:59 ` Christoph Hellwig
  2 siblings, 1 reply; 5+ messages in thread
From: Sagi Grimberg @ 2023-03-13  8:56 UTC (permalink / raw)
  To: linux-nvme
  Cc: Christoph Hellwig, Keith Busch, Chaitanya Kulkarni, Akinobu Mita

Add build-time size checks to make sure that we don't somehow mess up the
wire structures defined in the spec.
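
For reference, a minimal userspace sketch of the same kind of build-time
guard, using C11 _Static_assert instead of the kernel's BUILD_BUG_ON (the
struct below is a hypothetical stand-in, not the real definition from
include/linux/nvme-tcp.h):

#include <stdint.h>

/* hypothetical stand-in for the 8-byte common pdu header */
struct example_tcp_hdr {
	uint8_t  type;
	uint8_t  flags;
	uint8_t  hlen;
	uint8_t  pdo;
	uint32_t plen;
};

/* compilation fails if the struct ever drifts from the size the spec
 * mandates for the wire format */
_Static_assert(sizeof(struct example_tcp_hdr) == 8,
	       "example_tcp_hdr must be 8 bytes on the wire");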

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/tcp.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c73811907afa..0ec7d3329595 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2691,6 +2691,15 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
 
 static int __init nvme_tcp_init_module(void)
 {
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72);
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24);
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_rsp_pdu) != 24);
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_r2t_pdu) != 24);
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_icreq_pdu) != 128);
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128);
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24);
+
 	nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq",
 			WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
 	if (!nvme_tcp_wq)
-- 
2.34.1




* Re: [PATCH 1/2] nvme-tcp: fix opcode reporting in the timeout handler
  2023-03-13  8:56 [PATCH 1/2] nvme-tcp: fix opcode reporting in the timeout handler Sagi Grimberg
  2023-03-13  8:56 ` [PATCH 2/2] nvme-tcp: add nvme-tcp pdu size build protection Sagi Grimberg
@ 2023-03-15  5:14 ` Chaitanya Kulkarni
  2023-03-15 13:59 ` Christoph Hellwig
  2 siblings, 0 replies; 5+ messages in thread
From: Chaitanya Kulkarni @ 2023-03-15  5:14 UTC (permalink / raw)
  To: Sagi Grimberg, linux-nvme
  Cc: Christoph Hellwig, Keith Busch, Chaitanya Kulkarni, Akinobu Mita

On 3/13/2023 1:56 AM, Sagi Grimberg wrote:
> For non in-capsule writes we reuse the request pdu space for an h2cdata
> pdu in order to avoid over-allocating space (either preallocated or
> allocated dynamically upon receiving an r2t pdu). However, if the request
> times out, the core expects to find the opcode at the start of the
> request pdu, which we overwrite.
> 
> To prevent that, without sacrificing an additional 24 bytes per
> request, we simply use the tail of the command pdu space instead (the
> last 24 bytes of the 72-byte command pdu). That keeps the command
> opcode always available, and we avoid allocating more space.
> 
> If in the future we need the last 24 bytes of the nvme command
> available, we will need to allocate dedicated space for it in the
> request, but until then we can avoid doing so.
> 
> Reported-by: Akinobu Mita <akinobu.mita@gmail.com>
> Tested-by: Akinobu Mita <akinobu.mita@gmail.com>
> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
> ---


This looks good to me.

Reviewed-by: Chaitanya Kulkarni <kkch@nvidia.com>

-ck




* Re: [PATCH 2/2] nvme-tcp: add nvme-tcp pdu size build protection
  2023-03-13  8:56 ` [PATCH 2/2] nvme-tcp: add nvme-tcp pdu size build protection Sagi Grimberg
@ 2023-03-15  5:15   ` Chaitanya Kulkarni
  0 siblings, 0 replies; 5+ messages in thread
From: Chaitanya Kulkarni @ 2023-03-15  5:15 UTC (permalink / raw)
  To: Sagi Grimberg, linux-nvme
  Cc: Christoph Hellwig, Keith Busch, Chaitanya Kulkarni, Akinobu Mita

On 3/13/2023 1:56 AM, Sagi Grimberg wrote:
> Add build-time size checks to make sure that we don't somehow mess up the
> wire structures defined in the spec.
> 
> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
> ---

Thanks for doing this.

Reviewed-by: Chaitanya Kulkarni <kkch@nvidia.com>

-ck




* Re: [PATCH 1/2] nvme-tcp: fix opcode reporting in the timeout handler
  2023-03-13  8:56 [PATCH 1/2] nvme-tcp: fix opcode reporting in the timeout handler Sagi Grimberg
  2023-03-13  8:56 ` [PATCH 2/2] nvme-tcp: add nvme-tcp pdu size build protection Sagi Grimberg
  2023-03-15  5:14 ` [PATCH 1/2] nvme-tcp: fix opcode reporting in the timeout handler Chaitanya Kulkarni
@ 2023-03-15 13:59 ` Christoph Hellwig
  2 siblings, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2023-03-15 13:59 UTC (permalink / raw)
  To: Sagi Grimberg
  Cc: linux-nvme, Christoph Hellwig, Keith Busch, Chaitanya Kulkarni,
	Akinobu Mita

Thanks, applied to nvme-6.3.



