stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Varun Prakash <varun@chelsio.com>,
	Sagi Grimberg <sagi@grimberg.me>, Christoph Hellwig <hch@lst.de>,
	Sasha Levin <sashal@kernel.org>,
	kbusch@kernel.org, axboe@fb.com, linux-nvme@lists.infradead.org
Subject: [PATCH AUTOSEL 5.16 17/28] nvme-tcp: send H2CData PDUs based on MAXH2CDATA
Date: Tue,  1 Mar 2022 15:13:22 -0500	[thread overview]
Message-ID: <20220301201344.18191-17-sashal@kernel.org> (raw)
In-Reply-To: <20220301201344.18191-1-sashal@kernel.org>

From: Varun Prakash <varun@chelsio.com>

[ Upstream commit c2700d2886a87f83f31e0a301de1d2350b52c79b ]

As per NVMe/TCP specification (revision 1.0a, section 3.6.2.3)
Maximum Host to Controller Data length (MAXH2CDATA): Specifies the
maximum number of PDU-Data bytes per H2CData PDU in bytes. This value
is a multiple of dwords and should be no less than 4,096.

Current code sets H2CData PDU data_length to r2t_length,
it does not check MAXH2CDATA value. Fix this by setting H2CData PDU
data_length to min(req->h2cdata_left, queue->maxh2cdata).

Also validate MAXH2CDATA value returned by target in ICResp PDU,
if it is not a multiple of dword or if it is less than 4096 return
-EINVAL from nvme_tcp_init_connection().

Signed-off-by: Varun Prakash <varun@chelsio.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/nvme/host/tcp.c  | 63 +++++++++++++++++++++++++++++++---------
 include/linux/nvme-tcp.h |  1 +
 2 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 891a36d02e7c7..65e00c64a588b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -44,6 +44,8 @@ struct nvme_tcp_request {
 	u32			data_len;
 	u32			pdu_len;
 	u32			pdu_sent;
+	u32			h2cdata_left;
+	u32			h2cdata_offset;
 	u16			ttag;
 	__le16			status;
 	struct list_head	entry;
@@ -95,6 +97,7 @@ struct nvme_tcp_queue {
 	struct nvme_tcp_request *request;
 
 	int			queue_size;
+	u32			maxh2cdata;
 	size_t			cmnd_capsule_len;
 	struct nvme_tcp_ctrl	*ctrl;
 	unsigned long		flags;
@@ -572,23 +575,26 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
 	return ret;
 }
 
-static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
-		struct nvme_tcp_r2t_pdu *pdu)
+static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req)
 {
 	struct nvme_tcp_data_pdu *data = req->pdu;
 	struct nvme_tcp_queue *queue = req->queue;
 	struct request *rq = blk_mq_rq_from_pdu(req);
+	u32 h2cdata_sent = req->pdu_len;
 	u8 hdgst = nvme_tcp_hdgst_len(queue);
 	u8 ddgst = nvme_tcp_ddgst_len(queue);
 
 	req->state = NVME_TCP_SEND_H2C_PDU;
 	req->offset = 0;
-	req->pdu_len = le32_to_cpu(pdu->r2t_length);
+	req->pdu_len = min(req->h2cdata_left, queue->maxh2cdata);
 	req->pdu_sent = 0;
+	req->h2cdata_left -= req->pdu_len;
+	req->h2cdata_offset += h2cdata_sent;
 
 	memset(data, 0, sizeof(*data));
 	data->hdr.type = nvme_tcp_h2c_data;
-	data->hdr.flags = NVME_TCP_F_DATA_LAST;
+	if (!req->h2cdata_left)
+		data->hdr.flags = NVME_TCP_F_DATA_LAST;
 	if (queue->hdr_digest)
 		data->hdr.flags |= NVME_TCP_F_HDGST;
 	if (queue->data_digest)
@@ -597,9 +603,9 @@ static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
 	data->hdr.pdo = data->hdr.hlen + hdgst;
 	data->hdr.plen =
 		cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
-	data->ttag = pdu->ttag;
+	data->ttag = req->ttag;
 	data->command_id = nvme_cid(rq);
-	data->data_offset = pdu->r2t_offset;
+	data->data_offset = cpu_to_le32(req->h2cdata_offset);
 	data->data_length = cpu_to_le32(req->pdu_len);
 }
 
@@ -609,6 +615,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
 	struct nvme_tcp_request *req;
 	struct request *rq;
 	u32 r2t_length = le32_to_cpu(pdu->r2t_length);
+	u32 r2t_offset = le32_to_cpu(pdu->r2t_offset);
 
 	rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
 	if (!rq) {
@@ -633,14 +640,19 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
 		return -EPROTO;
 	}
 
-	if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
+	if (unlikely(r2t_offset < req->data_sent)) {
 		dev_err(queue->ctrl->ctrl.device,
 			"req %d unexpected r2t offset %u (expected %zu)\n",
-			rq->tag, le32_to_cpu(pdu->r2t_offset), req->data_sent);
+			rq->tag, r2t_offset, req->data_sent);
 		return -EPROTO;
 	}
 
-	nvme_tcp_setup_h2c_data_pdu(req, pdu);
+	req->pdu_len = 0;
+	req->h2cdata_left = r2t_length;
+	req->h2cdata_offset = r2t_offset;
+	req->ttag = pdu->ttag;
+
+	nvme_tcp_setup_h2c_data_pdu(req);
 	nvme_tcp_queue_request(req, false, true);
 
 	return 0;
@@ -928,6 +940,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 {
 	struct nvme_tcp_queue *queue = req->queue;
 	int req_data_len = req->data_len;
+	u32 h2cdata_left = req->h2cdata_left;
 
 	while (true) {
 		struct page *page = nvme_tcp_req_cur_page(req);
@@ -972,7 +985,10 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 				req->state = NVME_TCP_SEND_DDGST;
 				req->offset = 0;
 			} else {
-				nvme_tcp_done_send_req(queue);
+				if (h2cdata_left)
+					nvme_tcp_setup_h2c_data_pdu(req);
+				else
+					nvme_tcp_done_send_req(queue);
 			}
 			return 1;
 		}
@@ -1030,9 +1046,14 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
 	if (queue->hdr_digest && !req->offset)
 		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
 
-	ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
-			offset_in_page(pdu) + req->offset, len,
-			MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
+	if (!req->h2cdata_left)
+		ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
+				offset_in_page(pdu) + req->offset, len,
+				MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
+	else
+		ret = sock_no_sendpage(queue->sock, virt_to_page(pdu),
+				offset_in_page(pdu) + req->offset, len,
+				MSG_DONTWAIT | MSG_MORE);
 	if (unlikely(ret <= 0))
 		return ret;
 
@@ -1052,6 +1073,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
 {
 	struct nvme_tcp_queue *queue = req->queue;
 	size_t offset = req->offset;
+	u32 h2cdata_left = req->h2cdata_left;
 	int ret;
 	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
 	struct kvec iov = {
@@ -1069,7 +1091,10 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
 		return ret;
 
 	if (offset + ret == NVME_TCP_DIGEST_LENGTH) {
-		nvme_tcp_done_send_req(queue);
+		if (h2cdata_left)
+			nvme_tcp_setup_h2c_data_pdu(req);
+		else
+			nvme_tcp_done_send_req(queue);
 		return 1;
 	}
 
@@ -1261,6 +1286,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
 	struct msghdr msg = {};
 	struct kvec iov;
 	bool ctrl_hdgst, ctrl_ddgst;
+	u32 maxh2cdata;
 	int ret;
 
 	icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);
@@ -1344,6 +1370,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
 		goto free_icresp;
 	}
 
+	maxh2cdata = le32_to_cpu(icresp->maxdata);
+	if ((maxh2cdata % 4) || (maxh2cdata < NVME_TCP_MIN_MAXH2CDATA)) {
+		pr_err("queue %d: invalid maxh2cdata returned %u\n",
+		       nvme_tcp_queue_id(queue), maxh2cdata);
+		goto free_icresp;
+	}
+	queue->maxh2cdata = maxh2cdata;
+
 	ret = 0;
 free_icresp:
 	kfree(icresp);
@@ -2329,6 +2363,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
 	req->data_sent = 0;
 	req->pdu_len = 0;
 	req->pdu_sent = 0;
+	req->h2cdata_left = 0;
 	req->data_len = blk_rq_nr_phys_segments(rq) ?
 				blk_rq_payload_bytes(rq) : 0;
 	req->curr_bio = rq->bio;
diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h
index 959e0bd9a913e..75470159a194d 100644
--- a/include/linux/nvme-tcp.h
+++ b/include/linux/nvme-tcp.h
@@ -12,6 +12,7 @@
 #define NVME_TCP_DISC_PORT	8009
 #define NVME_TCP_ADMIN_CCSZ	SZ_8K
 #define NVME_TCP_DIGEST_LENGTH	4
+#define NVME_TCP_MIN_MAXH2CDATA 4096
 
 enum nvme_tcp_pfv {
 	NVME_TCP_PFV_1_0 = 0x0,
-- 
2.34.1


  parent reply	other threads:[~2022-03-01 20:16 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-01 20:13 [PATCH AUTOSEL 5.16 01/28] selftests/bpf: Add test for bpf_timer overwriting crash Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 02/28] swiotlb: fix info leak with DMA_FROM_DEVICE Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 03/28] usb: dwc3: pci: add support for the Intel Raptor Lake-S Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 04/28] pinctrl: tigerlake: Revert "Add Alder Lake-M ACPI ID" Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 05/28] KVM: Fix lockdep false negative during host resume Sasha Levin
2022-03-01 20:19   ` Paolo Bonzini
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 06/28] kvm: x86: Disable KVM_HC_CLOCK_PAIRING if tsc is in always catchup mode Sasha Levin
2022-03-01 20:19   ` Paolo Bonzini
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 07/28] x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0 Sasha Levin
2022-03-01 20:22   ` Paolo Bonzini
2022-06-03 18:40     ` Peter Xu
2022-06-06 16:18       ` Paolo Bonzini
2022-06-06 21:27         ` Peter Xu
2022-06-07 12:54           ` Paolo Bonzini
2022-06-07 15:04             ` Sean Christopherson
2022-06-07 18:17               ` Peter Xu
2022-06-07 18:47                 ` Sean Christopherson
2022-06-07 21:01                   ` Peter Xu
2022-06-07 18:55                 ` Peter Xu
2022-06-08 20:34                   ` Leonardo Bras Soares Passos
2022-06-08 20:53                     ` Peter Xu
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 08/28] spi: rockchip: Fix error in getting num-cs property Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 09/28] spi: rockchip: terminate dma transmission when slave abort Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 10/28] drm/vc4: hdmi: Unregister codec device on unbind Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 11/28] of/fdt: move elfcorehdr reservation early for crash dump kernel Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 12/28] x86/kvm: Don't use pv tlb/ipi/sched_yield if on 1 vCPU Sasha Levin
2022-03-01 20:17   ` Paolo Bonzini
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 13/28] drivers: hamradio: 6pack: fix UAF bug caused by mod_timer() Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 14/28] net-sysfs: add check for netdevice being present to speed_show Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 15/28] sr9700: sanity check for packet length Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 16/28] hwmon: (pmbus) Clear pmbus fault/warning bits after read Sasha Levin
2022-03-01 20:13 ` Sasha Levin [this message]
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 18/28] PCI: Mark all AMD Navi10 and Navi14 GPU ATS as broken Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 19/28] gpio: Return EPROBE_DEFER if gc->to_irq is NULL Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 20/28] drm/amdgpu: bypass tiling flag check in virtual display case (v2) Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 21/28] Revert "xen-netback: remove 'hotplug-status' once it has served its purpose" Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 22/28] Revert "xen-netback: Check for hotplug-status existence before watching" Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 23/28] ipv6: prevent a possible race condition with lifetimes Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 24/28] tracing: Ensure trace buffer is at least 4096 bytes large Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 25/28] tracing/osnoise: Make osnoise_main to sleep for microseconds Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 26/28] tracing: Fix selftest config check for function graph start up test Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 27/28] selftest/vm: fix map_fixed_noreplace test failure Sasha Levin
2022-03-01 20:13 ` [PATCH AUTOSEL 5.16 28/28] selftests/memfd: clean up mapping in mfd_fail_write Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220301201344.18191-17-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=axboe@fb.com \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    --cc=stable@vger.kernel.org \
    --cc=varun@chelsio.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).