* [PATCH RFC v2 1/2] nvme-rdma: support up to 4 segments of inline data
  2018-05-16 21:18 [PATCH RFC v2 0/2] NVMF/RDMA 8K Inline Support Steve Wise
@ 2018-05-16 19:57 ` Steve Wise
  2018-05-17 11:43   ` Christoph Hellwig
  2018-05-16 19:58 ` [PATCH RFC v2 2/2] nvmet-rdma: support 16K " Steve Wise
  2018-05-16 22:01 ` [PATCH RFC v2 0/2] NVMF/RDMA 8K Inline Support Steve Wise
  2 siblings, 1 reply; 9+ messages in thread
From: Steve Wise @ 2018-05-16 19:57 UTC


Allow up to 4 segments of inline data for NVMF WRITE operations. This
reduces latency for small WRITEs by removing the need for the target to
issue a READ WR for IB, or a REG_MR + READ WR chain for iWARP.

Also cap the number of inline segments used based on the limitations of
the device.
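
(With 4K pages this allows a WRITE spanning up to 4 discontiguous
pages, e.g. an 8K payload split across page boundaries, to be sent
inline, subject to the inline data size advertised by the target.)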

Signed-off-by: Steve Wise <swise at opengridcomputing.com>
---
 drivers/nvme/host/rdma.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 1eb4438..9ae261d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -40,13 +40,14 @@
 
 #define NVME_RDMA_MAX_SEGMENTS		256
 
-#define NVME_RDMA_MAX_INLINE_SEGMENTS	1
+#define NVME_RDMA_MAX_INLINE_SEGMENTS	4
 
 struct nvme_rdma_device {
 	struct ib_device	*dev;
 	struct ib_pd		*pd;
 	struct kref		ref;
 	struct list_head	entry;
+	unsigned int		num_inline_segments;
 };
 
 struct nvme_rdma_qe {
@@ -249,7 +250,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
 	/* +1 for drain */
 	init_attr.cap.max_recv_wr = queue->queue_size + 1;
 	init_attr.cap.max_recv_sge = 1;
-	init_attr.cap.max_send_sge = 1 + NVME_RDMA_MAX_INLINE_SEGMENTS;
+	init_attr.cap.max_send_sge = 1 + dev->num_inline_segments;
 	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 	init_attr.qp_type = IB_QPT_RC;
 	init_attr.send_cq = queue->ib_cq;
@@ -374,6 +375,9 @@ static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
 		goto out_free_pd;
 	}
 
+	ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
+					ndev->dev->attrs.max_sge - 1);
+	pr_debug("num_inline_segments = %u\n", ndev->num_inline_segments);
 	list_add(&ndev->entry, &device_list);
 out_unlock:
 	mutex_unlock(&device_list_mutex);
@@ -1086,19 +1090,27 @@ static int nvme_rdma_set_sg_null(struct nvme_command *c)
 }
 
 static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
-		struct nvme_rdma_request *req, struct nvme_command *c)
+		struct nvme_rdma_request *req, struct nvme_command *c,
+		int count)
 {
 	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
+	struct scatterlist *sgl = req->sg_table.sgl;
+	struct ib_sge *sge = &req->sge[1];
+	u32 len = 0;
+	int i;
 
-	req->sge[1].addr = sg_dma_address(req->sg_table.sgl);
-	req->sge[1].length = sg_dma_len(req->sg_table.sgl);
-	req->sge[1].lkey = queue->device->pd->local_dma_lkey;
+	for (i = 0; i < count; i++, sgl++, sge++) {
+		sge->addr = sg_dma_address(sgl);
+		sge->length = sg_dma_len(sgl);
+		sge->lkey = queue->device->pd->local_dma_lkey;
+		len += sge->length;
+	}
 
 	sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
-	sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl));
+	sg->length = cpu_to_le32(len);
 	sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
 
-	req->num_sge++;
+	req->num_sge += count;
 	return 0;
 }
 
@@ -1191,13 +1203,13 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 		return -EIO;
 	}
 
-	if (count == 1) {
+	if (count <= dev->num_inline_segments) {
 		if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
 		    blk_rq_payload_bytes(rq) <=
 				nvme_rdma_inline_data_size(queue))
-			return nvme_rdma_map_sg_inline(queue, req, c);
+			return nvme_rdma_map_sg_inline(queue, req, c, count);
 
-		if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
+		if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
 			return nvme_rdma_map_sg_single(queue, req, c);
 	}
 
-- 
1.8.3.1


* [PATCH RFC v2 2/2] nvmet-rdma: support 16K inline data
  2018-05-16 21:18 [PATCH RFC v2 0/2] NVMF/RDMA 8K Inline Support Steve Wise
  2018-05-16 19:57 ` [PATCH RFC v2 1/2] nvme-rdma: support up to 4 segments of inline data Steve Wise
@ 2018-05-16 19:58 ` Steve Wise
  2018-05-17 11:52   ` Christoph Hellwig
  2018-05-16 22:01 ` [PATCH RFC v2 0/2] NVMF/RDMA 8K Inline Support Steve Wise
  2 siblings, 1 reply; 9+ messages in thread
From: Steve Wise @ 2018-05-16 19:58 UTC


Add a new configfs port attribute, called inline_data_size, to
allow configuring the size of inline data for a given port.
The maximum size allowed is still enforced by nvmet-rdma with
NVMET_RDMA_MAX_INLINE_DATA_SIZE, which is increased to max(16KB,
PAGE_SIZE).  The default size, if not specified via configfs, is
still PAGE_SIZE.  This preserves the existing behavior while allowing
larger inline sizes.
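
For example (a hypothetical session; the port number and configfs
mount point depend on the local setup, and the port must be disabled
while the attribute is changed):

	echo 8192 > /sys/kernel/config/nvmet/ports/1/inline_data_size
	cat /sys/kernel/config/nvmet/ports/1/inline_data_size
	8192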

Signed-off-by: Steve Wise <swise at opengridcomputing.com>
---
 drivers/nvme/target/admin-cmd.c |  4 ++--
 drivers/nvme/target/configfs.c  | 34 ++++++++++++++++++++++++++++++++++
 drivers/nvme/target/discovery.c |  2 +-
 drivers/nvme/target/nvmet.h     |  4 +++-
 drivers/nvme/target/rdma.c      | 41 +++++++++++++++++++++++++++++------------
 5 files changed, 69 insertions(+), 16 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 5e0e9fc..a9e3223 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -247,14 +247,14 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->sgls = cpu_to_le32(1 << 0);	/* we always support SGLs */
 	if (ctrl->ops->has_keyed_sgls)
 		id->sgls |= cpu_to_le32(1 << 2);
-	if (ctrl->ops->sqe_inline_size)
+	if (req->port->inline_data_size)
 		id->sgls |= cpu_to_le32(1 << 20);
 
 	strcpy(id->subnqn, ctrl->subsys->subsysnqn);
 
 	/* Max command capsule size is sqe + single page of in-capsule data */
 	id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
-				  ctrl->ops->sqe_inline_size) / 16);
+				  req->port->inline_data_size) / 16);
 	/* Max response capsule size is cqe */
 	id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
 
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index ad9ff27..968bdcb 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -214,6 +214,38 @@ static ssize_t nvmet_addr_trsvcid_store(struct config_item *item,
 
 CONFIGFS_ATTR(nvmet_, addr_trsvcid);
 
+static ssize_t nvmet_inline_data_size_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+
+	return snprintf(page, PAGE_SIZE, "%u\n",
+			port->inline_data_size);
+}
+
+static ssize_t nvmet_inline_data_size_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+	unsigned int size;
+	int ret;
+
+	if (port->enabled) {
+		pr_err("Cannot modify inline_data_size while port enabled\n");
+		pr_err("Disable the port before modifying\n");
+		return -EACCES;
+	}
+	ret = kstrtouint((const char *)page, 0, &size);
+	if (ret) {
+		pr_err("Invalid value '%s' for inline_data_size\n", page);
+		return -EINVAL;
+	}
+	port->inline_data_size = size;
+	return count;
+}
+
+CONFIGFS_ATTR(nvmet_, inline_data_size);
+
 static ssize_t nvmet_addr_trtype_show(struct config_item *item,
 		char *page)
 {
@@ -870,6 +902,7 @@ static void nvmet_port_release(struct config_item *item)
 	&nvmet_attr_addr_traddr,
 	&nvmet_attr_addr_trsvcid,
 	&nvmet_attr_addr_trtype,
+	&nvmet_attr_inline_data_size,
 	NULL,
 };
 
@@ -899,6 +932,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
 	INIT_LIST_HEAD(&port->entry);
 	INIT_LIST_HEAD(&port->subsystems);
 	INIT_LIST_HEAD(&port->referrals);
+	port->inline_data_size = NVMET_DEFAULT_INLINE_DATA_SIZE;
 
 	port->disc_addr.portid = cpu_to_le16(portid);
 	config_group_init_type_name(&port->group, name, &nvmet_port_type);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 231e04e..fc2e675 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -171,7 +171,7 @@ static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req)
 	id->sgls = cpu_to_le32(1 << 0);	/* we always support SGLs */
 	if (ctrl->ops->has_keyed_sgls)
 		id->sgls |= cpu_to_le32(1 << 2);
-	if (ctrl->ops->sqe_inline_size)
+	if (req->port->inline_data_size)
 		id->sgls |= cpu_to_le32(1 << 20);
 
 	strcpy(id->subnqn, ctrl->subsys->subsysnqn);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 15fd84a..5be528f 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -79,6 +79,8 @@ struct nvmet_sq {
 	struct completion	confirm_done;
 };
 
+#define NVMET_DEFAULT_INLINE_DATA_SIZE	-1
+
 /**
  * struct nvmet_port -	Common structure to keep port
  *				information for the target.
@@ -98,6 +100,7 @@ struct nvmet_port {
 	struct list_head		referrals;
 	void				*priv;
 	bool				enabled;
+	int				inline_data_size;
 };
 
 static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
@@ -202,7 +205,6 @@ struct nvmet_subsys_link {
 struct nvmet_fabrics_ops {
 	struct module *owner;
 	unsigned int type;
-	unsigned int sqe_inline_size;
 	unsigned int msdbd;
 	bool has_keyed_sgls : 1;
 	void (*queue_response)(struct nvmet_req *req);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 52e0c5d..4fe4a2d 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -33,9 +33,10 @@
 #include "nvmet.h"
 
 /*
- * We allow up to a page of inline data to go with the SQE
+ * We allow at least 1 page, and up to 16KB of inline data to go with the SQE
  */
-#define NVMET_RDMA_INLINE_DATA_SIZE	PAGE_SIZE
+#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
+#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int, SZ_16K, PAGE_SIZE)
 
 struct nvmet_rdma_cmd {
 	struct ib_sge		sge[2];
@@ -116,6 +117,7 @@ struct nvmet_rdma_device {
 	size_t			srq_size;
 	struct kref		ref;
 	struct list_head	entry;
+	int			inline_data_size;
 };
 
 static bool nvmet_rdma_use_srq;
@@ -187,6 +189,8 @@ static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
 static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
 			struct nvmet_rdma_cmd *c, bool admin)
 {
+	unsigned int inline_data_size = ndev->inline_data_size;
+
 	/* NVMe command / RDMA RECV */
 	c->nvme_cmd = kmalloc(sizeof(*c->nvme_cmd), GFP_KERNEL);
 	if (!c->nvme_cmd)
@@ -202,15 +206,15 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
 
 	if (!admin) {
 		c->inline_page = alloc_pages(GFP_KERNEL,
-				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
+				get_order(inline_data_size));
 		if (!c->inline_page)
 			goto out_unmap_cmd;
 		c->sge[1].addr = ib_dma_map_page(ndev->device,
-				c->inline_page, 0, NVMET_RDMA_INLINE_DATA_SIZE,
+				c->inline_page, 0, inline_data_size,
 				DMA_FROM_DEVICE);
 		if (ib_dma_mapping_error(ndev->device, c->sge[1].addr))
 			goto out_free_inline_page;
-		c->sge[1].length = NVMET_RDMA_INLINE_DATA_SIZE;
+		c->sge[1].length = inline_data_size;
 		c->sge[1].lkey = ndev->pd->local_dma_lkey;
 	}
 
@@ -225,7 +229,7 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
 out_free_inline_page:
 	if (!admin) {
 		__free_pages(c->inline_page,
-				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
+				get_order(inline_data_size));
 	}
 out_unmap_cmd:
 	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
@@ -240,11 +244,13 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
 static void nvmet_rdma_free_cmd(struct nvmet_rdma_device *ndev,
 		struct nvmet_rdma_cmd *c, bool admin)
 {
+	unsigned int inline_data_size = ndev->inline_data_size;
+
 	if (!admin) {
 		ib_dma_unmap_page(ndev->device, c->sge[1].addr,
-				NVMET_RDMA_INLINE_DATA_SIZE, DMA_FROM_DEVICE);
+				inline_data_size, DMA_FROM_DEVICE);
 		__free_pages(c->inline_page,
-				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
+				get_order(inline_data_size));
 	}
 	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
 				sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
@@ -544,7 +550,7 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
 	if (!nvme_is_write(rsp->req.cmd))
 		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 
-	if (off + len > NVMET_RDMA_INLINE_DATA_SIZE) {
+	if (off + len > rsp->queue->dev->inline_data_size) {
 		pr_err("invalid inline data offset!\n");
 		return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
 	}
@@ -793,6 +799,7 @@ static void nvmet_rdma_free_dev(struct kref *ref)
 static struct nvmet_rdma_device *
 nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
 {
+	struct nvmet_port *port = cm_id->context;
 	struct nvmet_rdma_device *ndev;
 	int ret;
 
@@ -807,6 +814,7 @@ static void nvmet_rdma_free_dev(struct kref *ref)
 	if (!ndev)
 		goto out_err;
 
+	ndev->inline_data_size = port->inline_data_size;
 	ndev->device = cm_id->device;
 	kref_init(&ndev->ref);
 
@@ -1379,6 +1387,15 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
 		return -EINVAL;
 	}
 
+	if (port->inline_data_size == NVMET_DEFAULT_INLINE_DATA_SIZE) {
+		port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
+	} else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
+		pr_err("invalid inline_data_size %d (max supported is %u)\n",
+			port->inline_data_size,
+			NVMET_RDMA_MAX_INLINE_DATA_SIZE);
+		return -EINVAL;
+	}
+
 	ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
 			port->disc_addr.trsvcid, &addr);
 	if (ret) {
@@ -1418,8 +1435,9 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
 		goto out_destroy_id;
 	}
 
-	pr_info("enabling port %d (%pISpcs)\n",
-		le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr);
+	pr_info("enabling port %d (%pISpcs) inline_data_size %d\n",
+		le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr,
+		port->inline_data_size);
 	port->priv = cm_id;
 	return 0;
 
@@ -1456,7 +1474,6 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
 static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
 	.owner			= THIS_MODULE,
 	.type			= NVMF_TRTYPE_RDMA,
-	.sqe_inline_size	= NVMET_RDMA_INLINE_DATA_SIZE,
 	.msdbd			= 1,
 	.has_keyed_sgls		= 1,
 	.add_port		= nvmet_rdma_add_port,
-- 
1.8.3.1


* [PATCH RFC v2 0/2] NVMF/RDMA 8K Inline Support
@ 2018-05-16 21:18 Steve Wise
  2018-05-16 19:57 ` [PATCH RFC v2 1/2] nvme-rdma: support up to 4 segments of inline data Steve Wise
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Steve Wise @ 2018-05-16 21:18 UTC


For small nvmf write IO over the rdma transport, it is advantageous to
make use of inline mode to avoid the latency of the target issuing an
rdma read to fetch the data.  Currently inline is used for <= 4K writes.
8K, though, requires the rdma read.  For iWARP transports, additional
latency is incurred because the target MR of the read must be registered
with remote write access.  By allowing 2 pages worth of inline payload,
I see a reduction in 8K nvmf write latency of anywhere from 2-7 usecs,
depending on the RDMA transport.

This series is a respin of a series floated last year by Parav and Max [1].
I'm continuing it now and trying to address the comments from their
submission.

A few of the comments have been addressed:

- nvme-rdma: Support up to 4 segments of inline data.

- nvme-rdma: Cap the number of inline segments to not exceed device limitations.

- nvmet-rdma: Make the inline data size configurable in nvmet-rdma via configfs.

Other issues I haven't addressed:

- nvme-rdma: make the sge array for inline segments dynamic based on the
target's advertised inline_data_size.  Since we're limiting the max count
to 4, I'm not sure this is worth the complexity of allocating the sge array
vs just embedding the max.
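
For context, embedding the max amounts to keeping a fixed array in the
host request structure, along these lines:

	/* one SGE for the SQE itself, plus up to 4 inline data segments */
	struct ib_sge	sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];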

- nvmet-rdma: concern about high order page allocations.  Is 4 pages
too high?  One possibility is that, if the device max_sge allows, use
a few more sges, e.g. 16K could be 2 8K sges or 4 4K sges.  This probably makes
passing the inline data to bio more complex.  I haven't looked into this
yet.
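
(For scale: with 4K pages, a 16K inline buffer per command is an
order-2 allocation, i.e. get_order(16K) == 2, while splitting it into
4 4K sges would keep every buffer at order 0.)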

- nvmet-rdma: reduce the qp depth if the inline size greatly increases
the memory footprint.  I'm not sure how to do this in a reasonable manner.
Since the inline data size is now configurable, do we still need this?
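
(Rough numbers, assuming a queue depth of 128: the preallocated inline
buffers would grow from 128 * 4K = 512K today to 128 * 16K = 2M per
queue at the new maximum.)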

- nvmet-rdma: make the qp depth configurable so the admin can reduce it
manually to lower the memory footprint.

Please comment!

Thanks,

Steve.

[1] Original submissions:
http://lists.infradead.org/pipermail/linux-nvme/2017-February/008057.html
http://lists.infradead.org/pipermail/linux-nvme/2017-February/008059.html


Steve Wise (2):
  nvme-rdma: support up to 4 segments of inline data
  nvmet-rdma: support 16K inline data

 drivers/nvme/host/rdma.c        | 34 +++++++++++++++++++++++-----------
 drivers/nvme/target/admin-cmd.c |  4 ++--
 drivers/nvme/target/configfs.c  | 34 ++++++++++++++++++++++++++++++++++
 drivers/nvme/target/discovery.c |  2 +-
 drivers/nvme/target/nvmet.h     |  4 +++-
 drivers/nvme/target/rdma.c      | 41 +++++++++++++++++++++++++++++------------
 6 files changed, 92 insertions(+), 27 deletions(-)

-- 
1.8.3.1


* [PATCH RFC v2 0/2] NVMF/RDMA 8K Inline Support
  2018-05-16 21:18 [PATCH RFC v2 0/2] NVMF/RDMA 8K Inline Support Steve Wise
  2018-05-16 19:57 ` [PATCH RFC v2 1/2] nvme-rdma: support up to 4 segments of inline data Steve Wise
  2018-05-16 19:58 ` [PATCH RFC v2 2/2] nvmet-rdma: support 16K " Steve Wise
@ 2018-05-16 22:01 ` Steve Wise
  2 siblings, 0 replies; 9+ messages in thread
From: Steve Wise @ 2018-05-16 22:01 UTC


Oops!  The subject should be "16K Inline Support".

Steve.


On 5/16/2018 4:18 PM, Steve Wise wrote:
> For small nvmf write IO over the rdma transport, it is advantageous to
> make use of inline mode to avoid the latency of the target issuing an
> rdma read to fetch the data.  Currently inline is used for <= 4K writes.
> 8K, though, requires the rdma read.  For iWARP transports, additional
> latency is incurred because the target MR of the read must be registered
> with remote write access.  By allowing 2 pages worth of inline payload,
> I see a reduction in 8K nvmf write latency of anywhere from 2-7 usecs,
> depending on the RDMA transport.
>
> This series is a respin of a series floated last year by Parav and Max [1].
> I'm continuing it now and trying to address the comments from their
> submission.
>
> A few of the comments have been addressed:
>
> - nvme-rdma: Support up to 4 segments of inline data.
>
> - nvme-rdma: Cap the number of inline segments to not exceed device limitations.
>
> - nvmet-rdma: Make the inline data size configurable in nvmet-rdma via configfs.
>
> Other issues I haven't addressed:
>
> - nvme-rdma: make the sge array for inline segments dynamic based on the
> target's advertised inline_data_size.  Since we're limiting the max count
> to 4, I'm not sure this is worth the complexity of allocating the sge array
> vs just embedding the max.
>
> - nvmet-rdma: concern about high order page allocations.  Is 4 pages
> too high?  One possibility is that, if the device max_sge allows, use
> a few more sges, e.g. 16K could be 2 8K sges or 4 4K sges.  This probably makes
> passing the inline data to bio more complex.  I haven't looked into this
> yet.
>
> - nvmet-rdma: reduce the qp depth if the inline size greatly increases
> the memory footprint.  I'm not sure how to do this in a reasonable manner.
> Since the inline data size is now configurable, do we still need this?
>
> - nvmet-rdma: make the qp depth configurable so the admin can reduce it
> manually to lower the memory footprint.
>
> Please comment!
>
> Thanks,
>
> Steve.
>
> [1] Original submissions:
> http://lists.infradead.org/pipermail/linux-nvme/2017-February/008057.html
> http://lists.infradead.org/pipermail/linux-nvme/2017-February/008059.html
>
>
> Steve Wise (2):
>   nvme-rdma: support up to 4 segments of inline data
>   nvmet-rdma: support 16K inline data
>
>  drivers/nvme/host/rdma.c        | 34 +++++++++++++++++++++++-----------
>  drivers/nvme/target/admin-cmd.c |  4 ++--
>  drivers/nvme/target/configfs.c  | 34 ++++++++++++++++++++++++++++++++++
>  drivers/nvme/target/discovery.c |  2 +-
>  drivers/nvme/target/nvmet.h     |  4 +++-
>  drivers/nvme/target/rdma.c      | 41 +++++++++++++++++++++++++++++------------
>  6 files changed, 92 insertions(+), 27 deletions(-)
>


* [PATCH RFC v2 1/2] nvme-rdma: support up to 4 segments of inline data
  2018-05-16 19:57 ` [PATCH RFC v2 1/2] nvme-rdma: support up to 4 segments of inline data Steve Wise
@ 2018-05-17 11:43   ` Christoph Hellwig
  0 siblings, 0 replies; 9+ messages in thread
From: Christoph Hellwig @ 2018-05-17 11:43 UTC


Looks good,

Reviewed-by: Christoph Hellwig <hch at lst.de>


* [PATCH RFC v2 2/2] nvmet-rdma: support 16K inline data
  2018-05-16 19:58 ` [PATCH RFC v2 2/2] nvmet-rdma: support 16K " Steve Wise
@ 2018-05-17 11:52   ` Christoph Hellwig
  2018-05-17 14:24     ` Steve Wise
  0 siblings, 1 reply; 9+ messages in thread
From: Christoph Hellwig @ 2018-05-17 11:52 UTC


> +static ssize_t nvmet_inline_data_size_show(struct config_item *item,
> +		char *page)
> +{
> +	struct nvmet_port *port = to_nvmet_port(item);
> +
> +	return snprintf(page, PAGE_SIZE, "%u\n",
> +			port->inline_data_size);

Please fit the whole snprintf statement onto a single line.

> +}
> +
> +static ssize_t nvmet_inline_data_size_store(struct config_item *item,
> +		const char *page, size_t count)
> +{
> +	struct nvmet_port *port = to_nvmet_port(item);
> +	unsigned int size;
> +	int ret;
> +
> +	if (port->enabled) {
> +		pr_err("Cannot modify inline_data_size while port enabled\n");
> +		pr_err("Disable the port before modifying\n");
> +		return -EACCES;
> +	}
> +	ret = kstrtouint((const char *)page, 0, &size);

This cast looks bogus.

Also, inline_data_size should be a u32 as that is closest to what
is on the wire, and you thus should use kstrtou32 and pass the
inline_data_size straight to kstrtou32 instead of bouncing it through
a local variable.
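
Something like this untested sketch, assuming inline_data_size becomes
a u32:

	ret = kstrtou32(page, 0, &port->inline_data_size);
	if (ret) {
		pr_err("Invalid value '%s' for inline_data_size\n", page);
		return -EINVAL;
	}
	return count;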

> +CONFIGFS_ATTR(nvmet_, inline_data_size);

The characters before the first _ in the name are used as a group
by nvmetcli.  So I think this should get a param_ or so prefix
before the inline_data_size.  Also currently this attribute only
makes sense for rdma, so I think we still need a flag in
nvmet_fabrics_ops that enables/disables this attribute.
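
E.g. next to the existing bitfield in nvmet_fabrics_ops; the flag name
here is made up:

	bool has_keyed_sgls : 1;
	bool has_inline_data : 1;	/* gates the configfs attribute */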

Last but not least please also send a nvmetcli patch to support
this new attribute.

> +#define NVMET_DEFAULT_INLINE_DATA_SIZE	-1

0 makes much more sense as the default, and then we don't even need
a name for it.

> +#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
> +#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int, SZ_16K, PAGE_SIZE)

So for 64k pages the minimum is bigger than the maximum? :)

> +	int			inline_data_size;

u32


* [PATCH RFC v2 2/2] nvmet-rdma: support 16K inline data
  2018-05-17 11:52   ` Christoph Hellwig
@ 2018-05-17 14:24     ` Steve Wise
  2018-05-18  9:08       ` Christoph Hellwig
  0 siblings, 1 reply; 9+ messages in thread
From: Steve Wise @ 2018-05-17 14:24 UTC




On 5/17/2018 6:52 AM, Christoph Hellwig wrote:
>> +static ssize_t nvmet_inline_data_size_show(struct config_item *item,
>> +		char *page)
>> +{
>> +	struct nvmet_port *port = to_nvmet_port(item);
>> +
>> +	return snprintf(page, PAGE_SIZE, "%u\n",
>> +			port->inline_data_size);
> Please fit the whole snprintf statement onto a single line.

sure

>> +}
>> +
>> +static ssize_t nvmet_inline_data_size_store(struct config_item *item,
>> +		const char *page, size_t count)
>> +{
>> +	struct nvmet_port *port = to_nvmet_port(item);
>> +	unsigned int size;
>> +	int ret;
>> +
>> +	if (port->enabled) {
>> +		pr_err("Cannot modify inline_data_size while port enabled\n");
>> +		pr_err("Disable the port before modifying\n");
>> +		return -EACCES;
>> +	}
>> +	ret = kstrtouint((const char *)page, 0, &size);
> This cast looks bogus.
>
> Also, inline_data_size should be a u32 as that is closest to what
> is on the wire, and you thus should use kstrtou32 and pass the
> inline_data_size straight to kstrtou32 instead of bouncing it through
> a local variable.

I made it an int so it could be initialized to -1, indicating it is not
set by the config.  This allows the rdma transport to use its default
value if the config does not specify any value.  I did this so the admin
could totally disable inline by specifying 0.  So I needed a value that
indicates "unspecified".


>> +CONFIGFS_ATTR(nvmet_, inline_data_size);
> The characters before the first _ in the name are used as a group
> by nvmetcli.  So I think this should get a param_ or so prefix
> before the inline_data_size.  Also currently this attribute only
> makes sense for rdma, so I think we still need a flag in
> nvmet_fabrics_ops that enables/disables this attribute.

Ah, so setting it in a port that isn't the rdma transport will cause a
failure.  That makes sense.

> Last but not least please also send a nvmetcli patch to support
> this new attribute.

Will do.

>> +#define NVMET_DEFAULT_INLINE_DATA_SIZE	-1
> 0 makes much more sense as the default, and then we don't even need
> a name for it.

I wanted the user to be able to disable inline by setting it to 0.  Is
that not needed?  Maybe adding back the nvmet_fabrics_ops field will
alleviate this issue.  Perhaps a default_inline_size field that rdma
sets to PAGE_SIZE.  Then configfs can default it to that.

>> +#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
>> +#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int, SZ_16K, PAGE_SIZE)
> So for 64k pages the minimum is bigger than the maximum? :)

For 64k pages, the default is 64K and the max is 64K.

Steve.


* [PATCH RFC v2 2/2] nvmet-rdma: support 16K inline data
  2018-05-17 14:24     ` Steve Wise
@ 2018-05-18  9:08       ` Christoph Hellwig
  2018-05-18 16:36         ` Steve Wise
  0 siblings, 1 reply; 9+ messages in thread
From: Christoph Hellwig @ 2018-05-18  9:08 UTC


On Thu, May 17, 2018@09:24:57AM -0500, Steve Wise wrote:
> >> +	ret = kstrtouint((const char *)page, 0, &size);
> > This cast looks bogus.
> >
> > Also, inline_data_size should be a u32 as that is closest to what
> > is on the wire, and you thus should use kstrtou32 and pass the
> > inline_data_size straight to kstrtou32 instead of bouncing it through
> > a local variable.
> 
> I made it an int so it could be initialized to -1, indicating it is not
> set by the config.  This allows the rdma transport to use its default
> value if the config does not specify any value.  I did this so the admin
> could totally disable inline by specifying 0.  So I needed a value that
> indicates "unspecified".

Ok, makes sense.  So let's keep that behavior, and let every negative
value mean default so that we don't need another error check here.
The rest of the comments above still stand.

> >> +#define NVMET_DEFAULT_INLINE_DATA_SIZE	-1
> > 0 makes much more sense as the default, and then we don't even need
> > a name for it.
> 
> I wanted the user to be able to disable inline by setting it to 0.  Is
> that not needed?  Maybe adding back the nvmet_fabrics_ops field will
> alleviate this issue.  Perhaps a default_inline_size field that rdma
> sets to PAGE_SIZE.  Then configfs can default it to that.

As said above, I think we can keep "negative means default"; I'd
still use the plain -1 instead of a define there.

> 
> >> +#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
> >> +#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int, SZ_16K, PAGE_SIZE)
> > So for 64k pages the minimum is bigger than the maximum? :)
> 
> For 64k pages, the default is 64K and the max is 64K.

Indeed, sorry.


* [PATCH RFC v2 2/2] nvmet-rdma: support 16K inline data
  2018-05-18  9:08       ` Christoph Hellwig
@ 2018-05-18 16:36         ` Steve Wise
  0 siblings, 0 replies; 9+ messages in thread
From: Steve Wise @ 2018-05-18 16:36 UTC


> 
> On Thu, May 17, 2018@09:24:57AM -0500, Steve Wise wrote:
> > >> +	ret = kstrtouint((const char *)page, 0, &size);
> > > This cast looks bogus.
> > >
> > > Also, inline_data_size should be a u32 as that is closest to what
> > > is on the wire, and you thus should use kstrtou32 and pass the
> > > inline_data_size straight to kstrtou32 instead of bouncing it through
> > > a local variable.
> >
> > I made it an int so it could be initialized to -1, indicating it is not
> > set by the config.  This allows the rdma transport to use its default
> > value if the config does not specify any value.  I did this so the admin
> > could totally disable inline by specifying 0.  So I needed a value that
> > indicates "unspecified".
> 
> Ok, makes sense.  So let's keep that behavior, and let every negative
> value mean default so that we don't need another error check here.
> The rest of the comments above still stand.

Agreed.  

Looking at nvmet_rdma_alloc_cmd(), I think I'll also need to fix rdma.c to
support no inline data usage.
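
Roughly along these lines, an untested sketch, so that a size of 0
skips the inline buffer and leaves sge[1] unused:

	if (!admin && inline_data_size) {
		c->inline_page = alloc_pages(GFP_KERNEL,
				get_order(inline_data_size));
		if (!c->inline_page)
			goto out_unmap_cmd;
		/* ib_dma_map_page() and the sge[1] setup stay in this branch */
	}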

Thanks!

Steve.

