* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-05-29 18:26 [PATCH v3 0/3] NVMF/RDMA 16K Inline Support Steve Wise
@ 2018-05-29 18:25 ` Steve Wise
  2018-05-29 20:23   ` Ruhl, Michael J
  2018-05-29 18:25 ` [PATCH v3 2/3] nvme-rdma: support up to 4 segments of inline data Steve Wise
  2018-05-29 18:25 ` [PATCH v3 3/3] nvmet-rdma: support 16K " Steve Wise
  2 siblings, 1 reply; 39+ messages in thread
From: Steve Wise @ 2018-05-29 18:25 UTC (permalink / raw)


The code was checking bit 20 instead of bit 2.  Bit 2 of the SGLS field
indicates keyed SGL support; bit 20 is the in-capsule (inline) data bit.
Also fixed the log message.

Signed-off-by: Steve Wise <swise at opengridcomputing.com>
---
 drivers/nvme/host/rdma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 1eb4438..f11faa8 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1949,8 +1949,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	}
 
 	/* sanity check keyed sgls */
-	if (!(ctrl->ctrl.sgls & (1 << 20))) {
-		dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not support\n");
+	if (!(ctrl->ctrl.sgls & (1 << 2))) {
+		dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not supported!\n");
 		ret = -EINVAL;
 		goto out_remove_admin_queue;
 	}
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v3 2/3] nvme-rdma: support up to 4 segments of inline data
  2018-05-29 18:26 [PATCH v3 0/3] NVMF/RDMA 16K Inline Support Steve Wise
  2018-05-29 18:25 ` [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support Steve Wise
@ 2018-05-29 18:25 ` Steve Wise
  2018-05-30 21:42   ` Sagi Grimberg
  2018-05-29 18:25 ` [PATCH v3 3/3] nvmet-rdma: support 16K " Steve Wise
  2 siblings, 1 reply; 39+ messages in thread
From: Steve Wise @ 2018-05-29 18:25 UTC (permalink / raw)


Allow up to 4 segments of inline data for NVMF WRITE operations. This
reduces latency for small WRITEs by removing the need for the target to
issue a READ WR for IB, or a REG_MR + READ WR chain for iWarp.

Also cap the inline segments used based on the limitations of the
device.

Signed-off-by: Steve Wise <swise at opengridcomputing.com>
Reviewed-by: Christoph Hellwig <hch at lst.de>
---
 drivers/nvme/host/rdma.c | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index f11faa8..32d2f4c 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -40,13 +40,14 @@
 
 #define NVME_RDMA_MAX_SEGMENTS		256
 
-#define NVME_RDMA_MAX_INLINE_SEGMENTS	1
+#define NVME_RDMA_MAX_INLINE_SEGMENTS	4
 
 struct nvme_rdma_device {
 	struct ib_device	*dev;
 	struct ib_pd		*pd;
 	struct kref		ref;
 	struct list_head	entry;
+	unsigned int		num_inline_segments;
 };
 
 struct nvme_rdma_qe {
@@ -117,6 +118,7 @@ struct nvme_rdma_ctrl {
 	struct sockaddr_storage src_addr;
 
 	struct nvme_ctrl	ctrl;
+	bool			use_inline_data;
 };
 
 static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
@@ -249,7 +251,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
 	/* +1 for drain */
 	init_attr.cap.max_recv_wr = queue->queue_size + 1;
 	init_attr.cap.max_recv_sge = 1;
-	init_attr.cap.max_send_sge = 1 + NVME_RDMA_MAX_INLINE_SEGMENTS;
+	init_attr.cap.max_send_sge = 1 + dev->num_inline_segments;
 	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 	init_attr.qp_type = IB_QPT_RC;
 	init_attr.send_cq = queue->ib_cq;
@@ -374,6 +376,9 @@ static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
 		goto out_free_pd;
 	}
 
+	ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
+					ndev->dev->attrs.max_sge - 1);
+	pr_debug("num_inline_segments = %u\n", ndev->num_inline_segments);
 	list_add(&ndev->entry, &device_list);
 out_unlock:
 	mutex_unlock(&device_list_mutex);
@@ -1086,19 +1091,27 @@ static int nvme_rdma_set_sg_null(struct nvme_command *c)
 }
 
 static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
-		struct nvme_rdma_request *req, struct nvme_command *c)
+		struct nvme_rdma_request *req, struct nvme_command *c,
+		int count)
 {
 	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
+	struct scatterlist *sgl = req->sg_table.sgl;
+	struct ib_sge *sge = &req->sge[1];
+	u32 len = 0;
+	int i;
 
-	req->sge[1].addr = sg_dma_address(req->sg_table.sgl);
-	req->sge[1].length = sg_dma_len(req->sg_table.sgl);
-	req->sge[1].lkey = queue->device->pd->local_dma_lkey;
+	for (i = 0; i < count; i++, sgl++, sge++) {
+		sge->addr = sg_dma_address(sgl);
+		sge->length = sg_dma_len(sgl);
+		sge->lkey = queue->device->pd->local_dma_lkey;
+		len += sge->length;
+	}
 
 	sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
-	sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl));
+	sg->length = cpu_to_le32(len);
 	sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
 
-	req->num_sge++;
+	req->num_sge += count;
 	return 0;
 }
 
@@ -1191,13 +1204,14 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 		return -EIO;
 	}
 
-	if (count == 1) {
+	if (count <= dev->num_inline_segments) {
 		if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
+		    queue->ctrl->use_inline_data &&
 		    blk_rq_payload_bytes(rq) <=
 				nvme_rdma_inline_data_size(queue))
-			return nvme_rdma_map_sg_inline(queue, req, c);
+			return nvme_rdma_map_sg_inline(queue, req, c, count);
 
-		if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
+		if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
 			return nvme_rdma_map_sg_single(queue, req, c);
 	}
 
@@ -1955,6 +1969,9 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 		goto out_remove_admin_queue;
 	}
 
+	if ((ctrl->ctrl.sgls & (1 << 20)))
+		ctrl->use_inline_data = true;
+
 	if (opts->queue_size > ctrl->ctrl.maxcmd) {
 		/* warn if maxcmd is lower than queue_size */
 		dev_warn(ctrl->ctrl.device,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v3 3/3] nvmet-rdma: support 16K inline data
  2018-05-29 18:26 [PATCH v3 0/3] NVMF/RDMA 16K Inline Support Steve Wise
  2018-05-29 18:25 ` [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support Steve Wise
  2018-05-29 18:25 ` [PATCH v3 2/3] nvme-rdma: support up to 4 segments of inline data Steve Wise
@ 2018-05-29 18:25 ` Steve Wise
  2018-05-30 15:49   ` Christopher Lameter
  2018-06-03  8:39   ` Max Gurtovoy
  2 siblings, 2 replies; 39+ messages in thread
From: Steve Wise @ 2018-05-29 18:25 UTC (permalink / raw)


Add a new configfs port attribute, called inline_data_size,
to allow configuring the size of inline data for a given port.
The maximum size allowed is still enforced by nvmet-rdma with
NVMET_RDMA_MAX_INLINE_DATA_SIZE, which is increased to max(16KB,
PAGE_SIZE).  And the default size, if not specified via configfs,
is still PAGE_SIZE.  This preserves the existing behavior, but allows
larger inline sizes.
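For example (using the formula in the identify code below), with a 4KB
PAGE_SIZE the advertised capsule size stays at ioccsz = (64 + 4096) / 16 =
260 sixteen-byte units (the 64-byte SQE plus one page of in-capsule data),
while an inline_data_size of 16384 yields ioccsz = (64 + 16384) / 16 = 1028.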

Also support a configuration where inline_data_size is 0, which disables
using inline data.

Signed-off-by: Steve Wise <swise at opengridcomputing.com>
---
 drivers/nvme/target/admin-cmd.c |  4 ++--
 drivers/nvme/target/configfs.c  | 31 ++++++++++++++++++++++++++++
 drivers/nvme/target/core.c      |  4 ++++
 drivers/nvme/target/discovery.c |  2 +-
 drivers/nvme/target/nvmet.h     |  2 +-
 drivers/nvme/target/rdma.c      | 45 ++++++++++++++++++++++++++++-------------
 6 files changed, 70 insertions(+), 18 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 5e0e9fc..a9e3223 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -247,14 +247,14 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->sgls = cpu_to_le32(1 << 0);	/* we always support SGLs */
 	if (ctrl->ops->has_keyed_sgls)
 		id->sgls |= cpu_to_le32(1 << 2);
-	if (ctrl->ops->sqe_inline_size)
+	if (req->port->inline_data_size)
 		id->sgls |= cpu_to_le32(1 << 20);
 
 	strcpy(id->subnqn, ctrl->subsys->subsysnqn);
 
 	/* Max command capsule size is sqe + single page of in-capsule data */
 	id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
-				  ctrl->ops->sqe_inline_size) / 16);
+				  req->port->inline_data_size) / 16);
 	/* Max response capsule size is cqe */
 	id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
 
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index ad9ff27..9867783 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -214,6 +214,35 @@ static ssize_t nvmet_addr_trsvcid_store(struct config_item *item,
 
 CONFIGFS_ATTR(nvmet_, addr_trsvcid);
 
+static ssize_t nvmet_param_inline_data_size_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+
+	return snprintf(page, PAGE_SIZE, "%d\n", port->inline_data_size);
+}
+
+static ssize_t nvmet_param_inline_data_size_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+	int ret;
+
+	if (port->enabled) {
+		pr_err("Cannot modify inline_data_size while port enabled\n");
+		pr_err("Disable the port before modifying\n");
+		return -EACCES;
+	}
+	ret = kstrtoint(page, 0, &port->inline_data_size);
+	if (ret) {
+		pr_err("Invalid value '%s' for inline_data_size\n", page);
+		return -EINVAL;
+	}
+	return count;
+}
+
+CONFIGFS_ATTR(nvmet_, param_inline_data_size);
+
 static ssize_t nvmet_addr_trtype_show(struct config_item *item,
 		char *page)
 {
@@ -870,6 +899,7 @@ static void nvmet_port_release(struct config_item *item)
 	&nvmet_attr_addr_traddr,
 	&nvmet_attr_addr_trsvcid,
 	&nvmet_attr_addr_trtype,
+	&nvmet_attr_param_inline_data_size,
 	NULL,
 };
 
@@ -899,6 +929,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
 	INIT_LIST_HEAD(&port->entry);
 	INIT_LIST_HEAD(&port->subsystems);
 	INIT_LIST_HEAD(&port->referrals);
+	port->inline_data_size = -1;	/* < 0 == let the transport choose */
 
 	port->disc_addr.portid = cpu_to_le16(portid);
 	config_group_init_type_name(&port->group, name, &nvmet_port_type);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index e95424f..695ec17 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -189,6 +189,10 @@ int nvmet_enable_port(struct nvmet_port *port)
 		return ret;
 	}
 
+	/* If the transport didn't set inline_data_size, then disable it. */
+	if (port->inline_data_size < 0)
+		port->inline_data_size = 0;
+
 	port->enabled = true;
 	return 0;
 }
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 231e04e..fc2e675 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -171,7 +171,7 @@ static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req)
 	id->sgls = cpu_to_le32(1 << 0);	/* we always support SGLs */
 	if (ctrl->ops->has_keyed_sgls)
 		id->sgls |= cpu_to_le32(1 << 2);
-	if (ctrl->ops->sqe_inline_size)
+	if (req->port->inline_data_size)
 		id->sgls |= cpu_to_le32(1 << 20);
 
 	strcpy(id->subnqn, ctrl->subsys->subsysnqn);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 15fd84a..db29e45 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -98,6 +98,7 @@ struct nvmet_port {
 	struct list_head		referrals;
 	void				*priv;
 	bool				enabled;
+	int				inline_data_size;
 };
 
 static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
@@ -202,7 +203,6 @@ struct nvmet_subsys_link {
 struct nvmet_fabrics_ops {
 	struct module *owner;
 	unsigned int type;
-	unsigned int sqe_inline_size;
 	unsigned int msdbd;
 	bool has_keyed_sgls : 1;
 	void (*queue_response)(struct nvmet_req *req);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 52e0c5d..2f0b08e 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -33,9 +33,10 @@
 #include "nvmet.h"
 
 /*
- * We allow up to a page of inline data to go with the SQE
+ * We allow at least 1 page, and up to 16KB of inline data to go with the SQE
  */
-#define NVMET_RDMA_INLINE_DATA_SIZE	PAGE_SIZE
+#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
+#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int, SZ_16K, PAGE_SIZE)
 
 struct nvmet_rdma_cmd {
 	struct ib_sge		sge[2];
@@ -116,6 +117,7 @@ struct nvmet_rdma_device {
 	size_t			srq_size;
 	struct kref		ref;
 	struct list_head	entry;
+	int			inline_data_size;
 };
 
 static bool nvmet_rdma_use_srq;
@@ -187,6 +189,8 @@ static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
 static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
 			struct nvmet_rdma_cmd *c, bool admin)
 {
+	int inline_data_size = ndev->inline_data_size;
+
 	/* NVMe command / RDMA RECV */
 	c->nvme_cmd = kmalloc(sizeof(*c->nvme_cmd), GFP_KERNEL);
 	if (!c->nvme_cmd)
@@ -200,17 +204,17 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
 	c->sge[0].length = sizeof(*c->nvme_cmd);
 	c->sge[0].lkey = ndev->pd->local_dma_lkey;
 
-	if (!admin) {
+	if (!admin && inline_data_size) {
 		c->inline_page = alloc_pages(GFP_KERNEL,
-				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
+				get_order(inline_data_size));
 		if (!c->inline_page)
 			goto out_unmap_cmd;
 		c->sge[1].addr = ib_dma_map_page(ndev->device,
-				c->inline_page, 0, NVMET_RDMA_INLINE_DATA_SIZE,
+				c->inline_page, 0, inline_data_size,
 				DMA_FROM_DEVICE);
 		if (ib_dma_mapping_error(ndev->device, c->sge[1].addr))
 			goto out_free_inline_page;
-		c->sge[1].length = NVMET_RDMA_INLINE_DATA_SIZE;
+		c->sge[1].length = inline_data_size;
 		c->sge[1].lkey = ndev->pd->local_dma_lkey;
 	}
 
@@ -225,7 +229,7 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
 out_free_inline_page:
 	if (!admin) {
 		__free_pages(c->inline_page,
-				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
+				get_order(inline_data_size));
 	}
 out_unmap_cmd:
 	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
@@ -240,11 +244,13 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
 static void nvmet_rdma_free_cmd(struct nvmet_rdma_device *ndev,
 		struct nvmet_rdma_cmd *c, bool admin)
 {
-	if (!admin) {
+	int inline_data_size = ndev->inline_data_size;
+
+	if (!admin && inline_data_size) {
 		ib_dma_unmap_page(ndev->device, c->sge[1].addr,
-				NVMET_RDMA_INLINE_DATA_SIZE, DMA_FROM_DEVICE);
+				inline_data_size, DMA_FROM_DEVICE);
 		__free_pages(c->inline_page,
-				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
+				get_order(inline_data_size));
 	}
 	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
 				sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
@@ -544,7 +550,7 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
 	if (!nvme_is_write(rsp->req.cmd))
 		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 
-	if (off + len > NVMET_RDMA_INLINE_DATA_SIZE) {
+	if (off + len > rsp->queue->dev->inline_data_size) {
 		pr_err("invalid inline data offset!\n");
 		return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
 	}
@@ -793,6 +799,7 @@ static void nvmet_rdma_free_dev(struct kref *ref)
 static struct nvmet_rdma_device *
 nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
 {
+	struct nvmet_port *port = cm_id->context;
 	struct nvmet_rdma_device *ndev;
 	int ret;
 
@@ -807,6 +814,7 @@ static void nvmet_rdma_free_dev(struct kref *ref)
 	if (!ndev)
 		goto out_err;
 
+	ndev->inline_data_size = port->inline_data_size;
 	ndev->device = cm_id->device;
 	kref_init(&ndev->ref);
 
@@ -1379,6 +1387,15 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
 		return -EINVAL;
 	}
 
+	if (port->inline_data_size < 0) {
+		port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
+	} else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
+		pr_err("invalid inline_data_size %d (max supported is %u)\n",
+			port->inline_data_size,
+			NVMET_RDMA_MAX_INLINE_DATA_SIZE);
+		return -EINVAL;
+	}
+
 	ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
 			port->disc_addr.trsvcid, &addr);
 	if (ret) {
@@ -1418,8 +1435,9 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
 		goto out_destroy_id;
 	}
 
-	pr_info("enabling port %d (%pISpcs)\n",
-		le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr);
+	pr_info("enabling port %d (%pISpcs) inline_data_size %d\n",
+		le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr,
+		port->inline_data_size);
 	port->priv = cm_id;
 	return 0;
 
@@ -1456,7 +1474,6 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
 static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
 	.owner			= THIS_MODULE,
 	.type			= NVMF_TRTYPE_RDMA,
-	.sqe_inline_size	= NVMET_RDMA_INLINE_DATA_SIZE,
 	.msdbd			= 1,
 	.has_keyed_sgls		= 1,
 	.add_port		= nvmet_rdma_add_port,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v3 0/3] NVMF/RDMA 16K Inline Support
@ 2018-05-29 18:26 Steve Wise
  2018-05-29 18:25 ` [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support Steve Wise
                   ` (2 more replies)
  0 siblings, 3 replies; 39+ messages in thread
From: Steve Wise @ 2018-05-29 18:26 UTC (permalink / raw)


Hey,

For small nvmf write IO over the rdma transport, it is advantageous to
make use of inline mode to avoid the latency of the target issuing an
rdma read to fetch the data.  Currently inline is used for <= 4K writes.
8K, though, requires the rdma read.  For iWARP transports additional
latency is incurred because the target mr of the read must be registered
with remote write access.  By allowing 2 pages worth of inline payload,
I see a reduction in 8K nvmf write latency of anywhere from 2-7 usecs
depending on the RDMA transport.
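
For reference, the per-queue inline budget the host compares against is
derived from the capsule size (ioccsz) advertised by the target; in the
host rdma driver it looks roughly like this (shown for context only, not
a change made by this series):

static inline size_t nvme_rdma_inline_data_size(struct nvme_rdma_queue *queue)
{
	/* in-capsule (inline) bytes = command capsule size minus the SQE */
	return queue->cmnd_capsule_len - sizeof(struct nvme_command);
}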

This series is a respin of a series floated last year by Parav and Max
[1].  I'm continuing it now and have addressed some of the comments from
their submission [2].

Changes since RFC v2:

- Removed RFC tag

- prefix the inline_data_size configfs attribute with param_

- implementation/formatting tweaks suggested by Christoph

- support inline_data_size of 0, which disables inline data use

- added a new patch to fix the check for keyed sgls (bit 2 instead of 20).

- check the inline_data bit (bit 20 in the ctrl.sgls field) when
connecting and only use inline if it was set for that device.

- added Christoph's Reviewed-by tag for patch 1

[1] Original submissions:
http://lists.infradead.org/pipermail/linux-nvme/2017-February/008057.html
http://lists.infradead.org/pipermail/linux-nvme/2017-February/008059.html


[2] These comments from [1] have been addressed:

- nvme-rdma: Support up to 4 segments of inline data.

- nvme-rdma: Cap the number of inline segments to not exceed device limitations.

- nvmet-rdma: Make the inline data size configurable in nvmet-rdma via configfs.

Other issues from [1] that I don't plan to incorporate into the series:

- nvme-rdma: make the sge array for inline segments dynamic based on the
target's advertised inline_data_size.  Since we're limiting the max count
to 4, I'm not sure this is worth the complexity of allocating the sge array
vs just embedding the max.

- nvmet-rdma: concern about high order page allocations.  Is 4 pages
too high?  One possibility is that, if the device max_sge allows, use
a few more sges.  IE 16K could be 2 8K sges, or 4 4K.  This probably makes
passing the inline data to bio more complex.  I haven't looked into this
yet.

- nvmet-rdma: reduce the qp depth if the inline size greatly increases
the memory footprint.  I'm not sure how to do this in a reasonable manner.
Since the inline data size is now configurable, do we still need this?

- nvmet-rdma: make the qp depth configurable so the admin can reduce it
manually to lower the memory footprint.

Steve Wise (3):
  nvme-rdma: correctly check for target keyed sgl support
  nvme-rdma: support up to 4 segments of inline data
  nvmet-rdma: support 16K inline data

 drivers/nvme/host/rdma.c        | 43 +++++++++++++++++++++++++++------------
 drivers/nvme/target/admin-cmd.c |  4 ++--
 drivers/nvme/target/configfs.c  | 31 ++++++++++++++++++++++++++++
 drivers/nvme/target/core.c      |  4 ++++
 drivers/nvme/target/discovery.c |  2 +-
 drivers/nvme/target/nvmet.h     |  2 +-
 drivers/nvme/target/rdma.c      | 45 ++++++++++++++++++++++++++++-------------
 7 files changed, 100 insertions(+), 31 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-05-29 18:25 ` [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support Steve Wise
@ 2018-05-29 20:23   ` Ruhl, Michael J
  2018-05-30 14:39     ` Steve Wise
  2018-05-31 17:00     ` hch
  0 siblings, 2 replies; 39+ messages in thread
From: Ruhl, Michael J @ 2018-05-29 20:23 UTC (permalink / raw)


>-----Original Message-----
>From: linux-rdma-owner at vger.kernel.org [mailto:linux-rdma-
>owner at vger.kernel.org] On Behalf Of Steve Wise
>Sent: Tuesday, May 29, 2018 2:26 PM
>To: axboe at kernel.dk; hch at lst.de; Busch, Keith <keith.busch at intel.com>;
>sagi at grimberg.me; linux-nvme at lists.infradead.org
>Cc: parav at mellanox.com; maxg at mellanox.com; linux-rdma at vger.kernel.org
>Subject: [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl
>support
>
>The code was checking bit 20 instead of bit 2.  Also fixed
>the log entry.
>
>Signed-off-by: Steve Wise <swise at opengridcomputing.com>
>---
> drivers/nvme/host/rdma.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
>diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>index 1eb4438..f11faa8 100644
>--- a/drivers/nvme/host/rdma.c
>+++ b/drivers/nvme/host/rdma.c
>@@ -1949,8 +1949,8 @@ static struct nvme_ctrl
>*nvme_rdma_create_ctrl(struct device *dev,
> 	}
>
> 	/* sanity check keyed sgls */
>-	if (!(ctrl->ctrl.sgls & (1 << 20))) {
>-		dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not
>support\n");
>+	if (!(ctrl->ctrl.sgls & (1 << 2))) {
>+		dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not
>supported!\n");

I can see that the 2 and 20 are defined for specific things.  Since they are
used in several places (in the next 2 patches), is there any chance these
could be defined bits?

Mike

> 		ret = -EINVAL;
> 		goto out_remove_admin_queue;
> 	}
>--
>1.8.3.1
>
>--
>To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>the body of a message to majordomo at vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-05-29 20:23   ` Ruhl, Michael J
@ 2018-05-30 14:39     ` Steve Wise
  2018-05-30 15:11       ` Steve Wise
  2018-05-31 17:00     ` hch
  1 sibling, 1 reply; 39+ messages in thread
From: Steve Wise @ 2018-05-30 14:39 UTC (permalink / raw)




On 5/29/2018 3:23 PM, Ruhl, Michael J wrote:
>> -----Original Message-----
>> From: linux-rdma-owner at vger.kernel.org [mailto:linux-rdma-
>> owner at vger.kernel.org] On Behalf Of Steve Wise
>> Sent: Tuesday, May 29, 2018 2:26 PM
>> To: axboe at kernel.dk; hch at lst.de; Busch, Keith <keith.busch at intel.com>;
>> sagi at grimberg.me; linux-nvme at lists.infradead.org
>> Cc: parav at mellanox.com; maxg at mellanox.com; linux-rdma at vger.kernel.org
>> Subject: [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl
>> support
>>
>> The code was checking bit 20 instead of bit 2.  Also fixed
>> the log entry.
>>
>> Signed-off-by: Steve Wise <swise at opengridcomputing.com>
>> ---
>> drivers/nvme/host/rdma.c | 4 ++--
>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>> index 1eb4438..f11faa8 100644
>> --- a/drivers/nvme/host/rdma.c
>> +++ b/drivers/nvme/host/rdma.c
>> @@ -1949,8 +1949,8 @@ static struct nvme_ctrl
>> *nvme_rdma_create_ctrl(struct device *dev,
>> 	}
>>
>> 	/* sanity check keyed sgls */
>> -	if (!(ctrl->ctrl.sgls & (1 << 20))) {
>> -		dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not
>> support\n");
>> +	if (!(ctrl->ctrl.sgls & (1 << 2))) {
>> +		dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not
>> supported!\n");
> I can see that the 2 and 20 are defined for specific things.  Since they are
> used in several places (in the next 2 patches), is there any chance these
> could be defined bits?
>
> Mike
>

That seems reasonable.  I would think these defines could also be shared
across the host and target.  Perhaps include/linux/nvme.h?  I see both
nvme/host/nvme.h and nvme/target/nvmet.h include linux/nvme.h.  So they
could go there.

Christoph, what do you think?  It seems like these are either nvme
protocol bits or nvme/f protocol bits...
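
For illustration only, shared definitions along those lines might look
roughly like this (hypothetical names, not something this series adds;
the values are the bit positions already used in the patches):

/* bits in the Identify Controller SGLS field */
#define NVME_CTRL_SGLS_KEYED_SGL	(1 << 2)	/* keyed SGL Data Block descriptor supported */
#define NVME_CTRL_SGLS_SGL_OFFSET	(1 << 20)	/* SGL offset, i.e. in-capsule (inline) data */

	/* the host-side checks would then read: */
	if (!(ctrl->ctrl.sgls & NVME_CTRL_SGLS_KEYED_SGL)) {
		dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not supported!\n");
		ret = -EINVAL;
		goto out_remove_admin_queue;
	}
	if (ctrl->ctrl.sgls & NVME_CTRL_SGLS_SGL_OFFSET)
		ctrl->use_inline_data = true;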

Steve.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-05-30 14:39     ` Steve Wise
@ 2018-05-30 15:11       ` Steve Wise
  2018-05-30 21:37         ` Sagi Grimberg
  0 siblings, 1 reply; 39+ messages in thread
From: Steve Wise @ 2018-05-30 15:11 UTC (permalink / raw)




On 5/30/2018 9:39 AM, Steve Wise wrote:
>
> On 5/29/2018 3:23 PM, Ruhl, Michael J wrote:
>>> -----Original Message-----
>>> From: linux-rdma-owner at vger.kernel.org [mailto:linux-rdma-
>>> owner at vger.kernel.org] On Behalf Of Steve Wise
>>> Sent: Tuesday, May 29, 2018 2:26 PM
>>> To: axboe at kernel.dk; hch at lst.de; Busch, Keith <keith.busch at intel.com>;
>>> sagi at grimberg.me; linux-nvme at lists.infradead.org
>>> Cc: parav at mellanox.com; maxg at mellanox.com; linux-rdma at vger.kernel.org
>>> Subject: [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl
>>> support
>>>
>>> The code was checking bit 20 instead of bit 2.  Also fixed
>>> the log entry.
>>>
>>> Signed-off-by: Steve Wise <swise at opengridcomputing.com>
>>> ---
>>> drivers/nvme/host/rdma.c | 4 ++--
>>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>>> index 1eb4438..f11faa8 100644
>>> --- a/drivers/nvme/host/rdma.c
>>> +++ b/drivers/nvme/host/rdma.c
>>> @@ -1949,8 +1949,8 @@ static struct nvme_ctrl
>>> *nvme_rdma_create_ctrl(struct device *dev,
>>> 	}
>>>
>>> 	/* sanity check keyed sgls */
>>> -	if (!(ctrl->ctrl.sgls & (1 << 20))) {
>>> -		dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not
>>> support\n");
>>> +	if (!(ctrl->ctrl.sgls & (1 << 2))) {
>>> +		dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not
>>> supported!\n");
>> I can see that the 2 and 20 are defined for specific things.  Since they are
>> used in several places (in the next 2 patches), is there any chance these
>> could be defined bits?
>>
>> Mike
>>
> That seems reasonable.  I would think these defines could also be shared
> across the host and target.  Perhaps include/linux/nvme.h?  I see both
> nvme/host/nvme.h and nvme/target/nvmet.h include linux/nvme.h.  So they
> could go there.
>
> Christoph, what do you think?  It seems like these are either nvme
> protocol bits or nvme/f protocol bits...
>
> Steve.
>
>

It looks like these bits are part of the Identify Controller Data
Structure, figure 90 in the 1.2.1 spec.  The SGL Support (SGLS) field
within that struct.  So I guess it belongs in include/linux/nvme.h?
But I don't see any of this big structure in that header.



Steve.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 3/3] nvmet-rdma: support 16K inline data
  2018-05-29 18:25 ` [PATCH v3 3/3] nvmet-rdma: support 16K " Steve Wise
@ 2018-05-30 15:49   ` Christopher Lameter
  2018-05-30 16:46     ` Steve Wise
  2018-05-30 21:45     ` Sagi Grimberg
  2018-06-03  8:39   ` Max Gurtovoy
  1 sibling, 2 replies; 39+ messages in thread
From: Christopher Lameter @ 2018-05-30 15:49 UTC (permalink / raw)


On Tue, 29 May 2018, Steve Wise wrote:

> @@ -200,17 +204,17 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
>  	c->sge[0].length = sizeof(*c->nvme_cmd);
>  	c->sge[0].lkey = ndev->pd->local_dma_lkey;
>
> -	if (!admin) {
> +	if (!admin && inline_data_size) {
>  		c->inline_page = alloc_pages(GFP_KERNEL,
> -				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
> +				get_order(inline_data_size));

Now we do higher order allocations here. This means that the allocation
can fail if system memory is highly fragmented. And the allocations can no
longer be satisfied from the per cpu caches. So allocation performance
will drop.
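Concretely (illustrative numbers, assuming 4K pages): the old
get_order(PAGE_SIZE) request is order 0 and can still be served from the
per-cpu page lists, while get_order(SZ_16K) == 2 asks for four physically
contiguous pages per command.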

>  		if (!c->inline_page)
>  			goto out_unmap_cmd;

Maybe think about some sort of fallback to vmalloc or so if this
alloc fails?

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 3/3] nvmet-rdma: support 16K inline data
  2018-05-30 15:49   ` Christopher Lameter
@ 2018-05-30 16:46     ` Steve Wise
  2018-05-30 17:02       ` Christopher Lameter
  2018-05-30 21:45     ` Sagi Grimberg
  1 sibling, 1 reply; 39+ messages in thread
From: Steve Wise @ 2018-05-30 16:46 UTC (permalink / raw)



On 5/30/2018 10:49 AM, Christopher Lameter wrote:
> On Tue, 29 May 2018, Steve Wise wrote:
>
>> @@ -200,17 +204,17 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
>>  	c->sge[0].length = sizeof(*c->nvme_cmd);
>>  	c->sge[0].lkey = ndev->pd->local_dma_lkey;
>>
>> -	if (!admin) {
>> +	if (!admin && inline_data_size) {
>>  		c->inline_page = alloc_pages(GFP_KERNEL,
>> -				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
>> +				get_order(inline_data_size));
> Now we do higher order allocations here. This means that the allocation
> can fail if system memory is highly fragmented. And the allocations can no
> longer be satisfied from the per cpu caches. So allocation performance
> will drop.

Yes.

>>  		if (!c->inline_page)
>>  			goto out_unmap_cmd;
> Maybe think about some sort of fallback to vmalloc or so if this
> alloc fails?

The memory needs to be physically contiguous and will be mapped for DMA,
so vmalloc() won't work.

I could complicate the design and allocate a scatter gather table for
this memory, and then register it into a single RDMA MR.  That would
allow allocating non-contiguous pages.  But is that complication worth
it here?

Steve.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 3/3] nvmet-rdma: support 16K inline data
  2018-05-30 16:46     ` Steve Wise
@ 2018-05-30 17:02       ` Christopher Lameter
  0 siblings, 0 replies; 39+ messages in thread
From: Christopher Lameter @ 2018-05-30 17:02 UTC (permalink / raw)


On Wed, 30 May 2018, Steve Wise wrote:

> > Now we do higher order allocations here. This means that the allocation
> > can fail if system memory is highly fragmented. And the allocations can no
> > longer be satisfied from the per cpu caches. So allocation performance
> > will drop.
>
> Yes.

Maybe do some tests with fragmented memory?

> >>  		if (!c->inline_page)
> >>  			goto out_unmap_cmd;
> > Maybe think about some sort of fallback to vmalloc or so if this
> > alloc fails?
>
> The memory needs to be physically contiguous and will be mapped for DMA,
> so vmalloc() won't work.
>
> I could complicate the design and allocate a scatter gather table for
> this memory, and then register it into a single RDMA MR.  That would
> allow allocating non-contiguous pages.  But is that complication worth
> it here?

Not sure..... Just saw the higher-order alloc and I have seen what other
subsystems went through when dealing with larger-order allocations.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-05-30 15:11       ` Steve Wise
@ 2018-05-30 21:37         ` Sagi Grimberg
  2018-05-31 17:02           ` hch
  0 siblings, 1 reply; 39+ messages in thread
From: Sagi Grimberg @ 2018-05-30 21:37 UTC (permalink / raw)



>>> I can see that the 2 and 20 are defined for specific things.  Since they are
>>> used in several places (in the next 2 patches), is there any chance these
>>> could be defined bits?
>>>
>>> Mike
>>>
>> That seems reasonable.  I would think these defines could also be shared
>> across the host and target.  Perhaps include/linux/nvme.h?  I see both
>> nvme/host/nvme.h and nvme/target/nvmet.h include linux/nvme.h.  So they
>> could go there.
>>
>> Christoph, what do you think?  It seems like these are either nvme
>> protocol bits or nvme/f protocol bits...
>>
>> Steve.
>>
>>
> 
> It looks like these bits are part of the Identify Controller Data
> Structure, figure 90 in the 1.2.1 spec.  The SGL Support (SGLS) field
> within that struct.  So I guess it belongs in include/linux/nvme.h?
> But I don't see any of this big structure in that header.
> 

Yea, good idea, though please add the offset type sgl as well.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 2/3] nvme-rdma: support up to 4 segments of inline data
  2018-05-29 18:25 ` [PATCH v3 2/3] nvme-rdma: support up to 4 segments of inline data Steve Wise
@ 2018-05-30 21:42   ` Sagi Grimberg
  2018-05-30 21:46     ` Steve Wise
  0 siblings, 1 reply; 39+ messages in thread
From: Sagi Grimberg @ 2018-05-30 21:42 UTC (permalink / raw)




On 05/29/2018 09:25 PM, Steve Wise wrote:
> Allow up to 4 segments of inline data for NVMF WRITE operations. This
> reduces latency for small WRITEs by removing the need for the target to
> issue a READ WR for IB, or a REG_MR + READ WR chain for iWarp.
> 
> Also cap the inline segments used based on the limitations of the
> device.
> 
> Signed-off-by: Steve Wise <swise at opengridcomputing.com>
> Reviewed-by: Christoph Hellwig <hch at lst.de>
> ---
>   drivers/nvme/host/rdma.c | 39 ++++++++++++++++++++++++++++-----------
>   1 file changed, 28 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
> index f11faa8..32d2f4c 100644
> --- a/drivers/nvme/host/rdma.c
> +++ b/drivers/nvme/host/rdma.c
> @@ -40,13 +40,14 @@
>   
>   #define NVME_RDMA_MAX_SEGMENTS		256
>   
> -#define NVME_RDMA_MAX_INLINE_SEGMENTS	1
> +#define NVME_RDMA_MAX_INLINE_SEGMENTS	4
>   
>   struct nvme_rdma_device {
>   	struct ib_device	*dev;
>   	struct ib_pd		*pd;
>   	struct kref		ref;
>   	struct list_head	entry;
> +	unsigned int		num_inline_segments;
>   };
>   
>   struct nvme_rdma_qe {
> @@ -117,6 +118,7 @@ struct nvme_rdma_ctrl {
>   	struct sockaddr_storage src_addr;
>   
>   	struct nvme_ctrl	ctrl;
> +	bool			use_inline_data;
>   };
>   
>   static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
> @@ -249,7 +251,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
>   	/* +1 for drain */
>   	init_attr.cap.max_recv_wr = queue->queue_size + 1;
>   	init_attr.cap.max_recv_sge = 1;
> -	init_attr.cap.max_send_sge = 1 + NVME_RDMA_MAX_INLINE_SEGMENTS;
> +	init_attr.cap.max_send_sge = 1 + dev->num_inline_segments;
>   	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
>   	init_attr.qp_type = IB_QPT_RC;
>   	init_attr.send_cq = queue->ib_cq;
> @@ -374,6 +376,9 @@ static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
>   		goto out_free_pd;
>   	}
>   
> +	ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
> +					ndev->dev->attrs.max_sge - 1);
> +	pr_debug("num_inline_segments = %u\n", ndev->num_inline_segments);

insist on keeping it? ibv_devinfo -v can give this info to the 
user/developer.

>   	list_add(&ndev->entry, &device_list);
>   out_unlock:
>   	mutex_unlock(&device_list_mutex);
> @@ -1086,19 +1091,27 @@ static int nvme_rdma_set_sg_null(struct nvme_command *c)
>   }
>   
>   static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
> -		struct nvme_rdma_request *req, struct nvme_command *c)
> +		struct nvme_rdma_request *req, struct nvme_command *c,
> +		int count)
>   {
>   	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
> +	struct scatterlist *sgl = req->sg_table.sgl;
> +	struct ib_sge *sge = &req->sge[1];
> +	u32 len = 0;
> +	int i;
>   
> -	req->sge[1].addr = sg_dma_address(req->sg_table.sgl);
> -	req->sge[1].length = sg_dma_len(req->sg_table.sgl);
> -	req->sge[1].lkey = queue->device->pd->local_dma_lkey;
> +	for (i = 0; i < count; i++, sgl++, sge++) {
> +		sge->addr = sg_dma_address(sgl);
> +		sge->length = sg_dma_len(sgl);
> +		sge->lkey = queue->device->pd->local_dma_lkey;
> +		len += sge->length;
> +	}
>   
>   	sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
> -	sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl));
> +	sg->length = cpu_to_le32(len);
>   	sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
>   
> -	req->num_sge++;
> +	req->num_sge += count;
>   	return 0;
>   }
>   
> @@ -1191,13 +1204,14 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
>   		return -EIO;
>   	}
>   
> -	if (count == 1) {
> +	if (count <= dev->num_inline_segments) {
>   		if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
> +		    queue->ctrl->use_inline_data &&
>   		    blk_rq_payload_bytes(rq) <=
>   				nvme_rdma_inline_data_size(queue))
> -			return nvme_rdma_map_sg_inline(queue, req, c);
> +			return nvme_rdma_map_sg_inline(queue, req, c, count);
>   
> -		if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
> +		if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
>   			return nvme_rdma_map_sg_single(queue, req, c);
>   	}
>   
> @@ -1955,6 +1969,9 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
>   		goto out_remove_admin_queue;
>   	}
>   
> +	if ((ctrl->ctrl.sgls & (1 << 20)))
> +		ctrl->use_inline_data = true;
> +

Here it is... discard my last comment.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 3/3] nvmet-rdma: support 16K inline data
  2018-05-30 15:49   ` Christopher Lameter
  2018-05-30 16:46     ` Steve Wise
@ 2018-05-30 21:45     ` Sagi Grimberg
  2018-05-30 21:52       ` Steve Wise
  1 sibling, 1 reply; 39+ messages in thread
From: Sagi Grimberg @ 2018-05-30 21:45 UTC (permalink / raw)



>> @@ -200,17 +204,17 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
>>   	c->sge[0].length = sizeof(*c->nvme_cmd);
>>   	c->sge[0].lkey = ndev->pd->local_dma_lkey;
>>
>> -	if (!admin) {
>> +	if (!admin && inline_data_size) {
>>   		c->inline_page = alloc_pages(GFP_KERNEL,
>> -				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
>> +				get_order(inline_data_size));
> 
> Now we do higher order allocations here. This means that the allocation
> can fail if system memory is highly fragmented. And the allocations can no
> longer be satisfied from the per cpu caches. So allocation performance
> will drop.

That was my first thought as well. I'm not too keen on having
higher-order allocations on this, not at all. nvmet-rdma will
allocate a whole bunch of those. I think we should try to be
good citizens. I don't think it's too complicated to do, is it?

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 2/3] nvme-rdma: support up to 4 segments of inline data
  2018-05-30 21:42   ` Sagi Grimberg
@ 2018-05-30 21:46     ` Steve Wise
  0 siblings, 0 replies; 39+ messages in thread
From: Steve Wise @ 2018-05-30 21:46 UTC (permalink / raw)


Hey Sagi,


On 5/30/2018 4:42 PM, Sagi Grimberg wrote:
>
>
> On 05/29/2018 09:25 PM, Steve Wise wrote:
>> Allow up to 4 segments of inline data for NVMF WRITE operations. This
>> reduces latency for small WRITEs by removing the need for the target to
>> issue a READ WR for IB, or a REG_MR + READ WR chain for iWarp.
>>
>> Also cap the inline segments used based on the limitations of the
>> device.
>>
>> Signed-off-by: Steve Wise <swise at opengridcomputing.com>
>> Reviewed-by: Christoph Hellwig <hch at lst.de>
>> ---
>>   drivers/nvme/host/rdma.c | 39 ++++++++++++++++++++++++++++-----------
>>   1 file changed, 28 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>> index f11faa8..32d2f4c 100644
>> --- a/drivers/nvme/host/rdma.c
>> +++ b/drivers/nvme/host/rdma.c
>> @@ -40,13 +40,14 @@
>>
>>  #define NVME_RDMA_MAX_SEGMENTS		256
>>
>> -#define NVME_RDMA_MAX_INLINE_SEGMENTS	1
>> +#define NVME_RDMA_MAX_INLINE_SEGMENTS	4
>>
>>  struct nvme_rdma_device {
>>  	struct ib_device	*dev;
>>  	struct ib_pd		*pd;
>>  	struct kref		ref;
>>  	struct list_head	entry;
>> +	unsigned int		num_inline_segments;
>>  };
>>
>>  struct nvme_rdma_qe {
>> @@ -117,6 +118,7 @@ struct nvme_rdma_ctrl {
>>  	struct sockaddr_storage src_addr;
>>
>>  	struct nvme_ctrl	ctrl;
>> +	bool			use_inline_data;
>>  };
>>
>>  static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
>> @@ -249,7 +251,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
>>  	/* +1 for drain */
>>  	init_attr.cap.max_recv_wr = queue->queue_size + 1;
>>  	init_attr.cap.max_recv_sge = 1;
>> -	init_attr.cap.max_send_sge = 1 + NVME_RDMA_MAX_INLINE_SEGMENTS;
>> +	init_attr.cap.max_send_sge = 1 + dev->num_inline_segments;
>>  	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
>>  	init_attr.qp_type = IB_QPT_RC;
>>  	init_attr.send_cq = queue->ib_cq;
>> @@ -374,6 +376,9 @@ static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
>>  		goto out_free_pd;
>>  	}
>>
>> +	ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
>> +					ndev->dev->attrs.max_sge - 1);
>> +	pr_debug("num_inline_segments = %u\n", ndev->num_inline_segments);
>
> insist on keeping it? ibv_devinfo -v can give this info to the
> user/developer.
>

I agree.  I'll remove it.

>>  	list_add(&ndev->entry, &device_list);
>>  out_unlock:
>>  	mutex_unlock(&device_list_mutex);
>> @@ -1086,19 +1091,27 @@ static int nvme_rdma_set_sg_null(struct nvme_command *c)
>>  }
>>
>>  static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
>> -		struct nvme_rdma_request *req, struct nvme_command *c)
>> +		struct nvme_rdma_request *req, struct nvme_command *c,
>> +		int count)
>>  {
>>  	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
>> +	struct scatterlist *sgl = req->sg_table.sgl;
>> +	struct ib_sge *sge = &req->sge[1];
>> +	u32 len = 0;
>> +	int i;
>>
>> -	req->sge[1].addr = sg_dma_address(req->sg_table.sgl);
>> -	req->sge[1].length = sg_dma_len(req->sg_table.sgl);
>> -	req->sge[1].lkey = queue->device->pd->local_dma_lkey;
>> +	for (i = 0; i < count; i++, sgl++, sge++) {
>> +		sge->addr = sg_dma_address(sgl);
>> +		sge->length = sg_dma_len(sgl);
>> +		sge->lkey = queue->device->pd->local_dma_lkey;
>> +		len += sge->length;
>> +	}
>>
>>  	sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
>> -	sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl));
>> +	sg->length = cpu_to_le32(len);
>>  	sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
>>
>> -	req->num_sge++;
>> +	req->num_sge += count;
>>  	return 0;
>>  }
>>
>> @@ -1191,13 +1204,14 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
>>  		return -EIO;
>>  	}
>>
>> -	if (count == 1) {
>> +	if (count <= dev->num_inline_segments) {
>>  		if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
>> +		    queue->ctrl->use_inline_data &&
>>  		    blk_rq_payload_bytes(rq) <=
>>  				nvme_rdma_inline_data_size(queue))
>> -			return nvme_rdma_map_sg_inline(queue, req, c);
>> +			return nvme_rdma_map_sg_inline(queue, req, c, count);
>>
>> -		if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
>> +		if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
>>  			return nvme_rdma_map_sg_single(queue, req, c);
>>  	}
>>
>> @@ -1955,6 +1969,9 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
>>  		goto out_remove_admin_queue;
>>  	}
>>
>> +	if ((ctrl->ctrl.sgls & (1 << 20)))
>> +		ctrl->use_inline_data = true;
>> +
>
> Here it is... discard my last comment.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 3/3] nvmet-rdma: support 16K inline data
  2018-05-30 21:45     ` Sagi Grimberg
@ 2018-05-30 21:52       ` Steve Wise
  2018-05-30 22:13         ` Sagi Grimberg
  0 siblings, 1 reply; 39+ messages in thread
From: Steve Wise @ 2018-05-30 21:52 UTC (permalink / raw)




On 5/30/2018 4:45 PM, Sagi Grimberg wrote:
>
>>> @@ -200,17 +204,17 @@ static int nvmet_rdma_alloc_cmd(struct
>>> nvmet_rdma_device *ndev,
>>>  	c->sge[0].length = sizeof(*c->nvme_cmd);
>>>  	c->sge[0].lkey = ndev->pd->local_dma_lkey;
>>>
>>> -	if (!admin) {
>>> +	if (!admin && inline_data_size) {
>>>  		c->inline_page = alloc_pages(GFP_KERNEL,
>>> -				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
>>> +				get_order(inline_data_size));
>>
>> Now we do higher order allocations here. This means that the allocation
>> can fail if system memory is highly fragmented. And the allocations
>> can no
>> longer be satisfied from the per cpu caches. So allocation performance
>> will drop.
>
> That was my first thought as well. I'm not too keen on having
> higher-order allocations on this, not at all. nvmet-rdma will
> allocate a whole bunch of those. I think we should try to be
> good citizens. I don't think its too complicated to do is it?

It's ugly because registering an MR for the inline pages requires a
connected QP to use the REG_MR WR.  Maybe I'll just split the needed
pages across the remaining recv sge entries available?  IE if the device
supports 5 recv sges, then 4 can be used for inline, and thus 4
non-contiguous pages could be used.  cxgb4, however, only supports 4 recv
sges, so it would only support 12K of inline with this implementation.
And perhaps there are rdma devices with even fewer recv sges?

Do you have any other idea on how to avoid higher-order allocations?
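
One rough sketch of the per-SGE idea (purely illustrative, assuming the
sge[] array in struct nvmet_rdma_cmd is grown accordingly; error
unwinding of already-mapped pages is omitted for brevity):

static int nvmet_rdma_alloc_inline_sges(struct nvmet_rdma_device *ndev,
					struct nvmet_rdma_cmd *c)
{
	int npages = DIV_ROUND_UP(ndev->inline_data_size, PAGE_SIZE);
	int i;

	for (i = 0; i < npages; i++) {
		struct page *pg = alloc_page(GFP_KERNEL);	/* order-0 only */

		if (!pg)
			return -ENOMEM;
		/* one recv SGE per page, sge[0] still carries the SQE */
		c->sge[1 + i].addr = ib_dma_map_page(ndev->device, pg, 0,
				PAGE_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(ndev->device, c->sge[1 + i].addr)) {
			__free_page(pg);
			return -ENOMEM;
		}
		c->sge[1 + i].length = PAGE_SIZE;
		c->sge[1 + i].lkey = ndev->pd->local_dma_lkey;
	}
	return 0;
}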

Steve.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 3/3] nvmet-rdma: support 16K inline data
  2018-05-30 21:52       ` Steve Wise
@ 2018-05-30 22:13         ` Sagi Grimberg
  2018-05-30 22:26           ` Steve Wise
  0 siblings, 1 reply; 39+ messages in thread
From: Sagi Grimberg @ 2018-05-30 22:13 UTC (permalink / raw)



> Its ugly because registering an MR for the inline pages requires a
> connected QP to use the REG_MR WR.

No, why MR?

> Maybe I'll just split the needed
> pages across the remaining recv sge entries available?  IE if the device
> supports 5 recv sges, then 4 can be used for inline, and thus 4
> non-contiguous pages could be used.

That's what I had in mind...

> cxgb4, however, only support 4 recv
> sges, so it would only support 12K of inline with this implementation.

:(

> And perhaps there are rdma devices with even fewer recv sges?
> 
> Do you have any other idea on how to avoid higher-order allocations?

Not really. I guess that we should be able to recv the user
inline_data_size. So maybe it would be ok if we'd do that only if
we must?

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 3/3] nvmet-rdma: support 16K inline data
  2018-05-30 22:13         ` Sagi Grimberg
@ 2018-05-30 22:26           ` Steve Wise
  0 siblings, 0 replies; 39+ messages in thread
From: Steve Wise @ 2018-05-30 22:26 UTC (permalink / raw)




On 5/30/2018 5:13 PM, Sagi Grimberg wrote:
>
>> Its ugly because registering an MR for the inline pages requires a
>> connected QP to use the REG_MR WR.
>
> No, why MR?
>
>> Maybe I'll just split the needed
>> pages across the remaining recv sge entries available?  IE if the device
>> supports 5 recv sges, then 4 can be used for inline, and thus 4
>> non-contiguous pages could be used.
>
> That's what I had in mind...
>
>> cxgb4, however, only support 4 recv
>> sges, so it would only support 12K of inline with this implementation.
>
> :(
>
>> And perhaps there are rdma devices with even fewer recv sges?
>>
>> Do you have any other idea on how to avoid higher-order allocations?
>
> Not really. I guess that we should be able to recv the user
> inline_data_size. So maybe it would be ok if the we'd do that only if
> we must?

I'll explore this idea.  I also thought about just reducing the inline
data size based on the number of sges available.  But I think the inline
data size has already been advertised to the host side at the time the
target is allocating these pages. :(

Steve.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-05-29 20:23   ` Ruhl, Michael J
  2018-05-30 14:39     ` Steve Wise
@ 2018-05-31 17:00     ` hch
  1 sibling, 0 replies; 39+ messages in thread
From: hch @ 2018-05-31 17:00 UTC (permalink / raw)


On Tue, May 29, 2018@08:23:09PM +0000, Ruhl, Michael J wrote:
> I can see that the 2 and 20 are defined for specific things.  Since they are
> used in several places (in the next 2 patches), is there any chance these
> could be defined bits?

So far our rule of thumb was to use defines where the NVMe spec
defines symbolic names, and use plain bit numbers where the spec
does the same.  The SGLS field falls into the latter category.

If we end up with a lot of users a little helper function might be
nice, but I don't think we aren't there yet.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-05-30 21:37         ` Sagi Grimberg
@ 2018-05-31 17:02           ` hch
  2018-05-31 17:17             ` Steve Wise
  0 siblings, 1 reply; 39+ messages in thread
From: hch @ 2018-05-31 17:02 UTC (permalink / raw)


On Thu, May 31, 2018@12:37:33AM +0300, Sagi Grimberg wrote:
>> It looks like these bits are part of the Identify Controller Data
>> Structure, figure 90 in the 1.2.1 spec.  The SGL Support (SGLS) field
>> within that struct.  So I guess it belongs in include/linux/nvme.h?
>> But I don't see any of this big structure in that header.
>>
>
> Yea, good idea, though please add the offset type sgl as well.

Honestly, I'd rather keep defines for every little bit without
a name out of the headers.  The bit numbers will show up right
there when looking for the field in the nvme spec, while making
up our own names will just create confusion.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-05-31 17:02           ` hch
@ 2018-05-31 17:17             ` Steve Wise
  2018-05-31 17:25               ` hch
  0 siblings, 1 reply; 39+ messages in thread
From: Steve Wise @ 2018-05-31 17:17 UTC (permalink / raw)




On 5/31/2018 12:02 PM, hch@lst.de wrote:
> On Thu, May 31, 2018@12:37:33AM +0300, Sagi Grimberg wrote:
>>> It looks like these bits are part of the Identify Controller Data
>>> Structure, figure 90 in the 1.2.1 spec.  The SGL Support (SGLS) field
>>> within that struct.  So I guess it belongs in include/linux/nvme.h?
>>> But I don't see any of this big structure in that header.
>>>
>> Yea, good idea, though please add the offset type sgl as well.
> Honestly, I'd rather keep defines for every little bit without
> a name out of the headers.  The bit numbers will show up right
> there when looking for the field in the nvme spec, while making
> up our own names will just create confusion.
>

Ok then, I will leave them as bit numbers...

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-05-31 17:17             ` Steve Wise
@ 2018-05-31 17:25               ` hch
  2018-06-01 13:08                 ` Steve Wise
  2018-06-03 11:57                 ` Sagi Grimberg
  0 siblings, 2 replies; 39+ messages in thread
From: hch @ 2018-05-31 17:25 UTC (permalink / raw)


On Thu, May 31, 2018@12:17:07PM -0500, Steve Wise wrote:
> Ok then, I will leave them as bit numbers...

Btw, modulo the debug printk 1 & 2 look fine to me and I'd like to
pull them in.  Sagi, are you ok with the patches?

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-05-31 17:25               ` hch
@ 2018-06-01 13:08                 ` Steve Wise
  2018-06-03 11:57                 ` Sagi Grimberg
  1 sibling, 0 replies; 39+ messages in thread
From: Steve Wise @ 2018-06-01 13:08 UTC (permalink / raw)




> -----Original Message-----
> From: Linux-nvme <linux-nvme-bounces at lists.infradead.org> On Behalf Of
> hch at lst.de
> Sent: Thursday, May 31, 2018 12:26 PM
> To: Steve Wise <swise at opengridcomputing.com>
> Cc: axboe at kernel.dk; Sagi Grimberg <sagi at grimberg.me>; linux-
> rdma at vger.kernel.org; linux-nvme at lists.infradead.org; Busch, Keith
> <keith.busch at intel.com>; Ruhl, Michael J <michael.j.ruhl at intel.com>;
> maxg at mellanox.com; hch at lst.de; parav at mellanox.com
> Subject: Re: [PATCH v3 1/3] nvme-rdma: correctly check for target keyed
sgl
> support
> 
> On Thu, May 31, 2018@12:17:07PM -0500, Steve Wise wrote:
> > Ok then, I will leave them as bit numbers...
> 
> Btw, modulo the debug printk 1 & 2 look fine to me and I'd like to
> pull them in.  Sagi, are you ok with the patches?
> 

If you do, I'll drop them from my series and repost the target side soon
with the changes to only allocate single pages.

Thanks

Steve

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 3/3] nvmet-rdma: support 16K inline data
  2018-05-29 18:25 ` [PATCH v3 3/3] nvmet-rdma: support 16K " Steve Wise
  2018-05-30 15:49   ` Christopher Lameter
@ 2018-06-03  8:39   ` Max Gurtovoy
  2018-06-03 18:25     ` Steve Wise
  1 sibling, 1 reply; 39+ messages in thread
From: Max Gurtovoy @ 2018-06-03  8:39 UTC (permalink / raw)




On 5/29/2018 9:25 PM, Steve Wise wrote:
> Add a new configfs port attribute, called inline_data_size,
> to allow configuring the size of inline data for a given port.
> The maximum size allowed is still enforced by nvmet-rdma with
> NVMET_RDMA_MAX_INLINE_DATA_SIZE, which is increased to max(16KB,
> PAGE_SIZE).  And the default size, if not specified via configfs,
> is still PAGE_SIZE.  This preserves the existing behavior, but allows
> larger inline sizes.
> 
> Also support a configuration where inline_data_size is 0, which disables
> using inline data.
> 
> Signed-off-by: Steve Wise <swise at opengridcomputing.com>
> ---
>   drivers/nvme/target/admin-cmd.c |  4 ++--
>   drivers/nvme/target/configfs.c  | 31 ++++++++++++++++++++++++++++
>   drivers/nvme/target/core.c      |  4 ++++
>   drivers/nvme/target/discovery.c |  2 +-
>   drivers/nvme/target/nvmet.h     |  2 +-
>   drivers/nvme/target/rdma.c      | 45 ++++++++++++++++++++++++++++-------------
>   6 files changed, 70 insertions(+), 18 deletions(-)

snip..

> 
> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> index 52e0c5d..2f0b08e 100644
> --- a/drivers/nvme/target/rdma.c
> +++ b/drivers/nvme/target/rdma.c
> @@ -33,9 +33,10 @@
>   #include "nvmet.h"
>   
>   /*
> - * We allow up to a page of inline data to go with the SQE
> + * We allow at least 1 page, and up to 16KB of inline data to go with the SQE
>    */
> -#define NVMET_RDMA_INLINE_DATA_SIZE	PAGE_SIZE
> +#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
> +#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int, SZ_16K, PAGE_SIZE)

Why not use SZ_16K? Why do we need to mention PAGE_SIZE?

>   
>   struct nvmet_rdma_cmd {
>   	struct ib_sge		sge[2];
> @@ -116,6 +117,7 @@ struct nvmet_rdma_device {
>   	size_t			srq_size;
>   	struct kref		ref;
>   	struct list_head	entry;
> +	int			inline_data_size;
>   };
>   
>   static bool nvmet_rdma_use_srq;
> @@ -187,6 +189,8 @@ static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
>   static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
>   			struct nvmet_rdma_cmd *c, bool admin)
>   {
> +	int inline_data_size = ndev->inline_data_size;
> +
>   	/* NVMe command / RDMA RECV */
>   	c->nvme_cmd = kmalloc(sizeof(*c->nvme_cmd), GFP_KERNEL);
>   	if (!c->nvme_cmd)
> @@ -200,17 +204,17 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
>   	c->sge[0].length = sizeof(*c->nvme_cmd);
>   	c->sge[0].lkey = ndev->pd->local_dma_lkey;
>   
> -	if (!admin) {
> +	if (!admin && inline_data_size) {
>   		c->inline_page = alloc_pages(GFP_KERNEL,
> -				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
> +				get_order(inline_data_size));
>   		if (!c->inline_page)
>   			goto out_unmap_cmd;
>   		c->sge[1].addr = ib_dma_map_page(ndev->device,
> -				c->inline_page, 0, NVMET_RDMA_INLINE_DATA_SIZE,
> +				c->inline_page, 0, inline_data_size,
>   				DMA_FROM_DEVICE);
>   		if (ib_dma_mapping_error(ndev->device, c->sge[1].addr))
>   			goto out_free_inline_page;
> -		c->sge[1].length = NVMET_RDMA_INLINE_DATA_SIZE;
> +		c->sge[1].length = inline_data_size;
>   		c->sge[1].lkey = ndev->pd->local_dma_lkey;
>   	}
>   
> @@ -225,7 +229,7 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
>   out_free_inline_page:
>   	if (!admin) {
>   		__free_pages(c->inline_page,
> -				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
> +				get_order(inline_data_size));
>   	}
>   out_unmap_cmd:
>   	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
> @@ -240,11 +244,13 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
>   static void nvmet_rdma_free_cmd(struct nvmet_rdma_device *ndev,
>   		struct nvmet_rdma_cmd *c, bool admin)
>   {
> -	if (!admin) {
> +	int inline_data_size = ndev->inline_data_size;
> +
> +	if (!admin && inline_data_size) {
>   		ib_dma_unmap_page(ndev->device, c->sge[1].addr,
> -				NVMET_RDMA_INLINE_DATA_SIZE, DMA_FROM_DEVICE);
> +				inline_data_size, DMA_FROM_DEVICE);
>   		__free_pages(c->inline_page,
> -				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
> +				get_order(inline_data_size));
>   	}
>   	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
>   				sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
> @@ -544,7 +550,7 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
>   	if (!nvme_is_write(rsp->req.cmd))
>   		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
>   
> -	if (off + len > NVMET_RDMA_INLINE_DATA_SIZE) {
> +	if (off + len > rsp->queue->dev->inline_data_size) {
>   		pr_err("invalid inline data offset!\n");
>   		return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
>   	}
> @@ -793,6 +799,7 @@ static void nvmet_rdma_free_dev(struct kref *ref)
>   static struct nvmet_rdma_device *
>   nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
>   {
> +	struct nvmet_port *port = cm_id->context;
>   	struct nvmet_rdma_device *ndev;
>   	int ret;
>   
> @@ -807,6 +814,7 @@ static void nvmet_rdma_free_dev(struct kref *ref)
>   	if (!ndev)
>   		goto out_err;
>   
> +	ndev->inline_data_size = port->inline_data_size;
>   	ndev->device = cm_id->device;
>   	kref_init(&ndev->ref);
>   
> @@ -1379,6 +1387,15 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
>   		return -EINVAL;
>   	}
>   
> +	if (port->inline_data_size < 0) {
> +		port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
> +	} else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
> +		pr_err("invalid inline_data_size %d (max supported is %u)\n",
> +			port->inline_data_size,
> +			NVMET_RDMA_MAX_INLINE_DATA_SIZE);
> +		return -EINVAL;
> +	}
> +
>   	ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
>   			port->disc_addr.trsvcid, &addr);
>   	if (ret) {
> @@ -1418,8 +1435,9 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
>   		goto out_destroy_id;
>   	}
>   
> -	pr_info("enabling port %d (%pISpcs)\n",
> -		le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr);
> +	pr_info("enabling port %d (%pISpcs) inline_data_size %d\n",
> +		le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr,
> +		port->inline_data_size);
>   	port->priv = cm_id;
>   	return 0;
>   
> @@ -1456,7 +1474,6 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
>   static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
>   	.owner			= THIS_MODULE,
>   	.type			= NVMF_TRTYPE_RDMA,
> -	.sqe_inline_size	= NVMET_RDMA_INLINE_DATA_SIZE,
>   	.msdbd			= 1,
>   	.has_keyed_sgls		= 1,
>   	.add_port		= nvmet_rdma_add_port,
> 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-05-31 17:25               ` hch
  2018-06-01 13:08                 ` Steve Wise
@ 2018-06-03 11:57                 ` Sagi Grimberg
  2018-06-03 18:27                   ` Steve Wise
  1 sibling, 1 reply; 39+ messages in thread
From: Sagi Grimberg @ 2018-06-03 11:57 UTC (permalink / raw)



>> Ok then, I will leave them as bit numbers...
> 
> Btw, modulo the debug printk 1 & 2 look fine to me and I'd like to
> pull them in.  Sagi, are you ok with the patches?

I'd like to see a version that avoids higher-order allocations first,
as we can have a non-negligible number of these allocations.
If it turns out to be too much of a hassle, we can stay with this.
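
To sketch what I mean (just an illustration; the inline_pages field and
the sge[] sizing are made up here, not taken from the posted series):
back the inline buffer with order-0 pages and give the RECV one sge per
page, e.g.:

	int i, num_pages = DIV_ROUND_UP(ndev->inline_data_size, PAGE_SIZE);

	for (i = 0; i < num_pages; i++) {
		c->inline_pages[i] = alloc_page(GFP_KERNEL);
		if (!c->inline_pages[i])
			goto out_free_pages;
		c->sge[1 + i].addr = ib_dma_map_page(ndev->device,
				c->inline_pages[i], 0, PAGE_SIZE,
				DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(ndev->device, c->sge[1 + i].addr))
			goto out_free_pages;
		c->sge[1 + i].length = PAGE_SIZE;
		c->sge[1 + i].lkey = ndev->pd->local_dma_lkey;
	}

That keeps every allocation at order 0, at the cost of posting more sges
per RECV.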

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 3/3] nvmet-rdma: support 16K inline data
  2018-06-03  8:39   ` Max Gurtovoy
@ 2018-06-03 18:25     ` Steve Wise
  2018-06-04 13:58       ` Max Gurtovoy
  0 siblings, 1 reply; 39+ messages in thread
From: Steve Wise @ 2018-06-03 18:25 UTC (permalink / raw)




> -----Original Message-----
> From: Max Gurtovoy <maxg at mellanox.com>
> Sent: Sunday, June 3, 2018 3:40 AM
> To: Steve Wise <swise at opengridcomputing.com>; axboe at kernel.dk;
> hch at lst.de; keith.busch at intel.com; sagi at grimberg.me; linux-
> nvme at lists.infradead.org
> Cc: parav at mellanox.com; linux-rdma at vger.kernel.org
> Subject: Re: [PATCH v3 3/3] nvmet-rdma: support 16K inline data
> 
> 
> 
> On 5/29/2018 9:25 PM, Steve Wise wrote:
> > Add a new configfs port attribute, called inline_data_size,
> > to allow configuring the size of inline data for a given port.
> > The maximum size allowed is still enforced by nvmet-rdma with
> > NVMET_RDMA_MAX_INLINE_DATA_SIZE, which is increased to max(16KB,
> > PAGE_SIZE).  And the default size, if not specified via configfs,
> > is still PAGE_SIZE.  This preserves the existing behavior, but allows
> > larger inline sizes.
> >
> > Also support a configuration where inline_data_size is 0, which disables
> > using inline data.
> >
> > Signed-off-by: Steve Wise <swise at opengridcomputing.com>
> > ---
> >   drivers/nvme/target/admin-cmd.c |  4 ++--
> >   drivers/nvme/target/configfs.c  | 31 ++++++++++++++++++++++++++++
> >   drivers/nvme/target/core.c      |  4 ++++
> >   drivers/nvme/target/discovery.c |  2 +-
> >   drivers/nvme/target/nvmet.h     |  2 +-
> >   drivers/nvme/target/rdma.c      | 45 ++++++++++++++++++++++++++++-----
> --------
> >   6 files changed, 70 insertions(+), 18 deletions(-)
> 
> snip..
> 
> >
> > diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> > index 52e0c5d..2f0b08e 100644
> > --- a/drivers/nvme/target/rdma.c
> > +++ b/drivers/nvme/target/rdma.c
> > @@ -33,9 +33,10 @@
> >   #include "nvmet.h"
> >
> >   /*
> > - * We allow up to a page of inline data to go with the SQE
> > + * We allow at least 1 page, and up to 16KB of inline data to go with
the
> SQE
> >    */
> > -#define NVMET_RDMA_INLINE_DATA_SIZE	PAGE_SIZE
> > +#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
> > +#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int,
> SZ_16K, PAGE_SIZE)
> 
> why not use SZ_16K ? why we need to mention the PAGE_SIZE ?
>

The idea is to allow at least 1 page.  So for, say, a 64K page system, we'll
allow 64K since we're allocating a page minimum for the buffer.
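
To make that concrete, the macro just picks the larger of the two values
at build time; the comment values below are illustrative, for a 4K-page
and a 64K-page kernel:

	#define NVMET_RDMA_MAX_INLINE_DATA_SIZE	max_t(int, SZ_16K, PAGE_SIZE)

	/* 4K pages:  max_t(int, 16384, 4096)  resolves to 16384 (16KB) */
	/* 64K pages: max_t(int, 16384, 65536) resolves to 65536 (64KB) */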

 
> >
> >   struct nvmet_rdma_cmd {
> >   	struct ib_sge		sge[2];
> > @@ -116,6 +117,7 @@ struct nvmet_rdma_device {
> >   	size_t			srq_size;
> >   	struct kref		ref;
> >   	struct list_head	entry;
> > +	int			inline_data_size;
> >   };
> >
> >   static bool nvmet_rdma_use_srq;
> > @@ -187,6 +189,8 @@ static inline bool
> nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
> >   static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
> >   			struct nvmet_rdma_cmd *c, bool admin)
> >   {
> > +	int inline_data_size = ndev->inline_data_size;
> > +
> >   	/* NVMe command / RDMA RECV */
> >   	c->nvme_cmd = kmalloc(sizeof(*c->nvme_cmd), GFP_KERNEL);
> >   	if (!c->nvme_cmd)
> > @@ -200,17 +204,17 @@ static int nvmet_rdma_alloc_cmd(struct
> nvmet_rdma_device *ndev,
> >   	c->sge[0].length = sizeof(*c->nvme_cmd);
> >   	c->sge[0].lkey = ndev->pd->local_dma_lkey;
> >
> > -	if (!admin) {
> > +	if (!admin && inline_data_size) {
> >   		c->inline_page = alloc_pages(GFP_KERNEL,
> > -
> 	get_order(NVMET_RDMA_INLINE_DATA_SIZE));
> > +				get_order(inline_data_size));
> >   		if (!c->inline_page)
> >   			goto out_unmap_cmd;
> >   		c->sge[1].addr = ib_dma_map_page(ndev->device,
> > -				c->inline_page, 0,
> NVMET_RDMA_INLINE_DATA_SIZE,
> > +				c->inline_page, 0, inline_data_size,
> >   				DMA_FROM_DEVICE);
> >   		if (ib_dma_mapping_error(ndev->device, c->sge[1].addr))
> >   			goto out_free_inline_page;
> > -		c->sge[1].length = NVMET_RDMA_INLINE_DATA_SIZE;
> > +		c->sge[1].length = inline_data_size;
> >   		c->sge[1].lkey = ndev->pd->local_dma_lkey;
> >   	}
> >
> > @@ -225,7 +229,7 @@ static int nvmet_rdma_alloc_cmd(struct
> nvmet_rdma_device *ndev,
> >   out_free_inline_page:
> >   	if (!admin) {
> >   		__free_pages(c->inline_page,
> > -
> 	get_order(NVMET_RDMA_INLINE_DATA_SIZE));
> > +				get_order(inline_data_size));
> >   	}
> >   out_unmap_cmd:
> >   	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
> > @@ -240,11 +244,13 @@ static int nvmet_rdma_alloc_cmd(struct
> nvmet_rdma_device *ndev,
> >   static void nvmet_rdma_free_cmd(struct nvmet_rdma_device *ndev,
> >   		struct nvmet_rdma_cmd *c, bool admin)
> >   {
> > -	if (!admin) {
> > +	int inline_data_size = ndev->inline_data_size;
> > +
> > +	if (!admin && inline_data_size) {
> >   		ib_dma_unmap_page(ndev->device, c->sge[1].addr,
> > -				NVMET_RDMA_INLINE_DATA_SIZE,
> DMA_FROM_DEVICE);
> > +				inline_data_size, DMA_FROM_DEVICE);
> >   		__free_pages(c->inline_page,
> > -
> 	get_order(NVMET_RDMA_INLINE_DATA_SIZE));
> > +				get_order(inline_data_size));
> >   	}
> >   	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
> >   				sizeof(*c->nvme_cmd),
> DMA_FROM_DEVICE);
> > @@ -544,7 +550,7 @@ static u16 nvmet_rdma_map_sgl_inline(struct
> nvmet_rdma_rsp *rsp)
> >   	if (!nvme_is_write(rsp->req.cmd))
> >   		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
> >
> > -	if (off + len > NVMET_RDMA_INLINE_DATA_SIZE) {
> > +	if (off + len > rsp->queue->dev->inline_data_size) {
> >   		pr_err("invalid inline data offset!\n");
> >   		return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
> >   	}
> > @@ -793,6 +799,7 @@ static void nvmet_rdma_free_dev(struct kref *ref)
> >   static struct nvmet_rdma_device *
> >   nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
> >   {
> > +	struct nvmet_port *port = cm_id->context;
> >   	struct nvmet_rdma_device *ndev;
> >   	int ret;
> >
> > @@ -807,6 +814,7 @@ static void nvmet_rdma_free_dev(struct kref *ref)
> >   	if (!ndev)
> >   		goto out_err;
> >
> > +	ndev->inline_data_size = port->inline_data_size;
> >   	ndev->device = cm_id->device;
> >   	kref_init(&ndev->ref);
> >
> > @@ -1379,6 +1387,15 @@ static int nvmet_rdma_add_port(struct
> nvmet_port *port)
> >   		return -EINVAL;
> >   	}
> >
> > +	if (port->inline_data_size < 0) {
> > +		port->inline_data_size =
> NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
> > +	} else if (port->inline_data_size >
> NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
> > +		pr_err("invalid inline_data_size %d (max supported is
> %u)\n",
> > +			port->inline_data_size,
> > +			NVMET_RDMA_MAX_INLINE_DATA_SIZE);
> > +		return -EINVAL;
> > +	}
> > +
> >   	ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
> >   			port->disc_addr.trsvcid, &addr);
> >   	if (ret) {
> > @@ -1418,8 +1435,9 @@ static int nvmet_rdma_add_port(struct
> nvmet_port *port)
> >   		goto out_destroy_id;
> >   	}
> >
> > -	pr_info("enabling port %d (%pISpcs)\n",
> > -		le16_to_cpu(port->disc_addr.portid), (struct sockaddr
> *)&addr);
> > +	pr_info("enabling port %d (%pISpcs) inline_data_size %d\n",
> > +		le16_to_cpu(port->disc_addr.portid), (struct sockaddr
> *)&addr,
> > +		port->inline_data_size);
> >   	port->priv = cm_id;
> >   	return 0;
> >
> > @@ -1456,7 +1474,6 @@ static void nvmet_rdma_disc_port_addr(struct
> nvmet_req *req,
> >   static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
> >   	.owner			= THIS_MODULE,
> >   	.type			= NVMF_TRTYPE_RDMA,
> > -	.sqe_inline_size	= NVMET_RDMA_INLINE_DATA_SIZE,
> >   	.msdbd			= 1,
> >   	.has_keyed_sgls		= 1,
> >   	.add_port		= nvmet_rdma_add_port,
> >

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-06-03 11:57                 ` Sagi Grimberg
@ 2018-06-03 18:27                   ` Steve Wise
  2018-06-04 12:01                     ` Sagi Grimberg
  0 siblings, 1 reply; 39+ messages in thread
From: Steve Wise @ 2018-06-03 18:27 UTC (permalink / raw)




> -----Original Message-----
> From: Sagi Grimberg <sagi at grimberg.me>
> Sent: Sunday, June 3, 2018 6:57 AM
> To: hch at lst.de; Steve Wise <swise at opengridcomputing.com>
> Cc: Ruhl, Michael J <michael.j.ruhl at intel.com>; axboe at kernel.dk; Busch,
> Keith <keith.busch at intel.com>; linux-nvme at lists.infradead.org;
> parav at mellanox.com; maxg at mellanox.com; linux-rdma at vger.kernel.org
> Subject: Re: [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl
> support
> 
> 
> >> Ok then, I will leave them as bit numbers...
> >
> > Btw, modulo the debug printk 1 & 2 look fine to me and I'd like to
> > pull them in.  Sagi, are you ok with the patches?
> 
> I'd like to see a version that avoids higher order allocations first
> as we can have a non negligible number of these allocations.
> If it turns out too much of hassle we can stay with this.

He's referring to patch 1 and 2, which are the host side.  No page allocations. 

Also, I made the changes to avoid higher-order allocations for patch 3.  Testing now.  I'll send it out tomorrow.

Steve.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-06-03 18:27                   ` Steve Wise
@ 2018-06-04 12:01                     ` Sagi Grimberg
  2018-06-04 12:11                       ` Christoph Hellwig
  0 siblings, 1 reply; 39+ messages in thread
From: Sagi Grimberg @ 2018-06-04 12:01 UTC (permalink / raw)



> He's referring to patch 1 and 2, which are the host side.  No page allocations.

I'm good with 1 & 2,

Christoph, you can add my

Reviewed-by: Sagi Grimberg <sagi at grimberg.me>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-06-04 12:01                     ` Sagi Grimberg
@ 2018-06-04 12:11                       ` Christoph Hellwig
  2018-06-04 12:17                         ` Steve Wise
  2018-06-04 13:52                         ` Max Gurtovoy
  0 siblings, 2 replies; 39+ messages in thread
From: Christoph Hellwig @ 2018-06-04 12:11 UTC (permalink / raw)


On Mon, Jun 04, 2018@03:01:43PM +0300, Sagi Grimberg wrote:
>
>> He's referring to patch 1 and 2, which are the host side.  No page allocations.
>
> I'm good with 1 & 2,
>
> Christoph, you can add my
>
> Reviewed-by: Sagi Grimberg <sagi at grimberg.me>

We've missed the merge window now, so we can just wait for a proper
resend from Steve I think.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-06-04 12:11                       ` Christoph Hellwig
@ 2018-06-04 12:17                         ` Steve Wise
  2018-06-04 13:52                         ` Max Gurtovoy
  1 sibling, 0 replies; 39+ messages in thread
From: Steve Wise @ 2018-06-04 12:17 UTC (permalink / raw)


> On Mon, Jun 04, 2018@03:01:43PM +0300, Sagi Grimberg wrote:
> >
> >> He's referring to patch 1 and 2, which are the host side.  No page
> allocations.
> >
> > I'm good with 1 & 2,
> >
> > Christoph, you can add my
> >
> > Reviewed-by: Sagi Grimberg <sagi at grimberg.me>
> 
> We've missed the merge window now, so we can just wait for a proper
> resend from Steve I think.

Ok I'll be sending a new version soon.

Steve.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-06-04 12:11                       ` Christoph Hellwig
  2018-06-04 12:17                         ` Steve Wise
@ 2018-06-04 13:52                         ` Max Gurtovoy
  2018-06-04 14:21                           ` Steve Wise
  1 sibling, 1 reply; 39+ messages in thread
From: Max Gurtovoy @ 2018-06-04 13:52 UTC (permalink / raw)




On 6/4/2018 3:11 PM, Christoph Hellwig wrote:
> On Mon, Jun 04, 2018@03:01:43PM +0300, Sagi Grimberg wrote:
>>
>>> He's referring to patch 1 and 2, which are the host side.  No page allocations.
>>
>> I'm good with 1 & 2,
>>
>> Christoph, you can add my
>>
>> Reviewed-by: Sagi Grimberg <sagi at grimberg.me>
> 
> We've missed the merge window now, so we can just wait for a proper
> resend from Steve I think.
> 

There are still issues that I'm trying to help Steve debug, so let's 
wait on the merge until we figure them out.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 3/3] nvmet-rdma: support 16K inline data
  2018-06-03 18:25     ` Steve Wise
@ 2018-06-04 13:58       ` Max Gurtovoy
  2018-06-04 14:18         ` Steve Wise
  0 siblings, 1 reply; 39+ messages in thread
From: Max Gurtovoy @ 2018-06-04 13:58 UTC (permalink / raw)




On 6/3/2018 9:25 PM, Steve Wise wrote:
> 
> 
>> -----Original Message-----
>> From: Max Gurtovoy <maxg at mellanox.com>
>> Sent: Sunday, June 3, 2018 3:40 AM
>> To: Steve Wise <swise at opengridcomputing.com>; axboe at kernel.dk;
>> hch at lst.de; keith.busch at intel.com; sagi at grimberg.me; linux-
>> nvme at lists.infradead.org
>> Cc: parav at mellanox.com; linux-rdma at vger.kernel.org
>> Subject: Re: [PATCH v3 3/3] nvmet-rdma: support 16K inline data
>>
>>
>>
>> On 5/29/2018 9:25 PM, Steve Wise wrote:
>>> Add a new configfs port attribute, called inline_data_size,
>>> to allow configuring the size of inline data for a given port.
>>> The maximum size allowed is still enforced by nvmet-rdma with
>>> NVMET_RDMA_MAX_INLINE_DATA_SIZE, which is increased to max(16KB,
>>> PAGE_SIZE).  And the default size, if not specified via configfs,
>>> is still PAGE_SIZE.  This preserves the existing behavior, but allows
>>> larger inline sizes.
>>>
>>> Also support a configuration where inline_data_size is 0, which disables
>>> using inline data.
>>>
>>> Signed-off-by: Steve Wise <swise at opengridcomputing.com>
>>> ---
>>>    drivers/nvme/target/admin-cmd.c |  4 ++--
>>>    drivers/nvme/target/configfs.c  | 31 ++++++++++++++++++++++++++++
>>>    drivers/nvme/target/core.c      |  4 ++++
>>>    drivers/nvme/target/discovery.c |  2 +-
>>>    drivers/nvme/target/nvmet.h     |  2 +-
>>>    drivers/nvme/target/rdma.c      | 45 ++++++++++++++++++++++++++++-----
>> --------
>>>    6 files changed, 70 insertions(+), 18 deletions(-)
>>
>> snip..
>>
>>>
>>> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
>>> index 52e0c5d..2f0b08e 100644
>>> --- a/drivers/nvme/target/rdma.c
>>> +++ b/drivers/nvme/target/rdma.c
>>> @@ -33,9 +33,10 @@
>>>    #include "nvmet.h"
>>>
>>>    /*
>>> - * We allow up to a page of inline data to go with the SQE
>>> + * We allow at least 1 page, and up to 16KB of inline data to go with
> the
>> SQE
>>>     */
>>> -#define NVMET_RDMA_INLINE_DATA_SIZE	PAGE_SIZE
>>> +#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
>>> +#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int,
>> SZ_16K, PAGE_SIZE)
>>
>> why not use SZ_16K ? why we need to mention the PAGE_SIZE ?
>>
> 
> The idea is to allow at least 1 page.  So for, say, a 64K page system, we'll
> allow 64K since we're allocating a page minimum for the buffer.

IMO you want to support up to 16K inline data and not up to 64K (also on 
PowerPC systems)...

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 3/3] nvmet-rdma: support 16K inline data
  2018-06-04 13:58       ` Max Gurtovoy
@ 2018-06-04 14:18         ` Steve Wise
  2018-06-05  8:52           ` Max Gurtovoy
  0 siblings, 1 reply; 39+ messages in thread
From: Steve Wise @ 2018-06-04 14:18 UTC (permalink / raw)




> -----Original Message-----
> From: Max Gurtovoy <maxg at mellanox.com>
> Sent: Monday, June 4, 2018 8:58 AM
> To: Steve Wise <swise at opengridcomputing.com>; axboe at kernel.dk;
> hch at lst.de; keith.busch at intel.com; sagi at grimberg.me; linux-
> nvme at lists.infradead.org
> Cc: parav at mellanox.com; linux-rdma at vger.kernel.org
> Subject: Re: [PATCH v3 3/3] nvmet-rdma: support 16K inline data
> 
> 
> 
> On 6/3/2018 9:25 PM, Steve Wise wrote:
> >
> >
> >> -----Original Message-----
> >> From: Max Gurtovoy <maxg at mellanox.com>
> >> Sent: Sunday, June 3, 2018 3:40 AM
> >> To: Steve Wise <swise at opengridcomputing.com>; axboe at kernel.dk;
> >> hch at lst.de; keith.busch at intel.com; sagi at grimberg.me; linux-
> >> nvme at lists.infradead.org
> >> Cc: parav at mellanox.com; linux-rdma at vger.kernel.org
> >> Subject: Re: [PATCH v3 3/3] nvmet-rdma: support 16K inline data
> >>
> >>
> >>
> >> On 5/29/2018 9:25 PM, Steve Wise wrote:
> >>> Add a new configfs port attribute, called inline_data_size,
> >>> to allow configuring the size of inline data for a given port.
> >>> The maximum size allowed is still enforced by nvmet-rdma with
> >>> NVMET_RDMA_MAX_INLINE_DATA_SIZE, which is increased to
> max(16KB,
> >>> PAGE_SIZE).  And the default size, if not specified via configfs,
> >>> is still PAGE_SIZE.  This preserves the existing behavior, but allows
> >>> larger inline sizes.
> >>>
> >>> Also support a configuration where inline_data_size is 0, which
disables
> >>> using inline data.
> >>>
> >>> Signed-off-by: Steve Wise <swise at opengridcomputing.com>
> >>> ---
> >>>    drivers/nvme/target/admin-cmd.c |  4 ++--
> >>>    drivers/nvme/target/configfs.c  | 31 ++++++++++++++++++++++++++++
> >>>    drivers/nvme/target/core.c      |  4 ++++
> >>>    drivers/nvme/target/discovery.c |  2 +-
> >>>    drivers/nvme/target/nvmet.h     |  2 +-
> >>>    drivers/nvme/target/rdma.c      | 45 ++++++++++++++++++++++++++++-
> ----
> >> --------
> >>>    6 files changed, 70 insertions(+), 18 deletions(-)
> >>
> >> snip..
> >>
> >>>
> >>> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> >>> index 52e0c5d..2f0b08e 100644
> >>> --- a/drivers/nvme/target/rdma.c
> >>> +++ b/drivers/nvme/target/rdma.c
> >>> @@ -33,9 +33,10 @@
> >>>    #include "nvmet.h"
> >>>
> >>>    /*
> >>> - * We allow up to a page of inline data to go with the SQE
> >>> + * We allow at least 1 page, and up to 16KB of inline data to go with
> > the
> >> SQE
> >>>     */
> >>> -#define NVMET_RDMA_INLINE_DATA_SIZE	PAGE_SIZE
> >>> +#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
> >>> +#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int,
> >> SZ_16K, PAGE_SIZE)
> >>
> >> why not use SZ_16K ? why we need to mention the PAGE_SIZE ?
> >>
> >
> > The idea is to allow at least 1 page.  So for, say, a 64K page system,
we'll
> > allow 64K since we're allocating a page minimum for the buffer.
> 
> IMO you want to support upto 16K inline data and not upto 64k (also in
> PowerPC system)...

Why?

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-06-04 13:52                         ` Max Gurtovoy
@ 2018-06-04 14:21                           ` Steve Wise
  2018-06-04 14:29                             ` Max Gurtovoy
  0 siblings, 1 reply; 39+ messages in thread
From: Steve Wise @ 2018-06-04 14:21 UTC (permalink / raw)




> -----Original Message-----
> From: Max Gurtovoy <maxg at mellanox.com>
> Sent: Monday, June 4, 2018 8:52 AM
> To: Christoph Hellwig <hch at lst.de>; Sagi Grimberg <sagi at grimberg.me>
> Cc: Steve Wise <swise at opengridcomputing.com>; 'Ruhl, Michael J'
> <michael.j.ruhl at intel.com>; axboe at kernel.dk; 'Busch, Keith'
> <keith.busch at intel.com>; linux-nvme at lists.infradead.org;
> parav at mellanox.com; linux-rdma at vger.kernel.org
> Subject: Re: [PATCH v3 1/3] nvme-rdma: correctly check for target keyed
sgl
> support
> 
> 
> 
> On 6/4/2018 3:11 PM, Christoph Hellwig wrote:
> > On Mon, Jun 04, 2018@03:01:43PM +0300, Sagi Grimberg wrote:
> >>
> >>> He's referring to patch 1 and 2, which are the host side.  No page
> allocations.
> >>
> >> I'm good with 1 & 2,
> >>
> >> Christoph, you can add my
> >>
> >> Reviewed-by: Sagi Grimberg <sagi at grimberg.me>
> >
> > We've missed the merge window now, so we can just wait for a proper
> > resend from Steve I think.
> >
> 
> There are still issue that I'm trying to help Steve with their debug so
> let's wait with the merge until we figure them out.

I would like review of my new nvmet-rdma changes to avoid > 0 order page
allocations, though.  Perhaps I'll resend the series and add the RFC tag (or
WIP?) with verbiage saying don't merge yet.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-06-04 14:21                           ` Steve Wise
@ 2018-06-04 14:29                             ` Max Gurtovoy
  2018-06-04 14:31                               ` Steve Wise
  0 siblings, 1 reply; 39+ messages in thread
From: Max Gurtovoy @ 2018-06-04 14:29 UTC (permalink / raw)




On 6/4/2018 5:21 PM, Steve Wise wrote:
> 
> 
>> -----Original Message-----
>> From: Max Gurtovoy <maxg at mellanox.com>
>> Sent: Monday, June 4, 2018 8:52 AM
>> To: Christoph Hellwig <hch at lst.de>; Sagi Grimberg <sagi at grimberg.me>
>> Cc: Steve Wise <swise at opengridcomputing.com>; 'Ruhl, Michael J'
>> <michael.j.ruhl at intel.com>; axboe at kernel.dk; 'Busch, Keith'
>> <keith.busch at intel.com>; linux-nvme at lists.infradead.org;
>> parav at mellanox.com; linux-rdma at vger.kernel.org
>> Subject: Re: [PATCH v3 1/3] nvme-rdma: correctly check for target keyed
> sgl
>> support
>>
>>
>>
>> On 6/4/2018 3:11 PM, Christoph Hellwig wrote:
>>> On Mon, Jun 04, 2018@03:01:43PM +0300, Sagi Grimberg wrote:
>>>>
>>>>> He's referring to patch 1 and 2, which are the host side.  No page
>> allocations.
>>>>
>>>> I'm good with 1 & 2,
>>>>
>>>> Christoph, you can add my
>>>>
>>>> Reviewed-by: Sagi Grimberg <sagi at grimberg.me>
>>>
>>> We've missed the merge window now, so we can just wait for a proper
>>> resend from Steve I think.
>>>
>>
>> There are still issue that I'm trying to help Steve with their debug so
>> let's wait with the merge until we figure them out.
> 
> I would like review on my new nvmet-rdma changes to avoid > 0 order page
> allocations though.  Perhaps I'll resend the series and add the RFC tag (or
> WIP?) with verbiage saying don't merge yet.
> 
> 

you should add to your new code:

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 2b6dc19..5828bf2 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -964,7 +965,7 @@ static int nvmet_rdma_create_queue_ib(struct 
nvmet_rdma_queue *queue)
         } else {
                 /* +1 for drain */
                 qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size;
-               qp_attr.cap.max_recv_sge = 2;
+               qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
         }

         ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);



I currently see some timeouts in the initiator with 4k inline as well, but it 
works fine with the old initiator.

-Max.

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-06-04 14:29                             ` Max Gurtovoy
@ 2018-06-04 14:31                               ` Steve Wise
  2018-06-04 14:37                                 ` Max Gurtovoy
  0 siblings, 1 reply; 39+ messages in thread
From: Steve Wise @ 2018-06-04 14:31 UTC (permalink / raw)


> 
> 
> 
> On 6/4/2018 5:21 PM, Steve Wise wrote:
> >
> >
> >> -----Original Message-----
> >> From: Max Gurtovoy <maxg at mellanox.com>
> >> Sent: Monday, June 4, 2018 8:52 AM
> >> To: Christoph Hellwig <hch at lst.de>; Sagi Grimberg <sagi at grimberg.me>
> >> Cc: Steve Wise <swise at opengridcomputing.com>; 'Ruhl, Michael J'
> >> <michael.j.ruhl at intel.com>; axboe at kernel.dk; 'Busch, Keith'
> >> <keith.busch at intel.com>; linux-nvme at lists.infradead.org;
> >> parav at mellanox.com; linux-rdma at vger.kernel.org
> >> Subject: Re: [PATCH v3 1/3] nvme-rdma: correctly check for target keyed
> > sgl
> >> support
> >>
> >>
> >>
> >> On 6/4/2018 3:11 PM, Christoph Hellwig wrote:
> >>> On Mon, Jun 04, 2018@03:01:43PM +0300, Sagi Grimberg wrote:
> >>>>
> >>>>> He's referring to patch 1 and 2, which are the host side.  No page
> >> allocations.
> >>>>
> >>>> I'm good with 1 & 2,
> >>>>
> >>>> Christoph, you can add my
> >>>>
> >>>> Reviewed-by: Sagi Grimberg <sagi at grimberg.me>
> >>>
> >>> We've missed the merge window now, so we can just wait for a proper
> >>> resend from Steve I think.
> >>>
> >>
> >> There are still issue that I'm trying to help Steve with their debug so
> >> let's wait with the merge until we figure them out.
> >
> > I would like review on my new nvmet-rdma changes to avoid > 0 order
> page
> > allocations though.  Perhaps I'll resend the series and add the RFC tag
(or
> > WIP?) with verbiage saying don't merge yet.
> >
> >
> 
> you should add to your new code:
> 
> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> index 2b6dc19..5828bf2 100644
> --- a/drivers/nvme/target/rdma.c
> +++ b/drivers/nvme/target/rdma.c
> @@ -964,7 +965,7 @@ static int nvmet_rdma_create_queue_ib(struct
> nvmet_rdma_queue *queue)
>          } else {
>                  /* +1 for drain */
>                  qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size;
> -               qp_attr.cap.max_recv_sge = 2;
> +               qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
>          }
> 
>          ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
>

Yes.  Good catch.
 
> 
> 
> I currently see some timeout in the initiator also with 4k inline but it
> works good with old initiator.
> 

This is with my github repo?

Steve

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-06-04 14:31                               ` Steve Wise
@ 2018-06-04 14:37                                 ` Max Gurtovoy
  2018-06-04 14:45                                   ` Steve Wise
  0 siblings, 1 reply; 39+ messages in thread
From: Max Gurtovoy @ 2018-06-04 14:37 UTC (permalink / raw)




On 6/4/2018 5:31 PM, Steve Wise wrote:
>>
>>
>>
>> On 6/4/2018 5:21 PM, Steve Wise wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Max Gurtovoy <maxg at mellanox.com>
>>>> Sent: Monday, June 4, 2018 8:52 AM
>>>> To: Christoph Hellwig <hch at lst.de>; Sagi Grimberg <sagi at grimberg.me>
>>>> Cc: Steve Wise <swise at opengridcomputing.com>; 'Ruhl, Michael J'
>>>> <michael.j.ruhl at intel.com>; axboe at kernel.dk; 'Busch, Keith'
>>>> <keith.busch at intel.com>; linux-nvme at lists.infradead.org;
>>>> parav at mellanox.com; linux-rdma at vger.kernel.org
>>>> Subject: Re: [PATCH v3 1/3] nvme-rdma: correctly check for target keyed
>>> sgl
>>>> support
>>>>
>>>>
>>>>
>>>> On 6/4/2018 3:11 PM, Christoph Hellwig wrote:
>>>>> On Mon, Jun 04, 2018@03:01:43PM +0300, Sagi Grimberg wrote:
>>>>>>
>>>>>>> He's referring to patch 1 and 2, which are the host side.  No page
>>>> allocations.
>>>>>>
>>>>>> I'm good with 1 & 2,
>>>>>>
>>>>>> Christoph, you can add my
>>>>>>
>>>>>> Reviewed-by: Sagi Grimberg <sagi at grimberg.me>
>>>>>
>>>>> We've missed the merge window now, so we can just wait for a proper
>>>>> resend from Steve I think.
>>>>>
>>>>
>>>> There are still issue that I'm trying to help Steve with their debug so
>>>> let's wait with the merge until we figure them out.
>>>
>>> I would like review on my new nvmet-rdma changes to avoid > 0 order
>> page
>>> allocations though.  Perhaps I'll resend the series and add the RFC tag
> (or
>>> WIP?) with verbiage saying don't merge yet.
>>>
>>>
>>
>> you should add to your new code:
>>
>> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
>> index 2b6dc19..5828bf2 100644
>> --- a/drivers/nvme/target/rdma.c
>> +++ b/drivers/nvme/target/rdma.c
>> @@ -964,7 +965,7 @@ static int nvmet_rdma_create_queue_ib(struct
>> nvmet_rdma_queue *queue)
>>           } else {
>>                   /* +1 for drain */
>>                   qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size;
>> -               qp_attr.cap.max_recv_sge = 2;
>> +               qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
>>           }
>>
>>           ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
>>
> 
> Yes.  Good catch.
>   
>>
>>
>> I currently see some timeout in the initiator also with 4k inline but it
>> works good with old initiator.
>>
> 
> This is with my github repo?

The initiator is from V3 here.
Is there a difference in the initiator code?

> 
> Steve
> 
> 
> 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support
  2018-06-04 14:37                                 ` Max Gurtovoy
@ 2018-06-04 14:45                                   ` Steve Wise
  0 siblings, 0 replies; 39+ messages in thread
From: Steve Wise @ 2018-06-04 14:45 UTC (permalink / raw)



> >> you should add to your new code:
> >>
> >> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> >> index 2b6dc19..5828bf2 100644
> >> --- a/drivers/nvme/target/rdma.c
> >> +++ b/drivers/nvme/target/rdma.c
> >> @@ -964,7 +965,7 @@ static int nvmet_rdma_create_queue_ib(struct
> >> nvmet_rdma_queue *queue)
> >>           } else {
> >>                   /* +1 for drain */
> >>                   qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size;
> >> -               qp_attr.cap.max_recv_sge = 2;
> >> +               qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
> >>           }
> >>
> >>           ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
> >>
> >
> > Yes.  Good catch.
> >
> >>
> >>
> >> I currently see some timeout in the initiator also with 4k inline but
it
> >> works good with old initiator.
> >>
> >
> > This is with my github repo?
> 
> The initiator is from V3 here.
> Is there a difference in the initiator code ?

Just removing a pr_debug().

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 3/3] nvmet-rdma: support 16K inline data
  2018-06-04 14:18         ` Steve Wise
@ 2018-06-05  8:52           ` Max Gurtovoy
  2018-06-05 14:28             ` Steve Wise
  0 siblings, 1 reply; 39+ messages in thread
From: Max Gurtovoy @ 2018-06-05  8:52 UTC (permalink / raw)




On 6/4/2018 5:18 PM, Steve Wise wrote:
> 
> 
>> -----Original Message-----
>> From: Max Gurtovoy <maxg at mellanox.com>
>> Sent: Monday, June 4, 2018 8:58 AM
>> To: Steve Wise <swise at opengridcomputing.com>; axboe at kernel.dk;
>> hch at lst.de; keith.busch at intel.com; sagi at grimberg.me; linux-
>> nvme at lists.infradead.org
>> Cc: parav at mellanox.com; linux-rdma at vger.kernel.org
>> Subject: Re: [PATCH v3 3/3] nvmet-rdma: support 16K inline data
>>
>>
>>
>> On 6/3/2018 9:25 PM, Steve Wise wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Max Gurtovoy <maxg at mellanox.com>
>>>> Sent: Sunday, June 3, 2018 3:40 AM
>>>> To: Steve Wise <swise at opengridcomputing.com>; axboe at kernel.dk;
>>>> hch at lst.de; keith.busch at intel.com; sagi at grimberg.me; linux-
>>>> nvme at lists.infradead.org
>>>> Cc: parav at mellanox.com; linux-rdma at vger.kernel.org
>>>> Subject: Re: [PATCH v3 3/3] nvmet-rdma: support 16K inline data
>>>>
>>>>
>>>>
>>>> On 5/29/2018 9:25 PM, Steve Wise wrote:
>>>>> Add a new configfs port attribute, called inline_data_size,
>>>>> to allow configuring the size of inline data for a given port.
>>>>> The maximum size allowed is still enforced by nvmet-rdma with
>>>>> NVMET_RDMA_MAX_INLINE_DATA_SIZE, which is increased to
>> max(16KB,
>>>>> PAGE_SIZE).  And the default size, if not specified via configfs,
>>>>> is still PAGE_SIZE.  This preserves the existing behavior, but allows
>>>>> larger inline sizes.
>>>>>
>>>>> Also support a configuration where inline_data_size is 0, which
> disables
>>>>> using inline data.
>>>>>
>>>>> Signed-off-by: Steve Wise <swise at opengridcomputing.com>
>>>>> ---
>>>>>     drivers/nvme/target/admin-cmd.c |  4 ++--
>>>>>     drivers/nvme/target/configfs.c  | 31 ++++++++++++++++++++++++++++
>>>>>     drivers/nvme/target/core.c      |  4 ++++
>>>>>     drivers/nvme/target/discovery.c |  2 +-
>>>>>     drivers/nvme/target/nvmet.h     |  2 +-
>>>>>     drivers/nvme/target/rdma.c      | 45 ++++++++++++++++++++++++++++-
>> ----
>>>> --------
>>>>>     6 files changed, 70 insertions(+), 18 deletions(-)
>>>>
>>>> snip..
>>>>
>>>>>
>>>>> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
>>>>> index 52e0c5d..2f0b08e 100644
>>>>> --- a/drivers/nvme/target/rdma.c
>>>>> +++ b/drivers/nvme/target/rdma.c
>>>>> @@ -33,9 +33,10 @@
>>>>>     #include "nvmet.h"
>>>>>
>>>>>     /*
>>>>> - * We allow up to a page of inline data to go with the SQE
>>>>> + * We allow at least 1 page, and up to 16KB of inline data to go with
>>> the
>>>> SQE
>>>>>      */
>>>>> -#define NVMET_RDMA_INLINE_DATA_SIZE	PAGE_SIZE
>>>>> +#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
>>>>> +#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int,
>>>> SZ_16K, PAGE_SIZE)
>>>>
>>>> why not use SZ_16K ? why we need to mention the PAGE_SIZE ?
>>>>
>>>
>>> The idea is to allow at least 1 page.  So for, say, a 64K page system,
> we'll
>>> allow 64K since we're allocating a page minimum for the buffer.
>>
>> IMO you want to support upto 16K inline data and not upto 64k (also in
>> PowerPC system)...
> 
> Why?
> 

Ok, we can use PAGE_SIZE from a resource and perf perspective, but please 
change the subject since it is confusing. I guess the subject can be 
similar to the one you used in the initiator.

I think we should consider clamping to NVMET_RDMA_MAX_INLINE_DATA_SIZE 
in case port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE, instead 
of failing the operation (and add an info print).
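
Something along these lines, adapting the add_port check from the patch
(the exact info message text here is only illustrative):

	if (port->inline_data_size < 0) {
		port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
	} else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
		pr_info("inline_data_size %d capped to max %d\n",
			port->inline_data_size,
			NVMET_RDMA_MAX_INLINE_DATA_SIZE);
		port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
	}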

-Max.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3 3/3] nvmet-rdma: support 16K inline data
  2018-06-05  8:52           ` Max Gurtovoy
@ 2018-06-05 14:28             ` Steve Wise
  0 siblings, 0 replies; 39+ messages in thread
From: Steve Wise @ 2018-06-05 14:28 UTC (permalink / raw)




On 6/5/2018 3:52 AM, Max Gurtovoy wrote:
>
>
> On 6/4/2018 5:18 PM, Steve Wise wrote:
>>
>>
>>> -----Original Message-----
>>> From: Max Gurtovoy <maxg at mellanox.com>
>>> Sent: Monday, June 4, 2018 8:58 AM
>>> To: Steve Wise <swise at opengridcomputing.com>; axboe at kernel.dk;
>>> hch at lst.de; keith.busch at intel.com; sagi at grimberg.me; linux-
>>> nvme at lists.infradead.org
>>> Cc: parav at mellanox.com; linux-rdma at vger.kernel.org
>>> Subject: Re: [PATCH v3 3/3] nvmet-rdma: support 16K inline data
>>>
>>>
>>>
>>> On 6/3/2018 9:25 PM, Steve Wise wrote:
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: Max Gurtovoy <maxg at mellanox.com>
>>>>> Sent: Sunday, June 3, 2018 3:40 AM
>>>>> To: Steve Wise <swise at opengridcomputing.com>; axboe at kernel.dk;
>>>>> hch at lst.de; keith.busch at intel.com; sagi at grimberg.me; linux-
>>>>> nvme at lists.infradead.org
>>>>> Cc: parav at mellanox.com; linux-rdma at vger.kernel.org
>>>>> Subject: Re: [PATCH v3 3/3] nvmet-rdma: support 16K inline data
>>>>>
>>>>>
>>>>>
>>>>> On 5/29/2018 9:25 PM, Steve Wise wrote:
>>>>>> Add a new configfs port attribute, called inline_data_size,
>>>>>> to allow configuring the size of inline data for a given port.
>>>>>> The maximum size allowed is still enforced by nvmet-rdma with
>>>>>> NVMET_RDMA_MAX_INLINE_DATA_SIZE, which is increased to
>>> max(16KB,
>>>>>> PAGE_SIZE).  And the default size, if not specified via configfs,
>>>>>> is still PAGE_SIZE.  This preserves the existing behavior, but
>>>>>> allows
>>>>>> larger inline sizes.
>>>>>>
>>>>>> Also support a configuration where inline_data_size is 0, which
>> disables
>>>>>> using inline data.
>>>>>>
>>>>>> Signed-off-by: Steve Wise <swise at opengridcomputing.com>
>>>>>> ---
>>>>>>    drivers/nvme/target/admin-cmd.c |  4 ++--
>>>>>>    drivers/nvme/target/configfs.c  | 31
>>>>>> ++++++++++++++++++++++++++++
>>>>>>    drivers/nvme/target/core.c      |  4 ++++
>>>>>>    drivers/nvme/target/discovery.c |  2 +-
>>>>>>    drivers/nvme/target/nvmet.h     |  2 +-
>>>>>>    drivers/nvme/target/rdma.c      | 45
>>>>>> ++++++++++++++++++++++++++++-
>>> ----
>>>>> --------
>>>>>>    6 files changed, 70 insertions(+), 18 deletions(-)
>>>>>
>>>>> snip..
>>>>>
>>>>>>
>>>>>> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
>>>>>> index 52e0c5d..2f0b08e 100644
>>>>>> --- a/drivers/nvme/target/rdma.c
>>>>>> +++ b/drivers/nvme/target/rdma.c
>>>>>> @@ -33,9 +33,10 @@
>>>>>>    #include "nvmet.h"
>>>>>>
>>>>>>    /*
>>>>>> - * We allow up to a page of inline data to go with the SQE
>>>>>> + * We allow at least 1 page, and up to 16KB of inline data to go
>>>>>> with
>>>> the
>>>>> SQE
>>>>>>     */
>>>>>> -#define NVMET_RDMA_INLINE_DATA_SIZE	PAGE_SIZE
>>>>>> +#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
>>>>>> +#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int,
>>>>> SZ_16K, PAGE_SIZE)
>>>>>
>>>>> why not use SZ_16K ? why we need to mention the PAGE_SIZE ?
>>>>>
>>>>
>>>> The idea is to allow at least 1 page.  So for, say, a 64K page system,
>> we'll
>>>> allow 64K since we're allocating a page minimum for the buffer.
>>>
>>> IMO you want to support upto 16K inline data and not upto 64k (also in
>>> PowerPC system)...
>>
>> Why?
>>
>
> Ok we can use PAGE_SIZE from resource and perf perspective, but please
> change the subject since it is confusing. I guess the subject can be
> similar to the one you used in the initiator.
>
> I think we should consider clamping to NVMET_RDMA_MAX_INLINE_DATA_SIZE
> in case port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE
> instead of failing the operation (and add an info print).
>

This sounds reasonable.  In my yet-to-be-sent new version of this, I
have similar logic for when the rdma device doesn't have enough sge
entries to support the configured size.  In that case, the inline data
size is clamped to the max that the device can support.
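
Roughly along these lines; this is only a sketch, and both
NVMET_RDMA_MAX_INLINE_SGE and the device attribute name are placeholders
rather than the final code:

	/* cap the inline size by the RECV sges the device can provide */
	u32 nsge = min_t(u32, NVMET_RDMA_MAX_INLINE_SGE,
			 ndev->device->attrs.max_sge - 1);

	if (nsge * PAGE_SIZE < ndev->inline_data_size) {
		pr_info("clamping inline_data_size to %lu\n",
			nsge * PAGE_SIZE);
		ndev->inline_data_size = nsge * PAGE_SIZE;
	}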

Steve.

^ permalink raw reply	[flat|nested] 39+ messages in thread

end of thread, other threads:[~2018-06-05 14:28 UTC | newest]

Thread overview: 39+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-29 18:26 [PATCH v3 0/3] NVMF/RDMA 16K Inline Support Steve Wise
2018-05-29 18:25 ` [PATCH v3 1/3] nvme-rdma: correctly check for target keyed sgl support Steve Wise
2018-05-29 20:23   ` Ruhl, Michael J
2018-05-30 14:39     ` Steve Wise
2018-05-30 15:11       ` Steve Wise
2018-05-30 21:37         ` Sagi Grimberg
2018-05-31 17:02           ` hch
2018-05-31 17:17             ` Steve Wise
2018-05-31 17:25               ` hch
2018-06-01 13:08                 ` Steve Wise
2018-06-03 11:57                 ` Sagi Grimberg
2018-06-03 18:27                   ` Steve Wise
2018-06-04 12:01                     ` Sagi Grimberg
2018-06-04 12:11                       ` Christoph Hellwig
2018-06-04 12:17                         ` Steve Wise
2018-06-04 13:52                         ` Max Gurtovoy
2018-06-04 14:21                           ` Steve Wise
2018-06-04 14:29                             ` Max Gurtovoy
2018-06-04 14:31                               ` Steve Wise
2018-06-04 14:37                                 ` Max Gurtovoy
2018-06-04 14:45                                   ` Steve Wise
2018-05-31 17:00     ` hch
2018-05-29 18:25 ` [PATCH v3 2/3] nvme-rdma: support up to 4 segments of inline data Steve Wise
2018-05-30 21:42   ` Sagi Grimberg
2018-05-30 21:46     ` Steve Wise
2018-05-29 18:25 ` [PATCH v3 3/3] nvmet-rdma: support 16K " Steve Wise
2018-05-30 15:49   ` Christopher Lameter
2018-05-30 16:46     ` Steve Wise
2018-05-30 17:02       ` Christopher Lameter
2018-05-30 21:45     ` Sagi Grimberg
2018-05-30 21:52       ` Steve Wise
2018-05-30 22:13         ` Sagi Grimberg
2018-05-30 22:26           ` Steve Wise
2018-06-03  8:39   ` Max Gurtovoy
2018-06-03 18:25     ` Steve Wise
2018-06-04 13:58       ` Max Gurtovoy
2018-06-04 14:18         ` Steve Wise
2018-06-05  8:52           ` Max Gurtovoy
2018-06-05 14:28             ` Steve Wise
