All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
@ 2020-02-26 14:13 ` Krishnamraju Eraparaju
  0 siblings, 0 replies; 20+ messages in thread
From: Krishnamraju Eraparaju @ 2020-02-26 14:13 UTC (permalink / raw)
  To: linux-nvme, sagi, hch; +Cc: linux-rdma, nirranjan, bharat, krishna2

Current nvmet-rdma code allocates MR pool budget based on host's SQ
size, assuming both host and target use the same "max_pages_per_mr"
count. But if host's max_pages_per_mr is greater than target's, then
target can run out of MRs while processing larger IO WRITEs.

That is, say host's SQ size is 100, then the MR pool budget allocated
currently at target will also be 100 MRs. But 100 IO WRITE Requests
with 256 sg_count(IO size above 1MB) require 200 MRs when target's
"max_pages_per_mr" is 128.

The proposed patch enables host to advertise the max_fr_pages(via
nvme_rdma_cm_req) such that target can allocate that many number of
RW ctxs(if host's max_fr_pages is higher than target's).

Signed-off-by: Krishnamraju Eraparaju <krishna2@chelsio.com>
---
 drivers/nvme/host/rdma.c   |  2 ++
 drivers/nvme/target/rdma.c | 23 ++++++++++++++++++++---
 include/linux/nvme-rdma.h  |  4 +++-
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2a47c6c5007e..5970f0eedbd6 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1614,6 +1614,8 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
 		priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize);
 	}
 
+	priv.hmax_fr_pages = cpu_to_le32(ctrl->max_fr_pages);
+
 	ret = rdma_connect(queue->cm_id, &param);
 	if (ret) {
 		dev_err(ctrl->ctrl.device,
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 37d262a65877..2a3893e3c4e7 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -98,6 +98,7 @@ struct nvmet_rdma_queue {
 	int			host_qid;
 	int			recv_queue_size;
 	int			send_queue_size;
+	int			rdma_rw_ctxs_factor;
 
 	struct list_head	queue_list;
 };
@@ -1008,7 +1009,8 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
 	qp_attr.qp_type = IB_QPT_RC;
 	/* +1 for drain */
 	qp_attr.cap.max_send_wr = queue->send_queue_size + 1;
-	qp_attr.cap.max_rdma_ctxs = queue->send_queue_size;
+	qp_attr.cap.max_rdma_ctxs = queue->send_queue_size *
+					queue->rdma_rw_ctxs_factor;
 	qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
 					ndev->device->attrs.max_send_sge);
 
@@ -1094,6 +1096,7 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
 				struct nvmet_rdma_queue *queue)
 {
 	struct nvme_rdma_cm_req *req;
+	u32 host_fr_pages_len, tgt_fr_pages_len;
 
 	req = (struct nvme_rdma_cm_req *)conn->private_data;
 	if (!req || conn->private_data_len == 0)
@@ -1111,6 +1114,19 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
 	queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
 	queue->send_queue_size = le16_to_cpu(req->hrqsize);
 
 +	host_fr_pages_len = le32_to_cpu(req->hmax_fr_pages);
+	tgt_fr_pages_len = queue->dev->device->attrs.max_fast_reg_page_list_len;
+
+	if (host_fr_pages_len > tgt_fr_pages_len)
+		/*
+		 * Allocate more RW contexts as more MRs are required when
+		 * host_fr_pages_len is higher than target's.
+		 */
+		queue->rdma_rw_ctxs_factor =
+			DIV_ROUND_UP(host_fr_pages_len, tgt_fr_pages_len);
+	else
+		queue->rdma_rw_ctxs_factor = 1;
+
 	if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH)
 		return NVME_RDMA_CM_INVALID_HSQSIZE;
 
@@ -1147,6 +1163,9 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
 		goto out_reject;
 	}
 
+	queue->dev = ndev;
+	queue->cm_id = cm_id;
+
 	ret = nvmet_sq_init(&queue->nvme_sq);
 	if (ret) {
 		ret = NVME_RDMA_CM_NO_RSC;
@@ -1162,8 +1181,6 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
 	 * inside a CM callback would trigger a deadlock. (great API design..)
 	 */
 	INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work);
-	queue->dev = ndev;
-	queue->cm_id = cm_id;
 
 	spin_lock_init(&queue->state_lock);
 	queue->state = NVMET_RDMA_Q_CONNECTING;
diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h
index 3ec8e50efa16..2d6f2cf1e319 100644
--- a/include/linux/nvme-rdma.h
+++ b/include/linux/nvme-rdma.h
@@ -52,13 +52,15 @@ static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
  * @qid:           queue Identifier for the Admin or I/O Queue
  * @hrqsize:       host receive queue size to be created
  * @hsqsize:       host send queue size to be created
+ * @hmax_fr_pages: host maximum pages per fast reg
  */
 struct nvme_rdma_cm_req {
 	__le16		recfmt;
 	__le16		qid;
 	__le16		hrqsize;
 	__le16		hsqsize;
-	u8		rsvd[24];
+	__le32		hmax_fr_pages;
+	u8		rsvd[20];
 };
 
 /**
-- 
2.23.0.rc0


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
@ 2020-02-26 14:13 ` Krishnamraju Eraparaju
  0 siblings, 0 replies; 20+ messages in thread
From: Krishnamraju Eraparaju @ 2020-02-26 14:13 UTC (permalink / raw)
  To: linux-nvme, sagi, hch; +Cc: linux-rdma, krishna2, bharat, nirranjan

Current nvmet-rdma code allocates MR pool budget based on host's SQ
size, assuming both host and target use the same "max_pages_per_mr"
count. But if host's max_pages_per_mr is greater than target's, then
target can run out of MRs while processing larger IO WRITEs.

That is, say host's SQ size is 100, then the MR pool budget allocated
currently at target will also be 100 MRs. But 100 IO WRITE Requests
with 256 sg_count(IO size above 1MB) require 200 MRs when target's
"max_pages_per_mr" is 128.

The proposed patch enables host to advertise the max_fr_pages(via
nvme_rdma_cm_req) such that target can allocate that many number of
RW ctxs(if host's max_fr_pages is higher than target's).

Signed-off-by: Krishnamraju Eraparaju <krishna2@chelsio.com>
---
 drivers/nvme/host/rdma.c   |  2 ++
 drivers/nvme/target/rdma.c | 23 ++++++++++++++++++++---
 include/linux/nvme-rdma.h  |  4 +++-
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2a47c6c5007e..5970f0eedbd6 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1614,6 +1614,8 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
 		priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize);
 	}
 
+	priv.hmax_fr_pages = cpu_to_le32(ctrl->max_fr_pages);
+
 	ret = rdma_connect(queue->cm_id, &param);
 	if (ret) {
 		dev_err(ctrl->ctrl.device,
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 37d262a65877..2a3893e3c4e7 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -98,6 +98,7 @@ struct nvmet_rdma_queue {
 	int			host_qid;
 	int			recv_queue_size;
 	int			send_queue_size;
+	int			rdma_rw_ctxs_factor;
 
 	struct list_head	queue_list;
 };
@@ -1008,7 +1009,8 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
 	qp_attr.qp_type = IB_QPT_RC;
 	/* +1 for drain */
 	qp_attr.cap.max_send_wr = queue->send_queue_size + 1;
-	qp_attr.cap.max_rdma_ctxs = queue->send_queue_size;
+	qp_attr.cap.max_rdma_ctxs = queue->send_queue_size *
+					queue->rdma_rw_ctxs_factor;
 	qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
 					ndev->device->attrs.max_send_sge);
 
@@ -1094,6 +1096,7 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
 				struct nvmet_rdma_queue *queue)
 {
 	struct nvme_rdma_cm_req *req;
+	u32 host_fr_pages_len, tgt_fr_pages_len;
 
 	req = (struct nvme_rdma_cm_req *)conn->private_data;
 	if (!req || conn->private_data_len == 0)
@@ -1111,6 +1114,19 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
 	queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
 	queue->send_queue_size = le16_to_cpu(req->hrqsize);
 
 +	host_fr_pages_len = le32_to_cpu(req->hmax_fr_pages);
+	tgt_fr_pages_len = queue->dev->device->attrs.max_fast_reg_page_list_len;
+
+	if (host_fr_pages_len > tgt_fr_pages_len)
+		/*
+		 * Allocate more RW contexts as more MRs are required when
+		 * host_fr_pages_len is higher than target's.
+		 */
+		queue->rdma_rw_ctxs_factor =
+			DIV_ROUND_UP(host_fr_pages_len, tgt_fr_pages_len);
+	else
+		queue->rdma_rw_ctxs_factor = 1;
+
 	if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH)
 		return NVME_RDMA_CM_INVALID_HSQSIZE;
 
@@ -1147,6 +1163,9 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
 		goto out_reject;
 	}
 
+	queue->dev = ndev;
+	queue->cm_id = cm_id;
+
 	ret = nvmet_sq_init(&queue->nvme_sq);
 	if (ret) {
 		ret = NVME_RDMA_CM_NO_RSC;
@@ -1162,8 +1181,6 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
 	 * inside a CM callback would trigger a deadlock. (great API design..)
 	 */
 	INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work);
-	queue->dev = ndev;
-	queue->cm_id = cm_id;
 
 	spin_lock_init(&queue->state_lock);
 	queue->state = NVMET_RDMA_Q_CONNECTING;
diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h
index 3ec8e50efa16..2d6f2cf1e319 100644
--- a/include/linux/nvme-rdma.h
+++ b/include/linux/nvme-rdma.h
@@ -52,13 +52,15 @@ static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
  * @qid:           queue Identifier for the Admin or I/O Queue
  * @hrqsize:       host receive queue size to be created
  * @hsqsize:       host send queue size to be created
+ * @hmax_fr_pages: host maximum pages per fast reg
  */
 struct nvme_rdma_cm_req {
 	__le16		recfmt;
 	__le16		qid;
 	__le16		hrqsize;
 	__le16		hsqsize;
-	u8		rsvd[24];
+	__le32		hmax_fr_pages;
+	u8		rsvd[20];
 };
 
 /**
-- 
2.23.0.rc0


_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
  2020-02-26 14:13 ` Krishnamraju Eraparaju
@ 2020-02-26 17:07   ` Jason Gunthorpe
  -1 siblings, 0 replies; 20+ messages in thread
From: Jason Gunthorpe @ 2020-02-26 17:07 UTC (permalink / raw)
  To: Krishnamraju Eraparaju
  Cc: linux-nvme, sagi, hch, linux-rdma, nirranjan, bharat

On Wed, Feb 26, 2020 at 07:43:18PM +0530, Krishnamraju Eraparaju wrote:
> diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h
> index 3ec8e50efa16..2d6f2cf1e319 100644
> +++ b/include/linux/nvme-rdma.h
> @@ -52,13 +52,15 @@ static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
>   * @qid:           queue Identifier for the Admin or I/O Queue
>   * @hrqsize:       host receive queue size to be created
>   * @hsqsize:       host send queue size to be created
> + * @hmax_fr_pages: host maximum pages per fast reg
>   */
>  struct nvme_rdma_cm_req {
>  	__le16		recfmt;
>  	__le16		qid;
>  	__le16		hrqsize;
>  	__le16		hsqsize;
> -	u8		rsvd[24];
> +	__le32		hmax_fr_pages;
> +	u8		rsvd[20];
>  };

This is an on the wire change - do you need to get approval from some
standards body?

Jason

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
@ 2020-02-26 17:07   ` Jason Gunthorpe
  0 siblings, 0 replies; 20+ messages in thread
From: Jason Gunthorpe @ 2020-02-26 17:07 UTC (permalink / raw)
  To: Krishnamraju Eraparaju
  Cc: sagi, linux-rdma, bharat, nirranjan, linux-nvme, hch

On Wed, Feb 26, 2020 at 07:43:18PM +0530, Krishnamraju Eraparaju wrote:
> diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h
> index 3ec8e50efa16..2d6f2cf1e319 100644
> +++ b/include/linux/nvme-rdma.h
> @@ -52,13 +52,15 @@ static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
>   * @qid:           queue Identifier for the Admin or I/O Queue
>   * @hrqsize:       host receive queue size to be created
>   * @hsqsize:       host send queue size to be created
> + * @hmax_fr_pages: host maximum pages per fast reg
>   */
>  struct nvme_rdma_cm_req {
>  	__le16		recfmt;
>  	__le16		qid;
>  	__le16		hrqsize;
>  	__le16		hsqsize;
> -	u8		rsvd[24];
> +	__le32		hmax_fr_pages;
> +	u8		rsvd[20];
>  };

This is an on the wire change - do you need to get approval from some
standards body?

Jason

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
  2020-02-26 14:13 ` Krishnamraju Eraparaju
@ 2020-02-26 23:05   ` Sagi Grimberg
  -1 siblings, 0 replies; 20+ messages in thread
From: Sagi Grimberg @ 2020-02-26 23:05 UTC (permalink / raw)
  To: Krishnamraju Eraparaju, linux-nvme, hch; +Cc: linux-rdma, nirranjan, bharat


> Current nvmet-rdma code allocates MR pool budget based on host's SQ
> size, assuming both host and target use the same "max_pages_per_mr"
> count. But if host's max_pages_per_mr is greater than target's, then
> target can run out of MRs while processing larger IO WRITEs.
> 
> That is, say host's SQ size is 100, then the MR pool budget allocated
> currently at target will also be 100 MRs. But 100 IO WRITE Requests
> with 256 sg_count(IO size above 1MB) require 200 MRs when target's
> "max_pages_per_mr" is 128.

The patch doesn't say if this is an actual bug you are seeing or
theoretical.

> The proposed patch enables host to advertise the max_fr_pages(via
> nvme_rdma_cm_req) such that target can allocate that many number of
> RW ctxs(if host's max_fr_pages is higher than target's).

As mentioned by Jason, this is a non-compatible change, if you want to
introduce this you need to go through the standard and update the
cm private_data layout (would mean that the fmt needs to increment as
well to be backward compatible).


As a stop-gap, nvmet needs to limit the controller mdts to how much
it can allocate based on the HCA capabilities
(max_fast_reg_page_list_len).

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
@ 2020-02-26 23:05   ` Sagi Grimberg
  0 siblings, 0 replies; 20+ messages in thread
From: Sagi Grimberg @ 2020-02-26 23:05 UTC (permalink / raw)
  To: Krishnamraju Eraparaju, linux-nvme, hch; +Cc: linux-rdma, bharat, nirranjan


> Current nvmet-rdma code allocates MR pool budget based on host's SQ
> size, assuming both host and target use the same "max_pages_per_mr"
> count. But if host's max_pages_per_mr is greater than target's, then
> target can run out of MRs while processing larger IO WRITEs.
> 
> That is, say host's SQ size is 100, then the MR pool budget allocated
> currently at target will also be 100 MRs. But 100 IO WRITE Requests
> with 256 sg_count(IO size above 1MB) require 200 MRs when target's
> "max_pages_per_mr" is 128.

The patch doesn't say if this is an actual bug you are seeing or
theoretical.

> The proposed patch enables host to advertise the max_fr_pages(via
> nvme_rdma_cm_req) such that target can allocate that many number of
> RW ctxs(if host's max_fr_pages is higher than target's).

As mentioned by Jason, this is a non-compatible change, if you want to
introduce this you need to go through the standard and update the
cm private_data layout (would mean that the fmt needs to increment as
well to be backward compatible).


As a stop-gap, nvmet needs to limit the controller mdts to how much
it can allocate based on the HCA capabilities
(max_fast_reg_page_list_len).

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
  2020-02-26 17:07   ` Jason Gunthorpe
@ 2020-02-26 23:19     ` Sagi Grimberg
  -1 siblings, 0 replies; 20+ messages in thread
From: Sagi Grimberg @ 2020-02-26 23:19 UTC (permalink / raw)
  To: Jason Gunthorpe, Krishnamraju Eraparaju
  Cc: linux-nvme, hch, linux-rdma, nirranjan, bharat


>> diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h
>> index 3ec8e50efa16..2d6f2cf1e319 100644
>> +++ b/include/linux/nvme-rdma.h
>> @@ -52,13 +52,15 @@ static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
>>    * @qid:           queue Identifier for the Admin or I/O Queue
>>    * @hrqsize:       host receive queue size to be created
>>    * @hsqsize:       host send queue size to be created
>> + * @hmax_fr_pages: host maximum pages per fast reg
>>    */
>>   struct nvme_rdma_cm_req {
>>   	__le16		recfmt;
>>   	__le16		qid;
>>   	__le16		hrqsize;
>>   	__le16		hsqsize;
>> -	u8		rsvd[24];
>> +	__le32		hmax_fr_pages;
>> +	u8		rsvd[20];
>>   };
> 
> This is an on the wire change - do you need to get approval from some
> standards body?

Yes, this needs to go through the NVMe TWG for sure.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
@ 2020-02-26 23:19     ` Sagi Grimberg
  0 siblings, 0 replies; 20+ messages in thread
From: Sagi Grimberg @ 2020-02-26 23:19 UTC (permalink / raw)
  To: Jason Gunthorpe, Krishnamraju Eraparaju
  Cc: linux-rdma, bharat, nirranjan, hch, linux-nvme


>> diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h
>> index 3ec8e50efa16..2d6f2cf1e319 100644
>> +++ b/include/linux/nvme-rdma.h
>> @@ -52,13 +52,15 @@ static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
>>    * @qid:           queue Identifier for the Admin or I/O Queue
>>    * @hrqsize:       host receive queue size to be created
>>    * @hsqsize:       host send queue size to be created
>> + * @hmax_fr_pages: host maximum pages per fast reg
>>    */
>>   struct nvme_rdma_cm_req {
>>   	__le16		recfmt;
>>   	__le16		qid;
>>   	__le16		hrqsize;
>>   	__le16		hsqsize;
>> -	u8		rsvd[24];
>> +	__le32		hmax_fr_pages;
>> +	u8		rsvd[20];
>>   };
> 
> This is an on the wire change - do you need to get approval from some
> standards body?

Yes, this needs to go through the NVMe TWG for sure.

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
  2020-02-26 23:05   ` Sagi Grimberg
@ 2020-02-27 15:46     ` Krishnamraju Eraparaju
  -1 siblings, 0 replies; 20+ messages in thread
From: Krishnamraju Eraparaju @ 2020-02-27 15:46 UTC (permalink / raw)
  To: Sagi Grimberg, jgg; +Cc: linux-nvme, hch, linux-rdma, nirranjan, bharat

Hi Sagi & Jason,
	
Thanks for the comments, please see inline.

On Wednesday, February 02/26/20, 2020 at 15:05:59 -0800, Sagi Grimberg wrote:
> 
> >Current nvmet-rdma code allocates MR pool budget based on host's SQ
> >size, assuming both host and target use the same "max_pages_per_mr"
> >count. But if host's max_pages_per_mr is greater than target's, then
> >target can run out of MRs while processing larger IO WRITEs.
> >
> >That is, say host's SQ size is 100, then the MR pool budget allocated
> >currently at target will also be 100 MRs. But 100 IO WRITE Requests
> >with 256 sg_count(IO size above 1MB) require 200 MRs when target's
> >"max_pages_per_mr" is 128.
> 
> The patch doesn't say if this is an actual bug you are seeing or
> theoretical.
	
I've noticed this issue while running the below fio command:
fio --rw=randwrite --name=random --norandommap --ioengine=libaio
--size=16m --group_reporting --exitall --fsync_on_close=1 --invalidate=1
--direct=1 --filename=/dev/nvme2n1 --iodepth=32 --numjobs=16
--unit_base=1 --bs=4m --kb_base=1000

Note: here NVMe Host is on SIW & Target is on iw_cxgb4 and the
max_pages_per_mr supported by SIW and iw_cxgb4 are 255 and 128
respectively.
	
Traces on Target:

#cat /sys/kernel/debug/tracing/trace_pipe|grep -v "status=0x0"
kworker/8:1H-2461  [008] .... 25476.995437: nvmet_req_complete: nvmet1:
disk=/dev/ram0, qid=1, cmdid=3, res=0xffff8b7f2ae534d0, status=0x6
kworker/8:1H-2461  [008] .... 25476.995467: nvmet_req_complete: nvmet1:
disk=/dev/ram0, qid=1, cmdid=4, res=0xffff8b7f2ae53700, status=0x6
kworker/8:1H-2461  [008] .... 25476.995511: nvmet_req_complete: nvmet1:
disk=/dev/ram0, qid=1, cmdid=1, res=0xffff8b7f2ae53980, status=0x6

> 
> >The proposed patch enables host to advertise the max_fr_pages(via
> >nvme_rdma_cm_req) such that target can allocate that many number of
> >RW ctxs(if host's max_fr_pages is higher than target's).
> 
> As mentioned by Jason, this s a non-compatible change, if you want to
> introduce this you need to go through the standard and update the
> cm private_data layout (would mean that the fmt needs to increment as
> well to be backward compatible).

Sure, will initiate a discussion at NVMe TWG about CM private_data format.
Will update the response soon.
> 
> 
> As a stop-gap, nvmet needs to limit the controller mdts to how much
> it can allocate based on the HCA capabilities
> (max_fast_reg_page_list_len).

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
@ 2020-02-27 15:46     ` Krishnamraju Eraparaju
  0 siblings, 0 replies; 20+ messages in thread
From: Krishnamraju Eraparaju @ 2020-02-27 15:46 UTC (permalink / raw)
  To: Sagi Grimberg, jgg; +Cc: linux-rdma, bharat, nirranjan, hch, linux-nvme

Hi Sagi & Jason,
	
Thanks for the comments, please see inline.

On Wednesday, February 02/26/20, 2020 at 15:05:59 -0800, Sagi Grimberg wrote:
> 
> >Current nvmet-rdma code allocates MR pool budget based on host's SQ
> >size, assuming both host and target use the same "max_pages_per_mr"
> >count. But if host's max_pages_per_mr is greater than target's, then
> >target can run out of MRs while processing larger IO WRITEs.
> >
> >That is, say host's SQ size is 100, then the MR pool budget allocated
> >currently at target will also be 100 MRs. But 100 IO WRITE Requests
> >with 256 sg_count(IO size above 1MB) require 200 MRs when target's
> >"max_pages_per_mr" is 128.
> 
> The patch doesn't say if this is an actual bug you are seeing or
> theoretical.
	
I've noticed this issue while running the below fio command:
fio --rw=randwrite --name=random --norandommap --ioengine=libaio
--size=16m --group_reporting --exitall --fsync_on_close=1 --invalidate=1
--direct=1 --filename=/dev/nvme2n1 --iodepth=32 --numjobs=16
--unit_base=1 --bs=4m --kb_base=1000

Note: here NVMe Host is on SIW & Target is on iw_cxgb4 and the
max_pages_per_mr supported by SIW and iw_cxgb4 are 255 and 128
respectively.
	
Traces on Target:

#cat /sys/kernel/debug/tracing/trace_pipe|grep -v "status=0x0"
kworker/8:1H-2461  [008] .... 25476.995437: nvmet_req_complete: nvmet1:
disk=/dev/ram0, qid=1, cmdid=3, res=0xffff8b7f2ae534d0, status=0x6
kworker/8:1H-2461  [008] .... 25476.995467: nvmet_req_complete: nvmet1:
disk=/dev/ram0, qid=1, cmdid=4, res=0xffff8b7f2ae53700, status=0x6
kworker/8:1H-2461  [008] .... 25476.995511: nvmet_req_complete: nvmet1:
disk=/dev/ram0, qid=1, cmdid=1, res=0xffff8b7f2ae53980, status=0x6

> 
> >The proposed patch enables host to advertise the max_fr_pages(via
> >nvme_rdma_cm_req) such that target can allocate that many number of
> >RW ctxs(if host's max_fr_pages is higher than target's).
> 
> As mentioned by Jason, this s a non-compatible change, if you want to
> introduce this you need to go through the standard and update the
> cm private_data layout (would mean that the fmt needs to increment as
> well to be backward compatible).

Sure, will initiate a discussion at NVMe TWG about CM private_data format.
Will update the response soon.
> 
> 
> As a stop-gap, nvmet needs to limit the controller mdts to how much
> it can allocate based on the HCA capabilities
> (max_fast_reg_page_list_len).

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
  2020-02-27 15:46     ` Krishnamraju Eraparaju
@ 2020-02-27 23:14       ` Sagi Grimberg
  -1 siblings, 0 replies; 20+ messages in thread
From: Sagi Grimberg @ 2020-02-27 23:14 UTC (permalink / raw)
  To: Krishnamraju Eraparaju, jgg
  Cc: linux-nvme, hch, linux-rdma, nirranjan, bharat


>> The patch doesn't say if this is an actual bug you are seeing or
>> theoretical.
> 	
> I've noticed this issue while running the below fio command:
> fio --rw=randwrite --name=random --norandommap --ioengine=libaio
> --size=16m --group_reporting --exitall --fsync_on_close=1 --invalidate=1
> --direct=1 --filename=/dev/nvme2n1 --iodepth=32 --numjobs=16
> --unit_base=1 --bs=4m --kb_base=1000
> 
> Note: here NVMe Host is on SIW & Target is on iw_cxgb4 and the
> max_pages_per_mr supported by SIW and iw_cxgb4 are 255 and 128
> respectively.

This needs to be documented in the change log.

>>> The proposed patch enables host to advertise the max_fr_pages(via
>>> nvme_rdma_cm_req) such that target can allocate that many number of
>>> RW ctxs(if host's max_fr_pages is higher than target's).
>>
>> As mentioned by Jason, this s a non-compatible change, if you want to
>> introduce this you need to go through the standard and update the
>> cm private_data layout (would mean that the fmt needs to increment as
>> well to be backward compatible).
> 
> Sure, will initiate a discussion at NVMe TWG about CM private_data format.
> Will update the response soon.
>>
>>
>> As a stop-gap, nvmet needs to limit the controller mdts to how much
>> it can allocate based on the HCA capabilities
>> (max_fast_reg_page_list_len).

Sounds good, please look at capping mdts in the mean time.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
@ 2020-02-27 23:14       ` Sagi Grimberg
  0 siblings, 0 replies; 20+ messages in thread
From: Sagi Grimberg @ 2020-02-27 23:14 UTC (permalink / raw)
  To: Krishnamraju Eraparaju, jgg
  Cc: linux-rdma, bharat, nirranjan, hch, linux-nvme


>> The patch doesn't say if this is an actual bug you are seeing or
>> theoretical.
> 	
> I've noticed this issue while running the below fio command:
> fio --rw=randwrite --name=random --norandommap --ioengine=libaio
> --size=16m --group_reporting --exitall --fsync_on_close=1 --invalidate=1
> --direct=1 --filename=/dev/nvme2n1 --iodepth=32 --numjobs=16
> --unit_base=1 --bs=4m --kb_base=1000
> 
> Note: here NVMe Host is on SIW & Target is on iw_cxgb4 and the
> max_pages_per_mr supported by SIW and iw_cxgb4 are 255 and 128
> respectively.

This needs to be documented in the change log.

>>> The proposed patch enables host to advertise the max_fr_pages(via
>>> nvme_rdma_cm_req) such that target can allocate that many number of
>>> RW ctxs(if host's max_fr_pages is higher than target's).
>>
>> As mentioned by Jason, this s a non-compatible change, if you want to
>> introduce this you need to go through the standard and update the
>> cm private_data layout (would mean that the fmt needs to increment as
>> well to be backward compatible).
> 
> Sure, will initiate a discussion at NVMe TWG about CM private_data format.
> Will update the response soon.
>>
>>
>> As a stop-gap, nvmet needs to limit the controller mdts to how much
>> it can allocate based on the HCA capabilities
>> (max_fast_reg_page_list_len).

Sounds good, please look at capping mdts in the mean time.

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
  2020-02-27 23:14       ` Sagi Grimberg
@ 2020-03-01 14:05         ` Max Gurtovoy
  -1 siblings, 0 replies; 20+ messages in thread
From: Max Gurtovoy @ 2020-03-01 14:05 UTC (permalink / raw)
  To: Sagi Grimberg, Krishnamraju Eraparaju, jgg
  Cc: linux-nvme, hch, linux-rdma, nirranjan, bharat


On 2/28/2020 1:14 AM, Sagi Grimberg wrote:
>
>>> The patch doesn't say if this is an actual bug you are seeing or
>>> theoretical.
>>
>> I've noticed this issue while running the below fio command:
>> fio --rw=randwrite --name=random --norandommap --ioengine=libaio
>> --size=16m --group_reporting --exitall --fsync_on_close=1 --invalidate=1
>> --direct=1 --filename=/dev/nvme2n1 --iodepth=32 --numjobs=16
>> --unit_base=1 --bs=4m --kb_base=1000
>>
>> Note: here NVMe Host is on SIW & Target is on iw_cxgb4 and the
>> max_pages_per_mr supported by SIW and iw_cxgb4 are 255 and 128
>> respectively.
>
> This needs to be documented in the change log.
>
>>>> The proposed patch enables host to advertise the max_fr_pages(via
>>>> nvme_rdma_cm_req) such that target can allocate that many number of
>>>> RW ctxs(if host's max_fr_pages is higher than target's).
>>>
>>> As mentioned by Jason, this s a non-compatible change, if you want to
>>> introduce this you need to go through the standard and update the
>>> cm private_data layout (would mean that the fmt needs to increment as
>>> well to be backward compatible).
>>
>> Sure, will initiate a discussion at NVMe TWG about CM private_data 
>> format.
>> Will update the response soon.
>>>
>>>
>>> As a stop-gap, nvmet needs to limit the controller mdts to how much
>>> it can allocate based on the HCA capabilities
>>> (max_fast_reg_page_list_len).
>
> Sounds good, please look at capping mdts in the mean time.

guys, see my patches from adding MD support.

I'm setting mdts per ctrl there.

we can merge it meantime for this issue.



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
@ 2020-03-01 14:05         ` Max Gurtovoy
  0 siblings, 0 replies; 20+ messages in thread
From: Max Gurtovoy @ 2020-03-01 14:05 UTC (permalink / raw)
  To: Sagi Grimberg, Krishnamraju Eraparaju, jgg
  Cc: linux-rdma, bharat, nirranjan, hch, linux-nvme


On 2/28/2020 1:14 AM, Sagi Grimberg wrote:
>
>>> The patch doesn't say if this is an actual bug you are seeing or
>>> theoretical.
>>
>> I've noticed this issue while running the below fio command:
>> fio --rw=randwrite --name=random --norandommap --ioengine=libaio
>> --size=16m --group_reporting --exitall --fsync_on_close=1 --invalidate=1
>> --direct=1 --filename=/dev/nvme2n1 --iodepth=32 --numjobs=16
>> --unit_base=1 --bs=4m --kb_base=1000
>>
>> Note: here NVMe Host is on SIW & Target is on iw_cxgb4 and the
>> max_pages_per_mr supported by SIW and iw_cxgb4 are 255 and 128
>> respectively.
>
> This needs to be documented in the change log.
>
>>>> The proposed patch enables host to advertise the max_fr_pages(via
>>>> nvme_rdma_cm_req) such that target can allocate that many number of
>>>> RW ctxs(if host's max_fr_pages is higher than target's).
>>>
>>> As mentioned by Jason, this s a non-compatible change, if you want to
>>> introduce this you need to go through the standard and update the
>>> cm private_data layout (would mean that the fmt needs to increment as
>>> well to be backward compatible).
>>
>> Sure, will initiate a discussion at NVMe TWG about CM private_data 
>> format.
>> Will update the response soon.
>>>
>>>
>>> As a stop-gap, nvmet needs to limit the controller mdts to how much
>>> it can allocate based on the HCA capabilities
>>> (max_fast_reg_page_list_len).
>
> Sounds good, please look at capping mdts in the mean time.

guys, see my patches from adding MD support.

I'm setting mdts per ctrl there.

we can merge it meantime for this issue.



_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
  2020-03-01 14:05         ` Max Gurtovoy
@ 2020-03-02  7:32           ` Krishnamraju Eraparaju
  -1 siblings, 0 replies; 20+ messages in thread
From: Krishnamraju Eraparaju @ 2020-03-02  7:32 UTC (permalink / raw)
  To: Max Gurtovoy, sagi
  Cc: Sagi Grimberg, jgg, linux-nvme, hch, linux-rdma, nirranjan, bharat

Hi Sagi & Max Gurtovoy,

Thanks for your pointers regarding mdts.

Looks like fixing this issue through mdts is more natural than fixing
through RDMA private data.

Issue is not occurring after applying the below patch (inspired by Max's
patch "nvmet-rdma: Implement set_mdts controller op").

So any consensus about merging the fix upstream, to fix this specific
issue?

diff --git a/drivers/nvme/target/admin-cmd.c
b/drivers/nvme/target/admin-cmd.c
index 56c21b50..0d468ab 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -346,8 +346,12 @@ static void nvmet_execute_identify_ctrl(struct
nvmet_req *req)
        /* we support multiple ports, multiples hosts and ANA: */
        id->cmic = (1 << 0) | (1 << 1) | (1 << 3);

-       /* no limit on data transfer sizes for now */
-       id->mdts = 0;
+       /* Limit MDTS according to transport capability */
+       if (ctrl->ops->set_mdts)
+               id->mdts = ctrl->ops->set_mdts(ctrl);
+       else
+               id->mdts = 0;
+
        id->cntlid = cpu_to_le16(ctrl->cntlid);
        id->ver = cpu_to_le32(ctrl->subsys->ver);

diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 46df45e..851f8ed 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -279,6 +279,7 @@ struct nvmet_fabrics_ops {
                        struct nvmet_port *port, char *traddr);
        u16 (*install_queue)(struct nvmet_sq *nvme_sq);
        void (*discovery_chg)(struct nvmet_port *port);
+       u8 (*set_mdts)(struct nvmet_ctrl *ctrl);
 };

 #define NVMET_MAX_INLINE_BIOVEC        8
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 37d262a..62363be 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1602,6 +1602,17 @@ static void nvmet_rdma_disc_port_addr(struct
nvmet_req *req,
        }
 }

+static u8 nvmet_rdma_set_mdts(struct nvmet_ctrl *ctrl)
+{
+       struct nvmet_port *port = ctrl->port;
+       struct rdma_cm_id *cm_id = port->priv;
+       u32 max_pages;
+
+       max_pages = cm_id->device->attrs.max_fast_reg_page_list_len;
+       /* Assume mpsmin == device_page_size == 4KB */
+       return ilog2(max_pages);
+}
+
 static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
        .owner                  = THIS_MODULE,
        .type                   = NVMF_TRTYPE_RDMA,
@@ -1612,6 +1623,7 @@ static void nvmet_rdma_disc_port_addr(struct
nvmet_req *req,
        .queue_response         = nvmet_rdma_queue_response,
        .delete_ctrl            = nvmet_rdma_delete_ctrl,
        .disc_traddr            = nvmet_rdma_disc_port_addr,
+       .set_mdts               = nvmet_rdma_set_mdts,
 };

 static void nvmet_rdma_remove_one(struct ib_device *ib_device, void
*client_data)


Thanks,
Krishna.

On Sunday, March 03/01/20, 2020 at 16:05:53 +0200, Max Gurtovoy wrote:
> 
> On 2/28/2020 1:14 AM, Sagi Grimberg wrote:
> >
> >>>The patch doesn't say if this is an actual bug you are seeing or
> >>>theoretical.
> >>
> >>I've noticed this issue while running the below fio command:
> >>fio --rw=randwrite --name=random --norandommap --ioengine=libaio
> >>--size=16m --group_reporting --exitall --fsync_on_close=1 --invalidate=1
> >>--direct=1 --filename=/dev/nvme2n1 --iodepth=32 --numjobs=16
> >>--unit_base=1 --bs=4m --kb_base=1000
> >>
> >>Note: here NVMe Host is on SIW & Target is on iw_cxgb4 and the
> >>max_pages_per_mr supported by SIW and iw_cxgb4 are 255 and 128
> >>respectively.
> >
> >This needs to be documented in the change log.
> >
> >>>>The proposed patch enables host to advertise the max_fr_pages(via
> >>>>nvme_rdma_cm_req) such that target can allocate that many number of
> >>>>RW ctxs(if host's max_fr_pages is higher than target's).
> >>>
> >>>As mentioned by Jason, this is a non-compatible change, if you want to
> >>>introduce this you need to go through the standard and update the
> >>>cm private_data layout (would mean that the fmt needs to increment as
> >>>well to be backward compatible).
> >>
> >>Sure, will initiate a discussion at NVMe TWG about CM
> >>private_data format.
> >>Will update the response soon.
> >>>
> >>>
> >>>As a stop-gap, nvmet needs to limit the controller mdts to how much
> >>>it can allocate based on the HCA capabilities
> >>>(max_fast_reg_page_list_len).
> >
> >Sounds good, please look at capping mdts in the mean time.
> 
> guys, see my patches from adding MD support.
> 
> I'm setting mdts per ctrl there.
> 
> we can merge it meantime for this issue.
> 
> 

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
@ 2020-03-02  7:32           ` Krishnamraju Eraparaju
  0 siblings, 0 replies; 20+ messages in thread
From: Krishnamraju Eraparaju @ 2020-03-02  7:32 UTC (permalink / raw)
  To: Max Gurtovoy, sagi
  Cc: Sagi Grimberg, linux-rdma, bharat, nirranjan, linux-nvme, jgg, hch

Hi Sagi & Max Gurtovoy,

Thanks for your pointers regarding mdts.

Looks like fixing this issue through mdts is more natural than fixing
through RDMA private data.

Issue is not occurring after applying the below patch (inspired by Max's
patch "nvmet-rdma: Implement set_mdts controller op").

So any consensus about merging the fix upstream, to fix this specific
issue?

diff --git a/drivers/nvme/target/admin-cmd.c
b/drivers/nvme/target/admin-cmd.c
index 56c21b50..0d468ab 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -346,8 +346,12 @@ static void nvmet_execute_identify_ctrl(struct
nvmet_req *req)
        /* we support multiple ports, multiples hosts and ANA: */
        id->cmic = (1 << 0) | (1 << 1) | (1 << 3);

-       /* no limit on data transfer sizes for now */
-       id->mdts = 0;
+       /* Limit MDTS according to transport capability */
+       if (ctrl->ops->set_mdts)
+               id->mdts = ctrl->ops->set_mdts(ctrl);
+       else
+               id->mdts = 0;
+
        id->cntlid = cpu_to_le16(ctrl->cntlid);
        id->ver = cpu_to_le32(ctrl->subsys->ver);

diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 46df45e..851f8ed 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -279,6 +279,7 @@ struct nvmet_fabrics_ops {
                        struct nvmet_port *port, char *traddr);
        u16 (*install_queue)(struct nvmet_sq *nvme_sq);
        void (*discovery_chg)(struct nvmet_port *port);
+       u8 (*set_mdts)(struct nvmet_ctrl *ctrl);
 };

 #define NVMET_MAX_INLINE_BIOVEC        8
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 37d262a..62363be 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1602,6 +1602,17 @@ static void nvmet_rdma_disc_port_addr(struct
nvmet_req *req,
        }
 }

+static u8 nvmet_rdma_set_mdts(struct nvmet_ctrl *ctrl)
+{
+       struct nvmet_port *port = ctrl->port;
+       struct rdma_cm_id *cm_id = port->priv;
+       u32 max_pages;
+
+       max_pages = cm_id->device->attrs.max_fast_reg_page_list_len;
+       /* Assume mpsmin == device_page_size == 4KB */
+       return ilog2(max_pages);
+}
+
 static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
        .owner                  = THIS_MODULE,
        .type                   = NVMF_TRTYPE_RDMA,
@@ -1612,6 +1623,7 @@ static void nvmet_rdma_disc_port_addr(struct
nvmet_req *req,
        .queue_response         = nvmet_rdma_queue_response,
        .delete_ctrl            = nvmet_rdma_delete_ctrl,
        .disc_traddr            = nvmet_rdma_disc_port_addr,
+       .set_mdts               = nvmet_rdma_set_mdts,
 };

 static void nvmet_rdma_remove_one(struct ib_device *ib_device, void
*client_data)


Thanks,
Krishna.

On Sunday, March 03/01/20, 2020 at 16:05:53 +0200, Max Gurtovoy wrote:
> 
> On 2/28/2020 1:14 AM, Sagi Grimberg wrote:
> >
> >>>The patch doesn't say if this is an actual bug you are seeing or
> >>>theoretical.
> >>
> >>I've noticed this issue while running the below fio command:
> >>fio --rw=randwrite --name=random --norandommap --ioengine=libaio
> >>--size=16m --group_reporting --exitall --fsync_on_close=1 --invalidate=1
> >>--direct=1 --filename=/dev/nvme2n1 --iodepth=32 --numjobs=16
> >>--unit_base=1 --bs=4m --kb_base=1000
> >>
> >>Note: here NVMe Host is on SIW & Target is on iw_cxgb4 and the
> >>max_pages_per_mr supported by SIW and iw_cxgb4 are 255 and 128
> >>respectively.
> >
> >This needs to be documented in the change log.
> >
> >>>>The proposed patch enables host to advertise the max_fr_pages(via
> >>>>nvme_rdma_cm_req) such that target can allocate that many number of
> >>>>RW ctxs(if host's max_fr_pages is higher than target's).
> >>>
> >>>As mentioned by Jason, this is a non-compatible change, if you want to
> >>>introduce this you need to go through the standard and update the
> >>>cm private_data layout (would mean that the fmt needs to increment as
> >>>well to be backward compatible).
> >>
> >>Sure, will initiate a discussion at NVMe TWG about CM
> >>private_data format.
> >>Will update the response soon.
> >>>
> >>>
> >>>As a stop-gap, nvmet needs to limit the controller mdts to how much
> >>>it can allocate based on the HCA capabilities
> >>>(max_fast_reg_page_list_len).
> >
> >Sounds good, please look at capping mdts in the mean time.
> 
> guys, see my patches from adding MD support.
> 
> I'm setting mdts per ctrl there.
> 
> we can merge it meantime for this issue.
> 
> 

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
  2020-03-02  7:32           ` Krishnamraju Eraparaju
@ 2020-03-02 17:43             ` Sagi Grimberg
  -1 siblings, 0 replies; 20+ messages in thread
From: Sagi Grimberg @ 2020-03-02 17:43 UTC (permalink / raw)
  To: Krishnamraju Eraparaju, Max Gurtovoy
  Cc: jgg, linux-nvme, hch, linux-rdma, nirranjan, bharat


> Hi Sagi & Max Gurtovoy,
> 
> Thanks for your pointers regarding mdts.
> 
> Looks like fixing this issue through mdts is more natural than fixing
> through RDMA private data.
> 
> Issue is not occuring after appling the below patch(inspired by Max's
> patch "nvmet-rdma: Implement set_mdts controller op").
> 
> So any consensus about merging the fix upstream, to fix this specific
> issue?

I think we can do this asap.

Max, please send a patch for this.

Thanks,

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
@ 2020-03-02 17:43             ` Sagi Grimberg
  0 siblings, 0 replies; 20+ messages in thread
From: Sagi Grimberg @ 2020-03-02 17:43 UTC (permalink / raw)
  To: Krishnamraju Eraparaju, Max Gurtovoy
  Cc: linux-rdma, bharat, nirranjan, linux-nvme, jgg, hch


> Hi Sagi & Max Gurtovoy,
> 
> Thanks for your pointers regarding mdts.
> 
> Looks like fixing this issue through mdts is more natural than fixing
> through RDMA private data.
> 
> Issue is not occurring after applying the below patch (inspired by Max's
> patch "nvmet-rdma: Implement set_mdts controller op").
> 
> So any consensus about merging the fix upstream, to fix this specific
> issue?

I think we can do this asap.

Max, please send a patch for this.

Thanks,

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
  2020-03-02 17:43             ` Sagi Grimberg
@ 2020-03-03 23:11               ` Max Gurtovoy
  -1 siblings, 0 replies; 20+ messages in thread
From: Max Gurtovoy @ 2020-03-03 23:11 UTC (permalink / raw)
  To: Sagi Grimberg, Krishnamraju Eraparaju
  Cc: jgg, linux-nvme, hch, linux-rdma, nirranjan, bharat


On 3/2/2020 7:43 PM, Sagi Grimberg wrote:
>
>> Hi Sagi & Max Gurtovoy,
>>
>> Thanks for your pointers regarding mdts.
>>
>> Looks like fixing this issue through mdts is more natural than fixing
>> through RDMA private data.
>>
>> Issue is not occurring after applying the below patch (inspired by Max's
>> patch "nvmet-rdma: Implement set_mdts controller op").
>>
>> So any consensus about merging the fix upstream, to fix this specific
>> issue?
>
> I think we can do this asap.
>
> Max, please send a patch for this.


sure, I'll do that.

But since nvmet/rdma uses RW API that may use multiple MR's per IO 
request we need to implement the set_mdts differently.

We need to update the value of max_rdma_ctxs that set the MR pool size. 
We can limit the RDMA transport to support up to 1 or 2 MB and get the 
factor we need for setting the max_rdma_ctxs.

I'll send a fix soon.


>
> Thanks,

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len
@ 2020-03-03 23:11               ` Max Gurtovoy
  0 siblings, 0 replies; 20+ messages in thread
From: Max Gurtovoy @ 2020-03-03 23:11 UTC (permalink / raw)
  To: Sagi Grimberg, Krishnamraju Eraparaju
  Cc: linux-rdma, bharat, nirranjan, linux-nvme, jgg, hch


On 3/2/2020 7:43 PM, Sagi Grimberg wrote:
>
>> Hi Sagi & Max Gurtovoy,
>>
>> Thanks for your pointers regarding mdts.
>>
>> Looks like fixing this issue through mdts is more natural than fixing
>> through RDMA private data.
>>
>> Issue is not occurring after applying the below patch (inspired by Max's
>> patch "nvmet-rdma: Implement set_mdts controller op").
>>
>> So any consensus about merging the fix upstream, to fix this specific
>> issue?
>
> I think we can do this asap.
>
> Max, please send a patch for this.


sure, I'll do that.

But since nvmet/rdma uses RW API that may use multiple MR's per IO 
request we need to implement the set_mdts differently.

We need to update the value of max_rdma_ctxs that set the MR pool size. 
We can limit the RDMA transport to support up to 1 or 2 MB and get the 
factor we need for setting the max_rdma_ctxs.

I'll send a fix soon.


>
> Thanks,

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2020-03-03 23:11 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-26 14:13 [PATCH for-rc] nvme-rdma/nvmet-rdma: Allocate sufficient RW ctxs to match hosts pgs len Krishnamraju Eraparaju
2020-02-26 14:13 ` Krishnamraju Eraparaju
2020-02-26 17:07 ` Jason Gunthorpe
2020-02-26 17:07   ` Jason Gunthorpe
2020-02-26 23:19   ` Sagi Grimberg
2020-02-26 23:19     ` Sagi Grimberg
2020-02-26 23:05 ` Sagi Grimberg
2020-02-26 23:05   ` Sagi Grimberg
2020-02-27 15:46   ` Krishnamraju Eraparaju
2020-02-27 15:46     ` Krishnamraju Eraparaju
2020-02-27 23:14     ` Sagi Grimberg
2020-02-27 23:14       ` Sagi Grimberg
2020-03-01 14:05       ` Max Gurtovoy
2020-03-01 14:05         ` Max Gurtovoy
2020-03-02  7:32         ` Krishnamraju Eraparaju
2020-03-02  7:32           ` Krishnamraju Eraparaju
2020-03-02 17:43           ` Sagi Grimberg
2020-03-02 17:43             ` Sagi Grimberg
2020-03-03 23:11             ` Max Gurtovoy
2020-03-03 23:11               ` Max Gurtovoy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.