* [PATCH 0/2] nvme-fabrics: short-circuit connect retries
@ 2021-06-23 14:32 Hannes Reinecke
From: Hannes Reinecke @ 2021-06-23 14:32 UTC
  To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke

Hi all,

commit f25f8ef70ce2 ("nvme-fc: short-circuit reconnect retries")
allowed the fc transport to honour the DNR bit during reconnect
retries, allowing it to speed up error recovery.
These patches implement the same logic for RDMA and TCP.
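
In short, both patches change the reconnect handler along these lines
(a condensed sketch of the diffs below; the rdma version is analogous):

  static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int status)
  {
  	/*
  	 * status > 0: an NVMe status returned by the controller; if it
  	 * carries the DNR (Do Not Retry) bit, retrying the same connect
  	 * sequence is expected to fail again.
  	 * status < 0: a host-side errno (e.g. -ENOTCONN); no controller
  	 * verdict exists, so a retry may well succeed.
  	 */
  	bool recon = !(status > 0 && (status & NVME_SC_DNR));

  	if (recon && nvmf_should_reconnect(ctrl))
  		queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
  				ctrl->opts->reconnect_delay * HZ);
  	else
  		nvme_delete_ctrl(ctrl);
  }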

As usual, comments and reviews are welcome.

Hannes Reinecke (2):
  nvme-tcp: short-circuit reconnect retries
  nvme-rdma: short-circuit reconnect retries

 drivers/nvme/host/rdma.c | 23 ++++++++++++++++-------
 drivers/nvme/host/tcp.c  | 22 ++++++++++++++--------
 2 files changed, 30 insertions(+), 15 deletions(-)

-- 
2.29.2



* [PATCH 1/2] nvme-tcp: short-circuit reconnect retries
From: Hannes Reinecke @ 2021-06-23 14:32 UTC
  To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke

Returning an nvme status from nvme_tcp_setup_ctrl() indicates
that the association was established and we have received a status
from the controller; consequently we should honour the DNR bit:
if it is set, any future reconnect attempts will just return the same
error, so we can short-circuit the reconnect attempts and fail the
connection directly.
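
For context (not part of the patch): on the host, a completion status
from the controller is a positive 16-bit value with DNR at bit 14,
while transport-level failures are negative errnos; that is what makes
the sign check in the patch sufficient. Schematically (the helper name
is made up for illustration):

  #include <linux/nvme.h>	/* NVME_SC_DNR == 0x4000, bit 14 */

  /* negative: host-side errno, the controller was never reached;
   * positive: NVMe status code, where DNR means a retry will fail too */
  static bool nvme_connect_is_fatal(int status)
  {
  	return status > 0 && (status & NVME_SC_DNR);
  }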

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/host/tcp.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 34f4b3402f7c..ec5f511203b8 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1943,8 +1943,10 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
 	nvme_tcp_destroy_io_queues(ctrl, remove);
 }
 
-static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
+static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int status)
 {
+	bool recon = status > 0 && (status & NVME_SC_DNR) ? false : true;
+
 	/* If we are resetting/deleting then do nothing */
 	if (ctrl->state != NVME_CTRL_CONNECTING) {
 		WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
@@ -1952,13 +1954,13 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
 		return;
 	}
 
-	if (nvmf_should_reconnect(ctrl)) {
+	if (recon && nvmf_should_reconnect(ctrl)) {
 		dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
 			ctrl->opts->reconnect_delay);
 		queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
 				ctrl->opts->reconnect_delay * HZ);
 	} else {
-		dev_info(ctrl->device, "Removing controller...\n");
+		dev_info(ctrl->device, "Removing controller (%d)...\n", status);
 		nvme_delete_ctrl(ctrl);
 	}
 }
@@ -2038,10 +2040,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
 	struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
 			struct nvme_tcp_ctrl, connect_work);
 	struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+	int ret;
 
 	++ctrl->nr_reconnects;
 
-	if (nvme_tcp_setup_ctrl(ctrl, false))
+	ret = nvme_tcp_setup_ctrl(ctrl, false);
+	if (ret)
 		goto requeue;
 
 	dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
@@ -2054,7 +2058,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
 requeue:
 	dev_info(ctrl->device, "Failed reconnect attempt %d\n",
 			ctrl->nr_reconnects);
-	nvme_tcp_reconnect_or_remove(ctrl);
+	nvme_tcp_reconnect_or_remove(ctrl, ret);
 }
 
 static void nvme_tcp_error_recovery_work(struct work_struct *work)
@@ -2077,7 +2081,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 		return;
 	}
 
-	nvme_tcp_reconnect_or_remove(ctrl);
+	nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN);
 }
 
 static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
@@ -2103,6 +2107,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 {
 	struct nvme_ctrl *ctrl =
 		container_of(work, struct nvme_ctrl, reset_work);
+	int ret;
 
 	nvme_stop_ctrl(ctrl);
 	nvme_tcp_teardown_ctrl(ctrl, false);
@@ -2114,14 +2119,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 		return;
 	}
 
-	if (nvme_tcp_setup_ctrl(ctrl, false))
+	ret = nvme_tcp_setup_ctrl(ctrl, false);
+	if (ret)
 		goto out_fail;
 
 	return;
 
 out_fail:
 	++ctrl->nr_reconnects;
-	nvme_tcp_reconnect_or_remove(ctrl);
+	nvme_tcp_reconnect_or_remove(ctrl, ret);
 }
 
 static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
-- 
2.29.2



* [PATCH 2/2] nvme-rdma: short-circuit reconnect retries
From: Hannes Reinecke @ 2021-06-23 14:32 UTC
  To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke

Returning an nvme status from nvme_rdma_setup_ctrl() indicates
that the association was established and we have received a status
from the controller; consequently we should honour the DNR bit:
if it is set, any future reconnect attempts will just return the same
error, so we can short-circuit the reconnect attempts and fail the
connection directly.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/host/rdma.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 4697a94c0945..92c0bdf2bc74 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1067,8 +1067,11 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
 	kfree(ctrl);
 }
 
-static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl,
+		int status)
 {
+	bool recon = status > 0 && (status & NVME_SC_DNR) ? false : true;
+
 	/* If we are resetting/deleting then do nothing */
 	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
 		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
@@ -1076,12 +1079,14 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
 		return;
 	}
 
-	if (nvmf_should_reconnect(&ctrl->ctrl)) {
+	if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
 		dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
 			ctrl->ctrl.opts->reconnect_delay);
 		queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
 				ctrl->ctrl.opts->reconnect_delay * HZ);
 	} else {
+		dev_info(ctrl->ctrl.device, "Removing controller (%d)...\n",
+			 status);
 		nvme_delete_ctrl(&ctrl->ctrl);
 	}
 }
@@ -1166,10 +1171,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 {
 	struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
 			struct nvme_rdma_ctrl, reconnect_work);
+	int ret;
 
 	++ctrl->ctrl.nr_reconnects;
 
-	if (nvme_rdma_setup_ctrl(ctrl, false))
+	ret = nvme_rdma_setup_ctrl(ctrl, false);
+	if (ret)
 		goto requeue;
 
 	dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
@@ -1182,7 +1189,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 requeue:
 	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
 			ctrl->ctrl.nr_reconnects);
-	nvme_rdma_reconnect_or_remove(ctrl);
+	nvme_rdma_reconnect_or_remove(ctrl, ret);
 }
 
 static void nvme_rdma_error_recovery_work(struct work_struct *work)
@@ -1203,7 +1210,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 		return;
 	}
 
-	nvme_rdma_reconnect_or_remove(ctrl);
+	nvme_rdma_reconnect_or_remove(ctrl, -ENOTCONN);
 }
 
 static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
@@ -2259,6 +2266,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 {
 	struct nvme_rdma_ctrl *ctrl =
 		container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
+	int ret;
 
 	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -2269,14 +2277,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 		return;
 	}
 
-	if (nvme_rdma_setup_ctrl(ctrl, false))
+	ret = nvme_rdma_setup_ctrl(ctrl, false);
+	if (ret)
 		goto out_fail;
 
 	return;
 
 out_fail:
 	++ctrl->ctrl.nr_reconnects;
-	nvme_rdma_reconnect_or_remove(ctrl);
+	nvme_rdma_reconnect_or_remove(ctrl, ret);
 }
 
 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
-- 
2.29.2



* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: Sagi Grimberg @ 2021-06-23 21:38 UTC
  To: Hannes Reinecke, Christoph Hellwig; +Cc: Keith Busch, linux-nvme


> Hi all,
> 
> commit f25f8ef70ce2 ("nvme-fc: short-circuit reconnect retries")
> allowed the fc transport to honour the DNR bit during reconnect
> retries, allowing it to speed up error recovery.

How does this speed up error recovery?


* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: Hannes Reinecke @ 2021-06-24  5:51 UTC
  To: Sagi Grimberg, Christoph Hellwig; +Cc: Keith Busch, linux-nvme

On 6/23/21 11:38 PM, Sagi Grimberg wrote:
> 
>> Hi all,
>>
>> commit f25f8ef70ce2 ("nvme-fc: short-circuit reconnect retries")
>> allowed the fc transport to honour the DNR bit during reconnect
>> retries, allowing it to speed up error recovery.
> 
> How does this speed up error recovery?

Well, not exactly error recovery (as there is nothing to recover).
But we won't attempt pointless retries, thereby reducing the noise in 
the message log.
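
To illustrate with the message formats from these patches (device name,
attempt count and status value are made-up examples): today a controller
that rejects the connect with DNR set produces a retry loop like

  nvme nvme0: Failed reconnect attempt 1
  nvme nvme0: Reconnecting in 10 seconds...
  nvme nvme0: Failed reconnect attempt 2
  ...

until ctrl_loss_tmo expires, whereas with the patches the first attempt
ends it:

  nvme nvme0: Failed reconnect attempt 1
  nvme nvme0: Removing controller (16770)...

(16770 being NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR printed in
decimal).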

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer


* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: Chao Leng @ 2021-06-24  7:29 UTC
  To: Hannes Reinecke, Sagi Grimberg, Christoph Hellwig; +Cc: Keith Busch, linux-nvme



On 2021/6/24 13:51, Hannes Reinecke wrote:
> On 6/23/21 11:38 PM, Sagi Grimberg wrote:
>>
>>> Hi all,
>>>
>>> commit f25f8ef70ce2 ("nvme-fc: short-circuit reconnect retries")
>>> allowed the fc transport to honour the DNR bit during reconnect
>>> retries, allowing it to speed up error recovery.
>>
>> How does this speed up error recovery?
> 
> Well, not exactly error recovery (as there is nothing to recover).
> But we won't attempt pointless retries, thereby reducing the noise in the message log.
This conflicts with the tcp and rdma targets.
You may need to delete the improper NVME_SC_DNR at the target.
However, this will cause compatibility issues between different versions.
> 
> Cheers,
> 
> Hannes


* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: Hannes Reinecke @ 2021-06-24  8:10 UTC
  To: Chao Leng, Sagi Grimberg, Christoph Hellwig; +Cc: Keith Busch, linux-nvme

On 6/24/21 9:29 AM, Chao Leng wrote:
> 
> 
> On 2021/6/24 13:51, Hannes Reinecke wrote:
>> On 6/23/21 11:38 PM, Sagi Grimberg wrote:
>>>
>>>> Hi all,
>>>>
>>>> commit f25f8ef70ce2 ("nvme-fc: short-circuit reconnect retries")
>>>> allowed the fc transport to honour the DNR bit during reconnect
>>>> retries, allowing it to speed up error recovery.
>>>
>>> How does this speed up error recovery?
>>
>> Well, not exactly error recovery (as there is nothing to recover).
>> But we won't attempt pointless retries, thereby reducing the noise in
>> the message log.
> This conflicts with the tcp and rdma targets.
> You may need to delete the improper NVME_SC_DNR at the target.
> However, this will cause compatibility issues between different versions.

Which ones?
I checked the DNR usage in the target code, and they seem to set it
correctly (ie the result would not change when the command is retried).
With the possible exception of ENOSPC handling, as this is arguably
dynamic and might change with a retry.
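
(For reference, the ENOSPC case in question, from the target's errno
mapping, looks roughly like this:

  /* drivers/nvme/target/core.c, errno_to_nvme_status(), approximately */
  switch (errno) {
  case -ENOSPC:
  	status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
  	break;

i.e. a retry would only succeed if capacity was freed in the meantime.)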

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		           Kernel Storage Architect
hare@suse.de			                  +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer


* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: Chao Leng @ 2021-06-26  1:03 UTC
  To: Hannes Reinecke, Sagi Grimberg, Christoph Hellwig; +Cc: Keith Busch, linux-nvme



On 2021/6/24 16:10, Hannes Reinecke wrote:
> On 6/24/21 9:29 AM, Chao Leng wrote:
>>
>>
>> On 2021/6/24 13:51, Hannes Reinecke wrote:
>>> On 6/23/21 11:38 PM, Sagi Grimberg wrote:
>>>>
>>>>> Hi all,
>>>>>
>>>>> commit f25f8ef70ce2 ("nvme-fc: short-circuit reconnect retries")
>>>>> allowed the fc transport to honour the DNR bit during reconnect
>>>>> retries, allowing it to speed up error recovery.
>>>>
>>>> How does this speed up error recovery?
>>>
>>> Well, not exactly error recovery (as there is nothing to recover).
>>> But we won't attempt pointless retries, thereby reducing the noise in
>>> the message log.
>> This conflicts with the tcp and rdma targets.
>> You may need to delete the improper NVME_SC_DNR at the target.
>> However, this will cause compatibility issues between different versions.
> 
> Which ones?
In many scenarios, the target sets DNR for abnormal packets,
but each new connection may not have the same error.
> I checked the DNR usage in the target code, and they seem to set it
> correctly (ie the result would not change when the command is retried).
> With the possible exception of ENOSPC handling, as this is arguably
> dynamic and might change with a retry.
The DNR status of the old connection may not be relevant to the re-established connection.
> 
> Cheers,
> 
> Hannes
> 


* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: Hannes Reinecke @ 2021-06-26 12:09 UTC
  To: Chao Leng, Sagi Grimberg, Christoph Hellwig; +Cc: Keith Busch, linux-nvme

On 6/26/21 3:03 AM, Chao Leng wrote:
>
> On 2021/6/24 16:10, Hannes Reinecke wrote:
>> On 6/24/21 9:29 AM, Chao Leng wrote:
[ .. ]
>>> This conflicts with the tcp and rdma targets.
>>> You may need to delete the improper NVME_SC_DNR at the target.
>>> However, this will cause compatibility issues between different
>>> versions.
>>
>> Which ones?
> In many scenarios, the target sets DNR for abnormal packets,
> but each new connection may not have the same error.

This patch series is only for the DNR bit set in response to the
'connect' command.
If the target is not able to process the 'connect' command, but may be
so in the future, it really should not set the DNR bit.
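
In target terms the distinction would look something like this (a
hypothetical sketch, not actual nvmet code; queue_alloc_failed stands
in for whatever transient condition applies):

  u16 status;

  if (le16_to_cpu(c->qid) > subsys->max_qid) {
  	/* malformed connect: will be just as malformed on every retry */
  	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
  } else if (queue_alloc_failed) {
  	/* transient resource shortage: may clear up, so no DNR */
  	status = NVME_SC_INTERNAL;
  }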

>> I checked the DNR usage in the target code, and they seem to set it
>> correctly (ie the result would not change when the command is retried).
>> With the possible exception of ENOSPC handling, as this is arguably
>> dynamic and might change with a retry.
> The DNR status of the old connection may not be relevant to the 
> re-established connection.

See above.
We are just checking the DNR settings for the 'connect' command (or any
other commands being sent during initial controller configuration).
If that fails, the connect never was properly initialized; if the
controller would return a different status after a reconnect, it simply
should not set the DNR bit ...

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer


* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: James Smart @ 2021-06-27 13:39 UTC
  To: Hannes Reinecke, Chao Leng, Sagi Grimberg, Christoph Hellwig
  Cc: Keith Busch, linux-nvme

On 6/26/2021 5:09 AM, Hannes Reinecke wrote:
> On 6/26/21 3:03 AM, Chao Leng wrote:
[ .. ]
> This patch series is only for the DNR bit set in response to the
> 'connect' command.
> If the target is not able to process the 'connect' command, but may be
> so in the future, it really should not set the DNR bit.
[ .. ]
> See above.
> We are just checking the DNR settings for the 'connect' command (or any
> other commands being sent during initial controller configuration).
> If that fails, the connect never was properly initialized; if the
> controller would return a different status after a reconnect, it simply
> should not set the DNR bit ...
>
> Cheers,
>
> Hannes

Agreed. Since the 1.3 spec says: "If set to ‘1’, indicates that if the
same command is re-submitted to any controller in the NVM subsystem,
then that re-submitted command is expected to fail."

Thus if the initial connect fails in this manner, any new association
will be on a different controller, where it is now expected that the
connect on that controller will fail too.  Thus - why continue to
connect when it's expected each will fail.

-- james


* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: Chao Leng @ 2021-06-28  1:10 UTC
  To: James Smart, Hannes Reinecke, Sagi Grimberg, Christoph Hellwig
  Cc: Keith Busch, linux-nvme



On 2021/6/27 21:39, James Smart wrote:
> On 6/26/2021 5:09 AM, Hannes Reinecke wrote:
[ .. ]
>
> Agreed. Since the 1.3 spec says: "If set to ‘1’, indicates that if the same command is re-submitted to any controller in the NVM subsystem, then that re-submitted command is expected to fail."
According to the definition in the protocol, this is not strictly implemented on the target side.
In nvme/target, there are many external errors where the DNR is set to 1,
for example an abnormal fabrics cmd.
> 
> Thus if the initial connect fails in this manner, any new association will be on a different controller, where it is now expected that the connect on that controller will fail too.  Thus - why continue to connect when it's expected each will fail.
Agree.
We should not attempt to re-establish the connection if the target cannot work due to a target inner error.
However, the target does not behave exactly like this today, so there are conflicts and compatibility issues.
> 
> -- james
> .


* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: Hannes Reinecke @ 2021-06-29 11:00 UTC
  To: Chao Leng, James Smart, Sagi Grimberg, Christoph Hellwig
  Cc: Keith Busch, linux-nvme

On 6/28/21 3:10 AM, Chao Leng wrote:
>
> On 2021/6/27 21:39, James Smart wrote:
[ .. ]
>> Agreed. Since the 1.3 spec says: "If set to ‘1’, indicates that if the
>> same command is re-submitted to any controller in the NVM subsystem,
>> then that re-submitted command is expected to fail."
> According to the definition in the protocol, this is not strictly
> implemented on the target side.
> In nvme/target, there are many external errors where the DNR is set to 1,
> for example an abnormal fabrics cmd.

Yes; but an abnormal fabrics cmd will be abnormal even after a reconnect.

>
>> Thus if the initial connect fails in this manner, any new association
>> will be on a different controller, where it is now expected that the
>> connect on that controller will fail too.  Thus - why continue to
>> connect when it's expected each will fail.
> Agree.
> We should not attempt to re-establish the connection if the target
> cannot work due to a target inner error.
> However, the target does not behave exactly like this today, so there
> are conflicts and compatibility issues.

Note: we never said the target should _not_ send a DNR state. We only
said the target should set the DNR bit if a retry (even after a
reconnect or on a different controller) will not change the result.
Which from what I can see _is_ the case, so I do not see any issues here.

Please give details for the issues you are concerned with.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer


* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: Chao Leng @ 2021-07-08  6:29 UTC
  To: Hannes Reinecke, James Smart, Sagi Grimberg, Christoph Hellwig
  Cc: Keith Busch, linux-nvme



On 2021/6/29 19:00, Hannes Reinecke wrote:
> On 6/28/21 3:10 AM, Chao Leng wrote:
[ .. ]
> Yes; but an abnormal fabrics cmd will be abnormal even after a reconnect.
Sorry for the delayed reply; I was on leave.
There are many possible causes for an abnormal fabrics cmd.
In some scenarios the error may not occur again once the link is
re-established, for example an HBA inner error or an abnormal or
attacked network.
> 
>>>
>>> Thus if the initial connect fails in this manner, any new association will be on a different controller, where it is now expected that the connect on that controller will fail too.  Thus - why continue to connect when it's expected each will fail.
>> Agree.
>> We should not attempt to re-establish the connection if the target cannot work due to a target inner error.
>> However, the target does not behave exactly like this today, so there are conflicts and compatibility issues.
> 
> Note: we never said the target should _not_ send a DNR state. We only said the target should set the DNR bit if a retry (even after a reconnect or on a different controller) will not change the result.
> Which from what I can see _is_ the case, so I do not see any issues here.
> 
> Please give details for the issues you are concerned with.
For example, suppose the host sends a fabrics cmd, but the cmd is
poisoned due to an HBA inner error or an abnormal or attacked network,
and the target then sets DNR in the response. If we do not reconnect
after a DNR response on the old connection, the connection cannot
recover automatically. The poisoning may be transient, and a retry may
succeed after reconnecting, so trying to reconnect is the better choice.
If we do not reconnect on a DNR response, the target should set the DNR
state only for target inner errors.
> 
> Cheers,
> 
> Hannes


* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: Hannes Reinecke @ 2021-07-08  7:36 UTC
  To: Chao Leng, James Smart, Sagi Grimberg, Christoph Hellwig
  Cc: Keith Busch, linux-nvme

On 7/8/21 8:29 AM, Chao Leng wrote:
> 
> 
> On 2021/6/29 19:00, Hannes Reinecke wrote:
[ .. ]
>> Please give details for the issues you are concerned with.
> For example, suppose the host sends a fabrics cmd, but the cmd is
> poisoned due to an HBA inner error or an abnormal or attacked network,
> and the target then sets DNR in the response. If we do not reconnect
> after a DNR response on the old connection, the connection cannot
> recover automatically. The poisoning may be transient, and a retry may
> succeed after reconnecting, so trying to reconnect is the better choice.

Beg to disagree here.
Any of the errors you describe here are something which _really_ do 
warrant a close inspection (as you for _sure_ will want to investigate 
if your network is under attack), so even _if_ a retry would 'fix' 
things we still should bail out to indicate to the caller that some 
interaction is in order.

> If we do not reconnect on a DNR response, the target should set the DNR
> state only for target inner errors.

Yes.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer


* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: James Smart @ 2021-07-09  4:57 UTC
  To: Chao Leng, Hannes Reinecke, Sagi Grimberg, Christoph Hellwig
  Cc: Keith Busch, linux-nvme

On 7/7/2021 11:29 PM, Chao Leng wrote:
> 
> 
> On 2021/6/29 19:00, Hannes Reinecke wrote:
[ .. ]
>> Please give details for the issues you are concerned with.
> For example, suppose the host sends a fabrics cmd, but the cmd is
> poisoned due to an HBA inner error or an abnormal or attacked network,
> and the target then sets DNR in the response. If we do not reconnect
> after a DNR response on the old connection, the connection cannot
> recover automatically. The poisoning may be transient, and a retry may
> succeed after reconnecting, so trying to reconnect is the better choice.
> If we do not reconnect on a DNR response, the target should set the DNR
> state only for target inner errors.
>>
>> Cheers,
>>
>> Hannes

It really doesn't matter what you describe happening on the back end
of the controllers/subsystem.  The rev 1.4 spec says "if the same
command is re-submitted to any controller in the NVM subsystem, then
that re-submitted command is expected to fail." - So, if there's a
chance that a reconnect would succeed, which would be on a different
controller - then the subsystem is not following that statement. So you
shouldn't be setting DNR.  If you disagree with this behavior, it will
need to be taken up with the NVM Express group.

-- james



* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: Chao Leng @ 2021-07-09  8:34 UTC
  To: James Smart, Hannes Reinecke, Sagi Grimberg, Christoph Hellwig
  Cc: Keith Busch, linux-nvme



On 2021/7/9 12:57, James Smart wrote:
> On 7/7/2021 11:29 PM, Chao Leng wrote:
[ .. ]
>
> It really doesn't matter what you describe happening on the back end of the controllers/subsystem.  The rev 1.4 spec says "if the same command is re-submitted to any controller in the NVM subsystem, then that re-submitted command is expected to fail." - So, if there's a chance that a reconnect would succeed, which would be on a different controller - then the subsystem is not following that statement. So you shouldn't be setting DNR.  If you disagree with this behavior, it will need to be taken up with the NVM Express group.
I agree with the nvme spec. I mean that the linux kernel nvme target does not behave exactly like this.
So we need to modify both host and target. In addition, the compatibility between different versions should be considered.
> 
> -- james
> 
> .


* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: Hannes Reinecke @ 2021-07-09  8:55 UTC
  To: Chao Leng, James Smart, Sagi Grimberg, Christoph Hellwig
  Cc: Keith Busch, linux-nvme

On 7/9/21 10:34 AM, Chao Leng wrote:
> 
> 
> On 2021/7/9 12:57, James Smart wrote:
>> On 7/7/2021 11:29 PM, Chao Leng wrote:
>>>
>>>
>>> On 2021/6/29 19:00, Hannes Reinecke wrote:
[ .. ]
>>>>
>>>> Please give details for the issues you are concerned with.
>>> For example, suppose the host sends a fabrics cmd, but the cmd is
>>> poisoned due to an HBA inner error or an abnormal or attacked network,
>>> and the target then sets DNR in the response. If we do not reconnect
>>> after a DNR response on the old connection, the connection cannot
>>> recover automatically. The poisoning may be transient, and a retry may
>>> succeed after reconnecting, so trying to reconnect is the better choice.
>>> If we do not reconnect on a DNR response, the target should set the DNR
>>> state only for target inner errors.
>>
>> It really doesn't matter what you describe happening on the back end
>> of the controllers/subsystem.  The rev 1.4 spec says "if the same
>> command is re-submitted to any controller in the NVM subsystem, then
>> that re-submitted command is expected to fail." - So, if there's a
>> chance that a reconnect would succeed, which would be on a different
>> controller - then the subsystem is not following that statement. So you
>> shouldn't be setting DNR.  If you disagree with this behavior, it will
>> need to be taken up with the NVM Express group.
> I agree with the nvme spec. I mean that the linux kernel nvme target
> does not behave exactly like this.
> So we need to modify both host and target. In addition, the compatibility
> between different versions should be considered.

Ah. You are talking about the _target_. I would be the first to admit 
that this could be cleared up quite a bit; there are lots of 
inconsistencies there.

And sure, I can do a patch for that, too.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer


* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: Hannes Reinecke @ 2021-07-09  8:59 UTC
  To: Chao Leng, James Smart, Sagi Grimberg, Christoph Hellwig
  Cc: Keith Busch, linux-nvme

On 7/9/21 10:55 AM, Hannes Reinecke wrote:
> On 7/9/21 10:34 AM, Chao Leng wrote:
[ .. ]
>> I agree with the nvme spec. I mean that the linux kernel nvme target
>> does not behave exactly like this.
>> So we need to modify both host and target. In addition, the compatibility
>> between different versions should be considered.
> 
> Ah. You are talking about the _target_. I would be the first to admit 
> that this could be cleared up quite a bit; there are lots of 
> inconsistencies there.
> 
> And sure, I can do a patch for that, too.
> 
But incidentally, I'd rather solve this in a different patchset, as
there are lots of places in the target code where we return
NVME_SC_INTERNAL upon allocation failure, which by rights should be
retryable. But then I'd rather implement ACRE for the target first, as
then we could classify the retry frequency; for allocation failures we
should give the target more time between retries than for e.g. locking
issues.
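
(For reference, the host side of ACRE already exists; roughly, from
nvme_retry_req() in drivers/nvme/host/core.c:

  /* CRD (bits 12:11 of the status field) selects one of the CRDT
   * values the controller advertised in Identify Controller,
   * in units of 100 milliseconds */
  crd = (nvme_req(req)->status & NVME_SC_CRD) >> 11;
  if (crd)
  	delay = nvme_req(req)->ctrl->crdt[crd - 1] * 100;

so a target-side implementation could steer the retry interval per
error class through the CRD field.)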

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer


* Re: [PATCH 0/2] nvme-fabrics: short-circuit connect retries
From: Christoph Hellwig @ 2021-07-16  7:38 UTC
  To: Hannes Reinecke
  Cc: Chao Leng, James Smart, Sagi Grimberg, Christoph Hellwig,
	Keith Busch, linux-nvme

On Fri, Jul 09, 2021 at 10:55:13AM +0200, Hannes Reinecke wrote:
>> I agree with the nvme spec. I mean that the linux kernel nvme target
>> does not behave exactly like this.
>> So we need to modify both host and target. In addition, the compatibility
>> between different versions should be considered.
>
> Ah. You are talking about the _target_. I would be the first to admit that 
> this could be cleared up quite a bit; there are lots of inconsistencies 
> there.
>
> And sure, I can do a patch for that, too.

I think we need to sort this out first to not create regressions in the
Linux to Linux interaction.


Thread overview: 19 messages
2021-06-23 14:32 [PATCH 0/2] nvme-fabrics: short-circuit connect retries Hannes Reinecke
2021-06-23 14:32 ` [PATCH 1/2] nvme-tcp: short-circuit reconnect retries Hannes Reinecke
2021-06-23 14:32 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke
2021-06-23 21:38 ` [PATCH 0/2] nvme-fabrics: short-circuit connect retries Sagi Grimberg
2021-06-24  5:51   ` Hannes Reinecke
2021-06-24  7:29     ` Chao Leng
2021-06-24  8:10       ` Hannes Reinecke
2021-06-26  1:03         ` Chao Leng
2021-06-26 12:09           ` Hannes Reinecke
2021-06-27 13:39             ` James Smart
2021-06-28  1:10               ` Chao Leng
2021-06-29 11:00                 ` Hannes Reinecke
2021-07-08  6:29                   ` Chao Leng
2021-07-08  7:36                     ` Hannes Reinecke
2021-07-09  4:57                     ` James Smart
2021-07-09  8:34                       ` Chao Leng
2021-07-09  8:55                         ` Hannes Reinecke
2021-07-09  8:59                           ` Hannes Reinecke
2021-07-16  7:38                           ` Christoph Hellwig
