All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] nvme: short-circuit connection retries
@ 2022-07-14 12:41 Hannes Reinecke
  2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
  2022-07-14 12:41 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke
  0 siblings, 2 replies; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-14 12:41 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Keith Busch, Sagi Grimberg, linux-nvme, Hannes Reinecke

Hi all,

here are two patches bringing the nvme-tcp and nvme-rdma transports
in line with the modification we already have in nvme-fc with commit
f25f8ef70ce2 ("nvme-fc: short-circuit reconnect retries").
Gist is to short-circuit reconnection retries if we get a
status back with the DNR bit set, in which case we shouldn't
retry the connection attempt.

As usual, comments and reviews are welcome.

Hannes Reinecke (2):
  nvme-tcp: short-circuit connect retries
  nvme-rdma: short-circuit connect retries

 drivers/nvme/host/rdma.c | 25 ++++++++++++++++++-------
 drivers/nvme/host/tcp.c  | 25 ++++++++++++++++++-------
 2 files changed, 36 insertions(+), 14 deletions(-)

-- 
2.29.2



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] nvme-tcp: short-circuit connect retries
  2022-07-14 12:41 [PATCH 0/2] nvme: short-circuit connection retries Hannes Reinecke
@ 2022-07-14 12:41 ` Hannes Reinecke
  2022-07-14 14:35   ` Sagi Grimberg
  2022-07-14 12:41 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke
  1 sibling, 1 reply; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-14 12:41 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Keith Busch, Sagi Grimberg, linux-nvme, Hannes Reinecke

When a reconnect attempt fails with a non-retryable status
(e.g. when the subsystem has been unprovisioned) there is hardly
any reason to retry the reconnect attempt.
So pass the actual error status to nvme_tcp_reconnect_or_remove()
and short-circuit retries if the DNR bit is set.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 70096a2e8762..4220c1ad6b29 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
 	nvme_tcp_destroy_io_queues(ctrl, remove);
 }
 
-static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
+static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int status)
 {
+	bool recon = true;
+
 	/* If we are resetting/deleting then do nothing */
 	if (ctrl->state != NVME_CTRL_CONNECTING) {
 		WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
@@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
 		return;
 	}
 
-	if (nvmf_should_reconnect(ctrl)) {
+	if (status > 0 && (status & NVME_SC_DNR)) {
+		dev_info(ctrl->device, "reconnect failure %d\n", status);
+		recon = false;
+	}
+
+	if (recon && nvmf_should_reconnect(ctrl)) {
 		dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
 			ctrl->opts->reconnect_delay);
 		queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
@@ -2162,10 +2169,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
 	struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
 			struct nvme_tcp_ctrl, connect_work);
 	struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+	int ret;
 
 	++ctrl->nr_reconnects;
 
-	if (nvme_tcp_setup_ctrl(ctrl, false))
+	ret = nvme_tcp_setup_ctrl(ctrl, false);
+	if (ret)
 		goto requeue;
 
 	dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
@@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
 requeue:
 	dev_info(ctrl->device, "Failed reconnect attempt %d\n",
 			ctrl->nr_reconnects);
-	nvme_tcp_reconnect_or_remove(ctrl);
+	nvme_tcp_reconnect_or_remove(ctrl, ret);
 }
 
 static void nvme_tcp_error_recovery_work(struct work_struct *work)
@@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 		return;
 	}
 
-	nvme_tcp_reconnect_or_remove(ctrl);
+	nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN);
 }
 
 static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
@@ -2229,6 +2238,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 {
 	struct nvme_ctrl *ctrl =
 		container_of(work, struct nvme_ctrl, reset_work);
+	int ret;
 
 	nvme_stop_ctrl(ctrl);
 	nvme_tcp_teardown_ctrl(ctrl, false);
@@ -2240,14 +2250,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 		return;
 	}
 
-	if (nvme_tcp_setup_ctrl(ctrl, false))
+	ret = nvme_tcp_setup_ctrl(ctrl, false);
+	if (ret)
 		goto out_fail;
 
 	return;
 
 out_fail:
 	++ctrl->nr_reconnects;
-	nvme_tcp_reconnect_or_remove(ctrl);
+	nvme_tcp_reconnect_or_remove(ctrl, ret);
 }
 
 static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
-- 
2.29.2



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] nvme-rdma: short-circuit connect retries
  2022-07-14 12:41 [PATCH 0/2] nvme: short-circuit connection retries Hannes Reinecke
  2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
@ 2022-07-14 12:41 ` Hannes Reinecke
  1 sibling, 0 replies; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-14 12:41 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Keith Busch, Sagi Grimberg, linux-nvme, Hannes Reinecke

When a reconnect attempt fails with a non-retryable status
(e.g. when the subsystem has been unprovisioned) there is hardly
any reason to retry the reconnect attempt.
So pass the actual error status to nvme_rdma_reconnect_or_remove()
and short-circuit retries if the DNR bit is set.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/host/rdma.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 84ce3347d158..bcc84f181dcd 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1065,8 +1065,10 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
 	kfree(ctrl);
 }
 
-static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl, int status)
 {
+	bool recon = true;
+
 	/* If we are resetting/deleting then do nothing */
 	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
 		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
@@ -1074,7 +1076,12 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
 		return;
 	}
 
-	if (nvmf_should_reconnect(&ctrl->ctrl)) {
+	if (status > 0 && (status & NVME_SC_DNR)) {
+		dev_info(ctrl->ctrl.device, "reconnect failure %d\n", status);
+		recon = false;
+	}
+
+	if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
 		dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
 			ctrl->ctrl.opts->reconnect_delay);
 		queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
@@ -1173,10 +1180,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 {
 	struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
 			struct nvme_rdma_ctrl, reconnect_work);
+	int ret;
 
 	++ctrl->ctrl.nr_reconnects;
 
-	if (nvme_rdma_setup_ctrl(ctrl, false))
+	ret = nvme_rdma_setup_ctrl(ctrl, false);
+	if (ret)
 		goto requeue;
 
 	dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
@@ -1189,7 +1198,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 requeue:
 	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
 			ctrl->ctrl.nr_reconnects);
-	nvme_rdma_reconnect_or_remove(ctrl);
+	nvme_rdma_reconnect_or_remove(ctrl, ret);
 }
 
 static void nvme_rdma_error_recovery_work(struct work_struct *work)
@@ -1212,7 +1221,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 		return;
 	}
 
-	nvme_rdma_reconnect_or_remove(ctrl);
+	nvme_rdma_reconnect_or_remove(ctrl, -ENOTCONN);
 }
 
 static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
@@ -2274,6 +2283,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 {
 	struct nvme_rdma_ctrl *ctrl =
 		container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
+	int ret;
 
 	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -2284,14 +2294,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 		return;
 	}
 
-	if (nvme_rdma_setup_ctrl(ctrl, false))
+	ret = nvme_rdma_setup_ctrl(ctrl, false);
+	if (ret)
 		goto out_fail;
 
 	return;
 
 out_fail:
 	++ctrl->ctrl.nr_reconnects;
-	nvme_rdma_reconnect_or_remove(ctrl);
+	nvme_rdma_reconnect_or_remove(ctrl, ret);
 }
 
 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
-- 
2.29.2



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] nvme-tcp: short-circuit connect retries
  2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
@ 2022-07-14 14:35   ` Sagi Grimberg
  2022-07-14 15:17     ` Hannes Reinecke
  0 siblings, 1 reply; 6+ messages in thread
From: Sagi Grimberg @ 2022-07-14 14:35 UTC (permalink / raw)
  To: Hannes Reinecke, Christoph Hellwig; +Cc: Keith Busch, linux-nvme


> When a reconnect attempt fails with a non-retryable status
> (eg when the subsystem has been unprovisioned) there hardly
> is any reason to retry the reconnect attempt.
> So pass the actual error status to nvme_tcp_reconnect_or_remove()
> and short-circuit retries if the DNR bit is set.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>   drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
>   1 file changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 70096a2e8762..4220c1ad6b29 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
>   	nvme_tcp_destroy_io_queues(ctrl, remove);
>   }
>   
> -static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
> +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int status)
>   {
> +	bool recon = true;
> +
>   	/* If we are resetting/deleting then do nothing */
>   	if (ctrl->state != NVME_CTRL_CONNECTING) {
>   		WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
> @@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
>   		return;
>   	}
>   
> -	if (nvmf_should_reconnect(ctrl)) {
> +	if (status > 0 && (status & NVME_SC_DNR)) {
> +		dev_info(ctrl->device, "reconnect failure %d\n", status);
> +		recon = false;
> +	}
> +

Why should we call this if we need to remove for sure?
I suggest we just change the call-site.

> +	if (recon && nvmf_should_reconnect(ctrl)) {
>   		dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
>   			ctrl->opts->reconnect_delay);
>   		queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
> @@ -2162,10 +2169,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
>   	struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
>   			struct nvme_tcp_ctrl, connect_work);
>   	struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
> +	int ret;
>   
>   	++ctrl->nr_reconnects;
>   
> -	if (nvme_tcp_setup_ctrl(ctrl, false))
> +	ret = nvme_tcp_setup_ctrl(ctrl, false);
> +	if (ret)
>   		goto requeue;

	status = nvme_tcp_setup_ctrl(ctrl, false);
	if (!status) {
		dev_info(ctrl->device,
			"Successfully reconnected (%d attempt)\n",
                         ctrl->nr_reconnects);
		ctrl->nr_reconnects = 0;
	} else {
		dev_info(ctrl->device,
			"Failed reconnect attempt %d (%d)\n",
				ctrl->nr_reconnects, status);
		if (status > 0 && (status & NVME_SC_DNR)) {
			dev_info(ctrl->device,
				"Removing controller...\n");
			nvme_delete_ctrl(ctrl);
		} else {
			nvme_tcp_reconnect_or_remove(ctrl);
		}
	}
}

>   
>   	dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
> @@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
>   requeue:
>   	dev_info(ctrl->device, "Failed reconnect attempt %d\n",
>   			ctrl->nr_reconnects);
> -	nvme_tcp_reconnect_or_remove(ctrl);
> +	nvme_tcp_reconnect_or_remove(ctrl, ret);
>   }
>   
>   static void nvme_tcp_error_recovery_work(struct work_struct *work)
> @@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
>   		return;
>   	}
>   
> -	nvme_tcp_reconnect_or_remove(ctrl);
> +	nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN);

This emphasizes that this is a redundant parameter.

>   }
>   
>   static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
> @@ -2229,6 +2238,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
>   {
>   	struct nvme_ctrl *ctrl =
>   		container_of(work, struct nvme_ctrl, reset_work);
> +	int ret;
>   
>   	nvme_stop_ctrl(ctrl);
>   	nvme_tcp_teardown_ctrl(ctrl, false);
> @@ -2240,14 +2250,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
>   		return;
>   	}
>   
> -	if (nvme_tcp_setup_ctrl(ctrl, false))
> +	ret = nvme_tcp_setup_ctrl(ctrl, false);
> +	if (ret)
>   		goto out_fail;

Similar error handling here.

	status = nvme_tcp_setup_ctrl(ctrl, false);
	if (status) {
		++ctrl->nr_reconnects;
		if (status > 0 && (status & NVME_SC_DNR)) {
			dev_info(ctrl->device,
				"Removing controller...\n");
			nvme_delete_ctrl(ctrl);
		} else {
			nvme_tcp_reconnect_or_remove(ctrl);
		}
	}

>   
>   	return;
>   
>   out_fail:
>   	++ctrl->nr_reconnects;
> -	nvme_tcp_reconnect_or_remove(ctrl);
> +	nvme_tcp_reconnect_or_remove(ctrl, ret);
>   }
>   
>   static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] nvme-tcp: short-circuit connect retries
  2022-07-14 14:35   ` Sagi Grimberg
@ 2022-07-14 15:17     ` Hannes Reinecke
  0 siblings, 0 replies; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-14 15:17 UTC (permalink / raw)
  To: Sagi Grimberg, Christoph Hellwig; +Cc: Keith Busch, linux-nvme

On 7/14/22 16:35, Sagi Grimberg wrote:
> 
>> When a reconnect attempt fails with a non-retryable status
>> (eg when the subsystem has been unprovisioned) there hardly
>> is any reason to retry the reconnect attempt.
>> So pass the actual error status to nvme_tcp_reconnect_or_remove()
>> and short-circuit retries if the DNR bit is set.
>>
>> Signed-off-by: Hannes Reinecke <hare@suse.de>
>> ---
>>   drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
>>   1 file changed, 18 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
>> index 70096a2e8762..4220c1ad6b29 100644
>> --- a/drivers/nvme/host/tcp.c
>> +++ b/drivers/nvme/host/tcp.c
>> @@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct 
>> nvme_ctrl *ctrl,
>>       nvme_tcp_destroy_io_queues(ctrl, remove);
>>   }
>> -static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
>> +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int 
>> status)
>>   {
>> +    bool recon = true;
>> +
>>       /* If we are resetting/deleting then do nothing */
>>       if (ctrl->state != NVME_CTRL_CONNECTING) {
>>           WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
>> @@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct 
>> nvme_ctrl *ctrl)
>>           return;
>>       }
>> -    if (nvmf_should_reconnect(ctrl)) {
>> +    if (status > 0 && (status & NVME_SC_DNR)) {
>> +        dev_info(ctrl->device, "reconnect failure %d\n", status);
>> +        recon = false;
>> +    }
>> +
> 
> Why should we call this if we need to remove for sure?
> I suggest we just change the call-site.
> 
>> +    if (recon && nvmf_should_reconnect(ctrl)) {
>>           dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
>>               ctrl->opts->reconnect_delay);
>>           queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
>> @@ -2162,10 +2169,12 @@ static void 
>> nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
>>       struct nvme_tcp_ctrl *tcp_ctrl = 
>> container_of(to_delayed_work(work),
>>               struct nvme_tcp_ctrl, connect_work);
>>       struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
>> +    int ret;
>>       ++ctrl->nr_reconnects;
>> -    if (nvme_tcp_setup_ctrl(ctrl, false))
>> +    ret = nvme_tcp_setup_ctrl(ctrl, false);
>> +    if (ret)
>>           goto requeue;
> 
>      status = nvme_tcp_setup_ctrl(ctrl, false);
>      if (!status) {
>          dev_info(ctrl->device,
>              "Successfully reconnected (%d attempt)\n",
>                          ctrl->nr_reconnects);
>          ctrl->nr_reconnects = 0;
>      } else {
>          dev_info(ctrl->device,
>              "Failed reconnect attempt %d (%d)\n",
>                  ctrl->nr_reconnects, status);
>          if (status > 0 && (status & NVME_SC_DNR)) {
>              dev_info(ctrl->device,
>                  "Removing controller...\n");
>              nvme_delete_ctrl(ctrl);
>          } else {
>              nvme_tcp_reconnect_or_remove(ctrl);
>          }
>      }
> }
> 
Good point.

>>       dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
>> @@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct 
>> work_struct *work)
>>   requeue:
>>       dev_info(ctrl->device, "Failed reconnect attempt %d\n",
>>               ctrl->nr_reconnects);
>> -    nvme_tcp_reconnect_or_remove(ctrl);
>> +    nvme_tcp_reconnect_or_remove(ctrl, ret);
>>   }
>>   static void nvme_tcp_error_recovery_work(struct work_struct *work)
>> @@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct 
>> work_struct *work)
>>           return;
>>       }
>> -    nvme_tcp_reconnect_or_remove(ctrl);
>> +    nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN);
> 
> This emphasizes that this is a redundant parameter.
> 

Ok, will be reworking it.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Ivo Totev, Andrew
Myers, Andrew McDonald, Martje Boudien Moerman


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] nvme-tcp: short-circuit connect retries
  2022-07-15  6:33 [PATCHv2 0/2] nvme: short-circuit connection retries Hannes Reinecke
@ 2022-07-15  6:33 ` Hannes Reinecke
  0 siblings, 0 replies; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-15  6:33 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke

When a reconnect attempt fails with a non-retryable status
(e.g. when the subsystem has been unprovisioned) there is hardly
any reason to retry the reconnect attempt.
So check the actual error status from nvme_tcp_setup_ctrl(), and
call nvme_delete_ctrl() instead of calling nvme_tcp_reconnect_or_remove()
if the DNR bit is set.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/host/tcp.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c854e69defb0..a15e3ed02b64 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2157,10 +2157,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
 	struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
 			struct nvme_tcp_ctrl, connect_work);
 	struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+	int ret;
 
 	++ctrl->nr_reconnects;
 
-	if (nvme_tcp_setup_ctrl(ctrl, false))
+	ret = nvme_tcp_setup_ctrl(ctrl, false);
+	if (ret)
 		goto requeue;
 
 	dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
@@ -2173,6 +2175,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
 requeue:
 	dev_info(ctrl->device, "Failed reconnect attempt %d\n",
 			ctrl->nr_reconnects);
+	if (ret > 0 && (ret & NVME_SC_DNR)) {
+		dev_info(ctrl->device,
+			 "Removing ctrl...\n");
+		nvme_delete_ctrl(ctrl);
+		return;
+	}
 	nvme_tcp_reconnect_or_remove(ctrl);
 }
 
@@ -2224,6 +2232,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 {
 	struct nvme_ctrl *ctrl =
 		container_of(work, struct nvme_ctrl, reset_work);
+	int ret;
 
 	nvme_stop_ctrl(ctrl);
 	nvme_tcp_teardown_ctrl(ctrl, false);
@@ -2235,9 +2244,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 		return;
 	}
 
-	if (nvme_tcp_setup_ctrl(ctrl, false))
+	ret = nvme_tcp_setup_ctrl(ctrl, false);
+	if (ret) {
+		if (ret > 0 && (ret & NVME_SC_DNR)) {
+			dev_info(ctrl->device, "Removing ctrl...\n");
+			nvme_delete_ctrl(ctrl);
+			return;
+		}
 		goto out_fail;
-
+	}
 	return;
 
 out_fail:
-- 
2.29.2



^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-07-15  6:34 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-14 12:41 [PATCH 0/2] nvme: short-circuit connection retries Hannes Reinecke
2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
2022-07-14 14:35   ` Sagi Grimberg
2022-07-14 15:17     ` Hannes Reinecke
2022-07-14 12:41 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke
2022-07-15  6:33 [PATCHv2 0/2] nvme: short-circuit connection retries Hannes Reinecke
2022-07-15  6:33 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.