* [PATCH v2 1/2] nvme-tcp: short-circuit reconnect retries
2024-03-04 16:10 [PATCH v2 0/2] nvme-fabrics: short-circuit connect retries Daniel Wagner
@ 2024-03-04 16:10 ` Daniel Wagner
2024-03-05 13:49 ` Christoph Hellwig
2024-03-04 16:10 ` [PATCH v2 2/2] nvme-rdma: " Daniel Wagner
2024-03-04 16:32 ` [PATCH v2 0/2] nvme-fabrics: short-circuit connect retries Keith Busch
2 siblings, 1 reply; 6+ messages in thread
From: Daniel Wagner @ 2024-03-04 16:10 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, Hannes Reinecke, linux-nvme,
James Smart, Chao Leng
From: Hannes Reinecke <hare@suse.de>
Returning an nvme status from nvme_tcp_setup_ctrl() indicates
that the association was established and we have received a status
from the controller; consequently we should honour the DNR bit.
Otherwise, any future reconnect attempts will just return the same error, so
we can short-circuit the reconnect attempts and fail the connection
directly.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/nvme/host/tcp.c | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 3b20c5ed033f..6b5f0b5f7fd3 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2149,9 +2149,10 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
nvme_tcp_destroy_io_queues(ctrl, remove);
}
-static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
+static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int status)
{
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+ bool recon = status > 0 && (status & NVME_SC_DNR) ? false : true;
/* If we are resetting/deleting then do nothing */
if (state != NVME_CTRL_CONNECTING) {
@@ -2159,13 +2160,13 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
return;
}
- if (nvmf_should_reconnect(ctrl)) {
+ if (recon && nvmf_should_reconnect(ctrl)) {
dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
ctrl->opts->reconnect_delay);
queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
ctrl->opts->reconnect_delay * HZ);
} else {
- dev_info(ctrl->device, "Removing controller...\n");
+ dev_info(ctrl->device, "Removing controller (%d)...\n", status);
nvme_delete_ctrl(ctrl);
}
}
@@ -2246,10 +2247,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
struct nvme_tcp_ctrl, connect_work);
struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+ int ret;
++ctrl->nr_reconnects;
- if (nvme_tcp_setup_ctrl(ctrl, false))
+ ret = nvme_tcp_setup_ctrl(ctrl, false);
+ if (ret)
goto requeue;
dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
@@ -2262,7 +2265,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
requeue:
dev_info(ctrl->device, "Failed reconnect attempt %d\n",
ctrl->nr_reconnects);
- nvme_tcp_reconnect_or_remove(ctrl);
+ nvme_tcp_reconnect_or_remove(ctrl, ret);
}
static void nvme_tcp_error_recovery_work(struct work_struct *work)
@@ -2289,7 +2292,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
return;
}
- nvme_tcp_reconnect_or_remove(ctrl);
+ nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN);
}
static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
@@ -2309,6 +2312,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl =
container_of(work, struct nvme_ctrl, reset_work);
+ int ret;
nvme_stop_ctrl(ctrl);
nvme_tcp_teardown_ctrl(ctrl, false);
@@ -2322,14 +2326,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
return;
}
- if (nvme_tcp_setup_ctrl(ctrl, false))
+ ret = nvme_tcp_setup_ctrl(ctrl, false);
+ if (ret)
goto out_fail;
return;
out_fail:
++ctrl->nr_reconnects;
- nvme_tcp_reconnect_or_remove(ctrl);
+ nvme_tcp_reconnect_or_remove(ctrl, ret);
}
static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)
--
2.44.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v2 2/2] nvme-rdma: short-circuit reconnect retries
2024-03-04 16:10 [PATCH v2 0/2] nvme-fabrics: short-circuit connect retries Daniel Wagner
2024-03-04 16:10 ` [PATCH v2 1/2] nvme-tcp: short-circuit reconnect retries Daniel Wagner
@ 2024-03-04 16:10 ` Daniel Wagner
2024-03-04 16:32 ` [PATCH v2 0/2] nvme-fabrics: short-circuit connect retries Keith Busch
2 siblings, 0 replies; 6+ messages in thread
From: Daniel Wagner @ 2024-03-04 16:10 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, Hannes Reinecke, linux-nvme,
James Smart, Chao Leng
From: Hannes Reinecke <hare@suse.de>
Returning an nvme status from nvme_rdma_setup_ctrl() indicates
that the association was established and we have received a status
from the controller; consequently we should honour the DNR bit.
Otherwise, any future reconnect attempts will just return the same error, so
we can short-circuit the reconnect attempts and fail the connection
directly.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/nvme/host/rdma.c | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index d3747795ad80..4a7cb7015cf0 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -982,9 +982,11 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
kfree(ctrl);
}
-static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl,
+ int status)
{
enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+ bool recon = status > 0 && (status & NVME_SC_DNR) ? false : true;
/* If we are resetting/deleting then do nothing */
if (state != NVME_CTRL_CONNECTING) {
@@ -992,12 +994,14 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
return;
}
- if (nvmf_should_reconnect(&ctrl->ctrl)) {
+ if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
ctrl->ctrl.opts->reconnect_delay);
queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
+ dev_info(ctrl->ctrl.device, "Removing controller (%d)...\n",
+ status);
nvme_delete_ctrl(&ctrl->ctrl);
}
}
@@ -1098,10 +1102,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvme_rdma_ctrl, reconnect_work);
+ int ret;
++ctrl->ctrl.nr_reconnects;
- if (nvme_rdma_setup_ctrl(ctrl, false))
+ ret = nvme_rdma_setup_ctrl(ctrl, false);
+ if (ret)
goto requeue;
dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
@@ -1114,7 +1120,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
requeue:
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
ctrl->ctrl.nr_reconnects);
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, ret);
}
static void nvme_rdma_error_recovery_work(struct work_struct *work)
@@ -1139,7 +1145,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
return;
}
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, -ENOTCONN);
}
static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
@@ -2163,6 +2169,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl =
container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
+ int ret;
nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -2173,14 +2180,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
return;
}
- if (nvme_rdma_setup_ctrl(ctrl, false))
+ ret = nvme_rdma_setup_ctrl(ctrl, false);
+ if (ret)
goto out_fail;
return;
out_fail:
++ctrl->ctrl.nr_reconnects;
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, ret);
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
--
2.44.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH v2 0/2] nvme-fabrics: short-circuit connect retries
2024-03-04 16:10 [PATCH v2 0/2] nvme-fabrics: short-circuit connect retries Daniel Wagner
2024-03-04 16:10 ` [PATCH v2 1/2] nvme-tcp: short-circuit reconnect retries Daniel Wagner
2024-03-04 16:10 ` [PATCH v2 2/2] nvme-rdma: " Daniel Wagner
@ 2024-03-04 16:32 ` Keith Busch
2024-03-05 7:21 ` Daniel Wagner
2 siblings, 1 reply; 6+ messages in thread
From: Keith Busch @ 2024-03-04 16:32 UTC (permalink / raw)
To: Daniel Wagner
Cc: Christoph Hellwig, Sagi Grimberg, Hannes Reinecke, linux-nvme,
James Smart, Chao Leng
On Mon, Mar 04, 2024 at 05:10:04PM +0100, Daniel Wagner wrote:
> I've picked up Hannes' DNR patches. In short, they make the transports behave the
> same way when the DNR bit is set on a re-connect attempt. We had a discussion on this
> topic in the past and, if I got this right, we all agreed that the host should
> honor the DNR bit on a connect attempt [1]
These look good to me. I think, though, if you're submitting a patch on
behalf of another, you should append your 'Signed-off-by' tag after the
author's.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: Re: [PATCH v2 0/2] nvme-fabrics: short-circuit connect retries
2024-03-04 16:32 ` [PATCH v2 0/2] nvme-fabrics: short-circuit connect retries Keith Busch
@ 2024-03-05 7:21 ` Daniel Wagner
0 siblings, 0 replies; 6+ messages in thread
From: Daniel Wagner @ 2024-03-05 7:21 UTC (permalink / raw)
To: Keith Busch
Cc: Christoph Hellwig, Sagi Grimberg, Hannes Reinecke, linux-nvme,
James Smart, Chao Leng
On Mon, Mar 04, 2024 at 09:32:12AM -0700, Keith Busch wrote:
> On Mon, Mar 04, 2024 at 05:10:04PM +0100, Daniel Wagner wrote:
> > I've picked up Hannes' DNR patches. In short, they make the transports behave the
> > same way when the DNR bit is set on a re-connect attempt. We had a discussion on this
> > topic in the past and, if I got this right, we all agreed that the host should
> > honor the DNR bit on a connect attempt [1]
>
> These look good to me. I think, though, if you're submitting a patch on
> behalf of another, you should append your 'Signed-off-by' tag after the
> author's.
Will do. Yesterday was not my best git day for a while...
^ permalink raw reply [flat|nested] 6+ messages in thread