All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] nvme-fc: track state change failures
@ 2019-05-16  8:37 Hannes Reinecke
  2019-05-16  8:37 ` [PATCH 1/3] nvme: separate out nvme_ctrl_state_name() Hannes Reinecke
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Hannes Reinecke @ 2019-05-16  8:37 UTC (permalink / raw)


Hi all,

this patchset improves logging in the nvme-fc driver if expcected
state changes fail, and fixes up one case where a failed state change
might lead to a stuck controller.

Hannes Reinecke (3):
  nvme: separate out nvme_ctrl_state_name()
  nvme-fc: track state change failures during reconnect
  nvme-fc: fail reconnect if state change fails

 drivers/nvme/host/core.c | 36 +++++++++++++++++++++++-------------
 drivers/nvme/host/fc.c   | 23 +++++++++++++++++------
 drivers/nvme/host/nvme.h |  1 +
 3 files changed, 41 insertions(+), 19 deletions(-)

-- 
2.16.4

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/3] nvme: separate out nvme_ctrl_state_name()
  2019-05-16  8:37 [PATCH 0/3] nvme-fc: track state change failures Hannes Reinecke
@ 2019-05-16  8:37 ` Hannes Reinecke
  2019-05-16 13:55   ` Minwoo Im
  2019-05-16 16:24   ` James Smart
  2019-05-16  8:37 ` [PATCH 2/3] nvme-fc: track state change failures during reconnect Hannes Reinecke
  2019-05-16  8:37 ` [PATCH 3/3] nvme-fc: fail reconnect if state change fails Hannes Reinecke
  2 siblings, 2 replies; 14+ messages in thread
From: Hannes Reinecke @ 2019-05-16  8:37 UTC (permalink / raw)


Separate out nvme_ctrl_state_name() to return the controller state
as a string.

Signed-off-by: Hannes Reinecke <hare at suse.com>
---
 drivers/nvme/host/core.c | 36 +++++++++++++++++++++++-------------
 drivers/nvme/host/nvme.h |  1 +
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c2e4fa694f79..2632276458f5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -380,6 +380,25 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 }
 EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
 
+static const char *const nvme_ctrl_state_names[] = {
+	[NVME_CTRL_NEW]		= "new",
+	[NVME_CTRL_LIVE]	= "live",
+	[NVME_CTRL_ADMIN_ONLY]	= "only-admin",
+	[NVME_CTRL_RESETTING]	= "resetting",
+	[NVME_CTRL_CONNECTING]	= "connecting",
+	[NVME_CTRL_DELETING]	= "deleting",
+	[NVME_CTRL_DEAD]	= "dead",
+};
+
+const char *nvme_ctrl_state_name(struct nvme_ctrl *ctrl)
+{
+	if ((unsigned)ctrl->state < ARRAY_SIZE(nvme_ctrl_state_names) &&
+	    nvme_ctrl_state_names[ctrl->state])
+		return nvme_ctrl_state_names[ctrl->state];
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(nvme_ctrl_state_name);
+
 static void nvme_free_ns_head(struct kref *ref)
 {
 	struct nvme_ns_head *head =
@@ -2989,19 +3008,10 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
 				     char *buf)
 {
 	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
-	static const char *const state_name[] = {
-		[NVME_CTRL_NEW]		= "new",
-		[NVME_CTRL_LIVE]	= "live",
-		[NVME_CTRL_ADMIN_ONLY]	= "only-admin",
-		[NVME_CTRL_RESETTING]	= "resetting",
-		[NVME_CTRL_CONNECTING]	= "connecting",
-		[NVME_CTRL_DELETING]	= "deleting",
-		[NVME_CTRL_DEAD]	= "dead",
-	};
-
-	if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
-	    state_name[ctrl->state])
-		return sprintf(buf, "%s\n", state_name[ctrl->state]);
+	const char *state_name = nvme_ctrl_state_name(ctrl);
+
+	if (state_name)
+		return sprintf(buf, "%s\n", state_name);
 
 	return sprintf(buf, "unknown state\n");
 }
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5ee75b5ff83f..b3b13e465dc6 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -419,6 +419,7 @@ void nvme_complete_rq(struct request *req);
 bool nvme_cancel_request(struct request *req, void *data, bool reserved);
 bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		enum nvme_ctrl_state new_state);
+const char *nvme_ctrl_state_name(struct nvme_ctrl *ctrl);
 int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
 int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
 int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
-- 
2.16.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/3] nvme-fc: track state change failures during reconnect
  2019-05-16  8:37 [PATCH 0/3] nvme-fc: track state change failures Hannes Reinecke
  2019-05-16  8:37 ` [PATCH 1/3] nvme: separate out nvme_ctrl_state_name() Hannes Reinecke
@ 2019-05-16  8:37 ` Hannes Reinecke
  2019-05-16 16:24   ` James Smart
  2019-05-16  8:37 ` [PATCH 3/3] nvme-fc: fail reconnect if state change fails Hannes Reinecke
  2 siblings, 1 reply; 14+ messages in thread
From: Hannes Reinecke @ 2019-05-16  8:37 UTC (permalink / raw)


The nvme-fc driver has several situation under which an expected
state transition fails, but doesn't print out any messages if
this happens.
The patch adds logging for these situations.

Signed-off-by: Hannes Reinecke <hare at suse.com>
---
 drivers/nvme/host/fc.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0c9e036afd09..e5c81ba2b7a1 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2867,8 +2867,12 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
 	unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
 	bool recon = true;
 
-	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
+	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
+		dev_info(ctrl->ctrl.device,
+			 "NVME-FC{%d}: couldn't reconnect in state %s\n",
+			 ctrl->cnum, nvme_ctrl_state_name(&ctrl->ctrl));
 		return;
+	}
 
 	if (portptr->port_state == FC_OBJSTATE_ONLINE)
 		dev_info(ctrl->ctrl.device,
@@ -2914,7 +2918,8 @@ __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
 	    !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
 		dev_err(ctrl->ctrl.device,
 			"NVME-FC{%d}: error_recovery: Couldn't change state "
-			"to CONNECTING\n", ctrl->cnum);
+			"from %s to CONNECTING\n", ctrl->cnum,
+			nvme_ctrl_state_name(&ctrl->ctrl));
 }
 
 static void
-- 
2.16.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/3] nvme-fc: fail reconnect if state change fails
  2019-05-16  8:37 [PATCH 0/3] nvme-fc: track state change failures Hannes Reinecke
  2019-05-16  8:37 ` [PATCH 1/3] nvme: separate out nvme_ctrl_state_name() Hannes Reinecke
  2019-05-16  8:37 ` [PATCH 2/3] nvme-fc: track state change failures during reconnect Hannes Reinecke
@ 2019-05-16  8:37 ` Hannes Reinecke
  2019-05-16 16:25   ` James Smart
  2019-05-18  0:18   ` Arun Easi
  2 siblings, 2 replies; 14+ messages in thread
From: Hannes Reinecke @ 2019-05-16  8:37 UTC (permalink / raw)


If the final state change to LIVE in nvme_fc_create_association()
fails the controller is not operational as no I/O is possible.
So we should be returning an error here to reschedule reconnect.

Signed-off-by: Hannes Reinecke <hare at suse.com>
---
 drivers/nvme/host/fc.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e5c81ba2b7a1..9f9300cbdb62 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2620,7 +2620,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 {
 	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
 	int ret;
-	bool changed;
 
 	++ctrl->ctrl.nr_reconnects;
 
@@ -2725,12 +2724,19 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 			goto out_term_aen_ops;
 	}
 
-	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
+	if (nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE)) {
+		if (ctrl->ctrl.state != NVME_CTRL_DELETING) {
+			dev_err(ctrl->ctrl.device,
+				"NVME-FC{%d}: error_recovery: Couldn't change "
+				"state from %s to LIVE\n", ctrl->cnum,
+				nvme_ctrl_state_name(&ctrl->ctrl));
+			return -EAGAIN;
+		}
+	}
 
 	ctrl->ctrl.nr_reconnects = 0;
 
-	if (changed)
-		nvme_start_ctrl(&ctrl->ctrl);
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return 0;	/* Success */
 
-- 
2.16.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 1/3] nvme: separate out nvme_ctrl_state_name()
  2019-05-16  8:37 ` [PATCH 1/3] nvme: separate out nvme_ctrl_state_name() Hannes Reinecke
@ 2019-05-16 13:55   ` Minwoo Im
  2019-05-16 16:24   ` James Smart
  1 sibling, 0 replies; 14+ messages in thread
From: Minwoo Im @ 2019-05-16 13:55 UTC (permalink / raw)


Hi Hannes,

> +const char *nvme_ctrl_state_name(struct nvme_ctrl *ctrl)
> +{
> +	if ((unsigned)ctrl->state < ARRAY_SIZE(nvme_ctrl_state_names) &&
> +	    nvme_ctrl_state_names[ctrl->state])
> +		return nvme_ctrl_state_names[ctrl->state];
> +	return NULL;

Perhaps we can return "unknown" here that can make callers do not
need to consider that if it gives NULL and not return a string there also.

> @@ -2989,19 +3008,10 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
>  				     char *buf)
>  {
>  	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
> -	static const char *const state_name[] = {
> -		[NVME_CTRL_NEW]		= "new",
> -		[NVME_CTRL_LIVE]	= "live",
> -		[NVME_CTRL_ADMIN_ONLY]	= "only-admin",
> -		[NVME_CTRL_RESETTING]	= "resetting",
> -		[NVME_CTRL_CONNECTING]	= "connecting",
> -		[NVME_CTRL_DELETING]	= "deleting",
> -		[NVME_CTRL_DEAD]	= "dead",
> -	};
> -
> -	if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
> -	    state_name[ctrl->state])
> -		return sprintf(buf, "%s\n", state_name[ctrl->state]);
> +	const char *state_name = nvme_ctrl_state_name(ctrl);
> +
> +	if (state_name)
> +		return sprintf(buf, "%s\n", state_name);
>  
>  	return sprintf(buf, "unknown state\n");

If so, we can make these three lines to a single one without considering
the NULL case.

What do you think?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/3] nvme: separate out nvme_ctrl_state_name()
  2019-05-16  8:37 ` [PATCH 1/3] nvme: separate out nvme_ctrl_state_name() Hannes Reinecke
  2019-05-16 13:55   ` Minwoo Im
@ 2019-05-16 16:24   ` James Smart
  1 sibling, 0 replies; 14+ messages in thread
From: James Smart @ 2019-05-16 16:24 UTC (permalink / raw)


On 5/16/2019 1:37 AM, Hannes Reinecke wrote:
> Separate out nvme_ctrl_state_name() to return the controller state
> as a string.
>
> Signed-off-by: Hannes Reinecke <hare at suse.com>
> ---
>   drivers/nvme/host/core.c | 36 +++++++++++++++++++++++-------------
>   drivers/nvme/host/nvme.h |  1 +
>   2 files changed, 24 insertions(+), 13 deletions(-)
>
>

Reviewed-by:? James Smart? <james.smart at broadcom.com>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 2/3] nvme-fc: track state change failures during reconnect
  2019-05-16  8:37 ` [PATCH 2/3] nvme-fc: track state change failures during reconnect Hannes Reinecke
@ 2019-05-16 16:24   ` James Smart
  0 siblings, 0 replies; 14+ messages in thread
From: James Smart @ 2019-05-16 16:24 UTC (permalink / raw)




On 5/16/2019 1:37 AM, Hannes Reinecke wrote:
> The nvme-fc driver has several situation under which an expected
> state transition fails, but doesn't print out any messages if
> this happens.
> The patch adds logging for these situations.
>
> Signed-off-by: Hannes Reinecke <hare at suse.com>
> ---
>   drivers/nvme/host/fc.c | 9 +++++++--
>   1 file changed, 7 insertions(+), 2 deletions(-)
>
>

Reviewed-by:? James Smart?? <james.smart at broadcom.com>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/3] nvme-fc: fail reconnect if state change fails
  2019-05-16  8:37 ` [PATCH 3/3] nvme-fc: fail reconnect if state change fails Hannes Reinecke
@ 2019-05-16 16:25   ` James Smart
  2019-05-18  0:18   ` Arun Easi
  1 sibling, 0 replies; 14+ messages in thread
From: James Smart @ 2019-05-16 16:25 UTC (permalink / raw)




On 5/16/2019 1:37 AM, Hannes Reinecke wrote:
> If the final state change to LIVE in nvme_fc_create_association()
> fails the controller is not operational as no I/O is possible.
> So we should be returning an error here to reschedule reconnect.
>
> Signed-off-by: Hannes Reinecke <hare at suse.com>
> ---
>   drivers/nvme/host/fc.c | 14 ++++++++++----
>   1 file changed, 10 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
> index e5c81ba2b7a1..9f9300cbdb62 100644
> --- a/drivers/nvme/host/fc.c
> +++ b/drivers/nvme/host/fc.c
> @@ -2620,7 +2620,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
>   {
>   	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
>   	int ret;
> -	bool changed;
>   
>   	++ctrl->ctrl.nr_reconnects;
>   
> @@ -2725,12 +2724,19 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
>   			goto out_term_aen_ops;
>   	}
>   
> -	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
> +	if (nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE)) {
> +		if (ctrl->ctrl.state != NVME_CTRL_DELETING) {
is this easier to arrange as:
 ??? if (nvme_change_ctrl_state(...) &&
 ???????? ctrl->ctrl.state != NVME_CTRL_DELETING) {

> +			dev_err(ctrl->ctrl.device,
> +				"NVME-FC{%d}: error_recovery: Couldn't change "
> +				"state from %s to LIVE\n", ctrl->cnum,
> +				nvme_ctrl_state_name(&ctrl->ctrl));
> +			return -EAGAIN;
> +		}
> +	}
>   
>   	ctrl->ctrl.nr_reconnects = 0;
>   
> -	if (changed)
> -		nvme_start_ctrl(&ctrl->ctrl);
> +	nvme_start_ctrl(&ctrl->ctrl);

I don't think you want to eliminate this check - you don't want to call 
nvme_start_ctrl() if state did transition to DELETING. You want to 
continue out to release the create thread, but the delete work will be 
coming along shortly.

-- james

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/3] nvme-fc: fail reconnect if state change fails
  2019-05-16  8:37 ` [PATCH 3/3] nvme-fc: fail reconnect if state change fails Hannes Reinecke
  2019-05-16 16:25   ` James Smart
@ 2019-05-18  0:18   ` Arun Easi
  2019-05-18  0:21     ` Arun Easi
  1 sibling, 1 reply; 14+ messages in thread
From: Arun Easi @ 2019-05-18  0:18 UTC (permalink / raw)


On Thu, 16 May 2019, 1:37am, Hannes Reinecke wrote:

> If the final state change to LIVE in nvme_fc_create_association()
> fails the controller is not operational as no I/O is possible.
> So we should be returning an error here to reschedule reconnect.
> 
> Signed-off-by: Hannes Reinecke <hare at suse.com>
> ---
>  drivers/nvme/host/fc.c | 14 ++++++++++----
>  1 file changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
> index e5c81ba2b7a1..9f9300cbdb62 100644
> --- a/drivers/nvme/host/fc.c
> +++ b/drivers/nvme/host/fc.c
> @@ -2620,7 +2620,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
>  {
>  	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
>  	int ret;
> -	bool changed;
>  
>  	++ctrl->ctrl.nr_reconnects;
>  
> @@ -2725,12 +2724,19 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
>  			goto out_term_aen_ops;
>  	}
>  
> -	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
> +	if (nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE)) {

Should not this be !nvme_change_ctrl_state()?

Regards,
-Arun
> +		if (ctrl->ctrl.state != NVME_CTRL_DELETING) {
> +			dev_err(ctrl->ctrl.device,
> +				"NVME-FC{%d}: error_recovery: Couldn't change "
> +				"state from %s to LIVE\n", ctrl->cnum,
> +				nvme_ctrl_state_name(&ctrl->ctrl));
> +			return -EAGAIN;
> +		}
> +	}
>  
>  	ctrl->ctrl.nr_reconnects = 0;
>  
> -	if (changed)
> -		nvme_start_ctrl(&ctrl->ctrl);
> +	nvme_start_ctrl(&ctrl->ctrl);
>  
>  	return 0;	/* Success */
>  
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/3] nvme-fc: fail reconnect if state change fails
  2019-05-18  0:18   ` Arun Easi
@ 2019-05-18  0:21     ` Arun Easi
  0 siblings, 0 replies; 14+ messages in thread
From: Arun Easi @ 2019-05-18  0:21 UTC (permalink / raw)


Please ignore the comment. I see that v2 already has addressed this.

Regards,
-Arun

On Fri, 17 May 2019, 5:18pm, Arun Easi wrote:

> On Thu, 16 May 2019, 1:37am, Hannes Reinecke wrote:
> 
> > If the final state change to LIVE in nvme_fc_create_association()
> > fails the controller is not operational as no I/O is possible.
> > So we should be returning an error here to reschedule reconnect.
> > 
> > Signed-off-by: Hannes Reinecke <hare at suse.com>
> > ---
> >  drivers/nvme/host/fc.c | 14 ++++++++++----
> >  1 file changed, 10 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
> > index e5c81ba2b7a1..9f9300cbdb62 100644
> > --- a/drivers/nvme/host/fc.c
> > +++ b/drivers/nvme/host/fc.c
> > @@ -2620,7 +2620,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
> >  {
> >  	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
> >  	int ret;
> > -	bool changed;
> >  
> >  	++ctrl->ctrl.nr_reconnects;
> >  
> > @@ -2725,12 +2724,19 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
> >  			goto out_term_aen_ops;
> >  	}
> >  
> > -	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
> > +	if (nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE)) {
> 
> Should not this be !nvme_change_ctrl_state()?
> 
> Regards,
> -Arun
> > +		if (ctrl->ctrl.state != NVME_CTRL_DELETING) {
> > +			dev_err(ctrl->ctrl.device,
> > +				"NVME-FC{%d}: error_recovery: Couldn't change "
> > +				"state from %s to LIVE\n", ctrl->cnum,
> > +				nvme_ctrl_state_name(&ctrl->ctrl));
> > +			return -EAGAIN;
> > +		}
> > +	}
> >  
> >  	ctrl->ctrl.nr_reconnects = 0;
> >  
> > -	if (changed)
> > -		nvme_start_ctrl(&ctrl->ctrl);
> > +	nvme_start_ctrl(&ctrl->ctrl);
> >  
> >  	return 0;	/* Success */
> >  
> > 
> 
> _______________________________________________
> Linux-nvme mailing list
> Linux-nvme at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-nvme
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/3] nvme-fc: fail reconnect if state change fails
  2019-05-17 18:34     ` Hannes Reinecke
@ 2019-05-17 18:52       ` James Smart
  0 siblings, 0 replies; 14+ messages in thread
From: James Smart @ 2019-05-17 18:52 UTC (permalink / raw)




On 5/17/2019 11:34 AM, Hannes Reinecke wrote:
> On 5/17/19 6:10 PM, James Smart wrote:
>> On 5/16/2019 11:42 PM, Hannes Reinecke wrote:
>>> diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
>>> index e5c81ba2b7a1..a8ef62512d9a 100644
>>> --- a/drivers/nvme/host/fc.c
>>> +++ b/drivers/nvme/host/fc.c
>>> @@ -2726,6 +2734,13 @@ nvme_fc_create_association(struct 
>>> nvme_fc_ctrl *ctrl)
>>> ????? }
>>> ????? changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
>>> +??? if (!changed && ctrl->ctrl.state != NVME_CTRL_DELETING) {
>>> +??????? dev_err(ctrl->ctrl.device,
>>> +??????????? "NVME-FC{%d}: error_recovery: Couldn't change "
>>> +??????????? "state from %s to LIVE\n", ctrl->cnum,
>>> +??????????? nvme_ctrl_state_name(&ctrl->ctrl));
>>> +??????? return -EAGAIN;
>>> +??? }
>>> ????? ctrl->ctrl.nr_reconnects = 0;
>>
>> Sorry, I should have spotted this last time.?? We shouldn't be 
>> exiting in the bad case - we should be setting ret=-EAGAIN then have 
>> a goto out_term_aen_ops so that whatever was built up for the 
>> association is torn down.
>>
> I _actually_ did do that on purpose; I did ask me the very same question.
> Reasoning was that this is the final step, and reverting this would 
> amount to a full reset. Hence we could just return an error, and let 
> the reset handle things. Especially as we do not clear the reconnect 
> counter here.
> Am I wrong?
>

Yes, partly no.??? No, in that tearing down what you built up for the 
association will be similar to the action of a reset but the problem is 
- the association create expected that "teardown" to have happened 
before it runs. In this new case, data structures are left built up, 
queues live, flags live, yet the controller by state is in limbo.? After 
the failure return, all that happens in the transport schedules a 
connect timeout. When the timeout occurs it will attempt to rebuild 
everything that is already in place. There will be no attempt to 
terminate the prior association you left live and I have no idea what 
some of the reallocation paths will do when they start building up on 
things already live. At best they just overlay and we have leaks - I 
don't know. You've created a very new and unique path by returning.? 
Don't do that.

-- james

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/3] nvme-fc: fail reconnect if state change fails
  2019-05-17 16:10   ` James Smart
@ 2019-05-17 18:34     ` Hannes Reinecke
  2019-05-17 18:52       ` James Smart
  0 siblings, 1 reply; 14+ messages in thread
From: Hannes Reinecke @ 2019-05-17 18:34 UTC (permalink / raw)


On 5/17/19 6:10 PM, James Smart wrote:
> On 5/16/2019 11:42 PM, Hannes Reinecke wrote:
>> diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
>> index e5c81ba2b7a1..a8ef62512d9a 100644
>> --- a/drivers/nvme/host/fc.c
>> +++ b/drivers/nvme/host/fc.c
>> @@ -2726,6 +2734,13 @@ nvme_fc_create_association(struct nvme_fc_ctrl 
>> *ctrl)
>> ????? }
>> ????? changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
>> +??? if (!changed && ctrl->ctrl.state != NVME_CTRL_DELETING) {
>> +??????? dev_err(ctrl->ctrl.device,
>> +??????????? "NVME-FC{%d}: error_recovery: Couldn't change "
>> +??????????? "state from %s to LIVE\n", ctrl->cnum,
>> +??????????? nvme_ctrl_state_name(&ctrl->ctrl));
>> +??????? return -EAGAIN;
>> +??? }
>> ????? ctrl->ctrl.nr_reconnects = 0;
> 
> Sorry, I should have spotted this last time.?? We shouldn't be exiting 
> in the bad case - we should be setting ret=-EAGAIN then have a goto 
> out_term_aen_ops so that whatever was built up for the association is 
> torn down.
> 
I _actually_ did do that on purpose; I did ask me the very same question.
Reasoning was that this is the final step, and reverting this would 
amount to a full reset. Hence we could just return an error, and let the 
reset handle things. Especially as we do not clear the reconnect counter 
here.
Am I wrong?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke            Teamlead Storage & Networking
hare at suse.de                              +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 N?rnberg
GF: Felix Imend?rffer, Mary Higgins, Sri Rasiah
HRB 21284 (AG N?rnberg)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/3] nvme-fc: fail reconnect if state change fails
  2019-05-17  6:42 ` [PATCH 3/3] nvme-fc: fail reconnect if state change fails Hannes Reinecke
@ 2019-05-17 16:10   ` James Smart
  2019-05-17 18:34     ` Hannes Reinecke
  0 siblings, 1 reply; 14+ messages in thread
From: James Smart @ 2019-05-17 16:10 UTC (permalink / raw)


On 5/16/2019 11:42 PM, Hannes Reinecke wrote:
> diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
> index e5c81ba2b7a1..a8ef62512d9a 100644
> --- a/drivers/nvme/host/fc.c
> +++ b/drivers/nvme/host/fc.c
> @@ -2726,6 +2734,13 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
>   	}
>   
>   	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
> +	if (!changed && ctrl->ctrl.state != NVME_CTRL_DELETING) {
> +		dev_err(ctrl->ctrl.device,
> +			"NVME-FC{%d}: error_recovery: Couldn't change "
> +			"state from %s to LIVE\n", ctrl->cnum,
> +			nvme_ctrl_state_name(&ctrl->ctrl));
> +		return -EAGAIN;
> +	}
>   
>   	ctrl->ctrl.nr_reconnects = 0;
>   

Sorry, I should have spotted this last time.?? We shouldn't be exiting 
in the bad case - we should be setting ret=-EAGAIN then have a goto 
out_term_aen_ops so that whatever was built up for the association is 
torn down.

-- james

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/3] nvme-fc: fail reconnect if state change fails
  2019-05-17  6:42 [PATCHv2 0/3] nvme-fc: track state change failures Hannes Reinecke
@ 2019-05-17  6:42 ` Hannes Reinecke
  2019-05-17 16:10   ` James Smart
  0 siblings, 1 reply; 14+ messages in thread
From: Hannes Reinecke @ 2019-05-17  6:42 UTC (permalink / raw)


If the final state change to LIVE in nvme_fc_create_association()
fails the controller is not operational as no I/O is possible.
So we should be returning an error here to reschedule reconnect.
Additionally it should only be called while in CONNECTING state, so
add a check for this, too.

Signed-off-by: Hannes Reinecke <hare at suse.com>
---
 drivers/nvme/host/fc.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e5c81ba2b7a1..a8ef62512d9a 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2624,6 +2624,14 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 
 	++ctrl->ctrl.nr_reconnects;
 
+	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
+		dev_info(ctrl->ctrl.device,
+			 "NVME-FC{%d}: state %s cancelled new "
+			 "association attempt\n",
+			 ctrl->cnum, nvme_ctrl_state_name(&ctrl->ctrl));
+		return -ENODEV;
+	}
+
 	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
 		return -ENODEV;
 
@@ -2726,6 +2734,13 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
+	if (!changed && ctrl->ctrl.state != NVME_CTRL_DELETING) {
+		dev_err(ctrl->ctrl.device,
+			"NVME-FC{%d}: error_recovery: Couldn't change "
+			"state from %s to LIVE\n", ctrl->cnum,
+			nvme_ctrl_state_name(&ctrl->ctrl));
+		return -EAGAIN;
+	}
 
 	ctrl->ctrl.nr_reconnects = 0;
 
-- 
2.16.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2019-05-18  0:21 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-16  8:37 [PATCH 0/3] nvme-fc: track state change failures Hannes Reinecke
2019-05-16  8:37 ` [PATCH 1/3] nvme: separate out nvme_ctrl_state_name() Hannes Reinecke
2019-05-16 13:55   ` Minwoo Im
2019-05-16 16:24   ` James Smart
2019-05-16  8:37 ` [PATCH 2/3] nvme-fc: track state change failures during reconnect Hannes Reinecke
2019-05-16 16:24   ` James Smart
2019-05-16  8:37 ` [PATCH 3/3] nvme-fc: fail reconnect if state change fails Hannes Reinecke
2019-05-16 16:25   ` James Smart
2019-05-18  0:18   ` Arun Easi
2019-05-18  0:21     ` Arun Easi
2019-05-17  6:42 [PATCHv2 0/3] nvme-fc: track state change failures Hannes Reinecke
2019-05-17  6:42 ` [PATCH 3/3] nvme-fc: fail reconnect if state change fails Hannes Reinecke
2019-05-17 16:10   ` James Smart
2019-05-17 18:34     ` Hannes Reinecke
2019-05-17 18:52       ` James Smart

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.