All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] scsi_dh_alua: mark port group as failed after ALUA transitioning timeout
@ 2022-05-25  8:11 Hannes Reinecke
  2022-05-25 11:20 ` Martin Wilck
  0 siblings, 1 reply; 5+ messages in thread
From: Hannes Reinecke @ 2022-05-25  8:11 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, linux-scsi, Hannes Reinecke,
	Brian Bunker, Martin Wilck

When ALUA transitioning timeout triggers the path group state must
be considered invalid. So add a new flag ALUA_PG_FAILED to indicate
that the path state isn't necessarily valid, and keep the existing
path state until we get a valid response from a RTPG.

Cc: Brian Bunker <brian@purestorage.com>
Cc: Martin Wilck <mwilck@suse.de>
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/device_handler/scsi_dh_alua.c | 24 +++++++---------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 1d9be771f3ee..6921490a5e65 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -49,6 +49,7 @@
 #define ALUA_PG_RUN_RTPG		0x10
 #define ALUA_PG_RUN_STPG		0x20
 #define ALUA_PG_RUNNING			0x40
+#define ALUA_PG_FAILED			0x80
 
 static uint optimize_stpg;
 module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
@@ -420,7 +421,7 @@ static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
 			 */
 			rcu_read_lock();
 			pg = rcu_dereference(h->pg);
-			if (pg)
+			if (pg && !(pg->flags & ALUA_PG_FAILED))
 				pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
 			rcu_read_unlock();
 			alua_check(sdev, false);
@@ -694,7 +695,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 
  skip_rtpg:
 	spin_lock_irqsave(&pg->lock, flags);
-	if (transitioning_sense)
+	if (transitioning_sense && !(pg->flags & ALUA_PG_FAILED))
 		pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
 
 	if (group_id_old != pg->group_id || state_old != pg->state ||
@@ -718,23 +719,10 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 			pg->interval = ALUA_RTPG_RETRY_DELAY;
 			err = SCSI_DH_RETRY;
 		} else {
-			struct alua_dh_data *h;
-
-			/* Transitioning time exceeded, set port to standby */
+			/* Transitioning time exceeded, mark pg as failed */
 			err = SCSI_DH_IO;
-			pg->state = SCSI_ACCESS_STATE_STANDBY;
+			pg->flags |= ALUA_PG_FAILED;
 			pg->expiry = 0;
-			rcu_read_lock();
-			list_for_each_entry_rcu(h, &pg->dh_list, node) {
-				if (!h->sdev)
-					continue;
-				h->sdev->access_state =
-					(pg->state & SCSI_ACCESS_STATE_MASK);
-				if (pg->pref)
-					h->sdev->access_state |=
-						SCSI_ACCESS_STATE_PREFERRED;
-			}
-			rcu_read_unlock();
 		}
 		break;
 	case SCSI_ACCESS_STATE_OFFLINE:
@@ -746,6 +734,8 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 		/* Useable path if active */
 		err = SCSI_DH_OK;
 		pg->expiry = 0;
+		/* RTPG succeeded, clear ALUA_PG_FAILED */
+		pg->flags &= ~ALUA_PG_FAILED;
 		break;
 	}
 	spin_unlock_irqrestore(&pg->lock, flags);
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] scsi_dh_alua: mark port group as failed after ALUA transitioning timeout
  2022-05-25  8:11 [PATCH] scsi_dh_alua: mark port group as failed after ALUA transitioning timeout Hannes Reinecke
@ 2022-05-25 11:20 ` Martin Wilck
  2022-05-25 12:07   ` Hannes Reinecke
  0 siblings, 1 reply; 5+ messages in thread
From: Martin Wilck @ 2022-05-25 11:20 UTC (permalink / raw)
  To: Hannes Reinecke, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, linux-scsi, Brian Bunker,
	Martin Wilck

On Wed, 2022-05-25 at 10:11 +0200, Hannes Reinecke wrote:
> When ALUA transitioning timeout triggers the path group state must
> be considered invalid. So add a new flag ALUA_PG_FAILED to indicate
> that the path state isn't necessarily valid, and keep the existing
> path state until we get a valid response from a RTPG.
> 
> Cc: Brian Bunker <brian@purestorage.com>
> Cc: Martin Wilck <mwilck@suse.de>
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>  drivers/scsi/device_handler/scsi_dh_alua.c | 24 +++++++-------------
> --
>  1 file changed, 7 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c
> b/drivers/scsi/device_handler/scsi_dh_alua.c
> index 1d9be771f3ee..6921490a5e65 100644
> --- a/drivers/scsi/device_handler/scsi_dh_alua.c
> +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
> @@ -49,6 +49,7 @@
>  #define ALUA_PG_RUN_RTPG               0x10
>  #define ALUA_PG_RUN_STPG               0x20
>  #define ALUA_PG_RUNNING                        0x40
> +#define ALUA_PG_FAILED                 0x80
>  
>  static uint optimize_stpg;
>  module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
> @@ -420,7 +421,7 @@ static enum scsi_disposition
> alua_check_sense(struct scsi_device *sdev,
>                          */
>                         rcu_read_lock();
>                         pg = rcu_dereference(h->pg);
> -                       if (pg)
> +                       if (pg && !(pg->flags & ALUA_PG_FAILED))
>                                 pg->state =
> SCSI_ACCESS_STATE_TRANSITIONING;
>                         rcu_read_unlock();
>                         alua_check(sdev, false);

You still return NEEDS_RETRY from alua_check_sense(), even if
ALUA_PG_FAILED is set?

> @@ -694,7 +695,7 @@ static int alua_rtpg(struct scsi_device *sdev,
> struct alua_port_group *pg)
>  
>   skip_rtpg:
>         spin_lock_irqsave(&pg->lock, flags);
> -       if (transitioning_sense)
> +       if (transitioning_sense && !(pg->flags & ALUA_PG_FAILED))
>                 pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
>  
>         if (group_id_old != pg->group_id || state_old != pg->state ||
> @@ -718,23 +719,10 @@ static int alua_rtpg(struct scsi_device *sdev,
> struct alua_port_group *pg)
>                         pg->interval = ALUA_RTPG_RETRY_DELAY;
>                         err = SCSI_DH_RETRY;
>                 } else {
> -                       struct alua_dh_data *h;
> -
> -                       /* Transitioning time exceeded, set port to
> standby */
> +                       /* Transitioning time exceeded, mark pg as
> failed */
>                         err = SCSI_DH_IO;
> -                       pg->state = SCSI_ACCESS_STATE_STANDBY;
> +                       pg->flags |= ALUA_PG_FAILED;
>                         pg->expiry = 0;
> -                       rcu_read_lock();
> -                       list_for_each_entry_rcu(h, &pg->dh_list,
> node) {
> -                               if (!h->sdev)
> -                                       continue;
> -                               h->sdev->access_state =
> -                                       (pg->state &
> SCSI_ACCESS_STATE_MASK);
> -                               if (pg->pref)
> -                                       h->sdev->access_state |=
> -
>                                                SCSI_ACCESS_STATE_PREFE
> RRED;
> -                       }
> -                       rcu_read_unlock();
>                 }
>                 break;
>         case SCSI_ACCESS_STATE_OFFLINE:
> @@ -746,6 +734,8 @@ static int alua_rtpg(struct scsi_device *sdev,
> struct alua_port_group *pg)
>                 /* Useable path if active */
>                 err = SCSI_DH_OK;
>                 pg->expiry = 0;
> +               /* RTPG succeeded, clear ALUA_PG_FAILED */
> +               pg->flags &= ~ALUA_PG_FAILED;

Shouldn't this be done for any state except TRANSITIONING?

Btw I've re-read SPC6 and found "The IMPLICIT TRANSITION TIME field
indicates the minimum amount of time in seconds the application client
should wait prior to timing out an implicit state transition (see
5.18.2.3)". 

This is unclear to me. "minimum amount of time" suggests that the host
could decide to wait longer until it times out the transition. Worse,
the spec doesn't clearly define when the transition is supposed to have
started. From the host's PoV it makes sense to assume the start time is
when it first encounters either TRANSITIONING from an RTPG, or NOT
READY / 04 / 0a from a regular I/O, which is what we currently do. But
the spec is remarkably unclear about it. Finally, the spec explicitly
says that the timeout applies only to implicit transitions, without
saying what to do in the case of an explicit transition...

Regards,
Martin


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] scsi_dh_alua: mark port group as failed after ALUA transitioning timeout
  2022-05-25 11:20 ` Martin Wilck
@ 2022-05-25 12:07   ` Hannes Reinecke
  2022-05-25 19:33     ` Martin Wilck
  0 siblings, 1 reply; 5+ messages in thread
From: Hannes Reinecke @ 2022-05-25 12:07 UTC (permalink / raw)
  To: Martin Wilck, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, linux-scsi, Brian Bunker,
	Martin Wilck

On 5/25/22 13:20, Martin Wilck wrote:
> On Wed, 2022-05-25 at 10:11 +0200, Hannes Reinecke wrote:
>> When ALUA transitioning timeout triggers the path group state must
>> be considered invalid. So add a new flag ALUA_PG_FAILED to indicate
>> that the path state isn't necessarily valid, and keep the existing
>> path state until we get a valid response from a RTPG.
>>
>> Cc: Brian Bunker <brian@purestorage.com>
>> Cc: Martin Wilck <mwilck@suse.de>
>> Signed-off-by: Hannes Reinecke <hare@suse.de>
>> ---
>>   drivers/scsi/device_handler/scsi_dh_alua.c | 24 +++++++-------------
>> --
>>   1 file changed, 7 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c
>> b/drivers/scsi/device_handler/scsi_dh_alua.c
>> index 1d9be771f3ee..6921490a5e65 100644
>> --- a/drivers/scsi/device_handler/scsi_dh_alua.c
>> +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
>> @@ -49,6 +49,7 @@
>>   #define ALUA_PG_RUN_RTPG               0x10
>>   #define ALUA_PG_RUN_STPG               0x20
>>   #define ALUA_PG_RUNNING                        0x40
>> +#define ALUA_PG_FAILED                 0x80
>>   
>>   static uint optimize_stpg;
>>   module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
>> @@ -420,7 +421,7 @@ static enum scsi_disposition
>> alua_check_sense(struct scsi_device *sdev,
>>                           */
>>                          rcu_read_lock();
>>                          pg = rcu_dereference(h->pg);
>> -                       if (pg)
>> +                       if (pg && !(pg->flags & ALUA_PG_FAILED))
>>                                  pg->state =
>> SCSI_ACCESS_STATE_TRANSITIONING;
>>                          rcu_read_unlock();
>>                          alua_check(sdev, false);
> 
> You still return NEEDS_RETRY from alua_check_sense(), even if
> ALUA_PG_FAILED is set?
> 
>> @@ -694,7 +695,7 @@ static int alua_rtpg(struct scsi_device *sdev,
>> struct alua_port_group *pg)
>>   
>>    skip_rtpg:
>>          spin_lock_irqsave(&pg->lock, flags);
>> -       if (transitioning_sense)
>> +       if (transitioning_sense && !(pg->flags & ALUA_PG_FAILED))
>>                  pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
>>   
>>          if (group_id_old != pg->group_id || state_old != pg->state ||
>> @@ -718,23 +719,10 @@ static int alua_rtpg(struct scsi_device *sdev,
>> struct alua_port_group *pg)
>>                          pg->interval = ALUA_RTPG_RETRY_DELAY;
>>                          err = SCSI_DH_RETRY;
>>                  } else {
>> -                       struct alua_dh_data *h;
>> -
>> -                       /* Transitioning time exceeded, set port to
>> standby */
>> +                       /* Transitioning time exceeded, mark pg as
>> failed */
>>                          err = SCSI_DH_IO;
>> -                       pg->state = SCSI_ACCESS_STATE_STANDBY;
>> +                       pg->flags |= ALUA_PG_FAILED;
>>                          pg->expiry = 0;
>> -                       rcu_read_lock();
>> -                       list_for_each_entry_rcu(h, &pg->dh_list,
>> node) {
>> -                               if (!h->sdev)
>> -                                       continue;
>> -                               h->sdev->access_state =
>> -                                       (pg->state &
>> SCSI_ACCESS_STATE_MASK);
>> -                               if (pg->pref)
>> -                                       h->sdev->access_state |=
>> -
>>                                                 SCSI_ACCESS_STATE_PREFE
>> RRED;
>> -                       }
>> -                       rcu_read_unlock();
>>                  }
>>                  break;
>>          case SCSI_ACCESS_STATE_OFFLINE:
>> @@ -746,6 +734,8 @@ static int alua_rtpg(struct scsi_device *sdev,
>> struct alua_port_group *pg)
>>                  /* Useable path if active */
>>                  err = SCSI_DH_OK;
>>                  pg->expiry = 0;
>> +               /* RTPG succeeded, clear ALUA_PG_FAILED */
>> +               pg->flags &= ~ALUA_PG_FAILED;
> 
> Shouldn't this be done for any state except TRANSITIONING?
> 
Why, but it does.
We're only entering this block if the state is not TRANSITIONING.
(It's part of a 'switch' statement)

> Btw I've re-read SPC6 and found "The IMPLICIT TRANSITION TIME field
> indicates the minimum amount of time in seconds the application client
> should wait prior to timing out an implicit state transition (see
> 5.18.2.3)".
> 
> This is unclear to me. "minimum amount of time" suggests that the host
> could decide to wait longer until it times out the transition. Worse,
> the spec doesn't clearly define when the transition is supposed to have
> started. From the host's PoV it makes sense to assume the start time is
> when it first encounters either TRANSITIONING from an RTPG, or NOT
> READY / 04 / 0a from a regular I/O, which is what we currently do. But
> the spec is remarkably unclear about it. Finally, the spec explicitly
> says that the timeout applies only to implicit transitions, without
> saying what to do in the case of an explicit transition...
> 
Why, but it does; "5.15.2.5 Transitions between target port asymmetric 
access states" speaks at length what should happen when transitions 
fail. Problem is, a timeout expiring is something outside the spec, so 
one can't be sure that the array does what's expected here.

But the one key takeaway is that any port in transitioning is allowed to 
ignore TMFs, so any SCSI EH is pointless anyway.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		           Kernel Storage Architect
hare@suse.de			                  +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] scsi_dh_alua: mark port group as failed after ALUA transitioning timeout
  2022-05-25 12:07   ` Hannes Reinecke
@ 2022-05-25 19:33     ` Martin Wilck
  2022-06-08  7:32       ` Martin Wilck
  0 siblings, 1 reply; 5+ messages in thread
From: Martin Wilck @ 2022-05-25 19:33 UTC (permalink / raw)
  To: Hannes Reinecke, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, linux-scsi, Brian Bunker,
	Martin Wilck

On Wed, 2022-05-25 at 14:07 +0200, Hannes Reinecke wrote:
> On 5/25/22 13:20, Martin Wilck wrote:
> > On Wed, 2022-05-25 at 10:11 +0200, Hannes Reinecke wrote:
> > > When ALUA transitioning timeout triggers the path group state
> > > must
> > > be considered invalid. So add a new flag ALUA_PG_FAILED to
> > > indicate
> > > that the path state isn't necessarily valid, and keep the
> > > existing
> > > path state until we get a valid response from a RTPG.
> > > 
> > > Cc: Brian Bunker <brian@purestorage.com>
> > > Cc: Martin Wilck <mwilck@suse.de>
> > > Signed-off-by: Hannes Reinecke <hare@suse.de>
> > > ---
> > >   drivers/scsi/device_handler/scsi_dh_alua.c | 24 +++++++--------
> > > -----
> > > --
> > >   1 file changed, 7 insertions(+), 17 deletions(-)
> > > 
> > > diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c
> > > b/drivers/scsi/device_handler/scsi_dh_alua.c
> > > index 1d9be771f3ee..6921490a5e65 100644
> > > --- a/drivers/scsi/device_handler/scsi_dh_alua.c
> > > +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
> > > @@ -49,6 +49,7 @@
> > >   #define ALUA_PG_RUN_RTPG               0x10
> > >   #define ALUA_PG_RUN_STPG               0x20
> > >   #define ALUA_PG_RUNNING                        0x40
> > > +#define ALUA_PG_FAILED                 0x80
> > >   
> > >   static uint optimize_stpg;
> > >   module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
> > > @@ -420,7 +421,7 @@ static enum scsi_disposition
> > > alua_check_sense(struct scsi_device *sdev,
> > >                           */
> > >                          rcu_read_lock();
> > >                          pg = rcu_dereference(h->pg);
> > > -                       if (pg)
> > > +                       if (pg && !(pg->flags & ALUA_PG_FAILED))
> > >                                  pg->state =
> > > SCSI_ACCESS_STATE_TRANSITIONING;
> > >                          rcu_read_unlock();
> > >                          alua_check(sdev, false);
> > 
> > You still return NEEDS_RETRY from alua_check_sense(), even if
> > ALUA_PG_FAILED is set?
> > 
> > > @@ -694,7 +695,7 @@ static int alua_rtpg(struct scsi_device
> > > *sdev,
> > > struct alua_port_group *pg)
> > >   
> > >    skip_rtpg:
> > >          spin_lock_irqsave(&pg->lock, flags);
> > > -       if (transitioning_sense)
> > > +       if (transitioning_sense && !(pg->flags & ALUA_PG_FAILED))
> > >                  pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
> > >   
> > >          if (group_id_old != pg->group_id || state_old != pg-
> > > >state ||
> > > @@ -718,23 +719,10 @@ static int alua_rtpg(struct scsi_device
> > > *sdev,
> > > struct alua_port_group *pg)
> > >                          pg->interval = ALUA_RTPG_RETRY_DELAY;
> > >                          err = SCSI_DH_RETRY;
> > >                  } else {
> > > -                       struct alua_dh_data *h;
> > > -
> > > -                       /* Transitioning time exceeded, set port
> > > to
> > > standby */
> > > +                       /* Transitioning time exceeded, mark pg
> > > as
> > > failed */
> > >                          err = SCSI_DH_IO;
> > > -                       pg->state = SCSI_ACCESS_STATE_STANDBY;
> > > +                       pg->flags |= ALUA_PG_FAILED;
> > >                          pg->expiry = 0;
> > > -                       rcu_read_lock();
> > > -                       list_for_each_entry_rcu(h, &pg->dh_list,
> > > node) {
> > > -                               if (!h->sdev)
> > > -                                       continue;
> > > -                               h->sdev->access_state =
> > > -                                       (pg->state &
> > > SCSI_ACCESS_STATE_MASK);
> > > -                               if (pg->pref)
> > > -                                       h->sdev->access_state |=
> > > -
> > >                                                 SCSI_ACCESS_STATE
> > > _PREFE
> > > RRED;
> > > -                       }
> > > -                       rcu_read_unlock();
> > >                  }
> > >                  break;
> > >          case SCSI_ACCESS_STATE_OFFLINE:
> > > @@ -746,6 +734,8 @@ static int alua_rtpg(struct scsi_device
> > > *sdev,
> > > struct alua_port_group *pg)
> > >                  /* Useable path if active */
> > >                  err = SCSI_DH_OK;
> > >                  pg->expiry = 0;
> > > +               /* RTPG succeeded, clear ALUA_PG_FAILED */
> > > +               pg->flags &= ~ALUA_PG_FAILED;
> > 
> > Shouldn't this be done for any state except TRANSITIONING?
> > 
> Why, but it does.
> We're only entering this block if the state is not TRANSITIONING.
> (It's part of a 'switch' statement)

Right, the only exception is SCSI_ACCESS_STATE_OFFLINE, for which the
ALUA_PG_FAILED state should of cause persist. Sorry for bothering.


Martin

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] scsi_dh_alua: mark port group as failed after ALUA transitioning timeout
  2022-05-25 19:33     ` Martin Wilck
@ 2022-06-08  7:32       ` Martin Wilck
  0 siblings, 0 replies; 5+ messages in thread
From: Martin Wilck @ 2022-06-08  7:32 UTC (permalink / raw)
  To: Hannes Reinecke, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, linux-scsi, Brian Bunker,
	Martin Wilck

On Wed, 2022-05-25 at 21:33 +0200, Martin Wilck wrote:
> On Wed, 2022-05-25 at 14:07 +0200, Hannes Reinecke wrote:
> > On 5/25/22 13:20, Martin Wilck wrote:
> > > On Wed, 2022-05-25 at 10:11 +0200, Hannes Reinecke wrote:
> > > > When ALUA transitioning timeout triggers the path group state
> > > > must
> > > > be considered invalid. So add a new flag ALUA_PG_FAILED to
> > > > indicate
> > > > that the path state isn't necessarily valid, and keep the
> > > > existing
> > > > path state until we get a valid response from a RTPG.
> > > > 
> > > > Cc: Brian Bunker <brian@purestorage.com>
> > > > Cc: Martin Wilck <mwilck@suse.de>
> > > > Signed-off-by: Hannes Reinecke <hare@suse.de>
> > > > ---
> > > >   drivers/scsi/device_handler/scsi_dh_alua.c | 24 +++++++------
> > > > --
> > > > -----
> > > > --
> > > >   1 file changed, 7 insertions(+), 17 deletions(-)
> > > > 
> > > > diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c
> > > > b/drivers/scsi/device_handler/scsi_dh_alua.c
> > > > index 1d9be771f3ee..6921490a5e65 100644
> > > > --- a/drivers/scsi/device_handler/scsi_dh_alua.c
> > > > +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
> > > > @@ -49,6 +49,7 @@
> > > >   #define ALUA_PG_RUN_RTPG               0x10
> > > >   #define ALUA_PG_RUN_STPG               0x20
> > > >   #define ALUA_PG_RUNNING                        0x40
> > > > +#define ALUA_PG_FAILED                 0x80
> > > >   
> > > >   static uint optimize_stpg;
> > > >   module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
> > > > @@ -420,7 +421,7 @@ static enum scsi_disposition
> > > > alua_check_sense(struct scsi_device *sdev,
> > > >                           */
> > > >                          rcu_read_lock();
> > > >                          pg = rcu_dereference(h->pg);
> > > > -                       if (pg)
> > > > +                       if (pg && !(pg->flags &
> > > > ALUA_PG_FAILED))
> > > >                                  pg->state =
> > > > SCSI_ACCESS_STATE_TRANSITIONING;
> > > >                          rcu_read_unlock();
> > > >                          alua_check(sdev, false);
> > > 
> > > You still return NEEDS_RETRY from alua_check_sense(), even if
> > > ALUA_PG_FAILED is set?
> > > 
> > > > @@ -694,7 +695,7 @@ static int alua_rtpg(struct scsi_device
> > > > *sdev,
> > > > struct alua_port_group *pg)
> > > >   
> > > >    skip_rtpg:
> > > >          spin_lock_irqsave(&pg->lock, flags);
> > > > -       if (transitioning_sense)
> > > > +       if (transitioning_sense && !(pg->flags &
> > > > ALUA_PG_FAILED))
> > > >                  pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
> > > >   
> > > >          if (group_id_old != pg->group_id || state_old != pg-
> > > > > state ||
> > > > @@ -718,23 +719,10 @@ static int alua_rtpg(struct scsi_device
> > > > *sdev,
> > > > struct alua_port_group *pg)
> > > >                          pg->interval = ALUA_RTPG_RETRY_DELAY;
> > > >                          err = SCSI_DH_RETRY;
> > > >                  } else {
> > > > -                       struct alua_dh_data *h;
> > > > -
> > > > -                       /* Transitioning time exceeded, set
> > > > port
> > > > to
> > > > standby */
> > > > +                       /* Transitioning time exceeded, mark pg
> > > > as
> > > > failed */
> > > >                          err = SCSI_DH_IO;
> > > > -                       pg->state = SCSI_ACCESS_STATE_STANDBY;
> > > > +                       pg->flags |= ALUA_PG_FAILED;
> > > >                          pg->expiry = 0;
> > > > -                       rcu_read_lock();
> > > > -                       list_for_each_entry_rcu(h, &pg-
> > > > >dh_list,
> > > > node) {
> > > > -                               if (!h->sdev)
> > > > -                                       continue;
> > > > -                               h->sdev->access_state =
> > > > -                                       (pg->state &
> > > > SCSI_ACCESS_STATE_MASK);
> > > > -                               if (pg->pref)
> > > > -                                       h->sdev->access_state
> > > > |=
> > > > -
> > > >                                                 SCSI_ACCESS_STA
> > > > TE
> > > > _PREFE
> > > > RRED;
> > > > -                       }
> > > > -                       rcu_read_unlock();
> > > >                  }
> > > >                  break;
> > > >          case SCSI_ACCESS_STATE_OFFLINE:
> > > > @@ -746,6 +734,8 @@ static int alua_rtpg(struct scsi_device
> > > > *sdev,
> > > > struct alua_port_group *pg)
> > > >                  /* Useable path if active */
> > > >                  err = SCSI_DH_OK;
> > > >                  pg->expiry = 0;
> > > > +               /* RTPG succeeded, clear ALUA_PG_FAILED */
> > > > +               pg->flags &= ~ALUA_PG_FAILED;
> > > 
> > > Shouldn't this be done for any state except TRANSITIONING?
> > > 
> > Why, but it does.
> > We're only entering this block if the state is not TRANSITIONING.
> > (It's part of a 'switch' statement)
> 
> Right, the only exception is SCSI_ACCESS_STATE_OFFLINE, for which the
> ALUA_PG_FAILED state should of cause persist. Sorry for bothering.
> 

What I'm still missing here is how we avoid sending further I/O down
the path after the timeout has expired.

alua_check_sense() will return NEEDS_RETRY, even if ALUA_PG_FAILED is
set. Without this patch, alua_prep_fn() would return an error because
it would see STANDBY state. With this patch, the state will stay
TRANSITIONING, and (after 6056a92ceb2a ("scsi: scsi_dh_alua: Properly
handle the ALUA transitioning state")) alua_prep_fn() will return
BLK_STS_OK.

IMO alua_check_sense() should return SUCCESS if ALUA_PG_FAILED is set,
and alua_prep_fn() should return an error.

Regards
Martin

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-06-08  7:59 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-25  8:11 [PATCH] scsi_dh_alua: mark port group as failed after ALUA transitioning timeout Hannes Reinecke
2022-05-25 11:20 ` Martin Wilck
2022-05-25 12:07   ` Hannes Reinecke
2022-05-25 19:33     ` Martin Wilck
2022-06-08  7:32       ` Martin Wilck

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.