All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Use a more selective error recovery strategy based on device capabilities
@ 2013-02-12 18:19 Jeremy Linton
  2013-02-12 20:57 ` Elliott, Robert (Server Storage)
  2013-02-13 13:06 ` Hannes Reinecke
  0 siblings, 2 replies; 10+ messages in thread
From: Jeremy Linton @ 2013-02-12 18:19 UTC (permalink / raw)
  To: Linux Scsi

[-- Attachment #1: Type: text/plain, Size: 965 bytes --]

Ideally, Linux should not be sending task management commands to devices that
don't support the given task mgmt operation.

This patch uses the REPORT SUPPORTED TASK MGMT FUNCTIONS command to enable or
disable error recovery paths for a given device. For older devices, we make an
educated guess about what kind of error recovery the device supports. This isn't
going to be 100% accurate as it should probably take the transport as well as
the SCSI version into account, but it is a start.

While this patch improves the error recovery paths for modern SCSI networks, the
error recovery logic continues to fall through to host reset. It also continues
to send bus and target resets in cases where they may affect working devices. I
have a partial set of patches which attempt to make intelligent decisions in
these cases, but they are far more intrusive and at this point not as clear cut.


Just in case...
Signed-off-by: Jeremy Linton <jlinton@tributary.com>
---

[-- Attachment #2: more_selective_error_recovery.patch --]
[-- Type: text/x-patch, Size: 9854 bytes --]

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index c1b05a8..b249c2f 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -572,24 +572,25 @@ static int scsi_try_host_reset(struct scsi_cmnd *scmd)
 static int scsi_try_bus_reset(struct scsi_cmnd *scmd)
 {
 	unsigned long flags;
-	int rtn;
+	int rtn = FAILED ;
 	struct Scsi_Host *host = scmd->device->host;
 	struct scsi_host_template *hostt = host->hostt;
+	struct scsi_device *sdev = scmd->device;
 
 	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n",
 					  __func__));
 
-	if (!hostt->eh_bus_reset_handler)
-		return FAILED;
+	if ((sdev->bus_reset_ok) && (hostt->eh_bus_reset_handler)) {
 
-	rtn = hostt->eh_bus_reset_handler(scmd);
+		rtn = hostt->eh_bus_reset_handler(scmd);
 
-	if (rtn == SUCCESS) {
-		if (!hostt->skip_settle_delay)
-			ssleep(BUS_RESET_SETTLE_TIME);
-		spin_lock_irqsave(host->host_lock, flags);
-		scsi_report_bus_reset(host, scmd_channel(scmd));
-		spin_unlock_irqrestore(host->host_lock, flags);
+		if (rtn == SUCCESS) {
+			if (!hostt->skip_settle_delay)
+				ssleep(BUS_RESET_SETTLE_TIME);
+			spin_lock_irqsave(host->host_lock, flags);
+			scsi_report_bus_reset(host, scmd_channel(scmd));
+			spin_unlock_irqrestore(host->host_lock, flags);
+		}
 	}
 
 	return rtn;
@@ -601,6 +602,7 @@ static void __scsi_report_device_reset(struct scsi_device *sdev, void *data)
 	sdev->expecting_cc_ua = 1;
 }
 
+
 /**
  * scsi_try_target_reset - Ask host to perform a target reset
  * @scmd:	SCSI cmd used to send a target reset
@@ -614,19 +616,26 @@ static void __scsi_report_device_reset(struct scsi_device *sdev, void *data)
 static int scsi_try_target_reset(struct scsi_cmnd *scmd)
 {
 	unsigned long flags;
-	int rtn;
+	int rtn = FAILED;
+	struct scsi_device *sdev = scmd->device;
 	struct Scsi_Host *host = scmd->device->host;
 	struct scsi_host_template *hostt = host->hostt;
 
-	if (!hostt->eh_target_reset_handler)
-		return FAILED;
+	if ((sdev->target_reset_ok) && (hostt->eh_target_reset_handler)) {
 
-	rtn = hostt->eh_target_reset_handler(scmd);
-	if (rtn == SUCCESS) {
-		spin_lock_irqsave(host->host_lock, flags);
-		__starget_for_each_device(scsi_target(scmd->device), NULL,
-					  __scsi_report_device_reset);
-		spin_unlock_irqrestore(host->host_lock, flags);
+		// TODO: Determine if other devices on this IT are experiencing
+		// issues. If not, return success without doing anything. 
+		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd target RST\n", 
+						  __func__));
+
+		rtn = hostt->eh_target_reset_handler(scmd);
+
+		if (rtn == SUCCESS) {
+			spin_lock_irqsave(host->host_lock, flags);
+			__starget_for_each_device(scsi_target(scmd->device), NULL,
+						     __scsi_report_device_reset);
+			spin_unlock_irqrestore(host->host_lock, flags);
+		}
 	}
 
 	return rtn;
@@ -644,24 +653,36 @@ static int scsi_try_target_reset(struct scsi_cmnd *scmd)
  */
 static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd)
 {
-	int rtn;
+	int rtn = FAILED;
 	struct scsi_host_template *hostt = scmd->device->host->hostt;
+	struct scsi_device *sdev = scmd->device;
 
-	if (!hostt->eh_device_reset_handler)
-		return FAILED;
+	if ((sdev->task_unit_reset_ok) && (hostt->eh_device_reset_handler)) {
+	       SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd LUN RST\n", 
+						 __func__));
+		rtn = hostt->eh_device_reset_handler(scmd);
+
+		if (rtn == SUCCESS)
+		    __scsi_report_device_reset(scmd->device, NULL);
+	}
 
-	rtn = hostt->eh_device_reset_handler(scmd);
-	if (rtn == SUCCESS)
-		__scsi_report_device_reset(scmd->device, NULL);
 	return rtn;
 }
 
 static int scsi_try_to_abort_cmd(struct scsi_host_template *hostt, struct scsi_cmnd *scmd)
 {
-	if (!hostt->eh_abort_handler)
-		return FAILED;
+	int rtn = FAILED;
+	struct scsi_device *sdev = scmd->device;
+
+	if ((sdev->task_abort_ok) && (hostt->eh_abort_handler))
+	{
+		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n", 
+						  __func__));
 
-	return hostt->eh_abort_handler(scmd);
+		rtn=hostt->eh_abort_handler(scmd);
+	}
+
+	return rtn;
 }
 
 static void scsi_abort_eh_cmnd(struct scsi_cmnd *scmd)
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 3e58b22..a71552b 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -525,6 +525,144 @@ static void sanitize_inquiry_string(unsigned char *s, int len)
 }
 
 /**
+ * scsi_query_task_mgmt_support - retrieve task mgmt bits from device
+ * @sdev: Readonly, scsi_device to probe
+ *
+ * Description:
+ *     sends as REPORT SUPPORTED TASK MGMT FUNCTIONS command to given device.
+ *     if it succeeds then the error recovery bits (abort, LU reset, IT reset
+ *     , etc) are set based on the return data. 
+ **/
+static void scsi_query_task_mgmt_support(struct scsi_device *sdev)
+{
+	int retries;
+	int resid,result;
+	struct scsi_sense_hdr sshdr;
+	unsigned char scsi_cmd[MAX_COMMAND_SIZE];
+	unsigned char *report_task_result = NULL;
+
+	report_task_result = kmalloc(4, GFP_ATOMIC | 
+				     ((sdev->host->unchecked_isa_dma) 
+				      ? __GFP_DMA : 0));
+	if (report_task_result)
+	{
+	        retries=3; result=1;
+		/* This tends to be the first command sent that can catch the UA
+		 * as we are probing, in this case it might be ok to trap the 
+		 * UA. In general UA's _MUST_ propagate to the owner of the 
+		 * device that is so mode pages, tape positions, etc may be 
+		 * updated after power loss/IT nexus failure. 
+		 **/
+		while ((retries) && (result))
+		{
+		    memset(scsi_cmd,0,MAX_COMMAND_SIZE);
+		    scsi_cmd[0]=0xa3;
+		    scsi_cmd[1]=0x0D;
+		    scsi_cmd[9]=0x04;
+		    
+		    /* send the command, use the inq timeout as this command 
+		     * should be fairly fast */
+		    result = scsi_execute_req(sdev, scsi_cmd, DMA_FROM_DEVICE,
+					      report_task_result, 4, &sshdr, 
+					      HZ / 2 + HZ * scsi_inq_timeout,
+					      3, &resid);
+		    if (result==0) 
+		    {
+			sdev->task_abort_ok =
+			    ((report_task_result[0]&0x80)==0x80); 
+			sdev->task_unit_reset_ok =
+			    ((report_task_result[0]&0x08)==0x08);
+			sdev->target_reset_ok =
+			    ((report_task_result[0]&0x02)==0x02);
+			sdev->it_reset_ok =
+			    (report_task_result[1]&0x01);
+			
+			
+			
+			/* Use only a single reset strategy.
+			 * Both IT and Target can affect devices besides the one
+			 * in question. Until the linux eh code is smart enough
+			 * to be able to test other devices on the IT/target 
+			 * then really only the LUN reset should be used.
+			 */
+			if (sdev->task_unit_reset_ok)
+			{
+			    sdev->target_reset_ok=0;
+			}
+			
+		    }
+		    else
+		    {
+			SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev,
+				"query task mgmt: Got task mgmt error 0x%X"
+							 ,result));
+			retries--;
+		    }
+		}
+		kfree(report_task_result);
+	}
+}
+
+/**
+ * scsi_retrieve_task_mgmt_support - sets the error recovery bits
+ * @sdev: Readonly, scsi_device to probe
+ *
+ * Description:
+ *     Sets the default error recovery strategy based on the SCSI level
+ *     of the given device. Newer devices will also be queried to determine
+ *     if they can report their supported error recovery methods. 
+ **/
+static int scsi_retrieve_task_mgmt_support(struct scsi_device *sdev)
+{
+	/* default to some basic capabilities based on reported scsi level.
+	 * Of course this can't be 100% accurate, due to converter boxes, bad 
+	 * devices, etc. Hence the need for the device to report its capabilities. 
+	 
+	 * For now lets default the capabilities roughly based on the version 
+	 * aka old SCSI can do bus reset, but not abort..
+	 * FC can do abort/target reset, etc..
+	 **/
+	switch (sdev->scsi_level)  {
+	/* mostly ancient and broken SPI devices */
+	case SCSI_UNKNOWN:
+	case SCSI_1:
+	case SCSI_1_CCS:
+		 sdev->task_abort_ok=0;
+		 sdev->task_unit_reset_ok=0;
+		 sdev->target_reset_ok=0;
+		 sdev->it_reset_ok=0;
+		 sdev->bus_reset_ok=1;
+		 break;
+	/*lots of fairly common hardware here*/
+	case SCSI_2: 
+	case SCSI_3: 
+	        sdev->task_abort_ok=1;
+		sdev->task_unit_reset_ok=0;
+		sdev->target_reset_ok=1;
+		sdev->it_reset_ok=0;
+		sdev->bus_reset_ok=0;
+		break;
+	case SCSI_SPC_2:
+	case SCSI_SPC_3:
+	/* newer than SPC3 */
+        default: 
+	       sdev->task_abort_ok=1;
+	       sdev->task_unit_reset_ok=1;
+	       sdev->target_reset_ok=0;
+	       sdev->it_reset_ok=0;
+	       sdev->bus_reset_ok=0;
+	       break;
+	}
+			
+	if (sdev->scsi_level>SCSI_3)
+	{
+	    scsi_query_task_mgmt_support(sdev);
+	}
+
+	return 0;
+}
+
+/**
  * scsi_probe_lun - probe a single LUN using a SCSI INQUIRY
  * @sdev:	scsi_device to probe
  * @inq_result:	area to store the INQUIRY result
@@ -898,6 +1036,11 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
 	if (*bflags & BLIST_USE_10_BYTE_MS)
 		sdev->use_10_for_ms = 1;
 
+	/* determine the supported error handing */
+	scsi_retrieve_task_mgmt_support(sdev);
+	
+
+
 	/* set the device running here so that slave configure
 	 * may do I/O */
 	ret = scsi_device_set_state(sdev, SDEV_RUNNING);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index e65c62e..349563c 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -160,6 +160,11 @@ struct scsi_device {
 	unsigned can_power_off:1; /* Device supports runtime power off */
 	unsigned wce_default_on:1;	/* Cache is ON by default */
 	unsigned no_dif:1;	/* T10 PI (DIF) should be disabled */
+	unsigned task_abort_ok:1; /* can we send aborts? */
+	unsigned task_unit_reset_ok:1;  /* can we send lun reset? */
+	unsigned target_reset_ok:1;  /* can we send target reset? */
+	unsigned it_reset_ok:1;  /* can we send IT nexus reset */
+	unsigned bus_reset_ok:1;  /* can we send bus reset */
 
 	DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */
 	struct list_head event_list;	/* asserted events */

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* RE: [PATCH] Use a more selective error recovery strategy based on device capabilities
  2013-02-12 18:19 [PATCH] Use a more selective error recovery strategy based on device capabilities Jeremy Linton
@ 2013-02-12 20:57 ` Elliott, Robert (Server Storage)
  2013-02-12 22:00   ` Jeremy Linton
  2013-02-13 13:06 ` Hannes Reinecke
  1 sibling, 1 reply; 10+ messages in thread
From: Elliott, Robert (Server Storage) @ 2013-02-12 20:57 UTC (permalink / raw)
  To: linux-scsi

I like that concept.  

Since TMFs are protocol specific, though, it is possible that the SCSI initiator port doesn't know how to send a TMF even though the device server supports that TMF.  The SCSI Architecture Model (SAM-5) standard includes an annex listing a variety of SCSI Initiator Port attributes and SCSI Target Port attributes that can limit what an application client and device server can do:
- LUN size
- Maximum CDB length
- Command Identifier size (i.e. tag size)
- Task Attributes supported (which ones) (e.g., SIMPLE, HEAD OF QUEUE, ORDERED, ACA)
- Maximum Data-In Buffer size
- Maximum Data-Out Buffer size
- Command Reference Number supported?
- Command Priority supported?
- Maximum Sense Data length
- Status Qualifier supported?
- Additional Response Information supported?
- Bidirectional Commands supported?
- TMFs supported (which ones)

How the application client determines what the SCSI initiator port supports is outside the scope of the SCSI standards - there's no SCSI command or TMF to report that information.  These attributes are first constrained by the SCSI transport protocol (e.g., SRP doesn't define an encoding for QUERY TASK), then by implementation choices (e.g., bidirectional commands and >16-byte CDBs are still not widely supported).

Ideally the device driver for the SCSI initiator port would report those attributes, and higher level code would combine them with support information from the device server (REPORT SUPPORTED TMF command, REPORT SUPPORTED OPCODES command, etc.) to decide what is supported.

---
Rob Elliott    HP Server Storage







> -----Original Message-----
> From: linux-scsi-owner@vger.kernel.org [mailto:linux-scsi-
> owner@vger.kernel.org] On Behalf Of Jeremy Linton
> Sent: Tuesday, 12 February, 2013 12:20 PM
> To: Linux Scsi
> Subject: [PATCH] Use a more selective error recovery strategy based on device
> capabilities
> 
> Ideally, Linux should not be sending task management commands to devices
> that
> don't support the given task mgmt operation.
> 
> This patch uses the REPORT SUPPORTED TASK MGMT FUNCTIONS command to
> enable or
> disable error recovery paths for a given device. For older devices, we make an
> educated guess about what kind of error recovery the device supports. This isn't
> going to be 100% accurate as it should probably take the transport as well as
> the SCSI version into account, but it is a start.
> 
> While this patch improves the error recovery paths for modern SCSI networks,
> the
> error recovery logic continues to fall through to host reset. It also continues
> to send bus and target resets in cases where they may affect working devices. I
> have a partial set of patches which attempt to make intelligent decisions in
> these cases, but they are far more intrusive and at this point not as clear cut.
> 
> 
> Just in case...
> Signed-off-by: Jeremy Linton <jlinton@tributary.com>
> ---

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Use a more selective error recovery strategy based on device capabilities
  2013-02-12 20:57 ` Elliott, Robert (Server Storage)
@ 2013-02-12 22:00   ` Jeremy Linton
  0 siblings, 0 replies; 10+ messages in thread
From: Jeremy Linton @ 2013-02-12 22:00 UTC (permalink / raw)
  To: Elliott, Robert (Server Storage); +Cc: linux-scsi

On 2/12/2013 2:57 PM, Elliott, Robert (Server Storage) wrote:
> Ideally the device driver for the SCSI initiator port would report those
> attributes, and higher level code would combine them with support
> information from the device server (REPORT SUPPORTED TMF command, REPORT
> SUPPORTED OPCODES command, etc.) to decide what is supported.


Well, for the eh_xxx_handler functions, that is basically what happens now.

The host driver can fail to set a callback for the eh_xxx_handlers if it
doesn't support the operation. At that point, even if the target device
supports a function (say target reset) if the host driver doesn't, then the
target reset will be skipped.

Of course, a number of the drivers define handlers for functions their
underlying protocols don't support (for example, bus reset on Fibre Channel),
which I personally believe is an error.



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Use a more selective error recovery strategy based on device capabilities
  2013-02-12 18:19 [PATCH] Use a more selective error recovery strategy based on device capabilities Jeremy Linton
  2013-02-12 20:57 ` Elliott, Robert (Server Storage)
@ 2013-02-13 13:06 ` Hannes Reinecke
  2013-02-13 15:46   ` Jeremy Linton
  1 sibling, 1 reply; 10+ messages in thread
From: Hannes Reinecke @ 2013-02-13 13:06 UTC (permalink / raw)
  To: Jeremy Linton; +Cc: Linux Scsi, Elliott, Robert (Server Storage)

On 02/12/2013 07:19 PM, Jeremy Linton wrote:
> Ideally, Linux should not be sending task management commands to devices that
> don't support the given task mgmt operation.
>
> This patch uses the REPORT SUPPORTED TASK MGMT FUNCTIONS command to enable or
> disable error recovery paths for a given device. For older devices, we make an
> educated guess about what kind of error recovery the device supports. This isn't
> going to be 100% accurate as it should probably take the transport as well as
> the SCSI version into account, but it is a start.
>
> While this patch improves the error recovery paths for modern SCSI networks, the
> error recovery logic continues to fall through to host reset. It also continues
> to send bus and target resets in cases where they may affect working devices. I
> have a partial set of patches which attempt to make intelligent decisions in
> these cases, but they are far more intrusive and at this point not as clear cut.
>
>
> Just in case...
> Signed-off-by: Jeremy Linton <jlinton@tributary.com>

Nice concept.

But unfortunately it failed the reality check; of my zoo of storage
arrays only NetApp OnTap 8.X and HP P2000 support the
REPORT SUPPORTED TASK MANAGEMENT FUNCTIONS command.
None of the others (HP EVA, NetApp E-Series, EMC Clariion CX-3) do.

So implementing this command won't buy us much, as it'll default to 
the original behaviour for most cases ...

(Not prejudicing any market share here, of course :-)

Too bad.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Use a more selective error recovery strategy based on device capabilities
  2013-02-13 13:06 ` Hannes Reinecke
@ 2013-02-13 15:46   ` Jeremy Linton
  2013-02-14  2:43     ` Michael Christie
  0 siblings, 1 reply; 10+ messages in thread
From: Jeremy Linton @ 2013-02-13 15:46 UTC (permalink / raw)
  To: Hannes Reinecke; +Cc: Linux Scsi, Elliott, Robert (Server Storage)

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 2/13/2013 7:06 AM, Hannes Reinecke wrote:

> But unfortunately it failed the reality check; of my zoo of storage arrays
> only NetApp OnTap 8.X and HP P2000 supports the REPORT SUPPORTED TASK
> MANAGEMENT FUNCTIONS command. None of the others (HP EVA, NetApp E-Series,
> EMC Clariion CX-3) do.

	I'm not really surprised. I've had better luck with devices that report
compliance with more recent versions of the specification. Of those, I
would guess slightly more than 50% support it. That includes tape drives,
changer devices, and RAID controllers. Of the ones I've tested that support
it, they almost always report 0xDA for the first byte, which corresponds to
abort task, abort task set, clear task set, LUN reset and target reset.

	The patch also includes some "reasonable" defaults based on SCSI version,
which I think vastly improves the overall error handling situation. Querying
the device is just the last little icing.

	But it's a chicken-and-egg thing: the command is optional and hardly anyone
sends it, so few of the device manufacturers have implemented it.









-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.10 (MingW32)
Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/

iQEcBAEBAgAGBQJRG7VFAAoJEL5i86xrzcy7TjwH/R/V52LW66OPSwWVJHjVcbM/
ZQ7ekWZ48oKy4zQrCjpyKMFXqsH5gsMinoruUhiu/Zt8kmef88jti+aMe1RDyCbp
L2fFc5lUOIpHFkzWXCykq8f9ZDcxIi8cLcCR08S71gTZqo/E0gVqYKEzInXtATCQ
HK9yjX7g3FkzYVl9ASiLeBPUT4/kk7IhN+iagc0S+VY8zBG/+jQC9RykenbnAUU1
YLmWsWmzyWiTshk+vmBBpcikk+EEnB25NXZz6cqDSWGcm/wWq4Mcj9i6KvC9tKSr
PzW+0/T2LtedGCTZKnFgNsn1jMJtjId+soAt5Ho4nZD+BHYcr2iSckzWxX3defs=
=hDMi
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Use a more selective error recovery strategy based on device capabilities
  2013-02-13 15:46   ` Jeremy Linton
@ 2013-02-14  2:43     ` Michael Christie
  2013-02-14 20:57       ` Jeremy Linton
  0 siblings, 1 reply; 10+ messages in thread
From: Michael Christie @ 2013-02-14  2:43 UTC (permalink / raw)
  To: Jeremy Linton
  Cc: Hannes Reinecke, Linux Scsi, Elliott, Robert (Server Storage)


On Feb 13, 2013, at 9:46 AM, Jeremy Linton <jlinton@tributary.com> wrote:

> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
> 
> On 2/13/2013 7:06 AM, Hannes Reinecke wrote:
> 
>> But unfortunately it failed the reality check; of my zoo of storage arrays
>> only NetApp OnTap 8.X and HP P2000 supports the REPORT SUPPORTED TASK
>> MANAGEMENT FUNCTIONS command. None of the others (HP EVA, NetApp E-Series,
>> EMC Clariion CX-3) do.
> 
> 	I'm not really surprised. I've had better luck with devices that report
> compliance with more recent versions of the specification. Of the those, I
> would guess slightly more than 50% support it. That includes tape drives,
> changer devices, and raid controllers. Of the ones I've tested that support
> it, they almost always report 0xDA for the first byte. Which correspond to
> abort task, abort task set, clear task set, lun reset and target reset.
> 


For the case where report supported TMFs is not supported, can we just have the LLD return some new return code from the eh callback when it gets FUNCTION_REJECTED? scsi-ml would then clear the eh_*_ok bit, so at least it would not be called again.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Use a more selective error recovery strategy based on device capabilities
  2013-02-14  2:43     ` Michael Christie
@ 2013-02-14 20:57       ` Jeremy Linton
  2013-02-14 23:42         ` Elliott, Robert (Server Storage)
  2013-02-15  7:17         ` Hannes Reinecke
  0 siblings, 2 replies; 10+ messages in thread
From: Jeremy Linton @ 2013-02-14 20:57 UTC (permalink / raw)
  To: Michael Christie
  Cc: Hannes Reinecke, Linux Scsi, Elliott, Robert (Server Storage)

On 2/13/2013 8:43 PM, Michael Christie wrote:
> For the case where report supported TMFs is not supported can we just have the LLD return some new return code from the eh callback when it gets FUNCTION_REJECTED. scsi-ml would then clear the eh_*_ok bit, so at least it would not be called again..


	Hmm, that seems like a good idea. The question is, does propagating the flag
change to all the devices on the I_T make sense?




^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] Use a more selective error recovery strategy based on device capabilities
  2013-02-14 20:57       ` Jeremy Linton
@ 2013-02-14 23:42         ` Elliott, Robert (Server Storage)
  2013-02-15 15:17           ` Jeremy Linton
  2013-02-15  7:17         ` Hannes Reinecke
  1 sibling, 1 reply; 10+ messages in thread
From: Elliott, Robert (Server Storage) @ 2013-02-14 23:42 UTC (permalink / raw)
  To: Jeremy Linton, Michael Christie; +Cc: Hannes Reinecke, Linux Scsi

Each logical unit is independent and is allowed to be different.  

Examples: In a RAID controller, the direct-access block device type logical units (i.e., logical drives) are probably all the same, but storage array controller and enclosure service type logical units might be more limited.  In a tape library, sequential-access device type logical units (i.e., tape drives) might differ from a media changer logical unit.

Although the SCSI transport protocol might currently mandate everything and leave no options, that might change in the next version of the SCSI transport protocol.


> -----Original Message-----
> From: linux-scsi-owner@vger.kernel.org [mailto:linux-scsi-
> owner@vger.kernel.org] On Behalf Of Jeremy Linton
> Sent: Thursday, 14 February, 2013 2:57 PM
> To: Michael Christie
> Cc: Hannes Reinecke; Linux Scsi; Elliott, Robert (Server Storage)
> Subject: Re: [PATCH] Use a more selective error recovery strategy based on
> device capabilities
> 
> On 2/13/2013 8:43 PM, Michael Christie wrote:
> > For the case where report supported TMFs is not supported can we just have
> the LLD return some new return code from the eh callback when it gets
> FUNCTION_REJECTED. scsi-ml would then clear the eh_*_ok bit, so at least it
> would not be called again..
> 
> 
> 	Hmm, that seems like a good idea. The question is, does propagating the
> flag
> change to all the devices on the I_T make sense?
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Use a more selective error recovery strategy based on device capabilities
  2013-02-14 20:57       ` Jeremy Linton
  2013-02-14 23:42         ` Elliott, Robert (Server Storage)
@ 2013-02-15  7:17         ` Hannes Reinecke
  1 sibling, 0 replies; 10+ messages in thread
From: Hannes Reinecke @ 2013-02-15  7:17 UTC (permalink / raw)
  To: Jeremy Linton
  Cc: Michael Christie, Linux Scsi, Elliott, Robert (Server Storage)

On 02/14/2013 09:57 PM, Jeremy Linton wrote:
> On 2/13/2013 8:43 PM, Michael Christie wrote:
>> For the case where report supported TMFs is not supported can we just have the
 >> LLD return some new return code from the eh callback when it 
gets FUNCTION_REJECTED.
 >> scsi-ml would then clear the eh_*_ok bit, so at least it would 
not be called again..
>
>
> 	Hmm, that seems like a good idea. The question is, does propagating the flag
> change to all the devices on the I_T make sense?
>
Hmm. Again, I've yet to find a storage array which actually _does_ 
reject a TMF.
None of mine do, not even the latest and greatest NetApp Ontap 
version ...

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Use a more selective error recovery strategy based on device capabilities
  2013-02-14 23:42         ` Elliott, Robert (Server Storage)
@ 2013-02-15 15:17           ` Jeremy Linton
  0 siblings, 0 replies; 10+ messages in thread
From: Jeremy Linton @ 2013-02-15 15:17 UTC (permalink / raw)
  To: Elliott, Robert (Server Storage)
  Cc: Michael Christie, Hannes Reinecke, Linux Scsi

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 2/14/2013 5:42 PM, Elliott, Robert (Server Storage) wrote:
> Each logical unit is independent and is allowed to be different.

	I was actually just thinking about the target reset and IT reset flags: two
flags which affect the I_T nexus, not the I_T_L nexus.

	For the target reset it's probably a small proportion of devices anyway. The
patch already disables target reset if the device is known to support LUN reset.



-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.10 (MingW32)
Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/

iQEcBAEBAgAGBQJRHlGAAAoJEL5i86xrzcy7Yz8IAKn3w6DnUkPxmasQXi0WljNB
eqBZHIZRx8gjpa6AOP0nBt+FDSmyrhE4vLOUFIiwpcql1jmJ6iwcT+Y4YHwi+GzC
sE/ZtB5UDad4RleXcZIBHJwVVFtW0oCya2jYxr2GQFIEz3EefrfwwXEqdeI85uTv
aLiKakEP6EDQur280T08R9UEpFHirUnhMKoCFsXjtB5T6u2XDRLLWXQ8hE5ILBnX
Mf5HmCF8g1EjOnCJOzcUlhRlDuQe17FcDHyaxPkl2f34Qr+gdPo9WW5Cz38V0RLX
UYqmZI/B1GX7rUaU+Xhc4aAkxq6547cWZUwRLdZ6M4osFZT6GBuw3iRBevzNXDs=
=/1pc
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2013-02-15 15:17 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-02-12 18:19 [PATCH] Use a more selective error recovery strategy based on device capabilities Jeremy Linton
2013-02-12 20:57 ` Elliott, Robert (Server Storage)
2013-02-12 22:00   ` Jeremy Linton
2013-02-13 13:06 ` Hannes Reinecke
2013-02-13 15:46   ` Jeremy Linton
2013-02-14  2:43     ` Michael Christie
2013-02-14 20:57       ` Jeremy Linton
2013-02-14 23:42         ` Elliott, Robert (Server Storage)
2013-02-15 15:17           ` Jeremy Linton
2013-02-15  7:17         ` Hannes Reinecke

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.