linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] scsi error update 1/3
@ 2002-10-04 19:44 Mike Anderson
  2002-10-04 19:56 ` [PATCH] scsi error update 2/3 (enh) Mike Anderson
  0 siblings, 1 reply; 3+ messages in thread
From: Mike Anderson @ 2002-10-04 19:44 UTC (permalink / raw)
  To: linux-scsi, linux-kernel

This series of patches is an update to scsi error handling.

00_scsi-error-base-1.diff:
	- Fix bug on incorrect check of scsi_eh_tur return value.
	- Fix debug printk format problems.
	- Removed ref to arch specific semaphore value in debug printk

01_scsi-error-enh-1.diff:
	- Forward port of Russell King's retry scsi cmd restore.
	- Adjustment of BUS_RESET_SETTLE_TIME from 5 seconds to 10 seconds
	  to provide increased time post bus_reset to allow door lock
	  command to succeed. This should be exported to driverfs so that
	  it can be adjusted if needed.
	- Error Policy change: Error recovery command retry is now not
	  based on failed command retry value.
	- Error Policy change: Failed command is not retried if retry
	  count is expired.

02_scsi-error-dr-lck-1.diff:
	- Forward port of Russell King's door lock changes.

Testing:
	- Current patches were tested on a SPI interconnect using both
	  in-kernel and new versions of the aic driver. A Plextor SCSI cd-rom
	  was used as a door lock device. Cable pulls were done during dd's
	  to generate errors and verify recovery / door re-lock.

The full patch is available at:
http://www-124.ibm.com/storageio/patches/2.5/scsi-error

-andmike
--
Michael Anderson
andmike@us.ibm.com

 scsi_error.c |   78 +++++++++++++++++++++++++++++------------------------------
 1 files changed, 39 insertions(+), 39 deletions(-)
-----

diff -Nru a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
--- a/drivers/scsi/scsi_error.c	Fri Oct  4 08:04:49 2002
+++ b/drivers/scsi/scsi_error.c	Fri Oct  4 08:04:49 2002
@@ -91,9 +91,9 @@
 	scmd->eh_timeout.expires = jiffies + timeout;
 	scmd->eh_timeout.function = (void (*)(unsigned long)) complete;
 
-	SCSI_LOG_ERROR_RECOVERY(5, printk("Adding timer for command %p at"
-					  "%d (%p)\n", scmd, timeout,
-					  complete));
+	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:"
+					  " %d, (%p)\n", __FUNCTION__,
+					  scmd, timeout, complete));
 
 	add_timer(&scmd->eh_timeout);
 
@@ -116,8 +116,9 @@
 
 	rtn = del_timer(&scmd->eh_timeout);
 
-	SCSI_LOG_ERROR_RECOVERY(5, printk("Clearing timer for command %p"
-					 " %d\n", scmd, rtn));
+	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p,"
+					 " rtn: %d\n", __FUNCTION__,
+					 scmd, rtn));
 
 	scmd->eh_timeout.data = (unsigned long) NULL;
 	scmd->eh_timeout.function = NULL;
@@ -150,7 +151,7 @@
 	scsi_host_failed_inc_and_test(scmd->host);
 
 	SCSI_LOG_TIMEOUT(3, printk("Command timed out active=%d busy=%d "
-				   "failed=%d\n",
+				   " failed=%d\n",
 				   atomic_read(&scmd->host->host_active),
 				   scmd->host->host_busy,
 				   scmd->host->host_failed));
@@ -173,7 +174,7 @@
 
 	SCSI_SLEEP(&sdev->host->host_wait, sdev->host->in_recovery);
 
-	SCSI_LOG_ERROR_RECOVERY(5, printk("Open returning %d\n",
+	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: rtn: %d\n", __FUNCTION__,
 					  sdev->online));
 
 	return sdev->online;
@@ -209,10 +210,10 @@
 
 		if (cmd_timed_out || cmd_failed) {
 			SCSI_LOG_ERROR_RECOVERY(3,
-				printk("scsi_eh: %d:%d:%d:%d cmds failed: %d,"
-				       "timedout: %d\n",
-				       shost->host_no, sdev->channel,
-				       sdev->id, sdev->lun,
+				printk("%s: %d:%d:%d:%d cmds failed: %d,"
+				       " timedout: %d\n",
+				       __FUNCTION__, shost->host_no,
+				       sdev->channel, sdev->id, sdev->lun,
 				       cmd_failed, cmd_timed_out));
 			cmd_timed_out = 0;
 			cmd_failed = 0;
@@ -220,8 +221,8 @@
 		}
 	}
 
-	SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d "
-					  "devices require eh work\n",
+	SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d"
+					  " devices require eh work\n",
 				  total_failures, devices_failed));
 }
 #endif
@@ -265,10 +266,10 @@
 				 * queued and will be finished along the
 				 * way.
 				 */
-				SCSI_LOG_ERROR_RECOVERY(1, printk("Error hdlr "
-							  "prematurely woken "
-							  "cmds still active "
-							  "(%p %x %d)\n",
+				SCSI_LOG_ERROR_RECOVERY(1, printk("Error hdlr"
+							  " prematurely woken"
+							  " cmds still active"
+							  " (%p %x %d)\n",
 					       scmd, scmd->state,
 					       scmd->target));
 				}
@@ -440,12 +441,13 @@
 static void scsi_eh_times_out(Scsi_Cmnd *scmd)
 {
 	scsi_eh_eflags_set(scmd, SCSI_EH_REC_TIMEOUT);
-	SCSI_LOG_ERROR_RECOVERY(3, printk("in scsi_eh_times_out %p\n", scmd));
+	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__,
+					  scmd));
 
 	if (scmd->host->eh_action != NULL)
 		up(scmd->host->eh_action);
 	else
-		printk("missing scsi error handler thread\n");
+		printk("%s: eh_action NULL\n", __FUNCTION__);
 }
 
 /**
@@ -471,8 +473,8 @@
 
 	scmd->owner = SCSI_OWNER_ERROR_HANDLER;
 
-	SCSI_LOG_ERROR_RECOVERY(3, printk("in eh_done %p result:%x\n", scmd,
-					  scmd->result));
+	SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n",
+					  __FUNCTION__, scmd, scmd->result));
 
 	if (scmd->host->eh_action != NULL)
 		up(scmd->host->eh_action);
@@ -552,9 +554,8 @@
 			
 			rtn = FAILED;
 		}
-		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: %p rtn:%x\n",
-						  __FUNCTION__, scmd,
-						  rtn));
+		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd: %p, rtn:%x\n",
+						  __FUNCTION__, scmd, rtn));
 	} else {
 		int temp;
 
@@ -622,7 +623,7 @@
 	    ? &scsi_result0[0] : kmalloc(512, GFP_ATOMIC | GFP_DMA);
 
 	if (scsi_result == NULL) {
-		printk("cannot allocate scsi_result in scsi_request_sense.\n");
+		printk("%s: cannot allocate scsi_result.\n", __FUNCTION__);
 		return FAILED;
 	}
 	/*
@@ -758,14 +759,14 @@
 			continue;
 
 		SCSI_LOG_ERROR_RECOVERY(2, printk("%s: requesting sense"
-						  "for %d\n", __FUNCTION__,
-						  scmd->target));
+						  " for tgt: %d\n",
+						  __FUNCTION__, scmd->target));
 		rtn = scsi_request_sense(scmd);
 		if (rtn != SUCCESS)
 			continue;
 
 		SCSI_LOG_ERROR_RECOVERY(3, printk("sense requested for %p"
-						  "- result %x\n", scmd,
+						  " result %x\n", scmd,
 						  scmd->result));
 		SCSI_LOG_ERROR_RECOVERY(3, print_sense("bh", scmd));
 
@@ -929,7 +930,7 @@
 
 		rtn = scsi_try_to_abort_cmd(scmd);
 		if (rtn == SUCCESS) {
-			if (scsi_eh_tur(scmd)) {
+			if (!scsi_eh_tur(scmd)) {
 				rtn = scsi_eh_retry_cmd(scmd);
 				if (rtn == SUCCESS)
 					scsi_eh_finish_cmd(scmd, shost);
@@ -999,7 +1000,7 @@
 		 * a bus device reset to it.
 		 */
 		rtn = scsi_try_bus_device_reset(scmd);
-		if ((rtn == SUCCESS) && (scsi_eh_tur(scmd)))
+		if ((rtn == SUCCESS) && (!scsi_eh_tur(scmd)))
 				for (scmd = sc_todo; scmd; scmd = scmd->bh_next)
 					if ((scmd->device == sdev) &&
 					    scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR)) {
@@ -1141,7 +1142,7 @@
 				if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR)
 				    || channel != scmd->channel)
 					continue;
-				if (scsi_eh_tur(scmd)) {
+				if (!scsi_eh_tur(scmd)) {
 					rtn = scsi_eh_retry_cmd(scmd);
 
 					if (rtn == SUCCESS)
@@ -1168,10 +1169,10 @@
 		if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR))
 			continue;
 
-		printk(KERN_INFO "%s: Device set offline - not"
-				"ready or command retry failed"
-				"after error recovery: host"
-				"%d channel %d id %d lun %d\n",
+		printk(KERN_INFO "%s: Device offlined - not"
+				" ready or command retry failed"
+				" after error recovery: host"
+				" %d channel %d id %d lun %d\n",
 				__FUNCTION__, shost->host_no,
 				scmd->device->channel,
 				scmd->device->id,
@@ -1243,7 +1244,7 @@
 	 */
 	if (scmd->device->online == FALSE) {
 		SCSI_LOG_ERROR_RECOVERY(5, printk("%s: device offline - report"
-						  "as SUCCESS\n",
+						  " as SUCCESS\n",
 						  __FUNCTION__));
 		return SUCCESS;
 	}
@@ -1362,7 +1363,7 @@
 		goto maybe_retry;
 
 	case RESERVATION_CONFLICT:
-		printk("scsi%d (%d,%d,%d) : reservation conflict\n", 
+		printk("scsi%d (%d,%d,%d) : reservation conflict\n",
 		       scmd->host->host_no, scmd->channel,
 		       scmd->device->id, scmd->device->lun);
 		return SUCCESS; /* causes immediate i/o error */
@@ -1558,8 +1559,7 @@
 	/*
 	 * Wake up the thread that created us.
 	 */
-	SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent %d\n",
-					  shost->eh_notify->count.counter));
+	SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent \n"));
 
 	up(shost->eh_notify);
 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH] scsi error update 2/3 (enh)
  2002-10-04 19:44 [PATCH] scsi error update 1/3 Mike Anderson
@ 2002-10-04 19:56 ` Mike Anderson
  2002-10-04 20:03   ` [PATCH] scsi error update 3/3 (door lck) Mike Anderson
  0 siblings, 1 reply; 3+ messages in thread
From: Mike Anderson @ 2002-10-04 19:56 UTC (permalink / raw)
  To: linux-scsi, linux-kernel


This series of patches is an update to scsi error handling.

00_scsi-error-base-1.diff:
	- Fix bug on incorrect check of scsi_eh_tur return value.
	- Fix debug printk format problems.
	- Removed ref to arch specific semaphore value in debug printk

01_scsi-error-enh-1.diff:
	- Forward port of Russell King's retry scsi cmd restore.
	- Adjustment of BUS_RESET_SETTLE_TIME from 5 seconds to 10 seconds
	  to provide increased time post bus_reset to allow door lock
	  command to succeed. This should be exported to driverfs so that
	  it can be adjusted if needed.
	- Error Policy change: Error recovery command retry is now not
	  based on failed command retry value.
	- Error Policy change: Failed command is not retried if retry
	  count is expired.

02_scsi-error-dr-lck-1.diff:
	- Forward port of Russell King's door lock changes.

Testing:
	- Current patches were tested on a SPI interconnect using both
	  in-kernel and new versions of the aic driver. A Plextor SCSI cd-rom
	  was used as a door lock device. Cable pulls were done during dd's
	  to generate errors and verify recovery / door re-lock.

The full patch is available at:
http://www-124.ibm.com/storageio/patches/2.5/scsi-error

-andmike
--
Michael Anderson
andmike@us.ibm.com

 scsi.c       |   12 ++----
 scsi.h       |    1 
 scsi_error.c |  117 +++++++++++++++++++++++------------------------------------
 scsi_lib.c   |   26 ++++++++++++-
 4 files changed, 76 insertions(+), 80 deletions(-)
-----
diff -Nru a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
--- a/drivers/scsi/scsi.c	Fri Oct  4 08:37:58 2002
+++ b/drivers/scsi/scsi.c	Fri Oct  4 08:37:58 2002
@@ -1345,14 +1345,10 @@
  */
 int scsi_retry_command(Scsi_Cmnd * SCpnt)
 {
-	memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
-	       sizeof(SCpnt->data_cmnd));
-	SCpnt->request_buffer = SCpnt->buffer;
-	SCpnt->request_bufflen = SCpnt->bufflen;
-	SCpnt->use_sg = SCpnt->old_use_sg;
-	SCpnt->cmd_len = SCpnt->old_cmd_len;
-	SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
-	SCpnt->underflow = SCpnt->old_underflow;
+	/*
+	 * Restore the SCSI command state.
+	 */
+	scsi_setup_cmd_retry(SCpnt);
 
         /*
          * Zero the sense information from the last time we tried
diff -Nru a/drivers/scsi/scsi.h b/drivers/scsi/scsi.h
--- a/drivers/scsi/scsi.h	Fri Oct  4 08:37:58 2002
+++ b/drivers/scsi/scsi.h	Fri Oct  4 08:37:58 2002
@@ -467,6 +467,7 @@
 				   int sectors);
 extern struct Scsi_Device_Template *scsi_get_request_dev(struct request *);
 extern int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt);
+extern void scsi_setup_cmd_retry(Scsi_Cmnd *SCpnt);
 extern int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int);
 extern void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors,
 			       int block_sectors);
diff -Nru a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
--- a/drivers/scsi/scsi_error.c	Fri Oct  4 08:37:58 2002
+++ b/drivers/scsi/scsi_error.c	Fri Oct  4 08:37:58 2002
@@ -8,6 +8,10 @@
  *
  *	Restructured scsi_unjam_host and associated functions.
  *	September 04, 2002 Mike Anderson (andmike@us.ibm.com)
+ *
+ *	Forward port of Russell King's (rmk@arm.linux.org.uk) changes and
+ *	minor  cleanups.
+ *	September 30, 2002 Mike Anderson (andmike@us.ibm.com)
  */
 
 #include <linux/module.h>
@@ -59,7 +63,7 @@
  * These should *probably* be handled by the host itself.
  * Since it is allowed to sleep, it probably should.
  */
-#define BUS_RESET_SETTLE_TIME   5*HZ
+#define BUS_RESET_SETTLE_TIME   10*HZ
 #define HOST_RESET_SETTLE_TIME  10*HZ
 
 /**
@@ -279,12 +283,17 @@
 
 	SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(*sc_list, shost));
 
-	BUG_ON(shost->host_failed != found);
+	if (shost->host_failed != found)
+		printk(KERN_ERR "%s: host_failed: %d != found: %d\n", 
+		       __FUNCTION__, shost->host_failed, found);
 }
 
 /**
  * scsi_check_sense - Examine scsi cmd sense
  * @scmd:	Cmd to have sense checked.
+ *
+ * Return value:
+ * 	SUCCESS or FAILED or NEEDS_RETRY
  **/
 static int scsi_check_sense(Scsi_Cmnd *scmd)
 {
@@ -354,7 +363,6 @@
  **/
 static int scsi_eh_completed_normally(Scsi_Cmnd *scmd)
 {
-	int rtn;
 
 	/*
 	 * first check the host byte, to see if there is anything in there
@@ -370,7 +378,7 @@
 			 * SUCCESS.
 			 */
 			scmd->flags &= ~IS_RESETTING;
-			goto maybe_retry;
+			return NEEDS_RETRY;
 		}
 		/*
 		 * rats.  we are already in the error handler, so we now
@@ -378,10 +386,7 @@
 		 * is valid, we have a pretty good idea of what to do.
 		 * if not, we mark it as FAILED.
 		 */
-		rtn = scsi_check_sense(scmd);
-		if (rtn == NEEDS_RETRY)
-			goto maybe_retry;
-		return rtn;
+		return scsi_check_sense(scmd);
 	}
 	if (host_byte(scmd->result) != DID_OK) {
 		return FAILED;
@@ -401,10 +406,7 @@
 	case COMMAND_TERMINATED:
 		return SUCCESS;
 	case CHECK_CONDITION:
-		rtn = scsi_check_sense(scmd);
-		if (rtn == NEEDS_RETRY)
-			goto maybe_retry;
-		return rtn;
+		return scsi_check_sense(scmd);
 	case CONDITION_GOOD:
 	case INTERMEDIATE_GOOD:
 	case INTERMEDIATE_C_GOOD:
@@ -419,14 +421,6 @@
 		return FAILED;
 	}
 	return FAILED;
-
- maybe_retry:
-	if ((++scmd->retries) < scmd->allowed) {
-		return NEEDS_RETRY;
-	} else {
-		/* no more retries - report this one back to upper level */
-		return SUCCESS;
-	}
 }
 
 /**
@@ -490,7 +484,7 @@
  *    this case, and furthermore, there is a different completion handler
  *    vs scsi_dispatch_cmd.
  * Return value:
- *    SUCCESS/FAILED
+ *    SUCCESS or FAILED or NEEDS_RETRY
  **/
 static int scsi_send_eh_cmnd(Scsi_Cmnd *scmd, int timeout)
 {
@@ -500,7 +494,6 @@
 
 	ASSERT_LOCK(host->host_lock, 0);
 
-retry:
 	/*
 	 * we will use a queued command if possible, otherwise we will
 	 * emulate the queuing and calling of completion function ourselves.
@@ -577,16 +570,15 @@
 	 * actually did complete normally.
 	 */
 	if (rtn == SUCCESS) {
-		int ret = scsi_eh_completed_normally(scmd);
+		int rtn = scsi_eh_completed_normally(scmd);
 		SCSI_LOG_ERROR_RECOVERY(3,
 			printk("%s: scsi_eh_completed_normally %x\n",
-			       __FUNCTION__, ret));
-		switch (ret) {
+			       __FUNCTION__, rtn));
+		switch (rtn) {
 		case SUCCESS:
-			break;
 		case NEEDS_RETRY:
-			goto retry;
 		case FAILED:
+			break;
 		default:
 			rtn = FAILED;
 			break;
@@ -658,15 +650,8 @@
 	 * when we eventually call scsi_finish, we really wish to complete
 	 * the original request, so let's restore the original data. (db)
 	 */
-	memcpy((void *) scmd->cmnd, (void *) scmd->data_cmnd,
-	       sizeof(scmd->data_cmnd));
+	scsi_setup_cmd_retry(scmd);
 	scmd->result = saved_result;
-	scmd->request_buffer = scmd->buffer;
-	scmd->request_bufflen = scmd->bufflen;
-	scmd->use_sg = scmd->old_use_sg;
-	scmd->cmd_len = scmd->old_cmd_len;
-	scmd->sc_data_direction = scmd->sc_old_data_direction;
-	scmd->underflow = scmd->old_underflow;
 
 	/*
 	 * hey, we are done.  let's look to see what happened.
@@ -684,16 +669,16 @@
  **/
 static int scsi_eh_retry_cmd(Scsi_Cmnd *scmd)
 {
-	memcpy((void *) scmd->cmnd, (void *) scmd->data_cmnd,
-	       sizeof(scmd->data_cmnd));
-	scmd->request_buffer = scmd->buffer;
-	scmd->request_bufflen = scmd->bufflen;
-	scmd->use_sg = scmd->old_use_sg;
-	scmd->cmd_len = scmd->old_cmd_len;
-	scmd->sc_data_direction = scmd->sc_old_data_direction;
-	scmd->underflow = scmd->old_underflow;
+	int rtn = SUCCESS;
+
+	for (; scmd->retries < scmd->allowed; scmd->retries++) {
+		scsi_setup_cmd_retry(scmd);
+		rtn = scsi_send_eh_cmnd(scmd, scmd->timeout_per_command);
+		if (rtn != NEEDS_RETRY)
+			break;
+	}
 
-	return scsi_send_eh_cmnd(scmd, scmd->timeout_per_command);
+	return rtn;
 }
 
 /**
@@ -718,9 +703,7 @@
 	 * set this back so that the upper level can correctly free up
 	 * things.
 	 */
-	scmd->use_sg = scmd->old_use_sg;
-	scmd->sc_data_direction = scmd->sc_old_data_direction;
-	scmd->underflow = scmd->old_underflow;
+	scsi_setup_cmd_retry(scmd);
 }
 
 /**
@@ -848,7 +831,9 @@
 	static unsigned char tur_command[6] =
 	{TEST_UNIT_READY, 0, 0, 0, 0, 0};
 	int rtn;
+	int retry_cnt = 1;
 
+retry_tur:
 	memcpy((void *) scmd->cmnd, (void *) tur_command,
 	       sizeof(tur_command));
 
@@ -874,32 +859,18 @@
 	 * when we eventually call scsi_finish, we really wish to complete
 	 * the original request, so let's restore the original data. (db)
 	 */
-	memcpy((void *) scmd->cmnd, (void *) scmd->data_cmnd,
-	       sizeof(scmd->data_cmnd));
-	scmd->request_buffer = scmd->buffer;
-	scmd->request_bufflen = scmd->bufflen;
-	scmd->use_sg = scmd->old_use_sg;
-	scmd->cmd_len = scmd->old_cmd_len;
-	scmd->sc_data_direction = scmd->sc_old_data_direction;
-	scmd->underflow = scmd->old_underflow;
+	scsi_setup_cmd_retry(scmd);
 
 	/*
 	 * hey, we are done.  let's look to see what happened.
 	 */
-	SCSI_LOG_ERROR_RECOVERY(3,
-		printk("%s: scmd %p rtn %x\n",
+	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
 		__FUNCTION__, scmd, rtn));
-	if ((rtn == SUCCESS) && scmd->result) {
-		if (((driver_byte(scmd->result) & DRIVER_SENSE) ||
-		     (status_byte(scmd->result) & CHECK_CONDITION)) &&
-		    (SCSI_SENSE_VALID(scmd))) {
-			if (((scmd->sense_buffer[2] & 0xf) != NOT_READY) &&
-			    ((scmd->sense_buffer[2] & 0xf) != UNIT_ATTENTION) &&
-			    ((scmd->sense_buffer[2] & 0xf) != ILLEGAL_REQUEST)) {
-				return 0;
-			}
-		}
-	}
+	if (rtn == SUCCESS)
+		return 0;
+	else if (rtn == NEEDS_RETRY)
+		if (retry_cnt--)
+			goto retry_tur;
 	return 1;
 }
 
@@ -964,6 +935,11 @@
 	rtn = scmd->host->hostt->eh_device_reset_handler(scmd);
 	spin_unlock_irqrestore(scmd->host->host_lock, flags);
 
+	if (rtn == SUCCESS) {
+		scmd->device->was_reset = 1;
+		scmd->device->expecting_cc_ua = 1;
+	}
+
 	return rtn;
 }
 
@@ -1422,8 +1398,7 @@
 		if ((shost->can_queue > 0 &&
 		     (shost->host_busy >= shost->can_queue))
 		    || (shost->host_blocked)
-		    || (shost->host_self_blocked)
-		    || (sdev->device_blocked)) {
+		    || (shost->host_self_blocked)) {
 			break;
 		}
 
@@ -1471,7 +1446,7 @@
 	if (scsi_eh_get_sense(sc_todo, shost))
 		if (scsi_eh_abort_cmd(sc_todo, shost))
 			if (scsi_eh_bus_device_reset(sc_todo, shost))
-				if(scsi_eh_bus_host_reset(sc_todo, shost))
+				if (scsi_eh_bus_host_reset(sc_todo, shost))
 					scsi_eh_offline_sdevs(sc_todo, shost);
 
 	BUG_ON(shost->host_failed);
diff -Nru a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
--- a/drivers/scsi/scsi_lib.c	Fri Oct  4 08:37:58 2002
+++ b/drivers/scsi/scsi_lib.c	Fri Oct  4 08:37:58 2002
@@ -160,6 +160,30 @@
 }
 
 /*
+ * Function:   scsi_setup_cmd_retry()
+ *
+ * Purpose:    Restore the command state for a retry
+ *
+ * Arguments:  SCpnt   - command to be restored
+ *
+ * Returns:    Nothing
+ *
+ * Notes:      Immediately prior to retrying a command, we need
+ *             to restore certain fields that we saved above.
+ */
+void scsi_setup_cmd_retry(Scsi_Cmnd *SCpnt)
+{
+	memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
+		sizeof(SCpnt->data_cmnd));
+	SCpnt->request_buffer = SCpnt->buffer;
+	SCpnt->request_bufflen = SCpnt->bufflen;
+	SCpnt->use_sg = SCpnt->old_use_sg;
+	SCpnt->cmd_len = SCpnt->old_cmd_len;
+	SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+	SCpnt->underflow = SCpnt->old_underflow;
+}
+
+/*
  * Function:    scsi_queue_next_request()
  *
  * Purpose:     Handle post-processing of completed commands.
@@ -614,7 +638,7 @@
 			printk("scsi%d: ERROR on channel %d, id %d, lun %d, CDB: ",
 			       SCpnt->host->host_no, (int) SCpnt->channel,
 			       (int) SCpnt->target, (int) SCpnt->lun);
-			print_command(SCpnt->cmnd);
+			print_command(SCpnt->data_cmnd);
 			print_sense("sd", SCpnt);
 			SCpnt = scsi_end_request(SCpnt, 0, block_sectors);
 			return;

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH] scsi error update 3/3 (door lck)
  2002-10-04 19:56 ` [PATCH] scsi error update 2/3 (enh) Mike Anderson
@ 2002-10-04 20:03   ` Mike Anderson
  0 siblings, 0 replies; 3+ messages in thread
From: Mike Anderson @ 2002-10-04 20:03 UTC (permalink / raw)
  To: linux-scsi, linux-kernel


This series of patches is an update to scsi error handling.

00_scsi-error-base-1.diff:
	- Fix bug on incorrect check of scsi_eh_tur return value.
	- Fix debug printk format problems.
	- Removed ref to arch specific semaphore value in debug printk

01_scsi-error-enh-1.diff:
	- Forward port of Russell King's retry scsi cmd restore.
	- Adjustment of BUS_RESET_SETTLE_TIME from 5 seconds to 10 seconds
	  to provide increased time post bus_reset to allow door lock
	  command to succeed. This should be exported to driverfs so that
	  it can be adjusted if needed.
	- Error Policy change: Error recovery command retry is now not
	  based on failed command retry value.
	- Error Policy change: Failed command is not retried if retry
	  count is expired.

02_scsi-error-dr-lck-1.diff:
	- Forward port of Russell King's door lock changes.

Testing:
	- Current patches were tested on a SPI interconnect using both
	  in-kernel and new versions of the aic driver. A Plextor SCSI cd-rom
	  was used as a door lock device. Cable pulls were done during dd's
	  to generate errors and verify recovery / door re-lock.

The full patch is available at:
http://www-124.ibm.com/storageio/patches/2.5/scsi-error

-andmike
--
Michael Anderson
andmike@us.ibm.com

 drivers/scsi/scsi.h       |    1 
 drivers/scsi/scsi_error.c |   80 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/scsi_ioctl.c |   42 ++++++++++++++----------
 drivers/scsi/scsi_lib.c   |   27 ---------------
 drivers/scsi/scsi_syms.c  |    1 
 drivers/scsi/sd.c         |    4 +-
 drivers/scsi/sr_ioctl.c   |    4 +-
 include/scsi/scsi_ioctl.h |    8 ++--
 8 files changed, 115 insertions(+), 52 deletions(-)
------

diff -Nru a/drivers/scsi/scsi.h b/drivers/scsi/scsi.h
--- a/drivers/scsi/scsi.h	Fri Oct  4 08:59:02 2002
+++ b/drivers/scsi/scsi.h	Fri Oct  4 08:59:02 2002
@@ -597,6 +597,7 @@
 	unsigned changed:1;	/* Data invalid due to media change */
 	unsigned busy:1;	/* Used to prevent races */
 	unsigned lockable:1;	/* Able to prevent media removal */
+	unsigned locked:1;      /* Media removal disabled */
 	unsigned borken:1;	/* Tell the Seagate driver to be 
 				 * painfully slow on this device */
 	unsigned tagged_supported:1;	/* Supports SCSI-II tagged queuing */
diff -Nru a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
--- a/drivers/scsi/scsi_error.c	Fri Oct  4 08:59:02 2002
+++ b/drivers/scsi/scsi_error.c	Fri Oct  4 08:59:02 2002
@@ -39,6 +39,8 @@
 #include "scsi.h"
 #include "hosts.h"
 
+#include <scsi/scsi_ioctl.h> /* grr */
+
 /*
  * We must always allow SHUTDOWN_SIGS.  Even if we are not a module,
  * the host drivers that we are using may be loaded as modules, and
@@ -1361,6 +1363,75 @@
 }
 
 /**
+ * scsi_eh_lock_done - done function for eh door lock request
+ * @scmd:	SCSI command block for the door lock request
+ *
+ * Notes:
+ * 	We completed the asynchronous door lock request, and it has either
+ * 	locked the door or failed.  We must free the command structures
+ * 	associated with this request.
+ **/
+static void scsi_eh_lock_done(struct scsi_cmnd *scmd)
+{
+	struct scsi_request *sreq = scmd->sc_request;
+
+	scmd->sc_request = NULL;
+	sreq->sr_command = NULL;
+
+	scsi_release_command(scmd);
+	scsi_release_request(sreq);
+}
+
+
+/**
+ * scsi_eh_lock_door - Prevent medium removal for the specified device
+ * @sdev:	SCSI device to prevent medium removal
+ *
+ * Locking:
+ * 	We must be called from process context; scsi_allocate_request()
+ * 	may sleep.
+ *
+ * Notes:
+ * 	We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the
+ * 	head of the devices request queue, and continue.
+ *
+ * Bugs:
+ * 	scsi_allocate_request() may sleep waiting for existing requests to
+ * 	be processed.  However, since we haven't kicked off any request
+ * 	processing for this host, this may deadlock.
+ *
+ *	If scsi_allocate_request() fails for what ever reason, we
+ *	completely forget to lock the door.
+ **/
+static void scsi_eh_lock_door(struct scsi_device *sdev)
+{
+	struct scsi_request *sreq = scsi_allocate_request(sdev);
+
+	if (sreq == NULL) {
+		printk(KERN_ERR "%s: request allocate failed,"
+		       "prevent media removal cmd not sent", __FUNCTION__);
+		return;
+	}
+
+	sreq->sr_cmnd[0] = ALLOW_MEDIUM_REMOVAL;
+	sreq->sr_cmnd[1] = (sdev->scsi_level <= SCSI_2) ? (sdev->lun << 5) : 0;
+	sreq->sr_cmnd[2] = 0;
+	sreq->sr_cmnd[3] = 0;
+	sreq->sr_cmnd[4] = SCSI_REMOVAL_PREVENT;
+	sreq->sr_cmnd[5] = 0;
+	sreq->sr_data_direction = SCSI_DATA_NONE;
+	sreq->sr_bufflen = 0;
+	sreq->sr_buffer = NULL;
+	sreq->sr_allowed = 5;
+	sreq->sr_done = scsi_eh_lock_done;
+	sreq->sr_timeout_per_command = 10 * HZ;
+	sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);
+
+	scsi_insert_special_req(sreq, 1);
+}
+
+
+/**
  * scsi_restart_operations - restart io operations to the specified host.
  * @shost:	Host we are restarting.
  *
@@ -1374,6 +1445,15 @@
 	unsigned long flags;
 
 	ASSERT_LOCK(shost->host_lock, 0);
+
+	/*
+	 * If the door was locked, we need to insert a door lock request
+	 * onto the head of the SCSI request queue for the device.  There
+	 * is no point trying to lock the door of an off-line device.
+	 */
+	for (sdev = shost->host_queue; sdev; sdev = sdev->next)
+		if (sdev->online && sdev->locked)
+			scsi_eh_lock_door(sdev);
 
 	/*
 	 * next free up anything directly waiting upon the host.  this
diff -Nru a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
--- a/drivers/scsi/scsi_ioctl.c	Fri Oct  4 08:59:02 2002
+++ b/drivers/scsi/scsi_ioctl.c	Fri Oct  4 08:59:02 2002
@@ -151,6 +151,29 @@
 	return result;
 }
 
+int scsi_set_medium_removal(Scsi_Device *dev, char state)
+{
+	char scsi_cmd[MAX_COMMAND_SIZE];
+	int ret;
+
+	if (!dev->removable || !dev->lockable)
+	       return 0;
+
+	scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL;
+	scsi_cmd[1] = (dev->scsi_level <= SCSI_2) ? (dev->lun << 5) : 0;
+	scsi_cmd[2] = 0;
+	scsi_cmd[3] = 0;
+	scsi_cmd[4] = state;
+	scsi_cmd[5] = 0;
+
+	ret = ioctl_internal_command(dev, scsi_cmd, IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
+
+	if (ret == 0)
+		dev->locked = state == SCSI_REMOVAL_PREVENT;
+
+	return ret;
+}
+
 /*
  * This interface is deprecated - users should use the scsi generic (sg)
  * interface instead, as this is a more flexible approach to performing
@@ -448,24 +471,9 @@
 		return scsi_ioctl_send_command((Scsi_Device *) dev,
 					     (Scsi_Ioctl_Command *) arg);
 	case SCSI_IOCTL_DOORLOCK:
-		if (!dev->removable || !dev->lockable)
-			return 0;
-		scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL;
-		scsi_cmd[1] = cmd_byte1;
-		scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
-		scsi_cmd[4] = SCSI_REMOVAL_PREVENT;
-		return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
-				   IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
-		break;
+		return scsi_set_medium_removal(dev, SCSI_REMOVAL_PREVENT);
 	case SCSI_IOCTL_DOORUNLOCK:
-		if (!dev->removable || !dev->lockable)
-			return 0;
-		scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL;
-		scsi_cmd[1] = cmd_byte1;
-		scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
-		scsi_cmd[4] = SCSI_REMOVAL_ALLOW;
-		return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
-				   IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
+		return scsi_set_medium_removal(dev, SCSI_REMOVAL_ALLOW);
 	case SCSI_IOCTL_TEST_UNIT_READY:
 		scsi_cmd[0] = TEST_UNIT_READY;
 		scsi_cmd[1] = cmd_byte1;
diff -Nru a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
--- a/drivers/scsi/scsi_lib.c	Fri Oct  4 08:59:02 2002
+++ b/drivers/scsi/scsi_lib.c	Fri Oct  4 08:59:02 2002
@@ -804,33 +804,6 @@
 			SDpnt->starved = 0;
 		}
 
- 		/*
-		 * FIXME(eric)
-		 * I am not sure where the best place to do this is.  We need
-		 * to hook in a place where we are likely to come if in user
-		 * space.   Technically the error handling thread should be
-		 * doing this crap, but the error handler isn't used by
-		 * most hosts.
-		 */
-		if (SDpnt->was_reset) {
-			/*
-			 * We need to relock the door, but we might
-			 * be in an interrupt handler.  Only do this
-			 * from user space, since we do not want to
-			 * sleep from an interrupt.
-			 *
-			 * FIXME(eric) - have the error handler thread do
-			 * this work.
-			 */
-			SDpnt->was_reset = 0;
-			if (SDpnt->removable && !in_interrupt()) {
-				spin_unlock_irq(q->queue_lock);
-				scsi_ioctl(SDpnt, SCSI_IOCTL_DOORLOCK, 0);
-				spin_lock_irq(q->queue_lock);
-				continue;
-			}
-		}
-
 		/*
 		 * If we couldn't find a request that could be queued, then we
 		 * can also quit.
diff -Nru a/drivers/scsi/scsi_syms.c b/drivers/scsi/scsi_syms.c
--- a/drivers/scsi/scsi_syms.c	Fri Oct  4 08:59:02 2002
+++ b/drivers/scsi/scsi_syms.c	Fri Oct  4 08:59:02 2002
@@ -54,6 +54,7 @@
 EXPORT_SYMBOL(print_Scsi_Cmnd);
 EXPORT_SYMBOL(scsi_block_when_processing_errors);
 EXPORT_SYMBOL(scsi_ioctl_send_command);
+EXPORT_SYMBOL(scsi_set_medium_removal);
 #if defined(CONFIG_SCSI_LOGGING)	/* { */
 EXPORT_SYMBOL(scsi_logging_level);
 #endif
diff -Nru a/drivers/scsi/sd.c b/drivers/scsi/sd.c
--- a/drivers/scsi/sd.c	Fri Oct  4 08:59:02 2002
+++ b/drivers/scsi/sd.c	Fri Oct  4 08:59:02 2002
@@ -524,7 +524,7 @@
 	if (sdp->removable)
 		if (sdp->access_count==1)
 			if (scsi_block_when_processing_errors(sdp))
-				scsi_ioctl(sdp, SCSI_IOCTL_DOORLOCK, NULL);
+				scsi_set_medium_removal(sdp, SCSI_REMOVAL_PREVENT);
 
 	return 0;
 
@@ -568,7 +568,7 @@
 	if (sdp->removable) {
 		if (!sdp->access_count)
 			if (scsi_block_when_processing_errors(sdp))
-				scsi_ioctl(sdp, SCSI_IOCTL_DOORUNLOCK, NULL);
+				scsi_set_medium_removal(sdp, SCSI_REMOVAL_ALLOW);
 	}
 	if (sdp->host->hostt->module)
 		__MOD_DEC_USE_COUNT(sdp->host->hostt->module);
diff -Nru a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c
--- a/drivers/scsi/sr_ioctl.c	Fri Oct  4 08:59:02 2002
+++ b/drivers/scsi/sr_ioctl.c	Fri Oct  4 08:59:02 2002
@@ -218,8 +218,8 @@
 {
 	Scsi_CD *cd = cdi->handle;
 
-	return scsi_ioctl(cd->device, lock ? SCSI_IOCTL_DOORLOCK :
-			SCSI_IOCTL_DOORUNLOCK, 0);
+	return scsi_set_medium_removal(cd->device, lock ?
+		       SCSI_REMOVAL_PREVENT : SCSI_REMOVAL_ALLOW);
 }
 
 int sr_drive_status(struct cdrom_device_info *cdi, int slot)
diff -Nru a/include/scsi/scsi_ioctl.h b/include/scsi/scsi_ioctl.h
--- a/include/scsi/scsi_ioctl.h	Fri Oct  4 08:59:02 2002
+++ b/include/scsi/scsi_ioctl.h	Fri Oct  4 08:59:02 2002
@@ -39,10 +39,10 @@
 	unsigned char host_wwn[8]; // include NULL term.
 } Scsi_FCTargAddress;
 
-extern int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg);
-extern int kernel_scsi_ioctl (Scsi_Device *dev, int cmd, void *arg);
-extern int scsi_ioctl_send_command(Scsi_Device *dev,
-				   Scsi_Ioctl_Command *arg);
+extern int scsi_ioctl (Scsi_Device *, int , void *);
+extern int kernel_scsi_ioctl (Scsi_Device *, int, void *);
+extern int scsi_ioctl_send_command(Scsi_Device *, Scsi_Ioctl_Command *);
+extern int scsi_set_medium_removal(Scsi_Device *, char);
 
 #endif
 

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2002-10-04 19:56 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-10-04 19:44 [PATCH] scsi error update 1/3 Mike Anderson
2002-10-04 19:56 ` [PATCH] scsi error update 2/3 (enh) Mike Anderson
2002-10-04 20:03   ` [PATCH] scsi error update 3/3 (door lck) Mike Anderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).