All of lore.kernel.org
 help / color / mirror / Atom feed
From: Don Brace <don.brace@pmcs.com>
To: scott.teel@pmcs.com, Kevin.Barnett@pmcs.com,
	james.bottomley@parallels.com, hch@infradead.org,
	Justin.Lindley@pmcs.com, brace@pmcs.com
Cc: linux-scsi@vger.kernel.org
Subject: [PATCH v3 28/42] hpsa: don't return abort request until target is complete
Date: Tue, 17 Mar 2015 15:04:55 -0500	[thread overview]
Message-ID: <20150317200455.19856.62826.stgit@brunhilda> (raw)
In-Reply-To: <20150317200139.19856.87982.stgit@brunhilda>

From: Webb Scales <webbnh@hp.com>

Don't return from the abort request until the target command is complete.
Mark outstanding commands which have a pending abort, and do not send them
to the host if we can avoid it.

If the current command has been aborted, do not call the SCSI command
completion routine from the I/O path: when the abort returns successfully,
the SCSI mid-layer will handle the completion implicitly.

The following race was possible in theory.

1. LLD is requested to abort a scsi command
2. scsi command completes
3. The struct CommandList associated with 2 is made available.
4. new io request to LLD to another LUN re-uses struct CommandList
5. abort handler follows scsi_cmnd->host_scribble and
   finds struct CommandList and tries to abort it.

Now we have aborted the wrong command.

Fix by resetting the scsi_cmd field of struct CommandList
upon completion and making the abort handler check that
the scsi_cmd pointer in the CommandList struct matches the
scsi_cmnd that it has been asked to abort.

Reviewed-by: Scott Teel <scott.teel@pmcs.com>
Reviewed-by: Kevin Barnett <kevin.barnett@pmcs.com>
Signed-off-by: Webb Scales <webbnh@hp.com>
Signed-off-by: Don Brace <don.brace@pmcs.com>
---
 drivers/scsi/hpsa.c     |  120 +++++++++++++++++++++++++++++++++++------------
 drivers/scsi/hpsa.h     |    1 
 drivers/scsi/hpsa_cmd.h |    2 +
 3 files changed, 93 insertions(+), 30 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index b0949f7..1cae336 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -195,6 +195,10 @@ static struct board_type products[] = {
 	{0xFFFF103C, "Unknown Smart Array", &SA5_access},
 };
 
+#define SCSI_CMD_BUSY ((struct scsi_cmnd *)&hpsa_cmd_busy)
+static const struct scsi_cmnd hpsa_cmd_busy;
+#define SCSI_CMD_IDLE ((struct scsi_cmnd *)&hpsa_cmd_idle)
+static const struct scsi_cmnd hpsa_cmd_idle;
 static int number_of_controllers;
 
 static irqreturn_t do_hpsa_intr_intx(int irq, void *dev_id);
@@ -270,6 +274,11 @@ static inline struct ctlr_info *shost_to_hba(struct Scsi_Host *sh)
 	return (struct ctlr_info *) *priv;
 }
 
+static inline bool hpsa_is_cmd_idle(struct CommandList *c)
+{
+	return c->scsi_cmd == SCSI_CMD_IDLE;
+}
+
 /* extract sense key, asc, and ascq from sense data.  -1 means invalid. */
 static void decode_sense_data(const u8 *sense_data, int sense_data_len,
 			int *sense_key, int *asc, int *ascq)
@@ -959,9 +968,11 @@ static void __enqueue_cmd_and_start_io(struct ctlr_info *h,
 	}
 }
 
-static void enqueue_cmd_and_start_io(struct ctlr_info *h,
-					struct CommandList *c)
+static void enqueue_cmd_and_start_io(struct ctlr_info *h, struct CommandList *c)
 {
+	if (unlikely(c->abort_pending))
+		return finish_cmd(c);
+
 	__enqueue_cmd_and_start_io(h, c, DEFAULT_REPLY_QUEUE);
 }
 
@@ -2010,9 +2021,36 @@ static int handle_ioaccel_mode2_error(struct ctlr_info *h,
 	return retry;	/* retry on raid path? */
 }
 
+static void hpsa_cmd_resolve_events(struct ctlr_info *h,
+		struct CommandList *c)
+{
+	/*
+	 * Prevent the following race in the abort handler:
+	 *
+	 * 1. LLD is requested to abort a SCSI command
+	 * 2. The SCSI command completes
+	 * 3. The struct CommandList associated with step 2 is made available
+	 * 4. New I/O request to LLD to another LUN re-uses struct CommandList
+	 * 5. Abort handler follows scsi_cmnd->host_scribble and
+	 *    finds struct CommandList and tries to abort it
+	 * Now we have aborted the wrong command.
+	 *
+	 * Clear c->scsi_cmd here so that the abort handler will know this
+	 * command has completed.  Then, check to see if the abort handler is
+	 * waiting for this command, and, if so, wake it.
+	 */
+	c->scsi_cmd = SCSI_CMD_IDLE;
+	mb(); /* Ensure c->scsi_cmd is set to SCSI_CMD_IDLE */
+	if (c->abort_pending) {
+		c->abort_pending = false;
+		wake_up_all(&h->abort_sync_wait_queue);
+	}
+}
+
 static void hpsa_cmd_free_and_done(struct ctlr_info *h,
 		struct CommandList *c, struct scsi_cmnd *cmd)
 {
+	hpsa_cmd_resolve_events(h, c);
 	cmd_free(h, c);
 	cmd->scsi_done(cmd);
 }
@@ -2023,6 +2061,21 @@ static void hpsa_retry_cmd(struct ctlr_info *h, struct CommandList *c)
 	queue_work_on(raw_smp_processor_id(), h->resubmit_wq, &c->work);
 }
 
+static void hpsa_set_scsi_cmd_aborted(struct scsi_cmnd *cmd)
+{
+	cmd->result = DID_ABORT << 16;
+}
+
+static void hpsa_cmd_abort_and_free(struct ctlr_info *h, struct CommandList *c,
+				    struct scsi_cmnd *cmd)
+{
+	hpsa_set_scsi_cmd_aborted(cmd);
+	dev_warn(&h->pdev->dev, "CDB %16phN was aborted with status 0x%x\n",
+			 c->Request.CDB, c->err_info->ScsiStatus);
+	hpsa_cmd_resolve_events(h, c);
+	cmd_free(h, c);		/* FIX-ME:  change to cmd_tagged_free(h, c) */
+}
+
 static void process_ioaccel2_completion(struct ctlr_info *h,
 		struct CommandList *c, struct scsi_cmnd *cmd,
 		struct hpsa_scsi_dev_t *dev)
@@ -2034,6 +2087,10 @@ static void process_ioaccel2_completion(struct ctlr_info *h,
 			c2->error_data.status == 0))
 		return hpsa_cmd_free_and_done(h, c, cmd);
 
+	/* don't requeue a command which is being aborted */
+	if (unlikely(c->abort_pending))
+		return hpsa_cmd_abort_and_free(h, c, cmd);
+
 	/*
 	 * Any RAID offload error results in retry which will use
 	 * the normal I/O path so the controller can handle whatever's
@@ -2155,10 +2212,14 @@ static void complete_scsi_command(struct CommandList *cp)
 		if (is_logical_dev_addr_mode(dev->scsi3addr)) {
 			if (ei->CommandStatus == CMD_IOACCEL_DISABLED)
 				dev->offload_enabled = 0;
-			return hpsa_retry_cmd(h, cp);
+			if (!cp->abort_pending)
+				return hpsa_retry_cmd(h, cp);
 		}
 	}
 
+	if (cp->abort_pending)
+		ei->CommandStatus = CMD_ABORTED;
+
 	/* an error has occurred */
 	switch (ei->CommandStatus) {
 
@@ -2246,10 +2307,8 @@ static void complete_scsi_command(struct CommandList *cp)
 			cp->Request.CDB);
 		break;
 	case CMD_ABORTED:
-		cmd->result = DID_ABORT << 16;
-		dev_warn(&h->pdev->dev, "CDB %16phN was aborted with status 0x%x\n",
-				cp->Request.CDB, ei->ScsiStatus);
-		break;
+		/* Return now to avoid calling scsi_done(). */
+		return hpsa_cmd_abort_and_free(h, cp, cmd);
 	case CMD_ABORT_FAILED:
 		cmd->result = DID_ERROR << 16;
 		dev_warn(&h->pdev->dev, "CDB %16phN : abort failed\n",
@@ -4485,6 +4544,7 @@ static void hpsa_cmd_init(struct ctlr_info *h, int index,
 	c->ErrDesc.Addr = cpu_to_le64((u64) err_dma_handle);
 	c->ErrDesc.Len = cpu_to_le32((u32) sizeof(*c->err_info));
 	c->h = h;
+	c->scsi_cmd = SCSI_CMD_IDLE;
 }
 
 static void hpsa_preinitialize_commands(struct ctlr_info *h)
@@ -4548,6 +4608,8 @@ static void hpsa_command_resubmit_worker(struct work_struct *work)
 		cmd->result = DID_NO_CONNECT << 16;
 		return hpsa_cmd_free_and_done(c->h, c, cmd);
 	}
+	if (c->abort_pending)
+		return hpsa_cmd_abort_and_free(c->h, c, cmd);
 	if (c->cmd_type == CMD_IOACCEL2) {
 		struct ctlr_info *h = c->h;
 		struct io_accel2_cmd *c2 = &h->ioaccel2_cmd_pool[c->cmdindex];
@@ -4973,8 +5035,7 @@ static void setup_ioaccel2_abort_cmd(struct CommandList *c, struct ctlr_info *h,
 	struct hpsa_tmf_struct *ac = (struct hpsa_tmf_struct *) c2;
 	struct io_accel2_cmd *c2a =
 		&h->ioaccel2_cmd_pool[command_to_abort->cmdindex];
-	struct scsi_cmnd *scmd =
-		(struct scsi_cmnd *) command_to_abort->scsi_cmd;
+	struct scsi_cmnd *scmd = command_to_abort->scsi_cmd;
 	struct hpsa_scsi_dev_t *dev = scmd->device->hostdata;
 
 	/*
@@ -4989,6 +5050,8 @@ static void setup_ioaccel2_abort_cmd(struct CommandList *c, struct ctlr_info *h,
 				sizeof(ac->error_len));
 
 	c->cmd_type = IOACCEL2_TMF;
+	c->scsi_cmd = SCSI_CMD_BUSY;
+
 	/* Adjust the DMA address to point to the accelerated command buffer */
 	c->busaddr = (u32) h->ioaccel2_cmd_pool_dhandle +
 				(c->cmdindex * sizeof(struct io_accel2_cmd));
@@ -5182,7 +5245,7 @@ static inline int wait_for_available_abort_cmd(struct ctlr_info *h)
 static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
 {
 
-	int i, rc;
+	int rc;
 	struct ctlr_info *h;
 	struct hpsa_scsi_dev_t *dev;
 	struct CommandList *abort; /* pointer to command to be aborted */
@@ -5256,6 +5319,16 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
 		return FAILED;
 	}
 
+	/*
+	 * Check that we're aborting the right command.
+	 * It's possible the CommandList already completed and got re-used.
+	 */
+	if (abort->scsi_cmd != sc) {
+		cmd_free(h, abort);
+		return SUCCESS;
+	}
+
+	abort->abort_pending = true;
 	hpsa_get_tag(h, abort, &taglower, &tagupper);
 	reply_queue = hpsa_extract_reply_queue(h, abort);
 	ml += sprintf(msg+ml, "Tag:0x%08x:%08x ", tagupper, taglower);
@@ -5288,27 +5361,10 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
 		return FAILED;
 	}
 	dev_info(&h->pdev->dev, "%s SENT, SUCCESS\n", msg);
-
-	/*
-	 * If the abort(s) above completed and actually aborted the
-	 * command, then the command to be aborted should already be
-	 * completed.  If not, wait around a bit more to see if they
-	 * manage to complete normally.
-	 */
-#define ABORT_COMPLETE_WAIT_SECS 30
-	for (i = 0; i < ABORT_COMPLETE_WAIT_SECS * 10; i++) {
-		refcount = atomic_read(&abort->refcount);
-		if (refcount < 2) {
-			cmd_free(h, abort);
-			return SUCCESS;
-		} else {
-			msleep(100);
-		}
-	}
-	dev_warn(&h->pdev->dev, "%s FAILED. Aborted command has not completed after %d seconds.\n",
-		msg, ABORT_COMPLETE_WAIT_SECS);
+	wait_event(h->abort_sync_wait_queue,
+		   abort->scsi_cmd != sc || lockup_detected(h));
 	cmd_free(h, abort);
-	return FAILED;
+	return !lockup_detected(h) ? SUCCESS : FAILED;
 }
 
 /*
@@ -5554,6 +5610,7 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 
 	/* Fill in the command type */
 	c->cmd_type = CMD_IOCTL_PEND;
+	c->scsi_cmd = SCSI_CMD_BUSY;
 	/* Fill in Command Header */
 	c->Header.ReplyQueue = 0; /* unused in simple mode */
 	if (iocommand.buf_size > 0) {	/* buffer to fill */
@@ -5687,6 +5744,7 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 	c = cmd_alloc(h);
 
 	c->cmd_type = CMD_IOCTL_PEND;
+	c->scsi_cmd = SCSI_CMD_BUSY;
 	c->Header.ReplyQueue = 0;
 	c->Header.SGList = (u8) sg_used;
 	c->Header.SGTotal = cpu_to_le16(sg_used);
@@ -5829,6 +5887,7 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 	u64 tag; /* for commands to be aborted */
 
 	c->cmd_type = CMD_IOCTL_PEND;
+	c->scsi_cmd = SCSI_CMD_BUSY;
 	c->Header.ReplyQueue = 0;
 	if (buff != NULL && size > 0) {
 		c->Header.SGList = 1;
@@ -7622,6 +7681,7 @@ reinit_after_soft_reset:
 		goto clean5;	/* cmd, irq, pci, lockup, wq/aer/h */
 	init_waitqueue_head(&h->scan_wait_queue);
 	init_waitqueue_head(&h->abort_cmd_wait_queue);
+	init_waitqueue_head(&h->abort_sync_wait_queue);
 	h->scan_finished = 1; /* no scan currently in progress */
 
 	pci_set_drvdata(pdev, h);
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index 28b5d79..7cb8586 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -266,6 +266,7 @@ struct ctlr_info {
 	struct workqueue_struct *rescan_ctlr_wq;
 	atomic_t abort_cmds_available;
 	wait_queue_head_t abort_cmd_wait_queue;
+	wait_queue_head_t abort_sync_wait_queue;
 };
 
 struct offline_device_entry {
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index 3719592..f986402 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -439,6 +439,8 @@ struct CommandList {
 	 * not used.
 	 */
 	struct hpsa_scsi_dev_t *phys_disk;
+
+	int abort_pending;
 	atomic_t refcount; /* Must be last to avoid memset in hpsa_cmd_init() */
 } __aligned(COMMANDLIST_ALIGNMENT);
 


  parent reply	other threads:[~2015-03-17 20:06 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-03-17 20:02 [PATCH v3 00/42] hpsa updates Don Brace
2015-03-17 20:02 ` [PATCH v3 01/42] hpsa: add masked physical devices into h->dev[] array Don Brace
2015-03-17 20:02 ` [PATCH v3 02/42] hpsa: clean up host, channel, target, lun prints Don Brace
2015-03-17 20:02 ` [PATCH v3 03/42] hpsa: rework controller command submission Don Brace
2015-03-27 15:11   ` Tomas Henzl
2015-03-27 18:04     ` brace
2015-03-17 20:02 ` [PATCH v3 04/42] hpsa: clean up aborts Don Brace
2015-03-17 20:02 ` [PATCH v3 05/42] hpsa: decrement h->commands_outstanding in fail_all_outstanding_cmds Don Brace
2015-04-02 13:33   ` Tomas Henzl
2015-03-17 20:02 ` [PATCH v3 06/42] hpsa: hpsa decode sense data for io and tmf Don Brace
2015-03-17 20:03 ` [PATCH v3 07/42] hpsa: allow lockup detected to be viewed via sysfs Don Brace
2015-03-17 20:03 ` [PATCH v3 08/42] hpsa: make function names consistent Don Brace
2015-03-17 20:03 ` [PATCH v3 09/42] hpsa: factor out hpsa_init_cmd function Don Brace
2015-03-17 20:03 ` [PATCH v3 10/42] hpsa: do not ignore return value of hpsa_register_scsi Don Brace
2015-03-17 20:03 ` [PATCH v3 11/42] hpsa: try resubmitting down raid path on task set full Don Brace
2015-03-17 20:03 ` [PATCH v3 12/42] hpsa: factor out hpsa_ioaccel_submit function Don Brace
2015-03-17 20:03 ` [PATCH v3 13/42] hpsa: print accurate SSD Smart Path Enabled status Don Brace
2015-03-17 20:03 ` [PATCH v3 14/42] hpsa: use ioaccel2 path to submit IOs to physical drives in HBA mode Don Brace
2015-03-17 20:03 ` [PATCH v3 15/42] hpsa: Get queue depth from identify physical bmic for physical disks Don Brace
2015-03-17 20:03 ` [PATCH v3 16/42] hpsa: break hpsa_free_irqs_and_disable_msix into two functions Don Brace
2015-03-17 20:03 ` [PATCH v3 17/42] hpsa: clean up error handling Don Brace
2015-03-17 20:04 ` [PATCH v3 18/42] hpsa: refactor freeing of resources into more logical functions Don Brace
2015-03-17 20:04 ` [PATCH v3 19/42] hpsa: add ioaccel sg chaining for the ioaccel2 path Don Brace
2015-03-17 20:04 ` [PATCH v3 20/42] hpsa: add more ioaccel2 error handling, including underrun statuses Don Brace
2015-03-17 20:04 ` [PATCH v3 21/42] hpsa: do not check cmd_alloc return value - it cannnot return NULL Don Brace
2015-03-17 20:04 ` [PATCH v3 22/42] hpsa: correct return values from driver functions Don Brace
2015-03-17 20:04 ` [PATCH v3 23/42] hpsa: clean up driver init Don Brace
2015-03-17 20:04 ` [PATCH v3 24/42] hpsa: clean up some error reporting output in abort handler Don Brace
2015-03-17 20:04 ` [PATCH v3 25/42] hpsa: do not print ioaccel2 warning messages about unusual completions Don Brace
2015-03-17 20:04 ` [PATCH v3 26/42] hpsa: add support sending aborts to physical devices via the ioaccel2 path Don Brace
2015-03-17 20:04 ` [PATCH v3 27/42] hpsa: use helper routines for finishing commands Don Brace
2015-03-17 20:04 ` Don Brace [this message]
2015-03-17 20:05 ` [PATCH v3 29/42] hpsa: refactor and rework support for sending TEST_UNIT_READY Don Brace
2015-03-17 20:05 ` [PATCH v3 30/42] hpsa: performance tweak for hpsa_scatter_gather() Don Brace
2015-03-17 20:05 ` [PATCH v3 31/42] hpsa: call pci_release_regions after pci_disable_device Don Brace
2015-03-17 20:05 ` [PATCH v3 32/42] hpsa: skip free_irq calls if irqs are not allocated Don Brace
2015-03-17 20:05 ` [PATCH v3 33/42] hpsa: cleanup for init_one step 2 in kdump Don Brace
2015-03-17 20:05 ` [PATCH v3 34/42] hpsa: fix try_soft_reset error handling Don Brace
2015-03-17 20:05 ` [PATCH v3 35/42] hpsa: create workqueue after the driver is ready for use Don Brace
2015-03-17 20:06 ` [PATCH v3 36/42] hpsa: add interrupt number to /proc/interrupts interrupt name Don Brace
2015-03-17 20:06 ` [PATCH v3 37/42] hpsa: use block layer tag for command allocation Don Brace
2015-03-23 16:57   ` Tomas Henzl
     [not found]     ` <07F70BBF6832E34FA1C923241E8833AB486892F9@BBYEXM01.pmc-sierra.internal>
2015-03-25 18:33       ` Webb Scales
2015-03-26 12:47         ` Tomas Henzl
2015-03-26 14:38           ` Webb Scales
2015-03-26 15:10             ` Tomas Henzl
2015-03-26 15:18               ` Webb Scales
2015-04-10 15:13                 ` James Bottomley
2015-03-27 18:49     ` brace
2015-03-17 20:06 ` [PATCH v3 38/42] hpsa: use scsi host_no as hpsa controller number Don Brace
2015-03-17 20:07 ` [PATCH v3 39/42] hpsa: propagate the error code in hpsa_kdump_soft_reset Don Brace
2015-03-17 20:07 ` [PATCH v3 40/42] hpsa: cleanup reset Don Brace
2015-03-17 20:07 ` [PATCH v3 41/42] hpsa: change driver version Don Brace
2015-03-17 20:07 ` [PATCH v3 42/42] hpsa: add PMC to copyright Don Brace

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150317200455.19856.62826.stgit@brunhilda \
    --to=don.brace@pmcs.com \
    --cc=Justin.Lindley@pmcs.com \
    --cc=Kevin.Barnett@pmcs.com \
    --cc=brace@pmcs.com \
    --cc=hch@infradead.org \
    --cc=james.bottomley@parallels.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=scott.teel@pmcs.com \
    --subject='Re: [PATCH v3 28/42] hpsa: don'\''t return abort request until target is complete' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.