All of lore.kernel.org
 help / color / mirror / Atom feed
* [GIT PATCH v4 0/2] libsas: eh reworks (ata-eh vs discovery, races, ...)
@ 2012-01-10  7:38 Dan Williams
  2012-01-10  7:38 ` [PATCH v4 1/2] libsas: pre-clean commands that won the eh vs completion race Dan Williams
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Dan Williams @ 2012-01-10  7:38 UTC (permalink / raw)
  To: linux-scsi; +Cc: linux-ide, jack_wang

The latest version of the patch kit is available here:

  git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git libsas-eh-reworks-v4

Changes since v3: http://marc.info/?l=linux-scsi&m=132581455214789&w=2

1/ null pointer regression smp_execute_task: make sure device is not
   removed from the domain while eh is in flight.  Fix up the logic that
   defers domain_device destruction to its own context.  Folded into:
   "libsas: prevent domain rediscovery competing with ata error handling"
   plus a small prep patch: "libsas: convert dev->gone to flags"

2/ lockdep regression in smp_execute_task: failed to release the lock
   before returning.  Folded into: "libsas: check for 'gone' expanders in
   smp_execute_task()"

3/ unnecessary locking in isci_ata_check_ready().  Folded into: "isci:
   ->lldd_ata_check_ready handler"

4/ two new fixes follow in separate mails
   [PATCH v4 1/2] libsas: pre-clean commands that won the eh vs completion race
   [PATCH v4 2/2] libsas: feed the scsi_block_when_processing_errors() meter

Incremental diff: libsas-eh-reworks-v3..libsas-eh-reworks-v4

diff --git a/drivers/scsi/isci/port.c b/drivers/scsi/isci/port.c
index e795645..eca9ba0 100644
--- a/drivers/scsi/isci/port.c
+++ b/drivers/scsi/isci/port.c
@@ -1681,10 +1681,7 @@ int isci_ata_check_ready(struct domain_device *dev)
 	if (test_bit(IPORT_RESET_PENDING, &iport->state))
 		goto out;
 
-	/* snapshot active phy mask */
-	spin_lock_irqsave(&ihost->scic_lock, flags);
 	rc = !!iport->active_phy_mask;
-	spin_unlock_irqrestore(&ihost->scic_lock, flags);
  out:
 	isci_put_device(idev);
 
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index f90fdcf..a062adc 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -195,7 +195,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
 	spin_unlock(ap->lock);
 
 	/* If the device fell off, no sense in issuing commands */
-	if (dev->gone)
+	if (test_bit(SAS_DEV_GONE, &dev->state))
 		goto out;
 
 	task = sas_alloc_task(GFP_ATOMIC);
@@ -327,7 +327,7 @@ static int sas_ata_hard_reset(struct ata_link *link, unsigned int *class,
 	struct domain_device *dev = ap->private_data;
 	struct sas_internal *i = dev_to_sas_internal(dev);
 
-	if (dev->gone)
+	if (test_bit(SAS_DEV_GONE, &dev->state))
 		return -ENODEV;
 
 	res = i->dft->lldd_I_T_nexus_reset(dev);
@@ -663,6 +663,11 @@ static void async_sas_ata_eh(void *data, async_cookie_t cookie)
 
 	ata_port_printk(ap, KERN_DEBUG, "sas eh calling libata port error handler");
 	ata_scsi_port_error_handler(ha->core.shost, ap);
+
+	/* tell scsi_block_when_processing_errors() waiters that we are
+	 * still making forward progress
+	 */
+	wake_up(&ha->core.shost->host_wait);
 }
 
 void sas_ata_strategy_handler(struct Scsi_Host *shost)
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 0b7555d..dfb0250 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -269,17 +269,22 @@ static void sas_destruct_devices(struct work_struct *work)
 
 	clear_bit(DISCE_DESTRUCT, &port->disc.pending);
 
-	list_for_each_entry_safe(dev, n, &port->destroy_list, dev_list_node) {
+	list_for_each_entry_safe(dev, n, &port->destroy_list, disco_list_node) {
+		list_del_init(&dev->disco_list_node);
+
 		sas_remove_children(&dev->rphy->dev);
 		sas_rphy_delete(dev->rphy);
 		dev->rphy = NULL;
 		sas_unregister_common_dev(port, dev);
+
+		sas_put_device(dev);
 	}
 }
 
 void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev)
 {
-	if (!list_empty(&dev->disco_list_node)) {
+	if (!test_bit(SAS_DEV_DESTROY, &dev->state) &&
+	    !list_empty(&dev->disco_list_node)) {
 		/* this rphy never saw sas_rphy_add */
 		list_del_init(&dev->disco_list_node);
 		sas_rphy_free(dev->rphy);
@@ -287,13 +292,9 @@ void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev)
 		sas_unregister_common_dev(port, dev);
 	}
 
-	if (dev->rphy) {
+	if (dev->rphy && !test_and_set_bit(SAS_DEV_DESTROY, &dev->state)) {
 		sas_rphy_unlink(dev->rphy);
-
-		spin_lock_irq(&port->dev_list_lock);
-		list_move_tail(&dev->dev_list_node, &port->destroy_list);
-		spin_unlock_irq(&port->dev_list_lock);
-
+		list_move_tail(&dev->disco_list_node, &port->destroy_list);
 		sas_discover_event(dev->port, DISCE_DESTRUCT);
 	}
 }
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index e47599b..68a80a0 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -74,8 +74,10 @@ static int smp_execute_task(struct domain_device *dev, void *req, int req_size,
 
 	mutex_lock(&dev->ex_dev.cmd_mutex);
 	for (retry = 0; retry < 3; retry++) {
-		if (dev->gone)
-			return -ECOMM;
+		if (test_bit(SAS_DEV_GONE, &dev->state)) {
+			res = -ECOMM;
+			break;
+		}
 
 		task = sas_alloc_task(GFP_KERNEL);
 		if (!task) {
@@ -1794,7 +1796,7 @@ static void sas_unregister_ex_tree(struct asd_sas_port *port, struct domain_devi
 	struct domain_device *child, *n;
 
 	list_for_each_entry_safe(child, n, &ex->children, siblings) {
-		child->gone = 1;
+		set_bit(SAS_DEV_GONE, &child->state);
 		if (child->dev_type == EDGE_DEV ||
 		    child->dev_type == FANOUT_DEV)
 			sas_unregister_ex_tree(port, child);
@@ -1815,7 +1817,7 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent,
 			&ex_dev->children, siblings) {
 			if (SAS_ADDR(child->sas_addr) ==
 			    SAS_ADDR(phy->attached_sas_addr)) {
-				child->gone = 1;
+				set_bit(SAS_DEV_GONE, &child->state);
 				if (child->dev_type == EDGE_DEV ||
 				    child->dev_type == FANOUT_DEV)
 					sas_unregister_ex_tree(parent->port, child);
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
index 36e2905..31adcd1 100644
--- a/drivers/scsi/libsas/sas_port.c
+++ b/drivers/scsi/libsas/sas_port.c
@@ -168,7 +168,7 @@ void sas_deform_port(struct asd_sas_phy *phy, int gone)
 
 	if (port->num_phys == 1) {
 		if (dev && gone)
-			dev->gone = 1;
+			set_bit(SAS_DEV_GONE, &dev->state);
 		sas_unregister_domain_devices(port);
 		sas_port_delete(port->port);
 		port->port = NULL;
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 59a227d..731c892 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -208,7 +208,7 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 	int res = 0;
 
 	/* If the device fell off, no sense in issuing commands */
-	if (dev->gone) {
+	if (test_bit(SAS_DEV_GONE, &dev->state)) {
 		cmd->result = DID_BAD_TARGET << 16;
 		goto out_done;
 	}
@@ -249,8 +249,8 @@ out_done:
 
 static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
 {
-	struct sas_task *task = TO_SAS_TASK(cmd);
 	struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host);
+	struct sas_task *task = TO_SAS_TASK(cmd);
 
 	/* At this point, we only get called following an actual abort
 	 * of the task, so we should be guaranteed not to be racing with
@@ -267,9 +267,9 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
 
 static void sas_eh_defer_cmd(struct scsi_cmnd *cmd)
 {
-	struct sas_task *task = TO_SAS_TASK(cmd);
-	struct domain_device *dev = task->dev;
+	struct domain_device *dev = cmd_to_domain_dev(cmd);
 	struct sas_ha_struct *ha = dev->port->ha;
+	struct sas_task *task = TO_SAS_TASK(cmd);
 
 	if (!dev_is_sata(dev)) {
 		sas_eh_finish_cmd(cmd);
@@ -530,8 +530,9 @@ static int sas_eh_handle_sas_errors(struct Scsi_Host *shost,
 	struct sas_internal *i = to_sas_internal(shost->transportt);
 	unsigned long flags;
 	struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
+	LIST_HEAD(done);
 
-Again:
+	/* clean out any commands that won the completion vs eh race */
 	list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
 		struct domain_device *dev = cmd_to_domain_dev(cmd);
 		struct sas_task *task;
@@ -545,7 +546,12 @@ Again:
 		spin_unlock_irqrestore(&dev->done_lock, flags);
 
 		if (!task)
-			continue;
+			list_move_tail(&cmd->eh_entry, &done);
+	}
+
+ Again:
+	list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
+		struct sas_task *task = TO_SAS_TASK(cmd);
 
 		list_del_init(&cmd->eh_entry);
 
@@ -649,15 +655,16 @@ Again:
 			goto clear_q;
 		}
 	}
+ out:
+	list_splice_tail(&done, work_q);
 	list_splice_tail_init(&ha->eh_ata_q, work_q);
 	return list_empty(work_q);
-clear_q:
+
+ clear_q:
 	SAS_DPRINTK("--- Exit %s -- clear_q\n", __func__);
 	list_for_each_entry_safe(cmd, n, work_q, eh_entry)
 		sas_eh_finish_cmd(cmd);
-
-	list_splice_tail_init(&ha->eh_ata_q, work_q);
-	return list_empty(work_q);
+	goto out;
 }
 
 void sas_scsi_recover_host(struct Scsi_Host *shost)
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index feb61a8..55bab86 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -174,7 +174,11 @@ struct sata_device {
 	struct ata_taskfile tf;
 };
 
-/* ---------- Domain device ---------- */
+enum {
+	SAS_DEV_GONE,
+	SAS_DEV_DESTROY,
+};
+
 struct domain_device {
 	spinlock_t done_lock;
         enum sas_dev_type dev_type;
@@ -191,7 +195,7 @@ struct domain_device {
 	struct sas_phy *phy;
 
         struct list_head dev_list_node;
-	struct list_head disco_list_node;
+	struct list_head disco_list_node; /* awaiting probe or destruct */
 
         enum sas_protocol    iproto;
         enum sas_protocol    tproto;
@@ -209,7 +213,7 @@ struct domain_device {
         };
 
         void *lldd_dev;
-	int gone;
+	unsigned long state;
 	struct kref kref;
 };
 

The following changes since commit 5c41dc3a79150e93e5d050871a10b761be8281a1:

  [SCSI] lpfc 8.3.28: Update driver version to 8.3.28 (2011-12-15 10:57:45 +0400)

are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git libsas-eh-reworks-v4

Dan Williams (36):
      libsas: remove unused ata_task_resp fields
      libsas: kill sas_slave_destroy
      libsas: fix domain_device leak
      libsas: fix leak of dev->sata_dev.identify_[packet_]device
      libsas: replace event locks with atomic bitops
      libsas: convert ha->state to flags
      libsas: introduce sas_drain_work()
      libsas: remove ata_port.lock management duties from lldds
      libsas: convert dev->gone to flags
      libsas: prevent domain rediscovery competing with ata error handling
      libsas: use ->set_dmamode to notify lldds of NCQ parameters
      libsas: kill invocation of scsi_eh_finish_cmd from sas_ata_task_done
      libsas: close error handling vs sas_ata_task_done() race
      libsas: prevent double completion of scmds from eh
      libsas: fix timeout vs completion race
      libsas: let libata handle command timeouts
      libsas: defer SAS_TASK_NEED_DEV_RESET commands to libata
      libsas: use libata-eh-reset for sata rediscovery fis transmit failures
      libsas: perform sas-transport resets in shost->workq context
      libsas: execute transport link resets with libata-eh via host workqueue
      libsas: sas_phy_enable via transport_sas_phy_reset
      libsas: async ata-eh
      libsas: poll for ata device readiness after reset
      libsas: don't mark expanders as gone when a child device is removed
      libsas: check for 'gone' expanders in smp_execute_task()
      libsas: fix sas_find_local_phy(), take phy references
      libsas: don't recover 'gone' devices in sas_ata_hard_reset()
      isci: kill iphy->isci_port lookups
      isci: kill isci_port->status
      isci: fix interpretation of "hard" reset
      isci: stop interpreting ->lldd_lu_reset() as an ata soft-reset
      isci: ->lldd_ata_check_ready handler
      isci: remove bus and reset handlers
      isci: remove IDEV_EH hack to disable "discovery-time" ata resets
      libsas: pre-clean commands that won the eh vs completion race
      libsas: feed the scsi_block_when_processing_errors() meter

Jeff Skirvin (2):
      libsas: Remove redundant phy state notification calls.
      libsas: add mutex for SMP task execution

 Documentation/scsi/libsas.txt       |   15 -
 drivers/ata/libata-eh.c             |    1 +
 drivers/ata/libata.h                |    1 -
 drivers/scsi/aic94xx/aic94xx.h      |    2 +
 drivers/scsi/aic94xx/aic94xx_dev.c  |   38 ++-
 drivers/scsi/aic94xx/aic94xx_init.c |    5 +-
 drivers/scsi/aic94xx/aic94xx_tmf.c  |    9 +-
 drivers/scsi/isci/host.c            |    8 +-
 drivers/scsi/isci/host.h            |   19 +-
 drivers/scsi/isci/init.c            |   13 +-
 drivers/scsi/isci/phy.c             |   18 +-
 drivers/scsi/isci/phy.h             |    1 -
 drivers/scsi/isci/port.c            |  217 ++++++------
 drivers/scsi/isci/port.h            |   11 +-
 drivers/scsi/isci/remote_device.c   |   32 +--
 drivers/scsi/isci/remote_device.h   |    7 +-
 drivers/scsi/isci/request.c         |  198 +----------
 drivers/scsi/isci/request.h         |    9 +-
 drivers/scsi/isci/task.c            |  158 ++-------
 drivers/scsi/isci/task.h            |   40 --
 drivers/scsi/libsas/sas_ata.c       |  692 +++++++++++++++--------------------
 drivers/scsi/libsas/sas_discover.c  |  152 +++++++--
 drivers/scsi/libsas/sas_event.c     |   89 +++++-
 drivers/scsi/libsas/sas_expander.c  |  113 ++++--
 drivers/scsi/libsas/sas_init.c      |  192 +++++++++-
 drivers/scsi/libsas/sas_internal.h  |   73 ++--
 drivers/scsi/libsas/sas_phy.c       |   12 +-
 drivers/scsi/libsas/sas_port.c      |   26 +-
 drivers/scsi/libsas/sas_scsi_host.c |  320 +++++++---------
 drivers/scsi/mvsas/mv_init.c        |    1 -
 drivers/scsi/mvsas/mv_sas.c         |   11 +-
 drivers/scsi/pm8001/pm8001_init.c   |    1 -
 drivers/scsi/pm8001/pm8001_sas.c    |   29 +-
 drivers/scsi/scsi_transport_sas.c   |   59 +++-
 include/linux/libata.h              |    1 +
 include/scsi/libsas.h               |   67 ++--
 include/scsi/sas_ata.h              |   26 +-
 include/scsi/scsi_transport_sas.h   |   12 +-
 38 files changed, 1321 insertions(+), 1357 deletions(-)

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2012-01-16 20:44 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-10  7:38 [GIT PATCH v4 0/2] libsas: eh reworks (ata-eh vs discovery, races, ...) Dan Williams
2012-01-10  7:38 ` [PATCH v4 1/2] libsas: pre-clean commands that won the eh vs completion race Dan Williams
2012-01-10  7:38 ` [PATCH v4 2/2] libsas: feed the scsi_block_when_processing_errors() meter Dan Williams
2012-01-10 19:29 ` [GIT PATCH v4 0/2] libsas: eh reworks (ata-eh vs discovery, races, ...) James Bottomley
2012-01-10 20:18   ` Dan Williams
2012-01-16  8:40     ` James Bottomley
2012-01-16 17:00       ` Dan Williams
2012-01-16 20:24         ` James Bottomley
2012-01-16 20:44           ` Dan Williams
2012-01-13  0:57 ` Jack Wang
2012-01-13  1:21   ` Dan Williams
2012-01-13  1:37     ` Jack Wang
2012-01-13  1:51       ` Dan Williams

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.