All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: linux-scsi@vger.kernel.org
Cc: linux-ide@vger.kernel.org, jack_wang@usish.com
Subject: [GIT PATCH v4 0/2] libsas: eh reworks (ata-eh vs discovery, races, ...)
Date: Mon, 09 Jan 2012 23:38:26 -0800	[thread overview]
Message-ID: <20120110073647.4563.7504.stgit@localhost6.localdomain6> (raw)

The latest version of the patch kit is available here:

  git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git libsas-eh-reworks-v4

Changes since v3: http://marc.info/?l=linux-scsi&m=132581455214789&w=2

1/ null pointer regression smp_execute_task: make sure device is not
   removed from the domain while eh is in flight.  Fix up the logic that
   defers domain_device destruction to its own context.  Folded into:
   "libsas: prevent domain rediscovery competing with ata error handling"
   plus a small prep patch: "libsas: convert dev->gone to flags"

2/ lockdep regression in smp_execute_task: failed to release the lock
   before returning.  Folded into: "libsas: check for 'gone' expanders in
   smp_execute_task()"

3/ unnecessary locking in isci_ata_check_ready().  Folded into: "isci:
   ->lldd_ata_check_ready handler"

4/ two new fixes follow in separate mails
   [PATCH v4 1/2] libsas: pre-clean commands that won the eh vs completion race
   [PATCH v4 2/2] libsas: feed the scsi_block_when_processing_errors() meter

Incremental diff: libsas-eh-reworks-v3..libsas-eh-reworks-v4

diff --git a/drivers/scsi/isci/port.c b/drivers/scsi/isci/port.c
index e795645..eca9ba0 100644
--- a/drivers/scsi/isci/port.c
+++ b/drivers/scsi/isci/port.c
@@ -1681,10 +1681,7 @@ int isci_ata_check_ready(struct domain_device *dev)
 	if (test_bit(IPORT_RESET_PENDING, &iport->state))
 		goto out;
 
-	/* snapshot active phy mask */
-	spin_lock_irqsave(&ihost->scic_lock, flags);
 	rc = !!iport->active_phy_mask;
-	spin_unlock_irqrestore(&ihost->scic_lock, flags);
  out:
 	isci_put_device(idev);
 
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index f90fdcf..a062adc 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -195,7 +195,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
 	spin_unlock(ap->lock);
 
 	/* If the device fell off, no sense in issuing commands */
-	if (dev->gone)
+	if (test_bit(SAS_DEV_GONE, &dev->state))
 		goto out;
 
 	task = sas_alloc_task(GFP_ATOMIC);
@@ -327,7 +327,7 @@ static int sas_ata_hard_reset(struct ata_link *link, unsigned int *class,
 	struct domain_device *dev = ap->private_data;
 	struct sas_internal *i = dev_to_sas_internal(dev);
 
-	if (dev->gone)
+	if (test_bit(SAS_DEV_GONE, &dev->state))
 		return -ENODEV;
 
 	res = i->dft->lldd_I_T_nexus_reset(dev);
@@ -663,6 +663,11 @@ static void async_sas_ata_eh(void *data, async_cookie_t cookie)
 
 	ata_port_printk(ap, KERN_DEBUG, "sas eh calling libata port error handler");
 	ata_scsi_port_error_handler(ha->core.shost, ap);
+
+	/* tell scsi_block_when_processing_errors() waiters that we are
+	 * still making forward progress
+	 */
+	wake_up(&ha->core.shost->host_wait);
 }
 
 void sas_ata_strategy_handler(struct Scsi_Host *shost)
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 0b7555d..dfb0250 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -269,17 +269,22 @@ static void sas_destruct_devices(struct work_struct *work)
 
 	clear_bit(DISCE_DESTRUCT, &port->disc.pending);
 
-	list_for_each_entry_safe(dev, n, &port->destroy_list, dev_list_node) {
+	list_for_each_entry_safe(dev, n, &port->destroy_list, disco_list_node) {
+		list_del_init(&dev->disco_list_node);
+
 		sas_remove_children(&dev->rphy->dev);
 		sas_rphy_delete(dev->rphy);
 		dev->rphy = NULL;
 		sas_unregister_common_dev(port, dev);
+
+		sas_put_device(dev);
 	}
 }
 
 void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev)
 {
-	if (!list_empty(&dev->disco_list_node)) {
+	if (!test_bit(SAS_DEV_DESTROY, &dev->state) &&
+	    !list_empty(&dev->disco_list_node)) {
 		/* this rphy never saw sas_rphy_add */
 		list_del_init(&dev->disco_list_node);
 		sas_rphy_free(dev->rphy);
@@ -287,13 +292,9 @@ void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev)
 		sas_unregister_common_dev(port, dev);
 	}
 
-	if (dev->rphy) {
+	if (dev->rphy && !test_and_set_bit(SAS_DEV_DESTROY, &dev->state)) {
 		sas_rphy_unlink(dev->rphy);
-
-		spin_lock_irq(&port->dev_list_lock);
-		list_move_tail(&dev->dev_list_node, &port->destroy_list);
-		spin_unlock_irq(&port->dev_list_lock);
-
+		list_move_tail(&dev->disco_list_node, &port->destroy_list);
 		sas_discover_event(dev->port, DISCE_DESTRUCT);
 	}
 }
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index e47599b..68a80a0 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -74,8 +74,10 @@ static int smp_execute_task(struct domain_device *dev, void *req, int req_size,
 
 	mutex_lock(&dev->ex_dev.cmd_mutex);
 	for (retry = 0; retry < 3; retry++) {
-		if (dev->gone)
-			return -ECOMM;
+		if (test_bit(SAS_DEV_GONE, &dev->state)) {
+			res = -ECOMM;
+			break;
+		}
 
 		task = sas_alloc_task(GFP_KERNEL);
 		if (!task) {
@@ -1794,7 +1796,7 @@ static void sas_unregister_ex_tree(struct asd_sas_port *port, struct domain_devi
 	struct domain_device *child, *n;
 
 	list_for_each_entry_safe(child, n, &ex->children, siblings) {
-		child->gone = 1;
+		set_bit(SAS_DEV_GONE, &child->state);
 		if (child->dev_type == EDGE_DEV ||
 		    child->dev_type == FANOUT_DEV)
 			sas_unregister_ex_tree(port, child);
@@ -1815,7 +1817,7 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent,
 			&ex_dev->children, siblings) {
 			if (SAS_ADDR(child->sas_addr) ==
 			    SAS_ADDR(phy->attached_sas_addr)) {
-				child->gone = 1;
+				set_bit(SAS_DEV_GONE, &child->state);
 				if (child->dev_type == EDGE_DEV ||
 				    child->dev_type == FANOUT_DEV)
 					sas_unregister_ex_tree(parent->port, child);
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
index 36e2905..31adcd1 100644
--- a/drivers/scsi/libsas/sas_port.c
+++ b/drivers/scsi/libsas/sas_port.c
@@ -168,7 +168,7 @@ void sas_deform_port(struct asd_sas_phy *phy, int gone)
 
 	if (port->num_phys == 1) {
 		if (dev && gone)
-			dev->gone = 1;
+			set_bit(SAS_DEV_GONE, &dev->state);
 		sas_unregister_domain_devices(port);
 		sas_port_delete(port->port);
 		port->port = NULL;
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 59a227d..731c892 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -208,7 +208,7 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 	int res = 0;
 
 	/* If the device fell off, no sense in issuing commands */
-	if (dev->gone) {
+	if (test_bit(SAS_DEV_GONE, &dev->state)) {
 		cmd->result = DID_BAD_TARGET << 16;
 		goto out_done;
 	}
@@ -249,8 +249,8 @@ out_done:
 
 static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
 {
-	struct sas_task *task = TO_SAS_TASK(cmd);
 	struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host);
+	struct sas_task *task = TO_SAS_TASK(cmd);
 
 	/* At this point, we only get called following an actual abort
 	 * of the task, so we should be guaranteed not to be racing with
@@ -267,9 +267,9 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
 
 static void sas_eh_defer_cmd(struct scsi_cmnd *cmd)
 {
-	struct sas_task *task = TO_SAS_TASK(cmd);
-	struct domain_device *dev = task->dev;
+	struct domain_device *dev = cmd_to_domain_dev(cmd);
 	struct sas_ha_struct *ha = dev->port->ha;
+	struct sas_task *task = TO_SAS_TASK(cmd);
 
 	if (!dev_is_sata(dev)) {
 		sas_eh_finish_cmd(cmd);
@@ -530,8 +530,9 @@ static int sas_eh_handle_sas_errors(struct Scsi_Host *shost,
 	struct sas_internal *i = to_sas_internal(shost->transportt);
 	unsigned long flags;
 	struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
+	LIST_HEAD(done);
 
-Again:
+	/* clean out any commands that won the completion vs eh race */
 	list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
 		struct domain_device *dev = cmd_to_domain_dev(cmd);
 		struct sas_task *task;
@@ -545,7 +546,12 @@ Again:
 		spin_unlock_irqrestore(&dev->done_lock, flags);
 
 		if (!task)
-			continue;
+			list_move_tail(&cmd->eh_entry, &done);
+	}
+
+ Again:
+	list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
+		struct sas_task *task = TO_SAS_TASK(cmd);
 
 		list_del_init(&cmd->eh_entry);
 
@@ -649,15 +655,16 @@ Again:
 			goto clear_q;
 		}
 	}
+ out:
+	list_splice_tail(&done, work_q);
 	list_splice_tail_init(&ha->eh_ata_q, work_q);
 	return list_empty(work_q);
-clear_q:
+
+ clear_q:
 	SAS_DPRINTK("--- Exit %s -- clear_q\n", __func__);
 	list_for_each_entry_safe(cmd, n, work_q, eh_entry)
 		sas_eh_finish_cmd(cmd);
-
-	list_splice_tail_init(&ha->eh_ata_q, work_q);
-	return list_empty(work_q);
+	goto out;
 }
 
 void sas_scsi_recover_host(struct Scsi_Host *shost)
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index feb61a8..55bab86 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -174,7 +174,11 @@ struct sata_device {
 	struct ata_taskfile tf;
 };
 
-/* ---------- Domain device ---------- */
+enum {
+	SAS_DEV_GONE,
+	SAS_DEV_DESTROY,
+};
+
 struct domain_device {
 	spinlock_t done_lock;
         enum sas_dev_type dev_type;
@@ -191,7 +195,7 @@ struct domain_device {
 	struct sas_phy *phy;
 
         struct list_head dev_list_node;
-	struct list_head disco_list_node;
+	struct list_head disco_list_node; /* awaiting probe or destruct */
 
         enum sas_protocol    iproto;
         enum sas_protocol    tproto;
@@ -209,7 +213,7 @@ struct domain_device {
         };
 
         void *lldd_dev;
-	int gone;
+	unsigned long state;
 	struct kref kref;
 };
 

The following changes since commit 5c41dc3a79150e93e5d050871a10b761be8281a1:

  [SCSI] lpfc 8.3.28: Update driver version to 8.3.28 (2011-12-15 10:57:45 +0400)

are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git libsas-eh-reworks-v4

Dan Williams (36):
      libsas: remove unused ata_task_resp fields
      libsas: kill sas_slave_destroy
      libsas: fix domain_device leak
      libsas: fix leak of dev->sata_dev.identify_[packet_]device
      libsas: replace event locks with atomic bitops
      libsas: convert ha->state to flags
      libsas: introduce sas_drain_work()
      libsas: remove ata_port.lock management duties from lldds
      libsas: convert dev->gone to flags
      libsas: prevent domain rediscovery competing with ata error handling
      libsas: use ->set_dmamode to notify lldds of NCQ parameters
      libsas: kill invocation of scsi_eh_finish_cmd from sas_ata_task_done
      libsas: close error handling vs sas_ata_task_done() race
      libsas: prevent double completion of scmds from eh
      libsas: fix timeout vs completion race
      libsas: let libata handle command timeouts
      libsas: defer SAS_TASK_NEED_DEV_RESET commands to libata
      libsas: use libata-eh-reset for sata rediscovery fis transmit failures
      libsas: perform sas-transport resets in shost->workq context
      libsas: execute transport link resets with libata-eh via host workqueue
      libsas: sas_phy_enable via transport_sas_phy_reset
      libsas: async ata-eh
      libsas: poll for ata device readiness after reset
      libsas: don't mark expanders as gone when a child device is removed
      libsas: check for 'gone' expanders in smp_execute_task()
      libsas: fix sas_find_local_phy(), take phy references
      libsas: don't recover 'gone' devices in sas_ata_hard_reset()
      isci: kill iphy->isci_port lookups
      isci: kill isci_port->status
      isci: fix interpretation of "hard" reset
      isci: stop interpreting ->lldd_lu_reset() as an ata soft-reset
      isci: ->lldd_ata_check_ready handler
      isci: remove bus and reset handlers
      isci: remove IDEV_EH hack to disable "discovery-time" ata resets
      libsas: pre-clean commands that won the eh vs completion race
      libsas: feed the scsi_block_when_processing_errors() meter

Jeff Skirvin (2):
      libsas: Remove redundant phy state notification calls.
      libsas: add mutex for SMP task execution

 Documentation/scsi/libsas.txt       |   15 -
 drivers/ata/libata-eh.c             |    1 +
 drivers/ata/libata.h                |    1 -
 drivers/scsi/aic94xx/aic94xx.h      |    2 +
 drivers/scsi/aic94xx/aic94xx_dev.c  |   38 ++-
 drivers/scsi/aic94xx/aic94xx_init.c |    5 +-
 drivers/scsi/aic94xx/aic94xx_tmf.c  |    9 +-
 drivers/scsi/isci/host.c            |    8 +-
 drivers/scsi/isci/host.h            |   19 +-
 drivers/scsi/isci/init.c            |   13 +-
 drivers/scsi/isci/phy.c             |   18 +-
 drivers/scsi/isci/phy.h             |    1 -
 drivers/scsi/isci/port.c            |  217 ++++++------
 drivers/scsi/isci/port.h            |   11 +-
 drivers/scsi/isci/remote_device.c   |   32 +--
 drivers/scsi/isci/remote_device.h   |    7 +-
 drivers/scsi/isci/request.c         |  198 +----------
 drivers/scsi/isci/request.h         |    9 +-
 drivers/scsi/isci/task.c            |  158 ++-------
 drivers/scsi/isci/task.h            |   40 --
 drivers/scsi/libsas/sas_ata.c       |  692 +++++++++++++++--------------------
 drivers/scsi/libsas/sas_discover.c  |  152 +++++++--
 drivers/scsi/libsas/sas_event.c     |   89 +++++-
 drivers/scsi/libsas/sas_expander.c  |  113 ++++--
 drivers/scsi/libsas/sas_init.c      |  192 +++++++++-
 drivers/scsi/libsas/sas_internal.h  |   73 ++--
 drivers/scsi/libsas/sas_phy.c       |   12 +-
 drivers/scsi/libsas/sas_port.c      |   26 +-
 drivers/scsi/libsas/sas_scsi_host.c |  320 +++++++---------
 drivers/scsi/mvsas/mv_init.c        |    1 -
 drivers/scsi/mvsas/mv_sas.c         |   11 +-
 drivers/scsi/pm8001/pm8001_init.c   |    1 -
 drivers/scsi/pm8001/pm8001_sas.c    |   29 +-
 drivers/scsi/scsi_transport_sas.c   |   59 +++-
 include/linux/libata.h              |    1 +
 include/scsi/libsas.h               |   67 ++--
 include/scsi/sas_ata.h              |   26 +-
 include/scsi/scsi_transport_sas.h   |   12 +-
 38 files changed, 1321 insertions(+), 1357 deletions(-)

             reply	other threads:[~2012-01-10  7:38 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-01-10  7:38 Dan Williams [this message]
2012-01-10  7:38 ` [PATCH v4 1/2] libsas: pre-clean commands that won the eh vs completion race Dan Williams
2012-01-10  7:38 ` [PATCH v4 2/2] libsas: feed the scsi_block_when_processing_errors() meter Dan Williams
2012-01-10 19:29 ` [GIT PATCH v4 0/2] libsas: eh reworks (ata-eh vs discovery, races, ...) James Bottomley
2012-01-10 20:18   ` Dan Williams
2012-01-16  8:40     ` James Bottomley
2012-01-16 17:00       ` Dan Williams
2012-01-16 20:24         ` James Bottomley
2012-01-16 20:44           ` Dan Williams
2012-01-13  0:57 ` Jack Wang
2012-01-13  1:21   ` Dan Williams
2012-01-13  1:37     ` Jack Wang
2012-01-13  1:51       ` Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120110073647.4563.7504.stgit@localhost6.localdomain6 \
    --to=dan.j.williams@intel.com \
    --cc=jack_wang@usish.com \
    --cc=linux-ide@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.