linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: "Matthew R. Ochs" <mrochs@linux.vnet.ibm.com>
To: linux-scsi@vger.kernel.org,
	James Bottomley <James.Bottomley@HansenPartnership.com>,
	"Nicholas A. Bellinger" <nab@linux-iscsi.org>,
	Brian King <brking@linux.vnet.ibm.com>,
	Ian Munsie <imunsie@au1.ibm.com>,
	Daniel Axtens <dja@ozlabs.au.ibm.com>,
	Andrew Donnellan <andrew.donnellan@au1.ibm.com>,
	Tomas Henzl <thenzl@redhat.com>,
	David Laight <David.Laight@ACULAB.COM>
Cc: Michael Neuling <mikey@neuling.org>,
	linuxppc-dev@lists.ozlabs.org,
	"Manoj N. Kumar" <manoj@linux.vnet.ibm.com>
Subject: [PATCH v3 13/32] cxlflash: Fix to avoid stall while waiting on TMF
Date: Thu, 24 Sep 2015 14:39:20 -0500	[thread overview]
Message-ID: <1443123560-17081-1-git-send-email-mrochs@linux.vnet.ibm.com> (raw)
In-Reply-To: <1443123193-16498-1-git-send-email-mrochs@linux.vnet.ibm.com>

Borrowing the TMF waitq's spinlock causes a stall condition when
waiting for the TMF to complete. To remedy, introduce our own spin
lock to serialize TMF and use the appropriate wait services.

Also add a timeout while waiting for a TMF completion. When a TMF
times out, report back a failure such that a bigger hammer reset
can occur.

Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>
---
 drivers/scsi/cxlflash/common.h |  1 +
 drivers/scsi/cxlflash/main.c   | 55 +++++++++++++++++++++++++-----------------
 2 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
index b038ac7..7a0cb5c 100644
--- a/drivers/scsi/cxlflash/common.h
+++ b/drivers/scsi/cxlflash/common.h
@@ -124,6 +124,7 @@ struct cxlflash_cfg {
 	struct list_head lluns; /* list of llun_info structs */
 
 	wait_queue_head_t tmf_waitq;
+	spinlock_t tmf_slock;
 	bool tmf_active;
 	wait_queue_head_t reset_waitq;
 	enum cxlflash_state state;
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index 527ff85..110037d 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -249,11 +249,10 @@ static void cmd_complete(struct afu_cmd *cmd)
 		scp->scsi_done(scp);
 
 		if (cmd_is_tmf) {
-			spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+			spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
 			cfg->tmf_active = false;
 			wake_up_all_locked(&cfg->tmf_waitq);
-			spin_unlock_irqrestore(&cfg->tmf_waitq.lock,
-					       lock_flags);
+			spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
 		}
 	} else
 		complete(&cmd->cevent);
@@ -420,6 +419,7 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
 	struct device *dev = &cfg->dev->dev;
 	ulong lock_flags;
 	int rc = 0;
+	ulong to;
 
 	cmd = cmd_checkout(afu);
 	if (unlikely(!cmd)) {
@@ -428,15 +428,15 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
 		goto out;
 	}
 
-	/* If a Task Management Function is active, do not send one more.
-	 */
-	spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+	/* When Task Management Function is active do not send another */
+	spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
 	if (cfg->tmf_active)
-		wait_event_interruptible_locked_irq(cfg->tmf_waitq,
-						    !cfg->tmf_active);
+		wait_event_interruptible_lock_irq(cfg->tmf_waitq,
+						  !cfg->tmf_active,
+						  cfg->tmf_slock);
 	cfg->tmf_active = true;
 	cmd->cmd_tmf = true;
-	spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+	spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
 
 	cmd->rcb.ctx_id = afu->ctx_hndl;
 	cmd->rcb.port_sel = port_sel;
@@ -457,15 +457,24 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
 	rc = send_cmd(afu, cmd);
 	if (unlikely(rc)) {
 		cmd_checkin(cmd);
-		spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+		spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
 		cfg->tmf_active = false;
-		spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+		spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
 		goto out;
 	}
 
-	spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
-	wait_event_interruptible_locked_irq(cfg->tmf_waitq, !cfg->tmf_active);
-	spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+	spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
+	to = msecs_to_jiffies(5000);
+	to = wait_event_interruptible_lock_irq_timeout(cfg->tmf_waitq,
+						       !cfg->tmf_active,
+						       cfg->tmf_slock,
+						       to);
+	if (!to) {
+		cfg->tmf_active = false;
+		dev_err(dev, "%s: TMF timed out!\n", __func__);
+		rc = -1;
+	}
+	spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
 out:
 	return rc;
 }
@@ -512,16 +521,17 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
 			    get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
 			    get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
 
-	/* If a Task Management Function is active, wait for it to complete
+	/*
+	 * If a Task Management Function is active, wait for it to complete
 	 * before continuing with regular commands.
 	 */
-	spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+	spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
 	if (cfg->tmf_active) {
-		spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+		spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
 		rc = SCSI_MLQUEUE_HOST_BUSY;
 		goto out;
 	}
-	spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+	spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
 
 	switch (cfg->state) {
 	case STATE_RESET:
@@ -713,11 +723,12 @@ static void cxlflash_remove(struct pci_dev *pdev)
 	/* If a Task Management Function is active, wait for it to complete
 	 * before continuing with remove.
 	 */
-	spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+	spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
 	if (cfg->tmf_active)
-		wait_event_interruptible_locked_irq(cfg->tmf_waitq,
-						    !cfg->tmf_active);
-	spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+		wait_event_interruptible_lock_irq(cfg->tmf_waitq,
+						  !cfg->tmf_active,
+						  cfg->tmf_slock);
+	spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
 
 	cfg->state = STATE_FAILTERM;
 	cxlflash_stop_term_user_contexts(cfg);
-- 
2.1.0

  parent reply	other threads:[~2015-09-24 19:40 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-24 19:33 [PATCH v3 00/32] cxlflash: Miscellaneous bug fixes and corrections Matthew R. Ochs
2015-09-24 19:36 ` [PATCH v3 01/32] cxlflash: Fix to avoid invalid port_sel value Matthew R. Ochs
2015-09-24 19:37 ` [PATCH v3 02/32] cxlflash: Replace magic numbers with literals Matthew R. Ochs
2015-09-24 19:37 ` [PATCH v3 03/32] cxlflash: Fix read capacity timeout Matthew R. Ochs
2015-09-24 19:37 ` [PATCH v3 04/32] cxlflash: Fix potential oops following LUN removal Matthew R. Ochs
2015-09-25  1:10   ` Brian King
2015-09-24 19:37 ` [PATCH v3 05/32] cxlflash: Fix data corruption when vLUN used over multiple cards Matthew R. Ochs
2015-09-24 19:37 ` [PATCH v3 06/32] cxlflash: Fix to avoid sizeof(bool) Matthew R. Ochs
2015-09-24 19:38 ` [PATCH v3 07/32] cxlflash: Fix context encode mask width Matthew R. Ochs
2015-09-24 19:38 ` [PATCH v3 08/32] cxlflash: Fix to avoid CXL services during EEH Matthew R. Ochs
2015-09-25  1:23   ` Brian King
2015-09-25 17:09     ` Matthew R. Ochs
2015-09-24 19:38 ` [PATCH v3 09/32] cxlflash: Correct naming of limbo state and waitq Matthew R. Ochs
2015-09-24 19:38 ` [PATCH v3 10/32] cxlflash: Make functions static Matthew R. Ochs
2015-09-24 19:39 ` [PATCH v3 11/32] cxlflash: Refine host/device attributes Matthew R. Ochs
2015-09-24 19:47   ` Matthew R. Ochs
2015-09-25  1:28   ` Brian King
2015-09-24 19:39 ` [PATCH v3 12/32] cxlflash: Fix to avoid spamming the kernel log Matthew R. Ochs
2015-09-24 19:39 ` Matthew R. Ochs [this message]
2015-09-25 20:32   ` [PATCH v3 13/32] cxlflash: Fix to avoid stall while waiting on TMF Brian King
2015-09-24 19:39 ` [PATCH v3 14/32] cxlflash: Fix location of setting resid Matthew R. Ochs
2015-09-24 19:39 ` [PATCH v3 15/32] cxlflash: Fix host link up event handling Matthew R. Ochs
2015-09-24 19:39 ` [PATCH v3 16/32] cxlflash: Fix async interrupt bypass logic Matthew R. Ochs
2015-09-24 19:39 ` [PATCH v3 17/32] cxlflash: Remove dual port online dependency Matthew R. Ochs
2015-09-24 19:39 ` [PATCH v3 18/32] cxlflash: Fix AFU version access/storage and add check Matthew R. Ochs
2015-09-24 19:40 ` [PATCH v3 19/32] cxlflash: Correct usage of scsi_host_put() Matthew R. Ochs
2015-09-25 20:35   ` Brian King
2015-09-24 19:41 ` [PATCH v3 20/32] cxlflash: Fix to prevent workq from accessing freed memory Matthew R. Ochs
2015-09-25 20:37   ` Brian King
2015-09-24 19:41 ` [PATCH v3 21/32] cxlflash: Correct behavior in device reset handler following EEH Matthew R. Ochs
2015-09-24 19:41 ` [PATCH v3 22/32] cxlflash: Remove unnecessary scsi_block_requests Matthew R. Ochs
2015-09-24 19:41 ` [PATCH v3 23/32] cxlflash: Fix function prolog parameters and return codes Matthew R. Ochs
2015-09-24 19:41 ` [PATCH v3 24/32] cxlflash: Fix MMIO and endianness errors Matthew R. Ochs
2015-09-24 19:41 ` [PATCH v3 25/32] cxlflash: Fix to prevent EEH recovery failure Matthew R. Ochs
2015-09-24 19:42 ` [PATCH v3 26/32] cxlflash: Correct spelling, grammar, and alignment mistakes Matthew R. Ochs
2015-09-24 19:42 ` [PATCH v3 27/32] cxlflash: Fix to prevent stale AFU RRQ Matthew R. Ochs
2015-09-24 19:42 ` [PATCH v3 28/32] cxlflash: Fix to avoid state change collision Matthew R. Ochs
2015-09-25 21:10   ` Brian King
2015-09-25 22:31     ` Matthew R. Ochs
2015-09-24 19:44 ` [PATCH v3 29/32] MAINTAINERS: Add cxlflash driver Matthew R. Ochs
2015-09-25  1:08   ` Andrew Donnellan
2015-09-24 19:44 ` [PATCH v3 30/32] cxlflash: Fix to double the delay each time Matthew R. Ochs
2015-09-25 21:12   ` Brian King
2015-09-24 19:44 ` [PATCH v3 31/32] cxlflash: Fix to avoid corrupting adapter fops Matthew R. Ochs
2015-09-25 21:23   ` Brian King
2015-09-25 22:35     ` Matthew R. Ochs
2015-09-24 19:44 ` [PATCH v3 32/32] cxlflash: Correct trace string Matthew R. Ochs
2015-09-25 21:24   ` Brian King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1443123560-17081-1-git-send-email-mrochs@linux.vnet.ibm.com \
    --to=mrochs@linux.vnet.ibm.com \
    --cc=David.Laight@ACULAB.COM \
    --cc=James.Bottomley@HansenPartnership.com \
    --cc=andrew.donnellan@au1.ibm.com \
    --cc=brking@linux.vnet.ibm.com \
    --cc=dja@ozlabs.au.ibm.com \
    --cc=imunsie@au1.ibm.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=manoj@linux.vnet.ibm.com \
    --cc=mikey@neuling.org \
    --cc=nab@linux-iscsi.org \
    --cc=thenzl@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).