linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: James Smart <jsmart2021@gmail.com>
To: linux-scsi@vger.kernel.org
Cc: James Smart <jsmart2021@gmail.com>,
	Dick Kennedy <dick.kennedy@broadcom.com>
Subject: [PATCH v2 10/15] lpfc: Fix NVME recovery after mailbox timeout
Date: Mon,  4 Jan 2021 10:02:35 -0800	[thread overview]
Message-ID: <20210104180240.46824-11-jsmart2021@gmail.com> (raw)
In-Reply-To: <20210104180240.46824-1-jsmart2021@gmail.com>

If a mailbox command times out, the SLI port is deemd in error and the
port is reset.  The hba cleanup is not returning I/O's to the NVMe layer
before the port is unregistered. This is due to the hba being marked
offline (!SLI_ACTIVE) and cleanup being done by the mailbox timeout
handler rather than an general adapter reset routine.  The mailbox timeout
handler mailbox handler only cleaned up scsi ios.

Fix by reworking the mailbox handler to:
- After handling the mailbox error, detect the board is already in
  failure (may be due to another error), and leave cleanup to the
  other handler.
- If the mailbox command timeout is initial detector of the port error,
  continue with the board cleanup and marking the adapter offline
  (!SLI_ACTIVE). Remove the scsi-only io cleanup routine. The generic
  reset adapter routine that is subsequently invoked, will clean up the
  ios.
- Have the reset adapter routine flush all nvme and scsi ios if the
  adapter has been marked failed (!SLI_ACTIVE).
- Rework the nvme io terminate routine to take a status code to fail
  the io with and update so that cleaned up io calls the wqe completion
  routine. Currently it is bypassing the wqe cleanup and calling the nvme
  io completion directly. The wqe completion routine will take care of
  data structure and node cleanup then call the nvme io completion
  handler.

Co-developed-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
---
 drivers/scsi/lpfc/lpfc_crtn.h |  4 ++--
 drivers/scsi/lpfc/lpfc_init.c |  8 ++++++--
 drivers/scsi/lpfc/lpfc_nvme.c | 33 +++++++++++++++++----------------
 drivers/scsi/lpfc/lpfc_sli.c  | 20 ++++++++++++--------
 4 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index f78e52a18b0b..e70db9ec7da4 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -255,7 +255,6 @@ void lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba,
 int lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
 			       struct fc_frame_header *fc_hdr);
 void lpfc_nvmet_wqfull_process(struct lpfc_hba *phba, struct lpfc_queue *wq);
-void lpfc_sli_flush_nvme_rings(struct lpfc_hba *phba);
 void lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba);
 void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *,
 			uint16_t);
@@ -598,7 +597,8 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *ncmd,
 void lpfc_io_ktime(struct lpfc_hba *phba, struct lpfc_io_buf *ncmd);
 void lpfc_wqe_cmd_template(void);
 void lpfc_nvmet_cmd_template(void);
-void lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn);
+void lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
+			   uint32_t stat, uint32_t param);
 extern int lpfc_enable_nvmet_cnt;
 extern unsigned long long lpfc_enable_nvmet[];
 extern int lpfc_no_hba_reset_cnt;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index af926768bcae..c2619d56be12 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -6191,10 +6191,14 @@ lpfc_reset_hba(struct lpfc_hba *phba)
 		phba->link_state = LPFC_HBA_ERROR;
 		return;
 	}
-	if (phba->sli.sli_flag & LPFC_SLI_ACTIVE)
+
+	/* If not LPFC_SLI_ACTIVE, force all IO to be flushed */
+	if (phba->sli.sli_flag & LPFC_SLI_ACTIVE) {
 		lpfc_offline_prep(phba, LPFC_MBX_WAIT);
-	else
+	} else {
 		lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT);
+		lpfc_sli_flush_io_rings(phba);
+	}
 	lpfc_offline(phba);
 	lpfc_sli_brdrestart(phba);
 	lpfc_online(phba);
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index fd4a1cf0e4a6..e72c4cd3a97a 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -2596,14 +2596,17 @@ lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba)
 }
 
 void
-lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn)
+lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
+		      uint32_t stat, uint32_t param)
 {
 #if (IS_ENABLED(CONFIG_NVME_FC))
 	struct lpfc_io_buf *lpfc_ncmd;
 	struct nvmefc_fcp_req *nCmd;
-	struct lpfc_nvme_fcpreq_priv *freqpriv;
+	struct lpfc_wcqe_complete wcqe;
+	struct lpfc_wcqe_complete *wcqep = &wcqe;
 
-	if (!pwqeIn->context1) {
+	lpfc_ncmd = (struct lpfc_io_buf *)pwqeIn->context1;
+	if (!lpfc_ncmd) {
 		lpfc_sli_release_iocbq(phba, pwqeIn);
 		return;
 	}
@@ -2613,31 +2616,29 @@ lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn)
 		lpfc_sli_release_iocbq(phba, pwqeIn);
 		return;
 	}
-	lpfc_ncmd = (struct lpfc_io_buf *)pwqeIn->context1;
 
 	spin_lock(&lpfc_ncmd->buf_lock);
-	if (!lpfc_ncmd->nvmeCmd) {
+	nCmd = lpfc_ncmd->nvmeCmd;
+	if (!nCmd) {
 		spin_unlock(&lpfc_ncmd->buf_lock);
 		lpfc_release_nvme_buf(phba, lpfc_ncmd);
 		return;
 	}
+	spin_unlock(&lpfc_ncmd->buf_lock);
 
-	nCmd = lpfc_ncmd->nvmeCmd;
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
 			"6194 NVME Cancel xri %x\n",
 			lpfc_ncmd->cur_iocbq.sli4_xritag);
 
-	nCmd->transferred_length = 0;
-	nCmd->rcv_rsplen = 0;
-	nCmd->status = NVME_SC_INTERNAL;
-	freqpriv = nCmd->private;
-	freqpriv->nvme_buf = NULL;
-	lpfc_ncmd->nvmeCmd = NULL;
-
-	spin_unlock(&lpfc_ncmd->buf_lock);
-	nCmd->done(nCmd);
+	wcqep->word0 = 0;
+	bf_set(lpfc_wcqe_c_status, wcqep, stat);
+	wcqep->parameter = param;
+	wcqep->word3 = 0; /* xb is 0 */
 
 	/* Call release with XB=1 to queue the IO into the abort list. */
-	lpfc_release_nvme_buf(phba, lpfc_ncmd);
+	if (phba->sli.sli_flag & LPFC_SLI_ACTIVE)
+		bf_set(lpfc_wcqe_c_xb, wcqep, 1);
+
+	(pwqeIn->wqe_cmpl)(phba, pwqeIn, wcqep);
 #endif
 }
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 735fa1d484eb..dedea5de7d78 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1532,15 +1532,19 @@ lpfc_sli_cancel_iocbs(struct lpfc_hba *phba, struct list_head *iocblist,
 
 	while (!list_empty(iocblist)) {
 		list_remove_head(iocblist, piocb, struct lpfc_iocbq, list);
-		if (!piocb->iocb_cmpl) {
+		if (piocb->wqe_cmpl) {
 			if (piocb->iocb_flag & LPFC_IO_NVME)
-				lpfc_nvme_cancel_iocb(phba, piocb);
+				lpfc_nvme_cancel_iocb(phba, piocb,
+						      ulpstatus, ulpWord4);
 			else
 				lpfc_sli_release_iocbq(phba, piocb);
-		} else {
+
+		} else if (piocb->iocb_cmpl) {
 			piocb->iocb.ulpStatus = ulpstatus;
 			piocb->iocb.un.ulpWord[4] = ulpWord4;
 			(piocb->iocb_cmpl) (phba, piocb, piocb);
+		} else {
+			lpfc_sli_release_iocbq(phba, piocb);
 		}
 	}
 	return;
@@ -8269,8 +8273,10 @@ lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
 
 	struct lpfc_sli *psli = &phba->sli;
 
-	/* If the mailbox completed, process the completion and return */
-	if (lpfc_sli4_process_missed_mbox_completions(phba))
+	/* If the mailbox completed, process the completion */
+	lpfc_sli4_process_missed_mbox_completions(phba);
+
+	if (!(psli->sli_flag & LPFC_SLI_ACTIVE))
 		return;
 
 	if (pmbox != NULL)
@@ -8311,8 +8317,6 @@ lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
 	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 	spin_unlock_irq(&phba->hbalock);
 
-	lpfc_sli_abort_fcp_rings(phba);
-
 	lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
 			"0345 Resetting board due to mailbox timeout\n");
 
@@ -11783,7 +11787,7 @@ lpfc_sli_validate_fcp_iocb(struct lpfc_iocbq *iocbq, struct lpfc_vport *vport,
 	struct lpfc_io_buf *lpfc_cmd;
 	int rc = 1;
 
-	if (iocbq->vport != vport)
+	if (!iocbq || iocbq->vport != vport)
 		return rc;
 
 	if (!(iocbq->iocb_flag &  LPFC_IO_FCP) ||
-- 
2.26.2


  parent reply	other threads:[~2021-01-04 18:04 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-04 18:02 [PATCH v2 00/15] lpfc: Update lpfc to revision 12.8.0.7 James Smart
2021-01-04 18:02 ` [PATCH v2 01/15] lpfc: Fix PLOGI S_ID of 0 on pt2pt config James Smart
2021-01-04 18:02 ` [PATCH v2 02/15] lpfc: Fix auto sli_mode and its effect on CONFIG_PORT for SLI3 James Smart
2021-06-07 11:06   ` Daniel Wagner
2021-06-07 15:12     ` James Smart
2021-06-15 12:45       ` Daniel Wagner
2021-06-18  8:52         ` Daniel Wagner
2021-12-14 13:19           ` [PATCH] lpfc: Reintroduce old IRQ probe logic Daniel Wagner
2021-01-04 18:02 ` [PATCH v2 03/15] lpfc: Refresh ndlp when a new PRLI is received in the PRLI issue state James Smart
2021-01-04 18:02 ` [PATCH v2 04/15] lpfc: Fix crash when a fabric node is released prematurely James Smart
2021-01-04 18:02 ` [PATCH v2 05/15] lpfc: Use the nvme-fc transport supplied timeout for LS requests James Smart
2021-01-04 18:02 ` [PATCH v2 06/15] lpfc: Fix FW reset action if IOs are outstanding James Smart
2021-01-04 18:02 ` [PATCH v2 07/15] lpfc: Prevent duplicate requests to unregister with cpuhp framework James Smart
2021-01-04 18:02 ` [PATCH v2 08/15] lpfc: Fix error log messages being logged following scsi task mgnt James Smart
2021-01-04 18:02 ` [PATCH v2 09/15] lpfc: Fix target reset failing James Smart
2021-01-04 18:02 ` James Smart [this message]
2021-01-04 18:02 ` [PATCH v2 11/15] lpfc: Fix vport create logging James Smart
2021-01-04 18:02 ` [PATCH v2 12/15] lpfc: Fix crash when nvmet transport calls host_release James Smart
2021-01-04 18:02 ` [PATCH v2 13/15] lpfc: Implement health checking when aborting io James Smart
2021-01-04 18:02 ` [PATCH v2 14/15] lpfc: Enhancements to LOG_TRACE_EVENT for better readability James Smart
2021-01-04 18:02 ` [PATCH v2 15/15] lpfc: Update lpfc version to 12.8.0.7 James Smart
2021-01-08  4:02 ` [PATCH v2 00/15] lpfc: Update lpfc to revision 12.8.0.7 Martin K. Petersen
2021-01-13  5:48 ` Martin K. Petersen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210104180240.46824-11-jsmart2021@gmail.com \
    --to=jsmart2021@gmail.com \
    --cc=dick.kennedy@broadcom.com \
    --cc=linux-scsi@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).