All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nilesh Javali <njavali@marvell.com>
To: <martin.petersen@oracle.com>
Cc: <linux-scsi@vger.kernel.org>, <GR-QLogic-Storage-Upstream@marvell.com>
Subject: [PATCH v2] qla2xxx: add heartbeat check
Date: Fri, 18 Jun 2021 22:24:27 -0700	[thread overview]
Message-ID: <20210619052427.6440-1-njavali@marvell.com> (raw)

From: Quinn Tran <qutran@marvell.com>

Use 'no-op' mailbox command to check and see if FW is still responsive.

v2:
Change function name qla_do_hb to qla_do_heartbeat

Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Quinn Tran <qutran@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
---
 drivers/scsi/qla2xxx/qla_def.h  |  4 ++
 drivers/scsi/qla2xxx/qla_gbl.h  |  1 +
 drivers/scsi/qla2xxx/qla_init.c |  6 ++-
 drivers/scsi/qla2xxx/qla_iocb.c |  4 ++
 drivers/scsi/qla2xxx/qla_isr.c  |  4 ++
 drivers/scsi/qla2xxx/qla_mbx.c  | 27 +++++++++++++
 drivers/scsi/qla2xxx/qla_nvme.c |  4 ++
 drivers/scsi/qla2xxx/qla_os.c   | 68 +++++++++++++++++++++++++++++++++
 8 files changed, 117 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index def4d99f80e9..2f67ec1df3e6 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -3660,6 +3660,8 @@ struct qla_qpair {
 	struct qla_tgt_counters tgt_counters;
 	uint16_t cpuid;
 	struct qla_fw_resources fwres ____cacheline_aligned;
+	u32	cmd_cnt;
+	u32	cmd_completion_cnt;
 };
 
 /* Place holder for FW buffer parameters */
@@ -4616,6 +4618,7 @@ struct qla_hw_data {
 
 	struct qla_hw_data_stat stat;
 	pci_error_state_t pci_error_state;
+	u64 prev_cmd_cnt;
 };
 
 struct active_regions {
@@ -4743,6 +4746,7 @@ typedef struct scsi_qla_host {
 #define SET_ZIO_THRESHOLD_NEEDED 32
 #define ISP_ABORT_TO_ROM	33
 #define VPORT_DELETE		34
+#define HEARTBEAT_CHK		38
 
 #define PROCESS_PUREX_IOCB	63
 
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index fae5cae6f0a8..70b7cda0a25a 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -551,6 +551,7 @@ extern int qla2xxx_read_remote_register(scsi_qla_host_t *, uint32_t,
     uint32_t *);
 extern int qla2xxx_write_remote_register(scsi_qla_host_t *, uint32_t,
     uint32_t);
+void qla_no_op_mb(struct scsi_qla_host *vha);
 
 /*
  * Global Function Prototypes in qla_isr.c source file.
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index eb825318e3f5..f8f471157109 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -6870,10 +6870,14 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
 	ha->flags.fw_init_done = 0;
 	ha->chip_reset++;
 	ha->base_qpair->chip_reset = ha->chip_reset;
+	ha->base_qpair->cmd_cnt = ha->base_qpair->cmd_completion_cnt = 0;
 	for (i = 0; i < ha->max_qpairs; i++) {
-		if (ha->queue_pair_map[i])
+		if (ha->queue_pair_map[i]) {
 			ha->queue_pair_map[i]->chip_reset =
 				ha->base_qpair->chip_reset;
+			ha->queue_pair_map[i]->cmd_cnt =
+			    ha->queue_pair_map[i]->cmd_completion_cnt = 0;
+		}
 	}
 
 	/* purge MBox commands */
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 38b5bdde2405..d0ee843f6b04 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -1710,6 +1710,7 @@ qla24xx_start_scsi(srb_t *sp)
 	} else
 		req->ring_ptr++;
 
+	sp->qpair->cmd_cnt++;
 	sp->flags |= SRB_DMA_VALID;
 
 	/* Set chip new ring index. */
@@ -1912,6 +1913,7 @@ qla24xx_dif_start_scsi(srb_t *sp)
 	} else
 		req->ring_ptr++;
 
+	sp->qpair->cmd_cnt++;
 	/* Set chip new ring index. */
 	wrt_reg_dword(req->req_q_in, req->ring_index);
 
@@ -2068,6 +2070,7 @@ qla2xxx_start_scsi_mq(srb_t *sp)
 	} else
 		req->ring_ptr++;
 
+	sp->qpair->cmd_cnt++;
 	sp->flags |= SRB_DMA_VALID;
 
 	/* Set chip new ring index. */
@@ -2284,6 +2287,7 @@ qla2xxx_dif_start_scsi_mq(srb_t *sp)
 	} else
 		req->ring_ptr++;
 
+	sp->qpair->cmd_cnt++;
 	/* Set chip new ring index. */
 	wrt_reg_dword(req->req_q_in, req->ring_index);
 
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 6e8f737a4af3..8a8e355f4a89 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -2322,6 +2322,8 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
 
 	if (unlikely(iocb->u.nvme.aen_op))
 		atomic_dec(&sp->vha->hw->nvme_active_aen_cnt);
+	else
+		sp->qpair->cmd_completion_cnt++;
 
 	if (unlikely(comp_status != CS_COMPLETE))
 		logit = 1;
@@ -2976,6 +2978,8 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 		return;
 	}
 
+	sp->qpair->cmd_completion_cnt++;
+
 	/* Fast path completion. */
 	if (comp_status == CS_COMPLETE && scsi_status == 0) {
 		qla2x00_process_completed_request(vha, req, handle);
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 0bcd8afdc0ff..9f3ad8aa649c 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -6939,3 +6939,30 @@ ql26xx_led_config(scsi_qla_host_t *vha, uint16_t options, uint16_t *led)
 
 	return rval;
 }
+
+/**
+ * qla_no_op_mb(): This MB is used to check if FW is still alive and
+ * able to generate an interrupt. Otherwise, a timeout will trigger
+ * FW dump + reset
+ * @vha: host adapter pointer
+ * Return: None
+ */
+void qla_no_op_mb(struct scsi_qla_host *vha)
+{
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+	int rval;
+
+	memset(&mc, 0, sizeof(mc));
+	mcp->mb[0] = 0; // noop cmd= 0
+	mcp->out_mb = MBX_0;
+	mcp->in_mb = MBX_0;
+	mcp->tov = 5;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(vha, mcp);
+
+	if (rval) {
+		ql_dbg(ql_dbg_async, vha, 0x7071,
+			"Failed %s %x\n", __func__, rval);
+	}
+}
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
index e119f8b24e33..3e5c70a1d969 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.c
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
@@ -536,6 +536,10 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp)
 		req->ring_ptr++;
 	}
 
+	/* ignore nvme async cmd due to long timeout */
+	if (!nvme->u.nvme.aen_op)
+		sp->qpair->cmd_cnt++;
+
 	/* Set chip new ring index. */
 	wrt_reg_dword(req->req_q_in, req->ring_index);
 
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 4eab564ea6a0..cedd558f65eb 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -6969,6 +6969,17 @@ qla2x00_do_dpc(void *data)
 			qla2x00_lip_reset(base_vha);
 		}
 
+		if (test_bit(HEARTBEAT_CHK, &base_vha->dpc_flags)) {
+			/*
+			 * if there is a mb in progress then that's
+			 * enough of a check to see if fw is still ticking.
+			 */
+			if (!ha->flags.mbox_busy && base_vha->flags.init_done)
+				qla_no_op_mb(base_vha);
+
+			clear_bit(HEARTBEAT_CHK, &base_vha->dpc_flags);
+		}
+
 		ha->dpc_active = 0;
 end_loop:
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -7025,6 +7036,61 @@ qla2x00_rst_aen(scsi_qla_host_t *vha)
 	}
 }
 
+static bool qla_do_heartbeat(struct scsi_qla_host *vha)
+{
+	u64 cmd_cnt, prev_cmd_cnt;
+	bool do_hb = false;
+	struct qla_hw_data *ha = vha->hw;
+	int i;
+
+	/* if cmds are still pending down in fw, then do hb */
+	if (ha->base_qpair->cmd_cnt != ha->base_qpair->cmd_completion_cnt) {
+		do_hb = true;
+		goto skip;
+	}
+
+	for (i = 0; i < ha->max_qpairs; i++) {
+		if (ha->queue_pair_map[i] &&
+		    ha->queue_pair_map[i]->cmd_cnt !=
+		    ha->queue_pair_map[i]->cmd_completion_cnt) {
+			do_hb = true;
+			break;
+		}
+	}
+
+skip:
+	prev_cmd_cnt = ha->prev_cmd_cnt;
+	cmd_cnt = ha->base_qpair->cmd_cnt;
+	for (i = 0; i < ha->max_qpairs; i++) {
+		if (ha->queue_pair_map[i])
+			cmd_cnt += ha->queue_pair_map[i]->cmd_cnt;
+	}
+	ha->prev_cmd_cnt = cmd_cnt;
+
+	if (!do_hb && ((cmd_cnt - prev_cmd_cnt) > 50))
+		/*
+		 * IOs are completing before periodic hb check.
+		 * IOs seems to be running, do hb for sanity check.
+		 */
+		do_hb = true;
+
+	return do_hb;
+}
+
+static void qla_heart_beat(struct scsi_qla_host *vha)
+{
+	if (vha->vp_idx)
+		return;
+
+	if (vha->hw->flags.eeh_busy || qla2x00_chip_is_down(vha))
+		return;
+
+	if (qla_do_heartbeat(vha)) {
+		set_bit(HEARTBEAT_CHK, &vha->dpc_flags);
+		qla2xxx_wake_dpc(vha);
+	}
+}
+
 /**************************************************************************
 *   qla2x00_timer
 *
@@ -7243,6 +7309,8 @@ qla2x00_timer(struct timer_list *t)
 		qla2xxx_wake_dpc(vha);
 	}
 
+	qla_heart_beat(vha);
+
 	qla2x00_restart_timer(vha, WATCH_INTERVAL);
 }
 
-- 
2.19.0.rc0


             reply	other threads:[~2021-06-19  5:25 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-19  5:24 Nilesh Javali [this message]
2021-06-23  1:54 ` [PATCH v2] qla2xxx: add heartbeat check Martin K. Petersen
2021-06-29  4:10 ` Martin K. Petersen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210619052427.6440-1-njavali@marvell.com \
    --to=njavali@marvell.com \
    --cc=GR-QLogic-Storage-Upstream@marvell.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.