From: Manish Rangankar <mrangankar@marvell.com>
To: <martin.petersen@oracle.com>, <lduncan@suse.com>, <cleech@redhat.com>
Cc: <linux-scsi@vger.kernel.org>, <GR-QLogic-Storage-Upstream@marvell.com>
Subject: [PATCH v2 7/8] qedi: Add firmware error recovery invocation support.
Date: Tue, 8 Sep 2020 02:56:56 -0700 [thread overview]
Message-ID: <20200908095657.26821-8-mrangankar@marvell.com> (raw)
In-Reply-To: <20200908095657.26821-1-mrangankar@marvell.com>
Add support to initiate MFW process recovery for all
the devices if storage function receive the event first.
Also added fix for kernel test robot warning,
>> drivers/scsi/qedi/qedi_main.c:1119:6: warning: no previous prototype
>> for 'qedi_schedule_hw_err_handler' [-Wmissing-prototypes]
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Manish Rangankar <mrangankar@marvell.com>
---
drivers/scsi/qedi/qedi.h | 4 +++
drivers/scsi/qedi/qedi_fw.c | 7 ++--
drivers/scsi/qedi/qedi_iscsi.c | 3 +-
drivers/scsi/qedi/qedi_main.c | 63 +++++++++++++++++++++++++++++++++-
4 files changed, 73 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h
index 9c19ec9dc682..7e59d50f2fab 100644
--- a/drivers/scsi/qedi/qedi.h
+++ b/drivers/scsi/qedi/qedi.h
@@ -274,6 +274,10 @@ struct qedi_ctx {
spinlock_t ll2_lock; /* Light L2 lock */
spinlock_t hba_lock; /* per port lock */
struct task_struct *ll2_recv_thread;
+ unsigned long qedi_err_flags;
+#define QEDI_ERR_ATTN_CLR_EN 0
+#define QEDI_ERR_IS_RECOVERABLE 2
+#define QEDI_ERR_OVERRIDE_EN 31
unsigned long flags;
#define UIO_DEV_OPENED 1
#define QEDI_IOTHREAD_WAKE 2
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index f158fde0a43c..440ddd2309f1 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -1267,7 +1267,8 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn,
rval = wait_event_interruptible_timeout(qedi_conn->wait_queue,
((qedi_conn->cmd_cleanup_req ==
qedi_conn->cmd_cleanup_cmpl) ||
- qedi_conn->ep),
+ test_bit(QEDI_IN_RECOVERY,
+ &qedi->flags)),
5 * HZ);
if (rval) {
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
@@ -1292,7 +1293,9 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn,
/* Enable IOs for all other sessions except current.*/
if (!wait_event_interruptible_timeout(qedi_conn->wait_queue,
(qedi_conn->cmd_cleanup_req ==
- qedi_conn->cmd_cleanup_cmpl),
+ qedi_conn->cmd_cleanup_cmpl) ||
+ test_bit(QEDI_IN_RECOVERY,
+ &qedi->flags),
5 * HZ)) {
iscsi_host_for_each_session(qedi->shost,
qedi_mark_device_available);
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
index ae86a40ca040..08c05403cd72 100644
--- a/drivers/scsi/qedi/qedi_iscsi.c
+++ b/drivers/scsi/qedi/qedi_iscsi.c
@@ -1072,7 +1072,8 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
qedi_ep->state = EP_STATE_DISCONN_START;
- if (test_bit(QEDI_IN_SHUTDOWN, &qedi->flags))
+ if (test_bit(QEDI_IN_SHUTDOWN, &qedi->flags) ||
+ test_bit(QEDI_IN_RECOVERY, &qedi->flags))
goto ep_release_conn;
ret = qedi_ops->destroy_conn(qedi->cdev, qedi_ep->handle, abrt_conn);
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 4f43e2a24b50..1d8c78c41405 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -50,6 +50,10 @@ module_param(qedi_ll2_buf_size, uint, 0644);
MODULE_PARM_DESC(qedi_ll2_buf_size,
"parameter to set ping packet size, default - 0x400, Jumbo packets - 0x2400.");
+static uint qedi_flags_override;
+module_param(qedi_flags_override, uint, 0644);
+MODULE_PARM_DESC(qedi_flags_override, "Disable/Enable MFW error flags bits action.");
+
const struct qed_iscsi_ops *qedi_ops;
static struct scsi_transport_template *qedi_scsi_transport;
static struct pci_driver qedi_pci_driver;
@@ -63,6 +67,8 @@ static void qedi_reset_uio_rings(struct qedi_uio_dev *udev);
static void qedi_ll2_free_skbs(struct qedi_ctx *qedi);
static struct nvm_iscsi_block *qedi_get_nvram_block(struct qedi_ctx *qedi);
static void qedi_recovery_handler(struct work_struct *work);
+static void qedi_schedule_hw_err_handler(void *dev,
+ enum qed_hw_err_type err_type);
static int qedi_iscsi_event_cb(void *context, u8 fw_event_code, void *fw_handle)
{
@@ -1113,6 +1119,39 @@ static void qedi_get_protocol_tlv_data(void *dev, void *data)
return;
}
+void qedi_schedule_hw_err_handler(void *dev,
+ enum qed_hw_err_type err_type)
+{
+ struct qedi_ctx *qedi = (struct qedi_ctx *)dev;
+ unsigned long override_flags = qedi_flags_override;
+
+ if (override_flags && test_bit(QEDI_ERR_OVERRIDE_EN, &override_flags))
+ qedi->qedi_err_flags = qedi_flags_override;
+
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+ "HW error handler scheduled, err=%d err_flags=0x%x\n",
+ err_type, qedi->qedi_err_flags);
+
+ switch (err_type) {
+ case QED_HW_ERR_MFW_RESP_FAIL:
+ case QED_HW_ERR_HW_ATTN:
+ case QED_HW_ERR_DMAE_FAIL:
+ case QED_HW_ERR_RAMROD_FAIL:
+ case QED_HW_ERR_FW_ASSERT:
+ /* Prevent HW attentions from being reasserted */
+ if (test_bit(QEDI_ERR_ATTN_CLR_EN, &qedi->qedi_err_flags))
+ qedi_ops->common->attn_clr_enable(qedi->cdev, true);
+
+ if (err_type == QED_HW_ERR_RAMROD_FAIL &&
+ test_bit(QEDI_ERR_IS_RECOVERABLE, &qedi->qedi_err_flags))
+ qedi_ops->common->recovery_process(qedi->cdev);
+
+ break;
+ default:
+ break;
+ }
+}
+
static void qedi_schedule_recovery_handler(void *dev)
{
struct qedi_ctx *qedi = dev;
@@ -1155,6 +1194,7 @@ static struct qed_iscsi_cb_ops qedi_cb_ops = {
{
.link_update = qedi_link_update,
.schedule_recovery_handler = qedi_schedule_recovery_handler,
+ .schedule_hw_err_handler = qedi_schedule_hw_err_handler,
.get_protocol_tlv_data = qedi_get_protocol_tlv_data,
.get_generic_tlv_data = qedi_get_generic_tlv_data,
}
@@ -2355,6 +2395,7 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
{
struct qedi_ctx *qedi = pci_get_drvdata(pdev);
int rval;
+ u16 retry = 10;
if (mode == QEDI_MODE_SHUTDOWN)
iscsi_host_for_each_session(qedi->shost,
@@ -2383,7 +2424,13 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
qedi_sync_free_irqs(qedi);
if (!test_bit(QEDI_IN_OFFLINE, &qedi->flags)) {
- qedi_ops->stop(qedi->cdev);
+ while (retry--) {
+ rval = qedi_ops->stop(qedi->cdev);
+ if (rval < 0)
+ msleep(1000);
+ else
+ break;
+ }
qedi_ops->ll2->stop(qedi->cdev);
}
@@ -2442,6 +2489,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
struct qed_probe_params qed_params;
void *task_start, *task_end;
int rc;
+ u16 retry = 10;
if (mode != QEDI_MODE_RECOVERY) {
qedi = qedi_host_alloc(pdev);
@@ -2453,6 +2501,10 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
qedi = pci_get_drvdata(pdev);
}
+retry_probe:
+ if (mode == QEDI_MODE_RECOVERY)
+ msleep(2000);
+
memset(&qed_params, 0, sizeof(qed_params));
qed_params.protocol = QED_PROTOCOL_ISCSI;
qed_params.dp_module = qedi_qed_debug;
@@ -2460,11 +2512,20 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
qed_params.is_vf = is_vf;
qedi->cdev = qedi_ops->common->probe(pdev, &qed_params);
if (!qedi->cdev) {
+ if (mode == QEDI_MODE_RECOVERY && retry) {
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+ "Retry %d initialize hardware\n", retry);
+ retry--;
+ goto retry_probe;
+ }
+
rc = -ENODEV;
QEDI_ERR(&qedi->dbg_ctx, "Cannot initialize hardware\n");
goto free_host;
}
+ set_bit(QEDI_ERR_ATTN_CLR_EN, &qedi->qedi_err_flags);
+ set_bit(QEDI_ERR_IS_RECOVERABLE, &qedi->qedi_err_flags);
atomic_set(&qedi->link_state, QEDI_LINK_DOWN);
rc = qedi_ops->fill_dev_info(qedi->cdev, &qedi->dev_info);
--
2.25.0
next prev parent reply other threads:[~2020-09-08 10:00 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-08 9:56 [PATCH v2 0/8] qedi: Misc bug fixes and enhancements Manish Rangankar
2020-09-08 9:56 ` [PATCH v2 1/8] qedi: Use qed count from set_fp_int in msix allocation Manish Rangankar
2020-09-08 9:56 ` [PATCH v2 2/8] qedi: Skip f/w connection termination for pci shutdown handler Manish Rangankar
2020-09-08 9:56 ` [PATCH v2 3/8] qedi: Fix list_del corruption while removing active IO Manish Rangankar
2020-09-08 9:56 ` [PATCH v2 4/8] qedi: Protect active command list to avoid list corruption Manish Rangankar
2020-09-08 9:56 ` [PATCH v2 5/8] qedi: Use snprintf instead of sprintf Manish Rangankar
2020-09-08 9:56 ` [PATCH v2 6/8] qedi: Mark all connections for recovery on link down event Manish Rangankar
2020-09-08 9:56 ` Manish Rangankar [this message]
2020-09-08 9:56 ` [PATCH v2 8/8] qedi: Add support for handling the pcie errors Manish Rangankar
2020-09-09 2:42 ` [PATCH v2 0/8] qedi: Misc bug fixes and enhancements Martin K. Petersen
2020-09-15 20:16 ` Martin K. Petersen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200908095657.26821-8-mrangankar@marvell.com \
--to=mrangankar@marvell.com \
--cc=GR-QLogic-Storage-Upstream@marvell.com \
--cc=cleech@redhat.com \
--cc=lduncan@suse.com \
--cc=linux-scsi@vger.kernel.org \
--cc=martin.petersen@oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).