All of lore.kernel.org
 help / color / mirror / Atom feed
From: Don Brace <don.brace@microchip.com>
To: <hch@infradead.org>, <martin.petersen@oracle.com>,
	<jejb@linux.vnet.ibm.com>, <linux-scsi@vger.kernel.org>
Cc: <Kevin.Barnett@microchip.com>, <scott.teel@microchip.com>,
	<Justin.Lindley@microchip.com>, <scott.benesh@microchip.com>,
	<gerry.morong@microchip.com>, <mahesh.rajashekhara@microchip.com>,
	<mike.mcgowen@microchip.com>, <murthy.bhat@microchip.com>,
	<balsundar.p@microchip.com>, <joseph.szczypek@hpe.com>,
	<jeff@canonical.com>, <POSWALD@suse.com>,
	<john.p.donnelly@oracle.com>, <mwilck@suse.com>,
	<pmenzel@molgen.mpg.de>, <linux-kernel@vger.kernel.org>
Subject: [smartpqi updates PATCH V2 02/11] smartpqi: add controller handshake during kdump
Date: Tue, 28 Sep 2021 18:54:33 -0500	[thread overview]
Message-ID: <20210928235442.201875-3-don.brace@microchip.com> (raw)
In-Reply-To: <20210928235442.201875-1-don.brace@microchip.com>

From: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>

Correct kdump hangs when controller is locked up.

There are occasions when a controller reboot
(controller soft reset) is issued while a controller
firmware crash dump is in progress.

This leads to an incomplete controller firmware crash dump.
 - When the controller crash dump is in progress,
   and a kdump is initiated, the driver issues an
   inbound doorbell reset to bring the controller
   back into SIS mode.
 - If the controller is in a locked-up state,
   the inbound doorbell reset does not work, causing
   controller initialization failures. This results
   in the driver hanging while waiting for SIS mode.

To avoid an incomplete controller crash dump, add in
a controller crash dump handshake.
 - Controller will indicate start and end of the controller
   crash dump by setting some register bits.
 - Driver will look at these bits when a kdump is initiated.
   If a controller crash dump is in progress, the driver will
   wait for the controller crash dump to complete
   before issuing the controller soft reset, and then complete
   driver initialization.

Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Scott Teel <scott.teel@microchip.com>
Reviewed-by: Mike McGowen <mike.mcgowen@microchip.com>
Signed-off-by: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
---
 drivers/scsi/smartpqi/smartpqi_init.c | 41 +++++++++++++++++++--
 drivers/scsi/smartpqi/smartpqi_sis.c  | 51 +++++++++++++++++++++++++++
 drivers/scsi/smartpqi/smartpqi_sis.h  |  1 +
 3 files changed, 91 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 97027574eb1f..5655d240f7a7 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -234,15 +234,46 @@ static inline bool pqi_is_hba_lunid(u8 *scsi3addr)
 	return pqi_scsi3addr_equal(scsi3addr, RAID_CTLR_LUNID);
 }
 
+#define PQI_DRIVER_SCRATCH_PQI_MODE			0x1
+#define PQI_DRIVER_SCRATCH_FW_TRIAGE_SUPPORTED		0x2
+
 static inline enum pqi_ctrl_mode pqi_get_ctrl_mode(struct pqi_ctrl_info *ctrl_info)
 {
-	return sis_read_driver_scratch(ctrl_info);
+	return sis_read_driver_scratch(ctrl_info) & PQI_DRIVER_SCRATCH_PQI_MODE ? PQI_MODE : SIS_MODE;
 }
 
 static inline void pqi_save_ctrl_mode(struct pqi_ctrl_info *ctrl_info,
 	enum pqi_ctrl_mode mode)
 {
-	sis_write_driver_scratch(ctrl_info, mode);
+	u32 driver_scratch;
+
+	driver_scratch = sis_read_driver_scratch(ctrl_info);
+
+	if (mode == PQI_MODE)
+		driver_scratch |= PQI_DRIVER_SCRATCH_PQI_MODE;
+	else
+		driver_scratch &= ~PQI_DRIVER_SCRATCH_PQI_MODE;
+
+	sis_write_driver_scratch(ctrl_info, driver_scratch);
+}
+
+static inline bool pqi_is_fw_triage_supported(struct pqi_ctrl_info *ctrl_info)
+{
+	return (sis_read_driver_scratch(ctrl_info) & PQI_DRIVER_SCRATCH_FW_TRIAGE_SUPPORTED) != 0;
+}
+
+static inline void pqi_save_fw_triage_setting(struct pqi_ctrl_info *ctrl_info, bool is_supported)
+{
+	u32 driver_scratch;
+
+	driver_scratch = sis_read_driver_scratch(ctrl_info);
+
+	if (is_supported)
+		driver_scratch |= PQI_DRIVER_SCRATCH_FW_TRIAGE_SUPPORTED;
+	else
+		driver_scratch &= ~PQI_DRIVER_SCRATCH_FW_TRIAGE_SUPPORTED;
+
+	sis_write_driver_scratch(ctrl_info, driver_scratch);
 }
 
 static inline void pqi_ctrl_block_scan(struct pqi_ctrl_info *ctrl_info)
@@ -7292,6 +7323,7 @@ static void pqi_ctrl_update_feature_flags(struct pqi_ctrl_info *ctrl_info,
 		ctrl_info->unique_wwid_in_report_phys_lun_supported =
 			firmware_feature->enabled;
 		break;
+		pqi_save_fw_triage_setting(ctrl_info, firmware_feature->enabled);
 	}
 
 	pqi_firmware_feature_status(ctrl_info, firmware_feature);
@@ -7618,6 +7650,11 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info)
 	u32 product_id;
 
 	if (reset_devices) {
+		if (pqi_is_fw_triage_supported(ctrl_info)) {
+			rc = sis_wait_for_fw_triage_completion(ctrl_info);
+			if (rc)
+				return rc;
+		}
 		sis_soft_reset(ctrl_info);
 		msleep(PQI_POST_RESET_DELAY_SECS * PQI_HZ);
 	} else {
diff --git a/drivers/scsi/smartpqi/smartpqi_sis.c b/drivers/scsi/smartpqi/smartpqi_sis.c
index d63c46a8e38b..8acd3a80f582 100644
--- a/drivers/scsi/smartpqi/smartpqi_sis.c
+++ b/drivers/scsi/smartpqi/smartpqi_sis.c
@@ -51,12 +51,20 @@
 #define SIS_BASE_STRUCT_REVISION		9
 #define SIS_BASE_STRUCT_ALIGNMENT		16
 
+#define SIS_CTRL_KERNEL_FW_TRIAGE		0x3
 #define SIS_CTRL_KERNEL_UP			0x80
 #define SIS_CTRL_KERNEL_PANIC			0x100
 #define SIS_CTRL_READY_TIMEOUT_SECS		180
 #define SIS_CTRL_READY_RESUME_TIMEOUT_SECS	90
 #define SIS_CTRL_READY_POLL_INTERVAL_MSECS	10
 
+enum sis_fw_triage_status {
+	FW_TRIAGE_NOT_STARTED = 0,
+	FW_TRIAGE_STARTED,
+	FW_TRIAGE_COND_INVALID,
+	FW_TRIAGE_COMPLETED
+};
+
 #pragma pack(1)
 
 /* for use with SIS_CMD_INIT_BASE_STRUCT_ADDRESS command */
@@ -419,12 +427,55 @@ u32 sis_read_driver_scratch(struct pqi_ctrl_info *ctrl_info)
 	return readl(&ctrl_info->registers->sis_driver_scratch);
 }
 
+static inline enum sis_fw_triage_status
+	sis_read_firmware_triage_status(struct pqi_ctrl_info *ctrl_info)
+{
+	return ((enum sis_fw_triage_status)(readl(&ctrl_info->registers->sis_firmware_status) &
+		SIS_CTRL_KERNEL_FW_TRIAGE));
+}
+
 void sis_soft_reset(struct pqi_ctrl_info *ctrl_info)
 {
 	writel(SIS_SOFT_RESET,
 		&ctrl_info->registers->sis_host_to_ctrl_doorbell);
 }
 
+#define SIS_FW_TRIAGE_STATUS_TIMEOUT_SECS		300
+#define SIS_FW_TRIAGE_STATUS_POLL_INTERVAL_SECS		1
+
+int sis_wait_for_fw_triage_completion(struct pqi_ctrl_info *ctrl_info)
+{
+	int rc;
+	enum sis_fw_triage_status status;
+	unsigned long timeout;
+
+	timeout = (SIS_FW_TRIAGE_STATUS_TIMEOUT_SECS * PQI_HZ) + jiffies;
+	while (1) {
+		status = sis_read_firmware_triage_status(ctrl_info);
+		if (status == FW_TRIAGE_COND_INVALID) {
+			dev_err(&ctrl_info->pci_dev->dev,
+				"firmware triage condition invalid\n");
+			rc = -EINVAL;
+			break;
+		} else if (status == FW_TRIAGE_NOT_STARTED ||
+			status == FW_TRIAGE_COMPLETED) {
+			rc = 0;
+			break;
+		}
+
+		if (time_after(jiffies, timeout)) {
+			dev_err(&ctrl_info->pci_dev->dev,
+				"timed out waiting for firmware triage status\n");
+			rc = -ETIMEDOUT;
+			break;
+		}
+
+		ssleep(SIS_FW_TRIAGE_STATUS_POLL_INTERVAL_SECS);
+	}
+
+	return rc;
+}
+
 static void __attribute__((unused)) verify_structures(void)
 {
 	BUILD_BUG_ON(offsetof(struct sis_base_struct,
diff --git a/drivers/scsi/smartpqi/smartpqi_sis.h b/drivers/scsi/smartpqi/smartpqi_sis.h
index d29c1352a826..c1db93054c86 100644
--- a/drivers/scsi/smartpqi/smartpqi_sis.h
+++ b/drivers/scsi/smartpqi/smartpqi_sis.h
@@ -28,5 +28,6 @@ void sis_write_driver_scratch(struct pqi_ctrl_info *ctrl_info, u32 value);
 u32 sis_read_driver_scratch(struct pqi_ctrl_info *ctrl_info);
 void sis_soft_reset(struct pqi_ctrl_info *ctrl_info);
 u32 sis_get_product_id(struct pqi_ctrl_info *ctrl_info);
+int sis_wait_for_fw_triage_completion(struct pqi_ctrl_info *ctrl_info);
 
 #endif	/* _SMARTPQI_SIS_H */
-- 
2.28.0.rc1.9.ge7ae437ac1


  parent reply	other threads:[~2021-09-28 23:55 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-28 23:54 [smartpqi updates PATCH V2 00/11] smartpqi updates Don Brace
2021-09-28 23:54 ` [smartpqi updates PATCH V2 01/11] smartpqi: update device removal management Don Brace
2021-09-30 18:21   ` john.p.donnelly
2021-09-28 23:54 ` Don Brace [this message]
2021-09-30 18:21   ` [smartpqi updates PATCH V2 02/11] smartpqi: add controller handshake during kdump john.p.donnelly
2021-09-28 23:54 ` [smartpqi updates PATCH V2 03/11] smartpqi: capture controller reason codes Don Brace
2021-09-30 18:22   ` john.p.donnelly
2021-09-28 23:54 ` [smartpqi updates PATCH V2 04/11] smartpqi: update LUN reset handler Don Brace
2021-09-30 18:22   ` john.p.donnelly
2021-09-28 23:54 ` [smartpqi updates PATCH V2 05/11] smartpqi: add tur check for sanitize operation Don Brace
2021-09-29  7:56   ` Paul Menzel
2021-09-30 18:23   ` john.p.donnelly
2021-09-28 23:54 ` [smartpqi updates PATCH V2 06/11] smartpqi: avoid failing ios for offline devices Don Brace
2021-09-30 18:23   ` john.p.donnelly
2021-09-28 23:54 ` [smartpqi updates PATCH V2 07/11] smartpqi: add extended report physical luns Don Brace
2021-09-30 18:23   ` john.p.donnelly
2021-09-28 23:54 ` [smartpqi updates PATCH V2 08/11] smartpqi: fix boot failure during lun rebuild Don Brace
2021-09-30 18:24   ` john.p.donnelly
2021-09-28 23:54 ` [smartpqi updates PATCH V2 09/11] smartpqi: fix duplicate device nodes for tape changers Don Brace
2021-09-30 18:24   ` john.p.donnelly
2021-10-01  8:26   ` Paul Menzel
2021-10-05 20:23     ` Don.Brace
2021-10-06  2:37       ` Martin K. Petersen
2021-10-06 14:28         ` Don.Brace
2021-10-07  9:38       ` Paul Menzel
2021-09-28 23:54 ` [smartpqi updates PATCH V2 10/11] smartpqi: add 3252-8i pci id Don Brace
2021-09-30 18:24   ` john.p.donnelly
2021-09-28 23:54 ` [smartpqi updates PATCH V2 11/11] smartpqi: update version to 2.1.12-055 Don Brace
2021-09-30 18:25   ` john.p.donnelly
2021-09-29  9:34 ` [smartpqi updates PATCH V2 00/11] smartpqi updates Paul Menzel
2021-09-29 14:08   ` Don.Brace
2021-09-29 14:12     ` Paul Menzel
2021-10-12 20:35 ` Martin K. Petersen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210928235442.201875-3-don.brace@microchip.com \
    --to=don.brace@microchip.com \
    --cc=Justin.Lindley@microchip.com \
    --cc=Kevin.Barnett@microchip.com \
    --cc=POSWALD@suse.com \
    --cc=balsundar.p@microchip.com \
    --cc=gerry.morong@microchip.com \
    --cc=hch@infradead.org \
    --cc=jeff@canonical.com \
    --cc=jejb@linux.vnet.ibm.com \
    --cc=john.p.donnelly@oracle.com \
    --cc=joseph.szczypek@hpe.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=mahesh.rajashekhara@microchip.com \
    --cc=martin.petersen@oracle.com \
    --cc=mike.mcgowen@microchip.com \
    --cc=murthy.bhat@microchip.com \
    --cc=mwilck@suse.com \
    --cc=pmenzel@molgen.mpg.de \
    --cc=scott.benesh@microchip.com \
    --cc=scott.teel@microchip.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.